From b750101eb236130cf056c675997decbac904cc49 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 17:35:18 +0200 Subject: Adding upstream version 252.22. Signed-off-by: Daniel Baumann --- src/basic/MurmurHash2.c | 91 + src/basic/MurmurHash2.h | 31 + src/basic/af-list.c | 52 + src/basic/af-list.h | 28 + src/basic/af-to-name.awk | 11 + src/basic/alloc-util.c | 108 + src/basic/alloc-util.h | 240 + src/basic/architecture.c | 178 + src/basic/architecture.h | 247 + src/basic/arphrd-to-name.awk | 14 + src/basic/arphrd-util.c | 45 + src/basic/arphrd-util.h | 10 + src/basic/async.c | 108 + src/basic/async.h | 13 + src/basic/audit-util.c | 148 + src/basic/audit-util.h | 17 + src/basic/build.c | 228 + src/basic/build.h | 6 + src/basic/bus-label.c | 79 + src/basic/bus-label.h | 14 + src/basic/cap-list.c | 122 + src/basic/cap-list.h | 11 + src/basic/cap-to-name.awk | 11 + src/basic/capability-util.c | 607 ++ src/basic/capability-util.h | 78 + src/basic/cgroup-util.c | 2309 ++++++++ src/basic/cgroup-util.h | 344 ++ src/basic/chase-symlinks.c | 603 ++ src/basic/chase-symlinks.h | 33 + src/basic/chattr-util.c | 162 + src/basic/chattr-util.h | 61 + src/basic/check-filesystems.sh | 36 + src/basic/compress.c | 1079 ++++ src/basic/compress.h | 112 + src/basic/conf-files.c | 320 ++ src/basic/conf-files.h | 23 + src/basic/coverage.h | 66 + src/basic/def.h | 82 + src/basic/devnum-util.c | 138 + src/basic/devnum-util.h | 52 + src/basic/dirent-util.c | 105 + src/basic/dirent-util.h | 61 + src/basic/dns-def.h | 17 + src/basic/efivars.c | 446 ++ src/basic/efivars.h | 100 + src/basic/env-file.c | 543 ++ src/basic/env-file.h | 17 + src/basic/env-util.c | 904 +++ src/basic/env-util.h | 74 + src/basic/errno-list.c | 37 + src/basic/errno-list.h | 15 + src/basic/errno-to-name.awk | 11 + src/basic/errno-util.h | 176 + src/basic/escape.c | 576 ++ src/basic/escape.h | 72 + src/basic/ether-addr-util.c | 272 + src/basic/ether-addr-util.h | 115 + src/basic/extract-word.c | 301 + src/basic/extract-word.h | 22 + src/basic/fd-util.c | 827 +++ src/basic/fd-util.h | 126 + src/basic/fileio.c | 1436 +++++ src/basic/fileio.h | 131 + src/basic/filesystems-gperf.gperf | 131 + src/basic/filesystems.c | 177 + src/basic/filesystems.h | 42 + src/basic/format-util.c | 101 + src/basic/format-util.h | 105 + src/basic/fs-util.c | 1109 ++++ src/basic/fs-util.h | 128 + src/basic/gcrypt-util.c | 54 + src/basic/gcrypt-util.h | 39 + src/basic/generate-af-list.sh | 8 + src/basic/generate-arphrd-list.sh | 8 + src/basic/generate-cap-list.sh | 8 + src/basic/generate-errno-list.sh | 11 + src/basic/generate-filesystem-list.py | 15 + src/basic/generate-filesystem-switch-case.py | 53 + src/basic/glob-util.c | 103 + src/basic/glob-util.h | 25 + src/basic/glyph-util.c | 139 + src/basic/glyph-util.h | 61 + src/basic/gunicode.c | 111 + src/basic/gunicode.h | 30 + src/basic/hash-funcs.c | 121 + src/basic/hash-funcs.h | 111 + src/basic/hashmap.c | 2100 +++++++ src/basic/hashmap.h | 446 ++ src/basic/hexdecoct.c | 930 ++++ src/basic/hexdecoct.h | 53 + src/basic/hmac.c | 60 + src/basic/hmac.h | 12 + src/basic/hostname-util.c | 209 + src/basic/hostname-util.h | 63 + src/basic/in-addr-util.c | 967 ++++ src/basic/in-addr-util.h | 193 + src/basic/inotify-util.c | 41 + src/basic/inotify-util.h | 38 + src/basic/io-util.c | 345 ++ src/basic/io-util.h | 107 + src/basic/ioprio-util.c | 31 + src/basic/ioprio-util.h | 27 + src/basic/limits-util.c | 192 + src/basic/limits-util.h | 10 + src/basic/linux/README | 8 + src/basic/linux/batman_adv.h | 704 +++ src/basic/linux/btrfs.h | 1173 ++++ src/basic/linux/btrfs_tree.h | 1260 +++++ src/basic/linux/can/netlink.h | 185 + src/basic/linux/can/vxcan.h | 13 + src/basic/linux/cfm_bridge.h | 64 + src/basic/linux/fib_rules.h | 90 + src/basic/linux/fou.h | 48 + src/basic/linux/genetlink.h | 103 + src/basic/linux/hdlc/ioctl.h | 94 + src/basic/linux/if.h | 297 + src/basic/linux/if_addr.h | 79 + src/basic/linux/if_bonding.h | 155 + src/basic/linux/if_bridge.h | 826 +++ src/basic/linux/if_ether.h | 181 + src/basic/linux/if_link.h | 1392 +++++ src/basic/linux/if_macsec.h | 194 + src/basic/linux/if_tun.h | 118 + src/basic/linux/if_tunnel.h | 185 + src/basic/linux/in.h | 331 ++ src/basic/linux/in6.h | 302 + src/basic/linux/ipv6_route.h | 64 + src/basic/linux/l2tp.h | 203 + src/basic/linux/libc-compat.h | 267 + src/basic/linux/mrp_bridge.h | 74 + src/basic/linux/netdevice.h | 66 + src/basic/linux/netfilter/nf_tables.h | 1963 +++++++ src/basic/linux/netfilter/nfnetlink.h | 82 + src/basic/linux/netlink.h | 378 ++ src/basic/linux/nexthop.h | 104 + src/basic/linux/nl80211.h | 7726 ++++++++++++++++++++++++++ src/basic/linux/pkt_sched.h | 1281 +++++ src/basic/linux/rtnetlink.h | 826 +++ src/basic/linux/stddef.h | 46 + src/basic/linux/update.sh | 11 + src/basic/linux/wireguard.h | 196 + src/basic/list.h | 198 + src/basic/locale-util.c | 372 ++ src/basic/locale-util.h | 55 + src/basic/log.c | 1524 +++++ src/basic/log.h | 409 ++ src/basic/login-util.c | 12 + src/basic/login-util.h | 20 + src/basic/macro.h | 442 ++ src/basic/math-util.h | 14 + src/basic/memfd-util.c | 135 + src/basic/memfd-util.h | 18 + src/basic/memory-util.c | 59 + src/basic/memory-util.h | 123 + src/basic/mempool.c | 83 + src/basic/mempool.h | 30 + src/basic/meson.build | 467 ++ src/basic/missing_audit.h | 24 + src/basic/missing_capability.h | 39 + src/basic/missing_drm.h | 10 + src/basic/missing_fcntl.h | 60 + src/basic/missing_fs.h | 71 + src/basic/missing_input.h | 45 + src/basic/missing_ioprio.h | 59 + src/basic/missing_keyctl.h | 79 + src/basic/missing_loop.h | 24 + src/basic/missing_magic.h | 194 + src/basic/missing_mman.h | 12 + src/basic/missing_mount.h | 9 + src/basic/missing_network.h | 83 + src/basic/missing_prctl.h | 14 + src/basic/missing_random.h | 20 + src/basic/missing_resource.h | 11 + src/basic/missing_sched.h | 26 + src/basic/missing_securebits.h | 18 + src/basic/missing_socket.h | 81 + src/basic/missing_stat.h | 135 + src/basic/missing_stdlib.h | 13 + src/basic/missing_syscall.h | 648 +++ src/basic/missing_syscall_def.h | 1199 ++++ src/basic/missing_syscalls.py | 162 + src/basic/missing_threads.h | 15 + src/basic/missing_timerfd.h | 8 + src/basic/missing_type.h | 12 + src/basic/missing_xfs.h | 42 + src/basic/mkdir.c | 249 + src/basic/mkdir.h | 25 + src/basic/mountpoint-util.c | 558 ++ src/basic/mountpoint-util.h | 26 + src/basic/namespace-util.c | 262 + src/basic/namespace-util.h | 48 + src/basic/nss-util.h | 273 + src/basic/nulstr-util.c | 17 + src/basic/nulstr-util.h | 17 + src/basic/ordered-set.c | 103 + src/basic/ordered-set.h | 109 + src/basic/os-util.c | 372 ++ src/basic/os-util.h | 35 + src/basic/parse-util.c | 719 +++ src/basic/parse-util.h | 152 + src/basic/path-lookup.c | 907 +++ src/basic/path-lookup.h | 83 + src/basic/path-util.c | 1397 +++++ src/basic/path-util.h | 208 + src/basic/pcapng.h | 115 + src/basic/percent-util.c | 157 + src/basic/percent-util.h | 65 + src/basic/prioq.c | 310 ++ src/basic/prioq.h | 33 + src/basic/proc-cmdline.c | 376 ++ src/basic/proc-cmdline.h | 39 + src/basic/process-util.c | 1691 ++++++ src/basic/process-util.h | 198 + src/basic/procfs-util.c | 268 + src/basic/procfs-util.h | 21 + src/basic/pthread-util.h | 16 + src/basic/random-util.c | 251 + src/basic/random-util.h | 31 + src/basic/ratelimit.c | 63 + src/basic/ratelimit.h | 28 + src/basic/raw-clone.h | 79 + src/basic/raw-reboot.h | 14 + src/basic/recurse-dir.c | 452 ++ src/basic/recurse-dir.h | 80 + src/basic/replace-var.c | 93 + src/basic/replace-var.h | 4 + src/basic/rlimit-util.c | 413 ++ src/basic/rlimit-util.h | 25 + src/basic/set.h | 155 + src/basic/sigbus.c | 151 + src/basic/sigbus.h | 7 + src/basic/signal-util.c | 285 + src/basic/signal-util.h | 67 + src/basic/siphash24.c | 201 + src/basic/siphash24.h | 53 + src/basic/socket-util.c | 1470 +++++ src/basic/socket-util.h | 343 ++ src/basic/sort-util.c | 33 + src/basic/sort-util.h | 78 + src/basic/sparse-endian.h | 90 + src/basic/special.h | 118 + src/basic/stat-util.c | 443 ++ src/basic/stat-util.h | 98 + src/basic/static-destruct.h | 63 + src/basic/stdio-util.h | 69 + src/basic/strbuf.c | 181 + src/basic/strbuf.h | 39 + src/basic/string-table.c | 15 + src/basic/string-table.h | 116 + src/basic/string-util.c | 1204 ++++ src/basic/string-util.h | 245 + src/basic/strv.c | 997 ++++ src/basic/strv.h | 268 + src/basic/strxcpyx.c | 145 + src/basic/strxcpyx.h | 33 + src/basic/sync-util.c | 163 + src/basic/sync-util.h | 11 + src/basic/syscall-list.txt | 514 ++ src/basic/syscalls-alpha.txt | 514 ++ src/basic/syscalls-arc.txt | 514 ++ src/basic/syscalls-arm.txt | 514 ++ src/basic/syscalls-arm64.txt | 514 ++ src/basic/syscalls-i386.txt | 514 ++ src/basic/syscalls-ia64.txt | 603 ++ src/basic/syscalls-loongarch64.txt | 514 ++ src/basic/syscalls-m68k.txt | 514 ++ src/basic/syscalls-mips64.txt | 514 ++ src/basic/syscalls-mips64n32.txt | 514 ++ src/basic/syscalls-mipso32.txt | 514 ++ src/basic/syscalls-parisc.txt | 514 ++ src/basic/syscalls-powerpc.txt | 514 ++ src/basic/syscalls-powerpc64.txt | 514 ++ src/basic/syscalls-riscv32.txt | 514 ++ src/basic/syscalls-riscv64.txt | 514 ++ src/basic/syscalls-s390.txt | 514 ++ src/basic/syscalls-s390x.txt | 514 ++ src/basic/syscalls-sparc.txt | 514 ++ src/basic/syscalls-x86_64.txt | 514 ++ src/basic/sysctl-util.c | 137 + src/basic/sysctl-util.h | 30 + src/basic/syslog-util.c | 131 + src/basic/syslog-util.h | 16 + src/basic/terminal-util.c | 1463 +++++ src/basic/terminal-util.h | 266 + src/basic/time-util.c | 1635 ++++++ src/basic/time-util.h | 217 + src/basic/tmpfile-util.c | 360 ++ src/basic/tmpfile-util.h | 21 + src/basic/uid-range.c | 237 + src/basic/uid-range.h | 34 + src/basic/umask-util.h | 29 + src/basic/unaligned.h | 99 + src/basic/unit-def.c | 324 ++ src/basic/unit-def.h | 335 ++ src/basic/unit-file.c | 822 +++ src/basic/unit-file.h | 62 + src/basic/unit-name.c | 905 +++ src/basic/unit-name.h | 69 + src/basic/user-util.c | 1068 ++++ src/basic/user-util.h | 150 + src/basic/utf8.c | 611 ++ src/basic/utf8.h | 59 + src/basic/util.c | 140 + src/basic/util.h | 80 + src/basic/virt.c | 1056 ++++ src/basic/virt.h | 70 + src/basic/xattr-util.c | 295 + src/basic/xattr-util.h | 38 + 308 files changed, 90143 insertions(+) create mode 100644 src/basic/MurmurHash2.c create mode 100644 src/basic/MurmurHash2.h create mode 100644 src/basic/af-list.c create mode 100644 src/basic/af-list.h create mode 100644 src/basic/af-to-name.awk create mode 100644 src/basic/alloc-util.c create mode 100644 src/basic/alloc-util.h create mode 100644 src/basic/architecture.c create mode 100644 src/basic/architecture.h create mode 100644 src/basic/arphrd-to-name.awk create mode 100644 src/basic/arphrd-util.c create mode 100644 src/basic/arphrd-util.h create mode 100644 src/basic/async.c create mode 100644 src/basic/async.h create mode 100644 src/basic/audit-util.c create mode 100644 src/basic/audit-util.h create mode 100644 src/basic/build.c create mode 100644 src/basic/build.h create mode 100644 src/basic/bus-label.c create mode 100644 src/basic/bus-label.h create mode 100644 src/basic/cap-list.c create mode 100644 src/basic/cap-list.h create mode 100644 src/basic/cap-to-name.awk create mode 100644 src/basic/capability-util.c create mode 100644 src/basic/capability-util.h create mode 100644 src/basic/cgroup-util.c create mode 100644 src/basic/cgroup-util.h create mode 100644 src/basic/chase-symlinks.c create mode 100644 src/basic/chase-symlinks.h create mode 100644 src/basic/chattr-util.c create mode 100644 src/basic/chattr-util.h create mode 100755 src/basic/check-filesystems.sh create mode 100644 src/basic/compress.c create mode 100644 src/basic/compress.h create mode 100644 src/basic/conf-files.c create mode 100644 src/basic/conf-files.h create mode 100644 src/basic/coverage.h create mode 100644 src/basic/def.h create mode 100644 src/basic/devnum-util.c create mode 100644 src/basic/devnum-util.h create mode 100644 src/basic/dirent-util.c create mode 100644 src/basic/dirent-util.h create mode 100644 src/basic/dns-def.h create mode 100644 src/basic/efivars.c create mode 100644 src/basic/efivars.h create mode 100644 src/basic/env-file.c create mode 100644 src/basic/env-file.h create mode 100644 src/basic/env-util.c create mode 100644 src/basic/env-util.h create mode 100644 src/basic/errno-list.c create mode 100644 src/basic/errno-list.h create mode 100644 src/basic/errno-to-name.awk create mode 100644 src/basic/errno-util.h create mode 100644 src/basic/escape.c create mode 100644 src/basic/escape.h create mode 100644 src/basic/ether-addr-util.c create mode 100644 src/basic/ether-addr-util.h create mode 100644 src/basic/extract-word.c create mode 100644 src/basic/extract-word.h create mode 100644 src/basic/fd-util.c create mode 100644 src/basic/fd-util.h create mode 100644 src/basic/fileio.c create mode 100644 src/basic/fileio.h create mode 100644 src/basic/filesystems-gperf.gperf create mode 100644 src/basic/filesystems.c create mode 100644 src/basic/filesystems.h create mode 100644 src/basic/format-util.c create mode 100644 src/basic/format-util.h create mode 100644 src/basic/fs-util.c create mode 100644 src/basic/fs-util.h create mode 100644 src/basic/gcrypt-util.c create mode 100644 src/basic/gcrypt-util.h create mode 100755 src/basic/generate-af-list.sh create mode 100755 src/basic/generate-arphrd-list.sh create mode 100755 src/basic/generate-cap-list.sh create mode 100755 src/basic/generate-errno-list.sh create mode 100755 src/basic/generate-filesystem-list.py create mode 100755 src/basic/generate-filesystem-switch-case.py create mode 100644 src/basic/glob-util.c create mode 100644 src/basic/glob-util.h create mode 100644 src/basic/glyph-util.c create mode 100644 src/basic/glyph-util.h create mode 100644 src/basic/gunicode.c create mode 100644 src/basic/gunicode.h create mode 100644 src/basic/hash-funcs.c create mode 100644 src/basic/hash-funcs.h create mode 100644 src/basic/hashmap.c create mode 100644 src/basic/hashmap.h create mode 100644 src/basic/hexdecoct.c create mode 100644 src/basic/hexdecoct.h create mode 100644 src/basic/hmac.c create mode 100644 src/basic/hmac.h create mode 100644 src/basic/hostname-util.c create mode 100644 src/basic/hostname-util.h create mode 100644 src/basic/in-addr-util.c create mode 100644 src/basic/in-addr-util.h create mode 100644 src/basic/inotify-util.c create mode 100644 src/basic/inotify-util.h create mode 100644 src/basic/io-util.c create mode 100644 src/basic/io-util.h create mode 100644 src/basic/ioprio-util.c create mode 100644 src/basic/ioprio-util.h create mode 100644 src/basic/limits-util.c create mode 100644 src/basic/limits-util.h create mode 100644 src/basic/linux/README create mode 100644 src/basic/linux/batman_adv.h create mode 100644 src/basic/linux/btrfs.h create mode 100644 src/basic/linux/btrfs_tree.h create mode 100644 src/basic/linux/can/netlink.h create mode 100644 src/basic/linux/can/vxcan.h create mode 100644 src/basic/linux/cfm_bridge.h create mode 100644 src/basic/linux/fib_rules.h create mode 100644 src/basic/linux/fou.h create mode 100644 src/basic/linux/genetlink.h create mode 100644 src/basic/linux/hdlc/ioctl.h create mode 100644 src/basic/linux/if.h create mode 100644 src/basic/linux/if_addr.h create mode 100644 src/basic/linux/if_bonding.h create mode 100644 src/basic/linux/if_bridge.h create mode 100644 src/basic/linux/if_ether.h create mode 100644 src/basic/linux/if_link.h create mode 100644 src/basic/linux/if_macsec.h create mode 100644 src/basic/linux/if_tun.h create mode 100644 src/basic/linux/if_tunnel.h create mode 100644 src/basic/linux/in.h create mode 100644 src/basic/linux/in6.h create mode 100644 src/basic/linux/ipv6_route.h create mode 100644 src/basic/linux/l2tp.h create mode 100644 src/basic/linux/libc-compat.h create mode 100644 src/basic/linux/mrp_bridge.h create mode 100644 src/basic/linux/netdevice.h create mode 100644 src/basic/linux/netfilter/nf_tables.h create mode 100644 src/basic/linux/netfilter/nfnetlink.h create mode 100644 src/basic/linux/netlink.h create mode 100644 src/basic/linux/nexthop.h create mode 100644 src/basic/linux/nl80211.h create mode 100644 src/basic/linux/pkt_sched.h create mode 100644 src/basic/linux/rtnetlink.h create mode 100644 src/basic/linux/stddef.h create mode 100755 src/basic/linux/update.sh create mode 100644 src/basic/linux/wireguard.h create mode 100644 src/basic/list.h create mode 100644 src/basic/locale-util.c create mode 100644 src/basic/locale-util.h create mode 100644 src/basic/log.c create mode 100644 src/basic/log.h create mode 100644 src/basic/login-util.c create mode 100644 src/basic/login-util.h create mode 100644 src/basic/macro.h create mode 100644 src/basic/math-util.h create mode 100644 src/basic/memfd-util.c create mode 100644 src/basic/memfd-util.h create mode 100644 src/basic/memory-util.c create mode 100644 src/basic/memory-util.h create mode 100644 src/basic/mempool.c create mode 100644 src/basic/mempool.h create mode 100644 src/basic/meson.build create mode 100644 src/basic/missing_audit.h create mode 100644 src/basic/missing_capability.h create mode 100644 src/basic/missing_drm.h create mode 100644 src/basic/missing_fcntl.h create mode 100644 src/basic/missing_fs.h create mode 100644 src/basic/missing_input.h create mode 100644 src/basic/missing_ioprio.h create mode 100644 src/basic/missing_keyctl.h create mode 100644 src/basic/missing_loop.h create mode 100644 src/basic/missing_magic.h create mode 100644 src/basic/missing_mman.h create mode 100644 src/basic/missing_mount.h create mode 100644 src/basic/missing_network.h create mode 100644 src/basic/missing_prctl.h create mode 100644 src/basic/missing_random.h create mode 100644 src/basic/missing_resource.h create mode 100644 src/basic/missing_sched.h create mode 100644 src/basic/missing_securebits.h create mode 100644 src/basic/missing_socket.h create mode 100644 src/basic/missing_stat.h create mode 100644 src/basic/missing_stdlib.h create mode 100644 src/basic/missing_syscall.h create mode 100644 src/basic/missing_syscall_def.h create mode 100644 src/basic/missing_syscalls.py create mode 100644 src/basic/missing_threads.h create mode 100644 src/basic/missing_timerfd.h create mode 100644 src/basic/missing_type.h create mode 100644 src/basic/missing_xfs.h create mode 100644 src/basic/mkdir.c create mode 100644 src/basic/mkdir.h create mode 100644 src/basic/mountpoint-util.c create mode 100644 src/basic/mountpoint-util.h create mode 100644 src/basic/namespace-util.c create mode 100644 src/basic/namespace-util.h create mode 100644 src/basic/nss-util.h create mode 100644 src/basic/nulstr-util.c create mode 100644 src/basic/nulstr-util.h create mode 100644 src/basic/ordered-set.c create mode 100644 src/basic/ordered-set.h create mode 100644 src/basic/os-util.c create mode 100644 src/basic/os-util.h create mode 100644 src/basic/parse-util.c create mode 100644 src/basic/parse-util.h create mode 100644 src/basic/path-lookup.c create mode 100644 src/basic/path-lookup.h create mode 100644 src/basic/path-util.c create mode 100644 src/basic/path-util.h create mode 100644 src/basic/pcapng.h create mode 100644 src/basic/percent-util.c create mode 100644 src/basic/percent-util.h create mode 100644 src/basic/prioq.c create mode 100644 src/basic/prioq.h create mode 100644 src/basic/proc-cmdline.c create mode 100644 src/basic/proc-cmdline.h create mode 100644 src/basic/process-util.c create mode 100644 src/basic/process-util.h create mode 100644 src/basic/procfs-util.c create mode 100644 src/basic/procfs-util.h create mode 100644 src/basic/pthread-util.h create mode 100644 src/basic/random-util.c create mode 100644 src/basic/random-util.h create mode 100644 src/basic/ratelimit.c create mode 100644 src/basic/ratelimit.h create mode 100644 src/basic/raw-clone.h create mode 100644 src/basic/raw-reboot.h create mode 100644 src/basic/recurse-dir.c create mode 100644 src/basic/recurse-dir.h create mode 100644 src/basic/replace-var.c create mode 100644 src/basic/replace-var.h create mode 100644 src/basic/rlimit-util.c create mode 100644 src/basic/rlimit-util.h create mode 100644 src/basic/set.h create mode 100644 src/basic/sigbus.c create mode 100644 src/basic/sigbus.h create mode 100644 src/basic/signal-util.c create mode 100644 src/basic/signal-util.h create mode 100644 src/basic/siphash24.c create mode 100644 src/basic/siphash24.h create mode 100644 src/basic/socket-util.c create mode 100644 src/basic/socket-util.h create mode 100644 src/basic/sort-util.c create mode 100644 src/basic/sort-util.h create mode 100644 src/basic/sparse-endian.h create mode 100644 src/basic/special.h create mode 100644 src/basic/stat-util.c create mode 100644 src/basic/stat-util.h create mode 100644 src/basic/static-destruct.h create mode 100644 src/basic/stdio-util.h create mode 100644 src/basic/strbuf.c create mode 100644 src/basic/strbuf.h create mode 100644 src/basic/string-table.c create mode 100644 src/basic/string-table.h create mode 100644 src/basic/string-util.c create mode 100644 src/basic/string-util.h create mode 100644 src/basic/strv.c create mode 100644 src/basic/strv.h create mode 100644 src/basic/strxcpyx.c create mode 100644 src/basic/strxcpyx.h create mode 100644 src/basic/sync-util.c create mode 100644 src/basic/sync-util.h create mode 100644 src/basic/syscall-list.txt create mode 100644 src/basic/syscalls-alpha.txt create mode 100644 src/basic/syscalls-arc.txt create mode 100644 src/basic/syscalls-arm.txt create mode 100644 src/basic/syscalls-arm64.txt create mode 100644 src/basic/syscalls-i386.txt create mode 100644 src/basic/syscalls-ia64.txt create mode 100644 src/basic/syscalls-loongarch64.txt create mode 100644 src/basic/syscalls-m68k.txt create mode 100644 src/basic/syscalls-mips64.txt create mode 100644 src/basic/syscalls-mips64n32.txt create mode 100644 src/basic/syscalls-mipso32.txt create mode 100644 src/basic/syscalls-parisc.txt create mode 100644 src/basic/syscalls-powerpc.txt create mode 100644 src/basic/syscalls-powerpc64.txt create mode 100644 src/basic/syscalls-riscv32.txt create mode 100644 src/basic/syscalls-riscv64.txt create mode 100644 src/basic/syscalls-s390.txt create mode 100644 src/basic/syscalls-s390x.txt create mode 100644 src/basic/syscalls-sparc.txt create mode 100644 src/basic/syscalls-x86_64.txt create mode 100644 src/basic/sysctl-util.c create mode 100644 src/basic/sysctl-util.h create mode 100644 src/basic/syslog-util.c create mode 100644 src/basic/syslog-util.h create mode 100644 src/basic/terminal-util.c create mode 100644 src/basic/terminal-util.h create mode 100644 src/basic/time-util.c create mode 100644 src/basic/time-util.h create mode 100644 src/basic/tmpfile-util.c create mode 100644 src/basic/tmpfile-util.h create mode 100644 src/basic/uid-range.c create mode 100644 src/basic/uid-range.h create mode 100644 src/basic/umask-util.h create mode 100644 src/basic/unaligned.h create mode 100644 src/basic/unit-def.c create mode 100644 src/basic/unit-def.h create mode 100644 src/basic/unit-file.c create mode 100644 src/basic/unit-file.h create mode 100644 src/basic/unit-name.c create mode 100644 src/basic/unit-name.h create mode 100644 src/basic/user-util.c create mode 100644 src/basic/user-util.h create mode 100644 src/basic/utf8.c create mode 100644 src/basic/utf8.h create mode 100644 src/basic/util.c create mode 100644 src/basic/util.h create mode 100644 src/basic/virt.c create mode 100644 src/basic/virt.h create mode 100644 src/basic/xattr-util.c create mode 100644 src/basic/xattr-util.h (limited to 'src/basic') diff --git a/src/basic/MurmurHash2.c b/src/basic/MurmurHash2.c new file mode 100644 index 0000000..43a89a0 --- /dev/null +++ b/src/basic/MurmurHash2.c @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: LicenseRef-murmurhash2-public-domain */ +//----------------------------------------------------------------------------- +// MurmurHash2 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +// Note - This code makes a few assumptions about how your machine behaves - + +// 1. We can read a 4-byte value from any address without crashing +// 2. sizeof(int) == 4 + +// And it has a few limitations - + +// 1. It will not work incrementally. +// 2. It will not produce the same results on little-endian and big-endian +// machines. + +#include "MurmurHash2.h" + +#if __GNUC__ >= 7 +_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"") +#endif + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +#define BIG_CONSTANT(x) (x) + +// Other compilers + +#else // defined(_MSC_VER) + +#define BIG_CONSTANT(x) (x##LLU) + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed ) +{ + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. + + const uint32_t m = 0x5bd1e995; + const int r = 24; + + // Initialize the hash to a 'random' value + + uint32_t h = seed ^ len; + + // Mix 4 bytes at a time into the hash + + const unsigned char * data = (const unsigned char *)key; + + while (len >= 4) + { + uint32_t k = *(uint32_t*)data; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + // Handle the last few bytes of the input array + + switch(len) + { + case 3: h ^= data[2] << 16; /* fall through */ + case 2: h ^= data[1] << 8; /* fall through */ + case 1: h ^= data[0]; /* fall through */ + h *= m; + }; + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} diff --git a/src/basic/MurmurHash2.h b/src/basic/MurmurHash2.h new file mode 100644 index 0000000..5758b86 --- /dev/null +++ b/src/basic/MurmurHash2.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: LicenseRef-murmurhash2-public-domain */ +//----------------------------------------------------------------------------- +// MurmurHash2 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +#pragma once + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +typedef unsigned char uint8_t; +typedef unsigned long uint32_t; +typedef unsigned __int64 uint64_t; + +// Other compilers + +#else // defined(_MSC_VER) + +#include + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed ); + +//----------------------------------------------------------------------------- diff --git a/src/basic/af-list.c b/src/basic/af-list.c new file mode 100644 index 0000000..a9ab891 --- /dev/null +++ b/src/basic/af-list.c @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include + +#include "af-list.h" +#include "macro.h" + +static const struct af_name* lookup_af(register const char *str, register GPERF_LEN_TYPE len); + +#include "af-from-name.h" +#include "af-to-name.h" + +const char *af_to_name(int id) { + + if (id <= 0) + return NULL; + + if ((size_t) id >= ELEMENTSOF(af_names)) + return NULL; + + return af_names[id]; +} + +int af_from_name(const char *name) { + const struct af_name *sc; + + assert(name); + + sc = lookup_af(name, strlen(name)); + if (!sc) + return -EINVAL; + + return sc->id; +} + +int af_max(void) { + return ELEMENTSOF(af_names); +} + +const char *af_to_ipv4_ipv6(int id) { + /* Pretty often we want to map the address family to the typically used protocol name for IPv4 + + * IPv6. Let's add special helpers for that. */ + return id == AF_INET ? "ipv4" : + id == AF_INET6 ? "ipv6" : NULL; +} + +int af_from_ipv4_ipv6(const char *af) { + return streq_ptr(af, "ipv4") ? AF_INET : + streq_ptr(af, "ipv6") ? AF_INET6 : AF_UNSPEC; +} diff --git a/src/basic/af-list.h b/src/basic/af-list.h new file mode 100644 index 0000000..9592b9e --- /dev/null +++ b/src/basic/af-list.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#include "string-util.h" + +const char *af_to_name(int id); +int af_from_name(const char *name); + +static inline const char* af_to_name_short(int id) { + const char *f; + + if (id == AF_UNSPEC) + return "*"; + + f = af_to_name(id); + if (!f) + return "unknown"; + + assert(startswith(f, "AF_")); + return f + 3; +} + +const char* af_to_ipv4_ipv6(int id); +int af_from_ipv4_ipv6(const char *af); + +int af_max(void); diff --git a/src/basic/af-to-name.awk b/src/basic/af-to-name.awk new file mode 100644 index 0000000..b9cfbb7 --- /dev/null +++ b/src/basic/af-to-name.awk @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +BEGIN{ + print "static const char* const af_names[] = { " +} +!/AF_FILE/ && !/AF_ROUTE/ && !/AF_LOCAL/ { + printf " [%s] = \"%s\",\n", $1, $1 +} +END{ + print "};" +} diff --git a/src/basic/alloc-util.c b/src/basic/alloc-util.c new file mode 100644 index 0000000..6063943 --- /dev/null +++ b/src/basic/alloc-util.c @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include + +#include "alloc-util.h" +#include "macro.h" +#include "memory-util.h" + +void* memdup(const void *p, size_t l) { + void *ret; + + assert(l == 0 || p); + + ret = malloc(l ?: 1); + if (!ret) + return NULL; + + return memcpy_safe(ret, p, l); +} + +void* memdup_suffix0(const void *p, size_t l) { + void *ret; + + assert(l == 0 || p); + + /* The same as memdup() but place a safety NUL byte after the allocated memory */ + + if (_unlikely_(l == SIZE_MAX)) /* prevent overflow */ + return NULL; + + ret = malloc(l + 1); + if (!ret) + return NULL; + + ((uint8_t*) ret)[l] = 0; + return memcpy_safe(ret, p, l); +} + +void* greedy_realloc( + void **p, + size_t need, + size_t size) { + + size_t a, newalloc; + void *q; + + assert(p); + + /* We use malloc_usable_size() for determining the current allocated size. On all systems we care + * about this should be safe to rely on. Should there ever arise the need to avoid relying on this we + * can instead locally fall back to realloc() on every call, rounded up to the next exponent of 2 or + * so. */ + + if (*p && (size == 0 || (MALLOC_SIZEOF_SAFE(*p) / size >= need))) + return *p; + + if (_unlikely_(need > SIZE_MAX/2)) /* Overflow check */ + return NULL; + newalloc = need * 2; + + if (size_multiply_overflow(newalloc, size)) + return NULL; + a = newalloc * size; + + if (a < 64) /* Allocate at least 64 bytes */ + a = 64; + + q = realloc(*p, a); + if (!q) + return NULL; + + return *p = q; +} + +void* greedy_realloc0( + void **p, + size_t need, + size_t size) { + + size_t before, after; + uint8_t *q; + + assert(p); + + before = MALLOC_SIZEOF_SAFE(*p); /* malloc_usable_size() will return 0 on NULL input, as per docs */ + + q = greedy_realloc(p, need, size); + if (!q) + return NULL; + + after = MALLOC_SIZEOF_SAFE(q); + + if (size == 0) /* avoid division by zero */ + before = 0; + else + before = (before / size) * size; /* Round down */ + + if (after > before) + memzero(q + before, after - before); + + return q; +} + +void *expand_to_usable(void *ptr, size_t newsize _unused_) { + return ptr; +} diff --git a/src/basic/alloc-util.h b/src/basic/alloc-util.h new file mode 100644 index 0000000..2659219 --- /dev/null +++ b/src/basic/alloc-util.h @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include +#include + +#include "macro.h" + +#if HAS_FEATURE_MEMORY_SANITIZER +# include +#endif + +typedef void (*free_func_t)(void *p); +typedef void* (*mfree_func_t)(void *p); + +/* If for some reason more than 4M are allocated on the stack, let's abort immediately. It's better than + * proceeding and smashing the stack limits. Note that by default RLIMIT_STACK is 8M on Linux. */ +#define ALLOCA_MAX (4U*1024U*1024U) + +#define new(t, n) ((t*) malloc_multiply((n), sizeof(t))) + +#define new0(t, n) ((t*) calloc((n) ?: 1, sizeof(t))) + +#define alloca_safe(n) \ + ({ \ + size_t _nn_ = n; \ + assert(_nn_ <= ALLOCA_MAX); \ + alloca(_nn_ == 0 ? 1 : _nn_); \ + }) \ + +#define newa(t, n) \ + ({ \ + size_t _n_ = n; \ + assert(!size_multiply_overflow(sizeof(t), _n_)); \ + (t*) alloca_safe(sizeof(t)*_n_); \ + }) + +#define newa0(t, n) \ + ({ \ + size_t _n_ = n; \ + assert(!size_multiply_overflow(sizeof(t), _n_)); \ + (t*) alloca0((sizeof(t)*_n_)); \ + }) + +#define newdup(t, p, n) ((t*) memdup_multiply(p, (n), sizeof(t))) + +#define newdup_suffix0(t, p, n) ((t*) memdup_suffix0_multiply(p, (n), sizeof(t))) + +#define malloc0(n) (calloc(1, (n) ?: 1)) + +#define free_and_replace_full(a, b, free_func) \ + ({ \ + typeof(a)* _a = &(a); \ + typeof(b)* _b = &(b); \ + free_func(*_a); \ + *_a = *_b; \ + *_b = NULL; \ + 0; \ + }) + +#define free_and_replace(a, b) \ + free_and_replace_full(a, b, free) + +/* This is similar to free_and_replace_full(), but NULL is not assigned to 'b', and its reference counter is + * increased. */ +#define unref_and_replace_full(a, b, ref_func, unref_func) \ + ({ \ + typeof(a)* _a = &(a); \ + typeof(b) _b = ref_func(b); \ + unref_func(*_a); \ + *_a = _b; \ + 0; \ + }) + +void* memdup(const void *p, size_t l) _alloc_(2); +void* memdup_suffix0(const void *p, size_t l); /* We can't use _alloc_() here, since we return a buffer one byte larger than the specified size */ + +#define memdupa(p, l) \ + ({ \ + void *_q_; \ + size_t _l_ = l; \ + _q_ = alloca_safe(_l_); \ + memcpy_safe(_q_, p, _l_); \ + }) + +#define memdupa_suffix0(p, l) \ + ({ \ + void *_q_; \ + size_t _l_ = l; \ + _q_ = alloca_safe(_l_ + 1); \ + ((uint8_t*) _q_)[_l_] = 0; \ + memcpy_safe(_q_, p, _l_); \ + }) + +static inline void unsetp(void *p) { + /* A trivial "destructor" that can be used in cases where we want to + * unset a pointer from a _cleanup_ function. */ + + *(void**)p = NULL; +} + +static inline void freep(void *p) { + *(void**)p = mfree(*(void**) p); +} + +#define _cleanup_free_ _cleanup_(freep) + +static inline bool size_multiply_overflow(size_t size, size_t need) { + return _unlikely_(need != 0 && size > (SIZE_MAX / need)); +} + +_malloc_ _alloc_(1, 2) static inline void *malloc_multiply(size_t need, size_t size) { + if (size_multiply_overflow(size, need)) + return NULL; + + return malloc(size * need ?: 1); +} + +#if !HAVE_REALLOCARRAY +_alloc_(2, 3) static inline void *reallocarray(void *p, size_t need, size_t size) { + if (size_multiply_overflow(size, need)) + return NULL; + + return realloc(p, size * need ?: 1); +} +#endif + +_alloc_(2, 3) static inline void *memdup_multiply(const void *p, size_t need, size_t size) { + if (size_multiply_overflow(size, need)) + return NULL; + + return memdup(p, size * need); +} + +/* Note that we can't decorate this function with _alloc_() since the returned memory area is one byte larger + * than the product of its parameters. */ +static inline void *memdup_suffix0_multiply(const void *p, size_t need, size_t size) { + if (size_multiply_overflow(size, need)) + return NULL; + + return memdup_suffix0(p, size * need); +} + +void* greedy_realloc(void **p, size_t need, size_t size); +void* greedy_realloc0(void **p, size_t need, size_t size); + +#define GREEDY_REALLOC(array, need) \ + greedy_realloc((void**) &(array), (need), sizeof((array)[0])) + +#define GREEDY_REALLOC0(array, need) \ + greedy_realloc0((void**) &(array), (need), sizeof((array)[0])) + +#define alloca0(n) \ + ({ \ + char *_new_; \ + size_t _len_ = n; \ + _new_ = alloca_safe(_len_); \ + memset(_new_, 0, _len_); \ + }) + +/* It's not clear what alignment glibc/gcc alloca() guarantee, hence provide a guaranteed safe version */ +#define alloca_align(size, align) \ + ({ \ + void *_ptr_; \ + size_t _mask_ = (align) - 1; \ + size_t _size_ = size; \ + _ptr_ = alloca_safe(_size_ + _mask_); \ + (void*)(((uintptr_t)_ptr_ + _mask_) & ~_mask_); \ + }) + +#define alloca0_align(size, align) \ + ({ \ + void *_new_; \ + size_t _xsize_ = (size); \ + _new_ = alloca_align(_xsize_, (align)); \ + memset(_new_, 0, _xsize_); \ + }) + +#if HAS_FEATURE_MEMORY_SANITIZER +# define msan_unpoison(r, s) __msan_unpoison(r, s) +#else +# define msan_unpoison(r, s) +#endif + +/* Dummy allocator to tell the compiler that the new size of p is newsize. The implementation returns the + * pointer as is; the only reason for its existence is as a conduit for the _alloc_ attribute. This must not + * be inlined (hence a non-static function with _noinline_ because LTO otherwise tries to inline it) because + * gcc then loses the attributes on the function. + * See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96503 */ +void *expand_to_usable(void *p, size_t newsize) _alloc_(2) _returns_nonnull_ _noinline_; + +static inline size_t malloc_sizeof_safe(void **xp) { + if (_unlikely_(!xp || !*xp)) + return 0; + + size_t sz = malloc_usable_size(*xp); + *xp = expand_to_usable(*xp, sz); + /* GCC doesn't see the _returns_nonnull_ when built with ubsan, so yet another hint to make it doubly + * clear that expand_to_usable won't return NULL. + * See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79265 */ + if (!*xp) + assert_not_reached(); + return sz; +} + +/* This returns the number of usable bytes in a malloc()ed region as per malloc_usable_size(), which may + * return a value larger than the size that was actually allocated. Access to that additional memory is + * discouraged because it violates the C standard; a compiler cannot see that this as valid. To help the + * compiler out, the MALLOC_SIZEOF_SAFE macro 'allocates' the usable size using a dummy allocator function + * expand_to_usable. There is a possibility of malloc_usable_size() returning different values during the + * lifetime of an object, which may cause problems, but the glibc allocator does not do that at the moment. */ +#define MALLOC_SIZEOF_SAFE(x) \ + malloc_sizeof_safe((void**) &__builtin_choose_expr(__builtin_constant_p(x), (void*) { NULL }, (x))) + +/* Inspired by ELEMENTSOF() but operates on malloc()'ed memory areas: typesafely returns the number of items + * that fit into the specified memory block */ +#define MALLOC_ELEMENTSOF(x) \ + (__builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(x), typeof(&*(x))), \ + MALLOC_SIZEOF_SAFE(x)/sizeof((x)[0]), \ + VOID_0)) + + +/* These are like strdupa()/strndupa(), but honour ALLOCA_MAX */ +#define strdupa_safe(s) \ + ({ \ + const char *_t = (s); \ + (char*) memdupa_suffix0(_t, strlen(_t)); \ + }) + +#define strndupa_safe(s, n) \ + ({ \ + const char *_t = (s); \ + (char*) memdupa_suffix0(_t, strnlen(_t, (n))); \ + }) + +#include "memory-util.h" diff --git a/src/basic/architecture.c b/src/basic/architecture.c new file mode 100644 index 0000000..773ee3c --- /dev/null +++ b/src/basic/architecture.c @@ -0,0 +1,178 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include + +#include "architecture.h" +#include "macro.h" +#include "string-table.h" +#include "string-util.h" + +Architecture uname_architecture(void) { + + /* Return a sanitized enum identifying the architecture we are running on. This + * is based on uname(), and the user may hence control what this returns by using + * personality(). This puts the user in control on systems that can run binaries + * of multiple architectures. + * + * We do not translate the string returned by uname() 1:1. Instead we try to + * clean it up and break down the confusion on x86 and arm in particular. + * + * We try to distinguish CPUs, not CPU features, i.e. actual architectures that + * have genuinely different code. */ + + static const struct { + const char *machine; + Architecture arch; + } arch_map[] = { +#if defined(__aarch64__) || defined(__arm__) + { "aarch64", ARCHITECTURE_ARM64 }, + { "aarch64_be", ARCHITECTURE_ARM64_BE }, + { "armv8l", ARCHITECTURE_ARM }, + { "armv8b", ARCHITECTURE_ARM_BE }, + { "armv7ml", ARCHITECTURE_ARM }, + { "armv7mb", ARCHITECTURE_ARM_BE }, + { "armv7l", ARCHITECTURE_ARM }, + { "armv7b", ARCHITECTURE_ARM_BE }, + { "armv6l", ARCHITECTURE_ARM }, + { "armv6b", ARCHITECTURE_ARM_BE }, + { "armv5tl", ARCHITECTURE_ARM }, + { "armv5tel", ARCHITECTURE_ARM }, + { "armv5tejl", ARCHITECTURE_ARM }, + { "armv5tejb", ARCHITECTURE_ARM_BE }, + { "armv5teb", ARCHITECTURE_ARM_BE }, + { "armv5tb", ARCHITECTURE_ARM_BE }, + { "armv4tl", ARCHITECTURE_ARM }, + { "armv4tb", ARCHITECTURE_ARM_BE }, + { "armv4l", ARCHITECTURE_ARM }, + { "armv4b", ARCHITECTURE_ARM_BE }, + +#elif defined(__alpha__) + { "alpha" , ARCHITECTURE_ALPHA }, + +#elif defined(__arc__) + { "arc", ARCHITECTURE_ARC }, + { "arceb", ARCHITECTURE_ARC_BE }, + +#elif defined(__cris__) + { "crisv32", ARCHITECTURE_CRIS }, + +#elif defined(__i386__) || defined(__x86_64__) + { "x86_64", ARCHITECTURE_X86_64 }, + { "i686", ARCHITECTURE_X86 }, + { "i586", ARCHITECTURE_X86 }, + { "i486", ARCHITECTURE_X86 }, + { "i386", ARCHITECTURE_X86 }, + +#elif defined(__ia64__) + { "ia64", ARCHITECTURE_IA64 }, + +#elif defined(__hppa__) || defined(__hppa64__) + { "parisc64", ARCHITECTURE_PARISC64 }, + { "parisc", ARCHITECTURE_PARISC }, + +#elif defined(__loongarch64) + { "loongarch64", ARCHITECTURE_LOONGARCH64 }, + +#elif defined(__m68k__) + { "m68k", ARCHITECTURE_M68K }, + +#elif defined(__mips__) || defined(__mips64__) + { "mips64", ARCHITECTURE_MIPS64 }, + { "mips", ARCHITECTURE_MIPS }, + +#elif defined(__nios2__) + { "nios2", ARCHITECTURE_NIOS2 }, + +#elif defined(__powerpc__) || defined(__powerpc64__) + { "ppc64le", ARCHITECTURE_PPC64_LE }, + { "ppc64", ARCHITECTURE_PPC64 }, + { "ppcle", ARCHITECTURE_PPC_LE }, + { "ppc", ARCHITECTURE_PPC }, + +#elif defined(__riscv) + { "riscv64", ARCHITECTURE_RISCV64 }, + { "riscv32", ARCHITECTURE_RISCV32 }, +# if __SIZEOF_POINTER__ == 4 + { "riscv", ARCHITECTURE_RISCV32 }, +# elif __SIZEOF_POINTER__ == 8 + { "riscv", ARCHITECTURE_RISCV64 }, +# endif + +#elif defined(__s390__) || defined(__s390x__) + { "s390x", ARCHITECTURE_S390X }, + { "s390", ARCHITECTURE_S390 }, + +#elif defined(__sh__) || defined(__sh64__) + { "sh5", ARCHITECTURE_SH64 }, + { "sh4a", ARCHITECTURE_SH }, + { "sh4", ARCHITECTURE_SH }, + { "sh3", ARCHITECTURE_SH }, + { "sh2a", ARCHITECTURE_SH }, + { "sh2", ARCHITECTURE_SH }, + +#elif defined(__sparc__) + { "sparc64", ARCHITECTURE_SPARC64 }, + { "sparc", ARCHITECTURE_SPARC }, + +#elif defined(__tilegx__) + { "tilegx", ARCHITECTURE_TILEGX }, + +#else +# error "Please register your architecture here!" +#endif + }; + + static Architecture cached = _ARCHITECTURE_INVALID; + struct utsname u; + + if (cached != _ARCHITECTURE_INVALID) + return cached; + + assert_se(uname(&u) >= 0); + + for (size_t i = 0; i < ELEMENTSOF(arch_map); i++) + if (streq(arch_map[i].machine, u.machine)) + return cached = arch_map[i].arch; + + assert_not_reached(); + return _ARCHITECTURE_INVALID; +} + +/* Maintain same order as in the table above. */ +static const char *const architecture_table[_ARCHITECTURE_MAX] = { + [ARCHITECTURE_ARM64] = "arm64", + [ARCHITECTURE_ARM64_BE] = "arm64-be", + [ARCHITECTURE_ARM] = "arm", + [ARCHITECTURE_ARM_BE] = "arm-be", + [ARCHITECTURE_ALPHA] = "alpha", + [ARCHITECTURE_ARC] = "arc", + [ARCHITECTURE_ARC_BE] = "arc-be", + [ARCHITECTURE_CRIS] = "cris", + [ARCHITECTURE_X86_64] = "x86-64", + [ARCHITECTURE_X86] = "x86", + [ARCHITECTURE_IA64] = "ia64", + [ARCHITECTURE_LOONGARCH64] = "loongarch64", + [ARCHITECTURE_M68K] = "m68k", + [ARCHITECTURE_MIPS64_LE] = "mips64-le", + [ARCHITECTURE_MIPS64] = "mips64", + [ARCHITECTURE_MIPS_LE] = "mips-le", + [ARCHITECTURE_MIPS] = "mips", + [ARCHITECTURE_NIOS2] = "nios2", + [ARCHITECTURE_PARISC64] = "parisc64", + [ARCHITECTURE_PARISC] = "parisc", + [ARCHITECTURE_PPC64_LE] = "ppc64-le", + [ARCHITECTURE_PPC64] = "ppc64", + [ARCHITECTURE_PPC] = "ppc", + [ARCHITECTURE_PPC_LE] = "ppc-le", + [ARCHITECTURE_RISCV32] = "riscv32", + [ARCHITECTURE_RISCV64] = "riscv64", + [ARCHITECTURE_S390X] = "s390x", + [ARCHITECTURE_S390] = "s390", + [ARCHITECTURE_SH64] = "sh64", + [ARCHITECTURE_SH] = "sh", + [ARCHITECTURE_SPARC64] = "sparc64", + [ARCHITECTURE_SPARC] = "sparc", + [ARCHITECTURE_TILEGX] = "tilegx", +}; + +DEFINE_STRING_TABLE_LOOKUP(architecture, Architecture); diff --git a/src/basic/architecture.h b/src/basic/architecture.h new file mode 100644 index 0000000..b86f5f3 --- /dev/null +++ b/src/basic/architecture.h @@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#include "macro.h" +#include "util.h" + +/* A cleaned up architecture definition. We don't want to get lost in + * processor features, models, generations or even ABIs. Hence we + * focus on general family, and distinguish word width and endianness. */ + +typedef enum { + ARCHITECTURE_ALPHA, + ARCHITECTURE_ARC, + ARCHITECTURE_ARC_BE, + ARCHITECTURE_ARM, + ARCHITECTURE_ARM64, + ARCHITECTURE_ARM64_BE, + ARCHITECTURE_ARM_BE, + ARCHITECTURE_CRIS, + ARCHITECTURE_IA64, + ARCHITECTURE_LOONGARCH64, + ARCHITECTURE_M68K, + ARCHITECTURE_MIPS, + ARCHITECTURE_MIPS64, + ARCHITECTURE_MIPS64_LE, + ARCHITECTURE_MIPS_LE, + ARCHITECTURE_NIOS2, + ARCHITECTURE_PARISC, + ARCHITECTURE_PARISC64, + ARCHITECTURE_PPC, + ARCHITECTURE_PPC64, + ARCHITECTURE_PPC64_LE, + ARCHITECTURE_PPC_LE, + ARCHITECTURE_RISCV32, + ARCHITECTURE_RISCV64, + ARCHITECTURE_S390, + ARCHITECTURE_S390X, + ARCHITECTURE_SH, + ARCHITECTURE_SH64, + ARCHITECTURE_SPARC, + ARCHITECTURE_SPARC64, + ARCHITECTURE_TILEGX, + ARCHITECTURE_X86, + ARCHITECTURE_X86_64, + _ARCHITECTURE_MAX, + _ARCHITECTURE_INVALID = -EINVAL, +} Architecture; + +Architecture uname_architecture(void); + +/* + * LIB_ARCH_TUPLE should resolve to the local library path + * architecture tuple systemd is built for, according to the Debian + * tuple list: + * + * https://wiki.debian.org/Multiarch/Tuples + * + * This is used in library search paths that should understand + * Debian's paths on all distributions. + */ + +#if defined(__x86_64__) +# define native_architecture() ARCHITECTURE_X86_64 +# if defined(__ILP32__) +# define LIB_ARCH_TUPLE "x86_64-linux-gnux32" +# else +# define LIB_ARCH_TUPLE "x86_64-linux-gnu" +# endif +# define ARCHITECTURE_SECONDARY ARCHITECTURE_X86 +#elif defined(__i386__) +# define native_architecture() ARCHITECTURE_X86 +# define LIB_ARCH_TUPLE "i386-linux-gnu" +#elif defined(__powerpc64__) +# if __BYTE_ORDER == __BIG_ENDIAN +# define native_architecture() ARCHITECTURE_PPC64 +# define LIB_ARCH_TUPLE "ppc64-linux-gnu" +# define ARCHITECTURE_SECONDARY ARCHITECTURE_PPC +# else +# define native_architecture() ARCHITECTURE_PPC64_LE +# define LIB_ARCH_TUPLE "powerpc64le-linux-gnu" +# define ARCHITECTURE_SECONDARY ARCHITECTURE_PPC_LE +# endif +#elif defined(__powerpc__) +# if __BYTE_ORDER == __BIG_ENDIAN +# define native_architecture() ARCHITECTURE_PPC +# if defined(__NO_FPRS__) +# define LIB_ARCH_TUPLE "powerpc-linux-gnuspe" +# else +# define LIB_ARCH_TUPLE "powerpc-linux-gnu" +# endif +# else +# define native_architecture() ARCHITECTURE_PPC_LE +# error "Missing LIB_ARCH_TUPLE for PPCLE" +# endif +#elif defined(__ia64__) +# define native_architecture() ARCHITECTURE_IA64 +# define LIB_ARCH_TUPLE "ia64-linux-gnu" +#elif defined(__hppa64__) +# define native_architecture() ARCHITECTURE_PARISC64 +# error "Missing LIB_ARCH_TUPLE for HPPA64" +#elif defined(__hppa__) +# define native_architecture() ARCHITECTURE_PARISC +# define LIB_ARCH_TUPLE "hppa‑linux‑gnu" +#elif defined(__s390x__) +# define native_architecture() ARCHITECTURE_S390X +# define LIB_ARCH_TUPLE "s390x-linux-gnu" +# define ARCHITECTURE_SECONDARY ARCHITECTURE_S390 +#elif defined(__s390__) +# define native_architecture() ARCHITECTURE_S390 +# define LIB_ARCH_TUPLE "s390-linux-gnu" +#elif defined(__sparc__) && defined (__arch64__) +# define native_architecture() ARCHITECTURE_SPARC64 +# define LIB_ARCH_TUPLE "sparc64-linux-gnu" +#elif defined(__sparc__) +# define native_architecture() ARCHITECTURE_SPARC +# define LIB_ARCH_TUPLE "sparc-linux-gnu" +#elif defined(__mips64) && defined(__LP64__) +# if __BYTE_ORDER == __BIG_ENDIAN +# define native_architecture() ARCHITECTURE_MIPS64 +# define LIB_ARCH_TUPLE "mips64-linux-gnuabi64" +# else +# define native_architecture() ARCHITECTURE_MIPS64_LE +# define LIB_ARCH_TUPLE "mips64el-linux-gnuabi64" +# endif +#elif defined(__mips64) +# if __BYTE_ORDER == __BIG_ENDIAN +# define native_architecture() ARCHITECTURE_MIPS64 +# define LIB_ARCH_TUPLE "mips64-linux-gnuabin32" +# else +# define native_architecture() ARCHITECTURE_MIPS64_LE +# define LIB_ARCH_TUPLE "mips64el-linux-gnuabin32" +# endif +#elif defined(__mips__) +# if __BYTE_ORDER == __BIG_ENDIAN +# define native_architecture() ARCHITECTURE_MIPS +# define LIB_ARCH_TUPLE "mips-linux-gnu" +# else +# define native_architecture() ARCHITECTURE_MIPS_LE +# define LIB_ARCH_TUPLE "mipsel-linux-gnu" +# endif +#elif defined(__alpha__) +# define native_architecture() ARCHITECTURE_ALPHA +# define LIB_ARCH_TUPLE "alpha-linux-gnu" +#elif defined(__aarch64__) +# if __BYTE_ORDER == __BIG_ENDIAN +# define native_architecture() ARCHITECTURE_ARM64_BE +# define LIB_ARCH_TUPLE "aarch64_be-linux-gnu" +# else +# define native_architecture() ARCHITECTURE_ARM64 +# define LIB_ARCH_TUPLE "aarch64-linux-gnu" +# define ARCHITECTURE_SECONDARY ARCHITECTURE_ARM +# endif +#elif defined(__arm__) +# if __BYTE_ORDER == __BIG_ENDIAN +# define native_architecture() ARCHITECTURE_ARM_BE +# if defined(__ARM_EABI__) +# if defined(__ARM_PCS_VFP) +# define LIB_ARCH_TUPLE "armeb-linux-gnueabihf" +# else +# define LIB_ARCH_TUPLE "armeb-linux-gnueabi" +# endif +# else +# define LIB_ARCH_TUPLE "armeb-linux-gnu" +# endif +# else +# define native_architecture() ARCHITECTURE_ARM +# if defined(__ARM_EABI__) +# if defined(__ARM_PCS_VFP) +# define LIB_ARCH_TUPLE "arm-linux-gnueabihf" +# else +# define LIB_ARCH_TUPLE "arm-linux-gnueabi" +# endif +# else +# define LIB_ARCH_TUPLE "arm-linux-gnu" +# endif +# endif +#elif defined(__sh64__) +# define native_architecture() ARCHITECTURE_SH64 +# error "Missing LIB_ARCH_TUPLE for SH64" +#elif defined(__sh__) +# define native_architecture() ARCHITECTURE_SH +# if defined(__SH1__) +# define LIB_ARCH_TUPLE "sh1-linux-gnu" +# elif defined(__SH2__) +# define LIB_ARCH_TUPLE "sh2-linux-gnu" +# elif defined(__SH2A__) +# define LIB_ARCH_TUPLE "sh2a-linux-gnu" +# elif defined(__SH2E__) +# define LIB_ARCH_TUPLE "sh2e-linux-gnu" +# elif defined(__SH3__) +# define LIB_ARCH_TUPLE "sh3-linux-gnu" +# elif defined(__SH3E__) +# define LIB_ARCH_TUPLE "sh3e-linux-gnu" +# elif defined(__SH4__) && !defined(__SH4A__) +# define LIB_ARCH_TUPLE "sh4-linux-gnu" +# elif defined(__SH4A__) +# define LIB_ARCH_TUPLE "sh4a-linux-gnu" +# endif +#elif defined(__loongarch64) +# define native_architecture() ARCHITECTURE_LOONGARCH64 +# if defined(__loongarch_double_float) +# define LIB_ARCH_TUPLE "loongarch64-linux-gnuf64" +# elif defined(__loongarch_single_float) +# define LIB_ARCH_TUPLE "loongarch64-linux-gnuf32" +# elif defined(__loongarch_soft_float) +# define LIB_ARCH_TUPLE "loongarch64-linux-gnusf" +# else +# error "Unrecognized loongarch architecture variant" +# endif +#elif defined(__m68k__) +# define native_architecture() ARCHITECTURE_M68K +# define LIB_ARCH_TUPLE "m68k-linux-gnu" +#elif defined(__tilegx__) +# define native_architecture() ARCHITECTURE_TILEGX +# define LIB_ARCH_TUPLE "tilegx-linux-gnu" +#elif defined(__cris__) +# define native_architecture() ARCHITECTURE_CRIS +# error "Missing LIB_ARCH_TUPLE for CRIS" +#elif defined(__nios2__) +# define native_architecture() ARCHITECTURE_NIOS2 +# define LIB_ARCH_TUPLE "nios2-linux-gnu" +#elif defined(__riscv) +# if __SIZEOF_POINTER__ == 4 +# define native_architecture() ARCHITECTURE_RISCV32 +# define LIB_ARCH_TUPLE "riscv32-linux-gnu" +# elif __SIZEOF_POINTER__ == 8 +# define native_architecture() ARCHITECTURE_RISCV64 +# define LIB_ARCH_TUPLE "riscv64-linux-gnu" +# else +# error "Unrecognized riscv architecture variant" +# endif +#elif defined(__arc__) +# if __BYTE_ORDER == __BIG_ENDIAN +# define native_architecture() ARCHITECTURE_ARC_BE +# define LIB_ARCH_TUPLE "arceb-linux" +# else +# define native_architecture() ARCHITECTURE_ARC +# define LIB_ARCH_TUPLE "arc-linux" +# endif +#else +# error "Please register your architecture here!" +#endif + +const char *architecture_to_string(Architecture a) _const_; +Architecture architecture_from_string(const char *s) _pure_; diff --git a/src/basic/arphrd-to-name.awk b/src/basic/arphrd-to-name.awk new file mode 100644 index 0000000..302504b --- /dev/null +++ b/src/basic/arphrd-to-name.awk @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +BEGIN{ + print "const char *arphrd_to_name(int id) {" + print " switch (id) {" +} +!/^HDLC$/ { + printf " case ARPHRD_%s: return \"%s\";\n", $1, $1 +} +END{ + print " default: return NULL;" + print " }" + print "}" +} diff --git a/src/basic/arphrd-util.c b/src/basic/arphrd-util.c new file mode 100644 index 0000000..3ea2c9d --- /dev/null +++ b/src/basic/arphrd-util.c @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include + +#include "arphrd-util.h" +#include "macro.h" + +static const struct arphrd_name* lookup_arphrd(register const char *str, register GPERF_LEN_TYPE len); + +#include "arphrd-from-name.h" +#include "arphrd-to-name.h" + +int arphrd_from_name(const char *name) { + const struct arphrd_name *sc; + + assert(name); + + sc = lookup_arphrd(name, strlen(name)); + if (!sc) + return -EINVAL; + + return sc->id; +} + +size_t arphrd_to_hw_addr_len(uint16_t arphrd) { + switch (arphrd) { + case ARPHRD_ETHER: + return ETH_ALEN; + case ARPHRD_INFINIBAND: + return INFINIBAND_ALEN; + case ARPHRD_TUNNEL: + case ARPHRD_SIT: + case ARPHRD_IPGRE: + return sizeof(struct in_addr); + case ARPHRD_TUNNEL6: + case ARPHRD_IP6GRE: + return sizeof(struct in6_addr); + default: + return 0; + } +} diff --git a/src/basic/arphrd-util.h b/src/basic/arphrd-util.h new file mode 100644 index 0000000..33f5694 --- /dev/null +++ b/src/basic/arphrd-util.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +const char *arphrd_to_name(int id); +int arphrd_from_name(const char *name); + +size_t arphrd_to_hw_addr_len(uint16_t arphrd); diff --git a/src/basic/async.c b/src/basic/async.c new file mode 100644 index 0000000..443cfa9 --- /dev/null +++ b/src/basic/async.c @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include + +#include "async.h" +#include "errno-util.h" +#include "fd-util.h" +#include "log.h" +#include "macro.h" +#include "process-util.h" +#include "signal-util.h" +#include "util.h" + +int asynchronous_job(void* (*func)(void *p), void *arg) { + sigset_t ss, saved_ss; + pthread_attr_t a; + pthread_t t; + int r, k; + + /* It kinda sucks that we have to resort to threads to implement an asynchronous close(), but well, such is + * life. */ + + r = pthread_attr_init(&a); + if (r > 0) + return -r; + + r = pthread_attr_setdetachstate(&a, PTHREAD_CREATE_DETACHED); + if (r > 0) { + r = -r; + goto finish; + } + + assert_se(sigfillset(&ss) >= 0); + + /* Block all signals before forking off the thread, so that the new thread is started with all signals + * blocked. This way the existence of the new thread won't affect signal handling in other threads. */ + + r = pthread_sigmask(SIG_BLOCK, &ss, &saved_ss); + if (r > 0) { + r = -r; + goto finish; + } + + r = pthread_create(&t, &a, func, arg); + + k = pthread_sigmask(SIG_SETMASK, &saved_ss, NULL); + + if (r > 0) + r = -r; + else if (k > 0) + r = -k; + else + r = 0; + +finish: + pthread_attr_destroy(&a); + return r; +} + +int asynchronous_sync(pid_t *ret_pid) { + int r; + + /* This forks off an invocation of fork() as a child process, in order to initiate synchronization to + * disk. Note that we implement this as helper process rather than thread as we don't want the sync() to hang our + * original process ever, and a thread would do that as the process can't exit with threads hanging in blocking + * syscalls. */ + + r = safe_fork("(sd-sync)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS, ret_pid); + if (r < 0) + return r; + if (r == 0) { + /* Child process */ + (void) sync(); + _exit(EXIT_SUCCESS); + } + + return 0; +} + +static void *close_thread(void *p) { + (void) pthread_setname_np(pthread_self(), "close"); + + assert_se(close_nointr(PTR_TO_FD(p)) != -EBADF); + return NULL; +} + +int asynchronous_close(int fd) { + int r; + + /* This is supposed to behave similar to safe_close(), but + * actually invoke close() asynchronously, so that it will + * never block. Ideally the kernel would have an API for this, + * but it doesn't, so we work around it, and hide this as a + * far away as we can. */ + + if (fd >= 0) { + PROTECT_ERRNO; + + r = asynchronous_job(close_thread, FD_TO_PTR(fd)); + if (r < 0) + assert_se(close_nointr(fd) != -EBADF); + } + + return -1; +} diff --git a/src/basic/async.h b/src/basic/async.h new file mode 100644 index 0000000..e0bbaa5 --- /dev/null +++ b/src/basic/async.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#include "macro.h" + +int asynchronous_job(void* (*func)(void *p), void *arg); + +int asynchronous_sync(pid_t *ret_pid); +int asynchronous_close(int fd); + +DEFINE_TRIVIAL_CLEANUP_FUNC(int, asynchronous_close); diff --git a/src/basic/audit-util.c b/src/basic/audit-util.c new file mode 100644 index 0000000..f2dce20 --- /dev/null +++ b/src/basic/audit-util.c @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "audit-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "macro.h" +#include "parse-util.h" +#include "process-util.h" +#include "socket-util.h" +#include "user-util.h" + +int audit_session_from_pid(pid_t pid, uint32_t *id) { + _cleanup_free_ char *s = NULL; + const char *p; + uint32_t u; + int r; + + assert(id); + + /* We don't convert ENOENT to ESRCH here, since we can't + * really distinguish between "audit is not available in the + * kernel" and "the process does not exist", both which will + * result in ENOENT. */ + + p = procfs_file_alloca(pid, "sessionid"); + + r = read_one_line_file(p, &s); + if (r < 0) + return r; + + r = safe_atou32(s, &u); + if (r < 0) + return r; + + if (!audit_session_is_valid(u)) + return -ENODATA; + + *id = u; + return 0; +} + +int audit_loginuid_from_pid(pid_t pid, uid_t *uid) { + _cleanup_free_ char *s = NULL; + const char *p; + uid_t u; + int r; + + assert(uid); + + p = procfs_file_alloca(pid, "loginuid"); + + r = read_one_line_file(p, &s); + if (r < 0) + return r; + + r = parse_uid(s, &u); + if (r == -ENXIO) /* the UID was -1 */ + return -ENODATA; + if (r < 0) + return r; + + *uid = u; + return 0; +} + +static int try_audit_request(int fd) { + struct iovec iov; + struct msghdr mh; + ssize_t n; + + assert(fd >= 0); + + struct { + struct nlmsghdr hdr; + struct nlmsgerr err; + } _packed_ msg = { + .hdr.nlmsg_len = NLMSG_LENGTH(0), + .hdr.nlmsg_type = AUDIT_GET_FEATURE, + .hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + }; + iov = (struct iovec) { + .iov_base = &msg, + .iov_len = msg.hdr.nlmsg_len, + }; + mh = (struct msghdr) { + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + if (sendmsg(fd, &mh, MSG_NOSIGNAL) < 0) + return -errno; + + iov.iov_len = sizeof(msg); + + n = recvmsg_safe(fd, &mh, 0); + if (n < 0) + return -errno; + if (n != NLMSG_LENGTH(sizeof(struct nlmsgerr))) + return -EIO; + + if (msg.hdr.nlmsg_type != NLMSG_ERROR) + return -EINVAL; + + return msg.err.error; +} + +bool use_audit(void) { + static int cached_use = -1; + int r; + + if (cached_use < 0) { + int fd; + + fd = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_AUDIT); + if (fd < 0) { + cached_use = !IN_SET(errno, EAFNOSUPPORT, EPROTONOSUPPORT, EPERM); + if (!cached_use) + log_debug_errno(errno, "Won't talk to audit: %m"); + } else { + /* If we try and use the audit fd but get -ECONNREFUSED, it is because + * we are not in the initial user namespace, and the kernel does not + * have support for audit outside of the initial user namespace + * (see https://elixir.bootlin.com/linux/latest/C/ident/audit_netlink_ok). + * + * If we receive any other error, do not disable audit because we are not + * sure that the error indicates that audit will not work in general. */ + r = try_audit_request(fd); + if (r < 0) { + cached_use = r != -ECONNREFUSED; + log_debug_errno(r, cached_use ? + "Failed to make request on audit fd, ignoring: %m" : + "Won't talk to audit: %m"); + } else + cached_use = true; + + safe_close(fd); + } + } + + return cached_use; +} diff --git a/src/basic/audit-util.h b/src/basic/audit-util.h new file mode 100644 index 0000000..964082b --- /dev/null +++ b/src/basic/audit-util.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include + +#define AUDIT_SESSION_INVALID UINT32_MAX + +int audit_session_from_pid(pid_t pid, uint32_t *id); +int audit_loginuid_from_pid(pid_t pid, uid_t *uid); + +bool use_audit(void); + +static inline bool audit_session_is_valid(uint32_t id) { + return id > 0 && id != AUDIT_SESSION_INVALID; +} diff --git a/src/basic/build.c b/src/basic/build.c new file mode 100644 index 0000000..4a15f90 --- /dev/null +++ b/src/basic/build.c @@ -0,0 +1,228 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "build.h" + +const char* const systemd_features = + + /* PAM and MAC frameworks */ + +#if HAVE_PAM + "+PAM" +#else + "-PAM" +#endif + +#if HAVE_AUDIT + " +AUDIT" +#else + " -AUDIT" +#endif + +#if HAVE_SELINUX + " +SELINUX" +#else + " -SELINUX" +#endif + +#if HAVE_APPARMOR + " +APPARMOR" +#else + " -APPARMOR" +#endif + +#if ENABLE_IMA + " +IMA" +#else + " -IMA" +#endif + +#if ENABLE_SMACK + " +SMACK" +#else + " -SMACK" +#endif + +#if HAVE_SECCOMP + " +SECCOMP" +#else + " -SECCOMP" +#endif + + /* cryptographic libraries */ + +#if HAVE_GCRYPT + " +GCRYPT" +#else + " -GCRYPT" +#endif + +#if HAVE_GNUTLS + " +GNUTLS" +#else + " -GNUTLS" +#endif + +#if HAVE_OPENSSL + " +OPENSSL" +#else + " -OPENSSL" +#endif + + /* all other libraries, sorted alphabetically */ + +#if HAVE_ACL + " +ACL" +#else + " -ACL" +#endif + +#if HAVE_BLKID + " +BLKID" +#else + " -BLKID" +#endif + +#if HAVE_LIBCURL + " +CURL" +#else + " -CURL" +#endif + +#if HAVE_ELFUTILS + " +ELFUTILS" +#else + " -ELFUTILS" +#endif + +#if HAVE_LIBFIDO2 + " +FIDO2" +#else + " -FIDO2" +#endif + +#if HAVE_LIBIDN2 + " +IDN2" +#else + " -IDN2" +#endif + +#if HAVE_LIBIDN + " +IDN" +#else + " -IDN" +#endif + +#if HAVE_LIBIPTC + " +IPTC" +#else + " -IPTC" +#endif + +#if HAVE_KMOD + " +KMOD" +#else + " -KMOD" +#endif + +#if HAVE_LIBCRYPTSETUP + " +LIBCRYPTSETUP" +#else + " -LIBCRYPTSETUP" +#endif + +#if HAVE_LIBFDISK + " +LIBFDISK" +#else + " -LIBFDISK" +#endif + +#if HAVE_PCRE2 + " +PCRE2" +#else + " -PCRE2" +#endif + +#if HAVE_PWQUALITY + " +PWQUALITY" +#else + " -PWQUALITY" +#endif + +#if HAVE_P11KIT + " +P11KIT" +#else + " -P11KIT" +#endif + +#if HAVE_QRENCODE + " +QRENCODE" +#else + " -QRENCODE" +#endif + +#if HAVE_TPM2 + " +TPM2" +#else + " -TPM2" +#endif + + /* compressors */ + +#if HAVE_BZIP2 + " +BZIP2" +#else + " -BZIP2" +#endif + +#if HAVE_LZ4 + " +LZ4" +#else + " -LZ4" +#endif + +#if HAVE_XZ + " +XZ" +#else + " -XZ" +#endif + +#if HAVE_ZLIB + " +ZLIB" +#else + " -ZLIB" +#endif + +#if HAVE_ZSTD + " +ZSTD" +#else + " -ZSTD" +#endif + + /* other stuff that doesn't fit above */ + +#if BPF_FRAMEWORK + " +BPF_FRAMEWORK" +#else + " -BPF_FRAMEWORK" +#endif + +#if HAVE_XKBCOMMON + " +XKBCOMMON" +#else + " -XKBCOMMON" +#endif + +#if ENABLE_UTMP + " +UTMP" +#else + " -UTMP" +#endif + +#if HAVE_SYSV_COMPAT + " +SYSVINIT" +#else + " -SYSVINIT" +#endif + + " default-hierarchy=" DEFAULT_HIERARCHY_NAME + ; diff --git a/src/basic/build.h b/src/basic/build.h new file mode 100644 index 0000000..87276bf --- /dev/null +++ b/src/basic/build.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "version.h" + +extern const char* const systemd_features; diff --git a/src/basic/bus-label.c b/src/basic/bus-label.c new file mode 100644 index 0000000..d33fc92 --- /dev/null +++ b/src/basic/bus-label.c @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include + +#include "alloc-util.h" +#include "bus-label.h" +#include "hexdecoct.h" +#include "macro.h" + +char *bus_label_escape(const char *s) { + char *r, *t; + const char *f; + + assert_return(s, NULL); + + /* Escapes all chars that D-Bus' object path cannot deal + * with. Can be reversed with bus_path_unescape(). We special + * case the empty string. */ + + if (*s == 0) + return strdup("_"); + + r = new(char, strlen(s)*3 + 1); + if (!r) + return NULL; + + for (f = s, t = r; *f; f++) { + + /* Escape everything that is not a-zA-Z0-9. We also escape 0-9 if it's the first character */ + + if (!ascii_isalpha(*f) && + !(f > s && ascii_isdigit(*f))) { + *(t++) = '_'; + *(t++) = hexchar(*f >> 4); + *(t++) = hexchar(*f); + } else + *(t++) = *f; + } + + *t = 0; + + return r; +} + +char *bus_label_unescape_n(const char *f, size_t l) { + char *r, *t; + size_t i; + + assert_return(f, NULL); + + /* Special case for the empty string */ + if (l == 1 && *f == '_') + return strdup(""); + + r = new(char, l + 1); + if (!r) + return NULL; + + for (i = 0, t = r; i < l; ++i) { + if (f[i] == '_') { + int a, b; + + if (l - i < 3 || + (a = unhexchar(f[i + 1])) < 0 || + (b = unhexchar(f[i + 2])) < 0) { + /* Invalid escape code, let's take it literal then */ + *(t++) = '_'; + } else { + *(t++) = (char) ((a << 4) | b); + i += 2; + } + } else + *(t++) = f[i]; + } + + *t = 0; + + return r; +} diff --git a/src/basic/bus-label.h b/src/basic/bus-label.h new file mode 100644 index 0000000..446daba --- /dev/null +++ b/src/basic/bus-label.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +#include "string-util.h" + +char *bus_label_escape(const char *s); +char *bus_label_unescape_n(const char *f, size_t l); + +static inline char *bus_label_unescape(const char *f) { + return bus_label_unescape_n(f, strlen_ptr(f)); +} diff --git a/src/basic/cap-list.c b/src/basic/cap-list.c new file mode 100644 index 0000000..fdc7948 --- /dev/null +++ b/src/basic/cap-list.c @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include + +#include "alloc-util.h" +#include "capability-util.h" +#include "cap-list.h" +#include "extract-word.h" +#include "macro.h" +#include "parse-util.h" +#include "stdio-util.h" +#include "util.h" + +static const struct capability_name* lookup_capability(register const char *str, register GPERF_LEN_TYPE len); + +#include "cap-from-name.h" +#include "cap-to-name.h" + +const char *capability_to_name(int id) { + if (id < 0) + return NULL; + + if ((size_t) id >= ELEMENTSOF(capability_names)) + return NULL; + + return capability_names[id]; +} + +int capability_from_name(const char *name) { + const struct capability_name *sc; + int r, i; + + assert(name); + + /* Try to parse numeric capability */ + r = safe_atoi(name, &i); + if (r >= 0) { + if (i >= 0 && i < 64) + return i; + else + return -EINVAL; + } + + /* Try to parse string capability */ + sc = lookup_capability(name, strlen(name)); + if (!sc) + return -EINVAL; + + return sc->id; +} + +/* This is the number of capability names we are *compiled* with. + * For the max capability number of the currently-running kernel, + * use cap_last_cap(). */ +int capability_list_length(void) { + return (int) ELEMENTSOF(capability_names); +} + +int capability_set_to_string_alloc(uint64_t set, char **s) { + _cleanup_free_ char *str = NULL; + size_t n = 0; + + assert(s); + + for (unsigned i = 0; i <= cap_last_cap(); i++) + if (set & (UINT64_C(1) << i)) { + const char *p; + char buf[2 + 16 + 1]; + size_t add; + + p = capability_to_name(i); + if (!p) { + xsprintf(buf, "0x%x", i); + p = buf; + } + + add = strlen(p); + + if (!GREEDY_REALLOC(str, n + add + 2)) + return -ENOMEM; + + strcpy(mempcpy(str + n, p, add), " "); + n += add + 1; + } + + if (!GREEDY_REALLOC(str, n + 1)) + return -ENOMEM; + + str[n > 0 ? n - 1 : 0] = '\0'; /* truncate the last space, if it's there */ + + *s = TAKE_PTR(str); + + return 0; +} + +int capability_set_from_string(const char *s, uint64_t *set) { + uint64_t val = 0; + + assert(set); + + for (const char *p = s;;) { + _cleanup_free_ char *word = NULL; + int r; + + r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE); + if (r == -ENOMEM) + return r; + if (r <= 0) + break; + + r = capability_from_name(word); + if (r < 0) + continue; + + val |= ((uint64_t) UINT64_C(1)) << (uint64_t) r; + } + + *set = val; + + return 0; +} diff --git a/src/basic/cap-list.h b/src/basic/cap-list.h new file mode 100644 index 0000000..71235d6 --- /dev/null +++ b/src/basic/cap-list.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +const char *capability_to_name(int id); +int capability_from_name(const char *name); +int capability_list_length(void); + +int capability_set_to_string_alloc(uint64_t set, char **s); +int capability_set_from_string(const char *s, uint64_t *set); diff --git a/src/basic/cap-to-name.awk b/src/basic/cap-to-name.awk new file mode 100644 index 0000000..bd8a28c --- /dev/null +++ b/src/basic/cap-to-name.awk @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +BEGIN{ + print "static const char* const capability_names[] = { " +} +{ + printf " [%s] = \"%s\",\n", $1, tolower($1) +} +END{ + print "};" +} diff --git a/src/basic/capability-util.c b/src/basic/capability-util.c new file mode 100644 index 0000000..c9c0b50 --- /dev/null +++ b/src/basic/capability-util.c @@ -0,0 +1,607 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "capability-util.h" +#include "cap-list.h" +#include "fileio.h" +#include "log.h" +#include "macro.h" +#include "missing_prctl.h" +#include "missing_threads.h" +#include "parse-util.h" +#include "user-util.h" +#include "util.h" + +int have_effective_cap(int value) { + _cleanup_cap_free_ cap_t cap = NULL; + cap_flag_value_t fv = CAP_CLEAR; /* To avoid false-positive use-of-uninitialized-value error reported + * by fuzzers. */ + + cap = cap_get_proc(); + if (!cap) + return -errno; + + if (cap_get_flag(cap, value, CAP_EFFECTIVE, &fv) < 0) + return -errno; + + return fv == CAP_SET; +} + +unsigned cap_last_cap(void) { + static thread_local unsigned saved; + static thread_local bool valid = false; + _cleanup_free_ char *content = NULL; + unsigned long p = 0; + int r; + + if (valid) + return saved; + + /* available since linux-3.2 */ + r = read_one_line_file("/proc/sys/kernel/cap_last_cap", &content); + if (r >= 0) { + r = safe_atolu(content, &p); + if (r >= 0) { + + if (p > 63) /* Safety for the future: if one day the kernel learns more than 64 caps, + * then we are in trouble (since we, as much userspace and kernel space + * store capability masks in uint64_t types). Let's hence protect + * ourselves against that and always cap at 63 for now. */ + p = 63; + + saved = p; + valid = true; + return p; + } + } + + /* fall back to syscall-probing for pre linux-3.2 */ + p = MIN((unsigned long) CAP_LAST_CAP, 63U); + + if (prctl(PR_CAPBSET_READ, p) < 0) { + + /* Hmm, look downwards, until we find one that works */ + for (p--; p > 0; p--) + if (prctl(PR_CAPBSET_READ, p) >= 0) + break; + + } else { + + /* Hmm, look upwards, until we find one that doesn't work */ + for (; p < 63; p++) + if (prctl(PR_CAPBSET_READ, p+1) < 0) + break; + } + + saved = p; + valid = true; + + return p; +} + +int capability_update_inherited_set(cap_t caps, uint64_t set) { + /* Add capabilities in the set to the inherited caps, drops capabilities not in the set. + * Do not apply them yet. */ + + for (unsigned i = 0; i <= cap_last_cap(); i++) { + cap_flag_value_t flag = set & (UINT64_C(1) << i) ? CAP_SET : CAP_CLEAR; + cap_value_t v; + + v = (cap_value_t) i; + + if (cap_set_flag(caps, CAP_INHERITABLE, 1, &v, flag) < 0) + return -errno; + } + + return 0; +} + +int capability_ambient_set_apply(uint64_t set, bool also_inherit) { + _cleanup_cap_free_ cap_t caps = NULL; + int r; + + /* Remove capabilities requested in ambient set, but not in the bounding set */ + for (unsigned i = 0; i <= cap_last_cap(); i++) { + if (set == 0) + break; + + if (FLAGS_SET(set, (UINT64_C(1) << i)) && prctl(PR_CAPBSET_READ, i) != 1) { + log_debug("Ambient capability %s requested but missing from bounding set," + " suppressing automatically.", capability_to_name(i)); + set &= ~(UINT64_C(1) << i); + } + } + + /* Add the capabilities to the ambient set (an possibly also the inheritable set) */ + + /* Check that we can use PR_CAP_AMBIENT or quit early. */ + if (!ambient_capabilities_supported()) + return (set & all_capabilities()) == 0 ? + 0 : -EOPNOTSUPP; /* if actually no ambient caps are to be set, be silent, + * otherwise fail recognizably */ + + if (also_inherit) { + caps = cap_get_proc(); + if (!caps) + return -errno; + + r = capability_update_inherited_set(caps, set); + if (r < 0) + return -errno; + + if (cap_set_proc(caps) < 0) + return -errno; + } + + for (unsigned i = 0; i <= cap_last_cap(); i++) { + + if (set & (UINT64_C(1) << i)) { + + /* Add the capability to the ambient set. */ + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, i, 0, 0) < 0) + return -errno; + } else { + + /* Drop the capability so we don't inherit capabilities we didn't ask for. */ + r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, i, 0, 0); + if (r < 0) + return -errno; + + if (r) + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_LOWER, i, 0, 0) < 0) + return -errno; + + } + } + + return 0; +} + +int capability_gain_cap_setpcap(cap_t *ret_before_caps) { + _cleanup_cap_free_ cap_t caps = NULL; + cap_flag_value_t fv; + caps = cap_get_proc(); + if (!caps) + return -errno; + + if (cap_get_flag(caps, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) + return -errno; + + if (fv != CAP_SET) { + _cleanup_cap_free_ cap_t temp_cap = NULL; + static const cap_value_t v = CAP_SETPCAP; + + temp_cap = cap_dup(caps); + if (!temp_cap) + return -errno; + + if (cap_set_flag(temp_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) + return -errno; + + if (cap_set_proc(temp_cap) < 0) + log_debug_errno(errno, "Can't acquire effective CAP_SETPCAP bit, ignoring: %m"); + + /* If we didn't manage to acquire the CAP_SETPCAP bit, we continue anyway, after all this just means + * we'll fail later, when we actually intend to drop some capabilities or try to set securebits. */ + } + if (ret_before_caps) + /* Return the capabilities as they have been before setting CAP_SETPCAP */ + *ret_before_caps = TAKE_PTR(caps); + + return 0; +} + +int capability_bounding_set_drop(uint64_t keep, bool right_now) { + _cleanup_cap_free_ cap_t before_cap = NULL, after_cap = NULL; + int r; + + /* If we are run as PID 1 we will lack CAP_SETPCAP by default + * in the effective set (yes, the kernel drops that when + * executing init!), so get it back temporarily so that we can + * call PR_CAPBSET_DROP. */ + + r = capability_gain_cap_setpcap(&before_cap); + if (r < 0) + return r; + + after_cap = cap_dup(before_cap); + if (!after_cap) + return -errno; + + for (unsigned i = 0; i <= cap_last_cap(); i++) { + cap_value_t v; + + if ((keep & (UINT64_C(1) << i))) + continue; + + /* Drop it from the bounding set */ + if (prctl(PR_CAPBSET_DROP, i) < 0) { + r = -errno; + + /* If dropping the capability failed, let's see if we didn't have it in the first place. If so, + * continue anyway, as dropping a capability we didn't have in the first place doesn't really + * matter anyway. */ + if (prctl(PR_CAPBSET_READ, i) != 0) + goto finish; + } + v = (cap_value_t) i; + + /* Also drop it from the inheritable set, so + * that anything we exec() loses the + * capability for good. */ + if (cap_set_flag(after_cap, CAP_INHERITABLE, 1, &v, CAP_CLEAR) < 0) { + r = -errno; + goto finish; + } + + /* If we shall apply this right now drop it + * also from our own capability sets. */ + if (right_now) { + if (cap_set_flag(after_cap, CAP_PERMITTED, 1, &v, CAP_CLEAR) < 0 || + cap_set_flag(after_cap, CAP_EFFECTIVE, 1, &v, CAP_CLEAR) < 0) { + r = -errno; + goto finish; + } + } + } + + r = 0; + +finish: + if (cap_set_proc(after_cap) < 0) { + /* If there are no actual changes anyway then let's ignore this error. */ + if (cap_compare(before_cap, after_cap) != 0) + r = -errno; + } + + return r; +} + +static int drop_from_file(const char *fn, uint64_t keep) { + _cleanup_free_ char *p = NULL; + uint64_t current, after; + uint32_t hi, lo; + int r, k; + + r = read_one_line_file(fn, &p); + if (r < 0) + return r; + + k = sscanf(p, "%" PRIu32 " %" PRIu32, &lo, &hi); + if (k != 2) + return -EIO; + + current = (uint64_t) lo | ((uint64_t) hi << 32); + after = current & keep; + + if (current == after) + return 0; + + lo = after & UINT32_C(0xFFFFFFFF); + hi = (after >> 32) & UINT32_C(0xFFFFFFFF); + + return write_string_filef(fn, 0, "%" PRIu32 " %" PRIu32, lo, hi); +} + +int capability_bounding_set_drop_usermode(uint64_t keep) { + int r; + + r = drop_from_file("/proc/sys/kernel/usermodehelper/inheritable", keep); + if (r < 0) + return r; + + r = drop_from_file("/proc/sys/kernel/usermodehelper/bset", keep); + if (r < 0) + return r; + + return r; +} + +int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) { + int r; + + /* Unfortunately we cannot leave privilege dropping to PID 1 here, since we want to run as user but + * want to keep some capabilities. Since file capabilities have been introduced this cannot be done + * across exec() anymore, unless our binary has the capability configured in the file system, which + * we want to avoid. */ + + if (setresgid(gid, gid, gid) < 0) + return log_error_errno(errno, "Failed to change group ID: %m"); + + r = maybe_setgroups(0, NULL); + if (r < 0) + return log_error_errno(r, "Failed to drop auxiliary groups list: %m"); + + /* Ensure we keep the permitted caps across the setresuid(). Note that we do this even if we actually + * don't want to keep any capabilities, since we want to be able to drop them from the bounding set + * too, and we can only do that if we have capabilities. */ + if (prctl(PR_SET_KEEPCAPS, 1) < 0) + return log_error_errno(errno, "Failed to enable keep capabilities flag: %m"); + + if (setresuid(uid, uid, uid) < 0) + return log_error_errno(errno, "Failed to change user ID: %m"); + + if (prctl(PR_SET_KEEPCAPS, 0) < 0) + return log_error_errno(errno, "Failed to disable keep capabilities flag: %m"); + + /* Drop all caps from the bounding set (as well as the inheritable/permitted/effective sets), except + * the ones we want to keep */ + r = capability_bounding_set_drop(keep_capabilities, true); + if (r < 0) + return log_error_errno(r, "Failed to drop capabilities: %m"); + + /* Now upgrade the permitted caps we still kept to effective caps */ + if (keep_capabilities != 0) { + cap_value_t bits[log2u64(keep_capabilities) + 1]; + _cleanup_cap_free_ cap_t d = NULL; + unsigned i, j = 0; + + d = cap_init(); + if (!d) + return log_oom(); + + for (i = 0; i < ELEMENTSOF(bits); i++) + if (keep_capabilities & (1ULL << i)) + bits[j++] = i; + + /* use enough bits */ + assert(i == 64 || (keep_capabilities >> i) == 0); + /* don't use too many bits */ + assert(keep_capabilities & (UINT64_C(1) << (i - 1))); + + if (cap_set_flag(d, CAP_EFFECTIVE, j, bits, CAP_SET) < 0 || + cap_set_flag(d, CAP_PERMITTED, j, bits, CAP_SET) < 0) + return log_error_errno(errno, "Failed to enable capabilities bits: %m"); + + if (cap_set_proc(d) < 0) + return log_error_errno(errno, "Failed to increase capabilities: %m"); + } + + return 0; +} + +int drop_capability(cap_value_t cv) { + _cleanup_cap_free_ cap_t tmp_cap = NULL; + + tmp_cap = cap_get_proc(); + if (!tmp_cap) + return -errno; + + if ((cap_set_flag(tmp_cap, CAP_INHERITABLE, 1, &cv, CAP_CLEAR) < 0) || + (cap_set_flag(tmp_cap, CAP_PERMITTED, 1, &cv, CAP_CLEAR) < 0) || + (cap_set_flag(tmp_cap, CAP_EFFECTIVE, 1, &cv, CAP_CLEAR) < 0)) + return -errno; + + if (cap_set_proc(tmp_cap) < 0) + return -errno; + + return 0; +} + +bool ambient_capabilities_supported(void) { + static int cache = -1; + + if (cache >= 0) + return cache; + + /* If PR_CAP_AMBIENT returns something valid, or an unexpected error code we assume that ambient caps are + * available. */ + + cache = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_KILL, 0, 0) >= 0 || + !IN_SET(errno, EINVAL, EOPNOTSUPP, ENOSYS); + + return cache; +} + +bool capability_quintet_mangle(CapabilityQuintet *q) { + uint64_t combined, drop = 0; + bool ambient_supported; + + assert(q); + + combined = q->effective | q->bounding | q->inheritable | q->permitted; + + ambient_supported = q->ambient != UINT64_MAX; + if (ambient_supported) + combined |= q->ambient; + + for (unsigned i = 0; i <= cap_last_cap(); i++) { + unsigned long bit = UINT64_C(1) << i; + if (!FLAGS_SET(combined, bit)) + continue; + + if (prctl(PR_CAPBSET_READ, i) > 0) + continue; + + drop |= bit; + + log_debug("Not in the current bounding set: %s", capability_to_name(i)); + } + + q->effective &= ~drop; + q->bounding &= ~drop; + q->inheritable &= ~drop; + q->permitted &= ~drop; + + if (ambient_supported) + q->ambient &= ~drop; + + return drop != 0; /* Let the caller know we changed something */ +} + +int capability_quintet_enforce(const CapabilityQuintet *q) { + _cleanup_cap_free_ cap_t c = NULL, modified = NULL; + int r; + + if (q->ambient != UINT64_MAX) { + bool changed = false; + + c = cap_get_proc(); + if (!c) + return -errno; + + /* In order to raise the ambient caps set we first need to raise the matching + * inheritable + permitted cap */ + for (unsigned i = 0; i <= cap_last_cap(); i++) { + uint64_t m = UINT64_C(1) << i; + cap_value_t cv = (cap_value_t) i; + cap_flag_value_t old_value_inheritable, old_value_permitted; + + if ((q->ambient & m) == 0) + continue; + + if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value_inheritable) < 0) + return -errno; + if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value_permitted) < 0) + return -errno; + + if (old_value_inheritable == CAP_SET && old_value_permitted == CAP_SET) + continue; + + if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, CAP_SET) < 0) + return -errno; + if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, CAP_SET) < 0) + return -errno; + + changed = true; + } + + if (changed) + if (cap_set_proc(c) < 0) + return -errno; + + r = capability_ambient_set_apply(q->ambient, false); + if (r < 0) + return r; + } + + if (q->inheritable != UINT64_MAX || q->permitted != UINT64_MAX || q->effective != UINT64_MAX) { + bool changed = false; + + if (!c) { + c = cap_get_proc(); + if (!c) + return -errno; + } + + for (unsigned i = 0; i <= cap_last_cap(); i++) { + uint64_t m = UINT64_C(1) << i; + cap_value_t cv = (cap_value_t) i; + + if (q->inheritable != UINT64_MAX) { + cap_flag_value_t old_value, new_value; + + if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value) < 0) { + if (errno == EINVAL) /* If the kernel knows more caps than this + * version of libcap, then this will return + * EINVAL. In that case, simply ignore it, + * pretend it doesn't exist. */ + continue; + + return -errno; + } + + new_value = (q->inheritable & m) ? CAP_SET : CAP_CLEAR; + + if (old_value != new_value) { + changed = true; + + if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, new_value) < 0) + return -errno; + } + } + + if (q->permitted != UINT64_MAX) { + cap_flag_value_t old_value, new_value; + + if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value) < 0) { + if (errno == EINVAL) + continue; + + return -errno; + } + + new_value = (q->permitted & m) ? CAP_SET : CAP_CLEAR; + + if (old_value != new_value) { + changed = true; + + if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, new_value) < 0) + return -errno; + } + } + + if (q->effective != UINT64_MAX) { + cap_flag_value_t old_value, new_value; + + if (cap_get_flag(c, cv, CAP_EFFECTIVE, &old_value) < 0) { + if (errno == EINVAL) + continue; + + return -errno; + } + + new_value = (q->effective & m) ? CAP_SET : CAP_CLEAR; + + if (old_value != new_value) { + changed = true; + + if (cap_set_flag(c, CAP_EFFECTIVE, 1, &cv, new_value) < 0) + return -errno; + } + } + } + + if (changed) { + /* In order to change the bounding caps, we need to keep CAP_SETPCAP for a bit + * longer. Let's add it to our list hence for now. */ + if (q->bounding != UINT64_MAX) { + cap_value_t cv = CAP_SETPCAP; + + modified = cap_dup(c); + if (!modified) + return -ENOMEM; + + if (cap_set_flag(modified, CAP_PERMITTED, 1, &cv, CAP_SET) < 0) + return -errno; + if (cap_set_flag(modified, CAP_EFFECTIVE, 1, &cv, CAP_SET) < 0) + return -errno; + + if (cap_compare(modified, c) == 0) { + /* No change? then drop this nonsense again */ + cap_free(modified); + modified = NULL; + } + } + + /* Now, let's enforce the caps for the first time. Note that this is where we acquire + * caps in any of the sets we currently don't have. We have to do this before + * dropping the bounding caps below, since at that point we can never acquire new + * caps in inherited/permitted/effective anymore, but only lose them. */ + if (cap_set_proc(modified ?: c) < 0) + return -errno; + } + } + + if (q->bounding != UINT64_MAX) { + r = capability_bounding_set_drop(q->bounding, false); + if (r < 0) + return r; + } + + /* If needed, let's now set the caps again, this time in the final version, which differs from what + * we have already set only in the CAP_SETPCAP bit, which we needed for dropping the bounding + * bits. This call only undoes bits and doesn't acquire any which means the bounding caps don't + * matter. */ + if (modified) + if (cap_set_proc(c) < 0) + return -errno; + + return 0; +} diff --git a/src/basic/capability-util.h b/src/basic/capability-util.h new file mode 100644 index 0000000..dbce545 --- /dev/null +++ b/src/basic/capability-util.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include + +#include "macro.h" +#include "missing_capability.h" +#include "util.h" + +#define CAP_ALL UINT64_MAX + +unsigned cap_last_cap(void); +int have_effective_cap(int value); +int capability_gain_cap_setpcap(cap_t *return_caps); +int capability_bounding_set_drop(uint64_t keep, bool right_now); +int capability_bounding_set_drop_usermode(uint64_t keep); + +int capability_ambient_set_apply(uint64_t set, bool also_inherit); +int capability_update_inherited_set(cap_t caps, uint64_t ambient_set); + +int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities); + +int drop_capability(cap_value_t cv); + +DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(cap_t, cap_free, NULL); +#define _cleanup_cap_free_ _cleanup_(cap_freep) + +static inline void cap_free_charpp(char **p) { + if (*p) + cap_free(*p); +} +#define _cleanup_cap_free_charp_ _cleanup_(cap_free_charpp) + +static inline uint64_t all_capabilities(void) { + return UINT64_MAX >> (63 - cap_last_cap()); +} + +static inline bool cap_test_all(uint64_t caps) { + return FLAGS_SET(caps, all_capabilities()); +} + +bool ambient_capabilities_supported(void); + +/* Identical to linux/capability.h's CAP_TO_MASK(), but uses an unsigned 1U instead of a signed 1 for shifting left, in + * order to avoid complaints about shifting a signed int left by 31 bits, which would make it negative. */ +#define CAP_TO_MASK_CORRECTED(x) (1U << ((x) & 31U)) + +typedef struct CapabilityQuintet { + /* Stores all five types of capabilities in one go. Note that we use UINT64_MAX for unset here. This hence + * needs to be updated as soon as Linux learns more than 63 caps. */ + uint64_t effective; + uint64_t bounding; + uint64_t inheritable; + uint64_t permitted; + uint64_t ambient; +} CapabilityQuintet; + +assert_cc(CAP_LAST_CAP < 64); + +#define CAPABILITY_QUINTET_NULL { UINT64_MAX, UINT64_MAX, UINT64_MAX, UINT64_MAX, UINT64_MAX } + +static inline bool capability_quintet_is_set(const CapabilityQuintet *q) { + return q->effective != UINT64_MAX || + q->bounding != UINT64_MAX || + q->inheritable != UINT64_MAX || + q->permitted != UINT64_MAX || + q->ambient != UINT64_MAX; +} + +/* Mangles the specified caps quintet taking the current bounding set into account: + * drops all caps from all five sets if our bounding set doesn't allow them. + * Returns true if the quintet was modified. */ +bool capability_quintet_mangle(CapabilityQuintet *q); + +int capability_quintet_enforce(const CapabilityQuintet *q); diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c new file mode 100644 index 0000000..0b4731f --- /dev/null +++ b/src/basic/cgroup-util.c @@ -0,0 +1,2309 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "cgroup-util.h" +#include "def.h" +#include "dirent-util.h" +#include "extract-word.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-util.h" +#include "fs-util.h" +#include "log.h" +#include "login-util.h" +#include "macro.h" +#include "missing_magic.h" +#include "missing_threads.h" +#include "mkdir.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "set.h" +#include "special.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "unit-name.h" +#include "user-util.h" +#include "xattr-util.h" + +static int cg_enumerate_items(const char *controller, const char *path, FILE **_f, const char *item) { + _cleanup_free_ char *fs = NULL; + FILE *f; + int r; + + assert(_f); + + r = cg_get_path(controller, path, item, &fs); + if (r < 0) + return r; + + f = fopen(fs, "re"); + if (!f) + return -errno; + + *_f = f; + return 0; +} + +int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) { + return cg_enumerate_items(controller, path, _f, "cgroup.procs"); +} + +int cg_read_pid(FILE *f, pid_t *_pid) { + unsigned long ul; + + /* Note that the cgroup.procs might contain duplicates! See + * cgroups.txt for details. */ + + assert(f); + assert(_pid); + + errno = 0; + if (fscanf(f, "%lu", &ul) != 1) { + + if (feof(f)) + return 0; + + return errno_or_else(EIO); + } + + if (ul <= 0) + return -EIO; + + *_pid = (pid_t) ul; + return 1; +} + +int cg_read_event( + const char *controller, + const char *path, + const char *event, + char **ret) { + + _cleanup_free_ char *events = NULL, *content = NULL; + int r; + + r = cg_get_path(controller, path, "cgroup.events", &events); + if (r < 0) + return r; + + r = read_full_virtual_file(events, &content, NULL); + if (r < 0) + return r; + + for (const char *p = content;;) { + _cleanup_free_ char *line = NULL, *key = NULL, *val = NULL; + const char *q; + + r = extract_first_word(&p, &line, "\n", 0); + if (r < 0) + return r; + if (r == 0) + return -ENOENT; + + q = line; + r = extract_first_word(&q, &key, " ", 0); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; + + if (!streq(key, event)) + continue; + + val = strdup(q); + if (!val) + return -ENOMEM; + + *ret = TAKE_PTR(val); + return 0; + } +} + +bool cg_ns_supported(void) { + static thread_local int enabled = -1; + + if (enabled >= 0) + return enabled; + + if (access("/proc/self/ns/cgroup", F_OK) < 0) { + if (errno != ENOENT) + log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m"); + enabled = false; + } else + enabled = true; + + return enabled; +} + +bool cg_freezer_supported(void) { + static thread_local int supported = -1; + + if (supported >= 0) + return supported; + + supported = cg_all_unified() > 0 && access("/sys/fs/cgroup/init.scope/cgroup.freeze", F_OK) == 0; + + return supported; +} + +bool cg_kill_supported(void) { + static thread_local int supported = -1; + + if (supported >= 0) + return supported; + + if (cg_all_unified() <= 0) + supported = false; + else if (access("/sys/fs/cgroup/init.scope/cgroup.kill", F_OK) < 0) { + if (errno != ENOENT) + log_debug_errno(errno, "Failed to check if cgroup.kill is available, assuming not: %m"); + supported = false; + } else + supported = true; + + return supported; +} + +int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) { + _cleanup_free_ char *fs = NULL; + int r; + DIR *d; + + assert(_d); + + /* This is not recursive! */ + + r = cg_get_path(controller, path, NULL, &fs); + if (r < 0) + return r; + + d = opendir(fs); + if (!d) + return -errno; + + *_d = d; + return 0; +} + +int cg_read_subgroup(DIR *d, char **fn) { + assert(d); + assert(fn); + + FOREACH_DIRENT_ALL(de, d, return -errno) { + char *b; + + if (de->d_type != DT_DIR) + continue; + + if (dot_or_dot_dot(de->d_name)) + continue; + + b = strdup(de->d_name); + if (!b) + return -ENOMEM; + + *fn = b; + return 1; + } + + return 0; +} + +int cg_rmdir(const char *controller, const char *path) { + _cleanup_free_ char *p = NULL; + int r; + + r = cg_get_path(controller, path, NULL, &p); + if (r < 0) + return r; + + r = rmdir(p); + if (r < 0 && errno != ENOENT) + return -errno; + + r = cg_hybrid_unified(); + if (r <= 0) + return r; + + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path); + if (r < 0) + log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path); + } + + return 0; +} + +static int cg_kill_items( + const char *controller, + const char *path, + int sig, + CGroupFlags flags, + Set *s, + cg_kill_log_func_t log_kill, + void *userdata, + const char *item) { + + _cleanup_set_free_ Set *allocated_set = NULL; + bool done = false; + int r, ret = 0, ret_log_kill = 0; + pid_t my_pid; + + assert(sig >= 0); + + /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send + * SIGCONT on SIGKILL. */ + if (IN_SET(sig, SIGCONT, SIGKILL)) + flags &= ~CGROUP_SIGCONT; + + /* This goes through the tasks list and kills them all. This + * is repeated until no further processes are added to the + * tasks list, to properly handle forking processes */ + + if (!s) { + s = allocated_set = set_new(NULL); + if (!s) + return -ENOMEM; + } + + my_pid = getpid_cached(); + + do { + _cleanup_fclose_ FILE *f = NULL; + pid_t pid = 0; + done = true; + + r = cg_enumerate_items(controller, path, &f, item); + if (r < 0) { + if (ret >= 0 && r != -ENOENT) + return r; + + return ret; + } + + while ((r = cg_read_pid(f, &pid)) > 0) { + + if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid) + continue; + + if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid)) + continue; + + if (log_kill) + ret_log_kill = log_kill(pid, sig, userdata); + + /* If we haven't killed this process yet, kill + * it */ + if (kill(pid, sig) < 0) { + if (ret >= 0 && errno != ESRCH) + ret = -errno; + } else { + if (flags & CGROUP_SIGCONT) + (void) kill(pid, SIGCONT); + + if (ret == 0) { + if (log_kill) + ret = ret_log_kill; + else + ret = 1; + } + } + + done = false; + + r = set_put(s, PID_TO_PTR(pid)); + if (r < 0) { + if (ret >= 0) + return r; + + return ret; + } + } + + if (r < 0) { + if (ret >= 0) + return r; + + return ret; + } + + /* To avoid racing against processes which fork + * quicker than we can kill them we repeat this until + * no new pids need to be killed. */ + + } while (!done); + + return ret; +} + +int cg_kill( + const char *controller, + const char *path, + int sig, + CGroupFlags flags, + Set *s, + cg_kill_log_func_t log_kill, + void *userdata) { + + int r, ret; + + r = cg_kill_items(controller, path, sig, flags, s, log_kill, userdata, "cgroup.procs"); + if (r < 0 || sig != SIGKILL) + return r; + + ret = r; + + /* Only in case of killing with SIGKILL and when using cgroupsv2, kill remaining threads manually as + a workaround for kernel bug. It was fixed in 5.2-rc5 (c03cd7738a83), backported to 4.19.66 + (4340d175b898) and 4.14.138 (feb6b123b7dd). */ + r = cg_unified_controller(controller); + if (r < 0) + return r; + if (r == 0) + return ret; + + r = cg_kill_items(controller, path, sig, flags, s, log_kill, userdata, "cgroup.threads"); + if (r < 0) + return r; + + return r > 0 || ret > 0; +} + +int cg_kill_kernel_sigkill(const char *controller, const char *path) { + /* Kills the cgroup at `path` directly by writing to its cgroup.kill file. + * This sends SIGKILL to all processes in the cgroup and has the advantage of + * being completely atomic, unlike cg_kill_items. */ + int r; + _cleanup_free_ char *killfile = NULL; + + assert(path); + + if (!cg_kill_supported()) + return -EOPNOTSUPP; + + r = cg_get_path(controller, path, "cgroup.kill", &killfile); + if (r < 0) + return r; + + r = write_string_file(killfile, "1", WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return r; + + return 0; +} + +int cg_kill_recursive( + const char *controller, + const char *path, + int sig, + CGroupFlags flags, + Set *s, + cg_kill_log_func_t log_kill, + void *userdata) { + + _cleanup_set_free_ Set *allocated_set = NULL; + _cleanup_closedir_ DIR *d = NULL; + int r, ret; + char *fn; + + assert(path); + assert(sig >= 0); + + if (sig == SIGKILL && cg_kill_supported() && + !FLAGS_SET(flags, CGROUP_IGNORE_SELF) && !s && !log_kill) { + /* ignore CGROUP_SIGCONT, since this is a no-op alongside SIGKILL */ + ret = cg_kill_kernel_sigkill(controller, path); + if (ret < 0) + return ret; + } else { + if (!s) { + s = allocated_set = set_new(NULL); + if (!s) + return -ENOMEM; + } + + ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata); + + r = cg_enumerate_subgroups(controller, path, &d); + if (r < 0) { + if (ret >= 0 && r != -ENOENT) + return r; + + return ret; + } + + while ((r = cg_read_subgroup(d, &fn)) > 0) { + _cleanup_free_ char *p = NULL; + + p = path_join(empty_to_root(path), fn); + free(fn); + if (!p) + return -ENOMEM; + + r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata); + if (r != 0 && ret >= 0) + ret = r; + } + if (ret >= 0 && r < 0) + ret = r; + } + + if (FLAGS_SET(flags, CGROUP_REMOVE)) { + r = cg_rmdir(controller, path); + if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY)) + return r; + } + + return ret; +} + +static const char *controller_to_dirname(const char *controller) { + assert(controller); + + /* Converts a controller name to the directory name below /sys/fs/cgroup/ we want to mount it + * to. Effectively, this just cuts off the name= prefixed used for named hierarchies, if it is + * specified. */ + + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + if (cg_hybrid_unified() > 0) + controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID; + else + controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY; + } + + return startswith(controller, "name=") ?: controller; +} + +static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **ret) { + const char *dn; + char *t = NULL; + + assert(ret); + assert(controller); + + dn = controller_to_dirname(controller); + + if (isempty(path) && isempty(suffix)) + t = path_join("/sys/fs/cgroup", dn); + else if (isempty(path)) + t = path_join("/sys/fs/cgroup", dn, suffix); + else if (isempty(suffix)) + t = path_join("/sys/fs/cgroup", dn, path); + else + t = path_join("/sys/fs/cgroup", dn, path, suffix); + if (!t) + return -ENOMEM; + + *ret = t; + return 0; +} + +static int join_path_unified(const char *path, const char *suffix, char **ret) { + char *t; + + assert(ret); + + if (isempty(path) && isempty(suffix)) + t = strdup("/sys/fs/cgroup"); + else if (isempty(path)) + t = path_join("/sys/fs/cgroup", suffix); + else if (isempty(suffix)) + t = path_join("/sys/fs/cgroup", path); + else + t = path_join("/sys/fs/cgroup", path, suffix); + if (!t) + return -ENOMEM; + + *ret = t; + return 0; +} + +int cg_get_path(const char *controller, const char *path, const char *suffix, char **ret) { + int r; + + assert(ret); + + if (!controller) { + char *t; + + /* If no controller is specified, we return the path *below* the controllers, without any + * prefix. */ + + if (isempty(path) && isempty(suffix)) + return -EINVAL; + + if (isempty(suffix)) + t = strdup(path); + else if (isempty(path)) + t = strdup(suffix); + else + t = path_join(path, suffix); + if (!t) + return -ENOMEM; + + *ret = path_simplify(t); + return 0; + } + + if (!cg_controller_is_valid(controller)) + return -EINVAL; + + r = cg_all_unified(); + if (r < 0) + return r; + if (r > 0) + r = join_path_unified(path, suffix, ret); + else + r = join_path_legacy(controller, path, suffix, ret); + if (r < 0) + return r; + + path_simplify(*ret); + return 0; +} + +static int controller_is_v1_accessible(const char *root, const char *controller) { + const char *cpath, *dn; + + assert(controller); + + dn = controller_to_dirname(controller); + + /* If root if specified, we check that: + * - possible subcgroup is created at root, + * - we can modify the hierarchy. */ + + cpath = strjoina("/sys/fs/cgroup/", dn, root, root ? "/cgroup.procs" : NULL); + return laccess(cpath, root ? W_OK : F_OK); +} + +int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) { + int r; + + assert(controller); + assert(fs); + + if (!cg_controller_is_valid(controller)) + return -EINVAL; + + r = cg_all_unified(); + if (r < 0) + return r; + if (r > 0) { + /* In the unified hierarchy all controllers are considered accessible, + * except for the named hierarchies */ + if (startswith(controller, "name=")) + return -EOPNOTSUPP; + } else { + /* Check if the specified controller is actually accessible */ + r = controller_is_v1_accessible(NULL, controller); + if (r < 0) + return r; + } + + return cg_get_path(controller, path, suffix, fs); +} + +int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) { + _cleanup_free_ char *fs = NULL; + int r; + + assert(path); + assert(name); + assert(value || size <= 0); + + r = cg_get_path(controller, path, NULL, &fs); + if (r < 0) + return r; + + return RET_NERRNO(setxattr(fs, name, value, size, flags)); +} + +int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) { + _cleanup_free_ char *fs = NULL; + ssize_t n; + int r; + + assert(path); + assert(name); + + r = cg_get_path(controller, path, NULL, &fs); + if (r < 0) + return r; + + n = getxattr(fs, name, value, size); + if (n < 0) + return -errno; + + return (int) n; +} + +int cg_get_xattr_malloc(const char *controller, const char *path, const char *name, char **ret) { + _cleanup_free_ char *fs = NULL; + int r; + + assert(path); + assert(name); + + r = cg_get_path(controller, path, NULL, &fs); + if (r < 0) + return r; + + r = lgetxattr_malloc(fs, name, ret); + if (r < 0) + return r; + + return r; +} + +int cg_get_xattr_bool(const char *controller, const char *path, const char *name) { + _cleanup_free_ char *val = NULL; + int r; + + assert(path); + assert(name); + + r = cg_get_xattr_malloc(controller, path, name, &val); + if (r < 0) + return r; + + return parse_boolean(val); +} + +int cg_remove_xattr(const char *controller, const char *path, const char *name) { + _cleanup_free_ char *fs = NULL; + int r; + + assert(path); + assert(name); + + r = cg_get_path(controller, path, NULL, &fs); + if (r < 0) + return r; + + return RET_NERRNO(removexattr(fs, name)); +} + +int cg_pid_get_path(const char *controller, pid_t pid, char **ret_path) { + _cleanup_fclose_ FILE *f = NULL; + const char *fs, *controller_str = NULL; /* avoid false maybe-uninitialized warning */ + int unified, r; + + assert(pid >= 0); + assert(ret_path); + + if (controller) { + if (!cg_controller_is_valid(controller)) + return -EINVAL; + } else + controller = SYSTEMD_CGROUP_CONTROLLER; + + unified = cg_unified_controller(controller); + if (unified < 0) + return unified; + if (unified == 0) { + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) + controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY; + else + controller_str = controller; + } + + fs = procfs_file_alloca(pid, "cgroup"); + r = fopen_unlocked(fs, "re", &f); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) + return r; + + for (;;) { + _cleanup_free_ char *line = NULL; + char *e; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + return -ENODATA; + + if (unified) { + e = startswith(line, "0:"); + if (!e) + continue; + + e = strchr(e, ':'); + if (!e) + continue; + } else { + char *l; + + l = strchr(line, ':'); + if (!l) + continue; + + l++; + e = strchr(l, ':'); + if (!e) + continue; + *e = 0; + + assert(controller_str); + r = string_contains_word(l, ",", controller_str); + if (r < 0) + return r; + if (r == 0) + continue; + } + + char *path = strdup(e + 1); + if (!path) + return -ENOMEM; + + /* Truncate suffix indicating the process is a zombie */ + e = endswith(path, " (deleted)"); + if (e) + *e = 0; + + *ret_path = path; + return 0; + } +} + +int cg_install_release_agent(const char *controller, const char *agent) { + _cleanup_free_ char *fs = NULL, *contents = NULL; + const char *sc; + int r; + + assert(agent); + + r = cg_unified_controller(controller); + if (r < 0) + return r; + if (r > 0) /* doesn't apply to unified hierarchy */ + return -EOPNOTSUPP; + + r = cg_get_path(controller, NULL, "release_agent", &fs); + if (r < 0) + return r; + + r = read_one_line_file(fs, &contents); + if (r < 0) + return r; + + sc = strstrip(contents); + if (isempty(sc)) { + r = write_string_file(fs, agent, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return r; + } else if (!path_equal(sc, agent)) + return -EEXIST; + + fs = mfree(fs); + r = cg_get_path(controller, NULL, "notify_on_release", &fs); + if (r < 0) + return r; + + contents = mfree(contents); + r = read_one_line_file(fs, &contents); + if (r < 0) + return r; + + sc = strstrip(contents); + if (streq(sc, "0")) { + r = write_string_file(fs, "1", WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return r; + + return 1; + } + + if (!streq(sc, "1")) + return -EIO; + + return 0; +} + +int cg_uninstall_release_agent(const char *controller) { + _cleanup_free_ char *fs = NULL; + int r; + + r = cg_unified_controller(controller); + if (r < 0) + return r; + if (r > 0) /* Doesn't apply to unified hierarchy */ + return -EOPNOTSUPP; + + r = cg_get_path(controller, NULL, "notify_on_release", &fs); + if (r < 0) + return r; + + r = write_string_file(fs, "0", WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return r; + + fs = mfree(fs); + + r = cg_get_path(controller, NULL, "release_agent", &fs); + if (r < 0) + return r; + + r = write_string_file(fs, "", WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return r; + + return 0; +} + +int cg_is_empty(const char *controller, const char *path) { + _cleanup_fclose_ FILE *f = NULL; + pid_t pid; + int r; + + assert(path); + + r = cg_enumerate_processes(controller, path, &f); + if (r == -ENOENT) + return true; + if (r < 0) + return r; + + r = cg_read_pid(f, &pid); + if (r < 0) + return r; + + return r == 0; +} + +int cg_is_empty_recursive(const char *controller, const char *path) { + int r; + + assert(path); + + /* The root cgroup is always populated */ + if (controller && empty_or_root(path)) + return false; + + r = cg_unified_controller(controller); + if (r < 0) + return r; + if (r > 0) { + _cleanup_free_ char *t = NULL; + + /* On the unified hierarchy we can check empty state + * via the "populated" attribute of "cgroup.events". */ + + r = cg_read_event(controller, path, "populated", &t); + if (r == -ENOENT) + return true; + if (r < 0) + return r; + + return streq(t, "0"); + } else { + _cleanup_closedir_ DIR *d = NULL; + char *fn; + + r = cg_is_empty(controller, path); + if (r <= 0) + return r; + + r = cg_enumerate_subgroups(controller, path, &d); + if (r == -ENOENT) + return true; + if (r < 0) + return r; + + while ((r = cg_read_subgroup(d, &fn)) > 0) { + _cleanup_free_ char *p = NULL; + + p = path_join(path, fn); + free(fn); + if (!p) + return -ENOMEM; + + r = cg_is_empty_recursive(controller, p); + if (r <= 0) + return r; + } + if (r < 0) + return r; + + return true; + } +} + +int cg_split_spec(const char *spec, char **ret_controller, char **ret_path) { + _cleanup_free_ char *controller = NULL, *path = NULL; + + assert(spec); + + if (*spec == '/') { + if (!path_is_normalized(spec)) + return -EINVAL; + + if (ret_path) { + path = strdup(spec); + if (!path) + return -ENOMEM; + + path_simplify(path); + } + + } else { + const char *e; + + e = strchr(spec, ':'); + if (e) { + controller = strndup(spec, e-spec); + if (!controller) + return -ENOMEM; + if (!cg_controller_is_valid(controller)) + return -EINVAL; + + if (!isempty(e + 1)) { + path = strdup(e+1); + if (!path) + return -ENOMEM; + + if (!path_is_normalized(path) || + !path_is_absolute(path)) + return -EINVAL; + + path_simplify(path); + } + + } else { + if (!cg_controller_is_valid(spec)) + return -EINVAL; + + if (ret_controller) { + controller = strdup(spec); + if (!controller) + return -ENOMEM; + } + } + } + + if (ret_controller) + *ret_controller = TAKE_PTR(controller); + if (ret_path) + *ret_path = TAKE_PTR(path); + return 0; +} + +int cg_mangle_path(const char *path, char **result) { + _cleanup_free_ char *c = NULL, *p = NULL; + char *t; + int r; + + assert(path); + assert(result); + + /* First, check if it already is a filesystem path */ + if (path_startswith(path, "/sys/fs/cgroup")) { + + t = strdup(path); + if (!t) + return -ENOMEM; + + *result = path_simplify(t); + return 0; + } + + /* Otherwise, treat it as cg spec */ + r = cg_split_spec(path, &c, &p); + if (r < 0) + return r; + + return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result); +} + +int cg_get_root_path(char **path) { + char *p, *e; + int r; + + assert(path); + + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p); + if (r < 0) + return r; + + e = endswith(p, "/" SPECIAL_INIT_SCOPE); + if (!e) + e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */ + if (!e) + e = endswith(p, "/system"); /* even more legacy */ + if (e) + *e = 0; + + *path = p; + return 0; +} + +int cg_shift_path(const char *cgroup, const char *root, const char **shifted) { + _cleanup_free_ char *rt = NULL; + char *p; + int r; + + assert(cgroup); + assert(shifted); + + if (!root) { + /* If the root was specified let's use that, otherwise + * let's determine it from PID 1 */ + + r = cg_get_root_path(&rt); + if (r < 0) + return r; + + root = rt; + } + + p = path_startswith(cgroup, root); + if (p && p > cgroup) + *shifted = p - 1; + else + *shifted = cgroup; + + return 0; +} + +int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) { + _cleanup_free_ char *raw = NULL; + const char *c; + int r; + + assert(pid >= 0); + assert(cgroup); + + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw); + if (r < 0) + return r; + + r = cg_shift_path(raw, root, &c); + if (r < 0) + return r; + + if (c == raw) + *cgroup = TAKE_PTR(raw); + else { + char *n; + + n = strdup(c); + if (!n) + return -ENOMEM; + + *cgroup = n; + } + + return 0; +} + +int cg_path_decode_unit(const char *cgroup, char **unit) { + char *c, *s; + size_t n; + + assert(cgroup); + assert(unit); + + n = strcspn(cgroup, "/"); + if (n < 3) + return -ENXIO; + + c = strndupa_safe(cgroup, n); + c = cg_unescape(c); + + if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE)) + return -ENXIO; + + s = strdup(c); + if (!s) + return -ENOMEM; + + *unit = s; + return 0; +} + +static bool valid_slice_name(const char *p, size_t n) { + + if (!p) + return false; + + if (n < STRLEN("x.slice")) + return false; + + if (memcmp(p + n - 6, ".slice", 6) == 0) { + char buf[n+1], *c; + + memcpy(buf, p, n); + buf[n] = 0; + + c = cg_unescape(buf); + + return unit_name_is_valid(c, UNIT_NAME_PLAIN); + } + + return false; +} + +static const char *skip_slices(const char *p) { + assert(p); + + /* Skips over all slice assignments */ + + for (;;) { + size_t n; + + p += strspn(p, "/"); + + n = strcspn(p, "/"); + if (!valid_slice_name(p, n)) + return p; + + p += n; + } +} + +int cg_path_get_unit(const char *path, char **ret) { + _cleanup_free_ char *unit = NULL; + const char *e; + int r; + + assert(path); + assert(ret); + + e = skip_slices(path); + + r = cg_path_decode_unit(e, &unit); + if (r < 0) + return r; + + /* We skipped over the slices, don't accept any now */ + if (endswith(unit, ".slice")) + return -ENXIO; + + *ret = TAKE_PTR(unit); + return 0; +} + +int cg_pid_get_unit(pid_t pid, char **unit) { + _cleanup_free_ char *cgroup = NULL; + int r; + + assert(unit); + + r = cg_pid_get_path_shifted(pid, NULL, &cgroup); + if (r < 0) + return r; + + return cg_path_get_unit(cgroup, unit); +} + +/** + * Skip session-*.scope, but require it to be there. + */ +static const char *skip_session(const char *p) { + size_t n; + + if (isempty(p)) + return NULL; + + p += strspn(p, "/"); + + n = strcspn(p, "/"); + if (n < STRLEN("session-x.scope")) + return NULL; + + if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) { + char buf[n - 8 - 6 + 1]; + + memcpy(buf, p + 8, n - 8 - 6); + buf[n - 8 - 6] = 0; + + /* Note that session scopes never need unescaping, + * since they cannot conflict with the kernel's own + * names, hence we don't need to call cg_unescape() + * here. */ + + if (!session_id_valid(buf)) + return NULL; + + p += n; + p += strspn(p, "/"); + return p; + } + + return NULL; +} + +/** + * Skip user@*.service, but require it to be there. + */ +static const char *skip_user_manager(const char *p) { + size_t n; + + if (isempty(p)) + return NULL; + + p += strspn(p, "/"); + + n = strcspn(p, "/"); + if (n < STRLEN("user@x.service")) + return NULL; + + if (memcmp(p, "user@", 5) == 0 && memcmp(p + n - 8, ".service", 8) == 0) { + char buf[n - 5 - 8 + 1]; + + memcpy(buf, p + 5, n - 5 - 8); + buf[n - 5 - 8] = 0; + + /* Note that user manager services never need unescaping, + * since they cannot conflict with the kernel's own + * names, hence we don't need to call cg_unescape() + * here. */ + + if (parse_uid(buf, NULL) < 0) + return NULL; + + p += n; + p += strspn(p, "/"); + + return p; + } + + return NULL; +} + +static const char *skip_user_prefix(const char *path) { + const char *e, *t; + + assert(path); + + /* Skip slices, if there are any */ + e = skip_slices(path); + + /* Skip the user manager, if it's in the path now... */ + t = skip_user_manager(e); + if (t) + return t; + + /* Alternatively skip the user session if it is in the path... */ + return skip_session(e); +} + +int cg_path_get_user_unit(const char *path, char **ret) { + const char *t; + + assert(path); + assert(ret); + + t = skip_user_prefix(path); + if (!t) + return -ENXIO; + + /* And from here on it looks pretty much the same as for a system unit, hence let's use the same + * parser. */ + return cg_path_get_unit(t, ret); +} + +int cg_pid_get_user_unit(pid_t pid, char **unit) { + _cleanup_free_ char *cgroup = NULL; + int r; + + assert(unit); + + r = cg_pid_get_path_shifted(pid, NULL, &cgroup); + if (r < 0) + return r; + + return cg_path_get_user_unit(cgroup, unit); +} + +int cg_path_get_machine_name(const char *path, char **machine) { + _cleanup_free_ char *u = NULL; + const char *sl; + int r; + + r = cg_path_get_unit(path, &u); + if (r < 0) + return r; + + sl = strjoina("/run/systemd/machines/unit:", u); + return readlink_malloc(sl, machine); +} + +int cg_pid_get_machine_name(pid_t pid, char **machine) { + _cleanup_free_ char *cgroup = NULL; + int r; + + assert(machine); + + r = cg_pid_get_path_shifted(pid, NULL, &cgroup); + if (r < 0) + return r; + + return cg_path_get_machine_name(cgroup, machine); +} + +int cg_path_get_cgroupid(const char *path, uint64_t *ret) { + cg_file_handle fh = CG_FILE_HANDLE_INIT; + int mnt_id = -1; + + assert(path); + assert(ret); + + /* This is cgroupfs so we know the size of the handle, thus no need to loop around like + * name_to_handle_at_loop() does in mountpoint-util.c */ + if (name_to_handle_at(AT_FDCWD, path, &fh.file_handle, &mnt_id, 0) < 0) + return -errno; + + *ret = CG_FILE_HANDLE_CGROUPID(fh); + return 0; +} + +int cg_path_get_session(const char *path, char **session) { + _cleanup_free_ char *unit = NULL; + char *start, *end; + int r; + + assert(path); + + r = cg_path_get_unit(path, &unit); + if (r < 0) + return r; + + start = startswith(unit, "session-"); + if (!start) + return -ENXIO; + end = endswith(start, ".scope"); + if (!end) + return -ENXIO; + + *end = 0; + if (!session_id_valid(start)) + return -ENXIO; + + if (session) { + char *rr; + + rr = strdup(start); + if (!rr) + return -ENOMEM; + + *session = rr; + } + + return 0; +} + +int cg_pid_get_session(pid_t pid, char **session) { + _cleanup_free_ char *cgroup = NULL; + int r; + + r = cg_pid_get_path_shifted(pid, NULL, &cgroup); + if (r < 0) + return r; + + return cg_path_get_session(cgroup, session); +} + +int cg_path_get_owner_uid(const char *path, uid_t *uid) { + _cleanup_free_ char *slice = NULL; + char *start, *end; + int r; + + assert(path); + + r = cg_path_get_slice(path, &slice); + if (r < 0) + return r; + + start = startswith(slice, "user-"); + if (!start) + return -ENXIO; + end = endswith(start, ".slice"); + if (!end) + return -ENXIO; + + *end = 0; + if (parse_uid(start, uid) < 0) + return -ENXIO; + + return 0; +} + +int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) { + _cleanup_free_ char *cgroup = NULL; + int r; + + r = cg_pid_get_path_shifted(pid, NULL, &cgroup); + if (r < 0) + return r; + + return cg_path_get_owner_uid(cgroup, uid); +} + +int cg_path_get_slice(const char *p, char **slice) { + const char *e = NULL; + + assert(p); + assert(slice); + + /* Finds the right-most slice unit from the beginning, but + * stops before we come to the first non-slice unit. */ + + for (;;) { + size_t n; + + p += strspn(p, "/"); + + n = strcspn(p, "/"); + if (!valid_slice_name(p, n)) { + + if (!e) { + char *s; + + s = strdup(SPECIAL_ROOT_SLICE); + if (!s) + return -ENOMEM; + + *slice = s; + return 0; + } + + return cg_path_decode_unit(e, slice); + } + + e = p; + p += n; + } +} + +int cg_pid_get_slice(pid_t pid, char **slice) { + _cleanup_free_ char *cgroup = NULL; + int r; + + assert(slice); + + r = cg_pid_get_path_shifted(pid, NULL, &cgroup); + if (r < 0) + return r; + + return cg_path_get_slice(cgroup, slice); +} + +int cg_path_get_user_slice(const char *p, char **slice) { + const char *t; + assert(p); + assert(slice); + + t = skip_user_prefix(p); + if (!t) + return -ENXIO; + + /* And now it looks pretty much the same as for a system + * slice, so let's just use the same parser from here on. */ + return cg_path_get_slice(t, slice); +} + +int cg_pid_get_user_slice(pid_t pid, char **slice) { + _cleanup_free_ char *cgroup = NULL; + int r; + + assert(slice); + + r = cg_pid_get_path_shifted(pid, NULL, &cgroup); + if (r < 0) + return r; + + return cg_path_get_user_slice(cgroup, slice); +} + +char *cg_escape(const char *p) { + bool need_prefix = false; + + /* This implements very minimal escaping for names to be used + * as file names in the cgroup tree: any name which might + * conflict with a kernel name or is prefixed with '_' is + * prefixed with a '_'. That way, when reading cgroup names it + * is sufficient to remove a single prefixing underscore if + * there is one. */ + + /* The return value of this function (unlike cg_unescape()) + * needs free()! */ + + if (IN_SET(p[0], 0, '_', '.') || + STR_IN_SET(p, "notify_on_release", "release_agent", "tasks") || + startswith(p, "cgroup.")) + need_prefix = true; + else { + const char *dot; + + dot = strrchr(p, '.'); + if (dot) { + CGroupController c; + size_t l = dot - p; + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + const char *n; + + n = cgroup_controller_to_string(c); + + if (l != strlen(n)) + continue; + + if (memcmp(p, n, l) != 0) + continue; + + need_prefix = true; + break; + } + } + } + + if (need_prefix) + return strjoin("_", p); + + return strdup(p); +} + +char *cg_unescape(const char *p) { + assert(p); + + /* The return value of this function (unlike cg_escape()) + * doesn't need free()! */ + + if (p[0] == '_') + return (char*) p+1; + + return (char*) p; +} + +#define CONTROLLER_VALID \ + DIGITS LETTERS \ + "_" + +bool cg_controller_is_valid(const char *p) { + const char *t, *s; + + if (!p) + return false; + + if (streq(p, SYSTEMD_CGROUP_CONTROLLER)) + return true; + + s = startswith(p, "name="); + if (s) + p = s; + + if (IN_SET(*p, 0, '_')) + return false; + + for (t = p; *t; t++) + if (!strchr(CONTROLLER_VALID, *t)) + return false; + + if (t - p > NAME_MAX) + return false; + + return true; +} + +int cg_slice_to_path(const char *unit, char **ret) { + _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL; + const char *dash; + int r; + + assert(unit); + assert(ret); + + if (streq(unit, SPECIAL_ROOT_SLICE)) { + char *x; + + x = strdup(""); + if (!x) + return -ENOMEM; + *ret = x; + return 0; + } + + if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN)) + return -EINVAL; + + if (!endswith(unit, ".slice")) + return -EINVAL; + + r = unit_name_to_prefix(unit, &p); + if (r < 0) + return r; + + dash = strchr(p, '-'); + + /* Don't allow initial dashes */ + if (dash == p) + return -EINVAL; + + while (dash) { + _cleanup_free_ char *escaped = NULL; + char n[dash - p + sizeof(".slice")]; + +#if HAS_FEATURE_MEMORY_SANITIZER + /* msan doesn't instrument stpncpy, so it thinks + * n is later used uninitialized: + * https://github.com/google/sanitizers/issues/926 + */ + zero(n); +#endif + + /* Don't allow trailing or double dashes */ + if (IN_SET(dash[1], 0, '-')) + return -EINVAL; + + strcpy(stpncpy(n, p, dash - p), ".slice"); + if (!unit_name_is_valid(n, UNIT_NAME_PLAIN)) + return -EINVAL; + + escaped = cg_escape(n); + if (!escaped) + return -ENOMEM; + + if (!strextend(&s, escaped, "/")) + return -ENOMEM; + + dash = strchr(dash+1, '-'); + } + + e = cg_escape(unit); + if (!e) + return -ENOMEM; + + if (!strextend(&s, e)) + return -ENOMEM; + + *ret = TAKE_PTR(s); + + return 0; +} + +int cg_is_threaded(const char *controller, const char *path) { + _cleanup_free_ char *fs = NULL, *contents = NULL; + _cleanup_strv_free_ char **v = NULL; + int r; + + r = cg_get_path(controller, path, "cgroup.type", &fs); + if (r < 0) + return r; + + r = read_full_virtual_file(fs, &contents, NULL); + if (r == -ENOENT) + return false; /* Assume no. */ + if (r < 0) + return r; + + v = strv_split(contents, NULL); + if (!v) + return -ENOMEM; + + /* If the cgroup is in the threaded mode, it contains "threaded". + * If one of the parents or siblings is in the threaded mode, it may contain "invalid". */ + return strv_contains(v, "threaded") || strv_contains(v, "invalid"); +} + +int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) { + _cleanup_free_ char *p = NULL; + int r; + + r = cg_get_path(controller, path, attribute, &p); + if (r < 0) + return r; + + return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER); +} + +int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) { + _cleanup_free_ char *p = NULL; + int r; + + r = cg_get_path(controller, path, attribute, &p); + if (r < 0) + return r; + + return read_one_line_file(p, ret); +} + +int cg_get_attribute_as_uint64(const char *controller, const char *path, const char *attribute, uint64_t *ret) { + _cleanup_free_ char *value = NULL; + uint64_t v; + int r; + + assert(ret); + + r = cg_get_attribute(controller, path, attribute, &value); + if (r == -ENOENT) + return -ENODATA; + if (r < 0) + return r; + + if (streq(value, "max")) { + *ret = CGROUP_LIMIT_MAX; + return 0; + } + + r = safe_atou64(value, &v); + if (r < 0) + return r; + + *ret = v; + return 0; +} + +int cg_get_attribute_as_bool(const char *controller, const char *path, const char *attribute, bool *ret) { + _cleanup_free_ char *value = NULL; + int r; + + assert(ret); + + r = cg_get_attribute(controller, path, attribute, &value); + if (r == -ENOENT) + return -ENODATA; + if (r < 0) + return r; + + r = parse_boolean(value); + if (r < 0) + return r; + + *ret = r; + return 0; +} + +int cg_get_owner(const char *controller, const char *path, uid_t *ret_uid) { + _cleanup_free_ char *f = NULL; + struct stat stats; + int r; + + assert(ret_uid); + + r = cg_get_path(controller, path, NULL, &f); + if (r < 0) + return r; + + r = stat(f, &stats); + if (r < 0) + return -errno; + + *ret_uid = stats.st_uid; + return 0; +} + +int cg_get_keyed_attribute_full( + const char *controller, + const char *path, + const char *attribute, + char **keys, + char **ret_values, + CGroupKeyMode mode) { + + _cleanup_free_ char *filename = NULL, *contents = NULL; + const char *p; + size_t n, i, n_done = 0; + char **v; + int r; + + /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with + * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of + * entries as 'keys'. On success each entry will be set to the value of the matching key. + * + * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. If mode + * is set to GG_KEY_MODE_GRACEFUL we ignore missing keys and return those that were parsed successfully. */ + + r = cg_get_path(controller, path, attribute, &filename); + if (r < 0) + return r; + + r = read_full_file(filename, &contents, NULL); + if (r < 0) + return r; + + n = strv_length(keys); + if (n == 0) /* No keys to retrieve? That's easy, we are done then */ + return 0; + + /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */ + v = newa0(char*, n); + + for (p = contents; *p;) { + const char *w = NULL; + + for (i = 0; i < n; i++) + if (!v[i]) { + w = first_word(p, keys[i]); + if (w) + break; + } + + if (w) { + size_t l; + + l = strcspn(w, NEWLINE); + v[i] = strndup(w, l); + if (!v[i]) { + r = -ENOMEM; + goto fail; + } + + n_done++; + if (n_done >= n) + goto done; + + p = w + l; + } else + p += strcspn(p, NEWLINE); + + p += strspn(p, NEWLINE); + } + + if (mode & CG_KEY_MODE_GRACEFUL) + goto done; + + r = -ENXIO; + +fail: + for (i = 0; i < n; i++) + free(v[i]); + + return r; + +done: + memcpy(ret_values, v, sizeof(char*) * n); + if (mode & CG_KEY_MODE_GRACEFUL) + return n_done; + + return 0; +} + +int cg_mask_to_string(CGroupMask mask, char **ret) { + _cleanup_free_ char *s = NULL; + bool space = false; + CGroupController c; + size_t n = 0; + + assert(ret); + + if (mask == 0) { + *ret = NULL; + return 0; + } + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + const char *k; + size_t l; + + if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c))) + continue; + + k = cgroup_controller_to_string(c); + l = strlen(k); + + if (!GREEDY_REALLOC(s, n + space + l + 1)) + return -ENOMEM; + + if (space) + s[n] = ' '; + memcpy(s + n + space, k, l); + n += space + l; + + space = true; + } + + assert(s); + + s[n] = 0; + *ret = TAKE_PTR(s); + + return 0; +} + +int cg_mask_from_string(const char *value, CGroupMask *ret) { + CGroupMask m = 0; + + assert(ret); + assert(value); + + for (;;) { + _cleanup_free_ char *n = NULL; + CGroupController v; + int r; + + r = extract_first_word(&value, &n, NULL, 0); + if (r < 0) + return r; + if (r == 0) + break; + + v = cgroup_controller_from_string(n); + if (v < 0) + continue; + + m |= CGROUP_CONTROLLER_TO_MASK(v); + } + + *ret = m; + return 0; +} + +int cg_mask_supported_subtree(const char *root, CGroupMask *ret) { + CGroupMask mask; + int r; + + /* Determines the mask of supported cgroup controllers. Only includes controllers we can make sense of and that + * are actually accessible. Only covers real controllers, i.e. not the CGROUP_CONTROLLER_BPF_xyz + * pseudo-controllers. */ + + r = cg_all_unified(); + if (r < 0) + return r; + if (r > 0) { + _cleanup_free_ char *controllers = NULL, *path = NULL; + + /* In the unified hierarchy we can read the supported and accessible controllers from + * the top-level cgroup attribute */ + + r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path); + if (r < 0) + return r; + + r = read_one_line_file(path, &controllers); + if (r < 0) + return r; + + r = cg_mask_from_string(controllers, &mask); + if (r < 0) + return r; + + /* Mask controllers that are not supported in unified hierarchy. */ + mask &= CGROUP_MASK_V2; + + } else { + CGroupController c; + + /* In the legacy hierarchy, we check which hierarchies are accessible. */ + + mask = 0; + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *n; + + if (!FLAGS_SET(CGROUP_MASK_V1, bit)) + continue; + + n = cgroup_controller_to_string(c); + if (controller_is_v1_accessible(root, n) >= 0) + mask |= bit; + } + } + + *ret = mask; + return 0; +} + +int cg_mask_supported(CGroupMask *ret) { + _cleanup_free_ char *root = NULL; + int r; + + r = cg_get_root_path(&root); + if (r < 0) + return r; + + return cg_mask_supported_subtree(root, ret); +} + +int cg_kernel_controllers(Set **ret) { + _cleanup_set_free_ Set *controllers = NULL; + _cleanup_fclose_ FILE *f = NULL; + int r; + + assert(ret); + + /* Determines the full list of kernel-known controllers. Might include controllers we don't actually support + * and controllers that aren't currently accessible (because not mounted). This does not include "name=" + * pseudo-controllers. */ + + r = fopen_unlocked("/proc/cgroups", "re", &f); + if (r == -ENOENT) { + *ret = NULL; + return 0; + } + if (r < 0) + return r; + + /* Ignore the header line */ + (void) read_line(f, SIZE_MAX, NULL); + + for (;;) { + _cleanup_free_ char *controller = NULL; + int enabled = 0; + + errno = 0; + if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) { + + if (feof(f)) + break; + + if (ferror(f)) + return errno_or_else(EIO); + + return -EBADMSG; + } + + if (!enabled) + continue; + + if (!cg_controller_is_valid(controller)) + return -EBADMSG; + + r = set_ensure_consume(&controllers, &string_hash_ops_free, TAKE_PTR(controller)); + if (r < 0) + return r; + } + + *ret = TAKE_PTR(controllers); + + return 0; +} + +/* The hybrid mode was initially implemented in v232 and simply mounted cgroup2 on + * /sys/fs/cgroup/systemd. This unfortunately broke other tools (such as docker) which expected the v1 + * "name=systemd" hierarchy on /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mounts v2 on + * /sys/fs/cgroup/unified and maintains "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility + * with other tools. + * + * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep + * cgroup v2 process management but disable the compat dual layout, we return true on + * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and false on cg_hybrid_unified(). + */ +static thread_local bool unified_systemd_v232; + +int cg_unified_cached(bool flush) { + static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN; + + struct statfs fs; + + /* Checks if we support the unified hierarchy. Returns an + * error when the cgroup hierarchies aren't mounted yet or we + * have any other trouble determining if the unified hierarchy + * is supported. */ + + if (flush) + unified_cache = CGROUP_UNIFIED_UNKNOWN; + else if (unified_cache >= CGROUP_UNIFIED_NONE) + return unified_cache; + + if (statfs("/sys/fs/cgroup/", &fs) < 0) + return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m"); + + if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy"); + unified_cache = CGROUP_UNIFIED_ALL; + } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) { + if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 && + F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller"); + unified_cache = CGROUP_UNIFIED_SYSTEMD; + unified_systemd_v232 = false; + } else { + if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) { + if (errno == ENOENT) { + /* Some other software may have set up /sys/fs/cgroup in a configuration we do not recognize. */ + log_debug_errno(errno, "Unsupported cgroupsv1 setup detected: name=systemd hierarchy not found."); + return -ENOMEDIUM; + } + return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m"); + } + + if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)"); + unified_cache = CGROUP_UNIFIED_SYSTEMD; + unified_systemd_v232 = true; + } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) { + log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy"); + unified_cache = CGROUP_UNIFIED_NONE; + } else { + log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy", + (unsigned long long) fs.f_type); + unified_cache = CGROUP_UNIFIED_NONE; + } + } + } else if (F_TYPE_EQUAL(fs.f_type, SYSFS_MAGIC)) { + return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM), + "No filesystem is currently mounted on /sys/fs/cgroup."); + } else + return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM), + "Unknown filesystem type %llx mounted on /sys/fs/cgroup.", + (unsigned long long)fs.f_type); + + return unified_cache; +} + +int cg_unified_controller(const char *controller) { + int r; + + r = cg_unified_cached(false); + if (r < 0) + return r; + + if (r == CGROUP_UNIFIED_NONE) + return false; + + if (r >= CGROUP_UNIFIED_ALL) + return true; + + return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER); +} + +int cg_all_unified(void) { + int r; + + r = cg_unified_cached(false); + if (r < 0) + return r; + + return r >= CGROUP_UNIFIED_ALL; +} + +int cg_hybrid_unified(void) { + int r; + + r = cg_unified_cached(false); + if (r < 0) + return r; + + return r == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232; +} + +const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = { + [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX, + [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX, + [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX, + [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX, +}; + +static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = { + [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax", + [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax", + [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax", + [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax", +}; + +DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType); + +bool is_cgroup_fs(const struct statfs *s) { + return is_fs_type(s, CGROUP_SUPER_MAGIC) || + is_fs_type(s, CGROUP2_SUPER_MAGIC); +} + +bool fd_is_cgroup_fs(int fd) { + struct statfs s; + + if (fstatfs(fd, &s) < 0) + return -errno; + + return is_cgroup_fs(&s); +} + +static const char *const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = { + [CGROUP_CONTROLLER_CPU] = "cpu", + [CGROUP_CONTROLLER_CPUACCT] = "cpuacct", + [CGROUP_CONTROLLER_CPUSET] = "cpuset", + [CGROUP_CONTROLLER_IO] = "io", + [CGROUP_CONTROLLER_BLKIO] = "blkio", + [CGROUP_CONTROLLER_MEMORY] = "memory", + [CGROUP_CONTROLLER_DEVICES] = "devices", + [CGROUP_CONTROLLER_PIDS] = "pids", + [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall", + [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices", + [CGROUP_CONTROLLER_BPF_FOREIGN] = "bpf-foreign", + [CGROUP_CONTROLLER_BPF_SOCKET_BIND] = "bpf-socket-bind", + [CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES] = "bpf-restrict-network-interfaces", +}; + +DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController); + +CGroupMask get_cpu_accounting_mask(void) { + static CGroupMask needed_mask = (CGroupMask) -1; + + /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is + * provided externally from the CPU controller, which means we don't + * need to enable the CPU controller just to get metrics. This is good, + * because enabling the CPU controller comes at a minor performance + * hit, especially when it's propagated deep into large hierarchies. + * There's also no separate CPU accounting controller available within + * a unified hierarchy. + * + * This combination of factors results in the desired cgroup mask to + * enable for CPU accounting varying as follows: + * + * ╔═════════════════════╤═════════════════════╗ + * ║ Linux ≥4.15 │ Linux <4.15 ║ + * ╔═══════════════╬═════════════════════╪═════════════════════╣ + * ║ Unified ║ nothing │ CGROUP_MASK_CPU ║ + * ╟───────────────╫─────────────────────┼─────────────────────╢ + * ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║ + * ╚═══════════════╩═════════════════════╧═════════════════════╝ + * + * We check kernel version here instead of manually checking whether + * cpu.stat is present for every cgroup, as that check in itself would + * already be fairly expensive. + * + * Kernels where this patch has been backported will therefore have the + * CPU controller enabled unnecessarily. This is more expensive than + * necessary, but harmless. ☺️ + */ + + if (needed_mask == (CGroupMask) -1) { + if (cg_all_unified()) { + struct utsname u; + assert_se(uname(&u) >= 0); + + if (strverscmp_improved(u.release, "4.15") < 0) + needed_mask = CGROUP_MASK_CPU; + else + needed_mask = 0; + } else + needed_mask = CGROUP_MASK_CPUACCT; + } + + return needed_mask; +} + +bool cpu_accounting_is_cheap(void) { + return get_cpu_accounting_mask() == 0; +} + +static const char* const managed_oom_mode_table[_MANAGED_OOM_MODE_MAX] = { + [MANAGED_OOM_AUTO] = "auto", + [MANAGED_OOM_KILL] = "kill", +}; + +DEFINE_STRING_TABLE_LOOKUP(managed_oom_mode, ManagedOOMMode); + +static const char* const managed_oom_preference_table[_MANAGED_OOM_PREFERENCE_MAX] = { + [MANAGED_OOM_PREFERENCE_NONE] = "none", + [MANAGED_OOM_PREFERENCE_AVOID] = "avoid", + [MANAGED_OOM_PREFERENCE_OMIT] = "omit", +}; + +DEFINE_STRING_TABLE_LOOKUP(managed_oom_preference, ManagedOOMPreference); diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h new file mode 100644 index 0000000..df6d5b7 --- /dev/null +++ b/src/basic/cgroup-util.h @@ -0,0 +1,344 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "def.h" +#include "set.h" + +#define SYSTEMD_CGROUP_CONTROLLER_LEGACY "name=systemd" +#define SYSTEMD_CGROUP_CONTROLLER_HYBRID "name=unified" +#define SYSTEMD_CGROUP_CONTROLLER "_systemd" + +/* An enum of well known cgroup controllers */ +typedef enum CGroupController { + /* Original cgroup controllers */ + CGROUP_CONTROLLER_CPU, + CGROUP_CONTROLLER_CPUACCT, /* v1 only */ + CGROUP_CONTROLLER_CPUSET, /* v2 only */ + CGROUP_CONTROLLER_IO, /* v2 only */ + CGROUP_CONTROLLER_BLKIO, /* v1 only */ + CGROUP_CONTROLLER_MEMORY, + CGROUP_CONTROLLER_DEVICES, /* v1 only */ + CGROUP_CONTROLLER_PIDS, + + /* BPF-based pseudo-controllers, v2 only */ + CGROUP_CONTROLLER_BPF_FIREWALL, + CGROUP_CONTROLLER_BPF_DEVICES, + CGROUP_CONTROLLER_BPF_FOREIGN, + CGROUP_CONTROLLER_BPF_SOCKET_BIND, + CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES, + /* The BPF hook implementing RestrictFileSystems= is not defined here. + * It's applied as late as possible in exec_child() so we don't block + * our own unit setup code. */ + + _CGROUP_CONTROLLER_MAX, + _CGROUP_CONTROLLER_INVALID = -EINVAL, +} CGroupController; + +#define CGROUP_CONTROLLER_TO_MASK(c) (1U << (c)) + +/* A bit mask of well known cgroup controllers */ +typedef enum CGroupMask { + CGROUP_MASK_CPU = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPU), + CGROUP_MASK_CPUACCT = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPUACCT), + CGROUP_MASK_CPUSET = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPUSET), + CGROUP_MASK_IO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_IO), + CGROUP_MASK_BLKIO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BLKIO), + CGROUP_MASK_MEMORY = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_MEMORY), + CGROUP_MASK_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_DEVICES), + CGROUP_MASK_PIDS = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_PIDS), + CGROUP_MASK_BPF_FIREWALL = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FIREWALL), + CGROUP_MASK_BPF_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_DEVICES), + CGROUP_MASK_BPF_FOREIGN = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FOREIGN), + CGROUP_MASK_BPF_SOCKET_BIND = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_SOCKET_BIND), + CGROUP_MASK_BPF_RESTRICT_NETWORK_INTERFACES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES), + + /* All real cgroup v1 controllers */ + CGROUP_MASK_V1 = CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT|CGROUP_MASK_BLKIO|CGROUP_MASK_MEMORY|CGROUP_MASK_DEVICES|CGROUP_MASK_PIDS, + + /* All real cgroup v2 controllers */ + CGROUP_MASK_V2 = CGROUP_MASK_CPU|CGROUP_MASK_CPUSET|CGROUP_MASK_IO|CGROUP_MASK_MEMORY|CGROUP_MASK_PIDS, + + /* All cgroup v2 BPF pseudo-controllers */ + CGROUP_MASK_BPF = CGROUP_MASK_BPF_FIREWALL|CGROUP_MASK_BPF_DEVICES|CGROUP_MASK_BPF_FOREIGN|CGROUP_MASK_BPF_SOCKET_BIND|CGROUP_MASK_BPF_RESTRICT_NETWORK_INTERFACES, + + _CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1 +} CGroupMask; + +static inline CGroupMask CGROUP_MASK_EXTEND_JOINED(CGroupMask mask) { + /* We always mount "cpu" and "cpuacct" in the same hierarchy. Hence, when one bit is set also set the other */ + + if (mask & (CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT)) + mask |= (CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT); + + return mask; +} + +CGroupMask get_cpu_accounting_mask(void); +bool cpu_accounting_is_cheap(void); + +/* Special values for all weight knobs on unified hierarchy */ +#define CGROUP_WEIGHT_INVALID UINT64_MAX +#define CGROUP_WEIGHT_IDLE UINT64_C(0) +#define CGROUP_WEIGHT_MIN UINT64_C(1) +#define CGROUP_WEIGHT_MAX UINT64_C(10000) +#define CGROUP_WEIGHT_DEFAULT UINT64_C(100) + +#define CGROUP_LIMIT_MIN UINT64_C(0) +#define CGROUP_LIMIT_MAX UINT64_MAX + +static inline bool CGROUP_WEIGHT_IS_OK(uint64_t x) { + return + x == CGROUP_WEIGHT_INVALID || + (x >= CGROUP_WEIGHT_MIN && x <= CGROUP_WEIGHT_MAX); +} + +/* IO limits on unified hierarchy */ +typedef enum CGroupIOLimitType { + CGROUP_IO_RBPS_MAX, + CGROUP_IO_WBPS_MAX, + CGROUP_IO_RIOPS_MAX, + CGROUP_IO_WIOPS_MAX, + + _CGROUP_IO_LIMIT_TYPE_MAX, + _CGROUP_IO_LIMIT_TYPE_INVALID = -EINVAL, +} CGroupIOLimitType; + +extern const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX]; + +const char* cgroup_io_limit_type_to_string(CGroupIOLimitType t) _const_; +CGroupIOLimitType cgroup_io_limit_type_from_string(const char *s) _pure_; + +/* Special values for the cpu.shares attribute */ +#define CGROUP_CPU_SHARES_INVALID UINT64_MAX +#define CGROUP_CPU_SHARES_MIN UINT64_C(2) +#define CGROUP_CPU_SHARES_MAX UINT64_C(262144) +#define CGROUP_CPU_SHARES_DEFAULT UINT64_C(1024) + +static inline bool CGROUP_CPU_SHARES_IS_OK(uint64_t x) { + return + x == CGROUP_CPU_SHARES_INVALID || + (x >= CGROUP_CPU_SHARES_MIN && x <= CGROUP_CPU_SHARES_MAX); +} + +/* Special values for the special {blkio,io}.bfq.weight attribute */ +#define CGROUP_BFQ_WEIGHT_INVALID UINT64_MAX +#define CGROUP_BFQ_WEIGHT_MIN UINT64_C(1) +#define CGROUP_BFQ_WEIGHT_MAX UINT64_C(1000) +#define CGROUP_BFQ_WEIGHT_DEFAULT UINT64_C(100) + +/* Convert the normal io.weight value to io.bfq.weight */ +static inline uint64_t BFQ_WEIGHT(uint64_t io_weight) { + return + io_weight <= CGROUP_WEIGHT_DEFAULT ? + CGROUP_BFQ_WEIGHT_DEFAULT - (CGROUP_WEIGHT_DEFAULT - io_weight) * (CGROUP_BFQ_WEIGHT_DEFAULT - CGROUP_BFQ_WEIGHT_MIN) / (CGROUP_WEIGHT_DEFAULT - CGROUP_WEIGHT_MIN) : + CGROUP_BFQ_WEIGHT_DEFAULT + (io_weight - CGROUP_WEIGHT_DEFAULT) * (CGROUP_BFQ_WEIGHT_MAX - CGROUP_BFQ_WEIGHT_DEFAULT) / (CGROUP_WEIGHT_MAX - CGROUP_WEIGHT_DEFAULT); +} + +/* Special values for the blkio.weight attribute */ +#define CGROUP_BLKIO_WEIGHT_INVALID UINT64_MAX +#define CGROUP_BLKIO_WEIGHT_MIN UINT64_C(10) +#define CGROUP_BLKIO_WEIGHT_MAX UINT64_C(1000) +#define CGROUP_BLKIO_WEIGHT_DEFAULT UINT64_C(500) + +static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) { + return + x == CGROUP_BLKIO_WEIGHT_INVALID || + (x >= CGROUP_BLKIO_WEIGHT_MIN && x <= CGROUP_BLKIO_WEIGHT_MAX); +} + +typedef enum CGroupUnified { + CGROUP_UNIFIED_UNKNOWN = -1, + CGROUP_UNIFIED_NONE = 0, /* Both systemd and controllers on legacy */ + CGROUP_UNIFIED_SYSTEMD = 1, /* Only systemd on unified */ + CGROUP_UNIFIED_ALL = 2, /* Both systemd and controllers on unified */ +} CGroupUnified; + +/* + * General rules: + * + * We accept named hierarchies in the syntax "foo" and "name=foo". + * + * We expect that named hierarchies do not conflict in name with a + * kernel hierarchy, modulo the "name=" prefix. + * + * We always generate "normalized" controller names, i.e. without the + * "name=" prefix. + * + * We require absolute cgroup paths. When returning, we will always + * generate paths with multiple adjacent / removed. + */ + +int cg_enumerate_processes(const char *controller, const char *path, FILE **_f); +int cg_read_pid(FILE *f, pid_t *_pid); +int cg_read_event(const char *controller, const char *path, const char *event, + char **val); + +int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d); +int cg_read_subgroup(DIR *d, char **fn); + +typedef enum CGroupFlags { + CGROUP_SIGCONT = 1 << 0, + CGROUP_IGNORE_SELF = 1 << 1, + CGROUP_REMOVE = 1 << 2, +} CGroupFlags; + +typedef int (*cg_kill_log_func_t)(pid_t pid, int sig, void *userdata); + +int cg_kill(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata); +int cg_kill_kernel_sigkill(const char *controller, const char *path); +int cg_kill_recursive(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata); + +int cg_split_spec(const char *spec, char **ret_controller, char **ret_path); +int cg_mangle_path(const char *path, char **result); + +int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs); +int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs); + +int cg_pid_get_path(const char *controller, pid_t pid, char **path); + +int cg_rmdir(const char *controller, const char *path); + +int cg_is_threaded(const char *controller, const char *path); + +typedef enum { + CG_KEY_MODE_GRACEFUL = 1 << 0, +} CGroupKeyMode; + +int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value); +int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret); +int cg_get_keyed_attribute_full(const char *controller, const char *path, const char *attribute, char **keys, char **values, CGroupKeyMode mode); + +static inline int cg_get_keyed_attribute( + const char *controller, + const char *path, + const char *attribute, + char **keys, + char **ret_values) { + return cg_get_keyed_attribute_full(controller, path, attribute, keys, ret_values, 0); +} + +static inline int cg_get_keyed_attribute_graceful( + const char *controller, + const char *path, + const char *attribute, + char **keys, + char **ret_values) { + return cg_get_keyed_attribute_full(controller, path, attribute, keys, ret_values, CG_KEY_MODE_GRACEFUL); +} + +int cg_get_attribute_as_uint64(const char *controller, const char *path, const char *attribute, uint64_t *ret); + +/* Does a parse_boolean() on the attribute contents and sets ret accordingly */ +int cg_get_attribute_as_bool(const char *controller, const char *path, const char *attribute, bool *ret); + +int cg_set_access(const char *controller, const char *path, uid_t uid, gid_t gid); +int cg_get_owner(const char *controller, const char *path, uid_t *ret_uid); + +int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags); +int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size); +int cg_get_xattr_malloc(const char *controller, const char *path, const char *name, char **ret); +/* Returns negative on error, and 0 or 1 on success for the bool value */ +int cg_get_xattr_bool(const char *controller, const char *path, const char *name); +int cg_remove_xattr(const char *controller, const char *path, const char *name); + +int cg_install_release_agent(const char *controller, const char *agent); +int cg_uninstall_release_agent(const char *controller); + +int cg_is_empty(const char *controller, const char *path); +int cg_is_empty_recursive(const char *controller, const char *path); + +int cg_get_root_path(char **path); + +int cg_path_get_cgroupid(const char *path, uint64_t *ret); +int cg_path_get_session(const char *path, char **session); +int cg_path_get_owner_uid(const char *path, uid_t *uid); +int cg_path_get_unit(const char *path, char **unit); +int cg_path_get_user_unit(const char *path, char **unit); +int cg_path_get_machine_name(const char *path, char **machine); +int cg_path_get_slice(const char *path, char **slice); +int cg_path_get_user_slice(const char *path, char **slice); + +int cg_shift_path(const char *cgroup, const char *cached_root, const char **shifted); +int cg_pid_get_path_shifted(pid_t pid, const char *cached_root, char **cgroup); + +int cg_pid_get_session(pid_t pid, char **session); +int cg_pid_get_owner_uid(pid_t pid, uid_t *uid); +int cg_pid_get_unit(pid_t pid, char **unit); +int cg_pid_get_user_unit(pid_t pid, char **unit); +int cg_pid_get_machine_name(pid_t pid, char **machine); +int cg_pid_get_slice(pid_t pid, char **slice); +int cg_pid_get_user_slice(pid_t pid, char **slice); + +int cg_path_decode_unit(const char *cgroup, char **unit); + +char *cg_escape(const char *p); +char *cg_unescape(const char *p) _pure_; + +bool cg_controller_is_valid(const char *p); + +int cg_slice_to_path(const char *unit, char **ret); + +typedef const char* (*cg_migrate_callback_t)(CGroupMask mask, void *userdata); + +int cg_mask_supported(CGroupMask *ret); +int cg_mask_supported_subtree(const char *root, CGroupMask *ret); +int cg_mask_from_string(const char *s, CGroupMask *ret); +int cg_mask_to_string(CGroupMask mask, char **ret); + +int cg_kernel_controllers(Set **controllers); + +bool cg_ns_supported(void); +bool cg_freezer_supported(void); +bool cg_kill_supported(void); + +int cg_all_unified(void); +int cg_hybrid_unified(void); +int cg_unified_controller(const char *controller); +int cg_unified_cached(bool flush); +static inline int cg_unified(void) { + return cg_unified_cached(true); +} + +const char* cgroup_controller_to_string(CGroupController c) _const_; +CGroupController cgroup_controller_from_string(const char *s) _pure_; + +bool is_cgroup_fs(const struct statfs *s); +bool fd_is_cgroup_fs(int fd); + +typedef enum ManagedOOMMode { + MANAGED_OOM_AUTO, + MANAGED_OOM_KILL, + _MANAGED_OOM_MODE_MAX, + _MANAGED_OOM_MODE_INVALID = -EINVAL, +} ManagedOOMMode; + +const char* managed_oom_mode_to_string(ManagedOOMMode m) _const_; +ManagedOOMMode managed_oom_mode_from_string(const char *s) _pure_; + +typedef enum ManagedOOMPreference { + MANAGED_OOM_PREFERENCE_NONE = 0, + MANAGED_OOM_PREFERENCE_AVOID = 1, + MANAGED_OOM_PREFERENCE_OMIT = 2, + _MANAGED_OOM_PREFERENCE_MAX, + _MANAGED_OOM_PREFERENCE_INVALID = -EINVAL, +} ManagedOOMPreference; + +const char* managed_oom_preference_to_string(ManagedOOMPreference a) _const_; +ManagedOOMPreference managed_oom_preference_from_string(const char *s) _pure_; + +/* The structure to pass to name_to_handle_at() on cgroupfs2 */ +typedef union { + struct file_handle file_handle; + uint8_t space[offsetof(struct file_handle, f_handle) + sizeof(uint64_t)]; +} cg_file_handle; + +#define CG_FILE_HANDLE_INIT { .file_handle.handle_bytes = sizeof(uint64_t) } +#define CG_FILE_HANDLE_CGROUPID(fh) (*(uint64_t*) (fh).file_handle.f_handle) diff --git a/src/basic/chase-symlinks.c b/src/basic/chase-symlinks.c new file mode 100644 index 0000000..ac55311 --- /dev/null +++ b/src/basic/chase-symlinks.c @@ -0,0 +1,603 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include + +#include "alloc-util.h" +#include "chase-symlinks.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "glyph-util.h" +#include "log.h" +#include "path-util.h" +#include "string-util.h" +#include "user-util.h" + +bool unsafe_transition(const struct stat *a, const struct stat *b) { + /* Returns true if the transition from a to b is safe, i.e. that we never transition from unprivileged to + * privileged files or directories. Why bother? So that unprivileged code can't symlink to privileged files + * making us believe we read something safe even though it isn't safe in the specific context we open it in. */ + + if (a->st_uid == 0) /* Transitioning from privileged to unprivileged is always fine */ + return false; + + return a->st_uid != b->st_uid; /* Otherwise we need to stay within the same UID */ +} + +static int log_unsafe_transition(int a, int b, const char *path, ChaseSymlinksFlags flags) { + _cleanup_free_ char *n1 = NULL, *n2 = NULL, *user_a = NULL, *user_b = NULL; + struct stat st; + + if (!FLAGS_SET(flags, CHASE_WARN)) + return -ENOLINK; + + (void) fd_get_path(a, &n1); + (void) fd_get_path(b, &n2); + + if (fstat(a, &st) == 0) + user_a = uid_to_name(st.st_uid); + if (fstat(b, &st) == 0) + user_b = uid_to_name(st.st_uid); + + return log_warning_errno(SYNTHETIC_ERRNO(ENOLINK), + "Detected unsafe path transition %s (owned by %s) %s %s (owned by %s) during canonicalization of %s.", + strna(n1), strna(user_a), special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), strna(n2), strna(user_b), path); +} + +static int log_autofs_mount_point(int fd, const char *path, ChaseSymlinksFlags flags) { + _cleanup_free_ char *n1 = NULL; + + if (!FLAGS_SET(flags, CHASE_WARN)) + return -EREMOTE; + + (void) fd_get_path(fd, &n1); + + return log_warning_errno(SYNTHETIC_ERRNO(EREMOTE), + "Detected autofs mount point %s during canonicalization of %s.", + strna(n1), path); +} + +int chase_symlinks( + const char *path, + const char *original_root, + ChaseSymlinksFlags flags, + char **ret_path, + int *ret_fd) { + + _cleanup_free_ char *buffer = NULL, *done = NULL, *root = NULL; + _cleanup_close_ int fd = -1; + unsigned max_follow = CHASE_SYMLINKS_MAX; /* how many symlinks to follow before giving up and returning ELOOP */ + bool exists = true, append_trail_slash = false; + struct stat previous_stat; + const char *todo; + int r; + + assert(path); + + /* Either the file may be missing, or we return an fd to the final object, but both make no sense */ + if ((flags & CHASE_NONEXISTENT) && ret_fd) + return -EINVAL; + + if ((flags & CHASE_STEP) && ret_fd) + return -EINVAL; + + if (isempty(path)) + return -EINVAL; + + /* This is a lot like canonicalize_file_name(), but takes an additional "root" parameter, that allows following + * symlinks relative to a root directory, instead of the root of the host. + * + * Note that "root" primarily matters if we encounter an absolute symlink. It is also used when following + * relative symlinks to ensure they cannot be used to "escape" the root directory. The path parameter passed is + * assumed to be already prefixed by it, except if the CHASE_PREFIX_ROOT flag is set, in which case it is first + * prefixed accordingly. + * + * Algorithmically this operates on two path buffers: "done" are the components of the path we already + * processed and resolved symlinks, "." and ".." of. "todo" are the components of the path we still need to + * process. On each iteration, we move one component from "todo" to "done", processing it's special meaning + * each time. The "todo" path always starts with at least one slash, the "done" path always ends in no + * slash. We always keep an O_PATH fd to the component we are currently processing, thus keeping lookup races + * to a minimum. + * + * Suggested usage: whenever you want to canonicalize a path, use this function. Pass the absolute path you got + * as-is: fully qualified and relative to your host's root. Optionally, specify the root parameter to tell this + * function what to do when encountering a symlink with an absolute path as directory: prefix it by the + * specified path. + * + * There are five ways to invoke this function: + * + * 1. Without CHASE_STEP or ret_fd: in this case the path is resolved and the normalized path is + * returned in `ret_path`. The return value is < 0 on error. If CHASE_NONEXISTENT is also set, 0 + * is returned if the file doesn't exist, > 0 otherwise. If CHASE_NONEXISTENT is not set, >= 0 is + * returned if the destination was found, -ENOENT if it wasn't. + * + * 2. With ret_fd: in this case the destination is opened after chasing it as O_PATH and this file + * descriptor is returned as return value. This is useful to open files relative to some root + * directory. Note that the returned O_PATH file descriptors must be converted into a regular one (using + * fd_reopen() or such) before it can be used for reading/writing. ret_fd may not be combined with + * CHASE_NONEXISTENT. + * + * 3. With CHASE_STEP: in this case only a single step of the normalization is executed, i.e. only the first + * symlink or ".." component of the path is resolved, and the resulting path is returned. This is useful if + * a caller wants to trace the path through the file system verbosely. Returns < 0 on error, > 0 if the + * path is fully normalized, and == 0 for each normalization step. This may be combined with + * CHASE_NONEXISTENT, in which case 1 is returned when a component is not found. + * + * 4. With CHASE_SAFE: in this case the path must not contain unsafe transitions, i.e. transitions from + * unprivileged to privileged files or directories. In such cases the return value is -ENOLINK. If + * CHASE_WARN is also set, a warning describing the unsafe transition is emitted. CHASE_WARN cannot + * be used in PID 1. + * + * 5. With CHASE_NO_AUTOFS: in this case if an autofs mount point is encountered, path normalization + * is aborted and -EREMOTE is returned. If CHASE_WARN is also set, a warning showing the path of + * the mount point is emitted. CHASE_WARN cannot be used in PID 1. + */ + + /* A root directory of "/" or "" is identical to none */ + if (empty_or_root(original_root)) + original_root = NULL; + + if (!original_root && !ret_path && !(flags & (CHASE_NONEXISTENT|CHASE_NO_AUTOFS|CHASE_SAFE|CHASE_STEP)) && ret_fd) { + /* Shortcut the ret_fd case if the caller isn't interested in the actual path and has no root set + * and doesn't care about any of the other special features we provide either. */ + r = open(path, O_PATH|O_CLOEXEC|((flags & CHASE_NOFOLLOW) ? O_NOFOLLOW : 0)); + if (r < 0) + return -errno; + + *ret_fd = r; + return 0; + } + + if (original_root) { + r = path_make_absolute_cwd(original_root, &root); + if (r < 0) + return r; + + /* Simplify the root directory, so that it has no duplicate slashes and nothing at the + * end. While we won't resolve the root path we still simplify it. Note that dropping the + * trailing slash should not change behaviour, since when opening it we specify O_DIRECTORY + * anyway. Moreover at the end of this function after processing everything we'll always turn + * the empty string back to "/". */ + delete_trailing_chars(root, "/"); + path_simplify(root); + + if (flags & CHASE_PREFIX_ROOT) { + buffer = path_join(root, path); + if (!buffer) + return -ENOMEM; + } + } + + if (!buffer) { + r = path_make_absolute_cwd(path, &buffer); + if (r < 0) + return r; + } + + fd = open(empty_to_root(root), O_CLOEXEC|O_DIRECTORY|O_PATH); + if (fd < 0) + return -errno; + + if (flags & CHASE_SAFE) + if (fstat(fd, &previous_stat) < 0) + return -errno; + + if (flags & CHASE_TRAIL_SLASH) + append_trail_slash = endswith(buffer, "/") || endswith(buffer, "/."); + + if (root) { + /* If we are operating on a root directory, let's take the root directory as it is. */ + + todo = path_startswith(buffer, root); + if (!todo) + return log_full_errno(flags & CHASE_WARN ? LOG_WARNING : LOG_DEBUG, + SYNTHETIC_ERRNO(ECHRNG), + "Specified path '%s' is outside of specified root directory '%s', refusing to resolve.", + path, root); + + done = strdup(root); + } else { + todo = buffer; + done = strdup("/"); + } + if (!done) + return -ENOMEM; + + for (;;) { + _cleanup_free_ char *first = NULL; + _cleanup_close_ int child = -1; + struct stat st; + const char *e; + + r = path_find_first_component(&todo, /* accept_dot_dot= */ true, &e); + if (r < 0) + return r; + if (r == 0) { /* We reached the end. */ + if (append_trail_slash) + if (!strextend(&done, "/")) + return -ENOMEM; + break; + } + + first = strndup(e, r); + if (!first) + return -ENOMEM; + + /* Two dots? Then chop off the last bit of what we already found out. */ + if (path_equal(first, "..")) { + _cleanup_free_ char *parent = NULL; + _cleanup_close_ int fd_parent = -1; + + /* If we already are at the top, then going up will not change anything. This is in-line with + * how the kernel handles this. */ + if (empty_or_root(done)) + continue; + + r = path_extract_directory(done, &parent); + if (r < 0) + return r; + + /* Don't allow this to leave the root dir. */ + if (root && + path_startswith(done, root) && + !path_startswith(parent, root)) + continue; + + free_and_replace(done, parent); + + if (flags & CHASE_STEP) + goto chased_one; + + fd_parent = openat(fd, "..", O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (fd_parent < 0) + return -errno; + + if (flags & CHASE_SAFE) { + if (fstat(fd_parent, &st) < 0) + return -errno; + + if (unsafe_transition(&previous_stat, &st)) + return log_unsafe_transition(fd, fd_parent, path, flags); + + previous_stat = st; + } + + close_and_replace(fd, fd_parent); + + continue; + } + + /* Otherwise let's see what this is. */ + child = openat(fd, first, O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (child < 0) { + if (errno == ENOENT && + (flags & CHASE_NONEXISTENT) && + (isempty(todo) || path_is_safe(todo))) { + /* If CHASE_NONEXISTENT is set, and the path does not exist, then + * that's OK, return what we got so far. But don't allow this if the + * remaining path contains "../" or something else weird. */ + + if (!path_extend(&done, first, todo)) + return -ENOMEM; + + exists = false; + break; + } + + return -errno; + } + + if (fstat(child, &st) < 0) + return -errno; + if ((flags & CHASE_SAFE) && + unsafe_transition(&previous_stat, &st)) + return log_unsafe_transition(fd, child, path, flags); + + previous_stat = st; + + if ((flags & CHASE_NO_AUTOFS) && + fd_is_fs_type(child, AUTOFS_SUPER_MAGIC) > 0) + return log_autofs_mount_point(child, path, flags); + + if (S_ISLNK(st.st_mode) && !((flags & CHASE_NOFOLLOW) && isempty(todo))) { + _cleanup_free_ char *destination = NULL; + + /* This is a symlink, in this case read the destination. But let's make sure we + * don't follow symlinks without bounds. */ + if (--max_follow <= 0) + return -ELOOP; + + r = readlinkat_malloc(fd, first, &destination); + if (r < 0) + return r; + if (isempty(destination)) + return -EINVAL; + + if (path_is_absolute(destination)) { + + /* An absolute destination. Start the loop from the beginning, but use the root + * directory as base. */ + + safe_close(fd); + fd = open(empty_to_root(root), O_CLOEXEC|O_DIRECTORY|O_PATH); + if (fd < 0) + return -errno; + + if (flags & CHASE_SAFE) { + if (fstat(fd, &st) < 0) + return -errno; + + if (unsafe_transition(&previous_stat, &st)) + return log_unsafe_transition(child, fd, path, flags); + + previous_stat = st; + } + + /* Note that we do not revalidate the root, we take it as is. */ + r = free_and_strdup(&done, empty_to_root(root)); + if (r < 0) + return r; + } + + /* Prefix what's left to do with what we just read, and start the loop again, but + * remain in the current directory. */ + if (!path_extend(&destination, todo)) + return -ENOMEM; + + free_and_replace(buffer, destination); + todo = buffer; + + if (flags & CHASE_STEP) + goto chased_one; + + continue; + } + + /* If this is not a symlink, then let's just add the name we read to what we already verified. */ + if (!path_extend(&done, first)) + return -ENOMEM; + + /* And iterate again, but go one directory further down. */ + close_and_replace(fd, child); + } + + if (ret_path) + *ret_path = TAKE_PTR(done); + + if (ret_fd) { + /* Return the O_PATH fd we currently are looking to the caller. It can translate it to a + * proper fd by opening /proc/self/fd/xyz. */ + + assert(fd >= 0); + *ret_fd = TAKE_FD(fd); + } + + if (flags & CHASE_STEP) + return 1; + + return exists; + +chased_one: + if (ret_path) { + const char *e; + + /* todo may contain slashes at the beginning. */ + r = path_find_first_component(&todo, /* accept_dot_dot= */ true, &e); + if (r < 0) + return r; + if (r == 0) + *ret_path = TAKE_PTR(done); + else { + char *c; + + c = path_join(done, e); + if (!c) + return -ENOMEM; + + *ret_path = c; + } + } + + return 0; +} + +int chase_symlinks_and_open( + const char *path, + const char *root, + ChaseSymlinksFlags chase_flags, + int open_flags, + char **ret_path) { + + _cleanup_close_ int path_fd = -1; + _cleanup_free_ char *p = NULL; + int r; + + if (chase_flags & (CHASE_NONEXISTENT|CHASE_STEP)) + return -EINVAL; + + if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) { + /* Shortcut this call if none of the special features of this call are requested */ + r = open(path, open_flags | (FLAGS_SET(chase_flags, CHASE_NOFOLLOW) ? O_NOFOLLOW : 0)); + if (r < 0) + return -errno; + + return r; + } + + r = chase_symlinks(path, root, chase_flags, ret_path ? &p : NULL, &path_fd); + if (r < 0) + return r; + assert(path_fd >= 0); + + r = fd_reopen(path_fd, open_flags); + if (r < 0) + return r; + + if (ret_path) + *ret_path = TAKE_PTR(p); + + return r; +} + +int chase_symlinks_and_opendir( + const char *path, + const char *root, + ChaseSymlinksFlags chase_flags, + char **ret_path, + DIR **ret_dir) { + + _cleanup_close_ int path_fd = -1; + _cleanup_free_ char *p = NULL; + DIR *d; + int r; + + if (!ret_dir) + return -EINVAL; + if (chase_flags & (CHASE_NONEXISTENT|CHASE_STEP)) + return -EINVAL; + + if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) { + /* Shortcut this call if none of the special features of this call are requested */ + d = opendir(path); + if (!d) + return -errno; + + *ret_dir = d; + return 0; + } + + r = chase_symlinks(path, root, chase_flags, ret_path ? &p : NULL, &path_fd); + if (r < 0) + return r; + assert(path_fd >= 0); + + d = xopendirat(path_fd, ".", O_NOFOLLOW); + if (!d) + return -errno; + + if (ret_path) + *ret_path = TAKE_PTR(p); + + *ret_dir = d; + return 0; +} + +int chase_symlinks_and_stat( + const char *path, + const char *root, + ChaseSymlinksFlags chase_flags, + char **ret_path, + struct stat *ret_stat, + int *ret_fd) { + + _cleanup_close_ int path_fd = -1; + _cleanup_free_ char *p = NULL; + int r; + + assert(path); + assert(ret_stat); + + if (chase_flags & (CHASE_NONEXISTENT|CHASE_STEP)) + return -EINVAL; + + if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0 && !ret_fd) { + /* Shortcut this call if none of the special features of this call are requested */ + + if (fstatat(AT_FDCWD, path, ret_stat, FLAGS_SET(chase_flags, CHASE_NOFOLLOW) ? AT_SYMLINK_NOFOLLOW : 0) < 0) + return -errno; + + return 1; + } + + r = chase_symlinks(path, root, chase_flags, ret_path ? &p : NULL, &path_fd); + if (r < 0) + return r; + assert(path_fd >= 0); + + if (fstat(path_fd, ret_stat) < 0) + return -errno; + + if (ret_path) + *ret_path = TAKE_PTR(p); + if (ret_fd) + *ret_fd = TAKE_FD(path_fd); + + return 1; +} + +int chase_symlinks_and_access( + const char *path, + const char *root, + ChaseSymlinksFlags chase_flags, + int access_mode, + char **ret_path, + int *ret_fd) { + + _cleanup_close_ int path_fd = -1; + _cleanup_free_ char *p = NULL; + int r; + + assert(path); + + if (chase_flags & (CHASE_NONEXISTENT|CHASE_STEP)) + return -EINVAL; + + if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0 && !ret_fd) { + /* Shortcut this call if none of the special features of this call are requested */ + + if (faccessat(AT_FDCWD, path, access_mode, FLAGS_SET(chase_flags, CHASE_NOFOLLOW) ? AT_SYMLINK_NOFOLLOW : 0) < 0) + return -errno; + + return 1; + } + + r = chase_symlinks(path, root, chase_flags, ret_path ? &p : NULL, &path_fd); + if (r < 0) + return r; + assert(path_fd >= 0); + + r = access_fd(path_fd, access_mode); + if (r < 0) + return r; + + if (ret_path) + *ret_path = TAKE_PTR(p); + if (ret_fd) + *ret_fd = TAKE_FD(path_fd); + + return 1; +} + +int chase_symlinks_and_fopen_unlocked( + const char *path, + const char *root, + ChaseSymlinksFlags chase_flags, + const char *open_flags, + char **ret_path, + FILE **ret_file) { + + _cleanup_free_ char *final_path = NULL; + _cleanup_close_ int fd = -1; + int mode_flags, r; + + assert(path); + assert(open_flags); + assert(ret_file); + + mode_flags = fopen_mode_to_flags(open_flags); + if (mode_flags < 0) + return mode_flags; + + fd = chase_symlinks_and_open(path, root, chase_flags, mode_flags, ret_path ? &final_path : NULL); + if (fd < 0) + return fd; + + r = take_fdopen_unlocked(&fd, open_flags, ret_file); + if (r < 0) + return r; + + if (ret_path) + *ret_path = TAKE_PTR(final_path); + + return 0; +} diff --git a/src/basic/chase-symlinks.h b/src/basic/chase-symlinks.h new file mode 100644 index 0000000..a9ee58f --- /dev/null +++ b/src/basic/chase-symlinks.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +#include "stat-util.h" + +typedef enum ChaseSymlinksFlags { + CHASE_PREFIX_ROOT = 1 << 0, /* The specified path will be prefixed by the specified root before beginning the iteration */ + CHASE_NONEXISTENT = 1 << 1, /* It's OK if the path doesn't actually exist. */ + CHASE_NO_AUTOFS = 1 << 2, /* Return -EREMOTE if autofs mount point found */ + CHASE_SAFE = 1 << 3, /* Return -EPERM if we ever traverse from unprivileged to privileged files or directories */ + CHASE_TRAIL_SLASH = 1 << 4, /* Any trailing slash will be preserved */ + CHASE_STEP = 1 << 5, /* Just execute a single step of the normalization */ + CHASE_NOFOLLOW = 1 << 6, /* Do not follow the path's right-most component. With ret_fd, when the path's + * right-most component refers to symlink, return O_PATH fd of the symlink. */ + CHASE_WARN = 1 << 7, /* Emit an appropriate warning when an error is encountered. + * Note: this may do an NSS lookup, hence this flag cannot be used in PID 1. */ +} ChaseSymlinksFlags; + +bool unsafe_transition(const struct stat *a, const struct stat *b); + +/* How many iterations to execute before returning -ELOOP */ +#define CHASE_SYMLINKS_MAX 32 + +int chase_symlinks(const char *path_with_prefix, const char *root, ChaseSymlinksFlags chase_flags, char **ret_path, int *ret_fd); + +int chase_symlinks_and_open(const char *path, const char *root, ChaseSymlinksFlags chase_flags, int open_flags, char **ret_path); +int chase_symlinks_and_opendir(const char *path, const char *root, ChaseSymlinksFlags chase_flags, char **ret_path, DIR **ret_dir); +int chase_symlinks_and_stat(const char *path, const char *root, ChaseSymlinksFlags chase_flags, char **ret_path, struct stat *ret_stat, int *ret_fd); +int chase_symlinks_and_access(const char *path, const char *root, ChaseSymlinksFlags chase_flags, int access_mode, char **ret_path, int *ret_fd); +int chase_symlinks_and_fopen_unlocked(const char *path, const char *root, ChaseSymlinksFlags chase_flags, const char *open_flags, char **ret_path, FILE **ret_file); diff --git a/src/basic/chattr-util.c b/src/basic/chattr-util.c new file mode 100644 index 0000000..eddde13 --- /dev/null +++ b/src/basic/chattr-util.c @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include + +#include "chattr-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "macro.h" +#include "string-util.h" + +int chattr_full(const char *path, + int fd, + unsigned value, + unsigned mask, + unsigned *ret_previous, + unsigned *ret_final, + ChattrApplyFlags flags) { + + _cleanup_close_ int fd_will_close = -1; + unsigned old_attr, new_attr; + int set_flags_errno = 0; + struct stat st; + + assert(path || fd >= 0); + + if (fd < 0) { + fd = fd_will_close = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (fd < 0) + return -errno; + } + + if (fstat(fd, &st) < 0) + return -errno; + + /* Explicitly check whether this is a regular file or directory. If it is anything else (such + * as a device node or fifo), then the ioctl will not hit the file systems but possibly + * drivers, where the ioctl might have different effects. Notably, DRM is using the same + * ioctl() number. */ + + if (!S_ISDIR(st.st_mode) && !S_ISREG(st.st_mode)) + return -ENOTTY; + + if (mask == 0 && !ret_previous && !ret_final) + return 0; + + if (ioctl(fd, FS_IOC_GETFLAGS, &old_attr) < 0) + return -errno; + + new_attr = (old_attr & ~mask) | (value & mask); + if (new_attr == old_attr) { + if (ret_previous) + *ret_previous = old_attr; + if (ret_final) + *ret_final = old_attr; + return 0; + } + + if (ioctl(fd, FS_IOC_SETFLAGS, &new_attr) >= 0) { + unsigned attr; + + /* Some filesystems (BTRFS) silently fail when a flag cannot be set. Let's make sure our + * changes actually went through by querying the flags again and verifying they're equal to + * the flags we tried to configure. */ + + if (ioctl(fd, FS_IOC_GETFLAGS, &attr) < 0) + return -errno; + + if (new_attr == attr) { + if (ret_previous) + *ret_previous = old_attr; + if (ret_final) + *ret_final = new_attr; + return 1; + } + + /* Trigger the fallback logic. */ + errno = EINVAL; + } + + if ((errno != EINVAL && !ERRNO_IS_NOT_SUPPORTED(errno)) || + !FLAGS_SET(flags, CHATTR_FALLBACK_BITWISE)) + return -errno; + + /* When -EINVAL is returned, we assume that incompatible attributes are simultaneously + * specified. E.g., compress(c) and nocow(C) attributes cannot be set to files on btrfs. + * As a fallback, let's try to set attributes one by one. + * + * Also, when we get EOPNOTSUPP (or a similar error code) we assume a flag might just not be + * supported, and we can ignore it too */ + + unsigned current_attr = old_attr; + for (unsigned i = 0; i < sizeof(unsigned) * 8; i++) { + unsigned new_one, mask_one = 1u << i; + + if (!FLAGS_SET(mask, mask_one)) + continue; + + new_one = UPDATE_FLAG(current_attr, mask_one, FLAGS_SET(value, mask_one)); + if (new_one == current_attr) + continue; + + if (ioctl(fd, FS_IOC_SETFLAGS, &new_one) < 0) { + if (errno != EINVAL && !ERRNO_IS_NOT_SUPPORTED(errno)) + return -errno; + + log_full_errno(FLAGS_SET(flags, CHATTR_WARN_UNSUPPORTED_FLAGS) ? LOG_WARNING : LOG_DEBUG, + errno, + "Unable to set file attribute 0x%x on %s, ignoring: %m", mask_one, strna(path)); + + /* Ensures that we record whether only EOPNOTSUPP&friends are encountered, or if a more serious + * error (thus worth logging at a different level, etc) was seen too. */ + if (set_flags_errno == 0 || !ERRNO_IS_NOT_SUPPORTED(errno)) + set_flags_errno = -errno; + + continue; + } + + if (ioctl(fd, FS_IOC_GETFLAGS, ¤t_attr) < 0) + return -errno; + } + + if (ret_previous) + *ret_previous = old_attr; + if (ret_final) + *ret_final = current_attr; + + /* -ENOANO indicates that some attributes cannot be set. ERRNO_IS_NOT_SUPPORTED indicates that all + * encountered failures were due to flags not supported by the FS, so return a specific error in + * that case, so callers can handle it properly (e.g.: tmpfiles.d can use debug level logging). */ + return current_attr == new_attr ? 1 : ERRNO_IS_NOT_SUPPORTED(set_flags_errno) ? set_flags_errno : -ENOANO; +} + +int read_attr_fd(int fd, unsigned *ret) { + struct stat st; + + assert(fd >= 0); + + if (fstat(fd, &st) < 0) + return -errno; + + if (!S_ISDIR(st.st_mode) && !S_ISREG(st.st_mode)) + return -ENOTTY; + + return RET_NERRNO(ioctl(fd, FS_IOC_GETFLAGS, ret)); +} + +int read_attr_path(const char *p, unsigned *ret) { + _cleanup_close_ int fd = -1; + + assert(p); + assert(ret); + + fd = open(p, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (fd < 0) + return -errno; + + return read_attr_fd(fd, ret); +} diff --git a/src/basic/chattr-util.h b/src/basic/chattr-util.h new file mode 100644 index 0000000..82f91c6 --- /dev/null +++ b/src/basic/chattr-util.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include + +#include "missing_fs.h" + +/* The chattr() flags to apply when creating a new file *before* writing to it. In particular, flags such as + * FS_NOCOW_FL don't work if applied a-posteriori. All other flags are fine (or even necessary, think + * FS_IMMUTABLE_FL!) to apply after writing to the files. */ +#define CHATTR_EARLY_FL \ + (FS_NOATIME_FL | \ + FS_COMPR_FL | \ + FS_NOCOW_FL | \ + FS_NOCOMP_FL | \ + FS_PROJINHERIT_FL) + +#define CHATTR_ALL_FL \ + (FS_NOATIME_FL | \ + FS_SYNC_FL | \ + FS_DIRSYNC_FL | \ + FS_APPEND_FL | \ + FS_COMPR_FL | \ + FS_NODUMP_FL | \ + FS_EXTENT_FL | \ + FS_IMMUTABLE_FL | \ + FS_JOURNAL_DATA_FL | \ + FS_SECRM_FL | \ + FS_UNRM_FL | \ + FS_NOTAIL_FL | \ + FS_TOPDIR_FL | \ + FS_NOCOW_FL | \ + FS_PROJINHERIT_FL) + +typedef enum ChattrApplyFlags { + CHATTR_FALLBACK_BITWISE = 1 << 0, + CHATTR_WARN_UNSUPPORTED_FLAGS = 1 << 1, +} ChattrApplyFlags; + +int chattr_full(const char *path, int fd, unsigned value, unsigned mask, unsigned *ret_previous, unsigned *ret_final, ChattrApplyFlags flags); + +static inline int chattr_fd(int fd, unsigned value, unsigned mask, unsigned *previous) { + return chattr_full(NULL, fd, value, mask, previous, NULL, 0); +} +static inline int chattr_path(const char *path, unsigned value, unsigned mask, unsigned *previous) { + return chattr_full(path, -1, value, mask, previous, NULL, 0); +} + +int read_attr_fd(int fd, unsigned *ret); +int read_attr_path(const char *p, unsigned *ret); + +/* Combination of chattr flags, that should be appropriate for secrets stored on disk: Secure Remove + + * Exclusion from Dumping + Synchronous Writing (i.e. not caching in memory) + In-Place Updating (i.e. not + * spurious copies). */ +#define CHATTR_SECRET_FLAGS (FS_SECRM_FL|FS_NODUMP_FL|FS_SYNC_FL|FS_NOCOW_FL) + +static inline int chattr_secret(int fd, ChattrApplyFlags flags) { + return chattr_full(NULL, fd, CHATTR_SECRET_FLAGS, CHATTR_SECRET_FLAGS, NULL, NULL, flags|CHATTR_FALLBACK_BITWISE); +} diff --git a/src/basic/check-filesystems.sh b/src/basic/check-filesystems.sh new file mode 100755 index 0000000..696ef61 --- /dev/null +++ b/src/basic/check-filesystems.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: LGPL-2.1-or-later +set -eu +set -o pipefail + +cpp="$1" +filesystems_gperf="$2" +shift 2 + +includes="" +for i in "$@"; do + includes="$includes -include $i" +done + +error=false + +# shellcheck disable=SC2086 +for fs in $($cpp -dM $includes - /dev/null; then + # STACK_END_MAGIC doesn't refer to a filesystem + # mtd_inode was removed in 2015 + # futexfs was removed in 2018 + if [[ "$fs" =~ ^(STACK_END_MAGIC|MTD_INODE_FS_MAGIC|FUTEXFS_SUPER_MAGIC)$ ]]; then + continue + fi + echo "Filesystem found in kernel header but not in $(basename "$filesystems_gperf"): $fs"; + error=true + fi +done + +if $error; then + exit 1 +fi diff --git a/src/basic/compress.c b/src/basic/compress.c new file mode 100644 index 0000000..1e94635 --- /dev/null +++ b/src/basic/compress.c @@ -0,0 +1,1079 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include +#include +#include + +#if HAVE_XZ +#include +#endif + +#if HAVE_LZ4 +#include +#include +#endif + +#if HAVE_ZSTD +#include +#include +#endif + +#include "alloc-util.h" +#include "compress.h" +#include "fd-util.h" +#include "fileio.h" +#include "io-util.h" +#include "macro.h" +#include "sparse-endian.h" +#include "string-table.h" +#include "string-util.h" +#include "unaligned.h" +#include "util.h" + +#if HAVE_LZ4 +DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(LZ4F_compressionContext_t, LZ4F_freeCompressionContext, NULL); +DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(LZ4F_decompressionContext_t, LZ4F_freeDecompressionContext, NULL); +#endif + +#if HAVE_ZSTD +DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(ZSTD_CCtx*, ZSTD_freeCCtx, NULL); +DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(ZSTD_DCtx*, ZSTD_freeDCtx, NULL); + +static int zstd_ret_to_errno(size_t ret) { + switch (ZSTD_getErrorCode(ret)) { + case ZSTD_error_dstSize_tooSmall: + return -ENOBUFS; + case ZSTD_error_memory_allocation: + return -ENOMEM; + default: + return -EBADMSG; + } +} +#endif + +#define ALIGN_8(l) ALIGN_TO(l, sizeof(size_t)) + +static const char* const compression_table[_COMPRESSION_MAX] = { + [COMPRESSION_NONE] = "NONE", + [COMPRESSION_XZ] = "XZ", + [COMPRESSION_LZ4] = "LZ4", + [COMPRESSION_ZSTD] = "ZSTD", +}; + +DEFINE_STRING_TABLE_LOOKUP(compression, Compression); + +int compress_blob_xz(const void *src, uint64_t src_size, + void *dst, size_t dst_alloc_size, size_t *dst_size) { +#if HAVE_XZ + static const lzma_options_lzma opt = { + 1u << 20u, NULL, 0, LZMA_LC_DEFAULT, LZMA_LP_DEFAULT, + LZMA_PB_DEFAULT, LZMA_MODE_FAST, 128, LZMA_MF_HC3, 4 + }; + static const lzma_filter filters[] = { + { LZMA_FILTER_LZMA2, (lzma_options_lzma*) &opt }, + { LZMA_VLI_UNKNOWN, NULL } + }; + lzma_ret ret; + size_t out_pos = 0; + + assert(src); + assert(src_size > 0); + assert(dst); + assert(dst_alloc_size > 0); + assert(dst_size); + + /* Returns < 0 if we couldn't compress the data or the + * compressed result is longer than the original */ + + if (src_size < 80) + return -ENOBUFS; + + ret = lzma_stream_buffer_encode((lzma_filter*) filters, LZMA_CHECK_NONE, NULL, + src, src_size, dst, &out_pos, dst_alloc_size); + if (ret != LZMA_OK) + return -ENOBUFS; + + *dst_size = out_pos; + return COMPRESSION_XZ; +#else + return -EPROTONOSUPPORT; +#endif +} + +int compress_blob_lz4(const void *src, uint64_t src_size, + void *dst, size_t dst_alloc_size, size_t *dst_size) { +#if HAVE_LZ4 + int r; + + assert(src); + assert(src_size > 0); + assert(dst); + assert(dst_alloc_size > 0); + assert(dst_size); + + /* Returns < 0 if we couldn't compress the data or the + * compressed result is longer than the original */ + + if (src_size < 9) + return -ENOBUFS; + + r = LZ4_compress_default(src, (char*)dst + 8, src_size, (int) dst_alloc_size - 8); + if (r <= 0) + return -ENOBUFS; + + unaligned_write_le64(dst, src_size); + *dst_size = r + 8; + + return COMPRESSION_LZ4; +#else + return -EPROTONOSUPPORT; +#endif +} + +int compress_blob_zstd( + const void *src, uint64_t src_size, + void *dst, size_t dst_alloc_size, size_t *dst_size) { +#if HAVE_ZSTD + size_t k; + + assert(src); + assert(src_size > 0); + assert(dst); + assert(dst_alloc_size > 0); + assert(dst_size); + + k = ZSTD_compress(dst, dst_alloc_size, src, src_size, 0); + if (ZSTD_isError(k)) + return zstd_ret_to_errno(k); + + *dst_size = k; + return COMPRESSION_ZSTD; +#else + return -EPROTONOSUPPORT; +#endif +} + +int decompress_blob_xz( + const void *src, + uint64_t src_size, + void **dst, + size_t* dst_size, + size_t dst_max) { + +#if HAVE_XZ + _cleanup_(lzma_end) lzma_stream s = LZMA_STREAM_INIT; + lzma_ret ret; + size_t space; + + assert(src); + assert(src_size > 0); + assert(dst); + assert(dst_size); + + ret = lzma_stream_decoder(&s, UINT64_MAX, 0); + if (ret != LZMA_OK) + return -ENOMEM; + + space = MIN(src_size * 2, dst_max ?: SIZE_MAX); + if (!greedy_realloc(dst, space, 1)) + return -ENOMEM; + + s.next_in = src; + s.avail_in = src_size; + + s.next_out = *dst; + s.avail_out = space; + + for (;;) { + size_t used; + + ret = lzma_code(&s, LZMA_FINISH); + + if (ret == LZMA_STREAM_END) + break; + else if (ret != LZMA_OK) + return -ENOMEM; + + if (dst_max > 0 && (space - s.avail_out) >= dst_max) + break; + else if (dst_max > 0 && space == dst_max) + return -ENOBUFS; + + used = space - s.avail_out; + space = MIN(2 * space, dst_max ?: SIZE_MAX); + if (!greedy_realloc(dst, space, 1)) + return -ENOMEM; + + s.avail_out = space - used; + s.next_out = *(uint8_t**)dst + used; + } + + *dst_size = space - s.avail_out; + return 0; +#else + return -EPROTONOSUPPORT; +#endif +} + +int decompress_blob_lz4( + const void *src, + uint64_t src_size, + void **dst, + size_t* dst_size, + size_t dst_max) { + +#if HAVE_LZ4 + char* out; + int r, size; /* LZ4 uses int for size */ + + assert(src); + assert(src_size > 0); + assert(dst); + assert(dst_size); + + if (src_size <= 8) + return -EBADMSG; + + size = unaligned_read_le64(src); + if (size < 0 || (unsigned) size != unaligned_read_le64(src)) + return -EFBIG; + out = greedy_realloc(dst, size, 1); + if (!out) + return -ENOMEM; + + r = LZ4_decompress_safe((char*)src + 8, out, src_size - 8, size); + if (r < 0 || r != size) + return -EBADMSG; + + *dst_size = size; + return 0; +#else + return -EPROTONOSUPPORT; +#endif +} + +int decompress_blob_zstd( + const void *src, + uint64_t src_size, + void **dst, + size_t *dst_size, + size_t dst_max) { + +#if HAVE_ZSTD + uint64_t size; + + assert(src); + assert(src_size > 0); + assert(dst); + assert(dst_size); + + size = ZSTD_getFrameContentSize(src, src_size); + if (IN_SET(size, ZSTD_CONTENTSIZE_ERROR, ZSTD_CONTENTSIZE_UNKNOWN)) + return -EBADMSG; + + if (dst_max > 0 && size > dst_max) + size = dst_max; + if (size > SIZE_MAX) + return -E2BIG; + + if (!(greedy_realloc(dst, MAX(ZSTD_DStreamOutSize(), size), 1))) + return -ENOMEM; + + _cleanup_(ZSTD_freeDCtxp) ZSTD_DCtx *dctx = ZSTD_createDCtx(); + if (!dctx) + return -ENOMEM; + + ZSTD_inBuffer input = { + .src = src, + .size = src_size, + }; + ZSTD_outBuffer output = { + .dst = *dst, + .size = MALLOC_SIZEOF_SAFE(*dst), + }; + + size_t k = ZSTD_decompressStream(dctx, &output, &input); + if (ZSTD_isError(k)) { + log_debug("ZSTD decoder failed: %s", ZSTD_getErrorName(k)); + return zstd_ret_to_errno(k); + } + assert(output.pos >= size); + + *dst_size = size; + return 0; +#else + return -EPROTONOSUPPORT; +#endif +} + +int decompress_blob( + Compression compression, + const void *src, + uint64_t src_size, + void **dst, + size_t* dst_size, + size_t dst_max) { + + if (compression == COMPRESSION_XZ) + return decompress_blob_xz( + src, src_size, + dst, dst_size, dst_max); + else if (compression == COMPRESSION_LZ4) + return decompress_blob_lz4( + src, src_size, + dst, dst_size, dst_max); + else if (compression == COMPRESSION_ZSTD) + return decompress_blob_zstd( + src, src_size, + dst, dst_size, dst_max); + else + return -EPROTONOSUPPORT; +} + +int decompress_startswith_xz( + const void *src, + uint64_t src_size, + void **buffer, + const void *prefix, + size_t prefix_len, + uint8_t extra) { + +#if HAVE_XZ + _cleanup_(lzma_end) lzma_stream s = LZMA_STREAM_INIT; + size_t allocated; + lzma_ret ret; + + /* Checks whether the decompressed blob starts with the mentioned prefix. The byte extra needs to + * follow the prefix */ + + assert(src); + assert(src_size > 0); + assert(buffer); + assert(prefix); + + ret = lzma_stream_decoder(&s, UINT64_MAX, 0); + if (ret != LZMA_OK) + return -EBADMSG; + + if (!(greedy_realloc(buffer, ALIGN_8(prefix_len + 1), 1))) + return -ENOMEM; + + allocated = MALLOC_SIZEOF_SAFE(*buffer); + + s.next_in = src; + s.avail_in = src_size; + + s.next_out = *buffer; + s.avail_out = allocated; + + for (;;) { + ret = lzma_code(&s, LZMA_FINISH); + + if (!IN_SET(ret, LZMA_OK, LZMA_STREAM_END)) + return -EBADMSG; + + if (allocated - s.avail_out >= prefix_len + 1) + return memcmp(*buffer, prefix, prefix_len) == 0 && + ((const uint8_t*) *buffer)[prefix_len] == extra; + + if (ret == LZMA_STREAM_END) + return 0; + + s.avail_out += allocated; + + if (!(greedy_realloc(buffer, allocated * 2, 1))) + return -ENOMEM; + + allocated = MALLOC_SIZEOF_SAFE(*buffer); + s.next_out = *(uint8_t**)buffer + allocated - s.avail_out; + } + +#else + return -EPROTONOSUPPORT; +#endif +} + +int decompress_startswith_lz4( + const void *src, + uint64_t src_size, + void **buffer, + const void *prefix, + size_t prefix_len, + uint8_t extra) { + +#if HAVE_LZ4 + /* Checks whether the decompressed blob starts with the mentioned prefix. The byte extra needs to + * follow the prefix */ + + size_t allocated; + int r; + + assert(src); + assert(src_size > 0); + assert(buffer); + assert(prefix); + + if (src_size <= 8) + return -EBADMSG; + + if (!(greedy_realloc(buffer, ALIGN_8(prefix_len + 1), 1))) + return -ENOMEM; + allocated = MALLOC_SIZEOF_SAFE(*buffer); + + r = LZ4_decompress_safe_partial( + (char*)src + 8, + *buffer, + src_size - 8, + prefix_len + 1, + allocated); + + /* One lz4 < 1.8.3, we might get "failure" (r < 0), or "success" where just a part of the buffer is + * decompressed. But if we get a smaller amount of bytes than requested, we don't know whether there + * isn't enough data to fill the requested size or whether we just got a partial answer. + */ + if (r < 0 || (size_t) r < prefix_len + 1) { + size_t size; + + if (LZ4_versionNumber() >= 10803) + /* We trust that the newer lz4 decompresses the number of bytes we + * requested if available in the compressed string. */ + return 0; + + if (r > 0) + /* Compare what we have first, in case of mismatch we can + * shortcut the full comparison. */ + if (memcmp(*buffer, prefix, r) != 0) + return 0; + + /* Before version 1.8.3, lz4 always tries to decode full a "sequence", + * so in pathological cases might need to decompress the full field. */ + r = decompress_blob_lz4(src, src_size, buffer, &size, 0); + if (r < 0) + return r; + + if (size < prefix_len + 1) + return 0; + } + + return memcmp(*buffer, prefix, prefix_len) == 0 && + ((const uint8_t*) *buffer)[prefix_len] == extra; +#else + return -EPROTONOSUPPORT; +#endif +} + +int decompress_startswith_zstd( + const void *src, + uint64_t src_size, + void **buffer, + const void *prefix, + size_t prefix_len, + uint8_t extra) { +#if HAVE_ZSTD + assert(src); + assert(src_size > 0); + assert(buffer); + assert(prefix); + + uint64_t size = ZSTD_getFrameContentSize(src, src_size); + if (IN_SET(size, ZSTD_CONTENTSIZE_ERROR, ZSTD_CONTENTSIZE_UNKNOWN)) + return -EBADMSG; + + if (size < prefix_len + 1) + return 0; /* Decompressed text too short to match the prefix and extra */ + + _cleanup_(ZSTD_freeDCtxp) ZSTD_DCtx *dctx = ZSTD_createDCtx(); + if (!dctx) + return -ENOMEM; + + if (!(greedy_realloc(buffer, MAX(ZSTD_DStreamOutSize(), prefix_len + 1), 1))) + return -ENOMEM; + + ZSTD_inBuffer input = { + .src = src, + .size = src_size, + }; + ZSTD_outBuffer output = { + .dst = *buffer, + .size = MALLOC_SIZEOF_SAFE(*buffer), + }; + size_t k; + + k = ZSTD_decompressStream(dctx, &output, &input); + if (ZSTD_isError(k)) { + log_debug("ZSTD decoder failed: %s", ZSTD_getErrorName(k)); + return zstd_ret_to_errno(k); + } + assert(output.pos >= prefix_len + 1); + + return memcmp(*buffer, prefix, prefix_len) == 0 && + ((const uint8_t*) *buffer)[prefix_len] == extra; +#else + return -EPROTONOSUPPORT; +#endif +} + +int decompress_startswith( + Compression compression, + const void *src, + uint64_t src_size, + void **buffer, + const void *prefix, + size_t prefix_len, + uint8_t extra) { + + if (compression == COMPRESSION_XZ) + return decompress_startswith_xz( + src, src_size, + buffer, + prefix, prefix_len, + extra); + + else if (compression == COMPRESSION_LZ4) + return decompress_startswith_lz4( + src, src_size, + buffer, + prefix, prefix_len, + extra); + else if (compression == COMPRESSION_ZSTD) + return decompress_startswith_zstd( + src, src_size, + buffer, + prefix, prefix_len, + extra); + else + return -EBADMSG; +} + +int compress_stream_xz(int fdf, int fdt, uint64_t max_bytes, uint64_t *ret_uncompressed_size) { +#if HAVE_XZ + _cleanup_(lzma_end) lzma_stream s = LZMA_STREAM_INIT; + lzma_ret ret; + uint8_t buf[BUFSIZ], out[BUFSIZ]; + lzma_action action = LZMA_RUN; + + assert(fdf >= 0); + assert(fdt >= 0); + + ret = lzma_easy_encoder(&s, LZMA_PRESET_DEFAULT, LZMA_CHECK_CRC64); + if (ret != LZMA_OK) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to initialize XZ encoder: code %u", + ret); + + for (;;) { + if (s.avail_in == 0 && action == LZMA_RUN) { + size_t m = sizeof(buf); + ssize_t n; + + if (max_bytes != UINT64_MAX && (uint64_t) m > max_bytes) + m = (size_t) max_bytes; + + n = read(fdf, buf, m); + if (n < 0) + return -errno; + if (n == 0) + action = LZMA_FINISH; + else { + s.next_in = buf; + s.avail_in = n; + + if (max_bytes != UINT64_MAX) { + assert(max_bytes >= (uint64_t) n); + max_bytes -= n; + } + } + } + + if (s.avail_out == 0) { + s.next_out = out; + s.avail_out = sizeof(out); + } + + ret = lzma_code(&s, action); + if (!IN_SET(ret, LZMA_OK, LZMA_STREAM_END)) + return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), + "Compression failed: code %u", + ret); + + if (s.avail_out == 0 || ret == LZMA_STREAM_END) { + ssize_t n, k; + + n = sizeof(out) - s.avail_out; + + k = loop_write(fdt, out, n, false); + if (k < 0) + return k; + + if (ret == LZMA_STREAM_END) { + if (ret_uncompressed_size) + *ret_uncompressed_size = s.total_in; + + log_debug("XZ compression finished (%"PRIu64" -> %"PRIu64" bytes, %.1f%%)", + s.total_in, s.total_out, + (double) s.total_out / s.total_in * 100); + + return COMPRESSION_XZ; + } + } + } +#else + return -EPROTONOSUPPORT; +#endif +} + +#define LZ4_BUFSIZE (512*1024u) + +int compress_stream_lz4(int fdf, int fdt, uint64_t max_bytes, uint64_t *ret_uncompressed_size) { + +#if HAVE_LZ4 + LZ4F_errorCode_t c; + _cleanup_(LZ4F_freeCompressionContextp) LZ4F_compressionContext_t ctx = NULL; + _cleanup_free_ void *in_buff = NULL; + _cleanup_free_ char *out_buff = NULL; + size_t out_allocsize, n, offset = 0, frame_size; + uint64_t total_in = 0, total_out; + int r; + static const LZ4F_preferences_t preferences = { + .frameInfo.blockSizeID = 5, + }; + + c = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION); + if (LZ4F_isError(c)) + return -ENOMEM; + + frame_size = LZ4F_compressBound(LZ4_BUFSIZE, &preferences); + out_allocsize = frame_size + 64*1024; /* add some space for header and trailer */ + out_buff = malloc(out_allocsize); + if (!out_buff) + return -ENOMEM; + + in_buff = malloc(LZ4_BUFSIZE); + if (!in_buff) + return -ENOMEM; + + n = offset = total_out = LZ4F_compressBegin(ctx, out_buff, out_allocsize, &preferences); + if (LZ4F_isError(n)) + return -EINVAL; + + log_debug("Buffer size is %zu bytes, header size %zu bytes.", out_allocsize, n); + + for (;;) { + ssize_t k; + + k = loop_read(fdf, in_buff, LZ4_BUFSIZE, true); + if (k < 0) + return k; + if (k == 0) + break; + n = LZ4F_compressUpdate(ctx, out_buff + offset, out_allocsize - offset, + in_buff, k, NULL); + if (LZ4F_isError(n)) + return -ENOTRECOVERABLE; + + total_in += k; + offset += n; + total_out += n; + + if (max_bytes != UINT64_MAX && total_out > (size_t) max_bytes) + return log_debug_errno(SYNTHETIC_ERRNO(EFBIG), + "Compressed stream longer than %" PRIu64 " bytes", max_bytes); + + if (out_allocsize - offset < frame_size + 4) { + k = loop_write(fdt, out_buff, offset, false); + if (k < 0) + return k; + offset = 0; + } + } + + n = LZ4F_compressEnd(ctx, out_buff + offset, out_allocsize - offset, NULL); + if (LZ4F_isError(n)) + return -ENOTRECOVERABLE; + + offset += n; + total_out += n; + r = loop_write(fdt, out_buff, offset, false); + if (r < 0) + return r; + + if (ret_uncompressed_size) + *ret_uncompressed_size = total_in; + + log_debug("LZ4 compression finished (%" PRIu64 " -> %" PRIu64 " bytes, %.1f%%)", + total_in, total_out, + (double) total_out / total_in * 100); + + return COMPRESSION_LZ4; +#else + return -EPROTONOSUPPORT; +#endif +} + +int decompress_stream_xz(int fdf, int fdt, uint64_t max_bytes) { + +#if HAVE_XZ + _cleanup_(lzma_end) lzma_stream s = LZMA_STREAM_INIT; + lzma_ret ret; + + uint8_t buf[BUFSIZ], out[BUFSIZ]; + lzma_action action = LZMA_RUN; + + assert(fdf >= 0); + assert(fdt >= 0); + + ret = lzma_stream_decoder(&s, UINT64_MAX, 0); + if (ret != LZMA_OK) + return log_debug_errno(SYNTHETIC_ERRNO(ENOMEM), + "Failed to initialize XZ decoder: code %u", + ret); + + for (;;) { + if (s.avail_in == 0 && action == LZMA_RUN) { + ssize_t n; + + n = read(fdf, buf, sizeof(buf)); + if (n < 0) + return -errno; + if (n == 0) + action = LZMA_FINISH; + else { + s.next_in = buf; + s.avail_in = n; + } + } + + if (s.avail_out == 0) { + s.next_out = out; + s.avail_out = sizeof(out); + } + + ret = lzma_code(&s, action); + if (!IN_SET(ret, LZMA_OK, LZMA_STREAM_END)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Decompression failed: code %u", + ret); + + if (s.avail_out == 0 || ret == LZMA_STREAM_END) { + ssize_t n, k; + + n = sizeof(out) - s.avail_out; + + if (max_bytes != UINT64_MAX) { + if (max_bytes < (uint64_t) n) + return -EFBIG; + + max_bytes -= n; + } + + k = loop_write(fdt, out, n, false); + if (k < 0) + return k; + + if (ret == LZMA_STREAM_END) { + log_debug("XZ decompression finished (%"PRIu64" -> %"PRIu64" bytes, %.1f%%)", + s.total_in, s.total_out, + (double) s.total_out / s.total_in * 100); + + return 0; + } + } + } +#else + return log_debug_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), + "Cannot decompress file. Compiled without XZ support."); +#endif +} + +int decompress_stream_lz4(int in, int out, uint64_t max_bytes) { +#if HAVE_LZ4 + size_t c; + _cleanup_(LZ4F_freeDecompressionContextp) LZ4F_decompressionContext_t ctx = NULL; + _cleanup_free_ char *buf = NULL; + char *src; + struct stat st; + int r = 0; + size_t total_in = 0, total_out = 0; + + c = LZ4F_createDecompressionContext(&ctx, LZ4F_VERSION); + if (LZ4F_isError(c)) + return -ENOMEM; + + if (fstat(in, &st) < 0) + return log_debug_errno(errno, "fstat() failed: %m"); + + if (file_offset_beyond_memory_size(st.st_size)) + return -EFBIG; + + buf = malloc(LZ4_BUFSIZE); + if (!buf) + return -ENOMEM; + + src = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, in, 0); + if (src == MAP_FAILED) + return -errno; + + while (total_in < (size_t) st.st_size) { + size_t produced = LZ4_BUFSIZE; + size_t used = st.st_size - total_in; + + c = LZ4F_decompress(ctx, buf, &produced, src + total_in, &used, NULL); + if (LZ4F_isError(c)) { + r = -EBADMSG; + goto cleanup; + } + + total_in += used; + total_out += produced; + + if (max_bytes != UINT64_MAX && total_out > (size_t) max_bytes) { + log_debug("Decompressed stream longer than %"PRIu64" bytes", max_bytes); + r = -EFBIG; + goto cleanup; + } + + r = loop_write(out, buf, produced, false); + if (r < 0) + goto cleanup; + } + + log_debug("LZ4 decompression finished (%zu -> %zu bytes, %.1f%%)", + total_in, total_out, + total_in > 0 ? (double) total_out / total_in * 100 : 0.0); + cleanup: + munmap(src, st.st_size); + return r; +#else + return log_debug_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), + "Cannot decompress file. Compiled without LZ4 support."); +#endif +} + +int compress_stream_zstd(int fdf, int fdt, uint64_t max_bytes, uint64_t *ret_uncompressed_size) { +#if HAVE_ZSTD + _cleanup_(ZSTD_freeCCtxp) ZSTD_CCtx *cctx = NULL; + _cleanup_free_ void *in_buff = NULL, *out_buff = NULL; + size_t in_allocsize, out_allocsize; + size_t z; + uint64_t left = max_bytes, in_bytes = 0; + + assert(fdf >= 0); + assert(fdt >= 0); + + /* Create the context and buffers */ + in_allocsize = ZSTD_CStreamInSize(); + out_allocsize = ZSTD_CStreamOutSize(); + in_buff = malloc(in_allocsize); + out_buff = malloc(out_allocsize); + cctx = ZSTD_createCCtx(); + if (!cctx || !out_buff || !in_buff) + return -ENOMEM; + + z = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1); + if (ZSTD_isError(z)) + log_debug("Failed to enable ZSTD checksum, ignoring: %s", ZSTD_getErrorName(z)); + + /* This loop read from the input file, compresses that entire chunk, + * and writes all output produced to the output file. + */ + for (;;) { + bool is_last_chunk; + ZSTD_inBuffer input = { + .src = in_buff, + .size = 0, + .pos = 0 + }; + ssize_t red; + + red = loop_read(fdf, in_buff, in_allocsize, true); + if (red < 0) + return red; + is_last_chunk = red == 0; + + in_bytes += (size_t) red; + input.size = (size_t) red; + + for (bool finished = false; !finished;) { + ZSTD_outBuffer output = { + .dst = out_buff, + .size = out_allocsize, + .pos = 0 + }; + size_t remaining; + ssize_t wrote; + + /* Compress into the output buffer and write all of the + * output to the file so we can reuse the buffer next + * iteration. + */ + remaining = ZSTD_compressStream2( + cctx, &output, &input, + is_last_chunk ? ZSTD_e_end : ZSTD_e_continue); + + if (ZSTD_isError(remaining)) { + log_debug("ZSTD encoder failed: %s", ZSTD_getErrorName(remaining)); + return zstd_ret_to_errno(remaining); + } + + if (left < output.pos) + return -EFBIG; + + wrote = loop_write(fdt, output.dst, output.pos, 1); + if (wrote < 0) + return wrote; + + left -= output.pos; + + /* If we're on the last chunk we're finished when zstd + * returns 0, which means its consumed all the input AND + * finished the frame. Otherwise, we're finished when + * we've consumed all the input. + */ + finished = is_last_chunk ? (remaining == 0) : (input.pos == input.size); + } + + /* zstd only returns 0 when the input is completely consumed */ + assert(input.pos == input.size); + if (is_last_chunk) + break; + } + + if (ret_uncompressed_size) + *ret_uncompressed_size = in_bytes; + + if (in_bytes > 0) + log_debug("ZSTD compression finished (%" PRIu64 " -> %" PRIu64 " bytes, %.1f%%)", + in_bytes, max_bytes - left, (double) (max_bytes - left) / in_bytes * 100); + else + log_debug("ZSTD compression finished (%" PRIu64 " -> %" PRIu64 " bytes)", + in_bytes, max_bytes - left); + + return COMPRESSION_ZSTD; +#else + return -EPROTONOSUPPORT; +#endif +} + +int decompress_stream_zstd(int fdf, int fdt, uint64_t max_bytes) { +#if HAVE_ZSTD + _cleanup_(ZSTD_freeDCtxp) ZSTD_DCtx *dctx = NULL; + _cleanup_free_ void *in_buff = NULL, *out_buff = NULL; + size_t in_allocsize, out_allocsize; + size_t last_result = 0; + uint64_t left = max_bytes, in_bytes = 0; + + assert(fdf >= 0); + assert(fdt >= 0); + + /* Create the context and buffers */ + in_allocsize = ZSTD_DStreamInSize(); + out_allocsize = ZSTD_DStreamOutSize(); + in_buff = malloc(in_allocsize); + out_buff = malloc(out_allocsize); + dctx = ZSTD_createDCtx(); + if (!dctx || !out_buff || !in_buff) + return -ENOMEM; + + /* This loop assumes that the input file is one or more concatenated + * zstd streams. This example won't work if there is trailing non-zstd + * data at the end, but streaming decompression in general handles this + * case. ZSTD_decompressStream() returns 0 exactly when the frame is + * completed, and doesn't consume input after the frame. + */ + for (;;) { + bool has_error = false; + ZSTD_inBuffer input = { + .src = in_buff, + .size = 0, + .pos = 0 + }; + ssize_t red; + + red = loop_read(fdf, in_buff, in_allocsize, true); + if (red < 0) + return red; + if (red == 0) + break; + + in_bytes += (size_t) red; + input.size = (size_t) red; + input.pos = 0; + + /* Given a valid frame, zstd won't consume the last byte of the + * frame until it has flushed all of the decompressed data of + * the frame. So input.pos < input.size means frame is not done + * or there is still output available. + */ + while (input.pos < input.size) { + ZSTD_outBuffer output = { + .dst = out_buff, + .size = out_allocsize, + .pos = 0 + }; + ssize_t wrote; + /* The return code is zero if the frame is complete, but + * there may be multiple frames concatenated together. + * Zstd will automatically reset the context when a + * frame is complete. Still, calling ZSTD_DCtx_reset() + * can be useful to reset the context to a clean state, + * for instance if the last decompression call returned + * an error. + */ + last_result = ZSTD_decompressStream(dctx, &output, &input); + if (ZSTD_isError(last_result)) { + has_error = true; + break; + } + + if (left < output.pos) + return -EFBIG; + + wrote = loop_write(fdt, output.dst, output.pos, 1); + if (wrote < 0) + return wrote; + + left -= output.pos; + } + if (has_error) + break; + } + + if (in_bytes == 0) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "ZSTD decoder failed: no data read"); + + if (last_result != 0) { + /* The last return value from ZSTD_decompressStream did not end + * on a frame, but we reached the end of the file! We assume + * this is an error, and the input was truncated. + */ + log_debug("ZSTD decoder failed: %s", ZSTD_getErrorName(last_result)); + return zstd_ret_to_errno(last_result); + } + + log_debug( + "ZSTD decompression finished (%" PRIu64 " -> %" PRIu64 " bytes, %.1f%%)", + in_bytes, + max_bytes - left, + (double) (max_bytes - left) / in_bytes * 100); + return 0; +#else + return log_debug_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), + "Cannot decompress file. Compiled without ZSTD support."); +#endif +} + +int decompress_stream(const char *filename, int fdf, int fdt, uint64_t max_bytes) { + + if (endswith(filename, ".lz4")) + return decompress_stream_lz4(fdf, fdt, max_bytes); + else if (endswith(filename, ".xz")) + return decompress_stream_xz(fdf, fdt, max_bytes); + else if (endswith(filename, ".zst")) + return decompress_stream_zstd(fdf, fdt, max_bytes); + else + return -EPROTONOSUPPORT; +} diff --git a/src/basic/compress.h b/src/basic/compress.h new file mode 100644 index 0000000..583b105 --- /dev/null +++ b/src/basic/compress.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include + +typedef enum Compression { + COMPRESSION_NONE, + COMPRESSION_XZ, + COMPRESSION_LZ4, + COMPRESSION_ZSTD, + _COMPRESSION_MAX, + _COMPRESSION_INVALID = -EINVAL, +} Compression; + +const char* compression_to_string(Compression compression); +Compression compression_from_string(const char *compression); + +int compress_blob_xz(const void *src, uint64_t src_size, + void *dst, size_t dst_alloc_size, size_t *dst_size); +int compress_blob_lz4(const void *src, uint64_t src_size, + void *dst, size_t dst_alloc_size, size_t *dst_size); +int compress_blob_zstd(const void *src, uint64_t src_size, + void *dst, size_t dst_alloc_size, size_t *dst_size); + +int decompress_blob_xz(const void *src, uint64_t src_size, + void **dst, size_t* dst_size, size_t dst_max); +int decompress_blob_lz4(const void *src, uint64_t src_size, + void **dst, size_t* dst_size, size_t dst_max); +int decompress_blob_zstd(const void *src, uint64_t src_size, + void **dst, size_t* dst_size, size_t dst_max); +int decompress_blob(Compression compression, + const void *src, uint64_t src_size, + void **dst, size_t* dst_size, size_t dst_max); + +int decompress_startswith_xz(const void *src, uint64_t src_size, + void **buffer, + const void *prefix, size_t prefix_len, + uint8_t extra); +int decompress_startswith_lz4(const void *src, uint64_t src_size, + void **buffer, + const void *prefix, size_t prefix_len, + uint8_t extra); +int decompress_startswith_zstd(const void *src, uint64_t src_size, + void **buffer, + const void *prefix, size_t prefix_len, + uint8_t extra); +int decompress_startswith(Compression compression, + const void *src, uint64_t src_size, + void **buffer, + const void *prefix, size_t prefix_len, + uint8_t extra); + +int compress_stream_xz(int fdf, int fdt, uint64_t max_bytes, uint64_t *ret_uncompressed_size); +int compress_stream_lz4(int fdf, int fdt, uint64_t max_bytes, uint64_t *ret_uncompressed_size); +int compress_stream_zstd(int fdf, int fdt, uint64_t max_bytes, uint64_t *ret_uncompressed_size); + +int decompress_stream_xz(int fdf, int fdt, uint64_t max_size); +int decompress_stream_lz4(int fdf, int fdt, uint64_t max_size); +int decompress_stream_zstd(int fdf, int fdt, uint64_t max_size); + +static inline int compress_blob_explicit( + Compression compression, + const void *src, uint64_t src_size, + void *dst, size_t dst_alloc_size, size_t *dst_size) { + + switch (compression) { + case COMPRESSION_ZSTD: + return compress_blob_zstd(src, src_size, dst, dst_alloc_size, dst_size); + case COMPRESSION_LZ4: + return compress_blob_lz4(src, src_size, dst, dst_alloc_size, dst_size); + case COMPRESSION_XZ: + return compress_blob_xz(src, src_size, dst, dst_alloc_size, dst_size); + default: + return -EOPNOTSUPP; + } +} + +#define compress_blob(src, src_size, dst, dst_alloc_size, dst_size) \ + compress_blob_explicit( \ + DEFAULT_COMPRESSION, \ + src, src_size, \ + dst, dst_alloc_size, dst_size) + +static inline int compress_stream(int fdf, int fdt, uint64_t max_bytes, uint64_t *ret_uncompressed_size) { + switch (DEFAULT_COMPRESSION) { + case COMPRESSION_ZSTD: + return compress_stream_zstd(fdf, fdt, max_bytes, ret_uncompressed_size); + case COMPRESSION_LZ4: + return compress_stream_lz4(fdf, fdt, max_bytes, ret_uncompressed_size); + case COMPRESSION_XZ: + return compress_stream_xz(fdf, fdt, max_bytes, ret_uncompressed_size); + default: + return -EOPNOTSUPP; + } +} + +static inline const char* default_compression_extension(void) { + switch (DEFAULT_COMPRESSION) { + case COMPRESSION_ZSTD: + return ".zst"; + case COMPRESSION_LZ4: + return ".lz4"; + case COMPRESSION_XZ: + return ".xz"; + default: + return ""; + } +} + +int decompress_stream(const char *filename, int fdf, int fdt, uint64_t max_bytes); diff --git a/src/basic/conf-files.c b/src/basic/conf-files.c new file mode 100644 index 0000000..532c9d1 --- /dev/null +++ b/src/basic/conf-files.c @@ -0,0 +1,320 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include + +#include "chase-symlinks.h" +#include "conf-files.h" +#include "def.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "hashmap.h" +#include "log.h" +#include "macro.h" +#include "path-util.h" +#include "set.h" +#include "sort-util.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" + +static int files_add( + Hashmap *h, + Set *masked, + const char *suffix, + const char *root, + unsigned flags, + const char *path) { + + _cleanup_free_ char *dirpath = NULL; + _cleanup_closedir_ DIR *dir = NULL; + int r; + + assert(h); + assert((flags & CONF_FILES_FILTER_MASKED) == 0 || masked); + assert(path); + + r = chase_symlinks_and_opendir(path, root, CHASE_PREFIX_ROOT, &dirpath, &dir); + if (r == -ENOENT) + return 0; + if (r < 0) + return log_debug_errno(r, "Failed to open directory '%s/%s': %m", empty_or_root(root) ? "" : root, dirpath); + + FOREACH_DIRENT(de, dir, return -errno) { + struct stat st; + char *p, *key; + + /* Does this match the suffix? */ + if (suffix && !endswith(de->d_name, suffix)) + continue; + + /* Has this file already been found in an earlier directory? */ + if (hashmap_contains(h, de->d_name)) { + log_debug("Skipping overridden file '%s/%s'.", dirpath, de->d_name); + continue; + } + + /* Has this been masked in an earlier directory? */ + if ((flags & CONF_FILES_FILTER_MASKED) && set_contains(masked, de->d_name)) { + log_debug("File '%s/%s' is masked by previous entry.", dirpath, de->d_name); + continue; + } + + /* Read file metadata if we shall validate the check for file masks, for node types or whether the node is marked executable. */ + if (flags & (CONF_FILES_FILTER_MASKED|CONF_FILES_REGULAR|CONF_FILES_DIRECTORY|CONF_FILES_EXECUTABLE)) + if (fstatat(dirfd(dir), de->d_name, &st, 0) < 0) { + log_debug_errno(errno, "Failed to stat '%s/%s', ignoring: %m", dirpath, de->d_name); + continue; + } + + /* Is this a masking entry? */ + if ((flags & CONF_FILES_FILTER_MASKED)) + if (null_or_empty(&st)) { + assert(masked); + + /* Mark this one as masked */ + r = set_put_strdup(&masked, de->d_name); + if (r < 0) + return r; + + log_debug("File '%s/%s' is a mask.", dirpath, de->d_name); + continue; + } + + /* Does this node have the right type? */ + if (flags & (CONF_FILES_REGULAR|CONF_FILES_DIRECTORY)) + if (!((flags & CONF_FILES_DIRECTORY) && S_ISDIR(st.st_mode)) && + !((flags & CONF_FILES_REGULAR) && S_ISREG(st.st_mode))) { + log_debug("Ignoring '%s/%s', as it does not have the right type.", dirpath, de->d_name); + continue; + } + + /* Does this node have the executable bit set? */ + if (flags & CONF_FILES_EXECUTABLE) + /* As requested: check if the file is marked executable. Note that we don't check access(X_OK) + * here, as we care about whether the file is marked executable at all, and not whether it is + * executable for us, because if so, such errors are stuff we should log about. */ + + if ((st.st_mode & 0111) == 0) { /* not executable */ + log_debug("Ignoring '%s/%s', as it is not marked executable.", dirpath, de->d_name); + continue; + } + + if (flags & CONF_FILES_BASENAME) { + p = strdup(de->d_name); + if (!p) + return -ENOMEM; + + key = p; + } else { + p = path_join(dirpath, de->d_name); + if (!p) + return -ENOMEM; + + key = basename(p); + } + + r = hashmap_put(h, key, p); + if (r < 0) { + free(p); + return log_debug_errno(r, "Failed to add item to hashmap: %m"); + } + + assert(r > 0); + } + + return 0; +} + +static int base_cmp(char * const *a, char * const *b) { + return strcmp(basename(*a), basename(*b)); +} + +static int conf_files_list_strv_internal( + char ***ret, + const char *suffix, + const char *root, + unsigned flags, + char **dirs) { + + _cleanup_hashmap_free_ Hashmap *fh = NULL; + _cleanup_set_free_free_ Set *masked = NULL; + char **files; + int r; + + assert(ret); + + /* This alters the dirs string array */ + if (!path_strv_resolve_uniq(dirs, root)) + return -ENOMEM; + + fh = hashmap_new(&path_hash_ops); + if (!fh) + return -ENOMEM; + + if (flags & CONF_FILES_FILTER_MASKED) { + masked = set_new(&path_hash_ops); + if (!masked) + return -ENOMEM; + } + + STRV_FOREACH(p, dirs) { + r = files_add(fh, masked, suffix, root, flags, *p); + if (r == -ENOMEM) + return r; + if (r < 0) + log_debug_errno(r, "Failed to search for files in %s, ignoring: %m", *p); + } + + files = hashmap_get_strv(fh); + if (!files) + return -ENOMEM; + + typesafe_qsort(files, hashmap_size(fh), base_cmp); + *ret = files; + + return 0; +} + +int conf_files_insert(char ***strv, const char *root, char **dirs, const char *path) { + /* Insert a path into strv, at the place honouring the usual sorting rules: + * - we first compare by the basename + * - and then we compare by dirname, allowing just one file with the given + * basename. + * This means that we will + * - add a new entry if basename(path) was not on the list, + * - do nothing if an entry with higher priority was already present, + * - do nothing if our new entry matches the existing entry, + * - replace the existing entry if our new entry has higher priority. + */ + size_t i, n; + char *t; + int r; + + n = strv_length(*strv); + for (i = 0; i < n; i++) { + int c; + + c = base_cmp((char* const*) *strv + i, (char* const*) &path); + if (c == 0) + /* Oh, there already is an entry with a matching name (the last component). */ + STRV_FOREACH(dir, dirs) { + _cleanup_free_ char *rdir = NULL; + char *p1, *p2; + + rdir = path_join(root, *dir); + if (!rdir) + return -ENOMEM; + + p1 = path_startswith((*strv)[i], rdir); + if (p1) + /* Existing entry with higher priority + * or same priority, no need to do anything. */ + return 0; + + p2 = path_startswith(path, *dir); + if (p2) { + /* Our new entry has higher priority */ + + t = path_join(root, path); + if (!t) + return log_oom(); + + return free_and_replace((*strv)[i], t); + } + } + + else if (c > 0) + /* Following files have lower priority, let's go insert our + * new entry. */ + break; + + /* … we are not there yet, let's continue */ + } + + /* The new file has lower priority than all the existing entries */ + t = path_join(root, path); + if (!t) + return -ENOMEM; + + r = strv_insert(strv, i, t); + if (r < 0) + free(t); + + return r; +} + +int conf_files_list_strv(char ***ret, const char *suffix, const char *root, unsigned flags, const char* const* dirs) { + _cleanup_strv_free_ char **copy = NULL; + + assert(ret); + + copy = strv_copy((char**) dirs); + if (!copy) + return -ENOMEM; + + return conf_files_list_strv_internal(ret, suffix, root, flags, copy); +} + +int conf_files_list(char ***ret, const char *suffix, const char *root, unsigned flags, const char *dir) { + _cleanup_strv_free_ char **dirs = NULL; + + assert(ret); + + dirs = strv_new(dir); + if (!dirs) + return -ENOMEM; + + return conf_files_list_strv_internal(ret, suffix, root, flags, dirs); +} + +int conf_files_list_nulstr(char ***ret, const char *suffix, const char *root, unsigned flags, const char *dirs) { + _cleanup_strv_free_ char **d = NULL; + + assert(ret); + + d = strv_split_nulstr(dirs); + if (!d) + return -ENOMEM; + + return conf_files_list_strv_internal(ret, suffix, root, flags, d); +} + +int conf_files_list_with_replacement( + const char *root, + char **config_dirs, + const char *replacement, + char ***ret_files, + char **ret_replace_file) { + + _cleanup_strv_free_ char **f = NULL; + _cleanup_free_ char *p = NULL; + int r; + + assert(config_dirs); + assert(ret_files); + assert(ret_replace_file || !replacement); + + r = conf_files_list_strv(&f, ".conf", root, 0, (const char* const*) config_dirs); + if (r < 0) + return log_error_errno(r, "Failed to enumerate config files: %m"); + + if (replacement) { + r = conf_files_insert(&f, root, config_dirs, replacement); + if (r < 0) + return log_error_errno(r, "Failed to extend config file list: %m"); + + p = path_join(root, replacement); + if (!p) + return log_oom(); + } + + *ret_files = TAKE_PTR(f); + if (ret_replace_file) + *ret_replace_file = TAKE_PTR(p); + + return 0; +} diff --git a/src/basic/conf-files.h b/src/basic/conf-files.h new file mode 100644 index 0000000..7774ed7 --- /dev/null +++ b/src/basic/conf-files.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "macro.h" + +enum { + CONF_FILES_EXECUTABLE = 1 << 0, + CONF_FILES_REGULAR = 1 << 1, + CONF_FILES_DIRECTORY = 1 << 2, + CONF_FILES_BASENAME = 1 << 3, + CONF_FILES_FILTER_MASKED = 1 << 4, +}; + +int conf_files_list(char ***ret, const char *suffix, const char *root, unsigned flags, const char *dir); +int conf_files_list_strv(char ***ret, const char *suffix, const char *root, unsigned flags, const char* const* dirs); +int conf_files_list_nulstr(char ***ret, const char *suffix, const char *root, unsigned flags, const char *dirs); +int conf_files_insert(char ***strv, const char *root, char **dirs, const char *path); +int conf_files_list_with_replacement( + const char *root, + char **config_dirs, + const char *replacement, + char ***files, + char **replace_file); diff --git a/src/basic/coverage.h b/src/basic/coverage.h new file mode 100644 index 0000000..5c30482 --- /dev/null +++ b/src/basic/coverage.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +/* Use the coverage-related tweaks below only for C stuff as they're not really + * C++ compatible, and the only thing that is built with a C++ compiler is + * the lone test-bus-vtable-cc unit test. + */ +#ifndef __cplusplus + +void __gcov_dump(void); +void __gcov_reset(void); + +/* When built with --coverage (gcov) we need to explicitly call __gcov_dump() + * in places where we use _exit(), since _exit() skips at-exit hooks resulting + * in lost coverage. + * + * To make sure we don't miss any _exit() calls, this header file is included + * explicitly on the compiler command line via the -include directive (only + * when built with -Db_coverage=true) + */ +void _exit(int); + +static inline _Noreturn void _coverage__exit(int status) { + __gcov_dump(); + _exit(status); +} +#define _exit(x) _coverage__exit(x) + +/* gcov provides wrappers for the exec*() calls but there's none for execveat() + * and execvpe() which means we lose all coverage prior to such call. To mitigate + * this, let's add simple wrappers in gcov's style[0] for these exec*() calls, + * which dump and reset the coverage data as needed. + * + * [0] https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=libgcc/libgcov-interface.c;h=b2ee930864183b78c8826255183ca86e15e21ded;hb=HEAD + */ + +int execveat(int, const char *, char * const [], char * const [], int); +int execvpe(const char *, char * const [], char * const []); + +static inline int _coverage_execveat( + int dirfd, + const char *pathname, + char * const argv[], + char * const envp[], + int flags) { + __gcov_dump(); + int r = execveat(dirfd, pathname, argv, envp, flags); + __gcov_reset(); + + return r; +} +#define execveat(d,p,a,e,f) _coverage_execveat(d, p, a, e, f) + +static inline int _coverage_execvpe( + const char *file, + char * const argv[], + char * const envp[]) { + __gcov_dump(); + int r = execvpe(file, argv, envp); + __gcov_reset(); + + return r; +} +#define execvpe(f,a,e) _coverage_execvpe(f, a, e) + +#endif diff --git a/src/basic/def.h b/src/basic/def.h new file mode 100644 index 0000000..2b4de29 --- /dev/null +++ b/src/basic/def.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#define DEFAULT_TIMEOUT_USEC (90*USEC_PER_SEC) +#define DEFAULT_RESTART_USEC (100*USEC_PER_MSEC) +#define DEFAULT_CONFIRM_USEC (30*USEC_PER_SEC) + +/* We use an extra-long timeout for the reload. This is because a reload or reexec means generators are rerun + * which are timed out after DEFAULT_TIMEOUT_USEC. Let's use twice that time here, so that the generators can + * have their timeout, and for everything else there's the same time budget in place. */ +#define DAEMON_RELOAD_TIMEOUT_SEC (DEFAULT_TIMEOUT_USEC * 2) + +#define DEFAULT_START_LIMIT_INTERVAL (10*USEC_PER_SEC) +#define DEFAULT_START_LIMIT_BURST 5 + +/* The default time after which exit-on-idle services exit. This + * should be kept lower than the watchdog timeout, because otherwise + * the watchdog pings will keep the loop busy. */ +#define DEFAULT_EXIT_USEC (30*USEC_PER_SEC) + +/* The default value for the net.unix.max_dgram_qlen sysctl */ +#define DEFAULT_UNIX_MAX_DGRAM_QLEN 512UL + +#define SIGNALS_CRASH_HANDLER SIGSEGV,SIGILL,SIGFPE,SIGBUS,SIGQUIT,SIGABRT +#define SIGNALS_IGNORE SIGPIPE + +#define NOTIFY_FD_MAX 768 +#define NOTIFY_BUFFER_MAX PIPE_BUF + +#if HAVE_SPLIT_USR +# define _CONF_PATHS_SPLIT_USR_NULSTR(n) "/lib/" n "\0" +# define _CONF_PATHS_SPLIT_USR(n) , "/lib/" n +#else +# define _CONF_PATHS_SPLIT_USR_NULSTR(n) +# define _CONF_PATHS_SPLIT_USR(n) +#endif + +/* Return a nulstr for a standard cascade of configuration paths, + * suitable to pass to conf_files_list_nulstr() or config_parse_many_nulstr() + * to implement drop-in directories for extending configuration + * files. */ +#define CONF_PATHS_NULSTR(n) \ + "/etc/" n "\0" \ + "/run/" n "\0" \ + "/usr/local/lib/" n "\0" \ + "/usr/lib/" n "\0" \ + _CONF_PATHS_SPLIT_USR_NULSTR(n) + +#define CONF_PATHS_USR(n) \ + "/etc/" n, \ + "/run/" n, \ + "/usr/local/lib/" n, \ + "/usr/lib/" n + +#define CONF_PATHS(n) \ + CONF_PATHS_USR(n) \ + _CONF_PATHS_SPLIT_USR(n) + +#define CONF_PATHS_USR_STRV(n) \ + STRV_MAKE(CONF_PATHS_USR(n)) + +#define CONF_PATHS_STRV(n) \ + STRV_MAKE(CONF_PATHS(n)) + +/* The limit for PID 1 itself (which is not inherited to children) */ +#define HIGH_RLIMIT_MEMLOCK (1024ULL*1024ULL*64ULL) + +/* Since kernel 5.16 the kernel default limit was raised to 8M. Let's adjust things on old kernels too, and + * in containers so that our children inherit that. */ +#define DEFAULT_RLIMIT_MEMLOCK (1024ULL*1024ULL*8ULL) + +#define PLYMOUTH_SOCKET { \ + .un.sun_family = AF_UNIX, \ + .un.sun_path = "\0/org/freedesktop/plymouthd", \ + } + +/* Path where PID1 listens for varlink subscriptions from systemd-oomd to notify of changes in ManagedOOM settings. */ +#define VARLINK_ADDR_PATH_MANAGED_OOM_SYSTEM "/run/systemd/io.system.ManagedOOM" +/* Path where systemd-oomd listens for varlink connections from user managers to report changes in ManagedOOM settings. */ +#define VARLINK_ADDR_PATH_MANAGED_OOM_USER "/run/systemd/oom/io.system.ManagedOOM" + +#define KERNEL_BASELINE_VERSION "4.15" diff --git a/src/basic/devnum-util.c b/src/basic/devnum-util.c new file mode 100644 index 0000000..bd1b4d6 --- /dev/null +++ b/src/basic/devnum-util.c @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include + +#include "chase-symlinks.h" +#include "devnum-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "string-util.h" + +int parse_devnum(const char *s, dev_t *ret) { + const char *major; + unsigned x, y; + size_t n; + int r; + + n = strspn(s, DIGITS); + if (n == 0) + return -EINVAL; + if (n > DECIMAL_STR_MAX(dev_t)) + return -EINVAL; + if (s[n] != ':') + return -EINVAL; + + major = strndupa_safe(s, n); + r = safe_atou(major, &x); + if (r < 0) + return r; + + r = safe_atou(s + n + 1, &y); + if (r < 0) + return r; + + if (!DEVICE_MAJOR_VALID(x) || !DEVICE_MINOR_VALID(y)) + return -ERANGE; + + *ret = makedev(x, y); + return 0; +} + +int device_path_make_major_minor(mode_t mode, dev_t devnum, char **ret) { + const char *t; + + /* Generates the /dev/{char|block}/MAJOR:MINOR path for a dev_t */ + + if (S_ISCHR(mode)) + t = "char"; + else if (S_ISBLK(mode)) + t = "block"; + else + return -ENODEV; + + if (asprintf(ret, "/dev/%s/" DEVNUM_FORMAT_STR, t, DEVNUM_FORMAT_VAL(devnum)) < 0) + return -ENOMEM; + + return 0; +} + +int device_path_make_inaccessible(mode_t mode, char **ret) { + char *s; + + assert(ret); + + if (S_ISCHR(mode)) + s = strdup("/run/systemd/inaccessible/chr"); + else if (S_ISBLK(mode)) + s = strdup("/run/systemd/inaccessible/blk"); + else + return -ENODEV; + if (!s) + return -ENOMEM; + + *ret = s; + return 0; +} + +int device_path_make_canonical(mode_t mode, dev_t devnum, char **ret) { + _cleanup_free_ char *p = NULL; + int r; + + /* Finds the canonical path for a device, i.e. resolves the /dev/{char|block}/MAJOR:MINOR path to the end. */ + + assert(ret); + + if (major(devnum) == 0 && minor(devnum) == 0) + /* A special hack to make sure our 'inaccessible' device nodes work. They won't have symlinks in + * /dev/block/ and /dev/char/, hence we handle them specially here. */ + return device_path_make_inaccessible(mode, ret); + + r = device_path_make_major_minor(mode, devnum, &p); + if (r < 0) + return r; + + return chase_symlinks(p, NULL, 0, ret, NULL); +} + +int device_path_parse_major_minor(const char *path, mode_t *ret_mode, dev_t *ret_devnum) { + mode_t mode; + dev_t devnum; + int r; + + /* Tries to extract the major/minor directly from the device path if we can. Handles /dev/block/ and /dev/char/ + * paths, as well out synthetic inaccessible device nodes. Never goes to disk. Returns -ENODEV if the device + * path cannot be parsed like this. */ + + if (path_equal(path, "/run/systemd/inaccessible/chr")) { + mode = S_IFCHR; + devnum = makedev(0, 0); + } else if (path_equal(path, "/run/systemd/inaccessible/blk")) { + mode = S_IFBLK; + devnum = makedev(0, 0); + } else { + const char *w; + + w = path_startswith(path, "/dev/block/"); + if (w) + mode = S_IFBLK; + else { + w = path_startswith(path, "/dev/char/"); + if (!w) + return -ENODEV; + + mode = S_IFCHR; + } + + r = parse_devnum(w, &devnum); + if (r < 0) + return r; + } + + if (ret_mode) + *ret_mode = mode; + if (ret_devnum) + *ret_devnum = devnum; + + return 0; +} diff --git a/src/basic/devnum-util.h b/src/basic/devnum-util.h new file mode 100644 index 0000000..38aa4ef --- /dev/null +++ b/src/basic/devnum-util.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include + +#include "stdio-util.h" + +int parse_devnum(const char *s, dev_t *ret); + +/* glibc and the Linux kernel have different ideas about the major/minor size. These calls will check whether the + * specified major is valid by the Linux kernel's standards, not by glibc's. Linux has 20bits of minor, and 12 bits of + * major space. See MINORBITS in linux/kdev_t.h in the kernel sources. (If you wonder why we define _y here, instead of + * comparing directly >= 0: it's to trick out -Wtype-limits, which would otherwise complain if the type is unsigned, as + * such a test would be pointless in such a case.) */ + +#define DEVICE_MAJOR_VALID(x) \ + ({ \ + typeof(x) _x = (x), _y = 0; \ + _x >= _y && _x < (UINT32_C(1) << 12); \ + \ + }) + +#define DEVICE_MINOR_VALID(x) \ + ({ \ + typeof(x) _x = (x), _y = 0; \ + _x >= _y && _x < (UINT32_C(1) << 20); \ + }) + +int device_path_make_major_minor(mode_t mode, dev_t devnum, char **ret); +int device_path_make_inaccessible(mode_t mode, char **ret); +int device_path_make_canonical(mode_t mode, dev_t devnum, char **ret); +int device_path_parse_major_minor(const char *path, mode_t *ret_mode, dev_t *ret_devnum); + +static inline bool devnum_set_and_equal(dev_t a, dev_t b) { + /* Returns true if a and b definitely refer to the same device. If either is zero, this means "don't + * know" and we'll return false */ + return a == b && a != 0; +} + +/* Maximum string length for a major:minor string. (Note that DECIMAL_STR_MAX includes space for a trailing NUL) */ +#define DEVNUM_STR_MAX (DECIMAL_STR_MAX(dev_t)-1+1+DECIMAL_STR_MAX(dev_t)) + +#define DEVNUM_FORMAT_STR "%u:%u" +#define DEVNUM_FORMAT_VAL(d) major(d), minor(d) + +static inline char *format_devnum(dev_t d, char buf[static DEVNUM_STR_MAX]) { + return ASSERT_PTR(snprintf_ok(buf, DEVNUM_STR_MAX, DEVNUM_FORMAT_STR, DEVNUM_FORMAT_VAL(d))); +} + +#define FORMAT_DEVNUM(d) format_devnum((d), (char[DEVNUM_STR_MAX]) {}) diff --git a/src/basic/dirent-util.c b/src/basic/dirent-util.c new file mode 100644 index 0000000..2eea228 --- /dev/null +++ b/src/basic/dirent-util.c @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include + +#include "dirent-util.h" +#include "path-util.h" +#include "stat-util.h" +#include "string-util.h" + +static int dirent_ensure_type(DIR *d, struct dirent *de) { + STRUCT_STATX_DEFINE(sx); + int r; + + assert(d); + assert(de); + + if (de->d_type != DT_UNKNOWN) + return 0; + + if (dot_or_dot_dot(de->d_name)) { + de->d_type = DT_DIR; + return 0; + } + + /* Let's ask only for the type, nothing else. */ + r = statx_fallback(dirfd(d), de->d_name, AT_SYMLINK_NOFOLLOW|AT_NO_AUTOMOUNT, STATX_TYPE, &sx); + if (r < 0) + return r; + + assert(FLAGS_SET(sx.stx_mask, STATX_TYPE)); + de->d_type = IFTODT(sx.stx_mode); + + /* If the inode is passed too, update the field, i.e. report most recent data */ + if (FLAGS_SET(sx.stx_mask, STATX_INO)) + de->d_ino = sx.stx_ino; + + return 0; +} + +bool dirent_is_file(const struct dirent *de) { + assert(de); + + if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN)) + return false; + + if (hidden_or_backup_file(de->d_name)) + return false; + + return true; +} + +bool dirent_is_file_with_suffix(const struct dirent *de, const char *suffix) { + assert(de); + + if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN)) + return false; + + if (de->d_name[0] == '.') + return false; + + if (!suffix) + return true; + + return endswith(de->d_name, suffix); +} + +struct dirent *readdir_ensure_type(DIR *d) { + int r; + + assert(d); + + /* Like readdir(), but fills in .d_type if it is DT_UNKNOWN */ + + for (;;) { + struct dirent *de; + + errno = 0; + de = readdir(d); + if (!de) + return NULL; + + r = dirent_ensure_type(d, de); + if (r >= 0) + return de; + if (r != -ENOENT) { + errno = -r; /* We want to be compatible with readdir(), hence propagate error via errno here */ + return NULL; + } + + /* Vanished by now? Then skip immediately to next */ + } +} + +struct dirent *readdir_no_dot(DIR *d) { + assert(d); + + for (;;) { + struct dirent *de; + + de = readdir_ensure_type(d); + if (!de || !dot_or_dot_dot(de->d_name)) + return de; + } +} diff --git a/src/basic/dirent-util.h b/src/basic/dirent-util.h new file mode 100644 index 0000000..d2ac256 --- /dev/null +++ b/src/basic/dirent-util.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include + +#include "macro.h" +#include "path-util.h" + +bool dirent_is_file(const struct dirent *de) _pure_; +bool dirent_is_file_with_suffix(const struct dirent *de, const char *suffix) _pure_; + +struct dirent *readdir_ensure_type(DIR *d); +struct dirent *readdir_no_dot(DIR *dirp); + +#define FOREACH_DIRENT_ALL(de, d, on_error) \ + for (struct dirent *(de) = readdir_ensure_type(d);; (de) = readdir_ensure_type(d)) \ + if (!de) { \ + if (errno > 0) { \ + on_error; \ + } \ + break; \ + } else + +#define FOREACH_DIRENT(de, d, on_error) \ + FOREACH_DIRENT_ALL(de, d, on_error) \ + if (hidden_or_backup_file((de)->d_name)) \ + continue; \ + else + +/* Maximum space one dirent structure might require at most */ +#define DIRENT_SIZE_MAX CONST_MAX(sizeof(struct dirent), offsetof(struct dirent, d_name) + NAME_MAX + 1) + +/* Only if 64bit off_t is enabled struct dirent + struct dirent64 are actually the same. We require this, and + * we want them to be interchangeable to make getdents64() work, hence verify that. */ +assert_cc(_FILE_OFFSET_BITS == 64); +#if HAVE_STRUCT_DIRENT64 +assert_cc(sizeof(struct dirent) == sizeof(struct dirent64)); +assert_cc(offsetof(struct dirent, d_ino) == offsetof(struct dirent64, d_ino)); +assert_cc(sizeof_field(struct dirent, d_ino) == sizeof_field(struct dirent64, d_ino)); +assert_cc(offsetof(struct dirent, d_off) == offsetof(struct dirent64, d_off)); +assert_cc(sizeof_field(struct dirent, d_off) == sizeof_field(struct dirent64, d_off)); +assert_cc(offsetof(struct dirent, d_reclen) == offsetof(struct dirent64, d_reclen)); +assert_cc(sizeof_field(struct dirent, d_reclen) == sizeof_field(struct dirent64, d_reclen)); +assert_cc(offsetof(struct dirent, d_type) == offsetof(struct dirent64, d_type)); +assert_cc(sizeof_field(struct dirent, d_type) == sizeof_field(struct dirent64, d_type)); +assert_cc(offsetof(struct dirent, d_name) == offsetof(struct dirent64, d_name)); +assert_cc(sizeof_field(struct dirent, d_name) == sizeof_field(struct dirent64, d_name)); +#endif + +#define FOREACH_DIRENT_IN_BUFFER(de, buf, sz) \ + for (void *_end = (uint8_t*) ({ (de) = (buf); }) + (sz); \ + (uint8_t*) (de) < (uint8_t*) _end; \ + (de) = (struct dirent*) ((uint8_t*) (de) + (de)->d_reclen)) + +#define DEFINE_DIRENT_BUFFER(name, sz) \ + union { \ + struct dirent de; \ + uint8_t data[(sz) * DIRENT_SIZE_MAX]; \ + } name diff --git a/src/basic/dns-def.h b/src/basic/dns-def.h new file mode 100644 index 0000000..d70220b --- /dev/null +++ b/src/basic/dns-def.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +/* Length of a single label, with all escaping removed, excluding any trailing dot or NUL byte */ +#define DNS_LABEL_MAX 63 + +/* Worst case length of a single label, with all escaping applied and room for a trailing NUL byte. */ +#define DNS_LABEL_ESCAPED_MAX (DNS_LABEL_MAX*4+1) + +/* Maximum length of a full hostname, consisting of a series of unescaped labels, and no trailing dot or NUL byte */ +#define DNS_HOSTNAME_MAX 253 + +/* Maximum length of a full hostname, on the wire, including the final NUL byte */ +#define DNS_WIRE_FORMAT_HOSTNAME_MAX 255 + +/* Maximum number of labels per valid hostname */ +#define DNS_N_LABELS_MAX 127 diff --git a/src/basic/efivars.c b/src/basic/efivars.c new file mode 100644 index 0000000..847b6da --- /dev/null +++ b/src/basic/efivars.c @@ -0,0 +1,446 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include +#include +#include + +#include "sd-id128.h" + +#include "alloc-util.h" +#include "chattr-util.h" +#include "efivars.h" +#include "fd-util.h" +#include "fileio.h" +#include "io-util.h" +#include "macro.h" +#include "memory-util.h" +#include "stdio-util.h" +#include "strv.h" +#include "time-util.h" +#include "utf8.h" +#include "virt.h" + +#if ENABLE_EFI + +/* Reads from efivarfs sometimes fail with EINTR. Retry that many times. */ +#define EFI_N_RETRIES_NO_DELAY 20 +#define EFI_N_RETRIES_TOTAL 25 +#define EFI_RETRY_DELAY (50 * USEC_PER_MSEC) + +int efi_get_variable( + const char *variable, + uint32_t *ret_attribute, + void **ret_value, + size_t *ret_size) { + + _cleanup_close_ int fd = -1; + _cleanup_free_ void *buf = NULL; + struct stat st; + usec_t begin = 0; /* Unnecessary initialization to appease gcc */ + uint32_t a; + ssize_t n; + + assert(variable); + + const char *p = strjoina("/sys/firmware/efi/efivars/", variable); + + if (!ret_value && !ret_size && !ret_attribute) { + /* If caller is not interested in anything, just check if the variable exists and is + * readable. */ + if (access(p, R_OK) < 0) + return -errno; + + return 0; + } + + if (DEBUG_LOGGING) { + log_debug("Reading EFI variable %s.", p); + begin = now(CLOCK_MONOTONIC); + } + + fd = open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (fd < 0) + return log_debug_errno(errno, "open(\"%s\") failed: %m", p); + + if (fstat(fd, &st) < 0) + return log_debug_errno(errno, "fstat(\"%s\") failed: %m", p); + if (st.st_size < 4) + return log_debug_errno(SYNTHETIC_ERRNO(ENODATA), "EFI variable %s is shorter than 4 bytes, refusing.", p); + if (st.st_size > 4*1024*1024 + 4) + return log_debug_errno(SYNTHETIC_ERRNO(E2BIG), "EFI variable %s is ridiculously large, refusing.", p); + + if (ret_value || ret_attribute) { + /* The kernel ratelimits reads from the efivarfs because EFI is inefficient, and we'll + * occasionally fail with EINTR here. A slowdown is better than a failure for us, so + * retry a few times and eventually fail with -EBUSY. + * + * See https://github.com/torvalds/linux/blob/master/fs/efivarfs/file.c#L75 + * and + * https://github.com/torvalds/linux/commit/bef3efbeb897b56867e271cdbc5f8adaacaeb9cd. + */ + for (unsigned try = 0;; try++) { + n = read(fd, &a, sizeof(a)); + if (n >= 0) + break; + log_debug_errno(errno, "Reading from \"%s\" failed: %m", p); + if (errno != EINTR) + return -errno; + if (try >= EFI_N_RETRIES_TOTAL) + return -EBUSY; + + if (try >= EFI_N_RETRIES_NO_DELAY) + (void) usleep(EFI_RETRY_DELAY); + } + + if (n != sizeof(a)) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), + "Read %zi bytes from EFI variable %s, expected %zu.", n, p, sizeof(a)); + } + + if (ret_value) { + buf = malloc(st.st_size - 4 + 3); + if (!buf) + return -ENOMEM; + + n = read(fd, buf, (size_t) st.st_size - 4); + if (n < 0) + return log_debug_errno(errno, "Failed to read value of EFI variable %s: %m", p); + assert(n <= st.st_size - 4); + + /* Always NUL-terminate (3 bytes, to properly protect UTF-16, even if truncated in the middle + * of a character) */ + ((char*) buf)[n] = 0; + ((char*) buf)[n + 1] = 0; + ((char*) buf)[n + 2] = 0; + } else + /* Assume that the reported size is accurate */ + n = st.st_size - 4; + + if (DEBUG_LOGGING) { + usec_t end = now(CLOCK_MONOTONIC); + if (end > begin + EFI_RETRY_DELAY) + log_debug("Detected slow EFI variable read access on %s: %s", + variable, FORMAT_TIMESPAN(end - begin, 1)); + } + + /* Note that efivarfs interestingly doesn't require ftruncate() to update an existing EFI variable + * with a smaller value. */ + + if (ret_attribute) + *ret_attribute = a; + + if (ret_value) + *ret_value = TAKE_PTR(buf); + + if (ret_size) + *ret_size = n; + + return 0; +} + +int efi_get_variable_string(const char *variable, char **ret) { + _cleanup_free_ void *s = NULL; + size_t ss = 0; + int r; + char *x; + + r = efi_get_variable(variable, NULL, &s, &ss); + if (r < 0) + return r; + + x = utf16_to_utf8(s, ss); + if (!x) + return -ENOMEM; + + *ret = x; + return 0; +} + +static int efi_verify_variable(const char *variable, uint32_t attr, const void *value, size_t size) { + _cleanup_free_ void *buf = NULL; + size_t n; + uint32_t a; + int r; + + assert(variable); + assert(value || size == 0); + + r = efi_get_variable(variable, &a, &buf, &n); + if (r < 0) + return r; + + return a == attr && memcmp_nn(buf, n, value, size) == 0; +} + +int efi_set_variable(const char *variable, const void *value, size_t size) { + struct var { + uint32_t attr; + char buf[]; + } _packed_ * _cleanup_free_ buf = NULL; + _cleanup_close_ int fd = -1; + uint32_t attr = EFI_VARIABLE_NON_VOLATILE|EFI_VARIABLE_BOOTSERVICE_ACCESS|EFI_VARIABLE_RUNTIME_ACCESS; + bool saved_flags_valid = false; + unsigned saved_flags; + int r; + + assert(variable); + assert(value || size == 0); + + const char *p = strjoina("/sys/firmware/efi/efivars/", variable); + + /* size 0 means removal, empty variable would not be enough for that */ + if (size > 0 && efi_verify_variable(variable, attr, value, size) > 0) { + log_debug("Variable '%s' is already in wanted state, skipping write.", variable); + return 0; + } + + /* Newer efivarfs protects variables that are not in an allow list with FS_IMMUTABLE_FL by default, + * to protect them for accidental removal and modification. We are not changing these variables + * accidentally however, hence let's unset the bit first. */ + + r = chattr_path(p, 0, FS_IMMUTABLE_FL, &saved_flags); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to drop FS_IMMUTABLE_FL flag from '%s', ignoring: %m", p); + + saved_flags_valid = r >= 0; + + if (size == 0) { + if (unlink(p) < 0) { + r = -errno; + goto finish; + } + + return 0; + } + + fd = open(p, O_WRONLY|O_CREAT|O_NOCTTY|O_CLOEXEC, 0644); + if (fd < 0) { + r = -errno; + goto finish; + } + + buf = malloc(sizeof(uint32_t) + size); + if (!buf) { + r = -ENOMEM; + goto finish; + } + + buf->attr = attr; + memcpy(buf->buf, value, size); + + r = loop_write(fd, buf, sizeof(uint32_t) + size, false); + if (r < 0) + goto finish; + + /* For some reason efivarfs doesn't update mtime automatically. Let's do it manually then. This is + * useful for processes that cache EFI variables to detect when changes occurred. */ + if (futimens(fd, (struct timespec[2]) { + { .tv_nsec = UTIME_NOW }, + { .tv_nsec = UTIME_NOW } + }) < 0) + log_debug_errno(errno, "Failed to update mtime/atime on %s, ignoring: %m", p); + + r = 0; + +finish: + if (saved_flags_valid) { + int q; + + /* Restore the original flags field, just in case */ + if (fd < 0) + q = chattr_path(p, saved_flags, FS_IMMUTABLE_FL, NULL); + else + q = chattr_fd(fd, saved_flags, FS_IMMUTABLE_FL, NULL); + if (q < 0) + log_debug_errno(q, "Failed to restore FS_IMMUTABLE_FL on '%s', ignoring: %m", p); + } + + return r; +} + +int efi_set_variable_string(const char *variable, const char *value) { + _cleanup_free_ char16_t *u16 = NULL; + + u16 = utf8_to_utf16(value, strlen(value)); + if (!u16) + return -ENOMEM; + + return efi_set_variable(variable, u16, (char16_strlen(u16) + 1) * sizeof(char16_t)); +} + +bool is_efi_boot(void) { + static int cache = -1; + + if (cache < 0) { + if (detect_container() > 0) + cache = false; + else { + cache = access("/sys/firmware/efi/", F_OK) >= 0; + if (!cache && errno != ENOENT) + log_debug_errno(errno, "Unable to test whether /sys/firmware/efi/ exists, assuming EFI not available: %m"); + } + } + + return cache; +} + +static int read_flag(const char *variable) { + _cleanup_free_ void *v = NULL; + uint8_t b; + size_t s; + int r; + + if (!is_efi_boot()) /* If this is not an EFI boot, assume the queried flags are zero */ + return 0; + + r = efi_get_variable(variable, NULL, &v, &s); + if (r < 0) + return r; + + if (s != 1) + return -EINVAL; + + b = *(uint8_t *)v; + return !!b; +} + +bool is_efi_secure_boot(void) { + static int cache = -1; + int r; + + if (cache < 0) { + r = read_flag(EFI_GLOBAL_VARIABLE(SecureBoot)); + if (r == -ENOENT) + cache = false; + else if (r < 0) + log_debug_errno(r, "Error reading SecureBoot EFI variable, assuming not in SecureBoot mode: %m"); + else + cache = r; + } + + return cache > 0; +} + +SecureBootMode efi_get_secure_boot_mode(void) { + static SecureBootMode cache = _SECURE_BOOT_INVALID; + + if (cache != _SECURE_BOOT_INVALID) + return cache; + + int secure = read_flag(EFI_GLOBAL_VARIABLE(SecureBoot)); + if (secure < 0) { + if (secure != -ENOENT) + log_debug_errno(secure, "Error reading SecureBoot EFI variable, assuming not in SecureBoot mode: %m"); + + return (cache = SECURE_BOOT_UNSUPPORTED); + } + + /* We can assume false for all these if they are abscent (AuditMode and + * DeployedMode may not exist on older firmware). */ + int audit = read_flag(EFI_GLOBAL_VARIABLE(AuditMode)); + int deployed = read_flag(EFI_GLOBAL_VARIABLE(DeployedMode)); + int setup = read_flag(EFI_GLOBAL_VARIABLE(SetupMode)); + log_debug("Secure boot variables: SecureBoot=%d AuditMode=%d DeployedMode=%d SetupMode=%d", + secure, audit, deployed, setup); + + return (cache = decode_secure_boot_mode(secure, audit > 0, deployed > 0, setup > 0)); +} + +static int read_efi_options_variable(char **ret) { + int r; + + /* In SecureBoot mode this is probably not what you want. As your cmdline is cryptographically signed + * like when using Type #2 EFI Unified Kernel Images (https://systemd.io/BOOT_LOADER_SPECIFICATION) + * The user's intention is then that the cmdline should not be modified. You want to make sure that + * the system starts up as exactly specified in the signed artifact. + * + * (NB: For testing purposes, we still check the $SYSTEMD_EFI_OPTIONS env var before accessing this + * cache, even when in SecureBoot mode.) */ + if (is_efi_secure_boot()) { + /* Let's be helpful with the returned error and check if the variable exists at all. If it + * does, let's return a recognizable error (EPERM), and if not ENODATA. */ + + if (access(EFIVAR_PATH(EFI_SYSTEMD_VARIABLE(SystemdOptions)), F_OK) < 0) + return errno == ENOENT ? -ENODATA : -errno; + + return -EPERM; + } + + r = efi_get_variable_string(EFI_SYSTEMD_VARIABLE(SystemdOptions), ret); + if (r == -ENOENT) + return -ENODATA; + return r; +} + +int cache_efi_options_variable(void) { + _cleanup_free_ char *line = NULL; + int r; + + r = read_efi_options_variable(&line); + if (r < 0) + return r; + + return write_string_file(EFIVAR_CACHE_PATH(EFI_SYSTEMD_VARIABLE(SystemdOptions)), line, + WRITE_STRING_FILE_ATOMIC|WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_MKDIR_0755); +} + +int systemd_efi_options_variable(char **ret) { + const char *e; + int r; + + /* Returns the contents of the variable for current boot from the cache. */ + + assert(ret); + + /* For testing purposes it is sometimes useful to be able to override this */ + e = secure_getenv("SYSTEMD_EFI_OPTIONS"); + if (e) { + char *m; + + m = strdup(e); + if (!m) + return -ENOMEM; + + *ret = m; + return 0; + } + + r = read_one_line_file(EFIVAR_CACHE_PATH(EFI_SYSTEMD_VARIABLE(SystemdOptions)), ret); + if (r == -ENOENT) + return -ENODATA; + return r; +} + +static inline int compare_stat_mtime(const struct stat *a, const struct stat *b) { + return CMP(timespec_load(&a->st_mtim), timespec_load(&b->st_mtim)); +} + +int systemd_efi_options_efivarfs_if_newer(char **ret) { + struct stat a = {}, b; + int r; + + if (stat(EFIVAR_PATH(EFI_SYSTEMD_VARIABLE(SystemdOptions)), &a) < 0 && errno != ENOENT) + return log_debug_errno(errno, "Failed to stat EFI variable SystemdOptions: %m"); + + if (stat(EFIVAR_CACHE_PATH(EFI_SYSTEMD_VARIABLE(SystemdOptions)), &b) < 0) { + if (errno != ENOENT) + log_debug_errno(errno, "Failed to stat "EFIVAR_CACHE_PATH(EFI_SYSTEMD_VARIABLE(SystemdOptions))": %m"); + } else if (compare_stat_mtime(&a, &b) > 0) + log_debug("Variable SystemdOptions in evifarfs is newer than in cache."); + else { + log_debug("Variable SystemdOptions in cache is up to date."); + *ret = NULL; + return 0; + } + + r = read_efi_options_variable(ret); + if (r < 0) + return log_debug_errno(r, "Failed to read SystemdOptions EFI variable: %m"); + + return 0; +} +#endif diff --git a/src/basic/efivars.h b/src/basic/efivars.h new file mode 100644 index 0000000..bafe2d3 --- /dev/null +++ b/src/basic/efivars.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#if !ENABLE_EFI +# include +#endif +#include +#include +#include + +#include "sd-id128.h" + +#include "efivars-fundamental.h" +#include "time-util.h" + +#define EFI_VENDOR_LOADER SD_ID128_MAKE(4a,67,b0,82,0a,4c,41,cf,b6,c7,44,0b,29,bb,8c,4f) +#define EFI_VENDOR_LOADER_STR SD_ID128_MAKE_UUID_STR(4a,67,b0,82,0a,4c,41,cf,b6,c7,44,0b,29,bb,8c,4f) +#define EFI_VENDOR_GLOBAL SD_ID128_MAKE(8b,e4,df,61,93,ca,11,d2,aa,0d,00,e0,98,03,2b,8c) +#define EFI_VENDOR_GLOBAL_STR SD_ID128_MAKE_UUID_STR(8b,e4,df,61,93,ca,11,d2,aa,0d,00,e0,98,03,2b,8c) +#define EFI_VENDOR_SYSTEMD SD_ID128_MAKE(8c,f2,64,4b,4b,0b,42,8f,93,87,6d,87,60,50,dc,67) +#define EFI_VENDOR_SYSTEMD_STR SD_ID128_MAKE_UUID_STR(8c,f2,64,4b,4b,0b,42,8f,93,87,6d,87,60,50,dc,67) + +#define EFI_VARIABLE_NON_VOLATILE UINT32_C(0x00000001) +#define EFI_VARIABLE_BOOTSERVICE_ACCESS UINT32_C(0x00000002) +#define EFI_VARIABLE_RUNTIME_ACCESS UINT32_C(0x00000004) + +/* Note that the - naming scheme is an efivarfs convention, i.e. part of the Linux + * API file system implementation for EFI. EFI itself processes UIDS in binary form. + */ + +#define EFI_VENDOR_VARIABLE_STR(vendor, name) name "-" vendor + +#define EFI_GLOBAL_VARIABLE_STR(name) EFI_VENDOR_VARIABLE_STR(EFI_VENDOR_GLOBAL_STR, name) +#define EFI_LOADER_VARIABLE_STR(name) EFI_VENDOR_VARIABLE_STR(EFI_VENDOR_LOADER_STR, name) +#define EFI_SYSTEMD_VARIABLE_STR(name) EFI_VENDOR_VARIABLE_STR(EFI_VENDOR_SYSTEMD_STR, name) + +#define EFI_GLOBAL_VARIABLE(name) EFI_GLOBAL_VARIABLE_STR(STRINGIFY(name)) +#define EFI_LOADER_VARIABLE(name) EFI_LOADER_VARIABLE_STR(STRINGIFY(name)) +#define EFI_SYSTEMD_VARIABLE(name) EFI_SYSTEMD_VARIABLE_STR(STRINGIFY(name)) + +#define EFIVAR_PATH(variable) "/sys/firmware/efi/efivars/" variable +#define EFIVAR_CACHE_PATH(variable) "/run/systemd/efivars/" variable + +#if ENABLE_EFI + +int efi_get_variable(const char *variable, uint32_t *attribute, void **ret_value, size_t *ret_size); +int efi_get_variable_string(const char *variable, char **ret); +int efi_set_variable(const char *variable, const void *value, size_t size); +int efi_set_variable_string(const char *variable, const char *p); + +bool is_efi_boot(void); +bool is_efi_secure_boot(void); +SecureBootMode efi_get_secure_boot_mode(void); + +int cache_efi_options_variable(void); +int systemd_efi_options_variable(char **ret); +int systemd_efi_options_efivarfs_if_newer(char **ret); + +#else + +static inline int efi_get_variable(const char *variable, uint32_t *attribute, void **value, size_t *size) { + return -EOPNOTSUPP; +} + +static inline int efi_get_variable_string(const char *variable, char **ret) { + return -EOPNOTSUPP; +} + +static inline int efi_set_variable(const char *variable, const void *value, size_t size) { + return -EOPNOTSUPP; +} + +static inline int efi_set_variable_string(const char *variable, const char *p) { + return -EOPNOTSUPP; +} + +static inline bool is_efi_boot(void) { + return false; +} + +static inline bool is_efi_secure_boot(void) { + return false; +} + +static inline SecureBootMode efi_get_secure_boot_mode(void) { + return SECURE_BOOT_UNKNOWN; +} + +static inline int cache_efi_options_variable(void) { + return -EOPNOTSUPP; +} + +static inline int systemd_efi_options_variable(char **line) { + return -ENODATA; +} + +static inline int systemd_efi_options_efivarfs_if_newer(char **line) { + return -ENODATA; +} +#endif diff --git a/src/basic/env-file.c b/src/basic/env-file.c new file mode 100644 index 0000000..e363bc8 --- /dev/null +++ b/src/basic/env-file.c @@ -0,0 +1,543 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "alloc-util.h" +#include "env-file.h" +#include "env-util.h" +#include "escape.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "string-util.h" +#include "strv.h" +#include "tmpfile-util.h" +#include "utf8.h" + +static int parse_env_file_internal( + FILE *f, + const char *fname, + int (*push) (const char *filename, unsigned line, + const char *key, char *value, void *userdata), + void *userdata) { + + size_t n_key = 0, n_value = 0, last_value_whitespace = SIZE_MAX, last_key_whitespace = SIZE_MAX; + _cleanup_free_ char *contents = NULL, *key = NULL, *value = NULL; + unsigned line = 1; + int r; + + enum { + PRE_KEY, + KEY, + PRE_VALUE, + VALUE, + VALUE_ESCAPE, + SINGLE_QUOTE_VALUE, + DOUBLE_QUOTE_VALUE, + DOUBLE_QUOTE_VALUE_ESCAPE, + COMMENT, + COMMENT_ESCAPE + } state = PRE_KEY; + + if (f) + r = read_full_stream(f, &contents, NULL); + else + r = read_full_file(fname, &contents, NULL); + if (r < 0) + return r; + + for (char *p = contents; *p; p++) { + char c = *p; + + switch (state) { + + case PRE_KEY: + if (strchr(COMMENTS, c)) + state = COMMENT; + else if (!strchr(WHITESPACE, c)) { + state = KEY; + last_key_whitespace = SIZE_MAX; + + if (!GREEDY_REALLOC(key, n_key+2)) + return -ENOMEM; + + key[n_key++] = c; + } + break; + + case KEY: + if (strchr(NEWLINE, c)) { + state = PRE_KEY; + line++; + n_key = 0; + } else if (c == '=') { + state = PRE_VALUE; + last_value_whitespace = SIZE_MAX; + } else { + if (!strchr(WHITESPACE, c)) + last_key_whitespace = SIZE_MAX; + else if (last_key_whitespace == SIZE_MAX) + last_key_whitespace = n_key; + + if (!GREEDY_REALLOC(key, n_key+2)) + return -ENOMEM; + + key[n_key++] = c; + } + + break; + + case PRE_VALUE: + if (strchr(NEWLINE, c)) { + state = PRE_KEY; + line++; + key[n_key] = 0; + + if (value) + value[n_value] = 0; + + /* strip trailing whitespace from key */ + if (last_key_whitespace != SIZE_MAX) + key[last_key_whitespace] = 0; + + r = push(fname, line, key, value, userdata); + if (r < 0) + return r; + + n_key = 0; + value = NULL; + n_value = 0; + + } else if (c == '\'') + state = SINGLE_QUOTE_VALUE; + else if (c == '"') + state = DOUBLE_QUOTE_VALUE; + else if (c == '\\') + state = VALUE_ESCAPE; + else if (!strchr(WHITESPACE, c)) { + state = VALUE; + + if (!GREEDY_REALLOC(value, n_value+2)) + return -ENOMEM; + + value[n_value++] = c; + } + + break; + + case VALUE: + if (strchr(NEWLINE, c)) { + state = PRE_KEY; + line++; + + key[n_key] = 0; + + if (value) + value[n_value] = 0; + + /* Chomp off trailing whitespace from value */ + if (last_value_whitespace != SIZE_MAX) + value[last_value_whitespace] = 0; + + /* strip trailing whitespace from key */ + if (last_key_whitespace != SIZE_MAX) + key[last_key_whitespace] = 0; + + r = push(fname, line, key, value, userdata); + if (r < 0) + return r; + + n_key = 0; + value = NULL; + n_value = 0; + + } else if (c == '\\') { + state = VALUE_ESCAPE; + last_value_whitespace = SIZE_MAX; + } else { + if (!strchr(WHITESPACE, c)) + last_value_whitespace = SIZE_MAX; + else if (last_value_whitespace == SIZE_MAX) + last_value_whitespace = n_value; + + if (!GREEDY_REALLOC(value, n_value+2)) + return -ENOMEM; + + value[n_value++] = c; + } + + break; + + case VALUE_ESCAPE: + state = VALUE; + + if (!strchr(NEWLINE, c)) { + /* Escaped newlines we eat up entirely */ + if (!GREEDY_REALLOC(value, n_value+2)) + return -ENOMEM; + + value[n_value++] = c; + } + break; + + case SINGLE_QUOTE_VALUE: + if (c == '\'') + state = PRE_VALUE; + else { + if (!GREEDY_REALLOC(value, n_value+2)) + return -ENOMEM; + + value[n_value++] = c; + } + + break; + + case DOUBLE_QUOTE_VALUE: + if (c == '"') + state = PRE_VALUE; + else if (c == '\\') + state = DOUBLE_QUOTE_VALUE_ESCAPE; + else { + if (!GREEDY_REALLOC(value, n_value+2)) + return -ENOMEM; + + value[n_value++] = c; + } + + break; + + case DOUBLE_QUOTE_VALUE_ESCAPE: + state = DOUBLE_QUOTE_VALUE; + + if (strchr(SHELL_NEED_ESCAPE, c)) { + /* If this is a char that needs escaping, just unescape it. */ + if (!GREEDY_REALLOC(value, n_value+2)) + return -ENOMEM; + value[n_value++] = c; + } else if (c != '\n') { + /* If other char than what needs escaping, keep the "\" in place, like the + * real shell does. */ + if (!GREEDY_REALLOC(value, n_value+3)) + return -ENOMEM; + value[n_value++] = '\\'; + value[n_value++] = c; + } + + /* Escaped newlines (aka "continuation lines") are eaten up entirely */ + break; + + case COMMENT: + if (c == '\\') + state = COMMENT_ESCAPE; + else if (strchr(NEWLINE, c)) { + state = PRE_KEY; + line++; + } + break; + + case COMMENT_ESCAPE: + state = COMMENT; + break; + } + } + + if (IN_SET(state, + PRE_VALUE, + VALUE, + VALUE_ESCAPE, + SINGLE_QUOTE_VALUE, + DOUBLE_QUOTE_VALUE, + DOUBLE_QUOTE_VALUE_ESCAPE)) { + + key[n_key] = 0; + + if (value) + value[n_value] = 0; + + if (state == VALUE) + if (last_value_whitespace != SIZE_MAX) + value[last_value_whitespace] = 0; + + /* strip trailing whitespace from key */ + if (last_key_whitespace != SIZE_MAX) + key[last_key_whitespace] = 0; + + r = push(fname, line, key, value, userdata); + if (r < 0) + return r; + + value = NULL; + } + + return 0; +} + +static int check_utf8ness_and_warn( + const char *filename, unsigned line, + const char *key, char *value) { + + if (!utf8_is_valid(key)) { + _cleanup_free_ char *p = NULL; + + p = utf8_escape_invalid(key); + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "%s:%u: invalid UTF-8 in key '%s', ignoring.", + strna(filename), line, p); + } + + if (value && !utf8_is_valid(value)) { + _cleanup_free_ char *p = NULL; + + p = utf8_escape_invalid(value); + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "%s:%u: invalid UTF-8 value for key %s: '%s', ignoring.", + strna(filename), line, key, p); + } + + return 0; +} + +static int parse_env_file_push( + const char *filename, unsigned line, + const char *key, char *value, + void *userdata) { + + const char *k; + va_list aq, *ap = userdata; + int r; + + r = check_utf8ness_and_warn(filename, line, key, value); + if (r < 0) + return r; + + va_copy(aq, *ap); + + while ((k = va_arg(aq, const char *))) { + char **v; + + v = va_arg(aq, char **); + + if (streq(key, k)) { + va_end(aq); + free(*v); + *v = value; + + return 1; + } + } + + va_end(aq); + free(value); + + return 0; +} + +int parse_env_filev( + FILE *f, + const char *fname, + va_list ap) { + + int r; + va_list aq; + + va_copy(aq, ap); + r = parse_env_file_internal(f, fname, parse_env_file_push, &aq); + va_end(aq); + return r; +} + +int parse_env_file_sentinel( + FILE *f, + const char *fname, + ...) { + + va_list ap; + int r; + + va_start(ap, fname); + r = parse_env_filev(f, fname, ap); + va_end(ap); + + return r; +} + +static int load_env_file_push( + const char *filename, unsigned line, + const char *key, char *value, + void *userdata) { + char ***m = userdata; + char *p; + int r; + + r = check_utf8ness_and_warn(filename, line, key, value); + if (r < 0) + return r; + + p = strjoin(key, "=", value); + if (!p) + return -ENOMEM; + + r = strv_env_replace_consume(m, p); + if (r < 0) + return r; + + free(value); + return 0; +} + +int load_env_file(FILE *f, const char *fname, char ***rl) { + _cleanup_strv_free_ char **m = NULL; + int r; + + r = parse_env_file_internal(f, fname, load_env_file_push, &m); + if (r < 0) + return r; + + *rl = TAKE_PTR(m); + return 0; +} + +static int load_env_file_push_pairs( + const char *filename, unsigned line, + const char *key, char *value, + void *userdata) { + + char ***m = ASSERT_PTR(userdata); + int r; + + r = check_utf8ness_and_warn(filename, line, key, value); + if (r < 0) + return r; + + /* Check if the key is present */ + for (char **t = *m; t && *t; t += 2) + if (streq(t[0], key)) { + if (value) + return free_and_replace(t[1], value); + else + return free_and_strdup(t+1, ""); + } + + r = strv_extend(m, key); + if (r < 0) + return r; + + if (value) + return strv_push(m, value); + else + return strv_extend(m, ""); +} + +int load_env_file_pairs(FILE *f, const char *fname, char ***rl) { + _cleanup_strv_free_ char **m = NULL; + int r; + + r = parse_env_file_internal(f, fname, load_env_file_push_pairs, &m); + if (r < 0) + return r; + + *rl = TAKE_PTR(m); + return 0; +} + +static int merge_env_file_push( + const char *filename, unsigned line, + const char *key, char *value, + void *userdata) { + + char ***env = ASSERT_PTR(userdata); + char *expanded_value; + + if (!value) { + log_error("%s:%u: invalid syntax (around \"%s\"), ignoring.", strna(filename), line, key); + return 0; + } + + if (!env_name_is_valid(key)) { + log_error("%s:%u: invalid variable name \"%s\", ignoring.", strna(filename), line, key); + free(value); + return 0; + } + + expanded_value = replace_env(value, *env, + REPLACE_ENV_USE_ENVIRONMENT| + REPLACE_ENV_ALLOW_BRACELESS| + REPLACE_ENV_ALLOW_EXTENDED); + if (!expanded_value) + return -ENOMEM; + + free_and_replace(value, expanded_value); + + log_debug("%s:%u: setting %s=%s", filename, line, key, value); + + return load_env_file_push(filename, line, key, value, env); +} + +int merge_env_file( + char ***env, + FILE *f, + const char *fname) { + + /* NOTE: this function supports braceful and braceless variable expansions, + * plus "extended" substitutions, unlike other exported parsing functions. + */ + + return parse_env_file_internal(f, fname, merge_env_file_push, env); +} + +static void write_env_var(FILE *f, const char *v) { + const char *p; + + p = strchr(v, '='); + if (!p) { + /* Fallback */ + fputs_unlocked(v, f); + fputc_unlocked('\n', f); + return; + } + + p++; + fwrite_unlocked(v, 1, p-v, f); + + if (string_has_cc(p, NULL) || chars_intersect(p, WHITESPACE SHELL_NEED_QUOTES)) { + fputc_unlocked('"', f); + + for (; *p; p++) { + if (strchr(SHELL_NEED_ESCAPE, *p)) + fputc_unlocked('\\', f); + + fputc_unlocked(*p, f); + } + + fputc_unlocked('"', f); + } else + fputs_unlocked(p, f); + + fputc_unlocked('\n', f); +} + +int write_env_file(const char *fname, char **l) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *p = NULL; + int r; + + assert(fname); + + r = fopen_temporary(fname, &f, &p); + if (r < 0) + return r; + + (void) fchmod_umask(fileno(f), 0644); + + STRV_FOREACH(i, l) + write_env_var(f, *i); + + r = fflush_and_check(f); + if (r >= 0) { + if (rename(p, fname) >= 0) + return 0; + + r = -errno; + } + + (void) unlink(p); + return r; +} diff --git a/src/basic/env-file.h b/src/basic/env-file.h new file mode 100644 index 0000000..de47588 --- /dev/null +++ b/src/basic/env-file.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +#include "macro.h" + +int parse_env_filev(FILE *f, const char *fname, va_list ap); +int parse_env_file_sentinel(FILE *f, const char *fname, ...) _sentinel_; +#define parse_env_file(f, fname, ...) parse_env_file_sentinel(f, fname, __VA_ARGS__, NULL) +int load_env_file(FILE *f, const char *fname, char ***l); +int load_env_file_pairs(FILE *f, const char *fname, char ***l); + +int merge_env_file(char ***env, FILE *f, const char *fname); + +int write_env_file(const char *fname, char **l); diff --git a/src/basic/env-util.c b/src/basic/env-util.c new file mode 100644 index 0000000..55ac11a --- /dev/null +++ b/src/basic/env-util.c @@ -0,0 +1,904 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "env-util.h" +#include "errno-util.h" +#include "escape.h" +#include "extract-word.h" +#include "macro.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" +#include "utf8.h" + +/* We follow bash for the character set. Different shells have different rules. */ +#define VALID_BASH_ENV_NAME_CHARS \ + DIGITS LETTERS \ + "_" + +static bool env_name_is_valid_n(const char *e, size_t n) { + if (!e) + return false; + + if (n <= 0) + return false; + + if (ascii_isdigit(e[0])) + return false; + + /* POSIX says the overall size of the environment block cannot + * be > ARG_MAX, an individual assignment hence cannot be + * either. Discounting the equal sign and trailing NUL this + * hence leaves ARG_MAX-2 as longest possible variable + * name. */ + if (n > (size_t) sysconf(_SC_ARG_MAX) - 2) + return false; + + for (const char *p = e; p < e + n; p++) + if (!strchr(VALID_BASH_ENV_NAME_CHARS, *p)) + return false; + + return true; +} + +bool env_name_is_valid(const char *e) { + return env_name_is_valid_n(e, strlen_ptr(e)); +} + +bool env_value_is_valid(const char *e) { + if (!e) + return false; + + if (!utf8_is_valid(e)) + return false; + + /* Note that variable *values* may contain control characters, in particular NL, TAB, BS, DEL, ESC… + * When printing those variables with show-environment, we'll escape them. Make sure to print + * environment variables carefully! */ + + /* POSIX says the overall size of the environment block cannot be > ARG_MAX, an individual assignment + * hence cannot be either. Discounting the shortest possible variable name of length 1, the equal + * sign and trailing NUL this hence leaves ARG_MAX-3 as longest possible variable value. */ + if (strlen(e) > sc_arg_max() - 3) + return false; + + return true; +} + +bool env_assignment_is_valid(const char *e) { + const char *eq; + + eq = strchr(e, '='); + if (!eq) + return false; + + if (!env_name_is_valid_n(e, eq - e)) + return false; + + if (!env_value_is_valid(eq + 1)) + return false; + + /* POSIX says the overall size of the environment block cannot be > ARG_MAX, hence the individual + * variable assignments cannot be either, but let's leave room for one trailing NUL byte. */ + if (strlen(e) > sc_arg_max() - 1) + return false; + + return true; +} + +bool strv_env_is_valid(char **e) { + STRV_FOREACH(p, e) { + size_t k; + + if (!env_assignment_is_valid(*p)) + return false; + + /* Check if there are duplicate assignments */ + k = strcspn(*p, "="); + STRV_FOREACH(q, p + 1) + if (strneq(*p, *q, k) && (*q)[k] == '=') + return false; + } + + return true; +} + +bool strv_env_name_is_valid(char **l) { + STRV_FOREACH(p, l) { + if (!env_name_is_valid(*p)) + return false; + + if (strv_contains(p + 1, *p)) + return false; + } + + return true; +} + +bool strv_env_name_or_assignment_is_valid(char **l) { + STRV_FOREACH(p, l) { + if (!env_assignment_is_valid(*p) && !env_name_is_valid(*p)) + return false; + + if (strv_contains(p + 1, *p)) + return false; + } + + return true; +} + +static int env_append(char **r, char ***k, char **a) { + assert(r); + assert(k); + assert(*k >= r); + + if (!a) + return 0; + + /* Expects the following arguments: 'r' shall point to the beginning of an strv we are going to append to, 'k' + * to a pointer pointing to the NULL entry at the end of the same array. 'a' shall point to another strv. + * + * This call adds every entry of 'a' to 'r', either overriding an existing matching entry, or appending to it. + * + * This call assumes 'r' has enough pre-allocated space to grow by all of 'a''s items. */ + + for (; *a; a++) { + char **j, *c; + size_t n; + + n = strcspn(*a, "="); + if ((*a)[n] == '=') + n++; + + for (j = r; j < *k; j++) + if (strneq(*j, *a, n)) + break; + + c = strdup(*a); + if (!c) + return -ENOMEM; + + if (j >= *k) { /* Append to the end? */ + (*k)[0] = c; + (*k)[1] = NULL; + (*k)++; + } else + free_and_replace(*j, c); /* Override existing item */ + } + + return 0; +} + +char** _strv_env_merge(char **first, ...) { + _cleanup_strv_free_ char **merged = NULL; + char **k; + va_list ap; + + /* Merges an arbitrary number of environment sets */ + + size_t n = strv_length(first); + + va_start(ap, first); + for (;;) { + char **l; + + l = va_arg(ap, char**); + if (l == POINTER_MAX) + break; + + n += strv_length(l); + } + va_end(ap); + + k = merged = new(char*, n + 1); + if (!merged) + return NULL; + merged[0] = NULL; + + if (env_append(merged, &k, first) < 0) + return NULL; + + va_start(ap, first); + for (;;) { + char **l; + + l = va_arg(ap, char**); + if (l == POINTER_MAX) + break; + + if (env_append(merged, &k, l) < 0) { + va_end(ap); + return NULL; + } + } + va_end(ap); + + return TAKE_PTR(merged); +} + +static bool env_match(const char *t, const char *pattern) { + assert(t); + assert(pattern); + + /* pattern a matches string a + * a matches a= + * a matches a=b + * a= matches a= + * a=b matches a=b + * a= does not match a + * a=b does not match a= + * a=b does not match a + * a=b does not match a=c */ + + if (streq(t, pattern)) + return true; + + if (!strchr(pattern, '=')) { + size_t l = strlen(pattern); + + return strneq(t, pattern, l) && t[l] == '='; + } + + return false; +} + +static bool env_entry_has_name(const char *entry, const char *name) { + const char *t; + + assert(entry); + assert(name); + + t = startswith(entry, name); + if (!t) + return false; + + return *t == '='; +} + +char **strv_env_delete(char **x, size_t n_lists, ...) { + size_t n, i = 0; + char **r; + va_list ap; + + /* Deletes every entry from x that is mentioned in the other + * string lists */ + + n = strv_length(x); + + r = new(char*, n+1); + if (!r) + return NULL; + + STRV_FOREACH(k, x) { + va_start(ap, n_lists); + for (size_t v = 0; v < n_lists; v++) { + char **l; + + l = va_arg(ap, char**); + STRV_FOREACH(j, l) + if (env_match(*k, *j)) + goto skip; + } + va_end(ap); + + r[i] = strdup(*k); + if (!r[i]) { + strv_free(r); + return NULL; + } + + i++; + continue; + + skip: + va_end(ap); + } + + r[i] = NULL; + + assert(i <= n); + + return r; +} + +char **strv_env_unset(char **l, const char *p) { + char **f, **t; + + if (!l) + return NULL; + + assert(p); + + /* Drops every occurrence of the env var setting p in the + * string list. Edits in-place. */ + + for (f = t = l; *f; f++) { + + if (env_match(*f, p)) { + free(*f); + continue; + } + + *(t++) = *f; + } + + *t = NULL; + return l; +} + +char **strv_env_unset_many(char **l, ...) { + char **f, **t; + + if (!l) + return NULL; + + /* Like strv_env_unset() but applies many at once. Edits in-place. */ + + for (f = t = l; *f; f++) { + bool found = false; + const char *p; + va_list ap; + + va_start(ap, l); + + while ((p = va_arg(ap, const char*))) { + if (env_match(*f, p)) { + found = true; + break; + } + } + + va_end(ap); + + if (found) { + free(*f); + continue; + } + + *(t++) = *f; + } + + *t = NULL; + return l; +} + +int strv_env_replace_consume(char ***l, char *p) { + const char *t, *name; + int r; + + assert(p); + + /* Replace first occurrence of the env var or add a new one in the string list. Drop other + * occurrences. Edits in-place. Does not copy p and CONSUMES p EVEN ON FAILURE. + * + * p must be a valid key=value assignment. */ + + t = strchr(p, '='); + if (!t) { + free(p); + return -EINVAL; + } + + name = strndupa_safe(p, t - p); + + STRV_FOREACH(f, *l) + if (env_entry_has_name(*f, name)) { + free_and_replace(*f, p); + strv_env_unset(f + 1, *f); + return 0; + } + + /* We didn't find a match, we need to append p or create a new strv */ + r = strv_consume(l, p); + if (r < 0) + return r; + + return 1; +} + +int strv_env_replace_strdup(char ***l, const char *assignment) { + /* Like strv_env_replace_consume(), but copies the argument. */ + + char *p = strdup(assignment); + if (!p) + return -ENOMEM; + + return strv_env_replace_consume(l, p); +} + +int strv_env_replace_strdup_passthrough(char ***l, const char *assignment) { + /* Like strv_env_replace_strdup(), but pulls the variable from the environment of + * the calling program, if a variable name without value is specified. + */ + char *p; + + if (strchr(assignment, '=')) { + if (!env_assignment_is_valid(assignment)) + return -EINVAL; + + p = strdup(assignment); + } else { + if (!env_name_is_valid(assignment)) + return -EINVAL; + + /* If we can't find the variable in our environment, we will use + * the empty string. This way "passthrough" is equivalent to passing + * --setenv=FOO=$FOO in the shell. */ + p = strjoin(assignment, "=", secure_getenv(assignment)); + } + if (!p) + return -ENOMEM; + + return strv_env_replace_consume(l, p); +} + +int strv_env_assign(char ***l, const char *key, const char *value) { + if (!env_name_is_valid(key)) + return -EINVAL; + + /* NULL removes assignment, "" creates an empty assignment. */ + + if (!value) { + strv_env_unset(*l, key); + return 0; + } + + char *p = strjoin(key, "=", value); + if (!p) + return -ENOMEM; + + return strv_env_replace_consume(l, p); +} + +char *strv_env_get_n(char **l, const char *name, size_t k, unsigned flags) { + assert(name); + + if (k <= 0) + return NULL; + + STRV_FOREACH_BACKWARDS(i, l) + if (strneq(*i, name, k) && + (*i)[k] == '=') + return *i + k + 1; + + if (flags & REPLACE_ENV_USE_ENVIRONMENT) { + const char *t; + + t = strndupa_safe(name, k); + return getenv(t); + }; + + return NULL; +} + +char *strv_env_get(char **l, const char *name) { + assert(name); + + return strv_env_get_n(l, name, strlen(name), 0); +} + +char *strv_env_pairs_get(char **l, const char *name) { + char *result = NULL; + + assert(name); + + STRV_FOREACH_PAIR(key, value, l) + if (streq(*key, name)) + result = *value; + + return result; +} + +char **strv_env_clean_with_callback(char **e, void (*invalid_callback)(const char *p, void *userdata), void *userdata) { + int k = 0; + + STRV_FOREACH(p, e) { + size_t n; + bool duplicate = false; + + if (!env_assignment_is_valid(*p)) { + if (invalid_callback) + invalid_callback(*p, userdata); + free(*p); + continue; + } + + n = strcspn(*p, "="); + STRV_FOREACH(q, p + 1) + if (strneq(*p, *q, n) && (*q)[n] == '=') { + duplicate = true; + break; + } + + if (duplicate) { + free(*p); + continue; + } + + e[k++] = *p; + } + + if (e) + e[k] = NULL; + + return e; +} + +char *replace_env_n(const char *format, size_t n, char **env, unsigned flags) { + enum { + WORD, + CURLY, + VARIABLE, + VARIABLE_RAW, + TEST, + DEFAULT_VALUE, + ALTERNATE_VALUE, + } state = WORD; + + const char *e, *word = format, *test_value = NULL; /* test_value is initialized to appease gcc */ + char *k; + _cleanup_free_ char *r = NULL; + size_t i, len = 0; /* len is initialized to appease gcc */ + int nest = 0; + + assert(format); + + for (e = format, i = 0; *e && i < n; e ++, i ++) + switch (state) { + + case WORD: + if (*e == '$') + state = CURLY; + break; + + case CURLY: + if (*e == '{') { + k = strnappend(r, word, e-word-1); + if (!k) + return NULL; + + free_and_replace(r, k); + + word = e-1; + state = VARIABLE; + nest++; + } else if (*e == '$') { + k = strnappend(r, word, e-word); + if (!k) + return NULL; + + free_and_replace(r, k); + + word = e+1; + state = WORD; + + } else if (flags & REPLACE_ENV_ALLOW_BRACELESS && strchr(VALID_BASH_ENV_NAME_CHARS, *e)) { + k = strnappend(r, word, e-word-1); + if (!k) + return NULL; + + free_and_replace(r, k); + + word = e-1; + state = VARIABLE_RAW; + + } else + state = WORD; + break; + + case VARIABLE: + if (*e == '}') { + const char *t; + + t = strv_env_get_n(env, word+2, e-word-2, flags); + + if (!strextend(&r, t)) + return NULL; + + word = e+1; + state = WORD; + nest--; + } else if (*e == ':') { + if (flags & REPLACE_ENV_ALLOW_EXTENDED) { + len = e - word - 2; + state = TEST; + } else + /* Treat this as unsupported syntax, i.e. do no replacement */ + state = WORD; + } + break; + + case TEST: + if (*e == '-') + state = DEFAULT_VALUE; + else if (*e == '+') + state = ALTERNATE_VALUE; + else { + state = WORD; + break; + } + + test_value = e+1; + break; + + case DEFAULT_VALUE: /* fall through */ + case ALTERNATE_VALUE: + assert(flags & REPLACE_ENV_ALLOW_EXTENDED); + + if (*e == '{') { + nest++; + break; + } + + if (*e != '}') + break; + + nest--; + if (nest == 0) { + const char *t; + _cleanup_free_ char *v = NULL; + + t = strv_env_get_n(env, word+2, len, flags); + + if (t && state == ALTERNATE_VALUE) + t = v = replace_env_n(test_value, e-test_value, env, flags); + else if (!t && state == DEFAULT_VALUE) + t = v = replace_env_n(test_value, e-test_value, env, flags); + + if (!strextend(&r, t)) + return NULL; + + word = e+1; + state = WORD; + } + break; + + case VARIABLE_RAW: + assert(flags & REPLACE_ENV_ALLOW_BRACELESS); + + if (!strchr(VALID_BASH_ENV_NAME_CHARS, *e)) { + const char *t; + + t = strv_env_get_n(env, word+1, e-word-1, flags); + + if (!strextend(&r, t)) + return NULL; + + word = e--; + i--; + state = WORD; + } + break; + } + + if (state == VARIABLE_RAW) { + const char *t; + + assert(flags & REPLACE_ENV_ALLOW_BRACELESS); + + t = strv_env_get_n(env, word+1, e-word-1, flags); + return strjoin(r, t); + } else + return strnappend(r, word, e-word); +} + +char **replace_env_argv(char **argv, char **env) { + char **ret; + size_t k = 0, l = 0; + + l = strv_length(argv); + + ret = new(char*, l+1); + if (!ret) + return NULL; + + STRV_FOREACH(i, argv) { + + /* If $FOO appears as single word, replace it by the split up variable */ + if ((*i)[0] == '$' && !IN_SET((*i)[1], '{', '$')) { + char *e; + char **w, **m = NULL; + size_t q; + + e = strv_env_get(env, *i+1); + if (e) { + int r; + + r = strv_split_full(&m, e, WHITESPACE, EXTRACT_RELAX|EXTRACT_UNQUOTE); + if (r < 0) { + ret[k] = NULL; + strv_free(ret); + return NULL; + } + } else + m = NULL; + + q = strv_length(m); + l = l + q - 1; + + w = reallocarray(ret, l + 1, sizeof(char *)); + if (!w) { + ret[k] = NULL; + strv_free(ret); + strv_free(m); + return NULL; + } + + ret = w; + if (m) { + memcpy(ret + k, m, q * sizeof(char*)); + free(m); + } + + k += q; + continue; + } + + /* If ${FOO} appears as part of a word, replace it by the variable as-is */ + ret[k] = replace_env(*i, env, 0); + if (!ret[k]) { + strv_free(ret); + return NULL; + } + k++; + } + + ret[k] = NULL; + return ret; +} + +int getenv_bool(const char *p) { + const char *e; + + e = getenv(p); + if (!e) + return -ENXIO; + + return parse_boolean(e); +} + +int getenv_bool_secure(const char *p) { + const char *e; + + e = secure_getenv(p); + if (!e) + return -ENXIO; + + return parse_boolean(e); +} + +int getenv_uint64_secure(const char *p, uint64_t *ret) { + const char *e; + + assert(p); + + e = secure_getenv(p); + if (!e) + return -ENXIO; + + return safe_atou64(e, ret); +} + +int set_unset_env(const char *name, const char *value, bool overwrite) { + assert(name); + + if (value) + return RET_NERRNO(setenv(name, value, overwrite)); + + return RET_NERRNO(unsetenv(name)); +} + +int putenv_dup(const char *assignment, bool override) { + const char *e, *n; + + e = strchr(assignment, '='); + if (!e) + return -EINVAL; + + n = strndupa_safe(assignment, e - assignment); + + /* This is like putenv(), but uses setenv() so that our memory doesn't become part of environ[]. */ + return RET_NERRNO(setenv(n, e + 1, override)); +} + +int setenv_systemd_exec_pid(bool update_only) { + char str[DECIMAL_STR_MAX(pid_t)]; + const char *e; + + /* Update $SYSTEMD_EXEC_PID=pid except when '*' is set for the variable. */ + + e = secure_getenv("SYSTEMD_EXEC_PID"); + if (!e && update_only) + return 0; + + if (streq_ptr(e, "*")) + return 0; + + xsprintf(str, PID_FMT, getpid_cached()); + + if (setenv("SYSTEMD_EXEC_PID", str, 1) < 0) + return -errno; + + return 1; +} + +int getenv_path_list(const char *name, char ***ret_paths) { + _cleanup_strv_free_ char **l = NULL; + const char *e; + int r; + + assert(name); + assert(ret_paths); + + e = secure_getenv(name); + if (!e) + return -ENXIO; + + r = strv_split_full(&l, e, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return log_debug_errno(r, "Failed to parse $%s: %m", name); + + STRV_FOREACH(p, l) { + if (!path_is_absolute(*p)) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), + "Path '%s' is not absolute, refusing.", *p); + + if (!path_is_normalized(*p)) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), + "Path '%s' is not normalized, refusing.", *p); + + if (path_equal(*p, "/")) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), + "Path '%s' is the root fs, refusing.", *p); + } + + if (strv_isempty(l)) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), + "No paths specified, refusing."); + + *ret_paths = TAKE_PTR(l); + return 1; +} + +int getenv_steal_erase(const char *name, char **ret) { + _cleanup_(erase_and_freep) char *a = NULL; + char *e; + + assert(name); + + /* Reads an environment variable, makes a copy of it, erases its memory in the environment block and removes + * it from there. Usecase: reading passwords from the env block (which is a bad idea, but useful for + * testing, and given that people are likely going to misuse this, be thorough) */ + + e = getenv(name); + if (!e) { + if (ret) + *ret = NULL; + return 0; + } + + if (ret) { + a = strdup(e); + if (!a) + return -ENOMEM; + } + + string_erase(e); + + if (unsetenv(name) < 0) + return -errno; + + if (ret) + *ret = TAKE_PTR(a); + + return 1; +} diff --git a/src/basic/env-util.h b/src/basic/env-util.h new file mode 100644 index 0000000..b927ac7 --- /dev/null +++ b/src/basic/env-util.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include + +#include "macro.h" +#include "string.h" + +static inline size_t sc_arg_max(void) { + long l = sysconf(_SC_ARG_MAX); + assert(l > 0); + return (size_t) l; +} + +bool env_name_is_valid(const char *e); +bool env_value_is_valid(const char *e); +bool env_assignment_is_valid(const char *e); + +enum { + REPLACE_ENV_USE_ENVIRONMENT = 1 << 0, + REPLACE_ENV_ALLOW_BRACELESS = 1 << 1, + REPLACE_ENV_ALLOW_EXTENDED = 1 << 2, +}; + +char *replace_env_n(const char *format, size_t n, char **env, unsigned flags); +char **replace_env_argv(char **argv, char **env); + +static inline char *replace_env(const char *format, char **env, unsigned flags) { + return replace_env_n(format, strlen(format), env, flags); +} + +bool strv_env_is_valid(char **e); +#define strv_env_clean(l) strv_env_clean_with_callback(l, NULL, NULL) +char **strv_env_clean_with_callback(char **l, void (*invalid_callback)(const char *p, void *userdata), void *userdata); + +bool strv_env_name_is_valid(char **l); +bool strv_env_name_or_assignment_is_valid(char **l); + +char** _strv_env_merge(char **first, ...); +#define strv_env_merge(first, ...) _strv_env_merge(first, __VA_ARGS__, POINTER_MAX) +char **strv_env_delete(char **x, size_t n_lists, ...); /* New copy */ + +char **strv_env_unset(char **l, const char *p); /* In place ... */ +char **strv_env_unset_many(char **l, ...) _sentinel_; +int strv_env_replace_consume(char ***l, char *p); /* In place ... */ +int strv_env_replace_strdup(char ***l, const char *assignment); +int strv_env_replace_strdup_passthrough(char ***l, const char *assignment); +int strv_env_assign(char ***l, const char *key, const char *value); + +char *strv_env_get_n(char **l, const char *name, size_t k, unsigned flags) _pure_; +char *strv_env_get(char **x, const char *n) _pure_; +char *strv_env_pairs_get(char **l, const char *name) _pure_; + +int getenv_bool(const char *p); +int getenv_bool_secure(const char *p); + +int getenv_uint64_secure(const char *p, uint64_t *ret); + +/* Like setenv, but calls unsetenv if value == NULL. */ +int set_unset_env(const char *name, const char *value, bool overwrite); + +/* Like putenv, but duplicates the memory like setenv. */ +int putenv_dup(const char *assignment, bool override); + +int setenv_systemd_exec_pid(bool update_only); + +/* Parses and does sanity checks on an environment variable containing + * PATH-like colon-separated absolute paths */ +int getenv_path_list(const char *name, char ***ret_paths); + +int getenv_steal_erase(const char *name, char **ret); diff --git a/src/basic/errno-list.c b/src/basic/errno-list.c new file mode 100644 index 0000000..2aeb38c --- /dev/null +++ b/src/basic/errno-list.c @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include + +#include "errno-list.h" +#include "macro.h" + +static const struct errno_name* lookup_errno(register const char *str, + register GPERF_LEN_TYPE len); + +#include "errno-from-name.h" +#include "errno-to-name.h" + +const char *errno_to_name(int id) { + + if (id < 0) + id = -id; + + if ((size_t) id >= ELEMENTSOF(errno_names)) + return NULL; + + return errno_names[id]; +} + +int errno_from_name(const char *name) { + const struct errno_name *sc; + + assert(name); + + sc = lookup_errno(name, strlen(name)); + if (!sc) + return -EINVAL; + + assert(sc->id > 0); + return sc->id; +} diff --git a/src/basic/errno-list.h b/src/basic/errno-list.h new file mode 100644 index 0000000..082b833 --- /dev/null +++ b/src/basic/errno-list.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +/* + * MAX_ERRNO is defined as 4095 in linux/err.h + * We use the same value here. + */ +#define ERRNO_MAX 4095 + +const char *errno_to_name(int id); +int errno_from_name(const char *name); +static inline bool errno_is_valid(int n) { + return n > 0 && n <= ERRNO_MAX; +} diff --git a/src/basic/errno-to-name.awk b/src/basic/errno-to-name.awk new file mode 100644 index 0000000..8442124 --- /dev/null +++ b/src/basic/errno-to-name.awk @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +BEGIN{ + print "static const char* const errno_names[] = { " +} +!/(EDEADLOCK|EWOULDBLOCK|ENOTSUP)/ { + printf " [%s] = \"%s\",\n", $1, $1 +} +END{ + print "};" +} diff --git a/src/basic/errno-util.h b/src/basic/errno-util.h new file mode 100644 index 0000000..091f99c --- /dev/null +++ b/src/basic/errno-util.h @@ -0,0 +1,176 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +#include "macro.h" + +/* strerror(3) says that glibc uses a maximum length of 1024 bytes. */ +#define ERRNO_BUF_LEN 1024 + +/* Note: the lifetime of the compound literal is the immediately surrounding block, + * see C11 §6.5.2.5, and + * https://stackoverflow.com/questions/34880638/compound-literal-lifetime-and-if-blocks + * + * Note that we use the GNU variant of strerror_r() here. */ +#define STRERROR(errnum) strerror_r(abs(errnum), (char[ERRNO_BUF_LEN]){}, ERRNO_BUF_LEN) + +/* A helper to print an error message or message for functions that return 0 on EOF. + * Note that we can't use ({ … }) to define a temporary variable, so errnum is + * evaluated twice. */ +#define STRERROR_OR_EOF(errnum) ((errnum) != 0 ? STRERROR(errnum) : "Unexpected EOF") + +static inline void _reset_errno_(int *saved_errno) { + if (*saved_errno < 0) /* Invalidated by UNPROTECT_ERRNO? */ + return; + + errno = *saved_errno; +} + +#define PROTECT_ERRNO \ + _cleanup_(_reset_errno_) _unused_ int _saved_errno_ = errno + +#define UNPROTECT_ERRNO \ + do { \ + errno = _saved_errno_; \ + _saved_errno_ = -1; \ + } while (false) + +#define LOCAL_ERRNO(value) \ + PROTECT_ERRNO; \ + errno = abs(value) + +static inline int negative_errno(void) { + /* This helper should be used to shut up gcc if you know 'errno' is + * negative. Instead of "return -errno;", use "return negative_errno();" + * It will suppress bogus gcc warnings in case it assumes 'errno' might + * be 0 and thus the caller's error-handling might not be triggered. */ + assert_return(errno > 0, -EINVAL); + return -errno; +} + +static inline int RET_NERRNO(int ret) { + + /* Helper to wrap system calls in to make them return negative errno errors. This brings system call + * error handling in sync with how we usually handle errors in our own code, i.e. with immediate + * returning of negative errno. Usage is like this: + * + * … + * r = RET_NERRNO(unlink(t)); + * … + * + * or + * + * … + * fd = RET_NERRNO(open("/etc/fstab", O_RDONLY|O_CLOEXEC)); + * … + */ + + if (ret < 0) + return negative_errno(); + + return ret; +} + +static inline int errno_or_else(int fallback) { + /* To be used when invoking library calls where errno handling is not defined clearly: we return + * errno if it is set, and the specified error otherwise. The idea is that the caller initializes + * errno to zero before doing an API call, and then uses this helper to retrieve a somewhat useful + * error code */ + if (errno > 0) + return -errno; + + return -abs(fallback); +} + +/* For send()/recv() or read()/write(). */ +static inline bool ERRNO_IS_TRANSIENT(int r) { + return IN_SET(abs(r), + EAGAIN, + EINTR); +} + +/* Hint #1: ENETUNREACH happens if we try to connect to "non-existing" special IP addresses, such as ::5. + * + * Hint #2: The kernel sends e.g., EHOSTUNREACH or ENONET to userspace in some ICMP error cases. See the + * icmp_err_convert[] in net/ipv4/icmp.c in the kernel sources. + * + * Hint #3: When asynchronous connect() on TCP fails because the host never acknowledges a single packet, + * kernel tells us that with ETIMEDOUT, see tcp(7). */ +static inline bool ERRNO_IS_DISCONNECT(int r) { + return IN_SET(abs(r), + ECONNABORTED, + ECONNREFUSED, + ECONNRESET, + EHOSTDOWN, + EHOSTUNREACH, + ENETDOWN, + ENETRESET, + ENETUNREACH, + ENONET, + ENOPROTOOPT, + ENOTCONN, + EPIPE, + EPROTO, + ESHUTDOWN, + ETIMEDOUT); +} + +/* Transient errors we might get on accept() that we should ignore. As per error handling comment in + * the accept(2) man page. */ +static inline bool ERRNO_IS_ACCEPT_AGAIN(int r) { + return ERRNO_IS_DISCONNECT(r) || + ERRNO_IS_TRANSIENT(r) || + abs(r) == EOPNOTSUPP; +} + +/* Resource exhaustion, could be our fault or general system trouble */ +static inline bool ERRNO_IS_RESOURCE(int r) { + return IN_SET(abs(r), + EMFILE, + ENFILE, + ENOMEM); +} + +/* Seven different errors for "operation/system call/ioctl/socket feature not supported" */ +static inline bool ERRNO_IS_NOT_SUPPORTED(int r) { + return IN_SET(abs(r), + EOPNOTSUPP, + ENOTTY, + ENOSYS, + EAFNOSUPPORT, + EPFNOSUPPORT, + EPROTONOSUPPORT, + ESOCKTNOSUPPORT); +} + +/* Two different errors for access problems */ +static inline bool ERRNO_IS_PRIVILEGE(int r) { + return IN_SET(abs(r), + EACCES, + EPERM); +} + +/* Three different errors for "not enough disk space" */ +static inline bool ERRNO_IS_DISK_SPACE(int r) { + return IN_SET(abs(r), + ENOSPC, + EDQUOT, + EFBIG); +} + +/* Three different errors for "this device does not quite exist" */ +static inline bool ERRNO_IS_DEVICE_ABSENT(int r) { + return IN_SET(abs(r), + ENODEV, + ENXIO, + ENOENT); +} + +/* Quite often we want to handle cases where the backing FS doesn't support extended attributes at all and + * where it simply doesn't have the requested xattr the same way */ +static inline bool ERRNO_IS_XATTR_ABSENT(int r) { + return abs(r) == ENODATA || + ERRNO_IS_NOT_SUPPORTED(r); +} diff --git a/src/basic/escape.c b/src/basic/escape.c new file mode 100644 index 0000000..d2cb1c7 --- /dev/null +++ b/src/basic/escape.c @@ -0,0 +1,576 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include + +#include "alloc-util.h" +#include "escape.h" +#include "hexdecoct.h" +#include "macro.h" +#include "strv.h" +#include "utf8.h" + +int cescape_char(char c, char *buf) { + char *buf_old = buf; + + /* Needs space for 4 characters in the buffer */ + + switch (c) { + + case '\a': + *(buf++) = '\\'; + *(buf++) = 'a'; + break; + case '\b': + *(buf++) = '\\'; + *(buf++) = 'b'; + break; + case '\f': + *(buf++) = '\\'; + *(buf++) = 'f'; + break; + case '\n': + *(buf++) = '\\'; + *(buf++) = 'n'; + break; + case '\r': + *(buf++) = '\\'; + *(buf++) = 'r'; + break; + case '\t': + *(buf++) = '\\'; + *(buf++) = 't'; + break; + case '\v': + *(buf++) = '\\'; + *(buf++) = 'v'; + break; + case '\\': + *(buf++) = '\\'; + *(buf++) = '\\'; + break; + case '"': + *(buf++) = '\\'; + *(buf++) = '"'; + break; + case '\'': + *(buf++) = '\\'; + *(buf++) = '\''; + break; + + default: + /* For special chars we prefer octal over + * hexadecimal encoding, simply because glib's + * g_strescape() does the same */ + if ((c < ' ') || (c >= 127)) { + *(buf++) = '\\'; + *(buf++) = octchar((unsigned char) c >> 6); + *(buf++) = octchar((unsigned char) c >> 3); + *(buf++) = octchar((unsigned char) c); + } else + *(buf++) = c; + break; + } + + return buf - buf_old; +} + +char* cescape_length(const char *s, size_t n) { + const char *f; + char *r, *t; + + assert(s || n == 0); + + /* Does C style string escaping. May be reversed with + * cunescape(). */ + + r = new(char, n*4 + 1); + if (!r) + return NULL; + + for (f = s, t = r; f < s + n; f++) + t += cescape_char(*f, t); + + *t = 0; + + return r; +} + +char* cescape(const char *s) { + assert(s); + + return cescape_length(s, strlen(s)); +} + +int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit, bool accept_nul) { + int r = 1; + + assert(p); + assert(ret); + + /* Unescapes C style. Returns the unescaped character in ret. + * Sets *eight_bit to true if the escaped sequence either fits in + * one byte in UTF-8 or is a non-unicode literal byte and should + * instead be copied directly. + */ + + if (length != SIZE_MAX && length < 1) + return -EINVAL; + + switch (p[0]) { + + case 'a': + *ret = '\a'; + break; + case 'b': + *ret = '\b'; + break; + case 'f': + *ret = '\f'; + break; + case 'n': + *ret = '\n'; + break; + case 'r': + *ret = '\r'; + break; + case 't': + *ret = '\t'; + break; + case 'v': + *ret = '\v'; + break; + case '\\': + *ret = '\\'; + break; + case '"': + *ret = '"'; + break; + case '\'': + *ret = '\''; + break; + + case 's': + /* This is an extension of the XDG syntax files */ + *ret = ' '; + break; + + case 'x': { + /* hexadecimal encoding */ + int a, b; + + if (length != SIZE_MAX && length < 3) + return -EINVAL; + + a = unhexchar(p[1]); + if (a < 0) + return -EINVAL; + + b = unhexchar(p[2]); + if (b < 0) + return -EINVAL; + + /* Don't allow NUL bytes */ + if (a == 0 && b == 0 && !accept_nul) + return -EINVAL; + + *ret = (a << 4U) | b; + *eight_bit = true; + r = 3; + break; + } + + case 'u': { + /* C++11 style 16bit unicode */ + + int a[4]; + size_t i; + uint32_t c; + + if (length != SIZE_MAX && length < 5) + return -EINVAL; + + for (i = 0; i < 4; i++) { + a[i] = unhexchar(p[1 + i]); + if (a[i] < 0) + return a[i]; + } + + c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3]; + + /* Don't allow 0 chars */ + if (c == 0 && !accept_nul) + return -EINVAL; + + *ret = c; + r = 5; + break; + } + + case 'U': { + /* C++11 style 32bit unicode */ + + int a[8]; + size_t i; + char32_t c; + + if (length != SIZE_MAX && length < 9) + return -EINVAL; + + for (i = 0; i < 8; i++) { + a[i] = unhexchar(p[1 + i]); + if (a[i] < 0) + return a[i]; + } + + c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) | ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) | + ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] << 8U) | ((uint32_t) a[6] << 4U) | (uint32_t) a[7]; + + /* Don't allow 0 chars */ + if (c == 0 && !accept_nul) + return -EINVAL; + + /* Don't allow invalid code points */ + if (!unichar_is_valid(c)) + return -EINVAL; + + *ret = c; + r = 9; + break; + } + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': { + /* octal encoding */ + int a, b, c; + char32_t m; + + if (length != SIZE_MAX && length < 3) + return -EINVAL; + + a = unoctchar(p[0]); + if (a < 0) + return -EINVAL; + + b = unoctchar(p[1]); + if (b < 0) + return -EINVAL; + + c = unoctchar(p[2]); + if (c < 0) + return -EINVAL; + + /* don't allow NUL bytes */ + if (a == 0 && b == 0 && c == 0 && !accept_nul) + return -EINVAL; + + /* Don't allow bytes above 255 */ + m = ((uint32_t) a << 6U) | ((uint32_t) b << 3U) | (uint32_t) c; + if (m > 255) + return -EINVAL; + + *ret = m; + *eight_bit = true; + r = 3; + break; + } + + default: + return -EINVAL; + } + + return r; +} + +ssize_t cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) { + _cleanup_free_ char *ans = NULL; + char *t; + const char *f; + size_t pl; + int r; + + assert(s); + assert(ret); + + /* Undoes C style string escaping, and optionally prefixes it. */ + + pl = strlen_ptr(prefix); + + ans = new(char, pl+length+1); + if (!ans) + return -ENOMEM; + + if (prefix) + memcpy(ans, prefix, pl); + + for (f = s, t = ans + pl; f < s + length; f++) { + size_t remaining; + bool eight_bit = false; + char32_t u; + + remaining = s + length - f; + assert(remaining > 0); + + if (*f != '\\') { + /* A literal, copy verbatim */ + *(t++) = *f; + continue; + } + + if (remaining == 1) { + if (flags & UNESCAPE_RELAX) { + /* A trailing backslash, copy verbatim */ + *(t++) = *f; + continue; + } + + return -EINVAL; + } + + r = cunescape_one(f + 1, remaining - 1, &u, &eight_bit, flags & UNESCAPE_ACCEPT_NUL); + if (r < 0) { + if (flags & UNESCAPE_RELAX) { + /* Invalid escape code, let's take it literal then */ + *(t++) = '\\'; + continue; + } + + return r; + } + + f += r; + if (eight_bit) + /* One byte? Set directly as specified */ + *(t++) = u; + else + /* Otherwise encode as multi-byte UTF-8 */ + t += utf8_encode_unichar(t, u); + } + + *t = 0; + + assert(t >= ans); /* Let static analyzers know that the answer is non-negative. */ + *ret = TAKE_PTR(ans); + return t - *ret; +} + +char* xescape_full(const char *s, const char *bad, size_t console_width, XEscapeFlags flags) { + char *ans, *t, *prev, *prev2; + const char *f; + + /* Escapes all chars in bad, in addition to \ and all special chars, in \xFF style escaping. May be + * reversed with cunescape(). If XESCAPE_8_BIT is specified, characters >= 127 are let through + * unchanged. This corresponds to non-ASCII printable characters in pre-unicode encodings. + * + * If console_width is reached, or XESCAPE_FORCE_ELLIPSIS is set, output is truncated and "..." is + * appended. */ + + if (console_width == 0) + return strdup(""); + + ans = new(char, MIN(strlen(s), console_width) * 4 + 1); + if (!ans) + return NULL; + + memset(ans, '_', MIN(strlen(s), console_width) * 4); + ans[MIN(strlen(s), console_width) * 4] = 0; + + bool force_ellipsis = FLAGS_SET(flags, XESCAPE_FORCE_ELLIPSIS); + + for (f = s, t = prev = prev2 = ans; ; f++) { + char *tmp_t = t; + + if (!*f) { + if (force_ellipsis) + break; + + *t = 0; + return ans; + } + + if ((unsigned char) *f < ' ' || + (!FLAGS_SET(flags, XESCAPE_8_BIT) && (unsigned char) *f >= 127) || + *f == '\\' || strchr(bad, *f)) { + if ((size_t) (t - ans) + 4 + 3 * force_ellipsis > console_width) + break; + + *(t++) = '\\'; + *(t++) = 'x'; + *(t++) = hexchar(*f >> 4); + *(t++) = hexchar(*f); + } else { + if ((size_t) (t - ans) + 1 + 3 * force_ellipsis > console_width) + break; + + *(t++) = *f; + } + + /* We might need to go back two cycles to fit three dots, so remember two positions */ + prev2 = prev; + prev = tmp_t; + } + + /* We can just write where we want, since chars are one-byte */ + size_t c = MIN(console_width, 3u); /* If the console is too narrow, write fewer dots */ + size_t off; + if (console_width - c >= (size_t) (t - ans)) + off = (size_t) (t - ans); + else if (console_width - c >= (size_t) (prev - ans)) + off = (size_t) (prev - ans); + else if (console_width - c >= (size_t) (prev2 - ans)) + off = (size_t) (prev2 - ans); + else + off = console_width - c; + assert(off <= (size_t) (t - ans)); + + memcpy(ans + off, "...", c); + ans[off + c] = '\0'; + return ans; +} + +char* escape_non_printable_full(const char *str, size_t console_width, XEscapeFlags flags) { + if (FLAGS_SET(flags, XESCAPE_8_BIT)) + return xescape_full(str, "", console_width, flags); + else + return utf8_escape_non_printable_full(str, + console_width, + FLAGS_SET(flags, XESCAPE_FORCE_ELLIPSIS)); +} + +char* octescape(const char *s, size_t len) { + char *buf, *t; + + /* Escapes all chars in bad, in addition to \ and " chars, in \nnn style escaping. */ + + assert(s || len == 0); + + t = buf = new(char, len * 4 + 1); + if (!buf) + return NULL; + + for (size_t i = 0; i < len; i++) { + uint8_t u = (uint8_t) s[i]; + + if (u < ' ' || u >= 127 || IN_SET(u, '\\', '"')) { + *(t++) = '\\'; + *(t++) = '0' + (u >> 6); + *(t++) = '0' + ((u >> 3) & 7); + *(t++) = '0' + (u & 7); + } else + *(t++) = u; + } + + *t = 0; + return buf; +} + +static char* strcpy_backslash_escaped(char *t, const char *s, const char *bad) { + assert(bad); + assert(t); + assert(s); + + while (*s) { + int l = utf8_encoded_valid_unichar(s, SIZE_MAX); + + if (char_is_cc(*s) || l < 0) + t += cescape_char(*(s++), t); + else if (l == 1) { + if (*s == '\\' || strchr(bad, *s)) + *(t++) = '\\'; + *(t++) = *(s++); + } else { + t = mempcpy(t, s, l); + s += l; + } + } + + return t; +} + +char* shell_escape(const char *s, const char *bad) { + char *buf, *t; + + buf = new(char, strlen(s)*4+1); + if (!buf) + return NULL; + + t = strcpy_backslash_escaped(buf, s, bad); + *t = 0; + + return buf; +} + +char* shell_maybe_quote(const char *s, ShellEscapeFlags flags) { + const char *p; + char *buf, *t; + + assert(s); + + /* Encloses a string in quotes if necessary to make it OK as a shell string. */ + + if (FLAGS_SET(flags, SHELL_ESCAPE_EMPTY) && isempty(s)) + return strdup("\"\""); /* We don't use $'' here in the POSIX mode. "" is fine too. */ + + for (p = s; *p; ) { + int l = utf8_encoded_valid_unichar(p, SIZE_MAX); + + if (char_is_cc(*p) || l < 0 || + strchr(WHITESPACE SHELL_NEED_QUOTES, *p)) + break; + + p += l; + } + + if (!*p) + return strdup(s); + + buf = new(char, FLAGS_SET(flags, SHELL_ESCAPE_POSIX) + 1 + strlen(s)*4 + 1 + 1); + if (!buf) + return NULL; + + t = buf; + if (FLAGS_SET(flags, SHELL_ESCAPE_POSIX)) { + *(t++) = '$'; + *(t++) = '\''; + } else + *(t++) = '"'; + + t = mempcpy(t, s, p - s); + + t = strcpy_backslash_escaped(t, p, + FLAGS_SET(flags, SHELL_ESCAPE_POSIX) ? SHELL_NEED_ESCAPE_POSIX : SHELL_NEED_ESCAPE); + + if (FLAGS_SET(flags, SHELL_ESCAPE_POSIX)) + *(t++) = '\''; + else + *(t++) = '"'; + *t = 0; + + return str_realloc(buf); +} + +char* quote_command_line(char **argv, ShellEscapeFlags flags) { + _cleanup_free_ char *result = NULL; + + assert(argv); + + STRV_FOREACH(a, argv) { + _cleanup_free_ char *t = NULL; + + t = shell_maybe_quote(*a, flags); + if (!t) + return NULL; + + if (!strextend_with_separator(&result, " ", t)) + return NULL; + } + + return str_realloc(TAKE_PTR(result)); +} diff --git a/src/basic/escape.h b/src/basic/escape.h new file mode 100644 index 0000000..318da6f --- /dev/null +++ b/src/basic/escape.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include +#include + +#include "string-util.h" +#include "missing_type.h" + +/* What characters are special in the shell? */ +/* must be escaped outside and inside double-quotes */ +#define SHELL_NEED_ESCAPE "\"\\`$" + +/* Those that can be escaped or double-quoted. + * + * Strictly speaking, ! does not need to be escaped, except in interactive + * mode, but let's be extra nice to the user and quote ! in case this + * output is ever used in interactive mode. */ +#define SHELL_NEED_QUOTES SHELL_NEED_ESCAPE GLOB_CHARS "'()<>|&;!" + +/* Note that we assume control characters would need to be escaped too in + * addition to the "special" characters listed here, if they appear in the + * string. Current users disallow control characters. Also '"' shall not + * be escaped. + */ +#define SHELL_NEED_ESCAPE_POSIX "\\\'" + +typedef enum UnescapeFlags { + UNESCAPE_RELAX = 1 << 0, + UNESCAPE_ACCEPT_NUL = 1 << 1, +} UnescapeFlags; + +typedef enum ShellEscapeFlags { + /* The default is to add shell quotes ("") so the shell will consider this a single argument. + * Tabs and newlines are escaped. */ + + SHELL_ESCAPE_POSIX = 1 << 1, /* Use POSIX shell escape syntax (a string enclosed in $'') instead of plain quotes. */ + SHELL_ESCAPE_EMPTY = 1 << 2, /* Format empty arguments as "". */ +} ShellEscapeFlags; + +char* cescape(const char *s); +char* cescape_length(const char *s, size_t n); +int cescape_char(char c, char *buf); + +int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit, bool accept_nul); + +ssize_t cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret); +static inline ssize_t cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) { + return cunescape_length_with_prefix(s, length, NULL, flags, ret); +} +static inline ssize_t cunescape(const char *s, UnescapeFlags flags, char **ret) { + return cunescape_length(s, strlen(s), flags, ret); +} + +typedef enum XEscapeFlags { + XESCAPE_8_BIT = 1 << 0, + XESCAPE_FORCE_ELLIPSIS = 1 << 1, +} XEscapeFlags; + +char* xescape_full(const char *s, const char *bad, size_t console_width, XEscapeFlags flags); +static inline char* xescape(const char *s, const char *bad) { + return xescape_full(s, bad, SIZE_MAX, 0); +} +char* octescape(const char *s, size_t len); +char* escape_non_printable_full(const char *str, size_t console_width, XEscapeFlags flags); + +char* shell_escape(const char *s, const char *bad); +char* shell_maybe_quote(const char *s, ShellEscapeFlags flags); +char* quote_command_line(char **argv, ShellEscapeFlags flags); diff --git a/src/basic/ether-addr-util.c b/src/basic/ether-addr-util.c new file mode 100644 index 0000000..0a6a54f --- /dev/null +++ b/src/basic/ether-addr-util.c @@ -0,0 +1,272 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include + +#include "ether-addr-util.h" +#include "hexdecoct.h" +#include "macro.h" +#include "string-util.h" + +char *hw_addr_to_string_full( + const struct hw_addr_data *addr, + HardwareAddressToStringFlags flags, + char buffer[static HW_ADDR_TO_STRING_MAX]) { + + assert(addr); + assert(buffer); + assert(addr->length <= HW_ADDR_MAX_SIZE); + + for (size_t i = 0, j = 0; i < addr->length; i++) { + buffer[j++] = hexchar(addr->bytes[i] >> 4); + buffer[j++] = hexchar(addr->bytes[i] & 0x0f); + if (!FLAGS_SET(flags, HW_ADDR_TO_STRING_NO_COLON)) + buffer[j++] = ':'; + } + + buffer[addr->length == 0 || FLAGS_SET(flags, HW_ADDR_TO_STRING_NO_COLON) ? + addr->length * 2 : + addr->length * 3 - 1] = '\0'; + return buffer; +} + +struct hw_addr_data *hw_addr_set(struct hw_addr_data *addr, const uint8_t *bytes, size_t length) { + assert(addr); + assert(length <= HW_ADDR_MAX_SIZE); + + addr->length = length; + memcpy_safe(addr->bytes, bytes, length); + return addr; +} + +int hw_addr_compare(const struct hw_addr_data *a, const struct hw_addr_data *b) { + int r; + + assert(a); + assert(b); + + r = CMP(a->length, b->length); + if (r != 0) + return r; + + return memcmp(a->bytes, b->bytes, a->length); +} + +void hw_addr_hash_func(const struct hw_addr_data *p, struct siphash *state) { + assert(p); + assert(state); + + siphash24_compress(&p->length, sizeof(p->length), state); + siphash24_compress(p->bytes, p->length, state); +} + +DEFINE_HASH_OPS(hw_addr_hash_ops, struct hw_addr_data, hw_addr_hash_func, hw_addr_compare); +DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(hw_addr_hash_ops_free, struct hw_addr_data, hw_addr_hash_func, hw_addr_compare, free); + +char* ether_addr_to_string(const struct ether_addr *addr, char buffer[ETHER_ADDR_TO_STRING_MAX]) { + assert(addr); + assert(buffer); + + /* Like ether_ntoa() but uses %02x instead of %x to print + * ethernet addresses, which makes them look less funny. Also, + * doesn't use a static buffer. */ + + sprintf(buffer, "%02x:%02x:%02x:%02x:%02x:%02x", + addr->ether_addr_octet[0], + addr->ether_addr_octet[1], + addr->ether_addr_octet[2], + addr->ether_addr_octet[3], + addr->ether_addr_octet[4], + addr->ether_addr_octet[5]); + + return buffer; +} + +int ether_addr_to_string_alloc(const struct ether_addr *addr, char **ret) { + char *buf; + + assert(addr); + assert(ret); + + buf = new(char, ETHER_ADDR_TO_STRING_MAX); + if (!buf) + return -ENOMEM; + + ether_addr_to_string(addr, buf); + + *ret = buf; + return 0; +} + +int ether_addr_compare(const struct ether_addr *a, const struct ether_addr *b) { + return memcmp(a, b, ETH_ALEN); +} + +static void ether_addr_hash_func(const struct ether_addr *p, struct siphash *state) { + siphash24_compress(p, sizeof(struct ether_addr), state); +} + +DEFINE_HASH_OPS(ether_addr_hash_ops, struct ether_addr, ether_addr_hash_func, ether_addr_compare); +DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(ether_addr_hash_ops_free, struct ether_addr, ether_addr_hash_func, ether_addr_compare, free); + +static int parse_hw_addr_one_field(const char **s, char sep, size_t len, uint8_t *buf) { + const char *hex = HEXDIGITS, *p; + uint16_t data = 0; + bool cont; + + assert(s); + assert(*s); + assert(IN_SET(len, 1, 2)); + assert(buf); + + p = *s; + + for (size_t i = 0; i < len * 2; i++) { + const char *hexoff; + size_t x; + + if (*p == '\0' || *p == sep) { + if (i == 0) + return -EINVAL; + break; + } + + hexoff = strchr(hex, *p); + if (!hexoff) + return -EINVAL; + + assert(hexoff >= hex); + x = hexoff - hex; + if (x >= 16) + x -= 6; /* A-F */ + + assert(x < 16); + data <<= 4; + data += x; + + p++; + } + + if (*p != '\0' && *p != sep) + return -EINVAL; + + switch (len) { + case 1: + buf[0] = data; + break; + case 2: + buf[0] = (data & 0xff00) >> 8; + buf[1] = data & 0xff; + break; + default: + assert_not_reached(); + } + + cont = *p == sep; + *s = p + cont; + return cont; +} + +int parse_hw_addr_full(const char *s, size_t expected_len, struct hw_addr_data *ret) { + size_t field_size, max_len, len = 0; + uint8_t bytes[HW_ADDR_MAX_SIZE]; + char sep; + int r; + + assert(s); + assert(expected_len <= HW_ADDR_MAX_SIZE || expected_len == SIZE_MAX); + assert(ret); + + /* This accepts the following formats: + * + * Dot separated 2 bytes format: xxyy.zzaa.bbcc + * Colon separated 1 bytes format: xx:yy:zz:aa:bb:cc + * Hyphen separated 1 bytes format: xx-yy-zz-aa-bb-cc + * + * Moreover, if expected_len == 0, 4, or 16, this also accepts: + * + * IPv4 format: used by IPv4 tunnel, e.g. ipgre + * IPv6 format: used by IPv6 tunnel, e.g. ip6gre + * + * The expected_len argument controls the length of acceptable addresses: + * + * 0: accepts 4 (AF_INET), 16 (AF_INET6), 6 (ETH_ALEN), or 20 (INFINIBAND_ALEN). + * SIZE_MAX: accepts arbitrary length, but at least one separator must be included. + * Otherwise: accepts addresses with matching length. + */ + + if (IN_SET(expected_len, 0, sizeof(struct in_addr), sizeof(struct in6_addr))) { + union in_addr_union a; + int family; + + if (expected_len == 0) + r = in_addr_from_string_auto(s, &family, &a); + else { + family = expected_len == sizeof(struct in_addr) ? AF_INET : AF_INET6; + r = in_addr_from_string(family, s, &a); + } + if (r >= 0) { + ret->length = FAMILY_ADDRESS_SIZE(family); + memcpy(ret->bytes, a.bytes, ret->length); + return 0; + } + } + + max_len = + expected_len == 0 ? INFINIBAND_ALEN : + expected_len == SIZE_MAX ? HW_ADDR_MAX_SIZE : expected_len; + sep = s[strspn(s, HEXDIGITS)]; + + if (sep == '.') + field_size = 2; + else if (IN_SET(sep, ':', '-')) + field_size = 1; + else + return -EINVAL; + + if (max_len % field_size != 0) + return -EINVAL; + + for (size_t i = 0; i < max_len / field_size; i++) { + r = parse_hw_addr_one_field(&s, sep, field_size, bytes + i * field_size); + if (r < 0) + return r; + if (r == 0) { + len = (i + 1) * field_size; + break; + } + } + + if (len == 0) + return -EINVAL; + + if (expected_len == 0) { + if (!IN_SET(len, 4, 16, ETH_ALEN, INFINIBAND_ALEN)) + return -EINVAL; + } else if (expected_len != SIZE_MAX) { + if (len != expected_len) + return -EINVAL; + } + + ret->length = len; + memcpy(ret->bytes, bytes, ret->length); + return 0; +} + +int parse_ether_addr(const char *s, struct ether_addr *ret) { + struct hw_addr_data a; + int r; + + assert(s); + assert(ret); + + r = parse_hw_addr_full(s, ETH_ALEN, &a); + if (r < 0) + return r; + + *ret = a.ether; + return 0; +} diff --git a/src/basic/ether-addr-util.h b/src/basic/ether-addr-util.h new file mode 100644 index 0000000..83ed77d --- /dev/null +++ b/src/basic/ether-addr-util.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include + +#include "hash-funcs.h" +#include "in-addr-util.h" +#include "macro.h" +#include "memory-util.h" + +/* This is MAX_ADDR_LEN as defined in linux/netdevice.h, but net/if_arp.h + * defines a macro of the same name with a much lower size. */ +#define HW_ADDR_MAX_SIZE 32 + +struct hw_addr_data { + size_t length; + union { + struct ether_addr ether; + uint8_t infiniband[INFINIBAND_ALEN]; + struct in_addr in; + struct in6_addr in6; + uint8_t bytes[HW_ADDR_MAX_SIZE]; + }; +}; + +int parse_hw_addr_full(const char *s, size_t expected_len, struct hw_addr_data *ret); +static inline int parse_hw_addr(const char *s, struct hw_addr_data *ret) { + return parse_hw_addr_full(s, 0, ret); +} +int parse_ether_addr(const char *s, struct ether_addr *ret); + +typedef enum HardwareAddressToStringFlags { + HW_ADDR_TO_STRING_NO_COLON = 1 << 0, +} HardwareAddressToStringFlags; + +#define HW_ADDR_TO_STRING_MAX (3*HW_ADDR_MAX_SIZE) +char *hw_addr_to_string_full( + const struct hw_addr_data *addr, + HardwareAddressToStringFlags flags, + char buffer[static HW_ADDR_TO_STRING_MAX]); +static inline char *hw_addr_to_string(const struct hw_addr_data *addr, char buffer[static HW_ADDR_TO_STRING_MAX]) { + return hw_addr_to_string_full(addr, 0, buffer); +} + +/* Note: the lifetime of the compound literal is the immediately surrounding block, + * see C11 §6.5.2.5, and + * https://stackoverflow.com/questions/34880638/compound-literal-lifetime-and-if-blocks */ +#define HW_ADDR_TO_STR_FULL(hw_addr, flags) hw_addr_to_string_full((hw_addr), flags, (char[HW_ADDR_TO_STRING_MAX]){}) +#define HW_ADDR_TO_STR(hw_addr) HW_ADDR_TO_STR_FULL(hw_addr, 0) + +#define HW_ADDR_NULL ((const struct hw_addr_data){}) + +struct hw_addr_data *hw_addr_set(struct hw_addr_data *addr, const uint8_t *bytes, size_t length); + +void hw_addr_hash_func(const struct hw_addr_data *p, struct siphash *state); +int hw_addr_compare(const struct hw_addr_data *a, const struct hw_addr_data *b); +static inline bool hw_addr_equal(const struct hw_addr_data *a, const struct hw_addr_data *b) { + return hw_addr_compare(a, b) == 0; +} +static inline bool hw_addr_is_null(const struct hw_addr_data *addr) { + assert(addr); + return addr->length == 0 || memeqzero(addr->bytes, addr->length); +} + +extern const struct hash_ops hw_addr_hash_ops; +extern const struct hash_ops hw_addr_hash_ops_free; + +#define ETHER_ADDR_FORMAT_STR "%02X%02X%02X%02X%02X%02X" +#define ETHER_ADDR_FORMAT_VAL(x) (x).ether_addr_octet[0], (x).ether_addr_octet[1], (x).ether_addr_octet[2], (x).ether_addr_octet[3], (x).ether_addr_octet[4], (x).ether_addr_octet[5] + +#define ETHER_ADDR_TO_STRING_MAX (3*6) +char* ether_addr_to_string(const struct ether_addr *addr, char buffer[ETHER_ADDR_TO_STRING_MAX]); +int ether_addr_to_string_alloc(const struct ether_addr *addr, char **ret); +/* Use only as function argument, never stand-alone! */ +#define ETHER_ADDR_TO_STR(addr) ether_addr_to_string((addr), (char[ETHER_ADDR_TO_STRING_MAX]){}) + +int ether_addr_compare(const struct ether_addr *a, const struct ether_addr *b); +static inline bool ether_addr_equal(const struct ether_addr *a, const struct ether_addr *b) { + return ether_addr_compare(a, b) == 0; +} + +#define ETHER_ADDR_NULL ((const struct ether_addr){}) + +static inline bool ether_addr_is_null(const struct ether_addr *addr) { + return ether_addr_equal(addr, ÐER_ADDR_NULL); +} + +static inline bool ether_addr_is_broadcast(const struct ether_addr *addr) { + assert(addr); + return memeqbyte(0xff, addr->ether_addr_octet, ETH_ALEN); +} + +static inline bool ether_addr_is_multicast(const struct ether_addr *addr) { + assert(addr); + return FLAGS_SET(addr->ether_addr_octet[0], 0x01); +} + +static inline bool ether_addr_is_unicast(const struct ether_addr *addr) { + return !ether_addr_is_multicast(addr); +} + +static inline bool ether_addr_is_local(const struct ether_addr *addr) { + /* Determine if the Ethernet address is locally-assigned one (IEEE 802) */ + assert(addr); + return FLAGS_SET(addr->ether_addr_octet[0], 0x02); +} + +static inline bool ether_addr_is_global(const struct ether_addr *addr) { + return !ether_addr_is_local(addr); +} + +extern const struct hash_ops ether_addr_hash_ops; +extern const struct hash_ops ether_addr_hash_ops_free; diff --git a/src/basic/extract-word.c b/src/basic/extract-word.c new file mode 100644 index 0000000..9f9bb0c --- /dev/null +++ b/src/basic/extract-word.c @@ -0,0 +1,301 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "escape.h" +#include "extract-word.h" +#include "log.h" +#include "macro.h" +#include "string-util.h" +#include "strv.h" +#include "utf8.h" + +int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) { + _cleanup_free_ char *s = NULL; + size_t sz = 0; + char quote = 0; /* 0 or ' or " */ + bool backslash = false; /* whether we've just seen a backslash */ + char c; + int r; + + assert(p); + assert(ret); + assert(!FLAGS_SET(flags, EXTRACT_KEEP_QUOTE | EXTRACT_UNQUOTE)); + + /* Bail early if called after last value or with no input */ + if (!*p) + goto finish; + c = **p; + + if (!separators) + separators = WHITESPACE; + + /* Parses the first word of a string, and returns it in + * *ret. Removes all quotes in the process. When parsing fails + * (because of an uneven number of quotes or similar), leaves + * the pointer *p at the first invalid character. */ + + if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) + if (!GREEDY_REALLOC(s, sz+1)) + return -ENOMEM; + + for (;; (*p)++, c = **p) { + if (c == 0) + goto finish_force_terminate; + else if (strchr(separators, c)) { + if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) { + if (!(flags & EXTRACT_RETAIN_SEPARATORS)) + (*p)++; + goto finish_force_next; + } + } else { + /* We found a non-blank character, so we will always + * want to return a string (even if it is empty), + * allocate it here. */ + if (!GREEDY_REALLOC(s, sz+1)) + return -ENOMEM; + break; + } + } + + for (;; (*p)++, c = **p) { + if (backslash) { + if (!GREEDY_REALLOC(s, sz+7)) + return -ENOMEM; + + if (c == 0) { + if ((flags & EXTRACT_UNESCAPE_RELAX) && + (quote == 0 || flags & EXTRACT_RELAX)) { + /* If we find an unquoted trailing backslash and we're in + * EXTRACT_UNESCAPE_RELAX mode, keep it verbatim in the + * output. + * + * Unbalanced quotes will only be allowed in EXTRACT_RELAX + * mode, EXTRACT_UNESCAPE_RELAX mode does not allow them. + */ + s[sz++] = '\\'; + goto finish_force_terminate; + } + if (flags & EXTRACT_RELAX) + goto finish_force_terminate; + return -EINVAL; + } + + if (flags & (EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS)) { + bool eight_bit = false; + char32_t u; + + if ((flags & EXTRACT_CUNESCAPE) && + (r = cunescape_one(*p, SIZE_MAX, &u, &eight_bit, false)) >= 0) { + /* A valid escaped sequence */ + assert(r >= 1); + + (*p) += r - 1; + + if (eight_bit) + s[sz++] = u; + else + sz += utf8_encode_unichar(s + sz, u); + } else if ((flags & EXTRACT_UNESCAPE_SEPARATORS) && + (strchr(separators, **p) || **p == '\\')) + /* An escaped separator char or the escape char itself */ + s[sz++] = c; + else if (flags & EXTRACT_UNESCAPE_RELAX) { + s[sz++] = '\\'; + s[sz++] = c; + } else + return -EINVAL; + } else + s[sz++] = c; + + backslash = false; + + } else if (quote != 0) { /* inside either single or double quotes */ + for (;; (*p)++, c = **p) { + if (c == 0) { + if (flags & EXTRACT_RELAX) + goto finish_force_terminate; + return -EINVAL; + } else if (c == quote) { /* found the end quote */ + quote = 0; + if (flags & EXTRACT_UNQUOTE) + break; + } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) { + backslash = true; + break; + } + + if (!GREEDY_REALLOC(s, sz+2)) + return -ENOMEM; + + s[sz++] = c; + + if (quote == 0) + break; + } + + } else { + for (;; (*p)++, c = **p) { + if (c == 0) + goto finish_force_terminate; + else if (IN_SET(c, '\'', '"') && (flags & (EXTRACT_KEEP_QUOTE | EXTRACT_UNQUOTE))) { + quote = c; + if (flags & EXTRACT_UNQUOTE) + break; + } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) { + backslash = true; + break; + } else if (strchr(separators, c)) { + if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) { + if (!(flags & EXTRACT_RETAIN_SEPARATORS)) + (*p)++; + goto finish_force_next; + } + if (!(flags & EXTRACT_RETAIN_SEPARATORS)) + /* Skip additional coalesced separators. */ + for (;; (*p)++, c = **p) { + if (c == 0) + goto finish_force_terminate; + if (!strchr(separators, c)) + break; + } + goto finish; + + } + + if (!GREEDY_REALLOC(s, sz+2)) + return -ENOMEM; + + s[sz++] = c; + + if (quote != 0) + break; + } + } + } + +finish_force_terminate: + *p = NULL; +finish: + if (!s) { + *p = NULL; + *ret = NULL; + return 0; + } + +finish_force_next: + s[sz] = 0; + *ret = TAKE_PTR(s); + + return 1; +} + +int extract_first_word_and_warn( + const char **p, + char **ret, + const char *separators, + ExtractFlags flags, + const char *unit, + const char *filename, + unsigned line, + const char *rvalue) { + + /* Try to unquote it, if it fails, warn about it and try again + * but this time using EXTRACT_UNESCAPE_RELAX to keep the + * backslashes verbatim in invalid escape sequences. */ + + const char *save; + int r; + + save = *p; + r = extract_first_word(p, ret, separators, flags); + if (r >= 0) + return r; + + if (r == -EINVAL && !(flags & EXTRACT_UNESCAPE_RELAX)) { + + /* Retry it with EXTRACT_UNESCAPE_RELAX. */ + *p = save; + r = extract_first_word(p, ret, separators, flags|EXTRACT_UNESCAPE_RELAX); + if (r >= 0) { + /* It worked this time, hence it must have been an invalid escape sequence. */ + log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Ignoring unknown escape sequences: \"%s\"", *ret); + return r; + } + + /* If it's still EINVAL; then it must be unbalanced quoting, report this. */ + if (r == -EINVAL) + return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue); + } + + /* Can be any error, report it */ + return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue); +} + +/* We pass ExtractFlags as unsigned int (to avoid undefined behaviour when passing + * an object that undergoes default argument promotion as an argument to va_start). + * Let's make sure that ExtractFlags fits into an unsigned int. */ +assert_cc(sizeof(enum ExtractFlags) <= sizeof(unsigned)); + +int extract_many_words(const char **p, const char *separators, unsigned flags, ...) { + va_list ap; + char **l; + int n = 0, i, c, r; + + /* Parses a number of words from a string, stripping any + * quotes if necessary. */ + + assert(p); + + /* Count how many words are expected */ + va_start(ap, flags); + for (;;) { + if (!va_arg(ap, char **)) + break; + n++; + } + va_end(ap); + + if (n <= 0) + return 0; + + /* Read all words into a temporary array */ + l = newa0(char*, n); + for (c = 0; c < n; c++) { + + r = extract_first_word(p, &l[c], separators, flags); + if (r < 0) { + int j; + + for (j = 0; j < c; j++) + free(l[j]); + + return r; + } + + if (r == 0) + break; + } + + /* If we managed to parse all words, return them in the passed + * in parameters */ + va_start(ap, flags); + for (i = 0; i < n; i++) { + char **v; + + v = va_arg(ap, char **); + assert(v); + + *v = l[i]; + } + va_end(ap); + + return c; +} diff --git a/src/basic/extract-word.h b/src/basic/extract-word.h new file mode 100644 index 0000000..c82ad76 --- /dev/null +++ b/src/basic/extract-word.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "macro.h" + +typedef enum ExtractFlags { + EXTRACT_RELAX = 1 << 0, /* Allow unbalanced quote and eat up trailing backslash. */ + EXTRACT_CUNESCAPE = 1 << 1, /* Unescape known escape sequences. */ + EXTRACT_UNESCAPE_RELAX = 1 << 2, /* Allow and keep unknown escape sequences, allow and keep trailing backslash. */ + EXTRACT_UNESCAPE_SEPARATORS = 1 << 3, /* Unescape separators (those specified, or whitespace by default). */ + EXTRACT_KEEP_QUOTE = 1 << 4, /* Ignore separators in quoting with "" and ''. */ + EXTRACT_UNQUOTE = 1 << 5, /* Ignore separators in quoting with "" and '', and remove the quotes. */ + EXTRACT_DONT_COALESCE_SEPARATORS = 1 << 6, /* Don't treat multiple adjacent separators as one */ + EXTRACT_RETAIN_ESCAPE = 1 << 7, /* Treat escape character '\' as any other character without special meaning */ + EXTRACT_RETAIN_SEPARATORS = 1 << 8, /* Do not advance the original string pointer past the separator(s) */ + + /* Note that if no flags are specified, escaped escape characters will be silently stripped. */ +} ExtractFlags; + +int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags); +int extract_first_word_and_warn(const char **p, char **ret, const char *separators, ExtractFlags flags, const char *unit, const char *filename, unsigned line, const char *rvalue); +int extract_many_words(const char **p, const char *separators, unsigned flags, ...) _sentinel_; diff --git a/src/basic/fd-util.c b/src/basic/fd-util.c new file mode 100644 index 0000000..66bb756 --- /dev/null +++ b/src/basic/fd-util.c @@ -0,0 +1,827 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#if WANT_LINUX_FS_H +#include +#endif +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "io-util.h" +#include "macro.h" +#include "missing_fcntl.h" +#include "missing_fs.h" +#include "missing_syscall.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "socket-util.h" +#include "sort-util.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "tmpfile-util.h" +#include "util.h" + +/* The maximum number of iterations in the loop to close descriptors in the fallback case + * when /proc/self/fd/ is inaccessible. */ +#define MAX_FD_LOOP_LIMIT (1024*1024) + +int close_nointr(int fd) { + assert(fd >= 0); + + if (close(fd) >= 0) + return 0; + + /* + * Just ignore EINTR; a retry loop is the wrong thing to do on + * Linux. + * + * http://lkml.indiana.edu/hypermail/linux/kernel/0509.1/0877.html + * https://bugzilla.gnome.org/show_bug.cgi?id=682819 + * http://utcc.utoronto.ca/~cks/space/blog/unix/CloseEINTR + * https://sites.google.com/site/michaelsafyan/software-engineering/checkforeintrwheninvokingclosethinkagain + */ + if (errno == EINTR) + return 0; + + return -errno; +} + +int safe_close(int fd) { + + /* + * Like close_nointr() but cannot fail. Guarantees errno is + * unchanged. Is a NOP with negative fds passed, and returns + * -1, so that it can be used in this syntax: + * + * fd = safe_close(fd); + */ + + if (fd >= 0) { + PROTECT_ERRNO; + + /* The kernel might return pretty much any error code + * via close(), but the fd will be closed anyway. The + * only condition we want to check for here is whether + * the fd was invalid at all... */ + + assert_se(close_nointr(fd) != -EBADF); + } + + return -1; +} + +void safe_close_pair(int p[static 2]) { + assert(p); + + if (p[0] == p[1]) { + /* Special case pairs which use the same fd in both + * directions... */ + p[0] = p[1] = safe_close(p[0]); + return; + } + + p[0] = safe_close(p[0]); + p[1] = safe_close(p[1]); +} + +void close_many(const int fds[], size_t n_fd) { + assert(fds || n_fd <= 0); + + for (size_t i = 0; i < n_fd; i++) + safe_close(fds[i]); +} + +int fclose_nointr(FILE *f) { + assert(f); + + /* Same as close_nointr(), but for fclose() */ + + errno = 0; /* Extra safety: if the FILE* object is not encapsulating an fd, it might not set errno + * correctly. Let's hence initialize it to zero first, so that we aren't confused by any + * prior errno here */ + if (fclose(f) == 0) + return 0; + + if (errno == EINTR) + return 0; + + return errno_or_else(EIO); +} + +FILE* safe_fclose(FILE *f) { + + /* Same as safe_close(), but for fclose() */ + + if (f) { + PROTECT_ERRNO; + + assert_se(fclose_nointr(f) != -EBADF); + } + + return NULL; +} + +DIR* safe_closedir(DIR *d) { + + if (d) { + PROTECT_ERRNO; + + assert_se(closedir(d) >= 0 || errno != EBADF); + } + + return NULL; +} + +int fd_nonblock(int fd, bool nonblock) { + int flags, nflags; + + assert(fd >= 0); + + flags = fcntl(fd, F_GETFL, 0); + if (flags < 0) + return -errno; + + nflags = UPDATE_FLAG(flags, O_NONBLOCK, nonblock); + if (nflags == flags) + return 0; + + return RET_NERRNO(fcntl(fd, F_SETFL, nflags)); +} + +int fd_cloexec(int fd, bool cloexec) { + int flags, nflags; + + assert(fd >= 0); + + flags = fcntl(fd, F_GETFD, 0); + if (flags < 0) + return -errno; + + nflags = UPDATE_FLAG(flags, FD_CLOEXEC, cloexec); + if (nflags == flags) + return 0; + + return RET_NERRNO(fcntl(fd, F_SETFD, nflags)); +} + +int fd_cloexec_many(const int fds[], size_t n_fds, bool cloexec) { + int ret = 0, r; + + assert(n_fds == 0 || fds); + + for (size_t i = 0; i < n_fds; i++) { + if (fds[i] < 0) /* Skip gracefully over already invalidated fds */ + continue; + + r = fd_cloexec(fds[i], cloexec); + if (r < 0 && ret >= 0) /* Continue going, but return first error */ + ret = r; + else + ret = 1; /* report if we did anything */ + } + + return ret; +} + +_pure_ static bool fd_in_set(int fd, const int fdset[], size_t n_fdset) { + assert(n_fdset == 0 || fdset); + + for (size_t i = 0; i < n_fdset; i++) { + if (fdset[i] < 0) + continue; + + if (fdset[i] == fd) + return true; + } + + return false; +} + +int get_max_fd(void) { + struct rlimit rl; + rlim_t m; + + /* Return the highest possible fd, based RLIMIT_NOFILE, but enforcing FD_SETSIZE-1 as lower boundary + * and INT_MAX as upper boundary. */ + + if (getrlimit(RLIMIT_NOFILE, &rl) < 0) + return -errno; + + m = MAX(rl.rlim_cur, rl.rlim_max); + if (m < FD_SETSIZE) /* Let's always cover at least 1024 fds */ + return FD_SETSIZE-1; + + if (m == RLIM_INFINITY || m > INT_MAX) /* Saturate on overflow. After all fds are "int", hence can + * never be above INT_MAX */ + return INT_MAX; + + return (int) (m - 1); +} + +static int close_all_fds_frugal(const int except[], size_t n_except) { + int max_fd, r = 0; + + assert(n_except == 0 || except); + + /* This is the inner fallback core of close_all_fds(). This never calls malloc() or opendir() or so + * and hence is safe to be called in signal handler context. Most users should call close_all_fds(), + * but when we assume we are called from signal handler context, then use this simpler call + * instead. */ + + max_fd = get_max_fd(); + if (max_fd < 0) + return max_fd; + + /* Refuse to do the loop over more too many elements. It's better to fail immediately than to + * spin the CPU for a long time. */ + if (max_fd > MAX_FD_LOOP_LIMIT) + return log_debug_errno(SYNTHETIC_ERRNO(EPERM), + "Refusing to loop over %d potential fds.", + max_fd); + + for (int fd = 3; fd >= 0; fd = fd < max_fd ? fd + 1 : -1) { + int q; + + if (fd_in_set(fd, except, n_except)) + continue; + + q = close_nointr(fd); + if (q < 0 && q != -EBADF && r >= 0) + r = q; + } + + return r; +} + +static bool have_close_range = true; /* Assume we live in the future */ + +static int close_all_fds_special_case(const int except[], size_t n_except) { + assert(n_except == 0 || except); + + /* Handles a few common special cases separately, since they are common and can be optimized really + * nicely, since we won't need sorting for them. Returns > 0 if the special casing worked, 0 + * otherwise. */ + + if (!have_close_range) + return 0; + + if (n_except == 1 && except[0] < 0) /* Minor optimization: if we only got one fd, and it's invalid, + * we got none */ + n_except = 0; + + switch (n_except) { + + case 0: + /* Close everything. Yay! */ + + if (close_range(3, -1, 0) >= 0) + return 1; + + if (ERRNO_IS_NOT_SUPPORTED(errno) || ERRNO_IS_PRIVILEGE(errno)) { + have_close_range = false; + return 0; + } + + return -errno; + + case 1: + /* Close all but exactly one, then we don't need no sorting. This is a pretty common + * case, hence let's handle it specially. */ + + if ((except[0] <= 3 || close_range(3, except[0]-1, 0) >= 0) && + (except[0] >= INT_MAX || close_range(MAX(3, except[0]+1), -1, 0) >= 0)) + return 1; + + if (ERRNO_IS_NOT_SUPPORTED(errno) || ERRNO_IS_PRIVILEGE(errno)) { + have_close_range = false; + return 0; + } + + return -errno; + + default: + return 0; + } +} + +int close_all_fds_without_malloc(const int except[], size_t n_except) { + int r; + + assert(n_except == 0 || except); + + r = close_all_fds_special_case(except, n_except); + if (r < 0) + return r; + if (r > 0) /* special case worked! */ + return 0; + + return close_all_fds_frugal(except, n_except); +} + +int close_all_fds(const int except[], size_t n_except) { + _cleanup_closedir_ DIR *d = NULL; + int r = 0; + + assert(n_except == 0 || except); + + r = close_all_fds_special_case(except, n_except); + if (r < 0) + return r; + if (r > 0) /* special case worked! */ + return 0; + + if (have_close_range) { + _cleanup_free_ int *sorted_malloc = NULL; + size_t n_sorted; + int *sorted; + + /* In the best case we have close_range() to close all fds between a start and an end fd, + * which we can use on the "inverted" exception array, i.e. all intervals between all + * adjacent pairs from the sorted exception array. This changes loop complexity from O(n) + * where n is number of open fds to O(m⋅log(m)) where m is the number of fds to keep + * open. Given that we assume n ≫ m that's preferable to us. */ + + assert(n_except < SIZE_MAX); + n_sorted = n_except + 1; + + if (n_sorted > 64) /* Use heap for large numbers of fds, stack otherwise */ + sorted = sorted_malloc = new(int, n_sorted); + else + sorted = newa(int, n_sorted); + + if (sorted) { + memcpy(sorted, except, n_except * sizeof(int)); + + /* Let's add fd 2 to the list of fds, to simplify the loop below, as this + * allows us to cover the head of the array the same way as the body */ + sorted[n_sorted-1] = 2; + + typesafe_qsort(sorted, n_sorted, cmp_int); + + for (size_t i = 0; i < n_sorted-1; i++) { + int start, end; + + start = MAX(sorted[i], 2); /* The first three fds shall always remain open */ + end = MAX(sorted[i+1], 2); + + assert(end >= start); + + if (end - start <= 1) + continue; + + /* Close everything between the start and end fds (both of which shall stay open) */ + if (close_range(start + 1, end - 1, 0) < 0) { + if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) + return -errno; + + have_close_range = false; + break; + } + } + + if (have_close_range) { + /* The loop succeeded. Let's now close everything beyond the end */ + + if (sorted[n_sorted-1] >= INT_MAX) /* Dont let the addition below overflow */ + return 0; + + if (close_range(sorted[n_sorted-1] + 1, -1, 0) >= 0) + return 0; + + if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) + return -errno; + + have_close_range = false; + } + } + + /* Fallback on OOM or if close_range() is not supported */ + } + + d = opendir("/proc/self/fd"); + if (!d) + return close_all_fds_frugal(except, n_except); /* ultimate fallback if /proc/ is not available */ + + FOREACH_DIRENT(de, d, return -errno) { + int fd = -1, q; + + if (!IN_SET(de->d_type, DT_LNK, DT_UNKNOWN)) + continue; + + if (safe_atoi(de->d_name, &fd) < 0) + /* Let's better ignore this, just in case */ + continue; + + if (fd < 3) + continue; + + if (fd == dirfd(d)) + continue; + + if (fd_in_set(fd, except, n_except)) + continue; + + q = close_nointr(fd); + if (q < 0 && q != -EBADF && r >= 0) /* Valgrind has its own FD and doesn't want to have it closed */ + r = q; + } + + return r; +} + +int same_fd(int a, int b) { + struct stat sta, stb; + pid_t pid; + int r, fa, fb; + + assert(a >= 0); + assert(b >= 0); + + /* Compares two file descriptors. Note that semantics are quite different depending on whether we + * have kcmp() or we don't. If we have kcmp() this will only return true for dup()ed file + * descriptors, but not otherwise. If we don't have kcmp() this will also return true for two fds of + * the same file, created by separate open() calls. Since we use this call mostly for filtering out + * duplicates in the fd store this difference hopefully doesn't matter too much. */ + + if (a == b) + return true; + + /* Try to use kcmp() if we have it. */ + pid = getpid_cached(); + r = kcmp(pid, pid, KCMP_FILE, a, b); + if (r == 0) + return true; + if (r > 0) + return false; + if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) + return -errno; + + /* We don't have kcmp(), use fstat() instead. */ + if (fstat(a, &sta) < 0) + return -errno; + + if (fstat(b, &stb) < 0) + return -errno; + + if (!stat_inode_same(&sta, &stb)) + return false; + + /* We consider all device fds different, since two device fds might refer to quite different device + * contexts even though they share the same inode and backing dev_t. */ + + if (S_ISCHR(sta.st_mode) || S_ISBLK(sta.st_mode)) + return false; + + /* The fds refer to the same inode on disk, let's also check if they have the same fd flags. This is + * useful to distinguish the read and write side of a pipe created with pipe(). */ + fa = fcntl(a, F_GETFL); + if (fa < 0) + return -errno; + + fb = fcntl(b, F_GETFL); + if (fb < 0) + return -errno; + + return fa == fb; +} + +void cmsg_close_all(struct msghdr *mh) { + struct cmsghdr *cmsg; + + assert(mh); + + CMSG_FOREACH(cmsg, mh) + if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) + close_many((int*) CMSG_DATA(cmsg), (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int)); +} + +bool fdname_is_valid(const char *s) { + const char *p; + + /* Validates a name for $LISTEN_FDNAMES. We basically allow + * everything ASCII that's not a control character. Also, as + * special exception the ":" character is not allowed, as we + * use that as field separator in $LISTEN_FDNAMES. + * + * Note that the empty string is explicitly allowed + * here. However, we limit the length of the names to 255 + * characters. */ + + if (!s) + return false; + + for (p = s; *p; p++) { + if (*p < ' ') + return false; + if (*p >= 127) + return false; + if (*p == ':') + return false; + } + + return p - s <= FDNAME_MAX; +} + +int fd_get_path(int fd, char **ret) { + int r; + + r = readlink_malloc(FORMAT_PROC_FD_PATH(fd), ret); + if (r == -ENOENT) { + /* ENOENT can mean two things: that the fd does not exist or that /proc is not mounted. Let's make + * things debuggable and distinguish the two. */ + + if (proc_mounted() == 0) + return -ENOSYS; /* /proc is not available or not set up properly, we're most likely in some chroot + * environment. */ + return -EBADF; /* The directory exists, hence it's the fd that doesn't. */ + } + + return r; +} + +int move_fd(int from, int to, int cloexec) { + int r; + + /* Move fd 'from' to 'to', make sure FD_CLOEXEC remains equal if requested, and release the old fd. If + * 'cloexec' is passed as -1, the original FD_CLOEXEC is inherited for the new fd. If it is 0, it is turned + * off, if it is > 0 it is turned on. */ + + if (from < 0) + return -EBADF; + if (to < 0) + return -EBADF; + + if (from == to) { + + if (cloexec >= 0) { + r = fd_cloexec(to, cloexec); + if (r < 0) + return r; + } + + return to; + } + + if (cloexec < 0) { + int fl; + + fl = fcntl(from, F_GETFD, 0); + if (fl < 0) + return -errno; + + cloexec = !!(fl & FD_CLOEXEC); + } + + r = dup3(from, to, cloexec ? O_CLOEXEC : 0); + if (r < 0) + return -errno; + + assert(r == to); + + safe_close(from); + + return to; +} + +int fd_move_above_stdio(int fd) { + int flags, copy; + PROTECT_ERRNO; + + /* Moves the specified file descriptor if possible out of the range [0…2], i.e. the range of + * stdin/stdout/stderr. If it can't be moved outside of this range the original file descriptor is + * returned. This call is supposed to be used for long-lasting file descriptors we allocate in our code that + * might get loaded into foreign code, and where we want ensure our fds are unlikely used accidentally as + * stdin/stdout/stderr of unrelated code. + * + * Note that this doesn't fix any real bugs, it just makes it less likely that our code will be affected by + * buggy code from others that mindlessly invokes 'fprintf(stderr, …' or similar in places where stderr has + * been closed before. + * + * This function is written in a "best-effort" and "least-impact" style. This means whenever we encounter an + * error we simply return the original file descriptor, and we do not touch errno. */ + + if (fd < 0 || fd > 2) + return fd; + + flags = fcntl(fd, F_GETFD, 0); + if (flags < 0) + return fd; + + if (flags & FD_CLOEXEC) + copy = fcntl(fd, F_DUPFD_CLOEXEC, 3); + else + copy = fcntl(fd, F_DUPFD, 3); + if (copy < 0) + return fd; + + assert(copy > 2); + + (void) close(fd); + return copy; +} + +int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd) { + + int fd[3] = { /* Put together an array of fds we work on */ + original_input_fd, + original_output_fd, + original_error_fd + }; + + int r, i, + null_fd = -1, /* if we open /dev/null, we store the fd to it here */ + copy_fd[3] = { -1, -1, -1 }; /* This contains all fds we duplicate here temporarily, and hence need to close at the end */ + bool null_readable, null_writable; + + /* Sets up stdin, stdout, stderr with the three file descriptors passed in. If any of the descriptors is + * specified as -1 it will be connected with /dev/null instead. If any of the file descriptors is passed as + * itself (e.g. stdin as STDIN_FILENO) it is left unmodified, but the O_CLOEXEC bit is turned off should it be + * on. + * + * Note that if any of the passed file descriptors are > 2 they will be closed — both on success and on + * failure! Thus, callers should assume that when this function returns the input fds are invalidated. + * + * Note that when this function fails stdin/stdout/stderr might remain half set up! + * + * O_CLOEXEC is turned off for all three file descriptors (which is how it should be for + * stdin/stdout/stderr). */ + + null_readable = original_input_fd < 0; + null_writable = original_output_fd < 0 || original_error_fd < 0; + + /* First step, open /dev/null once, if we need it */ + if (null_readable || null_writable) { + + /* Let's open this with O_CLOEXEC first, and convert it to non-O_CLOEXEC when we move the fd to the final position. */ + null_fd = open("/dev/null", (null_readable && null_writable ? O_RDWR : + null_readable ? O_RDONLY : O_WRONLY) | O_CLOEXEC); + if (null_fd < 0) { + r = -errno; + goto finish; + } + + /* If this fd is in the 0…2 range, let's move it out of it */ + if (null_fd < 3) { + int copy; + + copy = fcntl(null_fd, F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */ + if (copy < 0) { + r = -errno; + goto finish; + } + + close_and_replace(null_fd, copy); + } + } + + /* Let's assemble fd[] with the fds to install in place of stdin/stdout/stderr */ + for (i = 0; i < 3; i++) { + + if (fd[i] < 0) + fd[i] = null_fd; /* A negative parameter means: connect this one to /dev/null */ + else if (fd[i] != i && fd[i] < 3) { + /* This fd is in the 0…2 territory, but not at its intended place, move it out of there, so that we can work there. */ + copy_fd[i] = fcntl(fd[i], F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */ + if (copy_fd[i] < 0) { + r = -errno; + goto finish; + } + + fd[i] = copy_fd[i]; + } + } + + /* At this point we now have the fds to use in fd[], and they are all above the stdio range, so that we + * have freedom to move them around. If the fds already were at the right places then the specific fds are + * -1. Let's now move them to the right places. This is the point of no return. */ + for (i = 0; i < 3; i++) { + + if (fd[i] == i) { + + /* fd is already in place, but let's make sure O_CLOEXEC is off */ + r = fd_cloexec(i, false); + if (r < 0) + goto finish; + + } else { + assert(fd[i] > 2); + + if (dup2(fd[i], i) < 0) { /* Turns off O_CLOEXEC on the new fd. */ + r = -errno; + goto finish; + } + } + } + + r = 0; + +finish: + /* Close the original fds, but only if they were outside of the stdio range. Also, properly check for the same + * fd passed in multiple times. */ + safe_close_above_stdio(original_input_fd); + if (original_output_fd != original_input_fd) + safe_close_above_stdio(original_output_fd); + if (original_error_fd != original_input_fd && original_error_fd != original_output_fd) + safe_close_above_stdio(original_error_fd); + + /* Close the copies we moved > 2 */ + for (i = 0; i < 3; i++) + safe_close(copy_fd[i]); + + /* Close our null fd, if it's > 2 */ + safe_close_above_stdio(null_fd); + + return r; +} + +int fd_reopen(int fd, int flags) { + int new_fd, r; + + /* Reopens the specified fd with new flags. This is useful for convert an O_PATH fd into a regular one, or to + * turn O_RDWR fds into O_RDONLY fds. + * + * This doesn't work on sockets (since they cannot be open()ed, ever). + * + * This implicitly resets the file read index to 0. */ + + if (FLAGS_SET(flags, O_DIRECTORY)) { + /* If we shall reopen the fd as directory we can just go via "." and thus bypass the whole + * magic /proc/ directory, and make ourselves independent of that being mounted. */ + new_fd = openat(fd, ".", flags); + if (new_fd < 0) + return -errno; + + return new_fd; + } + + new_fd = open(FORMAT_PROC_FD_PATH(fd), flags); + if (new_fd < 0) { + if (errno != ENOENT) + return -errno; + + r = proc_mounted(); + if (r == 0) + return -ENOSYS; /* if we have no /proc/, the concept is not implementable */ + + return r > 0 ? -EBADF : -ENOENT; /* If /proc/ is definitely around then this means the fd is + * not valid, otherwise let's propagate the original + * error */ + } + + return new_fd; +} + +int read_nr_open(void) { + _cleanup_free_ char *nr_open = NULL; + int r; + + /* Returns the kernel's current fd limit, either by reading it of /proc/sys if that works, or using the + * hard-coded default compiled-in value of current kernels (1M) if not. This call will never fail. */ + + r = read_one_line_file("/proc/sys/fs/nr_open", &nr_open); + if (r < 0) + log_debug_errno(r, "Failed to read /proc/sys/fs/nr_open, ignoring: %m"); + else { + int v; + + r = safe_atoi(nr_open, &v); + if (r < 0) + log_debug_errno(r, "Failed to parse /proc/sys/fs/nr_open value '%s', ignoring: %m", nr_open); + else + return v; + } + + /* If we fail, fall back to the hard-coded kernel limit of 1024 * 1024. */ + return 1024 * 1024; +} + +int fd_get_diskseq(int fd, uint64_t *ret) { + uint64_t diskseq; + + assert(fd >= 0); + assert(ret); + + if (ioctl(fd, BLKGETDISKSEQ, &diskseq) < 0) { + /* Note that the kernel is weird: non-existing ioctls currently return EINVAL + * rather than ENOTTY on loopback block devices. They should fix that in the kernel, + * but in the meantime we accept both here. */ + if (!ERRNO_IS_NOT_SUPPORTED(errno) && errno != EINVAL) + return -errno; + + return -EOPNOTSUPP; + } + + *ret = diskseq; + + return 0; +} diff --git a/src/basic/fd-util.h b/src/basic/fd-util.h new file mode 100644 index 0000000..29c7d86 --- /dev/null +++ b/src/basic/fd-util.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include + +#include "macro.h" +#include "stdio-util.h" + +/* maximum length of fdname */ +#define FDNAME_MAX 255 + +/* Make sure we can distinguish fd 0 and NULL */ +#define FD_TO_PTR(fd) INT_TO_PTR((fd)+1) +#define PTR_TO_FD(p) (PTR_TO_INT(p)-1) + +int close_nointr(int fd); +int safe_close(int fd); +void safe_close_pair(int p[static 2]); + +static inline int safe_close_above_stdio(int fd) { + if (fd < 3) /* Don't close stdin/stdout/stderr, but still invalidate the fd by returning -1 */ + return -1; + + return safe_close(fd); +} + +void close_many(const int fds[], size_t n_fd); + +int fclose_nointr(FILE *f); +FILE* safe_fclose(FILE *f); +DIR* safe_closedir(DIR *f); + +static inline void closep(int *fd) { + safe_close(*fd); +} + +static inline void close_pairp(int (*p)[2]) { + safe_close_pair(*p); +} + +static inline void fclosep(FILE **f) { + safe_fclose(*f); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(FILE*, pclose, NULL); +DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(DIR*, closedir, NULL); + +#define _cleanup_close_ _cleanup_(closep) +#define _cleanup_fclose_ _cleanup_(fclosep) +#define _cleanup_pclose_ _cleanup_(pclosep) +#define _cleanup_closedir_ _cleanup_(closedirp) +#define _cleanup_close_pair_ _cleanup_(close_pairp) + +int fd_nonblock(int fd, bool nonblock); +int fd_cloexec(int fd, bool cloexec); +int fd_cloexec_many(const int fds[], size_t n_fds, bool cloexec); + +int get_max_fd(void); + +int close_all_fds(const int except[], size_t n_except); +int close_all_fds_without_malloc(const int except[], size_t n_except); + +int same_fd(int a, int b); + +void cmsg_close_all(struct msghdr *mh); + +bool fdname_is_valid(const char *s); + +int fd_get_path(int fd, char **ret); + +int move_fd(int from, int to, int cloexec); + +enum { + ACQUIRE_NO_DEV_NULL = 1 << 0, + ACQUIRE_NO_MEMFD = 1 << 1, + ACQUIRE_NO_PIPE = 1 << 2, + ACQUIRE_NO_TMPFILE = 1 << 3, + ACQUIRE_NO_REGULAR = 1 << 4, +}; + +int fd_move_above_stdio(int fd); + +int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd); + +static inline int make_null_stdio(void) { + return rearrange_stdio(-1, -1, -1); +} + +/* Like TAKE_PTR() but for file descriptors, resetting them to -1 */ +#define TAKE_FD(fd) \ + ({ \ + int *_fd_ = &(fd); \ + int _ret_ = *_fd_; \ + *_fd_ = -1; \ + _ret_; \ + }) + +/* Like free_and_replace(), but for file descriptors */ +#define close_and_replace(a, b) \ + ({ \ + int *_fdp_ = &(a); \ + safe_close(*_fdp_); \ + *_fdp_ = TAKE_FD(b); \ + 0; \ + }) + +int fd_reopen(int fd, int flags); +int read_nr_open(void); +int fd_get_diskseq(int fd, uint64_t *ret); + +/* The maximum length a buffer for a /proc/self/fd/ path needs */ +#define PROC_FD_PATH_MAX \ + (STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)) + +static inline char *format_proc_fd_path(char buf[static PROC_FD_PATH_MAX], int fd) { + assert(buf); + assert(fd >= 0); + assert_se(snprintf_ok(buf, PROC_FD_PATH_MAX, "/proc/self/fd/%i", fd)); + return buf; +} + +#define FORMAT_PROC_FD_PATH(fd) \ + format_proc_fd_path((char[PROC_FD_PATH_MAX]) {}, (fd)) diff --git a/src/basic/fileio.c b/src/basic/fileio.c new file mode 100644 index 0000000..2c4ba89 --- /dev/null +++ b/src/basic/fileio.c @@ -0,0 +1,1436 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "chase-symlinks.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "hexdecoct.h" +#include "log.h" +#include "macro.h" +#include "mkdir.h" +#include "parse-util.h" +#include "path-util.h" +#include "socket-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "sync-util.h" +#include "tmpfile-util.h" + +/* The maximum size of the file we'll read in one go in read_full_file() (64M). */ +#define READ_FULL_BYTES_MAX (64U*1024U*1024U - 1U) + +/* The maximum size of virtual files (i.e. procfs, sysfs, and other virtual "API" files) we'll read in one go + * in read_virtual_file(). Note that this limit is different (and much lower) than the READ_FULL_BYTES_MAX + * limit. This reflects the fact that we use different strategies for reading virtual and regular files: + * virtual files we generally have to read in a single read() syscall since the kernel doesn't support + * continuation read()s for them. Thankfully they are somewhat size constrained. Thus we can allocate the + * full potential buffer in advance. Regular files OTOH can be much larger, and there we grow the allocations + * exponentially in a loop. We use a size limit of 4M-2 because 4M-1 is the maximum buffer that /proc/sys/ + * allows us to read() (larger reads will fail with ENOMEM), and we want to read one extra byte so that we + * can detect EOFs. */ +#define READ_VIRTUAL_BYTES_MAX (4U*1024U*1024U - 2U) + +int fopen_unlocked(const char *path, const char *options, FILE **ret) { + assert(ret); + + FILE *f = fopen(path, options); + if (!f) + return -errno; + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + *ret = f; + return 0; +} + +int fdopen_unlocked(int fd, const char *options, FILE **ret) { + assert(ret); + + FILE *f = fdopen(fd, options); + if (!f) + return -errno; + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + *ret = f; + return 0; +} + +int take_fdopen_unlocked(int *fd, const char *options, FILE **ret) { + int r; + + assert(fd); + + r = fdopen_unlocked(*fd, options, ret); + if (r < 0) + return r; + + *fd = -1; + + return 0; +} + +FILE* take_fdopen(int *fd, const char *options) { + assert(fd); + + FILE *f = fdopen(*fd, options); + if (!f) + return NULL; + + *fd = -1; + + return f; +} + +DIR* take_fdopendir(int *dfd) { + assert(dfd); + + DIR *d = fdopendir(*dfd); + if (!d) + return NULL; + + *dfd = -1; + + return d; +} + +FILE* open_memstream_unlocked(char **ptr, size_t *sizeloc) { + FILE *f = open_memstream(ptr, sizeloc); + if (!f) + return NULL; + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + return f; +} + +FILE* fmemopen_unlocked(void *buf, size_t size, const char *mode) { + FILE *f = fmemopen(buf, size, mode); + if (!f) + return NULL; + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + return f; +} + +int write_string_stream_ts( + FILE *f, + const char *line, + WriteStringFileFlags flags, + const struct timespec *ts) { + + bool needs_nl; + int r, fd = -1; + + assert(f); + assert(line); + + if (ferror(f)) + return -EIO; + + if (ts) { + /* If we shall set the timestamp we need the fd. But fmemopen() streams generally don't have + * an fd. Let's fail early in that case. */ + fd = fileno(f); + if (fd < 0) + return -EBADF; + } + + if (flags & WRITE_STRING_FILE_SUPPRESS_REDUNDANT_VIRTUAL) { + _cleanup_free_ char *t = NULL; + + /* If value to be written is same as that of the existing value, then suppress the write. */ + + if (fd < 0) { + fd = fileno(f); + if (fd < 0) + return -EBADF; + } + + /* Read an additional byte to detect cases where the prefix matches but the rest + * doesn't. Also, 0 returned by read_virtual_file_fd() means the read was truncated and + * it won't be equal to the new value. */ + if (read_virtual_file_fd(fd, strlen(line)+1, &t, NULL) > 0 && + streq_skip_trailing_chars(line, t, NEWLINE)) { + log_debug("No change in value '%s', suppressing write", line); + return 0; + } + + if (lseek(fd, 0, SEEK_SET) < 0) + return -errno; + } + + needs_nl = !(flags & WRITE_STRING_FILE_AVOID_NEWLINE) && !endswith(line, "\n"); + + if (needs_nl && (flags & WRITE_STRING_FILE_DISABLE_BUFFER)) { + /* If STDIO buffering was disabled, then let's append the newline character to the string + * itself, so that the write goes out in one go, instead of two */ + + line = strjoina(line, "\n"); + needs_nl = false; + } + + if (fputs(line, f) == EOF) + return -errno; + + if (needs_nl) + if (fputc('\n', f) == EOF) + return -errno; + + if (flags & WRITE_STRING_FILE_SYNC) + r = fflush_sync_and_check(f); + else + r = fflush_and_check(f); + if (r < 0) + return r; + + if (ts) { + const struct timespec twice[2] = {*ts, *ts}; + + assert(fd >= 0); + if (futimens(fd, twice) < 0) + return -errno; + } + + return 0; +} + +static int write_string_file_atomic( + const char *fn, + const char *line, + WriteStringFileFlags flags, + const struct timespec *ts) { + + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *p = NULL; + int r; + + assert(fn); + assert(line); + + /* Note that we'd really like to use O_TMPFILE here, but can't really, since we want replacement + * semantics here, and O_TMPFILE can't offer that. i.e. rename() replaces but linkat() doesn't. */ + + r = fopen_temporary(fn, &f, &p); + if (r < 0) + return r; + + r = write_string_stream_ts(f, line, flags, ts); + if (r < 0) + goto fail; + + r = fchmod_umask(fileno(f), FLAGS_SET(flags, WRITE_STRING_FILE_MODE_0600) ? 0600 : 0644); + if (r < 0) + goto fail; + + if (rename(p, fn) < 0) { + r = -errno; + goto fail; + } + + if (FLAGS_SET(flags, WRITE_STRING_FILE_SYNC)) { + /* Sync the rename, too */ + r = fsync_directory_of_file(fileno(f)); + if (r < 0) + return r; + } + + return 0; + +fail: + (void) unlink(p); + return r; +} + +int write_string_file_ts( + const char *fn, + const char *line, + WriteStringFileFlags flags, + const struct timespec *ts) { + + _cleanup_fclose_ FILE *f = NULL; + int q, r, fd; + + assert(fn); + assert(line); + + /* We don't know how to verify whether the file contents was already on-disk. */ + assert(!((flags & WRITE_STRING_FILE_VERIFY_ON_FAILURE) && (flags & WRITE_STRING_FILE_SYNC))); + + if (flags & WRITE_STRING_FILE_MKDIR_0755) { + r = mkdir_parents(fn, 0755); + if (r < 0) + return r; + } + + if (flags & WRITE_STRING_FILE_ATOMIC) { + assert(flags & WRITE_STRING_FILE_CREATE); + + r = write_string_file_atomic(fn, line, flags, ts); + if (r < 0) + goto fail; + + return r; + } else + assert(!ts); + + /* We manually build our own version of fopen(..., "we") that works without O_CREAT and with O_NOFOLLOW if needed. */ + fd = open(fn, O_CLOEXEC|O_NOCTTY | + (FLAGS_SET(flags, WRITE_STRING_FILE_NOFOLLOW) ? O_NOFOLLOW : 0) | + (FLAGS_SET(flags, WRITE_STRING_FILE_CREATE) ? O_CREAT : 0) | + (FLAGS_SET(flags, WRITE_STRING_FILE_TRUNCATE) ? O_TRUNC : 0) | + (FLAGS_SET(flags, WRITE_STRING_FILE_SUPPRESS_REDUNDANT_VIRTUAL) ? O_RDWR : O_WRONLY), + (FLAGS_SET(flags, WRITE_STRING_FILE_MODE_0600) ? 0600 : 0666)); + if (fd < 0) { + r = -errno; + goto fail; + } + + r = fdopen_unlocked(fd, "w", &f); + if (r < 0) { + safe_close(fd); + goto fail; + } + + if (flags & WRITE_STRING_FILE_DISABLE_BUFFER) + setvbuf(f, NULL, _IONBF, 0); + + r = write_string_stream_ts(f, line, flags, ts); + if (r < 0) + goto fail; + + return 0; + +fail: + if (!(flags & WRITE_STRING_FILE_VERIFY_ON_FAILURE)) + return r; + + f = safe_fclose(f); + + /* OK, the operation failed, but let's see if the right + * contents in place already. If so, eat up the error. */ + + q = verify_file(fn, line, !(flags & WRITE_STRING_FILE_AVOID_NEWLINE) || (flags & WRITE_STRING_FILE_VERIFY_IGNORE_NEWLINE)); + if (q <= 0) + return r; + + return 0; +} + +int write_string_filef( + const char *fn, + WriteStringFileFlags flags, + const char *format, ...) { + + _cleanup_free_ char *p = NULL; + va_list ap; + int r; + + va_start(ap, format); + r = vasprintf(&p, format, ap); + va_end(ap); + + if (r < 0) + return -ENOMEM; + + return write_string_file(fn, p, flags); +} + +int read_one_line_file(const char *fn, char **line) { + _cleanup_fclose_ FILE *f = NULL; + int r; + + assert(fn); + assert(line); + + r = fopen_unlocked(fn, "re", &f); + if (r < 0) + return r; + + return read_line(f, LONG_LINE_MAX, line); +} + +int verify_file(const char *fn, const char *blob, bool accept_extra_nl) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *buf = NULL; + size_t l, k; + int r; + + assert(fn); + assert(blob); + + l = strlen(blob); + + if (accept_extra_nl && endswith(blob, "\n")) + accept_extra_nl = false; + + buf = malloc(l + accept_extra_nl + 1); + if (!buf) + return -ENOMEM; + + r = fopen_unlocked(fn, "re", &f); + if (r < 0) + return r; + + /* We try to read one byte more than we need, so that we know whether we hit eof */ + errno = 0; + k = fread(buf, 1, l + accept_extra_nl + 1, f); + if (ferror(f)) + return errno_or_else(EIO); + + if (k != l && k != l + accept_extra_nl) + return 0; + if (memcmp(buf, blob, l) != 0) + return 0; + if (k > l && buf[l] != '\n') + return 0; + + return 1; +} + +int read_virtual_file_fd(int fd, size_t max_size, char **ret_contents, size_t *ret_size) { + _cleanup_free_ char *buf = NULL; + size_t n, size; + int n_retries; + bool truncated = false; + + /* Virtual filesystems such as sysfs or procfs use kernfs, and kernfs can work with two sorts of + * virtual files. One sort uses "seq_file", and the results of the first read are buffered for the + * second read. The other sort uses "raw" reads which always go direct to the device. In the latter + * case, the content of the virtual file must be retrieved with a single read otherwise a second read + * might get the new value instead of finding EOF immediately. That's the reason why the usage of + * fread(3) is prohibited in this case as it always performs a second call to read(2) looking for + * EOF. See issue #13585. + * + * max_size specifies a limit on the bytes read. If max_size is SIZE_MAX, the full file is read. If + * the full file is too large to read, an error is returned. For other values of max_size, *partial + * contents* may be returned. (Though the read is still done using one syscall.) Returns 0 on + * partial success, 1 if untruncated contents were read. */ + + assert(fd >= 0); + assert(max_size <= READ_VIRTUAL_BYTES_MAX || max_size == SIZE_MAX); + + /* Limit the number of attempts to read the number of bytes returned by fstat(). */ + n_retries = 3; + + for (;;) { + struct stat st; + + if (fstat(fd, &st) < 0) + return -errno; + + if (!S_ISREG(st.st_mode)) + return -EBADF; + + /* Be prepared for files from /proc which generally report a file size of 0. */ + assert_cc(READ_VIRTUAL_BYTES_MAX < SSIZE_MAX); + if (st.st_size > 0 && n_retries > 1) { + /* Let's use the file size if we have more than 1 attempt left. On the last attempt + * we'll ignore the file size */ + + if (st.st_size > SSIZE_MAX) { /* Avoid overflow with 32-bit size_t and 64-bit off_t. */ + + if (max_size == SIZE_MAX) + return -EFBIG; + + size = max_size; + } else { + size = MIN((size_t) st.st_size, max_size); + + if (size > READ_VIRTUAL_BYTES_MAX) + return -EFBIG; + } + + n_retries--; + } else if (n_retries > 1) { + /* Files in /proc are generally smaller than the page size so let's start with + * a page size buffer from malloc and only use the max buffer on the final try. */ + size = MIN3(page_size() - 1, READ_VIRTUAL_BYTES_MAX, max_size); + n_retries = 1; + } else { + size = MIN(READ_VIRTUAL_BYTES_MAX, max_size); + n_retries = 0; + } + + buf = malloc(size + 1); + if (!buf) + return -ENOMEM; + + /* Use a bigger allocation if we got it anyway, but not more than the limit. */ + size = MIN3(MALLOC_SIZEOF_SAFE(buf) - 1, max_size, READ_VIRTUAL_BYTES_MAX); + + for (;;) { + ssize_t k; + + /* Read one more byte so we can detect whether the content of the + * file has already changed or the guessed size for files from /proc + * wasn't large enough . */ + k = read(fd, buf, size + 1); + if (k >= 0) { + n = k; + break; + } + + if (errno != EINTR) + return -errno; + } + + /* Consider a short read as EOF */ + if (n <= size) + break; + + /* If a maximum size is specified and we already read more we know the file is larger, and + * can handle this as truncation case. Note that if the size of what we read equals the + * maximum size then this doesn't mean truncation, the file might or might not end on that + * byte. We need to rerun the loop in that case, with a larger buffer size, so that we read + * at least one more byte to be able to distinguish EOF from truncation. */ + if (max_size != SIZE_MAX && n > max_size) { + n = size; /* Make sure we never use more than what we sized the buffer for (so that + * we have one free byte in it for the trailing NUL we add below).*/ + truncated = true; + break; + } + + /* We have no further attempts left? Then the file is apparently larger than our limits. Give up. */ + if (n_retries <= 0) + return -EFBIG; + + /* Hmm... either we read too few bytes from /proc or less likely the content of the file + * might have been changed (and is now bigger) while we were processing, let's try again + * either with the new file size. */ + + if (lseek(fd, 0, SEEK_SET) < 0) + return -errno; + + buf = mfree(buf); + } + + if (ret_contents) { + + /* Safety check: if the caller doesn't want to know the size of what we just read it will + * rely on the trailing NUL byte. But if there's an embedded NUL byte, then we should refuse + * operation as otherwise there'd be ambiguity about what we just read. */ + if (!ret_size && memchr(buf, 0, n)) + return -EBADMSG; + + if (n < size) { + char *p; + + /* Return rest of the buffer to libc */ + p = realloc(buf, n + 1); + if (!p) + return -ENOMEM; + buf = p; + } + + buf[n] = 0; + *ret_contents = TAKE_PTR(buf); + } + + if (ret_size) + *ret_size = n; + + return !truncated; +} + +int read_virtual_file_at( + int dir_fd, + const char *filename, + size_t max_size, + char **ret_contents, + size_t *ret_size) { + + _cleanup_close_ int fd = -1; + + assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + + if (!filename) { + if (dir_fd == AT_FDCWD) + return -EBADF; + + return read_virtual_file_fd(dir_fd, max_size, ret_contents, ret_size); + } + + fd = openat(dir_fd, filename, O_RDONLY | O_NOCTTY | O_CLOEXEC); + if (fd < 0) + return -errno; + + return read_virtual_file_fd(fd, max_size, ret_contents, ret_size); +} + +int read_full_stream_full( + FILE *f, + const char *filename, + uint64_t offset, + size_t size, + ReadFullFileFlags flags, + char **ret_contents, + size_t *ret_size) { + + _cleanup_free_ char *buf = NULL; + size_t n, n_next = 0, l; + int fd, r; + + assert(f); + assert(ret_contents); + assert(!FLAGS_SET(flags, READ_FULL_FILE_UNBASE64 | READ_FULL_FILE_UNHEX)); + assert(size != SIZE_MAX || !FLAGS_SET(flags, READ_FULL_FILE_FAIL_WHEN_LARGER)); + + if (offset != UINT64_MAX && offset > LONG_MAX) /* fseek() can only deal with "long" offsets */ + return -ERANGE; + + fd = fileno(f); + if (fd >= 0) { /* If the FILE* object is backed by an fd (as opposed to memory or such, see + * fmemopen()), let's optimize our buffering */ + struct stat st; + + if (fstat(fd, &st) < 0) + return -errno; + + if (S_ISREG(st.st_mode)) { + + /* Try to start with the right file size if we shall read the file in full. Note + * that we increase the size to read here by one, so that the first read attempt + * already makes us notice the EOF. If the reported size of the file is zero, we + * avoid this logic however, since quite likely it might be a virtual file in procfs + * that all report a zero file size. */ + + if (st.st_size > 0 && + (size == SIZE_MAX || FLAGS_SET(flags, READ_FULL_FILE_FAIL_WHEN_LARGER))) { + + uint64_t rsize = + LESS_BY((uint64_t) st.st_size, offset == UINT64_MAX ? 0 : offset); + + if (rsize < SIZE_MAX) /* overflow check */ + n_next = rsize + 1; + } + + if (flags & READ_FULL_FILE_WARN_WORLD_READABLE) + (void) warn_file_is_world_accessible(filename, &st, NULL, 0); + } + } + + /* If we don't know how much to read, figure it out now. If we shall read a part of the file, then + * allocate the requested size. If we shall load the full file start with LINE_MAX. Note that if + * READ_FULL_FILE_FAIL_WHEN_LARGER we consider the specified size a safety limit, and thus also start + * with LINE_MAX, under assumption the file is most likely much shorter. */ + if (n_next == 0) + n_next = size != SIZE_MAX && !FLAGS_SET(flags, READ_FULL_FILE_FAIL_WHEN_LARGER) ? size : LINE_MAX; + + /* Never read more than we need to determine that our own limit is hit */ + if (n_next > READ_FULL_BYTES_MAX) + n_next = READ_FULL_BYTES_MAX + 1; + + if (offset != UINT64_MAX && fseek(f, offset, SEEK_SET) < 0) + return -errno; + + n = l = 0; + for (;;) { + char *t; + size_t k; + + /* If we shall fail when reading overly large data, then read exactly one byte more than the + * specified size at max, since that'll tell us if there's anymore data beyond the limit*/ + if (FLAGS_SET(flags, READ_FULL_FILE_FAIL_WHEN_LARGER) && n_next > size) + n_next = size + 1; + + if (flags & READ_FULL_FILE_SECURE) { + t = malloc(n_next + 1); + if (!t) { + r = -ENOMEM; + goto finalize; + } + memcpy_safe(t, buf, n); + explicit_bzero_safe(buf, n); + free(buf); + } else { + t = realloc(buf, n_next + 1); + if (!t) + return -ENOMEM; + } + + buf = t; + /* Unless a size has been explicitly specified, try to read as much as fits into the memory + * we allocated (minus 1, to leave one byte for the safety NUL byte) */ + n = size == SIZE_MAX ? MALLOC_SIZEOF_SAFE(buf) - 1 : n_next; + + errno = 0; + k = fread(buf + l, 1, n - l, f); + + assert(k <= n - l); + l += k; + + if (ferror(f)) { + r = errno_or_else(EIO); + goto finalize; + } + if (feof(f)) + break; + + if (size != SIZE_MAX && !FLAGS_SET(flags, READ_FULL_FILE_FAIL_WHEN_LARGER)) { /* If we got asked to read some specific size, we already sized the buffer right, hence leave */ + assert(l == size); + break; + } + + assert(k > 0); /* we can't have read zero bytes because that would have been EOF */ + + if (FLAGS_SET(flags, READ_FULL_FILE_FAIL_WHEN_LARGER) && l > size) { + r = -E2BIG; + goto finalize; + } + + if (n >= READ_FULL_BYTES_MAX) { + r = -E2BIG; + goto finalize; + } + + n_next = MIN(n * 2, READ_FULL_BYTES_MAX); + } + + if (flags & (READ_FULL_FILE_UNBASE64 | READ_FULL_FILE_UNHEX)) { + _cleanup_free_ void *decoded = NULL; + size_t decoded_size; + + buf[l++] = 0; + if (flags & READ_FULL_FILE_UNBASE64) + r = unbase64mem_full(buf, l, flags & READ_FULL_FILE_SECURE, &decoded, &decoded_size); + else + r = unhexmem_full(buf, l, flags & READ_FULL_FILE_SECURE, &decoded, &decoded_size); + if (r < 0) + goto finalize; + + if (flags & READ_FULL_FILE_SECURE) + explicit_bzero_safe(buf, n); + free_and_replace(buf, decoded); + n = l = decoded_size; + } + + if (!ret_size) { + /* Safety check: if the caller doesn't want to know the size of what we just read it will rely on the + * trailing NUL byte. But if there's an embedded NUL byte, then we should refuse operation as otherwise + * there'd be ambiguity about what we just read. */ + + if (memchr(buf, 0, l)) { + r = -EBADMSG; + goto finalize; + } + } + + buf[l] = 0; + *ret_contents = TAKE_PTR(buf); + + if (ret_size) + *ret_size = l; + + return 0; + +finalize: + if (flags & READ_FULL_FILE_SECURE) + explicit_bzero_safe(buf, n); + + return r; +} + +int read_full_file_full( + int dir_fd, + const char *filename, + uint64_t offset, + size_t size, + ReadFullFileFlags flags, + const char *bind_name, + char **ret_contents, + size_t *ret_size) { + + _cleanup_fclose_ FILE *f = NULL; + int r; + + assert(filename); + assert(ret_contents); + + r = xfopenat(dir_fd, filename, "re", 0, &f); + if (r < 0) { + _cleanup_close_ int sk = -1; + + /* ENXIO is what Linux returns if we open a node that is an AF_UNIX socket */ + if (r != -ENXIO) + return r; + + /* If this is enabled, let's try to connect to it */ + if (!FLAGS_SET(flags, READ_FULL_FILE_CONNECT_SOCKET)) + return -ENXIO; + + /* Seeking is not supported on AF_UNIX sockets */ + if (offset != UINT64_MAX) + return -ENXIO; + + sk = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); + if (sk < 0) + return -errno; + + if (bind_name) { + /* If the caller specified a socket name to bind to, do so before connecting. This is + * useful to communicate some minor, short meta-information token from the client to + * the server. */ + union sockaddr_union bsa; + + r = sockaddr_un_set_path(&bsa.un, bind_name); + if (r < 0) + return r; + + if (bind(sk, &bsa.sa, r) < 0) + return -errno; + } + + r = connect_unix_path(sk, dir_fd, filename); + if (IN_SET(r, -ENOTSOCK, -EINVAL)) /* propagate original error if this is not a socket after all */ + return -ENXIO; + if (r < 0) + return r; + + if (shutdown(sk, SHUT_WR) < 0) + return -errno; + + f = fdopen(sk, "r"); + if (!f) + return -errno; + + TAKE_FD(sk); + } + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + return read_full_stream_full(f, filename, offset, size, flags, ret_contents, ret_size); +} + +int executable_is_script(const char *path, char **interpreter) { + _cleanup_free_ char *line = NULL; + size_t len; + char *ans; + int r; + + assert(path); + + r = read_one_line_file(path, &line); + if (r == -ENOBUFS) /* First line overly long? if so, then it's not a script */ + return 0; + if (r < 0) + return r; + + if (!startswith(line, "#!")) + return 0; + + ans = strstrip(line + 2); + len = strcspn(ans, " \t"); + + if (len == 0) + return 0; + + ans = strndup(ans, len); + if (!ans) + return -ENOMEM; + + *interpreter = ans; + return 1; +} + +/** + * Retrieve one field from a file like /proc/self/status. pattern + * should not include whitespace or the delimiter (':'). pattern matches only + * the beginning of a line. Whitespace before ':' is skipped. Whitespace and + * zeros after the ':' will be skipped. field must be freed afterwards. + * terminator specifies the terminating characters of the field value (not + * included in the value). + */ +int get_proc_field(const char *filename, const char *pattern, const char *terminator, char **field) { + _cleanup_free_ char *status = NULL; + char *t, *f; + size_t len; + int r; + + assert(terminator); + assert(filename); + assert(pattern); + assert(field); + + r = read_full_virtual_file(filename, &status, NULL); + if (r < 0) + return r; + + t = status; + + do { + bool pattern_ok; + + do { + t = strstr(t, pattern); + if (!t) + return -ENOENT; + + /* Check that pattern occurs in beginning of line. */ + pattern_ok = (t == status || t[-1] == '\n'); + + t += strlen(pattern); + + } while (!pattern_ok); + + t += strspn(t, " \t"); + if (!*t) + return -ENOENT; + + } while (*t != ':'); + + t++; + + if (*t) { + t += strspn(t, " \t"); + + /* Also skip zeros, because when this is used for + * capabilities, we don't want the zeros. This way the + * same capability set always maps to the same string, + * irrespective of the total capability set size. For + * other numbers it shouldn't matter. */ + t += strspn(t, "0"); + /* Back off one char if there's nothing but whitespace + and zeros */ + if (!*t || isspace(*t)) + t--; + } + + len = strcspn(t, terminator); + + f = strndup(t, len); + if (!f) + return -ENOMEM; + + *field = f; + return 0; +} + +DIR *xopendirat(int fd, const char *name, int flags) { + int nfd; + DIR *d; + + assert(!(flags & O_CREAT)); + + if (fd == AT_FDCWD && flags == 0) + return opendir(name); + + nfd = openat(fd, name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|flags, 0); + if (nfd < 0) + return NULL; + + d = fdopendir(nfd); + if (!d) { + safe_close(nfd); + return NULL; + } + + return d; +} + +int fopen_mode_to_flags(const char *mode) { + const char *p; + int flags; + + assert(mode); + + if ((p = startswith(mode, "r+"))) + flags = O_RDWR; + else if ((p = startswith(mode, "r"))) + flags = O_RDONLY; + else if ((p = startswith(mode, "w+"))) + flags = O_RDWR|O_CREAT|O_TRUNC; + else if ((p = startswith(mode, "w"))) + flags = O_WRONLY|O_CREAT|O_TRUNC; + else if ((p = startswith(mode, "a+"))) + flags = O_RDWR|O_CREAT|O_APPEND; + else if ((p = startswith(mode, "a"))) + flags = O_WRONLY|O_CREAT|O_APPEND; + else + return -EINVAL; + + for (; *p != 0; p++) { + + switch (*p) { + + case 'e': + flags |= O_CLOEXEC; + break; + + case 'x': + flags |= O_EXCL; + break; + + case 'm': + /* ignore this here, fdopen() might care later though */ + break; + + case 'c': /* not sure what to do about this one */ + default: + return -EINVAL; + } + } + + return flags; +} + +int xfopenat(int dir_fd, const char *path, const char *mode, int flags, FILE **ret) { + FILE *f; + + /* A combination of fopen() with openat() */ + + if (dir_fd == AT_FDCWD && flags == 0) { + f = fopen(path, mode); + if (!f) + return -errno; + } else { + int fd, mode_flags; + + mode_flags = fopen_mode_to_flags(mode); + if (mode_flags < 0) + return mode_flags; + + fd = openat(dir_fd, path, mode_flags | flags); + if (fd < 0) + return -errno; + + f = fdopen(fd, mode); + if (!f) { + safe_close(fd); + return -errno; + } + } + + *ret = f; + return 0; +} + +static int search_and_fopen_internal( + const char *path, + const char *mode, + const char *root, + char **search, + FILE **ret, + char **ret_path) { + + assert(path); + assert(mode); + assert(ret); + + if (!path_strv_resolve_uniq(search, root)) + return -ENOMEM; + + STRV_FOREACH(i, search) { + _cleanup_free_ char *p = NULL; + FILE *f; + + p = path_join(root, *i, path); + if (!p) + return -ENOMEM; + + f = fopen(p, mode); + if (f) { + if (ret_path) + *ret_path = path_simplify(TAKE_PTR(p)); + + *ret = f; + return 0; + } + + if (errno != ENOENT) + return -errno; + } + + return -ENOENT; +} + +int search_and_fopen( + const char *filename, + const char *mode, + const char *root, + const char **search, + FILE **ret, + char **ret_path) { + + _cleanup_strv_free_ char **copy = NULL; + + assert(filename); + assert(mode); + assert(ret); + + if (path_is_absolute(filename)) { + _cleanup_fclose_ FILE *f = NULL; + + f = fopen(filename, mode); + if (!f) + return -errno; + + if (ret_path) { + char *p; + + p = strdup(filename); + if (!p) + return -ENOMEM; + + *ret_path = path_simplify(p); + } + + *ret = TAKE_PTR(f); + return 0; + } + + copy = strv_copy((char**) search); + if (!copy) + return -ENOMEM; + + return search_and_fopen_internal(filename, mode, root, copy, ret, ret_path); +} + +int search_and_fopen_nulstr( + const char *filename, + const char *mode, + const char *root, + const char *search, + FILE **ret, + char **ret_path) { + + _cleanup_strv_free_ char **s = NULL; + + if (path_is_absolute(filename)) { + _cleanup_fclose_ FILE *f = NULL; + + f = fopen(filename, mode); + if (!f) + return -errno; + + if (ret_path) { + char *p; + + p = strdup(filename); + if (!p) + return -ENOMEM; + + *ret_path = path_simplify(p); + } + + *ret = TAKE_PTR(f); + return 0; + } + + s = strv_split_nulstr(search); + if (!s) + return -ENOMEM; + + return search_and_fopen_internal(filename, mode, root, s, ret, ret_path); +} + +int fflush_and_check(FILE *f) { + assert(f); + + errno = 0; + fflush(f); + + if (ferror(f)) + return errno_or_else(EIO); + + return 0; +} + +int fflush_sync_and_check(FILE *f) { + int r, fd; + + assert(f); + + r = fflush_and_check(f); + if (r < 0) + return r; + + /* Not all file streams have an fd associated (think: fmemopen()), let's handle this gracefully and + * assume that in that case we need no explicit syncing */ + fd = fileno(f); + if (fd < 0) + return 0; + + r = fsync_full(fd); + if (r < 0) + return r; + + return 0; +} + +int write_timestamp_file_atomic(const char *fn, usec_t n) { + char ln[DECIMAL_STR_MAX(n)+2]; + + /* Creates a "timestamp" file, that contains nothing but a + * usec_t timestamp, formatted in ASCII. */ + + if (!timestamp_is_set(n)) + return -ERANGE; + + xsprintf(ln, USEC_FMT "\n", n); + + return write_string_file(fn, ln, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC); +} + +int read_timestamp_file(const char *fn, usec_t *ret) { + _cleanup_free_ char *ln = NULL; + uint64_t t; + int r; + + r = read_one_line_file(fn, &ln); + if (r < 0) + return r; + + r = safe_atou64(ln, &t); + if (r < 0) + return r; + + if (!timestamp_is_set(t)) + return -ERANGE; + + *ret = (usec_t) t; + return 0; +} + +int fputs_with_space(FILE *f, const char *s, const char *separator, bool *space) { + int r; + + assert(s); + + /* Outputs the specified string with fputs(), but optionally prefixes it with a separator. The *space parameter + * when specified shall initially point to a boolean variable initialized to false. It is set to true after the + * first invocation. This call is supposed to be use in loops, where a separator shall be inserted between each + * element, but not before the first one. */ + + if (!f) + f = stdout; + + if (space) { + if (!separator) + separator = " "; + + if (*space) { + r = fputs(separator, f); + if (r < 0) + return r; + } + + *space = true; + } + + return fputs(s, f); +} + +/* A bitmask of the EOL markers we know */ +typedef enum EndOfLineMarker { + EOL_NONE = 0, + EOL_ZERO = 1 << 0, /* \0 (aka NUL) */ + EOL_TEN = 1 << 1, /* \n (aka NL, aka LF) */ + EOL_THIRTEEN = 1 << 2, /* \r (aka CR) */ +} EndOfLineMarker; + +static EndOfLineMarker categorize_eol(char c, ReadLineFlags flags) { + + if (!IN_SET(flags, READ_LINE_ONLY_NUL)) { + if (c == '\n') + return EOL_TEN; + if (c == '\r') + return EOL_THIRTEEN; + } + + if (c == '\0') + return EOL_ZERO; + + return EOL_NONE; +} + +DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(FILE*, funlockfile, NULL); + +int read_line_full(FILE *f, size_t limit, ReadLineFlags flags, char **ret) { + _cleanup_free_ char *buffer = NULL; + size_t n = 0, count = 0; + int r; + + assert(f); + + /* Something like a bounded version of getline(). + * + * Considers EOF, \n, \r and \0 end of line delimiters (or combinations of these), and does not include these + * delimiters in the string returned. Specifically, recognizes the following combinations of markers as line + * endings: + * + * • \n (UNIX) + * • \r (old MacOS) + * • \0 (C strings) + * • \n\0 + * • \r\0 + * • \r\n (Windows) + * • \n\r + * • \r\n\0 + * • \n\r\0 + * + * Returns the number of bytes read from the files (i.e. including delimiters — this hence usually differs from + * the number of characters in the returned string). When EOF is hit, 0 is returned. + * + * The input parameter limit is the maximum numbers of characters in the returned string, i.e. excluding + * delimiters. If the limit is hit we fail and return -ENOBUFS. + * + * If a line shall be skipped ret may be initialized as NULL. */ + + if (ret) { + if (!GREEDY_REALLOC(buffer, 1)) + return -ENOMEM; + } + + { + _unused_ _cleanup_(funlockfilep) FILE *flocked = f; + EndOfLineMarker previous_eol = EOL_NONE; + flockfile(f); + + for (;;) { + EndOfLineMarker eol; + char c; + + if (n >= limit) + return -ENOBUFS; + + if (count >= INT_MAX) /* We couldn't return the counter anymore as "int", hence refuse this */ + return -ENOBUFS; + + r = safe_fgetc(f, &c); + if (r < 0) + return r; + if (r == 0) /* EOF is definitely EOL */ + break; + + eol = categorize_eol(c, flags); + + if (FLAGS_SET(previous_eol, EOL_ZERO) || + (eol == EOL_NONE && previous_eol != EOL_NONE) || + (eol != EOL_NONE && (previous_eol & eol) != 0)) { + /* Previous char was a NUL? This is not an EOL, but the previous char was? This type of + * EOL marker has been seen right before? In either of these three cases we are + * done. But first, let's put this character back in the queue. (Note that we have to + * cast this to (unsigned char) here as ungetc() expects a positive 'int', and if we + * are on an architecture where 'char' equals 'signed char' we need to ensure we don't + * pass a negative value here. That said, to complicate things further ungetc() is + * actually happy with most negative characters and implicitly casts them back to + * positive ones as needed, except for \xff (aka -1, aka EOF), which it refuses. What a + * godawful API!) */ + assert_se(ungetc((unsigned char) c, f) != EOF); + break; + } + + count++; + + if (eol != EOL_NONE) { + /* If we are on a tty, we can't shouldn't wait for more input, because that + * generally means waiting for the user, interactively. In the case of a TTY + * we expect only \n as the single EOL marker, so we are in the lucky + * position that there is no need to wait. We check this condition last, to + * avoid isatty() check if not necessary. */ + + if ((flags & (READ_LINE_IS_A_TTY|READ_LINE_NOT_A_TTY)) == 0) { + int fd; + + fd = fileno(f); + if (fd < 0) /* Maybe an fmemopen() stream? Handle this gracefully, + * and don't call isatty() on an invalid fd */ + flags |= READ_LINE_NOT_A_TTY; + else + flags |= isatty(fd) ? READ_LINE_IS_A_TTY : READ_LINE_NOT_A_TTY; + } + if (FLAGS_SET(flags, READ_LINE_IS_A_TTY)) + break; + } + + if (eol != EOL_NONE) { + previous_eol |= eol; + continue; + } + + if (ret) { + if (!GREEDY_REALLOC(buffer, n + 2)) + return -ENOMEM; + + buffer[n] = c; + } + + n++; + } + } + + if (ret) { + buffer[n] = 0; + + *ret = TAKE_PTR(buffer); + } + + return (int) count; +} + +int safe_fgetc(FILE *f, char *ret) { + int k; + + assert(f); + + /* A safer version of plain fgetc(): let's propagate the error that happened while reading as such, and + * separate the EOF condition from the byte read, to avoid those confusion signed/unsigned issues fgetc() + * has. */ + + errno = 0; + k = fgetc(f); + if (k == EOF) { + if (ferror(f)) + return errno_or_else(EIO); + + if (ret) + *ret = 0; + + return 0; + } + + if (ret) + *ret = k; + + return 1; +} + +int warn_file_is_world_accessible(const char *filename, struct stat *st, const char *unit, unsigned line) { + struct stat _st; + + if (!filename) + return 0; + + if (!st) { + if (stat(filename, &_st) < 0) + return -errno; + st = &_st; + } + + if ((st->st_mode & S_IRWXO) == 0) + return 0; + + if (unit) + log_syntax(unit, LOG_WARNING, filename, line, 0, + "%s has %04o mode that is too permissive, please adjust the ownership and access mode.", + filename, st->st_mode & 07777); + else + log_warning("%s has %04o mode that is too permissive, please adjust the ownership and access mode.", + filename, st->st_mode & 07777); + return 0; +} diff --git a/src/basic/fileio.h b/src/basic/fileio.h new file mode 100644 index 0000000..9151d82 --- /dev/null +++ b/src/basic/fileio.h @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "macro.h" +#include "time-util.h" + +#define LONG_LINE_MAX (1U*1024U*1024U) + +typedef enum { + WRITE_STRING_FILE_CREATE = 1 << 0, + WRITE_STRING_FILE_TRUNCATE = 1 << 1, + WRITE_STRING_FILE_ATOMIC = 1 << 2, + WRITE_STRING_FILE_AVOID_NEWLINE = 1 << 3, + WRITE_STRING_FILE_VERIFY_ON_FAILURE = 1 << 4, + WRITE_STRING_FILE_VERIFY_IGNORE_NEWLINE = 1 << 5, + WRITE_STRING_FILE_SYNC = 1 << 6, + WRITE_STRING_FILE_DISABLE_BUFFER = 1 << 7, + WRITE_STRING_FILE_NOFOLLOW = 1 << 8, + WRITE_STRING_FILE_MKDIR_0755 = 1 << 9, + WRITE_STRING_FILE_MODE_0600 = 1 << 10, + WRITE_STRING_FILE_SUPPRESS_REDUNDANT_VIRTUAL = 1 << 11, + + /* And before you wonder, why write_string_file_atomic_label_ts() is a separate function instead of just one + more flag here: it's about linking: we don't want to pull -lselinux into all users of write_string_file() + and friends. */ + +} WriteStringFileFlags; + +typedef enum { + READ_FULL_FILE_SECURE = 1 << 0, /* erase any buffers we employ internally, after use */ + READ_FULL_FILE_UNBASE64 = 1 << 1, /* base64 decode what we read */ + READ_FULL_FILE_UNHEX = 1 << 2, /* hex decode what we read */ + READ_FULL_FILE_WARN_WORLD_READABLE = 1 << 3, /* if regular file, log at LOG_WARNING level if access mode above 0700 */ + READ_FULL_FILE_CONNECT_SOCKET = 1 << 4, /* if socket inode, connect to it and read off it */ + READ_FULL_FILE_FAIL_WHEN_LARGER = 1 << 5, /* fail loading if file is larger than specified size */ +} ReadFullFileFlags; + +int fopen_unlocked(const char *path, const char *options, FILE **ret); +int fdopen_unlocked(int fd, const char *options, FILE **ret); +int take_fdopen_unlocked(int *fd, const char *options, FILE **ret); +FILE* take_fdopen(int *fd, const char *options); +DIR* take_fdopendir(int *dfd); +FILE* open_memstream_unlocked(char **ptr, size_t *sizeloc); +FILE* fmemopen_unlocked(void *buf, size_t size, const char *mode); + +int write_string_stream_ts(FILE *f, const char *line, WriteStringFileFlags flags, const struct timespec *ts); +static inline int write_string_stream(FILE *f, const char *line, WriteStringFileFlags flags) { + return write_string_stream_ts(f, line, flags, NULL); +} +int write_string_file_ts(const char *fn, const char *line, WriteStringFileFlags flags, const struct timespec *ts); +static inline int write_string_file(const char *fn, const char *line, WriteStringFileFlags flags) { + return write_string_file_ts(fn, line, flags, NULL); +} + +int write_string_filef(const char *fn, WriteStringFileFlags flags, const char *format, ...) _printf_(3, 4); + +int read_one_line_file(const char *filename, char **line); +int read_full_file_full(int dir_fd, const char *filename, uint64_t offset, size_t size, ReadFullFileFlags flags, const char *bind_name, char **ret_contents, size_t *ret_size); +static inline int read_full_file(const char *filename, char **ret_contents, size_t *ret_size) { + return read_full_file_full(AT_FDCWD, filename, UINT64_MAX, SIZE_MAX, 0, NULL, ret_contents, ret_size); +} + +int read_virtual_file_fd(int fd, size_t max_size, char **ret_contents, size_t *ret_size); +int read_virtual_file_at(int dir_fd, const char *filename, size_t max_size, char **ret_contents, size_t *ret_size); +static inline int read_virtual_file(const char *filename, size_t max_size, char **ret_contents, size_t *ret_size) { + return read_virtual_file_at(AT_FDCWD, filename, max_size, ret_contents, ret_size); +} +static inline int read_full_virtual_file(const char *filename, char **ret_contents, size_t *ret_size) { + return read_virtual_file(filename, SIZE_MAX, ret_contents, ret_size); +} + +int read_full_stream_full(FILE *f, const char *filename, uint64_t offset, size_t size, ReadFullFileFlags flags, char **ret_contents, size_t *ret_size); +static inline int read_full_stream(FILE *f, char **ret_contents, size_t *ret_size) { + return read_full_stream_full(f, NULL, UINT64_MAX, SIZE_MAX, 0, ret_contents, ret_size); +} + +int verify_file(const char *fn, const char *blob, bool accept_extra_nl); + +int executable_is_script(const char *path, char **interpreter); + +int get_proc_field(const char *filename, const char *pattern, const char *terminator, char **field); + +DIR *xopendirat(int dirfd, const char *name, int flags); +int xfopenat(int dir_fd, const char *path, const char *mode, int flags, FILE **ret); + +int search_and_fopen(const char *path, const char *mode, const char *root, const char **search, FILE **ret, char **ret_path); +int search_and_fopen_nulstr(const char *path, const char *mode, const char *root, const char *search, FILE **ret, char **ret_path); + +int fflush_and_check(FILE *f); +int fflush_sync_and_check(FILE *f); + +int write_timestamp_file_atomic(const char *fn, usec_t n); +int read_timestamp_file(const char *fn, usec_t *ret); + +int fputs_with_space(FILE *f, const char *s, const char *separator, bool *space); + +typedef enum ReadLineFlags { + READ_LINE_ONLY_NUL = 1 << 0, + READ_LINE_IS_A_TTY = 1 << 1, + READ_LINE_NOT_A_TTY = 1 << 2, +} ReadLineFlags; + +int read_line_full(FILE *f, size_t limit, ReadLineFlags flags, char **ret); + +static inline bool file_offset_beyond_memory_size(off_t x) { + if (x < 0) /* off_t is signed, filter that out */ + return false; + return (uint64_t) x > (uint64_t) SIZE_MAX; +} + +static inline int read_line(FILE *f, size_t limit, char **ret) { + return read_line_full(f, limit, 0, ret); +} + +static inline int read_nul_string(FILE *f, size_t limit, char **ret) { + return read_line_full(f, limit, READ_LINE_ONLY_NUL, ret); +} + +int safe_fgetc(FILE *f, char *ret); + +int warn_file_is_world_accessible(const char *filename, struct stat *st, const char *unit, unsigned line); + +int fopen_mode_to_flags(const char *mode); diff --git a/src/basic/filesystems-gperf.gperf b/src/basic/filesystems-gperf.gperf new file mode 100644 index 0000000..e8c5357 --- /dev/null +++ b/src/basic/filesystems-gperf.gperf @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +%{ +#include + +#include "filesystems.h" +#include "missing_magic.h" +#include "stat-util.h" + +struct FilesystemMagic { + const char *name; + statfs_f_type_t magic[FILESYSTEM_MAGIC_MAX]; +}; +%} +struct FilesystemMagic; +%language=ANSI-C +%define hash-function-name filesystems_gperf_hash +%define lookup-function-name filesystems_gperf_lookup +%define slot-name name +%readonly-tables +%omit-struct-type +%struct-type +%includes +%% +apparmorfs, {AAFS_MAGIC} +adfs, {ADFS_SUPER_MAGIC} +affs, {AFFS_SUPER_MAGIC} +afs, {AFS_FS_MAGIC, AFS_SUPER_MAGIC} +anon_inodefs, {ANON_INODE_FS_MAGIC} +autofs, {AUTOFS_SUPER_MAGIC} +balloon-kvm, {BALLOON_KVM_MAGIC} +bdev, {BDEVFS_MAGIC} +binder, {BINDERFS_SUPER_MAGIC} +binfmt_misc, {BINFMTFS_MAGIC} +bpf, {BPF_FS_MAGIC} +btrfs, {BTRFS_SUPER_MAGIC} +btrfs_test_fs, {BTRFS_TEST_MAGIC} +# cpuset's magic got reassigned to cgroupfs +cpuset, {CGROUP_SUPER_MAGIC} +ceph, {CEPH_SUPER_MAGIC} +cgroup2, {CGROUP2_SUPER_MAGIC} +# note that the cgroupfs magic got reassigned from cpuset +cgroup, {CGROUP_SUPER_MAGIC} +cifs, {CIFS_SUPER_MAGIC, SMB2_SUPER_MAGIC} +coda, {CODA_SUPER_MAGIC} +configfs, {CONFIGFS_MAGIC} +cramfs, {CRAMFS_MAGIC} +dax, {DAXFS_MAGIC} +debugfs, {DEBUGFS_MAGIC} +devmem, {DEVMEM_MAGIC} +devpts, {DEVPTS_SUPER_MAGIC} +# devtmpfs is just a special instance of tmpfs, hence it reports its magic +devtmpfs, {TMPFS_MAGIC} +dmabuf, {DMA_BUF_MAGIC} +ecryptfs, {ECRYPTFS_SUPER_MAGIC} +efivarfs, {EFIVARFS_MAGIC} +efs, {EFS_SUPER_MAGIC} +erofs, {EROFS_SUPER_MAGIC_V1} +# ext2 + ext3 + ext4 use the same magic +ext2, {EXT2_SUPER_MAGIC} +ext3, {EXT3_SUPER_MAGIC} +ext4, {EXT4_SUPER_MAGIC} +exfat, {EXFAT_SUPER_MAGIC} +f2fs, {F2FS_SUPER_MAGIC} +# fuseblk is so closely related to fuse that it shares the same magic +fuseblk, {FUSE_SUPER_MAGIC} +fuse, {FUSE_SUPER_MAGIC} +fusectl, {FUSE_CTL_SUPER_MAGIC} +# gfs is an old version of gfs2 and reuses the magic +gfs, {GFS2_MAGIC} +gfs2, {GFS2_MAGIC} +hostfs, {HOSTFS_SUPER_MAGIC} +hpfs, {HPFS_SUPER_MAGIC} +hugetlbfs, {HUGETLBFS_MAGIC} +iso9660, {ISOFS_SUPER_MAGIC} +jffs2, {JFFS2_SUPER_MAGIC} +minix, {MINIX_SUPER_MAGIC, MINIX_SUPER_MAGIC2, MINIX2_SUPER_MAGIC, MINIX2_SUPER_MAGIC2, MINIX3_SUPER_MAGIC} +mqueue, {MQUEUE_MAGIC} +# msdos is an older legacy version of vfat, shares the magic +msdos, {MSDOS_SUPER_MAGIC} +# ncp/ncpfs have been removed from the kernel, but ncpfs was the official name +ncp, {NCP_SUPER_MAGIC} +ncpfs, {NCP_SUPER_MAGIC} +# nfs is the old version of nfs4, and they share the same magic +nfs, {NFS_SUPER_MAGIC} +nfs4, {NFS_SUPER_MAGIC} +nilfs2, {NILFS_SUPER_MAGIC} +nsfs, {NSFS_MAGIC} +ntfs, {NTFS_SB_MAGIC} +ntfs3, {NTFS3_SUPER_MAGIC} +ocfs2, {OCFS2_SUPER_MAGIC} +openpromfs, {OPENPROM_SUPER_MAGIC} +orangefs, {ORANGEFS_DEVREQ_MAGIC} +overlay, {OVERLAYFS_SUPER_MAGIC} +pipefs, {PIPEFS_MAGIC} +ppc-cmm, {PPC_CMM_MAGIC} +proc, {PROC_SUPER_MAGIC} +pstore, {PSTOREFS_MAGIC} +# pvfs2 is the old version of orangefs +pvfs2, {ORANGEFS_DEVREQ_MAGIC} +qnx4, {QNX4_SUPER_MAGIC} +qnx6, {QNX6_SUPER_MAGIC} +ramfs, {RAMFS_MAGIC} +resctrl, {RDTGROUP_SUPER_MAGIC} +reiserfs, {REISERFS_SUPER_MAGIC} +rpc_pipefs, {RPC_PIPEFS_SUPER_MAGIC} +secretmem, {SECRETMEM_MAGIC} +securityfs, {SECURITYFS_MAGIC} +selinuxfs, {SELINUX_MAGIC} +shiftfs, {SHIFTFS_MAGIC} +smackfs, {SMACK_MAGIC} +# smb3 is an alias for cifs +smb3, {CIFS_SUPER_MAGIC} +# smbfs was removed from the kernel in 2010, the magic remains +smbfs, {SMB_SUPER_MAGIC} +sockfs, {SOCKFS_MAGIC} +squashfs, {SQUASHFS_MAGIC} +sysfs, {SYSFS_MAGIC} +# note that devtmpfs shares the same magic with tmpfs, given it is just a special named instance of it. +tmpfs, {TMPFS_MAGIC} +tracefs, {TRACEFS_MAGIC} +udf, {UDF_SUPER_MAGIC} +usbdevfs, {USBDEVICE_SUPER_MAGIC} +vboxsf, {VBOXSF_SUPER_MAGIC} +# note that msdos shares the same magic (and is the older version) +vfat, {MSDOS_SUPER_MAGIC} +v9fs, {V9FS_MAGIC} +xenfs, {XENFS_SUPER_MAGIC} +xfs, {XFS_SUPER_MAGIC} +z3fold, {Z3FOLD_MAGIC} +zonefs, {ZONEFS_MAGIC} +zsmalloc, {ZSMALLOC_MAGIC} diff --git a/src/basic/filesystems.c b/src/basic/filesystems.c new file mode 100644 index 0000000..0f71f8e --- /dev/null +++ b/src/basic/filesystems.c @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "filesystems-gperf.h" +#include "stat-util.h" + +const char *fs_type_to_string(statfs_f_type_t magic) { + + switch (magic) { +#include "filesystem-switch-case.h" + } + + return NULL; +} + + +int fs_type_from_string(const char *name, const statfs_f_type_t **ret) { + const struct FilesystemMagic *fs_magic; + + assert(name); + assert(ret); + + fs_magic = filesystems_gperf_lookup(name, strlen(name)); + if (!fs_magic) + return -EINVAL; + + *ret = fs_magic->magic; + return 0; +} + +int fs_in_group(const struct statfs *s, FilesystemGroups fs_group) { + const char *fs; + int r; + + NULSTR_FOREACH(fs, filesystem_sets[fs_group].value) { + const statfs_f_type_t *magic; + + r = fs_type_from_string(fs, &magic); + if (r == 0) { + for (size_t i = 0; i < FILESYSTEM_MAGIC_MAX; i++) { + if (magic[i] == 0) + break; + + if (is_fs_type(s, magic[i])) + return true; + } + } + } + + return false; +} + +const FilesystemSet filesystem_sets[_FILESYSTEM_SET_MAX] = { + [FILESYSTEM_SET_BASIC_API] = { + .name = "@basic-api", + .help = "Basic filesystem API", + .value = + "cgroup\0" + "cgroup2\0" + "devpts\0" + "devtmpfs\0" + "mqueue\0" + "proc\0" + "sysfs\0" + }, + [FILESYSTEM_SET_ANONYMOUS] = { + .name = "@anonymous", + .help = "Anonymous inodes", + .value = + "anon_inodefs\0" + "pipefs\0" + "sockfs\0" + }, + [FILESYSTEM_SET_APPLICATION] = { + .name = "@application", + .help = "Application virtual filesystems", + .value = + "autofs\0" + "fuse\0" + "overlay\0" + }, + [FILESYSTEM_SET_AUXILIARY_API] = { + .name = "@auxiliary-api", + .help = "Auxiliary filesystem API", + .value = + "binfmt_misc\0" + "configfs\0" + "efivarfs\0" + "fusectl\0" + "hugetlbfs\0" + "rpc_pipefs\0" + "securityfs\0" + }, + [FILESYSTEM_SET_COMMON_BLOCK] = { + .name = "@common-block", + .help = "Common block device filesystems", + .value = + "btrfs\0" + "erofs\0" + "exfat\0" + "ext4\0" + "f2fs\0" + "iso9660\0" + "ntfs3\0" + "squashfs\0" + "udf\0" + "vfat\0" + "xfs\0" + }, + [FILESYSTEM_SET_HISTORICAL_BLOCK] = { + .name = "@historical-block", + .help = "Historical block device filesystems", + .value = + "ext2\0" + "ext3\0" + "minix\0" + }, + [FILESYSTEM_SET_NETWORK] = { + .name = "@network", + .help = "Well-known network filesystems", + .value = + "afs\0" + "ceph\0" + "cifs\0" + "gfs\0" + "gfs2\0" + "ncp\0" + "ncpfs\0" + "nfs\0" + "nfs4\0" + "ocfs2\0" + "orangefs\0" + "pvfs2\0" + "smb3\0" + "smbfs\0" + }, + [FILESYSTEM_SET_PRIVILEGED_API] = { + .name = "@privileged-api", + .help = "Privileged filesystem API", + .value = + "bpf\0" + "debugfs\0" + "pstore\0" + "tracefs\0" + }, + [FILESYSTEM_SET_SECURITY] = { + .name = "@security", + .help = "Security/MAC API VFS", + .value = + "apparmorfs\0" + "selinuxfs\0" + "smackfs\0" + }, + [FILESYSTEM_SET_TEMPORARY] = { + .name = "@temporary", + .help = "Temporary filesystems", + .value = + "ramfs\0" + "tmpfs\0" + }, + [FILESYSTEM_SET_KNOWN] = { + .name = "@known", + .help = "All known filesystems declared in the kernel", + .value = +#include "filesystem-list.h" + }, +}; + +const FilesystemSet *filesystem_set_find(const char *name) { + if (isempty(name) || name[0] != '@') + return NULL; + + for (FilesystemGroups i = 0; i < _FILESYSTEM_SET_MAX; i++) + if (streq(filesystem_sets[i].name, name)) + return filesystem_sets + i; + + return NULL; +} diff --git a/src/basic/filesystems.h b/src/basic/filesystems.h new file mode 100644 index 0000000..6d07a97 --- /dev/null +++ b/src/basic/filesystems.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "nulstr-util.h" +#include "stat-util.h" +#include "string-util.h" + +#define FILESYSTEM_MAGIC_MAX 10 + +typedef enum FilesystemGroups { + /* Please leave BASIC_API first and KNOWN last, but sort the rest alphabetically */ + FILESYSTEM_SET_BASIC_API, + FILESYSTEM_SET_ANONYMOUS, + FILESYSTEM_SET_APPLICATION, + FILESYSTEM_SET_AUXILIARY_API, + FILESYSTEM_SET_COMMON_BLOCK, + FILESYSTEM_SET_HISTORICAL_BLOCK, + FILESYSTEM_SET_NETWORK, + FILESYSTEM_SET_PRIVILEGED_API, + FILESYSTEM_SET_SECURITY, + FILESYSTEM_SET_TEMPORARY, + FILESYSTEM_SET_KNOWN, + _FILESYSTEM_SET_MAX, + _FILESYSTEM_SET_INVALID = -EINVAL, +} FilesystemGroups; + +typedef struct FilesystemSet { + const char *name; + const char *help; + const char *value; +} FilesystemSet; + +extern const FilesystemSet filesystem_sets[]; + +const FilesystemSet *filesystem_set_find(const char *name); + +const char *fs_type_to_string(statfs_f_type_t magic); +int fs_type_from_string(const char *name, const statfs_f_type_t **ret); +int fs_in_group(const struct statfs *s, enum FilesystemGroups fs_group); + +/* gperf prototypes */ +const struct FilesystemMagic* filesystems_gperf_lookup(const char *key, GPERF_LEN_TYPE length); diff --git a/src/basic/format-util.c b/src/basic/format-util.c new file mode 100644 index 0000000..9450185 --- /dev/null +++ b/src/basic/format-util.c @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "format-util.h" +#include "memory-util.h" +#include "stdio-util.h" +#include "strxcpyx.h" + +assert_cc(STRLEN("%") + DECIMAL_STR_MAX(int) <= IF_NAMESIZE); +int format_ifname_full(int ifindex, FormatIfnameFlag flag, char buf[static IF_NAMESIZE]) { + if (ifindex <= 0) + return -EINVAL; + + if (if_indextoname(ifindex, buf)) + return 0; + + if (!FLAGS_SET(flag, FORMAT_IFNAME_IFINDEX)) + return -errno; + + if (FLAGS_SET(flag, FORMAT_IFNAME_IFINDEX_WITH_PERCENT)) + assert(snprintf_ok(buf, IF_NAMESIZE, "%%%d", ifindex)); + else + assert(snprintf_ok(buf, IF_NAMESIZE, "%d", ifindex)); + + return 0; +} + +int format_ifname_full_alloc(int ifindex, FormatIfnameFlag flag, char **ret) { + char buf[IF_NAMESIZE], *copy; + int r; + + assert(ret); + + r = format_ifname_full(ifindex, flag, buf); + if (r < 0) + return r; + + copy = strdup(buf); + if (!copy) + return -ENOMEM; + + *ret = copy; + return 0; +} + +char *format_bytes_full(char *buf, size_t l, uint64_t t, FormatBytesFlag flag) { + typedef struct { + const char *suffix; + uint64_t factor; + } suffix_table; + static const suffix_table table_iec[] = { + { "E", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) }, + { "P", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) }, + { "T", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) }, + { "G", UINT64_C(1024)*UINT64_C(1024)*UINT64_C(1024) }, + { "M", UINT64_C(1024)*UINT64_C(1024) }, + { "K", UINT64_C(1024) }, + }, table_si[] = { + { "E", UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000) }, + { "P", UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000) }, + { "T", UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000) }, + { "G", UINT64_C(1000)*UINT64_C(1000)*UINT64_C(1000) }, + { "M", UINT64_C(1000)*UINT64_C(1000) }, + { "K", UINT64_C(1000) }, + }; + const suffix_table *table; + size_t n; + + assert_cc(ELEMENTSOF(table_iec) == ELEMENTSOF(table_si)); + + if (t == UINT64_MAX) + return NULL; + + table = flag & FORMAT_BYTES_USE_IEC ? table_iec : table_si; + n = ELEMENTSOF(table_iec); + + for (size_t i = 0; i < n; i++) + if (t >= table[i].factor) { + if (flag & FORMAT_BYTES_BELOW_POINT) { + (void) snprintf(buf, l, + "%" PRIu64 ".%" PRIu64 "%s", + t / table[i].factor, + i != n - 1 ? + (t / table[i + 1].factor * UINT64_C(10) / table[n - 1].factor) % UINT64_C(10): + (t * UINT64_C(10) / table[i].factor) % UINT64_C(10), + table[i].suffix); + } else + (void) snprintf(buf, l, + "%" PRIu64 "%s", + t / table[i].factor, + table[i].suffix); + + goto finish; + } + + (void) snprintf(buf, l, "%" PRIu64 "%s", t, flag & FORMAT_BYTES_TRAILING_B ? "B" : ""); + +finish: + buf[l-1] = 0; + return buf; + +} diff --git a/src/basic/format-util.h b/src/basic/format-util.h new file mode 100644 index 0000000..8719df3 --- /dev/null +++ b/src/basic/format-util.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include + +#include "cgroup-util.h" +#include "macro.h" + +assert_cc(sizeof(pid_t) == sizeof(int32_t)); +#define PID_PRI PRIi32 +#define PID_FMT "%" PID_PRI + +assert_cc(sizeof(uid_t) == sizeof(uint32_t)); +#define UID_FMT "%" PRIu32 + +assert_cc(sizeof(gid_t) == sizeof(uint32_t)); +#define GID_FMT "%" PRIu32 + +#if SIZEOF_TIME_T == 8 +# define PRI_TIME PRIi64 +#elif SIZEOF_TIME_T == 4 +# define PRI_TIME "li" +#else +# error Unknown time_t size +#endif + +#if SIZEOF_TIMEX_MEMBER == 8 +# define PRI_TIMEX PRIi64 +#elif SIZEOF_TIMEX_MEMBER == 4 +# define PRI_TIMEX "li" +#else +# error Unknown timex member size +#endif + +#if SIZEOF_RLIM_T == 8 +# define RLIM_FMT "%" PRIu64 +#elif SIZEOF_RLIM_T == 4 +# define RLIM_FMT "%" PRIu32 +#else +# error Unknown rlim_t size +#endif + +#if SIZEOF_DEV_T == 8 +# define DEV_FMT "%" PRIu64 +#elif SIZEOF_DEV_T == 4 +# define DEV_FMT "%" PRIu32 +#else +# error Unknown dev_t size +#endif + +#if SIZEOF_INO_T == 8 +# define INO_FMT "%" PRIu64 +#elif SIZEOF_INO_T == 4 +# define INO_FMT "%" PRIu32 +#else +# error Unknown ino_t size +#endif + +typedef enum { + FORMAT_IFNAME_IFINDEX = 1 << 0, + FORMAT_IFNAME_IFINDEX_WITH_PERCENT = (1 << 1) | FORMAT_IFNAME_IFINDEX, +} FormatIfnameFlag; + +int format_ifname_full(int ifindex, FormatIfnameFlag flag, char buf[static IF_NAMESIZE]); +int format_ifname_full_alloc(int ifindex, FormatIfnameFlag flag, char **ret); + +static inline int format_ifname(int ifindex, char buf[static IF_NAMESIZE]) { + return format_ifname_full(ifindex, 0, buf); +} +static inline int format_ifname_alloc(int ifindex, char **ret) { + return format_ifname_full_alloc(ifindex, 0, ret); +} + +static inline char *_format_ifname_full(int ifindex, FormatIfnameFlag flag, char buf[static IF_NAMESIZE]) { + (void) format_ifname_full(ifindex, flag, buf); + return buf; +} + +#define FORMAT_IFNAME_FULL(index, flag) _format_ifname_full(index, flag, (char[IF_NAMESIZE]){}) +#define FORMAT_IFNAME(index) _format_ifname_full(index, 0, (char[IF_NAMESIZE]){}) + +typedef enum { + FORMAT_BYTES_USE_IEC = 1 << 0, + FORMAT_BYTES_BELOW_POINT = 1 << 1, + FORMAT_BYTES_TRAILING_B = 1 << 2, +} FormatBytesFlag; + +#define FORMAT_BYTES_MAX 16U + +char *format_bytes_full(char *buf, size_t l, uint64_t t, FormatBytesFlag flag) _warn_unused_result_; + +_warn_unused_result_ +static inline char *format_bytes(char *buf, size_t l, uint64_t t) { + return format_bytes_full(buf, l, t, FORMAT_BYTES_USE_IEC | FORMAT_BYTES_BELOW_POINT | FORMAT_BYTES_TRAILING_B); +} + +/* Note: the lifetime of the compound literal is the immediately surrounding block, + * see C11 §6.5.2.5, and + * https://stackoverflow.com/questions/34880638/compound-literal-lifetime-and-if-blocks */ +#define FORMAT_BYTES(t) format_bytes((char[FORMAT_BYTES_MAX]){}, FORMAT_BYTES_MAX, t) +#define FORMAT_BYTES_FULL(t, flag) format_bytes_full((char[FORMAT_BYTES_MAX]){}, FORMAT_BYTES_MAX, t, flag) + +#define FORMAT_BYTES_CGROUP_PROTECTION(t) (t == CGROUP_LIMIT_MAX ? "infinity" : FORMAT_BYTES(t)) diff --git a/src/basic/fs-util.c b/src/basic/fs-util.c new file mode 100644 index 0000000..6b757bd --- /dev/null +++ b/src/basic/fs-util.c @@ -0,0 +1,1109 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "hostname-util.h" +#include "log.h" +#include "macro.h" +#include "missing_fcntl.h" +#include "missing_fs.h" +#include "missing_syscall.h" +#include "mkdir.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "random-util.h" +#include "ratelimit.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" +#include "tmpfile-util.h" +#include "umask-util.h" +#include "user-util.h" +#include "util.h" + +int unlink_noerrno(const char *path) { + PROTECT_ERRNO; + return RET_NERRNO(unlink(path)); +} + +int rmdir_parents(const char *path, const char *stop) { + char *p; + int r; + + assert(path); + assert(stop); + + if (!path_is_safe(path)) + return -EINVAL; + + if (!path_is_safe(stop)) + return -EINVAL; + + p = strdupa_safe(path); + + for (;;) { + char *slash = NULL; + + /* skip the last component. */ + r = path_find_last_component(p, /* accept_dot_dot= */ false, (const char **) &slash, NULL); + if (r <= 0) + return r; + if (slash == p) + return 0; + + assert(*slash == '/'); + *slash = '\0'; + + if (path_startswith_full(stop, p, /* accept_dot_dot= */ false)) + return 0; + + if (rmdir(p) < 0 && errno != ENOENT) + return -errno; + } +} + +int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath) { + int r; + + /* Try the ideal approach first */ + if (renameat2(olddirfd, oldpath, newdirfd, newpath, RENAME_NOREPLACE) >= 0) + return 0; + + /* renameat2() exists since Linux 3.15, btrfs and FAT added support for it later. If it is not implemented, + * fall back to a different method. */ + if (!ERRNO_IS_NOT_SUPPORTED(errno) && errno != EINVAL) + return -errno; + + /* Let's try to use linkat()+unlinkat() as fallback. This doesn't work on directories and on some file systems + * that do not support hard links (such as FAT, most prominently), but for files it's pretty close to what we + * want — though not atomic (i.e. for a short period both the new and the old filename will exist). */ + if (linkat(olddirfd, oldpath, newdirfd, newpath, 0) >= 0) { + + r = RET_NERRNO(unlinkat(olddirfd, oldpath, 0)); + if (r < 0) { + (void) unlinkat(newdirfd, newpath, 0); + return r; + } + + return 0; + } + + if (!ERRNO_IS_NOT_SUPPORTED(errno) && !IN_SET(errno, EINVAL, EPERM)) /* FAT returns EPERM on link()… */ + return -errno; + + /* OK, neither RENAME_NOREPLACE nor linkat()+unlinkat() worked. Let's then fall back to the racy TOCTOU + * vulnerable accessat(F_OK) check followed by classic, replacing renameat(), we have nothing better. */ + + if (faccessat(newdirfd, newpath, F_OK, AT_SYMLINK_NOFOLLOW) >= 0) + return -EEXIST; + if (errno != ENOENT) + return -errno; + + return RET_NERRNO(renameat(olddirfd, oldpath, newdirfd, newpath)); +} + +int readlinkat_malloc(int fd, const char *p, char **ret) { + size_t l = PATH_MAX; + + assert(p); + + for (;;) { + _cleanup_free_ char *c = NULL; + ssize_t n; + + c = new(char, l+1); + if (!c) + return -ENOMEM; + + n = readlinkat(fd, p, c, l); + if (n < 0) + return -errno; + + if ((size_t) n < l) { + c[n] = 0; + + if (ret) + *ret = TAKE_PTR(c); + + return 0; + } + + if (l > (SSIZE_MAX-1)/2) /* readlinkat() returns an ssize_t, and we want an extra byte for a + * trailing NUL, hence do an overflow check relative to SSIZE_MAX-1 + * here */ + return -EFBIG; + + l *= 2; + } +} + +int readlink_malloc(const char *p, char **ret) { + return readlinkat_malloc(AT_FDCWD, p, ret); +} + +int readlink_value(const char *p, char **ret) { + _cleanup_free_ char *link = NULL, *name = NULL; + int r; + + assert(p); + assert(ret); + + r = readlink_malloc(p, &link); + if (r < 0) + return r; + + r = path_extract_filename(link, &name); + if (r < 0) + return r; + if (r == O_DIRECTORY) + return -EINVAL; + + *ret = TAKE_PTR(name); + return 0; +} + +int readlink_and_make_absolute(const char *p, char **r) { + _cleanup_free_ char *target = NULL; + char *k; + int j; + + assert(p); + assert(r); + + j = readlink_malloc(p, &target); + if (j < 0) + return j; + + k = file_in_same_dir(p, target); + if (!k) + return -ENOMEM; + + *r = k; + return 0; +} + +int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid) { + _cleanup_close_ int fd = -1; + + assert(path); + + fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW); /* Let's acquire an O_PATH fd, as precaution to change + * mode/owner on the same file */ + if (fd < 0) + return -errno; + + return fchmod_and_chown(fd, mode, uid, gid); +} + +int fchmod_and_chown_with_fallback(int fd, const char *path, mode_t mode, uid_t uid, gid_t gid) { + bool do_chown, do_chmod; + struct stat st; + int r; + + /* Change ownership and access mode of the specified fd. Tries to do so safely, ensuring that at no + * point in time the access mode is above the old access mode under the old ownership or the new + * access mode under the new ownership. Note: this call tries hard to leave the access mode + * unaffected if the uid/gid is changed, i.e. it undoes implicit suid/sgid dropping the kernel does + * on chown(). + * + * This call is happy with O_PATH fds. + * + * If path is given, allow a fallback path which does not use /proc/self/fd/. On any normal system + * /proc will be mounted, but in certain improperly assembled environments it might not be. This is + * less secure (potential TOCTOU), so should only be used after consideration. */ + + if (fstat(fd, &st) < 0) + return -errno; + + do_chown = + (uid != UID_INVALID && st.st_uid != uid) || + (gid != GID_INVALID && st.st_gid != gid); + + do_chmod = + !S_ISLNK(st.st_mode) && /* chmod is not defined on symlinks */ + ((mode != MODE_INVALID && ((st.st_mode ^ mode) & 07777) != 0) || + do_chown); /* If we change ownership, make sure we reset the mode afterwards, since chown() + * modifies the access mode too */ + + if (mode == MODE_INVALID) + mode = st.st_mode; /* If we only shall do a chown(), save original mode, since chown() might break it. */ + else if ((mode & S_IFMT) != 0 && ((mode ^ st.st_mode) & S_IFMT) != 0) + return -EINVAL; /* insist on the right file type if it was specified */ + + if (do_chown && do_chmod) { + mode_t minimal = st.st_mode & mode; /* the subset of the old and the new mask */ + + if (((minimal ^ st.st_mode) & 07777) != 0) { + r = fchmod_opath(fd, minimal & 07777); + if (r < 0) { + if (!path || r != -ENOSYS) + return r; + + /* Fallback path which doesn't use /proc/self/fd/. */ + if (chmod(path, minimal & 07777) < 0) + return -errno; + } + } + } + + if (do_chown) + if (fchownat(fd, "", uid, gid, AT_EMPTY_PATH) < 0) + return -errno; + + if (do_chmod) { + r = fchmod_opath(fd, mode & 07777); + if (r < 0) { + if (!path || r != -ENOSYS) + return r; + + /* Fallback path which doesn't use /proc/self/fd/. */ + if (chmod(path, mode & 07777) < 0) + return -errno; + } + } + + return do_chown || do_chmod; +} + +int fchmod_umask(int fd, mode_t m) { + _cleanup_umask_ mode_t u = umask(0777); + + return RET_NERRNO(fchmod(fd, m & (~u))); +} + +int fchmod_opath(int fd, mode_t m) { + /* This function operates also on fd that might have been opened with + * O_PATH. Indeed fchmodat() doesn't have the AT_EMPTY_PATH flag like + * fchownat() does. */ + + if (chmod(FORMAT_PROC_FD_PATH(fd), m) < 0) { + if (errno != ENOENT) + return -errno; + + if (proc_mounted() == 0) + return -ENOSYS; /* if we have no /proc/, the concept is not implementable */ + + return -ENOENT; + } + + return 0; +} + +int futimens_opath(int fd, const struct timespec ts[2]) { + /* Similar to fchmod_path() but for futimens() */ + + if (utimensat(AT_FDCWD, FORMAT_PROC_FD_PATH(fd), ts, 0) < 0) { + if (errno != ENOENT) + return -errno; + + if (proc_mounted() == 0) + return -ENOSYS; /* if we have no /proc/, the concept is not implementable */ + + return -ENOENT; + } + + return 0; +} + +int stat_warn_permissions(const char *path, const struct stat *st) { + assert(path); + assert(st); + + /* Don't complain if we are reading something that is not a file, for example /dev/null */ + if (!S_ISREG(st->st_mode)) + return 0; + + if (st->st_mode & 0111) + log_warning("Configuration file %s is marked executable. Please remove executable permission bits. Proceeding anyway.", path); + + if (st->st_mode & 0002) + log_warning("Configuration file %s is marked world-writable. Please remove world writability permission bits. Proceeding anyway.", path); + + if (getpid_cached() == 1 && (st->st_mode & 0044) != 0044) + log_warning("Configuration file %s is marked world-inaccessible. This has no effect as configuration data is accessible via APIs without restrictions. Proceeding anyway.", path); + + return 0; +} + +int fd_warn_permissions(const char *path, int fd) { + struct stat st; + + assert(path); + assert(fd >= 0); + + if (fstat(fd, &st) < 0) + return -errno; + + return stat_warn_permissions(path, &st); +} + +int touch_file(const char *path, bool parents, usec_t stamp, uid_t uid, gid_t gid, mode_t mode) { + _cleanup_close_ int fd = -1; + int r, ret; + + assert(path); + + /* Note that touch_file() does not follow symlinks: if invoked on an existing symlink, then it is the symlink + * itself which is updated, not its target + * + * Returns the first error we encounter, but tries to apply as much as possible. */ + + if (parents) + (void) mkdir_parents(path, 0755); + + /* Initially, we try to open the node with O_PATH, so that we get a reference to the node. This is useful in + * case the path refers to an existing device or socket node, as we can open it successfully in all cases, and + * won't trigger any driver magic or so. */ + fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW); + if (fd < 0) { + if (errno != ENOENT) + return -errno; + + /* if the node doesn't exist yet, we create it, but with O_EXCL, so that we only create a regular file + * here, and nothing else */ + fd = open(path, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, IN_SET(mode, 0, MODE_INVALID) ? 0644 : mode); + if (fd < 0) + return -errno; + } + + /* Let's make a path from the fd, and operate on that. With this logic, we can adjust the access mode, + * ownership and time of the file node in all cases, even if the fd refers to an O_PATH object — which is + * something fchown(), fchmod(), futimensat() don't allow. */ + ret = fchmod_and_chown(fd, mode, uid, gid); + + if (stamp != USEC_INFINITY) { + struct timespec ts[2]; + + timespec_store(&ts[0], stamp); + ts[1] = ts[0]; + r = futimens_opath(fd, ts); + } else + r = futimens_opath(fd, NULL); + if (r < 0 && ret >= 0) + return r; + + return ret; +} + +int symlink_idempotent(const char *from, const char *to, bool make_relative) { + _cleanup_free_ char *relpath = NULL; + int r; + + assert(from); + assert(to); + + if (make_relative) { + r = path_make_relative_parent(to, from, &relpath); + if (r < 0) + return r; + + from = relpath; + } + + if (symlink(from, to) < 0) { + _cleanup_free_ char *p = NULL; + + if (errno != EEXIST) + return -errno; + + r = readlink_malloc(to, &p); + if (r == -EINVAL) /* Not a symlink? In that case return the original error we encountered: -EEXIST */ + return -EEXIST; + if (r < 0) /* Any other error? In that case propagate it as is */ + return r; + + if (!streq(p, from)) /* Not the symlink we want it to be? In that case, propagate the original -EEXIST */ + return -EEXIST; + } + + return 0; +} + +int symlinkat_atomic_full(const char *from, int atfd, const char *to, bool make_relative) { + _cleanup_free_ char *relpath = NULL, *t = NULL; + int r; + + assert(from); + assert(to); + + if (make_relative) { + r = path_make_relative_parent(to, from, &relpath); + if (r < 0) + return r; + + from = relpath; + } + + r = tempfn_random(to, NULL, &t); + if (r < 0) + return r; + + if (symlinkat(from, atfd, t) < 0) + return -errno; + + r = RET_NERRNO(renameat(atfd, t, atfd, to)); + if (r < 0) { + (void) unlinkat(atfd, t, 0); + return r; + } + + return 0; +} + +int mknodat_atomic(int atfd, const char *path, mode_t mode, dev_t dev) { + _cleanup_free_ char *t = NULL; + int r; + + assert(path); + + r = tempfn_random(path, NULL, &t); + if (r < 0) + return r; + + if (mknodat(atfd, t, mode, dev) < 0) + return -errno; + + r = RET_NERRNO(renameat(atfd, t, atfd, path)); + if (r < 0) { + (void) unlinkat(atfd, t, 0); + return r; + } + + return 0; +} + +int mkfifoat_atomic(int atfd, const char *path, mode_t mode) { + _cleanup_free_ char *t = NULL; + int r; + + assert(path); + + /* We're only interested in the (random) filename. */ + r = tempfn_random(path, NULL, &t); + if (r < 0) + return r; + + if (mkfifoat(atfd, t, mode) < 0) + return -errno; + + r = RET_NERRNO(renameat(atfd, t, atfd, path)); + if (r < 0) { + (void) unlinkat(atfd, t, 0); + return r; + } + + return 0; +} + +int get_files_in_directory(const char *path, char ***list) { + _cleanup_strv_free_ char **l = NULL; + _cleanup_closedir_ DIR *d = NULL; + size_t n = 0; + + assert(path); + + /* Returns all files in a directory in *list, and the number + * of files as return value. If list is NULL returns only the + * number. */ + + d = opendir(path); + if (!d) + return -errno; + + FOREACH_DIRENT_ALL(de, d, return -errno) { + if (!dirent_is_file(de)) + continue; + + if (list) { + /* one extra slot is needed for the terminating NULL */ + if (!GREEDY_REALLOC(l, n + 2)) + return -ENOMEM; + + l[n] = strdup(de->d_name); + if (!l[n]) + return -ENOMEM; + + l[++n] = NULL; + } else + n++; + } + + if (list) + *list = TAKE_PTR(l); + + return n; +} + +static int getenv_tmp_dir(const char **ret_path) { + int r, ret = 0; + + assert(ret_path); + + /* We use the same order of environment variables python uses in tempfile.gettempdir(): + * https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir */ + FOREACH_STRING(n, "TMPDIR", "TEMP", "TMP") { + const char *e; + + e = secure_getenv(n); + if (!e) + continue; + if (!path_is_absolute(e)) { + r = -ENOTDIR; + goto next; + } + if (!path_is_normalized(e)) { + r = -EPERM; + goto next; + } + + r = is_dir(e, true); + if (r < 0) + goto next; + if (r == 0) { + r = -ENOTDIR; + goto next; + } + + *ret_path = e; + return 1; + + next: + /* Remember first error, to make this more debuggable */ + if (ret >= 0) + ret = r; + } + + if (ret < 0) + return ret; + + *ret_path = NULL; + return ret; +} + +static int tmp_dir_internal(const char *def, const char **ret) { + const char *e; + int r, k; + + assert(def); + assert(ret); + + r = getenv_tmp_dir(&e); + if (r > 0) { + *ret = e; + return 0; + } + + k = is_dir(def, true); + if (k == 0) + k = -ENOTDIR; + if (k < 0) + return r < 0 ? r : k; + + *ret = def; + return 0; +} + +int var_tmp_dir(const char **ret) { + + /* Returns the location for "larger" temporary files, that is backed by physical storage if available, and thus + * even might survive a boot: /var/tmp. If $TMPDIR (or related environment variables) are set, its value is + * returned preferably however. Note that both this function and tmp_dir() below are affected by $TMPDIR, + * making it a variable that overrides all temporary file storage locations. */ + + return tmp_dir_internal("/var/tmp", ret); +} + +int tmp_dir(const char **ret) { + + /* Similar to var_tmp_dir() above, but returns the location for "smaller" temporary files, which is usually + * backed by an in-memory file system: /tmp. */ + + return tmp_dir_internal("/tmp", ret); +} + +int unlink_or_warn(const char *filename) { + if (unlink(filename) < 0 && errno != ENOENT) + /* If the file doesn't exist and the fs simply was read-only (in which + * case unlink() returns EROFS even if the file doesn't exist), don't + * complain */ + if (errno != EROFS || access(filename, F_OK) >= 0) + return log_error_errno(errno, "Failed to remove \"%s\": %m", filename); + + return 0; +} + +int access_fd(int fd, int mode) { + /* Like access() but operates on an already open fd */ + + if (access(FORMAT_PROC_FD_PATH(fd), mode) < 0) { + if (errno != ENOENT) + return -errno; + + /* ENOENT can mean two things: that the fd does not exist or that /proc is not mounted. Let's + * make things debuggable and distinguish the two. */ + + if (proc_mounted() == 0) + return -ENOSYS; /* /proc is not available or not set up properly, we're most likely in some chroot + * environment. */ + + return -EBADF; /* The directory exists, hence it's the fd that doesn't. */ + } + + return 0; +} + +void unlink_tempfilep(char (*p)[]) { + /* If the file is created with mkstemp(), it will (almost always) + * change the suffix. Treat this as a sign that the file was + * successfully created. We ignore both the rare case where the + * original suffix is used and unlink failures. */ + if (!endswith(*p, ".XXXXXX")) + (void) unlink_noerrno(*p); +} + +int unlinkat_deallocate(int fd, const char *name, UnlinkDeallocateFlags flags) { + _cleanup_close_ int truncate_fd = -1; + struct stat st; + off_t l, bs; + + assert((flags & ~(UNLINK_REMOVEDIR|UNLINK_ERASE)) == 0); + + /* Operates like unlinkat() but also deallocates the file contents if it is a regular file and there's no other + * link to it. This is useful to ensure that other processes that might have the file open for reading won't be + * able to keep the data pinned on disk forever. This call is particular useful whenever we execute clean-up + * jobs ("vacuuming"), where we want to make sure the data is really gone and the disk space released and + * returned to the free pool. + * + * Deallocation is preferably done by FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE (👊) if supported, which means + * the file won't change size. That's a good thing since we shouldn't needlessly trigger SIGBUS in other + * programs that have mmap()ed the file. (The assumption here is that changing file contents to all zeroes + * underneath those programs is the better choice than simply triggering SIGBUS in them which truncation does.) + * However if hole punching is not implemented in the kernel or file system we'll fall back to normal file + * truncation (🔪), as our goal of deallocating the data space trumps our goal of being nice to readers (💐). + * + * Note that we attempt deallocation, but failure to succeed with that is not considered fatal, as long as the + * primary job – to delete the file – is accomplished. */ + + if (!FLAGS_SET(flags, UNLINK_REMOVEDIR)) { + truncate_fd = openat(fd, name, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK); + if (truncate_fd < 0) { + + /* If this failed because the file doesn't exist propagate the error right-away. Also, + * AT_REMOVEDIR wasn't set, and we tried to open the file for writing, which means EISDIR is + * returned when this is a directory but we are not supposed to delete those, hence propagate + * the error right-away too. */ + if (IN_SET(errno, ENOENT, EISDIR)) + return -errno; + + if (errno != ELOOP) /* don't complain if this is a symlink */ + log_debug_errno(errno, "Failed to open file '%s' for deallocation, ignoring: %m", name); + } + } + + if (unlinkat(fd, name, FLAGS_SET(flags, UNLINK_REMOVEDIR) ? AT_REMOVEDIR : 0) < 0) + return -errno; + + if (truncate_fd < 0) /* Don't have a file handle, can't do more ☹️ */ + return 0; + + if (fstat(truncate_fd, &st) < 0) { + log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring: %m", name); + return 0; + } + + if (!S_ISREG(st.st_mode)) + return 0; + + if (FLAGS_SET(flags, UNLINK_ERASE) && st.st_size > 0 && st.st_nlink == 0) { + uint64_t left = st.st_size; + char buffer[64 * 1024]; + + /* If erasing is requested, let's overwrite the file with random data once before deleting + * it. This isn't going to give you shred(1) semantics, but hopefully should be good enough + * for stuff backed by tmpfs at least. + * + * Note that we only erase like this if the link count of the file is zero. If it is higher it + * is still linked by someone else and we'll leave it to them to remove it securely + * eventually! */ + + random_bytes(buffer, sizeof(buffer)); + + while (left > 0) { + ssize_t n; + + n = write(truncate_fd, buffer, MIN(sizeof(buffer), left)); + if (n < 0) { + log_debug_errno(errno, "Failed to erase data in file '%s', ignoring.", name); + break; + } + + assert(left >= (size_t) n); + left -= n; + } + + /* Let's refresh metadata */ + if (fstat(truncate_fd, &st) < 0) { + log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring: %m", name); + return 0; + } + } + + /* Don't dallocate if there's nothing to deallocate or if the file is linked elsewhere */ + if (st.st_blocks == 0 || st.st_nlink > 0) + return 0; + + /* If this is a regular file, it actually took up space on disk and there are no other links it's time to + * punch-hole/truncate this to release the disk space. */ + + bs = MAX(st.st_blksize, 512); + l = DIV_ROUND_UP(st.st_size, bs) * bs; /* Round up to next block size */ + + if (fallocate(truncate_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, 0, l) >= 0) + return 0; /* Successfully punched a hole! 😊 */ + + /* Fall back to truncation */ + if (ftruncate(truncate_fd, 0) < 0) { + log_debug_errno(errno, "Failed to truncate file to 0, ignoring: %m"); + return 0; + } + + return 0; +} + +int open_parent(const char *path, int flags, mode_t mode) { + _cleanup_free_ char *parent = NULL; + int r; + + r = path_extract_directory(path, &parent); + if (r < 0) + return r; + + /* Let's insist on O_DIRECTORY since the parent of a file or directory is a directory. Except if we open an + * O_TMPFILE file, because in that case we are actually create a regular file below the parent directory. */ + + if (FLAGS_SET(flags, O_PATH)) + flags |= O_DIRECTORY; + else if (!FLAGS_SET(flags, O_TMPFILE)) + flags |= O_DIRECTORY|O_RDONLY; + + return RET_NERRNO(open(parent, flags, mode)); +} + +int conservative_renameat( + int olddirfd, const char *oldpath, + int newdirfd, const char *newpath) { + + _cleanup_close_ int old_fd = -1, new_fd = -1; + struct stat old_stat, new_stat; + + /* Renames the old path to thew new path, much like renameat() — except if both are regular files and + * have the exact same contents and basic file attributes already. In that case remove the new file + * instead. This call is useful for reducing inotify wakeups on files that are updated but don't + * actually change. This function is written in a style that we rather rename too often than suppress + * too much. i.e. whenever we are in doubt we rather rename than fail. After all reducing inotify + * events is an optimization only, not more. */ + + old_fd = openat(olddirfd, oldpath, O_CLOEXEC|O_RDONLY|O_NOCTTY|O_NOFOLLOW); + if (old_fd < 0) + goto do_rename; + + new_fd = openat(newdirfd, newpath, O_CLOEXEC|O_RDONLY|O_NOCTTY|O_NOFOLLOW); + if (new_fd < 0) + goto do_rename; + + if (fstat(old_fd, &old_stat) < 0) + goto do_rename; + + if (!S_ISREG(old_stat.st_mode)) + goto do_rename; + + if (fstat(new_fd, &new_stat) < 0) + goto do_rename; + + if (stat_inode_same(&new_stat, &old_stat)) + goto is_same; + + if (old_stat.st_mode != new_stat.st_mode || + old_stat.st_size != new_stat.st_size || + old_stat.st_uid != new_stat.st_uid || + old_stat.st_gid != new_stat.st_gid) + goto do_rename; + + for (;;) { + uint8_t buf1[16*1024]; + uint8_t buf2[sizeof(buf1)]; + ssize_t l1, l2; + + l1 = read(old_fd, buf1, sizeof(buf1)); + if (l1 < 0) + goto do_rename; + + if (l1 == sizeof(buf1)) + /* Read the full block, hence read a full block in the other file too */ + + l2 = read(new_fd, buf2, l1); + else { + assert((size_t) l1 < sizeof(buf1)); + + /* Short read. This hence was the last block in the first file, and then came + * EOF. Read one byte more in the second file, so that we can verify we hit EOF there + * too. */ + + assert((size_t) (l1 + 1) <= sizeof(buf2)); + l2 = read(new_fd, buf2, l1 + 1); + } + if (l2 != l1) + goto do_rename; + + if (memcmp(buf1, buf2, l1) != 0) + goto do_rename; + + if ((size_t) l1 < sizeof(buf1)) /* We hit EOF on the first file, and the second file too, hence exit + * now. */ + break; + } + +is_same: + /* Everything matches? Then don't rename, instead remove the source file, and leave the existing + * destination in place */ + + if (unlinkat(olddirfd, oldpath, 0) < 0) + goto do_rename; + + return 0; + +do_rename: + if (renameat(olddirfd, oldpath, newdirfd, newpath) < 0) + return -errno; + + return 1; +} + +int posix_fallocate_loop(int fd, uint64_t offset, uint64_t size) { + RateLimit rl; + int r; + + r = posix_fallocate(fd, offset, size); /* returns positive errnos on error */ + if (r != EINTR) + return -r; /* Let's return negative errnos, like common in our codebase */ + + /* On EINTR try a couple of times more, but protect against busy looping + * (not more than 16 times per 10s) */ + rl = (RateLimit) { 10 * USEC_PER_SEC, 16 }; + while (ratelimit_below(&rl)) { + r = posix_fallocate(fd, offset, size); + if (r != EINTR) + return -r; + } + + return -EINTR; +} + +int parse_cifs_service( + const char *s, + char **ret_host, + char **ret_service, + char **ret_path) { + + _cleanup_free_ char *h = NULL, *ss = NULL, *x = NULL; + const char *p, *e, *d; + char delimiter; + + /* Parses a CIFS service in form of //host/service/path… and splitting it in three parts. The last + * part is optional, in which case NULL is returned there. To maximize compatibility syntax with + * backslashes instead of slashes is accepted too. */ + + if (!s) + return -EINVAL; + + p = startswith(s, "//"); + if (!p) { + p = startswith(s, "\\\\"); + if (!p) + return -EINVAL; + } + + delimiter = s[0]; + e = strchr(p, delimiter); + if (!e) + return -EINVAL; + + h = strndup(p, e - p); + if (!h) + return -ENOMEM; + + if (!hostname_is_valid(h, 0)) + return -EINVAL; + + e++; + + d = strchrnul(e, delimiter); + + ss = strndup(e, d - e); + if (!ss) + return -ENOMEM; + + if (!filename_is_valid(ss)) + return -EINVAL; + + if (!isempty(d)) { + x = strdup(skip_leading_chars(d, CHAR_TO_STR(delimiter))); + if (!x) + return -EINVAL; + + /* Make sure to convert Windows-style "\" → Unix-style / */ + for (char *i = x; *i; i++) + if (*i == delimiter) + *i = '/'; + + if (!path_is_valid(x)) + return -EINVAL; + + path_simplify(x); + if (!path_is_normalized(x)) + return -EINVAL; + } + + if (ret_host) + *ret_host = TAKE_PTR(h); + if (ret_service) + *ret_service = TAKE_PTR(ss); + if (ret_path) + *ret_path = TAKE_PTR(x); + + return 0; +} + +int open_mkdir_at(int dirfd, const char *path, int flags, mode_t mode) { + _cleanup_close_ int fd = -1, parent_fd = -1; + _cleanup_free_ char *fname = NULL; + bool made; + int r; + + /* Creates a directory with mkdirat() and then opens it, in the "most atomic" fashion we can + * do. Guarantees that the returned fd refers to a directory. If O_EXCL is specified will fail if the + * dir already exists. Otherwise will open an existing dir, but only if it is one. */ + + if (flags & ~(O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_EXCL|O_NOATIME|O_NOFOLLOW|O_PATH)) + return -EINVAL; + if ((flags & O_ACCMODE) != O_RDONLY) + return -EINVAL; + + /* Note that O_DIRECTORY|O_NOFOLLOW is implied, but we allow specifying it anyway. The following + * flags actually make sense to specify: O_CLOEXEC, O_EXCL, O_NOATIME, O_PATH */ + + if (isempty(path)) + return -EINVAL; + + if (!filename_is_valid(path)) { + _cleanup_free_ char *parent = NULL; + + /* If this is not a valid filename, it's a path. Let's open the parent directory then, so + * that we can pin it, and operate below it. */ + + r = path_extract_directory(path, &parent); + if (r < 0) + return r; + + r = path_extract_filename(path, &fname); + if (r < 0) + return r; + + parent_fd = openat(dirfd, parent, O_PATH|O_DIRECTORY|O_CLOEXEC); + if (parent_fd < 0) + return -errno; + + dirfd = parent_fd; + path = fname; + } + + r = RET_NERRNO(mkdirat(dirfd, path, mode)); + if (r == -EEXIST) { + if (FLAGS_SET(flags, O_EXCL)) + return -EEXIST; + + made = false; + } else if (r < 0) + return r; + else + made = true; + + fd = RET_NERRNO(openat(dirfd, path, (flags & ~O_EXCL)|O_DIRECTORY|O_NOFOLLOW)); + if (fd < 0) { + if (fd == -ENOENT) /* We got ENOENT? then someone else immediately removed it after we + * created it. In that case let's return immediately without unlinking + * anything, because there simply isn't anything to unlink anymore. */ + return -ENOENT; + if (fd == -ELOOP) /* is a symlink? exists already → created by someone else, don't unlink */ + return -EEXIST; + if (fd == -ENOTDIR) /* not a directory? exists already → created by someone else, don't unlink */ + return -EEXIST; + + if (made) + (void) unlinkat(dirfd, path, AT_REMOVEDIR); + + return fd; + } + + return TAKE_FD(fd); +} + +int openat_report_new(int dirfd, const char *pathname, int flags, mode_t mode, bool *ret_newly_created) { + unsigned attempts = 7; + int fd; + + /* Just like openat(), but adds one thing: optionally returns whether we created the file anew or if + * it already existed before. This is only relevant if O_CREAT is set without O_EXCL, and thus will + * shortcut to openat() otherwise */ + + if (!ret_newly_created) + return RET_NERRNO(openat(dirfd, pathname, flags, mode)); + + if (!FLAGS_SET(flags, O_CREAT) || FLAGS_SET(flags, O_EXCL)) { + fd = openat(dirfd, pathname, flags, mode); + if (fd < 0) + return -errno; + + *ret_newly_created = FLAGS_SET(flags, O_CREAT); + return fd; + } + + for (;;) { + /* First, attempt to open without O_CREAT/O_EXCL, i.e. open existing file */ + fd = openat(dirfd, pathname, flags & ~(O_CREAT | O_EXCL), mode); + if (fd >= 0) { + *ret_newly_created = false; + return fd; + } + if (errno != ENOENT) + return -errno; + + /* So the file didn't exist yet, hence create it with O_CREAT/O_EXCL. */ + fd = openat(dirfd, pathname, flags | O_CREAT | O_EXCL, mode); + if (fd >= 0) { + *ret_newly_created = true; + return fd; + } + if (errno != EEXIST) + return -errno; + + /* Hmm, so now we got EEXIST? So it apparently exists now? If so, let's try to open again + * without the two flags. But let's not spin forever, hence put a limit on things */ + + if (--attempts == 0) /* Give up eventually, somebody is playing with us */ + return -EEXIST; + } +} diff --git a/src/basic/fs-util.h b/src/basic/fs-util.h new file mode 100644 index 0000000..c4dffc4 --- /dev/null +++ b/src/basic/fs-util.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "errno-util.h" +#include "time-util.h" +#include "user-util.h" + +#define MODE_INVALID ((mode_t) -1) + +/* The following macros add 1 when converting things, since 0 is a valid mode, while the pointer + * NULL is special */ +#define PTR_TO_MODE(p) ((mode_t) ((uintptr_t) (p)-1)) +#define MODE_TO_PTR(u) ((void *) ((uintptr_t) (u)+1)) + +int unlink_noerrno(const char *path); + +int rmdir_parents(const char *path, const char *stop); + +int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath); + +int readlinkat_malloc(int fd, const char *p, char **ret); +int readlink_malloc(const char *p, char **r); +int readlink_value(const char *p, char **ret); +int readlink_and_make_absolute(const char *p, char **r); + +int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid); +int fchmod_and_chown_with_fallback(int fd, const char *path, mode_t mode, uid_t uid, gid_t gid); +static inline int fchmod_and_chown(int fd, mode_t mode, uid_t uid, gid_t gid) { + return fchmod_and_chown_with_fallback(fd, NULL, mode, uid, gid); /* no fallback */ +} + +int fchmod_umask(int fd, mode_t mode); +int fchmod_opath(int fd, mode_t m); + +int futimens_opath(int fd, const struct timespec ts[2]); + +int fd_warn_permissions(const char *path, int fd); +int stat_warn_permissions(const char *path, const struct stat *st); + +#define laccess(path, mode) \ + RET_NERRNO(faccessat(AT_FDCWD, (path), (mode), AT_SYMLINK_NOFOLLOW)) + +int touch_file(const char *path, bool parents, usec_t stamp, uid_t uid, gid_t gid, mode_t mode); + +static inline int touch(const char *path) { + return touch_file(path, false, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID); +} + +int symlink_idempotent(const char *from, const char *to, bool make_relative); + +int symlinkat_atomic_full(const char *from, int atfd, const char *to, bool make_relative); +static inline int symlink_atomic(const char *from, const char *to) { + return symlinkat_atomic_full(from, AT_FDCWD, to, false); +} + +int mknodat_atomic(int atfd, const char *path, mode_t mode, dev_t dev); +static inline int mknod_atomic(const char *path, mode_t mode, dev_t dev) { + return mknodat_atomic(AT_FDCWD, path, mode, dev); +} + +int mkfifoat_atomic(int dir_fd, const char *path, mode_t mode); +static inline int mkfifo_atomic(const char *path, mode_t mode) { + return mkfifoat_atomic(AT_FDCWD, path, mode); +} + +int get_files_in_directory(const char *path, char ***list); + +int tmp_dir(const char **ret); +int var_tmp_dir(const char **ret); + +int unlink_or_warn(const char *filename); + +/* Useful for usage with _cleanup_(), removes a directory and frees the pointer */ +static inline char *rmdir_and_free(char *p) { + PROTECT_ERRNO; + + if (!p) + return NULL; + + (void) rmdir(p); + return mfree(p); +} +DEFINE_TRIVIAL_CLEANUP_FUNC(char*, rmdir_and_free); + +static inline char* unlink_and_free(char *p) { + if (!p) + return NULL; + + (void) unlink_noerrno(p); + return mfree(p); +} +DEFINE_TRIVIAL_CLEANUP_FUNC(char*, unlink_and_free); + +int access_fd(int fd, int mode); + +void unlink_tempfilep(char (*p)[]); + +typedef enum UnlinkDeallocateFlags { + UNLINK_REMOVEDIR = 1 << 0, + UNLINK_ERASE = 1 << 1, +} UnlinkDeallocateFlags; + +int unlinkat_deallocate(int fd, const char *name, UnlinkDeallocateFlags flags); + +int open_parent(const char *path, int flags, mode_t mode); + +int conservative_renameat(int olddirfd, const char *oldpath, int newdirfd, const char *newpath); +static inline int conservative_rename(const char *oldpath, const char *newpath) { + return conservative_renameat(AT_FDCWD, oldpath, AT_FDCWD, newpath); +} + +int posix_fallocate_loop(int fd, uint64_t offset, uint64_t size); + +int parse_cifs_service(const char *s, char **ret_host, char **ret_service, char **ret_path); + +int open_mkdir_at(int dirfd, const char *path, int flags, mode_t mode); + +int openat_report_new(int dirfd, const char *pathname, int flags, mode_t mode, bool *ret_newly_created); diff --git a/src/basic/gcrypt-util.c b/src/basic/gcrypt-util.c new file mode 100644 index 0000000..41c9362 --- /dev/null +++ b/src/basic/gcrypt-util.c @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#if HAVE_GCRYPT + +#include "gcrypt-util.h" +#include "hexdecoct.h" + +void initialize_libgcrypt(bool secmem) { + if (gcry_control(GCRYCTL_INITIALIZATION_FINISHED_P)) + return; + + gcry_control(GCRYCTL_SET_PREFERRED_RNG_TYPE, GCRY_RNG_TYPE_SYSTEM); + assert_se(gcry_check_version("1.4.5")); + + /* Turn off "secmem". Clients which wish to make use of this + * feature should initialize the library manually */ + if (!secmem) + gcry_control(GCRYCTL_DISABLE_SECMEM); + + gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0); +} + +# if !PREFER_OPENSSL +int string_hashsum(const char *s, size_t len, int md_algorithm, char **out) { + _cleanup_(gcry_md_closep) gcry_md_hd_t md = NULL; + gcry_error_t err; + size_t hash_size; + void *hash; + char *enc; + + initialize_libgcrypt(false); + + hash_size = gcry_md_get_algo_dlen(md_algorithm); + assert(hash_size > 0); + + err = gcry_md_open(&md, md_algorithm, 0); + if (gcry_err_code(err) != GPG_ERR_NO_ERROR || !md) + return -EIO; + + gcry_md_write(md, s, len); + + hash = gcry_md_read(md, 0); + if (!hash) + return -EIO; + + enc = hexmem(hash, hash_size); + if (!enc) + return -ENOMEM; + + *out = enc; + return 0; +} +# endif +#endif diff --git a/src/basic/gcrypt-util.h b/src/basic/gcrypt-util.h new file mode 100644 index 0000000..4c40cef --- /dev/null +++ b/src/basic/gcrypt-util.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#pragma once + +#include +#include +#include + +#if HAVE_GCRYPT +#include + +#include "macro.h" + +void initialize_libgcrypt(bool secmem); + +DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(gcry_md_hd_t, gcry_md_close, NULL); +#endif + +#if !PREFER_OPENSSL +# if HAVE_GCRYPT +int string_hashsum(const char *s, size_t len, int md_algorithm, char **out); +# endif + +static inline int string_hashsum_sha224(const char *s, size_t len, char **out) { +# if HAVE_GCRYPT + return string_hashsum(s, len, GCRY_MD_SHA224, out); +# else + return -EOPNOTSUPP; +# endif +} + +static inline int string_hashsum_sha256(const char *s, size_t len, char **out) { +# if HAVE_GCRYPT + return string_hashsum(s, len, GCRY_MD_SHA256, out); +# else + return -EOPNOTSUPP; +# endif +} +#endif diff --git a/src/basic/generate-af-list.sh b/src/basic/generate-af-list.sh new file mode 100755 index 0000000..b081485 --- /dev/null +++ b/src/basic/generate-af-list.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: LGPL-2.1-or-later +set -eu +set -o pipefail + +${1:?} -E -dM -include sys/socket.h -include "${2:?}" -include "${3:?}" - +#include +#include +#include + +#include "dirent-util.h" +#include "errno-util.h" +#include "glob-util.h" +#include "macro.h" +#include "path-util.h" +#include "strv.h" + +static void closedir_wrapper(void* v) { + (void) closedir(v); +} + +int safe_glob(const char *path, int flags, glob_t *pglob) { + int k; + + /* We want to set GLOB_ALTDIRFUNC ourselves, don't allow it to be set. */ + assert(!(flags & GLOB_ALTDIRFUNC)); + + if (!pglob->gl_closedir) + pglob->gl_closedir = closedir_wrapper; + if (!pglob->gl_readdir) + pglob->gl_readdir = (struct dirent *(*)(void *)) readdir_no_dot; + if (!pglob->gl_opendir) + pglob->gl_opendir = (void *(*)(const char *)) opendir; + if (!pglob->gl_lstat) + pglob->gl_lstat = lstat; + if (!pglob->gl_stat) + pglob->gl_stat = stat; + + errno = 0; + k = glob(path, flags | GLOB_ALTDIRFUNC, NULL, pglob); + if (k == GLOB_NOMATCH) + return -ENOENT; + if (k == GLOB_NOSPACE) + return -ENOMEM; + if (k != 0) + return errno_or_else(EIO); + if (strv_isempty(pglob->gl_pathv)) + return -ENOENT; + + return 0; +} + +int glob_first(const char *path, char **ret_first) { + _cleanup_globfree_ glob_t g = {}; + int k; + + assert(path); + + k = safe_glob(path, GLOB_NOSORT|GLOB_BRACE, &g); + if (k == -ENOENT) { + if (ret_first) + *ret_first = NULL; + return false; + } + if (k < 0) + return k; + + if (ret_first) { + char *first = strdup(g.gl_pathv[0]); + if (!first) + return log_oom_debug(); + *ret_first = first; + } + + return true; +} + +int glob_extend(char ***strv, const char *path, int flags) { + _cleanup_globfree_ glob_t g = {}; + int k; + + k = safe_glob(path, GLOB_NOSORT|GLOB_BRACE|flags, &g); + if (k < 0) + return k; + + return strv_extend_strv(strv, g.gl_pathv, false); +} + +int glob_non_glob_prefix(const char *path, char **ret) { + /* Return the path of the path that has no glob characters. */ + + size_t n = strcspn(path, GLOB_CHARS); + + if (path[n] != '\0') + while (n > 0 && path[n-1] != '/') + n--; + + if (n == 0) + return -ENOENT; + + char *ans = strndup(path, n); + if (!ans) + return -ENOMEM; + *ret = ans; + return 0; +} diff --git a/src/basic/glob-util.h b/src/basic/glob-util.h new file mode 100644 index 0000000..7ca26cc --- /dev/null +++ b/src/basic/glob-util.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +#include "macro.h" +#include "string-util.h" + +/* Note: this function modifies pglob to set various functions. */ +int safe_glob(const char *path, int flags, glob_t *pglob); + +/* Note: which match is returned depends on the implementation/system and not guaranteed to be stable */ +int glob_first(const char *path, char **ret_first); +#define glob_exists(path) glob_first(path, NULL) +int glob_extend(char ***strv, const char *path, int flags); + +int glob_non_glob_prefix(const char *path, char **ret); + +#define _cleanup_globfree_ _cleanup_(globfree) + +_pure_ static inline bool string_is_glob(const char *p) { + /* Check if a string contains any glob patterns. */ + return !!strpbrk(p, GLOB_CHARS); +} diff --git a/src/basic/glyph-util.c b/src/basic/glyph-util.c new file mode 100644 index 0000000..7e26edb --- /dev/null +++ b/src/basic/glyph-util.c @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "env-util.h" +#include "glyph-util.h" +#include "locale-util.h" +#include "strv.h" + +bool emoji_enabled(void) { + static int cached_emoji_enabled = -1; + + if (cached_emoji_enabled < 0) { + int val = getenv_bool("SYSTEMD_EMOJI"); + if (val >= 0) + return (cached_emoji_enabled = val); + + const char *term = getenv("TERM"); + if (!term || STR_IN_SET(term, "dumb", "linux")) + return (cached_emoji_enabled = false); + + cached_emoji_enabled = is_locale_utf8(); + } + + return cached_emoji_enabled; +} + +const char *special_glyph(SpecialGlyph code) { + + /* A list of a number of interesting unicode glyphs we can use to decorate our output. It's probably wise to be + * conservative here, and primarily stick to the glyphs defined in the eurlatgr font, so that display still + * works reasonably well on the Linux console. For details see: + * + * http://git.altlinux.org/people/legion/packages/kbd.git?p=kbd.git;a=blob;f=data/consolefonts/README.eurlatgr + */ + + static const char* const draw_table[2][_SPECIAL_GLYPH_MAX] = { + /* ASCII fallback */ + [false] = { + [SPECIAL_GLYPH_TREE_VERTICAL] = "| ", + [SPECIAL_GLYPH_TREE_BRANCH] = "|-", + [SPECIAL_GLYPH_TREE_RIGHT] = "`-", + [SPECIAL_GLYPH_TREE_SPACE] = " ", + [SPECIAL_GLYPH_TREE_TOP] = ",-", + [SPECIAL_GLYPH_VERTICAL_DOTTED] = ":", + [SPECIAL_GLYPH_TRIANGULAR_BULLET] = ">", + [SPECIAL_GLYPH_BLACK_CIRCLE] = "*", + [SPECIAL_GLYPH_WHITE_CIRCLE] = "*", + [SPECIAL_GLYPH_MULTIPLICATION_SIGN] = "x", + [SPECIAL_GLYPH_CIRCLE_ARROW] = "*", + [SPECIAL_GLYPH_BULLET] = "*", + [SPECIAL_GLYPH_MU] = "u", + [SPECIAL_GLYPH_CHECK_MARK] = "+", + [SPECIAL_GLYPH_CROSS_MARK] = "-", + [SPECIAL_GLYPH_LIGHT_SHADE] = "-", + [SPECIAL_GLYPH_DARK_SHADE] = "X", + [SPECIAL_GLYPH_SIGMA] = "S", + [SPECIAL_GLYPH_ARROW_LEFT] = "<-", + [SPECIAL_GLYPH_ARROW_RIGHT] = "->", + [SPECIAL_GLYPH_ARROW_UP] = "^", + [SPECIAL_GLYPH_ARROW_DOWN] = "v", + [SPECIAL_GLYPH_ELLIPSIS] = "...", + [SPECIAL_GLYPH_EXTERNAL_LINK] = "[LNK]", + [SPECIAL_GLYPH_ECSTATIC_SMILEY] = ":-]", + [SPECIAL_GLYPH_HAPPY_SMILEY] = ":-}", + [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY] = ":-)", + [SPECIAL_GLYPH_NEUTRAL_SMILEY] = ":-|", + [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = ":-(", + [SPECIAL_GLYPH_UNHAPPY_SMILEY] = ":-{", + [SPECIAL_GLYPH_DEPRESSED_SMILEY] = ":-[", + [SPECIAL_GLYPH_LOCK_AND_KEY] = "o-,", + [SPECIAL_GLYPH_TOUCH] = "O=", /* Yeah, not very convincing, can you do it better? */ + [SPECIAL_GLYPH_RECYCLING] = "~", + [SPECIAL_GLYPH_DOWNLOAD] = "\\", + [SPECIAL_GLYPH_SPARKLES] = "*", + }, + + /* UTF-8 */ + [true] = { + /* The following are multiple glyphs in both ASCII and in UNICODE */ + [SPECIAL_GLYPH_TREE_VERTICAL] = u8"│ ", + [SPECIAL_GLYPH_TREE_BRANCH] = u8"├─", + [SPECIAL_GLYPH_TREE_RIGHT] = u8"└─", + [SPECIAL_GLYPH_TREE_SPACE] = u8" ", + [SPECIAL_GLYPH_TREE_TOP] = u8"┌─", + + /* Single glyphs in both cases */ + [SPECIAL_GLYPH_VERTICAL_DOTTED] = u8"┆", + [SPECIAL_GLYPH_TRIANGULAR_BULLET] = u8"‣", + [SPECIAL_GLYPH_BLACK_CIRCLE] = u8"●", + [SPECIAL_GLYPH_WHITE_CIRCLE] = u8"○", + [SPECIAL_GLYPH_MULTIPLICATION_SIGN] = u8"×", + [SPECIAL_GLYPH_CIRCLE_ARROW] = u8"↻", + [SPECIAL_GLYPH_BULLET] = u8"•", + [SPECIAL_GLYPH_MU] = u8"μ", /* actually called: GREEK SMALL LETTER MU */ + [SPECIAL_GLYPH_CHECK_MARK] = u8"✓", + [SPECIAL_GLYPH_CROSS_MARK] = u8"✗", /* actually called: BALLOT X */ + [SPECIAL_GLYPH_LIGHT_SHADE] = u8"░", + [SPECIAL_GLYPH_DARK_SHADE] = u8"▒", + [SPECIAL_GLYPH_SIGMA] = u8"Σ", + [SPECIAL_GLYPH_ARROW_UP] = u8"↑", /* actually called: UPWARDS ARROW */ + [SPECIAL_GLYPH_ARROW_DOWN] = u8"↓", /* actually called: DOWNWARDS ARROW */ + + /* Single glyph in Unicode, two in ASCII */ + [SPECIAL_GLYPH_ARROW_LEFT] = u8"←", /* actually called: LEFTWARDS ARROW */ + [SPECIAL_GLYPH_ARROW_RIGHT] = u8"→", /* actually called: RIGHTWARDS ARROW */ + + /* Single glyph in Unicode, three in ASCII */ + [SPECIAL_GLYPH_ELLIPSIS] = u8"…", /* actually called: HORIZONTAL ELLIPSIS */ + + /* Three glyphs in Unicode, five in ASCII */ + [SPECIAL_GLYPH_EXTERNAL_LINK] = u8"[🡕]", /* actually called: NORTH EAST SANS-SERIF ARROW, enclosed in [] */ + + /* These smileys are a single glyph in Unicode, and three in ASCII */ + [SPECIAL_GLYPH_ECSTATIC_SMILEY] = u8"😇", /* actually called: SMILING FACE WITH HALO */ + [SPECIAL_GLYPH_HAPPY_SMILEY] = u8"😀", /* actually called: GRINNING FACE */ + [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY] = u8"🙂", /* actually called: SLIGHTLY SMILING FACE */ + [SPECIAL_GLYPH_NEUTRAL_SMILEY] = u8"😐", /* actually called: NEUTRAL FACE */ + [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = u8"🙁", /* actually called: SLIGHTLY FROWNING FACE */ + [SPECIAL_GLYPH_UNHAPPY_SMILEY] = u8"😨", /* actually called: FEARFUL FACE */ + [SPECIAL_GLYPH_DEPRESSED_SMILEY] = u8"🤢", /* actually called: NAUSEATED FACE */ + + /* This emoji is a single character cell glyph in Unicode, and three in ASCII */ + [SPECIAL_GLYPH_LOCK_AND_KEY] = u8"🔐", /* actually called: CLOSED LOCK WITH KEY */ + + /* This emoji is a single character cell glyph in Unicode, and two in ASCII */ + [SPECIAL_GLYPH_TOUCH] = u8"👆", /* actually called: BACKHAND INDEX POINTING UP */ + + /* These three emojis are single character cell glyphs in Unicode and also in ASCII. */ + [SPECIAL_GLYPH_RECYCLING] = u8"♻️", /* actually called: UNIVERSAL RECYCLNG SYMBOL */ + [SPECIAL_GLYPH_DOWNLOAD] = u8"⤵️", /* actually called: RIGHT ARROW CURVING DOWN */ + [SPECIAL_GLYPH_SPARKLES] = u8"✨", + }, + }; + + if (code < 0) + return NULL; + + assert(code < _SPECIAL_GLYPH_MAX); + return draw_table[code >= _SPECIAL_GLYPH_FIRST_EMOJI ? emoji_enabled() : is_locale_utf8()][code]; +} diff --git a/src/basic/glyph-util.h b/src/basic/glyph-util.h new file mode 100644 index 0000000..621d7a8 --- /dev/null +++ b/src/basic/glyph-util.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +#include "macro.h" + +typedef enum SpecialGlyph { + SPECIAL_GLYPH_TREE_VERTICAL, + SPECIAL_GLYPH_TREE_BRANCH, + SPECIAL_GLYPH_TREE_RIGHT, + SPECIAL_GLYPH_TREE_SPACE, + SPECIAL_GLYPH_TREE_TOP, + SPECIAL_GLYPH_VERTICAL_DOTTED, + SPECIAL_GLYPH_TRIANGULAR_BULLET, + SPECIAL_GLYPH_BLACK_CIRCLE, + SPECIAL_GLYPH_WHITE_CIRCLE, + SPECIAL_GLYPH_MULTIPLICATION_SIGN, + SPECIAL_GLYPH_CIRCLE_ARROW, + SPECIAL_GLYPH_BULLET, + SPECIAL_GLYPH_MU, + SPECIAL_GLYPH_CHECK_MARK, + SPECIAL_GLYPH_CROSS_MARK, + SPECIAL_GLYPH_ARROW_LEFT, + SPECIAL_GLYPH_ARROW_RIGHT, + SPECIAL_GLYPH_ARROW_UP, + SPECIAL_GLYPH_ARROW_DOWN, + SPECIAL_GLYPH_ELLIPSIS, + SPECIAL_GLYPH_LIGHT_SHADE, + SPECIAL_GLYPH_DARK_SHADE, + SPECIAL_GLYPH_SIGMA, + SPECIAL_GLYPH_EXTERNAL_LINK, + _SPECIAL_GLYPH_FIRST_EMOJI, + SPECIAL_GLYPH_ECSTATIC_SMILEY = _SPECIAL_GLYPH_FIRST_EMOJI, + SPECIAL_GLYPH_HAPPY_SMILEY, + SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY, + SPECIAL_GLYPH_NEUTRAL_SMILEY, + SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY, + SPECIAL_GLYPH_UNHAPPY_SMILEY, + SPECIAL_GLYPH_DEPRESSED_SMILEY, + SPECIAL_GLYPH_LOCK_AND_KEY, + SPECIAL_GLYPH_TOUCH, + SPECIAL_GLYPH_RECYCLING, + SPECIAL_GLYPH_DOWNLOAD, + SPECIAL_GLYPH_SPARKLES, + _SPECIAL_GLYPH_MAX, + _SPECIAL_GLYPH_INVALID = -EINVAL, +} SpecialGlyph; + +const char *special_glyph(SpecialGlyph code) _const_; + +bool emoji_enabled(void); + +static inline const char *special_glyph_check_mark(bool b) { + return b ? special_glyph(SPECIAL_GLYPH_CHECK_MARK) : special_glyph(SPECIAL_GLYPH_CROSS_MARK); +} + +static inline const char *special_glyph_check_mark_space(bool b) { + return b ? special_glyph(SPECIAL_GLYPH_CHECK_MARK) : " "; +} diff --git a/src/basic/gunicode.c b/src/basic/gunicode.c new file mode 100644 index 0000000..36beb95 --- /dev/null +++ b/src/basic/gunicode.c @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* gunicode.c - Unicode manipulation functions + * + * Copyright (C) 1999, 2000 Tom Tromey + * Copyright © 2000, 2005 Red Hat, Inc. + */ + +#include "gunicode.h" + +#define unichar uint32_t + +/** + * g_utf8_prev_char: + * @p: a pointer to a position within a UTF-8 encoded string + * + * Finds the previous UTF-8 character in the string before @p. + * + * @p does not have to be at the beginning of a UTF-8 character. No check + * is made to see if the character found is actually valid other than + * it starts with an appropriate byte. If @p might be the first + * character of the string, you must use g_utf8_find_prev_char() instead. + * + * Return value: a pointer to the found character. + **/ +char * +utf8_prev_char (const char *p) +{ + for (;;) + { + p--; + if ((*p & 0xc0) != 0x80) + return (char *)p; + } +} + +struct Interval +{ + unichar start, end; +}; + +static int +interval_compare (const void *key, const void *elt) +{ + unichar c = (unichar) (long) (key); + struct Interval *interval = (struct Interval *)elt; + + if (c < interval->start) + return -1; + if (c > interval->end) + return +1; + + return 0; +} + +/* + * NOTE: + * + * The tables for g_unichar_iswide() and g_unichar_iswide_cjk() are + * generated from the Unicode Character Database's file + * extracted/DerivedEastAsianWidth.txt using the gen-iswide-table.py + * in this way: + * + * ./gen-iswide-table.py < path/to/ucd/extracted/DerivedEastAsianWidth.txt | fmt + * + * Last update for Unicode 6.0. + */ + +/** + * g_unichar_iswide: + * @c: a Unicode character + * + * Determines if a character is typically rendered in a double-width + * cell. + * + * Return value: %TRUE if the character is wide + **/ +bool +unichar_iswide (unichar c) +{ + /* See NOTE earlier for how to update this table. */ + static const struct Interval wide[] = { + {0x1100, 0x115F}, {0x2329, 0x232A}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3}, + {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3000, 0x303E}, {0x3041, 0x3096}, + {0x3099, 0x30FF}, {0x3105, 0x312D}, {0x3131, 0x318E}, {0x3190, 0x31BA}, + {0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x32FE}, + {0x3300, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C}, + {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52}, + {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6}, + {0x1B000, 0x1B001}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23A}, + {0x1F240, 0x1F248}, {0x1F250, 0x1F251}, + {0x1F300, 0x1F567}, /* Miscellaneous Symbols and Pictographs */ + {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}, + }; + + if (bsearch ((void *)(uintptr_t)c, wide, (sizeof (wide) / sizeof ((wide)[0])), sizeof wide[0], + interval_compare)) + return true; + + return false; +} + +const char utf8_skip_data[256] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1 +}; diff --git a/src/basic/gunicode.h b/src/basic/gunicode.h new file mode 100644 index 0000000..6b71839 --- /dev/null +++ b/src/basic/gunicode.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* gunicode.h - Unicode manipulation functions + * + * Copyright (C) 1999, 2000 Tom Tromey + * Copyright © 2000, 2005 Red Hat, Inc. + */ +#pragma once + +#include +#include +#include + +char *utf8_prev_char (const char *p); + +extern const char utf8_skip_data[256]; + +/** + * g_utf8_next_char: + * @p: Pointer to the start of a valid UTF-8 character + * + * Skips to the next character in a UTF-8 string. The string must be + * valid; this macro is as fast as possible, and has no error-checking. + * You would use this macro to iterate over a string character by + * character. The macro returns the start of the next UTF-8 character. + * Before using this macro, use g_utf8_validate() to validate strings + * that may contain invalid UTF-8. + */ +#define utf8_next_char(p) (char *)((p) + utf8_skip_data[*(const unsigned char *)(p)]) + +bool unichar_iswide (uint32_t c); diff --git a/src/basic/hash-funcs.c b/src/basic/hash-funcs.c new file mode 100644 index 0000000..5fac467 --- /dev/null +++ b/src/basic/hash-funcs.c @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include + +#include "hash-funcs.h" +#include "path-util.h" +#include "strv.h" + +void string_hash_func(const char *p, struct siphash *state) { + siphash24_compress(p, strlen(p) + 1, state); +} + +DEFINE_HASH_OPS(string_hash_ops, char, string_hash_func, string_compare_func); +DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(string_hash_ops_free, + char, string_hash_func, string_compare_func, free); +DEFINE_HASH_OPS_FULL(string_hash_ops_free_free, + char, string_hash_func, string_compare_func, free, + void, free); +DEFINE_HASH_OPS_FULL(string_hash_ops_free_strv_free, + char, string_hash_func, string_compare_func, free, + char*, strv_free); + +void path_hash_func(const char *q, struct siphash *state) { + bool add_slash = false; + + assert(q); + assert(state); + + /* Calculates a hash for a path in a way this duplicate inner slashes don't make a differences, and also + * whether there's a trailing slash or not. This fits well with the semantics of path_compare(), which does + * similar checks and also doesn't care for trailing slashes. Note that relative and absolute paths (i.e. those + * which begin in a slash or not) will hash differently though. */ + + /* if path is absolute, add one "/" to the hash. */ + if (path_is_absolute(q)) + siphash24_compress("/", 1, state); + + for (;;) { + const char *e; + int r; + + r = path_find_first_component(&q, true, &e); + if (r == 0) + return; + + if (add_slash) + siphash24_compress_byte('/', state); + + if (r < 0) { + /* if a component is invalid, then add remaining part as a string. */ + string_hash_func(q, state); + return; + } + + /* Add this component to the hash. */ + siphash24_compress(e, r, state); + + add_slash = true; + } +} + +DEFINE_HASH_OPS(path_hash_ops, char, path_hash_func, path_compare); +DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(path_hash_ops_free, + char, path_hash_func, path_compare, free); +DEFINE_HASH_OPS_FULL(path_hash_ops_free_free, + char, path_hash_func, path_compare, free, + void, free); + +void trivial_hash_func(const void *p, struct siphash *state) { + siphash24_compress(&p, sizeof(p), state); +} + +int trivial_compare_func(const void *a, const void *b) { + return CMP(a, b); +} + +const struct hash_ops trivial_hash_ops = { + .hash = trivial_hash_func, + .compare = trivial_compare_func, +}; + +const struct hash_ops trivial_hash_ops_free = { + .hash = trivial_hash_func, + .compare = trivial_compare_func, + .free_key = free, +}; + +const struct hash_ops trivial_hash_ops_free_free = { + .hash = trivial_hash_func, + .compare = trivial_compare_func, + .free_key = free, + .free_value = free, +}; + +void uint64_hash_func(const uint64_t *p, struct siphash *state) { + siphash24_compress(p, sizeof(uint64_t), state); +} + +int uint64_compare_func(const uint64_t *a, const uint64_t *b) { + return CMP(*a, *b); +} + +DEFINE_HASH_OPS(uint64_hash_ops, uint64_t, uint64_hash_func, uint64_compare_func); + +#if SIZEOF_DEV_T != 8 +void devt_hash_func(const dev_t *p, struct siphash *state) { + siphash24_compress(p, sizeof(dev_t), state); +} +#endif + +int devt_compare_func(const dev_t *a, const dev_t *b) { + int r; + + r = CMP(major(*a), major(*b)); + if (r != 0) + return r; + + return CMP(minor(*a), minor(*b)); +} + +DEFINE_HASH_OPS(devt_hash_ops, dev_t, devt_hash_func, devt_compare_func); diff --git a/src/basic/hash-funcs.h b/src/basic/hash-funcs.h new file mode 100644 index 0000000..be64289 --- /dev/null +++ b/src/basic/hash-funcs.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "alloc-util.h" +#include "macro.h" +#include "siphash24.h" + +typedef void (*hash_func_t)(const void *p, struct siphash *state); +typedef int (*compare_func_t)(const void *a, const void *b); + +struct hash_ops { + hash_func_t hash; + compare_func_t compare; + free_func_t free_key; + free_func_t free_value; +}; + +#define _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, free_key_func, free_value_func, scope) \ + _unused_ static void (* UNIQ_T(static_hash_wrapper, uq))(const type *, struct siphash *) = hash_func; \ + _unused_ static int (* UNIQ_T(static_compare_wrapper, uq))(const type *, const type *) = compare_func; \ + scope const struct hash_ops name = { \ + .hash = (hash_func_t) hash_func, \ + .compare = (compare_func_t) compare_func, \ + .free_key = free_key_func, \ + .free_value = free_value_func, \ + } + +#define _DEFINE_FREE_FUNC(uq, type, wrapper_name, func) \ + /* Type-safe free function */ \ + static void UNIQ_T(wrapper_name, uq)(void *a) { \ + type *_a = a; \ + func(_a); \ + } + +#define _DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(uq, name, type, hash_func, compare_func, free_func, scope) \ + _DEFINE_FREE_FUNC(uq, type, static_free_wrapper, free_func); \ + _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, \ + UNIQ_T(static_free_wrapper, uq), NULL, scope) + +#define _DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(uq, name, type, hash_func, compare_func, type_value, free_func, scope) \ + _DEFINE_FREE_FUNC(uq, type_value, static_free_wrapper, free_func); \ + _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, \ + NULL, UNIQ_T(static_free_wrapper, uq), scope) + +#define _DEFINE_HASH_OPS_FULL(uq, name, type, hash_func, compare_func, free_key_func, type_value, free_value_func, scope) \ + _DEFINE_FREE_FUNC(uq, type, static_free_key_wrapper, free_key_func); \ + _DEFINE_FREE_FUNC(uq, type_value, static_free_value_wrapper, free_value_func); \ + _DEFINE_HASH_OPS(uq, name, type, hash_func, compare_func, \ + UNIQ_T(static_free_key_wrapper, uq), \ + UNIQ_T(static_free_value_wrapper, uq), scope) + +#define DEFINE_HASH_OPS(name, type, hash_func, compare_func) \ + _DEFINE_HASH_OPS(UNIQ, name, type, hash_func, compare_func, NULL, NULL,) + +#define DEFINE_PRIVATE_HASH_OPS(name, type, hash_func, compare_func) \ + _DEFINE_HASH_OPS(UNIQ, name, type, hash_func, compare_func, NULL, NULL, static) + +#define DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(name, type, hash_func, compare_func, free_func) \ + _DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, free_func,) + +#define DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(name, type, hash_func, compare_func, free_func) \ + _DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, free_func, static) + +#define DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(name, type, hash_func, compare_func, value_type, free_func) \ + _DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, value_type, free_func,) + +#define DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(name, type, hash_func, compare_func, value_type, free_func) \ + _DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(UNIQ, name, type, hash_func, compare_func, value_type, free_func, static) + +#define DEFINE_HASH_OPS_FULL(name, type, hash_func, compare_func, free_key_func, value_type, free_value_func) \ + _DEFINE_HASH_OPS_FULL(UNIQ, name, type, hash_func, compare_func, free_key_func, value_type, free_value_func,) + +#define DEFINE_PRIVATE_HASH_OPS_FULL(name, type, hash_func, compare_func, free_key_func, value_type, free_value_func) \ + _DEFINE_HASH_OPS_FULL(UNIQ, name, type, hash_func, compare_func, free_key_func, value_type, free_value_func, static) + +void string_hash_func(const char *p, struct siphash *state); +#define string_compare_func strcmp +extern const struct hash_ops string_hash_ops; +extern const struct hash_ops string_hash_ops_free; +extern const struct hash_ops string_hash_ops_free_free; +extern const struct hash_ops string_hash_ops_free_strv_free; + +void path_hash_func(const char *p, struct siphash *state); +extern const struct hash_ops path_hash_ops; +extern const struct hash_ops path_hash_ops_free; +extern const struct hash_ops path_hash_ops_free_free; + +/* This will compare the passed pointers directly, and will not dereference them. This is hence not useful for strings + * or suchlike. */ +void trivial_hash_func(const void *p, struct siphash *state); +int trivial_compare_func(const void *a, const void *b) _const_; +extern const struct hash_ops trivial_hash_ops; +extern const struct hash_ops trivial_hash_ops_free; +extern const struct hash_ops trivial_hash_ops_free_free; + +/* 32bit values we can always just embed in the pointer itself, but in order to support 32bit archs we need store 64bit + * values indirectly, since they don't fit in a pointer. */ +void uint64_hash_func(const uint64_t *p, struct siphash *state); +int uint64_compare_func(const uint64_t *a, const uint64_t *b) _pure_; +extern const struct hash_ops uint64_hash_ops; + +/* On some archs dev_t is 32bit, and on others 64bit. And sometimes it's 64bit on 32bit archs, and sometimes 32bit on + * 64bit archs. Yuck! */ +#if SIZEOF_DEV_T != 8 +void devt_hash_func(const dev_t *p, struct siphash *state); +#else +#define devt_hash_func uint64_hash_func +#endif + +int devt_compare_func(const dev_t *a, const dev_t *b) _pure_; +extern const struct hash_ops devt_hash_ops; diff --git a/src/basic/hashmap.c b/src/basic/hashmap.c new file mode 100644 index 0000000..6a14ea9 --- /dev/null +++ b/src/basic/hashmap.c @@ -0,0 +1,2100 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "fileio.h" +#include "hashmap.h" +#include "macro.h" +#include "memory-util.h" +#include "mempool.h" +#include "missing_syscall.h" +#include "process-util.h" +#include "random-util.h" +#include "set.h" +#include "siphash24.h" +#include "string-util.h" +#include "strv.h" + +#if ENABLE_DEBUG_HASHMAP +#include "list.h" +#endif + +/* + * Implementation of hashmaps. + * Addressing: open + * - uses less RAM compared to closed addressing (chaining), because + * our entries are small (especially in Sets, which tend to contain + * the majority of entries in systemd). + * Collision resolution: Robin Hood + * - tends to equalize displacement of entries from their optimal buckets. + * Probe sequence: linear + * - though theoretically worse than random probing/uniform hashing/double + * hashing, it is good for cache locality. + * + * References: + * Celis, P. 1986. Robin Hood Hashing. + * Ph.D. Dissertation. University of Waterloo, Waterloo, Ont., Canada, Canada. + * https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf + * - The results are derived for random probing. Suggests deletion with + * tombstones and two mean-centered search methods. None of that works + * well for linear probing. + * + * Janson, S. 2005. Individual displacements for linear probing hashing with different insertion policies. + * ACM Trans. Algorithms 1, 2 (October 2005), 177-213. + * DOI=10.1145/1103963.1103964 http://doi.acm.org/10.1145/1103963.1103964 + * http://www.math.uu.se/~svante/papers/sj157.pdf + * - Applies to Robin Hood with linear probing. Contains remarks on + * the unsuitability of mean-centered search with linear probing. + * + * Viola, A. 2005. Exact distribution of individual displacements in linear probing hashing. + * ACM Trans. Algorithms 1, 2 (October 2005), 214-242. + * DOI=10.1145/1103963.1103965 http://doi.acm.org/10.1145/1103963.1103965 + * - Similar to Janson. Note that Viola writes about C_{m,n} (number of probes + * in a successful search), and Janson writes about displacement. C = d + 1. + * + * Goossaert, E. 2013. Robin Hood hashing: backward shift deletion. + * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/ + * - Explanation of backward shift deletion with pictures. + * + * Khuong, P. 2013. The Other Robin Hood Hashing. + * http://www.pvk.ca/Blog/2013/11/26/the-other-robin-hood-hashing/ + * - Short summary of random vs. linear probing, and tombstones vs. backward shift. + */ + +/* + * XXX Ideas for improvement: + * For unordered hashmaps, randomize iteration order, similarly to Perl: + * http://blog.booking.com/hardening-perls-hash-function.html + */ + +/* INV_KEEP_FREE = 1 / (1 - max_load_factor) + * e.g. 1 / (1 - 0.8) = 5 ... keep one fifth of the buckets free. */ +#define INV_KEEP_FREE 5U + +/* Fields common to entries of all hashmap/set types */ +struct hashmap_base_entry { + const void *key; +}; + +/* Entry types for specific hashmap/set types + * hashmap_base_entry must be at the beginning of each entry struct. */ + +struct plain_hashmap_entry { + struct hashmap_base_entry b; + void *value; +}; + +struct ordered_hashmap_entry { + struct plain_hashmap_entry p; + unsigned iterate_next, iterate_previous; +}; + +struct set_entry { + struct hashmap_base_entry b; +}; + +/* In several functions it is advantageous to have the hash table extended + * virtually by a couple of additional buckets. We reserve special index values + * for these "swap" buckets. */ +#define _IDX_SWAP_BEGIN (UINT_MAX - 3) +#define IDX_PUT (_IDX_SWAP_BEGIN + 0) +#define IDX_TMP (_IDX_SWAP_BEGIN + 1) +#define _IDX_SWAP_END (_IDX_SWAP_BEGIN + 2) + +#define IDX_FIRST (UINT_MAX - 1) /* special index for freshly initialized iterators */ +#define IDX_NIL UINT_MAX /* special index value meaning "none" or "end" */ + +assert_cc(IDX_FIRST == _IDX_SWAP_END); +assert_cc(IDX_FIRST == _IDX_ITERATOR_FIRST); + +/* Storage space for the "swap" buckets. + * All entry types can fit into an ordered_hashmap_entry. */ +struct swap_entries { + struct ordered_hashmap_entry e[_IDX_SWAP_END - _IDX_SWAP_BEGIN]; +}; + +/* Distance from Initial Bucket */ +typedef uint8_t dib_raw_t; +#define DIB_RAW_OVERFLOW ((dib_raw_t)0xfdU) /* indicates DIB value is greater than representable */ +#define DIB_RAW_REHASH ((dib_raw_t)0xfeU) /* entry yet to be rehashed during in-place resize */ +#define DIB_RAW_FREE ((dib_raw_t)0xffU) /* a free bucket */ +#define DIB_RAW_INIT ((char)DIB_RAW_FREE) /* a byte to memset a DIB store with when initializing */ + +#define DIB_FREE UINT_MAX + +#if ENABLE_DEBUG_HASHMAP +struct hashmap_debug_info { + LIST_FIELDS(struct hashmap_debug_info, debug_list); + unsigned max_entries; /* high watermark of n_entries */ + + /* who allocated this hashmap */ + int line; + const char *file; + const char *func; + + /* fields to detect modification while iterating */ + unsigned put_count; /* counts puts into the hashmap */ + unsigned rem_count; /* counts removals from hashmap */ + unsigned last_rem_idx; /* remembers last removal index */ +}; + +/* Tracks all existing hashmaps. Get at it from gdb. See sd_dump_hashmaps.py */ +static LIST_HEAD(struct hashmap_debug_info, hashmap_debug_list); +static pthread_mutex_t hashmap_debug_list_mutex = PTHREAD_MUTEX_INITIALIZER; +#endif + +enum HashmapType { + HASHMAP_TYPE_PLAIN, + HASHMAP_TYPE_ORDERED, + HASHMAP_TYPE_SET, + _HASHMAP_TYPE_MAX +}; + +struct _packed_ indirect_storage { + void *storage; /* where buckets and DIBs are stored */ + uint8_t hash_key[HASH_KEY_SIZE]; /* hash key; changes during resize */ + + unsigned n_entries; /* number of stored entries */ + unsigned n_buckets; /* number of buckets */ + + unsigned idx_lowest_entry; /* Index below which all buckets are free. + Makes "while (hashmap_steal_first())" loops + O(n) instead of O(n^2) for unordered hashmaps. */ + uint8_t _pad[3]; /* padding for the whole HashmapBase */ + /* The bitfields in HashmapBase complete the alignment of the whole thing. */ +}; + +struct direct_storage { + /* This gives us 39 bytes on 64bit, or 35 bytes on 32bit. + * That's room for 4 set_entries + 4 DIB bytes + 3 unused bytes on 64bit, + * or 7 set_entries + 7 DIB bytes + 0 unused bytes on 32bit. */ + uint8_t storage[sizeof(struct indirect_storage)]; +}; + +#define DIRECT_BUCKETS(entry_t) \ + (sizeof(struct direct_storage) / (sizeof(entry_t) + sizeof(dib_raw_t))) + +/* We should be able to store at least one entry directly. */ +assert_cc(DIRECT_BUCKETS(struct ordered_hashmap_entry) >= 1); + +/* We have 3 bits for n_direct_entries. */ +assert_cc(DIRECT_BUCKETS(struct set_entry) < (1 << 3)); + +/* Hashmaps with directly stored entries all use this shared hash key. + * It's no big deal if the key is guessed, because there can be only + * a handful of directly stored entries in a hashmap. When a hashmap + * outgrows direct storage, it gets its own key for indirect storage. */ +static uint8_t shared_hash_key[HASH_KEY_SIZE]; + +/* Fields that all hashmap/set types must have */ +struct HashmapBase { + const struct hash_ops *hash_ops; /* hash and compare ops to use */ + + union _packed_ { + struct indirect_storage indirect; /* if has_indirect */ + struct direct_storage direct; /* if !has_indirect */ + }; + + enum HashmapType type:2; /* HASHMAP_TYPE_* */ + bool has_indirect:1; /* whether indirect storage is used */ + unsigned n_direct_entries:3; /* Number of entries in direct storage. + * Only valid if !has_indirect. */ + bool from_pool:1; /* whether was allocated from mempool */ + bool dirty:1; /* whether dirtied since last iterated_cache_get() */ + bool cached:1; /* whether this hashmap is being cached */ + +#if ENABLE_DEBUG_HASHMAP + struct hashmap_debug_info debug; +#endif +}; + +/* Specific hash types + * HashmapBase must be at the beginning of each hashmap struct. */ + +struct Hashmap { + struct HashmapBase b; +}; + +struct OrderedHashmap { + struct HashmapBase b; + unsigned iterate_list_head, iterate_list_tail; +}; + +struct Set { + struct HashmapBase b; +}; + +typedef struct CacheMem { + const void **ptr; + size_t n_populated; + bool active:1; +} CacheMem; + +struct IteratedCache { + HashmapBase *hashmap; + CacheMem keys, values; +}; + +DEFINE_MEMPOOL(hashmap_pool, Hashmap, 8); +DEFINE_MEMPOOL(ordered_hashmap_pool, OrderedHashmap, 8); +/* No need for a separate Set pool */ +assert_cc(sizeof(Hashmap) == sizeof(Set)); + +struct hashmap_type_info { + size_t head_size; + size_t entry_size; + struct mempool *mempool; + unsigned n_direct_buckets; +}; + +static _used_ const struct hashmap_type_info hashmap_type_info[_HASHMAP_TYPE_MAX] = { + [HASHMAP_TYPE_PLAIN] = { + .head_size = sizeof(Hashmap), + .entry_size = sizeof(struct plain_hashmap_entry), + .mempool = &hashmap_pool, + .n_direct_buckets = DIRECT_BUCKETS(struct plain_hashmap_entry), + }, + [HASHMAP_TYPE_ORDERED] = { + .head_size = sizeof(OrderedHashmap), + .entry_size = sizeof(struct ordered_hashmap_entry), + .mempool = &ordered_hashmap_pool, + .n_direct_buckets = DIRECT_BUCKETS(struct ordered_hashmap_entry), + }, + [HASHMAP_TYPE_SET] = { + .head_size = sizeof(Set), + .entry_size = sizeof(struct set_entry), + .mempool = &hashmap_pool, + .n_direct_buckets = DIRECT_BUCKETS(struct set_entry), + }, +}; + +#if VALGRIND +_destructor_ static void cleanup_pools(void) { + _cleanup_free_ char *t = NULL; + int r; + + /* Be nice to valgrind */ + + /* The pool is only allocated by the main thread, but the memory can + * be passed to other threads. Let's clean up if we are the main thread + * and no other threads are live. */ + /* We build our own is_main_thread() here, which doesn't use C11 + * TLS based caching of the result. That's because valgrind apparently + * doesn't like malloc() (which C11 TLS internally uses) to be called + * from a GCC destructors. */ + if (getpid() != gettid()) + return; + + r = get_proc_field("/proc/self/status", "Threads", WHITESPACE, &t); + if (r < 0 || !streq(t, "1")) + return; + + mempool_drop(&hashmap_pool); + mempool_drop(&ordered_hashmap_pool); +} +#endif + +static unsigned n_buckets(HashmapBase *h) { + return h->has_indirect ? h->indirect.n_buckets + : hashmap_type_info[h->type].n_direct_buckets; +} + +static unsigned n_entries(HashmapBase *h) { + return h->has_indirect ? h->indirect.n_entries + : h->n_direct_entries; +} + +static void n_entries_inc(HashmapBase *h) { + if (h->has_indirect) + h->indirect.n_entries++; + else + h->n_direct_entries++; +} + +static void n_entries_dec(HashmapBase *h) { + if (h->has_indirect) + h->indirect.n_entries--; + else + h->n_direct_entries--; +} + +static void* storage_ptr(HashmapBase *h) { + return h->has_indirect ? h->indirect.storage + : h->direct.storage; +} + +static uint8_t* hash_key(HashmapBase *h) { + return h->has_indirect ? h->indirect.hash_key + : shared_hash_key; +} + +static unsigned base_bucket_hash(HashmapBase *h, const void *p) { + struct siphash state; + uint64_t hash; + + siphash24_init(&state, hash_key(h)); + + h->hash_ops->hash(p, &state); + + hash = siphash24_finalize(&state); + + return (unsigned) (hash % n_buckets(h)); +} +#define bucket_hash(h, p) base_bucket_hash(HASHMAP_BASE(h), p) + +static void base_set_dirty(HashmapBase *h) { + h->dirty = true; +} +#define hashmap_set_dirty(h) base_set_dirty(HASHMAP_BASE(h)) + +static void get_hash_key(uint8_t hash_key[HASH_KEY_SIZE], bool reuse_is_ok) { + static uint8_t current[HASH_KEY_SIZE]; + static bool current_initialized = false; + + /* Returns a hash function key to use. In order to keep things + * fast we will not generate a new key each time we allocate a + * new hash table. Instead, we'll just reuse the most recently + * generated one, except if we never generated one or when we + * are rehashing an entire hash table because we reached a + * fill level */ + + if (!current_initialized || !reuse_is_ok) { + random_bytes(current, sizeof(current)); + current_initialized = true; + } + + memcpy(hash_key, current, sizeof(current)); +} + +static struct hashmap_base_entry* bucket_at(HashmapBase *h, unsigned idx) { + return (struct hashmap_base_entry*) + ((uint8_t*) storage_ptr(h) + idx * hashmap_type_info[h->type].entry_size); +} + +static struct plain_hashmap_entry* plain_bucket_at(Hashmap *h, unsigned idx) { + return (struct plain_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx); +} + +static struct ordered_hashmap_entry* ordered_bucket_at(OrderedHashmap *h, unsigned idx) { + return (struct ordered_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx); +} + +static struct set_entry *set_bucket_at(Set *h, unsigned idx) { + return (struct set_entry*) bucket_at(HASHMAP_BASE(h), idx); +} + +static struct ordered_hashmap_entry* bucket_at_swap(struct swap_entries *swap, unsigned idx) { + return &swap->e[idx - _IDX_SWAP_BEGIN]; +} + +/* Returns a pointer to the bucket at index idx. + * Understands real indexes and swap indexes, hence "_virtual". */ +static struct hashmap_base_entry* bucket_at_virtual(HashmapBase *h, struct swap_entries *swap, + unsigned idx) { + if (idx < _IDX_SWAP_BEGIN) + return bucket_at(h, idx); + + if (idx < _IDX_SWAP_END) + return &bucket_at_swap(swap, idx)->p.b; + + assert_not_reached(); +} + +static dib_raw_t* dib_raw_ptr(HashmapBase *h) { + return (dib_raw_t*) + ((uint8_t*) storage_ptr(h) + hashmap_type_info[h->type].entry_size * n_buckets(h)); +} + +static unsigned bucket_distance(HashmapBase *h, unsigned idx, unsigned from) { + return idx >= from ? idx - from + : n_buckets(h) + idx - from; +} + +static unsigned bucket_calculate_dib(HashmapBase *h, unsigned idx, dib_raw_t raw_dib) { + unsigned initial_bucket; + + if (raw_dib == DIB_RAW_FREE) + return DIB_FREE; + + if (_likely_(raw_dib < DIB_RAW_OVERFLOW)) + return raw_dib; + + /* + * Having an overflow DIB value is very unlikely. The hash function + * would have to be bad. For example, in a table of size 2^24 filled + * to load factor 0.9 the maximum observed DIB is only about 60. + * In theory (assuming I used Maxima correctly), for an infinite size + * hash table with load factor 0.8 the probability of a given entry + * having DIB > 40 is 1.9e-8. + * This returns the correct DIB value by recomputing the hash value in + * the unlikely case. XXX Hitting this case could be a hint to rehash. + */ + initial_bucket = bucket_hash(h, bucket_at(h, idx)->key); + return bucket_distance(h, idx, initial_bucket); +} + +static void bucket_set_dib(HashmapBase *h, unsigned idx, unsigned dib) { + dib_raw_ptr(h)[idx] = dib != DIB_FREE ? MIN(dib, DIB_RAW_OVERFLOW) : DIB_RAW_FREE; +} + +static unsigned skip_free_buckets(HashmapBase *h, unsigned idx) { + dib_raw_t *dibs; + + dibs = dib_raw_ptr(h); + + for ( ; idx < n_buckets(h); idx++) + if (dibs[idx] != DIB_RAW_FREE) + return idx; + + return IDX_NIL; +} + +static void bucket_mark_free(HashmapBase *h, unsigned idx) { + memzero(bucket_at(h, idx), hashmap_type_info[h->type].entry_size); + bucket_set_dib(h, idx, DIB_FREE); +} + +static void bucket_move_entry(HashmapBase *h, struct swap_entries *swap, + unsigned from, unsigned to) { + struct hashmap_base_entry *e_from, *e_to; + + assert(from != to); + + e_from = bucket_at_virtual(h, swap, from); + e_to = bucket_at_virtual(h, swap, to); + + memcpy(e_to, e_from, hashmap_type_info[h->type].entry_size); + + if (h->type == HASHMAP_TYPE_ORDERED) { + OrderedHashmap *lh = (OrderedHashmap*) h; + struct ordered_hashmap_entry *le, *le_to; + + le_to = (struct ordered_hashmap_entry*) e_to; + + if (le_to->iterate_next != IDX_NIL) { + le = (struct ordered_hashmap_entry*) + bucket_at_virtual(h, swap, le_to->iterate_next); + le->iterate_previous = to; + } + + if (le_to->iterate_previous != IDX_NIL) { + le = (struct ordered_hashmap_entry*) + bucket_at_virtual(h, swap, le_to->iterate_previous); + le->iterate_next = to; + } + + if (lh->iterate_list_head == from) + lh->iterate_list_head = to; + if (lh->iterate_list_tail == from) + lh->iterate_list_tail = to; + } +} + +static unsigned next_idx(HashmapBase *h, unsigned idx) { + return (idx + 1U) % n_buckets(h); +} + +static unsigned prev_idx(HashmapBase *h, unsigned idx) { + return (n_buckets(h) + idx - 1U) % n_buckets(h); +} + +static void* entry_value(HashmapBase *h, struct hashmap_base_entry *e) { + switch (h->type) { + + case HASHMAP_TYPE_PLAIN: + case HASHMAP_TYPE_ORDERED: + return ((struct plain_hashmap_entry*)e)->value; + + case HASHMAP_TYPE_SET: + return (void*) e->key; + + default: + assert_not_reached(); + } +} + +static void base_remove_entry(HashmapBase *h, unsigned idx) { + unsigned left, right, prev, dib; + dib_raw_t raw_dib, *dibs; + + dibs = dib_raw_ptr(h); + assert(dibs[idx] != DIB_RAW_FREE); + +#if ENABLE_DEBUG_HASHMAP + h->debug.rem_count++; + h->debug.last_rem_idx = idx; +#endif + + left = idx; + /* Find the stop bucket ("right"). It is either free or has DIB == 0. */ + for (right = next_idx(h, left); ; right = next_idx(h, right)) { + raw_dib = dibs[right]; + if (IN_SET(raw_dib, 0, DIB_RAW_FREE)) + break; + + /* The buckets are not supposed to be all occupied and with DIB > 0. + * That would mean we could make everyone better off by shifting them + * backward. This scenario is impossible. */ + assert(left != right); + } + + if (h->type == HASHMAP_TYPE_ORDERED) { + OrderedHashmap *lh = (OrderedHashmap*) h; + struct ordered_hashmap_entry *le = ordered_bucket_at(lh, idx); + + if (le->iterate_next != IDX_NIL) + ordered_bucket_at(lh, le->iterate_next)->iterate_previous = le->iterate_previous; + else + lh->iterate_list_tail = le->iterate_previous; + + if (le->iterate_previous != IDX_NIL) + ordered_bucket_at(lh, le->iterate_previous)->iterate_next = le->iterate_next; + else + lh->iterate_list_head = le->iterate_next; + } + + /* Now shift all buckets in the interval (left, right) one step backwards */ + for (prev = left, left = next_idx(h, left); left != right; + prev = left, left = next_idx(h, left)) { + dib = bucket_calculate_dib(h, left, dibs[left]); + assert(dib != 0); + bucket_move_entry(h, NULL, left, prev); + bucket_set_dib(h, prev, dib - 1); + } + + bucket_mark_free(h, prev); + n_entries_dec(h); + base_set_dirty(h); +} +#define remove_entry(h, idx) base_remove_entry(HASHMAP_BASE(h), idx) + +static unsigned hashmap_iterate_in_insertion_order(OrderedHashmap *h, Iterator *i) { + struct ordered_hashmap_entry *e; + unsigned idx; + + assert(h); + assert(i); + + if (i->idx == IDX_NIL) + goto at_end; + + if (i->idx == IDX_FIRST && h->iterate_list_head == IDX_NIL) + goto at_end; + + if (i->idx == IDX_FIRST) { + idx = h->iterate_list_head; + e = ordered_bucket_at(h, idx); + } else { + idx = i->idx; + e = ordered_bucket_at(h, idx); + /* + * We allow removing the current entry while iterating, but removal may cause + * a backward shift. The next entry may thus move one bucket to the left. + * To detect when it happens, we remember the key pointer of the entry we were + * going to iterate next. If it does not match, there was a backward shift. + */ + if (e->p.b.key != i->next_key) { + idx = prev_idx(HASHMAP_BASE(h), idx); + e = ordered_bucket_at(h, idx); + } + assert(e->p.b.key == i->next_key); + } + +#if ENABLE_DEBUG_HASHMAP + i->prev_idx = idx; +#endif + + if (e->iterate_next != IDX_NIL) { + struct ordered_hashmap_entry *n; + i->idx = e->iterate_next; + n = ordered_bucket_at(h, i->idx); + i->next_key = n->p.b.key; + } else + i->idx = IDX_NIL; + + return idx; + +at_end: + i->idx = IDX_NIL; + return IDX_NIL; +} + +static unsigned hashmap_iterate_in_internal_order(HashmapBase *h, Iterator *i) { + unsigned idx; + + assert(h); + assert(i); + + if (i->idx == IDX_NIL) + goto at_end; + + if (i->idx == IDX_FIRST) { + /* fast forward to the first occupied bucket */ + if (h->has_indirect) { + i->idx = skip_free_buckets(h, h->indirect.idx_lowest_entry); + h->indirect.idx_lowest_entry = i->idx; + } else + i->idx = skip_free_buckets(h, 0); + + if (i->idx == IDX_NIL) + goto at_end; + } else { + struct hashmap_base_entry *e; + + assert(i->idx > 0); + + e = bucket_at(h, i->idx); + /* + * We allow removing the current entry while iterating, but removal may cause + * a backward shift. The next entry may thus move one bucket to the left. + * To detect when it happens, we remember the key pointer of the entry we were + * going to iterate next. If it does not match, there was a backward shift. + */ + if (e->key != i->next_key) + e = bucket_at(h, --i->idx); + + assert(e->key == i->next_key); + } + + idx = i->idx; +#if ENABLE_DEBUG_HASHMAP + i->prev_idx = idx; +#endif + + i->idx = skip_free_buckets(h, i->idx + 1); + if (i->idx != IDX_NIL) + i->next_key = bucket_at(h, i->idx)->key; + else + i->idx = IDX_NIL; + + return idx; + +at_end: + i->idx = IDX_NIL; + return IDX_NIL; +} + +static unsigned hashmap_iterate_entry(HashmapBase *h, Iterator *i) { + if (!h) { + i->idx = IDX_NIL; + return IDX_NIL; + } + +#if ENABLE_DEBUG_HASHMAP + if (i->idx == IDX_FIRST) { + i->put_count = h->debug.put_count; + i->rem_count = h->debug.rem_count; + } else { + /* While iterating, must not add any new entries */ + assert(i->put_count == h->debug.put_count); + /* ... or remove entries other than the current one */ + assert(i->rem_count == h->debug.rem_count || + (i->rem_count == h->debug.rem_count - 1 && + i->prev_idx == h->debug.last_rem_idx)); + /* Reset our removals counter */ + i->rem_count = h->debug.rem_count; + } +#endif + + return h->type == HASHMAP_TYPE_ORDERED ? hashmap_iterate_in_insertion_order((OrderedHashmap*) h, i) + : hashmap_iterate_in_internal_order(h, i); +} + +bool _hashmap_iterate(HashmapBase *h, Iterator *i, void **value, const void **key) { + struct hashmap_base_entry *e; + void *data; + unsigned idx; + + idx = hashmap_iterate_entry(h, i); + if (idx == IDX_NIL) { + if (value) + *value = NULL; + if (key) + *key = NULL; + + return false; + } + + e = bucket_at(h, idx); + data = entry_value(h, e); + if (value) + *value = data; + if (key) + *key = e->key; + + return true; +} + +#define HASHMAP_FOREACH_IDX(idx, h, i) \ + for ((i) = ITERATOR_FIRST, (idx) = hashmap_iterate_entry((h), &(i)); \ + (idx != IDX_NIL); \ + (idx) = hashmap_iterate_entry((h), &(i))) + +IteratedCache* _hashmap_iterated_cache_new(HashmapBase *h) { + IteratedCache *cache; + + assert(h); + assert(!h->cached); + + if (h->cached) + return NULL; + + cache = new0(IteratedCache, 1); + if (!cache) + return NULL; + + cache->hashmap = h; + h->cached = true; + + return cache; +} + +static void reset_direct_storage(HashmapBase *h) { + const struct hashmap_type_info *hi = &hashmap_type_info[h->type]; + void *p; + + assert(!h->has_indirect); + + p = mempset(h->direct.storage, 0, hi->entry_size * hi->n_direct_buckets); + memset(p, DIB_RAW_INIT, sizeof(dib_raw_t) * hi->n_direct_buckets); +} + +static void shared_hash_key_initialize(void) { + random_bytes(shared_hash_key, sizeof(shared_hash_key)); +} + +static struct HashmapBase* hashmap_base_new(const struct hash_ops *hash_ops, enum HashmapType type HASHMAP_DEBUG_PARAMS) { + HashmapBase *h; + const struct hashmap_type_info *hi = &hashmap_type_info[type]; + + bool use_pool = mempool_enabled && mempool_enabled(); + + h = use_pool ? mempool_alloc0_tile(hi->mempool) : malloc0(hi->head_size); + if (!h) + return NULL; + + h->type = type; + h->from_pool = use_pool; + h->hash_ops = hash_ops ?: &trivial_hash_ops; + + if (type == HASHMAP_TYPE_ORDERED) { + OrderedHashmap *lh = (OrderedHashmap*)h; + lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL; + } + + reset_direct_storage(h); + + static pthread_once_t once = PTHREAD_ONCE_INIT; + assert_se(pthread_once(&once, shared_hash_key_initialize) == 0); + +#if ENABLE_DEBUG_HASHMAP + h->debug.func = func; + h->debug.file = file; + h->debug.line = line; + assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0); + LIST_PREPEND(debug_list, hashmap_debug_list, &h->debug); + assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0); +#endif + + return h; +} + +Hashmap *_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) { + return (Hashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS); +} + +OrderedHashmap *_ordered_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) { + return (OrderedHashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS); +} + +Set *_set_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) { + return (Set*) hashmap_base_new(hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS); +} + +static int hashmap_base_ensure_allocated(HashmapBase **h, const struct hash_ops *hash_ops, + enum HashmapType type HASHMAP_DEBUG_PARAMS) { + HashmapBase *q; + + assert(h); + + if (*h) + return 0; + + q = hashmap_base_new(hash_ops, type HASHMAP_DEBUG_PASS_ARGS); + if (!q) + return -ENOMEM; + + *h = q; + return 1; +} + +int _hashmap_ensure_allocated(Hashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) { + return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS); +} + +int _ordered_hashmap_ensure_allocated(OrderedHashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) { + return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS); +} + +int _set_ensure_allocated(Set **s, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) { + return hashmap_base_ensure_allocated((HashmapBase**)s, hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS); +} + +int _hashmap_ensure_put(Hashmap **h, const struct hash_ops *hash_ops, const void *key, void *value HASHMAP_DEBUG_PARAMS) { + int r; + + r = _hashmap_ensure_allocated(h, hash_ops HASHMAP_DEBUG_PASS_ARGS); + if (r < 0) + return r; + + return hashmap_put(*h, key, value); +} + +int _ordered_hashmap_ensure_put(OrderedHashmap **h, const struct hash_ops *hash_ops, const void *key, void *value HASHMAP_DEBUG_PARAMS) { + int r; + + r = _ordered_hashmap_ensure_allocated(h, hash_ops HASHMAP_DEBUG_PASS_ARGS); + if (r < 0) + return r; + + return ordered_hashmap_put(*h, key, value); +} + +static void hashmap_free_no_clear(HashmapBase *h) { + assert(!h->has_indirect); + assert(h->n_direct_entries == 0); + +#if ENABLE_DEBUG_HASHMAP + assert_se(pthread_mutex_lock(&hashmap_debug_list_mutex) == 0); + LIST_REMOVE(debug_list, hashmap_debug_list, &h->debug); + assert_se(pthread_mutex_unlock(&hashmap_debug_list_mutex) == 0); +#endif + + if (h->from_pool) { + /* Ensure that the object didn't get migrated between threads. */ + assert_se(is_main_thread()); + mempool_free_tile(hashmap_type_info[h->type].mempool, h); + } else + free(h); +} + +HashmapBase* _hashmap_free(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) { + if (h) { + _hashmap_clear(h, default_free_key, default_free_value); + hashmap_free_no_clear(h); + } + + return NULL; +} + +void _hashmap_clear(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value) { + free_func_t free_key, free_value; + if (!h) + return; + + free_key = h->hash_ops->free_key ?: default_free_key; + free_value = h->hash_ops->free_value ?: default_free_value; + + if (free_key || free_value) { + + /* If destructor calls are defined, let's destroy things defensively: let's take the item out of the + * hash table, and only then call the destructor functions. If these destructors then try to unregister + * themselves from our hash table a second time, the entry is already gone. */ + + while (_hashmap_size(h) > 0) { + void *k = NULL; + void *v; + + v = _hashmap_first_key_and_value(h, true, &k); + + if (free_key) + free_key(k); + + if (free_value) + free_value(v); + } + } + + if (h->has_indirect) { + free(h->indirect.storage); + h->has_indirect = false; + } + + h->n_direct_entries = 0; + reset_direct_storage(h); + + if (h->type == HASHMAP_TYPE_ORDERED) { + OrderedHashmap *lh = (OrderedHashmap*) h; + lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL; + } + + base_set_dirty(h); +} + +static int resize_buckets(HashmapBase *h, unsigned entries_add); + +/* + * Finds an empty bucket to put an entry into, starting the scan at 'idx'. + * Performs Robin Hood swaps as it goes. The entry to put must be placed + * by the caller into swap slot IDX_PUT. + * If used for in-place resizing, may leave a displaced entry in swap slot + * IDX_PUT. Caller must rehash it next. + * Returns: true if it left a displaced entry to rehash next in IDX_PUT, + * false otherwise. + */ +static bool hashmap_put_robin_hood(HashmapBase *h, unsigned idx, + struct swap_entries *swap) { + dib_raw_t raw_dib, *dibs; + unsigned dib, distance; + +#if ENABLE_DEBUG_HASHMAP + h->debug.put_count++; +#endif + + dibs = dib_raw_ptr(h); + + for (distance = 0; ; distance++) { + raw_dib = dibs[idx]; + if (IN_SET(raw_dib, DIB_RAW_FREE, DIB_RAW_REHASH)) { + if (raw_dib == DIB_RAW_REHASH) + bucket_move_entry(h, swap, idx, IDX_TMP); + + if (h->has_indirect && h->indirect.idx_lowest_entry > idx) + h->indirect.idx_lowest_entry = idx; + + bucket_set_dib(h, idx, distance); + bucket_move_entry(h, swap, IDX_PUT, idx); + if (raw_dib == DIB_RAW_REHASH) { + bucket_move_entry(h, swap, IDX_TMP, IDX_PUT); + return true; + } + + return false; + } + + dib = bucket_calculate_dib(h, idx, raw_dib); + + if (dib < distance) { + /* Found a wealthier entry. Go Robin Hood! */ + bucket_set_dib(h, idx, distance); + + /* swap the entries */ + bucket_move_entry(h, swap, idx, IDX_TMP); + bucket_move_entry(h, swap, IDX_PUT, idx); + bucket_move_entry(h, swap, IDX_TMP, IDX_PUT); + + distance = dib; + } + + idx = next_idx(h, idx); + } +} + +/* + * Puts an entry into a hashmap, boldly - no check whether key already exists. + * The caller must place the entry (only its key and value, not link indexes) + * in swap slot IDX_PUT. + * Caller must ensure: the key does not exist yet in the hashmap. + * that resize is not needed if !may_resize. + * Returns: 1 if entry was put successfully. + * -ENOMEM if may_resize==true and resize failed with -ENOMEM. + * Cannot return -ENOMEM if !may_resize. + */ +static int hashmap_base_put_boldly(HashmapBase *h, unsigned idx, + struct swap_entries *swap, bool may_resize) { + struct ordered_hashmap_entry *new_entry; + int r; + + assert(idx < n_buckets(h)); + + new_entry = bucket_at_swap(swap, IDX_PUT); + + if (may_resize) { + r = resize_buckets(h, 1); + if (r < 0) + return r; + if (r > 0) + idx = bucket_hash(h, new_entry->p.b.key); + } + assert(n_entries(h) < n_buckets(h)); + + if (h->type == HASHMAP_TYPE_ORDERED) { + OrderedHashmap *lh = (OrderedHashmap*) h; + + new_entry->iterate_next = IDX_NIL; + new_entry->iterate_previous = lh->iterate_list_tail; + + if (lh->iterate_list_tail != IDX_NIL) { + struct ordered_hashmap_entry *old_tail; + + old_tail = ordered_bucket_at(lh, lh->iterate_list_tail); + assert(old_tail->iterate_next == IDX_NIL); + old_tail->iterate_next = IDX_PUT; + } + + lh->iterate_list_tail = IDX_PUT; + if (lh->iterate_list_head == IDX_NIL) + lh->iterate_list_head = IDX_PUT; + } + + assert_se(hashmap_put_robin_hood(h, idx, swap) == false); + + n_entries_inc(h); +#if ENABLE_DEBUG_HASHMAP + h->debug.max_entries = MAX(h->debug.max_entries, n_entries(h)); +#endif + + base_set_dirty(h); + + return 1; +} +#define hashmap_put_boldly(h, idx, swap, may_resize) \ + hashmap_base_put_boldly(HASHMAP_BASE(h), idx, swap, may_resize) + +/* + * Returns 0 if resize is not needed. + * 1 if successfully resized. + * -ENOMEM on allocation failure. + */ +static int resize_buckets(HashmapBase *h, unsigned entries_add) { + struct swap_entries swap; + void *new_storage; + dib_raw_t *old_dibs, *new_dibs; + const struct hashmap_type_info *hi; + unsigned idx, optimal_idx; + unsigned old_n_buckets, new_n_buckets, n_rehashed, new_n_entries; + uint8_t new_shift; + bool rehash_next; + + assert(h); + + hi = &hashmap_type_info[h->type]; + new_n_entries = n_entries(h) + entries_add; + + /* overflow? */ + if (_unlikely_(new_n_entries < entries_add)) + return -ENOMEM; + + /* For direct storage we allow 100% load, because it's tiny. */ + if (!h->has_indirect && new_n_entries <= hi->n_direct_buckets) + return 0; + + /* + * Load factor = n/m = 1 - (1/INV_KEEP_FREE). + * From it follows: m = n + n/(INV_KEEP_FREE - 1) + */ + new_n_buckets = new_n_entries + new_n_entries / (INV_KEEP_FREE - 1); + /* overflow? */ + if (_unlikely_(new_n_buckets < new_n_entries)) + return -ENOMEM; + + if (_unlikely_(new_n_buckets > UINT_MAX / (hi->entry_size + sizeof(dib_raw_t)))) + return -ENOMEM; + + old_n_buckets = n_buckets(h); + + if (_likely_(new_n_buckets <= old_n_buckets)) + return 0; + + new_shift = log2u_round_up(MAX( + new_n_buckets * (hi->entry_size + sizeof(dib_raw_t)), + 2 * sizeof(struct direct_storage))); + + /* Realloc storage (buckets and DIB array). */ + new_storage = realloc(h->has_indirect ? h->indirect.storage : NULL, + 1U << new_shift); + if (!new_storage) + return -ENOMEM; + + /* Must upgrade direct to indirect storage. */ + if (!h->has_indirect) { + memcpy(new_storage, h->direct.storage, + old_n_buckets * (hi->entry_size + sizeof(dib_raw_t))); + h->indirect.n_entries = h->n_direct_entries; + h->indirect.idx_lowest_entry = 0; + h->n_direct_entries = 0; + } + + /* Get a new hash key. If we've just upgraded to indirect storage, + * allow reusing a previously generated key. It's still a different key + * from the shared one that we used for direct storage. */ + get_hash_key(h->indirect.hash_key, !h->has_indirect); + + h->has_indirect = true; + h->indirect.storage = new_storage; + h->indirect.n_buckets = (1U << new_shift) / + (hi->entry_size + sizeof(dib_raw_t)); + + old_dibs = (dib_raw_t*)((uint8_t*) new_storage + hi->entry_size * old_n_buckets); + new_dibs = dib_raw_ptr(h); + + /* + * Move the DIB array to the new place, replacing valid DIB values with + * DIB_RAW_REHASH to indicate all of the used buckets need rehashing. + * Note: Overlap is not possible, because we have at least doubled the + * number of buckets and dib_raw_t is smaller than any entry type. + */ + for (idx = 0; idx < old_n_buckets; idx++) { + assert(old_dibs[idx] != DIB_RAW_REHASH); + new_dibs[idx] = old_dibs[idx] == DIB_RAW_FREE ? DIB_RAW_FREE + : DIB_RAW_REHASH; + } + + /* Zero the area of newly added entries (including the old DIB area) */ + memzero(bucket_at(h, old_n_buckets), + (n_buckets(h) - old_n_buckets) * hi->entry_size); + + /* The upper half of the new DIB array needs initialization */ + memset(&new_dibs[old_n_buckets], DIB_RAW_INIT, + (n_buckets(h) - old_n_buckets) * sizeof(dib_raw_t)); + + /* Rehash entries that need it */ + n_rehashed = 0; + for (idx = 0; idx < old_n_buckets; idx++) { + if (new_dibs[idx] != DIB_RAW_REHASH) + continue; + + optimal_idx = bucket_hash(h, bucket_at(h, idx)->key); + + /* + * Not much to do if by luck the entry hashes to its current + * location. Just set its DIB. + */ + if (optimal_idx == idx) { + new_dibs[idx] = 0; + n_rehashed++; + continue; + } + + new_dibs[idx] = DIB_RAW_FREE; + bucket_move_entry(h, &swap, idx, IDX_PUT); + /* bucket_move_entry does not clear the source */ + memzero(bucket_at(h, idx), hi->entry_size); + + do { + /* + * Find the new bucket for the current entry. This may make + * another entry homeless and load it into IDX_PUT. + */ + rehash_next = hashmap_put_robin_hood(h, optimal_idx, &swap); + n_rehashed++; + + /* Did the current entry displace another one? */ + if (rehash_next) + optimal_idx = bucket_hash(h, bucket_at_swap(&swap, IDX_PUT)->p.b.key); + } while (rehash_next); + } + + assert_se(n_rehashed == n_entries(h)); + + return 1; +} + +/* + * Finds an entry with a matching key + * Returns: index of the found entry, or IDX_NIL if not found. + */ +static unsigned base_bucket_scan(HashmapBase *h, unsigned idx, const void *key) { + struct hashmap_base_entry *e; + unsigned dib, distance; + dib_raw_t *dibs = dib_raw_ptr(h); + + assert(idx < n_buckets(h)); + + for (distance = 0; ; distance++) { + if (dibs[idx] == DIB_RAW_FREE) + return IDX_NIL; + + dib = bucket_calculate_dib(h, idx, dibs[idx]); + + if (dib < distance) + return IDX_NIL; + if (dib == distance) { + e = bucket_at(h, idx); + if (h->hash_ops->compare(e->key, key) == 0) + return idx; + } + + idx = next_idx(h, idx); + } +} +#define bucket_scan(h, idx, key) base_bucket_scan(HASHMAP_BASE(h), idx, key) + +int hashmap_put(Hashmap *h, const void *key, void *value) { + struct swap_entries swap; + struct plain_hashmap_entry *e; + unsigned hash, idx; + + assert(h); + + hash = bucket_hash(h, key); + idx = bucket_scan(h, hash, key); + if (idx != IDX_NIL) { + e = plain_bucket_at(h, idx); + if (e->value == value) + return 0; + return -EEXIST; + } + + e = &bucket_at_swap(&swap, IDX_PUT)->p; + e->b.key = key; + e->value = value; + return hashmap_put_boldly(h, hash, &swap, true); +} + +int set_put(Set *s, const void *key) { + struct swap_entries swap; + struct hashmap_base_entry *e; + unsigned hash, idx; + + assert(s); + + hash = bucket_hash(s, key); + idx = bucket_scan(s, hash, key); + if (idx != IDX_NIL) + return 0; + + e = &bucket_at_swap(&swap, IDX_PUT)->p.b; + e->key = key; + return hashmap_put_boldly(s, hash, &swap, true); +} + +int _set_ensure_put(Set **s, const struct hash_ops *hash_ops, const void *key HASHMAP_DEBUG_PARAMS) { + int r; + + r = _set_ensure_allocated(s, hash_ops HASHMAP_DEBUG_PASS_ARGS); + if (r < 0) + return r; + + return set_put(*s, key); +} + +int _set_ensure_consume(Set **s, const struct hash_ops *hash_ops, void *key HASHMAP_DEBUG_PARAMS) { + int r; + + r = _set_ensure_put(s, hash_ops, key HASHMAP_DEBUG_PASS_ARGS); + if (r <= 0) { + if (hash_ops && hash_ops->free_key) + hash_ops->free_key(key); + else + free(key); + } + + return r; +} + +int hashmap_replace(Hashmap *h, const void *key, void *value) { + struct swap_entries swap; + struct plain_hashmap_entry *e; + unsigned hash, idx; + + assert(h); + + hash = bucket_hash(h, key); + idx = bucket_scan(h, hash, key); + if (idx != IDX_NIL) { + e = plain_bucket_at(h, idx); +#if ENABLE_DEBUG_HASHMAP + /* Although the key is equal, the key pointer may have changed, + * and this would break our assumption for iterating. So count + * this operation as incompatible with iteration. */ + if (e->b.key != key) { + h->b.debug.put_count++; + h->b.debug.rem_count++; + h->b.debug.last_rem_idx = idx; + } +#endif + e->b.key = key; + e->value = value; + hashmap_set_dirty(h); + + return 0; + } + + e = &bucket_at_swap(&swap, IDX_PUT)->p; + e->b.key = key; + e->value = value; + return hashmap_put_boldly(h, hash, &swap, true); +} + +int hashmap_update(Hashmap *h, const void *key, void *value) { + struct plain_hashmap_entry *e; + unsigned hash, idx; + + assert(h); + + hash = bucket_hash(h, key); + idx = bucket_scan(h, hash, key); + if (idx == IDX_NIL) + return -ENOENT; + + e = plain_bucket_at(h, idx); + e->value = value; + hashmap_set_dirty(h); + + return 0; +} + +void* _hashmap_get(HashmapBase *h, const void *key) { + struct hashmap_base_entry *e; + unsigned hash, idx; + + if (!h) + return NULL; + + hash = bucket_hash(h, key); + idx = bucket_scan(h, hash, key); + if (idx == IDX_NIL) + return NULL; + + e = bucket_at(h, idx); + return entry_value(h, e); +} + +void* hashmap_get2(Hashmap *h, const void *key, void **key2) { + struct plain_hashmap_entry *e; + unsigned hash, idx; + + if (!h) + return NULL; + + hash = bucket_hash(h, key); + idx = bucket_scan(h, hash, key); + if (idx == IDX_NIL) + return NULL; + + e = plain_bucket_at(h, idx); + if (key2) + *key2 = (void*) e->b.key; + + return e->value; +} + +bool _hashmap_contains(HashmapBase *h, const void *key) { + unsigned hash; + + if (!h) + return false; + + hash = bucket_hash(h, key); + return bucket_scan(h, hash, key) != IDX_NIL; +} + +void* _hashmap_remove(HashmapBase *h, const void *key) { + struct hashmap_base_entry *e; + unsigned hash, idx; + void *data; + + if (!h) + return NULL; + + hash = bucket_hash(h, key); + idx = bucket_scan(h, hash, key); + if (idx == IDX_NIL) + return NULL; + + e = bucket_at(h, idx); + data = entry_value(h, e); + remove_entry(h, idx); + + return data; +} + +void* hashmap_remove2(Hashmap *h, const void *key, void **rkey) { + struct plain_hashmap_entry *e; + unsigned hash, idx; + void *data; + + if (!h) { + if (rkey) + *rkey = NULL; + return NULL; + } + + hash = bucket_hash(h, key); + idx = bucket_scan(h, hash, key); + if (idx == IDX_NIL) { + if (rkey) + *rkey = NULL; + return NULL; + } + + e = plain_bucket_at(h, idx); + data = e->value; + if (rkey) + *rkey = (void*) e->b.key; + + remove_entry(h, idx); + + return data; +} + +int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key, void *value) { + struct swap_entries swap; + struct plain_hashmap_entry *e; + unsigned old_hash, new_hash, idx; + + if (!h) + return -ENOENT; + + old_hash = bucket_hash(h, old_key); + idx = bucket_scan(h, old_hash, old_key); + if (idx == IDX_NIL) + return -ENOENT; + + new_hash = bucket_hash(h, new_key); + if (bucket_scan(h, new_hash, new_key) != IDX_NIL) + return -EEXIST; + + remove_entry(h, idx); + + e = &bucket_at_swap(&swap, IDX_PUT)->p; + e->b.key = new_key; + e->value = value; + assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1); + + return 0; +} + +int set_remove_and_put(Set *s, const void *old_key, const void *new_key) { + struct swap_entries swap; + struct hashmap_base_entry *e; + unsigned old_hash, new_hash, idx; + + if (!s) + return -ENOENT; + + old_hash = bucket_hash(s, old_key); + idx = bucket_scan(s, old_hash, old_key); + if (idx == IDX_NIL) + return -ENOENT; + + new_hash = bucket_hash(s, new_key); + if (bucket_scan(s, new_hash, new_key) != IDX_NIL) + return -EEXIST; + + remove_entry(s, idx); + + e = &bucket_at_swap(&swap, IDX_PUT)->p.b; + e->key = new_key; + assert_se(hashmap_put_boldly(s, new_hash, &swap, false) == 1); + + return 0; +} + +int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_key, void *value) { + struct swap_entries swap; + struct plain_hashmap_entry *e; + unsigned old_hash, new_hash, idx_old, idx_new; + + if (!h) + return -ENOENT; + + old_hash = bucket_hash(h, old_key); + idx_old = bucket_scan(h, old_hash, old_key); + if (idx_old == IDX_NIL) + return -ENOENT; + + old_key = bucket_at(HASHMAP_BASE(h), idx_old)->key; + + new_hash = bucket_hash(h, new_key); + idx_new = bucket_scan(h, new_hash, new_key); + if (idx_new != IDX_NIL) + if (idx_old != idx_new) { + remove_entry(h, idx_new); + /* Compensate for a possible backward shift. */ + if (old_key != bucket_at(HASHMAP_BASE(h), idx_old)->key) + idx_old = prev_idx(HASHMAP_BASE(h), idx_old); + assert(old_key == bucket_at(HASHMAP_BASE(h), idx_old)->key); + } + + remove_entry(h, idx_old); + + e = &bucket_at_swap(&swap, IDX_PUT)->p; + e->b.key = new_key; + e->value = value; + assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1); + + return 0; +} + +void* _hashmap_remove_value(HashmapBase *h, const void *key, void *value) { + struct hashmap_base_entry *e; + unsigned hash, idx; + + if (!h) + return NULL; + + hash = bucket_hash(h, key); + idx = bucket_scan(h, hash, key); + if (idx == IDX_NIL) + return NULL; + + e = bucket_at(h, idx); + if (entry_value(h, e) != value) + return NULL; + + remove_entry(h, idx); + + return value; +} + +static unsigned find_first_entry(HashmapBase *h) { + Iterator i = ITERATOR_FIRST; + + if (!h || !n_entries(h)) + return IDX_NIL; + + return hashmap_iterate_entry(h, &i); +} + +void* _hashmap_first_key_and_value(HashmapBase *h, bool remove, void **ret_key) { + struct hashmap_base_entry *e; + void *key, *data; + unsigned idx; + + idx = find_first_entry(h); + if (idx == IDX_NIL) { + if (ret_key) + *ret_key = NULL; + return NULL; + } + + e = bucket_at(h, idx); + key = (void*) e->key; + data = entry_value(h, e); + + if (remove) + remove_entry(h, idx); + + if (ret_key) + *ret_key = key; + + return data; +} + +unsigned _hashmap_size(HashmapBase *h) { + if (!h) + return 0; + + return n_entries(h); +} + +unsigned _hashmap_buckets(HashmapBase *h) { + if (!h) + return 0; + + return n_buckets(h); +} + +int _hashmap_merge(Hashmap *h, Hashmap *other) { + Iterator i; + unsigned idx; + + assert(h); + + HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) { + struct plain_hashmap_entry *pe = plain_bucket_at(other, idx); + int r; + + r = hashmap_put(h, pe->b.key, pe->value); + if (r < 0 && r != -EEXIST) + return r; + } + + return 0; +} + +int set_merge(Set *s, Set *other) { + Iterator i; + unsigned idx; + + assert(s); + + HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) { + struct set_entry *se = set_bucket_at(other, idx); + int r; + + r = set_put(s, se->b.key); + if (r < 0) + return r; + } + + return 0; +} + +int _hashmap_reserve(HashmapBase *h, unsigned entries_add) { + int r; + + assert(h); + + r = resize_buckets(h, entries_add); + if (r < 0) + return r; + + return 0; +} + +/* + * The same as hashmap_merge(), but every new item from other is moved to h. + * Keys already in h are skipped and stay in other. + * Returns: 0 on success. + * -ENOMEM on alloc failure, in which case no move has been done. + */ +int _hashmap_move(HashmapBase *h, HashmapBase *other) { + struct swap_entries swap; + struct hashmap_base_entry *e, *n; + Iterator i; + unsigned idx; + int r; + + assert(h); + + if (!other) + return 0; + + assert(other->type == h->type); + + /* + * This reserves buckets for the worst case, where none of other's + * entries are yet present in h. This is preferable to risking + * an allocation failure in the middle of the moving and having to + * rollback or return a partial result. + */ + r = resize_buckets(h, n_entries(other)); + if (r < 0) + return r; + + HASHMAP_FOREACH_IDX(idx, other, i) { + unsigned h_hash; + + e = bucket_at(other, idx); + h_hash = bucket_hash(h, e->key); + if (bucket_scan(h, h_hash, e->key) != IDX_NIL) + continue; + + n = &bucket_at_swap(&swap, IDX_PUT)->p.b; + n->key = e->key; + if (h->type != HASHMAP_TYPE_SET) + ((struct plain_hashmap_entry*) n)->value = + ((struct plain_hashmap_entry*) e)->value; + assert_se(hashmap_put_boldly(h, h_hash, &swap, false) == 1); + + remove_entry(other, idx); + } + + return 0; +} + +int _hashmap_move_one(HashmapBase *h, HashmapBase *other, const void *key) { + struct swap_entries swap; + unsigned h_hash, other_hash, idx; + struct hashmap_base_entry *e, *n; + int r; + + assert(h); + + h_hash = bucket_hash(h, key); + if (bucket_scan(h, h_hash, key) != IDX_NIL) + return -EEXIST; + + if (!other) + return -ENOENT; + + assert(other->type == h->type); + + other_hash = bucket_hash(other, key); + idx = bucket_scan(other, other_hash, key); + if (idx == IDX_NIL) + return -ENOENT; + + e = bucket_at(other, idx); + + n = &bucket_at_swap(&swap, IDX_PUT)->p.b; + n->key = e->key; + if (h->type != HASHMAP_TYPE_SET) + ((struct plain_hashmap_entry*) n)->value = + ((struct plain_hashmap_entry*) e)->value; + r = hashmap_put_boldly(h, h_hash, &swap, true); + if (r < 0) + return r; + + remove_entry(other, idx); + return 0; +} + +HashmapBase* _hashmap_copy(HashmapBase *h HASHMAP_DEBUG_PARAMS) { + HashmapBase *copy; + int r; + + assert(h); + + copy = hashmap_base_new(h->hash_ops, h->type HASHMAP_DEBUG_PASS_ARGS); + if (!copy) + return NULL; + + switch (h->type) { + case HASHMAP_TYPE_PLAIN: + case HASHMAP_TYPE_ORDERED: + r = hashmap_merge((Hashmap*)copy, (Hashmap*)h); + break; + case HASHMAP_TYPE_SET: + r = set_merge((Set*)copy, (Set*)h); + break; + default: + assert_not_reached(); + } + + if (r < 0) + return _hashmap_free(copy, NULL, NULL); + + return copy; +} + +char** _hashmap_get_strv(HashmapBase *h) { + char **sv; + Iterator i; + unsigned idx, n; + + if (!h) + return new0(char*, 1); + + sv = new(char*, n_entries(h)+1); + if (!sv) + return NULL; + + n = 0; + HASHMAP_FOREACH_IDX(idx, h, i) + sv[n++] = entry_value(h, bucket_at(h, idx)); + sv[n] = NULL; + + return sv; +} + +void* ordered_hashmap_next(OrderedHashmap *h, const void *key) { + struct ordered_hashmap_entry *e; + unsigned hash, idx; + + if (!h) + return NULL; + + hash = bucket_hash(h, key); + idx = bucket_scan(h, hash, key); + if (idx == IDX_NIL) + return NULL; + + e = ordered_bucket_at(h, idx); + if (e->iterate_next == IDX_NIL) + return NULL; + return ordered_bucket_at(h, e->iterate_next)->p.value; +} + +int set_consume(Set *s, void *value) { + int r; + + assert(s); + assert(value); + + r = set_put(s, value); + if (r <= 0) + free(value); + + return r; +} + +int _hashmap_put_strdup_full(Hashmap **h, const struct hash_ops *hash_ops, const char *k, const char *v HASHMAP_DEBUG_PARAMS) { + int r; + + r = _hashmap_ensure_allocated(h, hash_ops HASHMAP_DEBUG_PASS_ARGS); + if (r < 0) + return r; + + _cleanup_free_ char *kdup = NULL, *vdup = NULL; + + kdup = strdup(k); + if (!kdup) + return -ENOMEM; + + if (v) { + vdup = strdup(v); + if (!vdup) + return -ENOMEM; + } + + r = hashmap_put(*h, kdup, vdup); + if (r < 0) { + if (r == -EEXIST && streq_ptr(v, hashmap_get(*h, kdup))) + return 0; + return r; + } + + /* 0 with non-null vdup would mean vdup is already in the hashmap, which cannot be */ + assert(vdup == NULL || r > 0); + if (r > 0) + kdup = vdup = NULL; + + return r; +} + +int _set_put_strndup_full(Set **s, const struct hash_ops *hash_ops, const char *p, size_t n HASHMAP_DEBUG_PARAMS) { + char *c; + int r; + + assert(s); + assert(p); + + r = _set_ensure_allocated(s, hash_ops HASHMAP_DEBUG_PASS_ARGS); + if (r < 0) + return r; + + if (n == SIZE_MAX) { + if (set_contains(*s, (char*) p)) + return 0; + + c = strdup(p); + } else + c = strndup(p, n); + if (!c) + return -ENOMEM; + + return set_consume(*s, c); +} + +int _set_put_strdupv_full(Set **s, const struct hash_ops *hash_ops, char **l HASHMAP_DEBUG_PARAMS) { + int n = 0, r; + + assert(s); + + STRV_FOREACH(i, l) { + r = _set_put_strndup_full(s, hash_ops, *i, SIZE_MAX HASHMAP_DEBUG_PASS_ARGS); + if (r < 0) + return r; + + n += r; + } + + return n; +} + +int set_put_strsplit(Set *s, const char *v, const char *separators, ExtractFlags flags) { + const char *p = ASSERT_PTR(v); + int r; + + assert(s); + + for (;;) { + char *word; + + r = extract_first_word(&p, &word, separators, flags); + if (r <= 0) + return r; + + r = set_consume(s, word); + if (r < 0) + return r; + } +} + +/* expand the cachemem if needed, return true if newly (re)activated. */ +static int cachemem_maintain(CacheMem *mem, size_t size) { + assert(mem); + + if (!GREEDY_REALLOC(mem->ptr, size)) { + if (size > 0) + return -ENOMEM; + } + + if (!mem->active) { + mem->active = true; + return true; + } + + return false; +} + +int iterated_cache_get(IteratedCache *cache, const void ***res_keys, const void ***res_values, unsigned *res_n_entries) { + bool sync_keys = false, sync_values = false; + size_t size; + int r; + + assert(cache); + assert(cache->hashmap); + + size = n_entries(cache->hashmap); + + if (res_keys) { + r = cachemem_maintain(&cache->keys, size); + if (r < 0) + return r; + + sync_keys = r; + } else + cache->keys.active = false; + + if (res_values) { + r = cachemem_maintain(&cache->values, size); + if (r < 0) + return r; + + sync_values = r; + } else + cache->values.active = false; + + if (cache->hashmap->dirty) { + if (cache->keys.active) + sync_keys = true; + if (cache->values.active) + sync_values = true; + + cache->hashmap->dirty = false; + } + + if (sync_keys || sync_values) { + unsigned i, idx; + Iterator iter; + + i = 0; + HASHMAP_FOREACH_IDX(idx, cache->hashmap, iter) { + struct hashmap_base_entry *e; + + e = bucket_at(cache->hashmap, idx); + + if (sync_keys) + cache->keys.ptr[i] = e->key; + if (sync_values) + cache->values.ptr[i] = entry_value(cache->hashmap, e); + i++; + } + } + + if (res_keys) + *res_keys = cache->keys.ptr; + if (res_values) + *res_values = cache->values.ptr; + if (res_n_entries) + *res_n_entries = size; + + return 0; +} + +IteratedCache* iterated_cache_free(IteratedCache *cache) { + if (cache) { + free(cache->keys.ptr); + free(cache->values.ptr); + } + + return mfree(cache); +} + +int set_strjoin(Set *s, const char *separator, bool wrap_with_separator, char **ret) { + _cleanup_free_ char *str = NULL; + size_t separator_len, len = 0; + const char *value; + bool first; + + assert(ret); + + if (set_isempty(s)) { + *ret = NULL; + return 0; + } + + separator_len = strlen_ptr(separator); + + if (separator_len == 0) + wrap_with_separator = false; + + first = !wrap_with_separator; + + SET_FOREACH(value, s) { + size_t l = strlen_ptr(value); + + if (l == 0) + continue; + + if (!GREEDY_REALLOC(str, len + l + (first ? 0 : separator_len) + (wrap_with_separator ? separator_len : 0) + 1)) + return -ENOMEM; + + if (separator_len > 0 && !first) { + memcpy(str + len, separator, separator_len); + len += separator_len; + } + + memcpy(str + len, value, l); + len += l; + first = false; + } + + if (wrap_with_separator) { + memcpy(str + len, separator, separator_len); + len += separator_len; + } + + str[len] = '\0'; + + *ret = TAKE_PTR(str); + return 0; +} + +bool set_equal(Set *a, Set *b) { + void *p; + + /* Checks whether each entry of 'a' is also in 'b' and vice versa, i.e. the two sets contain the same + * entries */ + + if (a == b) + return true; + + if (set_isempty(a) && set_isempty(b)) + return true; + + if (set_size(a) != set_size(b)) /* Cheap check that hopefully catches a lot of inequality cases + * already */ + return false; + + SET_FOREACH(p, a) + if (!set_contains(b, p)) + return false; + + /* If we have the same hashops, then we don't need to check things backwards given we compared the + * size and that all of a is in b. */ + if (a->b.hash_ops == b->b.hash_ops) + return true; + + SET_FOREACH(p, b) + if (!set_contains(a, p)) + return false; + + return true; +} + +static bool set_fnmatch_one(Set *patterns, const char *needle) { + const char *p; + + assert(needle); + + /* Any failure of fnmatch() is treated as equivalent to FNM_NOMATCH, i.e. as non-matching pattern */ + + SET_FOREACH(p, patterns) + if (fnmatch(p, needle, 0) == 0) + return true; + + return false; +} + +bool set_fnmatch(Set *include_patterns, Set *exclude_patterns, const char *needle) { + assert(needle); + + if (set_fnmatch_one(exclude_patterns, needle)) + return false; + + if (set_isempty(include_patterns)) + return true; + + return set_fnmatch_one(include_patterns, needle); +} diff --git a/src/basic/hashmap.h b/src/basic/hashmap.h new file mode 100644 index 0000000..91b3fe8 --- /dev/null +++ b/src/basic/hashmap.h @@ -0,0 +1,446 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include + +#include "hash-funcs.h" +#include "macro.h" +#include "util.h" + +/* + * A hash table implementation. As a minor optimization a NULL hashmap object + * will be treated as empty hashmap for all read operations. That way it is not + * necessary to instantiate an object for each Hashmap use. + * + * If ENABLE_DEBUG_HASHMAP is defined (by configuring with -Ddebug-extra=hashmap), + * the implementation will: + * - store extra data for debugging and statistics (see tools/gdb-sd_dump_hashmaps.py) + * - perform extra checks for invalid use of iterators + */ + +#define HASH_KEY_SIZE 16 + +typedef void* (*hashmap_destroy_t)(void *p); + +/* The base type for all hashmap and set types. Many functions in the implementation take (HashmapBase*) + * parameters and are run-time polymorphic, though the API is not meant to be polymorphic (do not call + * underscore-prefixed functions directly). */ +typedef struct HashmapBase HashmapBase; + +/* Specific hashmap/set types */ +typedef struct Hashmap Hashmap; /* Maps keys to values */ +typedef struct OrderedHashmap OrderedHashmap; /* Like Hashmap, but also remembers entry insertion order */ +typedef struct Set Set; /* Stores just keys */ + +typedef struct IteratedCache IteratedCache; /* Caches the iterated order of one of the above */ + +/* Ideally the Iterator would be an opaque struct, but it is instantiated + * by hashmap users, so the definition has to be here. Do not use its fields + * directly. */ +typedef struct { + unsigned idx; /* index of an entry to be iterated next */ + const void *next_key; /* expected value of that entry's key pointer */ +#if ENABLE_DEBUG_HASHMAP + unsigned put_count; /* hashmap's put_count recorded at start of iteration */ + unsigned rem_count; /* hashmap's rem_count in previous iteration */ + unsigned prev_idx; /* idx in previous iteration */ +#endif +} Iterator; + +#define _IDX_ITERATOR_FIRST (UINT_MAX - 1) +#define ITERATOR_FIRST ((Iterator) { .idx = _IDX_ITERATOR_FIRST, .next_key = NULL }) +#define ITERATOR_IS_FIRST(i) ((i).idx == _IDX_ITERATOR_FIRST) + +/* Macros for type checking */ +#define PTR_COMPATIBLE_WITH_HASHMAP_BASE(h) \ + (__builtin_types_compatible_p(typeof(h), HashmapBase*) || \ + __builtin_types_compatible_p(typeof(h), Hashmap*) || \ + __builtin_types_compatible_p(typeof(h), OrderedHashmap*) || \ + __builtin_types_compatible_p(typeof(h), Set*)) + +#define PTR_COMPATIBLE_WITH_PLAIN_HASHMAP(h) \ + (__builtin_types_compatible_p(typeof(h), Hashmap*) || \ + __builtin_types_compatible_p(typeof(h), OrderedHashmap*)) \ + +#define HASHMAP_BASE(h) \ + __builtin_choose_expr(PTR_COMPATIBLE_WITH_HASHMAP_BASE(h), \ + (HashmapBase*)(h), \ + (void)0) + +#define PLAIN_HASHMAP(h) \ + __builtin_choose_expr(PTR_COMPATIBLE_WITH_PLAIN_HASHMAP(h), \ + (Hashmap*)(h), \ + (void)0) + +#if ENABLE_DEBUG_HASHMAP +# define HASHMAP_DEBUG_PARAMS , const char *func, const char *file, int line +# define HASHMAP_DEBUG_SRC_ARGS , __func__, PROJECT_FILE, __LINE__ +# define HASHMAP_DEBUG_PASS_ARGS , func, file, line +#else +# define HASHMAP_DEBUG_PARAMS +# define HASHMAP_DEBUG_SRC_ARGS +# define HASHMAP_DEBUG_PASS_ARGS +#endif + +Hashmap* _hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS); +OrderedHashmap* _ordered_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS); +#define hashmap_new(ops) _hashmap_new(ops HASHMAP_DEBUG_SRC_ARGS) +#define ordered_hashmap_new(ops) _ordered_hashmap_new(ops HASHMAP_DEBUG_SRC_ARGS) + +#define hashmap_free_and_replace(a, b) \ + free_and_replace_full(a, b, hashmap_free) + +HashmapBase* _hashmap_free(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value); +static inline Hashmap* hashmap_free(Hashmap *h) { + return (void*) _hashmap_free(HASHMAP_BASE(h), NULL, NULL); +} +static inline OrderedHashmap* ordered_hashmap_free(OrderedHashmap *h) { + return (void*) _hashmap_free(HASHMAP_BASE(h), NULL, NULL); +} + +static inline Hashmap* hashmap_free_free(Hashmap *h) { + return (void*) _hashmap_free(HASHMAP_BASE(h), NULL, free); +} +static inline OrderedHashmap* ordered_hashmap_free_free(OrderedHashmap *h) { + return (void*) _hashmap_free(HASHMAP_BASE(h), NULL, free); +} + +static inline Hashmap* hashmap_free_free_key(Hashmap *h) { + return (void*) _hashmap_free(HASHMAP_BASE(h), free, NULL); +} +static inline OrderedHashmap* ordered_hashmap_free_free_key(OrderedHashmap *h) { + return (void*) _hashmap_free(HASHMAP_BASE(h), free, NULL); +} + +static inline Hashmap* hashmap_free_free_free(Hashmap *h) { + return (void*) _hashmap_free(HASHMAP_BASE(h), free, free); +} +static inline OrderedHashmap* ordered_hashmap_free_free_free(OrderedHashmap *h) { + return (void*) _hashmap_free(HASHMAP_BASE(h), free, free); +} + +IteratedCache* iterated_cache_free(IteratedCache *cache); +int iterated_cache_get(IteratedCache *cache, const void ***res_keys, const void ***res_values, unsigned *res_n_entries); + +HashmapBase* _hashmap_copy(HashmapBase *h HASHMAP_DEBUG_PARAMS); +#define hashmap_copy(h) ((Hashmap*) _hashmap_copy(HASHMAP_BASE(h) HASHMAP_DEBUG_SRC_ARGS)) +#define ordered_hashmap_copy(h) ((OrderedHashmap*) _hashmap_copy(HASHMAP_BASE(h) HASHMAP_DEBUG_SRC_ARGS)) + +int _hashmap_ensure_allocated(Hashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS); +int _hashmap_ensure_put(Hashmap **h, const struct hash_ops *hash_ops, const void *key, void *value HASHMAP_DEBUG_PARAMS); +int _ordered_hashmap_ensure_allocated(OrderedHashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS); + +#define hashmap_ensure_allocated(h, ops) _hashmap_ensure_allocated(h, ops HASHMAP_DEBUG_SRC_ARGS) +#define hashmap_ensure_put(s, ops, key, value) _hashmap_ensure_put(s, ops, key, value HASHMAP_DEBUG_SRC_ARGS) +#define ordered_hashmap_ensure_allocated(h, ops) _ordered_hashmap_ensure_allocated(h, ops HASHMAP_DEBUG_SRC_ARGS) + +int _ordered_hashmap_ensure_put(OrderedHashmap **h, const struct hash_ops *hash_ops, const void *key, void *value HASHMAP_DEBUG_PARAMS); +#define ordered_hashmap_ensure_put(s, ops, key, value) _ordered_hashmap_ensure_put(s, ops, key, value HASHMAP_DEBUG_SRC_ARGS) + +IteratedCache* _hashmap_iterated_cache_new(HashmapBase *h); +static inline IteratedCache* hashmap_iterated_cache_new(Hashmap *h) { + return (IteratedCache*) _hashmap_iterated_cache_new(HASHMAP_BASE(h)); +} +static inline IteratedCache* ordered_hashmap_iterated_cache_new(OrderedHashmap *h) { + return (IteratedCache*) _hashmap_iterated_cache_new(HASHMAP_BASE(h)); +} + +int hashmap_put(Hashmap *h, const void *key, void *value); +static inline int ordered_hashmap_put(OrderedHashmap *h, const void *key, void *value) { + return hashmap_put(PLAIN_HASHMAP(h), key, value); +} + +int _hashmap_put_strdup_full(Hashmap **h, const struct hash_ops *hash_ops, const char *k, const char *v HASHMAP_DEBUG_PARAMS); +#define hashmap_put_strdup_full(h, hash_ops, k, v) _hashmap_put_strdup_full(h, hash_ops, k, v HASHMAP_DEBUG_SRC_ARGS) +#define hashmap_put_strdup(h, k, v) hashmap_put_strdup_full(h, &string_hash_ops_free_free, k, v) + +int hashmap_update(Hashmap *h, const void *key, void *value); +static inline int ordered_hashmap_update(OrderedHashmap *h, const void *key, void *value) { + return hashmap_update(PLAIN_HASHMAP(h), key, value); +} + +int hashmap_replace(Hashmap *h, const void *key, void *value); +static inline int ordered_hashmap_replace(OrderedHashmap *h, const void *key, void *value) { + return hashmap_replace(PLAIN_HASHMAP(h), key, value); +} + +void* _hashmap_get(HashmapBase *h, const void *key); +static inline void *hashmap_get(Hashmap *h, const void *key) { + return _hashmap_get(HASHMAP_BASE(h), key); +} +static inline void *ordered_hashmap_get(OrderedHashmap *h, const void *key) { + return _hashmap_get(HASHMAP_BASE(h), key); +} + +void* hashmap_get2(Hashmap *h, const void *key, void **rkey); +static inline void *ordered_hashmap_get2(OrderedHashmap *h, const void *key, void **rkey) { + return hashmap_get2(PLAIN_HASHMAP(h), key, rkey); +} + +bool _hashmap_contains(HashmapBase *h, const void *key); +static inline bool hashmap_contains(Hashmap *h, const void *key) { + return _hashmap_contains(HASHMAP_BASE(h), key); +} +static inline bool ordered_hashmap_contains(OrderedHashmap *h, const void *key) { + return _hashmap_contains(HASHMAP_BASE(h), key); +} + +void* _hashmap_remove(HashmapBase *h, const void *key); +static inline void *hashmap_remove(Hashmap *h, const void *key) { + return _hashmap_remove(HASHMAP_BASE(h), key); +} +static inline void *ordered_hashmap_remove(OrderedHashmap *h, const void *key) { + return _hashmap_remove(HASHMAP_BASE(h), key); +} + +void* hashmap_remove2(Hashmap *h, const void *key, void **rkey); +static inline void *ordered_hashmap_remove2(OrderedHashmap *h, const void *key, void **rkey) { + return hashmap_remove2(PLAIN_HASHMAP(h), key, rkey); +} + +void* _hashmap_remove_value(HashmapBase *h, const void *key, void *value); +static inline void *hashmap_remove_value(Hashmap *h, const void *key, void *value) { + return _hashmap_remove_value(HASHMAP_BASE(h), key, value); +} + +static inline void* ordered_hashmap_remove_value(OrderedHashmap *h, const void *key, void *value) { + return hashmap_remove_value(PLAIN_HASHMAP(h), key, value); +} + +int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key, void *value); +static inline int ordered_hashmap_remove_and_put(OrderedHashmap *h, const void *old_key, const void *new_key, void *value) { + return hashmap_remove_and_put(PLAIN_HASHMAP(h), old_key, new_key, value); +} + +int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_key, void *value); +static inline int ordered_hashmap_remove_and_replace(OrderedHashmap *h, const void *old_key, const void *new_key, void *value) { + return hashmap_remove_and_replace(PLAIN_HASHMAP(h), old_key, new_key, value); +} + +/* Since merging data from an OrderedHashmap into a Hashmap or vice-versa + * should just work, allow this by having looser type-checking here. */ +int _hashmap_merge(Hashmap *h, Hashmap *other); +#define hashmap_merge(h, other) _hashmap_merge(PLAIN_HASHMAP(h), PLAIN_HASHMAP(other)) +#define ordered_hashmap_merge(h, other) hashmap_merge(h, other) + +int _hashmap_reserve(HashmapBase *h, unsigned entries_add); +static inline int hashmap_reserve(Hashmap *h, unsigned entries_add) { + return _hashmap_reserve(HASHMAP_BASE(h), entries_add); +} +static inline int ordered_hashmap_reserve(OrderedHashmap *h, unsigned entries_add) { + return _hashmap_reserve(HASHMAP_BASE(h), entries_add); +} + +int _hashmap_move(HashmapBase *h, HashmapBase *other); +/* Unlike hashmap_merge, hashmap_move does not allow mixing the types. */ +static inline int hashmap_move(Hashmap *h, Hashmap *other) { + return _hashmap_move(HASHMAP_BASE(h), HASHMAP_BASE(other)); +} +static inline int ordered_hashmap_move(OrderedHashmap *h, OrderedHashmap *other) { + return _hashmap_move(HASHMAP_BASE(h), HASHMAP_BASE(other)); +} + +int _hashmap_move_one(HashmapBase *h, HashmapBase *other, const void *key); +static inline int hashmap_move_one(Hashmap *h, Hashmap *other, const void *key) { + return _hashmap_move_one(HASHMAP_BASE(h), HASHMAP_BASE(other), key); +} +static inline int ordered_hashmap_move_one(OrderedHashmap *h, OrderedHashmap *other, const void *key) { + return _hashmap_move_one(HASHMAP_BASE(h), HASHMAP_BASE(other), key); +} + +unsigned _hashmap_size(HashmapBase *h) _pure_; +static inline unsigned hashmap_size(Hashmap *h) { + return _hashmap_size(HASHMAP_BASE(h)); +} +static inline unsigned ordered_hashmap_size(OrderedHashmap *h) { + return _hashmap_size(HASHMAP_BASE(h)); +} + +static inline bool hashmap_isempty(Hashmap *h) { + return hashmap_size(h) == 0; +} +static inline bool ordered_hashmap_isempty(OrderedHashmap *h) { + return ordered_hashmap_size(h) == 0; +} + +unsigned _hashmap_buckets(HashmapBase *h) _pure_; +static inline unsigned hashmap_buckets(Hashmap *h) { + return _hashmap_buckets(HASHMAP_BASE(h)); +} +static inline unsigned ordered_hashmap_buckets(OrderedHashmap *h) { + return _hashmap_buckets(HASHMAP_BASE(h)); +} + +bool _hashmap_iterate(HashmapBase *h, Iterator *i, void **value, const void **key); +static inline bool hashmap_iterate(Hashmap *h, Iterator *i, void **value, const void **key) { + return _hashmap_iterate(HASHMAP_BASE(h), i, value, key); +} +static inline bool ordered_hashmap_iterate(OrderedHashmap *h, Iterator *i, void **value, const void **key) { + return _hashmap_iterate(HASHMAP_BASE(h), i, value, key); +} + +void _hashmap_clear(HashmapBase *h, free_func_t default_free_key, free_func_t default_free_value); +static inline void hashmap_clear(Hashmap *h) { + _hashmap_clear(HASHMAP_BASE(h), NULL, NULL); +} +static inline void ordered_hashmap_clear(OrderedHashmap *h) { + _hashmap_clear(HASHMAP_BASE(h), NULL, NULL); +} + +static inline void hashmap_clear_free(Hashmap *h) { + _hashmap_clear(HASHMAP_BASE(h), NULL, free); +} +static inline void ordered_hashmap_clear_free(OrderedHashmap *h) { + _hashmap_clear(HASHMAP_BASE(h), NULL, free); +} + +static inline void hashmap_clear_free_key(Hashmap *h) { + _hashmap_clear(HASHMAP_BASE(h), free, NULL); +} +static inline void ordered_hashmap_clear_free_key(OrderedHashmap *h) { + _hashmap_clear(HASHMAP_BASE(h), free, NULL); +} + +static inline void hashmap_clear_free_free(Hashmap *h) { + _hashmap_clear(HASHMAP_BASE(h), free, free); +} +static inline void ordered_hashmap_clear_free_free(OrderedHashmap *h) { + _hashmap_clear(HASHMAP_BASE(h), free, free); +} + +/* + * Note about all *_first*() functions + * + * For plain Hashmaps and Sets the order of entries is undefined. + * The functions find whatever entry is first in the implementation + * internal order. + * + * Only for OrderedHashmaps the order is well defined and finding + * the first entry is O(1). + */ + +void *_hashmap_first_key_and_value(HashmapBase *h, bool remove, void **ret_key); +static inline void *hashmap_steal_first_key_and_value(Hashmap *h, void **ret) { + return _hashmap_first_key_and_value(HASHMAP_BASE(h), true, ret); +} +static inline void *ordered_hashmap_steal_first_key_and_value(OrderedHashmap *h, void **ret) { + return _hashmap_first_key_and_value(HASHMAP_BASE(h), true, ret); +} +static inline void *hashmap_first_key_and_value(Hashmap *h, void **ret) { + return _hashmap_first_key_and_value(HASHMAP_BASE(h), false, ret); +} +static inline void *ordered_hashmap_first_key_and_value(OrderedHashmap *h, void **ret) { + return _hashmap_first_key_and_value(HASHMAP_BASE(h), false, ret); +} + +static inline void *hashmap_steal_first(Hashmap *h) { + return _hashmap_first_key_and_value(HASHMAP_BASE(h), true, NULL); +} +static inline void *ordered_hashmap_steal_first(OrderedHashmap *h) { + return _hashmap_first_key_and_value(HASHMAP_BASE(h), true, NULL); +} +static inline void *hashmap_first(Hashmap *h) { + return _hashmap_first_key_and_value(HASHMAP_BASE(h), false, NULL); +} +static inline void *ordered_hashmap_first(OrderedHashmap *h) { + return _hashmap_first_key_and_value(HASHMAP_BASE(h), false, NULL); +} + +static inline void *_hashmap_first_key(HashmapBase *h, bool remove) { + void *key = NULL; + + (void) _hashmap_first_key_and_value(HASHMAP_BASE(h), remove, &key); + return key; +} +static inline void *hashmap_steal_first_key(Hashmap *h) { + return _hashmap_first_key(HASHMAP_BASE(h), true); +} +static inline void *ordered_hashmap_steal_first_key(OrderedHashmap *h) { + return _hashmap_first_key(HASHMAP_BASE(h), true); +} +static inline void *hashmap_first_key(Hashmap *h) { + return _hashmap_first_key(HASHMAP_BASE(h), false); +} +static inline void *ordered_hashmap_first_key(OrderedHashmap *h) { + return _hashmap_first_key(HASHMAP_BASE(h), false); +} + +#define hashmap_clear_with_destructor(h, f) \ + ({ \ + Hashmap *_h = (h); \ + void *_item; \ + while ((_item = hashmap_steal_first(_h))) \ + f(_item); \ + _h; \ + }) +#define hashmap_free_with_destructor(h, f) \ + hashmap_free(hashmap_clear_with_destructor(h, f)) +#define ordered_hashmap_clear_with_destructor(h, f) \ + ({ \ + OrderedHashmap *_h = (h); \ + void *_item; \ + while ((_item = ordered_hashmap_steal_first(_h))) \ + f(_item); \ + _h; \ + }) +#define ordered_hashmap_free_with_destructor(h, f) \ + ordered_hashmap_free(ordered_hashmap_clear_with_destructor(h, f)) + +/* no hashmap_next */ +void* ordered_hashmap_next(OrderedHashmap *h, const void *key); + +char** _hashmap_get_strv(HashmapBase *h); +static inline char** hashmap_get_strv(Hashmap *h) { + return _hashmap_get_strv(HASHMAP_BASE(h)); +} +static inline char** ordered_hashmap_get_strv(OrderedHashmap *h) { + return _hashmap_get_strv(HASHMAP_BASE(h)); +} + +/* + * Hashmaps are iterated in unpredictable order. + * OrderedHashmaps are an exception to this. They are iterated in the order + * the entries were inserted. + * It is safe to remove the current entry. + */ +#define _HASHMAP_FOREACH(e, h, i) \ + for (Iterator i = ITERATOR_FIRST; hashmap_iterate((h), &i, (void**)&(e), NULL); ) +#define HASHMAP_FOREACH(e, h) \ + _HASHMAP_FOREACH(e, h, UNIQ_T(i, UNIQ)) + +#define _ORDERED_HASHMAP_FOREACH(e, h, i) \ + for (Iterator i = ITERATOR_FIRST; ordered_hashmap_iterate((h), &i, (void**)&(e), NULL); ) +#define ORDERED_HASHMAP_FOREACH(e, h) \ + _ORDERED_HASHMAP_FOREACH(e, h, UNIQ_T(i, UNIQ)) + +#define _HASHMAP_FOREACH_KEY(e, k, h, i) \ + for (Iterator i = ITERATOR_FIRST; hashmap_iterate((h), &i, (void**)&(e), (const void**) &(k)); ) +#define HASHMAP_FOREACH_KEY(e, k, h) \ + _HASHMAP_FOREACH_KEY(e, k, h, UNIQ_T(i, UNIQ)) + +#define _ORDERED_HASHMAP_FOREACH_KEY(e, k, h, i) \ + for (Iterator i = ITERATOR_FIRST; ordered_hashmap_iterate((h), &i, (void**)&(e), (const void**) &(k)); ) +#define ORDERED_HASHMAP_FOREACH_KEY(e, k, h) \ + _ORDERED_HASHMAP_FOREACH_KEY(e, k, h, UNIQ_T(i, UNIQ)) + +DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free_free_key); +DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free_free_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free_free_key); +DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free_free_free); + +#define _cleanup_hashmap_free_ _cleanup_(hashmap_freep) +#define _cleanup_hashmap_free_free_ _cleanup_(hashmap_free_freep) +#define _cleanup_hashmap_free_free_free_ _cleanup_(hashmap_free_free_freep) +#define _cleanup_ordered_hashmap_free_ _cleanup_(ordered_hashmap_freep) +#define _cleanup_ordered_hashmap_free_free_ _cleanup_(ordered_hashmap_free_freep) +#define _cleanup_ordered_hashmap_free_free_free_ _cleanup_(ordered_hashmap_free_free_freep) + +DEFINE_TRIVIAL_CLEANUP_FUNC(IteratedCache*, iterated_cache_free); + +#define _cleanup_iterated_cache_free_ _cleanup_(iterated_cache_freep) diff --git a/src/basic/hexdecoct.c b/src/basic/hexdecoct.c new file mode 100644 index 0000000..0ff8eb3 --- /dev/null +++ b/src/basic/hexdecoct.c @@ -0,0 +1,930 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include + +#include "alloc-util.h" +#include "hexdecoct.h" +#include "macro.h" +#include "memory-util.h" +#include "string-util.h" + +char octchar(int x) { + return '0' + (x & 7); +} + +int unoctchar(char c) { + + if (c >= '0' && c <= '7') + return c - '0'; + + return -EINVAL; +} + +char decchar(int x) { + return '0' + (x % 10); +} + +int undecchar(char c) { + + if (c >= '0' && c <= '9') + return c - '0'; + + return -EINVAL; +} + +char hexchar(int x) { + static const char table[16] = "0123456789abcdef"; + + return table[x & 15]; +} + +int unhexchar(char c) { + + if (c >= '0' && c <= '9') + return c - '0'; + + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + + if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + + return -EINVAL; +} + +char *hexmem(const void *p, size_t l) { + const uint8_t *x; + char *r, *z; + + assert(p || l == 0); + + z = r = new(char, l * 2 + 1); + if (!r) + return NULL; + + for (x = p; x && x < (const uint8_t*) p + l; x++) { + *(z++) = hexchar(*x >> 4); + *(z++) = hexchar(*x & 15); + } + + *z = 0; + return r; +} + +static int unhex_next(const char **p, size_t *l) { + int r; + + assert(p); + assert(l); + + /* Find the next non-whitespace character, and decode it. We + * greedily skip all preceding and all following whitespace. */ + + for (;;) { + if (*l == 0) + return -EPIPE; + + if (!strchr(WHITESPACE, **p)) + break; + + /* Skip leading whitespace */ + (*p)++, (*l)--; + } + + r = unhexchar(**p); + if (r < 0) + return r; + + for (;;) { + (*p)++, (*l)--; + + if (*l == 0 || !strchr(WHITESPACE, **p)) + break; + + /* Skip following whitespace */ + } + + return r; +} + +int unhexmem_full(const char *p, size_t l, bool secure, void **ret, size_t *ret_len) { + _cleanup_free_ uint8_t *buf = NULL; + size_t buf_size; + const char *x; + uint8_t *z; + int r; + + assert(p || l == 0); + + if (l == SIZE_MAX) + l = strlen(p); + + /* Note that the calculation of memory size is an upper boundary, as we ignore whitespace while decoding */ + buf_size = (l + 1) / 2 + 1; + buf = malloc(buf_size); + if (!buf) + return -ENOMEM; + + for (x = p, z = buf;;) { + int a, b; + + a = unhex_next(&x, &l); + if (a == -EPIPE) /* End of string */ + break; + if (a < 0) { + r = a; + goto on_failure; + } + + b = unhex_next(&x, &l); + if (b < 0) { + r = b; + goto on_failure; + } + + *(z++) = (uint8_t) a << 4 | (uint8_t) b; + } + + *z = 0; + + if (ret_len) + *ret_len = (size_t) (z - buf); + if (ret) + *ret = TAKE_PTR(buf); + + return 0; + +on_failure: + if (secure) + explicit_bzero_safe(buf, buf_size); + + return r; +} + +/* https://tools.ietf.org/html/rfc4648#section-6 + * Notice that base32hex differs from base32 in the alphabet it uses. + * The distinction is that the base32hex representation preserves the + * order of the underlying data when compared as bytestrings, this is + * useful when representing NSEC3 hashes, as one can then verify the + * order of hashes directly from their representation. */ +char base32hexchar(int x) { + static const char table[32] = "0123456789" + "ABCDEFGHIJKLMNOPQRSTUV"; + + return table[x & 31]; +} + +int unbase32hexchar(char c) { + unsigned offset; + + if (c >= '0' && c <= '9') + return c - '0'; + + offset = '9' - '0' + 1; + + if (c >= 'A' && c <= 'V') + return c - 'A' + offset; + + return -EINVAL; +} + +char *base32hexmem(const void *p, size_t l, bool padding) { + char *r, *z; + const uint8_t *x; + size_t len; + + assert(p || l == 0); + + if (padding) + /* five input bytes makes eight output bytes, padding is added so we must round up */ + len = 8 * (l + 4) / 5; + else { + /* same, but round down as there is no padding */ + len = 8 * l / 5; + + switch (l % 5) { + case 4: + len += 7; + break; + case 3: + len += 5; + break; + case 2: + len += 4; + break; + case 1: + len += 2; + break; + } + } + + z = r = malloc(len + 1); + if (!r) + return NULL; + + for (x = p; x < (const uint8_t*) p + (l / 5) * 5; x += 5) { + /* x[0] == XXXXXXXX; x[1] == YYYYYYYY; x[2] == ZZZZZZZZ + * x[3] == QQQQQQQQ; x[4] == WWWWWWWW */ + *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */ + *(z++) = base32hexchar((x[0] & 7) << 2 | x[1] >> 6); /* 000XXXYY */ + *(z++) = base32hexchar((x[1] & 63) >> 1); /* 000YYYYY */ + *(z++) = base32hexchar((x[1] & 1) << 4 | x[2] >> 4); /* 000YZZZZ */ + *(z++) = base32hexchar((x[2] & 15) << 1 | x[3] >> 7); /* 000ZZZZQ */ + *(z++) = base32hexchar((x[3] & 127) >> 2); /* 000QQQQQ */ + *(z++) = base32hexchar((x[3] & 3) << 3 | x[4] >> 5); /* 000QQWWW */ + *(z++) = base32hexchar((x[4] & 31)); /* 000WWWWW */ + } + + switch (l % 5) { + case 4: + *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */ + *(z++) = base32hexchar((x[0] & 7) << 2 | x[1] >> 6); /* 000XXXYY */ + *(z++) = base32hexchar((x[1] & 63) >> 1); /* 000YYYYY */ + *(z++) = base32hexchar((x[1] & 1) << 4 | x[2] >> 4); /* 000YZZZZ */ + *(z++) = base32hexchar((x[2] & 15) << 1 | x[3] >> 7); /* 000ZZZZQ */ + *(z++) = base32hexchar((x[3] & 127) >> 2); /* 000QQQQQ */ + *(z++) = base32hexchar((x[3] & 3) << 3); /* 000QQ000 */ + if (padding) + *(z++) = '='; + + break; + + case 3: + *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */ + *(z++) = base32hexchar((x[0] & 7) << 2 | x[1] >> 6); /* 000XXXYY */ + *(z++) = base32hexchar((x[1] & 63) >> 1); /* 000YYYYY */ + *(z++) = base32hexchar((x[1] & 1) << 4 | x[2] >> 4); /* 000YZZZZ */ + *(z++) = base32hexchar((x[2] & 15) << 1); /* 000ZZZZ0 */ + if (padding) { + *(z++) = '='; + *(z++) = '='; + *(z++) = '='; + } + + break; + + case 2: + *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */ + *(z++) = base32hexchar((x[0] & 7) << 2 | x[1] >> 6); /* 000XXXYY */ + *(z++) = base32hexchar((x[1] & 63) >> 1); /* 000YYYYY */ + *(z++) = base32hexchar((x[1] & 1) << 4); /* 000Y0000 */ + if (padding) { + *(z++) = '='; + *(z++) = '='; + *(z++) = '='; + *(z++) = '='; + } + + break; + + case 1: + *(z++) = base32hexchar(x[0] >> 3); /* 000XXXXX */ + *(z++) = base32hexchar((x[0] & 7) << 2); /* 000XXX00 */ + if (padding) { + *(z++) = '='; + *(z++) = '='; + *(z++) = '='; + *(z++) = '='; + *(z++) = '='; + *(z++) = '='; + } + + break; + } + + *z = 0; + return r; +} + +int unbase32hexmem(const char *p, size_t l, bool padding, void **mem, size_t *_len) { + _cleanup_free_ uint8_t *r = NULL; + int a, b, c, d, e, f, g, h; + uint8_t *z; + const char *x; + size_t len; + unsigned pad = 0; + + assert(p || l == 0); + assert(mem); + assert(_len); + + if (l == SIZE_MAX) + l = strlen(p); + + /* padding ensures any base32hex input has input divisible by 8 */ + if (padding && l % 8 != 0) + return -EINVAL; + + if (padding) { + /* strip the padding */ + while (l > 0 && p[l - 1] == '=' && pad < 7) { + pad++; + l--; + } + } + + /* a group of eight input bytes needs five output bytes, in case of + * padding we need to add some extra bytes */ + len = (l / 8) * 5; + + switch (l % 8) { + case 7: + len += 4; + break; + case 5: + len += 3; + break; + case 4: + len += 2; + break; + case 2: + len += 1; + break; + case 0: + break; + default: + return -EINVAL; + } + + z = r = malloc(len + 1); + if (!r) + return -ENOMEM; + + for (x = p; x < p + (l / 8) * 8; x += 8) { + /* a == 000XXXXX; b == 000YYYYY; c == 000ZZZZZ; d == 000WWWWW + * e == 000SSSSS; f == 000QQQQQ; g == 000VVVVV; h == 000RRRRR */ + a = unbase32hexchar(x[0]); + if (a < 0) + return -EINVAL; + + b = unbase32hexchar(x[1]); + if (b < 0) + return -EINVAL; + + c = unbase32hexchar(x[2]); + if (c < 0) + return -EINVAL; + + d = unbase32hexchar(x[3]); + if (d < 0) + return -EINVAL; + + e = unbase32hexchar(x[4]); + if (e < 0) + return -EINVAL; + + f = unbase32hexchar(x[5]); + if (f < 0) + return -EINVAL; + + g = unbase32hexchar(x[6]); + if (g < 0) + return -EINVAL; + + h = unbase32hexchar(x[7]); + if (h < 0) + return -EINVAL; + + *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */ + *(z++) = (uint8_t) b << 6 | (uint8_t) c << 1 | (uint8_t) d >> 4; /* YYZZZZZW */ + *(z++) = (uint8_t) d << 4 | (uint8_t) e >> 1; /* WWWWSSSS */ + *(z++) = (uint8_t) e << 7 | (uint8_t) f << 2 | (uint8_t) g >> 3; /* SQQQQQVV */ + *(z++) = (uint8_t) g << 5 | (uint8_t) h; /* VVVRRRRR */ + } + + switch (l % 8) { + case 7: + a = unbase32hexchar(x[0]); + if (a < 0) + return -EINVAL; + + b = unbase32hexchar(x[1]); + if (b < 0) + return -EINVAL; + + c = unbase32hexchar(x[2]); + if (c < 0) + return -EINVAL; + + d = unbase32hexchar(x[3]); + if (d < 0) + return -EINVAL; + + e = unbase32hexchar(x[4]); + if (e < 0) + return -EINVAL; + + f = unbase32hexchar(x[5]); + if (f < 0) + return -EINVAL; + + g = unbase32hexchar(x[6]); + if (g < 0) + return -EINVAL; + + /* g == 000VV000 */ + if (g & 7) + return -EINVAL; + + *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */ + *(z++) = (uint8_t) b << 6 | (uint8_t) c << 1 | (uint8_t) d >> 4; /* YYZZZZZW */ + *(z++) = (uint8_t) d << 4 | (uint8_t) e >> 1; /* WWWWSSSS */ + *(z++) = (uint8_t) e << 7 | (uint8_t) f << 2 | (uint8_t) g >> 3; /* SQQQQQVV */ + + break; + case 5: + a = unbase32hexchar(x[0]); + if (a < 0) + return -EINVAL; + + b = unbase32hexchar(x[1]); + if (b < 0) + return -EINVAL; + + c = unbase32hexchar(x[2]); + if (c < 0) + return -EINVAL; + + d = unbase32hexchar(x[3]); + if (d < 0) + return -EINVAL; + + e = unbase32hexchar(x[4]); + if (e < 0) + return -EINVAL; + + /* e == 000SSSS0 */ + if (e & 1) + return -EINVAL; + + *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */ + *(z++) = (uint8_t) b << 6 | (uint8_t) c << 1 | (uint8_t) d >> 4; /* YYZZZZZW */ + *(z++) = (uint8_t) d << 4 | (uint8_t) e >> 1; /* WWWWSSSS */ + + break; + case 4: + a = unbase32hexchar(x[0]); + if (a < 0) + return -EINVAL; + + b = unbase32hexchar(x[1]); + if (b < 0) + return -EINVAL; + + c = unbase32hexchar(x[2]); + if (c < 0) + return -EINVAL; + + d = unbase32hexchar(x[3]); + if (d < 0) + return -EINVAL; + + /* d == 000W0000 */ + if (d & 15) + return -EINVAL; + + *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */ + *(z++) = (uint8_t) b << 6 | (uint8_t) c << 1 | (uint8_t) d >> 4; /* YYZZZZZW */ + + break; + case 2: + a = unbase32hexchar(x[0]); + if (a < 0) + return -EINVAL; + + b = unbase32hexchar(x[1]); + if (b < 0) + return -EINVAL; + + /* b == 000YYY00 */ + if (b & 3) + return -EINVAL; + + *(z++) = (uint8_t) a << 3 | (uint8_t) b >> 2; /* XXXXXYYY */ + + break; + case 0: + break; + default: + return -EINVAL; + } + + *z = 0; + + *mem = TAKE_PTR(r); + *_len = len; + + return 0; +} + +/* https://tools.ietf.org/html/rfc4648#section-4 */ +char base64char(int x) { + static const char table[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + return table[x & 63]; +} + +/* This is almost base64char(), but not entirely, as it uses the "url and filename safe" alphabet, + * since we don't want "/" appear in interface names (since interfaces appear in sysfs as filenames). + * See section #5 of RFC 4648. */ +char urlsafe_base64char(int x) { + static const char table[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789-_"; + return table[x & 63]; +} + +int unbase64char(char c) { + unsigned offset; + + if (c >= 'A' && c <= 'Z') + return c - 'A'; + + offset = 'Z' - 'A' + 1; + + if (c >= 'a' && c <= 'z') + return c - 'a' + offset; + + offset += 'z' - 'a' + 1; + + if (c >= '0' && c <= '9') + return c - '0' + offset; + + offset += '9' - '0' + 1; + + if (c == '+') + return offset; + + offset++; + + if (c == '/') + return offset; + + return -EINVAL; +} + +static void maybe_line_break(char **x, char *start, size_t line_break) { + size_t n; + + assert(x); + assert(*x); + assert(start); + assert(*x >= start); + + if (line_break == SIZE_MAX) + return; + + n = *x - start; + + if (n % (line_break + 1) == line_break) + *((*x)++) = '\n'; +} + +ssize_t base64mem_full( + const void *p, + size_t l, + size_t line_break, + char **out) { + + const uint8_t *x; + char *r, *z; + size_t m; + + assert(p || l == 0); + assert(out); + assert(line_break > 0); + + /* three input bytes makes four output bytes, padding is added so we must round up */ + m = 4 * (l + 2) / 3 + 1; + + if (line_break != SIZE_MAX) + m += m / line_break; + + z = r = malloc(m); + if (!r) + return -ENOMEM; + + for (x = p; x && x < (const uint8_t*) p + (l / 3) * 3; x += 3) { + /* x[0] == XXXXXXXX; x[1] == YYYYYYYY; x[2] == ZZZZZZZZ */ + maybe_line_break(&z, r, line_break); + *(z++) = base64char(x[0] >> 2); /* 00XXXXXX */ + maybe_line_break(&z, r, line_break); + *(z++) = base64char((x[0] & 3) << 4 | x[1] >> 4); /* 00XXYYYY */ + maybe_line_break(&z, r, line_break); + *(z++) = base64char((x[1] & 15) << 2 | x[2] >> 6); /* 00YYYYZZ */ + maybe_line_break(&z, r, line_break); + *(z++) = base64char(x[2] & 63); /* 00ZZZZZZ */ + } + + switch (l % 3) { + case 2: + maybe_line_break(&z, r, line_break); + *(z++) = base64char(x[0] >> 2); /* 00XXXXXX */ + maybe_line_break(&z, r, line_break); + *(z++) = base64char((x[0] & 3) << 4 | x[1] >> 4); /* 00XXYYYY */ + maybe_line_break(&z, r, line_break); + *(z++) = base64char((x[1] & 15) << 2); /* 00YYYY00 */ + maybe_line_break(&z, r, line_break); + *(z++) = '='; + + break; + case 1: + maybe_line_break(&z, r, line_break); + *(z++) = base64char(x[0] >> 2); /* 00XXXXXX */ + maybe_line_break(&z, r, line_break); + *(z++) = base64char((x[0] & 3) << 4); /* 00XX0000 */ + maybe_line_break(&z, r, line_break); + *(z++) = '='; + maybe_line_break(&z, r, line_break); + *(z++) = '='; + + break; + } + + *z = 0; + *out = r; + assert(z >= r); /* Let static analyzers know that the answer is non-negative. */ + return z - r; +} + +static ssize_t base64_append_width( + char **prefix, + size_t plen, + char sep, + size_t indent, + const void *p, + size_t l, + size_t width) { + + _cleanup_free_ char *x = NULL; + char *t, *s; + size_t lines; + ssize_t len; + + assert(prefix); + assert(*prefix || plen == 0); + assert(p || l == 0); + + len = base64mem(p, l, &x); + if (len < 0) + return len; + if (len == 0) + return plen; + + lines = DIV_ROUND_UP(len, width); + + if (plen >= SSIZE_MAX - 1 - 1 || + lines > (SSIZE_MAX - plen - 1 - 1) / (indent + width + 1)) + return -ENOMEM; + + t = realloc(*prefix, plen + 1 + 1 + (indent + width + 1) * lines); + if (!t) + return -ENOMEM; + + s = t + plen; + for (size_t line = 0; line < lines; line++) { + size_t act = MIN(width, (size_t) len); + + if (line > 0) + sep = '\n'; + + if (s > t) { + *s++ = sep; + if (sep == '\n') + s = mempset(s, ' ', indent); + } + + s = mempcpy(s, x + width * line, act); + len -= act; + } + assert(len == 0); + + *s = '\0'; + *prefix = t; + return s - t; +} + +ssize_t base64_append( + char **prefix, + size_t plen, + const void *p, + size_t l, + size_t indent, + size_t width) { + + if (plen > width / 2 || plen + indent > width) + /* leave indent on the left, keep last column free */ + return base64_append_width(prefix, plen, '\n', indent, p, l, width - indent); + else + /* leave plen on the left, keep last column free */ + return base64_append_width(prefix, plen, ' ', plen + 1, p, l, width - plen - 1); +} + +static int unbase64_next(const char **p, size_t *l) { + int ret; + + assert(p); + assert(l); + + /* Find the next non-whitespace character, and decode it. If we find padding, we return it as INT_MAX. We + * greedily skip all preceding and all following whitespace. */ + + for (;;) { + if (*l == 0) + return -EPIPE; + + if (!strchr(WHITESPACE, **p)) + break; + + /* Skip leading whitespace */ + (*p)++, (*l)--; + } + + if (**p == '=') + ret = INT_MAX; /* return padding as INT_MAX */ + else { + ret = unbase64char(**p); + if (ret < 0) + return ret; + } + + for (;;) { + (*p)++, (*l)--; + + if (*l == 0) + break; + if (!strchr(WHITESPACE, **p)) + break; + + /* Skip following whitespace */ + } + + return ret; +} + +int unbase64mem_full(const char *p, size_t l, bool secure, void **ret, size_t *ret_size) { + _cleanup_free_ uint8_t *buf = NULL; + const char *x; + uint8_t *z; + size_t len; + int r; + + assert(p || l == 0); + + if (l == SIZE_MAX) + l = strlen(p); + + /* A group of four input bytes needs three output bytes, in case of padding we need to add two or three extra + * bytes. Note that this calculation is an upper boundary, as we ignore whitespace while decoding */ + len = (l / 4) * 3 + (l % 4 != 0 ? (l % 4) - 1 : 0); + + buf = malloc(len + 1); + if (!buf) + return -ENOMEM; + + for (x = p, z = buf;;) { + int a, b, c, d; /* a == 00XXXXXX; b == 00YYYYYY; c == 00ZZZZZZ; d == 00WWWWWW */ + + a = unbase64_next(&x, &l); + if (a == -EPIPE) /* End of string */ + break; + if (a < 0) { + r = a; + goto on_failure; + } + if (a == INT_MAX) { /* Padding is not allowed at the beginning of a 4ch block */ + r = -EINVAL; + goto on_failure; + } + + b = unbase64_next(&x, &l); + if (b < 0) { + r = b; + goto on_failure; + } + if (b == INT_MAX) { /* Padding is not allowed at the second character of a 4ch block either */ + r = -EINVAL; + goto on_failure; + } + + c = unbase64_next(&x, &l); + if (c < 0) { + r = c; + goto on_failure; + } + + d = unbase64_next(&x, &l); + if (d < 0) { + r = d; + goto on_failure; + } + + if (c == INT_MAX) { /* Padding at the third character */ + + if (d != INT_MAX) { /* If the third character is padding, the fourth must be too */ + r = -EINVAL; + goto on_failure; + } + + /* b == 00YY0000 */ + if (b & 15) { + r = -EINVAL; + goto on_failure; + } + + if (l > 0) { /* Trailing rubbish? */ + r = -ENAMETOOLONG; + goto on_failure; + } + + *(z++) = (uint8_t) a << 2 | (uint8_t) (b >> 4); /* XXXXXXYY */ + break; + } + + if (d == INT_MAX) { + /* c == 00ZZZZ00 */ + if (c & 3) { + r = -EINVAL; + goto on_failure; + } + + if (l > 0) { /* Trailing rubbish? */ + r = -ENAMETOOLONG; + goto on_failure; + } + + *(z++) = (uint8_t) a << 2 | (uint8_t) b >> 4; /* XXXXXXYY */ + *(z++) = (uint8_t) b << 4 | (uint8_t) c >> 2; /* YYYYZZZZ */ + break; + } + + *(z++) = (uint8_t) a << 2 | (uint8_t) b >> 4; /* XXXXXXYY */ + *(z++) = (uint8_t) b << 4 | (uint8_t) c >> 2; /* YYYYZZZZ */ + *(z++) = (uint8_t) c << 6 | (uint8_t) d; /* ZZWWWWWW */ + } + + *z = 0; + + if (ret_size) + *ret_size = (size_t) (z - buf); + if (ret) + *ret = TAKE_PTR(buf); + + return 0; + +on_failure: + if (secure) + explicit_bzero_safe(buf, len); + + return r; +} + +void hexdump(FILE *f, const void *p, size_t s) { + const uint8_t *b = p; + unsigned n = 0; + + assert(b || s == 0); + + if (!f) + f = stdout; + + while (s > 0) { + size_t i; + + fprintf(f, "%04x ", n); + + for (i = 0; i < 16; i++) { + + if (i >= s) + fputs(" ", f); + else + fprintf(f, "%02x ", b[i]); + + if (i == 7) + fputc(' ', f); + } + + fputc(' ', f); + + for (i = 0; i < 16; i++) { + + if (i >= s) + fputc(' ', f); + else + fputc(isprint(b[i]) ? (char) b[i] : '.', f); + } + + fputc('\n', f); + + if (s < 16) + break; + + n += 16; + b += 16; + s -= 16; + } +} diff --git a/src/basic/hexdecoct.h b/src/basic/hexdecoct.h new file mode 100644 index 0000000..319b21a --- /dev/null +++ b/src/basic/hexdecoct.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include + +#include "macro.h" + +char octchar(int x) _const_; +int unoctchar(char c) _const_; + +char decchar(int x) _const_; +int undecchar(char c) _const_; + +char hexchar(int x) _const_; +int unhexchar(char c) _const_; + +char *hexmem(const void *p, size_t l); +int unhexmem_full(const char *p, size_t l, bool secure, void **mem, size_t *len); +static inline int unhexmem(const char *p, size_t l, void **mem, size_t *len) { + return unhexmem_full(p, l, false, mem, len); +} + +char base32hexchar(int x) _const_; +int unbase32hexchar(char c) _const_; + +char base64char(int x) _const_; +char urlsafe_base64char(int x) _const_; +int unbase64char(char c) _const_; + +char *base32hexmem(const void *p, size_t l, bool padding); +int unbase32hexmem(const char *p, size_t l, bool padding, void **mem, size_t *len); + +ssize_t base64mem_full(const void *p, size_t l, size_t line_break, char **ret); +static inline ssize_t base64mem(const void *p, size_t l, char **ret) { + return base64mem_full(p, l, SIZE_MAX, ret); +} + +ssize_t base64_append( + char **prefix, + size_t plen, + const void *p, + size_t l, + size_t margin, + size_t width); +int unbase64mem_full(const char *p, size_t l, bool secure, void **mem, size_t *len); +static inline int unbase64mem(const char *p, size_t l, void **mem, size_t *len) { + return unbase64mem_full(p, l, false, mem, len); +} + +void hexdump(FILE *f, const void *p, size_t s); diff --git a/src/basic/hmac.c b/src/basic/hmac.c new file mode 100644 index 0000000..a5f66d5 --- /dev/null +++ b/src/basic/hmac.c @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include + +#include "hmac.h" +#include "sha256.h" + +#define HMAC_BLOCK_SIZE 64 +#define INNER_PADDING_BYTE 0x36 +#define OUTER_PADDING_BYTE 0x5c + +void hmac_sha256(const void *key, + size_t key_size, + const void *input, + size_t input_size, + uint8_t res[static SHA256_DIGEST_SIZE]) { + + uint8_t inner_padding[HMAC_BLOCK_SIZE] = { }; + uint8_t outer_padding[HMAC_BLOCK_SIZE] = { }; + uint8_t replacement_key[SHA256_DIGEST_SIZE]; + struct sha256_ctx hash; + + assert(key); + assert(key_size > 0); + assert(res); + + /* Implement algorithm as described by FIPS 198. */ + + /* The key needs to be block size length or less, hash it if it's longer. */ + if (key_size > HMAC_BLOCK_SIZE) { + sha256_direct(key, key_size, replacement_key); + key = replacement_key; + key_size = SHA256_DIGEST_SIZE; + } + + /* First, copy the key into the padding arrays. If it's shorter than + * the block size, the arrays are already initialized to 0. */ + memcpy(inner_padding, key, key_size); + memcpy(outer_padding, key, key_size); + + /* Then, XOR the provided key and any padding leftovers with the fixed + * padding bytes as defined in FIPS 198. */ + for (size_t i = 0; i < HMAC_BLOCK_SIZE; i++) { + inner_padding[i] ^= INNER_PADDING_BYTE; + outer_padding[i] ^= OUTER_PADDING_BYTE; + } + + /* First pass: hash the inner padding array and the input. */ + sha256_init_ctx(&hash); + sha256_process_bytes(inner_padding, HMAC_BLOCK_SIZE, &hash); + sha256_process_bytes(input, input_size, &hash); + sha256_finish_ctx(&hash, res); + + /* Second pass: hash the outer padding array and the result of the first pass. */ + sha256_init_ctx(&hash); + sha256_process_bytes(outer_padding, HMAC_BLOCK_SIZE, &hash); + sha256_process_bytes(res, SHA256_DIGEST_SIZE, &hash); + sha256_finish_ctx(&hash, res); +} diff --git a/src/basic/hmac.h b/src/basic/hmac.h new file mode 100644 index 0000000..e58c183 --- /dev/null +++ b/src/basic/hmac.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +#include "sha256.h" + +/* Unoptimized implementation based on FIPS 198. 'res' has to be allocated by + * the caller. Prefer external OpenSSL functions, and use this only when + * linking to OpenSSL is not desirable (eg: libsystemd.so). */ +void hmac_sha256(const void *key, size_t key_size, const void *input, size_t input_size, uint8_t res[static SHA256_DIGEST_SIZE]); diff --git a/src/basic/hostname-util.c b/src/basic/hostname-util.c new file mode 100644 index 0000000..b710f07 --- /dev/null +++ b/src/basic/hostname-util.c @@ -0,0 +1,209 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "env-file.h" +#include "hostname-util.h" +#include "os-util.h" +#include "string-util.h" +#include "strv.h" + +char* get_default_hostname(void) { + int r; + + const char *e = secure_getenv("SYSTEMD_DEFAULT_HOSTNAME"); + if (e) { + if (hostname_is_valid(e, 0)) + return strdup(e); + log_debug("Invalid hostname in $SYSTEMD_DEFAULT_HOSTNAME, ignoring: %s", e); + } + + _cleanup_free_ char *f = NULL; + r = parse_os_release(NULL, "DEFAULT_HOSTNAME", &f); + if (r < 0) + log_debug_errno(r, "Failed to parse os-release, ignoring: %m"); + else if (f) { + if (hostname_is_valid(f, 0)) + return TAKE_PTR(f); + log_debug("Invalid hostname in os-release, ignoring: %s", f); + } + + return strdup(FALLBACK_HOSTNAME); +} + +int gethostname_full(GetHostnameFlags flags, char **ret) { + _cleanup_free_ char *buf = NULL, *fallback = NULL; + struct utsname u; + const char *s; + + assert(ret); + + assert_se(uname(&u) >= 0); + + s = u.nodename; + if (isempty(s) || streq(s, "(none)") || + (!FLAGS_SET(flags, GET_HOSTNAME_ALLOW_LOCALHOST) && is_localhost(s)) || + (FLAGS_SET(flags, GET_HOSTNAME_SHORT) && s[0] == '.')) { + if (!FLAGS_SET(flags, GET_HOSTNAME_FALLBACK_DEFAULT)) + return -ENXIO; + + s = fallback = get_default_hostname(); + if (!s) + return -ENOMEM; + + if (FLAGS_SET(flags, GET_HOSTNAME_SHORT) && s[0] == '.') + return -ENXIO; + } + + if (FLAGS_SET(flags, GET_HOSTNAME_SHORT)) + buf = strndup(s, strcspn(s, ".")); + else + buf = strdup(s); + if (!buf) + return -ENOMEM; + + *ret = TAKE_PTR(buf); + return 0; +} + +bool valid_ldh_char(char c) { + /* "LDH" → "Letters, digits, hyphens", as per RFC 5890, Section 2.3.1 */ + + return ascii_isalpha(c) || + ascii_isdigit(c) || + c == '-'; +} + +bool hostname_is_valid(const char *s, ValidHostnameFlags flags) { + unsigned n_dots = 0; + const char *p; + bool dot, hyphen; + + /* Check if s looks like a valid hostname or FQDN. This does not do full DNS validation, but only + * checks if the name is composed of allowed characters and the length is not above the maximum + * allowed by Linux (c.f. dns_name_is_valid()). A trailing dot is allowed if + * VALID_HOSTNAME_TRAILING_DOT flag is set and at least two components are present in the name. Note + * that due to the restricted charset and length this call is substantially more conservative than + * dns_name_is_valid(). Doesn't accept empty hostnames, hostnames with leading dots, and hostnames + * with multiple dots in a sequence. Doesn't allow hyphens at the beginning or end of label. */ + + if (isempty(s)) + return false; + + if (streq(s, ".host")) /* Used by the container logic to denote the "root container" */ + return FLAGS_SET(flags, VALID_HOSTNAME_DOT_HOST); + + for (p = s, dot = hyphen = true; *p; p++) + if (*p == '.') { + if (dot || hyphen) + return false; + + dot = true; + hyphen = false; + n_dots++; + + } else if (*p == '-') { + if (dot) + return false; + + dot = false; + hyphen = true; + + } else { + if (!valid_ldh_char(*p)) + return false; + + dot = false; + hyphen = false; + } + + if (dot && (n_dots < 2 || !FLAGS_SET(flags, VALID_HOSTNAME_TRAILING_DOT))) + return false; + if (hyphen) + return false; + + if (p-s > HOST_NAME_MAX) /* Note that HOST_NAME_MAX is 64 on Linux, but DNS allows domain names up to + * 255 characters */ + return false; + + return true; +} + +char* hostname_cleanup(char *s) { + char *p, *d; + bool dot, hyphen; + + assert(s); + + for (p = s, d = s, dot = hyphen = true; *p && d - s < HOST_NAME_MAX; p++) + if (*p == '.') { + if (dot || hyphen) + continue; + + *(d++) = '.'; + dot = true; + hyphen = false; + + } else if (*p == '-') { + if (dot) + continue; + + *(d++) = '-'; + dot = false; + hyphen = true; + + } else if (valid_ldh_char(*p)) { + *(d++) = *p; + dot = false; + hyphen = false; + } + + if (d > s && IN_SET(d[-1], '-', '.')) + /* The dot can occur at most once, but we might have multiple + * hyphens, hence the loop */ + d--; + *d = 0; + + return s; +} + +bool is_localhost(const char *hostname) { + assert(hostname); + + /* This tries to identify local host and domain names + * described in RFC6761 plus the redhatism of localdomain */ + + return STRCASE_IN_SET( + hostname, + "localhost", + "localhost.", + "localhost.localdomain", + "localhost.localdomain.") || + endswith_no_case(hostname, ".localhost") || + endswith_no_case(hostname, ".localhost.") || + endswith_no_case(hostname, ".localhost.localdomain") || + endswith_no_case(hostname, ".localhost.localdomain."); +} + +int get_pretty_hostname(char **ret) { + _cleanup_free_ char *n = NULL; + int r; + + assert(ret); + + r = parse_env_file(NULL, "/etc/machine-info", "PRETTY_HOSTNAME", &n); + if (r < 0) + return r; + + if (isempty(n)) + return -ENXIO; + + *ret = TAKE_PTR(n); + return 0; +} diff --git a/src/basic/hostname-util.h b/src/basic/hostname-util.h new file mode 100644 index 0000000..a00b852 --- /dev/null +++ b/src/basic/hostname-util.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +#include "macro.h" +#include "strv.h" + +typedef enum GetHostnameFlags { + GET_HOSTNAME_ALLOW_LOCALHOST = 1 << 0, /* accepts "localhost" or friends. */ + GET_HOSTNAME_FALLBACK_DEFAULT = 1 << 1, /* use default hostname if no hostname is set. */ + GET_HOSTNAME_SHORT = 1 << 2, /* kills the FQDN part if present. */ +} GetHostnameFlags; + +int gethostname_full(GetHostnameFlags flags, char **ret); +static inline int gethostname_strict(char **ret) { + return gethostname_full(0, ret); +} + +static inline char* gethostname_malloc(void) { + char *s; + + if (gethostname_full(GET_HOSTNAME_ALLOW_LOCALHOST | GET_HOSTNAME_FALLBACK_DEFAULT, &s) < 0) + return NULL; + + return s; +} + +static inline char* gethostname_short_malloc(void) { + char *s; + + if (gethostname_full(GET_HOSTNAME_ALLOW_LOCALHOST | GET_HOSTNAME_FALLBACK_DEFAULT | GET_HOSTNAME_SHORT, &s) < 0) + return NULL; + + return s; +} + +char* get_default_hostname(void); + +bool valid_ldh_char(char c) _const_; + +typedef enum ValidHostnameFlags { + VALID_HOSTNAME_TRAILING_DOT = 1 << 0, /* Accept trailing dot on multi-label names */ + VALID_HOSTNAME_DOT_HOST = 1 << 1, /* Accept ".host" as valid hostname */ +} ValidHostnameFlags; + +bool hostname_is_valid(const char *s, ValidHostnameFlags flags) _pure_; +char* hostname_cleanup(char *s); + +bool is_localhost(const char *hostname); + +static inline bool is_gateway_hostname(const char *hostname) { + /* This tries to identify the valid syntaxes for the our synthetic "gateway" host. */ + return STRCASE_IN_SET(hostname, "_gateway", "_gateway."); +} + +static inline bool is_outbound_hostname(const char *hostname) { + /* This tries to identify the valid syntaxes for the our synthetic "outbound" host. */ + return STRCASE_IN_SET(hostname, "_outbound", "_outbound."); +} + +int get_pretty_hostname(char **ret); diff --git a/src/basic/in-addr-util.c b/src/basic/in-addr-util.c new file mode 100644 index 0000000..05c729d --- /dev/null +++ b/src/basic/in-addr-util.c @@ -0,0 +1,967 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "errno-util.h" +#include "in-addr-util.h" +#include "macro.h" +#include "parse-util.h" +#include "random-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strxcpyx.h" +#include "util.h" + +bool in4_addr_is_null(const struct in_addr *a) { + assert(a); + + return a->s_addr == 0; +} + +bool in6_addr_is_null(const struct in6_addr *a) { + assert(a); + + return IN6_IS_ADDR_UNSPECIFIED(a); +} + +int in_addr_is_null(int family, const union in_addr_union *u) { + assert(u); + + if (family == AF_INET) + return in4_addr_is_null(&u->in); + + if (family == AF_INET6) + return in6_addr_is_null(&u->in6); + + return -EAFNOSUPPORT; +} + +bool in4_addr_is_link_local(const struct in_addr *a) { + assert(a); + + return (be32toh(a->s_addr) & UINT32_C(0xFFFF0000)) == (UINT32_C(169) << 24 | UINT32_C(254) << 16); +} + +bool in4_addr_is_link_local_dynamic(const struct in_addr *a) { + assert(a); + + if (!in4_addr_is_link_local(a)) + return false; + + /* 169.254.0.0/24 and 169.254.255.0/24 must not be used for the dynamic IPv4LL assignment. + * See RFC 3927 Section 2.1: + * The IPv4 prefix 169.254/16 is registered with the IANA for this purpose. The first 256 and last + * 256 addresses in the 169.254/16 prefix are reserved for future use and MUST NOT be selected by a + * host using this dynamic configuration mechanism. */ + return !IN_SET(be32toh(a->s_addr) & 0x0000FF00U, 0x0000U, 0xFF00U); +} + +bool in6_addr_is_link_local(const struct in6_addr *a) { + assert(a); + + return IN6_IS_ADDR_LINKLOCAL(a); +} + +int in_addr_is_link_local(int family, const union in_addr_union *u) { + assert(u); + + if (family == AF_INET) + return in4_addr_is_link_local(&u->in); + + if (family == AF_INET6) + return in6_addr_is_link_local(&u->in6); + + return -EAFNOSUPPORT; +} + +bool in6_addr_is_link_local_all_nodes(const struct in6_addr *a) { + assert(a); + + /* ff02::1 */ + return be32toh(a->s6_addr32[0]) == UINT32_C(0xff020000) && + a->s6_addr32[1] == 0 && + a->s6_addr32[2] == 0 && + be32toh(a->s6_addr32[3]) == UINT32_C(0x00000001); +} + +int in_addr_is_multicast(int family, const union in_addr_union *u) { + assert(u); + + if (family == AF_INET) + return IN_MULTICAST(be32toh(u->in.s_addr)); + + if (family == AF_INET6) + return IN6_IS_ADDR_MULTICAST(&u->in6); + + return -EAFNOSUPPORT; +} + +bool in4_addr_is_local_multicast(const struct in_addr *a) { + assert(a); + + return (be32toh(a->s_addr) & UINT32_C(0xffffff00)) == UINT32_C(0xe0000000); +} + +bool in4_addr_is_localhost(const struct in_addr *a) { + assert(a); + + /* All of 127.x.x.x is localhost. */ + return (be32toh(a->s_addr) & UINT32_C(0xFF000000)) == UINT32_C(127) << 24; +} + +bool in4_addr_is_non_local(const struct in_addr *a) { + /* Whether the address is not null and not localhost. + * + * As such, it is suitable to configure as DNS/NTP server from DHCP. */ + return !in4_addr_is_null(a) && + !in4_addr_is_localhost(a); +} + +int in_addr_is_localhost(int family, const union in_addr_union *u) { + assert(u); + + if (family == AF_INET) + return in4_addr_is_localhost(&u->in); + + if (family == AF_INET6) + return IN6_IS_ADDR_LOOPBACK(&u->in6); + + return -EAFNOSUPPORT; +} + +int in_addr_is_localhost_one(int family, const union in_addr_union *u) { + assert(u); + + if (family == AF_INET) + /* 127.0.0.1 */ + return be32toh(u->in.s_addr) == UINT32_C(0x7F000001); + + if (family == AF_INET6) + return IN6_IS_ADDR_LOOPBACK(&u->in6); + + return -EAFNOSUPPORT; +} + +bool in6_addr_is_ipv4_mapped_address(const struct in6_addr *a) { + return a->s6_addr32[0] == 0 && + a->s6_addr32[1] == 0 && + a->s6_addr32[2] == htobe32(UINT32_C(0x0000ffff)); +} + +bool in4_addr_equal(const struct in_addr *a, const struct in_addr *b) { + assert(a); + assert(b); + + return a->s_addr == b->s_addr; +} + +bool in6_addr_equal(const struct in6_addr *a, const struct in6_addr *b) { + assert(a); + assert(b); + + return IN6_ARE_ADDR_EQUAL(a, b); +} + +int in_addr_equal(int family, const union in_addr_union *a, const union in_addr_union *b) { + assert(a); + assert(b); + + if (family == AF_INET) + return in4_addr_equal(&a->in, &b->in); + + if (family == AF_INET6) + return in6_addr_equal(&a->in6, &b->in6); + + return -EAFNOSUPPORT; +} + +int in_addr_prefix_intersect( + int family, + const union in_addr_union *a, + unsigned aprefixlen, + const union in_addr_union *b, + unsigned bprefixlen) { + + unsigned m; + + assert(a); + assert(b); + + /* Checks whether there are any addresses that are in both networks */ + + m = MIN(aprefixlen, bprefixlen); + + if (family == AF_INET) { + uint32_t x, nm; + + x = be32toh(a->in.s_addr ^ b->in.s_addr); + nm = m == 0 ? 0 : 0xFFFFFFFFUL << (32 - m); + + return (x & nm) == 0; + } + + if (family == AF_INET6) { + unsigned i; + + if (m > 128) + m = 128; + + for (i = 0; i < 16; i++) { + uint8_t x, nm; + + x = a->in6.s6_addr[i] ^ b->in6.s6_addr[i]; + + if (m < 8) + nm = 0xFF << (8 - m); + else + nm = 0xFF; + + if ((x & nm) != 0) + return 0; + + if (m > 8) + m -= 8; + else + m = 0; + } + + return 1; + } + + return -EAFNOSUPPORT; +} + +int in_addr_prefix_next(int family, union in_addr_union *u, unsigned prefixlen) { + assert(u); + + /* Increases the network part of an address by one. Returns 0 if that succeeds, or -ERANGE if + * this overflows. */ + + return in_addr_prefix_nth(family, u, prefixlen, 1); +} + +/* + * Calculates the nth prefix of size prefixlen starting from the address denoted by u. + * + * On success 0 will be returned and the calculated prefix will be available in + * u. In case the calculation cannot be performed (invalid prefix length, + * overflows would occur) -ERANGE is returned. If the address family given isn't + * supported -EAFNOSUPPORT will be returned. + * + * Examples: + * - in_addr_prefix_nth(AF_INET, 192.168.0.0, 24, 2), returns 0, writes 192.168.2.0 to u + * - in_addr_prefix_nth(AF_INET, 192.168.0.0, 24, 0), returns 0, no data written + * - in_addr_prefix_nth(AF_INET, 255.255.255.0, 24, 1), returns -ERANGE, no data written + * - in_addr_prefix_nth(AF_INET, 255.255.255.0, 0, 1), returns -ERANGE, no data written + * - in_addr_prefix_nth(AF_INET6, 2001:db8, 64, 0xff00) returns 0, writes 2001:0db8:0000:ff00:: to u + */ +int in_addr_prefix_nth(int family, union in_addr_union *u, unsigned prefixlen, uint64_t nth) { + assert(u); + + if (prefixlen <= 0) + return -ERANGE; + + if (family == AF_INET) { + uint32_t c, n, t; + + if (prefixlen > 32) + return -ERANGE; + + c = be32toh(u->in.s_addr); + + t = nth << (32 - prefixlen); + + /* Check for wrap */ + if (c > UINT32_MAX - t) + return -ERANGE; + + n = c + t; + + n &= UINT32_C(0xFFFFFFFF) << (32 - prefixlen); + u->in.s_addr = htobe32(n); + return 0; + } + + if (family == AF_INET6) { + bool overflow = false; + + if (prefixlen > 128) + return -ERANGE; + + for (unsigned i = 16; i > 0; i--) { + unsigned t, j = i - 1, p = j * 8; + + if (p >= prefixlen) { + u->in6.s6_addr[j] = 0; + continue; + } + + if (prefixlen - p < 8) { + u->in6.s6_addr[j] &= 0xff << (8 - (prefixlen - p)); + t = u->in6.s6_addr[j] + ((nth & 0xff) << (8 - (prefixlen - p))); + nth >>= prefixlen - p; + } else { + t = u->in6.s6_addr[j] + (nth & 0xff) + overflow; + nth >>= 8; + } + + overflow = t > UINT8_MAX; + u->in6.s6_addr[j] = (uint8_t) (t & 0xff); + } + + if (overflow || nth != 0) + return -ERANGE; + + return 0; + } + + return -EAFNOSUPPORT; +} + +int in_addr_random_prefix( + int family, + union in_addr_union *u, + unsigned prefixlen_fixed_part, + unsigned prefixlen) { + + assert(u); + + /* Random network part of an address by one. */ + + if (prefixlen <= 0) + return 0; + + if (family == AF_INET) { + uint32_t c, n; + + if (prefixlen_fixed_part > 32) + prefixlen_fixed_part = 32; + if (prefixlen > 32) + prefixlen = 32; + if (prefixlen_fixed_part >= prefixlen) + return -EINVAL; + + c = be32toh(u->in.s_addr); + c &= ((UINT32_C(1) << prefixlen_fixed_part) - 1) << (32 - prefixlen_fixed_part); + + random_bytes(&n, sizeof(n)); + n &= ((UINT32_C(1) << (prefixlen - prefixlen_fixed_part)) - 1) << (32 - prefixlen); + + u->in.s_addr = htobe32(n | c); + return 1; + } + + if (family == AF_INET6) { + struct in6_addr n; + unsigned i, j; + + if (prefixlen_fixed_part > 128) + prefixlen_fixed_part = 128; + if (prefixlen > 128) + prefixlen = 128; + if (prefixlen_fixed_part >= prefixlen) + return -EINVAL; + + random_bytes(&n, sizeof(n)); + + for (i = 0; i < 16; i++) { + uint8_t mask_fixed_part = 0, mask = 0; + + if (i < (prefixlen_fixed_part + 7) / 8) { + if (i < prefixlen_fixed_part / 8) + mask_fixed_part = 0xffu; + else { + j = prefixlen_fixed_part % 8; + mask_fixed_part = ((UINT8_C(1) << (j + 1)) - 1) << (8 - j); + } + } + + if (i < (prefixlen + 7) / 8) { + if (i < prefixlen / 8) + mask = 0xffu ^ mask_fixed_part; + else { + j = prefixlen % 8; + mask = (((UINT8_C(1) << (j + 1)) - 1) << (8 - j)) ^ mask_fixed_part; + } + } + + u->in6.s6_addr[i] &= mask_fixed_part; + u->in6.s6_addr[i] |= n.s6_addr[i] & mask; + } + + return 1; + } + + return -EAFNOSUPPORT; +} + +int in_addr_prefix_range( + int family, + const union in_addr_union *in, + unsigned prefixlen, + union in_addr_union *ret_start, + union in_addr_union *ret_end) { + + union in_addr_union start, end; + int r; + + assert(in); + + if (!IN_SET(family, AF_INET, AF_INET6)) + return -EAFNOSUPPORT; + + if (ret_start) { + start = *in; + r = in_addr_prefix_nth(family, &start, prefixlen, 0); + if (r < 0) + return r; + } + + if (ret_end) { + end = *in; + r = in_addr_prefix_nth(family, &end, prefixlen, 1); + if (r < 0) + return r; + } + + if (ret_start) + *ret_start = start; + if (ret_end) + *ret_end = end; + + return 0; +} + +int in_addr_to_string(int family, const union in_addr_union *u, char **ret) { + _cleanup_free_ char *x = NULL; + size_t l; + + assert(u); + assert(ret); + + if (family == AF_INET) + l = INET_ADDRSTRLEN; + else if (family == AF_INET6) + l = INET6_ADDRSTRLEN; + else + return -EAFNOSUPPORT; + + x = new(char, l); + if (!x) + return -ENOMEM; + + errno = 0; + if (!typesafe_inet_ntop(family, u, x, l)) + return errno_or_else(EINVAL); + + *ret = TAKE_PTR(x); + return 0; +} + +int in_addr_prefix_to_string( + int family, + const union in_addr_union *u, + unsigned prefixlen, + char *buf, + size_t buf_len) { + + assert(u); + assert(buf); + + if (!IN_SET(family, AF_INET, AF_INET6)) + return -EAFNOSUPPORT; + + errno = 0; + if (!typesafe_inet_ntop(family, u, buf, buf_len)) + return errno_or_else(ENOSPC); + + size_t l = strlen(buf); + if (!snprintf_ok(buf + l, buf_len - l, "/%u", prefixlen)) + return -ENOSPC; + return 0; +} + +int in_addr_port_ifindex_name_to_string(int family, const union in_addr_union *u, uint16_t port, int ifindex, const char *server_name, char **ret) { + _cleanup_free_ char *ip_str = NULL, *x = NULL; + int r; + + assert(IN_SET(family, AF_INET, AF_INET6)); + assert(u); + assert(ret); + + /* Much like in_addr_to_string(), but optionally appends the zone interface index to the address, to properly + * handle IPv6 link-local addresses. */ + + r = in_addr_to_string(family, u, &ip_str); + if (r < 0) + return r; + + if (family == AF_INET6) { + r = in_addr_is_link_local(family, u); + if (r < 0) + return r; + if (r == 0) + ifindex = 0; + } else + ifindex = 0; /* For IPv4 address, ifindex is always ignored. */ + + if (port == 0 && ifindex == 0 && isempty(server_name)) { + *ret = TAKE_PTR(ip_str); + return 0; + } + + const char *separator = isempty(server_name) ? "" : "#"; + server_name = strempty(server_name); + + if (port > 0) { + if (family == AF_INET6) { + if (ifindex > 0) + r = asprintf(&x, "[%s]:%"PRIu16"%%%i%s%s", ip_str, port, ifindex, separator, server_name); + else + r = asprintf(&x, "[%s]:%"PRIu16"%s%s", ip_str, port, separator, server_name); + } else + r = asprintf(&x, "%s:%"PRIu16"%s%s", ip_str, port, separator, server_name); + } else { + if (ifindex > 0) + r = asprintf(&x, "%s%%%i%s%s", ip_str, ifindex, separator, server_name); + else { + x = strjoin(ip_str, separator, server_name); + r = x ? 0 : -ENOMEM; + } + } + if (r < 0) + return -ENOMEM; + + *ret = TAKE_PTR(x); + return 0; +} + +int in_addr_from_string(int family, const char *s, union in_addr_union *ret) { + union in_addr_union buffer; + assert(s); + + if (!IN_SET(family, AF_INET, AF_INET6)) + return -EAFNOSUPPORT; + + errno = 0; + if (inet_pton(family, s, ret ?: &buffer) <= 0) + return errno_or_else(EINVAL); + + return 0; +} + +int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret) { + int r; + + assert(s); + + r = in_addr_from_string(AF_INET, s, ret); + if (r >= 0) { + if (ret_family) + *ret_family = AF_INET; + return 0; + } + + r = in_addr_from_string(AF_INET6, s, ret); + if (r >= 0) { + if (ret_family) + *ret_family = AF_INET6; + return 0; + } + + return -EINVAL; +} + +unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr) { + assert(addr); + + return 32U - u32ctz(be32toh(addr->s_addr)); +} + +/* Calculate an IPv4 netmask from prefix length, for example /8 -> 255.0.0.0. */ +struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen) { + assert(addr); + assert(prefixlen <= 32); + + /* Shifting beyond 32 is not defined, handle this specially. */ + if (prefixlen == 0) + addr->s_addr = 0; + else + addr->s_addr = htobe32((0xffffffff << (32 - prefixlen)) & 0xffffffff); + + return addr; +} + +/* Calculate an IPv6 netmask from prefix length, for example /16 -> ffff::. */ +struct in6_addr* in6_addr_prefixlen_to_netmask(struct in6_addr *addr, unsigned char prefixlen) { + assert(addr); + assert(prefixlen <= 128); + + for (unsigned i = 0; i < 16; i++) { + uint8_t mask; + + if (prefixlen >= 8) { + mask = 0xFF; + prefixlen -= 8; + } else if (prefixlen > 0) { + mask = 0xFF << (8 - prefixlen); + prefixlen = 0; + } else { + assert(prefixlen == 0); + mask = 0; + } + + addr->s6_addr[i] = mask; + } + + return addr; +} + +/* Calculate an IPv4 or IPv6 netmask from prefix length, for example /8 -> 255.0.0.0 or /16 -> ffff::. */ +int in_addr_prefixlen_to_netmask(int family, union in_addr_union *addr, unsigned char prefixlen) { + assert(addr); + + switch (family) { + case AF_INET: + in4_addr_prefixlen_to_netmask(&addr->in, prefixlen); + return 0; + case AF_INET6: + in6_addr_prefixlen_to_netmask(&addr->in6, prefixlen); + return 0; + default: + return -EAFNOSUPPORT; + } +} + +int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen) { + uint8_t msb_octet = *(uint8_t*) addr; + + /* addr may not be aligned, so make sure we only access it byte-wise */ + + assert(addr); + assert(prefixlen); + + if (msb_octet < 128) + /* class A, leading bits: 0 */ + *prefixlen = 8; + else if (msb_octet < 192) + /* class B, leading bits 10 */ + *prefixlen = 16; + else if (msb_octet < 224) + /* class C, leading bits 110 */ + *prefixlen = 24; + else + /* class D or E, no default prefixlen */ + return -ERANGE; + + return 0; +} + +int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask) { + unsigned char prefixlen; + int r; + + assert(addr); + assert(mask); + + r = in4_addr_default_prefixlen(addr, &prefixlen); + if (r < 0) + return r; + + in4_addr_prefixlen_to_netmask(mask, prefixlen); + return 0; +} + +int in4_addr_mask(struct in_addr *addr, unsigned char prefixlen) { + struct in_addr mask; + + assert(addr); + + if (!in4_addr_prefixlen_to_netmask(&mask, prefixlen)) + return -EINVAL; + + addr->s_addr &= mask.s_addr; + return 0; +} + +int in6_addr_mask(struct in6_addr *addr, unsigned char prefixlen) { + unsigned i; + + for (i = 0; i < 16; i++) { + uint8_t mask; + + if (prefixlen >= 8) { + mask = 0xFF; + prefixlen -= 8; + } else if (prefixlen > 0) { + mask = 0xFF << (8 - prefixlen); + prefixlen = 0; + } else { + assert(prefixlen == 0); + mask = 0; + } + + addr->s6_addr[i] &= mask; + } + + return 0; +} + +int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen) { + assert(addr); + + switch (family) { + case AF_INET: + return in4_addr_mask(&addr->in, prefixlen); + case AF_INET6: + return in6_addr_mask(&addr->in6, prefixlen); + default: + return -EAFNOSUPPORT; + } +} + +int in4_addr_prefix_covers( + const struct in_addr *prefix, + unsigned char prefixlen, + const struct in_addr *address) { + + struct in_addr masked_prefix, masked_address; + int r; + + assert(prefix); + assert(address); + + masked_prefix = *prefix; + r = in4_addr_mask(&masked_prefix, prefixlen); + if (r < 0) + return r; + + masked_address = *address; + r = in4_addr_mask(&masked_address, prefixlen); + if (r < 0) + return r; + + return in4_addr_equal(&masked_prefix, &masked_address); +} + +int in6_addr_prefix_covers( + const struct in6_addr *prefix, + unsigned char prefixlen, + const struct in6_addr *address) { + + struct in6_addr masked_prefix, masked_address; + int r; + + assert(prefix); + assert(address); + + masked_prefix = *prefix; + r = in6_addr_mask(&masked_prefix, prefixlen); + if (r < 0) + return r; + + masked_address = *address; + r = in6_addr_mask(&masked_address, prefixlen); + if (r < 0) + return r; + + return in6_addr_equal(&masked_prefix, &masked_address); +} + +int in_addr_prefix_covers( + int family, + const union in_addr_union *prefix, + unsigned char prefixlen, + const union in_addr_union *address) { + + assert(prefix); + assert(address); + + switch (family) { + case AF_INET: + return in4_addr_prefix_covers(&prefix->in, prefixlen, &address->in); + case AF_INET6: + return in6_addr_prefix_covers(&prefix->in6, prefixlen, &address->in6); + default: + return -EAFNOSUPPORT; + } +} + +int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret) { + uint8_t u; + int r; + + if (!IN_SET(family, AF_INET, AF_INET6)) + return -EAFNOSUPPORT; + + r = safe_atou8(p, &u); + if (r < 0) + return r; + + if (u > FAMILY_ADDRESS_SIZE(family) * 8) + return -ERANGE; + + *ret = u; + return 0; +} + +int in_addr_prefix_from_string( + const char *p, + int family, + union in_addr_union *ret_prefix, + unsigned char *ret_prefixlen) { + + _cleanup_free_ char *str = NULL; + union in_addr_union buffer; + const char *e, *l; + unsigned char k; + int r; + + assert(p); + + if (!IN_SET(family, AF_INET, AF_INET6)) + return -EAFNOSUPPORT; + + e = strchr(p, '/'); + if (e) { + str = strndup(p, e - p); + if (!str) + return -ENOMEM; + + l = str; + } else + l = p; + + r = in_addr_from_string(family, l, &buffer); + if (r < 0) + return r; + + if (e) { + r = in_addr_parse_prefixlen(family, e+1, &k); + if (r < 0) + return r; + } else + k = FAMILY_ADDRESS_SIZE(family) * 8; + + if (ret_prefix) + *ret_prefix = buffer; + if (ret_prefixlen) + *ret_prefixlen = k; + + return 0; +} + +int in_addr_prefix_from_string_auto_internal( + const char *p, + InAddrPrefixLenMode mode, + int *ret_family, + union in_addr_union *ret_prefix, + unsigned char *ret_prefixlen) { + + _cleanup_free_ char *str = NULL; + union in_addr_union buffer; + const char *e, *l; + unsigned char k; + int family, r; + + assert(p); + + e = strchr(p, '/'); + if (e) { + str = strndup(p, e - p); + if (!str) + return -ENOMEM; + + l = str; + } else + l = p; + + r = in_addr_from_string_auto(l, &family, &buffer); + if (r < 0) + return r; + + if (e) { + r = in_addr_parse_prefixlen(family, e+1, &k); + if (r < 0) + return r; + } else + switch (mode) { + case PREFIXLEN_FULL: + k = FAMILY_ADDRESS_SIZE(family) * 8; + break; + case PREFIXLEN_REFUSE: + return -ENOANO; /* To distinguish this error from others. */ + case PREFIXLEN_LEGACY: + if (family == AF_INET) { + r = in4_addr_default_prefixlen(&buffer.in, &k); + if (r < 0) + return r; + } else + k = 0; + break; + default: + assert_not_reached(); + } + + if (ret_family) + *ret_family = family; + if (ret_prefix) + *ret_prefix = buffer; + if (ret_prefixlen) + *ret_prefixlen = k; + + return 0; + +} + +static void in_addr_data_hash_func(const struct in_addr_data *a, struct siphash *state) { + assert(a); + assert(state); + + siphash24_compress(&a->family, sizeof(a->family), state); + siphash24_compress(&a->address, FAMILY_ADDRESS_SIZE(a->family), state); +} + +static int in_addr_data_compare_func(const struct in_addr_data *x, const struct in_addr_data *y) { + int r; + + assert(x); + assert(y); + + r = CMP(x->family, y->family); + if (r != 0) + return r; + + return memcmp(&x->address, &y->address, FAMILY_ADDRESS_SIZE(x->family)); +} + +DEFINE_HASH_OPS(in_addr_data_hash_ops, struct in_addr_data, in_addr_data_hash_func, in_addr_data_compare_func); + +void in6_addr_hash_func(const struct in6_addr *addr, struct siphash *state) { + assert(addr); + assert(state); + + siphash24_compress(addr, sizeof(*addr), state); +} + +int in6_addr_compare_func(const struct in6_addr *a, const struct in6_addr *b) { + assert(a); + assert(b); + + return memcmp(a, b, sizeof(*a)); +} + +DEFINE_HASH_OPS(in6_addr_hash_ops, struct in6_addr, in6_addr_hash_func, in6_addr_compare_func); +DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR( + in6_addr_hash_ops_free, + struct in6_addr, + in6_addr_hash_func, + in6_addr_compare_func, + free); diff --git a/src/basic/in-addr-util.h b/src/basic/in-addr-util.h new file mode 100644 index 0000000..19fa35f --- /dev/null +++ b/src/basic/in-addr-util.h @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include + +#include "hash-funcs.h" +#include "macro.h" +#include "util.h" + +union in_addr_union { + struct in_addr in; + struct in6_addr in6; + uint8_t bytes[CONST_MAX(sizeof(struct in_addr), sizeof(struct in6_addr))]; +}; + +struct in_addr_data { + int family; + union in_addr_union address; +}; + +bool in4_addr_is_null(const struct in_addr *a); +static inline bool in4_addr_is_set(const struct in_addr *a) { + return !in4_addr_is_null(a); +} +bool in6_addr_is_null(const struct in6_addr *a); +static inline bool in6_addr_is_set(const struct in6_addr *a) { + return !in6_addr_is_null(a); +} +int in_addr_is_null(int family, const union in_addr_union *u); +static inline bool in_addr_is_set(int family, const union in_addr_union *u) { + return in_addr_is_null(family, u) == 0; +} +static inline int in_addr_data_is_null(const struct in_addr_data *a) { + assert(a); + return in_addr_is_null(a->family, &a->address); +} +static inline bool in_addr_data_is_set(const struct in_addr_data *a) { + return in_addr_data_is_null(a); +} + +int in_addr_is_multicast(int family, const union in_addr_union *u); + +bool in4_addr_is_link_local(const struct in_addr *a); +bool in4_addr_is_link_local_dynamic(const struct in_addr *a); +bool in6_addr_is_link_local(const struct in6_addr *a); +int in_addr_is_link_local(int family, const union in_addr_union *u); +bool in6_addr_is_link_local_all_nodes(const struct in6_addr *a); + +bool in4_addr_is_localhost(const struct in_addr *a); +int in_addr_is_localhost(int family, const union in_addr_union *u); +int in_addr_is_localhost_one(int family, const union in_addr_union *u); + +bool in4_addr_is_local_multicast(const struct in_addr *a); +bool in4_addr_is_non_local(const struct in_addr *a); +bool in6_addr_is_ipv4_mapped_address(const struct in6_addr *a); + +bool in4_addr_equal(const struct in_addr *a, const struct in_addr *b); +bool in6_addr_equal(const struct in6_addr *a, const struct in6_addr *b); +int in_addr_equal(int family, const union in_addr_union *a, const union in_addr_union *b); +int in_addr_prefix_intersect(int family, const union in_addr_union *a, unsigned aprefixlen, const union in_addr_union *b, unsigned bprefixlen); +int in_addr_prefix_next(int family, union in_addr_union *u, unsigned prefixlen); +int in_addr_prefix_nth(int family, union in_addr_union *u, unsigned prefixlen, uint64_t nth); +int in_addr_random_prefix(int family, union in_addr_union *u, unsigned prefixlen_fixed_part, unsigned prefixlen); +int in_addr_prefix_range( + int family, + const union in_addr_union *in, + unsigned prefixlen, + union in_addr_union *ret_start, + union in_addr_union *ret_end); + +int in_addr_to_string(int family, const union in_addr_union *u, char **ret); +static inline int in6_addr_to_string(const struct in6_addr *u, char **ret) { + return in_addr_to_string(AF_INET6, (const union in_addr_union*) u, ret); +} + +static inline const char* typesafe_inet_ntop(int family, const union in_addr_union *a, char *buf, size_t len) { + return inet_ntop(family, a, buf, len); +} +static inline const char* typesafe_inet_ntop4(const struct in_addr *a, char *buf, size_t len) { + return inet_ntop(AF_INET, a, buf, len); +} +static inline const char* typesafe_inet_ntop6(const struct in6_addr *a, char *buf, size_t len) { + return inet_ntop(AF_INET6, a, buf, len); +} + +/* Note: the lifetime of the compound literal is the immediately surrounding block, + * see C11 §6.5.2.5, and + * https://stackoverflow.com/questions/34880638/compound-literal-lifetime-and-if-blocks */ +#define IN_ADDR_MAX CONST_MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN) +#define IN_ADDR_TO_STRING(family, addr) typesafe_inet_ntop(family, addr, (char[IN_ADDR_MAX]){}, IN_ADDR_MAX) +#define IN4_ADDR_TO_STRING(addr) typesafe_inet_ntop4(addr, (char[INET_ADDRSTRLEN]){}, INET_ADDRSTRLEN) +#define IN6_ADDR_TO_STRING(addr) typesafe_inet_ntop6(addr, (char[INET6_ADDRSTRLEN]){}, INET6_ADDRSTRLEN) + +int in_addr_prefix_to_string( + int family, + const union in_addr_union *u, + unsigned prefixlen, + char *buf, + size_t buf_len); + +static inline const char* _in_addr_prefix_to_string( + int family, + const union in_addr_union *u, + unsigned prefixlen, + char *buf, + size_t buf_len) { + /* We assume that this is called with an appropriately sized buffer and can never fail. */ + assert_se(in_addr_prefix_to_string(family, u, prefixlen, buf, buf_len) == 0); + return buf; +} +static inline const char* _in4_addr_prefix_to_string(const struct in_addr *a, unsigned prefixlen, char *buf, size_t buf_len) { + return _in_addr_prefix_to_string(AF_INET, (const union in_addr_union *) a, prefixlen, buf, buf_len); +} +static inline const char* _in6_addr_prefix_to_string(const struct in6_addr *a, unsigned prefixlen, char *buf, size_t buf_len) { + return _in_addr_prefix_to_string(AF_INET6, (const union in_addr_union *) a, prefixlen, buf, buf_len); +} + +#define PREFIX_SUFFIX_MAX (1 + DECIMAL_STR_MAX(unsigned)) +#define IN_ADDR_PREFIX_TO_STRING(family, addr, prefixlen) \ + _in_addr_prefix_to_string(family, addr, prefixlen, (char[IN_ADDR_MAX + PREFIX_SUFFIX_MAX]){}, IN_ADDR_MAX + PREFIX_SUFFIX_MAX) +#define IN4_ADDR_PREFIX_TO_STRING(addr, prefixlen) \ + _in4_addr_prefix_to_string(addr, prefixlen, (char[INET_ADDRSTRLEN + PREFIX_SUFFIX_MAX]){}, INET_ADDRSTRLEN + PREFIX_SUFFIX_MAX) +#define IN6_ADDR_PREFIX_TO_STRING(addr, prefixlen) \ + _in6_addr_prefix_to_string(addr, prefixlen, (char[INET6_ADDRSTRLEN + PREFIX_SUFFIX_MAX]){}, INET6_ADDRSTRLEN + PREFIX_SUFFIX_MAX) + +int in_addr_port_ifindex_name_to_string(int family, const union in_addr_union *u, uint16_t port, int ifindex, const char *server_name, char **ret); +static inline int in_addr_ifindex_to_string(int family, const union in_addr_union *u, int ifindex, char **ret) { + return in_addr_port_ifindex_name_to_string(family, u, 0, ifindex, NULL, ret); +} +static inline int in_addr_port_to_string(int family, const union in_addr_union *u, uint16_t port, char **ret) { + return in_addr_port_ifindex_name_to_string(family, u, port, 0, NULL, ret); +} +int in_addr_from_string(int family, const char *s, union in_addr_union *ret); +int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret); + +unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr); +struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen); +struct in6_addr* in6_addr_prefixlen_to_netmask(struct in6_addr *addr, unsigned char prefixlen); +int in_addr_prefixlen_to_netmask(int family, union in_addr_union *addr, unsigned char prefixlen); +int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen); +int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask); +int in4_addr_mask(struct in_addr *addr, unsigned char prefixlen); +int in6_addr_mask(struct in6_addr *addr, unsigned char prefixlen); +int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen); +int in4_addr_prefix_covers(const struct in_addr *prefix, unsigned char prefixlen, const struct in_addr *address); +int in6_addr_prefix_covers(const struct in6_addr *prefix, unsigned char prefixlen, const struct in6_addr *address); +int in_addr_prefix_covers(int family, const union in_addr_union *prefix, unsigned char prefixlen, const union in_addr_union *address); +int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret); +int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen); + +typedef enum InAddrPrefixLenMode { + PREFIXLEN_FULL, /* Default to prefixlen of address size, 32 for IPv4 or 128 for IPv6, if not specified. */ + PREFIXLEN_REFUSE, /* Fail with -ENOANO if prefixlen is not specified. */ + PREFIXLEN_LEGACY, /* Default to legacy default prefixlen calculation from address if not specified. */ +} InAddrPrefixLenMode; + +int in_addr_prefix_from_string_auto_internal(const char *p, InAddrPrefixLenMode mode, int *ret_family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen); +static inline int in_addr_prefix_from_string_auto(const char *p, int *ret_family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen) { + return in_addr_prefix_from_string_auto_internal(p, PREFIXLEN_FULL, ret_family, ret_prefix, ret_prefixlen); +} + +static inline size_t FAMILY_ADDRESS_SIZE(int family) { + assert(IN_SET(family, AF_INET, AF_INET6)); + return family == AF_INET6 ? 16 : 4; +} + +#define FAMILY_ADDRESS_SIZE_SAFE(f) \ + ({ \ + int _f = (f); \ + _f == AF_INET ? sizeof(struct in_addr) : \ + _f == AF_INET6 ? sizeof(struct in6_addr) : 0; \ + }) + +/* Workaround for clang, explicitly specify the maximum-size element here. + * See also oss-fuzz#11344. */ +#define IN_ADDR_NULL ((union in_addr_union) { .in6 = {} }) + +void in6_addr_hash_func(const struct in6_addr *addr, struct siphash *state); +int in6_addr_compare_func(const struct in6_addr *a, const struct in6_addr *b); + +extern const struct hash_ops in_addr_data_hash_ops; +extern const struct hash_ops in6_addr_hash_ops; +extern const struct hash_ops in6_addr_hash_ops_free; + +#define IPV4_ADDRESS_FMT_STR "%u.%u.%u.%u" +#define IPV4_ADDRESS_FMT_VAL(address) \ + be32toh((address).s_addr) >> 24, \ + (be32toh((address).s_addr) >> 16) & 0xFFu, \ + (be32toh((address).s_addr) >> 8) & 0xFFu, \ + be32toh((address).s_addr) & 0xFFu diff --git a/src/basic/inotify-util.c b/src/basic/inotify-util.c new file mode 100644 index 0000000..6da974d --- /dev/null +++ b/src/basic/inotify-util.c @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "fd-util.h" +#include "inotify-util.h" +#include "stat-util.h" + +int inotify_add_watch_fd(int fd, int what, uint32_t mask) { + int wd, r; + + /* This is like inotify_add_watch(), except that the file to watch is not referenced by a path, but by an fd */ + wd = inotify_add_watch(fd, FORMAT_PROC_FD_PATH(what), mask); + if (wd < 0) { + if (errno != ENOENT) + return -errno; + + /* Didn't work with ENOENT? If so, then either /proc/ isn't mounted, or the fd is bad */ + r = proc_mounted(); + if (r == 0) + return -ENOSYS; + if (r > 0) + return -EBADF; + + return -ENOENT; /* OK, no clue, let's propagate the original error */ + } + + return wd; +} + +int inotify_add_watch_and_warn(int fd, const char *pathname, uint32_t mask) { + int wd; + + wd = inotify_add_watch(fd, pathname, mask); + if (wd < 0) { + if (errno == ENOSPC) + return log_error_errno(errno, "Failed to add a watch for %s: inotify watch limit reached", pathname); + + return log_error_errno(errno, "Failed to add a watch for %s: %m", pathname); + } + + return wd; +} diff --git a/src/basic/inotify-util.h b/src/basic/inotify-util.h new file mode 100644 index 0000000..61951ff --- /dev/null +++ b/src/basic/inotify-util.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include + +#include "log.h" + +#define INOTIFY_EVENT_MAX (offsetof(struct inotify_event, name) + NAME_MAX + 1) + +#define _FOREACH_INOTIFY_EVENT(e, buffer, sz, log_level, start, end) \ + for (struct inotify_event \ + *start = &((buffer).ev), \ + *end = (struct inotify_event*) ((uint8_t*) start + (sz)), \ + *e = start; \ + (size_t) ((uint8_t*) end - (uint8_t*) e) >= sizeof(struct inotify_event) && \ + ((size_t) ((uint8_t*) end - (uint8_t*) e) >= sizeof(struct inotify_event) + e->len || \ + (log_full(log_level, "Received invalid inotify event, ignoring."), false)); \ + e = (struct inotify_event*) ((uint8_t*) e + sizeof(struct inotify_event) + e->len)) + +#define _FOREACH_INOTIFY_EVENT_FULL(e, buffer, sz, log_level) \ + _FOREACH_INOTIFY_EVENT(e, buffer, sz, log_level, UNIQ_T(start, UNIQ), UNIQ_T(end, UNIQ)) + +#define FOREACH_INOTIFY_EVENT(e, buffer, sz) \ + _FOREACH_INOTIFY_EVENT_FULL(e, buffer, sz, LOG_DEBUG) + +#define FOREACH_INOTIFY_EVENT_WARN(e, buffer, sz) \ + _FOREACH_INOTIFY_EVENT_FULL(e, buffer, sz, LOG_WARNING) + +union inotify_event_buffer { + struct inotify_event ev; + uint8_t raw[INOTIFY_EVENT_MAX]; +}; + +int inotify_add_watch_fd(int fd, int what, uint32_t mask); +int inotify_add_watch_and_warn(int fd, const char *pathname, uint32_t mask); diff --git a/src/basic/io-util.c b/src/basic/io-util.c new file mode 100644 index 0000000..7bb9e15 --- /dev/null +++ b/src/basic/io-util.c @@ -0,0 +1,345 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include + +#include "io-util.h" +#include "string-util.h" +#include "time-util.h" + +int flush_fd(int fd) { + int count = 0; + + /* Read from the specified file descriptor, until POLLIN is not set anymore, throwing away everything + * read. Note that some file descriptors (notable IP sockets) will trigger POLLIN even when no data can be read + * (due to IP packet checksum mismatches), hence this function is only safe to be non-blocking if the fd used + * was set to non-blocking too. */ + + for (;;) { + char buf[LINE_MAX]; + ssize_t l; + int r; + + r = fd_wait_for_event(fd, POLLIN, 0); + if (r < 0) { + if (r == -EINTR) + continue; + + return r; + } + if (r == 0) + return count; + + l = read(fd, buf, sizeof(buf)); + if (l < 0) { + if (errno == EINTR) + continue; + + if (errno == EAGAIN) + return count; + + return -errno; + } else if (l == 0) + return count; + + count += (int) l; + } +} + +ssize_t loop_read(int fd, void *buf, size_t nbytes, bool do_poll) { + uint8_t *p = ASSERT_PTR(buf); + ssize_t n = 0; + + assert(fd >= 0); + + /* If called with nbytes == 0, let's call read() at least once, to validate the operation */ + + if (nbytes > (size_t) SSIZE_MAX) + return -EINVAL; + + do { + ssize_t k; + + k = read(fd, p, nbytes); + if (k < 0) { + if (errno == EINTR) + continue; + + if (errno == EAGAIN && do_poll) { + + /* We knowingly ignore any return value here, + * and expect that any error/EOF is reported + * via read() */ + + (void) fd_wait_for_event(fd, POLLIN, USEC_INFINITY); + continue; + } + + return n > 0 ? n : -errno; + } + + if (k == 0) + return n; + + assert((size_t) k <= nbytes); + + p += k; + nbytes -= k; + n += k; + } while (nbytes > 0); + + return n; +} + +int loop_read_exact(int fd, void *buf, size_t nbytes, bool do_poll) { + ssize_t n; + + n = loop_read(fd, buf, nbytes, do_poll); + if (n < 0) + return (int) n; + if ((size_t) n != nbytes) + return -EIO; + + return 0; +} + +int loop_write(int fd, const void *buf, size_t nbytes, bool do_poll) { + const uint8_t *p = ASSERT_PTR(buf); + + assert(fd >= 0); + + if (_unlikely_(nbytes > (size_t) SSIZE_MAX)) + return -EINVAL; + + do { + ssize_t k; + + k = write(fd, p, nbytes); + if (k < 0) { + if (errno == EINTR) + continue; + + if (errno == EAGAIN && do_poll) { + /* We knowingly ignore any return value here, + * and expect that any error/EOF is reported + * via write() */ + + (void) fd_wait_for_event(fd, POLLOUT, USEC_INFINITY); + continue; + } + + return -errno; + } + + if (_unlikely_(nbytes > 0 && k == 0)) /* Can't really happen */ + return -EIO; + + assert((size_t) k <= nbytes); + + p += k; + nbytes -= k; + } while (nbytes > 0); + + return 0; +} + +int pipe_eof(int fd) { + int r; + + r = fd_wait_for_event(fd, POLLIN, 0); + if (r <= 0) + return r; + + return !!(r & POLLHUP); +} + +int ppoll_usec(struct pollfd *fds, size_t nfds, usec_t timeout) { + int r; + + assert(fds || nfds == 0); + + if (nfds == 0) + return 0; + + r = ppoll(fds, nfds, timeout == USEC_INFINITY ? NULL : TIMESPEC_STORE(timeout), NULL); + if (r < 0) + return -errno; + if (r == 0) + return 0; + + for (size_t i = 0, n = r; i < nfds && n > 0; i++) { + if (fds[i].revents == 0) + continue; + if (fds[i].revents & POLLNVAL) + return -EBADF; + n--; + } + + return r; +} + +int fd_wait_for_event(int fd, int event, usec_t timeout) { + struct pollfd pollfd = { + .fd = fd, + .events = event, + }; + int r; + + r = ppoll_usec(&pollfd, 1, timeout); + if (r <= 0) + return r; + + return pollfd.revents; +} + +static size_t nul_length(const uint8_t *p, size_t sz) { + size_t n = 0; + + while (sz > 0) { + if (*p != 0) + break; + + n++; + p++; + sz--; + } + + return n; +} + +ssize_t sparse_write(int fd, const void *p, size_t sz, size_t run_length) { + const uint8_t *q, *w, *e; + ssize_t l; + + q = w = p; + e = q + sz; + while (q < e) { + size_t n; + + n = nul_length(q, e - q); + + /* If there are more than the specified run length of + * NUL bytes, or if this is the beginning or the end + * of the buffer, then seek instead of write */ + if ((n > run_length) || + (n > 0 && q == p) || + (n > 0 && q + n >= e)) { + if (q > w) { + l = write(fd, w, q - w); + if (l < 0) + return -errno; + if (l != q -w) + return -EIO; + } + + if (lseek(fd, n, SEEK_CUR) == (off_t) -1) + return -errno; + + q += n; + w = q; + } else if (n > 0) + q += n; + else + q++; + } + + if (q > w) { + l = write(fd, w, q - w); + if (l < 0) + return -errno; + if (l != q - w) + return -EIO; + } + + return q - (const uint8_t*) p; +} + +char* set_iovec_string_field(struct iovec *iovec, size_t *n_iovec, const char *field, const char *value) { + char *x; + + x = strjoin(field, value); + if (x) + iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(x); + return x; +} + +char* set_iovec_string_field_free(struct iovec *iovec, size_t *n_iovec, const char *field, char *value) { + char *x; + + x = set_iovec_string_field(iovec, n_iovec, field, value); + free(value); + return x; +} + +struct iovec_wrapper *iovw_new(void) { + return malloc0(sizeof(struct iovec_wrapper)); +} + +void iovw_free_contents(struct iovec_wrapper *iovw, bool free_vectors) { + if (free_vectors) + for (size_t i = 0; i < iovw->count; i++) + free(iovw->iovec[i].iov_base); + + iovw->iovec = mfree(iovw->iovec); + iovw->count = 0; +} + +struct iovec_wrapper *iovw_free_free(struct iovec_wrapper *iovw) { + iovw_free_contents(iovw, true); + + return mfree(iovw); +} + +struct iovec_wrapper *iovw_free(struct iovec_wrapper *iovw) { + iovw_free_contents(iovw, false); + + return mfree(iovw); +} + +int iovw_put(struct iovec_wrapper *iovw, void *data, size_t len) { + if (iovw->count >= IOV_MAX) + return -E2BIG; + + if (!GREEDY_REALLOC(iovw->iovec, iovw->count + 1)) + return -ENOMEM; + + iovw->iovec[iovw->count++] = IOVEC_MAKE(data, len); + return 0; +} + +int iovw_put_string_field(struct iovec_wrapper *iovw, const char *field, const char *value) { + _cleanup_free_ char *x = NULL; + int r; + + x = strjoin(field, value); + if (!x) + return -ENOMEM; + + r = iovw_put(iovw, x, strlen(x)); + if (r >= 0) + TAKE_PTR(x); + + return r; +} + +int iovw_put_string_field_free(struct iovec_wrapper *iovw, const char *field, char *value) { + _cleanup_free_ _unused_ char *free_ptr = value; + + return iovw_put_string_field(iovw, field, value); +} + +void iovw_rebase(struct iovec_wrapper *iovw, char *old, char *new) { + for (size_t i = 0; i < iovw->count; i++) + iovw->iovec[i].iov_base = (char *)iovw->iovec[i].iov_base - old + new; +} + +size_t iovw_size(struct iovec_wrapper *iovw) { + size_t n = 0; + + for (size_t i = 0; i < iovw->count; i++) + n += iovw->iovec[i].iov_len; + + return n; +} diff --git a/src/basic/io-util.h b/src/basic/io-util.h new file mode 100644 index 0000000..3afb134 --- /dev/null +++ b/src/basic/io-util.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "macro.h" +#include "time-util.h" + +int flush_fd(int fd); + +ssize_t loop_read(int fd, void *buf, size_t nbytes, bool do_poll); +int loop_read_exact(int fd, void *buf, size_t nbytes, bool do_poll); +int loop_write(int fd, const void *buf, size_t nbytes, bool do_poll); + +int pipe_eof(int fd); + +int ppoll_usec(struct pollfd *fds, size_t nfds, usec_t timeout); +int fd_wait_for_event(int fd, int event, usec_t timeout); + +ssize_t sparse_write(int fd, const void *p, size_t sz, size_t run_length); + +static inline size_t IOVEC_TOTAL_SIZE(const struct iovec *i, size_t n) { + size_t r = 0; + + for (size_t j = 0; j < n; j++) + r += i[j].iov_len; + + return r; +} + +static inline bool IOVEC_INCREMENT(struct iovec *i, size_t n, size_t k) { + /* Returns true if there is nothing else to send (bytes written cover all of the iovec), + * false if there's still work to do. */ + + for (size_t j = 0; j < n; j++) { + size_t sub; + + if (i[j].iov_len == 0) + continue; + if (k == 0) + return false; + + sub = MIN(i[j].iov_len, k); + i[j].iov_len -= sub; + i[j].iov_base = (uint8_t*) i[j].iov_base + sub; + k -= sub; + } + + assert(k == 0); /* Anything else would mean that we wrote more bytes than available, + * or the kernel reported writing more bytes than sent. */ + return true; +} + +static inline bool FILE_SIZE_VALID(uint64_t l) { + /* ftruncate() and friends take an unsigned file size, but actually cannot deal with file sizes larger than + * 2^63 since the kernel internally handles it as signed value. This call allows checking for this early. */ + + return (l >> 63) == 0; +} + +static inline bool FILE_SIZE_VALID_OR_INFINITY(uint64_t l) { + + /* Same as above, but allows one extra value: -1 as indication for infinity. */ + + if (l == UINT64_MAX) + return true; + + return FILE_SIZE_VALID(l); + +} + +#define IOVEC_INIT(base, len) { .iov_base = (base), .iov_len = (len) } +#define IOVEC_MAKE(base, len) (struct iovec) IOVEC_INIT(base, len) +#define IOVEC_INIT_STRING(string) IOVEC_INIT((char*) string, strlen(string)) +#define IOVEC_MAKE_STRING(string) (struct iovec) IOVEC_INIT_STRING(string) + +char* set_iovec_string_field(struct iovec *iovec, size_t *n_iovec, const char *field, const char *value); +char* set_iovec_string_field_free(struct iovec *iovec, size_t *n_iovec, const char *field, char *value); + +struct iovec_wrapper { + struct iovec *iovec; + size_t count; +}; + +struct iovec_wrapper *iovw_new(void); +struct iovec_wrapper *iovw_free(struct iovec_wrapper *iovw); +struct iovec_wrapper *iovw_free_free(struct iovec_wrapper *iovw); +void iovw_free_contents(struct iovec_wrapper *iovw, bool free_vectors); + +int iovw_put(struct iovec_wrapper *iovw, void *data, size_t len); +static inline int iovw_consume(struct iovec_wrapper *iovw, void *data, size_t len) { + /* Move data into iovw or free on error */ + int r = iovw_put(iovw, data, len); + if (r < 0) + free(data); + return r; +} + +int iovw_put_string_field(struct iovec_wrapper *iovw, const char *field, const char *value); +int iovw_put_string_field_free(struct iovec_wrapper *iovw, const char *field, char *value); +void iovw_rebase(struct iovec_wrapper *iovw, char *old, char *new); +size_t iovw_size(struct iovec_wrapper *iovw); diff --git a/src/basic/ioprio-util.c b/src/basic/ioprio-util.c new file mode 100644 index 0000000..b63650b --- /dev/null +++ b/src/basic/ioprio-util.c @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "ioprio-util.h" +#include "parse-util.h" +#include "string-table.h" + +int ioprio_parse_priority(const char *s, int *ret) { + int i, r; + + assert(s); + assert(ret); + + r = safe_atoi(s, &i); + if (r < 0) + return r; + + if (!ioprio_priority_is_valid(i)) + return -EINVAL; + + *ret = i; + return 0; +} + +static const char *const ioprio_class_table[] = { + [IOPRIO_CLASS_NONE] = "none", + [IOPRIO_CLASS_RT] = "realtime", + [IOPRIO_CLASS_BE] = "best-effort", + [IOPRIO_CLASS_IDLE] = "idle", +}; + +DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class, int, IOPRIO_N_CLASSES); diff --git a/src/basic/ioprio-util.h b/src/basic/ioprio-util.h new file mode 100644 index 0000000..b8c9b7d --- /dev/null +++ b/src/basic/ioprio-util.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "macro.h" +#include "missing_ioprio.h" + +int ioprio_class_to_string_alloc(int i, char **s); +int ioprio_class_from_string(const char *s); + +static inline bool ioprio_class_is_valid(int i) { + return IN_SET(i, IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE); +} + +static inline bool ioprio_priority_is_valid(int i) { + return i >= 0 && i < IOPRIO_BE_NR; +} + +int ioprio_parse_priority(const char *s, int *ret); + +/* IOPRIO_CLASS_NONE with any prio value is another way to say IOPRIO_CLASS_BE with level 4. Encode that in a + * proper macro. */ +#define IOPRIO_DEFAULT_CLASS_AND_PRIO ioprio_prio_value(IOPRIO_CLASS_BE, 4) + +static inline int ioprio_normalize(int v) { + /* Converts IOPRIO_CLASS_NONE to what it actually means */ + return ioprio_prio_class(v) == IOPRIO_CLASS_NONE ? IOPRIO_DEFAULT_CLASS_AND_PRIO : v; +} diff --git a/src/basic/limits-util.c b/src/basic/limits-util.c new file mode 100644 index 0000000..35cb066 --- /dev/null +++ b/src/basic/limits-util.c @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include + +#include "alloc-util.h" +#include "cgroup-util.h" +#include "limits-util.h" +#include "memory-util.h" +#include "parse-util.h" +#include "process-util.h" +#include "procfs-util.h" +#include "string-util.h" + +uint64_t physical_memory(void) { + _cleanup_free_ char *root = NULL, *value = NULL; + uint64_t mem, lim; + size_t ps; + long sc; + int r; + + /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of + * memory. + * + * In order to support containers nicely that have a configured memory limit we'll take the minimum of the + * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */ + + sc = sysconf(_SC_PHYS_PAGES); + assert(sc > 0); + + ps = page_size(); + mem = (uint64_t) sc * (uint64_t) ps; + + r = cg_get_root_path(&root); + if (r < 0) { + log_debug_errno(r, "Failed to determine root cgroup, ignoring cgroup memory limit: %m"); + return mem; + } + + r = cg_all_unified(); + if (r < 0) { + log_debug_errno(r, "Failed to determine root unified mode, ignoring cgroup memory limit: %m"); + return mem; + } + if (r > 0) { + r = cg_get_attribute("memory", root, "memory.max", &value); + if (r == -ENOENT) /* Field does not exist on the system's top-level cgroup, hence don't + * complain. (Note that it might exist on our own root though, if we live + * in a cgroup namespace, hence check anyway instead of not even + * trying.) */ + return mem; + if (r < 0) { + log_debug_errno(r, "Failed to read memory.max cgroup attribute, ignoring cgroup memory limit: %m"); + return mem; + } + + if (streq(value, "max")) + return mem; + } else { + r = cg_get_attribute("memory", root, "memory.limit_in_bytes", &value); + if (r < 0) { + log_debug_errno(r, "Failed to read memory.limit_in_bytes cgroup attribute, ignoring cgroup memory limit: %m"); + return mem; + } + } + + r = safe_atou64(value, &lim); + if (r < 0) { + log_debug_errno(r, "Failed to parse cgroup memory limit '%s', ignoring: %m", value); + return mem; + } + if (lim == UINT64_MAX) + return mem; + + /* Make sure the limit is a multiple of our own page size */ + lim /= ps; + lim *= ps; + + return MIN(mem, lim); +} + +uint64_t physical_memory_scale(uint64_t v, uint64_t max) { + uint64_t p, m, ps; + + /* Shortcut two special cases */ + if (v == 0) + return 0; + if (v == max) + return physical_memory(); + + assert(max > 0); + + /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success + * the result is a multiple of the page size (rounds down). */ + + ps = page_size(); + assert(ps > 0); + + p = physical_memory() / ps; + assert(p > 0); + + if (v > UINT64_MAX / p) + return UINT64_MAX; + + m = p * v; + m /= max; + + if (m > UINT64_MAX / ps) + return UINT64_MAX; + + return m * ps; +} + +uint64_t system_tasks_max(void) { + uint64_t a = TASKS_MAX, b = TASKS_MAX, c = TASKS_MAX; + _cleanup_free_ char *root = NULL; + int r; + + /* Determine the maximum number of tasks that may run on this system. We check three sources to + * determine this limit: + * + * a) kernel.threads-max sysctl: the maximum number of tasks (threads) the kernel allows. + * + * This puts a direct limit on the number of concurrent tasks. + * + * b) kernel.pid_max sysctl: the maximum PID value. + * + * This limits the numeric range PIDs can take, and thus indirectly also limits the number of + * concurrent threads. It's primarily a compatibility concept: some crappy old code used a signed + * 16bit type for PIDs, hence the kernel provides a way to ensure the PIDs never go beyond + * INT16_MAX by default. + * + * Also note the weird definition: PIDs assigned will be kept below this value, which means + * the number of tasks that can be created is one lower, as PID 0 is not a valid process ID. + * + * c) pids.max on the root cgroup: the kernel's configured maximum number of tasks. + * + * and then pick the smallest of the three. + * + * By default pid_max is set to much lower values than threads-max, hence the limit people come into + * contact with first, as it's the lowest boundary they need to bump when they want higher number of + * processes. + */ + + r = procfs_get_threads_max(&a); + if (r < 0) + log_debug_errno(r, "Failed to read kernel.threads-max, ignoring: %m"); + + r = procfs_get_pid_max(&b); + if (r < 0) + log_debug_errno(r, "Failed to read kernel.pid_max, ignoring: %m"); + else if (b > 0) + /* Subtract one from pid_max, since PID 0 is not a valid PID */ + b--; + + r = cg_get_root_path(&root); + if (r < 0) + log_debug_errno(r, "Failed to determine cgroup root path, ignoring: %m"); + else { + /* We'll have the "pids.max" attribute on the our root cgroup only if we are in a + * CLONE_NEWCGROUP namespace. On the top-level namespace this attribute is missing, hence + * suppress any message about that */ + r = cg_get_attribute_as_uint64("pids", root, "pids.max", &c); + if (r < 0 && r != -ENODATA) + log_debug_errno(r, "Failed to read pids.max attribute of root cgroup, ignoring: %m"); + } + + return MIN3(a, b, c); +} + +uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) { + uint64_t t, m; + + /* Shortcut two special cases */ + if (v == 0) + return 0; + if (v == max) + return system_tasks_max(); + + assert(max > 0); + + /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages + * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */ + + t = system_tasks_max(); + assert(t > 0); + + if (v > UINT64_MAX / t) /* overflow? */ + return UINT64_MAX; + + m = t * v; + return m / max; +} diff --git a/src/basic/limits-util.h b/src/basic/limits-util.h new file mode 100644 index 0000000..d267fcf --- /dev/null +++ b/src/basic/limits-util.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +uint64_t physical_memory(void); +uint64_t physical_memory_scale(uint64_t v, uint64_t max); + +uint64_t system_tasks_max(void); +uint64_t system_tasks_max_scale(uint64_t v, uint64_t max); diff --git a/src/basic/linux/README b/src/basic/linux/README new file mode 100644 index 0000000..790b4fe --- /dev/null +++ b/src/basic/linux/README @@ -0,0 +1,8 @@ +The files in this directory are copied from current kernel master +(b06ed1e7a2fa9b636f368a9e97c3c8877623f8b2) or WireGuard master +(8416093498ac2c754536dad4757c5d86c9ba8809), and the following +modifications are applied: +- btrfs.h: drop '__user' attributes +- if.h: drop '#include ' and '__user' attributes +- stddef.h: drop '#include ' +- guard linux/fs.h include to avoid conflict with glibc 2.36 diff --git a/src/basic/linux/batman_adv.h b/src/basic/linux/batman_adv.h new file mode 100644 index 0000000..35dc016 --- /dev/null +++ b/src/basic/linux/batman_adv.h @@ -0,0 +1,704 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright (C) B.A.T.M.A.N. contributors: + * + * Matthias Schiffer + */ + +#ifndef _UAPI_LINUX_BATMAN_ADV_H_ +#define _UAPI_LINUX_BATMAN_ADV_H_ + +#define BATADV_NL_NAME "batadv" + +#define BATADV_NL_MCAST_GROUP_CONFIG "config" +#define BATADV_NL_MCAST_GROUP_TPMETER "tpmeter" + +/** + * enum batadv_tt_client_flags - TT client specific flags + * + * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire. + * Bits from 8 to 15 are called _local flags_ because they are used for local + * computations only. + * + * Bits from 4 to 7 - a subset of remote flags - are ensured to be in sync with + * the other nodes in the network. To achieve this goal these flags are included + * in the TT CRC computation. + */ +enum batadv_tt_client_flags { + /** + * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table + */ + BATADV_TT_CLIENT_DEL = (1 << 0), + + /** + * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and + * the new update telling its new real location has not been + * received/sent yet + */ + BATADV_TT_CLIENT_ROAM = (1 << 1), + + /** + * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi + * interface. This information is used by the "AP Isolation" feature + */ + BATADV_TT_CLIENT_WIFI = (1 << 4), + + /** + * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". This + * information is used by the Extended Isolation feature + */ + BATADV_TT_CLIENT_ISOLA = (1 << 5), + + /** + * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from + * the table + */ + BATADV_TT_CLIENT_NOPURGE = (1 << 8), + + /** + * @BATADV_TT_CLIENT_NEW: this client has been added to the local table + * but has not been announced yet + */ + BATADV_TT_CLIENT_NEW = (1 << 9), + + /** + * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it + * is kept in the table for one more originator interval for consistency + * purposes + */ + BATADV_TT_CLIENT_PENDING = (1 << 10), + + /** + * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be + * part of the network but no node has already announced it + */ + BATADV_TT_CLIENT_TEMP = (1 << 11), +}; + +/** + * enum batadv_mcast_flags_priv - Private, own multicast flags + * + * These are internal, multicast related flags. Currently they describe certain + * multicast related attributes of the segment this originator bridges into the + * mesh. + * + * Those attributes are used to determine the public multicast flags this + * originator is going to announce via TT. + * + * For netlink, if BATADV_MCAST_FLAGS_BRIDGED is unset then all querier + * related flags are undefined. + */ +enum batadv_mcast_flags_priv { + /** + * @BATADV_MCAST_FLAGS_BRIDGED: There is a bridge on top of the mesh + * interface. + */ + BATADV_MCAST_FLAGS_BRIDGED = (1 << 0), + + /** + * @BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS: Whether an IGMP querier + * exists in the mesh + */ + BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS = (1 << 1), + + /** + * @BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS: Whether an MLD querier + * exists in the mesh + */ + BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS = (1 << 2), + + /** + * @BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING: If an IGMP querier + * exists, whether it is potentially shadowing multicast listeners + * (i.e. querier is behind our own bridge segment) + */ + BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING = (1 << 3), + + /** + * @BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING: If an MLD querier + * exists, whether it is potentially shadowing multicast listeners + * (i.e. querier is behind our own bridge segment) + */ + BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING = (1 << 4), +}; + +/** + * enum batadv_gw_modes - gateway mode of node + */ +enum batadv_gw_modes { + /** @BATADV_GW_MODE_OFF: gw mode disabled */ + BATADV_GW_MODE_OFF, + + /** @BATADV_GW_MODE_CLIENT: send DHCP requests to gw servers */ + BATADV_GW_MODE_CLIENT, + + /** @BATADV_GW_MODE_SERVER: announce itself as gateway server */ + BATADV_GW_MODE_SERVER, +}; + +/** + * enum batadv_nl_attrs - batman-adv netlink attributes + */ +enum batadv_nl_attrs { + /** + * @BATADV_ATTR_UNSPEC: unspecified attribute to catch errors + */ + BATADV_ATTR_UNSPEC, + + /** + * @BATADV_ATTR_VERSION: batman-adv version string + */ + BATADV_ATTR_VERSION, + + /** + * @BATADV_ATTR_ALGO_NAME: name of routing algorithm + */ + BATADV_ATTR_ALGO_NAME, + + /** + * @BATADV_ATTR_MESH_IFINDEX: index of the batman-adv interface + */ + BATADV_ATTR_MESH_IFINDEX, + + /** + * @BATADV_ATTR_MESH_IFNAME: name of the batman-adv interface + */ + BATADV_ATTR_MESH_IFNAME, + + /** + * @BATADV_ATTR_MESH_ADDRESS: mac address of the batman-adv interface + */ + BATADV_ATTR_MESH_ADDRESS, + + /** + * @BATADV_ATTR_HARD_IFINDEX: index of the non-batman-adv interface + */ + BATADV_ATTR_HARD_IFINDEX, + + /** + * @BATADV_ATTR_HARD_IFNAME: name of the non-batman-adv interface + */ + BATADV_ATTR_HARD_IFNAME, + + /** + * @BATADV_ATTR_HARD_ADDRESS: mac address of the non-batman-adv + * interface + */ + BATADV_ATTR_HARD_ADDRESS, + + /** + * @BATADV_ATTR_ORIG_ADDRESS: originator mac address + */ + BATADV_ATTR_ORIG_ADDRESS, + + /** + * @BATADV_ATTR_TPMETER_RESULT: result of run (see + * batadv_tp_meter_status) + */ + BATADV_ATTR_TPMETER_RESULT, + + /** + * @BATADV_ATTR_TPMETER_TEST_TIME: time (msec) the run took + */ + BATADV_ATTR_TPMETER_TEST_TIME, + + /** + * @BATADV_ATTR_TPMETER_BYTES: amount of acked bytes during run + */ + BATADV_ATTR_TPMETER_BYTES, + + /** + * @BATADV_ATTR_TPMETER_COOKIE: session cookie to match tp_meter session + */ + BATADV_ATTR_TPMETER_COOKIE, + + /** + * @BATADV_ATTR_PAD: attribute used for padding for 64-bit alignment + */ + BATADV_ATTR_PAD, + + /** + * @BATADV_ATTR_ACTIVE: Flag indicating if the hard interface is active + */ + BATADV_ATTR_ACTIVE, + + /** + * @BATADV_ATTR_TT_ADDRESS: Client MAC address + */ + BATADV_ATTR_TT_ADDRESS, + + /** + * @BATADV_ATTR_TT_TTVN: Translation table version + */ + BATADV_ATTR_TT_TTVN, + + /** + * @BATADV_ATTR_TT_LAST_TTVN: Previous translation table version + */ + BATADV_ATTR_TT_LAST_TTVN, + + /** + * @BATADV_ATTR_TT_CRC32: CRC32 over translation table + */ + BATADV_ATTR_TT_CRC32, + + /** + * @BATADV_ATTR_TT_VID: VLAN ID + */ + BATADV_ATTR_TT_VID, + + /** + * @BATADV_ATTR_TT_FLAGS: Translation table client flags + */ + BATADV_ATTR_TT_FLAGS, + + /** + * @BATADV_ATTR_FLAG_BEST: Flags indicating entry is the best + */ + BATADV_ATTR_FLAG_BEST, + + /** + * @BATADV_ATTR_LAST_SEEN_MSECS: Time in milliseconds since last seen + */ + BATADV_ATTR_LAST_SEEN_MSECS, + + /** + * @BATADV_ATTR_NEIGH_ADDRESS: Neighbour MAC address + */ + BATADV_ATTR_NEIGH_ADDRESS, + + /** + * @BATADV_ATTR_TQ: TQ to neighbour + */ + BATADV_ATTR_TQ, + + /** + * @BATADV_ATTR_THROUGHPUT: Estimated throughput to Neighbour + */ + BATADV_ATTR_THROUGHPUT, + + /** + * @BATADV_ATTR_BANDWIDTH_UP: Reported uplink bandwidth + */ + BATADV_ATTR_BANDWIDTH_UP, + + /** + * @BATADV_ATTR_BANDWIDTH_DOWN: Reported downlink bandwidth + */ + BATADV_ATTR_BANDWIDTH_DOWN, + + /** + * @BATADV_ATTR_ROUTER: Gateway router MAC address + */ + BATADV_ATTR_ROUTER, + + /** + * @BATADV_ATTR_BLA_OWN: Flag indicating own originator + */ + BATADV_ATTR_BLA_OWN, + + /** + * @BATADV_ATTR_BLA_ADDRESS: Bridge loop avoidance claim MAC address + */ + BATADV_ATTR_BLA_ADDRESS, + + /** + * @BATADV_ATTR_BLA_VID: BLA VLAN ID + */ + BATADV_ATTR_BLA_VID, + + /** + * @BATADV_ATTR_BLA_BACKBONE: BLA gateway originator MAC address + */ + BATADV_ATTR_BLA_BACKBONE, + + /** + * @BATADV_ATTR_BLA_CRC: BLA CRC + */ + BATADV_ATTR_BLA_CRC, + + /** + * @BATADV_ATTR_DAT_CACHE_IP4ADDRESS: Client IPv4 address + */ + BATADV_ATTR_DAT_CACHE_IP4ADDRESS, + + /** + * @BATADV_ATTR_DAT_CACHE_HWADDRESS: Client MAC address + */ + BATADV_ATTR_DAT_CACHE_HWADDRESS, + + /** + * @BATADV_ATTR_DAT_CACHE_VID: VLAN ID + */ + BATADV_ATTR_DAT_CACHE_VID, + + /** + * @BATADV_ATTR_MCAST_FLAGS: Per originator multicast flags + */ + BATADV_ATTR_MCAST_FLAGS, + + /** + * @BATADV_ATTR_MCAST_FLAGS_PRIV: Private, own multicast flags + */ + BATADV_ATTR_MCAST_FLAGS_PRIV, + + /** + * @BATADV_ATTR_VLANID: VLAN id on top of soft interface + */ + BATADV_ATTR_VLANID, + + /** + * @BATADV_ATTR_AGGREGATED_OGMS_ENABLED: whether the batman protocol + * messages of the mesh interface shall be aggregated or not. + */ + BATADV_ATTR_AGGREGATED_OGMS_ENABLED, + + /** + * @BATADV_ATTR_AP_ISOLATION_ENABLED: whether the data traffic going + * from a wireless client to another wireless client will be silently + * dropped. + */ + BATADV_ATTR_AP_ISOLATION_ENABLED, + + /** + * @BATADV_ATTR_ISOLATION_MARK: the isolation mark which is used to + * classify clients as "isolated" by the Extended Isolation feature. + */ + BATADV_ATTR_ISOLATION_MARK, + + /** + * @BATADV_ATTR_ISOLATION_MASK: the isolation (bit)mask which is used to + * classify clients as "isolated" by the Extended Isolation feature. + */ + BATADV_ATTR_ISOLATION_MASK, + + /** + * @BATADV_ATTR_BONDING_ENABLED: whether the data traffic going through + * the mesh will be sent using multiple interfaces at the same time. + */ + BATADV_ATTR_BONDING_ENABLED, + + /** + * @BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED: whether the bridge loop + * avoidance feature is enabled. This feature detects and avoids loops + * between the mesh and devices bridged with the soft interface + */ + BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED, + + /** + * @BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED: whether the distributed + * arp table feature is enabled. This feature uses a distributed hash + * table to answer ARP requests without flooding the request through + * the whole mesh. + */ + BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED, + + /** + * @BATADV_ATTR_FRAGMENTATION_ENABLED: whether the data traffic going + * through the mesh will be fragmented or silently discarded if the + * packet size exceeds the outgoing interface MTU. + */ + BATADV_ATTR_FRAGMENTATION_ENABLED, + + /** + * @BATADV_ATTR_GW_BANDWIDTH_DOWN: defines the download bandwidth which + * is propagated by this node if %BATADV_ATTR_GW_BANDWIDTH_MODE was set + * to 'server'. + */ + BATADV_ATTR_GW_BANDWIDTH_DOWN, + + /** + * @BATADV_ATTR_GW_BANDWIDTH_UP: defines the upload bandwidth which + * is propagated by this node if %BATADV_ATTR_GW_BANDWIDTH_MODE was set + * to 'server'. + */ + BATADV_ATTR_GW_BANDWIDTH_UP, + + /** + * @BATADV_ATTR_GW_MODE: defines the state of the gateway features. + * Possible values are specified in enum batadv_gw_modes + */ + BATADV_ATTR_GW_MODE, + + /** + * @BATADV_ATTR_GW_SEL_CLASS: defines the selection criteria this node + * will use to choose a gateway if gw_mode was set to 'client'. + */ + BATADV_ATTR_GW_SEL_CLASS, + + /** + * @BATADV_ATTR_HOP_PENALTY: defines the penalty which will be applied + * to an originator message's tq-field on every hop and/or per + * hard interface + */ + BATADV_ATTR_HOP_PENALTY, + + /** + * @BATADV_ATTR_LOG_LEVEL: bitmask with to define which debug messages + * should be send to the debug log/trace ring buffer + */ + BATADV_ATTR_LOG_LEVEL, + + /** + * @BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED: whether multicast + * optimizations should be replaced by simple broadcast-like flooding + * of multicast packets. If set to non-zero then all nodes in the mesh + * are going to use classic flooding for any multicast packet with no + * optimizations. + */ + BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED, + + /** + * @BATADV_ATTR_NETWORK_CODING_ENABLED: whether Network Coding (using + * some magic to send fewer wifi packets but still the same content) is + * enabled or not. + */ + BATADV_ATTR_NETWORK_CODING_ENABLED, + + /** + * @BATADV_ATTR_ORIG_INTERVAL: defines the interval in milliseconds in + * which batman sends its protocol messages. + */ + BATADV_ATTR_ORIG_INTERVAL, + + /** + * @BATADV_ATTR_ELP_INTERVAL: defines the interval in milliseconds in + * which batman emits probing packets for neighbor sensing (ELP). + */ + BATADV_ATTR_ELP_INTERVAL, + + /** + * @BATADV_ATTR_THROUGHPUT_OVERRIDE: defines the throughput value to be + * used by B.A.T.M.A.N. V when estimating the link throughput using + * this interface. If the value is set to 0 then batman-adv will try to + * estimate the throughput by itself. + */ + BATADV_ATTR_THROUGHPUT_OVERRIDE, + + /** + * @BATADV_ATTR_MULTICAST_FANOUT: defines the maximum number of packet + * copies that may be generated for a multicast-to-unicast conversion. + * Once this limit is exceeded distribution will fall back to broadcast. + */ + BATADV_ATTR_MULTICAST_FANOUT, + + /* add attributes above here, update the policy in netlink.c */ + + /** + * @__BATADV_ATTR_AFTER_LAST: internal use + */ + __BATADV_ATTR_AFTER_LAST, + + /** + * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available + */ + NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST, + + /** + * @BATADV_ATTR_MAX: highest attribute number currently defined + */ + BATADV_ATTR_MAX = __BATADV_ATTR_AFTER_LAST - 1 +}; + +/** + * enum batadv_nl_commands - supported batman-adv netlink commands + */ +enum batadv_nl_commands { + /** + * @BATADV_CMD_UNSPEC: unspecified command to catch errors + */ + BATADV_CMD_UNSPEC, + + /** + * @BATADV_CMD_GET_MESH: Get attributes from softif/mesh + */ + BATADV_CMD_GET_MESH, + + /** + * @BATADV_CMD_GET_MESH_INFO: Alias for @BATADV_CMD_GET_MESH + */ + BATADV_CMD_GET_MESH_INFO = BATADV_CMD_GET_MESH, + + /** + * @BATADV_CMD_TP_METER: Start a tp meter session + */ + BATADV_CMD_TP_METER, + + /** + * @BATADV_CMD_TP_METER_CANCEL: Cancel a tp meter session + */ + BATADV_CMD_TP_METER_CANCEL, + + /** + * @BATADV_CMD_GET_ROUTING_ALGOS: Query the list of routing algorithms. + */ + BATADV_CMD_GET_ROUTING_ALGOS, + + /** + * @BATADV_CMD_GET_HARDIF: Get attributes from a hardif of the + * current softif + */ + BATADV_CMD_GET_HARDIF, + + /** + * @BATADV_CMD_GET_HARDIFS: Alias for @BATADV_CMD_GET_HARDIF + */ + BATADV_CMD_GET_HARDIFS = BATADV_CMD_GET_HARDIF, + + /** + * @BATADV_CMD_GET_TRANSTABLE_LOCAL: Query list of local translations + */ + BATADV_CMD_GET_TRANSTABLE_LOCAL, + + /** + * @BATADV_CMD_GET_TRANSTABLE_GLOBAL: Query list of global translations + */ + BATADV_CMD_GET_TRANSTABLE_GLOBAL, + + /** + * @BATADV_CMD_GET_ORIGINATORS: Query list of originators + */ + BATADV_CMD_GET_ORIGINATORS, + + /** + * @BATADV_CMD_GET_NEIGHBORS: Query list of neighbours + */ + BATADV_CMD_GET_NEIGHBORS, + + /** + * @BATADV_CMD_GET_GATEWAYS: Query list of gateways + */ + BATADV_CMD_GET_GATEWAYS, + + /** + * @BATADV_CMD_GET_BLA_CLAIM: Query list of bridge loop avoidance claims + */ + BATADV_CMD_GET_BLA_CLAIM, + + /** + * @BATADV_CMD_GET_BLA_BACKBONE: Query list of bridge loop avoidance + * backbones + */ + BATADV_CMD_GET_BLA_BACKBONE, + + /** + * @BATADV_CMD_GET_DAT_CACHE: Query list of DAT cache entries + */ + BATADV_CMD_GET_DAT_CACHE, + + /** + * @BATADV_CMD_GET_MCAST_FLAGS: Query list of multicast flags + */ + BATADV_CMD_GET_MCAST_FLAGS, + + /** + * @BATADV_CMD_SET_MESH: Set attributes for softif/mesh + */ + BATADV_CMD_SET_MESH, + + /** + * @BATADV_CMD_SET_HARDIF: Set attributes for hardif of the + * current softif + */ + BATADV_CMD_SET_HARDIF, + + /** + * @BATADV_CMD_GET_VLAN: Get attributes from a VLAN of the + * current softif + */ + BATADV_CMD_GET_VLAN, + + /** + * @BATADV_CMD_SET_VLAN: Set attributes for VLAN of the + * current softif + */ + BATADV_CMD_SET_VLAN, + + /* add new commands above here */ + + /** + * @__BATADV_CMD_AFTER_LAST: internal use + */ + __BATADV_CMD_AFTER_LAST, + + /** + * @BATADV_CMD_MAX: highest used command number + */ + BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1 +}; + +/** + * enum batadv_tp_meter_reason - reason of a tp meter test run stop + */ +enum batadv_tp_meter_reason { + /** + * @BATADV_TP_REASON_COMPLETE: sender finished tp run + */ + BATADV_TP_REASON_COMPLETE = 3, + + /** + * @BATADV_TP_REASON_CANCEL: sender was stopped during run + */ + BATADV_TP_REASON_CANCEL = 4, + + /* error status >= 128 */ + + /** + * @BATADV_TP_REASON_DST_UNREACHABLE: receiver could not be reached or + * didn't answer + */ + BATADV_TP_REASON_DST_UNREACHABLE = 128, + + /** + * @BATADV_TP_REASON_RESEND_LIMIT: (unused) sender retry reached limit + */ + BATADV_TP_REASON_RESEND_LIMIT = 129, + + /** + * @BATADV_TP_REASON_ALREADY_ONGOING: test to or from the same node + * already ongoing + */ + BATADV_TP_REASON_ALREADY_ONGOING = 130, + + /** + * @BATADV_TP_REASON_MEMORY_ERROR: test was stopped due to low memory + */ + BATADV_TP_REASON_MEMORY_ERROR = 131, + + /** + * @BATADV_TP_REASON_CANT_SEND: failed to send via outgoing interface + */ + BATADV_TP_REASON_CANT_SEND = 132, + + /** + * @BATADV_TP_REASON_TOO_MANY: too many ongoing sessions + */ + BATADV_TP_REASON_TOO_MANY = 133, +}; + +/** + * enum batadv_ifla_attrs - batman-adv ifla nested attributes + */ +enum batadv_ifla_attrs { + /** + * @IFLA_BATADV_UNSPEC: unspecified attribute which is not parsed by + * rtnetlink + */ + IFLA_BATADV_UNSPEC, + + /** + * @IFLA_BATADV_ALGO_NAME: routing algorithm (name) which should be + * used by the newly registered batadv net_device. + */ + IFLA_BATADV_ALGO_NAME, + + /* add attributes above here, update the policy in soft-interface.c */ + + /** + * @__IFLA_BATADV_MAX: internal use + */ + __IFLA_BATADV_MAX, +}; + +#define IFLA_BATADV_MAX (__IFLA_BATADV_MAX - 1) + +#endif /* _UAPI_LINUX_BATMAN_ADV_H_ */ diff --git a/src/basic/linux/btrfs.h b/src/basic/linux/btrfs.h new file mode 100644 index 0000000..74ed908 --- /dev/null +++ b/src/basic/linux/btrfs.h @@ -0,0 +1,1173 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef _UAPI_LINUX_BTRFS_H +#define _UAPI_LINUX_BTRFS_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#if WANT_LINUX_FS_H +#include +#endif + +#define BTRFS_IOCTL_MAGIC 0x94 +#define BTRFS_VOL_NAME_MAX 255 +#define BTRFS_LABEL_SIZE 256 + +/* this should be 4k */ +#define BTRFS_PATH_NAME_MAX 4087 +struct btrfs_ioctl_vol_args { + __s64 fd; + char name[BTRFS_PATH_NAME_MAX + 1]; +}; + +#define BTRFS_DEVICE_PATH_NAME_MAX 1024 +#define BTRFS_SUBVOL_NAME_MAX 4039 + +#ifndef __KERNEL__ +/* Deprecated since 5.7 */ +# define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) +#endif +#define BTRFS_SUBVOL_RDONLY (1ULL << 1) +#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) + +#define BTRFS_DEVICE_SPEC_BY_ID (1ULL << 3) + +#define BTRFS_SUBVOL_SPEC_BY_ID (1ULL << 4) + +#define BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED \ + (BTRFS_SUBVOL_RDONLY | \ + BTRFS_SUBVOL_QGROUP_INHERIT | \ + BTRFS_DEVICE_SPEC_BY_ID | \ + BTRFS_SUBVOL_SPEC_BY_ID) + +#define BTRFS_FSID_SIZE 16 +#define BTRFS_UUID_SIZE 16 +#define BTRFS_UUID_UNPARSED_SIZE 37 + +/* + * flags definition for qgroup limits + * + * Used by: + * struct btrfs_qgroup_limit.flags + * struct btrfs_qgroup_limit_item.flags + */ +#define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0) +#define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1) +#define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2) +#define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3) +#define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4) +#define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5) + +struct btrfs_qgroup_limit { + __u64 flags; + __u64 max_rfer; + __u64 max_excl; + __u64 rsv_rfer; + __u64 rsv_excl; +}; + +/* + * flags definition for qgroup inheritance + * + * Used by: + * struct btrfs_qgroup_inherit.flags + */ +#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0) + +struct btrfs_qgroup_inherit { + __u64 flags; + __u64 num_qgroups; + __u64 num_ref_copies; + __u64 num_excl_copies; + struct btrfs_qgroup_limit lim; + __u64 qgroups[]; +}; + +struct btrfs_ioctl_qgroup_limit_args { + __u64 qgroupid; + struct btrfs_qgroup_limit lim; +}; + +/* + * Arguments for specification of subvolumes or devices, supporting by-name or + * by-id and flags + * + * The set of supported flags depends on the ioctl + * + * BTRFS_SUBVOL_RDONLY is also provided/consumed by the following ioctls: + * - BTRFS_IOC_SUBVOL_GETFLAGS + * - BTRFS_IOC_SUBVOL_SETFLAGS + */ + +/* Supported flags for BTRFS_IOC_RM_DEV_V2 */ +#define BTRFS_DEVICE_REMOVE_ARGS_MASK \ + (BTRFS_DEVICE_SPEC_BY_ID) + +/* Supported flags for BTRFS_IOC_SNAP_CREATE_V2 and BTRFS_IOC_SUBVOL_CREATE_V2 */ +#define BTRFS_SUBVOL_CREATE_ARGS_MASK \ + (BTRFS_SUBVOL_RDONLY | \ + BTRFS_SUBVOL_QGROUP_INHERIT) + +/* Supported flags for BTRFS_IOC_SNAP_DESTROY_V2 */ +#define BTRFS_SUBVOL_DELETE_ARGS_MASK \ + (BTRFS_SUBVOL_SPEC_BY_ID) + +struct btrfs_ioctl_vol_args_v2 { + __s64 fd; + __u64 transid; + __u64 flags; + union { + struct { + __u64 size; + struct btrfs_qgroup_inherit *qgroup_inherit; + }; + __u64 unused[4]; + }; + union { + char name[BTRFS_SUBVOL_NAME_MAX + 1]; + __u64 devid; + __u64 subvolid; + }; +}; + +/* + * structure to report errors and progress to userspace, either as a + * result of a finished scrub, a canceled scrub or a progress inquiry + */ +struct btrfs_scrub_progress { + __u64 data_extents_scrubbed; /* # of data extents scrubbed */ + __u64 tree_extents_scrubbed; /* # of tree extents scrubbed */ + __u64 data_bytes_scrubbed; /* # of data bytes scrubbed */ + __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */ + __u64 read_errors; /* # of read errors encountered (EIO) */ + __u64 csum_errors; /* # of failed csum checks */ + __u64 verify_errors; /* # of occurrences, where the metadata + * of a tree block did not match the + * expected values, like generation or + * logical */ + __u64 no_csum; /* # of 4k data block for which no csum + * is present, probably the result of + * data written with nodatasum */ + __u64 csum_discards; /* # of csum for which no data was found + * in the extent tree. */ + __u64 super_errors; /* # of bad super blocks encountered */ + __u64 malloc_errors; /* # of internal kmalloc errors. These + * will likely cause an incomplete + * scrub */ + __u64 uncorrectable_errors; /* # of errors where either no intact + * copy was found or the writeback + * failed */ + __u64 corrected_errors; /* # of errors corrected */ + __u64 last_physical; /* last physical address scrubbed. In + * case a scrub was aborted, this can + * be used to restart the scrub */ + __u64 unverified_errors; /* # of occurrences where a read for a + * full (64k) bio failed, but the re- + * check succeeded for each 4k piece. + * Intermittent error. */ +}; + +#define BTRFS_SCRUB_READONLY 1 +struct btrfs_ioctl_scrub_args { + __u64 devid; /* in */ + __u64 start; /* in */ + __u64 end; /* in */ + __u64 flags; /* in */ + struct btrfs_scrub_progress progress; /* out */ + /* pad to 1k */ + __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8]; +}; + +#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 +#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 +struct btrfs_ioctl_dev_replace_start_params { + __u64 srcdevid; /* in, if 0, use srcdev_name instead */ + __u64 cont_reading_from_srcdev_mode; /* in, see #define + * above */ + __u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ + __u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ +}; + +#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED 1 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED 2 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 3 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED 4 +struct btrfs_ioctl_dev_replace_status_params { + __u64 replace_state; /* out, see #define above */ + __u64 progress_1000; /* out, 0 <= x <= 1000 */ + __u64 time_started; /* out, seconds since 1-Jan-1970 */ + __u64 time_stopped; /* out, seconds since 1-Jan-1970 */ + __u64 num_write_errors; /* out */ + __u64 num_uncorrectable_read_errors; /* out */ +}; + +#define BTRFS_IOCTL_DEV_REPLACE_CMD_START 0 +#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS 1 +#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL 2 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR 0 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED 1 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED 2 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS 3 +struct btrfs_ioctl_dev_replace_args { + __u64 cmd; /* in */ + __u64 result; /* out */ + + union { + struct btrfs_ioctl_dev_replace_start_params start; + struct btrfs_ioctl_dev_replace_status_params status; + }; /* in/out */ + + __u64 spare[64]; +}; + +struct btrfs_ioctl_dev_info_args { + __u64 devid; /* in/out */ + __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */ + __u64 bytes_used; /* out */ + __u64 total_bytes; /* out */ + __u64 unused[379]; /* pad to 4k */ + __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */ +}; + +/* + * Retrieve information about the filesystem + */ + +/* Request information about checksum type and size */ +#define BTRFS_FS_INFO_FLAG_CSUM_INFO (1 << 0) + +/* Request information about filesystem generation */ +#define BTRFS_FS_INFO_FLAG_GENERATION (1 << 1) +/* Request information about filesystem metadata UUID */ +#define BTRFS_FS_INFO_FLAG_METADATA_UUID (1 << 2) + +struct btrfs_ioctl_fs_info_args { + __u64 max_id; /* out */ + __u64 num_devices; /* out */ + __u8 fsid[BTRFS_FSID_SIZE]; /* out */ + __u32 nodesize; /* out */ + __u32 sectorsize; /* out */ + __u32 clone_alignment; /* out */ + /* See BTRFS_FS_INFO_FLAG_* */ + __u16 csum_type; /* out */ + __u16 csum_size; /* out */ + __u64 flags; /* in/out */ + __u64 generation; /* out */ + __u8 metadata_uuid[BTRFS_FSID_SIZE]; /* out */ + __u8 reserved[944]; /* pad to 1k */ +}; + +/* + * feature flags + * + * Used by: + * struct btrfs_ioctl_feature_flags + */ +#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE (1ULL << 0) +/* + * Older kernels (< 4.9) on big-endian systems produced broken free space tree + * bitmaps, and btrfs-progs also used to corrupt the free space tree (versions + * < 4.7.3). If this bit is clear, then the free space tree cannot be trusted. + * btrfs-progs can also intentionally clear this bit to ask the kernel to + * rebuild the free space tree, however this might not work on older kernels + * that do not know about this bit. If not sure, clear the cache manually on + * first mount when booting older kernel versions. + */ +#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID (1ULL << 1) +#define BTRFS_FEATURE_COMPAT_RO_VERITY (1ULL << 2) + +/* + * Put all block group items into a dedicated block group tree, greatly + * reducing mount time for large filesystem due to better locality. + */ +#define BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE (1ULL << 3) + +#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) +#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) +#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) +#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) +#define BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD (1ULL << 4) + +/* + * older kernels tried to do bigger metadata blocks, but the + * code was pretty buggy. Lets not let them try anymore. + */ +#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5) + +#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6) +#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7) +#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8) +#define BTRFS_FEATURE_INCOMPAT_NO_HOLES (1ULL << 9) +#define BTRFS_FEATURE_INCOMPAT_METADATA_UUID (1ULL << 10) +#define BTRFS_FEATURE_INCOMPAT_RAID1C34 (1ULL << 11) +#define BTRFS_FEATURE_INCOMPAT_ZONED (1ULL << 12) +#define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 (1ULL << 13) + +struct btrfs_ioctl_feature_flags { + __u64 compat_flags; + __u64 compat_ro_flags; + __u64 incompat_flags; +}; + +/* balance control ioctl modes */ +#define BTRFS_BALANCE_CTL_PAUSE 1 +#define BTRFS_BALANCE_CTL_CANCEL 2 + +/* + * this is packed, because it should be exactly the same as its disk + * byte order counterpart (struct btrfs_disk_balance_args) + */ +struct btrfs_balance_args { + __u64 profiles; + + /* + * usage filter + * BTRFS_BALANCE_ARGS_USAGE with a single value means '0..N' + * BTRFS_BALANCE_ARGS_USAGE_RANGE - range syntax, min..max + */ + union { + __u64 usage; + struct { + __u32 usage_min; + __u32 usage_max; + }; + }; + __u64 devid; + __u64 pstart; + __u64 pend; + __u64 vstart; + __u64 vend; + + __u64 target; + + __u64 flags; + + /* + * BTRFS_BALANCE_ARGS_LIMIT with value 'limit' + * BTRFS_BALANCE_ARGS_LIMIT_RANGE - the extend version can use minimum + * and maximum + */ + union { + __u64 limit; /* limit number of processed chunks */ + struct { + __u32 limit_min; + __u32 limit_max; + }; + }; + + /* + * Process chunks that cross stripes_min..stripes_max devices, + * BTRFS_BALANCE_ARGS_STRIPES_RANGE + */ + __u32 stripes_min; + __u32 stripes_max; + + __u64 unused[6]; +} __attribute__ ((__packed__)); + +/* report balance progress to userspace */ +struct btrfs_balance_progress { + __u64 expected; /* estimated # of chunks that will be + * relocated to fulfill the request */ + __u64 considered; /* # of chunks we have considered so far */ + __u64 completed; /* # of chunks relocated so far */ +}; + +/* + * flags definition for balance + * + * Restriper's general type filter + * + * Used by: + * btrfs_ioctl_balance_args.flags + * btrfs_balance_control.flags (internal) + */ +#define BTRFS_BALANCE_DATA (1ULL << 0) +#define BTRFS_BALANCE_SYSTEM (1ULL << 1) +#define BTRFS_BALANCE_METADATA (1ULL << 2) + +#define BTRFS_BALANCE_TYPE_MASK (BTRFS_BALANCE_DATA | \ + BTRFS_BALANCE_SYSTEM | \ + BTRFS_BALANCE_METADATA) + +#define BTRFS_BALANCE_FORCE (1ULL << 3) +#define BTRFS_BALANCE_RESUME (1ULL << 4) + +/* + * flags definitions for per-type balance args + * + * Balance filters + * + * Used by: + * struct btrfs_balance_args + */ +#define BTRFS_BALANCE_ARGS_PROFILES (1ULL << 0) +#define BTRFS_BALANCE_ARGS_USAGE (1ULL << 1) +#define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2) +#define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3) +#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4) +#define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5) +#define BTRFS_BALANCE_ARGS_LIMIT_RANGE (1ULL << 6) +#define BTRFS_BALANCE_ARGS_STRIPES_RANGE (1ULL << 7) +#define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 10) + +#define BTRFS_BALANCE_ARGS_MASK \ + (BTRFS_BALANCE_ARGS_PROFILES | \ + BTRFS_BALANCE_ARGS_USAGE | \ + BTRFS_BALANCE_ARGS_DEVID | \ + BTRFS_BALANCE_ARGS_DRANGE | \ + BTRFS_BALANCE_ARGS_VRANGE | \ + BTRFS_BALANCE_ARGS_LIMIT | \ + BTRFS_BALANCE_ARGS_LIMIT_RANGE | \ + BTRFS_BALANCE_ARGS_STRIPES_RANGE | \ + BTRFS_BALANCE_ARGS_USAGE_RANGE) + +/* + * Profile changing flags. When SOFT is set we won't relocate chunk if + * it already has the target profile (even though it may be + * half-filled). + */ +#define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8) +#define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9) + + +/* + * flags definition for balance state + * + * Used by: + * struct btrfs_ioctl_balance_args.state + */ +#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0) +#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1) +#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2) + +struct btrfs_ioctl_balance_args { + __u64 flags; /* in/out */ + __u64 state; /* out */ + + struct btrfs_balance_args data; /* in/out */ + struct btrfs_balance_args meta; /* in/out */ + struct btrfs_balance_args sys; /* in/out */ + + struct btrfs_balance_progress stat; /* out */ + + __u64 unused[72]; /* pad to 1k */ +}; + +#define BTRFS_INO_LOOKUP_PATH_MAX 4080 +struct btrfs_ioctl_ino_lookup_args { + __u64 treeid; + __u64 objectid; + char name[BTRFS_INO_LOOKUP_PATH_MAX]; +}; + +#define BTRFS_INO_LOOKUP_USER_PATH_MAX (4080 - BTRFS_VOL_NAME_MAX - 1) +struct btrfs_ioctl_ino_lookup_user_args { + /* in, inode number containing the subvolume of 'subvolid' */ + __u64 dirid; + /* in */ + __u64 treeid; + /* out, name of the subvolume of 'treeid' */ + char name[BTRFS_VOL_NAME_MAX + 1]; + /* + * out, constructed path from the directory with which the ioctl is + * called to dirid + */ + char path[BTRFS_INO_LOOKUP_USER_PATH_MAX]; +}; + +/* Search criteria for the btrfs SEARCH ioctl family. */ +struct btrfs_ioctl_search_key { + /* + * The tree we're searching in. 1 is the tree of tree roots, 2 is the + * extent tree, etc... + * + * A special tree_id value of 0 will cause a search in the subvolume + * tree that the inode which is passed to the ioctl is part of. + */ + __u64 tree_id; /* in */ + + /* + * When doing a tree search, we're actually taking a slice from a + * linear search space of 136-bit keys. + * + * A full 136-bit tree key is composed as: + * (objectid << 72) + (type << 64) + offset + * + * The individual min and max values for objectid, type and offset + * define the min_key and max_key values for the search range. All + * metadata items with a key in the interval [min_key, max_key] will be + * returned. + * + * Additionally, we can filter the items returned on transaction id of + * the metadata block they're stored in by specifying a transid range. + * Be aware that this transaction id only denotes when the metadata + * page that currently contains the item got written the last time as + * result of a COW operation. The number does not have any meaning + * related to the transaction in which an individual item that is being + * returned was created or changed. + */ + __u64 min_objectid; /* in */ + __u64 max_objectid; /* in */ + __u64 min_offset; /* in */ + __u64 max_offset; /* in */ + __u64 min_transid; /* in */ + __u64 max_transid; /* in */ + __u32 min_type; /* in */ + __u32 max_type; /* in */ + + /* + * input: The maximum amount of results desired. + * output: The actual amount of items returned, restricted by any of: + * - reaching the upper bound of the search range + * - reaching the input nr_items amount of items + * - completely filling the supplied memory buffer + */ + __u32 nr_items; /* in/out */ + + /* align to 64 bits */ + __u32 unused; + + /* some extra for later */ + __u64 unused1; + __u64 unused2; + __u64 unused3; + __u64 unused4; +}; + +struct btrfs_ioctl_search_header { + __u64 transid; + __u64 objectid; + __u64 offset; + __u32 type; + __u32 len; +} __attribute__ ((__may_alias__)); + +#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key)) +/* + * the buf is an array of search headers where + * each header is followed by the actual item + * the type field is expanded to 32 bits for alignment + */ +struct btrfs_ioctl_search_args { + struct btrfs_ioctl_search_key key; + char buf[BTRFS_SEARCH_ARGS_BUFSIZE]; +}; + +/* + * Extended version of TREE_SEARCH ioctl that can return more than 4k of bytes. + * The allocated size of the buffer is set in buf_size. + */ +struct btrfs_ioctl_search_args_v2 { + struct btrfs_ioctl_search_key key; /* in/out - search parameters */ + __u64 buf_size; /* in - size of buffer + * out - on EOVERFLOW: needed size + * to store item */ + __u64 buf[]; /* out - found items */ +}; + +/* With a @src_length of zero, the range from @src_offset->EOF is cloned! */ +struct btrfs_ioctl_clone_range_args { + __s64 src_fd; + __u64 src_offset, src_length; + __u64 dest_offset; +}; + +/* + * flags definition for the defrag range ioctl + * + * Used by: + * struct btrfs_ioctl_defrag_range_args.flags + */ +#define BTRFS_DEFRAG_RANGE_COMPRESS 1 +#define BTRFS_DEFRAG_RANGE_START_IO 2 +struct btrfs_ioctl_defrag_range_args { + /* start of the defrag operation */ + __u64 start; + + /* number of bytes to defrag, use (u64)-1 to say all */ + __u64 len; + + /* + * flags for the operation, which can include turning + * on compression for this one defrag + */ + __u64 flags; + + /* + * any extent bigger than this will be considered + * already defragged. Use 0 to take the kernel default + * Use 1 to say every single extent must be rewritten + */ + __u32 extent_thresh; + + /* + * which compression method to use if turning on compression + * for this defrag operation. If unspecified, zlib will + * be used + */ + __u32 compress_type; + + /* spare for later */ + __u32 unused[4]; +}; + + +#define BTRFS_SAME_DATA_DIFFERS 1 +/* For extent-same ioctl */ +struct btrfs_ioctl_same_extent_info { + __s64 fd; /* in - destination file */ + __u64 logical_offset; /* in - start of extent in destination */ + __u64 bytes_deduped; /* out - total # of bytes we were able + * to dedupe from this file */ + /* status of this dedupe operation: + * 0 if dedup succeeds + * < 0 for error + * == BTRFS_SAME_DATA_DIFFERS if data differs + */ + __s32 status; /* out - see above description */ + __u32 reserved; +}; + +struct btrfs_ioctl_same_args { + __u64 logical_offset; /* in - start of extent in source */ + __u64 length; /* in - length of extent */ + __u16 dest_count; /* in - total elements in info array */ + __u16 reserved1; + __u32 reserved2; + struct btrfs_ioctl_same_extent_info info[]; +}; + +struct btrfs_ioctl_space_info { + __u64 flags; + __u64 total_bytes; + __u64 used_bytes; +}; + +struct btrfs_ioctl_space_args { + __u64 space_slots; + __u64 total_spaces; + struct btrfs_ioctl_space_info spaces[]; +}; + +struct btrfs_data_container { + __u32 bytes_left; /* out -- bytes not needed to deliver output */ + __u32 bytes_missing; /* out -- additional bytes needed for result */ + __u32 elem_cnt; /* out */ + __u32 elem_missed; /* out */ + __u64 val[]; /* out */ +}; + +struct btrfs_ioctl_ino_path_args { + __u64 inum; /* in */ + __u64 size; /* in */ + __u64 reserved[4]; + /* struct btrfs_data_container *fspath; out */ + __u64 fspath; /* out */ +}; + +struct btrfs_ioctl_logical_ino_args { + __u64 logical; /* in */ + __u64 size; /* in */ + __u64 reserved[3]; /* must be 0 for now */ + __u64 flags; /* in, v2 only */ + /* struct btrfs_data_container *inodes; out */ + __u64 inodes; +}; + +/* + * Return every ref to the extent, not just those containing logical block. + * Requires logical == extent bytenr. + */ +#define BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET (1ULL << 0) + +enum btrfs_dev_stat_values { + /* disk I/O failure stats */ + BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */ + BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */ + BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */ + + /* stats for indirect indications for I/O failures */ + BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or + * contents is illegal: this is an + * indication that the block was damaged + * during read or write, or written to + * wrong location or read from wrong + * location */ + BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not + * been written */ + + BTRFS_DEV_STAT_VALUES_MAX +}; + +/* Reset statistics after reading; needs SYS_ADMIN capability */ +#define BTRFS_DEV_STATS_RESET (1ULL << 0) + +struct btrfs_ioctl_get_dev_stats { + __u64 devid; /* in */ + __u64 nr_items; /* in/out */ + __u64 flags; /* in/out */ + + /* out values: */ + __u64 values[BTRFS_DEV_STAT_VALUES_MAX]; + + /* + * This pads the struct to 1032 bytes. It was originally meant to pad to + * 1024 bytes, but when adding the flags field, the padding calculation + * was not adjusted. + */ + __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; +}; + +#define BTRFS_QUOTA_CTL_ENABLE 1 +#define BTRFS_QUOTA_CTL_DISABLE 2 +#define BTRFS_QUOTA_CTL_RESCAN__NOTUSED 3 +struct btrfs_ioctl_quota_ctl_args { + __u64 cmd; + __u64 status; +}; + +struct btrfs_ioctl_quota_rescan_args { + __u64 flags; + __u64 progress; + __u64 reserved[6]; +}; + +struct btrfs_ioctl_qgroup_assign_args { + __u64 assign; + __u64 src; + __u64 dst; +}; + +struct btrfs_ioctl_qgroup_create_args { + __u64 create; + __u64 qgroupid; +}; +struct btrfs_ioctl_timespec { + __u64 sec; + __u32 nsec; +}; + +struct btrfs_ioctl_received_subvol_args { + char uuid[BTRFS_UUID_SIZE]; /* in */ + __u64 stransid; /* in */ + __u64 rtransid; /* out */ + struct btrfs_ioctl_timespec stime; /* in */ + struct btrfs_ioctl_timespec rtime; /* out */ + __u64 flags; /* in */ + __u64 reserved[16]; /* in */ +}; + +/* + * Caller doesn't want file data in the send stream, even if the + * search of clone sources doesn't find an extent. UPDATE_EXTENT + * commands will be sent instead of WRITE commands. + */ +#define BTRFS_SEND_FLAG_NO_FILE_DATA 0x1 + +/* + * Do not add the leading stream header. Used when multiple snapshots + * are sent back to back. + */ +#define BTRFS_SEND_FLAG_OMIT_STREAM_HEADER 0x2 + +/* + * Omit the command at the end of the stream that indicated the end + * of the stream. This option is used when multiple snapshots are + * sent back to back. + */ +#define BTRFS_SEND_FLAG_OMIT_END_CMD 0x4 + +/* + * Read the protocol version in the structure + */ +#define BTRFS_SEND_FLAG_VERSION 0x8 + +/* + * Send compressed data using the ENCODED_WRITE command instead of decompressing + * the data and sending it with the WRITE command. This requires protocol + * version >= 2. + */ +#define BTRFS_SEND_FLAG_COMPRESSED 0x10 + +#define BTRFS_SEND_FLAG_MASK \ + (BTRFS_SEND_FLAG_NO_FILE_DATA | \ + BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \ + BTRFS_SEND_FLAG_OMIT_END_CMD | \ + BTRFS_SEND_FLAG_VERSION | \ + BTRFS_SEND_FLAG_COMPRESSED) + +struct btrfs_ioctl_send_args { + __s64 send_fd; /* in */ + __u64 clone_sources_count; /* in */ + __u64 *clone_sources; /* in */ + __u64 parent_root; /* in */ + __u64 flags; /* in */ + __u32 version; /* in */ + __u8 reserved[28]; /* in */ +}; + +/* + * Information about a fs tree root. + * + * All items are filled by the ioctl + */ +struct btrfs_ioctl_get_subvol_info_args { + /* Id of this subvolume */ + __u64 treeid; + + /* Name of this subvolume, used to get the real name at mount point */ + char name[BTRFS_VOL_NAME_MAX + 1]; + + /* + * Id of the subvolume which contains this subvolume. + * Zero for top-level subvolume or a deleted subvolume. + */ + __u64 parent_id; + + /* + * Inode number of the directory which contains this subvolume. + * Zero for top-level subvolume or a deleted subvolume + */ + __u64 dirid; + + /* Latest transaction id of this subvolume */ + __u64 generation; + + /* Flags of this subvolume */ + __u64 flags; + + /* UUID of this subvolume */ + __u8 uuid[BTRFS_UUID_SIZE]; + + /* + * UUID of the subvolume of which this subvolume is a snapshot. + * All zero for a non-snapshot subvolume. + */ + __u8 parent_uuid[BTRFS_UUID_SIZE]; + + /* + * UUID of the subvolume from which this subvolume was received. + * All zero for non-received subvolume. + */ + __u8 received_uuid[BTRFS_UUID_SIZE]; + + /* Transaction id indicating when change/create/send/receive happened */ + __u64 ctransid; + __u64 otransid; + __u64 stransid; + __u64 rtransid; + /* Time corresponding to c/o/s/rtransid */ + struct btrfs_ioctl_timespec ctime; + struct btrfs_ioctl_timespec otime; + struct btrfs_ioctl_timespec stime; + struct btrfs_ioctl_timespec rtime; + + /* Must be zero */ + __u64 reserved[8]; +}; + +#define BTRFS_MAX_ROOTREF_BUFFER_NUM 255 +struct btrfs_ioctl_get_subvol_rootref_args { + /* in/out, minimum id of rootref's treeid to be searched */ + __u64 min_treeid; + + /* out */ + struct { + __u64 treeid; + __u64 dirid; + } rootref[BTRFS_MAX_ROOTREF_BUFFER_NUM]; + + /* out, number of found items */ + __u8 num_items; + __u8 align[7]; +}; + +/* + * Data and metadata for an encoded read or write. + * + * Encoded I/O bypasses any encoding automatically done by the filesystem (e.g., + * compression). This can be used to read the compressed contents of a file or + * write pre-compressed data directly to a file. + * + * BTRFS_IOC_ENCODED_READ and BTRFS_IOC_ENCODED_WRITE are essentially + * preadv/pwritev with additional metadata about how the data is encoded and the + * size of the unencoded data. + * + * BTRFS_IOC_ENCODED_READ fills the given iovecs with the encoded data, fills + * the metadata fields, and returns the size of the encoded data. It reads one + * extent per call. It can also read data which is not encoded. + * + * BTRFS_IOC_ENCODED_WRITE uses the metadata fields, writes the encoded data + * from the iovecs, and returns the size of the encoded data. Note that the + * encoded data is not validated when it is written; if it is not valid (e.g., + * it cannot be decompressed), then a subsequent read may return an error. + * + * Since the filesystem page cache contains decoded data, encoded I/O bypasses + * the page cache. Encoded I/O requires CAP_SYS_ADMIN. + */ +struct btrfs_ioctl_encoded_io_args { + /* Input parameters for both reads and writes. */ + + /* + * iovecs containing encoded data. + * + * For reads, if the size of the encoded data is larger than the sum of + * iov[n].iov_len for 0 <= n < iovcnt, then the ioctl fails with + * ENOBUFS. + * + * For writes, the size of the encoded data is the sum of iov[n].iov_len + * for 0 <= n < iovcnt. This must be less than 128 KiB (this limit may + * increase in the future). This must also be less than or equal to + * unencoded_len. + */ + const struct iovec *iov; + /* Number of iovecs. */ + unsigned long iovcnt; + /* + * Offset in file. + * + * For writes, must be aligned to the sector size of the filesystem. + */ + __s64 offset; + /* Currently must be zero. */ + __u64 flags; + + /* + * For reads, the following members are output parameters that will + * contain the returned metadata for the encoded data. + * For writes, the following members must be set to the metadata for the + * encoded data. + */ + + /* + * Length of the data in the file. + * + * Must be less than or equal to unencoded_len - unencoded_offset. For + * writes, must be aligned to the sector size of the filesystem unless + * the data ends at or beyond the current end of the file. + */ + __u64 len; + /* + * Length of the unencoded (i.e., decrypted and decompressed) data. + * + * For writes, must be no more than 128 KiB (this limit may increase in + * the future). If the unencoded data is actually longer than + * unencoded_len, then it is truncated; if it is shorter, then it is + * extended with zeroes. + */ + __u64 unencoded_len; + /* + * Offset from the first byte of the unencoded data to the first byte of + * logical data in the file. + * + * Must be less than unencoded_len. + */ + __u64 unencoded_offset; + /* + * BTRFS_ENCODED_IO_COMPRESSION_* type. + * + * For writes, must not be BTRFS_ENCODED_IO_COMPRESSION_NONE. + */ + __u32 compression; + /* Currently always BTRFS_ENCODED_IO_ENCRYPTION_NONE. */ + __u32 encryption; + /* + * Reserved for future expansion. + * + * For reads, always returned as zero. Users should check for non-zero + * bytes. If there are any, then the kernel has a newer version of this + * structure with additional information that the user definition is + * missing. + * + * For writes, must be zeroed. + */ + __u8 reserved[64]; +}; + +/* Data is not compressed. */ +#define BTRFS_ENCODED_IO_COMPRESSION_NONE 0 +/* Data is compressed as a single zlib stream. */ +#define BTRFS_ENCODED_IO_COMPRESSION_ZLIB 1 +/* + * Data is compressed as a single zstd frame with the windowLog compression + * parameter set to no more than 17. + */ +#define BTRFS_ENCODED_IO_COMPRESSION_ZSTD 2 +/* + * Data is compressed sector by sector (using the sector size indicated by the + * name of the constant) with LZO1X and wrapped in the format documented in + * fs/btrfs/lzo.c. For writes, the compression sector size must match the + * filesystem sector size. + */ +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_4K 3 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_8K 4 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_16K 5 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_32K 6 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_64K 7 +#define BTRFS_ENCODED_IO_COMPRESSION_TYPES 8 + +/* Data is not encrypted. */ +#define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0 +#define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1 + +/* Error codes as returned by the kernel */ +enum btrfs_err_code { + BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1, + BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET, + BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET, + BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET, + BTRFS_ERROR_DEV_TGT_REPLACE, + BTRFS_ERROR_DEV_MISSING_NOT_FOUND, + BTRFS_ERROR_DEV_ONLY_WRITABLE, + BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS, + BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET, + BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET, +}; + +#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_FORGET_DEV _IOW(BTRFS_IOCTL_MAGIC, 5, \ + struct btrfs_ioctl_vol_args) +/* trans start and trans end are dangerous, and only for + * use by applications that know how to avoid the + * resulting deadlocks + */ +#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6) +#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7) +#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8) + +#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) +#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ + struct btrfs_ioctl_vol_args) + +#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \ + struct btrfs_ioctl_clone_range_args) + +#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \ + struct btrfs_ioctl_defrag_range_args) +#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \ + struct btrfs_ioctl_search_args) +#define BTRFS_IOC_TREE_SEARCH_V2 _IOWR(BTRFS_IOCTL_MAGIC, 17, \ + struct btrfs_ioctl_search_args_v2) +#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ + struct btrfs_ioctl_ino_lookup_args) +#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64) +#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ + struct btrfs_ioctl_space_args) +#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) +#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) +#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ + struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \ + struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) +#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) +#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ + struct btrfs_ioctl_scrub_args) +#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28) +#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \ + struct btrfs_ioctl_scrub_args) +#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \ + struct btrfs_ioctl_dev_info_args) +#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ + struct btrfs_ioctl_fs_info_args) +#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \ + struct btrfs_ioctl_balance_args) +#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int) +#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \ + struct btrfs_ioctl_balance_args) +#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \ + struct btrfs_ioctl_ino_path_args) +#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ + struct btrfs_ioctl_logical_ino_args) +#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \ + struct btrfs_ioctl_received_subvol_args) +#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args) +#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \ + struct btrfs_ioctl_quota_ctl_args) +#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \ + struct btrfs_ioctl_qgroup_assign_args) +#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \ + struct btrfs_ioctl_qgroup_create_args) +#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ + struct btrfs_ioctl_qgroup_limit_args) +#define BTRFS_IOC_QUOTA_RESCAN _IOW(BTRFS_IOCTL_MAGIC, 44, \ + struct btrfs_ioctl_quota_rescan_args) +#define BTRFS_IOC_QUOTA_RESCAN_STATUS _IOR(BTRFS_IOCTL_MAGIC, 45, \ + struct btrfs_ioctl_quota_rescan_args) +#define BTRFS_IOC_QUOTA_RESCAN_WAIT _IO(BTRFS_IOCTL_MAGIC, 46) +#define BTRFS_IOC_GET_FSLABEL FS_IOC_GETFSLABEL +#define BTRFS_IOC_SET_FSLABEL FS_IOC_SETFSLABEL +#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ + struct btrfs_ioctl_get_dev_stats) +#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ + struct btrfs_ioctl_dev_replace_args) +#define BTRFS_IOC_FILE_EXTENT_SAME _IOWR(BTRFS_IOCTL_MAGIC, 54, \ + struct btrfs_ioctl_same_args) +#define BTRFS_IOC_GET_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \ + struct btrfs_ioctl_feature_flags) +#define BTRFS_IOC_SET_FEATURES _IOW(BTRFS_IOCTL_MAGIC, 57, \ + struct btrfs_ioctl_feature_flags[2]) +#define BTRFS_IOC_GET_SUPPORTED_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \ + struct btrfs_ioctl_feature_flags[3]) +#define BTRFS_IOC_RM_DEV_V2 _IOW(BTRFS_IOCTL_MAGIC, 58, \ + struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_LOGICAL_INO_V2 _IOWR(BTRFS_IOCTL_MAGIC, 59, \ + struct btrfs_ioctl_logical_ino_args) +#define BTRFS_IOC_GET_SUBVOL_INFO _IOR(BTRFS_IOCTL_MAGIC, 60, \ + struct btrfs_ioctl_get_subvol_info_args) +#define BTRFS_IOC_GET_SUBVOL_ROOTREF _IOWR(BTRFS_IOCTL_MAGIC, 61, \ + struct btrfs_ioctl_get_subvol_rootref_args) +#define BTRFS_IOC_INO_LOOKUP_USER _IOWR(BTRFS_IOCTL_MAGIC, 62, \ + struct btrfs_ioctl_ino_lookup_user_args) +#define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \ + struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_ENCODED_READ _IOR(BTRFS_IOCTL_MAGIC, 64, \ + struct btrfs_ioctl_encoded_io_args) +#define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \ + struct btrfs_ioctl_encoded_io_args) + +#ifdef __cplusplus +} +#endif + +#endif /* _UAPI_LINUX_BTRFS_H */ diff --git a/src/basic/linux/btrfs_tree.h b/src/basic/linux/btrfs_tree.h new file mode 100644 index 0000000..ab38d0f --- /dev/null +++ b/src/basic/linux/btrfs_tree.h @@ -0,0 +1,1260 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _BTRFS_CTREE_H_ +#define _BTRFS_CTREE_H_ + +#include +#include +#ifdef __KERNEL__ +#include +#else +#include +#endif + +/* ASCII for _BHRfS_M, no terminating nul */ +#define BTRFS_MAGIC 0x4D5F53665248425FULL + +#define BTRFS_MAX_LEVEL 8 + +/* + * We can actually store much bigger names, but lets not confuse the rest of + * linux. + */ +#define BTRFS_NAME_LEN 255 + +/* + * Theoretical limit is larger, but we keep this down to a sane value. That + * should limit greatly the possibility of collisions on inode ref items. + */ +#define BTRFS_LINK_MAX 65535U + +/* + * This header contains the structure definitions and constants used + * by file system objects that can be retrieved using + * the BTRFS_IOC_SEARCH_TREE ioctl. That means basically anything that + * is needed to describe a leaf node's key or item contents. + */ + +/* holds pointers to all of the tree roots */ +#define BTRFS_ROOT_TREE_OBJECTID 1ULL + +/* stores information about which extents are in use, and reference counts */ +#define BTRFS_EXTENT_TREE_OBJECTID 2ULL + +/* + * chunk tree stores translations from logical -> physical block numbering + * the super block points to the chunk tree + */ +#define BTRFS_CHUNK_TREE_OBJECTID 3ULL + +/* + * stores information about which areas of a given device are in use. + * one per device. The tree of tree roots points to the device tree + */ +#define BTRFS_DEV_TREE_OBJECTID 4ULL + +/* one per subvolume, storing files and directories */ +#define BTRFS_FS_TREE_OBJECTID 5ULL + +/* directory objectid inside the root tree */ +#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL + +/* holds checksums of all the data extents */ +#define BTRFS_CSUM_TREE_OBJECTID 7ULL + +/* holds quota configuration and tracking */ +#define BTRFS_QUOTA_TREE_OBJECTID 8ULL + +/* for storing items that use the BTRFS_UUID_KEY* types */ +#define BTRFS_UUID_TREE_OBJECTID 9ULL + +/* tracks free space in block groups. */ +#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL + +/* Holds the block group items for extent tree v2. */ +#define BTRFS_BLOCK_GROUP_TREE_OBJECTID 11ULL + +/* device stats in the device tree */ +#define BTRFS_DEV_STATS_OBJECTID 0ULL + +/* for storing balance parameters in the root tree */ +#define BTRFS_BALANCE_OBJECTID -4ULL + +/* orphan objectid for tracking unlinked/truncated files */ +#define BTRFS_ORPHAN_OBJECTID -5ULL + +/* does write ahead logging to speed up fsyncs */ +#define BTRFS_TREE_LOG_OBJECTID -6ULL +#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL + +/* for space balancing */ +#define BTRFS_TREE_RELOC_OBJECTID -8ULL +#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL + +/* + * extent checksums all have this objectid + * this allows them to share the logging tree + * for fsyncs + */ +#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL + +/* For storing free space cache */ +#define BTRFS_FREE_SPACE_OBJECTID -11ULL + +/* + * The inode number assigned to the special inode for storing + * free ino cache + */ +#define BTRFS_FREE_INO_OBJECTID -12ULL + +/* dummy objectid represents multiple objectids */ +#define BTRFS_MULTIPLE_OBJECTIDS -255ULL + +/* + * All files have objectids in this range. + */ +#define BTRFS_FIRST_FREE_OBJECTID 256ULL +#define BTRFS_LAST_FREE_OBJECTID -256ULL +#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL + + +/* + * the device items go into the chunk tree. The key is in the form + * [ 1 BTRFS_DEV_ITEM_KEY device_id ] + */ +#define BTRFS_DEV_ITEMS_OBJECTID 1ULL + +#define BTRFS_BTREE_INODE_OBJECTID 1 + +#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 + +#define BTRFS_DEV_REPLACE_DEVID 0ULL + +/* + * inode items have the data typically returned from stat and store other + * info about object characteristics. There is one for every file and dir in + * the FS + */ +#define BTRFS_INODE_ITEM_KEY 1 +#define BTRFS_INODE_REF_KEY 12 +#define BTRFS_INODE_EXTREF_KEY 13 +#define BTRFS_XATTR_ITEM_KEY 24 + +/* + * fs verity items are stored under two different key types on disk. + * The descriptor items: + * [ inode objectid, BTRFS_VERITY_DESC_ITEM_KEY, offset ] + * + * At offset 0, we store a btrfs_verity_descriptor_item which tracks the size + * of the descriptor item and some extra data for encryption. + * Starting at offset 1, these hold the generic fs verity descriptor. The + * latter are opaque to btrfs, we just read and write them as a blob for the + * higher level verity code. The most common descriptor size is 256 bytes. + * + * The merkle tree items: + * [ inode objectid, BTRFS_VERITY_MERKLE_ITEM_KEY, offset ] + * + * These also start at offset 0, and correspond to the merkle tree bytes. When + * fsverity asks for page 0 of the merkle tree, we pull up one page starting at + * offset 0 for this key type. These are also opaque to btrfs, we're blindly + * storing whatever fsverity sends down. + */ +#define BTRFS_VERITY_DESC_ITEM_KEY 36 +#define BTRFS_VERITY_MERKLE_ITEM_KEY 37 + +#define BTRFS_ORPHAN_ITEM_KEY 48 +/* reserve 2-15 close to the inode for later flexibility */ + +/* + * dir items are the name -> inode pointers in a directory. There is one + * for every name in a directory. BTRFS_DIR_LOG_ITEM_KEY is no longer used + * but it's still defined here for documentation purposes and to help avoid + * having its numerical value reused in the future. + */ +#define BTRFS_DIR_LOG_ITEM_KEY 60 +#define BTRFS_DIR_LOG_INDEX_KEY 72 +#define BTRFS_DIR_ITEM_KEY 84 +#define BTRFS_DIR_INDEX_KEY 96 +/* + * extent data is for file data + */ +#define BTRFS_EXTENT_DATA_KEY 108 + +/* + * extent csums are stored in a separate tree and hold csums for + * an entire extent on disk. + */ +#define BTRFS_EXTENT_CSUM_KEY 128 + +/* + * root items point to tree roots. They are typically in the root + * tree used by the super block to find all the other trees + */ +#define BTRFS_ROOT_ITEM_KEY 132 + +/* + * root backrefs tie subvols and snapshots to the directory entries that + * reference them + */ +#define BTRFS_ROOT_BACKREF_KEY 144 + +/* + * root refs make a fast index for listing all of the snapshots and + * subvolumes referenced by a given root. They point directly to the + * directory item in the root that references the subvol + */ +#define BTRFS_ROOT_REF_KEY 156 + +/* + * extent items are in the extent map tree. These record which blocks + * are used, and how many references there are to each block + */ +#define BTRFS_EXTENT_ITEM_KEY 168 + +/* + * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know + * the length, so we save the level in key->offset instead of the length. + */ +#define BTRFS_METADATA_ITEM_KEY 169 + +#define BTRFS_TREE_BLOCK_REF_KEY 176 + +#define BTRFS_EXTENT_DATA_REF_KEY 178 + +#define BTRFS_EXTENT_REF_V0_KEY 180 + +#define BTRFS_SHARED_BLOCK_REF_KEY 182 + +#define BTRFS_SHARED_DATA_REF_KEY 184 + +/* + * block groups give us hints into the extent allocation trees. Which + * blocks are free etc etc + */ +#define BTRFS_BLOCK_GROUP_ITEM_KEY 192 + +/* + * Every block group is represented in the free space tree by a free space info + * item, which stores some accounting information. It is keyed on + * (block_group_start, FREE_SPACE_INFO, block_group_length). + */ +#define BTRFS_FREE_SPACE_INFO_KEY 198 + +/* + * A free space extent tracks an extent of space that is free in a block group. + * It is keyed on (start, FREE_SPACE_EXTENT, length). + */ +#define BTRFS_FREE_SPACE_EXTENT_KEY 199 + +/* + * When a block group becomes very fragmented, we convert it to use bitmaps + * instead of extents. A free space bitmap is keyed on + * (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap with + * (length / sectorsize) bits. + */ +#define BTRFS_FREE_SPACE_BITMAP_KEY 200 + +#define BTRFS_DEV_EXTENT_KEY 204 +#define BTRFS_DEV_ITEM_KEY 216 +#define BTRFS_CHUNK_ITEM_KEY 228 + +/* + * Records the overall state of the qgroups. + * There's only one instance of this key present, + * (0, BTRFS_QGROUP_STATUS_KEY, 0) + */ +#define BTRFS_QGROUP_STATUS_KEY 240 +/* + * Records the currently used space of the qgroup. + * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid). + */ +#define BTRFS_QGROUP_INFO_KEY 242 +/* + * Contains the user configured limits for the qgroup. + * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid). + */ +#define BTRFS_QGROUP_LIMIT_KEY 244 +/* + * Records the child-parent relationship of qgroups. For + * each relation, 2 keys are present: + * (childid, BTRFS_QGROUP_RELATION_KEY, parentid) + * (parentid, BTRFS_QGROUP_RELATION_KEY, childid) + */ +#define BTRFS_QGROUP_RELATION_KEY 246 + +/* + * Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY. + */ +#define BTRFS_BALANCE_ITEM_KEY 248 + +/* + * The key type for tree items that are stored persistently, but do not need to + * exist for extended period of time. The items can exist in any tree. + * + * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data] + * + * Existing items: + * + * - balance status item + * (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0) + */ +#define BTRFS_TEMPORARY_ITEM_KEY 248 + +/* + * Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY + */ +#define BTRFS_DEV_STATS_KEY 249 + +/* + * The key type for tree items that are stored persistently and usually exist + * for a long period, eg. filesystem lifetime. The item kinds can be status + * information, stats or preference values. The item can exist in any tree. + * + * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data] + * + * Existing items: + * + * - device statistics, store IO stats in the device tree, one key for all + * stats + * (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0) + */ +#define BTRFS_PERSISTENT_ITEM_KEY 249 + +/* + * Persistently stores the device replace state in the device tree. + * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0). + */ +#define BTRFS_DEV_REPLACE_KEY 250 + +/* + * Stores items that allow to quickly map UUIDs to something else. + * These items are part of the filesystem UUID tree. + * The key is built like this: + * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits). + */ +#if BTRFS_UUID_SIZE != 16 +#error "UUID items require BTRFS_UUID_SIZE == 16!" +#endif +#define BTRFS_UUID_KEY_SUBVOL 251 /* for UUIDs assigned to subvols */ +#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252 /* for UUIDs assigned to + * received subvols */ + +/* + * string items are for debugging. They just store a short string of + * data in the FS + */ +#define BTRFS_STRING_ITEM_KEY 253 + +/* Maximum metadata block size (nodesize) */ +#define BTRFS_MAX_METADATA_BLOCKSIZE 65536 + +/* 32 bytes in various csum fields */ +#define BTRFS_CSUM_SIZE 32 + +/* csum types */ +enum btrfs_csum_type { + BTRFS_CSUM_TYPE_CRC32 = 0, + BTRFS_CSUM_TYPE_XXHASH = 1, + BTRFS_CSUM_TYPE_SHA256 = 2, + BTRFS_CSUM_TYPE_BLAKE2 = 3, +}; + +/* + * flags definitions for directory entry item type + * + * Used by: + * struct btrfs_dir_item.type + * + * Values 0..7 must match common file type values in fs_types.h. + */ +#define BTRFS_FT_UNKNOWN 0 +#define BTRFS_FT_REG_FILE 1 +#define BTRFS_FT_DIR 2 +#define BTRFS_FT_CHRDEV 3 +#define BTRFS_FT_BLKDEV 4 +#define BTRFS_FT_FIFO 5 +#define BTRFS_FT_SOCK 6 +#define BTRFS_FT_SYMLINK 7 +#define BTRFS_FT_XATTR 8 +#define BTRFS_FT_MAX 9 +/* Directory contains encrypted data */ +#define BTRFS_FT_ENCRYPTED 0x80 + +static inline __u8 btrfs_dir_flags_to_ftype(__u8 flags) +{ + return flags & ~BTRFS_FT_ENCRYPTED; +} + +/* + * Inode flags + */ +#define BTRFS_INODE_NODATASUM (1U << 0) +#define BTRFS_INODE_NODATACOW (1U << 1) +#define BTRFS_INODE_READONLY (1U << 2) +#define BTRFS_INODE_NOCOMPRESS (1U << 3) +#define BTRFS_INODE_PREALLOC (1U << 4) +#define BTRFS_INODE_SYNC (1U << 5) +#define BTRFS_INODE_IMMUTABLE (1U << 6) +#define BTRFS_INODE_APPEND (1U << 7) +#define BTRFS_INODE_NODUMP (1U << 8) +#define BTRFS_INODE_NOATIME (1U << 9) +#define BTRFS_INODE_DIRSYNC (1U << 10) +#define BTRFS_INODE_COMPRESS (1U << 11) + +#define BTRFS_INODE_ROOT_ITEM_INIT (1U << 31) + +#define BTRFS_INODE_FLAG_MASK \ + (BTRFS_INODE_NODATASUM | \ + BTRFS_INODE_NODATACOW | \ + BTRFS_INODE_READONLY | \ + BTRFS_INODE_NOCOMPRESS | \ + BTRFS_INODE_PREALLOC | \ + BTRFS_INODE_SYNC | \ + BTRFS_INODE_IMMUTABLE | \ + BTRFS_INODE_APPEND | \ + BTRFS_INODE_NODUMP | \ + BTRFS_INODE_NOATIME | \ + BTRFS_INODE_DIRSYNC | \ + BTRFS_INODE_COMPRESS | \ + BTRFS_INODE_ROOT_ITEM_INIT) + +#define BTRFS_INODE_RO_VERITY (1U << 0) + +#define BTRFS_INODE_RO_FLAG_MASK (BTRFS_INODE_RO_VERITY) + +/* + * The key defines the order in the tree, and so it also defines (optimal) + * block layout. + * + * objectid corresponds to the inode number. + * + * type tells us things about the object, and is a kind of stream selector. + * so for a given inode, keys with type of 1 might refer to the inode data, + * type of 2 may point to file data in the btree and type == 3 may point to + * extents. + * + * offset is the starting byte offset for this key in the stream. + * + * btrfs_disk_key is in disk byte order. struct btrfs_key is always + * in cpu native order. Otherwise they are identical and their sizes + * should be the same (ie both packed) + */ +struct btrfs_disk_key { + __le64 objectid; + __u8 type; + __le64 offset; +} __attribute__ ((__packed__)); + +struct btrfs_key { + __u64 objectid; + __u8 type; + __u64 offset; +} __attribute__ ((__packed__)); + +/* + * Every tree block (leaf or node) starts with this header. + */ +struct btrfs_header { + /* These first four must match the super block */ + __u8 csum[BTRFS_CSUM_SIZE]; + /* FS specific uuid */ + __u8 fsid[BTRFS_FSID_SIZE]; + /* Which block this node is supposed to live in */ + __le64 bytenr; + __le64 flags; + + /* Allowed to be different from the super from here on down */ + __u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; + __le64 generation; + __le64 owner; + __le32 nritems; + __u8 level; +} __attribute__ ((__packed__)); + +/* + * This is a very generous portion of the super block, giving us room to + * translate 14 chunks with 3 stripes each. + */ +#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048 + +/* + * Just in case we somehow lose the roots and are not able to mount, we store + * an array of the roots from previous transactions in the super. + */ +#define BTRFS_NUM_BACKUP_ROOTS 4 +struct btrfs_root_backup { + __le64 tree_root; + __le64 tree_root_gen; + + __le64 chunk_root; + __le64 chunk_root_gen; + + __le64 extent_root; + __le64 extent_root_gen; + + __le64 fs_root; + __le64 fs_root_gen; + + __le64 dev_root; + __le64 dev_root_gen; + + __le64 csum_root; + __le64 csum_root_gen; + + __le64 total_bytes; + __le64 bytes_used; + __le64 num_devices; + /* future */ + __le64 unused_64[4]; + + __u8 tree_root_level; + __u8 chunk_root_level; + __u8 extent_root_level; + __u8 fs_root_level; + __u8 dev_root_level; + __u8 csum_root_level; + /* future and to align */ + __u8 unused_8[10]; +} __attribute__ ((__packed__)); + +/* + * A leaf is full of items. offset and size tell us where to find the item in + * the leaf (relative to the start of the data area) + */ +struct btrfs_item { + struct btrfs_disk_key key; + __le32 offset; + __le32 size; +} __attribute__ ((__packed__)); + +/* + * Leaves have an item area and a data area: + * [item0, item1....itemN] [free space] [dataN...data1, data0] + * + * The data is separate from the items to get the keys closer together during + * searches. + */ +struct btrfs_leaf { + struct btrfs_header header; + struct btrfs_item items[]; +} __attribute__ ((__packed__)); + +/* + * All non-leaf blocks are nodes, they hold only keys and pointers to other + * blocks. + */ +struct btrfs_key_ptr { + struct btrfs_disk_key key; + __le64 blockptr; + __le64 generation; +} __attribute__ ((__packed__)); + +struct btrfs_node { + struct btrfs_header header; + struct btrfs_key_ptr ptrs[]; +} __attribute__ ((__packed__)); + +struct btrfs_dev_item { + /* the internal btrfs device id */ + __le64 devid; + + /* size of the device */ + __le64 total_bytes; + + /* bytes used */ + __le64 bytes_used; + + /* optimal io alignment for this device */ + __le32 io_align; + + /* optimal io width for this device */ + __le32 io_width; + + /* minimal io size for this device */ + __le32 sector_size; + + /* type and info about this device */ + __le64 type; + + /* expected generation for this device */ + __le64 generation; + + /* + * starting byte of this partition on the device, + * to allow for stripe alignment in the future + */ + __le64 start_offset; + + /* grouping information for allocation decisions */ + __le32 dev_group; + + /* seek speed 0-100 where 100 is fastest */ + __u8 seek_speed; + + /* bandwidth 0-100 where 100 is fastest */ + __u8 bandwidth; + + /* btrfs generated uuid for this device */ + __u8 uuid[BTRFS_UUID_SIZE]; + + /* uuid of FS who owns this device */ + __u8 fsid[BTRFS_UUID_SIZE]; +} __attribute__ ((__packed__)); + +struct btrfs_stripe { + __le64 devid; + __le64 offset; + __u8 dev_uuid[BTRFS_UUID_SIZE]; +} __attribute__ ((__packed__)); + +struct btrfs_chunk { + /* size of this chunk in bytes */ + __le64 length; + + /* objectid of the root referencing this chunk */ + __le64 owner; + + __le64 stripe_len; + __le64 type; + + /* optimal io alignment for this chunk */ + __le32 io_align; + + /* optimal io width for this chunk */ + __le32 io_width; + + /* minimal io size for this chunk */ + __le32 sector_size; + + /* 2^16 stripes is quite a lot, a second limit is the size of a single + * item in the btree + */ + __le16 num_stripes; + + /* sub stripes only matter for raid10 */ + __le16 sub_stripes; + struct btrfs_stripe stripe; + /* additional stripes go here */ +} __attribute__ ((__packed__)); + +/* + * The super block basically lists the main trees of the FS. + */ +struct btrfs_super_block { + /* The first 4 fields must match struct btrfs_header */ + __u8 csum[BTRFS_CSUM_SIZE]; + /* FS specific UUID, visible to user */ + __u8 fsid[BTRFS_FSID_SIZE]; + /* This block number */ + __le64 bytenr; + __le64 flags; + + /* Allowed to be different from the btrfs_header from here own down */ + __le64 magic; + __le64 generation; + __le64 root; + __le64 chunk_root; + __le64 log_root; + + /* + * This member has never been utilized since the very beginning, thus + * it's always 0 regardless of kernel version. We always use + * generation + 1 to read log tree root. So here we mark it deprecated. + */ + __le64 __unused_log_root_transid; + __le64 total_bytes; + __le64 bytes_used; + __le64 root_dir_objectid; + __le64 num_devices; + __le32 sectorsize; + __le32 nodesize; + __le32 __unused_leafsize; + __le32 stripesize; + __le32 sys_chunk_array_size; + __le64 chunk_root_generation; + __le64 compat_flags; + __le64 compat_ro_flags; + __le64 incompat_flags; + __le16 csum_type; + __u8 root_level; + __u8 chunk_root_level; + __u8 log_root_level; + struct btrfs_dev_item dev_item; + + char label[BTRFS_LABEL_SIZE]; + + __le64 cache_generation; + __le64 uuid_tree_generation; + + /* The UUID written into btree blocks */ + __u8 metadata_uuid[BTRFS_FSID_SIZE]; + + __u64 nr_global_roots; + + /* Future expansion */ + __le64 reserved[27]; + __u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; + struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; + + /* Padded to 4096 bytes */ + __u8 padding[565]; +} __attribute__ ((__packed__)); + +#define BTRFS_FREE_SPACE_EXTENT 1 +#define BTRFS_FREE_SPACE_BITMAP 2 + +struct btrfs_free_space_entry { + __le64 offset; + __le64 bytes; + __u8 type; +} __attribute__ ((__packed__)); + +struct btrfs_free_space_header { + struct btrfs_disk_key location; + __le64 generation; + __le64 num_entries; + __le64 num_bitmaps; +} __attribute__ ((__packed__)); + +#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) +#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) + +/* Super block flags */ +/* Errors detected */ +#define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) + +#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) +#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) +#define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34) +#define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35) +#define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36) + + +/* + * items in the extent btree are used to record the objectid of the + * owner of the block and the number of references + */ + +struct btrfs_extent_item { + __le64 refs; + __le64 generation; + __le64 flags; +} __attribute__ ((__packed__)); + +struct btrfs_extent_item_v0 { + __le32 refs; +} __attribute__ ((__packed__)); + + +#define BTRFS_EXTENT_FLAG_DATA (1ULL << 0) +#define BTRFS_EXTENT_FLAG_TREE_BLOCK (1ULL << 1) + +/* following flags only apply to tree blocks */ + +/* use full backrefs for extent pointers in the block */ +#define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8) + +#define BTRFS_BACKREF_REV_MAX 256 +#define BTRFS_BACKREF_REV_SHIFT 56 +#define BTRFS_BACKREF_REV_MASK (((u64)BTRFS_BACKREF_REV_MAX - 1) << \ + BTRFS_BACKREF_REV_SHIFT) + +#define BTRFS_OLD_BACKREF_REV 0 +#define BTRFS_MIXED_BACKREF_REV 1 + +/* + * this flag is only used internally by scrub and may be changed at any time + * it is only declared here to avoid collisions + */ +#define BTRFS_EXTENT_FLAG_SUPER (1ULL << 48) + +struct btrfs_tree_block_info { + struct btrfs_disk_key key; + __u8 level; +} __attribute__ ((__packed__)); + +struct btrfs_extent_data_ref { + __le64 root; + __le64 objectid; + __le64 offset; + __le32 count; +} __attribute__ ((__packed__)); + +struct btrfs_shared_data_ref { + __le32 count; +} __attribute__ ((__packed__)); + +struct btrfs_extent_inline_ref { + __u8 type; + __le64 offset; +} __attribute__ ((__packed__)); + +/* dev extents record free space on individual devices. The owner + * field points back to the chunk allocation mapping tree that allocated + * the extent. The chunk tree uuid field is a way to double check the owner + */ +struct btrfs_dev_extent { + __le64 chunk_tree; + __le64 chunk_objectid; + __le64 chunk_offset; + __le64 length; + __u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; +} __attribute__ ((__packed__)); + +struct btrfs_inode_ref { + __le64 index; + __le16 name_len; + /* name goes here */ +} __attribute__ ((__packed__)); + +struct btrfs_inode_extref { + __le64 parent_objectid; + __le64 index; + __le16 name_len; + __u8 name[]; + /* name goes here */ +} __attribute__ ((__packed__)); + +struct btrfs_timespec { + __le64 sec; + __le32 nsec; +} __attribute__ ((__packed__)); + +struct btrfs_inode_item { + /* nfs style generation number */ + __le64 generation; + /* transid that last touched this inode */ + __le64 transid; + __le64 size; + __le64 nbytes; + __le64 block_group; + __le32 nlink; + __le32 uid; + __le32 gid; + __le32 mode; + __le64 rdev; + __le64 flags; + + /* modification sequence number for NFS */ + __le64 sequence; + + /* + * a little future expansion, for more than this we can + * just grow the inode item and version it + */ + __le64 reserved[4]; + struct btrfs_timespec atime; + struct btrfs_timespec ctime; + struct btrfs_timespec mtime; + struct btrfs_timespec otime; +} __attribute__ ((__packed__)); + +struct btrfs_dir_log_item { + __le64 end; +} __attribute__ ((__packed__)); + +struct btrfs_dir_item { + struct btrfs_disk_key location; + __le64 transid; + __le16 data_len; + __le16 name_len; + __u8 type; +} __attribute__ ((__packed__)); + +#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) + +/* + * Internal in-memory flag that a subvolume has been marked for deletion but + * still visible as a directory + */ +#define BTRFS_ROOT_SUBVOL_DEAD (1ULL << 48) + +struct btrfs_root_item { + struct btrfs_inode_item inode; + __le64 generation; + __le64 root_dirid; + __le64 bytenr; + __le64 byte_limit; + __le64 bytes_used; + __le64 last_snapshot; + __le64 flags; + __le32 refs; + struct btrfs_disk_key drop_progress; + __u8 drop_level; + __u8 level; + + /* + * The following fields appear after subvol_uuids+subvol_times + * were introduced. + */ + + /* + * This generation number is used to test if the new fields are valid + * and up to date while reading the root item. Every time the root item + * is written out, the "generation" field is copied into this field. If + * anyone ever mounted the fs with an older kernel, we will have + * mismatching generation values here and thus must invalidate the + * new fields. See btrfs_update_root and btrfs_find_last_root for + * details. + * the offset of generation_v2 is also used as the start for the memset + * when invalidating the fields. + */ + __le64 generation_v2; + __u8 uuid[BTRFS_UUID_SIZE]; + __u8 parent_uuid[BTRFS_UUID_SIZE]; + __u8 received_uuid[BTRFS_UUID_SIZE]; + __le64 ctransid; /* updated when an inode changes */ + __le64 otransid; /* trans when created */ + __le64 stransid; /* trans when sent. non-zero for received subvol */ + __le64 rtransid; /* trans when received. non-zero for received subvol */ + struct btrfs_timespec ctime; + struct btrfs_timespec otime; + struct btrfs_timespec stime; + struct btrfs_timespec rtime; + __le64 reserved[8]; /* for future */ +} __attribute__ ((__packed__)); + +/* + * Btrfs root item used to be smaller than current size. The old format ends + * at where member generation_v2 is. + */ +static inline __u32 btrfs_legacy_root_item_size(void) +{ + return offsetof(struct btrfs_root_item, generation_v2); +} + +/* + * this is used for both forward and backward root refs + */ +struct btrfs_root_ref { + __le64 dirid; + __le64 sequence; + __le16 name_len; +} __attribute__ ((__packed__)); + +struct btrfs_disk_balance_args { + /* + * profiles to operate on, single is denoted by + * BTRFS_AVAIL_ALLOC_BIT_SINGLE + */ + __le64 profiles; + + /* + * usage filter + * BTRFS_BALANCE_ARGS_USAGE with a single value means '0..N' + * BTRFS_BALANCE_ARGS_USAGE_RANGE - range syntax, min..max + */ + union { + __le64 usage; + struct { + __le32 usage_min; + __le32 usage_max; + }; + }; + + /* devid filter */ + __le64 devid; + + /* devid subset filter [pstart..pend) */ + __le64 pstart; + __le64 pend; + + /* btrfs virtual address space subset filter [vstart..vend) */ + __le64 vstart; + __le64 vend; + + /* + * profile to convert to, single is denoted by + * BTRFS_AVAIL_ALLOC_BIT_SINGLE + */ + __le64 target; + + /* BTRFS_BALANCE_ARGS_* */ + __le64 flags; + + /* + * BTRFS_BALANCE_ARGS_LIMIT with value 'limit' + * BTRFS_BALANCE_ARGS_LIMIT_RANGE - the extend version can use minimum + * and maximum + */ + union { + __le64 limit; + struct { + __le32 limit_min; + __le32 limit_max; + }; + }; + + /* + * Process chunks that cross stripes_min..stripes_max devices, + * BTRFS_BALANCE_ARGS_STRIPES_RANGE + */ + __le32 stripes_min; + __le32 stripes_max; + + __le64 unused[6]; +} __attribute__ ((__packed__)); + +/* + * store balance parameters to disk so that balance can be properly + * resumed after crash or unmount + */ +struct btrfs_balance_item { + /* BTRFS_BALANCE_* */ + __le64 flags; + + struct btrfs_disk_balance_args data; + struct btrfs_disk_balance_args meta; + struct btrfs_disk_balance_args sys; + + __le64 unused[4]; +} __attribute__ ((__packed__)); + +enum { + BTRFS_FILE_EXTENT_INLINE = 0, + BTRFS_FILE_EXTENT_REG = 1, + BTRFS_FILE_EXTENT_PREALLOC = 2, + BTRFS_NR_FILE_EXTENT_TYPES = 3, +}; + +struct btrfs_file_extent_item { + /* + * transaction id that created this extent + */ + __le64 generation; + /* + * max number of bytes to hold this extent in ram + * when we split a compressed extent we can't know how big + * each of the resulting pieces will be. So, this is + * an upper limit on the size of the extent in ram instead of + * an exact limit. + */ + __le64 ram_bytes; + + /* + * 32 bits for the various ways we might encode the data, + * including compression and encryption. If any of these + * are set to something a given disk format doesn't understand + * it is treated like an incompat flag for reading and writing, + * but not for stat. + */ + __u8 compression; + __u8 encryption; + __le16 other_encoding; /* spare for later use */ + + /* are we inline data or a real extent? */ + __u8 type; + + /* + * disk space consumed by the extent, checksum blocks are included + * in these numbers + * + * At this offset in the structure, the inline extent data start. + */ + __le64 disk_bytenr; + __le64 disk_num_bytes; + /* + * the logical offset in file blocks (no csums) + * this extent record is for. This allows a file extent to point + * into the middle of an existing extent on disk, sharing it + * between two snapshots (useful if some bytes in the middle of the + * extent have changed + */ + __le64 offset; + /* + * the logical number of file blocks (no csums included). This + * always reflects the size uncompressed and without encoding. + */ + __le64 num_bytes; + +} __attribute__ ((__packed__)); + +struct btrfs_csum_item { + __u8 csum; +} __attribute__ ((__packed__)); + +struct btrfs_dev_stats_item { + /* + * grow this item struct at the end for future enhancements and keep + * the existing values unchanged + */ + __le64 values[BTRFS_DEV_STAT_VALUES_MAX]; +} __attribute__ ((__packed__)); + +#define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 +#define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 + +struct btrfs_dev_replace_item { + /* + * grow this item struct at the end for future enhancements and keep + * the existing values unchanged + */ + __le64 src_devid; + __le64 cursor_left; + __le64 cursor_right; + __le64 cont_reading_from_srcdev_mode; + + __le64 replace_state; + __le64 time_started; + __le64 time_stopped; + __le64 num_write_errors; + __le64 num_uncorrectable_read_errors; +} __attribute__ ((__packed__)); + +/* different types of block groups (and chunks) */ +#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) +#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) +#define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2) +#define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3) +#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4) +#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5) +#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) +#define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7) +#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8) +#define BTRFS_BLOCK_GROUP_RAID1C3 (1ULL << 9) +#define BTRFS_BLOCK_GROUP_RAID1C4 (1ULL << 10) +#define BTRFS_BLOCK_GROUP_RESERVED (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \ + BTRFS_SPACE_INFO_GLOBAL_RSV) + +#define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \ + BTRFS_BLOCK_GROUP_SYSTEM | \ + BTRFS_BLOCK_GROUP_METADATA) + +#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \ + BTRFS_BLOCK_GROUP_RAID1 | \ + BTRFS_BLOCK_GROUP_RAID1C3 | \ + BTRFS_BLOCK_GROUP_RAID1C4 | \ + BTRFS_BLOCK_GROUP_RAID5 | \ + BTRFS_BLOCK_GROUP_RAID6 | \ + BTRFS_BLOCK_GROUP_DUP | \ + BTRFS_BLOCK_GROUP_RAID10) +#define BTRFS_BLOCK_GROUP_RAID56_MASK (BTRFS_BLOCK_GROUP_RAID5 | \ + BTRFS_BLOCK_GROUP_RAID6) + +#define BTRFS_BLOCK_GROUP_RAID1_MASK (BTRFS_BLOCK_GROUP_RAID1 | \ + BTRFS_BLOCK_GROUP_RAID1C3 | \ + BTRFS_BLOCK_GROUP_RAID1C4) + +/* + * We need a bit for restriper to be able to tell when chunks of type + * SINGLE are available. This "extended" profile format is used in + * fs_info->avail_*_alloc_bits (in-memory) and balance item fields + * (on-disk). The corresponding on-disk bit in chunk.type is reserved + * to avoid remappings between two formats in future. + */ +#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48) + +/* + * A fake block group type that is used to communicate global block reserve + * size to userspace via the SPACE_INFO ioctl. + */ +#define BTRFS_SPACE_INFO_GLOBAL_RSV (1ULL << 49) + +#define BTRFS_EXTENDED_PROFILE_MASK (BTRFS_BLOCK_GROUP_PROFILE_MASK | \ + BTRFS_AVAIL_ALLOC_BIT_SINGLE) + +static inline __u64 chunk_to_extended(__u64 flags) +{ + if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0) + flags |= BTRFS_AVAIL_ALLOC_BIT_SINGLE; + + return flags; +} +static inline __u64 extended_to_chunk(__u64 flags) +{ + return flags & ~BTRFS_AVAIL_ALLOC_BIT_SINGLE; +} + +struct btrfs_block_group_item { + __le64 used; + __le64 chunk_objectid; + __le64 flags; +} __attribute__ ((__packed__)); + +struct btrfs_free_space_info { + __le32 extent_count; + __le32 flags; +} __attribute__ ((__packed__)); + +#define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0) + +#define BTRFS_QGROUP_LEVEL_SHIFT 48 +static inline __u16 btrfs_qgroup_level(__u64 qgroupid) +{ + return (__u16)(qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT); +} + +/* + * is subvolume quota turned on? + */ +#define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0) +/* + * RESCAN is set during the initialization phase + */ +#define BTRFS_QGROUP_STATUS_FLAG_RESCAN (1ULL << 1) +/* + * Some qgroup entries are known to be out of date, + * either because the configuration has changed in a way that + * makes a rescan necessary, or because the fs has been mounted + * with a non-qgroup-aware version. + * Turning qouta off and on again makes it inconsistent, too. + */ +#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2) + +#define BTRFS_QGROUP_STATUS_FLAGS_MASK (BTRFS_QGROUP_STATUS_FLAG_ON | \ + BTRFS_QGROUP_STATUS_FLAG_RESCAN | \ + BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) + +#define BTRFS_QGROUP_STATUS_VERSION 1 + +struct btrfs_qgroup_status_item { + __le64 version; + /* + * the generation is updated during every commit. As older + * versions of btrfs are not aware of qgroups, it will be + * possible to detect inconsistencies by checking the + * generation on mount time + */ + __le64 generation; + + /* flag definitions see above */ + __le64 flags; + + /* + * only used during scanning to record the progress + * of the scan. It contains a logical address + */ + __le64 rescan; +} __attribute__ ((__packed__)); + +struct btrfs_qgroup_info_item { + __le64 generation; + __le64 rfer; + __le64 rfer_cmpr; + __le64 excl; + __le64 excl_cmpr; +} __attribute__ ((__packed__)); + +struct btrfs_qgroup_limit_item { + /* + * only updated when any of the other values change + */ + __le64 flags; + __le64 max_rfer; + __le64 max_excl; + __le64 rsv_rfer; + __le64 rsv_excl; +} __attribute__ ((__packed__)); + +struct btrfs_verity_descriptor_item { + /* Size of the verity descriptor in bytes */ + __le64 size; + /* + * When we implement support for fscrypt, we will need to encrypt the + * Merkle tree for encrypted verity files. These 128 bits are for the + * eventual storage of an fscrypt initialization vector. + */ + __le64 reserved[2]; + __u8 encryption; +} __attribute__ ((__packed__)); + +#endif /* _BTRFS_CTREE_H_ */ diff --git a/src/basic/linux/can/netlink.h b/src/basic/linux/can/netlink.h new file mode 100644 index 0000000..02ec32d --- /dev/null +++ b/src/basic/linux/can/netlink.h @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * linux/can/netlink.h + * + * Definitions for the CAN netlink interface + * + * Copyright (c) 2009 Wolfgang Grandegger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the version 2 of the GNU General Public License + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _UAPI_CAN_NETLINK_H +#define _UAPI_CAN_NETLINK_H + +#include + +/* + * CAN bit-timing parameters + * + * For further information, please read chapter "8 BIT TIMING + * REQUIREMENTS" of the "Bosch CAN Specification version 2.0" + * at http://www.semiconductors.bosch.de/pdf/can2spec.pdf. + */ +struct can_bittiming { + __u32 bitrate; /* Bit-rate in bits/second */ + __u32 sample_point; /* Sample point in one-tenth of a percent */ + __u32 tq; /* Time quanta (TQ) in nanoseconds */ + __u32 prop_seg; /* Propagation segment in TQs */ + __u32 phase_seg1; /* Phase buffer segment 1 in TQs */ + __u32 phase_seg2; /* Phase buffer segment 2 in TQs */ + __u32 sjw; /* Synchronisation jump width in TQs */ + __u32 brp; /* Bit-rate prescaler */ +}; + +/* + * CAN hardware-dependent bit-timing constant + * + * Used for calculating and checking bit-timing parameters + */ +struct can_bittiming_const { + char name[16]; /* Name of the CAN controller hardware */ + __u32 tseg1_min; /* Time segment 1 = prop_seg + phase_seg1 */ + __u32 tseg1_max; + __u32 tseg2_min; /* Time segment 2 = phase_seg2 */ + __u32 tseg2_max; + __u32 sjw_max; /* Synchronisation jump width */ + __u32 brp_min; /* Bit-rate prescaler */ + __u32 brp_max; + __u32 brp_inc; +}; + +/* + * CAN clock parameters + */ +struct can_clock { + __u32 freq; /* CAN system clock frequency in Hz */ +}; + +/* + * CAN operational and error states + */ +enum can_state { + CAN_STATE_ERROR_ACTIVE = 0, /* RX/TX error count < 96 */ + CAN_STATE_ERROR_WARNING, /* RX/TX error count < 128 */ + CAN_STATE_ERROR_PASSIVE, /* RX/TX error count < 256 */ + CAN_STATE_BUS_OFF, /* RX/TX error count >= 256 */ + CAN_STATE_STOPPED, /* Device is stopped */ + CAN_STATE_SLEEPING, /* Device is sleeping */ + CAN_STATE_MAX +}; + +/* + * CAN bus error counters + */ +struct can_berr_counter { + __u16 txerr; + __u16 rxerr; +}; + +/* + * CAN controller mode + */ +struct can_ctrlmode { + __u32 mask; + __u32 flags; +}; + +#define CAN_CTRLMODE_LOOPBACK 0x01 /* Loopback mode */ +#define CAN_CTRLMODE_LISTENONLY 0x02 /* Listen-only mode */ +#define CAN_CTRLMODE_3_SAMPLES 0x04 /* Triple sampling mode */ +#define CAN_CTRLMODE_ONE_SHOT 0x08 /* One-Shot mode */ +#define CAN_CTRLMODE_BERR_REPORTING 0x10 /* Bus-error reporting */ +#define CAN_CTRLMODE_FD 0x20 /* CAN FD mode */ +#define CAN_CTRLMODE_PRESUME_ACK 0x40 /* Ignore missing CAN ACKs */ +#define CAN_CTRLMODE_FD_NON_ISO 0x80 /* CAN FD in non-ISO mode */ +#define CAN_CTRLMODE_CC_LEN8_DLC 0x100 /* Classic CAN DLC option */ +#define CAN_CTRLMODE_TDC_AUTO 0x200 /* CAN transiver automatically calculates TDCV */ +#define CAN_CTRLMODE_TDC_MANUAL 0x400 /* TDCV is manually set up by user */ + +/* + * CAN device statistics + */ +struct can_device_stats { + __u32 bus_error; /* Bus errors */ + __u32 error_warning; /* Changes to error warning state */ + __u32 error_passive; /* Changes to error passive state */ + __u32 bus_off; /* Changes to bus off state */ + __u32 arbitration_lost; /* Arbitration lost errors */ + __u32 restarts; /* CAN controller re-starts */ +}; + +/* + * CAN netlink interface + */ +enum { + IFLA_CAN_UNSPEC, + IFLA_CAN_BITTIMING, + IFLA_CAN_BITTIMING_CONST, + IFLA_CAN_CLOCK, + IFLA_CAN_STATE, + IFLA_CAN_CTRLMODE, + IFLA_CAN_RESTART_MS, + IFLA_CAN_RESTART, + IFLA_CAN_BERR_COUNTER, + IFLA_CAN_DATA_BITTIMING, + IFLA_CAN_DATA_BITTIMING_CONST, + IFLA_CAN_TERMINATION, + IFLA_CAN_TERMINATION_CONST, + IFLA_CAN_BITRATE_CONST, + IFLA_CAN_DATA_BITRATE_CONST, + IFLA_CAN_BITRATE_MAX, + IFLA_CAN_TDC, + IFLA_CAN_CTRLMODE_EXT, + + /* add new constants above here */ + __IFLA_CAN_MAX, + IFLA_CAN_MAX = __IFLA_CAN_MAX - 1 +}; + +/* + * CAN FD Transmitter Delay Compensation (TDC) + * + * Please refer to struct can_tdc_const and can_tdc in + * include/linux/can/bittiming.h for further details. + */ +enum { + IFLA_CAN_TDC_UNSPEC, + IFLA_CAN_TDC_TDCV_MIN, /* u32 */ + IFLA_CAN_TDC_TDCV_MAX, /* u32 */ + IFLA_CAN_TDC_TDCO_MIN, /* u32 */ + IFLA_CAN_TDC_TDCO_MAX, /* u32 */ + IFLA_CAN_TDC_TDCF_MIN, /* u32 */ + IFLA_CAN_TDC_TDCF_MAX, /* u32 */ + IFLA_CAN_TDC_TDCV, /* u32 */ + IFLA_CAN_TDC_TDCO, /* u32 */ + IFLA_CAN_TDC_TDCF, /* u32 */ + + /* add new constants above here */ + __IFLA_CAN_TDC, + IFLA_CAN_TDC_MAX = __IFLA_CAN_TDC - 1 +}; + +/* + * IFLA_CAN_CTRLMODE_EXT nest: controller mode extended parameters + */ +enum { + IFLA_CAN_CTRLMODE_UNSPEC, + IFLA_CAN_CTRLMODE_SUPPORTED, /* u32 */ + + /* add new constants above here */ + __IFLA_CAN_CTRLMODE, + IFLA_CAN_CTRLMODE_MAX = __IFLA_CAN_CTRLMODE - 1 +}; + +/* u16 termination range: 1..65535 Ohms */ +#define CAN_TERMINATION_DISABLED 0 + +#endif /* !_UAPI_CAN_NETLINK_H */ diff --git a/src/basic/linux/can/vxcan.h b/src/basic/linux/can/vxcan.h new file mode 100644 index 0000000..4fa9d87 --- /dev/null +++ b/src/basic/linux/can/vxcan.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +#ifndef _UAPI_CAN_VXCAN_H +#define _UAPI_CAN_VXCAN_H + +enum { + VXCAN_INFO_UNSPEC, + VXCAN_INFO_PEER, + + __VXCAN_INFO_MAX +#define VXCAN_INFO_MAX (__VXCAN_INFO_MAX - 1) +}; + +#endif diff --git a/src/basic/linux/cfm_bridge.h b/src/basic/linux/cfm_bridge.h new file mode 100644 index 0000000..3c1cbd1 --- /dev/null +++ b/src/basic/linux/cfm_bridge.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_CFM_BRIDGE_H_ +#define _UAPI_LINUX_CFM_BRIDGE_H_ + +#include +#include + +#define ETHER_HEADER_LENGTH (6+6+4+2) +#define CFM_MAID_LENGTH 48 +#define CFM_CCM_PDU_LENGTH 75 +#define CFM_PORT_STATUS_TLV_LENGTH 4 +#define CFM_IF_STATUS_TLV_LENGTH 4 +#define CFM_IF_STATUS_TLV_TYPE 4 +#define CFM_PORT_STATUS_TLV_TYPE 2 +#define CFM_ENDE_TLV_TYPE 0 +#define CFM_CCM_MAX_FRAME_LENGTH (ETHER_HEADER_LENGTH+\ + CFM_CCM_PDU_LENGTH+\ + CFM_PORT_STATUS_TLV_LENGTH+\ + CFM_IF_STATUS_TLV_LENGTH) +#define CFM_FRAME_PRIO 7 +#define CFM_CCM_TLV_OFFSET 70 +#define CFM_CCM_PDU_MAID_OFFSET 10 +#define CFM_CCM_PDU_MEPID_OFFSET 8 +#define CFM_CCM_PDU_SEQNR_OFFSET 4 +#define CFM_CCM_PDU_TLV_OFFSET 74 +#define CFM_CCM_ITU_RESERVED_SIZE 16 + +struct br_cfm_common_hdr { + __u8 mdlevel_version; + __u8 opcode; + __u8 flags; + __u8 tlv_offset; +}; + +enum br_cfm_opcodes { + BR_CFM_OPCODE_CCM = 0x1, +}; + +/* MEP domain */ +enum br_cfm_domain { + BR_CFM_PORT, + BR_CFM_VLAN, +}; + +/* MEP direction */ +enum br_cfm_mep_direction { + BR_CFM_MEP_DIRECTION_DOWN, + BR_CFM_MEP_DIRECTION_UP, +}; + +/* CCM interval supported. */ +enum br_cfm_ccm_interval { + BR_CFM_CCM_INTERVAL_NONE, + BR_CFM_CCM_INTERVAL_3_3_MS, + BR_CFM_CCM_INTERVAL_10_MS, + BR_CFM_CCM_INTERVAL_100_MS, + BR_CFM_CCM_INTERVAL_1_SEC, + BR_CFM_CCM_INTERVAL_10_SEC, + BR_CFM_CCM_INTERVAL_1_MIN, + BR_CFM_CCM_INTERVAL_10_MIN, +}; + +#endif diff --git a/src/basic/linux/fib_rules.h b/src/basic/linux/fib_rules.h new file mode 100644 index 0000000..232df14 --- /dev/null +++ b/src/basic/linux/fib_rules.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_FIB_RULES_H +#define __LINUX_FIB_RULES_H + +#include +#include + +/* rule is permanent, and cannot be deleted */ +#define FIB_RULE_PERMANENT 0x00000001 +#define FIB_RULE_INVERT 0x00000002 +#define FIB_RULE_UNRESOLVED 0x00000004 +#define FIB_RULE_IIF_DETACHED 0x00000008 +#define FIB_RULE_DEV_DETACHED FIB_RULE_IIF_DETACHED +#define FIB_RULE_OIF_DETACHED 0x00000010 + +/* try to find source address in routing lookups */ +#define FIB_RULE_FIND_SADDR 0x00010000 + +struct fib_rule_hdr { + __u8 family; + __u8 dst_len; + __u8 src_len; + __u8 tos; + + __u8 table; + __u8 res1; /* reserved */ + __u8 res2; /* reserved */ + __u8 action; + + __u32 flags; +}; + +struct fib_rule_uid_range { + __u32 start; + __u32 end; +}; + +struct fib_rule_port_range { + __u16 start; + __u16 end; +}; + +enum { + FRA_UNSPEC, + FRA_DST, /* destination address */ + FRA_SRC, /* source address */ + FRA_IIFNAME, /* interface name */ +#define FRA_IFNAME FRA_IIFNAME + FRA_GOTO, /* target to jump to (FR_ACT_GOTO) */ + FRA_UNUSED2, + FRA_PRIORITY, /* priority/preference */ + FRA_UNUSED3, + FRA_UNUSED4, + FRA_UNUSED5, + FRA_FWMARK, /* mark */ + FRA_FLOW, /* flow/class id */ + FRA_TUN_ID, + FRA_SUPPRESS_IFGROUP, + FRA_SUPPRESS_PREFIXLEN, + FRA_TABLE, /* Extended table id */ + FRA_FWMASK, /* mask for netfilter mark */ + FRA_OIFNAME, + FRA_PAD, + FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ + FRA_UID_RANGE, /* UID range */ + FRA_PROTOCOL, /* Originator of the rule */ + FRA_IP_PROTO, /* ip proto */ + FRA_SPORT_RANGE, /* sport */ + FRA_DPORT_RANGE, /* dport */ + __FRA_MAX +}; + +#define FRA_MAX (__FRA_MAX - 1) + +enum { + FR_ACT_UNSPEC, + FR_ACT_TO_TBL, /* Pass to fixed table */ + FR_ACT_GOTO, /* Jump to another rule */ + FR_ACT_NOP, /* No operation */ + FR_ACT_RES3, + FR_ACT_RES4, + FR_ACT_BLACKHOLE, /* Drop without notification */ + FR_ACT_UNREACHABLE, /* Drop with ENETUNREACH */ + FR_ACT_PROHIBIT, /* Drop with EACCES */ + __FR_ACT_MAX, +}; + +#define FR_ACT_MAX (__FR_ACT_MAX - 1) + +#endif diff --git a/src/basic/linux/fou.h b/src/basic/linux/fou.h new file mode 100644 index 0000000..87c2c9f --- /dev/null +++ b/src/basic/linux/fou.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* fou.h - FOU Interface */ + +#ifndef _UAPI_LINUX_FOU_H +#define _UAPI_LINUX_FOU_H + +/* NETLINK_GENERIC related info + */ +#define FOU_GENL_NAME "fou" +#define FOU_GENL_VERSION 0x1 + +enum { + FOU_ATTR_UNSPEC, + FOU_ATTR_PORT, /* u16 */ + FOU_ATTR_AF, /* u8 */ + FOU_ATTR_IPPROTO, /* u8 */ + FOU_ATTR_TYPE, /* u8 */ + FOU_ATTR_REMCSUM_NOPARTIAL, /* flag */ + FOU_ATTR_LOCAL_V4, /* u32 */ + FOU_ATTR_LOCAL_V6, /* in6_addr */ + FOU_ATTR_PEER_V4, /* u32 */ + FOU_ATTR_PEER_V6, /* in6_addr */ + FOU_ATTR_PEER_PORT, /* u16 */ + FOU_ATTR_IFINDEX, /* s32 */ + + __FOU_ATTR_MAX, +}; + +#define FOU_ATTR_MAX (__FOU_ATTR_MAX - 1) + +enum { + FOU_CMD_UNSPEC, + FOU_CMD_ADD, + FOU_CMD_DEL, + FOU_CMD_GET, + + __FOU_CMD_MAX, +}; + +enum { + FOU_ENCAP_UNSPEC, + FOU_ENCAP_DIRECT, + FOU_ENCAP_GUE, +}; + +#define FOU_CMD_MAX (__FOU_CMD_MAX - 1) + +#endif /* _UAPI_LINUX_FOU_H */ diff --git a/src/basic/linux/genetlink.h b/src/basic/linux/genetlink.h new file mode 100644 index 0000000..ddba3ca --- /dev/null +++ b/src/basic/linux/genetlink.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_GENERIC_NETLINK_H +#define _UAPI__LINUX_GENERIC_NETLINK_H + +#include +#include + +#define GENL_NAMSIZ 16 /* length of family name */ + +#define GENL_MIN_ID NLMSG_MIN_TYPE +#define GENL_MAX_ID 1023 + +struct genlmsghdr { + __u8 cmd; + __u8 version; + __u16 reserved; +}; + +#define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr)) + +#define GENL_ADMIN_PERM 0x01 +#define GENL_CMD_CAP_DO 0x02 +#define GENL_CMD_CAP_DUMP 0x04 +#define GENL_CMD_CAP_HASPOL 0x08 +#define GENL_UNS_ADMIN_PERM 0x10 + +/* + * List of reserved static generic netlink identifiers: + */ +#define GENL_ID_CTRL NLMSG_MIN_TYPE +#define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) +#define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2) +/* must be last reserved + 1 */ +#define GENL_START_ALLOC (NLMSG_MIN_TYPE + 3) + +/************************************************************************** + * Controller + **************************************************************************/ + +enum { + CTRL_CMD_UNSPEC, + CTRL_CMD_NEWFAMILY, + CTRL_CMD_DELFAMILY, + CTRL_CMD_GETFAMILY, + CTRL_CMD_NEWOPS, + CTRL_CMD_DELOPS, + CTRL_CMD_GETOPS, + CTRL_CMD_NEWMCAST_GRP, + CTRL_CMD_DELMCAST_GRP, + CTRL_CMD_GETMCAST_GRP, /* unused */ + CTRL_CMD_GETPOLICY, + __CTRL_CMD_MAX, +}; + +#define CTRL_CMD_MAX (__CTRL_CMD_MAX - 1) + +enum { + CTRL_ATTR_UNSPEC, + CTRL_ATTR_FAMILY_ID, + CTRL_ATTR_FAMILY_NAME, + CTRL_ATTR_VERSION, + CTRL_ATTR_HDRSIZE, + CTRL_ATTR_MAXATTR, + CTRL_ATTR_OPS, + CTRL_ATTR_MCAST_GROUPS, + CTRL_ATTR_POLICY, + CTRL_ATTR_OP_POLICY, + CTRL_ATTR_OP, + __CTRL_ATTR_MAX, +}; + +#define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1) + +enum { + CTRL_ATTR_OP_UNSPEC, + CTRL_ATTR_OP_ID, + CTRL_ATTR_OP_FLAGS, + __CTRL_ATTR_OP_MAX, +}; + +#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1) + +enum { + CTRL_ATTR_MCAST_GRP_UNSPEC, + CTRL_ATTR_MCAST_GRP_NAME, + CTRL_ATTR_MCAST_GRP_ID, + __CTRL_ATTR_MCAST_GRP_MAX, +}; + +#define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1) + +enum { + CTRL_ATTR_POLICY_UNSPEC, + CTRL_ATTR_POLICY_DO, + CTRL_ATTR_POLICY_DUMP, + + __CTRL_ATTR_POLICY_DUMP_MAX, + CTRL_ATTR_POLICY_DUMP_MAX = __CTRL_ATTR_POLICY_DUMP_MAX - 1 +}; + +#define CTRL_ATTR_POLICY_MAX (__CTRL_ATTR_POLICY_DUMP_MAX - 1) + +#endif /* _UAPI__LINUX_GENERIC_NETLINK_H */ diff --git a/src/basic/linux/hdlc/ioctl.h b/src/basic/linux/hdlc/ioctl.h new file mode 100644 index 0000000..b06341a --- /dev/null +++ b/src/basic/linux/hdlc/ioctl.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __HDLC_IOCTL_H__ +#define __HDLC_IOCTL_H__ + + +#define GENERIC_HDLC_VERSION 4 /* For synchronization with sethdlc utility */ + +#define CLOCK_DEFAULT 0 /* Default setting */ +#define CLOCK_EXT 1 /* External TX and RX clock - DTE */ +#define CLOCK_INT 2 /* Internal TX and RX clock - DCE */ +#define CLOCK_TXINT 3 /* Internal TX and external RX clock */ +#define CLOCK_TXFROMRX 4 /* TX clock derived from external RX clock */ + + +#define ENCODING_DEFAULT 0 /* Default setting */ +#define ENCODING_NRZ 1 +#define ENCODING_NRZI 2 +#define ENCODING_FM_MARK 3 +#define ENCODING_FM_SPACE 4 +#define ENCODING_MANCHESTER 5 + + +#define PARITY_DEFAULT 0 /* Default setting */ +#define PARITY_NONE 1 /* No parity */ +#define PARITY_CRC16_PR0 2 /* CRC16, initial value 0x0000 */ +#define PARITY_CRC16_PR1 3 /* CRC16, initial value 0xFFFF */ +#define PARITY_CRC16_PR0_CCITT 4 /* CRC16, initial 0x0000, ITU-T version */ +#define PARITY_CRC16_PR1_CCITT 5 /* CRC16, initial 0xFFFF, ITU-T version */ +#define PARITY_CRC32_PR0_CCITT 6 /* CRC32, initial value 0x00000000 */ +#define PARITY_CRC32_PR1_CCITT 7 /* CRC32, initial value 0xFFFFFFFF */ + +#define LMI_DEFAULT 0 /* Default setting */ +#define LMI_NONE 1 /* No LMI, all PVCs are static */ +#define LMI_ANSI 2 /* ANSI Annex D */ +#define LMI_CCITT 3 /* ITU-T Annex A */ +#define LMI_CISCO 4 /* The "original" LMI, aka Gang of Four */ + +#ifndef __ASSEMBLY__ + +typedef struct { + unsigned int clock_rate; /* bits per second */ + unsigned int clock_type; /* internal, external, TX-internal etc. */ + unsigned short loopback; +} sync_serial_settings; /* V.35, V.24, X.21 */ + +typedef struct { + unsigned int clock_rate; /* bits per second */ + unsigned int clock_type; /* internal, external, TX-internal etc. */ + unsigned short loopback; + unsigned int slot_map; +} te1_settings; /* T1, E1 */ + +typedef struct { + unsigned short encoding; + unsigned short parity; +} raw_hdlc_proto; + +typedef struct { + unsigned int t391; + unsigned int t392; + unsigned int n391; + unsigned int n392; + unsigned int n393; + unsigned short lmi; + unsigned short dce; /* 1 for DCE (network side) operation */ +} fr_proto; + +typedef struct { + unsigned int dlci; +} fr_proto_pvc; /* for creating/deleting FR PVCs */ + +typedef struct { + unsigned int dlci; + char master[IFNAMSIZ]; /* Name of master FRAD device */ +}fr_proto_pvc_info; /* for returning PVC information only */ + +typedef struct { + unsigned int interval; + unsigned int timeout; +} cisco_proto; + +typedef struct { + unsigned short dce; /* 1 for DCE (network side) operation */ + unsigned int modulo; /* modulo (8 = basic / 128 = extended) */ + unsigned int window; /* frame window size */ + unsigned int t1; /* timeout t1 */ + unsigned int t2; /* timeout t2 */ + unsigned int n2; /* frame retry counter */ +} x25_hdlc_proto; + +/* PPP doesn't need any info now - supply length = 0 to ioctl */ + +#endif /* __ASSEMBLY__ */ +#endif /* __HDLC_IOCTL_H__ */ diff --git a/src/basic/linux/if.h b/src/basic/linux/if.h new file mode 100644 index 0000000..e79f5c8 --- /dev/null +++ b/src/basic/linux/if.h @@ -0,0 +1,297 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Global definitions for the INET interface module. + * + * Version: @(#)if.h 1.0.2 04/18/93 + * + * Authors: Original taken from Berkeley UNIX 4.3, (c) UCB 1982-1988 + * Ross Biro + * Fred N. van Kempen, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _LINUX_IF_H +#define _LINUX_IF_H + +#include /* for compatibility with glibc */ +#include /* for "__kernel_caddr_t" et al */ +#include /* for "struct sockaddr" et al */ + +#ifndef __KERNEL__ +#include /* for struct sockaddr. */ +#endif + +#if __UAPI_DEF_IF_IFNAMSIZ +#define IFNAMSIZ 16 +#endif /* __UAPI_DEF_IF_IFNAMSIZ */ +#define IFALIASZ 256 +#define ALTIFNAMSIZ 128 +#include + +/* For glibc compatibility. An empty enum does not compile. */ +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 || \ + __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 +/** + * enum net_device_flags - &struct net_device flags + * + * These are the &struct net_device flags, they can be set by drivers, the + * kernel and some can be triggered by userspace. Userspace can query and + * set these flags using userspace utilities but there is also a sysfs + * entry available for all dev flags which can be queried and set. These flags + * are shared for all types of net_devices. The sysfs entries are available + * via /sys/class/net//flags. Flags which can be toggled through sysfs + * are annotated below, note that only a few flags can be toggled and some + * other flags are always preserved from the original net_device flags + * even if you try to set them via sysfs. Flags which are always preserved + * are kept under the flag grouping @IFF_VOLATILE. Flags which are volatile + * are annotated below as such. + * + * You should have a pretty good reason to be extending these flags. + * + * @IFF_UP: interface is up. Can be toggled through sysfs. + * @IFF_BROADCAST: broadcast address valid. Volatile. + * @IFF_DEBUG: turn on debugging. Can be toggled through sysfs. + * @IFF_LOOPBACK: is a loopback net. Volatile. + * @IFF_POINTOPOINT: interface is has p-p link. Volatile. + * @IFF_NOTRAILERS: avoid use of trailers. Can be toggled through sysfs. + * Volatile. + * @IFF_RUNNING: interface RFC2863 OPER_UP. Volatile. + * @IFF_NOARP: no ARP protocol. Can be toggled through sysfs. Volatile. + * @IFF_PROMISC: receive all packets. Can be toggled through sysfs. + * @IFF_ALLMULTI: receive all multicast packets. Can be toggled through + * sysfs. + * @IFF_MASTER: master of a load balancer. Volatile. + * @IFF_SLAVE: slave of a load balancer. Volatile. + * @IFF_MULTICAST: Supports multicast. Can be toggled through sysfs. + * @IFF_PORTSEL: can set media type. Can be toggled through sysfs. + * @IFF_AUTOMEDIA: auto media select active. Can be toggled through sysfs. + * @IFF_DYNAMIC: dialup device with changing addresses. Can be toggled + * through sysfs. + * @IFF_LOWER_UP: driver signals L1 up. Volatile. + * @IFF_DORMANT: driver signals dormant. Volatile. + * @IFF_ECHO: echo sent packets. Volatile. + */ +enum net_device_flags { +/* for compatibility with glibc net/if.h */ +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS + IFF_UP = 1<<0, /* sysfs */ + IFF_BROADCAST = 1<<1, /* volatile */ + IFF_DEBUG = 1<<2, /* sysfs */ + IFF_LOOPBACK = 1<<3, /* volatile */ + IFF_POINTOPOINT = 1<<4, /* volatile */ + IFF_NOTRAILERS = 1<<5, /* sysfs */ + IFF_RUNNING = 1<<6, /* volatile */ + IFF_NOARP = 1<<7, /* sysfs */ + IFF_PROMISC = 1<<8, /* sysfs */ + IFF_ALLMULTI = 1<<9, /* sysfs */ + IFF_MASTER = 1<<10, /* volatile */ + IFF_SLAVE = 1<<11, /* volatile */ + IFF_MULTICAST = 1<<12, /* sysfs */ + IFF_PORTSEL = 1<<13, /* sysfs */ + IFF_AUTOMEDIA = 1<<14, /* sysfs */ + IFF_DYNAMIC = 1<<15, /* sysfs */ +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS */ +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO + IFF_LOWER_UP = 1<<16, /* volatile */ + IFF_DORMANT = 1<<17, /* volatile */ + IFF_ECHO = 1<<18, /* volatile */ +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */ +}; +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 || __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 */ + +/* for compatibility with glibc net/if.h */ +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS +#define IFF_UP IFF_UP +#define IFF_BROADCAST IFF_BROADCAST +#define IFF_DEBUG IFF_DEBUG +#define IFF_LOOPBACK IFF_LOOPBACK +#define IFF_POINTOPOINT IFF_POINTOPOINT +#define IFF_NOTRAILERS IFF_NOTRAILERS +#define IFF_RUNNING IFF_RUNNING +#define IFF_NOARP IFF_NOARP +#define IFF_PROMISC IFF_PROMISC +#define IFF_ALLMULTI IFF_ALLMULTI +#define IFF_MASTER IFF_MASTER +#define IFF_SLAVE IFF_SLAVE +#define IFF_MULTICAST IFF_MULTICAST +#define IFF_PORTSEL IFF_PORTSEL +#define IFF_AUTOMEDIA IFF_AUTOMEDIA +#define IFF_DYNAMIC IFF_DYNAMIC +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS */ + +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO +#define IFF_LOWER_UP IFF_LOWER_UP +#define IFF_DORMANT IFF_DORMANT +#define IFF_ECHO IFF_ECHO +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */ + +#define IFF_VOLATILE (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\ + IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT) + +#define IF_GET_IFACE 0x0001 /* for querying only */ +#define IF_GET_PROTO 0x0002 + +/* For definitions see hdlc.h */ +#define IF_IFACE_V35 0x1000 /* V.35 serial interface */ +#define IF_IFACE_V24 0x1001 /* V.24 serial interface */ +#define IF_IFACE_X21 0x1002 /* X.21 serial interface */ +#define IF_IFACE_T1 0x1003 /* T1 telco serial interface */ +#define IF_IFACE_E1 0x1004 /* E1 telco serial interface */ +#define IF_IFACE_SYNC_SERIAL 0x1005 /* can't be set by software */ +#define IF_IFACE_X21D 0x1006 /* X.21 Dual Clocking (FarSite) */ + +/* For definitions see hdlc.h */ +#define IF_PROTO_HDLC 0x2000 /* raw HDLC protocol */ +#define IF_PROTO_PPP 0x2001 /* PPP protocol */ +#define IF_PROTO_CISCO 0x2002 /* Cisco HDLC protocol */ +#define IF_PROTO_FR 0x2003 /* Frame Relay protocol */ +#define IF_PROTO_FR_ADD_PVC 0x2004 /* Create FR PVC */ +#define IF_PROTO_FR_DEL_PVC 0x2005 /* Delete FR PVC */ +#define IF_PROTO_X25 0x2006 /* X.25 */ +#define IF_PROTO_HDLC_ETH 0x2007 /* raw HDLC, Ethernet emulation */ +#define IF_PROTO_FR_ADD_ETH_PVC 0x2008 /* Create FR Ethernet-bridged PVC */ +#define IF_PROTO_FR_DEL_ETH_PVC 0x2009 /* Delete FR Ethernet-bridged PVC */ +#define IF_PROTO_FR_PVC 0x200A /* for reading PVC status */ +#define IF_PROTO_FR_ETH_PVC 0x200B +#define IF_PROTO_RAW 0x200C /* RAW Socket */ + +/* RFC 2863 operational status */ +enum { + IF_OPER_UNKNOWN, + IF_OPER_NOTPRESENT, + IF_OPER_DOWN, + IF_OPER_LOWERLAYERDOWN, + IF_OPER_TESTING, + IF_OPER_DORMANT, + IF_OPER_UP, +}; + +/* link modes */ +enum { + IF_LINK_MODE_DEFAULT, + IF_LINK_MODE_DORMANT, /* limit upward transition to dormant */ + IF_LINK_MODE_TESTING, /* limit upward transition to testing */ +}; + +/* + * Device mapping structure. I'd just gone off and designed a + * beautiful scheme using only loadable modules with arguments + * for driver options and along come the PCMCIA people 8) + * + * Ah well. The get() side of this is good for WDSETUP, and it'll + * be handy for debugging things. The set side is fine for now and + * being very small might be worth keeping for clean configuration. + */ + +/* for compatibility with glibc net/if.h */ +#if __UAPI_DEF_IF_IFMAP +struct ifmap { + unsigned long mem_start; + unsigned long mem_end; + unsigned short base_addr; + unsigned char irq; + unsigned char dma; + unsigned char port; + /* 3 bytes spare */ +}; +#endif /* __UAPI_DEF_IF_IFMAP */ + +struct if_settings { + unsigned int type; /* Type of physical device or protocol */ + unsigned int size; /* Size of the data allocated by the caller */ + union { + /* {atm/eth/dsl}_settings anyone ? */ + raw_hdlc_proto *raw_hdlc; + cisco_proto *cisco; + fr_proto *fr; + fr_proto_pvc *fr_pvc; + fr_proto_pvc_info *fr_pvc_info; + x25_hdlc_proto *x25; + + /* interface settings */ + sync_serial_settings *sync; + te1_settings *te1; + } ifs_ifsu; +}; + +/* + * Interface request structure used for socket + * ioctl's. All interface ioctl's must have parameter + * definitions which begin with ifr_name. The + * remainder may be interface specific. + */ + +/* for compatibility with glibc net/if.h */ +#if __UAPI_DEF_IF_IFREQ +struct ifreq { +#define IFHWADDRLEN 6 + union + { + char ifrn_name[IFNAMSIZ]; /* if name, e.g. "en0" */ + } ifr_ifrn; + + union { + struct sockaddr ifru_addr; + struct sockaddr ifru_dstaddr; + struct sockaddr ifru_broadaddr; + struct sockaddr ifru_netmask; + struct sockaddr ifru_hwaddr; + short ifru_flags; + int ifru_ivalue; + int ifru_mtu; + struct ifmap ifru_map; + char ifru_slave[IFNAMSIZ]; /* Just fits the size */ + char ifru_newname[IFNAMSIZ]; + void * ifru_data; + struct if_settings ifru_settings; + } ifr_ifru; +}; +#endif /* __UAPI_DEF_IF_IFREQ */ + +#define ifr_name ifr_ifrn.ifrn_name /* interface name */ +#define ifr_hwaddr ifr_ifru.ifru_hwaddr /* MAC address */ +#define ifr_addr ifr_ifru.ifru_addr /* address */ +#define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-p lnk */ +#define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */ +#define ifr_netmask ifr_ifru.ifru_netmask /* interface net mask */ +#define ifr_flags ifr_ifru.ifru_flags /* flags */ +#define ifr_metric ifr_ifru.ifru_ivalue /* metric */ +#define ifr_mtu ifr_ifru.ifru_mtu /* mtu */ +#define ifr_map ifr_ifru.ifru_map /* device map */ +#define ifr_slave ifr_ifru.ifru_slave /* slave device */ +#define ifr_data ifr_ifru.ifru_data /* for use by interface */ +#define ifr_ifindex ifr_ifru.ifru_ivalue /* interface index */ +#define ifr_bandwidth ifr_ifru.ifru_ivalue /* link bandwidth */ +#define ifr_qlen ifr_ifru.ifru_ivalue /* Queue length */ +#define ifr_newname ifr_ifru.ifru_newname /* New name */ +#define ifr_settings ifr_ifru.ifru_settings /* Device/proto settings*/ + +/* + * Structure used in SIOCGIFCONF request. + * Used to retrieve interface configuration + * for machine (useful for programs which + * must know all networks accessible). + */ + +/* for compatibility with glibc net/if.h */ +#if __UAPI_DEF_IF_IFCONF +struct ifconf { + int ifc_len; /* size of buffer */ + union { + char *ifcu_buf; + struct ifreq *ifcu_req; + } ifc_ifcu; +}; +#endif /* __UAPI_DEF_IF_IFCONF */ + +#define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ +#define ifc_req ifc_ifcu.ifcu_req /* array of structures */ + +#endif /* _LINUX_IF_H */ diff --git a/src/basic/linux/if_addr.h b/src/basic/linux/if_addr.h new file mode 100644 index 0000000..1c392dd --- /dev/null +++ b/src/basic/linux/if_addr.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_IF_ADDR_H +#define __LINUX_IF_ADDR_H + +#include +#include + +struct ifaddrmsg { + __u8 ifa_family; + __u8 ifa_prefixlen; /* The prefix length */ + __u8 ifa_flags; /* Flags */ + __u8 ifa_scope; /* Address scope */ + __u32 ifa_index; /* Link index */ +}; + +/* + * Important comment: + * IFA_ADDRESS is prefix address, rather than local interface address. + * It makes no difference for normally configured broadcast interfaces, + * but for point-to-point IFA_ADDRESS is DESTINATION address, + * local address is supplied in IFA_LOCAL attribute. + * + * IFA_FLAGS is a u32 attribute that extends the u8 field ifa_flags. + * If present, the value from struct ifaddrmsg will be ignored. + */ +enum { + IFA_UNSPEC, + IFA_ADDRESS, + IFA_LOCAL, + IFA_LABEL, + IFA_BROADCAST, + IFA_ANYCAST, + IFA_CACHEINFO, + IFA_MULTICAST, + IFA_FLAGS, + IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */ + IFA_TARGET_NETNSID, + IFA_PROTO, /* u8, address protocol */ + __IFA_MAX, +}; + +#define IFA_MAX (__IFA_MAX - 1) + +/* ifa_flags */ +#define IFA_F_SECONDARY 0x01 +#define IFA_F_TEMPORARY IFA_F_SECONDARY + +#define IFA_F_NODAD 0x02 +#define IFA_F_OPTIMISTIC 0x04 +#define IFA_F_DADFAILED 0x08 +#define IFA_F_HOMEADDRESS 0x10 +#define IFA_F_DEPRECATED 0x20 +#define IFA_F_TENTATIVE 0x40 +#define IFA_F_PERMANENT 0x80 +#define IFA_F_MANAGETEMPADDR 0x100 +#define IFA_F_NOPREFIXROUTE 0x200 +#define IFA_F_MCAUTOJOIN 0x400 +#define IFA_F_STABLE_PRIVACY 0x800 + +struct ifa_cacheinfo { + __u32 ifa_prefered; + __u32 ifa_valid; + __u32 cstamp; /* created timestamp, hundredths of seconds */ + __u32 tstamp; /* updated timestamp, hundredths of seconds */ +}; + +/* backwards compatibility for userspace */ +#ifndef __KERNEL__ +#define IFA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg)))) +#define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg)) +#endif + +/* ifa_proto */ +#define IFAPROT_UNSPEC 0 +#define IFAPROT_KERNEL_LO 1 /* loopback */ +#define IFAPROT_KERNEL_RA 2 /* set by kernel from router announcement */ +#define IFAPROT_KERNEL_LL 3 /* link-local set by kernel */ + +#endif diff --git a/src/basic/linux/if_bonding.h b/src/basic/linux/if_bonding.h new file mode 100644 index 0000000..d174914 --- /dev/null +++ b/src/basic/linux/if_bonding.h @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */ +/* + * Bond several ethernet interfaces into a Cisco, running 'Etherchannel'. + * + * + * Portions are (c) Copyright 1995 Simon "Guru Aleph-Null" Janes + * NCM: Network and Communications Management, Inc. + * + * BUT, I'm the one who modified it for ethernet, so: + * (c) Copyright 1999, Thomas Davis, tadavis@lbl.gov + * + * This software may be used and distributed according to the terms + * of the GNU Public License, incorporated herein by reference. + * + * 2003/03/18 - Amir Noam + * - Added support for getting slave's speed and duplex via ethtool. + * Needed for 802.3ad and other future modes. + * + * 2003/03/18 - Tsippy Mendelson and + * Shmulik Hen + * - Enable support of modes that need to use the unique mac address of + * each slave. + * + * 2003/03/18 - Tsippy Mendelson and + * Amir Noam + * - Moved driver's private data types to bonding.h + * + * 2003/03/18 - Amir Noam , + * Tsippy Mendelson and + * Shmulik Hen + * - Added support for IEEE 802.3ad Dynamic link aggregation mode. + * + * 2003/05/01 - Amir Noam + * - Added ABI version control to restore compatibility between + * new/old ifenslave and new/old bonding. + * + * 2003/12/01 - Shmulik Hen + * - Code cleanup and style changes + * + * 2005/05/05 - Jason Gabler + * - added definitions for various XOR hashing policies + */ + +#ifndef _LINUX_IF_BONDING_H +#define _LINUX_IF_BONDING_H + +#include +#include +#include + +/* userland - kernel ABI version (2003/05/08) */ +#define BOND_ABI_VERSION 2 + +/* + * We can remove these ioctl definitions in 2.5. People should use the + * SIOC*** versions of them instead + */ +#define BOND_ENSLAVE_OLD (SIOCDEVPRIVATE) +#define BOND_RELEASE_OLD (SIOCDEVPRIVATE + 1) +#define BOND_SETHWADDR_OLD (SIOCDEVPRIVATE + 2) +#define BOND_SLAVE_INFO_QUERY_OLD (SIOCDEVPRIVATE + 11) +#define BOND_INFO_QUERY_OLD (SIOCDEVPRIVATE + 12) +#define BOND_CHANGE_ACTIVE_OLD (SIOCDEVPRIVATE + 13) + +#define BOND_CHECK_MII_STATUS (SIOCGMIIPHY) + +#define BOND_MODE_ROUNDROBIN 0 +#define BOND_MODE_ACTIVEBACKUP 1 +#define BOND_MODE_XOR 2 +#define BOND_MODE_BROADCAST 3 +#define BOND_MODE_8023AD 4 +#define BOND_MODE_TLB 5 +#define BOND_MODE_ALB 6 /* TLB + RLB (receive load balancing) */ + +/* each slave's link has 4 states */ +#define BOND_LINK_UP 0 /* link is up and running */ +#define BOND_LINK_FAIL 1 /* link has just gone down */ +#define BOND_LINK_DOWN 2 /* link has been down for too long time */ +#define BOND_LINK_BACK 3 /* link is going back */ + +/* each slave has several states */ +#define BOND_STATE_ACTIVE 0 /* link is active */ +#define BOND_STATE_BACKUP 1 /* link is backup */ + +#define BOND_DEFAULT_MAX_BONDS 1 /* Default maximum number of devices to support */ + +#define BOND_DEFAULT_TX_QUEUES 16 /* Default number of tx queues per device */ + +#define BOND_DEFAULT_RESEND_IGMP 1 /* Default number of IGMP membership reports */ + +/* hashing types */ +#define BOND_XMIT_POLICY_LAYER2 0 /* layer 2 (MAC only), default */ +#define BOND_XMIT_POLICY_LAYER34 1 /* layer 3+4 (IP ^ (TCP || UDP)) */ +#define BOND_XMIT_POLICY_LAYER23 2 /* layer 2+3 (IP ^ MAC) */ +#define BOND_XMIT_POLICY_ENCAP23 3 /* encapsulated layer 2+3 */ +#define BOND_XMIT_POLICY_ENCAP34 4 /* encapsulated layer 3+4 */ +#define BOND_XMIT_POLICY_VLAN_SRCMAC 5 /* vlan + source MAC */ + +/* 802.3ad port state definitions (43.4.2.2 in the 802.3ad standard) */ +#define LACP_STATE_LACP_ACTIVITY 0x1 +#define LACP_STATE_LACP_TIMEOUT 0x2 +#define LACP_STATE_AGGREGATION 0x4 +#define LACP_STATE_SYNCHRONIZATION 0x8 +#define LACP_STATE_COLLECTING 0x10 +#define LACP_STATE_DISTRIBUTING 0x20 +#define LACP_STATE_DEFAULTED 0x40 +#define LACP_STATE_EXPIRED 0x80 + +typedef struct ifbond { + __s32 bond_mode; + __s32 num_slaves; + __s32 miimon; +} ifbond; + +typedef struct ifslave { + __s32 slave_id; /* Used as an IN param to the BOND_SLAVE_INFO_QUERY ioctl */ + char slave_name[IFNAMSIZ]; + __s8 link; + __s8 state; + __u32 link_failure_count; +} ifslave; + +struct ad_info { + __u16 aggregator_id; + __u16 ports; + __u16 actor_key; + __u16 partner_key; + __u8 partner_system[ETH_ALEN]; +}; + +/* Embedded inside LINK_XSTATS_TYPE_BOND */ +enum { + BOND_XSTATS_UNSPEC, + BOND_XSTATS_3AD, + __BOND_XSTATS_MAX +}; +#define BOND_XSTATS_MAX (__BOND_XSTATS_MAX - 1) + +/* Embedded inside BOND_XSTATS_3AD */ +enum { + BOND_3AD_STAT_LACPDU_RX, + BOND_3AD_STAT_LACPDU_TX, + BOND_3AD_STAT_LACPDU_UNKNOWN_RX, + BOND_3AD_STAT_LACPDU_ILLEGAL_RX, + BOND_3AD_STAT_MARKER_RX, + BOND_3AD_STAT_MARKER_TX, + BOND_3AD_STAT_MARKER_RESP_RX, + BOND_3AD_STAT_MARKER_RESP_TX, + BOND_3AD_STAT_MARKER_UNKNOWN_RX, + BOND_3AD_STAT_PAD, + __BOND_3AD_STAT_MAX +}; +#define BOND_3AD_STAT_MAX (__BOND_3AD_STAT_MAX - 1) + +#endif /* _LINUX_IF_BONDING_H */ diff --git a/src/basic/linux/if_bridge.h b/src/basic/linux/if_bridge.h new file mode 100644 index 0000000..d9de241 --- /dev/null +++ b/src/basic/linux/if_bridge.h @@ -0,0 +1,826 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_IF_BRIDGE_H +#define _UAPI_LINUX_IF_BRIDGE_H + +#include +#include +#include + +#define SYSFS_BRIDGE_ATTR "bridge" +#define SYSFS_BRIDGE_FDB "brforward" +#define SYSFS_BRIDGE_PORT_SUBDIR "brif" +#define SYSFS_BRIDGE_PORT_ATTR "brport" +#define SYSFS_BRIDGE_PORT_LINK "bridge" + +#define BRCTL_VERSION 1 + +#define BRCTL_GET_VERSION 0 +#define BRCTL_GET_BRIDGES 1 +#define BRCTL_ADD_BRIDGE 2 +#define BRCTL_DEL_BRIDGE 3 +#define BRCTL_ADD_IF 4 +#define BRCTL_DEL_IF 5 +#define BRCTL_GET_BRIDGE_INFO 6 +#define BRCTL_GET_PORT_LIST 7 +#define BRCTL_SET_BRIDGE_FORWARD_DELAY 8 +#define BRCTL_SET_BRIDGE_HELLO_TIME 9 +#define BRCTL_SET_BRIDGE_MAX_AGE 10 +#define BRCTL_SET_AGEING_TIME 11 +#define BRCTL_SET_GC_INTERVAL 12 +#define BRCTL_GET_PORT_INFO 13 +#define BRCTL_SET_BRIDGE_STP_STATE 14 +#define BRCTL_SET_BRIDGE_PRIORITY 15 +#define BRCTL_SET_PORT_PRIORITY 16 +#define BRCTL_SET_PATH_COST 17 +#define BRCTL_GET_FDB_ENTRIES 18 + +#define BR_STATE_DISABLED 0 +#define BR_STATE_LISTENING 1 +#define BR_STATE_LEARNING 2 +#define BR_STATE_FORWARDING 3 +#define BR_STATE_BLOCKING 4 + +struct __bridge_info { + __u64 designated_root; + __u64 bridge_id; + __u32 root_path_cost; + __u32 max_age; + __u32 hello_time; + __u32 forward_delay; + __u32 bridge_max_age; + __u32 bridge_hello_time; + __u32 bridge_forward_delay; + __u8 topology_change; + __u8 topology_change_detected; + __u8 root_port; + __u8 stp_enabled; + __u32 ageing_time; + __u32 gc_interval; + __u32 hello_timer_value; + __u32 tcn_timer_value; + __u32 topology_change_timer_value; + __u32 gc_timer_value; +}; + +struct __port_info { + __u64 designated_root; + __u64 designated_bridge; + __u16 port_id; + __u16 designated_port; + __u32 path_cost; + __u32 designated_cost; + __u8 state; + __u8 top_change_ack; + __u8 config_pending; + __u8 unused0; + __u32 message_age_timer_value; + __u32 forward_delay_timer_value; + __u32 hold_timer_value; +}; + +struct __fdb_entry { + __u8 mac_addr[ETH_ALEN]; + __u8 port_no; + __u8 is_local; + __u32 ageing_timer_value; + __u8 port_hi; + __u8 pad0; + __u16 unused; +}; + +/* Bridge Flags */ +#define BRIDGE_FLAGS_MASTER 1 /* Bridge command to/from master */ +#define BRIDGE_FLAGS_SELF 2 /* Bridge command to/from lowerdev */ + +#define BRIDGE_MODE_VEB 0 /* Default loopback mode */ +#define BRIDGE_MODE_VEPA 1 /* 802.1Qbg defined VEPA mode */ +#define BRIDGE_MODE_UNDEF 0xFFFF /* mode undefined */ + +/* Bridge management nested attributes + * [IFLA_AF_SPEC] = { + * [IFLA_BRIDGE_FLAGS] + * [IFLA_BRIDGE_MODE] + * [IFLA_BRIDGE_VLAN_INFO] + * } + */ +enum { + IFLA_BRIDGE_FLAGS, + IFLA_BRIDGE_MODE, + IFLA_BRIDGE_VLAN_INFO, + IFLA_BRIDGE_VLAN_TUNNEL_INFO, + IFLA_BRIDGE_MRP, + IFLA_BRIDGE_CFM, + IFLA_BRIDGE_MST, + __IFLA_BRIDGE_MAX, +}; +#define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) + +#define BRIDGE_VLAN_INFO_MASTER (1<<0) /* Operate on Bridge device as well */ +#define BRIDGE_VLAN_INFO_PVID (1<<1) /* VLAN is PVID, ingress untagged */ +#define BRIDGE_VLAN_INFO_UNTAGGED (1<<2) /* VLAN egresses untagged */ +#define BRIDGE_VLAN_INFO_RANGE_BEGIN (1<<3) /* VLAN is start of vlan range */ +#define BRIDGE_VLAN_INFO_RANGE_END (1<<4) /* VLAN is end of vlan range */ +#define BRIDGE_VLAN_INFO_BRENTRY (1<<5) /* Global bridge VLAN entry */ +#define BRIDGE_VLAN_INFO_ONLY_OPTS (1<<6) /* Skip create/delete/flags */ + +struct bridge_vlan_info { + __u16 flags; + __u16 vid; +}; + +enum { + IFLA_BRIDGE_VLAN_TUNNEL_UNSPEC, + IFLA_BRIDGE_VLAN_TUNNEL_ID, + IFLA_BRIDGE_VLAN_TUNNEL_VID, + IFLA_BRIDGE_VLAN_TUNNEL_FLAGS, + __IFLA_BRIDGE_VLAN_TUNNEL_MAX, +}; + +#define IFLA_BRIDGE_VLAN_TUNNEL_MAX (__IFLA_BRIDGE_VLAN_TUNNEL_MAX - 1) + +struct bridge_vlan_xstats { + __u64 rx_bytes; + __u64 rx_packets; + __u64 tx_bytes; + __u64 tx_packets; + __u16 vid; + __u16 flags; + __u32 pad2; +}; + +enum { + IFLA_BRIDGE_MRP_UNSPEC, + IFLA_BRIDGE_MRP_INSTANCE, + IFLA_BRIDGE_MRP_PORT_STATE, + IFLA_BRIDGE_MRP_PORT_ROLE, + IFLA_BRIDGE_MRP_RING_STATE, + IFLA_BRIDGE_MRP_RING_ROLE, + IFLA_BRIDGE_MRP_START_TEST, + IFLA_BRIDGE_MRP_INFO, + IFLA_BRIDGE_MRP_IN_ROLE, + IFLA_BRIDGE_MRP_IN_STATE, + IFLA_BRIDGE_MRP_START_IN_TEST, + __IFLA_BRIDGE_MRP_MAX, +}; + +#define IFLA_BRIDGE_MRP_MAX (__IFLA_BRIDGE_MRP_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_INSTANCE_UNSPEC, + IFLA_BRIDGE_MRP_INSTANCE_RING_ID, + IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX, + IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX, + IFLA_BRIDGE_MRP_INSTANCE_PRIO, + __IFLA_BRIDGE_MRP_INSTANCE_MAX, +}; + +#define IFLA_BRIDGE_MRP_INSTANCE_MAX (__IFLA_BRIDGE_MRP_INSTANCE_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_PORT_STATE_UNSPEC, + IFLA_BRIDGE_MRP_PORT_STATE_STATE, + __IFLA_BRIDGE_MRP_PORT_STATE_MAX, +}; + +#define IFLA_BRIDGE_MRP_PORT_STATE_MAX (__IFLA_BRIDGE_MRP_PORT_STATE_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_PORT_ROLE_UNSPEC, + IFLA_BRIDGE_MRP_PORT_ROLE_ROLE, + __IFLA_BRIDGE_MRP_PORT_ROLE_MAX, +}; + +#define IFLA_BRIDGE_MRP_PORT_ROLE_MAX (__IFLA_BRIDGE_MRP_PORT_ROLE_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_RING_STATE_UNSPEC, + IFLA_BRIDGE_MRP_RING_STATE_RING_ID, + IFLA_BRIDGE_MRP_RING_STATE_STATE, + __IFLA_BRIDGE_MRP_RING_STATE_MAX, +}; + +#define IFLA_BRIDGE_MRP_RING_STATE_MAX (__IFLA_BRIDGE_MRP_RING_STATE_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_RING_ROLE_UNSPEC, + IFLA_BRIDGE_MRP_RING_ROLE_RING_ID, + IFLA_BRIDGE_MRP_RING_ROLE_ROLE, + __IFLA_BRIDGE_MRP_RING_ROLE_MAX, +}; + +#define IFLA_BRIDGE_MRP_RING_ROLE_MAX (__IFLA_BRIDGE_MRP_RING_ROLE_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_START_TEST_UNSPEC, + IFLA_BRIDGE_MRP_START_TEST_RING_ID, + IFLA_BRIDGE_MRP_START_TEST_INTERVAL, + IFLA_BRIDGE_MRP_START_TEST_MAX_MISS, + IFLA_BRIDGE_MRP_START_TEST_PERIOD, + IFLA_BRIDGE_MRP_START_TEST_MONITOR, + __IFLA_BRIDGE_MRP_START_TEST_MAX, +}; + +#define IFLA_BRIDGE_MRP_START_TEST_MAX (__IFLA_BRIDGE_MRP_START_TEST_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_INFO_UNSPEC, + IFLA_BRIDGE_MRP_INFO_RING_ID, + IFLA_BRIDGE_MRP_INFO_P_IFINDEX, + IFLA_BRIDGE_MRP_INFO_S_IFINDEX, + IFLA_BRIDGE_MRP_INFO_PRIO, + IFLA_BRIDGE_MRP_INFO_RING_STATE, + IFLA_BRIDGE_MRP_INFO_RING_ROLE, + IFLA_BRIDGE_MRP_INFO_TEST_INTERVAL, + IFLA_BRIDGE_MRP_INFO_TEST_MAX_MISS, + IFLA_BRIDGE_MRP_INFO_TEST_MONITOR, + IFLA_BRIDGE_MRP_INFO_I_IFINDEX, + IFLA_BRIDGE_MRP_INFO_IN_STATE, + IFLA_BRIDGE_MRP_INFO_IN_ROLE, + IFLA_BRIDGE_MRP_INFO_IN_TEST_INTERVAL, + IFLA_BRIDGE_MRP_INFO_IN_TEST_MAX_MISS, + __IFLA_BRIDGE_MRP_INFO_MAX, +}; + +#define IFLA_BRIDGE_MRP_INFO_MAX (__IFLA_BRIDGE_MRP_INFO_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_IN_STATE_UNSPEC, + IFLA_BRIDGE_MRP_IN_STATE_IN_ID, + IFLA_BRIDGE_MRP_IN_STATE_STATE, + __IFLA_BRIDGE_MRP_IN_STATE_MAX, +}; + +#define IFLA_BRIDGE_MRP_IN_STATE_MAX (__IFLA_BRIDGE_MRP_IN_STATE_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_IN_ROLE_UNSPEC, + IFLA_BRIDGE_MRP_IN_ROLE_RING_ID, + IFLA_BRIDGE_MRP_IN_ROLE_IN_ID, + IFLA_BRIDGE_MRP_IN_ROLE_ROLE, + IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX, + __IFLA_BRIDGE_MRP_IN_ROLE_MAX, +}; + +#define IFLA_BRIDGE_MRP_IN_ROLE_MAX (__IFLA_BRIDGE_MRP_IN_ROLE_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_START_IN_TEST_UNSPEC, + IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID, + IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL, + IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS, + IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD, + __IFLA_BRIDGE_MRP_START_IN_TEST_MAX, +}; + +#define IFLA_BRIDGE_MRP_START_IN_TEST_MAX (__IFLA_BRIDGE_MRP_START_IN_TEST_MAX - 1) + +struct br_mrp_instance { + __u32 ring_id; + __u32 p_ifindex; + __u32 s_ifindex; + __u16 prio; +}; + +struct br_mrp_ring_state { + __u32 ring_id; + __u32 ring_state; +}; + +struct br_mrp_ring_role { + __u32 ring_id; + __u32 ring_role; +}; + +struct br_mrp_start_test { + __u32 ring_id; + __u32 interval; + __u32 max_miss; + __u32 period; + __u32 monitor; +}; + +struct br_mrp_in_state { + __u32 in_state; + __u16 in_id; +}; + +struct br_mrp_in_role { + __u32 ring_id; + __u32 in_role; + __u32 i_ifindex; + __u16 in_id; +}; + +struct br_mrp_start_in_test { + __u32 interval; + __u32 max_miss; + __u32 period; + __u16 in_id; +}; + +enum { + IFLA_BRIDGE_CFM_UNSPEC, + IFLA_BRIDGE_CFM_MEP_CREATE, + IFLA_BRIDGE_CFM_MEP_DELETE, + IFLA_BRIDGE_CFM_MEP_CONFIG, + IFLA_BRIDGE_CFM_CC_CONFIG, + IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD, + IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE, + IFLA_BRIDGE_CFM_CC_RDI, + IFLA_BRIDGE_CFM_CC_CCM_TX, + IFLA_BRIDGE_CFM_MEP_CREATE_INFO, + IFLA_BRIDGE_CFM_MEP_CONFIG_INFO, + IFLA_BRIDGE_CFM_CC_CONFIG_INFO, + IFLA_BRIDGE_CFM_CC_RDI_INFO, + IFLA_BRIDGE_CFM_CC_CCM_TX_INFO, + IFLA_BRIDGE_CFM_CC_PEER_MEP_INFO, + IFLA_BRIDGE_CFM_MEP_STATUS_INFO, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO, + __IFLA_BRIDGE_CFM_MAX, +}; + +#define IFLA_BRIDGE_CFM_MAX (__IFLA_BRIDGE_CFM_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_MEP_CREATE_UNSPEC, + IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE, + IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN, + IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION, + IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX, + __IFLA_BRIDGE_CFM_MEP_CREATE_MAX, +}; + +#define IFLA_BRIDGE_CFM_MEP_CREATE_MAX (__IFLA_BRIDGE_CFM_MEP_CREATE_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_MEP_DELETE_UNSPEC, + IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE, + __IFLA_BRIDGE_CFM_MEP_DELETE_MAX, +}; + +#define IFLA_BRIDGE_CFM_MEP_DELETE_MAX (__IFLA_BRIDGE_CFM_MEP_DELETE_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_MEP_CONFIG_UNSPEC, + IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE, + IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC, + IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL, + IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID, + __IFLA_BRIDGE_CFM_MEP_CONFIG_MAX, +}; + +#define IFLA_BRIDGE_CFM_MEP_CONFIG_MAX (__IFLA_BRIDGE_CFM_MEP_CONFIG_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_CC_CONFIG_UNSPEC, + IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE, + IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE, + IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL, + IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID, + __IFLA_BRIDGE_CFM_CC_CONFIG_MAX, +}; + +#define IFLA_BRIDGE_CFM_CC_CONFIG_MAX (__IFLA_BRIDGE_CFM_CC_CONFIG_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_CC_PEER_MEP_UNSPEC, + IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE, + IFLA_BRIDGE_CFM_CC_PEER_MEPID, + __IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX, +}; + +#define IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX (__IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_CC_RDI_UNSPEC, + IFLA_BRIDGE_CFM_CC_RDI_INSTANCE, + IFLA_BRIDGE_CFM_CC_RDI_RDI, + __IFLA_BRIDGE_CFM_CC_RDI_MAX, +}; + +#define IFLA_BRIDGE_CFM_CC_RDI_MAX (__IFLA_BRIDGE_CFM_CC_RDI_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_CC_CCM_TX_UNSPEC, + IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE, + IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC, + IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE, + IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD, + IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV, + IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE, + IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV, + IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE, + __IFLA_BRIDGE_CFM_CC_CCM_TX_MAX, +}; + +#define IFLA_BRIDGE_CFM_CC_CCM_TX_MAX (__IFLA_BRIDGE_CFM_CC_CCM_TX_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_MEP_STATUS_UNSPEC, + IFLA_BRIDGE_CFM_MEP_STATUS_INSTANCE, + IFLA_BRIDGE_CFM_MEP_STATUS_OPCODE_UNEXP_SEEN, + IFLA_BRIDGE_CFM_MEP_STATUS_VERSION_UNEXP_SEEN, + IFLA_BRIDGE_CFM_MEP_STATUS_RX_LEVEL_LOW_SEEN, + __IFLA_BRIDGE_CFM_MEP_STATUS_MAX, +}; + +#define IFLA_BRIDGE_CFM_MEP_STATUS_MAX (__IFLA_BRIDGE_CFM_MEP_STATUS_MAX - 1) + +enum { + IFLA_BRIDGE_CFM_CC_PEER_STATUS_UNSPEC, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_INSTANCE, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_PEER_MEPID, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_CCM_DEFECT, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_RDI, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_PORT_TLV_VALUE, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_IF_TLV_VALUE, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEEN, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_TLV_SEEN, + IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEQ_UNEXP_SEEN, + __IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX, +}; + +#define IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX (__IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX - 1) + +enum { + IFLA_BRIDGE_MST_UNSPEC, + IFLA_BRIDGE_MST_ENTRY, + __IFLA_BRIDGE_MST_MAX, +}; +#define IFLA_BRIDGE_MST_MAX (__IFLA_BRIDGE_MST_MAX - 1) + +enum { + IFLA_BRIDGE_MST_ENTRY_UNSPEC, + IFLA_BRIDGE_MST_ENTRY_MSTI, + IFLA_BRIDGE_MST_ENTRY_STATE, + __IFLA_BRIDGE_MST_ENTRY_MAX, +}; +#define IFLA_BRIDGE_MST_ENTRY_MAX (__IFLA_BRIDGE_MST_ENTRY_MAX - 1) + +struct bridge_stp_xstats { + __u64 transition_blk; + __u64 transition_fwd; + __u64 rx_bpdu; + __u64 tx_bpdu; + __u64 rx_tcn; + __u64 tx_tcn; +}; + +/* Bridge vlan RTM header */ +struct br_vlan_msg { + __u8 family; + __u8 reserved1; + __u16 reserved2; + __u32 ifindex; +}; + +enum { + BRIDGE_VLANDB_DUMP_UNSPEC, + BRIDGE_VLANDB_DUMP_FLAGS, + __BRIDGE_VLANDB_DUMP_MAX, +}; +#define BRIDGE_VLANDB_DUMP_MAX (__BRIDGE_VLANDB_DUMP_MAX - 1) + +/* flags used in BRIDGE_VLANDB_DUMP_FLAGS attribute to affect dumps */ +#define BRIDGE_VLANDB_DUMPF_STATS (1 << 0) /* Include stats in the dump */ +#define BRIDGE_VLANDB_DUMPF_GLOBAL (1 << 1) /* Dump global vlan options only */ + +/* Bridge vlan RTM attributes + * [BRIDGE_VLANDB_ENTRY] = { + * [BRIDGE_VLANDB_ENTRY_INFO] + * ... + * } + * [BRIDGE_VLANDB_GLOBAL_OPTIONS] = { + * [BRIDGE_VLANDB_GOPTS_ID] + * ... + * } + */ +enum { + BRIDGE_VLANDB_UNSPEC, + BRIDGE_VLANDB_ENTRY, + BRIDGE_VLANDB_GLOBAL_OPTIONS, + __BRIDGE_VLANDB_MAX, +}; +#define BRIDGE_VLANDB_MAX (__BRIDGE_VLANDB_MAX - 1) + +enum { + BRIDGE_VLANDB_ENTRY_UNSPEC, + BRIDGE_VLANDB_ENTRY_INFO, + BRIDGE_VLANDB_ENTRY_RANGE, + BRIDGE_VLANDB_ENTRY_STATE, + BRIDGE_VLANDB_ENTRY_TUNNEL_INFO, + BRIDGE_VLANDB_ENTRY_STATS, + BRIDGE_VLANDB_ENTRY_MCAST_ROUTER, + __BRIDGE_VLANDB_ENTRY_MAX, +}; +#define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1) + +/* [BRIDGE_VLANDB_ENTRY] = { + * [BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = { + * [BRIDGE_VLANDB_TINFO_ID] + * ... + * } + * } + */ +enum { + BRIDGE_VLANDB_TINFO_UNSPEC, + BRIDGE_VLANDB_TINFO_ID, + BRIDGE_VLANDB_TINFO_CMD, + __BRIDGE_VLANDB_TINFO_MAX, +}; +#define BRIDGE_VLANDB_TINFO_MAX (__BRIDGE_VLANDB_TINFO_MAX - 1) + +/* [BRIDGE_VLANDB_ENTRY] = { + * [BRIDGE_VLANDB_ENTRY_STATS] = { + * [BRIDGE_VLANDB_STATS_RX_BYTES] + * ... + * } + * ... + * } + */ +enum { + BRIDGE_VLANDB_STATS_UNSPEC, + BRIDGE_VLANDB_STATS_RX_BYTES, + BRIDGE_VLANDB_STATS_RX_PACKETS, + BRIDGE_VLANDB_STATS_TX_BYTES, + BRIDGE_VLANDB_STATS_TX_PACKETS, + BRIDGE_VLANDB_STATS_PAD, + __BRIDGE_VLANDB_STATS_MAX, +}; +#define BRIDGE_VLANDB_STATS_MAX (__BRIDGE_VLANDB_STATS_MAX - 1) + +enum { + BRIDGE_VLANDB_GOPTS_UNSPEC, + BRIDGE_VLANDB_GOPTS_ID, + BRIDGE_VLANDB_GOPTS_RANGE, + BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING, + BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION, + BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION, + BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT, + BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT, + BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL, + BRIDGE_VLANDB_GOPTS_PAD, + BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL, + BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, + BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS, + BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE, + BRIDGE_VLANDB_GOPTS_MSTI, + __BRIDGE_VLANDB_GOPTS_MAX +}; +#define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) + +/* Bridge multicast database attributes + * [MDBA_MDB] = { + * [MDBA_MDB_ENTRY] = { + * [MDBA_MDB_ENTRY_INFO] { + * struct br_mdb_entry + * [MDBA_MDB_EATTR attributes] + * } + * } + * } + * [MDBA_ROUTER] = { + * [MDBA_ROUTER_PORT] = { + * u32 ifindex + * [MDBA_ROUTER_PATTR attributes] + * } + * } + */ +enum { + MDBA_UNSPEC, + MDBA_MDB, + MDBA_ROUTER, + __MDBA_MAX, +}; +#define MDBA_MAX (__MDBA_MAX - 1) + +enum { + MDBA_MDB_UNSPEC, + MDBA_MDB_ENTRY, + __MDBA_MDB_MAX, +}; +#define MDBA_MDB_MAX (__MDBA_MDB_MAX - 1) + +enum { + MDBA_MDB_ENTRY_UNSPEC, + MDBA_MDB_ENTRY_INFO, + __MDBA_MDB_ENTRY_MAX, +}; +#define MDBA_MDB_ENTRY_MAX (__MDBA_MDB_ENTRY_MAX - 1) + +/* per mdb entry additional attributes */ +enum { + MDBA_MDB_EATTR_UNSPEC, + MDBA_MDB_EATTR_TIMER, + MDBA_MDB_EATTR_SRC_LIST, + MDBA_MDB_EATTR_GROUP_MODE, + MDBA_MDB_EATTR_SOURCE, + MDBA_MDB_EATTR_RTPROT, + __MDBA_MDB_EATTR_MAX +}; +#define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1) + +/* per mdb entry source */ +enum { + MDBA_MDB_SRCLIST_UNSPEC, + MDBA_MDB_SRCLIST_ENTRY, + __MDBA_MDB_SRCLIST_MAX +}; +#define MDBA_MDB_SRCLIST_MAX (__MDBA_MDB_SRCLIST_MAX - 1) + +/* per mdb entry per source attributes + * these are embedded in MDBA_MDB_SRCLIST_ENTRY + */ +enum { + MDBA_MDB_SRCATTR_UNSPEC, + MDBA_MDB_SRCATTR_ADDRESS, + MDBA_MDB_SRCATTR_TIMER, + __MDBA_MDB_SRCATTR_MAX +}; +#define MDBA_MDB_SRCATTR_MAX (__MDBA_MDB_SRCATTR_MAX - 1) + +/* multicast router types */ +enum { + MDB_RTR_TYPE_DISABLED, + MDB_RTR_TYPE_TEMP_QUERY, + MDB_RTR_TYPE_PERM, + MDB_RTR_TYPE_TEMP +}; + +enum { + MDBA_ROUTER_UNSPEC, + MDBA_ROUTER_PORT, + __MDBA_ROUTER_MAX, +}; +#define MDBA_ROUTER_MAX (__MDBA_ROUTER_MAX - 1) + +/* router port attributes */ +enum { + MDBA_ROUTER_PATTR_UNSPEC, + MDBA_ROUTER_PATTR_TIMER, + MDBA_ROUTER_PATTR_TYPE, + MDBA_ROUTER_PATTR_INET_TIMER, + MDBA_ROUTER_PATTR_INET6_TIMER, + MDBA_ROUTER_PATTR_VID, + __MDBA_ROUTER_PATTR_MAX +}; +#define MDBA_ROUTER_PATTR_MAX (__MDBA_ROUTER_PATTR_MAX - 1) + +struct br_port_msg { + __u8 family; + __u32 ifindex; +}; + +struct br_mdb_entry { + __u32 ifindex; +#define MDB_TEMPORARY 0 +#define MDB_PERMANENT 1 + __u8 state; +#define MDB_FLAGS_OFFLOAD (1 << 0) +#define MDB_FLAGS_FAST_LEAVE (1 << 1) +#define MDB_FLAGS_STAR_EXCL (1 << 2) +#define MDB_FLAGS_BLOCKED (1 << 3) + __u8 flags; + __u16 vid; + struct { + union { + __be32 ip4; + struct in6_addr ip6; + unsigned char mac_addr[ETH_ALEN]; + } u; + __be16 proto; + } addr; +}; + +enum { + MDBA_SET_ENTRY_UNSPEC, + MDBA_SET_ENTRY, + MDBA_SET_ENTRY_ATTRS, + __MDBA_SET_ENTRY_MAX, +}; +#define MDBA_SET_ENTRY_MAX (__MDBA_SET_ENTRY_MAX - 1) + +/* [MDBA_SET_ENTRY_ATTRS] = { + * [MDBE_ATTR_xxx] + * ... + * } + */ +enum { + MDBE_ATTR_UNSPEC, + MDBE_ATTR_SOURCE, + MDBE_ATTR_SRC_LIST, + MDBE_ATTR_GROUP_MODE, + MDBE_ATTR_RTPROT, + __MDBE_ATTR_MAX, +}; +#define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1) + +/* per mdb entry source */ +enum { + MDBE_SRC_LIST_UNSPEC, + MDBE_SRC_LIST_ENTRY, + __MDBE_SRC_LIST_MAX, +}; +#define MDBE_SRC_LIST_MAX (__MDBE_SRC_LIST_MAX - 1) + +/* per mdb entry per source attributes + * these are embedded in MDBE_SRC_LIST_ENTRY + */ +enum { + MDBE_SRCATTR_UNSPEC, + MDBE_SRCATTR_ADDRESS, + __MDBE_SRCATTR_MAX, +}; +#define MDBE_SRCATTR_MAX (__MDBE_SRCATTR_MAX - 1) + +/* Embedded inside LINK_XSTATS_TYPE_BRIDGE */ +enum { + BRIDGE_XSTATS_UNSPEC, + BRIDGE_XSTATS_VLAN, + BRIDGE_XSTATS_MCAST, + BRIDGE_XSTATS_PAD, + BRIDGE_XSTATS_STP, + __BRIDGE_XSTATS_MAX +}; +#define BRIDGE_XSTATS_MAX (__BRIDGE_XSTATS_MAX - 1) + +enum { + BR_MCAST_DIR_RX, + BR_MCAST_DIR_TX, + BR_MCAST_DIR_SIZE +}; + +/* IGMP/MLD statistics */ +struct br_mcast_stats { + __u64 igmp_v1queries[BR_MCAST_DIR_SIZE]; + __u64 igmp_v2queries[BR_MCAST_DIR_SIZE]; + __u64 igmp_v3queries[BR_MCAST_DIR_SIZE]; + __u64 igmp_leaves[BR_MCAST_DIR_SIZE]; + __u64 igmp_v1reports[BR_MCAST_DIR_SIZE]; + __u64 igmp_v2reports[BR_MCAST_DIR_SIZE]; + __u64 igmp_v3reports[BR_MCAST_DIR_SIZE]; + __u64 igmp_parse_errors; + + __u64 mld_v1queries[BR_MCAST_DIR_SIZE]; + __u64 mld_v2queries[BR_MCAST_DIR_SIZE]; + __u64 mld_leaves[BR_MCAST_DIR_SIZE]; + __u64 mld_v1reports[BR_MCAST_DIR_SIZE]; + __u64 mld_v2reports[BR_MCAST_DIR_SIZE]; + __u64 mld_parse_errors; + + __u64 mcast_bytes[BR_MCAST_DIR_SIZE]; + __u64 mcast_packets[BR_MCAST_DIR_SIZE]; +}; + +/* bridge boolean options + * BR_BOOLOPT_NO_LL_LEARN - disable learning from link-local packets + * BR_BOOLOPT_MCAST_VLAN_SNOOPING - control vlan multicast snooping + * + * IMPORTANT: if adding a new option do not forget to handle + * it in br_boolopt_toggle/get and bridge sysfs + */ +enum br_boolopt_id { + BR_BOOLOPT_NO_LL_LEARN, + BR_BOOLOPT_MCAST_VLAN_SNOOPING, + BR_BOOLOPT_MST_ENABLE, + BR_BOOLOPT_MAX +}; + +/* struct br_boolopt_multi - change multiple bridge boolean options + * + * @optval: new option values (bit per option) + * @optmask: options to change (bit per option) + */ +struct br_boolopt_multi { + __u32 optval; + __u32 optmask; +}; + +enum { + BRIDGE_QUERIER_UNSPEC, + BRIDGE_QUERIER_IP_ADDRESS, + BRIDGE_QUERIER_IP_PORT, + BRIDGE_QUERIER_IP_OTHER_TIMER, + BRIDGE_QUERIER_PAD, + BRIDGE_QUERIER_IPV6_ADDRESS, + BRIDGE_QUERIER_IPV6_PORT, + BRIDGE_QUERIER_IPV6_OTHER_TIMER, + __BRIDGE_QUERIER_MAX +}; +#define BRIDGE_QUERIER_MAX (__BRIDGE_QUERIER_MAX - 1) +#endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/src/basic/linux/if_ether.h b/src/basic/linux/if_ether.h new file mode 100644 index 0000000..69e0457 --- /dev/null +++ b/src/basic/linux/if_ether.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Global definitions for the Ethernet IEEE 802.3 interface. + * + * Version: @(#)if_ether.h 1.0.1a 02/08/94 + * + * Author: Fred N. van Kempen, + * Donald Becker, + * Alan Cox, + * Steve Whitehouse, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_IF_ETHER_H +#define _UAPI_LINUX_IF_ETHER_H + +#include + +/* + * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble + * and FCS/CRC (frame check sequence). + */ + +#define ETH_ALEN 6 /* Octets in one ethernet addr */ +#define ETH_TLEN 2 /* Octets in ethernet type field */ +#define ETH_HLEN 14 /* Total octets in header. */ +#define ETH_ZLEN 60 /* Min. octets in frame sans FCS */ +#define ETH_DATA_LEN 1500 /* Max. octets in payload */ +#define ETH_FRAME_LEN 1514 /* Max. octets in frame sans FCS */ +#define ETH_FCS_LEN 4 /* Octets in the FCS */ + +#define ETH_MIN_MTU 68 /* Min IPv4 MTU per RFC791 */ +#define ETH_MAX_MTU 0xFFFFU /* 65535, same as IP_MAX_MTU */ + +/* + * These are the defined Ethernet Protocol ID's. + */ + +#define ETH_P_LOOP 0x0060 /* Ethernet Loopback packet */ +#define ETH_P_PUP 0x0200 /* Xerox PUP packet */ +#define ETH_P_PUPAT 0x0201 /* Xerox PUP Addr Trans packet */ +#define ETH_P_TSN 0x22F0 /* TSN (IEEE 1722) packet */ +#define ETH_P_ERSPAN2 0x22EB /* ERSPAN version 2 (type III) */ +#define ETH_P_IP 0x0800 /* Internet Protocol packet */ +#define ETH_P_X25 0x0805 /* CCITT X.25 */ +#define ETH_P_ARP 0x0806 /* Address Resolution packet */ +#define ETH_P_BPQ 0x08FF /* G8BPQ AX.25 Ethernet Packet [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_IEEEPUP 0x0a00 /* Xerox IEEE802.3 PUP packet */ +#define ETH_P_IEEEPUPAT 0x0a01 /* Xerox IEEE802.3 PUP Addr Trans packet */ +#define ETH_P_BATMAN 0x4305 /* B.A.T.M.A.N.-Advanced packet [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_DEC 0x6000 /* DEC Assigned proto */ +#define ETH_P_DNA_DL 0x6001 /* DEC DNA Dump/Load */ +#define ETH_P_DNA_RC 0x6002 /* DEC DNA Remote Console */ +#define ETH_P_DNA_RT 0x6003 /* DEC DNA Routing */ +#define ETH_P_LAT 0x6004 /* DEC LAT */ +#define ETH_P_DIAG 0x6005 /* DEC Diagnostics */ +#define ETH_P_CUST 0x6006 /* DEC Customer use */ +#define ETH_P_SCA 0x6007 /* DEC Systems Comms Arch */ +#define ETH_P_TEB 0x6558 /* Trans Ether Bridging */ +#define ETH_P_RARP 0x8035 /* Reverse Addr Res packet */ +#define ETH_P_ATALK 0x809B /* Appletalk DDP */ +#define ETH_P_AARP 0x80F3 /* Appletalk AARP */ +#define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */ +#define ETH_P_ERSPAN 0x88BE /* ERSPAN type II */ +#define ETH_P_IPX 0x8137 /* IPX over DIX */ +#define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */ +#define ETH_P_PAUSE 0x8808 /* IEEE Pause frames. See 802.3 31B */ +#define ETH_P_SLOW 0x8809 /* Slow Protocol. See 802.3ad 43B */ +#define ETH_P_WCCP 0x883E /* Web-cache coordination protocol + * defined in draft-wilson-wrec-wccp-v2-00.txt */ +#define ETH_P_MPLS_UC 0x8847 /* MPLS Unicast traffic */ +#define ETH_P_MPLS_MC 0x8848 /* MPLS Multicast traffic */ +#define ETH_P_ATMMPOA 0x884c /* MultiProtocol Over ATM */ +#define ETH_P_PPP_DISC 0x8863 /* PPPoE discovery messages */ +#define ETH_P_PPP_SES 0x8864 /* PPPoE session messages */ +#define ETH_P_LINK_CTL 0x886c /* HPNA, wlan link local tunnel */ +#define ETH_P_ATMFATE 0x8884 /* Frame-based ATM Transport + * over Ethernet + */ +#define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ +#define ETH_P_PROFINET 0x8892 /* PROFINET */ +#define ETH_P_REALTEK 0x8899 /* Multiple proprietary protocols */ +#define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ +#define ETH_P_ETHERCAT 0x88A4 /* EtherCAT */ +#define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ +#define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */ +#define ETH_P_PREAUTH 0x88C7 /* 802.11 Preauthentication */ +#define ETH_P_TIPC 0x88CA /* TIPC */ +#define ETH_P_LLDP 0x88CC /* Link Layer Discovery Protocol */ +#define ETH_P_MRP 0x88E3 /* Media Redundancy Protocol */ +#define ETH_P_MACSEC 0x88E5 /* 802.1ae MACsec */ +#define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */ +#define ETH_P_MVRP 0x88F5 /* 802.1Q MVRP */ +#define ETH_P_1588 0x88F7 /* IEEE 1588 Timesync */ +#define ETH_P_NCSI 0x88F8 /* NCSI protocol */ +#define ETH_P_PRP 0x88FB /* IEC 62439-3 PRP/HSRv0 */ +#define ETH_P_CFM 0x8902 /* Connectivity Fault Management */ +#define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */ +#define ETH_P_IBOE 0x8915 /* Infiniband over Ethernet */ +#define ETH_P_TDLS 0x890D /* TDLS */ +#define ETH_P_FIP 0x8914 /* FCoE Initialization Protocol */ +#define ETH_P_80221 0x8917 /* IEEE 802.21 Media Independent Handover Protocol */ +#define ETH_P_HSR 0x892F /* IEC 62439-3 HSRv1 */ +#define ETH_P_NSH 0x894F /* Network Service Header */ +#define ETH_P_LOOPBACK 0x9000 /* Ethernet loopback packet, per IEEE 802.3 */ +#define ETH_P_QINQ1 0x9100 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_QINQ2 0x9200 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_QINQ3 0x9300 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_DSA_8021Q 0xDADB /* Fake VLAN Header for DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_DSA_A5PSW 0xE001 /* A5PSW Tag Value [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_IFE 0xED3E /* ForCES inter-FE LFB type */ +#define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */ + +#define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is more than this value + * then the frame is Ethernet II. Else it is 802.3 */ + +/* + * Non DIX types. Won't clash for 1500 types. + */ + +#define ETH_P_802_3 0x0001 /* Dummy type for 802.3 frames */ +#define ETH_P_AX25 0x0002 /* Dummy protocol id for AX.25 */ +#define ETH_P_ALL 0x0003 /* Every packet (be careful!!!) */ +#define ETH_P_802_2 0x0004 /* 802.2 frames */ +#define ETH_P_SNAP 0x0005 /* Internal only */ +#define ETH_P_DDCMP 0x0006 /* DEC DDCMP: Internal only */ +#define ETH_P_WAN_PPP 0x0007 /* Dummy type for WAN PPP frames*/ +#define ETH_P_PPP_MP 0x0008 /* Dummy type for PPP MP frames */ +#define ETH_P_LOCALTALK 0x0009 /* Localtalk pseudo type */ +#define ETH_P_CAN 0x000C /* CAN: Controller Area Network */ +#define ETH_P_CANFD 0x000D /* CANFD: CAN flexible data rate*/ +#define ETH_P_CANXL 0x000E /* CANXL: eXtended frame Length */ +#define ETH_P_PPPTALK 0x0010 /* Dummy type for Atalk over PPP*/ +#define ETH_P_TR_802_2 0x0011 /* 802.2 frames */ +#define ETH_P_MOBITEX 0x0015 /* Mobitex (kaz@cafe.net) */ +#define ETH_P_CONTROL 0x0016 /* Card specific control frames */ +#define ETH_P_IRDA 0x0017 /* Linux-IrDA */ +#define ETH_P_ECONET 0x0018 /* Acorn Econet */ +#define ETH_P_HDLC 0x0019 /* HDLC frames */ +#define ETH_P_ARCNET 0x001A /* 1A for ArcNet :-) */ +#define ETH_P_DSA 0x001B /* Distributed Switch Arch. */ +#define ETH_P_TRAILER 0x001C /* Trailer switch tagging */ +#define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ +#define ETH_P_IEEE802154 0x00F6 /* IEEE802.15.4 frame */ +#define ETH_P_CAIF 0x00F7 /* ST-Ericsson CAIF protocol */ +#define ETH_P_XDSA 0x00F8 /* Multiplexed DSA protocol */ +#define ETH_P_MAP 0x00F9 /* Qualcomm multiplexing and + * aggregation protocol + */ +#define ETH_P_MCTP 0x00FA /* Management component transport + * protocol packets + */ + +/* + * This is an Ethernet frame header. + */ + +/* allow libcs like musl to deactivate this, glibc does not implement this. */ +#ifndef __UAPI_DEF_ETHHDR +#define __UAPI_DEF_ETHHDR 1 +#endif + +#if __UAPI_DEF_ETHHDR +struct ethhdr { + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + unsigned char h_source[ETH_ALEN]; /* source ether addr */ + __be16 h_proto; /* packet type ID field */ +} __attribute__((packed)); +#endif + + +#endif /* _UAPI_LINUX_IF_ETHER_H */ diff --git a/src/basic/linux/if_link.h b/src/basic/linux/if_link.h new file mode 100644 index 0000000..1021a7e --- /dev/null +++ b/src/basic/linux/if_link.h @@ -0,0 +1,1392 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_IF_LINK_H +#define _UAPI_LINUX_IF_LINK_H + +#include +#include + +/* This struct should be in sync with struct rtnl_link_stats64 */ +struct rtnl_link_stats { + __u32 rx_packets; + __u32 tx_packets; + __u32 rx_bytes; + __u32 tx_bytes; + __u32 rx_errors; + __u32 tx_errors; + __u32 rx_dropped; + __u32 tx_dropped; + __u32 multicast; + __u32 collisions; + /* detailed rx_errors: */ + __u32 rx_length_errors; + __u32 rx_over_errors; + __u32 rx_crc_errors; + __u32 rx_frame_errors; + __u32 rx_fifo_errors; + __u32 rx_missed_errors; + + /* detailed tx_errors */ + __u32 tx_aborted_errors; + __u32 tx_carrier_errors; + __u32 tx_fifo_errors; + __u32 tx_heartbeat_errors; + __u32 tx_window_errors; + + /* for cslip etc */ + __u32 rx_compressed; + __u32 tx_compressed; + + __u32 rx_nohandler; +}; + +/** + * struct rtnl_link_stats64 - The main device statistics structure. + * + * @rx_packets: Number of good packets received by the interface. + * For hardware interfaces counts all good packets received from the device + * by the host, including packets which host had to drop at various stages + * of processing (even in the driver). + * + * @tx_packets: Number of packets successfully transmitted. + * For hardware interfaces counts packets which host was able to successfully + * hand over to the device, which does not necessarily mean that packets + * had been successfully transmitted out of the device, only that device + * acknowledged it copied them out of host memory. + * + * @rx_bytes: Number of good received bytes, corresponding to @rx_packets. + * + * For IEEE 802.3 devices should count the length of Ethernet Frames + * excluding the FCS. + * + * @tx_bytes: Number of good transmitted bytes, corresponding to @tx_packets. + * + * For IEEE 802.3 devices should count the length of Ethernet Frames + * excluding the FCS. + * + * @rx_errors: Total number of bad packets received on this network device. + * This counter must include events counted by @rx_length_errors, + * @rx_crc_errors, @rx_frame_errors and other errors not otherwise + * counted. + * + * @tx_errors: Total number of transmit problems. + * This counter must include events counter by @tx_aborted_errors, + * @tx_carrier_errors, @tx_fifo_errors, @tx_heartbeat_errors, + * @tx_window_errors and other errors not otherwise counted. + * + * @rx_dropped: Number of packets received but not processed, + * e.g. due to lack of resources or unsupported protocol. + * For hardware interfaces this counter may include packets discarded + * due to L2 address filtering but should not include packets dropped + * by the device due to buffer exhaustion which are counted separately in + * @rx_missed_errors (since procfs folds those two counters together). + * + * @tx_dropped: Number of packets dropped on their way to transmission, + * e.g. due to lack of resources. + * + * @multicast: Multicast packets received. + * For hardware interfaces this statistic is commonly calculated + * at the device level (unlike @rx_packets) and therefore may include + * packets which did not reach the host. + * + * For IEEE 802.3 devices this counter may be equivalent to: + * + * - 30.3.1.1.21 aMulticastFramesReceivedOK + * + * @collisions: Number of collisions during packet transmissions. + * + * @rx_length_errors: Number of packets dropped due to invalid length. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter should be equivalent to a sum + * of the following attributes: + * + * - 30.3.1.1.23 aInRangeLengthErrors + * - 30.3.1.1.24 aOutOfRangeLengthField + * - 30.3.1.1.25 aFrameTooLongErrors + * + * @rx_over_errors: Receiver FIFO overflow event counter. + * + * Historically the count of overflow events. Such events may be + * reported in the receive descriptors or via interrupts, and may + * not correspond one-to-one with dropped packets. + * + * The recommended interpretation for high speed interfaces is - + * number of packets dropped because they did not fit into buffers + * provided by the host, e.g. packets larger than MTU or next buffer + * in the ring was not available for a scatter transfer. + * + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * This statistics was historically used interchangeably with + * @rx_fifo_errors. + * + * This statistic corresponds to hardware events and is not commonly used + * on software devices. + * + * @rx_crc_errors: Number of packets received with a CRC error. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.6 aFrameCheckSequenceErrors + * + * @rx_frame_errors: Receiver frame alignment errors. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter should be equivalent to: + * + * - 30.3.1.1.7 aAlignmentErrors + * + * @rx_fifo_errors: Receiver FIFO error counter. + * + * Historically the count of overflow events. Those events may be + * reported in the receive descriptors or via interrupts, and may + * not correspond one-to-one with dropped packets. + * + * This statistics was used interchangeably with @rx_over_errors. + * Not recommended for use in drivers for high speed interfaces. + * + * This statistic is used on software devices, e.g. to count software + * packet queue overflow (can) or sequencing errors (GRE). + * + * @rx_missed_errors: Count of packets missed by the host. + * Folded into the "drop" counter in `/proc/net/dev`. + * + * Counts number of packets dropped by the device due to lack + * of buffer space. This usually indicates that the host interface + * is slower than the network interface, or host is not keeping up + * with the receive packet rate. + * + * This statistic corresponds to hardware events and is not used + * on software devices. + * + * @tx_aborted_errors: + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * For IEEE 802.3 devices capable of half-duplex operation this counter + * must be equivalent to: + * + * - 30.3.1.1.11 aFramesAbortedDueToXSColls + * + * High speed interfaces may use this counter as a general device + * discard counter. + * + * @tx_carrier_errors: Number of frame transmission errors due to loss + * of carrier during transmission. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.13 aCarrierSenseErrors + * + * @tx_fifo_errors: Number of frame transmission errors due to device + * FIFO underrun / underflow. This condition occurs when the device + * begins transmission of a frame but is unable to deliver the + * entire frame to the transmitter in time for transmission. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * @tx_heartbeat_errors: Number of Heartbeat / SQE Test errors for + * old half-duplex Ethernet. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices possibly equivalent to: + * + * - 30.3.2.1.4 aSQETestErrors + * + * @tx_window_errors: Number of frame transmission errors due + * to late collisions (for Ethernet - after the first 64B of transmission). + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.10 aLateCollisions + * + * @rx_compressed: Number of correctly received compressed packets. + * This counters is only meaningful for interfaces which support + * packet compression (e.g. CSLIP, PPP). + * + * @tx_compressed: Number of transmitted compressed packets. + * This counters is only meaningful for interfaces which support + * packet compression (e.g. CSLIP, PPP). + * + * @rx_nohandler: Number of packets received on the interface + * but dropped by the networking stack because the device is + * not designated to receive packets (e.g. backup link in a bond). + * + * @rx_otherhost_dropped: Number of packets dropped due to mismatch + * in destination MAC address. + */ +struct rtnl_link_stats64 { + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 rx_errors; + __u64 tx_errors; + __u64 rx_dropped; + __u64 tx_dropped; + __u64 multicast; + __u64 collisions; + + /* detailed rx_errors: */ + __u64 rx_length_errors; + __u64 rx_over_errors; + __u64 rx_crc_errors; + __u64 rx_frame_errors; + __u64 rx_fifo_errors; + __u64 rx_missed_errors; + + /* detailed tx_errors */ + __u64 tx_aborted_errors; + __u64 tx_carrier_errors; + __u64 tx_fifo_errors; + __u64 tx_heartbeat_errors; + __u64 tx_window_errors; + + /* for cslip etc */ + __u64 rx_compressed; + __u64 tx_compressed; + __u64 rx_nohandler; + + __u64 rx_otherhost_dropped; +}; + +/* Subset of link stats useful for in-HW collection. Meaning of the fields is as + * for struct rtnl_link_stats64. + */ +struct rtnl_hw_stats64 { + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 rx_errors; + __u64 tx_errors; + __u64 rx_dropped; + __u64 tx_dropped; + __u64 multicast; +}; + +/* The struct should be in sync with struct ifmap */ +struct rtnl_link_ifmap { + __u64 mem_start; + __u64 mem_end; + __u64 base_addr; + __u16 irq; + __u8 dma; + __u8 port; +}; + +/* + * IFLA_AF_SPEC + * Contains nested attributes for address family specific attributes. + * Each address family may create a attribute with the address family + * number as type and create its own attribute structure in it. + * + * Example: + * [IFLA_AF_SPEC] = { + * [AF_INET] = { + * [IFLA_INET_CONF] = ..., + * }, + * [AF_INET6] = { + * [IFLA_INET6_FLAGS] = ..., + * [IFLA_INET6_CONF] = ..., + * } + * } + */ + +enum { + IFLA_UNSPEC, + IFLA_ADDRESS, + IFLA_BROADCAST, + IFLA_IFNAME, + IFLA_MTU, + IFLA_LINK, + IFLA_QDISC, + IFLA_STATS, + IFLA_COST, +#define IFLA_COST IFLA_COST + IFLA_PRIORITY, +#define IFLA_PRIORITY IFLA_PRIORITY + IFLA_MASTER, +#define IFLA_MASTER IFLA_MASTER + IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */ +#define IFLA_WIRELESS IFLA_WIRELESS + IFLA_PROTINFO, /* Protocol specific information for a link */ +#define IFLA_PROTINFO IFLA_PROTINFO + IFLA_TXQLEN, +#define IFLA_TXQLEN IFLA_TXQLEN + IFLA_MAP, +#define IFLA_MAP IFLA_MAP + IFLA_WEIGHT, +#define IFLA_WEIGHT IFLA_WEIGHT + IFLA_OPERSTATE, + IFLA_LINKMODE, + IFLA_LINKINFO, +#define IFLA_LINKINFO IFLA_LINKINFO + IFLA_NET_NS_PID, + IFLA_IFALIAS, + IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */ + IFLA_VFINFO_LIST, + IFLA_STATS64, + IFLA_VF_PORTS, + IFLA_PORT_SELF, + IFLA_AF_SPEC, + IFLA_GROUP, /* Group the device belongs to */ + IFLA_NET_NS_FD, + IFLA_EXT_MASK, /* Extended info mask, VFs, etc */ + IFLA_PROMISCUITY, /* Promiscuity count: > 0 means acts PROMISC */ +#define IFLA_PROMISCUITY IFLA_PROMISCUITY + IFLA_NUM_TX_QUEUES, + IFLA_NUM_RX_QUEUES, + IFLA_CARRIER, + IFLA_PHYS_PORT_ID, + IFLA_CARRIER_CHANGES, + IFLA_PHYS_SWITCH_ID, + IFLA_LINK_NETNSID, + IFLA_PHYS_PORT_NAME, + IFLA_PROTO_DOWN, + IFLA_GSO_MAX_SEGS, + IFLA_GSO_MAX_SIZE, + IFLA_PAD, + IFLA_XDP, + IFLA_EVENT, + IFLA_NEW_NETNSID, + IFLA_IF_NETNSID, + IFLA_TARGET_NETNSID = IFLA_IF_NETNSID, /* new alias */ + IFLA_CARRIER_UP_COUNT, + IFLA_CARRIER_DOWN_COUNT, + IFLA_NEW_IFINDEX, + IFLA_MIN_MTU, + IFLA_MAX_MTU, + IFLA_PROP_LIST, + IFLA_ALT_IFNAME, /* Alternative ifname */ + IFLA_PERM_ADDRESS, + IFLA_PROTO_DOWN_REASON, + + /* device (sysfs) name as parent, used instead + * of IFLA_LINK where there's no parent netdev + */ + IFLA_PARENT_DEV_NAME, + IFLA_PARENT_DEV_BUS_NAME, + IFLA_GRO_MAX_SIZE, + IFLA_TSO_MAX_SIZE, + IFLA_TSO_MAX_SEGS, + IFLA_ALLMULTI, /* Allmulti count: > 0 means acts ALLMULTI */ + + IFLA_DEVLINK_PORT, + + __IFLA_MAX +}; + + +#define IFLA_MAX (__IFLA_MAX - 1) + +enum { + IFLA_PROTO_DOWN_REASON_UNSPEC, + IFLA_PROTO_DOWN_REASON_MASK, /* u32, mask for reason bits */ + IFLA_PROTO_DOWN_REASON_VALUE, /* u32, reason bit value */ + + __IFLA_PROTO_DOWN_REASON_CNT, + IFLA_PROTO_DOWN_REASON_MAX = __IFLA_PROTO_DOWN_REASON_CNT - 1 +}; + +/* backwards compatibility for userspace */ +#ifndef __KERNEL__ +#define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) +#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg)) +#endif + +enum { + IFLA_INET_UNSPEC, + IFLA_INET_CONF, + __IFLA_INET_MAX, +}; + +#define IFLA_INET_MAX (__IFLA_INET_MAX - 1) + +/* ifi_flags. + + IFF_* flags. + + The only change is: + IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are + more not changeable by user. They describe link media + characteristics and set by device driver. + + Comments: + - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid + - If neither of these three flags are set; + the interface is NBMA. + + - IFF_MULTICAST does not mean anything special: + multicasts can be used on all not-NBMA links. + IFF_MULTICAST means that this media uses special encapsulation + for multicast frames. Apparently, all IFF_POINTOPOINT and + IFF_BROADCAST devices are able to use multicasts too. + */ + +/* IFLA_LINK. + For usual devices it is equal ifi_index. + If it is a "virtual interface" (f.e. tunnel), ifi_link + can point to real physical interface (f.e. for bandwidth calculations), + or maybe 0, what means, that real media is unknown (usual + for IPIP tunnels, when route to endpoint is allowed to change) + */ + +/* Subtype attributes for IFLA_PROTINFO */ +enum { + IFLA_INET6_UNSPEC, + IFLA_INET6_FLAGS, /* link flags */ + IFLA_INET6_CONF, /* sysctl parameters */ + IFLA_INET6_STATS, /* statistics */ + IFLA_INET6_MCAST, /* MC things. What of them? */ + IFLA_INET6_CACHEINFO, /* time values and max reasm size */ + IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */ + IFLA_INET6_TOKEN, /* device token */ + IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */ + IFLA_INET6_RA_MTU, /* mtu carried in the RA message */ + __IFLA_INET6_MAX +}; + +#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) + +enum in6_addr_gen_mode { + IN6_ADDR_GEN_MODE_EUI64, + IN6_ADDR_GEN_MODE_NONE, + IN6_ADDR_GEN_MODE_STABLE_PRIVACY, + IN6_ADDR_GEN_MODE_RANDOM, +}; + +/* Bridge section */ + +enum { + IFLA_BR_UNSPEC, + IFLA_BR_FORWARD_DELAY, + IFLA_BR_HELLO_TIME, + IFLA_BR_MAX_AGE, + IFLA_BR_AGEING_TIME, + IFLA_BR_STP_STATE, + IFLA_BR_PRIORITY, + IFLA_BR_VLAN_FILTERING, + IFLA_BR_VLAN_PROTOCOL, + IFLA_BR_GROUP_FWD_MASK, + IFLA_BR_ROOT_ID, + IFLA_BR_BRIDGE_ID, + IFLA_BR_ROOT_PORT, + IFLA_BR_ROOT_PATH_COST, + IFLA_BR_TOPOLOGY_CHANGE, + IFLA_BR_TOPOLOGY_CHANGE_DETECTED, + IFLA_BR_HELLO_TIMER, + IFLA_BR_TCN_TIMER, + IFLA_BR_TOPOLOGY_CHANGE_TIMER, + IFLA_BR_GC_TIMER, + IFLA_BR_GROUP_ADDR, + IFLA_BR_FDB_FLUSH, + IFLA_BR_MCAST_ROUTER, + IFLA_BR_MCAST_SNOOPING, + IFLA_BR_MCAST_QUERY_USE_IFADDR, + IFLA_BR_MCAST_QUERIER, + IFLA_BR_MCAST_HASH_ELASTICITY, + IFLA_BR_MCAST_HASH_MAX, + IFLA_BR_MCAST_LAST_MEMBER_CNT, + IFLA_BR_MCAST_STARTUP_QUERY_CNT, + IFLA_BR_MCAST_LAST_MEMBER_INTVL, + IFLA_BR_MCAST_MEMBERSHIP_INTVL, + IFLA_BR_MCAST_QUERIER_INTVL, + IFLA_BR_MCAST_QUERY_INTVL, + IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, + IFLA_BR_MCAST_STARTUP_QUERY_INTVL, + IFLA_BR_NF_CALL_IPTABLES, + IFLA_BR_NF_CALL_IP6TABLES, + IFLA_BR_NF_CALL_ARPTABLES, + IFLA_BR_VLAN_DEFAULT_PVID, + IFLA_BR_PAD, + IFLA_BR_VLAN_STATS_ENABLED, + IFLA_BR_MCAST_STATS_ENABLED, + IFLA_BR_MCAST_IGMP_VERSION, + IFLA_BR_MCAST_MLD_VERSION, + IFLA_BR_VLAN_STATS_PER_PORT, + IFLA_BR_MULTI_BOOLOPT, + IFLA_BR_MCAST_QUERIER_STATE, + __IFLA_BR_MAX, +}; + +#define IFLA_BR_MAX (__IFLA_BR_MAX - 1) + +struct ifla_bridge_id { + __u8 prio[2]; + __u8 addr[6]; /* ETH_ALEN */ +}; + +enum { + BRIDGE_MODE_UNSPEC, + BRIDGE_MODE_HAIRPIN, +}; + +enum { + IFLA_BRPORT_UNSPEC, + IFLA_BRPORT_STATE, /* Spanning tree state */ + IFLA_BRPORT_PRIORITY, /* " priority */ + IFLA_BRPORT_COST, /* " cost */ + IFLA_BRPORT_MODE, /* mode (hairpin) */ + IFLA_BRPORT_GUARD, /* bpdu guard */ + IFLA_BRPORT_PROTECT, /* root port protection */ + IFLA_BRPORT_FAST_LEAVE, /* multicast fast leave */ + IFLA_BRPORT_LEARNING, /* mac learning */ + IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */ + IFLA_BRPORT_PROXYARP, /* proxy ARP */ + IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */ + IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */ + IFLA_BRPORT_ROOT_ID, /* designated root */ + IFLA_BRPORT_BRIDGE_ID, /* designated bridge */ + IFLA_BRPORT_DESIGNATED_PORT, + IFLA_BRPORT_DESIGNATED_COST, + IFLA_BRPORT_ID, + IFLA_BRPORT_NO, + IFLA_BRPORT_TOPOLOGY_CHANGE_ACK, + IFLA_BRPORT_CONFIG_PENDING, + IFLA_BRPORT_MESSAGE_AGE_TIMER, + IFLA_BRPORT_FORWARD_DELAY_TIMER, + IFLA_BRPORT_HOLD_TIMER, + IFLA_BRPORT_FLUSH, + IFLA_BRPORT_MULTICAST_ROUTER, + IFLA_BRPORT_PAD, + IFLA_BRPORT_MCAST_FLOOD, + IFLA_BRPORT_MCAST_TO_UCAST, + IFLA_BRPORT_VLAN_TUNNEL, + IFLA_BRPORT_BCAST_FLOOD, + IFLA_BRPORT_GROUP_FWD_MASK, + IFLA_BRPORT_NEIGH_SUPPRESS, + IFLA_BRPORT_ISOLATED, + IFLA_BRPORT_BACKUP_PORT, + IFLA_BRPORT_MRP_RING_OPEN, + IFLA_BRPORT_MRP_IN_OPEN, + IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, + IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, + IFLA_BRPORT_LOCKED, + IFLA_BRPORT_MAB, + __IFLA_BRPORT_MAX +}; +#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) + +struct ifla_cacheinfo { + __u32 max_reasm_len; + __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ + __u32 reachable_time; + __u32 retrans_time; +}; + +enum { + IFLA_INFO_UNSPEC, + IFLA_INFO_KIND, + IFLA_INFO_DATA, + IFLA_INFO_XSTATS, + IFLA_INFO_SLAVE_KIND, + IFLA_INFO_SLAVE_DATA, + __IFLA_INFO_MAX, +}; + +#define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1) + +/* VLAN section */ + +enum { + IFLA_VLAN_UNSPEC, + IFLA_VLAN_ID, + IFLA_VLAN_FLAGS, + IFLA_VLAN_EGRESS_QOS, + IFLA_VLAN_INGRESS_QOS, + IFLA_VLAN_PROTOCOL, + __IFLA_VLAN_MAX, +}; + +#define IFLA_VLAN_MAX (__IFLA_VLAN_MAX - 1) + +struct ifla_vlan_flags { + __u32 flags; + __u32 mask; +}; + +enum { + IFLA_VLAN_QOS_UNSPEC, + IFLA_VLAN_QOS_MAPPING, + __IFLA_VLAN_QOS_MAX +}; + +#define IFLA_VLAN_QOS_MAX (__IFLA_VLAN_QOS_MAX - 1) + +struct ifla_vlan_qos_mapping { + __u32 from; + __u32 to; +}; + +/* MACVLAN section */ +enum { + IFLA_MACVLAN_UNSPEC, + IFLA_MACVLAN_MODE, + IFLA_MACVLAN_FLAGS, + IFLA_MACVLAN_MACADDR_MODE, + IFLA_MACVLAN_MACADDR, + IFLA_MACVLAN_MACADDR_DATA, + IFLA_MACVLAN_MACADDR_COUNT, + IFLA_MACVLAN_BC_QUEUE_LEN, + IFLA_MACVLAN_BC_QUEUE_LEN_USED, + __IFLA_MACVLAN_MAX, +}; + +#define IFLA_MACVLAN_MAX (__IFLA_MACVLAN_MAX - 1) + +enum macvlan_mode { + MACVLAN_MODE_PRIVATE = 1, /* don't talk to other macvlans */ + MACVLAN_MODE_VEPA = 2, /* talk to other ports through ext bridge */ + MACVLAN_MODE_BRIDGE = 4, /* talk to bridge ports directly */ + MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */ + MACVLAN_MODE_SOURCE = 16,/* use source MAC address list to assign */ +}; + +enum macvlan_macaddr_mode { + MACVLAN_MACADDR_ADD, + MACVLAN_MACADDR_DEL, + MACVLAN_MACADDR_FLUSH, + MACVLAN_MACADDR_SET, +}; + +#define MACVLAN_FLAG_NOPROMISC 1 +#define MACVLAN_FLAG_NODST 2 /* skip dst macvlan if matching src macvlan */ + +/* VRF section */ +enum { + IFLA_VRF_UNSPEC, + IFLA_VRF_TABLE, + __IFLA_VRF_MAX +}; + +#define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1) + +enum { + IFLA_VRF_PORT_UNSPEC, + IFLA_VRF_PORT_TABLE, + __IFLA_VRF_PORT_MAX +}; + +#define IFLA_VRF_PORT_MAX (__IFLA_VRF_PORT_MAX - 1) + +/* MACSEC section */ +enum { + IFLA_MACSEC_UNSPEC, + IFLA_MACSEC_SCI, + IFLA_MACSEC_PORT, + IFLA_MACSEC_ICV_LEN, + IFLA_MACSEC_CIPHER_SUITE, + IFLA_MACSEC_WINDOW, + IFLA_MACSEC_ENCODING_SA, + IFLA_MACSEC_ENCRYPT, + IFLA_MACSEC_PROTECT, + IFLA_MACSEC_INC_SCI, + IFLA_MACSEC_ES, + IFLA_MACSEC_SCB, + IFLA_MACSEC_REPLAY_PROTECT, + IFLA_MACSEC_VALIDATION, + IFLA_MACSEC_PAD, + IFLA_MACSEC_OFFLOAD, + __IFLA_MACSEC_MAX, +}; + +#define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1) + +/* XFRM section */ +enum { + IFLA_XFRM_UNSPEC, + IFLA_XFRM_LINK, + IFLA_XFRM_IF_ID, + IFLA_XFRM_COLLECT_METADATA, + __IFLA_XFRM_MAX +}; + +#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1) + +enum macsec_validation_type { + MACSEC_VALIDATE_DISABLED = 0, + MACSEC_VALIDATE_CHECK = 1, + MACSEC_VALIDATE_STRICT = 2, + __MACSEC_VALIDATE_END, + MACSEC_VALIDATE_MAX = __MACSEC_VALIDATE_END - 1, +}; + +enum macsec_offload { + MACSEC_OFFLOAD_OFF = 0, + MACSEC_OFFLOAD_PHY = 1, + MACSEC_OFFLOAD_MAC = 2, + __MACSEC_OFFLOAD_END, + MACSEC_OFFLOAD_MAX = __MACSEC_OFFLOAD_END - 1, +}; + +/* IPVLAN section */ +enum { + IFLA_IPVLAN_UNSPEC, + IFLA_IPVLAN_MODE, + IFLA_IPVLAN_FLAGS, + __IFLA_IPVLAN_MAX +}; + +#define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1) + +enum ipvlan_mode { + IPVLAN_MODE_L2 = 0, + IPVLAN_MODE_L3, + IPVLAN_MODE_L3S, + IPVLAN_MODE_MAX +}; + +#define IPVLAN_F_PRIVATE 0x01 +#define IPVLAN_F_VEPA 0x02 + +/* Tunnel RTM header */ +struct tunnel_msg { + __u8 family; + __u8 flags; + __u16 reserved2; + __u32 ifindex; +}; + +/* VXLAN section */ + +/* include statistics in the dump */ +#define TUNNEL_MSG_FLAG_STATS 0x01 + +#define TUNNEL_MSG_VALID_USER_FLAGS TUNNEL_MSG_FLAG_STATS + +/* Embedded inside VXLAN_VNIFILTER_ENTRY_STATS */ +enum { + VNIFILTER_ENTRY_STATS_UNSPEC, + VNIFILTER_ENTRY_STATS_RX_BYTES, + VNIFILTER_ENTRY_STATS_RX_PKTS, + VNIFILTER_ENTRY_STATS_RX_DROPS, + VNIFILTER_ENTRY_STATS_RX_ERRORS, + VNIFILTER_ENTRY_STATS_TX_BYTES, + VNIFILTER_ENTRY_STATS_TX_PKTS, + VNIFILTER_ENTRY_STATS_TX_DROPS, + VNIFILTER_ENTRY_STATS_TX_ERRORS, + VNIFILTER_ENTRY_STATS_PAD, + __VNIFILTER_ENTRY_STATS_MAX +}; +#define VNIFILTER_ENTRY_STATS_MAX (__VNIFILTER_ENTRY_STATS_MAX - 1) + +enum { + VXLAN_VNIFILTER_ENTRY_UNSPEC, + VXLAN_VNIFILTER_ENTRY_START, + VXLAN_VNIFILTER_ENTRY_END, + VXLAN_VNIFILTER_ENTRY_GROUP, + VXLAN_VNIFILTER_ENTRY_GROUP6, + VXLAN_VNIFILTER_ENTRY_STATS, + __VXLAN_VNIFILTER_ENTRY_MAX +}; +#define VXLAN_VNIFILTER_ENTRY_MAX (__VXLAN_VNIFILTER_ENTRY_MAX - 1) + +enum { + VXLAN_VNIFILTER_UNSPEC, + VXLAN_VNIFILTER_ENTRY, + __VXLAN_VNIFILTER_MAX +}; +#define VXLAN_VNIFILTER_MAX (__VXLAN_VNIFILTER_MAX - 1) + +enum { + IFLA_VXLAN_UNSPEC, + IFLA_VXLAN_ID, + IFLA_VXLAN_GROUP, /* group or remote address */ + IFLA_VXLAN_LINK, + IFLA_VXLAN_LOCAL, + IFLA_VXLAN_TTL, + IFLA_VXLAN_TOS, + IFLA_VXLAN_LEARNING, + IFLA_VXLAN_AGEING, + IFLA_VXLAN_LIMIT, + IFLA_VXLAN_PORT_RANGE, /* source port */ + IFLA_VXLAN_PROXY, + IFLA_VXLAN_RSC, + IFLA_VXLAN_L2MISS, + IFLA_VXLAN_L3MISS, + IFLA_VXLAN_PORT, /* destination port */ + IFLA_VXLAN_GROUP6, + IFLA_VXLAN_LOCAL6, + IFLA_VXLAN_UDP_CSUM, + IFLA_VXLAN_UDP_ZERO_CSUM6_TX, + IFLA_VXLAN_UDP_ZERO_CSUM6_RX, + IFLA_VXLAN_REMCSUM_TX, + IFLA_VXLAN_REMCSUM_RX, + IFLA_VXLAN_GBP, + IFLA_VXLAN_REMCSUM_NOPARTIAL, + IFLA_VXLAN_COLLECT_METADATA, + IFLA_VXLAN_LABEL, + IFLA_VXLAN_GPE, + IFLA_VXLAN_TTL_INHERIT, + IFLA_VXLAN_DF, + IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ + __IFLA_VXLAN_MAX +}; +#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) + +struct ifla_vxlan_port_range { + __be16 low; + __be16 high; +}; + +enum ifla_vxlan_df { + VXLAN_DF_UNSET = 0, + VXLAN_DF_SET, + VXLAN_DF_INHERIT, + __VXLAN_DF_END, + VXLAN_DF_MAX = __VXLAN_DF_END - 1, +}; + +/* GENEVE section */ +enum { + IFLA_GENEVE_UNSPEC, + IFLA_GENEVE_ID, + IFLA_GENEVE_REMOTE, + IFLA_GENEVE_TTL, + IFLA_GENEVE_TOS, + IFLA_GENEVE_PORT, /* destination port */ + IFLA_GENEVE_COLLECT_METADATA, + IFLA_GENEVE_REMOTE6, + IFLA_GENEVE_UDP_CSUM, + IFLA_GENEVE_UDP_ZERO_CSUM6_TX, + IFLA_GENEVE_UDP_ZERO_CSUM6_RX, + IFLA_GENEVE_LABEL, + IFLA_GENEVE_TTL_INHERIT, + IFLA_GENEVE_DF, + IFLA_GENEVE_INNER_PROTO_INHERIT, + __IFLA_GENEVE_MAX +}; +#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) + +enum ifla_geneve_df { + GENEVE_DF_UNSET = 0, + GENEVE_DF_SET, + GENEVE_DF_INHERIT, + __GENEVE_DF_END, + GENEVE_DF_MAX = __GENEVE_DF_END - 1, +}; + +/* Bareudp section */ +enum { + IFLA_BAREUDP_UNSPEC, + IFLA_BAREUDP_PORT, + IFLA_BAREUDP_ETHERTYPE, + IFLA_BAREUDP_SRCPORT_MIN, + IFLA_BAREUDP_MULTIPROTO_MODE, + __IFLA_BAREUDP_MAX +}; + +#define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1) + +/* PPP section */ +enum { + IFLA_PPP_UNSPEC, + IFLA_PPP_DEV_FD, + __IFLA_PPP_MAX +}; +#define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1) + +/* GTP section */ + +enum ifla_gtp_role { + GTP_ROLE_GGSN = 0, + GTP_ROLE_SGSN, +}; + +enum { + IFLA_GTP_UNSPEC, + IFLA_GTP_FD0, + IFLA_GTP_FD1, + IFLA_GTP_PDP_HASHSIZE, + IFLA_GTP_ROLE, + IFLA_GTP_CREATE_SOCKETS, + IFLA_GTP_RESTART_COUNT, + __IFLA_GTP_MAX, +}; +#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) + +/* Bonding section */ + +enum { + IFLA_BOND_UNSPEC, + IFLA_BOND_MODE, + IFLA_BOND_ACTIVE_SLAVE, + IFLA_BOND_MIIMON, + IFLA_BOND_UPDELAY, + IFLA_BOND_DOWNDELAY, + IFLA_BOND_USE_CARRIER, + IFLA_BOND_ARP_INTERVAL, + IFLA_BOND_ARP_IP_TARGET, + IFLA_BOND_ARP_VALIDATE, + IFLA_BOND_ARP_ALL_TARGETS, + IFLA_BOND_PRIMARY, + IFLA_BOND_PRIMARY_RESELECT, + IFLA_BOND_FAIL_OVER_MAC, + IFLA_BOND_XMIT_HASH_POLICY, + IFLA_BOND_RESEND_IGMP, + IFLA_BOND_NUM_PEER_NOTIF, + IFLA_BOND_ALL_SLAVES_ACTIVE, + IFLA_BOND_MIN_LINKS, + IFLA_BOND_LP_INTERVAL, + IFLA_BOND_PACKETS_PER_SLAVE, + IFLA_BOND_AD_LACP_RATE, + IFLA_BOND_AD_SELECT, + IFLA_BOND_AD_INFO, + IFLA_BOND_AD_ACTOR_SYS_PRIO, + IFLA_BOND_AD_USER_PORT_KEY, + IFLA_BOND_AD_ACTOR_SYSTEM, + IFLA_BOND_TLB_DYNAMIC_LB, + IFLA_BOND_PEER_NOTIF_DELAY, + IFLA_BOND_AD_LACP_ACTIVE, + IFLA_BOND_MISSED_MAX, + IFLA_BOND_NS_IP6_TARGET, + __IFLA_BOND_MAX, +}; + +#define IFLA_BOND_MAX (__IFLA_BOND_MAX - 1) + +enum { + IFLA_BOND_AD_INFO_UNSPEC, + IFLA_BOND_AD_INFO_AGGREGATOR, + IFLA_BOND_AD_INFO_NUM_PORTS, + IFLA_BOND_AD_INFO_ACTOR_KEY, + IFLA_BOND_AD_INFO_PARTNER_KEY, + IFLA_BOND_AD_INFO_PARTNER_MAC, + __IFLA_BOND_AD_INFO_MAX, +}; + +#define IFLA_BOND_AD_INFO_MAX (__IFLA_BOND_AD_INFO_MAX - 1) + +enum { + IFLA_BOND_SLAVE_UNSPEC, + IFLA_BOND_SLAVE_STATE, + IFLA_BOND_SLAVE_MII_STATUS, + IFLA_BOND_SLAVE_LINK_FAILURE_COUNT, + IFLA_BOND_SLAVE_PERM_HWADDR, + IFLA_BOND_SLAVE_QUEUE_ID, + IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, + IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, + IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, + IFLA_BOND_SLAVE_PRIO, + __IFLA_BOND_SLAVE_MAX, +}; + +#define IFLA_BOND_SLAVE_MAX (__IFLA_BOND_SLAVE_MAX - 1) + +/* SR-IOV virtual function management section */ + +enum { + IFLA_VF_INFO_UNSPEC, + IFLA_VF_INFO, + __IFLA_VF_INFO_MAX, +}; + +#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1) + +enum { + IFLA_VF_UNSPEC, + IFLA_VF_MAC, /* Hardware queue specific attributes */ + IFLA_VF_VLAN, /* VLAN ID and QoS */ + IFLA_VF_TX_RATE, /* Max TX Bandwidth Allocation */ + IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */ + IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */ + IFLA_VF_RATE, /* Min and Max TX Bandwidth Allocation */ + IFLA_VF_RSS_QUERY_EN, /* RSS Redirection Table and Hash Key query + * on/off switch + */ + IFLA_VF_STATS, /* network device statistics */ + IFLA_VF_TRUST, /* Trust VF */ + IFLA_VF_IB_NODE_GUID, /* VF Infiniband node GUID */ + IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */ + IFLA_VF_VLAN_LIST, /* nested list of vlans, option for QinQ */ + IFLA_VF_BROADCAST, /* VF broadcast */ + __IFLA_VF_MAX, +}; + +#define IFLA_VF_MAX (__IFLA_VF_MAX - 1) + +struct ifla_vf_mac { + __u32 vf; + __u8 mac[32]; /* MAX_ADDR_LEN */ +}; + +struct ifla_vf_broadcast { + __u8 broadcast[32]; +}; + +struct ifla_vf_vlan { + __u32 vf; + __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ + __u32 qos; +}; + +enum { + IFLA_VF_VLAN_INFO_UNSPEC, + IFLA_VF_VLAN_INFO, /* VLAN ID, QoS and VLAN protocol */ + __IFLA_VF_VLAN_INFO_MAX, +}; + +#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1) +#define MAX_VLAN_LIST_LEN 1 + +struct ifla_vf_vlan_info { + __u32 vf; + __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ + __u32 qos; + __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */ +}; + +struct ifla_vf_tx_rate { + __u32 vf; + __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ +}; + +struct ifla_vf_rate { + __u32 vf; + __u32 min_tx_rate; /* Min Bandwidth in Mbps */ + __u32 max_tx_rate; /* Max Bandwidth in Mbps */ +}; + +struct ifla_vf_spoofchk { + __u32 vf; + __u32 setting; +}; + +struct ifla_vf_guid { + __u32 vf; + __u64 guid; +}; + +enum { + IFLA_VF_LINK_STATE_AUTO, /* link state of the uplink */ + IFLA_VF_LINK_STATE_ENABLE, /* link always up */ + IFLA_VF_LINK_STATE_DISABLE, /* link always down */ + __IFLA_VF_LINK_STATE_MAX, +}; + +struct ifla_vf_link_state { + __u32 vf; + __u32 link_state; +}; + +struct ifla_vf_rss_query_en { + __u32 vf; + __u32 setting; +}; + +enum { + IFLA_VF_STATS_RX_PACKETS, + IFLA_VF_STATS_TX_PACKETS, + IFLA_VF_STATS_RX_BYTES, + IFLA_VF_STATS_TX_BYTES, + IFLA_VF_STATS_BROADCAST, + IFLA_VF_STATS_MULTICAST, + IFLA_VF_STATS_PAD, + IFLA_VF_STATS_RX_DROPPED, + IFLA_VF_STATS_TX_DROPPED, + __IFLA_VF_STATS_MAX, +}; + +#define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1) + +struct ifla_vf_trust { + __u32 vf; + __u32 setting; +}; + +/* VF ports management section + * + * Nested layout of set/get msg is: + * + * [IFLA_NUM_VF] + * [IFLA_VF_PORTS] + * [IFLA_VF_PORT] + * [IFLA_PORT_*], ... + * [IFLA_VF_PORT] + * [IFLA_PORT_*], ... + * ... + * [IFLA_PORT_SELF] + * [IFLA_PORT_*], ... + */ + +enum { + IFLA_VF_PORT_UNSPEC, + IFLA_VF_PORT, /* nest */ + __IFLA_VF_PORT_MAX, +}; + +#define IFLA_VF_PORT_MAX (__IFLA_VF_PORT_MAX - 1) + +enum { + IFLA_PORT_UNSPEC, + IFLA_PORT_VF, /* __u32 */ + IFLA_PORT_PROFILE, /* string */ + IFLA_PORT_VSI_TYPE, /* 802.1Qbg (pre-)standard VDP */ + IFLA_PORT_INSTANCE_UUID, /* binary UUID */ + IFLA_PORT_HOST_UUID, /* binary UUID */ + IFLA_PORT_REQUEST, /* __u8 */ + IFLA_PORT_RESPONSE, /* __u16, output only */ + __IFLA_PORT_MAX, +}; + +#define IFLA_PORT_MAX (__IFLA_PORT_MAX - 1) + +#define PORT_PROFILE_MAX 40 +#define PORT_UUID_MAX 16 +#define PORT_SELF_VF -1 + +enum { + PORT_REQUEST_PREASSOCIATE = 0, + PORT_REQUEST_PREASSOCIATE_RR, + PORT_REQUEST_ASSOCIATE, + PORT_REQUEST_DISASSOCIATE, +}; + +enum { + PORT_VDP_RESPONSE_SUCCESS = 0, + PORT_VDP_RESPONSE_INVALID_FORMAT, + PORT_VDP_RESPONSE_INSUFFICIENT_RESOURCES, + PORT_VDP_RESPONSE_UNUSED_VTID, + PORT_VDP_RESPONSE_VTID_VIOLATION, + PORT_VDP_RESPONSE_VTID_VERSION_VIOALTION, + PORT_VDP_RESPONSE_OUT_OF_SYNC, + /* 0x08-0xFF reserved for future VDP use */ + PORT_PROFILE_RESPONSE_SUCCESS = 0x100, + PORT_PROFILE_RESPONSE_INPROGRESS, + PORT_PROFILE_RESPONSE_INVALID, + PORT_PROFILE_RESPONSE_BADSTATE, + PORT_PROFILE_RESPONSE_INSUFFICIENT_RESOURCES, + PORT_PROFILE_RESPONSE_ERROR, +}; + +struct ifla_port_vsi { + __u8 vsi_mgr_id; + __u8 vsi_type_id[3]; + __u8 vsi_type_version; + __u8 pad[3]; +}; + + +/* IPoIB section */ + +enum { + IFLA_IPOIB_UNSPEC, + IFLA_IPOIB_PKEY, + IFLA_IPOIB_MODE, + IFLA_IPOIB_UMCAST, + __IFLA_IPOIB_MAX +}; + +enum { + IPOIB_MODE_DATAGRAM = 0, /* using unreliable datagram QPs */ + IPOIB_MODE_CONNECTED = 1, /* using connected QPs */ +}; + +#define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) + + +/* HSR/PRP section, both uses same interface */ + +/* Different redundancy protocols for hsr device */ +enum { + HSR_PROTOCOL_HSR, + HSR_PROTOCOL_PRP, + HSR_PROTOCOL_MAX, +}; + +enum { + IFLA_HSR_UNSPEC, + IFLA_HSR_SLAVE1, + IFLA_HSR_SLAVE2, + IFLA_HSR_MULTICAST_SPEC, /* Last byte of supervision addr */ + IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ + IFLA_HSR_SEQ_NR, + IFLA_HSR_VERSION, /* HSR version */ + IFLA_HSR_PROTOCOL, /* Indicate different protocol than + * HSR. For example PRP. + */ + __IFLA_HSR_MAX, +}; + +#define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1) + +/* STATS section */ + +struct if_stats_msg { + __u8 family; + __u8 pad1; + __u16 pad2; + __u32 ifindex; + __u32 filter_mask; +}; + +/* A stats attribute can be netdev specific or a global stat. + * For netdev stats, lets use the prefix IFLA_STATS_LINK_* + */ +enum { + IFLA_STATS_UNSPEC, /* also used as 64bit pad attribute */ + IFLA_STATS_LINK_64, + IFLA_STATS_LINK_XSTATS, + IFLA_STATS_LINK_XSTATS_SLAVE, + IFLA_STATS_LINK_OFFLOAD_XSTATS, + IFLA_STATS_AF_SPEC, + __IFLA_STATS_MAX, +}; + +#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1) + +#define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1)) + +enum { + IFLA_STATS_GETSET_UNSPEC, + IFLA_STATS_GET_FILTERS, /* Nest of IFLA_STATS_LINK_xxx, each a u32 with + * a filter mask for the corresponding group. + */ + IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS, /* 0 or 1 as u8 */ + __IFLA_STATS_GETSET_MAX, +}; + +#define IFLA_STATS_GETSET_MAX (__IFLA_STATS_GETSET_MAX - 1) + +/* These are embedded into IFLA_STATS_LINK_XSTATS: + * [IFLA_STATS_LINK_XSTATS] + * -> [LINK_XSTATS_TYPE_xxx] + * -> [rtnl link type specific attributes] + */ +enum { + LINK_XSTATS_TYPE_UNSPEC, + LINK_XSTATS_TYPE_BRIDGE, + LINK_XSTATS_TYPE_BOND, + __LINK_XSTATS_TYPE_MAX +}; +#define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1) + +/* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */ +enum { + IFLA_OFFLOAD_XSTATS_UNSPEC, + IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */ + IFLA_OFFLOAD_XSTATS_HW_S_INFO, /* HW stats info. A nest */ + IFLA_OFFLOAD_XSTATS_L3_STATS, /* struct rtnl_hw_stats64 */ + __IFLA_OFFLOAD_XSTATS_MAX +}; +#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1) + +enum { + IFLA_OFFLOAD_XSTATS_HW_S_INFO_UNSPEC, + IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST, /* u8 */ + IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED, /* u8 */ + __IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX, +}; +#define IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX \ + (__IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX - 1) + +/* XDP section */ + +#define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) +#define XDP_FLAGS_SKB_MODE (1U << 1) +#define XDP_FLAGS_DRV_MODE (1U << 2) +#define XDP_FLAGS_HW_MODE (1U << 3) +#define XDP_FLAGS_REPLACE (1U << 4) +#define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \ + XDP_FLAGS_DRV_MODE | \ + XDP_FLAGS_HW_MODE) +#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \ + XDP_FLAGS_MODES | XDP_FLAGS_REPLACE) + +/* These are stored into IFLA_XDP_ATTACHED on dump. */ +enum { + XDP_ATTACHED_NONE = 0, + XDP_ATTACHED_DRV, + XDP_ATTACHED_SKB, + XDP_ATTACHED_HW, + XDP_ATTACHED_MULTI, +}; + +enum { + IFLA_XDP_UNSPEC, + IFLA_XDP_FD, + IFLA_XDP_ATTACHED, + IFLA_XDP_FLAGS, + IFLA_XDP_PROG_ID, + IFLA_XDP_DRV_PROG_ID, + IFLA_XDP_SKB_PROG_ID, + IFLA_XDP_HW_PROG_ID, + IFLA_XDP_EXPECTED_FD, + __IFLA_XDP_MAX, +}; + +#define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1) + +enum { + IFLA_EVENT_NONE, + IFLA_EVENT_REBOOT, /* internal reset / reboot */ + IFLA_EVENT_FEATURES, /* change in offload features */ + IFLA_EVENT_BONDING_FAILOVER, /* change in active slave */ + IFLA_EVENT_NOTIFY_PEERS, /* re-sent grat. arp/ndisc */ + IFLA_EVENT_IGMP_RESEND, /* re-sent IGMP JOIN */ + IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */ +}; + +/* tun section */ + +enum { + IFLA_TUN_UNSPEC, + IFLA_TUN_OWNER, + IFLA_TUN_GROUP, + IFLA_TUN_TYPE, + IFLA_TUN_PI, + IFLA_TUN_VNET_HDR, + IFLA_TUN_PERSIST, + IFLA_TUN_MULTI_QUEUE, + IFLA_TUN_NUM_QUEUES, + IFLA_TUN_NUM_DISABLED_QUEUES, + __IFLA_TUN_MAX, +}; + +#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1) + +/* rmnet section */ + +#define RMNET_FLAGS_INGRESS_DEAGGREGATION (1U << 0) +#define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1) +#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) +#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) +#define RMNET_FLAGS_INGRESS_MAP_CKSUMV5 (1U << 4) +#define RMNET_FLAGS_EGRESS_MAP_CKSUMV5 (1U << 5) + +enum { + IFLA_RMNET_UNSPEC, + IFLA_RMNET_MUX_ID, + IFLA_RMNET_FLAGS, + __IFLA_RMNET_MAX, +}; + +#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1) + +struct ifla_rmnet_flags { + __u32 flags; + __u32 mask; +}; + +/* MCTP section */ + +enum { + IFLA_MCTP_UNSPEC, + IFLA_MCTP_NET, + __IFLA_MCTP_MAX, +}; + +#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1) + +/* DSA section */ + +enum { + IFLA_DSA_UNSPEC, + IFLA_DSA_MASTER, + __IFLA_DSA_MAX, +}; + +#define IFLA_DSA_MAX (__IFLA_DSA_MAX - 1) + +#endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/src/basic/linux/if_macsec.h b/src/basic/linux/if_macsec.h new file mode 100644 index 0000000..d5b6d1f --- /dev/null +++ b/src/basic/linux/if_macsec.h @@ -0,0 +1,194 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * include/uapi/linux/if_macsec.h - MACsec device + * + * Copyright (c) 2015 Sabrina Dubroca + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _UAPI_MACSEC_H +#define _UAPI_MACSEC_H + +#include + +#define MACSEC_GENL_NAME "macsec" +#define MACSEC_GENL_VERSION 1 + +#define MACSEC_MAX_KEY_LEN 128 + +#define MACSEC_KEYID_LEN 16 + +#define MACSEC_SALT_LEN 12 + +/* cipher IDs as per IEEE802.1AE-2018 (Table 14-1) */ +#define MACSEC_CIPHER_ID_GCM_AES_128 0x0080C20001000001ULL +#define MACSEC_CIPHER_ID_GCM_AES_256 0x0080C20001000002ULL +#define MACSEC_CIPHER_ID_GCM_AES_XPN_128 0x0080C20001000003ULL +#define MACSEC_CIPHER_ID_GCM_AES_XPN_256 0x0080C20001000004ULL + +/* deprecated cipher ID for GCM-AES-128 */ +#define MACSEC_DEFAULT_CIPHER_ID 0x0080020001000001ULL +#define MACSEC_DEFAULT_CIPHER_ALT MACSEC_CIPHER_ID_GCM_AES_128 + +#define MACSEC_MIN_ICV_LEN 8 +#define MACSEC_MAX_ICV_LEN 32 +/* upper limit for ICV length as recommended by IEEE802.1AE-2006 */ +#define MACSEC_STD_ICV_LEN 16 + +enum macsec_attrs { + MACSEC_ATTR_UNSPEC, + MACSEC_ATTR_IFINDEX, /* u32, ifindex of the MACsec netdevice */ + MACSEC_ATTR_RXSC_CONFIG, /* config, nested macsec_rxsc_attrs */ + MACSEC_ATTR_SA_CONFIG, /* config, nested macsec_sa_attrs */ + MACSEC_ATTR_SECY, /* dump, nested macsec_secy_attrs */ + MACSEC_ATTR_TXSA_LIST, /* dump, nested, macsec_sa_attrs for each TXSA */ + MACSEC_ATTR_RXSC_LIST, /* dump, nested, macsec_rxsc_attrs for each RXSC */ + MACSEC_ATTR_TXSC_STATS, /* dump, nested, macsec_txsc_stats_attr */ + MACSEC_ATTR_SECY_STATS, /* dump, nested, macsec_secy_stats_attr */ + MACSEC_ATTR_OFFLOAD, /* config, nested, macsec_offload_attrs */ + __MACSEC_ATTR_END, + NUM_MACSEC_ATTR = __MACSEC_ATTR_END, + MACSEC_ATTR_MAX = __MACSEC_ATTR_END - 1, +}; + +enum macsec_secy_attrs { + MACSEC_SECY_ATTR_UNSPEC, + MACSEC_SECY_ATTR_SCI, + MACSEC_SECY_ATTR_ENCODING_SA, + MACSEC_SECY_ATTR_WINDOW, + MACSEC_SECY_ATTR_CIPHER_SUITE, + MACSEC_SECY_ATTR_ICV_LEN, + MACSEC_SECY_ATTR_PROTECT, + MACSEC_SECY_ATTR_REPLAY, + MACSEC_SECY_ATTR_OPER, + MACSEC_SECY_ATTR_VALIDATE, + MACSEC_SECY_ATTR_ENCRYPT, + MACSEC_SECY_ATTR_INC_SCI, + MACSEC_SECY_ATTR_ES, + MACSEC_SECY_ATTR_SCB, + MACSEC_SECY_ATTR_PAD, + __MACSEC_SECY_ATTR_END, + NUM_MACSEC_SECY_ATTR = __MACSEC_SECY_ATTR_END, + MACSEC_SECY_ATTR_MAX = __MACSEC_SECY_ATTR_END - 1, +}; + +enum macsec_rxsc_attrs { + MACSEC_RXSC_ATTR_UNSPEC, + MACSEC_RXSC_ATTR_SCI, /* config/dump, u64 */ + MACSEC_RXSC_ATTR_ACTIVE, /* config/dump, u8 0..1 */ + MACSEC_RXSC_ATTR_SA_LIST, /* dump, nested */ + MACSEC_RXSC_ATTR_STATS, /* dump, nested, macsec_rxsc_stats_attr */ + MACSEC_RXSC_ATTR_PAD, + __MACSEC_RXSC_ATTR_END, + NUM_MACSEC_RXSC_ATTR = __MACSEC_RXSC_ATTR_END, + MACSEC_RXSC_ATTR_MAX = __MACSEC_RXSC_ATTR_END - 1, +}; + +enum macsec_sa_attrs { + MACSEC_SA_ATTR_UNSPEC, + MACSEC_SA_ATTR_AN, /* config/dump, u8 0..3 */ + MACSEC_SA_ATTR_ACTIVE, /* config/dump, u8 0..1 */ + MACSEC_SA_ATTR_PN, /* config/dump, u32/u64 (u64 if XPN) */ + MACSEC_SA_ATTR_KEY, /* config, data */ + MACSEC_SA_ATTR_KEYID, /* config/dump, 128-bit */ + MACSEC_SA_ATTR_STATS, /* dump, nested, macsec_sa_stats_attr */ + MACSEC_SA_ATTR_PAD, + MACSEC_SA_ATTR_SSCI, /* config/dump, u32 - XPN only */ + MACSEC_SA_ATTR_SALT, /* config, 96-bit - XPN only */ + __MACSEC_SA_ATTR_END, + NUM_MACSEC_SA_ATTR = __MACSEC_SA_ATTR_END, + MACSEC_SA_ATTR_MAX = __MACSEC_SA_ATTR_END - 1, +}; + +enum macsec_offload_attrs { + MACSEC_OFFLOAD_ATTR_UNSPEC, + MACSEC_OFFLOAD_ATTR_TYPE, /* config/dump, u8 0..2 */ + MACSEC_OFFLOAD_ATTR_PAD, + __MACSEC_OFFLOAD_ATTR_END, + NUM_MACSEC_OFFLOAD_ATTR = __MACSEC_OFFLOAD_ATTR_END, + MACSEC_OFFLOAD_ATTR_MAX = __MACSEC_OFFLOAD_ATTR_END - 1, +}; + +enum macsec_nl_commands { + MACSEC_CMD_GET_TXSC, + MACSEC_CMD_ADD_RXSC, + MACSEC_CMD_DEL_RXSC, + MACSEC_CMD_UPD_RXSC, + MACSEC_CMD_ADD_TXSA, + MACSEC_CMD_DEL_TXSA, + MACSEC_CMD_UPD_TXSA, + MACSEC_CMD_ADD_RXSA, + MACSEC_CMD_DEL_RXSA, + MACSEC_CMD_UPD_RXSA, + MACSEC_CMD_UPD_OFFLOAD, +}; + +/* u64 per-RXSC stats */ +enum macsec_rxsc_stats_attr { + MACSEC_RXSC_STATS_ATTR_UNSPEC, + MACSEC_RXSC_STATS_ATTR_IN_OCTETS_VALIDATED, + MACSEC_RXSC_STATS_ATTR_IN_OCTETS_DECRYPTED, + MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNCHECKED, + MACSEC_RXSC_STATS_ATTR_IN_PKTS_DELAYED, + MACSEC_RXSC_STATS_ATTR_IN_PKTS_OK, + MACSEC_RXSC_STATS_ATTR_IN_PKTS_INVALID, + MACSEC_RXSC_STATS_ATTR_IN_PKTS_LATE, + MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_VALID, + MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_USING_SA, + MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNUSED_SA, + MACSEC_RXSC_STATS_ATTR_PAD, + __MACSEC_RXSC_STATS_ATTR_END, + NUM_MACSEC_RXSC_STATS_ATTR = __MACSEC_RXSC_STATS_ATTR_END, + MACSEC_RXSC_STATS_ATTR_MAX = __MACSEC_RXSC_STATS_ATTR_END - 1, +}; + +/* u32 per-{RX,TX}SA stats */ +enum macsec_sa_stats_attr { + MACSEC_SA_STATS_ATTR_UNSPEC, + MACSEC_SA_STATS_ATTR_IN_PKTS_OK, + MACSEC_SA_STATS_ATTR_IN_PKTS_INVALID, + MACSEC_SA_STATS_ATTR_IN_PKTS_NOT_VALID, + MACSEC_SA_STATS_ATTR_IN_PKTS_NOT_USING_SA, + MACSEC_SA_STATS_ATTR_IN_PKTS_UNUSED_SA, + MACSEC_SA_STATS_ATTR_OUT_PKTS_PROTECTED, + MACSEC_SA_STATS_ATTR_OUT_PKTS_ENCRYPTED, + __MACSEC_SA_STATS_ATTR_END, + NUM_MACSEC_SA_STATS_ATTR = __MACSEC_SA_STATS_ATTR_END, + MACSEC_SA_STATS_ATTR_MAX = __MACSEC_SA_STATS_ATTR_END - 1, +}; + +/* u64 per-TXSC stats */ +enum macsec_txsc_stats_attr { + MACSEC_TXSC_STATS_ATTR_UNSPEC, + MACSEC_TXSC_STATS_ATTR_OUT_PKTS_PROTECTED, + MACSEC_TXSC_STATS_ATTR_OUT_PKTS_ENCRYPTED, + MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_PROTECTED, + MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_ENCRYPTED, + MACSEC_TXSC_STATS_ATTR_PAD, + __MACSEC_TXSC_STATS_ATTR_END, + NUM_MACSEC_TXSC_STATS_ATTR = __MACSEC_TXSC_STATS_ATTR_END, + MACSEC_TXSC_STATS_ATTR_MAX = __MACSEC_TXSC_STATS_ATTR_END - 1, +}; + +/* u64 per-SecY stats */ +enum macsec_secy_stats_attr { + MACSEC_SECY_STATS_ATTR_UNSPEC, + MACSEC_SECY_STATS_ATTR_OUT_PKTS_UNTAGGED, + MACSEC_SECY_STATS_ATTR_IN_PKTS_UNTAGGED, + MACSEC_SECY_STATS_ATTR_OUT_PKTS_TOO_LONG, + MACSEC_SECY_STATS_ATTR_IN_PKTS_NO_TAG, + MACSEC_SECY_STATS_ATTR_IN_PKTS_BAD_TAG, + MACSEC_SECY_STATS_ATTR_IN_PKTS_UNKNOWN_SCI, + MACSEC_SECY_STATS_ATTR_IN_PKTS_NO_SCI, + MACSEC_SECY_STATS_ATTR_IN_PKTS_OVERRUN, + MACSEC_SECY_STATS_ATTR_PAD, + __MACSEC_SECY_STATS_ATTR_END, + NUM_MACSEC_SECY_STATS_ATTR = __MACSEC_SECY_STATS_ATTR_END, + MACSEC_SECY_STATS_ATTR_MAX = __MACSEC_SECY_STATS_ATTR_END - 1, +}; + +#endif /* _UAPI_MACSEC_H */ diff --git a/src/basic/linux/if_tun.h b/src/basic/linux/if_tun.h new file mode 100644 index 0000000..287cdc8 --- /dev/null +++ b/src/basic/linux/if_tun.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Universal TUN/TAP device driver. + * Copyright (C) 1999-2000 Maxim Krasnyansky + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _UAPI__IF_TUN_H +#define _UAPI__IF_TUN_H + +#include +#include +#include + +/* Read queue size */ +#define TUN_READQ_SIZE 500 +/* TUN device type flags: deprecated. Use IFF_TUN/IFF_TAP instead. */ +#define TUN_TUN_DEV IFF_TUN +#define TUN_TAP_DEV IFF_TAP +#define TUN_TYPE_MASK 0x000f + +/* Ioctl defines */ +#define TUNSETNOCSUM _IOW('T', 200, int) +#define TUNSETDEBUG _IOW('T', 201, int) +#define TUNSETIFF _IOW('T', 202, int) +#define TUNSETPERSIST _IOW('T', 203, int) +#define TUNSETOWNER _IOW('T', 204, int) +#define TUNSETLINK _IOW('T', 205, int) +#define TUNSETGROUP _IOW('T', 206, int) +#define TUNGETFEATURES _IOR('T', 207, unsigned int) +#define TUNSETOFFLOAD _IOW('T', 208, unsigned int) +#define TUNSETTXFILTER _IOW('T', 209, unsigned int) +#define TUNGETIFF _IOR('T', 210, unsigned int) +#define TUNGETSNDBUF _IOR('T', 211, int) +#define TUNSETSNDBUF _IOW('T', 212, int) +#define TUNATTACHFILTER _IOW('T', 213, struct sock_fprog) +#define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog) +#define TUNGETVNETHDRSZ _IOR('T', 215, int) +#define TUNSETVNETHDRSZ _IOW('T', 216, int) +#define TUNSETQUEUE _IOW('T', 217, int) +#define TUNSETIFINDEX _IOW('T', 218, unsigned int) +#define TUNGETFILTER _IOR('T', 219, struct sock_fprog) +#define TUNSETVNETLE _IOW('T', 220, int) +#define TUNGETVNETLE _IOR('T', 221, int) +/* The TUNSETVNETBE and TUNGETVNETBE ioctls are for cross-endian support on + * little-endian hosts. Not all kernel configurations support them, but all + * configurations that support SET also support GET. + */ +#define TUNSETVNETBE _IOW('T', 222, int) +#define TUNGETVNETBE _IOR('T', 223, int) +#define TUNSETSTEERINGEBPF _IOR('T', 224, int) +#define TUNSETFILTEREBPF _IOR('T', 225, int) +#define TUNSETCARRIER _IOW('T', 226, int) +#define TUNGETDEVNETNS _IO('T', 227) + +/* TUNSETIFF ifr flags */ +#define IFF_TUN 0x0001 +#define IFF_TAP 0x0002 +#define IFF_NAPI 0x0010 +#define IFF_NAPI_FRAGS 0x0020 +/* Used in TUNSETIFF to bring up tun/tap without carrier */ +#define IFF_NO_CARRIER 0x0040 +#define IFF_NO_PI 0x1000 +/* This flag has no real effect */ +#define IFF_ONE_QUEUE 0x2000 +#define IFF_VNET_HDR 0x4000 +#define IFF_TUN_EXCL 0x8000 +#define IFF_MULTI_QUEUE 0x0100 +#define IFF_ATTACH_QUEUE 0x0200 +#define IFF_DETACH_QUEUE 0x0400 +/* read-only flag */ +#define IFF_PERSIST 0x0800 +#define IFF_NOFILTER 0x1000 + +/* Socket options */ +#define TUN_TX_TIMESTAMP 1 + +/* Features for GSO (TUNSETOFFLOAD). */ +#define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. */ +#define TUN_F_TSO4 0x02 /* I can handle TSO for IPv4 packets */ +#define TUN_F_TSO6 0x04 /* I can handle TSO for IPv6 packets */ +#define TUN_F_TSO_ECN 0x08 /* I can handle TSO with ECN bits. */ +#define TUN_F_UFO 0x10 /* I can handle UFO packets */ +#define TUN_F_USO4 0x20 /* I can handle USO for IPv4 packets */ +#define TUN_F_USO6 0x40 /* I can handle USO for IPv6 packets */ + +/* Protocol info prepended to the packets (when IFF_NO_PI is not set) */ +#define TUN_PKT_STRIP 0x0001 +struct tun_pi { + __u16 flags; + __be16 proto; +}; + +/* + * Filter spec (used for SETXXFILTER ioctls) + * This stuff is applicable only to the TAP (Ethernet) devices. + * If the count is zero the filter is disabled and the driver accepts + * all packets (promisc mode). + * If the filter is enabled in order to accept broadcast packets + * broadcast addr must be explicitly included in the addr list. + */ +#define TUN_FLT_ALLMULTI 0x0001 /* Accept all multicast packets */ +struct tun_filter { + __u16 flags; /* TUN_FLT_ flags see above */ + __u16 count; /* Number of addresses */ + __u8 addr[][ETH_ALEN]; +}; + +#endif /* _UAPI__IF_TUN_H */ diff --git a/src/basic/linux/if_tunnel.h b/src/basic/linux/if_tunnel.h new file mode 100644 index 0000000..1021196 --- /dev/null +++ b/src/basic/linux/if_tunnel.h @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_IF_TUNNEL_H_ +#define _UAPI_IF_TUNNEL_H_ + +#include +#include +#include +#include +#include + + +#define SIOCGETTUNNEL (SIOCDEVPRIVATE + 0) +#define SIOCADDTUNNEL (SIOCDEVPRIVATE + 1) +#define SIOCDELTUNNEL (SIOCDEVPRIVATE + 2) +#define SIOCCHGTUNNEL (SIOCDEVPRIVATE + 3) +#define SIOCGETPRL (SIOCDEVPRIVATE + 4) +#define SIOCADDPRL (SIOCDEVPRIVATE + 5) +#define SIOCDELPRL (SIOCDEVPRIVATE + 6) +#define SIOCCHGPRL (SIOCDEVPRIVATE + 7) +#define SIOCGET6RD (SIOCDEVPRIVATE + 8) +#define SIOCADD6RD (SIOCDEVPRIVATE + 9) +#define SIOCDEL6RD (SIOCDEVPRIVATE + 10) +#define SIOCCHG6RD (SIOCDEVPRIVATE + 11) + +#define GRE_CSUM __cpu_to_be16(0x8000) +#define GRE_ROUTING __cpu_to_be16(0x4000) +#define GRE_KEY __cpu_to_be16(0x2000) +#define GRE_SEQ __cpu_to_be16(0x1000) +#define GRE_STRICT __cpu_to_be16(0x0800) +#define GRE_REC __cpu_to_be16(0x0700) +#define GRE_ACK __cpu_to_be16(0x0080) +#define GRE_FLAGS __cpu_to_be16(0x0078) +#define GRE_VERSION __cpu_to_be16(0x0007) + +#define GRE_IS_CSUM(f) ((f) & GRE_CSUM) +#define GRE_IS_ROUTING(f) ((f) & GRE_ROUTING) +#define GRE_IS_KEY(f) ((f) & GRE_KEY) +#define GRE_IS_SEQ(f) ((f) & GRE_SEQ) +#define GRE_IS_STRICT(f) ((f) & GRE_STRICT) +#define GRE_IS_REC(f) ((f) & GRE_REC) +#define GRE_IS_ACK(f) ((f) & GRE_ACK) + +#define GRE_VERSION_0 __cpu_to_be16(0x0000) +#define GRE_VERSION_1 __cpu_to_be16(0x0001) +#define GRE_PROTO_PPP __cpu_to_be16(0x880b) +#define GRE_PPTP_KEY_MASK __cpu_to_be32(0xffff) + +struct ip_tunnel_parm { + char name[IFNAMSIZ]; + int link; + __be16 i_flags; + __be16 o_flags; + __be32 i_key; + __be32 o_key; + struct iphdr iph; +}; + +enum { + IFLA_IPTUN_UNSPEC, + IFLA_IPTUN_LINK, + IFLA_IPTUN_LOCAL, + IFLA_IPTUN_REMOTE, + IFLA_IPTUN_TTL, + IFLA_IPTUN_TOS, + IFLA_IPTUN_ENCAP_LIMIT, + IFLA_IPTUN_FLOWINFO, + IFLA_IPTUN_FLAGS, + IFLA_IPTUN_PROTO, + IFLA_IPTUN_PMTUDISC, + IFLA_IPTUN_6RD_PREFIX, + IFLA_IPTUN_6RD_RELAY_PREFIX, + IFLA_IPTUN_6RD_PREFIXLEN, + IFLA_IPTUN_6RD_RELAY_PREFIXLEN, + IFLA_IPTUN_ENCAP_TYPE, + IFLA_IPTUN_ENCAP_FLAGS, + IFLA_IPTUN_ENCAP_SPORT, + IFLA_IPTUN_ENCAP_DPORT, + IFLA_IPTUN_COLLECT_METADATA, + IFLA_IPTUN_FWMARK, + __IFLA_IPTUN_MAX, +}; +#define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) + +enum tunnel_encap_types { + TUNNEL_ENCAP_NONE, + TUNNEL_ENCAP_FOU, + TUNNEL_ENCAP_GUE, + TUNNEL_ENCAP_MPLS, +}; + +#define TUNNEL_ENCAP_FLAG_CSUM (1<<0) +#define TUNNEL_ENCAP_FLAG_CSUM6 (1<<1) +#define TUNNEL_ENCAP_FLAG_REMCSUM (1<<2) + +/* SIT-mode i_flags */ +#define SIT_ISATAP 0x0001 + +struct ip_tunnel_prl { + __be32 addr; + __u16 flags; + __u16 __reserved; + __u32 datalen; + __u32 __reserved2; + /* data follows */ +}; + +/* PRL flags */ +#define PRL_DEFAULT 0x0001 + +struct ip_tunnel_6rd { + struct in6_addr prefix; + __be32 relay_prefix; + __u16 prefixlen; + __u16 relay_prefixlen; +}; + +enum { + IFLA_GRE_UNSPEC, + IFLA_GRE_LINK, + IFLA_GRE_IFLAGS, + IFLA_GRE_OFLAGS, + IFLA_GRE_IKEY, + IFLA_GRE_OKEY, + IFLA_GRE_LOCAL, + IFLA_GRE_REMOTE, + IFLA_GRE_TTL, + IFLA_GRE_TOS, + IFLA_GRE_PMTUDISC, + IFLA_GRE_ENCAP_LIMIT, + IFLA_GRE_FLOWINFO, + IFLA_GRE_FLAGS, + IFLA_GRE_ENCAP_TYPE, + IFLA_GRE_ENCAP_FLAGS, + IFLA_GRE_ENCAP_SPORT, + IFLA_GRE_ENCAP_DPORT, + IFLA_GRE_COLLECT_METADATA, + IFLA_GRE_IGNORE_DF, + IFLA_GRE_FWMARK, + IFLA_GRE_ERSPAN_INDEX, + IFLA_GRE_ERSPAN_VER, + IFLA_GRE_ERSPAN_DIR, + IFLA_GRE_ERSPAN_HWID, + __IFLA_GRE_MAX, +}; + +#define IFLA_GRE_MAX (__IFLA_GRE_MAX - 1) + +/* VTI-mode i_flags */ +#define VTI_ISVTI ((__force __be16)0x0001) + +enum { + IFLA_VTI_UNSPEC, + IFLA_VTI_LINK, + IFLA_VTI_IKEY, + IFLA_VTI_OKEY, + IFLA_VTI_LOCAL, + IFLA_VTI_REMOTE, + IFLA_VTI_FWMARK, + __IFLA_VTI_MAX, +}; + +#define IFLA_VTI_MAX (__IFLA_VTI_MAX - 1) + +#define TUNNEL_CSUM __cpu_to_be16(0x01) +#define TUNNEL_ROUTING __cpu_to_be16(0x02) +#define TUNNEL_KEY __cpu_to_be16(0x04) +#define TUNNEL_SEQ __cpu_to_be16(0x08) +#define TUNNEL_STRICT __cpu_to_be16(0x10) +#define TUNNEL_REC __cpu_to_be16(0x20) +#define TUNNEL_VERSION __cpu_to_be16(0x40) +#define TUNNEL_NO_KEY __cpu_to_be16(0x80) +#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100) +#define TUNNEL_OAM __cpu_to_be16(0x0200) +#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400) +#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800) +#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) +#define TUNNEL_NOCACHE __cpu_to_be16(0x2000) +#define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000) +#define TUNNEL_GTP_OPT __cpu_to_be16(0x8000) + +#define TUNNEL_OPTIONS_PRESENT \ + (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT | \ + TUNNEL_GTP_OPT) + +#endif /* _UAPI_IF_TUNNEL_H_ */ diff --git a/src/basic/linux/in.h b/src/basic/linux/in.h new file mode 100644 index 0000000..07a4cb1 --- /dev/null +++ b/src/basic/linux/in.h @@ -0,0 +1,331 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions of the Internet Protocol. + * + * Version: @(#)in.h 1.0.1 04/21/93 + * + * Authors: Original taken from the GNU Project file. + * Fred N. van Kempen, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _UAPI_LINUX_IN_H +#define _UAPI_LINUX_IN_H + +#include +#include +#include +#include + +#if __UAPI_DEF_IN_IPPROTO +/* Standard well-defined IP protocols. */ +enum { + IPPROTO_IP = 0, /* Dummy protocol for TCP */ +#define IPPROTO_IP IPPROTO_IP + IPPROTO_ICMP = 1, /* Internet Control Message Protocol */ +#define IPPROTO_ICMP IPPROTO_ICMP + IPPROTO_IGMP = 2, /* Internet Group Management Protocol */ +#define IPPROTO_IGMP IPPROTO_IGMP + IPPROTO_IPIP = 4, /* IPIP tunnels (older KA9Q tunnels use 94) */ +#define IPPROTO_IPIP IPPROTO_IPIP + IPPROTO_TCP = 6, /* Transmission Control Protocol */ +#define IPPROTO_TCP IPPROTO_TCP + IPPROTO_EGP = 8, /* Exterior Gateway Protocol */ +#define IPPROTO_EGP IPPROTO_EGP + IPPROTO_PUP = 12, /* PUP protocol */ +#define IPPROTO_PUP IPPROTO_PUP + IPPROTO_UDP = 17, /* User Datagram Protocol */ +#define IPPROTO_UDP IPPROTO_UDP + IPPROTO_IDP = 22, /* XNS IDP protocol */ +#define IPPROTO_IDP IPPROTO_IDP + IPPROTO_TP = 29, /* SO Transport Protocol Class 4 */ +#define IPPROTO_TP IPPROTO_TP + IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */ +#define IPPROTO_DCCP IPPROTO_DCCP + IPPROTO_IPV6 = 41, /* IPv6-in-IPv4 tunnelling */ +#define IPPROTO_IPV6 IPPROTO_IPV6 + IPPROTO_RSVP = 46, /* RSVP Protocol */ +#define IPPROTO_RSVP IPPROTO_RSVP + IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */ +#define IPPROTO_GRE IPPROTO_GRE + IPPROTO_ESP = 50, /* Encapsulation Security Payload protocol */ +#define IPPROTO_ESP IPPROTO_ESP + IPPROTO_AH = 51, /* Authentication Header protocol */ +#define IPPROTO_AH IPPROTO_AH + IPPROTO_MTP = 92, /* Multicast Transport Protocol */ +#define IPPROTO_MTP IPPROTO_MTP + IPPROTO_BEETPH = 94, /* IP option pseudo header for BEET */ +#define IPPROTO_BEETPH IPPROTO_BEETPH + IPPROTO_ENCAP = 98, /* Encapsulation Header */ +#define IPPROTO_ENCAP IPPROTO_ENCAP + IPPROTO_PIM = 103, /* Protocol Independent Multicast */ +#define IPPROTO_PIM IPPROTO_PIM + IPPROTO_COMP = 108, /* Compression Header Protocol */ +#define IPPROTO_COMP IPPROTO_COMP + IPPROTO_L2TP = 115, /* Layer 2 Tunnelling Protocol */ +#define IPPROTO_L2TP IPPROTO_L2TP + IPPROTO_SCTP = 132, /* Stream Control Transport Protocol */ +#define IPPROTO_SCTP IPPROTO_SCTP + IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */ +#define IPPROTO_UDPLITE IPPROTO_UDPLITE + IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */ +#define IPPROTO_MPLS IPPROTO_MPLS + IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ +#define IPPROTO_ETHERNET IPPROTO_ETHERNET + IPPROTO_RAW = 255, /* Raw IP packets */ +#define IPPROTO_RAW IPPROTO_RAW + IPPROTO_MPTCP = 262, /* Multipath TCP connection */ +#define IPPROTO_MPTCP IPPROTO_MPTCP + IPPROTO_MAX +}; +#endif + +#if __UAPI_DEF_IN_ADDR +/* Internet address. */ +struct in_addr { + __be32 s_addr; +}; +#endif + +#define IP_TOS 1 +#define IP_TTL 2 +#define IP_HDRINCL 3 +#define IP_OPTIONS 4 +#define IP_ROUTER_ALERT 5 +#define IP_RECVOPTS 6 +#define IP_RETOPTS 7 +#define IP_PKTINFO 8 +#define IP_PKTOPTIONS 9 +#define IP_MTU_DISCOVER 10 +#define IP_RECVERR 11 +#define IP_RECVTTL 12 +#define IP_RECVTOS 13 +#define IP_MTU 14 +#define IP_FREEBIND 15 +#define IP_IPSEC_POLICY 16 +#define IP_XFRM_POLICY 17 +#define IP_PASSSEC 18 +#define IP_TRANSPARENT 19 + +/* BSD compatibility */ +#define IP_RECVRETOPTS IP_RETOPTS + +/* TProxy original addresses */ +#define IP_ORIGDSTADDR 20 +#define IP_RECVORIGDSTADDR IP_ORIGDSTADDR + +#define IP_MINTTL 21 +#define IP_NODEFRAG 22 +#define IP_CHECKSUM 23 +#define IP_BIND_ADDRESS_NO_PORT 24 +#define IP_RECVFRAGSIZE 25 +#define IP_RECVERR_RFC4884 26 + +/* IP_MTU_DISCOVER values */ +#define IP_PMTUDISC_DONT 0 /* Never send DF frames */ +#define IP_PMTUDISC_WANT 1 /* Use per route hints */ +#define IP_PMTUDISC_DO 2 /* Always DF */ +#define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */ +/* Always use interface mtu (ignores dst pmtu) but don't set DF flag. + * Also incoming ICMP frag_needed notifications will be ignored on + * this socket to prevent accepting spoofed ones. + */ +#define IP_PMTUDISC_INTERFACE 4 +/* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get + * fragmented if they exeed the interface mtu + */ +#define IP_PMTUDISC_OMIT 5 + +#define IP_MULTICAST_IF 32 +#define IP_MULTICAST_TTL 33 +#define IP_MULTICAST_LOOP 34 +#define IP_ADD_MEMBERSHIP 35 +#define IP_DROP_MEMBERSHIP 36 +#define IP_UNBLOCK_SOURCE 37 +#define IP_BLOCK_SOURCE 38 +#define IP_ADD_SOURCE_MEMBERSHIP 39 +#define IP_DROP_SOURCE_MEMBERSHIP 40 +#define IP_MSFILTER 41 +#define MCAST_JOIN_GROUP 42 +#define MCAST_BLOCK_SOURCE 43 +#define MCAST_UNBLOCK_SOURCE 44 +#define MCAST_LEAVE_GROUP 45 +#define MCAST_JOIN_SOURCE_GROUP 46 +#define MCAST_LEAVE_SOURCE_GROUP 47 +#define MCAST_MSFILTER 48 +#define IP_MULTICAST_ALL 49 +#define IP_UNICAST_IF 50 + +#define MCAST_EXCLUDE 0 +#define MCAST_INCLUDE 1 + +/* These need to appear somewhere around here */ +#define IP_DEFAULT_MULTICAST_TTL 1 +#define IP_DEFAULT_MULTICAST_LOOP 1 + +/* Request struct for multicast socket ops */ + +#if __UAPI_DEF_IP_MREQ +struct ip_mreq { + struct in_addr imr_multiaddr; /* IP multicast address of group */ + struct in_addr imr_interface; /* local IP address of interface */ +}; + +struct ip_mreqn { + struct in_addr imr_multiaddr; /* IP multicast address of group */ + struct in_addr imr_address; /* local IP address of interface */ + int imr_ifindex; /* Interface index */ +}; + +struct ip_mreq_source { + __be32 imr_multiaddr; + __be32 imr_interface; + __be32 imr_sourceaddr; +}; + +struct ip_msfilter { + __be32 imsf_multiaddr; + __be32 imsf_interface; + __u32 imsf_fmode; + __u32 imsf_numsrc; + union { + __be32 imsf_slist[1]; + __DECLARE_FLEX_ARRAY(__be32, imsf_slist_flex); + }; +}; + +#define IP_MSFILTER_SIZE(numsrc) \ + (sizeof(struct ip_msfilter) - sizeof(__u32) \ + + (numsrc) * sizeof(__u32)) + +struct group_req { + __u32 gr_interface; /* interface index */ + struct __kernel_sockaddr_storage gr_group; /* group address */ +}; + +struct group_source_req { + __u32 gsr_interface; /* interface index */ + struct __kernel_sockaddr_storage gsr_group; /* group address */ + struct __kernel_sockaddr_storage gsr_source; /* source address */ +}; + +struct group_filter { + union { + struct { + __u32 gf_interface_aux; /* interface index */ + struct __kernel_sockaddr_storage gf_group_aux; /* multicast address */ + __u32 gf_fmode_aux; /* filter mode */ + __u32 gf_numsrc_aux; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + }; + struct { + __u32 gf_interface; /* interface index */ + struct __kernel_sockaddr_storage gf_group; /* multicast address */ + __u32 gf_fmode; /* filter mode */ + __u32 gf_numsrc; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist_flex[]; /* interface index */ + }; + }; +}; + +#define GROUP_FILTER_SIZE(numsrc) \ + (sizeof(struct group_filter) - sizeof(struct __kernel_sockaddr_storage) \ + + (numsrc) * sizeof(struct __kernel_sockaddr_storage)) +#endif + +#if __UAPI_DEF_IN_PKTINFO +struct in_pktinfo { + int ipi_ifindex; + struct in_addr ipi_spec_dst; + struct in_addr ipi_addr; +}; +#endif + +/* Structure describing an Internet (IP) socket address. */ +#if __UAPI_DEF_SOCKADDR_IN +#define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */ +struct sockaddr_in { + __kernel_sa_family_t sin_family; /* Address family */ + __be16 sin_port; /* Port number */ + struct in_addr sin_addr; /* Internet address */ + + /* Pad to size of `struct sockaddr'. */ + unsigned char __pad[__SOCK_SIZE__ - sizeof(short int) - + sizeof(unsigned short int) - sizeof(struct in_addr)]; +}; +#define sin_zero __pad /* for BSD UNIX comp. -FvK */ +#endif + +#if __UAPI_DEF_IN_CLASS +/* + * Definitions of the bits in an Internet address integer. + * On subnets, host and network parts are found according + * to the subnet mask, not these masks. + */ +#define IN_CLASSA(a) ((((long int) (a)) & 0x80000000) == 0) +#define IN_CLASSA_NET 0xff000000 +#define IN_CLASSA_NSHIFT 24 +#define IN_CLASSA_HOST (0xffffffff & ~IN_CLASSA_NET) +#define IN_CLASSA_MAX 128 + +#define IN_CLASSB(a) ((((long int) (a)) & 0xc0000000) == 0x80000000) +#define IN_CLASSB_NET 0xffff0000 +#define IN_CLASSB_NSHIFT 16 +#define IN_CLASSB_HOST (0xffffffff & ~IN_CLASSB_NET) +#define IN_CLASSB_MAX 65536 + +#define IN_CLASSC(a) ((((long int) (a)) & 0xe0000000) == 0xc0000000) +#define IN_CLASSC_NET 0xffffff00 +#define IN_CLASSC_NSHIFT 8 +#define IN_CLASSC_HOST (0xffffffff & ~IN_CLASSC_NET) + +#define IN_CLASSD(a) ((((long int) (a)) & 0xf0000000) == 0xe0000000) +#define IN_MULTICAST(a) IN_CLASSD(a) +#define IN_MULTICAST_NET 0xe0000000 + +#define IN_BADCLASS(a) (((long int) (a) ) == (long int)0xffffffff) +#define IN_EXPERIMENTAL(a) IN_BADCLASS((a)) + +#define IN_CLASSE(a) ((((long int) (a)) & 0xf0000000) == 0xf0000000) +#define IN_CLASSE_NET 0xffffffff +#define IN_CLASSE_NSHIFT 0 + +/* Address to accept any incoming messages. */ +#define INADDR_ANY ((unsigned long int) 0x00000000) + +/* Address to send to all hosts. */ +#define INADDR_BROADCAST ((unsigned long int) 0xffffffff) + +/* Address indicating an error return. */ +#define INADDR_NONE ((unsigned long int) 0xffffffff) + +/* Dummy address for src of ICMP replies if no real address is set (RFC7600). */ +#define INADDR_DUMMY ((unsigned long int) 0xc0000008) + +/* Network number for local host loopback. */ +#define IN_LOOPBACKNET 127 + +/* Address to loopback in software to local host. */ +#define INADDR_LOOPBACK 0x7f000001 /* 127.0.0.1 */ +#define IN_LOOPBACK(a) ((((long int) (a)) & 0xff000000) == 0x7f000000) + +/* Defines for Multicast INADDR */ +#define INADDR_UNSPEC_GROUP 0xe0000000U /* 224.0.0.0 */ +#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */ +#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */ +#define INADDR_ALLSNOOPERS_GROUP 0xe000006aU /* 224.0.0.106 */ +#define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */ +#endif + +/* contains the htonl type stuff.. */ +#include + + +#endif /* _UAPI_LINUX_IN_H */ diff --git a/src/basic/linux/in6.h b/src/basic/linux/in6.h new file mode 100644 index 0000000..c4c53a9 --- /dev/null +++ b/src/basic/linux/in6.h @@ -0,0 +1,302 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Types and definitions for AF_INET6 + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * Sources: + * IPv6 Program Interfaces for BSD Systems + * + * + * Advanced Sockets API for IPv6 + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_IN6_H +#define _UAPI_LINUX_IN6_H + +#include +#include + +/* + * IPv6 address structure + */ + +#if __UAPI_DEF_IN6_ADDR +struct in6_addr { + union { + __u8 u6_addr8[16]; +#if __UAPI_DEF_IN6_ADDR_ALT + __be16 u6_addr16[8]; + __be32 u6_addr32[4]; +#endif + } in6_u; +#define s6_addr in6_u.u6_addr8 +#if __UAPI_DEF_IN6_ADDR_ALT +#define s6_addr16 in6_u.u6_addr16 +#define s6_addr32 in6_u.u6_addr32 +#endif +}; +#endif /* __UAPI_DEF_IN6_ADDR */ + +#if __UAPI_DEF_SOCKADDR_IN6 +struct sockaddr_in6 { + unsigned short int sin6_family; /* AF_INET6 */ + __be16 sin6_port; /* Transport layer port # */ + __be32 sin6_flowinfo; /* IPv6 flow information */ + struct in6_addr sin6_addr; /* IPv6 address */ + __u32 sin6_scope_id; /* scope id (new in RFC2553) */ +}; +#endif /* __UAPI_DEF_SOCKADDR_IN6 */ + +#if __UAPI_DEF_IPV6_MREQ +struct ipv6_mreq { + /* IPv6 multicast address of group */ + struct in6_addr ipv6mr_multiaddr; + + /* local IPv6 address of interface */ + int ipv6mr_ifindex; +}; +#endif /* __UAPI_DEF_IVP6_MREQ */ + +#define ipv6mr_acaddr ipv6mr_multiaddr + +struct in6_flowlabel_req { + struct in6_addr flr_dst; + __be32 flr_label; + __u8 flr_action; + __u8 flr_share; + __u16 flr_flags; + __u16 flr_expires; + __u16 flr_linger; + __u32 __flr_pad; + /* Options in format of IPV6_PKTOPTIONS */ +}; + +#define IPV6_FL_A_GET 0 +#define IPV6_FL_A_PUT 1 +#define IPV6_FL_A_RENEW 2 + +#define IPV6_FL_F_CREATE 1 +#define IPV6_FL_F_EXCL 2 +#define IPV6_FL_F_REFLECT 4 +#define IPV6_FL_F_REMOTE 8 + +#define IPV6_FL_S_NONE 0 +#define IPV6_FL_S_EXCL 1 +#define IPV6_FL_S_PROCESS 2 +#define IPV6_FL_S_USER 3 +#define IPV6_FL_S_ANY 255 + + +/* + * Bitmask constant declarations to help applications select out the + * flow label and priority fields. + * + * Note that this are in host byte order while the flowinfo field of + * sockaddr_in6 is in network byte order. + */ + +#define IPV6_FLOWINFO_FLOWLABEL 0x000fffff +#define IPV6_FLOWINFO_PRIORITY 0x0ff00000 + +/* These definitions are obsolete */ +#define IPV6_PRIORITY_UNCHARACTERIZED 0x0000 +#define IPV6_PRIORITY_FILLER 0x0100 +#define IPV6_PRIORITY_UNATTENDED 0x0200 +#define IPV6_PRIORITY_RESERVED1 0x0300 +#define IPV6_PRIORITY_BULK 0x0400 +#define IPV6_PRIORITY_RESERVED2 0x0500 +#define IPV6_PRIORITY_INTERACTIVE 0x0600 +#define IPV6_PRIORITY_CONTROL 0x0700 +#define IPV6_PRIORITY_8 0x0800 +#define IPV6_PRIORITY_9 0x0900 +#define IPV6_PRIORITY_10 0x0a00 +#define IPV6_PRIORITY_11 0x0b00 +#define IPV6_PRIORITY_12 0x0c00 +#define IPV6_PRIORITY_13 0x0d00 +#define IPV6_PRIORITY_14 0x0e00 +#define IPV6_PRIORITY_15 0x0f00 + +/* + * IPV6 extension headers + */ +#if __UAPI_DEF_IPPROTO_V6 +#define IPPROTO_HOPOPTS 0 /* IPv6 hop-by-hop options */ +#define IPPROTO_ROUTING 43 /* IPv6 routing header */ +#define IPPROTO_FRAGMENT 44 /* IPv6 fragmentation header */ +#define IPPROTO_ICMPV6 58 /* ICMPv6 */ +#define IPPROTO_NONE 59 /* IPv6 no next header */ +#define IPPROTO_DSTOPTS 60 /* IPv6 destination options */ +#define IPPROTO_MH 135 /* IPv6 mobility header */ +#endif /* __UAPI_DEF_IPPROTO_V6 */ + +/* + * IPv6 TLV options. + */ +#define IPV6_TLV_PAD1 0 +#define IPV6_TLV_PADN 1 +#define IPV6_TLV_ROUTERALERT 5 +#define IPV6_TLV_CALIPSO 7 /* RFC 5570 */ +#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */ +#define IPV6_TLV_JUMBO 194 +#define IPV6_TLV_HAO 201 /* home address option */ + +/* + * IPV6 socket options + */ +#if __UAPI_DEF_IPV6_OPTIONS +#define IPV6_ADDRFORM 1 +#define IPV6_2292PKTINFO 2 +#define IPV6_2292HOPOPTS 3 +#define IPV6_2292DSTOPTS 4 +#define IPV6_2292RTHDR 5 +#define IPV6_2292PKTOPTIONS 6 +#define IPV6_CHECKSUM 7 +#define IPV6_2292HOPLIMIT 8 +#define IPV6_NEXTHOP 9 +#define IPV6_AUTHHDR 10 /* obsolete */ +#define IPV6_FLOWINFO 11 + +#define IPV6_UNICAST_HOPS 16 +#define IPV6_MULTICAST_IF 17 +#define IPV6_MULTICAST_HOPS 18 +#define IPV6_MULTICAST_LOOP 19 +#define IPV6_ADD_MEMBERSHIP 20 +#define IPV6_DROP_MEMBERSHIP 21 +#define IPV6_ROUTER_ALERT 22 +#define IPV6_MTU_DISCOVER 23 +#define IPV6_MTU 24 +#define IPV6_RECVERR 25 +#define IPV6_V6ONLY 26 +#define IPV6_JOIN_ANYCAST 27 +#define IPV6_LEAVE_ANYCAST 28 +#define IPV6_MULTICAST_ALL 29 +#define IPV6_ROUTER_ALERT_ISOLATE 30 +#define IPV6_RECVERR_RFC4884 31 + +/* IPV6_MTU_DISCOVER values */ +#define IPV6_PMTUDISC_DONT 0 +#define IPV6_PMTUDISC_WANT 1 +#define IPV6_PMTUDISC_DO 2 +#define IPV6_PMTUDISC_PROBE 3 +/* same as IPV6_PMTUDISC_PROBE, provided for symetry with IPv4 + * also see comments on IP_PMTUDISC_INTERFACE + */ +#define IPV6_PMTUDISC_INTERFACE 4 +/* weaker version of IPV6_PMTUDISC_INTERFACE, which allows packets to + * get fragmented if they exceed the interface mtu + */ +#define IPV6_PMTUDISC_OMIT 5 + +/* Flowlabel */ +#define IPV6_FLOWLABEL_MGR 32 +#define IPV6_FLOWINFO_SEND 33 + +#define IPV6_IPSEC_POLICY 34 +#define IPV6_XFRM_POLICY 35 +#define IPV6_HDRINCL 36 +#endif + +/* + * Multicast: + * Following socket options are shared between IPv4 and IPv6. + * + * MCAST_JOIN_GROUP 42 + * MCAST_BLOCK_SOURCE 43 + * MCAST_UNBLOCK_SOURCE 44 + * MCAST_LEAVE_GROUP 45 + * MCAST_JOIN_SOURCE_GROUP 46 + * MCAST_LEAVE_SOURCE_GROUP 47 + * MCAST_MSFILTER 48 + */ + +/* + * Advanced API (RFC3542) (1) + * + * Note: IPV6_RECVRTHDRDSTOPTS does not exist. see net/ipv6/datagram.c. + */ + +#define IPV6_RECVPKTINFO 49 +#define IPV6_PKTINFO 50 +#define IPV6_RECVHOPLIMIT 51 +#define IPV6_HOPLIMIT 52 +#define IPV6_RECVHOPOPTS 53 +#define IPV6_HOPOPTS 54 +#define IPV6_RTHDRDSTOPTS 55 +#define IPV6_RECVRTHDR 56 +#define IPV6_RTHDR 57 +#define IPV6_RECVDSTOPTS 58 +#define IPV6_DSTOPTS 59 +#define IPV6_RECVPATHMTU 60 +#define IPV6_PATHMTU 61 +#define IPV6_DONTFRAG 62 +#if 0 /* not yet */ +#define IPV6_USE_MIN_MTU 63 +#endif + +/* + * Netfilter (1) + * + * Following socket options are used in ip6_tables; + * see include/linux/netfilter_ipv6/ip6_tables.h. + * + * IP6T_SO_SET_REPLACE / IP6T_SO_GET_INFO 64 + * IP6T_SO_SET_ADD_COUNTERS / IP6T_SO_GET_ENTRIES 65 + */ + +/* + * Advanced API (RFC3542) (2) + */ +#define IPV6_RECVTCLASS 66 +#define IPV6_TCLASS 67 + +/* + * Netfilter (2) + * + * Following socket options are used in ip6_tables; + * see include/linux/netfilter_ipv6/ip6_tables.h. + * + * IP6T_SO_GET_REVISION_MATCH 68 + * IP6T_SO_GET_REVISION_TARGET 69 + * IP6T_SO_ORIGINAL_DST 80 + */ + +#define IPV6_AUTOFLOWLABEL 70 +/* RFC5014: Source address selection */ +#define IPV6_ADDR_PREFERENCES 72 + +#define IPV6_PREFER_SRC_TMP 0x0001 +#define IPV6_PREFER_SRC_PUBLIC 0x0002 +#define IPV6_PREFER_SRC_PUBTMP_DEFAULT 0x0100 +#define IPV6_PREFER_SRC_COA 0x0004 +#define IPV6_PREFER_SRC_HOME 0x0400 +#define IPV6_PREFER_SRC_CGA 0x0008 +#define IPV6_PREFER_SRC_NONCGA 0x0800 + +/* RFC5082: Generalized Ttl Security Mechanism */ +#define IPV6_MINHOPCOUNT 73 + +#define IPV6_ORIGDSTADDR 74 +#define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR +#define IPV6_TRANSPARENT 75 +#define IPV6_UNICAST_IF 76 +#define IPV6_RECVFRAGSIZE 77 +#define IPV6_FREEBIND 78 + +/* + * Multicast Routing: + * see include/uapi/linux/mroute6.h. + * + * MRT6_BASE 200 + * ... + * MRT6_MAX + */ +#endif /* _UAPI_LINUX_IN6_H */ diff --git a/src/basic/linux/ipv6_route.h b/src/basic/linux/ipv6_route.h new file mode 100644 index 0000000..593800a --- /dev/null +++ b/src/basic/linux/ipv6_route.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_IPV6_ROUTE_H +#define _UAPI_LINUX_IPV6_ROUTE_H + +#include +#include /* For struct in6_addr. */ + +#define RTF_DEFAULT 0x00010000 /* default - learned via ND */ +#define RTF_ALLONLINK 0x00020000 /* (deprecated and will be removed) + fallback, no routers on link */ +#define RTF_ADDRCONF 0x00040000 /* addrconf route - RA */ +#define RTF_PREFIX_RT 0x00080000 /* A prefix only route - RA */ +#define RTF_ANYCAST 0x00100000 /* Anycast */ + +#define RTF_NONEXTHOP 0x00200000 /* route with no nexthop */ +#define RTF_EXPIRES 0x00400000 + +#define RTF_ROUTEINFO 0x00800000 /* route information - RA */ + +#define RTF_CACHE 0x01000000 /* read-only: can not be set by user */ +#define RTF_FLOW 0x02000000 /* flow significant route */ +#define RTF_POLICY 0x04000000 /* policy route */ + +#define RTF_PREF(pref) ((pref) << 27) +#define RTF_PREF_MASK 0x18000000 + +#define RTF_PCPU 0x40000000 /* read-only: can not be set by user */ +#define RTF_LOCAL 0x80000000 + + +struct in6_rtmsg { + struct in6_addr rtmsg_dst; + struct in6_addr rtmsg_src; + struct in6_addr rtmsg_gateway; + __u32 rtmsg_type; + __u16 rtmsg_dst_len; + __u16 rtmsg_src_len; + __u32 rtmsg_metric; + unsigned long rtmsg_info; + __u32 rtmsg_flags; + int rtmsg_ifindex; +}; + +#define RTMSG_NEWDEVICE 0x11 +#define RTMSG_DELDEVICE 0x12 +#define RTMSG_NEWROUTE 0x21 +#define RTMSG_DELROUTE 0x22 + +#define IP6_RT_PRIO_USER 1024 +#define IP6_RT_PRIO_ADDRCONF 256 + +#endif /* _UAPI_LINUX_IPV6_ROUTE_H */ diff --git a/src/basic/linux/l2tp.h b/src/basic/linux/l2tp.h new file mode 100644 index 0000000..7d81c3e --- /dev/null +++ b/src/basic/linux/l2tp.h @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * L2TP-over-IP socket for L2TPv3. + * + * Author: James Chapman + */ + +#ifndef _UAPI_LINUX_L2TP_H_ +#define _UAPI_LINUX_L2TP_H_ + +#include +#include +#include +#include + +/** + * struct sockaddr_l2tpip - the sockaddr structure for L2TP-over-IP sockets + * @l2tp_family: address family number AF_L2TPIP. + * @l2tp_addr: protocol specific address information + * @l2tp_conn_id: connection id of tunnel + */ +#define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */ +struct sockaddr_l2tpip { + /* The first fields must match struct sockaddr_in */ + __kernel_sa_family_t l2tp_family; /* AF_INET */ + __be16 l2tp_unused; /* INET port number (unused) */ + struct in_addr l2tp_addr; /* Internet address */ + + __u32 l2tp_conn_id; /* Connection ID of tunnel */ + + /* Pad to size of `struct sockaddr'. */ + unsigned char __pad[__SOCK_SIZE__ - + sizeof(__kernel_sa_family_t) - + sizeof(__be16) - sizeof(struct in_addr) - + sizeof(__u32)]; +}; + +/** + * struct sockaddr_l2tpip6 - the sockaddr structure for L2TP-over-IPv6 sockets + * @l2tp_family: address family number AF_L2TPIP. + * @l2tp_addr: protocol specific address information + * @l2tp_conn_id: connection id of tunnel + */ +struct sockaddr_l2tpip6 { + /* The first fields must match struct sockaddr_in6 */ + __kernel_sa_family_t l2tp_family; /* AF_INET6 */ + __be16 l2tp_unused; /* INET port number (unused) */ + __be32 l2tp_flowinfo; /* IPv6 flow information */ + struct in6_addr l2tp_addr; /* IPv6 address */ + __u32 l2tp_scope_id; /* scope id (new in RFC2553) */ + __u32 l2tp_conn_id; /* Connection ID of tunnel */ +}; + +/***************************************************************************** + * NETLINK_GENERIC netlink family. + *****************************************************************************/ + +/* + * Commands. + * Valid TLVs of each command are:- + * TUNNEL_CREATE - CONN_ID, pw_type, netns, ifname, ipinfo, udpinfo, udpcsum + * TUNNEL_DELETE - CONN_ID + * TUNNEL_MODIFY - CONN_ID, udpcsum + * TUNNEL_GETSTATS - CONN_ID, (stats) + * TUNNEL_GET - CONN_ID, (...) + * SESSION_CREATE - SESSION_ID, PW_TYPE, cookie, peer_cookie, l2spec + * SESSION_DELETE - SESSION_ID + * SESSION_MODIFY - SESSION_ID + * SESSION_GET - SESSION_ID, (...) + * SESSION_GETSTATS - SESSION_ID, (stats) + * + */ +enum { + L2TP_CMD_NOOP, + L2TP_CMD_TUNNEL_CREATE, + L2TP_CMD_TUNNEL_DELETE, + L2TP_CMD_TUNNEL_MODIFY, + L2TP_CMD_TUNNEL_GET, + L2TP_CMD_SESSION_CREATE, + L2TP_CMD_SESSION_DELETE, + L2TP_CMD_SESSION_MODIFY, + L2TP_CMD_SESSION_GET, + __L2TP_CMD_MAX, +}; + +#define L2TP_CMD_MAX (__L2TP_CMD_MAX - 1) + +/* + * ATTR types defined for L2TP + */ +enum { + L2TP_ATTR_NONE, /* no data */ + L2TP_ATTR_PW_TYPE, /* u16, enum l2tp_pwtype */ + L2TP_ATTR_ENCAP_TYPE, /* u16, enum l2tp_encap_type */ + L2TP_ATTR_OFFSET, /* u16 (not used) */ + L2TP_ATTR_DATA_SEQ, /* u16 (not used) */ + L2TP_ATTR_L2SPEC_TYPE, /* u8, enum l2tp_l2spec_type */ + L2TP_ATTR_L2SPEC_LEN, /* u8 (not used) */ + L2TP_ATTR_PROTO_VERSION, /* u8 */ + L2TP_ATTR_IFNAME, /* string */ + L2TP_ATTR_CONN_ID, /* u32 */ + L2TP_ATTR_PEER_CONN_ID, /* u32 */ + L2TP_ATTR_SESSION_ID, /* u32 */ + L2TP_ATTR_PEER_SESSION_ID, /* u32 */ + L2TP_ATTR_UDP_CSUM, /* u8 */ + L2TP_ATTR_VLAN_ID, /* u16 (not used) */ + L2TP_ATTR_COOKIE, /* 0, 4 or 8 bytes */ + L2TP_ATTR_PEER_COOKIE, /* 0, 4 or 8 bytes */ + L2TP_ATTR_DEBUG, /* u32, enum l2tp_debug_flags (not used) */ + L2TP_ATTR_RECV_SEQ, /* u8 */ + L2TP_ATTR_SEND_SEQ, /* u8 */ + L2TP_ATTR_LNS_MODE, /* u8 */ + L2TP_ATTR_USING_IPSEC, /* u8 */ + L2TP_ATTR_RECV_TIMEOUT, /* msec */ + L2TP_ATTR_FD, /* int */ + L2TP_ATTR_IP_SADDR, /* u32 */ + L2TP_ATTR_IP_DADDR, /* u32 */ + L2TP_ATTR_UDP_SPORT, /* u16 */ + L2TP_ATTR_UDP_DPORT, /* u16 */ + L2TP_ATTR_MTU, /* u16 (not used) */ + L2TP_ATTR_MRU, /* u16 (not used) */ + L2TP_ATTR_STATS, /* nested */ + L2TP_ATTR_IP6_SADDR, /* struct in6_addr */ + L2TP_ATTR_IP6_DADDR, /* struct in6_addr */ + L2TP_ATTR_UDP_ZERO_CSUM6_TX, /* flag */ + L2TP_ATTR_UDP_ZERO_CSUM6_RX, /* flag */ + L2TP_ATTR_PAD, + __L2TP_ATTR_MAX, +}; + +#define L2TP_ATTR_MAX (__L2TP_ATTR_MAX - 1) + +/* Nested in L2TP_ATTR_STATS */ +enum { + L2TP_ATTR_STATS_NONE, /* no data */ + L2TP_ATTR_TX_PACKETS, /* u64 */ + L2TP_ATTR_TX_BYTES, /* u64 */ + L2TP_ATTR_TX_ERRORS, /* u64 */ + L2TP_ATTR_RX_PACKETS, /* u64 */ + L2TP_ATTR_RX_BYTES, /* u64 */ + L2TP_ATTR_RX_SEQ_DISCARDS, /* u64 */ + L2TP_ATTR_RX_OOS_PACKETS, /* u64 */ + L2TP_ATTR_RX_ERRORS, /* u64 */ + L2TP_ATTR_STATS_PAD, + L2TP_ATTR_RX_COOKIE_DISCARDS, /* u64 */ + L2TP_ATTR_RX_INVALID, /* u64 */ + __L2TP_ATTR_STATS_MAX, +}; + +#define L2TP_ATTR_STATS_MAX (__L2TP_ATTR_STATS_MAX - 1) + +enum l2tp_pwtype { + L2TP_PWTYPE_NONE = 0x0000, + L2TP_PWTYPE_ETH_VLAN = 0x0004, + L2TP_PWTYPE_ETH = 0x0005, + L2TP_PWTYPE_PPP = 0x0007, + L2TP_PWTYPE_PPP_AC = 0x0008, + L2TP_PWTYPE_IP = 0x000b, + __L2TP_PWTYPE_MAX +}; + +enum l2tp_l2spec_type { + L2TP_L2SPECTYPE_NONE, + L2TP_L2SPECTYPE_DEFAULT, +}; + +enum l2tp_encap_type { + L2TP_ENCAPTYPE_UDP, + L2TP_ENCAPTYPE_IP, +}; + +/* For L2TP_ATTR_DATA_SEQ. Unused. */ +enum l2tp_seqmode { + L2TP_SEQ_NONE = 0, + L2TP_SEQ_IP = 1, + L2TP_SEQ_ALL = 2, +}; + +/** + * enum l2tp_debug_flags - debug message categories for L2TP tunnels/sessions. + * + * Unused. + * + * @L2TP_MSG_DEBUG: verbose debug (if compiled in) + * @L2TP_MSG_CONTROL: userspace - kernel interface + * @L2TP_MSG_SEQ: sequence numbers + * @L2TP_MSG_DATA: data packets + */ +enum l2tp_debug_flags { + L2TP_MSG_DEBUG = (1 << 0), + L2TP_MSG_CONTROL = (1 << 1), + L2TP_MSG_SEQ = (1 << 2), + L2TP_MSG_DATA = (1 << 3), +}; + +/* + * NETLINK_GENERIC related info + */ +#define L2TP_GENL_NAME "l2tp" +#define L2TP_GENL_VERSION 0x1 +#define L2TP_GENL_MCGROUP "l2tp" + +#endif /* _UAPI_LINUX_L2TP_H_ */ diff --git a/src/basic/linux/libc-compat.h b/src/basic/linux/libc-compat.h new file mode 100644 index 0000000..8254c93 --- /dev/null +++ b/src/basic/linux/libc-compat.h @@ -0,0 +1,267 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Compatibility interface for userspace libc header coordination: + * + * Define compatibility macros that are used to control the inclusion or + * exclusion of UAPI structures and definitions in coordination with another + * userspace C library. + * + * This header is intended to solve the problem of UAPI definitions that + * conflict with userspace definitions. If a UAPI header has such conflicting + * definitions then the solution is as follows: + * + * * Synchronize the UAPI header and the libc headers so either one can be + * used and such that the ABI is preserved. If this is not possible then + * no simple compatibility interface exists (you need to write translating + * wrappers and rename things) and you can't use this interface. + * + * Then follow this process: + * + * (a) Include libc-compat.h in the UAPI header. + * e.g. #include + * This include must be as early as possible. + * + * (b) In libc-compat.h add enough code to detect that the comflicting + * userspace libc header has been included first. + * + * (c) If the userspace libc header has been included first define a set of + * guard macros of the form __UAPI_DEF_FOO and set their values to 1, else + * set their values to 0. + * + * (d) Back in the UAPI header with the conflicting definitions, guard the + * definitions with: + * #if __UAPI_DEF_FOO + * ... + * #endif + * + * This fixes the situation where the linux headers are included *after* the + * libc headers. To fix the problem with the inclusion in the other order the + * userspace libc headers must be fixed like this: + * + * * For all definitions that conflict with kernel definitions wrap those + * defines in the following: + * #if !__UAPI_DEF_FOO + * ... + * #endif + * + * This prevents the redefinition of a construct already defined by the kernel. + */ +#ifndef _UAPI_LIBC_COMPAT_H +#define _UAPI_LIBC_COMPAT_H + +/* We have included glibc headers... */ +#if defined(__GLIBC__) + +/* Coordinate with glibc net/if.h header. */ +#if defined(_NET_IF_H) && defined(__USE_MISC) + +/* GLIBC headers included first so don't define anything + * that would already be defined. */ + +#define __UAPI_DEF_IF_IFCONF 0 +#define __UAPI_DEF_IF_IFMAP 0 +#define __UAPI_DEF_IF_IFNAMSIZ 0 +#define __UAPI_DEF_IF_IFREQ 0 +/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */ +#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 0 +/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */ +#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO +#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1 +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */ + +#else /* _NET_IF_H */ + +/* Linux headers included first, and we must define everything + * we need. The expectation is that glibc will check the + * __UAPI_DEF_* defines and adjust appropriately. */ + +#define __UAPI_DEF_IF_IFCONF 1 +#define __UAPI_DEF_IF_IFMAP 1 +#define __UAPI_DEF_IF_IFNAMSIZ 1 +#define __UAPI_DEF_IF_IFREQ 1 +/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */ +#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1 +/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */ +#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1 + +#endif /* _NET_IF_H */ + +/* Coordinate with glibc netinet/in.h header. */ +#if defined(_NETINET_IN_H) + +/* GLIBC headers included first so don't define anything + * that would already be defined. */ +#define __UAPI_DEF_IN_ADDR 0 +#define __UAPI_DEF_IN_IPPROTO 0 +#define __UAPI_DEF_IN_PKTINFO 0 +#define __UAPI_DEF_IP_MREQ 0 +#define __UAPI_DEF_SOCKADDR_IN 0 +#define __UAPI_DEF_IN_CLASS 0 + +#define __UAPI_DEF_IN6_ADDR 0 +/* The exception is the in6_addr macros which must be defined + * if the glibc code didn't define them. This guard matches + * the guard in glibc/inet/netinet/in.h which defines the + * additional in6_addr macros e.g. s6_addr16, and s6_addr32. */ +#if defined(__USE_MISC) || defined (__USE_GNU) +#define __UAPI_DEF_IN6_ADDR_ALT 0 +#else +#define __UAPI_DEF_IN6_ADDR_ALT 1 +#endif +#define __UAPI_DEF_SOCKADDR_IN6 0 +#define __UAPI_DEF_IPV6_MREQ 0 +#define __UAPI_DEF_IPPROTO_V6 0 +#define __UAPI_DEF_IPV6_OPTIONS 0 +#define __UAPI_DEF_IN6_PKTINFO 0 +#define __UAPI_DEF_IP6_MTUINFO 0 + +#else + +/* Linux headers included first, and we must define everything + * we need. The expectation is that glibc will check the + * __UAPI_DEF_* defines and adjust appropriately. */ +#define __UAPI_DEF_IN_ADDR 1 +#define __UAPI_DEF_IN_IPPROTO 1 +#define __UAPI_DEF_IN_PKTINFO 1 +#define __UAPI_DEF_IP_MREQ 1 +#define __UAPI_DEF_SOCKADDR_IN 1 +#define __UAPI_DEF_IN_CLASS 1 + +#define __UAPI_DEF_IN6_ADDR 1 +/* We unconditionally define the in6_addr macros and glibc must + * coordinate. */ +#define __UAPI_DEF_IN6_ADDR_ALT 1 +#define __UAPI_DEF_SOCKADDR_IN6 1 +#define __UAPI_DEF_IPV6_MREQ 1 +#define __UAPI_DEF_IPPROTO_V6 1 +#define __UAPI_DEF_IPV6_OPTIONS 1 +#define __UAPI_DEF_IN6_PKTINFO 1 +#define __UAPI_DEF_IP6_MTUINFO 1 + +#endif /* _NETINET_IN_H */ + +/* Coordinate with glibc netipx/ipx.h header. */ +#if defined(__NETIPX_IPX_H) + +#define __UAPI_DEF_SOCKADDR_IPX 0 +#define __UAPI_DEF_IPX_ROUTE_DEFINITION 0 +#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 0 +#define __UAPI_DEF_IPX_CONFIG_DATA 0 +#define __UAPI_DEF_IPX_ROUTE_DEF 0 + +#else /* defined(__NETIPX_IPX_H) */ + +#define __UAPI_DEF_SOCKADDR_IPX 1 +#define __UAPI_DEF_IPX_ROUTE_DEFINITION 1 +#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1 +#define __UAPI_DEF_IPX_CONFIG_DATA 1 +#define __UAPI_DEF_IPX_ROUTE_DEF 1 + +#endif /* defined(__NETIPX_IPX_H) */ + +/* Definitions for xattr.h */ +#if defined(_SYS_XATTR_H) +#define __UAPI_DEF_XATTR 0 +#else +#define __UAPI_DEF_XATTR 1 +#endif + +/* If we did not see any headers from any supported C libraries, + * or we are being included in the kernel, then define everything + * that we need. Check for previous __UAPI_* definitions to give + * unsupported C libraries a way to opt out of any kernel definition. */ +#else /* !defined(__GLIBC__) */ + +/* Definitions for if.h */ +#ifndef __UAPI_DEF_IF_IFCONF +#define __UAPI_DEF_IF_IFCONF 1 +#endif +#ifndef __UAPI_DEF_IF_IFMAP +#define __UAPI_DEF_IF_IFMAP 1 +#endif +#ifndef __UAPI_DEF_IF_IFNAMSIZ +#define __UAPI_DEF_IF_IFNAMSIZ 1 +#endif +#ifndef __UAPI_DEF_IF_IFREQ +#define __UAPI_DEF_IF_IFREQ 1 +#endif +/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */ +#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS +#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1 +#endif +/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */ +#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO +#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1 +#endif + +/* Definitions for in.h */ +#ifndef __UAPI_DEF_IN_ADDR +#define __UAPI_DEF_IN_ADDR 1 +#endif +#ifndef __UAPI_DEF_IN_IPPROTO +#define __UAPI_DEF_IN_IPPROTO 1 +#endif +#ifndef __UAPI_DEF_IN_PKTINFO +#define __UAPI_DEF_IN_PKTINFO 1 +#endif +#ifndef __UAPI_DEF_IP_MREQ +#define __UAPI_DEF_IP_MREQ 1 +#endif +#ifndef __UAPI_DEF_SOCKADDR_IN +#define __UAPI_DEF_SOCKADDR_IN 1 +#endif +#ifndef __UAPI_DEF_IN_CLASS +#define __UAPI_DEF_IN_CLASS 1 +#endif + +/* Definitions for in6.h */ +#ifndef __UAPI_DEF_IN6_ADDR +#define __UAPI_DEF_IN6_ADDR 1 +#endif +#ifndef __UAPI_DEF_IN6_ADDR_ALT +#define __UAPI_DEF_IN6_ADDR_ALT 1 +#endif +#ifndef __UAPI_DEF_SOCKADDR_IN6 +#define __UAPI_DEF_SOCKADDR_IN6 1 +#endif +#ifndef __UAPI_DEF_IPV6_MREQ +#define __UAPI_DEF_IPV6_MREQ 1 +#endif +#ifndef __UAPI_DEF_IPPROTO_V6 +#define __UAPI_DEF_IPPROTO_V6 1 +#endif +#ifndef __UAPI_DEF_IPV6_OPTIONS +#define __UAPI_DEF_IPV6_OPTIONS 1 +#endif +#ifndef __UAPI_DEF_IN6_PKTINFO +#define __UAPI_DEF_IN6_PKTINFO 1 +#endif +#ifndef __UAPI_DEF_IP6_MTUINFO +#define __UAPI_DEF_IP6_MTUINFO 1 +#endif + +/* Definitions for ipx.h */ +#ifndef __UAPI_DEF_SOCKADDR_IPX +#define __UAPI_DEF_SOCKADDR_IPX 1 +#endif +#ifndef __UAPI_DEF_IPX_ROUTE_DEFINITION +#define __UAPI_DEF_IPX_ROUTE_DEFINITION 1 +#endif +#ifndef __UAPI_DEF_IPX_INTERFACE_DEFINITION +#define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1 +#endif +#ifndef __UAPI_DEF_IPX_CONFIG_DATA +#define __UAPI_DEF_IPX_CONFIG_DATA 1 +#endif +#ifndef __UAPI_DEF_IPX_ROUTE_DEF +#define __UAPI_DEF_IPX_ROUTE_DEF 1 +#endif + +/* Definitions for xattr.h */ +#ifndef __UAPI_DEF_XATTR +#define __UAPI_DEF_XATTR 1 +#endif + +#endif /* __GLIBC__ */ + +#endif /* _UAPI_LIBC_COMPAT_H */ diff --git a/src/basic/linux/mrp_bridge.h b/src/basic/linux/mrp_bridge.h new file mode 100644 index 0000000..bd4424d --- /dev/null +++ b/src/basic/linux/mrp_bridge.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_MRP_BRIDGE_H_ +#define _UAPI_LINUX_MRP_BRIDGE_H_ + +#include +#include + +#define MRP_MAX_FRAME_LENGTH 200 +#define MRP_DEFAULT_PRIO 0x8000 +#define MRP_DOMAIN_UUID_LENGTH 16 +#define MRP_VERSION 1 +#define MRP_FRAME_PRIO 7 +#define MRP_OUI_LENGTH 3 +#define MRP_MANUFACTURE_DATA_LENGTH 2 + +enum br_mrp_ring_role_type { + BR_MRP_RING_ROLE_DISABLED, + BR_MRP_RING_ROLE_MRC, + BR_MRP_RING_ROLE_MRM, + BR_MRP_RING_ROLE_MRA, +}; + +enum br_mrp_in_role_type { + BR_MRP_IN_ROLE_DISABLED, + BR_MRP_IN_ROLE_MIC, + BR_MRP_IN_ROLE_MIM, +}; + +enum br_mrp_ring_state_type { + BR_MRP_RING_STATE_OPEN, + BR_MRP_RING_STATE_CLOSED, +}; + +enum br_mrp_in_state_type { + BR_MRP_IN_STATE_OPEN, + BR_MRP_IN_STATE_CLOSED, +}; + +enum br_mrp_port_state_type { + BR_MRP_PORT_STATE_DISABLED, + BR_MRP_PORT_STATE_BLOCKED, + BR_MRP_PORT_STATE_FORWARDING, + BR_MRP_PORT_STATE_NOT_CONNECTED, +}; + +enum br_mrp_port_role_type { + BR_MRP_PORT_ROLE_PRIMARY, + BR_MRP_PORT_ROLE_SECONDARY, + BR_MRP_PORT_ROLE_INTER, +}; + +enum br_mrp_tlv_header_type { + BR_MRP_TLV_HEADER_END = 0x0, + BR_MRP_TLV_HEADER_COMMON = 0x1, + BR_MRP_TLV_HEADER_RING_TEST = 0x2, + BR_MRP_TLV_HEADER_RING_TOPO = 0x3, + BR_MRP_TLV_HEADER_RING_LINK_DOWN = 0x4, + BR_MRP_TLV_HEADER_RING_LINK_UP = 0x5, + BR_MRP_TLV_HEADER_IN_TEST = 0x6, + BR_MRP_TLV_HEADER_IN_TOPO = 0x7, + BR_MRP_TLV_HEADER_IN_LINK_DOWN = 0x8, + BR_MRP_TLV_HEADER_IN_LINK_UP = 0x9, + BR_MRP_TLV_HEADER_IN_LINK_STATUS = 0xa, + BR_MRP_TLV_HEADER_OPTION = 0x7f, +}; + +enum br_mrp_sub_tlv_header_type { + BR_MRP_SUB_TLV_HEADER_TEST_MGR_NACK = 0x1, + BR_MRP_SUB_TLV_HEADER_TEST_PROPAGATE = 0x2, + BR_MRP_SUB_TLV_HEADER_TEST_AUTO_MGR = 0x3, +}; + +#endif diff --git a/src/basic/linux/netdevice.h b/src/basic/linux/netdevice.h new file mode 100644 index 0000000..f3770c5 --- /dev/null +++ b/src/basic/linux/netdevice.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the Interfaces handler. + * + * Version: @(#)dev.h 1.0.10 08/12/93 + * + * Authors: Ross Biro + * Fred N. van Kempen, + * Corey Minyard + * Donald J. Becker, + * Alan Cox, + * Bjorn Ekwall. + * Pekka Riikonen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Moved to /usr/include/linux for NET3 + */ +#ifndef _UAPI_LINUX_NETDEVICE_H +#define _UAPI_LINUX_NETDEVICE_H + +#include +#include +#include +#include + + +#define MAX_ADDR_LEN 32 /* Largest hardware address length */ + +/* Initial net device group. All devices belong to group 0 by default. */ +#define INIT_NETDEV_GROUP 0 + + +/* interface name assignment types (sysfs name_assign_type attribute) */ +#define NET_NAME_UNKNOWN 0 /* unknown origin (not exposed to userspace) */ +#define NET_NAME_ENUM 1 /* enumerated by kernel */ +#define NET_NAME_PREDICTABLE 2 /* predictably named by the kernel */ +#define NET_NAME_USER 3 /* provided by user-space */ +#define NET_NAME_RENAMED 4 /* renamed by user-space */ + +/* Media selection options. */ +enum { + IF_PORT_UNKNOWN = 0, + IF_PORT_10BASE2, + IF_PORT_10BASET, + IF_PORT_AUI, + IF_PORT_100BASET, + IF_PORT_100BASETX, + IF_PORT_100BASEFX +}; + +/* hardware address assignment types */ +#define NET_ADDR_PERM 0 /* address is permanent (default) */ +#define NET_ADDR_RANDOM 1 /* address is generated randomly */ +#define NET_ADDR_STOLEN 2 /* address is stolen from other device */ +#define NET_ADDR_SET 3 /* address is set using + * dev_set_mac_address() */ + +#endif /* _UAPI_LINUX_NETDEVICE_H */ diff --git a/src/basic/linux/netfilter/nf_tables.h b/src/basic/linux/netfilter/nf_tables.h new file mode 100644 index 0000000..cfa844d --- /dev/null +++ b/src/basic/linux/netfilter/nf_tables.h @@ -0,0 +1,1963 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_NF_TABLES_H +#define _LINUX_NF_TABLES_H + +#define NFT_NAME_MAXLEN 256 +#define NFT_TABLE_MAXNAMELEN NFT_NAME_MAXLEN +#define NFT_CHAIN_MAXNAMELEN NFT_NAME_MAXLEN +#define NFT_SET_MAXNAMELEN NFT_NAME_MAXLEN +#define NFT_OBJ_MAXNAMELEN NFT_NAME_MAXLEN +#define NFT_USERDATA_MAXLEN 256 +#define NFT_OSF_MAXGENRELEN 16 + +/** + * enum nft_registers - nf_tables registers + * + * nf_tables used to have five registers: a verdict register and four data + * registers of size 16. The data registers have been changed to 16 registers + * of size 4. For compatibility reasons, the NFT_REG_[1-4] registers still + * map to areas of size 16, the 4 byte registers are addressed using + * NFT_REG32_00 - NFT_REG32_15. + */ +enum nft_registers { + NFT_REG_VERDICT, + NFT_REG_1, + NFT_REG_2, + NFT_REG_3, + NFT_REG_4, + __NFT_REG_MAX, + + NFT_REG32_00 = 8, + NFT_REG32_01, + NFT_REG32_02, + NFT_REG32_03, + NFT_REG32_04, + NFT_REG32_05, + NFT_REG32_06, + NFT_REG32_07, + NFT_REG32_08, + NFT_REG32_09, + NFT_REG32_10, + NFT_REG32_11, + NFT_REG32_12, + NFT_REG32_13, + NFT_REG32_14, + NFT_REG32_15, +}; +#define NFT_REG_MAX (__NFT_REG_MAX - 1) + +#define NFT_REG_SIZE 16 +#define NFT_REG32_SIZE 4 +#define NFT_REG32_COUNT (NFT_REG32_15 - NFT_REG32_00 + 1) + +/** + * enum nft_verdicts - nf_tables internal verdicts + * + * @NFT_CONTINUE: continue evaluation of the current rule + * @NFT_BREAK: terminate evaluation of the current rule + * @NFT_JUMP: push the current chain on the jump stack and jump to a chain + * @NFT_GOTO: jump to a chain without pushing the current chain on the jump stack + * @NFT_RETURN: return to the topmost chain on the jump stack + * + * The nf_tables verdicts share their numeric space with the netfilter verdicts. + */ +enum nft_verdicts { + NFT_CONTINUE = -1, + NFT_BREAK = -2, + NFT_JUMP = -3, + NFT_GOTO = -4, + NFT_RETURN = -5, +}; + +/** + * enum nf_tables_msg_types - nf_tables netlink message types + * + * @NFT_MSG_NEWTABLE: create a new table (enum nft_table_attributes) + * @NFT_MSG_GETTABLE: get a table (enum nft_table_attributes) + * @NFT_MSG_DELTABLE: delete a table (enum nft_table_attributes) + * @NFT_MSG_NEWCHAIN: create a new chain (enum nft_chain_attributes) + * @NFT_MSG_GETCHAIN: get a chain (enum nft_chain_attributes) + * @NFT_MSG_DELCHAIN: delete a chain (enum nft_chain_attributes) + * @NFT_MSG_NEWRULE: create a new rule (enum nft_rule_attributes) + * @NFT_MSG_GETRULE: get a rule (enum nft_rule_attributes) + * @NFT_MSG_DELRULE: delete a rule (enum nft_rule_attributes) + * @NFT_MSG_NEWSET: create a new set (enum nft_set_attributes) + * @NFT_MSG_GETSET: get a set (enum nft_set_attributes) + * @NFT_MSG_DELSET: delete a set (enum nft_set_attributes) + * @NFT_MSG_NEWSETELEM: create a new set element (enum nft_set_elem_attributes) + * @NFT_MSG_GETSETELEM: get a set element (enum nft_set_elem_attributes) + * @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes) + * @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes) + * @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes) + * @NFT_MSG_TRACE: trace event (enum nft_trace_attributes) + * @NFT_MSG_NEWOBJ: create a stateful object (enum nft_obj_attributes) + * @NFT_MSG_GETOBJ: get a stateful object (enum nft_obj_attributes) + * @NFT_MSG_DELOBJ: delete a stateful object (enum nft_obj_attributes) + * @NFT_MSG_GETOBJ_RESET: get and reset a stateful object (enum nft_obj_attributes) + * @NFT_MSG_NEWFLOWTABLE: add new flow table (enum nft_flowtable_attributes) + * @NFT_MSG_GETFLOWTABLE: get flow table (enum nft_flowtable_attributes) + * @NFT_MSG_DELFLOWTABLE: delete flow table (enum nft_flowtable_attributes) + * @NFT_MSG_GETRULE_RESET: get rules and reset stateful expressions (enum nft_obj_attributes) + */ +enum nf_tables_msg_types { + NFT_MSG_NEWTABLE, + NFT_MSG_GETTABLE, + NFT_MSG_DELTABLE, + NFT_MSG_NEWCHAIN, + NFT_MSG_GETCHAIN, + NFT_MSG_DELCHAIN, + NFT_MSG_NEWRULE, + NFT_MSG_GETRULE, + NFT_MSG_DELRULE, + NFT_MSG_NEWSET, + NFT_MSG_GETSET, + NFT_MSG_DELSET, + NFT_MSG_NEWSETELEM, + NFT_MSG_GETSETELEM, + NFT_MSG_DELSETELEM, + NFT_MSG_NEWGEN, + NFT_MSG_GETGEN, + NFT_MSG_TRACE, + NFT_MSG_NEWOBJ, + NFT_MSG_GETOBJ, + NFT_MSG_DELOBJ, + NFT_MSG_GETOBJ_RESET, + NFT_MSG_NEWFLOWTABLE, + NFT_MSG_GETFLOWTABLE, + NFT_MSG_DELFLOWTABLE, + NFT_MSG_GETRULE_RESET, + NFT_MSG_MAX, +}; + +/** + * enum nft_list_attributes - nf_tables generic list netlink attributes + * + * @NFTA_LIST_ELEM: list element (NLA_NESTED) + */ +enum nft_list_attributes { + NFTA_LIST_UNSPEC, + NFTA_LIST_ELEM, + __NFTA_LIST_MAX +}; +#define NFTA_LIST_MAX (__NFTA_LIST_MAX - 1) + +/** + * enum nft_hook_attributes - nf_tables netfilter hook netlink attributes + * + * @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32) + * @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32) + * @NFTA_HOOK_DEV: netdevice name (NLA_STRING) + * @NFTA_HOOK_DEVS: list of netdevices (NLA_NESTED) + */ +enum nft_hook_attributes { + NFTA_HOOK_UNSPEC, + NFTA_HOOK_HOOKNUM, + NFTA_HOOK_PRIORITY, + NFTA_HOOK_DEV, + NFTA_HOOK_DEVS, + __NFTA_HOOK_MAX +}; +#define NFTA_HOOK_MAX (__NFTA_HOOK_MAX - 1) + +/** + * enum nft_table_flags - nf_tables table flags + * + * @NFT_TABLE_F_DORMANT: this table is not active + */ +enum nft_table_flags { + NFT_TABLE_F_DORMANT = 0x1, + NFT_TABLE_F_OWNER = 0x2, +}; +#define NFT_TABLE_F_MASK (NFT_TABLE_F_DORMANT | \ + NFT_TABLE_F_OWNER) + +/** + * enum nft_table_attributes - nf_tables table netlink attributes + * + * @NFTA_TABLE_NAME: name of the table (NLA_STRING) + * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32) + * @NFTA_TABLE_USE: number of chains in this table (NLA_U32) + * @NFTA_TABLE_USERDATA: user data (NLA_BINARY) + * @NFTA_TABLE_OWNER: owner of this table through netlink portID (NLA_U32) + */ +enum nft_table_attributes { + NFTA_TABLE_UNSPEC, + NFTA_TABLE_NAME, + NFTA_TABLE_FLAGS, + NFTA_TABLE_USE, + NFTA_TABLE_HANDLE, + NFTA_TABLE_PAD, + NFTA_TABLE_USERDATA, + NFTA_TABLE_OWNER, + __NFTA_TABLE_MAX +}; +#define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) + +enum nft_chain_flags { + NFT_CHAIN_BASE = (1 << 0), + NFT_CHAIN_HW_OFFLOAD = (1 << 1), + NFT_CHAIN_BINDING = (1 << 2), +}; +#define NFT_CHAIN_FLAGS (NFT_CHAIN_BASE | \ + NFT_CHAIN_HW_OFFLOAD | \ + NFT_CHAIN_BINDING) + +/** + * enum nft_chain_attributes - nf_tables chain netlink attributes + * + * @NFTA_CHAIN_TABLE: name of the table containing the chain (NLA_STRING) + * @NFTA_CHAIN_HANDLE: numeric handle of the chain (NLA_U64) + * @NFTA_CHAIN_NAME: name of the chain (NLA_STRING) + * @NFTA_CHAIN_HOOK: hook specification for basechains (NLA_NESTED: nft_hook_attributes) + * @NFTA_CHAIN_POLICY: numeric policy of the chain (NLA_U32) + * @NFTA_CHAIN_USE: number of references to this chain (NLA_U32) + * @NFTA_CHAIN_TYPE: type name of the string (NLA_NUL_STRING) + * @NFTA_CHAIN_COUNTERS: counter specification of the chain (NLA_NESTED: nft_counter_attributes) + * @NFTA_CHAIN_FLAGS: chain flags + * @NFTA_CHAIN_ID: uniquely identifies a chain in a transaction (NLA_U32) + * @NFTA_CHAIN_USERDATA: user data (NLA_BINARY) + */ +enum nft_chain_attributes { + NFTA_CHAIN_UNSPEC, + NFTA_CHAIN_TABLE, + NFTA_CHAIN_HANDLE, + NFTA_CHAIN_NAME, + NFTA_CHAIN_HOOK, + NFTA_CHAIN_POLICY, + NFTA_CHAIN_USE, + NFTA_CHAIN_TYPE, + NFTA_CHAIN_COUNTERS, + NFTA_CHAIN_PAD, + NFTA_CHAIN_FLAGS, + NFTA_CHAIN_ID, + NFTA_CHAIN_USERDATA, + __NFTA_CHAIN_MAX +}; +#define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) + +/** + * enum nft_rule_attributes - nf_tables rule netlink attributes + * + * @NFTA_RULE_TABLE: name of the table containing the rule (NLA_STRING) + * @NFTA_RULE_CHAIN: name of the chain containing the rule (NLA_STRING) + * @NFTA_RULE_HANDLE: numeric handle of the rule (NLA_U64) + * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes) + * @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes) + * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64) + * @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN) + * @NFTA_RULE_ID: uniquely identifies a rule in a transaction (NLA_U32) + * @NFTA_RULE_POSITION_ID: transaction unique identifier of the previous rule (NLA_U32) + */ +enum nft_rule_attributes { + NFTA_RULE_UNSPEC, + NFTA_RULE_TABLE, + NFTA_RULE_CHAIN, + NFTA_RULE_HANDLE, + NFTA_RULE_EXPRESSIONS, + NFTA_RULE_COMPAT, + NFTA_RULE_POSITION, + NFTA_RULE_USERDATA, + NFTA_RULE_PAD, + NFTA_RULE_ID, + NFTA_RULE_POSITION_ID, + NFTA_RULE_CHAIN_ID, + __NFTA_RULE_MAX +}; +#define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) + +/** + * enum nft_rule_compat_flags - nf_tables rule compat flags + * + * @NFT_RULE_COMPAT_F_INV: invert the check result + */ +enum nft_rule_compat_flags { + NFT_RULE_COMPAT_F_INV = (1 << 1), + NFT_RULE_COMPAT_F_MASK = NFT_RULE_COMPAT_F_INV, +}; + +/** + * enum nft_rule_compat_attributes - nf_tables rule compat attributes + * + * @NFTA_RULE_COMPAT_PROTO: numeric value of handled protocol (NLA_U32) + * @NFTA_RULE_COMPAT_FLAGS: bitmask of enum nft_rule_compat_flags (NLA_U32) + */ +enum nft_rule_compat_attributes { + NFTA_RULE_COMPAT_UNSPEC, + NFTA_RULE_COMPAT_PROTO, + NFTA_RULE_COMPAT_FLAGS, + __NFTA_RULE_COMPAT_MAX +}; +#define NFTA_RULE_COMPAT_MAX (__NFTA_RULE_COMPAT_MAX - 1) + +/** + * enum nft_set_flags - nf_tables set flags + * + * @NFT_SET_ANONYMOUS: name allocation, automatic cleanup on unlink + * @NFT_SET_CONSTANT: set contents may not change while bound + * @NFT_SET_INTERVAL: set contains intervals + * @NFT_SET_MAP: set is used as a dictionary + * @NFT_SET_TIMEOUT: set uses timeouts + * @NFT_SET_EVAL: set can be updated from the evaluation path + * @NFT_SET_OBJECT: set contains stateful objects + * @NFT_SET_CONCAT: set contains a concatenation + * @NFT_SET_EXPR: set contains expressions + */ +enum nft_set_flags { + NFT_SET_ANONYMOUS = 0x1, + NFT_SET_CONSTANT = 0x2, + NFT_SET_INTERVAL = 0x4, + NFT_SET_MAP = 0x8, + NFT_SET_TIMEOUT = 0x10, + NFT_SET_EVAL = 0x20, + NFT_SET_OBJECT = 0x40, + NFT_SET_CONCAT = 0x80, + NFT_SET_EXPR = 0x100, +}; + +/** + * enum nft_set_policies - set selection policy + * + * @NFT_SET_POL_PERFORMANCE: prefer high performance over low memory use + * @NFT_SET_POL_MEMORY: prefer low memory use over high performance + */ +enum nft_set_policies { + NFT_SET_POL_PERFORMANCE, + NFT_SET_POL_MEMORY, +}; + +/** + * enum nft_set_desc_attributes - set element description + * + * @NFTA_SET_DESC_SIZE: number of elements in set (NLA_U32) + * @NFTA_SET_DESC_CONCAT: description of field concatenation (NLA_NESTED) + */ +enum nft_set_desc_attributes { + NFTA_SET_DESC_UNSPEC, + NFTA_SET_DESC_SIZE, + NFTA_SET_DESC_CONCAT, + __NFTA_SET_DESC_MAX +}; +#define NFTA_SET_DESC_MAX (__NFTA_SET_DESC_MAX - 1) + +/** + * enum nft_set_field_attributes - attributes of concatenated fields + * + * @NFTA_SET_FIELD_LEN: length of single field, in bits (NLA_U32) + */ +enum nft_set_field_attributes { + NFTA_SET_FIELD_UNSPEC, + NFTA_SET_FIELD_LEN, + __NFTA_SET_FIELD_MAX +}; +#define NFTA_SET_FIELD_MAX (__NFTA_SET_FIELD_MAX - 1) + +/** + * enum nft_set_attributes - nf_tables set netlink attributes + * + * @NFTA_SET_TABLE: table name (NLA_STRING) + * @NFTA_SET_NAME: set name (NLA_STRING) + * @NFTA_SET_FLAGS: bitmask of enum nft_set_flags (NLA_U32) + * @NFTA_SET_KEY_TYPE: key data type, informational purpose only (NLA_U32) + * @NFTA_SET_KEY_LEN: key data length (NLA_U32) + * @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32) + * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32) + * @NFTA_SET_POLICY: selection policy (NLA_U32) + * @NFTA_SET_DESC: set description (NLA_NESTED) + * @NFTA_SET_ID: uniquely identifies a set in a transaction (NLA_U32) + * @NFTA_SET_TIMEOUT: default timeout value (NLA_U64) + * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32) + * @NFTA_SET_USERDATA: user data (NLA_BINARY) + * @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*) + * @NFTA_SET_HANDLE: set handle (NLA_U64) + * @NFTA_SET_EXPR: set expression (NLA_NESTED: nft_expr_attributes) + * @NFTA_SET_EXPRESSIONS: list of expressions (NLA_NESTED: nft_list_attributes) + */ +enum nft_set_attributes { + NFTA_SET_UNSPEC, + NFTA_SET_TABLE, + NFTA_SET_NAME, + NFTA_SET_FLAGS, + NFTA_SET_KEY_TYPE, + NFTA_SET_KEY_LEN, + NFTA_SET_DATA_TYPE, + NFTA_SET_DATA_LEN, + NFTA_SET_POLICY, + NFTA_SET_DESC, + NFTA_SET_ID, + NFTA_SET_TIMEOUT, + NFTA_SET_GC_INTERVAL, + NFTA_SET_USERDATA, + NFTA_SET_PAD, + NFTA_SET_OBJ_TYPE, + NFTA_SET_HANDLE, + NFTA_SET_EXPR, + NFTA_SET_EXPRESSIONS, + __NFTA_SET_MAX +}; +#define NFTA_SET_MAX (__NFTA_SET_MAX - 1) + +/** + * enum nft_set_elem_flags - nf_tables set element flags + * + * @NFT_SET_ELEM_INTERVAL_END: element ends the previous interval + * @NFT_SET_ELEM_CATCHALL: special catch-all element + */ +enum nft_set_elem_flags { + NFT_SET_ELEM_INTERVAL_END = 0x1, + NFT_SET_ELEM_CATCHALL = 0x2, +}; + +/** + * enum nft_set_elem_attributes - nf_tables set element netlink attributes + * + * @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data) + * @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes) + * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32) + * @NFTA_SET_ELEM_TIMEOUT: timeout value (NLA_U64) + * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64) + * @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY) + * @NFTA_SET_ELEM_EXPR: expression (NLA_NESTED: nft_expr_attributes) + * @NFTA_SET_ELEM_OBJREF: stateful object reference (NLA_STRING) + * @NFTA_SET_ELEM_KEY_END: closing key value (NLA_NESTED: nft_data) + * @NFTA_SET_ELEM_EXPRESSIONS: list of expressions (NLA_NESTED: nft_list_attributes) + */ +enum nft_set_elem_attributes { + NFTA_SET_ELEM_UNSPEC, + NFTA_SET_ELEM_KEY, + NFTA_SET_ELEM_DATA, + NFTA_SET_ELEM_FLAGS, + NFTA_SET_ELEM_TIMEOUT, + NFTA_SET_ELEM_EXPIRATION, + NFTA_SET_ELEM_USERDATA, + NFTA_SET_ELEM_EXPR, + NFTA_SET_ELEM_PAD, + NFTA_SET_ELEM_OBJREF, + NFTA_SET_ELEM_KEY_END, + NFTA_SET_ELEM_EXPRESSIONS, + __NFTA_SET_ELEM_MAX +}; +#define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1) + +/** + * enum nft_set_elem_list_attributes - nf_tables set element list netlink attributes + * + * @NFTA_SET_ELEM_LIST_TABLE: table of the set to be changed (NLA_STRING) + * @NFTA_SET_ELEM_LIST_SET: name of the set to be changed (NLA_STRING) + * @NFTA_SET_ELEM_LIST_ELEMENTS: list of set elements (NLA_NESTED: nft_set_elem_attributes) + * @NFTA_SET_ELEM_LIST_SET_ID: uniquely identifies a set in a transaction (NLA_U32) + */ +enum nft_set_elem_list_attributes { + NFTA_SET_ELEM_LIST_UNSPEC, + NFTA_SET_ELEM_LIST_TABLE, + NFTA_SET_ELEM_LIST_SET, + NFTA_SET_ELEM_LIST_ELEMENTS, + NFTA_SET_ELEM_LIST_SET_ID, + __NFTA_SET_ELEM_LIST_MAX +}; +#define NFTA_SET_ELEM_LIST_MAX (__NFTA_SET_ELEM_LIST_MAX - 1) + +/** + * enum nft_data_types - nf_tables data types + * + * @NFT_DATA_VALUE: generic data + * @NFT_DATA_VERDICT: netfilter verdict + * + * The type of data is usually determined by the kernel directly and is not + * explicitly specified by userspace. The only difference are sets, where + * userspace specifies the key and mapping data types. + * + * The values 0xffffff00-0xffffffff are reserved for internally used types. + * The remaining range can be freely used by userspace to encode types, all + * values are equivalent to NFT_DATA_VALUE. + */ +enum nft_data_types { + NFT_DATA_VALUE, + NFT_DATA_VERDICT = 0xffffff00U, +}; + +#define NFT_DATA_RESERVED_MASK 0xffffff00U + +/** + * enum nft_data_attributes - nf_tables data netlink attributes + * + * @NFTA_DATA_VALUE: generic data (NLA_BINARY) + * @NFTA_DATA_VERDICT: nf_tables verdict (NLA_NESTED: nft_verdict_attributes) + */ +enum nft_data_attributes { + NFTA_DATA_UNSPEC, + NFTA_DATA_VALUE, + NFTA_DATA_VERDICT, + __NFTA_DATA_MAX +}; +#define NFTA_DATA_MAX (__NFTA_DATA_MAX - 1) + +/* Maximum length of a value */ +#define NFT_DATA_VALUE_MAXLEN 64 + +/** + * enum nft_verdict_attributes - nf_tables verdict netlink attributes + * + * @NFTA_VERDICT_CODE: nf_tables verdict (NLA_U32: enum nft_verdicts) + * @NFTA_VERDICT_CHAIN: jump target chain name (NLA_STRING) + * @NFTA_VERDICT_CHAIN_ID: jump target chain ID (NLA_U32) + */ +enum nft_verdict_attributes { + NFTA_VERDICT_UNSPEC, + NFTA_VERDICT_CODE, + NFTA_VERDICT_CHAIN, + NFTA_VERDICT_CHAIN_ID, + __NFTA_VERDICT_MAX +}; +#define NFTA_VERDICT_MAX (__NFTA_VERDICT_MAX - 1) + +/** + * enum nft_expr_attributes - nf_tables expression netlink attributes + * + * @NFTA_EXPR_NAME: name of the expression type (NLA_STRING) + * @NFTA_EXPR_DATA: type specific data (NLA_NESTED) + */ +enum nft_expr_attributes { + NFTA_EXPR_UNSPEC, + NFTA_EXPR_NAME, + NFTA_EXPR_DATA, + __NFTA_EXPR_MAX +}; +#define NFTA_EXPR_MAX (__NFTA_EXPR_MAX - 1) + +/** + * enum nft_immediate_attributes - nf_tables immediate expression netlink attributes + * + * @NFTA_IMMEDIATE_DREG: destination register to load data into (NLA_U32) + * @NFTA_IMMEDIATE_DATA: data to load (NLA_NESTED: nft_data_attributes) + */ +enum nft_immediate_attributes { + NFTA_IMMEDIATE_UNSPEC, + NFTA_IMMEDIATE_DREG, + NFTA_IMMEDIATE_DATA, + __NFTA_IMMEDIATE_MAX +}; +#define NFTA_IMMEDIATE_MAX (__NFTA_IMMEDIATE_MAX - 1) + +/** + * enum nft_bitwise_ops - nf_tables bitwise operations + * + * @NFT_BITWISE_BOOL: mask-and-xor operation used to implement NOT, AND, OR and + * XOR boolean operations + * @NFT_BITWISE_LSHIFT: left-shift operation + * @NFT_BITWISE_RSHIFT: right-shift operation + */ +enum nft_bitwise_ops { + NFT_BITWISE_BOOL, + NFT_BITWISE_LSHIFT, + NFT_BITWISE_RSHIFT, +}; + +/** + * enum nft_bitwise_attributes - nf_tables bitwise expression netlink attributes + * + * @NFTA_BITWISE_SREG: source register (NLA_U32: nft_registers) + * @NFTA_BITWISE_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_BITWISE_LEN: length of operands (NLA_U32) + * @NFTA_BITWISE_MASK: mask value (NLA_NESTED: nft_data_attributes) + * @NFTA_BITWISE_XOR: xor value (NLA_NESTED: nft_data_attributes) + * @NFTA_BITWISE_OP: type of operation (NLA_U32: nft_bitwise_ops) + * @NFTA_BITWISE_DATA: argument for non-boolean operations + * (NLA_NESTED: nft_data_attributes) + * + * The bitwise expression supports boolean and shift operations. It implements + * the boolean operations by performing the following operation: + * + * dreg = (sreg & mask) ^ xor + * + * with these mask and xor values: + * + * mask xor + * NOT: 1 1 + * OR: ~x x + * XOR: 1 x + * AND: x 0 + */ +enum nft_bitwise_attributes { + NFTA_BITWISE_UNSPEC, + NFTA_BITWISE_SREG, + NFTA_BITWISE_DREG, + NFTA_BITWISE_LEN, + NFTA_BITWISE_MASK, + NFTA_BITWISE_XOR, + NFTA_BITWISE_OP, + NFTA_BITWISE_DATA, + __NFTA_BITWISE_MAX +}; +#define NFTA_BITWISE_MAX (__NFTA_BITWISE_MAX - 1) + +/** + * enum nft_byteorder_ops - nf_tables byteorder operators + * + * @NFT_BYTEORDER_NTOH: network to host operator + * @NFT_BYTEORDER_HTON: host to network operator + */ +enum nft_byteorder_ops { + NFT_BYTEORDER_NTOH, + NFT_BYTEORDER_HTON, +}; + +/** + * enum nft_byteorder_attributes - nf_tables byteorder expression netlink attributes + * + * @NFTA_BYTEORDER_SREG: source register (NLA_U32: nft_registers) + * @NFTA_BYTEORDER_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_BYTEORDER_OP: operator (NLA_U32: enum nft_byteorder_ops) + * @NFTA_BYTEORDER_LEN: length of the data (NLA_U32) + * @NFTA_BYTEORDER_SIZE: data size in bytes (NLA_U32: 2 or 4) + */ +enum nft_byteorder_attributes { + NFTA_BYTEORDER_UNSPEC, + NFTA_BYTEORDER_SREG, + NFTA_BYTEORDER_DREG, + NFTA_BYTEORDER_OP, + NFTA_BYTEORDER_LEN, + NFTA_BYTEORDER_SIZE, + __NFTA_BYTEORDER_MAX +}; +#define NFTA_BYTEORDER_MAX (__NFTA_BYTEORDER_MAX - 1) + +/** + * enum nft_cmp_ops - nf_tables relational operator + * + * @NFT_CMP_EQ: equal + * @NFT_CMP_NEQ: not equal + * @NFT_CMP_LT: less than + * @NFT_CMP_LTE: less than or equal to + * @NFT_CMP_GT: greater than + * @NFT_CMP_GTE: greater than or equal to + */ +enum nft_cmp_ops { + NFT_CMP_EQ, + NFT_CMP_NEQ, + NFT_CMP_LT, + NFT_CMP_LTE, + NFT_CMP_GT, + NFT_CMP_GTE, +}; + +/** + * enum nft_cmp_attributes - nf_tables cmp expression netlink attributes + * + * @NFTA_CMP_SREG: source register of data to compare (NLA_U32: nft_registers) + * @NFTA_CMP_OP: cmp operation (NLA_U32: nft_cmp_ops) + * @NFTA_CMP_DATA: data to compare against (NLA_NESTED: nft_data_attributes) + */ +enum nft_cmp_attributes { + NFTA_CMP_UNSPEC, + NFTA_CMP_SREG, + NFTA_CMP_OP, + NFTA_CMP_DATA, + __NFTA_CMP_MAX +}; +#define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) + +/** + * enum nft_range_ops - nf_tables range operator + * + * @NFT_RANGE_EQ: equal + * @NFT_RANGE_NEQ: not equal + */ +enum nft_range_ops { + NFT_RANGE_EQ, + NFT_RANGE_NEQ, +}; + +/** + * enum nft_range_attributes - nf_tables range expression netlink attributes + * + * @NFTA_RANGE_SREG: source register of data to compare (NLA_U32: nft_registers) + * @NFTA_RANGE_OP: cmp operation (NLA_U32: nft_cmp_ops) + * @NFTA_RANGE_FROM_DATA: data range from (NLA_NESTED: nft_data_attributes) + * @NFTA_RANGE_TO_DATA: data range to (NLA_NESTED: nft_data_attributes) + */ +enum nft_range_attributes { + NFTA_RANGE_UNSPEC, + NFTA_RANGE_SREG, + NFTA_RANGE_OP, + NFTA_RANGE_FROM_DATA, + NFTA_RANGE_TO_DATA, + __NFTA_RANGE_MAX +}; +#define NFTA_RANGE_MAX (__NFTA_RANGE_MAX - 1) + +enum nft_lookup_flags { + NFT_LOOKUP_F_INV = (1 << 0), +}; + +/** + * enum nft_lookup_attributes - nf_tables set lookup expression netlink attributes + * + * @NFTA_LOOKUP_SET: name of the set where to look for (NLA_STRING) + * @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers) + * @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_LOOKUP_SET_ID: uniquely identifies a set in a transaction (NLA_U32) + * @NFTA_LOOKUP_FLAGS: flags (NLA_U32: enum nft_lookup_flags) + */ +enum nft_lookup_attributes { + NFTA_LOOKUP_UNSPEC, + NFTA_LOOKUP_SET, + NFTA_LOOKUP_SREG, + NFTA_LOOKUP_DREG, + NFTA_LOOKUP_SET_ID, + NFTA_LOOKUP_FLAGS, + __NFTA_LOOKUP_MAX +}; +#define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1) + +enum nft_dynset_ops { + NFT_DYNSET_OP_ADD, + NFT_DYNSET_OP_UPDATE, + NFT_DYNSET_OP_DELETE, +}; + +enum nft_dynset_flags { + NFT_DYNSET_F_INV = (1 << 0), + NFT_DYNSET_F_EXPR = (1 << 1), +}; + +/** + * enum nft_dynset_attributes - dynset expression attributes + * + * @NFTA_DYNSET_SET_NAME: name of set the to add data to (NLA_STRING) + * @NFTA_DYNSET_SET_ID: uniquely identifier of the set in the transaction (NLA_U32) + * @NFTA_DYNSET_OP: operation (NLA_U32) + * @NFTA_DYNSET_SREG_KEY: source register of the key (NLA_U32) + * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32) + * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64) + * @NFTA_DYNSET_EXPR: expression (NLA_NESTED: nft_expr_attributes) + * @NFTA_DYNSET_FLAGS: flags (NLA_U32) + * @NFTA_DYNSET_EXPRESSIONS: list of expressions (NLA_NESTED: nft_list_attributes) + */ +enum nft_dynset_attributes { + NFTA_DYNSET_UNSPEC, + NFTA_DYNSET_SET_NAME, + NFTA_DYNSET_SET_ID, + NFTA_DYNSET_OP, + NFTA_DYNSET_SREG_KEY, + NFTA_DYNSET_SREG_DATA, + NFTA_DYNSET_TIMEOUT, + NFTA_DYNSET_EXPR, + NFTA_DYNSET_PAD, + NFTA_DYNSET_FLAGS, + NFTA_DYNSET_EXPRESSIONS, + __NFTA_DYNSET_MAX, +}; +#define NFTA_DYNSET_MAX (__NFTA_DYNSET_MAX - 1) + +/** + * enum nft_payload_bases - nf_tables payload expression offset bases + * + * @NFT_PAYLOAD_LL_HEADER: link layer header + * @NFT_PAYLOAD_NETWORK_HEADER: network header + * @NFT_PAYLOAD_TRANSPORT_HEADER: transport header + * @NFT_PAYLOAD_INNER_HEADER: inner header / payload + */ +enum nft_payload_bases { + NFT_PAYLOAD_LL_HEADER, + NFT_PAYLOAD_NETWORK_HEADER, + NFT_PAYLOAD_TRANSPORT_HEADER, + NFT_PAYLOAD_INNER_HEADER, + NFT_PAYLOAD_TUN_HEADER, +}; + +/** + * enum nft_payload_csum_types - nf_tables payload expression checksum types + * + * @NFT_PAYLOAD_CSUM_NONE: no checksumming + * @NFT_PAYLOAD_CSUM_INET: internet checksum (RFC 791) + * @NFT_PAYLOAD_CSUM_SCTP: CRC-32c, for use in SCTP header (RFC 3309) + */ +enum nft_payload_csum_types { + NFT_PAYLOAD_CSUM_NONE, + NFT_PAYLOAD_CSUM_INET, + NFT_PAYLOAD_CSUM_SCTP, +}; + +enum nft_payload_csum_flags { + NFT_PAYLOAD_L4CSUM_PSEUDOHDR = (1 << 0), +}; + +enum nft_inner_type { + NFT_INNER_UNSPEC = 0, + NFT_INNER_VXLAN, + NFT_INNER_GENEVE, +}; + +enum nft_inner_flags { + NFT_INNER_HDRSIZE = (1 << 0), + NFT_INNER_LL = (1 << 1), + NFT_INNER_NH = (1 << 2), + NFT_INNER_TH = (1 << 3), +}; +#define NFT_INNER_MASK (NFT_INNER_HDRSIZE | NFT_INNER_LL | \ + NFT_INNER_NH | NFT_INNER_TH) + +enum nft_inner_attributes { + NFTA_INNER_UNSPEC, + NFTA_INNER_NUM, + NFTA_INNER_TYPE, + NFTA_INNER_FLAGS, + NFTA_INNER_HDRSIZE, + NFTA_INNER_EXPR, + __NFTA_INNER_MAX +}; +#define NFTA_INNER_MAX (__NFTA_INNER_MAX - 1) + +/** + * enum nft_payload_attributes - nf_tables payload expression netlink attributes + * + * @NFTA_PAYLOAD_DREG: destination register to load data into (NLA_U32: nft_registers) + * @NFTA_PAYLOAD_BASE: payload base (NLA_U32: nft_payload_bases) + * @NFTA_PAYLOAD_OFFSET: payload offset relative to base (NLA_U32) + * @NFTA_PAYLOAD_LEN: payload length (NLA_U32) + * @NFTA_PAYLOAD_SREG: source register to load data from (NLA_U32: nft_registers) + * @NFTA_PAYLOAD_CSUM_TYPE: checksum type (NLA_U32) + * @NFTA_PAYLOAD_CSUM_OFFSET: checksum offset relative to base (NLA_U32) + * @NFTA_PAYLOAD_CSUM_FLAGS: checksum flags (NLA_U32) + */ +enum nft_payload_attributes { + NFTA_PAYLOAD_UNSPEC, + NFTA_PAYLOAD_DREG, + NFTA_PAYLOAD_BASE, + NFTA_PAYLOAD_OFFSET, + NFTA_PAYLOAD_LEN, + NFTA_PAYLOAD_SREG, + NFTA_PAYLOAD_CSUM_TYPE, + NFTA_PAYLOAD_CSUM_OFFSET, + NFTA_PAYLOAD_CSUM_FLAGS, + __NFTA_PAYLOAD_MAX +}; +#define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1) + +enum nft_exthdr_flags { + NFT_EXTHDR_F_PRESENT = (1 << 0), +}; + +/** + * enum nft_exthdr_op - nf_tables match options + * + * @NFT_EXTHDR_OP_IPV6: match against ipv6 extension headers + * @NFT_EXTHDR_OP_TCP: match against tcp options + * @NFT_EXTHDR_OP_IPV4: match against ipv4 options + * @NFT_EXTHDR_OP_SCTP: match against sctp chunks + */ +enum nft_exthdr_op { + NFT_EXTHDR_OP_IPV6, + NFT_EXTHDR_OP_TCPOPT, + NFT_EXTHDR_OP_IPV4, + NFT_EXTHDR_OP_SCTP, + __NFT_EXTHDR_OP_MAX +}; +#define NFT_EXTHDR_OP_MAX (__NFT_EXTHDR_OP_MAX - 1) + +/** + * enum nft_exthdr_attributes - nf_tables extension header expression netlink attributes + * + * @NFTA_EXTHDR_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_EXTHDR_TYPE: extension header type (NLA_U8) + * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32) + * @NFTA_EXTHDR_LEN: extension header length (NLA_U32) + * @NFTA_EXTHDR_FLAGS: extension header flags (NLA_U32) + * @NFTA_EXTHDR_OP: option match type (NLA_U32) + * @NFTA_EXTHDR_SREG: option match type (NLA_U32) + */ +enum nft_exthdr_attributes { + NFTA_EXTHDR_UNSPEC, + NFTA_EXTHDR_DREG, + NFTA_EXTHDR_TYPE, + NFTA_EXTHDR_OFFSET, + NFTA_EXTHDR_LEN, + NFTA_EXTHDR_FLAGS, + NFTA_EXTHDR_OP, + NFTA_EXTHDR_SREG, + __NFTA_EXTHDR_MAX +}; +#define NFTA_EXTHDR_MAX (__NFTA_EXTHDR_MAX - 1) + +/** + * enum nft_meta_keys - nf_tables meta expression keys + * + * @NFT_META_LEN: packet length (skb->len) + * @NFT_META_PROTOCOL: packet ethertype protocol (skb->protocol), invalid in OUTPUT + * @NFT_META_PRIORITY: packet priority (skb->priority) + * @NFT_META_MARK: packet mark (skb->mark) + * @NFT_META_IIF: packet input interface index (dev->ifindex) + * @NFT_META_OIF: packet output interface index (dev->ifindex) + * @NFT_META_IIFNAME: packet input interface name (dev->name) + * @NFT_META_OIFNAME: packet output interface name (dev->name) + * @NFT_META_IIFTYPE: packet input interface type (dev->type) + * @NFT_META_OIFTYPE: packet output interface type (dev->type) + * @NFT_META_SKUID: originating socket UID (fsuid) + * @NFT_META_SKGID: originating socket GID (fsgid) + * @NFT_META_NFTRACE: packet nftrace bit + * @NFT_META_RTCLASSID: realm value of packet's route (skb->dst->tclassid) + * @NFT_META_SECMARK: packet secmark (skb->secmark) + * @NFT_META_NFPROTO: netfilter protocol + * @NFT_META_L4PROTO: layer 4 protocol number + * @NFT_META_BRI_IIFNAME: packet input bridge interface name + * @NFT_META_BRI_OIFNAME: packet output bridge interface name + * @NFT_META_PKTTYPE: packet type (skb->pkt_type), special handling for loopback + * @NFT_META_CPU: cpu id through smp_processor_id() + * @NFT_META_IIFGROUP: packet input interface group + * @NFT_META_OIFGROUP: packet output interface group + * @NFT_META_CGROUP: socket control group (skb->sk->sk_classid) + * @NFT_META_PRANDOM: a 32bit pseudo-random number + * @NFT_META_SECPATH: boolean, secpath_exists (!!skb->sp) + * @NFT_META_IIFKIND: packet input interface kind name (dev->rtnl_link_ops->kind) + * @NFT_META_OIFKIND: packet output interface kind name (dev->rtnl_link_ops->kind) + * @NFT_META_BRI_IIFPVID: packet input bridge port pvid + * @NFT_META_BRI_IIFVPROTO: packet input bridge vlan proto + * @NFT_META_TIME_NS: time since epoch (in nanoseconds) + * @NFT_META_TIME_DAY: day of week (from 0 = Sunday to 6 = Saturday) + * @NFT_META_TIME_HOUR: hour of day (in seconds) + * @NFT_META_SDIF: slave device interface index + * @NFT_META_SDIFNAME: slave device interface name + */ +enum nft_meta_keys { + NFT_META_LEN, + NFT_META_PROTOCOL, + NFT_META_PRIORITY, + NFT_META_MARK, + NFT_META_IIF, + NFT_META_OIF, + NFT_META_IIFNAME, + NFT_META_OIFNAME, + NFT_META_IFTYPE, +#define NFT_META_IIFTYPE NFT_META_IFTYPE + NFT_META_OIFTYPE, + NFT_META_SKUID, + NFT_META_SKGID, + NFT_META_NFTRACE, + NFT_META_RTCLASSID, + NFT_META_SECMARK, + NFT_META_NFPROTO, + NFT_META_L4PROTO, + NFT_META_BRI_IIFNAME, + NFT_META_BRI_OIFNAME, + NFT_META_PKTTYPE, + NFT_META_CPU, + NFT_META_IIFGROUP, + NFT_META_OIFGROUP, + NFT_META_CGROUP, + NFT_META_PRANDOM, + NFT_META_SECPATH, + NFT_META_IIFKIND, + NFT_META_OIFKIND, + NFT_META_BRI_IIFPVID, + NFT_META_BRI_IIFVPROTO, + NFT_META_TIME_NS, + NFT_META_TIME_DAY, + NFT_META_TIME_HOUR, + NFT_META_SDIF, + NFT_META_SDIFNAME, + __NFT_META_IIFTYPE, +}; + +/** + * enum nft_rt_keys - nf_tables routing expression keys + * + * @NFT_RT_CLASSID: realm value of packet's route (skb->dst->tclassid) + * @NFT_RT_NEXTHOP4: routing nexthop for IPv4 + * @NFT_RT_NEXTHOP6: routing nexthop for IPv6 + * @NFT_RT_TCPMSS: fetch current path tcp mss + * @NFT_RT_XFRM: boolean, skb->dst->xfrm != NULL + */ +enum nft_rt_keys { + NFT_RT_CLASSID, + NFT_RT_NEXTHOP4, + NFT_RT_NEXTHOP6, + NFT_RT_TCPMSS, + NFT_RT_XFRM, + __NFT_RT_MAX +}; +#define NFT_RT_MAX (__NFT_RT_MAX - 1) + +/** + * enum nft_hash_types - nf_tables hash expression types + * + * @NFT_HASH_JENKINS: Jenkins Hash + * @NFT_HASH_SYM: Symmetric Hash + */ +enum nft_hash_types { + NFT_HASH_JENKINS, + NFT_HASH_SYM, +}; + +/** + * enum nft_hash_attributes - nf_tables hash expression netlink attributes + * + * @NFTA_HASH_SREG: source register (NLA_U32) + * @NFTA_HASH_DREG: destination register (NLA_U32) + * @NFTA_HASH_LEN: source data length (NLA_U32) + * @NFTA_HASH_MODULUS: modulus value (NLA_U32) + * @NFTA_HASH_SEED: seed value (NLA_U32) + * @NFTA_HASH_OFFSET: add this offset value to hash result (NLA_U32) + * @NFTA_HASH_TYPE: hash operation (NLA_U32: nft_hash_types) + * @NFTA_HASH_SET_NAME: name of the map to lookup (NLA_STRING) + * @NFTA_HASH_SET_ID: id of the map (NLA_U32) + */ +enum nft_hash_attributes { + NFTA_HASH_UNSPEC, + NFTA_HASH_SREG, + NFTA_HASH_DREG, + NFTA_HASH_LEN, + NFTA_HASH_MODULUS, + NFTA_HASH_SEED, + NFTA_HASH_OFFSET, + NFTA_HASH_TYPE, + NFTA_HASH_SET_NAME, /* deprecated */ + NFTA_HASH_SET_ID, /* deprecated */ + __NFTA_HASH_MAX, +}; +#define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) + +/** + * enum nft_meta_attributes - nf_tables meta expression netlink attributes + * + * @NFTA_META_DREG: destination register (NLA_U32) + * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys) + * @NFTA_META_SREG: source register (NLA_U32) + */ +enum nft_meta_attributes { + NFTA_META_UNSPEC, + NFTA_META_DREG, + NFTA_META_KEY, + NFTA_META_SREG, + __NFTA_META_MAX +}; +#define NFTA_META_MAX (__NFTA_META_MAX - 1) + +/** + * enum nft_rt_attributes - nf_tables routing expression netlink attributes + * + * @NFTA_RT_DREG: destination register (NLA_U32) + * @NFTA_RT_KEY: routing data item to load (NLA_U32: nft_rt_keys) + */ +enum nft_rt_attributes { + NFTA_RT_UNSPEC, + NFTA_RT_DREG, + NFTA_RT_KEY, + __NFTA_RT_MAX +}; +#define NFTA_RT_MAX (__NFTA_RT_MAX - 1) + +/** + * enum nft_socket_attributes - nf_tables socket expression netlink attributes + * + * @NFTA_SOCKET_KEY: socket key to match + * @NFTA_SOCKET_DREG: destination register + * @NFTA_SOCKET_LEVEL: cgroups2 ancestor level (only for cgroupsv2) + */ +enum nft_socket_attributes { + NFTA_SOCKET_UNSPEC, + NFTA_SOCKET_KEY, + NFTA_SOCKET_DREG, + NFTA_SOCKET_LEVEL, + __NFTA_SOCKET_MAX +}; +#define NFTA_SOCKET_MAX (__NFTA_SOCKET_MAX - 1) + +/* + * enum nft_socket_keys - nf_tables socket expression keys + * + * @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option + * @NFT_SOCKET_MARK: Value of the socket mark + * @NFT_SOCKET_WILDCARD: Whether the socket is zero-bound (e.g. 0.0.0.0 or ::0) + * @NFT_SOCKET_CGROUPV2: Match on cgroups version 2 + */ +enum nft_socket_keys { + NFT_SOCKET_TRANSPARENT, + NFT_SOCKET_MARK, + NFT_SOCKET_WILDCARD, + NFT_SOCKET_CGROUPV2, + __NFT_SOCKET_MAX +}; +#define NFT_SOCKET_MAX (__NFT_SOCKET_MAX - 1) + +/** + * enum nft_ct_keys - nf_tables ct expression keys + * + * @NFT_CT_STATE: conntrack state (bitmask of enum ip_conntrack_info) + * @NFT_CT_DIRECTION: conntrack direction (enum ip_conntrack_dir) + * @NFT_CT_STATUS: conntrack status (bitmask of enum ip_conntrack_status) + * @NFT_CT_MARK: conntrack mark value + * @NFT_CT_SECMARK: conntrack secmark value + * @NFT_CT_EXPIRATION: relative conntrack expiration time in ms + * @NFT_CT_HELPER: connection tracking helper assigned to conntrack + * @NFT_CT_L3PROTOCOL: conntrack layer 3 protocol + * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address, deprecated) + * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address, deprecated) + * @NFT_CT_PROTOCOL: conntrack layer 4 protocol + * @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source + * @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination + * @NFT_CT_LABELS: conntrack labels + * @NFT_CT_PKTS: conntrack packets + * @NFT_CT_BYTES: conntrack bytes + * @NFT_CT_AVGPKT: conntrack average bytes per packet + * @NFT_CT_ZONE: conntrack zone + * @NFT_CT_EVENTMASK: ctnetlink events to be generated for this conntrack + * @NFT_CT_SRC_IP: conntrack layer 3 protocol source (IPv4 address) + * @NFT_CT_DST_IP: conntrack layer 3 protocol destination (IPv4 address) + * @NFT_CT_SRC_IP6: conntrack layer 3 protocol source (IPv6 address) + * @NFT_CT_DST_IP6: conntrack layer 3 protocol destination (IPv6 address) + * @NFT_CT_ID: conntrack id + */ +enum nft_ct_keys { + NFT_CT_STATE, + NFT_CT_DIRECTION, + NFT_CT_STATUS, + NFT_CT_MARK, + NFT_CT_SECMARK, + NFT_CT_EXPIRATION, + NFT_CT_HELPER, + NFT_CT_L3PROTOCOL, + NFT_CT_SRC, + NFT_CT_DST, + NFT_CT_PROTOCOL, + NFT_CT_PROTO_SRC, + NFT_CT_PROTO_DST, + NFT_CT_LABELS, + NFT_CT_PKTS, + NFT_CT_BYTES, + NFT_CT_AVGPKT, + NFT_CT_ZONE, + NFT_CT_EVENTMASK, + NFT_CT_SRC_IP, + NFT_CT_DST_IP, + NFT_CT_SRC_IP6, + NFT_CT_DST_IP6, + NFT_CT_ID, + __NFT_CT_MAX +}; +#define NFT_CT_MAX (__NFT_CT_MAX - 1) + +/** + * enum nft_ct_attributes - nf_tables ct expression netlink attributes + * + * @NFTA_CT_DREG: destination register (NLA_U32) + * @NFTA_CT_KEY: conntrack data item to load (NLA_U32: nft_ct_keys) + * @NFTA_CT_DIRECTION: direction in case of directional keys (NLA_U8) + * @NFTA_CT_SREG: source register (NLA_U32) + */ +enum nft_ct_attributes { + NFTA_CT_UNSPEC, + NFTA_CT_DREG, + NFTA_CT_KEY, + NFTA_CT_DIRECTION, + NFTA_CT_SREG, + __NFTA_CT_MAX +}; +#define NFTA_CT_MAX (__NFTA_CT_MAX - 1) + +/** + * enum nft_flow_attributes - ct offload expression attributes + * @NFTA_FLOW_TABLE_NAME: flow table name (NLA_STRING) + */ +enum nft_offload_attributes { + NFTA_FLOW_UNSPEC, + NFTA_FLOW_TABLE_NAME, + __NFTA_FLOW_MAX, +}; +#define NFTA_FLOW_MAX (__NFTA_FLOW_MAX - 1) + +enum nft_limit_type { + NFT_LIMIT_PKTS, + NFT_LIMIT_PKT_BYTES +}; + +enum nft_limit_flags { + NFT_LIMIT_F_INV = (1 << 0), +}; + +/** + * enum nft_limit_attributes - nf_tables limit expression netlink attributes + * + * @NFTA_LIMIT_RATE: refill rate (NLA_U64) + * @NFTA_LIMIT_UNIT: refill unit (NLA_U64) + * @NFTA_LIMIT_BURST: burst (NLA_U32) + * @NFTA_LIMIT_TYPE: type of limit (NLA_U32: enum nft_limit_type) + * @NFTA_LIMIT_FLAGS: flags (NLA_U32: enum nft_limit_flags) + */ +enum nft_limit_attributes { + NFTA_LIMIT_UNSPEC, + NFTA_LIMIT_RATE, + NFTA_LIMIT_UNIT, + NFTA_LIMIT_BURST, + NFTA_LIMIT_TYPE, + NFTA_LIMIT_FLAGS, + NFTA_LIMIT_PAD, + __NFTA_LIMIT_MAX +}; +#define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1) + +enum nft_connlimit_flags { + NFT_CONNLIMIT_F_INV = (1 << 0), +}; + +/** + * enum nft_connlimit_attributes - nf_tables connlimit expression netlink attributes + * + * @NFTA_CONNLIMIT_COUNT: number of connections (NLA_U32) + * @NFTA_CONNLIMIT_FLAGS: flags (NLA_U32: enum nft_connlimit_flags) + */ +enum nft_connlimit_attributes { + NFTA_CONNLIMIT_UNSPEC, + NFTA_CONNLIMIT_COUNT, + NFTA_CONNLIMIT_FLAGS, + __NFTA_CONNLIMIT_MAX +}; +#define NFTA_CONNLIMIT_MAX (__NFTA_CONNLIMIT_MAX - 1) + +/** + * enum nft_counter_attributes - nf_tables counter expression netlink attributes + * + * @NFTA_COUNTER_BYTES: number of bytes (NLA_U64) + * @NFTA_COUNTER_PACKETS: number of packets (NLA_U64) + */ +enum nft_counter_attributes { + NFTA_COUNTER_UNSPEC, + NFTA_COUNTER_BYTES, + NFTA_COUNTER_PACKETS, + NFTA_COUNTER_PAD, + __NFTA_COUNTER_MAX +}; +#define NFTA_COUNTER_MAX (__NFTA_COUNTER_MAX - 1) + +/** + * enum nft_last_attributes - nf_tables last expression netlink attributes + * + * @NFTA_LAST_SET: last update has been set, zero means never updated (NLA_U32) + * @NFTA_LAST_MSECS: milliseconds since last update (NLA_U64) + */ +enum nft_last_attributes { + NFTA_LAST_UNSPEC, + NFTA_LAST_SET, + NFTA_LAST_MSECS, + NFTA_LAST_PAD, + __NFTA_LAST_MAX +}; +#define NFTA_LAST_MAX (__NFTA_LAST_MAX - 1) + +/** + * enum nft_log_attributes - nf_tables log expression netlink attributes + * + * @NFTA_LOG_GROUP: netlink group to send messages to (NLA_U32) + * @NFTA_LOG_PREFIX: prefix to prepend to log messages (NLA_STRING) + * @NFTA_LOG_SNAPLEN: length of payload to include in netlink message (NLA_U32) + * @NFTA_LOG_QTHRESHOLD: queue threshold (NLA_U32) + * @NFTA_LOG_LEVEL: log level (NLA_U32) + * @NFTA_LOG_FLAGS: logging flags (NLA_U32) + */ +enum nft_log_attributes { + NFTA_LOG_UNSPEC, + NFTA_LOG_GROUP, + NFTA_LOG_PREFIX, + NFTA_LOG_SNAPLEN, + NFTA_LOG_QTHRESHOLD, + NFTA_LOG_LEVEL, + NFTA_LOG_FLAGS, + __NFTA_LOG_MAX +}; +#define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1) + +/** + * enum nft_log_level - nf_tables log levels + * + * @NFT_LOGLEVEL_EMERG: system is unusable + * @NFT_LOGLEVEL_ALERT: action must be taken immediately + * @NFT_LOGLEVEL_CRIT: critical conditions + * @NFT_LOGLEVEL_ERR: error conditions + * @NFT_LOGLEVEL_WARNING: warning conditions + * @NFT_LOGLEVEL_NOTICE: normal but significant condition + * @NFT_LOGLEVEL_INFO: informational + * @NFT_LOGLEVEL_DEBUG: debug-level messages + * @NFT_LOGLEVEL_AUDIT: enabling audit logging + */ +enum nft_log_level { + NFT_LOGLEVEL_EMERG, + NFT_LOGLEVEL_ALERT, + NFT_LOGLEVEL_CRIT, + NFT_LOGLEVEL_ERR, + NFT_LOGLEVEL_WARNING, + NFT_LOGLEVEL_NOTICE, + NFT_LOGLEVEL_INFO, + NFT_LOGLEVEL_DEBUG, + NFT_LOGLEVEL_AUDIT, + __NFT_LOGLEVEL_MAX +}; +#define NFT_LOGLEVEL_MAX (__NFT_LOGLEVEL_MAX - 1) + +/** + * enum nft_queue_attributes - nf_tables queue expression netlink attributes + * + * @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16) + * @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16) + * @NFTA_QUEUE_FLAGS: various flags (NLA_U16) + * @NFTA_QUEUE_SREG_QNUM: source register of queue number (NLA_U32: nft_registers) + */ +enum nft_queue_attributes { + NFTA_QUEUE_UNSPEC, + NFTA_QUEUE_NUM, + NFTA_QUEUE_TOTAL, + NFTA_QUEUE_FLAGS, + NFTA_QUEUE_SREG_QNUM, + __NFTA_QUEUE_MAX +}; +#define NFTA_QUEUE_MAX (__NFTA_QUEUE_MAX - 1) + +#define NFT_QUEUE_FLAG_BYPASS 0x01 /* for compatibility with v2 */ +#define NFT_QUEUE_FLAG_CPU_FANOUT 0x02 /* use current CPU (no hashing) */ +#define NFT_QUEUE_FLAG_MASK 0x03 + +enum nft_quota_flags { + NFT_QUOTA_F_INV = (1 << 0), + NFT_QUOTA_F_DEPLETED = (1 << 1), +}; + +/** + * enum nft_quota_attributes - nf_tables quota expression netlink attributes + * + * @NFTA_QUOTA_BYTES: quota in bytes (NLA_U16) + * @NFTA_QUOTA_FLAGS: flags (NLA_U32) + * @NFTA_QUOTA_CONSUMED: quota already consumed in bytes (NLA_U64) + */ +enum nft_quota_attributes { + NFTA_QUOTA_UNSPEC, + NFTA_QUOTA_BYTES, + NFTA_QUOTA_FLAGS, + NFTA_QUOTA_PAD, + NFTA_QUOTA_CONSUMED, + __NFTA_QUOTA_MAX +}; +#define NFTA_QUOTA_MAX (__NFTA_QUOTA_MAX - 1) + +/** + * enum nft_secmark_attributes - nf_tables secmark object netlink attributes + * + * @NFTA_SECMARK_CTX: security context (NLA_STRING) + */ +enum nft_secmark_attributes { + NFTA_SECMARK_UNSPEC, + NFTA_SECMARK_CTX, + __NFTA_SECMARK_MAX, +}; +#define NFTA_SECMARK_MAX (__NFTA_SECMARK_MAX - 1) + +/* Max security context length */ +#define NFT_SECMARK_CTX_MAXLEN 256 + +/** + * enum nft_reject_types - nf_tables reject expression reject types + * + * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable + * @NFT_REJECT_TCP_RST: reject using TCP RST + * @NFT_REJECT_ICMPX_UNREACH: abstracted ICMP unreachable for bridge and inet + */ +enum nft_reject_types { + NFT_REJECT_ICMP_UNREACH, + NFT_REJECT_TCP_RST, + NFT_REJECT_ICMPX_UNREACH, +}; + +/** + * enum nft_reject_code - Generic reject codes for IPv4/IPv6 + * + * @NFT_REJECT_ICMPX_NO_ROUTE: no route to host / network unreachable + * @NFT_REJECT_ICMPX_PORT_UNREACH: port unreachable + * @NFT_REJECT_ICMPX_HOST_UNREACH: host unreachable + * @NFT_REJECT_ICMPX_ADMIN_PROHIBITED: administratively prohibited + * + * These codes are mapped to real ICMP and ICMPv6 codes. + */ +enum nft_reject_inet_code { + NFT_REJECT_ICMPX_NO_ROUTE = 0, + NFT_REJECT_ICMPX_PORT_UNREACH, + NFT_REJECT_ICMPX_HOST_UNREACH, + NFT_REJECT_ICMPX_ADMIN_PROHIBITED, + __NFT_REJECT_ICMPX_MAX +}; +#define NFT_REJECT_ICMPX_MAX (__NFT_REJECT_ICMPX_MAX - 1) + +/** + * enum nft_reject_attributes - nf_tables reject expression netlink attributes + * + * @NFTA_REJECT_TYPE: packet type to use (NLA_U32: nft_reject_types) + * @NFTA_REJECT_ICMP_CODE: ICMP code to use (NLA_U8) + */ +enum nft_reject_attributes { + NFTA_REJECT_UNSPEC, + NFTA_REJECT_TYPE, + NFTA_REJECT_ICMP_CODE, + __NFTA_REJECT_MAX +}; +#define NFTA_REJECT_MAX (__NFTA_REJECT_MAX - 1) + +/** + * enum nft_nat_types - nf_tables nat expression NAT types + * + * @NFT_NAT_SNAT: source NAT + * @NFT_NAT_DNAT: destination NAT + */ +enum nft_nat_types { + NFT_NAT_SNAT, + NFT_NAT_DNAT, +}; + +/** + * enum nft_nat_attributes - nf_tables nat expression netlink attributes + * + * @NFTA_NAT_TYPE: NAT type (NLA_U32: nft_nat_types) + * @NFTA_NAT_FAMILY: NAT family (NLA_U32) + * @NFTA_NAT_REG_ADDR_MIN: source register of address range start (NLA_U32: nft_registers) + * @NFTA_NAT_REG_ADDR_MAX: source register of address range end (NLA_U32: nft_registers) + * @NFTA_NAT_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) + * @NFTA_NAT_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) + * @NFTA_NAT_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32) + */ +enum nft_nat_attributes { + NFTA_NAT_UNSPEC, + NFTA_NAT_TYPE, + NFTA_NAT_FAMILY, + NFTA_NAT_REG_ADDR_MIN, + NFTA_NAT_REG_ADDR_MAX, + NFTA_NAT_REG_PROTO_MIN, + NFTA_NAT_REG_PROTO_MAX, + NFTA_NAT_FLAGS, + __NFTA_NAT_MAX +}; +#define NFTA_NAT_MAX (__NFTA_NAT_MAX - 1) + +/** + * enum nft_tproxy_attributes - nf_tables tproxy expression netlink attributes + * + * NFTA_TPROXY_FAMILY: Target address family (NLA_U32: nft_registers) + * NFTA_TPROXY_REG_ADDR: Target address register (NLA_U32: nft_registers) + * NFTA_TPROXY_REG_PORT: Target port register (NLA_U32: nft_registers) + */ +enum nft_tproxy_attributes { + NFTA_TPROXY_UNSPEC, + NFTA_TPROXY_FAMILY, + NFTA_TPROXY_REG_ADDR, + NFTA_TPROXY_REG_PORT, + __NFTA_TPROXY_MAX +}; +#define NFTA_TPROXY_MAX (__NFTA_TPROXY_MAX - 1) + +/** + * enum nft_masq_attributes - nf_tables masquerade expression attributes + * + * @NFTA_MASQ_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32) + * @NFTA_MASQ_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) + * @NFTA_MASQ_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) + */ +enum nft_masq_attributes { + NFTA_MASQ_UNSPEC, + NFTA_MASQ_FLAGS, + NFTA_MASQ_REG_PROTO_MIN, + NFTA_MASQ_REG_PROTO_MAX, + __NFTA_MASQ_MAX +}; +#define NFTA_MASQ_MAX (__NFTA_MASQ_MAX - 1) + +/** + * enum nft_redir_attributes - nf_tables redirect expression netlink attributes + * + * @NFTA_REDIR_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) + * @NFTA_REDIR_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) + * @NFTA_REDIR_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32) + */ +enum nft_redir_attributes { + NFTA_REDIR_UNSPEC, + NFTA_REDIR_REG_PROTO_MIN, + NFTA_REDIR_REG_PROTO_MAX, + NFTA_REDIR_FLAGS, + __NFTA_REDIR_MAX +}; +#define NFTA_REDIR_MAX (__NFTA_REDIR_MAX - 1) + +/** + * enum nft_dup_attributes - nf_tables dup expression netlink attributes + * + * @NFTA_DUP_SREG_ADDR: source register of address (NLA_U32: nft_registers) + * @NFTA_DUP_SREG_DEV: source register of output interface (NLA_U32: nft_register) + */ +enum nft_dup_attributes { + NFTA_DUP_UNSPEC, + NFTA_DUP_SREG_ADDR, + NFTA_DUP_SREG_DEV, + __NFTA_DUP_MAX +}; +#define NFTA_DUP_MAX (__NFTA_DUP_MAX - 1) + +/** + * enum nft_fwd_attributes - nf_tables fwd expression netlink attributes + * + * @NFTA_FWD_SREG_DEV: source register of output interface (NLA_U32: nft_register) + * @NFTA_FWD_SREG_ADDR: source register of destination address (NLA_U32: nft_register) + * @NFTA_FWD_NFPROTO: layer 3 family of source register address (NLA_U32: enum nfproto) + */ +enum nft_fwd_attributes { + NFTA_FWD_UNSPEC, + NFTA_FWD_SREG_DEV, + NFTA_FWD_SREG_ADDR, + NFTA_FWD_NFPROTO, + __NFTA_FWD_MAX +}; +#define NFTA_FWD_MAX (__NFTA_FWD_MAX - 1) + +/** + * enum nft_objref_attributes - nf_tables stateful object expression netlink attributes + * + * @NFTA_OBJREF_IMM_TYPE: object type for immediate reference (NLA_U32: nft_register) + * @NFTA_OBJREF_IMM_NAME: object name for immediate reference (NLA_STRING) + * @NFTA_OBJREF_SET_SREG: source register of the data to look for (NLA_U32: nft_registers) + * @NFTA_OBJREF_SET_NAME: name of the set where to look for (NLA_STRING) + * @NFTA_OBJREF_SET_ID: id of the set where to look for in this transaction (NLA_U32) + */ +enum nft_objref_attributes { + NFTA_OBJREF_UNSPEC, + NFTA_OBJREF_IMM_TYPE, + NFTA_OBJREF_IMM_NAME, + NFTA_OBJREF_SET_SREG, + NFTA_OBJREF_SET_NAME, + NFTA_OBJREF_SET_ID, + __NFTA_OBJREF_MAX +}; +#define NFTA_OBJREF_MAX (__NFTA_OBJREF_MAX - 1) + +/** + * enum nft_gen_attributes - nf_tables ruleset generation attributes + * + * @NFTA_GEN_ID: Ruleset generation ID (NLA_U32) + */ +enum nft_gen_attributes { + NFTA_GEN_UNSPEC, + NFTA_GEN_ID, + NFTA_GEN_PROC_PID, + NFTA_GEN_PROC_NAME, + __NFTA_GEN_MAX +}; +#define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1) + +/* + * enum nft_fib_attributes - nf_tables fib expression netlink attributes + * + * @NFTA_FIB_DREG: destination register (NLA_U32) + * @NFTA_FIB_RESULT: desired result (NLA_U32) + * @NFTA_FIB_FLAGS: flowi fields to initialize when querying the FIB (NLA_U32) + * + * The FIB expression performs a route lookup according + * to the packet data. + */ +enum nft_fib_attributes { + NFTA_FIB_UNSPEC, + NFTA_FIB_DREG, + NFTA_FIB_RESULT, + NFTA_FIB_FLAGS, + __NFTA_FIB_MAX +}; +#define NFTA_FIB_MAX (__NFTA_FIB_MAX - 1) + +enum nft_fib_result { + NFT_FIB_RESULT_UNSPEC, + NFT_FIB_RESULT_OIF, + NFT_FIB_RESULT_OIFNAME, + NFT_FIB_RESULT_ADDRTYPE, + __NFT_FIB_RESULT_MAX +}; +#define NFT_FIB_RESULT_MAX (__NFT_FIB_RESULT_MAX - 1) + +enum nft_fib_flags { + NFTA_FIB_F_SADDR = 1 << 0, /* look up src */ + NFTA_FIB_F_DADDR = 1 << 1, /* look up dst */ + NFTA_FIB_F_MARK = 1 << 2, /* use skb->mark */ + NFTA_FIB_F_IIF = 1 << 3, /* restrict to iif */ + NFTA_FIB_F_OIF = 1 << 4, /* restrict to oif */ + NFTA_FIB_F_PRESENT = 1 << 5, /* check existence only */ +}; + +enum nft_ct_helper_attributes { + NFTA_CT_HELPER_UNSPEC, + NFTA_CT_HELPER_NAME, + NFTA_CT_HELPER_L3PROTO, + NFTA_CT_HELPER_L4PROTO, + __NFTA_CT_HELPER_MAX, +}; +#define NFTA_CT_HELPER_MAX (__NFTA_CT_HELPER_MAX - 1) + +enum nft_ct_timeout_timeout_attributes { + NFTA_CT_TIMEOUT_UNSPEC, + NFTA_CT_TIMEOUT_L3PROTO, + NFTA_CT_TIMEOUT_L4PROTO, + NFTA_CT_TIMEOUT_DATA, + __NFTA_CT_TIMEOUT_MAX, +}; +#define NFTA_CT_TIMEOUT_MAX (__NFTA_CT_TIMEOUT_MAX - 1) + +enum nft_ct_expectation_attributes { + NFTA_CT_EXPECT_UNSPEC, + NFTA_CT_EXPECT_L3PROTO, + NFTA_CT_EXPECT_L4PROTO, + NFTA_CT_EXPECT_DPORT, + NFTA_CT_EXPECT_TIMEOUT, + NFTA_CT_EXPECT_SIZE, + __NFTA_CT_EXPECT_MAX, +}; +#define NFTA_CT_EXPECT_MAX (__NFTA_CT_EXPECT_MAX - 1) + +#define NFT_OBJECT_UNSPEC 0 +#define NFT_OBJECT_COUNTER 1 +#define NFT_OBJECT_QUOTA 2 +#define NFT_OBJECT_CT_HELPER 3 +#define NFT_OBJECT_LIMIT 4 +#define NFT_OBJECT_CONNLIMIT 5 +#define NFT_OBJECT_TUNNEL 6 +#define NFT_OBJECT_CT_TIMEOUT 7 +#define NFT_OBJECT_SECMARK 8 +#define NFT_OBJECT_CT_EXPECT 9 +#define NFT_OBJECT_SYNPROXY 10 +#define __NFT_OBJECT_MAX 11 +#define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1) + +/** + * enum nft_object_attributes - nf_tables stateful object netlink attributes + * + * @NFTA_OBJ_TABLE: name of the table containing the expression (NLA_STRING) + * @NFTA_OBJ_NAME: name of this expression type (NLA_STRING) + * @NFTA_OBJ_TYPE: stateful object type (NLA_U32) + * @NFTA_OBJ_DATA: stateful object data (NLA_NESTED) + * @NFTA_OBJ_USE: number of references to this expression (NLA_U32) + * @NFTA_OBJ_HANDLE: object handle (NLA_U64) + * @NFTA_OBJ_USERDATA: user data (NLA_BINARY) + */ +enum nft_object_attributes { + NFTA_OBJ_UNSPEC, + NFTA_OBJ_TABLE, + NFTA_OBJ_NAME, + NFTA_OBJ_TYPE, + NFTA_OBJ_DATA, + NFTA_OBJ_USE, + NFTA_OBJ_HANDLE, + NFTA_OBJ_PAD, + NFTA_OBJ_USERDATA, + __NFTA_OBJ_MAX +}; +#define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1) + +/** + * enum nft_flowtable_flags - nf_tables flowtable flags + * + * @NFT_FLOWTABLE_HW_OFFLOAD: flowtable hardware offload is enabled + * @NFT_FLOWTABLE_COUNTER: enable flow counters + */ +enum nft_flowtable_flags { + NFT_FLOWTABLE_HW_OFFLOAD = 0x1, + NFT_FLOWTABLE_COUNTER = 0x2, + NFT_FLOWTABLE_MASK = (NFT_FLOWTABLE_HW_OFFLOAD | + NFT_FLOWTABLE_COUNTER) +}; + +/** + * enum nft_flowtable_attributes - nf_tables flow table netlink attributes + * + * @NFTA_FLOWTABLE_TABLE: name of the table containing the expression (NLA_STRING) + * @NFTA_FLOWTABLE_NAME: name of this flow table (NLA_STRING) + * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32) + * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32) + * @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64) + * @NFTA_FLOWTABLE_FLAGS: flags (NLA_U32) + */ +enum nft_flowtable_attributes { + NFTA_FLOWTABLE_UNSPEC, + NFTA_FLOWTABLE_TABLE, + NFTA_FLOWTABLE_NAME, + NFTA_FLOWTABLE_HOOK, + NFTA_FLOWTABLE_USE, + NFTA_FLOWTABLE_HANDLE, + NFTA_FLOWTABLE_PAD, + NFTA_FLOWTABLE_FLAGS, + __NFTA_FLOWTABLE_MAX +}; +#define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1) + +/** + * enum nft_flowtable_hook_attributes - nf_tables flow table hook netlink attributes + * + * @NFTA_FLOWTABLE_HOOK_NUM: netfilter hook number (NLA_U32) + * @NFTA_FLOWTABLE_HOOK_PRIORITY: netfilter hook priority (NLA_U32) + * @NFTA_FLOWTABLE_HOOK_DEVS: input devices this flow table is bound to (NLA_NESTED) + */ +enum nft_flowtable_hook_attributes { + NFTA_FLOWTABLE_HOOK_UNSPEC, + NFTA_FLOWTABLE_HOOK_NUM, + NFTA_FLOWTABLE_HOOK_PRIORITY, + NFTA_FLOWTABLE_HOOK_DEVS, + __NFTA_FLOWTABLE_HOOK_MAX +}; +#define NFTA_FLOWTABLE_HOOK_MAX (__NFTA_FLOWTABLE_HOOK_MAX - 1) + +/** + * enum nft_osf_attributes - nftables osf expression netlink attributes + * + * @NFTA_OSF_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_OSF_TTL: Value of the TTL osf option (NLA_U8) + * @NFTA_OSF_FLAGS: flags (NLA_U32) + */ +enum nft_osf_attributes { + NFTA_OSF_UNSPEC, + NFTA_OSF_DREG, + NFTA_OSF_TTL, + NFTA_OSF_FLAGS, + __NFTA_OSF_MAX, +}; +#define NFTA_OSF_MAX (__NFTA_OSF_MAX - 1) + +enum nft_osf_flags { + NFT_OSF_F_VERSION = (1 << 0), +}; + +/** + * enum nft_synproxy_attributes - nf_tables synproxy expression netlink attributes + * + * @NFTA_SYNPROXY_MSS: mss value sent to the backend (NLA_U16) + * @NFTA_SYNPROXY_WSCALE: wscale value sent to the backend (NLA_U8) + * @NFTA_SYNPROXY_FLAGS: flags (NLA_U32) + */ +enum nft_synproxy_attributes { + NFTA_SYNPROXY_UNSPEC, + NFTA_SYNPROXY_MSS, + NFTA_SYNPROXY_WSCALE, + NFTA_SYNPROXY_FLAGS, + __NFTA_SYNPROXY_MAX, +}; +#define NFTA_SYNPROXY_MAX (__NFTA_SYNPROXY_MAX - 1) + +/** + * enum nft_device_attributes - nf_tables device netlink attributes + * + * @NFTA_DEVICE_NAME: name of this device (NLA_STRING) + */ +enum nft_devices_attributes { + NFTA_DEVICE_UNSPEC, + NFTA_DEVICE_NAME, + __NFTA_DEVICE_MAX +}; +#define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1) + +/* + * enum nft_xfrm_attributes - nf_tables xfrm expr netlink attributes + * + * @NFTA_XFRM_DREG: destination register (NLA_U32) + * @NFTA_XFRM_KEY: enum nft_xfrm_keys (NLA_U32) + * @NFTA_XFRM_DIR: direction (NLA_U8) + * @NFTA_XFRM_SPNUM: index in secpath array (NLA_U32) + */ +enum nft_xfrm_attributes { + NFTA_XFRM_UNSPEC, + NFTA_XFRM_DREG, + NFTA_XFRM_KEY, + NFTA_XFRM_DIR, + NFTA_XFRM_SPNUM, + __NFTA_XFRM_MAX +}; +#define NFTA_XFRM_MAX (__NFTA_XFRM_MAX - 1) + +enum nft_xfrm_keys { + NFT_XFRM_KEY_UNSPEC, + NFT_XFRM_KEY_DADDR_IP4, + NFT_XFRM_KEY_DADDR_IP6, + NFT_XFRM_KEY_SADDR_IP4, + NFT_XFRM_KEY_SADDR_IP6, + NFT_XFRM_KEY_REQID, + NFT_XFRM_KEY_SPI, + __NFT_XFRM_KEY_MAX, +}; +#define NFT_XFRM_KEY_MAX (__NFT_XFRM_KEY_MAX - 1) + +/** + * enum nft_trace_attributes - nf_tables trace netlink attributes + * + * @NFTA_TRACE_TABLE: name of the table (NLA_STRING) + * @NFTA_TRACE_CHAIN: name of the chain (NLA_STRING) + * @NFTA_TRACE_RULE_HANDLE: numeric handle of the rule (NLA_U64) + * @NFTA_TRACE_TYPE: type of the event (NLA_U32: nft_trace_types) + * @NFTA_TRACE_VERDICT: verdict returned by hook (NLA_NESTED: nft_verdicts) + * @NFTA_TRACE_ID: pseudo-id, same for each skb traced (NLA_U32) + * @NFTA_TRACE_LL_HEADER: linklayer header (NLA_BINARY) + * @NFTA_TRACE_NETWORK_HEADER: network header (NLA_BINARY) + * @NFTA_TRACE_TRANSPORT_HEADER: transport header (NLA_BINARY) + * @NFTA_TRACE_IIF: indev ifindex (NLA_U32) + * @NFTA_TRACE_IIFTYPE: netdev->type of indev (NLA_U16) + * @NFTA_TRACE_OIF: outdev ifindex (NLA_U32) + * @NFTA_TRACE_OIFTYPE: netdev->type of outdev (NLA_U16) + * @NFTA_TRACE_MARK: nfmark (NLA_U32) + * @NFTA_TRACE_NFPROTO: nf protocol processed (NLA_U32) + * @NFTA_TRACE_POLICY: policy that decided fate of packet (NLA_U32) + */ +enum nft_trace_attributes { + NFTA_TRACE_UNSPEC, + NFTA_TRACE_TABLE, + NFTA_TRACE_CHAIN, + NFTA_TRACE_RULE_HANDLE, + NFTA_TRACE_TYPE, + NFTA_TRACE_VERDICT, + NFTA_TRACE_ID, + NFTA_TRACE_LL_HEADER, + NFTA_TRACE_NETWORK_HEADER, + NFTA_TRACE_TRANSPORT_HEADER, + NFTA_TRACE_IIF, + NFTA_TRACE_IIFTYPE, + NFTA_TRACE_OIF, + NFTA_TRACE_OIFTYPE, + NFTA_TRACE_MARK, + NFTA_TRACE_NFPROTO, + NFTA_TRACE_POLICY, + NFTA_TRACE_PAD, + __NFTA_TRACE_MAX +}; +#define NFTA_TRACE_MAX (__NFTA_TRACE_MAX - 1) + +enum nft_trace_types { + NFT_TRACETYPE_UNSPEC, + NFT_TRACETYPE_POLICY, + NFT_TRACETYPE_RETURN, + NFT_TRACETYPE_RULE, + __NFT_TRACETYPE_MAX +}; +#define NFT_TRACETYPE_MAX (__NFT_TRACETYPE_MAX - 1) + +/** + * enum nft_ng_attributes - nf_tables number generator expression netlink attributes + * + * @NFTA_NG_DREG: destination register (NLA_U32) + * @NFTA_NG_MODULUS: maximum counter value (NLA_U32) + * @NFTA_NG_TYPE: operation type (NLA_U32) + * @NFTA_NG_OFFSET: offset to be added to the counter (NLA_U32) + * @NFTA_NG_SET_NAME: name of the map to lookup (NLA_STRING) + * @NFTA_NG_SET_ID: id of the map (NLA_U32) + */ +enum nft_ng_attributes { + NFTA_NG_UNSPEC, + NFTA_NG_DREG, + NFTA_NG_MODULUS, + NFTA_NG_TYPE, + NFTA_NG_OFFSET, + NFTA_NG_SET_NAME, /* deprecated */ + NFTA_NG_SET_ID, /* deprecated */ + __NFTA_NG_MAX +}; +#define NFTA_NG_MAX (__NFTA_NG_MAX - 1) + +enum nft_ng_types { + NFT_NG_INCREMENTAL, + NFT_NG_RANDOM, + __NFT_NG_MAX +}; +#define NFT_NG_MAX (__NFT_NG_MAX - 1) + +enum nft_tunnel_key_ip_attributes { + NFTA_TUNNEL_KEY_IP_UNSPEC, + NFTA_TUNNEL_KEY_IP_SRC, + NFTA_TUNNEL_KEY_IP_DST, + __NFTA_TUNNEL_KEY_IP_MAX +}; +#define NFTA_TUNNEL_KEY_IP_MAX (__NFTA_TUNNEL_KEY_IP_MAX - 1) + +enum nft_tunnel_ip6_attributes { + NFTA_TUNNEL_KEY_IP6_UNSPEC, + NFTA_TUNNEL_KEY_IP6_SRC, + NFTA_TUNNEL_KEY_IP6_DST, + NFTA_TUNNEL_KEY_IP6_FLOWLABEL, + __NFTA_TUNNEL_KEY_IP6_MAX +}; +#define NFTA_TUNNEL_KEY_IP6_MAX (__NFTA_TUNNEL_KEY_IP6_MAX - 1) + +enum nft_tunnel_opts_attributes { + NFTA_TUNNEL_KEY_OPTS_UNSPEC, + NFTA_TUNNEL_KEY_OPTS_VXLAN, + NFTA_TUNNEL_KEY_OPTS_ERSPAN, + NFTA_TUNNEL_KEY_OPTS_GENEVE, + __NFTA_TUNNEL_KEY_OPTS_MAX +}; +#define NFTA_TUNNEL_KEY_OPTS_MAX (__NFTA_TUNNEL_KEY_OPTS_MAX - 1) + +enum nft_tunnel_opts_vxlan_attributes { + NFTA_TUNNEL_KEY_VXLAN_UNSPEC, + NFTA_TUNNEL_KEY_VXLAN_GBP, + __NFTA_TUNNEL_KEY_VXLAN_MAX +}; +#define NFTA_TUNNEL_KEY_VXLAN_MAX (__NFTA_TUNNEL_KEY_VXLAN_MAX - 1) + +enum nft_tunnel_opts_erspan_attributes { + NFTA_TUNNEL_KEY_ERSPAN_UNSPEC, + NFTA_TUNNEL_KEY_ERSPAN_VERSION, + NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX, + NFTA_TUNNEL_KEY_ERSPAN_V2_HWID, + NFTA_TUNNEL_KEY_ERSPAN_V2_DIR, + __NFTA_TUNNEL_KEY_ERSPAN_MAX +}; +#define NFTA_TUNNEL_KEY_ERSPAN_MAX (__NFTA_TUNNEL_KEY_ERSPAN_MAX - 1) + +enum nft_tunnel_opts_geneve_attributes { + NFTA_TUNNEL_KEY_GENEVE_UNSPEC, + NFTA_TUNNEL_KEY_GENEVE_CLASS, + NFTA_TUNNEL_KEY_GENEVE_TYPE, + NFTA_TUNNEL_KEY_GENEVE_DATA, + __NFTA_TUNNEL_KEY_GENEVE_MAX +}; +#define NFTA_TUNNEL_KEY_GENEVE_MAX (__NFTA_TUNNEL_KEY_GENEVE_MAX - 1) + +enum nft_tunnel_flags { + NFT_TUNNEL_F_ZERO_CSUM_TX = (1 << 0), + NFT_TUNNEL_F_DONT_FRAGMENT = (1 << 1), + NFT_TUNNEL_F_SEQ_NUMBER = (1 << 2), +}; +#define NFT_TUNNEL_F_MASK (NFT_TUNNEL_F_ZERO_CSUM_TX | \ + NFT_TUNNEL_F_DONT_FRAGMENT | \ + NFT_TUNNEL_F_SEQ_NUMBER) + +enum nft_tunnel_key_attributes { + NFTA_TUNNEL_KEY_UNSPEC, + NFTA_TUNNEL_KEY_ID, + NFTA_TUNNEL_KEY_IP, + NFTA_TUNNEL_KEY_IP6, + NFTA_TUNNEL_KEY_FLAGS, + NFTA_TUNNEL_KEY_TOS, + NFTA_TUNNEL_KEY_TTL, + NFTA_TUNNEL_KEY_SPORT, + NFTA_TUNNEL_KEY_DPORT, + NFTA_TUNNEL_KEY_OPTS, + __NFTA_TUNNEL_KEY_MAX +}; +#define NFTA_TUNNEL_KEY_MAX (__NFTA_TUNNEL_KEY_MAX - 1) + +enum nft_tunnel_keys { + NFT_TUNNEL_PATH, + NFT_TUNNEL_ID, + __NFT_TUNNEL_MAX +}; +#define NFT_TUNNEL_MAX (__NFT_TUNNEL_MAX - 1) + +enum nft_tunnel_mode { + NFT_TUNNEL_MODE_NONE, + NFT_TUNNEL_MODE_RX, + NFT_TUNNEL_MODE_TX, + __NFT_TUNNEL_MODE_MAX +}; +#define NFT_TUNNEL_MODE_MAX (__NFT_TUNNEL_MODE_MAX - 1) + +enum nft_tunnel_attributes { + NFTA_TUNNEL_UNSPEC, + NFTA_TUNNEL_KEY, + NFTA_TUNNEL_DREG, + NFTA_TUNNEL_MODE, + __NFTA_TUNNEL_MAX +}; +#define NFTA_TUNNEL_MAX (__NFTA_TUNNEL_MAX - 1) + +#endif /* _LINUX_NF_TABLES_H */ diff --git a/src/basic/linux/netfilter/nfnetlink.h b/src/basic/linux/netfilter/nfnetlink.h new file mode 100644 index 0000000..6cd58cd --- /dev/null +++ b/src/basic/linux/netfilter/nfnetlink.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_NFNETLINK_H +#define _UAPI_NFNETLINK_H +#include +#include + +enum nfnetlink_groups { + NFNLGRP_NONE, +#define NFNLGRP_NONE NFNLGRP_NONE + NFNLGRP_CONNTRACK_NEW, +#define NFNLGRP_CONNTRACK_NEW NFNLGRP_CONNTRACK_NEW + NFNLGRP_CONNTRACK_UPDATE, +#define NFNLGRP_CONNTRACK_UPDATE NFNLGRP_CONNTRACK_UPDATE + NFNLGRP_CONNTRACK_DESTROY, +#define NFNLGRP_CONNTRACK_DESTROY NFNLGRP_CONNTRACK_DESTROY + NFNLGRP_CONNTRACK_EXP_NEW, +#define NFNLGRP_CONNTRACK_EXP_NEW NFNLGRP_CONNTRACK_EXP_NEW + NFNLGRP_CONNTRACK_EXP_UPDATE, +#define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE + NFNLGRP_CONNTRACK_EXP_DESTROY, +#define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY + NFNLGRP_NFTABLES, +#define NFNLGRP_NFTABLES NFNLGRP_NFTABLES + NFNLGRP_ACCT_QUOTA, +#define NFNLGRP_ACCT_QUOTA NFNLGRP_ACCT_QUOTA + NFNLGRP_NFTRACE, +#define NFNLGRP_NFTRACE NFNLGRP_NFTRACE + __NFNLGRP_MAX, +}; +#define NFNLGRP_MAX (__NFNLGRP_MAX - 1) + +/* General form of address family dependent message. + */ +struct nfgenmsg { + __u8 nfgen_family; /* AF_xxx */ + __u8 version; /* nfnetlink version */ + __be16 res_id; /* resource id */ +}; + +#define NFNETLINK_V0 0 + +/* netfilter netlink message types are split in two pieces: + * 8 bit subsystem, 8bit operation. + */ + +#define NFNL_SUBSYS_ID(x) ((x & 0xff00) >> 8) +#define NFNL_MSG_TYPE(x) (x & 0x00ff) + +/* No enum here, otherwise __stringify() trick of MODULE_ALIAS_NFNL_SUBSYS() + * won't work anymore */ +#define NFNL_SUBSYS_NONE 0 +#define NFNL_SUBSYS_CTNETLINK 1 +#define NFNL_SUBSYS_CTNETLINK_EXP 2 +#define NFNL_SUBSYS_QUEUE 3 +#define NFNL_SUBSYS_ULOG 4 +#define NFNL_SUBSYS_OSF 5 +#define NFNL_SUBSYS_IPSET 6 +#define NFNL_SUBSYS_ACCT 7 +#define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8 +#define NFNL_SUBSYS_CTHELPER 9 +#define NFNL_SUBSYS_NFTABLES 10 +#define NFNL_SUBSYS_NFT_COMPAT 11 +#define NFNL_SUBSYS_HOOK 12 +#define NFNL_SUBSYS_COUNT 13 + +/* Reserved control nfnetlink messages */ +#define NFNL_MSG_BATCH_BEGIN NLMSG_MIN_TYPE +#define NFNL_MSG_BATCH_END NLMSG_MIN_TYPE+1 + +/** + * enum nfnl_batch_attributes - nfnetlink batch netlink attributes + * + * @NFNL_BATCH_GENID: generation ID for this changeset (NLA_U32) + */ +enum nfnl_batch_attributes { + NFNL_BATCH_UNSPEC, + NFNL_BATCH_GENID, + __NFNL_BATCH_MAX +}; +#define NFNL_BATCH_MAX (__NFNL_BATCH_MAX - 1) + +#endif /* _UAPI_NFNETLINK_H */ diff --git a/src/basic/linux/netlink.h b/src/basic/linux/netlink.h new file mode 100644 index 0000000..e2ae82e --- /dev/null +++ b/src/basic/linux/netlink.h @@ -0,0 +1,378 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_NETLINK_H +#define _UAPI__LINUX_NETLINK_H + +#include +#include /* for __kernel_sa_family_t */ +#include + +#define NETLINK_ROUTE 0 /* Routing/device hook */ +#define NETLINK_UNUSED 1 /* Unused number */ +#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ +#define NETLINK_FIREWALL 3 /* Unused number, formerly ip_queue */ +#define NETLINK_SOCK_DIAG 4 /* socket monitoring */ +#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ +#define NETLINK_XFRM 6 /* ipsec */ +#define NETLINK_SELINUX 7 /* SELinux event notifications */ +#define NETLINK_ISCSI 8 /* Open-iSCSI */ +#define NETLINK_AUDIT 9 /* auditing */ +#define NETLINK_FIB_LOOKUP 10 +#define NETLINK_CONNECTOR 11 +#define NETLINK_NETFILTER 12 /* netfilter subsystem */ +#define NETLINK_IP6_FW 13 +#define NETLINK_DNRTMSG 14 /* DECnet routing messages (obsolete) */ +#define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */ +#define NETLINK_GENERIC 16 +/* leave room for NETLINK_DM (DM Events) */ +#define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */ +#define NETLINK_ECRYPTFS 19 +#define NETLINK_RDMA 20 +#define NETLINK_CRYPTO 21 /* Crypto layer */ +#define NETLINK_SMC 22 /* SMC monitoring */ + +#define NETLINK_INET_DIAG NETLINK_SOCK_DIAG + +#define MAX_LINKS 32 + +struct sockaddr_nl { + __kernel_sa_family_t nl_family; /* AF_NETLINK */ + unsigned short nl_pad; /* zero */ + __u32 nl_pid; /* port ID */ + __u32 nl_groups; /* multicast groups mask */ +}; + +/** + * struct nlmsghdr - fixed format metadata header of Netlink messages + * @nlmsg_len: Length of message including header + * @nlmsg_type: Message content type + * @nlmsg_flags: Additional flags + * @nlmsg_seq: Sequence number + * @nlmsg_pid: Sending process port ID + */ +struct nlmsghdr { + __u32 nlmsg_len; + __u16 nlmsg_type; + __u16 nlmsg_flags; + __u32 nlmsg_seq; + __u32 nlmsg_pid; +}; + +/* Flags values */ + +#define NLM_F_REQUEST 0x01 /* It is request message. */ +#define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */ +#define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */ +#define NLM_F_ECHO 0x08 /* Receive resulting notifications */ +#define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */ +#define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */ + +/* Modifiers to GET request */ +#define NLM_F_ROOT 0x100 /* specify tree root */ +#define NLM_F_MATCH 0x200 /* return all matching */ +#define NLM_F_ATOMIC 0x400 /* atomic GET */ +#define NLM_F_DUMP (NLM_F_ROOT|NLM_F_MATCH) + +/* Modifiers to NEW request */ +#define NLM_F_REPLACE 0x100 /* Override existing */ +#define NLM_F_EXCL 0x200 /* Do not touch, if it exists */ +#define NLM_F_CREATE 0x400 /* Create, if it does not exist */ +#define NLM_F_APPEND 0x800 /* Add to end of list */ + +/* Modifiers to DELETE request */ +#define NLM_F_NONREC 0x100 /* Do not delete recursively */ +#define NLM_F_BULK 0x200 /* Delete multiple objects */ + +/* Flags for ACK message */ +#define NLM_F_CAPPED 0x100 /* request was capped */ +#define NLM_F_ACK_TLVS 0x200 /* extended ACK TVLs were included */ + +/* + 4.4BSD ADD NLM_F_CREATE|NLM_F_EXCL + 4.4BSD CHANGE NLM_F_REPLACE + + True CHANGE NLM_F_CREATE|NLM_F_REPLACE + Append NLM_F_CREATE + Check NLM_F_EXCL + */ + +#define NLMSG_ALIGNTO 4U +#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) ) +#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr))) +#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN) +#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len)) +#define NLMSG_DATA(nlh) ((void *)(((char *)nlh) + NLMSG_HDRLEN)) +#define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \ + (struct nlmsghdr *)(((char *)(nlh)) + \ + NLMSG_ALIGN((nlh)->nlmsg_len))) +#define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \ + (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \ + (nlh)->nlmsg_len <= (len)) +#define NLMSG_PAYLOAD(nlh,len) ((nlh)->nlmsg_len - NLMSG_SPACE((len))) + +#define NLMSG_NOOP 0x1 /* Nothing. */ +#define NLMSG_ERROR 0x2 /* Error */ +#define NLMSG_DONE 0x3 /* End of a dump */ +#define NLMSG_OVERRUN 0x4 /* Data lost */ + +#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */ + +struct nlmsgerr { + int error; + struct nlmsghdr msg; + /* + * followed by the message contents unless NETLINK_CAP_ACK was set + * or the ACK indicates success (error == 0) + * message length is aligned with NLMSG_ALIGN() + */ + /* + * followed by TLVs defined in enum nlmsgerr_attrs + * if NETLINK_EXT_ACK was set + */ +}; + +/** + * enum nlmsgerr_attrs - nlmsgerr attributes + * @NLMSGERR_ATTR_UNUSED: unused + * @NLMSGERR_ATTR_MSG: error message string (string) + * @NLMSGERR_ATTR_OFFS: offset of the invalid attribute in the original + * message, counting from the beginning of the header (u32) + * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to + * be used - in the success case - to identify a created + * object or operation or similar (binary) + * @NLMSGERR_ATTR_POLICY: policy for a rejected attribute + * @NLMSGERR_ATTR_MISS_TYPE: type of a missing required attribute, + * %NLMSGERR_ATTR_MISS_NEST will not be present if the attribute was + * missing at the message level + * @NLMSGERR_ATTR_MISS_NEST: offset of the nest where attribute was missing + * @__NLMSGERR_ATTR_MAX: number of attributes + * @NLMSGERR_ATTR_MAX: highest attribute number + */ +enum nlmsgerr_attrs { + NLMSGERR_ATTR_UNUSED, + NLMSGERR_ATTR_MSG, + NLMSGERR_ATTR_OFFS, + NLMSGERR_ATTR_COOKIE, + NLMSGERR_ATTR_POLICY, + NLMSGERR_ATTR_MISS_TYPE, + NLMSGERR_ATTR_MISS_NEST, + + __NLMSGERR_ATTR_MAX, + NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1 +}; + +#define NETLINK_ADD_MEMBERSHIP 1 +#define NETLINK_DROP_MEMBERSHIP 2 +#define NETLINK_PKTINFO 3 +#define NETLINK_BROADCAST_ERROR 4 +#define NETLINK_NO_ENOBUFS 5 +#ifndef __KERNEL__ +#define NETLINK_RX_RING 6 +#define NETLINK_TX_RING 7 +#endif +#define NETLINK_LISTEN_ALL_NSID 8 +#define NETLINK_LIST_MEMBERSHIPS 9 +#define NETLINK_CAP_ACK 10 +#define NETLINK_EXT_ACK 11 +#define NETLINK_GET_STRICT_CHK 12 + +struct nl_pktinfo { + __u32 group; +}; + +struct nl_mmap_req { + unsigned int nm_block_size; + unsigned int nm_block_nr; + unsigned int nm_frame_size; + unsigned int nm_frame_nr; +}; + +struct nl_mmap_hdr { + unsigned int nm_status; + unsigned int nm_len; + __u32 nm_group; + /* credentials */ + __u32 nm_pid; + __u32 nm_uid; + __u32 nm_gid; +}; + +#ifndef __KERNEL__ +enum nl_mmap_status { + NL_MMAP_STATUS_UNUSED, + NL_MMAP_STATUS_RESERVED, + NL_MMAP_STATUS_VALID, + NL_MMAP_STATUS_COPY, + NL_MMAP_STATUS_SKIP, +}; + +#define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO +#define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) +#define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) +#endif + +#define NET_MAJOR 36 /* Major 36 is reserved for networking */ + +enum { + NETLINK_UNCONNECTED = 0, + NETLINK_CONNECTED, +}; + +/* + * <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)--> + * +---------------------+- - -+- - - - - - - - - -+- - -+ + * | Header | Pad | Payload | Pad | + * | (struct nlattr) | ing | | ing | + * +---------------------+- - -+- - - - - - - - - -+- - -+ + * <-------------- nlattr->nla_len --------------> + */ + +struct nlattr { + __u16 nla_len; + __u16 nla_type; +}; + +/* + * nla_type (16 bits) + * +---+---+-------------------------------+ + * | N | O | Attribute Type | + * +---+---+-------------------------------+ + * N := Carries nested attributes + * O := Payload stored in network byte order + * + * Note: The N and O flag are mutually exclusive. + */ +#define NLA_F_NESTED (1 << 15) +#define NLA_F_NET_BYTEORDER (1 << 14) +#define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER) + +#define NLA_ALIGNTO 4 +#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1)) +#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr))) + +/* Generic 32 bitflags attribute content sent to the kernel. + * + * The value is a bitmap that defines the values being set + * The selector is a bitmask that defines which value is legit + * + * Examples: + * value = 0x0, and selector = 0x1 + * implies we are selecting bit 1 and we want to set its value to 0. + * + * value = 0x2, and selector = 0x2 + * implies we are selecting bit 2 and we want to set its value to 1. + * + */ +struct nla_bitfield32 { + __u32 value; + __u32 selector; +}; + +/* + * policy descriptions - it's specific to each family how this is used + * Normally, it should be retrieved via a dump inside another attribute + * specifying where it applies. + */ + +/** + * enum netlink_attribute_type - type of an attribute + * @NL_ATTR_TYPE_INVALID: unused + * @NL_ATTR_TYPE_FLAG: flag attribute (present/not present) + * @NL_ATTR_TYPE_U8: 8-bit unsigned attribute + * @NL_ATTR_TYPE_U16: 16-bit unsigned attribute + * @NL_ATTR_TYPE_U32: 32-bit unsigned attribute + * @NL_ATTR_TYPE_U64: 64-bit unsigned attribute + * @NL_ATTR_TYPE_S8: 8-bit signed attribute + * @NL_ATTR_TYPE_S16: 16-bit signed attribute + * @NL_ATTR_TYPE_S32: 32-bit signed attribute + * @NL_ATTR_TYPE_S64: 64-bit signed attribute + * @NL_ATTR_TYPE_BINARY: binary data, min/max length may be specified + * @NL_ATTR_TYPE_STRING: string, min/max length may be specified + * @NL_ATTR_TYPE_NUL_STRING: NUL-terminated string, + * min/max length may be specified + * @NL_ATTR_TYPE_NESTED: nested, i.e. the content of this attribute + * consists of sub-attributes. The nested policy and maxtype + * inside may be specified. + * @NL_ATTR_TYPE_NESTED_ARRAY: nested array, i.e. the content of this + * attribute contains sub-attributes whose type is irrelevant + * (just used to separate the array entries) and each such array + * entry has attributes again, the policy for those inner ones + * and the corresponding maxtype may be specified. + * @NL_ATTR_TYPE_BITFIELD32: &struct nla_bitfield32 attribute + */ +enum netlink_attribute_type { + NL_ATTR_TYPE_INVALID, + + NL_ATTR_TYPE_FLAG, + + NL_ATTR_TYPE_U8, + NL_ATTR_TYPE_U16, + NL_ATTR_TYPE_U32, + NL_ATTR_TYPE_U64, + + NL_ATTR_TYPE_S8, + NL_ATTR_TYPE_S16, + NL_ATTR_TYPE_S32, + NL_ATTR_TYPE_S64, + + NL_ATTR_TYPE_BINARY, + NL_ATTR_TYPE_STRING, + NL_ATTR_TYPE_NUL_STRING, + + NL_ATTR_TYPE_NESTED, + NL_ATTR_TYPE_NESTED_ARRAY, + + NL_ATTR_TYPE_BITFIELD32, +}; + +/** + * enum netlink_policy_type_attr - policy type attributes + * @NL_POLICY_TYPE_ATTR_UNSPEC: unused + * @NL_POLICY_TYPE_ATTR_TYPE: type of the attribute, + * &enum netlink_attribute_type (U32) + * @NL_POLICY_TYPE_ATTR_MIN_VALUE_S: minimum value for signed + * integers (S64) + * @NL_POLICY_TYPE_ATTR_MAX_VALUE_S: maximum value for signed + * integers (S64) + * @NL_POLICY_TYPE_ATTR_MIN_VALUE_U: minimum value for unsigned + * integers (U64) + * @NL_POLICY_TYPE_ATTR_MAX_VALUE_U: maximum value for unsigned + * integers (U64) + * @NL_POLICY_TYPE_ATTR_MIN_LENGTH: minimum length for binary + * attributes, no minimum if not given (U32) + * @NL_POLICY_TYPE_ATTR_MAX_LENGTH: maximum length for binary + * attributes, no maximum if not given (U32) + * @NL_POLICY_TYPE_ATTR_POLICY_IDX: sub policy for nested and + * nested array types (U32) + * @NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE: maximum sub policy + * attribute for nested and nested array types, this can + * in theory be < the size of the policy pointed to by + * the index, if limited inside the nesting (U32) + * @NL_POLICY_TYPE_ATTR_BITFIELD32_MASK: valid mask for the + * bitfield32 type (U32) + * @NL_POLICY_TYPE_ATTR_MASK: mask of valid bits for unsigned integers (U64) + * @NL_POLICY_TYPE_ATTR_PAD: pad attribute for 64-bit alignment + * + * @__NL_POLICY_TYPE_ATTR_MAX: number of attributes + * @NL_POLICY_TYPE_ATTR_MAX: highest attribute number + */ +enum netlink_policy_type_attr { + NL_POLICY_TYPE_ATTR_UNSPEC, + NL_POLICY_TYPE_ATTR_TYPE, + NL_POLICY_TYPE_ATTR_MIN_VALUE_S, + NL_POLICY_TYPE_ATTR_MAX_VALUE_S, + NL_POLICY_TYPE_ATTR_MIN_VALUE_U, + NL_POLICY_TYPE_ATTR_MAX_VALUE_U, + NL_POLICY_TYPE_ATTR_MIN_LENGTH, + NL_POLICY_TYPE_ATTR_MAX_LENGTH, + NL_POLICY_TYPE_ATTR_POLICY_IDX, + NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE, + NL_POLICY_TYPE_ATTR_BITFIELD32_MASK, + NL_POLICY_TYPE_ATTR_PAD, + NL_POLICY_TYPE_ATTR_MASK, + + /* keep last */ + __NL_POLICY_TYPE_ATTR_MAX, + NL_POLICY_TYPE_ATTR_MAX = __NL_POLICY_TYPE_ATTR_MAX - 1 +}; + +#endif /* _UAPI__LINUX_NETLINK_H */ diff --git a/src/basic/linux/nexthop.h b/src/basic/linux/nexthop.h new file mode 100644 index 0000000..d8ffa8c --- /dev/null +++ b/src/basic/linux/nexthop.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_NEXTHOP_H +#define _UAPI_LINUX_NEXTHOP_H + +#include + +struct nhmsg { + unsigned char nh_family; + unsigned char nh_scope; /* return only */ + unsigned char nh_protocol; /* Routing protocol that installed nh */ + unsigned char resvd; + unsigned int nh_flags; /* RTNH_F flags */ +}; + +/* entry in a nexthop group */ +struct nexthop_grp { + __u32 id; /* nexthop id - must exist */ + __u8 weight; /* weight of this nexthop */ + __u8 resvd1; + __u16 resvd2; +}; + +enum { + NEXTHOP_GRP_TYPE_MPATH, /* hash-threshold nexthop group + * default type if not specified + */ + NEXTHOP_GRP_TYPE_RES, /* resilient nexthop group */ + __NEXTHOP_GRP_TYPE_MAX, +}; + +#define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1) + +enum { + NHA_UNSPEC, + NHA_ID, /* u32; id for nexthop. id == 0 means auto-assign */ + + NHA_GROUP, /* array of nexthop_grp */ + NHA_GROUP_TYPE, /* u16 one of NEXTHOP_GRP_TYPE */ + /* if NHA_GROUP attribute is added, no other attributes can be set */ + + NHA_BLACKHOLE, /* flag; nexthop used to blackhole packets */ + /* if NHA_BLACKHOLE is added, OIF, GATEWAY, ENCAP can not be set */ + + NHA_OIF, /* u32; nexthop device */ + NHA_GATEWAY, /* be32 (IPv4) or in6_addr (IPv6) gw address */ + NHA_ENCAP_TYPE, /* u16; lwt encap type */ + NHA_ENCAP, /* lwt encap data */ + + /* NHA_OIF can be appended to dump request to return only + * nexthops using given device + */ + NHA_GROUPS, /* flag; only return nexthop groups in dump */ + NHA_MASTER, /* u32; only return nexthops with given master dev */ + + NHA_FDB, /* flag; nexthop belongs to a bridge fdb */ + /* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */ + + /* nested; resilient nexthop group attributes */ + NHA_RES_GROUP, + /* nested; nexthop bucket attributes */ + NHA_RES_BUCKET, + + __NHA_MAX, +}; + +#define NHA_MAX (__NHA_MAX - 1) + +enum { + NHA_RES_GROUP_UNSPEC, + /* Pad attribute for 64-bit alignment. */ + NHA_RES_GROUP_PAD = NHA_RES_GROUP_UNSPEC, + + /* u16; number of nexthop buckets in a resilient nexthop group */ + NHA_RES_GROUP_BUCKETS, + /* clock_t as u32; nexthop bucket idle timer (per-group) */ + NHA_RES_GROUP_IDLE_TIMER, + /* clock_t as u32; nexthop unbalanced timer */ + NHA_RES_GROUP_UNBALANCED_TIMER, + /* clock_t as u64; nexthop unbalanced time */ + NHA_RES_GROUP_UNBALANCED_TIME, + + __NHA_RES_GROUP_MAX, +}; + +#define NHA_RES_GROUP_MAX (__NHA_RES_GROUP_MAX - 1) + +enum { + NHA_RES_BUCKET_UNSPEC, + /* Pad attribute for 64-bit alignment. */ + NHA_RES_BUCKET_PAD = NHA_RES_BUCKET_UNSPEC, + + /* u16; nexthop bucket index */ + NHA_RES_BUCKET_INDEX, + /* clock_t as u64; nexthop bucket idle time */ + NHA_RES_BUCKET_IDLE_TIME, + /* u32; nexthop id assigned to the nexthop bucket */ + NHA_RES_BUCKET_NH_ID, + + __NHA_RES_BUCKET_MAX, +}; + +#define NHA_RES_BUCKET_MAX (__NHA_RES_BUCKET_MAX - 1) + +#endif diff --git a/src/basic/linux/nl80211.h b/src/basic/linux/nl80211.h new file mode 100644 index 0000000..c14a91b --- /dev/null +++ b/src/basic/linux/nl80211.h @@ -0,0 +1,7726 @@ +#ifndef __LINUX_NL80211_H +#define __LINUX_NL80211_H +/* + * 802.11 netlink interface public header + * + * Copyright 2006-2010 Johannes Berg + * Copyright 2008 Michael Wu + * Copyright 2008 Luis Carlos Cobo + * Copyright 2008 Michael Buesch + * Copyright 2008, 2009 Luis R. Rodriguez + * Copyright 2008 Jouni Malinen + * Copyright 2008 Colin McCabe + * Copyright 2015-2017 Intel Deutschland GmbH + * Copyright (C) 2018-2022 Intel Corporation + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +/* + * This header file defines the userspace API to the wireless stack. Please + * be careful not to break things - i.e. don't move anything around or so + * unless you can demonstrate that it breaks neither API nor ABI. + * + * Additions to the API should be accompanied by actual implementations in + * an upstream driver, so that example implementations exist in case there + * are ever concerns about the precise semantics of the API or changes are + * needed, and to ensure that code for dead (no longer implemented) API + * can actually be identified and removed. + * Nonetheless, semantics should also be documented carefully in this file. + */ + +#include + +#define NL80211_GENL_NAME "nl80211" + +#define NL80211_MULTICAST_GROUP_CONFIG "config" +#define NL80211_MULTICAST_GROUP_SCAN "scan" +#define NL80211_MULTICAST_GROUP_REG "regulatory" +#define NL80211_MULTICAST_GROUP_MLME "mlme" +#define NL80211_MULTICAST_GROUP_VENDOR "vendor" +#define NL80211_MULTICAST_GROUP_NAN "nan" +#define NL80211_MULTICAST_GROUP_TESTMODE "testmode" + +#define NL80211_EDMG_BW_CONFIG_MIN 4 +#define NL80211_EDMG_BW_CONFIG_MAX 15 +#define NL80211_EDMG_CHANNELS_MIN 1 +#define NL80211_EDMG_CHANNELS_MAX 0x3c /* 0b00111100 */ + +/** + * DOC: Station handling + * + * Stations are added per interface, but a special case exists with VLAN + * interfaces. When a station is bound to an AP interface, it may be moved + * into a VLAN identified by a VLAN interface index (%NL80211_ATTR_STA_VLAN). + * The station is still assumed to belong to the AP interface it was added + * to. + * + * Station handling varies per interface type and depending on the driver's + * capabilities. + * + * For drivers supporting TDLS with external setup (WIPHY_FLAG_SUPPORTS_TDLS + * and WIPHY_FLAG_TDLS_EXTERNAL_SETUP), the station lifetime is as follows: + * - a setup station entry is added, not yet authorized, without any rate + * or capability information, this just exists to avoid race conditions + * - when the TDLS setup is done, a single NL80211_CMD_SET_STATION is valid + * to add rate and capability information to the station and at the same + * time mark it authorized. + * - %NL80211_TDLS_ENABLE_LINK is then used + * - after this, the only valid operation is to remove it by tearing down + * the TDLS link (%NL80211_TDLS_DISABLE_LINK) + * + * TODO: need more info for other interface types + */ + +/** + * DOC: Frame transmission/registration support + * + * Frame transmission and registration support exists to allow userspace + * management entities such as wpa_supplicant react to management frames + * that are not being handled by the kernel. This includes, for example, + * certain classes of action frames that cannot be handled in the kernel + * for various reasons. + * + * Frame registration is done on a per-interface basis and registrations + * cannot be removed other than by closing the socket. It is possible to + * specify a registration filter to register, for example, only for a + * certain type of action frame. In particular with action frames, those + * that userspace registers for will not be returned as unhandled by the + * driver, so that the registered application has to take responsibility + * for doing that. + * + * The type of frame that can be registered for is also dependent on the + * driver and interface type. The frame types are advertised in wiphy + * attributes so applications know what to expect. + * + * NOTE: When an interface changes type while registrations are active, + * these registrations are ignored until the interface type is + * changed again. This means that changing the interface type can + * lead to a situation that couldn't otherwise be produced, but + * any such registrations will be dormant in the sense that they + * will not be serviced, i.e. they will not receive any frames. + * + * Frame transmission allows userspace to send for example the required + * responses to action frames. It is subject to some sanity checking, + * but many frames can be transmitted. When a frame was transmitted, its + * status is indicated to the sending socket. + * + * For more technical details, see the corresponding command descriptions + * below. + */ + +/** + * DOC: Virtual interface / concurrency capabilities + * + * Some devices are able to operate with virtual MACs, they can have + * more than one virtual interface. The capability handling for this + * is a bit complex though, as there may be a number of restrictions + * on the types of concurrency that are supported. + * + * To start with, each device supports the interface types listed in + * the %NL80211_ATTR_SUPPORTED_IFTYPES attribute, but by listing the + * types there no concurrency is implied. + * + * Once concurrency is desired, more attributes must be observed: + * To start with, since some interface types are purely managed in + * software, like the AP-VLAN type in mac80211 for example, there's + * an additional list of these, they can be added at any time and + * are only restricted by some semantic restrictions (e.g. AP-VLAN + * cannot be added without a corresponding AP interface). This list + * is exported in the %NL80211_ATTR_SOFTWARE_IFTYPES attribute. + * + * Further, the list of supported combinations is exported. This is + * in the %NL80211_ATTR_INTERFACE_COMBINATIONS attribute. Basically, + * it exports a list of "groups", and at any point in time the + * interfaces that are currently active must fall into any one of + * the advertised groups. Within each group, there are restrictions + * on the number of interfaces of different types that are supported + * and also the number of different channels, along with potentially + * some other restrictions. See &enum nl80211_if_combination_attrs. + * + * All together, these attributes define the concurrency of virtual + * interfaces that a given device supports. + */ + +/** + * DOC: packet coalesce support + * + * In most cases, host that receives IPv4 and IPv6 multicast/broadcast + * packets does not do anything with these packets. Therefore the + * reception of these unwanted packets causes unnecessary processing + * and power consumption. + * + * Packet coalesce feature helps to reduce number of received interrupts + * to host by buffering these packets in firmware/hardware for some + * predefined time. Received interrupt will be generated when one of the + * following events occur. + * a) Expiration of hardware timer whose expiration time is set to maximum + * coalescing delay of matching coalesce rule. + * b) Coalescing buffer in hardware reaches it's limit. + * c) Packet doesn't match any of the configured coalesce rules. + * + * User needs to configure following parameters for creating a coalesce + * rule. + * a) Maximum coalescing delay + * b) List of packet patterns which needs to be matched + * c) Condition for coalescence. pattern 'match' or 'no match' + * Multiple such rules can be created. + */ + +/** + * DOC: WPA/WPA2 EAPOL handshake offload + * + * By setting @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK flag drivers + * can indicate they support offloading EAPOL handshakes for WPA/WPA2 + * preshared key authentication in station mode. In %NL80211_CMD_CONNECT + * the preshared key should be specified using %NL80211_ATTR_PMK. Drivers + * supporting this offload may reject the %NL80211_CMD_CONNECT when no + * preshared key material is provided, for example when that driver does + * not support setting the temporal keys through %NL80211_CMD_NEW_KEY. + * + * Similarly @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X flag can be + * set by drivers indicating offload support of the PTK/GTK EAPOL + * handshakes during 802.1X authentication in station mode. In order to + * use the offload the %NL80211_CMD_CONNECT should have + * %NL80211_ATTR_WANT_1X_4WAY_HS attribute flag. Drivers supporting this + * offload may reject the %NL80211_CMD_CONNECT when the attribute flag is + * not present. + * + * By setting @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK flag drivers + * can indicate they support offloading EAPOL handshakes for WPA/WPA2 + * preshared key authentication in AP mode. In %NL80211_CMD_START_AP + * the preshared key should be specified using %NL80211_ATTR_PMK. Drivers + * supporting this offload may reject the %NL80211_CMD_START_AP when no + * preshared key material is provided, for example when that driver does + * not support setting the temporal keys through %NL80211_CMD_NEW_KEY. + * + * For 802.1X the PMK or PMK-R0 are set by providing %NL80211_ATTR_PMK + * using %NL80211_CMD_SET_PMK. For offloaded FT support also + * %NL80211_ATTR_PMKR0_NAME must be provided. + */ + +/** + * DOC: FILS shared key authentication offload + * + * FILS shared key authentication offload can be advertized by drivers by + * setting @NL80211_EXT_FEATURE_FILS_SK_OFFLOAD flag. The drivers that support + * FILS shared key authentication offload should be able to construct the + * authentication and association frames for FILS shared key authentication and + * eventually do a key derivation as per IEEE 802.11ai. The below additional + * parameters should be given to driver in %NL80211_CMD_CONNECT and/or in + * %NL80211_CMD_UPDATE_CONNECT_PARAMS. + * %NL80211_ATTR_FILS_ERP_USERNAME - used to construct keyname_nai + * %NL80211_ATTR_FILS_ERP_REALM - used to construct keyname_nai + * %NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM - used to construct erp message + * %NL80211_ATTR_FILS_ERP_RRK - used to generate the rIK and rMSK + * rIK should be used to generate an authentication tag on the ERP message and + * rMSK should be used to derive a PMKSA. + * rIK, rMSK should be generated and keyname_nai, sequence number should be used + * as specified in IETF RFC 6696. + * + * When FILS shared key authentication is completed, driver needs to provide the + * below additional parameters to userspace, which can be either after setting + * up a connection or after roaming. + * %NL80211_ATTR_FILS_KEK - used for key renewal + * %NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM - used in further EAP-RP exchanges + * %NL80211_ATTR_PMKID - used to identify the PMKSA used/generated + * %Nl80211_ATTR_PMK - used to update PMKSA cache in userspace + * The PMKSA can be maintained in userspace persistently so that it can be used + * later after reboots or wifi turn off/on also. + * + * %NL80211_ATTR_FILS_CACHE_ID is the cache identifier advertized by a FILS + * capable AP supporting PMK caching. It specifies the scope within which the + * PMKSAs are cached in an ESS. %NL80211_CMD_SET_PMKSA and + * %NL80211_CMD_DEL_PMKSA are enhanced to allow support for PMKSA caching based + * on FILS cache identifier. Additionally %NL80211_ATTR_PMK is used with + * %NL80211_SET_PMKSA to specify the PMK corresponding to a PMKSA for driver to + * use in a FILS shared key connection with PMKSA caching. + */ + +/** + * DOC: SAE authentication offload + * + * By setting @NL80211_EXT_FEATURE_SAE_OFFLOAD flag drivers can indicate they + * support offloading SAE authentication for WPA3-Personal networks in station + * mode. Similarly @NL80211_EXT_FEATURE_SAE_OFFLOAD_AP flag can be set by + * drivers indicating the offload support in AP mode. + * + * The password for SAE should be specified using %NL80211_ATTR_SAE_PASSWORD in + * %NL80211_CMD_CONNECT and %NL80211_CMD_START_AP for station and AP mode + * respectively. + */ + +/** + * DOC: VLAN offload support for setting group keys and binding STAs to VLANs + * + * By setting @NL80211_EXT_FEATURE_VLAN_OFFLOAD flag drivers can indicate they + * support offloading VLAN functionality in a manner where the driver exposes a + * single netdev that uses VLAN tagged frames and separate VLAN-specific netdevs + * can then be added using RTM_NEWLINK/IFLA_VLAN_ID similarly to the Ethernet + * case. Frames received from stations that are not assigned to any VLAN are + * delivered on the main netdev and frames to such stations can be sent through + * that main netdev. + * + * %NL80211_CMD_NEW_KEY (for group keys), %NL80211_CMD_NEW_STATION, and + * %NL80211_CMD_SET_STATION will optionally specify vlan_id using + * %NL80211_ATTR_VLAN_ID. + */ + +/** + * DOC: TID configuration + * + * TID config support can be checked in the %NL80211_ATTR_TID_CONFIG + * attribute given in wiphy capabilities. + * + * The necessary configuration parameters are mentioned in + * &enum nl80211_tid_config_attr and it will be passed to the + * %NL80211_CMD_SET_TID_CONFIG command in %NL80211_ATTR_TID_CONFIG. + * + * If the configuration needs to be applied for specific peer then the MAC + * address of the peer needs to be passed in %NL80211_ATTR_MAC, otherwise the + * configuration will be applied for all the connected peers in the vif except + * any peers that have peer specific configuration for the TID by default; if + * the %NL80211_TID_CONFIG_ATTR_OVERRIDE flag is set, peer specific values + * will be overwritten. + * + * All this configuration is valid only for STA's current connection + * i.e. the configuration will be reset to default when the STA connects back + * after disconnection/roaming, and this configuration will be cleared when + * the interface goes down. + */ + +/** + * DOC: FILS shared key crypto offload + * + * This feature is applicable to drivers running in AP mode. + * + * FILS shared key crypto offload can be advertised by drivers by setting + * @NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD flag. The drivers that support + * FILS shared key crypto offload should be able to encrypt and decrypt + * association frames for FILS shared key authentication as per IEEE 802.11ai. + * With this capability, for FILS key derivation, drivers depend on userspace. + * + * After FILS key derivation, userspace shares the FILS AAD details with the + * driver and the driver stores the same to use in decryption of association + * request and in encryption of association response. The below parameters + * should be given to the driver in %NL80211_CMD_SET_FILS_AAD. + * %NL80211_ATTR_MAC - STA MAC address, used for storing FILS AAD per STA + * %NL80211_ATTR_FILS_KEK - Used for encryption or decryption + * %NL80211_ATTR_FILS_NONCES - Used for encryption or decryption + * (STA Nonce 16 bytes followed by AP Nonce 16 bytes) + * + * Once the association is done, the driver cleans the FILS AAD data. + */ + +/** + * DOC: Multi-Link Operation + * + * In Multi-Link Operation, a connection between to MLDs utilizes multiple + * links. To use this in nl80211, various commands and responses now need + * to or will include the new %NL80211_ATTR_MLO_LINKS attribute. + * Additionally, various commands that need to operate on a specific link + * now need to be given the %NL80211_ATTR_MLO_LINK_ID attribute, e.g. to + * use %NL80211_CMD_START_AP or similar functions. + */ + +/** + * enum nl80211_commands - supported nl80211 commands + * + * @NL80211_CMD_UNSPEC: unspecified command to catch errors + * + * @NL80211_CMD_GET_WIPHY: request information about a wiphy or dump request + * to get a list of all present wiphys. + * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or + * %NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME, + * %NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ, + * %NL80211_ATTR_WIPHY_FREQ_OFFSET (and the attributes determining the + * channel width; this is used for setting monitor mode channel), + * %NL80211_ATTR_WIPHY_RETRY_SHORT, %NL80211_ATTR_WIPHY_RETRY_LONG, + * %NL80211_ATTR_WIPHY_FRAG_THRESHOLD, and/or + * %NL80211_ATTR_WIPHY_RTS_THRESHOLD. However, for setting the channel, + * see %NL80211_CMD_SET_CHANNEL instead, the support here is for backward + * compatibility only. + * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request + * or rename notification. Has attributes %NL80211_ATTR_WIPHY and + * %NL80211_ATTR_WIPHY_NAME. + * @NL80211_CMD_DEL_WIPHY: Wiphy deleted. Has attributes + * %NL80211_ATTR_WIPHY and %NL80211_ATTR_WIPHY_NAME. + * + * @NL80211_CMD_GET_INTERFACE: Request an interface's configuration; + * either a dump request for all interfaces or a specific get with a + * single %NL80211_ATTR_IFINDEX is supported. + * @NL80211_CMD_SET_INTERFACE: Set type of a virtual interface, requires + * %NL80211_ATTR_IFINDEX and %NL80211_ATTR_IFTYPE. + * @NL80211_CMD_NEW_INTERFACE: Newly created virtual interface or response + * to %NL80211_CMD_GET_INTERFACE. Has %NL80211_ATTR_IFINDEX, + * %NL80211_ATTR_WIPHY and %NL80211_ATTR_IFTYPE attributes. Can also + * be sent from userspace to request creation of a new virtual interface, + * then requires attributes %NL80211_ATTR_WIPHY, %NL80211_ATTR_IFTYPE and + * %NL80211_ATTR_IFNAME. + * @NL80211_CMD_DEL_INTERFACE: Virtual interface was deleted, has attributes + * %NL80211_ATTR_IFINDEX and %NL80211_ATTR_WIPHY. Can also be sent from + * userspace to request deletion of a virtual interface, then requires + * attribute %NL80211_ATTR_IFINDEX. If multiple BSSID advertisements are + * enabled using %NL80211_ATTR_MBSSID_CONFIG, %NL80211_ATTR_MBSSID_ELEMS, + * and if this command is used for the transmitting interface, then all + * the non-transmitting interfaces are deleted as well. + * + * @NL80211_CMD_GET_KEY: Get sequence counter information for a key specified + * by %NL80211_ATTR_KEY_IDX and/or %NL80211_ATTR_MAC. %NL80211_ATTR_MAC + * represents peer's MLD address for MLO pairwise key. For MLO group key, + * the link is identified by %NL80211_ATTR_MLO_LINK_ID. + * @NL80211_CMD_SET_KEY: Set key attributes %NL80211_ATTR_KEY_DEFAULT, + * %NL80211_ATTR_KEY_DEFAULT_MGMT, or %NL80211_ATTR_KEY_THRESHOLD. + * For MLO connection, the link to set default key is identified by + * %NL80211_ATTR_MLO_LINK_ID. + * @NL80211_CMD_NEW_KEY: add a key with given %NL80211_ATTR_KEY_DATA, + * %NL80211_ATTR_KEY_IDX, %NL80211_ATTR_MAC, %NL80211_ATTR_KEY_CIPHER, + * and %NL80211_ATTR_KEY_SEQ attributes. %NL80211_ATTR_MAC represents + * peer's MLD address for MLO pairwise key. The link to add MLO + * group key is identified by %NL80211_ATTR_MLO_LINK_ID. + * @NL80211_CMD_DEL_KEY: delete a key identified by %NL80211_ATTR_KEY_IDX + * or %NL80211_ATTR_MAC. %NL80211_ATTR_MAC represents peer's MLD address + * for MLO pairwise key. The link to delete group key is identified by + * %NL80211_ATTR_MLO_LINK_ID. + * + * @NL80211_CMD_GET_BEACON: (not used) + * @NL80211_CMD_SET_BEACON: change the beacon on an access point interface + * using the %NL80211_ATTR_BEACON_HEAD and %NL80211_ATTR_BEACON_TAIL + * attributes. For drivers that generate the beacon and probe responses + * internally, the following attributes must be provided: %NL80211_ATTR_IE, + * %NL80211_ATTR_IE_PROBE_RESP and %NL80211_ATTR_IE_ASSOC_RESP. + * @NL80211_CMD_START_AP: Start AP operation on an AP interface, parameters + * are like for %NL80211_CMD_SET_BEACON, and additionally parameters that + * do not change are used, these include %NL80211_ATTR_BEACON_INTERVAL, + * %NL80211_ATTR_DTIM_PERIOD, %NL80211_ATTR_SSID, + * %NL80211_ATTR_HIDDEN_SSID, %NL80211_ATTR_CIPHERS_PAIRWISE, + * %NL80211_ATTR_CIPHER_GROUP, %NL80211_ATTR_WPA_VERSIONS, + * %NL80211_ATTR_AKM_SUITES, %NL80211_ATTR_PRIVACY, + * %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_INACTIVITY_TIMEOUT, + * %NL80211_ATTR_ACL_POLICY and %NL80211_ATTR_MAC_ADDRS. + * The channel to use can be set on the interface or be given using the + * %NL80211_ATTR_WIPHY_FREQ and %NL80211_ATTR_WIPHY_FREQ_OFFSET, and the + * attributes determining channel width. + * @NL80211_CMD_NEW_BEACON: old alias for %NL80211_CMD_START_AP + * @NL80211_CMD_STOP_AP: Stop AP operation on the given interface + * @NL80211_CMD_DEL_BEACON: old alias for %NL80211_CMD_STOP_AP + * + * @NL80211_CMD_GET_STATION: Get station attributes for station identified by + * %NL80211_ATTR_MAC on the interface identified by %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_SET_STATION: Set station attributes for station identified by + * %NL80211_ATTR_MAC on the interface identified by %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_NEW_STATION: Add a station with given attributes to the + * interface identified by %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_DEL_STATION: Remove a station identified by %NL80211_ATTR_MAC + * or, if no MAC address given, all stations, on the interface identified + * by %NL80211_ATTR_IFINDEX. %NL80211_ATTR_MGMT_SUBTYPE and + * %NL80211_ATTR_REASON_CODE can optionally be used to specify which type + * of disconnection indication should be sent to the station + * (Deauthentication or Disassociation frame and reason code for that + * frame). + * + * @NL80211_CMD_GET_MPATH: Get mesh path attributes for mesh path to + * destination %NL80211_ATTR_MAC on the interface identified by + * %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_SET_MPATH: Set mesh path attributes for mesh path to + * destination %NL80211_ATTR_MAC on the interface identified by + * %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_NEW_MPATH: Create a new mesh path for the destination given by + * %NL80211_ATTR_MAC via %NL80211_ATTR_MPATH_NEXT_HOP. + * @NL80211_CMD_DEL_MPATH: Delete a mesh path to the destination given by + * %NL80211_ATTR_MAC. + * @NL80211_CMD_NEW_PATH: Add a mesh path with given attributes to the + * interface identified by %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_DEL_PATH: Remove a mesh path identified by %NL80211_ATTR_MAC + * or, if no MAC address given, all mesh paths, on the interface identified + * by %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_SET_BSS: Set BSS attributes for BSS identified by + * %NL80211_ATTR_IFINDEX. + * + * @NL80211_CMD_GET_REG: ask the wireless core to send us its currently set + * regulatory domain. If %NL80211_ATTR_WIPHY is specified and the device + * has a private regulatory domain, it will be returned. Otherwise, the + * global regdomain will be returned. + * A device will have a private regulatory domain if it uses the + * regulatory_hint() API. Even when a private regdomain is used the channel + * information will still be mended according to further hints from + * the regulatory core to help with compliance. A dump version of this API + * is now available which will returns the global regdomain as well as + * all private regdomains of present wiphys (for those that have it). + * If a wiphy is self-managed (%NL80211_ATTR_WIPHY_SELF_MANAGED_REG), then + * its private regdomain is the only valid one for it. The regulatory + * core is not used to help with compliance in this case. + * @NL80211_CMD_SET_REG: Set current regulatory domain. CRDA sends this command + * after being queried by the kernel. CRDA replies by sending a regulatory + * domain structure which consists of %NL80211_ATTR_REG_ALPHA set to our + * current alpha2 if it found a match. It also provides + * NL80211_ATTR_REG_RULE_FLAGS, and a set of regulatory rules. Each + * regulatory rule is a nested set of attributes given by + * %NL80211_ATTR_REG_RULE_FREQ_[START|END] and + * %NL80211_ATTR_FREQ_RANGE_MAX_BW with an attached power rule given by + * %NL80211_ATTR_REG_RULE_POWER_MAX_ANT_GAIN and + * %NL80211_ATTR_REG_RULE_POWER_MAX_EIRP. + * @NL80211_CMD_REQ_SET_REG: ask the wireless core to set the regulatory domain + * to the specified ISO/IEC 3166-1 alpha2 country code. The core will + * store this as a valid request and then query userspace for it. + * + * @NL80211_CMD_GET_MESH_CONFIG: Get mesh networking properties for the + * interface identified by %NL80211_ATTR_IFINDEX + * + * @NL80211_CMD_SET_MESH_CONFIG: Set mesh networking properties for the + * interface identified by %NL80211_ATTR_IFINDEX + * + * @NL80211_CMD_SET_MGMT_EXTRA_IE: Set extra IEs for management frames. The + * interface is identified with %NL80211_ATTR_IFINDEX and the management + * frame subtype with %NL80211_ATTR_MGMT_SUBTYPE. The extra IE data to be + * added to the end of the specified management frame is specified with + * %NL80211_ATTR_IE. If the command succeeds, the requested data will be + * added to all specified management frames generated by + * kernel/firmware/driver. + * Note: This command has been removed and it is only reserved at this + * point to avoid re-using existing command number. The functionality this + * command was planned for has been provided with cleaner design with the + * option to specify additional IEs in NL80211_CMD_TRIGGER_SCAN, + * NL80211_CMD_AUTHENTICATE, NL80211_CMD_ASSOCIATE, + * NL80211_CMD_DEAUTHENTICATE, and NL80211_CMD_DISASSOCIATE. + * + * @NL80211_CMD_GET_SCAN: get scan results + * @NL80211_CMD_TRIGGER_SCAN: trigger a new scan with the given parameters + * %NL80211_ATTR_TX_NO_CCK_RATE is used to decide whether to send the + * probe requests at CCK rate or not. %NL80211_ATTR_BSSID can be used to + * specify a BSSID to scan for; if not included, the wildcard BSSID will + * be used. + * @NL80211_CMD_NEW_SCAN_RESULTS: scan notification (as a reply to + * NL80211_CMD_GET_SCAN and on the "scan" multicast group) + * @NL80211_CMD_SCAN_ABORTED: scan was aborted, for unspecified reasons, + * partial scan results may be available + * + * @NL80211_CMD_START_SCHED_SCAN: start a scheduled scan at certain + * intervals and certain number of cycles, as specified by + * %NL80211_ATTR_SCHED_SCAN_PLANS. If %NL80211_ATTR_SCHED_SCAN_PLANS is + * not specified and only %NL80211_ATTR_SCHED_SCAN_INTERVAL is specified, + * scheduled scan will run in an infinite loop with the specified interval. + * These attributes are mutually exculsive, + * i.e. NL80211_ATTR_SCHED_SCAN_INTERVAL must not be passed if + * NL80211_ATTR_SCHED_SCAN_PLANS is defined. + * If for some reason scheduled scan is aborted by the driver, all scan + * plans are canceled (including scan plans that did not start yet). + * Like with normal scans, if SSIDs (%NL80211_ATTR_SCAN_SSIDS) + * are passed, they are used in the probe requests. For + * broadcast, a broadcast SSID must be passed (ie. an empty + * string). If no SSID is passed, no probe requests are sent and + * a passive scan is performed. %NL80211_ATTR_SCAN_FREQUENCIES, + * if passed, define which channels should be scanned; if not + * passed, all channels allowed for the current regulatory domain + * are used. Extra IEs can also be passed from the userspace by + * using the %NL80211_ATTR_IE attribute. The first cycle of the + * scheduled scan can be delayed by %NL80211_ATTR_SCHED_SCAN_DELAY + * is supplied. If the device supports multiple concurrent scheduled + * scans, it will allow such when the caller provides the flag attribute + * %NL80211_ATTR_SCHED_SCAN_MULTI to indicate user-space support for it. + * @NL80211_CMD_STOP_SCHED_SCAN: stop a scheduled scan. Returns -ENOENT if + * scheduled scan is not running. The caller may assume that as soon + * as the call returns, it is safe to start a new scheduled scan again. + * @NL80211_CMD_SCHED_SCAN_RESULTS: indicates that there are scheduled scan + * results available. + * @NL80211_CMD_SCHED_SCAN_STOPPED: indicates that the scheduled scan has + * stopped. The driver may issue this event at any time during a + * scheduled scan. One reason for stopping the scan is if the hardware + * does not support starting an association or a normal scan while running + * a scheduled scan. This event is also sent when the + * %NL80211_CMD_STOP_SCHED_SCAN command is received or when the interface + * is brought down while a scheduled scan was running. + * + * @NL80211_CMD_GET_SURVEY: get survey resuls, e.g. channel occupation + * or noise level + * @NL80211_CMD_NEW_SURVEY_RESULTS: survey data notification (as a reply to + * NL80211_CMD_GET_SURVEY and on the "scan" multicast group) + * + * @NL80211_CMD_SET_PMKSA: Add a PMKSA cache entry using %NL80211_ATTR_MAC + * (for the BSSID), %NL80211_ATTR_PMKID, and optionally %NL80211_ATTR_PMK + * (PMK is used for PTKSA derivation in case of FILS shared key offload) or + * using %NL80211_ATTR_SSID, %NL80211_ATTR_FILS_CACHE_ID, + * %NL80211_ATTR_PMKID, and %NL80211_ATTR_PMK in case of FILS + * authentication where %NL80211_ATTR_FILS_CACHE_ID is the identifier + * advertized by a FILS capable AP identifying the scope of PMKSA in an + * ESS. + * @NL80211_CMD_DEL_PMKSA: Delete a PMKSA cache entry, using %NL80211_ATTR_MAC + * (for the BSSID) and %NL80211_ATTR_PMKID or using %NL80211_ATTR_SSID, + * %NL80211_ATTR_FILS_CACHE_ID, and %NL80211_ATTR_PMKID in case of FILS + * authentication. + * @NL80211_CMD_FLUSH_PMKSA: Flush all PMKSA cache entries. + * + * @NL80211_CMD_REG_CHANGE: indicates to userspace the regulatory domain + * has been changed and provides details of the request information + * that caused the change such as who initiated the regulatory request + * (%NL80211_ATTR_REG_INITIATOR), the wiphy_idx + * (%NL80211_ATTR_REG_ALPHA2) on which the request was made from if + * the initiator was %NL80211_REGDOM_SET_BY_COUNTRY_IE or + * %NL80211_REGDOM_SET_BY_DRIVER, the type of regulatory domain + * set (%NL80211_ATTR_REG_TYPE), if the type of regulatory domain is + * %NL80211_REG_TYPE_COUNTRY the alpha2 to which we have moved on + * to (%NL80211_ATTR_REG_ALPHA2). + * @NL80211_CMD_REG_BEACON_HINT: indicates to userspace that an AP beacon + * has been found while world roaming thus enabling active scan or + * any mode of operation that initiates TX (beacons) on a channel + * where we would not have been able to do either before. As an example + * if you are world roaming (regulatory domain set to world or if your + * driver is using a custom world roaming regulatory domain) and while + * doing a passive scan on the 5 GHz band you find an AP there (if not + * on a DFS channel) you will now be able to actively scan for that AP + * or use AP mode on your card on that same channel. Note that this will + * never be used for channels 1-11 on the 2 GHz band as they are always + * enabled world wide. This beacon hint is only sent if your device had + * either disabled active scanning or beaconing on a channel. We send to + * userspace the wiphy on which we removed a restriction from + * (%NL80211_ATTR_WIPHY) and the channel on which this occurred + * before (%NL80211_ATTR_FREQ_BEFORE) and after (%NL80211_ATTR_FREQ_AFTER) + * the beacon hint was processed. + * + * @NL80211_CMD_AUTHENTICATE: authentication request and notification. + * This command is used both as a command (request to authenticate) and + * as an event on the "mlme" multicast group indicating completion of the + * authentication process. + * When used as a command, %NL80211_ATTR_IFINDEX is used to identify the + * interface. %NL80211_ATTR_MAC is used to specify PeerSTAAddress (and + * BSSID in case of station mode). %NL80211_ATTR_SSID is used to specify + * the SSID (mainly for association, but is included in authentication + * request, too, to help BSS selection. %NL80211_ATTR_WIPHY_FREQ + + * %NL80211_ATTR_WIPHY_FREQ_OFFSET is used to specify the frequence of the + * channel in MHz. %NL80211_ATTR_AUTH_TYPE is used to specify the + * authentication type. %NL80211_ATTR_IE is used to define IEs + * (VendorSpecificInfo, but also including RSN IE and FT IEs) to be added + * to the frame. + * When used as an event, this reports reception of an Authentication + * frame in station and IBSS modes when the local MLME processed the + * frame, i.e., it was for the local STA and was received in correct + * state. This is similar to MLME-AUTHENTICATE.confirm primitive in the + * MLME SAP interface (kernel providing MLME, userspace SME). The + * included %NL80211_ATTR_FRAME attribute contains the management frame + * (including both the header and frame body, but not FCS). This event is + * also used to indicate if the authentication attempt timed out. In that + * case the %NL80211_ATTR_FRAME attribute is replaced with a + * %NL80211_ATTR_TIMED_OUT flag (and %NL80211_ATTR_MAC to indicate which + * pending authentication timed out). + * @NL80211_CMD_ASSOCIATE: association request and notification; like + * NL80211_CMD_AUTHENTICATE but for Association and Reassociation + * (similar to MLME-ASSOCIATE.request, MLME-REASSOCIATE.request, + * MLME-ASSOCIATE.confirm or MLME-REASSOCIATE.confirm primitives). The + * %NL80211_ATTR_PREV_BSSID attribute is used to specify whether the + * request is for the initial association to an ESS (that attribute not + * included) or for reassociation within the ESS (that attribute is + * included). + * @NL80211_CMD_DEAUTHENTICATE: deauthentication request and notification; like + * NL80211_CMD_AUTHENTICATE but for Deauthentication frames (similar to + * MLME-DEAUTHENTICATION.request and MLME-DEAUTHENTICATE.indication + * primitives). + * @NL80211_CMD_DISASSOCIATE: disassociation request and notification; like + * NL80211_CMD_AUTHENTICATE but for Disassociation frames (similar to + * MLME-DISASSOCIATE.request and MLME-DISASSOCIATE.indication primitives). + * + * @NL80211_CMD_MICHAEL_MIC_FAILURE: notification of a locally detected Michael + * MIC (part of TKIP) failure; sent on the "mlme" multicast group; the + * event includes %NL80211_ATTR_MAC to describe the source MAC address of + * the frame with invalid MIC, %NL80211_ATTR_KEY_TYPE to show the key + * type, %NL80211_ATTR_KEY_IDX to indicate the key identifier, and + * %NL80211_ATTR_KEY_SEQ to indicate the TSC value of the frame; this + * event matches with MLME-MICHAELMICFAILURE.indication() primitive + * + * @NL80211_CMD_JOIN_IBSS: Join a new IBSS -- given at least an SSID and a + * FREQ attribute (for the initial frequency if no peer can be found) + * and optionally a MAC (as BSSID) and FREQ_FIXED attribute if those + * should be fixed rather than automatically determined. Can only be + * executed on a network interface that is UP, and fixed BSSID/FREQ + * may be rejected. Another optional parameter is the beacon interval, + * given in the %NL80211_ATTR_BEACON_INTERVAL attribute, which if not + * given defaults to 100 TU (102.4ms). + * @NL80211_CMD_LEAVE_IBSS: Leave the IBSS -- no special arguments, the IBSS is + * determined by the network interface. + * + * @NL80211_CMD_TESTMODE: testmode command, takes a wiphy (or ifindex) attribute + * to identify the device, and the TESTDATA blob attribute to pass through + * to the driver. + * + * @NL80211_CMD_CONNECT: connection request and notification; this command + * requests to connect to a specified network but without separating + * auth and assoc steps. For this, you need to specify the SSID in a + * %NL80211_ATTR_SSID attribute, and can optionally specify the association + * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, + * %NL80211_ATTR_USE_MFP, %NL80211_ATTR_MAC, %NL80211_ATTR_WIPHY_FREQ, + * %NL80211_ATTR_WIPHY_FREQ_OFFSET, %NL80211_ATTR_CONTROL_PORT, + * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, + * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT, + * %NL80211_ATTR_CONTROL_PORT_OVER_NL80211, %NL80211_ATTR_MAC_HINT, and + * %NL80211_ATTR_WIPHY_FREQ_HINT. + * If included, %NL80211_ATTR_MAC and %NL80211_ATTR_WIPHY_FREQ are + * restrictions on BSS selection, i.e., they effectively prevent roaming + * within the ESS. %NL80211_ATTR_MAC_HINT and %NL80211_ATTR_WIPHY_FREQ_HINT + * can be included to provide a recommendation of the initial BSS while + * allowing the driver to roam to other BSSes within the ESS and also to + * ignore this recommendation if the indicated BSS is not ideal. Only one + * set of BSSID,frequency parameters is used (i.e., either the enforcing + * %NL80211_ATTR_MAC,%NL80211_ATTR_WIPHY_FREQ or the less strict + * %NL80211_ATTR_MAC_HINT and %NL80211_ATTR_WIPHY_FREQ_HINT). + * Driver shall not modify the IEs specified through %NL80211_ATTR_IE if + * %NL80211_ATTR_MAC is included. However, if %NL80211_ATTR_MAC_HINT is + * included, these IEs through %NL80211_ATTR_IE are specified by the user + * space based on the best possible BSS selected. Thus, if the driver ends + * up selecting a different BSS, it can modify these IEs accordingly (e.g. + * userspace asks the driver to perform PMKSA caching with BSS1 and the + * driver ends up selecting BSS2 with different PMKSA cache entry; RSNIE + * has to get updated with the apt PMKID). + * %NL80211_ATTR_PREV_BSSID can be used to request a reassociation within + * the ESS in case the device is already associated and an association with + * a different BSS is desired. + * Background scan period can optionally be + * specified in %NL80211_ATTR_BG_SCAN_PERIOD, + * if not specified default background scan configuration + * in driver is used and if period value is 0, bg scan will be disabled. + * This attribute is ignored if driver does not support roam scan. + * It is also sent as an event, with the BSSID and response IEs when the + * connection is established or failed to be established. This can be + * determined by the %NL80211_ATTR_STATUS_CODE attribute (0 = success, + * non-zero = failure). If %NL80211_ATTR_TIMED_OUT is included in the + * event, the connection attempt failed due to not being able to initiate + * authentication/association or not receiving a response from the AP. + * Non-zero %NL80211_ATTR_STATUS_CODE value is indicated in that case as + * well to remain backwards compatible. + * @NL80211_CMD_ROAM: Notification indicating the card/driver roamed by itself. + * When a security association was established on an 802.1X network using + * fast transition, this event should be followed by an + * %NL80211_CMD_PORT_AUTHORIZED event. + * Following a %NL80211_CMD_ROAM event userspace can issue + * %NL80211_CMD_GET_SCAN in order to obtain the scan information for the + * new BSS the card/driver roamed to. + * @NL80211_CMD_DISCONNECT: drop a given connection; also used to notify + * userspace that a connection was dropped by the AP or due to other + * reasons, for this the %NL80211_ATTR_DISCONNECTED_BY_AP and + * %NL80211_ATTR_REASON_CODE attributes are used. + * + * @NL80211_CMD_SET_WIPHY_NETNS: Set a wiphy's netns. Note that all devices + * associated with this wiphy must be down and will follow. + * + * @NL80211_CMD_REMAIN_ON_CHANNEL: Request to remain awake on the specified + * channel for the specified amount of time. This can be used to do + * off-channel operations like transmit a Public Action frame and wait for + * a response while being associated to an AP on another channel. + * %NL80211_ATTR_IFINDEX is used to specify which interface (and thus + * radio) is used. %NL80211_ATTR_WIPHY_FREQ is used to specify the + * frequency for the operation. + * %NL80211_ATTR_DURATION is used to specify the duration in milliseconds + * to remain on the channel. This command is also used as an event to + * notify when the requested duration starts (it may take a while for the + * driver to schedule this time due to other concurrent needs for the + * radio). + * When called, this operation returns a cookie (%NL80211_ATTR_COOKIE) + * that will be included with any events pertaining to this request; + * the cookie is also used to cancel the request. + * @NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL: This command can be used to cancel a + * pending remain-on-channel duration if the desired operation has been + * completed prior to expiration of the originally requested duration. + * %NL80211_ATTR_WIPHY or %NL80211_ATTR_IFINDEX is used to specify the + * radio. The %NL80211_ATTR_COOKIE attribute must be given as well to + * uniquely identify the request. + * This command is also used as an event to notify when a requested + * remain-on-channel duration has expired. + * + * @NL80211_CMD_SET_TX_BITRATE_MASK: Set the mask of rates to be used in TX + * rate selection. %NL80211_ATTR_IFINDEX is used to specify the interface + * and @NL80211_ATTR_TX_RATES the set of allowed rates. + * + * @NL80211_CMD_REGISTER_FRAME: Register for receiving certain mgmt frames + * (via @NL80211_CMD_FRAME) for processing in userspace. This command + * requires an interface index, a frame type attribute (optional for + * backward compatibility reasons, if not given assumes action frames) + * and a match attribute containing the first few bytes of the frame + * that should match, e.g. a single byte for only a category match or + * four bytes for vendor frames including the OUI. The registration + * cannot be dropped, but is removed automatically when the netlink + * socket is closed. Multiple registrations can be made. + * The %NL80211_ATTR_RECEIVE_MULTICAST flag attribute can be given if + * %NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS is available, in which + * case the registration can also be modified to include/exclude the + * flag, rather than requiring unregistration to change it. + * @NL80211_CMD_REGISTER_ACTION: Alias for @NL80211_CMD_REGISTER_FRAME for + * backward compatibility + * @NL80211_CMD_FRAME: Management frame TX request and RX notification. This + * command is used both as a request to transmit a management frame and + * as an event indicating reception of a frame that was not processed in + * kernel code, but is for us (i.e., which may need to be processed in a + * user space application). %NL80211_ATTR_FRAME is used to specify the + * frame contents (including header). %NL80211_ATTR_WIPHY_FREQ is used + * to indicate on which channel the frame is to be transmitted or was + * received. If this channel is not the current channel (remain-on-channel + * or the operational channel) the device will switch to the given channel + * and transmit the frame, optionally waiting for a response for the time + * specified using %NL80211_ATTR_DURATION. When called, this operation + * returns a cookie (%NL80211_ATTR_COOKIE) that will be included with the + * TX status event pertaining to the TX request. + * %NL80211_ATTR_TX_NO_CCK_RATE is used to decide whether to send the + * management frames at CCK rate or not in 2GHz band. + * %NL80211_ATTR_CSA_C_OFFSETS_TX is an array of offsets to CSA + * counters which will be updated to the current value. This attribute + * is used during CSA period. + * For TX on an MLD, the frequency can be omitted and the link ID be + * specified, or if transmitting to a known peer MLD (with MLD addresses + * in the frame) both can be omitted and the link will be selected by + * lower layers. + * For RX notification, %NL80211_ATTR_RX_HW_TIMESTAMP may be included to + * indicate the frame RX timestamp and %NL80211_ATTR_TX_HW_TIMESTAMP may + * be included to indicate the ack TX timestamp. + * @NL80211_CMD_FRAME_WAIT_CANCEL: When an off-channel TX was requested, this + * command may be used with the corresponding cookie to cancel the wait + * time if it is known that it is no longer necessary. This command is + * also sent as an event whenever the driver has completed the off-channel + * wait time. + * @NL80211_CMD_ACTION: Alias for @NL80211_CMD_FRAME for backward compatibility. + * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame + * transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies + * the TX command and %NL80211_ATTR_FRAME includes the contents of the + * frame. %NL80211_ATTR_ACK flag is included if the recipient acknowledged + * the frame. %NL80211_ATTR_TX_HW_TIMESTAMP may be included to indicate the + * tx timestamp and %NL80211_ATTR_RX_HW_TIMESTAMP may be included to + * indicate the ack RX timestamp. + * @NL80211_CMD_ACTION_TX_STATUS: Alias for @NL80211_CMD_FRAME_TX_STATUS for + * backward compatibility. + * + * @NL80211_CMD_SET_POWER_SAVE: Set powersave, using %NL80211_ATTR_PS_STATE + * @NL80211_CMD_GET_POWER_SAVE: Get powersave status in %NL80211_ATTR_PS_STATE + * + * @NL80211_CMD_SET_CQM: Connection quality monitor configuration. This command + * is used to configure connection quality monitoring notification trigger + * levels. + * @NL80211_CMD_NOTIFY_CQM: Connection quality monitor notification. This + * command is used as an event to indicate the that a trigger level was + * reached. + * @NL80211_CMD_SET_CHANNEL: Set the channel (using %NL80211_ATTR_WIPHY_FREQ + * and the attributes determining channel width) the given interface + * (identifed by %NL80211_ATTR_IFINDEX) shall operate on. + * In case multiple channels are supported by the device, the mechanism + * with which it switches channels is implementation-defined. + * When a monitor interface is given, it can only switch channel while + * no other interfaces are operating to avoid disturbing the operation + * of any other interfaces, and other interfaces will again take + * precedence when they are used. + * + * @NL80211_CMD_SET_WDS_PEER: Set the MAC address of the peer on a WDS interface + * (no longer supported). + * + * @NL80211_CMD_SET_MULTICAST_TO_UNICAST: Configure if this AP should perform + * multicast to unicast conversion. When enabled, all multicast packets + * with ethertype ARP, IPv4 or IPv6 (possibly within an 802.1Q header) + * will be sent out to each station once with the destination (multicast) + * MAC address replaced by the station's MAC address. Note that this may + * break certain expectations of the receiver, e.g. the ability to drop + * unicast IP packets encapsulated in multicast L2 frames, or the ability + * to not send destination unreachable messages in such cases. + * This can only be toggled per BSS. Configure this on an interface of + * type %NL80211_IFTYPE_AP. It applies to all its VLAN interfaces + * (%NL80211_IFTYPE_AP_VLAN), except for those in 4addr (WDS) mode. + * If %NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED is not present with this + * command, the feature is disabled. + * + * @NL80211_CMD_JOIN_MESH: Join a mesh. The mesh ID must be given, and initial + * mesh config parameters may be given. + * @NL80211_CMD_LEAVE_MESH: Leave the mesh network -- no special arguments, the + * network is determined by the network interface. + * + * @NL80211_CMD_UNPROT_DEAUTHENTICATE: Unprotected deauthentication frame + * notification. This event is used to indicate that an unprotected + * deauthentication frame was dropped when MFP is in use. + * @NL80211_CMD_UNPROT_DISASSOCIATE: Unprotected disassociation frame + * notification. This event is used to indicate that an unprotected + * disassociation frame was dropped when MFP is in use. + * + * @NL80211_CMD_NEW_PEER_CANDIDATE: Notification on the reception of a + * beacon or probe response from a compatible mesh peer. This is only + * sent while no station information (sta_info) exists for the new peer + * candidate and when @NL80211_MESH_SETUP_USERSPACE_AUTH, + * @NL80211_MESH_SETUP_USERSPACE_AMPE, or + * @NL80211_MESH_SETUP_USERSPACE_MPM is set. On reception of this + * notification, userspace may decide to create a new station + * (@NL80211_CMD_NEW_STATION). To stop this notification from + * reoccurring, the userspace authentication daemon may want to create the + * new station with the AUTHENTICATED flag unset and maybe change it later + * depending on the authentication result. + * + * @NL80211_CMD_GET_WOWLAN: get Wake-on-Wireless-LAN (WoWLAN) settings. + * @NL80211_CMD_SET_WOWLAN: set Wake-on-Wireless-LAN (WoWLAN) settings. + * Since wireless is more complex than wired ethernet, it supports + * various triggers. These triggers can be configured through this + * command with the %NL80211_ATTR_WOWLAN_TRIGGERS attribute. For + * more background information, see + * https://wireless.wiki.kernel.org/en/users/Documentation/WoWLAN. + * The @NL80211_CMD_SET_WOWLAN command can also be used as a notification + * from the driver reporting the wakeup reason. In this case, the + * @NL80211_ATTR_WOWLAN_TRIGGERS attribute will contain the reason + * for the wakeup, if it was caused by wireless. If it is not present + * in the wakeup notification, the wireless device didn't cause the + * wakeup but reports that it was woken up. + * + * @NL80211_CMD_SET_REKEY_OFFLOAD: This command is used give the driver + * the necessary information for supporting GTK rekey offload. This + * feature is typically used during WoWLAN. The configuration data + * is contained in %NL80211_ATTR_REKEY_DATA (which is nested and + * contains the data in sub-attributes). After rekeying happened, + * this command may also be sent by the driver as an MLME event to + * inform userspace of the new replay counter. + * + * @NL80211_CMD_PMKSA_CANDIDATE: This is used as an event to inform userspace + * of PMKSA caching dandidates. + * + * @NL80211_CMD_TDLS_OPER: Perform a high-level TDLS command (e.g. link setup). + * In addition, this can be used as an event to request userspace to take + * actions on TDLS links (set up a new link or tear down an existing one). + * In such events, %NL80211_ATTR_TDLS_OPERATION indicates the requested + * operation, %NL80211_ATTR_MAC contains the peer MAC address, and + * %NL80211_ATTR_REASON_CODE the reason code to be used (only with + * %NL80211_TDLS_TEARDOWN). + * @NL80211_CMD_TDLS_MGMT: Send a TDLS management frame. The + * %NL80211_ATTR_TDLS_ACTION attribute determines the type of frame to be + * sent. Public Action codes (802.11-2012 8.1.5.1) will be sent as + * 802.11 management frames, while TDLS action codes (802.11-2012 + * 8.5.13.1) will be encapsulated and sent as data frames. The currently + * supported Public Action code is %WLAN_PUB_ACTION_TDLS_DISCOVER_RES + * and the currently supported TDLS actions codes are given in + * &enum ieee80211_tdls_actioncode. + * + * @NL80211_CMD_UNEXPECTED_FRAME: Used by an application controlling an AP + * (or GO) interface (i.e. hostapd) to ask for unexpected frames to + * implement sending deauth to stations that send unexpected class 3 + * frames. Also used as the event sent by the kernel when such a frame + * is received. + * For the event, the %NL80211_ATTR_MAC attribute carries the TA and + * other attributes like the interface index are present. + * If used as the command it must have an interface index and you can + * only unsubscribe from the event by closing the socket. Subscription + * is also for %NL80211_CMD_UNEXPECTED_4ADDR_FRAME events. + * + * @NL80211_CMD_UNEXPECTED_4ADDR_FRAME: Sent as an event indicating that the + * associated station identified by %NL80211_ATTR_MAC sent a 4addr frame + * and wasn't already in a 4-addr VLAN. The event will be sent similarly + * to the %NL80211_CMD_UNEXPECTED_FRAME event, to the same listener. + * + * @NL80211_CMD_PROBE_CLIENT: Probe an associated station on an AP interface + * by sending a null data frame to it and reporting when the frame is + * acknowleged. This is used to allow timing out inactive clients. Uses + * %NL80211_ATTR_IFINDEX and %NL80211_ATTR_MAC. The command returns a + * direct reply with an %NL80211_ATTR_COOKIE that is later used to match + * up the event with the request. The event includes the same data and + * has %NL80211_ATTR_ACK set if the frame was ACKed. + * + * @NL80211_CMD_REGISTER_BEACONS: Register this socket to receive beacons from + * other BSSes when any interfaces are in AP mode. This helps implement + * OLBC handling in hostapd. Beacons are reported in %NL80211_CMD_FRAME + * messages. Note that per PHY only one application may register. + * + * @NL80211_CMD_SET_NOACK_MAP: sets a bitmap for the individual TIDs whether + * No Acknowledgement Policy should be applied. + * + * @NL80211_CMD_CH_SWITCH_NOTIFY: An AP or GO may decide to switch channels + * independently of the userspace SME, send this event indicating + * %NL80211_ATTR_IFINDEX is now on %NL80211_ATTR_WIPHY_FREQ and the + * attributes determining channel width. This indication may also be + * sent when a remotely-initiated switch (e.g., when a STA receives a CSA + * from the remote AP) is completed; + * + * @NL80211_CMD_CH_SWITCH_STARTED_NOTIFY: Notify that a channel switch + * has been started on an interface, regardless of the initiator + * (ie. whether it was requested from a remote device or + * initiated on our own). It indicates that + * %NL80211_ATTR_IFINDEX will be on %NL80211_ATTR_WIPHY_FREQ + * after %NL80211_ATTR_CH_SWITCH_COUNT TBTT's. The userspace may + * decide to react to this indication by requesting other + * interfaces to change channel as well. + * + * @NL80211_CMD_START_P2P_DEVICE: Start the given P2P Device, identified by + * its %NL80211_ATTR_WDEV identifier. It must have been created with + * %NL80211_CMD_NEW_INTERFACE previously. After it has been started, the + * P2P Device can be used for P2P operations, e.g. remain-on-channel and + * public action frame TX. + * @NL80211_CMD_STOP_P2P_DEVICE: Stop the given P2P Device, identified by + * its %NL80211_ATTR_WDEV identifier. + * + * @NL80211_CMD_CONN_FAILED: connection request to an AP failed; used to + * notify userspace that AP has rejected the connection request from a + * station, due to particular reason. %NL80211_ATTR_CONN_FAILED_REASON + * is used for this. + * + * @NL80211_CMD_SET_MCAST_RATE: Change the rate used to send multicast frames + * for IBSS or MESH vif. + * + * @NL80211_CMD_SET_MAC_ACL: sets ACL for MAC address based access control. + * This is to be used with the drivers advertising the support of MAC + * address based access control. List of MAC addresses is passed in + * %NL80211_ATTR_MAC_ADDRS and ACL policy is passed in + * %NL80211_ATTR_ACL_POLICY. Driver will enable ACL with this list, if it + * is not already done. The new list will replace any existing list. Driver + * will clear its ACL when the list of MAC addresses passed is empty. This + * command is used in AP/P2P GO mode. Driver has to make sure to clear its + * ACL list during %NL80211_CMD_STOP_AP. + * + * @NL80211_CMD_RADAR_DETECT: Start a Channel availability check (CAC). Once + * a radar is detected or the channel availability scan (CAC) has finished + * or was aborted, or a radar was detected, usermode will be notified with + * this event. This command is also used to notify userspace about radars + * while operating on this channel. + * %NL80211_ATTR_RADAR_EVENT is used to inform about the type of the + * event. + * + * @NL80211_CMD_GET_PROTOCOL_FEATURES: Get global nl80211 protocol features, + * i.e. features for the nl80211 protocol rather than device features. + * Returns the features in the %NL80211_ATTR_PROTOCOL_FEATURES bitmap. + * + * @NL80211_CMD_UPDATE_FT_IES: Pass down the most up-to-date Fast Transition + * Information Element to the WLAN driver + * + * @NL80211_CMD_FT_EVENT: Send a Fast transition event from the WLAN driver + * to the supplicant. This will carry the target AP's MAC address along + * with the relevant Information Elements. This event is used to report + * received FT IEs (MDIE, FTIE, RSN IE, TIE, RICIE). + * + * @NL80211_CMD_CRIT_PROTOCOL_START: Indicates user-space will start running + * a critical protocol that needs more reliability in the connection to + * complete. + * + * @NL80211_CMD_CRIT_PROTOCOL_STOP: Indicates the connection reliability can + * return back to normal. + * + * @NL80211_CMD_GET_COALESCE: Get currently supported coalesce rules. + * @NL80211_CMD_SET_COALESCE: Configure coalesce rules or clear existing rules. + * + * @NL80211_CMD_CHANNEL_SWITCH: Perform a channel switch by announcing the + * new channel information (Channel Switch Announcement - CSA) + * in the beacon for some time (as defined in the + * %NL80211_ATTR_CH_SWITCH_COUNT parameter) and then change to the + * new channel. Userspace provides the new channel information (using + * %NL80211_ATTR_WIPHY_FREQ and the attributes determining channel + * width). %NL80211_ATTR_CH_SWITCH_BLOCK_TX may be supplied to inform + * other station that transmission must be blocked until the channel + * switch is complete. + * + * @NL80211_CMD_VENDOR: Vendor-specified command/event. The command is specified + * by the %NL80211_ATTR_VENDOR_ID attribute and a sub-command in + * %NL80211_ATTR_VENDOR_SUBCMD. Parameter(s) can be transported in + * %NL80211_ATTR_VENDOR_DATA. + * For feature advertisement, the %NL80211_ATTR_VENDOR_DATA attribute is + * used in the wiphy data as a nested attribute containing descriptions + * (&struct nl80211_vendor_cmd_info) of the supported vendor commands. + * This may also be sent as an event with the same attributes. + * + * @NL80211_CMD_SET_QOS_MAP: Set Interworking QoS mapping for IP DSCP values. + * The QoS mapping information is included in %NL80211_ATTR_QOS_MAP. If + * that attribute is not included, QoS mapping is disabled. Since this + * QoS mapping is relevant for IP packets, it is only valid during an + * association. This is cleared on disassociation and AP restart. + * + * @NL80211_CMD_ADD_TX_TS: Ask the kernel to add a traffic stream for the given + * %NL80211_ATTR_TSID and %NL80211_ATTR_MAC with %NL80211_ATTR_USER_PRIO + * and %NL80211_ATTR_ADMITTED_TIME parameters. + * Note that the action frame handshake with the AP shall be handled by + * userspace via the normal management RX/TX framework, this only sets + * up the TX TS in the driver/device. + * If the admitted time attribute is not added then the request just checks + * if a subsequent setup could be successful, the intent is to use this to + * avoid setting up a session with the AP when local restrictions would + * make that impossible. However, the subsequent "real" setup may still + * fail even if the check was successful. + * @NL80211_CMD_DEL_TX_TS: Remove an existing TS with the %NL80211_ATTR_TSID + * and %NL80211_ATTR_MAC parameters. It isn't necessary to call this + * before removing a station entry entirely, or before disassociating + * or similar, cleanup will happen in the driver/device in this case. + * + * @NL80211_CMD_GET_MPP: Get mesh path attributes for mesh proxy path to + * destination %NL80211_ATTR_MAC on the interface identified by + * %NL80211_ATTR_IFINDEX. + * + * @NL80211_CMD_JOIN_OCB: Join the OCB network. The center frequency and + * bandwidth of a channel must be given. + * @NL80211_CMD_LEAVE_OCB: Leave the OCB network -- no special arguments, the + * network is determined by the network interface. + * + * @NL80211_CMD_TDLS_CHANNEL_SWITCH: Start channel-switching with a TDLS peer, + * identified by the %NL80211_ATTR_MAC parameter. A target channel is + * provided via %NL80211_ATTR_WIPHY_FREQ and other attributes determining + * channel width/type. The target operating class is given via + * %NL80211_ATTR_OPER_CLASS. + * The driver is responsible for continually initiating channel-switching + * operations and returning to the base channel for communication with the + * AP. + * @NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH: Stop channel-switching with a TDLS + * peer given by %NL80211_ATTR_MAC. Both peers must be on the base channel + * when this command completes. + * + * @NL80211_CMD_WIPHY_REG_CHANGE: Similar to %NL80211_CMD_REG_CHANGE, but used + * as an event to indicate changes for devices with wiphy-specific regdom + * management. + * + * @NL80211_CMD_ABORT_SCAN: Stop an ongoing scan. Returns -ENOENT if a scan is + * not running. The driver indicates the status of the scan through + * cfg80211_scan_done(). + * + * @NL80211_CMD_START_NAN: Start NAN operation, identified by its + * %NL80211_ATTR_WDEV interface. This interface must have been + * previously created with %NL80211_CMD_NEW_INTERFACE. After it + * has been started, the NAN interface will create or join a + * cluster. This command must have a valid + * %NL80211_ATTR_NAN_MASTER_PREF attribute and optional + * %NL80211_ATTR_BANDS attributes. If %NL80211_ATTR_BANDS is + * omitted or set to 0, it means don't-care and the device will + * decide what to use. After this command NAN functions can be + * added. + * @NL80211_CMD_STOP_NAN: Stop the NAN operation, identified by + * its %NL80211_ATTR_WDEV interface. + * @NL80211_CMD_ADD_NAN_FUNCTION: Add a NAN function. The function is defined + * with %NL80211_ATTR_NAN_FUNC nested attribute. When called, this + * operation returns the strictly positive and unique instance id + * (%NL80211_ATTR_NAN_FUNC_INST_ID) and a cookie (%NL80211_ATTR_COOKIE) + * of the function upon success. + * Since instance ID's can be re-used, this cookie is the right + * way to identify the function. This will avoid races when a termination + * event is handled by the user space after it has already added a new + * function that got the same instance id from the kernel as the one + * which just terminated. + * This cookie may be used in NAN events even before the command + * returns, so userspace shouldn't process NAN events until it processes + * the response to this command. + * Look at %NL80211_ATTR_SOCKET_OWNER as well. + * @NL80211_CMD_DEL_NAN_FUNCTION: Delete a NAN function by cookie. + * This command is also used as a notification sent when a NAN function is + * terminated. This will contain a %NL80211_ATTR_NAN_FUNC_INST_ID + * and %NL80211_ATTR_COOKIE attributes. + * @NL80211_CMD_CHANGE_NAN_CONFIG: Change current NAN + * configuration. NAN must be operational (%NL80211_CMD_START_NAN + * was executed). It must contain at least one of the following + * attributes: %NL80211_ATTR_NAN_MASTER_PREF, + * %NL80211_ATTR_BANDS. If %NL80211_ATTR_BANDS is omitted, the + * current configuration is not changed. If it is present but + * set to zero, the configuration is changed to don't-care + * (i.e. the device can decide what to do). + * @NL80211_CMD_NAN_FUNC_MATCH: Notification sent when a match is reported. + * This will contain a %NL80211_ATTR_NAN_MATCH nested attribute and + * %NL80211_ATTR_COOKIE. + * + * @NL80211_CMD_UPDATE_CONNECT_PARAMS: Update one or more connect parameters + * for subsequent roaming cases if the driver or firmware uses internal + * BSS selection. This command can be issued only while connected and it + * does not result in a change for the current association. Currently, + * only the %NL80211_ATTR_IE data is used and updated with this command. + * + * @NL80211_CMD_SET_PMK: For offloaded 4-Way handshake, set the PMK or PMK-R0 + * for the given authenticator address (specified with %NL80211_ATTR_MAC). + * When %NL80211_ATTR_PMKR0_NAME is set, %NL80211_ATTR_PMK specifies the + * PMK-R0, otherwise it specifies the PMK. + * @NL80211_CMD_DEL_PMK: For offloaded 4-Way handshake, delete the previously + * configured PMK for the authenticator address identified by + * %NL80211_ATTR_MAC. + * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates an 802.1X FT roam was + * completed successfully. Drivers that support 4 way handshake offload + * should send this event after indicating 802.1X FT assocation with + * %NL80211_CMD_ROAM. If the 4 way handshake failed %NL80211_CMD_DISCONNECT + * should be indicated instead. + * @NL80211_CMD_CONTROL_PORT_FRAME: Control Port (e.g. PAE) frame TX request + * and RX notification. This command is used both as a request to transmit + * a control port frame and as a notification that a control port frame + * has been received. %NL80211_ATTR_FRAME is used to specify the + * frame contents. The frame is the raw EAPoL data, without ethernet or + * 802.11 headers. + * For an MLD transmitter, the %NL80211_ATTR_MLO_LINK_ID may be given and + * its effect will depend on the destination: If the destination is known + * to be an MLD, this will be used as a hint to select the link to transmit + * the frame on. If the destination is not an MLD, this will select both + * the link to transmit on and the source address will be set to the link + * address of that link. + * When used as an event indication %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, + * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT and %NL80211_ATTR_MAC are added + * indicating the protocol type of the received frame; whether the frame + * was received unencrypted and the MAC address of the peer respectively. + * + * @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded. + * + * @NL80211_CMD_EXTERNAL_AUTH: This interface is exclusively defined for host + * drivers that do not define separate commands for authentication and + * association, but rely on user space for the authentication to happen. + * This interface acts both as the event request (driver to user space) + * to trigger the authentication and command response (userspace to + * driver) to indicate the authentication status. + * + * User space uses the %NL80211_CMD_CONNECT command to the host driver to + * trigger a connection. The host driver selects a BSS and further uses + * this interface to offload only the authentication part to the user + * space. Authentication frames are passed between the driver and user + * space through the %NL80211_CMD_FRAME interface. Host driver proceeds + * further with the association after getting successful authentication + * status. User space indicates the authentication status through + * %NL80211_ATTR_STATUS_CODE attribute in %NL80211_CMD_EXTERNAL_AUTH + * command interface. + * + * Host driver reports this status on an authentication failure to the + * user space through the connect result as the user space would have + * initiated the connection through the connect request. + * + * @NL80211_CMD_STA_OPMODE_CHANGED: An event that notify station's + * ht opmode or vht opmode changes using any of %NL80211_ATTR_SMPS_MODE, + * %NL80211_ATTR_CHANNEL_WIDTH,%NL80211_ATTR_NSS attributes with its + * address(specified in %NL80211_ATTR_MAC). + * + * @NL80211_CMD_GET_FTM_RESPONDER_STATS: Retrieve FTM responder statistics, in + * the %NL80211_ATTR_FTM_RESPONDER_STATS attribute. + * + * @NL80211_CMD_PEER_MEASUREMENT_START: start a (set of) peer measurement(s) + * with the given parameters, which are encapsulated in the nested + * %NL80211_ATTR_PEER_MEASUREMENTS attribute. Optionally, MAC address + * randomization may be enabled and configured by specifying the + * %NL80211_ATTR_MAC and %NL80211_ATTR_MAC_MASK attributes. + * If a timeout is requested, use the %NL80211_ATTR_TIMEOUT attribute. + * A u64 cookie for further %NL80211_ATTR_COOKIE use is returned in + * the netlink extended ack message. + * + * To cancel a measurement, close the socket that requested it. + * + * Measurement results are reported to the socket that requested the + * measurement using @NL80211_CMD_PEER_MEASUREMENT_RESULT when they + * become available, so applications must ensure a large enough socket + * buffer size. + * + * Depending on driver support it may or may not be possible to start + * multiple concurrent measurements. + * @NL80211_CMD_PEER_MEASUREMENT_RESULT: This command number is used for the + * result notification from the driver to the requesting socket. + * @NL80211_CMD_PEER_MEASUREMENT_COMPLETE: Notification only, indicating that + * the measurement completed, using the measurement cookie + * (%NL80211_ATTR_COOKIE). + * + * @NL80211_CMD_NOTIFY_RADAR: Notify the kernel that a radar signal was + * detected and reported by a neighboring device on the channel + * indicated by %NL80211_ATTR_WIPHY_FREQ and other attributes + * determining the width and type. + * + * @NL80211_CMD_UPDATE_OWE_INFO: This interface allows the host driver to + * offload OWE processing to user space. This intends to support + * OWE AKM by the host drivers that implement SME but rely + * on the user space for the cryptographic/DH IE processing in AP mode. + * + * @NL80211_CMD_PROBE_MESH_LINK: The requirement for mesh link metric + * refreshing, is that from one mesh point we be able to send some data + * frames to other mesh points which are not currently selected as a + * primary traffic path, but which are only 1 hop away. The absence of + * the primary path to the chosen node makes it necessary to apply some + * form of marking on a chosen packet stream so that the packets can be + * properly steered to the selected node for testing, and not by the + * regular mesh path lookup. Further, the packets must be of type data + * so that the rate control (often embedded in firmware) is used for + * rate selection. + * + * Here attribute %NL80211_ATTR_MAC is used to specify connected mesh + * peer MAC address and %NL80211_ATTR_FRAME is used to specify the frame + * content. The frame is ethernet data. + * + * @NL80211_CMD_SET_TID_CONFIG: Data frame TID specific configuration + * is passed using %NL80211_ATTR_TID_CONFIG attribute. + * + * @NL80211_CMD_UNPROT_BEACON: Unprotected or incorrectly protected Beacon + * frame. This event is used to indicate that a received Beacon frame was + * dropped because it did not include a valid MME MIC while beacon + * protection was enabled (BIGTK configured in station mode). + * + * @NL80211_CMD_CONTROL_PORT_FRAME_TX_STATUS: Report TX status of a control + * port frame transmitted with %NL80211_CMD_CONTROL_PORT_FRAME. + * %NL80211_ATTR_COOKIE identifies the TX command and %NL80211_ATTR_FRAME + * includes the contents of the frame. %NL80211_ATTR_ACK flag is included + * if the recipient acknowledged the frame. + * + * @NL80211_CMD_SET_SAR_SPECS: SAR power limitation configuration is + * passed using %NL80211_ATTR_SAR_SPEC. %NL80211_ATTR_WIPHY is used to + * specify the wiphy index to be applied to. + * + * @NL80211_CMD_OBSS_COLOR_COLLISION: This notification is sent out whenever + * mac80211/drv detects a bss color collision. + * + * @NL80211_CMD_COLOR_CHANGE_REQUEST: This command is used to indicate that + * userspace wants to change the BSS color. + * + * @NL80211_CMD_COLOR_CHANGE_STARTED: Notify userland, that a color change has + * started + * + * @NL80211_CMD_COLOR_CHANGE_ABORTED: Notify userland, that the color change has + * been aborted + * + * @NL80211_CMD_COLOR_CHANGE_COMPLETED: Notify userland that the color change + * has completed + * + * @NL80211_CMD_SET_FILS_AAD: Set FILS AAD data to the driver using - + * &NL80211_ATTR_MAC - for STA MAC address + * &NL80211_ATTR_FILS_KEK - for KEK + * &NL80211_ATTR_FILS_NONCES - for FILS Nonces + * (STA Nonce 16 bytes followed by AP Nonce 16 bytes) + * + * @NL80211_CMD_ASSOC_COMEBACK: notification about an association + * temporal rejection with comeback. The event includes %NL80211_ATTR_MAC + * to describe the BSSID address of the AP and %NL80211_ATTR_TIMEOUT to + * specify the timeout value. + * + * @NL80211_CMD_ADD_LINK: Add a new link to an interface. The + * %NL80211_ATTR_MLO_LINK_ID attribute is used for the new link. + * @NL80211_CMD_REMOVE_LINK: Remove a link from an interface. This may come + * without %NL80211_ATTR_MLO_LINK_ID as an easy way to remove all links + * in preparation for e.g. roaming to a regular (non-MLO) AP. + * + * @NL80211_CMD_ADD_LINK_STA: Add a link to an MLD station + * @NL80211_CMD_MODIFY_LINK_STA: Modify a link of an MLD station + * @NL80211_CMD_REMOVE_LINK_STA: Remove a link of an MLD station + * + * @NL80211_CMD_MAX: highest used command number + * @__NL80211_CMD_AFTER_LAST: internal use + */ +enum nl80211_commands { +/* don't change the order or add anything between, this is ABI! */ + NL80211_CMD_UNSPEC, + + NL80211_CMD_GET_WIPHY, /* can dump */ + NL80211_CMD_SET_WIPHY, + NL80211_CMD_NEW_WIPHY, + NL80211_CMD_DEL_WIPHY, + + NL80211_CMD_GET_INTERFACE, /* can dump */ + NL80211_CMD_SET_INTERFACE, + NL80211_CMD_NEW_INTERFACE, + NL80211_CMD_DEL_INTERFACE, + + NL80211_CMD_GET_KEY, + NL80211_CMD_SET_KEY, + NL80211_CMD_NEW_KEY, + NL80211_CMD_DEL_KEY, + + NL80211_CMD_GET_BEACON, + NL80211_CMD_SET_BEACON, + NL80211_CMD_START_AP, + NL80211_CMD_NEW_BEACON = NL80211_CMD_START_AP, + NL80211_CMD_STOP_AP, + NL80211_CMD_DEL_BEACON = NL80211_CMD_STOP_AP, + + NL80211_CMD_GET_STATION, + NL80211_CMD_SET_STATION, + NL80211_CMD_NEW_STATION, + NL80211_CMD_DEL_STATION, + + NL80211_CMD_GET_MPATH, + NL80211_CMD_SET_MPATH, + NL80211_CMD_NEW_MPATH, + NL80211_CMD_DEL_MPATH, + + NL80211_CMD_SET_BSS, + + NL80211_CMD_SET_REG, + NL80211_CMD_REQ_SET_REG, + + NL80211_CMD_GET_MESH_CONFIG, + NL80211_CMD_SET_MESH_CONFIG, + + NL80211_CMD_SET_MGMT_EXTRA_IE /* reserved; not used */, + + NL80211_CMD_GET_REG, + + NL80211_CMD_GET_SCAN, + NL80211_CMD_TRIGGER_SCAN, + NL80211_CMD_NEW_SCAN_RESULTS, + NL80211_CMD_SCAN_ABORTED, + + NL80211_CMD_REG_CHANGE, + + NL80211_CMD_AUTHENTICATE, + NL80211_CMD_ASSOCIATE, + NL80211_CMD_DEAUTHENTICATE, + NL80211_CMD_DISASSOCIATE, + + NL80211_CMD_MICHAEL_MIC_FAILURE, + + NL80211_CMD_REG_BEACON_HINT, + + NL80211_CMD_JOIN_IBSS, + NL80211_CMD_LEAVE_IBSS, + + NL80211_CMD_TESTMODE, + + NL80211_CMD_CONNECT, + NL80211_CMD_ROAM, + NL80211_CMD_DISCONNECT, + + NL80211_CMD_SET_WIPHY_NETNS, + + NL80211_CMD_GET_SURVEY, + NL80211_CMD_NEW_SURVEY_RESULTS, + + NL80211_CMD_SET_PMKSA, + NL80211_CMD_DEL_PMKSA, + NL80211_CMD_FLUSH_PMKSA, + + NL80211_CMD_REMAIN_ON_CHANNEL, + NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, + + NL80211_CMD_SET_TX_BITRATE_MASK, + + NL80211_CMD_REGISTER_FRAME, + NL80211_CMD_REGISTER_ACTION = NL80211_CMD_REGISTER_FRAME, + NL80211_CMD_FRAME, + NL80211_CMD_ACTION = NL80211_CMD_FRAME, + NL80211_CMD_FRAME_TX_STATUS, + NL80211_CMD_ACTION_TX_STATUS = NL80211_CMD_FRAME_TX_STATUS, + + NL80211_CMD_SET_POWER_SAVE, + NL80211_CMD_GET_POWER_SAVE, + + NL80211_CMD_SET_CQM, + NL80211_CMD_NOTIFY_CQM, + + NL80211_CMD_SET_CHANNEL, + NL80211_CMD_SET_WDS_PEER, + + NL80211_CMD_FRAME_WAIT_CANCEL, + + NL80211_CMD_JOIN_MESH, + NL80211_CMD_LEAVE_MESH, + + NL80211_CMD_UNPROT_DEAUTHENTICATE, + NL80211_CMD_UNPROT_DISASSOCIATE, + + NL80211_CMD_NEW_PEER_CANDIDATE, + + NL80211_CMD_GET_WOWLAN, + NL80211_CMD_SET_WOWLAN, + + NL80211_CMD_START_SCHED_SCAN, + NL80211_CMD_STOP_SCHED_SCAN, + NL80211_CMD_SCHED_SCAN_RESULTS, + NL80211_CMD_SCHED_SCAN_STOPPED, + + NL80211_CMD_SET_REKEY_OFFLOAD, + + NL80211_CMD_PMKSA_CANDIDATE, + + NL80211_CMD_TDLS_OPER, + NL80211_CMD_TDLS_MGMT, + + NL80211_CMD_UNEXPECTED_FRAME, + + NL80211_CMD_PROBE_CLIENT, + + NL80211_CMD_REGISTER_BEACONS, + + NL80211_CMD_UNEXPECTED_4ADDR_FRAME, + + NL80211_CMD_SET_NOACK_MAP, + + NL80211_CMD_CH_SWITCH_NOTIFY, + + NL80211_CMD_START_P2P_DEVICE, + NL80211_CMD_STOP_P2P_DEVICE, + + NL80211_CMD_CONN_FAILED, + + NL80211_CMD_SET_MCAST_RATE, + + NL80211_CMD_SET_MAC_ACL, + + NL80211_CMD_RADAR_DETECT, + + NL80211_CMD_GET_PROTOCOL_FEATURES, + + NL80211_CMD_UPDATE_FT_IES, + NL80211_CMD_FT_EVENT, + + NL80211_CMD_CRIT_PROTOCOL_START, + NL80211_CMD_CRIT_PROTOCOL_STOP, + + NL80211_CMD_GET_COALESCE, + NL80211_CMD_SET_COALESCE, + + NL80211_CMD_CHANNEL_SWITCH, + + NL80211_CMD_VENDOR, + + NL80211_CMD_SET_QOS_MAP, + + NL80211_CMD_ADD_TX_TS, + NL80211_CMD_DEL_TX_TS, + + NL80211_CMD_GET_MPP, + + NL80211_CMD_JOIN_OCB, + NL80211_CMD_LEAVE_OCB, + + NL80211_CMD_CH_SWITCH_STARTED_NOTIFY, + + NL80211_CMD_TDLS_CHANNEL_SWITCH, + NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH, + + NL80211_CMD_WIPHY_REG_CHANGE, + + NL80211_CMD_ABORT_SCAN, + + NL80211_CMD_START_NAN, + NL80211_CMD_STOP_NAN, + NL80211_CMD_ADD_NAN_FUNCTION, + NL80211_CMD_DEL_NAN_FUNCTION, + NL80211_CMD_CHANGE_NAN_CONFIG, + NL80211_CMD_NAN_MATCH, + + NL80211_CMD_SET_MULTICAST_TO_UNICAST, + + NL80211_CMD_UPDATE_CONNECT_PARAMS, + + NL80211_CMD_SET_PMK, + NL80211_CMD_DEL_PMK, + + NL80211_CMD_PORT_AUTHORIZED, + + NL80211_CMD_RELOAD_REGDB, + + NL80211_CMD_EXTERNAL_AUTH, + + NL80211_CMD_STA_OPMODE_CHANGED, + + NL80211_CMD_CONTROL_PORT_FRAME, + + NL80211_CMD_GET_FTM_RESPONDER_STATS, + + NL80211_CMD_PEER_MEASUREMENT_START, + NL80211_CMD_PEER_MEASUREMENT_RESULT, + NL80211_CMD_PEER_MEASUREMENT_COMPLETE, + + NL80211_CMD_NOTIFY_RADAR, + + NL80211_CMD_UPDATE_OWE_INFO, + + NL80211_CMD_PROBE_MESH_LINK, + + NL80211_CMD_SET_TID_CONFIG, + + NL80211_CMD_UNPROT_BEACON, + + NL80211_CMD_CONTROL_PORT_FRAME_TX_STATUS, + + NL80211_CMD_SET_SAR_SPECS, + + NL80211_CMD_OBSS_COLOR_COLLISION, + + NL80211_CMD_COLOR_CHANGE_REQUEST, + + NL80211_CMD_COLOR_CHANGE_STARTED, + NL80211_CMD_COLOR_CHANGE_ABORTED, + NL80211_CMD_COLOR_CHANGE_COMPLETED, + + NL80211_CMD_SET_FILS_AAD, + + NL80211_CMD_ASSOC_COMEBACK, + + NL80211_CMD_ADD_LINK, + NL80211_CMD_REMOVE_LINK, + + NL80211_CMD_ADD_LINK_STA, + NL80211_CMD_MODIFY_LINK_STA, + NL80211_CMD_REMOVE_LINK_STA, + + /* add new commands above here */ + + /* used to define NL80211_CMD_MAX below */ + __NL80211_CMD_AFTER_LAST, + NL80211_CMD_MAX = __NL80211_CMD_AFTER_LAST - 1 +}; + +/* + * Allow user space programs to use #ifdef on new commands by defining them + * here + */ +#define NL80211_CMD_SET_BSS NL80211_CMD_SET_BSS +#define NL80211_CMD_SET_MGMT_EXTRA_IE NL80211_CMD_SET_MGMT_EXTRA_IE +#define NL80211_CMD_REG_CHANGE NL80211_CMD_REG_CHANGE +#define NL80211_CMD_AUTHENTICATE NL80211_CMD_AUTHENTICATE +#define NL80211_CMD_ASSOCIATE NL80211_CMD_ASSOCIATE +#define NL80211_CMD_DEAUTHENTICATE NL80211_CMD_DEAUTHENTICATE +#define NL80211_CMD_DISASSOCIATE NL80211_CMD_DISASSOCIATE +#define NL80211_CMD_REG_BEACON_HINT NL80211_CMD_REG_BEACON_HINT + +#define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS + +/* source-level API compatibility */ +#define NL80211_CMD_GET_MESH_PARAMS NL80211_CMD_GET_MESH_CONFIG +#define NL80211_CMD_SET_MESH_PARAMS NL80211_CMD_SET_MESH_CONFIG +#define NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE NL80211_MESH_SETUP_IE + +/** + * enum nl80211_attrs - nl80211 netlink attributes + * + * @NL80211_ATTR_UNSPEC: unspecified attribute to catch errors + * + * @NL80211_ATTR_WIPHY: index of wiphy to operate on, cf. + * /sys/class/ieee80211//index + * @NL80211_ATTR_WIPHY_NAME: wiphy name (used for renaming) + * @NL80211_ATTR_WIPHY_TXQ_PARAMS: a nested array of TX queue parameters + * @NL80211_ATTR_WIPHY_FREQ: frequency of the selected channel in MHz, + * defines the channel together with the (deprecated) + * %NL80211_ATTR_WIPHY_CHANNEL_TYPE attribute or the attributes + * %NL80211_ATTR_CHANNEL_WIDTH and if needed %NL80211_ATTR_CENTER_FREQ1 + * and %NL80211_ATTR_CENTER_FREQ2 + * @NL80211_ATTR_CHANNEL_WIDTH: u32 attribute containing one of the values + * of &enum nl80211_chan_width, describing the channel width. See the + * documentation of the enum for more information. + * @NL80211_ATTR_CENTER_FREQ1: Center frequency of the first part of the + * channel, used for anything but 20 MHz bandwidth. In S1G this is the + * operating channel center frequency. + * @NL80211_ATTR_CENTER_FREQ2: Center frequency of the second part of the + * channel, used only for 80+80 MHz bandwidth + * @NL80211_ATTR_WIPHY_CHANNEL_TYPE: included with NL80211_ATTR_WIPHY_FREQ + * if HT20 or HT40 are to be used (i.e., HT disabled if not included): + * NL80211_CHAN_NO_HT = HT not allowed (i.e., same as not including + * this attribute) + * NL80211_CHAN_HT20 = HT20 only + * NL80211_CHAN_HT40MINUS = secondary channel is below the primary channel + * NL80211_CHAN_HT40PLUS = secondary channel is above the primary channel + * This attribute is now deprecated. + * @NL80211_ATTR_WIPHY_RETRY_SHORT: TX retry limit for frames whose length is + * less than or equal to the RTS threshold; allowed range: 1..255; + * dot11ShortRetryLimit; u8 + * @NL80211_ATTR_WIPHY_RETRY_LONG: TX retry limit for frames whose length is + * greater than the RTS threshold; allowed range: 1..255; + * dot11ShortLongLimit; u8 + * @NL80211_ATTR_WIPHY_FRAG_THRESHOLD: fragmentation threshold, i.e., maximum + * length in octets for frames; allowed range: 256..8000, disable + * fragmentation with (u32)-1; dot11FragmentationThreshold; u32 + * @NL80211_ATTR_WIPHY_RTS_THRESHOLD: RTS threshold (TX frames with length + * larger than or equal to this use RTS/CTS handshake); allowed range: + * 0..65536, disable with (u32)-1; dot11RTSThreshold; u32 + * @NL80211_ATTR_WIPHY_COVERAGE_CLASS: Coverage Class as defined by IEEE 802.11 + * section 7.3.2.9; dot11CoverageClass; u8 + * + * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on + * @NL80211_ATTR_IFNAME: network interface name + * @NL80211_ATTR_IFTYPE: type of virtual interface, see &enum nl80211_iftype + * + * @NL80211_ATTR_WDEV: wireless device identifier, used for pseudo-devices + * that don't have a netdev (u64) + * + * @NL80211_ATTR_MAC: MAC address (various uses) + * + * @NL80211_ATTR_KEY_DATA: (temporal) key data; for TKIP this consists of + * 16 bytes encryption key followed by 8 bytes each for TX and RX MIC + * keys + * @NL80211_ATTR_KEY_IDX: key ID (u8, 0-3) + * @NL80211_ATTR_KEY_CIPHER: key cipher suite (u32, as defined by IEEE 802.11 + * section 7.3.2.25.1, e.g. 0x000FAC04) + * @NL80211_ATTR_KEY_SEQ: transmit key sequence number (IV/PN) for TKIP and + * CCMP keys, each six bytes in little endian + * @NL80211_ATTR_KEY_DEFAULT: Flag attribute indicating the key is default key + * @NL80211_ATTR_KEY_DEFAULT_MGMT: Flag attribute indicating the key is the + * default management key + * @NL80211_ATTR_CIPHER_SUITES_PAIRWISE: For crypto settings for connect or + * other commands, indicates which pairwise cipher suites are used + * @NL80211_ATTR_CIPHER_SUITE_GROUP: For crypto settings for connect or + * other commands, indicates which group cipher suite is used + * + * @NL80211_ATTR_BEACON_INTERVAL: beacon interval in TU + * @NL80211_ATTR_DTIM_PERIOD: DTIM period for beaconing + * @NL80211_ATTR_BEACON_HEAD: portion of the beacon before the TIM IE + * @NL80211_ATTR_BEACON_TAIL: portion of the beacon after the TIM IE + * + * @NL80211_ATTR_STA_AID: Association ID for the station (u16) + * @NL80211_ATTR_STA_FLAGS: flags, nested element with NLA_FLAG attributes of + * &enum nl80211_sta_flags (deprecated, use %NL80211_ATTR_STA_FLAGS2) + * @NL80211_ATTR_STA_LISTEN_INTERVAL: listen interval as defined by + * IEEE 802.11 7.3.1.6 (u16). + * @NL80211_ATTR_STA_SUPPORTED_RATES: supported rates, array of supported + * rates as defined by IEEE 802.11 7.3.2.2 but without the length + * restriction (at most %NL80211_MAX_SUPP_RATES). + * @NL80211_ATTR_STA_VLAN: interface index of VLAN interface to move station + * to, or the AP interface the station was originally added to. + * @NL80211_ATTR_STA_INFO: information about a station, part of station info + * given for %NL80211_CMD_GET_STATION, nested attribute containing + * info as possible, see &enum nl80211_sta_info. + * + * @NL80211_ATTR_WIPHY_BANDS: Information about an operating bands, + * consisting of a nested array. + * + * @NL80211_ATTR_MESH_ID: mesh id (1-32 bytes). + * @NL80211_ATTR_STA_PLINK_ACTION: action to perform on the mesh peer link + * (see &enum nl80211_plink_action). + * @NL80211_ATTR_MPATH_NEXT_HOP: MAC address of the next hop for a mesh path. + * @NL80211_ATTR_MPATH_INFO: information about a mesh_path, part of mesh path + * info given for %NL80211_CMD_GET_MPATH, nested attribute described at + * &enum nl80211_mpath_info. + * + * @NL80211_ATTR_MNTR_FLAGS: flags, nested element with NLA_FLAG attributes of + * &enum nl80211_mntr_flags. + * + * @NL80211_ATTR_REG_ALPHA2: an ISO-3166-alpha2 country code for which the + * current regulatory domain should be set to or is already set to. + * For example, 'CR', for Costa Rica. This attribute is used by the kernel + * to query the CRDA to retrieve one regulatory domain. This attribute can + * also be used by userspace to query the kernel for the currently set + * regulatory domain. We chose an alpha2 as that is also used by the + * IEEE-802.11 country information element to identify a country. + * Users can also simply ask the wireless core to set regulatory domain + * to a specific alpha2. + * @NL80211_ATTR_REG_RULES: a nested array of regulatory domain regulatory + * rules. + * + * @NL80211_ATTR_BSS_CTS_PROT: whether CTS protection is enabled (u8, 0 or 1) + * @NL80211_ATTR_BSS_SHORT_PREAMBLE: whether short preamble is enabled + * (u8, 0 or 1) + * @NL80211_ATTR_BSS_SHORT_SLOT_TIME: whether short slot time enabled + * (u8, 0 or 1) + * @NL80211_ATTR_BSS_BASIC_RATES: basic rates, array of basic + * rates in format defined by IEEE 802.11 7.3.2.2 but without the length + * restriction (at most %NL80211_MAX_SUPP_RATES). + * + * @NL80211_ATTR_HT_CAPABILITY: HT Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION) + * + * @NL80211_ATTR_SUPPORTED_IFTYPES: nested attribute containing all + * supported interface types, each a flag attribute with the number + * of the interface mode. + * + * @NL80211_ATTR_MGMT_SUBTYPE: Management frame subtype for + * %NL80211_CMD_SET_MGMT_EXTRA_IE. + * + * @NL80211_ATTR_IE: Information element(s) data (used, e.g., with + * %NL80211_CMD_SET_MGMT_EXTRA_IE). + * + * @NL80211_ATTR_MAX_NUM_SCAN_SSIDS: number of SSIDs you can scan with + * a single scan request, a wiphy attribute. + * @NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS: number of SSIDs you can + * scan with a single scheduled scan request, a wiphy attribute. + * @NL80211_ATTR_MAX_SCAN_IE_LEN: maximum length of information elements + * that can be added to a scan request + * @NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN: maximum length of information + * elements that can be added to a scheduled scan request + * @NL80211_ATTR_MAX_MATCH_SETS: maximum number of sets that can be + * used with @NL80211_ATTR_SCHED_SCAN_MATCH, a wiphy attribute. + * + * @NL80211_ATTR_SCAN_FREQUENCIES: nested attribute with frequencies (in MHz) + * @NL80211_ATTR_SCAN_SSIDS: nested attribute with SSIDs, leave out for passive + * scanning and include a zero-length SSID (wildcard) for wildcard scan + * @NL80211_ATTR_BSS: scan result BSS + * + * @NL80211_ATTR_REG_INITIATOR: indicates who requested the regulatory domain + * currently in effect. This could be any of the %NL80211_REGDOM_SET_BY_* + * @NL80211_ATTR_REG_TYPE: indicates the type of the regulatory domain currently + * set. This can be one of the nl80211_reg_type (%NL80211_REGDOM_TYPE_*) + * + * @NL80211_ATTR_SUPPORTED_COMMANDS: wiphy attribute that specifies + * an array of command numbers (i.e. a mapping index to command number) + * that the driver for the given wiphy supports. + * + * @NL80211_ATTR_FRAME: frame data (binary attribute), including frame header + * and body, but not FCS; used, e.g., with NL80211_CMD_AUTHENTICATE and + * NL80211_CMD_ASSOCIATE events + * @NL80211_ATTR_SSID: SSID (binary attribute, 0..32 octets) + * @NL80211_ATTR_AUTH_TYPE: AuthenticationType, see &enum nl80211_auth_type, + * represented as a u32 + * @NL80211_ATTR_REASON_CODE: ReasonCode for %NL80211_CMD_DEAUTHENTICATE and + * %NL80211_CMD_DISASSOCIATE, u16 + * + * @NL80211_ATTR_KEY_TYPE: Key Type, see &enum nl80211_key_type, represented as + * a u32 + * + * @NL80211_ATTR_FREQ_BEFORE: A channel which has suffered a regulatory change + * due to considerations from a beacon hint. This attribute reflects + * the state of the channel _before_ the beacon hint processing. This + * attributes consists of a nested attribute containing + * NL80211_FREQUENCY_ATTR_* + * @NL80211_ATTR_FREQ_AFTER: A channel which has suffered a regulatory change + * due to considerations from a beacon hint. This attribute reflects + * the state of the channel _after_ the beacon hint processing. This + * attributes consists of a nested attribute containing + * NL80211_FREQUENCY_ATTR_* + * + * @NL80211_ATTR_CIPHER_SUITES: a set of u32 values indicating the supported + * cipher suites + * + * @NL80211_ATTR_FREQ_FIXED: a flag indicating the IBSS should not try to look + * for other networks on different channels + * + * @NL80211_ATTR_TIMED_OUT: a flag indicating than an operation timed out; this + * is used, e.g., with %NL80211_CMD_AUTHENTICATE event + * + * @NL80211_ATTR_USE_MFP: Whether management frame protection (IEEE 802.11w) is + * used for the association (&enum nl80211_mfp, represented as a u32); + * this attribute can be used with %NL80211_CMD_ASSOCIATE and + * %NL80211_CMD_CONNECT requests. %NL80211_MFP_OPTIONAL is not allowed for + * %NL80211_CMD_ASSOCIATE since user space SME is expected and hence, it + * must have decided whether to use management frame protection or not. + * Setting %NL80211_MFP_OPTIONAL with a %NL80211_CMD_CONNECT request will + * let the driver (or the firmware) decide whether to use MFP or not. + * + * @NL80211_ATTR_STA_FLAGS2: Attribute containing a + * &struct nl80211_sta_flag_update. + * + * @NL80211_ATTR_CONTROL_PORT: A flag indicating whether user space controls + * IEEE 802.1X port, i.e., sets/clears %NL80211_STA_FLAG_AUTHORIZED, in + * station mode. If the flag is included in %NL80211_CMD_ASSOCIATE + * request, the driver will assume that the port is unauthorized until + * authorized by user space. Otherwise, port is marked authorized by + * default in station mode. + * @NL80211_ATTR_CONTROL_PORT_ETHERTYPE: A 16-bit value indicating the + * ethertype that will be used for key negotiation. It can be + * specified with the associate and connect commands. If it is not + * specified, the value defaults to 0x888E (PAE, 802.1X). This + * attribute is also used as a flag in the wiphy information to + * indicate that protocols other than PAE are supported. + * @NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT: When included along with + * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, indicates that the custom + * ethertype frames used for key negotiation must not be encrypted. + * @NL80211_ATTR_CONTROL_PORT_OVER_NL80211: A flag indicating whether control + * port frames (e.g. of type given in %NL80211_ATTR_CONTROL_PORT_ETHERTYPE) + * will be sent directly to the network interface or sent via the NL80211 + * socket. If this attribute is missing, then legacy behavior of sending + * control port frames directly to the network interface is used. If the + * flag is included, then control port frames are sent over NL80211 instead + * using %CMD_CONTROL_PORT_FRAME. If control port routing over NL80211 is + * to be used then userspace must also use the %NL80211_ATTR_SOCKET_OWNER + * flag. When used with %NL80211_ATTR_CONTROL_PORT_NO_PREAUTH, pre-auth + * frames are not forwared over the control port. + * + * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver. + * We recommend using nested, driver-specific attributes within this. + * + * @NL80211_ATTR_DISCONNECTED_BY_AP: A flag indicating that the DISCONNECT + * event was due to the AP disconnecting the station, and not due to + * a local disconnect request. + * @NL80211_ATTR_STATUS_CODE: StatusCode for the %NL80211_CMD_CONNECT + * event (u16) + * @NL80211_ATTR_PRIVACY: Flag attribute, used with connect(), indicating + * that protected APs should be used. This is also used with NEW_BEACON to + * indicate that the BSS is to use protection. + * + * @NL80211_ATTR_CIPHERS_PAIRWISE: Used with CONNECT, ASSOCIATE, and NEW_BEACON + * to indicate which unicast key ciphers will be used with the connection + * (an array of u32). + * @NL80211_ATTR_CIPHER_GROUP: Used with CONNECT, ASSOCIATE, and NEW_BEACON to + * indicate which group key cipher will be used with the connection (a + * u32). + * @NL80211_ATTR_WPA_VERSIONS: Used with CONNECT, ASSOCIATE, and NEW_BEACON to + * indicate which WPA version(s) the AP we want to associate with is using + * (a u32 with flags from &enum nl80211_wpa_versions). + * @NL80211_ATTR_AKM_SUITES: Used with CONNECT, ASSOCIATE, and NEW_BEACON to + * indicate which key management algorithm(s) to use (an array of u32). + * This attribute is also sent in response to @NL80211_CMD_GET_WIPHY, + * indicating the supported AKM suites, intended for specific drivers which + * implement SME and have constraints on which AKMs are supported and also + * the cases where an AKM support is offloaded to the driver/firmware. + * If there is no such notification from the driver, user space should + * assume the driver supports all the AKM suites. + * + * @NL80211_ATTR_REQ_IE: (Re)association request information elements as + * sent out by the card, for ROAM and successful CONNECT events. + * @NL80211_ATTR_RESP_IE: (Re)association response information elements as + * sent by peer, for ROAM and successful CONNECT events. + * + * @NL80211_ATTR_PREV_BSSID: previous BSSID, to be used in ASSOCIATE and CONNECT + * commands to specify a request to reassociate within an ESS, i.e., to use + * Reassociate Request frame (with the value of this attribute in the + * Current AP address field) instead of Association Request frame which is + * used for the initial association to an ESS. + * + * @NL80211_ATTR_KEY: key information in a nested attribute with + * %NL80211_KEY_* sub-attributes + * @NL80211_ATTR_KEYS: array of keys for static WEP keys for connect() + * and join_ibss(), key information is in a nested attribute each + * with %NL80211_KEY_* sub-attributes + * + * @NL80211_ATTR_PID: Process ID of a network namespace. + * + * @NL80211_ATTR_GENERATION: Used to indicate consistent snapshots for + * dumps. This number increases whenever the object list being + * dumped changes, and as such userspace can verify that it has + * obtained a complete and consistent snapshot by verifying that + * all dump messages contain the same generation number. If it + * changed then the list changed and the dump should be repeated + * completely from scratch. + * + * @NL80211_ATTR_4ADDR: Use 4-address frames on a virtual interface + * + * @NL80211_ATTR_SURVEY_INFO: survey information about a channel, part of + * the survey response for %NL80211_CMD_GET_SURVEY, nested attribute + * containing info as possible, see &enum survey_info. + * + * @NL80211_ATTR_PMKID: PMK material for PMKSA caching. + * @NL80211_ATTR_MAX_NUM_PMKIDS: maximum number of PMKIDs a firmware can + * cache, a wiphy attribute. + * + * @NL80211_ATTR_DURATION: Duration of an operation in milliseconds, u32. + * @NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION: Device attribute that + * specifies the maximum duration that can be requested with the + * remain-on-channel operation, in milliseconds, u32. + * + * @NL80211_ATTR_COOKIE: Generic 64-bit cookie to identify objects. + * + * @NL80211_ATTR_TX_RATES: Nested set of attributes + * (enum nl80211_tx_rate_attributes) describing TX rates per band. The + * enum nl80211_band value is used as the index (nla_type() of the nested + * data. If a band is not included, it will be configured to allow all + * rates based on negotiated supported rates information. This attribute + * is used with %NL80211_CMD_SET_TX_BITRATE_MASK and with starting AP, + * and joining mesh networks (not IBSS yet). In the later case, it must + * specify just a single bitrate, which is to be used for the beacon. + * The driver must also specify support for this with the extended + * features NL80211_EXT_FEATURE_BEACON_RATE_LEGACY, + * NL80211_EXT_FEATURE_BEACON_RATE_HT, + * NL80211_EXT_FEATURE_BEACON_RATE_VHT and + * NL80211_EXT_FEATURE_BEACON_RATE_HE. + * + * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain + * at least one byte, currently used with @NL80211_CMD_REGISTER_FRAME. + * @NL80211_ATTR_FRAME_TYPE: A u16 indicating the frame type/subtype for the + * @NL80211_CMD_REGISTER_FRAME command. + * @NL80211_ATTR_TX_FRAME_TYPES: wiphy capability attribute, which is a + * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing + * information about which frame types can be transmitted with + * %NL80211_CMD_FRAME. + * @NL80211_ATTR_RX_FRAME_TYPES: wiphy capability attribute, which is a + * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing + * information about which frame types can be registered for RX. + * + * @NL80211_ATTR_ACK: Flag attribute indicating that the frame was + * acknowledged by the recipient. + * + * @NL80211_ATTR_PS_STATE: powersave state, using &enum nl80211_ps_state values. + * + * @NL80211_ATTR_CQM: connection quality monitor configuration in a + * nested attribute with %NL80211_ATTR_CQM_* sub-attributes. + * + * @NL80211_ATTR_LOCAL_STATE_CHANGE: Flag attribute to indicate that a command + * is requesting a local authentication/association state change without + * invoking actual management frame exchange. This can be used with + * NL80211_CMD_AUTHENTICATE, NL80211_CMD_DEAUTHENTICATE, + * NL80211_CMD_DISASSOCIATE. + * + * @NL80211_ATTR_AP_ISOLATE: (AP mode) Do not forward traffic between stations + * connected to this BSS. + * + * @NL80211_ATTR_WIPHY_TX_POWER_SETTING: Transmit power setting type. See + * &enum nl80211_tx_power_setting for possible values. + * @NL80211_ATTR_WIPHY_TX_POWER_LEVEL: Transmit power level in signed mBm units. + * This is used in association with @NL80211_ATTR_WIPHY_TX_POWER_SETTING + * for non-automatic settings. + * + * @NL80211_ATTR_SUPPORT_IBSS_RSN: The device supports IBSS RSN, which mostly + * means support for per-station GTKs. + * + * @NL80211_ATTR_WIPHY_ANTENNA_TX: Bitmap of allowed antennas for transmitting. + * This can be used to mask out antennas which are not attached or should + * not be used for transmitting. If an antenna is not selected in this + * bitmap the hardware is not allowed to transmit on this antenna. + * + * Each bit represents one antenna, starting with antenna 1 at the first + * bit. Depending on which antennas are selected in the bitmap, 802.11n + * drivers can derive which chainmasks to use (if all antennas belonging to + * a particular chain are disabled this chain should be disabled) and if + * a chain has diversity antennas wether diversity should be used or not. + * HT capabilities (STBC, TX Beamforming, Antenna selection) can be + * derived from the available chains after applying the antenna mask. + * Non-802.11n drivers can derive wether to use diversity or not. + * Drivers may reject configurations or RX/TX mask combinations they cannot + * support by returning -EINVAL. + * + * @NL80211_ATTR_WIPHY_ANTENNA_RX: Bitmap of allowed antennas for receiving. + * This can be used to mask out antennas which are not attached or should + * not be used for receiving. If an antenna is not selected in this bitmap + * the hardware should not be configured to receive on this antenna. + * For a more detailed description see @NL80211_ATTR_WIPHY_ANTENNA_TX. + * + * @NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX: Bitmap of antennas which are available + * for configuration as TX antennas via the above parameters. + * + * @NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX: Bitmap of antennas which are available + * for configuration as RX antennas via the above parameters. + * + * @NL80211_ATTR_MCAST_RATE: Multicast tx rate (in 100 kbps) for IBSS + * + * @NL80211_ATTR_OFFCHANNEL_TX_OK: For management frame TX, the frame may be + * transmitted on another channel when the channel given doesn't match + * the current channel. If the current channel doesn't match and this + * flag isn't set, the frame will be rejected. This is also used as an + * nl80211 capability flag. + * + * @NL80211_ATTR_BSS_HT_OPMODE: HT operation mode (u16) + * + * @NL80211_ATTR_KEY_DEFAULT_TYPES: A nested attribute containing flags + * attributes, specifying what a key should be set as default as. + * See &enum nl80211_key_default_types. + * + * @NL80211_ATTR_MESH_SETUP: Optional mesh setup parameters. These cannot be + * changed once the mesh is active. + * @NL80211_ATTR_MESH_CONFIG: Mesh configuration parameters, a nested attribute + * containing attributes from &enum nl80211_meshconf_params. + * @NL80211_ATTR_SUPPORT_MESH_AUTH: Currently, this means the underlying driver + * allows auth frames in a mesh to be passed to userspace for processing via + * the @NL80211_MESH_SETUP_USERSPACE_AUTH flag. + * @NL80211_ATTR_STA_PLINK_STATE: The state of a mesh peer link as defined in + * &enum nl80211_plink_state. Used when userspace is driving the peer link + * management state machine. @NL80211_MESH_SETUP_USERSPACE_AMPE or + * @NL80211_MESH_SETUP_USERSPACE_MPM must be enabled. + * + * @NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED: indicates, as part of the wiphy + * capabilities, the supported WoWLAN triggers + * @NL80211_ATTR_WOWLAN_TRIGGERS: used by %NL80211_CMD_SET_WOWLAN to + * indicate which WoW triggers should be enabled. This is also + * used by %NL80211_CMD_GET_WOWLAN to get the currently enabled WoWLAN + * triggers. + * + * @NL80211_ATTR_SCHED_SCAN_INTERVAL: Interval between scheduled scan + * cycles, in msecs. + * + * @NL80211_ATTR_SCHED_SCAN_MATCH: Nested attribute with one or more + * sets of attributes to match during scheduled scans. Only BSSs + * that match any of the sets will be reported. These are + * pass-thru filter rules. + * For a match to succeed, the BSS must match all attributes of a + * set. Since not every hardware supports matching all types of + * attributes, there is no guarantee that the reported BSSs are + * fully complying with the match sets and userspace needs to be + * able to ignore them by itself. + * Thus, the implementation is somewhat hardware-dependent, but + * this is only an optimization and the userspace application + * needs to handle all the non-filtered results anyway. + * If the match attributes don't make sense when combined with + * the values passed in @NL80211_ATTR_SCAN_SSIDS (eg. if an SSID + * is included in the probe request, but the match attributes + * will never let it go through), -EINVAL may be returned. + * If omitted, no filtering is done. + * + * @NL80211_ATTR_INTERFACE_COMBINATIONS: Nested attribute listing the supported + * interface combinations. In each nested item, it contains attributes + * defined in &enum nl80211_if_combination_attrs. + * @NL80211_ATTR_SOFTWARE_IFTYPES: Nested attribute (just like + * %NL80211_ATTR_SUPPORTED_IFTYPES) containing the interface types that + * are managed in software: interfaces of these types aren't subject to + * any restrictions in their number or combinations. + * + * @NL80211_ATTR_REKEY_DATA: nested attribute containing the information + * necessary for GTK rekeying in the device, see &enum nl80211_rekey_data. + * + * @NL80211_ATTR_SCAN_SUPP_RATES: rates per to be advertised as supported in scan, + * nested array attribute containing an entry for each band, with the entry + * being a list of supported rates as defined by IEEE 802.11 7.3.2.2 but + * without the length restriction (at most %NL80211_MAX_SUPP_RATES). + * + * @NL80211_ATTR_HIDDEN_SSID: indicates whether SSID is to be hidden from Beacon + * and Probe Response (when response to wildcard Probe Request); see + * &enum nl80211_hidden_ssid, represented as a u32 + * + * @NL80211_ATTR_IE_PROBE_RESP: Information element(s) for Probe Response frame. + * This is used with %NL80211_CMD_NEW_BEACON and %NL80211_CMD_SET_BEACON to + * provide extra IEs (e.g., WPS/P2P IE) into Probe Response frames when the + * driver (or firmware) replies to Probe Request frames. + * @NL80211_ATTR_IE_ASSOC_RESP: Information element(s) for (Re)Association + * Response frames. This is used with %NL80211_CMD_NEW_BEACON and + * %NL80211_CMD_SET_BEACON to provide extra IEs (e.g., WPS/P2P IE) into + * (Re)Association Response frames when the driver (or firmware) replies to + * (Re)Association Request frames. + * + * @NL80211_ATTR_STA_WME: Nested attribute containing the wme configuration + * of the station, see &enum nl80211_sta_wme_attr. + * @NL80211_ATTR_SUPPORT_AP_UAPSD: the device supports uapsd when working + * as AP. + * + * @NL80211_ATTR_ROAM_SUPPORT: Indicates whether the firmware is capable of + * roaming to another AP in the same ESS if the signal lever is low. + * + * @NL80211_ATTR_PMKSA_CANDIDATE: Nested attribute containing the PMKSA caching + * candidate information, see &enum nl80211_pmksa_candidate_attr. + * + * @NL80211_ATTR_TX_NO_CCK_RATE: Indicates whether to use CCK rate or not + * for management frames transmission. In order to avoid p2p probe/action + * frames are being transmitted at CCK rate in 2GHz band, the user space + * applications use this attribute. + * This attribute is used with %NL80211_CMD_TRIGGER_SCAN and + * %NL80211_CMD_FRAME commands. + * + * @NL80211_ATTR_TDLS_ACTION: Low level TDLS action code (e.g. link setup + * request, link setup confirm, link teardown, etc.). Values are + * described in the TDLS (802.11z) specification. + * @NL80211_ATTR_TDLS_DIALOG_TOKEN: Non-zero token for uniquely identifying a + * TDLS conversation between two devices. + * @NL80211_ATTR_TDLS_OPERATION: High level TDLS operation; see + * &enum nl80211_tdls_operation, represented as a u8. + * @NL80211_ATTR_TDLS_SUPPORT: A flag indicating the device can operate + * as a TDLS peer sta. + * @NL80211_ATTR_TDLS_EXTERNAL_SETUP: The TDLS discovery/setup and teardown + * procedures should be performed by sending TDLS packets via + * %NL80211_CMD_TDLS_MGMT. Otherwise %NL80211_CMD_TDLS_OPER should be + * used for asking the driver to perform a TDLS operation. + * + * @NL80211_ATTR_DEVICE_AP_SME: This u32 attribute may be listed for devices + * that have AP support to indicate that they have the AP SME integrated + * with support for the features listed in this attribute, see + * &enum nl80211_ap_sme_features. + * + * @NL80211_ATTR_DONT_WAIT_FOR_ACK: Used with %NL80211_CMD_FRAME, this tells + * the driver to not wait for an acknowledgement. Note that due to this, + * it will also not give a status callback nor return a cookie. This is + * mostly useful for probe responses to save airtime. + * + * @NL80211_ATTR_FEATURE_FLAGS: This u32 attribute contains flags from + * &enum nl80211_feature_flags and is advertised in wiphy information. + * @NL80211_ATTR_PROBE_RESP_OFFLOAD: Indicates that the HW responds to probe + * requests while operating in AP-mode. + * This attribute holds a bitmap of the supported protocols for + * offloading (see &enum nl80211_probe_resp_offload_support_attr). + * + * @NL80211_ATTR_PROBE_RESP: Probe Response template data. Contains the entire + * probe-response frame. The DA field in the 802.11 header is zero-ed out, + * to be filled by the FW. + * @NL80211_ATTR_DISABLE_HT: Force HT capable interfaces to disable + * this feature during association. This is a flag attribute. + * Currently only supported in mac80211 drivers. + * @NL80211_ATTR_DISABLE_VHT: Force VHT capable interfaces to disable + * this feature during association. This is a flag attribute. + * Currently only supported in mac80211 drivers. + * @NL80211_ATTR_DISABLE_HE: Force HE capable interfaces to disable + * this feature during association. This is a flag attribute. + * Currently only supported in mac80211 drivers. + * @NL80211_ATTR_HT_CAPABILITY_MASK: Specify which bits of the + * ATTR_HT_CAPABILITY to which attention should be paid. + * Currently, only mac80211 NICs support this feature. + * The values that may be configured are: + * MCS rates, MAX-AMSDU, HT-20-40 and HT_CAP_SGI_40 + * AMPDU density and AMPDU factor. + * All values are treated as suggestions and may be ignored + * by the driver as required. The actual values may be seen in + * the station debugfs ht_caps file. + * + * @NL80211_ATTR_DFS_REGION: region for regulatory rules which this country + * abides to when initiating radiation on DFS channels. A country maps + * to one DFS region. + * + * @NL80211_ATTR_NOACK_MAP: This u16 bitmap contains the No Ack Policy of + * up to 16 TIDs. + * + * @NL80211_ATTR_INACTIVITY_TIMEOUT: timeout value in seconds, this can be + * used by the drivers which has MLME in firmware and does not have support + * to report per station tx/rx activity to free up the station entry from + * the list. This needs to be used when the driver advertises the + * capability to timeout the stations. + * + * @NL80211_ATTR_RX_SIGNAL_DBM: signal strength in dBm (as a 32-bit int); + * this attribute is (depending on the driver capabilities) added to + * received frames indicated with %NL80211_CMD_FRAME. + * + * @NL80211_ATTR_BG_SCAN_PERIOD: Background scan period in seconds + * or 0 to disable background scan. + * + * @NL80211_ATTR_USER_REG_HINT_TYPE: type of regulatory hint passed from + * userspace. If unset it is assumed the hint comes directly from + * a user. If set code could specify exactly what type of source + * was used to provide the hint. For the different types of + * allowed user regulatory hints see nl80211_user_reg_hint_type. + * + * @NL80211_ATTR_CONN_FAILED_REASON: The reason for which AP has rejected + * the connection request from a station. nl80211_connect_failed_reason + * enum has different reasons of connection failure. + * + * @NL80211_ATTR_AUTH_DATA: Fields and elements in Authentication frames. + * This contains the authentication frame body (non-IE and IE data), + * excluding the Authentication algorithm number, i.e., starting at the + * Authentication transaction sequence number field. It is used with + * authentication algorithms that need special fields to be added into + * the frames (SAE and FILS). Currently, only the SAE cases use the + * initial two fields (Authentication transaction sequence number and + * Status code). However, those fields are included in the attribute data + * for all authentication algorithms to keep the attribute definition + * consistent. + * + * @NL80211_ATTR_VHT_CAPABILITY: VHT Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION) + * + * @NL80211_ATTR_SCAN_FLAGS: scan request control flags (u32) + * + * @NL80211_ATTR_P2P_CTWINDOW: P2P GO Client Traffic Window (u8), used with + * the START_AP and SET_BSS commands + * @NL80211_ATTR_P2P_OPPPS: P2P GO opportunistic PS (u8), used with the + * START_AP and SET_BSS commands. This can have the values 0 or 1; + * if not given in START_AP 0 is assumed, if not given in SET_BSS + * no change is made. + * + * @NL80211_ATTR_LOCAL_MESH_POWER_MODE: local mesh STA link-specific power mode + * defined in &enum nl80211_mesh_power_mode. + * + * @NL80211_ATTR_ACL_POLICY: ACL policy, see &enum nl80211_acl_policy, + * carried in a u32 attribute + * + * @NL80211_ATTR_MAC_ADDRS: Array of nested MAC addresses, used for + * MAC ACL. + * + * @NL80211_ATTR_MAC_ACL_MAX: u32 attribute to advertise the maximum + * number of MAC addresses that a device can support for MAC + * ACL. + * + * @NL80211_ATTR_RADAR_EVENT: Type of radar event for notification to userspace, + * contains a value of enum nl80211_radar_event (u32). + * + * @NL80211_ATTR_EXT_CAPA: 802.11 extended capabilities that the kernel driver + * has and handles. The format is the same as the IE contents. See + * 802.11-2012 8.4.2.29 for more information. + * @NL80211_ATTR_EXT_CAPA_MASK: Extended capabilities that the kernel driver + * has set in the %NL80211_ATTR_EXT_CAPA value, for multibit fields. + * + * @NL80211_ATTR_STA_CAPABILITY: Station capabilities (u16) are advertised to + * the driver, e.g., to enable TDLS power save (PU-APSD). + * + * @NL80211_ATTR_STA_EXT_CAPABILITY: Station extended capabilities are + * advertised to the driver, e.g., to enable TDLS off channel operations + * and PU-APSD. + * + * @NL80211_ATTR_PROTOCOL_FEATURES: global nl80211 feature flags, see + * &enum nl80211_protocol_features, the attribute is a u32. + * + * @NL80211_ATTR_SPLIT_WIPHY_DUMP: flag attribute, userspace supports + * receiving the data for a single wiphy split across multiple + * messages, given with wiphy dump message + * + * @NL80211_ATTR_MDID: Mobility Domain Identifier + * + * @NL80211_ATTR_IE_RIC: Resource Information Container Information + * Element + * + * @NL80211_ATTR_CRIT_PROT_ID: critical protocol identifier requiring increased + * reliability, see &enum nl80211_crit_proto_id (u16). + * @NL80211_ATTR_MAX_CRIT_PROT_DURATION: duration in milliseconds in which + * the connection should have increased reliability (u16). + * + * @NL80211_ATTR_PEER_AID: Association ID for the peer TDLS station (u16). + * This is similar to @NL80211_ATTR_STA_AID but with a difference of being + * allowed to be used with the first @NL80211_CMD_SET_STATION command to + * update a TDLS peer STA entry. + * + * @NL80211_ATTR_COALESCE_RULE: Coalesce rule information. + * + * @NL80211_ATTR_CH_SWITCH_COUNT: u32 attribute specifying the number of TBTT's + * until the channel switch event. + * @NL80211_ATTR_CH_SWITCH_BLOCK_TX: flag attribute specifying that transmission + * must be blocked on the current channel (before the channel switch + * operation). Also included in the channel switch started event if quiet + * was requested by the AP. + * @NL80211_ATTR_CSA_IES: Nested set of attributes containing the IE information + * for the time while performing a channel switch. + * @NL80211_ATTR_CNTDWN_OFFS_BEACON: An array of offsets (u16) to the channel + * switch or color change counters in the beacons tail (%NL80211_ATTR_BEACON_TAIL). + * @NL80211_ATTR_CNTDWN_OFFS_PRESP: An array of offsets (u16) to the channel + * switch or color change counters in the probe response (%NL80211_ATTR_PROBE_RESP). + * + * @NL80211_ATTR_RXMGMT_FLAGS: flags for nl80211_send_mgmt(), u32. + * As specified in the &enum nl80211_rxmgmt_flags. + * + * @NL80211_ATTR_STA_SUPPORTED_CHANNELS: array of supported channels. + * + * @NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES: array of supported + * operating classes. + * + * @NL80211_ATTR_HANDLE_DFS: A flag indicating whether user space + * controls DFS operation in IBSS mode. If the flag is included in + * %NL80211_CMD_JOIN_IBSS request, the driver will allow use of DFS + * channels and reports radar events to userspace. Userspace is required + * to react to radar events, e.g. initiate a channel switch or leave the + * IBSS network. + * + * @NL80211_ATTR_SUPPORT_5_MHZ: A flag indicating that the device supports + * 5 MHz channel bandwidth. + * @NL80211_ATTR_SUPPORT_10_MHZ: A flag indicating that the device supports + * 10 MHz channel bandwidth. + * + * @NL80211_ATTR_OPMODE_NOTIF: Operating mode field from Operating Mode + * Notification Element based on association request when used with + * %NL80211_CMD_NEW_STATION or %NL80211_CMD_SET_STATION (only when + * %NL80211_FEATURE_FULL_AP_CLIENT_STATE is supported, or with TDLS); + * u8 attribute. + * + * @NL80211_ATTR_VENDOR_ID: The vendor ID, either a 24-bit OUI or, if + * %NL80211_VENDOR_ID_IS_LINUX is set, a special Linux ID (not used yet) + * @NL80211_ATTR_VENDOR_SUBCMD: vendor sub-command + * @NL80211_ATTR_VENDOR_DATA: data for the vendor command, if any; this + * attribute is also used for vendor command feature advertisement + * @NL80211_ATTR_VENDOR_EVENTS: used for event list advertising in the wiphy + * info, containing a nested array of possible events + * + * @NL80211_ATTR_QOS_MAP: IP DSCP mapping for Interworking QoS mapping. This + * data is in the format defined for the payload of the QoS Map Set element + * in IEEE Std 802.11-2012, 8.4.2.97. + * + * @NL80211_ATTR_MAC_HINT: MAC address recommendation as initial BSS + * @NL80211_ATTR_WIPHY_FREQ_HINT: frequency of the recommended initial BSS + * + * @NL80211_ATTR_MAX_AP_ASSOC_STA: Device attribute that indicates how many + * associated stations are supported in AP mode (including P2P GO); u32. + * Since drivers may not have a fixed limit on the maximum number (e.g., + * other concurrent operations may affect this), drivers are allowed to + * advertise values that cannot always be met. In such cases, an attempt + * to add a new station entry with @NL80211_CMD_NEW_STATION may fail. + * + * @NL80211_ATTR_CSA_C_OFFSETS_TX: An array of csa counter offsets (u16) which + * should be updated when the frame is transmitted. + * @NL80211_ATTR_MAX_CSA_COUNTERS: U8 attribute used to advertise the maximum + * supported number of csa counters. + * + * @NL80211_ATTR_TDLS_PEER_CAPABILITY: flags for TDLS peer capabilities, u32. + * As specified in the &enum nl80211_tdls_peer_capability. + * + * @NL80211_ATTR_SOCKET_OWNER: Flag attribute, if set during interface + * creation then the new interface will be owned by the netlink socket + * that created it and will be destroyed when the socket is closed. + * If set during scheduled scan start then the new scan req will be + * owned by the netlink socket that created it and the scheduled scan will + * be stopped when the socket is closed. + * If set during configuration of regulatory indoor operation then the + * regulatory indoor configuration would be owned by the netlink socket + * that configured the indoor setting, and the indoor operation would be + * cleared when the socket is closed. + * If set during NAN interface creation, the interface will be destroyed + * if the socket is closed just like any other interface. Moreover, NAN + * notifications will be sent in unicast to that socket. Without this + * attribute, the notifications will be sent to the %NL80211_MCGRP_NAN + * multicast group. + * If set during %NL80211_CMD_ASSOCIATE or %NL80211_CMD_CONNECT the + * station will deauthenticate when the socket is closed. + * If set during %NL80211_CMD_JOIN_IBSS the IBSS will be automatically + * torn down when the socket is closed. + * If set during %NL80211_CMD_JOIN_MESH the mesh setup will be + * automatically torn down when the socket is closed. + * If set during %NL80211_CMD_START_AP the AP will be automatically + * disabled when the socket is closed. + * + * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is + * the TDLS link initiator. + * + * @NL80211_ATTR_USE_RRM: flag for indicating whether the current connection + * shall support Radio Resource Measurements (11k). This attribute can be + * used with %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests. + * User space applications are expected to use this flag only if the + * underlying device supports these minimal RRM features: + * %NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES, + * %NL80211_FEATURE_QUIET, + * Or, if global RRM is supported, see: + * %NL80211_EXT_FEATURE_RRM + * If this flag is used, driver must add the Power Capabilities IE to the + * association request. In addition, it must also set the RRM capability + * flag in the association request's Capability Info field. + * + * @NL80211_ATTR_WIPHY_DYN_ACK: flag attribute used to enable ACK timeout + * estimation algorithm (dynack). In order to activate dynack + * %NL80211_FEATURE_ACKTO_ESTIMATION feature flag must be set by lower + * drivers to indicate dynack capability. Dynack is automatically disabled + * setting valid value for coverage class. + * + * @NL80211_ATTR_TSID: a TSID value (u8 attribute) + * @NL80211_ATTR_USER_PRIO: user priority value (u8 attribute) + * @NL80211_ATTR_ADMITTED_TIME: admitted time in units of 32 microseconds + * (per second) (u16 attribute) + * + * @NL80211_ATTR_SMPS_MODE: SMPS mode to use (ap mode). see + * &enum nl80211_smps_mode. + * + * @NL80211_ATTR_OPER_CLASS: operating class + * + * @NL80211_ATTR_MAC_MASK: MAC address mask + * + * @NL80211_ATTR_WIPHY_SELF_MANAGED_REG: flag attribute indicating this device + * is self-managing its regulatory information and any regulatory domain + * obtained from it is coming from the device's wiphy and not the global + * cfg80211 regdomain. + * + * @NL80211_ATTR_EXT_FEATURES: extended feature flags contained in a byte + * array. The feature flags are identified by their bit index (see &enum + * nl80211_ext_feature_index). The bit index is ordered starting at the + * least-significant bit of the first byte in the array, ie. bit index 0 + * is located at bit 0 of byte 0. bit index 25 would be located at bit 1 + * of byte 3 (u8 array). + * + * @NL80211_ATTR_SURVEY_RADIO_STATS: Request overall radio statistics to be + * returned along with other survey data. If set, @NL80211_CMD_GET_SURVEY + * may return a survey entry without a channel indicating global radio + * statistics (only some values are valid and make sense.) + * For devices that don't return such an entry even then, the information + * should be contained in the result as the sum of the respective counters + * over all channels. + * + * @NL80211_ATTR_SCHED_SCAN_DELAY: delay before the first cycle of a + * scheduled scan is started. Or the delay before a WoWLAN + * net-detect scan is started, counting from the moment the + * system is suspended. This value is a u32, in seconds. + + * @NL80211_ATTR_REG_INDOOR: flag attribute, if set indicates that the device + * is operating in an indoor environment. + * + * @NL80211_ATTR_MAX_NUM_SCHED_SCAN_PLANS: maximum number of scan plans for + * scheduled scan supported by the device (u32), a wiphy attribute. + * @NL80211_ATTR_MAX_SCAN_PLAN_INTERVAL: maximum interval (in seconds) for + * a scan plan (u32), a wiphy attribute. + * @NL80211_ATTR_MAX_SCAN_PLAN_ITERATIONS: maximum number of iterations in + * a scan plan (u32), a wiphy attribute. + * @NL80211_ATTR_SCHED_SCAN_PLANS: a list of scan plans for scheduled scan. + * Each scan plan defines the number of scan iterations and the interval + * between scans. The last scan plan will always run infinitely, + * thus it must not specify the number of iterations, only the interval + * between scans. The scan plans are executed sequentially. + * Each scan plan is a nested attribute of &enum nl80211_sched_scan_plan. + * @NL80211_ATTR_PBSS: flag attribute. If set it means operate + * in a PBSS. Specified in %NL80211_CMD_CONNECT to request + * connecting to a PCP, and in %NL80211_CMD_START_AP to start + * a PCP instead of AP. Relevant for DMG networks only. + * @NL80211_ATTR_BSS_SELECT: nested attribute for driver supporting the + * BSS selection feature. When used with %NL80211_CMD_GET_WIPHY it contains + * attributes according &enum nl80211_bss_select_attr to indicate what + * BSS selection behaviours are supported. When used with %NL80211_CMD_CONNECT + * it contains the behaviour-specific attribute containing the parameters for + * BSS selection to be done by driver and/or firmware. + * + * @NL80211_ATTR_STA_SUPPORT_P2P_PS: whether P2P PS mechanism supported + * or not. u8, one of the values of &enum nl80211_sta_p2p_ps_status + * + * @NL80211_ATTR_PAD: attribute used for padding for 64-bit alignment + * + * @NL80211_ATTR_IFTYPE_EXT_CAPA: Nested attribute of the following attributes: + * %NL80211_ATTR_IFTYPE, %NL80211_ATTR_EXT_CAPA, + * %NL80211_ATTR_EXT_CAPA_MASK, to specify the extended capabilities and + * other interface-type specific capabilities per interface type. For MLO, + * %NL80211_ATTR_EML_CAPABILITY and %NL80211_ATTR_MLD_CAPA_AND_OPS are + * present. + * + * @NL80211_ATTR_MU_MIMO_GROUP_DATA: array of 24 bytes that defines a MU-MIMO + * groupID for monitor mode. + * The first 8 bytes are a mask that defines the membership in each + * group (there are 64 groups, group 0 and 63 are reserved), + * each bit represents a group and set to 1 for being a member in + * that group and 0 for not being a member. + * The remaining 16 bytes define the position in each group: 2 bits for + * each group. + * (smaller group numbers represented on most significant bits and bigger + * group numbers on least significant bits.) + * This attribute is used only if all interfaces are in monitor mode. + * Set this attribute in order to monitor packets using the given MU-MIMO + * groupID data. + * to turn off that feature set all the bits of the groupID to zero. + * @NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR: mac address for the sniffer to follow + * when using MU-MIMO air sniffer. + * to turn that feature off set an invalid mac address + * (e.g. FF:FF:FF:FF:FF:FF) + * + * @NL80211_ATTR_SCAN_START_TIME_TSF: The time at which the scan was actually + * started (u64). The time is the TSF of the BSS the interface that + * requested the scan is connected to (if available, otherwise this + * attribute must not be included). + * @NL80211_ATTR_SCAN_START_TIME_TSF_BSSID: The BSS according to which + * %NL80211_ATTR_SCAN_START_TIME_TSF is set. + * @NL80211_ATTR_MEASUREMENT_DURATION: measurement duration in TUs (u16). If + * %NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY is not set, this is the + * maximum measurement duration allowed. This attribute is used with + * measurement requests. It can also be used with %NL80211_CMD_TRIGGER_SCAN + * if the scan is used for beacon report radio measurement. + * @NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY: flag attribute that indicates + * that the duration specified with %NL80211_ATTR_MEASUREMENT_DURATION is + * mandatory. If this flag is not set, the duration is the maximum duration + * and the actual measurement duration may be shorter. + * + * @NL80211_ATTR_MESH_PEER_AID: Association ID for the mesh peer (u16). This is + * used to pull the stored data for mesh peer in power save state. + * + * @NL80211_ATTR_NAN_MASTER_PREF: the master preference to be used by + * %NL80211_CMD_START_NAN and optionally with + * %NL80211_CMD_CHANGE_NAN_CONFIG. Its type is u8 and it can't be 0. + * Also, values 1 and 255 are reserved for certification purposes and + * should not be used during a normal device operation. + * @NL80211_ATTR_BANDS: operating bands configuration. This is a u32 + * bitmask of BIT(NL80211_BAND_*) as described in %enum + * nl80211_band. For instance, for NL80211_BAND_2GHZ, bit 0 + * would be set. This attribute is used with + * %NL80211_CMD_START_NAN and %NL80211_CMD_CHANGE_NAN_CONFIG, and + * it is optional. If no bands are set, it means don't-care and + * the device will decide what to use. + * @NL80211_ATTR_NAN_FUNC: a function that can be added to NAN. See + * &enum nl80211_nan_func_attributes for description of this nested + * attribute. + * @NL80211_ATTR_NAN_MATCH: used to report a match. This is a nested attribute. + * See &enum nl80211_nan_match_attributes. + * @NL80211_ATTR_FILS_KEK: KEK for FILS (Re)Association Request/Response frame + * protection. + * @NL80211_ATTR_FILS_NONCES: Nonces (part of AAD) for FILS (Re)Association + * Request/Response frame protection. This attribute contains the 16 octet + * STA Nonce followed by 16 octets of AP Nonce. + * + * @NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED: Indicates whether or not multicast + * packets should be send out as unicast to all stations (flag attribute). + * + * @NL80211_ATTR_BSSID: The BSSID of the AP. Note that %NL80211_ATTR_MAC is also + * used in various commands/events for specifying the BSSID. + * + * @NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI: Relative RSSI threshold by which + * other BSSs has to be better or slightly worse than the current + * connected BSS so that they get reported to user space. + * This will give an opportunity to userspace to consider connecting to + * other matching BSSs which have better or slightly worse RSSI than + * the current connected BSS by using an offloaded operation to avoid + * unnecessary wakeups. + * + * @NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST: When present the RSSI level for BSSs in + * the specified band is to be adjusted before doing + * %NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI based comparison to figure out + * better BSSs. The attribute value is a packed structure + * value as specified by &struct nl80211_bss_select_rssi_adjust. + * + * @NL80211_ATTR_TIMEOUT_REASON: The reason for which an operation timed out. + * u32 attribute with an &enum nl80211_timeout_reason value. This is used, + * e.g., with %NL80211_CMD_CONNECT event. + * + * @NL80211_ATTR_FILS_ERP_USERNAME: EAP Re-authentication Protocol (ERP) + * username part of NAI used to refer keys rRK and rIK. This is used with + * %NL80211_CMD_CONNECT. + * + * @NL80211_ATTR_FILS_ERP_REALM: EAP Re-authentication Protocol (ERP) realm part + * of NAI specifying the domain name of the ER server. This is used with + * %NL80211_CMD_CONNECT. + * + * @NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM: Unsigned 16-bit ERP next sequence number + * to use in ERP messages. This is used in generating the FILS wrapped data + * for FILS authentication and is used with %NL80211_CMD_CONNECT. + * + * @NL80211_ATTR_FILS_ERP_RRK: ERP re-authentication Root Key (rRK) for the + * NAI specified by %NL80211_ATTR_FILS_ERP_USERNAME and + * %NL80211_ATTR_FILS_ERP_REALM. This is used for generating rIK and rMSK + * from successful FILS authentication and is used with + * %NL80211_CMD_CONNECT. + * + * @NL80211_ATTR_FILS_CACHE_ID: A 2-octet identifier advertized by a FILS AP + * identifying the scope of PMKSAs. This is used with + * @NL80211_CMD_SET_PMKSA and @NL80211_CMD_DEL_PMKSA. + * + * @NL80211_ATTR_PMK: attribute for passing PMK key material. Used with + * %NL80211_CMD_SET_PMKSA for the PMKSA identified by %NL80211_ATTR_PMKID. + * For %NL80211_CMD_CONNECT and %NL80211_CMD_START_AP it is used to provide + * PSK for offloading 4-way handshake for WPA/WPA2-PSK networks. For 802.1X + * authentication it is used with %NL80211_CMD_SET_PMK. For offloaded FT + * support this attribute specifies the PMK-R0 if NL80211_ATTR_PMKR0_NAME + * is included as well. + * + * @NL80211_ATTR_SCHED_SCAN_MULTI: flag attribute which user-space shall use to + * indicate that it supports multiple active scheduled scan requests. + * @NL80211_ATTR_SCHED_SCAN_MAX_REQS: indicates maximum number of scheduled + * scan request that may be active for the device (u32). + * + * @NL80211_ATTR_WANT_1X_4WAY_HS: flag attribute which user-space can include + * in %NL80211_CMD_CONNECT to indicate that for 802.1X authentication it + * wants to use the supported offload of the 4-way handshake. + * @NL80211_ATTR_PMKR0_NAME: PMK-R0 Name for offloaded FT. + * @NL80211_ATTR_PORT_AUTHORIZED: (reserved) + * + * @NL80211_ATTR_EXTERNAL_AUTH_ACTION: Identify the requested external + * authentication operation (u32 attribute with an + * &enum nl80211_external_auth_action value). This is used with the + * %NL80211_CMD_EXTERNAL_AUTH request event. + * @NL80211_ATTR_EXTERNAL_AUTH_SUPPORT: Flag attribute indicating that the user + * space supports external authentication. This attribute shall be used + * with %NL80211_CMD_CONNECT and %NL80211_CMD_START_AP request. The driver + * may offload authentication processing to user space if this capability + * is indicated in the respective requests from the user space. (This flag + * attribute deprecated for %NL80211_CMD_START_AP, use + * %NL80211_ATTR_AP_SETTINGS_FLAGS) + * + * @NL80211_ATTR_NSS: Station's New/updated RX_NSS value notified using this + * u8 attribute. This is used with %NL80211_CMD_STA_OPMODE_CHANGED. + * + * @NL80211_ATTR_TXQ_STATS: TXQ statistics (nested attribute, see &enum + * nl80211_txq_stats) + * @NL80211_ATTR_TXQ_LIMIT: Total packet limit for the TXQ queues for this phy. + * The smaller of this and the memory limit is enforced. + * @NL80211_ATTR_TXQ_MEMORY_LIMIT: Total memory limit (in bytes) for the + * TXQ queues for this phy. The smaller of this and the packet limit is + * enforced. + * @NL80211_ATTR_TXQ_QUANTUM: TXQ scheduler quantum (bytes). Number of bytes + * a flow is assigned on each round of the DRR scheduler. + * @NL80211_ATTR_HE_CAPABILITY: HE Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION). Can be set + * only if %NL80211_STA_FLAG_WME is set. + * + * @NL80211_ATTR_FTM_RESPONDER: nested attribute which user-space can include + * in %NL80211_CMD_START_AP or %NL80211_CMD_SET_BEACON for fine timing + * measurement (FTM) responder functionality and containing parameters as + * possible, see &enum nl80211_ftm_responder_attr + * + * @NL80211_ATTR_FTM_RESPONDER_STATS: Nested attribute with FTM responder + * statistics, see &enum nl80211_ftm_responder_stats. + * + * @NL80211_ATTR_TIMEOUT: Timeout for the given operation in milliseconds (u32), + * if the attribute is not given no timeout is requested. Note that 0 is an + * invalid value. + * + * @NL80211_ATTR_PEER_MEASUREMENTS: peer measurements request (and result) + * data, uses nested attributes specified in + * &enum nl80211_peer_measurement_attrs. + * This is also used for capability advertisement in the wiphy information, + * with the appropriate sub-attributes. + * + * @NL80211_ATTR_AIRTIME_WEIGHT: Station's weight when scheduled by the airtime + * scheduler. + * + * @NL80211_ATTR_STA_TX_POWER_SETTING: Transmit power setting type (u8) for + * station associated with the AP. See &enum nl80211_tx_power_setting for + * possible values. + * @NL80211_ATTR_STA_TX_POWER: Transmit power level (s16) in dBm units. This + * allows to set Tx power for a station. If this attribute is not included, + * the default per-interface tx power setting will be overriding. Driver + * should be picking up the lowest tx power, either tx power per-interface + * or per-station. + * + * @NL80211_ATTR_SAE_PASSWORD: attribute for passing SAE password material. It + * is used with %NL80211_CMD_CONNECT to provide password for offloading + * SAE authentication for WPA3-Personal networks. + * + * @NL80211_ATTR_TWT_RESPONDER: Enable target wait time responder support. + * + * @NL80211_ATTR_HE_OBSS_PD: nested attribute for OBSS Packet Detection + * functionality. + * + * @NL80211_ATTR_WIPHY_EDMG_CHANNELS: bitmap that indicates the 2.16 GHz + * channel(s) that are allowed to be used for EDMG transmissions. + * Defined by IEEE P802.11ay/D4.0 section 9.4.2.251. (u8 attribute) + * @NL80211_ATTR_WIPHY_EDMG_BW_CONFIG: Channel BW Configuration subfield encodes + * the allowed channel bandwidth configurations. (u8 attribute) + * Defined by IEEE P802.11ay/D4.0 section 9.4.2.251, Table 13. + * + * @NL80211_ATTR_VLAN_ID: VLAN ID (1..4094) for the station and VLAN group key + * (u16). + * + * @NL80211_ATTR_HE_BSS_COLOR: nested attribute for BSS Color Settings. + * + * @NL80211_ATTR_IFTYPE_AKM_SUITES: nested array attribute, with each entry + * using attributes from &enum nl80211_iftype_akm_attributes. This + * attribute is sent in a response to %NL80211_CMD_GET_WIPHY indicating + * supported AKM suites capability per interface. AKMs advertised in + * %NL80211_ATTR_AKM_SUITES are default capabilities if AKM suites not + * advertised for a specific interface type. + * + * @NL80211_ATTR_TID_CONFIG: TID specific configuration in a + * nested attribute with &enum nl80211_tid_config_attr sub-attributes; + * on output (in wiphy attributes) it contains only the feature sub- + * attributes. + * + * @NL80211_ATTR_CONTROL_PORT_NO_PREAUTH: disable preauth frame rx on control + * port in order to forward/receive them as ordinary data frames. + * + * @NL80211_ATTR_PMK_LIFETIME: Maximum lifetime for PMKSA in seconds (u32, + * dot11RSNAConfigPMKReauthThreshold; 0 is not a valid value). + * An optional parameter configured through %NL80211_CMD_SET_PMKSA. + * Drivers that trigger roaming need to know the lifetime of the + * configured PMKSA for triggering the full vs. PMKSA caching based + * authentication. This timeout helps authentication methods like SAE, + * where PMK gets updated only by going through a full (new SAE) + * authentication instead of getting updated during an association for EAP + * authentication. No new full authentication within the PMK expiry shall + * result in a disassociation at the end of the lifetime. + * + * @NL80211_ATTR_PMK_REAUTH_THRESHOLD: Reauthentication threshold time, in + * terms of percentage of %NL80211_ATTR_PMK_LIFETIME + * (u8, dot11RSNAConfigPMKReauthThreshold, 1..100). This is an optional + * parameter configured through %NL80211_CMD_SET_PMKSA. Requests the + * driver to trigger a full authentication roam (without PMKSA caching) + * after the reauthentication threshold time, but before the PMK lifetime + * has expired. + * + * Authentication methods like SAE need to be able to generate a new PMKSA + * entry without having to force a disconnection after the PMK timeout. If + * no roaming occurs between the reauth threshold and PMK expiration, + * disassociation is still forced. + * @NL80211_ATTR_RECEIVE_MULTICAST: multicast flag for the + * %NL80211_CMD_REGISTER_FRAME command, see the description there. + * @NL80211_ATTR_WIPHY_FREQ_OFFSET: offset of the associated + * %NL80211_ATTR_WIPHY_FREQ in positive KHz. Only valid when supplied with + * an %NL80211_ATTR_WIPHY_FREQ_OFFSET. + * @NL80211_ATTR_CENTER_FREQ1_OFFSET: Center frequency offset in KHz for the + * first channel segment specified in %NL80211_ATTR_CENTER_FREQ1. + * @NL80211_ATTR_SCAN_FREQ_KHZ: nested attribute with KHz frequencies + * + * @NL80211_ATTR_HE_6GHZ_CAPABILITY: HE 6 GHz Band Capability element (from + * association request when used with NL80211_CMD_NEW_STATION). + * + * @NL80211_ATTR_FILS_DISCOVERY: Optional parameter to configure FILS + * discovery. It is a nested attribute, see + * &enum nl80211_fils_discovery_attributes. + * + * @NL80211_ATTR_UNSOL_BCAST_PROBE_RESP: Optional parameter to configure + * unsolicited broadcast probe response. It is a nested attribute, see + * &enum nl80211_unsol_bcast_probe_resp_attributes. + * + * @NL80211_ATTR_S1G_CAPABILITY: S1G Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION) + * @NL80211_ATTR_S1G_CAPABILITY_MASK: S1G Capability Information element + * override mask. Used with NL80211_ATTR_S1G_CAPABILITY in + * NL80211_CMD_ASSOCIATE or NL80211_CMD_CONNECT. + * + * @NL80211_ATTR_SAE_PWE: Indicates the mechanism(s) allowed for SAE PWE + * derivation in WPA3-Personal networks which are using SAE authentication. + * This is a u8 attribute that encapsulates one of the values from + * &enum nl80211_sae_pwe_mechanism. + * + * @NL80211_ATTR_SAR_SPEC: SAR power limitation specification when + * used with %NL80211_CMD_SET_SAR_SPECS. The message contains fields + * of %nl80211_sar_attrs which specifies the sar type and related + * sar specs. Sar specs contains array of %nl80211_sar_specs_attrs. + * + * @NL80211_ATTR_RECONNECT_REQUESTED: flag attribute, used with deauth and + * disassoc events to indicate that an immediate reconnect to the AP + * is desired. + * + * @NL80211_ATTR_OBSS_COLOR_BITMAP: bitmap of the u64 BSS colors for the + * %NL80211_CMD_OBSS_COLOR_COLLISION event. + * + * @NL80211_ATTR_COLOR_CHANGE_COUNT: u8 attribute specifying the number of TBTT's + * until the color switch event. + * @NL80211_ATTR_COLOR_CHANGE_COLOR: u8 attribute specifying the color that we are + * switching to + * @NL80211_ATTR_COLOR_CHANGE_ELEMS: Nested set of attributes containing the IE + * information for the time while performing a color switch. + * + * @NL80211_ATTR_MBSSID_CONFIG: Nested attribute for multiple BSSID + * advertisements (MBSSID) parameters in AP mode. + * Kernel uses this attribute to indicate the driver's support for MBSSID + * and enhanced multi-BSSID advertisements (EMA AP) to the userspace. + * Userspace should use this attribute to configure per interface MBSSID + * parameters. + * See &enum nl80211_mbssid_config_attributes for details. + * + * @NL80211_ATTR_MBSSID_ELEMS: Nested parameter to pass multiple BSSID elements. + * Mandatory parameter for the transmitting interface to enable MBSSID. + * Optional for the non-transmitting interfaces. + * + * @NL80211_ATTR_RADAR_BACKGROUND: Configure dedicated offchannel chain + * available for radar/CAC detection on some hw. This chain can't be used + * to transmit or receive frames and it is bounded to a running wdev. + * Background radar/CAC detection allows to avoid the CAC downtime + * switching on a different channel during CAC detection on the selected + * radar channel. + * + * @NL80211_ATTR_AP_SETTINGS_FLAGS: u32 attribute contains ap settings flags, + * enumerated in &enum nl80211_ap_settings_flags. This attribute shall be + * used with %NL80211_CMD_START_AP request. + * + * @NL80211_ATTR_EHT_CAPABILITY: EHT Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION). Can be set + * only if %NL80211_STA_FLAG_WME is set. + * + * @NL80211_ATTR_MLO_LINK_ID: A (u8) link ID for use with MLO, to be used with + * various commands that need a link ID to operate. + * @NL80211_ATTR_MLO_LINKS: A nested array of links, each containing some + * per-link information and a link ID. + * @NL80211_ATTR_MLD_ADDR: An MLD address, used with various commands such as + * authenticate/associate. + * + * @NL80211_ATTR_MLO_SUPPORT: Flag attribute to indicate user space supports MLO + * connection. Used with %NL80211_CMD_CONNECT. If this attribute is not + * included in NL80211_CMD_CONNECT drivers must not perform MLO connection. + * + * @NL80211_ATTR_MAX_NUM_AKM_SUITES: U16 attribute. Indicates maximum number of + * AKM suites allowed for %NL80211_CMD_CONNECT, %NL80211_CMD_ASSOCIATE and + * %NL80211_CMD_START_AP in %NL80211_CMD_GET_WIPHY response. If this + * attribute is not present userspace shall consider maximum number of AKM + * suites allowed as %NL80211_MAX_NR_AKM_SUITES which is the legacy maximum + * number prior to the introduction of this attribute. + * + * @NL80211_ATTR_EML_CAPABILITY: EML Capability information (u16) + * @NL80211_ATTR_MLD_CAPA_AND_OPS: MLD Capabilities and Operations (u16) + * + * @NL80211_ATTR_TX_HW_TIMESTAMP: Hardware timestamp for TX operation in + * nanoseconds (u64). This is the device clock timestamp so it will + * probably reset when the device is stopped or the firmware is reset. + * When used with %NL80211_CMD_FRAME_TX_STATUS, indicates the frame TX + * timestamp. When used with %NL80211_CMD_FRAME RX notification, indicates + * the ack TX timestamp. + * @NL80211_ATTR_RX_HW_TIMESTAMP: Hardware timestamp for RX operation in + * nanoseconds (u64). This is the device clock timestamp so it will + * probably reset when the device is stopped or the firmware is reset. + * When used with %NL80211_CMD_FRAME_TX_STATUS, indicates the ack RX + * timestamp. When used with %NL80211_CMD_FRAME RX notification, indicates + * the incoming frame RX timestamp. + * @NL80211_ATTR_TD_BITMAP: Transition Disable bitmap, for subsequent + * (re)associations. + * @NUM_NL80211_ATTR: total number of nl80211_attrs available + * @NL80211_ATTR_MAX: highest attribute number currently defined + * @__NL80211_ATTR_AFTER_LAST: internal use + */ +enum nl80211_attrs { +/* don't change the order or add anything between, this is ABI! */ + NL80211_ATTR_UNSPEC, + + NL80211_ATTR_WIPHY, + NL80211_ATTR_WIPHY_NAME, + + NL80211_ATTR_IFINDEX, + NL80211_ATTR_IFNAME, + NL80211_ATTR_IFTYPE, + + NL80211_ATTR_MAC, + + NL80211_ATTR_KEY_DATA, + NL80211_ATTR_KEY_IDX, + NL80211_ATTR_KEY_CIPHER, + NL80211_ATTR_KEY_SEQ, + NL80211_ATTR_KEY_DEFAULT, + + NL80211_ATTR_BEACON_INTERVAL, + NL80211_ATTR_DTIM_PERIOD, + NL80211_ATTR_BEACON_HEAD, + NL80211_ATTR_BEACON_TAIL, + + NL80211_ATTR_STA_AID, + NL80211_ATTR_STA_FLAGS, + NL80211_ATTR_STA_LISTEN_INTERVAL, + NL80211_ATTR_STA_SUPPORTED_RATES, + NL80211_ATTR_STA_VLAN, + NL80211_ATTR_STA_INFO, + + NL80211_ATTR_WIPHY_BANDS, + + NL80211_ATTR_MNTR_FLAGS, + + NL80211_ATTR_MESH_ID, + NL80211_ATTR_STA_PLINK_ACTION, + NL80211_ATTR_MPATH_NEXT_HOP, + NL80211_ATTR_MPATH_INFO, + + NL80211_ATTR_BSS_CTS_PROT, + NL80211_ATTR_BSS_SHORT_PREAMBLE, + NL80211_ATTR_BSS_SHORT_SLOT_TIME, + + NL80211_ATTR_HT_CAPABILITY, + + NL80211_ATTR_SUPPORTED_IFTYPES, + + NL80211_ATTR_REG_ALPHA2, + NL80211_ATTR_REG_RULES, + + NL80211_ATTR_MESH_CONFIG, + + NL80211_ATTR_BSS_BASIC_RATES, + + NL80211_ATTR_WIPHY_TXQ_PARAMS, + NL80211_ATTR_WIPHY_FREQ, + NL80211_ATTR_WIPHY_CHANNEL_TYPE, + + NL80211_ATTR_KEY_DEFAULT_MGMT, + + NL80211_ATTR_MGMT_SUBTYPE, + NL80211_ATTR_IE, + + NL80211_ATTR_MAX_NUM_SCAN_SSIDS, + + NL80211_ATTR_SCAN_FREQUENCIES, + NL80211_ATTR_SCAN_SSIDS, + NL80211_ATTR_GENERATION, /* replaces old SCAN_GENERATION */ + NL80211_ATTR_BSS, + + NL80211_ATTR_REG_INITIATOR, + NL80211_ATTR_REG_TYPE, + + NL80211_ATTR_SUPPORTED_COMMANDS, + + NL80211_ATTR_FRAME, + NL80211_ATTR_SSID, + NL80211_ATTR_AUTH_TYPE, + NL80211_ATTR_REASON_CODE, + + NL80211_ATTR_KEY_TYPE, + + NL80211_ATTR_MAX_SCAN_IE_LEN, + NL80211_ATTR_CIPHER_SUITES, + + NL80211_ATTR_FREQ_BEFORE, + NL80211_ATTR_FREQ_AFTER, + + NL80211_ATTR_FREQ_FIXED, + + + NL80211_ATTR_WIPHY_RETRY_SHORT, + NL80211_ATTR_WIPHY_RETRY_LONG, + NL80211_ATTR_WIPHY_FRAG_THRESHOLD, + NL80211_ATTR_WIPHY_RTS_THRESHOLD, + + NL80211_ATTR_TIMED_OUT, + + NL80211_ATTR_USE_MFP, + + NL80211_ATTR_STA_FLAGS2, + + NL80211_ATTR_CONTROL_PORT, + + NL80211_ATTR_TESTDATA, + + NL80211_ATTR_PRIVACY, + + NL80211_ATTR_DISCONNECTED_BY_AP, + NL80211_ATTR_STATUS_CODE, + + NL80211_ATTR_CIPHER_SUITES_PAIRWISE, + NL80211_ATTR_CIPHER_SUITE_GROUP, + NL80211_ATTR_WPA_VERSIONS, + NL80211_ATTR_AKM_SUITES, + + NL80211_ATTR_REQ_IE, + NL80211_ATTR_RESP_IE, + + NL80211_ATTR_PREV_BSSID, + + NL80211_ATTR_KEY, + NL80211_ATTR_KEYS, + + NL80211_ATTR_PID, + + NL80211_ATTR_4ADDR, + + NL80211_ATTR_SURVEY_INFO, + + NL80211_ATTR_PMKID, + NL80211_ATTR_MAX_NUM_PMKIDS, + + NL80211_ATTR_DURATION, + + NL80211_ATTR_COOKIE, + + NL80211_ATTR_WIPHY_COVERAGE_CLASS, + + NL80211_ATTR_TX_RATES, + + NL80211_ATTR_FRAME_MATCH, + + NL80211_ATTR_ACK, + + NL80211_ATTR_PS_STATE, + + NL80211_ATTR_CQM, + + NL80211_ATTR_LOCAL_STATE_CHANGE, + + NL80211_ATTR_AP_ISOLATE, + + NL80211_ATTR_WIPHY_TX_POWER_SETTING, + NL80211_ATTR_WIPHY_TX_POWER_LEVEL, + + NL80211_ATTR_TX_FRAME_TYPES, + NL80211_ATTR_RX_FRAME_TYPES, + NL80211_ATTR_FRAME_TYPE, + + NL80211_ATTR_CONTROL_PORT_ETHERTYPE, + NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT, + + NL80211_ATTR_SUPPORT_IBSS_RSN, + + NL80211_ATTR_WIPHY_ANTENNA_TX, + NL80211_ATTR_WIPHY_ANTENNA_RX, + + NL80211_ATTR_MCAST_RATE, + + NL80211_ATTR_OFFCHANNEL_TX_OK, + + NL80211_ATTR_BSS_HT_OPMODE, + + NL80211_ATTR_KEY_DEFAULT_TYPES, + + NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, + + NL80211_ATTR_MESH_SETUP, + + NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, + NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, + + NL80211_ATTR_SUPPORT_MESH_AUTH, + NL80211_ATTR_STA_PLINK_STATE, + + NL80211_ATTR_WOWLAN_TRIGGERS, + NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, + + NL80211_ATTR_SCHED_SCAN_INTERVAL, + + NL80211_ATTR_INTERFACE_COMBINATIONS, + NL80211_ATTR_SOFTWARE_IFTYPES, + + NL80211_ATTR_REKEY_DATA, + + NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS, + NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, + + NL80211_ATTR_SCAN_SUPP_RATES, + + NL80211_ATTR_HIDDEN_SSID, + + NL80211_ATTR_IE_PROBE_RESP, + NL80211_ATTR_IE_ASSOC_RESP, + + NL80211_ATTR_STA_WME, + NL80211_ATTR_SUPPORT_AP_UAPSD, + + NL80211_ATTR_ROAM_SUPPORT, + + NL80211_ATTR_SCHED_SCAN_MATCH, + NL80211_ATTR_MAX_MATCH_SETS, + + NL80211_ATTR_PMKSA_CANDIDATE, + + NL80211_ATTR_TX_NO_CCK_RATE, + + NL80211_ATTR_TDLS_ACTION, + NL80211_ATTR_TDLS_DIALOG_TOKEN, + NL80211_ATTR_TDLS_OPERATION, + NL80211_ATTR_TDLS_SUPPORT, + NL80211_ATTR_TDLS_EXTERNAL_SETUP, + + NL80211_ATTR_DEVICE_AP_SME, + + NL80211_ATTR_DONT_WAIT_FOR_ACK, + + NL80211_ATTR_FEATURE_FLAGS, + + NL80211_ATTR_PROBE_RESP_OFFLOAD, + + NL80211_ATTR_PROBE_RESP, + + NL80211_ATTR_DFS_REGION, + + NL80211_ATTR_DISABLE_HT, + NL80211_ATTR_HT_CAPABILITY_MASK, + + NL80211_ATTR_NOACK_MAP, + + NL80211_ATTR_INACTIVITY_TIMEOUT, + + NL80211_ATTR_RX_SIGNAL_DBM, + + NL80211_ATTR_BG_SCAN_PERIOD, + + NL80211_ATTR_WDEV, + + NL80211_ATTR_USER_REG_HINT_TYPE, + + NL80211_ATTR_CONN_FAILED_REASON, + + NL80211_ATTR_AUTH_DATA, + + NL80211_ATTR_VHT_CAPABILITY, + + NL80211_ATTR_SCAN_FLAGS, + + NL80211_ATTR_CHANNEL_WIDTH, + NL80211_ATTR_CENTER_FREQ1, + NL80211_ATTR_CENTER_FREQ2, + + NL80211_ATTR_P2P_CTWINDOW, + NL80211_ATTR_P2P_OPPPS, + + NL80211_ATTR_LOCAL_MESH_POWER_MODE, + + NL80211_ATTR_ACL_POLICY, + + NL80211_ATTR_MAC_ADDRS, + + NL80211_ATTR_MAC_ACL_MAX, + + NL80211_ATTR_RADAR_EVENT, + + NL80211_ATTR_EXT_CAPA, + NL80211_ATTR_EXT_CAPA_MASK, + + NL80211_ATTR_STA_CAPABILITY, + NL80211_ATTR_STA_EXT_CAPABILITY, + + NL80211_ATTR_PROTOCOL_FEATURES, + NL80211_ATTR_SPLIT_WIPHY_DUMP, + + NL80211_ATTR_DISABLE_VHT, + NL80211_ATTR_VHT_CAPABILITY_MASK, + + NL80211_ATTR_MDID, + NL80211_ATTR_IE_RIC, + + NL80211_ATTR_CRIT_PROT_ID, + NL80211_ATTR_MAX_CRIT_PROT_DURATION, + + NL80211_ATTR_PEER_AID, + + NL80211_ATTR_COALESCE_RULE, + + NL80211_ATTR_CH_SWITCH_COUNT, + NL80211_ATTR_CH_SWITCH_BLOCK_TX, + NL80211_ATTR_CSA_IES, + NL80211_ATTR_CNTDWN_OFFS_BEACON, + NL80211_ATTR_CNTDWN_OFFS_PRESP, + + NL80211_ATTR_RXMGMT_FLAGS, + + NL80211_ATTR_STA_SUPPORTED_CHANNELS, + + NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES, + + NL80211_ATTR_HANDLE_DFS, + + NL80211_ATTR_SUPPORT_5_MHZ, + NL80211_ATTR_SUPPORT_10_MHZ, + + NL80211_ATTR_OPMODE_NOTIF, + + NL80211_ATTR_VENDOR_ID, + NL80211_ATTR_VENDOR_SUBCMD, + NL80211_ATTR_VENDOR_DATA, + NL80211_ATTR_VENDOR_EVENTS, + + NL80211_ATTR_QOS_MAP, + + NL80211_ATTR_MAC_HINT, + NL80211_ATTR_WIPHY_FREQ_HINT, + + NL80211_ATTR_MAX_AP_ASSOC_STA, + + NL80211_ATTR_TDLS_PEER_CAPABILITY, + + NL80211_ATTR_SOCKET_OWNER, + + NL80211_ATTR_CSA_C_OFFSETS_TX, + NL80211_ATTR_MAX_CSA_COUNTERS, + + NL80211_ATTR_TDLS_INITIATOR, + + NL80211_ATTR_USE_RRM, + + NL80211_ATTR_WIPHY_DYN_ACK, + + NL80211_ATTR_TSID, + NL80211_ATTR_USER_PRIO, + NL80211_ATTR_ADMITTED_TIME, + + NL80211_ATTR_SMPS_MODE, + + NL80211_ATTR_OPER_CLASS, + + NL80211_ATTR_MAC_MASK, + + NL80211_ATTR_WIPHY_SELF_MANAGED_REG, + + NL80211_ATTR_EXT_FEATURES, + + NL80211_ATTR_SURVEY_RADIO_STATS, + + NL80211_ATTR_NETNS_FD, + + NL80211_ATTR_SCHED_SCAN_DELAY, + + NL80211_ATTR_REG_INDOOR, + + NL80211_ATTR_MAX_NUM_SCHED_SCAN_PLANS, + NL80211_ATTR_MAX_SCAN_PLAN_INTERVAL, + NL80211_ATTR_MAX_SCAN_PLAN_ITERATIONS, + NL80211_ATTR_SCHED_SCAN_PLANS, + + NL80211_ATTR_PBSS, + + NL80211_ATTR_BSS_SELECT, + + NL80211_ATTR_STA_SUPPORT_P2P_PS, + + NL80211_ATTR_PAD, + + NL80211_ATTR_IFTYPE_EXT_CAPA, + + NL80211_ATTR_MU_MIMO_GROUP_DATA, + NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR, + + NL80211_ATTR_SCAN_START_TIME_TSF, + NL80211_ATTR_SCAN_START_TIME_TSF_BSSID, + NL80211_ATTR_MEASUREMENT_DURATION, + NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY, + + NL80211_ATTR_MESH_PEER_AID, + + NL80211_ATTR_NAN_MASTER_PREF, + NL80211_ATTR_BANDS, + NL80211_ATTR_NAN_FUNC, + NL80211_ATTR_NAN_MATCH, + + NL80211_ATTR_FILS_KEK, + NL80211_ATTR_FILS_NONCES, + + NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED, + + NL80211_ATTR_BSSID, + + NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI, + NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST, + + NL80211_ATTR_TIMEOUT_REASON, + + NL80211_ATTR_FILS_ERP_USERNAME, + NL80211_ATTR_FILS_ERP_REALM, + NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM, + NL80211_ATTR_FILS_ERP_RRK, + NL80211_ATTR_FILS_CACHE_ID, + + NL80211_ATTR_PMK, + + NL80211_ATTR_SCHED_SCAN_MULTI, + NL80211_ATTR_SCHED_SCAN_MAX_REQS, + + NL80211_ATTR_WANT_1X_4WAY_HS, + NL80211_ATTR_PMKR0_NAME, + NL80211_ATTR_PORT_AUTHORIZED, + + NL80211_ATTR_EXTERNAL_AUTH_ACTION, + NL80211_ATTR_EXTERNAL_AUTH_SUPPORT, + + NL80211_ATTR_NSS, + NL80211_ATTR_ACK_SIGNAL, + + NL80211_ATTR_CONTROL_PORT_OVER_NL80211, + + NL80211_ATTR_TXQ_STATS, + NL80211_ATTR_TXQ_LIMIT, + NL80211_ATTR_TXQ_MEMORY_LIMIT, + NL80211_ATTR_TXQ_QUANTUM, + + NL80211_ATTR_HE_CAPABILITY, + + NL80211_ATTR_FTM_RESPONDER, + + NL80211_ATTR_FTM_RESPONDER_STATS, + + NL80211_ATTR_TIMEOUT, + + NL80211_ATTR_PEER_MEASUREMENTS, + + NL80211_ATTR_AIRTIME_WEIGHT, + NL80211_ATTR_STA_TX_POWER_SETTING, + NL80211_ATTR_STA_TX_POWER, + + NL80211_ATTR_SAE_PASSWORD, + + NL80211_ATTR_TWT_RESPONDER, + + NL80211_ATTR_HE_OBSS_PD, + + NL80211_ATTR_WIPHY_EDMG_CHANNELS, + NL80211_ATTR_WIPHY_EDMG_BW_CONFIG, + + NL80211_ATTR_VLAN_ID, + + NL80211_ATTR_HE_BSS_COLOR, + + NL80211_ATTR_IFTYPE_AKM_SUITES, + + NL80211_ATTR_TID_CONFIG, + + NL80211_ATTR_CONTROL_PORT_NO_PREAUTH, + + NL80211_ATTR_PMK_LIFETIME, + NL80211_ATTR_PMK_REAUTH_THRESHOLD, + + NL80211_ATTR_RECEIVE_MULTICAST, + NL80211_ATTR_WIPHY_FREQ_OFFSET, + NL80211_ATTR_CENTER_FREQ1_OFFSET, + NL80211_ATTR_SCAN_FREQ_KHZ, + + NL80211_ATTR_HE_6GHZ_CAPABILITY, + + NL80211_ATTR_FILS_DISCOVERY, + + NL80211_ATTR_UNSOL_BCAST_PROBE_RESP, + + NL80211_ATTR_S1G_CAPABILITY, + NL80211_ATTR_S1G_CAPABILITY_MASK, + + NL80211_ATTR_SAE_PWE, + + NL80211_ATTR_RECONNECT_REQUESTED, + + NL80211_ATTR_SAR_SPEC, + + NL80211_ATTR_DISABLE_HE, + + NL80211_ATTR_OBSS_COLOR_BITMAP, + + NL80211_ATTR_COLOR_CHANGE_COUNT, + NL80211_ATTR_COLOR_CHANGE_COLOR, + NL80211_ATTR_COLOR_CHANGE_ELEMS, + + NL80211_ATTR_MBSSID_CONFIG, + NL80211_ATTR_MBSSID_ELEMS, + + NL80211_ATTR_RADAR_BACKGROUND, + + NL80211_ATTR_AP_SETTINGS_FLAGS, + + NL80211_ATTR_EHT_CAPABILITY, + + NL80211_ATTR_DISABLE_EHT, + + NL80211_ATTR_MLO_LINKS, + NL80211_ATTR_MLO_LINK_ID, + NL80211_ATTR_MLD_ADDR, + + NL80211_ATTR_MLO_SUPPORT, + + NL80211_ATTR_MAX_NUM_AKM_SUITES, + + NL80211_ATTR_EML_CAPABILITY, + NL80211_ATTR_MLD_CAPA_AND_OPS, + + NL80211_ATTR_TX_HW_TIMESTAMP, + NL80211_ATTR_RX_HW_TIMESTAMP, + NL80211_ATTR_TD_BITMAP, + + /* add attributes here, update the policy in nl80211.c */ + + __NL80211_ATTR_AFTER_LAST, + NUM_NL80211_ATTR = __NL80211_ATTR_AFTER_LAST, + NL80211_ATTR_MAX = __NL80211_ATTR_AFTER_LAST - 1 +}; + +/* source-level API compatibility */ +#define NL80211_ATTR_SCAN_GENERATION NL80211_ATTR_GENERATION +#define NL80211_ATTR_MESH_PARAMS NL80211_ATTR_MESH_CONFIG +#define NL80211_ATTR_IFACE_SOCKET_OWNER NL80211_ATTR_SOCKET_OWNER +#define NL80211_ATTR_SAE_DATA NL80211_ATTR_AUTH_DATA +#define NL80211_ATTR_CSA_C_OFF_BEACON NL80211_ATTR_CNTDWN_OFFS_BEACON +#define NL80211_ATTR_CSA_C_OFF_PRESP NL80211_ATTR_CNTDWN_OFFS_PRESP + +/* + * Allow user space programs to use #ifdef on new attributes by defining them + * here + */ +#define NL80211_CMD_CONNECT NL80211_CMD_CONNECT +#define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY +#define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES +#define NL80211_ATTR_WIPHY_TXQ_PARAMS NL80211_ATTR_WIPHY_TXQ_PARAMS +#define NL80211_ATTR_WIPHY_FREQ NL80211_ATTR_WIPHY_FREQ +#define NL80211_ATTR_WIPHY_CHANNEL_TYPE NL80211_ATTR_WIPHY_CHANNEL_TYPE +#define NL80211_ATTR_MGMT_SUBTYPE NL80211_ATTR_MGMT_SUBTYPE +#define NL80211_ATTR_IE NL80211_ATTR_IE +#define NL80211_ATTR_REG_INITIATOR NL80211_ATTR_REG_INITIATOR +#define NL80211_ATTR_REG_TYPE NL80211_ATTR_REG_TYPE +#define NL80211_ATTR_FRAME NL80211_ATTR_FRAME +#define NL80211_ATTR_SSID NL80211_ATTR_SSID +#define NL80211_ATTR_AUTH_TYPE NL80211_ATTR_AUTH_TYPE +#define NL80211_ATTR_REASON_CODE NL80211_ATTR_REASON_CODE +#define NL80211_ATTR_CIPHER_SUITES_PAIRWISE NL80211_ATTR_CIPHER_SUITES_PAIRWISE +#define NL80211_ATTR_CIPHER_SUITE_GROUP NL80211_ATTR_CIPHER_SUITE_GROUP +#define NL80211_ATTR_WPA_VERSIONS NL80211_ATTR_WPA_VERSIONS +#define NL80211_ATTR_AKM_SUITES NL80211_ATTR_AKM_SUITES +#define NL80211_ATTR_KEY NL80211_ATTR_KEY +#define NL80211_ATTR_KEYS NL80211_ATTR_KEYS +#define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS + +#define NL80211_WIPHY_NAME_MAXLEN 64 + +#define NL80211_MAX_SUPP_RATES 32 +#define NL80211_MAX_SUPP_HT_RATES 77 +#define NL80211_MAX_SUPP_REG_RULES 128 +#define NL80211_TKIP_DATA_OFFSET_ENCR_KEY 0 +#define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16 +#define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24 +#define NL80211_HT_CAPABILITY_LEN 26 +#define NL80211_VHT_CAPABILITY_LEN 12 +#define NL80211_HE_MIN_CAPABILITY_LEN 16 +#define NL80211_HE_MAX_CAPABILITY_LEN 54 +#define NL80211_MAX_NR_CIPHER_SUITES 5 + +/* + * NL80211_MAX_NR_AKM_SUITES is obsolete when %NL80211_ATTR_MAX_NUM_AKM_SUITES + * present in %NL80211_CMD_GET_WIPHY response. + */ +#define NL80211_MAX_NR_AKM_SUITES 2 +#define NL80211_EHT_MIN_CAPABILITY_LEN 13 +#define NL80211_EHT_MAX_CAPABILITY_LEN 51 + +#define NL80211_MIN_REMAIN_ON_CHANNEL_TIME 10 + +/* default RSSI threshold for scan results if none specified. */ +#define NL80211_SCAN_RSSI_THOLD_OFF -300 + +#define NL80211_CQM_TXE_MAX_INTVL 1800 + +/** + * enum nl80211_iftype - (virtual) interface types + * + * @NL80211_IFTYPE_UNSPECIFIED: unspecified type, driver decides + * @NL80211_IFTYPE_ADHOC: independent BSS member + * @NL80211_IFTYPE_STATION: managed BSS member + * @NL80211_IFTYPE_AP: access point + * @NL80211_IFTYPE_AP_VLAN: VLAN interface for access points; VLAN interfaces + * are a bit special in that they must always be tied to a pre-existing + * AP type interface. + * @NL80211_IFTYPE_WDS: wireless distribution interface + * @NL80211_IFTYPE_MONITOR: monitor interface receiving all frames + * @NL80211_IFTYPE_MESH_POINT: mesh point + * @NL80211_IFTYPE_P2P_CLIENT: P2P client + * @NL80211_IFTYPE_P2P_GO: P2P group owner + * @NL80211_IFTYPE_P2P_DEVICE: P2P device interface type, this is not a netdev + * and therefore can't be created in the normal ways, use the + * %NL80211_CMD_START_P2P_DEVICE and %NL80211_CMD_STOP_P2P_DEVICE + * commands to create and destroy one + * @NL80211_IFTYPE_OCB: Outside Context of a BSS + * This mode corresponds to the MIB variable dot11OCBActivated=true + * @NL80211_IFTYPE_NAN: NAN device interface type (not a netdev) + * @NL80211_IFTYPE_MAX: highest interface type number currently defined + * @NUM_NL80211_IFTYPES: number of defined interface types + * + * These values are used with the %NL80211_ATTR_IFTYPE + * to set the type of an interface. + * + */ +enum nl80211_iftype { + NL80211_IFTYPE_UNSPECIFIED, + NL80211_IFTYPE_ADHOC, + NL80211_IFTYPE_STATION, + NL80211_IFTYPE_AP, + NL80211_IFTYPE_AP_VLAN, + NL80211_IFTYPE_WDS, + NL80211_IFTYPE_MONITOR, + NL80211_IFTYPE_MESH_POINT, + NL80211_IFTYPE_P2P_CLIENT, + NL80211_IFTYPE_P2P_GO, + NL80211_IFTYPE_P2P_DEVICE, + NL80211_IFTYPE_OCB, + NL80211_IFTYPE_NAN, + + /* keep last */ + NUM_NL80211_IFTYPES, + NL80211_IFTYPE_MAX = NUM_NL80211_IFTYPES - 1 +}; + +/** + * enum nl80211_sta_flags - station flags + * + * Station flags. When a station is added to an AP interface, it is + * assumed to be already associated (and hence authenticated.) + * + * @__NL80211_STA_FLAG_INVALID: attribute number 0 is reserved + * @NL80211_STA_FLAG_AUTHORIZED: station is authorized (802.1X) + * @NL80211_STA_FLAG_SHORT_PREAMBLE: station is capable of receiving frames + * with short barker preamble + * @NL80211_STA_FLAG_WME: station is WME/QoS capable + * @NL80211_STA_FLAG_MFP: station uses management frame protection + * @NL80211_STA_FLAG_AUTHENTICATED: station is authenticated + * @NL80211_STA_FLAG_TDLS_PEER: station is a TDLS peer -- this flag should + * only be used in managed mode (even in the flags mask). Note that the + * flag can't be changed, it is only valid while adding a station, and + * attempts to change it will silently be ignored (rather than rejected + * as errors.) + * @NL80211_STA_FLAG_ASSOCIATED: station is associated; used with drivers + * that support %NL80211_FEATURE_FULL_AP_CLIENT_STATE to transition a + * previously added station into associated state + * @NL80211_STA_FLAG_MAX: highest station flag number currently defined + * @__NL80211_STA_FLAG_AFTER_LAST: internal use + */ +enum nl80211_sta_flags { + __NL80211_STA_FLAG_INVALID, + NL80211_STA_FLAG_AUTHORIZED, + NL80211_STA_FLAG_SHORT_PREAMBLE, + NL80211_STA_FLAG_WME, + NL80211_STA_FLAG_MFP, + NL80211_STA_FLAG_AUTHENTICATED, + NL80211_STA_FLAG_TDLS_PEER, + NL80211_STA_FLAG_ASSOCIATED, + + /* keep last */ + __NL80211_STA_FLAG_AFTER_LAST, + NL80211_STA_FLAG_MAX = __NL80211_STA_FLAG_AFTER_LAST - 1 +}; + +/** + * enum nl80211_sta_p2p_ps_status - station support of P2P PS + * + * @NL80211_P2P_PS_UNSUPPORTED: station doesn't support P2P PS mechanism + * @@NL80211_P2P_PS_SUPPORTED: station supports P2P PS mechanism + * @NUM_NL80211_P2P_PS_STATUS: number of values + */ +enum nl80211_sta_p2p_ps_status { + NL80211_P2P_PS_UNSUPPORTED = 0, + NL80211_P2P_PS_SUPPORTED, + + NUM_NL80211_P2P_PS_STATUS, +}; + +#define NL80211_STA_FLAG_MAX_OLD_API NL80211_STA_FLAG_TDLS_PEER + +/** + * struct nl80211_sta_flag_update - station flags mask/set + * @mask: mask of station flags to set + * @set: which values to set them to + * + * Both mask and set contain bits as per &enum nl80211_sta_flags. + */ +struct nl80211_sta_flag_update { + __u32 mask; + __u32 set; +} __attribute__((packed)); + +/** + * enum nl80211_he_gi - HE guard interval + * @NL80211_RATE_INFO_HE_GI_0_8: 0.8 usec + * @NL80211_RATE_INFO_HE_GI_1_6: 1.6 usec + * @NL80211_RATE_INFO_HE_GI_3_2: 3.2 usec + */ +enum nl80211_he_gi { + NL80211_RATE_INFO_HE_GI_0_8, + NL80211_RATE_INFO_HE_GI_1_6, + NL80211_RATE_INFO_HE_GI_3_2, +}; + +/** + * enum nl80211_he_ltf - HE long training field + * @NL80211_RATE_INFO_HE_1xLTF: 3.2 usec + * @NL80211_RATE_INFO_HE_2xLTF: 6.4 usec + * @NL80211_RATE_INFO_HE_4xLTF: 12.8 usec + */ +enum nl80211_he_ltf { + NL80211_RATE_INFO_HE_1XLTF, + NL80211_RATE_INFO_HE_2XLTF, + NL80211_RATE_INFO_HE_4XLTF, +}; + +/** + * enum nl80211_he_ru_alloc - HE RU allocation values + * @NL80211_RATE_INFO_HE_RU_ALLOC_26: 26-tone RU allocation + * @NL80211_RATE_INFO_HE_RU_ALLOC_52: 52-tone RU allocation + * @NL80211_RATE_INFO_HE_RU_ALLOC_106: 106-tone RU allocation + * @NL80211_RATE_INFO_HE_RU_ALLOC_242: 242-tone RU allocation + * @NL80211_RATE_INFO_HE_RU_ALLOC_484: 484-tone RU allocation + * @NL80211_RATE_INFO_HE_RU_ALLOC_996: 996-tone RU allocation + * @NL80211_RATE_INFO_HE_RU_ALLOC_2x996: 2x996-tone RU allocation + */ +enum nl80211_he_ru_alloc { + NL80211_RATE_INFO_HE_RU_ALLOC_26, + NL80211_RATE_INFO_HE_RU_ALLOC_52, + NL80211_RATE_INFO_HE_RU_ALLOC_106, + NL80211_RATE_INFO_HE_RU_ALLOC_242, + NL80211_RATE_INFO_HE_RU_ALLOC_484, + NL80211_RATE_INFO_HE_RU_ALLOC_996, + NL80211_RATE_INFO_HE_RU_ALLOC_2x996, +}; + +/** + * enum nl80211_eht_gi - EHT guard interval + * @NL80211_RATE_INFO_EHT_GI_0_8: 0.8 usec + * @NL80211_RATE_INFO_EHT_GI_1_6: 1.6 usec + * @NL80211_RATE_INFO_EHT_GI_3_2: 3.2 usec + */ +enum nl80211_eht_gi { + NL80211_RATE_INFO_EHT_GI_0_8, + NL80211_RATE_INFO_EHT_GI_1_6, + NL80211_RATE_INFO_EHT_GI_3_2, +}; + +/** + * enum nl80211_eht_ru_alloc - EHT RU allocation values + * @NL80211_RATE_INFO_EHT_RU_ALLOC_26: 26-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_52: 52-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_52P26: 52+26-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_106: 106-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_106P26: 106+26 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_242: 242-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_484: 484-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_484P242: 484+242 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_996: 996-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_996P484: 996+484 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_996P484P242: 996+484+242 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_2x996: 2x996-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_2x996P484: 2x996+484 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_3x996: 3x996-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_3x996P484: 3x996+484 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_4x996: 4x996-tone RU allocation + */ +enum nl80211_eht_ru_alloc { + NL80211_RATE_INFO_EHT_RU_ALLOC_26, + NL80211_RATE_INFO_EHT_RU_ALLOC_52, + NL80211_RATE_INFO_EHT_RU_ALLOC_52P26, + NL80211_RATE_INFO_EHT_RU_ALLOC_106, + NL80211_RATE_INFO_EHT_RU_ALLOC_106P26, + NL80211_RATE_INFO_EHT_RU_ALLOC_242, + NL80211_RATE_INFO_EHT_RU_ALLOC_484, + NL80211_RATE_INFO_EHT_RU_ALLOC_484P242, + NL80211_RATE_INFO_EHT_RU_ALLOC_996, + NL80211_RATE_INFO_EHT_RU_ALLOC_996P484, + NL80211_RATE_INFO_EHT_RU_ALLOC_996P484P242, + NL80211_RATE_INFO_EHT_RU_ALLOC_2x996, + NL80211_RATE_INFO_EHT_RU_ALLOC_2x996P484, + NL80211_RATE_INFO_EHT_RU_ALLOC_3x996, + NL80211_RATE_INFO_EHT_RU_ALLOC_3x996P484, + NL80211_RATE_INFO_EHT_RU_ALLOC_4x996, +}; + +/** + * enum nl80211_rate_info - bitrate information + * + * These attribute types are used with %NL80211_STA_INFO_TXRATE + * when getting information about the bitrate of a station. + * There are 2 attributes for bitrate, a legacy one that represents + * a 16-bit value, and new one that represents a 32-bit value. + * If the rate value fits into 16 bit, both attributes are reported + * with the same value. If the rate is too high to fit into 16 bits + * (>6.5535Gbps) only 32-bit attribute is included. + * User space tools encouraged to use the 32-bit attribute and fall + * back to the 16-bit one for compatibility with older kernels. + * + * @__NL80211_RATE_INFO_INVALID: attribute number 0 is reserved + * @NL80211_RATE_INFO_BITRATE: total bitrate (u16, 100kbit/s) + * @NL80211_RATE_INFO_MCS: mcs index for 802.11n (u8) + * @NL80211_RATE_INFO_40_MHZ_WIDTH: 40 MHz dualchannel bitrate + * @NL80211_RATE_INFO_SHORT_GI: 400ns guard interval + * @NL80211_RATE_INFO_BITRATE32: total bitrate (u32, 100kbit/s) + * @NL80211_RATE_INFO_MAX: highest rate_info number currently defined + * @NL80211_RATE_INFO_VHT_MCS: MCS index for VHT (u8) + * @NL80211_RATE_INFO_VHT_NSS: number of streams in VHT (u8) + * @NL80211_RATE_INFO_80_MHZ_WIDTH: 80 MHz VHT rate + * @NL80211_RATE_INFO_80P80_MHZ_WIDTH: unused - 80+80 is treated the + * same as 160 for purposes of the bitrates + * @NL80211_RATE_INFO_160_MHZ_WIDTH: 160 MHz VHT rate + * @NL80211_RATE_INFO_10_MHZ_WIDTH: 10 MHz width - note that this is + * a legacy rate and will be reported as the actual bitrate, i.e. + * half the base (20 MHz) rate + * @NL80211_RATE_INFO_5_MHZ_WIDTH: 5 MHz width - note that this is + * a legacy rate and will be reported as the actual bitrate, i.e. + * a quarter of the base (20 MHz) rate + * @NL80211_RATE_INFO_HE_MCS: HE MCS index (u8, 0-11) + * @NL80211_RATE_INFO_HE_NSS: HE NSS value (u8, 1-8) + * @NL80211_RATE_INFO_HE_GI: HE guard interval identifier + * (u8, see &enum nl80211_he_gi) + * @NL80211_RATE_INFO_HE_DCM: HE DCM value (u8, 0/1) + * @NL80211_RATE_INFO_RU_ALLOC: HE RU allocation, if not present then + * non-OFDMA was used (u8, see &enum nl80211_he_ru_alloc) + * @NL80211_RATE_INFO_320_MHZ_WIDTH: 320 MHz bitrate + * @NL80211_RATE_INFO_EHT_MCS: EHT MCS index (u8, 0-15) + * @NL80211_RATE_INFO_EHT_NSS: EHT NSS value (u8, 1-8) + * @NL80211_RATE_INFO_EHT_GI: EHT guard interval identifier + * (u8, see &enum nl80211_eht_gi) + * @NL80211_RATE_INFO_EHT_RU_ALLOC: EHT RU allocation, if not present then + * non-OFDMA was used (u8, see &enum nl80211_eht_ru_alloc) + * @__NL80211_RATE_INFO_AFTER_LAST: internal use + */ +enum nl80211_rate_info { + __NL80211_RATE_INFO_INVALID, + NL80211_RATE_INFO_BITRATE, + NL80211_RATE_INFO_MCS, + NL80211_RATE_INFO_40_MHZ_WIDTH, + NL80211_RATE_INFO_SHORT_GI, + NL80211_RATE_INFO_BITRATE32, + NL80211_RATE_INFO_VHT_MCS, + NL80211_RATE_INFO_VHT_NSS, + NL80211_RATE_INFO_80_MHZ_WIDTH, + NL80211_RATE_INFO_80P80_MHZ_WIDTH, + NL80211_RATE_INFO_160_MHZ_WIDTH, + NL80211_RATE_INFO_10_MHZ_WIDTH, + NL80211_RATE_INFO_5_MHZ_WIDTH, + NL80211_RATE_INFO_HE_MCS, + NL80211_RATE_INFO_HE_NSS, + NL80211_RATE_INFO_HE_GI, + NL80211_RATE_INFO_HE_DCM, + NL80211_RATE_INFO_HE_RU_ALLOC, + NL80211_RATE_INFO_320_MHZ_WIDTH, + NL80211_RATE_INFO_EHT_MCS, + NL80211_RATE_INFO_EHT_NSS, + NL80211_RATE_INFO_EHT_GI, + NL80211_RATE_INFO_EHT_RU_ALLOC, + + /* keep last */ + __NL80211_RATE_INFO_AFTER_LAST, + NL80211_RATE_INFO_MAX = __NL80211_RATE_INFO_AFTER_LAST - 1 +}; + +/** + * enum nl80211_sta_bss_param - BSS information collected by STA + * + * These attribute types are used with %NL80211_STA_INFO_BSS_PARAM + * when getting information about the bitrate of a station. + * + * @__NL80211_STA_BSS_PARAM_INVALID: attribute number 0 is reserved + * @NL80211_STA_BSS_PARAM_CTS_PROT: whether CTS protection is enabled (flag) + * @NL80211_STA_BSS_PARAM_SHORT_PREAMBLE: whether short preamble is enabled + * (flag) + * @NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME: whether short slot time is enabled + * (flag) + * @NL80211_STA_BSS_PARAM_DTIM_PERIOD: DTIM period for beaconing (u8) + * @NL80211_STA_BSS_PARAM_BEACON_INTERVAL: Beacon interval (u16) + * @NL80211_STA_BSS_PARAM_MAX: highest sta_bss_param number currently defined + * @__NL80211_STA_BSS_PARAM_AFTER_LAST: internal use + */ +enum nl80211_sta_bss_param { + __NL80211_STA_BSS_PARAM_INVALID, + NL80211_STA_BSS_PARAM_CTS_PROT, + NL80211_STA_BSS_PARAM_SHORT_PREAMBLE, + NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME, + NL80211_STA_BSS_PARAM_DTIM_PERIOD, + NL80211_STA_BSS_PARAM_BEACON_INTERVAL, + + /* keep last */ + __NL80211_STA_BSS_PARAM_AFTER_LAST, + NL80211_STA_BSS_PARAM_MAX = __NL80211_STA_BSS_PARAM_AFTER_LAST - 1 +}; + +/** + * enum nl80211_sta_info - station information + * + * These attribute types are used with %NL80211_ATTR_STA_INFO + * when getting information about a station. + * + * @__NL80211_STA_INFO_INVALID: attribute number 0 is reserved + * @NL80211_STA_INFO_INACTIVE_TIME: time since last activity (u32, msecs) + * @NL80211_STA_INFO_RX_BYTES: total received bytes (MPDU length) + * (u32, from this station) + * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (MPDU length) + * (u32, to this station) + * @NL80211_STA_INFO_RX_BYTES64: total received bytes (MPDU length) + * (u64, from this station) + * @NL80211_STA_INFO_TX_BYTES64: total transmitted bytes (MPDU length) + * (u64, to this station) + * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm) + * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute + * containing info as possible, see &enum nl80211_rate_info + * @NL80211_STA_INFO_RX_PACKETS: total received packet (MSDUs and MMPDUs) + * (u32, from this station) + * @NL80211_STA_INFO_TX_PACKETS: total transmitted packets (MSDUs and MMPDUs) + * (u32, to this station) + * @NL80211_STA_INFO_TX_RETRIES: total retries (MPDUs) (u32, to this station) + * @NL80211_STA_INFO_TX_FAILED: total failed packets (MPDUs) + * (u32, to this station) + * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm) + * @NL80211_STA_INFO_LLID: the station's mesh LLID + * @NL80211_STA_INFO_PLID: the station's mesh PLID + * @NL80211_STA_INFO_PLINK_STATE: peer link state for the station + * (see %enum nl80211_plink_state) + * @NL80211_STA_INFO_RX_BITRATE: last unicast data frame rx rate, nested + * attribute, like NL80211_STA_INFO_TX_BITRATE. + * @NL80211_STA_INFO_BSS_PARAM: current station's view of BSS, nested attribute + * containing info as possible, see &enum nl80211_sta_bss_param + * @NL80211_STA_INFO_CONNECTED_TIME: time since the station is last connected + * @NL80211_STA_INFO_STA_FLAGS: Contains a struct nl80211_sta_flag_update. + * @NL80211_STA_INFO_BEACON_LOSS: count of times beacon loss was detected (u32) + * @NL80211_STA_INFO_T_OFFSET: timing offset with respect to this STA (s64) + * @NL80211_STA_INFO_LOCAL_PM: local mesh STA link-specific power mode + * @NL80211_STA_INFO_PEER_PM: peer mesh STA link-specific power mode + * @NL80211_STA_INFO_NONPEER_PM: neighbor mesh STA power save mode towards + * non-peer STA + * @NL80211_STA_INFO_CHAIN_SIGNAL: per-chain signal strength of last PPDU + * Contains a nested array of signal strength attributes (u8, dBm) + * @NL80211_STA_INFO_CHAIN_SIGNAL_AVG: per-chain signal strength average + * Same format as NL80211_STA_INFO_CHAIN_SIGNAL. + * @NL80211_STA_EXPECTED_THROUGHPUT: expected throughput considering also the + * 802.11 header (u32, kbps) + * @NL80211_STA_INFO_RX_DROP_MISC: RX packets dropped for unspecified reasons + * (u64) + * @NL80211_STA_INFO_BEACON_RX: number of beacons received from this peer (u64) + * @NL80211_STA_INFO_BEACON_SIGNAL_AVG: signal strength average + * for beacons only (u8, dBm) + * @NL80211_STA_INFO_TID_STATS: per-TID statistics (see &enum nl80211_tid_stats) + * This is a nested attribute where each the inner attribute number is the + * TID+1 and the special TID 16 (i.e. value 17) is used for non-QoS frames; + * each one of those is again nested with &enum nl80211_tid_stats + * attributes carrying the actual values. + * @NL80211_STA_INFO_RX_DURATION: aggregate PPDU duration for all frames + * received from the station (u64, usec) + * @NL80211_STA_INFO_PAD: attribute used for padding for 64-bit alignment + * @NL80211_STA_INFO_ACK_SIGNAL: signal strength of the last ACK frame(u8, dBm) + * @NL80211_STA_INFO_ACK_SIGNAL_AVG: avg signal strength of ACK frames (s8, dBm) + * @NL80211_STA_INFO_RX_MPDUS: total number of received packets (MPDUs) + * (u32, from this station) + * @NL80211_STA_INFO_FCS_ERROR_COUNT: total number of packets (MPDUs) received + * with an FCS error (u32, from this station). This count may not include + * some packets with an FCS error due to TA corruption. Hence this counter + * might not be fully accurate. + * @NL80211_STA_INFO_CONNECTED_TO_GATE: set to true if STA has a path to a + * mesh gate (u8, 0 or 1) + * @NL80211_STA_INFO_TX_DURATION: aggregate PPDU duration for all frames + * sent to the station (u64, usec) + * @NL80211_STA_INFO_AIRTIME_WEIGHT: current airtime weight for station (u16) + * @NL80211_STA_INFO_AIRTIME_LINK_METRIC: airtime link metric for mesh station + * @NL80211_STA_INFO_ASSOC_AT_BOOTTIME: Timestamp (CLOCK_BOOTTIME, nanoseconds) + * of STA's association + * @NL80211_STA_INFO_CONNECTED_TO_AS: set to true if STA has a path to a + * authentication server (u8, 0 or 1) + * @__NL80211_STA_INFO_AFTER_LAST: internal + * @NL80211_STA_INFO_MAX: highest possible station info attribute + */ +enum nl80211_sta_info { + __NL80211_STA_INFO_INVALID, + NL80211_STA_INFO_INACTIVE_TIME, + NL80211_STA_INFO_RX_BYTES, + NL80211_STA_INFO_TX_BYTES, + NL80211_STA_INFO_LLID, + NL80211_STA_INFO_PLID, + NL80211_STA_INFO_PLINK_STATE, + NL80211_STA_INFO_SIGNAL, + NL80211_STA_INFO_TX_BITRATE, + NL80211_STA_INFO_RX_PACKETS, + NL80211_STA_INFO_TX_PACKETS, + NL80211_STA_INFO_TX_RETRIES, + NL80211_STA_INFO_TX_FAILED, + NL80211_STA_INFO_SIGNAL_AVG, + NL80211_STA_INFO_RX_BITRATE, + NL80211_STA_INFO_BSS_PARAM, + NL80211_STA_INFO_CONNECTED_TIME, + NL80211_STA_INFO_STA_FLAGS, + NL80211_STA_INFO_BEACON_LOSS, + NL80211_STA_INFO_T_OFFSET, + NL80211_STA_INFO_LOCAL_PM, + NL80211_STA_INFO_PEER_PM, + NL80211_STA_INFO_NONPEER_PM, + NL80211_STA_INFO_RX_BYTES64, + NL80211_STA_INFO_TX_BYTES64, + NL80211_STA_INFO_CHAIN_SIGNAL, + NL80211_STA_INFO_CHAIN_SIGNAL_AVG, + NL80211_STA_INFO_EXPECTED_THROUGHPUT, + NL80211_STA_INFO_RX_DROP_MISC, + NL80211_STA_INFO_BEACON_RX, + NL80211_STA_INFO_BEACON_SIGNAL_AVG, + NL80211_STA_INFO_TID_STATS, + NL80211_STA_INFO_RX_DURATION, + NL80211_STA_INFO_PAD, + NL80211_STA_INFO_ACK_SIGNAL, + NL80211_STA_INFO_ACK_SIGNAL_AVG, + NL80211_STA_INFO_RX_MPDUS, + NL80211_STA_INFO_FCS_ERROR_COUNT, + NL80211_STA_INFO_CONNECTED_TO_GATE, + NL80211_STA_INFO_TX_DURATION, + NL80211_STA_INFO_AIRTIME_WEIGHT, + NL80211_STA_INFO_AIRTIME_LINK_METRIC, + NL80211_STA_INFO_ASSOC_AT_BOOTTIME, + NL80211_STA_INFO_CONNECTED_TO_AS, + + /* keep last */ + __NL80211_STA_INFO_AFTER_LAST, + NL80211_STA_INFO_MAX = __NL80211_STA_INFO_AFTER_LAST - 1 +}; + +/* we renamed this - stay compatible */ +#define NL80211_STA_INFO_DATA_ACK_SIGNAL_AVG NL80211_STA_INFO_ACK_SIGNAL_AVG + + +/** + * enum nl80211_tid_stats - per TID statistics attributes + * @__NL80211_TID_STATS_INVALID: attribute number 0 is reserved + * @NL80211_TID_STATS_RX_MSDU: number of MSDUs received (u64) + * @NL80211_TID_STATS_TX_MSDU: number of MSDUs transmitted (or + * attempted to transmit; u64) + * @NL80211_TID_STATS_TX_MSDU_RETRIES: number of retries for + * transmitted MSDUs (not counting the first attempt; u64) + * @NL80211_TID_STATS_TX_MSDU_FAILED: number of failed transmitted + * MSDUs (u64) + * @NL80211_TID_STATS_PAD: attribute used for padding for 64-bit alignment + * @NL80211_TID_STATS_TXQ_STATS: TXQ stats (nested attribute) + * @NUM_NL80211_TID_STATS: number of attributes here + * @NL80211_TID_STATS_MAX: highest numbered attribute here + */ +enum nl80211_tid_stats { + __NL80211_TID_STATS_INVALID, + NL80211_TID_STATS_RX_MSDU, + NL80211_TID_STATS_TX_MSDU, + NL80211_TID_STATS_TX_MSDU_RETRIES, + NL80211_TID_STATS_TX_MSDU_FAILED, + NL80211_TID_STATS_PAD, + NL80211_TID_STATS_TXQ_STATS, + + /* keep last */ + NUM_NL80211_TID_STATS, + NL80211_TID_STATS_MAX = NUM_NL80211_TID_STATS - 1 +}; + +/** + * enum nl80211_txq_stats - per TXQ statistics attributes + * @__NL80211_TXQ_STATS_INVALID: attribute number 0 is reserved + * @NUM_NL80211_TXQ_STATS: number of attributes here + * @NL80211_TXQ_STATS_BACKLOG_BYTES: number of bytes currently backlogged + * @NL80211_TXQ_STATS_BACKLOG_PACKETS: number of packets currently + * backlogged + * @NL80211_TXQ_STATS_FLOWS: total number of new flows seen + * @NL80211_TXQ_STATS_DROPS: total number of packet drops + * @NL80211_TXQ_STATS_ECN_MARKS: total number of packet ECN marks + * @NL80211_TXQ_STATS_OVERLIMIT: number of drops due to queue space overflow + * @NL80211_TXQ_STATS_OVERMEMORY: number of drops due to memory limit overflow + * (only for per-phy stats) + * @NL80211_TXQ_STATS_COLLISIONS: number of hash collisions + * @NL80211_TXQ_STATS_TX_BYTES: total number of bytes dequeued from TXQ + * @NL80211_TXQ_STATS_TX_PACKETS: total number of packets dequeued from TXQ + * @NL80211_TXQ_STATS_MAX_FLOWS: number of flow buckets for PHY + * @NL80211_TXQ_STATS_MAX: highest numbered attribute here + */ +enum nl80211_txq_stats { + __NL80211_TXQ_STATS_INVALID, + NL80211_TXQ_STATS_BACKLOG_BYTES, + NL80211_TXQ_STATS_BACKLOG_PACKETS, + NL80211_TXQ_STATS_FLOWS, + NL80211_TXQ_STATS_DROPS, + NL80211_TXQ_STATS_ECN_MARKS, + NL80211_TXQ_STATS_OVERLIMIT, + NL80211_TXQ_STATS_OVERMEMORY, + NL80211_TXQ_STATS_COLLISIONS, + NL80211_TXQ_STATS_TX_BYTES, + NL80211_TXQ_STATS_TX_PACKETS, + NL80211_TXQ_STATS_MAX_FLOWS, + + /* keep last */ + NUM_NL80211_TXQ_STATS, + NL80211_TXQ_STATS_MAX = NUM_NL80211_TXQ_STATS - 1 +}; + +/** + * enum nl80211_mpath_flags - nl80211 mesh path flags + * + * @NL80211_MPATH_FLAG_ACTIVE: the mesh path is active + * @NL80211_MPATH_FLAG_RESOLVING: the mesh path discovery process is running + * @NL80211_MPATH_FLAG_SN_VALID: the mesh path contains a valid SN + * @NL80211_MPATH_FLAG_FIXED: the mesh path has been manually set + * @NL80211_MPATH_FLAG_RESOLVED: the mesh path discovery process succeeded + */ +enum nl80211_mpath_flags { + NL80211_MPATH_FLAG_ACTIVE = 1<<0, + NL80211_MPATH_FLAG_RESOLVING = 1<<1, + NL80211_MPATH_FLAG_SN_VALID = 1<<2, + NL80211_MPATH_FLAG_FIXED = 1<<3, + NL80211_MPATH_FLAG_RESOLVED = 1<<4, +}; + +/** + * enum nl80211_mpath_info - mesh path information + * + * These attribute types are used with %NL80211_ATTR_MPATH_INFO when getting + * information about a mesh path. + * + * @__NL80211_MPATH_INFO_INVALID: attribute number 0 is reserved + * @NL80211_MPATH_INFO_FRAME_QLEN: number of queued frames for this destination + * @NL80211_MPATH_INFO_SN: destination sequence number + * @NL80211_MPATH_INFO_METRIC: metric (cost) of this mesh path + * @NL80211_MPATH_INFO_EXPTIME: expiration time for the path, in msec from now + * @NL80211_MPATH_INFO_FLAGS: mesh path flags, enumerated in + * &enum nl80211_mpath_flags; + * @NL80211_MPATH_INFO_DISCOVERY_TIMEOUT: total path discovery timeout, in msec + * @NL80211_MPATH_INFO_DISCOVERY_RETRIES: mesh path discovery retries + * @NL80211_MPATH_INFO_HOP_COUNT: hop count to destination + * @NL80211_MPATH_INFO_PATH_CHANGE: total number of path changes to destination + * @NL80211_MPATH_INFO_MAX: highest mesh path information attribute number + * currently defined + * @__NL80211_MPATH_INFO_AFTER_LAST: internal use + */ +enum nl80211_mpath_info { + __NL80211_MPATH_INFO_INVALID, + NL80211_MPATH_INFO_FRAME_QLEN, + NL80211_MPATH_INFO_SN, + NL80211_MPATH_INFO_METRIC, + NL80211_MPATH_INFO_EXPTIME, + NL80211_MPATH_INFO_FLAGS, + NL80211_MPATH_INFO_DISCOVERY_TIMEOUT, + NL80211_MPATH_INFO_DISCOVERY_RETRIES, + NL80211_MPATH_INFO_HOP_COUNT, + NL80211_MPATH_INFO_PATH_CHANGE, + + /* keep last */ + __NL80211_MPATH_INFO_AFTER_LAST, + NL80211_MPATH_INFO_MAX = __NL80211_MPATH_INFO_AFTER_LAST - 1 +}; + +/** + * enum nl80211_band_iftype_attr - Interface type data attributes + * + * @__NL80211_BAND_IFTYPE_ATTR_INVALID: attribute number 0 is reserved + * @NL80211_BAND_IFTYPE_ATTR_IFTYPES: nested attribute containing a flag attribute + * for each interface type that supports the band data + * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC: HE MAC capabilities as in HE + * capabilities IE + * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY: HE PHY capabilities as in HE + * capabilities IE + * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET: HE supported NSS/MCS as in HE + * capabilities IE + * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE: HE PPE thresholds information as + * defined in HE capabilities IE + * @NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA: HE 6GHz band capabilities (__le16), + * given for all 6 GHz band channels + * @NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS: vendor element capabilities that are + * advertised on this band/for this iftype (binary) + * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MAC: EHT MAC capabilities as in EHT + * capabilities element + * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PHY: EHT PHY capabilities as in EHT + * capabilities element + * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MCS_SET: EHT supported NSS/MCS as in EHT + * capabilities element + * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PPE: EHT PPE thresholds information as + * defined in EHT capabilities element + * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use + * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band attribute currently defined + */ +enum nl80211_band_iftype_attr { + __NL80211_BAND_IFTYPE_ATTR_INVALID, + + NL80211_BAND_IFTYPE_ATTR_IFTYPES, + NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC, + NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY, + NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET, + NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE, + NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA, + NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS, + NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MAC, + NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PHY, + NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MCS_SET, + NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PPE, + + /* keep last */ + __NL80211_BAND_IFTYPE_ATTR_AFTER_LAST, + NL80211_BAND_IFTYPE_ATTR_MAX = __NL80211_BAND_IFTYPE_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_band_attr - band attributes + * @__NL80211_BAND_ATTR_INVALID: attribute number 0 is reserved + * @NL80211_BAND_ATTR_FREQS: supported frequencies in this band, + * an array of nested frequency attributes + * @NL80211_BAND_ATTR_RATES: supported bitrates in this band, + * an array of nested bitrate attributes + * @NL80211_BAND_ATTR_HT_MCS_SET: 16-byte attribute containing the MCS set as + * defined in 802.11n + * @NL80211_BAND_ATTR_HT_CAPA: HT capabilities, as in the HT information IE + * @NL80211_BAND_ATTR_HT_AMPDU_FACTOR: A-MPDU factor, as in 11n + * @NL80211_BAND_ATTR_HT_AMPDU_DENSITY: A-MPDU density, as in 11n + * @NL80211_BAND_ATTR_VHT_MCS_SET: 32-byte attribute containing the MCS set as + * defined in 802.11ac + * @NL80211_BAND_ATTR_VHT_CAPA: VHT capabilities, as in the HT information IE + * @NL80211_BAND_ATTR_IFTYPE_DATA: nested array attribute, with each entry using + * attributes from &enum nl80211_band_iftype_attr + * @NL80211_BAND_ATTR_EDMG_CHANNELS: bitmap that indicates the 2.16 GHz + * channel(s) that are allowed to be used for EDMG transmissions. + * Defined by IEEE P802.11ay/D4.0 section 9.4.2.251. + * @NL80211_BAND_ATTR_EDMG_BW_CONFIG: Channel BW Configuration subfield encodes + * the allowed channel bandwidth configurations. + * Defined by IEEE P802.11ay/D4.0 section 9.4.2.251, Table 13. + * @NL80211_BAND_ATTR_MAX: highest band attribute currently defined + * @__NL80211_BAND_ATTR_AFTER_LAST: internal use + */ +enum nl80211_band_attr { + __NL80211_BAND_ATTR_INVALID, + NL80211_BAND_ATTR_FREQS, + NL80211_BAND_ATTR_RATES, + + NL80211_BAND_ATTR_HT_MCS_SET, + NL80211_BAND_ATTR_HT_CAPA, + NL80211_BAND_ATTR_HT_AMPDU_FACTOR, + NL80211_BAND_ATTR_HT_AMPDU_DENSITY, + + NL80211_BAND_ATTR_VHT_MCS_SET, + NL80211_BAND_ATTR_VHT_CAPA, + NL80211_BAND_ATTR_IFTYPE_DATA, + + NL80211_BAND_ATTR_EDMG_CHANNELS, + NL80211_BAND_ATTR_EDMG_BW_CONFIG, + + /* keep last */ + __NL80211_BAND_ATTR_AFTER_LAST, + NL80211_BAND_ATTR_MAX = __NL80211_BAND_ATTR_AFTER_LAST - 1 +}; + +#define NL80211_BAND_ATTR_HT_CAPA NL80211_BAND_ATTR_HT_CAPA + +/** + * enum nl80211_wmm_rule - regulatory wmm rule + * + * @__NL80211_WMMR_INVALID: attribute number 0 is reserved + * @NL80211_WMMR_CW_MIN: Minimum contention window slot. + * @NL80211_WMMR_CW_MAX: Maximum contention window slot. + * @NL80211_WMMR_AIFSN: Arbitration Inter Frame Space. + * @NL80211_WMMR_TXOP: Maximum allowed tx operation time. + * @nl80211_WMMR_MAX: highest possible wmm rule. + * @__NL80211_WMMR_LAST: Internal use. + */ +enum nl80211_wmm_rule { + __NL80211_WMMR_INVALID, + NL80211_WMMR_CW_MIN, + NL80211_WMMR_CW_MAX, + NL80211_WMMR_AIFSN, + NL80211_WMMR_TXOP, + + /* keep last */ + __NL80211_WMMR_LAST, + NL80211_WMMR_MAX = __NL80211_WMMR_LAST - 1 +}; + +/** + * enum nl80211_frequency_attr - frequency attributes + * @__NL80211_FREQUENCY_ATTR_INVALID: attribute number 0 is reserved + * @NL80211_FREQUENCY_ATTR_FREQ: Frequency in MHz + * @NL80211_FREQUENCY_ATTR_DISABLED: Channel is disabled in current + * regulatory domain. + * @NL80211_FREQUENCY_ATTR_NO_IR: no mechanisms that initiate radiation + * are permitted on this channel, this includes sending probe + * requests, or modes of operation that require beaconing. + * @NL80211_FREQUENCY_ATTR_RADAR: Radar detection is mandatory + * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm + * (100 * dBm). + * @NL80211_FREQUENCY_ATTR_DFS_STATE: current state for DFS + * (enum nl80211_dfs_state) + * @NL80211_FREQUENCY_ATTR_DFS_TIME: time in miliseconds for how long + * this channel is in this DFS state. + * @NL80211_FREQUENCY_ATTR_NO_HT40_MINUS: HT40- isn't possible with this + * channel as the control channel + * @NL80211_FREQUENCY_ATTR_NO_HT40_PLUS: HT40+ isn't possible with this + * channel as the control channel + * @NL80211_FREQUENCY_ATTR_NO_80MHZ: any 80 MHz channel using this channel + * as the primary or any of the secondary channels isn't possible, + * this includes 80+80 channels + * @NL80211_FREQUENCY_ATTR_NO_160MHZ: any 160 MHz (but not 80+80) channel + * using this channel as the primary or any of the secondary channels + * isn't possible + * @NL80211_FREQUENCY_ATTR_DFS_CAC_TIME: DFS CAC time in milliseconds. + * @NL80211_FREQUENCY_ATTR_INDOOR_ONLY: Only indoor use is permitted on this + * channel. A channel that has the INDOOR_ONLY attribute can only be + * used when there is a clear assessment that the device is operating in + * an indoor surroundings, i.e., it is connected to AC power (and not + * through portable DC inverters) or is under the control of a master + * that is acting as an AP and is connected to AC power. + * @NL80211_FREQUENCY_ATTR_IR_CONCURRENT: IR operation is allowed on this + * channel if it's connected concurrently to a BSS on the same channel on + * the 2 GHz band or to a channel in the same UNII band (on the 5 GHz + * band), and IEEE80211_CHAN_RADAR is not set. Instantiating a GO or TDLS + * off-channel on a channel that has the IR_CONCURRENT attribute set can be + * done when there is a clear assessment that the device is operating under + * the guidance of an authorized master, i.e., setting up a GO or TDLS + * off-channel while the device is also connected to an AP with DFS and + * radar detection on the UNII band (it is up to user-space, i.e., + * wpa_supplicant to perform the required verifications). Using this + * attribute for IR is disallowed for master interfaces (IBSS, AP). + * @NL80211_FREQUENCY_ATTR_NO_20MHZ: 20 MHz operation is not allowed + * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_NO_10MHZ: 10 MHz operation is not allowed + * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_WMM: this channel has wmm limitations. + * This is a nested attribute that contains the wmm limitation per AC. + * (see &enum nl80211_wmm_rule) + * @NL80211_FREQUENCY_ATTR_NO_HE: HE operation is not allowed on this channel + * in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_OFFSET: frequency offset in KHz + * @NL80211_FREQUENCY_ATTR_1MHZ: 1 MHz operation is allowed + * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_2MHZ: 2 MHz operation is allowed + * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_4MHZ: 4 MHz operation is allowed + * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_8MHZ: 8 MHz operation is allowed + * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_16MHZ: 16 MHz operation is allowed + * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_NO_320MHZ: any 320 MHz channel using this channel + * as the primary or any of the secondary channels isn't possible + * @NL80211_FREQUENCY_ATTR_NO_EHT: EHT operation is not allowed on this channel + * in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number + * currently defined + * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use + * + * See https://apps.fcc.gov/eas/comments/GetPublishedDocument.html?id=327&tn=528122 + * for more information on the FCC description of the relaxations allowed + * by NL80211_FREQUENCY_ATTR_INDOOR_ONLY and + * NL80211_FREQUENCY_ATTR_IR_CONCURRENT. + */ +enum nl80211_frequency_attr { + __NL80211_FREQUENCY_ATTR_INVALID, + NL80211_FREQUENCY_ATTR_FREQ, + NL80211_FREQUENCY_ATTR_DISABLED, + NL80211_FREQUENCY_ATTR_NO_IR, + __NL80211_FREQUENCY_ATTR_NO_IBSS, + NL80211_FREQUENCY_ATTR_RADAR, + NL80211_FREQUENCY_ATTR_MAX_TX_POWER, + NL80211_FREQUENCY_ATTR_DFS_STATE, + NL80211_FREQUENCY_ATTR_DFS_TIME, + NL80211_FREQUENCY_ATTR_NO_HT40_MINUS, + NL80211_FREQUENCY_ATTR_NO_HT40_PLUS, + NL80211_FREQUENCY_ATTR_NO_80MHZ, + NL80211_FREQUENCY_ATTR_NO_160MHZ, + NL80211_FREQUENCY_ATTR_DFS_CAC_TIME, + NL80211_FREQUENCY_ATTR_INDOOR_ONLY, + NL80211_FREQUENCY_ATTR_IR_CONCURRENT, + NL80211_FREQUENCY_ATTR_NO_20MHZ, + NL80211_FREQUENCY_ATTR_NO_10MHZ, + NL80211_FREQUENCY_ATTR_WMM, + NL80211_FREQUENCY_ATTR_NO_HE, + NL80211_FREQUENCY_ATTR_OFFSET, + NL80211_FREQUENCY_ATTR_1MHZ, + NL80211_FREQUENCY_ATTR_2MHZ, + NL80211_FREQUENCY_ATTR_4MHZ, + NL80211_FREQUENCY_ATTR_8MHZ, + NL80211_FREQUENCY_ATTR_16MHZ, + NL80211_FREQUENCY_ATTR_NO_320MHZ, + NL80211_FREQUENCY_ATTR_NO_EHT, + + /* keep last */ + __NL80211_FREQUENCY_ATTR_AFTER_LAST, + NL80211_FREQUENCY_ATTR_MAX = __NL80211_FREQUENCY_ATTR_AFTER_LAST - 1 +}; + +#define NL80211_FREQUENCY_ATTR_MAX_TX_POWER NL80211_FREQUENCY_ATTR_MAX_TX_POWER +#define NL80211_FREQUENCY_ATTR_PASSIVE_SCAN NL80211_FREQUENCY_ATTR_NO_IR +#define NL80211_FREQUENCY_ATTR_NO_IBSS NL80211_FREQUENCY_ATTR_NO_IR +#define NL80211_FREQUENCY_ATTR_NO_IR NL80211_FREQUENCY_ATTR_NO_IR +#define NL80211_FREQUENCY_ATTR_GO_CONCURRENT \ + NL80211_FREQUENCY_ATTR_IR_CONCURRENT + +/** + * enum nl80211_bitrate_attr - bitrate attributes + * @__NL80211_BITRATE_ATTR_INVALID: attribute number 0 is reserved + * @NL80211_BITRATE_ATTR_RATE: Bitrate in units of 100 kbps + * @NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE: Short preamble supported + * in 2.4 GHz band. + * @NL80211_BITRATE_ATTR_MAX: highest bitrate attribute number + * currently defined + * @__NL80211_BITRATE_ATTR_AFTER_LAST: internal use + */ +enum nl80211_bitrate_attr { + __NL80211_BITRATE_ATTR_INVALID, + NL80211_BITRATE_ATTR_RATE, + NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE, + + /* keep last */ + __NL80211_BITRATE_ATTR_AFTER_LAST, + NL80211_BITRATE_ATTR_MAX = __NL80211_BITRATE_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_initiator - Indicates the initiator of a reg domain request + * @NL80211_REGDOM_SET_BY_CORE: Core queried CRDA for a dynamic world + * regulatory domain. + * @NL80211_REGDOM_SET_BY_USER: User asked the wireless core to set the + * regulatory domain. + * @NL80211_REGDOM_SET_BY_DRIVER: a wireless drivers has hinted to the + * wireless core it thinks its knows the regulatory domain we should be in. + * @NL80211_REGDOM_SET_BY_COUNTRY_IE: the wireless core has received an + * 802.11 country information element with regulatory information it + * thinks we should consider. cfg80211 only processes the country + * code from the IE, and relies on the regulatory domain information + * structure passed by userspace (CRDA) from our wireless-regdb. + * If a channel is enabled but the country code indicates it should + * be disabled we disable the channel and re-enable it upon disassociation. + */ +enum nl80211_reg_initiator { + NL80211_REGDOM_SET_BY_CORE, + NL80211_REGDOM_SET_BY_USER, + NL80211_REGDOM_SET_BY_DRIVER, + NL80211_REGDOM_SET_BY_COUNTRY_IE, +}; + +/** + * enum nl80211_reg_type - specifies the type of regulatory domain + * @NL80211_REGDOM_TYPE_COUNTRY: the regulatory domain set is one that pertains + * to a specific country. When this is set you can count on the + * ISO / IEC 3166 alpha2 country code being valid. + * @NL80211_REGDOM_TYPE_WORLD: the regulatory set domain is the world regulatory + * domain. + * @NL80211_REGDOM_TYPE_CUSTOM_WORLD: the regulatory domain set is a custom + * driver specific world regulatory domain. These do not apply system-wide + * and are only applicable to the individual devices which have requested + * them to be applied. + * @NL80211_REGDOM_TYPE_INTERSECTION: the regulatory domain set is the product + * of an intersection between two regulatory domains -- the previously + * set regulatory domain on the system and the last accepted regulatory + * domain request to be processed. + */ +enum nl80211_reg_type { + NL80211_REGDOM_TYPE_COUNTRY, + NL80211_REGDOM_TYPE_WORLD, + NL80211_REGDOM_TYPE_CUSTOM_WORLD, + NL80211_REGDOM_TYPE_INTERSECTION, +}; + +/** + * enum nl80211_reg_rule_attr - regulatory rule attributes + * @__NL80211_REG_RULE_ATTR_INVALID: attribute number 0 is reserved + * @NL80211_ATTR_REG_RULE_FLAGS: a set of flags which specify additional + * considerations for a given frequency range. These are the + * &enum nl80211_reg_rule_flags. + * @NL80211_ATTR_FREQ_RANGE_START: starting frequencry for the regulatory + * rule in KHz. This is not a center of frequency but an actual regulatory + * band edge. + * @NL80211_ATTR_FREQ_RANGE_END: ending frequency for the regulatory rule + * in KHz. This is not a center a frequency but an actual regulatory + * band edge. + * @NL80211_ATTR_FREQ_RANGE_MAX_BW: maximum allowed bandwidth for this + * frequency range, in KHz. + * @NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN: the maximum allowed antenna gain + * for a given frequency range. The value is in mBi (100 * dBi). + * If you don't have one then don't send this. + * @NL80211_ATTR_POWER_RULE_MAX_EIRP: the maximum allowed EIRP for + * a given frequency range. The value is in mBm (100 * dBm). + * @NL80211_ATTR_DFS_CAC_TIME: DFS CAC time in milliseconds. + * If not present or 0 default CAC time will be used. + * @NL80211_REG_RULE_ATTR_MAX: highest regulatory rule attribute number + * currently defined + * @__NL80211_REG_RULE_ATTR_AFTER_LAST: internal use + */ +enum nl80211_reg_rule_attr { + __NL80211_REG_RULE_ATTR_INVALID, + NL80211_ATTR_REG_RULE_FLAGS, + + NL80211_ATTR_FREQ_RANGE_START, + NL80211_ATTR_FREQ_RANGE_END, + NL80211_ATTR_FREQ_RANGE_MAX_BW, + + NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN, + NL80211_ATTR_POWER_RULE_MAX_EIRP, + + NL80211_ATTR_DFS_CAC_TIME, + + /* keep last */ + __NL80211_REG_RULE_ATTR_AFTER_LAST, + NL80211_REG_RULE_ATTR_MAX = __NL80211_REG_RULE_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_sched_scan_match_attr - scheduled scan match attributes + * @__NL80211_SCHED_SCAN_MATCH_ATTR_INVALID: attribute number 0 is reserved + * @NL80211_SCHED_SCAN_MATCH_ATTR_SSID: SSID to be used for matching, + * only report BSS with matching SSID. + * (This cannot be used together with BSSID.) + * @NL80211_SCHED_SCAN_MATCH_ATTR_RSSI: RSSI threshold (in dBm) for reporting a + * BSS in scan results. Filtering is turned off if not specified. Note that + * if this attribute is in a match set of its own, then it is treated as + * the default value for all matchsets with an SSID, rather than being a + * matchset of its own without an RSSI filter. This is due to problems with + * how this API was implemented in the past. Also, due to the same problem, + * the only way to create a matchset with only an RSSI filter (with this + * attribute) is if there's only a single matchset with the RSSI attribute. + * @NL80211_SCHED_SCAN_MATCH_ATTR_RELATIVE_RSSI: Flag indicating whether + * %NL80211_SCHED_SCAN_MATCH_ATTR_RSSI to be used as absolute RSSI or + * relative to current bss's RSSI. + * @NL80211_SCHED_SCAN_MATCH_ATTR_RSSI_ADJUST: When present the RSSI level for + * BSS-es in the specified band is to be adjusted before doing + * RSSI-based BSS selection. The attribute value is a packed structure + * value as specified by &struct nl80211_bss_select_rssi_adjust. + * @NL80211_SCHED_SCAN_MATCH_ATTR_BSSID: BSSID to be used for matching + * (this cannot be used together with SSID). + * @NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI: Nested attribute that carries the + * band specific minimum rssi thresholds for the bands defined in + * enum nl80211_band. The minimum rssi threshold value(s32) specific to a + * band shall be encapsulated in attribute with type value equals to one + * of the NL80211_BAND_* defined in enum nl80211_band. For example, the + * minimum rssi threshold value for 2.4GHZ band shall be encapsulated + * within an attribute of type NL80211_BAND_2GHZ. And one or more of such + * attributes will be nested within this attribute. + * @NL80211_SCHED_SCAN_MATCH_ATTR_MAX: highest scheduled scan filter + * attribute number currently defined + * @__NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST: internal use + */ +enum nl80211_sched_scan_match_attr { + __NL80211_SCHED_SCAN_MATCH_ATTR_INVALID, + + NL80211_SCHED_SCAN_MATCH_ATTR_SSID, + NL80211_SCHED_SCAN_MATCH_ATTR_RSSI, + NL80211_SCHED_SCAN_MATCH_ATTR_RELATIVE_RSSI, + NL80211_SCHED_SCAN_MATCH_ATTR_RSSI_ADJUST, + NL80211_SCHED_SCAN_MATCH_ATTR_BSSID, + NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI, + + /* keep last */ + __NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST, + NL80211_SCHED_SCAN_MATCH_ATTR_MAX = + __NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST - 1 +}; + +/* only for backward compatibility */ +#define NL80211_ATTR_SCHED_SCAN_MATCH_SSID NL80211_SCHED_SCAN_MATCH_ATTR_SSID + +/** + * enum nl80211_reg_rule_flags - regulatory rule flags + * + * @NL80211_RRF_NO_OFDM: OFDM modulation not allowed + * @NL80211_RRF_NO_CCK: CCK modulation not allowed + * @NL80211_RRF_NO_INDOOR: indoor operation not allowed + * @NL80211_RRF_NO_OUTDOOR: outdoor operation not allowed + * @NL80211_RRF_DFS: DFS support is required to be used + * @NL80211_RRF_PTP_ONLY: this is only for Point To Point links + * @NL80211_RRF_PTMP_ONLY: this is only for Point To Multi Point links + * @NL80211_RRF_NO_IR: no mechanisms that initiate radiation are allowed, + * this includes probe requests or modes of operation that require + * beaconing. + * @NL80211_RRF_AUTO_BW: maximum available bandwidth should be calculated + * base on contiguous rules and wider channels will be allowed to cross + * multiple contiguous/overlapping frequency ranges. + * @NL80211_RRF_IR_CONCURRENT: See %NL80211_FREQUENCY_ATTR_IR_CONCURRENT + * @NL80211_RRF_NO_HT40MINUS: channels can't be used in HT40- operation + * @NL80211_RRF_NO_HT40PLUS: channels can't be used in HT40+ operation + * @NL80211_RRF_NO_80MHZ: 80MHz operation not allowed + * @NL80211_RRF_NO_160MHZ: 160MHz operation not allowed + * @NL80211_RRF_NO_HE: HE operation not allowed + * @NL80211_RRF_NO_320MHZ: 320MHz operation not allowed + */ +enum nl80211_reg_rule_flags { + NL80211_RRF_NO_OFDM = 1<<0, + NL80211_RRF_NO_CCK = 1<<1, + NL80211_RRF_NO_INDOOR = 1<<2, + NL80211_RRF_NO_OUTDOOR = 1<<3, + NL80211_RRF_DFS = 1<<4, + NL80211_RRF_PTP_ONLY = 1<<5, + NL80211_RRF_PTMP_ONLY = 1<<6, + NL80211_RRF_NO_IR = 1<<7, + __NL80211_RRF_NO_IBSS = 1<<8, + NL80211_RRF_AUTO_BW = 1<<11, + NL80211_RRF_IR_CONCURRENT = 1<<12, + NL80211_RRF_NO_HT40MINUS = 1<<13, + NL80211_RRF_NO_HT40PLUS = 1<<14, + NL80211_RRF_NO_80MHZ = 1<<15, + NL80211_RRF_NO_160MHZ = 1<<16, + NL80211_RRF_NO_HE = 1<<17, + NL80211_RRF_NO_320MHZ = 1<<18, +}; + +#define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR +#define NL80211_RRF_NO_IBSS NL80211_RRF_NO_IR +#define NL80211_RRF_NO_IR NL80211_RRF_NO_IR +#define NL80211_RRF_NO_HT40 (NL80211_RRF_NO_HT40MINUS |\ + NL80211_RRF_NO_HT40PLUS) +#define NL80211_RRF_GO_CONCURRENT NL80211_RRF_IR_CONCURRENT + +/* For backport compatibility with older userspace */ +#define NL80211_RRF_NO_IR_ALL (NL80211_RRF_NO_IR | __NL80211_RRF_NO_IBSS) + +/** + * enum nl80211_dfs_regions - regulatory DFS regions + * + * @NL80211_DFS_UNSET: Country has no DFS master region specified + * @NL80211_DFS_FCC: Country follows DFS master rules from FCC + * @NL80211_DFS_ETSI: Country follows DFS master rules from ETSI + * @NL80211_DFS_JP: Country follows DFS master rules from JP/MKK/Telec + */ +enum nl80211_dfs_regions { + NL80211_DFS_UNSET = 0, + NL80211_DFS_FCC = 1, + NL80211_DFS_ETSI = 2, + NL80211_DFS_JP = 3, +}; + +/** + * enum nl80211_user_reg_hint_type - type of user regulatory hint + * + * @NL80211_USER_REG_HINT_USER: a user sent the hint. This is always + * assumed if the attribute is not set. + * @NL80211_USER_REG_HINT_CELL_BASE: the hint comes from a cellular + * base station. Device drivers that have been tested to work + * properly to support this type of hint can enable these hints + * by setting the NL80211_FEATURE_CELL_BASE_REG_HINTS feature + * capability on the struct wiphy. The wireless core will + * ignore all cell base station hints until at least one device + * present has been registered with the wireless core that + * has listed NL80211_FEATURE_CELL_BASE_REG_HINTS as a + * supported feature. + * @NL80211_USER_REG_HINT_INDOOR: a user sent an hint indicating that the + * platform is operating in an indoor environment. + */ +enum nl80211_user_reg_hint_type { + NL80211_USER_REG_HINT_USER = 0, + NL80211_USER_REG_HINT_CELL_BASE = 1, + NL80211_USER_REG_HINT_INDOOR = 2, +}; + +/** + * enum nl80211_survey_info - survey information + * + * These attribute types are used with %NL80211_ATTR_SURVEY_INFO + * when getting information about a survey. + * + * @__NL80211_SURVEY_INFO_INVALID: attribute number 0 is reserved + * @NL80211_SURVEY_INFO_FREQUENCY: center frequency of channel + * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm) + * @NL80211_SURVEY_INFO_IN_USE: channel is currently being used + * @NL80211_SURVEY_INFO_TIME: amount of time (in ms) that the radio + * was turned on (on channel or globally) + * @NL80211_SURVEY_INFO_TIME_BUSY: amount of the time the primary + * channel was sensed busy (either due to activity or energy detect) + * @NL80211_SURVEY_INFO_TIME_EXT_BUSY: amount of time the extension + * channel was sensed busy + * @NL80211_SURVEY_INFO_TIME_RX: amount of time the radio spent + * receiving data (on channel or globally) + * @NL80211_SURVEY_INFO_TIME_TX: amount of time the radio spent + * transmitting data (on channel or globally) + * @NL80211_SURVEY_INFO_TIME_SCAN: time the radio spent for scan + * (on this channel or globally) + * @NL80211_SURVEY_INFO_PAD: attribute used for padding for 64-bit alignment + * @NL80211_SURVEY_INFO_TIME_BSS_RX: amount of time the radio spent + * receiving frames destined to the local BSS + * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number + * currently defined + * @NL80211_SURVEY_INFO_FREQUENCY_OFFSET: center frequency offset in KHz + * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use + */ +enum nl80211_survey_info { + __NL80211_SURVEY_INFO_INVALID, + NL80211_SURVEY_INFO_FREQUENCY, + NL80211_SURVEY_INFO_NOISE, + NL80211_SURVEY_INFO_IN_USE, + NL80211_SURVEY_INFO_TIME, + NL80211_SURVEY_INFO_TIME_BUSY, + NL80211_SURVEY_INFO_TIME_EXT_BUSY, + NL80211_SURVEY_INFO_TIME_RX, + NL80211_SURVEY_INFO_TIME_TX, + NL80211_SURVEY_INFO_TIME_SCAN, + NL80211_SURVEY_INFO_PAD, + NL80211_SURVEY_INFO_TIME_BSS_RX, + NL80211_SURVEY_INFO_FREQUENCY_OFFSET, + + /* keep last */ + __NL80211_SURVEY_INFO_AFTER_LAST, + NL80211_SURVEY_INFO_MAX = __NL80211_SURVEY_INFO_AFTER_LAST - 1 +}; + +/* keep old names for compatibility */ +#define NL80211_SURVEY_INFO_CHANNEL_TIME NL80211_SURVEY_INFO_TIME +#define NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY NL80211_SURVEY_INFO_TIME_BUSY +#define NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY NL80211_SURVEY_INFO_TIME_EXT_BUSY +#define NL80211_SURVEY_INFO_CHANNEL_TIME_RX NL80211_SURVEY_INFO_TIME_RX +#define NL80211_SURVEY_INFO_CHANNEL_TIME_TX NL80211_SURVEY_INFO_TIME_TX + +/** + * enum nl80211_mntr_flags - monitor configuration flags + * + * Monitor configuration flags. + * + * @__NL80211_MNTR_FLAG_INVALID: reserved + * + * @NL80211_MNTR_FLAG_FCSFAIL: pass frames with bad FCS + * @NL80211_MNTR_FLAG_PLCPFAIL: pass frames with bad PLCP + * @NL80211_MNTR_FLAG_CONTROL: pass control frames + * @NL80211_MNTR_FLAG_OTHER_BSS: disable BSSID filtering + * @NL80211_MNTR_FLAG_COOK_FRAMES: report frames after processing. + * overrides all other flags. + * @NL80211_MNTR_FLAG_ACTIVE: use the configured MAC address + * and ACK incoming unicast packets. + * + * @__NL80211_MNTR_FLAG_AFTER_LAST: internal use + * @NL80211_MNTR_FLAG_MAX: highest possible monitor flag + */ +enum nl80211_mntr_flags { + __NL80211_MNTR_FLAG_INVALID, + NL80211_MNTR_FLAG_FCSFAIL, + NL80211_MNTR_FLAG_PLCPFAIL, + NL80211_MNTR_FLAG_CONTROL, + NL80211_MNTR_FLAG_OTHER_BSS, + NL80211_MNTR_FLAG_COOK_FRAMES, + NL80211_MNTR_FLAG_ACTIVE, + + /* keep last */ + __NL80211_MNTR_FLAG_AFTER_LAST, + NL80211_MNTR_FLAG_MAX = __NL80211_MNTR_FLAG_AFTER_LAST - 1 +}; + +/** + * enum nl80211_mesh_power_mode - mesh power save modes + * + * @NL80211_MESH_POWER_UNKNOWN: The mesh power mode of the mesh STA is + * not known or has not been set yet. + * @NL80211_MESH_POWER_ACTIVE: Active mesh power mode. The mesh STA is + * in Awake state all the time. + * @NL80211_MESH_POWER_LIGHT_SLEEP: Light sleep mode. The mesh STA will + * alternate between Active and Doze states, but will wake up for + * neighbor's beacons. + * @NL80211_MESH_POWER_DEEP_SLEEP: Deep sleep mode. The mesh STA will + * alternate between Active and Doze states, but may not wake up + * for neighbor's beacons. + * + * @__NL80211_MESH_POWER_AFTER_LAST - internal use + * @NL80211_MESH_POWER_MAX - highest possible power save level + */ + +enum nl80211_mesh_power_mode { + NL80211_MESH_POWER_UNKNOWN, + NL80211_MESH_POWER_ACTIVE, + NL80211_MESH_POWER_LIGHT_SLEEP, + NL80211_MESH_POWER_DEEP_SLEEP, + + __NL80211_MESH_POWER_AFTER_LAST, + NL80211_MESH_POWER_MAX = __NL80211_MESH_POWER_AFTER_LAST - 1 +}; + +/** + * enum nl80211_meshconf_params - mesh configuration parameters + * + * Mesh configuration parameters. These can be changed while the mesh is + * active. + * + * @__NL80211_MESHCONF_INVALID: internal use + * + * @NL80211_MESHCONF_RETRY_TIMEOUT: specifies the initial retry timeout in + * millisecond units, used by the Peer Link Open message + * + * @NL80211_MESHCONF_CONFIRM_TIMEOUT: specifies the initial confirm timeout, in + * millisecond units, used by the peer link management to close a peer link + * + * @NL80211_MESHCONF_HOLDING_TIMEOUT: specifies the holding timeout, in + * millisecond units + * + * @NL80211_MESHCONF_MAX_PEER_LINKS: maximum number of peer links allowed + * on this mesh interface + * + * @NL80211_MESHCONF_MAX_RETRIES: specifies the maximum number of peer link + * open retries that can be sent to establish a new peer link instance in a + * mesh + * + * @NL80211_MESHCONF_TTL: specifies the value of TTL field set at a source mesh + * point. + * + * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically open + * peer links when we detect compatible mesh peers. Disabled if + * @NL80211_MESH_SETUP_USERSPACE_MPM or @NL80211_MESH_SETUP_USERSPACE_AMPE are + * set. + * + * @NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES: the number of action frames + * containing a PREQ that an MP can send to a particular destination (path + * target) + * + * @NL80211_MESHCONF_PATH_REFRESH_TIME: how frequently to refresh mesh paths + * (in milliseconds) + * + * @NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT: minimum length of time to wait + * until giving up on a path discovery (in milliseconds) + * + * @NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT: The time (in TUs) for which mesh + * points receiving a PREQ shall consider the forwarding information from + * the root to be valid. (TU = time unit) + * + * @NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL: The minimum interval of time (in + * TUs) during which an MP can send only one action frame containing a PREQ + * reference element + * + * @NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME: The interval of time (in TUs) + * that it takes for an HWMP information element to propagate across the + * mesh + * + * @NL80211_MESHCONF_HWMP_ROOTMODE: whether root mode is enabled or not + * + * @NL80211_MESHCONF_ELEMENT_TTL: specifies the value of TTL field set at a + * source mesh point for path selection elements. + * + * @NL80211_MESHCONF_HWMP_RANN_INTERVAL: The interval of time (in TUs) between + * root announcements are transmitted. + * + * @NL80211_MESHCONF_GATE_ANNOUNCEMENTS: Advertise that this mesh station has + * access to a broader network beyond the MBSS. This is done via Root + * Announcement frames. + * + * @NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL: The minimum interval of time (in + * TUs) during which a mesh STA can send only one Action frame containing a + * PERR element. + * + * @NL80211_MESHCONF_FORWARDING: set Mesh STA as forwarding or non-forwarding + * or forwarding entity (default is TRUE - forwarding entity) + * + * @NL80211_MESHCONF_RSSI_THRESHOLD: RSSI threshold in dBm. This specifies the + * threshold for average signal strength of candidate station to establish + * a peer link. + * + * @NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR: maximum number of neighbors + * to synchronize to for 11s default synchronization method + * (see 11C.12.2.2) + * + * @NL80211_MESHCONF_HT_OPMODE: set mesh HT protection mode. + * + * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute + * + * @NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT: The time (in TUs) for + * which mesh STAs receiving a proactive PREQ shall consider the forwarding + * information to the root mesh STA to be valid. + * + * @NL80211_MESHCONF_HWMP_ROOT_INTERVAL: The interval of time (in TUs) between + * proactive PREQs are transmitted. + * + * @NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL: The minimum interval of time + * (in TUs) during which a mesh STA can send only one Action frame + * containing a PREQ element for root path confirmation. + * + * @NL80211_MESHCONF_POWER_MODE: Default mesh power mode for new peer links. + * type &enum nl80211_mesh_power_mode (u32) + * + * @NL80211_MESHCONF_AWAKE_WINDOW: awake window duration (in TUs) + * + * @NL80211_MESHCONF_PLINK_TIMEOUT: If no tx activity is seen from a STA we've + * established peering with for longer than this time (in seconds), then + * remove it from the STA's list of peers. You may set this to 0 to disable + * the removal of the STA. Default is 30 minutes. + * + * @NL80211_MESHCONF_CONNECTED_TO_GATE: If set to true then this mesh STA + * will advertise that it is connected to a gate in the mesh formation + * field. If left unset then the mesh formation field will only + * advertise such if there is an active root mesh path. + * + * @NL80211_MESHCONF_NOLEARN: Try to avoid multi-hop path discovery (e.g. + * PREQ/PREP for HWMP) if the destination is a direct neighbor. Note that + * this might not be the optimal decision as a multi-hop route might be + * better. So if using this setting you will likely also want to disable + * dot11MeshForwarding and use another mesh routing protocol on top. + * + * @NL80211_MESHCONF_CONNECTED_TO_AS: If set to true then this mesh STA + * will advertise that it is connected to a authentication server + * in the mesh formation field. + * + * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use + */ +enum nl80211_meshconf_params { + __NL80211_MESHCONF_INVALID, + NL80211_MESHCONF_RETRY_TIMEOUT, + NL80211_MESHCONF_CONFIRM_TIMEOUT, + NL80211_MESHCONF_HOLDING_TIMEOUT, + NL80211_MESHCONF_MAX_PEER_LINKS, + NL80211_MESHCONF_MAX_RETRIES, + NL80211_MESHCONF_TTL, + NL80211_MESHCONF_AUTO_OPEN_PLINKS, + NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, + NL80211_MESHCONF_PATH_REFRESH_TIME, + NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, + NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, + NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, + NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, + NL80211_MESHCONF_HWMP_ROOTMODE, + NL80211_MESHCONF_ELEMENT_TTL, + NL80211_MESHCONF_HWMP_RANN_INTERVAL, + NL80211_MESHCONF_GATE_ANNOUNCEMENTS, + NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, + NL80211_MESHCONF_FORWARDING, + NL80211_MESHCONF_RSSI_THRESHOLD, + NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, + NL80211_MESHCONF_HT_OPMODE, + NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, + NL80211_MESHCONF_HWMP_ROOT_INTERVAL, + NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, + NL80211_MESHCONF_POWER_MODE, + NL80211_MESHCONF_AWAKE_WINDOW, + NL80211_MESHCONF_PLINK_TIMEOUT, + NL80211_MESHCONF_CONNECTED_TO_GATE, + NL80211_MESHCONF_NOLEARN, + NL80211_MESHCONF_CONNECTED_TO_AS, + + /* keep last */ + __NL80211_MESHCONF_ATTR_AFTER_LAST, + NL80211_MESHCONF_ATTR_MAX = __NL80211_MESHCONF_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_mesh_setup_params - mesh setup parameters + * + * Mesh setup parameters. These are used to start/join a mesh and cannot be + * changed while the mesh is active. + * + * @__NL80211_MESH_SETUP_INVALID: Internal use + * + * @NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL: Enable this option to use a + * vendor specific path selection algorithm or disable it to use the + * default HWMP. + * + * @NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC: Enable this option to use a + * vendor specific path metric or disable it to use the default Airtime + * metric. + * + * @NL80211_MESH_SETUP_IE: Information elements for this mesh, for instance, a + * robust security network ie, or a vendor specific information element + * that vendors will use to identify the path selection methods and + * metrics in use. + * + * @NL80211_MESH_SETUP_USERSPACE_AUTH: Enable this option if an authentication + * daemon will be authenticating mesh candidates. + * + * @NL80211_MESH_SETUP_USERSPACE_AMPE: Enable this option if an authentication + * daemon will be securing peer link frames. AMPE is a secured version of + * Mesh Peering Management (MPM) and is implemented with the assistance of + * a userspace daemon. When this flag is set, the kernel will send peer + * management frames to a userspace daemon that will implement AMPE + * functionality (security capabilities selection, key confirmation, and + * key management). When the flag is unset (default), the kernel can + * autonomously complete (unsecured) mesh peering without the need of a + * userspace daemon. + * + * @NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC: Enable this option to use a + * vendor specific synchronization method or disable it to use the default + * neighbor offset synchronization + * + * @NL80211_MESH_SETUP_USERSPACE_MPM: Enable this option if userspace will + * implement an MPM which handles peer allocation and state. + * + * @NL80211_MESH_SETUP_AUTH_PROTOCOL: Inform the kernel of the authentication + * method (u8, as defined in IEEE 8.4.2.100.6, e.g. 0x1 for SAE). + * Default is no authentication method required. + * + * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number + * + * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use + */ +enum nl80211_mesh_setup_params { + __NL80211_MESH_SETUP_INVALID, + NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL, + NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC, + NL80211_MESH_SETUP_IE, + NL80211_MESH_SETUP_USERSPACE_AUTH, + NL80211_MESH_SETUP_USERSPACE_AMPE, + NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC, + NL80211_MESH_SETUP_USERSPACE_MPM, + NL80211_MESH_SETUP_AUTH_PROTOCOL, + + /* keep last */ + __NL80211_MESH_SETUP_ATTR_AFTER_LAST, + NL80211_MESH_SETUP_ATTR_MAX = __NL80211_MESH_SETUP_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_txq_attr - TX queue parameter attributes + * @__NL80211_TXQ_ATTR_INVALID: Attribute number 0 is reserved + * @NL80211_TXQ_ATTR_AC: AC identifier (NL80211_AC_*) + * @NL80211_TXQ_ATTR_TXOP: Maximum burst time in units of 32 usecs, 0 meaning + * disabled + * @NL80211_TXQ_ATTR_CWMIN: Minimum contention window [a value of the form + * 2^n-1 in the range 1..32767] + * @NL80211_TXQ_ATTR_CWMAX: Maximum contention window [a value of the form + * 2^n-1 in the range 1..32767] + * @NL80211_TXQ_ATTR_AIFS: Arbitration interframe space [0..255] + * @__NL80211_TXQ_ATTR_AFTER_LAST: Internal + * @NL80211_TXQ_ATTR_MAX: Maximum TXQ attribute number + */ +enum nl80211_txq_attr { + __NL80211_TXQ_ATTR_INVALID, + NL80211_TXQ_ATTR_AC, + NL80211_TXQ_ATTR_TXOP, + NL80211_TXQ_ATTR_CWMIN, + NL80211_TXQ_ATTR_CWMAX, + NL80211_TXQ_ATTR_AIFS, + + /* keep last */ + __NL80211_TXQ_ATTR_AFTER_LAST, + NL80211_TXQ_ATTR_MAX = __NL80211_TXQ_ATTR_AFTER_LAST - 1 +}; + +enum nl80211_ac { + NL80211_AC_VO, + NL80211_AC_VI, + NL80211_AC_BE, + NL80211_AC_BK, + NL80211_NUM_ACS +}; + +/* backward compat */ +#define NL80211_TXQ_ATTR_QUEUE NL80211_TXQ_ATTR_AC +#define NL80211_TXQ_Q_VO NL80211_AC_VO +#define NL80211_TXQ_Q_VI NL80211_AC_VI +#define NL80211_TXQ_Q_BE NL80211_AC_BE +#define NL80211_TXQ_Q_BK NL80211_AC_BK + +/** + * enum nl80211_channel_type - channel type + * @NL80211_CHAN_NO_HT: 20 MHz, non-HT channel + * @NL80211_CHAN_HT20: 20 MHz HT channel + * @NL80211_CHAN_HT40MINUS: HT40 channel, secondary channel + * below the control channel + * @NL80211_CHAN_HT40PLUS: HT40 channel, secondary channel + * above the control channel + */ +enum nl80211_channel_type { + NL80211_CHAN_NO_HT, + NL80211_CHAN_HT20, + NL80211_CHAN_HT40MINUS, + NL80211_CHAN_HT40PLUS +}; + +/** + * enum nl80211_key_mode - Key mode + * + * @NL80211_KEY_RX_TX: (Default) + * Key can be used for Rx and Tx immediately + * + * The following modes can only be selected for unicast keys and when the + * driver supports @NL80211_EXT_FEATURE_EXT_KEY_ID: + * + * @NL80211_KEY_NO_TX: Only allowed in combination with @NL80211_CMD_NEW_KEY: + * Unicast key can only be used for Rx, Tx not allowed, yet + * @NL80211_KEY_SET_TX: Only allowed in combination with @NL80211_CMD_SET_KEY: + * The unicast key identified by idx and mac is cleared for Tx and becomes + * the preferred Tx key for the station. + */ +enum nl80211_key_mode { + NL80211_KEY_RX_TX, + NL80211_KEY_NO_TX, + NL80211_KEY_SET_TX +}; + +/** + * enum nl80211_chan_width - channel width definitions + * + * These values are used with the %NL80211_ATTR_CHANNEL_WIDTH + * attribute. + * + * @NL80211_CHAN_WIDTH_20_NOHT: 20 MHz, non-HT channel + * @NL80211_CHAN_WIDTH_20: 20 MHz HT channel + * @NL80211_CHAN_WIDTH_40: 40 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * attribute must be provided as well + * @NL80211_CHAN_WIDTH_80: 80 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * attribute must be provided as well + * @NL80211_CHAN_WIDTH_80P80: 80+80 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * and %NL80211_ATTR_CENTER_FREQ2 attributes must be provided as well + * @NL80211_CHAN_WIDTH_160: 160 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * attribute must be provided as well + * @NL80211_CHAN_WIDTH_5: 5 MHz OFDM channel + * @NL80211_CHAN_WIDTH_10: 10 MHz OFDM channel + * @NL80211_CHAN_WIDTH_1: 1 MHz OFDM channel + * @NL80211_CHAN_WIDTH_2: 2 MHz OFDM channel + * @NL80211_CHAN_WIDTH_4: 4 MHz OFDM channel + * @NL80211_CHAN_WIDTH_8: 8 MHz OFDM channel + * @NL80211_CHAN_WIDTH_16: 16 MHz OFDM channel + * @NL80211_CHAN_WIDTH_320: 320 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * attribute must be provided as well + */ +enum nl80211_chan_width { + NL80211_CHAN_WIDTH_20_NOHT, + NL80211_CHAN_WIDTH_20, + NL80211_CHAN_WIDTH_40, + NL80211_CHAN_WIDTH_80, + NL80211_CHAN_WIDTH_80P80, + NL80211_CHAN_WIDTH_160, + NL80211_CHAN_WIDTH_5, + NL80211_CHAN_WIDTH_10, + NL80211_CHAN_WIDTH_1, + NL80211_CHAN_WIDTH_2, + NL80211_CHAN_WIDTH_4, + NL80211_CHAN_WIDTH_8, + NL80211_CHAN_WIDTH_16, + NL80211_CHAN_WIDTH_320, +}; + +/** + * enum nl80211_bss_scan_width - control channel width for a BSS + * + * These values are used with the %NL80211_BSS_CHAN_WIDTH attribute. + * + * @NL80211_BSS_CHAN_WIDTH_20: control channel is 20 MHz wide or compatible + * @NL80211_BSS_CHAN_WIDTH_10: control channel is 10 MHz wide + * @NL80211_BSS_CHAN_WIDTH_5: control channel is 5 MHz wide + * @NL80211_BSS_CHAN_WIDTH_1: control channel is 1 MHz wide + * @NL80211_BSS_CHAN_WIDTH_2: control channel is 2 MHz wide + */ +enum nl80211_bss_scan_width { + NL80211_BSS_CHAN_WIDTH_20, + NL80211_BSS_CHAN_WIDTH_10, + NL80211_BSS_CHAN_WIDTH_5, + NL80211_BSS_CHAN_WIDTH_1, + NL80211_BSS_CHAN_WIDTH_2, +}; + +/** + * enum nl80211_bss - netlink attributes for a BSS + * + * @__NL80211_BSS_INVALID: invalid + * @NL80211_BSS_BSSID: BSSID of the BSS (6 octets) + * @NL80211_BSS_FREQUENCY: frequency in MHz (u32) + * @NL80211_BSS_TSF: TSF of the received probe response/beacon (u64) + * (if @NL80211_BSS_PRESP_DATA is present then this is known to be + * from a probe response, otherwise it may be from the same beacon + * that the NL80211_BSS_BEACON_TSF will be from) + * @NL80211_BSS_BEACON_INTERVAL: beacon interval of the (I)BSS (u16) + * @NL80211_BSS_CAPABILITY: capability field (CPU order, u16) + * @NL80211_BSS_INFORMATION_ELEMENTS: binary attribute containing the + * raw information elements from the probe response/beacon (bin); + * if the %NL80211_BSS_BEACON_IES attribute is present and the data is + * different then the IEs here are from a Probe Response frame; otherwise + * they are from a Beacon frame. + * However, if the driver does not indicate the source of the IEs, these + * IEs may be from either frame subtype. + * If present, the @NL80211_BSS_PRESP_DATA attribute indicates that the + * data here is known to be from a probe response, without any heuristics. + * @NL80211_BSS_SIGNAL_MBM: signal strength of probe response/beacon + * in mBm (100 * dBm) (s32) + * @NL80211_BSS_SIGNAL_UNSPEC: signal strength of the probe response/beacon + * in unspecified units, scaled to 0..100 (u8) + * @NL80211_BSS_STATUS: status, if this BSS is "used" + * @NL80211_BSS_SEEN_MS_AGO: age of this BSS entry in ms + * @NL80211_BSS_BEACON_IES: binary attribute containing the raw information + * elements from a Beacon frame (bin); not present if no Beacon frame has + * yet been received + * @NL80211_BSS_CHAN_WIDTH: channel width of the control channel + * (u32, enum nl80211_bss_scan_width) + * @NL80211_BSS_BEACON_TSF: TSF of the last received beacon (u64) + * (not present if no beacon frame has been received yet) + * @NL80211_BSS_PRESP_DATA: the data in @NL80211_BSS_INFORMATION_ELEMENTS and + * @NL80211_BSS_TSF is known to be from a probe response (flag attribute) + * @NL80211_BSS_LAST_SEEN_BOOTTIME: CLOCK_BOOTTIME timestamp when this entry + * was last updated by a received frame. The value is expected to be + * accurate to about 10ms. (u64, nanoseconds) + * @NL80211_BSS_PAD: attribute used for padding for 64-bit alignment + * @NL80211_BSS_PARENT_TSF: the time at the start of reception of the first + * octet of the timestamp field of the last beacon/probe received for + * this BSS. The time is the TSF of the BSS specified by + * @NL80211_BSS_PARENT_BSSID. (u64). + * @NL80211_BSS_PARENT_BSSID: the BSS according to which @NL80211_BSS_PARENT_TSF + * is set. + * @NL80211_BSS_CHAIN_SIGNAL: per-chain signal strength of last BSS update. + * Contains a nested array of signal strength attributes (u8, dBm), + * using the nesting index as the antenna number. + * @NL80211_BSS_FREQUENCY_OFFSET: frequency offset in KHz + * @NL80211_BSS_MLO_LINK_ID: MLO link ID of the BSS (u8). + * @NL80211_BSS_MLD_ADDR: MLD address of this BSS if connected to it. + * @__NL80211_BSS_AFTER_LAST: internal + * @NL80211_BSS_MAX: highest BSS attribute + */ +enum nl80211_bss { + __NL80211_BSS_INVALID, + NL80211_BSS_BSSID, + NL80211_BSS_FREQUENCY, + NL80211_BSS_TSF, + NL80211_BSS_BEACON_INTERVAL, + NL80211_BSS_CAPABILITY, + NL80211_BSS_INFORMATION_ELEMENTS, + NL80211_BSS_SIGNAL_MBM, + NL80211_BSS_SIGNAL_UNSPEC, + NL80211_BSS_STATUS, + NL80211_BSS_SEEN_MS_AGO, + NL80211_BSS_BEACON_IES, + NL80211_BSS_CHAN_WIDTH, + NL80211_BSS_BEACON_TSF, + NL80211_BSS_PRESP_DATA, + NL80211_BSS_LAST_SEEN_BOOTTIME, + NL80211_BSS_PAD, + NL80211_BSS_PARENT_TSF, + NL80211_BSS_PARENT_BSSID, + NL80211_BSS_CHAIN_SIGNAL, + NL80211_BSS_FREQUENCY_OFFSET, + NL80211_BSS_MLO_LINK_ID, + NL80211_BSS_MLD_ADDR, + + /* keep last */ + __NL80211_BSS_AFTER_LAST, + NL80211_BSS_MAX = __NL80211_BSS_AFTER_LAST - 1 +}; + +/** + * enum nl80211_bss_status - BSS "status" + * @NL80211_BSS_STATUS_AUTHENTICATED: Authenticated with this BSS. + * Note that this is no longer used since cfg80211 no longer + * keeps track of whether or not authentication was done with + * a given BSS. + * @NL80211_BSS_STATUS_ASSOCIATED: Associated with this BSS. + * @NL80211_BSS_STATUS_IBSS_JOINED: Joined to this IBSS. + * + * The BSS status is a BSS attribute in scan dumps, which + * indicates the status the interface has wrt. this BSS. + */ +enum nl80211_bss_status { + NL80211_BSS_STATUS_AUTHENTICATED, + NL80211_BSS_STATUS_ASSOCIATED, + NL80211_BSS_STATUS_IBSS_JOINED, +}; + +/** + * enum nl80211_auth_type - AuthenticationType + * + * @NL80211_AUTHTYPE_OPEN_SYSTEM: Open System authentication + * @NL80211_AUTHTYPE_SHARED_KEY: Shared Key authentication (WEP only) + * @NL80211_AUTHTYPE_FT: Fast BSS Transition (IEEE 802.11r) + * @NL80211_AUTHTYPE_NETWORK_EAP: Network EAP (some Cisco APs and mainly LEAP) + * @NL80211_AUTHTYPE_SAE: Simultaneous authentication of equals + * @NL80211_AUTHTYPE_FILS_SK: Fast Initial Link Setup shared key + * @NL80211_AUTHTYPE_FILS_SK_PFS: Fast Initial Link Setup shared key with PFS + * @NL80211_AUTHTYPE_FILS_PK: Fast Initial Link Setup public key + * @__NL80211_AUTHTYPE_NUM: internal + * @NL80211_AUTHTYPE_MAX: maximum valid auth algorithm + * @NL80211_AUTHTYPE_AUTOMATIC: determine automatically (if necessary by + * trying multiple times); this is invalid in netlink -- leave out + * the attribute for this on CONNECT commands. + */ +enum nl80211_auth_type { + NL80211_AUTHTYPE_OPEN_SYSTEM, + NL80211_AUTHTYPE_SHARED_KEY, + NL80211_AUTHTYPE_FT, + NL80211_AUTHTYPE_NETWORK_EAP, + NL80211_AUTHTYPE_SAE, + NL80211_AUTHTYPE_FILS_SK, + NL80211_AUTHTYPE_FILS_SK_PFS, + NL80211_AUTHTYPE_FILS_PK, + + /* keep last */ + __NL80211_AUTHTYPE_NUM, + NL80211_AUTHTYPE_MAX = __NL80211_AUTHTYPE_NUM - 1, + NL80211_AUTHTYPE_AUTOMATIC +}; + +/** + * enum nl80211_key_type - Key Type + * @NL80211_KEYTYPE_GROUP: Group (broadcast/multicast) key + * @NL80211_KEYTYPE_PAIRWISE: Pairwise (unicast/individual) key + * @NL80211_KEYTYPE_PEERKEY: PeerKey (DLS) + * @NUM_NL80211_KEYTYPES: number of defined key types + */ +enum nl80211_key_type { + NL80211_KEYTYPE_GROUP, + NL80211_KEYTYPE_PAIRWISE, + NL80211_KEYTYPE_PEERKEY, + + NUM_NL80211_KEYTYPES +}; + +/** + * enum nl80211_mfp - Management frame protection state + * @NL80211_MFP_NO: Management frame protection not used + * @NL80211_MFP_REQUIRED: Management frame protection required + * @NL80211_MFP_OPTIONAL: Management frame protection is optional + */ +enum nl80211_mfp { + NL80211_MFP_NO, + NL80211_MFP_REQUIRED, + NL80211_MFP_OPTIONAL, +}; + +enum nl80211_wpa_versions { + NL80211_WPA_VERSION_1 = 1 << 0, + NL80211_WPA_VERSION_2 = 1 << 1, + NL80211_WPA_VERSION_3 = 1 << 2, +}; + +/** + * enum nl80211_key_default_types - key default types + * @__NL80211_KEY_DEFAULT_TYPE_INVALID: invalid + * @NL80211_KEY_DEFAULT_TYPE_UNICAST: key should be used as default + * unicast key + * @NL80211_KEY_DEFAULT_TYPE_MULTICAST: key should be used as default + * multicast key + * @NUM_NL80211_KEY_DEFAULT_TYPES: number of default types + */ +enum nl80211_key_default_types { + __NL80211_KEY_DEFAULT_TYPE_INVALID, + NL80211_KEY_DEFAULT_TYPE_UNICAST, + NL80211_KEY_DEFAULT_TYPE_MULTICAST, + + NUM_NL80211_KEY_DEFAULT_TYPES +}; + +/** + * enum nl80211_key_attributes - key attributes + * @__NL80211_KEY_INVALID: invalid + * @NL80211_KEY_DATA: (temporal) key data; for TKIP this consists of + * 16 bytes encryption key followed by 8 bytes each for TX and RX MIC + * keys + * @NL80211_KEY_IDX: key ID (u8, 0-3) + * @NL80211_KEY_CIPHER: key cipher suite (u32, as defined by IEEE 802.11 + * section 7.3.2.25.1, e.g. 0x000FAC04) + * @NL80211_KEY_SEQ: transmit key sequence number (IV/PN) for TKIP and + * CCMP keys, each six bytes in little endian + * @NL80211_KEY_DEFAULT: flag indicating default key + * @NL80211_KEY_DEFAULT_MGMT: flag indicating default management key + * @NL80211_KEY_TYPE: the key type from enum nl80211_key_type, if not + * specified the default depends on whether a MAC address was + * given with the command using the key or not (u32) + * @NL80211_KEY_DEFAULT_TYPES: A nested attribute containing flags + * attributes, specifying what a key should be set as default as. + * See &enum nl80211_key_default_types. + * @NL80211_KEY_MODE: the mode from enum nl80211_key_mode. + * Defaults to @NL80211_KEY_RX_TX. + * @NL80211_KEY_DEFAULT_BEACON: flag indicating default Beacon frame key + * + * @__NL80211_KEY_AFTER_LAST: internal + * @NL80211_KEY_MAX: highest key attribute + */ +enum nl80211_key_attributes { + __NL80211_KEY_INVALID, + NL80211_KEY_DATA, + NL80211_KEY_IDX, + NL80211_KEY_CIPHER, + NL80211_KEY_SEQ, + NL80211_KEY_DEFAULT, + NL80211_KEY_DEFAULT_MGMT, + NL80211_KEY_TYPE, + NL80211_KEY_DEFAULT_TYPES, + NL80211_KEY_MODE, + NL80211_KEY_DEFAULT_BEACON, + + /* keep last */ + __NL80211_KEY_AFTER_LAST, + NL80211_KEY_MAX = __NL80211_KEY_AFTER_LAST - 1 +}; + +/** + * enum nl80211_tx_rate_attributes - TX rate set attributes + * @__NL80211_TXRATE_INVALID: invalid + * @NL80211_TXRATE_LEGACY: Legacy (non-MCS) rates allowed for TX rate selection + * in an array of rates as defined in IEEE 802.11 7.3.2.2 (u8 values with + * 1 = 500 kbps) but without the IE length restriction (at most + * %NL80211_MAX_SUPP_RATES in a single array). + * @NL80211_TXRATE_HT: HT (MCS) rates allowed for TX rate selection + * in an array of MCS numbers. + * @NL80211_TXRATE_VHT: VHT rates allowed for TX rate selection, + * see &struct nl80211_txrate_vht + * @NL80211_TXRATE_GI: configure GI, see &enum nl80211_txrate_gi + * @NL80211_TXRATE_HE: HE rates allowed for TX rate selection, + * see &struct nl80211_txrate_he + * @NL80211_TXRATE_HE_GI: configure HE GI, 0.8us, 1.6us and 3.2us. + * @NL80211_TXRATE_HE_LTF: configure HE LTF, 1XLTF, 2XLTF and 4XLTF. + * @__NL80211_TXRATE_AFTER_LAST: internal + * @NL80211_TXRATE_MAX: highest TX rate attribute + */ +enum nl80211_tx_rate_attributes { + __NL80211_TXRATE_INVALID, + NL80211_TXRATE_LEGACY, + NL80211_TXRATE_HT, + NL80211_TXRATE_VHT, + NL80211_TXRATE_GI, + NL80211_TXRATE_HE, + NL80211_TXRATE_HE_GI, + NL80211_TXRATE_HE_LTF, + + /* keep last */ + __NL80211_TXRATE_AFTER_LAST, + NL80211_TXRATE_MAX = __NL80211_TXRATE_AFTER_LAST - 1 +}; + +#define NL80211_TXRATE_MCS NL80211_TXRATE_HT +#define NL80211_VHT_NSS_MAX 8 + +/** + * struct nl80211_txrate_vht - VHT MCS/NSS txrate bitmap + * @mcs: MCS bitmap table for each NSS (array index 0 for 1 stream, etc.) + */ +struct nl80211_txrate_vht { + __u16 mcs[NL80211_VHT_NSS_MAX]; +}; + +#define NL80211_HE_NSS_MAX 8 +/** + * struct nl80211_txrate_he - HE MCS/NSS txrate bitmap + * @mcs: MCS bitmap table for each NSS (array index 0 for 1 stream, etc.) + */ +struct nl80211_txrate_he { + __u16 mcs[NL80211_HE_NSS_MAX]; +}; + +enum nl80211_txrate_gi { + NL80211_TXRATE_DEFAULT_GI, + NL80211_TXRATE_FORCE_SGI, + NL80211_TXRATE_FORCE_LGI, +}; + +/** + * enum nl80211_band - Frequency band + * @NL80211_BAND_2GHZ: 2.4 GHz ISM band + * @NL80211_BAND_5GHZ: around 5 GHz band (4.9 - 5.7 GHz) + * @NL80211_BAND_60GHZ: around 60 GHz band (58.32 - 69.12 GHz) + * @NL80211_BAND_6GHZ: around 6 GHz band (5.9 - 7.2 GHz) + * @NL80211_BAND_S1GHZ: around 900MHz, supported by S1G PHYs + * @NL80211_BAND_LC: light communication band (placeholder) + * @NUM_NL80211_BANDS: number of bands, avoid using this in userspace + * since newer kernel versions may support more bands + */ +enum nl80211_band { + NL80211_BAND_2GHZ, + NL80211_BAND_5GHZ, + NL80211_BAND_60GHZ, + NL80211_BAND_6GHZ, + NL80211_BAND_S1GHZ, + NL80211_BAND_LC, + + NUM_NL80211_BANDS, +}; + +/** + * enum nl80211_ps_state - powersave state + * @NL80211_PS_DISABLED: powersave is disabled + * @NL80211_PS_ENABLED: powersave is enabled + */ +enum nl80211_ps_state { + NL80211_PS_DISABLED, + NL80211_PS_ENABLED, +}; + +/** + * enum nl80211_attr_cqm - connection quality monitor attributes + * @__NL80211_ATTR_CQM_INVALID: invalid + * @NL80211_ATTR_CQM_RSSI_THOLD: RSSI threshold in dBm. This value specifies + * the threshold for the RSSI level at which an event will be sent. Zero + * to disable. Alternatively, if %NL80211_EXT_FEATURE_CQM_RSSI_LIST is + * set, multiple values can be supplied as a low-to-high sorted array of + * threshold values in dBm. Events will be sent when the RSSI value + * crosses any of the thresholds. + * @NL80211_ATTR_CQM_RSSI_HYST: RSSI hysteresis in dBm. This value specifies + * the minimum amount the RSSI level must change after an event before a + * new event may be issued (to reduce effects of RSSI oscillation). + * @NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT: RSSI threshold event + * @NL80211_ATTR_CQM_PKT_LOSS_EVENT: a u32 value indicating that this many + * consecutive packets were not acknowledged by the peer + * @NL80211_ATTR_CQM_TXE_RATE: TX error rate in %. Minimum % of TX failures + * during the given %NL80211_ATTR_CQM_TXE_INTVL before an + * %NL80211_CMD_NOTIFY_CQM with reported %NL80211_ATTR_CQM_TXE_RATE and + * %NL80211_ATTR_CQM_TXE_PKTS is generated. + * @NL80211_ATTR_CQM_TXE_PKTS: number of attempted packets in a given + * %NL80211_ATTR_CQM_TXE_INTVL before %NL80211_ATTR_CQM_TXE_RATE is + * checked. + * @NL80211_ATTR_CQM_TXE_INTVL: interval in seconds. Specifies the periodic + * interval in which %NL80211_ATTR_CQM_TXE_PKTS and + * %NL80211_ATTR_CQM_TXE_RATE must be satisfied before generating an + * %NL80211_CMD_NOTIFY_CQM. Set to 0 to turn off TX error reporting. + * @NL80211_ATTR_CQM_BEACON_LOSS_EVENT: flag attribute that's set in a beacon + * loss event + * @NL80211_ATTR_CQM_RSSI_LEVEL: the RSSI value in dBm that triggered the + * RSSI threshold event. + * @__NL80211_ATTR_CQM_AFTER_LAST: internal + * @NL80211_ATTR_CQM_MAX: highest key attribute + */ +enum nl80211_attr_cqm { + __NL80211_ATTR_CQM_INVALID, + NL80211_ATTR_CQM_RSSI_THOLD, + NL80211_ATTR_CQM_RSSI_HYST, + NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT, + NL80211_ATTR_CQM_PKT_LOSS_EVENT, + NL80211_ATTR_CQM_TXE_RATE, + NL80211_ATTR_CQM_TXE_PKTS, + NL80211_ATTR_CQM_TXE_INTVL, + NL80211_ATTR_CQM_BEACON_LOSS_EVENT, + NL80211_ATTR_CQM_RSSI_LEVEL, + + /* keep last */ + __NL80211_ATTR_CQM_AFTER_LAST, + NL80211_ATTR_CQM_MAX = __NL80211_ATTR_CQM_AFTER_LAST - 1 +}; + +/** + * enum nl80211_cqm_rssi_threshold_event - RSSI threshold event + * @NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW: The RSSI level is lower than the + * configured threshold + * @NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH: The RSSI is higher than the + * configured threshold + * @NL80211_CQM_RSSI_BEACON_LOSS_EVENT: (reserved, never sent) + */ +enum nl80211_cqm_rssi_threshold_event { + NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW, + NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH, + NL80211_CQM_RSSI_BEACON_LOSS_EVENT, +}; + + +/** + * enum nl80211_tx_power_setting - TX power adjustment + * @NL80211_TX_POWER_AUTOMATIC: automatically determine transmit power + * @NL80211_TX_POWER_LIMITED: limit TX power by the mBm parameter + * @NL80211_TX_POWER_FIXED: fix TX power to the mBm parameter + */ +enum nl80211_tx_power_setting { + NL80211_TX_POWER_AUTOMATIC, + NL80211_TX_POWER_LIMITED, + NL80211_TX_POWER_FIXED, +}; + +/** + * enum nl80211_tid_config - TID config state + * @NL80211_TID_CONFIG_ENABLE: Enable config for the TID + * @NL80211_TID_CONFIG_DISABLE: Disable config for the TID + */ +enum nl80211_tid_config { + NL80211_TID_CONFIG_ENABLE, + NL80211_TID_CONFIG_DISABLE, +}; + +/* enum nl80211_tx_rate_setting - TX rate configuration type + * @NL80211_TX_RATE_AUTOMATIC: automatically determine TX rate + * @NL80211_TX_RATE_LIMITED: limit the TX rate by the TX rate parameter + * @NL80211_TX_RATE_FIXED: fix TX rate to the TX rate parameter + */ +enum nl80211_tx_rate_setting { + NL80211_TX_RATE_AUTOMATIC, + NL80211_TX_RATE_LIMITED, + NL80211_TX_RATE_FIXED, +}; + +/* enum nl80211_tid_config_attr - TID specific configuration. + * @NL80211_TID_CONFIG_ATTR_PAD: pad attribute for 64-bit values + * @NL80211_TID_CONFIG_ATTR_VIF_SUPP: a bitmap (u64) of attributes supported + * for per-vif configuration; doesn't list the ones that are generic + * (%NL80211_TID_CONFIG_ATTR_TIDS, %NL80211_TID_CONFIG_ATTR_OVERRIDE). + * @NL80211_TID_CONFIG_ATTR_PEER_SUPP: same as the previous per-vif one, but + * per peer instead. + * @NL80211_TID_CONFIG_ATTR_OVERRIDE: flag attribue, if set indicates + * that the new configuration overrides all previous peer + * configurations, otherwise previous peer specific configurations + * should be left untouched. + * @NL80211_TID_CONFIG_ATTR_TIDS: a bitmask value of TIDs (bit 0 to 7) + * Its type is u16. + * @NL80211_TID_CONFIG_ATTR_NOACK: Configure ack policy for the TID. + * specified in %NL80211_TID_CONFIG_ATTR_TID. see %enum nl80211_tid_config. + * Its type is u8. + * @NL80211_TID_CONFIG_ATTR_RETRY_SHORT: Number of retries used with data frame + * transmission, user-space sets this configuration in + * &NL80211_CMD_SET_TID_CONFIG. It is u8 type, min value is 1 and + * the max value is advertised by the driver in this attribute on + * output in wiphy capabilities. + * @NL80211_TID_CONFIG_ATTR_RETRY_LONG: Number of retries used with data frame + * transmission, user-space sets this configuration in + * &NL80211_CMD_SET_TID_CONFIG. Its type is u8, min value is 1 and + * the max value is advertised by the driver in this attribute on + * output in wiphy capabilities. + * @NL80211_TID_CONFIG_ATTR_AMPDU_CTRL: Enable/Disable MPDU aggregation + * for the TIDs specified in %NL80211_TID_CONFIG_ATTR_TIDS. + * Its type is u8, using the values from &nl80211_tid_config. + * @NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL: Enable/Disable RTS_CTS for the TIDs + * specified in %NL80211_TID_CONFIG_ATTR_TIDS. It is u8 type, using + * the values from &nl80211_tid_config. + * @NL80211_TID_CONFIG_ATTR_AMSDU_CTRL: Enable/Disable MSDU aggregation + * for the TIDs specified in %NL80211_TID_CONFIG_ATTR_TIDS. + * Its type is u8, using the values from &nl80211_tid_config. + * @NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE: This attribute will be useful + * to notfiy the driver that what type of txrate should be used + * for the TIDs specified in %NL80211_TID_CONFIG_ATTR_TIDS. using + * the values form &nl80211_tx_rate_setting. + * @NL80211_TID_CONFIG_ATTR_TX_RATE: Data frame TX rate mask should be applied + * with the parameters passed through %NL80211_ATTR_TX_RATES. + * configuration is applied to the data frame for the tid to that connected + * station. + */ +enum nl80211_tid_config_attr { + __NL80211_TID_CONFIG_ATTR_INVALID, + NL80211_TID_CONFIG_ATTR_PAD, + NL80211_TID_CONFIG_ATTR_VIF_SUPP, + NL80211_TID_CONFIG_ATTR_PEER_SUPP, + NL80211_TID_CONFIG_ATTR_OVERRIDE, + NL80211_TID_CONFIG_ATTR_TIDS, + NL80211_TID_CONFIG_ATTR_NOACK, + NL80211_TID_CONFIG_ATTR_RETRY_SHORT, + NL80211_TID_CONFIG_ATTR_RETRY_LONG, + NL80211_TID_CONFIG_ATTR_AMPDU_CTRL, + NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL, + NL80211_TID_CONFIG_ATTR_AMSDU_CTRL, + NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE, + NL80211_TID_CONFIG_ATTR_TX_RATE, + + /* keep last */ + __NL80211_TID_CONFIG_ATTR_AFTER_LAST, + NL80211_TID_CONFIG_ATTR_MAX = __NL80211_TID_CONFIG_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_packet_pattern_attr - packet pattern attribute + * @__NL80211_PKTPAT_INVALID: invalid number for nested attribute + * @NL80211_PKTPAT_PATTERN: the pattern, values where the mask has + * a zero bit are ignored + * @NL80211_PKTPAT_MASK: pattern mask, must be long enough to have + * a bit for each byte in the pattern. The lowest-order bit corresponds + * to the first byte of the pattern, but the bytes of the pattern are + * in a little-endian-like format, i.e. the 9th byte of the pattern + * corresponds to the lowest-order bit in the second byte of the mask. + * For example: The match 00:xx:00:00:xx:00:00:00:00:xx:xx:xx (where + * xx indicates "don't care") would be represented by a pattern of + * twelve zero bytes, and a mask of "0xed,0x01". + * Note that the pattern matching is done as though frames were not + * 802.11 frames but 802.3 frames, i.e. the frame is fully unpacked + * first (including SNAP header unpacking) and then matched. + * @NL80211_PKTPAT_OFFSET: packet offset, pattern is matched after + * these fixed number of bytes of received packet + * @NUM_NL80211_PKTPAT: number of attributes + * @MAX_NL80211_PKTPAT: max attribute number + */ +enum nl80211_packet_pattern_attr { + __NL80211_PKTPAT_INVALID, + NL80211_PKTPAT_MASK, + NL80211_PKTPAT_PATTERN, + NL80211_PKTPAT_OFFSET, + + NUM_NL80211_PKTPAT, + MAX_NL80211_PKTPAT = NUM_NL80211_PKTPAT - 1, +}; + +/** + * struct nl80211_pattern_support - packet pattern support information + * @max_patterns: maximum number of patterns supported + * @min_pattern_len: minimum length of each pattern + * @max_pattern_len: maximum length of each pattern + * @max_pkt_offset: maximum Rx packet offset + * + * This struct is carried in %NL80211_WOWLAN_TRIG_PKT_PATTERN when + * that is part of %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED or in + * %NL80211_ATTR_COALESCE_RULE_PKT_PATTERN when that is part of + * %NL80211_ATTR_COALESCE_RULE in the capability information given + * by the kernel to userspace. + */ +struct nl80211_pattern_support { + __u32 max_patterns; + __u32 min_pattern_len; + __u32 max_pattern_len; + __u32 max_pkt_offset; +} __attribute__((packed)); + +/* only for backward compatibility */ +#define __NL80211_WOWLAN_PKTPAT_INVALID __NL80211_PKTPAT_INVALID +#define NL80211_WOWLAN_PKTPAT_MASK NL80211_PKTPAT_MASK +#define NL80211_WOWLAN_PKTPAT_PATTERN NL80211_PKTPAT_PATTERN +#define NL80211_WOWLAN_PKTPAT_OFFSET NL80211_PKTPAT_OFFSET +#define NUM_NL80211_WOWLAN_PKTPAT NUM_NL80211_PKTPAT +#define MAX_NL80211_WOWLAN_PKTPAT MAX_NL80211_PKTPAT +#define nl80211_wowlan_pattern_support nl80211_pattern_support + +/** + * enum nl80211_wowlan_triggers - WoWLAN trigger definitions + * @__NL80211_WOWLAN_TRIG_INVALID: invalid number for nested attributes + * @NL80211_WOWLAN_TRIG_ANY: wake up on any activity, do not really put + * the chip into a special state -- works best with chips that have + * support for low-power operation already (flag) + * Note that this mode is incompatible with all of the others, if + * any others are even supported by the device. + * @NL80211_WOWLAN_TRIG_DISCONNECT: wake up on disconnect, the way disconnect + * is detected is implementation-specific (flag) + * @NL80211_WOWLAN_TRIG_MAGIC_PKT: wake up on magic packet (6x 0xff, followed + * by 16 repetitions of MAC addr, anywhere in payload) (flag) + * @NL80211_WOWLAN_TRIG_PKT_PATTERN: wake up on the specified packet patterns + * which are passed in an array of nested attributes, each nested attribute + * defining a with attributes from &struct nl80211_wowlan_trig_pkt_pattern. + * Each pattern defines a wakeup packet. Packet offset is associated with + * each pattern which is used while matching the pattern. The matching is + * done on the MSDU, i.e. as though the packet was an 802.3 packet, so the + * pattern matching is done after the packet is converted to the MSDU. + * + * In %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, it is a binary attribute + * carrying a &struct nl80211_pattern_support. + * + * When reporting wakeup. it is a u32 attribute containing the 0-based + * index of the pattern that caused the wakeup, in the patterns passed + * to the kernel when configuring. + * @NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED: Not a real trigger, and cannot be + * used when setting, used only to indicate that GTK rekeying is supported + * by the device (flag) + * @NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE: wake up on GTK rekey failure (if + * done by the device) (flag) + * @NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST: wake up on EAP Identity Request + * packet (flag) + * @NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE: wake up on 4-way handshake (flag) + * @NL80211_WOWLAN_TRIG_RFKILL_RELEASE: wake up when rfkill is released + * (on devices that have rfkill in the device) (flag) + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211: For wakeup reporting only, contains + * the 802.11 packet that caused the wakeup, e.g. a deauth frame. The frame + * may be truncated, the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN + * attribute contains the original length. + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN: Original length of the 802.11 + * packet, may be bigger than the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211 + * attribute if the packet was truncated somewhere. + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023: For wakeup reporting only, contains the + * 802.11 packet that caused the wakeup, e.g. a magic packet. The frame may + * be truncated, the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN attribute + * contains the original length. + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN: Original length of the 802.3 + * packet, may be bigger than the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023 + * attribute if the packet was truncated somewhere. + * @NL80211_WOWLAN_TRIG_TCP_CONNECTION: TCP connection wake, see DOC section + * "TCP connection wakeup" for more details. This is a nested attribute + * containing the exact information for establishing and keeping alive + * the TCP connection. + * @NL80211_WOWLAN_TRIG_TCP_WAKEUP_MATCH: For wakeup reporting only, the + * wakeup packet was received on the TCP connection + * @NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST: For wakeup reporting only, the + * TCP connection was lost or failed to be established + * @NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS: For wakeup reporting only, + * the TCP connection ran out of tokens to use for data to send to the + * service + * @NL80211_WOWLAN_TRIG_NET_DETECT: wake up when a configured network + * is detected. This is a nested attribute that contains the + * same attributes used with @NL80211_CMD_START_SCHED_SCAN. It + * specifies how the scan is performed (e.g. the interval, the + * channels to scan and the initial delay) as well as the scan + * results that will trigger a wake (i.e. the matchsets). This + * attribute is also sent in a response to + * @NL80211_CMD_GET_WIPHY, indicating the number of match sets + * supported by the driver (u32). + * @NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS: nested attribute + * containing an array with information about what triggered the + * wake up. If no elements are present in the array, it means + * that the information is not available. If more than one + * element is present, it means that more than one match + * occurred. + * Each element in the array is a nested attribute that contains + * one optional %NL80211_ATTR_SSID attribute and one optional + * %NL80211_ATTR_SCAN_FREQUENCIES attribute. At least one of + * these attributes must be present. If + * %NL80211_ATTR_SCAN_FREQUENCIES contains more than one + * frequency, it means that the match occurred in more than one + * channel. + * @NUM_NL80211_WOWLAN_TRIG: number of wake on wireless triggers + * @MAX_NL80211_WOWLAN_TRIG: highest wowlan trigger attribute number + * + * These nested attributes are used to configure the wakeup triggers and + * to report the wakeup reason(s). + */ +enum nl80211_wowlan_triggers { + __NL80211_WOWLAN_TRIG_INVALID, + NL80211_WOWLAN_TRIG_ANY, + NL80211_WOWLAN_TRIG_DISCONNECT, + NL80211_WOWLAN_TRIG_MAGIC_PKT, + NL80211_WOWLAN_TRIG_PKT_PATTERN, + NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED, + NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE, + NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST, + NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE, + NL80211_WOWLAN_TRIG_RFKILL_RELEASE, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN, + NL80211_WOWLAN_TRIG_TCP_CONNECTION, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS, + NL80211_WOWLAN_TRIG_NET_DETECT, + NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS, + + /* keep last */ + NUM_NL80211_WOWLAN_TRIG, + MAX_NL80211_WOWLAN_TRIG = NUM_NL80211_WOWLAN_TRIG - 1 +}; + +/** + * DOC: TCP connection wakeup + * + * Some devices can establish a TCP connection in order to be woken up by a + * packet coming in from outside their network segment, or behind NAT. If + * configured, the device will establish a TCP connection to the given + * service, and periodically send data to that service. The first data + * packet is usually transmitted after SYN/ACK, also ACKing the SYN/ACK. + * The data packets can optionally include a (little endian) sequence + * number (in the TCP payload!) that is generated by the device, and, also + * optionally, a token from a list of tokens. This serves as a keep-alive + * with the service, and for NATed connections, etc. + * + * During this keep-alive period, the server doesn't send any data to the + * client. When receiving data, it is compared against the wakeup pattern + * (and mask) and if it matches, the host is woken up. Similarly, if the + * connection breaks or cannot be established to start with, the host is + * also woken up. + * + * Developer's note: ARP offload is required for this, otherwise TCP + * response packets might not go through correctly. + */ + +/** + * struct nl80211_wowlan_tcp_data_seq - WoWLAN TCP data sequence + * @start: starting value + * @offset: offset of sequence number in packet + * @len: length of the sequence value to write, 1 through 4 + * + * Note: don't confuse with the TCP sequence number(s), this is for the + * keepalive packet payload. The actual value is written into the packet + * in little endian. + */ +struct nl80211_wowlan_tcp_data_seq { + __u32 start, offset, len; +}; + +/** + * struct nl80211_wowlan_tcp_data_token - WoWLAN TCP data token config + * @offset: offset of token in packet + * @len: length of each token + * @token_stream: stream of data to be used for the tokens, the length must + * be a multiple of @len for this to make sense + */ +struct nl80211_wowlan_tcp_data_token { + __u32 offset, len; + __u8 token_stream[]; +}; + +/** + * struct nl80211_wowlan_tcp_data_token_feature - data token features + * @min_len: minimum token length + * @max_len: maximum token length + * @bufsize: total available token buffer size (max size of @token_stream) + */ +struct nl80211_wowlan_tcp_data_token_feature { + __u32 min_len, max_len, bufsize; +}; + +/** + * enum nl80211_wowlan_tcp_attrs - WoWLAN TCP connection parameters + * @__NL80211_WOWLAN_TCP_INVALID: invalid number for nested attributes + * @NL80211_WOWLAN_TCP_SRC_IPV4: source IPv4 address (in network byte order) + * @NL80211_WOWLAN_TCP_DST_IPV4: destination IPv4 address + * (in network byte order) + * @NL80211_WOWLAN_TCP_DST_MAC: destination MAC address, this is given because + * route lookup when configured might be invalid by the time we suspend, + * and doing a route lookup when suspending is no longer possible as it + * might require ARP querying. + * @NL80211_WOWLAN_TCP_SRC_PORT: source port (u16); optional, if not given a + * socket and port will be allocated + * @NL80211_WOWLAN_TCP_DST_PORT: destination port (u16) + * @NL80211_WOWLAN_TCP_DATA_PAYLOAD: data packet payload, at least one byte. + * For feature advertising, a u32 attribute holding the maximum length + * of the data payload. + * @NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ: data packet sequence configuration + * (if desired), a &struct nl80211_wowlan_tcp_data_seq. For feature + * advertising it is just a flag + * @NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN: data packet token configuration, + * see &struct nl80211_wowlan_tcp_data_token and for advertising see + * &struct nl80211_wowlan_tcp_data_token_feature. + * @NL80211_WOWLAN_TCP_DATA_INTERVAL: data interval in seconds, maximum + * interval in feature advertising (u32) + * @NL80211_WOWLAN_TCP_WAKE_PAYLOAD: wake packet payload, for advertising a + * u32 attribute holding the maximum length + * @NL80211_WOWLAN_TCP_WAKE_MASK: Wake packet payload mask, not used for + * feature advertising. The mask works like @NL80211_PKTPAT_MASK + * but on the TCP payload only. + * @NUM_NL80211_WOWLAN_TCP: number of TCP attributes + * @MAX_NL80211_WOWLAN_TCP: highest attribute number + */ +enum nl80211_wowlan_tcp_attrs { + __NL80211_WOWLAN_TCP_INVALID, + NL80211_WOWLAN_TCP_SRC_IPV4, + NL80211_WOWLAN_TCP_DST_IPV4, + NL80211_WOWLAN_TCP_DST_MAC, + NL80211_WOWLAN_TCP_SRC_PORT, + NL80211_WOWLAN_TCP_DST_PORT, + NL80211_WOWLAN_TCP_DATA_PAYLOAD, + NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ, + NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, + NL80211_WOWLAN_TCP_DATA_INTERVAL, + NL80211_WOWLAN_TCP_WAKE_PAYLOAD, + NL80211_WOWLAN_TCP_WAKE_MASK, + + /* keep last */ + NUM_NL80211_WOWLAN_TCP, + MAX_NL80211_WOWLAN_TCP = NUM_NL80211_WOWLAN_TCP - 1 +}; + +/** + * struct nl80211_coalesce_rule_support - coalesce rule support information + * @max_rules: maximum number of rules supported + * @pat: packet pattern support information + * @max_delay: maximum supported coalescing delay in msecs + * + * This struct is carried in %NL80211_ATTR_COALESCE_RULE in the + * capability information given by the kernel to userspace. + */ +struct nl80211_coalesce_rule_support { + __u32 max_rules; + struct nl80211_pattern_support pat; + __u32 max_delay; +} __attribute__((packed)); + +/** + * enum nl80211_attr_coalesce_rule - coalesce rule attribute + * @__NL80211_COALESCE_RULE_INVALID: invalid number for nested attribute + * @NL80211_ATTR_COALESCE_RULE_DELAY: delay in msecs used for packet coalescing + * @NL80211_ATTR_COALESCE_RULE_CONDITION: condition for packet coalescence, + * see &enum nl80211_coalesce_condition. + * @NL80211_ATTR_COALESCE_RULE_PKT_PATTERN: packet offset, pattern is matched + * after these fixed number of bytes of received packet + * @NUM_NL80211_ATTR_COALESCE_RULE: number of attributes + * @NL80211_ATTR_COALESCE_RULE_MAX: max attribute number + */ +enum nl80211_attr_coalesce_rule { + __NL80211_COALESCE_RULE_INVALID, + NL80211_ATTR_COALESCE_RULE_DELAY, + NL80211_ATTR_COALESCE_RULE_CONDITION, + NL80211_ATTR_COALESCE_RULE_PKT_PATTERN, + + /* keep last */ + NUM_NL80211_ATTR_COALESCE_RULE, + NL80211_ATTR_COALESCE_RULE_MAX = NUM_NL80211_ATTR_COALESCE_RULE - 1 +}; + +/** + * enum nl80211_coalesce_condition - coalesce rule conditions + * @NL80211_COALESCE_CONDITION_MATCH: coalaesce Rx packets when patterns + * in a rule are matched. + * @NL80211_COALESCE_CONDITION_NO_MATCH: coalesce Rx packets when patterns + * in a rule are not matched. + */ +enum nl80211_coalesce_condition { + NL80211_COALESCE_CONDITION_MATCH, + NL80211_COALESCE_CONDITION_NO_MATCH +}; + +/** + * enum nl80211_iface_limit_attrs - limit attributes + * @NL80211_IFACE_LIMIT_UNSPEC: (reserved) + * @NL80211_IFACE_LIMIT_MAX: maximum number of interfaces that + * can be chosen from this set of interface types (u32) + * @NL80211_IFACE_LIMIT_TYPES: nested attribute containing a + * flag attribute for each interface type in this set + * @NUM_NL80211_IFACE_LIMIT: number of attributes + * @MAX_NL80211_IFACE_LIMIT: highest attribute number + */ +enum nl80211_iface_limit_attrs { + NL80211_IFACE_LIMIT_UNSPEC, + NL80211_IFACE_LIMIT_MAX, + NL80211_IFACE_LIMIT_TYPES, + + /* keep last */ + NUM_NL80211_IFACE_LIMIT, + MAX_NL80211_IFACE_LIMIT = NUM_NL80211_IFACE_LIMIT - 1 +}; + +/** + * enum nl80211_if_combination_attrs -- interface combination attributes + * + * @NL80211_IFACE_COMB_UNSPEC: (reserved) + * @NL80211_IFACE_COMB_LIMITS: Nested attributes containing the limits + * for given interface types, see &enum nl80211_iface_limit_attrs. + * @NL80211_IFACE_COMB_MAXNUM: u32 attribute giving the total number of + * interfaces that can be created in this group. This number doesn't + * apply to interfaces purely managed in software, which are listed + * in a separate attribute %NL80211_ATTR_INTERFACES_SOFTWARE. + * @NL80211_IFACE_COMB_STA_AP_BI_MATCH: flag attribute specifying that + * beacon intervals within this group must be all the same even for + * infrastructure and AP/GO combinations, i.e. the GO(s) must adopt + * the infrastructure network's beacon interval. + * @NL80211_IFACE_COMB_NUM_CHANNELS: u32 attribute specifying how many + * different channels may be used within this group. + * @NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS: u32 attribute containing the bitmap + * of supported channel widths for radar detection. + * @NL80211_IFACE_COMB_RADAR_DETECT_REGIONS: u32 attribute containing the bitmap + * of supported regulatory regions for radar detection. + * @NL80211_IFACE_COMB_BI_MIN_GCD: u32 attribute specifying the minimum GCD of + * different beacon intervals supported by all the interface combinations + * in this group (if not present, all beacon intervals be identical). + * @NUM_NL80211_IFACE_COMB: number of attributes + * @MAX_NL80211_IFACE_COMB: highest attribute number + * + * Examples: + * limits = [ #{STA} <= 1, #{AP} <= 1 ], matching BI, channels = 1, max = 2 + * => allows an AP and a STA that must match BIs + * + * numbers = [ #{AP, P2P-GO} <= 8 ], BI min gcd, channels = 1, max = 8, + * => allows 8 of AP/GO that can have BI gcd >= min gcd + * + * numbers = [ #{STA} <= 2 ], channels = 2, max = 2 + * => allows two STAs on the same or on different channels + * + * numbers = [ #{STA} <= 1, #{P2P-client,P2P-GO} <= 3 ], max = 4 + * => allows a STA plus three P2P interfaces + * + * The list of these four possibilities could completely be contained + * within the %NL80211_ATTR_INTERFACE_COMBINATIONS attribute to indicate + * that any of these groups must match. + * + * "Combinations" of just a single interface will not be listed here, + * a single interface of any valid interface type is assumed to always + * be possible by itself. This means that implicitly, for each valid + * interface type, the following group always exists: + * numbers = [ #{} <= 1 ], channels = 1, max = 1 + */ +enum nl80211_if_combination_attrs { + NL80211_IFACE_COMB_UNSPEC, + NL80211_IFACE_COMB_LIMITS, + NL80211_IFACE_COMB_MAXNUM, + NL80211_IFACE_COMB_STA_AP_BI_MATCH, + NL80211_IFACE_COMB_NUM_CHANNELS, + NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, + NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, + NL80211_IFACE_COMB_BI_MIN_GCD, + + /* keep last */ + NUM_NL80211_IFACE_COMB, + MAX_NL80211_IFACE_COMB = NUM_NL80211_IFACE_COMB - 1 +}; + + +/** + * enum nl80211_plink_state - state of a mesh peer link finite state machine + * + * @NL80211_PLINK_LISTEN: initial state, considered the implicit + * state of non existent mesh peer links + * @NL80211_PLINK_OPN_SNT: mesh plink open frame has been sent to + * this mesh peer + * @NL80211_PLINK_OPN_RCVD: mesh plink open frame has been received + * from this mesh peer + * @NL80211_PLINK_CNF_RCVD: mesh plink confirm frame has been + * received from this mesh peer + * @NL80211_PLINK_ESTAB: mesh peer link is established + * @NL80211_PLINK_HOLDING: mesh peer link is being closed or cancelled + * @NL80211_PLINK_BLOCKED: all frames transmitted from this mesh + * plink are discarded, except for authentication frames + * @NUM_NL80211_PLINK_STATES: number of peer link states + * @MAX_NL80211_PLINK_STATES: highest numerical value of plink states + */ +enum nl80211_plink_state { + NL80211_PLINK_LISTEN, + NL80211_PLINK_OPN_SNT, + NL80211_PLINK_OPN_RCVD, + NL80211_PLINK_CNF_RCVD, + NL80211_PLINK_ESTAB, + NL80211_PLINK_HOLDING, + NL80211_PLINK_BLOCKED, + + /* keep last */ + NUM_NL80211_PLINK_STATES, + MAX_NL80211_PLINK_STATES = NUM_NL80211_PLINK_STATES - 1 +}; + +/** + * enum nl80211_plink_action - actions to perform in mesh peers + * + * @NL80211_PLINK_ACTION_NO_ACTION: perform no action + * @NL80211_PLINK_ACTION_OPEN: start mesh peer link establishment + * @NL80211_PLINK_ACTION_BLOCK: block traffic from this mesh peer + * @NUM_NL80211_PLINK_ACTIONS: number of possible actions + */ +enum plink_actions { + NL80211_PLINK_ACTION_NO_ACTION, + NL80211_PLINK_ACTION_OPEN, + NL80211_PLINK_ACTION_BLOCK, + + NUM_NL80211_PLINK_ACTIONS, +}; + + +#define NL80211_KCK_LEN 16 +#define NL80211_KEK_LEN 16 +#define NL80211_KCK_EXT_LEN 24 +#define NL80211_KEK_EXT_LEN 32 +#define NL80211_REPLAY_CTR_LEN 8 + +/** + * enum nl80211_rekey_data - attributes for GTK rekey offload + * @__NL80211_REKEY_DATA_INVALID: invalid number for nested attributes + * @NL80211_REKEY_DATA_KEK: key encryption key (binary) + * @NL80211_REKEY_DATA_KCK: key confirmation key (binary) + * @NL80211_REKEY_DATA_REPLAY_CTR: replay counter (binary) + * @NL80211_REKEY_DATA_AKM: AKM data (OUI, suite type) + * @NUM_NL80211_REKEY_DATA: number of rekey attributes (internal) + * @MAX_NL80211_REKEY_DATA: highest rekey attribute (internal) + */ +enum nl80211_rekey_data { + __NL80211_REKEY_DATA_INVALID, + NL80211_REKEY_DATA_KEK, + NL80211_REKEY_DATA_KCK, + NL80211_REKEY_DATA_REPLAY_CTR, + NL80211_REKEY_DATA_AKM, + + /* keep last */ + NUM_NL80211_REKEY_DATA, + MAX_NL80211_REKEY_DATA = NUM_NL80211_REKEY_DATA - 1 +}; + +/** + * enum nl80211_hidden_ssid - values for %NL80211_ATTR_HIDDEN_SSID + * @NL80211_HIDDEN_SSID_NOT_IN_USE: do not hide SSID (i.e., broadcast it in + * Beacon frames) + * @NL80211_HIDDEN_SSID_ZERO_LEN: hide SSID by using zero-length SSID element + * in Beacon frames + * @NL80211_HIDDEN_SSID_ZERO_CONTENTS: hide SSID by using correct length of SSID + * element in Beacon frames but zero out each byte in the SSID + */ +enum nl80211_hidden_ssid { + NL80211_HIDDEN_SSID_NOT_IN_USE, + NL80211_HIDDEN_SSID_ZERO_LEN, + NL80211_HIDDEN_SSID_ZERO_CONTENTS +}; + +/** + * enum nl80211_sta_wme_attr - station WME attributes + * @__NL80211_STA_WME_INVALID: invalid number for nested attribute + * @NL80211_STA_WME_UAPSD_QUEUES: bitmap of uapsd queues. the format + * is the same as the AC bitmap in the QoS info field. + * @NL80211_STA_WME_MAX_SP: max service period. the format is the same + * as the MAX_SP field in the QoS info field (but already shifted down). + * @__NL80211_STA_WME_AFTER_LAST: internal + * @NL80211_STA_WME_MAX: highest station WME attribute + */ +enum nl80211_sta_wme_attr { + __NL80211_STA_WME_INVALID, + NL80211_STA_WME_UAPSD_QUEUES, + NL80211_STA_WME_MAX_SP, + + /* keep last */ + __NL80211_STA_WME_AFTER_LAST, + NL80211_STA_WME_MAX = __NL80211_STA_WME_AFTER_LAST - 1 +}; + +/** + * enum nl80211_pmksa_candidate_attr - attributes for PMKSA caching candidates + * @__NL80211_PMKSA_CANDIDATE_INVALID: invalid number for nested attributes + * @NL80211_PMKSA_CANDIDATE_INDEX: candidate index (u32; the smaller, the higher + * priority) + * @NL80211_PMKSA_CANDIDATE_BSSID: candidate BSSID (6 octets) + * @NL80211_PMKSA_CANDIDATE_PREAUTH: RSN pre-authentication supported (flag) + * @NUM_NL80211_PMKSA_CANDIDATE: number of PMKSA caching candidate attributes + * (internal) + * @MAX_NL80211_PMKSA_CANDIDATE: highest PMKSA caching candidate attribute + * (internal) + */ +enum nl80211_pmksa_candidate_attr { + __NL80211_PMKSA_CANDIDATE_INVALID, + NL80211_PMKSA_CANDIDATE_INDEX, + NL80211_PMKSA_CANDIDATE_BSSID, + NL80211_PMKSA_CANDIDATE_PREAUTH, + + /* keep last */ + NUM_NL80211_PMKSA_CANDIDATE, + MAX_NL80211_PMKSA_CANDIDATE = NUM_NL80211_PMKSA_CANDIDATE - 1 +}; + +/** + * enum nl80211_tdls_operation - values for %NL80211_ATTR_TDLS_OPERATION + * @NL80211_TDLS_DISCOVERY_REQ: Send a TDLS discovery request + * @NL80211_TDLS_SETUP: Setup TDLS link + * @NL80211_TDLS_TEARDOWN: Teardown a TDLS link which is already established + * @NL80211_TDLS_ENABLE_LINK: Enable TDLS link + * @NL80211_TDLS_DISABLE_LINK: Disable TDLS link + */ +enum nl80211_tdls_operation { + NL80211_TDLS_DISCOVERY_REQ, + NL80211_TDLS_SETUP, + NL80211_TDLS_TEARDOWN, + NL80211_TDLS_ENABLE_LINK, + NL80211_TDLS_DISABLE_LINK, +}; + +/** + * enum nl80211_ap_sme_features - device-integrated AP features + * @NL80211_AP_SME_SA_QUERY_OFFLOAD: SA Query procedures offloaded to driver + * when user space indicates support for SA Query procedures offload during + * "start ap" with %NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT. + */ +enum nl80211_ap_sme_features { + NL80211_AP_SME_SA_QUERY_OFFLOAD = 1 << 0, +}; + +/** + * enum nl80211_feature_flags - device/driver features + * @NL80211_FEATURE_SK_TX_STATUS: This driver supports reflecting back + * TX status to the socket error queue when requested with the + * socket option. + * @NL80211_FEATURE_HT_IBSS: This driver supports IBSS with HT datarates. + * @NL80211_FEATURE_INACTIVITY_TIMER: This driver takes care of freeing up + * the connected inactive stations in AP mode. + * @NL80211_FEATURE_CELL_BASE_REG_HINTS: This driver has been tested + * to work properly to support receiving regulatory hints from + * cellular base stations. + * @NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL: (no longer available, only + * here to reserve the value for API/ABI compatibility) + * @NL80211_FEATURE_SAE: This driver supports simultaneous authentication of + * equals (SAE) with user space SME (NL80211_CMD_AUTHENTICATE) in station + * mode + * @NL80211_FEATURE_LOW_PRIORITY_SCAN: This driver supports low priority scan + * @NL80211_FEATURE_SCAN_FLUSH: Scan flush is supported + * @NL80211_FEATURE_AP_SCAN: Support scanning using an AP vif + * @NL80211_FEATURE_VIF_TXPOWER: The driver supports per-vif TX power setting + * @NL80211_FEATURE_NEED_OBSS_SCAN: The driver expects userspace to perform + * OBSS scans and generate 20/40 BSS coex reports. This flag is used only + * for drivers implementing the CONNECT API, for AUTH/ASSOC it is implied. + * @NL80211_FEATURE_P2P_GO_CTWIN: P2P GO implementation supports CT Window + * setting + * @NL80211_FEATURE_P2P_GO_OPPPS: P2P GO implementation supports opportunistic + * powersave + * @NL80211_FEATURE_FULL_AP_CLIENT_STATE: The driver supports full state + * transitions for AP clients. Without this flag (and if the driver + * doesn't have the AP SME in the device) the driver supports adding + * stations only when they're associated and adds them in associated + * state (to later be transitioned into authorized), with this flag + * they should be added before even sending the authentication reply + * and then transitioned into authenticated, associated and authorized + * states using station flags. + * Note that even for drivers that support this, the default is to add + * stations in authenticated/associated state, so to add unauthenticated + * stations the authenticated/associated bits have to be set in the mask. + * @NL80211_FEATURE_ADVERTISE_CHAN_LIMITS: cfg80211 advertises channel limits + * (HT40, VHT 80/160 MHz) if this flag is set + * @NL80211_FEATURE_USERSPACE_MPM: This driver supports a userspace Mesh + * Peering Management entity which may be implemented by registering for + * beacons or NL80211_CMD_NEW_PEER_CANDIDATE events. The mesh beacon is + * still generated by the driver. + * @NL80211_FEATURE_ACTIVE_MONITOR: This driver supports an active monitor + * interface. An active monitor interface behaves like a normal monitor + * interface, but gets added to the driver. It ensures that incoming + * unicast packets directed at the configured interface address get ACKed. + * @NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE: This driver supports dynamic + * channel bandwidth change (e.g., HT 20 <-> 40 MHz channel) during the + * lifetime of a BSS. + * @NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES: This device adds a DS Parameter + * Set IE to probe requests. + * @NL80211_FEATURE_WFA_TPC_IE_IN_PROBES: This device adds a WFA TPC Report IE + * to probe requests. + * @NL80211_FEATURE_QUIET: This device, in client mode, supports Quiet Period + * requests sent to it by an AP. + * @NL80211_FEATURE_TX_POWER_INSERTION: This device is capable of inserting the + * current tx power value into the TPC Report IE in the spectrum + * management TPC Report action frame, and in the Radio Measurement Link + * Measurement Report action frame. + * @NL80211_FEATURE_ACKTO_ESTIMATION: This driver supports dynamic ACK timeout + * estimation (dynack). %NL80211_ATTR_WIPHY_DYN_ACK flag attribute is used + * to enable dynack. + * @NL80211_FEATURE_STATIC_SMPS: Device supports static spatial + * multiplexing powersave, ie. can turn off all but one chain + * even on HT connections that should be using more chains. + * @NL80211_FEATURE_DYNAMIC_SMPS: Device supports dynamic spatial + * multiplexing powersave, ie. can turn off all but one chain + * and then wake the rest up as required after, for example, + * rts/cts handshake. + * @NL80211_FEATURE_SUPPORTS_WMM_ADMISSION: the device supports setting up WMM + * TSPEC sessions (TID aka TSID 0-7) with the %NL80211_CMD_ADD_TX_TS + * command. Standard IEEE 802.11 TSPEC setup is not yet supported, it + * needs to be able to handle Block-Ack agreements and other things. + * @NL80211_FEATURE_MAC_ON_CREATE: Device supports configuring + * the vif's MAC address upon creation. + * See 'macaddr' field in the vif_params (cfg80211.h). + * @NL80211_FEATURE_TDLS_CHANNEL_SWITCH: Driver supports channel switching when + * operating as a TDLS peer. + * @NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR: This device/driver supports using a + * random MAC address during scan (if the device is unassociated); the + * %NL80211_SCAN_FLAG_RANDOM_ADDR flag may be set for scans and the MAC + * address mask/value will be used. + * @NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR: This device/driver supports + * using a random MAC address for every scan iteration during scheduled + * scan (while not associated), the %NL80211_SCAN_FLAG_RANDOM_ADDR may + * be set for scheduled scan and the MAC address mask/value will be used. + * @NL80211_FEATURE_ND_RANDOM_MAC_ADDR: This device/driver supports using a + * random MAC address for every scan iteration during "net detect", i.e. + * scan in unassociated WoWLAN, the %NL80211_SCAN_FLAG_RANDOM_ADDR may + * be set for scheduled scan and the MAC address mask/value will be used. + */ +enum nl80211_feature_flags { + NL80211_FEATURE_SK_TX_STATUS = 1 << 0, + NL80211_FEATURE_HT_IBSS = 1 << 1, + NL80211_FEATURE_INACTIVITY_TIMER = 1 << 2, + NL80211_FEATURE_CELL_BASE_REG_HINTS = 1 << 3, + NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL = 1 << 4, + NL80211_FEATURE_SAE = 1 << 5, + NL80211_FEATURE_LOW_PRIORITY_SCAN = 1 << 6, + NL80211_FEATURE_SCAN_FLUSH = 1 << 7, + NL80211_FEATURE_AP_SCAN = 1 << 8, + NL80211_FEATURE_VIF_TXPOWER = 1 << 9, + NL80211_FEATURE_NEED_OBSS_SCAN = 1 << 10, + NL80211_FEATURE_P2P_GO_CTWIN = 1 << 11, + NL80211_FEATURE_P2P_GO_OPPPS = 1 << 12, + /* bit 13 is reserved */ + NL80211_FEATURE_ADVERTISE_CHAN_LIMITS = 1 << 14, + NL80211_FEATURE_FULL_AP_CLIENT_STATE = 1 << 15, + NL80211_FEATURE_USERSPACE_MPM = 1 << 16, + NL80211_FEATURE_ACTIVE_MONITOR = 1 << 17, + NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE = 1 << 18, + NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES = 1 << 19, + NL80211_FEATURE_WFA_TPC_IE_IN_PROBES = 1 << 20, + NL80211_FEATURE_QUIET = 1 << 21, + NL80211_FEATURE_TX_POWER_INSERTION = 1 << 22, + NL80211_FEATURE_ACKTO_ESTIMATION = 1 << 23, + NL80211_FEATURE_STATIC_SMPS = 1 << 24, + NL80211_FEATURE_DYNAMIC_SMPS = 1 << 25, + NL80211_FEATURE_SUPPORTS_WMM_ADMISSION = 1 << 26, + NL80211_FEATURE_MAC_ON_CREATE = 1 << 27, + NL80211_FEATURE_TDLS_CHANNEL_SWITCH = 1 << 28, + NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR = 1 << 29, + NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR = 1 << 30, + NL80211_FEATURE_ND_RANDOM_MAC_ADDR = 1U << 31, +}; + +/** + * enum nl80211_ext_feature_index - bit index of extended features. + * @NL80211_EXT_FEATURE_VHT_IBSS: This driver supports IBSS with VHT datarates. + * @NL80211_EXT_FEATURE_RRM: This driver supports RRM. When featured, user can + * request to use RRM (see %NL80211_ATTR_USE_RRM) with + * %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests, which will set + * the ASSOC_REQ_USE_RRM flag in the association request even if + * NL80211_FEATURE_QUIET is not advertized. + * @NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER: This device supports MU-MIMO air + * sniffer which means that it can be configured to hear packets from + * certain groups which can be configured by the + * %NL80211_ATTR_MU_MIMO_GROUP_DATA attribute, + * or can be configured to follow a station by configuring the + * %NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR attribute. + * @NL80211_EXT_FEATURE_SCAN_START_TIME: This driver includes the actual + * time the scan started in scan results event. The time is the TSF of + * the BSS that the interface that requested the scan is connected to + * (if available). + * @NL80211_EXT_FEATURE_BSS_PARENT_TSF: Per BSS, this driver reports the + * time the last beacon/probe was received. The time is the TSF of the + * BSS that the interface that requested the scan is connected to + * (if available). + * @NL80211_EXT_FEATURE_SET_SCAN_DWELL: This driver supports configuration of + * channel dwell time. + * @NL80211_EXT_FEATURE_BEACON_RATE_LEGACY: Driver supports beacon rate + * configuration (AP/mesh), supporting a legacy (non HT/VHT) rate. + * @NL80211_EXT_FEATURE_BEACON_RATE_HT: Driver supports beacon rate + * configuration (AP/mesh) with HT rates. + * @NL80211_EXT_FEATURE_BEACON_RATE_VHT: Driver supports beacon rate + * configuration (AP/mesh) with VHT rates. + * @NL80211_EXT_FEATURE_FILS_STA: This driver supports Fast Initial Link Setup + * with user space SME (NL80211_CMD_AUTHENTICATE) in station mode. + * @NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA: This driver supports randomized TA + * in @NL80211_CMD_FRAME while not associated. + * @NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED: This driver supports + * randomized TA in @NL80211_CMD_FRAME while associated. + * @NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI: The driver supports sched_scan + * for reporting BSSs with better RSSI than the current connected BSS + * (%NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI). + * @NL80211_EXT_FEATURE_CQM_RSSI_LIST: With this driver the + * %NL80211_ATTR_CQM_RSSI_THOLD attribute accepts a list of zero or more + * RSSI threshold values to monitor rather than exactly one threshold. + * @NL80211_EXT_FEATURE_FILS_SK_OFFLOAD: Driver SME supports FILS shared key + * authentication with %NL80211_CMD_CONNECT. + * @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK: Device wants to do 4-way + * handshake with PSK in station mode (PSK is passed as part of the connect + * and associate commands), doing it in the host might not be supported. + * @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X: Device wants to do doing 4-way + * handshake with 802.1X in station mode (will pass EAP frames to the host + * and accept the set_pmk/del_pmk commands), doing it in the host might not + * be supported. + * @NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME: Driver is capable of overriding + * the max channel attribute in the FILS request params IE with the + * actual dwell time. + * @NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP: Driver accepts broadcast probe + * response + * @NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE: Driver supports sending + * the first probe request in each channel at rate of at least 5.5Mbps. + * @NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: Driver supports + * probe request tx deferral and suppression + * @NL80211_EXT_FEATURE_MFP_OPTIONAL: Driver supports the %NL80211_MFP_OPTIONAL + * value in %NL80211_ATTR_USE_MFP. + * @NL80211_EXT_FEATURE_LOW_SPAN_SCAN: Driver supports low span scan. + * @NL80211_EXT_FEATURE_LOW_POWER_SCAN: Driver supports low power scan. + * @NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN: Driver supports high accuracy scan. + * @NL80211_EXT_FEATURE_DFS_OFFLOAD: HW/driver will offload DFS actions. + * Device or driver will do all DFS-related actions by itself, + * informing user-space about CAC progress, radar detection event, + * channel change triggered by radar detection event. + * No need to start CAC from user-space, no need to react to + * "radar detected" event. + * @NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211: Driver supports sending and + * receiving control port frames over nl80211 instead of the netdevice. + * @NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT: This driver/device supports + * (average) ACK signal strength reporting. + * @NL80211_EXT_FEATURE_TXQS: Driver supports FQ-CoDel-enabled intermediate + * TXQs. + * @NL80211_EXT_FEATURE_SCAN_RANDOM_SN: Driver/device supports randomizing the + * SN in probe request frames if requested by %NL80211_SCAN_FLAG_RANDOM_SN. + * @NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT: Driver/device can omit all data + * except for supported rates from the probe request content if requested + * by the %NL80211_SCAN_FLAG_MIN_PREQ_CONTENT flag. + * @NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER: Driver supports enabling fine + * timing measurement responder role. + * + * @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0: Driver/device confirm that they are + * able to rekey an in-use key correctly. Userspace must not rekey PTK keys + * if this flag is not set. Ignoring this can leak clear text packets and/or + * freeze the connection. + * @NL80211_EXT_FEATURE_EXT_KEY_ID: Driver supports "Extended Key ID for + * Individually Addressed Frames" from IEEE802.11-2016. + * + * @NL80211_EXT_FEATURE_AIRTIME_FAIRNESS: Driver supports getting airtime + * fairness for transmitted packets and has enabled airtime fairness + * scheduling. + * + * @NL80211_EXT_FEATURE_AP_PMKSA_CACHING: Driver/device supports PMKSA caching + * (set/del PMKSA operations) in AP mode. + * + * @NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD: Driver supports + * filtering of sched scan results using band specific RSSI thresholds. + * + * @NL80211_EXT_FEATURE_STA_TX_PWR: This driver supports controlling tx power + * to a station. + * + * @NL80211_EXT_FEATURE_SAE_OFFLOAD: Device wants to do SAE authentication in + * station mode (SAE password is passed as part of the connect command). + * + * @NL80211_EXT_FEATURE_VLAN_OFFLOAD: The driver supports a single netdev + * with VLAN tagged frames and separate VLAN-specific netdevs added using + * vconfig similarly to the Ethernet case. + * + * @NL80211_EXT_FEATURE_AQL: The driver supports the Airtime Queue Limit (AQL) + * feature, which prevents bufferbloat by using the expected transmission + * time to limit the amount of data buffered in the hardware. + * + * @NL80211_EXT_FEATURE_BEACON_PROTECTION: The driver supports Beacon protection + * and can receive key configuration for BIGTK using key indexes 6 and 7. + * @NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT: The driver supports Beacon + * protection as a client only and cannot transmit protected beacons. + * + * @NL80211_EXT_FEATURE_CONTROL_PORT_NO_PREAUTH: The driver can disable the + * forwarding of preauth frames over the control port. They are then + * handled as ordinary data frames. + * + * @NL80211_EXT_FEATURE_PROTECTED_TWT: Driver supports protected TWT frames + * + * @NL80211_EXT_FEATURE_DEL_IBSS_STA: The driver supports removing stations + * in IBSS mode, essentially by dropping their state. + * + * @NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS: management frame registrations + * are possible for multicast frames and those will be reported properly. + * + * @NL80211_EXT_FEATURE_SCAN_FREQ_KHZ: This driver supports receiving and + * reporting scan request with %NL80211_ATTR_SCAN_FREQ_KHZ. In order to + * report %NL80211_ATTR_SCAN_FREQ_KHZ, %NL80211_SCAN_FLAG_FREQ_KHZ must be + * included in the scan request. + * + * @NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS: The driver + * can report tx status for control port over nl80211 tx operations. + * + * @NL80211_EXT_FEATURE_OPERATING_CHANNEL_VALIDATION: Driver supports Operating + * Channel Validation (OCV) when using driver's SME for RSNA handshakes. + * + * @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK: Device wants to do 4-way + * handshake with PSK in AP mode (PSK is passed as part of the start AP + * command). + * + * @NL80211_EXT_FEATURE_SAE_OFFLOAD_AP: Device wants to do SAE authentication + * in AP mode (SAE password is passed as part of the start AP command). + * + * @NL80211_EXT_FEATURE_FILS_DISCOVERY: Driver/device supports FILS discovery + * frames transmission + * + * @NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP: Driver/device supports + * unsolicited broadcast probe response transmission + * + * @NL80211_EXT_FEATURE_BEACON_RATE_HE: Driver supports beacon rate + * configuration (AP/mesh) with HE rates. + * + * @NL80211_EXT_FEATURE_SECURE_LTF: Device supports secure LTF measurement + * exchange protocol. + * + * @NL80211_EXT_FEATURE_SECURE_RTT: Device supports secure RTT measurement + * exchange protocol. + * + * @NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE: Device supports management + * frame protection for all management frames exchanged during the + * negotiation and range measurement procedure. + * + * @NL80211_EXT_FEATURE_BSS_COLOR: The driver supports BSS color collision + * detection and change announcemnts. + * + * @NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD: Driver running in AP mode supports + * FILS encryption and decryption for (Re)Association Request and Response + * frames. Userspace has to share FILS AAD details to the driver by using + * @NL80211_CMD_SET_FILS_AAD. + * + * @NL80211_EXT_FEATURE_RADAR_BACKGROUND: Device supports background radar/CAC + * detection. + * + * @NL80211_EXT_FEATURE_POWERED_ADDR_CHANGE: Device can perform a MAC address + * change without having to bring the underlying network device down + * first. For example, in station mode this can be used to vary the + * origin MAC address prior to a connection to a new AP for privacy + * or other reasons. Note that certain driver specific restrictions + * might apply, e.g. no scans in progress, no offchannel operations + * in progress, and no active connections. + * + * @NUM_NL80211_EXT_FEATURES: number of extended features. + * @MAX_NL80211_EXT_FEATURES: highest extended feature index. + */ +enum nl80211_ext_feature_index { + NL80211_EXT_FEATURE_VHT_IBSS, + NL80211_EXT_FEATURE_RRM, + NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER, + NL80211_EXT_FEATURE_SCAN_START_TIME, + NL80211_EXT_FEATURE_BSS_PARENT_TSF, + NL80211_EXT_FEATURE_SET_SCAN_DWELL, + NL80211_EXT_FEATURE_BEACON_RATE_LEGACY, + NL80211_EXT_FEATURE_BEACON_RATE_HT, + NL80211_EXT_FEATURE_BEACON_RATE_VHT, + NL80211_EXT_FEATURE_FILS_STA, + NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA, + NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED, + NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI, + NL80211_EXT_FEATURE_CQM_RSSI_LIST, + NL80211_EXT_FEATURE_FILS_SK_OFFLOAD, + NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK, + NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X, + NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME, + NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP, + NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE, + NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION, + NL80211_EXT_FEATURE_MFP_OPTIONAL, + NL80211_EXT_FEATURE_LOW_SPAN_SCAN, + NL80211_EXT_FEATURE_LOW_POWER_SCAN, + NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN, + NL80211_EXT_FEATURE_DFS_OFFLOAD, + NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211, + NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT, + /* we renamed this - stay compatible */ + NL80211_EXT_FEATURE_DATA_ACK_SIGNAL_SUPPORT = NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT, + NL80211_EXT_FEATURE_TXQS, + NL80211_EXT_FEATURE_SCAN_RANDOM_SN, + NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT, + NL80211_EXT_FEATURE_CAN_REPLACE_PTK0, + NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER, + NL80211_EXT_FEATURE_AIRTIME_FAIRNESS, + NL80211_EXT_FEATURE_AP_PMKSA_CACHING, + NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD, + NL80211_EXT_FEATURE_EXT_KEY_ID, + NL80211_EXT_FEATURE_STA_TX_PWR, + NL80211_EXT_FEATURE_SAE_OFFLOAD, + NL80211_EXT_FEATURE_VLAN_OFFLOAD, + NL80211_EXT_FEATURE_AQL, + NL80211_EXT_FEATURE_BEACON_PROTECTION, + NL80211_EXT_FEATURE_CONTROL_PORT_NO_PREAUTH, + NL80211_EXT_FEATURE_PROTECTED_TWT, + NL80211_EXT_FEATURE_DEL_IBSS_STA, + NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS, + NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT, + NL80211_EXT_FEATURE_SCAN_FREQ_KHZ, + NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS, + NL80211_EXT_FEATURE_OPERATING_CHANNEL_VALIDATION, + NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK, + NL80211_EXT_FEATURE_SAE_OFFLOAD_AP, + NL80211_EXT_FEATURE_FILS_DISCOVERY, + NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP, + NL80211_EXT_FEATURE_BEACON_RATE_HE, + NL80211_EXT_FEATURE_SECURE_LTF, + NL80211_EXT_FEATURE_SECURE_RTT, + NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE, + NL80211_EXT_FEATURE_BSS_COLOR, + NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD, + NL80211_EXT_FEATURE_RADAR_BACKGROUND, + NL80211_EXT_FEATURE_POWERED_ADDR_CHANGE, + + /* add new features before the definition below */ + NUM_NL80211_EXT_FEATURES, + MAX_NL80211_EXT_FEATURES = NUM_NL80211_EXT_FEATURES - 1 +}; + +/** + * enum nl80211_probe_resp_offload_support_attr - optional supported + * protocols for probe-response offloading by the driver/FW. + * To be used with the %NL80211_ATTR_PROBE_RESP_OFFLOAD attribute. + * Each enum value represents a bit in the bitmap of supported + * protocols. Typically a subset of probe-requests belonging to a + * supported protocol will be excluded from offload and uploaded + * to the host. + * + * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS: Support for WPS ver. 1 + * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2: Support for WPS ver. 2 + * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P: Support for P2P + * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_80211U: Support for 802.11u + */ +enum nl80211_probe_resp_offload_support_attr { + NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS = 1<<0, + NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2 = 1<<1, + NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P = 1<<2, + NL80211_PROBE_RESP_OFFLOAD_SUPPORT_80211U = 1<<3, +}; + +/** + * enum nl80211_connect_failed_reason - connection request failed reasons + * @NL80211_CONN_FAIL_MAX_CLIENTS: Maximum number of clients that can be + * handled by the AP is reached. + * @NL80211_CONN_FAIL_BLOCKED_CLIENT: Connection request is rejected due to ACL. + */ +enum nl80211_connect_failed_reason { + NL80211_CONN_FAIL_MAX_CLIENTS, + NL80211_CONN_FAIL_BLOCKED_CLIENT, +}; + +/** + * enum nl80211_timeout_reason - timeout reasons + * + * @NL80211_TIMEOUT_UNSPECIFIED: Timeout reason unspecified. + * @NL80211_TIMEOUT_SCAN: Scan (AP discovery) timed out. + * @NL80211_TIMEOUT_AUTH: Authentication timed out. + * @NL80211_TIMEOUT_ASSOC: Association timed out. + */ +enum nl80211_timeout_reason { + NL80211_TIMEOUT_UNSPECIFIED, + NL80211_TIMEOUT_SCAN, + NL80211_TIMEOUT_AUTH, + NL80211_TIMEOUT_ASSOC, +}; + +/** + * enum nl80211_scan_flags - scan request control flags + * + * Scan request control flags are used to control the handling + * of NL80211_CMD_TRIGGER_SCAN and NL80211_CMD_START_SCHED_SCAN + * requests. + * + * NL80211_SCAN_FLAG_LOW_SPAN, NL80211_SCAN_FLAG_LOW_POWER, and + * NL80211_SCAN_FLAG_HIGH_ACCURACY flags are exclusive of each other, i.e., only + * one of them can be used in the request. + * + * @NL80211_SCAN_FLAG_LOW_PRIORITY: scan request has low priority + * @NL80211_SCAN_FLAG_FLUSH: flush cache before scanning + * @NL80211_SCAN_FLAG_AP: force a scan even if the interface is configured + * as AP and the beaconing has already been configured. This attribute is + * dangerous because will destroy stations performance as a lot of frames + * will be lost while scanning off-channel, therefore it must be used only + * when really needed + * @NL80211_SCAN_FLAG_RANDOM_ADDR: use a random MAC address for this scan (or + * for scheduled scan: a different one for every scan iteration). When the + * flag is set, depending on device capabilities the @NL80211_ATTR_MAC and + * @NL80211_ATTR_MAC_MASK attributes may also be given in which case only + * the masked bits will be preserved from the MAC address and the remainder + * randomised. If the attributes are not given full randomisation (46 bits, + * locally administered 1, multicast 0) is assumed. + * This flag must not be requested when the feature isn't supported, check + * the nl80211 feature flags for the device. + * @NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME: fill the dwell time in the FILS + * request parameters IE in the probe request + * @NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP: accept broadcast probe responses + * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE: send probe request frames at + * rate of at least 5.5M. In case non OCE AP is discovered in the channel, + * only the first probe req in the channel will be sent in high rate. + * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: allow probe request + * tx deferral (dot11FILSProbeDelay shall be set to 15ms) + * and suppression (if it has received a broadcast Probe Response frame, + * Beacon frame or FILS Discovery frame from an AP that the STA considers + * a suitable candidate for (re-)association - suitable in terms of + * SSID and/or RSSI. + * @NL80211_SCAN_FLAG_LOW_SPAN: Span corresponds to the total time taken to + * accomplish the scan. Thus, this flag intends the driver to perform the + * scan request with lesser span/duration. It is specific to the driver + * implementations on how this is accomplished. Scan accuracy may get + * impacted with this flag. + * @NL80211_SCAN_FLAG_LOW_POWER: This flag intends the scan attempts to consume + * optimal possible power. Drivers can resort to their specific means to + * optimize the power. Scan accuracy may get impacted with this flag. + * @NL80211_SCAN_FLAG_HIGH_ACCURACY: Accuracy here intends to the extent of scan + * results obtained. Thus HIGH_ACCURACY scan flag aims to get maximum + * possible scan results. This flag hints the driver to use the best + * possible scan configuration to improve the accuracy in scanning. + * Latency and power use may get impacted with this flag. + * @NL80211_SCAN_FLAG_RANDOM_SN: randomize the sequence number in probe + * request frames from this scan to avoid correlation/tracking being + * possible. + * @NL80211_SCAN_FLAG_MIN_PREQ_CONTENT: minimize probe request content to + * only have supported rates and no additional capabilities (unless + * added by userspace explicitly.) + * @NL80211_SCAN_FLAG_FREQ_KHZ: report scan results with + * %NL80211_ATTR_SCAN_FREQ_KHZ. This also means + * %NL80211_ATTR_SCAN_FREQUENCIES will not be included. + * @NL80211_SCAN_FLAG_COLOCATED_6GHZ: scan for colocated APs reported by + * 2.4/5 GHz APs + */ +enum nl80211_scan_flags { + NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, + NL80211_SCAN_FLAG_FLUSH = 1<<1, + NL80211_SCAN_FLAG_AP = 1<<2, + NL80211_SCAN_FLAG_RANDOM_ADDR = 1<<3, + NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME = 1<<4, + NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP = 1<<5, + NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE = 1<<6, + NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION = 1<<7, + NL80211_SCAN_FLAG_LOW_SPAN = 1<<8, + NL80211_SCAN_FLAG_LOW_POWER = 1<<9, + NL80211_SCAN_FLAG_HIGH_ACCURACY = 1<<10, + NL80211_SCAN_FLAG_RANDOM_SN = 1<<11, + NL80211_SCAN_FLAG_MIN_PREQ_CONTENT = 1<<12, + NL80211_SCAN_FLAG_FREQ_KHZ = 1<<13, + NL80211_SCAN_FLAG_COLOCATED_6GHZ = 1<<14, +}; + +/** + * enum nl80211_acl_policy - access control policy + * + * Access control policy is applied on a MAC list set by + * %NL80211_CMD_START_AP and %NL80211_CMD_SET_MAC_ACL, to + * be used with %NL80211_ATTR_ACL_POLICY. + * + * @NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED: Deny stations which are + * listed in ACL, i.e. allow all the stations which are not listed + * in ACL to authenticate. + * @NL80211_ACL_POLICY_DENY_UNLESS_LISTED: Allow the stations which are listed + * in ACL, i.e. deny all the stations which are not listed in ACL. + */ +enum nl80211_acl_policy { + NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED, + NL80211_ACL_POLICY_DENY_UNLESS_LISTED, +}; + +/** + * enum nl80211_smps_mode - SMPS mode + * + * Requested SMPS mode (for AP mode) + * + * @NL80211_SMPS_OFF: SMPS off (use all antennas). + * @NL80211_SMPS_STATIC: static SMPS (use a single antenna) + * @NL80211_SMPS_DYNAMIC: dynamic smps (start with a single antenna and + * turn on other antennas after CTS/RTS). + */ +enum nl80211_smps_mode { + NL80211_SMPS_OFF, + NL80211_SMPS_STATIC, + NL80211_SMPS_DYNAMIC, + + __NL80211_SMPS_AFTER_LAST, + NL80211_SMPS_MAX = __NL80211_SMPS_AFTER_LAST - 1 +}; + +/** + * enum nl80211_radar_event - type of radar event for DFS operation + * + * Type of event to be used with NL80211_ATTR_RADAR_EVENT to inform userspace + * about detected radars or success of the channel available check (CAC) + * + * @NL80211_RADAR_DETECTED: A radar pattern has been detected. The channel is + * now unusable. + * @NL80211_RADAR_CAC_FINISHED: Channel Availability Check has been finished, + * the channel is now available. + * @NL80211_RADAR_CAC_ABORTED: Channel Availability Check has been aborted, no + * change to the channel status. + * @NL80211_RADAR_NOP_FINISHED: The Non-Occupancy Period for this channel is + * over, channel becomes usable. + * @NL80211_RADAR_PRE_CAC_EXPIRED: Channel Availability Check done on this + * non-operating channel is expired and no longer valid. New CAC must + * be done on this channel before starting the operation. This is not + * applicable for ETSI dfs domain where pre-CAC is valid for ever. + * @NL80211_RADAR_CAC_STARTED: Channel Availability Check has been started, + * should be generated by HW if NL80211_EXT_FEATURE_DFS_OFFLOAD is enabled. + */ +enum nl80211_radar_event { + NL80211_RADAR_DETECTED, + NL80211_RADAR_CAC_FINISHED, + NL80211_RADAR_CAC_ABORTED, + NL80211_RADAR_NOP_FINISHED, + NL80211_RADAR_PRE_CAC_EXPIRED, + NL80211_RADAR_CAC_STARTED, +}; + +/** + * enum nl80211_dfs_state - DFS states for channels + * + * Channel states used by the DFS code. + * + * @NL80211_DFS_USABLE: The channel can be used, but channel availability + * check (CAC) must be performed before using it for AP or IBSS. + * @NL80211_DFS_UNAVAILABLE: A radar has been detected on this channel, it + * is therefore marked as not available. + * @NL80211_DFS_AVAILABLE: The channel has been CAC checked and is available. + */ +enum nl80211_dfs_state { + NL80211_DFS_USABLE, + NL80211_DFS_UNAVAILABLE, + NL80211_DFS_AVAILABLE, +}; + +/** + * enum nl80211_protocol_features - nl80211 protocol features + * @NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP: nl80211 supports splitting + * wiphy dumps (if requested by the application with the attribute + * %NL80211_ATTR_SPLIT_WIPHY_DUMP. Also supported is filtering the + * wiphy dump by %NL80211_ATTR_WIPHY, %NL80211_ATTR_IFINDEX or + * %NL80211_ATTR_WDEV. + */ +enum nl80211_protocol_features { + NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP = 1 << 0, +}; + +/** + * enum nl80211_crit_proto_id - nl80211 critical protocol identifiers + * + * @NL80211_CRIT_PROTO_UNSPEC: protocol unspecified. + * @NL80211_CRIT_PROTO_DHCP: BOOTP or DHCPv6 protocol. + * @NL80211_CRIT_PROTO_EAPOL: EAPOL protocol. + * @NL80211_CRIT_PROTO_APIPA: APIPA protocol. + * @NUM_NL80211_CRIT_PROTO: must be kept last. + */ +enum nl80211_crit_proto_id { + NL80211_CRIT_PROTO_UNSPEC, + NL80211_CRIT_PROTO_DHCP, + NL80211_CRIT_PROTO_EAPOL, + NL80211_CRIT_PROTO_APIPA, + /* add other protocols before this one */ + NUM_NL80211_CRIT_PROTO +}; + +/* maximum duration for critical protocol measures */ +#define NL80211_CRIT_PROTO_MAX_DURATION 5000 /* msec */ + +/** + * enum nl80211_rxmgmt_flags - flags for received management frame. + * + * Used by cfg80211_rx_mgmt() + * + * @NL80211_RXMGMT_FLAG_ANSWERED: frame was answered by device/driver. + * @NL80211_RXMGMT_FLAG_EXTERNAL_AUTH: Host driver intends to offload + * the authentication. Exclusively defined for host drivers that + * advertises the SME functionality but would like the userspace + * to handle certain authentication algorithms (e.g. SAE). + */ +enum nl80211_rxmgmt_flags { + NL80211_RXMGMT_FLAG_ANSWERED = 1 << 0, + NL80211_RXMGMT_FLAG_EXTERNAL_AUTH = 1 << 1, +}; + +/* + * If this flag is unset, the lower 24 bits are an OUI, if set + * a Linux nl80211 vendor ID is used (no such IDs are allocated + * yet, so that's not valid so far) + */ +#define NL80211_VENDOR_ID_IS_LINUX 0x80000000 + +/** + * struct nl80211_vendor_cmd_info - vendor command data + * @vendor_id: If the %NL80211_VENDOR_ID_IS_LINUX flag is clear, then the + * value is a 24-bit OUI; if it is set then a separately allocated ID + * may be used, but no such IDs are allocated yet. New IDs should be + * added to this file when needed. + * @subcmd: sub-command ID for the command + */ +struct nl80211_vendor_cmd_info { + __u32 vendor_id; + __u32 subcmd; +}; + +/** + * enum nl80211_tdls_peer_capability - TDLS peer flags. + * + * Used by tdls_mgmt() to determine which conditional elements need + * to be added to TDLS Setup frames. + * + * @NL80211_TDLS_PEER_HT: TDLS peer is HT capable. + * @NL80211_TDLS_PEER_VHT: TDLS peer is VHT capable. + * @NL80211_TDLS_PEER_WMM: TDLS peer is WMM capable. + * @NL80211_TDLS_PEER_HE: TDLS peer is HE capable. + */ +enum nl80211_tdls_peer_capability { + NL80211_TDLS_PEER_HT = 1<<0, + NL80211_TDLS_PEER_VHT = 1<<1, + NL80211_TDLS_PEER_WMM = 1<<2, + NL80211_TDLS_PEER_HE = 1<<3, +}; + +/** + * enum nl80211_sched_scan_plan - scanning plan for scheduled scan + * @__NL80211_SCHED_SCAN_PLAN_INVALID: attribute number 0 is reserved + * @NL80211_SCHED_SCAN_PLAN_INTERVAL: interval between scan iterations. In + * seconds (u32). + * @NL80211_SCHED_SCAN_PLAN_ITERATIONS: number of scan iterations in this + * scan plan (u32). The last scan plan must not specify this attribute + * because it will run infinitely. A value of zero is invalid as it will + * make the scan plan meaningless. + * @NL80211_SCHED_SCAN_PLAN_MAX: highest scheduled scan plan attribute number + * currently defined + * @__NL80211_SCHED_SCAN_PLAN_AFTER_LAST: internal use + */ +enum nl80211_sched_scan_plan { + __NL80211_SCHED_SCAN_PLAN_INVALID, + NL80211_SCHED_SCAN_PLAN_INTERVAL, + NL80211_SCHED_SCAN_PLAN_ITERATIONS, + + /* keep last */ + __NL80211_SCHED_SCAN_PLAN_AFTER_LAST, + NL80211_SCHED_SCAN_PLAN_MAX = + __NL80211_SCHED_SCAN_PLAN_AFTER_LAST - 1 +}; + +/** + * struct nl80211_bss_select_rssi_adjust - RSSI adjustment parameters. + * + * @band: band of BSS that must match for RSSI value adjustment. The value + * of this field is according to &enum nl80211_band. + * @delta: value used to adjust the RSSI value of matching BSS in dB. + */ +struct nl80211_bss_select_rssi_adjust { + __u8 band; + __s8 delta; +} __attribute__((packed)); + +/** + * enum nl80211_bss_select_attr - attributes for bss selection. + * + * @__NL80211_BSS_SELECT_ATTR_INVALID: reserved. + * @NL80211_BSS_SELECT_ATTR_RSSI: Flag indicating only RSSI-based BSS selection + * is requested. + * @NL80211_BSS_SELECT_ATTR_BAND_PREF: attribute indicating BSS + * selection should be done such that the specified band is preferred. + * When there are multiple BSS-es in the preferred band, the driver + * shall use RSSI-based BSS selection as a second step. The value of + * this attribute is according to &enum nl80211_band (u32). + * @NL80211_BSS_SELECT_ATTR_RSSI_ADJUST: When present the RSSI level for + * BSS-es in the specified band is to be adjusted before doing + * RSSI-based BSS selection. The attribute value is a packed structure + * value as specified by &struct nl80211_bss_select_rssi_adjust. + * @NL80211_BSS_SELECT_ATTR_MAX: highest bss select attribute number. + * @__NL80211_BSS_SELECT_ATTR_AFTER_LAST: internal use. + * + * One and only one of these attributes are found within %NL80211_ATTR_BSS_SELECT + * for %NL80211_CMD_CONNECT. It specifies the required BSS selection behaviour + * which the driver shall use. + */ +enum nl80211_bss_select_attr { + __NL80211_BSS_SELECT_ATTR_INVALID, + NL80211_BSS_SELECT_ATTR_RSSI, + NL80211_BSS_SELECT_ATTR_BAND_PREF, + NL80211_BSS_SELECT_ATTR_RSSI_ADJUST, + + /* keep last */ + __NL80211_BSS_SELECT_ATTR_AFTER_LAST, + NL80211_BSS_SELECT_ATTR_MAX = __NL80211_BSS_SELECT_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_nan_function_type - NAN function type + * + * Defines the function type of a NAN function + * + * @NL80211_NAN_FUNC_PUBLISH: function is publish + * @NL80211_NAN_FUNC_SUBSCRIBE: function is subscribe + * @NL80211_NAN_FUNC_FOLLOW_UP: function is follow-up + */ +enum nl80211_nan_function_type { + NL80211_NAN_FUNC_PUBLISH, + NL80211_NAN_FUNC_SUBSCRIBE, + NL80211_NAN_FUNC_FOLLOW_UP, + + /* keep last */ + __NL80211_NAN_FUNC_TYPE_AFTER_LAST, + NL80211_NAN_FUNC_MAX_TYPE = __NL80211_NAN_FUNC_TYPE_AFTER_LAST - 1, +}; + +/** + * enum nl80211_nan_publish_type - NAN publish tx type + * + * Defines how to send publish Service Discovery Frames + * + * @NL80211_NAN_SOLICITED_PUBLISH: publish function is solicited + * @NL80211_NAN_UNSOLICITED_PUBLISH: publish function is unsolicited + */ +enum nl80211_nan_publish_type { + NL80211_NAN_SOLICITED_PUBLISH = 1 << 0, + NL80211_NAN_UNSOLICITED_PUBLISH = 1 << 1, +}; + +/** + * enum nl80211_nan_func_term_reason - NAN functions termination reason + * + * Defines termination reasons of a NAN function + * + * @NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST: requested by user + * @NL80211_NAN_FUNC_TERM_REASON_TTL_EXPIRED: timeout + * @NL80211_NAN_FUNC_TERM_REASON_ERROR: errored + */ +enum nl80211_nan_func_term_reason { + NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST, + NL80211_NAN_FUNC_TERM_REASON_TTL_EXPIRED, + NL80211_NAN_FUNC_TERM_REASON_ERROR, +}; + +#define NL80211_NAN_FUNC_SERVICE_ID_LEN 6 +#define NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN 0xff +#define NL80211_NAN_FUNC_SRF_MAX_LEN 0xff + +/** + * enum nl80211_nan_func_attributes - NAN function attributes + * @__NL80211_NAN_FUNC_INVALID: invalid + * @NL80211_NAN_FUNC_TYPE: &enum nl80211_nan_function_type (u8). + * @NL80211_NAN_FUNC_SERVICE_ID: 6 bytes of the service ID hash as + * specified in NAN spec. This is a binary attribute. + * @NL80211_NAN_FUNC_PUBLISH_TYPE: relevant if the function's type is + * publish. Defines the transmission type for the publish Service Discovery + * Frame, see &enum nl80211_nan_publish_type. Its type is u8. + * @NL80211_NAN_FUNC_PUBLISH_BCAST: relevant if the function is a solicited + * publish. Should the solicited publish Service Discovery Frame be sent to + * the NAN Broadcast address. This is a flag. + * @NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE: relevant if the function's type is + * subscribe. Is the subscribe active. This is a flag. + * @NL80211_NAN_FUNC_FOLLOW_UP_ID: relevant if the function's type is follow up. + * The instance ID for the follow up Service Discovery Frame. This is u8. + * @NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID: relevant if the function's type + * is follow up. This is a u8. + * The requestor instance ID for the follow up Service Discovery Frame. + * @NL80211_NAN_FUNC_FOLLOW_UP_DEST: the MAC address of the recipient of the + * follow up Service Discovery Frame. This is a binary attribute. + * @NL80211_NAN_FUNC_CLOSE_RANGE: is this function limited for devices in a + * close range. The range itself (RSSI) is defined by the device. + * This is a flag. + * @NL80211_NAN_FUNC_TTL: strictly positive number of DWs this function should + * stay active. If not present infinite TTL is assumed. This is a u32. + * @NL80211_NAN_FUNC_SERVICE_INFO: array of bytes describing the service + * specific info. This is a binary attribute. + * @NL80211_NAN_FUNC_SRF: Service Receive Filter. This is a nested attribute. + * See &enum nl80211_nan_srf_attributes. + * @NL80211_NAN_FUNC_RX_MATCH_FILTER: Receive Matching filter. This is a nested + * attribute. It is a list of binary values. + * @NL80211_NAN_FUNC_TX_MATCH_FILTER: Transmit Matching filter. This is a + * nested attribute. It is a list of binary values. + * @NL80211_NAN_FUNC_INSTANCE_ID: The instance ID of the function. + * Its type is u8 and it cannot be 0. + * @NL80211_NAN_FUNC_TERM_REASON: NAN function termination reason. + * See &enum nl80211_nan_func_term_reason. + * + * @NUM_NL80211_NAN_FUNC_ATTR: internal + * @NL80211_NAN_FUNC_ATTR_MAX: highest NAN function attribute + */ +enum nl80211_nan_func_attributes { + __NL80211_NAN_FUNC_INVALID, + NL80211_NAN_FUNC_TYPE, + NL80211_NAN_FUNC_SERVICE_ID, + NL80211_NAN_FUNC_PUBLISH_TYPE, + NL80211_NAN_FUNC_PUBLISH_BCAST, + NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE, + NL80211_NAN_FUNC_FOLLOW_UP_ID, + NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID, + NL80211_NAN_FUNC_FOLLOW_UP_DEST, + NL80211_NAN_FUNC_CLOSE_RANGE, + NL80211_NAN_FUNC_TTL, + NL80211_NAN_FUNC_SERVICE_INFO, + NL80211_NAN_FUNC_SRF, + NL80211_NAN_FUNC_RX_MATCH_FILTER, + NL80211_NAN_FUNC_TX_MATCH_FILTER, + NL80211_NAN_FUNC_INSTANCE_ID, + NL80211_NAN_FUNC_TERM_REASON, + + /* keep last */ + NUM_NL80211_NAN_FUNC_ATTR, + NL80211_NAN_FUNC_ATTR_MAX = NUM_NL80211_NAN_FUNC_ATTR - 1 +}; + +/** + * enum nl80211_nan_srf_attributes - NAN Service Response filter attributes + * @__NL80211_NAN_SRF_INVALID: invalid + * @NL80211_NAN_SRF_INCLUDE: present if the include bit of the SRF set. + * This is a flag. + * @NL80211_NAN_SRF_BF: Bloom Filter. Present if and only if + * %NL80211_NAN_SRF_MAC_ADDRS isn't present. This attribute is binary. + * @NL80211_NAN_SRF_BF_IDX: index of the Bloom Filter. Mandatory if + * %NL80211_NAN_SRF_BF is present. This is a u8. + * @NL80211_NAN_SRF_MAC_ADDRS: list of MAC addresses for the SRF. Present if + * and only if %NL80211_NAN_SRF_BF isn't present. This is a nested + * attribute. Each nested attribute is a MAC address. + * @NUM_NL80211_NAN_SRF_ATTR: internal + * @NL80211_NAN_SRF_ATTR_MAX: highest NAN SRF attribute + */ +enum nl80211_nan_srf_attributes { + __NL80211_NAN_SRF_INVALID, + NL80211_NAN_SRF_INCLUDE, + NL80211_NAN_SRF_BF, + NL80211_NAN_SRF_BF_IDX, + NL80211_NAN_SRF_MAC_ADDRS, + + /* keep last */ + NUM_NL80211_NAN_SRF_ATTR, + NL80211_NAN_SRF_ATTR_MAX = NUM_NL80211_NAN_SRF_ATTR - 1, +}; + +/** + * enum nl80211_nan_match_attributes - NAN match attributes + * @__NL80211_NAN_MATCH_INVALID: invalid + * @NL80211_NAN_MATCH_FUNC_LOCAL: the local function that had the + * match. This is a nested attribute. + * See &enum nl80211_nan_func_attributes. + * @NL80211_NAN_MATCH_FUNC_PEER: the peer function + * that caused the match. This is a nested attribute. + * See &enum nl80211_nan_func_attributes. + * + * @NUM_NL80211_NAN_MATCH_ATTR: internal + * @NL80211_NAN_MATCH_ATTR_MAX: highest NAN match attribute + */ +enum nl80211_nan_match_attributes { + __NL80211_NAN_MATCH_INVALID, + NL80211_NAN_MATCH_FUNC_LOCAL, + NL80211_NAN_MATCH_FUNC_PEER, + + /* keep last */ + NUM_NL80211_NAN_MATCH_ATTR, + NL80211_NAN_MATCH_ATTR_MAX = NUM_NL80211_NAN_MATCH_ATTR - 1 +}; + +/** + * nl80211_external_auth_action - Action to perform with external + * authentication request. Used by NL80211_ATTR_EXTERNAL_AUTH_ACTION. + * @NL80211_EXTERNAL_AUTH_START: Start the authentication. + * @NL80211_EXTERNAL_AUTH_ABORT: Abort the ongoing authentication. + */ +enum nl80211_external_auth_action { + NL80211_EXTERNAL_AUTH_START, + NL80211_EXTERNAL_AUTH_ABORT, +}; + +/** + * enum nl80211_ftm_responder_attributes - fine timing measurement + * responder attributes + * @__NL80211_FTM_RESP_ATTR_INVALID: Invalid + * @NL80211_FTM_RESP_ATTR_ENABLED: FTM responder is enabled + * @NL80211_FTM_RESP_ATTR_LCI: The content of Measurement Report Element + * (9.4.2.22 in 802.11-2016) with type 8 - LCI (9.4.2.22.10), + * i.e. starting with the measurement token + * @NL80211_FTM_RESP_ATTR_CIVIC: The content of Measurement Report Element + * (9.4.2.22 in 802.11-2016) with type 11 - Civic (Section 9.4.2.22.13), + * i.e. starting with the measurement token + * @__NL80211_FTM_RESP_ATTR_LAST: Internal + * @NL80211_FTM_RESP_ATTR_MAX: highest FTM responder attribute. + */ +enum nl80211_ftm_responder_attributes { + __NL80211_FTM_RESP_ATTR_INVALID, + + NL80211_FTM_RESP_ATTR_ENABLED, + NL80211_FTM_RESP_ATTR_LCI, + NL80211_FTM_RESP_ATTR_CIVICLOC, + + /* keep last */ + __NL80211_FTM_RESP_ATTR_LAST, + NL80211_FTM_RESP_ATTR_MAX = __NL80211_FTM_RESP_ATTR_LAST - 1, +}; + +/* + * enum nl80211_ftm_responder_stats - FTM responder statistics + * + * These attribute types are used with %NL80211_ATTR_FTM_RESPONDER_STATS + * when getting FTM responder statistics. + * + * @__NL80211_FTM_STATS_INVALID: attribute number 0 is reserved + * @NL80211_FTM_STATS_SUCCESS_NUM: number of FTM sessions in which all frames + * were ssfully answered (u32) + * @NL80211_FTM_STATS_PARTIAL_NUM: number of FTM sessions in which part of the + * frames were successfully answered (u32) + * @NL80211_FTM_STATS_FAILED_NUM: number of failed FTM sessions (u32) + * @NL80211_FTM_STATS_ASAP_NUM: number of ASAP sessions (u32) + * @NL80211_FTM_STATS_NON_ASAP_NUM: number of non-ASAP sessions (u32) + * @NL80211_FTM_STATS_TOTAL_DURATION_MSEC: total sessions durations - gives an + * indication of how much time the responder was busy (u64, msec) + * @NL80211_FTM_STATS_UNKNOWN_TRIGGERS_NUM: number of unknown FTM triggers - + * triggers from initiators that didn't finish successfully the negotiation + * phase with the responder (u32) + * @NL80211_FTM_STATS_RESCHEDULE_REQUESTS_NUM: number of FTM reschedule requests + * - initiator asks for a new scheduling although it already has scheduled + * FTM slot (u32) + * @NL80211_FTM_STATS_OUT_OF_WINDOW_TRIGGERS_NUM: number of FTM triggers out of + * scheduled window (u32) + * @NL80211_FTM_STATS_PAD: used for padding, ignore + * @__NL80211_TXQ_ATTR_AFTER_LAST: Internal + * @NL80211_FTM_STATS_MAX: highest possible FTM responder stats attribute + */ +enum nl80211_ftm_responder_stats { + __NL80211_FTM_STATS_INVALID, + NL80211_FTM_STATS_SUCCESS_NUM, + NL80211_FTM_STATS_PARTIAL_NUM, + NL80211_FTM_STATS_FAILED_NUM, + NL80211_FTM_STATS_ASAP_NUM, + NL80211_FTM_STATS_NON_ASAP_NUM, + NL80211_FTM_STATS_TOTAL_DURATION_MSEC, + NL80211_FTM_STATS_UNKNOWN_TRIGGERS_NUM, + NL80211_FTM_STATS_RESCHEDULE_REQUESTS_NUM, + NL80211_FTM_STATS_OUT_OF_WINDOW_TRIGGERS_NUM, + NL80211_FTM_STATS_PAD, + + /* keep last */ + __NL80211_FTM_STATS_AFTER_LAST, + NL80211_FTM_STATS_MAX = __NL80211_FTM_STATS_AFTER_LAST - 1 +}; + +/** + * enum nl80211_preamble - frame preamble types + * @NL80211_PREAMBLE_LEGACY: legacy (HR/DSSS, OFDM, ERP PHY) preamble + * @NL80211_PREAMBLE_HT: HT preamble + * @NL80211_PREAMBLE_VHT: VHT preamble + * @NL80211_PREAMBLE_DMG: DMG preamble + * @NL80211_PREAMBLE_HE: HE preamble + */ +enum nl80211_preamble { + NL80211_PREAMBLE_LEGACY, + NL80211_PREAMBLE_HT, + NL80211_PREAMBLE_VHT, + NL80211_PREAMBLE_DMG, + NL80211_PREAMBLE_HE, +}; + +/** + * enum nl80211_peer_measurement_type - peer measurement types + * @NL80211_PMSR_TYPE_INVALID: invalid/unused, needed as we use + * these numbers also for attributes + * + * @NL80211_PMSR_TYPE_FTM: flight time measurement + * + * @NUM_NL80211_PMSR_TYPES: internal + * @NL80211_PMSR_TYPE_MAX: highest type number + */ +enum nl80211_peer_measurement_type { + NL80211_PMSR_TYPE_INVALID, + + NL80211_PMSR_TYPE_FTM, + + NUM_NL80211_PMSR_TYPES, + NL80211_PMSR_TYPE_MAX = NUM_NL80211_PMSR_TYPES - 1 +}; + +/** + * enum nl80211_peer_measurement_status - peer measurement status + * @NL80211_PMSR_STATUS_SUCCESS: measurement completed successfully + * @NL80211_PMSR_STATUS_REFUSED: measurement was locally refused + * @NL80211_PMSR_STATUS_TIMEOUT: measurement timed out + * @NL80211_PMSR_STATUS_FAILURE: measurement failed, a type-dependent + * reason may be available in the response data + */ +enum nl80211_peer_measurement_status { + NL80211_PMSR_STATUS_SUCCESS, + NL80211_PMSR_STATUS_REFUSED, + NL80211_PMSR_STATUS_TIMEOUT, + NL80211_PMSR_STATUS_FAILURE, +}; + +/** + * enum nl80211_peer_measurement_req - peer measurement request attributes + * @__NL80211_PMSR_REQ_ATTR_INVALID: invalid + * + * @NL80211_PMSR_REQ_ATTR_DATA: This is a nested attribute with measurement + * type-specific request data inside. The attributes used are from the + * enums named nl80211_peer_measurement__req. + * @NL80211_PMSR_REQ_ATTR_GET_AP_TSF: include AP TSF timestamp, if supported + * (flag attribute) + * + * @NUM_NL80211_PMSR_REQ_ATTRS: internal + * @NL80211_PMSR_REQ_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_req { + __NL80211_PMSR_REQ_ATTR_INVALID, + + NL80211_PMSR_REQ_ATTR_DATA, + NL80211_PMSR_REQ_ATTR_GET_AP_TSF, + + /* keep last */ + NUM_NL80211_PMSR_REQ_ATTRS, + NL80211_PMSR_REQ_ATTR_MAX = NUM_NL80211_PMSR_REQ_ATTRS - 1 +}; + +/** + * enum nl80211_peer_measurement_resp - peer measurement response attributes + * @__NL80211_PMSR_RESP_ATTR_INVALID: invalid + * + * @NL80211_PMSR_RESP_ATTR_DATA: This is a nested attribute with measurement + * type-specific results inside. The attributes used are from the enums + * named nl80211_peer_measurement__resp. + * @NL80211_PMSR_RESP_ATTR_STATUS: u32 value with the measurement status + * (using values from &enum nl80211_peer_measurement_status.) + * @NL80211_PMSR_RESP_ATTR_HOST_TIME: host time (%CLOCK_BOOTTIME) when the + * result was measured; this value is not expected to be accurate to + * more than 20ms. (u64, nanoseconds) + * @NL80211_PMSR_RESP_ATTR_AP_TSF: TSF of the AP that the interface + * doing the measurement is connected to when the result was measured. + * This shall be accurately reported if supported and requested + * (u64, usec) + * @NL80211_PMSR_RESP_ATTR_FINAL: If results are sent to the host partially + * (*e.g. with FTM per-burst data) this flag will be cleared on all but + * the last result; if all results are combined it's set on the single + * result. + * @NL80211_PMSR_RESP_ATTR_PAD: padding for 64-bit attributes, ignore + * + * @NUM_NL80211_PMSR_RESP_ATTRS: internal + * @NL80211_PMSR_RESP_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_resp { + __NL80211_PMSR_RESP_ATTR_INVALID, + + NL80211_PMSR_RESP_ATTR_DATA, + NL80211_PMSR_RESP_ATTR_STATUS, + NL80211_PMSR_RESP_ATTR_HOST_TIME, + NL80211_PMSR_RESP_ATTR_AP_TSF, + NL80211_PMSR_RESP_ATTR_FINAL, + NL80211_PMSR_RESP_ATTR_PAD, + + /* keep last */ + NUM_NL80211_PMSR_RESP_ATTRS, + NL80211_PMSR_RESP_ATTR_MAX = NUM_NL80211_PMSR_RESP_ATTRS - 1 +}; + +/** + * enum nl80211_peer_measurement_peer_attrs - peer attributes for measurement + * @__NL80211_PMSR_PEER_ATTR_INVALID: invalid + * + * @NL80211_PMSR_PEER_ATTR_ADDR: peer's MAC address + * @NL80211_PMSR_PEER_ATTR_CHAN: channel definition, nested, using top-level + * attributes like %NL80211_ATTR_WIPHY_FREQ etc. + * @NL80211_PMSR_PEER_ATTR_REQ: This is a nested attribute indexed by + * measurement type, with attributes from the + * &enum nl80211_peer_measurement_req inside. + * @NL80211_PMSR_PEER_ATTR_RESP: This is a nested attribute indexed by + * measurement type, with attributes from the + * &enum nl80211_peer_measurement_resp inside. + * + * @NUM_NL80211_PMSR_PEER_ATTRS: internal + * @NL80211_PMSR_PEER_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_peer_attrs { + __NL80211_PMSR_PEER_ATTR_INVALID, + + NL80211_PMSR_PEER_ATTR_ADDR, + NL80211_PMSR_PEER_ATTR_CHAN, + NL80211_PMSR_PEER_ATTR_REQ, + NL80211_PMSR_PEER_ATTR_RESP, + + /* keep last */ + NUM_NL80211_PMSR_PEER_ATTRS, + NL80211_PMSR_PEER_ATTR_MAX = NUM_NL80211_PMSR_PEER_ATTRS - 1, +}; + +/** + * enum nl80211_peer_measurement_attrs - peer measurement attributes + * @__NL80211_PMSR_ATTR_INVALID: invalid + * + * @NL80211_PMSR_ATTR_MAX_PEERS: u32 attribute used for capability + * advertisement only, indicates the maximum number of peers + * measurements can be done with in a single request + * @NL80211_PMSR_ATTR_REPORT_AP_TSF: flag attribute in capability + * indicating that the connected AP's TSF can be reported in + * measurement results + * @NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR: flag attribute in capability + * indicating that MAC address randomization is supported. + * @NL80211_PMSR_ATTR_TYPE_CAPA: capabilities reported by the device, + * this contains a nesting indexed by measurement type, and + * type-specific capabilities inside, which are from the enums + * named nl80211_peer_measurement__capa. + * @NL80211_PMSR_ATTR_PEERS: nested attribute, the nesting index is + * meaningless, just a list of peers to measure with, with the + * sub-attributes taken from + * &enum nl80211_peer_measurement_peer_attrs. + * + * @NUM_NL80211_PMSR_ATTR: internal + * @NL80211_PMSR_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_attrs { + __NL80211_PMSR_ATTR_INVALID, + + NL80211_PMSR_ATTR_MAX_PEERS, + NL80211_PMSR_ATTR_REPORT_AP_TSF, + NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR, + NL80211_PMSR_ATTR_TYPE_CAPA, + NL80211_PMSR_ATTR_PEERS, + + /* keep last */ + NUM_NL80211_PMSR_ATTR, + NL80211_PMSR_ATTR_MAX = NUM_NL80211_PMSR_ATTR - 1 +}; + +/** + * enum nl80211_peer_measurement_ftm_capa - FTM capabilities + * @__NL80211_PMSR_FTM_CAPA_ATTR_INVALID: invalid + * + * @NL80211_PMSR_FTM_CAPA_ATTR_ASAP: flag attribute indicating ASAP mode + * is supported + * @NL80211_PMSR_FTM_CAPA_ATTR_NON_ASAP: flag attribute indicating non-ASAP + * mode is supported + * @NL80211_PMSR_FTM_CAPA_ATTR_REQ_LCI: flag attribute indicating if LCI + * data can be requested during the measurement + * @NL80211_PMSR_FTM_CAPA_ATTR_REQ_CIVICLOC: flag attribute indicating if civic + * location data can be requested during the measurement + * @NL80211_PMSR_FTM_CAPA_ATTR_PREAMBLES: u32 bitmap attribute of bits + * from &enum nl80211_preamble. + * @NL80211_PMSR_FTM_CAPA_ATTR_BANDWIDTHS: bitmap of values from + * &enum nl80211_chan_width indicating the supported channel + * bandwidths for FTM. Note that a higher channel bandwidth may be + * configured to allow for other measurements types with different + * bandwidth requirement in the same measurement. + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_BURSTS_EXPONENT: u32 attribute indicating + * the maximum bursts exponent that can be used (if not present anything + * is valid) + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST: u32 attribute indicating + * the maximum FTMs per burst (if not present anything is valid) + * @NL80211_PMSR_FTM_CAPA_ATTR_TRIGGER_BASED: flag attribute indicating if + * trigger based ranging measurement is supported + * @NL80211_PMSR_FTM_CAPA_ATTR_NON_TRIGGER_BASED: flag attribute indicating + * if non trigger based ranging measurement is supported + * + * @NUM_NL80211_PMSR_FTM_CAPA_ATTR: internal + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_ftm_capa { + __NL80211_PMSR_FTM_CAPA_ATTR_INVALID, + + NL80211_PMSR_FTM_CAPA_ATTR_ASAP, + NL80211_PMSR_FTM_CAPA_ATTR_NON_ASAP, + NL80211_PMSR_FTM_CAPA_ATTR_REQ_LCI, + NL80211_PMSR_FTM_CAPA_ATTR_REQ_CIVICLOC, + NL80211_PMSR_FTM_CAPA_ATTR_PREAMBLES, + NL80211_PMSR_FTM_CAPA_ATTR_BANDWIDTHS, + NL80211_PMSR_FTM_CAPA_ATTR_MAX_BURSTS_EXPONENT, + NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST, + NL80211_PMSR_FTM_CAPA_ATTR_TRIGGER_BASED, + NL80211_PMSR_FTM_CAPA_ATTR_NON_TRIGGER_BASED, + + /* keep last */ + NUM_NL80211_PMSR_FTM_CAPA_ATTR, + NL80211_PMSR_FTM_CAPA_ATTR_MAX = NUM_NL80211_PMSR_FTM_CAPA_ATTR - 1 +}; + +/** + * enum nl80211_peer_measurement_ftm_req - FTM request attributes + * @__NL80211_PMSR_FTM_REQ_ATTR_INVALID: invalid + * + * @NL80211_PMSR_FTM_REQ_ATTR_ASAP: ASAP mode requested (flag) + * @NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE: preamble type (see + * &enum nl80211_preamble), optional for DMG (u32) + * @NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP: number of bursts exponent as in + * 802.11-2016 9.4.2.168 "Fine Timing Measurement Parameters element" + * (u8, 0-15, optional with default 15 i.e. "no preference") + * @NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD: interval between bursts in units + * of 100ms (u16, optional with default 0) + * @NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION: burst duration, as in 802.11-2016 + * Table 9-257 "Burst Duration field encoding" (u8, 0-15, optional with + * default 15 i.e. "no preference") + * @NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST: number of successful FTM frames + * requested per burst + * (u8, 0-31, optional with default 0 i.e. "no preference") + * @NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES: number of FTMR frame retries + * (u8, default 3) + * @NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI: request LCI data (flag) + * @NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC: request civic location data + * (flag) + * @NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED: request trigger based ranging + * measurement (flag). + * This attribute and %NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED are + * mutually exclusive. + * if neither %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED nor + * %NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED is set, EDCA based + * ranging will be used. + * @NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED: request non trigger based + * ranging measurement (flag) + * This attribute and %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED are + * mutually exclusive. + * if neither %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED nor + * %NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED is set, EDCA based + * ranging will be used. + * @NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK: negotiate for LMR feedback. Only + * valid if either %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED or + * %NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED is set. + * @NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR: optional. The BSS color of the + * responder. Only valid if %NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED + * or %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED is set. + * + * @NUM_NL80211_PMSR_FTM_REQ_ATTR: internal + * @NL80211_PMSR_FTM_REQ_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_ftm_req { + __NL80211_PMSR_FTM_REQ_ATTR_INVALID, + + NL80211_PMSR_FTM_REQ_ATTR_ASAP, + NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE, + NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP, + NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD, + NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION, + NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST, + NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES, + NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI, + NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC, + NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED, + NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED, + NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK, + NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR, + + /* keep last */ + NUM_NL80211_PMSR_FTM_REQ_ATTR, + NL80211_PMSR_FTM_REQ_ATTR_MAX = NUM_NL80211_PMSR_FTM_REQ_ATTR - 1 +}; + +/** + * enum nl80211_peer_measurement_ftm_failure_reasons - FTM failure reasons + * @NL80211_PMSR_FTM_FAILURE_UNSPECIFIED: unspecified failure, not used + * @NL80211_PMSR_FTM_FAILURE_NO_RESPONSE: no response from the FTM responder + * @NL80211_PMSR_FTM_FAILURE_REJECTED: FTM responder rejected measurement + * @NL80211_PMSR_FTM_FAILURE_WRONG_CHANNEL: we already know the peer is + * on a different channel, so can't measure (if we didn't know, we'd + * try and get no response) + * @NL80211_PMSR_FTM_FAILURE_PEER_NOT_CAPABLE: peer can't actually do FTM + * @NL80211_PMSR_FTM_FAILURE_INVALID_TIMESTAMP: invalid T1/T4 timestamps + * received + * @NL80211_PMSR_FTM_FAILURE_PEER_BUSY: peer reports busy, you may retry + * later (see %NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME) + * @NL80211_PMSR_FTM_FAILURE_BAD_CHANGED_PARAMS: parameters were changed + * by the peer and are no longer supported + */ +enum nl80211_peer_measurement_ftm_failure_reasons { + NL80211_PMSR_FTM_FAILURE_UNSPECIFIED, + NL80211_PMSR_FTM_FAILURE_NO_RESPONSE, + NL80211_PMSR_FTM_FAILURE_REJECTED, + NL80211_PMSR_FTM_FAILURE_WRONG_CHANNEL, + NL80211_PMSR_FTM_FAILURE_PEER_NOT_CAPABLE, + NL80211_PMSR_FTM_FAILURE_INVALID_TIMESTAMP, + NL80211_PMSR_FTM_FAILURE_PEER_BUSY, + NL80211_PMSR_FTM_FAILURE_BAD_CHANGED_PARAMS, +}; + +/** + * enum nl80211_peer_measurement_ftm_resp - FTM response attributes + * @__NL80211_PMSR_FTM_RESP_ATTR_INVALID: invalid + * + * @NL80211_PMSR_FTM_RESP_ATTR_FAIL_REASON: FTM-specific failure reason + * (u32, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_BURST_INDEX: optional, if bursts are reported + * as separate results then it will be the burst index 0...(N-1) and + * the top level will indicate partial results (u32) + * @NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_ATTEMPTS: number of FTM Request frames + * transmitted (u32, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_SUCCESSES: number of FTM Request frames + * that were acknowleged (u32, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME: retry time received from the + * busy peer (u32, seconds) + * @NL80211_PMSR_FTM_RESP_ATTR_NUM_BURSTS_EXP: actual number of bursts exponent + * used by the responder (similar to request, u8) + * @NL80211_PMSR_FTM_RESP_ATTR_BURST_DURATION: actual burst duration used by + * the responder (similar to request, u8) + * @NL80211_PMSR_FTM_RESP_ATTR_FTMS_PER_BURST: actual FTMs per burst used + * by the responder (similar to request, u8) + * @NL80211_PMSR_FTM_RESP_ATTR_RSSI_AVG: average RSSI across all FTM action + * frames (optional, s32, 1/2 dBm) + * @NL80211_PMSR_FTM_RESP_ATTR_RSSI_SPREAD: RSSI spread across all FTM action + * frames (optional, s32, 1/2 dBm) + * @NL80211_PMSR_FTM_RESP_ATTR_TX_RATE: bitrate we used for the response to the + * FTM action frame (optional, nested, using &enum nl80211_rate_info + * attributes) + * @NL80211_PMSR_FTM_RESP_ATTR_RX_RATE: bitrate the responder used for the FTM + * action frame (optional, nested, using &enum nl80211_rate_info attrs) + * @NL80211_PMSR_FTM_RESP_ATTR_RTT_AVG: average RTT (s64, picoseconds, optional + * but one of RTT/DIST must be present) + * @NL80211_PMSR_FTM_RESP_ATTR_RTT_VARIANCE: RTT variance (u64, ps^2, note that + * standard deviation is the square root of variance, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_RTT_SPREAD: RTT spread (u64, picoseconds, + * optional) + * @NL80211_PMSR_FTM_RESP_ATTR_DIST_AVG: average distance (s64, mm, optional + * but one of RTT/DIST must be present) + * @NL80211_PMSR_FTM_RESP_ATTR_DIST_VARIANCE: distance variance (u64, mm^2, note + * that standard deviation is the square root of variance, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_DIST_SPREAD: distance spread (u64, mm, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_LCI: LCI data from peer (binary, optional); + * this is the contents of the Measurement Report Element (802.11-2016 + * 9.4.2.22.1) starting with the Measurement Token, with Measurement + * Type 8. + * @NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC: civic location data from peer + * (binary, optional); + * this is the contents of the Measurement Report Element (802.11-2016 + * 9.4.2.22.1) starting with the Measurement Token, with Measurement + * Type 11. + * @NL80211_PMSR_FTM_RESP_ATTR_PAD: ignore, for u64/s64 padding only + * + * @NUM_NL80211_PMSR_FTM_RESP_ATTR: internal + * @NL80211_PMSR_FTM_RESP_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_ftm_resp { + __NL80211_PMSR_FTM_RESP_ATTR_INVALID, + + NL80211_PMSR_FTM_RESP_ATTR_FAIL_REASON, + NL80211_PMSR_FTM_RESP_ATTR_BURST_INDEX, + NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_ATTEMPTS, + NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_SUCCESSES, + NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME, + NL80211_PMSR_FTM_RESP_ATTR_NUM_BURSTS_EXP, + NL80211_PMSR_FTM_RESP_ATTR_BURST_DURATION, + NL80211_PMSR_FTM_RESP_ATTR_FTMS_PER_BURST, + NL80211_PMSR_FTM_RESP_ATTR_RSSI_AVG, + NL80211_PMSR_FTM_RESP_ATTR_RSSI_SPREAD, + NL80211_PMSR_FTM_RESP_ATTR_TX_RATE, + NL80211_PMSR_FTM_RESP_ATTR_RX_RATE, + NL80211_PMSR_FTM_RESP_ATTR_RTT_AVG, + NL80211_PMSR_FTM_RESP_ATTR_RTT_VARIANCE, + NL80211_PMSR_FTM_RESP_ATTR_RTT_SPREAD, + NL80211_PMSR_FTM_RESP_ATTR_DIST_AVG, + NL80211_PMSR_FTM_RESP_ATTR_DIST_VARIANCE, + NL80211_PMSR_FTM_RESP_ATTR_DIST_SPREAD, + NL80211_PMSR_FTM_RESP_ATTR_LCI, + NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC, + NL80211_PMSR_FTM_RESP_ATTR_PAD, + + /* keep last */ + NUM_NL80211_PMSR_FTM_RESP_ATTR, + NL80211_PMSR_FTM_RESP_ATTR_MAX = NUM_NL80211_PMSR_FTM_RESP_ATTR - 1 +}; + +/** + * enum nl80211_obss_pd_attributes - OBSS packet detection attributes + * @__NL80211_HE_OBSS_PD_ATTR_INVALID: Invalid + * + * @NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET: the OBSS PD minimum tx power offset. + * @NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET: the OBSS PD maximum tx power offset. + * @NL80211_HE_OBSS_PD_ATTR_NON_SRG_MAX_OFFSET: the non-SRG OBSS PD maximum + * tx power offset. + * @NL80211_HE_OBSS_PD_ATTR_BSS_COLOR_BITMAP: bitmap that indicates the BSS color + * values used by members of the SRG. + * @NL80211_HE_OBSS_PD_ATTR_PARTIAL_BSSID_BITMAP: bitmap that indicates the partial + * BSSID values used by members of the SRG. + * @NL80211_HE_OBSS_PD_ATTR_SR_CTRL: The SR Control field of SRP element. + * + * @__NL80211_HE_OBSS_PD_ATTR_LAST: Internal + * @NL80211_HE_OBSS_PD_ATTR_MAX: highest OBSS PD attribute. + */ +enum nl80211_obss_pd_attributes { + __NL80211_HE_OBSS_PD_ATTR_INVALID, + + NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET, + NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET, + NL80211_HE_OBSS_PD_ATTR_NON_SRG_MAX_OFFSET, + NL80211_HE_OBSS_PD_ATTR_BSS_COLOR_BITMAP, + NL80211_HE_OBSS_PD_ATTR_PARTIAL_BSSID_BITMAP, + NL80211_HE_OBSS_PD_ATTR_SR_CTRL, + + /* keep last */ + __NL80211_HE_OBSS_PD_ATTR_LAST, + NL80211_HE_OBSS_PD_ATTR_MAX = __NL80211_HE_OBSS_PD_ATTR_LAST - 1, +}; + +/** + * enum nl80211_bss_color_attributes - BSS Color attributes + * @__NL80211_HE_BSS_COLOR_ATTR_INVALID: Invalid + * + * @NL80211_HE_BSS_COLOR_ATTR_COLOR: the current BSS Color. + * @NL80211_HE_BSS_COLOR_ATTR_DISABLED: is BSS coloring disabled. + * @NL80211_HE_BSS_COLOR_ATTR_PARTIAL: the AID equation to be used.. + * + * @__NL80211_HE_BSS_COLOR_ATTR_LAST: Internal + * @NL80211_HE_BSS_COLOR_ATTR_MAX: highest BSS Color attribute. + */ +enum nl80211_bss_color_attributes { + __NL80211_HE_BSS_COLOR_ATTR_INVALID, + + NL80211_HE_BSS_COLOR_ATTR_COLOR, + NL80211_HE_BSS_COLOR_ATTR_DISABLED, + NL80211_HE_BSS_COLOR_ATTR_PARTIAL, + + /* keep last */ + __NL80211_HE_BSS_COLOR_ATTR_LAST, + NL80211_HE_BSS_COLOR_ATTR_MAX = __NL80211_HE_BSS_COLOR_ATTR_LAST - 1, +}; + +/** + * enum nl80211_iftype_akm_attributes - interface type AKM attributes + * @__NL80211_IFTYPE_AKM_ATTR_INVALID: Invalid + * + * @NL80211_IFTYPE_AKM_ATTR_IFTYPES: nested attribute containing a flag + * attribute for each interface type that supports AKM suites specified in + * %NL80211_IFTYPE_AKM_ATTR_SUITES + * @NL80211_IFTYPE_AKM_ATTR_SUITES: an array of u32. Used to indicate supported + * AKM suites for the specified interface types. + * + * @__NL80211_IFTYPE_AKM_ATTR_LAST: Internal + * @NL80211_IFTYPE_AKM_ATTR_MAX: highest interface type AKM attribute. + */ +enum nl80211_iftype_akm_attributes { + __NL80211_IFTYPE_AKM_ATTR_INVALID, + + NL80211_IFTYPE_AKM_ATTR_IFTYPES, + NL80211_IFTYPE_AKM_ATTR_SUITES, + + /* keep last */ + __NL80211_IFTYPE_AKM_ATTR_LAST, + NL80211_IFTYPE_AKM_ATTR_MAX = __NL80211_IFTYPE_AKM_ATTR_LAST - 1, +}; + +/** + * enum nl80211_fils_discovery_attributes - FILS discovery configuration + * from IEEE Std 802.11ai-2016, Annex C.3 MIB detail. + * + * @__NL80211_FILS_DISCOVERY_ATTR_INVALID: Invalid + * + * @NL80211_FILS_DISCOVERY_ATTR_INT_MIN: Minimum packet interval (u32, TU). + * Allowed range: 0..10000 (TU = Time Unit) + * @NL80211_FILS_DISCOVERY_ATTR_INT_MAX: Maximum packet interval (u32, TU). + * Allowed range: 0..10000 (TU = Time Unit) + * @NL80211_FILS_DISCOVERY_ATTR_TMPL: Template data for FILS discovery action + * frame including the headers. + * + * @__NL80211_FILS_DISCOVERY_ATTR_LAST: Internal + * @NL80211_FILS_DISCOVERY_ATTR_MAX: highest attribute + */ +enum nl80211_fils_discovery_attributes { + __NL80211_FILS_DISCOVERY_ATTR_INVALID, + + NL80211_FILS_DISCOVERY_ATTR_INT_MIN, + NL80211_FILS_DISCOVERY_ATTR_INT_MAX, + NL80211_FILS_DISCOVERY_ATTR_TMPL, + + /* keep last */ + __NL80211_FILS_DISCOVERY_ATTR_LAST, + NL80211_FILS_DISCOVERY_ATTR_MAX = __NL80211_FILS_DISCOVERY_ATTR_LAST - 1 +}; + +/* + * FILS discovery template minimum length with action frame headers and + * mandatory fields. + */ +#define NL80211_FILS_DISCOVERY_TMPL_MIN_LEN 42 + +/** + * enum nl80211_unsol_bcast_probe_resp_attributes - Unsolicited broadcast probe + * response configuration. Applicable only in 6GHz. + * + * @__NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INVALID: Invalid + * + * @NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT: Maximum packet interval (u32, TU). + * Allowed range: 0..20 (TU = Time Unit). IEEE P802.11ax/D6.0 + * 26.17.2.3.2 (AP behavior for fast passive scanning). + * @NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL: Unsolicited broadcast probe response + * frame template (binary). + * + * @__NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_LAST: Internal + * @NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX: highest attribute + */ +enum nl80211_unsol_bcast_probe_resp_attributes { + __NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INVALID, + + NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT, + NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL, + + /* keep last */ + __NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_LAST, + NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX = + __NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_LAST - 1 +}; + +/** + * enum nl80211_sae_pwe_mechanism - The mechanism(s) allowed for SAE PWE + * derivation. Applicable only when WPA3-Personal SAE authentication is + * used. + * + * @NL80211_SAE_PWE_UNSPECIFIED: not specified, used internally to indicate that + * attribute is not present from userspace. + * @NL80211_SAE_PWE_HUNT_AND_PECK: hunting-and-pecking loop only + * @NL80211_SAE_PWE_HASH_TO_ELEMENT: hash-to-element only + * @NL80211_SAE_PWE_BOTH: both hunting-and-pecking loop and hash-to-element + * can be used. + */ +enum nl80211_sae_pwe_mechanism { + NL80211_SAE_PWE_UNSPECIFIED, + NL80211_SAE_PWE_HUNT_AND_PECK, + NL80211_SAE_PWE_HASH_TO_ELEMENT, + NL80211_SAE_PWE_BOTH, +}; + +/** + * enum nl80211_sar_type - type of SAR specs + * + * @NL80211_SAR_TYPE_POWER: power limitation specified in 0.25dBm unit + * + */ +enum nl80211_sar_type { + NL80211_SAR_TYPE_POWER, + + /* add new type here */ + + /* Keep last */ + NUM_NL80211_SAR_TYPE, +}; + +/** + * enum nl80211_sar_attrs - Attributes for SAR spec + * + * @NL80211_SAR_ATTR_TYPE: the SAR type as defined in &enum nl80211_sar_type. + * + * @NL80211_SAR_ATTR_SPECS: Nested array of SAR power + * limit specifications. Each specification contains a set + * of %nl80211_sar_specs_attrs. + * + * For SET operation, it contains array of %NL80211_SAR_ATTR_SPECS_POWER + * and %NL80211_SAR_ATTR_SPECS_RANGE_INDEX. + * + * For sar_capa dump, it contains array of + * %NL80211_SAR_ATTR_SPECS_START_FREQ + * and %NL80211_SAR_ATTR_SPECS_END_FREQ. + * + * @__NL80211_SAR_ATTR_LAST: Internal + * @NL80211_SAR_ATTR_MAX: highest sar attribute + * + * These attributes are used with %NL80211_CMD_SET_SAR_SPEC + */ +enum nl80211_sar_attrs { + __NL80211_SAR_ATTR_INVALID, + + NL80211_SAR_ATTR_TYPE, + NL80211_SAR_ATTR_SPECS, + + __NL80211_SAR_ATTR_LAST, + NL80211_SAR_ATTR_MAX = __NL80211_SAR_ATTR_LAST - 1, +}; + +/** + * enum nl80211_sar_specs_attrs - Attributes for SAR power limit specs + * + * @NL80211_SAR_ATTR_SPECS_POWER: Required (s32)value to specify the actual + * power limit value in units of 0.25 dBm if type is + * NL80211_SAR_TYPE_POWER. (i.e., a value of 44 represents 11 dBm). + * 0 means userspace doesn't have SAR limitation on this associated range. + * + * @NL80211_SAR_ATTR_SPECS_RANGE_INDEX: Required (u32) value to specify the + * index of exported freq range table and the associated power limitation + * is applied to this range. + * + * Userspace isn't required to set all the ranges advertised by WLAN driver, + * and userspace can skip some certain ranges. These skipped ranges don't + * have SAR limitations, and they are same as setting the + * %NL80211_SAR_ATTR_SPECS_POWER to any unreasonable high value because any + * value higher than regulatory allowed value just means SAR power + * limitation is removed, but it's required to set at least one range. + * It's not allowed to set duplicated range in one SET operation. + * + * Every SET operation overwrites previous SET operation. + * + * @NL80211_SAR_ATTR_SPECS_START_FREQ: Required (u32) value to specify the start + * frequency of this range edge when registering SAR capability to wiphy. + * It's not a channel center frequency. The unit is kHz. + * + * @NL80211_SAR_ATTR_SPECS_END_FREQ: Required (u32) value to specify the end + * frequency of this range edge when registering SAR capability to wiphy. + * It's not a channel center frequency. The unit is kHz. + * + * @__NL80211_SAR_ATTR_SPECS_LAST: Internal + * @NL80211_SAR_ATTR_SPECS_MAX: highest sar specs attribute + */ +enum nl80211_sar_specs_attrs { + __NL80211_SAR_ATTR_SPECS_INVALID, + + NL80211_SAR_ATTR_SPECS_POWER, + NL80211_SAR_ATTR_SPECS_RANGE_INDEX, + NL80211_SAR_ATTR_SPECS_START_FREQ, + NL80211_SAR_ATTR_SPECS_END_FREQ, + + __NL80211_SAR_ATTR_SPECS_LAST, + NL80211_SAR_ATTR_SPECS_MAX = __NL80211_SAR_ATTR_SPECS_LAST - 1, +}; + +/** + * enum nl80211_mbssid_config_attributes - multiple BSSID (MBSSID) and enhanced + * multi-BSSID advertisements (EMA) in AP mode. + * Kernel uses some of these attributes to advertise driver's support for + * MBSSID and EMA. + * Remaining attributes should be used by the userspace to configure the + * features. + * + * @__NL80211_MBSSID_CONFIG_ATTR_INVALID: Invalid + * + * @NL80211_MBSSID_CONFIG_ATTR_MAX_INTERFACES: Used by the kernel to advertise + * the maximum number of MBSSID interfaces supported by the driver. + * Driver should indicate MBSSID support by setting + * wiphy->mbssid_max_interfaces to a value more than or equal to 2. + * + * @NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY: Used by the kernel + * to advertise the maximum profile periodicity supported by the driver + * if EMA is enabled. Driver should indicate EMA support to the userspace + * by setting wiphy->ema_max_profile_periodicity to + * a non-zero value. + * + * @NL80211_MBSSID_CONFIG_ATTR_INDEX: Mandatory parameter to pass the index of + * this BSS (u8) in the multiple BSSID set. + * Value must be set to 0 for the transmitting interface and non-zero for + * all non-transmitting interfaces. The userspace will be responsible + * for using unique indices for the interfaces. + * Range: 0 to wiphy->mbssid_max_interfaces-1. + * + * @NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX: Mandatory parameter for + * a non-transmitted profile which provides the interface index (u32) of + * the transmitted profile. The value must match one of the interface + * indices advertised by the kernel. Optional if the interface being set up + * is the transmitting one, however, if provided then the value must match + * the interface index of the same. + * + * @NL80211_MBSSID_CONFIG_ATTR_EMA: Flag used to enable EMA AP feature. + * Setting this flag is permitted only if the driver advertises EMA support + * by setting wiphy->ema_max_profile_periodicity to non-zero. + * + * @__NL80211_MBSSID_CONFIG_ATTR_LAST: Internal + * @NL80211_MBSSID_CONFIG_ATTR_MAX: highest attribute + */ +enum nl80211_mbssid_config_attributes { + __NL80211_MBSSID_CONFIG_ATTR_INVALID, + + NL80211_MBSSID_CONFIG_ATTR_MAX_INTERFACES, + NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY, + NL80211_MBSSID_CONFIG_ATTR_INDEX, + NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX, + NL80211_MBSSID_CONFIG_ATTR_EMA, + + /* keep last */ + __NL80211_MBSSID_CONFIG_ATTR_LAST, + NL80211_MBSSID_CONFIG_ATTR_MAX = __NL80211_MBSSID_CONFIG_ATTR_LAST - 1, +}; + +/** + * enum nl80211_ap_settings_flags - AP settings flags + * + * @NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external + * authentication. + * @NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT: Userspace supports SA Query + * procedures offload to driver. If driver advertises + * %NL80211_AP_SME_SA_QUERY_OFFLOAD in AP SME features, userspace shall + * ignore SA Query procedures and validations when this flag is set by + * userspace. + */ +enum nl80211_ap_settings_flags { + NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT = 1 << 0, + NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT = 1 << 1, +}; + +#endif /* __LINUX_NL80211_H */ diff --git a/src/basic/linux/pkt_sched.h b/src/basic/linux/pkt_sched.h new file mode 100644 index 0000000..000eec1 --- /dev/null +++ b/src/basic/linux/pkt_sched.h @@ -0,0 +1,1281 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_PKT_SCHED_H +#define __LINUX_PKT_SCHED_H + +#include +#include + +/* Logical priority bands not depending on specific packet scheduler. + Every scheduler will map them to real traffic classes, if it has + no more precise mechanism to classify packets. + + These numbers have no special meaning, though their coincidence + with obsolete IPv6 values is not occasional :-). New IPv6 drafts + preferred full anarchy inspired by diffserv group. + + Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy + class, actually, as rule it will be handled with more care than + filler or even bulk. + */ + +#define TC_PRIO_BESTEFFORT 0 +#define TC_PRIO_FILLER 1 +#define TC_PRIO_BULK 2 +#define TC_PRIO_INTERACTIVE_BULK 4 +#define TC_PRIO_INTERACTIVE 6 +#define TC_PRIO_CONTROL 7 + +#define TC_PRIO_MAX 15 + +/* Generic queue statistics, available for all the elements. + Particular schedulers may have also their private records. + */ + +struct tc_stats { + __u64 bytes; /* Number of enqueued bytes */ + __u32 packets; /* Number of enqueued packets */ + __u32 drops; /* Packets dropped because of lack of resources */ + __u32 overlimits; /* Number of throttle events when this + * flow goes out of allocated bandwidth */ + __u32 bps; /* Current flow byte rate */ + __u32 pps; /* Current flow packet rate */ + __u32 qlen; + __u32 backlog; +}; + +struct tc_estimator { + signed char interval; + unsigned char ewma_log; +}; + +/* "Handles" + --------- + + All the traffic control objects have 32bit identifiers, or "handles". + + They can be considered as opaque numbers from user API viewpoint, + but actually they always consist of two fields: major and + minor numbers, which are interpreted by kernel specially, + that may be used by applications, though not recommended. + + F.e. qdisc handles always have minor number equal to zero, + classes (or flows) have major equal to parent qdisc major, and + minor uniquely identifying class inside qdisc. + + Macros to manipulate handles: + */ + +#define TC_H_MAJ_MASK (0xFFFF0000U) +#define TC_H_MIN_MASK (0x0000FFFFU) +#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK) +#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK) +#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK)) + +#define TC_H_UNSPEC (0U) +#define TC_H_ROOT (0xFFFFFFFFU) +#define TC_H_INGRESS (0xFFFFFFF1U) +#define TC_H_CLSACT TC_H_INGRESS + +#define TC_H_MIN_PRIORITY 0xFFE0U +#define TC_H_MIN_INGRESS 0xFFF2U +#define TC_H_MIN_EGRESS 0xFFF3U + +/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */ +enum tc_link_layer { + TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */ + TC_LINKLAYER_ETHERNET, + TC_LINKLAYER_ATM, +}; +#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */ + +struct tc_ratespec { + unsigned char cell_log; + __u8 linklayer; /* lower 4 bits */ + unsigned short overhead; + short cell_align; + unsigned short mpu; + __u32 rate; +}; + +#define TC_RTAB_SIZE 1024 + +struct tc_sizespec { + unsigned char cell_log; + unsigned char size_log; + short cell_align; + int overhead; + unsigned int linklayer; + unsigned int mpu; + unsigned int mtu; + unsigned int tsize; +}; + +enum { + TCA_STAB_UNSPEC, + TCA_STAB_BASE, + TCA_STAB_DATA, + __TCA_STAB_MAX +}; + +#define TCA_STAB_MAX (__TCA_STAB_MAX - 1) + +/* FIFO section */ + +struct tc_fifo_qopt { + __u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */ +}; + +/* SKBPRIO section */ + +/* + * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1). + * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able + * to map one to one the DS field of IPV4 and IPV6 headers. + * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY. + */ + +#define SKBPRIO_MAX_PRIORITY 64 + +struct tc_skbprio_qopt { + __u32 limit; /* Queue length in packets. */ +}; + +/* PRIO section */ + +#define TCQ_PRIO_BANDS 16 +#define TCQ_MIN_PRIO_BANDS 2 + +struct tc_prio_qopt { + int bands; /* Number of bands */ + __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ +}; + +/* MULTIQ section */ + +struct tc_multiq_qopt { + __u16 bands; /* Number of bands */ + __u16 max_bands; /* Maximum number of queues */ +}; + +/* PLUG section */ + +#define TCQ_PLUG_BUFFER 0 +#define TCQ_PLUG_RELEASE_ONE 1 +#define TCQ_PLUG_RELEASE_INDEFINITE 2 +#define TCQ_PLUG_LIMIT 3 + +struct tc_plug_qopt { + /* TCQ_PLUG_BUFFER: Inset a plug into the queue and + * buffer any incoming packets + * TCQ_PLUG_RELEASE_ONE: Dequeue packets from queue head + * to beginning of the next plug. + * TCQ_PLUG_RELEASE_INDEFINITE: Dequeue all packets from queue. + * Stop buffering packets until the next TCQ_PLUG_BUFFER + * command is received (just act as a pass-thru queue). + * TCQ_PLUG_LIMIT: Increase/decrease queue size + */ + int action; + __u32 limit; +}; + +/* TBF section */ + +struct tc_tbf_qopt { + struct tc_ratespec rate; + struct tc_ratespec peakrate; + __u32 limit; + __u32 buffer; + __u32 mtu; +}; + +enum { + TCA_TBF_UNSPEC, + TCA_TBF_PARMS, + TCA_TBF_RTAB, + TCA_TBF_PTAB, + TCA_TBF_RATE64, + TCA_TBF_PRATE64, + TCA_TBF_BURST, + TCA_TBF_PBURST, + TCA_TBF_PAD, + __TCA_TBF_MAX, +}; + +#define TCA_TBF_MAX (__TCA_TBF_MAX - 1) + + +/* TEQL section */ + +/* TEQL does not require any parameters */ + +/* SFQ section */ + +struct tc_sfq_qopt { + unsigned quantum; /* Bytes per round allocated to flow */ + int perturb_period; /* Period of hash perturbation */ + __u32 limit; /* Maximal packets in queue */ + unsigned divisor; /* Hash divisor */ + unsigned flows; /* Maximal number of flows */ +}; + +struct tc_sfqred_stats { + __u32 prob_drop; /* Early drops, below max threshold */ + __u32 forced_drop; /* Early drops, after max threshold */ + __u32 prob_mark; /* Marked packets, below max threshold */ + __u32 forced_mark; /* Marked packets, after max threshold */ + __u32 prob_mark_head; /* Marked packets, below max threshold */ + __u32 forced_mark_head;/* Marked packets, after max threshold */ +}; + +struct tc_sfq_qopt_v1 { + struct tc_sfq_qopt v0; + unsigned int depth; /* max number of packets per flow */ + unsigned int headdrop; +/* SFQRED parameters */ + __u32 limit; /* HARD maximal flow queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + unsigned char flags; + __u32 max_P; /* probability, high resolution */ +/* SFQRED stats */ + struct tc_sfqred_stats stats; +}; + + +struct tc_sfq_xstats { + __s32 allot; +}; + +/* RED section */ + +enum { + TCA_RED_UNSPEC, + TCA_RED_PARMS, + TCA_RED_STAB, + TCA_RED_MAX_P, + TCA_RED_FLAGS, /* bitfield32 */ + TCA_RED_EARLY_DROP_BLOCK, /* u32 */ + TCA_RED_MARK_BLOCK, /* u32 */ + __TCA_RED_MAX, +}; + +#define TCA_RED_MAX (__TCA_RED_MAX - 1) + +struct tc_red_qopt { + __u32 limit; /* HARD maximal queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + + /* This field can be used for flags that a RED-like qdisc has + * historically supported. E.g. when configuring RED, it can be used for + * ECN, HARDDROP and ADAPTATIVE. For SFQ it can be used for ECN, + * HARDDROP. Etc. Because this field has not been validated, and is + * copied back on dump, any bits besides those to which a given qdisc + * has assigned a historical meaning need to be considered for free use + * by userspace tools. + * + * Any further flags need to be passed differently, e.g. through an + * attribute (such as TCA_RED_FLAGS above). Such attribute should allow + * passing both recent and historic flags in one value. + */ + unsigned char flags; +#define TC_RED_ECN 1 +#define TC_RED_HARDDROP 2 +#define TC_RED_ADAPTATIVE 4 +#define TC_RED_NODROP 8 +}; + +#define TC_RED_HISTORIC_FLAGS (TC_RED_ECN | TC_RED_HARDDROP | TC_RED_ADAPTATIVE) + +struct tc_red_xstats { + __u32 early; /* Early drops */ + __u32 pdrop; /* Drops due to queue limits */ + __u32 other; /* Drops due to drop() calls */ + __u32 marked; /* Marked packets */ +}; + +/* GRED section */ + +#define MAX_DPs 16 + +enum { + TCA_GRED_UNSPEC, + TCA_GRED_PARMS, + TCA_GRED_STAB, + TCA_GRED_DPS, + TCA_GRED_MAX_P, + TCA_GRED_LIMIT, + TCA_GRED_VQ_LIST, /* nested TCA_GRED_VQ_ENTRY */ + __TCA_GRED_MAX, +}; + +#define TCA_GRED_MAX (__TCA_GRED_MAX - 1) + +enum { + TCA_GRED_VQ_ENTRY_UNSPEC, + TCA_GRED_VQ_ENTRY, /* nested TCA_GRED_VQ_* */ + __TCA_GRED_VQ_ENTRY_MAX, +}; +#define TCA_GRED_VQ_ENTRY_MAX (__TCA_GRED_VQ_ENTRY_MAX - 1) + +enum { + TCA_GRED_VQ_UNSPEC, + TCA_GRED_VQ_PAD, + TCA_GRED_VQ_DP, /* u32 */ + TCA_GRED_VQ_STAT_BYTES, /* u64 */ + TCA_GRED_VQ_STAT_PACKETS, /* u32 */ + TCA_GRED_VQ_STAT_BACKLOG, /* u32 */ + TCA_GRED_VQ_STAT_PROB_DROP, /* u32 */ + TCA_GRED_VQ_STAT_PROB_MARK, /* u32 */ + TCA_GRED_VQ_STAT_FORCED_DROP, /* u32 */ + TCA_GRED_VQ_STAT_FORCED_MARK, /* u32 */ + TCA_GRED_VQ_STAT_PDROP, /* u32 */ + TCA_GRED_VQ_STAT_OTHER, /* u32 */ + TCA_GRED_VQ_FLAGS, /* u32 */ + __TCA_GRED_VQ_MAX +}; + +#define TCA_GRED_VQ_MAX (__TCA_GRED_VQ_MAX - 1) + +struct tc_gred_qopt { + __u32 limit; /* HARD maximal queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + __u32 DP; /* up to 2^32 DPs */ + __u32 backlog; + __u32 qave; + __u32 forced; + __u32 early; + __u32 other; + __u32 pdrop; + __u8 Wlog; /* log(W) */ + __u8 Plog; /* log(P_max/(qth_max-qth_min)) */ + __u8 Scell_log; /* cell size for idle damping */ + __u8 prio; /* prio of this VQ */ + __u32 packets; + __u32 bytesin; +}; + +/* gred setup */ +struct tc_gred_sopt { + __u32 DPs; + __u32 def_DP; + __u8 grio; + __u8 flags; + __u16 pad1; +}; + +/* CHOKe section */ + +enum { + TCA_CHOKE_UNSPEC, + TCA_CHOKE_PARMS, + TCA_CHOKE_STAB, + TCA_CHOKE_MAX_P, + __TCA_CHOKE_MAX, +}; + +#define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1) + +struct tc_choke_qopt { + __u32 limit; /* Hard queue length (packets) */ + __u32 qth_min; /* Min average threshold (packets) */ + __u32 qth_max; /* Max average threshold (packets) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + unsigned char flags; /* see RED flags */ +}; + +struct tc_choke_xstats { + __u32 early; /* Early drops */ + __u32 pdrop; /* Drops due to queue limits */ + __u32 other; /* Drops due to drop() calls */ + __u32 marked; /* Marked packets */ + __u32 matched; /* Drops due to flow match */ +}; + +/* HTB section */ +#define TC_HTB_NUMPRIO 8 +#define TC_HTB_MAXDEPTH 8 +#define TC_HTB_PROTOVER 3 /* the same as HTB and TC's major */ + +struct tc_htb_opt { + struct tc_ratespec rate; + struct tc_ratespec ceil; + __u32 buffer; + __u32 cbuffer; + __u32 quantum; + __u32 level; /* out only */ + __u32 prio; +}; +struct tc_htb_glob { + __u32 version; /* to match HTB/TC */ + __u32 rate2quantum; /* bps->quantum divisor */ + __u32 defcls; /* default class number */ + __u32 debug; /* debug flags */ + + /* stats */ + __u32 direct_pkts; /* count of non shaped packets */ +}; +enum { + TCA_HTB_UNSPEC, + TCA_HTB_PARMS, + TCA_HTB_INIT, + TCA_HTB_CTAB, + TCA_HTB_RTAB, + TCA_HTB_DIRECT_QLEN, + TCA_HTB_RATE64, + TCA_HTB_CEIL64, + TCA_HTB_PAD, + TCA_HTB_OFFLOAD, + __TCA_HTB_MAX, +}; + +#define TCA_HTB_MAX (__TCA_HTB_MAX - 1) + +struct tc_htb_xstats { + __u32 lends; + __u32 borrows; + __u32 giants; /* unused since 'Make HTB scheduler work with TSO.' */ + __s32 tokens; + __s32 ctokens; +}; + +/* HFSC section */ + +struct tc_hfsc_qopt { + __u16 defcls; /* default class */ +}; + +struct tc_service_curve { + __u32 m1; /* slope of the first segment in bps */ + __u32 d; /* x-projection of the first segment in us */ + __u32 m2; /* slope of the second segment in bps */ +}; + +struct tc_hfsc_stats { + __u64 work; /* total work done */ + __u64 rtwork; /* work done by real-time criteria */ + __u32 period; /* current period */ + __u32 level; /* class level in hierarchy */ +}; + +enum { + TCA_HFSC_UNSPEC, + TCA_HFSC_RSC, + TCA_HFSC_FSC, + TCA_HFSC_USC, + __TCA_HFSC_MAX, +}; + +#define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1) + + +/* CBQ section */ + +#define TC_CBQ_MAXPRIO 8 +#define TC_CBQ_MAXLEVEL 8 +#define TC_CBQ_DEF_EWMA 5 + +struct tc_cbq_lssopt { + unsigned char change; + unsigned char flags; +#define TCF_CBQ_LSS_BOUNDED 1 +#define TCF_CBQ_LSS_ISOLATED 2 + unsigned char ewma_log; + unsigned char level; +#define TCF_CBQ_LSS_FLAGS 1 +#define TCF_CBQ_LSS_EWMA 2 +#define TCF_CBQ_LSS_MAXIDLE 4 +#define TCF_CBQ_LSS_MINIDLE 8 +#define TCF_CBQ_LSS_OFFTIME 0x10 +#define TCF_CBQ_LSS_AVPKT 0x20 + __u32 maxidle; + __u32 minidle; + __u32 offtime; + __u32 avpkt; +}; + +struct tc_cbq_wrropt { + unsigned char flags; + unsigned char priority; + unsigned char cpriority; + unsigned char __reserved; + __u32 allot; + __u32 weight; +}; + +struct tc_cbq_ovl { + unsigned char strategy; +#define TC_CBQ_OVL_CLASSIC 0 +#define TC_CBQ_OVL_DELAY 1 +#define TC_CBQ_OVL_LOWPRIO 2 +#define TC_CBQ_OVL_DROP 3 +#define TC_CBQ_OVL_RCLASSIC 4 + unsigned char priority2; + __u16 pad; + __u32 penalty; +}; + +struct tc_cbq_police { + unsigned char police; + unsigned char __res1; + unsigned short __res2; +}; + +struct tc_cbq_fopt { + __u32 split; + __u32 defmap; + __u32 defchange; +}; + +struct tc_cbq_xstats { + __u32 borrows; + __u32 overactions; + __s32 avgidle; + __s32 undertime; +}; + +enum { + TCA_CBQ_UNSPEC, + TCA_CBQ_LSSOPT, + TCA_CBQ_WRROPT, + TCA_CBQ_FOPT, + TCA_CBQ_OVL_STRATEGY, + TCA_CBQ_RATE, + TCA_CBQ_RTAB, + TCA_CBQ_POLICE, + __TCA_CBQ_MAX, +}; + +#define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1) + +/* dsmark section */ + +enum { + TCA_DSMARK_UNSPEC, + TCA_DSMARK_INDICES, + TCA_DSMARK_DEFAULT_INDEX, + TCA_DSMARK_SET_TC_INDEX, + TCA_DSMARK_MASK, + TCA_DSMARK_VALUE, + __TCA_DSMARK_MAX, +}; + +#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1) + +/* ATM section */ + +enum { + TCA_ATM_UNSPEC, + TCA_ATM_FD, /* file/socket descriptor */ + TCA_ATM_PTR, /* pointer to descriptor - later */ + TCA_ATM_HDR, /* LL header */ + TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */ + TCA_ATM_ADDR, /* PVC address (for output only) */ + TCA_ATM_STATE, /* VC state (ATM_VS_*; for output only) */ + __TCA_ATM_MAX, +}; + +#define TCA_ATM_MAX (__TCA_ATM_MAX - 1) + +/* Network emulator */ + +enum { + TCA_NETEM_UNSPEC, + TCA_NETEM_CORR, + TCA_NETEM_DELAY_DIST, + TCA_NETEM_REORDER, + TCA_NETEM_CORRUPT, + TCA_NETEM_LOSS, + TCA_NETEM_RATE, + TCA_NETEM_ECN, + TCA_NETEM_RATE64, + TCA_NETEM_PAD, + TCA_NETEM_LATENCY64, + TCA_NETEM_JITTER64, + TCA_NETEM_SLOT, + TCA_NETEM_SLOT_DIST, + __TCA_NETEM_MAX, +}; + +#define TCA_NETEM_MAX (__TCA_NETEM_MAX - 1) + +struct tc_netem_qopt { + __u32 latency; /* added delay (us) */ + __u32 limit; /* fifo limit (packets) */ + __u32 loss; /* random packet loss (0=none ~0=100%) */ + __u32 gap; /* re-ordering gap (0 for none) */ + __u32 duplicate; /* random packet dup (0=none ~0=100%) */ + __u32 jitter; /* random jitter in latency (us) */ +}; + +struct tc_netem_corr { + __u32 delay_corr; /* delay correlation */ + __u32 loss_corr; /* packet loss correlation */ + __u32 dup_corr; /* duplicate correlation */ +}; + +struct tc_netem_reorder { + __u32 probability; + __u32 correlation; +}; + +struct tc_netem_corrupt { + __u32 probability; + __u32 correlation; +}; + +struct tc_netem_rate { + __u32 rate; /* byte/s */ + __s32 packet_overhead; + __u32 cell_size; + __s32 cell_overhead; +}; + +struct tc_netem_slot { + __s64 min_delay; /* nsec */ + __s64 max_delay; + __s32 max_packets; + __s32 max_bytes; + __s64 dist_delay; /* nsec */ + __s64 dist_jitter; /* nsec */ +}; + +enum { + NETEM_LOSS_UNSPEC, + NETEM_LOSS_GI, /* General Intuitive - 4 state model */ + NETEM_LOSS_GE, /* Gilbert Elliot models */ + __NETEM_LOSS_MAX +}; +#define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1) + +/* State transition probabilities for 4 state model */ +struct tc_netem_gimodel { + __u32 p13; + __u32 p31; + __u32 p32; + __u32 p14; + __u32 p23; +}; + +/* Gilbert-Elliot models */ +struct tc_netem_gemodel { + __u32 p; + __u32 r; + __u32 h; + __u32 k1; +}; + +#define NETEM_DIST_SCALE 8192 +#define NETEM_DIST_MAX 16384 + +/* DRR */ + +enum { + TCA_DRR_UNSPEC, + TCA_DRR_QUANTUM, + __TCA_DRR_MAX +}; + +#define TCA_DRR_MAX (__TCA_DRR_MAX - 1) + +struct tc_drr_stats { + __u32 deficit; +}; + +/* MQPRIO */ +#define TC_QOPT_BITMASK 15 +#define TC_QOPT_MAX_QUEUE 16 + +enum { + TC_MQPRIO_HW_OFFLOAD_NONE, /* no offload requested */ + TC_MQPRIO_HW_OFFLOAD_TCS, /* offload TCs, no queue counts */ + __TC_MQPRIO_HW_OFFLOAD_MAX +}; + +#define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1) + +enum { + TC_MQPRIO_MODE_DCB, + TC_MQPRIO_MODE_CHANNEL, + __TC_MQPRIO_MODE_MAX +}; + +#define __TC_MQPRIO_MODE_MAX (__TC_MQPRIO_MODE_MAX - 1) + +enum { + TC_MQPRIO_SHAPER_DCB, + TC_MQPRIO_SHAPER_BW_RATE, /* Add new shapers below */ + __TC_MQPRIO_SHAPER_MAX +}; + +#define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1) + +struct tc_mqprio_qopt { + __u8 num_tc; + __u8 prio_tc_map[TC_QOPT_BITMASK + 1]; + __u8 hw; + __u16 count[TC_QOPT_MAX_QUEUE]; + __u16 offset[TC_QOPT_MAX_QUEUE]; +}; + +#define TC_MQPRIO_F_MODE 0x1 +#define TC_MQPRIO_F_SHAPER 0x2 +#define TC_MQPRIO_F_MIN_RATE 0x4 +#define TC_MQPRIO_F_MAX_RATE 0x8 + +enum { + TCA_MQPRIO_UNSPEC, + TCA_MQPRIO_MODE, + TCA_MQPRIO_SHAPER, + TCA_MQPRIO_MIN_RATE64, + TCA_MQPRIO_MAX_RATE64, + __TCA_MQPRIO_MAX, +}; + +#define TCA_MQPRIO_MAX (__TCA_MQPRIO_MAX - 1) + +/* SFB */ + +enum { + TCA_SFB_UNSPEC, + TCA_SFB_PARMS, + __TCA_SFB_MAX, +}; + +#define TCA_SFB_MAX (__TCA_SFB_MAX - 1) + +/* + * Note: increment, decrement are Q0.16 fixed-point values. + */ +struct tc_sfb_qopt { + __u32 rehash_interval; /* delay between hash move, in ms */ + __u32 warmup_time; /* double buffering warmup time in ms (warmup_time < rehash_interval) */ + __u32 max; /* max len of qlen_min */ + __u32 bin_size; /* maximum queue length per bin */ + __u32 increment; /* probability increment, (d1 in Blue) */ + __u32 decrement; /* probability decrement, (d2 in Blue) */ + __u32 limit; /* max SFB queue length */ + __u32 penalty_rate; /* inelastic flows are rate limited to 'rate' pps */ + __u32 penalty_burst; +}; + +struct tc_sfb_xstats { + __u32 earlydrop; + __u32 penaltydrop; + __u32 bucketdrop; + __u32 queuedrop; + __u32 childdrop; /* drops in child qdisc */ + __u32 marked; + __u32 maxqlen; + __u32 maxprob; + __u32 avgprob; +}; + +#define SFB_MAX_PROB 0xFFFF + +/* QFQ */ +enum { + TCA_QFQ_UNSPEC, + TCA_QFQ_WEIGHT, + TCA_QFQ_LMAX, + __TCA_QFQ_MAX +}; + +#define TCA_QFQ_MAX (__TCA_QFQ_MAX - 1) + +struct tc_qfq_stats { + __u32 weight; + __u32 lmax; +}; + +/* CODEL */ + +enum { + TCA_CODEL_UNSPEC, + TCA_CODEL_TARGET, + TCA_CODEL_LIMIT, + TCA_CODEL_INTERVAL, + TCA_CODEL_ECN, + TCA_CODEL_CE_THRESHOLD, + __TCA_CODEL_MAX +}; + +#define TCA_CODEL_MAX (__TCA_CODEL_MAX - 1) + +struct tc_codel_xstats { + __u32 maxpacket; /* largest packet we've seen so far */ + __u32 count; /* how many drops we've done since the last time we + * entered dropping state + */ + __u32 lastcount; /* count at entry to dropping state */ + __u32 ldelay; /* in-queue delay seen by most recently dequeued packet */ + __s32 drop_next; /* time to drop next packet */ + __u32 drop_overlimit; /* number of time max qdisc packet limit was hit */ + __u32 ecn_mark; /* number of packets we ECN marked instead of dropped */ + __u32 dropping; /* are we in dropping state ? */ + __u32 ce_mark; /* number of CE marked packets because of ce_threshold */ +}; + +/* FQ_CODEL */ + +#define FQ_CODEL_QUANTUM_MAX (1 << 20) + +enum { + TCA_FQ_CODEL_UNSPEC, + TCA_FQ_CODEL_TARGET, + TCA_FQ_CODEL_LIMIT, + TCA_FQ_CODEL_INTERVAL, + TCA_FQ_CODEL_ECN, + TCA_FQ_CODEL_FLOWS, + TCA_FQ_CODEL_QUANTUM, + TCA_FQ_CODEL_CE_THRESHOLD, + TCA_FQ_CODEL_DROP_BATCH_SIZE, + TCA_FQ_CODEL_MEMORY_LIMIT, + TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR, + TCA_FQ_CODEL_CE_THRESHOLD_MASK, + __TCA_FQ_CODEL_MAX +}; + +#define TCA_FQ_CODEL_MAX (__TCA_FQ_CODEL_MAX - 1) + +enum { + TCA_FQ_CODEL_XSTATS_QDISC, + TCA_FQ_CODEL_XSTATS_CLASS, +}; + +struct tc_fq_codel_qd_stats { + __u32 maxpacket; /* largest packet we've seen so far */ + __u32 drop_overlimit; /* number of time max qdisc + * packet limit was hit + */ + __u32 ecn_mark; /* number of packets we ECN marked + * instead of being dropped + */ + __u32 new_flow_count; /* number of time packets + * created a 'new flow' + */ + __u32 new_flows_len; /* count of flows in new list */ + __u32 old_flows_len; /* count of flows in old list */ + __u32 ce_mark; /* packets above ce_threshold */ + __u32 memory_usage; /* in bytes */ + __u32 drop_overmemory; +}; + +struct tc_fq_codel_cl_stats { + __s32 deficit; + __u32 ldelay; /* in-queue delay seen by most recently + * dequeued packet + */ + __u32 count; + __u32 lastcount; + __u32 dropping; + __s32 drop_next; +}; + +struct tc_fq_codel_xstats { + __u32 type; + union { + struct tc_fq_codel_qd_stats qdisc_stats; + struct tc_fq_codel_cl_stats class_stats; + }; +}; + +/* FQ */ + +enum { + TCA_FQ_UNSPEC, + + TCA_FQ_PLIMIT, /* limit of total number of packets in queue */ + + TCA_FQ_FLOW_PLIMIT, /* limit of packets per flow */ + + TCA_FQ_QUANTUM, /* RR quantum */ + + TCA_FQ_INITIAL_QUANTUM, /* RR quantum for new flow */ + + TCA_FQ_RATE_ENABLE, /* enable/disable rate limiting */ + + TCA_FQ_FLOW_DEFAULT_RATE,/* obsolete, do not use */ + + TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */ + + TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */ + + TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */ + + TCA_FQ_ORPHAN_MASK, /* mask applied to orphaned skb hashes */ + + TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */ + + TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */ + + TCA_FQ_TIMER_SLACK, /* timer slack */ + + TCA_FQ_HORIZON, /* time horizon in us */ + + TCA_FQ_HORIZON_DROP, /* drop packets beyond horizon, or cap their EDT */ + + __TCA_FQ_MAX +}; + +#define TCA_FQ_MAX (__TCA_FQ_MAX - 1) + +struct tc_fq_qd_stats { + __u64 gc_flows; + __u64 highprio_packets; + __u64 tcp_retrans; + __u64 throttled; + __u64 flows_plimit; + __u64 pkts_too_long; + __u64 allocation_errors; + __s64 time_next_delayed_flow; + __u32 flows; + __u32 inactive_flows; + __u32 throttled_flows; + __u32 unthrottle_latency_ns; + __u64 ce_mark; /* packets above ce_threshold */ + __u64 horizon_drops; + __u64 horizon_caps; +}; + +/* Heavy-Hitter Filter */ + +enum { + TCA_HHF_UNSPEC, + TCA_HHF_BACKLOG_LIMIT, + TCA_HHF_QUANTUM, + TCA_HHF_HH_FLOWS_LIMIT, + TCA_HHF_RESET_TIMEOUT, + TCA_HHF_ADMIT_BYTES, + TCA_HHF_EVICT_TIMEOUT, + TCA_HHF_NON_HH_WEIGHT, + __TCA_HHF_MAX +}; + +#define TCA_HHF_MAX (__TCA_HHF_MAX - 1) + +struct tc_hhf_xstats { + __u32 drop_overlimit; /* number of times max qdisc packet limit + * was hit + */ + __u32 hh_overlimit; /* number of times max heavy-hitters was hit */ + __u32 hh_tot_count; /* number of captured heavy-hitters so far */ + __u32 hh_cur_count; /* number of current heavy-hitters */ +}; + +/* PIE */ +enum { + TCA_PIE_UNSPEC, + TCA_PIE_TARGET, + TCA_PIE_LIMIT, + TCA_PIE_TUPDATE, + TCA_PIE_ALPHA, + TCA_PIE_BETA, + TCA_PIE_ECN, + TCA_PIE_BYTEMODE, + TCA_PIE_DQ_RATE_ESTIMATOR, + __TCA_PIE_MAX +}; +#define TCA_PIE_MAX (__TCA_PIE_MAX - 1) + +struct tc_pie_xstats { + __u64 prob; /* current probability */ + __u32 delay; /* current delay in ms */ + __u32 avg_dq_rate; /* current average dq_rate in + * bits/pie_time + */ + __u32 dq_rate_estimating; /* is avg_dq_rate being calculated? */ + __u32 packets_in; /* total number of packets enqueued */ + __u32 dropped; /* packets dropped due to pie_action */ + __u32 overlimit; /* dropped due to lack of space + * in queue + */ + __u32 maxq; /* maximum queue size */ + __u32 ecn_mark; /* packets marked with ecn*/ +}; + +/* FQ PIE */ +enum { + TCA_FQ_PIE_UNSPEC, + TCA_FQ_PIE_LIMIT, + TCA_FQ_PIE_FLOWS, + TCA_FQ_PIE_TARGET, + TCA_FQ_PIE_TUPDATE, + TCA_FQ_PIE_ALPHA, + TCA_FQ_PIE_BETA, + TCA_FQ_PIE_QUANTUM, + TCA_FQ_PIE_MEMORY_LIMIT, + TCA_FQ_PIE_ECN_PROB, + TCA_FQ_PIE_ECN, + TCA_FQ_PIE_BYTEMODE, + TCA_FQ_PIE_DQ_RATE_ESTIMATOR, + __TCA_FQ_PIE_MAX +}; +#define TCA_FQ_PIE_MAX (__TCA_FQ_PIE_MAX - 1) + +struct tc_fq_pie_xstats { + __u32 packets_in; /* total number of packets enqueued */ + __u32 dropped; /* packets dropped due to fq_pie_action */ + __u32 overlimit; /* dropped due to lack of space in queue */ + __u32 overmemory; /* dropped due to lack of memory in queue */ + __u32 ecn_mark; /* packets marked with ecn */ + __u32 new_flow_count; /* count of new flows created by packets */ + __u32 new_flows_len; /* count of flows in new list */ + __u32 old_flows_len; /* count of flows in old list */ + __u32 memory_usage; /* total memory across all queues */ +}; + +/* CBS */ +struct tc_cbs_qopt { + __u8 offload; + __u8 _pad[3]; + __s32 hicredit; + __s32 locredit; + __s32 idleslope; + __s32 sendslope; +}; + +enum { + TCA_CBS_UNSPEC, + TCA_CBS_PARMS, + __TCA_CBS_MAX, +}; + +#define TCA_CBS_MAX (__TCA_CBS_MAX - 1) + + +/* ETF */ +struct tc_etf_qopt { + __s32 delta; + __s32 clockid; + __u32 flags; +#define TC_ETF_DEADLINE_MODE_ON _BITUL(0) +#define TC_ETF_OFFLOAD_ON _BITUL(1) +#define TC_ETF_SKIP_SOCK_CHECK _BITUL(2) +}; + +enum { + TCA_ETF_UNSPEC, + TCA_ETF_PARMS, + __TCA_ETF_MAX, +}; + +#define TCA_ETF_MAX (__TCA_ETF_MAX - 1) + + +/* CAKE */ +enum { + TCA_CAKE_UNSPEC, + TCA_CAKE_PAD, + TCA_CAKE_BASE_RATE64, + TCA_CAKE_DIFFSERV_MODE, + TCA_CAKE_ATM, + TCA_CAKE_FLOW_MODE, + TCA_CAKE_OVERHEAD, + TCA_CAKE_RTT, + TCA_CAKE_TARGET, + TCA_CAKE_AUTORATE, + TCA_CAKE_MEMORY, + TCA_CAKE_NAT, + TCA_CAKE_RAW, + TCA_CAKE_WASH, + TCA_CAKE_MPU, + TCA_CAKE_INGRESS, + TCA_CAKE_ACK_FILTER, + TCA_CAKE_SPLIT_GSO, + TCA_CAKE_FWMARK, + __TCA_CAKE_MAX +}; +#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1) + +enum { + __TCA_CAKE_STATS_INVALID, + TCA_CAKE_STATS_PAD, + TCA_CAKE_STATS_CAPACITY_ESTIMATE64, + TCA_CAKE_STATS_MEMORY_LIMIT, + TCA_CAKE_STATS_MEMORY_USED, + TCA_CAKE_STATS_AVG_NETOFF, + TCA_CAKE_STATS_MIN_NETLEN, + TCA_CAKE_STATS_MAX_NETLEN, + TCA_CAKE_STATS_MIN_ADJLEN, + TCA_CAKE_STATS_MAX_ADJLEN, + TCA_CAKE_STATS_TIN_STATS, + TCA_CAKE_STATS_DEFICIT, + TCA_CAKE_STATS_COBALT_COUNT, + TCA_CAKE_STATS_DROPPING, + TCA_CAKE_STATS_DROP_NEXT_US, + TCA_CAKE_STATS_P_DROP, + TCA_CAKE_STATS_BLUE_TIMER_US, + __TCA_CAKE_STATS_MAX +}; +#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1) + +enum { + __TCA_CAKE_TIN_STATS_INVALID, + TCA_CAKE_TIN_STATS_PAD, + TCA_CAKE_TIN_STATS_SENT_PACKETS, + TCA_CAKE_TIN_STATS_SENT_BYTES64, + TCA_CAKE_TIN_STATS_DROPPED_PACKETS, + TCA_CAKE_TIN_STATS_DROPPED_BYTES64, + TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS, + TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64, + TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS, + TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64, + TCA_CAKE_TIN_STATS_BACKLOG_PACKETS, + TCA_CAKE_TIN_STATS_BACKLOG_BYTES, + TCA_CAKE_TIN_STATS_THRESHOLD_RATE64, + TCA_CAKE_TIN_STATS_TARGET_US, + TCA_CAKE_TIN_STATS_INTERVAL_US, + TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS, + TCA_CAKE_TIN_STATS_WAY_MISSES, + TCA_CAKE_TIN_STATS_WAY_COLLISIONS, + TCA_CAKE_TIN_STATS_PEAK_DELAY_US, + TCA_CAKE_TIN_STATS_AVG_DELAY_US, + TCA_CAKE_TIN_STATS_BASE_DELAY_US, + TCA_CAKE_TIN_STATS_SPARSE_FLOWS, + TCA_CAKE_TIN_STATS_BULK_FLOWS, + TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS, + TCA_CAKE_TIN_STATS_MAX_SKBLEN, + TCA_CAKE_TIN_STATS_FLOW_QUANTUM, + __TCA_CAKE_TIN_STATS_MAX +}; +#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1) +#define TC_CAKE_MAX_TINS (8) + +enum { + CAKE_FLOW_NONE = 0, + CAKE_FLOW_SRC_IP, + CAKE_FLOW_DST_IP, + CAKE_FLOW_HOSTS, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */ + CAKE_FLOW_FLOWS, + CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */ + CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */ + CAKE_FLOW_TRIPLE, /* = CAKE_FLOW_HOSTS | CAKE_FLOW_FLOWS */ + CAKE_FLOW_MAX, +}; + +enum { + CAKE_DIFFSERV_DIFFSERV3 = 0, + CAKE_DIFFSERV_DIFFSERV4, + CAKE_DIFFSERV_DIFFSERV8, + CAKE_DIFFSERV_BESTEFFORT, + CAKE_DIFFSERV_PRECEDENCE, + CAKE_DIFFSERV_MAX +}; + +enum { + CAKE_ACK_NONE = 0, + CAKE_ACK_FILTER, + CAKE_ACK_AGGRESSIVE, + CAKE_ACK_MAX +}; + +enum { + CAKE_ATM_NONE = 0, + CAKE_ATM_ATM, + CAKE_ATM_PTM, + CAKE_ATM_MAX +}; + + +/* TAPRIO */ +enum { + TC_TAPRIO_CMD_SET_GATES = 0x00, + TC_TAPRIO_CMD_SET_AND_HOLD = 0x01, + TC_TAPRIO_CMD_SET_AND_RELEASE = 0x02, +}; + +enum { + TCA_TAPRIO_SCHED_ENTRY_UNSPEC, + TCA_TAPRIO_SCHED_ENTRY_INDEX, /* u32 */ + TCA_TAPRIO_SCHED_ENTRY_CMD, /* u8 */ + TCA_TAPRIO_SCHED_ENTRY_GATE_MASK, /* u32 */ + TCA_TAPRIO_SCHED_ENTRY_INTERVAL, /* u32 */ + __TCA_TAPRIO_SCHED_ENTRY_MAX, +}; +#define TCA_TAPRIO_SCHED_ENTRY_MAX (__TCA_TAPRIO_SCHED_ENTRY_MAX - 1) + +/* The format for schedule entry list is: + * [TCA_TAPRIO_SCHED_ENTRY_LIST] + * [TCA_TAPRIO_SCHED_ENTRY] + * [TCA_TAPRIO_SCHED_ENTRY_CMD] + * [TCA_TAPRIO_SCHED_ENTRY_GATES] + * [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] + */ +enum { + TCA_TAPRIO_SCHED_UNSPEC, + TCA_TAPRIO_SCHED_ENTRY, + __TCA_TAPRIO_SCHED_MAX, +}; + +#define TCA_TAPRIO_SCHED_MAX (__TCA_TAPRIO_SCHED_MAX - 1) + +/* The format for the admin sched (dump only): + * [TCA_TAPRIO_SCHED_ADMIN_SCHED] + * [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] + * [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] + * [TCA_TAPRIO_ATTR_SCHED_ENTRY] + * [TCA_TAPRIO_ATTR_SCHED_ENTRY_CMD] + * [TCA_TAPRIO_ATTR_SCHED_ENTRY_GATES] + * [TCA_TAPRIO_ATTR_SCHED_ENTRY_INTERVAL] + */ + +#define TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST _BITUL(0) +#define TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD _BITUL(1) + +enum { + TCA_TAPRIO_TC_ENTRY_UNSPEC, + TCA_TAPRIO_TC_ENTRY_INDEX, /* u32 */ + TCA_TAPRIO_TC_ENTRY_MAX_SDU, /* u32 */ + + /* add new constants above here */ + __TCA_TAPRIO_TC_ENTRY_CNT, + TCA_TAPRIO_TC_ENTRY_MAX = (__TCA_TAPRIO_TC_ENTRY_CNT - 1) +}; + +enum { + TCA_TAPRIO_ATTR_UNSPEC, + TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */ + TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST, /* nested of entry */ + TCA_TAPRIO_ATTR_SCHED_BASE_TIME, /* s64 */ + TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY, /* single entry */ + TCA_TAPRIO_ATTR_SCHED_CLOCKID, /* s32 */ + TCA_TAPRIO_PAD, + TCA_TAPRIO_ATTR_ADMIN_SCHED, /* The admin sched, only used in dump */ + TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME, /* s64 */ + TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */ + TCA_TAPRIO_ATTR_FLAGS, /* u32 */ + TCA_TAPRIO_ATTR_TXTIME_DELAY, /* u32 */ + TCA_TAPRIO_ATTR_TC_ENTRY, /* nest */ + __TCA_TAPRIO_ATTR_MAX, +}; + +#define TCA_TAPRIO_ATTR_MAX (__TCA_TAPRIO_ATTR_MAX - 1) + +/* ETS */ + +#define TCQ_ETS_MAX_BANDS 16 + +enum { + TCA_ETS_UNSPEC, + TCA_ETS_NBANDS, /* u8 */ + TCA_ETS_NSTRICT, /* u8 */ + TCA_ETS_QUANTA, /* nested TCA_ETS_QUANTA_BAND */ + TCA_ETS_QUANTA_BAND, /* u32 */ + TCA_ETS_PRIOMAP, /* nested TCA_ETS_PRIOMAP_BAND */ + TCA_ETS_PRIOMAP_BAND, /* u8 */ + __TCA_ETS_MAX, +}; + +#define TCA_ETS_MAX (__TCA_ETS_MAX - 1) + +#endif diff --git a/src/basic/linux/rtnetlink.h b/src/basic/linux/rtnetlink.h new file mode 100644 index 0000000..eb2747d --- /dev/null +++ b/src/basic/linux/rtnetlink.h @@ -0,0 +1,826 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_RTNETLINK_H +#define _UAPI__LINUX_RTNETLINK_H + +#include +#include +#include +#include +#include + +/* rtnetlink families. Values up to 127 are reserved for real address + * families, values above 128 may be used arbitrarily. + */ +#define RTNL_FAMILY_IPMR 128 +#define RTNL_FAMILY_IP6MR 129 +#define RTNL_FAMILY_MAX 129 + +/**** + * Routing/neighbour discovery messages. + ****/ + +/* Types of messages */ + +enum { + RTM_BASE = 16, +#define RTM_BASE RTM_BASE + + RTM_NEWLINK = 16, +#define RTM_NEWLINK RTM_NEWLINK + RTM_DELLINK, +#define RTM_DELLINK RTM_DELLINK + RTM_GETLINK, +#define RTM_GETLINK RTM_GETLINK + RTM_SETLINK, +#define RTM_SETLINK RTM_SETLINK + + RTM_NEWADDR = 20, +#define RTM_NEWADDR RTM_NEWADDR + RTM_DELADDR, +#define RTM_DELADDR RTM_DELADDR + RTM_GETADDR, +#define RTM_GETADDR RTM_GETADDR + + RTM_NEWROUTE = 24, +#define RTM_NEWROUTE RTM_NEWROUTE + RTM_DELROUTE, +#define RTM_DELROUTE RTM_DELROUTE + RTM_GETROUTE, +#define RTM_GETROUTE RTM_GETROUTE + + RTM_NEWNEIGH = 28, +#define RTM_NEWNEIGH RTM_NEWNEIGH + RTM_DELNEIGH, +#define RTM_DELNEIGH RTM_DELNEIGH + RTM_GETNEIGH, +#define RTM_GETNEIGH RTM_GETNEIGH + + RTM_NEWRULE = 32, +#define RTM_NEWRULE RTM_NEWRULE + RTM_DELRULE, +#define RTM_DELRULE RTM_DELRULE + RTM_GETRULE, +#define RTM_GETRULE RTM_GETRULE + + RTM_NEWQDISC = 36, +#define RTM_NEWQDISC RTM_NEWQDISC + RTM_DELQDISC, +#define RTM_DELQDISC RTM_DELQDISC + RTM_GETQDISC, +#define RTM_GETQDISC RTM_GETQDISC + + RTM_NEWTCLASS = 40, +#define RTM_NEWTCLASS RTM_NEWTCLASS + RTM_DELTCLASS, +#define RTM_DELTCLASS RTM_DELTCLASS + RTM_GETTCLASS, +#define RTM_GETTCLASS RTM_GETTCLASS + + RTM_NEWTFILTER = 44, +#define RTM_NEWTFILTER RTM_NEWTFILTER + RTM_DELTFILTER, +#define RTM_DELTFILTER RTM_DELTFILTER + RTM_GETTFILTER, +#define RTM_GETTFILTER RTM_GETTFILTER + + RTM_NEWACTION = 48, +#define RTM_NEWACTION RTM_NEWACTION + RTM_DELACTION, +#define RTM_DELACTION RTM_DELACTION + RTM_GETACTION, +#define RTM_GETACTION RTM_GETACTION + + RTM_NEWPREFIX = 52, +#define RTM_NEWPREFIX RTM_NEWPREFIX + + RTM_GETMULTICAST = 58, +#define RTM_GETMULTICAST RTM_GETMULTICAST + + RTM_GETANYCAST = 62, +#define RTM_GETANYCAST RTM_GETANYCAST + + RTM_NEWNEIGHTBL = 64, +#define RTM_NEWNEIGHTBL RTM_NEWNEIGHTBL + RTM_GETNEIGHTBL = 66, +#define RTM_GETNEIGHTBL RTM_GETNEIGHTBL + RTM_SETNEIGHTBL, +#define RTM_SETNEIGHTBL RTM_SETNEIGHTBL + + RTM_NEWNDUSEROPT = 68, +#define RTM_NEWNDUSEROPT RTM_NEWNDUSEROPT + + RTM_NEWADDRLABEL = 72, +#define RTM_NEWADDRLABEL RTM_NEWADDRLABEL + RTM_DELADDRLABEL, +#define RTM_DELADDRLABEL RTM_DELADDRLABEL + RTM_GETADDRLABEL, +#define RTM_GETADDRLABEL RTM_GETADDRLABEL + + RTM_GETDCB = 78, +#define RTM_GETDCB RTM_GETDCB + RTM_SETDCB, +#define RTM_SETDCB RTM_SETDCB + + RTM_NEWNETCONF = 80, +#define RTM_NEWNETCONF RTM_NEWNETCONF + RTM_DELNETCONF, +#define RTM_DELNETCONF RTM_DELNETCONF + RTM_GETNETCONF = 82, +#define RTM_GETNETCONF RTM_GETNETCONF + + RTM_NEWMDB = 84, +#define RTM_NEWMDB RTM_NEWMDB + RTM_DELMDB = 85, +#define RTM_DELMDB RTM_DELMDB + RTM_GETMDB = 86, +#define RTM_GETMDB RTM_GETMDB + + RTM_NEWNSID = 88, +#define RTM_NEWNSID RTM_NEWNSID + RTM_DELNSID = 89, +#define RTM_DELNSID RTM_DELNSID + RTM_GETNSID = 90, +#define RTM_GETNSID RTM_GETNSID + + RTM_NEWSTATS = 92, +#define RTM_NEWSTATS RTM_NEWSTATS + RTM_GETSTATS = 94, +#define RTM_GETSTATS RTM_GETSTATS + RTM_SETSTATS, +#define RTM_SETSTATS RTM_SETSTATS + + RTM_NEWCACHEREPORT = 96, +#define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT + + RTM_NEWCHAIN = 100, +#define RTM_NEWCHAIN RTM_NEWCHAIN + RTM_DELCHAIN, +#define RTM_DELCHAIN RTM_DELCHAIN + RTM_GETCHAIN, +#define RTM_GETCHAIN RTM_GETCHAIN + + RTM_NEWNEXTHOP = 104, +#define RTM_NEWNEXTHOP RTM_NEWNEXTHOP + RTM_DELNEXTHOP, +#define RTM_DELNEXTHOP RTM_DELNEXTHOP + RTM_GETNEXTHOP, +#define RTM_GETNEXTHOP RTM_GETNEXTHOP + + RTM_NEWLINKPROP = 108, +#define RTM_NEWLINKPROP RTM_NEWLINKPROP + RTM_DELLINKPROP, +#define RTM_DELLINKPROP RTM_DELLINKPROP + RTM_GETLINKPROP, +#define RTM_GETLINKPROP RTM_GETLINKPROP + + RTM_NEWVLAN = 112, +#define RTM_NEWNVLAN RTM_NEWVLAN + RTM_DELVLAN, +#define RTM_DELVLAN RTM_DELVLAN + RTM_GETVLAN, +#define RTM_GETVLAN RTM_GETVLAN + + RTM_NEWNEXTHOPBUCKET = 116, +#define RTM_NEWNEXTHOPBUCKET RTM_NEWNEXTHOPBUCKET + RTM_DELNEXTHOPBUCKET, +#define RTM_DELNEXTHOPBUCKET RTM_DELNEXTHOPBUCKET + RTM_GETNEXTHOPBUCKET, +#define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET + + RTM_NEWTUNNEL = 120, +#define RTM_NEWTUNNEL RTM_NEWTUNNEL + RTM_DELTUNNEL, +#define RTM_DELTUNNEL RTM_DELTUNNEL + RTM_GETTUNNEL, +#define RTM_GETTUNNEL RTM_GETTUNNEL + + __RTM_MAX, +#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) +}; + +#define RTM_NR_MSGTYPES (RTM_MAX + 1 - RTM_BASE) +#define RTM_NR_FAMILIES (RTM_NR_MSGTYPES >> 2) +#define RTM_FAM(cmd) (((cmd) - RTM_BASE) >> 2) + +/* + Generic structure for encapsulation of optional route information. + It is reminiscent of sockaddr, but with sa_family replaced + with attribute type. + */ + +struct rtattr { + unsigned short rta_len; + unsigned short rta_type; +}; + +/* Macros to handle rtattributes */ + +#define RTA_ALIGNTO 4U +#define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) ) +#define RTA_OK(rta,len) ((len) >= (int)sizeof(struct rtattr) && \ + (rta)->rta_len >= sizeof(struct rtattr) && \ + (rta)->rta_len <= (len)) +#define RTA_NEXT(rta,attrlen) ((attrlen) -= RTA_ALIGN((rta)->rta_len), \ + (struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len))) +#define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len)) +#define RTA_SPACE(len) RTA_ALIGN(RTA_LENGTH(len)) +#define RTA_DATA(rta) ((void*)(((char*)(rta)) + RTA_LENGTH(0))) +#define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0)) + + + + +/****************************************************************************** + * Definitions used in routing table administration. + ****/ + +struct rtmsg { + unsigned char rtm_family; + unsigned char rtm_dst_len; + unsigned char rtm_src_len; + unsigned char rtm_tos; + + unsigned char rtm_table; /* Routing table id */ + unsigned char rtm_protocol; /* Routing protocol; see below */ + unsigned char rtm_scope; /* See below */ + unsigned char rtm_type; /* See below */ + + unsigned rtm_flags; +}; + +/* rtm_type */ + +enum { + RTN_UNSPEC, + RTN_UNICAST, /* Gateway or direct route */ + RTN_LOCAL, /* Accept locally */ + RTN_BROADCAST, /* Accept locally as broadcast, + send as broadcast */ + RTN_ANYCAST, /* Accept locally as broadcast, + but send as unicast */ + RTN_MULTICAST, /* Multicast route */ + RTN_BLACKHOLE, /* Drop */ + RTN_UNREACHABLE, /* Destination is unreachable */ + RTN_PROHIBIT, /* Administratively prohibited */ + RTN_THROW, /* Not in this table */ + RTN_NAT, /* Translate this address */ + RTN_XRESOLVE, /* Use external resolver */ + __RTN_MAX +}; + +#define RTN_MAX (__RTN_MAX - 1) + + +/* rtm_protocol */ + +#define RTPROT_UNSPEC 0 +#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects; + not used by current IPv4 */ +#define RTPROT_KERNEL 2 /* Route installed by kernel */ +#define RTPROT_BOOT 3 /* Route installed during boot */ +#define RTPROT_STATIC 4 /* Route installed by administrator */ + +/* Values of protocol >= RTPROT_STATIC are not interpreted by kernel; + they are just passed from user and back as is. + It will be used by hypothetical multiple routing daemons. + Note that protocol values should be standardized in order to + avoid conflicts. + */ + +#define RTPROT_GATED 8 /* Apparently, GateD */ +#define RTPROT_RA 9 /* RDISC/ND router advertisements */ +#define RTPROT_MRT 10 /* Merit MRT */ +#define RTPROT_ZEBRA 11 /* Zebra */ +#define RTPROT_BIRD 12 /* BIRD */ +#define RTPROT_DNROUTED 13 /* DECnet routing daemon */ +#define RTPROT_XORP 14 /* XORP */ +#define RTPROT_NTK 15 /* Netsukuku */ +#define RTPROT_DHCP 16 /* DHCP client */ +#define RTPROT_MROUTED 17 /* Multicast daemon */ +#define RTPROT_KEEPALIVED 18 /* Keepalived daemon */ +#define RTPROT_BABEL 42 /* Babel daemon */ +#define RTPROT_OPENR 99 /* Open Routing (Open/R) Routes */ +#define RTPROT_BGP 186 /* BGP Routes */ +#define RTPROT_ISIS 187 /* ISIS Routes */ +#define RTPROT_OSPF 188 /* OSPF Routes */ +#define RTPROT_RIP 189 /* RIP Routes */ +#define RTPROT_EIGRP 192 /* EIGRP Routes */ + +/* rtm_scope + + Really it is not scope, but sort of distance to the destination. + NOWHERE are reserved for not existing destinations, HOST is our + local addresses, LINK are destinations, located on directly attached + link and UNIVERSE is everywhere in the Universe. + + Intermediate values are also possible f.e. interior routes + could be assigned a value between UNIVERSE and LINK. +*/ + +enum rt_scope_t { + RT_SCOPE_UNIVERSE=0, +/* User defined values */ + RT_SCOPE_SITE=200, + RT_SCOPE_LINK=253, + RT_SCOPE_HOST=254, + RT_SCOPE_NOWHERE=255 +}; + +/* rtm_flags */ + +#define RTM_F_NOTIFY 0x100 /* Notify user of route change */ +#define RTM_F_CLONED 0x200 /* This route is cloned */ +#define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */ +#define RTM_F_PREFIX 0x800 /* Prefix addresses */ +#define RTM_F_LOOKUP_TABLE 0x1000 /* set rtm_table to FIB lookup result */ +#define RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */ +#define RTM_F_OFFLOAD 0x4000 /* route is offloaded */ +#define RTM_F_TRAP 0x8000 /* route is trapping packets */ +#define RTM_F_OFFLOAD_FAILED 0x20000000 /* route offload failed, this value + * is chosen to avoid conflicts with + * other flags defined in + * include/uapi/linux/ipv6_route.h + */ + +/* Reserved table identifiers */ + +enum rt_class_t { + RT_TABLE_UNSPEC=0, +/* User defined values */ + RT_TABLE_COMPAT=252, + RT_TABLE_DEFAULT=253, + RT_TABLE_MAIN=254, + RT_TABLE_LOCAL=255, + RT_TABLE_MAX=0xFFFFFFFF +}; + + +/* Routing message attributes */ + +enum rtattr_type_t { + RTA_UNSPEC, + RTA_DST, + RTA_SRC, + RTA_IIF, + RTA_OIF, + RTA_GATEWAY, + RTA_PRIORITY, + RTA_PREFSRC, + RTA_METRICS, + RTA_MULTIPATH, + RTA_PROTOINFO, /* no longer used */ + RTA_FLOW, + RTA_CACHEINFO, + RTA_SESSION, /* no longer used */ + RTA_MP_ALGO, /* no longer used */ + RTA_TABLE, + RTA_MARK, + RTA_MFC_STATS, + RTA_VIA, + RTA_NEWDST, + RTA_PREF, + RTA_ENCAP_TYPE, + RTA_ENCAP, + RTA_EXPIRES, + RTA_PAD, + RTA_UID, + RTA_TTL_PROPAGATE, + RTA_IP_PROTO, + RTA_SPORT, + RTA_DPORT, + RTA_NH_ID, + __RTA_MAX +}; + +#define RTA_MAX (__RTA_MAX - 1) + +#define RTM_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct rtmsg)))) +#define RTM_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct rtmsg)) + +/* RTM_MULTIPATH --- array of struct rtnexthop. + * + * "struct rtnexthop" describes all necessary nexthop information, + * i.e. parameters of path to a destination via this nexthop. + * + * At the moment it is impossible to set different prefsrc, mtu, window + * and rtt for different paths from multipath. + */ + +struct rtnexthop { + unsigned short rtnh_len; + unsigned char rtnh_flags; + unsigned char rtnh_hops; + int rtnh_ifindex; +}; + +/* rtnh_flags */ + +#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ +#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ +#define RTNH_F_ONLINK 4 /* Gateway is forced on link */ +#define RTNH_F_OFFLOAD 8 /* Nexthop is offloaded */ +#define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */ +#define RTNH_F_UNRESOLVED 32 /* The entry is unresolved (ipmr) */ +#define RTNH_F_TRAP 64 /* Nexthop is trapping packets */ + +#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | \ + RTNH_F_OFFLOAD | RTNH_F_TRAP) + +/* Macros to handle hexthops */ + +#define RTNH_ALIGNTO 4 +#define RTNH_ALIGN(len) ( ((len)+RTNH_ALIGNTO-1) & ~(RTNH_ALIGNTO-1) ) +#define RTNH_OK(rtnh,len) ((rtnh)->rtnh_len >= sizeof(struct rtnexthop) && \ + ((int)(rtnh)->rtnh_len) <= (len)) +#define RTNH_NEXT(rtnh) ((struct rtnexthop*)(((char*)(rtnh)) + RTNH_ALIGN((rtnh)->rtnh_len))) +#define RTNH_LENGTH(len) (RTNH_ALIGN(sizeof(struct rtnexthop)) + (len)) +#define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len)) +#define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0))) + +/* RTA_VIA */ +struct rtvia { + __kernel_sa_family_t rtvia_family; + __u8 rtvia_addr[]; +}; + +/* RTM_CACHEINFO */ + +struct rta_cacheinfo { + __u32 rta_clntref; + __u32 rta_lastuse; + __s32 rta_expires; + __u32 rta_error; + __u32 rta_used; + +#define RTNETLINK_HAVE_PEERINFO 1 + __u32 rta_id; + __u32 rta_ts; + __u32 rta_tsage; +}; + +/* RTM_METRICS --- array of struct rtattr with types of RTAX_* */ + +enum { + RTAX_UNSPEC, +#define RTAX_UNSPEC RTAX_UNSPEC + RTAX_LOCK, +#define RTAX_LOCK RTAX_LOCK + RTAX_MTU, +#define RTAX_MTU RTAX_MTU + RTAX_WINDOW, +#define RTAX_WINDOW RTAX_WINDOW + RTAX_RTT, +#define RTAX_RTT RTAX_RTT + RTAX_RTTVAR, +#define RTAX_RTTVAR RTAX_RTTVAR + RTAX_SSTHRESH, +#define RTAX_SSTHRESH RTAX_SSTHRESH + RTAX_CWND, +#define RTAX_CWND RTAX_CWND + RTAX_ADVMSS, +#define RTAX_ADVMSS RTAX_ADVMSS + RTAX_REORDERING, +#define RTAX_REORDERING RTAX_REORDERING + RTAX_HOPLIMIT, +#define RTAX_HOPLIMIT RTAX_HOPLIMIT + RTAX_INITCWND, +#define RTAX_INITCWND RTAX_INITCWND + RTAX_FEATURES, +#define RTAX_FEATURES RTAX_FEATURES + RTAX_RTO_MIN, +#define RTAX_RTO_MIN RTAX_RTO_MIN + RTAX_INITRWND, +#define RTAX_INITRWND RTAX_INITRWND + RTAX_QUICKACK, +#define RTAX_QUICKACK RTAX_QUICKACK + RTAX_CC_ALGO, +#define RTAX_CC_ALGO RTAX_CC_ALGO + RTAX_FASTOPEN_NO_COOKIE, +#define RTAX_FASTOPEN_NO_COOKIE RTAX_FASTOPEN_NO_COOKIE + __RTAX_MAX +}; + +#define RTAX_MAX (__RTAX_MAX - 1) + +#define RTAX_FEATURE_ECN (1 << 0) +#define RTAX_FEATURE_SACK (1 << 1) +#define RTAX_FEATURE_TIMESTAMP (1 << 2) +#define RTAX_FEATURE_ALLFRAG (1 << 3) + +#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | RTAX_FEATURE_SACK | \ + RTAX_FEATURE_TIMESTAMP | RTAX_FEATURE_ALLFRAG) + +struct rta_session { + __u8 proto; + __u8 pad1; + __u16 pad2; + + union { + struct { + __u16 sport; + __u16 dport; + } ports; + + struct { + __u8 type; + __u8 code; + __u16 ident; + } icmpt; + + __u32 spi; + } u; +}; + +struct rta_mfc_stats { + __u64 mfcs_packets; + __u64 mfcs_bytes; + __u64 mfcs_wrong_if; +}; + +/**** + * General form of address family dependent message. + ****/ + +struct rtgenmsg { + unsigned char rtgen_family; +}; + +/***************************************************************** + * Link layer specific messages. + ****/ + +/* struct ifinfomsg + * passes link level specific information, not dependent + * on network protocol. + */ + +struct ifinfomsg { + unsigned char ifi_family; + unsigned char __ifi_pad; + unsigned short ifi_type; /* ARPHRD_* */ + int ifi_index; /* Link index */ + unsigned ifi_flags; /* IFF_* flags */ + unsigned ifi_change; /* IFF_* change mask */ +}; + +/******************************************************************** + * prefix information + ****/ + +struct prefixmsg { + unsigned char prefix_family; + unsigned char prefix_pad1; + unsigned short prefix_pad2; + int prefix_ifindex; + unsigned char prefix_type; + unsigned char prefix_len; + unsigned char prefix_flags; + unsigned char prefix_pad3; +}; + +enum +{ + PREFIX_UNSPEC, + PREFIX_ADDRESS, + PREFIX_CACHEINFO, + __PREFIX_MAX +}; + +#define PREFIX_MAX (__PREFIX_MAX - 1) + +struct prefix_cacheinfo { + __u32 preferred_time; + __u32 valid_time; +}; + + +/***************************************************************** + * Traffic control messages. + ****/ + +struct tcmsg { + unsigned char tcm_family; + unsigned char tcm__pad1; + unsigned short tcm__pad2; + int tcm_ifindex; + __u32 tcm_handle; + __u32 tcm_parent; +/* tcm_block_index is used instead of tcm_parent + * in case tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK + */ +#define tcm_block_index tcm_parent + __u32 tcm_info; +}; + +/* For manipulation of filters in shared block, tcm_ifindex is set to + * TCM_IFINDEX_MAGIC_BLOCK, and tcm_parent is aliased to tcm_block_index + * which is the block index. + */ +#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU) + +enum { + TCA_UNSPEC, + TCA_KIND, + TCA_OPTIONS, + TCA_STATS, + TCA_XSTATS, + TCA_RATE, + TCA_FCNT, + TCA_STATS2, + TCA_STAB, + TCA_PAD, + TCA_DUMP_INVISIBLE, + TCA_CHAIN, + TCA_HW_OFFLOAD, + TCA_INGRESS_BLOCK, + TCA_EGRESS_BLOCK, + TCA_DUMP_FLAGS, + __TCA_MAX +}; + +#define TCA_MAX (__TCA_MAX - 1) + +#define TCA_DUMP_FLAGS_TERSE (1 << 0) /* Means that in dump user gets only basic + * data necessary to identify the objects + * (handle, cookie, etc.) and stats. + */ + +#define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg)))) +#define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg)) + +/******************************************************************** + * Neighbor Discovery userland options + ****/ + +struct nduseroptmsg { + unsigned char nduseropt_family; + unsigned char nduseropt_pad1; + unsigned short nduseropt_opts_len; /* Total length of options */ + int nduseropt_ifindex; + __u8 nduseropt_icmp_type; + __u8 nduseropt_icmp_code; + unsigned short nduseropt_pad2; + unsigned int nduseropt_pad3; + /* Followed by one or more ND options */ +}; + +enum { + NDUSEROPT_UNSPEC, + NDUSEROPT_SRCADDR, + __NDUSEROPT_MAX +}; + +#define NDUSEROPT_MAX (__NDUSEROPT_MAX - 1) + +#ifndef __KERNEL__ +/* RTnetlink multicast groups - backwards compatibility for userspace */ +#define RTMGRP_LINK 1 +#define RTMGRP_NOTIFY 2 +#define RTMGRP_NEIGH 4 +#define RTMGRP_TC 8 + +#define RTMGRP_IPV4_IFADDR 0x10 +#define RTMGRP_IPV4_MROUTE 0x20 +#define RTMGRP_IPV4_ROUTE 0x40 +#define RTMGRP_IPV4_RULE 0x80 + +#define RTMGRP_IPV6_IFADDR 0x100 +#define RTMGRP_IPV6_MROUTE 0x200 +#define RTMGRP_IPV6_ROUTE 0x400 +#define RTMGRP_IPV6_IFINFO 0x800 + +#define RTMGRP_DECnet_IFADDR 0x1000 +#define RTMGRP_DECnet_ROUTE 0x4000 + +#define RTMGRP_IPV6_PREFIX 0x20000 +#endif + +/* RTnetlink multicast groups */ +enum rtnetlink_groups { + RTNLGRP_NONE, +#define RTNLGRP_NONE RTNLGRP_NONE + RTNLGRP_LINK, +#define RTNLGRP_LINK RTNLGRP_LINK + RTNLGRP_NOTIFY, +#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY + RTNLGRP_NEIGH, +#define RTNLGRP_NEIGH RTNLGRP_NEIGH + RTNLGRP_TC, +#define RTNLGRP_TC RTNLGRP_TC + RTNLGRP_IPV4_IFADDR, +#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR + RTNLGRP_IPV4_MROUTE, +#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE + RTNLGRP_IPV4_ROUTE, +#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE + RTNLGRP_IPV4_RULE, +#define RTNLGRP_IPV4_RULE RTNLGRP_IPV4_RULE + RTNLGRP_IPV6_IFADDR, +#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR + RTNLGRP_IPV6_MROUTE, +#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE + RTNLGRP_IPV6_ROUTE, +#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE + RTNLGRP_IPV6_IFINFO, +#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO + RTNLGRP_DECnet_IFADDR, +#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR + RTNLGRP_NOP2, + RTNLGRP_DECnet_ROUTE, +#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE + RTNLGRP_DECnet_RULE, +#define RTNLGRP_DECnet_RULE RTNLGRP_DECnet_RULE + RTNLGRP_NOP4, + RTNLGRP_IPV6_PREFIX, +#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX + RTNLGRP_IPV6_RULE, +#define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE + RTNLGRP_ND_USEROPT, +#define RTNLGRP_ND_USEROPT RTNLGRP_ND_USEROPT + RTNLGRP_PHONET_IFADDR, +#define RTNLGRP_PHONET_IFADDR RTNLGRP_PHONET_IFADDR + RTNLGRP_PHONET_ROUTE, +#define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE + RTNLGRP_DCB, +#define RTNLGRP_DCB RTNLGRP_DCB + RTNLGRP_IPV4_NETCONF, +#define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF + RTNLGRP_IPV6_NETCONF, +#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF + RTNLGRP_MDB, +#define RTNLGRP_MDB RTNLGRP_MDB + RTNLGRP_MPLS_ROUTE, +#define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE + RTNLGRP_NSID, +#define RTNLGRP_NSID RTNLGRP_NSID + RTNLGRP_MPLS_NETCONF, +#define RTNLGRP_MPLS_NETCONF RTNLGRP_MPLS_NETCONF + RTNLGRP_IPV4_MROUTE_R, +#define RTNLGRP_IPV4_MROUTE_R RTNLGRP_IPV4_MROUTE_R + RTNLGRP_IPV6_MROUTE_R, +#define RTNLGRP_IPV6_MROUTE_R RTNLGRP_IPV6_MROUTE_R + RTNLGRP_NEXTHOP, +#define RTNLGRP_NEXTHOP RTNLGRP_NEXTHOP + RTNLGRP_BRVLAN, +#define RTNLGRP_BRVLAN RTNLGRP_BRVLAN + RTNLGRP_MCTP_IFADDR, +#define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR + RTNLGRP_TUNNEL, +#define RTNLGRP_TUNNEL RTNLGRP_TUNNEL + RTNLGRP_STATS, +#define RTNLGRP_STATS RTNLGRP_STATS + __RTNLGRP_MAX +}; +#define RTNLGRP_MAX (__RTNLGRP_MAX - 1) + +/* TC action piece */ +struct tcamsg { + unsigned char tca_family; + unsigned char tca__pad1; + unsigned short tca__pad2; +}; + +enum { + TCA_ROOT_UNSPEC, + TCA_ROOT_TAB, +#define TCA_ACT_TAB TCA_ROOT_TAB +#define TCAA_MAX TCA_ROOT_TAB + TCA_ROOT_FLAGS, + TCA_ROOT_COUNT, + TCA_ROOT_TIME_DELTA, /* in msecs */ + __TCA_ROOT_MAX, +#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1) +}; + +#define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg)))) +#define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg)) +/* tcamsg flags stored in attribute TCA_ROOT_FLAGS + * + * TCA_ACT_FLAG_LARGE_DUMP_ON user->kernel to request for larger than + * TCA_ACT_MAX_PRIO actions in a dump. All dump responses will contain the + * number of actions being dumped stored in for user app's consumption in + * TCA_ROOT_COUNT + * + * TCA_ACT_FLAG_TERSE_DUMP user->kernel to request terse (brief) dump that only + * includes essential action info (kind, index, etc.) + * + */ +#define TCA_FLAG_LARGE_DUMP_ON (1 << 0) +#define TCA_ACT_FLAG_LARGE_DUMP_ON TCA_FLAG_LARGE_DUMP_ON +#define TCA_ACT_FLAG_TERSE_DUMP (1 << 1) + +/* New extended info filters for IFLA_EXT_MASK */ +#define RTEXT_FILTER_VF (1 << 0) +#define RTEXT_FILTER_BRVLAN (1 << 1) +#define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2) +#define RTEXT_FILTER_SKIP_STATS (1 << 3) +#define RTEXT_FILTER_MRP (1 << 4) +#define RTEXT_FILTER_CFM_CONFIG (1 << 5) +#define RTEXT_FILTER_CFM_STATUS (1 << 6) +#define RTEXT_FILTER_MST (1 << 7) + +/* End of information exported to user level */ + + + +#endif /* _UAPI__LINUX_RTNETLINK_H */ diff --git a/src/basic/linux/stddef.h b/src/basic/linux/stddef.h new file mode 100644 index 0000000..1a73963 --- /dev/null +++ b/src/basic/linux/stddef.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_STDDEF_H +#define _UAPI_LINUX_STDDEF_H + + +#ifndef __always_inline +#define __always_inline inline +#endif + +/** + * __struct_group() - Create a mirrored named and anonyomous struct + * + * @TAG: The tag name for the named sub-struct (usually empty) + * @NAME: The identifier name of the mirrored sub-struct + * @ATTRS: Any struct attributes (usually empty) + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical layout + * and size: one anonymous and one named. The former's members can be used + * normally without sub-struct naming, and the latter can be used to + * reason about the start, end, and size of the group of struct members. + * The named struct can also be explicitly tagged for layer reuse, as well + * as both having struct attributes appended. + */ +#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \ + union { \ + struct { MEMBERS } ATTRS; \ + struct TAG { MEMBERS } ATTRS NAME; \ + } + +/** + * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union + * + * @TYPE: The type of each flexible array element + * @NAME: The name of the flexible array member + * + * In order to have a flexible array member in a union or alone in a + * struct, it needs to be wrapped in an anonymous struct with at least 1 + * named member, but that member can be empty. + */ +#define __DECLARE_FLEX_ARRAY(TYPE, NAME) \ + struct { \ + struct { } __empty_ ## NAME; \ + TYPE NAME[]; \ + } +#endif diff --git a/src/basic/linux/update.sh b/src/basic/linux/update.sh new file mode 100755 index 0000000..6155766 --- /dev/null +++ b/src/basic/linux/update.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: LGPL-2.1-or-later +set -eu +set -o pipefail + +for i in *.h */*.h; do + curl --fail "https://raw.githubusercontent.com/torvalds/linux/master/include/uapi/linux/$i" -o "$i" + + sed -r -i -e 's/__user //g' -e '/^#include / d' "$i" + sed -r -i 's/^(#include )/#if WANT_LINUX_FS_H\n\1\n#endif/' "$i" +done diff --git a/src/basic/linux/wireguard.h b/src/basic/linux/wireguard.h new file mode 100644 index 0000000..ae88be1 --- /dev/null +++ b/src/basic/linux/wireguard.h @@ -0,0 +1,196 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * Documentation + * ============= + * + * The below enums and macros are for interfacing with WireGuard, using generic + * netlink, with family WG_GENL_NAME and version WG_GENL_VERSION. It defines two + * methods: get and set. Note that while they share many common attributes, + * these two functions actually accept a slightly different set of inputs and + * outputs. + * + * WG_CMD_GET_DEVICE + * ----------------- + * + * May only be called via NLM_F_REQUEST | NLM_F_DUMP. The command should contain + * one but not both of: + * + * WGDEVICE_A_IFINDEX: NLA_U32 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 + * + * The kernel will then return several messages (NLM_F_MULTI) containing the + * following tree of nested items: + * + * WGDEVICE_A_IFINDEX: NLA_U32 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 + * WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGDEVICE_A_LISTEN_PORT: NLA_U16 + * WGDEVICE_A_FWMARK: NLA_U32 + * WGDEVICE_A_PEERS: NLA_NESTED + * 0: NLA_NESTED + * WGPEER_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGPEER_A_PRESHARED_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGPEER_A_ENDPOINT: NLA_MIN_LEN(struct sockaddr), struct sockaddr_in or struct sockaddr_in6 + * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16 + * WGPEER_A_LAST_HANDSHAKE_TIME: NLA_EXACT_LEN, struct __kernel_timespec + * WGPEER_A_RX_BYTES: NLA_U64 + * WGPEER_A_TX_BYTES: NLA_U64 + * WGPEER_A_ALLOWEDIPS: NLA_NESTED + * 0: NLA_NESTED + * WGALLOWEDIP_A_FAMILY: NLA_U16 + * WGALLOWEDIP_A_IPADDR: NLA_MIN_LEN(struct in_addr), struct in_addr or struct in6_addr + * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 + * 0: NLA_NESTED + * ... + * 0: NLA_NESTED + * ... + * ... + * WGPEER_A_PROTOCOL_VERSION: NLA_U32 + * 0: NLA_NESTED + * ... + * ... + * + * It is possible that all of the allowed IPs of a single peer will not + * fit within a single netlink message. In that case, the same peer will + * be written in the following message, except it will only contain + * WGPEER_A_PUBLIC_KEY and WGPEER_A_ALLOWEDIPS. This may occur several + * times in a row for the same peer. It is then up to the receiver to + * coalesce adjacent peers. Likewise, it is possible that all peers will + * not fit within a single message. So, subsequent peers will be sent + * in following messages, except those will only contain WGDEVICE_A_IFNAME + * and WGDEVICE_A_PEERS. It is then up to the receiver to coalesce these + * messages to form the complete list of peers. + * + * Since this is an NLA_F_DUMP command, the final message will always be + * NLMSG_DONE, even if an error occurs. However, this NLMSG_DONE message + * contains an integer error code. It is either zero or a negative error + * code corresponding to the errno. + * + * WG_CMD_SET_DEVICE + * ----------------- + * + * May only be called via NLM_F_REQUEST. The command should contain the + * following tree of nested items, containing one but not both of + * WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME: + * + * WGDEVICE_A_IFINDEX: NLA_U32 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 + * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current + * peers should be removed prior to adding the list below. + * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove + * WGDEVICE_A_LISTEN_PORT: NLA_U16, 0 to choose randomly + * WGDEVICE_A_FWMARK: NLA_U32, 0 to disable + * WGDEVICE_A_PEERS: NLA_NESTED + * 0: NLA_NESTED + * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN + * WGPEER_A_FLAGS: NLA_U32, 0 and/or WGPEER_F_REMOVE_ME if the + * specified peer should not exist at the end of the + * operation, rather than added/updated and/or + * WGPEER_F_REPLACE_ALLOWEDIPS if all current allowed + * IPs of this peer should be removed prior to adding + * the list below and/or WGPEER_F_UPDATE_ONLY if the + * peer should only be set if it already exists. + * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN, all zeros to remove + * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6 + * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16, 0 to disable + * WGPEER_A_ALLOWEDIPS: NLA_NESTED + * 0: NLA_NESTED + * WGALLOWEDIP_A_FAMILY: NLA_U16 + * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr + * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 + * 0: NLA_NESTED + * ... + * 0: NLA_NESTED + * ... + * ... + * WGPEER_A_PROTOCOL_VERSION: NLA_U32, should not be set or used at + * all by most users of this API, as the + * most recent protocol will be used when + * this is unset. Otherwise, must be set + * to 1. + * 0: NLA_NESTED + * ... + * ... + * + * It is possible that the amount of configuration data exceeds that of + * the maximum message length accepted by the kernel. In that case, several + * messages should be sent one after another, with each successive one + * filling in information not contained in the prior. Note that if + * WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably + * should not be specified in fragments that come after, so that the list + * of peers is only cleared the first time but appended after. Likewise for + * peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message + * of a peer, it likely should not be specified in subsequent fragments. + * + * If an error occurs, NLMSG_ERROR will reply containing an errno. + */ + +#ifndef _WG_UAPI_WIREGUARD_H +#define _WG_UAPI_WIREGUARD_H + +#define WG_GENL_NAME "wireguard" +#define WG_GENL_VERSION 1 + +#define WG_KEY_LEN 32 + +enum wg_cmd { + WG_CMD_GET_DEVICE, + WG_CMD_SET_DEVICE, + __WG_CMD_MAX +}; +#define WG_CMD_MAX (__WG_CMD_MAX - 1) + +enum wgdevice_flag { + WGDEVICE_F_REPLACE_PEERS = 1U << 0, + __WGDEVICE_F_ALL = WGDEVICE_F_REPLACE_PEERS +}; +enum wgdevice_attribute { + WGDEVICE_A_UNSPEC, + WGDEVICE_A_IFINDEX, + WGDEVICE_A_IFNAME, + WGDEVICE_A_PRIVATE_KEY, + WGDEVICE_A_PUBLIC_KEY, + WGDEVICE_A_FLAGS, + WGDEVICE_A_LISTEN_PORT, + WGDEVICE_A_FWMARK, + WGDEVICE_A_PEERS, + __WGDEVICE_A_LAST +}; +#define WGDEVICE_A_MAX (__WGDEVICE_A_LAST - 1) + +enum wgpeer_flag { + WGPEER_F_REMOVE_ME = 1U << 0, + WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1, + WGPEER_F_UPDATE_ONLY = 1U << 2, + __WGPEER_F_ALL = WGPEER_F_REMOVE_ME | WGPEER_F_REPLACE_ALLOWEDIPS | + WGPEER_F_UPDATE_ONLY +}; +enum wgpeer_attribute { + WGPEER_A_UNSPEC, + WGPEER_A_PUBLIC_KEY, + WGPEER_A_PRESHARED_KEY, + WGPEER_A_FLAGS, + WGPEER_A_ENDPOINT, + WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, + WGPEER_A_LAST_HANDSHAKE_TIME, + WGPEER_A_RX_BYTES, + WGPEER_A_TX_BYTES, + WGPEER_A_ALLOWEDIPS, + WGPEER_A_PROTOCOL_VERSION, + __WGPEER_A_LAST +}; +#define WGPEER_A_MAX (__WGPEER_A_LAST - 1) + +enum wgallowedip_attribute { + WGALLOWEDIP_A_UNSPEC, + WGALLOWEDIP_A_FAMILY, + WGALLOWEDIP_A_IPADDR, + WGALLOWEDIP_A_CIDR_MASK, + __WGALLOWEDIP_A_LAST +}; +#define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1) + +#endif /* _WG_UAPI_WIREGUARD_H */ diff --git a/src/basic/list.h b/src/basic/list.h new file mode 100644 index 0000000..c0d5af6 --- /dev/null +++ b/src/basic/list.h @@ -0,0 +1,198 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "macro.h" + +/* The head of the linked list. Use this in the structure that shall + * contain the head of the linked list */ +#define LIST_HEAD(t,name) \ + t *name + +/* The pointers in the linked list's items. Use this in the item structure */ +#define LIST_FIELDS(t,name) \ + t *name##_next, *name##_prev + +/* Initialize the list's head */ +#define LIST_HEAD_INIT(head) \ + do { \ + (head) = NULL; \ + } while (false) + +/* Initialize a list item */ +#define LIST_INIT(name,item) \ + do { \ + typeof(*(item)) *_item = (item); \ + assert(_item); \ + _item->name##_prev = _item->name##_next = NULL; \ + } while (false) + +/* Prepend an item to the list */ +#define LIST_PREPEND(name,head,item) \ + do { \ + typeof(*(head)) **_head = &(head), *_item = (item); \ + assert(_item); \ + if ((_item->name##_next = *_head)) \ + _item->name##_next->name##_prev = _item; \ + _item->name##_prev = NULL; \ + *_head = _item; \ + } while (false) + +/* Append an item to the list */ +#define LIST_APPEND(name,head,item) \ + do { \ + typeof(*(head)) **_hhead = &(head), *_tail; \ + LIST_FIND_TAIL(name, *_hhead, _tail); \ + LIST_INSERT_AFTER(name, *_hhead, _tail, item); \ + } while (false) + +/* Remove an item from the list */ +#define LIST_REMOVE(name,head,item) \ + do { \ + typeof(*(head)) **_head = &(head), *_item = (item); \ + assert(_item); \ + if (_item->name##_next) \ + _item->name##_next->name##_prev = _item->name##_prev; \ + if (_item->name##_prev) \ + _item->name##_prev->name##_next = _item->name##_next; \ + else { \ + assert(*_head == _item); \ + *_head = _item->name##_next; \ + } \ + _item->name##_next = _item->name##_prev = NULL; \ + } while (false) + +/* Find the head of the list */ +#define LIST_FIND_HEAD(name,item,head) \ + do { \ + typeof(*(item)) *_item = (item); \ + if (!_item) \ + (head) = NULL; \ + else { \ + while (_item->name##_prev) \ + _item = _item->name##_prev; \ + (head) = _item; \ + } \ + } while (false) + +/* Find the tail of the list */ +#define LIST_FIND_TAIL(name,item,tail) \ + do { \ + typeof(*(item)) *_item = (item); \ + if (!_item) \ + (tail) = NULL; \ + else { \ + while (_item->name##_next) \ + _item = _item->name##_next; \ + (tail) = _item; \ + } \ + } while (false) + +/* Insert an item after another one (a = where, b = what) */ +#define LIST_INSERT_AFTER(name,head,a,b) \ + do { \ + typeof(*(head)) **_head = &(head), *_a = (a), *_b = (b); \ + assert(_b); \ + if (!_a) { \ + if ((_b->name##_next = *_head)) \ + _b->name##_next->name##_prev = _b; \ + _b->name##_prev = NULL; \ + *_head = _b; \ + } else { \ + if ((_b->name##_next = _a->name##_next)) \ + _b->name##_next->name##_prev = _b; \ + _b->name##_prev = _a; \ + _a->name##_next = _b; \ + } \ + } while (false) + +/* Insert an item before another one (a = where, b = what) */ +#define LIST_INSERT_BEFORE(name,head,a,b) \ + do { \ + typeof(*(head)) **_head = &(head), *_a = (a), *_b = (b); \ + assert(_b); \ + if (!_a) { \ + if (!*_head) { \ + _b->name##_next = NULL; \ + _b->name##_prev = NULL; \ + *_head = _b; \ + } else { \ + typeof(*(head)) *_tail = (head); \ + while (_tail->name##_next) \ + _tail = _tail->name##_next; \ + _b->name##_next = NULL; \ + _b->name##_prev = _tail; \ + _tail->name##_next = _b; \ + } \ + } else { \ + if ((_b->name##_prev = _a->name##_prev)) \ + _b->name##_prev->name##_next = _b; \ + else \ + *_head = _b; \ + _b->name##_next = _a; \ + _a->name##_prev = _b; \ + } \ + } while (false) + +#define LIST_JUST_US(name, item) \ + ({ \ + typeof(*(item)) *_item = (item); \ + !(_item)->name##_prev && !(_item)->name##_next; \ + }) + +/* The type of the iterator 'i' is automatically determined by the type of 'head', and declared in the + * loop. Hence, do not declare the same variable in the outer scope. Sometimes, we set 'head' through + * hashmap_get(). In that case, you need to explicitly cast the result. */ +#define LIST_FOREACH_WITH_NEXT(name,i,n,head) \ + for (typeof(*(head)) *n, *i = (head); i && (n = i->name##_next, true); i = n) + +#define LIST_FOREACH(name,i,head) \ + LIST_FOREACH_WITH_NEXT(name, i, UNIQ_T(n, UNIQ), head) + +#define _LIST_FOREACH_WITH_PREV(name,i,p,start) \ + for (typeof(*(start)) *p, *i = (start); i && (p = i->name##_prev, true); i = p) + +#define LIST_FOREACH_BACKWARDS(name,i,start) \ + _LIST_FOREACH_WITH_PREV(name, i, UNIQ_T(p, UNIQ), start) + +/* Iterate through all the members of the list p is included in, but skip over p */ +#define LIST_FOREACH_OTHERS(name,i,p) \ + for (typeof(*(p)) *_p = (p), *i = ({ \ + typeof(*_p) *_j = _p; \ + while (_j && _j->name##_prev) \ + _j = _j->name##_prev; \ + if (_j == _p) \ + _j = _p->name##_next; \ + _j; \ + }); \ + i; \ + i = i->name##_next == _p ? _p->name##_next : i->name##_next) + +/* Loop starting from p->next until p->prev. p can be adjusted meanwhile. */ +#define LIST_LOOP_BUT_ONE(name,i,head,p) \ + for (typeof(*(p)) *i = (p)->name##_next ? (p)->name##_next : (head); \ + i != (p); \ + i = i->name##_next ? i->name##_next : (head)) + +/* Join two lists tail to head: a->b, c->d to a->b->c->d and de-initialise second list */ +#define LIST_JOIN(name,a,b) \ + do { \ + assert(b); \ + if (!(a)) \ + (a) = (b); \ + else { \ + typeof(*(a)) *_head = (b), *_tail; \ + LIST_FIND_TAIL(name, (a), _tail); \ + _tail->name##_next = _head; \ + _head->name##_prev = _tail; \ + } \ + (b) = NULL; \ + } while (false) + +#define LIST_POP(name, a) \ + ({ \ + typeof(a)* _a = &(a); \ + typeof(a) _p = *_a; \ + if (_p) \ + LIST_REMOVE(name, *_a, _p); \ + _p; \ + }) diff --git a/src/basic/locale-util.c b/src/basic/locale-util.c new file mode 100644 index 0000000..60f3d21 --- /dev/null +++ b/src/basic/locale-util.c @@ -0,0 +1,372 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "def.h" +#include "dirent-util.h" +#include "env-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "hashmap.h" +#include "locale-util.h" +#include "missing_syscall.h" +#include "path-util.h" +#include "set.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "utf8.h" + +static char *normalize_locale(const char *name) { + const char *e; + + /* Locale names are weird: glibc has some magic rules when looking for the charset name on disk: it + * lowercases everything, and removes most special chars. This means the official .UTF-8 suffix + * becomes .utf8 when looking things up on disk. When enumerating locales, let's do the reverse + * operation, and go back to ".UTF-8" which appears to be the more commonly accepted name. We only do + * that for UTF-8 however, since it's kinda the only charset that matters. */ + + e = endswith(name, ".utf8"); + if (e) { + _cleanup_free_ char *prefix = NULL; + + prefix = strndup(name, e - name); + if (!prefix) + return NULL; + + return strjoin(prefix, ".UTF-8"); + } + + e = strstr(name, ".utf8@"); + if (e) { + _cleanup_free_ char *prefix = NULL; + + prefix = strndup(name, e - name); + if (!prefix) + return NULL; + + return strjoin(prefix, ".UTF-8@", e + 6); + } + + return strdup(name); +} + +static int add_locales_from_archive(Set *locales) { + /* Stolen from glibc... */ + + struct locarhead { + uint32_t magic; + /* Serial number. */ + uint32_t serial; + /* Name hash table. */ + uint32_t namehash_offset; + uint32_t namehash_used; + uint32_t namehash_size; + /* String table. */ + uint32_t string_offset; + uint32_t string_used; + uint32_t string_size; + /* Table with locale records. */ + uint32_t locrectab_offset; + uint32_t locrectab_used; + uint32_t locrectab_size; + /* MD5 sum hash table. */ + uint32_t sumhash_offset; + uint32_t sumhash_used; + uint32_t sumhash_size; + }; + + struct namehashent { + /* Hash value of the name. */ + uint32_t hashval; + /* Offset of the name in the string table. */ + uint32_t name_offset; + /* Offset of the locale record. */ + uint32_t locrec_offset; + }; + + const struct locarhead *h; + const struct namehashent *e; + const void *p = MAP_FAILED; + _cleanup_close_ int fd = -1; + size_t sz = 0; + struct stat st; + int r; + + fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (fd < 0) + return errno == ENOENT ? 0 : -errno; + + if (fstat(fd, &st) < 0) + return -errno; + + if (!S_ISREG(st.st_mode)) + return -EBADMSG; + + if (st.st_size < (off_t) sizeof(struct locarhead)) + return -EBADMSG; + + if (file_offset_beyond_memory_size(st.st_size)) + return -EFBIG; + + p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); + if (p == MAP_FAILED) + return -errno; + + h = (const struct locarhead *) p; + if (h->magic != 0xde020109 || + h->namehash_offset + h->namehash_size > st.st_size || + h->string_offset + h->string_size > st.st_size || + h->locrectab_offset + h->locrectab_size > st.st_size || + h->sumhash_offset + h->sumhash_size > st.st_size) { + r = -EBADMSG; + goto finish; + } + + e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset); + for (size_t i = 0; i < h->namehash_size; i++) { + char *z; + + if (e[i].locrec_offset == 0) + continue; + + if (!utf8_is_valid((char*) p + e[i].name_offset)) + continue; + + z = normalize_locale((char*) p + e[i].name_offset); + if (!z) { + r = -ENOMEM; + goto finish; + } + + r = set_consume(locales, z); + if (r < 0) + goto finish; + } + + r = 0; + + finish: + if (p != MAP_FAILED) + munmap((void*) p, sz); + + return r; +} + +static int add_locales_from_libdir(Set *locales) { + _cleanup_closedir_ DIR *dir = NULL; + int r; + + dir = opendir("/usr/lib/locale"); + if (!dir) + return errno == ENOENT ? 0 : -errno; + + FOREACH_DIRENT(de, dir, return -errno) { + char *z; + + if (de->d_type != DT_DIR) + continue; + + z = normalize_locale(de->d_name); + if (!z) + return -ENOMEM; + + r = set_consume(locales, z); + if (r < 0 && r != -EEXIST) + return r; + } + + return 0; +} + +int get_locales(char ***ret) { + _cleanup_set_free_free_ Set *locales = NULL; + _cleanup_strv_free_ char **l = NULL; + int r; + + locales = set_new(&string_hash_ops); + if (!locales) + return -ENOMEM; + + r = add_locales_from_archive(locales); + if (r < 0 && r != -ENOENT) + return r; + + r = add_locales_from_libdir(locales); + if (r < 0) + return r; + + char *locale; + SET_FOREACH(locale, locales) { + r = locale_is_installed(locale); + if (r < 0) + return r; + if (r == 0) + free(set_remove(locales, locale)); + } + + l = set_get_strv(locales); + if (!l) + return -ENOMEM; + + /* Now, all elements are owned by strv 'l'. Hence, do not call set_free_free(). */ + locales = set_free(locales); + + r = getenv_bool("SYSTEMD_LIST_NON_UTF8_LOCALES"); + if (r == -ENXIO || r == 0) { + char **a, **b; + + /* Filter out non-UTF-8 locales, because it's 2019, by default */ + for (a = b = l; *a; a++) { + + if (endswith(*a, "UTF-8") || + strstr(*a, ".UTF-8@")) + *(b++) = *a; + else + free(*a); + } + + *b = NULL; + + } else if (r < 0) + log_debug_errno(r, "Failed to parse $SYSTEMD_LIST_NON_UTF8_LOCALES as boolean"); + + strv_sort(l); + + *ret = TAKE_PTR(l); + + return 0; +} + +bool locale_is_valid(const char *name) { + + if (isempty(name)) + return false; + + if (strlen(name) >= 128) + return false; + + if (!utf8_is_valid(name)) + return false; + + if (!filename_is_valid(name)) + return false; + + if (!string_is_safe(name)) + return false; + + return true; +} + +int locale_is_installed(const char *name) { + if (!locale_is_valid(name)) + return false; + + if (STR_IN_SET(name, "C", "POSIX")) /* These ones are always OK */ + return true; + + _cleanup_(freelocalep) locale_t loc = + newlocale(LC_ALL_MASK, name, 0); + if (loc == (locale_t) 0) + return errno == ENOMEM ? -ENOMEM : false; + + return true; +} + +bool is_locale_utf8(void) { + const char *set; + static int cached_answer = -1; + + /* Note that we default to 'true' here, since today UTF8 is + * pretty much supported everywhere. */ + + if (cached_answer >= 0) + goto out; + + /* This function may be called from libsystemd, and setlocale() is not thread safe. Assuming yes. */ + if (gettid() != raw_getpid()) { + cached_answer = true; + goto out; + } + + if (!setlocale(LC_ALL, "")) { + cached_answer = true; + goto out; + } + + set = nl_langinfo(CODESET); + if (!set) { + cached_answer = true; + goto out; + } + + if (streq(set, "UTF-8")) { + cached_answer = true; + goto out; + } + + /* For LC_CTYPE=="C" return true, because CTYPE is effectively + * unset and everything can do to UTF-8 nowadays. */ + set = setlocale(LC_CTYPE, NULL); + if (!set) { + cached_answer = true; + goto out; + } + + /* Check result, but ignore the result if C was set + * explicitly. */ + cached_answer = + STR_IN_SET(set, "C", "POSIX") && + !getenv("LC_ALL") && + !getenv("LC_CTYPE") && + !getenv("LANG"); + +out: + return (bool) cached_answer; +} + +void locale_variables_free(char *l[_VARIABLE_LC_MAX]) { + if (!l) + return; + + for (LocaleVariable i = 0; i < _VARIABLE_LC_MAX; i++) + l[i] = mfree(l[i]); +} + +void locale_variables_simplify(char *l[_VARIABLE_LC_MAX]) { + assert(l); + + for (LocaleVariable p = 0; p < _VARIABLE_LC_MAX; p++) { + if (p == VARIABLE_LANG) + continue; + if (isempty(l[p]) || streq_ptr(l[VARIABLE_LANG], l[p])) + l[p] = mfree(l[p]); + } +} + +static const char * const locale_variable_table[_VARIABLE_LC_MAX] = { + [VARIABLE_LANG] = "LANG", + [VARIABLE_LANGUAGE] = "LANGUAGE", + [VARIABLE_LC_CTYPE] = "LC_CTYPE", + [VARIABLE_LC_NUMERIC] = "LC_NUMERIC", + [VARIABLE_LC_TIME] = "LC_TIME", + [VARIABLE_LC_COLLATE] = "LC_COLLATE", + [VARIABLE_LC_MONETARY] = "LC_MONETARY", + [VARIABLE_LC_MESSAGES] = "LC_MESSAGES", + [VARIABLE_LC_PAPER] = "LC_PAPER", + [VARIABLE_LC_NAME] = "LC_NAME", + [VARIABLE_LC_ADDRESS] = "LC_ADDRESS", + [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE", + [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT", + [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION" +}; + +DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable); diff --git a/src/basic/locale-util.h b/src/basic/locale-util.h new file mode 100644 index 0000000..4176a88 --- /dev/null +++ b/src/basic/locale-util.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include + +#include "macro.h" + +typedef enum LocaleVariable { + /* We don't list LC_ALL here on purpose. People should be + * using LANG instead. */ + + VARIABLE_LANG, + VARIABLE_LANGUAGE, + VARIABLE_LC_CTYPE, + VARIABLE_LC_NUMERIC, + VARIABLE_LC_TIME, + VARIABLE_LC_COLLATE, + VARIABLE_LC_MONETARY, + VARIABLE_LC_MESSAGES, + VARIABLE_LC_PAPER, + VARIABLE_LC_NAME, + VARIABLE_LC_ADDRESS, + VARIABLE_LC_TELEPHONE, + VARIABLE_LC_MEASUREMENT, + VARIABLE_LC_IDENTIFICATION, + _VARIABLE_LC_MAX, + _VARIABLE_LC_INVALID = -EINVAL, +} LocaleVariable; + +int get_locales(char ***l); +bool locale_is_valid(const char *name); +int locale_is_installed(const char *name); + +#define _(String) gettext(String) +#define N_(String) String + +bool is_locale_utf8(void); + +const char* locale_variable_to_string(LocaleVariable i) _const_; +LocaleVariable locale_variable_from_string(const char *s) _pure_; + +static inline void freelocalep(locale_t *p) { + if (*p == (locale_t) 0) + return; + + freelocale(*p); +} + +void locale_variables_free(char* l[_VARIABLE_LC_MAX]); +static inline void locale_variables_freep(char*(*l)[_VARIABLE_LC_MAX]) { + locale_variables_free(*l); +} +void locale_variables_simplify(char *l[_VARIABLE_LC_MAX]); diff --git a/src/basic/log.c b/src/basic/log.c new file mode 100644 index 0000000..3162eab --- /dev/null +++ b/src/basic/log.c @@ -0,0 +1,1524 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sd-messages.h" + +#include "alloc-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "format-util.h" +#include "io-util.h" +#include "log.h" +#include "macro.h" +#include "missing_syscall.h" +#include "missing_threads.h" +#include "parse-util.h" +#include "proc-cmdline.h" +#include "process-util.h" +#include "ratelimit.h" +#include "signal-util.h" +#include "socket-util.h" +#include "stdio-util.h" +#include "string-table.h" +#include "string-util.h" +#include "syslog-util.h" +#include "terminal-util.h" +#include "time-util.h" +#include "utf8.h" + +#define SNDBUF_SIZE (8*1024*1024) + +static log_syntax_callback_t log_syntax_callback = NULL; +static void *log_syntax_callback_userdata = NULL; + +static LogTarget log_target = LOG_TARGET_CONSOLE; +static int log_max_level = LOG_INFO; +static int log_facility = LOG_DAEMON; + +static int console_fd = STDERR_FILENO; +static int console_fd_is_tty = -1; /* tri-state: -1 means don't know */ +static int syslog_fd = -1; +static int kmsg_fd = -1; +static int journal_fd = -1; + +static bool syslog_is_stream = false; + +static int show_color = -1; /* tristate */ +static bool show_location = false; +static bool show_time = false; +static bool show_tid = false; + +static bool upgrade_syslog_to_journal = false; +static bool always_reopen_console = false; +static bool open_when_needed = false; +static bool prohibit_ipc = false; + +/* Akin to glibc's __abort_msg; which is private and we hence cannot + * use here. */ +static char *log_abort_msg = NULL; + +#if LOG_MESSAGE_VERIFICATION || defined(__COVERITY__) +bool _log_message_dummy = false; /* Always false */ +#endif + +/* An assert to use in logging functions that does not call recursively + * into our logging functions (since that might lead to a loop). */ +#define assert_raw(expr) \ + do { \ + if (_unlikely_(!(expr))) { \ + fputs(#expr "\n", stderr); \ + abort(); \ + } \ + } while (false) + +static void log_close_console(void) { + /* See comment in log_close_journal() */ + (void) safe_close_above_stdio(TAKE_FD(console_fd)); + console_fd_is_tty = -1; +} + +static int log_open_console(void) { + + if (!always_reopen_console) { + console_fd = STDERR_FILENO; + console_fd_is_tty = -1; + return 0; + } + + if (console_fd < 3) { + int fd; + + fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC); + if (fd < 0) + return fd; + + console_fd = fd_move_above_stdio(fd); + console_fd_is_tty = true; + } + + return 0; +} + +static void log_close_kmsg(void) { + /* See comment in log_close_journal() */ + (void) safe_close(TAKE_FD(kmsg_fd)); +} + +static int log_open_kmsg(void) { + + if (kmsg_fd >= 0) + return 0; + + kmsg_fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC); + if (kmsg_fd < 0) + return -errno; + + kmsg_fd = fd_move_above_stdio(kmsg_fd); + return 0; +} + +static void log_close_syslog(void) { + /* See comment in log_close_journal() */ + (void) safe_close(TAKE_FD(syslog_fd)); +} + +static int create_log_socket(int type) { + struct timeval tv; + int fd; + + fd = socket(AF_UNIX, type|SOCK_CLOEXEC, 0); + if (fd < 0) + return -errno; + + fd = fd_move_above_stdio(fd); + (void) fd_inc_sndbuf(fd, SNDBUF_SIZE); + + /* We need a blocking fd here since we'd otherwise lose messages way too early. However, let's not hang forever + * in the unlikely case of a deadlock. */ + if (getpid_cached() == 1) + timeval_store(&tv, 10 * USEC_PER_MSEC); + else + timeval_store(&tv, 10 * USEC_PER_SEC); + (void) setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)); + + return fd; +} + +static int log_open_syslog(void) { + int r; + + if (syslog_fd >= 0) + return 0; + + syslog_fd = create_log_socket(SOCK_DGRAM); + if (syslog_fd < 0) { + r = syslog_fd; + goto fail; + } + + r = connect_unix_path(syslog_fd, AT_FDCWD, "/dev/log"); + if (r < 0) { + safe_close(syslog_fd); + + /* Some legacy syslog systems still use stream sockets. They really shouldn't. But what can + * we do... */ + syslog_fd = create_log_socket(SOCK_STREAM); + if (syslog_fd < 0) { + r = syslog_fd; + goto fail; + } + + r = connect_unix_path(syslog_fd, AT_FDCWD, "/dev/log"); + if (r < 0) + goto fail; + + syslog_is_stream = true; + } else + syslog_is_stream = false; + + return 0; + +fail: + log_close_syslog(); + return r; +} + +static void log_close_journal(void) { + /* If the journal FD is bad, safe_close will fail, and will try to log, which will fail, so we'll + * try to close the journal FD, which is bad, so safe_close will fail... Whether we can close it + * or not, invalidate it immediately so that we don't get in a recursive loop until we run out of + * stack. */ + (void) safe_close(TAKE_FD(journal_fd)); +} + +static int log_open_journal(void) { + int r; + + if (journal_fd >= 0) + return 0; + + journal_fd = create_log_socket(SOCK_DGRAM); + if (journal_fd < 0) { + r = journal_fd; + goto fail; + } + + r = connect_unix_path(journal_fd, AT_FDCWD, "/run/systemd/journal/socket"); + if (r < 0) + goto fail; + + return 0; + +fail: + log_close_journal(); + return r; +} + +static bool stderr_is_journal(void) { + _cleanup_free_ char *w = NULL; + const char *e; + uint64_t dev, ino; + struct stat st; + + e = getenv("JOURNAL_STREAM"); + if (!e) + return false; + + if (extract_first_word(&e, &w, ":", EXTRACT_DONT_COALESCE_SEPARATORS) <= 0) + return false; + if (!e) + return false; + + if (safe_atou64(w, &dev) < 0) + return false; + if (safe_atou64(e, &ino) < 0) + return false; + + if (fstat(STDERR_FILENO, &st) < 0) + return false; + + return st.st_dev == dev && st.st_ino == ino; +} + +int log_open(void) { + int r; + + /* Do not call from library code. */ + + /* This function is often called in preparation for logging. Let's make sure we don't clobber errno, + * so that a call to a logging function immediately following a log_open() call can still easily + * reference an error that happened immediately before the log_open() call. */ + PROTECT_ERRNO; + + /* If we don't use the console, we close it here to not get killed by SAK. If we don't use syslog, we + * close it here too, so that we are not confused by somebody deleting the socket in the fs, and to + * make sure we don't use it if prohibit_ipc is set. If we don't use /dev/kmsg we still keep it open, + * because there is no reason to close it. */ + + if (log_target == LOG_TARGET_NULL) { + log_close_journal(); + log_close_syslog(); + log_close_console(); + return 0; + } + + if (getpid_cached() == 1 || + stderr_is_journal() || + IN_SET(log_target, + LOG_TARGET_KMSG, + LOG_TARGET_JOURNAL, + LOG_TARGET_JOURNAL_OR_KMSG, + LOG_TARGET_SYSLOG, + LOG_TARGET_SYSLOG_OR_KMSG)) { + + if (!prohibit_ipc) { + if (IN_SET(log_target, + LOG_TARGET_AUTO, + LOG_TARGET_JOURNAL_OR_KMSG, + LOG_TARGET_JOURNAL)) { + + r = log_open_journal(); + if (r >= 0) { + log_close_syslog(); + log_close_console(); + return r; + } + } + + if (IN_SET(log_target, + LOG_TARGET_SYSLOG_OR_KMSG, + LOG_TARGET_SYSLOG)) { + + r = log_open_syslog(); + if (r >= 0) { + log_close_journal(); + log_close_console(); + return r; + } + } + } + + if (IN_SET(log_target, LOG_TARGET_AUTO, + LOG_TARGET_JOURNAL_OR_KMSG, + LOG_TARGET_SYSLOG_OR_KMSG, + LOG_TARGET_KMSG)) { + r = log_open_kmsg(); + if (r >= 0) { + log_close_journal(); + log_close_syslog(); + log_close_console(); + return r; + } + } + } + + log_close_journal(); + log_close_syslog(); + + return log_open_console(); +} + +void log_set_target(LogTarget target) { + assert(target >= 0); + assert(target < _LOG_TARGET_MAX); + + if (upgrade_syslog_to_journal) { + if (target == LOG_TARGET_SYSLOG) + target = LOG_TARGET_JOURNAL; + else if (target == LOG_TARGET_SYSLOG_OR_KMSG) + target = LOG_TARGET_JOURNAL_OR_KMSG; + } + + log_target = target; +} + +void log_close(void) { + /* Do not call from library code. */ + + log_close_journal(); + log_close_syslog(); + log_close_kmsg(); + log_close_console(); +} + +void log_forget_fds(void) { + /* Do not call from library code. */ + + console_fd = kmsg_fd = syslog_fd = journal_fd = -1; + console_fd_is_tty = -1; +} + +void log_set_max_level(int level) { + assert(level == LOG_NULL || (level & LOG_PRIMASK) == level); + + log_max_level = level; +} + +void log_set_facility(int facility) { + log_facility = facility; +} + +static bool check_console_fd_is_tty(void) { + if (console_fd < 0) + return false; + + if (console_fd_is_tty < 0) + console_fd_is_tty = isatty(console_fd) > 0; + + return console_fd_is_tty; +} + +static int write_to_console( + int level, + int error, + const char *file, + int line, + const char *func, + const char *buffer) { + + char location[256], + header_time[FORMAT_TIMESTAMP_MAX], + prefix[1 + DECIMAL_STR_MAX(int) + 2], + tid_string[3 + DECIMAL_STR_MAX(pid_t) + 1]; + struct iovec iovec[9]; + const char *on = NULL, *off = NULL; + size_t n = 0; + + if (console_fd < 0) + return 0; + + if (log_target == LOG_TARGET_CONSOLE_PREFIXED) { + xsprintf(prefix, "<%i>", level); + iovec[n++] = IOVEC_MAKE_STRING(prefix); + } + + if (show_time && + format_timestamp(header_time, sizeof(header_time), now(CLOCK_REALTIME))) { + iovec[n++] = IOVEC_MAKE_STRING(header_time); + iovec[n++] = IOVEC_MAKE_STRING(" "); + } + + if (show_tid) { + xsprintf(tid_string, "(" PID_FMT ") ", gettid()); + iovec[n++] = IOVEC_MAKE_STRING(tid_string); + } + + if (log_get_show_color()) + get_log_colors(LOG_PRI(level), &on, &off, NULL); + + if (show_location) { + const char *lon = "", *loff = ""; + if (log_get_show_color()) { + lon = ansi_highlight_yellow4(); + loff = ansi_normal(); + } + + (void) snprintf(location, sizeof location, "%s%s:%i%s: ", lon, file, line, loff); + iovec[n++] = IOVEC_MAKE_STRING(location); + } + + if (on) + iovec[n++] = IOVEC_MAKE_STRING(on); + iovec[n++] = IOVEC_MAKE_STRING(buffer); + if (off) + iovec[n++] = IOVEC_MAKE_STRING(off); + + /* When writing to a TTY we output an extra '\r' (i.e. CR) first, to generate CRNL rather than just + * NL. This is a robustness thing in case the TTY is currently in raw mode (specifically: has the + * ONLCR flag off). We want that subsequent output definitely starts at the beginning of the line + * again, after all. If the TTY is not in raw mode the extra CR should not hurt. */ + iovec[n++] = IOVEC_MAKE_STRING((check_console_fd_is_tty() ? "\r\n" : "\n")); + + if (writev(console_fd, iovec, n) < 0) { + + if (errno == EIO && getpid_cached() == 1) { + + /* If somebody tried to kick us from our console tty (via vhangup() or suchlike), try + * to reconnect. */ + + log_close_console(); + (void) log_open_console(); + if (console_fd < 0) + return 0; + + if (writev(console_fd, iovec, n) < 0) + return -errno; + } else + return -errno; + } + + return 1; +} + +static int write_to_syslog( + int level, + int error, + const char *file, + int line, + const char *func, + const char *buffer) { + + char header_priority[2 + DECIMAL_STR_MAX(int) + 1], + header_time[64], + header_pid[4 + DECIMAL_STR_MAX(pid_t) + 1]; + time_t t; + struct tm tm; + + if (syslog_fd < 0) + return 0; + + xsprintf(header_priority, "<%i>", level); + + t = (time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC); + if (!localtime_r(&t, &tm)) + return -EINVAL; + + if (strftime(header_time, sizeof(header_time), "%h %e %T ", &tm) <= 0) + return -EINVAL; + + xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached()); + + struct iovec iovec[] = { + IOVEC_MAKE_STRING(header_priority), + IOVEC_MAKE_STRING(header_time), + IOVEC_MAKE_STRING(program_invocation_short_name), + IOVEC_MAKE_STRING(header_pid), + IOVEC_MAKE_STRING(buffer), + }; + const struct msghdr msghdr = { + .msg_iov = iovec, + .msg_iovlen = ELEMENTSOF(iovec), + }; + + /* When using syslog via SOCK_STREAM separate the messages by NUL chars */ + if (syslog_is_stream) + iovec[ELEMENTSOF(iovec) - 1].iov_len++; + + for (;;) { + ssize_t n; + + n = sendmsg(syslog_fd, &msghdr, MSG_NOSIGNAL); + if (n < 0) + return -errno; + + if (!syslog_is_stream) + break; + + if (IOVEC_INCREMENT(iovec, ELEMENTSOF(iovec), n)) + break; + } + + return 1; +} + +static int write_to_kmsg( + int level, + int error, + const char *file, + int line, + const char *func, + const char *buffer) { + + /* Set a ratelimit on the amount of messages logged to /dev/kmsg. This is mostly supposed to be a + * safety catch for the case where start indiscriminately logging in a loop. It will not catch cases + * where we log excessively, but not in a tight loop. + * + * Note that this ratelimit is per-emitter, so we might still overwhelm /dev/kmsg with multiple + * loggers. + */ + static thread_local RateLimit ratelimit = { 5 * USEC_PER_SEC, 200 }; + + char header_priority[2 + DECIMAL_STR_MAX(int) + 1], + header_pid[4 + DECIMAL_STR_MAX(pid_t) + 1]; + + if (kmsg_fd < 0) + return 0; + + if (!ratelimit_below(&ratelimit)) + return 0; + + xsprintf(header_priority, "<%i>", level); + xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached()); + + const struct iovec iovec[] = { + IOVEC_MAKE_STRING(header_priority), + IOVEC_MAKE_STRING(program_invocation_short_name), + IOVEC_MAKE_STRING(header_pid), + IOVEC_MAKE_STRING(buffer), + IOVEC_MAKE_STRING("\n"), + }; + + if (writev(kmsg_fd, iovec, ELEMENTSOF(iovec)) < 0) + return -errno; + + return 1; +} + +static int log_do_header( + char *header, + size_t size, + int level, + int error, + const char *file, int line, const char *func, + const char *object_field, const char *object, + const char *extra_field, const char *extra) { + int r; + + error = IS_SYNTHETIC_ERRNO(error) ? 0 : ERRNO_VALUE(error); + + r = snprintf(header, size, + "PRIORITY=%i\n" + "SYSLOG_FACILITY=%i\n" + "TID=" PID_FMT "\n" + "%s%.256s%s" /* CODE_FILE */ + "%s%.*i%s" /* CODE_LINE */ + "%s%.256s%s" /* CODE_FUNC */ + "%s%.*i%s" /* ERRNO */ + "%s%.256s%s" /* object */ + "%s%.256s%s" /* extra */ + "SYSLOG_IDENTIFIER=%.256s\n", + LOG_PRI(level), + LOG_FAC(level), + gettid(), + isempty(file) ? "" : "CODE_FILE=", + isempty(file) ? "" : file, + isempty(file) ? "" : "\n", + line ? "CODE_LINE=" : "", + line ? 1 : 0, line, /* %.0d means no output too, special case for 0 */ + line ? "\n" : "", + isempty(func) ? "" : "CODE_FUNC=", + isempty(func) ? "" : func, + isempty(func) ? "" : "\n", + error ? "ERRNO=" : "", + error ? 1 : 0, error, + error ? "\n" : "", + isempty(object) ? "" : object_field, + isempty(object) ? "" : object, + isempty(object) ? "" : "\n", + isempty(extra) ? "" : extra_field, + isempty(extra) ? "" : extra, + isempty(extra) ? "" : "\n", + program_invocation_short_name); + assert_raw((size_t) r < size); + + return 0; +} + +static int write_to_journal( + int level, + int error, + const char *file, + int line, + const char *func, + const char *object_field, + const char *object, + const char *extra_field, + const char *extra, + const char *buffer) { + + char header[LINE_MAX]; + + if (journal_fd < 0) + return 0; + + log_do_header(header, sizeof(header), level, error, file, line, func, object_field, object, extra_field, extra); + + struct iovec iovec[4] = { + IOVEC_MAKE_STRING(header), + IOVEC_MAKE_STRING("MESSAGE="), + IOVEC_MAKE_STRING(buffer), + IOVEC_MAKE_STRING("\n"), + }; + const struct msghdr msghdr = { + .msg_iov = iovec, + .msg_iovlen = ELEMENTSOF(iovec), + }; + + if (sendmsg(journal_fd, &msghdr, MSG_NOSIGNAL) < 0) + return -errno; + + return 1; +} + +int log_dispatch_internal( + int level, + int error, + const char *file, + int line, + const char *func, + const char *object_field, + const char *object, + const char *extra_field, + const char *extra, + char *buffer) { + + assert_raw(buffer); + + if (log_target == LOG_TARGET_NULL) + return -ERRNO_VALUE(error); + + /* Patch in LOG_DAEMON facility if necessary */ + if ((level & LOG_FACMASK) == 0) + level |= log_facility; + + if (open_when_needed) + (void) log_open(); + + do { + char *e; + int k = 0; + + buffer += strspn(buffer, NEWLINE); + + if (buffer[0] == 0) + break; + + if ((e = strpbrk(buffer, NEWLINE))) + *(e++) = 0; + + if (IN_SET(log_target, LOG_TARGET_AUTO, + LOG_TARGET_JOURNAL_OR_KMSG, + LOG_TARGET_JOURNAL)) { + + k = write_to_journal(level, error, file, line, func, object_field, object, extra_field, extra, buffer); + if (k < 0 && k != -EAGAIN) + log_close_journal(); + } + + if (IN_SET(log_target, LOG_TARGET_SYSLOG_OR_KMSG, + LOG_TARGET_SYSLOG)) { + + k = write_to_syslog(level, error, file, line, func, buffer); + if (k < 0 && k != -EAGAIN) + log_close_syslog(); + } + + if (k <= 0 && + IN_SET(log_target, LOG_TARGET_AUTO, + LOG_TARGET_SYSLOG_OR_KMSG, + LOG_TARGET_JOURNAL_OR_KMSG, + LOG_TARGET_KMSG)) { + + if (k < 0) + log_open_kmsg(); + + k = write_to_kmsg(level, error, file, line, func, buffer); + if (k < 0) { + log_close_kmsg(); + (void) log_open_console(); + } + } + + if (k <= 0) + (void) write_to_console(level, error, file, line, func, buffer); + + buffer = e; + } while (buffer); + + if (open_when_needed) + log_close(); + + return -ERRNO_VALUE(error); +} + +int log_dump_internal( + int level, + int error, + const char *file, + int line, + const char *func, + char *buffer) { + + PROTECT_ERRNO; + + /* This modifies the buffer... */ + + if (_likely_(LOG_PRI(level) > log_max_level)) + return -ERRNO_VALUE(error); + + return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buffer); +} + +int log_internalv( + int level, + int error, + const char *file, + int line, + const char *func, + const char *format, + va_list ap) { + + if (_likely_(LOG_PRI(level) > log_max_level)) + return -ERRNO_VALUE(error); + + /* Make sure that %m maps to the specified error (or "Success"). */ + char buffer[LINE_MAX]; + LOCAL_ERRNO(ERRNO_VALUE(error)); + + (void) vsnprintf(buffer, sizeof buffer, format, ap); + + return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buffer); +} + +int log_internal( + int level, + int error, + const char *file, + int line, + const char *func, + const char *format, ...) { + + va_list ap; + int r; + + va_start(ap, format); + r = log_internalv(level, error, file, line, func, format, ap); + va_end(ap); + + return r; +} + +int log_object_internalv( + int level, + int error, + const char *file, + int line, + const char *func, + const char *object_field, + const char *object, + const char *extra_field, + const char *extra, + const char *format, + va_list ap) { + + char *buffer, *b; + + if (_likely_(LOG_PRI(level) > log_max_level)) + return -ERRNO_VALUE(error); + + /* Make sure that %m maps to the specified error (or "Success"). */ + LOCAL_ERRNO(ERRNO_VALUE(error)); + + /* Prepend the object name before the message */ + if (object) { + size_t n; + + n = strlen(object); + buffer = newa(char, n + 2 + LINE_MAX); + b = stpcpy(stpcpy(buffer, object), ": "); + } else + b = buffer = newa(char, LINE_MAX); + + (void) vsnprintf(b, LINE_MAX, format, ap); + + return log_dispatch_internal(level, error, file, line, func, + object_field, object, extra_field, extra, buffer); +} + +int log_object_internal( + int level, + int error, + const char *file, + int line, + const char *func, + const char *object_field, + const char *object, + const char *extra_field, + const char *extra, + const char *format, ...) { + + va_list ap; + int r; + + va_start(ap, format); + r = log_object_internalv(level, error, file, line, func, object_field, object, extra_field, extra, format, ap); + va_end(ap); + + return r; +} + +static void log_assert( + int level, + const char *text, + const char *file, + int line, + const char *func, + const char *format) { + + static char buffer[LINE_MAX]; + + if (_likely_(LOG_PRI(level) > log_max_level)) + return; + + DISABLE_WARNING_FORMAT_NONLITERAL; + (void) snprintf(buffer, sizeof buffer, format, text, file, line, func); + REENABLE_WARNING; + + log_abort_msg = buffer; + + log_dispatch_internal(level, 0, file, line, func, NULL, NULL, NULL, NULL, buffer); +} + +_noreturn_ void log_assert_failed( + const char *text, + const char *file, + int line, + const char *func) { + log_assert(LOG_CRIT, text, file, line, func, + "Assertion '%s' failed at %s:%u, function %s(). Aborting."); + abort(); +} + +_noreturn_ void log_assert_failed_unreachable( + const char *file, + int line, + const char *func) { + log_assert(LOG_CRIT, "Code should not be reached", file, line, func, + "%s at %s:%u, function %s(). Aborting. 💥"); + abort(); +} + +void log_assert_failed_return( + const char *text, + const char *file, + int line, + const char *func) { + PROTECT_ERRNO; + log_assert(LOG_DEBUG, text, file, line, func, + "Assertion '%s' failed at %s:%u, function %s(). Ignoring."); +} + +int log_oom_internal(int level, const char *file, int line, const char *func) { + return log_internal(level, ENOMEM, file, line, func, "Out of memory."); +} + +int log_format_iovec( + struct iovec *iovec, + size_t iovec_len, + size_t *n, + bool newline_separator, + int error, + const char *format, + va_list ap) { + + static const char nl = '\n'; + + while (format && *n + 1 < iovec_len) { + va_list aq; + char *m; + int r; + + /* We need to copy the va_list structure, + * since vasprintf() leaves it afterwards at + * an undefined location */ + + errno = ERRNO_VALUE(error); + + va_copy(aq, ap); + r = vasprintf(&m, format, aq); + va_end(aq); + if (r < 0) + return -EINVAL; + + /* Now, jump enough ahead, so that we point to + * the next format string */ + VA_FORMAT_ADVANCE(format, ap); + + iovec[(*n)++] = IOVEC_MAKE_STRING(m); + if (newline_separator) + iovec[(*n)++] = IOVEC_MAKE((char *)&nl, 1); + + format = va_arg(ap, char *); + } + return 0; +} + +int log_struct_internal( + int level, + int error, + const char *file, + int line, + const char *func, + const char *format, ...) { + + char buf[LINE_MAX]; + bool found = false; + PROTECT_ERRNO; + va_list ap; + + if (_likely_(LOG_PRI(level) > log_max_level) || + log_target == LOG_TARGET_NULL) + return -ERRNO_VALUE(error); + + if ((level & LOG_FACMASK) == 0) + level |= log_facility; + + if (IN_SET(log_target, + LOG_TARGET_AUTO, + LOG_TARGET_JOURNAL_OR_KMSG, + LOG_TARGET_JOURNAL)) { + + if (open_when_needed) + log_open_journal(); + + if (journal_fd >= 0) { + char header[LINE_MAX]; + struct iovec iovec[17]; + size_t n = 0; + int r; + bool fallback = false; + + /* If the journal is available do structured logging. + * Do not report the errno if it is synthetic. */ + log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL); + iovec[n++] = IOVEC_MAKE_STRING(header); + + va_start(ap, format); + r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, true, error, format, ap); + if (r < 0) + fallback = true; + else { + const struct msghdr msghdr = { + .msg_iov = iovec, + .msg_iovlen = n, + }; + + (void) sendmsg(journal_fd, &msghdr, MSG_NOSIGNAL); + } + + va_end(ap); + for (size_t i = 1; i < n; i += 2) + free(iovec[i].iov_base); + + if (!fallback) { + if (open_when_needed) + log_close(); + + return -ERRNO_VALUE(error); + } + } + } + + /* Fallback if journal logging is not available or didn't work. */ + + va_start(ap, format); + while (format) { + va_list aq; + + errno = ERRNO_VALUE(error); + + va_copy(aq, ap); + (void) vsnprintf(buf, sizeof buf, format, aq); + va_end(aq); + + if (startswith(buf, "MESSAGE=")) { + found = true; + break; + } + + VA_FORMAT_ADVANCE(format, ap); + + format = va_arg(ap, char *); + } + va_end(ap); + + if (!found) { + if (open_when_needed) + log_close(); + + return -ERRNO_VALUE(error); + } + + return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buf + 8); +} + +int log_struct_iovec_internal( + int level, + int error, + const char *file, + int line, + const char *func, + const struct iovec input_iovec[], + size_t n_input_iovec) { + + PROTECT_ERRNO; + + if (_likely_(LOG_PRI(level) > log_max_level) || + log_target == LOG_TARGET_NULL) + return -ERRNO_VALUE(error); + + if ((level & LOG_FACMASK) == 0) + level |= log_facility; + + if (IN_SET(log_target, LOG_TARGET_AUTO, + LOG_TARGET_JOURNAL_OR_KMSG, + LOG_TARGET_JOURNAL) && + journal_fd >= 0) { + + char header[LINE_MAX]; + log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL); + + struct iovec iovec[1 + n_input_iovec*2]; + iovec[0] = IOVEC_MAKE_STRING(header); + for (size_t i = 0; i < n_input_iovec; i++) { + iovec[1+i*2] = input_iovec[i]; + iovec[1+i*2+1] = IOVEC_MAKE_STRING("\n"); + } + + const struct msghdr msghdr = { + .msg_iov = iovec, + .msg_iovlen = 1 + n_input_iovec*2, + }; + + if (sendmsg(journal_fd, &msghdr, MSG_NOSIGNAL) >= 0) + return -ERRNO_VALUE(error); + } + + for (size_t i = 0; i < n_input_iovec; i++) + if (memory_startswith(input_iovec[i].iov_base, input_iovec[i].iov_len, "MESSAGE=")) { + char *m; + + m = strndupa_safe((char*) input_iovec[i].iov_base + STRLEN("MESSAGE="), + input_iovec[i].iov_len - STRLEN("MESSAGE=")); + + return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, m); + } + + /* Couldn't find MESSAGE=. */ + return -ERRNO_VALUE(error); +} + +int log_set_target_from_string(const char *e) { + LogTarget t; + + t = log_target_from_string(e); + if (t < 0) + return t; + + log_set_target(t); + return 0; +} + +int log_set_max_level_from_string(const char *e) { + int t; + + t = log_level_from_string(e); + if (t < 0) + return t; + + log_set_max_level(t); + return 0; +} + +static int parse_proc_cmdline_item(const char *key, const char *value, void *data) { + + /* + * The systemd.log_xyz= settings are parsed by all tools, and + * so is "debug". + * + * However, "quiet" is only parsed by PID 1, and only turns of + * status output to /dev/console, but does not alter the log + * level. + */ + + if (streq(key, "debug") && !value) + log_set_max_level(LOG_DEBUG); + + else if (proc_cmdline_key_streq(key, "systemd.log_target")) { + + if (proc_cmdline_value_missing(key, value)) + return 0; + + if (log_set_target_from_string(value) < 0) + log_warning("Failed to parse log target '%s'. Ignoring.", value); + + } else if (proc_cmdline_key_streq(key, "systemd.log_level")) { + + if (proc_cmdline_value_missing(key, value)) + return 0; + + if (log_set_max_level_from_string(value) < 0) + log_warning("Failed to parse log level '%s'. Ignoring.", value); + + } else if (proc_cmdline_key_streq(key, "systemd.log_color")) { + + if (log_show_color_from_string(value ?: "1") < 0) + log_warning("Failed to parse log color setting '%s'. Ignoring.", value); + + } else if (proc_cmdline_key_streq(key, "systemd.log_location")) { + + if (log_show_location_from_string(value ?: "1") < 0) + log_warning("Failed to parse log location setting '%s'. Ignoring.", value); + + } else if (proc_cmdline_key_streq(key, "systemd.log_tid")) { + + if (log_show_tid_from_string(value ?: "1") < 0) + log_warning("Failed to parse log tid setting '%s'. Ignoring.", value); + + } else if (proc_cmdline_key_streq(key, "systemd.log_time")) { + + if (log_show_time_from_string(value ?: "1") < 0) + log_warning("Failed to parse log time setting '%s'. Ignoring.", value); + + } + + return 0; +} + +static bool should_parse_proc_cmdline(void) { + /* PID1 always reads the kernel command line. */ + if (getpid_cached() == 1) + return true; + + /* Otherwise, parse the commandline if invoked directly by systemd. */ + return invoked_by_systemd(); +} + +void log_parse_environment_variables(void) { + const char *e; + + e = getenv("SYSTEMD_LOG_TARGET"); + if (e && log_set_target_from_string(e) < 0) + log_warning("Failed to parse log target '%s'. Ignoring.", e); + + e = getenv("SYSTEMD_LOG_LEVEL"); + if (e && log_set_max_level_from_string(e) < 0) + log_warning("Failed to parse log level '%s'. Ignoring.", e); + + e = getenv("SYSTEMD_LOG_COLOR"); + if (e && log_show_color_from_string(e) < 0) + log_warning("Failed to parse log color '%s'. Ignoring.", e); + + e = getenv("SYSTEMD_LOG_LOCATION"); + if (e && log_show_location_from_string(e) < 0) + log_warning("Failed to parse log location '%s'. Ignoring.", e); + + e = getenv("SYSTEMD_LOG_TIME"); + if (e && log_show_time_from_string(e) < 0) + log_warning("Failed to parse log time '%s'. Ignoring.", e); + + e = getenv("SYSTEMD_LOG_TID"); + if (e && log_show_tid_from_string(e) < 0) + log_warning("Failed to parse log tid '%s'. Ignoring.", e); +} + +void log_parse_environment(void) { + /* Do not call from library code. */ + + if (should_parse_proc_cmdline()) + (void) proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_STRIP_RD_PREFIX); + + log_parse_environment_variables(); +} + +LogTarget log_get_target(void) { + return log_target; +} + +void log_settle_target(void) { + + /* If we're using LOG_TARGET_AUTO and opening the log again on every single log call, we'll check if + * stderr is attached to the journal every single log call. However, if we then close all file + * descriptors later, that will stop working because stderr will be closed as well. To avoid that + * problem, this function is used to permanently change the log target depending on whether stderr is + * connected to the journal or not. */ + + LogTarget t = log_get_target(); + + if (t != LOG_TARGET_AUTO) + return; + + t = getpid_cached() == 1 || stderr_is_journal() ? (prohibit_ipc ? LOG_TARGET_KMSG : LOG_TARGET_JOURNAL_OR_KMSG) + : LOG_TARGET_CONSOLE; + log_set_target(t); +} + +int log_get_max_level(void) { + return log_max_level; +} + +void log_show_color(bool b) { + show_color = b; +} + +bool log_get_show_color(void) { + return show_color > 0; /* Defaults to false. */ +} + +void log_show_location(bool b) { + show_location = b; +} + +bool log_get_show_location(void) { + return show_location; +} + +void log_show_time(bool b) { + show_time = b; +} + +bool log_get_show_time(void) { + return show_time; +} + +void log_show_tid(bool b) { + show_tid = b; +} + +bool log_get_show_tid(void) { + return show_tid; +} + +int log_show_color_from_string(const char *e) { + int t; + + t = parse_boolean(e); + if (t < 0) + return t; + + log_show_color(t); + return 0; +} + +int log_show_location_from_string(const char *e) { + int t; + + t = parse_boolean(e); + if (t < 0) + return t; + + log_show_location(t); + return 0; +} + +int log_show_time_from_string(const char *e) { + int t; + + t = parse_boolean(e); + if (t < 0) + return t; + + log_show_time(t); + return 0; +} + +int log_show_tid_from_string(const char *e) { + int t; + + t = parse_boolean(e); + if (t < 0) + return t; + + log_show_tid(t); + return 0; +} + +bool log_on_console(void) { + if (IN_SET(log_target, LOG_TARGET_CONSOLE, + LOG_TARGET_CONSOLE_PREFIXED)) + return true; + + return syslog_fd < 0 && kmsg_fd < 0 && journal_fd < 0; +} + +static const char *const log_target_table[_LOG_TARGET_MAX] = { + [LOG_TARGET_CONSOLE] = "console", + [LOG_TARGET_CONSOLE_PREFIXED] = "console-prefixed", + [LOG_TARGET_KMSG] = "kmsg", + [LOG_TARGET_JOURNAL] = "journal", + [LOG_TARGET_JOURNAL_OR_KMSG] = "journal-or-kmsg", + [LOG_TARGET_SYSLOG] = "syslog", + [LOG_TARGET_SYSLOG_OR_KMSG] = "syslog-or-kmsg", + [LOG_TARGET_AUTO] = "auto", + [LOG_TARGET_NULL] = "null", +}; + +DEFINE_STRING_TABLE_LOOKUP(log_target, LogTarget); + +void log_received_signal(int level, const struct signalfd_siginfo *si) { + assert(si); + + if (pid_is_valid(si->ssi_pid)) { + _cleanup_free_ char *p = NULL; + + (void) get_process_comm(si->ssi_pid, &p); + + log_full(level, + "Received SIG%s from PID %"PRIu32" (%s).", + signal_to_string(si->ssi_signo), + si->ssi_pid, strna(p)); + } else + log_full(level, + "Received SIG%s.", + signal_to_string(si->ssi_signo)); +} + +void set_log_syntax_callback(log_syntax_callback_t cb, void *userdata) { + assert(!log_syntax_callback || !cb); + assert(!log_syntax_callback_userdata || !userdata); + + log_syntax_callback = cb; + log_syntax_callback_userdata = userdata; +} + +int log_syntax_internal( + const char *unit, + int level, + const char *config_file, + unsigned config_line, + int error, + const char *file, + int line, + const char *func, + const char *format, ...) { + + PROTECT_ERRNO; + + if (log_syntax_callback) + log_syntax_callback(unit, level, log_syntax_callback_userdata); + + if (_likely_(LOG_PRI(level) > log_max_level) || + log_target == LOG_TARGET_NULL) + return -ERRNO_VALUE(error); + + char buffer[LINE_MAX]; + va_list ap; + const char *unit_fmt = NULL; + + errno = ERRNO_VALUE(error); + + va_start(ap, format); + (void) vsnprintf(buffer, sizeof buffer, format, ap); + va_end(ap); + + if (unit) + unit_fmt = getpid_cached() == 1 ? "UNIT=%s" : "USER_UNIT=%s"; + + if (config_file) { + if (config_line > 0) + return log_struct_internal( + level, + error, + file, line, func, + "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR, + "CONFIG_FILE=%s", config_file, + "CONFIG_LINE=%u", config_line, + LOG_MESSAGE("%s:%u: %s", config_file, config_line, buffer), + unit_fmt, unit, + NULL); + else + return log_struct_internal( + level, + error, + file, line, func, + "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR, + "CONFIG_FILE=%s", config_file, + LOG_MESSAGE("%s: %s", config_file, buffer), + unit_fmt, unit, + NULL); + } else if (unit) + return log_struct_internal( + level, + error, + file, line, func, + "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR, + LOG_MESSAGE("%s: %s", unit, buffer), + unit_fmt, unit, + NULL); + else + return log_struct_internal( + level, + error, + file, line, func, + "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR, + LOG_MESSAGE("%s", buffer), + NULL); +} + +int log_syntax_invalid_utf8_internal( + const char *unit, + int level, + const char *config_file, + unsigned config_line, + const char *file, + int line, + const char *func, + const char *rvalue) { + + _cleanup_free_ char *p = NULL; + + if (rvalue) + p = utf8_escape_invalid(rvalue); + + return log_syntax_internal(unit, level, config_file, config_line, + SYNTHETIC_ERRNO(EINVAL), file, line, func, + "String is not UTF-8 clean, ignoring assignment: %s", strna(p)); +} + +void log_set_upgrade_syslog_to_journal(bool b) { + upgrade_syslog_to_journal = b; + + /* Make the change effective immediately */ + if (b) { + if (log_target == LOG_TARGET_SYSLOG) + log_target = LOG_TARGET_JOURNAL; + else if (log_target == LOG_TARGET_SYSLOG_OR_KMSG) + log_target = LOG_TARGET_JOURNAL_OR_KMSG; + } +} + +void log_set_always_reopen_console(bool b) { + always_reopen_console = b; +} + +void log_set_open_when_needed(bool b) { + open_when_needed = b; +} + +void log_set_prohibit_ipc(bool b) { + prohibit_ipc = b; +} + +int log_emergency_level(void) { + /* Returns the log level to use for log_emergency() logging. We use LOG_EMERG only when we are PID 1, as only + * then the system of the whole system is obviously affected. */ + + return getpid_cached() == 1 ? LOG_EMERG : LOG_ERR; +} + +int log_dup_console(void) { + int copy; + + /* Duplicate the fd we use for fd logging if it's < 3 and use the copy from now on. This call is useful + * whenever we want to continue logging through the original fd, but want to rearrange stderr. */ + + if (console_fd < 0 || console_fd >= 3) + return 0; + + copy = fcntl(console_fd, F_DUPFD_CLOEXEC, 3); + if (copy < 0) + return -errno; + + console_fd = copy; + return 0; +} + +void log_setup(void) { + log_set_target(LOG_TARGET_AUTO); + log_parse_environment(); + (void) log_open(); + if (log_on_console() && show_color < 0) + log_show_color(true); +} diff --git a/src/basic/log.h b/src/basic/log.h new file mode 100644 index 0000000..560686e --- /dev/null +++ b/src/basic/log.h @@ -0,0 +1,409 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include +#include + +#include "macro.h" +#include "ratelimit.h" + +/* Some structures we reference but don't want to pull in headers for */ +struct iovec; +struct signalfd_siginfo; + +typedef enum LogTarget{ + LOG_TARGET_CONSOLE, + LOG_TARGET_CONSOLE_PREFIXED, + LOG_TARGET_KMSG, + LOG_TARGET_JOURNAL, + LOG_TARGET_JOURNAL_OR_KMSG, + LOG_TARGET_SYSLOG, + LOG_TARGET_SYSLOG_OR_KMSG, + LOG_TARGET_AUTO, /* console if stderr is not journal, JOURNAL_OR_KMSG otherwise */ + LOG_TARGET_NULL, + _LOG_TARGET_MAX, + _LOG_TARGET_INVALID = -EINVAL, +} LogTarget; + +/* This log level disables logging completely. It can only be passed to log_set_max_level() and cannot be + * used a regular log level. */ +#define LOG_NULL (LOG_EMERG - 1) + +/* Note to readers: << and >> have lower precedence (are evaluated earlier) than & and | */ +#define SYNTHETIC_ERRNO(num) (1 << 30 | (num)) +#define IS_SYNTHETIC_ERRNO(val) ((val) >> 30 & 1) +#define ERRNO_VALUE(val) (abs(val) & ~(1 << 30)) + +/* The callback function to be invoked when syntax warnings are seen + * in the unit files. */ +typedef void (*log_syntax_callback_t)(const char *unit, int level, void *userdata); +void set_log_syntax_callback(log_syntax_callback_t cb, void *userdata); + +static inline void clear_log_syntax_callback(dummy_t *dummy) { + set_log_syntax_callback(/* cb= */ NULL, /* userdata= */ NULL); +} + +const char *log_target_to_string(LogTarget target) _const_; +LogTarget log_target_from_string(const char *s) _pure_; +void log_set_target(LogTarget target); +int log_set_target_from_string(const char *e); +LogTarget log_get_target(void) _pure_; +void log_settle_target(void); + +void log_set_max_level(int level); +int log_set_max_level_from_string(const char *e); +int log_get_max_level(void) _pure_; + +void log_set_facility(int facility); + +void log_show_color(bool b); +bool log_get_show_color(void) _pure_; +void log_show_location(bool b); +bool log_get_show_location(void) _pure_; +void log_show_time(bool b); +bool log_get_show_time(void) _pure_; +void log_show_tid(bool b); +bool log_get_show_tid(void) _pure_; + +int log_show_color_from_string(const char *e); +int log_show_location_from_string(const char *e); +int log_show_time_from_string(const char *e); +int log_show_tid_from_string(const char *e); + +/* Functions below that open and close logs or configure logging based on the + * environment should not be called from library code — this is always a job + * for the application itself. */ + +assert_cc(STRLEN(__FILE__) > STRLEN(RELATIVE_SOURCE_PATH) + 1); +#define PROJECT_FILE (&__FILE__[STRLEN(RELATIVE_SOURCE_PATH) + 1]) + +int log_open(void); +void log_close(void); +void log_forget_fds(void); + +void log_parse_environment_variables(void); +void log_parse_environment(void); + +int log_dispatch_internal( + int level, + int error, + const char *file, + int line, + const char *func, + const char *object_field, + const char *object, + const char *extra, + const char *extra_field, + char *buffer); + +int log_internal( + int level, + int error, + const char *file, + int line, + const char *func, + const char *format, ...) _printf_(6,7); + +int log_internalv( + int level, + int error, + const char *file, + int line, + const char *func, + const char *format, + va_list ap) _printf_(6,0); + +int log_object_internalv( + int level, + int error, + const char *file, + int line, + const char *func, + const char *object_field, + const char *object, + const char *extra_field, + const char *extra, + const char *format, + va_list ap) _printf_(10,0); + +int log_object_internal( + int level, + int error, + const char *file, + int line, + const char *func, + const char *object_field, + const char *object, + const char *extra_field, + const char *extra, + const char *format, ...) _printf_(10,11); + +int log_struct_internal( + int level, + int error, + const char *file, + int line, + const char *func, + const char *format, ...) _printf_(6,0) _sentinel_; + +int log_oom_internal( + int level, + const char *file, + int line, + const char *func); + +int log_format_iovec( + struct iovec *iovec, + size_t iovec_len, + size_t *n, + bool newline_separator, + int error, + const char *format, + va_list ap) _printf_(6, 0); + +int log_struct_iovec_internal( + int level, + int error, + const char *file, + int line, + const char *func, + const struct iovec *input_iovec, + size_t n_input_iovec); + +/* This modifies the buffer passed! */ +int log_dump_internal( + int level, + int error, + const char *file, + int line, + const char *func, + char *buffer); + +/* Logging for various assertions */ +_noreturn_ void log_assert_failed( + const char *text, + const char *file, + int line, + const char *func); + +_noreturn_ void log_assert_failed_unreachable( + const char *file, + int line, + const char *func); + +void log_assert_failed_return( + const char *text, + const char *file, + int line, + const char *func); + +#define log_dispatch(level, error, buffer) \ + log_dispatch_internal(level, error, PROJECT_FILE, __LINE__, __func__, NULL, NULL, NULL, NULL, buffer) + +/* Logging with level */ +#define log_full_errno_zerook(level, error, ...) \ + ({ \ + int _level = (level), _e = (error); \ + _e = (log_get_max_level() >= LOG_PRI(_level)) \ + ? log_internal(_level, _e, PROJECT_FILE, __LINE__, __func__, __VA_ARGS__) \ + : -ERRNO_VALUE(_e); \ + _e < 0 ? _e : -ESTRPIPE; \ + }) + +#if BUILD_MODE_DEVELOPER && !defined(TEST_CODE) +# define ASSERT_NON_ZERO(x) assert((x) != 0) +#else +# define ASSERT_NON_ZERO(x) +#endif + +#define log_full_errno(level, error, ...) \ + ({ \ + int _error = (error); \ + ASSERT_NON_ZERO(_error); \ + log_full_errno_zerook(level, _error, __VA_ARGS__); \ + }) + +#define log_full(level, fmt, ...) \ + ({ \ + if (BUILD_MODE_DEVELOPER) \ + assert(!strstr(fmt, "%m")); \ + (void) log_full_errno_zerook(level, 0, fmt, ##__VA_ARGS__); \ + }) + +int log_emergency_level(void); + +/* Normal logging */ +#define log_debug(...) log_full(LOG_DEBUG, __VA_ARGS__) +#define log_info(...) log_full(LOG_INFO, __VA_ARGS__) +#define log_notice(...) log_full(LOG_NOTICE, __VA_ARGS__) +#define log_warning(...) log_full(LOG_WARNING, __VA_ARGS__) +#define log_error(...) log_full(LOG_ERR, __VA_ARGS__) +#define log_emergency(...) log_full(log_emergency_level(), __VA_ARGS__) + +/* Logging triggered by an errno-like error */ +#define log_debug_errno(error, ...) log_full_errno(LOG_DEBUG, error, __VA_ARGS__) +#define log_info_errno(error, ...) log_full_errno(LOG_INFO, error, __VA_ARGS__) +#define log_notice_errno(error, ...) log_full_errno(LOG_NOTICE, error, __VA_ARGS__) +#define log_warning_errno(error, ...) log_full_errno(LOG_WARNING, error, __VA_ARGS__) +#define log_error_errno(error, ...) log_full_errno(LOG_ERR, error, __VA_ARGS__) +#define log_emergency_errno(error, ...) log_full_errno(log_emergency_level(), error, __VA_ARGS__) + +/* This logs at the specified level the first time it is called, and then + * logs at debug. If the specified level is debug, this logs only the first + * time it is called. */ +#define log_once(level, ...) \ + ({ \ + if (ONCE) \ + log_full(level, __VA_ARGS__); \ + else if (LOG_PRI(level) != LOG_DEBUG) \ + log_debug(__VA_ARGS__); \ + }) + +#define log_once_errno(level, error, ...) \ + ({ \ + int _err = (error); \ + if (ONCE) \ + _err = log_full_errno(level, _err, __VA_ARGS__); \ + else if (LOG_PRI(level) != LOG_DEBUG) \ + _err = log_debug_errno(_err, __VA_ARGS__); \ + else \ + _err = -ERRNO_VALUE(_err); \ + _err; \ + }) + +#if LOG_TRACE +# define log_trace(...) log_debug(__VA_ARGS__) +# define log_trace_errno(...) log_debug_errno(__VA_ARGS__) +#else +# define log_trace(...) do {} while (0) +# define log_trace_errno(e, ...) (-ERRNO_VALUE(e)) +#endif + +/* Structured logging */ +#define log_struct_errno(level, error, ...) \ + log_struct_internal(level, error, PROJECT_FILE, __LINE__, __func__, __VA_ARGS__, NULL) +#define log_struct(level, ...) log_struct_errno(level, 0, __VA_ARGS__) + +#define log_struct_iovec_errno(level, error, iovec, n_iovec) \ + log_struct_iovec_internal(level, error, PROJECT_FILE, __LINE__, __func__, iovec, n_iovec) +#define log_struct_iovec(level, iovec, n_iovec) log_struct_iovec_errno(level, 0, iovec, n_iovec) + +/* This modifies the buffer passed! */ +#define log_dump(level, buffer) \ + log_dump_internal(level, 0, PROJECT_FILE, __LINE__, __func__, buffer) + +#define log_oom() log_oom_internal(LOG_ERR, PROJECT_FILE, __LINE__, __func__) +#define log_oom_debug() log_oom_internal(LOG_DEBUG, PROJECT_FILE, __LINE__, __func__) + +bool log_on_console(void) _pure_; + +/* Helper to wrap the main message in structured logging. The macro doesn't do much, + * except to provide visual grouping of the format string and its arguments. */ +#if LOG_MESSAGE_VERIFICATION || defined(__COVERITY__) +/* Do a fake formatting of the message string to let the scanner verify the arguments against the format + * message. The variable will never be set to true, but we don't tell the compiler that :) */ +extern bool _log_message_dummy; +# define LOG_MESSAGE(fmt, ...) "MESSAGE=%.0d" fmt, (_log_message_dummy && printf(fmt, ##__VA_ARGS__)), ##__VA_ARGS__ +#else +# define LOG_MESSAGE(fmt, ...) "MESSAGE=" fmt, ##__VA_ARGS__ +#endif + +void log_received_signal(int level, const struct signalfd_siginfo *si); + +/* If turned on, any requests for a log target involving "syslog" will be implicitly upgraded to the equivalent journal target */ +void log_set_upgrade_syslog_to_journal(bool b); + +/* If turned on, and log_open() is called, we'll not use STDERR_FILENO for logging ever, but rather open /dev/console */ +void log_set_always_reopen_console(bool b); + +/* If turned on, we'll open the log stream implicitly if needed on each individual log call. This is normally not + * desired as we want to reuse our logging streams. It is useful however */ +void log_set_open_when_needed(bool b); + +/* If turned on, then we'll never use IPC-based logging, i.e. never log to syslog or the journal. We'll only log to + * stderr, the console or kmsg */ +void log_set_prohibit_ipc(bool b); + +int log_dup_console(void); + +int log_syntax_internal( + const char *unit, + int level, + const char *config_file, + unsigned config_line, + int error, + const char *file, + int line, + const char *func, + const char *format, ...) _printf_(9, 10); + +int log_syntax_invalid_utf8_internal( + const char *unit, + int level, + const char *config_file, + unsigned config_line, + const char *file, + int line, + const char *func, + const char *rvalue); + +#define log_syntax(unit, level, config_file, config_line, error, ...) \ + ({ \ + int _level = (level), _e = (error); \ + (log_get_max_level() >= LOG_PRI(_level)) \ + ? log_syntax_internal(unit, _level, config_file, config_line, _e, PROJECT_FILE, __LINE__, __func__, __VA_ARGS__) \ + : -ERRNO_VALUE(_e); \ + }) + +#define log_syntax_invalid_utf8(unit, level, config_file, config_line, rvalue) \ + ({ \ + int _level = (level); \ + (log_get_max_level() >= LOG_PRI(_level)) \ + ? log_syntax_invalid_utf8_internal(unit, _level, config_file, config_line, PROJECT_FILE, __LINE__, __func__, rvalue) \ + : -EINVAL; \ + }) + +#define DEBUG_LOGGING _unlikely_(log_get_max_level() >= LOG_DEBUG) + +void log_setup(void); + +typedef struct LogRateLimit { + int error; + int level; + RateLimit ratelimit; +} LogRateLimit; + +#define log_ratelimit_internal(_level, _error, _format, _file, _line, _func, ...) \ +({ \ + int _log_ratelimit_error = (_error); \ + int _log_ratelimit_level = (_level); \ + static LogRateLimit _log_ratelimit = { \ + .ratelimit = { \ + .interval = 1 * USEC_PER_SEC, \ + .burst = 1, \ + }, \ + }; \ + unsigned _num_dropped_errors = ratelimit_num_dropped(&_log_ratelimit.ratelimit); \ + if (_log_ratelimit_error != _log_ratelimit.error || _log_ratelimit_level != _log_ratelimit.level) { \ + ratelimit_reset(&_log_ratelimit.ratelimit); \ + _log_ratelimit.error = _log_ratelimit_error; \ + _log_ratelimit.level = _log_ratelimit_level; \ + } \ + if (ratelimit_below(&_log_ratelimit.ratelimit)) \ + _log_ratelimit_error = _num_dropped_errors > 0 \ + ? log_internal(_log_ratelimit_level, _log_ratelimit_error, _file, _line, _func, _format " (Dropped %u similar message(s))", __VA_ARGS__, _num_dropped_errors) \ + : log_internal(_log_ratelimit_level, _log_ratelimit_error, _file, _line, _func, _format, __VA_ARGS__); \ + _log_ratelimit_error; \ +}) + +#define log_ratelimit_full_errno(level, error, format, ...) \ + ({ \ + int _level = (level), _e = (error); \ + _e = (log_get_max_level() >= LOG_PRI(_level)) \ + ? log_ratelimit_internal(_level, _e, format, PROJECT_FILE, __LINE__, __func__, __VA_ARGS__) \ + : -ERRNO_VALUE(_e); \ + _e < 0 ? _e : -ESTRPIPE; \ + }) diff --git a/src/basic/login-util.c b/src/basic/login-util.c new file mode 100644 index 0000000..044e8b7 --- /dev/null +++ b/src/basic/login-util.c @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "login-util.h" +#include "string-util.h" + +bool session_id_valid(const char *id) { + + if (isempty(id)) + return false; + + return id[strspn(id, LETTERS DIGITS)] == '\0'; +} diff --git a/src/basic/login-util.h b/src/basic/login-util.h new file mode 100644 index 0000000..841fd32 --- /dev/null +++ b/src/basic/login-util.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +#define SD_LOGIND_ROOT_CHECK_INHIBITORS (UINT64_C(1) << 0) +#define SD_LOGIND_REBOOT_VIA_KEXEC (UINT64_C(1) << 1) + +/* For internal use only */ +#define SD_LOGIND_INTERACTIVE (UINT64_C(1) << 63) + +#define SD_LOGIND_SHUTDOWN_AND_SLEEP_FLAGS_PUBLIC (SD_LOGIND_ROOT_CHECK_INHIBITORS|SD_LOGIND_REBOOT_VIA_KEXEC) +#define SD_LOGIND_SHUTDOWN_AND_SLEEP_FLAGS_ALL (SD_LOGIND_SHUTDOWN_AND_SLEEP_FLAGS_PUBLIC|SD_LOGIND_INTERACTIVE) + +bool session_id_valid(const char *id); + +static inline bool logind_running(void) { + return access("/run/systemd/seats/", F_OK) >= 0; +} diff --git a/src/basic/macro.h b/src/basic/macro.h new file mode 100644 index 0000000..2671c6c --- /dev/null +++ b/src/basic/macro.h @@ -0,0 +1,442 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "macro-fundamental.h" + +#if !defined(HAS_FEATURE_MEMORY_SANITIZER) +# if defined(__has_feature) +# if __has_feature(memory_sanitizer) +# define HAS_FEATURE_MEMORY_SANITIZER 1 +# endif +# endif +# if !defined(HAS_FEATURE_MEMORY_SANITIZER) +# define HAS_FEATURE_MEMORY_SANITIZER 0 +# endif +#endif + +#if !defined(HAS_FEATURE_ADDRESS_SANITIZER) +# ifdef __SANITIZE_ADDRESS__ +# define HAS_FEATURE_ADDRESS_SANITIZER 1 +# elif defined(__has_feature) +# if __has_feature(address_sanitizer) +# define HAS_FEATURE_ADDRESS_SANITIZER 1 +# endif +# endif +# if !defined(HAS_FEATURE_ADDRESS_SANITIZER) +# define HAS_FEATURE_ADDRESS_SANITIZER 0 +# endif +#endif + +/* Note: on GCC "no_sanitize_address" is a function attribute only, on llvm it may also be applied to global + * variables. We define a specific macro which knows this. Note that on GCC we don't need this decorator so much, since + * our primary usecase for this attribute is registration structures placed in named ELF sections which shall not be + * padded, but GCC doesn't pad those anyway if AddressSanitizer is enabled. */ +#if HAS_FEATURE_ADDRESS_SANITIZER && defined(__clang__) +#define _variable_no_sanitize_address_ __attribute__((__no_sanitize_address__)) +#else +#define _variable_no_sanitize_address_ +#endif + +/* Apparently there's no has_feature() call defined to check for ubsan, hence let's define this + * unconditionally on llvm */ +#if defined(__clang__) +#define _function_no_sanitize_float_cast_overflow_ __attribute__((no_sanitize("float-cast-overflow"))) +#else +#define _function_no_sanitize_float_cast_overflow_ +#endif + +/* Temporarily disable some warnings */ +#define DISABLE_WARNING_DEPRECATED_DECLARATIONS \ + _Pragma("GCC diagnostic push"); \ + _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") + +#define DISABLE_WARNING_FORMAT_NONLITERAL \ + _Pragma("GCC diagnostic push"); \ + _Pragma("GCC diagnostic ignored \"-Wformat-nonliteral\"") + +#define DISABLE_WARNING_MISSING_PROTOTYPES \ + _Pragma("GCC diagnostic push"); \ + _Pragma("GCC diagnostic ignored \"-Wmissing-prototypes\"") + +#define DISABLE_WARNING_NONNULL \ + _Pragma("GCC diagnostic push"); \ + _Pragma("GCC diagnostic ignored \"-Wnonnull\"") + +#define DISABLE_WARNING_SHADOW \ + _Pragma("GCC diagnostic push"); \ + _Pragma("GCC diagnostic ignored \"-Wshadow\"") + +#define DISABLE_WARNING_INCOMPATIBLE_POINTER_TYPES \ + _Pragma("GCC diagnostic push"); \ + _Pragma("GCC diagnostic ignored \"-Wincompatible-pointer-types\"") + +#if HAVE_WSTRINGOP_TRUNCATION +# define DISABLE_WARNING_STRINGOP_TRUNCATION \ + _Pragma("GCC diagnostic push"); \ + _Pragma("GCC diagnostic ignored \"-Wstringop-truncation\"") +#else +# define DISABLE_WARNING_STRINGOP_TRUNCATION \ + _Pragma("GCC diagnostic push") +#endif + +#define DISABLE_WARNING_TYPE_LIMITS \ + _Pragma("GCC diagnostic push"); \ + _Pragma("GCC diagnostic ignored \"-Wtype-limits\"") + +#define REENABLE_WARNING \ + _Pragma("GCC diagnostic pop") + +/* automake test harness */ +#define EXIT_TEST_SKIP 77 + +/* builtins */ +#if __SIZEOF_INT__ == 4 +#define BUILTIN_FFS_U32(x) __builtin_ffs(x); +#elif __SIZEOF_LONG__ == 4 +#define BUILTIN_FFS_U32(x) __builtin_ffsl(x); +#else +#error "neither int nor long are four bytes long?!?" +#endif + +/* align to next higher power-of-2 (except for: 0 => 0, overflow => 0) */ +static inline unsigned long ALIGN_POWER2(unsigned long u) { + + /* Avoid subtraction overflow */ + if (u == 0) + return 0; + + /* clz(0) is undefined */ + if (u == 1) + return 1; + + /* left-shift overflow is undefined */ + if (__builtin_clzl(u - 1UL) < 1) + return 0; + + return 1UL << (sizeof(u) * 8 - __builtin_clzl(u - 1UL)); +} + +static inline size_t GREEDY_ALLOC_ROUND_UP(size_t l) { + size_t m; + + /* Round up allocation sizes a bit to some reasonable, likely larger value. This is supposed to be + * used for cases which are likely called in an allocation loop of some form, i.e. that repetitively + * grow stuff, for example strv_extend() and suchlike. + * + * Note the difference to GREEDY_REALLOC() here, as this helper operates on a single size value only, + * and rounds up to next multiple of 2, needing no further counter. + * + * Note the benefits of direct ALIGN_POWER2() usage: type-safety for size_t, sane handling for very + * small (i.e. <= 2) and safe handling for very large (i.e. > SSIZE_MAX) values. */ + + if (l <= 2) + return 2; /* Never allocate less than 2 of something. */ + + m = ALIGN_POWER2(l); + if (m == 0) /* overflow? */ + return l; + + return m; +} + +/* + * container_of - cast a member of a structure out to the containing structure + * @ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. + */ +#define container_of(ptr, type, member) __container_of(UNIQ, (ptr), type, member) +#define __container_of(uniq, ptr, type, member) \ + ({ \ + const typeof( ((type*)0)->member ) *UNIQ_T(A, uniq) = (ptr); \ + (type*)( (char *)UNIQ_T(A, uniq) - offsetof(type, member) ); \ + }) + +#ifdef __COVERITY__ + +/* Use special definitions of assertion macros in order to prevent + * false positives of ASSERT_SIDE_EFFECT on Coverity static analyzer + * for uses of assert_se() and assert_return(). + * + * These definitions make expression go through a (trivial) function + * call to ensure they are not discarded. Also use ! or !! to ensure + * the boolean expressions are seen as such. + * + * This technique has been described and recommended in: + * https://community.synopsys.com/s/question/0D534000046Yuzb/suppressing-assertsideeffect-for-functions-that-allow-for-sideeffects + */ + +extern void __coverity_panic__(void); + +static inline void __coverity_check__(int condition) { + if (!condition) + __coverity_panic__(); +} + +static inline int __coverity_check_and_return__(int condition) { + return condition; +} + +#define assert_message_se(expr, message) __coverity_check__(!!(expr)) + +#define assert_log(expr, message) __coverity_check_and_return__(!!(expr)) + +#else /* ! __COVERITY__ */ + +#define assert_message_se(expr, message) \ + do { \ + if (_unlikely_(!(expr))) \ + log_assert_failed(message, PROJECT_FILE, __LINE__, __PRETTY_FUNCTION__); \ + } while (false) + +#define assert_log(expr, message) ((_likely_(expr)) \ + ? (true) \ + : (log_assert_failed_return(message, PROJECT_FILE, __LINE__, __PRETTY_FUNCTION__), false)) + +#endif /* __COVERITY__ */ + +#define assert_se(expr) assert_message_se(expr, #expr) + +/* We override the glibc assert() here. */ +#undef assert +#ifdef NDEBUG +#define assert(expr) ({ if (!(expr)) __builtin_unreachable(); }) +#else +#define assert(expr) assert_message_se(expr, #expr) +#endif + +#define assert_not_reached() \ + log_assert_failed_unreachable(PROJECT_FILE, __LINE__, __PRETTY_FUNCTION__) + +#define assert_return(expr, r) \ + do { \ + if (!assert_log(expr, #expr)) \ + return (r); \ + } while (false) + +#define assert_return_errno(expr, r, err) \ + do { \ + if (!assert_log(expr, #expr)) { \ + errno = err; \ + return (r); \ + } \ + } while (false) + +#define return_with_errno(r, err) \ + do { \ + errno = abs(err); \ + return r; \ + } while (false) + +#define PTR_TO_INT(p) ((int) ((intptr_t) (p))) +#define INT_TO_PTR(u) ((void *) ((intptr_t) (u))) +#define PTR_TO_UINT(p) ((unsigned) ((uintptr_t) (p))) +#define UINT_TO_PTR(u) ((void *) ((uintptr_t) (u))) + +#define PTR_TO_LONG(p) ((long) ((intptr_t) (p))) +#define LONG_TO_PTR(u) ((void *) ((intptr_t) (u))) +#define PTR_TO_ULONG(p) ((unsigned long) ((uintptr_t) (p))) +#define ULONG_TO_PTR(u) ((void *) ((uintptr_t) (u))) + +#define PTR_TO_UINT8(p) ((uint8_t) ((uintptr_t) (p))) +#define UINT8_TO_PTR(u) ((void *) ((uintptr_t) (u))) + +#define PTR_TO_INT32(p) ((int32_t) ((intptr_t) (p))) +#define INT32_TO_PTR(u) ((void *) ((intptr_t) (u))) +#define PTR_TO_UINT32(p) ((uint32_t) ((uintptr_t) (p))) +#define UINT32_TO_PTR(u) ((void *) ((uintptr_t) (u))) + +#define PTR_TO_INT64(p) ((int64_t) ((intptr_t) (p))) +#define INT64_TO_PTR(u) ((void *) ((intptr_t) (u))) +#define PTR_TO_UINT64(p) ((uint64_t) ((uintptr_t) (p))) +#define UINT64_TO_PTR(u) ((void *) ((uintptr_t) (u))) + +#define PTR_TO_SIZE(p) ((size_t) ((uintptr_t) (p))) +#define SIZE_TO_PTR(u) ((void *) ((uintptr_t) (u))) + +#define CHAR_TO_STR(x) ((char[2]) { x, 0 }) + +#define char_array_0(x) x[sizeof(x)-1] = 0; + +#define sizeof_field(struct_type, member) sizeof(((struct_type *) 0)->member) + +/* Maximum buffer size needed for formatting an unsigned integer type as hex, including space for '0x' + * prefix and trailing NUL suffix. */ +#define HEXADECIMAL_STR_MAX(type) (2 + sizeof(type) * 2 + 1) + +/* Returns the number of chars needed to format variables of the specified type as a decimal string. Adds in + * extra space for a negative '-' prefix for signed types. Includes space for the trailing NUL. */ +#define DECIMAL_STR_MAX(type) \ + ((size_t) IS_SIGNED_INTEGER_TYPE(type) + 1U + \ + (sizeof(type) <= 1 ? 3U : \ + sizeof(type) <= 2 ? 5U : \ + sizeof(type) <= 4 ? 10U : \ + sizeof(type) <= 8 ? (IS_SIGNED_INTEGER_TYPE(type) ? 19U : 20U) : sizeof(int[-2*(sizeof(type) > 8)]))) + +/* Returns the number of chars needed to format the specified integer value. It's hence more specific than + * DECIMAL_STR_MAX() which answers the same question for all possible values of the specified type. Does + * *not* include space for a trailing NUL. (If you wonder why we special case _x_ == 0 here: it's to trick + * out gcc's -Wtype-limits, which would complain on comparing an unsigned type with < 0, otherwise. By + * special-casing == 0 here first, we can use <= 0 instead of < 0 to trick out gcc.) */ +#define DECIMAL_STR_WIDTH(x) \ + ({ \ + typeof(x) _x_ = (x); \ + size_t ans; \ + if (_x_ == 0) \ + ans = 1; \ + else { \ + ans = _x_ <= 0 ? 2 : 1; \ + while ((_x_ /= 10) != 0) \ + ans++; \ + } \ + ans; \ + }) + +#define SWAP_TWO(x, y) do { \ + typeof(x) _t = (x); \ + (x) = (y); \ + (y) = (_t); \ + } while (false) + +#define STRV_MAKE(...) ((char**) ((const char*[]) { __VA_ARGS__, NULL })) +#define STRV_MAKE_EMPTY ((char*[1]) { NULL }) +#define STRV_MAKE_CONST(...) ((const char* const*) ((const char*[]) { __VA_ARGS__, NULL })) + +/* Pointers range from NULL to POINTER_MAX */ +#define POINTER_MAX ((void*) UINTPTR_MAX) + +/* Iterates through a specified list of pointers. Accepts NULL pointers, but uses POINTER_MAX as internal marker for EOL. */ +#define FOREACH_POINTER(p, x, ...) \ + for (typeof(p) *_l = (typeof(p)[]) { ({ p = x; }), ##__VA_ARGS__, POINTER_MAX }; \ + p != (typeof(p)) POINTER_MAX; \ + p = *(++_l)) + +#define DEFINE_TRIVIAL_DESTRUCTOR(name, type, func) \ + static inline void name(type *p) { \ + func(p); \ + } + +/* When func() returns the void value (NULL, -1, …) of the appropriate type */ +#define DEFINE_TRIVIAL_CLEANUP_FUNC(type, func) \ + static inline void func##p(type *p) { \ + if (*p) \ + *p = func(*p); \ + } + +/* When func() doesn't return the appropriate type, set variable to empty afterwards */ +#define DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(type, func, empty) \ + static inline void func##p(type *p) { \ + if (*p != (empty)) { \ + func(*p); \ + *p = (empty); \ + } \ + } + +#define _DEFINE_TRIVIAL_REF_FUNC(type, name, scope) \ + scope type *name##_ref(type *p) { \ + if (!p) \ + return NULL; \ + \ + /* For type check. */ \ + unsigned *q = &p->n_ref; \ + assert(*q > 0); \ + assert_se(*q < UINT_MAX); \ + \ + (*q)++; \ + return p; \ + } + +#define _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func, scope) \ + scope type *name##_unref(type *p) { \ + if (!p) \ + return NULL; \ + \ + assert(p->n_ref > 0); \ + p->n_ref--; \ + if (p->n_ref > 0) \ + return NULL; \ + \ + return free_func(p); \ + } + +#define DEFINE_TRIVIAL_REF_FUNC(type, name) \ + _DEFINE_TRIVIAL_REF_FUNC(type, name,) +#define DEFINE_PRIVATE_TRIVIAL_REF_FUNC(type, name) \ + _DEFINE_TRIVIAL_REF_FUNC(type, name, static) +#define DEFINE_PUBLIC_TRIVIAL_REF_FUNC(type, name) \ + _DEFINE_TRIVIAL_REF_FUNC(type, name, _public_) + +#define DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func) \ + _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func,) +#define DEFINE_PRIVATE_TRIVIAL_UNREF_FUNC(type, name, free_func) \ + _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func, static) +#define DEFINE_PUBLIC_TRIVIAL_UNREF_FUNC(type, name, free_func) \ + _DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func, _public_) + +#define DEFINE_TRIVIAL_REF_UNREF_FUNC(type, name, free_func) \ + DEFINE_TRIVIAL_REF_FUNC(type, name); \ + DEFINE_TRIVIAL_UNREF_FUNC(type, name, free_func); + +#define DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(type, name, free_func) \ + DEFINE_PRIVATE_TRIVIAL_REF_FUNC(type, name); \ + DEFINE_PRIVATE_TRIVIAL_UNREF_FUNC(type, name, free_func); + +#define DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(type, name, free_func) \ + DEFINE_PUBLIC_TRIVIAL_REF_FUNC(type, name); \ + DEFINE_PUBLIC_TRIVIAL_UNREF_FUNC(type, name, free_func); + +/* A macro to force copying of a variable from memory. This is useful whenever we want to read something from + * memory and want to make sure the compiler won't optimize away the destination variable for us. It's not + * supposed to be a full CPU memory barrier, i.e. CPU is still allowed to reorder the reads, but it is not + * allowed to remove our local copies of the variables. We want this to work for unaligned memory, hence + * memcpy() is great for our purposes. */ +#define READ_NOW(x) \ + ({ \ + typeof(x) _copy; \ + memcpy(&_copy, &(x), sizeof(_copy)); \ + asm volatile ("" : : : "memory"); \ + _copy; \ + }) + +#define saturate_add(x, y, limit) \ + ({ \ + typeof(limit) _x = (x); \ + typeof(limit) _y = (y); \ + _x > (limit) || _y >= (limit) - _x ? (limit) : _x + _y; \ + }) + +static inline size_t size_add(size_t x, size_t y) { + return saturate_add(x, y, SIZE_MAX); +} + +typedef struct { + int _empty[0]; +} dummy_t; + +assert_cc(sizeof(dummy_t) == 0); + +/* A little helper for subtracting 1 off a pointer in a safe UB-free way. This is intended to be used for for + * loops that count down from a high pointer until some base. A naive loop would implement this like this: + * + * for (p = end-1; p >= base; p--) … + * + * But this is not safe because p before the base is UB in C. With this macro the loop becomes this instead: + * + * for (p = PTR_SUB1(end, base); p; p = PTR_SUB1(p, base)) … + * + * And is free from UB! */ +#define PTR_SUB1(p, base) \ + ({ \ + typeof(p) _q = (p); \ + _q && _q > (base) ? &_q[-1] : NULL; \ + }) + +#include "log.h" diff --git a/src/basic/math-util.h b/src/basic/math-util.h new file mode 100644 index 0000000..24023cd --- /dev/null +++ b/src/basic/math-util.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#include "macro.h" + +/* On some optimization level, iszero(x) is converted to (x == 0.0), and emits warning -Wfloat-equal. + * The argument must be a floating point, i.e. one of float, double, or long double. */ +#define iszero_safe(x) (fpclassify(x) == FP_ZERO) + +/* To avoid x == y and triggering compile warning -Wfloat-equal. This returns false if one of the argument is + * NaN or infinity. One of the argument must be a floating point. */ +#define fp_equal(x, y) iszero_safe((x) - (y)) diff --git a/src/basic/memfd-util.c b/src/basic/memfd-util.c new file mode 100644 index 0000000..f05fb15 --- /dev/null +++ b/src/basic/memfd-util.c @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#if HAVE_LINUX_MEMFD_H +#include +#endif +#include +#include + +#include "alloc-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "macro.h" +#include "memfd-util.h" +#include "missing_fcntl.h" +#include "missing_mman.h" +#include "missing_syscall.h" +#include "string-util.h" +#include "utf8.h" + +int memfd_new(const char *name) { + _cleanup_free_ char *g = NULL; + + if (!name) { + char pr[17] = {}; + + /* If no name is specified we generate one. We include + * a hint indicating our library implementation, and + * add the thread name to it */ + + assert_se(prctl(PR_GET_NAME, (unsigned long) pr) >= 0); + + if (isempty(pr)) + name = "sd"; + else { + _cleanup_free_ char *e = NULL; + + e = utf8_escape_invalid(pr); + if (!e) + return -ENOMEM; + + g = strjoin("sd-", e); + if (!g) + return -ENOMEM; + + name = g; + } + } + + return RET_NERRNO(memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC)); +} + +int memfd_map(int fd, uint64_t offset, size_t size, void **p) { + void *q; + int sealed; + + assert(fd >= 0); + assert(size > 0); + assert(p); + + sealed = memfd_get_sealed(fd); + if (sealed < 0) + return sealed; + + if (sealed) + q = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, offset); + else + q = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset); + if (q == MAP_FAILED) + return -errno; + + *p = q; + return 0; +} + +int memfd_set_sealed(int fd) { + assert(fd >= 0); + + return RET_NERRNO(fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_SEAL)); +} + +int memfd_get_sealed(int fd) { + int r; + + assert(fd >= 0); + + r = fcntl(fd, F_GET_SEALS); + if (r < 0) + return -errno; + + return r == (F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_SEAL); +} + +int memfd_get_size(int fd, uint64_t *sz) { + struct stat stat; + + assert(fd >= 0); + assert(sz); + + if (fstat(fd, &stat) < 0) + return -errno; + + *sz = stat.st_size; + return 0; +} + +int memfd_set_size(int fd, uint64_t sz) { + assert(fd >= 0); + + return RET_NERRNO(ftruncate(fd, sz)); +} + +int memfd_new_and_map(const char *name, size_t sz, void **p) { + _cleanup_close_ int fd = -1; + int r; + + assert(sz > 0); + assert(p); + + fd = memfd_new(name); + if (fd < 0) + return fd; + + r = memfd_set_size(fd, sz); + if (r < 0) + return r; + + r = memfd_map(fd, 0, sz, p); + if (r < 0) + return r; + + return TAKE_FD(fd); +} diff --git a/src/basic/memfd-util.h b/src/basic/memfd-util.h new file mode 100644 index 0000000..8596c1a --- /dev/null +++ b/src/basic/memfd-util.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include + +int memfd_new(const char *name); +int memfd_new_and_map(const char *name, size_t sz, void **p); + +int memfd_map(int fd, uint64_t offset, size_t size, void **p); + +int memfd_set_sealed(int fd); +int memfd_get_sealed(int fd); + +int memfd_get_size(int fd, uint64_t *sz); +int memfd_set_size(int fd, uint64_t sz); diff --git a/src/basic/memory-util.c b/src/basic/memory-util.c new file mode 100644 index 0000000..84b5b2d --- /dev/null +++ b/src/basic/memory-util.c @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include + +#include "memory-util.h" +#include "missing_threads.h" + +size_t page_size(void) { + static thread_local size_t pgsz = 0; + long r; + + if (_likely_(pgsz > 0)) + return pgsz; + + r = sysconf(_SC_PAGESIZE); + assert(r > 0); + + pgsz = (size_t) r; + return pgsz; +} + +bool memeqbyte(uint8_t byte, const void *data, size_t length) { + /* Does the buffer consist entirely of the same specific byte value? + * Copied from https://github.com/systemd/casync/, copied in turn from + * https://github.com/rustyrussell/ccan/blob/master/ccan/mem/mem.c#L92, + * which is licensed CC-0. + */ + + const uint8_t *p = data; + + /* Check first 16 bytes manually */ + for (size_t i = 0; i < 16; i++, length--) { + if (length == 0) + return true; + if (p[i] != byte) + return false; + } + + /* Now we know first 16 bytes match, memcmp() with self. */ + return memcmp(data, p + 16, length) == 0; +} + +#if !HAVE_EXPLICIT_BZERO +/* + * The pointer to memset() is volatile so that compiler must de-reference the pointer and can't assume that + * it points to any function in particular (such as memset(), which it then might further "optimize"). This + * approach is inspired by openssl's crypto/mem_clr.c. + */ +typedef void *(*memset_t)(void *,int,size_t); + +static volatile memset_t memset_func = memset; + +void* explicit_bzero_safe(void *p, size_t l) { + if (l > 0) + memset_func(p, '\0', l); + + return p; +} +#endif diff --git a/src/basic/memory-util.h b/src/basic/memory-util.h new file mode 100644 index 0000000..6e3280b --- /dev/null +++ b/src/basic/memory-util.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "macro.h" + +size_t page_size(void) _pure_; +#define PAGE_ALIGN(l) ALIGN_TO((l), page_size()) +#define PAGE_ALIGN_DOWN(l) ((l) & ~(page_size() - 1)) +#define PAGE_OFFSET(l) ((l) & (page_size() - 1)) + +/* Normal memcpy() requires src to be nonnull. We do nothing if n is 0. */ +static inline void *memcpy_safe(void *dst, const void *src, size_t n) { + if (n == 0) + return dst; + assert(src); + return memcpy(dst, src, n); +} + +/* Normal mempcpy() requires src to be nonnull. We do nothing if n is 0. */ +static inline void *mempcpy_safe(void *dst, const void *src, size_t n) { + if (n == 0) + return dst; + assert(src); + return mempcpy(dst, src, n); +} + +/* Normal memcmp() requires s1 and s2 to be nonnull. We do nothing if n is 0. */ +static inline int memcmp_safe(const void *s1, const void *s2, size_t n) { + if (n == 0) + return 0; + assert(s1); + assert(s2); + return memcmp(s1, s2, n); +} + +/* Compare s1 (length n1) with s2 (length n2) in lexicographic order. */ +static inline int memcmp_nn(const void *s1, size_t n1, const void *s2, size_t n2) { + return memcmp_safe(s1, s2, MIN(n1, n2)) + ?: CMP(n1, n2); +} + +#define memzero(x,l) \ + ({ \ + size_t _l_ = (l); \ + if (_l_ > 0) \ + memset(x, 0, _l_); \ + }) + +#define zero(x) (memzero(&(x), sizeof(x))) + +bool memeqbyte(uint8_t byte, const void *data, size_t length); + +#define memeqzero(data, length) memeqbyte(0x00, data, length) + +#define eqzero(x) memeqzero(x, sizeof(x)) + +static inline void *mempset(void *s, int c, size_t n) { + memset(s, c, n); + return (uint8_t*)s + n; +} + +/* Normal memmem() requires haystack to be nonnull, which is annoying for zero-length buffers */ +static inline void *memmem_safe(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen) { + + if (needlelen <= 0) + return (void*) haystack; + + if (haystacklen < needlelen) + return NULL; + + assert(haystack); + assert(needle); + + return memmem(haystack, haystacklen, needle, needlelen); +} + +static inline void *mempmem_safe(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen) { + const uint8_t *p; + + p = memmem_safe(haystack, haystacklen, needle, needlelen); + if (!p) + return NULL; + + return (uint8_t*) p + needlelen; +} + +#if HAVE_EXPLICIT_BZERO +static inline void* explicit_bzero_safe(void *p, size_t l) { + if (l > 0) + explicit_bzero(p, l); + + return p; +} +#else +void *explicit_bzero_safe(void *p, size_t l); +#endif + +static inline void* erase_and_free(void *p) { + size_t l; + + if (!p) + return NULL; + + l = MALLOC_SIZEOF_SAFE(p); + explicit_bzero_safe(p, l); + return mfree(p); +} + +static inline void erase_and_freep(void *p) { + erase_and_free(*(void**) p); +} + +/* Use with _cleanup_ to erase a single 'char' when leaving scope */ +static inline void erase_char(char *p) { + explicit_bzero_safe(p, sizeof(char)); +} diff --git a/src/basic/mempool.c b/src/basic/mempool.c new file mode 100644 index 0000000..fff23fd --- /dev/null +++ b/src/basic/mempool.c @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include + +#include "macro.h" +#include "memory-util.h" +#include "mempool.h" + +struct pool { + struct pool *next; + size_t n_tiles; + size_t n_used; +}; + +void* mempool_alloc_tile(struct mempool *mp) { + size_t i; + + /* When a tile is released we add it to the list and simply + * place the next pointer at its offset 0. */ + + assert(mp->tile_size >= sizeof(void*)); + assert(mp->at_least > 0); + + if (mp->freelist) { + void *r; + + r = mp->freelist; + mp->freelist = * (void**) mp->freelist; + return r; + } + + if (_unlikely_(!mp->first_pool) || + _unlikely_(mp->first_pool->n_used >= mp->first_pool->n_tiles)) { + size_t size, n; + struct pool *p; + + n = mp->first_pool ? mp->first_pool->n_tiles : 0; + n = MAX(mp->at_least, n * 2); + size = PAGE_ALIGN(ALIGN(sizeof(struct pool)) + n*mp->tile_size); + n = (size - ALIGN(sizeof(struct pool))) / mp->tile_size; + + p = malloc(size); + if (!p) + return NULL; + + p->next = mp->first_pool; + p->n_tiles = n; + p->n_used = 0; + + mp->first_pool = p; + } + + i = mp->first_pool->n_used++; + + return ((uint8_t*) mp->first_pool) + ALIGN(sizeof(struct pool)) + i*mp->tile_size; +} + +void* mempool_alloc0_tile(struct mempool *mp) { + void *p; + + p = mempool_alloc_tile(mp); + if (p) + memzero(p, mp->tile_size); + return p; +} + +void mempool_free_tile(struct mempool *mp, void *p) { + * (void**) p = mp->freelist; + mp->freelist = p; +} + +#if VALGRIND +void mempool_drop(struct mempool *mp) { + struct pool *p = mp->first_pool; + while (p) { + struct pool *n; + n = p->next; + free(p); + p = n; + } +} +#endif diff --git a/src/basic/mempool.h b/src/basic/mempool.h new file mode 100644 index 0000000..539ccbd --- /dev/null +++ b/src/basic/mempool.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +struct pool; + +struct mempool { + struct pool *first_pool; + void *freelist; + size_t tile_size; + unsigned at_least; +}; + +void* mempool_alloc_tile(struct mempool *mp); +void* mempool_alloc0_tile(struct mempool *mp); +void mempool_free_tile(struct mempool *mp, void *p); + +#define DEFINE_MEMPOOL(pool_name, tile_type, alloc_at_least) \ +static struct mempool pool_name = { \ + .tile_size = sizeof(tile_type), \ + .at_least = alloc_at_least, \ +} + +__attribute__((weak)) bool mempool_enabled(void); + +#if VALGRIND +void mempool_drop(struct mempool *mp); +#endif diff --git a/src/basic/meson.build b/src/basic/meson.build new file mode 100644 index 0000000..bfe52d5 --- /dev/null +++ b/src/basic/meson.build @@ -0,0 +1,467 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +basic_sources = files( + 'MurmurHash2.c', + 'MurmurHash2.h', + 'af-list.c', + 'af-list.h', + 'alloc-util.c', + 'alloc-util.h', + 'architecture.c', + 'architecture.h', + 'arphrd-util.c', + 'arphrd-util.h', + 'async.c', + 'async.h', + 'audit-util.c', + 'audit-util.h', + 'build.c', + 'build.h', + 'bus-label.c', + 'bus-label.h', + 'cap-list.c', + 'cap-list.h', + 'capability-util.c', + 'capability-util.h', + 'cgroup-util.c', + 'cgroup-util.h', + 'chase-symlinks.c', + 'chase-symlinks.h', + 'chattr-util.c', + 'chattr-util.h', + 'conf-files.c', + 'conf-files.h', + 'def.h', + 'devnum-util.c', + 'devnum-util.h', + 'dirent-util.c', + 'dirent-util.h', + 'dns-def.h', + 'efivars.c', + 'efivars.h', + 'env-file.c', + 'env-file.h', + 'env-util.c', + 'env-util.h', + 'errno-list.c', + 'errno-list.h', + 'errno-util.h', + 'escape.c', + 'escape.h', + 'ether-addr-util.c', + 'ether-addr-util.h', + 'extract-word.c', + 'extract-word.h', + 'fd-util.c', + 'fd-util.h', + 'fileio.c', + 'fileio.h', + 'filesystems.c', + 'filesystems.h', + 'format-util.c', + 'format-util.h', + 'fs-util.c', + 'fs-util.h', + 'glob-util.c', + 'glob-util.h', + 'glyph-util.c', + 'glyph-util.h', + 'gunicode.c', + 'gunicode.h', + 'hash-funcs.c', + 'hash-funcs.h', + 'hashmap.c', + 'hashmap.h', + 'hexdecoct.c', + 'hexdecoct.h', + 'hmac.c', + 'hmac.h', + 'hostname-util.c', + 'hostname-util.h', + 'in-addr-util.c', + 'in-addr-util.h', + 'inotify-util.c', + 'inotify-util.h', + 'io-util.c', + 'io-util.h', + 'ioprio-util.c', + 'ioprio-util.h', + 'limits-util.c', + 'limits-util.h', + 'linux/btrfs.h', + 'linux/btrfs_tree.h', + 'linux/can/netlink.h', + 'linux/can/vxcan.h', + 'linux/cfm_bridge.h', + 'linux/fib_rules.h', + 'linux/fou.h', + 'linux/genetlink.h', + 'linux/hdlc/ioctl.h', + 'linux/if.h', + 'linux/if_addr.h', + 'linux/if_bonding.h', + 'linux/if_bridge.h', + 'linux/if_ether.h', + 'linux/if_link.h', + 'linux/if_macsec.h', + 'linux/if_tun.h', + 'linux/if_tunnel.h', + 'linux/in.h', + 'linux/in6.h', + 'linux/ipv6_route.h', + 'linux/l2tp.h', + 'linux/libc-compat.h', + 'linux/mrp_bridge.h', + 'linux/netdevice.h', + 'linux/netfilter/nf_tables.h', + 'linux/netfilter/nfnetlink.h', + 'linux/netlink.h', + 'linux/nexthop.h', + 'linux/nl80211.h', + 'linux/pkt_sched.h', + 'linux/rtnetlink.h', + 'linux/wireguard.h', + 'list.h', + 'locale-util.c', + 'locale-util.h', + 'log.c', + 'log.h', + 'login-util.c', + 'login-util.h', + 'macro.h', + 'math-util.h', + 'memfd-util.c', + 'memfd-util.h', + 'memory-util.c', + 'memory-util.h', + 'mempool.c', + 'mempool.h', + 'missing_audit.h', + 'missing_capability.h', + 'missing_drm.h', + 'missing_fcntl.h', + 'missing_fs.h', + 'missing_input.h', + 'missing_ioprio.h', + 'missing_keyctl.h', + 'missing_magic.h', + 'missing_mman.h', + 'missing_mount.h', + 'missing_network.h', + 'missing_prctl.h', + 'missing_random.h', + 'missing_resource.h', + 'missing_sched.h', + 'missing_securebits.h', + 'missing_socket.h', + 'missing_stat.h', + 'missing_stdlib.h', + 'missing_syscall.h', + 'missing_timerfd.h', + 'missing_type.h', + 'mkdir.c', + 'mkdir.h', + 'mountpoint-util.c', + 'mountpoint-util.h', + 'namespace-util.c', + 'namespace-util.h', + 'nss-util.h', + 'nulstr-util.c', + 'nulstr-util.h', + 'ordered-set.c', + 'ordered-set.h', + 'os-util.c', + 'os-util.h', + 'parse-util.c', + 'parse-util.h', + 'path-lookup.c', + 'path-lookup.h', + 'path-util.c', + 'path-util.h', + 'percent-util.c', + 'percent-util.h', + 'prioq.c', + 'prioq.h', + 'proc-cmdline.c', + 'proc-cmdline.h', + 'process-util.c', + 'process-util.h', + 'procfs-util.c', + 'procfs-util.h', + 'pthread-util.h', + 'random-util.c', + 'random-util.h', + 'ratelimit.c', + 'ratelimit.h', + 'raw-clone.h', + 'raw-reboot.h', + 'recurse-dir.c', + 'recurse-dir.h', + 'replace-var.c', + 'replace-var.h', + 'rlimit-util.c', + 'rlimit-util.h', + 'set.h', + 'sigbus.c', + 'sigbus.h', + 'signal-util.c', + 'signal-util.h', + 'siphash24.c', + 'siphash24.h', + 'socket-util.c', + 'socket-util.h', + 'sort-util.c', + 'sort-util.h', + 'sparse-endian.h', + 'special.h', + 'stat-util.c', + 'stat-util.h', + 'static-destruct.h', + 'stdio-util.h', + 'strbuf.c', + 'strbuf.h', + 'string-table.c', + 'string-table.h', + 'string-util.c', + 'string-util.h', + 'strv.c', + 'strv.h', + 'strxcpyx.c', + 'strxcpyx.h', + 'sync-util.c', + 'sync-util.h', + 'sysctl-util.c', + 'sysctl-util.h', + 'syslog-util.c', + 'syslog-util.h', + 'terminal-util.c', + 'terminal-util.h', + 'time-util.c', + 'time-util.h', + 'tmpfile-util.c', + 'tmpfile-util.h', + 'uid-range.c', + 'uid-range.h', + 'umask-util.h', + 'unaligned.h', + 'unit-def.c', + 'unit-def.h', + 'unit-file.c', + 'unit-file.h', + 'unit-name.c', + 'unit-name.h', + 'user-util.c', + 'user-util.h', + 'utf8.c', + 'utf8.h', + 'util.c', + 'util.h', + 'virt.c', + 'virt.h', + 'xattr-util.c', + 'xattr-util.h') + +missing_audit_h = files('missing_audit.h') +missing_capability_h = files('missing_capability.h') +missing_socket_h = files('missing_socket.h') + +missing_syscall_def_h = files('missing_syscall_def.h') +basic_sources += missing_syscall_def_h + +generate_af_list = find_program('generate-af-list.sh') +af_list_txt = custom_target( + 'af-list.txt', + output : 'af-list.txt', + command : [generate_af_list, cpp, config_h, missing_socket_h], + capture : true) + +generate_arphrd_list = find_program('generate-arphrd-list.sh') +arphrd_list_txt = custom_target( + 'arphrd-list.txt', + output : 'arphrd-list.txt', + command : [generate_arphrd_list, cpp, config_h], + capture : true) + +generate_cap_list = find_program('generate-cap-list.sh') +cap_list_txt = custom_target( + 'cap-list.txt', + output : 'cap-list.txt', + command : [generate_cap_list, cpp, config_h, missing_capability_h], + capture : true) + +generate_errno_list = find_program('generate-errno-list.sh') +errno_list_txt = custom_target( + 'errno-list.txt', + output : 'errno-list.txt', + command : [generate_errno_list, cpp], + capture : true) + +generated_gperf_headers = [] +foreach item : [['af', af_list_txt, 'af', ''], + ['arphrd', arphrd_list_txt, 'arphrd', 'ARPHRD_'], + ['cap', cap_list_txt, 'capability', ''], + ['errno', errno_list_txt, 'errno', '']] + + fname = '@0@-from-name.gperf'.format(item[0]) + gperf_file = custom_target( + fname, + input : item[1], + output : fname, + command : [generate_gperfs, item[2], item[3], '@INPUT@'], + capture : true) + + fname = '@0@-from-name.h'.format(item[0]) + target1 = custom_target( + fname, + input : gperf_file, + output : fname, + command : [gperf, + '-L', 'ANSI-C', '-t', '--ignore-case', + '-N', 'lookup_@0@'.format(item[2]), + '-H', 'hash_@0@_name'.format(item[2]), + '-p', '-C', + '@INPUT@'], + capture : true) + + fname = '@0@-to-name.h'.format(item[0]) + awkscript = '@0@-to-name.awk'.format(item[0]) + target2 = custom_target( + fname, + input : [awkscript, item[1]], + output : fname, + command : [awk, '-f', '@INPUT0@', '@INPUT1@'], + capture : true) + + generated_gperf_headers += [target1, target2] +endforeach + +basic_sources += generated_gperf_headers + +############################################################ + +arch_list = [ + 'alpha', + 'arc', + 'arm', + 'arm64', + 'i386', + 'ia64', + 'loongarch64', + 'm68k', + 'mips64', + 'mips64n32', + 'mipso32', + 'parisc', + 'powerpc', + 'powerpc64', + 'riscv32', + 'riscv64', + 's390', + 's390x', + 'sparc', + 'x86_64' +] + +run_target( + 'update-syscall-tables', + command : [update_syscall_tables_sh, meson.current_source_dir()] + arch_list) + +syscall_list_txt = files('syscall-list.txt') + +syscall_lists = [] +foreach arch: arch_list + syscall_lists += files('syscalls-@0@.txt'.format(arch)) +endforeach + +missing_syscalls_py = find_program('missing_syscalls.py') + +run_target( + 'update-syscall-header', + command : [missing_syscalls_py, + missing_syscall_def_h, + syscall_lists]) + +############################################################ + +filesystem_includes = ['linux/magic.h', + 'linux/gfs2_ondisk.h'] + +check_filesystems = find_program('check-filesystems.sh') +r = run_command([check_filesystems, cpp, files('filesystems-gperf.gperf')] + filesystem_includes, check: false) +if r.returncode() != 0 + error('Unknown filesystems defined in kernel headers:\n\n' + r.stdout()) +endif + +filesystems_gperf_h = custom_target( + 'filesystems-gperf.h', + input : 'filesystems-gperf.gperf', + output : 'filesystems-gperf.h', + command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@']) + +generate_filesystem_list = find_program('generate-filesystem-list.py') +fname = 'filesystem-list.h' +filesystem_list_h = custom_target( + fname, + input : 'filesystems-gperf.gperf', + output : fname, + command : [generate_filesystem_list, + '@INPUT@'], + capture : true) + +generate_filesystem_switch_case_h = find_program('generate-filesystem-switch-case.py') +fname = 'filesystem-switch-case.h' +filesystem_switch_case_h = custom_target( + fname, + input : 'filesystems-gperf.gperf', + output : 'filesystem-switch-case.h', + command : [generate_filesystem_switch_case_h, + '@INPUT@'], + capture : true) + +basic_sources += [filesystem_list_h, filesystem_switch_case_h, filesystems_gperf_h] + +libbasic = static_library( + 'basic', + basic_sources, + fundamental_sources, + include_directories : basic_includes, + dependencies : [versiondep, + threads, + libcap, + libm], + c_args : ['-fvisibility=default'], + build_by_default : false) + +############################################################ + +basic_gcrypt_sources = files( + 'gcrypt-util.c', + 'gcrypt-util.h') + +# A convenience library that is separate from libbasic to avoid +# unnecessary linking to libgcrypt. +libbasic_gcrypt = static_library( + 'basic-gcrypt', + basic_gcrypt_sources, + include_directories : basic_includes, + dependencies : [libgcrypt], + c_args : ['-fvisibility=default'], + build_by_default : false) + +############################################################ + +basic_compress_sources = files( + 'compress.c', + 'compress.h') + +# A convenience library that is separate from libbasic to avoid unnecessary +# linking to the compression libraries. +libbasic_compress = static_library( + 'basic-compress', + basic_compress_sources, + include_directories : basic_includes, + dependencies : [libxz, + libzstd, + liblz4], + c_args : ['-fvisibility=default'], + build_by_default : false) diff --git a/src/basic/missing_audit.h b/src/basic/missing_audit.h new file mode 100644 index 0000000..62e3c29 --- /dev/null +++ b/src/basic/missing_audit.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#if HAVE_AUDIT +#include +#endif + +#ifndef AUDIT_SERVICE_START +#define AUDIT_SERVICE_START 1130 /* Service (daemon) start */ +#endif + +#ifndef AUDIT_SERVICE_STOP +#define AUDIT_SERVICE_STOP 1131 /* Service (daemon) stop */ +#endif + +#ifndef MAX_AUDIT_MESSAGE_LENGTH +#define MAX_AUDIT_MESSAGE_LENGTH 8970 +#endif + +#ifndef AUDIT_NLGRP_MAX +#define AUDIT_NLGRP_READLOG 1 +#endif diff --git a/src/basic/missing_capability.h b/src/basic/missing_capability.h new file mode 100644 index 0000000..5adda55 --- /dev/null +++ b/src/basic/missing_capability.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +/* 3a101b8de0d39403b2c7e5c23fd0b005668acf48 (3.16) */ +#ifndef CAP_AUDIT_READ +# define CAP_AUDIT_READ 37 +#endif + +/* 980737282232b752bb14dab96d77665c15889c36 (5.8) */ +#ifndef CAP_PERFMON +# define CAP_PERFMON 38 +#endif + +/* a17b53c4a4b55ec322c132b6670743612229ee9c (5.8) */ +#ifndef CAP_BPF +# define CAP_BPF 39 +#endif + +/* 124ea650d3072b005457faed69909221c2905a1f (5.9) */ +#ifndef CAP_CHECKPOINT_RESTORE +# define CAP_CHECKPOINT_RESTORE 40 +#endif + +#define SYSTEMD_CAP_LAST_CAP CAP_CHECKPOINT_RESTORE + +#ifdef CAP_LAST_CAP +# if CAP_LAST_CAP > SYSTEMD_CAP_LAST_CAP +# if BUILD_MODE_DEVELOPER && defined(TEST_CAPABILITY_C) +# warning "The capability list here is outdated" +# endif +# else +# undef CAP_LAST_CAP +# endif +#endif +#ifndef CAP_LAST_CAP +# define CAP_LAST_CAP SYSTEMD_CAP_LAST_CAP +#endif diff --git a/src/basic/missing_drm.h b/src/basic/missing_drm.h new file mode 100644 index 0000000..0dec591 --- /dev/null +++ b/src/basic/missing_drm.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#ifndef DRM_IOCTL_SET_MASTER +#define DRM_IOCTL_SET_MASTER _IO('d', 0x1e) +#endif + +#ifndef DRM_IOCTL_DROP_MASTER +#define DRM_IOCTL_DROP_MASTER _IO('d', 0x1f) +#endif diff --git a/src/basic/missing_fcntl.h b/src/basic/missing_fcntl.h new file mode 100644 index 0000000..00937d2 --- /dev/null +++ b/src/basic/missing_fcntl.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#ifndef F_LINUX_SPECIFIC_BASE +#define F_LINUX_SPECIFIC_BASE 1024 +#endif + +#ifndef F_SETPIPE_SZ +#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) +#endif + +#ifndef F_GETPIPE_SZ +#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8) +#endif + +#ifndef F_ADD_SEALS +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +#endif + +#ifndef F_OFD_GETLK +#define F_OFD_GETLK 36 +#define F_OFD_SETLK 37 +#define F_OFD_SETLKW 38 +#endif + +#ifndef MAX_HANDLE_SZ +#define MAX_HANDLE_SZ 128 +#endif + +/* The precise definition of __O_TMPFILE is arch specific; use the + * values defined by the kernel (note: some are hexa, some are octal, + * duplicated as-is from the kernel definitions): + * - alpha, parisc, sparc: each has a specific value; + * - others: they use the "generic" value. + */ + +#ifndef __O_TMPFILE +#if defined(__alpha__) +#define __O_TMPFILE 0100000000 +#elif defined(__parisc__) || defined(__hppa__) +#define __O_TMPFILE 0400000000 +#elif defined(__sparc__) || defined(__sparc64__) +#define __O_TMPFILE 0x2000000 +#else +#define __O_TMPFILE 020000000 +#endif +#endif + +/* a horrid kludge trying to make sure that this will fail on old kernels */ +#ifndef O_TMPFILE +#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) +#endif diff --git a/src/basic/missing_fs.h b/src/basic/missing_fs.h new file mode 100644 index 0000000..6638d76 --- /dev/null +++ b/src/basic/missing_fs.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +/* linux/fs.h */ +#ifndef RENAME_NOREPLACE /* 0a7c3937a1f23f8cb5fc77ae01661e9968a51d0c (3.15) */ +#define RENAME_NOREPLACE (1 << 0) +#endif + +#ifndef BLKGETDISKSEQ +#define BLKGETDISKSEQ _IOR(0x12,128,__u64) +#endif + +/* linux/fs.h or sys/mount.h */ +#ifndef MS_MOVE +#define MS_MOVE 8192 +#endif + +#ifndef MS_REC +#define MS_REC 16384 +#endif + +#ifndef MS_PRIVATE +#define MS_PRIVATE (1<<18) +#endif + +#ifndef MS_SLAVE +#define MS_SLAVE (1<<19) +#endif + +#ifndef MS_SHARED +#define MS_SHARED (1<<20) +#endif + +#ifndef MS_RELATIME +#define MS_RELATIME (1<<21) +#endif + +#ifndef MS_KERNMOUNT +#define MS_KERNMOUNT (1<<22) +#endif + +#ifndef MS_I_VERSION +#define MS_I_VERSION (1<<23) +#endif + +#ifndef MS_STRICTATIME +#define MS_STRICTATIME (1<<24) +#endif + +#ifndef MS_LAZYTIME +#define MS_LAZYTIME (1<<25) +#endif + +/* Not exposed yet. Defined at fs/ext4/ext4.h */ +#ifndef EXT4_IOC_RESIZE_FS +#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) +#endif + +/* linux/nsfs.h */ +#ifndef NS_GET_NSTYPE /* d95fa3c76a66b6d76b1e109ea505c55e66360f3c (4.11) */ +#define NS_GET_NSTYPE _IO(0xb7, 0x3) +#endif + +#ifndef FS_PROJINHERIT_FL +#define FS_PROJINHERIT_FL 0x20000000 +#endif + +/* linux/fscrypt.h */ +#ifndef FS_KEY_DESCRIPTOR_SIZE +#define FS_KEY_DESCRIPTOR_SIZE 8 +#endif diff --git a/src/basic/missing_input.h b/src/basic/missing_input.h new file mode 100644 index 0000000..6cf16ff --- /dev/null +++ b/src/basic/missing_input.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +/* linux@c7dc65737c9a607d3e6f8478659876074ad129b8 (3.12) */ +#ifndef EVIOCREVOKE +#define EVIOCREVOKE _IOW('E', 0x91, int) +#endif + +/* linux@06a16293f71927f756dcf37558a79c0b05a91641 (4.4) */ +#ifndef EVIOCSMASK +struct input_mask { + __u32 type; + __u32 codes_size; + __u64 codes_ptr; +}; + +#define EVIOCGMASK _IOR('E', 0x92, struct input_mask) +#define EVIOCSMASK _IOW('E', 0x93, struct input_mask) +#endif + +/* linux@7611392fe8ff95ecae528b01a815ae3d72ca6b95 (3.17) */ +#ifndef INPUT_PROP_POINTING_STICK +#define INPUT_PROP_POINTING_STICK 0x05 +#endif + +/* linux@500d4160abe9a2e88b12e319c13ae3ebd1e18108 (4.0) */ +#ifndef INPUT_PROP_ACCELEROMETER +#define INPUT_PROP_ACCELEROMETER 0x06 +#endif + +/* linux@d09bbfd2a8408a995419dff0d2ba906013cf4cc9 (3.11) */ +#ifndef BTN_DPAD_UP +#define BTN_DPAD_UP 0x220 +#define BTN_DPAD_DOWN 0x221 +#define BTN_DPAD_LEFT 0x222 +#define BTN_DPAD_RIGHT 0x223 +#endif + +/* linux@358f24704f2f016af7d504b357cdf32606091d07 (3.13) */ +#ifndef KEY_ALS_TOGGLE +#define KEY_ALS_TOGGLE 0x230 +#endif diff --git a/src/basic/missing_ioprio.h b/src/basic/missing_ioprio.h new file mode 100644 index 0000000..9cbd172 --- /dev/null +++ b/src/basic/missing_ioprio.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +/* Match values uses by the kernel internally, as no public header seems to exist. */ + +#ifndef IOPRIO_N_CLASSES +# define IOPRIO_N_CLASSES 8 +#endif + +#ifndef IOPRIO_BE_NR +# define IOPRIO_BE_NR 8 +#endif + +#ifndef IOPRIO_CLASS_NONE +# define IOPRIO_CLASS_NONE 0 +#endif +#ifndef IOPRIO_CLASS_RT +# define IOPRIO_CLASS_RT 1 +#endif +#ifndef IOPRIO_CLASS_BE +# define IOPRIO_CLASS_BE 2 +#endif +#ifndef IOPRIO_CLASS_IDLE +# define IOPRIO_CLASS_IDLE 3 +#endif + +#ifndef IOPRIO_WHO_PROCESS +# define IOPRIO_WHO_PROCESS 1 +#endif +#ifndef IOPRIO_WHO_PGRP +# define IOPRIO_WHO_PGRP 2 +#endif +#ifndef IOPRIO_WHO_USER +# define IOPRIO_WHO_USER 3 +#endif + +#ifndef IOPRIO_BITS +# define IOPRIO_BITS 16 +#endif +#ifndef IOPRIO_N_CLASSES +# define IOPRIO_N_CLASSES 8 +#endif +#ifndef IOPRIO_CLASS_SHIFT +# define IOPRIO_CLASS_SHIFT 13 +#endif + +static inline int ioprio_prio_class(int value) { + return value >> IOPRIO_CLASS_SHIFT; +} + +static inline int ioprio_prio_data(int value) { + return value & ((1 << IOPRIO_CLASS_SHIFT) - 1); +} + +static inline int ioprio_prio_value(int class, int data) { + return (class << IOPRIO_CLASS_SHIFT) | data; +} diff --git a/src/basic/missing_keyctl.h b/src/basic/missing_keyctl.h new file mode 100644 index 0000000..081003a --- /dev/null +++ b/src/basic/missing_keyctl.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +#ifndef KEYCTL_JOIN_SESSION_KEYRING +#define KEYCTL_JOIN_SESSION_KEYRING 1 +#endif + +#ifndef KEYCTL_CHOWN +#define KEYCTL_CHOWN 4 +#endif + +#ifndef KEYCTL_SETPERM +#define KEYCTL_SETPERM 5 +#endif + +#ifndef KEYCTL_DESCRIBE +#define KEYCTL_DESCRIBE 6 +#endif + +#ifndef KEYCTL_LINK +#define KEYCTL_LINK 8 +#endif + +#ifndef KEYCTL_READ +#define KEYCTL_READ 11 +#endif + +#ifndef KEYCTL_SET_TIMEOUT +#define KEYCTL_SET_TIMEOUT 15 +#endif + +#ifndef KEY_SPEC_USER_KEYRING +#define KEY_SPEC_USER_KEYRING -4 +#endif + +#ifndef KEY_SPEC_SESSION_KEYRING +#define KEY_SPEC_SESSION_KEYRING -3 +#endif + +/* From linux/key.h */ +#ifndef KEY_POS_VIEW + +typedef int32_t key_serial_t; + +#define KEY_POS_VIEW 0x01000000 +#define KEY_POS_READ 0x02000000 +#define KEY_POS_WRITE 0x04000000 +#define KEY_POS_SEARCH 0x08000000 +#define KEY_POS_LINK 0x10000000 +#define KEY_POS_SETATTR 0x20000000 +#define KEY_POS_ALL 0x3f000000 + +#define KEY_USR_VIEW 0x00010000 +#define KEY_USR_READ 0x00020000 +#define KEY_USR_WRITE 0x00040000 +#define KEY_USR_SEARCH 0x00080000 +#define KEY_USR_LINK 0x00100000 +#define KEY_USR_SETATTR 0x00200000 +#define KEY_USR_ALL 0x003f0000 + +#define KEY_GRP_VIEW 0x00000100 +#define KEY_GRP_READ 0x00000200 +#define KEY_GRP_WRITE 0x00000400 +#define KEY_GRP_SEARCH 0x00000800 +#define KEY_GRP_LINK 0x00001000 +#define KEY_GRP_SETATTR 0x00002000 +#define KEY_GRP_ALL 0x00003f00 + +#define KEY_OTH_VIEW 0x00000001 +#define KEY_OTH_READ 0x00000002 +#define KEY_OTH_WRITE 0x00000004 +#define KEY_OTH_SEARCH 0x00000008 +#define KEY_OTH_LINK 0x00000010 +#define KEY_OTH_SETATTR 0x00000020 +#define KEY_OTH_ALL 0x0000003f +#endif diff --git a/src/basic/missing_loop.h b/src/basic/missing_loop.h new file mode 100644 index 0000000..7141544 --- /dev/null +++ b/src/basic/missing_loop.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#ifndef LOOP_CONFIGURE +struct loop_config { + __u32 fd; + __u32 block_size; + struct loop_info64 info; + __u64 __reserved[8]; +}; + +#define LOOP_CONFIGURE 0x4C0A +#endif + +#ifndef LO_FLAGS_DIRECT_IO +#define LO_FLAGS_DIRECT_IO 16 +#define LOOP_SET_DIRECT_IO 0x4C08 +#endif + +#ifndef LOOP_SET_STATUS_SETTABLE_FLAGS +#define LOOP_SET_STATUS_SETTABLE_FLAGS (LO_FLAGS_AUTOCLEAR | LO_FLAGS_PARTSCAN | LO_FLAGS_DIRECT_IO) +#endif diff --git a/src/basic/missing_magic.h b/src/basic/missing_magic.h new file mode 100644 index 0000000..c104fcf --- /dev/null +++ b/src/basic/missing_magic.h @@ -0,0 +1,194 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +/* 62aa81d7c4c24b90fdb61da70ac0dbbc414f9939 (4.13) */ +#ifndef OCFS2_SUPER_MAGIC +#define OCFS2_SUPER_MAGIC 0x7461636f +#endif + +/* 67e9c74b8a873408c27ac9a8e4c1d1c8d72c93ff (4.5) */ +#ifndef CGROUP2_SUPER_MAGIC +#define CGROUP2_SUPER_MAGIC 0x63677270 +#endif + +/* 4282d60689d4f21b40692029080440cc58e8a17d (4.1) */ +#ifndef TRACEFS_MAGIC +#define TRACEFS_MAGIC 0x74726163 +#endif + +/* e149ed2b805fefdccf7ccdfc19eca22fdd4514ac (3.19) */ +#ifndef NSFS_MAGIC +#define NSFS_MAGIC 0x6e736673 +#endif + +/* b2197755b2633e164a439682fb05a9b5ea48f706 (4.4) */ +#ifndef BPF_FS_MAGIC +#define BPF_FS_MAGIC 0xcafe4a11 +#endif + +/* Not exposed yet (4.20). Defined at ipc/mqueue.c */ +#ifndef MQUEUE_MAGIC +#define MQUEUE_MAGIC 0x19800202 +#endif + +/* Not exposed yet (as of Linux 5.4). Defined in fs/xfs/libxfs/xfs_format.h */ +#ifndef XFS_SB_MAGIC +#define XFS_SB_MAGIC 0x58465342 +#endif + +/* dea2903719283c156b53741126228c4a1b40440f (5.17) */ +#ifndef CIFS_SUPER_MAGIC +#define CIFS_SUPER_MAGIC 0xFF534D42 +#endif + +/* dea2903719283c156b53741126228c4a1b40440f (5.17) */ +#ifndef SMB2_SUPER_MAGIC +#define SMB2_SUPER_MAGIC 0xFE534D42 +#endif + +/* 257f871993474e2bde6c497b54022c362cf398e1 (4.5) */ +#ifndef OVERLAYFS_SUPER_MAGIC +#define OVERLAYFS_SUPER_MAGIC 0x794c7630 +#endif + +/* 2a28900be20640fcd1e548b1e3bad79e8221fcf9 (4.7) */ +#ifndef UDF_SUPER_MAGIC +#define UDF_SUPER_MAGIC 0x15013346 +#endif + +/* b1123ea6d3b3da25af5c8a9d843bd07ab63213f4 (4.8)*/ +#ifndef BALLOON_KVM_MAGIC +#define BALLOON_KVM_MAGIC 0x13661366 +#endif + +/* 48b4800a1c6af2cdda344ea4e2c843dcc1f6afc9 (4.8) */ +#ifndef ZSMALLOC_MAGIC +#define ZSMALLOC_MAGIC 0x58295829 +#endif + +/* 3bc52c45bac26bf7ed1dc8d287ad1aeaed1250b6 (4.9) */ +#ifndef DAXFS_MAGIC +#define DAXFS_MAGIC 0x64646178 +#endif + +/* 5ff193fbde20df5d80fec367cea3e7856c057320 (4.10) */ +#ifndef RDTGROUP_SUPER_MAGIC +#define RDTGROUP_SUPER_MAGIC 0x7655821 +#endif + +/* a481f4d917835cad86701fc0d1e620c74bb5cd5f (4.13) */ +#ifndef AAFS_MAGIC +#define AAFS_MAGIC 0x5a3c69f0 +#endif + +/* f044c8847bb61eff5e1e95b6f6bb950e7f4a73a4 (4.15) */ +#ifndef AFS_FS_MAGIC +#define AFS_FS_MAGIC 0x6b414653 +#endif + +/* dddde68b8f06dd83486124b8d245e7bfb15c185d (4.20) */ +#ifndef XFS_SUPER_MAGIC +#define XFS_SUPER_MAGIC 0x58465342 +#endif + +/* 3ad20fe393b31025bebfc2d76964561f65df48aa (5.0) */ +#ifndef BINDERFS_SUPER_MAGIC +#define BINDERFS_SUPER_MAGIC 0x6c6f6f70 +#endif + +/* ed63bb1d1f8469586006a9ca63c42344401aa2ab (5.3) */ +#ifndef DMA_BUF_MAGIC +#define DMA_BUF_MAGIC 0x444d4142 +#endif + +/* ea8157ab2ae5e914dd427e5cfab533b6da3819cd (5.3) */ +#ifndef Z3FOLD_MAGIC +#define Z3FOLD_MAGIC 0x33 +#endif + +/* 47e4937a4a7ca4184fd282791dfee76c6799966a (5.4) */ +#ifndef EROFS_SUPER_MAGIC_V1 +#define EROFS_SUPER_MAGIC_V1 0xe0f5e1e2 +#endif + +/* fe030c9b85e6783bc52fe86449c0a4b8aa16c753 (5.5) */ +#ifndef PPC_CMM_MAGIC +#define PPC_CMM_MAGIC 0xc7571590 +#endif + +/* 8dcc1a9d90c10fa4143e5c17821082e5e60e46a1 (5.6) */ +#ifndef ZONEFS_MAGIC +#define ZONEFS_MAGIC 0x5a4f4653 +#endif + +/* 3234ac664a870e6ea69ae3a57d824cd7edbeacc5 (5.8) */ +#ifndef DEVMEM_MAGIC +#define DEVMEM_MAGIC 0x454d444d +#endif + +/* Not in mainline but included in Ubuntu */ +#ifndef SHIFTFS_MAGIC +#define SHIFTFS_MAGIC 0x6a656a62 +#endif + +/* 1507f51255c9ff07d75909a84e7c0d7f3c4b2f49 (5.14) */ +#ifndef SECRETMEM_MAGIC +#define SECRETMEM_MAGIC 0x5345434d +#endif + +/* Not exposed yet. Defined at fs/fuse/inode.c */ +#ifndef FUSE_SUPER_MAGIC +#define FUSE_SUPER_MAGIC 0x65735546 +#endif + +/* Not exposed yet. Defined at fs/fuse/control.c */ +#ifndef FUSE_CTL_SUPER_MAGIC +#define FUSE_CTL_SUPER_MAGIC 0x65735543 +#endif + +/* Not exposed yet. Defined at fs/ceph/super.h */ +#ifndef CEPH_SUPER_MAGIC +#define CEPH_SUPER_MAGIC 0x00c36400 +#endif + +/* Not exposed yet. Defined at fs/orangefs/orangefs-kernel.h */ +#ifndef ORANGEFS_DEVREQ_MAGIC +#define ORANGEFS_DEVREQ_MAGIC 0x20030529 +#endif + +/* linux/gfs2_ondisk.h */ +#ifndef GFS2_MAGIC +#define GFS2_MAGIC 0x01161970 +#endif + +/* Not exposed yet. Defined at fs/configfs/mount.c */ +#ifndef CONFIGFS_MAGIC +#define CONFIGFS_MAGIC 0x62656570 +#endif + +/* Not exposed yet. Defined at fs/vboxsf/super.c */ +#ifndef VBOXSF_SUPER_MAGIC +#define VBOXSF_SUPER_MAGIC 0x786f4256 +#endif + +/* Not exposed yet. Defined at fs/exfat/exfat_fs.h */ +#ifndef EXFAT_SUPER_MAGIC +#define EXFAT_SUPER_MAGIC 0x2011BAB0UL +#endif + +/* Not exposed yet, internally actually called RPCAUTH_GSSMAGIC. Defined in net/sunrpc/rpc_pipe.c */ +#ifndef RPC_PIPEFS_SUPER_MAGIC +#define RPC_PIPEFS_SUPER_MAGIC 0x67596969 +#endif + +/* Not exposed yet, defined at fs/ntfs/ntfs.h */ +#ifndef NTFS_SB_MAGIC +#define NTFS_SB_MAGIC 0x5346544e +#endif + +/* Not exposed yet, encoded literally in fs/ntfs3/super.c. */ +#ifndef NTFS3_SUPER_MAGIC +#define NTFS3_SUPER_MAGIC 0x7366746e +#endif diff --git a/src/basic/missing_mman.h b/src/basic/missing_mman.h new file mode 100644 index 0000000..4a10912 --- /dev/null +++ b/src/basic/missing_mman.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#ifndef MFD_ALLOW_SEALING +#define MFD_ALLOW_SEALING 0x0002U +#endif + +#ifndef MFD_CLOEXEC +#define MFD_CLOEXEC 0x0001U +#endif diff --git a/src/basic/missing_mount.h b/src/basic/missing_mount.h new file mode 100644 index 0000000..69b0bcf --- /dev/null +++ b/src/basic/missing_mount.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +/* dab741e0e02bd3c4f5e2e97be74b39df2523fc6e (5.10) */ +#ifndef MS_NOSYMFOLLOW +#define MS_NOSYMFOLLOW 256 +#endif diff --git a/src/basic/missing_network.h b/src/basic/missing_network.h new file mode 100644 index 0000000..776c7c8 --- /dev/null +++ b/src/basic/missing_network.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +/* linux/in6.h or netinet/in.h */ +#ifndef IPV6_UNICAST_IF +#define IPV6_UNICAST_IF 76 +#endif + +/* linux/in6.h or netinet/in.h */ +#ifndef IPV6_TRANSPARENT +#define IPV6_TRANSPARENT 75 +#endif + +/* Not exposed but defined at include/net/ip.h */ +#ifndef IPV4_MIN_MTU +#define IPV4_MIN_MTU 68 +#endif + +/* linux/ipv6.h */ +#ifndef IPV6_MIN_MTU +#define IPV6_MIN_MTU 1280 +#endif + +/* Note that LOOPBACK_IFINDEX is currently not exposed by the + * kernel/glibc, but hardcoded internally by the kernel. However, as + * it is exported to userspace indirectly via rtnetlink and the + * ioctls, and made use of widely we define it here too, in a way that + * is compatible with the kernel's internal definition. */ +#ifndef LOOPBACK_IFINDEX +#define LOOPBACK_IFINDEX 1 +#endif + +/* Not exposed yet. Similar values are defined in net/ethernet.h */ +#ifndef ETHERTYPE_LLDP +#define ETHERTYPE_LLDP 0x88cc +#endif + +/* Not exposed but defined in linux/netdevice.h */ +#ifndef MAX_PHYS_ITEM_ID_LEN +#define MAX_PHYS_ITEM_ID_LEN 32 +#endif + +/* Not exposed but defined in include/net/bonding.h */ +#ifndef BOND_MAX_ARP_TARGETS +#define BOND_MAX_ARP_TARGETS 16 +#endif + +/* Not exposed but defined in include/linux/ieee80211.h */ +#ifndef IEEE80211_MAX_SSID_LEN +#define IEEE80211_MAX_SSID_LEN 32 +#endif + +/* Not exposed but defined in include/net/netlabel.h */ +#ifndef NETLBL_NLTYPE_UNLABELED_NAME +#define NETLBL_NLTYPE_UNLABELED_NAME "NLBL_UNLBL" +#endif + +/* Not exposed but defined in net/netlabel/netlabel_unlabeled.h */ +enum { + NLBL_UNLABEL_C_UNSPEC, + NLBL_UNLABEL_C_ACCEPT, + NLBL_UNLABEL_C_LIST, + NLBL_UNLABEL_C_STATICADD, + NLBL_UNLABEL_C_STATICREMOVE, + NLBL_UNLABEL_C_STATICLIST, + NLBL_UNLABEL_C_STATICADDDEF, + NLBL_UNLABEL_C_STATICREMOVEDEF, + NLBL_UNLABEL_C_STATICLISTDEF, + __NLBL_UNLABEL_C_MAX, +}; + +/* Not exposed but defined in net/netlabel/netlabel_unlabeled.h */ +enum { + NLBL_UNLABEL_A_UNSPEC, + NLBL_UNLABEL_A_ACPTFLG, + NLBL_UNLABEL_A_IPV6ADDR, + NLBL_UNLABEL_A_IPV6MASK, + NLBL_UNLABEL_A_IPV4ADDR, + NLBL_UNLABEL_A_IPV4MASK, + NLBL_UNLABEL_A_IFACE, + NLBL_UNLABEL_A_SECCTX, + __NLBL_UNLABEL_A_MAX, +}; diff --git a/src/basic/missing_prctl.h b/src/basic/missing_prctl.h new file mode 100644 index 0000000..ab85130 --- /dev/null +++ b/src/basic/missing_prctl.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +/* 58319057b7847667f0c9585b9de0e8932b0fdb08 (4.3) */ +#ifndef PR_CAP_AMBIENT +#define PR_CAP_AMBIENT 47 + +#define PR_CAP_AMBIENT_IS_SET 1 +#define PR_CAP_AMBIENT_RAISE 2 +#define PR_CAP_AMBIENT_LOWER 3 +#define PR_CAP_AMBIENT_CLEAR_ALL 4 +#endif diff --git a/src/basic/missing_random.h b/src/basic/missing_random.h new file mode 100644 index 0000000..443b913 --- /dev/null +++ b/src/basic/missing_random.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#if USE_SYS_RANDOM_H +# include +#else +# include +#endif + +#ifndef GRND_NONBLOCK +#define GRND_NONBLOCK 0x0001 +#endif + +#ifndef GRND_RANDOM +#define GRND_RANDOM 0x0002 +#endif + +#ifndef GRND_INSECURE +#define GRND_INSECURE 0x0004 +#endif diff --git a/src/basic/missing_resource.h b/src/basic/missing_resource.h new file mode 100644 index 0000000..6e76765 --- /dev/null +++ b/src/basic/missing_resource.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#ifndef RLIMIT_RTTIME +#define RLIMIT_RTTIME 15 +#endif + +/* If RLIMIT_RTTIME is not defined, then we cannot use RLIMIT_NLIMITS as is */ +#define _RLIMIT_MAX (RLIMIT_RTTIME+1 > RLIMIT_NLIMITS ? RLIMIT_RTTIME+1 : RLIMIT_NLIMITS) diff --git a/src/basic/missing_sched.h b/src/basic/missing_sched.h new file mode 100644 index 0000000..bcd5b77 --- /dev/null +++ b/src/basic/missing_sched.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#ifndef CLONE_NEWCGROUP +#define CLONE_NEWCGROUP 0x02000000 +#endif + +/* 769071ac9f20b6a447410c7eaa55d1a5233ef40c (5.8) */ +#ifndef CLONE_NEWTIME +#define CLONE_NEWTIME 0x00000080 +#endif + +/* Not exposed yet. Defined at include/linux/sched.h */ +#ifndef PF_KTHREAD +#define PF_KTHREAD 0x00200000 +#endif + +/* The maximum thread/process name length including trailing NUL byte. This mimics the kernel definition of the same + * name, which we need in userspace at various places but is not defined in userspace currently, neither under this + * name nor any other. */ +/* Not exposed yet. Defined at include/linux/sched.h */ +#ifndef TASK_COMM_LEN +#define TASK_COMM_LEN 16 +#endif diff --git a/src/basic/missing_securebits.h b/src/basic/missing_securebits.h new file mode 100644 index 0000000..03fad6f --- /dev/null +++ b/src/basic/missing_securebits.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +/* 746bf6d64275be0c65b0631d8a72b16f1454cfa1 (4.3) */ +#ifndef SECURE_NO_CAP_AMBIENT_RAISE +#define SECURE_NO_CAP_AMBIENT_RAISE 6 +#define SECURE_NO_CAP_AMBIENT_RAISE_LOCKED 7 /* make bit-6 immutable */ +#define SECBIT_NO_CAP_AMBIENT_RAISE (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE)) +#define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE_LOCKED)) + +#undef SECURE_ALL_BITS +#define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ + issecure_mask(SECURE_NO_SETUID_FIXUP) | \ + issecure_mask(SECURE_KEEP_CAPS) | \ + issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE)) +#endif diff --git a/src/basic/missing_socket.h b/src/basic/missing_socket.h new file mode 100644 index 0000000..30ac297 --- /dev/null +++ b/src/basic/missing_socket.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#if HAVE_LINUX_VM_SOCKETS_H +#include +#else +#define VMADDR_CID_ANY -1U +struct sockaddr_vm { + unsigned short svm_family; + unsigned short svm_reserved1; + unsigned int svm_port; + unsigned int svm_cid; + unsigned char svm_zero[sizeof(struct sockaddr) - + sizeof(unsigned short) - + sizeof(unsigned short) - + sizeof(unsigned int) - + sizeof(unsigned int)]; +}; +#endif /* !HAVE_LINUX_VM_SOCKETS_H */ + +#ifndef AF_VSOCK +#define AF_VSOCK 40 +#endif + +#ifndef SO_REUSEPORT +#define SO_REUSEPORT 15 +#endif + +#ifndef SO_PEERGROUPS +#define SO_PEERGROUPS 59 +#endif + +#ifndef SO_BINDTOIFINDEX +#define SO_BINDTOIFINDEX 62 +#endif + +#ifndef SOL_NETLINK +#define SOL_NETLINK 270 +#endif + +#ifndef SOL_ALG +#define SOL_ALG 279 +#endif + +/* Not exposed yet. Defined in include/linux/socket.h. */ +#ifndef SOL_SCTP +#define SOL_SCTP 132 +#endif + +/* Not exposed yet. Defined in include/linux/socket.h */ +#ifndef SCM_SECURITY +#define SCM_SECURITY 0x03 +#endif + +/* netinet/in.h */ +#ifndef IP_FREEBIND +#define IP_FREEBIND 15 +#endif + +#ifndef IP_TRANSPARENT +#define IP_TRANSPARENT 19 +#endif + +#ifndef IPV6_FREEBIND +#define IPV6_FREEBIND 78 +#endif + +#ifndef IP_RECVFRAGSIZE +#define IP_RECVFRAGSIZE 25 +#endif + +#ifndef IPV6_RECVFRAGSIZE +#define IPV6_RECVFRAGSIZE 77 +#endif + +/* linux/sockios.h */ +#ifndef SIOCGSKNS +#define SIOCGSKNS 0x894C +#endif diff --git a/src/basic/missing_stat.h b/src/basic/missing_stat.h new file mode 100644 index 0000000..372fdf9 --- /dev/null +++ b/src/basic/missing_stat.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +#if WANT_LINUX_STAT_H +#include +#endif + +/* Thew newest definition we are aware of (fa2fcf4f1df1559a0a4ee0f46915b496cc2ebf60; 5.8) */ +#define STATX_DEFINITION { \ + __u32 stx_mask; \ + __u32 stx_blksize; \ + __u64 stx_attributes; \ + __u32 stx_nlink; \ + __u32 stx_uid; \ + __u32 stx_gid; \ + __u16 stx_mode; \ + __u16 __spare0[1]; \ + __u64 stx_ino; \ + __u64 stx_size; \ + __u64 stx_blocks; \ + __u64 stx_attributes_mask; \ + struct statx_timestamp stx_atime; \ + struct statx_timestamp stx_btime; \ + struct statx_timestamp stx_ctime; \ + struct statx_timestamp stx_mtime; \ + __u32 stx_rdev_major; \ + __u32 stx_rdev_minor; \ + __u32 stx_dev_major; \ + __u32 stx_dev_minor; \ + __u64 stx_mnt_id; \ + __u64 __spare2; \ + __u64 __spare3[12]; \ +} + +#if !HAVE_STRUCT_STATX +struct statx_timestamp { + __s64 tv_sec; + __u32 tv_nsec; + __s32 __reserved; +}; + +struct statx STATX_DEFINITION; +#endif + +/* Always define the newest version we are aware of as a distinct type, so that we can use it even if glibc + * defines an older definition */ +struct new_statx STATX_DEFINITION; + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef AT_STATX_SYNC_AS_STAT +#define AT_STATX_SYNC_AS_STAT 0x0000 +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef AT_STATX_FORCE_SYNC +#define AT_STATX_FORCE_SYNC 0x2000 +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef AT_STATX_DONT_SYNC +#define AT_STATX_DONT_SYNC 0x4000 +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef STATX_TYPE +#define STATX_TYPE 0x00000001U +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef STATX_MODE +#define STATX_MODE 0x00000002U +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef STATX_NLINK +#define STATX_NLINK 0x00000004U +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef STATX_UID +#define STATX_UID 0x00000008U +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef STATX_GID +#define STATX_GID 0x00000010U +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef STATX_ATIME +#define STATX_ATIME 0x00000020U +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef STATX_MTIME +#define STATX_MTIME 0x00000040U +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef STATX_CTIME +#define STATX_CTIME 0x00000080U +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef STATX_INO +#define STATX_INO 0x00000100U +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef STATX_SIZE +#define STATX_SIZE 0x00000200U +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef STATX_BLOCKS +#define STATX_BLOCKS 0x00000400U +#endif + +/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +#ifndef STATX_BTIME +#define STATX_BTIME 0x00000800U +#endif + +/* fa2fcf4f1df1559a0a4ee0f46915b496cc2ebf60 (5.8) */ +#ifndef STATX_MNT_ID +#define STATX_MNT_ID 0x00001000U +#endif + +/* 80340fe3605c0e78cfe496c3b3878be828cfdbfe (5.8) */ +#ifndef STATX_ATTR_MOUNT_ROOT +#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ +#endif diff --git a/src/basic/missing_stdlib.h b/src/basic/missing_stdlib.h new file mode 100644 index 0000000..8c76f93 --- /dev/null +++ b/src/basic/missing_stdlib.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +/* stdlib.h */ +#if !HAVE_SECURE_GETENV +# if HAVE___SECURE_GETENV +# define secure_getenv __secure_getenv +# else +# error "neither secure_getenv nor __secure_getenv are available" +# endif +#endif diff --git a/src/basic/missing_syscall.h b/src/basic/missing_syscall.h new file mode 100644 index 0000000..d54e59f --- /dev/null +++ b/src/basic/missing_syscall.h @@ -0,0 +1,648 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +/* Missing glibc definitions to access certain kernel APIs */ + +#include +#include +#if HAVE_LINUX_TIME_TYPES_H +/* This header defines __kernel_timespec for us, but is only available since Linux 5.1, hence conditionally + * include this. */ +#include +#endif +#include +#include +#include +#include +#include + +#ifdef ARCH_MIPS +#include +#endif + +#include "macro.h" +#include "missing_keyctl.h" +#include "missing_stat.h" +#include "missing_syscall_def.h" + +/* linux/kcmp.h */ +#ifndef KCMP_FILE /* 3f4994cfc15f38a3159c6e3a4b3ab2e1481a6b02 (3.19) */ +#define KCMP_FILE 0 +#endif + +/* ======================================================================= */ + +#if !HAVE_PIVOT_ROOT +static inline int missing_pivot_root(const char *new_root, const char *put_old) { + return syscall(__NR_pivot_root, new_root, put_old); +} + +# define pivot_root missing_pivot_root +#endif + +/* ======================================================================= */ + +#if !HAVE_IOPRIO_GET +static inline int missing_ioprio_get(int which, int who) { + return syscall(__NR_ioprio_get, which, who); +} + +# define ioprio_get missing_ioprio_get +#endif + +/* ======================================================================= */ + +#if !HAVE_IOPRIO_SET +static inline int missing_ioprio_set(int which, int who, int ioprio) { + return syscall(__NR_ioprio_set, which, who, ioprio); +} + +# define ioprio_set missing_ioprio_set +#endif + +/* ======================================================================= */ + +#if !HAVE_MEMFD_CREATE +static inline int missing_memfd_create(const char *name, unsigned int flags) { +# ifdef __NR_memfd_create + return syscall(__NR_memfd_create, name, flags); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define memfd_create missing_memfd_create +#endif + +/* ======================================================================= */ + +#if !HAVE_GETRANDOM +/* glibc says getrandom() returns ssize_t */ +static inline ssize_t missing_getrandom(void *buffer, size_t count, unsigned flags) { +# ifdef __NR_getrandom + return syscall(__NR_getrandom, buffer, count, flags); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define getrandom missing_getrandom +#endif + +/* ======================================================================= */ + +/* The syscall has been defined since forever, but the glibc wrapper was missing. */ +#if !HAVE_GETTID +static inline pid_t missing_gettid(void) { +# if defined __NR_gettid && __NR_gettid >= 0 + return (pid_t) syscall(__NR_gettid); +# else +# error "__NR_gettid not defined" +# endif +} + +# define gettid missing_gettid +#endif + +/* ======================================================================= */ + +#if !HAVE_NAME_TO_HANDLE_AT +struct file_handle { + unsigned int handle_bytes; + int handle_type; + unsigned char f_handle[0]; +}; + +static inline int missing_name_to_handle_at(int fd, const char *name, struct file_handle *handle, int *mnt_id, int flags) { +# ifdef __NR_name_to_handle_at + return syscall(__NR_name_to_handle_at, fd, name, handle, mnt_id, flags); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define name_to_handle_at missing_name_to_handle_at +#endif + +/* ======================================================================= */ + +#if !HAVE_SETNS +static inline int missing_setns(int fd, int nstype) { +# ifdef __NR_setns + return syscall(__NR_setns, fd, nstype); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define setns missing_setns +#endif + +/* ======================================================================= */ + +static inline pid_t raw_getpid(void) { +#if defined(__alpha__) + return (pid_t) syscall(__NR_getxpid); +#else + return (pid_t) syscall(__NR_getpid); +#endif +} + +/* ======================================================================= */ + +#if !HAVE_RENAMEAT2 +static inline int missing_renameat2(int oldfd, const char *oldname, int newfd, const char *newname, unsigned flags) { +# ifdef __NR_renameat2 + return syscall(__NR_renameat2, oldfd, oldname, newfd, newname, flags); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define renameat2 missing_renameat2 +#endif + +/* ======================================================================= */ + +#if !HAVE_KCMP +static inline int missing_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2) { +# if defined __NR_kcmp && __NR_kcmp >= 0 + return syscall(__NR_kcmp, pid1, pid2, type, idx1, idx2); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define kcmp missing_kcmp +#endif + +/* ======================================================================= */ + +#if !HAVE_KEYCTL +static inline long missing_keyctl(int cmd, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { +# if defined __NR_keyctl && __NR_keyctl >= 0 + return syscall(__NR_keyctl, cmd, arg2, arg3, arg4, arg5); +# else + errno = ENOSYS; + return -1; +# endif + +# define keyctl missing_keyctl +} + +static inline key_serial_t missing_add_key(const char *type, const char *description, const void *payload, size_t plen, key_serial_t ringid) { +# if defined __NR_add_key && __NR_add_key >= 0 + return syscall(__NR_add_key, type, description, payload, plen, ringid); +# else + errno = ENOSYS; + return -1; +# endif + +# define add_key missing_add_key +} + +static inline key_serial_t missing_request_key(const char *type, const char *description, const char * callout_info, key_serial_t destringid) { +# if defined __NR_request_key && __NR_request_key >= 0 + return syscall(__NR_request_key, type, description, callout_info, destringid); +# else + errno = ENOSYS; + return -1; +# endif + +# define request_key missing_request_key +} +#endif + +/* ======================================================================= */ + +#if !HAVE_COPY_FILE_RANGE +static inline ssize_t missing_copy_file_range(int fd_in, loff_t *off_in, + int fd_out, loff_t *off_out, + size_t len, + unsigned int flags) { +# ifdef __NR_copy_file_range + return syscall(__NR_copy_file_range, fd_in, off_in, fd_out, off_out, len, flags); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define copy_file_range missing_copy_file_range +#endif + +/* ======================================================================= */ + +#if !HAVE_BPF +union bpf_attr; + +static inline int missing_bpf(int cmd, union bpf_attr *attr, size_t size) { +#ifdef __NR_bpf + return (int) syscall(__NR_bpf, cmd, attr, size); +#else + errno = ENOSYS; + return -1; +#endif +} + +# define bpf missing_bpf +#endif + +/* ======================================================================= */ + +#if !HAVE_STATX +struct statx; + +static inline ssize_t missing_statx(int dfd, const char *filename, unsigned flags, unsigned int mask, struct statx *buffer) { +# ifdef __NR_statx + return syscall(__NR_statx, dfd, filename, flags, mask, buffer); +# else + errno = ENOSYS; + return -1; +# endif +} +#endif + +/* This typedef is supposed to be always defined. */ +typedef struct statx struct_statx; + +#if !HAVE_STATX +# define statx(dfd, filename, flags, mask, buffer) missing_statx(dfd, filename, flags, mask, buffer) +#endif + +/* ======================================================================= */ + +#if !HAVE_SET_MEMPOLICY +enum { + MPOL_DEFAULT, + MPOL_PREFERRED, + MPOL_BIND, + MPOL_INTERLEAVE, + MPOL_LOCAL, +}; + +static inline long missing_set_mempolicy(int mode, const unsigned long *nodemask, + unsigned long maxnode) { + long i; +# if defined __NR_set_mempolicy && __NR_set_mempolicy >= 0 + i = syscall(__NR_set_mempolicy, mode, nodemask, maxnode); +# else + errno = ENOSYS; + i = -1; +# endif + return i; +} + +# define set_mempolicy missing_set_mempolicy +#endif + +#if !HAVE_GET_MEMPOLICY +static inline long missing_get_mempolicy(int *mode, unsigned long *nodemask, + unsigned long maxnode, void *addr, + unsigned long flags) { + long i; +# if defined __NR_get_mempolicy && __NR_get_mempolicy >= 0 + i = syscall(__NR_get_mempolicy, mode, nodemask, maxnode, addr, flags); +# else + errno = ENOSYS; + i = -1; +# endif + return i; +} + +# define get_mempolicy missing_get_mempolicy +#endif + +/* ======================================================================= */ + +#if !HAVE_PIDFD_SEND_SIGNAL +static inline int missing_pidfd_send_signal(int fd, int sig, siginfo_t *info, unsigned flags) { +# ifdef __NR_pidfd_send_signal + return syscall(__NR_pidfd_send_signal, fd, sig, info, flags); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define pidfd_send_signal missing_pidfd_send_signal +#endif + +#if !HAVE_PIDFD_OPEN +static inline int missing_pidfd_open(pid_t pid, unsigned flags) { +# ifdef __NR_pidfd_open + return syscall(__NR_pidfd_open, pid, flags); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define pidfd_open missing_pidfd_open +#endif + +/* ======================================================================= */ + +#if !HAVE_RT_SIGQUEUEINFO +static inline int missing_rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t *info) { +# if defined __NR_rt_sigqueueinfo && __NR_rt_sigqueueinfo >= 0 + return syscall(__NR_rt_sigqueueinfo, tgid, sig, info); +# else +# error "__NR_rt_sigqueueinfo not defined" +# endif +} + +# define rt_sigqueueinfo missing_rt_sigqueueinfo +#endif + +/* ======================================================================= */ + +#if !HAVE_EXECVEAT +static inline int missing_execveat(int dirfd, const char *pathname, + char *const argv[], char *const envp[], + int flags) { +# if defined __NR_execveat && __NR_execveat >= 0 + return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); +# else + errno = ENOSYS; + return -1; +# endif +} + +# undef AT_EMPTY_PATH +# define AT_EMPTY_PATH 0x1000 +# define execveat missing_execveat +#endif + +/* ======================================================================= */ + +#if !HAVE_CLOSE_RANGE +static inline int missing_close_range(int first_fd, int end_fd, unsigned flags) { +# ifdef __NR_close_range + /* Kernel-side the syscall expects fds as unsigned integers (just like close() actually), while + * userspace exclusively uses signed integers for fds. We don't know just yet how glibc is going to + * wrap this syscall, but let's assume it's going to be similar to what they do for close(), + * i.e. make the same unsigned → signed type change from the raw kernel syscall compared to the + * userspace wrapper. There's only one caveat for this: unlike for close() there's the special + * UINT_MAX fd value for the 'end_fd' argument. Let's safely map that to -1 here. And let's refuse + * any other negative values. */ + if ((first_fd < 0) || (end_fd < 0 && end_fd != -1)) { + errno = -EBADF; + return -1; + } + + return syscall(__NR_close_range, + (unsigned) first_fd, + end_fd == -1 ? UINT_MAX : (unsigned) end_fd, /* Of course, the compiler should figure out that this is the identity mapping IRL */ + flags); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define close_range missing_close_range +#endif + +/* ======================================================================= */ + +#if !HAVE_EPOLL_PWAIT2 + +/* Defined to be equivalent to the kernel's _NSIG_WORDS, i.e. the size of the array of longs that is + * encapsulated by sigset_t. */ +#define KERNEL_NSIG_WORDS (64 / (sizeof(long) * 8)) +#define KERNEL_NSIG_BYTES (KERNEL_NSIG_WORDS * sizeof(long)) + +struct epoll_event; + +static inline int missing_epoll_pwait2( + int fd, + struct epoll_event *events, + int maxevents, + const struct timespec *timeout, + const sigset_t *sigset) { + +# if defined(__NR_epoll_pwait2) && HAVE_LINUX_TIME_TYPES_H + if (timeout) { + /* Convert from userspace timespec to kernel timespec */ + struct __kernel_timespec ts = { + .tv_sec = timeout->tv_sec, + .tv_nsec = timeout->tv_nsec, + }; + + return syscall(__NR_epoll_pwait2, fd, events, maxevents, &ts, sigset, sigset ? KERNEL_NSIG_BYTES : 0); + } else + return syscall(__NR_epoll_pwait2, fd, events, maxevents, NULL, sigset, sigset ? KERNEL_NSIG_BYTES : 0); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define epoll_pwait2 missing_epoll_pwait2 +#endif + +/* ======================================================================= */ + +#if !HAVE_MOUNT_SETATTR + +#if !HAVE_STRUCT_MOUNT_ATTR +struct mount_attr { + uint64_t attr_set; + uint64_t attr_clr; + uint64_t propagation; + uint64_t userns_fd; +}; +#else +struct mount_attr; +#endif + +#ifndef MOUNT_ATTR_RDONLY +#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ +#endif + +#ifndef MOUNT_ATTR_NOSUID +#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */ +#endif + +#ifndef MOUNT_ATTR_NODEV +#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */ +#endif + +#ifndef MOUNT_ATTR_NOEXEC +#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */ +#endif + +#ifndef MOUNT_ATTR__ATIME +#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */ +#endif + +#ifndef MOUNT_ATTR_RELATIME +#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */ +#endif + +#ifndef MOUNT_ATTR_NOATIME +#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ +#endif + +#ifndef MOUNT_ATTR_STRICTATIME +#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ +#endif + +#ifndef MOUNT_ATTR_NODIRATIME +#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ +#endif + +#ifndef MOUNT_ATTR_IDMAP +#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */ +#endif + +#ifndef MOUNT_ATTR_NOSYMFOLLOW +#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */ +#endif + +#ifndef MOUNT_ATTR_SIZE_VER0 +#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */ +#endif + +#ifndef AT_RECURSIVE +#define AT_RECURSIVE 0x8000 +#endif + +static inline int missing_mount_setattr( + int dfd, + const char *path, + unsigned flags, + struct mount_attr *attr, + size_t size) { + +# if defined __NR_mount_setattr && __NR_mount_setattr >= 0 + return syscall(__NR_mount_setattr, dfd, path, flags, attr, size); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define mount_setattr missing_mount_setattr +#endif + +/* ======================================================================= */ + +#if !HAVE_OPEN_TREE + +#ifndef OPEN_TREE_CLONE +#define OPEN_TREE_CLONE 1 +#endif + +#ifndef OPEN_TREE_CLOEXEC +#define OPEN_TREE_CLOEXEC O_CLOEXEC +#endif + +static inline int missing_open_tree( + int dfd, + const char *filename, + unsigned flags) { + +# if defined __NR_open_tree && __NR_open_tree >= 0 + return syscall(__NR_open_tree, dfd, filename, flags); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define open_tree missing_open_tree +#endif + +/* ======================================================================= */ + +#if !HAVE_MOVE_MOUNT + +#ifndef MOVE_MOUNT_F_EMPTY_PATH +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#endif + +#ifndef MOVE_MOUNT_T_EMPTY_PATH +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#endif + +static inline int missing_move_mount( + int from_dfd, + const char *from_pathname, + int to_dfd, + const char *to_pathname, + unsigned flags) { + +# if defined __NR_move_mount && __NR_move_mount >= 0 + return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define move_mount missing_move_mount +#endif + +/* ======================================================================= */ + +#if !HAVE_FSOPEN + +#ifndef FSOPEN_CLOEXEC +#define FSOPEN_CLOEXEC 0x00000001 +#endif + +static inline int missing_fsopen(const char *fsname, unsigned flags) { +# if defined __NR_fsopen && __NR_fsopen >= 0 + return syscall(__NR_fsopen, fsname, flags); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define fsopen missing_fsopen +#endif + +/* ======================================================================= */ + +#if !HAVE_FSCONFIG + +#ifndef FSCONFIG_SET_STRING +#define FSCONFIG_SET_STRING 1 /* Set parameter, supplying a string value */ +#endif + +static inline int missing_fsconfig(int fd, unsigned cmd, const char *key, const void *value, int aux) { +# if defined __NR_fsconfig && __NR_fsconfig >= 0 + return syscall(__NR_fsconfig, fd, cmd, key, value, aux); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define fsconfig missing_fsconfig +#endif + +/* ======================================================================= */ + +#if !HAVE_GETDENTS64 + +static inline ssize_t missing_getdents64(int fd, void *buffer, size_t length) { +# if defined __NR_getdents64 && __NR_getdents64 >= 0 + return syscall(__NR_getdents64, fd, buffer, length); +# else + errno = ENOSYS; + return -1; +# endif +} + +# define getdents64 missing_getdents64 +#endif diff --git a/src/basic/missing_syscall_def.h b/src/basic/missing_syscall_def.h new file mode 100644 index 0000000..67cae70 --- /dev/null +++ b/src/basic/missing_syscall_def.h @@ -0,0 +1,1199 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * This file is generated by src/basic/missing_syscalls.py. Do not edit! + * + * Use 'ninja -C build update-syscall-tables' to download new syscall tables, + * and 'ninja -C build update-syscall-header' to regenerate this file. + */ +#pragma once + +/* Note: if this code looks strange, this is because it is derived from the same + * template as the per-syscall blocks below. */ +# if defined(__aarch64__) +# elif defined(__alpha__) +# elif defined(__arc__) || defined(__tilegx__) +# elif defined(__arm__) +# elif defined(__i386__) +# elif defined(__ia64__) +# elif defined(__loongarch64) +# elif defined(__m68k__) +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# elif defined(__powerpc__) +# elif defined(__riscv) +# if __riscv_xlen == 32 +# elif __riscv_xlen == 64 +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# elif defined(__sparc__) +# elif defined(__x86_64__) +# if defined(__ILP32__) +# else +# endif +# elif !defined(missing_arch_template) +# warning "Current architecture is missing from the template" +# define missing_arch_template 1 +# endif + +#ifndef __IGNORE_bpf +# if defined(__aarch64__) +# define systemd_NR_bpf 280 +# elif defined(__alpha__) +# define systemd_NR_bpf 515 +# elif defined(__arc__) || defined(__tilegx__) +# define systemd_NR_bpf 280 +# elif defined(__arm__) +# define systemd_NR_bpf 386 +# elif defined(__i386__) +# define systemd_NR_bpf 357 +# elif defined(__ia64__) +# define systemd_NR_bpf 1341 +# elif defined(__loongarch64) +# define systemd_NR_bpf 280 +# elif defined(__m68k__) +# define systemd_NR_bpf 354 +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define systemd_NR_bpf 4355 +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# define systemd_NR_bpf 6319 +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# define systemd_NR_bpf 5315 +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# define systemd_NR_bpf 341 +# elif defined(__powerpc__) +# define systemd_NR_bpf 361 +# elif defined(__riscv) +# if __riscv_xlen == 32 +# define systemd_NR_bpf 280 +# elif __riscv_xlen == 64 +# define systemd_NR_bpf 280 +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# define systemd_NR_bpf 351 +# elif defined(__sparc__) +# define systemd_NR_bpf 349 +# elif defined(__x86_64__) +# if defined(__ILP32__) +# define systemd_NR_bpf (321 | /* __X32_SYSCALL_BIT */ 0x40000000) +# else +# define systemd_NR_bpf 321 +# endif +# elif !defined(missing_arch_template) +# warning "bpf() syscall number is unknown for your architecture" +# endif + +/* may be an (invalid) negative number due to libseccomp, see PR 13319 */ +# if defined __NR_bpf && __NR_bpf >= 0 +# if defined systemd_NR_bpf +assert_cc(__NR_bpf == systemd_NR_bpf); +# endif +# else +# if defined __NR_bpf +# undef __NR_bpf +# endif +# if defined systemd_NR_bpf && systemd_NR_bpf >= 0 +# define __NR_bpf systemd_NR_bpf +# endif +# endif +#endif + +#ifndef __IGNORE_close_range +# if defined(__aarch64__) +# define systemd_NR_close_range 436 +# elif defined(__alpha__) +# define systemd_NR_close_range 546 +# elif defined(__arc__) || defined(__tilegx__) +# define systemd_NR_close_range 436 +# elif defined(__arm__) +# define systemd_NR_close_range 436 +# elif defined(__i386__) +# define systemd_NR_close_range 436 +# elif defined(__ia64__) +# define systemd_NR_close_range 1460 +# elif defined(__loongarch64) +# define systemd_NR_close_range 436 +# elif defined(__m68k__) +# define systemd_NR_close_range 436 +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define systemd_NR_close_range 4436 +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# define systemd_NR_close_range 6436 +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# define systemd_NR_close_range 5436 +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# define systemd_NR_close_range 436 +# elif defined(__powerpc__) +# define systemd_NR_close_range 436 +# elif defined(__riscv) +# if __riscv_xlen == 32 +# define systemd_NR_close_range 436 +# elif __riscv_xlen == 64 +# define systemd_NR_close_range 436 +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# define systemd_NR_close_range 436 +# elif defined(__sparc__) +# define systemd_NR_close_range 436 +# elif defined(__x86_64__) +# if defined(__ILP32__) +# define systemd_NR_close_range (436 | /* __X32_SYSCALL_BIT */ 0x40000000) +# else +# define systemd_NR_close_range 436 +# endif +# elif !defined(missing_arch_template) +# warning "close_range() syscall number is unknown for your architecture" +# endif + +/* may be an (invalid) negative number due to libseccomp, see PR 13319 */ +# if defined __NR_close_range && __NR_close_range >= 0 +# if defined systemd_NR_close_range +assert_cc(__NR_close_range == systemd_NR_close_range); +# endif +# else +# if defined __NR_close_range +# undef __NR_close_range +# endif +# if defined systemd_NR_close_range && systemd_NR_close_range >= 0 +# define __NR_close_range systemd_NR_close_range +# endif +# endif +#endif + +#ifndef __IGNORE_copy_file_range +# if defined(__aarch64__) +# define systemd_NR_copy_file_range 285 +# elif defined(__alpha__) +# define systemd_NR_copy_file_range 519 +# elif defined(__arc__) || defined(__tilegx__) +# define systemd_NR_copy_file_range 285 +# elif defined(__arm__) +# define systemd_NR_copy_file_range 391 +# elif defined(__i386__) +# define systemd_NR_copy_file_range 377 +# elif defined(__ia64__) +# define systemd_NR_copy_file_range 1347 +# elif defined(__loongarch64) +# define systemd_NR_copy_file_range 285 +# elif defined(__m68k__) +# define systemd_NR_copy_file_range 376 +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define systemd_NR_copy_file_range 4360 +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# define systemd_NR_copy_file_range 6324 +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# define systemd_NR_copy_file_range 5320 +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# define systemd_NR_copy_file_range 346 +# elif defined(__powerpc__) +# define systemd_NR_copy_file_range 379 +# elif defined(__riscv) +# if __riscv_xlen == 32 +# define systemd_NR_copy_file_range 285 +# elif __riscv_xlen == 64 +# define systemd_NR_copy_file_range 285 +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# define systemd_NR_copy_file_range 375 +# elif defined(__sparc__) +# define systemd_NR_copy_file_range 357 +# elif defined(__x86_64__) +# if defined(__ILP32__) +# define systemd_NR_copy_file_range (326 | /* __X32_SYSCALL_BIT */ 0x40000000) +# else +# define systemd_NR_copy_file_range 326 +# endif +# elif !defined(missing_arch_template) +# warning "copy_file_range() syscall number is unknown for your architecture" +# endif + +/* may be an (invalid) negative number due to libseccomp, see PR 13319 */ +# if defined __NR_copy_file_range && __NR_copy_file_range >= 0 +# if defined systemd_NR_copy_file_range +assert_cc(__NR_copy_file_range == systemd_NR_copy_file_range); +# endif +# else +# if defined __NR_copy_file_range +# undef __NR_copy_file_range +# endif +# if defined systemd_NR_copy_file_range && systemd_NR_copy_file_range >= 0 +# define __NR_copy_file_range systemd_NR_copy_file_range +# endif +# endif +#endif + +#ifndef __IGNORE_epoll_pwait2 +# if defined(__aarch64__) +# define systemd_NR_epoll_pwait2 441 +# elif defined(__alpha__) +# define systemd_NR_epoll_pwait2 551 +# elif defined(__arc__) || defined(__tilegx__) +# define systemd_NR_epoll_pwait2 441 +# elif defined(__arm__) +# define systemd_NR_epoll_pwait2 441 +# elif defined(__i386__) +# define systemd_NR_epoll_pwait2 441 +# elif defined(__ia64__) +# define systemd_NR_epoll_pwait2 1465 +# elif defined(__loongarch64) +# define systemd_NR_epoll_pwait2 441 +# elif defined(__m68k__) +# define systemd_NR_epoll_pwait2 441 +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define systemd_NR_epoll_pwait2 4441 +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# define systemd_NR_epoll_pwait2 6441 +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# define systemd_NR_epoll_pwait2 5441 +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# define systemd_NR_epoll_pwait2 441 +# elif defined(__powerpc__) +# define systemd_NR_epoll_pwait2 441 +# elif defined(__riscv) +# if __riscv_xlen == 32 +# define systemd_NR_epoll_pwait2 441 +# elif __riscv_xlen == 64 +# define systemd_NR_epoll_pwait2 441 +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# define systemd_NR_epoll_pwait2 441 +# elif defined(__sparc__) +# define systemd_NR_epoll_pwait2 441 +# elif defined(__x86_64__) +# if defined(__ILP32__) +# define systemd_NR_epoll_pwait2 (441 | /* __X32_SYSCALL_BIT */ 0x40000000) +# else +# define systemd_NR_epoll_pwait2 441 +# endif +# elif !defined(missing_arch_template) +# warning "epoll_pwait2() syscall number is unknown for your architecture" +# endif + +/* may be an (invalid) negative number due to libseccomp, see PR 13319 */ +# if defined __NR_epoll_pwait2 && __NR_epoll_pwait2 >= 0 +# if defined systemd_NR_epoll_pwait2 +assert_cc(__NR_epoll_pwait2 == systemd_NR_epoll_pwait2); +# endif +# else +# if defined __NR_epoll_pwait2 +# undef __NR_epoll_pwait2 +# endif +# if defined systemd_NR_epoll_pwait2 && systemd_NR_epoll_pwait2 >= 0 +# define __NR_epoll_pwait2 systemd_NR_epoll_pwait2 +# endif +# endif +#endif + +#ifndef __IGNORE_getrandom +# if defined(__aarch64__) +# define systemd_NR_getrandom 278 +# elif defined(__alpha__) +# define systemd_NR_getrandom 511 +# elif defined(__arc__) || defined(__tilegx__) +# define systemd_NR_getrandom 278 +# elif defined(__arm__) +# define systemd_NR_getrandom 384 +# elif defined(__i386__) +# define systemd_NR_getrandom 355 +# elif defined(__ia64__) +# define systemd_NR_getrandom 1339 +# elif defined(__loongarch64) +# define systemd_NR_getrandom 278 +# elif defined(__m68k__) +# define systemd_NR_getrandom 352 +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define systemd_NR_getrandom 4353 +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# define systemd_NR_getrandom 6317 +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# define systemd_NR_getrandom 5313 +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# define systemd_NR_getrandom 339 +# elif defined(__powerpc__) +# define systemd_NR_getrandom 359 +# elif defined(__riscv) +# if __riscv_xlen == 32 +# define systemd_NR_getrandom 278 +# elif __riscv_xlen == 64 +# define systemd_NR_getrandom 278 +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# define systemd_NR_getrandom 349 +# elif defined(__sparc__) +# define systemd_NR_getrandom 347 +# elif defined(__x86_64__) +# if defined(__ILP32__) +# define systemd_NR_getrandom (318 | /* __X32_SYSCALL_BIT */ 0x40000000) +# else +# define systemd_NR_getrandom 318 +# endif +# elif !defined(missing_arch_template) +# warning "getrandom() syscall number is unknown for your architecture" +# endif + +/* may be an (invalid) negative number due to libseccomp, see PR 13319 */ +# if defined __NR_getrandom && __NR_getrandom >= 0 +# if defined systemd_NR_getrandom +assert_cc(__NR_getrandom == systemd_NR_getrandom); +# endif +# else +# if defined __NR_getrandom +# undef __NR_getrandom +# endif +# if defined systemd_NR_getrandom && systemd_NR_getrandom >= 0 +# define __NR_getrandom systemd_NR_getrandom +# endif +# endif +#endif + +#ifndef __IGNORE_memfd_create +# if defined(__aarch64__) +# define systemd_NR_memfd_create 279 +# elif defined(__alpha__) +# define systemd_NR_memfd_create 512 +# elif defined(__arc__) || defined(__tilegx__) +# define systemd_NR_memfd_create 279 +# elif defined(__arm__) +# define systemd_NR_memfd_create 385 +# elif defined(__i386__) +# define systemd_NR_memfd_create 356 +# elif defined(__ia64__) +# define systemd_NR_memfd_create 1340 +# elif defined(__loongarch64) +# define systemd_NR_memfd_create 279 +# elif defined(__m68k__) +# define systemd_NR_memfd_create 353 +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define systemd_NR_memfd_create 4354 +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# define systemd_NR_memfd_create 6318 +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# define systemd_NR_memfd_create 5314 +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# define systemd_NR_memfd_create 340 +# elif defined(__powerpc__) +# define systemd_NR_memfd_create 360 +# elif defined(__riscv) +# if __riscv_xlen == 32 +# define systemd_NR_memfd_create 279 +# elif __riscv_xlen == 64 +# define systemd_NR_memfd_create 279 +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# define systemd_NR_memfd_create 350 +# elif defined(__sparc__) +# define systemd_NR_memfd_create 348 +# elif defined(__x86_64__) +# if defined(__ILP32__) +# define systemd_NR_memfd_create (319 | /* __X32_SYSCALL_BIT */ 0x40000000) +# else +# define systemd_NR_memfd_create 319 +# endif +# elif !defined(missing_arch_template) +# warning "memfd_create() syscall number is unknown for your architecture" +# endif + +/* may be an (invalid) negative number due to libseccomp, see PR 13319 */ +# if defined __NR_memfd_create && __NR_memfd_create >= 0 +# if defined systemd_NR_memfd_create +assert_cc(__NR_memfd_create == systemd_NR_memfd_create); +# endif +# else +# if defined __NR_memfd_create +# undef __NR_memfd_create +# endif +# if defined systemd_NR_memfd_create && systemd_NR_memfd_create >= 0 +# define __NR_memfd_create systemd_NR_memfd_create +# endif +# endif +#endif + +#ifndef __IGNORE_mount_setattr +# if defined(__aarch64__) +# define systemd_NR_mount_setattr 442 +# elif defined(__alpha__) +# define systemd_NR_mount_setattr 552 +# elif defined(__arc__) || defined(__tilegx__) +# define systemd_NR_mount_setattr 442 +# elif defined(__arm__) +# define systemd_NR_mount_setattr 442 +# elif defined(__i386__) +# define systemd_NR_mount_setattr 442 +# elif defined(__ia64__) +# define systemd_NR_mount_setattr 1466 +# elif defined(__loongarch64) +# define systemd_NR_mount_setattr 442 +# elif defined(__m68k__) +# define systemd_NR_mount_setattr 442 +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define systemd_NR_mount_setattr 4442 +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# define systemd_NR_mount_setattr 6442 +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# define systemd_NR_mount_setattr 5442 +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# define systemd_NR_mount_setattr 442 +# elif defined(__powerpc__) +# define systemd_NR_mount_setattr 442 +# elif defined(__riscv) +# if __riscv_xlen == 32 +# define systemd_NR_mount_setattr 442 +# elif __riscv_xlen == 64 +# define systemd_NR_mount_setattr 442 +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# define systemd_NR_mount_setattr 442 +# elif defined(__sparc__) +# define systemd_NR_mount_setattr 442 +# elif defined(__x86_64__) +# if defined(__ILP32__) +# define systemd_NR_mount_setattr (442 | /* __X32_SYSCALL_BIT */ 0x40000000) +# else +# define systemd_NR_mount_setattr 442 +# endif +# elif !defined(missing_arch_template) +# warning "mount_setattr() syscall number is unknown for your architecture" +# endif + +/* may be an (invalid) negative number due to libseccomp, see PR 13319 */ +# if defined __NR_mount_setattr && __NR_mount_setattr >= 0 +# if defined systemd_NR_mount_setattr +assert_cc(__NR_mount_setattr == systemd_NR_mount_setattr); +# endif +# else +# if defined __NR_mount_setattr +# undef __NR_mount_setattr +# endif +# if defined systemd_NR_mount_setattr && systemd_NR_mount_setattr >= 0 +# define __NR_mount_setattr systemd_NR_mount_setattr +# endif +# endif +#endif + +#ifndef __IGNORE_move_mount +# if defined(__aarch64__) +# define systemd_NR_move_mount 429 +# elif defined(__alpha__) +# define systemd_NR_move_mount 539 +# elif defined(__arc__) || defined(__tilegx__) +# define systemd_NR_move_mount 429 +# elif defined(__arm__) +# define systemd_NR_move_mount 429 +# elif defined(__i386__) +# define systemd_NR_move_mount 429 +# elif defined(__ia64__) +# define systemd_NR_move_mount 1453 +# elif defined(__loongarch64) +# define systemd_NR_move_mount 429 +# elif defined(__m68k__) +# define systemd_NR_move_mount 429 +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define systemd_NR_move_mount 4429 +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# define systemd_NR_move_mount 6429 +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# define systemd_NR_move_mount 5429 +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# define systemd_NR_move_mount 429 +# elif defined(__powerpc__) +# define systemd_NR_move_mount 429 +# elif defined(__riscv) +# if __riscv_xlen == 32 +# define systemd_NR_move_mount 429 +# elif __riscv_xlen == 64 +# define systemd_NR_move_mount 429 +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# define systemd_NR_move_mount 429 +# elif defined(__sparc__) +# define systemd_NR_move_mount 429 +# elif defined(__x86_64__) +# if defined(__ILP32__) +# define systemd_NR_move_mount (429 | /* __X32_SYSCALL_BIT */ 0x40000000) +# else +# define systemd_NR_move_mount 429 +# endif +# elif !defined(missing_arch_template) +# warning "move_mount() syscall number is unknown for your architecture" +# endif + +/* may be an (invalid) negative number due to libseccomp, see PR 13319 */ +# if defined __NR_move_mount && __NR_move_mount >= 0 +# if defined systemd_NR_move_mount +assert_cc(__NR_move_mount == systemd_NR_move_mount); +# endif +# else +# if defined __NR_move_mount +# undef __NR_move_mount +# endif +# if defined systemd_NR_move_mount && systemd_NR_move_mount >= 0 +# define __NR_move_mount systemd_NR_move_mount +# endif +# endif +#endif + +#ifndef __IGNORE_name_to_handle_at +# if defined(__aarch64__) +# define systemd_NR_name_to_handle_at 264 +# elif defined(__alpha__) +# define systemd_NR_name_to_handle_at 497 +# elif defined(__arc__) || defined(__tilegx__) +# define systemd_NR_name_to_handle_at 264 +# elif defined(__arm__) +# define systemd_NR_name_to_handle_at 370 +# elif defined(__i386__) +# define systemd_NR_name_to_handle_at 341 +# elif defined(__ia64__) +# define systemd_NR_name_to_handle_at 1326 +# elif defined(__loongarch64) +# define systemd_NR_name_to_handle_at 264 +# elif defined(__m68k__) +# define systemd_NR_name_to_handle_at 340 +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define systemd_NR_name_to_handle_at 4339 +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# define systemd_NR_name_to_handle_at 6303 +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# define systemd_NR_name_to_handle_at 5298 +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# define systemd_NR_name_to_handle_at 325 +# elif defined(__powerpc__) +# define systemd_NR_name_to_handle_at 345 +# elif defined(__riscv) +# if __riscv_xlen == 32 +# define systemd_NR_name_to_handle_at 264 +# elif __riscv_xlen == 64 +# define systemd_NR_name_to_handle_at 264 +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# define systemd_NR_name_to_handle_at 335 +# elif defined(__sparc__) +# define systemd_NR_name_to_handle_at 332 +# elif defined(__x86_64__) +# if defined(__ILP32__) +# define systemd_NR_name_to_handle_at (303 | /* __X32_SYSCALL_BIT */ 0x40000000) +# else +# define systemd_NR_name_to_handle_at 303 +# endif +# elif !defined(missing_arch_template) +# warning "name_to_handle_at() syscall number is unknown for your architecture" +# endif + +/* may be an (invalid) negative number due to libseccomp, see PR 13319 */ +# if defined __NR_name_to_handle_at && __NR_name_to_handle_at >= 0 +# if defined systemd_NR_name_to_handle_at +assert_cc(__NR_name_to_handle_at == systemd_NR_name_to_handle_at); +# endif +# else +# if defined __NR_name_to_handle_at +# undef __NR_name_to_handle_at +# endif +# if defined systemd_NR_name_to_handle_at && systemd_NR_name_to_handle_at >= 0 +# define __NR_name_to_handle_at systemd_NR_name_to_handle_at +# endif +# endif +#endif + +#ifndef __IGNORE_open_tree +# if defined(__aarch64__) +# define systemd_NR_open_tree 428 +# elif defined(__alpha__) +# define systemd_NR_open_tree 538 +# elif defined(__arc__) || defined(__tilegx__) +# define systemd_NR_open_tree 428 +# elif defined(__arm__) +# define systemd_NR_open_tree 428 +# elif defined(__i386__) +# define systemd_NR_open_tree 428 +# elif defined(__ia64__) +# define systemd_NR_open_tree 1452 +# elif defined(__loongarch64) +# define systemd_NR_open_tree 428 +# elif defined(__m68k__) +# define systemd_NR_open_tree 428 +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define systemd_NR_open_tree 4428 +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# define systemd_NR_open_tree 6428 +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# define systemd_NR_open_tree 5428 +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# define systemd_NR_open_tree 428 +# elif defined(__powerpc__) +# define systemd_NR_open_tree 428 +# elif defined(__riscv) +# if __riscv_xlen == 32 +# define systemd_NR_open_tree 428 +# elif __riscv_xlen == 64 +# define systemd_NR_open_tree 428 +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# define systemd_NR_open_tree 428 +# elif defined(__sparc__) +# define systemd_NR_open_tree 428 +# elif defined(__x86_64__) +# if defined(__ILP32__) +# define systemd_NR_open_tree (428 | /* __X32_SYSCALL_BIT */ 0x40000000) +# else +# define systemd_NR_open_tree 428 +# endif +# elif !defined(missing_arch_template) +# warning "open_tree() syscall number is unknown for your architecture" +# endif + +/* may be an (invalid) negative number due to libseccomp, see PR 13319 */ +# if defined __NR_open_tree && __NR_open_tree >= 0 +# if defined systemd_NR_open_tree +assert_cc(__NR_open_tree == systemd_NR_open_tree); +# endif +# else +# if defined __NR_open_tree +# undef __NR_open_tree +# endif +# if defined systemd_NR_open_tree && systemd_NR_open_tree >= 0 +# define __NR_open_tree systemd_NR_open_tree +# endif +# endif +#endif + +#ifndef __IGNORE_openat2 +# if defined(__aarch64__) +# define systemd_NR_openat2 437 +# elif defined(__alpha__) +# define systemd_NR_openat2 547 +# elif defined(__arc__) || defined(__tilegx__) +# define systemd_NR_openat2 437 +# elif defined(__arm__) +# define systemd_NR_openat2 437 +# elif defined(__i386__) +# define systemd_NR_openat2 437 +# elif defined(__ia64__) +# define systemd_NR_openat2 1461 +# elif defined(__loongarch64) +# define systemd_NR_openat2 437 +# elif defined(__m68k__) +# define systemd_NR_openat2 437 +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define systemd_NR_openat2 4437 +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# define systemd_NR_openat2 6437 +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# define systemd_NR_openat2 5437 +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# define systemd_NR_openat2 437 +# elif defined(__powerpc__) +# define systemd_NR_openat2 437 +# elif defined(__riscv) +# if __riscv_xlen == 32 +# define systemd_NR_openat2 437 +# elif __riscv_xlen == 64 +# define systemd_NR_openat2 437 +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# define systemd_NR_openat2 437 +# elif defined(__sparc__) +# define systemd_NR_openat2 437 +# elif defined(__x86_64__) +# if defined(__ILP32__) +# define systemd_NR_openat2 (437 | /* __X32_SYSCALL_BIT */ 0x40000000) +# else +# define systemd_NR_openat2 437 +# endif +# elif !defined(missing_arch_template) +# warning "openat2() syscall number is unknown for your architecture" +# endif + +/* may be an (invalid) negative number due to libseccomp, see PR 13319 */ +# if defined __NR_openat2 && __NR_openat2 >= 0 +# if defined systemd_NR_openat2 +assert_cc(__NR_openat2 == systemd_NR_openat2); +# endif +# else +# if defined __NR_openat2 +# undef __NR_openat2 +# endif +# if defined systemd_NR_openat2 && systemd_NR_openat2 >= 0 +# define __NR_openat2 systemd_NR_openat2 +# endif +# endif +#endif + +#ifndef __IGNORE_pidfd_open +# if defined(__aarch64__) +# define systemd_NR_pidfd_open 434 +# elif defined(__alpha__) +# define systemd_NR_pidfd_open 544 +# elif defined(__arc__) || defined(__tilegx__) +# define systemd_NR_pidfd_open 434 +# elif defined(__arm__) +# define systemd_NR_pidfd_open 434 +# elif defined(__i386__) +# define systemd_NR_pidfd_open 434 +# elif defined(__ia64__) +# define systemd_NR_pidfd_open 1458 +# elif defined(__loongarch64) +# define systemd_NR_pidfd_open 434 +# elif defined(__m68k__) +# define systemd_NR_pidfd_open 434 +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define systemd_NR_pidfd_open 4434 +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# define systemd_NR_pidfd_open 6434 +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# define systemd_NR_pidfd_open 5434 +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# define systemd_NR_pidfd_open 434 +# elif defined(__powerpc__) +# define systemd_NR_pidfd_open 434 +# elif defined(__riscv) +# if __riscv_xlen == 32 +# define systemd_NR_pidfd_open 434 +# elif __riscv_xlen == 64 +# define systemd_NR_pidfd_open 434 +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# define systemd_NR_pidfd_open 434 +# elif defined(__sparc__) +# define systemd_NR_pidfd_open 434 +# elif defined(__x86_64__) +# if defined(__ILP32__) +# define systemd_NR_pidfd_open (434 | /* __X32_SYSCALL_BIT */ 0x40000000) +# else +# define systemd_NR_pidfd_open 434 +# endif +# elif !defined(missing_arch_template) +# warning "pidfd_open() syscall number is unknown for your architecture" +# endif + +/* may be an (invalid) negative number due to libseccomp, see PR 13319 */ +# if defined __NR_pidfd_open && __NR_pidfd_open >= 0 +# if defined systemd_NR_pidfd_open +assert_cc(__NR_pidfd_open == systemd_NR_pidfd_open); +# endif +# else +# if defined __NR_pidfd_open +# undef __NR_pidfd_open +# endif +# if defined systemd_NR_pidfd_open && systemd_NR_pidfd_open >= 0 +# define __NR_pidfd_open systemd_NR_pidfd_open +# endif +# endif +#endif + +#ifndef __IGNORE_pidfd_send_signal +# if defined(__aarch64__) +# define systemd_NR_pidfd_send_signal 424 +# elif defined(__alpha__) +# define systemd_NR_pidfd_send_signal 534 +# elif defined(__arc__) || defined(__tilegx__) +# define systemd_NR_pidfd_send_signal 424 +# elif defined(__arm__) +# define systemd_NR_pidfd_send_signal 424 +# elif defined(__i386__) +# define systemd_NR_pidfd_send_signal 424 +# elif defined(__ia64__) +# define systemd_NR_pidfd_send_signal 1448 +# elif defined(__loongarch64) +# define systemd_NR_pidfd_send_signal 424 +# elif defined(__m68k__) +# define systemd_NR_pidfd_send_signal 424 +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define systemd_NR_pidfd_send_signal 4424 +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# define systemd_NR_pidfd_send_signal 6424 +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# define systemd_NR_pidfd_send_signal 5424 +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# define systemd_NR_pidfd_send_signal 424 +# elif defined(__powerpc__) +# define systemd_NR_pidfd_send_signal 424 +# elif defined(__riscv) +# if __riscv_xlen == 32 +# define systemd_NR_pidfd_send_signal 424 +# elif __riscv_xlen == 64 +# define systemd_NR_pidfd_send_signal 424 +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# define systemd_NR_pidfd_send_signal 424 +# elif defined(__sparc__) +# define systemd_NR_pidfd_send_signal 424 +# elif defined(__x86_64__) +# if defined(__ILP32__) +# define systemd_NR_pidfd_send_signal (424 | /* __X32_SYSCALL_BIT */ 0x40000000) +# else +# define systemd_NR_pidfd_send_signal 424 +# endif +# elif !defined(missing_arch_template) +# warning "pidfd_send_signal() syscall number is unknown for your architecture" +# endif + +/* may be an (invalid) negative number due to libseccomp, see PR 13319 */ +# if defined __NR_pidfd_send_signal && __NR_pidfd_send_signal >= 0 +# if defined systemd_NR_pidfd_send_signal +assert_cc(__NR_pidfd_send_signal == systemd_NR_pidfd_send_signal); +# endif +# else +# if defined __NR_pidfd_send_signal +# undef __NR_pidfd_send_signal +# endif +# if defined systemd_NR_pidfd_send_signal && systemd_NR_pidfd_send_signal >= 0 +# define __NR_pidfd_send_signal systemd_NR_pidfd_send_signal +# endif +# endif +#endif + +#ifndef __IGNORE_pkey_mprotect +# if defined(__aarch64__) +# define systemd_NR_pkey_mprotect 288 +# elif defined(__alpha__) +# define systemd_NR_pkey_mprotect 524 +# elif defined(__arc__) || defined(__tilegx__) +# define systemd_NR_pkey_mprotect 288 +# elif defined(__arm__) +# define systemd_NR_pkey_mprotect 394 +# elif defined(__i386__) +# define systemd_NR_pkey_mprotect 380 +# elif defined(__ia64__) +# define systemd_NR_pkey_mprotect 1354 +# elif defined(__loongarch64) +# define systemd_NR_pkey_mprotect 288 +# elif defined(__m68k__) +# define systemd_NR_pkey_mprotect 381 +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define systemd_NR_pkey_mprotect 4363 +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# define systemd_NR_pkey_mprotect 6327 +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# define systemd_NR_pkey_mprotect 5323 +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# define systemd_NR_pkey_mprotect 351 +# elif defined(__powerpc__) +# define systemd_NR_pkey_mprotect 386 +# elif defined(__riscv) +# if __riscv_xlen == 32 +# define systemd_NR_pkey_mprotect 288 +# elif __riscv_xlen == 64 +# define systemd_NR_pkey_mprotect 288 +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# define systemd_NR_pkey_mprotect 384 +# elif defined(__sparc__) +# define systemd_NR_pkey_mprotect 362 +# elif defined(__x86_64__) +# if defined(__ILP32__) +# define systemd_NR_pkey_mprotect (329 | /* __X32_SYSCALL_BIT */ 0x40000000) +# else +# define systemd_NR_pkey_mprotect 329 +# endif +# elif !defined(missing_arch_template) +# warning "pkey_mprotect() syscall number is unknown for your architecture" +# endif + +/* may be an (invalid) negative number due to libseccomp, see PR 13319 */ +# if defined __NR_pkey_mprotect && __NR_pkey_mprotect >= 0 +# if defined systemd_NR_pkey_mprotect +assert_cc(__NR_pkey_mprotect == systemd_NR_pkey_mprotect); +# endif +# else +# if defined __NR_pkey_mprotect +# undef __NR_pkey_mprotect +# endif +# if defined systemd_NR_pkey_mprotect && systemd_NR_pkey_mprotect >= 0 +# define __NR_pkey_mprotect systemd_NR_pkey_mprotect +# endif +# endif +#endif + +#ifndef __IGNORE_renameat2 +# if defined(__aarch64__) +# define systemd_NR_renameat2 276 +# elif defined(__alpha__) +# define systemd_NR_renameat2 510 +# elif defined(__arc__) || defined(__tilegx__) +# define systemd_NR_renameat2 276 +# elif defined(__arm__) +# define systemd_NR_renameat2 382 +# elif defined(__i386__) +# define systemd_NR_renameat2 353 +# elif defined(__ia64__) +# define systemd_NR_renameat2 1338 +# elif defined(__loongarch64) +# define systemd_NR_renameat2 276 +# elif defined(__m68k__) +# define systemd_NR_renameat2 351 +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define systemd_NR_renameat2 4351 +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# define systemd_NR_renameat2 6315 +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# define systemd_NR_renameat2 5311 +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# define systemd_NR_renameat2 337 +# elif defined(__powerpc__) +# define systemd_NR_renameat2 357 +# elif defined(__riscv) +# if __riscv_xlen == 32 +# define systemd_NR_renameat2 276 +# elif __riscv_xlen == 64 +# define systemd_NR_renameat2 276 +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# define systemd_NR_renameat2 347 +# elif defined(__sparc__) +# define systemd_NR_renameat2 345 +# elif defined(__x86_64__) +# if defined(__ILP32__) +# define systemd_NR_renameat2 (316 | /* __X32_SYSCALL_BIT */ 0x40000000) +# else +# define systemd_NR_renameat2 316 +# endif +# elif !defined(missing_arch_template) +# warning "renameat2() syscall number is unknown for your architecture" +# endif + +/* may be an (invalid) negative number due to libseccomp, see PR 13319 */ +# if defined __NR_renameat2 && __NR_renameat2 >= 0 +# if defined systemd_NR_renameat2 +assert_cc(__NR_renameat2 == systemd_NR_renameat2); +# endif +# else +# if defined __NR_renameat2 +# undef __NR_renameat2 +# endif +# if defined systemd_NR_renameat2 && systemd_NR_renameat2 >= 0 +# define __NR_renameat2 systemd_NR_renameat2 +# endif +# endif +#endif + +#ifndef __IGNORE_setns +# if defined(__aarch64__) +# define systemd_NR_setns 268 +# elif defined(__alpha__) +# define systemd_NR_setns 501 +# elif defined(__arc__) || defined(__tilegx__) +# define systemd_NR_setns 268 +# elif defined(__arm__) +# define systemd_NR_setns 375 +# elif defined(__i386__) +# define systemd_NR_setns 346 +# elif defined(__ia64__) +# define systemd_NR_setns 1330 +# elif defined(__loongarch64) +# define systemd_NR_setns 268 +# elif defined(__m68k__) +# define systemd_NR_setns 344 +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define systemd_NR_setns 4344 +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# define systemd_NR_setns 6308 +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# define systemd_NR_setns 5303 +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# define systemd_NR_setns 328 +# elif defined(__powerpc__) +# define systemd_NR_setns 350 +# elif defined(__riscv) +# if __riscv_xlen == 32 +# define systemd_NR_setns 268 +# elif __riscv_xlen == 64 +# define systemd_NR_setns 268 +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# define systemd_NR_setns 339 +# elif defined(__sparc__) +# define systemd_NR_setns 337 +# elif defined(__x86_64__) +# if defined(__ILP32__) +# define systemd_NR_setns (308 | /* __X32_SYSCALL_BIT */ 0x40000000) +# else +# define systemd_NR_setns 308 +# endif +# elif !defined(missing_arch_template) +# warning "setns() syscall number is unknown for your architecture" +# endif + +/* may be an (invalid) negative number due to libseccomp, see PR 13319 */ +# if defined __NR_setns && __NR_setns >= 0 +# if defined systemd_NR_setns +assert_cc(__NR_setns == systemd_NR_setns); +# endif +# else +# if defined __NR_setns +# undef __NR_setns +# endif +# if defined systemd_NR_setns && systemd_NR_setns >= 0 +# define __NR_setns systemd_NR_setns +# endif +# endif +#endif + +#ifndef __IGNORE_statx +# if defined(__aarch64__) +# define systemd_NR_statx 291 +# elif defined(__alpha__) +# define systemd_NR_statx 522 +# elif defined(__arc__) || defined(__tilegx__) +# define systemd_NR_statx 291 +# elif defined(__arm__) +# define systemd_NR_statx 397 +# elif defined(__i386__) +# define systemd_NR_statx 383 +# elif defined(__ia64__) +# define systemd_NR_statx 1350 +# elif defined(__loongarch64) +# define systemd_NR_statx 291 +# elif defined(__m68k__) +# define systemd_NR_statx 379 +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define systemd_NR_statx 4366 +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# define systemd_NR_statx 6330 +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# define systemd_NR_statx 5326 +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# define systemd_NR_statx 349 +# elif defined(__powerpc__) +# define systemd_NR_statx 383 +# elif defined(__riscv) +# if __riscv_xlen == 32 +# define systemd_NR_statx 291 +# elif __riscv_xlen == 64 +# define systemd_NR_statx 291 +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# define systemd_NR_statx 379 +# elif defined(__sparc__) +# define systemd_NR_statx 360 +# elif defined(__x86_64__) +# if defined(__ILP32__) +# define systemd_NR_statx (332 | /* __X32_SYSCALL_BIT */ 0x40000000) +# else +# define systemd_NR_statx 332 +# endif +# elif !defined(missing_arch_template) +# warning "statx() syscall number is unknown for your architecture" +# endif + +/* may be an (invalid) negative number due to libseccomp, see PR 13319 */ +# if defined __NR_statx && __NR_statx >= 0 +# if defined systemd_NR_statx +assert_cc(__NR_statx == systemd_NR_statx); +# endif +# else +# if defined __NR_statx +# undef __NR_statx +# endif +# if defined systemd_NR_statx && systemd_NR_statx >= 0 +# define __NR_statx systemd_NR_statx +# endif +# endif +#endif diff --git a/src/basic/missing_syscalls.py b/src/basic/missing_syscalls.py new file mode 100644 index 0000000..642d4d9 --- /dev/null +++ b/src/basic/missing_syscalls.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: LGPL-2.1-or-later + +import sys +import functools + +# We only generate numbers for a dozen or so syscalls +SYSCALLS = [ + 'bpf', + 'close_range', + 'copy_file_range', + 'epoll_pwait2', + 'getrandom', + 'memfd_create', + 'mount_setattr', + 'move_mount', + 'name_to_handle_at', + 'open_tree', + 'openat2', + 'pidfd_open', + 'pidfd_send_signal', + 'pkey_mprotect', + 'renameat2', + 'setns', + 'statx', +] + +def dictify(f): + def wrap(*args, **kwargs): + return dict(f(*args, **kwargs)) + return functools.update_wrapper(wrap, f) + +@dictify +def parse_syscall_table(filename): + print(f'Reading {filename}…') + for line in open(filename): + items = line.split() + if len(items) >= 2: + yield items[0], int(items[1]) + +def parse_syscall_tables(filenames): + return {filename.split('-')[-1][:-4]: parse_syscall_table(filename) + for filename in filenames} + +DEF_TEMPLATE_A = '''\ + +#ifndef __IGNORE_{syscall} +''' + +DEF_TEMPLATE_B = '''\ +# if defined(__aarch64__) +# define systemd_NR_{syscall} {nr_arm64} +# elif defined(__alpha__) +# define systemd_NR_{syscall} {nr_alpha} +# elif defined(__arc__) || defined(__tilegx__) +# define systemd_NR_{syscall} {nr_arc} +# elif defined(__arm__) +# define systemd_NR_{syscall} {nr_arm} +# elif defined(__i386__) +# define systemd_NR_{syscall} {nr_i386} +# elif defined(__ia64__) +# define systemd_NR_{syscall} {nr_ia64} +# elif defined(__loongarch64) +# define systemd_NR_{syscall} {nr_loongarch64} +# elif defined(__m68k__) +# define systemd_NR_{syscall} {nr_m68k} +# elif defined(_MIPS_SIM) +# if _MIPS_SIM == _MIPS_SIM_ABI32 +# define systemd_NR_{syscall} {nr_mipso32} +# elif _MIPS_SIM == _MIPS_SIM_NABI32 +# define systemd_NR_{syscall} {nr_mips64n32} +# elif _MIPS_SIM == _MIPS_SIM_ABI64 +# define systemd_NR_{syscall} {nr_mips64} +# else +# error "Unknown MIPS ABI" +# endif +# elif defined(__hppa__) +# define systemd_NR_{syscall} {nr_parisc} +# elif defined(__powerpc__) +# define systemd_NR_{syscall} {nr_powerpc} +# elif defined(__riscv) +# if __riscv_xlen == 32 +# define systemd_NR_{syscall} {nr_riscv32} +# elif __riscv_xlen == 64 +# define systemd_NR_{syscall} {nr_riscv64} +# else +# error "Unknown RISC-V ABI" +# endif +# elif defined(__s390__) +# define systemd_NR_{syscall} {nr_s390} +# elif defined(__sparc__) +# define systemd_NR_{syscall} {nr_sparc} +# elif defined(__x86_64__) +# if defined(__ILP32__) +# define systemd_NR_{syscall} ({nr_x86_64} | /* __X32_SYSCALL_BIT */ 0x40000000) +# else +# define systemd_NR_{syscall} {nr_x86_64} +# endif +# elif !defined(missing_arch_template) +%s +# endif +''' + +DEF_TEMPLATE_C = '''\ + +/* may be an (invalid) negative number due to libseccomp, see PR 13319 */ +# if defined __NR_{syscall} && __NR_{syscall} >= 0 +# if defined systemd_NR_{syscall} +assert_cc(__NR_{syscall} == systemd_NR_{syscall}); +# endif +# else +# if defined __NR_{syscall} +# undef __NR_{syscall} +# endif +# if defined systemd_NR_{syscall} && systemd_NR_{syscall} >= 0 +# define __NR_{syscall} systemd_NR_{syscall} +# endif +# endif +#endif''' + +DEF_TEMPLATE = (DEF_TEMPLATE_A + + DEF_TEMPLATE_B % '# warning "{syscall}() syscall number is unknown for your architecture"' + + DEF_TEMPLATE_C) + +ARCH_CHECK = '''\ +/* Note: if this code looks strange, this is because it is derived from the same + * template as the per-syscall blocks below. */ +''' + '\n'.join(line for line in DEF_TEMPLATE_B.splitlines() + if ' define ' not in line) % '''\ +# warning "Current architecture is missing from the template" +# define missing_arch_template 1''' + +def print_syscall_def(syscall, tables, out): + mappings = {f'nr_{arch}':t.get(syscall, -1) + for arch, t in tables.items()} + print(DEF_TEMPLATE.format(syscall=syscall, **mappings), + file=out) + +def print_syscall_defs(syscalls, tables, out): + print('''\ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * This file is generated by src/basic/missing_syscalls.py. Do not edit! + * + * Use 'ninja -C build update-syscall-tables' to download new syscall tables, + * and 'ninja -C build update-syscall-header' to regenerate this file. + */ +#pragma once +''', + file=out) + print(ARCH_CHECK, file=out) + for syscall in syscalls: + print_syscall_def(syscall, tables, out) + +if __name__ == '__main__': + output_file = sys.argv[1] + arch_files = sys.argv[2:] + out = open(output_file, 'wt') + + tables = parse_syscall_tables(arch_files) + print_syscall_defs(SYSCALLS, tables, out) + + print(f'Wrote {output_file}') diff --git a/src/basic/missing_threads.h b/src/basic/missing_threads.h new file mode 100644 index 0000000..fb3b722 --- /dev/null +++ b/src/basic/missing_threads.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +/* If threads.h doesn't exist, then define our own thread_local to match C11's thread_local. */ +#if HAVE_THREADS_H +# include +#elif !(defined(thread_local)) +/* Don't break on glibc < 2.16 that doesn't define __STDC_NO_THREADS__ + * see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53769 */ +# if __STDC_VERSION__ >= 201112L && !(defined(__STDC_NO_THREADS__) || (defined(__GNU_LIBRARY__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16)) +# define thread_local _Thread_local +# else +# define thread_local __thread +# endif +#endif diff --git a/src/basic/missing_timerfd.h b/src/basic/missing_timerfd.h new file mode 100644 index 0000000..dba3043 --- /dev/null +++ b/src/basic/missing_timerfd.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#ifndef TFD_TIMER_CANCEL_ON_SET +#define TFD_TIMER_CANCEL_ON_SET (1 << 1) +#endif diff --git a/src/basic/missing_type.h b/src/basic/missing_type.h new file mode 100644 index 0000000..f623309 --- /dev/null +++ b/src/basic/missing_type.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#if !HAVE_CHAR32_T +#define char32_t uint32_t +#endif + +#if !HAVE_CHAR16_T +#define char16_t uint16_t +#endif diff --git a/src/basic/missing_xfs.h b/src/basic/missing_xfs.h new file mode 100644 index 0000000..ba5fe81 --- /dev/null +++ b/src/basic/missing_xfs.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +/* This is currently not exported in the public kernel headers, but the libxfs library code part of xfsprogs + * defines it as public header */ + +#ifndef XFS_IOC_FSGEOMETRY +#define XFS_IOC_FSGEOMETRY _IOR ('X', 124, struct xfs_fsop_geom) + +typedef struct xfs_fsop_geom { + uint32_t blocksize; + uint32_t rtextsize; + uint32_t agblocks; + uint32_t agcount; + uint32_t logblocks; + uint32_t sectsize; + uint32_t inodesize; + uint32_t imaxpct; + uint64_t datablocks; + uint64_t rtblocks; + uint64_t rtextents; + uint64_t logstart; + unsigned char uuid[16]; + uint32_t sunit; + uint32_t swidth; + int32_t version; + uint32_t flags; + uint32_t logsectsize; + uint32_t rtsectsize; + uint32_t dirblocksize; + uint32_t logsunit; +} xfs_fsop_geom_t; +#endif + +#ifndef XFS_IOC_FSGROWFSDATA +#define XFS_IOC_FSGROWFSDATA _IOW ('X', 110, struct xfs_growfs_data) + +typedef struct xfs_growfs_data { + uint64_t newblocks; + uint32_t imaxpct; +} xfs_growfs_data_t; +#endif diff --git a/src/basic/mkdir.c b/src/basic/mkdir.c new file mode 100644 index 0000000..c8ff342 --- /dev/null +++ b/src/basic/mkdir.c @@ -0,0 +1,249 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include + +#include "alloc-util.h" +#include "chase-symlinks.h" +#include "fd-util.h" +#include "format-util.h" +#include "fs-util.h" +#include "macro.h" +#include "mkdir.h" +#include "path-util.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "user-util.h" + +int mkdir_safe_internal( + const char *path, + mode_t mode, + uid_t uid, + gid_t gid, + MkdirFlags flags, + mkdirat_func_t _mkdirat) { + + struct stat st; + int r; + + assert(path); + assert(mode != MODE_INVALID); + assert(_mkdirat && _mkdirat != mkdirat); + + if (_mkdirat(AT_FDCWD, path, mode) >= 0) { + r = chmod_and_chown(path, mode, uid, gid); + if (r < 0) + return r; + } + + if (lstat(path, &st) < 0) + return -errno; + + if ((flags & MKDIR_FOLLOW_SYMLINK) && S_ISLNK(st.st_mode)) { + _cleanup_free_ char *p = NULL; + + r = chase_symlinks(path, NULL, CHASE_NONEXISTENT, &p, NULL); + if (r < 0) + return r; + if (r == 0) + return mkdir_safe_internal(p, mode, uid, gid, + flags & ~MKDIR_FOLLOW_SYMLINK, + _mkdirat); + + if (lstat(p, &st) < 0) + return -errno; + } + + if (flags & MKDIR_IGNORE_EXISTING) + return 0; + + if (!S_ISDIR(st.st_mode)) + return log_full_errno(flags & MKDIR_WARN_MODE ? LOG_WARNING : LOG_DEBUG, SYNTHETIC_ERRNO(ENOTDIR), + "Path \"%s\" already exists and is not a directory, refusing.", path); + + if ((st.st_mode & ~mode & 0777) != 0) + return log_full_errno(flags & MKDIR_WARN_MODE ? LOG_WARNING : LOG_DEBUG, SYNTHETIC_ERRNO(EEXIST), + "Directory \"%s\" already exists, but has mode %04o that is too permissive (%04o was requested), refusing.", + path, st.st_mode & 0777, mode); + + if ((uid != UID_INVALID && st.st_uid != uid) || + (gid != GID_INVALID && st.st_gid != gid)) { + char u[DECIMAL_STR_MAX(uid_t)] = "-", g[DECIMAL_STR_MAX(gid_t)] = "-"; + + if (uid != UID_INVALID) + xsprintf(u, UID_FMT, uid); + if (gid != UID_INVALID) + xsprintf(g, GID_FMT, gid); + return log_full_errno(flags & MKDIR_WARN_MODE ? LOG_WARNING : LOG_DEBUG, SYNTHETIC_ERRNO(EEXIST), + "Directory \"%s\" already exists, but is owned by "UID_FMT":"GID_FMT" (%s:%s was requested), refusing.", + path, st.st_uid, st.st_gid, u, g); + } + + return 0; +} + +int mkdirat_errno_wrapper(int dirfd, const char *pathname, mode_t mode) { + return RET_NERRNO(mkdirat(dirfd, pathname, mode)); +} + +int mkdir_safe(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags) { + return mkdir_safe_internal(path, mode, uid, gid, flags, mkdirat_errno_wrapper); +} + +int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdirat_func_t _mkdirat) { + const char *p, *e = NULL; + int r; + + assert(path); + assert(_mkdirat != mkdirat); + + if (prefix) { + p = path_startswith_full(path, prefix, /* accept_dot_dot= */ false); + if (!p) + return -ENOTDIR; + } else + p = path; + + if (isempty(p)) + return 0; + + if (!path_is_safe(p)) + return -ENOTDIR; + + /* return immediately if directory exists */ + r = path_find_last_component(p, /* accept_dot_dot= */ false, &e, NULL); + if (r <= 0) /* r == 0 means path is equivalent to prefix. */ + return r; + if (e == p) + return 0; + + assert(e > p); + assert(*e == '/'); + + /* drop the last component */ + path = strndupa_safe(path, e - path); + r = is_dir(path, true); + if (r > 0) + return 0; + if (r == 0) + return -ENOTDIR; + + /* create every parent directory in the path, except the last component */ + for (p = path;;) { + char *s; + int n; + + n = path_find_first_component(&p, /* accept_dot_dot= */ false, (const char **) &s); + if (n <= 0) + return n; + + assert(p); + assert(s >= path); + assert(IN_SET(s[n], '/', '\0')); + + s[n] = '\0'; + + if (!prefix || !path_startswith_full(prefix, path, /* accept_dot_dot= */ false)) { + r = mkdir_safe_internal(path, mode, uid, gid, flags | MKDIR_IGNORE_EXISTING, _mkdirat); + if (r < 0 && r != -EEXIST) + return r; + } + + s[n] = *p == '\0' ? '\0' : '/'; + } +} + +int mkdir_parents(const char *path, mode_t mode) { + return mkdir_parents_internal(NULL, path, mode, UID_INVALID, UID_INVALID, 0, mkdirat_errno_wrapper); +} + +int mkdir_parents_safe(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags) { + return mkdir_parents_internal(prefix, path, mode, uid, gid, flags, mkdirat_errno_wrapper); +} + +int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdirat_func_t _mkdirat) { + int r; + + /* Like mkdir -p */ + + assert(_mkdirat != mkdirat); + + r = mkdir_parents_internal(prefix, path, mode, uid, gid, flags | MKDIR_FOLLOW_SYMLINK, _mkdirat); + if (r < 0) + return r; + + if (!uid_is_valid(uid) && !gid_is_valid(gid) && flags == 0) { + r = _mkdirat(AT_FDCWD, path, mode); + if (r < 0 && (r != -EEXIST || is_dir(path, true) <= 0)) + return r; + } else { + r = mkdir_safe_internal(path, mode, uid, gid, flags, _mkdirat); + if (r < 0 && r != -EEXIST) + return r; + } + + return 0; +} + +int mkdir_p(const char *path, mode_t mode) { + return mkdir_p_internal(NULL, path, mode, UID_INVALID, UID_INVALID, 0, mkdirat_errno_wrapper); +} + +int mkdir_p_safe(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags) { + return mkdir_p_internal(prefix, path, mode, uid, gid, flags, mkdirat_errno_wrapper); +} + +int mkdir_p_root(const char *root, const char *p, uid_t uid, gid_t gid, mode_t m) { + _cleanup_free_ char *pp = NULL, *bn = NULL; + _cleanup_close_ int dfd = -1; + int r; + + r = path_extract_directory(p, &pp); + if (r == -EDESTADDRREQ) { + /* only fname is passed, no prefix to operate on */ + dfd = open(".", O_RDONLY|O_CLOEXEC|O_DIRECTORY); + if (dfd < 0) + return -errno; + } else if (r == -EADDRNOTAVAIL) + /* only root dir or "." was passed, i.e. there is no parent to extract, in that case there's nothing to do. */ + return 0; + else if (r < 0) + return r; + else { + /* Extracting the parent dir worked, hence we aren't top-level? Recurse up first. */ + r = mkdir_p_root(root, pp, uid, gid, m); + if (r < 0) + return r; + + dfd = chase_symlinks_and_open(pp, root, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_DIRECTORY, NULL); + if (dfd < 0) + return dfd; + } + + r = path_extract_filename(p, &bn); + if (r == -EADDRNOTAVAIL) /* Already top-level */ + return 0; + if (r < 0) + return r; + + if (mkdirat(dfd, bn, m) < 0) { + if (errno == EEXIST) + return 0; + + return -errno; + } + + if (uid_is_valid(uid) || gid_is_valid(gid)) { + _cleanup_close_ int nfd = -1; + + nfd = openat(dfd, bn, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW); + if (nfd < 0) + return -errno; + + if (fchown(nfd, uid, gid) < 0) + return -errno; + } + + return 1; +} diff --git a/src/basic/mkdir.h b/src/basic/mkdir.h new file mode 100644 index 0000000..c0c0ea6 --- /dev/null +++ b/src/basic/mkdir.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +typedef enum MkdirFlags { + MKDIR_FOLLOW_SYMLINK = 1 << 0, + MKDIR_IGNORE_EXISTING = 1 << 1, /* Quietly accept a preexisting directory (or file) */ + MKDIR_WARN_MODE = 1 << 2, /* Log at LOG_WARNING when mode doesn't match */ +} MkdirFlags; + +int mkdirat_errno_wrapper(int dirfd, const char *pathname, mode_t mode); + +int mkdir_safe(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags); +int mkdir_parents(const char *path, mode_t mode); +int mkdir_parents_safe(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags); +int mkdir_p(const char *path, mode_t mode); +int mkdir_p_safe(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags); +int mkdir_p_root(const char *root, const char *p, uid_t uid, gid_t gid, mode_t m); + +/* The following are used to implement the mkdir_xyz_label() calls, don't use otherwise. */ +typedef int (*mkdirat_func_t)(int dir_fd, const char *pathname, mode_t mode); +int mkdir_safe_internal(const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdirat_func_t _mkdir); +int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdirat_func_t _mkdir); +int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, uid_t uid, gid_t gid, MkdirFlags flags, mkdirat_func_t _mkdir); diff --git a/src/basic/mountpoint-util.c b/src/basic/mountpoint-util.c new file mode 100644 index 0000000..3c95179 --- /dev/null +++ b/src/basic/mountpoint-util.c @@ -0,0 +1,558 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include + +#include "alloc-util.h" +#include "chase-symlinks.h" +#include "fd-util.h" +#include "fileio.h" +#include "filesystems.h" +#include "fs-util.h" +#include "missing_stat.h" +#include "missing_syscall.h" +#include "mkdir.h" +#include "mountpoint-util.h" +#include "nulstr-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "strv.h" +#include "user-util.h" + +/* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of + * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code + * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with + * EINVAL right-away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition + * from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal + * with large file handles anyway. */ +#define ORIGINAL_MAX_HANDLE_SZ 128 + +int name_to_handle_at_loop( + int fd, + const char *path, + struct file_handle **ret_handle, + int *ret_mnt_id, + int flags) { + + _cleanup_free_ struct file_handle *h = NULL; + size_t n = ORIGINAL_MAX_HANDLE_SZ; + + assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0); + + /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified + * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a + * start value, it is not an upper bound on the buffer size required. + * + * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed + * as NULL if there's no interest in either. */ + + for (;;) { + int mnt_id = -1; + + h = malloc0(offsetof(struct file_handle, f_handle) + n); + if (!h) + return -ENOMEM; + + h->handle_bytes = n; + + if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) { + + if (ret_handle) + *ret_handle = TAKE_PTR(h); + + if (ret_mnt_id) + *ret_mnt_id = mnt_id; + + return 0; + } + if (errno != EOVERFLOW) + return -errno; + + if (!ret_handle && ret_mnt_id && mnt_id >= 0) { + + /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the + * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to + * be filled in, and the caller was interested in only the mount ID an nothing else. */ + + *ret_mnt_id = mnt_id; + return 0; + } + + /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something + * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small + * buffer. In that case propagate EOVERFLOW */ + if (h->handle_bytes <= n) + return -EOVERFLOW; + + /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */ + n = h->handle_bytes; + if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */ + return -EOVERFLOW; + + h = mfree(h); + } +} + +static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *ret_mnt_id) { + char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)]; + _cleanup_free_ char *fdinfo = NULL; + _cleanup_close_ int subfd = -1; + char *p; + int r; + + assert(ret_mnt_id); + assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0); + + if ((flags & AT_EMPTY_PATH) && isempty(filename)) + xsprintf(path, "/proc/self/fdinfo/%i", fd); + else { + subfd = openat(fd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW)); + if (subfd < 0) + return -errno; + + xsprintf(path, "/proc/self/fdinfo/%i", subfd); + } + + r = read_full_virtual_file(path, &fdinfo, NULL); + if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */ + return proc_mounted() > 0 ? -EOPNOTSUPP : -ENOSYS; + if (r < 0) + return r; + + p = startswith(fdinfo, "mnt_id:"); + if (!p) { + p = strstr(fdinfo, "\nmnt_id:"); + if (!p) /* The mnt_id field is a relatively new addition */ + return -EOPNOTSUPP; + + p += 8; + } + + p += strspn(p, WHITESPACE); + p[strcspn(p, WHITESPACE)] = 0; + + return safe_atoi(p, ret_mnt_id); +} + +static bool filename_possibly_with_slash_suffix(const char *s) { + const char *slash, *copied; + + /* Checks whether the specified string is either file name, or a filename with a suffix of + * slashes. But nothing else. + * + * this is OK: foo, bar, foo/, bar/, foo//, bar/// + * this is not OK: "", "/", "/foo", "foo/bar", ".", ".." … */ + + slash = strchr(s, '/'); + if (!slash) + return filename_is_valid(s); + + if (slash - s > PATH_MAX) /* We want to allocate on the stack below, hence do a size check first */ + return false; + + if (slash[strspn(slash, "/")] != 0) /* Check that the suffix consist only of one or more slashes */ + return false; + + copied = strndupa_safe(s, slash - s); + return filename_is_valid(copied); +} + +static bool is_name_to_handle_at_fatal_error(int err) { + /* name_to_handle_at() can return "acceptable" errors that are due to the context. For + * example the kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall + * was blocked (EACCES/EPERM; maybe through seccomp, because we are running inside of a + * container), or the mount point is not triggered yet (EOVERFLOW, think nfs4), or some + * general name_to_handle_at() flakiness (EINVAL). However other errors are not supposed to + * happen and therefore are considered fatal ones. */ + + assert(err < 0); + + return !IN_SET(err, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL); +} + +int fd_is_mount_point(int fd, const char *filename, int flags) { + _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL; + int mount_id = -1, mount_id_parent = -1; + bool nosupp = false, check_st_dev = true; + STRUCT_STATX_DEFINE(sx); + struct stat a, b; + int r; + + assert(fd >= 0); + assert((flags & ~AT_SYMLINK_FOLLOW) == 0); + + if (!filename) { + /* If the file name is specified as NULL we'll see if the specified 'fd' is a mount + * point. That's only supported if the kernel supports statx(), or if the inode specified via + * 'fd' refers to a directory. Otherwise, we'll have to fail (ENOTDIR), because we have no + * kernel API to query the information we need. */ + flags |= AT_EMPTY_PATH; + filename = ""; + } else if (!filename_possibly_with_slash_suffix(filename)) + /* Insist that the specified filename is actually a filename, and not a path, i.e. some inode further + * up or down the tree then immediately below the specified directory fd. */ + return -EINVAL; + + /* First we will try statx()' STATX_ATTR_MOUNT_ROOT attribute, which is our ideal API, available + * since kernel 5.8. + * + * If that fails, our second try is the name_to_handle_at() syscall, which tells us the mount id and + * an opaque file "handle". It is not supported everywhere though (kernel compile-time option, not + * all file systems are hooked up). If it works the mount id is usually good enough to tell us + * whether something is a mount point. + * + * If that didn't work we will try to read the mount id from /proc/self/fdinfo/. This is almost + * as good as name_to_handle_at(), however, does not return the opaque file handle. The opaque file + * handle is pretty useful to detect the root directory, which we should always consider a mount + * point. Hence we use this only as fallback. Exporting the mnt_id in fdinfo is a pretty recent + * kernel addition. + * + * As last fallback we do traditional fstat() based st_dev comparisons. This is how things were + * traditionally done, but unionfs breaks this since it exposes file systems with a variety of st_dev + * reported. Also, btrfs subvolumes have different st_dev, even though they aren't real mounts of + * their own. */ + + if (statx(fd, filename, (FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? 0 : AT_SYMLINK_NOFOLLOW) | + (flags & AT_EMPTY_PATH) | + AT_NO_AUTOMOUNT, STATX_TYPE, &sx) < 0) { + if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) + return -errno; + + /* If statx() is not available or forbidden, fall back to name_to_handle_at() below */ + } else if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) /* yay! */ + return FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT); + else if (FLAGS_SET(sx.stx_mask, STATX_TYPE) && S_ISLNK(sx.stx_mode)) + return false; /* symlinks are never mount points */ + + r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags); + if (r < 0) { + if (is_name_to_handle_at_fatal_error(r)) + return r; + if (r != -EOPNOTSUPP) + goto fallback_fdinfo; + + /* This kernel or file system does not support name_to_handle_at(), hence let's see + * if the upper fs supports it (in which case it is a mount point), otherwise fall + * back to the traditional stat() logic */ + nosupp = true; + } + + if (isempty(filename)) + r = name_to_handle_at_loop(fd, "..", &h_parent, &mount_id_parent, 0); /* can't work for non-directories 😢 */ + else + r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH); + if (r < 0) { + if (is_name_to_handle_at_fatal_error(r)) + return r; + if (r != -EOPNOTSUPP) + goto fallback_fdinfo; + if (nosupp) + /* Both the parent and the directory can't do name_to_handle_at() */ + goto fallback_fdinfo; + + /* The parent can't do name_to_handle_at() but the directory we are + * interested in can? If so, it must be a mount point. */ + return 1; + } + + /* The parent can do name_to_handle_at() but the directory we are interested in can't? If + * so, it must be a mount point. */ + if (nosupp) + return 1; + + /* If the file handle for the directory we are interested in and its parent are identical, + * we assume this is the root directory, which is a mount point. */ + + if (h->handle_bytes == h_parent->handle_bytes && + h->handle_type == h_parent->handle_type && + memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0) + return 1; + + return mount_id != mount_id_parent; + +fallback_fdinfo: + r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id); + if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM, -ENOSYS)) + goto fallback_fstat; + if (r < 0) + return r; + + if (isempty(filename)) + r = fd_fdinfo_mnt_id(fd, "..", 0, &mount_id_parent); /* can't work for non-directories 😢 */ + else + r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent); + if (r < 0) + return r; + + if (mount_id != mount_id_parent) + return 1; + + /* Hmm, so, the mount ids are the same. This leaves one special case though for the root file + * system. For that, let's see if the parent directory has the same inode as we are interested + * in. Hence, let's also do fstat() checks now, too, but avoid the st_dev comparisons, since they + * aren't that useful on unionfs mounts. */ + check_st_dev = false; + +fallback_fstat: + /* yay for fstatat() taking a different set of flags than the other _at() above */ + if (flags & AT_SYMLINK_FOLLOW) + flags &= ~AT_SYMLINK_FOLLOW; + else + flags |= AT_SYMLINK_NOFOLLOW; + if (fstatat(fd, filename, &a, flags) < 0) + return -errno; + if (S_ISLNK(a.st_mode)) /* Symlinks are never mount points */ + return false; + + if (isempty(filename)) + r = fstatat(fd, "..", &b, 0); + else + r = fstatat(fd, "", &b, AT_EMPTY_PATH); + if (r < 0) + return -errno; + + /* A directory with same device and inode as its parent? Must be the root directory */ + if (stat_inode_same(&a, &b)) + return 1; + + return check_st_dev && (a.st_dev != b.st_dev); +} + +/* flags can be AT_SYMLINK_FOLLOW or 0 */ +int path_is_mount_point(const char *t, const char *root, int flags) { + _cleanup_free_ char *canonical = NULL; + _cleanup_close_ int fd = -1; + int r; + + assert(t); + assert((flags & ~AT_SYMLINK_FOLLOW) == 0); + + if (path_equal(t, "/")) + return 1; + + /* we need to resolve symlinks manually, we can't just rely on + * fd_is_mount_point() to do that for us; if we have a structure like + * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we + * look at needs to be /usr, not /. */ + if (flags & AT_SYMLINK_FOLLOW) { + r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical, NULL); + if (r < 0) + return r; + + t = canonical; + } + + fd = open_parent(t, O_PATH|O_CLOEXEC, 0); + if (fd < 0) + return fd; + + return fd_is_mount_point(fd, last_path_component(t), flags); +} + +int path_get_mnt_id(const char *path, int *ret) { + STRUCT_NEW_STATX_DEFINE(buf); + int r; + + if (statx(AT_FDCWD, path, AT_SYMLINK_NOFOLLOW|AT_NO_AUTOMOUNT, STATX_MNT_ID, &buf.sx) < 0) { + if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) + return -errno; + + /* Fall back to name_to_handle_at() and then fdinfo if statx is not supported or we lack + * privileges */ + + } else if (FLAGS_SET(buf.nsx.stx_mask, STATX_MNT_ID)) { + *ret = buf.nsx.stx_mnt_id; + return 0; + } + + r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0); + if (r == 0 || is_name_to_handle_at_fatal_error(r)) + return r; + + return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret); +} + +bool fstype_is_network(const char *fstype) { + const char *x; + + x = startswith(fstype, "fuse."); + if (x) + fstype = x; + + if (nulstr_contains(filesystem_sets[FILESYSTEM_SET_NETWORK].value, fstype)) + return true; + + /* Filesystems not present in the internal database */ + return STR_IN_SET(fstype, + "davfs", + "glusterfs", + "lustre", + "sshfs"); +} + +bool fstype_needs_quota(const char *fstype) { + /* 1. quotacheck needs to be run for some filesystems after they are mounted + * if the filesystem was not unmounted cleanly. + * 2. You may need to run quotaon to enable quota usage tracking and/or + * enforcement. + * ext2 - needs 1) and 2) + * ext3 - needs 2) if configured using usrjquota/grpjquota mount options + * ext4 - needs 1) if created without journal, needs 2) if created without QUOTA + * filesystem feature + * reiserfs - needs 2). + * jfs - needs 2) + * f2fs - needs 2) if configured using usrjquota/grpjquota/prjjquota mount options + * xfs - nothing needed + * gfs2 - nothing needed + * ocfs2 - nothing needed + * btrfs - nothing needed + * for reference see filesystem and quota manpages */ + return STR_IN_SET(fstype, + "ext2", + "ext3", + "ext4", + "reiserfs", + "jfs", + "f2fs"); +} + +bool fstype_is_api_vfs(const char *fstype) { + const FilesystemSet *fs; + + FOREACH_POINTER(fs, + filesystem_sets + FILESYSTEM_SET_BASIC_API, + filesystem_sets + FILESYSTEM_SET_AUXILIARY_API, + filesystem_sets + FILESYSTEM_SET_PRIVILEGED_API, + filesystem_sets + FILESYSTEM_SET_TEMPORARY) + if (nulstr_contains(fs->value, fstype)) + return true; + + /* Filesystems not present in the internal database */ + return STR_IN_SET(fstype, + "autofs", + "cpuset", + "devtmpfs"); +} + +bool fstype_is_blockdev_backed(const char *fstype) { + const char *x; + + x = startswith(fstype, "fuse."); + if (x) + fstype = x; + + return !streq(fstype, "9p") && !fstype_is_network(fstype) && !fstype_is_api_vfs(fstype); +} + +bool fstype_is_ro(const char *fstype) { + /* All Linux file systems that are necessarily read-only */ + return STR_IN_SET(fstype, + "DM_verity_hash", + "cramfs", + "erofs", + "iso9660", + "squashfs"); +} + +bool fstype_can_discard(const char *fstype) { + return STR_IN_SET(fstype, + "btrfs", + "f2fs", + "ext4", + "vfat", + "xfs"); +} + +bool fstype_can_uid_gid(const char *fstype) { + + /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories, + * current and future. */ + + return STR_IN_SET(fstype, + "adfs", + "exfat", + "fat", + "hfs", + "hpfs", + "iso9660", + "msdos", + "ntfs", + "vfat"); +} + +int dev_is_devtmpfs(void) { + _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; + int mount_id, r; + char *e; + + r = path_get_mnt_id("/dev", &mount_id); + if (r < 0) + return r; + + r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo); + if (r == -ENOENT) + return proc_mounted() > 0 ? -ENOENT : -ENOSYS; + if (r < 0) + return r; + + for (;;) { + _cleanup_free_ char *line = NULL; + int mid; + + r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + break; + + if (sscanf(line, "%i", &mid) != 1) + continue; + + if (mid != mount_id) + continue; + + e = strstrafter(line, " - "); + if (!e) + continue; + + /* accept any name that starts with the currently expected type */ + if (startswith(e, "devtmpfs")) + return true; + } + + return false; +} + +const char *mount_propagation_flags_to_string(unsigned long flags) { + + switch (flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE)) { + case 0: + return ""; + case MS_SHARED: + return "shared"; + case MS_SLAVE: + return "slave"; + case MS_PRIVATE: + return "private"; + } + + return NULL; +} + +int mount_propagation_flags_from_string(const char *name, unsigned long *ret) { + + if (isempty(name)) + *ret = 0; + else if (streq(name, "shared")) + *ret = MS_SHARED; + else if (streq(name, "slave")) + *ret = MS_SLAVE; + else if (streq(name, "private")) + *ret = MS_PRIVATE; + else + return -EINVAL; + return 0; +} diff --git a/src/basic/mountpoint-util.h b/src/basic/mountpoint-util.h new file mode 100644 index 0000000..e2b493a --- /dev/null +++ b/src/basic/mountpoint-util.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include + +int name_to_handle_at_loop(int fd, const char *path, struct file_handle **ret_handle, int *ret_mnt_id, int flags); + +int path_get_mnt_id(const char *path, int *ret); + +int fd_is_mount_point(int fd, const char *filename, int flags); +int path_is_mount_point(const char *path, const char *root, int flags); + +bool fstype_is_network(const char *fstype); +bool fstype_needs_quota(const char *fstype); +bool fstype_is_api_vfs(const char *fstype); +bool fstype_is_blockdev_backed(const char *fstype); +bool fstype_is_ro(const char *fsype); +bool fstype_can_discard(const char *fstype); +bool fstype_can_uid_gid(const char *fstype); + +int dev_is_devtmpfs(void); + +const char *mount_propagation_flags_to_string(unsigned long flags); +int mount_propagation_flags_from_string(const char *name, unsigned long *ret); diff --git a/src/basic/namespace-util.c b/src/basic/namespace-util.c new file mode 100644 index 0000000..a87a875 --- /dev/null +++ b/src/basic/namespace-util.c @@ -0,0 +1,262 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include + +#include "errno-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "missing_fs.h" +#include "missing_magic.h" +#include "missing_sched.h" +#include "namespace-util.h" +#include "process-util.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "user-util.h" + +const struct namespace_info namespace_info[] = { + [NAMESPACE_CGROUP] = { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, }, + [NAMESPACE_IPC] = { "ipc", "ns/ipc", CLONE_NEWIPC, }, + [NAMESPACE_NET] = { "net", "ns/net", CLONE_NEWNET, }, + /* So, the mount namespace flag is called CLONE_NEWNS for historical + * reasons. Let's expose it here under a more explanatory name: "mnt". + * This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */ + [NAMESPACE_MOUNT] = { "mnt", "ns/mnt", CLONE_NEWNS, }, + [NAMESPACE_PID] = { "pid", "ns/pid", CLONE_NEWPID, }, + [NAMESPACE_USER] = { "user", "ns/user", CLONE_NEWUSER, }, + [NAMESPACE_UTS] = { "uts", "ns/uts", CLONE_NEWUTS, }, + [NAMESPACE_TIME] = { "time", "ns/time", CLONE_NEWTIME, }, + { /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */ }, +}; + +#define pid_namespace_path(pid, type) procfs_file_alloca(pid, namespace_info[type].proc_path) + +int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) { + _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1; + int rfd = -1; + + assert(pid >= 0); + + if (mntns_fd) { + const char *mntns; + + mntns = pid_namespace_path(pid, NAMESPACE_MOUNT); + mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (mntnsfd < 0) + return -errno; + } + + if (pidns_fd) { + const char *pidns; + + pidns = pid_namespace_path(pid, NAMESPACE_PID); + pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (pidnsfd < 0) + return -errno; + } + + if (netns_fd) { + const char *netns; + + netns = pid_namespace_path(pid, NAMESPACE_NET); + netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (netnsfd < 0) + return -errno; + } + + if (userns_fd) { + const char *userns; + + userns = pid_namespace_path(pid, NAMESPACE_USER); + usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (usernsfd < 0 && errno != ENOENT) + return -errno; + } + + if (root_fd) { + const char *root; + + root = procfs_file_alloca(pid, "root"); + rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY); + if (rfd < 0) + return -errno; + } + + if (pidns_fd) + *pidns_fd = TAKE_FD(pidnsfd); + + if (mntns_fd) + *mntns_fd = TAKE_FD(mntnsfd); + + if (netns_fd) + *netns_fd = TAKE_FD(netnsfd); + + if (userns_fd) + *userns_fd = TAKE_FD(usernsfd); + + if (root_fd) + *root_fd = TAKE_FD(rfd); + + return 0; +} + +int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) { + int r; + + if (userns_fd >= 0) { + /* Can't setns to your own userns, since then you could escalate from non-root to root in + * your own namespace, so check if namespaces are equal before attempting to enter. */ + + r = files_same(FORMAT_PROC_FD_PATH(userns_fd), "/proc/self/ns/user", 0); + if (r < 0) + return r; + if (r) + userns_fd = -1; + } + + if (pidns_fd >= 0) + if (setns(pidns_fd, CLONE_NEWPID) < 0) + return -errno; + + if (mntns_fd >= 0) + if (setns(mntns_fd, CLONE_NEWNS) < 0) + return -errno; + + if (netns_fd >= 0) + if (setns(netns_fd, CLONE_NEWNET) < 0) + return -errno; + + if (userns_fd >= 0) + if (setns(userns_fd, CLONE_NEWUSER) < 0) + return -errno; + + if (root_fd >= 0) { + if (fchdir(root_fd) < 0) + return -errno; + + if (chroot(".") < 0) + return -errno; + } + + return reset_uid_gid(); +} + +int fd_is_ns(int fd, unsigned long nsflag) { + struct statfs s; + int r; + + /* Checks whether the specified file descriptor refers to a namespace created by specifying nsflag in clone(). + * On old kernels there's no nice way to detect that, hence on those we'll return a recognizable error (EUCLEAN), + * so that callers can handle this somewhat nicely. + * + * This function returns > 0 if the fd definitely refers to a network namespace, 0 if it definitely does not + * refer to a network namespace, -EUCLEAN if we can't determine, and other negative error codes on error. */ + + if (fstatfs(fd, &s) < 0) + return -errno; + + if (!is_fs_type(&s, NSFS_MAGIC)) { + /* On really old kernels, there was no "nsfs", and network namespace sockets belonged to procfs + * instead. Handle that in a somewhat smart way. */ + + if (is_fs_type(&s, PROC_SUPER_MAGIC)) { + struct statfs t; + + /* OK, so it is procfs. Let's see if our own network namespace is procfs, too. If so, then the + * passed fd might refer to a network namespace, but we can't know for sure. In that case, + * return a recognizable error. */ + + if (statfs("/proc/self/ns/net", &t) < 0) + return -errno; + + if (s.f_type == t.f_type) + return -EUCLEAN; /* It's possible, we simply don't know */ + } + + return 0; /* No! */ + } + + r = ioctl(fd, NS_GET_NSTYPE); + if (r < 0) { + if (errno == ENOTTY) /* Old kernels didn't know this ioctl, let's also return a recognizable error in that case */ + return -EUCLEAN; + + return -errno; + } + + return (unsigned long) r == nsflag; +} + +int detach_mount_namespace(void) { + + /* Detaches the mount namespace, disabling propagation from our namespace to the host */ + + if (unshare(CLONE_NEWNS) < 0) + return -errno; + + return RET_NERRNO(mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)); +} + +int userns_acquire(const char *uid_map, const char *gid_map) { + char path[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(pid_t) + 1]; + _cleanup_(sigkill_waitp) pid_t pid = 0; + _cleanup_close_ int userns_fd = -1; + int r; + + assert(uid_map); + assert(gid_map); + + /* Forks off a process in a new userns, configures the specified uidmap/gidmap, acquires an fd to it, + * and then kills the process again. This way we have a userns fd that is not bound to any + * process. We can use that for file system mounts and similar. */ + + r = safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_NEW_USERNS, &pid); + if (r < 0) + return r; + if (r == 0) + /* Child. We do nothing here, just freeze until somebody kills us. */ + freeze(); + + xsprintf(path, "/proc/" PID_FMT "/uid_map", pid); + r = write_string_file(path, uid_map, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return log_error_errno(r, "Failed to write UID map: %m"); + + xsprintf(path, "/proc/" PID_FMT "/gid_map", pid); + r = write_string_file(path, gid_map, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return log_error_errno(r, "Failed to write GID map: %m"); + + r = namespace_open(pid, NULL, NULL, NULL, &userns_fd, NULL); + if (r < 0) + return log_error_errno(r, "Failed to open userns fd: %m"); + + return TAKE_FD(userns_fd); + +} + +int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type) { + const char *ns_path; + struct stat ns_st1, ns_st2; + + if (pid1 == 0) + pid1 = getpid_cached(); + + if (pid2 == 0) + pid2 = getpid_cached(); + + if (pid1 == pid2) + return 1; + + ns_path = pid_namespace_path(pid1, type); + if (stat(ns_path, &ns_st1) < 0) + return -errno; + + ns_path = pid_namespace_path(pid2, type); + if (stat(ns_path, &ns_st2) < 0) + return -errno; + + return stat_inode_same(&ns_st1, &ns_st2); +} diff --git a/src/basic/namespace-util.h b/src/basic/namespace-util.h new file mode 100644 index 0000000..be5b228 --- /dev/null +++ b/src/basic/namespace-util.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +typedef enum NamespaceType { + NAMESPACE_CGROUP, + NAMESPACE_IPC, + NAMESPACE_NET, + NAMESPACE_MOUNT, + NAMESPACE_PID, + NAMESPACE_USER, + NAMESPACE_UTS, + NAMESPACE_TIME, + _NAMESPACE_TYPE_MAX, + _NAMESPACE_TYPE_INVALID = -EINVAL, +} NamespaceType; + +extern const struct namespace_info { + const char *proc_name; + const char *proc_path; + unsigned int clone_flag; +} namespace_info[_NAMESPACE_TYPE_MAX + 1]; + +int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd); +int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd); + +int fd_is_ns(int fd, unsigned long nsflag); + +int detach_mount_namespace(void); + +static inline bool userns_shift_range_valid(uid_t shift, uid_t range) { + /* Checks that the specified userns range makes sense, i.e. contains at least one UID, and the end + * doesn't overflow uid_t. */ + + assert_cc((uid_t) -1 > 0); /* verify that uid_t is unsigned */ + + if (range <= 0) + return false; + + if (shift > (uid_t) -1 - range) + return false; + + return true; +} + +int userns_acquire(const char *uid_map, const char *gid_map); +int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type); diff --git a/src/basic/nss-util.h b/src/basic/nss-util.h new file mode 100644 index 0000000..579e2c0 --- /dev/null +++ b/src/basic/nss-util.h @@ -0,0 +1,273 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include +#include + +#define NSS_SIGNALS_BLOCK SIGALRM,SIGVTALRM,SIGPIPE,SIGCHLD,SIGTSTP,SIGIO,SIGHUP,SIGUSR1,SIGUSR2,SIGPROF,SIGURG,SIGWINCH + +#ifndef DEPRECATED_RES_USE_INET6 +# define DEPRECATED_RES_USE_INET6 0x00002000 +#endif + +#define NSS_GETHOSTBYNAME_PROTOTYPES(module) \ +enum nss_status _nss_##module##_gethostbyname4_r( \ + const char *name, \ + struct gaih_addrtuple **pat, \ + char *buffer, size_t buflen, \ + int *errnop, int *h_errnop, \ + int32_t *ttlp) _public_; \ +enum nss_status _nss_##module##_gethostbyname3_r( \ + const char *name, \ + int af, \ + struct hostent *host, \ + char *buffer, size_t buflen, \ + int *errnop, int *h_errnop, \ + int32_t *ttlp, \ + char **canonp) _public_; \ +enum nss_status _nss_##module##_gethostbyname2_r( \ + const char *name, \ + int af, \ + struct hostent *host, \ + char *buffer, size_t buflen, \ + int *errnop, int *h_errnop) _public_; \ +enum nss_status _nss_##module##_gethostbyname_r( \ + const char *name, \ + struct hostent *host, \ + char *buffer, size_t buflen, \ + int *errnop, int *h_errnop) _public_ + +#define NSS_GETHOSTBYADDR_PROTOTYPES(module) \ +enum nss_status _nss_##module##_gethostbyaddr2_r( \ + const void* addr, socklen_t len, \ + int af, \ + struct hostent *host, \ + char *buffer, size_t buflen, \ + int *errnop, int *h_errnop, \ + int32_t *ttlp) _public_; \ +enum nss_status _nss_##module##_gethostbyaddr_r( \ + const void* addr, socklen_t len, \ + int af, \ + struct hostent *host, \ + char *buffer, size_t buflen, \ + int *errnop, int *h_errnop) _public_ + +#define NSS_GETHOSTBYNAME_FALLBACKS(module) \ +enum nss_status _nss_##module##_gethostbyname2_r( \ + const char *name, \ + int af, \ + struct hostent *host, \ + char *buffer, size_t buflen, \ + int *errnop, int *h_errnop) { \ + return _nss_##module##_gethostbyname3_r( \ + name, \ + af, \ + host, \ + buffer, buflen, \ + errnop, h_errnop, \ + NULL, \ + NULL); \ +} \ +enum nss_status _nss_##module##_gethostbyname_r( \ + const char *name, \ + struct hostent *host, \ + char *buffer, size_t buflen, \ + int *errnop, int *h_errnop) { \ + enum nss_status ret = NSS_STATUS_NOTFOUND; \ + \ + if (_res.options & DEPRECATED_RES_USE_INET6) \ + ret = _nss_##module##_gethostbyname3_r( \ + name, \ + AF_INET6, \ + host, \ + buffer, buflen, \ + errnop, h_errnop, \ + NULL, \ + NULL); \ + if (ret == NSS_STATUS_NOTFOUND) \ + ret = _nss_##module##_gethostbyname3_r( \ + name, \ + AF_INET, \ + host, \ + buffer, buflen, \ + errnop, h_errnop, \ + NULL, \ + NULL); \ + return ret; \ +} + +#define NSS_GETHOSTBYADDR_FALLBACKS(module) \ +enum nss_status _nss_##module##_gethostbyaddr_r( \ + const void* addr, socklen_t len, \ + int af, \ + struct hostent *host, \ + char *buffer, size_t buflen, \ + int *errnop, int *h_errnop) { \ + return _nss_##module##_gethostbyaddr2_r( \ + addr, len, \ + af, \ + host, \ + buffer, buflen, \ + errnop, h_errnop, \ + NULL); \ +} + +#define NSS_GETPW_PROTOTYPES(module) \ +enum nss_status _nss_##module##_getpwnam_r( \ + const char *name, \ + struct passwd *pwd, \ + char *buffer, size_t buflen, \ + int *errnop) _public_; \ +enum nss_status _nss_##module##_getpwuid_r( \ + uid_t uid, \ + struct passwd *pwd, \ + char *buffer, size_t buflen, \ + int *errnop) _public_ + +#define NSS_GETSP_PROTOTYPES(module) \ +enum nss_status _nss_##module##_getspnam_r( \ + const char *name, \ + struct spwd *spwd, \ + char *buffer, size_t buflen, \ + int *errnop) _public_ + +#define NSS_GETSG_PROTOTYPES(module) \ +enum nss_status _nss_##module##_getsgnam_r( \ + const char *name, \ + struct sgrp *sgrp, \ + char *buffer, size_t buflen, \ + int *errnop) _public_ + +#define NSS_GETGR_PROTOTYPES(module) \ +enum nss_status _nss_##module##_getgrnam_r( \ + const char *name, \ + struct group *gr, \ + char *buffer, size_t buflen, \ + int *errnop) _public_; \ +enum nss_status _nss_##module##_getgrgid_r( \ + gid_t gid, \ + struct group *gr, \ + char *buffer, size_t buflen, \ + int *errnop) _public_ + +#define NSS_PWENT_PROTOTYPES(module) \ +enum nss_status _nss_##module##_endpwent( \ + void) _public_; \ +enum nss_status _nss_##module##_setpwent( \ + int stayopen) _public_; \ +enum nss_status _nss_##module##_getpwent_r( \ + struct passwd *result, \ + char *buffer, \ + size_t buflen, \ + int *errnop) _public_; + +#define NSS_SPENT_PROTOTYPES(module) \ +enum nss_status _nss_##module##_endspent( \ + void) _public_; \ +enum nss_status _nss_##module##_setspent( \ + int stayopen) _public_; \ +enum nss_status _nss_##module##_getspent_r( \ + struct spwd *spwd, \ + char *buffer, \ + size_t buflen, \ + int *errnop) _public_; + +#define NSS_GRENT_PROTOTYPES(module) \ +enum nss_status _nss_##module##_endgrent( \ + void) _public_; \ +enum nss_status _nss_##module##_setgrent( \ + int stayopen) _public_; \ +enum nss_status _nss_##module##_getgrent_r( \ + struct group *result, \ + char *buffer, \ + size_t buflen, \ + int *errnop) _public_; + +#define NSS_SGENT_PROTOTYPES(module) \ +enum nss_status _nss_##module##_endsgent( \ + void) _public_; \ +enum nss_status _nss_##module##_setsgent( \ + int stayopen) _public_; \ +enum nss_status _nss_##module##_getsgent_r( \ + struct sgrp *sgrp, \ + char *buffer, \ + size_t buflen, \ + int *errnop) _public_; + +#define NSS_INITGROUPS_PROTOTYPE(module) \ +enum nss_status _nss_##module##_initgroups_dyn( \ + const char *user, \ + gid_t group, \ + long int *start, \ + long int *size, \ + gid_t **groupsp, \ + long int limit, \ + int *errnop) _public_; + +typedef enum nss_status (*_nss_gethostbyname4_r_t)( + const char *name, + struct gaih_addrtuple **pat, + char *buffer, size_t buflen, + int *errnop, int *h_errnop, + int32_t *ttlp); + +typedef enum nss_status (*_nss_gethostbyname3_r_t)( + const char *name, + int af, + struct hostent *result, + char *buffer, size_t buflen, + int *errnop, int *h_errnop, + int32_t *ttlp, + char **canonp); + +typedef enum nss_status (*_nss_gethostbyname2_r_t)( + const char *name, + int af, + struct hostent *result, + char *buffer, size_t buflen, + int *errnop, int *h_errnop); + +typedef enum nss_status (*_nss_gethostbyname_r_t)( + const char *name, + struct hostent *result, + char *buffer, size_t buflen, + int *errnop, int *h_errnop); + +typedef enum nss_status (*_nss_gethostbyaddr2_r_t)( + const void* addr, socklen_t len, + int af, + struct hostent *result, + char *buffer, size_t buflen, + int *errnop, int *h_errnop, + int32_t *ttlp); +typedef enum nss_status (*_nss_gethostbyaddr_r_t)( + const void* addr, socklen_t len, + int af, + struct hostent *host, + char *buffer, size_t buflen, + int *errnop, int *h_errnop); + +typedef enum nss_status (*_nss_getpwnam_r_t)( + const char *name, + struct passwd *pwd, + char *buffer, size_t buflen, + int *errnop); +typedef enum nss_status (*_nss_getpwuid_r_t)( + uid_t uid, + struct passwd *pwd, + char *buffer, size_t buflen, + int *errnop); + +typedef enum nss_status (*_nss_getgrnam_r_t)( + const char *name, + struct group *gr, + char *buffer, size_t buflen, + int *errnop); +typedef enum nss_status (*_nss_getgrgid_r_t)( + gid_t gid, + struct group *gr, + char *buffer, size_t buflen, + int *errnop); diff --git a/src/basic/nulstr-util.c b/src/basic/nulstr-util.c new file mode 100644 index 0000000..dbafc8c --- /dev/null +++ b/src/basic/nulstr-util.c @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "nulstr-util.h" +#include "string-util.h" + +const char* nulstr_get(const char *nulstr, const char *needle) { + const char *i; + + if (!nulstr) + return NULL; + + NULSTR_FOREACH(i, nulstr) + if (streq(i, needle)) + return i; + + return NULL; +} diff --git a/src/basic/nulstr-util.h b/src/basic/nulstr-util.h new file mode 100644 index 0000000..1d1fbc1 --- /dev/null +++ b/src/basic/nulstr-util.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +#define NULSTR_FOREACH(i, l) \ + for ((i) = (l); (i) && *(i); (i) = strchr((i), 0)+1) + +#define NULSTR_FOREACH_PAIR(i, j, l) \ + for ((i) = (l), (j) = strchr((i), 0)+1; (i) && *(i); (i) = strchr((j), 0)+1, (j) = *(i) ? strchr((i), 0)+1 : (i)) + +const char* nulstr_get(const char *nulstr, const char *needle); + +static inline bool nulstr_contains(const char *nulstr, const char *needle) { + return nulstr_get(nulstr, needle); +} diff --git a/src/basic/ordered-set.c b/src/basic/ordered-set.c new file mode 100644 index 0000000..b4c2588 --- /dev/null +++ b/src/basic/ordered-set.c @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "fileio.h" +#include "ordered-set.h" +#include "strv.h" + +int _ordered_set_ensure_allocated(OrderedSet **s, const struct hash_ops *ops HASHMAP_DEBUG_PARAMS) { + if (*s) + return 0; + + *s = _ordered_set_new(ops HASHMAP_DEBUG_PASS_ARGS); + if (!*s) + return -ENOMEM; + + return 0; +} + +int _ordered_set_ensure_put(OrderedSet **s, const struct hash_ops *ops, void *p HASHMAP_DEBUG_PARAMS) { + int r; + + r = _ordered_set_ensure_allocated(s, ops HASHMAP_DEBUG_PASS_ARGS); + if (r < 0) + return r; + + return ordered_set_put(*s, p); +} + +int ordered_set_consume(OrderedSet *s, void *p) { + int r; + + r = ordered_set_put(s, p); + if (r <= 0) + free(p); + + return r; +} + +int _ordered_set_put_strdup(OrderedSet **s, const char *p HASHMAP_DEBUG_PARAMS) { + char *c; + int r; + + assert(s); + assert(p); + + r = _ordered_set_ensure_allocated(s, &string_hash_ops_free HASHMAP_DEBUG_PASS_ARGS); + if (r < 0) + return r; + + if (ordered_set_contains(*s, p)) + return 0; + + c = strdup(p); + if (!c) + return -ENOMEM; + + return ordered_set_consume(*s, c); +} + +int _ordered_set_put_strdupv(OrderedSet **s, char **l HASHMAP_DEBUG_PARAMS) { + int n = 0, r; + + STRV_FOREACH(i, l) { + r = _ordered_set_put_strdup(s, *i HASHMAP_DEBUG_PASS_ARGS); + if (r < 0) + return r; + + n += r; + } + + return n; +} + +int ordered_set_put_string_set(OrderedSet **s, OrderedSet *l) { + int n = 0, r; + char *p; + + /* Like ordered_set_put_strv, but for an OrderedSet of strings */ + + ORDERED_SET_FOREACH(p, l) { + r = ordered_set_put_strdup(s, p); + if (r < 0) + return r; + + n += r; + } + + return n; +} + +void ordered_set_print(FILE *f, const char *field, OrderedSet *s) { + bool space = false; + char *p; + + if (ordered_set_isempty(s)) + return; + + fputs(field, f); + + ORDERED_SET_FOREACH(p, s) + fputs_with_space(f, p, NULL, &space); + + fputc('\n', f); +} diff --git a/src/basic/ordered-set.h b/src/basic/ordered-set.h new file mode 100644 index 0000000..e73da20 --- /dev/null +++ b/src/basic/ordered-set.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#include "hashmap.h" + +typedef struct OrderedSet OrderedSet; + +static inline OrderedSet* _ordered_set_new(const struct hash_ops *ops HASHMAP_DEBUG_PARAMS) { + return (OrderedSet*) _ordered_hashmap_new(ops HASHMAP_DEBUG_PASS_ARGS); +} +#define ordered_set_new(ops) _ordered_set_new(ops HASHMAP_DEBUG_SRC_ARGS) + +int _ordered_set_ensure_allocated(OrderedSet **s, const struct hash_ops *ops HASHMAP_DEBUG_PARAMS); +#define ordered_set_ensure_allocated(s, ops) _ordered_set_ensure_allocated(s, ops HASHMAP_DEBUG_SRC_ARGS) + +int _ordered_set_ensure_put(OrderedSet **s, const struct hash_ops *ops, void *p HASHMAP_DEBUG_PARAMS); +#define ordered_set_ensure_put(s, hash_ops, key) _ordered_set_ensure_put(s, hash_ops, key HASHMAP_DEBUG_SRC_ARGS) + +static inline void ordered_set_clear(OrderedSet *s) { + return ordered_hashmap_clear((OrderedHashmap*) s); +} + +static inline void ordered_set_clear_free(OrderedSet *s) { + return ordered_hashmap_clear_free((OrderedHashmap*) s); +} + +static inline OrderedSet* ordered_set_free(OrderedSet *s) { + return (OrderedSet*) ordered_hashmap_free((OrderedHashmap*) s); +} + +static inline OrderedSet* ordered_set_free_free(OrderedSet *s) { + return (OrderedSet*) ordered_hashmap_free_free((OrderedHashmap*) s); +} + +static inline int ordered_set_contains(OrderedSet *s, const void *p) { + return ordered_hashmap_contains((OrderedHashmap*) s, p); +} + +static inline int ordered_set_put(OrderedSet *s, void *p) { + return ordered_hashmap_put((OrderedHashmap*) s, p, p); +} + +static inline void *ordered_set_get(OrderedSet *s, const void *p) { + return ordered_hashmap_get((OrderedHashmap*) s, p); +} + +static inline unsigned ordered_set_size(OrderedSet *s) { + return ordered_hashmap_size((OrderedHashmap*) s); +} + +static inline bool ordered_set_isempty(OrderedSet *s) { + return ordered_hashmap_isempty((OrderedHashmap*) s); +} + +static inline bool ordered_set_iterate(OrderedSet *s, Iterator *i, void **value) { + return ordered_hashmap_iterate((OrderedHashmap*) s, i, value, NULL); +} + +static inline void* ordered_set_remove(OrderedSet *s, void *p) { + return ordered_hashmap_remove((OrderedHashmap*) s, p); +} + +static inline void* ordered_set_first(OrderedSet *s) { + return ordered_hashmap_first((OrderedHashmap*) s); +} + +static inline void* ordered_set_steal_first(OrderedSet *s) { + return ordered_hashmap_steal_first((OrderedHashmap*) s); +} + +static inline char** ordered_set_get_strv(OrderedSet *s) { + return _hashmap_get_strv(HASHMAP_BASE((OrderedHashmap*) s)); +} + +static inline int ordered_set_reserve(OrderedSet *s, unsigned entries_add) { + return ordered_hashmap_reserve((OrderedHashmap*) s, entries_add); +} + +int ordered_set_consume(OrderedSet *s, void *p); +int _ordered_set_put_strdup(OrderedSet **s, const char *p HASHMAP_DEBUG_PARAMS); +#define ordered_set_put_strdup(s, p) _ordered_set_put_strdup(s, p HASHMAP_DEBUG_SRC_ARGS) +int _ordered_set_put_strdupv(OrderedSet **s, char **l HASHMAP_DEBUG_PARAMS); +#define ordered_set_put_strdupv(s, l) _ordered_set_put_strdupv(s, l HASHMAP_DEBUG_SRC_ARGS) +int ordered_set_put_string_set(OrderedSet **s, OrderedSet *l); +void ordered_set_print(FILE *f, const char *field, OrderedSet *s); + +#define _ORDERED_SET_FOREACH(e, s, i) \ + for (Iterator i = ITERATOR_FIRST; ordered_set_iterate((s), &i, (void**)&(e)); ) +#define ORDERED_SET_FOREACH(e, s) \ + _ORDERED_SET_FOREACH(e, s, UNIQ_T(i, UNIQ)) + +#define ordered_set_clear_with_destructor(s, f) \ + ({ \ + OrderedSet *_s = (s); \ + void *_item; \ + while ((_item = ordered_set_steal_first(_s))) \ + f(_item); \ + _s; \ + }) +#define ordered_set_free_with_destructor(s, f) \ + ordered_set_free(ordered_set_clear_with_destructor(s, f)) + +DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedSet*, ordered_set_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedSet*, ordered_set_free_free); + +#define _cleanup_ordered_set_free_ _cleanup_(ordered_set_freep) +#define _cleanup_ordered_set_free_free_ _cleanup_(ordered_set_free_freep) diff --git a/src/basic/os-util.c b/src/basic/os-util.c new file mode 100644 index 0000000..8f8bb08 --- /dev/null +++ b/src/basic/os-util.c @@ -0,0 +1,372 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "alloc-util.h" +#include "chase-symlinks.h" +#include "dirent-util.h" +#include "env-file.h" +#include "env-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "glyph-util.h" +#include "macro.h" +#include "os-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" +#include "utf8.h" +#include "xattr-util.h" + +bool image_name_is_valid(const char *s) { + if (!filename_is_valid(s)) + return false; + + if (string_has_cc(s, NULL)) + return false; + + if (!utf8_is_valid(s)) + return false; + + /* Temporary files for atomically creating new files */ + if (startswith(s, ".#")) + return false; + + return true; +} + +int path_is_extension_tree(const char *path, const char *extension, bool relax_extension_release_check) { + int r; + + assert(path); + + /* Does the path exist at all? If not, generate an error immediately. This is useful so that a missing root dir + * always results in -ENOENT, and we can properly distinguish the case where the whole root doesn't exist from + * the case where just the os-release file is missing. */ + if (laccess(path, F_OK) < 0) + return -errno; + + /* We use /usr/lib/extension-release.d/extension-release[.NAME] as flag for something being a system extension, + * and {/etc|/usr/lib}/os-release as a flag for something being an OS (when not an extension). */ + r = open_extension_release(path, extension, relax_extension_release_check, NULL, NULL); + if (r == -ENOENT) /* We got nothing */ + return 0; + if (r < 0) + return r; + + return 1; +} + +static int extension_release_strict_xattr_value(int extension_release_fd, const char *extension_release_dir_path, const char *filename) { + int r; + + assert(extension_release_fd >= 0); + assert(extension_release_dir_path); + assert(filename); + + /* No xattr or cannot parse it? Then skip this. */ + _cleanup_free_ char *extension_release_xattr = NULL; + r = fgetxattr_malloc(extension_release_fd, "user.extension-release.strict", &extension_release_xattr); + if (r < 0) { + if (!ERRNO_IS_XATTR_ABSENT(r)) + return log_debug_errno(r, + "%s/%s: Failed to read 'user.extension-release.strict' extended attribute from file, ignoring: %m", + extension_release_dir_path, filename); + + return log_debug_errno(r, "%s/%s does not have user.extension-release.strict xattr, ignoring.", extension_release_dir_path, filename); + } + + /* Explicitly set to request strict matching? Skip it. */ + r = parse_boolean(extension_release_xattr); + if (r < 0) + return log_debug_errno(r, + "%s/%s: Failed to parse 'user.extension-release.strict' extended attribute from file, ignoring: %m", + extension_release_dir_path, filename); + if (r > 0) { + log_debug("%s/%s: 'user.extension-release.strict' attribute is true, ignoring file.", + extension_release_dir_path, filename); + return true; + } + + log_debug("%s/%s: 'user.extension-release.strict' attribute is false%s", + extension_release_dir_path, filename, + special_glyph(SPECIAL_GLYPH_ELLIPSIS)); + + return false; +} + +int open_extension_release(const char *root, const char *extension, bool relax_extension_release_check, char **ret_path, int *ret_fd) { + _cleanup_free_ char *q = NULL; + int r, fd; + + if (extension) { + const char *extension_full_path; + + if (!image_name_is_valid(extension)) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), + "The extension name %s is invalid.", extension); + + extension_full_path = strjoina("/usr/lib/extension-release.d/extension-release.", extension); + r = chase_symlinks(extension_full_path, root, CHASE_PREFIX_ROOT, + ret_path ? &q : NULL, + ret_fd ? &fd : NULL); + log_full_errno_zerook(LOG_DEBUG, MIN(r, 0), "Checking for %s: %m", extension_full_path); + + /* Cannot find the expected extension-release file? The image filename might have been + * mangled on deployment, so fallback to checking for any file in the extension-release.d + * directory, and return the first one with a user.extension-release xattr instead. + * The user.extension-release.strict xattr is checked to ensure the author of the image + * considers it OK if names do not match. */ + if (r == -ENOENT) { + _cleanup_free_ char *extension_release_dir_path = NULL; + _cleanup_closedir_ DIR *extension_release_dir = NULL; + + r = chase_symlinks_and_opendir("/usr/lib/extension-release.d/", root, CHASE_PREFIX_ROOT, + &extension_release_dir_path, &extension_release_dir); + if (r < 0) + return log_debug_errno(r, "Cannot open %s/usr/lib/extension-release.d/, ignoring: %m", root); + + r = -ENOENT; + FOREACH_DIRENT(de, extension_release_dir, return -errno) { + int k; + + if (!IN_SET(de->d_type, DT_REG, DT_UNKNOWN)) + continue; + + const char *image_name = startswith(de->d_name, "extension-release."); + if (!image_name) + continue; + + if (!image_name_is_valid(image_name)) { + log_debug("%s/%s is not a valid extension-release file name, ignoring.", + extension_release_dir_path, de->d_name); + continue; + } + + /* We already chased the directory, and checked that + * this is a real file, so we shouldn't fail to open it. */ + _cleanup_close_ int extension_release_fd = openat(dirfd(extension_release_dir), + de->d_name, + O_PATH|O_CLOEXEC|O_NOFOLLOW); + if (extension_release_fd < 0) + return log_debug_errno(errno, + "Failed to open extension-release file %s/%s: %m", + extension_release_dir_path, + de->d_name); + + /* Really ensure it is a regular file after we open it. */ + if (fd_verify_regular(extension_release_fd) < 0) { + log_debug("%s/%s is not a regular file, ignoring.", extension_release_dir_path, de->d_name); + continue; + } + + if (!relax_extension_release_check) { + k = extension_release_strict_xattr_value(extension_release_fd, + extension_release_dir_path, + de->d_name); + if (k != 0) + continue; + } + + /* We already found what we were looking for, but there's another candidate? + * We treat this as an error, as we want to enforce that there are no ambiguities + * in case we are in the fallback path.*/ + if (r == 0) { + r = -ENOTUNIQ; + break; + } + + r = 0; /* Found it! */ + + if (ret_fd) + fd = TAKE_FD(extension_release_fd); + + if (ret_path) { + q = path_join(extension_release_dir_path, de->d_name); + if (!q) + return -ENOMEM; + } + } + } + } else { + const char *var = secure_getenv("SYSTEMD_OS_RELEASE"); + if (var) + r = chase_symlinks(var, root, 0, + ret_path ? &q : NULL, + ret_fd ? &fd : NULL); + else + FOREACH_STRING(path, "/etc/os-release", "/usr/lib/os-release") { + r = chase_symlinks(path, root, CHASE_PREFIX_ROOT, + ret_path ? &q : NULL, + ret_fd ? &fd : NULL); + if (r != -ENOENT) + break; + } + } + if (r < 0) + return r; + + if (ret_fd) { + int real_fd; + + /* Convert the O_PATH fd into a proper, readable one */ + real_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NOCTTY); + safe_close(fd); + if (real_fd < 0) + return real_fd; + + *ret_fd = real_fd; + } + + if (ret_path) + *ret_path = TAKE_PTR(q); + + return 0; +} + +int fopen_extension_release(const char *root, const char *extension, bool relax_extension_release_check, char **ret_path, FILE **ret_file) { + _cleanup_free_ char *p = NULL; + _cleanup_close_ int fd = -1; + FILE *f; + int r; + + if (!ret_file) + return open_extension_release(root, extension, relax_extension_release_check, ret_path, NULL); + + r = open_extension_release(root, extension, relax_extension_release_check, ret_path ? &p : NULL, &fd); + if (r < 0) + return r; + + f = take_fdopen(&fd, "r"); + if (!f) + return -errno; + + if (ret_path) + *ret_path = TAKE_PTR(p); + *ret_file = f; + + return 0; +} + +static int parse_release_internal(const char *root, bool relax_extension_release_check, const char *extension, va_list ap) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *p = NULL; + int r; + + r = fopen_extension_release(root, extension, relax_extension_release_check, &p, &f); + if (r < 0) + return r; + + return parse_env_filev(f, p, ap); +} + +int _parse_extension_release(const char *root, bool relax_extension_release_check, const char *extension, ...) { + va_list ap; + int r; + + va_start(ap, extension); + r = parse_release_internal(root, relax_extension_release_check, extension, ap); + va_end(ap); + + return r; +} + +int _parse_os_release(const char *root, ...) { + va_list ap; + int r; + + va_start(ap, root); + r = parse_release_internal(root, /* relax_extension_release_check= */ false, NULL, ap); + va_end(ap); + + return r; +} + +int load_os_release_pairs(const char *root, char ***ret) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *p = NULL; + int r; + + r = fopen_os_release(root, &p, &f); + if (r < 0) + return r; + + return load_env_file_pairs(f, p, ret); +} + +int load_os_release_pairs_with_prefix(const char *root, const char *prefix, char ***ret) { + _cleanup_strv_free_ char **os_release_pairs = NULL, **os_release_pairs_prefixed = NULL; + int r; + + r = load_os_release_pairs(root, &os_release_pairs); + if (r < 0) + return r; + + STRV_FOREACH_PAIR(p, q, os_release_pairs) { + char *line; + + /* We strictly return only the four main ID fields and ignore the rest */ + if (!STR_IN_SET(*p, "ID", "VERSION_ID", "BUILD_ID", "VARIANT_ID")) + continue; + + ascii_strlower(*p); + line = strjoin(prefix, *p, "=", *q); + if (!line) + return -ENOMEM; + r = strv_consume(&os_release_pairs_prefixed, line); + if (r < 0) + return r; + } + + *ret = TAKE_PTR(os_release_pairs_prefixed); + + return 0; +} + +int load_extension_release_pairs(const char *root, const char *extension, bool relax_extension_release_check, char ***ret) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *p = NULL; + int r; + + r = fopen_extension_release(root, extension, relax_extension_release_check, &p, &f); + if (r < 0) + return r; + + return load_env_file_pairs(f, p, ret); +} + +int os_release_support_ended(const char *support_end, bool quiet) { + _cleanup_free_ char *_support_end_alloc = NULL; + int r; + + if (!support_end) { + /* If the caller has the variably handy, they can pass it in. If not, we'll read it + * ourselves. */ + + r = parse_os_release(NULL, + "SUPPORT_END", &_support_end_alloc); + if (r < 0) + return log_full_errno((r == -ENOENT || quiet) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to read os-release file, ignoring: %m"); + if (!_support_end_alloc) + return false; /* no end date defined */ + + support_end = _support_end_alloc; + } + + struct tm tm = {}; + + const char *k = strptime(support_end, "%Y-%m-%d", &tm); + if (!k || *k) + return log_full_errno(quiet ? LOG_DEBUG : LOG_WARNING, SYNTHETIC_ERRNO(EINVAL), + "Failed to parse SUPPORT_END= in os-release file, ignoring: %m"); + + time_t eol = mktime(&tm); + if (eol == (time_t) -1) + return log_full_errno(quiet ? LOG_DEBUG : LOG_WARNING, SYNTHETIC_ERRNO(EINVAL), + "Failed to convert SUPPORT_END= in os-release file, ignoring: %m"); + + usec_t ts = now(CLOCK_REALTIME); + return DIV_ROUND_UP(ts, USEC_PER_SEC) > (usec_t) eol; +} diff --git a/src/basic/os-util.h b/src/basic/os-util.h new file mode 100644 index 0000000..d22f5ab --- /dev/null +++ b/src/basic/os-util.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +/* The *_extension_release flavours will look for /usr/lib/extension-release/extension-release.NAME + * in accordance with the OS extension specification, rather than for /usr/lib/ or /etc/os-release. */ + +bool image_name_is_valid(const char *s) _pure_; + +int path_is_extension_tree(const char *path, const char *extension, bool relax_extension_release_check); +static inline int path_is_os_tree(const char *path) { + return path_is_extension_tree(path, NULL, false); +} + +int open_extension_release(const char *root, const char *extension, bool relax_extension_release_check, char **ret_path, int *ret_fd); +static inline int open_os_release(const char *root, char **ret_path, int *ret_fd) { + return open_extension_release(root, NULL, false, ret_path, ret_fd); +} + +int fopen_extension_release(const char *root, const char *extension, bool relax_extension_release_check, char **ret_path, FILE **ret_file); +static inline int fopen_os_release(const char *root, char **ret_path, FILE **ret_file) { + return fopen_extension_release(root, NULL, false, ret_path, ret_file); +} + +int _parse_extension_release(const char *root, bool relax_extension_release_check, const char *extension, ...) _sentinel_; +int _parse_os_release(const char *root, ...) _sentinel_; +#define parse_extension_release(root, relax_extension_release_check, extension, ...) _parse_extension_release(root, relax_extension_release_check, extension, __VA_ARGS__, NULL) +#define parse_os_release(root, ...) _parse_os_release(root, __VA_ARGS__, NULL) + +int load_extension_release_pairs(const char *root, const char *extension, bool relax_extension_release_check, char ***ret); +int load_os_release_pairs(const char *root, char ***ret); +int load_os_release_pairs_with_prefix(const char *root, const char *prefix, char ***ret); + +int os_release_support_ended(const char *support_end, bool quiet); diff --git a/src/basic/parse-util.c b/src/basic/parse-util.c new file mode 100644 index 0000000..fbebf63 --- /dev/null +++ b/src/basic/parse-util.c @@ -0,0 +1,719 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "errno-list.h" +#include "extract-word.h" +#include "locale-util.h" +#include "macro.h" +#include "missing_network.h" +#include "parse-util.h" +#include "process-util.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" + +int parse_boolean(const char *v) { + if (!v) + return -EINVAL; + + if (STRCASE_IN_SET(v, + "1", + "yes", + "y", + "true", + "t", + "on")) + return 1; + + if (STRCASE_IN_SET(v, + "0", + "no", + "n", + "false", + "f", + "off")) + return 0; + + return -EINVAL; +} + +int parse_pid(const char *s, pid_t* ret_pid) { + unsigned long ul = 0; + pid_t pid; + int r; + + assert(s); + assert(ret_pid); + + r = safe_atolu(s, &ul); + if (r < 0) + return r; + + pid = (pid_t) ul; + + if ((unsigned long) pid != ul) + return -ERANGE; + + if (!pid_is_valid(pid)) + return -ERANGE; + + *ret_pid = pid; + return 0; +} + +int parse_mode(const char *s, mode_t *ret) { + unsigned m; + int r; + + assert(s); + + r = safe_atou_full(s, 8 | + SAFE_ATO_REFUSE_PLUS_MINUS, /* Leading '+' or even '-' char? that's just weird, + * refuse. User might have wanted to add mode flags or + * so, but this parser doesn't allow that, so let's + * better be safe. */ + &m); + if (r < 0) + return r; + if (m > 07777) + return -ERANGE; + + if (ret) + *ret = m; + return 0; +} + +int parse_ifindex(const char *s) { + int ifi, r; + + assert(s); + + r = safe_atoi(s, &ifi); + if (r < 0) + return r; + if (ifi <= 0) + return -EINVAL; + + return ifi; +} + +int parse_mtu(int family, const char *s, uint32_t *ret) { + uint64_t u, m; + int r; + + r = parse_size(s, 1024, &u); + if (r < 0) + return r; + + if (u > UINT32_MAX) + return -ERANGE; + + switch (family) { + case AF_INET: + m = IPV4_MIN_MTU; /* This is 68 */ + break; + case AF_INET6: + m = IPV6_MIN_MTU; /* This is 1280 */ + break; + default: + m = 0; + } + + if (u < m) + return -ERANGE; + + *ret = (uint32_t) u; + return 0; +} + +int parse_size(const char *t, uint64_t base, uint64_t *size) { + + /* Soo, sometimes we want to parse IEC binary suffixes, and + * sometimes SI decimal suffixes. This function can parse + * both. Which one is the right way depends on the + * context. Wikipedia suggests that SI is customary for + * hardware metrics and network speeds, while IEC is + * customary for most data sizes used by software and volatile + * (RAM) memory. Hence be careful which one you pick! + * + * In either case we use just K, M, G as suffix, and not Ki, + * Mi, Gi or so (as IEC would suggest). That's because that's + * frickin' ugly. But this means you really need to make sure + * to document which base you are parsing when you use this + * call. */ + + struct table { + const char *suffix; + unsigned long long factor; + }; + + static const struct table iec[] = { + { "E", 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL*1024ULL }, + { "P", 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL }, + { "T", 1024ULL*1024ULL*1024ULL*1024ULL }, + { "G", 1024ULL*1024ULL*1024ULL }, + { "M", 1024ULL*1024ULL }, + { "K", 1024ULL }, + { "B", 1ULL }, + { "", 1ULL }, + }; + + static const struct table si[] = { + { "E", 1000ULL*1000ULL*1000ULL*1000ULL*1000ULL*1000ULL }, + { "P", 1000ULL*1000ULL*1000ULL*1000ULL*1000ULL }, + { "T", 1000ULL*1000ULL*1000ULL*1000ULL }, + { "G", 1000ULL*1000ULL*1000ULL }, + { "M", 1000ULL*1000ULL }, + { "K", 1000ULL }, + { "B", 1ULL }, + { "", 1ULL }, + }; + + const struct table *table; + const char *p; + unsigned long long r = 0; + unsigned n_entries, start_pos = 0; + + assert(t); + assert(IN_SET(base, 1000, 1024)); + assert(size); + + if (base == 1000) { + table = si; + n_entries = ELEMENTSOF(si); + } else { + table = iec; + n_entries = ELEMENTSOF(iec); + } + + p = t; + do { + unsigned long long l, tmp; + double frac = 0; + char *e; + unsigned i; + + p += strspn(p, WHITESPACE); + + errno = 0; + l = strtoull(p, &e, 10); + if (errno > 0) + return -errno; + if (e == p) + return -EINVAL; + if (*p == '-') + return -ERANGE; + + if (*e == '.') { + e++; + + /* strtoull() itself would accept space/+/- */ + if (ascii_isdigit(*e)) { + unsigned long long l2; + char *e2; + + l2 = strtoull(e, &e2, 10); + if (errno > 0) + return -errno; + + /* Ignore failure. E.g. 10.M is valid */ + frac = l2; + for (; e < e2; e++) + frac /= 10; + } + } + + e += strspn(e, WHITESPACE); + + for (i = start_pos; i < n_entries; i++) + if (startswith(e, table[i].suffix)) + break; + + if (i >= n_entries) + return -EINVAL; + + if (l + (frac > 0) > ULLONG_MAX / table[i].factor) + return -ERANGE; + + tmp = l * table[i].factor + (unsigned long long) (frac * table[i].factor); + if (tmp > ULLONG_MAX - r) + return -ERANGE; + + r += tmp; + if ((unsigned long long) (uint64_t) r != r) + return -ERANGE; + + p = e + strlen(table[i].suffix); + + start_pos = i + 1; + + } while (*p); + + *size = r; + + return 0; +} + +int parse_range(const char *t, unsigned *lower, unsigned *upper) { + _cleanup_free_ char *word = NULL; + unsigned l, u; + int r; + + assert(lower); + assert(upper); + + /* Extract the lower bound. */ + r = extract_first_word(&t, &word, "-", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; + + r = safe_atou(word, &l); + if (r < 0) + return r; + + /* Check for the upper bound and extract it if needed */ + if (!t) + /* Single number with no dashes. */ + u = l; + else if (!*t) + /* Trailing dash is an error. */ + return -EINVAL; + else { + r = safe_atou(t, &u); + if (r < 0) + return r; + } + + *lower = l; + *upper = u; + return 0; +} + +int parse_errno(const char *t) { + int r, e; + + assert(t); + + r = errno_from_name(t); + if (r > 0) + return r; + + r = safe_atoi(t, &e); + if (r < 0) + return r; + + /* 0 is also allowed here */ + if (!errno_is_valid(e) && e != 0) + return -ERANGE; + + return e; +} + +static const char *mangle_base(const char *s, unsigned *base) { + const char *k; + + assert(s); + assert(base); + + /* Base already explicitly specified, then don't do anything. */ + if (SAFE_ATO_MASK_FLAGS(*base) != 0) + return s; + + /* Support Python 3 style "0b" and 0x" prefixes, because they truly make sense, much more than C's "0" prefix for octal. */ + k = STARTSWITH_SET(s, "0b", "0B"); + if (k) { + *base = 2 | (*base & SAFE_ATO_ALL_FLAGS); + return k; + } + + k = STARTSWITH_SET(s, "0o", "0O"); + if (k) { + *base = 8 | (*base & SAFE_ATO_ALL_FLAGS); + return k; + } + + return s; +} + +int safe_atou_full(const char *s, unsigned base, unsigned *ret_u) { + char *x = NULL; + unsigned long l; + + assert(s); + assert(SAFE_ATO_MASK_FLAGS(base) <= 16); + + /* strtoul() is happy to parse negative values, and silently converts them to unsigned values without + * generating an error. We want a clean error, hence let's look for the "-" prefix on our own, and + * generate an error. But let's do so only after strtoul() validated that the string is clean + * otherwise, so that we return EINVAL preferably over ERANGE. */ + + if (FLAGS_SET(base, SAFE_ATO_REFUSE_LEADING_WHITESPACE) && + strchr(WHITESPACE, s[0])) + return -EINVAL; + + s += strspn(s, WHITESPACE); + + if (FLAGS_SET(base, SAFE_ATO_REFUSE_PLUS_MINUS) && + IN_SET(s[0], '+', '-')) + return -EINVAL; /* Note that we check the "-" prefix again a second time below, but return a + * different error. I.e. if the SAFE_ATO_REFUSE_PLUS_MINUS flag is set we + * blanket refuse +/- prefixed integers, while if it is missing we'll just + * return ERANGE, because the string actually parses correctly, but doesn't + * fit in the return type. */ + + if (FLAGS_SET(base, SAFE_ATO_REFUSE_LEADING_ZERO) && + s[0] == '0' && !streq(s, "0")) + return -EINVAL; /* This is particularly useful to avoid ambiguities between C's octal + * notation and assumed-to-be-decimal integers with a leading zero. */ + + s = mangle_base(s, &base); + + errno = 0; + l = strtoul(s, &x, SAFE_ATO_MASK_FLAGS(base) /* Let's mask off the flags bits so that only the actual + * base is left */); + if (errno > 0) + return -errno; + if (!x || x == s || *x != 0) + return -EINVAL; + if (l != 0 && s[0] == '-') + return -ERANGE; + if ((unsigned long) (unsigned) l != l) + return -ERANGE; + + if (ret_u) + *ret_u = (unsigned) l; + + return 0; +} + +int safe_atoi(const char *s, int *ret_i) { + unsigned base = 0; + char *x = NULL; + long l; + + assert(s); + + s += strspn(s, WHITESPACE); + s = mangle_base(s, &base); + + errno = 0; + l = strtol(s, &x, base); + if (errno > 0) + return -errno; + if (!x || x == s || *x != 0) + return -EINVAL; + if ((long) (int) l != l) + return -ERANGE; + + if (ret_i) + *ret_i = (int) l; + + return 0; +} + +int safe_atollu_full(const char *s, unsigned base, unsigned long long *ret_llu) { + char *x = NULL; + unsigned long long l; + + assert(s); + assert(SAFE_ATO_MASK_FLAGS(base) <= 16); + + if (FLAGS_SET(base, SAFE_ATO_REFUSE_LEADING_WHITESPACE) && + strchr(WHITESPACE, s[0])) + return -EINVAL; + + s += strspn(s, WHITESPACE); + + if (FLAGS_SET(base, SAFE_ATO_REFUSE_PLUS_MINUS) && + IN_SET(s[0], '+', '-')) + return -EINVAL; + + if (FLAGS_SET(base, SAFE_ATO_REFUSE_LEADING_ZERO) && + s[0] == '0' && s[1] != 0) + return -EINVAL; + + s = mangle_base(s, &base); + + errno = 0; + l = strtoull(s, &x, SAFE_ATO_MASK_FLAGS(base)); + if (errno > 0) + return -errno; + if (!x || x == s || *x != 0) + return -EINVAL; + if (l != 0 && s[0] == '-') + return -ERANGE; + + if (ret_llu) + *ret_llu = l; + + return 0; +} + +int safe_atolli(const char *s, long long int *ret_lli) { + unsigned base = 0; + char *x = NULL; + long long l; + + assert(s); + + s += strspn(s, WHITESPACE); + s = mangle_base(s, &base); + + errno = 0; + l = strtoll(s, &x, base); + if (errno > 0) + return -errno; + if (!x || x == s || *x != 0) + return -EINVAL; + + if (ret_lli) + *ret_lli = l; + + return 0; +} + +int safe_atou8_full(const char *s, unsigned base, uint8_t *ret) { + unsigned u; + int r; + + r = safe_atou_full(s, base, &u); + if (r < 0) + return r; + if (u > UINT8_MAX) + return -ERANGE; + + *ret = (uint8_t) u; + return 0; +} + +int safe_atou16_full(const char *s, unsigned base, uint16_t *ret) { + unsigned u; + int r; + + r = safe_atou_full(s, base, &u); + if (r < 0) + return r; + if (u > UINT16_MAX) + return -ERANGE; + + *ret = (uint16_t) u; + return 0; +} + +int safe_atoi16(const char *s, int16_t *ret) { + unsigned base = 0; + char *x = NULL; + long l; + + assert(s); + + s += strspn(s, WHITESPACE); + s = mangle_base(s, &base); + + errno = 0; + l = strtol(s, &x, base); + if (errno > 0) + return -errno; + if (!x || x == s || *x != 0) + return -EINVAL; + if ((long) (int16_t) l != l) + return -ERANGE; + + if (ret) + *ret = (int16_t) l; + + return 0; +} + +int safe_atod(const char *s, double *ret_d) { + _cleanup_(freelocalep) locale_t loc = (locale_t) 0; + char *x = NULL; + double d = 0; + + assert(s); + + loc = newlocale(LC_NUMERIC_MASK, "C", (locale_t) 0); + if (loc == (locale_t) 0) + return -errno; + + errno = 0; + d = strtod_l(s, &x, loc); + if (errno > 0) + return -errno; + if (!x || x == s || *x != 0) + return -EINVAL; + + if (ret_d) + *ret_d = (double) d; + + return 0; +} + +int parse_fractional_part_u(const char **p, size_t digits, unsigned *res) { + unsigned val = 0; + const char *s; + + s = *p; + + /* accept any number of digits, strtoull is limited to 19 */ + for (size_t i = 0; i < digits; i++,s++) { + if (!ascii_isdigit(*s)) { + if (i == 0) + return -EINVAL; + + /* too few digits, pad with 0 */ + for (; i < digits; i++) + val *= 10; + + break; + } + + val *= 10; + val += *s - '0'; + } + + /* maybe round up */ + if (*s >= '5' && *s <= '9') + val++; + + s += strspn(s, DIGITS); + + *p = s; + *res = val; + + return 0; +} + +int parse_nice(const char *p, int *ret) { + int n, r; + + r = safe_atoi(p, &n); + if (r < 0) + return r; + + if (!nice_is_valid(n)) + return -ERANGE; + + *ret = n; + return 0; +} + +int parse_ip_port(const char *s, uint16_t *ret) { + uint16_t l; + int r; + + r = safe_atou16_full(s, SAFE_ATO_REFUSE_LEADING_WHITESPACE, &l); + if (r < 0) + return r; + + if (l == 0) + return -EINVAL; + + *ret = (uint16_t) l; + + return 0; +} + +int parse_ip_port_range(const char *s, uint16_t *low, uint16_t *high) { + unsigned l, h; + int r; + + r = parse_range(s, &l, &h); + if (r < 0) + return r; + + if (l <= 0 || l > 65535 || h <= 0 || h > 65535) + return -EINVAL; + + if (h < l) + return -EINVAL; + + *low = l; + *high = h; + + return 0; +} + +int parse_ip_prefix_length(const char *s, int *ret) { + unsigned l; + int r; + + r = safe_atou(s, &l); + if (r < 0) + return r; + + if (l > 128) + return -ERANGE; + + *ret = (int) l; + + return 0; +} + +int parse_oom_score_adjust(const char *s, int *ret) { + int r, v; + + assert(s); + assert(ret); + + r = safe_atoi(s, &v); + if (r < 0) + return r; + + if (!oom_score_adjust_is_valid(v)) + return -ERANGE; + + *ret = v; + return 0; +} + +int store_loadavg_fixed_point(unsigned long i, unsigned long f, loadavg_t *ret) { + assert(ret); + + if (i >= (~0UL << LOADAVG_PRECISION_BITS)) + return -ERANGE; + + i = i << LOADAVG_PRECISION_BITS; + f = DIV_ROUND_UP((f << LOADAVG_PRECISION_BITS), 100); + + if (f >= LOADAVG_FIXED_POINT_1_0) + return -ERANGE; + + *ret = i | f; + return 0; +} + +int parse_loadavg_fixed_point(const char *s, loadavg_t *ret) { + const char *d, *f_str, *i_str; + unsigned long i, f; + int r; + + assert(s); + assert(ret); + + d = strchr(s, '.'); + if (!d) + return -EINVAL; + + i_str = strndupa_safe(s, d - s); + f_str = d + 1; + + r = safe_atolu_full(i_str, 10, &i); + if (r < 0) + return r; + + r = safe_atolu_full(f_str, 10, &f); + if (r < 0) + return r; + + return store_loadavg_fixed_point(i, f, ret); +} diff --git a/src/basic/parse-util.h b/src/basic/parse-util.h new file mode 100644 index 0000000..8d8d523 --- /dev/null +++ b/src/basic/parse-util.h @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include +#include + +#include "macro.h" + +typedef unsigned long loadavg_t; + +int parse_boolean(const char *v) _pure_; +int parse_pid(const char *s, pid_t* ret_pid); +int parse_mode(const char *s, mode_t *ret); +int parse_ifindex(const char *s); +int parse_mtu(int family, const char *s, uint32_t *ret); + +int parse_size(const char *t, uint64_t base, uint64_t *size); +int parse_range(const char *t, unsigned *lower, unsigned *upper); +int parse_errno(const char *t); + +#define SAFE_ATO_REFUSE_PLUS_MINUS (1U << 30) +#define SAFE_ATO_REFUSE_LEADING_ZERO (1U << 29) +#define SAFE_ATO_REFUSE_LEADING_WHITESPACE (1U << 28) +#define SAFE_ATO_ALL_FLAGS (SAFE_ATO_REFUSE_PLUS_MINUS|SAFE_ATO_REFUSE_LEADING_ZERO|SAFE_ATO_REFUSE_LEADING_WHITESPACE) +#define SAFE_ATO_MASK_FLAGS(base) ((base) & ~SAFE_ATO_ALL_FLAGS) + +int safe_atou_full(const char *s, unsigned base, unsigned *ret_u); + +static inline int safe_atou(const char *s, unsigned *ret_u) { + return safe_atou_full(s, 0, ret_u); +} + +int safe_atoi(const char *s, int *ret_i); +int safe_atolli(const char *s, long long int *ret_i); + +int safe_atou8_full(const char *s, unsigned base, uint8_t *ret); + +static inline int safe_atou8(const char *s, uint8_t *ret) { + return safe_atou8_full(s, 0, ret); +} + +int safe_atou16_full(const char *s, unsigned base, uint16_t *ret); + +static inline int safe_atou16(const char *s, uint16_t *ret) { + return safe_atou16_full(s, 0, ret); +} + +static inline int safe_atoux16(const char *s, uint16_t *ret) { + return safe_atou16_full(s, 16, ret); +} + +int safe_atoi16(const char *s, int16_t *ret); + +static inline int safe_atou32_full(const char *s, unsigned base, uint32_t *ret_u) { + assert_cc(sizeof(uint32_t) == sizeof(unsigned)); + return safe_atou_full(s, base, (unsigned*) ret_u); +} + +static inline int safe_atou32(const char *s, uint32_t *ret_u) { + return safe_atou32_full(s, 0, (unsigned*) ret_u); +} + +static inline int safe_atoi32(const char *s, int32_t *ret_i) { + assert_cc(sizeof(int32_t) == sizeof(int)); + return safe_atoi(s, (int*) ret_i); +} + +int safe_atollu_full(const char *s, unsigned base, unsigned long long *ret_llu); + +static inline int safe_atollu(const char *s, unsigned long long *ret_llu) { + return safe_atollu_full(s, 0, ret_llu); +} + +static inline int safe_atou64(const char *s, uint64_t *ret_u) { + assert_cc(sizeof(uint64_t) == sizeof(unsigned long long)); + return safe_atollu(s, (unsigned long long*) ret_u); +} + +static inline int safe_atoi64(const char *s, int64_t *ret_i) { + assert_cc(sizeof(int64_t) == sizeof(long long int)); + return safe_atolli(s, (long long int*) ret_i); +} + +static inline int safe_atoux64(const char *s, uint64_t *ret) { + assert_cc(sizeof(int64_t) == sizeof(unsigned long long)); + return safe_atollu_full(s, 16, (unsigned long long*) ret); +} + +#if LONG_MAX == INT_MAX +static inline int safe_atolu_full(const char *s, unsigned base, unsigned long *ret_u) { + assert_cc(sizeof(unsigned long) == sizeof(unsigned)); + return safe_atou_full(s, base, (unsigned*) ret_u); +} +static inline int safe_atoli(const char *s, long int *ret_u) { + assert_cc(sizeof(long int) == sizeof(int)); + return safe_atoi(s, (int*) ret_u); +} +#else +static inline int safe_atolu_full(const char *s, unsigned base, unsigned long *ret_u) { + assert_cc(sizeof(unsigned long) == sizeof(unsigned long long)); + return safe_atollu_full(s, base, (unsigned long long*) ret_u); +} +static inline int safe_atoli(const char *s, long int *ret_u) { + assert_cc(sizeof(long int) == sizeof(long long int)); + return safe_atolli(s, (long long int*) ret_u); +} +#endif + +static inline int safe_atolu(const char *s, unsigned long *ret_u) { + return safe_atolu_full(s, 0, ret_u); +} + +#if SIZE_MAX == UINT_MAX +static inline int safe_atozu(const char *s, size_t *ret_u) { + assert_cc(sizeof(size_t) == sizeof(unsigned)); + return safe_atou(s, (unsigned *) ret_u); +} +#else +static inline int safe_atozu(const char *s, size_t *ret_u) { + assert_cc(sizeof(size_t) == sizeof(unsigned long)); + return safe_atolu(s, ret_u); +} +#endif + +int safe_atod(const char *s, double *ret_d); + +int parse_fractional_part_u(const char **s, size_t digits, unsigned *res); + +int parse_nice(const char *p, int *ret); + +int parse_ip_port(const char *s, uint16_t *ret); +int parse_ip_port_range(const char *s, uint16_t *low, uint16_t *high); + +int parse_ip_prefix_length(const char *s, int *ret); + +int parse_oom_score_adjust(const char *s, int *ret); + +/* Implement floating point using fixed integers, to improve performance when + * calculating load averages. These macros can be used to extract the integer + * and decimal parts of a value. */ +#define LOADAVG_PRECISION_BITS 11 +#define LOADAVG_FIXED_POINT_1_0 (1 << LOADAVG_PRECISION_BITS) +#define LOADAVG_INT_SIDE(x) ((x) >> LOADAVG_PRECISION_BITS) +#define LOADAVG_DECIMAL_SIDE(x) LOADAVG_INT_SIDE(((x) & (LOADAVG_FIXED_POINT_1_0 - 1)) * 100) + +/* Given a Linux load average (e.g. decimal number 34.89 where 34 is passed as i and 89 is passed as f), convert it + * to a loadavg_t. */ +int store_loadavg_fixed_point(unsigned long i, unsigned long f, loadavg_t *ret); +int parse_loadavg_fixed_point(const char *s, loadavg_t *ret); diff --git a/src/basic/path-lookup.c b/src/basic/path-lookup.c new file mode 100644 index 0000000..36f3862 --- /dev/null +++ b/src/basic/path-lookup.c @@ -0,0 +1,907 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include + +#include "alloc-util.h" +#include "fs-util.h" +#include "log.h" +#include "macro.h" +#include "nulstr-util.h" +#include "path-lookup.h" +#include "path-util.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" +#include "tmpfile-util.h" +#include "user-util.h" + +int xdg_user_runtime_dir(char **ret, const char *suffix) { + const char *e; + char *j; + + assert(ret); + assert(suffix); + + e = getenv("XDG_RUNTIME_DIR"); + if (!e) + return -ENXIO; + + j = path_join(e, suffix); + if (!j) + return -ENOMEM; + + *ret = j; + return 0; +} + +int xdg_user_config_dir(char **ret, const char *suffix) { + _cleanup_free_ char *j = NULL; + const char *e; + int r; + + assert(ret); + + e = getenv("XDG_CONFIG_HOME"); + if (e) { + j = path_join(e, suffix); + if (!j) + return -ENOMEM; + } else { + r = get_home_dir(&j); + if (r < 0) + return r; + + if (!path_extend(&j, "/.config", suffix)) + return -ENOMEM; + } + + *ret = TAKE_PTR(j); + return 0; +} + +int xdg_user_data_dir(char **ret, const char *suffix) { + _cleanup_free_ char *j = NULL; + const char *e; + int r; + + assert(ret); + assert(suffix); + + /* We don't treat /etc/xdg/systemd here as the spec + * suggests because we assume that is a link to + * /etc/systemd/ anyway. */ + + e = getenv("XDG_DATA_HOME"); + if (e) { + j = path_join(e, suffix); + if (!j) + return -ENOMEM; + } else { + r = get_home_dir(&j); + if (r < 0) + return r; + + if (!path_extend(&j, "/.local/share", suffix)) + return -ENOMEM; + } + + *ret = TAKE_PTR(j); + return 1; +} + +static const char* const user_data_unit_paths[] = { + "/usr/local/lib/systemd/user", + "/usr/local/share/systemd/user", + USER_DATA_UNIT_DIR, + "/usr/lib/systemd/user", + "/usr/share/systemd/user", + NULL +}; + +static const char* const user_config_unit_paths[] = { + USER_CONFIG_UNIT_DIR, + "/etc/systemd/user", + NULL +}; + +int xdg_user_dirs(char ***ret_config_dirs, char ***ret_data_dirs) { + /* Implement the mechanisms defined in + * + * https://standards.freedesktop.org/basedir-spec/basedir-spec-0.6.html + * + * We look in both the config and the data dirs because we + * want to encourage that distributors ship their unit files + * as data, and allow overriding as configuration. + */ + const char *e; + _cleanup_strv_free_ char **config_dirs = NULL, **data_dirs = NULL; + + e = getenv("XDG_CONFIG_DIRS"); + if (e) + config_dirs = strv_split(e, ":"); + else + config_dirs = strv_new("/etc/xdg"); + if (!config_dirs) + return -ENOMEM; + + e = getenv("XDG_DATA_DIRS"); + if (e) + data_dirs = strv_split(e, ":"); + else + data_dirs = strv_new("/usr/local/share", + "/usr/share"); + if (!data_dirs) + return -ENOMEM; + + *ret_config_dirs = TAKE_PTR(config_dirs); + *ret_data_dirs = TAKE_PTR(data_dirs); + + return 0; +} + +static char** user_dirs( + const char *persistent_config, + const char *runtime_config, + const char *global_persistent_config, + const char *global_runtime_config, + const char *generator, + const char *generator_early, + const char *generator_late, + const char *transient, + const char *persistent_control, + const char *runtime_control) { + + _cleanup_strv_free_ char **config_dirs = NULL, **data_dirs = NULL; + _cleanup_free_ char *data_home = NULL; + _cleanup_strv_free_ char **res = NULL; + int r; + + r = xdg_user_dirs(&config_dirs, &data_dirs); + if (r < 0) + return NULL; + + r = xdg_user_data_dir(&data_home, "/systemd/user"); + if (r < 0 && r != -ENXIO) + return NULL; + + /* Now merge everything we found. */ + if (strv_extend(&res, persistent_control) < 0) + return NULL; + + if (strv_extend(&res, runtime_control) < 0) + return NULL; + + if (strv_extend(&res, transient) < 0) + return NULL; + + if (strv_extend(&res, generator_early) < 0) + return NULL; + + if (strv_extend(&res, persistent_config) < 0) + return NULL; + + if (strv_extend_strv_concat(&res, config_dirs, "/systemd/user") < 0) + return NULL; + + /* global config has lower priority than the user config of the same type */ + if (strv_extend(&res, global_persistent_config) < 0) + return NULL; + + if (strv_extend_strv(&res, (char**) user_config_unit_paths, false) < 0) + return NULL; + + if (strv_extend(&res, runtime_config) < 0) + return NULL; + + if (strv_extend(&res, global_runtime_config) < 0) + return NULL; + + if (strv_extend(&res, generator) < 0) + return NULL; + + if (strv_extend(&res, data_home) < 0) + return NULL; + + if (strv_extend_strv_concat(&res, data_dirs, "/systemd/user") < 0) + return NULL; + + if (strv_extend_strv(&res, (char**) user_data_unit_paths, false) < 0) + return NULL; + + if (strv_extend(&res, generator_late) < 0) + return NULL; + + if (path_strv_make_absolute_cwd(res) < 0) + return NULL; + + return TAKE_PTR(res); +} + +bool path_is_user_data_dir(const char *path) { + assert(path); + + return strv_contains((char**) user_data_unit_paths, path); +} + +bool path_is_user_config_dir(const char *path) { + assert(path); + + return strv_contains((char**) user_config_unit_paths, path); +} + +static int acquire_generator_dirs( + LookupScope scope, + const char *tempdir, + char **generator, + char **generator_early, + char **generator_late) { + + _cleanup_free_ char *x = NULL, *y = NULL, *z = NULL, *p = NULL; + const char *prefix; + + assert(generator); + assert(generator_early); + assert(generator_late); + assert(IN_SET(scope, LOOKUP_SCOPE_SYSTEM, LOOKUP_SCOPE_USER, LOOKUP_SCOPE_GLOBAL)); + + if (scope == LOOKUP_SCOPE_GLOBAL) + return -EOPNOTSUPP; + + if (tempdir) + prefix = tempdir; + else if (scope == LOOKUP_SCOPE_SYSTEM) + prefix = "/run/systemd"; + else { + /* LOOKUP_SCOPE_USER */ + const char *e; + + e = getenv("XDG_RUNTIME_DIR"); + if (!e) + return -ENXIO; + + p = path_join(e, "/systemd"); + if (!p) + return -ENOMEM; + + prefix = p; + } + + x = path_join(prefix, "generator"); + if (!x) + return -ENOMEM; + + y = path_join(prefix, "generator.early"); + if (!y) + return -ENOMEM; + + z = path_join(prefix, "generator.late"); + if (!z) + return -ENOMEM; + + *generator = TAKE_PTR(x); + *generator_early = TAKE_PTR(y); + *generator_late = TAKE_PTR(z); + + return 0; +} + +static int acquire_transient_dir( + LookupScope scope, + const char *tempdir, + char **ret) { + + char *transient; + + assert(ret); + assert(IN_SET(scope, LOOKUP_SCOPE_SYSTEM, LOOKUP_SCOPE_USER, LOOKUP_SCOPE_GLOBAL)); + + if (scope == LOOKUP_SCOPE_GLOBAL) + return -EOPNOTSUPP; + + if (tempdir) + transient = path_join(tempdir, "transient"); + else if (scope == LOOKUP_SCOPE_SYSTEM) + transient = strdup("/run/systemd/transient"); + else + return xdg_user_runtime_dir(ret, "/systemd/transient"); + + if (!transient) + return -ENOMEM; + *ret = transient; + return 0; +} + +static int acquire_config_dirs(LookupScope scope, char **persistent, char **runtime) { + _cleanup_free_ char *a = NULL, *b = NULL; + int r; + + assert(persistent); + assert(runtime); + + switch (scope) { + + case LOOKUP_SCOPE_SYSTEM: + a = strdup(SYSTEM_CONFIG_UNIT_DIR); + b = strdup("/run/systemd/system"); + break; + + case LOOKUP_SCOPE_GLOBAL: + a = strdup(USER_CONFIG_UNIT_DIR); + b = strdup("/run/systemd/user"); + break; + + case LOOKUP_SCOPE_USER: + r = xdg_user_config_dir(&a, "/systemd/user"); + if (r < 0 && r != -ENXIO) + return r; + + r = xdg_user_runtime_dir(runtime, "/systemd/user"); + if (r < 0) { + if (r != -ENXIO) + return r; + + /* If XDG_RUNTIME_DIR is not set, don't consider that fatal, simply initialize the runtime + * directory to NULL */ + *runtime = NULL; + } + + *persistent = TAKE_PTR(a); + + return 0; + + default: + assert_not_reached(); + } + + if (!a || !b) + return -ENOMEM; + + *persistent = TAKE_PTR(a); + *runtime = TAKE_PTR(b); + + return 0; +} + +static int acquire_control_dirs(LookupScope scope, char **persistent, char **runtime) { + _cleanup_free_ char *a = NULL; + int r; + + assert(persistent); + assert(runtime); + + switch (scope) { + + case LOOKUP_SCOPE_SYSTEM: { + _cleanup_free_ char *b = NULL; + + a = strdup("/etc/systemd/system.control"); + if (!a) + return -ENOMEM; + + b = strdup("/run/systemd/system.control"); + if (!b) + return -ENOMEM; + + *runtime = TAKE_PTR(b); + + break; + } + + case LOOKUP_SCOPE_USER: + r = xdg_user_config_dir(&a, "/systemd/user.control"); + if (r < 0 && r != -ENXIO) + return r; + + r = xdg_user_runtime_dir(runtime, "/systemd/user.control"); + if (r < 0) { + if (r != -ENXIO) + return r; + + /* If XDG_RUNTIME_DIR is not set, don't consider this fatal, simply initialize the directory to + * NULL */ + *runtime = NULL; + } + + break; + + case LOOKUP_SCOPE_GLOBAL: + return -EOPNOTSUPP; + + default: + assert_not_reached(); + } + + *persistent = TAKE_PTR(a); + + return 0; +} + +static int acquire_attached_dirs( + LookupScope scope, + char **ret_persistent, + char **ret_runtime) { + + _cleanup_free_ char *a = NULL, *b = NULL; + + assert(ret_persistent); + assert(ret_runtime); + + /* Portable services are not available to regular users for now. */ + if (scope != LOOKUP_SCOPE_SYSTEM) + return -EOPNOTSUPP; + + a = strdup("/etc/systemd/system.attached"); + if (!a) + return -ENOMEM; + + b = strdup("/run/systemd/system.attached"); + if (!b) + return -ENOMEM; + + *ret_persistent = TAKE_PTR(a); + *ret_runtime = TAKE_PTR(b); + + return 0; +} + +static int patch_root_prefix(char **p, const char *root_dir) { + char *c; + + assert(p); + + if (!*p) + return 0; + + c = path_join(root_dir, *p); + if (!c) + return -ENOMEM; + + free_and_replace(*p, c); + return 0; +} + +static int patch_root_prefix_strv(char **l, const char *root_dir) { + int r; + + if (!root_dir) + return 0; + + STRV_FOREACH(i, l) { + r = patch_root_prefix(i, root_dir); + if (r < 0) + return r; + } + + return 0; +} + +static int get_paths_from_environ(const char *var, char ***paths, bool *append) { + const char *e; + int r; + + assert(var); + assert(paths); + assert(append); + + *append = false; + + e = getenv(var); + if (e) { + const char *k; + + k = endswith(e, ":"); + if (k) { + e = strndupa_safe(e, k - e); + *append = true; + } + + /* FIXME: empty components in other places should be rejected. */ + + r = path_split_and_make_absolute(e, paths); + if (r < 0) + return r; + } + + return 0; +} + +int lookup_paths_init( + LookupPaths *lp, + LookupScope scope, + LookupPathsFlags flags, + const char *root_dir) { + + _cleanup_(rmdir_and_freep) char *tempdir = NULL; + _cleanup_free_ char + *root = NULL, + *persistent_config = NULL, *runtime_config = NULL, + *global_persistent_config = NULL, *global_runtime_config = NULL, + *generator = NULL, *generator_early = NULL, *generator_late = NULL, + *transient = NULL, + *persistent_control = NULL, *runtime_control = NULL, + *persistent_attached = NULL, *runtime_attached = NULL; + bool append = false; /* Add items from SYSTEMD_UNIT_PATH before normal directories */ + _cleanup_strv_free_ char **paths = NULL; + int r; + + assert(lp); + assert(scope >= 0); + assert(scope < _LOOKUP_SCOPE_MAX); + +#if HAVE_SPLIT_USR + flags |= LOOKUP_PATHS_SPLIT_USR; +#endif + + if (!empty_or_root(root_dir)) { + if (scope == LOOKUP_SCOPE_USER) + return -EINVAL; + + r = is_dir(root_dir, true); + if (r < 0) + return r; + if (r == 0) + return -ENOTDIR; + + root = strdup(root_dir); + if (!root) + return -ENOMEM; + } + + if (flags & LOOKUP_PATHS_TEMPORARY_GENERATED) { + r = mkdtemp_malloc("/tmp/systemd-temporary-XXXXXX", &tempdir); + if (r < 0) + return log_debug_errno(r, "Failed to create temporary directory: %m"); + } + + /* Note: when XDG_RUNTIME_DIR is not set this will not return -ENXIO, but simply set runtime_config to NULL */ + r = acquire_config_dirs(scope, &persistent_config, &runtime_config); + if (r < 0) + return r; + + if (scope == LOOKUP_SCOPE_USER) { + r = acquire_config_dirs(LOOKUP_SCOPE_GLOBAL, &global_persistent_config, &global_runtime_config); + if (r < 0) + return r; + } + + if ((flags & LOOKUP_PATHS_EXCLUDE_GENERATED) == 0) { + /* Note: if XDG_RUNTIME_DIR is not set, this will fail completely with ENXIO */ + r = acquire_generator_dirs(scope, tempdir, + &generator, &generator_early, &generator_late); + if (r < 0 && !IN_SET(r, -EOPNOTSUPP, -ENXIO)) + return r; + } + + /* Note: if XDG_RUNTIME_DIR is not set, this will fail completely with ENXIO */ + r = acquire_transient_dir(scope, tempdir, &transient); + if (r < 0 && !IN_SET(r, -EOPNOTSUPP, -ENXIO)) + return r; + + /* Note: when XDG_RUNTIME_DIR is not set this will not return -ENXIO, but simply set runtime_control to NULL */ + r = acquire_control_dirs(scope, &persistent_control, &runtime_control); + if (r < 0 && r != -EOPNOTSUPP) + return r; + + r = acquire_attached_dirs(scope, &persistent_attached, &runtime_attached); + if (r < 0 && r != -EOPNOTSUPP) + return r; + + /* First priority is whatever has been passed to us via env vars */ + r = get_paths_from_environ("SYSTEMD_UNIT_PATH", &paths, &append); + if (r < 0) + return r; + + if (!paths || append) { + /* Let's figure something out. */ + + _cleanup_strv_free_ char **add = NULL; + + /* For the user units we include share/ in the search + * path in order to comply with the XDG basedir spec. + * For the system stuff we avoid such nonsense. OTOH + * we include /lib in the search path for the system + * stuff but avoid it for user stuff. */ + + switch (scope) { + + case LOOKUP_SCOPE_SYSTEM: + add = strv_new( + /* If you modify this you also want to modify + * systemdsystemunitpath= in systemd.pc.in! */ + STRV_IFNOTNULL(persistent_control), + STRV_IFNOTNULL(runtime_control), + STRV_IFNOTNULL(transient), + STRV_IFNOTNULL(generator_early), + persistent_config, + SYSTEM_CONFIG_UNIT_DIR, + "/etc/systemd/system", + STRV_IFNOTNULL(persistent_attached), + runtime_config, + "/run/systemd/system", + STRV_IFNOTNULL(runtime_attached), + STRV_IFNOTNULL(generator), + "/usr/local/lib/systemd/system", + SYSTEM_DATA_UNIT_DIR, + "/usr/lib/systemd/system", + STRV_IFNOTNULL(flags & LOOKUP_PATHS_SPLIT_USR ? "/lib/systemd/system" : NULL), + STRV_IFNOTNULL(generator_late)); + break; + + case LOOKUP_SCOPE_GLOBAL: + add = strv_new( + /* If you modify this you also want to modify + * systemduserunitpath= in systemd.pc.in, and + * the arrays in user_dirs() above! */ + STRV_IFNOTNULL(persistent_control), + STRV_IFNOTNULL(runtime_control), + STRV_IFNOTNULL(transient), + STRV_IFNOTNULL(generator_early), + persistent_config, + USER_CONFIG_UNIT_DIR, + "/etc/systemd/user", + runtime_config, + "/run/systemd/user", + STRV_IFNOTNULL(generator), + "/usr/local/share/systemd/user", + "/usr/share/systemd/user", + "/usr/local/lib/systemd/user", + USER_DATA_UNIT_DIR, + "/usr/lib/systemd/user", + STRV_IFNOTNULL(generator_late)); + break; + + case LOOKUP_SCOPE_USER: + add = user_dirs(persistent_config, runtime_config, + global_persistent_config, global_runtime_config, + generator, generator_early, generator_late, + transient, + persistent_control, runtime_control); + break; + + default: + assert_not_reached(); + } + + if (!add) + return -ENOMEM; + + if (paths) { + r = strv_extend_strv(&paths, add, true); + if (r < 0) + return r; + } else + /* Small optimization: if paths is NULL (and it usually is), we can simply assign 'add' to it, + * and don't have to copy anything */ + paths = TAKE_PTR(add); + } + + r = patch_root_prefix(&persistent_config, root); + if (r < 0) + return r; + r = patch_root_prefix(&runtime_config, root); + if (r < 0) + return r; + + r = patch_root_prefix(&generator, root); + if (r < 0) + return r; + r = patch_root_prefix(&generator_early, root); + if (r < 0) + return r; + r = patch_root_prefix(&generator_late, root); + if (r < 0) + return r; + + r = patch_root_prefix(&transient, root); + if (r < 0) + return r; + + r = patch_root_prefix(&persistent_control, root); + if (r < 0) + return r; + r = patch_root_prefix(&runtime_control, root); + if (r < 0) + return r; + + r = patch_root_prefix(&persistent_attached, root); + if (r < 0) + return r; + r = patch_root_prefix(&runtime_attached, root); + if (r < 0) + return r; + + r = patch_root_prefix_strv(paths, root); + if (r < 0) + return -ENOMEM; + + *lp = (LookupPaths) { + .search_path = strv_uniq(TAKE_PTR(paths)), + + .persistent_config = TAKE_PTR(persistent_config), + .runtime_config = TAKE_PTR(runtime_config), + + .generator = TAKE_PTR(generator), + .generator_early = TAKE_PTR(generator_early), + .generator_late = TAKE_PTR(generator_late), + + .transient = TAKE_PTR(transient), + + .persistent_control = TAKE_PTR(persistent_control), + .runtime_control = TAKE_PTR(runtime_control), + + .persistent_attached = TAKE_PTR(persistent_attached), + .runtime_attached = TAKE_PTR(runtime_attached), + + .root_dir = TAKE_PTR(root), + .temporary_dir = TAKE_PTR(tempdir), + }; + + return 0; +} + +int lookup_paths_init_or_warn(LookupPaths *lp, LookupScope scope, LookupPathsFlags flags, const char *root_dir) { + int r; + + r = lookup_paths_init(lp, scope, flags, root_dir); + if (r < 0) + return log_error_errno(r, "Failed to initialize unit search paths%s%s: %m", + isempty(root_dir) ? "" : " for root directory ", strempty(root_dir)); + return r; +} + +void lookup_paths_free(LookupPaths *lp) { + if (!lp) + return; + + lp->search_path = strv_free(lp->search_path); + + lp->persistent_config = mfree(lp->persistent_config); + lp->runtime_config = mfree(lp->runtime_config); + + lp->persistent_attached = mfree(lp->persistent_attached); + lp->runtime_attached = mfree(lp->runtime_attached); + + lp->generator = mfree(lp->generator); + lp->generator_early = mfree(lp->generator_early); + lp->generator_late = mfree(lp->generator_late); + + lp->transient = mfree(lp->transient); + + lp->persistent_control = mfree(lp->persistent_control); + lp->runtime_control = mfree(lp->runtime_control); + + lp->root_dir = mfree(lp->root_dir); + lp->temporary_dir = mfree(lp->temporary_dir); +} + +void lookup_paths_log(LookupPaths *lp) { + assert(lp); + + if (strv_isempty(lp->search_path)) { + log_debug("Ignoring unit files."); + lp->search_path = strv_free(lp->search_path); + } else { + _cleanup_free_ char *t = NULL; + + t = strv_join(lp->search_path, "\n\t"); + log_debug("Looking for unit files in (higher priority first):\n\t%s", strna(t)); + } +} + +char **generator_binary_paths(LookupScope scope) { + bool append = false; /* Add items from SYSTEMD_GENERATOR_PATH before normal directories */ + _cleanup_strv_free_ char **paths = NULL; + int r; + + /* First priority is whatever has been passed to us via env vars */ + r = get_paths_from_environ("SYSTEMD_GENERATOR_PATH", &paths, &append); + if (r < 0) + return NULL; + + if (!paths || append) { + _cleanup_strv_free_ char **add = NULL; + + switch (scope) { + + case LOOKUP_SCOPE_SYSTEM: + add = strv_new("/run/systemd/system-generators", + "/etc/systemd/system-generators", + "/usr/local/lib/systemd/system-generators", + SYSTEM_GENERATOR_DIR); + break; + + case LOOKUP_SCOPE_GLOBAL: + case LOOKUP_SCOPE_USER: + add = strv_new("/run/systemd/user-generators", + "/etc/systemd/user-generators", + "/usr/local/lib/systemd/user-generators", + USER_GENERATOR_DIR); + break; + + default: + assert_not_reached(); + } + + if (!add) + return NULL; + + if (paths) { + r = strv_extend_strv(&paths, add, true); + if (r < 0) + return NULL; + } else + /* Small optimization: if paths is NULL (and it usually is), we can simply assign 'add' to it, + * and don't have to copy anything */ + paths = TAKE_PTR(add); + } + + return TAKE_PTR(paths); +} + +char **env_generator_binary_paths(bool is_system) { + bool append = false; /* Add items from SYSTEMD_ENVIRONMENT_GENERATOR_PATH before normal directories */ + _cleanup_strv_free_ char **paths = NULL; + _cleanup_strv_free_ char **add = NULL; + int r; + + /* First priority is whatever has been passed to us via env vars */ + r = get_paths_from_environ("SYSTEMD_ENVIRONMENT_GENERATOR_PATH", &paths, &append); + if (r < 0) + return NULL; + + if (!paths || append) { + if (is_system) + add = strv_new("/run/systemd/system-environment-generators", + "/etc/systemd/system-environment-generators", + "/usr/local/lib/systemd/system-environment-generators", + SYSTEM_ENV_GENERATOR_DIR); + else + add = strv_new("/run/systemd/user-environment-generators", + "/etc/systemd/user-environment-generators", + "/usr/local/lib/systemd/user-environment-generators", + USER_ENV_GENERATOR_DIR); + + if (!add) + return NULL; + } + + if (paths) { + r = strv_extend_strv(&paths, add, true); + if (r < 0) + return NULL; + } else + /* Small optimization: if paths is NULL (and it usually is), we can simply assign 'add' to it, + * and don't have to copy anything */ + paths = TAKE_PTR(add); + + return TAKE_PTR(paths); +} + +int find_portable_profile(const char *name, const char *unit, char **ret_path) { + const char *p, *dot; + + assert(name); + assert(ret_path); + + assert_se(dot = strrchr(unit, '.')); + + NULSTR_FOREACH(p, PORTABLE_PROFILE_DIRS) { + _cleanup_free_ char *joined = NULL; + + joined = strjoin(p, "/", name, "/", dot + 1, ".conf"); + if (!joined) + return -ENOMEM; + + if (laccess(joined, F_OK) >= 0) { + *ret_path = TAKE_PTR(joined); + return 0; + } + + if (errno != ENOENT) + return -errno; + } + + return -ENOENT; +} diff --git a/src/basic/path-lookup.h b/src/basic/path-lookup.h new file mode 100644 index 0000000..aed72de --- /dev/null +++ b/src/basic/path-lookup.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#include "def.h" +#include "macro.h" + +typedef enum LookupPathsFlags { + LOOKUP_PATHS_EXCLUDE_GENERATED = 1 << 0, + LOOKUP_PATHS_TEMPORARY_GENERATED = 1 << 1, + LOOKUP_PATHS_SPLIT_USR = 1 << 2, +} LookupPathsFlags; + +typedef enum LookupScope { + LOOKUP_SCOPE_SYSTEM, + LOOKUP_SCOPE_GLOBAL, + LOOKUP_SCOPE_USER, + _LOOKUP_SCOPE_MAX, + _LOOKUP_SCOPE_INVALID = -EINVAL, +} LookupScope; + +typedef struct LookupPaths { + /* Where we look for unit files. This includes the individual special paths below, but also any vendor + * supplied, static unit file paths. */ + char **search_path; + + /* Where we shall create or remove our installation symlinks, aka "configuration", and where the user/admin + * shall place their own unit files. */ + char *persistent_config; + char *runtime_config; + + /* Where units from a portable service image shall be placed. */ + char *persistent_attached; + char *runtime_attached; + + /* Where to place generated unit files (i.e. those a "generator" tool generated). Note the special semantics of + * this directory: the generators are flushed each time a "systemctl daemon-reload" is issued. The user should + * not alter these directories directly. */ + char *generator; + char *generator_early; + char *generator_late; + + /* Where to place transient unit files (i.e. those created dynamically via the bus API). Note the special + * semantics of this directory: all units created transiently have their unit files removed as the transient + * unit is unloaded. The user should not alter this directory directly. */ + char *transient; + + /* Where the snippets created by "systemctl set-property" are placed. Note that for transient units, the + * snippets are placed in the transient directory though (see above). The user should not alter this directory + * directly. */ + char *persistent_control; + char *runtime_control; + + /* The root directory prepended to all items above, or NULL */ + char *root_dir; + + /* A temporary directory when running in test mode, to be nuked */ + char *temporary_dir; +} LookupPaths; + +int lookup_paths_init(LookupPaths *lp, LookupScope scope, LookupPathsFlags flags, const char *root_dir); +int lookup_paths_init_or_warn(LookupPaths *lp, LookupScope scope, LookupPathsFlags flags, const char *root_dir); + +int xdg_user_dirs(char ***ret_config_dirs, char ***ret_data_dirs); +int xdg_user_runtime_dir(char **ret, const char *suffix); +int xdg_user_config_dir(char **ret, const char *suffix); +int xdg_user_data_dir(char **ret, const char *suffix); + +bool path_is_user_data_dir(const char *path); +bool path_is_user_config_dir(const char *path); + +void lookup_paths_log(LookupPaths *p); +void lookup_paths_free(LookupPaths *p); + +char **generator_binary_paths(LookupScope scope); +char **env_generator_binary_paths(bool is_system); + +#define NETWORK_DIRS ((const char* const*) CONF_PATHS_STRV("systemd/network")) +#define NETWORK_DIRS_NULSTR CONF_PATHS_NULSTR("systemd/network") + +#define PORTABLE_PROFILE_DIRS CONF_PATHS_NULSTR("systemd/portable/profile") +int find_portable_profile(const char *name, const char *unit, char **ret_path); diff --git a/src/basic/path-util.c b/src/basic/path-util.c new file mode 100644 index 0000000..72c0d6b --- /dev/null +++ b/src/basic/path-util.c @@ -0,0 +1,1397 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "chase-symlinks.h" +#include "extract-word.h" +#include "fd-util.h" +#include "fs-util.h" +#include "glob-util.h" +#include "log.h" +#include "macro.h" +#include "path-util.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" + +int path_split_and_make_absolute(const char *p, char ***ret) { + char **l; + int r; + + assert(p); + assert(ret); + + l = strv_split(p, ":"); + if (!l) + return -ENOMEM; + + r = path_strv_make_absolute_cwd(l); + if (r < 0) { + strv_free(l); + return r; + } + + *ret = l; + return r; +} + +char* path_make_absolute(const char *p, const char *prefix) { + assert(p); + + /* Makes every item in the list an absolute path by prepending + * the prefix, if specified and necessary */ + + if (path_is_absolute(p) || isempty(prefix)) + return strdup(p); + + return path_join(prefix, p); +} + +int safe_getcwd(char **ret) { + _cleanup_free_ char *cwd = NULL; + + cwd = get_current_dir_name(); + if (!cwd) + return negative_errno(); + + /* Let's make sure the directory is really absolute, to protect us from the logic behind + * CVE-2018-1000001 */ + if (cwd[0] != '/') + return -ENOMEDIUM; + + if (ret) + *ret = TAKE_PTR(cwd); + + return 0; +} + +int path_make_absolute_cwd(const char *p, char **ret) { + char *c; + int r; + + assert(p); + assert(ret); + + /* Similar to path_make_absolute(), but prefixes with the + * current working directory. */ + + if (path_is_absolute(p)) + c = strdup(p); + else { + _cleanup_free_ char *cwd = NULL; + + r = safe_getcwd(&cwd); + if (r < 0) + return r; + + c = path_join(cwd, p); + } + if (!c) + return -ENOMEM; + + *ret = c; + return 0; +} + +int path_make_relative(const char *from, const char *to, char **ret) { + _cleanup_free_ char *result = NULL; + unsigned n_parents; + const char *f, *t; + int r, k; + char *p; + + assert(from); + assert(to); + assert(ret); + + /* Strips the common part, and adds ".." elements as necessary. */ + + if (!path_is_absolute(from) || !path_is_absolute(to)) + return -EINVAL; + + for (;;) { + r = path_find_first_component(&from, true, &f); + if (r < 0) + return r; + + k = path_find_first_component(&to, true, &t); + if (k < 0) + return k; + + if (r == 0) { + /* end of 'from' */ + if (k == 0) { + /* from and to are equivalent. */ + result = strdup("."); + if (!result) + return -ENOMEM; + } else { + /* 'to' is inside of 'from'. */ + result = strdup(t); + if (!result) + return -ENOMEM; + + path_simplify(result); + + if (!path_is_valid(result)) + return -EINVAL; + } + + *ret = TAKE_PTR(result); + return 0; + } + + if (r != k || !strneq(f, t, r)) + break; + } + + /* If we're here, then "from_dir" has one or more elements that need to + * be replaced with "..". */ + + for (n_parents = 1;; n_parents++) { + /* If this includes ".." we can't do a simple series of "..". */ + r = path_find_first_component(&from, false, &f); + if (r < 0) + return r; + if (r == 0) + break; + } + + if (isempty(t) && n_parents * 3 > PATH_MAX) + /* PATH_MAX is counted *with* the trailing NUL byte */ + return -EINVAL; + + result = new(char, n_parents * 3 + !isempty(t) + strlen_ptr(t)); + if (!result) + return -ENOMEM; + + for (p = result; n_parents > 0; n_parents--) + p = mempcpy(p, "../", 3); + + if (isempty(t)) { + /* Remove trailing slash and terminate string. */ + *(--p) = '\0'; + *ret = TAKE_PTR(result); + return 0; + } + + strcpy(p, t); + + path_simplify(result); + + if (!path_is_valid(result)) + return -EINVAL; + + *ret = TAKE_PTR(result); + return 0; +} + +int path_make_relative_parent(const char *from_child, const char *to, char **ret) { + _cleanup_free_ char *from = NULL; + int r; + + assert(from_child); + assert(to); + assert(ret); + + /* Similar to path_make_relative(), but provides the relative path from the parent directory of + * 'from_child'. This may be useful when creating relative symlink. + * + * E.g. + * - from = "/path/to/aaa", to = "/path/to/bbb" + * path_make_relative(from, to) = "../bbb" + * path_make_relative_parent(from, to) = "bbb" + * + * - from = "/path/to/aaa/bbb", to = "/path/to/ccc/ddd" + * path_make_relative(from, to) = "../../ccc/ddd" + * path_make_relative_parent(from, to) = "../ccc/ddd" + */ + + r = path_extract_directory(from_child, &from); + if (r < 0) + return r; + + return path_make_relative(from, to, ret); +} + +char* path_startswith_strv(const char *p, char **set) { + STRV_FOREACH(s, set) { + char *t; + + t = path_startswith(p, *s); + if (t) + return t; + } + + return NULL; +} + +int path_strv_make_absolute_cwd(char **l) { + int r; + + /* Goes through every item in the string list and makes it + * absolute. This works in place and won't rollback any + * changes on failure. */ + + STRV_FOREACH(s, l) { + char *t; + + r = path_make_absolute_cwd(*s, &t); + if (r < 0) + return r; + + path_simplify(t); + free_and_replace(*s, t); + } + + return 0; +} + +char** path_strv_resolve(char **l, const char *root) { + unsigned k = 0; + bool enomem = false; + int r; + + if (strv_isempty(l)) + return l; + + /* Goes through every item in the string list and canonicalize + * the path. This works in place and won't rollback any + * changes on failure. */ + + STRV_FOREACH(s, l) { + _cleanup_free_ char *orig = NULL; + char *t, *u; + + if (!path_is_absolute(*s)) { + free(*s); + continue; + } + + if (root) { + orig = *s; + t = path_join(root, orig); + if (!t) { + enomem = true; + continue; + } + } else + t = *s; + + r = chase_symlinks(t, root, 0, &u, NULL); + if (r == -ENOENT) { + if (root) { + u = TAKE_PTR(orig); + free(t); + } else + u = t; + } else if (r < 0) { + free(t); + + if (r == -ENOMEM) + enomem = true; + + continue; + } else if (root) { + char *x; + + free(t); + x = path_startswith(u, root); + if (x) { + /* restore the slash if it was lost */ + if (!startswith(x, "/")) + *(--x) = '/'; + + t = strdup(x); + free(u); + if (!t) { + enomem = true; + continue; + } + u = t; + } else { + /* canonicalized path goes outside of + * prefix, keep the original path instead */ + free_and_replace(u, orig); + } + } else + free(t); + + l[k++] = u; + } + + l[k] = NULL; + + if (enomem) + return NULL; + + return l; +} + +char** path_strv_resolve_uniq(char **l, const char *root) { + + if (strv_isempty(l)) + return l; + + if (!path_strv_resolve(l, root)) + return NULL; + + return strv_uniq(l); +} + +char* path_simplify_full(char *path, PathSimplifyFlags flags) { + bool add_slash = false, keep_trailing_slash; + char *f = ASSERT_PTR(path); + int r; + + /* Removes redundant inner and trailing slashes. Also removes unnecessary dots. + * Modifies the passed string in-place. + * + * ///foo//./bar/. becomes /foo/bar + * .//./foo//./bar/. becomes foo/bar + */ + + if (isempty(path)) + return path; + + keep_trailing_slash = FLAGS_SET(flags, PATH_SIMPLIFY_KEEP_TRAILING_SLASH) && endswith(path, "/"); + + if (path_is_absolute(path)) + f++; + + for (const char *p = f;;) { + const char *e; + + r = path_find_first_component(&p, true, &e); + if (r == 0) + break; + + if (add_slash) + *f++ = '/'; + + if (r < 0) { + /* if path is invalid, then refuse to simplify remaining part. */ + memmove(f, p, strlen(p) + 1); + return path; + } + + memmove(f, e, r); + f += r; + + add_slash = true; + } + + /* Special rule, if we stripped everything, we need a "." for the current directory. */ + if (f == path) + *f++ = '.'; + + if (*(f-1) != '/' && keep_trailing_slash) + *f++ = '/'; + + *f = '\0'; + return path; +} + +char* path_startswith_full(const char *path, const char *prefix, bool accept_dot_dot) { + assert(path); + assert(prefix); + + /* Returns a pointer to the start of the first component after the parts matched by + * the prefix, iff + * - both paths are absolute or both paths are relative, + * and + * - each component in prefix in turn matches a component in path at the same position. + * An empty string will be returned when the prefix and path are equivalent. + * + * Returns NULL otherwise. + */ + + if ((path[0] == '/') != (prefix[0] == '/')) + return NULL; + + for (;;) { + const char *p, *q; + int r, k; + + r = path_find_first_component(&path, accept_dot_dot, &p); + if (r < 0) + return NULL; + + k = path_find_first_component(&prefix, accept_dot_dot, &q); + if (k < 0) + return NULL; + + if (k == 0) + return (char*) (p ?: path); + + if (r != k) + return NULL; + + if (!strneq(p, q, r)) + return NULL; + } +} + +int path_compare(const char *a, const char *b) { + int r; + + /* Order NULL before non-NULL */ + r = CMP(!!a, !!b); + if (r != 0) + return r; + + /* A relative path and an absolute path must not compare as equal. + * Which one is sorted before the other does not really matter. + * Here a relative path is ordered before an absolute path. */ + r = CMP(path_is_absolute(a), path_is_absolute(b)); + if (r != 0) + return r; + + for (;;) { + const char *aa, *bb; + int j, k; + + j = path_find_first_component(&a, true, &aa); + k = path_find_first_component(&b, true, &bb); + + if (j < 0 || k < 0) { + /* When one of paths is invalid, order invalid path after valid one. */ + r = CMP(j < 0, k < 0); + if (r != 0) + return r; + + /* fallback to use strcmp() if both paths are invalid. */ + return strcmp(a, b); + } + + /* Order prefixes first: "/foo" before "/foo/bar" */ + if (j == 0) { + if (k == 0) + return 0; + return -1; + } + if (k == 0) + return 1; + + /* Alphabetical sort: "/foo/aaa" before "/foo/b" */ + r = memcmp(aa, bb, MIN(j, k)); + if (r != 0) + return r; + + /* Sort "/foo/a" before "/foo/aaa" */ + r = CMP(j, k); + if (r != 0) + return r; + } +} + +bool path_equal_or_files_same(const char *a, const char *b, int flags) { + return path_equal(a, b) || files_same(a, b, flags) > 0; +} + +bool path_equal_filename(const char *a, const char *b) { + _cleanup_free_ char *a_basename = NULL, *b_basename = NULL; + int r; + + assert(a); + assert(b); + + r = path_extract_filename(a, &a_basename); + if (r < 0) { + log_debug_errno(r, "Failed to parse basename of %s: %m", a); + return false; + } + r = path_extract_filename(b, &b_basename); + if (r < 0) { + log_debug_errno(r, "Failed to parse basename of %s: %m", b); + return false; + } + + return path_equal(a_basename, b_basename); +} + +char* path_extend_internal(char **x, ...) { + size_t sz, old_sz; + char *q, *nx; + const char *p; + va_list ap; + bool slash; + + /* Joins all listed strings until the sentinel and places a "/" between them unless the strings end/begin + * already with one so that it is unnecessary. Note that slashes which are already duplicate won't be + * removed. The string returned is hence always equal to or longer than the sum of the lengths of each + * individual string. + * + * The first argument may be an already allocated string that is extended via realloc() if + * non-NULL. path_extend() and path_join() are macro wrappers around this function, making use of the + * first parameter to distinguish the two operations. + * + * Note: any listed empty string is simply skipped. This can be useful for concatenating strings of which some + * are optional. + * + * Examples: + * + * path_join("foo", "bar") → "foo/bar" + * path_join("foo/", "bar") → "foo/bar" + * path_join("", "foo", "", "bar", "") → "foo/bar" */ + + sz = old_sz = x ? strlen_ptr(*x) : 0; + va_start(ap, x); + while ((p = va_arg(ap, char*)) != POINTER_MAX) { + size_t add; + + if (isempty(p)) + continue; + + add = 1 + strlen(p); + if (sz > SIZE_MAX - add) { /* overflow check */ + va_end(ap); + return NULL; + } + + sz += add; + } + va_end(ap); + + nx = realloc(x ? *x : NULL, GREEDY_ALLOC_ROUND_UP(sz+1)); + if (!nx) + return NULL; + if (x) + *x = nx; + + if (old_sz > 0) + slash = nx[old_sz-1] == '/'; + else { + nx[old_sz] = 0; + slash = true; /* no need to generate a slash anymore */ + } + + q = nx + old_sz; + + va_start(ap, x); + while ((p = va_arg(ap, char*)) != POINTER_MAX) { + if (isempty(p)) + continue; + + if (!slash && p[0] != '/') + *(q++) = '/'; + + q = stpcpy(q, p); + slash = endswith(p, "/"); + } + va_end(ap); + + return nx; +} + +static int check_x_access(const char *path, int *ret_fd) { + _cleanup_close_ int fd = -1; + int r; + + /* We need to use O_PATH because there may be executables for which we have only exec + * permissions, but not read (usually suid executables). */ + fd = open(path, O_PATH|O_CLOEXEC); + if (fd < 0) + return -errno; + + r = fd_verify_regular(fd); + if (r < 0) + return r; + + r = access_fd(fd, X_OK); + if (r == -ENOSYS) { + /* /proc is not mounted. Fallback to access(). */ + if (access(path, X_OK) < 0) + return -errno; + } else if (r < 0) + return r; + + if (ret_fd) + *ret_fd = TAKE_FD(fd); + + return 0; +} + +static int find_executable_impl(const char *name, const char *root, char **ret_filename, int *ret_fd) { + _cleanup_close_ int fd = -1; + _cleanup_free_ char *path_name = NULL; + int r; + + assert(name); + + /* Function chase_symlinks() is invoked only when root is not NULL, as using it regardless of + * root value would alter the behavior of existing callers for example: /bin/sleep would become + * /usr/bin/sleep when find_executables is called. Hence, this function should be invoked when + * needed to avoid unforeseen regression or other complicated changes. */ + if (root) { + r = chase_symlinks(name, + root, + CHASE_PREFIX_ROOT, + &path_name, + /* ret_fd= */ NULL); /* prefix root to name in case full paths are not specified */ + if (r < 0) + return r; + + name = path_name; + } + + r = check_x_access(name, ret_fd ? &fd : NULL); + if (r < 0) + return r; + + if (ret_filename) { + r = path_make_absolute_cwd(name, ret_filename); + if (r < 0) + return r; + } + + if (ret_fd) + *ret_fd = TAKE_FD(fd); + + return 0; +} + +int find_executable_full(const char *name, const char *root, char **exec_search_path, bool use_path_envvar, char **ret_filename, int *ret_fd) { + int last_error = -ENOENT, r = 0; + const char *p = NULL; + + assert(name); + + if (is_path(name)) + return find_executable_impl(name, root, ret_filename, ret_fd); + + if (use_path_envvar) + /* Plain getenv, not secure_getenv, because we want to actually allow the user to pick the + * binary. */ + p = getenv("PATH"); + if (!p) + p = DEFAULT_PATH; + + if (exec_search_path) { + STRV_FOREACH(element, exec_search_path) { + _cleanup_free_ char *full_path = NULL; + + if (!path_is_absolute(*element)) + continue; + + full_path = path_join(*element, name); + if (!full_path) + return -ENOMEM; + + r = find_executable_impl(full_path, root, ret_filename, ret_fd); + if (r < 0) { + if (r != -EACCES) + last_error = r; + continue; + } + return 0; + } + return last_error; + } + + /* Resolve a single-component name to a full path */ + for (;;) { + _cleanup_free_ char *element = NULL; + + r = extract_first_word(&p, &element, ":", EXTRACT_RELAX|EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + if (r == 0) + break; + + if (!path_is_absolute(element)) + continue; + + if (!path_extend(&element, name)) + return -ENOMEM; + + r = find_executable_impl(element, root, ret_filename, ret_fd); + if (r < 0) { + /* PATH entries which we don't have access to are ignored, as per tradition. */ + if (r != -EACCES) + last_error = r; + continue; + } + + /* Found it! */ + return 0; + } + + return last_error; +} + +bool paths_check_timestamp(const char* const* paths, usec_t *timestamp, bool update) { + bool changed = false, originally_unset; + + assert(timestamp); + + if (!paths) + return false; + + originally_unset = *timestamp == 0; + + STRV_FOREACH(i, paths) { + struct stat stats; + usec_t u; + + if (stat(*i, &stats) < 0) + continue; + + u = timespec_load(&stats.st_mtim); + + /* check first */ + if (*timestamp >= u) + continue; + + log_debug(originally_unset ? "Loaded timestamp for '%s'." : "Timestamp of '%s' changed.", *i); + + /* update timestamp */ + if (update) { + *timestamp = u; + changed = true; + } else + return true; + } + + return changed; +} + +static int executable_is_good(const char *executable) { + _cleanup_free_ char *p = NULL, *d = NULL; + int r; + + r = find_executable(executable, &p); + if (r == -ENOENT) + return 0; + if (r < 0) + return r; + + /* An fsck that is linked to /bin/true is a non-existent fsck */ + + r = readlink_malloc(p, &d); + if (r == -EINVAL) /* not a symlink */ + return 1; + if (r < 0) + return r; + + return !PATH_IN_SET(d, "true" + "/bin/true", + "/usr/bin/true", + "/dev/null"); +} + +int fsck_exists(void) { + return executable_is_good("fsck"); +} + +int fsck_exists_for_fstype(const char *fstype) { + const char *checker; + int r; + + assert(fstype); + + if (streq(fstype, "auto")) + return -EINVAL; + + r = fsck_exists(); + if (r <= 0) + return r; + + checker = strjoina("fsck.", fstype); + return executable_is_good(checker); +} + +static const char* skip_slash_or_dot(const char *p) { + for (; !isempty(p); p++) { + if (*p == '/') + continue; + if (startswith(p, "./")) { + p++; + continue; + } + break; + } + return p; +} + +int path_find_first_component(const char **p, bool accept_dot_dot, const char **ret) { + const char *q, *first, *end_first, *next; + size_t len; + + assert(p); + + /* When a path is input, then returns the pointer to the first component and its length, and + * move the input pointer to the next component or nul. This skips both over any '/' + * immediately *before* and *after* the first component before returning. + * + * Examples + * Input: p: "//.//aaa///bbbbb/cc" + * Output: p: "bbbbb///cc" + * ret: "aaa///bbbbb/cc" + * return value: 3 (== strlen("aaa")) + * + * Input: p: "aaa//" + * Output: p: (pointer to NUL) + * ret: "aaa//" + * return value: 3 (== strlen("aaa")) + * + * Input: p: "/", ".", "" + * Output: p: (pointer to NUL) + * ret: NULL + * return value: 0 + * + * Input: p: NULL + * Output: p: NULL + * ret: NULL + * return value: 0 + * + * Input: p: "(too long component)" + * Output: return value: -EINVAL + * + * (when accept_dot_dot is false) + * Input: p: "//..//aaa///bbbbb/cc" + * Output: return value: -EINVAL + */ + + q = *p; + + first = skip_slash_or_dot(q); + if (isempty(first)) { + *p = first; + if (ret) + *ret = NULL; + return 0; + } + if (streq(first, ".")) { + *p = first + 1; + if (ret) + *ret = NULL; + return 0; + } + + end_first = strchrnul(first, '/'); + len = end_first - first; + + if (len > NAME_MAX) + return -EINVAL; + if (!accept_dot_dot && len == 2 && first[0] == '.' && first[1] == '.') + return -EINVAL; + + next = skip_slash_or_dot(end_first); + + *p = next + streq(next, "."); + if (ret) + *ret = first; + return len; +} + +static const char* skip_slash_or_dot_backward(const char *path, const char *q) { + assert(path); + assert(!q || q >= path); + + for (; q; q = PTR_SUB1(q, path)) { + if (*q == '/') + continue; + if (q > path && strneq(q - 1, "/.", 2)) + continue; + break; + } + return q; +} + +int path_find_last_component(const char *path, bool accept_dot_dot, const char **next, const char **ret) { + const char *q, *last_end, *last_begin; + size_t len; + + /* Similar to path_find_first_component(), but search components from the end. + * + * Examples + * Input: path: "//.//aaa///bbbbb/cc//././" + * next: NULL + * Output: next: "/cc//././" + * ret: "cc//././" + * return value: 2 (== strlen("cc")) + * + * Input: path: "//.//aaa///bbbbb/cc//././" + * next: "/cc//././" + * Output: next: "///bbbbb/cc//././" + * ret: "bbbbb/cc//././" + * return value: 5 (== strlen("bbbbb")) + * + * Input: path: "/", ".", "", or NULL + * Output: next: equivalent to path + * ret: NULL + * return value: 0 + * + * Input: path: "(too long component)" + * Output: return value: -EINVAL + * + * (when accept_dot_dot is false) + * Input: path: "//..//aaa///bbbbb/cc/..//" + * Output: return value: -EINVAL + */ + + if (isempty(path)) { + if (next) + *next = path; + if (ret) + *ret = NULL; + return 0; + } + + if (next && *next) { + if (*next < path || *next > path + strlen(path)) + return -EINVAL; + if (*next == path) { + if (ret) + *ret = NULL; + return 0; + } + if (!IN_SET(**next, '\0', '/')) + return -EINVAL; + q = *next - 1; + } else + q = path + strlen(path) - 1; + + q = skip_slash_or_dot_backward(path, q); + if (!q || /* the root directory */ + (q == path && *q == '.')) { /* path is "." or "./" */ + if (next) + *next = path; + if (ret) + *ret = NULL; + return 0; + } + + last_end = q + 1; + + while (q && *q != '/') + q = PTR_SUB1(q, path); + + last_begin = q ? q + 1 : path; + len = last_end - last_begin; + + if (len > NAME_MAX) + return -EINVAL; + if (!accept_dot_dot && len == 2 && strneq(last_begin, "..", 2)) + return -EINVAL; + + if (next) { + q = skip_slash_or_dot_backward(path, q); + *next = q ? q + 1 : path; + } + + if (ret) + *ret = last_begin; + return len; +} + +const char* last_path_component(const char *path) { + + /* Finds the last component of the path, preserving the optional trailing slash that signifies a directory. + * + * a/b/c → c + * a/b/c/ → c/ + * x → x + * x/ → x/ + * /y → y + * /y/ → y/ + * / → / + * // → / + * /foo/a → a + * /foo/a/ → a/ + * + * Also, the empty string is mapped to itself. + * + * This is different than basename(), which returns "" when a trailing slash is present. + * + * This always succeeds (except if you pass NULL in which case it returns NULL, too). + */ + + unsigned l, k; + + if (!path) + return NULL; + + l = k = strlen(path); + if (l == 0) /* special case — an empty string */ + return path; + + while (k > 0 && path[k-1] == '/') + k--; + + if (k == 0) /* the root directory */ + return path + l - 1; + + while (k > 0 && path[k-1] != '/') + k--; + + return path + k; +} + +int path_extract_filename(const char *path, char **ret) { + _cleanup_free_ char *a = NULL; + const char *c, *next = NULL; + int r; + + /* Extracts the filename part (i.e. right-most component) from a path, i.e. string that passes + * filename_is_valid(). A wrapper around last_path_component(), but eats up trailing + * slashes. Returns: + * + * -EINVAL → if the path is not valid + * -EADDRNOTAVAIL → if only a directory was specified, but no filename, i.e. the root dir + * itself or "." is specified + * -ENOMEM → no memory + * + * Returns >= 0 on success. If the input path has a trailing slash, returns O_DIRECTORY, to + * indicate the referenced file must be a directory. + * + * This function guarantees to return a fully valid filename, i.e. one that passes + * filename_is_valid() – this means "." and ".." are not accepted. */ + + if (!path_is_valid(path)) + return -EINVAL; + + r = path_find_last_component(path, false, &next, &c); + if (r < 0) + return r; + if (r == 0) /* root directory */ + return -EADDRNOTAVAIL; + + a = strndup(c, r); + if (!a) + return -ENOMEM; + + *ret = TAKE_PTR(a); + return strlen(c) > (size_t) r ? O_DIRECTORY : 0; +} + +int path_extract_directory(const char *path, char **ret) { + _cleanup_free_ char *a = NULL; + const char *c, *next = NULL; + int r; + + /* The inverse of path_extract_filename(), i.e. returns the directory path prefix. Returns: + * + * -EINVAL → if the path is not valid + * -EDESTADDRREQ → if no directory was specified in the passed in path, i.e. only a filename was passed + * -EADDRNOTAVAIL → if the passed in parameter had no filename but did have a directory, i.e. + * the root dir itself or "." was specified + * -ENOMEM → no memory (surprise!) + * + * This function guarantees to return a fully valid path, i.e. one that passes path_is_valid(). + */ + + r = path_find_last_component(path, false, &next, &c); + if (r < 0) + return r; + if (r == 0) /* empty or root */ + return isempty(path) ? -EINVAL : -EADDRNOTAVAIL; + if (next == path) { + if (*path != '/') /* filename only */ + return -EDESTADDRREQ; + + a = strdup("/"); + if (!a) + return -ENOMEM; + *ret = TAKE_PTR(a); + return 0; + } + + a = strndup(path, next - path); + if (!a) + return -ENOMEM; + + path_simplify(a); + + if (!path_is_valid(a)) + return -EINVAL; + + *ret = TAKE_PTR(a); + return 0; +} + +bool filename_is_valid(const char *p) { + const char *e; + + if (isempty(p)) + return false; + + if (dot_or_dot_dot(p)) /* Yes, in this context we consider "." and ".." invalid */ + return false; + + e = strchrnul(p, '/'); + if (*e != 0) + return false; + + if (e - p > NAME_MAX) /* NAME_MAX is counted *without* the trailing NUL byte */ + return false; + + return true; +} + +bool path_is_valid_full(const char *p, bool accept_dot_dot) { + if (isempty(p)) + return false; + + for (const char *e = p;;) { + int r; + + r = path_find_first_component(&e, accept_dot_dot, NULL); + if (r < 0) + return false; + + if (e - p >= PATH_MAX) /* Already reached the maximum length for a path? (PATH_MAX is counted + * *with* the trailing NUL byte) */ + return false; + if (*e == 0) /* End of string? Yay! */ + return true; + } +} + +bool path_is_normalized(const char *p) { + if (!path_is_safe(p)) + return false; + + if (streq(p, ".") || startswith(p, "./") || endswith(p, "/.") || strstr(p, "/./")) + return false; + + if (strstr(p, "//")) + return false; + + return true; +} + +char *file_in_same_dir(const char *path, const char *filename) { + char *e, *ret; + size_t k; + + assert(path); + assert(filename); + + /* This removes the last component of path and appends + * filename, unless the latter is absolute anyway or the + * former isn't */ + + if (path_is_absolute(filename)) + return strdup(filename); + + e = strrchr(path, '/'); + if (!e) + return strdup(filename); + + k = strlen(filename); + ret = new(char, (e + 1 - path) + k + 1); + if (!ret) + return NULL; + + memcpy(mempcpy(ret, path, e + 1 - path), filename, k + 1); + return ret; +} + +bool hidden_or_backup_file(const char *filename) { + assert(filename); + + if (filename[0] == '.' || + STR_IN_SET(filename, + "lost+found", + "aquota.user", + "aquota.group") || + endswith(filename, "~")) + return true; + + const char *dot = strrchr(filename, '.'); + if (!dot) + return false; + + /* Please, let's not add more entries to the list below. If external projects think it's a good idea + * to come up with always new suffixes and that everybody else should just adjust to that, then it + * really should be on them. Hence, in future, let's not add any more entries. Instead, let's ask + * those packages to instead adopt one of the generic suffixes/prefixes for hidden files or backups, + * possibly augmented with an additional string. Specifically: there's now: + * + * The generic suffixes "~" and ".bak" for backup files + * The generic prefix "." for hidden files + * + * Thus, if a new package manager "foopkg" wants its own set of ".foopkg-new", ".foopkg-old", + * ".foopkg-dist" or so registered, let's refuse that and ask them to use ".foopkg.new", + * ".foopkg.old" or ".foopkg~" instead. + */ + + return STR_IN_SET(dot + 1, + "rpmnew", + "rpmsave", + "rpmorig", + "dpkg-old", + "dpkg-new", + "dpkg-tmp", + "dpkg-dist", + "dpkg-bak", + "dpkg-backup", + "dpkg-remove", + "ucf-new", + "ucf-old", + "ucf-dist", + "swp", + "bak", + "old", + "new"); +} + +bool is_device_path(const char *path) { + + /* Returns true for paths that likely refer to a device, either by path in sysfs or to something in + * /dev. This accepts any path that starts with /dev/ or /sys/ and has something after that prefix. + * It does not actually resolve the path. + * + * Examples: + * /dev/sda, /dev/sda/foo, /sys/class, /dev/.., /sys/.., /./dev/foo → yes. + * /../dev/sda, /dev, /sys, /usr/path, /usr/../dev/sda → no. + */ + + const char *p = PATH_STARTSWITH_SET(ASSERT_PTR(path), "/dev/", "/sys/"); + return !isempty(p); +} + +bool valid_device_node_path(const char *path) { + + /* Some superficial checks whether the specified path is a valid device node path, all without + * looking at the actual device node. */ + + if (!PATH_STARTSWITH_SET(path, "/dev/", "/run/systemd/inaccessible/")) + return false; + + if (endswith(path, "/")) /* can't be a device node if it ends in a slash */ + return false; + + return path_is_normalized(path); +} + +bool valid_device_allow_pattern(const char *path) { + assert(path); + + /* Like valid_device_node_path(), but also allows full-subsystem expressions like those accepted by + * DeviceAllow= and DeviceDeny=. */ + + if (STARTSWITH_SET(path, "block-", "char-")) + return true; + + return valid_device_node_path(path); +} + +bool dot_or_dot_dot(const char *path) { + if (!path) + return false; + if (path[0] != '.') + return false; + if (path[1] == 0) + return true; + if (path[1] != '.') + return false; + + return path[2] == 0; +} + +bool empty_or_root(const char *path) { + + /* For operations relative to some root directory, returns true if the specified root directory is + * redundant, i.e. either / or NULL or the empty string or any equivalent. */ + + if (isempty(path)) + return true; + + return path_equal(path, "/"); +} + +bool path_strv_contains(char **l, const char *path) { + STRV_FOREACH(i, l) + if (path_equal(*i, path)) + return true; + + return false; +} + +bool prefixed_path_strv_contains(char **l, const char *path) { + STRV_FOREACH(i, l) { + const char *j = *i; + + if (*j == '-') + j++; + if (*j == '+') + j++; + if (path_equal(j, path)) + return true; + } + + return false; +} + +int path_glob_can_match(const char *pattern, const char *prefix, char **ret) { + assert(pattern); + assert(prefix); + + for (const char *a = pattern, *b = prefix;;) { + _cleanup_free_ char *g = NULL, *h = NULL; + const char *p, *q; + int r, s; + + r = path_find_first_component(&a, /* accept_dot_dot = */ false, &p); + if (r < 0) + return r; + + s = path_find_first_component(&b, /* accept_dot_dot = */ false, &q); + if (s < 0) + return s; + + if (s == 0) { + /* The pattern matches the prefix. */ + if (ret) { + char *t; + + t = path_join(prefix, p); + if (!t) + return -ENOMEM; + + *ret = t; + } + return true; + } + + if (r == 0) + break; + + if (r == s && strneq(p, q, r)) + continue; /* common component. Check next. */ + + g = strndup(p, r); + if (!g) + return -ENOMEM; + + if (!string_is_glob(g)) + break; + + /* We found a glob component. Check if the glob pattern matches the prefix component. */ + + h = strndup(q, s); + if (!h) + return -ENOMEM; + + r = fnmatch(g, h, 0); + if (r == FNM_NOMATCH) + break; + if (r != 0) /* Failure to process pattern? */ + return -EINVAL; + } + + /* The pattern does not match the prefix. */ + if (ret) + *ret = NULL; + return false; +} diff --git a/src/basic/path-util.h b/src/basic/path-util.h new file mode 100644 index 0000000..09604ba --- /dev/null +++ b/src/basic/path-util.h @@ -0,0 +1,208 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include + +#include "macro.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" + +#define PATH_SPLIT_SBIN_BIN(x) x "sbin:" x "bin" +#define PATH_SPLIT_SBIN_BIN_NULSTR(x) x "sbin\0" x "bin\0" + +#define PATH_NORMAL_SBIN_BIN(x) x "bin" +#define PATH_NORMAL_SBIN_BIN_NULSTR(x) x "bin\0" + +#if HAVE_SPLIT_BIN +# define PATH_SBIN_BIN(x) PATH_SPLIT_SBIN_BIN(x) +# define PATH_SBIN_BIN_NULSTR(x) PATH_SPLIT_SBIN_BIN_NULSTR(x) +#else +# define PATH_SBIN_BIN(x) PATH_NORMAL_SBIN_BIN(x) +# define PATH_SBIN_BIN_NULSTR(x) PATH_NORMAL_SBIN_BIN_NULSTR(x) +#endif + +#define DEFAULT_PATH_NORMAL PATH_SBIN_BIN("/usr/local/") ":" PATH_SBIN_BIN("/usr/") +#define DEFAULT_PATH_NORMAL_NULSTR PATH_SBIN_BIN_NULSTR("/usr/local/") PATH_SBIN_BIN_NULSTR("/usr/") +#define DEFAULT_PATH_SPLIT_USR DEFAULT_PATH_NORMAL ":" PATH_SBIN_BIN("/") +#define DEFAULT_PATH_SPLIT_USR_NULSTR DEFAULT_PATH_NORMAL_NULSTR PATH_SBIN_BIN_NULSTR("/") +#define DEFAULT_PATH_COMPAT PATH_SPLIT_SBIN_BIN("/usr/local/") ":" PATH_SPLIT_SBIN_BIN("/usr/") ":" PATH_SPLIT_SBIN_BIN("/") + +#if HAVE_SPLIT_USR +# define DEFAULT_PATH DEFAULT_PATH_SPLIT_USR +# define DEFAULT_PATH_NULSTR DEFAULT_PATH_SPLIT_USR_NULSTR +#else +# define DEFAULT_PATH DEFAULT_PATH_NORMAL +# define DEFAULT_PATH_NULSTR DEFAULT_PATH_NORMAL_NULSTR +#endif + +#ifndef DEFAULT_USER_PATH +# define DEFAULT_USER_PATH DEFAULT_PATH +#endif + +static inline bool is_path(const char *p) { + if (!p) /* A NULL pointer is definitely not a path */ + return false; + + return strchr(p, '/'); +} + +static inline bool path_is_absolute(const char *p) { + if (!p) /* A NULL pointer is definitely not an absolute path */ + return false; + + return p[0] == '/'; +} + +int path_split_and_make_absolute(const char *p, char ***ret); +char* path_make_absolute(const char *p, const char *prefix); +int safe_getcwd(char **ret); +int path_make_absolute_cwd(const char *p, char **ret); +int path_make_relative(const char *from, const char *to, char **ret); +int path_make_relative_parent(const char *from_child, const char *to, char **ret); +char* path_startswith_full(const char *path, const char *prefix, bool accept_dot_dot) _pure_; +static inline char* path_startswith(const char *path, const char *prefix) { + return path_startswith_full(path, prefix, true); +} +int path_compare(const char *a, const char *b) _pure_; + +static inline bool path_equal(const char *a, const char *b) { + return path_compare(a, b) == 0; +} + +bool path_equal_or_files_same(const char *a, const char *b, int flags); +/* Compares only the last portion of the input paths, ie: the filenames */ +bool path_equal_filename(const char *a, const char *b); + +char* path_extend_internal(char **x, ...); +#define path_extend(x, ...) path_extend_internal(x, __VA_ARGS__, POINTER_MAX) +#define path_join(...) path_extend_internal(NULL, __VA_ARGS__, POINTER_MAX) + +typedef enum PathSimplifyFlags { + PATH_SIMPLIFY_KEEP_TRAILING_SLASH = 1 << 0, +} PathSimplifyFlags; + +char* path_simplify_full(char *path, PathSimplifyFlags flags); +static inline char* path_simplify(char *path) { + return path_simplify_full(path, 0); +} + +static inline bool path_equal_ptr(const char *a, const char *b) { + return !!a == !!b && (!a || path_equal(a, b)); +} + +/* Note: the search terminates on the first NULL item. */ +#define PATH_IN_SET(p, ...) path_strv_contains(STRV_MAKE(__VA_ARGS__), p) + +char* path_startswith_strv(const char *p, char **set); +#define PATH_STARTSWITH_SET(p, ...) path_startswith_strv(p, STRV_MAKE(__VA_ARGS__)) + +int path_strv_make_absolute_cwd(char **l); +char** path_strv_resolve(char **l, const char *root); +char** path_strv_resolve_uniq(char **l, const char *root); + +int find_executable_full(const char *name, const char *root, char **exec_search_path, bool use_path_envvar, char **ret_filename, int *ret_fd); +static inline int find_executable(const char *name, char **ret_filename) { + return find_executable_full(name, /* root= */ NULL, NULL, true, ret_filename, NULL); +} + +bool paths_check_timestamp(const char* const* paths, usec_t *paths_ts_usec, bool update); + +int fsck_exists(void); +int fsck_exists_for_fstype(const char *fstype); + +/* Iterates through the path prefixes of the specified path, going up + * the tree, to root. Also returns "" (and not "/"!) for the root + * directory. Excludes the specified directory itself */ +#define PATH_FOREACH_PREFIX(prefix, path) \ + for (char *_slash = ({ \ + path_simplify(strcpy(prefix, path)); \ + streq(prefix, "/") ? NULL : strrchr(prefix, '/'); \ + }); \ + _slash && ((*_slash = 0), true); \ + _slash = strrchr((prefix), '/')) + +/* Same as PATH_FOREACH_PREFIX but also includes the specified path itself */ +#define PATH_FOREACH_PREFIX_MORE(prefix, path) \ + for (char *_slash = ({ \ + path_simplify(strcpy(prefix, path)); \ + if (streq(prefix, "/")) \ + prefix[0] = 0; \ + strrchr(prefix, 0); \ + }); \ + _slash && ((*_slash = 0), true); \ + _slash = strrchr((prefix), '/')) + +/* Similar to path_join(), but only works for two components, and only the first one may be NULL and returns + * an alloca() buffer, or possibly a const pointer into the path parameter. */ +#define prefix_roota(root, path) \ + ({ \ + const char* _path = (path), *_root = (root), *_ret; \ + char *_p, *_n; \ + size_t _l; \ + while (_path[0] == '/' && _path[1] == '/') \ + _path ++; \ + if (isempty(_root)) \ + _ret = _path; \ + else { \ + _l = strlen(_root) + 1 + strlen(_path) + 1; \ + _n = newa(char, _l); \ + _p = stpcpy(_n, _root); \ + while (_p > _n && _p[-1] == '/') \ + _p--; \ + if (_path[0] != '/') \ + *(_p++) = '/'; \ + strcpy(_p, _path); \ + _ret = _n; \ + } \ + _ret; \ + }) + +int path_find_first_component(const char **p, bool accept_dot_dot, const char **ret); +int path_find_last_component(const char *path, bool accept_dot_dot, const char **next, const char **ret); +const char* last_path_component(const char *path); +int path_extract_filename(const char *path, char **ret); +int path_extract_directory(const char *path, char **ret); + +bool filename_is_valid(const char *p) _pure_; +bool path_is_valid_full(const char *p, bool accept_dot_dot) _pure_; +static inline bool path_is_valid(const char *p) { + return path_is_valid_full(p, /* accept_dot_dot= */ true); +} +static inline bool path_is_safe(const char *p) { + return path_is_valid_full(p, /* accept_dot_dot= */ false); +} +bool path_is_normalized(const char *p) _pure_; + +char *file_in_same_dir(const char *path, const char *filename); + +bool hidden_or_backup_file(const char *filename) _pure_; + +bool is_device_path(const char *path); + +bool valid_device_node_path(const char *path); +bool valid_device_allow_pattern(const char *path); + +bool dot_or_dot_dot(const char *path); + +static inline const char *skip_dev_prefix(const char *p) { + const char *e; + + /* Drop any /dev prefix if there is any */ + + e = path_startswith(p, "/dev/"); + + return e ?: p; +} + +bool empty_or_root(const char *path); +static inline const char* empty_to_root(const char *path) { + return isempty(path) ? "/" : path; +} + +bool path_strv_contains(char **l, const char *path); +bool prefixed_path_strv_contains(char **l, const char *path); + +int path_glob_can_match(const char *pattern, const char *prefix, char **ret); diff --git a/src/basic/pcapng.h b/src/basic/pcapng.h new file mode 100644 index 0000000..57c3af5 --- /dev/null +++ b/src/basic/pcapng.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +/* + * For details about the file format see RFC: + * https://www.ietf.org/id/draft-tuexen-opsawg-pcapng-03.html + * and + * https://github.com/pcapng/pcapng/ + */ +enum pcapng_block_types { + PCAPNG_INTERFACE_BLOCK = 1, + PCAPNG_PACKET_BLOCK, /* Obsolete */ + PCAPNG_SIMPLE_PACKET_BLOCK, + PCAPNG_NAME_RESOLUTION_BLOCK, + PCAPNG_INTERFACE_STATS_BLOCK, + PCAPNG_ENHANCED_PACKET_BLOCK, + + PCAPNG_SECTION_BLOCK = 0x0A0D0D0A, +}; + +struct pcapng_option { + uint16_t code; + uint16_t length; + uint8_t data[]; +}; + +#define PCAPNG_BYTE_ORDER_MAGIC 0x1A2B3C4D +#define PCAPNG_MAJOR_VERS 1 +#define PCAPNG_MINOR_VERS 0 + +enum pcapng_opt { + PCAPNG_OPT_END = 0, + PCAPNG_OPT_COMMENT = 1, +}; + +struct pcapng_section { + uint32_t block_type; + uint32_t block_length; + uint32_t byte_order_magic; + uint16_t major_version; + uint16_t minor_version; + uint64_t section_length; +}; + +enum pcapng_section_opt { + PCAPNG_SHB_HARDWARE = 2, + PCAPNG_SHB_OS = 3, + PCAPNG_SHB_USERAPPL = 4, +}; + +struct pcapng_interface_block { + uint32_t block_type; /* 1 */ + uint32_t block_length; + uint16_t link_type; + uint16_t reserved; + uint32_t snap_len; +}; + +enum pcapng_interface_options { + PCAPNG_IFB_NAME = 2, + PCAPNG_IFB_DESCRIPTION, + PCAPNG_IFB_IPV4ADDR, + PCAPNG_IFB_IPV6ADDR, + PCAPNG_IFB_MACADDR, + PCAPNG_IFB_EUIADDR, + PCAPNG_IFB_SPEED, + PCAPNG_IFB_TSRESOL, + PCAPNG_IFB_TZONE, + PCAPNG_IFB_FILTER, + PCAPNG_IFB_OS, + PCAPNG_IFB_FCSLEN, + PCAPNG_IFB_TSOFFSET, + PCAPNG_IFB_HARDWARE, +}; + +struct pcapng_enhance_packet_block { + uint32_t block_type; /* 6 */ + uint32_t block_length; + uint32_t interface_id; + uint32_t timestamp_hi; + uint32_t timestamp_lo; + uint32_t capture_length; + uint32_t original_length; +}; + +/* Flags values */ +#define PCAPNG_IFB_INBOUND 0b01 +#define PCAPNG_IFB_OUTBOUND 0b10 + +enum pcapng_epb_options { + PCAPNG_EPB_FLAGS = 2, + PCAPNG_EPB_HASH, + PCAPNG_EPB_DROPCOUNT, + PCAPNG_EPB_PACKETID, + PCAPNG_EPB_QUEUE, + PCAPNG_EPB_VERDICT, +}; + +struct pcapng_statistics_block { + uint32_t block_type; /* 5 */ + uint32_t block_length; + uint32_t interface_id; + uint32_t timestamp_hi; + uint32_t timestamp_lo; +}; + +enum pcapng_isb_options { + PCAPNG_ISB_STARTTIME = 2, + PCAPNG_ISB_ENDTIME, + PCAPNG_ISB_IFRECV, + PCAPNG_ISB_IFDROP, + PCAPNG_ISB_FILTERACCEPT, + PCAPNG_ISB_OSDROP, + PCAPNG_ISB_USRDELIV, +}; diff --git a/src/basic/percent-util.c b/src/basic/percent-util.c new file mode 100644 index 0000000..cab9d0e --- /dev/null +++ b/src/basic/percent-util.c @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "percent-util.h" +#include "string-util.h" +#include "parse-util.h" + +static int parse_parts_value_whole(const char *p, const char *symbol) { + const char *pc, *n; + int r, v; + + pc = endswith(p, symbol); + if (!pc) + return -EINVAL; + + n = strndupa_safe(p, pc - p); + r = safe_atoi(n, &v); + if (r < 0) + return r; + if (v < 0) + return -ERANGE; + + return v; +} + +static int parse_parts_value_with_tenths_place(const char *p, const char *symbol) { + const char *pc, *dot, *n; + int r, q, v; + + pc = endswith(p, symbol); + if (!pc) + return -EINVAL; + + dot = memchr(p, '.', pc - p); + if (dot) { + if (dot + 2 != pc) + return -EINVAL; + if (dot[1] < '0' || dot[1] > '9') + return -EINVAL; + q = dot[1] - '0'; + n = strndupa_safe(p, dot - p); + } else { + q = 0; + n = strndupa_safe(p, pc - p); + } + r = safe_atoi(n, &v); + if (r < 0) + return r; + if (v < 0) + return -ERANGE; + if (v > (INT_MAX - q) / 10) + return -ERANGE; + + v = v * 10 + q; + return v; +} + +static int parse_parts_value_with_hundredths_place(const char *p, const char *symbol) { + const char *pc, *dot, *n; + int r, q, v; + + pc = endswith(p, symbol); + if (!pc) + return -EINVAL; + + dot = memchr(p, '.', pc - p); + if (dot) { + if (dot + 3 == pc) { + /* Support two places after the dot */ + + if (dot[1] < '0' || dot[1] > '9' || dot[2] < '0' || dot[2] > '9') + return -EINVAL; + q = (dot[1] - '0') * 10 + (dot[2] - '0'); + + } else if (dot + 2 == pc) { + /* Support one place after the dot */ + + if (dot[1] < '0' || dot[1] > '9') + return -EINVAL; + q = (dot[1] - '0') * 10; + } else + /* We do not support zero or more than two places */ + return -EINVAL; + + n = strndupa_safe(p, dot - p); + } else { + q = 0; + n = strndupa_safe(p, pc - p); + } + r = safe_atoi(n, &v); + if (r < 0) + return r; + if (v < 0) + return -ERANGE; + if (v > (INT_MAX - q) / 100) + return -ERANGE; + + v = v * 100 + q; + return v; +} + +int parse_percent_unbounded(const char *p) { + return parse_parts_value_whole(p, "%"); +} + +int parse_percent(const char *p) { + int v; + + v = parse_percent_unbounded(p); + if (v > 100) + return -ERANGE; + + return v; +} + +int parse_permille_unbounded(const char *p) { + const char *pm; + + pm = endswith(p, "‰"); + if (pm) + return parse_parts_value_whole(p, "‰"); + + return parse_parts_value_with_tenths_place(p, "%"); +} + +int parse_permille(const char *p) { + int v; + + v = parse_permille_unbounded(p); + if (v > 1000) + return -ERANGE; + + return v; +} + +int parse_permyriad_unbounded(const char *p) { + const char *pm; + + pm = endswith(p, "‱"); + if (pm) + return parse_parts_value_whole(p, "‱"); + + pm = endswith(p, "‰"); + if (pm) + return parse_parts_value_with_tenths_place(p, "‰"); + + return parse_parts_value_with_hundredths_place(p, "%"); +} + +int parse_permyriad(const char *p) { + int v; + + v = parse_permyriad_unbounded(p); + if (v > 10000) + return -ERANGE; + + return v; +} diff --git a/src/basic/percent-util.h b/src/basic/percent-util.h new file mode 100644 index 0000000..e975d6e --- /dev/null +++ b/src/basic/percent-util.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +#include "macro.h" + +int parse_percent_unbounded(const char *p); +int parse_percent(const char *p); + +int parse_permille_unbounded(const char *p); +int parse_permille(const char *p); + +int parse_permyriad_unbounded(const char *p); +int parse_permyriad(const char *p); + +/* Some macro-like helpers that convert a percent/permille/permyriad value (as parsed by parse_percent()) to + * a value relative to 100% == 2^32-1. Rounds to closest. */ +static inline uint32_t UINT32_SCALE_FROM_PERCENT(int percent) { + assert_cc(INT_MAX <= UINT32_MAX); + + return (uint32_t) (((uint64_t) CLAMP(percent, 0, 100) * UINT32_MAX + 50) / 100U); +} + +static inline uint32_t UINT32_SCALE_FROM_PERMILLE(int permille) { + return (uint32_t) (((uint64_t) CLAMP(permille, 0, 1000) * UINT32_MAX + 500) / 1000U); +} + +static inline uint32_t UINT32_SCALE_FROM_PERMYRIAD(int permyriad) { + return (uint32_t) (((uint64_t) CLAMP(permyriad, 0, 10000) * UINT32_MAX + 5000) / 10000U); +} + +static inline int UINT32_SCALE_TO_PERCENT(uint32_t scale) { + uint32_t u; + + u = (uint32_t) ((((uint64_t) scale) * 100U + UINT32_MAX/2) / UINT32_MAX); + if (u > INT_MAX) + return -ERANGE; + + return (int) u; +} + +static inline int UINT32_SCALE_TO_PERMILLE(uint32_t scale) { + uint32_t u; + + u = (uint32_t) ((((uint64_t) scale) * 1000U + UINT32_MAX/2) / UINT32_MAX); + if (u > INT_MAX) + return -ERANGE; + + return (int) u; +} + +static inline int UINT32_SCALE_TO_PERMYRIAD(uint32_t scale) { + uint32_t u; + + u = (uint32_t) ((((uint64_t) scale) * 10000U + UINT32_MAX/2) / UINT32_MAX); + if (u > INT_MAX) + return -ERANGE; + + return (int) u; +} + +#define PERMYRIAD_AS_PERCENT_FORMAT_STR "%i.%02i%%" +#define PERMYRIAD_AS_PERCENT_FORMAT_VAL(x) ((x)/100), ((x)%100) diff --git a/src/basic/prioq.c b/src/basic/prioq.c new file mode 100644 index 0000000..c15dcb2 --- /dev/null +++ b/src/basic/prioq.c @@ -0,0 +1,310 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +/* + * Priority Queue + * The prioq object implements a priority queue. That is, it orders objects by + * their priority and allows O(1) access to the object with the highest + * priority. Insertion and removal are Θ(log n). Optionally, the caller can + * provide a pointer to an index which will be kept up-to-date by the prioq. + * + * The underlying algorithm used in this implementation is a Heap. + */ + +#include +#include + +#include "alloc-util.h" +#include "hashmap.h" +#include "prioq.h" + +struct prioq_item { + void *data; + unsigned *idx; +}; + +struct Prioq { + compare_func_t compare_func; + unsigned n_items, n_allocated; + + struct prioq_item *items; +}; + +Prioq *prioq_new(compare_func_t compare_func) { + Prioq *q; + + q = new(Prioq, 1); + if (!q) + return q; + + *q = (Prioq) { + .compare_func = compare_func, + }; + + return q; +} + +Prioq* prioq_free(Prioq *q) { + if (!q) + return NULL; + + free(q->items); + return mfree(q); +} + +int prioq_ensure_allocated(Prioq **q, compare_func_t compare_func) { + assert(q); + + if (*q) + return 0; + + *q = prioq_new(compare_func); + if (!*q) + return -ENOMEM; + + return 0; +} + +static void swap(Prioq *q, unsigned j, unsigned k) { + assert(q); + assert(j < q->n_items); + assert(k < q->n_items); + + assert(!q->items[j].idx || *(q->items[j].idx) == j); + assert(!q->items[k].idx || *(q->items[k].idx) == k); + + SWAP_TWO(q->items[j].data, q->items[k].data); + SWAP_TWO(q->items[j].idx, q->items[k].idx); + + if (q->items[j].idx) + *q->items[j].idx = j; + + if (q->items[k].idx) + *q->items[k].idx = k; +} + +static unsigned shuffle_up(Prioq *q, unsigned idx) { + assert(q); + assert(idx < q->n_items); + + while (idx > 0) { + unsigned k; + + k = (idx-1)/2; + + if (q->compare_func(q->items[k].data, q->items[idx].data) <= 0) + break; + + swap(q, idx, k); + idx = k; + } + + return idx; +} + +static unsigned shuffle_down(Prioq *q, unsigned idx) { + assert(q); + + for (;;) { + unsigned j, k, s; + + k = (idx+1)*2; /* right child */ + j = k-1; /* left child */ + + if (j >= q->n_items) + break; + + if (q->compare_func(q->items[j].data, q->items[idx].data) < 0) + + /* So our left child is smaller than we are, let's + * remember this fact */ + s = j; + else + s = idx; + + if (k < q->n_items && + q->compare_func(q->items[k].data, q->items[s].data) < 0) + + /* So our right child is smaller than we are, let's + * remember this fact */ + s = k; + + /* s now points to the smallest of the three items */ + + if (s == idx) + /* No swap necessary, we're done */ + break; + + swap(q, idx, s); + idx = s; + } + + return idx; +} + +int prioq_put(Prioq *q, void *data, unsigned *idx) { + struct prioq_item *i; + unsigned k; + + assert(q); + + if (q->n_items >= q->n_allocated) { + unsigned n; + struct prioq_item *j; + + n = MAX((q->n_items+1) * 2, 16u); + j = reallocarray(q->items, n, sizeof(struct prioq_item)); + if (!j) + return -ENOMEM; + + q->items = j; + q->n_allocated = n; + } + + k = q->n_items++; + i = q->items + k; + i->data = data; + i->idx = idx; + + if (idx) + *idx = k; + + shuffle_up(q, k); + + return 0; +} + +int prioq_ensure_put(Prioq **q, compare_func_t compare_func, void *data, unsigned *idx) { + int r; + + r = prioq_ensure_allocated(q, compare_func); + if (r < 0) + return r; + + return prioq_put(*q, data, idx); +} + +static void remove_item(Prioq *q, struct prioq_item *i) { + struct prioq_item *l; + + assert(q); + assert(i); + + l = q->items + q->n_items - 1; + + if (i == l) + /* Last entry, let's just remove it */ + q->n_items--; + else { + unsigned k; + + /* Not last entry, let's replace the last entry with + * this one, and reshuffle */ + + k = i - q->items; + + i->data = l->data; + i->idx = l->idx; + if (i->idx) + *i->idx = k; + q->n_items--; + + k = shuffle_down(q, k); + shuffle_up(q, k); + } +} + +_pure_ static struct prioq_item* find_item(Prioq *q, void *data, unsigned *idx) { + struct prioq_item *i; + + assert(q); + + if (q->n_items <= 0) + return NULL; + + if (idx) { + if (*idx == PRIOQ_IDX_NULL || + *idx >= q->n_items) + return NULL; + + i = q->items + *idx; + if (i->data != data) + return NULL; + + return i; + } else { + for (i = q->items; i < q->items + q->n_items; i++) + if (i->data == data) + return i; + return NULL; + } +} + +int prioq_remove(Prioq *q, void *data, unsigned *idx) { + struct prioq_item *i; + + if (!q) + return 0; + + i = find_item(q, data, idx); + if (!i) + return 0; + + remove_item(q, i); + return 1; +} + +int prioq_reshuffle(Prioq *q, void *data, unsigned *idx) { + struct prioq_item *i; + unsigned k; + + assert(q); + + i = find_item(q, data, idx); + if (!i) + return 0; + + k = i - q->items; + k = shuffle_down(q, k); + shuffle_up(q, k); + return 1; +} + +void *prioq_peek_by_index(Prioq *q, unsigned idx) { + if (!q) + return NULL; + + if (idx >= q->n_items) + return NULL; + + return q->items[idx].data; +} + +void *prioq_pop(Prioq *q) { + void *data; + + if (!q) + return NULL; + + if (q->n_items <= 0) + return NULL; + + data = q->items[0].data; + remove_item(q, q->items); + return data; +} + +unsigned prioq_size(Prioq *q) { + + if (!q) + return 0; + + return q->n_items; +} + +bool prioq_isempty(Prioq *q) { + + if (!q) + return true; + + return q->n_items <= 0; +} diff --git a/src/basic/prioq.h b/src/basic/prioq.h new file mode 100644 index 0000000..508db88 --- /dev/null +++ b/src/basic/prioq.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#include "hashmap.h" +#include "macro.h" + +typedef struct Prioq Prioq; + +#define PRIOQ_IDX_NULL (UINT_MAX) + +Prioq *prioq_new(compare_func_t compare); +Prioq *prioq_free(Prioq *q); +DEFINE_TRIVIAL_CLEANUP_FUNC(Prioq*, prioq_free); +int prioq_ensure_allocated(Prioq **q, compare_func_t compare_func); + +int prioq_put(Prioq *q, void *data, unsigned *idx); +int prioq_ensure_put(Prioq **q, compare_func_t compare_func, void *data, unsigned *idx); +int prioq_remove(Prioq *q, void *data, unsigned *idx); +int prioq_reshuffle(Prioq *q, void *data, unsigned *idx); + +void *prioq_peek_by_index(Prioq *q, unsigned idx) _pure_; +static inline void *prioq_peek(Prioq *q) { + return prioq_peek_by_index(q, 0); +} +void *prioq_pop(Prioq *q); + +#define PRIOQ_FOREACH_ITEM(q, p) \ + for (unsigned _i = 0; (p = prioq_peek_by_index(q, _i)); _i++) + +unsigned prioq_size(Prioq *q) _pure_; +bool prioq_isempty(Prioq *q) _pure_; diff --git a/src/basic/proc-cmdline.c b/src/basic/proc-cmdline.c new file mode 100644 index 0000000..a3aee28 --- /dev/null +++ b/src/basic/proc-cmdline.c @@ -0,0 +1,376 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include + +#include "alloc-util.h" +#include "efivars.h" +#include "extract-word.h" +#include "fileio.h" +#include "macro.h" +#include "parse-util.h" +#include "proc-cmdline.h" +#include "process-util.h" +#include "special.h" +#include "string-util.h" +#include "util.h" +#include "virt.h" + +int proc_cmdline(char **ret) { + const char *e; + assert(ret); + + /* For testing purposes it is sometimes useful to be able to override what we consider /proc/cmdline to be */ + e = secure_getenv("SYSTEMD_PROC_CMDLINE"); + if (e) { + char *m; + + m = strdup(e); + if (!m) + return -ENOMEM; + + *ret = m; + return 0; + } + + if (detect_container() > 0) + return get_process_cmdline(1, SIZE_MAX, 0, ret); + + return read_virtual_file("/proc/cmdline", SIZE_MAX, ret, NULL); +} + +static int proc_cmdline_extract_first(const char **p, char **ret_word, ProcCmdlineFlags flags) { + const char *q = *p; + int r; + + for (;;) { + _cleanup_free_ char *word = NULL; + const char *c; + + r = extract_first_word(&q, &word, NULL, EXTRACT_UNQUOTE|EXTRACT_RELAX|EXTRACT_RETAIN_ESCAPE); + if (r < 0) + return r; + if (r == 0) + break; + + /* Filter out arguments that are intended only for the initrd */ + c = startswith(word, "rd."); + if (c) { + if (!in_initrd()) + continue; + + if (FLAGS_SET(flags, PROC_CMDLINE_STRIP_RD_PREFIX)) { + r = free_and_strdup(&word, c); + if (r < 0) + return r; + } + + } else if (FLAGS_SET(flags, PROC_CMDLINE_RD_STRICT) && in_initrd()) + continue; /* And optionally filter out arguments that are intended only for the host */ + + *p = q; + *ret_word = TAKE_PTR(word); + return 1; + } + + *p = q; + *ret_word = NULL; + return 0; +} + +int proc_cmdline_parse_given(const char *line, proc_cmdline_parse_t parse_item, void *data, ProcCmdlineFlags flags) { + const char *p; + int r; + + assert(parse_item); + + /* The PROC_CMDLINE_VALUE_OPTIONAL flag doesn't really make sense for proc_cmdline_parse(), let's make this + * clear. */ + assert(!FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL)); + + p = line; + for (;;) { + _cleanup_free_ char *word = NULL; + char *value; + + r = proc_cmdline_extract_first(&p, &word, flags); + if (r < 0) + return r; + if (r == 0) + break; + + value = strchr(word, '='); + if (value) + *(value++) = 0; + + r = parse_item(word, value, data); + if (r < 0) + return r; + } + + return 0; +} + +int proc_cmdline_parse(proc_cmdline_parse_t parse_item, void *data, ProcCmdlineFlags flags) { + _cleanup_free_ char *line = NULL; + int r; + + assert(parse_item); + + /* We parse the EFI variable first, because later settings have higher priority. */ + + if (!FLAGS_SET(flags, PROC_CMDLINE_IGNORE_EFI_OPTIONS)) { + r = systemd_efi_options_variable(&line); + if (r < 0) { + if (r != -ENODATA) + log_debug_errno(r, "Failed to get SystemdOptions EFI variable, ignoring: %m"); + } else { + r = proc_cmdline_parse_given(line, parse_item, data, flags); + if (r < 0) + return r; + + line = mfree(line); + } + } + + r = proc_cmdline(&line); + if (r < 0) + return r; + + return proc_cmdline_parse_given(line, parse_item, data, flags); +} + +static bool relaxed_equal_char(char a, char b) { + return a == b || + (a == '_' && b == '-') || + (a == '-' && b == '_'); +} + +char *proc_cmdline_key_startswith(const char *s, const char *prefix) { + assert(s); + assert(prefix); + + /* Much like startswith(), but considers "-" and "_" the same */ + + for (; *prefix != 0; s++, prefix++) + if (!relaxed_equal_char(*s, *prefix)) + return NULL; + + return (char*) s; +} + +bool proc_cmdline_key_streq(const char *x, const char *y) { + assert(x); + assert(y); + + /* Much like streq(), but considers "-" and "_" the same */ + + for (; *x != 0 || *y != 0; x++, y++) + if (!relaxed_equal_char(*x, *y)) + return false; + + return true; +} + +static int cmdline_get_key(const char *line, const char *key, ProcCmdlineFlags flags, char **ret_value) { + _cleanup_free_ char *ret = NULL; + bool found = false; + const char *p; + int r; + + assert(line); + assert(key); + + p = line; + for (;;) { + _cleanup_free_ char *word = NULL; + + r = proc_cmdline_extract_first(&p, &word, flags); + if (r < 0) + return r; + if (r == 0) + break; + + if (ret_value) { + const char *e; + + e = proc_cmdline_key_startswith(word, key); + if (!e) + continue; + + if (*e == '=') { + r = free_and_strdup(&ret, e+1); + if (r < 0) + return r; + + found = true; + + } else if (*e == 0 && FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL)) + found = true; + + } else { + if (streq(word, key)) { + found = true; + break; /* we found what we were looking for */ + } + } + } + + if (ret_value) + *ret_value = TAKE_PTR(ret); + + return found; +} + +int proc_cmdline_get_key(const char *key, ProcCmdlineFlags flags, char **ret_value) { + _cleanup_free_ char *line = NULL, *v = NULL; + int r; + + /* Looks for a specific key on the kernel command line and (with lower priority) the EFI variable. + * Supports three modes: + * + * a) The "ret_value" parameter is used. In this case a parameter beginning with the "key" string followed by + * "=" is searched for, and the value following it is returned in "ret_value". + * + * b) as above, but the PROC_CMDLINE_VALUE_OPTIONAL flag is set. In this case if the key is found as a separate + * word (i.e. not followed by "=" but instead by whitespace or the end of the command line), then this is + * also accepted, and "value" is returned as NULL. + * + * c) The "ret_value" parameter is NULL. In this case a search for the exact "key" parameter is performed. + * + * In all three cases, > 0 is returned if the key is found, 0 if not. */ + + if (isempty(key)) + return -EINVAL; + + if (FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL) && !ret_value) + return -EINVAL; + + r = proc_cmdline(&line); + if (r < 0) + return r; + + if (FLAGS_SET(flags, PROC_CMDLINE_IGNORE_EFI_OPTIONS)) /* Shortcut */ + return cmdline_get_key(line, key, flags, ret_value); + + r = cmdline_get_key(line, key, flags, ret_value ? &v : NULL); + if (r < 0) + return r; + if (r > 0) { + if (ret_value) + *ret_value = TAKE_PTR(v); + + return r; + } + + line = mfree(line); + r = systemd_efi_options_variable(&line); + if (r == -ENODATA) { + if (ret_value) + *ret_value = NULL; + + return false; /* Not found */ + } + if (r < 0) + return r; + + return cmdline_get_key(line, key, flags, ret_value); +} + +int proc_cmdline_get_bool(const char *key, bool *ret) { + _cleanup_free_ char *v = NULL; + int r; + + assert(ret); + + r = proc_cmdline_get_key(key, PROC_CMDLINE_VALUE_OPTIONAL, &v); + if (r < 0) + return r; + if (r == 0) { /* key not specified at all */ + *ret = false; + return 0; + } + + if (v) { /* key with parameter passed */ + r = parse_boolean(v); + if (r < 0) + return r; + *ret = r; + } else /* key without parameter passed */ + *ret = true; + + return 1; +} + +int proc_cmdline_get_key_many_internal(ProcCmdlineFlags flags, ...) { + _cleanup_free_ char *line = NULL; + bool processing_efi = true; + const char *p; + va_list ap; + int r, ret = 0; + + /* The PROC_CMDLINE_VALUE_OPTIONAL flag doesn't really make sense for proc_cmdline_get_key_many(), let's make + * this clear. */ + assert(!FLAGS_SET(flags, PROC_CMDLINE_VALUE_OPTIONAL)); + + /* This call may clobber arguments on failure! */ + + if (!FLAGS_SET(flags, PROC_CMDLINE_IGNORE_EFI_OPTIONS)) { + r = systemd_efi_options_variable(&line); + if (r < 0 && r != -ENODATA) + log_debug_errno(r, "Failed to get SystemdOptions EFI variable, ignoring: %m"); + } + + p = line; + for (;;) { + _cleanup_free_ char *word = NULL; + + r = proc_cmdline_extract_first(&p, &word, flags); + if (r < 0) + return r; + if (r == 0) { + /* We finished with this command line. If this was the EFI one, then let's proceed with the regular one */ + if (processing_efi) { + processing_efi = false; + + line = mfree(line); + r = proc_cmdline(&line); + if (r < 0) + return r; + + p = line; + continue; + } + + break; + } + + va_start(ap, flags); + + for (;;) { + char **v; + const char *k, *e; + + k = va_arg(ap, const char*); + if (!k) + break; + + assert_se(v = va_arg(ap, char**)); + + e = proc_cmdline_key_startswith(word, k); + if (e && *e == '=') { + r = free_and_strdup(v, e + 1); + if (r < 0) { + va_end(ap); + return r; + } + + ret++; + } + } + + va_end(ap); + } + + return ret; +} diff --git a/src/basic/proc-cmdline.h b/src/basic/proc-cmdline.h new file mode 100644 index 0000000..45f3a27 --- /dev/null +++ b/src/basic/proc-cmdline.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#include "log.h" + +typedef enum ProcCmdlineFlags { + PROC_CMDLINE_STRIP_RD_PREFIX = 1 << 0, /* automatically strip "rd." prefix if it is set (and we are in the initrd, since otherwise we'd not consider it anyway) */ + PROC_CMDLINE_VALUE_OPTIONAL = 1 << 1, /* the value is optional (for boolean switches that can omit the value) */ + PROC_CMDLINE_RD_STRICT = 1 << 2, /* ignore this in the initrd */ + PROC_CMDLINE_IGNORE_EFI_OPTIONS = 1 << 3, /* don't check systemd's private EFI variable */ +} ProcCmdlineFlags; + +typedef int (*proc_cmdline_parse_t)(const char *key, const char *value, void *data); + +int proc_cmdline(char **ret); + +int proc_cmdline_parse_given(const char *line, proc_cmdline_parse_t parse_item, void *data, ProcCmdlineFlags flags); +int proc_cmdline_parse(const proc_cmdline_parse_t parse, void *userdata, ProcCmdlineFlags flags); + +int proc_cmdline_get_key(const char *parameter, ProcCmdlineFlags flags, char **value); +int proc_cmdline_get_bool(const char *key, bool *ret); + +int proc_cmdline_get_key_many_internal(ProcCmdlineFlags flags, ...); +#define proc_cmdline_get_key_many(flags, ...) proc_cmdline_get_key_many_internal(flags, __VA_ARGS__, NULL) + +char *proc_cmdline_key_startswith(const char *s, const char *prefix); +bool proc_cmdline_key_streq(const char *x, const char *y); + +/* A little helper call, to be used in proc_cmdline_parse_t callbacks */ +static inline bool proc_cmdline_value_missing(const char *key, const char *value) { + if (!value) { + log_warning("Missing argument for %s= kernel command line switch, ignoring.", key); + return true; + } + + return false; +} diff --git a/src/basic/process-util.c b/src/basic/process-util.c new file mode 100644 index 0000000..35246a9 --- /dev/null +++ b/src/basic/process-util.c @@ -0,0 +1,1691 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if HAVE_VALGRIND_VALGRIND_H +#include +#endif + +#include "alloc-util.h" +#include "architecture.h" +#include "env-util.h" +#include "errno-util.h" +#include "escape.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "locale-util.h" +#include "log.h" +#include "macro.h" +#include "memory-util.h" +#include "missing_sched.h" +#include "missing_syscall.h" +#include "missing_threads.h" +#include "namespace-util.h" +#include "path-util.h" +#include "process-util.h" +#include "raw-clone.h" +#include "rlimit-util.h" +#include "signal-util.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "string-table.h" +#include "string-util.h" +#include "terminal-util.h" +#include "user-util.h" +#include "utf8.h" + +/* The kernel limits userspace processes to TASK_COMM_LEN (16 bytes), but allows higher values for its own + * workers, e.g. "kworker/u9:3-kcryptd/253:0". Let's pick a fixed smallish limit that will work for the kernel. + */ +#define COMM_MAX_LEN 128 + +static int get_process_state(pid_t pid) { + _cleanup_free_ char *line = NULL; + const char *p; + char state; + int r; + + assert(pid >= 0); + + /* Shortcut: if we are enquired about our own state, we are obviously running */ + if (pid == 0 || pid == getpid_cached()) + return (unsigned char) 'R'; + + p = procfs_file_alloca(pid, "stat"); + + r = read_one_line_file(p, &line); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) + return r; + + p = strrchr(line, ')'); + if (!p) + return -EIO; + + p++; + + if (sscanf(p, " %c", &state) != 1) + return -EIO; + + return (unsigned char) state; +} + +int get_process_comm(pid_t pid, char **ret) { + _cleanup_free_ char *escaped = NULL, *comm = NULL; + int r; + + assert(ret); + assert(pid >= 0); + + if (pid == 0 || pid == getpid_cached()) { + comm = new0(char, TASK_COMM_LEN + 1); /* Must fit in 16 byte according to prctl(2) */ + if (!comm) + return -ENOMEM; + + if (prctl(PR_GET_NAME, comm) < 0) + return -errno; + } else { + const char *p; + + p = procfs_file_alloca(pid, "comm"); + + /* Note that process names of kernel threads can be much longer than TASK_COMM_LEN */ + r = read_one_line_file(p, &comm); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) + return r; + } + + escaped = new(char, COMM_MAX_LEN); + if (!escaped) + return -ENOMEM; + + /* Escape unprintable characters, just in case, but don't grow the string beyond the underlying size */ + cellescape(escaped, COMM_MAX_LEN, comm); + + *ret = TAKE_PTR(escaped); + return 0; +} + +static int get_process_cmdline_nulstr( + pid_t pid, + size_t max_size, + ProcessCmdlineFlags flags, + char **ret, + size_t *ret_size) { + + const char *p; + char *t; + size_t k; + int r; + + /* Retrieves a process' command line as a "sized nulstr", i.e. possibly without the last NUL, but + * with a specified size. + * + * If PROCESS_CMDLINE_COMM_FALLBACK is specified in flags and the process has no command line set + * (the case for kernel threads), or has a command line that resolves to the empty string, will + * return the "comm" name of the process instead. This will use at most _SC_ARG_MAX bytes of input + * data. + * + * Returns an error, 0 if output was read but is truncated, 1 otherwise. + */ + + p = procfs_file_alloca(pid, "cmdline"); + r = read_virtual_file(p, max_size, &t, &k); /* Let's assume that each input byte results in >= 1 + * columns of output. We ignore zero-width codepoints. */ + if (r == -ENOENT) + return -ESRCH; + if (r < 0) + return r; + + if (k == 0) { + t = mfree(t); + + if (!(flags & PROCESS_CMDLINE_COMM_FALLBACK)) + return -ENOENT; + + /* Kernel threads have no argv[] */ + _cleanup_free_ char *comm = NULL; + + r = get_process_comm(pid, &comm); + if (r < 0) + return r; + + t = strjoin("[", comm, "]"); + if (!t) + return -ENOMEM; + + k = strlen(t); + r = k <= max_size; + if (r == 0) /* truncation */ + t[max_size] = '\0'; + } + + *ret = t; + *ret_size = k; + return r; +} + +int get_process_cmdline(pid_t pid, size_t max_columns, ProcessCmdlineFlags flags, char **ret) { + _cleanup_free_ char *t = NULL; + size_t k; + char *ans; + + assert(pid >= 0); + assert(ret); + + /* Retrieve and format a commandline. See above for discussion of retrieval options. + * + * There are two main formatting modes: + * + * - when PROCESS_CMDLINE_QUOTE is specified, output is quoted in C/Python style. If no shell special + * characters are present, this output can be copy-pasted into the terminal to execute. UTF-8 + * output is assumed. + * + * - otherwise, a compact non-roundtrippable form is returned. Non-UTF8 bytes are replaced by �. The + * returned string is of the specified console width at most, abbreviated with an ellipsis. + * + * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and + * PROCESS_CMDLINE_COMM_FALLBACK is not specified). Returns 0 and sets *line otherwise. */ + + int full = get_process_cmdline_nulstr(pid, max_columns, flags, &t, &k); + if (full < 0) + return full; + + if (flags & (PROCESS_CMDLINE_QUOTE | PROCESS_CMDLINE_QUOTE_POSIX)) { + ShellEscapeFlags shflags = SHELL_ESCAPE_EMPTY | + FLAGS_SET(flags, PROCESS_CMDLINE_QUOTE_POSIX) * SHELL_ESCAPE_POSIX; + + assert(!(flags & PROCESS_CMDLINE_USE_LOCALE)); + + _cleanup_strv_free_ char **args = NULL; + + args = strv_parse_nulstr(t, k); + if (!args) + return -ENOMEM; + + /* Drop trailing empty strings. See issue #21186. */ + STRV_FOREACH_BACKWARDS(p, args) { + if (!isempty(*p)) + break; + + *p = mfree(*p); + } + + ans = quote_command_line(args, shflags); + if (!ans) + return -ENOMEM; + } else { + /* Arguments are separated by NULs. Let's replace those with spaces. */ + for (size_t i = 0; i < k - 1; i++) + if (t[i] == '\0') + t[i] = ' '; + + delete_trailing_chars(t, WHITESPACE); + + bool eight_bit = (flags & PROCESS_CMDLINE_USE_LOCALE) && !is_locale_utf8(); + + ans = escape_non_printable_full(t, max_columns, + eight_bit * XESCAPE_8_BIT | !full * XESCAPE_FORCE_ELLIPSIS); + if (!ans) + return -ENOMEM; + + ans = str_realloc(ans); + } + + *ret = ans; + return 0; +} + +static int update_argv(const char name[], size_t l) { + static int can_do = -1; + + if (can_do == 0) + return 0; + can_do = false; /* We'll set it to true only if the whole process works */ + + /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the + * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is + * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if + * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but + * mmap() is not. */ + if (geteuid() != 0) + return log_debug_errno(SYNTHETIC_ERRNO(EPERM), + "Skipping PR_SET_MM, as we don't have privileges."); + + static size_t mm_size = 0; + static char *mm = NULL; + int r; + + if (mm_size < l+1) { + size_t nn_size; + char *nn; + + nn_size = PAGE_ALIGN(l+1); + nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (nn == MAP_FAILED) + return log_debug_errno(errno, "mmap() failed: %m"); + + strncpy(nn, name, nn_size); + + /* Now, let's tell the kernel about this new memory */ + if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) { + if (ERRNO_IS_PRIVILEGE(errno)) + return log_debug_errno(errno, "PR_SET_MM_ARG_START failed: %m"); + + /* HACK: prctl() API is kind of dumb on this point. The existing end address may already be + * below the desired start address, in which case the kernel may have kicked this back due + * to a range-check failure (see linux/kernel/sys.c:validate_prctl_map() to see this in + * action). The proper solution would be to have a prctl() API that could set both start+end + * simultaneously, or at least let us query the existing address to anticipate this condition + * and respond accordingly. For now, we can only guess at the cause of this failure and try + * a workaround--which will briefly expand the arg space to something potentially huge before + * resizing it to what we want. */ + log_debug_errno(errno, "PR_SET_MM_ARG_START failed, attempting PR_SET_MM_ARG_END hack: %m"); + + if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0) { + r = log_debug_errno(errno, "PR_SET_MM_ARG_END hack failed, proceeding without: %m"); + (void) munmap(nn, nn_size); + return r; + } + + if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) + return log_debug_errno(errno, "PR_SET_MM_ARG_START still failed, proceeding without: %m"); + } else { + /* And update the end pointer to the new end, too. If this fails, we don't really know what + * to do, it's pretty unlikely that we can rollback, hence we'll just accept the failure, + * and continue. */ + if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0) + log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m"); + } + + if (mm) + (void) munmap(mm, mm_size); + + mm = nn; + mm_size = nn_size; + } else { + strncpy(mm, name, mm_size); + + /* Update the end pointer, continuing regardless of any failure. */ + if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0) + log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m"); + } + + can_do = true; + return 0; +} + +int rename_process(const char name[]) { + bool truncated = false; + + /* This is a like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's + * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in + * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCES it is unbounded; + * to the third one 7 (i.e. the length of "systemd". If you pass a longer string it will likely be + * truncated. + * + * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. */ + + if (isempty(name)) + return -EINVAL; /* let's not confuse users unnecessarily with an empty name */ + + if (!is_main_thread()) + return -EPERM; /* Let's not allow setting the process name from other threads than the main one, as we + * cache things without locking, and we make assumptions that PR_SET_NAME sets the + * process name that isn't correct on any other threads */ + + size_t l = strlen(name); + + /* First step, change the comm field. The main thread's comm is identical to the process comm. This means we + * can use PR_SET_NAME, which sets the thread name for the calling thread. */ + if (prctl(PR_SET_NAME, name) < 0) + log_debug_errno(errno, "PR_SET_NAME failed: %m"); + if (l >= TASK_COMM_LEN) /* Linux userspace process names can be 15 chars at max */ + truncated = true; + + /* Second step, change glibc's ID of the process name. */ + if (program_invocation_name) { + size_t k; + + k = strlen(program_invocation_name); + strncpy(program_invocation_name, name, k); + if (l > k) + truncated = true; + + /* Also update the short name. */ + char *p = strrchr(program_invocation_name, '/'); + program_invocation_short_name = p ? p + 1 : program_invocation_name; + } + + /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but + * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at + * the end. This is the best option for changing /proc/self/cmdline. */ + (void) update_argv(name, l); + + /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if + * it still looks here */ + if (saved_argc > 0) { + if (saved_argv[0]) { + size_t k; + + k = strlen(saved_argv[0]); + strncpy(saved_argv[0], name, k); + if (l > k) + truncated = true; + } + + for (int i = 1; i < saved_argc; i++) { + if (!saved_argv[i]) + break; + + memzero(saved_argv[i], strlen(saved_argv[i])); + } + } + + return !truncated; +} + +int is_kernel_thread(pid_t pid) { + _cleanup_free_ char *line = NULL; + unsigned long long flags; + size_t l, i; + const char *p; + char *q; + int r; + + if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */ + return 0; + if (!pid_is_valid(pid)) + return -EINVAL; + + p = procfs_file_alloca(pid, "stat"); + r = read_one_line_file(p, &line); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) + return r; + + /* Skip past the comm field */ + q = strrchr(line, ')'); + if (!q) + return -EINVAL; + q++; + + /* Skip 6 fields to reach the flags field */ + for (i = 0; i < 6; i++) { + l = strspn(q, WHITESPACE); + if (l < 1) + return -EINVAL; + q += l; + + l = strcspn(q, WHITESPACE); + if (l < 1) + return -EINVAL; + q += l; + } + + /* Skip preceding whitespace */ + l = strspn(q, WHITESPACE); + if (l < 1) + return -EINVAL; + q += l; + + /* Truncate the rest */ + l = strcspn(q, WHITESPACE); + if (l < 1) + return -EINVAL; + q[l] = 0; + + r = safe_atollu(q, &flags); + if (r < 0) + return r; + + return !!(flags & PF_KTHREAD); +} + +int get_process_capeff(pid_t pid, char **ret) { + const char *p; + int r; + + assert(pid >= 0); + assert(ret); + + p = procfs_file_alloca(pid, "status"); + + r = get_proc_field(p, "CapEff", WHITESPACE, ret); + if (r == -ENOENT) + return -ESRCH; + + return r; +} + +static int get_process_link_contents(pid_t pid, const char *proc_file, char **ret) { + const char *p; + int r; + + assert(proc_file); + + p = procfs_file_alloca(pid, proc_file); + + r = readlink_malloc(p, ret); + return r == -ENOENT ? -ESRCH : r; +} + +int get_process_exe(pid_t pid, char **ret) { + char *d; + int r; + + assert(pid >= 0); + + r = get_process_link_contents(pid, "exe", ret); + if (r < 0) + return r; + + if (ret) { + d = endswith(*ret, " (deleted)"); + if (d) + *d = '\0'; + } + + return 0; +} + +static int get_process_id(pid_t pid, const char *field, uid_t *ret) { + _cleanup_fclose_ FILE *f = NULL; + const char *p; + int r; + + assert(field); + assert(ret); + + if (pid < 0) + return -EINVAL; + + p = procfs_file_alloca(pid, "status"); + r = fopen_unlocked(p, "re", &f); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) + return r; + + for (;;) { + _cleanup_free_ char *line = NULL; + char *l; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + break; + + l = strstrip(line); + + if (startswith(l, field)) { + l += strlen(field); + l += strspn(l, WHITESPACE); + + l[strcspn(l, WHITESPACE)] = 0; + + return parse_uid(l, ret); + } + } + + return -EIO; +} + +int get_process_uid(pid_t pid, uid_t *ret) { + + if (pid == 0 || pid == getpid_cached()) { + *ret = getuid(); + return 0; + } + + return get_process_id(pid, "Uid:", ret); +} + +int get_process_gid(pid_t pid, gid_t *ret) { + + if (pid == 0 || pid == getpid_cached()) { + *ret = getgid(); + return 0; + } + + assert_cc(sizeof(uid_t) == sizeof(gid_t)); + return get_process_id(pid, "Gid:", ret); +} + +int get_process_cwd(pid_t pid, char **ret) { + assert(pid >= 0); + + if (pid == 0 || pid == getpid_cached()) + return safe_getcwd(ret); + + return get_process_link_contents(pid, "cwd", ret); +} + +int get_process_root(pid_t pid, char **ret) { + assert(pid >= 0); + return get_process_link_contents(pid, "root", ret); +} + +#define ENVIRONMENT_BLOCK_MAX (5U*1024U*1024U) + +int get_process_environ(pid_t pid, char **ret) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *outcome = NULL; + size_t sz = 0; + const char *p; + int r; + + assert(pid >= 0); + assert(ret); + + p = procfs_file_alloca(pid, "environ"); + + r = fopen_unlocked(p, "re", &f); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) + return r; + + for (;;) { + char c; + + if (sz >= ENVIRONMENT_BLOCK_MAX) + return -ENOBUFS; + + if (!GREEDY_REALLOC(outcome, sz + 5)) + return -ENOMEM; + + r = safe_fgetc(f, &c); + if (r < 0) + return r; + if (r == 0) + break; + + if (c == '\0') + outcome[sz++] = '\n'; + else + sz += cescape_char(c, outcome + sz); + } + + outcome[sz] = '\0'; + *ret = TAKE_PTR(outcome); + + return 0; +} + +int get_process_ppid(pid_t pid, pid_t *ret) { + _cleanup_free_ char *line = NULL; + unsigned long ppid; + const char *p; + int r; + + assert(pid >= 0); + + if (pid == 0 || pid == getpid_cached()) { + if (ret) + *ret = getppid(); + return 0; + } + + if (pid == 1) /* PID 1 has no parent, shortcut this case */ + return -EADDRNOTAVAIL; + + p = procfs_file_alloca(pid, "stat"); + r = read_one_line_file(p, &line); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) + return r; + + /* Let's skip the pid and comm fields. The latter is enclosed in () but does not escape any () in its + * value, so let's skip over it manually */ + + p = strrchr(line, ')'); + if (!p) + return -EIO; + + p++; + + if (sscanf(p, " " + "%*c " /* state */ + "%lu ", /* ppid */ + &ppid) != 1) + return -EIO; + + /* If ppid is zero the process has no parent. Which might be the case for PID 1 but also for + * processes originating in other namespaces that are inserted into a pidns. Return a recognizable + * error in this case. */ + if (ppid == 0) + return -EADDRNOTAVAIL; + + if ((pid_t) ppid < 0 || (unsigned long) (pid_t) ppid != ppid) + return -ERANGE; + + if (ret) + *ret = (pid_t) ppid; + + return 0; +} + +int get_process_umask(pid_t pid, mode_t *ret) { + _cleanup_free_ char *m = NULL; + const char *p; + int r; + + assert(pid >= 0); + assert(ret); + + p = procfs_file_alloca(pid, "status"); + + r = get_proc_field(p, "Umask", WHITESPACE, &m); + if (r == -ENOENT) + return -ESRCH; + + return parse_mode(m, ret); +} + +int wait_for_terminate(pid_t pid, siginfo_t *status) { + siginfo_t dummy; + + assert(pid >= 1); + + if (!status) + status = &dummy; + + for (;;) { + zero(*status); + + if (waitid(P_PID, pid, status, WEXITED) < 0) { + + if (errno == EINTR) + continue; + + return negative_errno(); + } + + return 0; + } +} + +/* + * Return values: + * < 0 : wait_for_terminate() failed to get the state of the + * process, the process was terminated by a signal, or + * failed for an unknown reason. + * >=0 : The process terminated normally, and its exit code is + * returned. + * + * That is, success is indicated by a return value of zero, and an + * error is indicated by a non-zero value. + * + * A warning is emitted if the process terminates abnormally, + * and also if it returns non-zero unless check_exit_code is true. + */ +int wait_for_terminate_and_check(const char *name, pid_t pid, WaitFlags flags) { + _cleanup_free_ char *buffer = NULL; + siginfo_t status; + int r, prio; + + assert(pid > 1); + + if (!name) { + r = get_process_comm(pid, &buffer); + if (r < 0) + log_debug_errno(r, "Failed to acquire process name of " PID_FMT ", ignoring: %m", pid); + else + name = buffer; + } + + prio = flags & WAIT_LOG_ABNORMAL ? LOG_ERR : LOG_DEBUG; + + r = wait_for_terminate(pid, &status); + if (r < 0) + return log_full_errno(prio, r, "Failed to wait for %s: %m", strna(name)); + + if (status.si_code == CLD_EXITED) { + if (status.si_status != EXIT_SUCCESS) + log_full(flags & WAIT_LOG_NON_ZERO_EXIT_STATUS ? LOG_ERR : LOG_DEBUG, + "%s failed with exit status %i.", strna(name), status.si_status); + else + log_debug("%s succeeded.", name); + + return status.si_status; + + } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) { + + log_full(prio, "%s terminated by signal %s.", strna(name), signal_to_string(status.si_status)); + return -EPROTO; + } + + log_full(prio, "%s failed due to unknown reason.", strna(name)); + return -EPROTO; +} + +/* + * Return values: + * + * < 0 : wait_for_terminate_with_timeout() failed to get the state of the process, the process timed out, the process + * was terminated by a signal, or failed for an unknown reason. + * + * >=0 : The process terminated normally with no failures. + * + * Success is indicated by a return value of zero, a timeout is indicated by ETIMEDOUT, and all other child failure + * states are indicated by error is indicated by a non-zero value. + * + * This call assumes SIGCHLD has been blocked already, in particular before the child to wait for has been forked off + * to remain entirely race-free. + */ +int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) { + sigset_t mask; + int r; + usec_t until; + + assert_se(sigemptyset(&mask) == 0); + assert_se(sigaddset(&mask, SIGCHLD) == 0); + + /* Drop into a sigtimewait-based timeout. Waiting for the + * pid to exit. */ + until = usec_add(now(CLOCK_MONOTONIC), timeout); + for (;;) { + usec_t n; + siginfo_t status = {}; + + n = now(CLOCK_MONOTONIC); + if (n >= until) + break; + + r = RET_NERRNO(sigtimedwait(&mask, NULL, TIMESPEC_STORE(until - n))); + /* Assuming we woke due to the child exiting. */ + if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) { + if (status.si_pid == pid) { + /* This is the correct child. */ + if (status.si_code == CLD_EXITED) + return status.si_status == 0 ? 0 : -EPROTO; + else + return -EPROTO; + } + } + /* Not the child, check for errors and proceed appropriately */ + if (r < 0) { + switch (r) { + case -EAGAIN: + /* Timed out, child is likely hung. */ + return -ETIMEDOUT; + case -EINTR: + /* Received a different signal and should retry */ + continue; + default: + /* Return any unexpected errors */ + return r; + } + } + } + + return -EPROTO; +} + +void sigkill_wait(pid_t pid) { + assert(pid > 1); + + (void) kill(pid, SIGKILL); + (void) wait_for_terminate(pid, NULL); +} + +void sigkill_waitp(pid_t *pid) { + PROTECT_ERRNO; + + if (!pid) + return; + if (*pid <= 1) + return; + + sigkill_wait(*pid); +} + +void sigterm_wait(pid_t pid) { + assert(pid > 1); + + (void) kill_and_sigcont(pid, SIGTERM); + (void) wait_for_terminate(pid, NULL); +} + +int kill_and_sigcont(pid_t pid, int sig) { + int r; + + r = RET_NERRNO(kill(pid, sig)); + + /* If this worked, also send SIGCONT, unless we already just sent a SIGCONT, or SIGKILL was sent which isn't + * affected by a process being suspended anyway. */ + if (r >= 0 && !IN_SET(sig, SIGCONT, SIGKILL)) + (void) kill(pid, SIGCONT); + + return r; +} + +int getenv_for_pid(pid_t pid, const char *field, char **ret) { + _cleanup_fclose_ FILE *f = NULL; + char *value = NULL; + const char *path; + size_t l, sum = 0; + int r; + + assert(pid >= 0); + assert(field); + assert(ret); + + if (pid == 0 || pid == getpid_cached()) { + const char *e; + + e = getenv(field); + if (!e) { + *ret = NULL; + return 0; + } + + value = strdup(e); + if (!value) + return -ENOMEM; + + *ret = value; + return 1; + } + + if (!pid_is_valid(pid)) + return -EINVAL; + + path = procfs_file_alloca(pid, "environ"); + + r = fopen_unlocked(path, "re", &f); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) + return r; + + l = strlen(field); + for (;;) { + _cleanup_free_ char *line = NULL; + + if (sum > ENVIRONMENT_BLOCK_MAX) /* Give up searching eventually */ + return -ENOBUFS; + + r = read_nul_string(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) /* EOF */ + break; + + sum += r; + + if (strneq(line, field, l) && line[l] == '=') { + value = strdup(line + l + 1); + if (!value) + return -ENOMEM; + + *ret = value; + return 1; + } + } + + *ret = NULL; + return 0; +} + +int pid_is_my_child(pid_t pid) { + pid_t ppid; + int r; + + if (pid <= 1) + return false; + + r = get_process_ppid(pid, &ppid); + if (r < 0) + return r; + + return ppid == getpid_cached(); +} + +bool pid_is_unwaited(pid_t pid) { + /* Checks whether a PID is still valid at all, including a zombie */ + + if (pid < 0) + return false; + + if (pid <= 1) /* If we or PID 1 would be dead and have been waited for, this code would not be running */ + return true; + + if (pid == getpid_cached()) + return true; + + if (kill(pid, 0) >= 0) + return true; + + return errno != ESRCH; +} + +bool pid_is_alive(pid_t pid) { + int r; + + /* Checks whether a PID is still valid and not a zombie */ + + if (pid < 0) + return false; + + if (pid <= 1) /* If we or PID 1 would be a zombie, this code would not be running */ + return true; + + if (pid == getpid_cached()) + return true; + + r = get_process_state(pid); + if (IN_SET(r, -ESRCH, 'Z')) + return false; + + return true; +} + +int pid_from_same_root_fs(pid_t pid) { + const char *root; + + if (pid < 0) + return false; + + if (pid == 0 || pid == getpid_cached()) + return true; + + root = procfs_file_alloca(pid, "root"); + + return files_same(root, "/proc/1/root", 0); +} + +bool is_main_thread(void) { + static thread_local int cached = 0; + + if (_unlikely_(cached == 0)) + cached = getpid_cached() == gettid() ? 1 : -1; + + return cached > 0; +} + +bool oom_score_adjust_is_valid(int oa) { + return oa >= OOM_SCORE_ADJ_MIN && oa <= OOM_SCORE_ADJ_MAX; +} + +unsigned long personality_from_string(const char *p) { + Architecture architecture; + + if (!p) + return PERSONALITY_INVALID; + + /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just + * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for + * the same register size. */ + + architecture = architecture_from_string(p); + if (architecture < 0) + return PERSONALITY_INVALID; + + if (architecture == native_architecture()) + return PER_LINUX; +#ifdef ARCHITECTURE_SECONDARY + if (architecture == ARCHITECTURE_SECONDARY) + return PER_LINUX32; +#endif + + return PERSONALITY_INVALID; +} + +const char* personality_to_string(unsigned long p) { + Architecture architecture = _ARCHITECTURE_INVALID; + + if (p == PER_LINUX) + architecture = native_architecture(); +#ifdef ARCHITECTURE_SECONDARY + else if (p == PER_LINUX32) + architecture = ARCHITECTURE_SECONDARY; +#endif + + if (architecture < 0) + return NULL; + + return architecture_to_string(architecture); +} + +int safe_personality(unsigned long p) { + int ret; + + /* So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via errno, + * and in others as negative return value containing an errno-like value. Let's work around this: this is a + * wrapper that uses errno if it is set, and uses the return value otherwise. And then it sets both errno and + * the return value indicating the same issue, so that we are definitely on the safe side. + * + * See https://github.com/systemd/systemd/issues/6737 */ + + errno = 0; + ret = personality(p); + if (ret < 0) { + if (errno != 0) + return -errno; + + errno = -ret; + } + + return ret; +} + +int opinionated_personality(unsigned long *ret) { + int current; + + /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit + * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the + * two most relevant personalities: PER_LINUX and PER_LINUX32. */ + + current = safe_personality(PERSONALITY_INVALID); + if (current < 0) + return current; + + if (((unsigned long) current & 0xffff) == PER_LINUX32) + *ret = PER_LINUX32; + else + *ret = PER_LINUX; + + return 0; +} + +void valgrind_summary_hack(void) { +#if HAVE_VALGRIND_VALGRIND_H + if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) { + pid_t pid; + pid = raw_clone(SIGCHLD); + if (pid < 0) + log_emergency_errno(errno, "Failed to fork off valgrind helper: %m"); + else if (pid == 0) + exit(EXIT_SUCCESS); + else { + log_info("Spawned valgrind helper as PID "PID_FMT".", pid); + (void) wait_for_terminate(pid, NULL); + } + } +#endif +} + +int pid_compare_func(const pid_t *a, const pid_t *b) { + /* Suitable for usage in qsort() */ + return CMP(*a, *b); +} + +/* The cached PID, possible values: + * + * == UNSET [0] → cache not initialized yet + * == BUSY [-1] → some thread is initializing it at the moment + * any other → the cached PID + */ + +#define CACHED_PID_UNSET ((pid_t) 0) +#define CACHED_PID_BUSY ((pid_t) -1) + +static pid_t cached_pid = CACHED_PID_UNSET; + +void reset_cached_pid(void) { + /* Invoked in the child after a fork(), i.e. at the first moment the PID changed */ + cached_pid = CACHED_PID_UNSET; +} + +pid_t getpid_cached(void) { + static bool installed = false; + pid_t current_value = CACHED_PID_UNSET; + + /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a + * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally + * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when + * objects were used across fork()s. With this caching the old behaviour is somewhat restored. + * + * https://bugzilla.redhat.com/show_bug.cgi?id=1443976 + * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e + */ + + __atomic_compare_exchange_n( + &cached_pid, + ¤t_value, + CACHED_PID_BUSY, + false, + __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST); + + switch (current_value) { + + case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */ + pid_t new_pid; + + new_pid = raw_getpid(); + + if (!installed) { + /* __register_atfork() either returns 0 or -ENOMEM, in its glibc implementation. Since it's + * only half-documented (glibc doesn't document it but LSB does — though only superficially) + * we'll check for errors only in the most generic fashion possible. */ + + if (pthread_atfork(NULL, NULL, reset_cached_pid) != 0) { + /* OOM? Let's try again later */ + cached_pid = CACHED_PID_UNSET; + return new_pid; + } + + installed = true; + } + + cached_pid = new_pid; + return new_pid; + } + + case CACHED_PID_BUSY: /* Somebody else is currently initializing */ + return raw_getpid(); + + default: /* Properly initialized */ + return current_value; + } +} + +int must_be_root(void) { + + if (geteuid() == 0) + return 0; + + return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Need to be root."); +} + +static void restore_sigsetp(sigset_t **ssp) { + if (*ssp) + (void) sigprocmask(SIG_SETMASK, *ssp, NULL); +} + +int safe_fork_full( + const char *name, + const int except_fds[], + size_t n_except_fds, + ForkFlags flags, + pid_t *ret_pid) { + + pid_t original_pid, pid; + sigset_t saved_ss, ss; + _unused_ _cleanup_(restore_sigsetp) sigset_t *saved_ssp = NULL; + bool block_signals = false, block_all = false; + int prio, r; + + /* A wrapper around fork(), that does a couple of important initializations in addition to mere forking. Always + * returns the child's PID in *ret_pid. Returns == 0 in the child, and > 0 in the parent. */ + + prio = flags & FORK_LOG ? LOG_ERR : LOG_DEBUG; + + original_pid = getpid_cached(); + + if (flags & FORK_FLUSH_STDIO) { + fflush(stdout); + fflush(stderr); /* This one shouldn't be necessary, stderr should be unbuffered anyway, but let's better be safe than sorry */ + } + + if (flags & (FORK_RESET_SIGNALS|FORK_DEATHSIG)) { + /* We temporarily block all signals, so that the new child has them blocked initially. This way, we can + * be sure that SIGTERMs are not lost we might send to the child. */ + + assert_se(sigfillset(&ss) >= 0); + block_signals = block_all = true; + + } else if (flags & FORK_WAIT) { + /* Let's block SIGCHLD at least, so that we can safely watch for the child process */ + + assert_se(sigemptyset(&ss) >= 0); + assert_se(sigaddset(&ss, SIGCHLD) >= 0); + block_signals = true; + } + + if (block_signals) { + if (sigprocmask(SIG_SETMASK, &ss, &saved_ss) < 0) + return log_full_errno(prio, errno, "Failed to set signal mask: %m"); + saved_ssp = &saved_ss; + } + + if ((flags & (FORK_NEW_MOUNTNS|FORK_NEW_USERNS)) != 0) + pid = raw_clone(SIGCHLD| + (FLAGS_SET(flags, FORK_NEW_MOUNTNS) ? CLONE_NEWNS : 0) | + (FLAGS_SET(flags, FORK_NEW_USERNS) ? CLONE_NEWUSER : 0)); + else + pid = fork(); + if (pid < 0) + return log_full_errno(prio, errno, "Failed to fork off '%s': %m", strna(name)); + if (pid > 0) { + /* We are in the parent process */ + + log_debug("Successfully forked off '%s' as PID " PID_FMT ".", strna(name), pid); + + if (flags & FORK_WAIT) { + if (block_all) { + /* undo everything except SIGCHLD */ + ss = saved_ss; + assert_se(sigaddset(&ss, SIGCHLD) >= 0); + (void) sigprocmask(SIG_SETMASK, &ss, NULL); + } + + r = wait_for_terminate_and_check(name, pid, (flags & FORK_LOG ? WAIT_LOG : 0)); + if (r < 0) + return r; + if (r != EXIT_SUCCESS) /* exit status > 0 should be treated as failure, too */ + return -EPROTO; + } + + if (ret_pid) + *ret_pid = pid; + + return 1; + } + + /* We are in the child process */ + + /* Restore signal mask manually */ + saved_ssp = NULL; + + if (flags & FORK_REOPEN_LOG) { + /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. */ + log_close(); + log_set_open_when_needed(true); + log_settle_target(); + } + + if (name) { + r = rename_process(name); + if (r < 0) + log_full_errno(flags & FORK_LOG ? LOG_WARNING : LOG_DEBUG, + r, "Failed to rename process, ignoring: %m"); + } + + if (flags & (FORK_DEATHSIG|FORK_DEATHSIG_SIGINT)) + if (prctl(PR_SET_PDEATHSIG, (flags & FORK_DEATHSIG_SIGINT) ? SIGINT : SIGTERM) < 0) { + log_full_errno(prio, errno, "Failed to set death signal: %m"); + _exit(EXIT_FAILURE); + } + + if (flags & FORK_RESET_SIGNALS) { + r = reset_all_signal_handlers(); + if (r < 0) { + log_full_errno(prio, r, "Failed to reset signal handlers: %m"); + _exit(EXIT_FAILURE); + } + + /* This implicitly undoes the signal mask stuff we did before the fork()ing above */ + r = reset_signal_mask(); + if (r < 0) { + log_full_errno(prio, r, "Failed to reset signal mask: %m"); + _exit(EXIT_FAILURE); + } + } else if (block_signals) { /* undo what we did above */ + if (sigprocmask(SIG_SETMASK, &saved_ss, NULL) < 0) { + log_full_errno(prio, errno, "Failed to restore signal mask: %m"); + _exit(EXIT_FAILURE); + } + } + + if (flags & FORK_DEATHSIG) { + pid_t ppid; + /* Let's see if the parent PID is still the one we started from? If not, then the parent + * already died by the time we set PR_SET_PDEATHSIG, hence let's emulate the effect */ + + ppid = getppid(); + if (ppid == 0) + /* Parent is in a different PID namespace. */; + else if (ppid != original_pid) { + log_debug("Parent died early, raising SIGTERM."); + (void) raise(SIGTERM); + _exit(EXIT_FAILURE); + } + } + + if (FLAGS_SET(flags, FORK_NEW_MOUNTNS | FORK_MOUNTNS_SLAVE)) { + + /* Optionally, make sure we never propagate mounts to the host. */ + + if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) { + log_full_errno(prio, errno, "Failed to remount root directory as MS_SLAVE: %m"); + _exit(EXIT_FAILURE); + } + } + + if (flags & FORK_CLOSE_ALL_FDS) { + /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */ + log_close(); + + r = close_all_fds(except_fds, n_except_fds); + if (r < 0) { + log_full_errno(prio, r, "Failed to close all file descriptors: %m"); + _exit(EXIT_FAILURE); + } + } + + if (flags & FORK_CLOEXEC_OFF) { + r = fd_cloexec_many(except_fds, n_except_fds, false); + if (r < 0) { + log_full_errno(prio, r, "Failed to turn off O_CLOEXEC on file descriptors: %m"); + _exit(EXIT_FAILURE); + } + } + + /* When we were asked to reopen the logs, do so again now */ + if (flags & FORK_REOPEN_LOG) { + log_open(); + log_set_open_when_needed(false); + } + + if (flags & FORK_NULL_STDIO) { + r = make_null_stdio(); + if (r < 0) { + log_full_errno(prio, r, "Failed to connect stdin/stdout to /dev/null: %m"); + _exit(EXIT_FAILURE); + } + + } else if (flags & FORK_STDOUT_TO_STDERR) { + if (dup2(STDERR_FILENO, STDOUT_FILENO) < 0) { + log_full_errno(prio, errno, "Failed to connect stdout to stderr: %m"); + _exit(EXIT_FAILURE); + } + } + + if (flags & FORK_RLIMIT_NOFILE_SAFE) { + r = rlimit_nofile_safe(); + if (r < 0) { + log_full_errno(prio, r, "Failed to lower RLIMIT_NOFILE's soft limit to 1K: %m"); + _exit(EXIT_FAILURE); + } + } + + if (ret_pid) + *ret_pid = getpid_cached(); + + return 0; +} + +int namespace_fork( + const char *outer_name, + const char *inner_name, + const int except_fds[], + size_t n_except_fds, + ForkFlags flags, + int pidns_fd, + int mntns_fd, + int netns_fd, + int userns_fd, + int root_fd, + pid_t *ret_pid) { + + int r; + + /* This is much like safe_fork(), but forks twice, and joins the specified namespaces in the middle + * process. This ensures that we are fully a member of the destination namespace, with pidns an all, so that + * /proc/self/fd works correctly. */ + + r = safe_fork_full(outer_name, except_fds, n_except_fds, (flags|FORK_DEATHSIG) & ~(FORK_REOPEN_LOG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE), ret_pid); + if (r < 0) + return r; + if (r == 0) { + pid_t pid; + + /* Child */ + + r = namespace_enter(pidns_fd, mntns_fd, netns_fd, userns_fd, root_fd); + if (r < 0) { + log_full_errno(FLAGS_SET(flags, FORK_LOG) ? LOG_ERR : LOG_DEBUG, r, "Failed to join namespace: %m"); + _exit(EXIT_FAILURE); + } + + /* We mask a few flags here that either make no sense for the grandchild, or that we don't have to do again */ + r = safe_fork_full(inner_name, except_fds, n_except_fds, flags & ~(FORK_WAIT|FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_NULL_STDIO), &pid); + if (r < 0) + _exit(EXIT_FAILURE); + if (r == 0) { + /* Child */ + if (ret_pid) + *ret_pid = pid; + return 0; + } + + r = wait_for_terminate_and_check(inner_name, pid, FLAGS_SET(flags, FORK_LOG) ? WAIT_LOG : 0); + if (r < 0) + _exit(EXIT_FAILURE); + + _exit(r); + } + + return 1; +} + +int set_oom_score_adjust(int value) { + char t[DECIMAL_STR_MAX(int)]; + + xsprintf(t, "%i", value); + + return write_string_file("/proc/self/oom_score_adj", t, + WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_DISABLE_BUFFER); +} + +int get_oom_score_adjust(int *ret) { + _cleanup_free_ char *t = NULL; + int r, a; + + r = read_virtual_file("/proc/self/oom_score_adj", SIZE_MAX, &t, NULL); + if (r < 0) + return r; + + delete_trailing_chars(t, WHITESPACE); + + assert_se(safe_atoi(t, &a) >= 0); + assert_se(oom_score_adjust_is_valid(a)); + + if (ret) + *ret = a; + return 0; +} + +int pidfd_get_pid(int fd, pid_t *ret) { + char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)]; + _cleanup_free_ char *fdinfo = NULL; + char *p; + int r; + + if (fd < 0) + return -EBADF; + + xsprintf(path, "/proc/self/fdinfo/%i", fd); + + r = read_full_virtual_file(path, &fdinfo, NULL); + if (r == -ENOENT) /* if fdinfo doesn't exist we assume the process does not exist */ + return -ESRCH; + if (r < 0) + return r; + + p = startswith(fdinfo, "Pid:"); + if (!p) { + p = strstr(fdinfo, "\nPid:"); + if (!p) + return -ENOTTY; /* not a pidfd? */ + + p += 5; + } + + p += strspn(p, WHITESPACE); + p[strcspn(p, WHITESPACE)] = 0; + + return parse_pid(p, ret); +} + +static int rlimit_to_nice(rlim_t limit) { + if (limit <= 1) + return PRIO_MAX-1; /* i.e. 19 */ + + if (limit >= -PRIO_MIN + PRIO_MAX) + return PRIO_MIN; /* i.e. -20 */ + + return PRIO_MAX - (int) limit; +} + +int setpriority_closest(int priority) { + int current, limit, saved_errno; + struct rlimit highest; + + /* Try to set requested nice level */ + if (setpriority(PRIO_PROCESS, 0, priority) >= 0) + return 1; + + /* Permission failed */ + saved_errno = -errno; + if (!ERRNO_IS_PRIVILEGE(saved_errno)) + return saved_errno; + + errno = 0; + current = getpriority(PRIO_PROCESS, 0); + if (errno != 0) + return -errno; + + if (priority == current) + return 1; + + /* Hmm, we'd expect that raising the nice level from our status quo would always work. If it doesn't, + * then the whole setpriority() system call is blocked to us, hence let's propagate the error + * right-away */ + if (priority > current) + return saved_errno; + + if (getrlimit(RLIMIT_NICE, &highest) < 0) + return -errno; + + limit = rlimit_to_nice(highest.rlim_cur); + + /* We are already less nice than limit allows us */ + if (current < limit) { + log_debug("Cannot raise nice level, permissions and the resource limit do not allow it."); + return 0; + } + + /* Push to the allowed limit */ + if (setpriority(PRIO_PROCESS, 0, limit) < 0) + return -errno; + + log_debug("Cannot set requested nice level (%i), used next best (%i).", priority, limit); + return 0; +} + +bool invoked_as(char *argv[], const char *token) { + if (!argv || isempty(argv[0])) + return false; + + if (isempty(token)) + return false; + + return strstr(last_path_component(argv[0]), token); +} + +bool invoked_by_systemd(void) { + int r; + + /* If the process is directly executed by PID1 (e.g. ExecStart= or generator), systemd-importd, + * or systemd-homed, then $SYSTEMD_EXEC_PID= is set, and read the command line. */ + const char *e = getenv("SYSTEMD_EXEC_PID"); + if (!e) + return false; + + if (streq(e, "*")) + /* For testing. */ + return true; + + pid_t p; + r = parse_pid(e, &p); + if (r < 0) { + /* We know that systemd sets the variable correctly. Something else must have set it. */ + log_debug_errno(r, "Failed to parse \"SYSTEMD_EXEC_PID=%s\", ignoring: %m", e); + return false; + } + + return getpid_cached() == p; +} + +_noreturn_ void freeze(void) { + log_close(); + + /* Make sure nobody waits for us (i.e. on one of our sockets) anymore. Note that we use + * close_all_fds_without_malloc() instead of plain close_all_fds() here, since we want this function + * to be compatible with being called from signal handlers. */ + (void) close_all_fds_without_malloc(NULL, 0); + + /* Let's not freeze right away, but keep reaping zombies. */ + for (;;) { + siginfo_t si = {}; + + if (waitid(P_ALL, 0, &si, WEXITED) < 0 && errno != EINTR) + break; + } + + /* waitid() failed with an unexpected error, things are really borked. Freeze now! */ + for (;;) + pause(); +} + +bool argv_looks_like_help(int argc, char **argv) { + char **l; + + /* Scans the command line for indications the user asks for help. This is supposed to be called by + * tools that do not implement getopt() style command line parsing because they are not primarily + * user-facing. Detects four ways of asking for help: + * + * 1. Passing zero arguments + * 2. Passing "help" as first argument + * 3. Passing --help as any argument + * 4. Passing -h as any argument + */ + + if (argc <= 1) + return true; + + if (streq_ptr(argv[1], "help")) + return true; + + l = strv_skip(argv, 1); + + return strv_contains(l, "--help") || + strv_contains(l, "-h"); +} + +static const char *const sigchld_code_table[] = { + [CLD_EXITED] = "exited", + [CLD_KILLED] = "killed", + [CLD_DUMPED] = "dumped", + [CLD_TRAPPED] = "trapped", + [CLD_STOPPED] = "stopped", + [CLD_CONTINUED] = "continued", +}; + +DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int); + +static const char* const sched_policy_table[] = { + [SCHED_OTHER] = "other", + [SCHED_BATCH] = "batch", + [SCHED_IDLE] = "idle", + [SCHED_FIFO] = "fifo", + [SCHED_RR] = "rr", +}; + +DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX); diff --git a/src/basic/process-util.h b/src/basic/process-util.h new file mode 100644 index 0000000..ed2f736 --- /dev/null +++ b/src/basic/process-util.h @@ -0,0 +1,198 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "format-util.h" +#include "macro.h" +#include "time-util.h" + +#define procfs_file_alloca(pid, field) \ + ({ \ + pid_t _pid_ = (pid); \ + const char *_field_ = (field); \ + char *_r_; \ + if (_pid_ == 0) { \ + _r_ = newa(char, STRLEN("/proc/self/") + strlen(_field_) + 1); \ + strcpy(stpcpy(_r_, "/proc/self/"), _field_); \ + } else { \ + _r_ = newa(char, STRLEN("/proc/") + DECIMAL_STR_MAX(pid_t) + 1 + strlen(_field_) + 1); \ + sprintf(_r_, "/proc/" PID_FMT "/%s", _pid_, _field_); \ + } \ + (const char*) _r_; \ + }) + +typedef enum ProcessCmdlineFlags { + PROCESS_CMDLINE_COMM_FALLBACK = 1 << 0, + PROCESS_CMDLINE_USE_LOCALE = 1 << 1, + PROCESS_CMDLINE_QUOTE = 1 << 2, + PROCESS_CMDLINE_QUOTE_POSIX = 1 << 3, +} ProcessCmdlineFlags; + +int get_process_comm(pid_t pid, char **ret); +int get_process_cmdline(pid_t pid, size_t max_columns, ProcessCmdlineFlags flags, char **ret); +int get_process_exe(pid_t pid, char **ret); +int get_process_uid(pid_t pid, uid_t *ret); +int get_process_gid(pid_t pid, gid_t *ret); +int get_process_capeff(pid_t pid, char **ret); +int get_process_cwd(pid_t pid, char **ret); +int get_process_root(pid_t pid, char **ret); +int get_process_environ(pid_t pid, char **ret); +int get_process_ppid(pid_t pid, pid_t *ret); +int get_process_umask(pid_t pid, mode_t *ret); + +int wait_for_terminate(pid_t pid, siginfo_t *status); + +typedef enum WaitFlags { + WAIT_LOG_ABNORMAL = 1 << 0, + WAIT_LOG_NON_ZERO_EXIT_STATUS = 1 << 1, + + /* A shortcut for requesting the most complete logging */ + WAIT_LOG = WAIT_LOG_ABNORMAL|WAIT_LOG_NON_ZERO_EXIT_STATUS, +} WaitFlags; + +int wait_for_terminate_and_check(const char *name, pid_t pid, WaitFlags flags); +int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout); + +void sigkill_wait(pid_t pid); +void sigkill_waitp(pid_t *pid); +void sigterm_wait(pid_t pid); + +int kill_and_sigcont(pid_t pid, int sig); + +int rename_process(const char name[]); +int is_kernel_thread(pid_t pid); + +int getenv_for_pid(pid_t pid, const char *field, char **_value); + +bool pid_is_alive(pid_t pid); +bool pid_is_unwaited(pid_t pid); +int pid_is_my_child(pid_t pid); +int pid_from_same_root_fs(pid_t pid); + +bool is_main_thread(void); + +bool oom_score_adjust_is_valid(int oa); + +#ifndef PERSONALITY_INVALID +/* personality(7) documents that 0xffffffffUL is used for querying the + * current personality, hence let's use that here as error + * indicator. */ +#define PERSONALITY_INVALID 0xffffffffLU +#endif + +unsigned long personality_from_string(const char *p); +const char *personality_to_string(unsigned long); + +int safe_personality(unsigned long p); +int opinionated_personality(unsigned long *ret); + +const char *sigchld_code_to_string(int i) _const_; +int sigchld_code_from_string(const char *s) _pure_; + +int sched_policy_to_string_alloc(int i, char **s); +int sched_policy_from_string(const char *s); + +static inline pid_t PTR_TO_PID(const void *p) { + return (pid_t) ((uintptr_t) p); +} + +static inline void* PID_TO_PTR(pid_t pid) { + return (void*) ((uintptr_t) pid); +} + +void valgrind_summary_hack(void); + +int pid_compare_func(const pid_t *a, const pid_t *b); + +static inline bool nice_is_valid(int n) { + return n >= PRIO_MIN && n < PRIO_MAX; +} + +static inline bool sched_policy_is_valid(int i) { + return IN_SET(i, SCHED_OTHER, SCHED_BATCH, SCHED_IDLE, SCHED_FIFO, SCHED_RR); +} + +static inline bool sched_priority_is_valid(int i) { + return i >= 0 && i <= sched_get_priority_max(SCHED_RR); +} + +static inline bool pid_is_valid(pid_t p) { + return p > 0; +} + +pid_t getpid_cached(void); +void reset_cached_pid(void); + +int must_be_root(void); + +typedef enum ForkFlags { + FORK_RESET_SIGNALS = 1 << 0, /* Reset all signal handlers and signal mask */ + FORK_CLOSE_ALL_FDS = 1 << 1, /* Close all open file descriptors in the child, except for 0,1,2 */ + FORK_DEATHSIG = 1 << 2, /* Set PR_DEATHSIG in the child to SIGTERM */ + FORK_DEATHSIG_SIGINT = 1 << 3, /* Set PR_DEATHSIG in the child to SIGINT */ + FORK_NULL_STDIO = 1 << 4, /* Connect 0,1,2 to /dev/null */ + FORK_REOPEN_LOG = 1 << 5, /* Reopen log connection */ + FORK_LOG = 1 << 6, /* Log above LOG_DEBUG log level about failures */ + FORK_WAIT = 1 << 7, /* Wait until child exited */ + FORK_NEW_MOUNTNS = 1 << 8, /* Run child in its own mount namespace */ + FORK_MOUNTNS_SLAVE = 1 << 9, /* Make child's mount namespace MS_SLAVE */ + FORK_RLIMIT_NOFILE_SAFE = 1 << 10, /* Set RLIMIT_NOFILE soft limit to 1K for select() compat */ + FORK_STDOUT_TO_STDERR = 1 << 11, /* Make stdout a copy of stderr */ + FORK_FLUSH_STDIO = 1 << 12, /* fflush() stdout (and stderr) before forking */ + FORK_NEW_USERNS = 1 << 13, /* Run child in its own user namespace */ + FORK_CLOEXEC_OFF = 1 << 14, /* In the child: turn off O_CLOEXEC on all fds in except_fds[] */ +} ForkFlags; + +int safe_fork_full(const char *name, const int except_fds[], size_t n_except_fds, ForkFlags flags, pid_t *ret_pid); + +static inline int safe_fork(const char *name, ForkFlags flags, pid_t *ret_pid) { + return safe_fork_full(name, NULL, 0, flags, ret_pid); +} + +int namespace_fork(const char *outer_name, const char *inner_name, const int except_fds[], size_t n_except_fds, ForkFlags flags, int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd, pid_t *ret_pid); + +int set_oom_score_adjust(int value); +int get_oom_score_adjust(int *ret); + +/* The highest possibly (theoretic) pid_t value on this architecture. */ +#define PID_T_MAX ((pid_t) INT32_MAX) +/* The maximum number of concurrent processes Linux allows on this architecture, as well as the highest valid PID value + * the kernel will potentially assign. This reflects a value compiled into the kernel (PID_MAX_LIMIT), and sets the + * upper boundary on what may be written to the /proc/sys/kernel/pid_max sysctl (but do note that the sysctl is off by + * 1, since PID 0 can never exist and there can hence only be one process less than the limit would suggest). Since + * these values are documented in proc(5) we feel quite confident that they are stable enough for the near future at + * least to define them here too. */ +#define TASKS_MAX 4194303U + +assert_cc(TASKS_MAX <= (unsigned long) PID_T_MAX); + +/* Like TAKE_PTR() but for child PIDs, resetting them to 0 */ +#define TAKE_PID(pid) \ + ({ \ + pid_t *_ppid_ = &(pid); \ + pid_t _pid_ = *_ppid_; \ + *_ppid_ = 0; \ + _pid_; \ + }) + +int pidfd_get_pid(int fd, pid_t *ret); + +int setpriority_closest(int priority); + +bool invoked_as(char *argv[], const char *token); + +bool invoked_by_systemd(void); + +_noreturn_ void freeze(void); + +bool argv_looks_like_help(int argc, char **argv); diff --git a/src/basic/procfs-util.c b/src/basic/procfs-util.c new file mode 100644 index 0000000..4f60730 --- /dev/null +++ b/src/basic/procfs-util.c @@ -0,0 +1,268 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include + +#include "alloc-util.h" +#include "def.h" +#include "fd-util.h" +#include "fileio.h" +#include "parse-util.h" +#include "process-util.h" +#include "procfs-util.h" +#include "stdio-util.h" +#include "string-util.h" + +int procfs_get_pid_max(uint64_t *ret) { + _cleanup_free_ char *value = NULL; + int r; + + assert(ret); + + r = read_one_line_file("/proc/sys/kernel/pid_max", &value); + if (r < 0) + return r; + + return safe_atou64(value, ret); +} + +int procfs_get_threads_max(uint64_t *ret) { + _cleanup_free_ char *value = NULL; + int r; + + assert(ret); + + r = read_one_line_file("/proc/sys/kernel/threads-max", &value); + if (r < 0) + return r; + + return safe_atou64(value, ret); +} + +int procfs_tasks_set_limit(uint64_t limit) { + char buffer[DECIMAL_STR_MAX(uint64_t)+1]; + uint64_t pid_max; + int r; + + if (limit == 0) /* This makes no sense, we are userspace and hence count as tasks too, and we want to live, + * hence the limit conceptually has to be above 0. Also, most likely if anyone asks for a zero + * limit they probably mean "no limit", hence let's better refuse this to avoid + * confusion. */ + return -EINVAL; + + /* The Linux kernel doesn't allow this value to go below 20, hence don't allow this either, higher values than + * TASKS_MAX are not accepted by the pid_max sysctl. We'll treat anything this high as "unbounded" and hence + * set it to the maximum. */ + limit = CLAMP(limit, 20U, TASKS_MAX); + + r = procfs_get_pid_max(&pid_max); + if (r < 0) + return r; + + /* As pid_max is about the numeric pid_t range we'll bump it if necessary, but only ever increase it, never + * decrease it, as threads-max is the much more relevant sysctl. */ + if (limit > pid_max-1) { + sprintf(buffer, "%" PRIu64, limit+1); /* Add one, since PID 0 is not a valid PID */ + r = write_string_file("/proc/sys/kernel/pid_max", buffer, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return r; + } + + sprintf(buffer, "%" PRIu64, limit); + r = write_string_file("/proc/sys/kernel/threads-max", buffer, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) { + uint64_t threads_max; + + /* Hmm, we couldn't write this? If so, maybe it was already set properly? In that case let's not + * generate an error */ + + if (procfs_get_threads_max(&threads_max) < 0) + return r; /* return original error */ + + if (MIN(pid_max - 1, threads_max) != limit) + return r; /* return original error */ + + /* Yay! Value set already matches what we were trying to set, hence consider this a success. */ + } + + return 0; +} + +int procfs_tasks_get_current(uint64_t *ret) { + _cleanup_free_ char *value = NULL; + const char *p, *nr; + size_t n; + int r; + + assert(ret); + + r = read_one_line_file("/proc/loadavg", &value); + if (r < 0) + return r; + + /* Look for the second part of the fourth field, which is separated by a slash from the first part. None of the + * earlier fields use a slash, hence let's use this to find the right spot. */ + p = strchr(value, '/'); + if (!p) + return -EINVAL; + + p++; + n = strspn(p, DIGITS); + nr = strndupa_safe(p, n); + + return safe_atou64(nr, ret); +} + +static uint64_t calc_gcd64(uint64_t a, uint64_t b) { + + while (b > 0) { + uint64_t t; + + t = a % b; + + a = b; + b = t; + } + + return a; +} + +int procfs_cpu_get_usage(nsec_t *ret) { + _cleanup_free_ char *first_line = NULL; + unsigned long user_ticks, nice_ticks, system_ticks, irq_ticks, softirq_ticks, + guest_ticks = 0, guest_nice_ticks = 0; + long ticks_per_second; + uint64_t sum, gcd, a, b; + const char *p; + int r; + + assert(ret); + + r = read_one_line_file("/proc/stat", &first_line); + if (r < 0) + return r; + + p = first_word(first_line, "cpu"); + if (!p) + return -EINVAL; + + if (sscanf(p, "%lu %lu %lu %*u %*u %lu %lu %*u %lu %lu", + &user_ticks, + &nice_ticks, + &system_ticks, + &irq_ticks, + &softirq_ticks, + &guest_ticks, + &guest_nice_ticks) < 5) /* we only insist on the first five fields */ + return -EINVAL; + + ticks_per_second = sysconf(_SC_CLK_TCK); + if (ticks_per_second < 0) + return -errno; + assert(ticks_per_second > 0); + + sum = (uint64_t) user_ticks + (uint64_t) nice_ticks + (uint64_t) system_ticks + + (uint64_t) irq_ticks + (uint64_t) softirq_ticks + + (uint64_t) guest_ticks + (uint64_t) guest_nice_ticks; + + /* Let's reduce this fraction before we apply it to avoid overflows when converting this to µsec */ + gcd = calc_gcd64(NSEC_PER_SEC, ticks_per_second); + + a = (uint64_t) NSEC_PER_SEC / gcd; + b = (uint64_t) ticks_per_second / gcd; + + *ret = DIV_ROUND_UP((nsec_t) sum * (nsec_t) a, (nsec_t) b); + return 0; +} + +int convert_meminfo_value_to_uint64_bytes(const char *word, uint64_t *ret) { + _cleanup_free_ char *w = NULL; + char *digits, *e; + uint64_t v; + size_t n; + int r; + + assert(word); + assert(ret); + + w = strdup(word); + if (!w) + return -ENOMEM; + + /* Determine length of numeric value */ + n = strspn(w, WHITESPACE); + digits = w + n; + n = strspn(digits, DIGITS); + if (n == 0) + return -EINVAL; + e = digits + n; + + /* Ensure the line ends in " kB" */ + n = strspn(e, WHITESPACE); + if (n == 0) + return -EINVAL; + if (!streq(e + n, "kB")) + return -EINVAL; + + *e = 0; + r = safe_atou64(digits, &v); + if (r < 0) + return r; + if (v == UINT64_MAX) + return -EINVAL; + + if (v > UINT64_MAX/1024) + return -EOVERFLOW; + + *ret = v * 1024U; + return 0; +} + +int procfs_memory_get(uint64_t *ret_total, uint64_t *ret_used) { + uint64_t mem_total = UINT64_MAX, mem_available = UINT64_MAX; + _cleanup_fclose_ FILE *f = NULL; + int r; + + f = fopen("/proc/meminfo", "re"); + if (!f) + return -errno; + + for (;;) { + _cleanup_free_ char *line = NULL; + uint64_t *v; + char *p; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; /* EOF: Couldn't find one or both fields? */ + + p = first_word(line, "MemTotal:"); + if (p) + v = &mem_total; + else { + p = first_word(line, "MemAvailable:"); + if (p) + v = &mem_available; + else + continue; + } + + r = convert_meminfo_value_to_uint64_bytes(p, v); + if (r < 0) + return r; + + if (mem_total != UINT64_MAX && mem_available != UINT64_MAX) + break; + } + + if (mem_available > mem_total) + return -EINVAL; + + if (ret_total) + *ret_total = mem_total; + if (ret_used) + *ret_used = mem_total - mem_available; + return 0; +} diff --git a/src/basic/procfs-util.h b/src/basic/procfs-util.h new file mode 100644 index 0000000..eb8c773 --- /dev/null +++ b/src/basic/procfs-util.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#include "time-util.h" + +int procfs_get_pid_max(uint64_t *ret); +int procfs_get_threads_max(uint64_t *ret); + +int procfs_tasks_set_limit(uint64_t limit); +int procfs_tasks_get_current(uint64_t *ret); + +int procfs_cpu_get_usage(nsec_t *ret); + +int procfs_memory_get(uint64_t *ret_total, uint64_t *ret_used); +static inline int procfs_memory_get_used(uint64_t *ret) { + return procfs_memory_get(NULL, ret); +} + +int convert_meminfo_value_to_uint64_bytes(const char *word, uint64_t *ret); diff --git a/src/basic/pthread-util.h b/src/basic/pthread-util.h new file mode 100644 index 0000000..113485d --- /dev/null +++ b/src/basic/pthread-util.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#include "macro.h" + +static inline pthread_mutex_t* pthread_mutex_lock_assert(pthread_mutex_t *mutex) { + assert_se(pthread_mutex_lock(mutex) == 0); + return mutex; +} + +static inline void pthread_mutex_unlock_assertp(pthread_mutex_t **mutexp) { + if (*mutexp) + assert_se(pthread_mutex_unlock(*mutexp) == 0); +} diff --git a/src/basic/random-util.c b/src/basic/random-util.c new file mode 100644 index 0000000..200a914 --- /dev/null +++ b/src/basic/random-util.c @@ -0,0 +1,251 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if HAVE_SYS_AUXV_H +# include +#endif + +#include "alloc-util.h" +#include "env-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "io-util.h" +#include "missing_random.h" +#include "missing_syscall.h" +#include "missing_threads.h" +#include "parse-util.h" +#include "random-util.h" +#include "sha256.h" +#include "time-util.h" + +/* This is a "best effort" kind of thing, but has no real security value. So, this should only be used by + * random_bytes(), which is not meant for crypto. This could be made better, but we're *not* trying to roll a + * userspace prng here, or even have forward secrecy, but rather just do the shortest thing that is at least + * better than libc rand(). */ +static void fallback_random_bytes(void *p, size_t n) { + static thread_local uint64_t fallback_counter = 0; + struct { + char label[32]; + uint64_t call_id, block_id; + usec_t stamp_mono, stamp_real; + pid_t pid, tid; + uint8_t auxval[16]; + } state = { + /* Arbitrary domain separation to prevent other usage of AT_RANDOM from clashing. */ + .label = "systemd fallback random bytes v1", + .call_id = fallback_counter++, + .stamp_mono = now(CLOCK_MONOTONIC), + .stamp_real = now(CLOCK_REALTIME), + .pid = getpid(), + .tid = gettid(), + }; + +#if HAVE_SYS_AUXV_H + memcpy(state.auxval, ULONG_TO_PTR(getauxval(AT_RANDOM)), sizeof(state.auxval)); +#endif + + while (n > 0) { + struct sha256_ctx ctx; + + sha256_init_ctx(&ctx); + sha256_process_bytes(&state, sizeof(state), &ctx); + if (n < SHA256_DIGEST_SIZE) { + uint8_t partial[SHA256_DIGEST_SIZE]; + sha256_finish_ctx(&ctx, partial); + memcpy(p, partial, n); + break; + } + sha256_finish_ctx(&ctx, p); + p = (uint8_t *) p + SHA256_DIGEST_SIZE; + n -= SHA256_DIGEST_SIZE; + ++state.block_id; + } +} + +void random_bytes(void *p, size_t n) { + static bool have_getrandom = true, have_grndinsecure = true; + _cleanup_close_ int fd = -1; + + if (n == 0) + return; + + for (;;) { + ssize_t l; + + if (!have_getrandom) + break; + + l = getrandom(p, n, have_grndinsecure ? GRND_INSECURE : GRND_NONBLOCK); + if (l > 0) { + if ((size_t) l == n) + return; /* Done reading, success. */ + p = (uint8_t *) p + l; + n -= l; + continue; /* Interrupted by a signal; keep going. */ + } else if (l == 0) + break; /* Weird, so fallback to /dev/urandom. */ + else if (ERRNO_IS_NOT_SUPPORTED(errno)) { + have_getrandom = false; + break; /* No syscall, so fallback to /dev/urandom. */ + } else if (errno == EINVAL && have_grndinsecure) { + have_grndinsecure = false; + continue; /* No GRND_INSECURE; fallback to GRND_NONBLOCK. */ + } else if (errno == EAGAIN && !have_grndinsecure) + break; /* Will block, but no GRND_INSECURE, so fallback to /dev/urandom. */ + + break; /* Unexpected, so just give up and fallback to /dev/urandom. */ + } + + fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd >= 0 && loop_read_exact(fd, p, n, false) == 0) + return; + + /* This is a terrible fallback. Oh well. */ + fallback_random_bytes(p, n); +} + +int crypto_random_bytes(void *p, size_t n) { + static bool have_getrandom = true, seen_initialized = false; + _cleanup_close_ int fd = -1; + + if (n == 0) + return 0; + + for (;;) { + ssize_t l; + + if (!have_getrandom) + break; + + l = getrandom(p, n, 0); + if (l > 0) { + if ((size_t) l == n) + return 0; /* Done reading, success. */ + p = (uint8_t *) p + l; + n -= l; + continue; /* Interrupted by a signal; keep going. */ + } else if (l == 0) + return -EIO; /* Weird, should never happen. */ + else if (ERRNO_IS_NOT_SUPPORTED(errno)) { + have_getrandom = false; + break; /* No syscall, so fallback to /dev/urandom. */ + } + return -errno; + } + + if (!seen_initialized) { + _cleanup_close_ int ready_fd = -1; + int r; + + ready_fd = open("/dev/random", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (ready_fd < 0) + return -errno; + r = fd_wait_for_event(ready_fd, POLLIN, USEC_INFINITY); + if (r < 0) + return r; + seen_initialized = true; + } + + fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0) + return -errno; + return loop_read_exact(fd, p, n, false); +} + +size_t random_pool_size(void) { + _cleanup_free_ char *s = NULL; + int r; + + /* Read pool size, if possible */ + r = read_one_line_file("/proc/sys/kernel/random/poolsize", &s); + if (r < 0) + log_debug_errno(r, "Failed to read pool size from kernel: %m"); + else { + unsigned sz; + + r = safe_atou(s, &sz); + if (r < 0) + log_debug_errno(r, "Failed to parse pool size: %s", s); + else + /* poolsize is in bits on 2.6, but we want bytes */ + return CLAMP(sz / 8, RANDOM_POOL_SIZE_MIN, RANDOM_POOL_SIZE_MAX); + } + + /* Use the minimum as default, if we can't retrieve the correct value */ + return RANDOM_POOL_SIZE_MIN; +} + +int random_write_entropy(int fd, const void *seed, size_t size, bool credit) { + _cleanup_close_ int opened_fd = -1; + int r; + + assert(seed || size == 0); + + if (size == 0) + return 0; + + if (fd < 0) { + opened_fd = open("/dev/urandom", O_WRONLY|O_CLOEXEC|O_NOCTTY); + if (opened_fd < 0) + return -errno; + + fd = opened_fd; + } + + if (credit) { + _cleanup_free_ struct rand_pool_info *info = NULL; + + /* The kernel API only accepts "int" as entropy count (which is in bits), let's avoid any + * chance for confusion here. */ + if (size > INT_MAX / 8) + return -EOVERFLOW; + + info = malloc(offsetof(struct rand_pool_info, buf) + size); + if (!info) + return -ENOMEM; + + info->entropy_count = size * 8; + info->buf_size = size; + memcpy(info->buf, seed, size); + + if (ioctl(fd, RNDADDENTROPY, info) < 0) + return -errno; + } else { + r = loop_write(fd, seed, size, false); + if (r < 0) + return r; + } + + return 1; +} + +uint64_t random_u64_range(uint64_t m) { + uint64_t x, remainder; + + /* Generates a random number in the range 0…m-1, unbiased. (Java's algorithm) */ + + if (m == 0) /* Let's take m == 0 as special case to return an integer from the full range */ + return random_u64(); + if (m == 1) + return 0; + + remainder = UINT64_MAX % m; + + do { + x = random_u64(); + } while (x >= UINT64_MAX - remainder); + + return x % m; +} diff --git a/src/basic/random-util.h b/src/basic/random-util.h new file mode 100644 index 0000000..2d99807 --- /dev/null +++ b/src/basic/random-util.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include + +void random_bytes(void *p, size_t n); /* Returns random bytes suitable for most uses, but may be insecure sometimes. */ +int crypto_random_bytes(void *p, size_t n); /* Returns secure random bytes after waiting for the RNG to initialize. */ + +static inline uint64_t random_u64(void) { + uint64_t u; + random_bytes(&u, sizeof(u)); + return u; +} + +static inline uint32_t random_u32(void) { + uint32_t u; + random_bytes(&u, sizeof(u)); + return u; +} + +/* Some limits on the pool sizes when we deal with the kernel random pool */ +#define RANDOM_POOL_SIZE_MIN 32U +#define RANDOM_POOL_SIZE_MAX (10U*1024U*1024U) + +size_t random_pool_size(void); + +int random_write_entropy(int fd, const void *seed, size_t size, bool credit); + +uint64_t random_u64_range(uint64_t max); diff --git a/src/basic/ratelimit.c b/src/basic/ratelimit.c new file mode 100644 index 0000000..1e1d5be --- /dev/null +++ b/src/basic/ratelimit.c @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include + +#include "macro.h" +#include "ratelimit.h" + +/* Modelled after Linux' lib/ratelimit.c by Dave Young + * , which is licensed GPLv2. */ + +bool ratelimit_below(RateLimit *r) { + usec_t ts; + + assert(r); + + if (!ratelimit_configured(r)) + return true; + + ts = now(CLOCK_MONOTONIC); + + if (r->begin <= 0 || + usec_sub_unsigned(ts, r->begin) > r->interval) { + r->begin = ts; + + /* Reset counter */ + r->num = 0; + goto good; + } + + if (r->num < r->burst) + goto good; + + r->num++; + return false; + +good: + r->num++; + return true; +} + +unsigned ratelimit_num_dropped(RateLimit *r) { + assert(r); + + return r->num > r->burst ? r->num - r->burst : 0; +} + +usec_t ratelimit_end(const RateLimit *rl) { + assert(rl); + + if (rl->begin == 0) + return 0; + + return usec_add(rl->begin, rl->interval); +} + +usec_t ratelimit_left(const RateLimit *rl) { + assert(rl); + + if (rl->begin == 0) + return 0; + + return usec_sub_unsigned(ratelimit_end(rl), now(CLOCK_MONOTONIC)); +} diff --git a/src/basic/ratelimit.h b/src/basic/ratelimit.h new file mode 100644 index 0000000..bb7160a --- /dev/null +++ b/src/basic/ratelimit.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#include "time-util.h" + +typedef struct RateLimit { + usec_t interval; /* Keep those two fields first so they can be initialized easily: */ + unsigned burst; /* RateLimit rl = { INTERVAL, BURST }; */ + unsigned num; + usec_t begin; +} RateLimit; + +static inline void ratelimit_reset(RateLimit *rl) { + rl->num = rl->begin = 0; +} + +static inline bool ratelimit_configured(RateLimit *rl) { + return rl->interval > 0 && rl->burst > 0; +} + +bool ratelimit_below(RateLimit *r); + +unsigned ratelimit_num_dropped(RateLimit *r); + +usec_t ratelimit_end(const RateLimit *rl); +usec_t ratelimit_left(const RateLimit *rl); diff --git a/src/basic/raw-clone.h b/src/basic/raw-clone.h new file mode 100644 index 0000000..becf42e --- /dev/null +++ b/src/basic/raw-clone.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +/*** + Copyright © 2016 Michael Karcher +***/ + +#include +#include +#include + +#include "log.h" +#include "macro.h" + +/** + * raw_clone() - uses clone to create a new process with clone flags + * @flags: Flags to pass to the clone system call + * + * Uses the clone system call to create a new process with the cloning flags and termination signal passed in the flags + * parameter. Opposed to glibc's clone function, using this function does not set up a separate stack for the child, but + * relies on copy-on-write semantics on the one stack at a common virtual address, just as fork does. + * + * To obtain copy-on-write semantics, flags must not contain CLONE_VM, and thus CLONE_THREAD and CLONE_SIGHAND (which + * require CLONE_VM) are not usable. + * + * Additionally, as this function does not pass the ptid, newtls and ctid parameters to the kernel, flags must not + * contain CLONE_PARENT_SETTID, CLONE_CHILD_SETTID, CLONE_CHILD_CLEARTID or CLONE_SETTLS. + * + * Returns: 0 in the child process and the child process id in the parent. + */ +static inline pid_t raw_clone(unsigned long flags) { + pid_t ret; + + assert((flags & (CLONE_VM|CLONE_PARENT_SETTID|CLONE_CHILD_SETTID| + CLONE_CHILD_CLEARTID|CLONE_SETTLS)) == 0); +#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__) + /* On s390/s390x and cris the order of the first and second arguments + * of the raw clone() system call is reversed. */ + ret = (pid_t) syscall(__NR_clone, NULL, flags); +#elif defined(__sparc__) + { + /** + * sparc always returns the other process id in %o0, and + * a boolean flag whether this is the child or the parent in + * %o1. Inline assembly is needed to get the flag returned + * in %o1. + */ + int in_child, child_pid, error; + + asm volatile("mov %3, %%g1\n\t" + "mov %4, %%o0\n\t" + "mov 0 , %%o1\n\t" +#if defined(__arch64__) + "t 0x6d\n\t" +#else + "t 0x10\n\t" +#endif + "addx %%g0, 0, %2\n\t" + "mov %%o1, %0\n\t" + "mov %%o0, %1" : + "=r"(in_child), "=r"(child_pid), "=r"(error) : + "i"(__NR_clone), "r"(flags) : + "%o1", "%o0", "%g1", "cc" ); + + if (error) { + errno = child_pid; + ret = -1; + } else + ret = in_child ? 0 : child_pid; + } +#else + ret = (pid_t) syscall(__NR_clone, flags, NULL); +#endif + + if (ret == 0) + reset_cached_pid(); + + return ret; +} diff --git a/src/basic/raw-reboot.h b/src/basic/raw-reboot.h new file mode 100644 index 0000000..e6bff30 --- /dev/null +++ b/src/basic/raw-reboot.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include + +/* glibc defines the reboot() API call, which is a wrapper around the system call of the same name, but without the + * extra "arg" parameter. Since we need that parameter for some calls, let's add a "raw" wrapper that is defined the + * same way, except it takes the additional argument. */ + +static inline int raw_reboot(int cmd, const void *arg) { + return (int) syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, arg); +} diff --git a/src/basic/recurse-dir.c b/src/basic/recurse-dir.c new file mode 100644 index 0000000..d16ca98 --- /dev/null +++ b/src/basic/recurse-dir.c @@ -0,0 +1,452 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "alloc-util.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "missing_syscall.h" +#include "mountpoint-util.h" +#include "recurse-dir.h" +#include "sort-util.h" + +#define DEFAULT_RECURSION_MAX 100 + +static int sort_func(struct dirent * const *a, struct dirent * const *b) { + return strcmp((*a)->d_name, (*b)->d_name); +} + +static bool ignore_dirent(const struct dirent *de, RecurseDirFlags flags) { + assert(de); + + /* Depending on flag either ignore everything starting with ".", or just "." itself and ".." */ + + return FLAGS_SET(flags, RECURSE_DIR_IGNORE_DOT) ? + de->d_name[0] == '.' : + dot_or_dot_dot(de->d_name); +} + +int readdir_all(int dir_fd, + RecurseDirFlags flags, + DirectoryEntries **ret) { + + _cleanup_free_ DirectoryEntries *de = NULL; + struct dirent *entry; + DirectoryEntries *nde; + size_t add, sz, j; + + assert(dir_fd >= 0); + + /* Returns an array with pointers to "struct dirent" directory entries, optionally sorted. Free the + * array with readdir_all_freep(). + * + * Start with space for up to 8 directory entries. We expect at least 2 ("." + ".."), hence hopefully + * 8 will cover most cases comprehensively. (Note that most likely a lot more entries will actually + * fit in the buffer, given we calculate maximum file name length here.) */ + de = malloc(offsetof(DirectoryEntries, buffer) + DIRENT_SIZE_MAX * 8); + if (!de) + return -ENOMEM; + + de->buffer_size = 0; + for (;;) { + size_t bs; + ssize_t n; + + bs = MIN(MALLOC_SIZEOF_SAFE(de) - offsetof(DirectoryEntries, buffer), (size_t) SSIZE_MAX); + assert(bs > de->buffer_size); + + n = getdents64(dir_fd, (uint8_t*) de->buffer + de->buffer_size, bs - de->buffer_size); + if (n < 0) + return -errno; + if (n == 0) + break; + + msan_unpoison((uint8_t*) de->buffer + de->buffer_size, n); + + de->buffer_size += n; + + if (de->buffer_size < bs - DIRENT_SIZE_MAX) /* Still room for one more entry, then try to + * fill it up without growing the structure. */ + continue; + + if (bs >= SSIZE_MAX - offsetof(DirectoryEntries, buffer)) + return -EFBIG; + bs = bs >= (SSIZE_MAX - offsetof(DirectoryEntries, buffer))/2 ? SSIZE_MAX - offsetof(DirectoryEntries, buffer) : bs * 2; + + nde = realloc(de, bs); + if (!nde) + return -ENOMEM; + + de = nde; + } + + de->n_entries = 0; + FOREACH_DIRENT_IN_BUFFER(entry, de->buffer, de->buffer_size) { + if (ignore_dirent(entry, flags)) + continue; + + de->n_entries++; + } + + sz = ALIGN(offsetof(DirectoryEntries, buffer) + de->buffer_size); + add = sizeof(struct dirent*) * de->n_entries; + if (add > SIZE_MAX - add) + return -ENOMEM; + + nde = realloc(de, sz + add); + if (!nde) + return -ENOMEM; + + de = nde; + de->entries = (struct dirent**) ((uint8_t*) de + ALIGN(offsetof(DirectoryEntries, buffer) + de->buffer_size)); + + j = 0; + FOREACH_DIRENT_IN_BUFFER(entry, de->buffer, de->buffer_size) { + if (ignore_dirent(entry, flags)) + continue; + + de->entries[j++] = entry; + } + + if (FLAGS_SET(flags, RECURSE_DIR_SORT)) + typesafe_qsort(de->entries, de->n_entries, sort_func); + + if (ret) + *ret = TAKE_PTR(de); + + return 0; +} + +int recurse_dir( + int dir_fd, + const char *path, + unsigned statx_mask, + unsigned n_depth_max, + RecurseDirFlags flags, + recurse_dir_func_t func, + void *userdata) { + + _cleanup_free_ DirectoryEntries *de = NULL; + int r; + + assert(dir_fd >= 0); + assert(func); + + /* This is a lot like ftw()/nftw(), but a lot more modern, i.e. built around openat()/statx()/O_PATH, + * and under the assumption that fds are not as 'expensive' as they used to be. */ + + if (n_depth_max == 0) + return -EOVERFLOW; + if (n_depth_max == UINT_MAX) /* special marker for "default" */ + n_depth_max = DEFAULT_RECURSION_MAX; + + r = readdir_all(dir_fd, flags, &de); + if (r < 0) + return r; + + for (size_t i = 0; i < de->n_entries; i++) { + _cleanup_close_ int inode_fd = -1, subdir_fd = -1; + _cleanup_free_ char *joined = NULL; + STRUCT_STATX_DEFINE(sx); + bool sx_valid = false; + const char *p; + + /* For each directory entry we'll do one of the following: + * + * 1) If the entry refers to a directory, we'll open it as O_DIRECTORY 'subdir_fd' and then statx() the opened directory via that new fd (if requested) + * 2) Otherwise, if RECURSE_DIR_INODE_FD is set we'll open it as O_PATH 'inode_fd' and then statx() the opened inode via that new fd (if requested) + * 3) Otherwise, we'll statx() the directory entry via the directory fd we are currently looking at (if requested) + */ + + if (path) { + joined = path_join(path, de->entries[i]->d_name); + if (!joined) + return -ENOMEM; + + p = joined; + } else + p = de->entries[i]->d_name; + + if (IN_SET(de->entries[i]->d_type, DT_UNKNOWN, DT_DIR)) { + subdir_fd = openat(dir_fd, de->entries[i]->d_name, O_DIRECTORY|O_NOFOLLOW|O_CLOEXEC); + if (subdir_fd < 0) { + if (errno == ENOENT) /* Vanished by now, go for next file immediately */ + continue; + + /* If it is a subdir but we failed to open it, then fail */ + if (!IN_SET(errno, ENOTDIR, ELOOP)) { + log_debug_errno(errno, "Failed to open directory '%s': %m", p); + + assert(errno <= RECURSE_DIR_SKIP_OPEN_DIR_ERROR_MAX - RECURSE_DIR_SKIP_OPEN_DIR_ERROR_BASE); + + r = func(RECURSE_DIR_SKIP_OPEN_DIR_ERROR_BASE + errno, + p, + dir_fd, + -1, + de->entries[i], + NULL, + userdata); + if (r == RECURSE_DIR_LEAVE_DIRECTORY) + break; + if (!IN_SET(r, RECURSE_DIR_CONTINUE, RECURSE_DIR_SKIP_ENTRY)) + return r; + + continue; + } + + /* If it's not a subdir, then let's handle it like a regular inode below */ + + } else { + /* If we managed to get a DIR* off the inode, it's definitely a directory. */ + de->entries[i]->d_type = DT_DIR; + + if (statx_mask != 0 || (flags & RECURSE_DIR_SAME_MOUNT)) { + r = statx_fallback(subdir_fd, "", AT_EMPTY_PATH, statx_mask, &sx); + if (r < 0) + return r; + + sx_valid = true; + } + } + } + + if (subdir_fd < 0) { + /* It's not a subdirectory. */ + + if (flags & RECURSE_DIR_INODE_FD) { + + inode_fd = openat(dir_fd, de->entries[i]->d_name, O_PATH|O_NOFOLLOW|O_CLOEXEC); + if (inode_fd < 0) { + if (errno == ENOENT) /* Vanished by now, go for next file immediately */ + continue; + + log_debug_errno(errno, "Failed to open directory entry '%s': %m", p); + + assert(errno <= RECURSE_DIR_SKIP_OPEN_INODE_ERROR_MAX - RECURSE_DIR_SKIP_OPEN_INODE_ERROR_BASE); + + r = func(RECURSE_DIR_SKIP_OPEN_INODE_ERROR_BASE + errno, + p, + dir_fd, + -1, + de->entries[i], + NULL, + userdata); + if (r == RECURSE_DIR_LEAVE_DIRECTORY) + break; + if (!IN_SET(r, RECURSE_DIR_CONTINUE, RECURSE_DIR_SKIP_ENTRY)) + return r; + + continue; + } + + /* If we open the inode, then verify it's actually a non-directory, like we + * assume. Let's guarantee that we never pass statx data of a directory where + * caller expects a non-directory */ + + r = statx_fallback(inode_fd, "", AT_EMPTY_PATH, statx_mask | STATX_TYPE, &sx); + if (r < 0) + return r; + + assert(sx.stx_mask & STATX_TYPE); + sx_valid = true; + + if (S_ISDIR(sx.stx_mode)) { + /* What? It's a directory now? Then someone must have quickly + * replaced it. Let's handle that gracefully: convert it to a + * directory fd — which should be riskless now that we pinned the + * inode. */ + + subdir_fd = fd_reopen(inode_fd, O_DIRECTORY|O_CLOEXEC); + if (subdir_fd < 0) + return subdir_fd; + + inode_fd = safe_close(inode_fd); + } + + } else if (statx_mask != 0 || (de->entries[i]->d_type == DT_UNKNOWN && (flags & RECURSE_DIR_ENSURE_TYPE))) { + + r = statx_fallback(dir_fd, de->entries[i]->d_name, AT_SYMLINK_NOFOLLOW, statx_mask | STATX_TYPE, &sx); + if (r == -ENOENT) /* Vanished by now? Go for next file immediately */ + continue; + if (r < 0) { + log_debug_errno(r, "Failed to stat directory entry '%s': %m", p); + + assert(errno <= RECURSE_DIR_SKIP_STAT_INODE_ERROR_MAX - RECURSE_DIR_SKIP_STAT_INODE_ERROR_BASE); + + r = func(RECURSE_DIR_SKIP_STAT_INODE_ERROR_BASE + -r, + p, + dir_fd, + -1, + de->entries[i], + NULL, + userdata); + if (r == RECURSE_DIR_LEAVE_DIRECTORY) + break; + if (!IN_SET(r, RECURSE_DIR_CONTINUE, RECURSE_DIR_SKIP_ENTRY)) + return r; + + continue; + } + + assert(sx.stx_mask & STATX_TYPE); + sx_valid = true; + + if (S_ISDIR(sx.stx_mode)) { + /* So it suddenly is a directory, but we couldn't open it as such + * earlier? That is weird, and probably means somebody is racing + * against us. We could of course retry and open it as a directory + * again, but the chance to win here is limited. Hence, let's + * propagate this as EISDIR error instead. That way we make this + * something that can be reasonably handled, even though we give the + * guarantee that RECURSE_DIR_ENTRY is strictly issued for + * non-directory dirents. */ + + log_debug_errno(r, "Non-directory entry '%s' suddenly became a directory: %m", p); + + r = func(RECURSE_DIR_SKIP_STAT_INODE_ERROR_BASE + EISDIR, + p, + dir_fd, + -1, + de->entries[i], + NULL, + userdata); + if (r == RECURSE_DIR_LEAVE_DIRECTORY) + break; + if (!IN_SET(r, RECURSE_DIR_CONTINUE, RECURSE_DIR_SKIP_ENTRY)) + return r; + + continue; + } + } + } + + if (sx_valid) { + /* Copy over the data we acquired through statx() if we acquired any */ + if (sx.stx_mask & STATX_TYPE) { + assert((subdir_fd < 0) == !S_ISDIR(sx.stx_mode)); + de->entries[i]->d_type = IFTODT(sx.stx_mode); + } + + if (sx.stx_mask & STATX_INO) + de->entries[i]->d_ino = sx.stx_ino; + } + + if (subdir_fd >= 0) { + if (FLAGS_SET(flags, RECURSE_DIR_SAME_MOUNT)) { + bool is_mount; + + if (sx_valid && FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) + is_mount = FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT); + else { + r = fd_is_mount_point(dir_fd, de->entries[i]->d_name, 0); + if (r < 0) + log_debug_errno(r, "Failed to determine whether %s is a submount, assuming not: %m", p); + + is_mount = r > 0; + } + + if (is_mount) { + r = func(RECURSE_DIR_SKIP_MOUNT, + p, + dir_fd, + subdir_fd, + de->entries[i], + statx_mask != 0 ? &sx : NULL, /* only pass sx if user asked for it */ + userdata); + if (r == RECURSE_DIR_LEAVE_DIRECTORY) + break; + if (!IN_SET(r, RECURSE_DIR_CONTINUE, RECURSE_DIR_SKIP_ENTRY)) + return r; + + continue; + } + } + + if (n_depth_max <= 1) { + /* When we reached max depth, generate a special event */ + + r = func(RECURSE_DIR_SKIP_DEPTH, + p, + dir_fd, + subdir_fd, + de->entries[i], + statx_mask != 0 ? &sx : NULL, /* only pass sx if user asked for it */ + userdata); + if (r == RECURSE_DIR_LEAVE_DIRECTORY) + break; + if (!IN_SET(r, RECURSE_DIR_CONTINUE, RECURSE_DIR_SKIP_ENTRY)) + return r; + + continue; + } + + r = func(RECURSE_DIR_ENTER, + p, + dir_fd, + subdir_fd, + de->entries[i], + statx_mask != 0 ? &sx : NULL, /* only pass sx if user asked for it */ + userdata); + if (r == RECURSE_DIR_LEAVE_DIRECTORY) + break; + if (r == RECURSE_DIR_SKIP_ENTRY) + continue; + if (r != RECURSE_DIR_CONTINUE) + return r; + + r = recurse_dir(subdir_fd, + p, + statx_mask, + n_depth_max - 1, + flags, + func, + userdata); + if (r != 0) + return r; + + r = func(RECURSE_DIR_LEAVE, + p, + dir_fd, + subdir_fd, + de->entries[i], + statx_mask != 0 ? &sx : NULL, /* only pass sx if user asked for it */ + userdata); + } else + /* Non-directory inode */ + r = func(RECURSE_DIR_ENTRY, + p, + dir_fd, + inode_fd, + de->entries[i], + statx_mask != 0 ? &sx : NULL, /* only pass sx if user asked for it */ + userdata); + + + if (r == RECURSE_DIR_LEAVE_DIRECTORY) + break; + if (!IN_SET(r, RECURSE_DIR_SKIP_ENTRY, RECURSE_DIR_CONTINUE)) + return r; + } + + return 0; +} + +int recurse_dir_at( + int atfd, + const char *path, + unsigned statx_mask, + unsigned n_depth_max, + RecurseDirFlags flags, + recurse_dir_func_t func, + void *userdata) { + + _cleanup_close_ int fd = -1; + + assert(atfd >= 0 || atfd == AT_FDCWD); + assert(func); + + fd = openat(atfd, path ?: ".", O_DIRECTORY|O_CLOEXEC); + if (fd < 0) + return -errno; + + return recurse_dir(fd, path, statx_mask, n_depth_max, flags, func, userdata); +} diff --git a/src/basic/recurse-dir.h b/src/basic/recurse-dir.h new file mode 100644 index 0000000..779c91e --- /dev/null +++ b/src/basic/recurse-dir.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +#include "errno-list.h" +#include "stat-util.h" +#include "macro.h" + +typedef enum RecurseDirEvent { + RECURSE_DIR_ENTER, /* only for dir inodes */ + RECURSE_DIR_LEAVE, /* only for dir inodes */ + RECURSE_DIR_ENTRY, /* only for non-dir inodes */ + RECURSE_DIR_SKIP_MOUNT, /* only for dir inodes: when we don't descent into submounts */ + RECURSE_DIR_SKIP_DEPTH, /* only for dir inodes: when we reached the max depth */ + + /* If we hit an error opening/stating an entry, then we'll fire a + * 'RECURSE_DIR_SKIP_{OPEN_DIR|OPEN_INODE|STAT_INODE}_ERROR_BASE + errno' event. In this case 'de' + * will be valid, but the statx data NULL and the inode fd -1. */ + RECURSE_DIR_SKIP_OPEN_DIR_ERROR_BASE, + RECURSE_DIR_SKIP_OPEN_DIR_ERROR_MAX = RECURSE_DIR_SKIP_OPEN_DIR_ERROR_BASE + ERRNO_MAX, + + RECURSE_DIR_SKIP_OPEN_INODE_ERROR_BASE, + RECURSE_DIR_SKIP_OPEN_INODE_ERROR_MAX = RECURSE_DIR_SKIP_OPEN_INODE_ERROR_BASE + ERRNO_MAX, + + RECURSE_DIR_SKIP_STAT_INODE_ERROR_BASE, + RECURSE_DIR_SKIP_STAT_INODE_ERROR_MAX = RECURSE_DIR_SKIP_STAT_INODE_ERROR_BASE + ERRNO_MAX, + + _RECURSE_DIR_EVENT_MAX, + _RECURSE_DIR_EVENT_INVALID = -EINVAL, +} RecurseDirEvent; + +#define RECURSE_DIR_CONTINUE 0 +#define RECURSE_DIR_LEAVE_DIRECTORY INT_MIN +#define RECURSE_DIR_SKIP_ENTRY (INT_MIN+1) + +/* Make sure that the negative errno range and these two special returns don't overlap */ +assert_cc(RECURSE_DIR_LEAVE_DIRECTORY < -ERRNO_MAX); +assert_cc(RECURSE_DIR_SKIP_ENTRY < -ERRNO_MAX); + +/* Prototype for the callback function that is called whenever we enter or leave a dir inode, or find another dir entry. Return values are: + * + * RECURSE_DIR_CONTINUE (i.e. 0) → continue with next entry + * RECURSE_DIR_LEAVE_DIRECTORY → leave current directory immediately, don't process further siblings + * RECURSE_DIR_SKIP_ENTRY → skip this entry otherwise (only makes sense on RECURSE_DIR_ENTER) + * others → terminate iteration entirely, return the specified value (idea is that + * < 0 indicates errors and > 0 indicates various forms of success) + */ +typedef int (*recurse_dir_func_t)( + RecurseDirEvent event, + const char *path, /* Full non-normalized path, i.e. the path specified during recurise_dir() with what we found appended */ + int dir_fd, /* fd of the current dir */ + int inode_fd, /* fd of the current entry in the current dir (O_DIRECTORY if directory, and O_PATH otherwise, but only if RECURSE_DIR_INODE_FD was set) */ + const struct dirent *de, /* directory entry (always valid) */ + const struct statx *sx, /* statx data (only if statx_mask was non-zero) */ + void *userdata); + +typedef enum RecurseDirFlags { + /* Interpreted by readdir_all() */ + RECURSE_DIR_SORT = 1 << 0, /* sort file directory entries before processing them */ + RECURSE_DIR_IGNORE_DOT = 1 << 1, /* ignore all dot files ("." and ".." are always ignored) */ + + /* Interpreted by recurse_dir() */ + RECURSE_DIR_ENSURE_TYPE = 1 << 2, /* guarantees that 'd_type' field of 'de' is not DT_UNKNOWN */ + RECURSE_DIR_SAME_MOUNT = 1 << 3, /* skips over subdirectories that are submounts */ + RECURSE_DIR_INODE_FD = 1 << 4, /* passes an opened inode fd (O_DIRECTORY fd in case of dirs, O_PATH otherwise) */ +} RecurseDirFlags; + +typedef struct DirectoryEntries { + size_t n_entries; + struct dirent** entries; + size_t buffer_size; + struct dirent buffer[]; +} DirectoryEntries; + +int readdir_all(int dir_fd, RecurseDirFlags flags, DirectoryEntries **ret); + +int recurse_dir(int dir_fd, const char *path, unsigned statx_mask, unsigned n_depth_max, RecurseDirFlags flags, recurse_dir_func_t func, void *userdata); +int recurse_dir_at(int atfd, const char *path, unsigned statx_mask, unsigned n_depth_max, RecurseDirFlags flags, recurse_dir_func_t func, void *userdata); diff --git a/src/basic/replace-var.c b/src/basic/replace-var.c new file mode 100644 index 0000000..01c26ce --- /dev/null +++ b/src/basic/replace-var.c @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include + +#include "alloc-util.h" +#include "macro.h" +#include "replace-var.h" +#include "string-util.h" + +/* + * Generic infrastructure for replacing @FOO@ style variables in + * strings. Will call a callback for each replacement. + */ + +static int get_variable(const char *b, char **r) { + size_t k; + char *t; + + assert(b); + assert(r); + + if (*b != '@') + return 0; + + k = strspn(b + 1, UPPERCASE_LETTERS "_"); + if (k <= 0 || b[k+1] != '@') + return 0; + + t = strndup(b + 1, k); + if (!t) + return -ENOMEM; + + *r = t; + return 1; +} + +char *replace_var(const char *text, char *(*lookup)(const char *variable, void *userdata), void *userdata) { + char *r, *t; + const char *f; + size_t l; + + assert(text); + assert(lookup); + + l = strlen(text); + r = new(char, l+1); + if (!r) + return NULL; + + f = text; + t = r; + while (*f) { + _cleanup_free_ char *v = NULL, *n = NULL; + char *a; + int k; + size_t skip, d, nl; + + k = get_variable(f, &v); + if (k < 0) + goto oom; + if (k == 0) { + *(t++) = *(f++); + continue; + } + + n = lookup(v, userdata); + if (!n) + goto oom; + + skip = strlen(v) + 2; + + d = t - r; + nl = l - skip + strlen(n); + a = realloc(r, nl + 1); + if (!a) + goto oom; + + l = nl; + r = a; + t = r + d; + + t = stpcpy(t, n); + f += skip; + } + + *t = 0; + return r; + +oom: + return mfree(r); +} diff --git a/src/basic/replace-var.h b/src/basic/replace-var.h new file mode 100644 index 0000000..644d9df --- /dev/null +++ b/src/basic/replace-var.h @@ -0,0 +1,4 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +char *replace_var(const char *text, char *(*lookup)(const char *variable, void *userdata), void *userdata); diff --git a/src/basic/rlimit-util.c b/src/basic/rlimit-util.c new file mode 100644 index 0000000..e150976 --- /dev/null +++ b/src/basic/rlimit-util.c @@ -0,0 +1,413 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include + +#include "alloc-util.h" +#include "errno-util.h" +#include "extract-word.h" +#include "fd-util.h" +#include "format-util.h" +#include "macro.h" +#include "missing_resource.h" +#include "rlimit-util.h" +#include "string-table.h" +#include "time-util.h" + +int setrlimit_closest(int resource, const struct rlimit *rlim) { + struct rlimit highest, fixed; + + assert(rlim); + + if (setrlimit(resource, rlim) >= 0) + return 0; + + if (errno != EPERM) + return -errno; + + /* So we failed to set the desired setrlimit, then let's try + * to get as close as we can */ + if (getrlimit(resource, &highest) < 0) + return -errno; + + /* If the hard limit is unbounded anyway, then the EPERM had other reasons, let's propagate the original EPERM + * then */ + if (highest.rlim_max == RLIM_INFINITY) + return -EPERM; + + fixed = (struct rlimit) { + .rlim_cur = MIN(rlim->rlim_cur, highest.rlim_max), + .rlim_max = MIN(rlim->rlim_max, highest.rlim_max), + }; + + /* Shortcut things if we wouldn't change anything. */ + if (fixed.rlim_cur == highest.rlim_cur && + fixed.rlim_max == highest.rlim_max) + return 0; + + log_debug("Failed at setting rlimit " RLIM_FMT " for resource RLIMIT_%s. Will attempt setting value " RLIM_FMT " instead.", rlim->rlim_max, rlimit_to_string(resource), fixed.rlim_max); + + return RET_NERRNO(setrlimit(resource, &fixed)); +} + +int setrlimit_closest_all(const struct rlimit *const *rlim, int *which_failed) { + int r; + + assert(rlim); + + /* On failure returns the limit's index that failed in *which_failed, but only if non-NULL */ + + for (int i = 0; i < _RLIMIT_MAX; i++) { + if (!rlim[i]) + continue; + + r = setrlimit_closest(i, rlim[i]); + if (r < 0) { + if (which_failed) + *which_failed = i; + + return r; + } + } + + if (which_failed) + *which_failed = -1; + + return 0; +} + +static int rlimit_parse_u64(const char *val, rlim_t *ret) { + uint64_t u; + int r; + + assert(val); + assert(ret); + + if (streq(val, "infinity")) { + *ret = RLIM_INFINITY; + return 0; + } + + /* setrlimit(2) suggests rlim_t is always 64bit on Linux. */ + assert_cc(sizeof(rlim_t) == sizeof(uint64_t)); + + r = safe_atou64(val, &u); + if (r < 0) + return r; + if (u >= (uint64_t) RLIM_INFINITY) + return -ERANGE; + + *ret = (rlim_t) u; + return 0; +} + +static int rlimit_parse_size(const char *val, rlim_t *ret) { + uint64_t u; + int r; + + assert(val); + assert(ret); + + if (streq(val, "infinity")) { + *ret = RLIM_INFINITY; + return 0; + } + + r = parse_size(val, 1024, &u); + if (r < 0) + return r; + if (u >= (uint64_t) RLIM_INFINITY) + return -ERANGE; + + *ret = (rlim_t) u; + return 0; +} + +static int rlimit_parse_sec(const char *val, rlim_t *ret) { + uint64_t u; + usec_t t; + int r; + + assert(val); + assert(ret); + + if (streq(val, "infinity")) { + *ret = RLIM_INFINITY; + return 0; + } + + r = parse_sec(val, &t); + if (r < 0) + return r; + if (t == USEC_INFINITY) { + *ret = RLIM_INFINITY; + return 0; + } + + u = (uint64_t) DIV_ROUND_UP(t, USEC_PER_SEC); + if (u >= (uint64_t) RLIM_INFINITY) + return -ERANGE; + + *ret = (rlim_t) u; + return 0; +} + +static int rlimit_parse_usec(const char *val, rlim_t *ret) { + usec_t t; + int r; + + assert(val); + assert(ret); + + if (streq(val, "infinity")) { + *ret = RLIM_INFINITY; + return 0; + } + + r = parse_time(val, &t, 1); + if (r < 0) + return r; + if (t == USEC_INFINITY) { + *ret = RLIM_INFINITY; + return 0; + } + + *ret = (rlim_t) t; + return 0; +} + +static int rlimit_parse_nice(const char *val, rlim_t *ret) { + uint64_t rl; + int r; + + /* So, Linux is weird. The range for RLIMIT_NICE is 40..1, mapping to the nice levels -20..19. However, the + * RLIMIT_NICE limit defaults to 0 by the kernel, i.e. a value that maps to nice level 20, which of course is + * bogus and does not exist. In order to permit parsing the RLIMIT_NICE of 0 here we hence implement a slight + * asymmetry: when parsing as positive nice level we permit 0..19. When parsing as negative nice level, we + * permit -20..0. But when parsing as raw resource limit value then we also allow the special value 0. + * + * Yeah, Linux is quality engineering sometimes... */ + + if (val[0] == '+') { + + /* Prefixed with "+": Parse as positive user-friendly nice value */ + r = safe_atou64(val + 1, &rl); + if (r < 0) + return r; + + if (rl >= PRIO_MAX) + return -ERANGE; + + rl = 20 - rl; + + } else if (val[0] == '-') { + + /* Prefixed with "-": Parse as negative user-friendly nice value */ + r = safe_atou64(val + 1, &rl); + if (r < 0) + return r; + + if (rl > (uint64_t) (-PRIO_MIN)) + return -ERANGE; + + rl = 20 + rl; + } else { + + /* Not prefixed: parse as raw resource limit value */ + r = safe_atou64(val, &rl); + if (r < 0) + return r; + + if (rl > (uint64_t) (20 - PRIO_MIN)) + return -ERANGE; + } + + *ret = (rlim_t) rl; + return 0; +} + +static int (*const rlimit_parse_table[_RLIMIT_MAX])(const char *val, rlim_t *ret) = { + [RLIMIT_CPU] = rlimit_parse_sec, + [RLIMIT_FSIZE] = rlimit_parse_size, + [RLIMIT_DATA] = rlimit_parse_size, + [RLIMIT_STACK] = rlimit_parse_size, + [RLIMIT_CORE] = rlimit_parse_size, + [RLIMIT_RSS] = rlimit_parse_size, + [RLIMIT_NOFILE] = rlimit_parse_u64, + [RLIMIT_AS] = rlimit_parse_size, + [RLIMIT_NPROC] = rlimit_parse_u64, + [RLIMIT_MEMLOCK] = rlimit_parse_size, + [RLIMIT_LOCKS] = rlimit_parse_u64, + [RLIMIT_SIGPENDING] = rlimit_parse_u64, + [RLIMIT_MSGQUEUE] = rlimit_parse_size, + [RLIMIT_NICE] = rlimit_parse_nice, + [RLIMIT_RTPRIO] = rlimit_parse_u64, + [RLIMIT_RTTIME] = rlimit_parse_usec, +}; + +int rlimit_parse_one(int resource, const char *val, rlim_t *ret) { + assert(val); + assert(ret); + + if (resource < 0) + return -EINVAL; + if (resource >= _RLIMIT_MAX) + return -EINVAL; + + return rlimit_parse_table[resource](val, ret); +} + +int rlimit_parse(int resource, const char *val, struct rlimit *ret) { + _cleanup_free_ char *hard = NULL, *soft = NULL; + rlim_t hl, sl; + int r; + + assert(val); + assert(ret); + + r = extract_first_word(&val, &soft, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; + + r = rlimit_parse_one(resource, soft, &sl); + if (r < 0) + return r; + + r = extract_first_word(&val, &hard, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + if (!isempty(val)) + return -EINVAL; + if (r == 0) + hl = sl; + else { + r = rlimit_parse_one(resource, hard, &hl); + if (r < 0) + return r; + if (sl > hl) + return -EILSEQ; + } + + *ret = (struct rlimit) { + .rlim_cur = sl, + .rlim_max = hl, + }; + + return 0; +} + +int rlimit_format(const struct rlimit *rl, char **ret) { + _cleanup_free_ char *s = NULL; + int r; + + assert(rl); + assert(ret); + + if (rl->rlim_cur >= RLIM_INFINITY && rl->rlim_max >= RLIM_INFINITY) + r = free_and_strdup(&s, "infinity"); + else if (rl->rlim_cur >= RLIM_INFINITY) + r = asprintf(&s, "infinity:" RLIM_FMT, rl->rlim_max); + else if (rl->rlim_max >= RLIM_INFINITY) + r = asprintf(&s, RLIM_FMT ":infinity", rl->rlim_cur); + else if (rl->rlim_cur == rl->rlim_max) + r = asprintf(&s, RLIM_FMT, rl->rlim_cur); + else + r = asprintf(&s, RLIM_FMT ":" RLIM_FMT, rl->rlim_cur, rl->rlim_max); + if (r < 0) + return -ENOMEM; + + *ret = TAKE_PTR(s); + return 0; +} + +static const char* const rlimit_table[_RLIMIT_MAX] = { + [RLIMIT_AS] = "AS", + [RLIMIT_CORE] = "CORE", + [RLIMIT_CPU] = "CPU", + [RLIMIT_DATA] = "DATA", + [RLIMIT_FSIZE] = "FSIZE", + [RLIMIT_LOCKS] = "LOCKS", + [RLIMIT_MEMLOCK] = "MEMLOCK", + [RLIMIT_MSGQUEUE] = "MSGQUEUE", + [RLIMIT_NICE] = "NICE", + [RLIMIT_NOFILE] = "NOFILE", + [RLIMIT_NPROC] = "NPROC", + [RLIMIT_RSS] = "RSS", + [RLIMIT_RTPRIO] = "RTPRIO", + [RLIMIT_RTTIME] = "RTTIME", + [RLIMIT_SIGPENDING] = "SIGPENDING", + [RLIMIT_STACK] = "STACK", +}; + +DEFINE_STRING_TABLE_LOOKUP(rlimit, int); + +int rlimit_from_string_harder(const char *s) { + const char *suffix; + + /* The official prefix */ + suffix = startswith(s, "RLIMIT_"); + if (suffix) + return rlimit_from_string(suffix); + + /* Our own unit file setting prefix */ + suffix = startswith(s, "Limit"); + if (suffix) + return rlimit_from_string(suffix); + + return rlimit_from_string(s); +} + +void rlimit_free_all(struct rlimit **rl) { + int i; + + if (!rl) + return; + + for (i = 0; i < _RLIMIT_MAX; i++) + rl[i] = mfree(rl[i]); +} + +int rlimit_nofile_bump(int limit) { + int r; + + /* Bumps the (soft) RLIMIT_NOFILE resource limit as close as possible to the specified limit. If a negative + * limit is specified, bumps it to the maximum the kernel and the hard resource limit allows. This call should + * be used by all our programs that might need a lot of fds, and that know how to deal with high fd numbers + * (i.e. do not use select() — which chokes on fds >= 1024) */ + + if (limit < 0) + limit = read_nr_open(); + + if (limit < 3) + limit = 3; + + r = setrlimit_closest(RLIMIT_NOFILE, &RLIMIT_MAKE_CONST(limit)); + if (r < 0) + return log_debug_errno(r, "Failed to set RLIMIT_NOFILE: %m"); + + return 0; +} + +int rlimit_nofile_safe(void) { + struct rlimit rl; + + /* Resets RLIMIT_NOFILE's soft limit FD_SETSIZE (i.e. 1024), for compatibility with software still using + * select() */ + + if (getrlimit(RLIMIT_NOFILE, &rl) < 0) + return log_debug_errno(errno, "Failed to query RLIMIT_NOFILE: %m"); + + if (rl.rlim_cur <= FD_SETSIZE) + return 0; + + /* So we might have inherited a hard limit that's larger than the kernel's maximum limit as stored in + * /proc/sys/fs/nr_open. If we pass this hard limit unmodified to setrlimit(), we'll get EPERM. To + * make sure that doesn't happen, let's limit our hard limit to the value from nr_open. */ + rl.rlim_max = MIN(rl.rlim_max, (rlim_t) read_nr_open()); + rl.rlim_cur = MIN((rlim_t) FD_SETSIZE, rl.rlim_max); + if (setrlimit(RLIMIT_NOFILE, &rl) < 0) + return log_debug_errno(errno, "Failed to lower RLIMIT_NOFILE's soft limit to " RLIM_FMT ": %m", rl.rlim_cur); + + return 1; +} diff --git a/src/basic/rlimit-util.h b/src/basic/rlimit-util.h new file mode 100644 index 0000000..59bc066 --- /dev/null +++ b/src/basic/rlimit-util.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#include "macro.h" + +const char *rlimit_to_string(int i) _const_; +int rlimit_from_string(const char *s) _pure_; +int rlimit_from_string_harder(const char *s) _pure_; + +int setrlimit_closest(int resource, const struct rlimit *rlim); +int setrlimit_closest_all(const struct rlimit * const *rlim, int *which_failed); + +int rlimit_parse_one(int resource, const char *val, rlim_t *ret); +int rlimit_parse(int resource, const char *val, struct rlimit *ret); + +int rlimit_format(const struct rlimit *rl, char **ret); + +void rlimit_free_all(struct rlimit **rl); + +#define RLIMIT_MAKE_CONST(lim) ((struct rlimit) { lim, lim }) + +int rlimit_nofile_bump(int limit); +int rlimit_nofile_safe(void); diff --git a/src/basic/set.h b/src/basic/set.h new file mode 100644 index 0000000..618e729 --- /dev/null +++ b/src/basic/set.h @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "extract-word.h" +#include "hashmap.h" +#include "macro.h" + +#define set_free_and_replace(a, b) \ + free_and_replace_full(a, b, set_free) + +Set* _set_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS); +#define set_new(ops) _set_new(ops HASHMAP_DEBUG_SRC_ARGS) + +static inline Set* set_free(Set *s) { + return (Set*) _hashmap_free(HASHMAP_BASE(s), NULL, NULL); +} + +static inline Set* set_free_free(Set *s) { + return (Set*) _hashmap_free(HASHMAP_BASE(s), free, NULL); +} + +/* no set_free_free_free */ + +#define set_copy(s) ((Set*) _hashmap_copy(HASHMAP_BASE(s) HASHMAP_DEBUG_SRC_ARGS)) + +int _set_ensure_allocated(Set **s, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS); +#define set_ensure_allocated(h, ops) _set_ensure_allocated(h, ops HASHMAP_DEBUG_SRC_ARGS) + +int set_put(Set *s, const void *key); +/* no set_update */ +/* no set_replace */ +static inline void *set_get(const Set *s, const void *key) { + return _hashmap_get(HASHMAP_BASE((Set *) s), key); +} +/* no set_get2 */ + +static inline bool set_contains(const Set *s, const void *key) { + return _hashmap_contains(HASHMAP_BASE((Set *) s), key); +} + +static inline void *set_remove(Set *s, const void *key) { + return _hashmap_remove(HASHMAP_BASE(s), key); +} + +/* no set_remove2 */ +/* no set_remove_value */ +int set_remove_and_put(Set *s, const void *old_key, const void *new_key); +/* no set_remove_and_replace */ +int set_merge(Set *s, Set *other); + +static inline int set_reserve(Set *h, unsigned entries_add) { + return _hashmap_reserve(HASHMAP_BASE(h), entries_add); +} + +static inline int set_move(Set *s, Set *other) { + return _hashmap_move(HASHMAP_BASE(s), HASHMAP_BASE(other)); +} + +static inline int set_move_one(Set *s, Set *other, const void *key) { + return _hashmap_move_one(HASHMAP_BASE(s), HASHMAP_BASE(other), key); +} + +static inline unsigned set_size(const Set *s) { + return _hashmap_size(HASHMAP_BASE((Set *) s)); +} + +static inline bool set_isempty(const Set *s) { + return set_size(s) == 0; +} + +static inline unsigned set_buckets(const Set *s) { + return _hashmap_buckets(HASHMAP_BASE((Set *) s)); +} + +static inline bool set_iterate(const Set *s, Iterator *i, void **value) { + return _hashmap_iterate(HASHMAP_BASE((Set*) s), i, value, NULL); +} + +static inline void set_clear(Set *s) { + _hashmap_clear(HASHMAP_BASE(s), NULL, NULL); +} + +static inline void set_clear_free(Set *s) { + _hashmap_clear(HASHMAP_BASE(s), free, NULL); +} + +/* no set_clear_free_free */ + +static inline void *set_steal_first(Set *s) { + return _hashmap_first_key_and_value(HASHMAP_BASE(s), true, NULL); +} + +#define set_clear_with_destructor(s, f) \ + ({ \ + Set *_s = (s); \ + void *_item; \ + while ((_item = set_steal_first(_s))) \ + f(_item); \ + _s; \ + }) +#define set_free_with_destructor(s, f) \ + set_free(set_clear_with_destructor(s, f)) + +/* no set_steal_first_key */ +/* no set_first_key */ + +static inline void *set_first(const Set *s) { + return _hashmap_first_key_and_value(HASHMAP_BASE((Set *) s), false, NULL); +} + +/* no set_next */ + +static inline char **set_get_strv(Set *s) { + return _hashmap_get_strv(HASHMAP_BASE(s)); +} + +int _set_ensure_put(Set **s, const struct hash_ops *hash_ops, const void *key HASHMAP_DEBUG_PARAMS); +#define set_ensure_put(s, hash_ops, key) _set_ensure_put(s, hash_ops, key HASHMAP_DEBUG_SRC_ARGS) + +int _set_ensure_consume(Set **s, const struct hash_ops *hash_ops, void *key HASHMAP_DEBUG_PARAMS); +#define set_ensure_consume(s, hash_ops, key) _set_ensure_consume(s, hash_ops, key HASHMAP_DEBUG_SRC_ARGS) + +int set_consume(Set *s, void *value); + +int _set_put_strndup_full(Set **s, const struct hash_ops *hash_ops, const char *p, size_t n HASHMAP_DEBUG_PARAMS); +#define set_put_strndup_full(s, hash_ops, p, n) _set_put_strndup_full(s, hash_ops, p, n HASHMAP_DEBUG_SRC_ARGS) +#define set_put_strdup_full(s, hash_ops, p) set_put_strndup_full(s, hash_ops, p, SIZE_MAX) +#define set_put_strndup(s, p, n) set_put_strndup_full(s, &string_hash_ops_free, p, n) +#define set_put_strdup(s, p) set_put_strndup(s, p, SIZE_MAX) + +int _set_put_strdupv_full(Set **s, const struct hash_ops *hash_ops, char **l HASHMAP_DEBUG_PARAMS); +#define set_put_strdupv_full(s, hash_ops, l) _set_put_strdupv_full(s, hash_ops, l HASHMAP_DEBUG_SRC_ARGS) +#define set_put_strdupv(s, l) set_put_strdupv_full(s, &string_hash_ops_free, l) + +int set_put_strsplit(Set *s, const char *v, const char *separators, ExtractFlags flags); + +#define _SET_FOREACH(e, s, i) \ + for (Iterator i = ITERATOR_FIRST; set_iterate((s), &i, (void**)&(e)); ) +#define SET_FOREACH(e, s) \ + _SET_FOREACH(e, s, UNIQ_T(i, UNIQ)) + +#define SET_FOREACH_MOVE(e, d, s) \ + for (; ({ e = set_first(s); assert_se(!e || set_move_one(d, s, e) >= 0); e; }); ) + +DEFINE_TRIVIAL_CLEANUP_FUNC(Set*, set_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(Set*, set_free_free); + +#define _cleanup_set_free_ _cleanup_(set_freep) +#define _cleanup_set_free_free_ _cleanup_(set_free_freep) + +int set_strjoin(Set *s, const char *separator, bool wrap_with_separator, char **ret); + +bool set_equal(Set *a, Set *b); + +bool set_fnmatch(Set *include_patterns, Set *exclude_patterns, const char *needle); diff --git a/src/basic/sigbus.c b/src/basic/sigbus.c new file mode 100644 index 0000000..33e2045 --- /dev/null +++ b/src/basic/sigbus.c @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include + +#include "macro.h" +#include "memory-util.h" +#include "missing_syscall.h" +#include "process-util.h" +#include "sigbus.h" + +#define SIGBUS_QUEUE_MAX 64 + +static struct sigaction old_sigaction; +static unsigned n_installed = 0; + +/* We maintain a fixed size list of page addresses that triggered a + SIGBUS. We access with list with atomic operations, so that we + don't have to deal with locks between signal handler and main + programs in possibly multiple threads. */ + +static void* volatile sigbus_queue[SIGBUS_QUEUE_MAX]; +static volatile sig_atomic_t n_sigbus_queue = 0; + +static void sigbus_push(void *addr) { + assert(addr); + + /* Find a free place, increase the number of entries and leave, if we can */ + for (size_t u = 0; u < SIGBUS_QUEUE_MAX; u++) { + /* OK to initialize this here since we haven't started the atomic ops yet */ + void *tmp = NULL; + if (__atomic_compare_exchange_n(&sigbus_queue[u], &tmp, addr, false, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { + __atomic_fetch_add(&n_sigbus_queue, 1, __ATOMIC_SEQ_CST); + return; + } + } + + /* If we can't, make sure the queue size is out of bounds, to + * mark it as overflowed */ + for (;;) { + sig_atomic_t c; + + __atomic_thread_fence(__ATOMIC_SEQ_CST); + c = n_sigbus_queue; + + if (c > SIGBUS_QUEUE_MAX) /* already overflowed */ + return; + + /* OK if we clobber c here, since we either immediately return + * or it will be immediately reinitialized on next loop */ + if (__atomic_compare_exchange_n(&n_sigbus_queue, &c, c + SIGBUS_QUEUE_MAX, false, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + return; + } +} + +int sigbus_pop(void **ret) { + assert(ret); + + for (;;) { + unsigned u, c; + + __atomic_thread_fence(__ATOMIC_SEQ_CST); + c = n_sigbus_queue; + + if (_likely_(c == 0)) + return 0; + + if (_unlikely_(c > SIGBUS_QUEUE_MAX)) + return -EOVERFLOW; + + for (u = 0; u < SIGBUS_QUEUE_MAX; u++) { + void *addr; + + addr = sigbus_queue[u]; + if (!addr) + continue; + + /* OK if we clobber addr here, since we either immediately return + * or it will be immediately reinitialized on next loop */ + if (__atomic_compare_exchange_n(&sigbus_queue[u], &addr, NULL, false, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { + __atomic_fetch_sub(&n_sigbus_queue, 1, __ATOMIC_SEQ_CST); + /* If we successfully entered this if condition, addr won't + * have been modified since its assignment, so safe to use it */ + *ret = addr; + return 1; + } + } + } +} + +static void sigbus_handler(int sn, siginfo_t *si, void *data) { + unsigned long ul; + void *aligned; + + assert(sn == SIGBUS); + assert(si); + + if (si->si_code != BUS_ADRERR || !si->si_addr) { + assert_se(sigaction(SIGBUS, &old_sigaction, NULL) == 0); + rt_sigqueueinfo(getpid_cached(), SIGBUS, si); + return; + } + + ul = (unsigned long) si->si_addr; + ul = ul / page_size(); + ul = ul * page_size(); + aligned = (void*) ul; + + /* Let's remember which address failed */ + sigbus_push(aligned); + + /* Replace mapping with an anonymous page, so that the + * execution can continue, however with a zeroed out page */ + assert_se(mmap(aligned, page_size(), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == aligned); +} + +void sigbus_install(void) { + struct sigaction sa = { + .sa_sigaction = sigbus_handler, + .sa_flags = SA_SIGINFO, + }; + + /* make sure that sysconf() is not called from a signal handler because + * it is not guaranteed to be async-signal-safe since POSIX.1-2008 */ + (void) page_size(); + + n_installed++; + + if (n_installed == 1) + assert_se(sigaction(SIGBUS, &sa, &old_sigaction) == 0); + + return; +} + +void sigbus_reset(void) { + + if (n_installed <= 0) + return; + + n_installed--; + + if (n_installed == 0) + assert_se(sigaction(SIGBUS, &old_sigaction, NULL) == 0); + + return; +} diff --git a/src/basic/sigbus.h b/src/basic/sigbus.h new file mode 100644 index 0000000..a40b1a8 --- /dev/null +++ b/src/basic/sigbus.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +void sigbus_install(void); +void sigbus_reset(void); + +int sigbus_pop(void **ret); diff --git a/src/basic/signal-util.c b/src/basic/signal-util.c new file mode 100644 index 0000000..fdbe7f4 --- /dev/null +++ b/src/basic/signal-util.c @@ -0,0 +1,285 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include + +#include "errno-util.h" +#include "macro.h" +#include "missing_threads.h" +#include "parse-util.h" +#include "signal-util.h" +#include "stdio-util.h" +#include "string-table.h" +#include "string-util.h" + +int reset_all_signal_handlers(void) { + static const struct sigaction sa = { + .sa_handler = SIG_DFL, + .sa_flags = SA_RESTART, + }; + int r = 0; + + for (int sig = 1; sig < _NSIG; sig++) { + + /* These two cannot be caught... */ + if (IN_SET(sig, SIGKILL, SIGSTOP)) + continue; + + /* On Linux the first two RT signals are reserved by + * glibc, and sigaction() will return EINVAL for them. */ + if (sigaction(sig, &sa, NULL) < 0) + if (errno != EINVAL && r >= 0) + r = -errno; + } + + return r; +} + +int reset_signal_mask(void) { + sigset_t ss; + + if (sigemptyset(&ss) < 0) + return -errno; + + return RET_NERRNO(sigprocmask(SIG_SETMASK, &ss, NULL)); +} + +int sigaction_many_internal(const struct sigaction *sa, ...) { + int sig, r = 0; + va_list ap; + + va_start(ap, sa); + + /* negative signal ends the list. 0 signal is skipped. */ + while ((sig = va_arg(ap, int)) >= 0) { + + if (sig == 0) + continue; + + if (sigaction(sig, sa, NULL) < 0) { + if (r >= 0) + r = -errno; + } + } + + va_end(ap); + + return r; +} + +static int sigset_add_many_ap(sigset_t *ss, va_list ap) { + int sig, r = 0; + + assert(ss); + + while ((sig = va_arg(ap, int)) >= 0) { + + if (sig == 0) + continue; + + if (sigaddset(ss, sig) < 0) { + if (r >= 0) + r = -errno; + } + } + + return r; +} + +int sigset_add_many(sigset_t *ss, ...) { + va_list ap; + int r; + + va_start(ap, ss); + r = sigset_add_many_ap(ss, ap); + va_end(ap); + + return r; +} + +int sigprocmask_many(int how, sigset_t *old, ...) { + va_list ap; + sigset_t ss; + int r; + + if (sigemptyset(&ss) < 0) + return -errno; + + va_start(ap, old); + r = sigset_add_many_ap(&ss, ap); + va_end(ap); + + if (r < 0) + return r; + + if (sigprocmask(how, &ss, old) < 0) + return -errno; + + return 0; +} + +static const char *const static_signal_table[] = { + [SIGHUP] = "HUP", + [SIGINT] = "INT", + [SIGQUIT] = "QUIT", + [SIGILL] = "ILL", + [SIGTRAP] = "TRAP", + [SIGABRT] = "ABRT", + [SIGBUS] = "BUS", + [SIGFPE] = "FPE", + [SIGKILL] = "KILL", + [SIGUSR1] = "USR1", + [SIGSEGV] = "SEGV", + [SIGUSR2] = "USR2", + [SIGPIPE] = "PIPE", + [SIGALRM] = "ALRM", + [SIGTERM] = "TERM", +#ifdef SIGSTKFLT + [SIGSTKFLT] = "STKFLT", /* Linux on SPARC doesn't know SIGSTKFLT */ +#endif + [SIGCHLD] = "CHLD", + [SIGCONT] = "CONT", + [SIGSTOP] = "STOP", + [SIGTSTP] = "TSTP", + [SIGTTIN] = "TTIN", + [SIGTTOU] = "TTOU", + [SIGURG] = "URG", + [SIGXCPU] = "XCPU", + [SIGXFSZ] = "XFSZ", + [SIGVTALRM] = "VTALRM", + [SIGPROF] = "PROF", + [SIGWINCH] = "WINCH", + [SIGIO] = "IO", + [SIGPWR] = "PWR", + [SIGSYS] = "SYS" +}; + +DEFINE_PRIVATE_STRING_TABLE_LOOKUP(static_signal, int); + +const char *signal_to_string(int signo) { + static thread_local char buf[STRLEN("RTMIN+") + DECIMAL_STR_MAX(int)]; + const char *name; + + name = static_signal_to_string(signo); + if (name) + return name; + + if (signo >= SIGRTMIN && signo <= SIGRTMAX) + xsprintf(buf, "RTMIN+%d", signo - SIGRTMIN); + else + xsprintf(buf, "%d", signo); + + return buf; +} + +int signal_from_string(const char *s) { + const char *p; + int signo, r; + + /* Check that the input is a signal number. */ + if (safe_atoi(s, &signo) >= 0) { + if (SIGNAL_VALID(signo)) + return signo; + else + return -ERANGE; + } + + /* Drop "SIG" prefix. */ + if (startswith(s, "SIG")) + s += 3; + + /* Check that the input is a signal name. */ + signo = static_signal_from_string(s); + if (signo > 0) + return signo; + + /* Check that the input is RTMIN or + * RTMIN+n (0 <= n <= SIGRTMAX-SIGRTMIN). */ + p = startswith(s, "RTMIN"); + if (p) { + if (*p == '\0') + return SIGRTMIN; + if (*p != '+') + return -EINVAL; + + r = safe_atoi(p, &signo); + if (r < 0) + return r; + + if (signo < 0 || signo > SIGRTMAX - SIGRTMIN) + return -ERANGE; + + return signo + SIGRTMIN; + } + + /* Check that the input is RTMAX or + * RTMAX-n (0 <= n <= SIGRTMAX-SIGRTMIN). */ + p = startswith(s, "RTMAX"); + if (p) { + if (*p == '\0') + return SIGRTMAX; + if (*p != '-') + return -EINVAL; + + r = safe_atoi(p, &signo); + if (r < 0) + return r; + + if (signo > 0 || signo < SIGRTMIN - SIGRTMAX) + return -ERANGE; + + return signo + SIGRTMAX; + } + + return -EINVAL; +} + +void nop_signal_handler(int sig) { + /* nothing here */ +} + +int signal_is_blocked(int sig) { + sigset_t ss; + int r; + + r = pthread_sigmask(SIG_SETMASK, NULL, &ss); + if (r != 0) + return -r; + + return RET_NERRNO(sigismember(&ss, sig)); +} + +int pop_pending_signal_internal(int sig, ...) { + sigset_t ss; + va_list ap; + int r; + + if (sig < 0) /* Empty list? */ + return -EINVAL; + + if (sigemptyset(&ss) < 0) + return -errno; + + /* Add first signal (if the signal is zero, we'll silently skip it, to make it easier to build + * parameter lists where some element are sometimes off, similar to how sigset_add_many_ap() handles + * this.) */ + if (sig > 0 && sigaddset(&ss, sig) < 0) + return -errno; + + /* Add all other signals */ + va_start(ap, sig); + r = sigset_add_many_ap(&ss, ap); + va_end(ap); + if (r < 0) + return r; + + r = sigtimedwait(&ss, NULL, &(struct timespec) { 0, 0 }); + if (r < 0) { + if (errno == EAGAIN) + return 0; + + return -errno; + } + + return r; /* Returns the signal popped */ +} diff --git a/src/basic/signal-util.h b/src/basic/signal-util.h new file mode 100644 index 0000000..36372c1 --- /dev/null +++ b/src/basic/signal-util.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#include "macro.h" + +int reset_all_signal_handlers(void); +int reset_signal_mask(void); + +int sigaction_many_internal(const struct sigaction *sa, ...); + +#define ignore_signals(...) \ + sigaction_many_internal( \ + &(const struct sigaction) { \ + .sa_handler = SIG_IGN, \ + .sa_flags = SA_RESTART \ + }, \ + __VA_ARGS__, \ + -1) + +#define default_signals(...) \ + sigaction_many_internal( \ + &(const struct sigaction) { \ + .sa_handler = SIG_DFL, \ + .sa_flags = SA_RESTART \ + }, \ + __VA_ARGS__, \ + -1) + +#define sigaction_many(sa, ...) \ + sigaction_many_internal(sa, __VA_ARGS__, -1) + +int sigset_add_many(sigset_t *ss, ...); +int sigprocmask_many(int how, sigset_t *old, ...); + +const char *signal_to_string(int i) _const_; +int signal_from_string(const char *s) _pure_; + +void nop_signal_handler(int sig); + +static inline void block_signals_reset(sigset_t *ss) { + assert_se(sigprocmask(SIG_SETMASK, ss, NULL) >= 0); +} + +#define BLOCK_SIGNALS(...) \ + _cleanup_(block_signals_reset) _unused_ sigset_t _saved_sigset = ({ \ + sigset_t _t; \ + assert_se(sigprocmask_many(SIG_BLOCK, &_t, __VA_ARGS__, -1) >= 0); \ + _t; \ + }) + +static inline bool SIGNAL_VALID(int signo) { + return signo > 0 && signo < _NSIG; +} + +static inline const char* signal_to_string_with_check(int n) { + if (!SIGNAL_VALID(n)) + return NULL; + + return signal_to_string(n); +} + +int signal_is_blocked(int sig); + +int pop_pending_signal_internal(int sig, ...); +#define pop_pending_signal(...) pop_pending_signal_internal(__VA_ARGS__, -1) diff --git a/src/basic/siphash24.c b/src/basic/siphash24.c new file mode 100644 index 0000000..b614ecf --- /dev/null +++ b/src/basic/siphash24.c @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: CC0-1.0 */ + +/* + SipHash reference C implementation + + Written in 2012 by + Jean-Philippe Aumasson + Daniel J. Bernstein + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see . + + (Minimal changes made by Lennart Poettering, to make clean for inclusion in systemd) + (Refactored by Tom Gundersen to split up in several functions and follow systemd + coding style) +*/ + +#include + +#include "macro.h" +#include "siphash24.h" +#include "unaligned.h" + +static uint64_t rotate_left(uint64_t x, uint8_t b) { + assert(b < 64); + + return (x << b) | (x >> (64 - b)); +} + +static void sipround(struct siphash *state) { + assert(state); + + state->v0 += state->v1; + state->v1 = rotate_left(state->v1, 13); + state->v1 ^= state->v0; + state->v0 = rotate_left(state->v0, 32); + state->v2 += state->v3; + state->v3 = rotate_left(state->v3, 16); + state->v3 ^= state->v2; + state->v0 += state->v3; + state->v3 = rotate_left(state->v3, 21); + state->v3 ^= state->v0; + state->v2 += state->v1; + state->v1 = rotate_left(state->v1, 17); + state->v1 ^= state->v2; + state->v2 = rotate_left(state->v2, 32); +} + +void siphash24_init(struct siphash *state, const uint8_t k[static 16]) { + uint64_t k0, k1; + + assert(state); + assert(k); + + k0 = unaligned_read_le64(k); + k1 = unaligned_read_le64(k + 8); + + *state = (struct siphash) { + /* "somepseudorandomlygeneratedbytes" */ + .v0 = 0x736f6d6570736575ULL ^ k0, + .v1 = 0x646f72616e646f6dULL ^ k1, + .v2 = 0x6c7967656e657261ULL ^ k0, + .v3 = 0x7465646279746573ULL ^ k1, + .padding = 0, + .inlen = 0, + }; +} + +void siphash24_compress(const void *_in, size_t inlen, struct siphash *state) { + + const uint8_t *in = ASSERT_PTR(_in); + const uint8_t *end = in + inlen; + size_t left = state->inlen & 7; + uint64_t m; + + assert(state); + + /* Update total length */ + state->inlen += inlen; + + /* If padding exists, fill it out */ + if (left > 0) { + for ( ; in < end && left < 8; in ++, left ++) + state->padding |= ((uint64_t) *in) << (left * 8); + + if (in == end && left < 8) + /* We did not have enough input to fill out the padding completely */ + return; + +#if ENABLE_DEBUG_SIPHASH + printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0); + printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1); + printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2); + printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3); + printf("(%3zu) compress padding %08x %08x\n", state->inlen, (uint32_t) (state->padding >> 32), (uint32_t)state->padding); +#endif + + state->v3 ^= state->padding; + sipround(state); + sipround(state); + state->v0 ^= state->padding; + + state->padding = 0; + } + + end -= (state->inlen % sizeof(uint64_t)); + + for ( ; in < end; in += 8) { + m = unaligned_read_le64(in); +#if ENABLE_DEBUG_SIPHASH + printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0); + printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1); + printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2); + printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3); + printf("(%3zu) compress %08x %08x\n", state->inlen, (uint32_t) (m >> 32), (uint32_t) m); +#endif + state->v3 ^= m; + sipround(state); + sipround(state); + state->v0 ^= m; + } + + left = state->inlen & 7; + switch (left) { + case 7: + state->padding |= ((uint64_t) in[6]) << 48; + _fallthrough_; + case 6: + state->padding |= ((uint64_t) in[5]) << 40; + _fallthrough_; + case 5: + state->padding |= ((uint64_t) in[4]) << 32; + _fallthrough_; + case 4: + state->padding |= ((uint64_t) in[3]) << 24; + _fallthrough_; + case 3: + state->padding |= ((uint64_t) in[2]) << 16; + _fallthrough_; + case 2: + state->padding |= ((uint64_t) in[1]) << 8; + _fallthrough_; + case 1: + state->padding |= ((uint64_t) in[0]); + _fallthrough_; + case 0: + break; + } +} + +uint64_t siphash24_finalize(struct siphash *state) { + uint64_t b; + + assert(state); + + b = state->padding | (((uint64_t) state->inlen) << 56); + +#if ENABLE_DEBUG_SIPHASH + printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0); + printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1); + printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2); + printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3); + printf("(%3zu) padding %08x %08x\n", state->inlen, (uint32_t) (state->padding >> 32), (uint32_t) state->padding); +#endif + + state->v3 ^= b; + sipround(state); + sipround(state); + state->v0 ^= b; + +#if ENABLE_DEBUG_SIPHASH + printf("(%3zu) v0 %08x %08x\n", state->inlen, (uint32_t) (state->v0 >> 32), (uint32_t) state->v0); + printf("(%3zu) v1 %08x %08x\n", state->inlen, (uint32_t) (state->v1 >> 32), (uint32_t) state->v1); + printf("(%3zu) v2 %08x %08x\n", state->inlen, (uint32_t) (state->v2 >> 32), (uint32_t) state->v2); + printf("(%3zu) v3 %08x %08x\n", state->inlen, (uint32_t) (state->v3 >> 32), (uint32_t) state->v3); +#endif + state->v2 ^= 0xff; + + sipround(state); + sipround(state); + sipround(state); + sipround(state); + + return state->v0 ^ state->v1 ^ state->v2 ^ state->v3; +} + +uint64_t siphash24(const void *in, size_t inlen, const uint8_t k[static 16]) { + struct siphash state; + + assert(in); + assert(k); + + siphash24_init(&state, k); + siphash24_compress(in, inlen, &state); + + return siphash24_finalize(&state); +} diff --git a/src/basic/siphash24.h b/src/basic/siphash24.h new file mode 100644 index 0000000..0b3e845 --- /dev/null +++ b/src/basic/siphash24.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: CC0-1.0 */ + +#pragma once + +#include +#include +#include +#include + +#include "string-util.h" +#include "time-util.h" + +struct siphash { + uint64_t v0; + uint64_t v1; + uint64_t v2; + uint64_t v3; + uint64_t padding; + size_t inlen; +}; + +void siphash24_init(struct siphash *state, const uint8_t k[static 16]); +void siphash24_compress(const void *in, size_t inlen, struct siphash *state); +#define siphash24_compress_byte(byte, state) siphash24_compress((const uint8_t[]) { (byte) }, 1, (state)) + +static inline void siphash24_compress_boolean(bool in, struct siphash *state) { + uint8_t i = in; + + siphash24_compress(&i, sizeof i, state); +} + +static inline void siphash24_compress_usec_t(usec_t in, struct siphash *state) { + siphash24_compress(&in, sizeof in, state); +} + +static inline void siphash24_compress_safe(const void *in, size_t inlen, struct siphash *state) { + if (inlen == 0) + return; + + siphash24_compress(in, inlen, state); +} + +static inline void siphash24_compress_string(const char *in, struct siphash *state) { + siphash24_compress_safe(in, strlen_ptr(in), state); +} + +uint64_t siphash24_finalize(struct siphash *state); + +uint64_t siphash24(const void *in, size_t inlen, const uint8_t k[static 16]); + +static inline uint64_t siphash24_string(const char *s, const uint8_t k[static 16]) { + return siphash24(s, strlen(s) + 1, k); +} diff --git a/src/basic/socket-util.c b/src/basic/socket-util.c new file mode 100644 index 0000000..cefbbcd --- /dev/null +++ b/src/basic/socket-util.c @@ -0,0 +1,1470 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "errno-util.h" +#include "escape.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-util.h" +#include "io-util.h" +#include "log.h" +#include "memory-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "socket-util.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "sysctl-util.h" +#include "user-util.h" +#include "utf8.h" + +#if ENABLE_IDN +# define IDN_FLAGS NI_IDN +#else +# define IDN_FLAGS 0 +#endif + +static const char* const socket_address_type_table[] = { + [SOCK_STREAM] = "Stream", + [SOCK_DGRAM] = "Datagram", + [SOCK_RAW] = "Raw", + [SOCK_RDM] = "ReliableDatagram", + [SOCK_SEQPACKET] = "SequentialPacket", + [SOCK_DCCP] = "DatagramCongestionControl", +}; + +DEFINE_STRING_TABLE_LOOKUP(socket_address_type, int); + +int socket_address_verify(const SocketAddress *a, bool strict) { + assert(a); + + /* With 'strict' we enforce additional sanity constraints which are not set by the standard, + * but should only apply to sockets we create ourselves. */ + + switch (socket_address_family(a)) { + + case AF_INET: + if (a->size != sizeof(struct sockaddr_in)) + return -EINVAL; + + if (a->sockaddr.in.sin_port == 0) + return -EINVAL; + + if (!IN_SET(a->type, 0, SOCK_STREAM, SOCK_DGRAM)) + return -EINVAL; + + return 0; + + case AF_INET6: + if (a->size != sizeof(struct sockaddr_in6)) + return -EINVAL; + + if (a->sockaddr.in6.sin6_port == 0) + return -EINVAL; + + if (!IN_SET(a->type, 0, SOCK_STREAM, SOCK_DGRAM)) + return -EINVAL; + + return 0; + + case AF_UNIX: + if (a->size < offsetof(struct sockaddr_un, sun_path)) + return -EINVAL; + if (a->size > sizeof(struct sockaddr_un) + !strict) + /* If !strict, allow one extra byte, since getsockname() on Linux will append + * a NUL byte if we have path sockets that are above sun_path's full size. */ + return -EINVAL; + + if (a->size > offsetof(struct sockaddr_un, sun_path) && + a->sockaddr.un.sun_path[0] != 0 && + strict) { + /* Only validate file system sockets here, and only in strict mode */ + const char *e; + + e = memchr(a->sockaddr.un.sun_path, 0, sizeof(a->sockaddr.un.sun_path)); + if (e) { + /* If there's an embedded NUL byte, make sure the size of the socket address matches it */ + if (a->size != offsetof(struct sockaddr_un, sun_path) + (e - a->sockaddr.un.sun_path) + 1) + return -EINVAL; + } else { + /* If there's no embedded NUL byte, then the size needs to match the whole + * structure or the structure with one extra NUL byte suffixed. (Yeah, Linux is awful, + * and considers both equivalent: getsockname() even extends sockaddr_un beyond its + * size if the path is non NUL terminated.) */ + if (!IN_SET(a->size, sizeof(a->sockaddr.un.sun_path), sizeof(a->sockaddr.un.sun_path)+1)) + return -EINVAL; + } + } + + if (!IN_SET(a->type, 0, SOCK_STREAM, SOCK_DGRAM, SOCK_SEQPACKET)) + return -EINVAL; + + return 0; + + case AF_NETLINK: + + if (a->size != sizeof(struct sockaddr_nl)) + return -EINVAL; + + if (!IN_SET(a->type, 0, SOCK_RAW, SOCK_DGRAM)) + return -EINVAL; + + return 0; + + case AF_VSOCK: + if (a->size != sizeof(struct sockaddr_vm)) + return -EINVAL; + + if (!IN_SET(a->type, 0, SOCK_STREAM, SOCK_DGRAM)) + return -EINVAL; + + return 0; + + default: + return -EAFNOSUPPORT; + } +} + +int socket_address_print(const SocketAddress *a, char **ret) { + int r; + + assert(a); + assert(ret); + + r = socket_address_verify(a, false); /* We do non-strict validation, because we want to be + * able to pretty-print any socket the kernel considers + * valid. We still need to do validation to know if we + * can meaningfully print the address. */ + if (r < 0) + return r; + + if (socket_address_family(a) == AF_NETLINK) { + _cleanup_free_ char *sfamily = NULL; + + r = netlink_family_to_string_alloc(a->protocol, &sfamily); + if (r < 0) + return r; + + r = asprintf(ret, "%s %u", sfamily, a->sockaddr.nl.nl_groups); + if (r < 0) + return -ENOMEM; + + return 0; + } + + return sockaddr_pretty(&a->sockaddr.sa, a->size, false, true, ret); +} + +bool socket_address_can_accept(const SocketAddress *a) { + assert(a); + + return + IN_SET(a->type, SOCK_STREAM, SOCK_SEQPACKET); +} + +bool socket_address_equal(const SocketAddress *a, const SocketAddress *b) { + assert(a); + assert(b); + + /* Invalid addresses are unequal to all */ + if (socket_address_verify(a, false) < 0 || + socket_address_verify(b, false) < 0) + return false; + + if (a->type != b->type) + return false; + + if (socket_address_family(a) != socket_address_family(b)) + return false; + + switch (socket_address_family(a)) { + + case AF_INET: + if (a->sockaddr.in.sin_addr.s_addr != b->sockaddr.in.sin_addr.s_addr) + return false; + + if (a->sockaddr.in.sin_port != b->sockaddr.in.sin_port) + return false; + + break; + + case AF_INET6: + if (memcmp(&a->sockaddr.in6.sin6_addr, &b->sockaddr.in6.sin6_addr, sizeof(a->sockaddr.in6.sin6_addr)) != 0) + return false; + + if (a->sockaddr.in6.sin6_port != b->sockaddr.in6.sin6_port) + return false; + + break; + + case AF_UNIX: + if (a->size <= offsetof(struct sockaddr_un, sun_path) || + b->size <= offsetof(struct sockaddr_un, sun_path)) + return false; + + if ((a->sockaddr.un.sun_path[0] == 0) != (b->sockaddr.un.sun_path[0] == 0)) + return false; + + if (a->sockaddr.un.sun_path[0]) { + if (!path_equal_or_files_same(a->sockaddr.un.sun_path, b->sockaddr.un.sun_path, 0)) + return false; + } else { + if (a->size != b->size) + return false; + + if (memcmp(a->sockaddr.un.sun_path, b->sockaddr.un.sun_path, a->size) != 0) + return false; + } + + break; + + case AF_NETLINK: + if (a->protocol != b->protocol) + return false; + + if (a->sockaddr.nl.nl_groups != b->sockaddr.nl.nl_groups) + return false; + + break; + + case AF_VSOCK: + if (a->sockaddr.vm.svm_cid != b->sockaddr.vm.svm_cid) + return false; + + if (a->sockaddr.vm.svm_port != b->sockaddr.vm.svm_port) + return false; + + break; + + default: + /* Cannot compare, so we assume the addresses are different */ + return false; + } + + return true; +} + +const char* socket_address_get_path(const SocketAddress *a) { + assert(a); + + if (socket_address_family(a) != AF_UNIX) + return NULL; + + if (a->sockaddr.un.sun_path[0] == 0) + return NULL; + + /* Note that this is only safe because we know that there's an extra NUL byte after the sockaddr_un + * structure. On Linux AF_UNIX file system socket addresses don't have to be NUL terminated if they take up the + * full sun_path space. */ + assert_cc(sizeof(union sockaddr_union) >= sizeof(struct sockaddr_un)+1); + return a->sockaddr.un.sun_path; +} + +bool socket_ipv6_is_supported(void) { + static int cached = -1; + + if (cached < 0) { + + if (access("/proc/net/if_inet6", F_OK) < 0) { + + if (errno != ENOENT) { + log_debug_errno(errno, "Unexpected error when checking whether /proc/net/if_inet6 exists: %m"); + return false; + } + + cached = false; + } else + cached = true; + } + + return cached; +} + +bool socket_ipv6_is_enabled(void) { + _cleanup_free_ char *v = NULL; + int r; + + /* Much like socket_ipv6_is_supported(), but also checks that the sysctl that disables IPv6 on all + * interfaces isn't turned on */ + + if (!socket_ipv6_is_supported()) + return false; + + r = sysctl_read_ip_property(AF_INET6, "all", "disable_ipv6", &v); + if (r < 0) { + log_debug_errno(r, "Unexpected error reading 'net.ipv6.conf.all.disable_ipv6' sysctl: %m"); + return true; + } + + r = parse_boolean(v); + if (r < 0) { + log_debug_errno(r, "Failed to pare 'net.ipv6.conf.all.disable_ipv6' sysctl: %m"); + return true; + } + + return !r; +} + +bool socket_address_matches_fd(const SocketAddress *a, int fd) { + SocketAddress b; + socklen_t solen; + + assert(a); + assert(fd >= 0); + + b.size = sizeof(b.sockaddr); + if (getsockname(fd, &b.sockaddr.sa, &b.size) < 0) + return false; + + if (b.sockaddr.sa.sa_family != a->sockaddr.sa.sa_family) + return false; + + solen = sizeof(b.type); + if (getsockopt(fd, SOL_SOCKET, SO_TYPE, &b.type, &solen) < 0) + return false; + + if (b.type != a->type) + return false; + + if (a->protocol != 0) { + solen = sizeof(b.protocol); + if (getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &b.protocol, &solen) < 0) + return false; + + if (b.protocol != a->protocol) + return false; + } + + return socket_address_equal(a, &b); +} + +int sockaddr_port(const struct sockaddr *_sa, unsigned *ret_port) { + const union sockaddr_union *sa = (const union sockaddr_union*) _sa; + + /* Note, this returns the port as 'unsigned' rather than 'uint16_t', as AF_VSOCK knows larger ports */ + + assert(sa); + + switch (sa->sa.sa_family) { + + case AF_INET: + *ret_port = be16toh(sa->in.sin_port); + return 0; + + case AF_INET6: + *ret_port = be16toh(sa->in6.sin6_port); + return 0; + + case AF_VSOCK: + *ret_port = sa->vm.svm_port; + return 0; + + default: + return -EAFNOSUPPORT; + } +} + +const union in_addr_union *sockaddr_in_addr(const struct sockaddr *_sa) { + const union sockaddr_union *sa = (const union sockaddr_union*) _sa; + + if (!sa) + return NULL; + + switch (sa->sa.sa_family) { + + case AF_INET: + return (const union in_addr_union*) &sa->in.sin_addr; + + case AF_INET6: + return (const union in_addr_union*) &sa->in6.sin6_addr; + + default: + return NULL; + } +} + +int sockaddr_set_in_addr( + union sockaddr_union *u, + int family, + const union in_addr_union *a, + uint16_t port) { + + assert(u); + assert(a); + + switch (family) { + + case AF_INET: + u->in = (struct sockaddr_in) { + .sin_family = AF_INET, + .sin_addr = a->in, + .sin_port = htobe16(port), + }; + + return 0; + + case AF_INET6: + u->in6 = (struct sockaddr_in6) { + .sin6_family = AF_INET6, + .sin6_addr = a->in6, + .sin6_port = htobe16(port), + }; + + return 0; + + default: + return -EAFNOSUPPORT; + + } +} + +int sockaddr_pretty( + const struct sockaddr *_sa, + socklen_t salen, + bool translate_ipv6, + bool include_port, + char **ret) { + + union sockaddr_union *sa = (union sockaddr_union*) _sa; + char *p; + int r; + + assert(sa); + assert(salen >= sizeof(sa->sa.sa_family)); + + switch (sa->sa.sa_family) { + + case AF_INET: { + uint32_t a; + + a = be32toh(sa->in.sin_addr.s_addr); + + if (include_port) + r = asprintf(&p, + "%u.%u.%u.%u:%u", + a >> 24, (a >> 16) & 0xFF, (a >> 8) & 0xFF, a & 0xFF, + be16toh(sa->in.sin_port)); + else + r = asprintf(&p, + "%u.%u.%u.%u", + a >> 24, (a >> 16) & 0xFF, (a >> 8) & 0xFF, a & 0xFF); + if (r < 0) + return -ENOMEM; + break; + } + + case AF_INET6: { + static const unsigned char ipv4_prefix[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF + }; + + if (translate_ipv6 && + memcmp(&sa->in6.sin6_addr, ipv4_prefix, sizeof(ipv4_prefix)) == 0) { + const uint8_t *a = sa->in6.sin6_addr.s6_addr+12; + if (include_port) + r = asprintf(&p, + "%u.%u.%u.%u:%u", + a[0], a[1], a[2], a[3], + be16toh(sa->in6.sin6_port)); + else + r = asprintf(&p, + "%u.%u.%u.%u", + a[0], a[1], a[2], a[3]); + if (r < 0) + return -ENOMEM; + } else { + const char *a = IN6_ADDR_TO_STRING(&sa->in6.sin6_addr); + + if (include_port) { + if (asprintf(&p, + "[%s]:%u%s%s", + a, + be16toh(sa->in6.sin6_port), + sa->in6.sin6_scope_id != 0 ? "%" : "", + FORMAT_IFNAME_FULL(sa->in6.sin6_scope_id, FORMAT_IFNAME_IFINDEX)) < 0) + return -ENOMEM; + } else { + if (sa->in6.sin6_scope_id != 0) + p = strjoin(a, "%", FORMAT_IFNAME_FULL(sa->in6.sin6_scope_id, FORMAT_IFNAME_IFINDEX)); + else + p = strdup(a); + if (!p) + return -ENOMEM; + } + } + + break; + } + + case AF_UNIX: + if (salen <= offsetof(struct sockaddr_un, sun_path) || + (sa->un.sun_path[0] == 0 && salen == offsetof(struct sockaddr_un, sun_path) + 1)) + /* The name must have at least one character (and the leading NUL does not count) */ + p = strdup(""); + else { + /* Note that we calculate the path pointer here through the .un_buffer[] field, in order to + * outtrick bounds checking tools such as ubsan, which are too smart for their own good: on + * Linux the kernel may return sun_path[] data one byte longer than the declared size of the + * field. */ + char *path = (char*) sa->un_buffer + offsetof(struct sockaddr_un, sun_path); + size_t path_len = salen - offsetof(struct sockaddr_un, sun_path); + + if (path[0] == 0) { + /* Abstract socket. When parsing address information from, we + * explicitly reject overly long paths and paths with embedded NULs. + * But we might get such a socket from the outside. Let's return + * something meaningful and printable in this case. */ + + _cleanup_free_ char *e = NULL; + + e = cescape_length(path + 1, path_len - 1); + if (!e) + return -ENOMEM; + + p = strjoin("@", e); + } else { + if (path[path_len - 1] == '\0') + /* We expect a terminating NUL and don't print it */ + path_len --; + + p = cescape_length(path, path_len); + } + } + if (!p) + return -ENOMEM; + + break; + + case AF_VSOCK: + if (include_port) { + if (sa->vm.svm_cid == VMADDR_CID_ANY) + r = asprintf(&p, "vsock::%u", sa->vm.svm_port); + else + r = asprintf(&p, "vsock:%u:%u", sa->vm.svm_cid, sa->vm.svm_port); + } else + r = asprintf(&p, "vsock:%u", sa->vm.svm_cid); + if (r < 0) + return -ENOMEM; + break; + + default: + return -EOPNOTSUPP; + } + + *ret = p; + return 0; +} + +int getpeername_pretty(int fd, bool include_port, char **ret) { + union sockaddr_union sa; + socklen_t salen = sizeof(sa); + int r; + + assert(fd >= 0); + assert(ret); + + if (getpeername(fd, &sa.sa, &salen) < 0) + return -errno; + + if (sa.sa.sa_family == AF_UNIX) { + struct ucred ucred = UCRED_INVALID; + + /* UNIX connection sockets are anonymous, so let's use + * PID/UID as pretty credentials instead */ + + r = getpeercred(fd, &ucred); + if (r < 0) + return r; + + if (asprintf(ret, "PID "PID_FMT"/UID "UID_FMT, ucred.pid, ucred.uid) < 0) + return -ENOMEM; + + return 0; + } + + /* For remote sockets we translate IPv6 addresses back to IPv4 + * if applicable, since that's nicer. */ + + return sockaddr_pretty(&sa.sa, salen, true, include_port, ret); +} + +int getsockname_pretty(int fd, char **ret) { + union sockaddr_union sa; + socklen_t salen = sizeof(sa); + + assert(fd >= 0); + assert(ret); + + if (getsockname(fd, &sa.sa, &salen) < 0) + return -errno; + + /* For local sockets we do not translate IPv6 addresses back + * to IPv6 if applicable, since this is usually used for + * listening sockets where the difference between IPv4 and + * IPv6 matters. */ + + return sockaddr_pretty(&sa.sa, salen, false, true, ret); +} + +int socknameinfo_pretty(union sockaddr_union *sa, socklen_t salen, char **_ret) { + int r; + char host[NI_MAXHOST], *ret; + + assert(_ret); + + r = getnameinfo(&sa->sa, salen, host, sizeof(host), NULL, 0, IDN_FLAGS); + if (r != 0) { + int saved_errno = errno; + + r = sockaddr_pretty(&sa->sa, salen, true, true, &ret); + if (r < 0) + return r; + + log_debug_errno(saved_errno, "getnameinfo(%s) failed: %m", ret); + } else { + ret = strdup(host); + if (!ret) + return -ENOMEM; + } + + *_ret = ret; + return 0; +} + +static const char* const netlink_family_table[] = { + [NETLINK_ROUTE] = "route", + [NETLINK_FIREWALL] = "firewall", + [NETLINK_INET_DIAG] = "inet-diag", + [NETLINK_NFLOG] = "nflog", + [NETLINK_XFRM] = "xfrm", + [NETLINK_SELINUX] = "selinux", + [NETLINK_ISCSI] = "iscsi", + [NETLINK_AUDIT] = "audit", + [NETLINK_FIB_LOOKUP] = "fib-lookup", + [NETLINK_CONNECTOR] = "connector", + [NETLINK_NETFILTER] = "netfilter", + [NETLINK_IP6_FW] = "ip6-fw", + [NETLINK_DNRTMSG] = "dnrtmsg", + [NETLINK_KOBJECT_UEVENT] = "kobject-uevent", + [NETLINK_GENERIC] = "generic", + [NETLINK_SCSITRANSPORT] = "scsitransport", + [NETLINK_ECRYPTFS] = "ecryptfs", + [NETLINK_RDMA] = "rdma", +}; + +DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(netlink_family, int, INT_MAX); + +static const char* const socket_address_bind_ipv6_only_table[_SOCKET_ADDRESS_BIND_IPV6_ONLY_MAX] = { + [SOCKET_ADDRESS_DEFAULT] = "default", + [SOCKET_ADDRESS_BOTH] = "both", + [SOCKET_ADDRESS_IPV6_ONLY] = "ipv6-only" +}; + +DEFINE_STRING_TABLE_LOOKUP(socket_address_bind_ipv6_only, SocketAddressBindIPv6Only); + +SocketAddressBindIPv6Only socket_address_bind_ipv6_only_or_bool_from_string(const char *n) { + int r; + + r = parse_boolean(n); + if (r > 0) + return SOCKET_ADDRESS_IPV6_ONLY; + if (r == 0) + return SOCKET_ADDRESS_BOTH; + + return socket_address_bind_ipv6_only_from_string(n); +} + +bool sockaddr_equal(const union sockaddr_union *a, const union sockaddr_union *b) { + assert(a); + assert(b); + + if (a->sa.sa_family != b->sa.sa_family) + return false; + + if (a->sa.sa_family == AF_INET) + return a->in.sin_addr.s_addr == b->in.sin_addr.s_addr; + + if (a->sa.sa_family == AF_INET6) + return memcmp(&a->in6.sin6_addr, &b->in6.sin6_addr, sizeof(a->in6.sin6_addr)) == 0; + + if (a->sa.sa_family == AF_VSOCK) + return a->vm.svm_cid == b->vm.svm_cid; + + return false; +} + +int fd_set_sndbuf(int fd, size_t n, bool increase) { + int r, value; + socklen_t l = sizeof(value); + + if (n > INT_MAX) + return -ERANGE; + + r = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &value, &l); + if (r >= 0 && l == sizeof(value) && increase ? (size_t) value >= n*2 : (size_t) value == n*2) + return 0; + + /* First, try to set the buffer size with SO_SNDBUF. */ + r = setsockopt_int(fd, SOL_SOCKET, SO_SNDBUF, n); + if (r < 0) + return r; + + /* SO_SNDBUF above may set to the kernel limit, instead of the requested size. + * So, we need to check the actual buffer size here. */ + l = sizeof(value); + r = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &value, &l); + if (r >= 0 && l == sizeof(value) && increase ? (size_t) value >= n*2 : (size_t) value == n*2) + return 1; + + /* If we have the privileges we will ignore the kernel limit. */ + r = setsockopt_int(fd, SOL_SOCKET, SO_SNDBUFFORCE, n); + if (r < 0) + return r; + + return 1; +} + +int fd_set_rcvbuf(int fd, size_t n, bool increase) { + int r, value; + socklen_t l = sizeof(value); + + if (n > INT_MAX) + return -ERANGE; + + r = getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &value, &l); + if (r >= 0 && l == sizeof(value) && increase ? (size_t) value >= n*2 : (size_t) value == n*2) + return 0; + + /* First, try to set the buffer size with SO_RCVBUF. */ + r = setsockopt_int(fd, SOL_SOCKET, SO_RCVBUF, n); + if (r < 0) + return r; + + /* SO_RCVBUF above may set to the kernel limit, instead of the requested size. + * So, we need to check the actual buffer size here. */ + l = sizeof(value); + r = getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &value, &l); + if (r >= 0 && l == sizeof(value) && increase ? (size_t) value >= n*2 : (size_t) value == n*2) + return 1; + + /* If we have the privileges we will ignore the kernel limit. */ + r = setsockopt_int(fd, SOL_SOCKET, SO_RCVBUFFORCE, n); + if (r < 0) + return r; + + return 1; +} + +static const char* const ip_tos_table[] = { + [IPTOS_LOWDELAY] = "low-delay", + [IPTOS_THROUGHPUT] = "throughput", + [IPTOS_RELIABILITY] = "reliability", + [IPTOS_LOWCOST] = "low-cost", +}; + +DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ip_tos, int, 0xff); + +bool ifname_valid_char(char a) { + if ((unsigned char) a >= 127U) + return false; + + if ((unsigned char) a <= 32U) + return false; + + if (IN_SET(a, + ':', /* colons are used by the legacy "alias" interface logic */ + '/', /* slashes cannot work, since we need to use network interfaces in sysfs paths, and in paths slashes are separators */ + '%')) /* %d is used in the kernel's weird foo%d format string naming feature which we really really don't want to ever run into by accident */ + return false; + + return true; +} + +bool ifname_valid_full(const char *p, IfnameValidFlags flags) { + bool numeric = true; + + /* Checks whether a network interface name is valid. This is inspired by dev_valid_name() in the kernel sources + * but slightly stricter, as we only allow non-control, non-space ASCII characters in the interface name. We + * also don't permit names that only container numbers, to avoid confusion with numeric interface indexes. */ + + assert(!(flags & ~_IFNAME_VALID_ALL)); + + if (isempty(p)) + return false; + + /* A valid ifindex? If so, it's valid iff IFNAME_VALID_NUMERIC is set */ + if (parse_ifindex(p) >= 0) + return flags & IFNAME_VALID_NUMERIC; + + if (flags & IFNAME_VALID_ALTERNATIVE) { + if (strlen(p) >= ALTIFNAMSIZ) + return false; + } else { + if (strlen(p) >= IFNAMSIZ) + return false; + } + + if (dot_or_dot_dot(p)) + return false; + + /* Let's refuse "all" and "default" as interface name, to avoid collisions with the special sysctl + * directories /proc/sys/net/{ipv4,ipv6}/conf/{all,default} */ + if (!FLAGS_SET(flags, IFNAME_VALID_SPECIAL) && STR_IN_SET(p, "all", "default")) + return false; + + for (const char *t = p; *t; t++) { + if (!ifname_valid_char(*t)) + return false; + + numeric = numeric && ascii_isdigit(*t); + } + + /* It's fully numeric but didn't parse as valid ifindex above? if so, it must be too large or zero or + * so, let's refuse that. */ + if (numeric) + return false; + + return true; +} + +bool address_label_valid(const char *p) { + + if (isempty(p)) + return false; + + if (strlen(p) >= IFNAMSIZ) + return false; + + while (*p) { + if ((uint8_t) *p >= 127U) + return false; + + if ((uint8_t) *p <= 31U) + return false; + p++; + } + + return true; +} + +int getpeercred(int fd, struct ucred *ucred) { + socklen_t n = sizeof(struct ucred); + struct ucred u; + int r; + + assert(fd >= 0); + assert(ucred); + + r = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &u, &n); + if (r < 0) + return -errno; + + if (n != sizeof(struct ucred)) + return -EIO; + + /* Check if the data is actually useful and not suppressed due to namespacing issues */ + if (!pid_is_valid(u.pid)) + return -ENODATA; + + /* Note that we don't check UID/GID here, as namespace translation works differently there: instead of + * receiving in "invalid" user/group we get the overflow UID/GID. */ + + *ucred = u; + return 0; +} + +int getpeersec(int fd, char **ret) { + _cleanup_free_ char *s = NULL; + socklen_t n = 64; + + assert(fd >= 0); + assert(ret); + + for (;;) { + s = new0(char, n+1); + if (!s) + return -ENOMEM; + + if (getsockopt(fd, SOL_SOCKET, SO_PEERSEC, s, &n) >= 0) + break; + + if (errno != ERANGE) + return -errno; + + s = mfree(s); + } + + if (isempty(s)) + return -EOPNOTSUPP; + + *ret = TAKE_PTR(s); + + return 0; +} + +int getpeergroups(int fd, gid_t **ret) { + socklen_t n = sizeof(gid_t) * 64; + _cleanup_free_ gid_t *d = NULL; + + assert(fd >= 0); + assert(ret); + + for (;;) { + d = malloc(n); + if (!d) + return -ENOMEM; + + if (getsockopt(fd, SOL_SOCKET, SO_PEERGROUPS, d, &n) >= 0) + break; + + if (errno != ERANGE) + return -errno; + + d = mfree(d); + } + + assert_se(n % sizeof(gid_t) == 0); + n /= sizeof(gid_t); + + if ((socklen_t) (int) n != n) + return -E2BIG; + + *ret = TAKE_PTR(d); + + return (int) n; +} + +ssize_t send_one_fd_iov_sa( + int transport_fd, + int fd, + const struct iovec *iov, size_t iovlen, + const struct sockaddr *sa, socklen_t len, + int flags) { + + CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int))) control = {}; + struct msghdr mh = { + .msg_name = (struct sockaddr*) sa, + .msg_namelen = len, + .msg_iov = (struct iovec *)iov, + .msg_iovlen = iovlen, + }; + ssize_t k; + + assert(transport_fd >= 0); + + /* + * We need either an FD or data to send. + * If there's nothing, return an error. + */ + if (fd < 0 && !iov) + return -EINVAL; + + if (fd >= 0) { + struct cmsghdr *cmsg; + + mh.msg_control = &control; + mh.msg_controllen = sizeof(control); + + cmsg = CMSG_FIRSTHDR(&mh); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + memcpy(CMSG_DATA(cmsg), &fd, sizeof(int)); + } + k = sendmsg(transport_fd, &mh, MSG_NOSIGNAL | flags); + if (k < 0) + return (ssize_t) -errno; + + return k; +} + +int send_one_fd_sa( + int transport_fd, + int fd, + const struct sockaddr *sa, socklen_t len, + int flags) { + + assert(fd >= 0); + + return (int) send_one_fd_iov_sa(transport_fd, fd, NULL, 0, sa, len, flags); +} + +ssize_t receive_one_fd_iov( + int transport_fd, + struct iovec *iov, size_t iovlen, + int flags, + int *ret_fd) { + + CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int))) control; + struct msghdr mh = { + .msg_control = &control, + .msg_controllen = sizeof(control), + .msg_iov = iov, + .msg_iovlen = iovlen, + }; + struct cmsghdr *found; + ssize_t k; + + assert(transport_fd >= 0); + assert(ret_fd); + + /* + * Receive a single FD via @transport_fd. We don't care for + * the transport-type. We retrieve a single FD at most, so for + * packet-based transports, the caller must ensure to send + * only a single FD per packet. This is best used in + * combination with send_one_fd(). + */ + + k = recvmsg_safe(transport_fd, &mh, MSG_CMSG_CLOEXEC | flags); + if (k < 0) + return k; + + found = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int))); + if (!found) { + cmsg_close_all(&mh); + + /* If didn't receive an FD or any data, return an error. */ + if (k == 0) + return -EIO; + } + + if (found) + *ret_fd = *(int*) CMSG_DATA(found); + else + *ret_fd = -1; + + return k; +} + +int receive_one_fd(int transport_fd, int flags) { + int fd; + ssize_t k; + + k = receive_one_fd_iov(transport_fd, NULL, 0, flags, &fd); + if (k == 0) + return fd; + + /* k must be negative, since receive_one_fd_iov() only returns + * a positive value if data was received through the iov. */ + assert(k < 0); + return (int) k; +} + +ssize_t next_datagram_size_fd(int fd) { + ssize_t l; + int k; + + /* This is a bit like FIONREAD/SIOCINQ, however a bit more powerful. The difference being: recv(MSG_PEEK) will + * actually cause the next datagram in the queue to be validated regarding checksums, which FIONREAD doesn't + * do. This difference is actually of major importance as we need to be sure that the size returned here + * actually matches what we will read with recvmsg() next, as otherwise we might end up allocating a buffer of + * the wrong size. */ + + l = recv(fd, NULL, 0, MSG_PEEK|MSG_TRUNC); + if (l < 0) { + if (IN_SET(errno, EOPNOTSUPP, EFAULT)) + goto fallback; + + return -errno; + } + if (l == 0) + goto fallback; + + return l; + +fallback: + k = 0; + + /* Some sockets (AF_PACKET) do not support null-sized recv() with MSG_TRUNC set, let's fall back to FIONREAD + * for them. Checksums don't matter for raw sockets anyway, hence this should be fine. */ + + if (ioctl(fd, FIONREAD, &k) < 0) + return -errno; + + return (ssize_t) k; +} + +/* Put a limit on how many times will attempt to call accept4(). We loop + * only on "transient" errors, but let's make sure we don't loop forever. */ +#define MAX_FLUSH_ITERATIONS 1024 + +int flush_accept(int fd) { + + int r, b; + socklen_t l = sizeof(b); + + /* Similar to flush_fd() but flushes all incoming connections by accepting and immediately closing + * them. */ + + if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &b, &l) < 0) + return -errno; + + assert(l == sizeof(b)); + if (!b) /* Let's check if this socket accepts connections before calling accept(). accept4() can + * return EOPNOTSUPP if the fd is not a listening socket, which we should treat as a fatal + * error, or in case the incoming TCP connection triggered a network issue, which we want to + * treat as a transient error. Thus, let's rule out the first reason for EOPNOTSUPP early, so + * we can loop safely on transient errors below. */ + return -ENOTTY; + + for (unsigned iteration = 0;; iteration++) { + int cfd; + + r = fd_wait_for_event(fd, POLLIN, 0); + if (r < 0) { + if (r == -EINTR) + continue; + + return r; + } + if (r == 0) + return 0; + + if (iteration >= MAX_FLUSH_ITERATIONS) + return log_debug_errno(SYNTHETIC_ERRNO(EBUSY), + "Failed to flush connections within " STRINGIFY(MAX_FLUSH_ITERATIONS) " iterations."); + + cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC); + if (cfd < 0) { + if (errno == EAGAIN) + return 0; + + if (ERRNO_IS_ACCEPT_AGAIN(errno)) + continue; + + return -errno; + } + + safe_close(cfd); + } +} + +struct cmsghdr* cmsg_find(struct msghdr *mh, int level, int type, socklen_t length) { + struct cmsghdr *cmsg; + + assert(mh); + + CMSG_FOREACH(cmsg, mh) + if (cmsg->cmsg_level == level && + cmsg->cmsg_type == type && + (length == (socklen_t) -1 || length == cmsg->cmsg_len)) + return cmsg; + + return NULL; +} + +int socket_ioctl_fd(void) { + int fd; + + /* Create a socket to invoke the various network interface ioctl()s on. Traditionally only AF_INET was good for + * that. Since kernel 4.6 AF_NETLINK works for this too. We first try to use AF_INET hence, but if that's not + * available (for example, because it is made unavailable via SECCOMP or such), we'll fall back to the more + * generic AF_NETLINK. */ + + fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC, 0); + if (fd < 0) + fd = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_GENERIC); + if (fd < 0) + return -errno; + + return fd; +} + +int sockaddr_un_unlink(const struct sockaddr_un *sa) { + const char *p, * nul; + + assert(sa); + + if (sa->sun_family != AF_UNIX) + return -EPROTOTYPE; + + if (sa->sun_path[0] == 0) /* Nothing to do for abstract sockets */ + return 0; + + /* The path in .sun_path is not necessarily NUL terminated. Let's fix that. */ + nul = memchr(sa->sun_path, 0, sizeof(sa->sun_path)); + if (nul) + p = sa->sun_path; + else + p = memdupa_suffix0(sa->sun_path, sizeof(sa->sun_path)); + + if (unlink(p) < 0) + return -errno; + + return 1; +} + +int sockaddr_un_set_path(struct sockaddr_un *ret, const char *path) { + size_t l; + + assert(ret); + assert(path); + + /* Initialize ret->sun_path from the specified argument. This will interpret paths starting with '@' as + * abstract namespace sockets, and those starting with '/' as regular filesystem sockets. It won't accept + * anything else (i.e. no relative paths), to avoid ambiguities. Note that this function cannot be used to + * reference paths in the abstract namespace that include NUL bytes in the name. */ + + l = strlen(path); + if (l < 2) + return -EINVAL; + if (!IN_SET(path[0], '/', '@')) + return -EINVAL; + + /* Don't allow paths larger than the space in sockaddr_un. Note that we are a tiny bit more restrictive than + * the kernel is: we insist on NUL termination (both for abstract namespace and regular file system socket + * addresses!), which the kernel doesn't. We do this to reduce chance of incompatibility with other apps that + * do not expect non-NUL terminated file system path. */ + if (l+1 > sizeof(ret->sun_path)) + return path[0] == '@' ? -EINVAL : -ENAMETOOLONG; /* return a recognizable error if this is + * too long to fit into a sockaddr_un, but + * is a file system path, and thus might be + * connectible via O_PATH indirection. */ + + *ret = (struct sockaddr_un) { + .sun_family = AF_UNIX, + }; + + if (path[0] == '@') { + /* Abstract namespace socket */ + memcpy(ret->sun_path + 1, path + 1, l); /* copy *with* trailing NUL byte */ + return (int) (offsetof(struct sockaddr_un, sun_path) + l); /* 🔥 *don't* 🔥 include trailing NUL in size */ + + } else { + assert(path[0] == '/'); + + /* File system socket */ + memcpy(ret->sun_path, path, l + 1); /* copy *with* trailing NUL byte */ + return (int) (offsetof(struct sockaddr_un, sun_path) + l + 1); /* include trailing NUL in size */ + } +} + +int socket_bind_to_ifname(int fd, const char *ifname) { + assert(fd >= 0); + + /* Call with NULL to drop binding */ + + return RET_NERRNO(setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, ifname, strlen_ptr(ifname))); +} + +int socket_bind_to_ifindex(int fd, int ifindex) { + char ifname[IF_NAMESIZE]; + int r; + + assert(fd >= 0); + + if (ifindex <= 0) + /* Drop binding */ + return RET_NERRNO(setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, NULL, 0)); + + r = setsockopt_int(fd, SOL_SOCKET, SO_BINDTOIFINDEX, ifindex); + if (r != -ENOPROTOOPT) + return r; + + /* Fall back to SO_BINDTODEVICE on kernels < 5.0 which didn't have SO_BINDTOIFINDEX */ + r = format_ifname(ifindex, ifname); + if (r < 0) + return r; + + return socket_bind_to_ifname(fd, ifname); +} + +ssize_t recvmsg_safe(int sockfd, struct msghdr *msg, int flags) { + ssize_t n; + + /* A wrapper around recvmsg() that checks for MSG_CTRUNC, and turns it into an error, in a reasonably + * safe way, closing any SCM_RIGHTS fds in the error path. + * + * Note that unlike our usual coding style this might modify *msg on failure. */ + + n = recvmsg(sockfd, msg, flags); + if (n < 0) + return -errno; + + if (FLAGS_SET(msg->msg_flags, MSG_CTRUNC)) { + cmsg_close_all(msg); + return -EXFULL; /* a recognizable error code */ + } + + return n; +} + +int socket_get_family(int fd) { + int af; + socklen_t sl = sizeof(af); + + if (getsockopt(fd, SOL_SOCKET, SO_DOMAIN, &af, &sl) < 0) + return -errno; + + if (sl != sizeof(af)) + return -EINVAL; + + return af; +} + +int socket_set_recvpktinfo(int fd, int af, bool b) { + + if (af == AF_UNSPEC) { + af = socket_get_family(fd); + if (af < 0) + return af; + } + + switch (af) { + + case AF_INET: + return setsockopt_int(fd, IPPROTO_IP, IP_PKTINFO, b); + + case AF_INET6: + return setsockopt_int(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, b); + + case AF_NETLINK: + return setsockopt_int(fd, SOL_NETLINK, NETLINK_PKTINFO, b); + + case AF_PACKET: + return setsockopt_int(fd, SOL_PACKET, PACKET_AUXDATA, b); + + default: + return -EAFNOSUPPORT; + } +} + +int socket_set_unicast_if(int fd, int af, int ifi) { + be32_t ifindex_be = htobe32(ifi); + + if (af == AF_UNSPEC) { + af = socket_get_family(fd); + if (af < 0) + return af; + } + + switch (af) { + + case AF_INET: + return RET_NERRNO(setsockopt(fd, IPPROTO_IP, IP_UNICAST_IF, &ifindex_be, sizeof(ifindex_be))); + + case AF_INET6: + return RET_NERRNO(setsockopt(fd, IPPROTO_IPV6, IPV6_UNICAST_IF, &ifindex_be, sizeof(ifindex_be))); + + default: + return -EAFNOSUPPORT; + } +} + +int socket_set_option(int fd, int af, int opt_ipv4, int opt_ipv6, int val) { + if (af == AF_UNSPEC) { + af = socket_get_family(fd); + if (af < 0) + return af; + } + + switch (af) { + + case AF_INET: + return setsockopt_int(fd, IPPROTO_IP, opt_ipv4, val); + + case AF_INET6: + return setsockopt_int(fd, IPPROTO_IPV6, opt_ipv6, val); + + default: + return -EAFNOSUPPORT; + } +} + +int socket_get_mtu(int fd, int af, size_t *ret) { + int mtu, r; + + if (af == AF_UNSPEC) { + af = socket_get_family(fd); + if (af < 0) + return af; + } + + switch (af) { + + case AF_INET: + r = getsockopt_int(fd, IPPROTO_IP, IP_MTU, &mtu); + break; + + case AF_INET6: + r = getsockopt_int(fd, IPPROTO_IPV6, IPV6_MTU, &mtu); + break; + + default: + return -EAFNOSUPPORT; + } + + if (r < 0) + return r; + if (mtu <= 0) + return -EINVAL; + + *ret = (size_t) mtu; + return 0; +} + +int connect_unix_path(int fd, int dir_fd, const char *path) { + _cleanup_close_ int inode_fd = -1; + union sockaddr_union sa = { + .un.sun_family = AF_UNIX, + }; + size_t path_len; + socklen_t salen; + + assert(fd >= 0); + assert(dir_fd == AT_FDCWD || dir_fd >= 0); + assert(path); + + /* Connects to the specified AF_UNIX socket in the file system. Works around the 108 byte size limit + * in sockaddr_un, by going via O_PATH if needed. This hence works for any kind of path. */ + + path_len = strlen(path); + + /* Refuse zero length path early, to make sure AF_UNIX stack won't mistake this for an abstract + * namespace path, since first char is NUL */ + if (path_len <= 0) + return -EINVAL; + + if (dir_fd == AT_FDCWD && path_len < sizeof(sa.un.sun_path)) { + memcpy(sa.un.sun_path, path, path_len + 1); + salen = offsetof(struct sockaddr_un, sun_path) + path_len + 1; + } else { + const char *proc; + size_t proc_len; + + /* If dir_fd is specified, then we need to go the indirect O_PATH route, because connectat() + * does not exist. If the path is too long, we also need to take the indirect route, since we + * can't fit this into a sockaddr_un directly. */ + + inode_fd = openat(dir_fd, path, O_PATH|O_CLOEXEC); + if (inode_fd < 0) + return -errno; + + proc = FORMAT_PROC_FD_PATH(inode_fd); + proc_len = strlen(proc); + + assert(proc_len < sizeof(sa.un.sun_path)); + memcpy(sa.un.sun_path, proc, proc_len + 1); + salen = offsetof(struct sockaddr_un, sun_path) + proc_len + 1; + } + + return RET_NERRNO(connect(fd, &sa.sa, salen)); +} diff --git a/src/basic/socket-util.h b/src/basic/socket-util.h new file mode 100644 index 0000000..7803168 --- /dev/null +++ b/src/basic/socket-util.h @@ -0,0 +1,343 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "errno-util.h" +#include "in-addr-util.h" +#include "macro.h" +#include "missing_network.h" +#include "missing_socket.h" +#include "sparse-endian.h" + +union sockaddr_union { + /* The minimal, abstract version */ + struct sockaddr sa; + + /* The libc provided version that allocates "enough room" for every protocol */ + struct sockaddr_storage storage; + + /* Protoctol-specific implementations */ + struct sockaddr_in in; + struct sockaddr_in6 in6; + struct sockaddr_un un; + struct sockaddr_nl nl; + struct sockaddr_ll ll; + struct sockaddr_vm vm; + + /* Ensure there is enough space to store Infiniband addresses */ + uint8_t ll_buffer[offsetof(struct sockaddr_ll, sll_addr) + CONST_MAX(ETH_ALEN, INFINIBAND_ALEN)]; + + /* Ensure there is enough space after the AF_UNIX sun_path for one more NUL byte, just to be sure that the path + * component is always followed by at least one NUL byte. */ + uint8_t un_buffer[sizeof(struct sockaddr_un) + 1]; +}; + +#define SUN_PATH_LEN (sizeof(((struct sockaddr_un){}).sun_path)) + +typedef struct SocketAddress { + union sockaddr_union sockaddr; + + /* We store the size here explicitly due to the weird + * sockaddr_un semantics for abstract sockets */ + socklen_t size; + + /* Socket type, i.e. SOCK_STREAM, SOCK_DGRAM, ... */ + int type; + + /* Socket protocol, IPPROTO_xxx, usually 0, except for netlink */ + int protocol; +} SocketAddress; + +typedef enum SocketAddressBindIPv6Only { + SOCKET_ADDRESS_DEFAULT, + SOCKET_ADDRESS_BOTH, + SOCKET_ADDRESS_IPV6_ONLY, + _SOCKET_ADDRESS_BIND_IPV6_ONLY_MAX, + _SOCKET_ADDRESS_BIND_IPV6_ONLY_INVALID = -EINVAL, +} SocketAddressBindIPv6Only; + +#define socket_address_family(a) ((a)->sockaddr.sa.sa_family) + +const char* socket_address_type_to_string(int t) _const_; +int socket_address_type_from_string(const char *s) _pure_; + +int sockaddr_un_unlink(const struct sockaddr_un *sa); + +static inline int socket_address_unlink(const SocketAddress *a) { + return socket_address_family(a) == AF_UNIX ? sockaddr_un_unlink(&a->sockaddr.un) : 0; +} + +bool socket_address_can_accept(const SocketAddress *a) _pure_; + +int socket_address_listen( + const SocketAddress *a, + int flags, + int backlog, + SocketAddressBindIPv6Only only, + const char *bind_to_device, + bool reuse_port, + bool free_bind, + bool transparent, + mode_t directory_mode, + mode_t socket_mode, + const char *label); + +int socket_address_verify(const SocketAddress *a, bool strict) _pure_; +int socket_address_print(const SocketAddress *a, char **p); +bool socket_address_matches_fd(const SocketAddress *a, int fd); + +bool socket_address_equal(const SocketAddress *a, const SocketAddress *b) _pure_; + +const char* socket_address_get_path(const SocketAddress *a); + +bool socket_ipv6_is_supported(void); +bool socket_ipv6_is_enabled(void); + +int sockaddr_port(const struct sockaddr *_sa, unsigned *port); +const union in_addr_union *sockaddr_in_addr(const struct sockaddr *sa); +int sockaddr_set_in_addr(union sockaddr_union *u, int family, const union in_addr_union *a, uint16_t port); + +int sockaddr_pretty(const struct sockaddr *_sa, socklen_t salen, bool translate_ipv6, bool include_port, char **ret); +int getpeername_pretty(int fd, bool include_port, char **ret); +int getsockname_pretty(int fd, char **ret); + +int socknameinfo_pretty(union sockaddr_union *sa, socklen_t salen, char **_ret); + +const char* socket_address_bind_ipv6_only_to_string(SocketAddressBindIPv6Only b) _const_; +SocketAddressBindIPv6Only socket_address_bind_ipv6_only_from_string(const char *s) _pure_; +SocketAddressBindIPv6Only socket_address_bind_ipv6_only_or_bool_from_string(const char *s); + +int netlink_family_to_string_alloc(int b, char **s); +int netlink_family_from_string(const char *s) _pure_; + +bool sockaddr_equal(const union sockaddr_union *a, const union sockaddr_union *b); + +int fd_set_sndbuf(int fd, size_t n, bool increase); +static inline int fd_inc_sndbuf(int fd, size_t n) { + return fd_set_sndbuf(fd, n, true); +} +int fd_set_rcvbuf(int fd, size_t n, bool increase); +static inline int fd_increase_rxbuf(int fd, size_t n) { + return fd_set_rcvbuf(fd, n, true); +} + +int ip_tos_to_string_alloc(int i, char **s); +int ip_tos_from_string(const char *s); + +typedef enum { + IFNAME_VALID_ALTERNATIVE = 1 << 0, /* Allow "altnames" too */ + IFNAME_VALID_NUMERIC = 1 << 1, /* Allow decimal formatted ifindexes too */ + IFNAME_VALID_SPECIAL = 1 << 2, /* Allow the special names "all" and "default" */ + _IFNAME_VALID_ALL = IFNAME_VALID_ALTERNATIVE | IFNAME_VALID_NUMERIC | IFNAME_VALID_SPECIAL, +} IfnameValidFlags; +bool ifname_valid_char(char a); +bool ifname_valid_full(const char *p, IfnameValidFlags flags); +static inline bool ifname_valid(const char *p) { + return ifname_valid_full(p, 0); +} +bool address_label_valid(const char *p); + +int getpeercred(int fd, struct ucred *ucred); +int getpeersec(int fd, char **ret); +int getpeergroups(int fd, gid_t **ret); + +ssize_t send_one_fd_iov_sa( + int transport_fd, + int fd, + const struct iovec *iov, size_t iovlen, + const struct sockaddr *sa, socklen_t len, + int flags); +int send_one_fd_sa(int transport_fd, + int fd, + const struct sockaddr *sa, socklen_t len, + int flags); +#define send_one_fd_iov(transport_fd, fd, iov, iovlen, flags) send_one_fd_iov_sa(transport_fd, fd, iov, iovlen, NULL, 0, flags) +#define send_one_fd(transport_fd, fd, flags) send_one_fd_iov_sa(transport_fd, fd, NULL, 0, NULL, 0, flags) +ssize_t receive_one_fd_iov(int transport_fd, struct iovec *iov, size_t iovlen, int flags, int *ret_fd); +int receive_one_fd(int transport_fd, int flags); + +ssize_t next_datagram_size_fd(int fd); + +int flush_accept(int fd); + +#define CMSG_FOREACH(cmsg, mh) \ + for ((cmsg) = CMSG_FIRSTHDR(mh); (cmsg); (cmsg) = CMSG_NXTHDR((mh), (cmsg))) + +struct cmsghdr* cmsg_find(struct msghdr *mh, int level, int type, socklen_t length); + +/* Type-safe, dereferencing version of cmsg_find() */ +#define CMSG_FIND_DATA(mh, level, type, ctype) \ + ({ \ + struct cmsghdr *_found; \ + _found = cmsg_find(mh, level, type, CMSG_LEN(sizeof(ctype))); \ + (ctype*) (_found ? CMSG_DATA(_found) : NULL); \ + }) + +/* Resolves to a type that can carry cmsghdr structures. Make sure things are properly aligned, i.e. the type + * itself is placed properly in memory and the size is also aligned to what's appropriate for "cmsghdr" + * structures. */ +#define CMSG_BUFFER_TYPE(size) \ + union { \ + struct cmsghdr cmsghdr; \ + uint8_t buf[size]; \ + uint8_t align_check[(size) >= CMSG_SPACE(0) && \ + (size) == CMSG_ALIGN(size) ? 1 : -1]; \ + } + +/* + * Certain hardware address types (e.g Infiniband) do not fit into sll_addr + * (8 bytes) and run over the structure. This macro returns the correct size that + * must be passed to kernel. + */ +#define SOCKADDR_LL_LEN(sa) \ + ({ \ + const struct sockaddr_ll *_sa = &(sa); \ + size_t _mac_len = sizeof(_sa->sll_addr); \ + assert(_sa->sll_family == AF_PACKET); \ + if (be16toh(_sa->sll_hatype) == ARPHRD_ETHER) \ + _mac_len = MAX(_mac_len, (size_t) ETH_ALEN); \ + if (be16toh(_sa->sll_hatype) == ARPHRD_INFINIBAND) \ + _mac_len = MAX(_mac_len, (size_t) INFINIBAND_ALEN); \ + offsetof(struct sockaddr_ll, sll_addr) + _mac_len; \ + }) + +/* Covers only file system and abstract AF_UNIX socket addresses, but not unnamed socket addresses. */ +#define SOCKADDR_UN_LEN(sa) \ + ({ \ + const struct sockaddr_un *_sa = &(sa); \ + assert(_sa->sun_family == AF_UNIX); \ + offsetof(struct sockaddr_un, sun_path) + \ + (_sa->sun_path[0] == 0 ? \ + 1 + strnlen(_sa->sun_path+1, sizeof(_sa->sun_path)-1) : \ + strnlen(_sa->sun_path, sizeof(_sa->sun_path))+1); \ + }) + +#define SOCKADDR_LEN(saddr) \ + ({ \ + const union sockaddr_union *__sa = &(saddr); \ + size_t _len; \ + switch (__sa->sa.sa_family) { \ + case AF_INET: \ + _len = sizeof(struct sockaddr_in); \ + break; \ + case AF_INET6: \ + _len = sizeof(struct sockaddr_in6); \ + break; \ + case AF_UNIX: \ + _len = SOCKADDR_UN_LEN(__sa->un); \ + break; \ + case AF_PACKET: \ + _len = SOCKADDR_LL_LEN(__sa->ll); \ + break; \ + case AF_NETLINK: \ + _len = sizeof(struct sockaddr_nl); \ + break; \ + case AF_VSOCK: \ + _len = sizeof(struct sockaddr_vm); \ + break; \ + default: \ + assert_not_reached(); \ + } \ + _len; \ + }) + +int socket_ioctl_fd(void); + +int sockaddr_un_set_path(struct sockaddr_un *ret, const char *path); + +static inline int setsockopt_int(int fd, int level, int optname, int value) { + if (setsockopt(fd, level, optname, &value, sizeof(value)) < 0) + return -errno; + + return 0; +} + +static inline int getsockopt_int(int fd, int level, int optname, int *ret) { + int v; + socklen_t sl = sizeof(v); + + if (getsockopt(fd, level, optname, &v, &sl) < 0) + return negative_errno(); + if (sl != sizeof(v)) + return -EIO; + + *ret = v; + return 0; +} + +int socket_bind_to_ifname(int fd, const char *ifname); +int socket_bind_to_ifindex(int fd, int ifindex); + +/* Define a 64bit version of timeval/timespec in any case, even on 32bit userspace. */ +struct timeval_large { + uint64_t tvl_sec, tvl_usec; +}; +struct timespec_large { + uint64_t tvl_sec, tvl_nsec; +}; + +/* glibc duplicates timespec/timeval on certain 32bit archs, once in 32bit and once in 64bit. + * See __convert_scm_timestamps() in glibc source code. Hence, we need additional buffer space for them + * to prevent from recvmsg_safe() returning -EXFULL. */ +#define CMSG_SPACE_TIMEVAL \ + ((sizeof(struct timeval) == sizeof(struct timeval_large)) ? \ + CMSG_SPACE(sizeof(struct timeval)) : \ + CMSG_SPACE(sizeof(struct timeval)) + \ + CMSG_SPACE(sizeof(struct timeval_large))) +#define CMSG_SPACE_TIMESPEC \ + ((sizeof(struct timespec) == sizeof(struct timespec_large)) ? \ + CMSG_SPACE(sizeof(struct timespec)) : \ + CMSG_SPACE(sizeof(struct timespec)) + \ + CMSG_SPACE(sizeof(struct timespec_large))) + +ssize_t recvmsg_safe(int sockfd, struct msghdr *msg, int flags); + +int socket_get_family(int fd); +int socket_set_recvpktinfo(int fd, int af, bool b); +int socket_set_unicast_if(int fd, int af, int ifi); + +int socket_set_option(int fd, int af, int opt_ipv4, int opt_ipv6, int val); +static inline int socket_set_recverr(int fd, int af, bool b) { + return socket_set_option(fd, af, IP_RECVERR, IPV6_RECVERR, b); +} +static inline int socket_set_recvttl(int fd, int af, bool b) { + return socket_set_option(fd, af, IP_RECVTTL, IPV6_RECVHOPLIMIT, b); +} +static inline int socket_set_ttl(int fd, int af, int ttl) { + return socket_set_option(fd, af, IP_TTL, IPV6_UNICAST_HOPS, ttl); +} +static inline int socket_set_freebind(int fd, int af, bool b) { + return socket_set_option(fd, af, IP_FREEBIND, IPV6_FREEBIND, b); +} +static inline int socket_set_transparent(int fd, int af, bool b) { + return socket_set_option(fd, af, IP_TRANSPARENT, IPV6_TRANSPARENT, b); +} +static inline int socket_set_recvfragsize(int fd, int af, bool b) { + return socket_set_option(fd, af, IP_RECVFRAGSIZE, IPV6_RECVFRAGSIZE, b); +} + +int socket_get_mtu(int fd, int af, size_t *ret); + +/* an initializer for struct ucred that initialized all fields to the invalid value appropriate for each */ +#define UCRED_INVALID { .pid = 0, .uid = UID_INVALID, .gid = GID_INVALID } + +int connect_unix_path(int fd, int dir_fd, const char *path); + +/* libc's SOMAXCONN is defined to 128 or 4096 (at least on glibc). But actually, the value can be much + * larger. In our codebase we want to set it to the max usually, since noawadays socket memory is properly + * tracked by memcg, and hence we don't need to enforce extra limits here. Moreover, the kernel caps it to + * /proc/sys/net/core/somaxconn anyway, thus by setting this to unbounded we just make that sysctl file + * authoritative. */ +#define SOMAXCONN_DELUXE INT_MAX diff --git a/src/basic/sort-util.c b/src/basic/sort-util.c new file mode 100644 index 0000000..e0fb9cf --- /dev/null +++ b/src/basic/sort-util.c @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sort-util.h" +#include "alloc-util.h" + +/* hey glibc, APIs with callbacks without a user pointer are so useless */ +void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size, + comparison_userdata_fn_t compar, void *arg) { + size_t l, u, idx; + const void *p; + int comparison; + + assert(!size_multiply_overflow(nmemb, size)); + + l = 0; + u = nmemb; + while (l < u) { + idx = (l + u) / 2; + p = (const uint8_t*) base + idx * size; + comparison = compar(key, p, arg); + if (comparison < 0) + u = idx; + else if (comparison > 0) + l = idx + 1; + else + return (void *)p; + } + return NULL; +} + +int cmp_int(const int *a, const int *b) { + return CMP(*a, *b); +} diff --git a/src/basic/sort-util.h b/src/basic/sort-util.h new file mode 100644 index 0000000..45f05bb --- /dev/null +++ b/src/basic/sort-util.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +#include "macro.h" + +/* This is the same as glibc's internal __compar_d_fn_t type. glibc exports a public comparison_fn_t, for the + * external type __compar_fn_t, but doesn't do anything similar for __compar_d_fn_t. Let's hence do that + * ourselves, picking a name that is obvious, but likely enough to not clash with glibc's choice of naming if + * they should ever add one. */ +typedef int (*comparison_userdata_fn_t)(const void *, const void *, void *); + +void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size, + comparison_userdata_fn_t compar, void *arg); + +#define typesafe_bsearch_r(k, b, n, func, userdata) \ + ({ \ + const typeof((b)[0]) *_k = k; \ + int (*_func_)(const typeof((b)[0])*, const typeof((b)[0])*, typeof(userdata)) = func; \ + (typeof((b)[0])*) xbsearch_r((const void*) _k, (b), (n), sizeof((b)[0]), (comparison_userdata_fn_t) _func_, userdata); \ + }) + +/** + * Normal bsearch requires base to be nonnull. Here were require + * that only if nmemb > 0. + */ +static inline void* bsearch_safe(const void *key, const void *base, + size_t nmemb, size_t size, comparison_fn_t compar) { + if (nmemb <= 0) + return NULL; + + assert(base); + return bsearch(key, base, nmemb, size, compar); +} + +#define typesafe_bsearch(k, b, n, func) \ + ({ \ + const typeof((b)[0]) *_k = k; \ + int (*_func_)(const typeof((b)[0])*, const typeof((b)[0])*) = func; \ + (typeof((b)[0])*) bsearch_safe((const void*) _k, (b), (n), sizeof((b)[0]), (comparison_fn_t) _func_); \ + }) + +/** + * Normal qsort requires base to be nonnull. Here were require + * that only if nmemb > 0. + */ +static inline void _qsort_safe(void *base, size_t nmemb, size_t size, comparison_fn_t compar) { + if (nmemb <= 1) + return; + + assert(base); + qsort(base, nmemb, size, compar); +} + +/* A wrapper around the above, but that adds typesafety: the element size is automatically derived from the type and so + * is the prototype for the comparison function */ +#define typesafe_qsort(p, n, func) \ + ({ \ + int (*_func_)(const typeof((p)[0])*, const typeof((p)[0])*) = func; \ + _qsort_safe((p), (n), sizeof((p)[0]), (comparison_fn_t) _func_); \ + }) + +static inline void qsort_r_safe(void *base, size_t nmemb, size_t size, comparison_userdata_fn_t compar, void *userdata) { + if (nmemb <= 1) + return; + + assert(base); + qsort_r(base, nmemb, size, compar, userdata); +} + +#define typesafe_qsort_r(p, n, func, userdata) \ + ({ \ + int (*_func_)(const typeof((p)[0])*, const typeof((p)[0])*, typeof(userdata)) = func; \ + qsort_r_safe((p), (n), sizeof((p)[0]), (comparison_userdata_fn_t) _func_, userdata); \ + }) + +int cmp_int(const int *a, const int *b); diff --git a/src/basic/sparse-endian.h b/src/basic/sparse-endian.h new file mode 100644 index 0000000..c795d3d --- /dev/null +++ b/src/basic/sparse-endian.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright (c) 2012 Josh Triplett + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#pragma once + +#include +#include +#include + +#ifdef __CHECKER__ +#define __sd_bitwise __attribute__((__bitwise__)) +#define __sd_force __attribute__((__force__)) +#else +#define __sd_bitwise +#define __sd_force +#endif + +typedef uint16_t __sd_bitwise le16_t; +typedef uint16_t __sd_bitwise be16_t; +typedef uint32_t __sd_bitwise le32_t; +typedef uint32_t __sd_bitwise be32_t; +typedef uint64_t __sd_bitwise le64_t; +typedef uint64_t __sd_bitwise be64_t; + +#undef htobe16 +#undef htole16 +#undef be16toh +#undef le16toh +#undef htobe32 +#undef htole32 +#undef be32toh +#undef le32toh +#undef htobe64 +#undef htole64 +#undef be64toh +#undef le64toh + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define bswap_16_on_le(x) bswap_16(x) +#define bswap_32_on_le(x) bswap_32(x) +#define bswap_64_on_le(x) bswap_64(x) +#define bswap_16_on_be(x) (x) +#define bswap_32_on_be(x) (x) +#define bswap_64_on_be(x) (x) +#elif __BYTE_ORDER == __BIG_ENDIAN +#define bswap_16_on_le(x) (x) +#define bswap_32_on_le(x) (x) +#define bswap_64_on_le(x) (x) +#define bswap_16_on_be(x) bswap_16(x) +#define bswap_32_on_be(x) bswap_32(x) +#define bswap_64_on_be(x) bswap_64(x) +#endif + +static inline le16_t htole16(uint16_t value) { return (le16_t __sd_force) bswap_16_on_be(value); } +static inline le32_t htole32(uint32_t value) { return (le32_t __sd_force) bswap_32_on_be(value); } +static inline le64_t htole64(uint64_t value) { return (le64_t __sd_force) bswap_64_on_be(value); } + +static inline be16_t htobe16(uint16_t value) { return (be16_t __sd_force) bswap_16_on_le(value); } +static inline be32_t htobe32(uint32_t value) { return (be32_t __sd_force) bswap_32_on_le(value); } +static inline be64_t htobe64(uint64_t value) { return (be64_t __sd_force) bswap_64_on_le(value); } + +static inline uint16_t le16toh(le16_t value) { return bswap_16_on_be((uint16_t __sd_force)value); } +static inline uint32_t le32toh(le32_t value) { return bswap_32_on_be((uint32_t __sd_force)value); } +static inline uint64_t le64toh(le64_t value) { return bswap_64_on_be((uint64_t __sd_force)value); } + +static inline uint16_t be16toh(be16_t value) { return bswap_16_on_le((uint16_t __sd_force)value); } +static inline uint32_t be32toh(be32_t value) { return bswap_32_on_le((uint32_t __sd_force)value); } +static inline uint64_t be64toh(be64_t value) { return bswap_64_on_le((uint64_t __sd_force)value); } + +#undef __sd_bitwise +#undef __sd_force diff --git a/src/basic/special.h b/src/basic/special.h new file mode 100644 index 0000000..5d1111f --- /dev/null +++ b/src/basic/special.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#define SPECIAL_DEFAULT_TARGET "default.target" +#define SPECIAL_INITRD_TARGET "initrd.target" + +/* Shutdown targets */ +#define SPECIAL_UMOUNT_TARGET "umount.target" +/* This is not really intended to be started by directly. This is + * mostly so that other targets (reboot/halt/poweroff) can depend on + * it to bring all services down that want to be brought down on + * system shutdown. */ +#define SPECIAL_SHUTDOWN_TARGET "shutdown.target" +#define SPECIAL_HALT_TARGET "halt.target" +#define SPECIAL_POWEROFF_TARGET "poweroff.target" +#define SPECIAL_REBOOT_TARGET "reboot.target" +#define SPECIAL_KEXEC_TARGET "kexec.target" +#define SPECIAL_EXIT_TARGET "exit.target" +#define SPECIAL_SUSPEND_TARGET "suspend.target" +#define SPECIAL_HIBERNATE_TARGET "hibernate.target" +#define SPECIAL_HYBRID_SLEEP_TARGET "hybrid-sleep.target" +#define SPECIAL_SUSPEND_THEN_HIBERNATE_TARGET "suspend-then-hibernate.target" +#define SPECIAL_FACTORY_RESET_TARGET "factory-reset.target" + +/* Special boot targets */ +#define SPECIAL_RESCUE_TARGET "rescue.target" +#define SPECIAL_EMERGENCY_TARGET "emergency.target" +#define SPECIAL_MULTI_USER_TARGET "multi-user.target" +#define SPECIAL_GRAPHICAL_TARGET "graphical.target" + +/* Early boot targets */ +#define SPECIAL_SYSINIT_TARGET "sysinit.target" +#define SPECIAL_SOCKETS_TARGET "sockets.target" +#define SPECIAL_TIMERS_TARGET "timers.target" +#define SPECIAL_PATHS_TARGET "paths.target" +#define SPECIAL_LOCAL_FS_TARGET "local-fs.target" +#define SPECIAL_LOCAL_FS_PRE_TARGET "local-fs-pre.target" +#define SPECIAL_INITRD_FS_TARGET "initrd-fs.target" +#define SPECIAL_INITRD_ROOT_DEVICE_TARGET "initrd-root-device.target" +#define SPECIAL_INITRD_ROOT_FS_TARGET "initrd-root-fs.target" +#define SPECIAL_INITRD_USR_FS_TARGET "initrd-usr-fs.target" +#define SPECIAL_REMOTE_FS_TARGET "remote-fs.target" /* LSB's $remote_fs */ +#define SPECIAL_REMOTE_FS_PRE_TARGET "remote-fs-pre.target" +#define SPECIAL_SWAP_TARGET "swap.target" +#define SPECIAL_NETWORK_ONLINE_TARGET "network-online.target" +#define SPECIAL_TIME_SYNC_TARGET "time-sync.target" /* LSB's $time */ +#define SPECIAL_TIME_SET_TARGET "time-set.target" +#define SPECIAL_BASIC_TARGET "basic.target" + +/* LSB compatibility */ +#define SPECIAL_NETWORK_TARGET "network.target" /* LSB's $network */ +#define SPECIAL_NSS_LOOKUP_TARGET "nss-lookup.target" /* LSB's $named */ +#define SPECIAL_RPCBIND_TARGET "rpcbind.target" /* LSB's $portmap */ + +/* + * Rules regarding adding further high level targets like the above: + * + * - Be conservative, only add more of these when we really need + * them. We need strong usecases for further additions. + * + * - When there can be multiple implementations running side-by-side, + * it needs to be a .target unit which can pull in all + * implementations. + * + * - If something can be implemented with socket activation, and + * without, it needs to be a .target unit, so that it can pull in + * the appropriate unit. + * + * - Otherwise, it should be a .service unit. + * + * - In some cases it is OK to have both a .service and a .target + * unit, i.e. if there can be multiple parallel implementations, but + * only one is the "system" one. Example: syslog. + * + * Or to put this in other words: .service symlinks can be used to + * arbitrate between multiple implementations if there can be only one + * of a kind. .target units can be used to support multiple + * implementations that can run side-by-side. + */ + +/* Magic early boot services */ +#define SPECIAL_FSCK_SERVICE "systemd-fsck@.service" +#define SPECIAL_FSCK_ROOT_SERVICE "systemd-fsck-root.service" +#define SPECIAL_FSCK_USR_SERVICE "systemd-fsck-usr.service" +#define SPECIAL_QUOTACHECK_SERVICE "systemd-quotacheck.service" +#define SPECIAL_QUOTAON_SERVICE "quotaon.service" +#define SPECIAL_REMOUNT_FS_SERVICE "systemd-remount-fs.service" +#define SPECIAL_VOLATILE_ROOT_SERVICE "systemd-volatile-root.service" +#define SPECIAL_UDEVD_SERVICE "systemd-udevd.service" + +/* Services systemd relies on */ +#define SPECIAL_DBUS_SERVICE "dbus.service" +#define SPECIAL_DBUS_SOCKET "dbus.socket" +#define SPECIAL_JOURNALD_SOCKET "systemd-journald.socket" +#define SPECIAL_JOURNALD_SERVICE "systemd-journald.service" +#define SPECIAL_TMPFILES_SETUP_SERVICE "systemd-tmpfiles-setup.service" + +/* Magic init signals */ +#define SPECIAL_KBREQUEST_TARGET "kbrequest.target" +#define SPECIAL_SIGPWR_TARGET "sigpwr.target" +#define SPECIAL_CTRL_ALT_DEL_TARGET "ctrl-alt-del.target" + +/* Where we add all our system units, users and machines by default */ +#define SPECIAL_SYSTEM_SLICE "system.slice" +#define SPECIAL_USER_SLICE "user.slice" +#define SPECIAL_MACHINE_SLICE "machine.slice" +#define SPECIAL_ROOT_SLICE "-.slice" + +/* The scope unit systemd itself lives in. */ +#define SPECIAL_INIT_SCOPE "init.scope" + +/* The root directory. */ +#define SPECIAL_ROOT_MOUNT "-.mount" + +/* Special slices valid for the user instance */ +#define SPECIAL_SESSION_SLICE "session.slice" +#define SPECIAL_APP_SLICE "app.slice" +#define SPECIAL_BACKGROUND_SLICE "background.slice" diff --git a/src/basic/stat-util.c b/src/basic/stat-util.c new file mode 100644 index 0000000..468840d --- /dev/null +++ b/src/basic/stat-util.c @@ -0,0 +1,443 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "chase-symlinks.h" +#include "dirent-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "filesystems.h" +#include "fs-util.h" +#include "macro.h" +#include "missing_fs.h" +#include "missing_magic.h" +#include "missing_syscall.h" +#include "nulstr-util.h" +#include "parse-util.h" +#include "stat-util.h" +#include "string-util.h" + +int is_symlink(const char *path) { + struct stat info; + + assert(path); + + if (lstat(path, &info) < 0) + return -errno; + + return !!S_ISLNK(info.st_mode); +} + +int is_dir_full(int atfd, const char* path, bool follow) { + struct stat st; + int r; + + assert(atfd >= 0 || atfd == AT_FDCWD); + assert(atfd >= 0 || path); + + if (path) + r = fstatat(atfd, path, &st, follow ? 0 : AT_SYMLINK_NOFOLLOW); + else + r = fstat(atfd, &st); + if (r < 0) + return -errno; + + return !!S_ISDIR(st.st_mode); +} + +int is_device_node(const char *path) { + struct stat info; + + assert(path); + + if (lstat(path, &info) < 0) + return -errno; + + return !!(S_ISBLK(info.st_mode) || S_ISCHR(info.st_mode)); +} + +int dir_is_empty_at(int dir_fd, const char *path, bool ignore_hidden_or_backup) { + _cleanup_close_ int fd = -1; + struct dirent *buf; + size_t m; + + if (path) { + assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + + fd = openat(dir_fd, path, O_RDONLY|O_DIRECTORY|O_CLOEXEC); + if (fd < 0) + return -errno; + } else if (dir_fd == AT_FDCWD) { + fd = open(".", O_RDONLY|O_DIRECTORY|O_CLOEXEC); + if (fd < 0) + return -errno; + } else { + /* Note that DUPing is not enough, as the internal pointer would still be shared and moved + * getedents64(). */ + assert(dir_fd >= 0); + + fd = fd_reopen(dir_fd, O_RDONLY|O_DIRECTORY|O_CLOEXEC); + if (fd < 0) + return fd; + } + + /* Allocate space for at least 3 full dirents, since every dir has at least two entries ("." + + * ".."), and only once we have seen if there's a third we know whether the dir is empty or not. If + * 'ignore_hidden_or_backup' is true we'll allocate a bit more, since we might skip over a bunch of + * entries that we end up ignoring. */ + m = (ignore_hidden_or_backup ? 16 : 3) * DIRENT_SIZE_MAX; + buf = alloca(m); + + for (;;) { + struct dirent *de; + ssize_t n; + + n = getdents64(fd, buf, m); + if (n < 0) + return -errno; + if (n == 0) + break; + + assert((size_t) n <= m); + msan_unpoison(buf, n); + + FOREACH_DIRENT_IN_BUFFER(de, buf, n) + if (!(ignore_hidden_or_backup ? hidden_or_backup_file(de->d_name) : dot_or_dot_dot(de->d_name))) + return 0; + } + + return 1; +} + +bool null_or_empty(struct stat *st) { + assert(st); + + if (S_ISREG(st->st_mode) && st->st_size <= 0) + return true; + + /* We don't want to hardcode the major/minor of /dev/null, hence we do a simpler "is this a character + * device node?" check. */ + + if (S_ISCHR(st->st_mode)) + return true; + + return false; +} + +int null_or_empty_path_with_root(const char *fn, const char *root) { + struct stat st; + int r; + + assert(fn); + + /* A symlink to /dev/null or an empty file? + * When looking under root_dir, we can't expect /dev/ to be mounted, + * so let's see if the path is a (possibly dangling) symlink to /dev/null. */ + + if (path_equal_ptr(path_startswith(fn, root ?: "/"), "dev/null")) + return true; + + r = chase_symlinks_and_stat(fn, root, CHASE_PREFIX_ROOT, NULL, &st, NULL); + if (r < 0) + return r; + + return null_or_empty(&st); +} + +int null_or_empty_fd(int fd) { + struct stat st; + + assert(fd >= 0); + + if (fstat(fd, &st) < 0) + return -errno; + + return null_or_empty(&st); +} + +int path_is_read_only_fs(const char *path) { + struct statvfs st; + + assert(path); + + if (statvfs(path, &st) < 0) + return -errno; + + if (st.f_flag & ST_RDONLY) + return true; + + /* On NFS, statvfs() might not reflect whether we can actually + * write to the remote share. Let's try again with + * access(W_OK) which is more reliable, at least sometimes. */ + if (access(path, W_OK) < 0 && errno == EROFS) + return true; + + return false; +} + +int files_same(const char *filea, const char *fileb, int flags) { + struct stat a, b; + + assert(filea); + assert(fileb); + + if (fstatat(AT_FDCWD, filea, &a, flags) < 0) + return log_debug_errno(errno, "Cannot stat %s: %m", filea); + + if (fstatat(AT_FDCWD, fileb, &b, flags) < 0) + return log_debug_errno(errno, "Cannot stat %s: %m", fileb); + + return stat_inode_same(&a, &b); +} + +bool is_fs_type(const struct statfs *s, statfs_f_type_t magic_value) { + assert(s); + assert_cc(sizeof(statfs_f_type_t) >= sizeof(s->f_type)); + + return F_TYPE_EQUAL(s->f_type, magic_value); +} + +int fd_is_fs_type(int fd, statfs_f_type_t magic_value) { + struct statfs s; + + if (fstatfs(fd, &s) < 0) + return -errno; + + return is_fs_type(&s, magic_value); +} + +int path_is_fs_type(const char *path, statfs_f_type_t magic_value) { + struct statfs s; + + if (statfs(path, &s) < 0) + return -errno; + + return is_fs_type(&s, magic_value); +} + +bool is_temporary_fs(const struct statfs *s) { + return fs_in_group(s, FILESYSTEM_SET_TEMPORARY); +} + +bool is_network_fs(const struct statfs *s) { + return fs_in_group(s, FILESYSTEM_SET_NETWORK); +} + +int fd_is_temporary_fs(int fd) { + struct statfs s; + + if (fstatfs(fd, &s) < 0) + return -errno; + + return is_temporary_fs(&s); +} + +int fd_is_network_fs(int fd) { + struct statfs s; + + if (fstatfs(fd, &s) < 0) + return -errno; + + return is_network_fs(&s); +} + +int path_is_temporary_fs(const char *path) { + struct statfs s; + + if (statfs(path, &s) < 0) + return -errno; + + return is_temporary_fs(&s); +} + +int path_is_network_fs(const char *path) { + struct statfs s; + + if (statfs(path, &s) < 0) + return -errno; + + return is_network_fs(&s); +} + +int stat_verify_regular(const struct stat *st) { + assert(st); + + /* Checks whether the specified stat() structure refers to a regular file. If not returns an appropriate error + * code. */ + + if (S_ISDIR(st->st_mode)) + return -EISDIR; + + if (S_ISLNK(st->st_mode)) + return -ELOOP; + + if (!S_ISREG(st->st_mode)) + return -EBADFD; + + return 0; +} + +int fd_verify_regular(int fd) { + struct stat st; + + assert(fd >= 0); + + if (fstat(fd, &st) < 0) + return -errno; + + return stat_verify_regular(&st); +} + +int stat_verify_directory(const struct stat *st) { + assert(st); + + if (S_ISLNK(st->st_mode)) + return -ELOOP; + + if (!S_ISDIR(st->st_mode)) + return -ENOTDIR; + + return 0; +} + +int fd_verify_directory(int fd) { + struct stat st; + + assert(fd >= 0); + + if (fstat(fd, &st) < 0) + return -errno; + + return stat_verify_directory(&st); +} + +int proc_mounted(void) { + int r; + + /* A quick check of procfs is properly mounted */ + + r = path_is_fs_type("/proc/", PROC_SUPER_MAGIC); + if (r == -ENOENT) /* not mounted at all */ + return false; + + return r; +} + +bool stat_inode_same(const struct stat *a, const struct stat *b) { + + /* Returns if the specified stat structure references the same (though possibly modified) inode. Does + * a thorough check, comparing inode nr, backing device and if the inode is still of the same type. */ + + return a && b && + (a->st_mode & S_IFMT) != 0 && /* We use the check for .st_mode if the structure was ever initialized */ + ((a->st_mode ^ b->st_mode) & S_IFMT) == 0 && /* same inode type */ + a->st_dev == b->st_dev && + a->st_ino == b->st_ino; +} + +bool stat_inode_unmodified(const struct stat *a, const struct stat *b) { + + /* Returns if the specified stat structures reference the same, unmodified inode. This check tries to + * be reasonably careful when detecting changes: we check both inode and mtime, to cater for file + * systems where mtimes are fixed to 0 (think: ostree/nixos type installations). We also check file + * size, backing device, inode type and if this refers to a device not the major/minor. + * + * Note that we don't care if file attributes such as ownership or access mode change, this here is + * about contents of the file. The purpose here is to detect file contents changes, and nothing + * else. */ + + return stat_inode_same(a, b) && + a->st_mtim.tv_sec == b->st_mtim.tv_sec && + a->st_mtim.tv_nsec == b->st_mtim.tv_nsec && + (!S_ISREG(a->st_mode) || a->st_size == b->st_size) && /* if regular file, compare file size */ + (!(S_ISCHR(a->st_mode) || S_ISBLK(a->st_mode)) || a->st_rdev == b->st_rdev); /* if device node, also compare major/minor, because we can */ +} + +bool statx_inode_same(const struct statx *a, const struct statx *b) { + + /* Same as stat_inode_same() but for struct statx */ + + return a && b && + FLAGS_SET(a->stx_mask, STATX_TYPE|STATX_INO) && FLAGS_SET(b->stx_mask, STATX_TYPE|STATX_INO) && + (a->stx_mode & S_IFMT) != 0 && + ((a->stx_mode ^ b->stx_mode) & S_IFMT) == 0 && + a->stx_dev_major == b->stx_dev_major && + a->stx_dev_minor == b->stx_dev_minor && + a->stx_ino == b->stx_ino; +} + +bool statx_mount_same(const struct new_statx *a, const struct new_statx *b) { + if (!a || !b) + return false; + + /* if we have the mount ID, that's all we need */ + if (FLAGS_SET(a->stx_mask, STATX_MNT_ID) && FLAGS_SET(b->stx_mask, STATX_MNT_ID)) + return a->stx_mnt_id == b->stx_mnt_id; + + /* Otherwise, major/minor of backing device must match */ + return a->stx_dev_major == b->stx_dev_major && + a->stx_dev_minor == b->stx_dev_minor; +} + +int statx_fallback(int dfd, const char *path, int flags, unsigned mask, struct statx *sx) { + static bool avoid_statx = false; + struct stat st; + + if (!avoid_statx) { + if (statx(dfd, path, flags, mask, sx) < 0) { + if (!ERRNO_IS_NOT_SUPPORTED(errno) && errno != EPERM) + return -errno; + + /* If statx() is not supported or if we see EPERM (which might indicate seccomp + * filtering or so), let's do a fallback. Not that on EACCES we'll not fall back, + * since that is likely an indication of fs access issues, which we should + * propagate */ + } else + return 0; + + avoid_statx = true; + } + + /* Only do fallback if fstatat() supports the flag too, or if it's one of the sync flags, which are + * OK to ignore */ + if ((flags & ~(AT_EMPTY_PATH|AT_NO_AUTOMOUNT|AT_SYMLINK_NOFOLLOW| + AT_STATX_SYNC_AS_STAT|AT_STATX_FORCE_SYNC|AT_STATX_DONT_SYNC)) != 0) + return -EOPNOTSUPP; + + if (fstatat(dfd, path, &st, flags & (AT_EMPTY_PATH|AT_NO_AUTOMOUNT|AT_SYMLINK_NOFOLLOW)) < 0) + return -errno; + + *sx = (struct statx) { + .stx_mask = STATX_TYPE|STATX_MODE| + STATX_NLINK|STATX_UID|STATX_GID| + STATX_ATIME|STATX_MTIME|STATX_CTIME| + STATX_INO|STATX_SIZE|STATX_BLOCKS, + .stx_blksize = st.st_blksize, + .stx_nlink = st.st_nlink, + .stx_uid = st.st_uid, + .stx_gid = st.st_gid, + .stx_mode = st.st_mode, + .stx_ino = st.st_ino, + .stx_size = st.st_size, + .stx_blocks = st.st_blocks, + .stx_rdev_major = major(st.st_rdev), + .stx_rdev_minor = minor(st.st_rdev), + .stx_dev_major = major(st.st_dev), + .stx_dev_minor = minor(st.st_dev), + .stx_atime.tv_sec = st.st_atim.tv_sec, + .stx_atime.tv_nsec = st.st_atim.tv_nsec, + .stx_mtime.tv_sec = st.st_mtim.tv_sec, + .stx_mtime.tv_nsec = st.st_mtim.tv_nsec, + .stx_ctime.tv_sec = st.st_ctim.tv_sec, + .stx_ctime.tv_nsec = st.st_ctim.tv_nsec, + }; + + return 0; +} diff --git a/src/basic/stat-util.h b/src/basic/stat-util.h new file mode 100644 index 0000000..f9519d8 --- /dev/null +++ b/src/basic/stat-util.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "macro.h" +#include "missing_stat.h" + +int is_symlink(const char *path); +int is_dir_full(int atfd, const char *fname, bool follow); +static inline int is_dir(const char *path, bool follow) { + return is_dir_full(AT_FDCWD, path, follow); +} +static inline int is_dir_fd(int fd) { + return is_dir_full(fd, NULL, false); +} +int is_device_node(const char *path); + +int dir_is_empty_at(int dir_fd, const char *path, bool ignore_hidden_or_backup); +static inline int dir_is_empty(const char *path, bool ignore_hidden_or_backup) { + return dir_is_empty_at(AT_FDCWD, path, ignore_hidden_or_backup); +} + +bool null_or_empty(struct stat *st) _pure_; +int null_or_empty_path_with_root(const char *fn, const char *root); +int null_or_empty_fd(int fd); + +static inline int null_or_empty_path(const char *fn) { + return null_or_empty_path_with_root(fn, NULL); +} + +int path_is_read_only_fs(const char *path); + +int files_same(const char *filea, const char *fileb, int flags); + +/* The .f_type field of struct statfs is really weird defined on + * different archs. Let's give its type a name. */ +typedef typeof(((struct statfs*)NULL)->f_type) statfs_f_type_t; + +bool is_fs_type(const struct statfs *s, statfs_f_type_t magic_value) _pure_; +int fd_is_fs_type(int fd, statfs_f_type_t magic_value); +int path_is_fs_type(const char *path, statfs_f_type_t magic_value); + +bool is_temporary_fs(const struct statfs *s) _pure_; +bool is_network_fs(const struct statfs *s) _pure_; + +int fd_is_temporary_fs(int fd); +int fd_is_network_fs(int fd); + +int path_is_temporary_fs(const char *path); +int path_is_network_fs(const char *path); + +/* Because statfs.t_type can be int on some architectures, we have to cast + * the const magic to the type, otherwise the compiler warns about + * signed/unsigned comparison, because the magic can be 32 bit unsigned. + */ +#define F_TYPE_EQUAL(a, b) (a == (typeof(a)) b) + +int stat_verify_regular(const struct stat *st); +int fd_verify_regular(int fd); + +int stat_verify_directory(const struct stat *st); +int fd_verify_directory(int fd); + +int proc_mounted(void); + +bool stat_inode_same(const struct stat *a, const struct stat *b); +bool stat_inode_unmodified(const struct stat *a, const struct stat *b); + +bool statx_inode_same(const struct statx *a, const struct statx *b); +bool statx_mount_same(const struct new_statx *a, const struct new_statx *b); + +int statx_fallback(int dfd, const char *path, int flags, unsigned mask, struct statx *sx); + +#if HAS_FEATURE_MEMORY_SANITIZER +# warning "Explicitly initializing struct statx, to work around msan limitation. Please remove as soon as msan has been updated to not require this." +# define STRUCT_STATX_DEFINE(var) \ + struct statx var = {} +# define STRUCT_NEW_STATX_DEFINE(var) \ + union { \ + struct statx sx; \ + struct new_statx nsx; \ + } var = {} +#else +# define STRUCT_STATX_DEFINE(var) \ + struct statx var +# define STRUCT_NEW_STATX_DEFINE(var) \ + union { \ + struct statx sx; \ + struct new_statx nsx; \ + } var +#endif diff --git a/src/basic/static-destruct.h b/src/basic/static-destruct.h new file mode 100644 index 0000000..97baac7 --- /dev/null +++ b/src/basic/static-destruct.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#pragma once + +#include "alloc-util.h" +#include "macro.h" + +/* A framework for registering static variables that shall be freed on shutdown of a process. It's a bit like gcc's + * destructor attribute, but allows us to precisely schedule when we want to free the variables. This is supposed to + * feel a bit like the gcc cleanup attribute, but for static variables. Note that this does not work for static + * variables declared in .so's, as the list is private to the same linking unit. But maybe that's a good thing. */ + +typedef struct StaticDestructor { + void *data; + free_func_t destroy; +} StaticDestructor; + +#define STATIC_DESTRUCTOR_REGISTER(variable, func) \ + _STATIC_DESTRUCTOR_REGISTER(UNIQ, variable, func) + +#define _STATIC_DESTRUCTOR_REGISTER(uq, variable, func) \ + /* Type-safe destructor */ \ + static void UNIQ_T(static_destructor_wrapper, uq)(void *p) { \ + typeof(variable) *q = p; \ + func(q); \ + } \ + /* Older compilers don't know "retain" attribute. */ \ + _Pragma("GCC diagnostic ignored \"-Wattributes\"") \ + /* The actual destructor structure we place in a special section to find it. */ \ + _section_("SYSTEMD_STATIC_DESTRUCT") \ + /* Use pointer alignment, since that is apparently what gcc does for static variables. */ \ + _alignptr_ \ + /* Make sure this is not dropped from the image despite not being explicitly referenced. */ \ + _used_ \ + /* Prevent garbage collection by the linker. */ \ + _retain_ \ + /* Make sure that AddressSanitizer doesn't pad this variable: we want everything in this section + * packed next to each other so that we can enumerate it. */ \ + _variable_no_sanitize_address_ \ + static const StaticDestructor UNIQ_T(static_destructor_entry, uq) = { \ + .data = &(variable), \ + .destroy = UNIQ_T(static_destructor_wrapper, uq), \ + } + +/* Beginning and end of our section listing the destructors. We define these as weak as we want this to work + * even if no destructors are defined and the section is missing. */ +extern const struct StaticDestructor _weak_ __start_SYSTEMD_STATIC_DESTRUCT[]; +extern const struct StaticDestructor _weak_ __stop_SYSTEMD_STATIC_DESTRUCT[]; + +/* The function to destroy everything. (Note that this must be static inline, as it's key that it remains in + * the same linking unit as the variables we want to destroy.) */ +static inline void static_destruct(void) { + const StaticDestructor *d; + + if (!__start_SYSTEMD_STATIC_DESTRUCT) + return; + + d = ALIGN_PTR(__start_SYSTEMD_STATIC_DESTRUCT); + while (d < __stop_SYSTEMD_STATIC_DESTRUCT) { + d->destroy(d->data); + d = ALIGN_PTR(d + 1); + } +} diff --git a/src/basic/stdio-util.h b/src/basic/stdio-util.h new file mode 100644 index 0000000..f647f12 --- /dev/null +++ b/src/basic/stdio-util.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include + +#include "macro.h" +#include "memory-util.h" + +#define snprintf_ok(buf, len, fmt, ...) \ + ({ \ + char *_buf = (buf); \ + size_t _len = (len); \ + int _snpf = snprintf(_buf, _len, (fmt), ##__VA_ARGS__); \ + _snpf >= 0 && (size_t) _snpf < _len ? _buf : NULL; \ + }) + +#define xsprintf(buf, fmt, ...) \ + assert_message_se(snprintf_ok(buf, ELEMENTSOF(buf), fmt, ##__VA_ARGS__), "xsprintf: " #buf "[] must be big enough") + +#define VA_FORMAT_ADVANCE(format, ap) \ +do { \ + int _argtypes[128]; \ + size_t _i, _k; \ + /* See https://github.com/google/sanitizers/issues/992 */ \ + if (HAS_FEATURE_MEMORY_SANITIZER) \ + zero(_argtypes); \ + _k = parse_printf_format((format), ELEMENTSOF(_argtypes), _argtypes); \ + assert(_k < ELEMENTSOF(_argtypes)); \ + for (_i = 0; _i < _k; _i++) { \ + if (_argtypes[_i] & PA_FLAG_PTR) { \ + (void) va_arg(ap, void*); \ + continue; \ + } \ + \ + switch (_argtypes[_i]) { \ + case PA_INT: \ + case PA_INT|PA_FLAG_SHORT: \ + case PA_CHAR: \ + (void) va_arg(ap, int); \ + break; \ + case PA_INT|PA_FLAG_LONG: \ + (void) va_arg(ap, long int); \ + break; \ + case PA_INT|PA_FLAG_LONG_LONG: \ + (void) va_arg(ap, long long int); \ + break; \ + case PA_WCHAR: \ + (void) va_arg(ap, wchar_t); \ + break; \ + case PA_WSTRING: \ + case PA_STRING: \ + case PA_POINTER: \ + (void) va_arg(ap, void*); \ + break; \ + case PA_FLOAT: \ + case PA_DOUBLE: \ + (void) va_arg(ap, double); \ + break; \ + case PA_DOUBLE|PA_FLAG_LONG_DOUBLE: \ + (void) va_arg(ap, long double); \ + break; \ + default: \ + assert_not_reached(); \ + } \ + } \ +} while (false) diff --git a/src/basic/strbuf.c b/src/basic/strbuf.c new file mode 100644 index 0000000..0617acc --- /dev/null +++ b/src/basic/strbuf.c @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include + +#include "alloc-util.h" +#include "sort-util.h" +#include "strbuf.h" + +/* + * Strbuf stores given strings in a single continuous allocated memory + * area. Identical strings are de-duplicated and return the same offset + * as the first string stored. If the tail of a string already exists + * in the buffer, the tail is returned. + * + * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the + * information about the stored strings. + * + * Example of udev rules: + * $ ./udevadm test . + * ... + * read rules file: /usr/lib/udev/rules.d/99-systemd.rules + * rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings + * 23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used + * ... + */ + +struct strbuf* strbuf_new(void) { + struct strbuf *str; + + str = new(struct strbuf, 1); + if (!str) + return NULL; + *str = (struct strbuf) { + .buf = new0(char, 1), + .root = new0(struct strbuf_node, 1), + .len = 1, + .nodes_count = 1, + }; + if (!str->buf || !str->root) { + free(str->buf); + free(str->root); + return mfree(str); + } + + return str; +} + +static struct strbuf_node* strbuf_node_cleanup(struct strbuf_node *node) { + size_t i; + + for (i = 0; i < node->children_count; i++) + strbuf_node_cleanup(node->children[i].child); + free(node->children); + return mfree(node); +} + +/* clean up trie data, leave only the string buffer */ +void strbuf_complete(struct strbuf *str) { + if (!str) + return; + if (str->root) + str->root = strbuf_node_cleanup(str->root); +} + +/* clean up everything */ +struct strbuf* strbuf_free(struct strbuf *str) { + if (!str) + return NULL; + + strbuf_complete(str); + free(str->buf); + return mfree(str); +} + +static int strbuf_children_cmp(const struct strbuf_child_entry *n1, + const struct strbuf_child_entry *n2) { + return n1->c - n2->c; +} + +static void bubbleinsert(struct strbuf_node *node, + uint8_t c, + struct strbuf_node *node_child) { + + struct strbuf_child_entry new = { + .c = c, + .child = node_child, + }; + int left = 0, right = node->children_count; + + while (right > left) { + int middle = (right + left) / 2 ; + if (strbuf_children_cmp(&node->children[middle], &new) <= 0) + left = middle + 1; + else + right = middle; + } + + memmove(node->children + left + 1, node->children + left, + sizeof(struct strbuf_child_entry) * (node->children_count - left)); + node->children[left] = new; + + node->children_count++; +} + +/* add string, return the index/offset into the buffer */ +ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) { + uint8_t c; + char *buf_new; + struct strbuf_child_entry *child; + struct strbuf_node *node; + ssize_t off; + + if (!str->root) + return -EINVAL; + + /* search string; start from last character to find possibly matching tails */ + + str->in_count++; + if (len == 0) { + str->dedup_count++; + return 0; + } + str->in_len += len; + + node = str->root; + for (size_t depth = 0; depth <= len; depth++) { + struct strbuf_child_entry search; + + /* match against current node */ + off = node->value_off + node->value_len - len; + if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) { + str->dedup_len += len; + str->dedup_count++; + return off; + } + + c = s[len - 1 - depth]; + + /* lookup child node */ + search.c = c; + child = typesafe_bsearch(&search, node->children, node->children_count, strbuf_children_cmp); + if (!child) + break; + node = child->child; + } + + /* add new string */ + buf_new = realloc(str->buf, str->len + len+1); + if (!buf_new) + return -ENOMEM; + str->buf = buf_new; + off = str->len; + memcpy(str->buf + off, s, len); + str->len += len; + str->buf[str->len++] = '\0'; + + /* new node */ + _cleanup_free_ struct strbuf_node *node_child = NULL; + + node_child = new(struct strbuf_node, 1); + if (!node_child) + return -ENOMEM; + *node_child = (struct strbuf_node) { + .value_off = off, + .value_len = len, + }; + + /* extend array, add new entry, sort for bisection */ + child = reallocarray(node->children, node->children_count + 1, sizeof(struct strbuf_child_entry)); + if (!child) + return -ENOMEM; + + str->nodes_count++; + + node->children = child; + bubbleinsert(node, c, TAKE_PTR(node_child)); + + return off; +} diff --git a/src/basic/strbuf.h b/src/basic/strbuf.h new file mode 100644 index 0000000..6187c08 --- /dev/null +++ b/src/basic/strbuf.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include + +#include "macro.h" + +struct strbuf { + char *buf; + size_t len; + struct strbuf_node *root; + + size_t nodes_count; + size_t in_count; + size_t in_len; + size_t dedup_len; + size_t dedup_count; +}; + +struct strbuf_node { + size_t value_off; + size_t value_len; + + struct strbuf_child_entry *children; + uint8_t children_count; +}; + +struct strbuf_child_entry { + uint8_t c; + struct strbuf_node *child; +}; + +struct strbuf* strbuf_new(void); +ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len); +void strbuf_complete(struct strbuf *str); +struct strbuf* strbuf_free(struct strbuf *str); +DEFINE_TRIVIAL_CLEANUP_FUNC(struct strbuf*, strbuf_free); diff --git a/src/basic/string-table.c b/src/basic/string-table.c new file mode 100644 index 0000000..3a63767 --- /dev/null +++ b/src/basic/string-table.c @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "string-table.h" +#include "string-util.h" + +ssize_t string_table_lookup(const char * const *table, size_t len, const char *key) { + if (!key) + return -EINVAL; + + for (size_t i = 0; i < len; ++i) + if (streq_ptr(table[i], key)) + return (ssize_t) i; + + return -EINVAL; +} diff --git a/src/basic/string-table.h b/src/basic/string-table.h new file mode 100644 index 0000000..e3a26a6 --- /dev/null +++ b/src/basic/string-table.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#pragma once + +#include +#include +#include +#include + +#include "macro.h" +#include "parse-util.h" +#include "string-util.h" + +ssize_t string_table_lookup(const char * const *table, size_t len, const char *key); + +/* For basic lookup tables with strictly enumerated entries */ +#define _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \ + scope const char *name##_to_string(type i) { \ + if (i < 0 || i >= (type) ELEMENTSOF(name##_table)) \ + return NULL; \ + return name##_table[i]; \ + } + +#define _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,scope) \ + scope type name##_from_string(const char *s) { \ + return (type) string_table_lookup(name##_table, ELEMENTSOF(name##_table), s); \ + } + +#define _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(name,type,yes,scope) \ + scope type name##_from_string(const char *s) { \ + if (!s) \ + return -EINVAL; \ + int b = parse_boolean(s); \ + if (b == 0) \ + return (type) 0; \ + if (b > 0) \ + return yes; \ + return (type) string_table_lookup(name##_table, ELEMENTSOF(name##_table), s); \ + } + +#define _DEFINE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max,scope) \ + scope int name##_to_string_alloc(type i, char **str) { \ + char *s; \ + if (i < 0 || i > max) \ + return -ERANGE; \ + if (i < (type) ELEMENTSOF(name##_table) && name##_table[i]) { \ + s = strdup(name##_table[i]); \ + if (!s) \ + return -ENOMEM; \ + } else { \ + if (asprintf(&s, "%i", i) < 0) \ + return -ENOMEM; \ + } \ + *str = s; \ + return 0; \ + } + +#define _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max,scope) \ + scope type name##_from_string(const char *s) { \ + unsigned u = 0; \ + type i; \ + if (!s) \ + return -EINVAL; \ + i = (type) string_table_lookup(name##_table, ELEMENTSOF(name##_table), s); \ + if (i >= 0) \ + return i; \ + if (safe_atou(s, &u) < 0) \ + return -EINVAL; \ + if (u > max) \ + return -EINVAL; \ + return (type) u; \ + } + +#define _DEFINE_STRING_TABLE_LOOKUP(name,type,scope) \ + _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \ + _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,scope) + +#define _DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(name,type,yes,scope) \ + _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \ + _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(name,type,yes,scope) + +#define DEFINE_STRING_TABLE_LOOKUP(name,type) _DEFINE_STRING_TABLE_LOOKUP(name,type,) +#define DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type) _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,) +#define DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type) _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,) +#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP(name,type) _DEFINE_STRING_TABLE_LOOKUP(name,type,static) +#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(name,type) _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,static) +#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(name,type) _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,static) + +#define DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(name,type,yes) _DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(name,type,yes,) +#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(name,type,yes) _DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(name,type,yes,static) +#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(name,type,yes) \ + _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_WITH_BOOLEAN(name,type,yes,static) + +/* For string conversions where numbers are also acceptable */ +#define DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(name,type,max) \ + _DEFINE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max,) \ + _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max,) + +#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max) \ + _DEFINE_STRING_TABLE_LOOKUP_TO_STRING_FALLBACK(name,type,max,static) +#define DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max) \ + _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING_FALLBACK(name,type,max,static) + +#define DUMP_STRING_TABLE(name,type,max) \ + do { \ + flockfile(stdout); \ + for (type _k = 0; _k < (max); _k++) { \ + const char *_t; \ + _t = name##_to_string(_k); \ + if (!_t) \ + continue; \ + fputs_unlocked(_t, stdout); \ + fputc_unlocked('\n', stdout); \ + } \ + funlockfile(stdout); \ + } while (false) diff --git a/src/basic/string-util.c b/src/basic/string-util.c new file mode 100644 index 0000000..17d35fe --- /dev/null +++ b/src/basic/string-util.c @@ -0,0 +1,1204 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "escape.h" +#include "extract-word.h" +#include "fileio.h" +#include "gunicode.h" +#include "locale-util.h" +#include "macro.h" +#include "memory-util.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "utf8.h" +#include "util.h" + +char* first_word(const char *s, const char *word) { + size_t sl, wl; + const char *p; + + assert(s); + assert(word); + + /* Checks if the string starts with the specified word, either + * followed by NUL or by whitespace. Returns a pointer to the + * NUL or the first character after the whitespace. */ + + sl = strlen(s); + wl = strlen(word); + + if (sl < wl) + return NULL; + + if (wl == 0) + return (char*) s; + + if (memcmp(s, word, wl) != 0) + return NULL; + + p = s + wl; + if (*p == 0) + return (char*) p; + + if (!strchr(WHITESPACE, *p)) + return NULL; + + p += strspn(p, WHITESPACE); + return (char*) p; +} + +char *strnappend(const char *s, const char *suffix, size_t b) { + size_t a; + char *r; + + if (!s && !suffix) + return strdup(""); + + if (!s) + return strndup(suffix, b); + + if (!suffix) + return strdup(s); + + assert(s); + assert(suffix); + + a = strlen(s); + if (b > SIZE_MAX - a) + return NULL; + + r = new(char, a+b+1); + if (!r) + return NULL; + + memcpy(r, s, a); + memcpy(r+a, suffix, b); + r[a+b] = 0; + + return r; +} + +char *strjoin_real(const char *x, ...) { + va_list ap; + size_t l = 1; + char *r, *p; + + va_start(ap, x); + for (const char *t = x; t; t = va_arg(ap, const char *)) { + size_t n; + + n = strlen(t); + if (n > SIZE_MAX - l) { + va_end(ap); + return NULL; + } + l += n; + } + va_end(ap); + + p = r = new(char, l); + if (!r) + return NULL; + + va_start(ap, x); + for (const char *t = x; t; t = va_arg(ap, const char *)) + p = stpcpy(p, t); + va_end(ap); + + *p = 0; + + return r; +} + +char *strstrip(char *s) { + if (!s) + return NULL; + + /* Drops trailing whitespace. Modifies the string in place. Returns pointer to first non-space character */ + + return delete_trailing_chars(skip_leading_chars(s, WHITESPACE), WHITESPACE); +} + +char *delete_chars(char *s, const char *bad) { + char *f, *t; + + /* Drops all specified bad characters, regardless where in the string */ + + if (!s) + return NULL; + + if (!bad) + bad = WHITESPACE; + + for (f = s, t = s; *f; f++) { + if (strchr(bad, *f)) + continue; + + *(t++) = *f; + } + + *t = 0; + + return s; +} + +char *delete_trailing_chars(char *s, const char *bad) { + char *c = s; + + /* Drops all specified bad characters, at the end of the string */ + + if (!s) + return NULL; + + if (!bad) + bad = WHITESPACE; + + for (char *p = s; *p; p++) + if (!strchr(bad, *p)) + c = p + 1; + + *c = 0; + + return s; +} + +char *truncate_nl(char *s) { + assert(s); + + s[strcspn(s, NEWLINE)] = 0; + return s; +} + +char ascii_tolower(char x) { + + if (x >= 'A' && x <= 'Z') + return x - 'A' + 'a'; + + return x; +} + +char ascii_toupper(char x) { + + if (x >= 'a' && x <= 'z') + return x - 'a' + 'A'; + + return x; +} + +char *ascii_strlower(char *t) { + assert(t); + + for (char *p = t; *p; p++) + *p = ascii_tolower(*p); + + return t; +} + +char *ascii_strupper(char *t) { + assert(t); + + for (char *p = t; *p; p++) + *p = ascii_toupper(*p); + + return t; +} + +char *ascii_strlower_n(char *t, size_t n) { + if (n <= 0) + return t; + + for (size_t i = 0; i < n; i++) + t[i] = ascii_tolower(t[i]); + + return t; +} + +int ascii_strcasecmp_n(const char *a, const char *b, size_t n) { + + for (; n > 0; a++, b++, n--) { + int x, y; + + x = (int) (uint8_t) ascii_tolower(*a); + y = (int) (uint8_t) ascii_tolower(*b); + + if (x != y) + return x - y; + } + + return 0; +} + +int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) { + int r; + + r = ascii_strcasecmp_n(a, b, MIN(n, m)); + if (r != 0) + return r; + + return CMP(n, m); +} + +bool chars_intersect(const char *a, const char *b) { + /* Returns true if any of the chars in a are in b. */ + for (const char *p = a; *p; p++) + if (strchr(b, *p)) + return true; + + return false; +} + +bool string_has_cc(const char *p, const char *ok) { + assert(p); + + /* + * Check if a string contains control characters. If 'ok' is + * non-NULL it may be a string containing additional CCs to be + * considered OK. + */ + + for (const char *t = p; *t; t++) { + if (ok && strchr(ok, *t)) + continue; + + if (char_is_cc(*t)) + return true; + } + + return false; +} + +static int write_ellipsis(char *buf, bool unicode) { + if (unicode || is_locale_utf8()) { + buf[0] = 0xe2; /* tri-dot ellipsis: … */ + buf[1] = 0x80; + buf[2] = 0xa6; + } else { + buf[0] = '.'; + buf[1] = '.'; + buf[2] = '.'; + } + + return 3; +} + +static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) { + size_t x, need_space, suffix_len; + char *t; + + assert(s); + assert(percent <= 100); + assert(new_length != SIZE_MAX); + + if (old_length <= new_length) + return strndup(s, old_length); + + /* Special case short ellipsations */ + switch (new_length) { + + case 0: + return strdup(""); + + case 1: + if (is_locale_utf8()) + return strdup("…"); + else + return strdup("."); + + case 2: + if (!is_locale_utf8()) + return strdup(".."); + + break; + + default: + break; + } + + /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one + * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage, + * either for the UTF-8 encoded character or for three ASCII characters. */ + need_space = is_locale_utf8() ? 1 : 3; + + t = new(char, new_length+3); + if (!t) + return NULL; + + assert(new_length >= need_space); + + x = ((new_length - need_space) * percent + 50) / 100; + assert(x <= new_length - need_space); + + memcpy(t, s, x); + write_ellipsis(t + x, false); + suffix_len = new_length - x - need_space; + memcpy(t + x + 3, s + old_length - suffix_len, suffix_len); + *(t + x + 3 + suffix_len) = '\0'; + + return t; +} + +char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) { + size_t x, k, len, len2; + const char *i, *j; + char *e; + int r; + + /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up + * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8 + * strings. + * + * Ellipsation is done in a locale-dependent way: + * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...") + * 2. Otherwise, a unicode ellipsis is used ("…") + * + * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or + * the current locale is UTF-8. + */ + + assert(s); + assert(percent <= 100); + + if (new_length == SIZE_MAX) + return strndup(s, old_length); + + if (new_length == 0) + return strdup(""); + + /* If no multibyte characters use ascii_ellipsize_mem for speed */ + if (ascii_is_valid_n(s, old_length)) + return ascii_ellipsize_mem(s, old_length, new_length, percent); + + x = ((new_length - 1) * percent) / 100; + assert(x <= new_length - 1); + + k = 0; + for (i = s; i < s + old_length; i = utf8_next_char(i)) { + char32_t c; + int w; + + r = utf8_encoded_to_unichar(i, &c); + if (r < 0) + return NULL; + + w = unichar_iswide(c) ? 2 : 1; + if (k + w <= x) + k += w; + else + break; + } + + for (j = s + old_length; j > i; ) { + char32_t c; + int w; + const char *jj; + + jj = utf8_prev_char(j); + r = utf8_encoded_to_unichar(jj, &c); + if (r < 0) + return NULL; + + w = unichar_iswide(c) ? 2 : 1; + if (k + w <= new_length) { + k += w; + j = jj; + } else + break; + } + assert(i <= j); + + /* we don't actually need to ellipsize */ + if (i == j) + return memdup_suffix0(s, old_length); + + /* make space for ellipsis, if possible */ + if (j < s + old_length) + j = utf8_next_char(j); + else if (i > s) + i = utf8_prev_char(i); + + len = i - s; + len2 = s + old_length - j; + e = new(char, len + 3 + len2 + 1); + if (!e) + return NULL; + + /* + printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n", + old_length, new_length, x, len, len2, k); + */ + + memcpy(e, s, len); + write_ellipsis(e + len, true); + memcpy(e + len + 3, j, len2); + *(e + len + 3 + len2) = '\0'; + + return e; +} + +char *cellescape(char *buf, size_t len, const char *s) { + /* Escape and ellipsize s into buffer buf of size len. Only non-control ASCII + * characters are copied as they are, everything else is escaped. The result + * is different then if escaping and ellipsization was performed in two + * separate steps, because each sequence is either stored in full or skipped. + * + * This function should be used for logging about strings which expected to + * be plain ASCII in a safe way. + * + * An ellipsis will be used if s is too long. It was always placed at the + * very end. + */ + + size_t i = 0, last_char_width[4] = {}, k = 0; + + assert(len > 0); /* at least a terminating NUL */ + + for (;;) { + char four[4]; + int w; + + if (*s == 0) /* terminating NUL detected? then we are done! */ + goto done; + + w = cescape_char(*s, four); + if (i + w + 1 > len) /* This character doesn't fit into the buffer anymore? In that case let's + * ellipsize at the previous location */ + break; + + /* OK, there was space, let's add this escaped character to the buffer */ + memcpy(buf + i, four, w); + i += w; + + /* And remember its width in the ring buffer */ + last_char_width[k] = w; + k = (k + 1) % 4; + + s++; + } + + /* Ellipsation is necessary. This means we might need to truncate the string again to make space for 4 + * characters ideally, but the buffer is shorter than that in the first place take what we can get */ + for (size_t j = 0; j < ELEMENTSOF(last_char_width); j++) { + + if (i + 4 <= len) /* nice, we reached our space goal */ + break; + + k = k == 0 ? 3 : k - 1; + if (last_char_width[k] == 0) /* bummer, we reached the beginning of the strings */ + break; + + assert(i >= last_char_width[k]); + i -= last_char_width[k]; + } + + if (i + 4 <= len) /* yay, enough space */ + i += write_ellipsis(buf + i, false); + else if (i + 3 <= len) { /* only space for ".." */ + buf[i++] = '.'; + buf[i++] = '.'; + } else if (i + 2 <= len) /* only space for a single "." */ + buf[i++] = '.'; + else + assert(i + 1 <= len); + + done: + buf[i] = '\0'; + return buf; +} + +char* strshorten(char *s, size_t l) { + assert(s); + + if (strnlen(s, l+1) > l) + s[l] = 0; + + return s; +} + +int strgrowpad0(char **s, size_t l) { + assert(s); + + char *q = realloc(*s, l); + if (!q) + return -ENOMEM; + *s = q; + + size_t sz = strlen(*s); + memzero(*s + sz, l - sz); + return 0; +} + +char *strreplace(const char *text, const char *old_string, const char *new_string) { + size_t l, old_len, new_len; + char *t, *ret = NULL; + const char *f; + + assert(old_string); + assert(new_string); + + if (!text) + return NULL; + + old_len = strlen(old_string); + new_len = strlen(new_string); + + l = strlen(text); + if (!GREEDY_REALLOC(ret, l+1)) + return NULL; + + f = text; + t = ret; + while (*f) { + size_t d, nl; + + if (!startswith(f, old_string)) { + *(t++) = *(f++); + continue; + } + + d = t - ret; + nl = l - old_len + new_len; + + if (!GREEDY_REALLOC(ret, nl + 1)) + return mfree(ret); + + l = nl; + t = ret + d; + + t = stpcpy(t, new_string); + f += old_len; + } + + *t = 0; + return ret; +} + +static void advance_offsets( + ssize_t diff, + size_t offsets[2], /* note: we can't use [static 2] here, since this may be NULL */ + size_t shift[static 2], + size_t size) { + + if (!offsets) + return; + + assert(shift); + + if ((size_t) diff < offsets[0]) + shift[0] += size; + if ((size_t) diff < offsets[1]) + shift[1] += size; +} + +char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) { + const char *begin = NULL; + enum { + STATE_OTHER, + STATE_ESCAPE, + STATE_CSI, + STATE_CSO, + } state = STATE_OTHER; + char *obuf = NULL; + size_t osz = 0, isz, shift[2] = {}, n_carriage_returns = 0; + FILE *f; + + assert(ibuf); + assert(*ibuf); + + /* This does three things: + * + * 1. Replaces TABs by 8 spaces + * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' … 'm' sequences + * 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' … BEL sequences + * 4. Strip trailing \r characters (since they would "move the cursor", but have no + * other effect). + * + * Everything else will be left as it is. In particular other ANSI sequences are left as they are, as + * are any other special characters. Truncated ANSI sequences are left-as is too. This call is + * supposed to suppress the most basic formatting noise, but nothing else. + * + * Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate. */ + + isz = _isz ? *_isz : strlen(*ibuf); + + /* Note we turn off internal locking on f for performance reasons. It's safe to do so since we + * created f here and it doesn't leave our scope. */ + f = open_memstream_unlocked(&obuf, &osz); + if (!f) + return NULL; + + for (const char *i = *ibuf; i < *ibuf + isz + 1; i++) { + + switch (state) { + + case STATE_OTHER: + if (i >= *ibuf + isz) /* EOT */ + break; + + if (*i == '\r') { + n_carriage_returns++; + break; + } else if (*i == '\n') + /* Ignore carriage returns before new line */ + n_carriage_returns = 0; + for (; n_carriage_returns > 0; n_carriage_returns--) + fputc('\r', f); + + if (*i == '\x1B') + state = STATE_ESCAPE; + else if (*i == '\t') { + fputs(" ", f); + advance_offsets(i - *ibuf, highlight, shift, 7); + } else + fputc(*i, f); + + break; + + case STATE_ESCAPE: + assert(n_carriage_returns == 0); + + if (i >= *ibuf + isz) { /* EOT */ + fputc('\x1B', f); + advance_offsets(i - *ibuf, highlight, shift, 1); + break; + } else if (*i == '[') { /* ANSI CSI */ + state = STATE_CSI; + begin = i + 1; + } else if (*i == ']') { /* ANSI CSO */ + state = STATE_CSO; + begin = i + 1; + } else { + fputc('\x1B', f); + fputc(*i, f); + advance_offsets(i - *ibuf, highlight, shift, 1); + state = STATE_OTHER; + } + + break; + + case STATE_CSI: + assert(n_carriage_returns == 0); + + if (i >= *ibuf + isz || /* EOT … */ + !strchr("01234567890;m", *i)) { /* … or invalid chars in sequence */ + fputc('\x1B', f); + fputc('[', f); + advance_offsets(i - *ibuf, highlight, shift, 2); + state = STATE_OTHER; + i = begin-1; + } else if (*i == 'm') + state = STATE_OTHER; + + break; + + case STATE_CSO: + assert(n_carriage_returns == 0); + + if (i >= *ibuf + isz || /* EOT … */ + (*i != '\a' && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */ + fputc('\x1B', f); + fputc(']', f); + advance_offsets(i - *ibuf, highlight, shift, 2); + state = STATE_OTHER; + i = begin-1; + } else if (*i == '\a') + state = STATE_OTHER; + + break; + } + } + + if (fflush_and_check(f) < 0) { + fclose(f); + return mfree(obuf); + } + fclose(f); + + free_and_replace(*ibuf, obuf); + + if (_isz) + *_isz = osz; + + if (highlight) { + highlight[0] += shift[0]; + highlight[1] += shift[1]; + } + + return *ibuf; +} + +char *strextend_with_separator_internal(char **x, const char *separator, ...) { + size_t f, l, l_separator; + bool need_separator; + char *nr, *p; + va_list ap; + + assert(x); + + l = f = strlen_ptr(*x); + + need_separator = !isempty(*x); + l_separator = strlen_ptr(separator); + + va_start(ap, separator); + for (;;) { + const char *t; + size_t n; + + t = va_arg(ap, const char *); + if (!t) + break; + + n = strlen(t); + + if (need_separator) + n += l_separator; + + if (n >= SIZE_MAX - l) { + va_end(ap); + return NULL; + } + + l += n; + need_separator = true; + } + va_end(ap); + + need_separator = !isempty(*x); + + nr = realloc(*x, GREEDY_ALLOC_ROUND_UP(l+1)); + if (!nr) + return NULL; + + *x = nr; + p = nr + f; + + va_start(ap, separator); + for (;;) { + const char *t; + + t = va_arg(ap, const char *); + if (!t) + break; + + if (need_separator && separator) + p = stpcpy(p, separator); + + p = stpcpy(p, t); + + need_separator = true; + } + va_end(ap); + + assert(p == nr + l); + + *p = 0; + + return p; +} + +int strextendf_with_separator(char **x, const char *separator, const char *format, ...) { + size_t m, a, l_separator; + va_list ap; + int l; + + /* Appends a formatted string to the specified string. Don't use this in inner loops, since then + * we'll spend a tonload of time in determining the length of the string passed in, over and over + * again. */ + + assert(x); + assert(format); + + l_separator = isempty(*x) ? 0 : strlen_ptr(separator); + + /* Let's try to use the allocated buffer, if there's room at the end still. Otherwise let's extend by 64 chars. */ + if (*x) { + m = strlen(*x); + a = MALLOC_SIZEOF_SAFE(*x); + assert(a >= m + 1); + } else + m = a = 0; + + if (a - m < 17 + l_separator) { /* if there's less than 16 chars space, then enlarge the buffer first */ + char *n; + + if (_unlikely_(l_separator > SIZE_MAX - 64)) /* overflow check #1 */ + return -ENOMEM; + if (_unlikely_(m > SIZE_MAX - 64 - l_separator)) /* overflow check #2 */ + return -ENOMEM; + + n = realloc(*x, m + 64 + l_separator); + if (!n) + return -ENOMEM; + + *x = n; + a = MALLOC_SIZEOF_SAFE(*x); + } + + /* Now, let's try to format the string into it */ + memcpy_safe(*x + m, separator, l_separator); + va_start(ap, format); + l = vsnprintf(*x + m + l_separator, a - m - l_separator, format, ap); + va_end(ap); + + assert(l >= 0); + + if ((size_t) l < a - m - l_separator) { + char *n; + + /* Nice! This worked. We are done. But first, let's return the extra space we don't + * need. This should be a cheap operation, since we only lower the allocation size here, + * never increase. */ + n = realloc(*x, m + (size_t) l + l_separator + 1); + if (n) + *x = n; + } else { + char *n; + + /* Wasn't enough. Then let's allocate exactly what we need. */ + + if (_unlikely_((size_t) l > SIZE_MAX - (l_separator + 1))) /* overflow check #1 */ + goto oom; + if (_unlikely_(m > SIZE_MAX - ((size_t) l + l_separator + 1))) /* overflow check #2 */ + goto oom; + + a = m + (size_t) l + l_separator + 1; + n = realloc(*x, a); + if (!n) + goto oom; + *x = n; + + va_start(ap, format); + l = vsnprintf(*x + m + l_separator, a - m - l_separator, format, ap); + va_end(ap); + + assert((size_t) l < a - m - l_separator); + } + + return 0; + +oom: + /* truncate the bytes added after the first vsnprintf() attempt again */ + (*x)[m] = 0; + return -ENOMEM; +} + +char *strrep(const char *s, unsigned n) { + char *r, *p; + size_t l; + + assert(s); + + l = strlen(s); + p = r = malloc(l * n + 1); + if (!r) + return NULL; + + for (unsigned i = 0; i < n; i++) + p = stpcpy(p, s); + + *p = 0; + return r; +} + +int split_pair(const char *s, const char *sep, char **l, char **r) { + char *x, *a, *b; + + assert(s); + assert(sep); + assert(l); + assert(r); + + if (isempty(sep)) + return -EINVAL; + + x = strstr(s, sep); + if (!x) + return -EINVAL; + + a = strndup(s, x - s); + if (!a) + return -ENOMEM; + + b = strdup(x + strlen(sep)); + if (!b) { + free(a); + return -ENOMEM; + } + + *l = a; + *r = b; + + return 0; +} + +int free_and_strdup(char **p, const char *s) { + char *t; + + assert(p); + + /* Replaces a string pointer with a strdup()ed new string, + * possibly freeing the old one. */ + + if (streq_ptr(*p, s)) + return 0; + + if (s) { + t = strdup(s); + if (!t) + return -ENOMEM; + } else + t = NULL; + + free(*p); + *p = t; + + return 1; +} + +int free_and_strndup(char **p, const char *s, size_t l) { + char *t; + + assert(p); + assert(s || l == 0); + + /* Replaces a string pointer with a strndup()ed new string, + * freeing the old one. */ + + if (!*p && !s) + return 0; + + if (*p && s && strneq(*p, s, l) && (l > strlen(*p) || (*p)[l] == '\0')) + return 0; + + if (s) { + t = strndup(s, l); + if (!t) + return -ENOMEM; + } else + t = NULL; + + free_and_replace(*p, t); + return 1; +} + +bool string_is_safe(const char *p) { + if (!p) + return false; + + /* Checks if the specified string contains no quotes or control characters */ + + for (const char *t = p; *t; t++) { + if (*t > 0 && *t < ' ') /* no control characters */ + return false; + + if (strchr(QUOTES "\\\x7f", *t)) + return false; + } + + return true; +} + +char* string_erase(char *x) { + if (!x) + return NULL; + + /* A delicious drop of snake-oil! To be called on memory where we stored passphrases or so, after we + * used them. */ + explicit_bzero_safe(x, strlen(x)); + return x; +} + +int string_truncate_lines(const char *s, size_t n_lines, char **ret) { + const char *p = s, *e = s; + bool truncation_applied = false; + char *copy; + size_t n = 0; + + assert(s); + + /* Truncate after the specified number of lines. Returns > 0 if a truncation was applied or == 0 if + * there were fewer lines in the string anyway. Trailing newlines on input are ignored, and not + * generated either. */ + + for (;;) { + size_t k; + + k = strcspn(p, "\n"); + + if (p[k] == 0) { + if (k == 0) /* final empty line */ + break; + + if (n >= n_lines) /* above threshold */ + break; + + e = p + k; /* last line to include */ + break; + } + + assert(p[k] == '\n'); + + if (n >= n_lines) + break; + + if (k > 0) + e = p + k; + + p += k + 1; + n++; + } + + /* e points after the last character we want to keep */ + if (isempty(e)) + copy = strdup(s); + else { + if (!in_charset(e, "\n")) /* We only consider things truncated if we remove something that + * isn't a new-line or a series of them */ + truncation_applied = true; + + copy = strndup(s, e - s); + } + if (!copy) + return -ENOMEM; + + *ret = copy; + return truncation_applied; +} + +int string_extract_line(const char *s, size_t i, char **ret) { + const char *p = s; + size_t c = 0; + + /* Extract the i'nth line from the specified string. Returns > 0 if there are more lines after that, + * and == 0 if we are looking at the last line or already beyond the last line. As special + * optimization, if the first line is requested and the string only consists of one line we return + * NULL, indicating the input string should be used as is, and avoid a memory allocation for a very + * common case. */ + + for (;;) { + const char *q; + + q = strchr(p, '\n'); + if (i == c) { + /* The line we are looking for! */ + + if (q) { + char *m; + + m = strndup(p, q - p); + if (!m) + return -ENOMEM; + + *ret = m; + return !isempty(q + 1); /* more coming? */ + } else { + if (p == s) + *ret = NULL; /* Just use the input string */ + else { + char *m; + + m = strdup(p); + if (!m) + return -ENOMEM; + + *ret = m; + } + + return 0; /* The end */ + } + } + + if (!q) { + char *m; + + /* No more lines, return empty line */ + + m = strdup(""); + if (!m) + return -ENOMEM; + + *ret = m; + return 0; /* The end */ + } + + p = q + 1; + c++; + } +} + +int string_contains_word_strv(const char *string, const char *separators, char **words, const char **ret_word) { + /* In the default mode with no separators specified, we split on whitespace and + * don't coalesce separators. */ + const ExtractFlags flags = separators ? EXTRACT_DONT_COALESCE_SEPARATORS : 0; + + const char *found = NULL; + + for (const char *p = string;;) { + _cleanup_free_ char *w = NULL; + int r; + + r = extract_first_word(&p, &w, separators, flags); + if (r < 0) + return r; + if (r == 0) + break; + + found = strv_find(words, w); + if (found) + break; + } + + if (ret_word) + *ret_word = found; + return !!found; +} + +bool streq_skip_trailing_chars(const char *s1, const char *s2, const char *ok) { + if (!s1 && !s2) + return true; + if (!s1 || !s2) + return false; + + if (!ok) + ok = WHITESPACE; + + for (; *s1 && *s2; s1++, s2++) + if (*s1 != *s2) + break; + + return in_charset(s1, ok) && in_charset(s2, ok); +} + +char *string_replace_char(char *str, char old_char, char new_char) { + assert(str); + assert(old_char != '\0'); + assert(new_char != '\0'); + assert(old_char != new_char); + + for (char *p = strchr(str, old_char); p; p = strchr(p + 1, old_char)) + *p = new_char; + + return str; +} + +size_t strspn_from_end(const char *str, const char *accept) { + size_t n = 0; + + if (isempty(str)) + return 0; + + if (isempty(accept)) + return 0; + + for (const char *p = str + strlen(str); p > str && strchr(accept, p[-1]); p--) + n++; + + return n; +} diff --git a/src/basic/string-util.h b/src/basic/string-util.h new file mode 100644 index 0000000..913a96f --- /dev/null +++ b/src/basic/string-util.h @@ -0,0 +1,245 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include + +#include "alloc-util.h" +#include "macro.h" +#include "string-util-fundamental.h" + +/* What is interpreted as whitespace? */ +#define WHITESPACE " \t\n\r" +#define NEWLINE "\n\r" +#define QUOTES "\"\'" +#define COMMENTS "#;" +#define GLOB_CHARS "*?[" +#define DIGITS "0123456789" +#define LOWERCASE_LETTERS "abcdefghijklmnopqrstuvwxyz" +#define UPPERCASE_LETTERS "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +#define LETTERS LOWERCASE_LETTERS UPPERCASE_LETTERS +#define ALPHANUMERICAL LETTERS DIGITS +#define HEXDIGITS DIGITS "abcdefABCDEF" +#define LOWERCASE_HEXDIGITS DIGITS "abcdef" + +static inline char* strstr_ptr(const char *haystack, const char *needle) { + if (!haystack || !needle) + return NULL; + return strstr(haystack, needle); +} + +static inline char *strstrafter(const char *haystack, const char *needle) { + char *p; + + /* Returns NULL if not found, or pointer to first character after needle if found */ + + p = strstr_ptr(haystack, needle); + if (!p) + return NULL; + + return p + strlen(needle); +} + +static inline const char* strnull(const char *s) { + return s ?: "(null)"; +} + +static inline const char *strna(const char *s) { + return s ?: "n/a"; +} + +static inline const char* true_false(bool b) { + return b ? "true" : "false"; +} + +static inline const char* plus_minus(bool b) { + return b ? "+" : "-"; +} + +static inline const char* one_zero(bool b) { + return b ? "1" : "0"; +} + +static inline const char* enable_disable(bool b) { + return b ? "enable" : "disable"; +} + +static inline const char *empty_to_null(const char *p) { + return isempty(p) ? NULL : p; +} + +static inline const char *empty_to_na(const char *p) { + return isempty(p) ? "n/a" : p; +} + +static inline const char *empty_to_dash(const char *str) { + return isempty(str) ? "-" : str; +} + +static inline bool empty_or_dash(const char *str) { + return !str || + str[0] == 0 || + (str[0] == '-' && str[1] == 0); +} + +static inline const char *empty_or_dash_to_null(const char *p) { + return empty_or_dash(p) ? NULL : p; +} + +char *first_word(const char *s, const char *word) _pure_; + +char *strnappend(const char *s, const char *suffix, size_t length); + +char *strjoin_real(const char *x, ...) _sentinel_; +#define strjoin(a, ...) strjoin_real((a), __VA_ARGS__, NULL) + +#define strjoina(a, ...) \ + ({ \ + const char *_appendees_[] = { a, __VA_ARGS__ }; \ + char *_d_, *_p_; \ + size_t _len_ = 0; \ + size_t _i_; \ + for (_i_ = 0; _i_ < ELEMENTSOF(_appendees_) && _appendees_[_i_]; _i_++) \ + _len_ += strlen(_appendees_[_i_]); \ + _p_ = _d_ = newa(char, _len_ + 1); \ + for (_i_ = 0; _i_ < ELEMENTSOF(_appendees_) && _appendees_[_i_]; _i_++) \ + _p_ = stpcpy(_p_, _appendees_[_i_]); \ + *_p_ = 0; \ + _d_; \ + }) + +char *strstrip(char *s); +char *delete_chars(char *s, const char *bad); +char *delete_trailing_chars(char *s, const char *bad); +char *truncate_nl(char *s); + +static inline char *skip_leading_chars(const char *s, const char *bad) { + if (!s) + return NULL; + + if (!bad) + bad = WHITESPACE; + + return (char*) s + strspn(s, bad); +} + +char ascii_tolower(char x); +char *ascii_strlower(char *s); +char *ascii_strlower_n(char *s, size_t n); + +char ascii_toupper(char x); +char *ascii_strupper(char *s); + +int ascii_strcasecmp_n(const char *a, const char *b, size_t n); +int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m); + +bool chars_intersect(const char *a, const char *b) _pure_; + +static inline bool _pure_ in_charset(const char *s, const char* charset) { + assert(s); + assert(charset); + return s[strspn(s, charset)] == '\0'; +} + +static inline bool char_is_cc(char p) { + /* char is unsigned on some architectures, e.g. aarch64. So, compiler may warn the condition + * p >= 0 is always true. See #19543. Hence, let's cast to unsigned before the comparison. Note + * that the cast in the right hand side is redundant, as according to the C standard, compilers + * automatically cast a signed value to unsigned when comparing with an unsigned variable. Just + * for safety and readability. */ + return (uint8_t) p < (uint8_t) ' ' || p == 127; +} +bool string_has_cc(const char *p, const char *ok) _pure_; + +char *ellipsize_mem(const char *s, size_t old_length_bytes, size_t new_length_columns, unsigned percent); +static inline char *ellipsize(const char *s, size_t length, unsigned percent) { + return ellipsize_mem(s, strlen(s), length, percent); +} + +char *cellescape(char *buf, size_t len, const char *s); + +/* This limit is arbitrary, enough to give some idea what the string contains */ +#define CELLESCAPE_DEFAULT_LENGTH 64 + +char* strshorten(char *s, size_t l); + +int strgrowpad0(char **s, size_t l); + +char *strreplace(const char *text, const char *old_string, const char *new_string); + +char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]); + +char *strextend_with_separator_internal(char **x, const char *separator, ...) _sentinel_; +#define strextend_with_separator(x, separator, ...) strextend_with_separator_internal(x, separator, __VA_ARGS__, NULL) +#define strextend(x, ...) strextend_with_separator_internal(x, NULL, __VA_ARGS__, NULL) + +int strextendf_with_separator(char **x, const char *separator, const char *format, ...) _printf_(3,4); +#define strextendf(x, ...) strextendf_with_separator(x, NULL, __VA_ARGS__) + +char *strrep(const char *s, unsigned n); + +int split_pair(const char *s, const char *sep, char **l, char **r); + +int free_and_strdup(char **p, const char *s); +static inline int free_and_strdup_warn(char **p, const char *s) { + int r; + + r = free_and_strdup(p, s); + if (r < 0) + return log_oom(); + return r; +} +int free_and_strndup(char **p, const char *s, size_t l); + +bool string_is_safe(const char *p) _pure_; + +DISABLE_WARNING_STRINGOP_TRUNCATION; +static inline void strncpy_exact(char *buf, const char *src, size_t buf_len) { + strncpy(buf, src, buf_len); +} +REENABLE_WARNING; + +/* Like startswith_no_case(), but operates on arbitrary memory blocks. + * It works only for ASCII strings. + */ +static inline void *memory_startswith_no_case(const void *p, size_t sz, const char *token) { + assert(token); + + size_t n = strlen(token); + if (sz < n) + return NULL; + + assert(p); + + for (size_t i = 0; i < n; i++) + if (ascii_tolower(((char *)p)[i]) != ascii_tolower(token[i])) + return NULL; + + return (uint8_t*) p + n; +} + +static inline char* str_realloc(char *p) { + /* Reallocate *p to actual size. Ignore failure, and return the original string on error. */ + + if (!p) + return NULL; + + return realloc(p, strlen(p) + 1) ?: p; +} + +char* string_erase(char *x); + +int string_truncate_lines(const char *s, size_t n_lines, char **ret); +int string_extract_line(const char *s, size_t i, char **ret); + +int string_contains_word_strv(const char *string, const char *separators, char **words, const char **ret_word); +static inline int string_contains_word(const char *string, const char *separators, const char *word) { + return string_contains_word_strv(string, separators, STRV_MAKE(word), NULL); +} + +bool streq_skip_trailing_chars(const char *s1, const char *s2, const char *ok); + +char *string_replace_char(char *str, char old_char, char new_char); + +size_t strspn_from_end(const char *str, const char *accept); diff --git a/src/basic/strv.c b/src/basic/strv.c new file mode 100644 index 0000000..3616134 --- /dev/null +++ b/src/basic/strv.c @@ -0,0 +1,997 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "escape.h" +#include "extract-word.h" +#include "fileio.h" +#include "memory-util.h" +#include "nulstr-util.h" +#include "sort-util.h" +#include "string-util.h" +#include "strv.h" + +char* strv_find(char * const *l, const char *name) { + assert(name); + + STRV_FOREACH(i, l) + if (streq(*i, name)) + return *i; + + return NULL; +} + +char* strv_find_case(char * const *l, const char *name) { + assert(name); + + STRV_FOREACH(i, l) + if (strcaseeq(*i, name)) + return *i; + + return NULL; +} + +char* strv_find_prefix(char * const *l, const char *name) { + assert(name); + + STRV_FOREACH(i, l) + if (startswith(*i, name)) + return *i; + + return NULL; +} + +char* strv_find_startswith(char * const *l, const char *name) { + assert(name); + + /* Like strv_find_prefix, but actually returns only the + * suffix, not the whole item */ + + STRV_FOREACH(i, l) { + char *e; + + e = startswith(*i, name); + if (e) + return e; + } + + return NULL; +} + +char** strv_free(char **l) { + STRV_FOREACH(k, l) + free(*k); + + return mfree(l); +} + +char** strv_free_erase(char **l) { + STRV_FOREACH(i, l) + erase_and_freep(i); + + return mfree(l); +} + +char** strv_copy(char * const *l) { + _cleanup_strv_free_ char **result = NULL; + char **k; + + result = new(char*, strv_length(l) + 1); + if (!result) + return NULL; + + k = result; + STRV_FOREACH(i, l) { + *k = strdup(*i); + if (!*k) + return NULL; + k++; + } + + *k = NULL; + return TAKE_PTR(result); +} + +int strv_copy_unless_empty(char * const *l, char ***ret) { + assert(ret); + + if (strv_isempty(l)) { + *ret = NULL; + return 0; + } + + char **copy = strv_copy(l); + if (!copy) + return -ENOMEM; + + *ret = TAKE_PTR(copy); + return 1; +} + +size_t strv_length(char * const *l) { + size_t n = 0; + + STRV_FOREACH(i, l) + n++; + + return n; +} + +char** strv_new_ap(const char *x, va_list ap) { + _cleanup_strv_free_ char **a = NULL; + size_t n = 0, i = 0; + va_list aq; + + /* As a special trick we ignore all listed strings that equal + * STRV_IGNORE. This is supposed to be used with the + * STRV_IFNOTNULL() macro to include possibly NULL strings in + * the string list. */ + + va_copy(aq, ap); + for (const char *s = x; s; s = va_arg(aq, const char*)) { + if (s == STRV_IGNORE) + continue; + + n++; + } + va_end(aq); + + a = new(char*, n+1); + if (!a) + return NULL; + + for (const char *s = x; s; s = va_arg(ap, const char*)) { + if (s == STRV_IGNORE) + continue; + + a[i] = strdup(s); + if (!a[i]) + return NULL; + + i++; + } + + a[i] = NULL; + + return TAKE_PTR(a); +} + +char** strv_new_internal(const char *x, ...) { + char **r; + va_list ap; + + va_start(ap, x); + r = strv_new_ap(x, ap); + va_end(ap); + + return r; +} + +int strv_extend_strv(char ***a, char * const *b, bool filter_duplicates) { + size_t p, q, i = 0; + char **t; + + assert(a); + + if (strv_isempty(b)) + return 0; + + p = strv_length(*a); + q = strv_length(b); + + if (p >= SIZE_MAX - q) + return -ENOMEM; + + t = reallocarray(*a, GREEDY_ALLOC_ROUND_UP(p + q + 1), sizeof(char *)); + if (!t) + return -ENOMEM; + + t[p] = NULL; + *a = t; + + STRV_FOREACH(s, b) { + if (filter_duplicates && strv_contains(t, *s)) + continue; + + t[p+i] = strdup(*s); + if (!t[p+i]) + goto rollback; + + i++; + t[p+i] = NULL; + } + + assert(i <= q); + + return (int) i; + +rollback: + for (size_t j = 0; j < i; j++) + free(t[p + j]); + + t[p] = NULL; + return -ENOMEM; +} + +int strv_extend_strv_concat(char ***a, char * const *b, const char *suffix) { + int r; + + STRV_FOREACH(s, b) { + char *v; + + v = strjoin(*s, suffix); + if (!v) + return -ENOMEM; + + r = strv_push(a, v); + if (r < 0) { + free(v); + return r; + } + } + + return 0; +} + +int strv_split_newlines_full(char ***ret, const char *s, ExtractFlags flags) { + _cleanup_strv_free_ char **l = NULL; + size_t n; + int r; + + assert(s); + + /* Special version of strv_split_full() that splits on newlines and + * suppresses an empty string at the end. */ + + r = strv_split_full(&l, s, NEWLINE, flags); + if (r < 0) + return r; + + n = strv_length(l); + if (n > 0 && isempty(l[n - 1])) { + l[n - 1] = mfree(l[n - 1]); + n--; + } + + *ret = TAKE_PTR(l); + return n; +} + +int strv_split_full(char ***t, const char *s, const char *separators, ExtractFlags flags) { + _cleanup_strv_free_ char **l = NULL; + size_t n = 0; + int r; + + assert(t); + assert(s); + + for (;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&s, &word, separators, flags); + if (r < 0) + return r; + if (r == 0) + break; + + if (!GREEDY_REALLOC(l, n + 2)) + return -ENOMEM; + + l[n++] = TAKE_PTR(word); + l[n] = NULL; + } + + if (!l) { + l = new0(char*, 1); + if (!l) + return -ENOMEM; + } + + *t = TAKE_PTR(l); + + return (int) n; +} + +int strv_split_and_extend_full(char ***t, const char *s, const char *separators, bool filter_duplicates, ExtractFlags flags) { + _cleanup_strv_free_ char **l = NULL; + int r; + + assert(t); + assert(s); + + r = strv_split_full(&l, s, separators, flags); + if (r < 0) + return r; + + r = strv_extend_strv(t, l, filter_duplicates); + if (r < 0) + return r; + + return (int) strv_length(*t); +} + +int strv_split_colon_pairs(char ***t, const char *s) { + _cleanup_strv_free_ char **l = NULL; + size_t n = 0; + int r; + + assert(t); + assert(s); + + for (;;) { + _cleanup_free_ char *first = NULL, *second = NULL, *tuple = NULL, *second_or_empty = NULL; + + r = extract_first_word(&s, &tuple, NULL, EXTRACT_UNQUOTE|EXTRACT_RETAIN_ESCAPE); + if (r < 0) + return r; + if (r == 0) + break; + + const char *p = tuple; + r = extract_many_words(&p, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, + &first, &second, NULL); + if (r < 0) + return r; + if (r == 0) + continue; + /* Enforce that at most 2 colon-separated words are contained in each group */ + if (!isempty(p)) + return -EINVAL; + + second_or_empty = strdup(strempty(second)); + if (!second_or_empty) + return -ENOMEM; + + if (!GREEDY_REALLOC(l, n + 3)) + return -ENOMEM; + + l[n++] = TAKE_PTR(first); + l[n++] = TAKE_PTR(second_or_empty); + + l[n] = NULL; + } + + if (!l) { + l = new0(char*, 1); + if (!l) + return -ENOMEM; + } + + *t = TAKE_PTR(l); + + return (int) n; +} + +char* strv_join_full(char * const *l, const char *separator, const char *prefix, bool escape_separator) { + char *r, *e; + size_t n, k, m; + + if (!separator) + separator = " "; + + k = strlen(separator); + m = strlen_ptr(prefix); + + if (escape_separator) /* If the separator was multi-char, we wouldn't know how to escape it. */ + assert(k == 1); + + n = 0; + STRV_FOREACH(s, l) { + if (s != l) + n += k; + + bool needs_escaping = escape_separator && strchr(*s, *separator); + + n += m + strlen(*s) * (1 + needs_escaping); + } + + r = new(char, n+1); + if (!r) + return NULL; + + e = r; + STRV_FOREACH(s, l) { + if (s != l) + e = stpcpy(e, separator); + + if (prefix) + e = stpcpy(e, prefix); + + bool needs_escaping = escape_separator && strchr(*s, *separator); + + if (needs_escaping) + for (size_t i = 0; (*s)[i]; i++) { + if ((*s)[i] == *separator) + *(e++) = '\\'; + *(e++) = (*s)[i]; + } + else + e = stpcpy(e, *s); + } + + *e = 0; + + return r; +} + +int strv_push_with_size(char ***l, size_t *n, char *value) { + /* n is a pointer to a variable to store the size of l. + * If not given (i.e. n is NULL or *n is SIZE_MAX), size will be calculated using strv_length(). + * If n is not NULL, the size after the push will be returned. + * If value is empty, no action is taken and *n is not set. */ + + if (!value) + return 0; + + size_t size = n ? *n : SIZE_MAX; + if (size == SIZE_MAX) + size = strv_length(*l); + + /* Check for overflow */ + if (size > SIZE_MAX-2) + return -ENOMEM; + + char **c = reallocarray(*l, GREEDY_ALLOC_ROUND_UP(size + 2), sizeof(char*)); + if (!c) + return -ENOMEM; + + c[size] = value; + c[size+1] = NULL; + + *l = c; + if (n) + *n = size + 1; + return 0; +} + +int strv_push_pair(char ***l, char *a, char *b) { + char **c; + size_t n; + + if (!a && !b) + return 0; + + n = strv_length(*l); + + /* Check for overflow */ + if (n > SIZE_MAX-3) + return -ENOMEM; + + /* increase and check for overflow */ + c = reallocarray(*l, GREEDY_ALLOC_ROUND_UP(n + !!a + !!b + 1), sizeof(char*)); + if (!c) + return -ENOMEM; + + if (a) + c[n++] = a; + if (b) + c[n++] = b; + c[n] = NULL; + + *l = c; + return 0; +} + +int strv_insert(char ***l, size_t position, char *value) { + char **c; + size_t n, m; + + if (!value) + return 0; + + n = strv_length(*l); + position = MIN(position, n); + + /* increase and check for overflow */ + m = n + 2; + if (m < n) + return -ENOMEM; + + c = new(char*, m); + if (!c) + return -ENOMEM; + + for (size_t i = 0; i < position; i++) + c[i] = (*l)[i]; + c[position] = value; + for (size_t i = position; i < n; i++) + c[i+1] = (*l)[i]; + c[n+1] = NULL; + + return free_and_replace(*l, c); +} + +int strv_consume_with_size(char ***l, size_t *n, char *value) { + int r; + + r = strv_push_with_size(l, n, value); + if (r < 0) + free(value); + + return r; +} + +int strv_consume_pair(char ***l, char *a, char *b) { + int r; + + r = strv_push_pair(l, a, b); + if (r < 0) { + free(a); + free(b); + } + + return r; +} + +int strv_consume_prepend(char ***l, char *value) { + int r; + + r = strv_push_prepend(l, value); + if (r < 0) + free(value); + + return r; +} + +int strv_prepend(char ***l, const char *value) { + char *v; + + if (!value) + return 0; + + v = strdup(value); + if (!v) + return -ENOMEM; + + return strv_consume_prepend(l, v); +} + +int strv_extend_with_size(char ***l, size_t *n, const char *value) { + char *v; + + if (!value) + return 0; + + v = strdup(value); + if (!v) + return -ENOMEM; + + return strv_consume_with_size(l, n, v); +} + +int strv_extend_front(char ***l, const char *value) { + size_t n, m; + char *v, **c; + + assert(l); + + /* Like strv_extend(), but prepends rather than appends the new entry */ + + if (!value) + return 0; + + n = strv_length(*l); + + /* Increase and overflow check. */ + m = n + 2; + if (m < n) + return -ENOMEM; + + v = strdup(value); + if (!v) + return -ENOMEM; + + c = reallocarray(*l, m, sizeof(char*)); + if (!c) { + free(v); + return -ENOMEM; + } + + memmove(c+1, c, n * sizeof(char*)); + c[0] = v; + c[n+1] = NULL; + + *l = c; + return 0; +} + +char** strv_uniq(char **l) { + /* Drops duplicate entries. The first identical string will be + * kept, the others dropped */ + + STRV_FOREACH(i, l) + strv_remove(i+1, *i); + + return l; +} + +bool strv_is_uniq(char * const *l) { + STRV_FOREACH(i, l) + if (strv_contains(i+1, *i)) + return false; + + return true; +} + +char** strv_remove(char **l, const char *s) { + char **f, **t; + + if (!l) + return NULL; + + assert(s); + + /* Drops every occurrence of s in the string list, edits + * in-place. */ + + for (f = t = l; *f; f++) + if (streq(*f, s)) + free(*f); + else + *(t++) = *f; + + *t = NULL; + return l; +} + +char** strv_parse_nulstr(const char *s, size_t l) { + /* l is the length of the input data, which will be split at NULs into + * elements of the resulting strv. Hence, the number of items in the resulting strv + * will be equal to one plus the number of NUL bytes in the l bytes starting at s, + * unless s[l-1] is NUL, in which case the final empty string is not stored in + * the resulting strv, and length is equal to the number of NUL bytes. + * + * Note that contrary to a normal nulstr which cannot contain empty strings, because + * the input data is terminated by any two consequent NUL bytes, this parser accepts + * empty strings in s. + */ + + size_t c = 0, i = 0; + char **v; + + assert(s || l <= 0); + + if (l <= 0) + return new0(char*, 1); + + for (const char *p = s; p < s + l; p++) + if (*p == 0) + c++; + + if (s[l-1] != 0) + c++; + + v = new0(char*, c+1); + if (!v) + return NULL; + + for (const char *p = s; p < s + l; ) { + const char *e; + + e = memchr(p, 0, s + l - p); + + v[i] = strndup(p, e ? e - p : s + l - p); + if (!v[i]) { + strv_free(v); + return NULL; + } + + i++; + + if (!e) + break; + + p = e + 1; + } + + assert(i == c); + + return v; +} + +char** strv_split_nulstr(const char *s) { + const char *i; + char **r = NULL; + + NULSTR_FOREACH(i, s) + if (strv_extend(&r, i) < 0) { + strv_free(r); + return NULL; + } + + if (!r) + return strv_new(NULL); + + return r; +} + +int strv_make_nulstr(char * const *l, char **ret, size_t *ret_size) { + /* A valid nulstr with two NULs at the end will be created, but + * q will be the length without the two trailing NULs. Thus the output + * string is a valid nulstr and can be iterated over using NULSTR_FOREACH, + * and can also be parsed by strv_parse_nulstr as long as the length + * is provided separately. + */ + + _cleanup_free_ char *m = NULL; + size_t n = 0; + + assert(ret); + assert(ret_size); + + STRV_FOREACH(i, l) { + size_t z; + + z = strlen(*i); + + if (!GREEDY_REALLOC(m, n + z + 2)) + return -ENOMEM; + + memcpy(m + n, *i, z + 1); + n += z + 1; + } + + if (!m) { + m = new0(char, 2); + if (!m) + return -ENOMEM; + n = 1; + } else + /* make sure there is a second extra NUL at the end of resulting nulstr */ + m[n] = '\0'; + + assert(n > 0); + *ret = TAKE_PTR(m); + *ret_size = n - 1; + + return 0; +} + +bool strv_overlap(char * const *a, char * const *b) { + STRV_FOREACH(i, a) + if (strv_contains(b, *i)) + return true; + + return false; +} + +static int str_compare(char * const *a, char * const *b) { + return strcmp(*a, *b); +} + +char** strv_sort(char **l) { + typesafe_qsort(l, strv_length(l), str_compare); + return l; +} + +int strv_compare(char * const *a, char * const *b) { + int r; + + if (strv_isempty(a)) { + if (strv_isempty(b)) + return 0; + else + return -1; + } + + if (strv_isempty(b)) + return 1; + + for ( ; *a || *b; ++a, ++b) { + r = strcmp_ptr(*a, *b); + if (r != 0) + return r; + } + + return 0; +} + +void strv_print(char * const *l) { + STRV_FOREACH(s, l) + puts(*s); +} + +int strv_extendf(char ***l, const char *format, ...) { + va_list ap; + char *x; + int r; + + va_start(ap, format); + r = vasprintf(&x, format, ap); + va_end(ap); + + if (r < 0) + return -ENOMEM; + + return strv_consume(l, x); +} + +char** strv_reverse(char **l) { + size_t n; + + n = strv_length(l); + if (n <= 1) + return l; + + for (size_t i = 0; i < n / 2; i++) + SWAP_TWO(l[i], l[n-1-i]); + + return l; +} + +char** strv_shell_escape(char **l, const char *bad) { + /* Escapes every character in every string in l that is in bad, + * edits in-place, does not roll-back on error. */ + + STRV_FOREACH(s, l) { + char *v; + + v = shell_escape(*s, bad); + if (!v) + return NULL; + + free(*s); + *s = v; + } + + return l; +} + +bool strv_fnmatch_full( + char* const* patterns, + const char *s, + int flags, + size_t *ret_matched_pos) { + + assert(s); + + if (patterns) + for (size_t i = 0; patterns[i]; i++) + /* NB: We treat all fnmatch() errors as equivalent to FNM_NOMATCH, i.e. if fnmatch() fails to + * process the pattern for some reason we'll consider this equivalent to non-matching. */ + if (fnmatch(patterns[i], s, flags) == 0) { + if (ret_matched_pos) + *ret_matched_pos = i; + return true; + } + + if (ret_matched_pos) + *ret_matched_pos = SIZE_MAX; + + return false; +} + +char** strv_skip(char **l, size_t n) { + + while (n > 0) { + if (strv_isempty(l)) + return l; + + l++, n--; + } + + return l; +} + +int strv_extend_n(char ***l, const char *value, size_t n) { + size_t i, k; + char **nl; + + assert(l); + + if (!value) + return 0; + if (n == 0) + return 0; + + /* Adds the value n times to l */ + + k = strv_length(*l); + if (n >= SIZE_MAX - k) + return -ENOMEM; + + nl = reallocarray(*l, GREEDY_ALLOC_ROUND_UP(k + n + 1), sizeof(char *)); + if (!nl) + return -ENOMEM; + + *l = nl; + + for (i = k; i < k + n; i++) { + nl[i] = strdup(value); + if (!nl[i]) + goto rollback; + } + nl[i] = NULL; + + return 0; + +rollback: + for (size_t j = k; j < i; j++) + free(nl[j]); + nl[k] = NULL; + + return -ENOMEM; +} + +int fputstrv(FILE *f, char * const *l, const char *separator, bool *space) { + bool b = false; + int r; + + /* Like fputs(), but for strv, and with a less stupid argument order */ + + if (!space) + space = &b; + + STRV_FOREACH(s, l) { + r = fputs_with_space(f, *s, separator, space); + if (r < 0) + return r; + } + + return 0; +} + +static int string_strv_hashmap_put_internal(Hashmap *h, const char *key, const char *value) { + char **l; + int r; + + l = hashmap_get(h, key); + if (l) { + /* A list for this key already exists, let's append to it if it is not listed yet */ + if (strv_contains(l, value)) + return 0; + + r = strv_extend(&l, value); + if (r < 0) + return r; + + assert_se(hashmap_update(h, key, l) >= 0); + } else { + /* No list for this key exists yet, create one */ + _cleanup_strv_free_ char **l2 = NULL; + _cleanup_free_ char *t = NULL; + + t = strdup(key); + if (!t) + return -ENOMEM; + + r = strv_extend(&l2, value); + if (r < 0) + return r; + + r = hashmap_put(h, t, l2); + if (r < 0) + return r; + TAKE_PTR(t); + TAKE_PTR(l2); + } + + return 1; +} + +int _string_strv_hashmap_put(Hashmap **h, const char *key, const char *value HASHMAP_DEBUG_PARAMS) { + int r; + + r = _hashmap_ensure_allocated(h, &string_strv_hash_ops HASHMAP_DEBUG_PASS_ARGS); + if (r < 0) + return r; + + return string_strv_hashmap_put_internal(*h, key, value); +} + +int _string_strv_ordered_hashmap_put(OrderedHashmap **h, const char *key, const char *value HASHMAP_DEBUG_PARAMS) { + int r; + + r = _ordered_hashmap_ensure_allocated(h, &string_strv_hash_ops HASHMAP_DEBUG_PASS_ARGS); + if (r < 0) + return r; + + return string_strv_hashmap_put_internal(PLAIN_HASHMAP(*h), key, value); +} + +DEFINE_HASH_OPS_FULL(string_strv_hash_ops, char, string_hash_func, string_compare_func, free, char*, strv_free); diff --git a/src/basic/strv.h b/src/basic/strv.h new file mode 100644 index 0000000..bda8cbf --- /dev/null +++ b/src/basic/strv.h @@ -0,0 +1,268 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "extract-word.h" +#include "hashmap.h" +#include "macro.h" +#include "string-util.h" + +char* strv_find(char * const *l, const char *name) _pure_; +char* strv_find_case(char * const *l, const char *name) _pure_; +char* strv_find_prefix(char * const *l, const char *name) _pure_; +char* strv_find_startswith(char * const *l, const char *name) _pure_; + +#define strv_contains(l, s) (!!strv_find((l), (s))) +#define strv_contains_case(l, s) (!!strv_find_case((l), (s))) + +char** strv_free(char **l); +DEFINE_TRIVIAL_CLEANUP_FUNC(char**, strv_free); +#define _cleanup_strv_free_ _cleanup_(strv_freep) + +char** strv_free_erase(char **l); +DEFINE_TRIVIAL_CLEANUP_FUNC(char**, strv_free_erase); +#define _cleanup_strv_free_erase_ _cleanup_(strv_free_erasep) + +char** strv_copy(char * const *l); +int strv_copy_unless_empty(char * const *l, char ***ret); +size_t strv_length(char * const *l) _pure_; + +int strv_extend_strv(char ***a, char * const *b, bool filter_duplicates); +int strv_extend_strv_concat(char ***a, char * const *b, const char *suffix); +int strv_prepend(char ***l, const char *value); + +/* _with_size() are lower-level functions where the size can be provided externally, + * which allows us to skip iterating over the strv to find the end, which saves + * a bit of time and reduces the complexity of appending from O(n²) to O(n). */ + +int strv_extend_with_size(char ***l, size_t *n, const char *value); +static inline int strv_extend(char ***l, const char *value) { + return strv_extend_with_size(l, NULL, value); +} + +int strv_extendf(char ***l, const char *format, ...) _printf_(2,3); +int strv_extend_front(char ***l, const char *value); + +int strv_push_with_size(char ***l, size_t *n, char *value); +static inline int strv_push(char ***l, char *value) { + return strv_push_with_size(l, NULL, value); +} +int strv_push_pair(char ***l, char *a, char *b); + +int strv_insert(char ***l, size_t position, char *value); + +static inline int strv_push_prepend(char ***l, char *value) { + return strv_insert(l, 0, value); +} + +int strv_consume_with_size(char ***l, size_t *n, char *value); +static inline int strv_consume(char ***l, char *value) { + return strv_consume_with_size(l, NULL, value); +} + +int strv_consume_pair(char ***l, char *a, char *b); +int strv_consume_prepend(char ***l, char *value); + +char** strv_remove(char **l, const char *s); +char** strv_uniq(char **l); +bool strv_is_uniq(char * const *l); + +int strv_compare(char * const *a, char * const *b); +static inline bool strv_equal(char * const *a, char * const *b) { + return strv_compare(a, b) == 0; +} + +char** strv_new_internal(const char *x, ...) _sentinel_; +char** strv_new_ap(const char *x, va_list ap); +#define strv_new(...) strv_new_internal(__VA_ARGS__, NULL) + +#define STRV_IGNORE ((const char *) POINTER_MAX) + +static inline const char* STRV_IFNOTNULL(const char *x) { + return x ? x : STRV_IGNORE; +} + +static inline bool strv_isempty(char * const *l) { + return !l || !*l; +} + +int strv_split_full(char ***t, const char *s, const char *separators, ExtractFlags flags); +static inline char** strv_split(const char *s, const char *separators) { + char **ret; + + if (strv_split_full(&ret, s, separators, EXTRACT_RETAIN_ESCAPE) < 0) + return NULL; + + return ret; +} + +int strv_split_and_extend_full(char ***t, const char *s, const char *separators, bool filter_duplicates, ExtractFlags flags); +#define strv_split_and_extend(t, s, sep, dup) strv_split_and_extend_full(t, s, sep, dup, 0) + +int strv_split_newlines_full(char ***ret, const char *s, ExtractFlags flags); +static inline char** strv_split_newlines(const char *s) { + char **ret; + + if (strv_split_newlines_full(&ret, s, 0) < 0) + return NULL; + + return ret; +} + +/* Given a string containing white-space separated tuples of words themselves separated by ':', + * returns a vector of strings. If the second element in a tuple is missing, the corresponding + * string in the vector is an empty string. */ +int strv_split_colon_pairs(char ***t, const char *s); + +char* strv_join_full(char * const *l, const char *separator, const char *prefix, bool escape_separator); +static inline char *strv_join(char * const *l, const char *separator) { + return strv_join_full(l, separator, NULL, false); +} + +char** strv_parse_nulstr(const char *s, size_t l); +char** strv_split_nulstr(const char *s); +int strv_make_nulstr(char * const *l, char **p, size_t *n); + +static inline int strv_from_nulstr(char ***a, const char *nulstr) { + char **t; + + t = strv_split_nulstr(nulstr); + if (!t) + return -ENOMEM; + *a = t; + return 0; +} + +bool strv_overlap(char * const *a, char * const *b) _pure_; + +#define _STRV_FOREACH_BACKWARDS(s, l, h, i) \ + for (typeof(*(l)) *s, *h = (l), *i = ({ \ + size_t _len = strv_length(h); \ + _len > 0 ? h + _len - 1 : NULL; \ + }); \ + (s = i); \ + i = PTR_SUB1(i, h)) + +#define STRV_FOREACH_BACKWARDS(s, l) \ + _STRV_FOREACH_BACKWARDS(s, l, UNIQ_T(h, UNIQ), UNIQ_T(i, UNIQ)) + +#define _STRV_FOREACH_PAIR(x, y, l, i) \ + for (typeof(*l) *x, *y, *i = (l); \ + i && *(x = i) && *(y = i + 1); \ + i += 2) + +#define STRV_FOREACH_PAIR(x, y, l) \ + _STRV_FOREACH_PAIR(x, y, l, UNIQ_T(i, UNIQ)) + +char** strv_sort(char **l); +void strv_print(char * const *l); + +#define strv_from_stdarg_alloca(first) \ + ({ \ + char **_l; \ + \ + if (!first) \ + _l = (char**) &first; \ + else { \ + size_t _n; \ + va_list _ap; \ + \ + _n = 1; \ + va_start(_ap, first); \ + while (va_arg(_ap, char*)) \ + _n++; \ + va_end(_ap); \ + \ + _l = newa(char*, _n+1); \ + _l[_n = 0] = (char*) first; \ + va_start(_ap, first); \ + for (;;) { \ + _l[++_n] = va_arg(_ap, char*); \ + if (!_l[_n]) \ + break; \ + } \ + va_end(_ap); \ + } \ + _l; \ + }) + +#define STR_IN_SET(x, ...) strv_contains(STRV_MAKE(__VA_ARGS__), x) +#define STRPTR_IN_SET(x, ...) \ + ({ \ + const char* _x = (x); \ + _x && strv_contains(STRV_MAKE(__VA_ARGS__), _x); \ + }) + +#define STRCASE_IN_SET(x, ...) strv_contains_case(STRV_MAKE(__VA_ARGS__), x) +#define STRCASEPTR_IN_SET(x, ...) \ + ({ \ + const char* _x = (x); \ + _x && strv_contains_case(STRV_MAKE(__VA_ARGS__), _x); \ + }) + +#define STARTSWITH_SET(p, ...) \ + ({ \ + const char *_p = (p); \ + char *_found = NULL; \ + STRV_FOREACH(_i, STRV_MAKE(__VA_ARGS__)) { \ + _found = startswith(_p, *_i); \ + if (_found) \ + break; \ + } \ + _found; \ + }) + +#define ENDSWITH_SET(p, ...) \ + ({ \ + const char *_p = (p); \ + char *_found = NULL; \ + STRV_FOREACH(_i, STRV_MAKE(__VA_ARGS__)) { \ + _found = endswith(_p, *_i); \ + if (_found) \ + break; \ + } \ + _found; \ + }) + +#define _FOREACH_STRING(uniq, x, y, ...) \ + for (const char *x, * const*UNIQ_T(l, uniq) = STRV_MAKE_CONST(({ x = y; }), ##__VA_ARGS__); \ + x; \ + x = *(++UNIQ_T(l, uniq))) + +#define FOREACH_STRING(x, y, ...) \ + _FOREACH_STRING(UNIQ, x, y, ##__VA_ARGS__) + +char** strv_reverse(char **l); +char** strv_shell_escape(char **l, const char *bad); + +bool strv_fnmatch_full(char* const* patterns, const char *s, int flags, size_t *ret_matched_pos); +static inline bool strv_fnmatch(char* const* patterns, const char *s) { + return strv_fnmatch_full(patterns, s, 0, NULL); +} + +static inline bool strv_fnmatch_or_empty(char* const* patterns, const char *s, int flags) { + assert(s); + return strv_isempty(patterns) || + strv_fnmatch_full(patterns, s, flags, NULL); +} + +char** strv_skip(char **l, size_t n); + +int strv_extend_n(char ***l, const char *value, size_t n); + +int fputstrv(FILE *f, char * const *l, const char *separator, bool *space); + +#define strv_free_and_replace(a, b) \ + free_and_replace_full(a, b, strv_free) + +extern const struct hash_ops string_strv_hash_ops; +int _string_strv_hashmap_put(Hashmap **h, const char *key, const char *value HASHMAP_DEBUG_PARAMS); +int _string_strv_ordered_hashmap_put(OrderedHashmap **h, const char *key, const char *value HASHMAP_DEBUG_PARAMS); +#define string_strv_hashmap_put(h, k, v) _string_strv_hashmap_put(h, k, v HASHMAP_DEBUG_SRC_ARGS) +#define string_strv_ordered_hashmap_put(h, k, v) _string_strv_ordered_hashmap_put(h, k, v HASHMAP_DEBUG_SRC_ARGS) diff --git a/src/basic/strxcpyx.c b/src/basic/strxcpyx.c new file mode 100644 index 0000000..52b9565 --- /dev/null +++ b/src/basic/strxcpyx.c @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +/* + * Concatenates/copies strings. In any case, terminates in all cases + * with '\0' and moves the @dest pointer forward to the added '\0'. + * Returns the remaining size, and 0 if the string was truncated. + * + * Due to the intended usage, these helpers silently noop invocations + * having zero size. This is technically an exception to the above + * statement "terminates in all cases". It's unexpected for such calls to + * occur outside of a loop where this is the preferred behavior. + */ + +#include +#include +#include + +#include "string-util.h" +#include "strxcpyx.h" + +size_t strnpcpy_full(char **dest, size_t size, const char *src, size_t len, bool *ret_truncated) { + bool truncated = false; + + assert(dest); + assert(src); + + if (size == 0) { + if (ret_truncated) + *ret_truncated = len > 0; + return 0; + } + + if (len >= size) { + if (size > 1) + *dest = mempcpy(*dest, src, size-1); + size = 0; + truncated = true; + } else if (len > 0) { + *dest = mempcpy(*dest, src, len); + size -= len; + } + + if (ret_truncated) + *ret_truncated = truncated; + + *dest[0] = '\0'; + return size; +} + +size_t strpcpy_full(char **dest, size_t size, const char *src, bool *ret_truncated) { + assert(dest); + assert(src); + + return strnpcpy_full(dest, size, src, strlen(src), ret_truncated); +} + +size_t strpcpyf_full(char **dest, size_t size, bool *ret_truncated, const char *src, ...) { + bool truncated = false; + va_list va; + int i; + + assert(dest); + assert(src); + + va_start(va, src); + i = vsnprintf(*dest, size, src, va); + va_end(va); + + if (i < (int) size) { + *dest += i; + size -= i; + } else { + size = 0; + truncated = i > 0; + } + + if (ret_truncated) + *ret_truncated = truncated; + + return size; +} + +size_t strpcpyl_full(char **dest, size_t size, bool *ret_truncated, const char *src, ...) { + bool truncated = false; + va_list va; + + assert(dest); + assert(src); + + va_start(va, src); + do { + bool t; + + size = strpcpy_full(dest, size, src, &t); + truncated = truncated || t; + src = va_arg(va, char *); + } while (src); + va_end(va); + + if (ret_truncated) + *ret_truncated = truncated; + return size; +} + +size_t strnscpy_full(char *dest, size_t size, const char *src, size_t len, bool *ret_truncated) { + char *s; + + assert(dest); + assert(src); + + s = dest; + return strnpcpy_full(&s, size, src, len, ret_truncated); +} + +size_t strscpy_full(char *dest, size_t size, const char *src, bool *ret_truncated) { + assert(dest); + assert(src); + + return strnscpy_full(dest, size, src, strlen(src), ret_truncated); +} + +size_t strscpyl_full(char *dest, size_t size, bool *ret_truncated, const char *src, ...) { + bool truncated = false; + va_list va; + char *s; + + assert(dest); + assert(src); + + va_start(va, src); + s = dest; + do { + bool t; + + size = strpcpy_full(&s, size, src, &t); + truncated = truncated || t; + src = va_arg(va, char *); + } while (src); + va_end(va); + + if (ret_truncated) + *ret_truncated = truncated; + + return size; +} diff --git a/src/basic/strxcpyx.h b/src/basic/strxcpyx.h new file mode 100644 index 0000000..4a648ed --- /dev/null +++ b/src/basic/strxcpyx.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +#include "macro.h" + +size_t strnpcpy_full(char **dest, size_t size, const char *src, size_t len, bool *ret_truncated); +static inline size_t strnpcpy(char **dest, size_t size, const char *src, size_t len) { + return strnpcpy_full(dest, size, src, len, NULL); +} +size_t strpcpy_full(char **dest, size_t size, const char *src, bool *ret_truncated); +static inline size_t strpcpy(char **dest, size_t size, const char *src) { + return strpcpy_full(dest, size, src, NULL); +} +size_t strpcpyf_full(char **dest, size_t size, bool *ret_truncated, const char *src, ...) _printf_(4, 5); +#define strpcpyf(dest, size, src, ...) \ + strpcpyf_full((dest), (size), NULL, (src), ##__VA_ARGS__) +size_t strpcpyl_full(char **dest, size_t size, bool *ret_truncated, const char *src, ...) _sentinel_; +#define strpcpyl(dest, size, src, ...) \ + strpcpyl_full((dest), (size), NULL, (src), ##__VA_ARGS__) +size_t strnscpy_full(char *dest, size_t size, const char *src, size_t len, bool *ret_truncated); +static inline size_t strnscpy(char *dest, size_t size, const char *src, size_t len) { + return strnscpy_full(dest, size, src, len, NULL); +} +size_t strscpy_full(char *dest, size_t size, const char *src, bool *ret_truncated); +static inline size_t strscpy(char *dest, size_t size, const char *src) { + return strscpy_full(dest, size, src, NULL); +} +size_t strscpyl_full(char *dest, size_t size, bool *ret_truncated, const char *src, ...) _sentinel_; +#define strscpyl(dest, size, src, ...) \ + strscpyl_full(dest, size, NULL, src, ##__VA_ARGS__) diff --git a/src/basic/sync-util.c b/src/basic/sync-util.c new file mode 100644 index 0000000..e2d4a3d --- /dev/null +++ b/src/basic/sync-util.c @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include + +#include "fd-util.h" +#include "fs-util.h" +#include "path-util.h" +#include "sync-util.h" + +int fsync_directory_of_file(int fd) { + _cleanup_close_ int dfd = -1; + struct stat st; + int r; + + assert(fd >= 0); + + /* We only reasonably can do this for regular files and directories, or for O_PATH fds, hence check + * for the inode type first */ + if (fstat(fd, &st) < 0) + return -errno; + + if (S_ISDIR(st.st_mode)) { + dfd = openat(fd, "..", O_RDONLY|O_DIRECTORY|O_CLOEXEC, 0); + if (dfd < 0) + return -errno; + + } else if (!S_ISREG(st.st_mode)) { /* Regular files are OK regardless if O_PATH or not, for all other + * types check O_PATH flag */ + int flags; + + flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; + + if (!FLAGS_SET(flags, O_PATH)) /* If O_PATH this refers to the inode in the fs, in which case + * we can sensibly do what is requested. Otherwise this refers + * to a socket, fifo or device node, where the concept of a + * containing directory doesn't make too much sense. */ + return -ENOTTY; + } + + if (dfd < 0) { + _cleanup_free_ char *path = NULL; + + r = fd_get_path(fd, &path); + if (r < 0) { + log_debug_errno(r, "Failed to query /proc/self/fd/%d%s: %m", + fd, + r == -ENOSYS ? ", ignoring" : ""); + + if (r == -ENOSYS) + /* If /proc is not available, we're most likely running in some + * chroot environment, and syncing the directory is not very + * important in that case. Let's just silently do nothing. */ + return 0; + + return r; + } + + if (!path_is_absolute(path)) + return -EINVAL; + + dfd = open_parent(path, O_CLOEXEC|O_NOFOLLOW, 0); + if (dfd < 0) + return dfd; + } + + return RET_NERRNO(fsync(dfd)); +} + +int fsync_full(int fd) { + int r, q; + + /* Sync both the file and the directory */ + + r = RET_NERRNO(fsync(fd)); + + q = fsync_directory_of_file(fd); + if (r < 0) /* Return earlier error */ + return r; + if (q == -ENOTTY) /* Ignore if the 'fd' refers to a block device or so which doesn't really have a + * parent dir */ + return 0; + return q; +} + +int fsync_path_at(int at_fd, const char *path) { + _cleanup_close_ int opened_fd = -1; + int fd; + + if (isempty(path)) { + if (at_fd == AT_FDCWD) { + opened_fd = open(".", O_RDONLY|O_DIRECTORY|O_CLOEXEC); + if (opened_fd < 0) + return -errno; + + fd = opened_fd; + } else + fd = at_fd; + } else { + opened_fd = openat(at_fd, path, O_RDONLY|O_CLOEXEC|O_NONBLOCK); + if (opened_fd < 0) + return -errno; + + fd = opened_fd; + } + + return RET_NERRNO(fsync(fd)); +} + +int fsync_parent_at(int at_fd, const char *path) { + _cleanup_close_ int opened_fd = -1; + + if (isempty(path)) { + if (at_fd != AT_FDCWD) + return fsync_directory_of_file(at_fd); + + opened_fd = open("..", O_RDONLY|O_DIRECTORY|O_CLOEXEC); + if (opened_fd < 0) + return -errno; + + return RET_NERRNO(fsync(opened_fd)); + } + + opened_fd = openat(at_fd, path, O_PATH|O_CLOEXEC|O_NOFOLLOW); + if (opened_fd < 0) + return -errno; + + return fsync_directory_of_file(opened_fd); +} + +int fsync_path_and_parent_at(int at_fd, const char *path) { + _cleanup_close_ int opened_fd = -1; + + if (isempty(path)) { + if (at_fd != AT_FDCWD) + return fsync_full(at_fd); + + opened_fd = open(".", O_RDONLY|O_DIRECTORY|O_CLOEXEC); + } else + opened_fd = openat(at_fd, path, O_RDONLY|O_NOFOLLOW|O_NONBLOCK|O_CLOEXEC); + if (opened_fd < 0) + return -errno; + + return fsync_full(opened_fd); +} + +int syncfs_path(int at_fd, const char *path) { + _cleanup_close_ int fd = -1; + + if (isempty(path)) { + if (at_fd != AT_FDCWD) + return RET_NERRNO(syncfs(at_fd)); + + fd = open(".", O_RDONLY|O_DIRECTORY|O_CLOEXEC); + } else + fd = openat(at_fd, path, O_RDONLY|O_CLOEXEC|O_NONBLOCK); + if (fd < 0) + return -errno; + + return RET_NERRNO(syncfs(fd)); +} diff --git a/src/basic/sync-util.h b/src/basic/sync-util.h new file mode 100644 index 0000000..e449440 --- /dev/null +++ b/src/basic/sync-util.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +int fsync_directory_of_file(int fd); +int fsync_full(int fd); + +int fsync_path_at(int at_fd, const char *path); +int fsync_parent_at(int at_fd, const char *path); +int fsync_path_and_parent_at(int at_fd, const char *path); + +int syncfs_path(int at_fd, const char *path); diff --git a/src/basic/syscall-list.txt b/src/basic/syscall-list.txt new file mode 100644 index 0000000..c74917b --- /dev/null +++ b/src/basic/syscall-list.txt @@ -0,0 +1,514 @@ +_llseek +_newselect +accept +accept4 +access +acct +add_key +adjtimex +alarm +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bind +bpf +brk +cachectl +cacheflush +capget +capset +chdir +chmod +chown +chown32 +chroot +clock_adjtime +clock_adjtime64 +clock_getres +clock_getres_time64 +clock_gettime +clock_gettime64 +clock_nanosleep +clock_nanosleep_time64 +clock_settime +clock_settime64 +clone +clone3 +close +close_range +connect +copy_file_range +creat +delete_module +dipc +dup +dup2 +dup3 +epoll_create +epoll_create1 +epoll_ctl +epoll_ctl_old +epoll_pwait +epoll_pwait2 +epoll_wait +epoll_wait_old +eventfd +eventfd2 +exec_with_loader +execv +execve +execveat +exit +exit_group +faccessat +faccessat2 +fadvise64 +fadvise64_64 +fallocate +fanotify_init +fanotify_mark +fchdir +fchmod +fchmodat +fchmodat2 +fchown +fchown32 +fchownat +fcntl +fcntl64 +fdatasync +fgetxattr +finit_module +flistxattr +flock +fork +fremovexattr +fsconfig +fsetxattr +fsmount +fsopen +fspick +fstat +fstat64 +fstatat64 +fstatfs +fstatfs64 +fsync +ftruncate +ftruncate64 +futex +futex_requeue +futex_time64 +futex_wait +futex_waitv +futex_wake +futimesat +get_mempolicy +get_robust_list +get_thread_area +getcpu +getcwd +getdents +getdents64 +getdomainname +getdtablesize +getegid +getegid32 +geteuid +geteuid32 +getgid +getgid32 +getgroups +getgroups32 +gethostname +getitimer +getpagesize +getpeername +getpgid +getpgrp +getpid +getppid +getpriority +getrandom +getresgid +getresgid32 +getresuid +getresuid32 +getrlimit +getrusage +getsid +getsockname +getsockopt +gettid +gettimeofday +getuid +getuid32 +getxattr +getxgid +getxpid +getxuid +init_module +inotify_add_watch +inotify_init +inotify_init1 +inotify_rm_watch +io_cancel +io_destroy +io_getevents +io_pgetevents +io_pgetevents_time64 +io_setup +io_submit +io_uring_enter +io_uring_register +io_uring_setup +ioctl +ioperm +iopl +ioprio_get +ioprio_set +ipc +kcmp +kern_features +kexec_file_load +kexec_load +keyctl +kill +landlock_add_rule +landlock_create_ruleset +landlock_restrict_self +lchown +lchown32 +lgetxattr +link +linkat +listen +listxattr +llistxattr +lookup_dcookie +lremovexattr +lseek +lsetxattr +lstat +lstat64 +madvise +map_shadow_stack +mbind +membarrier +memfd_create +memfd_secret +memory_ordering +migrate_pages +mincore +mkdir +mkdirat +mknod +mknodat +mlock +mlock2 +mlockall +mmap +mmap2 +modify_ldt +mount +mount_setattr +move_mount +move_pages +mprotect +mq_getsetattr +mq_notify +mq_open +mq_timedreceive +mq_timedreceive_time64 +mq_timedsend +mq_timedsend_time64 +mq_unlink +mremap +msgctl +msgget +msgrcv +msgsnd +msync +multiplexer +munlock +munlockall +munmap +name_to_handle_at +nanosleep +newfstatat +nice +old_adjtimex +oldfstat +oldlstat +oldolduname +oldstat +oldumount +olduname +open +open_by_handle_at +open_tree +openat +openat2 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause +pciconfig_iobase +pciconfig_read +pciconfig_write +perf_event_open +perfctr +personality +pidfd_getfd +pidfd_open +pidfd_send_signal +pipe +pipe2 +pivot_root +pkey_alloc +pkey_free +pkey_mprotect +poll +ppoll +ppoll_time64 +prctl +pread64 +preadv +preadv2 +prlimit64 +process_madvise +process_mrelease +process_vm_readv +process_vm_writev +pselect6 +pselect6_time64 +ptrace +pwrite64 +pwritev +pwritev2 +quotactl +quotactl_fd +read +readahead +readdir +readlink +readlinkat +readv +reboot +recv +recvfrom +recvmmsg +recvmmsg_time64 +recvmsg +remap_file_pages +removexattr +rename +renameat +renameat2 +request_key +restart_syscall +riscv_flush_icache +riscv_hwprobe +rmdir +rseq +rt_sigaction +rt_sigpending +rt_sigprocmask +rt_sigqueueinfo +rt_sigreturn +rt_sigsuspend +rt_sigtimedwait +rt_sigtimedwait_time64 +rt_tgsigqueueinfo +rtas +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max +sched_get_priority_min +sched_getaffinity +sched_getattr +sched_getparam +sched_getscheduler +sched_rr_get_interval +sched_rr_get_interval_time64 +sched_set_affinity +sched_setaffinity +sched_setattr +sched_setparam +sched_setscheduler +sched_yield +seccomp +select +semctl +semget +semop +semtimedop +semtimedop_time64 +send +sendfile +sendfile64 +sendmmsg +sendmsg +sendto +set_mempolicy +set_mempolicy_home_node +set_robust_list +set_thread_area +set_tid_address +setdomainname +setfsgid +setfsgid32 +setfsuid +setfsuid32 +setgid +setgid32 +setgroups +setgroups32 +sethae +sethostname +setitimer +setns +setpgid +setpgrp +setpriority +setregid +setregid32 +setresgid +setresgid32 +setresuid +setresuid32 +setreuid +setreuid32 +setrlimit +setsid +setsockopt +settimeofday +setuid +setuid32 +setxattr +sgetmask +shmat +shmctl +shmdt +shmget +shutdown +sigaction +sigaltstack +signal +signalfd +signalfd4 +sigpending +sigprocmask +sigreturn +sigsuspend +socket +socketcall +socketpair +splice +spu_create +spu_run +ssetmask +stat +stat64 +statfs +statfs64 +statx +stime +subpage_prot +swapcontext +swapoff +swapon +switch_endian +symlink +symlinkat +sync +sync_file_range +sync_file_range2 +syncfs +sys_debug_setcontext +syscall +sysfs +sysinfo +syslog +sysmips +tee +tgkill +time +timer_create +timer_delete +timer_getoverrun +timer_gettime +timer_gettime64 +timer_settime +timer_settime64 +timerfd +timerfd_create +timerfd_gettime +timerfd_gettime64 +timerfd_settime +timerfd_settime64 +times +tkill +truncate +truncate64 +ugetrlimit +umask +umount +umount2 +uname +unlink +unlinkat +unshare +userfaultfd +ustat +utime +utimensat +utimensat_time64 +utimes +utrap_install +vfork +vhangup +vm86 +vm86old +vmsplice +wait4 +waitid +waitpid +write +writev diff --git a/src/basic/syscalls-alpha.txt b/src/basic/syscalls-alpha.txt new file mode 100644 index 0000000..a6525cd --- /dev/null +++ b/src/basic/syscalls-alpha.txt @@ -0,0 +1,514 @@ +_llseek +_newselect +accept 99 +accept4 502 +access 33 +acct 51 +add_key 439 +adjtimex 366 +alarm +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bind 104 +bpf 515 +brk 17 +cachectl +cacheflush +capget 368 +capset 369 +chdir 12 +chmod 15 +chown 16 +chown32 +chroot 61 +clock_adjtime 499 +clock_adjtime64 +clock_getres 421 +clock_getres_time64 +clock_gettime 420 +clock_gettime64 +clock_nanosleep 422 +clock_nanosleep_time64 +clock_settime 419 +clock_settime64 +clone 312 +clone3 +close 6 +close_range 546 +connect 98 +copy_file_range 519 +creat +delete_module 308 +dipc 373 +dup 41 +dup2 90 +dup3 487 +epoll_create 407 +epoll_create1 486 +epoll_ctl 408 +epoll_ctl_old +epoll_pwait 474 +epoll_pwait2 551 +epoll_wait 409 +epoll_wait_old +eventfd 478 +eventfd2 485 +exec_with_loader 25 +execv +execve 59 +execveat 513 +exit 1 +exit_group 405 +faccessat 462 +faccessat2 549 +fadvise64 413 +fadvise64_64 +fallocate 480 +fanotify_init 494 +fanotify_mark 495 +fchdir 13 +fchmod 124 +fchmodat 461 +fchmodat2 562 +fchown 123 +fchown32 +fchownat 453 +fcntl 92 +fcntl64 +fdatasync 447 +fgetxattr 387 +finit_module 507 +flistxattr 390 +flock 131 +fork 2 +fremovexattr 393 +fsconfig 541 +fsetxattr 384 +fsmount 542 +fsopen 540 +fspick 543 +fstat 91 +fstat64 427 +fstatat64 455 +fstatfs 329 +fstatfs64 529 +fsync 95 +ftruncate 130 +ftruncate64 +futex 394 +futex_requeue 566 +futex_time64 +futex_wait 565 +futex_waitv 559 +futex_wake 564 +futimesat 454 +get_mempolicy 430 +get_robust_list 467 +get_thread_area +getcpu 473 +getcwd 367 +getdents 305 +getdents64 377 +getdomainname +getdtablesize 89 +getegid 530 +getegid32 +geteuid 531 +geteuid32 +getgid 47 +getgid32 +getgroups 79 +getgroups32 +gethostname 87 +getitimer 361 +getpagesize 64 +getpeername 141 +getpgid 233 +getpgrp 63 +getpid 20 +getppid 532 +getpriority 100 +getrandom 511 +getresgid 372 +getresgid32 +getresuid 344 +getresuid32 +getrlimit 144 +getrusage 364 +getsid 234 +getsockname 150 +getsockopt 118 +gettid 378 +gettimeofday 359 +getuid 24 +getuid32 +getxattr 385 +getxgid 47 +getxpid 20 +getxuid 24 +init_module 307 +inotify_add_watch 445 +inotify_init 444 +inotify_init1 489 +inotify_rm_watch 446 +io_cancel 402 +io_destroy 399 +io_getevents 400 +io_pgetevents 523 +io_pgetevents_time64 +io_setup 398 +io_submit 401 +io_uring_enter 536 +io_uring_register 537 +io_uring_setup 535 +ioctl 54 +ioperm +iopl +ioprio_get 443 +ioprio_set 442 +ipc +kcmp 506 +kern_features +kexec_file_load +kexec_load 448 +keyctl 441 +kill 37 +landlock_add_rule 555 +landlock_create_ruleset 554 +landlock_restrict_self 556 +lchown 208 +lchown32 +lgetxattr 386 +link 9 +linkat 458 +listen 106 +listxattr 388 +llistxattr 389 +lookup_dcookie 406 +lremovexattr 392 +lseek 19 +lsetxattr 383 +lstat 68 +lstat64 426 +madvise 75 +map_shadow_stack 563 +mbind 429 +membarrier 517 +memfd_create 512 +memfd_secret +memory_ordering +migrate_pages 449 +mincore 375 +mkdir 136 +mkdirat 451 +mknod 14 +mknodat 452 +mlock 314 +mlock2 518 +mlockall 316 +mmap 71 +mmap2 +modify_ldt +mount 302 +mount_setattr 552 +move_mount 539 +move_pages 472 +mprotect 74 +mq_getsetattr 437 +mq_notify 436 +mq_open 432 +mq_timedreceive 435 +mq_timedreceive_time64 +mq_timedsend 434 +mq_timedsend_time64 +mq_unlink 433 +mremap 341 +msgctl 200 +msgget 201 +msgrcv 202 +msgsnd 203 +msync 217 +multiplexer +munlock 315 +munlockall 317 +munmap 73 +name_to_handle_at 497 +nanosleep 340 +newfstatat +nice +old_adjtimex 303 +oldfstat +oldlstat +oldolduname +oldstat +oldumount 321 +olduname +open 45 +open_by_handle_at 498 +open_tree 538 +openat 450 +openat2 547 +or1k_atomic +osf_fstat 226 +osf_fstatfs 161 +osf_fstatfs64 228 +osf_getdirentries 159 +osf_getdomainname 165 +osf_getitimer 86 +osf_getrusage 117 +osf_getsysinfo 256 +osf_gettimeofday 116 +osf_lstat 225 +osf_mount 21 +osf_proplist_syscall 244 +osf_select 93 +osf_set_program_attributes 43 +osf_setitimer 83 +osf_setsysinfo 257 +osf_settimeofday 122 +osf_shmat 209 +osf_sigprocmask 48 +osf_sigstack 112 +osf_stat 224 +osf_statfs 160 +osf_statfs64 227 +osf_swapon 199 +osf_syscall 0 +osf_sysinfo 241 +osf_usleep_thread 251 +osf_utimes 138 +osf_utsname 207 +osf_wait4 7 +pause +pciconfig_iobase 376 +pciconfig_read 345 +pciconfig_write 346 +perf_event_open 493 +perfctr +personality 324 +pidfd_getfd 548 +pidfd_open 544 +pidfd_send_signal 534 +pipe 42 +pipe2 488 +pivot_root 374 +pkey_alloc 525 +pkey_free 526 +pkey_mprotect 524 +poll 94 +ppoll 464 +ppoll_time64 +prctl 348 +pread64 349 +preadv 490 +preadv2 520 +prlimit64 496 +process_madvise 550 +process_mrelease 558 +process_vm_readv 504 +process_vm_writev 505 +pselect6 463 +pselect6_time64 +ptrace 26 +pwrite64 350 +pwritev 491 +pwritev2 521 +quotactl 148 +quotactl_fd 553 +read 3 +readahead 379 +readdir +readlink 58 +readlinkat 460 +readv 120 +reboot 311 +recv 102 +recvfrom 125 +recvmmsg 479 +recvmmsg_time64 +recvmsg 113 +remap_file_pages 410 +removexattr 391 +rename 128 +renameat 457 +renameat2 510 +request_key 440 +restart_syscall 412 +riscv_flush_icache +riscv_hwprobe +rmdir 137 +rseq 527 +rt_sigaction 352 +rt_sigpending 354 +rt_sigprocmask 353 +rt_sigqueueinfo 356 +rt_sigreturn 351 +rt_sigsuspend 357 +rt_sigtimedwait 355 +rt_sigtimedwait_time64 +rt_tgsigqueueinfo 492 +rtas +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max 335 +sched_get_priority_min 336 +sched_getaffinity 396 +sched_getattr 509 +sched_getparam 331 +sched_getscheduler 333 +sched_rr_get_interval 337 +sched_rr_get_interval_time64 +sched_set_affinity +sched_setaffinity 395 +sched_setattr 508 +sched_setparam 330 +sched_setscheduler 332 +sched_yield 334 +seccomp 514 +select 358 +semctl 204 +semget 205 +semop 206 +semtimedop 423 +semtimedop_time64 +send 101 +sendfile 370 +sendfile64 +sendmmsg 503 +sendmsg 114 +sendto 133 +set_mempolicy 431 +set_mempolicy_home_node 560 +set_robust_list 466 +set_thread_area +set_tid_address 411 +setdomainname 166 +setfsgid 326 +setfsgid32 +setfsuid 325 +setfsuid32 +setgid 132 +setgid32 +setgroups 80 +setgroups32 +sethae 301 +sethostname 88 +setitimer 362 +setns 501 +setpgid 39 +setpgrp 82 +setpriority 96 +setregid 127 +setregid32 +setresgid 371 +setresgid32 +setresuid 343 +setresuid32 +setreuid 126 +setreuid32 +setrlimit 145 +setsid 147 +setsockopt 105 +settimeofday 360 +setuid 23 +setuid32 +setxattr 382 +sgetmask +shmat 209 +shmctl 210 +shmdt 211 +shmget 212 +shutdown 134 +sigaction 156 +sigaltstack 235 +signal +signalfd 476 +signalfd4 484 +sigpending 52 +sigprocmask +sigreturn 103 +sigsuspend 111 +socket 97 +socketcall +socketpair 135 +splice 468 +spu_create +spu_run +ssetmask +stat 67 +stat64 425 +statfs 328 +statfs64 528 +statx 522 +stime +subpage_prot +swapcontext +swapoff 304 +swapon 322 +switch_endian +symlink 57 +symlinkat 459 +sync 36 +sync_file_range 469 +sync_file_range2 +syncfs 500 +sys_debug_setcontext +syscall +sysfs 254 +sysinfo 318 +syslog 310 +sysmips +tee 470 +tgkill 424 +time +timer_create 414 +timer_delete 418 +timer_getoverrun 417 +timer_gettime 416 +timer_gettime64 +timer_settime 415 +timer_settime64 +timerfd 477 +timerfd_create 481 +timerfd_gettime 483 +timerfd_gettime64 +timerfd_settime 482 +timerfd_settime64 +times 323 +tkill 381 +truncate 129 +truncate64 +ugetrlimit +umask 60 +umount 22 +umount2 22 +uname 339 +unlink 10 +unlinkat 456 +unshare 465 +userfaultfd 516 +ustat 327 +utime +utimensat 475 +utimensat_time64 +utimes 363 +utrap_install +vfork 66 +vhangup 76 +vm86 +vm86old +vmsplice 471 +wait4 365 +waitid 438 +waitpid +write 4 +writev 121 diff --git a/src/basic/syscalls-arc.txt b/src/basic/syscalls-arc.txt new file mode 100644 index 0000000..a97a84f --- /dev/null +++ b/src/basic/syscalls-arc.txt @@ -0,0 +1,514 @@ +_llseek +_newselect +accept 202 +accept4 242 +access +acct 89 +add_key 217 +adjtimex 171 +alarm +arc_gettls 246 +arc_settls 245 +arc_usr_cmpxchg 248 +arch_prctl +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bind 200 +bpf 280 +brk 214 +cachectl +cacheflush 244 +capget 90 +capset 91 +chdir 49 +chmod +chown +chown32 +chroot 51 +clock_adjtime 266 +clock_adjtime64 405 +clock_getres 114 +clock_getres_time64 406 +clock_gettime 113 +clock_gettime64 403 +clock_nanosleep 115 +clock_nanosleep_time64 407 +clock_settime 112 +clock_settime64 404 +clone 220 +clone3 435 +close 57 +close_range 436 +connect 203 +copy_file_range 285 +creat +delete_module 106 +dipc +dup 23 +dup2 +dup3 24 +epoll_create +epoll_create1 20 +epoll_ctl 21 +epoll_ctl_old +epoll_pwait 22 +epoll_pwait2 441 +epoll_wait +epoll_wait_old +eventfd +eventfd2 19 +exec_with_loader +execv +execve 221 +execveat 281 +exit 93 +exit_group 94 +faccessat 48 +faccessat2 439 +fadvise64 +fadvise64_64 223 +fallocate 47 +fanotify_init 262 +fanotify_mark 263 +fchdir 50 +fchmod 52 +fchmodat 53 +fchmodat2 452 +fchown 55 +fchown32 +fchownat 54 +fcntl +fcntl64 25 +fdatasync 83 +fgetxattr 10 +finit_module 273 +flistxattr 13 +flock 32 +fork +fremovexattr 16 +fsconfig 431 +fsetxattr 7 +fsmount 432 +fsopen 430 +fspick 433 +fstat +fstat64 80 +fstatat64 79 +fstatfs +fstatfs64 44 +fsync 82 +ftruncate +ftruncate64 46 +futex 98 +futex_requeue 456 +futex_time64 422 +futex_wait 455 +futex_waitv 449 +futex_wake 454 +futimesat +get_mempolicy 236 +get_robust_list 100 +get_thread_area +getcpu 168 +getcwd 17 +getdents +getdents64 61 +getdomainname +getdtablesize +getegid 177 +getegid32 +geteuid 175 +geteuid32 +getgid 176 +getgid32 +getgroups 158 +getgroups32 +gethostname +getitimer 102 +getpagesize +getpeername 205 +getpgid 155 +getpgrp +getpid 172 +getppid 173 +getpriority 141 +getrandom 278 +getresgid 150 +getresgid32 +getresuid 148 +getresuid32 +getrlimit 163 +getrusage 165 +getsid 156 +getsockname 204 +getsockopt 209 +gettid 178 +gettimeofday 169 +getuid 174 +getuid32 +getxattr 8 +getxgid +getxpid +getxuid +init_module 105 +inotify_add_watch 27 +inotify_init +inotify_init1 26 +inotify_rm_watch 28 +io_cancel 3 +io_destroy 1 +io_getevents 4 +io_pgetevents 292 +io_pgetevents_time64 416 +io_setup 0 +io_submit 2 +io_uring_enter 426 +io_uring_register 427 +io_uring_setup 425 +ioctl 29 +ioperm +iopl +ioprio_get 31 +ioprio_set 30 +ipc +kcmp 272 +kern_features +kexec_file_load 294 +kexec_load 104 +keyctl 219 +kill 129 +landlock_add_rule 445 +landlock_create_ruleset 444 +landlock_restrict_self 446 +lchown +lchown32 +lgetxattr 9 +link +linkat 37 +listen 201 +listxattr 11 +llistxattr 12 +lookup_dcookie 18 +lremovexattr 15 +lseek +lsetxattr 6 +lstat +lstat64 +madvise 233 +map_shadow_stack 453 +mbind 235 +membarrier 283 +memfd_create 279 +memfd_secret +memory_ordering +migrate_pages 238 +mincore 232 +mkdir +mkdirat 34 +mknod +mknodat 33 +mlock 228 +mlock2 284 +mlockall 230 +mmap +mmap2 222 +modify_ldt +mount 40 +mount_setattr 442 +move_mount 429 +move_pages 239 +mprotect 226 +mq_getsetattr 185 +mq_notify 184 +mq_open 180 +mq_timedreceive 183 +mq_timedreceive_time64 419 +mq_timedsend 182 +mq_timedsend_time64 418 +mq_unlink 181 +mremap 216 +msgctl 187 +msgget 186 +msgrcv 188 +msgsnd 189 +msync 227 +multiplexer +munlock 229 +munlockall 231 +munmap 215 +name_to_handle_at 264 +nanosleep 101 +newfstatat +nice +old_adjtimex +oldfstat +oldlstat +oldolduname +oldstat +oldumount +olduname +open +open_by_handle_at 265 +open_tree 428 +openat 56 +openat2 437 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause +pciconfig_iobase +pciconfig_read +pciconfig_write +perf_event_open 241 +perfctr +personality 92 +pidfd_getfd 438 +pidfd_open 434 +pidfd_send_signal 424 +pipe +pipe2 59 +pivot_root 41 +pkey_alloc 289 +pkey_free 290 +pkey_mprotect 288 +poll +ppoll 73 +ppoll_time64 414 +prctl 167 +pread64 67 +preadv 69 +preadv2 286 +prlimit64 261 +process_madvise 440 +process_mrelease 448 +process_vm_readv 270 +process_vm_writev 271 +pselect6 72 +pselect6_time64 413 +ptrace 117 +pwrite64 68 +pwritev 70 +pwritev2 287 +quotactl 60 +quotactl_fd 443 +read 63 +readahead 213 +readdir +readlink +readlinkat 78 +readv 65 +reboot 142 +recv +recvfrom 207 +recvmmsg 243 +recvmmsg_time64 417 +recvmsg 212 +remap_file_pages 234 +removexattr 14 +rename +renameat 38 +renameat2 276 +request_key 218 +restart_syscall 128 +riscv_flush_icache +riscv_hwprobe +rmdir +rseq 293 +rt_sigaction 134 +rt_sigpending 136 +rt_sigprocmask 135 +rt_sigqueueinfo 138 +rt_sigreturn 139 +rt_sigsuspend 133 +rt_sigtimedwait 137 +rt_sigtimedwait_time64 421 +rt_tgsigqueueinfo 240 +rtas +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max 125 +sched_get_priority_min 126 +sched_getaffinity 123 +sched_getattr 275 +sched_getparam 121 +sched_getscheduler 120 +sched_rr_get_interval 127 +sched_rr_get_interval_time64 423 +sched_set_affinity +sched_setaffinity 122 +sched_setattr 274 +sched_setparam 118 +sched_setscheduler 119 +sched_yield 124 +seccomp 277 +select +semctl 191 +semget 190 +semop 193 +semtimedop 192 +semtimedop_time64 420 +send +sendfile +sendfile64 71 +sendmmsg 269 +sendmsg 211 +sendto 206 +set_mempolicy 237 +set_mempolicy_home_node 450 +set_robust_list 99 +set_thread_area +set_tid_address 96 +setdomainname 162 +setfsgid 152 +setfsgid32 +setfsuid 151 +setfsuid32 +setgid 144 +setgid32 +setgroups 159 +setgroups32 +sethae +sethostname 161 +setitimer 103 +setns 268 +setpgid 154 +setpgrp +setpriority 140 +setregid 143 +setregid32 +setresgid 149 +setresgid32 +setresuid 147 +setresuid32 +setreuid 145 +setreuid32 +setrlimit 164 +setsid 157 +setsockopt 208 +settimeofday 170 +setuid 146 +setuid32 +setxattr 5 +sgetmask +shmat 196 +shmctl 195 +shmdt 197 +shmget 194 +shutdown 210 +sigaction +sigaltstack 132 +signal +signalfd +signalfd4 74 +sigpending +sigprocmask +sigreturn +sigsuspend +socket 198 +socketcall +socketpair 199 +splice 76 +spu_create +spu_run +ssetmask +stat +stat64 +statfs +statfs64 43 +statx 291 +stime +subpage_prot +swapcontext +swapoff 225 +swapon 224 +switch_endian +symlink +symlinkat 36 +sync 81 +sync_file_range 84 +sync_file_range2 +syncfs 267 +sys_debug_setcontext +syscall +sysfs 247 +sysinfo 179 +syslog 116 +sysmips +tee 77 +tgkill 131 +time +timer_create 107 +timer_delete 111 +timer_getoverrun 109 +timer_gettime 108 +timer_gettime64 408 +timer_settime 110 +timer_settime64 409 +timerfd +timerfd_create 85 +timerfd_gettime 87 +timerfd_gettime64 410 +timerfd_settime 86 +timerfd_settime64 411 +times 153 +tkill 130 +truncate +truncate64 45 +ugetrlimit +umask 166 +umount +umount2 39 +uname 160 +unlink +unlinkat 35 +unshare 97 +userfaultfd 282 +ustat +utime +utimensat 88 +utimensat_time64 412 +utimes +utrap_install +vfork +vhangup 58 +vm86 +vm86old +vmsplice 75 +wait4 260 +waitid 95 +waitpid +write 64 +writev 66 diff --git a/src/basic/syscalls-arm.txt b/src/basic/syscalls-arm.txt new file mode 100644 index 0000000..1fb6b48 --- /dev/null +++ b/src/basic/syscalls-arm.txt @@ -0,0 +1,514 @@ +_llseek 140 +_newselect 142 +accept 285 +accept4 366 +access 33 +acct 51 +add_key 309 +adjtimex 124 +alarm +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 270 +atomic_barrier +atomic_cmpxchg_32 +bind 282 +bpf 386 +brk 45 +cachectl +cacheflush +capget 184 +capset 185 +chdir 12 +chmod 15 +chown 182 +chown32 212 +chroot 61 +clock_adjtime 372 +clock_adjtime64 405 +clock_getres 264 +clock_getres_time64 406 +clock_gettime 263 +clock_gettime64 403 +clock_nanosleep 265 +clock_nanosleep_time64 407 +clock_settime 262 +clock_settime64 404 +clone 120 +clone3 435 +close 6 +close_range 436 +connect 283 +copy_file_range 391 +creat 8 +delete_module 129 +dipc +dup 41 +dup2 63 +dup3 358 +epoll_create 250 +epoll_create1 357 +epoll_ctl 251 +epoll_ctl_old +epoll_pwait 346 +epoll_pwait2 441 +epoll_wait 252 +epoll_wait_old +eventfd 351 +eventfd2 356 +exec_with_loader +execv +execve 11 +execveat 387 +exit 1 +exit_group 248 +faccessat 334 +faccessat2 439 +fadvise64 +fadvise64_64 +fallocate 352 +fanotify_init 367 +fanotify_mark 368 +fchdir 133 +fchmod 94 +fchmodat 333 +fchmodat2 452 +fchown 95 +fchown32 207 +fchownat 325 +fcntl 55 +fcntl64 221 +fdatasync 148 +fgetxattr 231 +finit_module 379 +flistxattr 234 +flock 143 +fork 2 +fremovexattr 237 +fsconfig 431 +fsetxattr 228 +fsmount 432 +fsopen 430 +fspick 433 +fstat 108 +fstat64 197 +fstatat64 327 +fstatfs 100 +fstatfs64 267 +fsync 118 +ftruncate 93 +ftruncate64 194 +futex 240 +futex_requeue 456 +futex_time64 422 +futex_wait 455 +futex_waitv 449 +futex_wake 454 +futimesat 326 +get_mempolicy 320 +get_robust_list 339 +get_thread_area +getcpu 345 +getcwd 183 +getdents 141 +getdents64 217 +getdomainname +getdtablesize +getegid 50 +getegid32 202 +geteuid 49 +geteuid32 201 +getgid 47 +getgid32 200 +getgroups 80 +getgroups32 205 +gethostname +getitimer 105 +getpagesize +getpeername 287 +getpgid 132 +getpgrp 65 +getpid 20 +getppid 64 +getpriority 96 +getrandom 384 +getresgid 171 +getresgid32 211 +getresuid 165 +getresuid32 209 +getrlimit +getrusage 77 +getsid 147 +getsockname 286 +getsockopt 295 +gettid 224 +gettimeofday 78 +getuid 24 +getuid32 199 +getxattr 229 +getxgid +getxpid +getxuid +init_module 128 +inotify_add_watch 317 +inotify_init 316 +inotify_init1 360 +inotify_rm_watch 318 +io_cancel 247 +io_destroy 244 +io_getevents 245 +io_pgetevents 399 +io_pgetevents_time64 416 +io_setup 243 +io_submit 246 +io_uring_enter 426 +io_uring_register 427 +io_uring_setup 425 +ioctl 54 +ioperm +iopl +ioprio_get 315 +ioprio_set 314 +ipc +kcmp 378 +kern_features +kexec_file_load 401 +kexec_load 347 +keyctl 311 +kill 37 +landlock_add_rule 445 +landlock_create_ruleset 444 +landlock_restrict_self 446 +lchown 16 +lchown32 198 +lgetxattr 230 +link 9 +linkat 330 +listen 284 +listxattr 232 +llistxattr 233 +lookup_dcookie 249 +lremovexattr 236 +lseek 19 +lsetxattr 227 +lstat 107 +lstat64 196 +madvise 220 +map_shadow_stack 453 +mbind 319 +membarrier 389 +memfd_create 385 +memfd_secret +memory_ordering +migrate_pages 400 +mincore 219 +mkdir 39 +mkdirat 323 +mknod 14 +mknodat 324 +mlock 150 +mlock2 390 +mlockall 152 +mmap +mmap2 192 +modify_ldt +mount 21 +mount_setattr 442 +move_mount 429 +move_pages 344 +mprotect 125 +mq_getsetattr 279 +mq_notify 278 +mq_open 274 +mq_timedreceive 277 +mq_timedreceive_time64 419 +mq_timedsend 276 +mq_timedsend_time64 418 +mq_unlink 275 +mremap 163 +msgctl 304 +msgget 303 +msgrcv 302 +msgsnd 301 +msync 144 +multiplexer +munlock 151 +munlockall 153 +munmap 91 +name_to_handle_at 370 +nanosleep 162 +newfstatat +nice 34 +old_adjtimex +oldfstat +oldlstat +oldolduname +oldstat +oldumount +olduname +open 5 +open_by_handle_at 371 +open_tree 428 +openat 322 +openat2 437 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause 29 +pciconfig_iobase 271 +pciconfig_read 272 +pciconfig_write 273 +perf_event_open 364 +perfctr +personality 136 +pidfd_getfd 438 +pidfd_open 434 +pidfd_send_signal 424 +pipe 42 +pipe2 359 +pivot_root 218 +pkey_alloc 395 +pkey_free 396 +pkey_mprotect 394 +poll 168 +ppoll 336 +ppoll_time64 414 +prctl 172 +pread64 180 +preadv 361 +preadv2 392 +prlimit64 369 +process_madvise 440 +process_mrelease 448 +process_vm_readv 376 +process_vm_writev 377 +pselect6 335 +pselect6_time64 413 +ptrace 26 +pwrite64 181 +pwritev 362 +pwritev2 393 +quotactl 131 +quotactl_fd 443 +read 3 +readahead 225 +readdir +readlink 85 +readlinkat 332 +readv 145 +reboot 88 +recv 291 +recvfrom 292 +recvmmsg 365 +recvmmsg_time64 417 +recvmsg 297 +remap_file_pages 253 +removexattr 235 +rename 38 +renameat 329 +renameat2 382 +request_key 310 +restart_syscall 0 +riscv_flush_icache +riscv_hwprobe +rmdir 40 +rseq 398 +rt_sigaction 174 +rt_sigpending 176 +rt_sigprocmask 175 +rt_sigqueueinfo 178 +rt_sigreturn 173 +rt_sigsuspend 179 +rt_sigtimedwait 177 +rt_sigtimedwait_time64 421 +rt_tgsigqueueinfo 363 +rtas +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max 159 +sched_get_priority_min 160 +sched_getaffinity 242 +sched_getattr 381 +sched_getparam 155 +sched_getscheduler 157 +sched_rr_get_interval 161 +sched_rr_get_interval_time64 423 +sched_set_affinity +sched_setaffinity 241 +sched_setattr 380 +sched_setparam 154 +sched_setscheduler 156 +sched_yield 158 +seccomp 383 +select +semctl 300 +semget 299 +semop 298 +semtimedop 312 +semtimedop_time64 420 +send 289 +sendfile 187 +sendfile64 239 +sendmmsg 374 +sendmsg 296 +sendto 290 +set_mempolicy 321 +set_mempolicy_home_node 450 +set_robust_list 338 +set_thread_area +set_tid_address 256 +setdomainname 121 +setfsgid 139 +setfsgid32 216 +setfsuid 138 +setfsuid32 215 +setgid 46 +setgid32 214 +setgroups 81 +setgroups32 206 +sethae +sethostname 74 +setitimer 104 +setns 375 +setpgid 57 +setpgrp +setpriority 97 +setregid 71 +setregid32 204 +setresgid 170 +setresgid32 210 +setresuid 164 +setresuid32 208 +setreuid 70 +setreuid32 203 +setrlimit 75 +setsid 66 +setsockopt 294 +settimeofday 79 +setuid 23 +setuid32 213 +setxattr 226 +sgetmask +shmat 305 +shmctl 308 +shmdt 306 +shmget 307 +shutdown 293 +sigaction 67 +sigaltstack 186 +signal +signalfd 349 +signalfd4 355 +sigpending 73 +sigprocmask 126 +sigreturn 119 +sigsuspend 72 +socket 281 +socketcall +socketpair 288 +splice 340 +spu_create +spu_run +ssetmask +stat 106 +stat64 195 +statfs 99 +statfs64 266 +statx 397 +stime +subpage_prot +swapcontext +swapoff 115 +swapon 87 +switch_endian +symlink 83 +symlinkat 331 +sync 36 +sync_file_range +sync_file_range2 341 +syncfs 373 +sys_debug_setcontext +syscall +sysfs 135 +sysinfo 116 +syslog 103 +sysmips +tee 342 +tgkill 268 +time +timer_create 257 +timer_delete 261 +timer_getoverrun 260 +timer_gettime 259 +timer_gettime64 408 +timer_settime 258 +timer_settime64 409 +timerfd +timerfd_create 350 +timerfd_gettime 354 +timerfd_gettime64 410 +timerfd_settime 353 +timerfd_settime64 411 +times 43 +tkill 238 +truncate 92 +truncate64 193 +ugetrlimit 191 +umask 60 +umount +umount2 52 +uname 122 +unlink 10 +unlinkat 328 +unshare 337 +userfaultfd 388 +ustat 62 +utime +utimensat 348 +utimensat_time64 412 +utimes 269 +utrap_install +vfork 190 +vhangup 111 +vm86 +vm86old +vmsplice 343 +wait4 114 +waitid 280 +waitpid +write 4 +writev 146 diff --git a/src/basic/syscalls-arm64.txt b/src/basic/syscalls-arm64.txt new file mode 100644 index 0000000..e71dc56 --- /dev/null +++ b/src/basic/syscalls-arm64.txt @@ -0,0 +1,514 @@ +_llseek +_newselect +accept 202 +accept4 242 +access +acct 89 +add_key 217 +adjtimex 171 +alarm +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bind 200 +bpf 280 +brk 214 +cachectl +cacheflush +capget 90 +capset 91 +chdir 49 +chmod +chown +chown32 +chroot 51 +clock_adjtime 266 +clock_adjtime64 +clock_getres 114 +clock_getres_time64 +clock_gettime 113 +clock_gettime64 +clock_nanosleep 115 +clock_nanosleep_time64 +clock_settime 112 +clock_settime64 +clone 220 +clone3 435 +close 57 +close_range 436 +connect 203 +copy_file_range 285 +creat +delete_module 106 +dipc +dup 23 +dup2 +dup3 24 +epoll_create +epoll_create1 20 +epoll_ctl 21 +epoll_ctl_old +epoll_pwait 22 +epoll_pwait2 441 +epoll_wait +epoll_wait_old +eventfd +eventfd2 19 +exec_with_loader +execv +execve 221 +execveat 281 +exit 93 +exit_group 94 +faccessat 48 +faccessat2 439 +fadvise64 223 +fadvise64_64 +fallocate 47 +fanotify_init 262 +fanotify_mark 263 +fchdir 50 +fchmod 52 +fchmodat 53 +fchmodat2 452 +fchown 55 +fchown32 +fchownat 54 +fcntl 25 +fcntl64 +fdatasync 83 +fgetxattr 10 +finit_module 273 +flistxattr 13 +flock 32 +fork +fremovexattr 16 +fsconfig 431 +fsetxattr 7 +fsmount 432 +fsopen 430 +fspick 433 +fstat 80 +fstat64 +fstatat64 +fstatfs 44 +fstatfs64 +fsync 82 +ftruncate 46 +ftruncate64 +futex 98 +futex_requeue 456 +futex_time64 +futex_wait 455 +futex_waitv 449 +futex_wake 454 +futimesat +get_mempolicy 236 +get_robust_list 100 +get_thread_area +getcpu 168 +getcwd 17 +getdents +getdents64 61 +getdomainname +getdtablesize +getegid 177 +getegid32 +geteuid 175 +geteuid32 +getgid 176 +getgid32 +getgroups 158 +getgroups32 +gethostname +getitimer 102 +getpagesize +getpeername 205 +getpgid 155 +getpgrp +getpid 172 +getppid 173 +getpriority 141 +getrandom 278 +getresgid 150 +getresgid32 +getresuid 148 +getresuid32 +getrlimit 163 +getrusage 165 +getsid 156 +getsockname 204 +getsockopt 209 +gettid 178 +gettimeofday 169 +getuid 174 +getuid32 +getxattr 8 +getxgid +getxpid +getxuid +init_module 105 +inotify_add_watch 27 +inotify_init +inotify_init1 26 +inotify_rm_watch 28 +io_cancel 3 +io_destroy 1 +io_getevents 4 +io_pgetevents 292 +io_pgetevents_time64 +io_setup 0 +io_submit 2 +io_uring_enter 426 +io_uring_register 427 +io_uring_setup 425 +ioctl 29 +ioperm +iopl +ioprio_get 31 +ioprio_set 30 +ipc +kcmp 272 +kern_features +kexec_file_load 294 +kexec_load 104 +keyctl 219 +kill 129 +landlock_add_rule 445 +landlock_create_ruleset 444 +landlock_restrict_self 446 +lchown +lchown32 +lgetxattr 9 +link +linkat 37 +listen 201 +listxattr 11 +llistxattr 12 +lookup_dcookie 18 +lremovexattr 15 +lseek 62 +lsetxattr 6 +lstat +lstat64 +madvise 233 +map_shadow_stack 453 +mbind 235 +membarrier 283 +memfd_create 279 +memfd_secret 447 +memory_ordering +migrate_pages 238 +mincore 232 +mkdir +mkdirat 34 +mknod +mknodat 33 +mlock 228 +mlock2 284 +mlockall 230 +mmap 222 +mmap2 +modify_ldt +mount 40 +mount_setattr 442 +move_mount 429 +move_pages 239 +mprotect 226 +mq_getsetattr 185 +mq_notify 184 +mq_open 180 +mq_timedreceive 183 +mq_timedreceive_time64 +mq_timedsend 182 +mq_timedsend_time64 +mq_unlink 181 +mremap 216 +msgctl 187 +msgget 186 +msgrcv 188 +msgsnd 189 +msync 227 +multiplexer +munlock 229 +munlockall 231 +munmap 215 +name_to_handle_at 264 +nanosleep 101 +newfstatat 79 +nice +old_adjtimex +oldfstat +oldlstat +oldolduname +oldstat +oldumount +olduname +open +open_by_handle_at 265 +open_tree 428 +openat 56 +openat2 437 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause +pciconfig_iobase +pciconfig_read +pciconfig_write +perf_event_open 241 +perfctr +personality 92 +pidfd_getfd 438 +pidfd_open 434 +pidfd_send_signal 424 +pipe +pipe2 59 +pivot_root 41 +pkey_alloc 289 +pkey_free 290 +pkey_mprotect 288 +poll +ppoll 73 +ppoll_time64 +prctl 167 +pread64 67 +preadv 69 +preadv2 286 +prlimit64 261 +process_madvise 440 +process_mrelease 448 +process_vm_readv 270 +process_vm_writev 271 +pselect6 72 +pselect6_time64 +ptrace 117 +pwrite64 68 +pwritev 70 +pwritev2 287 +quotactl 60 +quotactl_fd 443 +read 63 +readahead 213 +readdir +readlink +readlinkat 78 +readv 65 +reboot 142 +recv +recvfrom 207 +recvmmsg 243 +recvmmsg_time64 +recvmsg 212 +remap_file_pages 234 +removexattr 14 +rename +renameat 38 +renameat2 276 +request_key 218 +restart_syscall 128 +riscv_flush_icache +riscv_hwprobe +rmdir +rseq 293 +rt_sigaction 134 +rt_sigpending 136 +rt_sigprocmask 135 +rt_sigqueueinfo 138 +rt_sigreturn 139 +rt_sigsuspend 133 +rt_sigtimedwait 137 +rt_sigtimedwait_time64 +rt_tgsigqueueinfo 240 +rtas +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max 125 +sched_get_priority_min 126 +sched_getaffinity 123 +sched_getattr 275 +sched_getparam 121 +sched_getscheduler 120 +sched_rr_get_interval 127 +sched_rr_get_interval_time64 +sched_set_affinity +sched_setaffinity 122 +sched_setattr 274 +sched_setparam 118 +sched_setscheduler 119 +sched_yield 124 +seccomp 277 +select +semctl 191 +semget 190 +semop 193 +semtimedop 192 +semtimedop_time64 +send +sendfile 71 +sendfile64 +sendmmsg 269 +sendmsg 211 +sendto 206 +set_mempolicy 237 +set_mempolicy_home_node 450 +set_robust_list 99 +set_thread_area +set_tid_address 96 +setdomainname 162 +setfsgid 152 +setfsgid32 +setfsuid 151 +setfsuid32 +setgid 144 +setgid32 +setgroups 159 +setgroups32 +sethae +sethostname 161 +setitimer 103 +setns 268 +setpgid 154 +setpgrp +setpriority 140 +setregid 143 +setregid32 +setresgid 149 +setresgid32 +setresuid 147 +setresuid32 +setreuid 145 +setreuid32 +setrlimit 164 +setsid 157 +setsockopt 208 +settimeofday 170 +setuid 146 +setuid32 +setxattr 5 +sgetmask +shmat 196 +shmctl 195 +shmdt 197 +shmget 194 +shutdown 210 +sigaction +sigaltstack 132 +signal +signalfd +signalfd4 74 +sigpending +sigprocmask +sigreturn +sigsuspend +socket 198 +socketcall +socketpair 199 +splice 76 +spu_create +spu_run +ssetmask +stat +stat64 +statfs 43 +statfs64 +statx 291 +stime +subpage_prot +swapcontext +swapoff 225 +swapon 224 +switch_endian +symlink +symlinkat 36 +sync 81 +sync_file_range 84 +sync_file_range2 +syncfs 267 +sys_debug_setcontext +syscall +sysfs +sysinfo 179 +syslog 116 +sysmips +tee 77 +tgkill 131 +time +timer_create 107 +timer_delete 111 +timer_getoverrun 109 +timer_gettime 108 +timer_gettime64 +timer_settime 110 +timer_settime64 +timerfd +timerfd_create 85 +timerfd_gettime 87 +timerfd_gettime64 +timerfd_settime 86 +timerfd_settime64 +times 153 +tkill 130 +truncate 45 +truncate64 +ugetrlimit +umask 166 +umount +umount2 39 +uname 160 +unlink +unlinkat 35 +unshare 97 +userfaultfd 282 +ustat +utime +utimensat 88 +utimensat_time64 +utimes +utrap_install +vfork +vhangup 58 +vm86 +vm86old +vmsplice 75 +wait4 260 +waitid 95 +waitpid +write 64 +writev 66 diff --git a/src/basic/syscalls-i386.txt b/src/basic/syscalls-i386.txt new file mode 100644 index 0000000..adb96bc --- /dev/null +++ b/src/basic/syscalls-i386.txt @@ -0,0 +1,514 @@ +_llseek 140 +_newselect 142 +accept +accept4 364 +access 33 +acct 51 +add_key 286 +adjtimex 124 +alarm 27 +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl 384 +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bind 361 +bpf 357 +brk 45 +cachectl +cacheflush +capget 184 +capset 185 +chdir 12 +chmod 15 +chown 182 +chown32 212 +chroot 61 +clock_adjtime 343 +clock_adjtime64 405 +clock_getres 266 +clock_getres_time64 406 +clock_gettime 265 +clock_gettime64 403 +clock_nanosleep 267 +clock_nanosleep_time64 407 +clock_settime 264 +clock_settime64 404 +clone 120 +clone3 435 +close 6 +close_range 436 +connect 362 +copy_file_range 377 +creat 8 +delete_module 129 +dipc +dup 41 +dup2 63 +dup3 330 +epoll_create 254 +epoll_create1 329 +epoll_ctl 255 +epoll_ctl_old +epoll_pwait 319 +epoll_pwait2 441 +epoll_wait 256 +epoll_wait_old +eventfd 323 +eventfd2 328 +exec_with_loader +execv +execve 11 +execveat 358 +exit 1 +exit_group 252 +faccessat 307 +faccessat2 439 +fadvise64 250 +fadvise64_64 272 +fallocate 324 +fanotify_init 338 +fanotify_mark 339 +fchdir 133 +fchmod 94 +fchmodat 306 +fchmodat2 452 +fchown 95 +fchown32 207 +fchownat 298 +fcntl 55 +fcntl64 221 +fdatasync 148 +fgetxattr 231 +finit_module 350 +flistxattr 234 +flock 143 +fork 2 +fremovexattr 237 +fsconfig 431 +fsetxattr 228 +fsmount 432 +fsopen 430 +fspick 433 +fstat 108 +fstat64 197 +fstatat64 300 +fstatfs 100 +fstatfs64 269 +fsync 118 +ftruncate 93 +ftruncate64 194 +futex 240 +futex_requeue 456 +futex_time64 422 +futex_wait 455 +futex_waitv 449 +futex_wake 454 +futimesat 299 +get_mempolicy 275 +get_robust_list 312 +get_thread_area 244 +getcpu 318 +getcwd 183 +getdents 141 +getdents64 220 +getdomainname +getdtablesize +getegid 50 +getegid32 202 +geteuid 49 +geteuid32 201 +getgid 47 +getgid32 200 +getgroups 80 +getgroups32 205 +gethostname +getitimer 105 +getpagesize +getpeername 368 +getpgid 132 +getpgrp 65 +getpid 20 +getppid 64 +getpriority 96 +getrandom 355 +getresgid 171 +getresgid32 211 +getresuid 165 +getresuid32 209 +getrlimit 76 +getrusage 77 +getsid 147 +getsockname 367 +getsockopt 365 +gettid 224 +gettimeofday 78 +getuid 24 +getuid32 199 +getxattr 229 +getxgid +getxpid +getxuid +init_module 128 +inotify_add_watch 292 +inotify_init 291 +inotify_init1 332 +inotify_rm_watch 293 +io_cancel 249 +io_destroy 246 +io_getevents 247 +io_pgetevents 385 +io_pgetevents_time64 416 +io_setup 245 +io_submit 248 +io_uring_enter 426 +io_uring_register 427 +io_uring_setup 425 +ioctl 54 +ioperm 101 +iopl 110 +ioprio_get 290 +ioprio_set 289 +ipc 117 +kcmp 349 +kern_features +kexec_file_load +kexec_load 283 +keyctl 288 +kill 37 +landlock_add_rule 445 +landlock_create_ruleset 444 +landlock_restrict_self 446 +lchown 16 +lchown32 198 +lgetxattr 230 +link 9 +linkat 303 +listen 363 +listxattr 232 +llistxattr 233 +lookup_dcookie 253 +lremovexattr 236 +lseek 19 +lsetxattr 227 +lstat 107 +lstat64 196 +madvise 219 +map_shadow_stack 453 +mbind 274 +membarrier 375 +memfd_create 356 +memfd_secret 447 +memory_ordering +migrate_pages 294 +mincore 218 +mkdir 39 +mkdirat 296 +mknod 14 +mknodat 297 +mlock 150 +mlock2 376 +mlockall 152 +mmap 90 +mmap2 192 +modify_ldt 123 +mount 21 +mount_setattr 442 +move_mount 429 +move_pages 317 +mprotect 125 +mq_getsetattr 282 +mq_notify 281 +mq_open 277 +mq_timedreceive 280 +mq_timedreceive_time64 419 +mq_timedsend 279 +mq_timedsend_time64 418 +mq_unlink 278 +mremap 163 +msgctl 402 +msgget 399 +msgrcv 401 +msgsnd 400 +msync 144 +multiplexer +munlock 151 +munlockall 153 +munmap 91 +name_to_handle_at 341 +nanosleep 162 +newfstatat +nice 34 +old_adjtimex +oldfstat 28 +oldlstat 84 +oldolduname 59 +oldstat 18 +oldumount +olduname 109 +open 5 +open_by_handle_at 342 +open_tree 428 +openat 295 +openat2 437 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause 29 +pciconfig_iobase +pciconfig_read +pciconfig_write +perf_event_open 336 +perfctr +personality 136 +pidfd_getfd 438 +pidfd_open 434 +pidfd_send_signal 424 +pipe 42 +pipe2 331 +pivot_root 217 +pkey_alloc 381 +pkey_free 382 +pkey_mprotect 380 +poll 168 +ppoll 309 +ppoll_time64 414 +prctl 172 +pread64 180 +preadv 333 +preadv2 378 +prlimit64 340 +process_madvise 440 +process_mrelease 448 +process_vm_readv 347 +process_vm_writev 348 +pselect6 308 +pselect6_time64 413 +ptrace 26 +pwrite64 181 +pwritev 334 +pwritev2 379 +quotactl 131 +quotactl_fd 443 +read 3 +readahead 225 +readdir 89 +readlink 85 +readlinkat 305 +readv 145 +reboot 88 +recv +recvfrom 371 +recvmmsg 337 +recvmmsg_time64 417 +recvmsg 372 +remap_file_pages 257 +removexattr 235 +rename 38 +renameat 302 +renameat2 353 +request_key 287 +restart_syscall 0 +riscv_flush_icache +riscv_hwprobe +rmdir 40 +rseq 386 +rt_sigaction 174 +rt_sigpending 176 +rt_sigprocmask 175 +rt_sigqueueinfo 178 +rt_sigreturn 173 +rt_sigsuspend 179 +rt_sigtimedwait 177 +rt_sigtimedwait_time64 421 +rt_tgsigqueueinfo 335 +rtas +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max 159 +sched_get_priority_min 160 +sched_getaffinity 242 +sched_getattr 352 +sched_getparam 155 +sched_getscheduler 157 +sched_rr_get_interval 161 +sched_rr_get_interval_time64 423 +sched_set_affinity +sched_setaffinity 241 +sched_setattr 351 +sched_setparam 154 +sched_setscheduler 156 +sched_yield 158 +seccomp 354 +select 82 +semctl 394 +semget 393 +semop +semtimedop +semtimedop_time64 420 +send +sendfile 187 +sendfile64 239 +sendmmsg 345 +sendmsg 370 +sendto 369 +set_mempolicy 276 +set_mempolicy_home_node 450 +set_robust_list 311 +set_thread_area 243 +set_tid_address 258 +setdomainname 121 +setfsgid 139 +setfsgid32 216 +setfsuid 138 +setfsuid32 215 +setgid 46 +setgid32 214 +setgroups 81 +setgroups32 206 +sethae +sethostname 74 +setitimer 104 +setns 346 +setpgid 57 +setpgrp +setpriority 97 +setregid 71 +setregid32 204 +setresgid 170 +setresgid32 210 +setresuid 164 +setresuid32 208 +setreuid 70 +setreuid32 203 +setrlimit 75 +setsid 66 +setsockopt 366 +settimeofday 79 +setuid 23 +setuid32 213 +setxattr 226 +sgetmask 68 +shmat 397 +shmctl 396 +shmdt 398 +shmget 395 +shutdown 373 +sigaction 67 +sigaltstack 186 +signal 48 +signalfd 321 +signalfd4 327 +sigpending 73 +sigprocmask 126 +sigreturn 119 +sigsuspend 72 +socket 359 +socketcall 102 +socketpair 360 +splice 313 +spu_create +spu_run +ssetmask 69 +stat 106 +stat64 195 +statfs 99 +statfs64 268 +statx 383 +stime 25 +subpage_prot +swapcontext +swapoff 115 +swapon 87 +switch_endian +symlink 83 +symlinkat 304 +sync 36 +sync_file_range 314 +sync_file_range2 +syncfs 344 +sys_debug_setcontext +syscall +sysfs 135 +sysinfo 116 +syslog 103 +sysmips +tee 315 +tgkill 270 +time 13 +timer_create 259 +timer_delete 263 +timer_getoverrun 262 +timer_gettime 261 +timer_gettime64 408 +timer_settime 260 +timer_settime64 409 +timerfd +timerfd_create 322 +timerfd_gettime 326 +timerfd_gettime64 410 +timerfd_settime 325 +timerfd_settime64 411 +times 43 +tkill 238 +truncate 92 +truncate64 193 +ugetrlimit 191 +umask 60 +umount 22 +umount2 52 +uname 122 +unlink 10 +unlinkat 301 +unshare 310 +userfaultfd 374 +ustat 62 +utime 30 +utimensat 320 +utimensat_time64 412 +utimes 271 +utrap_install +vfork 190 +vhangup 111 +vm86 166 +vm86old 113 +vmsplice 316 +wait4 114 +waitid 284 +waitpid 7 +write 4 +writev 146 diff --git a/src/basic/syscalls-ia64.txt b/src/basic/syscalls-ia64.txt new file mode 100644 index 0000000..4e82257 --- /dev/null +++ b/src/basic/syscalls-ia64.txt @@ -0,0 +1,603 @@ +_llseek +_newselect +_sysctl 1150 +accept 1194 +accept4 1334 +access 1049 +acct 1064 +add_key 1271 +adjtimex 1131 +alarm +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bdflush 1138 +bind 1191 +bpf 1341 +brk 1060 +cachectl +cacheflush +capget 1185 +capset 1186 +chdir 1034 +chmod 1038 +chown 1039 +chown32 +chroot 1068 +clock_adjtime 1328 +clock_adjtime64 +clock_getres 1255 +clock_getres_time64 +clock_gettime 1254 +clock_gettime64 +clock_nanosleep 1256 +clock_nanosleep_time64 +clock_settime 1253 +clock_settime64 +clone 1128 +clone2 1213 +clone3 +close 1029 +close_range 1460 +connect 1192 +copy_file_range 1347 +creat 1030 +create_module +delete_module 1134 +dipc +dup 1057 +dup2 1070 +dup3 1316 +epoll_create 1243 +epoll_create1 1315 +epoll_ctl 1244 +epoll_ctl_old +epoll_pwait 1305 +epoll_pwait2 1465 +epoll_wait 1245 +epoll_wait_old +eventfd 1309 +eventfd2 1314 +exec_with_loader +execv +execve 1033 +execveat 1342 +exit 1025 +exit_group 1236 +faccessat 1293 +faccessat2 1463 +fadvise64 1234 +fadvise64_64 +fallocate 1303 +fanotify_init 1323 +fanotify_mark 1324 +fchdir 1035 +fchmod 1099 +fchmodat 1292 +fchmodat2 1476 +fchown 1100 +fchown32 +fchownat 1284 +fcntl 1066 +fcntl64 +fdatasync 1052 +fgetxattr 1222 +finit_module 1335 +flistxattr 1225 +flock 1145 +fork +fremovexattr 1228 +fsconfig 1455 +fsetxattr 1219 +fsmount 1456 +fsopen 1454 +fspick 1457 +fstat 1212 +fstat64 +fstatat64 +fstatfs 1104 +fstatfs64 1257 +fsync 1051 +ftruncate 1098 +ftruncate64 +futex 1230 +futex_time64 +futex_waitv 1473 +futimesat 1285 +get_kernel_syms +get_mempolicy 1260 +get_robust_list 1299 +get_thread_area +getcpu 1304 +getcwd 1184 +getdents 1144 +getdents64 1214 +getdomainname +getdtablesize +getegid 1063 +getegid32 +geteuid 1047 +geteuid32 +getgid 1062 +getgid32 +getgroups 1077 +getgroups32 +gethostname +getitimer 1119 +getpagesize +getpeername 1196 +getpgid 1079 +getpgrp +getpid 1041 +getpmsg 1188 +getppid 1042 +getpriority 1101 +getrandom 1339 +getresgid 1075 +getresgid32 +getresuid 1073 +getresuid32 +getrlimit 1085 +getrusage 1086 +getsid 1082 +getsockname 1195 +getsockopt 1204 +gettid 1105 +gettimeofday 1087 +getuid 1046 +getuid32 +getunwind 1215 +getxattr 1220 +getxgid +getxpid +getxuid +idle +init_module 1133 +inotify_add_watch 1278 +inotify_init 1277 +inotify_init1 1318 +inotify_rm_watch 1279 +io_cancel 1242 +io_destroy 1239 +io_getevents 1240 +io_pgetevents 1351 +io_pgetevents_time64 +io_setup 1238 +io_submit 1241 +io_uring_enter 1450 +io_uring_register 1451 +io_uring_setup 1449 +ioctl 1065 +ioperm +iopl +ioprio_get 1275 +ioprio_set 1274 +ipc +kcmp 1345 +kern_features +kexec_file_load +kexec_load 1268 +keyctl 1273 +kill 1053 +landlock_add_rule 1469 +landlock_create_ruleset 1468 +landlock_restrict_self 1470 +lchown 1124 +lchown32 +lgetxattr 1221 +link 1031 +linkat 1289 +listen 1193 +listxattr 1223 +llistxattr 1224 +lookup_dcookie 1237 +lremovexattr 1227 +lseek 1040 +lsetxattr 1218 +lstat 1211 +lstat64 +madvise 1209 +map_shadow_stack +mbind 1259 +membarrier 1344 +memfd_create 1340 +memfd_secret +memory_ordering +migrate_pages 1280 +mincore 1208 +mkdir 1055 +mkdirat 1282 +mknod 1037 +mknodat 1283 +mlock 1153 +mlock2 1346 +mlockall 1154 +mmap 1151 +mmap2 1172 +modify_ldt +mount 1043 +mount_setattr 1466 +move_mount 1453 +move_pages 1276 +mprotect 1155 +mq_getsetattr 1267 +mq_notify 1266 +mq_open 1262 +mq_timedreceive 1265 +mq_timedreceive_time64 +mq_timedsend 1264 +mq_timedsend_time64 +mq_unlink 1263 +mremap 1156 +msgctl 1112 +msgget 1109 +msgrcv 1111 +msgsnd 1110 +msync 1157 +multiplexer +munlock 1158 +munlockall 1159 +munmap 1152 +name_to_handle_at 1326 +nanosleep 1168 +newfstatat 1286 +nfsservctl 1169 +nice +old_adjtimex +old_getpagesize 1171 +oldfstat +oldlstat +oldolduname +oldstat +oldumount +olduname +open 1028 +open_by_handle_at 1327 +open_tree 1452 +openat 1281 +openat2 1461 +or1k_atomic +osf_adjtime +osf_afs_syscall +osf_alt_plock +osf_alt_setsid +osf_alt_sigpending +osf_asynch_daemon +osf_audcntl +osf_audgen +osf_chflags +osf_execve +osf_exportfs +osf_fchflags +osf_fdatasync +osf_fpathconf +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_fuser +osf_getaddressconf +osf_getdirentries +osf_getdomainname +osf_getfh +osf_getfsstat +osf_gethostid +osf_getitimer +osf_getlogin +osf_getmnt +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_kloadcall +osf_kmodcall +osf_lstat +osf_memcntl +osf_mincore +osf_mount +osf_mremap +osf_msfs_syscall +osf_msleep +osf_mvalid +osf_mwakeup +osf_naccept +osf_nfssvc +osf_ngetpeername +osf_ngetsockname +osf_nrecvfrom +osf_nrecvmsg +osf_nsendmsg +osf_ntp_adjtime +osf_ntp_gettime +osf_old_creat +osf_old_fstat +osf_old_getpgrp +osf_old_killpg +osf_old_lstat +osf_old_open +osf_old_sigaction +osf_old_sigblock +osf_old_sigreturn +osf_old_sigsetmask +osf_old_sigvec +osf_old_stat +osf_old_vadvise +osf_old_vtrace +osf_old_wait +osf_oldquota +osf_pathconf +osf_pid_block +osf_pid_unblock +osf_plock +osf_priocntlset +osf_profil +osf_proplist_syscall +osf_reboot +osf_revoke +osf_sbrk +osf_security +osf_select +osf_set_program_attributes +osf_set_speculative +osf_sethostid +osf_setitimer +osf_setlogin +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_signal +osf_sigprocmask +osf_sigsendset +osf_sigstack +osf_sigwaitprim +osf_sstk +osf_stat +osf_statfs +osf_statfs64 +osf_subsys_info +osf_swapctl +osf_swapon +osf_syscall +osf_sysinfo +osf_table +osf_uadmin +osf_usleep_thread +osf_uswitch +osf_utc_adjtime +osf_utc_gettime +osf_utimes +osf_utsname +osf_wait4 +osf_waitid +pause +pciconfig_iobase +pciconfig_read 1173 +pciconfig_write 1174 +perf_event_open 1352 +perfctr +personality 1140 +pidfd_getfd 1462 +pidfd_open 1458 +pidfd_send_signal 1448 +pipe 1058 +pipe2 1317 +pivot_root 1207 +pkey_alloc 1355 +pkey_free 1356 +pkey_mprotect 1354 +poll 1090 +ppoll 1295 +ppoll_time64 +prctl 1170 +pread64 1148 +preadv 1319 +preadv2 1348 +prlimit64 1325 +process_madvise 1464 +process_mrelease 1472 +process_vm_readv 1332 +process_vm_writev 1333 +pselect6 1294 +pselect6_time64 +ptrace 1048 +pwrite64 1149 +pwritev 1320 +pwritev2 1349 +query_module +quotactl 1137 +quotactl_fd 1467 +read 1026 +readahead 1216 +readdir +readlink 1092 +readlinkat 1291 +readv 1146 +reboot 1096 +recv 1200 +recvfrom 1201 +recvmmsg 1322 +recvmmsg_time64 +recvmsg 1206 +remap_file_pages 1125 +removexattr 1226 +rename 1054 +renameat 1288 +renameat2 1338 +request_key 1272 +restart_syscall 1246 +riscv_flush_icache +riscv_hwprobe +rmdir 1056 +rseq 1357 +rt_sigaction 1177 +rt_sigpending 1178 +rt_sigprocmask 1179 +rt_sigqueueinfo 1180 +rt_sigreturn 1181 +rt_sigsuspend 1182 +rt_sigtimedwait 1183 +rt_sigtimedwait_time64 +rt_tgsigqueueinfo 1321 +rtas +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max 1165 +sched_get_priority_min 1166 +sched_getaffinity 1232 +sched_getattr 1337 +sched_getparam 1160 +sched_getscheduler 1162 +sched_rr_get_interval 1167 +sched_rr_get_interval_time64 +sched_set_affinity +sched_setaffinity 1231 +sched_setattr 1336 +sched_setparam 1161 +sched_setscheduler 1163 +sched_yield 1164 +seccomp 1353 +select 1089 +semctl 1108 +semget 1106 +semop 1107 +semtimedop 1247 +semtimedop_time64 +send 1198 +sendfile 1187 +sendfile64 +sendmmsg 1331 +sendmsg 1205 +sendto 1199 +set_mempolicy 1261 +set_mempolicy_home_node 1474 +set_robust_list 1298 +set_thread_area +set_tid_address 1233 +setdomainname 1129 +setfsgid 1143 +setfsgid32 +setfsuid 1142 +setfsuid32 +setgid 1061 +setgid32 +setgroups 1078 +setgroups32 +sethae +sethostname 1083 +setitimer 1118 +setns 1330 +setpgid 1080 +setpgrp +setpriority 1102 +setregid 1072 +setregid32 +setresgid 1076 +setresgid32 +setresuid 1074 +setresuid32 +setreuid 1071 +setreuid32 +setrlimit 1084 +setsid 1081 +setsockopt 1203 +settimeofday 1088 +setuid 1045 +setuid32 +setxattr 1217 +sgetmask +shmat 1114 +shmctl 1116 +shmdt 1115 +shmget 1113 +shutdown 1202 +sigaction +sigaltstack 1176 +signal +signalfd 1307 +signalfd4 1313 +sigpending +sigprocmask +sigreturn +sigsuspend +socket 1190 +socketcall +socketpair 1197 +splice 1297 +spu_create +spu_run +ssetmask +stat 1210 +stat64 +statfs 1103 +statfs64 1258 +statx 1350 +stime +subpage_prot +swapcontext +swapoff 1095 +swapon 1094 +switch_endian +symlink 1091 +symlinkat 1290 +sync 1050 +sync_file_range 1300 +sync_file_range2 +syncfs 1329 +sys_debug_setcontext +syscall +sysfs 1139 +sysinfo 1127 +syslog 1117 +sysmips +tee 1301 +tgkill 1235 +time +timer_create 1248 +timer_delete 1252 +timer_getoverrun 1251 +timer_gettime 1250 +timer_gettime64 +timer_settime 1249 +timer_settime64 +timerfd 1308 +timerfd_create 1310 +timerfd_gettime 1312 +timerfd_gettime64 +timerfd_settime 1311 +timerfd_settime64 +times 1059 +tkill 1229 +truncate 1097 +truncate64 +ugetrlimit +umask 1067 +umount 1044 +umount2 1044 +uname 1130 +unlink 1032 +unlinkat 1287 +unshare 1296 +uselib 1093 +userfaultfd 1343 +ustat 1069 +utime +utimensat 1306 +utimensat_time64 +utimes 1036 +utrap_install +vfork +vhangup 1123 +vm86 +vm86old +vmsplice 1302 +wait4 1126 +waitid 1270 +waitpid +write 1027 +writev 1147 diff --git a/src/basic/syscalls-loongarch64.txt b/src/basic/syscalls-loongarch64.txt new file mode 100644 index 0000000..7375718 --- /dev/null +++ b/src/basic/syscalls-loongarch64.txt @@ -0,0 +1,514 @@ +_llseek +_newselect +accept 202 +accept4 242 +access +acct 89 +add_key 217 +adjtimex 171 +alarm +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bind 200 +bpf 280 +brk 214 +cachectl +cacheflush +capget 90 +capset 91 +chdir 49 +chmod +chown +chown32 +chroot 51 +clock_adjtime 266 +clock_adjtime64 +clock_getres 114 +clock_getres_time64 +clock_gettime 113 +clock_gettime64 +clock_nanosleep 115 +clock_nanosleep_time64 +clock_settime 112 +clock_settime64 +clone 220 +clone3 435 +close 57 +close_range 436 +connect 203 +copy_file_range 285 +creat +delete_module 106 +dipc +dup 23 +dup2 +dup3 24 +epoll_create +epoll_create1 20 +epoll_ctl 21 +epoll_ctl_old +epoll_pwait 22 +epoll_pwait2 441 +epoll_wait +epoll_wait_old +eventfd +eventfd2 19 +exec_with_loader +execv +execve 221 +execveat 281 +exit 93 +exit_group 94 +faccessat 48 +faccessat2 439 +fadvise64 223 +fadvise64_64 +fallocate 47 +fanotify_init 262 +fanotify_mark 263 +fchdir 50 +fchmod 52 +fchmodat 53 +fchmodat2 452 +fchown 55 +fchown32 +fchownat 54 +fcntl 25 +fcntl64 +fdatasync 83 +fgetxattr 10 +finit_module 273 +flistxattr 13 +flock 32 +fork +fremovexattr 16 +fsconfig 431 +fsetxattr 7 +fsmount 432 +fsopen 430 +fspick 433 +fstat +fstat64 +fstatat64 +fstatfs 44 +fstatfs64 +fsync 82 +ftruncate 46 +ftruncate64 +futex 98 +futex_requeue 456 +futex_time64 +futex_wait 455 +futex_waitv 449 +futex_wake 454 +futimesat +get_mempolicy 236 +get_robust_list 100 +get_thread_area +getcpu 168 +getcwd 17 +getdents +getdents64 61 +getdomainname +getdtablesize +getegid 177 +getegid32 +geteuid 175 +geteuid32 +getgid 176 +getgid32 +getgroups 158 +getgroups32 +gethostname +getitimer 102 +getpagesize +getpeername 205 +getpgid 155 +getpgrp +getpid 172 +getppid 173 +getpriority 141 +getrandom 278 +getresgid 150 +getresgid32 +getresuid 148 +getresuid32 +getrlimit +getrusage 165 +getsid 156 +getsockname 204 +getsockopt 209 +gettid 178 +gettimeofday 169 +getuid 174 +getuid32 +getxattr 8 +getxgid +getxpid +getxuid +init_module 105 +inotify_add_watch 27 +inotify_init +inotify_init1 26 +inotify_rm_watch 28 +io_cancel 3 +io_destroy 1 +io_getevents 4 +io_pgetevents 292 +io_pgetevents_time64 +io_setup 0 +io_submit 2 +io_uring_enter 426 +io_uring_register 427 +io_uring_setup 425 +ioctl 29 +ioperm +iopl +ioprio_get 31 +ioprio_set 30 +ipc +kcmp 272 +kern_features +kexec_file_load 294 +kexec_load 104 +keyctl 219 +kill 129 +landlock_add_rule 445 +landlock_create_ruleset 444 +landlock_restrict_self 446 +lchown +lchown32 +lgetxattr 9 +link +linkat 37 +listen 201 +listxattr 11 +llistxattr 12 +lookup_dcookie 18 +lremovexattr 15 +lseek 62 +lsetxattr 6 +lstat +lstat64 +madvise 233 +map_shadow_stack 453 +mbind 235 +membarrier 283 +memfd_create 279 +memfd_secret +memory_ordering +migrate_pages 238 +mincore 232 +mkdir +mkdirat 34 +mknod +mknodat 33 +mlock 228 +mlock2 284 +mlockall 230 +mmap 222 +mmap2 +modify_ldt +mount 40 +mount_setattr 442 +move_mount 429 +move_pages 239 +mprotect 226 +mq_getsetattr 185 +mq_notify 184 +mq_open 180 +mq_timedreceive 183 +mq_timedreceive_time64 +mq_timedsend 182 +mq_timedsend_time64 +mq_unlink 181 +mremap 216 +msgctl 187 +msgget 186 +msgrcv 188 +msgsnd 189 +msync 227 +multiplexer +munlock 229 +munlockall 231 +munmap 215 +name_to_handle_at 264 +nanosleep 101 +newfstatat +nice +old_adjtimex +oldfstat +oldlstat +oldolduname +oldstat +oldumount +olduname +open +open_by_handle_at 265 +open_tree 428 +openat 56 +openat2 437 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause +pciconfig_iobase +pciconfig_read +pciconfig_write +perf_event_open 241 +perfctr +personality 92 +pidfd_getfd 438 +pidfd_open 434 +pidfd_send_signal 424 +pipe +pipe2 59 +pivot_root 41 +pkey_alloc 289 +pkey_free 290 +pkey_mprotect 288 +poll +ppoll 73 +ppoll_time64 +prctl 167 +pread64 67 +preadv 69 +preadv2 286 +prlimit64 261 +process_madvise 440 +process_mrelease 448 +process_vm_readv 270 +process_vm_writev 271 +pselect6 72 +pselect6_time64 +ptrace 117 +pwrite64 68 +pwritev 70 +pwritev2 287 +quotactl 60 +quotactl_fd 443 +read 63 +readahead 213 +readdir +readlink +readlinkat 78 +readv 65 +reboot 142 +recv +recvfrom 207 +recvmmsg 243 +recvmmsg_time64 +recvmsg 212 +remap_file_pages 234 +removexattr 14 +rename +renameat +renameat2 276 +request_key 218 +restart_syscall 128 +riscv_flush_icache +riscv_hwprobe +rmdir +rseq 293 +rt_sigaction 134 +rt_sigpending 136 +rt_sigprocmask 135 +rt_sigqueueinfo 138 +rt_sigreturn 139 +rt_sigsuspend 133 +rt_sigtimedwait 137 +rt_sigtimedwait_time64 +rt_tgsigqueueinfo 240 +rtas +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max 125 +sched_get_priority_min 126 +sched_getaffinity 123 +sched_getattr 275 +sched_getparam 121 +sched_getscheduler 120 +sched_rr_get_interval 127 +sched_rr_get_interval_time64 +sched_set_affinity +sched_setaffinity 122 +sched_setattr 274 +sched_setparam 118 +sched_setscheduler 119 +sched_yield 124 +seccomp 277 +select +semctl 191 +semget 190 +semop 193 +semtimedop 192 +semtimedop_time64 +send +sendfile 71 +sendfile64 +sendmmsg 269 +sendmsg 211 +sendto 206 +set_mempolicy 237 +set_mempolicy_home_node 450 +set_robust_list 99 +set_thread_area +set_tid_address 96 +setdomainname 162 +setfsgid 152 +setfsgid32 +setfsuid 151 +setfsuid32 +setgid 144 +setgid32 +setgroups 159 +setgroups32 +sethae +sethostname 161 +setitimer 103 +setns 268 +setpgid 154 +setpgrp +setpriority 140 +setregid 143 +setregid32 +setresgid 149 +setresgid32 +setresuid 147 +setresuid32 +setreuid 145 +setreuid32 +setrlimit +setsid 157 +setsockopt 208 +settimeofday 170 +setuid 146 +setuid32 +setxattr 5 +sgetmask +shmat 196 +shmctl 195 +shmdt 197 +shmget 194 +shutdown 210 +sigaction +sigaltstack 132 +signal +signalfd +signalfd4 74 +sigpending +sigprocmask +sigreturn +sigsuspend +socket 198 +socketcall +socketpair 199 +splice 76 +spu_create +spu_run +ssetmask +stat +stat64 +statfs 43 +statfs64 +statx 291 +stime +subpage_prot +swapcontext +swapoff 225 +swapon 224 +switch_endian +symlink +symlinkat 36 +sync 81 +sync_file_range 84 +sync_file_range2 +syncfs 267 +sys_debug_setcontext +syscall +sysfs +sysinfo 179 +syslog 116 +sysmips +tee 77 +tgkill 131 +time +timer_create 107 +timer_delete 111 +timer_getoverrun 109 +timer_gettime 108 +timer_gettime64 +timer_settime 110 +timer_settime64 +timerfd +timerfd_create 85 +timerfd_gettime 87 +timerfd_gettime64 +timerfd_settime 86 +timerfd_settime64 +times 153 +tkill 130 +truncate 45 +truncate64 +ugetrlimit +umask 166 +umount +umount2 39 +uname 160 +unlink +unlinkat 35 +unshare 97 +userfaultfd 282 +ustat +utime +utimensat 88 +utimensat_time64 +utimes +utrap_install +vfork +vhangup 58 +vm86 +vm86old +vmsplice 75 +wait4 260 +waitid 95 +waitpid +write 64 +writev 66 diff --git a/src/basic/syscalls-m68k.txt b/src/basic/syscalls-m68k.txt new file mode 100644 index 0000000..b2eb156 --- /dev/null +++ b/src/basic/syscalls-m68k.txt @@ -0,0 +1,514 @@ +_llseek 140 +_newselect 142 +accept +accept4 361 +access 33 +acct 51 +add_key 279 +adjtimex 124 +alarm 27 +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 +atomic_barrier 336 +atomic_cmpxchg_32 335 +bind 358 +bpf 354 +brk 45 +cachectl +cacheflush 123 +capget 184 +capset 185 +chdir 12 +chmod 15 +chown 16 +chown32 198 +chroot 61 +clock_adjtime 342 +clock_adjtime64 405 +clock_getres 261 +clock_getres_time64 406 +clock_gettime 260 +clock_gettime64 403 +clock_nanosleep 262 +clock_nanosleep_time64 407 +clock_settime 259 +clock_settime64 404 +clone 120 +clone3 435 +close 6 +close_range 436 +connect 359 +copy_file_range 376 +creat 8 +delete_module 129 +dipc +dup 41 +dup2 63 +dup3 326 +epoll_create 249 +epoll_create1 325 +epoll_ctl 250 +epoll_ctl_old +epoll_pwait 315 +epoll_pwait2 441 +epoll_wait 251 +epoll_wait_old +eventfd 319 +eventfd2 324 +exec_with_loader +execv +execve 11 +execveat 355 +exit 1 +exit_group 247 +faccessat 300 +faccessat2 439 +fadvise64 246 +fadvise64_64 267 +fallocate 320 +fanotify_init 337 +fanotify_mark 338 +fchdir 133 +fchmod 94 +fchmodat 299 +fchmodat2 452 +fchown 95 +fchown32 207 +fchownat 291 +fcntl 55 +fcntl64 239 +fdatasync 148 +fgetxattr 228 +finit_module 348 +flistxattr 231 +flock 143 +fork 2 +fremovexattr 234 +fsconfig 431 +fsetxattr 225 +fsmount 432 +fsopen 430 +fspick 433 +fstat 108 +fstat64 197 +fstatat64 293 +fstatfs 100 +fstatfs64 264 +fsync 118 +ftruncate 93 +ftruncate64 194 +futex 235 +futex_requeue 456 +futex_time64 422 +futex_wait 455 +futex_waitv 449 +futex_wake 454 +futimesat 292 +get_mempolicy 269 +get_robust_list 305 +get_thread_area 333 +getcpu 314 +getcwd 183 +getdents 141 +getdents64 220 +getdomainname +getdtablesize +getegid 50 +getegid32 202 +geteuid 49 +geteuid32 201 +getgid 47 +getgid32 200 +getgroups 80 +getgroups32 205 +gethostname +getitimer 105 +getpagesize 166 +getpeername 365 +getpgid 132 +getpgrp 65 +getpid 20 +getppid 64 +getpriority 96 +getrandom 352 +getresgid 171 +getresgid32 211 +getresuid 165 +getresuid32 209 +getrlimit 76 +getrusage 77 +getsid 147 +getsockname 364 +getsockopt 362 +gettid 221 +gettimeofday 78 +getuid 24 +getuid32 199 +getxattr 226 +getxgid +getxpid +getxuid +init_module 128 +inotify_add_watch 285 +inotify_init 284 +inotify_init1 328 +inotify_rm_watch 286 +io_cancel 245 +io_destroy 242 +io_getevents 243 +io_pgetevents +io_pgetevents_time64 416 +io_setup 241 +io_submit 244 +io_uring_enter 426 +io_uring_register 427 +io_uring_setup 425 +ioctl 54 +ioperm +iopl +ioprio_get 283 +ioprio_set 282 +ipc 117 +kcmp 347 +kern_features +kexec_file_load +kexec_load 313 +keyctl 281 +kill 37 +landlock_add_rule 445 +landlock_create_ruleset 444 +landlock_restrict_self 446 +lchown 182 +lchown32 212 +lgetxattr 227 +link 9 +linkat 296 +listen 360 +listxattr 229 +llistxattr 230 +lookup_dcookie 248 +lremovexattr 233 +lseek 19 +lsetxattr 224 +lstat 107 +lstat64 196 +madvise 238 +map_shadow_stack 453 +mbind 268 +membarrier 374 +memfd_create 353 +memfd_secret +memory_ordering +migrate_pages 287 +mincore 237 +mkdir 39 +mkdirat 289 +mknod 14 +mknodat 290 +mlock 150 +mlock2 375 +mlockall 152 +mmap 90 +mmap2 192 +modify_ldt +mount 21 +mount_setattr 442 +move_mount 429 +move_pages 310 +mprotect 125 +mq_getsetattr 276 +mq_notify 275 +mq_open 271 +mq_timedreceive 274 +mq_timedreceive_time64 419 +mq_timedsend 273 +mq_timedsend_time64 418 +mq_unlink 272 +mremap 163 +msgctl 402 +msgget 399 +msgrcv 401 +msgsnd 400 +msync 144 +multiplexer +munlock 151 +munlockall 153 +munmap 91 +name_to_handle_at 340 +nanosleep 162 +newfstatat +nice 34 +old_adjtimex +oldfstat 28 +oldlstat 84 +oldolduname +oldstat 18 +oldumount +olduname +open 5 +open_by_handle_at 341 +open_tree 428 +openat 288 +openat2 437 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause 29 +pciconfig_iobase +pciconfig_read +pciconfig_write +perf_event_open 332 +perfctr +personality 136 +pidfd_getfd 438 +pidfd_open 434 +pidfd_send_signal 424 +pipe 42 +pipe2 327 +pivot_root 217 +pkey_alloc 382 +pkey_free 383 +pkey_mprotect 381 +poll 168 +ppoll 302 +ppoll_time64 414 +prctl 172 +pread64 180 +preadv 329 +preadv2 377 +prlimit64 339 +process_madvise 440 +process_mrelease 448 +process_vm_readv 345 +process_vm_writev 346 +pselect6 301 +pselect6_time64 413 +ptrace 26 +pwrite64 181 +pwritev 330 +pwritev2 378 +quotactl 131 +quotactl_fd 443 +read 3 +readahead 240 +readdir 89 +readlink 85 +readlinkat 298 +readv 145 +reboot 88 +recv +recvfrom 368 +recvmmsg 371 +recvmmsg_time64 417 +recvmsg 369 +remap_file_pages 252 +removexattr 232 +rename 38 +renameat 295 +renameat2 351 +request_key 280 +restart_syscall 0 +riscv_flush_icache +riscv_hwprobe +rmdir 40 +rseq 384 +rt_sigaction 174 +rt_sigpending 176 +rt_sigprocmask 175 +rt_sigqueueinfo 178 +rt_sigreturn 173 +rt_sigsuspend 179 +rt_sigtimedwait 177 +rt_sigtimedwait_time64 421 +rt_tgsigqueueinfo 331 +rtas +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max 159 +sched_get_priority_min 160 +sched_getaffinity 312 +sched_getattr 350 +sched_getparam 155 +sched_getscheduler 157 +sched_rr_get_interval 161 +sched_rr_get_interval_time64 423 +sched_set_affinity +sched_setaffinity 311 +sched_setattr 349 +sched_setparam 154 +sched_setscheduler 156 +sched_yield 158 +seccomp 380 +select 82 +semctl 394 +semget 393 +semop +semtimedop +semtimedop_time64 420 +send +sendfile 187 +sendfile64 236 +sendmmsg 372 +sendmsg 367 +sendto 366 +set_mempolicy 270 +set_mempolicy_home_node 450 +set_robust_list 304 +set_thread_area 334 +set_tid_address 253 +setdomainname 121 +setfsgid 139 +setfsgid32 216 +setfsuid 138 +setfsuid32 215 +setgid 46 +setgid32 214 +setgroups 81 +setgroups32 206 +sethae +sethostname 74 +setitimer 104 +setns 344 +setpgid 57 +setpgrp +setpriority 97 +setregid 71 +setregid32 204 +setresgid 170 +setresgid32 210 +setresuid 164 +setresuid32 208 +setreuid 70 +setreuid32 203 +setrlimit 75 +setsid 66 +setsockopt 363 +settimeofday 79 +setuid 23 +setuid32 213 +setxattr 223 +sgetmask 68 +shmat 397 +shmctl 396 +shmdt 398 +shmget 395 +shutdown 370 +sigaction 67 +sigaltstack 186 +signal 48 +signalfd 317 +signalfd4 323 +sigpending 73 +sigprocmask 126 +sigreturn 119 +sigsuspend 72 +socket 356 +socketcall 102 +socketpair 357 +splice 306 +spu_create +spu_run +ssetmask 69 +stat 106 +stat64 195 +statfs 99 +statfs64 263 +statx 379 +stime 25 +subpage_prot +swapcontext +swapoff 115 +swapon 87 +switch_endian +symlink 83 +symlinkat 297 +sync 36 +sync_file_range 307 +sync_file_range2 +syncfs 343 +sys_debug_setcontext +syscall +sysfs 135 +sysinfo 116 +syslog 103 +sysmips +tee 308 +tgkill 265 +time 13 +timer_create 254 +timer_delete 258 +timer_getoverrun 257 +timer_gettime 256 +timer_gettime64 408 +timer_settime 255 +timer_settime64 409 +timerfd +timerfd_create 318 +timerfd_gettime 322 +timerfd_gettime64 410 +timerfd_settime 321 +timerfd_settime64 411 +times 43 +tkill 222 +truncate 92 +truncate64 193 +ugetrlimit 191 +umask 60 +umount 22 +umount2 52 +uname 122 +unlink 10 +unlinkat 294 +unshare 303 +userfaultfd 373 +ustat 62 +utime 30 +utimensat 316 +utimensat_time64 412 +utimes 266 +utrap_install +vfork 190 +vhangup 111 +vm86 +vm86old +vmsplice 309 +wait4 114 +waitid 277 +waitpid 7 +write 4 +writev 146 diff --git a/src/basic/syscalls-mips64.txt b/src/basic/syscalls-mips64.txt new file mode 100644 index 0000000..280ffa7 --- /dev/null +++ b/src/basic/syscalls-mips64.txt @@ -0,0 +1,514 @@ +_llseek +_newselect 5022 +accept 5042 +accept4 5293 +access 5020 +acct 5158 +add_key 5239 +adjtimex 5154 +alarm 5037 +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bind 5048 +bpf 5315 +brk 5012 +cachectl 5198 +cacheflush 5197 +capget 5123 +capset 5124 +chdir 5078 +chmod 5088 +chown 5090 +chown32 +chroot 5156 +clock_adjtime 5300 +clock_adjtime64 +clock_getres 5223 +clock_getres_time64 +clock_gettime 5222 +clock_gettime64 +clock_nanosleep 5224 +clock_nanosleep_time64 +clock_settime 5221 +clock_settime64 +clone 5055 +clone3 5435 +close 5003 +close_range 5436 +connect 5041 +copy_file_range 5320 +creat 5083 +delete_module 5169 +dipc +dup 5031 +dup2 5032 +dup3 5286 +epoll_create 5207 +epoll_create1 5285 +epoll_ctl 5208 +epoll_ctl_old +epoll_pwait 5272 +epoll_pwait2 5441 +epoll_wait 5209 +epoll_wait_old +eventfd 5278 +eventfd2 5284 +exec_with_loader +execv +execve 5057 +execveat 5316 +exit 5058 +exit_group 5205 +faccessat 5259 +faccessat2 5439 +fadvise64 5215 +fadvise64_64 +fallocate 5279 +fanotify_init 5295 +fanotify_mark 5296 +fchdir 5079 +fchmod 5089 +fchmodat 5258 +fchmodat2 5452 +fchown 5091 +fchown32 +fchownat 5250 +fcntl 5070 +fcntl64 +fdatasync 5073 +fgetxattr 5185 +finit_module 5307 +flistxattr 5188 +flock 5071 +fork 5056 +fremovexattr 5191 +fsconfig 5431 +fsetxattr 5182 +fsmount 5432 +fsopen 5430 +fspick 5433 +fstat 5005 +fstat64 +fstatat64 +fstatfs 5135 +fstatfs64 +fsync 5072 +ftruncate 5075 +ftruncate64 +futex 5194 +futex_requeue 5456 +futex_time64 +futex_wait 5455 +futex_waitv 5449 +futex_wake 5454 +futimesat 5251 +get_mempolicy 5228 +get_robust_list 5269 +get_thread_area +getcpu 5271 +getcwd 5077 +getdents 5076 +getdents64 5308 +getdomainname +getdtablesize +getegid 5106 +getegid32 +geteuid 5105 +geteuid32 +getgid 5102 +getgid32 +getgroups 5113 +getgroups32 +gethostname +getitimer 5035 +getpagesize +getpeername 5051 +getpgid 5119 +getpgrp 5109 +getpid 5038 +getppid 5108 +getpriority 5137 +getrandom 5313 +getresgid 5118 +getresgid32 +getresuid 5116 +getresuid32 +getrlimit 5095 +getrusage 5096 +getsid 5122 +getsockname 5050 +getsockopt 5054 +gettid 5178 +gettimeofday 5094 +getuid 5100 +getuid32 +getxattr 5183 +getxgid +getxpid +getxuid +init_module 5168 +inotify_add_watch 5244 +inotify_init 5243 +inotify_init1 5288 +inotify_rm_watch 5245 +io_cancel 5204 +io_destroy 5201 +io_getevents 5202 +io_pgetevents 5328 +io_pgetevents_time64 +io_setup 5200 +io_submit 5203 +io_uring_enter 5426 +io_uring_register 5427 +io_uring_setup 5425 +ioctl 5015 +ioperm +iopl +ioprio_get 5274 +ioprio_set 5273 +ipc +kcmp 5306 +kern_features +kexec_file_load +kexec_load 5270 +keyctl 5241 +kill 5060 +landlock_add_rule 5445 +landlock_create_ruleset 5444 +landlock_restrict_self 5446 +lchown 5092 +lchown32 +lgetxattr 5184 +link 5084 +linkat 5255 +listen 5049 +listxattr 5186 +llistxattr 5187 +lookup_dcookie 5206 +lremovexattr 5190 +lseek 5008 +lsetxattr 5181 +lstat 5006 +lstat64 +madvise 5027 +map_shadow_stack 5453 +mbind 5227 +membarrier 5318 +memfd_create 5314 +memfd_secret +memory_ordering +migrate_pages 5246 +mincore 5026 +mkdir 5081 +mkdirat 5248 +mknod 5131 +mknodat 5249 +mlock 5146 +mlock2 5319 +mlockall 5148 +mmap 5009 +mmap2 +modify_ldt +mount 5160 +mount_setattr 5442 +move_mount 5429 +move_pages 5267 +mprotect 5010 +mq_getsetattr 5235 +mq_notify 5234 +mq_open 5230 +mq_timedreceive 5233 +mq_timedreceive_time64 +mq_timedsend 5232 +mq_timedsend_time64 +mq_unlink 5231 +mremap 5024 +msgctl 5069 +msgget 5066 +msgrcv 5068 +msgsnd 5067 +msync 5025 +multiplexer +munlock 5147 +munlockall 5149 +munmap 5011 +name_to_handle_at 5298 +nanosleep 5034 +newfstatat 5252 +nice +old_adjtimex +oldfstat +oldlstat +oldolduname +oldstat +oldumount +olduname +open 5002 +open_by_handle_at 5299 +open_tree 5428 +openat 5247 +openat2 5437 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause 5033 +pciconfig_iobase +pciconfig_read +pciconfig_write +perf_event_open 5292 +perfctr +personality 5132 +pidfd_getfd 5438 +pidfd_open 5434 +pidfd_send_signal 5424 +pipe 5021 +pipe2 5287 +pivot_root 5151 +pkey_alloc 5324 +pkey_free 5325 +pkey_mprotect 5323 +poll 5007 +ppoll 5261 +ppoll_time64 +prctl 5153 +pread64 5016 +preadv 5289 +preadv2 5321 +prlimit64 5297 +process_madvise 5440 +process_mrelease 5448 +process_vm_readv 5304 +process_vm_writev 5305 +pselect6 5260 +pselect6_time64 +ptrace 5099 +pwrite64 5017 +pwritev 5290 +pwritev2 5322 +quotactl 5172 +quotactl_fd 5443 +read 5000 +readahead 5179 +readdir +readlink 5087 +readlinkat 5257 +readv 5018 +reboot 5164 +recv +recvfrom 5044 +recvmmsg 5294 +recvmmsg_time64 +recvmsg 5046 +remap_file_pages 5210 +removexattr 5189 +rename 5080 +renameat 5254 +renameat2 5311 +request_key 5240 +restart_syscall 5213 +riscv_flush_icache +riscv_hwprobe +rmdir 5082 +rseq 5327 +rt_sigaction 5013 +rt_sigpending 5125 +rt_sigprocmask 5014 +rt_sigqueueinfo 5127 +rt_sigreturn 5211 +rt_sigsuspend 5128 +rt_sigtimedwait 5126 +rt_sigtimedwait_time64 +rt_tgsigqueueinfo 5291 +rtas +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max 5143 +sched_get_priority_min 5144 +sched_getaffinity 5196 +sched_getattr 5310 +sched_getparam 5140 +sched_getscheduler 5142 +sched_rr_get_interval 5145 +sched_rr_get_interval_time64 +sched_set_affinity +sched_setaffinity 5195 +sched_setattr 5309 +sched_setparam 5139 +sched_setscheduler 5141 +sched_yield 5023 +seccomp 5312 +select +semctl 5064 +semget 5062 +semop 5063 +semtimedop 5214 +semtimedop_time64 +send +sendfile 5039 +sendfile64 +sendmmsg 5302 +sendmsg 5045 +sendto 5043 +set_mempolicy 5229 +set_mempolicy_home_node 5450 +set_robust_list 5268 +set_thread_area 5242 +set_tid_address 5212 +setdomainname 5166 +setfsgid 5121 +setfsgid32 +setfsuid 5120 +setfsuid32 +setgid 5104 +setgid32 +setgroups 5114 +setgroups32 +sethae +sethostname 5165 +setitimer 5036 +setns 5303 +setpgid 5107 +setpgrp +setpriority 5138 +setregid 5112 +setregid32 +setresgid 5117 +setresgid32 +setresuid 5115 +setresuid32 +setreuid 5111 +setreuid32 +setrlimit 5155 +setsid 5110 +setsockopt 5053 +settimeofday 5159 +setuid 5103 +setuid32 +setxattr 5180 +sgetmask +shmat 5029 +shmctl 5030 +shmdt 5065 +shmget 5028 +shutdown 5047 +sigaction +sigaltstack 5129 +signal +signalfd 5276 +signalfd4 5283 +sigpending +sigprocmask +sigreturn +sigsuspend +socket 5040 +socketcall +socketpair 5052 +splice 5263 +spu_create +spu_run +ssetmask +stat 5004 +stat64 +statfs 5134 +statfs64 +statx 5326 +stime +subpage_prot +swapcontext +swapoff 5163 +swapon 5162 +switch_endian +symlink 5086 +symlinkat 5256 +sync 5157 +sync_file_range 5264 +sync_file_range2 +syncfs 5301 +sys_debug_setcontext +syscall +sysfs 5136 +sysinfo 5097 +syslog 5101 +sysmips 5199 +tee 5265 +tgkill 5225 +time +timer_create 5216 +timer_delete 5220 +timer_getoverrun 5219 +timer_gettime 5218 +timer_gettime64 +timer_settime 5217 +timer_settime64 +timerfd 5277 +timerfd_create 5280 +timerfd_gettime 5281 +timerfd_gettime64 +timerfd_settime 5282 +timerfd_settime64 +times 5098 +tkill 5192 +truncate 5074 +truncate64 +ugetrlimit +umask 5093 +umount +umount2 5161 +uname 5061 +unlink 5085 +unlinkat 5253 +unshare 5262 +userfaultfd 5317 +ustat 5133 +utime 5130 +utimensat 5275 +utimensat_time64 +utimes 5226 +utrap_install +vfork +vhangup 5150 +vm86 +vm86old +vmsplice 5266 +wait4 5059 +waitid 5237 +waitpid +write 5001 +writev 5019 diff --git a/src/basic/syscalls-mips64n32.txt b/src/basic/syscalls-mips64n32.txt new file mode 100644 index 0000000..5ce5fea --- /dev/null +++ b/src/basic/syscalls-mips64n32.txt @@ -0,0 +1,514 @@ +_llseek +_newselect 6022 +accept 6042 +accept4 6297 +access 6020 +acct 6158 +add_key 6243 +adjtimex 6154 +alarm 6037 +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bind 6048 +bpf 6319 +brk 6012 +cachectl 6198 +cacheflush 6197 +capget 6123 +capset 6124 +chdir 6078 +chmod 6088 +chown 6090 +chown32 +chroot 6156 +clock_adjtime 6305 +clock_adjtime64 6405 +clock_getres 6227 +clock_getres_time64 6406 +clock_gettime 6226 +clock_gettime64 6403 +clock_nanosleep 6228 +clock_nanosleep_time64 6407 +clock_settime 6225 +clock_settime64 6404 +clone 6055 +clone3 6435 +close 6003 +close_range 6436 +connect 6041 +copy_file_range 6324 +creat 6083 +delete_module 6169 +dipc +dup 6031 +dup2 6032 +dup3 6290 +epoll_create 6207 +epoll_create1 6289 +epoll_ctl 6208 +epoll_ctl_old +epoll_pwait 6276 +epoll_pwait2 6441 +epoll_wait 6209 +epoll_wait_old +eventfd 6282 +eventfd2 6288 +exec_with_loader +execv +execve 6057 +execveat 6320 +exit 6058 +exit_group 6205 +faccessat 6263 +faccessat2 6439 +fadvise64 6216 +fadvise64_64 +fallocate 6283 +fanotify_init 6300 +fanotify_mark 6301 +fchdir 6079 +fchmod 6089 +fchmodat 6262 +fchmodat2 6452 +fchown 6091 +fchown32 +fchownat 6254 +fcntl 6070 +fcntl64 6212 +fdatasync 6073 +fgetxattr 6185 +finit_module 6312 +flistxattr 6188 +flock 6071 +fork 6056 +fremovexattr 6191 +fsconfig 6431 +fsetxattr 6182 +fsmount 6432 +fsopen 6430 +fspick 6433 +fstat 6005 +fstat64 +fstatat64 +fstatfs 6135 +fstatfs64 6218 +fsync 6072 +ftruncate 6075 +ftruncate64 +futex 6194 +futex_requeue 6456 +futex_time64 6422 +futex_wait 6455 +futex_waitv 6449 +futex_wake 6454 +futimesat 6255 +get_mempolicy 6232 +get_robust_list 6273 +get_thread_area +getcpu 6275 +getcwd 6077 +getdents 6076 +getdents64 6299 +getdomainname +getdtablesize +getegid 6106 +getegid32 +geteuid 6105 +geteuid32 +getgid 6102 +getgid32 +getgroups 6113 +getgroups32 +gethostname +getitimer 6035 +getpagesize +getpeername 6051 +getpgid 6119 +getpgrp 6109 +getpid 6038 +getppid 6108 +getpriority 6137 +getrandom 6317 +getresgid 6118 +getresgid32 +getresuid 6116 +getresuid32 +getrlimit 6095 +getrusage 6096 +getsid 6122 +getsockname 6050 +getsockopt 6054 +gettid 6178 +gettimeofday 6094 +getuid 6100 +getuid32 +getxattr 6183 +getxgid +getxpid +getxuid +init_module 6168 +inotify_add_watch 6248 +inotify_init 6247 +inotify_init1 6292 +inotify_rm_watch 6249 +io_cancel 6204 +io_destroy 6201 +io_getevents 6202 +io_pgetevents 6332 +io_pgetevents_time64 6416 +io_setup 6200 +io_submit 6203 +io_uring_enter 6426 +io_uring_register 6427 +io_uring_setup 6425 +ioctl 6015 +ioperm +iopl +ioprio_get 6278 +ioprio_set 6277 +ipc +kcmp 6311 +kern_features +kexec_file_load +kexec_load 6274 +keyctl 6245 +kill 6060 +landlock_add_rule 6445 +landlock_create_ruleset 6444 +landlock_restrict_self 6446 +lchown 6092 +lchown32 +lgetxattr 6184 +link 6084 +linkat 6259 +listen 6049 +listxattr 6186 +llistxattr 6187 +lookup_dcookie 6206 +lremovexattr 6190 +lseek 6008 +lsetxattr 6181 +lstat 6006 +lstat64 +madvise 6027 +map_shadow_stack 6453 +mbind 6231 +membarrier 6322 +memfd_create 6318 +memfd_secret +memory_ordering +migrate_pages 6250 +mincore 6026 +mkdir 6081 +mkdirat 6252 +mknod 6131 +mknodat 6253 +mlock 6146 +mlock2 6323 +mlockall 6148 +mmap 6009 +mmap2 +modify_ldt +mount 6160 +mount_setattr 6442 +move_mount 6429 +move_pages 6271 +mprotect 6010 +mq_getsetattr 6239 +mq_notify 6238 +mq_open 6234 +mq_timedreceive 6237 +mq_timedreceive_time64 6419 +mq_timedsend 6236 +mq_timedsend_time64 6418 +mq_unlink 6235 +mremap 6024 +msgctl 6069 +msgget 6066 +msgrcv 6068 +msgsnd 6067 +msync 6025 +multiplexer +munlock 6147 +munlockall 6149 +munmap 6011 +name_to_handle_at 6303 +nanosleep 6034 +newfstatat 6256 +nice +old_adjtimex +oldfstat +oldlstat +oldolduname +oldstat +oldumount +olduname +open 6002 +open_by_handle_at 6304 +open_tree 6428 +openat 6251 +openat2 6437 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause 6033 +pciconfig_iobase +pciconfig_read +pciconfig_write +perf_event_open 6296 +perfctr +personality 6132 +pidfd_getfd 6438 +pidfd_open 6434 +pidfd_send_signal 6424 +pipe 6021 +pipe2 6291 +pivot_root 6151 +pkey_alloc 6328 +pkey_free 6329 +pkey_mprotect 6327 +poll 6007 +ppoll 6265 +ppoll_time64 6414 +prctl 6153 +pread64 6016 +preadv 6293 +preadv2 6325 +prlimit64 6302 +process_madvise 6440 +process_mrelease 6448 +process_vm_readv 6309 +process_vm_writev 6310 +pselect6 6264 +pselect6_time64 6413 +ptrace 6099 +pwrite64 6017 +pwritev 6294 +pwritev2 6326 +quotactl 6172 +quotactl_fd 6443 +read 6000 +readahead 6179 +readdir +readlink 6087 +readlinkat 6261 +readv 6018 +reboot 6164 +recv +recvfrom 6044 +recvmmsg 6298 +recvmmsg_time64 6417 +recvmsg 6046 +remap_file_pages 6210 +removexattr 6189 +rename 6080 +renameat 6258 +renameat2 6315 +request_key 6244 +restart_syscall 6214 +riscv_flush_icache +riscv_hwprobe +rmdir 6082 +rseq 6331 +rt_sigaction 6013 +rt_sigpending 6125 +rt_sigprocmask 6014 +rt_sigqueueinfo 6127 +rt_sigreturn 6211 +rt_sigsuspend 6128 +rt_sigtimedwait 6126 +rt_sigtimedwait_time64 6421 +rt_tgsigqueueinfo 6295 +rtas +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max 6143 +sched_get_priority_min 6144 +sched_getaffinity 6196 +sched_getattr 6314 +sched_getparam 6140 +sched_getscheduler 6142 +sched_rr_get_interval 6145 +sched_rr_get_interval_time64 6423 +sched_set_affinity +sched_setaffinity 6195 +sched_setattr 6313 +sched_setparam 6139 +sched_setscheduler 6141 +sched_yield 6023 +seccomp 6316 +select +semctl 6064 +semget 6062 +semop 6063 +semtimedop 6215 +semtimedop_time64 6420 +send +sendfile 6039 +sendfile64 6219 +sendmmsg 6307 +sendmsg 6045 +sendto 6043 +set_mempolicy 6233 +set_mempolicy_home_node 6450 +set_robust_list 6272 +set_thread_area 6246 +set_tid_address 6213 +setdomainname 6166 +setfsgid 6121 +setfsgid32 +setfsuid 6120 +setfsuid32 +setgid 6104 +setgid32 +setgroups 6114 +setgroups32 +sethae +sethostname 6165 +setitimer 6036 +setns 6308 +setpgid 6107 +setpgrp +setpriority 6138 +setregid 6112 +setregid32 +setresgid 6117 +setresgid32 +setresuid 6115 +setresuid32 +setreuid 6111 +setreuid32 +setrlimit 6155 +setsid 6110 +setsockopt 6053 +settimeofday 6159 +setuid 6103 +setuid32 +setxattr 6180 +sgetmask +shmat 6029 +shmctl 6030 +shmdt 6065 +shmget 6028 +shutdown 6047 +sigaction +sigaltstack 6129 +signal +signalfd 6280 +signalfd4 6287 +sigpending +sigprocmask +sigreturn +sigsuspend +socket 6040 +socketcall +socketpair 6052 +splice 6267 +spu_create +spu_run +ssetmask +stat 6004 +stat64 +statfs 6134 +statfs64 6217 +statx 6330 +stime +subpage_prot +swapcontext +swapoff 6163 +swapon 6162 +switch_endian +symlink 6086 +symlinkat 6260 +sync 6157 +sync_file_range 6268 +sync_file_range2 +syncfs 6306 +sys_debug_setcontext +syscall +sysfs 6136 +sysinfo 6097 +syslog 6101 +sysmips 6199 +tee 6269 +tgkill 6229 +time +timer_create 6220 +timer_delete 6224 +timer_getoverrun 6223 +timer_gettime 6222 +timer_gettime64 6408 +timer_settime 6221 +timer_settime64 6409 +timerfd 6281 +timerfd_create 6284 +timerfd_gettime 6285 +timerfd_gettime64 6410 +timerfd_settime 6286 +timerfd_settime64 6411 +times 6098 +tkill 6192 +truncate 6074 +truncate64 +ugetrlimit +umask 6093 +umount +umount2 6161 +uname 6061 +unlink 6085 +unlinkat 6257 +unshare 6266 +userfaultfd 6321 +ustat 6133 +utime 6130 +utimensat 6279 +utimensat_time64 6412 +utimes 6230 +utrap_install +vfork +vhangup 6150 +vm86 +vm86old +vmsplice 6270 +wait4 6059 +waitid 6241 +waitpid +write 6001 +writev 6019 diff --git a/src/basic/syscalls-mipso32.txt b/src/basic/syscalls-mipso32.txt new file mode 100644 index 0000000..04fa146 --- /dev/null +++ b/src/basic/syscalls-mipso32.txt @@ -0,0 +1,514 @@ +_llseek 4140 +_newselect 4142 +accept 4168 +accept4 4334 +access 4033 +acct 4051 +add_key 4280 +adjtimex 4124 +alarm 4027 +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bind 4169 +bpf 4355 +brk 4045 +cachectl 4148 +cacheflush 4147 +capget 4204 +capset 4205 +chdir 4012 +chmod 4015 +chown 4202 +chown32 +chroot 4061 +clock_adjtime 4341 +clock_adjtime64 4405 +clock_getres 4264 +clock_getres_time64 4406 +clock_gettime 4263 +clock_gettime64 4403 +clock_nanosleep 4265 +clock_nanosleep_time64 4407 +clock_settime 4262 +clock_settime64 4404 +clone 4120 +clone3 4435 +close 4006 +close_range 4436 +connect 4170 +copy_file_range 4360 +creat 4008 +delete_module 4129 +dipc +dup 4041 +dup2 4063 +dup3 4327 +epoll_create 4248 +epoll_create1 4326 +epoll_ctl 4249 +epoll_ctl_old +epoll_pwait 4313 +epoll_pwait2 4441 +epoll_wait 4250 +epoll_wait_old +eventfd 4319 +eventfd2 4325 +exec_with_loader +execv +execve 4011 +execveat 4356 +exit 4001 +exit_group 4246 +faccessat 4300 +faccessat2 4439 +fadvise64 4254 +fadvise64_64 +fallocate 4320 +fanotify_init 4336 +fanotify_mark 4337 +fchdir 4133 +fchmod 4094 +fchmodat 4299 +fchmodat2 4452 +fchown 4095 +fchown32 +fchownat 4291 +fcntl 4055 +fcntl64 4220 +fdatasync 4152 +fgetxattr 4229 +finit_module 4348 +flistxattr 4232 +flock 4143 +fork 4002 +fremovexattr 4235 +fsconfig 4431 +fsetxattr 4226 +fsmount 4432 +fsopen 4430 +fspick 4433 +fstat 4108 +fstat64 4215 +fstatat64 4293 +fstatfs 4100 +fstatfs64 4256 +fsync 4118 +ftruncate 4093 +ftruncate64 4212 +futex 4238 +futex_requeue 4456 +futex_time64 4422 +futex_wait 4455 +futex_waitv 4449 +futex_wake 4454 +futimesat 4292 +get_mempolicy 4269 +get_robust_list 4310 +get_thread_area +getcpu 4312 +getcwd 4203 +getdents 4141 +getdents64 4219 +getdomainname +getdtablesize +getegid 4050 +getegid32 +geteuid 4049 +geteuid32 +getgid 4047 +getgid32 +getgroups 4080 +getgroups32 +gethostname +getitimer 4105 +getpagesize +getpeername 4171 +getpgid 4132 +getpgrp 4065 +getpid 4020 +getppid 4064 +getpriority 4096 +getrandom 4353 +getresgid 4191 +getresgid32 +getresuid 4186 +getresuid32 +getrlimit 4076 +getrusage 4077 +getsid 4151 +getsockname 4172 +getsockopt 4173 +gettid 4222 +gettimeofday 4078 +getuid 4024 +getuid32 +getxattr 4227 +getxgid +getxpid +getxuid +init_module 4128 +inotify_add_watch 4285 +inotify_init 4284 +inotify_init1 4329 +inotify_rm_watch 4286 +io_cancel 4245 +io_destroy 4242 +io_getevents 4243 +io_pgetevents 4368 +io_pgetevents_time64 4416 +io_setup 4241 +io_submit 4244 +io_uring_enter 4426 +io_uring_register 4427 +io_uring_setup 4425 +ioctl 4054 +ioperm 4101 +iopl 4110 +ioprio_get 4315 +ioprio_set 4314 +ipc 4117 +kcmp 4347 +kern_features +kexec_file_load +kexec_load 4311 +keyctl 4282 +kill 4037 +landlock_add_rule 4445 +landlock_create_ruleset 4444 +landlock_restrict_self 4446 +lchown 4016 +lchown32 +lgetxattr 4228 +link 4009 +linkat 4296 +listen 4174 +listxattr 4230 +llistxattr 4231 +lookup_dcookie 4247 +lremovexattr 4234 +lseek 4019 +lsetxattr 4225 +lstat 4107 +lstat64 4214 +madvise 4218 +map_shadow_stack 4453 +mbind 4268 +membarrier 4358 +memfd_create 4354 +memfd_secret +memory_ordering +migrate_pages 4287 +mincore 4217 +mkdir 4039 +mkdirat 4289 +mknod 4014 +mknodat 4290 +mlock 4154 +mlock2 4359 +mlockall 4156 +mmap 4090 +mmap2 4210 +modify_ldt 4123 +mount 4021 +mount_setattr 4442 +move_mount 4429 +move_pages 4308 +mprotect 4125 +mq_getsetattr 4276 +mq_notify 4275 +mq_open 4271 +mq_timedreceive 4274 +mq_timedreceive_time64 4419 +mq_timedsend 4273 +mq_timedsend_time64 4418 +mq_unlink 4272 +mremap 4167 +msgctl 4402 +msgget 4399 +msgrcv 4401 +msgsnd 4400 +msync 4144 +multiplexer +munlock 4155 +munlockall 4157 +munmap 4091 +name_to_handle_at 4339 +nanosleep 4166 +newfstatat +nice 4034 +old_adjtimex +oldfstat +oldlstat +oldolduname +oldstat +oldumount +olduname +open 4005 +open_by_handle_at 4340 +open_tree 4428 +openat 4288 +openat2 4437 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause 4029 +pciconfig_iobase +pciconfig_read +pciconfig_write +perf_event_open 4333 +perfctr +personality 4136 +pidfd_getfd 4438 +pidfd_open 4434 +pidfd_send_signal 4424 +pipe 4042 +pipe2 4328 +pivot_root 4216 +pkey_alloc 4364 +pkey_free 4365 +pkey_mprotect 4363 +poll 4188 +ppoll 4302 +ppoll_time64 4414 +prctl 4192 +pread64 4200 +preadv 4330 +preadv2 4361 +prlimit64 4338 +process_madvise 4440 +process_mrelease 4448 +process_vm_readv 4345 +process_vm_writev 4346 +pselect6 4301 +pselect6_time64 4413 +ptrace 4026 +pwrite64 4201 +pwritev 4331 +pwritev2 4362 +quotactl 4131 +quotactl_fd 4443 +read 4003 +readahead 4223 +readdir 4089 +readlink 4085 +readlinkat 4298 +readv 4145 +reboot 4088 +recv 4175 +recvfrom 4176 +recvmmsg 4335 +recvmmsg_time64 4417 +recvmsg 4177 +remap_file_pages 4251 +removexattr 4233 +rename 4038 +renameat 4295 +renameat2 4351 +request_key 4281 +restart_syscall 4253 +riscv_flush_icache +riscv_hwprobe +rmdir 4040 +rseq 4367 +rt_sigaction 4194 +rt_sigpending 4196 +rt_sigprocmask 4195 +rt_sigqueueinfo 4198 +rt_sigreturn 4193 +rt_sigsuspend 4199 +rt_sigtimedwait 4197 +rt_sigtimedwait_time64 4421 +rt_tgsigqueueinfo 4332 +rtas +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max 4163 +sched_get_priority_min 4164 +sched_getaffinity 4240 +sched_getattr 4350 +sched_getparam 4159 +sched_getscheduler 4161 +sched_rr_get_interval 4165 +sched_rr_get_interval_time64 4423 +sched_set_affinity +sched_setaffinity 4239 +sched_setattr 4349 +sched_setparam 4158 +sched_setscheduler 4160 +sched_yield 4162 +seccomp 4352 +select +semctl 4394 +semget 4393 +semop +semtimedop +semtimedop_time64 4420 +send 4178 +sendfile 4207 +sendfile64 4237 +sendmmsg 4343 +sendmsg 4179 +sendto 4180 +set_mempolicy 4270 +set_mempolicy_home_node 4450 +set_robust_list 4309 +set_thread_area 4283 +set_tid_address 4252 +setdomainname 4121 +setfsgid 4139 +setfsgid32 +setfsuid 4138 +setfsuid32 +setgid 4046 +setgid32 +setgroups 4081 +setgroups32 +sethae +sethostname 4074 +setitimer 4104 +setns 4344 +setpgid 4057 +setpgrp +setpriority 4097 +setregid 4071 +setregid32 +setresgid 4190 +setresgid32 +setresuid 4185 +setresuid32 +setreuid 4070 +setreuid32 +setrlimit 4075 +setsid 4066 +setsockopt 4181 +settimeofday 4079 +setuid 4023 +setuid32 +setxattr 4224 +sgetmask 4068 +shmat 4397 +shmctl 4396 +shmdt 4398 +shmget 4395 +shutdown 4182 +sigaction 4067 +sigaltstack 4206 +signal 4048 +signalfd 4317 +signalfd4 4324 +sigpending 4073 +sigprocmask 4126 +sigreturn 4119 +sigsuspend 4072 +socket 4183 +socketcall 4102 +socketpair 4184 +splice 4304 +spu_create +spu_run +ssetmask 4069 +stat 4106 +stat64 4213 +statfs 4099 +statfs64 4255 +statx 4366 +stime 4025 +subpage_prot +swapcontext +swapoff 4115 +swapon 4087 +switch_endian +symlink 4083 +symlinkat 4297 +sync 4036 +sync_file_range 4305 +sync_file_range2 +syncfs 4342 +sys_debug_setcontext +syscall 4000 +sysfs 4135 +sysinfo 4116 +syslog 4103 +sysmips 4149 +tee 4306 +tgkill 4266 +time 4013 +timer_create 4257 +timer_delete 4261 +timer_getoverrun 4260 +timer_gettime 4259 +timer_gettime64 4408 +timer_settime 4258 +timer_settime64 4409 +timerfd 4318 +timerfd_create 4321 +timerfd_gettime 4322 +timerfd_gettime64 4410 +timerfd_settime 4323 +timerfd_settime64 4411 +times 4043 +tkill 4236 +truncate 4092 +truncate64 4211 +ugetrlimit +umask 4060 +umount 4022 +umount2 4052 +uname 4122 +unlink 4010 +unlinkat 4294 +unshare 4303 +userfaultfd 4357 +ustat 4062 +utime 4030 +utimensat 4316 +utimensat_time64 4412 +utimes 4267 +utrap_install +vfork +vhangup 4111 +vm86 4113 +vm86old +vmsplice 4307 +wait4 4114 +waitid 4278 +waitpid 4007 +write 4004 +writev 4146 diff --git a/src/basic/syscalls-parisc.txt b/src/basic/syscalls-parisc.txt new file mode 100644 index 0000000..31a31ed --- /dev/null +++ b/src/basic/syscalls-parisc.txt @@ -0,0 +1,514 @@ +_llseek 140 +_newselect 142 +accept 35 +accept4 320 +access 33 +acct 51 +add_key 264 +adjtimex 124 +alarm 27 +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bind 22 +bpf 341 +brk 45 +cachectl +cacheflush +capget 106 +capset 107 +chdir 12 +chmod 15 +chown 180 +chown32 +chroot 61 +clock_adjtime 324 +clock_adjtime64 405 +clock_getres 257 +clock_getres_time64 406 +clock_gettime 256 +clock_gettime64 403 +clock_nanosleep 258 +clock_nanosleep_time64 407 +clock_settime 255 +clock_settime64 404 +clone 120 +clone3 435 +close 6 +close_range 436 +connect 31 +copy_file_range 346 +creat 8 +delete_module 129 +dipc +dup 41 +dup2 63 +dup3 312 +epoll_create 224 +epoll_create1 311 +epoll_ctl 225 +epoll_ctl_old +epoll_pwait 297 +epoll_pwait2 441 +epoll_wait 226 +epoll_wait_old +eventfd 304 +eventfd2 310 +exec_with_loader +execv +execve 11 +execveat 342 +exit 1 +exit_group 222 +faccessat 287 +faccessat2 439 +fadvise64 +fadvise64_64 236 +fallocate 305 +fanotify_init 322 +fanotify_mark 323 +fchdir 133 +fchmod 94 +fchmodat 286 +fchmodat2 452 +fchown 95 +fchown32 +fchownat 278 +fcntl 55 +fcntl64 202 +fdatasync 148 +fgetxattr 243 +finit_module 333 +flistxattr 246 +flock 143 +fork 2 +fremovexattr 249 +fsconfig 431 +fsetxattr 240 +fsmount 432 +fsopen 430 +fspick 433 +fstat 28 +fstat64 112 +fstatat64 280 +fstatfs 100 +fstatfs64 299 +fsync 118 +ftruncate 93 +ftruncate64 200 +futex 210 +futex_requeue 456 +futex_time64 422 +futex_wait 455 +futex_waitv 449 +futex_wake 454 +futimesat 279 +get_mempolicy 261 +get_robust_list 290 +get_thread_area +getcpu 296 +getcwd 110 +getdents 141 +getdents64 201 +getdomainname +getdtablesize +getegid 50 +getegid32 +geteuid 49 +geteuid32 +getgid 47 +getgid32 +getgroups 80 +getgroups32 +gethostname +getitimer 105 +getpagesize +getpeername 53 +getpgid 132 +getpgrp 65 +getpid 20 +getppid 64 +getpriority 96 +getrandom 339 +getresgid 171 +getresgid32 +getresuid 165 +getresuid32 +getrlimit 76 +getrusage 77 +getsid 147 +getsockname 44 +getsockopt 182 +gettid 206 +gettimeofday 78 +getuid 24 +getuid32 +getxattr 241 +getxgid +getxpid +getxuid +init_module 128 +inotify_add_watch 270 +inotify_init 269 +inotify_init1 314 +inotify_rm_watch 271 +io_cancel 219 +io_destroy 216 +io_getevents 217 +io_pgetevents 350 +io_pgetevents_time64 416 +io_setup 215 +io_submit 218 +io_uring_enter 426 +io_uring_register 427 +io_uring_setup 425 +ioctl 54 +ioperm +iopl +ioprio_get 268 +ioprio_set 267 +ipc +kcmp 332 +kern_features +kexec_file_load 355 +kexec_load 300 +keyctl 266 +kill 37 +landlock_add_rule 445 +landlock_create_ruleset 444 +landlock_restrict_self 446 +lchown 16 +lchown32 +lgetxattr 242 +link 9 +linkat 283 +listen 32 +listxattr 244 +llistxattr 245 +lookup_dcookie 223 +lremovexattr 248 +lseek 19 +lsetxattr 239 +lstat 84 +lstat64 198 +madvise 119 +map_shadow_stack 453 +mbind 260 +membarrier 343 +memfd_create 340 +memfd_secret +memory_ordering +migrate_pages 272 +mincore 72 +mkdir 39 +mkdirat 276 +mknod 14 +mknodat 277 +mlock 150 +mlock2 345 +mlockall 152 +mmap 90 +mmap2 89 +modify_ldt +mount 21 +mount_setattr 442 +move_mount 429 +move_pages 295 +mprotect 125 +mq_getsetattr 234 +mq_notify 233 +mq_open 229 +mq_timedreceive 232 +mq_timedreceive_time64 419 +mq_timedsend 231 +mq_timedsend_time64 418 +mq_unlink 230 +mremap 163 +msgctl 191 +msgget 190 +msgrcv 189 +msgsnd 188 +msync 144 +multiplexer +munlock 151 +munlockall 153 +munmap 91 +name_to_handle_at 325 +nanosleep 162 +newfstatat +nice 34 +old_adjtimex +oldfstat +oldlstat +oldolduname +oldstat +oldumount +olduname +open 5 +open_by_handle_at 326 +open_tree 428 +openat 275 +openat2 437 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause 29 +pciconfig_iobase +pciconfig_read +pciconfig_write +perf_event_open 318 +perfctr +personality 136 +pidfd_getfd 438 +pidfd_open 434 +pidfd_send_signal 424 +pipe 42 +pipe2 313 +pivot_root 67 +pkey_alloc 352 +pkey_free 353 +pkey_mprotect 351 +poll 168 +ppoll 274 +ppoll_time64 414 +prctl 172 +pread64 108 +preadv 315 +preadv2 347 +prlimit64 321 +process_madvise 440 +process_mrelease 448 +process_vm_readv 330 +process_vm_writev 331 +pselect6 273 +pselect6_time64 413 +ptrace 26 +pwrite64 109 +pwritev 316 +pwritev2 348 +quotactl 131 +quotactl_fd 443 +read 3 +readahead 207 +readdir +readlink 85 +readlinkat 285 +readv 145 +reboot 88 +recv 98 +recvfrom 123 +recvmmsg 319 +recvmmsg_time64 417 +recvmsg 184 +remap_file_pages 227 +removexattr 247 +rename 38 +renameat 282 +renameat2 337 +request_key 265 +restart_syscall 0 +riscv_flush_icache +riscv_hwprobe +rmdir 40 +rseq 354 +rt_sigaction 174 +rt_sigpending 176 +rt_sigprocmask 175 +rt_sigqueueinfo 178 +rt_sigreturn 173 +rt_sigsuspend 179 +rt_sigtimedwait 177 +rt_sigtimedwait_time64 421 +rt_tgsigqueueinfo 317 +rtas +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max 159 +sched_get_priority_min 160 +sched_getaffinity 212 +sched_getattr 335 +sched_getparam 155 +sched_getscheduler 157 +sched_rr_get_interval 161 +sched_rr_get_interval_time64 423 +sched_set_affinity +sched_setaffinity 211 +sched_setattr 334 +sched_setparam 154 +sched_setscheduler 156 +sched_yield 158 +seccomp 338 +select +semctl 187 +semget 186 +semop 185 +semtimedop 228 +semtimedop_time64 420 +send 58 +sendfile 122 +sendfile64 209 +sendmmsg 329 +sendmsg 183 +sendto 82 +set_mempolicy 262 +set_mempolicy_home_node 450 +set_robust_list 289 +set_thread_area +set_tid_address 237 +setdomainname 121 +setfsgid 139 +setfsgid32 +setfsuid 138 +setfsuid32 +setgid 46 +setgid32 +setgroups 81 +setgroups32 +sethae +sethostname 74 +setitimer 104 +setns 328 +setpgid 57 +setpgrp +setpriority 97 +setregid 71 +setregid32 +setresgid 170 +setresgid32 +setresuid 164 +setresuid32 +setreuid 70 +setreuid32 +setrlimit 75 +setsid 66 +setsockopt 181 +settimeofday 79 +setuid 23 +setuid32 +setxattr 238 +sgetmask 68 +shmat 192 +shmctl 195 +shmdt 193 +shmget 194 +shutdown 117 +sigaction +sigaltstack 166 +signal 48 +signalfd 302 +signalfd4 309 +sigpending 73 +sigprocmask 126 +sigreturn +sigsuspend +socket 17 +socketcall +socketpair 56 +splice 291 +spu_create +spu_run +ssetmask 69 +stat 18 +stat64 101 +statfs 99 +statfs64 298 +statx 349 +stime 25 +subpage_prot +swapcontext +swapoff 115 +swapon 87 +switch_endian +symlink 83 +symlinkat 284 +sync 36 +sync_file_range 292 +sync_file_range2 +syncfs 327 +sys_debug_setcontext +syscall +sysfs 135 +sysinfo 116 +syslog 103 +sysmips +tee 293 +tgkill 259 +time 13 +timer_create 250 +timer_delete 254 +timer_getoverrun 253 +timer_gettime 252 +timer_gettime64 408 +timer_settime 251 +timer_settime64 409 +timerfd +timerfd_create 306 +timerfd_gettime 308 +timerfd_gettime64 410 +timerfd_settime 307 +timerfd_settime64 411 +times 43 +tkill 208 +truncate 92 +truncate64 199 +ugetrlimit +umask 60 +umount +umount2 52 +uname 59 +unlink 10 +unlinkat 281 +unshare 288 +userfaultfd 344 +ustat 62 +utime 30 +utimensat 301 +utimensat_time64 412 +utimes 336 +utrap_install +vfork 113 +vhangup 111 +vm86 +vm86old +vmsplice 294 +wait4 114 +waitid 235 +waitpid 7 +write 4 +writev 146 diff --git a/src/basic/syscalls-powerpc.txt b/src/basic/syscalls-powerpc.txt new file mode 100644 index 0000000..87f546c --- /dev/null +++ b/src/basic/syscalls-powerpc.txt @@ -0,0 +1,514 @@ +_llseek 140 +_newselect 142 +accept 330 +accept4 344 +access 33 +acct 51 +add_key 269 +adjtimex 124 +alarm 27 +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bind 327 +bpf 361 +brk 45 +cachectl +cacheflush +capget 183 +capset 184 +chdir 12 +chmod 15 +chown 181 +chown32 +chroot 61 +clock_adjtime 347 +clock_adjtime64 405 +clock_getres 247 +clock_getres_time64 406 +clock_gettime 246 +clock_gettime64 403 +clock_nanosleep 248 +clock_nanosleep_time64 407 +clock_settime 245 +clock_settime64 404 +clone 120 +clone3 435 +close 6 +close_range 436 +connect 328 +copy_file_range 379 +creat 8 +delete_module 129 +dipc +dup 41 +dup2 63 +dup3 316 +epoll_create 236 +epoll_create1 315 +epoll_ctl 237 +epoll_ctl_old +epoll_pwait 303 +epoll_pwait2 441 +epoll_wait 238 +epoll_wait_old +eventfd 307 +eventfd2 314 +exec_with_loader +execv +execve 11 +execveat 362 +exit 1 +exit_group 234 +faccessat 298 +faccessat2 439 +fadvise64 233 +fadvise64_64 254 +fallocate 309 +fanotify_init 323 +fanotify_mark 324 +fchdir 133 +fchmod 94 +fchmodat 297 +fchmodat2 452 +fchown 95 +fchown32 +fchownat 289 +fcntl 55 +fcntl64 204 +fdatasync 148 +fgetxattr 214 +finit_module 353 +flistxattr 217 +flock 143 +fork 2 +fremovexattr 220 +fsconfig 431 +fsetxattr 211 +fsmount 432 +fsopen 430 +fspick 433 +fstat 108 +fstat64 197 +fstatat64 291 +fstatfs 100 +fstatfs64 253 +fsync 118 +ftruncate 93 +ftruncate64 194 +futex 221 +futex_requeue 456 +futex_time64 422 +futex_wait 455 +futex_waitv 449 +futex_wake 454 +futimesat 290 +get_mempolicy 260 +get_robust_list 299 +get_thread_area +getcpu 302 +getcwd 182 +getdents 141 +getdents64 202 +getdomainname +getdtablesize +getegid 50 +getegid32 +geteuid 49 +geteuid32 +getgid 47 +getgid32 +getgroups 80 +getgroups32 +gethostname +getitimer 105 +getpagesize +getpeername 332 +getpgid 132 +getpgrp 65 +getpid 20 +getppid 64 +getpriority 96 +getrandom 359 +getresgid 170 +getresgid32 +getresuid 165 +getresuid32 +getrlimit 76 +getrusage 77 +getsid 147 +getsockname 331 +getsockopt 340 +gettid 207 +gettimeofday 78 +getuid 24 +getuid32 +getxattr 212 +getxgid +getxpid +getxuid +init_module 128 +inotify_add_watch 276 +inotify_init 275 +inotify_init1 318 +inotify_rm_watch 277 +io_cancel 231 +io_destroy 228 +io_getevents 229 +io_pgetevents 388 +io_pgetevents_time64 416 +io_setup 227 +io_submit 230 +io_uring_enter 426 +io_uring_register 427 +io_uring_setup 425 +ioctl 54 +ioperm 101 +iopl 110 +ioprio_get 274 +ioprio_set 273 +ipc 117 +kcmp 354 +kern_features +kexec_file_load 382 +kexec_load 268 +keyctl 271 +kill 37 +landlock_add_rule 445 +landlock_create_ruleset 444 +landlock_restrict_self 446 +lchown 16 +lchown32 +lgetxattr 213 +link 9 +linkat 294 +listen 329 +listxattr 215 +llistxattr 216 +lookup_dcookie 235 +lremovexattr 219 +lseek 19 +lsetxattr 210 +lstat 107 +lstat64 196 +madvise 205 +map_shadow_stack 453 +mbind 259 +membarrier 365 +memfd_create 360 +memfd_secret +memory_ordering +migrate_pages 258 +mincore 206 +mkdir 39 +mkdirat 287 +mknod 14 +mknodat 288 +mlock 150 +mlock2 378 +mlockall 152 +mmap 90 +mmap2 192 +modify_ldt 123 +mount 21 +mount_setattr 442 +move_mount 429 +move_pages 301 +mprotect 125 +mq_getsetattr 267 +mq_notify 266 +mq_open 262 +mq_timedreceive 265 +mq_timedreceive_time64 419 +mq_timedsend 264 +mq_timedsend_time64 418 +mq_unlink 263 +mremap 163 +msgctl 402 +msgget 399 +msgrcv 401 +msgsnd 400 +msync 144 +multiplexer 201 +munlock 151 +munlockall 153 +munmap 91 +name_to_handle_at 345 +nanosleep 162 +newfstatat +nice 34 +old_adjtimex +oldfstat 28 +oldlstat 84 +oldolduname 59 +oldstat 18 +oldumount +olduname 109 +open 5 +open_by_handle_at 346 +open_tree 428 +openat 286 +openat2 437 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause 29 +pciconfig_iobase 200 +pciconfig_read 198 +pciconfig_write 199 +perf_event_open 319 +perfctr +personality 136 +pidfd_getfd 438 +pidfd_open 434 +pidfd_send_signal 424 +pipe 42 +pipe2 317 +pivot_root 203 +pkey_alloc 384 +pkey_free 385 +pkey_mprotect 386 +poll 167 +ppoll 281 +ppoll_time64 414 +prctl 171 +pread64 179 +preadv 320 +preadv2 380 +prlimit64 325 +process_madvise 440 +process_mrelease 448 +process_vm_readv 351 +process_vm_writev 352 +pselect6 280 +pselect6_time64 413 +ptrace 26 +pwrite64 180 +pwritev 321 +pwritev2 381 +quotactl 131 +quotactl_fd 443 +read 3 +readahead 191 +readdir 89 +readlink 85 +readlinkat 296 +readv 145 +reboot 88 +recv 336 +recvfrom 337 +recvmmsg 343 +recvmmsg_time64 417 +recvmsg 342 +remap_file_pages 239 +removexattr 218 +rename 38 +renameat 293 +renameat2 357 +request_key 270 +restart_syscall 0 +riscv_flush_icache +riscv_hwprobe +rmdir 40 +rseq 387 +rt_sigaction 173 +rt_sigpending 175 +rt_sigprocmask 174 +rt_sigqueueinfo 177 +rt_sigreturn 172 +rt_sigsuspend 178 +rt_sigtimedwait 176 +rt_sigtimedwait_time64 421 +rt_tgsigqueueinfo 322 +rtas 255 +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max 159 +sched_get_priority_min 160 +sched_getaffinity 223 +sched_getattr 356 +sched_getparam 155 +sched_getscheduler 157 +sched_rr_get_interval 161 +sched_rr_get_interval_time64 423 +sched_set_affinity +sched_setaffinity 222 +sched_setattr 355 +sched_setparam 154 +sched_setscheduler 156 +sched_yield 158 +seccomp 358 +select 82 +semctl 394 +semget 393 +semop +semtimedop +semtimedop_time64 420 +send 334 +sendfile 186 +sendfile64 226 +sendmmsg 349 +sendmsg 341 +sendto 335 +set_mempolicy 261 +set_mempolicy_home_node 450 +set_robust_list 300 +set_thread_area +set_tid_address 232 +setdomainname 121 +setfsgid 139 +setfsgid32 +setfsuid 138 +setfsuid32 +setgid 46 +setgid32 +setgroups 81 +setgroups32 +sethae +sethostname 74 +setitimer 104 +setns 350 +setpgid 57 +setpgrp +setpriority 97 +setregid 71 +setregid32 +setresgid 169 +setresgid32 +setresuid 164 +setresuid32 +setreuid 70 +setreuid32 +setrlimit 75 +setsid 66 +setsockopt 339 +settimeofday 79 +setuid 23 +setuid32 +setxattr 209 +sgetmask 68 +shmat 397 +shmctl 396 +shmdt 398 +shmget 395 +shutdown 338 +sigaction 67 +sigaltstack 185 +signal 48 +signalfd 305 +signalfd4 313 +sigpending 73 +sigprocmask 126 +sigreturn 119 +sigsuspend 72 +socket 326 +socketcall 102 +socketpair 333 +splice 283 +spu_create 279 +spu_run 278 +ssetmask 69 +stat 106 +stat64 195 +statfs 99 +statfs64 252 +statx 383 +stime 25 +subpage_prot 310 +swapcontext 249 +swapoff 115 +swapon 87 +switch_endian 363 +symlink 83 +symlinkat 295 +sync 36 +sync_file_range +sync_file_range2 308 +syncfs 348 +sys_debug_setcontext 256 +syscall +sysfs 135 +sysinfo 116 +syslog 103 +sysmips +tee 284 +tgkill 250 +time 13 +timer_create 240 +timer_delete 244 +timer_getoverrun 243 +timer_gettime 242 +timer_gettime64 408 +timer_settime 241 +timer_settime64 409 +timerfd +timerfd_create 306 +timerfd_gettime 312 +timerfd_gettime64 410 +timerfd_settime 311 +timerfd_settime64 411 +times 43 +tkill 208 +truncate 92 +truncate64 193 +ugetrlimit 190 +umask 60 +umount 22 +umount2 52 +uname 122 +unlink 10 +unlinkat 292 +unshare 282 +userfaultfd 364 +ustat 62 +utime 30 +utimensat 304 +utimensat_time64 412 +utimes 251 +utrap_install +vfork 189 +vhangup 111 +vm86 113 +vm86old +vmsplice 285 +wait4 114 +waitid 272 +waitpid 7 +write 4 +writev 146 diff --git a/src/basic/syscalls-powerpc64.txt b/src/basic/syscalls-powerpc64.txt new file mode 100644 index 0000000..aabdfcc --- /dev/null +++ b/src/basic/syscalls-powerpc64.txt @@ -0,0 +1,514 @@ +_llseek 140 +_newselect 142 +accept 330 +accept4 344 +access 33 +acct 51 +add_key 269 +adjtimex 124 +alarm 27 +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bind 327 +bpf 361 +brk 45 +cachectl +cacheflush +capget 183 +capset 184 +chdir 12 +chmod 15 +chown 181 +chown32 +chroot 61 +clock_adjtime 347 +clock_adjtime64 +clock_getres 247 +clock_getres_time64 +clock_gettime 246 +clock_gettime64 +clock_nanosleep 248 +clock_nanosleep_time64 +clock_settime 245 +clock_settime64 +clone 120 +clone3 435 +close 6 +close_range 436 +connect 328 +copy_file_range 379 +creat 8 +delete_module 129 +dipc +dup 41 +dup2 63 +dup3 316 +epoll_create 236 +epoll_create1 315 +epoll_ctl 237 +epoll_ctl_old +epoll_pwait 303 +epoll_pwait2 441 +epoll_wait 238 +epoll_wait_old +eventfd 307 +eventfd2 314 +exec_with_loader +execv +execve 11 +execveat 362 +exit 1 +exit_group 234 +faccessat 298 +faccessat2 439 +fadvise64 233 +fadvise64_64 +fallocate 309 +fanotify_init 323 +fanotify_mark 324 +fchdir 133 +fchmod 94 +fchmodat 297 +fchmodat2 452 +fchown 95 +fchown32 +fchownat 289 +fcntl 55 +fcntl64 +fdatasync 148 +fgetxattr 214 +finit_module 353 +flistxattr 217 +flock 143 +fork 2 +fremovexattr 220 +fsconfig 431 +fsetxattr 211 +fsmount 432 +fsopen 430 +fspick 433 +fstat 108 +fstat64 +fstatat64 +fstatfs 100 +fstatfs64 253 +fsync 118 +ftruncate 93 +ftruncate64 +futex 221 +futex_requeue 456 +futex_time64 +futex_wait 455 +futex_waitv 449 +futex_wake 454 +futimesat 290 +get_mempolicy 260 +get_robust_list 299 +get_thread_area +getcpu 302 +getcwd 182 +getdents 141 +getdents64 202 +getdomainname +getdtablesize +getegid 50 +getegid32 +geteuid 49 +geteuid32 +getgid 47 +getgid32 +getgroups 80 +getgroups32 +gethostname +getitimer 105 +getpagesize +getpeername 332 +getpgid 132 +getpgrp 65 +getpid 20 +getppid 64 +getpriority 96 +getrandom 359 +getresgid 170 +getresgid32 +getresuid 165 +getresuid32 +getrlimit 76 +getrusage 77 +getsid 147 +getsockname 331 +getsockopt 340 +gettid 207 +gettimeofday 78 +getuid 24 +getuid32 +getxattr 212 +getxgid +getxpid +getxuid +init_module 128 +inotify_add_watch 276 +inotify_init 275 +inotify_init1 318 +inotify_rm_watch 277 +io_cancel 231 +io_destroy 228 +io_getevents 229 +io_pgetevents 388 +io_pgetevents_time64 +io_setup 227 +io_submit 230 +io_uring_enter 426 +io_uring_register 427 +io_uring_setup 425 +ioctl 54 +ioperm 101 +iopl 110 +ioprio_get 274 +ioprio_set 273 +ipc 117 +kcmp 354 +kern_features +kexec_file_load 382 +kexec_load 268 +keyctl 271 +kill 37 +landlock_add_rule 445 +landlock_create_ruleset 444 +landlock_restrict_self 446 +lchown 16 +lchown32 +lgetxattr 213 +link 9 +linkat 294 +listen 329 +listxattr 215 +llistxattr 216 +lookup_dcookie 235 +lremovexattr 219 +lseek 19 +lsetxattr 210 +lstat 107 +lstat64 +madvise 205 +map_shadow_stack 453 +mbind 259 +membarrier 365 +memfd_create 360 +memfd_secret +memory_ordering +migrate_pages 258 +mincore 206 +mkdir 39 +mkdirat 287 +mknod 14 +mknodat 288 +mlock 150 +mlock2 378 +mlockall 152 +mmap 90 +mmap2 +modify_ldt 123 +mount 21 +mount_setattr 442 +move_mount 429 +move_pages 301 +mprotect 125 +mq_getsetattr 267 +mq_notify 266 +mq_open 262 +mq_timedreceive 265 +mq_timedreceive_time64 +mq_timedsend 264 +mq_timedsend_time64 +mq_unlink 263 +mremap 163 +msgctl 402 +msgget 399 +msgrcv 401 +msgsnd 400 +msync 144 +multiplexer 201 +munlock 151 +munlockall 153 +munmap 91 +name_to_handle_at 345 +nanosleep 162 +newfstatat 291 +nice 34 +old_adjtimex +oldfstat 28 +oldlstat 84 +oldolduname 59 +oldstat 18 +oldumount +olduname 109 +open 5 +open_by_handle_at 346 +open_tree 428 +openat 286 +openat2 437 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause 29 +pciconfig_iobase 200 +pciconfig_read 198 +pciconfig_write 199 +perf_event_open 319 +perfctr +personality 136 +pidfd_getfd 438 +pidfd_open 434 +pidfd_send_signal 424 +pipe 42 +pipe2 317 +pivot_root 203 +pkey_alloc 384 +pkey_free 385 +pkey_mprotect 386 +poll 167 +ppoll 281 +ppoll_time64 +prctl 171 +pread64 179 +preadv 320 +preadv2 380 +prlimit64 325 +process_madvise 440 +process_mrelease 448 +process_vm_readv 351 +process_vm_writev 352 +pselect6 280 +pselect6_time64 +ptrace 26 +pwrite64 180 +pwritev 321 +pwritev2 381 +quotactl 131 +quotactl_fd 443 +read 3 +readahead 191 +readdir 89 +readlink 85 +readlinkat 296 +readv 145 +reboot 88 +recv 336 +recvfrom 337 +recvmmsg 343 +recvmmsg_time64 +recvmsg 342 +remap_file_pages 239 +removexattr 218 +rename 38 +renameat 293 +renameat2 357 +request_key 270 +restart_syscall 0 +riscv_flush_icache +riscv_hwprobe +rmdir 40 +rseq 387 +rt_sigaction 173 +rt_sigpending 175 +rt_sigprocmask 174 +rt_sigqueueinfo 177 +rt_sigreturn 172 +rt_sigsuspend 178 +rt_sigtimedwait 176 +rt_sigtimedwait_time64 +rt_tgsigqueueinfo 322 +rtas 255 +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max 159 +sched_get_priority_min 160 +sched_getaffinity 223 +sched_getattr 356 +sched_getparam 155 +sched_getscheduler 157 +sched_rr_get_interval 161 +sched_rr_get_interval_time64 +sched_set_affinity +sched_setaffinity 222 +sched_setattr 355 +sched_setparam 154 +sched_setscheduler 156 +sched_yield 158 +seccomp 358 +select 82 +semctl 394 +semget 393 +semop +semtimedop 392 +semtimedop_time64 +send 334 +sendfile 186 +sendfile64 +sendmmsg 349 +sendmsg 341 +sendto 335 +set_mempolicy 261 +set_mempolicy_home_node 450 +set_robust_list 300 +set_thread_area +set_tid_address 232 +setdomainname 121 +setfsgid 139 +setfsgid32 +setfsuid 138 +setfsuid32 +setgid 46 +setgid32 +setgroups 81 +setgroups32 +sethae +sethostname 74 +setitimer 104 +setns 350 +setpgid 57 +setpgrp +setpriority 97 +setregid 71 +setregid32 +setresgid 169 +setresgid32 +setresuid 164 +setresuid32 +setreuid 70 +setreuid32 +setrlimit 75 +setsid 66 +setsockopt 339 +settimeofday 79 +setuid 23 +setuid32 +setxattr 209 +sgetmask 68 +shmat 397 +shmctl 396 +shmdt 398 +shmget 395 +shutdown 338 +sigaction 67 +sigaltstack 185 +signal 48 +signalfd 305 +signalfd4 313 +sigpending 73 +sigprocmask 126 +sigreturn 119 +sigsuspend 72 +socket 326 +socketcall 102 +socketpair 333 +splice 283 +spu_create 279 +spu_run 278 +ssetmask 69 +stat 106 +stat64 +statfs 99 +statfs64 252 +statx 383 +stime 25 +subpage_prot 310 +swapcontext 249 +swapoff 115 +swapon 87 +switch_endian 363 +symlink 83 +symlinkat 295 +sync 36 +sync_file_range +sync_file_range2 308 +syncfs 348 +sys_debug_setcontext 256 +syscall +sysfs 135 +sysinfo 116 +syslog 103 +sysmips +tee 284 +tgkill 250 +time 13 +timer_create 240 +timer_delete 244 +timer_getoverrun 243 +timer_gettime 242 +timer_gettime64 +timer_settime 241 +timer_settime64 +timerfd +timerfd_create 306 +timerfd_gettime 312 +timerfd_gettime64 +timerfd_settime 311 +timerfd_settime64 +times 43 +tkill 208 +truncate 92 +truncate64 +ugetrlimit 190 +umask 60 +umount 22 +umount2 52 +uname 122 +unlink 10 +unlinkat 292 +unshare 282 +userfaultfd 364 +ustat 62 +utime 30 +utimensat 304 +utimensat_time64 +utimes 251 +utrap_install +vfork 189 +vhangup 111 +vm86 113 +vm86old +vmsplice 285 +wait4 114 +waitid 272 +waitpid 7 +write 4 +writev 146 diff --git a/src/basic/syscalls-riscv32.txt b/src/basic/syscalls-riscv32.txt new file mode 100644 index 0000000..a791047 --- /dev/null +++ b/src/basic/syscalls-riscv32.txt @@ -0,0 +1,514 @@ +_llseek +_newselect +accept 202 +accept4 242 +access +acct 89 +add_key 217 +adjtimex +alarm +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bind 200 +bpf 280 +brk 214 +cachectl +cacheflush +capget 90 +capset 91 +chdir 49 +chmod +chown +chown32 +chroot 51 +clock_adjtime +clock_adjtime64 405 +clock_getres +clock_getres_time64 406 +clock_gettime +clock_gettime64 403 +clock_nanosleep +clock_nanosleep_time64 407 +clock_settime +clock_settime64 404 +clone 220 +clone3 435 +close 57 +close_range 436 +connect 203 +copy_file_range 285 +creat +delete_module 106 +dipc +dup 23 +dup2 +dup3 24 +epoll_create +epoll_create1 20 +epoll_ctl 21 +epoll_ctl_old +epoll_pwait 22 +epoll_pwait2 441 +epoll_wait +epoll_wait_old +eventfd +eventfd2 19 +exec_with_loader +execv +execve 221 +execveat 281 +exit 93 +exit_group 94 +faccessat 48 +faccessat2 439 +fadvise64 +fadvise64_64 223 +fallocate 47 +fanotify_init 262 +fanotify_mark 263 +fchdir 50 +fchmod 52 +fchmodat 53 +fchmodat2 452 +fchown 55 +fchown32 +fchownat 54 +fcntl +fcntl64 25 +fdatasync 83 +fgetxattr 10 +finit_module 273 +flistxattr 13 +flock 32 +fork +fremovexattr 16 +fsconfig 431 +fsetxattr 7 +fsmount 432 +fsopen 430 +fspick 433 +fstat +fstat64 +fstatat64 +fstatfs +fstatfs64 44 +fsync 82 +ftruncate +ftruncate64 46 +futex +futex_requeue 456 +futex_time64 422 +futex_wait 455 +futex_waitv 449 +futex_wake 454 +futimesat +get_mempolicy 236 +get_robust_list 100 +get_thread_area +getcpu 168 +getcwd 17 +getdents +getdents64 61 +getdomainname +getdtablesize +getegid 177 +getegid32 +geteuid 175 +geteuid32 +getgid 176 +getgid32 +getgroups 158 +getgroups32 +gethostname +getitimer 102 +getpagesize +getpeername 205 +getpgid 155 +getpgrp +getpid 172 +getppid 173 +getpriority 141 +getrandom 278 +getresgid 150 +getresgid32 +getresuid 148 +getresuid32 +getrlimit +getrusage 165 +getsid 156 +getsockname 204 +getsockopt 209 +gettid 178 +gettimeofday +getuid 174 +getuid32 +getxattr 8 +getxgid +getxpid +getxuid +init_module 105 +inotify_add_watch 27 +inotify_init +inotify_init1 26 +inotify_rm_watch 28 +io_cancel 3 +io_destroy 1 +io_getevents +io_pgetevents +io_pgetevents_time64 416 +io_setup 0 +io_submit 2 +io_uring_enter 426 +io_uring_register 427 +io_uring_setup 425 +ioctl 29 +ioperm +iopl +ioprio_get 31 +ioprio_set 30 +ipc +kcmp 272 +kern_features +kexec_file_load 294 +kexec_load 104 +keyctl 219 +kill 129 +landlock_add_rule 445 +landlock_create_ruleset 444 +landlock_restrict_self 446 +lchown +lchown32 +lgetxattr 9 +link +linkat 37 +listen 201 +listxattr 11 +llistxattr 12 +lookup_dcookie 18 +lremovexattr 15 +lseek +lsetxattr 6 +lstat +lstat64 +madvise 233 +map_shadow_stack 453 +mbind 235 +membarrier 283 +memfd_create 279 +memfd_secret 447 +memory_ordering +migrate_pages 238 +mincore 232 +mkdir +mkdirat 34 +mknod +mknodat 33 +mlock 228 +mlock2 284 +mlockall 230 +mmap +mmap2 222 +modify_ldt +mount 40 +mount_setattr 442 +move_mount 429 +move_pages 239 +mprotect 226 +mq_getsetattr 185 +mq_notify 184 +mq_open 180 +mq_timedreceive +mq_timedreceive_time64 419 +mq_timedsend +mq_timedsend_time64 418 +mq_unlink 181 +mremap 216 +msgctl 187 +msgget 186 +msgrcv 188 +msgsnd 189 +msync 227 +multiplexer +munlock 229 +munlockall 231 +munmap 215 +name_to_handle_at 264 +nanosleep +newfstatat +nice +old_adjtimex +oldfstat +oldlstat +oldolduname +oldstat +oldumount +olduname +open +open_by_handle_at 265 +open_tree 428 +openat 56 +openat2 437 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause +pciconfig_iobase +pciconfig_read +pciconfig_write +perf_event_open 241 +perfctr +personality 92 +pidfd_getfd 438 +pidfd_open 434 +pidfd_send_signal 424 +pipe +pipe2 59 +pivot_root 41 +pkey_alloc 289 +pkey_free 290 +pkey_mprotect 288 +poll +ppoll +ppoll_time64 414 +prctl 167 +pread64 67 +preadv 69 +preadv2 286 +prlimit64 261 +process_madvise 440 +process_mrelease 448 +process_vm_readv 270 +process_vm_writev 271 +pselect6 +pselect6_time64 413 +ptrace 117 +pwrite64 68 +pwritev 70 +pwritev2 287 +quotactl 60 +quotactl_fd 443 +read 63 +readahead 213 +readdir +readlink +readlinkat 78 +readv 65 +reboot 142 +recv +recvfrom 207 +recvmmsg +recvmmsg_time64 417 +recvmsg 212 +remap_file_pages 234 +removexattr 14 +rename +renameat +renameat2 276 +request_key 218 +restart_syscall 128 +riscv_flush_icache 259 +riscv_hwprobe 258 +rmdir +rseq 293 +rt_sigaction 134 +rt_sigpending 136 +rt_sigprocmask 135 +rt_sigqueueinfo 138 +rt_sigreturn 139 +rt_sigsuspend 133 +rt_sigtimedwait +rt_sigtimedwait_time64 421 +rt_tgsigqueueinfo 240 +rtas +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max 125 +sched_get_priority_min 126 +sched_getaffinity 123 +sched_getattr 275 +sched_getparam 121 +sched_getscheduler 120 +sched_rr_get_interval +sched_rr_get_interval_time64 423 +sched_set_affinity +sched_setaffinity 122 +sched_setattr 274 +sched_setparam 118 +sched_setscheduler 119 +sched_yield 124 +seccomp 277 +select +semctl 191 +semget 190 +semop 193 +semtimedop +semtimedop_time64 420 +send +sendfile +sendfile64 71 +sendmmsg 269 +sendmsg 211 +sendto 206 +set_mempolicy 237 +set_mempolicy_home_node 450 +set_robust_list 99 +set_thread_area +set_tid_address 96 +setdomainname 162 +setfsgid 152 +setfsgid32 +setfsuid 151 +setfsuid32 +setgid 144 +setgid32 +setgroups 159 +setgroups32 +sethae +sethostname 161 +setitimer 103 +setns 268 +setpgid 154 +setpgrp +setpriority 140 +setregid 143 +setregid32 +setresgid 149 +setresgid32 +setresuid 147 +setresuid32 +setreuid 145 +setreuid32 +setrlimit +setsid 157 +setsockopt 208 +settimeofday +setuid 146 +setuid32 +setxattr 5 +sgetmask +shmat 196 +shmctl 195 +shmdt 197 +shmget 194 +shutdown 210 +sigaction +sigaltstack 132 +signal +signalfd +signalfd4 74 +sigpending +sigprocmask +sigreturn +sigsuspend +socket 198 +socketcall +socketpair 199 +splice 76 +spu_create +spu_run +ssetmask +stat +stat64 +statfs +statfs64 43 +statx 291 +stime +subpage_prot +swapcontext +swapoff 225 +swapon 224 +switch_endian +symlink +symlinkat 36 +sync 81 +sync_file_range 84 +sync_file_range2 +syncfs 267 +sys_debug_setcontext +syscall +sysfs +sysinfo 179 +syslog 116 +sysmips +tee 77 +tgkill 131 +time +timer_create 107 +timer_delete 111 +timer_getoverrun 109 +timer_gettime +timer_gettime64 408 +timer_settime +timer_settime64 409 +timerfd +timerfd_create 85 +timerfd_gettime +timerfd_gettime64 410 +timerfd_settime +timerfd_settime64 411 +times 153 +tkill 130 +truncate +truncate64 45 +ugetrlimit +umask 166 +umount +umount2 39 +uname 160 +unlink +unlinkat 35 +unshare 97 +userfaultfd 282 +ustat +utime +utimensat +utimensat_time64 412 +utimes +utrap_install +vfork +vhangup 58 +vm86 +vm86old +vmsplice 75 +wait4 +waitid 95 +waitpid +write 64 +writev 66 diff --git a/src/basic/syscalls-riscv64.txt b/src/basic/syscalls-riscv64.txt new file mode 100644 index 0000000..13bfa5f --- /dev/null +++ b/src/basic/syscalls-riscv64.txt @@ -0,0 +1,514 @@ +_llseek +_newselect +accept 202 +accept4 242 +access +acct 89 +add_key 217 +adjtimex 171 +alarm +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bind 200 +bpf 280 +brk 214 +cachectl +cacheflush +capget 90 +capset 91 +chdir 49 +chmod +chown +chown32 +chroot 51 +clock_adjtime 266 +clock_adjtime64 +clock_getres 114 +clock_getres_time64 +clock_gettime 113 +clock_gettime64 +clock_nanosleep 115 +clock_nanosleep_time64 +clock_settime 112 +clock_settime64 +clone 220 +clone3 435 +close 57 +close_range 436 +connect 203 +copy_file_range 285 +creat +delete_module 106 +dipc +dup 23 +dup2 +dup3 24 +epoll_create +epoll_create1 20 +epoll_ctl 21 +epoll_ctl_old +epoll_pwait 22 +epoll_pwait2 441 +epoll_wait +epoll_wait_old +eventfd +eventfd2 19 +exec_with_loader +execv +execve 221 +execveat 281 +exit 93 +exit_group 94 +faccessat 48 +faccessat2 439 +fadvise64 223 +fadvise64_64 +fallocate 47 +fanotify_init 262 +fanotify_mark 263 +fchdir 50 +fchmod 52 +fchmodat 53 +fchmodat2 452 +fchown 55 +fchown32 +fchownat 54 +fcntl 25 +fcntl64 +fdatasync 83 +fgetxattr 10 +finit_module 273 +flistxattr 13 +flock 32 +fork +fremovexattr 16 +fsconfig 431 +fsetxattr 7 +fsmount 432 +fsopen 430 +fspick 433 +fstat 80 +fstat64 +fstatat64 +fstatfs 44 +fstatfs64 +fsync 82 +ftruncate 46 +ftruncate64 +futex 98 +futex_requeue 456 +futex_time64 +futex_wait 455 +futex_waitv 449 +futex_wake 454 +futimesat +get_mempolicy 236 +get_robust_list 100 +get_thread_area +getcpu 168 +getcwd 17 +getdents +getdents64 61 +getdomainname +getdtablesize +getegid 177 +getegid32 +geteuid 175 +geteuid32 +getgid 176 +getgid32 +getgroups 158 +getgroups32 +gethostname +getitimer 102 +getpagesize +getpeername 205 +getpgid 155 +getpgrp +getpid 172 +getppid 173 +getpriority 141 +getrandom 278 +getresgid 150 +getresgid32 +getresuid 148 +getresuid32 +getrlimit 163 +getrusage 165 +getsid 156 +getsockname 204 +getsockopt 209 +gettid 178 +gettimeofday 169 +getuid 174 +getuid32 +getxattr 8 +getxgid +getxpid +getxuid +init_module 105 +inotify_add_watch 27 +inotify_init +inotify_init1 26 +inotify_rm_watch 28 +io_cancel 3 +io_destroy 1 +io_getevents 4 +io_pgetevents 292 +io_pgetevents_time64 +io_setup 0 +io_submit 2 +io_uring_enter 426 +io_uring_register 427 +io_uring_setup 425 +ioctl 29 +ioperm +iopl +ioprio_get 31 +ioprio_set 30 +ipc +kcmp 272 +kern_features +kexec_file_load 294 +kexec_load 104 +keyctl 219 +kill 129 +landlock_add_rule 445 +landlock_create_ruleset 444 +landlock_restrict_self 446 +lchown +lchown32 +lgetxattr 9 +link +linkat 37 +listen 201 +listxattr 11 +llistxattr 12 +lookup_dcookie 18 +lremovexattr 15 +lseek 62 +lsetxattr 6 +lstat +lstat64 +madvise 233 +map_shadow_stack 453 +mbind 235 +membarrier 283 +memfd_create 279 +memfd_secret 447 +memory_ordering +migrate_pages 238 +mincore 232 +mkdir +mkdirat 34 +mknod +mknodat 33 +mlock 228 +mlock2 284 +mlockall 230 +mmap 222 +mmap2 +modify_ldt +mount 40 +mount_setattr 442 +move_mount 429 +move_pages 239 +mprotect 226 +mq_getsetattr 185 +mq_notify 184 +mq_open 180 +mq_timedreceive 183 +mq_timedreceive_time64 +mq_timedsend 182 +mq_timedsend_time64 +mq_unlink 181 +mremap 216 +msgctl 187 +msgget 186 +msgrcv 188 +msgsnd 189 +msync 227 +multiplexer +munlock 229 +munlockall 231 +munmap 215 +name_to_handle_at 264 +nanosleep 101 +newfstatat 79 +nice +old_adjtimex +oldfstat +oldlstat +oldolduname +oldstat +oldumount +olduname +open +open_by_handle_at 265 +open_tree 428 +openat 56 +openat2 437 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause +pciconfig_iobase +pciconfig_read +pciconfig_write +perf_event_open 241 +perfctr +personality 92 +pidfd_getfd 438 +pidfd_open 434 +pidfd_send_signal 424 +pipe +pipe2 59 +pivot_root 41 +pkey_alloc 289 +pkey_free 290 +pkey_mprotect 288 +poll +ppoll 73 +ppoll_time64 +prctl 167 +pread64 67 +preadv 69 +preadv2 286 +prlimit64 261 +process_madvise 440 +process_mrelease 448 +process_vm_readv 270 +process_vm_writev 271 +pselect6 72 +pselect6_time64 +ptrace 117 +pwrite64 68 +pwritev 70 +pwritev2 287 +quotactl 60 +quotactl_fd 443 +read 63 +readahead 213 +readdir +readlink +readlinkat 78 +readv 65 +reboot 142 +recv +recvfrom 207 +recvmmsg 243 +recvmmsg_time64 +recvmsg 212 +remap_file_pages 234 +removexattr 14 +rename +renameat +renameat2 276 +request_key 218 +restart_syscall 128 +riscv_flush_icache 259 +riscv_hwprobe 258 +rmdir +rseq 293 +rt_sigaction 134 +rt_sigpending 136 +rt_sigprocmask 135 +rt_sigqueueinfo 138 +rt_sigreturn 139 +rt_sigsuspend 133 +rt_sigtimedwait 137 +rt_sigtimedwait_time64 +rt_tgsigqueueinfo 240 +rtas +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max 125 +sched_get_priority_min 126 +sched_getaffinity 123 +sched_getattr 275 +sched_getparam 121 +sched_getscheduler 120 +sched_rr_get_interval 127 +sched_rr_get_interval_time64 +sched_set_affinity +sched_setaffinity 122 +sched_setattr 274 +sched_setparam 118 +sched_setscheduler 119 +sched_yield 124 +seccomp 277 +select +semctl 191 +semget 190 +semop 193 +semtimedop 192 +semtimedop_time64 +send +sendfile 71 +sendfile64 +sendmmsg 269 +sendmsg 211 +sendto 206 +set_mempolicy 237 +set_mempolicy_home_node 450 +set_robust_list 99 +set_thread_area +set_tid_address 96 +setdomainname 162 +setfsgid 152 +setfsgid32 +setfsuid 151 +setfsuid32 +setgid 144 +setgid32 +setgroups 159 +setgroups32 +sethae +sethostname 161 +setitimer 103 +setns 268 +setpgid 154 +setpgrp +setpriority 140 +setregid 143 +setregid32 +setresgid 149 +setresgid32 +setresuid 147 +setresuid32 +setreuid 145 +setreuid32 +setrlimit 164 +setsid 157 +setsockopt 208 +settimeofday 170 +setuid 146 +setuid32 +setxattr 5 +sgetmask +shmat 196 +shmctl 195 +shmdt 197 +shmget 194 +shutdown 210 +sigaction +sigaltstack 132 +signal +signalfd +signalfd4 74 +sigpending +sigprocmask +sigreturn +sigsuspend +socket 198 +socketcall +socketpair 199 +splice 76 +spu_create +spu_run +ssetmask +stat +stat64 +statfs 43 +statfs64 +statx 291 +stime +subpage_prot +swapcontext +swapoff 225 +swapon 224 +switch_endian +symlink +symlinkat 36 +sync 81 +sync_file_range 84 +sync_file_range2 +syncfs 267 +sys_debug_setcontext +syscall +sysfs +sysinfo 179 +syslog 116 +sysmips +tee 77 +tgkill 131 +time +timer_create 107 +timer_delete 111 +timer_getoverrun 109 +timer_gettime 108 +timer_gettime64 +timer_settime 110 +timer_settime64 +timerfd +timerfd_create 85 +timerfd_gettime 87 +timerfd_gettime64 +timerfd_settime 86 +timerfd_settime64 +times 153 +tkill 130 +truncate 45 +truncate64 +ugetrlimit +umask 166 +umount +umount2 39 +uname 160 +unlink +unlinkat 35 +unshare 97 +userfaultfd 282 +ustat +utime +utimensat 88 +utimensat_time64 +utimes +utrap_install +vfork +vhangup 58 +vm86 +vm86old +vmsplice 75 +wait4 260 +waitid 95 +waitpid +write 64 +writev 66 diff --git a/src/basic/syscalls-s390.txt b/src/basic/syscalls-s390.txt new file mode 100644 index 0000000..901bed3 --- /dev/null +++ b/src/basic/syscalls-s390.txt @@ -0,0 +1,514 @@ +_llseek 140 +_newselect 142 +accept +accept4 364 +access 33 +acct 51 +add_key 278 +adjtimex 124 +alarm 27 +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bind 361 +bpf 351 +brk 45 +cachectl +cacheflush +capget 184 +capset 185 +chdir 12 +chmod 15 +chown 182 +chown32 212 +chroot 61 +clock_adjtime 337 +clock_adjtime64 405 +clock_getres 261 +clock_getres_time64 406 +clock_gettime 260 +clock_gettime64 403 +clock_nanosleep 262 +clock_nanosleep_time64 407 +clock_settime 259 +clock_settime64 404 +clone 120 +clone3 435 +close 6 +close_range 436 +connect 362 +copy_file_range 375 +creat 8 +delete_module 129 +dipc +dup 41 +dup2 63 +dup3 326 +epoll_create 249 +epoll_create1 327 +epoll_ctl 250 +epoll_ctl_old +epoll_pwait 312 +epoll_pwait2 441 +epoll_wait 251 +epoll_wait_old +eventfd 318 +eventfd2 323 +exec_with_loader +execv +execve 11 +execveat 354 +exit 1 +exit_group 248 +faccessat 300 +faccessat2 439 +fadvise64 253 +fadvise64_64 264 +fallocate 314 +fanotify_init 332 +fanotify_mark 333 +fchdir 133 +fchmod 94 +fchmodat 299 +fchmodat2 452 +fchown 95 +fchown32 207 +fchownat 291 +fcntl 55 +fcntl64 221 +fdatasync 148 +fgetxattr 229 +finit_module 344 +flistxattr 232 +flock 143 +fork 2 +fremovexattr 235 +fsconfig 431 +fsetxattr 226 +fsmount 432 +fsopen 430 +fspick 433 +fstat 108 +fstat64 197 +fstatat64 293 +fstatfs 100 +fstatfs64 266 +fsync 118 +ftruncate 93 +ftruncate64 194 +futex 238 +futex_requeue 456 +futex_time64 422 +futex_wait 455 +futex_waitv 449 +futex_wake 454 +futimesat 292 +get_mempolicy 269 +get_robust_list 305 +get_thread_area +getcpu 311 +getcwd 183 +getdents 141 +getdents64 220 +getdomainname +getdtablesize +getegid 50 +getegid32 202 +geteuid 49 +geteuid32 201 +getgid 47 +getgid32 200 +getgroups 80 +getgroups32 205 +gethostname +getitimer 105 +getpagesize +getpeername 368 +getpgid 132 +getpgrp 65 +getpid 20 +getppid 64 +getpriority 96 +getrandom 349 +getresgid 171 +getresgid32 211 +getresuid 165 +getresuid32 209 +getrlimit 76 +getrusage 77 +getsid 147 +getsockname 367 +getsockopt 365 +gettid 236 +gettimeofday 78 +getuid 24 +getuid32 199 +getxattr 227 +getxgid +getxpid +getxuid +init_module 128 +inotify_add_watch 285 +inotify_init 284 +inotify_init1 324 +inotify_rm_watch 286 +io_cancel 247 +io_destroy 244 +io_getevents 245 +io_pgetevents 382 +io_pgetevents_time64 416 +io_setup 243 +io_submit 246 +io_uring_enter 426 +io_uring_register 427 +io_uring_setup 425 +ioctl 54 +ioperm 101 +iopl +ioprio_get 283 +ioprio_set 282 +ipc 117 +kcmp 343 +kern_features +kexec_file_load 381 +kexec_load 277 +keyctl 280 +kill 37 +landlock_add_rule 445 +landlock_create_ruleset 444 +landlock_restrict_self 446 +lchown 16 +lchown32 198 +lgetxattr 228 +link 9 +linkat 296 +listen 363 +listxattr 230 +llistxattr 231 +lookup_dcookie 110 +lremovexattr 234 +lseek 19 +lsetxattr 225 +lstat 107 +lstat64 196 +madvise 219 +map_shadow_stack 453 +mbind 268 +membarrier 356 +memfd_create 350 +memfd_secret 447 +memory_ordering +migrate_pages 287 +mincore 218 +mkdir 39 +mkdirat 289 +mknod 14 +mknodat 290 +mlock 150 +mlock2 374 +mlockall 152 +mmap 90 +mmap2 192 +modify_ldt +mount 21 +mount_setattr 442 +move_mount 429 +move_pages 310 +mprotect 125 +mq_getsetattr 276 +mq_notify 275 +mq_open 271 +mq_timedreceive 274 +mq_timedreceive_time64 419 +mq_timedsend 273 +mq_timedsend_time64 418 +mq_unlink 272 +mremap 163 +msgctl 402 +msgget 399 +msgrcv 401 +msgsnd 400 +msync 144 +multiplexer +munlock 151 +munlockall 153 +munmap 91 +name_to_handle_at 335 +nanosleep 162 +newfstatat +nice 34 +old_adjtimex +oldfstat +oldlstat +oldolduname +oldstat +oldumount +olduname +open 5 +open_by_handle_at 336 +open_tree 428 +openat 288 +openat2 437 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause 29 +pciconfig_iobase +pciconfig_read +pciconfig_write +perf_event_open 331 +perfctr +personality 136 +pidfd_getfd 438 +pidfd_open 434 +pidfd_send_signal 424 +pipe 42 +pipe2 325 +pivot_root 217 +pkey_alloc 385 +pkey_free 386 +pkey_mprotect 384 +poll 168 +ppoll 302 +ppoll_time64 414 +prctl 172 +pread64 180 +preadv 328 +preadv2 376 +prlimit64 334 +process_madvise 440 +process_mrelease 448 +process_vm_readv 340 +process_vm_writev 341 +pselect6 301 +pselect6_time64 413 +ptrace 26 +pwrite64 181 +pwritev 329 +pwritev2 377 +quotactl 131 +quotactl_fd 443 +read 3 +readahead 222 +readdir 89 +readlink 85 +readlinkat 298 +readv 145 +reboot 88 +recv +recvfrom 371 +recvmmsg 357 +recvmmsg_time64 417 +recvmsg 372 +remap_file_pages 267 +removexattr 233 +rename 38 +renameat 295 +renameat2 347 +request_key 279 +restart_syscall 7 +riscv_flush_icache +riscv_hwprobe +rmdir 40 +rseq 383 +rt_sigaction 174 +rt_sigpending 176 +rt_sigprocmask 175 +rt_sigqueueinfo 178 +rt_sigreturn 173 +rt_sigsuspend 179 +rt_sigtimedwait 177 +rt_sigtimedwait_time64 421 +rt_tgsigqueueinfo 330 +rtas +s390_guarded_storage 378 +s390_pci_mmio_read 353 +s390_pci_mmio_write 352 +s390_runtime_instr 342 +s390_sthyi 380 +sched_get_affinity +sched_get_priority_max 159 +sched_get_priority_min 160 +sched_getaffinity 240 +sched_getattr 346 +sched_getparam 155 +sched_getscheduler 157 +sched_rr_get_interval 161 +sched_rr_get_interval_time64 423 +sched_set_affinity +sched_setaffinity 239 +sched_setattr 345 +sched_setparam 154 +sched_setscheduler 156 +sched_yield 158 +seccomp 348 +select +semctl 394 +semget 393 +semop +semtimedop +semtimedop_time64 420 +send +sendfile 187 +sendfile64 223 +sendmmsg 358 +sendmsg 370 +sendto 369 +set_mempolicy 270 +set_mempolicy_home_node 450 +set_robust_list 304 +set_thread_area +set_tid_address 252 +setdomainname 121 +setfsgid 139 +setfsgid32 216 +setfsuid 138 +setfsuid32 215 +setgid 46 +setgid32 214 +setgroups 81 +setgroups32 206 +sethae +sethostname 74 +setitimer 104 +setns 339 +setpgid 57 +setpgrp +setpriority 97 +setregid 71 +setregid32 204 +setresgid 170 +setresgid32 210 +setresuid 164 +setresuid32 208 +setreuid 70 +setreuid32 203 +setrlimit 75 +setsid 66 +setsockopt 366 +settimeofday 79 +setuid 23 +setuid32 213 +setxattr 224 +sgetmask +shmat 397 +shmctl 396 +shmdt 398 +shmget 395 +shutdown 373 +sigaction 67 +sigaltstack 186 +signal 48 +signalfd 316 +signalfd4 322 +sigpending 73 +sigprocmask 126 +sigreturn 119 +sigsuspend 72 +socket 359 +socketcall 102 +socketpair 360 +splice 306 +spu_create +spu_run +ssetmask +stat 106 +stat64 195 +statfs 99 +statfs64 265 +statx 379 +stime 25 +subpage_prot +swapcontext +swapoff 115 +swapon 87 +switch_endian +symlink 83 +symlinkat 297 +sync 36 +sync_file_range 307 +sync_file_range2 +syncfs 338 +sys_debug_setcontext +syscall +sysfs 135 +sysinfo 116 +syslog 103 +sysmips +tee 308 +tgkill 241 +time 13 +timer_create 254 +timer_delete 258 +timer_getoverrun 257 +timer_gettime 256 +timer_gettime64 408 +timer_settime 255 +timer_settime64 409 +timerfd 317 +timerfd_create 319 +timerfd_gettime 321 +timerfd_gettime64 410 +timerfd_settime 320 +timerfd_settime64 411 +times 43 +tkill 237 +truncate 92 +truncate64 193 +ugetrlimit 191 +umask 60 +umount 22 +umount2 52 +uname 122 +unlink 10 +unlinkat 294 +unshare 303 +userfaultfd 355 +ustat 62 +utime 30 +utimensat 315 +utimensat_time64 412 +utimes 313 +utrap_install +vfork 190 +vhangup 111 +vm86 +vm86old +vmsplice 309 +wait4 114 +waitid 281 +waitpid +write 4 +writev 146 diff --git a/src/basic/syscalls-s390x.txt b/src/basic/syscalls-s390x.txt new file mode 100644 index 0000000..4f06b9b --- /dev/null +++ b/src/basic/syscalls-s390x.txt @@ -0,0 +1,514 @@ +_llseek +_newselect +accept +accept4 364 +access 33 +acct 51 +add_key 278 +adjtimex 124 +alarm 27 +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bind 361 +bpf 351 +brk 45 +cachectl +cacheflush +capget 184 +capset 185 +chdir 12 +chmod 15 +chown 212 +chown32 +chroot 61 +clock_adjtime 337 +clock_adjtime64 +clock_getres 261 +clock_getres_time64 +clock_gettime 260 +clock_gettime64 +clock_nanosleep 262 +clock_nanosleep_time64 +clock_settime 259 +clock_settime64 +clone 120 +clone3 435 +close 6 +close_range 436 +connect 362 +copy_file_range 375 +creat 8 +delete_module 129 +dipc +dup 41 +dup2 63 +dup3 326 +epoll_create 249 +epoll_create1 327 +epoll_ctl 250 +epoll_ctl_old +epoll_pwait 312 +epoll_pwait2 441 +epoll_wait 251 +epoll_wait_old +eventfd 318 +eventfd2 323 +exec_with_loader +execv +execve 11 +execveat 354 +exit 1 +exit_group 248 +faccessat 300 +faccessat2 439 +fadvise64 253 +fadvise64_64 +fallocate 314 +fanotify_init 332 +fanotify_mark 333 +fchdir 133 +fchmod 94 +fchmodat 299 +fchmodat2 452 +fchown 207 +fchown32 +fchownat 291 +fcntl 55 +fcntl64 +fdatasync 148 +fgetxattr 229 +finit_module 344 +flistxattr 232 +flock 143 +fork 2 +fremovexattr 235 +fsconfig 431 +fsetxattr 226 +fsmount 432 +fsopen 430 +fspick 433 +fstat 108 +fstat64 +fstatat64 +fstatfs 100 +fstatfs64 266 +fsync 118 +ftruncate 93 +ftruncate64 +futex 238 +futex_requeue 456 +futex_time64 +futex_wait 455 +futex_waitv 449 +futex_wake 454 +futimesat 292 +get_mempolicy 269 +get_robust_list 305 +get_thread_area +getcpu 311 +getcwd 183 +getdents 141 +getdents64 220 +getdomainname +getdtablesize +getegid 202 +getegid32 +geteuid 201 +geteuid32 +getgid 200 +getgid32 +getgroups 205 +getgroups32 +gethostname +getitimer 105 +getpagesize +getpeername 368 +getpgid 132 +getpgrp 65 +getpid 20 +getppid 64 +getpriority 96 +getrandom 349 +getresgid 211 +getresgid32 +getresuid 209 +getresuid32 +getrlimit 191 +getrusage 77 +getsid 147 +getsockname 367 +getsockopt 365 +gettid 236 +gettimeofday 78 +getuid 199 +getuid32 +getxattr 227 +getxgid +getxpid +getxuid +init_module 128 +inotify_add_watch 285 +inotify_init 284 +inotify_init1 324 +inotify_rm_watch 286 +io_cancel 247 +io_destroy 244 +io_getevents 245 +io_pgetevents 382 +io_pgetevents_time64 +io_setup 243 +io_submit 246 +io_uring_enter 426 +io_uring_register 427 +io_uring_setup 425 +ioctl 54 +ioperm +iopl +ioprio_get 283 +ioprio_set 282 +ipc 117 +kcmp 343 +kern_features +kexec_file_load 381 +kexec_load 277 +keyctl 280 +kill 37 +landlock_add_rule 445 +landlock_create_ruleset 444 +landlock_restrict_self 446 +lchown 198 +lchown32 +lgetxattr 228 +link 9 +linkat 296 +listen 363 +listxattr 230 +llistxattr 231 +lookup_dcookie 110 +lremovexattr 234 +lseek 19 +lsetxattr 225 +lstat 107 +lstat64 +madvise 219 +map_shadow_stack 453 +mbind 268 +membarrier 356 +memfd_create 350 +memfd_secret 447 +memory_ordering +migrate_pages 287 +mincore 218 +mkdir 39 +mkdirat 289 +mknod 14 +mknodat 290 +mlock 150 +mlock2 374 +mlockall 152 +mmap 90 +mmap2 +modify_ldt +mount 21 +mount_setattr 442 +move_mount 429 +move_pages 310 +mprotect 125 +mq_getsetattr 276 +mq_notify 275 +mq_open 271 +mq_timedreceive 274 +mq_timedreceive_time64 +mq_timedsend 273 +mq_timedsend_time64 +mq_unlink 272 +mremap 163 +msgctl 402 +msgget 399 +msgrcv 401 +msgsnd 400 +msync 144 +multiplexer +munlock 151 +munlockall 153 +munmap 91 +name_to_handle_at 335 +nanosleep 162 +newfstatat 293 +nice 34 +old_adjtimex +oldfstat +oldlstat +oldolduname +oldstat +oldumount +olduname +open 5 +open_by_handle_at 336 +open_tree 428 +openat 288 +openat2 437 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause 29 +pciconfig_iobase +pciconfig_read +pciconfig_write +perf_event_open 331 +perfctr +personality 136 +pidfd_getfd 438 +pidfd_open 434 +pidfd_send_signal 424 +pipe 42 +pipe2 325 +pivot_root 217 +pkey_alloc 385 +pkey_free 386 +pkey_mprotect 384 +poll 168 +ppoll 302 +ppoll_time64 +prctl 172 +pread64 180 +preadv 328 +preadv2 376 +prlimit64 334 +process_madvise 440 +process_mrelease 448 +process_vm_readv 340 +process_vm_writev 341 +pselect6 301 +pselect6_time64 +ptrace 26 +pwrite64 181 +pwritev 329 +pwritev2 377 +quotactl 131 +quotactl_fd 443 +read 3 +readahead 222 +readdir 89 +readlink 85 +readlinkat 298 +readv 145 +reboot 88 +recv +recvfrom 371 +recvmmsg 357 +recvmmsg_time64 +recvmsg 372 +remap_file_pages 267 +removexattr 233 +rename 38 +renameat 295 +renameat2 347 +request_key 279 +restart_syscall 7 +riscv_flush_icache +riscv_hwprobe +rmdir 40 +rseq 383 +rt_sigaction 174 +rt_sigpending 176 +rt_sigprocmask 175 +rt_sigqueueinfo 178 +rt_sigreturn 173 +rt_sigsuspend 179 +rt_sigtimedwait 177 +rt_sigtimedwait_time64 +rt_tgsigqueueinfo 330 +rtas +s390_guarded_storage 378 +s390_pci_mmio_read 353 +s390_pci_mmio_write 352 +s390_runtime_instr 342 +s390_sthyi 380 +sched_get_affinity +sched_get_priority_max 159 +sched_get_priority_min 160 +sched_getaffinity 240 +sched_getattr 346 +sched_getparam 155 +sched_getscheduler 157 +sched_rr_get_interval 161 +sched_rr_get_interval_time64 +sched_set_affinity +sched_setaffinity 239 +sched_setattr 345 +sched_setparam 154 +sched_setscheduler 156 +sched_yield 158 +seccomp 348 +select 142 +semctl 394 +semget 393 +semop +semtimedop 392 +semtimedop_time64 +send +sendfile 187 +sendfile64 +sendmmsg 358 +sendmsg 370 +sendto 369 +set_mempolicy 270 +set_mempolicy_home_node 450 +set_robust_list 304 +set_thread_area +set_tid_address 252 +setdomainname 121 +setfsgid 216 +setfsgid32 +setfsuid 215 +setfsuid32 +setgid 214 +setgid32 +setgroups 206 +setgroups32 +sethae +sethostname 74 +setitimer 104 +setns 339 +setpgid 57 +setpgrp +setpriority 97 +setregid 204 +setregid32 +setresgid 210 +setresgid32 +setresuid 208 +setresuid32 +setreuid 203 +setreuid32 +setrlimit 75 +setsid 66 +setsockopt 366 +settimeofday 79 +setuid 213 +setuid32 +setxattr 224 +sgetmask +shmat 397 +shmctl 396 +shmdt 398 +shmget 395 +shutdown 373 +sigaction 67 +sigaltstack 186 +signal 48 +signalfd 316 +signalfd4 322 +sigpending 73 +sigprocmask 126 +sigreturn 119 +sigsuspend 72 +socket 359 +socketcall 102 +socketpair 360 +splice 306 +spu_create +spu_run +ssetmask +stat 106 +stat64 +statfs 99 +statfs64 265 +statx 379 +stime +subpage_prot +swapcontext +swapoff 115 +swapon 87 +switch_endian +symlink 83 +symlinkat 297 +sync 36 +sync_file_range 307 +sync_file_range2 +syncfs 338 +sys_debug_setcontext +syscall +sysfs 135 +sysinfo 116 +syslog 103 +sysmips +tee 308 +tgkill 241 +time +timer_create 254 +timer_delete 258 +timer_getoverrun 257 +timer_gettime 256 +timer_gettime64 +timer_settime 255 +timer_settime64 +timerfd 317 +timerfd_create 319 +timerfd_gettime 321 +timerfd_gettime64 +timerfd_settime 320 +timerfd_settime64 +times 43 +tkill 237 +truncate 92 +truncate64 +ugetrlimit +umask 60 +umount 22 +umount2 52 +uname 122 +unlink 10 +unlinkat 294 +unshare 303 +userfaultfd 355 +ustat 62 +utime 30 +utimensat 315 +utimensat_time64 +utimes 313 +utrap_install +vfork 190 +vhangup 111 +vm86 +vm86old +vmsplice 309 +wait4 114 +waitid 281 +waitpid +write 4 +writev 146 diff --git a/src/basic/syscalls-sparc.txt b/src/basic/syscalls-sparc.txt new file mode 100644 index 0000000..cfaed82 --- /dev/null +++ b/src/basic/syscalls-sparc.txt @@ -0,0 +1,514 @@ +_llseek 236 +_newselect 230 +accept 99 +accept4 323 +access 33 +acct 51 +add_key 281 +adjtimex 219 +alarm 27 +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bind 353 +bpf 349 +brk 17 +cachectl +cacheflush +capget 21 +capset 22 +chdir 12 +chmod 15 +chown 13 +chown32 35 +chroot 61 +clock_adjtime 334 +clock_adjtime64 405 +clock_getres 258 +clock_getres_time64 406 +clock_gettime 257 +clock_gettime64 403 +clock_nanosleep 259 +clock_nanosleep_time64 407 +clock_settime 256 +clock_settime64 404 +clone 217 +clone3 +close 6 +close_range 436 +connect 98 +copy_file_range 357 +creat 8 +delete_module 222 +dipc +dup 41 +dup2 90 +dup3 320 +epoll_create 193 +epoll_create1 319 +epoll_ctl 194 +epoll_ctl_old +epoll_pwait 309 +epoll_pwait2 441 +epoll_wait 195 +epoll_wait_old +eventfd 313 +eventfd2 318 +exec_with_loader +execv 11 +execve 59 +execveat 350 +exit 1 +exit_group 188 +faccessat 296 +faccessat2 439 +fadvise64 209 +fadvise64_64 210 +fallocate 314 +fanotify_init 329 +fanotify_mark 330 +fchdir 176 +fchmod 124 +fchmodat 295 +fchmodat2 452 +fchown 123 +fchown32 32 +fchownat 287 +fcntl 92 +fcntl64 155 +fdatasync 253 +fgetxattr 177 +finit_module 342 +flistxattr 180 +flock 131 +fork 2 +fremovexattr 186 +fsconfig 431 +fsetxattr 171 +fsmount 432 +fsopen 430 +fspick 433 +fstat 62 +fstat64 63 +fstatat64 289 +fstatfs 158 +fstatfs64 235 +fsync 95 +ftruncate 130 +ftruncate64 84 +futex 142 +futex_requeue 456 +futex_time64 422 +futex_wait 455 +futex_waitv 449 +futex_wake 454 +futimesat 288 +get_mempolicy 304 +get_robust_list 301 +get_thread_area +getcpu 308 +getcwd 119 +getdents 174 +getdents64 154 +getdomainname 162 +getdtablesize +getegid 50 +getegid32 70 +geteuid 49 +geteuid32 69 +getgid 47 +getgid32 53 +getgroups 79 +getgroups32 115 +gethostname +getitimer 86 +getpagesize 64 +getpeername 141 +getpgid 224 +getpgrp 81 +getpid 20 +getppid 197 +getpriority 100 +getrandom 347 +getresgid +getresgid32 111 +getresuid +getresuid32 109 +getrlimit 144 +getrusage 117 +getsid 252 +getsockname 150 +getsockopt 118 +gettid 143 +gettimeofday 116 +getuid 24 +getuid32 44 +getxattr 172 +getxgid +getxpid +getxuid +init_module 190 +inotify_add_watch 152 +inotify_init 151 +inotify_init1 322 +inotify_rm_watch 156 +io_cancel 271 +io_destroy 269 +io_getevents 272 +io_pgetevents 361 +io_pgetevents_time64 416 +io_setup 268 +io_submit 270 +io_uring_enter 426 +io_uring_register 427 +io_uring_setup 425 +ioctl 54 +ioperm +iopl +ioprio_get 218 +ioprio_set 196 +ipc 215 +kcmp 341 +kern_features 340 +kexec_file_load +kexec_load 306 +keyctl 283 +kill 37 +landlock_add_rule 445 +landlock_create_ruleset 444 +landlock_restrict_self 446 +lchown 16 +lchown32 31 +lgetxattr 173 +link 9 +linkat 292 +listen 354 +listxattr 178 +llistxattr 179 +lookup_dcookie 208 +lremovexattr 182 +lseek 19 +lsetxattr 170 +lstat 40 +lstat64 132 +madvise 75 +map_shadow_stack 453 +mbind 303 +membarrier 351 +memfd_create 348 +memfd_secret +memory_ordering +migrate_pages 302 +mincore 78 +mkdir 136 +mkdirat 285 +mknod 14 +mknodat 286 +mlock 237 +mlock2 356 +mlockall 239 +mmap 71 +mmap2 56 +modify_ldt +mount 167 +mount_setattr 442 +move_mount 429 +move_pages 307 +mprotect 74 +mq_getsetattr 278 +mq_notify 277 +mq_open 273 +mq_timedreceive 276 +mq_timedreceive_time64 419 +mq_timedsend 275 +mq_timedsend_time64 418 +mq_unlink 274 +mremap 250 +msgctl 402 +msgget 399 +msgrcv 401 +msgsnd 400 +msync 65 +multiplexer +munlock 238 +munlockall 240 +munmap 73 +name_to_handle_at 332 +nanosleep 249 +newfstatat +nice 34 +old_adjtimex +oldfstat +oldlstat 202 +oldolduname +oldstat +oldumount +olduname +open 5 +open_by_handle_at 333 +open_tree 428 +openat 284 +openat2 437 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause 29 +pciconfig_iobase +pciconfig_read 148 +pciconfig_write 149 +perf_event_open 327 +perfctr 18 +personality 191 +pidfd_getfd 438 +pidfd_open 434 +pidfd_send_signal 424 +pipe 42 +pipe2 321 +pivot_root 146 +pkey_alloc 363 +pkey_free 364 +pkey_mprotect 362 +poll 153 +ppoll 298 +ppoll_time64 414 +prctl 147 +pread64 67 +preadv 324 +preadv2 358 +prlimit64 331 +process_madvise 440 +process_mrelease 448 +process_vm_readv 338 +process_vm_writev 339 +pselect6 297 +pselect6_time64 413 +ptrace 26 +pwrite64 68 +pwritev 325 +pwritev2 359 +quotactl 165 +quotactl_fd 443 +read 3 +readahead 205 +readdir 204 +readlink 58 +readlinkat 294 +readv 120 +reboot 55 +recv +recvfrom 125 +recvmmsg 328 +recvmmsg_time64 417 +recvmsg 113 +remap_file_pages 192 +removexattr 181 +rename 128 +renameat 291 +renameat2 345 +request_key 282 +restart_syscall 0 +riscv_flush_icache +riscv_hwprobe +rmdir 137 +rseq 365 +rt_sigaction 102 +rt_sigpending 104 +rt_sigprocmask 103 +rt_sigqueueinfo 106 +rt_sigreturn 101 +rt_sigsuspend 107 +rt_sigtimedwait 105 +rt_sigtimedwait_time64 421 +rt_tgsigqueueinfo 326 +rtas +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity 161 +sched_get_priority_max 246 +sched_get_priority_min 247 +sched_getaffinity 260 +sched_getattr 344 +sched_getparam 242 +sched_getscheduler 244 +sched_rr_get_interval 248 +sched_rr_get_interval_time64 423 +sched_set_affinity 160 +sched_setaffinity 261 +sched_setattr 343 +sched_setparam 241 +sched_setscheduler 243 +sched_yield 245 +seccomp 346 +select 93 +semctl 394 +semget 393 +semop +semtimedop +semtimedop_time64 420 +send +sendfile 39 +sendfile64 140 +sendmmsg 336 +sendmsg 114 +sendto 133 +set_mempolicy 305 +set_mempolicy_home_node 450 +set_robust_list 300 +set_thread_area +set_tid_address 166 +setdomainname 163 +setfsgid 229 +setfsgid32 94 +setfsuid 228 +setfsuid32 91 +setgid 46 +setgid32 89 +setgroups 80 +setgroups32 82 +sethae +sethostname 88 +setitimer 83 +setns 337 +setpgid 185 +setpgrp +setpriority 96 +setregid 127 +setregid32 112 +setresgid +setresgid32 110 +setresuid +setresuid32 108 +setreuid 126 +setreuid32 72 +setrlimit 145 +setsid 175 +setsockopt 355 +settimeofday 122 +setuid 23 +setuid32 87 +setxattr 169 +sgetmask 199 +shmat 397 +shmctl 396 +shmdt 398 +shmget 395 +shutdown 134 +sigaction 198 +sigaltstack 28 +signal 48 +signalfd 311 +signalfd4 317 +sigpending 183 +sigprocmask 220 +sigreturn 216 +sigsuspend 201 +socket 97 +socketcall 206 +socketpair 135 +splice 232 +spu_create +spu_run +ssetmask 200 +stat 38 +stat64 139 +statfs 157 +statfs64 234 +statx 360 +stime 233 +subpage_prot +swapcontext +swapoff 213 +swapon 85 +switch_endian +symlink 57 +symlinkat 293 +sync 36 +sync_file_range 255 +sync_file_range2 +syncfs 335 +sys_debug_setcontext +syscall +sysfs 226 +sysinfo 214 +syslog 207 +sysmips +tee 280 +tgkill 211 +time 231 +timer_create 266 +timer_delete 265 +timer_getoverrun 264 +timer_gettime 263 +timer_gettime64 408 +timer_settime 262 +timer_settime64 409 +timerfd +timerfd_create 312 +timerfd_gettime 316 +timerfd_gettime64 410 +timerfd_settime 315 +timerfd_settime64 411 +times 43 +tkill 187 +truncate 129 +truncate64 77 +ugetrlimit +umask 60 +umount 159 +umount2 45 +uname 189 +unlink 10 +unlinkat 290 +unshare 299 +userfaultfd 352 +ustat 168 +utime 30 +utimensat 310 +utimensat_time64 412 +utimes 138 +utrap_install +vfork 66 +vhangup 76 +vm86 +vm86old +vmsplice 25 +wait4 7 +waitid 279 +waitpid 212 +write 4 +writev 121 diff --git a/src/basic/syscalls-x86_64.txt b/src/basic/syscalls-x86_64.txt new file mode 100644 index 0000000..cf888e8 --- /dev/null +++ b/src/basic/syscalls-x86_64.txt @@ -0,0 +1,514 @@ +_llseek +_newselect +accept 43 +accept4 288 +access 21 +acct 163 +add_key 248 +adjtimex 159 +alarm 37 +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl 158 +arm_fadvise64_64 +atomic_barrier +atomic_cmpxchg_32 +bind 49 +bpf 321 +brk 12 +cachectl +cacheflush +capget 125 +capset 126 +chdir 80 +chmod 90 +chown 92 +chown32 +chroot 161 +clock_adjtime 305 +clock_adjtime64 +clock_getres 229 +clock_getres_time64 +clock_gettime 228 +clock_gettime64 +clock_nanosleep 230 +clock_nanosleep_time64 +clock_settime 227 +clock_settime64 +clone 56 +clone3 435 +close 3 +close_range 436 +connect 42 +copy_file_range 326 +creat 85 +delete_module 176 +dipc +dup 32 +dup2 33 +dup3 292 +epoll_create 213 +epoll_create1 291 +epoll_ctl 233 +epoll_ctl_old 214 +epoll_pwait 281 +epoll_pwait2 441 +epoll_wait 232 +epoll_wait_old 215 +eventfd 284 +eventfd2 290 +exec_with_loader +execv +execve 59 +execveat 322 +exit 60 +exit_group 231 +faccessat 269 +faccessat2 439 +fadvise64 221 +fadvise64_64 +fallocate 285 +fanotify_init 300 +fanotify_mark 301 +fchdir 81 +fchmod 91 +fchmodat 268 +fchmodat2 452 +fchown 93 +fchown32 +fchownat 260 +fcntl 72 +fcntl64 +fdatasync 75 +fgetxattr 193 +finit_module 313 +flistxattr 196 +flock 73 +fork 57 +fremovexattr 199 +fsconfig 431 +fsetxattr 190 +fsmount 432 +fsopen 430 +fspick 433 +fstat 5 +fstat64 +fstatat64 +fstatfs 138 +fstatfs64 +fsync 74 +ftruncate 77 +ftruncate64 +futex 202 +futex_requeue 456 +futex_time64 +futex_wait 455 +futex_waitv 449 +futex_wake 454 +futimesat 261 +get_mempolicy 239 +get_robust_list 274 +get_thread_area 211 +getcpu 309 +getcwd 79 +getdents 78 +getdents64 217 +getdomainname +getdtablesize +getegid 108 +getegid32 +geteuid 107 +geteuid32 +getgid 104 +getgid32 +getgroups 115 +getgroups32 +gethostname +getitimer 36 +getpagesize +getpeername 52 +getpgid 121 +getpgrp 111 +getpid 39 +getppid 110 +getpriority 140 +getrandom 318 +getresgid 120 +getresgid32 +getresuid 118 +getresuid32 +getrlimit 97 +getrusage 98 +getsid 124 +getsockname 51 +getsockopt 55 +gettid 186 +gettimeofday 96 +getuid 102 +getuid32 +getxattr 191 +getxgid +getxpid +getxuid +init_module 175 +inotify_add_watch 254 +inotify_init 253 +inotify_init1 294 +inotify_rm_watch 255 +io_cancel 210 +io_destroy 207 +io_getevents 208 +io_pgetevents 333 +io_pgetevents_time64 +io_setup 206 +io_submit 209 +io_uring_enter 426 +io_uring_register 427 +io_uring_setup 425 +ioctl 16 +ioperm 173 +iopl 172 +ioprio_get 252 +ioprio_set 251 +ipc +kcmp 312 +kern_features +kexec_file_load 320 +kexec_load 246 +keyctl 250 +kill 62 +landlock_add_rule 445 +landlock_create_ruleset 444 +landlock_restrict_self 446 +lchown 94 +lchown32 +lgetxattr 192 +link 86 +linkat 265 +listen 50 +listxattr 194 +llistxattr 195 +lookup_dcookie 212 +lremovexattr 198 +lseek 8 +lsetxattr 189 +lstat 6 +lstat64 +madvise 28 +map_shadow_stack 453 +mbind 237 +membarrier 324 +memfd_create 319 +memfd_secret 447 +memory_ordering +migrate_pages 256 +mincore 27 +mkdir 83 +mkdirat 258 +mknod 133 +mknodat 259 +mlock 149 +mlock2 325 +mlockall 151 +mmap 9 +mmap2 +modify_ldt 154 +mount 165 +mount_setattr 442 +move_mount 429 +move_pages 279 +mprotect 10 +mq_getsetattr 245 +mq_notify 244 +mq_open 240 +mq_timedreceive 243 +mq_timedreceive_time64 +mq_timedsend 242 +mq_timedsend_time64 +mq_unlink 241 +mremap 25 +msgctl 71 +msgget 68 +msgrcv 70 +msgsnd 69 +msync 26 +multiplexer +munlock 150 +munlockall 152 +munmap 11 +name_to_handle_at 303 +nanosleep 35 +newfstatat 262 +nice +old_adjtimex +oldfstat +oldlstat +oldolduname +oldstat +oldumount +olduname +open 2 +open_by_handle_at 304 +open_tree 428 +openat 257 +openat2 437 +or1k_atomic +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_getdirentries +osf_getdomainname +osf_getitimer +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_lstat +osf_mount +osf_proplist_syscall +osf_select +osf_set_program_attributes +osf_setitimer +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_sigprocmask +osf_sigstack +osf_stat +osf_statfs +osf_statfs64 +osf_swapon +osf_syscall +osf_sysinfo +osf_usleep_thread +osf_utimes +osf_utsname +osf_wait4 +pause 34 +pciconfig_iobase +pciconfig_read +pciconfig_write +perf_event_open 298 +perfctr +personality 135 +pidfd_getfd 438 +pidfd_open 434 +pidfd_send_signal 424 +pipe 22 +pipe2 293 +pivot_root 155 +pkey_alloc 330 +pkey_free 331 +pkey_mprotect 329 +poll 7 +ppoll 271 +ppoll_time64 +prctl 157 +pread64 17 +preadv 295 +preadv2 327 +prlimit64 302 +process_madvise 440 +process_mrelease 448 +process_vm_readv 310 +process_vm_writev 311 +pselect6 270 +pselect6_time64 +ptrace 101 +pwrite64 18 +pwritev 296 +pwritev2 328 +quotactl 179 +quotactl_fd 443 +read 0 +readahead 187 +readdir +readlink 89 +readlinkat 267 +readv 19 +reboot 169 +recv +recvfrom 45 +recvmmsg 299 +recvmmsg_time64 +recvmsg 47 +remap_file_pages 216 +removexattr 197 +rename 82 +renameat 264 +renameat2 316 +request_key 249 +restart_syscall 219 +riscv_flush_icache +riscv_hwprobe +rmdir 84 +rseq 334 +rt_sigaction 13 +rt_sigpending 127 +rt_sigprocmask 14 +rt_sigqueueinfo 129 +rt_sigreturn 15 +rt_sigsuspend 130 +rt_sigtimedwait 128 +rt_sigtimedwait_time64 +rt_tgsigqueueinfo 297 +rtas +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max 146 +sched_get_priority_min 147 +sched_getaffinity 204 +sched_getattr 315 +sched_getparam 143 +sched_getscheduler 145 +sched_rr_get_interval 148 +sched_rr_get_interval_time64 +sched_set_affinity +sched_setaffinity 203 +sched_setattr 314 +sched_setparam 142 +sched_setscheduler 144 +sched_yield 24 +seccomp 317 +select 23 +semctl 66 +semget 64 +semop 65 +semtimedop 220 +semtimedop_time64 +send +sendfile 40 +sendfile64 +sendmmsg 307 +sendmsg 46 +sendto 44 +set_mempolicy 238 +set_mempolicy_home_node 450 +set_robust_list 273 +set_thread_area 205 +set_tid_address 218 +setdomainname 171 +setfsgid 123 +setfsgid32 +setfsuid 122 +setfsuid32 +setgid 106 +setgid32 +setgroups 116 +setgroups32 +sethae +sethostname 170 +setitimer 38 +setns 308 +setpgid 109 +setpgrp +setpriority 141 +setregid 114 +setregid32 +setresgid 119 +setresgid32 +setresuid 117 +setresuid32 +setreuid 113 +setreuid32 +setrlimit 160 +setsid 112 +setsockopt 54 +settimeofday 164 +setuid 105 +setuid32 +setxattr 188 +sgetmask +shmat 30 +shmctl 31 +shmdt 67 +shmget 29 +shutdown 48 +sigaction +sigaltstack 131 +signal +signalfd 282 +signalfd4 289 +sigpending +sigprocmask +sigreturn +sigsuspend +socket 41 +socketcall +socketpair 53 +splice 275 +spu_create +spu_run +ssetmask +stat 4 +stat64 +statfs 137 +statfs64 +statx 332 +stime +subpage_prot +swapcontext +swapoff 168 +swapon 167 +switch_endian +symlink 88 +symlinkat 266 +sync 162 +sync_file_range 277 +sync_file_range2 +syncfs 306 +sys_debug_setcontext +syscall +sysfs 139 +sysinfo 99 +syslog 103 +sysmips +tee 276 +tgkill 234 +time 201 +timer_create 222 +timer_delete 226 +timer_getoverrun 225 +timer_gettime 224 +timer_gettime64 +timer_settime 223 +timer_settime64 +timerfd +timerfd_create 283 +timerfd_gettime 287 +timerfd_gettime64 +timerfd_settime 286 +timerfd_settime64 +times 100 +tkill 200 +truncate 76 +truncate64 +ugetrlimit +umask 95 +umount +umount2 166 +uname 63 +unlink 87 +unlinkat 263 +unshare 272 +userfaultfd 323 +ustat 136 +utime 132 +utimensat 280 +utimensat_time64 +utimes 235 +utrap_install +vfork 58 +vhangup 153 +vm86 +vm86old +vmsplice 278 +wait4 61 +waitid 247 +waitpid +write 1 +writev 20 diff --git a/src/basic/sysctl-util.c b/src/basic/sysctl-util.c new file mode 100644 index 0000000..b66a662 --- /dev/null +++ b/src/basic/sysctl-util.c @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include + +#include "af-list.h" +#include "fd-util.h" +#include "fileio.h" +#include "log.h" +#include "macro.h" +#include "path-util.h" +#include "socket-util.h" +#include "string-util.h" +#include "sysctl-util.h" + +char *sysctl_normalize(char *s) { + char *n; + + n = strpbrk(s, "/."); + + /* If the first separator is a slash, the path is + * assumed to be normalized and slashes remain slashes + * and dots remains dots. */ + + if (n && *n == '.') + /* Dots become slashes and slashes become dots. Fun. */ + do { + if (*n == '.') + *n = '/'; + else + *n = '.'; + + n = strpbrk(n + 1, "/."); + } while (n); + + path_simplify(s); + + /* Kill the leading slash, but keep the first character of the string in the same place. */ + if (s[0] == '/' && s[1] != 0) + memmove(s, s+1, strlen(s)); + + return s; +} + +int sysctl_write(const char *property, const char *value) { + char *p; + + assert(property); + assert(value); + + p = strjoina("/proc/sys/", property); + + path_simplify(p); + if (!path_is_normalized(p)) + return -EINVAL; + + log_debug("Setting '%s' to '%s'", p, value); + + return write_string_file(p, value, WRITE_STRING_FILE_VERIFY_ON_FAILURE | WRITE_STRING_FILE_DISABLE_BUFFER | WRITE_STRING_FILE_SUPPRESS_REDUNDANT_VIRTUAL); +} + +int sysctl_writef(const char *property, const char *format, ...) { + _cleanup_free_ char *v = NULL; + va_list ap; + int r; + + va_start(ap, format); + r = vasprintf(&v, format, ap); + va_end(ap); + + if (r < 0) + return -ENOMEM; + + return sysctl_write(property, v); +} + +int sysctl_write_ip_property(int af, const char *ifname, const char *property, const char *value) { + const char *p; + + assert(property); + assert(value); + + if (!IN_SET(af, AF_INET, AF_INET6)) + return -EAFNOSUPPORT; + + if (ifname) { + if (!ifname_valid_full(ifname, IFNAME_VALID_SPECIAL)) + return -EINVAL; + + p = strjoina("net/", af_to_ipv4_ipv6(af), "/conf/", ifname, "/", property); + } else + p = strjoina("net/", af_to_ipv4_ipv6(af), "/", property); + + return sysctl_write(p, value); +} + +int sysctl_read(const char *property, char **ret) { + char *p; + int r; + + assert(property); + + p = strjoina("/proc/sys/", property); + + path_simplify(p); + if (!path_is_normalized(p)) /* Filter out attempts to write to /proc/sys/../../…, just in case */ + return -EINVAL; + + r = read_full_virtual_file(p, ret, NULL); + if (r < 0) + return r; + if (ret) + delete_trailing_chars(*ret, NEWLINE); + + return r; +} + +int sysctl_read_ip_property(int af, const char *ifname, const char *property, char **ret) { + const char *p; + + assert(property); + + if (!IN_SET(af, AF_INET, AF_INET6)) + return -EAFNOSUPPORT; + + if (ifname) { + if (!ifname_valid_full(ifname, IFNAME_VALID_SPECIAL)) + return -EINVAL; + + p = strjoina("net/", af_to_ipv4_ipv6(af), "/conf/", ifname, "/", property); + } else + p = strjoina("net/", af_to_ipv4_ipv6(af), "/", property); + + return sysctl_read(p, ret); +} diff --git a/src/basic/sysctl-util.h b/src/basic/sysctl-util.h new file mode 100644 index 0000000..3236419 --- /dev/null +++ b/src/basic/sysctl-util.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include + +#include "macro.h" +#include "stdio-util.h" +#include "string-util.h" + +char *sysctl_normalize(char *s); +int sysctl_read(const char *property, char **value); +int sysctl_write(const char *property, const char *value); +int sysctl_writef(const char *property, const char *format, ...) _printf_(2, 3); + +int sysctl_read_ip_property(int af, const char *ifname, const char *property, char **ret); +int sysctl_write_ip_property(int af, const char *ifname, const char *property, const char *value); +static inline int sysctl_write_ip_property_boolean(int af, const char *ifname, const char *property, bool value) { + return sysctl_write_ip_property(af, ifname, property, one_zero(value)); +} + +#define DEFINE_SYSCTL_WRITE_IP_PROPERTY(name, type, format) \ + static inline int sysctl_write_ip_property_##name(int af, const char *ifname, const char *property, type value) { \ + char buf[DECIMAL_STR_MAX(type)]; \ + xsprintf(buf, format, value); \ + return sysctl_write_ip_property(af, ifname, property, buf); \ + } + +DEFINE_SYSCTL_WRITE_IP_PROPERTY(int, int, "%i"); +DEFINE_SYSCTL_WRITE_IP_PROPERTY(uint32, uint32_t, "%" PRIu32); diff --git a/src/basic/syslog-util.c b/src/basic/syslog-util.c new file mode 100644 index 0000000..0371922 --- /dev/null +++ b/src/basic/syslog-util.c @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include + +#include "sd-id128.h" + +#include "glob-util.h" +#include "hexdecoct.h" +#include "macro.h" +#include "path-util.h" +#include "string-table.h" +#include "syslog-util.h" +#include "unit-name.h" + +int syslog_parse_priority(const char **p, int *priority, bool with_facility) { + int a = 0, b = 0, c = 0; + const char *end; + size_t k; + + assert(p); + assert(*p); + assert(priority); + + if ((*p)[0] != '<') + return 0; + + end = strchr(*p, '>'); + if (!end) + return 0; + + k = end - *p; + assert(k > 0); + + if (k == 2) + c = undecchar((*p)[1]); + else if (k == 3) { + b = undecchar((*p)[1]); + c = undecchar((*p)[2]); + } else if (k == 4) { + a = undecchar((*p)[1]); + b = undecchar((*p)[2]); + c = undecchar((*p)[3]); + } else + return 0; + + if (a < 0 || b < 0 || c < 0 || + (!with_facility && (a || b || c > 7))) + return 0; + + if (with_facility) + *priority = a*100 + b*10 + c; + else + *priority = (*priority & LOG_FACMASK) | c; + + *p += k + 1; + return 1; +} + +static const char *const log_facility_unshifted_table[LOG_NFACILITIES] = { + [LOG_FAC(LOG_KERN)] = "kern", + [LOG_FAC(LOG_USER)] = "user", + [LOG_FAC(LOG_MAIL)] = "mail", + [LOG_FAC(LOG_DAEMON)] = "daemon", + [LOG_FAC(LOG_AUTH)] = "auth", + [LOG_FAC(LOG_SYSLOG)] = "syslog", + [LOG_FAC(LOG_LPR)] = "lpr", + [LOG_FAC(LOG_NEWS)] = "news", + [LOG_FAC(LOG_UUCP)] = "uucp", + [LOG_FAC(LOG_CRON)] = "cron", + [LOG_FAC(LOG_AUTHPRIV)] = "authpriv", + [LOG_FAC(LOG_FTP)] = "ftp", + [LOG_FAC(LOG_LOCAL0)] = "local0", + [LOG_FAC(LOG_LOCAL1)] = "local1", + [LOG_FAC(LOG_LOCAL2)] = "local2", + [LOG_FAC(LOG_LOCAL3)] = "local3", + [LOG_FAC(LOG_LOCAL4)] = "local4", + [LOG_FAC(LOG_LOCAL5)] = "local5", + [LOG_FAC(LOG_LOCAL6)] = "local6", + [LOG_FAC(LOG_LOCAL7)] = "local7", +}; + +DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(log_facility_unshifted, int, LOG_FAC(~0)); + +bool log_facility_unshifted_is_valid(int facility) { + return facility >= 0 && facility <= LOG_FAC(~0); +} + +static const char *const log_level_table[] = { + [LOG_EMERG] = "emerg", + [LOG_ALERT] = "alert", + [LOG_CRIT] = "crit", + [LOG_ERR] = "err", + [LOG_WARNING] = "warning", + [LOG_NOTICE] = "notice", + [LOG_INFO] = "info", + [LOG_DEBUG] = "debug", +}; + +DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(log_level, int, LOG_DEBUG); + +bool log_level_is_valid(int level) { + return level >= 0 && level <= LOG_DEBUG; +} + +/* The maximum size for a log namespace length. This is the file name size limit 255 minus the size of a + * formatted machine ID minus a separator char */ +#define LOG_NAMESPACE_MAX (NAME_MAX - (SD_ID128_STRING_MAX - 1) - 1) + +bool log_namespace_name_valid(const char *s) { + /* Let's make sure the namespace fits in a filename that is prefixed with the machine ID and a dot + * (so that /var/log/journal/. can be created based on it). Also make sure it + * is suitable as unit instance name, and does not contain fishy characters. */ + + if (!filename_is_valid(s)) + return false; + + if (strlen(s) > LOG_NAMESPACE_MAX) + return false; + + if (!unit_instance_is_valid(s)) + return false; + + if (!string_is_safe(s)) + return false; + + /* Let's avoid globbing for now */ + if (string_is_glob(s)) + return false; + + return true; +} diff --git a/src/basic/syslog-util.h b/src/basic/syslog-util.h new file mode 100644 index 0000000..d7aa97f --- /dev/null +++ b/src/basic/syslog-util.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include + +int log_facility_unshifted_to_string_alloc(int i, char **s); +int log_facility_unshifted_from_string(const char *s); +bool log_facility_unshifted_is_valid(int faciliy); + +int log_level_to_string_alloc(int i, char **s); +int log_level_from_string(const char *s); +bool log_level_is_valid(int level); + +int syslog_parse_priority(const char **p, int *priority, bool with_facility); + +bool log_namespace_name_valid(const char *s); diff --git a/src/basic/terminal-util.c b/src/basic/terminal-util.c new file mode 100644 index 0000000..b9d1ff8 --- /dev/null +++ b/src/basic/terminal-util.c @@ -0,0 +1,1463 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "def.h" +#include "devnum-util.h" +#include "env-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "inotify-util.h" +#include "io-util.h" +#include "log.h" +#include "macro.h" +#include "namespace-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "proc-cmdline.h" +#include "process-util.h" +#include "socket-util.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "time-util.h" +#include "user-util.h" +#include "util.h" + +static volatile unsigned cached_columns = 0; +static volatile unsigned cached_lines = 0; + +static volatile int cached_on_tty = -1; +static volatile int cached_color_mode = _COLOR_INVALID; +static volatile int cached_underline_enabled = -1; + +int chvt(int vt) { + _cleanup_close_ int fd = -1; + + /* Switch to the specified vt number. If the VT is specified <= 0 switch to the VT the kernel log messages go, + * if that's configured. */ + + fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK); + if (fd < 0) + return -errno; + + if (vt <= 0) { + int tiocl[2] = { + TIOCL_GETKMSGREDIRECT, + 0 + }; + + if (ioctl(fd, TIOCLINUX, tiocl) < 0) + return -errno; + + vt = tiocl[0] <= 0 ? 1 : tiocl[0]; + } + + return RET_NERRNO(ioctl(fd, VT_ACTIVATE, vt)); +} + +int read_one_char(FILE *f, char *ret, usec_t t, bool *need_nl) { + _cleanup_free_ char *line = NULL; + struct termios old_termios; + int r, fd; + + assert(f); + assert(ret); + + /* If this is a terminal, then switch canonical mode off, so that we can read a single + * character. (Note that fmemopen() streams do not have an fd associated with them, let's handle that + * nicely.) */ + fd = fileno(f); + if (fd >= 0 && tcgetattr(fd, &old_termios) >= 0) { + struct termios new_termios = old_termios; + + new_termios.c_lflag &= ~ICANON; + new_termios.c_cc[VMIN] = 1; + new_termios.c_cc[VTIME] = 0; + + if (tcsetattr(fd, TCSADRAIN, &new_termios) >= 0) { + char c; + + if (t != USEC_INFINITY) { + if (fd_wait_for_event(fd, POLLIN, t) <= 0) { + (void) tcsetattr(fd, TCSADRAIN, &old_termios); + return -ETIMEDOUT; + } + } + + r = safe_fgetc(f, &c); + (void) tcsetattr(fd, TCSADRAIN, &old_termios); + if (r < 0) + return r; + if (r == 0) + return -EIO; + + if (need_nl) + *need_nl = c != '\n'; + + *ret = c; + return 0; + } + } + + if (t != USEC_INFINITY && fd > 0) { + /* Let's wait the specified amount of time for input. When we have no fd we skip this, under + * the assumption that this is an fmemopen() stream or so where waiting doesn't make sense + * anyway, as the data is either already in the stream or cannot possible be placed there + * while we access the stream */ + + if (fd_wait_for_event(fd, POLLIN, t) <= 0) + return -ETIMEDOUT; + } + + /* If this is not a terminal, then read a full line instead */ + + r = read_line(f, 16, &line); /* longer than necessary, to eat up UTF-8 chars/vt100 key sequences */ + if (r < 0) + return r; + if (r == 0) + return -EIO; + + if (strlen(line) != 1) + return -EBADMSG; + + if (need_nl) + *need_nl = false; + + *ret = line[0]; + return 0; +} + +#define DEFAULT_ASK_REFRESH_USEC (2*USEC_PER_SEC) + +int ask_char(char *ret, const char *replies, const char *fmt, ...) { + int r; + + assert(ret); + assert(replies); + assert(fmt); + + for (;;) { + va_list ap; + char c; + bool need_nl = true; + + fputs(ansi_highlight(), stdout); + + putchar('\r'); + + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + + fputs(ansi_normal(), stdout); + + fflush(stdout); + + r = read_one_char(stdin, &c, DEFAULT_ASK_REFRESH_USEC, &need_nl); + if (r < 0) { + + if (r == -ETIMEDOUT) + continue; + + if (r == -EBADMSG) { + puts("Bad input, please try again."); + continue; + } + + putchar('\n'); + return r; + } + + if (need_nl) + putchar('\n'); + + if (strchr(replies, c)) { + *ret = c; + return 0; + } + + puts("Read unexpected character, please try again."); + } +} + +int ask_string(char **ret, const char *text, ...) { + _cleanup_free_ char *line = NULL; + va_list ap; + int r; + + assert(ret); + assert(text); + + fputs(ansi_highlight(), stdout); + + va_start(ap, text); + vprintf(text, ap); + va_end(ap); + + fputs(ansi_normal(), stdout); + + fflush(stdout); + + r = read_line(stdin, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + return -EIO; + + *ret = TAKE_PTR(line); + return 0; +} + +int reset_terminal_fd(int fd, bool switch_to_text) { + struct termios termios; + int r = 0; + + /* Set terminal to some sane defaults */ + + assert(fd >= 0); + + if (isatty(fd) < 1) + return log_debug_errno(errno, "Asked to reset a terminal that actually isn't a terminal: %m"); + + /* We leave locked terminal attributes untouched, so that Plymouth may set whatever it wants to set, + * and we don't interfere with that. */ + + /* Disable exclusive mode, just in case */ + if (ioctl(fd, TIOCNXCL) < 0) + log_debug_errno(errno, "TIOCNXCL ioctl failed on TTY, ignoring: %m"); + + /* Switch to text mode */ + if (switch_to_text) + if (ioctl(fd, KDSETMODE, KD_TEXT) < 0) + log_debug_errno(errno, "KDSETMODE ioctl for switching to text mode failed on TTY, ignoring: %m"); + + + /* Set default keyboard mode */ + (void) vt_reset_keyboard(fd); + + if (tcgetattr(fd, &termios) < 0) { + r = log_debug_errno(errno, "Failed to get terminal parameters: %m"); + goto finish; + } + + /* We only reset the stuff that matters to the software. How + * hardware is set up we don't touch assuming that somebody + * else will do that for us */ + + termios.c_iflag &= ~(IGNBRK | BRKINT | ISTRIP | INLCR | IGNCR | IUCLC); + termios.c_iflag |= ICRNL | IMAXBEL | IUTF8; + termios.c_oflag |= ONLCR | OPOST; + termios.c_cflag |= CREAD; + termios.c_lflag = ISIG | ICANON | IEXTEN | ECHO | ECHOE | ECHOK | ECHOCTL | ECHOKE; + + termios.c_cc[VINTR] = 03; /* ^C */ + termios.c_cc[VQUIT] = 034; /* ^\ */ + termios.c_cc[VERASE] = 0177; + termios.c_cc[VKILL] = 025; /* ^X */ + termios.c_cc[VEOF] = 04; /* ^D */ + termios.c_cc[VSTART] = 021; /* ^Q */ + termios.c_cc[VSTOP] = 023; /* ^S */ + termios.c_cc[VSUSP] = 032; /* ^Z */ + termios.c_cc[VLNEXT] = 026; /* ^V */ + termios.c_cc[VWERASE] = 027; /* ^W */ + termios.c_cc[VREPRINT] = 022; /* ^R */ + termios.c_cc[VEOL] = 0; + termios.c_cc[VEOL2] = 0; + + termios.c_cc[VTIME] = 0; + termios.c_cc[VMIN] = 1; + + if (tcsetattr(fd, TCSANOW, &termios) < 0) + r = -errno; + +finish: + /* Just in case, flush all crap out */ + (void) tcflush(fd, TCIOFLUSH); + + return r; +} + +int reset_terminal(const char *name) { + _cleanup_close_ int fd = -1; + + /* We open the terminal with O_NONBLOCK here, to ensure we + * don't block on carrier if this is a terminal with carrier + * configured. */ + + fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK); + if (fd < 0) + return fd; + + return reset_terminal_fd(fd, true); +} + +int open_terminal(const char *name, int mode) { + _cleanup_close_ int fd = -1; + unsigned c = 0; + + /* + * If a TTY is in the process of being closed opening it might cause EIO. This is horribly awful, but + * unlikely to be changed in the kernel. Hence we work around this problem by retrying a couple of + * times. + * + * https://bugs.launchpad.net/ubuntu/+source/linux/+bug/554172/comments/245 + */ + + if (mode & O_CREAT) + return -EINVAL; + + for (;;) { + fd = open(name, mode, 0); + if (fd >= 0) + break; + + if (errno != EIO) + return -errno; + + /* Max 1s in total */ + if (c >= 20) + return -errno; + + (void) usleep(50 * USEC_PER_MSEC); + c++; + } + + if (isatty(fd) < 1) + return negative_errno(); + + return TAKE_FD(fd); +} + +int acquire_terminal( + const char *name, + AcquireTerminalFlags flags, + usec_t timeout) { + + _cleanup_close_ int notify = -1, fd = -1; + usec_t ts = USEC_INFINITY; + int r, wd = -1; + + assert(name); + assert(IN_SET(flags & ~ACQUIRE_TERMINAL_PERMISSIVE, ACQUIRE_TERMINAL_TRY, ACQUIRE_TERMINAL_FORCE, ACQUIRE_TERMINAL_WAIT)); + + /* We use inotify to be notified when the tty is closed. We create the watch before checking if we can actually + * acquire it, so that we don't lose any event. + * + * Note: strictly speaking this actually watches for the device being closed, it does *not* really watch + * whether a tty loses its controlling process. However, unless some rogue process uses TIOCNOTTY on /dev/tty + * *after* closing its tty otherwise this will not become a problem. As long as the administrator makes sure to + * not configure any service on the same tty as an untrusted user this should not be a problem. (Which they + * probably should not do anyway.) */ + + if ((flags & ~ACQUIRE_TERMINAL_PERMISSIVE) == ACQUIRE_TERMINAL_WAIT) { + notify = inotify_init1(IN_CLOEXEC | (timeout != USEC_INFINITY ? IN_NONBLOCK : 0)); + if (notify < 0) + return -errno; + + wd = inotify_add_watch(notify, name, IN_CLOSE); + if (wd < 0) + return -errno; + + if (timeout != USEC_INFINITY) + ts = now(CLOCK_MONOTONIC); + } + + for (;;) { + struct sigaction sa_old, sa_new = { + .sa_handler = SIG_IGN, + .sa_flags = SA_RESTART, + }; + + if (notify >= 0) { + r = flush_fd(notify); + if (r < 0) + return r; + } + + /* We pass here O_NOCTTY only so that we can check the return value TIOCSCTTY and have a reliable way + * to figure out if we successfully became the controlling process of the tty */ + fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC); + if (fd < 0) + return fd; + + /* Temporarily ignore SIGHUP, so that we don't get SIGHUP'ed if we already own the tty. */ + assert_se(sigaction(SIGHUP, &sa_new, &sa_old) == 0); + + /* First, try to get the tty */ + r = RET_NERRNO(ioctl(fd, TIOCSCTTY, (flags & ~ACQUIRE_TERMINAL_PERMISSIVE) == ACQUIRE_TERMINAL_FORCE)); + + /* Reset signal handler to old value */ + assert_se(sigaction(SIGHUP, &sa_old, NULL) == 0); + + /* Success? Exit the loop now! */ + if (r >= 0) + break; + + /* Any failure besides -EPERM? Fail, regardless of the mode. */ + if (r != -EPERM) + return r; + + if (flags & ACQUIRE_TERMINAL_PERMISSIVE) /* If we are in permissive mode, then EPERM is fine, turn this + * into a success. Note that EPERM is also returned if we + * already are the owner of the TTY. */ + break; + + if (flags != ACQUIRE_TERMINAL_WAIT) /* If we are in TRY or FORCE mode, then propagate EPERM as EPERM */ + return r; + + assert(notify >= 0); + assert(wd >= 0); + + for (;;) { + union inotify_event_buffer buffer; + ssize_t l; + + if (timeout != USEC_INFINITY) { + usec_t n; + + assert(ts != USEC_INFINITY); + + n = usec_sub_unsigned(now(CLOCK_MONOTONIC), ts); + if (n >= timeout) + return -ETIMEDOUT; + + r = fd_wait_for_event(notify, POLLIN, usec_sub_unsigned(timeout, n)); + if (r < 0) + return r; + if (r == 0) + return -ETIMEDOUT; + } + + l = read(notify, &buffer, sizeof(buffer)); + if (l < 0) { + if (ERRNO_IS_TRANSIENT(errno)) + continue; + + return -errno; + } + + FOREACH_INOTIFY_EVENT(e, buffer, l) { + if (e->mask & IN_Q_OVERFLOW) /* If we hit an inotify queue overflow, simply check if the terminal is up for grabs now. */ + break; + + if (e->wd != wd || !(e->mask & IN_CLOSE)) /* Safety checks */ + return -EIO; + } + + break; + } + + /* We close the tty fd here since if the old session ended our handle will be dead. It's important that + * we do this after sleeping, so that we don't enter an endless loop. */ + fd = safe_close(fd); + } + + return TAKE_FD(fd); +} + +int release_terminal(void) { + static const struct sigaction sa_new = { + .sa_handler = SIG_IGN, + .sa_flags = SA_RESTART, + }; + + _cleanup_close_ int fd = -1; + struct sigaction sa_old; + int r; + + fd = open("/dev/tty", O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK); + if (fd < 0) + return -errno; + + /* Temporarily ignore SIGHUP, so that we don't get SIGHUP'ed + * by our own TIOCNOTTY */ + assert_se(sigaction(SIGHUP, &sa_new, &sa_old) == 0); + + r = RET_NERRNO(ioctl(fd, TIOCNOTTY)); + + assert_se(sigaction(SIGHUP, &sa_old, NULL) == 0); + + return r; +} + +int terminal_vhangup_fd(int fd) { + assert(fd >= 0); + return RET_NERRNO(ioctl(fd, TIOCVHANGUP)); +} + +int terminal_vhangup(const char *name) { + _cleanup_close_ int fd = -1; + + fd = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK); + if (fd < 0) + return fd; + + return terminal_vhangup_fd(fd); +} + +int vt_disallocate(const char *name) { + const char *e; + int r; + + /* Deallocate the VT if possible. If not possible + * (i.e. because it is the active one), at least clear it + * entirely (including the scrollback buffer). */ + + e = path_startswith(name, "/dev/"); + if (!e) + return -EINVAL; + + if (tty_is_vc(name)) { + _cleanup_close_ int fd = -1; + unsigned u; + const char *n; + + n = startswith(e, "tty"); + if (!n) + return -EINVAL; + + r = safe_atou(n, &u); + if (r < 0) + return r; + + if (u <= 0) + return -EINVAL; + + /* Try to deallocate */ + fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK); + if (fd < 0) + return fd; + + r = ioctl(fd, VT_DISALLOCATE, u); + if (r >= 0) + return 0; + if (errno != EBUSY) + return -errno; + } + + /* So this is not a VT (in which case we cannot deallocate it), + * or we failed to deallocate. Let's at least clear the screen. */ + + _cleanup_close_ int fd2 = open_terminal(name, O_RDWR|O_NOCTTY|O_CLOEXEC); + if (fd2 < 0) + return fd2; + + (void) loop_write(fd2, + "\033[r" /* clear scrolling region */ + "\033[H" /* move home */ + "\033[3J", /* clear screen including scrollback, requires Linux 2.6.40 */ + 10, false); + return 0; +} + +int make_console_stdio(void) { + int fd, r; + + /* Make /dev/console the controlling terminal and stdin/stdout/stderr, if we can. If we can't use + * /dev/null instead. This is particularly useful if /dev/console is turned off, e.g. if console=null + * is specified on the kernel command line. */ + + fd = acquire_terminal("/dev/console", ACQUIRE_TERMINAL_FORCE|ACQUIRE_TERMINAL_PERMISSIVE, USEC_INFINITY); + if (fd < 0) { + log_warning_errno(fd, "Failed to acquire terminal, using /dev/null stdin/stdout/stderr instead: %m"); + + r = make_null_stdio(); + if (r < 0) + return log_error_errno(r, "Failed to make /dev/null stdin/stdout/stderr: %m"); + + } else { + r = reset_terminal_fd(fd, true); + if (r < 0) + log_warning_errno(r, "Failed to reset terminal, ignoring: %m"); + + r = rearrange_stdio(fd, fd, fd); /* This invalidates 'fd' both on success and on failure. */ + if (r < 0) + return log_error_errno(r, "Failed to make terminal stdin/stdout/stderr: %m"); + } + + reset_terminal_feature_caches(); + return 0; +} + +bool tty_is_vc(const char *tty) { + assert(tty); + + return vtnr_from_tty(tty) >= 0; +} + +bool tty_is_console(const char *tty) { + assert(tty); + + return streq(skip_dev_prefix(tty), "console"); +} + +int vtnr_from_tty(const char *tty) { + int i, r; + + assert(tty); + + tty = skip_dev_prefix(tty); + + if (!startswith(tty, "tty") ) + return -EINVAL; + + if (!ascii_isdigit(tty[3])) + return -EINVAL; + + r = safe_atoi(tty+3, &i); + if (r < 0) + return r; + + if (i < 0 || i > 63) + return -EINVAL; + + return i; +} + + int resolve_dev_console(char **ret) { + _cleanup_free_ char *active = NULL; + char *tty; + int r; + + assert(ret); + + /* Resolve where /dev/console is pointing to, if /sys is actually ours (i.e. not read-only-mounted which is a + * sign for container setups) */ + + if (path_is_read_only_fs("/sys") > 0) + return -ENOMEDIUM; + + r = read_one_line_file("/sys/class/tty/console/active", &active); + if (r < 0) + return r; + + /* If multiple log outputs are configured the last one is what /dev/console points to */ + tty = strrchr(active, ' '); + if (tty) + tty++; + else + tty = active; + + if (streq(tty, "tty0")) { + active = mfree(active); + + /* Get the active VC (e.g. tty1) */ + r = read_one_line_file("/sys/class/tty/tty0/active", &active); + if (r < 0) + return r; + + tty = active; + } + + if (tty == active) + *ret = TAKE_PTR(active); + else { + char *tmp; + + tmp = strdup(tty); + if (!tmp) + return -ENOMEM; + + *ret = tmp; + } + + return 0; +} + +int get_kernel_consoles(char ***ret) { + _cleanup_strv_free_ char **l = NULL; + _cleanup_free_ char *line = NULL; + const char *p; + int r; + + assert(ret); + + /* If /sys is mounted read-only this means we are running in some kind of container environment. In that + * case /sys would reflect the host system, not us, hence ignore the data we can read from it. */ + if (path_is_read_only_fs("/sys") > 0) + goto fallback; + + r = read_one_line_file("/sys/class/tty/console/active", &line); + if (r < 0) + return r; + + p = line; + for (;;) { + _cleanup_free_ char *tty = NULL, *path = NULL; + + r = extract_first_word(&p, &tty, NULL, 0); + if (r < 0) + return r; + if (r == 0) + break; + + if (streq(tty, "tty0")) { + tty = mfree(tty); + r = read_one_line_file("/sys/class/tty/tty0/active", &tty); + if (r < 0) + return r; + } + + path = path_join("/dev", tty); + if (!path) + return -ENOMEM; + + if (access(path, F_OK) < 0) { + log_debug_errno(errno, "Console device %s is not accessible, skipping: %m", path); + continue; + } + + r = strv_consume(&l, TAKE_PTR(path)); + if (r < 0) + return r; + } + + if (strv_isempty(l)) { + log_debug("No devices found for system console"); + goto fallback; + } + + *ret = TAKE_PTR(l); + + return 0; + +fallback: + r = strv_extend(&l, "/dev/console"); + if (r < 0) + return r; + + *ret = TAKE_PTR(l); + + return 0; +} + +bool tty_is_vc_resolve(const char *tty) { + _cleanup_free_ char *resolved = NULL; + + assert(tty); + + tty = skip_dev_prefix(tty); + + if (streq(tty, "console")) { + if (resolve_dev_console(&resolved) < 0) + return false; + + tty = resolved; + } + + return tty_is_vc(tty); +} + +const char *default_term_for_tty(const char *tty) { + return tty && tty_is_vc_resolve(tty) ? "linux" : "vt220"; +} + +int fd_columns(int fd) { + struct winsize ws = {}; + + if (fd < 0) + return -EBADF; + + if (ioctl(fd, TIOCGWINSZ, &ws) < 0) + return -errno; + + if (ws.ws_col <= 0) + return -EIO; + + return ws.ws_col; +} + +unsigned columns(void) { + const char *e; + int c; + + if (cached_columns > 0) + return cached_columns; + + c = 0; + e = getenv("COLUMNS"); + if (e) + (void) safe_atoi(e, &c); + + if (c <= 0 || c > USHRT_MAX) { + c = fd_columns(STDOUT_FILENO); + if (c <= 0) + c = 80; + } + + cached_columns = c; + return cached_columns; +} + +int fd_lines(int fd) { + struct winsize ws = {}; + + if (fd < 0) + return -EBADF; + + if (ioctl(fd, TIOCGWINSZ, &ws) < 0) + return -errno; + + if (ws.ws_row <= 0) + return -EIO; + + return ws.ws_row; +} + +unsigned lines(void) { + const char *e; + int l; + + if (cached_lines > 0) + return cached_lines; + + l = 0; + e = getenv("LINES"); + if (e) + (void) safe_atoi(e, &l); + + if (l <= 0 || l > USHRT_MAX) { + l = fd_lines(STDOUT_FILENO); + if (l <= 0) + l = 24; + } + + cached_lines = l; + return cached_lines; +} + +int terminal_set_size_fd(int fd, const char *ident, unsigned rows, unsigned cols) { + struct winsize ws; + + if (rows == UINT_MAX && cols == UINT_MAX) + return 0; + + if (ioctl(fd, TIOCGWINSZ, &ws) < 0) + return log_debug_errno(errno, + "TIOCGWINSZ ioctl for getting %s size failed, not setting terminal size: %m", + ident ?: "TTY"); + + if (rows == UINT_MAX) + rows = ws.ws_row; + else if (rows > USHRT_MAX) + rows = USHRT_MAX; + + if (cols == UINT_MAX) + cols = ws.ws_col; + else if (cols > USHRT_MAX) + cols = USHRT_MAX; + + if (rows == ws.ws_row && cols == ws.ws_col) + return 0; + + ws.ws_row = rows; + ws.ws_col = cols; + + if (ioctl(fd, TIOCSWINSZ, &ws) < 0) + return log_debug_errno(errno, "TIOCSWINSZ ioctl for setting %s size failed: %m", ident ?: "TTY"); + + return 0; +} + +/* intended to be used as a SIGWINCH sighandler */ +void columns_lines_cache_reset(int signum) { + cached_columns = 0; + cached_lines = 0; +} + +void reset_terminal_feature_caches(void) { + cached_columns = 0; + cached_lines = 0; + + cached_color_mode = _COLOR_INVALID; + cached_underline_enabled = -1; + cached_on_tty = -1; +} + +bool on_tty(void) { + + /* We check both stdout and stderr, so that situations where pipes on the shell are used are reliably + * recognized, regardless if only the output or the errors are piped to some place. Since on_tty() is generally + * used to default to a safer, non-interactive, non-color mode of operation it's probably good to be defensive + * here, and check for both. Note that we don't check for STDIN_FILENO, because it should fine to use fancy + * terminal functionality when outputting stuff, even if the input is piped to us. */ + + if (cached_on_tty < 0) + cached_on_tty = + isatty(STDOUT_FILENO) > 0 && + isatty(STDERR_FILENO) > 0; + + return cached_on_tty; +} + +int getttyname_malloc(int fd, char **ret) { + char path[PATH_MAX], *c; /* PATH_MAX is counted *with* the trailing NUL byte */ + int r; + + assert(fd >= 0); + assert(ret); + + r = ttyname_r(fd, path, sizeof path); /* positive error */ + assert(r >= 0); + if (r == ERANGE) + return -ENAMETOOLONG; + if (r > 0) + return -r; + + c = strdup(skip_dev_prefix(path)); + if (!c) + return -ENOMEM; + + *ret = c; + return 0; +} + +int getttyname_harder(int fd, char **ret) { + _cleanup_free_ char *s = NULL; + int r; + + r = getttyname_malloc(fd, &s); + if (r < 0) + return r; + + if (streq(s, "tty")) + return get_ctty(0, NULL, ret); + + *ret = TAKE_PTR(s); + return 0; +} + +int get_ctty_devnr(pid_t pid, dev_t *d) { + int r; + _cleanup_free_ char *line = NULL; + const char *p; + unsigned long ttynr; + + assert(pid >= 0); + + p = procfs_file_alloca(pid, "stat"); + r = read_one_line_file(p, &line); + if (r < 0) + return r; + + p = strrchr(line, ')'); + if (!p) + return -EIO; + + p++; + + if (sscanf(p, " " + "%*c " /* state */ + "%*d " /* ppid */ + "%*d " /* pgrp */ + "%*d " /* session */ + "%lu ", /* ttynr */ + &ttynr) != 1) + return -EIO; + + if (major(ttynr) == 0 && minor(ttynr) == 0) + return -ENXIO; + + if (d) + *d = (dev_t) ttynr; + + return 0; +} + +int get_ctty(pid_t pid, dev_t *ret_devnr, char **ret) { + char pty[STRLEN("/dev/pts/") + DECIMAL_STR_MAX(dev_t) + 1]; + _cleanup_free_ char *buf = NULL; + const char *fn = NULL, *w; + dev_t devnr; + int r; + + r = get_ctty_devnr(pid, &devnr); + if (r < 0) + return r; + + r = device_path_make_canonical(S_IFCHR, devnr, &buf); + if (r < 0) { + struct stat st; + + if (r != -ENOENT) /* No symlink for this in /dev/char/? */ + return r; + + /* Maybe this is PTY? PTY devices are not listed in /dev/char/, as they don't follow the + * Linux device model and hence device_path_make_canonical() doesn't work for them. Let's + * assume this is a PTY for a moment, and check if the device node this would then map to in + * /dev/pts/ matches the one we are looking for. This way we don't have to hardcode the major + * number (which is 136 btw), but we still rely on the fact that PTY numbers map directly to + * the minor number of the pty. */ + xsprintf(pty, "/dev/pts/%u", minor(devnr)); + + if (stat(pty, &st) < 0) { + if (errno != ENOENT) + return -errno; + + } else if (S_ISCHR(st.st_mode) && devnr == st.st_rdev) /* Bingo! */ + fn = pty; + + if (!fn) { + /* Doesn't exist, or not a PTY? Probably something similar to the PTYs which have no + * symlink in /dev/char/. Let's return something vaguely useful. */ + r = device_path_make_major_minor(S_IFCHR, devnr, &buf); + if (r < 0) + return r; + + fn = buf; + } + } else + fn = buf; + + w = path_startswith(fn, "/dev/"); + if (!w) + return -EINVAL; + + if (ret) { + _cleanup_free_ char *b = NULL; + + b = strdup(w); + if (!b) + return -ENOMEM; + + *ret = TAKE_PTR(b); + } + + if (ret_devnr) + *ret_devnr = devnr; + + return 0; +} + +int ptsname_malloc(int fd, char **ret) { + size_t l = 100; + + assert(fd >= 0); + assert(ret); + + for (;;) { + char *c; + + c = new(char, l); + if (!c) + return -ENOMEM; + + if (ptsname_r(fd, c, l) == 0) { + *ret = c; + return 0; + } + if (errno != ERANGE) { + free(c); + return -errno; + } + + free(c); + + if (l > SIZE_MAX / 2) + return -ENOMEM; + + l *= 2; + } +} + +int openpt_allocate(int flags, char **ret_slave) { + _cleanup_close_ int fd = -1; + _cleanup_free_ char *p = NULL; + int r; + + fd = posix_openpt(flags|O_NOCTTY|O_CLOEXEC); + if (fd < 0) + return -errno; + + if (ret_slave) { + r = ptsname_malloc(fd, &p); + if (r < 0) + return r; + + if (!path_startswith(p, "/dev/pts/")) + return -EINVAL; + } + + if (unlockpt(fd) < 0) + return -errno; + + if (ret_slave) + *ret_slave = TAKE_PTR(p); + + return TAKE_FD(fd); +} + +static int ptsname_namespace(int pty, char **ret) { + int no = -1, r; + + /* Like ptsname(), but doesn't assume that the path is + * accessible in the local namespace. */ + + r = ioctl(pty, TIOCGPTN, &no); + if (r < 0) + return -errno; + + if (no < 0) + return -EIO; + + if (asprintf(ret, "/dev/pts/%i", no) < 0) + return -ENOMEM; + + return 0; +} + +int openpt_allocate_in_namespace(pid_t pid, int flags, char **ret_slave) { + _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, usernsfd = -1, rootfd = -1, fd = -1; + _cleanup_close_pair_ int pair[2] = { -1, -1 }; + pid_t child; + int r; + + assert(pid > 0); + + r = namespace_open(pid, &pidnsfd, &mntnsfd, NULL, &usernsfd, &rootfd); + if (r < 0) + return r; + + if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0) + return -errno; + + r = namespace_fork("(sd-openptns)", "(sd-openpt)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG, + pidnsfd, mntnsfd, -1, usernsfd, rootfd, &child); + if (r < 0) + return r; + if (r == 0) { + pair[0] = safe_close(pair[0]); + + fd = openpt_allocate(flags, NULL); + if (fd < 0) + _exit(EXIT_FAILURE); + + if (send_one_fd(pair[1], fd, 0) < 0) + _exit(EXIT_FAILURE); + + _exit(EXIT_SUCCESS); + } + + pair[1] = safe_close(pair[1]); + + r = wait_for_terminate_and_check("(sd-openptns)", child, 0); + if (r < 0) + return r; + if (r != EXIT_SUCCESS) + return -EIO; + + fd = receive_one_fd(pair[0], 0); + if (fd < 0) + return fd; + + if (ret_slave) { + r = ptsname_namespace(fd, ret_slave); + if (r < 0) + return r; + } + + return TAKE_FD(fd); +} + +int open_terminal_in_namespace(pid_t pid, const char *name, int mode) { + _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, usernsfd = -1, rootfd = -1; + _cleanup_close_pair_ int pair[2] = { -1, -1 }; + pid_t child; + int r; + + r = namespace_open(pid, &pidnsfd, &mntnsfd, NULL, &usernsfd, &rootfd); + if (r < 0) + return r; + + if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0) + return -errno; + + r = namespace_fork("(sd-terminalns)", "(sd-terminal)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG, + pidnsfd, mntnsfd, -1, usernsfd, rootfd, &child); + if (r < 0) + return r; + if (r == 0) { + int master; + + pair[0] = safe_close(pair[0]); + + master = open_terminal(name, mode|O_NOCTTY|O_CLOEXEC); + if (master < 0) + _exit(EXIT_FAILURE); + + if (send_one_fd(pair[1], master, 0) < 0) + _exit(EXIT_FAILURE); + + _exit(EXIT_SUCCESS); + } + + pair[1] = safe_close(pair[1]); + + r = wait_for_terminate_and_check("(sd-terminalns)", child, 0); + if (r < 0) + return r; + if (r != EXIT_SUCCESS) + return -EIO; + + return receive_one_fd(pair[0], 0); +} + +static bool getenv_terminal_is_dumb(void) { + const char *e; + + e = getenv("TERM"); + if (!e) + return true; + + return streq(e, "dumb"); +} + +bool terminal_is_dumb(void) { + if (!on_tty()) + return true; + + return getenv_terminal_is_dumb(); +} + +static ColorMode parse_systemd_colors(void) { + const char *e; + int r; + + e = getenv("SYSTEMD_COLORS"); + if (!e) + return _COLOR_INVALID; + if (streq(e, "16")) + return COLOR_16; + if (streq(e, "256")) + return COLOR_256; + r = parse_boolean(e); + if (r >= 0) + return r > 0 ? COLOR_ON : COLOR_OFF; + return _COLOR_INVALID; +} + +ColorMode get_color_mode(void) { + + /* Returns the mode used to choose output colors. The possible modes are COLOR_OFF for no colors, + * COLOR_16 for only the base 16 ANSI colors, COLOR_256 for more colors and COLOR_ON for unrestricted + * color output. For that we check $SYSTEMD_COLORS first (which is the explicit way to + * change the mode). If that didn't work we turn colors off unless we are on a TTY. And if we are on a TTY + * we turn it off if $TERM is set to "dumb". There's one special tweak though: if we are PID 1 then we do not + * check whether we are connected to a TTY, because we don't keep /dev/console open continuously due to fear + * of SAK, and hence things are a bit weird. */ + ColorMode m; + + if (cached_color_mode < 0) { + m = parse_systemd_colors(); + if (m >= 0) + cached_color_mode = m; + else if (getenv("NO_COLOR")) + /* We only check for the presence of the variable; value is ignored. */ + cached_color_mode = COLOR_OFF; + + else if (getpid_cached() == 1) { + /* PID1 outputs to the console without holding it open all the time. + * + * Note that the Linux console can only display 16 colors. We still enable 256 color + * mode even for PID1 output though (which typically goes to the Linux console), + * since the Linux console is able to parse the 256 color sequences and automatically + * map them to the closest color in the 16 color palette (since kernel 3.16). Doing + * 256 colors is nice for people who invoke systemd in a container or via a serial + * link or such, and use a true 256 color terminal to do so. */ + if (getenv_terminal_is_dumb()) + cached_color_mode = COLOR_OFF; + } else { + if (terminal_is_dumb()) + cached_color_mode = COLOR_OFF; + } + + if (cached_color_mode < 0) { + /* We failed to figure out any reason to *disable* colors. + * Let's see how many colors we shall use. */ + if (STRPTR_IN_SET(getenv("COLORTERM"), + "truecolor", + "24bit")) + cached_color_mode = COLOR_24BIT; + else + cached_color_mode = COLOR_256; + } + } + + return cached_color_mode; +} + +bool dev_console_colors_enabled(void) { + _cleanup_free_ char *s = NULL; + ColorMode m; + + /* Returns true if we assume that color is supported on /dev/console. + * + * For that we first check if we explicitly got told to use colors or not, by checking $SYSTEMD_COLORS. If that + * isn't set we check whether PID 1 has $TERM set, and if not, whether TERM is set on the kernel command + * line. If we find $TERM set we assume color if it's not set to "dumb", similarly to how regular + * colors_enabled() operates. */ + + m = parse_systemd_colors(); + if (m >= 0) + return m; + + if (getenv("NO_COLOR")) + return false; + + if (getenv_for_pid(1, "TERM", &s) <= 0) + (void) proc_cmdline_get_key("TERM", 0, &s); + + return !streq_ptr(s, "dumb"); +} + +bool underline_enabled(void) { + + if (cached_underline_enabled < 0) { + + /* The Linux console doesn't support underlining, turn it off, but only there. */ + + if (colors_enabled()) + cached_underline_enabled = !streq_ptr(getenv("TERM"), "linux"); + else + cached_underline_enabled = false; + } + + return cached_underline_enabled; +} + +int vt_default_utf8(void) { + _cleanup_free_ char *b = NULL; + int r; + + /* Read the default VT UTF8 setting from the kernel */ + + r = read_one_line_file("/sys/module/vt/parameters/default_utf8", &b); + if (r < 0) + return r; + + return parse_boolean(b); +} + +int vt_reset_keyboard(int fd) { + int kb; + + /* If we can't read the default, then default to unicode. It's 2017 after all. */ + kb = vt_default_utf8() != 0 ? K_UNICODE : K_XLATE; + + return RET_NERRNO(ioctl(fd, KDSKBMODE, kb)); +} + +int vt_restore(int fd) { + static const struct vt_mode mode = { + .mode = VT_AUTO, + }; + int r, q = 0; + + if (isatty(fd) < 1) + return log_debug_errno(errno, "Asked to restore the VT for an fd that does not refer to a terminal: %m"); + + if (ioctl(fd, KDSETMODE, KD_TEXT) < 0) + q = log_debug_errno(errno, "Failed to set VT in text mode, ignoring: %m"); + + r = vt_reset_keyboard(fd); + if (r < 0) { + log_debug_errno(r, "Failed to reset keyboard mode, ignoring: %m"); + if (q >= 0) + q = r; + } + + if (ioctl(fd, VT_SETMODE, &mode) < 0) { + log_debug_errno(errno, "Failed to set VT_AUTO mode, ignoring: %m"); + if (q >= 0) + q = -errno; + } + + r = fchmod_and_chown(fd, TTY_MODE, 0, GID_INVALID); + if (r < 0) { + log_debug_errno(r, "Failed to chmod()/chown() VT, ignoring: %m"); + if (q >= 0) + q = r; + } + + return q; +} + +int vt_release(int fd, bool restore) { + assert(fd >= 0); + + /* This function releases the VT by acknowledging the VT-switch signal + * sent by the kernel and optionally reset the VT in text and auto + * VT-switching modes. */ + + if (isatty(fd) < 1) + return log_debug_errno(errno, "Asked to release the VT for an fd that does not refer to a terminal: %m"); + + if (ioctl(fd, VT_RELDISP, 1) < 0) + return -errno; + + if (restore) + return vt_restore(fd); + + return 0; +} + +void get_log_colors(int priority, const char **on, const char **off, const char **highlight) { + /* Note that this will initialize output variables only when there's something to output. + * The caller must pre-initialize to "" or NULL as appropriate. */ + + if (priority <= LOG_ERR) { + if (on) + *on = ansi_highlight_red(); + if (off) + *off = ansi_normal(); + if (highlight) + *highlight = ansi_highlight(); + + } else if (priority <= LOG_WARNING) { + if (on) + *on = ansi_highlight_yellow(); + if (off) + *off = ansi_normal(); + if (highlight) + *highlight = ansi_highlight(); + + } else if (priority <= LOG_NOTICE) { + if (on) + *on = ansi_highlight(); + if (off) + *off = ansi_normal(); + if (highlight) + *highlight = ansi_highlight_red(); + + } else if (priority >= LOG_DEBUG) { + if (on) + *on = ansi_grey(); + if (off) + *off = ansi_normal(); + if (highlight) + *highlight = ansi_highlight_red(); + } +} diff --git a/src/basic/terminal-util.h b/src/basic/terminal-util.h new file mode 100644 index 0000000..59c868a --- /dev/null +++ b/src/basic/terminal-util.h @@ -0,0 +1,266 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include +#include + +#include "macro.h" +#include "time-util.h" + +/* Regular colors */ +#define ANSI_BLACK "\x1B[0;30m" /* Some type of grey usually. */ +#define ANSI_RED "\x1B[0;31m" +#define ANSI_GREEN "\x1B[0;32m" +#define ANSI_YELLOW "\x1B[0;33m" +#define ANSI_BLUE "\x1B[0;34m" +#define ANSI_MAGENTA "\x1B[0;35m" +#define ANSI_CYAN "\x1B[0;36m" +#define ANSI_WHITE "\x1B[0;37m" /* This is actually rendered as light grey, legible even on a white + * background. See ANSI_HIGHLIGHT_WHITE for real white. */ + +#define ANSI_BRIGHT_BLACK "\x1B[0;90m" +#define ANSI_BRIGHT_RED "\x1B[0;91m" +#define ANSI_BRIGHT_GREEN "\x1B[0;92m" +#define ANSI_BRIGHT_YELLOW "\x1B[0;93m" +#define ANSI_BRIGHT_BLUE "\x1B[0;94m" +#define ANSI_BRIGHT_MAGENTA "\x1B[0;95m" +#define ANSI_BRIGHT_CYAN "\x1B[0;96m" +#define ANSI_BRIGHT_WHITE "\x1B[0;97m" + +#define ANSI_GREY "\x1B[0;38;5;245m" + +/* Bold/highlighted */ +#define ANSI_HIGHLIGHT_BLACK "\x1B[0;1;30m" +#define ANSI_HIGHLIGHT_RED "\x1B[0;1;31m" +#define ANSI_HIGHLIGHT_GREEN "\x1B[0;1;32m" +#define _ANSI_HIGHLIGHT_YELLOW "\x1B[0;1;33m" /* This yellow is currently not displayed well by some terminals */ +#define ANSI_HIGHLIGHT_BLUE "\x1B[0;1;34m" +#define ANSI_HIGHLIGHT_MAGENTA "\x1B[0;1;35m" +#define ANSI_HIGHLIGHT_CYAN "\x1B[0;1;36m" +#define ANSI_HIGHLIGHT_WHITE "\x1B[0;1;37m" +#define ANSI_HIGHLIGHT_YELLOW4 "\x1B[0;1;38;5;100m" +#define ANSI_HIGHLIGHT_KHAKI3 "\x1B[0;1;38;5;185m" +#define ANSI_HIGHLIGHT_GREY "\x1B[0;1;38;5;245m" + +#define ANSI_HIGHLIGHT_YELLOW ANSI_HIGHLIGHT_KHAKI3 /* Replacement yellow that is more legible */ + +/* Underlined */ +#define ANSI_GREY_UNDERLINE "\x1B[0;4;38;5;245m" +#define ANSI_HIGHLIGHT_RED_UNDERLINE "\x1B[0;1;4;31m" +#define ANSI_HIGHLIGHT_GREEN_UNDERLINE "\x1B[0;1;4;32m" +#define ANSI_HIGHLIGHT_YELLOW_UNDERLINE "\x1B[0;1;4;38;5;185m" +#define ANSI_HIGHLIGHT_BLUE_UNDERLINE "\x1B[0;1;4;34m" +#define ANSI_HIGHLIGHT_MAGENTA_UNDERLINE "\x1B[0;1;4;35m" +#define ANSI_HIGHLIGHT_GREY_UNDERLINE "\x1B[0;1;4;38;5;245m" + +/* Other ANSI codes */ +#define ANSI_UNDERLINE "\x1B[0;4m" +#define ANSI_HIGHLIGHT "\x1B[0;1;39m" +#define ANSI_HIGHLIGHT_UNDERLINE "\x1B[0;1;4m" + +/* Fallback colors: 256 -> 16 */ +#define ANSI_HIGHLIGHT_GREY_FALLBACK "\x1B[0;1;90m" +#define ANSI_HIGHLIGHT_YELLOW_FALLBACK "\x1B[0;1;33m" + +/* Reset/clear ANSI styles */ +#define ANSI_NORMAL "\x1B[0m" + +/* Erase characters until the end of the line */ +#define ANSI_ERASE_TO_END_OF_LINE "\x1B[K" + +/* Move cursor up one line */ +#define ANSI_REVERSE_LINEFEED "\x1BM" + +/* Set cursor to top left corner and clear screen */ +#define ANSI_HOME_CLEAR "\x1B[H\x1B[2J" + +int reset_terminal_fd(int fd, bool switch_to_text); +int reset_terminal(const char *name); + +int open_terminal(const char *name, int mode); + +/* Flags for tweaking the way we become the controlling process of a terminal. */ +typedef enum AcquireTerminalFlags { + /* Try to become the controlling process of the TTY. If we can't return -EPERM. */ + ACQUIRE_TERMINAL_TRY = 0, + + /* Tell the kernel to forcibly make us the controlling process of the TTY. Returns -EPERM if the kernel doesn't allow that. */ + ACQUIRE_TERMINAL_FORCE = 1, + + /* If we can't become the controlling process of the TTY right-away, then wait until we can. */ + ACQUIRE_TERMINAL_WAIT = 2, + + /* Pick one of the above, and then OR this flag in, in order to request permissive behaviour, if we can't become controlling process then don't mind */ + ACQUIRE_TERMINAL_PERMISSIVE = 1 << 2, +} AcquireTerminalFlags; + +/* Limits the use of ANSI colors to a subset. */ +typedef enum ColorMode { + /* No colors, monochrome output. */ + COLOR_OFF, + + /* All colors, no restrictions. */ + COLOR_ON, + + /* Only the base 16 colors. */ + COLOR_16, + + /* Only 256 colors. */ + COLOR_256, + + /* For truecolor or 24bit color support.*/ + COLOR_24BIT, + + _COLOR_INVALID = -EINVAL, +} ColorMode; + +int acquire_terminal(const char *name, AcquireTerminalFlags flags, usec_t timeout); +int release_terminal(void); + +int terminal_vhangup_fd(int fd); +int terminal_vhangup(const char *name); + +int terminal_set_size_fd(int fd, const char *ident, unsigned rows, unsigned cols); + +int chvt(int vt); + +int read_one_char(FILE *f, char *ret, usec_t timeout, bool *need_nl); +int ask_char(char *ret, const char *replies, const char *text, ...) _printf_(3, 4); +int ask_string(char **ret, const char *text, ...) _printf_(2, 3); + +int vt_disallocate(const char *name); + +int resolve_dev_console(char **ret); +int get_kernel_consoles(char ***ret); +bool tty_is_vc(const char *tty); +bool tty_is_vc_resolve(const char *tty); +bool tty_is_console(const char *tty) _pure_; +int vtnr_from_tty(const char *tty); +const char *default_term_for_tty(const char *tty); + +int make_console_stdio(void); + +int fd_columns(int fd); +unsigned columns(void); +int fd_lines(int fd); +unsigned lines(void); + +void columns_lines_cache_reset(int _unused_ signum); +void reset_terminal_feature_caches(void); + +bool on_tty(void); +bool terminal_is_dumb(void); +ColorMode get_color_mode(void); +bool underline_enabled(void); +bool dev_console_colors_enabled(void); + +static inline bool colors_enabled(void) { + + /* Returns true if colors are considered supported on our stdout. */ + return get_color_mode() != COLOR_OFF; +} + +#define DEFINE_ANSI_FUNC(name, NAME) \ + static inline const char *ansi_##name(void) { \ + return colors_enabled() ? ANSI_##NAME : ""; \ + } + +#define DEFINE_ANSI_FUNC_256(name, NAME, FALLBACK) \ + static inline const char *ansi_##name(void) { \ + switch (get_color_mode()) { \ + case COLOR_OFF: return ""; \ + case COLOR_16: return ANSI_##FALLBACK; \ + default : return ANSI_##NAME; \ + } \ + } + +#define DEFINE_ANSI_FUNC_UNDERLINE(name, NAME) \ + static inline const char *ansi_##name(void) { \ + return underline_enabled() ? ANSI_##NAME ANSI_UNDERLINE : \ + colors_enabled() ? ANSI_##NAME : ""; \ + } + + +#define DEFINE_ANSI_FUNC_UNDERLINE_256(name, NAME, FALLBACK) \ + static inline const char *ansi_##name(void) { \ + switch (get_color_mode()) { \ + case COLOR_OFF: return ""; \ + case COLOR_16: return underline_enabled() ? ANSI_##FALLBACK ANSI_UNDERLINE : ANSI_##FALLBACK; \ + default : return underline_enabled() ? ANSI_##NAME ANSI_UNDERLINE: ANSI_##NAME; \ + } \ + } + +DEFINE_ANSI_FUNC(normal, NORMAL); +DEFINE_ANSI_FUNC(highlight, HIGHLIGHT); +DEFINE_ANSI_FUNC(black, BLACK); +DEFINE_ANSI_FUNC(red, RED); +DEFINE_ANSI_FUNC(green, GREEN); +DEFINE_ANSI_FUNC(yellow, YELLOW); +DEFINE_ANSI_FUNC(blue, BLUE); +DEFINE_ANSI_FUNC(magenta, MAGENTA); +DEFINE_ANSI_FUNC(cyan, CYAN); +DEFINE_ANSI_FUNC(white, WHITE); +DEFINE_ANSI_FUNC_256(grey, GREY, BRIGHT_BLACK); + +DEFINE_ANSI_FUNC(bright_black, BRIGHT_BLACK); +DEFINE_ANSI_FUNC(bright_red, BRIGHT_RED); +DEFINE_ANSI_FUNC(bright_green, BRIGHT_GREEN); +DEFINE_ANSI_FUNC(bright_yellow, BRIGHT_YELLOW); +DEFINE_ANSI_FUNC(bright_blue, BRIGHT_BLUE); +DEFINE_ANSI_FUNC(bright_magenta, BRIGHT_MAGENTA); +DEFINE_ANSI_FUNC(bright_cyan, BRIGHT_CYAN); +DEFINE_ANSI_FUNC(bright_white, BRIGHT_WHITE); + +DEFINE_ANSI_FUNC(highlight_black, HIGHLIGHT_BLACK); +DEFINE_ANSI_FUNC(highlight_red, HIGHLIGHT_RED); +DEFINE_ANSI_FUNC(highlight_green, HIGHLIGHT_GREEN); +DEFINE_ANSI_FUNC_256(highlight_yellow, HIGHLIGHT_YELLOW, HIGHLIGHT_YELLOW_FALLBACK); +DEFINE_ANSI_FUNC_256(highlight_yellow4, HIGHLIGHT_YELLOW4, HIGHLIGHT_YELLOW_FALLBACK); +DEFINE_ANSI_FUNC(highlight_blue, HIGHLIGHT_BLUE); +DEFINE_ANSI_FUNC(highlight_magenta, HIGHLIGHT_MAGENTA); +DEFINE_ANSI_FUNC(highlight_cyan, HIGHLIGHT_CYAN); +DEFINE_ANSI_FUNC_256(highlight_grey, HIGHLIGHT_GREY, HIGHLIGHT_GREY_FALLBACK); +DEFINE_ANSI_FUNC(highlight_white, HIGHLIGHT_WHITE); + +static inline const char* _ansi_highlight_yellow(void) { + return colors_enabled() ? _ANSI_HIGHLIGHT_YELLOW : ""; +} + +DEFINE_ANSI_FUNC_UNDERLINE(underline, NORMAL); +DEFINE_ANSI_FUNC_UNDERLINE(highlight_underline, HIGHLIGHT); +DEFINE_ANSI_FUNC_UNDERLINE_256(grey_underline, GREY, BRIGHT_BLACK); +DEFINE_ANSI_FUNC_UNDERLINE(highlight_red_underline, HIGHLIGHT_RED); +DEFINE_ANSI_FUNC_UNDERLINE(highlight_green_underline, HIGHLIGHT_GREEN); +DEFINE_ANSI_FUNC_UNDERLINE_256(highlight_yellow_underline, HIGHLIGHT_YELLOW, HIGHLIGHT_YELLOW_FALLBACK); +DEFINE_ANSI_FUNC_UNDERLINE(highlight_blue_underline, HIGHLIGHT_BLUE); +DEFINE_ANSI_FUNC_UNDERLINE(highlight_magenta_underline, HIGHLIGHT_MAGENTA); +DEFINE_ANSI_FUNC_UNDERLINE_256(highlight_grey_underline, HIGHLIGHT_GREY, HIGHLIGHT_GREY_FALLBACK); + +int get_ctty_devnr(pid_t pid, dev_t *d); +int get_ctty(pid_t, dev_t *_devnr, char **r); + +int getttyname_malloc(int fd, char **r); +int getttyname_harder(int fd, char **r); + +int ptsname_malloc(int fd, char **ret); + +int openpt_allocate(int flags, char **ret_slave); +int openpt_allocate_in_namespace(pid_t pid, int flags, char **ret_slave); +int open_terminal_in_namespace(pid_t pid, const char *name, int mode); + +int vt_default_utf8(void); +int vt_reset_keyboard(int fd); +int vt_restore(int fd); +int vt_release(int fd, bool restore_vt); + +void get_log_colors(int priority, const char **on, const char **off, const char **highlight); + +static inline const char* ansi_highlight_green_red(bool b) { + return b ? ansi_highlight_green() : ansi_highlight_red(); +} + +/* This assumes there is a 'tty' group */ +#define TTY_MODE 0620 diff --git a/src/basic/time-util.c b/src/basic/time-util.c new file mode 100644 index 0000000..817ac43 --- /dev/null +++ b/src/basic/time-util.c @@ -0,0 +1,1635 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "alloc-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "io-util.h" +#include "log.h" +#include "macro.h" +#include "missing_threads.h" +#include "missing_timerfd.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "stat-util.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" + +static clockid_t map_clock_id(clockid_t c) { + + /* Some more exotic archs (s390, ppc, …) lack the "ALARM" flavour of the clocks. Thus, + * clock_gettime() will fail for them. Since they are essentially the same as their non-ALARM + * pendants (their only difference is when timers are set on them), let's just map them + * accordingly. This way, we can get the correct time even on those archs. */ + + switch (c) { + + case CLOCK_BOOTTIME_ALARM: + return CLOCK_BOOTTIME; + + case CLOCK_REALTIME_ALARM: + return CLOCK_REALTIME; + + default: + return c; + } +} + +usec_t now(clockid_t clock_id) { + struct timespec ts; + + assert_se(clock_gettime(map_clock_id(clock_id), &ts) == 0); + + return timespec_load(&ts); +} + +nsec_t now_nsec(clockid_t clock_id) { + struct timespec ts; + + assert_se(clock_gettime(map_clock_id(clock_id), &ts) == 0); + + return timespec_load_nsec(&ts); +} + +dual_timestamp* dual_timestamp_get(dual_timestamp *ts) { + assert(ts); + + ts->realtime = now(CLOCK_REALTIME); + ts->monotonic = now(CLOCK_MONOTONIC); + + return ts; +} + +triple_timestamp* triple_timestamp_get(triple_timestamp *ts) { + assert(ts); + + ts->realtime = now(CLOCK_REALTIME); + ts->monotonic = now(CLOCK_MONOTONIC); + ts->boottime = now(CLOCK_BOOTTIME); + + return ts; +} + +static usec_t map_clock_usec_internal(usec_t from, usec_t from_base, usec_t to_base) { + + /* Maps the time 'from' between two clocks, based on a common reference point where the first clock + * is at 'from_base' and the second clock at 'to_base'. Basically calculates: + * + * from - from_base + to_base + * + * But takes care of overflows/underflows and avoids signed operations. */ + + if (from >= from_base) { /* In the future */ + usec_t delta = from - from_base; + + if (to_base >= USEC_INFINITY - delta) /* overflow? */ + return USEC_INFINITY; + + return to_base + delta; + + } else { /* In the past */ + usec_t delta = from_base - from; + + if (to_base <= delta) /* underflow? */ + return 0; + + return to_base - delta; + } +} + +usec_t map_clock_usec(usec_t from, clockid_t from_clock, clockid_t to_clock) { + + /* Try to avoid any inaccuracy needlessly added in case we convert from effectively the same clock + * onto itself */ + if (map_clock_id(from_clock) == map_clock_id(to_clock)) + return from; + + /* Keep infinity as is */ + if (from == USEC_INFINITY) + return from; + + return map_clock_usec_internal(from, now(from_clock), now(to_clock)); +} + +dual_timestamp* dual_timestamp_from_realtime(dual_timestamp *ts, usec_t u) { + assert(ts); + + if (!timestamp_is_set(u)) { + ts->realtime = ts->monotonic = u; + return ts; + } + + ts->realtime = u; + ts->monotonic = map_clock_usec(u, CLOCK_REALTIME, CLOCK_MONOTONIC); + return ts; +} + +triple_timestamp* triple_timestamp_from_realtime(triple_timestamp *ts, usec_t u) { + usec_t nowr; + + assert(ts); + + if (!timestamp_is_set(u)) { + ts->realtime = ts->monotonic = ts->boottime = u; + return ts; + } + + nowr = now(CLOCK_REALTIME); + + ts->realtime = u; + ts->monotonic = map_clock_usec_internal(u, nowr, now(CLOCK_MONOTONIC)); + ts->boottime = map_clock_usec_internal(u, nowr, now(CLOCK_BOOTTIME)); + + return ts; +} + +dual_timestamp* dual_timestamp_from_monotonic(dual_timestamp *ts, usec_t u) { + assert(ts); + + if (u == USEC_INFINITY) { + ts->realtime = ts->monotonic = USEC_INFINITY; + return ts; + } + + ts->monotonic = u; + ts->realtime = map_clock_usec(u, CLOCK_MONOTONIC, CLOCK_REALTIME); + return ts; +} + +dual_timestamp* dual_timestamp_from_boottime(dual_timestamp *ts, usec_t u) { + usec_t nowm; + + if (u == USEC_INFINITY) { + ts->realtime = ts->monotonic = USEC_INFINITY; + return ts; + } + + nowm = now(CLOCK_BOOTTIME); + ts->monotonic = map_clock_usec_internal(u, nowm, now(CLOCK_MONOTONIC)); + ts->realtime = map_clock_usec_internal(u, nowm, now(CLOCK_REALTIME)); + return ts; +} + +usec_t triple_timestamp_by_clock(triple_timestamp *ts, clockid_t clock) { + + switch (clock) { + + case CLOCK_REALTIME: + case CLOCK_REALTIME_ALARM: + return ts->realtime; + + case CLOCK_MONOTONIC: + return ts->monotonic; + + case CLOCK_BOOTTIME: + case CLOCK_BOOTTIME_ALARM: + return ts->boottime; + + default: + return USEC_INFINITY; + } +} + +usec_t timespec_load(const struct timespec *ts) { + assert(ts); + + if (ts->tv_sec < 0 || ts->tv_nsec < 0) + return USEC_INFINITY; + + if ((usec_t) ts->tv_sec > (UINT64_MAX - (ts->tv_nsec / NSEC_PER_USEC)) / USEC_PER_SEC) + return USEC_INFINITY; + + return + (usec_t) ts->tv_sec * USEC_PER_SEC + + (usec_t) ts->tv_nsec / NSEC_PER_USEC; +} + +nsec_t timespec_load_nsec(const struct timespec *ts) { + assert(ts); + + if (ts->tv_sec < 0 || ts->tv_nsec < 0) + return NSEC_INFINITY; + + if ((nsec_t) ts->tv_sec >= (UINT64_MAX - ts->tv_nsec) / NSEC_PER_SEC) + return NSEC_INFINITY; + + return (nsec_t) ts->tv_sec * NSEC_PER_SEC + (nsec_t) ts->tv_nsec; +} + +struct timespec *timespec_store(struct timespec *ts, usec_t u) { + assert(ts); + + if (u == USEC_INFINITY || + u / USEC_PER_SEC >= TIME_T_MAX) { + ts->tv_sec = (time_t) -1; + ts->tv_nsec = -1L; + return ts; + } + + ts->tv_sec = (time_t) (u / USEC_PER_SEC); + ts->tv_nsec = (long) ((u % USEC_PER_SEC) * NSEC_PER_USEC); + + return ts; +} + +struct timespec *timespec_store_nsec(struct timespec *ts, nsec_t n) { + assert(ts); + + if (n == NSEC_INFINITY || + n / NSEC_PER_SEC >= TIME_T_MAX) { + ts->tv_sec = (time_t) -1; + ts->tv_nsec = -1L; + return ts; + } + + ts->tv_sec = (time_t) (n / NSEC_PER_SEC); + ts->tv_nsec = (long) (n % NSEC_PER_SEC); + + return ts; +} + +usec_t timeval_load(const struct timeval *tv) { + assert(tv); + + if (tv->tv_sec < 0 || tv->tv_usec < 0) + return USEC_INFINITY; + + if ((usec_t) tv->tv_sec > (UINT64_MAX - tv->tv_usec) / USEC_PER_SEC) + return USEC_INFINITY; + + return + (usec_t) tv->tv_sec * USEC_PER_SEC + + (usec_t) tv->tv_usec; +} + +struct timeval *timeval_store(struct timeval *tv, usec_t u) { + assert(tv); + + if (u == USEC_INFINITY || + u / USEC_PER_SEC > TIME_T_MAX) { + tv->tv_sec = (time_t) -1; + tv->tv_usec = (suseconds_t) -1; + } else { + tv->tv_sec = (time_t) (u / USEC_PER_SEC); + tv->tv_usec = (suseconds_t) (u % USEC_PER_SEC); + } + + return tv; +} + +char *format_timestamp_style( + char *buf, + size_t l, + usec_t t, + TimestampStyle style) { + + /* The weekdays in non-localized (English) form. We use this instead of the localized form, so that + * our generated timestamps may be parsed with parse_timestamp(), and always read the same. */ + static const char * const weekdays[] = { + [0] = "Sun", + [1] = "Mon", + [2] = "Tue", + [3] = "Wed", + [4] = "Thu", + [5] = "Fri", + [6] = "Sat", + }; + + struct tm tm; + time_t sec; + size_t n; + bool utc = false, us = false; + int r; + + assert(buf); + + switch (style) { + case TIMESTAMP_PRETTY: + case TIMESTAMP_UNIX: + break; + case TIMESTAMP_US: + us = true; + break; + case TIMESTAMP_UTC: + utc = true; + break; + case TIMESTAMP_US_UTC: + us = true; + utc = true; + break; + default: + return NULL; + } + + if (l < (size_t) (3 + /* week day */ + 1 + 10 + /* space and date */ + 1 + 8 + /* space and time */ + (us ? 1 + 6 : 0) + /* "." and microsecond part */ + 1 + 1 + /* space and shortest possible zone */ + 1)) + return NULL; /* Not enough space even for the shortest form. */ + if (!timestamp_is_set(t)) + return NULL; /* Timestamp is unset */ + + if (style == TIMESTAMP_UNIX) { + r = snprintf(buf, l, "@" USEC_FMT, t / USEC_PER_SEC); /* round down µs → s */ + if (r < 0 || (size_t) r >= l) + return NULL; /* Doesn't fit */ + + return buf; + } + + /* Let's not format times with years > 9999 */ + if (t > USEC_TIMESTAMP_FORMATTABLE_MAX) { + assert(l >= STRLEN("--- XXXX-XX-XX XX:XX:XX") + 1); + strcpy(buf, "--- XXXX-XX-XX XX:XX:XX"); + return buf; + } + + sec = (time_t) (t / USEC_PER_SEC); /* Round down */ + + if (!localtime_or_gmtime_r(&sec, &tm, utc)) + return NULL; + + /* Start with the week day */ + assert((size_t) tm.tm_wday < ELEMENTSOF(weekdays)); + memcpy(buf, weekdays[tm.tm_wday], 4); + + /* Add the main components */ + if (strftime(buf + 3, l - 3, " %Y-%m-%d %H:%M:%S", &tm) <= 0) + return NULL; /* Doesn't fit */ + + /* Append the microseconds part, if that's requested */ + if (us) { + n = strlen(buf); + if (n + 8 > l) + return NULL; /* Microseconds part doesn't fit. */ + + sprintf(buf + n, ".%06"PRI_USEC, t % USEC_PER_SEC); + } + + /* Append the timezone */ + n = strlen(buf); + if (utc) { + /* If this is UTC then let's explicitly use the "UTC" string here, because gmtime_r() + * normally uses the obsolete "GMT" instead. */ + if (n + 5 > l) + return NULL; /* "UTC" doesn't fit. */ + + strcpy(buf + n, " UTC"); + + } else if (!isempty(tm.tm_zone)) { + size_t tn; + + /* An explicit timezone is specified, let's use it, if it fits */ + tn = strlen(tm.tm_zone); + if (n + 1 + tn + 1 > l) { + /* The full time zone does not fit in. Yuck. */ + + if (n + 1 + _POSIX_TZNAME_MAX + 1 > l) + return NULL; /* Not even enough space for the POSIX minimum (of 6)? In that + * case, complain that it doesn't fit. */ + + /* So the time zone doesn't fit in fully, but the caller passed enough space for the + * POSIX minimum time zone length. In this case suppress the timezone entirely, in + * order not to dump an overly long, hard to read string on the user. This should be + * safe, because the user will assume the local timezone anyway if none is shown. And + * so does parse_timestamp(). */ + } else { + buf[n++] = ' '; + strcpy(buf + n, tm.tm_zone); + } + } + + return buf; +} + +char *format_timestamp_relative(char *buf, size_t l, usec_t t) { + const char *s; + usec_t n, d; + + if (!timestamp_is_set(t)) + return NULL; + + n = now(CLOCK_REALTIME); + if (n > t) { + d = n - t; + s = "ago"; + } else { + d = t - n; + s = "left"; + } + + if (d >= USEC_PER_YEAR) { + usec_t years = d / USEC_PER_YEAR; + usec_t months = (d % USEC_PER_YEAR) / USEC_PER_MONTH; + + (void) snprintf(buf, l, USEC_FMT " %s " USEC_FMT " %s %s", + years, + years == 1 ? "year" : "years", + months, + months == 1 ? "month" : "months", + s); + } else if (d >= USEC_PER_MONTH) { + usec_t months = d / USEC_PER_MONTH; + usec_t days = (d % USEC_PER_MONTH) / USEC_PER_DAY; + + (void) snprintf(buf, l, USEC_FMT " %s " USEC_FMT " %s %s", + months, + months == 1 ? "month" : "months", + days, + days == 1 ? "day" : "days", + s); + } else if (d >= USEC_PER_WEEK) { + usec_t weeks = d / USEC_PER_WEEK; + usec_t days = (d % USEC_PER_WEEK) / USEC_PER_DAY; + + (void) snprintf(buf, l, USEC_FMT " %s " USEC_FMT " %s %s", + weeks, + weeks == 1 ? "week" : "weeks", + days, + days == 1 ? "day" : "days", + s); + } else if (d >= 2*USEC_PER_DAY) + (void) snprintf(buf, l, USEC_FMT " days %s", d / USEC_PER_DAY, s); + else if (d >= 25*USEC_PER_HOUR) + (void) snprintf(buf, l, "1 day " USEC_FMT "h %s", + (d - USEC_PER_DAY) / USEC_PER_HOUR, s); + else if (d >= 6*USEC_PER_HOUR) + (void) snprintf(buf, l, USEC_FMT "h %s", + d / USEC_PER_HOUR, s); + else if (d >= USEC_PER_HOUR) + (void) snprintf(buf, l, USEC_FMT "h " USEC_FMT "min %s", + d / USEC_PER_HOUR, + (d % USEC_PER_HOUR) / USEC_PER_MINUTE, s); + else if (d >= 5*USEC_PER_MINUTE) + (void) snprintf(buf, l, USEC_FMT "min %s", + d / USEC_PER_MINUTE, s); + else if (d >= USEC_PER_MINUTE) + (void) snprintf(buf, l, USEC_FMT "min " USEC_FMT "s %s", + d / USEC_PER_MINUTE, + (d % USEC_PER_MINUTE) / USEC_PER_SEC, s); + else if (d >= USEC_PER_SEC) + (void) snprintf(buf, l, USEC_FMT "s %s", + d / USEC_PER_SEC, s); + else if (d >= USEC_PER_MSEC) + (void) snprintf(buf, l, USEC_FMT "ms %s", + d / USEC_PER_MSEC, s); + else if (d > 0) + (void) snprintf(buf, l, USEC_FMT"us %s", + d, s); + else + (void) snprintf(buf, l, "now"); + + buf[l-1] = 0; + return buf; +} + +char *format_timespan(char *buf, size_t l, usec_t t, usec_t accuracy) { + static const struct { + const char *suffix; + usec_t usec; + } table[] = { + { "y", USEC_PER_YEAR }, + { "month", USEC_PER_MONTH }, + { "w", USEC_PER_WEEK }, + { "d", USEC_PER_DAY }, + { "h", USEC_PER_HOUR }, + { "min", USEC_PER_MINUTE }, + { "s", USEC_PER_SEC }, + { "ms", USEC_PER_MSEC }, + { "us", 1 }, + }; + + char *p = ASSERT_PTR(buf); + bool something = false; + + assert(l > 0); + + if (t == USEC_INFINITY) { + strncpy(p, "infinity", l-1); + p[l-1] = 0; + return p; + } + + if (t <= 0) { + strncpy(p, "0", l-1); + p[l-1] = 0; + return p; + } + + /* The result of this function can be parsed with parse_sec */ + + for (size_t i = 0; i < ELEMENTSOF(table); i++) { + int k = 0; + size_t n; + bool done = false; + usec_t a, b; + + if (t <= 0) + break; + + if (t < accuracy && something) + break; + + if (t < table[i].usec) + continue; + + if (l <= 1) + break; + + a = t / table[i].usec; + b = t % table[i].usec; + + /* Let's see if we should shows this in dot notation */ + if (t < USEC_PER_MINUTE && b > 0) { + signed char j = 0; + + for (usec_t cc = table[i].usec; cc > 1; cc /= 10) + j++; + + for (usec_t cc = accuracy; cc > 1; cc /= 10) { + b /= 10; + j--; + } + + if (j > 0) { + k = snprintf(p, l, + "%s"USEC_FMT".%0*"PRI_USEC"%s", + p > buf ? " " : "", + a, + j, + b, + table[i].suffix); + + t = 0; + done = true; + } + } + + /* No? Then let's show it normally */ + if (!done) { + k = snprintf(p, l, + "%s"USEC_FMT"%s", + p > buf ? " " : "", + a, + table[i].suffix); + + t = b; + } + + n = MIN((size_t) k, l-1); + + l -= n; + p += n; + + something = true; + } + + *p = 0; + + return buf; +} + +static int parse_timestamp_impl(const char *t, usec_t *usec, bool with_tz) { + static const struct { + const char *name; + const int nr; + } day_nr[] = { + { "Sunday", 0 }, + { "Sun", 0 }, + { "Monday", 1 }, + { "Mon", 1 }, + { "Tuesday", 2 }, + { "Tue", 2 }, + { "Wednesday", 3 }, + { "Wed", 3 }, + { "Thursday", 4 }, + { "Thu", 4 }, + { "Friday", 5 }, + { "Fri", 5 }, + { "Saturday", 6 }, + { "Sat", 6 }, + }; + + const char *k, *utc = NULL, *tzn = NULL; + struct tm tm, copy; + time_t x; + usec_t x_usec, plus = 0, minus = 0, ret; + int r, weekday = -1, dst = -1; + size_t i; + + /* Allowed syntaxes: + * + * 2012-09-22 16:34:22 + * 2012-09-22 16:34 (seconds will be set to 0) + * 2012-09-22 (time will be set to 00:00:00) + * 16:34:22 (date will be set to today) + * 16:34 (date will be set to today, seconds to 0) + * now + * yesterday (time is set to 00:00:00) + * today (time is set to 00:00:00) + * tomorrow (time is set to 00:00:00) + * +5min + * -5days + * @2147483647 (seconds since epoch) + */ + + assert(t); + + if (t[0] == '@' && !with_tz) + return parse_sec(t + 1, usec); + + ret = now(CLOCK_REALTIME); + + if (!with_tz) { + if (streq(t, "now")) + goto finish; + + else if (t[0] == '+') { + r = parse_sec(t+1, &plus); + if (r < 0) + return r; + + goto finish; + + } else if (t[0] == '-') { + r = parse_sec(t+1, &minus); + if (r < 0) + return r; + + goto finish; + + } else if ((k = endswith(t, " ago"))) { + t = strndupa_safe(t, k - t); + + r = parse_sec(t, &minus); + if (r < 0) + return r; + + goto finish; + + } else if ((k = endswith(t, " left"))) { + t = strndupa_safe(t, k - t); + + r = parse_sec(t, &plus); + if (r < 0) + return r; + + goto finish; + } + + /* See if the timestamp is suffixed with UTC */ + utc = endswith_no_case(t, " UTC"); + if (utc) + t = strndupa_safe(t, utc - t); + else { + const char *e = NULL; + int j; + + tzset(); + + /* See if the timestamp is suffixed by either the DST or non-DST local timezone. Note + * that we only support the local timezones here, nothing else. Not because we + * wouldn't want to, but simply because there are no nice APIs available to cover + * this. By accepting the local time zone strings, we make sure that all timestamps + * written by format_timestamp() can be parsed correctly, even though we don't + * support arbitrary timezone specifications. */ + + for (j = 0; j <= 1; j++) { + + if (isempty(tzname[j])) + continue; + + e = endswith_no_case(t, tzname[j]); + if (!e) + continue; + if (e == t) + continue; + if (e[-1] != ' ') + continue; + + break; + } + + if (IN_SET(j, 0, 1)) { + /* Found one of the two timezones specified. */ + t = strndupa_safe(t, e - t - 1); + dst = j; + tzn = tzname[j]; + } + } + } + + x = (time_t) (ret / USEC_PER_SEC); + x_usec = 0; + + if (!localtime_or_gmtime_r(&x, &tm, utc)) + return -EINVAL; + + tm.tm_isdst = dst; + if (!with_tz && tzn) + tm.tm_zone = tzn; + + if (streq(t, "today")) { + tm.tm_sec = tm.tm_min = tm.tm_hour = 0; + goto from_tm; + + } else if (streq(t, "yesterday")) { + tm.tm_mday--; + tm.tm_sec = tm.tm_min = tm.tm_hour = 0; + goto from_tm; + + } else if (streq(t, "tomorrow")) { + tm.tm_mday++; + tm.tm_sec = tm.tm_min = tm.tm_hour = 0; + goto from_tm; + } + + for (i = 0; i < ELEMENTSOF(day_nr); i++) { + size_t skip; + + if (!startswith_no_case(t, day_nr[i].name)) + continue; + + skip = strlen(day_nr[i].name); + if (t[skip] != ' ') + continue; + + weekday = day_nr[i].nr; + t += skip + 1; + break; + } + + copy = tm; + k = strptime(t, "%y-%m-%d %H:%M:%S", &tm); + if (k) { + if (*k == '.') + goto parse_usec; + else if (*k == 0) + goto from_tm; + } + + tm = copy; + k = strptime(t, "%Y-%m-%d %H:%M:%S", &tm); + if (k) { + if (*k == '.') + goto parse_usec; + else if (*k == 0) + goto from_tm; + } + + /* Support OUTPUT_SHORT and OUTPUT_SHORT_PRECISE formats */ + tm = copy; + k = strptime(t, "%b %d %H:%M:%S", &tm); + if (k) { + if (*k == '.') + goto parse_usec; + else if (*k == 0) + goto from_tm; + } + + tm = copy; + k = strptime(t, "%y-%m-%d %H:%M", &tm); + if (k && *k == 0) { + tm.tm_sec = 0; + goto from_tm; + } + + tm = copy; + k = strptime(t, "%Y-%m-%d %H:%M", &tm); + if (k && *k == 0) { + tm.tm_sec = 0; + goto from_tm; + } + + tm = copy; + k = strptime(t, "%y-%m-%d", &tm); + if (k && *k == 0) { + tm.tm_sec = tm.tm_min = tm.tm_hour = 0; + goto from_tm; + } + + tm = copy; + k = strptime(t, "%Y-%m-%d", &tm); + if (k && *k == 0) { + tm.tm_sec = tm.tm_min = tm.tm_hour = 0; + goto from_tm; + } + + tm = copy; + k = strptime(t, "%H:%M:%S", &tm); + if (k) { + if (*k == '.') + goto parse_usec; + else if (*k == 0) + goto from_tm; + } + + tm = copy; + k = strptime(t, "%H:%M", &tm); + if (k && *k == 0) { + tm.tm_sec = 0; + goto from_tm; + } + + return -EINVAL; + +parse_usec: + { + unsigned add; + + k++; + r = parse_fractional_part_u(&k, 6, &add); + if (r < 0) + return -EINVAL; + + if (*k) + return -EINVAL; + + x_usec = add; + } + +from_tm: + if (weekday >= 0 && tm.tm_wday != weekday) + return -EINVAL; + + x = mktime_or_timegm(&tm, utc); + if (x < 0) + return -EINVAL; + + ret = (usec_t) x * USEC_PER_SEC + x_usec; + if (ret > USEC_TIMESTAMP_FORMATTABLE_MAX) + return -EINVAL; + +finish: + if (ret + plus < ret) /* overflow? */ + return -EINVAL; + ret += plus; + if (ret > USEC_TIMESTAMP_FORMATTABLE_MAX) + return -EINVAL; + + if (ret >= minus) + ret -= minus; + else + return -EINVAL; + + if (usec) + *usec = ret; + return 0; +} + +typedef struct ParseTimestampResult { + usec_t usec; + int return_value; +} ParseTimestampResult; + +int parse_timestamp(const char *t, usec_t *usec) { + char *last_space, *tz = NULL; + ParseTimestampResult *shared, tmp; + int r; + + last_space = strrchr(t, ' '); + if (last_space != NULL && timezone_is_valid(last_space + 1, LOG_DEBUG)) + tz = last_space + 1; + + if (!tz || endswith_no_case(t, " UTC")) + return parse_timestamp_impl(t, usec, false); + + shared = mmap(NULL, sizeof *shared, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0); + if (shared == MAP_FAILED) + return negative_errno(); + + r = safe_fork("(sd-timestamp)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_WAIT, NULL); + if (r < 0) { + (void) munmap(shared, sizeof *shared); + return r; + } + if (r == 0) { + bool with_tz = true; + char *colon_tz; + + /* tzset(3) says $TZ should be prefixed with ":" if we reference timezone files */ + colon_tz = strjoina(":", tz); + + if (setenv("TZ", colon_tz, 1) != 0) { + shared->return_value = negative_errno(); + _exit(EXIT_FAILURE); + } + + tzset(); + + /* If there is a timezone that matches the tzname fields, leave the parsing to the implementation. + * Otherwise just cut it off. */ + with_tz = !STR_IN_SET(tz, tzname[0], tzname[1]); + + /* Cut off the timezone if we don't need it. */ + if (with_tz) + t = strndupa_safe(t, last_space - t); + + shared->return_value = parse_timestamp_impl(t, &shared->usec, with_tz); + + _exit(EXIT_SUCCESS); + } + + tmp = *shared; + if (munmap(shared, sizeof *shared) != 0) + return negative_errno(); + + if (tmp.return_value == 0 && usec) + *usec = tmp.usec; + + return tmp.return_value; +} + +static const char* extract_multiplier(const char *p, usec_t *multiplier) { + static const struct { + const char *suffix; + usec_t usec; + } table[] = { + { "seconds", USEC_PER_SEC }, + { "second", USEC_PER_SEC }, + { "sec", USEC_PER_SEC }, + { "s", USEC_PER_SEC }, + { "minutes", USEC_PER_MINUTE }, + { "minute", USEC_PER_MINUTE }, + { "min", USEC_PER_MINUTE }, + { "months", USEC_PER_MONTH }, + { "month", USEC_PER_MONTH }, + { "M", USEC_PER_MONTH }, + { "msec", USEC_PER_MSEC }, + { "ms", USEC_PER_MSEC }, + { "m", USEC_PER_MINUTE }, + { "hours", USEC_PER_HOUR }, + { "hour", USEC_PER_HOUR }, + { "hr", USEC_PER_HOUR }, + { "h", USEC_PER_HOUR }, + { "days", USEC_PER_DAY }, + { "day", USEC_PER_DAY }, + { "d", USEC_PER_DAY }, + { "weeks", USEC_PER_WEEK }, + { "week", USEC_PER_WEEK }, + { "w", USEC_PER_WEEK }, + { "years", USEC_PER_YEAR }, + { "year", USEC_PER_YEAR }, + { "y", USEC_PER_YEAR }, + { "usec", 1ULL }, + { "us", 1ULL }, + { "μs", 1ULL }, /* U+03bc (aka GREEK SMALL LETTER MU) */ + { "µs", 1ULL }, /* U+b5 (aka MICRO SIGN) */ + }; + + for (size_t i = 0; i < ELEMENTSOF(table); i++) { + char *e; + + e = startswith(p, table[i].suffix); + if (e) { + *multiplier = table[i].usec; + return e; + } + } + + return p; +} + +int parse_time(const char *t, usec_t *usec, usec_t default_unit) { + const char *p, *s; + usec_t r = 0; + bool something = false; + + assert(t); + assert(default_unit > 0); + + p = t; + + p += strspn(p, WHITESPACE); + s = startswith(p, "infinity"); + if (s) { + s += strspn(s, WHITESPACE); + if (*s != 0) + return -EINVAL; + + if (usec) + *usec = USEC_INFINITY; + return 0; + } + + for (;;) { + usec_t multiplier = default_unit, k; + long long l; + char *e; + + p += strspn(p, WHITESPACE); + + if (*p == 0) { + if (!something) + return -EINVAL; + + break; + } + + if (*p == '-') /* Don't allow "-0" */ + return -ERANGE; + + errno = 0; + l = strtoll(p, &e, 10); + if (errno > 0) + return -errno; + if (l < 0) + return -ERANGE; + + if (*e == '.') { + p = e + 1; + p += strspn(p, DIGITS); + } else if (e == p) + return -EINVAL; + else + p = e; + + s = extract_multiplier(p + strspn(p, WHITESPACE), &multiplier); + if (s == p && *s != '\0') + /* Don't allow '12.34.56', but accept '12.34 .56' or '12.34s.56' */ + return -EINVAL; + + p = s; + + if ((usec_t) l >= USEC_INFINITY / multiplier) + return -ERANGE; + + k = (usec_t) l * multiplier; + if (k >= USEC_INFINITY - r) + return -ERANGE; + + r += k; + + something = true; + + if (*e == '.') { + usec_t m = multiplier / 10; + const char *b; + + for (b = e + 1; *b >= '0' && *b <= '9'; b++, m /= 10) { + k = (usec_t) (*b - '0') * m; + if (k >= USEC_INFINITY - r) + return -ERANGE; + + r += k; + } + + /* Don't allow "0.-0", "3.+1", "3. 1", "3.sec" or "3.hoge" */ + if (b == e + 1) + return -EINVAL; + } + } + + if (usec) + *usec = r; + return 0; +} + +int parse_sec(const char *t, usec_t *usec) { + return parse_time(t, usec, USEC_PER_SEC); +} + +int parse_sec_fix_0(const char *t, usec_t *ret) { + usec_t k; + int r; + + assert(t); + assert(ret); + + r = parse_sec(t, &k); + if (r < 0) + return r; + + *ret = k == 0 ? USEC_INFINITY : k; + return r; +} + +int parse_sec_def_infinity(const char *t, usec_t *ret) { + t += strspn(t, WHITESPACE); + if (isempty(t)) { + *ret = USEC_INFINITY; + return 0; + } + return parse_sec(t, ret); +} + +static const char* extract_nsec_multiplier(const char *p, nsec_t *multiplier) { + static const struct { + const char *suffix; + nsec_t nsec; + } table[] = { + { "seconds", NSEC_PER_SEC }, + { "second", NSEC_PER_SEC }, + { "sec", NSEC_PER_SEC }, + { "s", NSEC_PER_SEC }, + { "minutes", NSEC_PER_MINUTE }, + { "minute", NSEC_PER_MINUTE }, + { "min", NSEC_PER_MINUTE }, + { "months", NSEC_PER_MONTH }, + { "month", NSEC_PER_MONTH }, + { "M", NSEC_PER_MONTH }, + { "msec", NSEC_PER_MSEC }, + { "ms", NSEC_PER_MSEC }, + { "m", NSEC_PER_MINUTE }, + { "hours", NSEC_PER_HOUR }, + { "hour", NSEC_PER_HOUR }, + { "hr", NSEC_PER_HOUR }, + { "h", NSEC_PER_HOUR }, + { "days", NSEC_PER_DAY }, + { "day", NSEC_PER_DAY }, + { "d", NSEC_PER_DAY }, + { "weeks", NSEC_PER_WEEK }, + { "week", NSEC_PER_WEEK }, + { "w", NSEC_PER_WEEK }, + { "years", NSEC_PER_YEAR }, + { "year", NSEC_PER_YEAR }, + { "y", NSEC_PER_YEAR }, + { "usec", NSEC_PER_USEC }, + { "us", NSEC_PER_USEC }, + { "μs", NSEC_PER_USEC }, /* U+03bc (aka GREEK LETTER MU) */ + { "µs", NSEC_PER_USEC }, /* U+b5 (aka MICRO SIGN) */ + { "nsec", 1ULL }, + { "ns", 1ULL }, + { "", 1ULL }, /* default is nsec */ + }; + size_t i; + + for (i = 0; i < ELEMENTSOF(table); i++) { + char *e; + + e = startswith(p, table[i].suffix); + if (e) { + *multiplier = table[i].nsec; + return e; + } + } + + return p; +} + +int parse_nsec(const char *t, nsec_t *nsec) { + const char *p, *s; + nsec_t r = 0; + bool something = false; + + assert(t); + assert(nsec); + + p = t; + + p += strspn(p, WHITESPACE); + s = startswith(p, "infinity"); + if (s) { + s += strspn(s, WHITESPACE); + if (*s != 0) + return -EINVAL; + + *nsec = NSEC_INFINITY; + return 0; + } + + for (;;) { + nsec_t multiplier = 1, k; + long long l; + char *e; + + p += strspn(p, WHITESPACE); + + if (*p == 0) { + if (!something) + return -EINVAL; + + break; + } + + if (*p == '-') /* Don't allow "-0" */ + return -ERANGE; + + errno = 0; + l = strtoll(p, &e, 10); + if (errno > 0) + return -errno; + if (l < 0) + return -ERANGE; + + if (*e == '.') { + p = e + 1; + p += strspn(p, DIGITS); + } else if (e == p) + return -EINVAL; + else + p = e; + + s = extract_nsec_multiplier(p + strspn(p, WHITESPACE), &multiplier); + if (s == p && *s != '\0') + /* Don't allow '12.34.56', but accept '12.34 .56' or '12.34s.56' */ + return -EINVAL; + + p = s; + + if ((nsec_t) l >= NSEC_INFINITY / multiplier) + return -ERANGE; + + k = (nsec_t) l * multiplier; + if (k >= NSEC_INFINITY - r) + return -ERANGE; + + r += k; + + something = true; + + if (*e == '.') { + nsec_t m = multiplier / 10; + const char *b; + + for (b = e + 1; *b >= '0' && *b <= '9'; b++, m /= 10) { + k = (nsec_t) (*b - '0') * m; + if (k >= NSEC_INFINITY - r) + return -ERANGE; + + r += k; + } + + /* Don't allow "0.-0", "3.+1", "3. 1", "3.sec" or "3.hoge" */ + if (b == e + 1) + return -EINVAL; + } + } + + *nsec = r; + + return 0; +} + +static int get_timezones_from_zone1970_tab(char ***ret) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_strv_free_ char **zones = NULL; + int r; + + assert(ret); + + f = fopen("/usr/share/zoneinfo/zone1970.tab", "re"); + if (!f) + return -errno; + + for (;;) { + _cleanup_free_ char *line = NULL, *cc = NULL, *co = NULL, *tz = NULL; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + break; + + const char *p = line; + + /* Line format is: + * 'country codes' 'coordinates' 'timezone' 'comments' */ + r = extract_many_words(&p, NULL, 0, &cc, &co, &tz, NULL); + if (r < 0) + continue; + + /* Lines that start with # are comments. */ + if (*cc == '#') + continue; + + r = strv_extend(&zones, tz); + if (r < 0) + return r; + } + + *ret = TAKE_PTR(zones); + return 0; +} + +static int get_timezones_from_tzdata_zi(char ***ret) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_strv_free_ char **zones = NULL; + int r; + + f = fopen("/usr/share/zoneinfo/tzdata.zi", "re"); + if (!f) + return -errno; + + for (;;) { + _cleanup_free_ char *line = NULL, *type = NULL, *f1 = NULL, *f2 = NULL; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + break; + + const char *p = line; + + /* The only lines we care about are Zone and Link lines. + * Zone line format is: + * 'Zone' 'timezone' ... + * Link line format is: + * 'Link' 'target' 'alias' + * See 'man zic' for more detail. */ + r = extract_many_words(&p, NULL, 0, &type, &f1, &f2, NULL); + if (r < 0) + continue; + + char *tz; + if (IN_SET(*type, 'Z', 'z')) + /* Zone lines have timezone in field 1. */ + tz = f1; + else if (IN_SET(*type, 'L', 'l')) + /* Link lines have timezone in field 2. */ + tz = f2; + else + /* Not a line we care about. */ + continue; + + r = strv_extend(&zones, tz); + if (r < 0) + return r; + } + + *ret = TAKE_PTR(zones); + return 0; +} + +int get_timezones(char ***ret) { + _cleanup_strv_free_ char **zones = NULL; + int r; + + assert(ret); + + r = get_timezones_from_tzdata_zi(&zones); + if (r == -ENOENT) { + log_debug_errno(r, "Could not get timezone data from tzdata.zi, using zone1970.tab: %m"); + r = get_timezones_from_zone1970_tab(&zones); + if (r == -ENOENT) + log_debug_errno(r, "Could not get timezone data from zone1970.tab, using UTC: %m"); + } + if (r < 0 && r != -ENOENT) + return r; + + /* Always include UTC */ + r = strv_extend(&zones, "UTC"); + if (r < 0) + return -ENOMEM; + + strv_sort(zones); + strv_uniq(zones); + + *ret = TAKE_PTR(zones); + return 0; +} + +int verify_timezone(const char *name, int log_level) { + bool slash = false; + const char *p, *t; + _cleanup_close_ int fd = -1; + char buf[4]; + int r; + + if (isempty(name)) + return -EINVAL; + + /* Always accept "UTC" as valid timezone, since it's the fallback, even if user has no timezones installed. */ + if (streq(name, "UTC")) + return 0; + + if (name[0] == '/') + return -EINVAL; + + for (p = name; *p; p++) { + if (!ascii_isdigit(*p) && + !ascii_isalpha(*p) && + !IN_SET(*p, '-', '_', '+', '/')) + return -EINVAL; + + if (*p == '/') { + + if (slash) + return -EINVAL; + + slash = true; + } else + slash = false; + } + + if (slash) + return -EINVAL; + + if (p - name >= PATH_MAX) + return -ENAMETOOLONG; + + t = strjoina("/usr/share/zoneinfo/", name); + + fd = open(t, O_RDONLY|O_CLOEXEC); + if (fd < 0) + return log_full_errno(log_level, errno, "Failed to open timezone file '%s': %m", t); + + r = fd_verify_regular(fd); + if (r < 0) + return log_full_errno(log_level, r, "Timezone file '%s' is not a regular file: %m", t); + + r = loop_read_exact(fd, buf, 4, false); + if (r < 0) + return log_full_errno(log_level, r, "Failed to read from timezone file '%s': %m", t); + + /* Magic from tzfile(5) */ + if (memcmp(buf, "TZif", 4) != 0) + return log_full_errno(log_level, SYNTHETIC_ERRNO(EBADMSG), + "Timezone file '%s' has wrong magic bytes", t); + + return 0; +} + +bool clock_supported(clockid_t clock) { + struct timespec ts; + + switch (clock) { + + case CLOCK_MONOTONIC: + case CLOCK_REALTIME: + case CLOCK_BOOTTIME: + /* These three are always available in our baseline, and work in timerfd, as of kernel 3.15 */ + return true; + + default: + /* For everything else, check properly */ + return clock_gettime(clock, &ts) >= 0; + } +} + +int get_timezone(char **ret) { + _cleanup_free_ char *t = NULL; + const char *e; + char *z; + int r; + + r = readlink_malloc("/etc/localtime", &t); + if (r == -ENOENT) { + /* If the symlink does not exist, assume "UTC", like glibc does */ + z = strdup("UTC"); + if (!z) + return -ENOMEM; + + *ret = z; + return 0; + } + if (r < 0) + return r; /* returns EINVAL if not a symlink */ + + e = PATH_STARTSWITH_SET(t, "/usr/share/zoneinfo/", "../usr/share/zoneinfo/"); + if (!e) + return -EINVAL; + + if (!timezone_is_valid(e, LOG_DEBUG)) + return -EINVAL; + + z = strdup(e); + if (!z) + return -ENOMEM; + + *ret = z; + return 0; +} + +time_t mktime_or_timegm(struct tm *tm, bool utc) { + return utc ? timegm(tm) : mktime(tm); +} + +struct tm *localtime_or_gmtime_r(const time_t *t, struct tm *tm, bool utc) { + return utc ? gmtime_r(t, tm) : localtime_r(t, tm); +} + +static uint32_t sysconf_clock_ticks_cached(void) { + static thread_local uint32_t hz = 0; + long r; + + if (hz == 0) { + r = sysconf(_SC_CLK_TCK); + + assert(r > 0); + hz = r; + } + + return hz; +} + +uint32_t usec_to_jiffies(usec_t u) { + uint32_t hz = sysconf_clock_ticks_cached(); + return DIV_ROUND_UP(u, USEC_PER_SEC / hz); +} + +usec_t jiffies_to_usec(uint32_t j) { + uint32_t hz = sysconf_clock_ticks_cached(); + return DIV_ROUND_UP(j * USEC_PER_SEC, hz); +} + +usec_t usec_shift_clock(usec_t x, clockid_t from, clockid_t to) { + usec_t a, b; + + if (x == USEC_INFINITY) + return USEC_INFINITY; + if (map_clock_id(from) == map_clock_id(to)) + return x; + + a = now(from); + b = now(to); + + if (x > a) + /* x lies in the future */ + return usec_add(b, usec_sub_unsigned(x, a)); + else + /* x lies in the past */ + return usec_sub_unsigned(b, usec_sub_unsigned(a, x)); +} + +bool in_utc_timezone(void) { + tzset(); + + return timezone == 0 && daylight == 0; +} + +int time_change_fd(void) { + + /* We only care for the cancellation event, hence we set the timeout to the latest possible value. */ + static const struct itimerspec its = { + .it_value.tv_sec = TIME_T_MAX, + }; + + _cleanup_close_ int fd = -1; + + assert_cc(sizeof(time_t) == sizeof(TIME_T_MAX)); + + /* Uses TFD_TIMER_CANCEL_ON_SET to get notifications whenever CLOCK_REALTIME makes a jump relative to + * CLOCK_MONOTONIC. */ + + fd = timerfd_create(CLOCK_REALTIME, TFD_NONBLOCK|TFD_CLOEXEC); + if (fd < 0) + return -errno; + + if (timerfd_settime(fd, TFD_TIMER_ABSTIME|TFD_TIMER_CANCEL_ON_SET, &its, NULL) >= 0) + return TAKE_FD(fd); + + /* So apparently there are systems where time_t is 64bit, but the kernel actually doesn't support + * 64bit time_t. In that case configuring a timer to TIME_T_MAX will fail with EOPNOTSUPP or a + * similar error. If that's the case let's try with INT32_MAX instead, maybe that works. It's a bit + * of a black magic thing though, but what can we do? + * + * We don't want this code on x86-64, hence let's conditionalize this for systems with 64bit time_t + * but where "long" is shorter than 64bit, i.e. 32bit archs. + * + * See: https://github.com/systemd/systemd/issues/14362 */ + +#if SIZEOF_TIME_T == 8 && ULONG_MAX < UINT64_MAX + if (ERRNO_IS_NOT_SUPPORTED(errno) || errno == EOVERFLOW) { + static const struct itimerspec its32 = { + .it_value.tv_sec = INT32_MAX, + }; + + if (timerfd_settime(fd, TFD_TIMER_ABSTIME|TFD_TIMER_CANCEL_ON_SET, &its32, NULL) >= 0) + return TAKE_FD(fd); + } +#endif + + return -errno; +} + +static const char* const timestamp_style_table[_TIMESTAMP_STYLE_MAX] = { + [TIMESTAMP_PRETTY] = "pretty", + [TIMESTAMP_US] = "us", + [TIMESTAMP_UTC] = "utc", + [TIMESTAMP_US_UTC] = "us+utc", + [TIMESTAMP_UNIX] = "unix", +}; + +/* Use the macro for enum → string to allow for aliases */ +_DEFINE_STRING_TABLE_LOOKUP_TO_STRING(timestamp_style, TimestampStyle,); + +/* For the string → enum mapping we use the generic implementation, but also support two aliases */ +TimestampStyle timestamp_style_from_string(const char *s) { + TimestampStyle t; + + t = (TimestampStyle) string_table_lookup(timestamp_style_table, ELEMENTSOF(timestamp_style_table), s); + if (t >= 0) + return t; + if (STRPTR_IN_SET(s, "µs", "μs")) /* acccept both µ symbols in unicode, i.e. micro symbol + greek small letter mu. */ + return TIMESTAMP_US; + if (STRPTR_IN_SET(s, "µs+utc", "μs+utc")) + return TIMESTAMP_US_UTC; + return t; +} diff --git a/src/basic/time-util.h b/src/basic/time-util.h new file mode 100644 index 0000000..c98f95a --- /dev/null +++ b/src/basic/time-util.h @@ -0,0 +1,217 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include +#include +#include +#include +#include +#include + +typedef uint64_t usec_t; +typedef uint64_t nsec_t; + +#define PRI_NSEC PRIu64 +#define PRI_USEC PRIu64 +#define NSEC_FMT "%" PRI_NSEC +#define USEC_FMT "%" PRI_USEC + +#include "macro.h" + +typedef struct dual_timestamp { + usec_t realtime; + usec_t monotonic; +} dual_timestamp; + +typedef struct triple_timestamp { + usec_t realtime; + usec_t monotonic; + usec_t boottime; +} triple_timestamp; + +typedef enum TimestampStyle { + TIMESTAMP_PRETTY, + TIMESTAMP_US, + TIMESTAMP_UTC, + TIMESTAMP_US_UTC, + TIMESTAMP_UNIX, + _TIMESTAMP_STYLE_MAX, + _TIMESTAMP_STYLE_INVALID = -EINVAL, +} TimestampStyle; + +#define USEC_INFINITY ((usec_t) UINT64_MAX) +#define NSEC_INFINITY ((nsec_t) UINT64_MAX) + +#define MSEC_PER_SEC 1000ULL +#define USEC_PER_SEC ((usec_t) 1000000ULL) +#define USEC_PER_MSEC ((usec_t) 1000ULL) +#define NSEC_PER_SEC ((nsec_t) 1000000000ULL) +#define NSEC_PER_MSEC ((nsec_t) 1000000ULL) +#define NSEC_PER_USEC ((nsec_t) 1000ULL) + +#define USEC_PER_MINUTE ((usec_t) (60ULL*USEC_PER_SEC)) +#define NSEC_PER_MINUTE ((nsec_t) (60ULL*NSEC_PER_SEC)) +#define USEC_PER_HOUR ((usec_t) (60ULL*USEC_PER_MINUTE)) +#define NSEC_PER_HOUR ((nsec_t) (60ULL*NSEC_PER_MINUTE)) +#define USEC_PER_DAY ((usec_t) (24ULL*USEC_PER_HOUR)) +#define NSEC_PER_DAY ((nsec_t) (24ULL*NSEC_PER_HOUR)) +#define USEC_PER_WEEK ((usec_t) (7ULL*USEC_PER_DAY)) +#define NSEC_PER_WEEK ((nsec_t) (7ULL*NSEC_PER_DAY)) +#define USEC_PER_MONTH ((usec_t) (2629800ULL*USEC_PER_SEC)) +#define NSEC_PER_MONTH ((nsec_t) (2629800ULL*NSEC_PER_SEC)) +#define USEC_PER_YEAR ((usec_t) (31557600ULL*USEC_PER_SEC)) +#define NSEC_PER_YEAR ((nsec_t) (31557600ULL*NSEC_PER_SEC)) + +/* We assume a maximum timezone length of 6. TZNAME_MAX is not defined on Linux, but glibc internally initializes this + * to 6. Let's rely on that. */ +#define FORMAT_TIMESTAMP_MAX (3U+1U+10U+1U+8U+1U+6U+1U+6U+1U) +#define FORMAT_TIMESTAMP_WIDTH 28U /* when outputting, assume this width */ +#define FORMAT_TIMESTAMP_RELATIVE_MAX 256U +#define FORMAT_TIMESPAN_MAX 64U + +#define TIME_T_MAX (time_t)((UINTMAX_C(1) << ((sizeof(time_t) << 3) - 1)) - 1) + +#define DUAL_TIMESTAMP_NULL ((struct dual_timestamp) {}) +#define TRIPLE_TIMESTAMP_NULL ((struct triple_timestamp) {}) + +usec_t now(clockid_t clock); +nsec_t now_nsec(clockid_t clock); + +usec_t map_clock_usec(usec_t from, clockid_t from_clock, clockid_t to_clock); + +dual_timestamp* dual_timestamp_get(dual_timestamp *ts); +dual_timestamp* dual_timestamp_from_realtime(dual_timestamp *ts, usec_t u); +dual_timestamp* dual_timestamp_from_monotonic(dual_timestamp *ts, usec_t u); +dual_timestamp* dual_timestamp_from_boottime(dual_timestamp *ts, usec_t u); + +triple_timestamp* triple_timestamp_get(triple_timestamp *ts); +triple_timestamp* triple_timestamp_from_realtime(triple_timestamp *ts, usec_t u); + +#define DUAL_TIMESTAMP_HAS_CLOCK(clock) \ + IN_SET(clock, CLOCK_REALTIME, CLOCK_REALTIME_ALARM, CLOCK_MONOTONIC) + +#define TRIPLE_TIMESTAMP_HAS_CLOCK(clock) \ + IN_SET(clock, CLOCK_REALTIME, CLOCK_REALTIME_ALARM, CLOCK_MONOTONIC, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) + +static inline bool timestamp_is_set(usec_t timestamp) { + return timestamp > 0 && timestamp != USEC_INFINITY; +} + +static inline bool dual_timestamp_is_set(const dual_timestamp *ts) { + return timestamp_is_set(ts->realtime) || + timestamp_is_set(ts->monotonic); +} + +static inline bool triple_timestamp_is_set(const triple_timestamp *ts) { + return timestamp_is_set(ts->realtime) || + timestamp_is_set(ts->monotonic) || + timestamp_is_set(ts->boottime); +} + +usec_t triple_timestamp_by_clock(triple_timestamp *ts, clockid_t clock); + +usec_t timespec_load(const struct timespec *ts) _pure_; +nsec_t timespec_load_nsec(const struct timespec *ts) _pure_; +struct timespec* timespec_store(struct timespec *ts, usec_t u); +struct timespec* timespec_store_nsec(struct timespec *ts, nsec_t n); + +#define TIMESPEC_STORE(u) timespec_store(&(struct timespec) {}, (u)) + +usec_t timeval_load(const struct timeval *tv) _pure_; +struct timeval* timeval_store(struct timeval *tv, usec_t u); + +#define TIMEVAL_STORE(u) timeval_store(&(struct timeval) {}, (u)) + +char* format_timestamp_style(char *buf, size_t l, usec_t t, TimestampStyle style) _warn_unused_result_; +char* format_timestamp_relative(char *buf, size_t l, usec_t t) _warn_unused_result_; +char* format_timespan(char *buf, size_t l, usec_t t, usec_t accuracy) _warn_unused_result_; + +_warn_unused_result_ +static inline char* format_timestamp(char *buf, size_t l, usec_t t) { + return format_timestamp_style(buf, l, t, TIMESTAMP_PRETTY); +} + +/* Note: the lifetime of the compound literal is the immediately surrounding block, + * see C11 §6.5.2.5, and + * https://stackoverflow.com/questions/34880638/compound-literal-lifetime-and-if-blocks */ +#define FORMAT_TIMESTAMP(t) format_timestamp((char[FORMAT_TIMESTAMP_MAX]){}, FORMAT_TIMESTAMP_MAX, t) +#define FORMAT_TIMESTAMP_RELATIVE(t) \ + format_timestamp_relative((char[FORMAT_TIMESTAMP_RELATIVE_MAX]){}, FORMAT_TIMESTAMP_RELATIVE_MAX, t) +#define FORMAT_TIMESPAN(t, accuracy) format_timespan((char[FORMAT_TIMESPAN_MAX]){}, FORMAT_TIMESPAN_MAX, t, accuracy) +#define FORMAT_TIMESTAMP_STYLE(t, style) \ + format_timestamp_style((char[FORMAT_TIMESTAMP_MAX]){}, FORMAT_TIMESTAMP_MAX, t, style) + +int parse_timestamp(const char *t, usec_t *usec); + +int parse_sec(const char *t, usec_t *usec); +int parse_sec_fix_0(const char *t, usec_t *usec); +int parse_sec_def_infinity(const char *t, usec_t *usec); +int parse_time(const char *t, usec_t *usec, usec_t default_unit); +int parse_nsec(const char *t, nsec_t *nsec); + +int get_timezones(char ***l); +int verify_timezone(const char *name, int log_level); +static inline bool timezone_is_valid(const char *name, int log_level) { + return verify_timezone(name, log_level) >= 0; +} + +bool clock_supported(clockid_t clock); + +usec_t usec_shift_clock(usec_t, clockid_t from, clockid_t to); + +int get_timezone(char **timezone); + +time_t mktime_or_timegm(struct tm *tm, bool utc); +struct tm *localtime_or_gmtime_r(const time_t *t, struct tm *tm, bool utc); + +uint32_t usec_to_jiffies(usec_t usec); +usec_t jiffies_to_usec(uint32_t jiffies); + +bool in_utc_timezone(void); + +static inline usec_t usec_add(usec_t a, usec_t b) { + /* Adds two time values, and makes sure USEC_INFINITY as input results as USEC_INFINITY in output, + * and doesn't overflow. */ + + if (a > USEC_INFINITY - b) /* overflow check */ + return USEC_INFINITY; + + return a + b; +} + +static inline usec_t usec_sub_unsigned(usec_t timestamp, usec_t delta) { + if (timestamp == USEC_INFINITY) /* Make sure infinity doesn't degrade */ + return USEC_INFINITY; + if (timestamp < delta) + return 0; + + return timestamp - delta; +} + +static inline usec_t usec_sub_signed(usec_t timestamp, int64_t delta) { + if (delta == INT64_MIN) { /* prevent overflow */ + assert_cc(-(INT64_MIN + 1) == INT64_MAX); + assert_cc(USEC_INFINITY > INT64_MAX); + return usec_add(timestamp, (usec_t) INT64_MAX + 1); + } + if (delta < 0) + return usec_add(timestamp, (usec_t) (-delta)); + + return usec_sub_unsigned(timestamp, (usec_t) delta); +} + +#if SIZEOF_TIME_T == 8 + /* The last second we can format is 31. Dec 9999, 1s before midnight, because otherwise we'd enter 5 digit + * year territory. However, since we want to stay away from this in all timezones we take one day off. */ +# define USEC_TIMESTAMP_FORMATTABLE_MAX ((usec_t) 253402214399000000) +#elif SIZEOF_TIME_T == 4 +/* With a 32bit time_t we can't go beyond 2038... */ +# define USEC_TIMESTAMP_FORMATTABLE_MAX ((usec_t) 2147483647000000) +#else +# error "Yuck, time_t is neither 4 nor 8 bytes wide?" +#endif + +int time_change_fd(void); + +const char* timestamp_style_to_string(TimestampStyle t) _const_; +TimestampStyle timestamp_style_from_string(const char *s) _pure_; diff --git a/src/basic/tmpfile-util.c b/src/basic/tmpfile-util.c new file mode 100644 index 0000000..34d3016 --- /dev/null +++ b/src/basic/tmpfile-util.c @@ -0,0 +1,360 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include + +#include "alloc-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "hexdecoct.h" +#include "macro.h" +#include "memfd-util.h" +#include "missing_fcntl.h" +#include "missing_syscall.h" +#include "path-util.h" +#include "process-util.h" +#include "random-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "tmpfile-util.h" +#include "umask-util.h" + +int fopen_temporary(const char *path, FILE **ret_f, char **ret_temp_path) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *t = NULL; + _cleanup_close_ int fd = -1; + int r; + + if (path) { + r = tempfn_xxxxxx(path, NULL, &t); + if (r < 0) + return r; + } else { + const char *d; + + r = tmp_dir(&d); + if (r < 0) + return r; + + t = path_join(d, "XXXXXX"); + if (!t) + return -ENOMEM; + } + + fd = mkostemp_safe(t); + if (fd < 0) + return -errno; + + /* This assumes that returned FILE object is short-lived and used within the same single-threaded + * context and never shared externally, hence locking is not necessary. */ + + r = take_fdopen_unlocked(&fd, "w", &f); + if (r < 0) { + (void) unlink(t); + return r; + } + + if (ret_f) + *ret_f = TAKE_PTR(f); + + if (ret_temp_path) + *ret_temp_path = TAKE_PTR(t); + + return 0; +} + +/* This is much like mkostemp() but is subject to umask(). */ +int mkostemp_safe(char *pattern) { + assert(pattern); + BLOCK_WITH_UMASK(0077); + return RET_NERRNO(mkostemp(pattern, O_CLOEXEC)); +} + +int fmkostemp_safe(char *pattern, const char *mode, FILE **ret_f) { + _cleanup_close_ int fd = -1; + FILE *f; + + fd = mkostemp_safe(pattern); + if (fd < 0) + return fd; + + f = take_fdopen(&fd, mode); + if (!f) + return -errno; + + *ret_f = f; + return 0; +} + +static int tempfn_build(const char *p, const char *pre, const char *post, bool child, char **ret) { + _cleanup_free_ char *d = NULL, *fn = NULL, *nf = NULL, *result = NULL; + size_t len_pre, len_post, len_add; + int r; + + assert(p); + assert(ret); + + /* + * Turns this: + * /foo/bar/waldo + * + * Into this : + * /foo/bar/waldo/.#
 (child == true)
+         *         /foo/bar/.#
waldo (child == false)
+         */
+
+        if (pre && strchr(pre, '/'))
+                return -EINVAL;
+
+        if (post && strchr(post, '/'))
+                return -EINVAL;
+
+        len_pre = strlen_ptr(pre);
+        len_post = strlen_ptr(post);
+        /* NAME_MAX is counted *without* the trailing NUL byte. */
+        if (len_pre > NAME_MAX - STRLEN(".#") ||
+            len_post > NAME_MAX - STRLEN(".#") - len_pre)
+                return -EINVAL;
+
+        len_add = len_pre + len_post + STRLEN(".#");
+
+        if (child) {
+                d = strdup(p);
+                if (!d)
+                        return -ENOMEM;
+        } else {
+                r = path_extract_directory(p, &d);
+                if (r < 0 && r != -EDESTADDRREQ) /* EDESTADDRREQ → No directory specified, just a filename */
+                        return r;
+
+                r = path_extract_filename(p, &fn);
+                if (r < 0)
+                        return r;
+
+                if (strlen(fn) > NAME_MAX - len_add)
+                        /* We cannot simply prepend and append strings to the filename. Let's truncate the filename. */
+                        fn[NAME_MAX - len_add] = '\0';
+        }
+
+        nf = strjoin(".#", strempty(pre), strempty(fn), strempty(post));
+        if (!nf)
+                return -ENOMEM;
+
+        if (d) {
+                if (!path_extend(&d, nf))
+                        return -ENOMEM;
+
+                result = path_simplify(TAKE_PTR(d));
+        } else
+                result = TAKE_PTR(nf);
+
+        if (!path_is_valid(result)) /* New path is not valid? (Maybe because too long?) Refuse. */
+                return -EINVAL;
+
+        *ret = TAKE_PTR(result);
+        return 0;
+}
+
+int tempfn_xxxxxx(const char *p, const char *extra, char **ret) {
+        /*
+         * Turns this:
+         *         /foo/bar/waldo
+         *
+         * Into this:
+         *         /foo/bar/.#waldoXXXXXX
+         */
+
+        return tempfn_build(p, extra, "XXXXXX", /* child = */ false, ret);
+}
+
+int tempfn_random(const char *p, const char *extra, char **ret) {
+        _cleanup_free_ char *s = NULL;
+
+        assert(p);
+        assert(ret);
+
+        /*
+         * Turns this:
+         *         /foo/bar/waldo
+         *
+         * Into this:
+         *         /foo/bar/.#waldobaa2a261115984a9
+         */
+
+        if (asprintf(&s, "%016" PRIx64, random_u64()) < 0)
+                return -ENOMEM;
+
+        return tempfn_build(p, extra, s, /* child = */ false, ret);
+}
+
+int tempfn_random_child(const char *p, const char *extra, char **ret) {
+        _cleanup_free_ char *s = NULL;
+        int r;
+
+        assert(ret);
+
+        /* Turns this:
+         *         /foo/bar/waldo
+         * Into this:
+         *         /foo/bar/waldo/.#3c2b6219aa75d7d0
+         */
+
+        if (!p) {
+                r = tmp_dir(&p);
+                if (r < 0)
+                        return r;
+        }
+
+        if (asprintf(&s, "%016" PRIx64, random_u64()) < 0)
+                return -ENOMEM;
+
+        return tempfn_build(p, extra, s, /* child = */ true, ret);
+}
+
+int open_tmpfile_unlinkable(const char *directory, int flags) {
+        char *p;
+        int fd, r;
+
+        if (!directory) {
+                r = tmp_dir(&directory);
+                if (r < 0)
+                        return r;
+        } else if (isempty(directory))
+                return -EINVAL;
+
+        /* Returns an unlinked temporary file that cannot be linked into the file system anymore */
+
+        /* Try O_TMPFILE first, if it is supported */
+        fd = open(directory, flags|O_TMPFILE|O_EXCL, S_IRUSR|S_IWUSR);
+        if (fd >= 0)
+                return fd;
+
+        /* Fall back to unguessable name + unlinking */
+        p = strjoina(directory, "/systemd-tmp-XXXXXX");
+
+        fd = mkostemp_safe(p);
+        if (fd < 0)
+                return fd;
+
+        (void) unlink(p);
+
+        return fd;
+}
+
+int open_tmpfile_linkable(const char *target, int flags, char **ret_path) {
+        _cleanup_free_ char *tmp = NULL;
+        int r, fd;
+
+        assert(target);
+        assert(ret_path);
+
+        /* Don't allow O_EXCL, as that has a special meaning for O_TMPFILE */
+        assert((flags & O_EXCL) == 0);
+
+        /* Creates a temporary file, that shall be renamed to "target" later. If possible, this uses O_TMPFILE – in
+         * which case "ret_path" will be returned as NULL. If not possible the temporary path name used is returned in
+         * "ret_path". Use link_tmpfile() below to rename the result after writing the file in full. */
+
+        fd = open_parent(target, O_TMPFILE|flags, 0640);
+        if (fd >= 0) {
+                *ret_path = NULL;
+                return fd;
+        }
+
+        log_debug_errno(fd, "Failed to use O_TMPFILE for %s: %m", target);
+
+        r = tempfn_random(target, NULL, &tmp);
+        if (r < 0)
+                return r;
+
+        fd = open(tmp, O_CREAT|O_EXCL|O_NOFOLLOW|O_NOCTTY|flags, 0640);
+        if (fd < 0)
+                return -errno;
+
+        *ret_path = TAKE_PTR(tmp);
+
+        return fd;
+}
+
+int fopen_tmpfile_linkable(const char *target, int flags, char **ret_path, FILE **ret_file) {
+        _cleanup_free_ char *path = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        _cleanup_close_ int fd = -1;
+
+        assert(target);
+        assert(ret_file);
+        assert(ret_path);
+
+        fd = open_tmpfile_linkable(target, flags, &path);
+        if (fd < 0)
+                return fd;
+
+        f = take_fdopen(&fd, "w");
+        if (!f)
+                return -ENOMEM;
+
+        *ret_path = TAKE_PTR(path);
+        *ret_file = TAKE_PTR(f);
+        return 0;
+}
+
+int link_tmpfile(int fd, const char *path, const char *target) {
+        assert(fd >= 0);
+        assert(target);
+
+        /* Moves a temporary file created with open_tmpfile() above into its final place. if "path" is NULL an fd
+         * created with O_TMPFILE is assumed, and linkat() is used. Otherwise it is assumed O_TMPFILE is not supported
+         * on the directory, and renameat2() is used instead.
+         *
+         * Note that in both cases we will not replace existing files. This is because linkat() does not support this
+         * operation currently (renameat2() does), and there is no nice way to emulate this. */
+
+        if (path)
+                return rename_noreplace(AT_FDCWD, path, AT_FDCWD, target);
+
+        return RET_NERRNO(linkat(AT_FDCWD, FORMAT_PROC_FD_PATH(fd), AT_FDCWD, target, AT_SYMLINK_FOLLOW));
+}
+
+int flink_tmpfile(FILE *f, const char *path, const char *target) {
+        int fd, r;
+
+        assert(f);
+        assert(target);
+
+        fd = fileno(f);
+        if (fd < 0) /* Not all FILE* objects encapsulate fds */
+                return -EBADF;
+
+        r = fflush_sync_and_check(f);
+        if (r < 0)
+                return r;
+
+        return link_tmpfile(fd, path, target);
+}
+
+int mkdtemp_malloc(const char *template, char **ret) {
+        _cleanup_free_ char *p = NULL;
+        int r;
+
+        assert(ret);
+
+        if (template)
+                p = strdup(template);
+        else {
+                const char *tmp;
+
+                r = tmp_dir(&tmp);
+                if (r < 0)
+                        return r;
+
+                p = path_join(tmp, "XXXXXX");
+        }
+        if (!p)
+                return -ENOMEM;
+
+        if (!mkdtemp(p))
+                return -errno;
+
+        *ret = TAKE_PTR(p);
+        return 0;
+}
diff --git a/src/basic/tmpfile-util.h b/src/basic/tmpfile-util.h
new file mode 100644
index 0000000..610cbaf
--- /dev/null
+++ b/src/basic/tmpfile-util.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include 
+
+int fopen_temporary(const char *path, FILE **_f, char **_temp_path);
+int mkostemp_safe(char *pattern);
+int fmkostemp_safe(char *pattern, const char *mode, FILE**_f);
+
+int tempfn_xxxxxx(const char *p, const char *extra, char **ret);
+int tempfn_random(const char *p, const char *extra, char **ret);
+int tempfn_random_child(const char *p, const char *extra, char **ret);
+
+int open_tmpfile_unlinkable(const char *directory, int flags);
+int open_tmpfile_linkable(const char *target, int flags, char **ret_path);
+int fopen_tmpfile_linkable(const char *target, int flags, char **ret_path, FILE **ret_file);
+
+int link_tmpfile(int fd, const char *path, const char *target);
+int flink_tmpfile(FILE *f, const char *path, const char *target);
+
+int mkdtemp_malloc(const char *template, char **ret);
diff --git a/src/basic/uid-range.c b/src/basic/uid-range.c
new file mode 100644
index 0000000..8463599
--- /dev/null
+++ b/src/basic/uid-range.c
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include 
+#include 
+#include 
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "macro.h"
+#include "path-util.h"
+#include "sort-util.h"
+#include "stat-util.h"
+#include "uid-range.h"
+#include "user-util.h"
+
+UidRange *uid_range_free(UidRange *range) {
+        if (!range)
+                return NULL;
+
+        free(range->entries);
+        return mfree(range);
+}
+
+static bool uid_range_entry_intersect(const UidRangeEntry *a, const UidRangeEntry *b) {
+        assert(a);
+        assert(b);
+
+        return a->start <= b->start + b->nr && a->start + a->nr >= b->start;
+}
+
+static int uid_range_entry_compare(const UidRangeEntry *a, const UidRangeEntry *b) {
+        int r;
+
+        assert(a);
+        assert(b);
+
+        r = CMP(a->start, b->start);
+        if (r != 0)
+                return r;
+
+        return CMP(a->nr, b->nr);
+}
+
+static void uid_range_coalesce(UidRange *range) {
+        assert(range);
+
+        if (range->n_entries <= 0)
+                return;
+
+        typesafe_qsort(range->entries, range->n_entries, uid_range_entry_compare);
+
+        for (size_t i = 0; i < range->n_entries; i++) {
+                UidRangeEntry *x = range->entries + i;
+
+                for (size_t j = i + 1; j < range->n_entries; j++) {
+                        UidRangeEntry *y = range->entries + j;
+                        uid_t begin, end;
+
+                        if (!uid_range_entry_intersect(x, y))
+                                break;
+
+                        begin = MIN(x->start, y->start);
+                        end = MAX(x->start + x->nr, y->start + y->nr);
+
+                        x->start = begin;
+                        x->nr = end - begin;
+
+                        if (range->n_entries > j + 1)
+                                memmove(y, y + 1, sizeof(UidRangeEntry) * (range->n_entries - j - 1));
+
+                        range->n_entries--;
+                        j--;
+                }
+        }
+}
+
+int uid_range_add_internal(UidRange **range, uid_t start, uid_t nr, bool coalesce) {
+        _cleanup_(uid_range_freep) UidRange *range_new = NULL;
+        UidRange *p;
+
+        assert(range);
+
+        if (nr <= 0)
+                return 0;
+
+        if (start > UINT32_MAX - nr) /* overflow check */
+                return -ERANGE;
+
+        if (*range)
+                p = *range;
+        else {
+                range_new = new0(UidRange, 1);
+                if (!range_new)
+                        return -ENOMEM;
+
+                p = range_new;
+        }
+
+        if (!GREEDY_REALLOC(p->entries, p->n_entries + 1))
+                return -ENOMEM;
+
+        p->entries[p->n_entries++] = (UidRangeEntry) {
+                .start = start,
+                .nr = nr,
+        };
+
+        if (coalesce)
+                uid_range_coalesce(p);
+
+        TAKE_PTR(range_new);
+        *range = p;
+
+        return 0;
+}
+
+int uid_range_add_str(UidRange **range, const char *s) {
+        uid_t start, end;
+        int r;
+
+        assert(range);
+        assert(s);
+
+        r = parse_uid_range(s, &start, &end);
+        if (r < 0)
+                return r;
+
+        return uid_range_add_internal(range, start, end - start + 1, /* coalesce = */ true);
+}
+
+int uid_range_next_lower(const UidRange *range, uid_t *uid) {
+        uid_t closest = UID_INVALID, candidate;
+
+        assert(range);
+        assert(uid);
+
+        if (*uid == 0)
+                return -EBUSY;
+
+        candidate = *uid - 1;
+
+        for (size_t i = 0; i < range->n_entries; i++) {
+                uid_t begin, end;
+
+                begin = range->entries[i].start;
+                end = range->entries[i].start + range->entries[i].nr - 1;
+
+                if (candidate >= begin && candidate <= end) {
+                        *uid = candidate;
+                        return 1;
+                }
+
+                if (end < candidate)
+                        closest = end;
+        }
+
+        if (closest == UID_INVALID)
+                return -EBUSY;
+
+        *uid = closest;
+        return 1;
+}
+
+bool uid_range_covers(const UidRange *range, uid_t start, uid_t nr) {
+        if (nr == 0) /* empty range? always covered... */
+                return true;
+
+        if (start > UINT32_MAX - nr) /* range overflows? definitely not covered... */
+                return false;
+
+        if (!range)
+                return false;
+
+        for (size_t i = 0; i < range->n_entries; i++)
+                if (start >= range->entries[i].start &&
+                    start + nr <= range->entries[i].start + range->entries[i].nr)
+                        return true;
+
+        return false;
+}
+
+int uid_range_load_userns(UidRange **ret, const char *path) {
+        _cleanup_(uid_range_freep) UidRange *range = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        int r;
+
+        /* If 'path' is NULL loads the UID range of the userns namespace we run. Otherwise load the data from
+         * the specified file (which can be either uid_map or gid_map, in case caller needs to deal with GID
+         * maps).
+         *
+         * To simplify things this will modify the passed array in case of later failure. */
+
+        assert(ret);
+
+        if (!path)
+                path = "/proc/self/uid_map";
+
+        f = fopen(path, "re");
+        if (!f) {
+                r = -errno;
+
+                if (r == -ENOENT && path_startswith(path, "/proc/"))
+                        return proc_mounted() > 0 ? -EOPNOTSUPP : -ENOSYS;
+
+                return r;
+        }
+
+        range = new0(UidRange, 1);
+        if (!range)
+                return -ENOMEM;
+
+        for (;;) {
+                uid_t uid_base, uid_shift, uid_range;
+                int k;
+
+                errno = 0;
+                k = fscanf(f, UID_FMT " " UID_FMT " " UID_FMT "\n", &uid_base, &uid_shift, &uid_range);
+                if (k == EOF) {
+                        if (ferror(f))
+                                return errno_or_else(EIO);
+
+                        break;
+                }
+                if (k != 3)
+                        return -EBADMSG;
+
+                r = uid_range_add_internal(&range, uid_base, uid_range, /* coalesce = */ false);
+                if (r < 0)
+                        return r;
+        }
+
+        uid_range_coalesce(range);
+
+        *ret = TAKE_PTR(range);
+        return 0;
+}
diff --git a/src/basic/uid-range.h b/src/basic/uid-range.h
new file mode 100644
index 0000000..461a511
--- /dev/null
+++ b/src/basic/uid-range.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include 
+#include 
+
+#include "macro.h"
+
+typedef struct UidRangeEntry {
+        uid_t start, nr;
+} UidRangeEntry;
+
+typedef struct UidRange {
+        UidRangeEntry *entries;
+        size_t n_entries;
+} UidRange;
+
+UidRange *uid_range_free(UidRange *range);
+DEFINE_TRIVIAL_CLEANUP_FUNC(UidRange*, uid_range_free);
+
+int uid_range_add_internal(UidRange **range, uid_t start, uid_t nr, bool coalesce);
+static inline int uid_range_add(UidRange **range, uid_t start, uid_t nr) {
+        return uid_range_add_internal(range, start, nr, true);
+}
+int uid_range_add_str(UidRange **range, const char *s);
+
+int uid_range_next_lower(const UidRange *range, uid_t *uid);
+
+bool uid_range_covers(const UidRange *range, uid_t start, uid_t nr);
+static inline bool uid_range_contains(const UidRange *range, uid_t uid) {
+        return uid_range_covers(range, uid, 1);
+}
+
+int uid_range_load_userns(UidRange **ret, const char *path);
diff --git a/src/basic/umask-util.h b/src/basic/umask-util.h
new file mode 100644
index 0000000..90d18f7
--- /dev/null
+++ b/src/basic/umask-util.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include 
+#include 
+#include 
+
+#include "macro.h"
+
+static inline void umaskp(mode_t *u) {
+        umask(*u & 0777);
+}
+
+#define _cleanup_umask_ _cleanup_(umaskp)
+
+/* We make use of the fact here that the umask() concept is using only the lower 9 bits of mode_t, although
+ * mode_t has space for the file type in the bits further up. We simply OR in the file type mask S_IFMT to
+ * distinguish the first and the second iteration of the RUN_WITH_UMASK() loop, so that we can run the first
+ * one, and exit on the second. */
+
+assert_cc((S_IFMT & 0777) == 0);
+
+#define RUN_WITH_UMASK(mask)                                            \
+        for (_cleanup_umask_ mode_t _saved_umask_ = umask(mask) | S_IFMT; \
+             FLAGS_SET(_saved_umask_, S_IFMT);                          \
+             _saved_umask_ &= 0777)
+
+#define BLOCK_WITH_UMASK(mask) \
+        _unused_ _cleanup_umask_ mode_t _saved_umask_ = umask(mask);
diff --git a/src/basic/unaligned.h b/src/basic/unaligned.h
new file mode 100644
index 0000000..4100be0
--- /dev/null
+++ b/src/basic/unaligned.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include 
+#include 
+
+/* BE */
+
+static inline uint16_t unaligned_read_be16(const void *_u) {
+        const struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u;
+
+        return be16toh(u->x);
+}
+
+static inline uint32_t unaligned_read_be32(const void *_u) {
+        const struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u;
+
+        return be32toh(u->x);
+}
+
+static inline uint64_t unaligned_read_be64(const void *_u) {
+        const struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u;
+
+        return be64toh(u->x);
+}
+
+static inline void unaligned_write_be16(void *_u, uint16_t a) {
+        struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u;
+
+        u->x = be16toh(a);
+}
+
+static inline void unaligned_write_be32(void *_u, uint32_t a) {
+        struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u;
+
+        u->x = be32toh(a);
+}
+
+static inline void unaligned_write_be64(void *_u, uint64_t a) {
+        struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u;
+
+        u->x = be64toh(a);
+}
+
+/* LE */
+
+static inline uint16_t unaligned_read_le16(const void *_u) {
+        const struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u;
+
+        return le16toh(u->x);
+}
+
+static inline uint32_t unaligned_read_le32(const void *_u) {
+        const struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u;
+
+        return le32toh(u->x);
+}
+
+static inline uint64_t unaligned_read_le64(const void *_u) {
+        const struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u;
+
+        return le64toh(u->x);
+}
+
+static inline void unaligned_write_le16(void *_u, uint16_t a) {
+        struct __attribute__((__packed__, __may_alias__)) { uint16_t x; } *u = _u;
+
+        u->x = le16toh(a);
+}
+
+static inline void unaligned_write_le32(void *_u, uint32_t a) {
+        struct __attribute__((__packed__, __may_alias__)) { uint32_t x; } *u = _u;
+
+        u->x = le32toh(a);
+}
+
+static inline void unaligned_write_le64(void *_u, uint64_t a) {
+        struct __attribute__((__packed__, __may_alias__)) { uint64_t x; } *u = _u;
+
+        u->x = le64toh(a);
+}
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define unaligned_read_ne16 unaligned_read_be16
+#define unaligned_read_ne32 unaligned_read_be32
+#define unaligned_read_ne64 unaligned_read_be64
+
+#define unaligned_write_ne16 unaligned_write_be16
+#define unaligned_write_ne32 unaligned_write_be32
+#define unaligned_write_ne64 unaligned_write_be64
+#else
+#define unaligned_read_ne16 unaligned_read_le16
+#define unaligned_read_ne32 unaligned_read_le32
+#define unaligned_read_ne64 unaligned_read_le64
+
+#define unaligned_write_ne16 unaligned_write_le16
+#define unaligned_write_ne32 unaligned_write_le32
+#define unaligned_write_ne64 unaligned_write_le64
+#endif
diff --git a/src/basic/unit-def.c b/src/basic/unit-def.c
new file mode 100644
index 0000000..94cd603
--- /dev/null
+++ b/src/basic/unit-def.c
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-label.h"
+#include "string-table.h"
+#include "unit-def.h"
+#include "unit-name.h"
+
+char *unit_dbus_path_from_name(const char *name) {
+        _cleanup_free_ char *e = NULL;
+
+        assert(name);
+
+        e = bus_label_escape(name);
+        if (!e)
+                return NULL;
+
+        return strjoin("/org/freedesktop/systemd1/unit/", e);
+}
+
+int unit_name_from_dbus_path(const char *path, char **name) {
+        const char *e;
+        char *n;
+
+        e = startswith(path, "/org/freedesktop/systemd1/unit/");
+        if (!e)
+                return -EINVAL;
+
+        n = bus_label_unescape(e);
+        if (!n)
+                return -ENOMEM;
+
+        *name = n;
+        return 0;
+}
+
+const char* unit_dbus_interface_from_type(UnitType t) {
+
+        static const char *const table[_UNIT_TYPE_MAX] = {
+                [UNIT_SERVICE]   = "org.freedesktop.systemd1.Service",
+                [UNIT_SOCKET]    = "org.freedesktop.systemd1.Socket",
+                [UNIT_TARGET]    = "org.freedesktop.systemd1.Target",
+                [UNIT_DEVICE]    = "org.freedesktop.systemd1.Device",
+                [UNIT_MOUNT]     = "org.freedesktop.systemd1.Mount",
+                [UNIT_AUTOMOUNT] = "org.freedesktop.systemd1.Automount",
+                [UNIT_SWAP]      = "org.freedesktop.systemd1.Swap",
+                [UNIT_TIMER]     = "org.freedesktop.systemd1.Timer",
+                [UNIT_PATH]      = "org.freedesktop.systemd1.Path",
+                [UNIT_SLICE]     = "org.freedesktop.systemd1.Slice",
+                [UNIT_SCOPE]     = "org.freedesktop.systemd1.Scope",
+        };
+
+        if (t < 0)
+                return NULL;
+        if (t >= _UNIT_TYPE_MAX)
+                return NULL;
+
+        return table[t];
+}
+
+const char *unit_dbus_interface_from_name(const char *name) {
+        UnitType t;
+
+        t = unit_name_to_type(name);
+        if (t < 0)
+                return NULL;
+
+        return unit_dbus_interface_from_type(t);
+}
+
+static const char* const unit_type_table[_UNIT_TYPE_MAX] = {
+        [UNIT_SERVICE]   = "service",
+        [UNIT_SOCKET]    = "socket",
+        [UNIT_TARGET]    = "target",
+        [UNIT_DEVICE]    = "device",
+        [UNIT_MOUNT]     = "mount",
+        [UNIT_AUTOMOUNT] = "automount",
+        [UNIT_SWAP]      = "swap",
+        [UNIT_TIMER]     = "timer",
+        [UNIT_PATH]      = "path",
+        [UNIT_SLICE]     = "slice",
+        [UNIT_SCOPE]     = "scope",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_type, UnitType);
+
+static const char* const unit_load_state_table[_UNIT_LOAD_STATE_MAX] = {
+        [UNIT_STUB]        = "stub",
+        [UNIT_LOADED]      = "loaded",
+        [UNIT_NOT_FOUND]   = "not-found",
+        [UNIT_BAD_SETTING] = "bad-setting",
+        [UNIT_ERROR]       = "error",
+        [UNIT_MERGED]      = "merged",
+        [UNIT_MASKED]      = "masked"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_load_state, UnitLoadState);
+
+static const char* const unit_active_state_table[_UNIT_ACTIVE_STATE_MAX] = {
+        [UNIT_ACTIVE]       = "active",
+        [UNIT_RELOADING]    = "reloading",
+        [UNIT_INACTIVE]     = "inactive",
+        [UNIT_FAILED]       = "failed",
+        [UNIT_ACTIVATING]   = "activating",
+        [UNIT_DEACTIVATING] = "deactivating",
+        [UNIT_MAINTENANCE]  = "maintenance",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_active_state, UnitActiveState);
+
+static const char* const freezer_state_table[_FREEZER_STATE_MAX] = {
+        [FREEZER_RUNNING]  = "running",
+        [FREEZER_FREEZING] = "freezing",
+        [FREEZER_FROZEN]   = "frozen",
+        [FREEZER_THAWING]  = "thawing",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(freezer_state, FreezerState);
+
+static const char* const unit_marker_table[_UNIT_MARKER_MAX] = {
+        [UNIT_MARKER_NEEDS_RELOAD]  = "needs-reload",
+        [UNIT_MARKER_NEEDS_RESTART] = "needs-restart",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_marker, UnitMarker);
+
+static const char* const automount_state_table[_AUTOMOUNT_STATE_MAX] = {
+        [AUTOMOUNT_DEAD]    = "dead",
+        [AUTOMOUNT_WAITING] = "waiting",
+        [AUTOMOUNT_RUNNING] = "running",
+        [AUTOMOUNT_FAILED]  = "failed"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(automount_state, AutomountState);
+
+static const char* const device_state_table[_DEVICE_STATE_MAX] = {
+        [DEVICE_DEAD]      = "dead",
+        [DEVICE_TENTATIVE] = "tentative",
+        [DEVICE_PLUGGED]   = "plugged",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(device_state, DeviceState);
+
+static const char* const mount_state_table[_MOUNT_STATE_MAX] = {
+        [MOUNT_DEAD]               = "dead",
+        [MOUNT_MOUNTING]           = "mounting",
+        [MOUNT_MOUNTING_DONE]      = "mounting-done",
+        [MOUNT_MOUNTED]            = "mounted",
+        [MOUNT_REMOUNTING]         = "remounting",
+        [MOUNT_UNMOUNTING]         = "unmounting",
+        [MOUNT_REMOUNTING_SIGTERM] = "remounting-sigterm",
+        [MOUNT_REMOUNTING_SIGKILL] = "remounting-sigkill",
+        [MOUNT_UNMOUNTING_SIGTERM] = "unmounting-sigterm",
+        [MOUNT_UNMOUNTING_SIGKILL] = "unmounting-sigkill",
+        [MOUNT_FAILED]             = "failed",
+        [MOUNT_CLEANING]           = "cleaning",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(mount_state, MountState);
+
+static const char* const path_state_table[_PATH_STATE_MAX] = {
+        [PATH_DEAD]    = "dead",
+        [PATH_WAITING] = "waiting",
+        [PATH_RUNNING] = "running",
+        [PATH_FAILED]  = "failed"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(path_state, PathState);
+
+static const char* const scope_state_table[_SCOPE_STATE_MAX] = {
+        [SCOPE_DEAD]         = "dead",
+        [SCOPE_START_CHOWN]  = "start-chown",
+        [SCOPE_RUNNING]      = "running",
+        [SCOPE_ABANDONED]    = "abandoned",
+        [SCOPE_STOP_SIGTERM] = "stop-sigterm",
+        [SCOPE_STOP_SIGKILL] = "stop-sigkill",
+        [SCOPE_FAILED]       = "failed",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(scope_state, ScopeState);
+
+static const char* const service_state_table[_SERVICE_STATE_MAX] = {
+        [SERVICE_DEAD]           = "dead",
+        [SERVICE_CONDITION]      = "condition",
+        [SERVICE_START_PRE]      = "start-pre",
+        [SERVICE_START]          = "start",
+        [SERVICE_START_POST]     = "start-post",
+        [SERVICE_RUNNING]        = "running",
+        [SERVICE_EXITED]         = "exited",
+        [SERVICE_RELOAD]         = "reload",
+        [SERVICE_STOP]           = "stop",
+        [SERVICE_STOP_WATCHDOG]  = "stop-watchdog",
+        [SERVICE_STOP_SIGTERM]   = "stop-sigterm",
+        [SERVICE_STOP_SIGKILL]   = "stop-sigkill",
+        [SERVICE_STOP_POST]      = "stop-post",
+        [SERVICE_FINAL_WATCHDOG] = "final-watchdog",
+        [SERVICE_FINAL_SIGTERM]  = "final-sigterm",
+        [SERVICE_FINAL_SIGKILL]  = "final-sigkill",
+        [SERVICE_FAILED]         = "failed",
+        [SERVICE_AUTO_RESTART]   = "auto-restart",
+        [SERVICE_CLEANING]       = "cleaning",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(service_state, ServiceState);
+
+static const char* const slice_state_table[_SLICE_STATE_MAX] = {
+        [SLICE_DEAD]   = "dead",
+        [SLICE_ACTIVE] = "active"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(slice_state, SliceState);
+
+static const char* const socket_state_table[_SOCKET_STATE_MAX] = {
+        [SOCKET_DEAD]             = "dead",
+        [SOCKET_START_PRE]        = "start-pre",
+        [SOCKET_START_CHOWN]      = "start-chown",
+        [SOCKET_START_POST]       = "start-post",
+        [SOCKET_LISTENING]        = "listening",
+        [SOCKET_RUNNING]          = "running",
+        [SOCKET_STOP_PRE]         = "stop-pre",
+        [SOCKET_STOP_PRE_SIGTERM] = "stop-pre-sigterm",
+        [SOCKET_STOP_PRE_SIGKILL] = "stop-pre-sigkill",
+        [SOCKET_STOP_POST]        = "stop-post",
+        [SOCKET_FINAL_SIGTERM]    = "final-sigterm",
+        [SOCKET_FINAL_SIGKILL]    = "final-sigkill",
+        [SOCKET_FAILED]           = "failed",
+        [SOCKET_CLEANING]         = "cleaning",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(socket_state, SocketState);
+
+static const char* const swap_state_table[_SWAP_STATE_MAX] = {
+        [SWAP_DEAD]                 = "dead",
+        [SWAP_ACTIVATING]           = "activating",
+        [SWAP_ACTIVATING_DONE]      = "activating-done",
+        [SWAP_ACTIVE]               = "active",
+        [SWAP_DEACTIVATING]         = "deactivating",
+        [SWAP_DEACTIVATING_SIGTERM] = "deactivating-sigterm",
+        [SWAP_DEACTIVATING_SIGKILL] = "deactivating-sigkill",
+        [SWAP_FAILED]               = "failed",
+        [SWAP_CLEANING]             = "cleaning",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(swap_state, SwapState);
+
+static const char* const target_state_table[_TARGET_STATE_MAX] = {
+        [TARGET_DEAD]   = "dead",
+        [TARGET_ACTIVE] = "active"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(target_state, TargetState);
+
+static const char* const timer_state_table[_TIMER_STATE_MAX] = {
+        [TIMER_DEAD]    = "dead",
+        [TIMER_WAITING] = "waiting",
+        [TIMER_RUNNING] = "running",
+        [TIMER_ELAPSED] = "elapsed",
+        [TIMER_FAILED]  = "failed"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(timer_state, TimerState);
+
+static const char* const unit_dependency_table[_UNIT_DEPENDENCY_MAX] = {
+        [UNIT_REQUIRES]               = "Requires",
+        [UNIT_REQUISITE]              = "Requisite",
+        [UNIT_WANTS]                  = "Wants",
+        [UNIT_BINDS_TO]               = "BindsTo",
+        [UNIT_PART_OF]                = "PartOf",
+        [UNIT_UPHOLDS]                = "Upholds",
+        [UNIT_REQUIRED_BY]            = "RequiredBy",
+        [UNIT_REQUISITE_OF]           = "RequisiteOf",
+        [UNIT_WANTED_BY]              = "WantedBy",
+        [UNIT_BOUND_BY]               = "BoundBy",
+        [UNIT_UPHELD_BY]              = "UpheldBy",
+        [UNIT_CONSISTS_OF]            = "ConsistsOf",
+        [UNIT_CONFLICTS]              = "Conflicts",
+        [UNIT_CONFLICTED_BY]          = "ConflictedBy",
+        [UNIT_BEFORE]                 = "Before",
+        [UNIT_AFTER]                  = "After",
+        [UNIT_ON_SUCCESS]             = "OnSuccess",
+        [UNIT_ON_SUCCESS_OF]          = "OnSuccessOf",
+        [UNIT_ON_FAILURE]             = "OnFailure",
+        [UNIT_ON_FAILURE_OF]          = "OnFailureOf",
+        [UNIT_TRIGGERS]               = "Triggers",
+        [UNIT_TRIGGERED_BY]           = "TriggeredBy",
+        [UNIT_PROPAGATES_RELOAD_TO]   = "PropagatesReloadTo",
+        [UNIT_RELOAD_PROPAGATED_FROM] = "ReloadPropagatedFrom",
+        [UNIT_PROPAGATES_STOP_TO]     = "PropagatesStopTo",
+        [UNIT_STOP_PROPAGATED_FROM]   = "StopPropagatedFrom",
+        [UNIT_JOINS_NAMESPACE_OF]     = "JoinsNamespaceOf",
+        [UNIT_REFERENCES]             = "References",
+        [UNIT_REFERENCED_BY]          = "ReferencedBy",
+        [UNIT_IN_SLICE]               = "InSlice",
+        [UNIT_SLICE_OF]               = "SliceOf",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_dependency, UnitDependency);
+
+static const char* const notify_access_table[_NOTIFY_ACCESS_MAX] = {
+        [NOTIFY_NONE] = "none",
+        [NOTIFY_MAIN] = "main",
+        [NOTIFY_EXEC] = "exec",
+        [NOTIFY_ALL]  = "all"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(notify_access, NotifyAccess);
+
+SpecialGlyph unit_active_state_to_glyph(UnitActiveState state) {
+        static const SpecialGlyph map[_UNIT_ACTIVE_STATE_MAX] = {
+                [UNIT_ACTIVE]       = SPECIAL_GLYPH_BLACK_CIRCLE,
+                [UNIT_RELOADING]    = SPECIAL_GLYPH_CIRCLE_ARROW,
+                [UNIT_INACTIVE]     = SPECIAL_GLYPH_WHITE_CIRCLE,
+                [UNIT_FAILED]       = SPECIAL_GLYPH_MULTIPLICATION_SIGN,
+                [UNIT_ACTIVATING]   = SPECIAL_GLYPH_BLACK_CIRCLE,
+                [UNIT_DEACTIVATING] = SPECIAL_GLYPH_BLACK_CIRCLE,
+                [UNIT_MAINTENANCE]  = SPECIAL_GLYPH_WHITE_CIRCLE,
+        };
+
+        if (state < 0)
+                return _SPECIAL_GLYPH_INVALID;
+
+        assert(state < _UNIT_ACTIVE_STATE_MAX);
+        return map[state];
+}
diff --git a/src/basic/unit-def.h b/src/basic/unit-def.h
new file mode 100644
index 0000000..5fcd51c
--- /dev/null
+++ b/src/basic/unit-def.h
@@ -0,0 +1,335 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include 
+
+#include "errno-list.h"
+#include "glyph-util.h"
+#include "macro.h"
+
+/* The enum order is used to order unit jobs in the job queue
+ * when other criteria (cpu weight, nice level) are identical.
+ * In this case service units have the highest priority. */
+typedef enum UnitType {
+        UNIT_SERVICE,
+        UNIT_MOUNT,
+        UNIT_SWAP,
+        UNIT_SOCKET,
+        UNIT_TARGET,
+        UNIT_DEVICE,
+        UNIT_AUTOMOUNT,
+        UNIT_TIMER,
+        UNIT_PATH,
+        UNIT_SLICE,
+        UNIT_SCOPE,
+        _UNIT_TYPE_MAX,
+        _UNIT_TYPE_INVALID = -EINVAL,
+        _UNIT_TYPE_ERRNO_MAX = -ERRNO_MAX, /* Ensure the whole errno range fits into this enum */
+} UnitType;
+
+typedef enum UnitLoadState {
+        UNIT_STUB,
+        UNIT_LOADED,
+        UNIT_NOT_FOUND,    /* error condition #1: unit file not found */
+        UNIT_BAD_SETTING,  /* error condition #2: we couldn't parse some essential unit file setting */
+        UNIT_ERROR,        /* error condition #3: other "system" error, catchall for the rest */
+        UNIT_MERGED,
+        UNIT_MASKED,
+        _UNIT_LOAD_STATE_MAX,
+        _UNIT_LOAD_STATE_INVALID = -EINVAL,
+} UnitLoadState;
+
+typedef enum UnitActiveState {
+        UNIT_ACTIVE,
+        UNIT_RELOADING,
+        UNIT_INACTIVE,
+        UNIT_FAILED,
+        UNIT_ACTIVATING,
+        UNIT_DEACTIVATING,
+        UNIT_MAINTENANCE,
+        _UNIT_ACTIVE_STATE_MAX,
+        _UNIT_ACTIVE_STATE_INVALID = -EINVAL,
+} UnitActiveState;
+
+typedef enum FreezerState {
+        FREEZER_RUNNING,
+        FREEZER_FREEZING,
+        FREEZER_FROZEN,
+        FREEZER_THAWING,
+        _FREEZER_STATE_MAX,
+        _FREEZER_STATE_INVALID = -EINVAL,
+} FreezerState;
+
+typedef enum UnitMarker {
+        UNIT_MARKER_NEEDS_RELOAD,
+        UNIT_MARKER_NEEDS_RESTART,
+        _UNIT_MARKER_MAX,
+        _UNIT_MARKER_INVALID = -EINVAL,
+} UnitMarker;
+
+typedef enum AutomountState {
+        AUTOMOUNT_DEAD,
+        AUTOMOUNT_WAITING,
+        AUTOMOUNT_RUNNING,
+        AUTOMOUNT_FAILED,
+        _AUTOMOUNT_STATE_MAX,
+        _AUTOMOUNT_STATE_INVALID = -EINVAL,
+} AutomountState;
+
+/* We simply watch devices, we cannot plug/unplug them. That
+ * simplifies the state engine greatly */
+typedef enum DeviceState {
+        DEVICE_DEAD,
+        DEVICE_TENTATIVE, /* mounted or swapped, but not (yet) announced by udev */
+        DEVICE_PLUGGED,   /* announced by udev */
+        _DEVICE_STATE_MAX,
+        _DEVICE_STATE_INVALID = -EINVAL,
+} DeviceState;
+
+typedef enum MountState {
+        MOUNT_DEAD,
+        MOUNT_MOUNTING,               /* /usr/bin/mount is running, but the mount is not done yet. */
+        MOUNT_MOUNTING_DONE,          /* /usr/bin/mount is running, and the mount is done. */
+        MOUNT_MOUNTED,
+        MOUNT_REMOUNTING,
+        MOUNT_UNMOUNTING,
+        MOUNT_REMOUNTING_SIGTERM,
+        MOUNT_REMOUNTING_SIGKILL,
+        MOUNT_UNMOUNTING_SIGTERM,
+        MOUNT_UNMOUNTING_SIGKILL,
+        MOUNT_FAILED,
+        MOUNT_CLEANING,
+        _MOUNT_STATE_MAX,
+        _MOUNT_STATE_INVALID = -EINVAL,
+} MountState;
+
+typedef enum PathState {
+        PATH_DEAD,
+        PATH_WAITING,
+        PATH_RUNNING,
+        PATH_FAILED,
+        _PATH_STATE_MAX,
+        _PATH_STATE_INVALID = -EINVAL,
+} PathState;
+
+typedef enum ScopeState {
+        SCOPE_DEAD,
+        SCOPE_START_CHOWN,
+        SCOPE_RUNNING,
+        SCOPE_ABANDONED,
+        SCOPE_STOP_SIGTERM,
+        SCOPE_STOP_SIGKILL,
+        SCOPE_FAILED,
+        _SCOPE_STATE_MAX,
+        _SCOPE_STATE_INVALID = -EINVAL,
+} ScopeState;
+
+typedef enum ServiceState {
+        SERVICE_DEAD,
+        SERVICE_CONDITION,
+        SERVICE_START_PRE,
+        SERVICE_START,
+        SERVICE_START_POST,
+        SERVICE_RUNNING,
+        SERVICE_EXITED,            /* Nothing is running anymore, but RemainAfterExit is true hence this is OK */
+        SERVICE_RELOAD,
+        SERVICE_STOP,              /* No STOP_PRE state, instead just register multiple STOP executables */
+        SERVICE_STOP_WATCHDOG,
+        SERVICE_STOP_SIGTERM,
+        SERVICE_STOP_SIGKILL,
+        SERVICE_STOP_POST,
+        SERVICE_FINAL_WATCHDOG,    /* In case the STOP_POST executable needs to be aborted. */
+        SERVICE_FINAL_SIGTERM,     /* In case the STOP_POST executable hangs, we shoot that down, too */
+        SERVICE_FINAL_SIGKILL,
+        SERVICE_FAILED,
+        SERVICE_AUTO_RESTART,
+        SERVICE_CLEANING,
+        _SERVICE_STATE_MAX,
+        _SERVICE_STATE_INVALID = -EINVAL,
+} ServiceState;
+
+typedef enum SliceState {
+        SLICE_DEAD,
+        SLICE_ACTIVE,
+        _SLICE_STATE_MAX,
+        _SLICE_STATE_INVALID = -EINVAL,
+} SliceState;
+
+typedef enum SocketState {
+        SOCKET_DEAD,
+        SOCKET_START_PRE,
+        SOCKET_START_CHOWN,
+        SOCKET_START_POST,
+        SOCKET_LISTENING,
+        SOCKET_RUNNING,
+        SOCKET_STOP_PRE,
+        SOCKET_STOP_PRE_SIGTERM,
+        SOCKET_STOP_PRE_SIGKILL,
+        SOCKET_STOP_POST,
+        SOCKET_FINAL_SIGTERM,
+        SOCKET_FINAL_SIGKILL,
+        SOCKET_FAILED,
+        SOCKET_CLEANING,
+        _SOCKET_STATE_MAX,
+        _SOCKET_STATE_INVALID = -EINVAL,
+} SocketState;
+
+typedef enum SwapState {
+        SWAP_DEAD,
+        SWAP_ACTIVATING,               /* /sbin/swapon is running, but the swap not yet enabled. */
+        SWAP_ACTIVATING_DONE,          /* /sbin/swapon is running, and the swap is done. */
+        SWAP_ACTIVE,
+        SWAP_DEACTIVATING,
+        SWAP_DEACTIVATING_SIGTERM,
+        SWAP_DEACTIVATING_SIGKILL,
+        SWAP_FAILED,
+        SWAP_CLEANING,
+        _SWAP_STATE_MAX,
+        _SWAP_STATE_INVALID = -EINVAL,
+} SwapState;
+
+typedef enum TargetState {
+        TARGET_DEAD,
+        TARGET_ACTIVE,
+        _TARGET_STATE_MAX,
+        _TARGET_STATE_INVALID = -EINVAL,
+} TargetState;
+
+typedef enum TimerState {
+        TIMER_DEAD,
+        TIMER_WAITING,
+        TIMER_RUNNING,
+        TIMER_ELAPSED,
+        TIMER_FAILED,
+        _TIMER_STATE_MAX,
+        _TIMER_STATE_INVALID = -EINVAL,
+} TimerState;
+
+typedef enum UnitDependency {
+        /* Positive dependencies */
+        UNIT_REQUIRES,
+        UNIT_REQUISITE,
+        UNIT_WANTS,
+        UNIT_BINDS_TO,
+        UNIT_PART_OF,
+        UNIT_UPHOLDS,
+
+        /* Inverse of the above */
+        UNIT_REQUIRED_BY,             /* inverse of 'requires' is 'required_by' */
+        UNIT_REQUISITE_OF,            /* inverse of 'requisite' is 'requisite_of' */
+        UNIT_WANTED_BY,               /* inverse of 'wants' */
+        UNIT_BOUND_BY,                /* inverse of 'binds_to' */
+        UNIT_CONSISTS_OF,             /* inverse of 'part_of' */
+        UNIT_UPHELD_BY,               /* inverse of 'uphold' */
+
+        /* Negative dependencies */
+        UNIT_CONFLICTS,               /* inverse of 'conflicts' is 'conflicted_by' */
+        UNIT_CONFLICTED_BY,
+
+        /* Order */
+        UNIT_BEFORE,                  /* inverse of 'before' is 'after' and vice versa */
+        UNIT_AFTER,
+
+        /* OnSuccess= + OnFailure= */
+        UNIT_ON_SUCCESS,
+        UNIT_ON_SUCCESS_OF,
+        UNIT_ON_FAILURE,
+        UNIT_ON_FAILURE_OF,
+
+        /* Triggers (i.e. a socket triggers a service) */
+        UNIT_TRIGGERS,
+        UNIT_TRIGGERED_BY,
+
+        /* Propagate reloads */
+        UNIT_PROPAGATES_RELOAD_TO,
+        UNIT_RELOAD_PROPAGATED_FROM,
+
+        /* Propagate stops */
+        UNIT_PROPAGATES_STOP_TO,
+        UNIT_STOP_PROPAGATED_FROM,
+
+        /* Joins namespace of */
+        UNIT_JOINS_NAMESPACE_OF,
+
+        /* Reference information for GC logic */
+        UNIT_REFERENCES,              /* Inverse of 'references' is 'referenced_by' */
+        UNIT_REFERENCED_BY,
+
+        /* Slice= */
+        UNIT_IN_SLICE,
+        UNIT_SLICE_OF,
+
+        _UNIT_DEPENDENCY_MAX,
+        _UNIT_DEPENDENCY_INVALID = -EINVAL,
+} UnitDependency;
+
+typedef enum NotifyAccess {
+        NOTIFY_NONE,
+        NOTIFY_ALL,
+        NOTIFY_MAIN,
+        NOTIFY_EXEC,
+        _NOTIFY_ACCESS_MAX,
+        _NOTIFY_ACCESS_INVALID = -EINVAL,
+} NotifyAccess;
+
+char *unit_dbus_path_from_name(const char *name);
+int unit_name_from_dbus_path(const char *path, char **name);
+
+const char* unit_dbus_interface_from_type(UnitType t);
+const char *unit_dbus_interface_from_name(const char *name);
+
+const char *unit_type_to_string(UnitType i) _const_;
+UnitType unit_type_from_string(const char *s) _pure_;
+
+const char *unit_load_state_to_string(UnitLoadState i) _const_;
+UnitLoadState unit_load_state_from_string(const char *s) _pure_;
+
+const char *unit_active_state_to_string(UnitActiveState i) _const_;
+UnitActiveState unit_active_state_from_string(const char *s) _pure_;
+
+const char *freezer_state_to_string(FreezerState i) _const_;
+FreezerState freezer_state_from_string(const char *s) _pure_;
+
+const char *unit_marker_to_string(UnitMarker m) _const_;
+UnitMarker unit_marker_from_string(const char *s) _pure_;
+
+const char* automount_state_to_string(AutomountState i) _const_;
+AutomountState automount_state_from_string(const char *s) _pure_;
+
+const char* device_state_to_string(DeviceState i) _const_;
+DeviceState device_state_from_string(const char *s) _pure_;
+
+const char* mount_state_to_string(MountState i) _const_;
+MountState mount_state_from_string(const char *s) _pure_;
+
+const char* path_state_to_string(PathState i) _const_;
+PathState path_state_from_string(const char *s) _pure_;
+
+const char* scope_state_to_string(ScopeState i) _const_;
+ScopeState scope_state_from_string(const char *s) _pure_;
+
+const char* service_state_to_string(ServiceState i) _const_;
+ServiceState service_state_from_string(const char *s) _pure_;
+
+const char* slice_state_to_string(SliceState i) _const_;
+SliceState slice_state_from_string(const char *s) _pure_;
+
+const char* socket_state_to_string(SocketState i) _const_;
+SocketState socket_state_from_string(const char *s) _pure_;
+
+const char* swap_state_to_string(SwapState i) _const_;
+SwapState swap_state_from_string(const char *s) _pure_;
+
+const char* target_state_to_string(TargetState i) _const_;
+TargetState target_state_from_string(const char *s) _pure_;
+
+const char *timer_state_to_string(TimerState i) _const_;
+TimerState timer_state_from_string(const char *s) _pure_;
+
+const char *unit_dependency_to_string(UnitDependency i) _const_;
+UnitDependency unit_dependency_from_string(const char *s) _pure_;
+
+const char* notify_access_to_string(NotifyAccess i) _const_;
+NotifyAccess notify_access_from_string(const char *s) _pure_;
+
+SpecialGlyph unit_active_state_to_glyph(UnitActiveState state);
diff --git a/src/basic/unit-file.c b/src/basic/unit-file.c
new file mode 100644
index 0000000..c81c69d
--- /dev/null
+++ b/src/basic/unit-file.c
@@ -0,0 +1,822 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-id128.h"
+
+#include "chase-symlinks.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "path-lookup.h"
+#include "set.h"
+#include "special.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-file.h"
+
+bool unit_type_may_alias(UnitType type) {
+        return IN_SET(type,
+                      UNIT_SERVICE,
+                      UNIT_SOCKET,
+                      UNIT_TARGET,
+                      UNIT_DEVICE,
+                      UNIT_TIMER,
+                      UNIT_PATH);
+}
+
+bool unit_type_may_template(UnitType type) {
+        return IN_SET(type,
+                      UNIT_SERVICE,
+                      UNIT_SOCKET,
+                      UNIT_TARGET,
+                      UNIT_TIMER,
+                      UNIT_PATH);
+}
+
+int unit_symlink_name_compatible(const char *symlink, const char *target, bool instance_propagation) {
+        _cleanup_free_ char *template = NULL;
+        int r, un_type1, un_type2;
+
+        un_type1 = unit_name_classify(symlink);
+
+        /* The straightforward case: the symlink name matches the target and we have a valid unit */
+        if (streq(symlink, target) &&
+            (un_type1 & (UNIT_NAME_PLAIN | UNIT_NAME_INSTANCE)))
+                return 1;
+
+        r = unit_name_template(symlink, &template);
+        if (r == -EINVAL)
+                return 0; /* Not a template */
+        if (r < 0)
+                return r;
+
+        un_type2 = unit_name_classify(target);
+
+        /* An instance name points to a target that is just the template name */
+        if (un_type1 == UNIT_NAME_INSTANCE &&
+            un_type2 == UNIT_NAME_TEMPLATE &&
+            streq(template, target))
+                return 1;
+
+        /* foo@.target.requires/bar@.service: instance will be propagated */
+        if (instance_propagation &&
+            un_type1 == UNIT_NAME_TEMPLATE &&
+            un_type2 == UNIT_NAME_TEMPLATE &&
+            streq(template, target))
+                return 1;
+
+        return 0;
+}
+
+int unit_validate_alias_symlink_or_warn(int log_level, const char *filename, const char *target) {
+        const char *src, *dst;
+        _cleanup_free_ char *src_instance = NULL, *dst_instance = NULL;
+        UnitType src_unit_type, dst_unit_type;
+        UnitNameFlags src_name_type, dst_name_type;
+
+        /* Check if the *alias* symlink is valid. This applies to symlinks like
+         * /etc/systemd/system/dbus.service → dbus-broker.service, but not to .wants or .requires symlinks
+         * and such. Neither does this apply to symlinks which *link* units, i.e. symlinks to outside of the
+         * unit lookup path.
+         *
+         * -EINVAL is returned if the something is wrong with the source filename or the source unit type is
+         *         not allowed to symlink,
+         * -EXDEV if the target filename is not a valid unit name or doesn't match the source,
+         * -ELOOP for an alias to self.
+         */
+
+        src = basename(filename);
+        dst = basename(target);
+
+        /* src checks */
+
+        src_name_type = unit_name_to_instance(src, &src_instance);
+        if (src_name_type < 0)
+                return log_full_errno(log_level, src_name_type,
+                                      "%s: not a valid unit name \"%s\": %m", filename, src);
+
+        src_unit_type = unit_name_to_type(src);
+        assert(src_unit_type >= 0); /* unit_name_to_instance() checked the suffix already */
+
+        if (!unit_type_may_alias(src_unit_type))
+                return log_full_errno(log_level, SYNTHETIC_ERRNO(EINVAL),
+                                      "%s: symlinks are not allowed for units of this type, rejecting.",
+                                      filename);
+
+        if (src_name_type != UNIT_NAME_PLAIN &&
+            !unit_type_may_template(src_unit_type))
+                return log_full_errno(log_level, SYNTHETIC_ERRNO(EINVAL),
+                                      "%s: templates not allowed for %s units, rejecting.",
+                                      filename, unit_type_to_string(src_unit_type));
+
+        /* dst checks */
+
+        if (streq(src, dst))
+                return log_debug_errno(SYNTHETIC_ERRNO(ELOOP),
+                                       "%s: unit self-alias: %s → %s, ignoring.",
+                                       filename, src, dst);
+
+        dst_name_type = unit_name_to_instance(dst, &dst_instance);
+        if (dst_name_type < 0)
+                return log_full_errno(log_level, dst_name_type == -EINVAL ? SYNTHETIC_ERRNO(EXDEV) : dst_name_type,
+                                      "%s points to \"%s\" which is not a valid unit name: %m",
+                                      filename, dst);
+
+        if (!(dst_name_type == src_name_type ||
+              (src_name_type == UNIT_NAME_INSTANCE && dst_name_type == UNIT_NAME_TEMPLATE)))
+                return log_full_errno(log_level, SYNTHETIC_ERRNO(EXDEV),
+                                      "%s: symlink target name type \"%s\" does not match source, rejecting.",
+                                      filename, dst);
+
+        if (dst_name_type == UNIT_NAME_INSTANCE) {
+                assert(src_instance);
+                assert(dst_instance);
+                if (!streq(src_instance, dst_instance))
+                        return log_full_errno(log_level, SYNTHETIC_ERRNO(EXDEV),
+                                              "%s: unit symlink target \"%s\" instance name doesn't match, rejecting.",
+                                              filename, dst);
+        }
+
+        dst_unit_type = unit_name_to_type(dst);
+        if (dst_unit_type != src_unit_type)
+                return log_full_errno(log_level, SYNTHETIC_ERRNO(EXDEV),
+                                      "%s: symlink target \"%s\" has incompatible suffix, rejecting.",
+                                      filename, dst);
+
+        return 0;
+}
+
+#define FOLLOW_MAX 8
+
+static int unit_ids_map_get(
+                Hashmap *unit_ids_map,
+                const char *unit_name,
+                const char **ret_fragment_path) {
+
+        /* Resolve recursively until we hit an absolute path, i.e. a non-aliased unit.
+         *
+         * We distinguish the case where unit_name was not found in the hashmap at all, and the case where
+         * some symlink was broken.
+         *
+         * If a symlink target points to an instance name, then we also check for the template. */
+
+        const char *id = NULL;
+        int r;
+
+        for (unsigned n = 0; n < FOLLOW_MAX; n++) {
+                const char *t = hashmap_get(unit_ids_map, id ?: unit_name);
+                if (!t) {
+                        _cleanup_free_ char *template = NULL;
+
+                        if (!id)
+                                return -ENOENT;
+
+                        r = unit_name_template(id, &template);
+                        if (r == -EINVAL)
+                                return -ENXIO; /* we failed to find the symlink target */
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to determine template name for %s: %m", id);
+
+                        t = hashmap_get(unit_ids_map, template);
+                        if (!t)
+                                return -ENXIO;
+
+                        /* We successfully switched from instanced name to a template, let's continue */
+                }
+
+                if (path_is_absolute(t)) {
+                        if (ret_fragment_path)
+                                *ret_fragment_path = t;
+                        return 0;
+                }
+
+                id = t;
+        }
+
+        return -ELOOP;
+}
+
+static bool lookup_paths_mtime_exclude(const LookupPaths *lp, const char *path) {
+        /* Paths that are under our exclusive control. Users shall not alter those directly. */
+
+        return streq_ptr(path, lp->generator) ||
+               streq_ptr(path, lp->generator_early) ||
+               streq_ptr(path, lp->generator_late) ||
+               streq_ptr(path, lp->transient) ||
+               streq_ptr(path, lp->persistent_control) ||
+               streq_ptr(path, lp->runtime_control);
+}
+
+#define HASH_KEY SD_ID128_MAKE(4e,86,1b,e3,39,b3,40,46,98,5d,b8,11,34,8f,c3,c1)
+
+bool lookup_paths_timestamp_hash_same(const LookupPaths *lp, uint64_t timestamp_hash, uint64_t *ret_new) {
+        struct siphash state;
+
+        siphash24_init(&state, HASH_KEY.bytes);
+
+        STRV_FOREACH(dir, lp->search_path) {
+                struct stat st;
+
+                if (lookup_paths_mtime_exclude(lp, *dir))
+                        continue;
+
+                /* Determine the latest lookup path modification time */
+                if (stat(*dir, &st) < 0) {
+                        if (errno == ENOENT)
+                                continue;
+
+                        log_debug_errno(errno, "Failed to stat %s, ignoring: %m", *dir);
+                        continue;
+                }
+
+                siphash24_compress_usec_t(timespec_load(&st.st_mtim), &state);
+        }
+
+        uint64_t updated = siphash24_finalize(&state);
+        if (ret_new)
+                *ret_new = updated;
+        if (updated != timestamp_hash)
+                log_debug("Modification times have changed, need to update cache.");
+        return updated == timestamp_hash;
+}
+
+static int directory_name_is_valid(const char *name) {
+
+        /* Accept a directory whose name is a valid unit file name ending in .wants/, .requires/ or .d/ */
+
+        FOREACH_STRING(suffix, ".wants", ".requires", ".d") {
+                _cleanup_free_ char *chopped = NULL;
+                const char *e;
+
+                e = endswith(name, suffix);
+                if (!e)
+                        continue;
+
+                chopped = strndup(name, e - name);
+                if (!chopped)
+                        return log_oom();
+
+                if (unit_name_is_valid(chopped, UNIT_NAME_ANY) ||
+                    unit_type_from_string(chopped) >= 0)
+                        return true;
+        }
+
+        return false;
+}
+
+int unit_file_resolve_symlink(
+                const char *root_dir,
+                char **search_path,
+                const char *dir,
+                int dirfd,
+                const char *filename,
+                bool resolve_destination_target,
+                char **ret_destination) {
+
+        _cleanup_free_ char *target = NULL, *simplified = NULL, *dst = NULL, *_dir = NULL, *_filename = NULL;
+        int r;
+
+        /* This can be called with either dir+dirfd valid and filename just a name,
+         * or !dir && dirfd==AT_FDCWD, and filename being a full path.
+         *
+         * If resolve_destination_target is true, an absolute path will be returned.
+         * If not, an absolute path is returned for linked unit files, and a relative
+         * path otherwise.
+         *
+         * Returns an error, false if this is an alias, true if it's a linked unit file. */
+
+        assert(filename);
+        assert(ret_destination);
+        assert(dir || path_is_absolute(filename));
+        assert(dirfd >= 0 || dirfd == AT_FDCWD);
+
+        r = readlinkat_malloc(dirfd, filename, &target);
+        if (r < 0)
+                return log_warning_errno(r, "Failed to read symlink %s%s%s: %m",
+                                         dir, dir ? "/" : "", filename);
+
+        if (!dir) {
+                r = path_extract_directory(filename, &_dir);
+                if (r < 0)
+                        return r;
+                dir = _dir;
+
+                r = path_extract_filename(filename, &_filename);
+                if (r < 0)
+                        return r;
+                if (r == O_DIRECTORY)
+                        return log_warning_errno(SYNTHETIC_ERRNO(EISDIR),
+                                                 "Unexpected path to a directory \"%s\", refusing.", filename);
+                filename = _filename;
+        }
+
+        bool is_abs = path_is_absolute(target);
+        if (root_dir || !is_abs) {
+                char *target_abs = path_join(is_abs ? root_dir : dir, target);
+                if (!target_abs)
+                        return log_oom();
+
+                free_and_replace(target, target_abs);
+        }
+
+        /* Get rid of "." and ".." components in target path */
+        r = chase_symlinks(target, root_dir, CHASE_NOFOLLOW | CHASE_NONEXISTENT, &simplified, NULL);
+        if (r < 0)
+                return log_warning_errno(r, "Failed to resolve symlink %s/%s pointing to %s: %m",
+                                         dir, filename, target);
+
+        assert(path_is_absolute(simplified));
+
+        /* Check if the symlink remain inside of of our search path.
+         * If yes, it is an alias. Verify that it is valid.
+         *
+         * If no, then this is a linked unit file or mask, and we don't care about the target name
+         * when loading units, and we return the link *source* (resolve_destination_target == false);
+         * When this is called for installation purposes, we want the final destination,
+         * so we return the *target*.
+         */
+        const char *tail = path_startswith_strv(simplified, search_path);
+        if (tail) {  /* An alias */
+                _cleanup_free_ char *target_name = NULL;
+
+                r = path_extract_filename(simplified, &target_name);
+                if (r < 0)
+                        return r;
+
+                r = unit_validate_alias_symlink_or_warn(LOG_NOTICE, filename, simplified);
+                if (r < 0)
+                        return r;
+                if (is_path(tail))
+                        log_warning("Suspicious symlink %s/%s %s %s, treating as alias.",
+                                    dir, filename, special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), simplified);
+
+                dst = resolve_destination_target ? TAKE_PTR(simplified) : TAKE_PTR(target_name);
+
+        } else {
+                log_debug("Linked unit file: %s/%s %s %s", dir, filename, special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), simplified);
+
+                if (resolve_destination_target)
+                        dst = TAKE_PTR(simplified);
+                else {
+                        dst = path_join(dir, filename);
+                        if (!dst)
+                                return log_oom();
+                }
+        }
+
+        *ret_destination = TAKE_PTR(dst);
+        return !tail;  /* true if linked unit file */
+}
+
+int unit_file_build_name_map(
+                const LookupPaths *lp,
+                uint64_t *cache_timestamp_hash,
+                Hashmap **unit_ids_map,
+                Hashmap **unit_names_map,
+                Set **path_cache) {
+
+        /* Build two mappings: any name → main unit (i.e. the end result of symlink resolution), unit name →
+         * all aliases (i.e. the entry for a given key is a list of all names which point to this key). The
+         * key is included in the value iff we saw a file or symlink with that name. In other words, if we
+         * have a key, but it is not present in the value for itself, there was an alias pointing to it, but
+         * the unit itself is not loadable.
+         *
+         * At the same, build a cache of paths where to find units. The non-const parameters are for input
+         * and output. Existing contents will be freed before the new contents are stored.
+         */
+
+        _cleanup_hashmap_free_ Hashmap *ids = NULL, *names = NULL;
+        _cleanup_set_free_free_ Set *paths = NULL;
+        _cleanup_strv_free_ char **expanded_search_path = NULL;
+        uint64_t timestamp_hash;
+        int r;
+
+        /* Before doing anything, check if the timestamp hash that was passed is still valid.
+         * If yes, do nothing. */
+        if (cache_timestamp_hash &&
+            lookup_paths_timestamp_hash_same(lp, *cache_timestamp_hash, ×tamp_hash))
+                return 0;
+
+        /* The timestamp hash is now set based on the mtimes from before when we start reading files.
+         * If anything is modified concurrently, we'll consider the cache outdated. */
+
+        if (path_cache) {
+                paths = set_new(&path_hash_ops_free);
+                if (!paths)
+                        return log_oom();
+        }
+
+        /* Go over all our search paths, chase their symlinks and store the result in the
+         * expanded_search_path list.
+         *
+         * This is important for cases where any of the unit directories itself are symlinks into other
+         * directories and would therefore cause all of the unit files to be recognized as linked units.
+         *
+         * This is important for distributions such as NixOS where most paths in /etc/ are symlinks to some
+         * other location on the filesystem (e.g.  into /nix/store/).
+         *
+         * Search paths are ordered by priority (highest first), and we need to maintain this order.
+         * If a resolved path is already in the list, we don't need to include.
+         *
+         * Note that we build a list that contains both the original paths and the resolved symlinks:
+         * we need the latter for the case where the directory is symlinked, as described above, and
+         * the former for the case where some unit file alias is a dangling symlink that points to one
+         * of the "original" directories (and can't be followed).
+         */
+        STRV_FOREACH(dir, lp->search_path) {
+                _cleanup_free_ char *resolved_dir = NULL;
+
+                r = strv_extend(&expanded_search_path, *dir);
+                if (r < 0)
+                        return log_oom();
+
+                r = chase_symlinks(*dir, NULL, 0, &resolved_dir, NULL);
+                if (r < 0) {
+                        if (r != -ENOENT)
+                                log_warning_errno(r, "Failed to resolve symlink %s, ignoring: %m", *dir);
+                        continue;
+                }
+
+                if (strv_contains(expanded_search_path, resolved_dir))
+                        continue;
+
+                if (strv_consume(&expanded_search_path, TAKE_PTR(resolved_dir)) < 0)
+                        return log_oom();
+        }
+
+        STRV_FOREACH(dir, lp->search_path) {
+                _cleanup_closedir_ DIR *d = NULL;
+
+                d = opendir(*dir);
+                if (!d) {
+                        if (errno != ENOENT)
+                                log_warning_errno(errno, "Failed to open \"%s\", ignoring: %m", *dir);
+                        continue;
+                }
+
+                FOREACH_DIRENT_ALL(de, d, log_warning_errno(errno, "Failed to read \"%s\", ignoring: %m", *dir)) {
+                        _unused_ _cleanup_free_ char *_filename_free = NULL;
+                        char *filename;
+                        _cleanup_free_ char *dst = NULL;
+                        bool symlink_to_dir = false;
+
+                        /* We only care about valid units and dirs with certain suffixes, let's ignore the
+                         * rest. */
+
+                        if (de->d_type == DT_REG) {
+
+                                /* Accept a regular file whose name is a valid unit file name. */
+                                if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+                                        continue;
+
+                        } else if (de->d_type == DT_DIR) {
+
+                                if (!paths) /* Skip directories early unless path_cache is requested */
+                                        continue;
+
+                                r = directory_name_is_valid(de->d_name);
+                                if (r < 0)
+                                        return r;
+                                if (r == 0)
+                                        continue;
+
+                        } else if (de->d_type == DT_LNK) {
+
+                                /* Accept a symlink file whose name is a valid unit file name or
+                                 * ending in .wants/, .requires/ or .d/. */
+
+                                if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY)) {
+                                        _cleanup_free_ char *target = NULL;
+
+                                        if (!paths) /* Skip symlink to a directory early unless path_cache is requested */
+                                                continue;
+
+                                        r = directory_name_is_valid(de->d_name);
+                                        if (r < 0)
+                                                return r;
+                                        if (r == 0)
+                                                continue;
+
+                                        r = readlinkat_malloc(dirfd(d), de->d_name, &target);
+                                        if (r < 0) {
+                                                log_warning_errno(r, "Failed to read symlink %s/%s, ignoring: %m",
+                                                                  *dir, de->d_name);
+                                                continue;
+                                        }
+
+                                        r = is_dir(target, /* follow = */ true);
+                                        if (r <= 0)
+                                                continue;
+
+                                        symlink_to_dir = true;
+                                }
+
+                        } else
+                                continue;
+
+                        filename = path_join(*dir, de->d_name);
+                        if (!filename)
+                                return log_oom();
+
+                        if (paths) {
+                                r = set_put(paths, filename);
+                                if (r < 0)
+                                        return log_oom();
+                                if (r == 0)
+                                        _filename_free = filename; /* Make sure we free the filename. */
+                        } else
+                                _filename_free = filename; /* Make sure we free the filename. */
+
+                        if (de->d_type == DT_DIR || (de->d_type == DT_LNK && symlink_to_dir))
+                                continue;
+
+                        assert(IN_SET(de->d_type, DT_REG, DT_LNK));
+
+                        /* search_path is ordered by priority (highest first). If the name is already mapped
+                         * to something (incl. itself), it means that we have already seen it, and we should
+                         * ignore it here. */
+                        if (hashmap_contains(ids, de->d_name))
+                                continue;
+
+                        if (de->d_type == DT_LNK) {
+                                /* We don't explicitly check for alias loops here. unit_ids_map_get() which
+                                 * limits the number of hops should be used to access the map. */
+
+                                r = unit_file_resolve_symlink(lp->root_dir, expanded_search_path,
+                                                              *dir, dirfd(d), de->d_name,
+                                                              /* resolve_destination_target= */ false,
+                                                              &dst);
+                                if (r == -ENOMEM)
+                                        return r;
+                                if (r < 0)  /* we ignore other errors here */
+                                        continue;
+
+                        } else {
+                                dst = TAKE_PTR(_filename_free); /* Grab the copy we made previously, if available. */
+                                if (!dst) {
+                                        dst = strdup(filename);
+                                        if (!dst)
+                                                return log_oom();
+                                }
+
+                                log_debug("%s: normal unit file: %s", __func__, dst);
+                        }
+
+                        _cleanup_free_ char *key = strdup(de->d_name);
+                        if (!key)
+                                return log_oom();
+
+                        r = hashmap_ensure_put(&ids, &string_hash_ops_free_free, key, dst);
+                        if (r < 0)
+                                return log_warning_errno(r, "Failed to add entry to hashmap (%s%s%s): %m",
+                                                         de->d_name, special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), dst);
+                        key = dst = NULL;
+                }
+        }
+
+        /* Let's also put the names in the reverse db. */
+        const char *dummy, *src;
+        HASHMAP_FOREACH_KEY(dummy, src, ids) {
+                _cleanup_free_ char *inst = NULL, *dst_inst = NULL;
+                const char *dst;
+
+                r = unit_ids_map_get(ids, src, &dst);
+                if (r < 0)
+                        continue;
+
+                if (null_or_empty_path(dst) != 0)
+                        continue;
+
+                dst = basename(dst);
+
+                /* If we have an symlink from an instance name to a template name, it is an alias just for
+                 * this specific instance, foo@id.service ↔ template@id.service. */
+                if (unit_name_is_valid(dst, UNIT_NAME_TEMPLATE)) {
+                        UnitNameFlags t = unit_name_to_instance(src, &inst);
+                        if (t < 0)
+                                return log_error_errno(t, "Failed to extract instance part from %s: %m", src);
+                        if (t == UNIT_NAME_INSTANCE) {
+                                r = unit_name_replace_instance(dst, inst, &dst_inst);
+                                if (r < 0) {
+                                        /* This might happen e.g. if the combined length is too large.
+                                         * Let's not make too much of a fuss. */
+                                        log_debug_errno(r, "Failed to build alias name (%s + %s), ignoring: %m",
+                                                        dst, inst);
+                                        continue;
+                                }
+
+                                dst = dst_inst;
+                        }
+                }
+
+                r = string_strv_hashmap_put(&names, dst, src);
+                if (r < 0)
+                        return log_warning_errno(r, "Failed to add entry to hashmap (%s%s%s): %m",
+                                                 dst, special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), src);
+        }
+
+        if (cache_timestamp_hash)
+                *cache_timestamp_hash = timestamp_hash;
+
+        hashmap_free_and_replace(*unit_ids_map, ids);
+        hashmap_free_and_replace(*unit_names_map, names);
+        if (path_cache)
+                set_free_and_replace(*path_cache, paths);
+
+        return 1;
+}
+
+static int add_name(
+                const char *unit_name,
+                Set **names,
+                const char *name) {
+        int r;
+
+        assert(names);
+        assert(name);
+
+        r = set_put_strdup(names, name);
+        if (r < 0)
+                return r;
+        if (r > 0 && !streq(unit_name, name))
+                log_debug("Unit %s has alias %s.", unit_name, name);
+        return r;
+}
+
+static int add_names(
+                Hashmap *unit_ids_map,
+                Hashmap *unit_name_map,
+                const char *unit_name,
+                const char *fragment_basename,  /* Only set when adding additional names based on fragment path */
+                UnitNameFlags name_type,
+                const char *instance,
+                Set **names,
+                const char *name) {
+
+        char **aliases;
+        int r;
+
+        assert(name_type == UNIT_NAME_PLAIN || instance);
+
+        /* The unit has its own name if it's not a template. If we're looking at a fragment, the fragment
+         * name (possibly with instance inserted), is also always one of the unit names. */
+        if (name_type != UNIT_NAME_TEMPLATE) {
+                r = add_name(unit_name, names, name);
+                if (r < 0)
+                        return r;
+        }
+
+        /* Add any aliases of the name to the set of names.
+         *
+         * We don't even need to know which fragment we will use. The unit_name_map should return the same
+         * set of names for any of the aliases. */
+        aliases = hashmap_get(unit_name_map, name);
+        STRV_FOREACH(alias, aliases) {
+                if (name_type == UNIT_NAME_INSTANCE && unit_name_is_valid(*alias, UNIT_NAME_TEMPLATE)) {
+                        _cleanup_free_ char *inst = NULL;
+                        const char *inst_fragment = NULL;
+
+                        r = unit_name_replace_instance(*alias, instance, &inst);
+                        if (r < 0)
+                                return log_debug_errno(r, "Cannot build instance name %s + %s: %m",
+                                                       *alias, instance);
+
+                        /* Exclude any aliases that point in some other direction.
+                         *
+                         * See https://github.com/systemd/systemd/pull/13119#discussion_r308145418. */
+                        r = unit_ids_map_get(unit_ids_map, inst, &inst_fragment);
+                        if (r < 0 && !IN_SET(r, -ENOENT, -ENXIO))
+                                return log_debug_errno(r, "Cannot find instance fragment %s: %m", inst);
+
+                        if (inst_fragment &&
+                            fragment_basename &&
+                            !streq(basename(inst_fragment), fragment_basename)) {
+                                log_debug("Instance %s has fragment %s and is not an alias of %s.",
+                                          inst, inst_fragment, unit_name);
+                                continue;
+                        }
+
+                        r = add_name(unit_name, names, inst);
+                } else
+                        r = add_name(unit_name, names, *alias);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+int unit_file_find_fragment(
+                Hashmap *unit_ids_map,
+                Hashmap *unit_name_map,
+                const char *unit_name,
+                const char **ret_fragment_path,
+                Set **ret_names) {
+
+        const char *fragment = NULL;
+        _cleanup_free_ char *template = NULL, *instance = NULL;
+        _cleanup_set_free_ Set *names = NULL;
+        int r;
+
+        /* Finds a fragment path, and returns the set of names:
+         * if we have …/foo.service and …/foo-alias.service→foo.service,
+         * and …/foo@.service and …/foo-alias@.service→foo@.service,
+         * and …/foo@inst.service,
+         * this should return:
+         * foo.service → …/foo.service, {foo.service, foo-alias.service},
+         * foo-alias.service → …/foo.service, {foo.service, foo-alias.service},
+         * foo@.service → …/foo@.service, {foo@.service, foo-alias@.service},
+         * foo-alias@.service → …/foo@.service, {foo@.service, foo-alias@.service},
+         * foo@bar.service → …/foo@.service, {foo@bar.service, foo-alias@bar.service},
+         * foo-alias@bar.service → …/foo@.service, {foo@bar.service, foo-alias@bar.service},
+         * foo-alias@inst.service → …/foo@inst.service, {foo@inst.service, foo-alias@inst.service}.
+         */
+
+        UnitNameFlags name_type = unit_name_to_instance(unit_name, &instance);
+        if (name_type < 0)
+                return name_type;
+
+        if (ret_names) {
+                r = add_names(unit_ids_map, unit_name_map, unit_name, NULL, name_type, instance, &names, unit_name);
+                if (r < 0)
+                        return r;
+        }
+
+        /* First try to load fragment under the original name */
+        r = unit_ids_map_get(unit_ids_map, unit_name, &fragment);
+        if (r < 0 && !IN_SET(r, -ENOENT, -ENXIO))
+                return log_debug_errno(r, "Cannot load unit %s: %m", unit_name);
+
+        if (!fragment && name_type == UNIT_NAME_INSTANCE) {
+                /* Look for a fragment under the template name */
+
+                r = unit_name_template(unit_name, &template);
+                if (r < 0)
+                        return log_debug_errno(r, "Failed to determine template name: %m");
+
+                r = unit_ids_map_get(unit_ids_map, template, &fragment);
+                if (r < 0 && !IN_SET(r, -ENOENT, -ENXIO))
+                        return log_debug_errno(r, "Cannot load template %s: %m", template);
+        }
+
+        if (fragment && ret_names) {
+                const char *fragment_basename = basename(fragment);
+
+                if (!streq(fragment_basename, unit_name)) {
+                        /* Add names based on the fragment name to the set of names */
+                        r = add_names(unit_ids_map, unit_name_map, unit_name, fragment_basename, name_type, instance, &names, fragment_basename);
+                        if (r < 0)
+                                return r;
+                }
+        }
+
+        *ret_fragment_path = fragment;
+        if (ret_names)
+                *ret_names = TAKE_PTR(names);
+
+        return 0;
+}
+
+static const char * const rlmap[] = {
+        "emergency", SPECIAL_EMERGENCY_TARGET,
+        "-b",        SPECIAL_EMERGENCY_TARGET,
+        "rescue",    SPECIAL_RESCUE_TARGET,
+        "single",    SPECIAL_RESCUE_TARGET,
+        "-s",        SPECIAL_RESCUE_TARGET,
+        "s",         SPECIAL_RESCUE_TARGET,
+        "S",         SPECIAL_RESCUE_TARGET,
+        "1",         SPECIAL_RESCUE_TARGET,
+        "2",         SPECIAL_MULTI_USER_TARGET,
+        "3",         SPECIAL_MULTI_USER_TARGET,
+        "4",         SPECIAL_MULTI_USER_TARGET,
+        "5",         SPECIAL_GRAPHICAL_TARGET,
+        NULL
+};
+
+static const char * const rlmap_initrd[] = {
+        "emergency", SPECIAL_EMERGENCY_TARGET,
+        "rescue",    SPECIAL_RESCUE_TARGET,
+        NULL
+};
+
+const char* runlevel_to_target(const char *word) {
+        const char * const *rlmap_ptr;
+
+        if (!word)
+                return NULL;
+
+        if (in_initrd()) {
+                word = startswith(word, "rd.");
+                if (!word)
+                        return NULL;
+        }
+
+        rlmap_ptr = in_initrd() ? rlmap_initrd : rlmap;
+
+        for (size_t i = 0; rlmap_ptr[i]; i += 2)
+                if (streq(word, rlmap_ptr[i]))
+                        return rlmap_ptr[i+1];
+
+        return NULL;
+}
diff --git a/src/basic/unit-file.h b/src/basic/unit-file.h
new file mode 100644
index 0000000..1c43861
--- /dev/null
+++ b/src/basic/unit-file.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include 
+
+#include "hashmap.h"
+#include "path-lookup.h"
+#include "time-util.h"
+#include "unit-name.h"
+
+typedef enum UnitFileState UnitFileState;
+
+enum UnitFileState {
+        UNIT_FILE_ENABLED,
+        UNIT_FILE_ENABLED_RUNTIME,
+        UNIT_FILE_LINKED,
+        UNIT_FILE_LINKED_RUNTIME,
+        UNIT_FILE_ALIAS,
+        UNIT_FILE_MASKED,
+        UNIT_FILE_MASKED_RUNTIME,
+        UNIT_FILE_STATIC,
+        UNIT_FILE_DISABLED,
+        UNIT_FILE_INDIRECT,
+        UNIT_FILE_GENERATED,
+        UNIT_FILE_TRANSIENT,
+        UNIT_FILE_BAD,
+        _UNIT_FILE_STATE_MAX,
+        _UNIT_FILE_STATE_INVALID = -EINVAL,
+};
+
+bool unit_type_may_alias(UnitType type) _const_;
+bool unit_type_may_template(UnitType type) _const_;
+
+int unit_symlink_name_compatible(const char *symlink, const char *target, bool instance_propagation);
+int unit_validate_alias_symlink_or_warn(int log_level, const char *filename, const char *target);
+
+bool lookup_paths_timestamp_hash_same(const LookupPaths *lp, uint64_t timestamp_hash, uint64_t *ret_new);
+
+int unit_file_resolve_symlink(
+                const char *root_dir,
+                char **search_path,
+                const char *dir,
+                int dirfd,
+                const char *filename,
+                bool resolve_destination_target,
+                char **ret_destination);
+
+int unit_file_build_name_map(
+                const LookupPaths *lp,
+                uint64_t *cache_timestamp_hash,
+                Hashmap **unit_ids_map,
+                Hashmap **unit_names_map,
+                Set **path_cache);
+
+int unit_file_find_fragment(
+                Hashmap *unit_ids_map,
+                Hashmap *unit_name_map,
+                const char *unit_name,
+                const char **ret_fragment_path,
+                Set **ret_names);
+
+const char* runlevel_to_target(const char *rl);
diff --git a/src/basic/unit-name.c b/src/basic/unit-name.c
new file mode 100644
index 0000000..3b739c7
--- /dev/null
+++ b/src/basic/unit-name.c
@@ -0,0 +1,905 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include 
+#include 
+#include 
+#include 
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "glob-util.h"
+#include "hexdecoct.h"
+#include "memory-util.h"
+#include "path-util.h"
+#include "random-util.h"
+#include "sparse-endian.h"
+#include "special.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+
+/* Characters valid in a unit name. */
+#define VALID_CHARS                             \
+        DIGITS                                  \
+        LETTERS                                 \
+        ":-_.\\"
+
+/* The same, but also permits the single @ character that may appear */
+#define VALID_CHARS_WITH_AT                     \
+        "@"                                     \
+        VALID_CHARS
+
+/* All chars valid in a unit name glob */
+#define VALID_CHARS_GLOB                        \
+        VALID_CHARS_WITH_AT                     \
+        "[]!-*?"
+
+#define LONG_UNIT_NAME_HASH_KEY SD_ID128_MAKE(ec,f2,37,fb,58,32,4a,32,84,9f,06,9b,0d,21,eb,9a)
+#define UNIT_NAME_HASH_LENGTH_CHARS 16
+
+bool unit_name_is_valid(const char *n, UnitNameFlags flags) {
+        const char *e, *i, *at;
+
+        assert((flags & ~(UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE)) == 0);
+
+        if (_unlikely_(flags == 0))
+                return false;
+
+        if (isempty(n))
+                return false;
+
+        if (strlen(n) >= UNIT_NAME_MAX)
+                return false;
+
+        e = strrchr(n, '.');
+        if (!e || e == n)
+                return false;
+
+        if (unit_type_from_string(e + 1) < 0)
+                return false;
+
+        for (i = n, at = NULL; i < e; i++) {
+
+                if (*i == '@' && !at)
+                        at = i;
+
+                if (!strchr(VALID_CHARS_WITH_AT, *i))
+                        return false;
+        }
+
+        if (at == n)
+                return false;
+
+        if (flags & UNIT_NAME_PLAIN)
+                if (!at)
+                        return true;
+
+        if (flags & UNIT_NAME_INSTANCE)
+                if (at && e > at + 1)
+                        return true;
+
+        if (flags & UNIT_NAME_TEMPLATE)
+                if (at && e == at + 1)
+                        return true;
+
+        return false;
+}
+
+bool unit_prefix_is_valid(const char *p) {
+
+        /* We don't allow additional @ in the prefix string */
+
+        if (isempty(p))
+                return false;
+
+        return in_charset(p, VALID_CHARS);
+}
+
+bool unit_instance_is_valid(const char *i) {
+
+        /* The max length depends on the length of the string, so we
+         * don't really check this here. */
+
+        if (isempty(i))
+                return false;
+
+        /* We allow additional @ in the instance string, we do not
+         * allow them in the prefix! */
+
+        return in_charset(i, "@" VALID_CHARS);
+}
+
+bool unit_suffix_is_valid(const char *s) {
+        if (isempty(s))
+                return false;
+
+        if (s[0] != '.')
+                return false;
+
+        if (unit_type_from_string(s + 1) < 0)
+                return false;
+
+        return true;
+}
+
+int unit_name_to_prefix(const char *n, char **ret) {
+        const char *p;
+        char *s;
+
+        assert(n);
+        assert(ret);
+
+        if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+                return -EINVAL;
+
+        p = strchr(n, '@');
+        if (!p)
+                p = strrchr(n, '.');
+
+        assert_se(p);
+
+        s = strndup(n, p - n);
+        if (!s)
+                return -ENOMEM;
+
+        *ret = s;
+        return 0;
+}
+
+UnitNameFlags unit_name_to_instance(const char *n, char **ret) {
+        const char *p, *d;
+
+        assert(n);
+
+        if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+                return -EINVAL;
+
+        /* Everything past the first @ and before the last . is the instance */
+        p = strchr(n, '@');
+        if (!p) {
+                if (ret)
+                        *ret = NULL;
+                return UNIT_NAME_PLAIN;
+        }
+
+        p++;
+
+        d = strrchr(p, '.');
+        if (!d)
+                return -EINVAL;
+
+        if (ret) {
+                char *i = strndup(p, d-p);
+                if (!i)
+                        return -ENOMEM;
+
+                *ret = i;
+        }
+        return d > p ? UNIT_NAME_INSTANCE : UNIT_NAME_TEMPLATE;
+}
+
+int unit_name_to_prefix_and_instance(const char *n, char **ret) {
+        const char *d;
+        char *s;
+
+        assert(n);
+        assert(ret);
+
+        if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+                return -EINVAL;
+
+        d = strrchr(n, '.');
+        if (!d)
+                return -EINVAL;
+
+        s = strndup(n, d - n);
+        if (!s)
+                return -ENOMEM;
+
+        *ret = s;
+        return 0;
+}
+
+UnitType unit_name_to_type(const char *n) {
+        const char *e;
+
+        assert(n);
+
+        if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+                return _UNIT_TYPE_INVALID;
+
+        assert_se(e = strrchr(n, '.'));
+
+        return unit_type_from_string(e + 1);
+}
+
+int unit_name_change_suffix(const char *n, const char *suffix, char **ret) {
+        _cleanup_free_ char *s = NULL;
+        size_t a, b;
+        char *e;
+
+        assert(n);
+        assert(suffix);
+        assert(ret);
+
+        if (!unit_name_is_valid(n, UNIT_NAME_ANY))
+                return -EINVAL;
+
+        if (!unit_suffix_is_valid(suffix))
+                return -EINVAL;
+
+        assert_se(e = strrchr(n, '.'));
+
+        a = e - n;
+        b = strlen(suffix);
+
+        s = new(char, a + b + 1);
+        if (!s)
+                return -ENOMEM;
+
+        strcpy(mempcpy(s, n, a), suffix);
+
+        /* Make sure the name is still valid (i.e. didn't grow too large due to longer suffix) */
+        if (!unit_name_is_valid(s, UNIT_NAME_ANY))
+                return -EINVAL;
+
+        *ret = TAKE_PTR(s);
+        return 0;
+}
+
+int unit_name_build(const char *prefix, const char *instance, const char *suffix, char **ret) {
+        UnitType type;
+
+        assert(prefix);
+        assert(suffix);
+        assert(ret);
+
+        if (suffix[0] != '.')
+                return -EINVAL;
+
+        type = unit_type_from_string(suffix + 1);
+        if (type < 0)
+                return type;
+
+        return unit_name_build_from_type(prefix, instance, type, ret);
+}
+
+int unit_name_build_from_type(const char *prefix, const char *instance, UnitType type, char **ret) {
+        _cleanup_free_ char *s = NULL;
+        const char *ut;
+
+        assert(prefix);
+        assert(type >= 0);
+        assert(type < _UNIT_TYPE_MAX);
+        assert(ret);
+
+        if (!unit_prefix_is_valid(prefix))
+                return -EINVAL;
+
+        ut = unit_type_to_string(type);
+
+        if (instance) {
+                if (!unit_instance_is_valid(instance))
+                        return -EINVAL;
+
+                s = strjoin(prefix, "@", instance, ".", ut);
+        } else
+                s = strjoin(prefix, ".", ut);
+        if (!s)
+                return -ENOMEM;
+
+        /* Verify that this didn't grow too large (or otherwise is invalid) */
+        if (!unit_name_is_valid(s, instance ? UNIT_NAME_INSTANCE : UNIT_NAME_PLAIN))
+                return -EINVAL;
+
+        *ret = TAKE_PTR(s);
+        return 0;
+}
+
+static char *do_escape_char(char c, char *t) {
+        assert(t);
+
+        *(t++) = '\\';
+        *(t++) = 'x';
+        *(t++) = hexchar(c >> 4);
+        *(t++) = hexchar(c);
+
+        return t;
+}
+
+static char *do_escape(const char *f, char *t) {
+        assert(f);
+        assert(t);
+
+        /* do not create units with a leading '.', like for "/.dotdir" mount points */
+        if (*f == '.') {
+                t = do_escape_char(*f, t);
+                f++;
+        }
+
+        for (; *f; f++) {
+                if (*f == '/')
+                        *(t++) = '-';
+                else if (IN_SET(*f, '-', '\\') || !strchr(VALID_CHARS, *f))
+                        t = do_escape_char(*f, t);
+                else
+                        *(t++) = *f;
+        }
+
+        return t;
+}
+
+char *unit_name_escape(const char *f) {
+        char *r, *t;
+
+        assert(f);
+
+        r = new(char, strlen(f)*4+1);
+        if (!r)
+                return NULL;
+
+        t = do_escape(f, r);
+        *t = 0;
+
+        return r;
+}
+
+int unit_name_unescape(const char *f, char **ret) {
+        _cleanup_free_ char *r = NULL;
+        char *t;
+
+        assert(f);
+
+        r = strdup(f);
+        if (!r)
+                return -ENOMEM;
+
+        for (t = r; *f; f++) {
+                if (*f == '-')
+                        *(t++) = '/';
+                else if (*f == '\\') {
+                        int a, b;
+
+                        if (f[1] != 'x')
+                                return -EINVAL;
+
+                        a = unhexchar(f[2]);
+                        if (a < 0)
+                                return -EINVAL;
+
+                        b = unhexchar(f[3]);
+                        if (b < 0)
+                                return -EINVAL;
+
+                        *(t++) = (char) (((uint8_t) a << 4U) | (uint8_t) b);
+                        f += 3;
+                } else
+                        *(t++) = *f;
+        }
+
+        *t = 0;
+
+        *ret = TAKE_PTR(r);
+
+        return 0;
+}
+
+int unit_name_path_escape(const char *f, char **ret) {
+        _cleanup_free_ char *p = NULL;
+        char *s;
+
+        assert(f);
+        assert(ret);
+
+        p = strdup(f);
+        if (!p)
+                return -ENOMEM;
+
+        path_simplify(p);
+
+        if (empty_or_root(p))
+                s = strdup("-");
+        else {
+                if (!path_is_normalized(p))
+                        return -EINVAL;
+
+                /* Truncate trailing slashes and skip leading slashes */
+                delete_trailing_chars(p, "/");
+                s = unit_name_escape(skip_leading_chars(p, "/"));
+        }
+        if (!s)
+                return -ENOMEM;
+
+        *ret = s;
+        return 0;
+}
+
+int unit_name_path_unescape(const char *f, char **ret) {
+        _cleanup_free_ char *s = NULL;
+        int r;
+
+        assert(f);
+
+        if (isempty(f))
+                return -EINVAL;
+
+        if (streq(f, "-")) {
+                s = strdup("/");
+                if (!s)
+                        return -ENOMEM;
+        } else {
+                _cleanup_free_ char *w = NULL;
+
+                r = unit_name_unescape(f, &w);
+                if (r < 0)
+                        return r;
+
+                /* Don't accept trailing or leading slashes */
+                if (startswith(w, "/") || endswith(w, "/"))
+                        return -EINVAL;
+
+                /* Prefix a slash again */
+                s = strjoin("/", w);
+                if (!s)
+                        return -ENOMEM;
+
+                if (!path_is_normalized(s))
+                        return -EINVAL;
+        }
+
+        if (ret)
+                *ret = TAKE_PTR(s);
+
+        return 0;
+}
+
+int unit_name_replace_instance(const char *f, const char *i, char **ret) {
+        _cleanup_free_ char *s = NULL;
+        const char *p, *e;
+        size_t a, b;
+
+        assert(f);
+        assert(i);
+        assert(ret);
+
+        if (!unit_name_is_valid(f, UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE))
+                return -EINVAL;
+        if (!unit_instance_is_valid(i))
+                return -EINVAL;
+
+        assert_se(p = strchr(f, '@'));
+        assert_se(e = strrchr(f, '.'));
+
+        a = p - f;
+        b = strlen(i);
+
+        s = new(char, a + 1 + b + strlen(e) + 1);
+        if (!s)
+                return -ENOMEM;
+
+        strcpy(mempcpy(mempcpy(s, f, a + 1), i, b), e);
+
+        /* Make sure the resulting name still is valid, i.e. didn't grow too large */
+        if (!unit_name_is_valid(s, UNIT_NAME_INSTANCE))
+                return -EINVAL;
+
+        *ret = TAKE_PTR(s);
+        return 0;
+}
+
+int unit_name_template(const char *f, char **ret) {
+        const char *p, *e;
+        char *s;
+        size_t a;
+
+        assert(f);
+        assert(ret);
+
+        if (!unit_name_is_valid(f, UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE))
+                return -EINVAL;
+
+        assert_se(p = strchr(f, '@'));
+        assert_se(e = strrchr(f, '.'));
+
+        a = p - f;
+
+        s = new(char, a + 1 + strlen(e) + 1);
+        if (!s)
+                return -ENOMEM;
+
+        strcpy(mempcpy(s, f, a + 1), e);
+
+        *ret = s;
+        return 0;
+}
+
+bool unit_name_is_hashed(const char *name) {
+        char *s;
+
+        if (!unit_name_is_valid(name, UNIT_NAME_PLAIN))
+                return false;
+
+        assert_se(s = strrchr(name, '.'));
+
+        if (s - name < UNIT_NAME_HASH_LENGTH_CHARS + 1)
+                return false;
+
+        s -= UNIT_NAME_HASH_LENGTH_CHARS;
+        if (s[-1] != '_')
+                return false;
+
+        for (size_t i = 0; i < UNIT_NAME_HASH_LENGTH_CHARS; i++)
+                if (!strchr(LOWERCASE_HEXDIGITS, s[i]))
+                        return false;
+
+        return true;
+}
+
+int unit_name_hash_long(const char *name, char **ret) {
+        _cleanup_free_ char *n = NULL, *hash = NULL;
+        char *suffix;
+        le64_t h;
+        size_t len;
+
+        if (strlen(name) < UNIT_NAME_MAX)
+                return -EMSGSIZE;
+
+        suffix = strrchr(name, '.');
+        if (!suffix)
+                return -EINVAL;
+
+        if (unit_type_from_string(suffix+1) < 0)
+                return -EINVAL;
+
+        h = htole64(siphash24_string(name, LONG_UNIT_NAME_HASH_KEY.bytes));
+
+        hash = hexmem(&h, sizeof(h));
+        if (!hash)
+                return -ENOMEM;
+
+        assert_se(strlen(hash) == UNIT_NAME_HASH_LENGTH_CHARS);
+
+        len = UNIT_NAME_MAX - 1 - strlen(suffix+1) - UNIT_NAME_HASH_LENGTH_CHARS - 2;
+        assert(len > 0 && len < UNIT_NAME_MAX);
+
+        n = strndup(name, len);
+        if (!n)
+                return -ENOMEM;
+
+        if (!strextend(&n, "_", hash, suffix))
+                return -ENOMEM;
+        assert_se(unit_name_is_valid(n, UNIT_NAME_PLAIN));
+
+        *ret = TAKE_PTR(n);
+
+        return 0;
+}
+
+int unit_name_from_path(const char *path, const char *suffix, char **ret) {
+        _cleanup_free_ char *p = NULL, *s = NULL;
+        int r;
+
+        assert(path);
+        assert(suffix);
+        assert(ret);
+
+        if (!unit_suffix_is_valid(suffix))
+                return -EINVAL;
+
+        r = unit_name_path_escape(path, &p);
+        if (r < 0)
+                return r;
+
+        s = strjoin(p, suffix);
+        if (!s)
+                return -ENOMEM;
+
+        if (strlen(s) >= UNIT_NAME_MAX) {
+                _cleanup_free_ char *n = NULL;
+
+                log_debug("Unit name \"%s\" too long, falling back to hashed unit name.", s);
+
+                r = unit_name_hash_long(s, &n);
+                if (r < 0)
+                        return r;
+
+                free_and_replace(s, n);
+        }
+
+        /* Refuse if this for some other reason didn't result in a valid name */
+        if (!unit_name_is_valid(s, UNIT_NAME_PLAIN))
+                return -EINVAL;
+
+        *ret = TAKE_PTR(s);
+        return 0;
+}
+
+int unit_name_from_path_instance(const char *prefix, const char *path, const char *suffix, char **ret) {
+        _cleanup_free_ char *p = NULL, *s = NULL;
+        int r;
+
+        assert(prefix);
+        assert(path);
+        assert(suffix);
+        assert(ret);
+
+        if (!unit_prefix_is_valid(prefix))
+                return -EINVAL;
+
+        if (!unit_suffix_is_valid(suffix))
+                return -EINVAL;
+
+        r = unit_name_path_escape(path, &p);
+        if (r < 0)
+                return r;
+
+        s = strjoin(prefix, "@", p, suffix);
+        if (!s)
+                return -ENOMEM;
+
+        if (strlen(s) >= UNIT_NAME_MAX) /* Return a slightly more descriptive error for this specific condition */
+                return -ENAMETOOLONG;
+
+        /* Refuse if this for some other reason didn't result in a valid name */
+        if (!unit_name_is_valid(s, UNIT_NAME_INSTANCE))
+                return -EINVAL;
+
+        *ret = TAKE_PTR(s);
+        return 0;
+}
+
+int unit_name_to_path(const char *name, char **ret) {
+        _cleanup_free_ char *prefix = NULL;
+        int r;
+
+        assert(name);
+
+        r = unit_name_to_prefix(name, &prefix);
+        if (r < 0)
+                return r;
+
+        if (unit_name_is_hashed(name))
+                return -ENAMETOOLONG;
+
+        return unit_name_path_unescape(prefix, ret);
+}
+
+static bool do_escape_mangle(const char *f, bool allow_globs, char *t) {
+        const char *valid_chars;
+        bool mangled = false;
+
+        assert(f);
+        assert(t);
+
+        /* We'll only escape the obvious characters here, to play safe.
+         *
+         * Returns true if any characters were mangled, false otherwise.
+         */
+
+        valid_chars = allow_globs ? VALID_CHARS_GLOB : VALID_CHARS_WITH_AT;
+
+        for (; *f; f++)
+                if (*f == '/') {
+                        *(t++) = '-';
+                        mangled = true;
+                } else if (!strchr(valid_chars, *f)) {
+                        t = do_escape_char(*f, t);
+                        mangled = true;
+                } else
+                        *(t++) = *f;
+        *t = 0;
+
+        return mangled;
+}
+
+/**
+ *  Convert a string to a unit name. /dev/blah is converted to dev-blah.device,
+ *  /blah/blah is converted to blah-blah.mount, anything else is left alone,
+ *  except that @suffix is appended if a valid unit suffix is not present.
+ *
+ *  If @allow_globs, globs characters are preserved. Otherwise, they are escaped.
+ */
+int unit_name_mangle_with_suffix(const char *name, const char *operation, UnitNameMangle flags, const char *suffix, char **ret) {
+        _cleanup_free_ char *s = NULL;
+        bool mangled, suggest_escape = true;
+        int r;
+
+        assert(name);
+        assert(suffix);
+        assert(ret);
+
+        if (isempty(name)) /* We cannot mangle empty unit names to become valid, sorry. */
+                return -EINVAL;
+
+        if (!unit_suffix_is_valid(suffix))
+                return -EINVAL;
+
+        /* Already a fully valid unit name? If so, no mangling is necessary... */
+        if (unit_name_is_valid(name, UNIT_NAME_ANY))
+                goto good;
+
+        /* Already a fully valid globbing expression? If so, no mangling is necessary either... */
+        if (string_is_glob(name) && in_charset(name, VALID_CHARS_GLOB)) {
+                if (flags & UNIT_NAME_MANGLE_GLOB)
+                        goto good;
+                log_full(flags & UNIT_NAME_MANGLE_WARN ? LOG_NOTICE : LOG_DEBUG,
+                         "Glob pattern passed%s%s, but globs are not supported for this.",
+                         operation ? " " : "", strempty(operation));
+                suggest_escape = false;
+        }
+
+        if (is_device_path(name)) {
+                r = unit_name_from_path(name, ".device", ret);
+                if (r >= 0)
+                        return 1;
+                if (r != -EINVAL)
+                        return r;
+        }
+
+        if (path_is_absolute(name)) {
+                r = unit_name_from_path(name, ".mount", ret);
+                if (r >= 0)
+                        return 1;
+                if (r != -EINVAL)
+                        return r;
+        }
+
+        s = new(char, strlen(name) * 4 + strlen(suffix) + 1);
+        if (!s)
+                return -ENOMEM;
+
+        mangled = do_escape_mangle(name, flags & UNIT_NAME_MANGLE_GLOB, s);
+        if (mangled)
+                log_full(flags & UNIT_NAME_MANGLE_WARN ? LOG_NOTICE : LOG_DEBUG,
+                         "Invalid unit name \"%s\" escaped as \"%s\"%s.",
+                         name, s,
+                         suggest_escape ? " (maybe you should use systemd-escape?)" : "");
+
+        /* Append a suffix if it doesn't have any, but only if this is not a glob, so that we can allow
+         * "foo.*" as a valid glob. */
+        if ((!(flags & UNIT_NAME_MANGLE_GLOB) || !string_is_glob(s)) && unit_name_to_type(s) < 0)
+                strcat(s, suffix);
+
+        /* Make sure mangling didn't grow this too large (but don't do this check if globbing is allowed,
+         * since globs generally do not qualify as valid unit names) */
+        if (!FLAGS_SET(flags, UNIT_NAME_MANGLE_GLOB) && !unit_name_is_valid(s, UNIT_NAME_ANY))
+                return -EINVAL;
+
+        *ret = TAKE_PTR(s);
+        return 1;
+
+good:
+        s = strdup(name);
+        if (!s)
+                return -ENOMEM;
+
+        *ret = TAKE_PTR(s);
+        return 0;
+}
+
+int slice_build_parent_slice(const char *slice, char **ret) {
+        _cleanup_free_ char *s = NULL;
+        char *dash;
+        int r;
+
+        assert(slice);
+        assert(ret);
+
+        if (!slice_name_is_valid(slice))
+                return -EINVAL;
+
+        if (streq(slice, SPECIAL_ROOT_SLICE)) {
+                *ret = NULL;
+                return 0;
+        }
+
+        s = strdup(slice);
+        if (!s)
+                return -ENOMEM;
+
+        dash = strrchr(s, '-');
+        if (dash)
+                strcpy(dash, ".slice");
+        else {
+                r = free_and_strdup(&s, SPECIAL_ROOT_SLICE);
+                if (r < 0)
+                        return r;
+        }
+
+        *ret = TAKE_PTR(s);
+        return 1;
+}
+
+int slice_build_subslice(const char *slice, const char *name, char **ret) {
+        char *subslice;
+
+        assert(slice);
+        assert(name);
+        assert(ret);
+
+        if (!slice_name_is_valid(slice))
+                return -EINVAL;
+
+        if (!unit_prefix_is_valid(name))
+                return -EINVAL;
+
+        if (streq(slice, SPECIAL_ROOT_SLICE))
+                subslice = strjoin(name, ".slice");
+        else {
+                char *e;
+
+                assert_se(e = endswith(slice, ".slice"));
+
+                subslice = new(char, (e - slice) + 1 + strlen(name) + 6 + 1);
+                if (!subslice)
+                        return -ENOMEM;
+
+                stpcpy(stpcpy(stpcpy(mempcpy(subslice, slice, e - slice), "-"), name), ".slice");
+        }
+
+        *ret = subslice;
+        return 0;
+}
+
+bool slice_name_is_valid(const char *name) {
+        const char *p, *e;
+        bool dash = false;
+
+        if (!unit_name_is_valid(name, UNIT_NAME_PLAIN))
+                return false;
+
+        if (streq(name, SPECIAL_ROOT_SLICE))
+                return true;
+
+        e = endswith(name, ".slice");
+        if (!e)
+                return false;
+
+        for (p = name; p < e; p++) {
+
+                if (*p == '-') {
+
+                        /* Don't allow initial dash */
+                        if (p == name)
+                                return false;
+
+                        /* Don't allow multiple dashes */
+                        if (dash)
+                                return false;
+
+                        dash = true;
+                } else
+                        dash = false;
+        }
+
+        /* Don't allow trailing hash */
+        if (dash)
+                return false;
+
+        return true;
+}
+
+bool unit_name_prefix_equal(const char *a, const char *b) {
+        const char *p, *q;
+
+        assert(a);
+        assert(b);
+
+        if (!unit_name_is_valid(a, UNIT_NAME_ANY) || !unit_name_is_valid(b, UNIT_NAME_ANY))
+                return false;
+
+        p = strchr(a, '@');
+        if (!p)
+                p = strrchr(a, '.');
+
+        q = strchr(b, '@');
+        if (!q)
+                q = strrchr(b, '.');
+
+        assert(p);
+        assert(q);
+
+        return memcmp_nn(a, p - a, b, q - b) == 0;
+}
diff --git a/src/basic/unit-name.h b/src/basic/unit-name.h
new file mode 100644
index 0000000..eaa701e
--- /dev/null
+++ b/src/basic/unit-name.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include 
+
+#include "macro.h"
+#include "unit-def.h"
+
+#define UNIT_NAME_MAX 256
+
+typedef enum UnitNameFlags {
+        UNIT_NAME_PLAIN    = 1 << 0, /* Allow foo.service */
+        UNIT_NAME_TEMPLATE = 1 << 1, /* Allow foo@.service */
+        UNIT_NAME_INSTANCE = 1 << 2, /* Allow foo@bar.service */
+        UNIT_NAME_ANY = UNIT_NAME_PLAIN|UNIT_NAME_TEMPLATE|UNIT_NAME_INSTANCE,
+        _UNIT_NAME_INVALID = -EINVAL,
+} UnitNameFlags;
+
+bool unit_name_is_valid(const char *n, UnitNameFlags flags) _pure_;
+bool unit_prefix_is_valid(const char *p) _pure_;
+bool unit_instance_is_valid(const char *i) _pure_;
+bool unit_suffix_is_valid(const char *s) _pure_;
+
+int unit_name_to_prefix(const char *n, char **ret);
+UnitNameFlags unit_name_to_instance(const char *n, char **ret);
+static inline UnitNameFlags unit_name_classify(const char *n) {
+        return unit_name_to_instance(n, NULL);
+}
+int unit_name_to_prefix_and_instance(const char *n, char **ret);
+
+UnitType unit_name_to_type(const char *n) _pure_;
+
+int unit_name_change_suffix(const char *n, const char *suffix, char **ret);
+
+int unit_name_build(const char *prefix, const char *instance, const char *suffix, char **ret);
+int unit_name_build_from_type(const char *prefix, const char *instance, UnitType, char **ret);
+
+char *unit_name_escape(const char *f);
+int unit_name_unescape(const char *f, char **ret);
+int unit_name_path_escape(const char *f, char **ret);
+int unit_name_path_unescape(const char *f, char **ret);
+
+int unit_name_replace_instance(const char *f, const char *i, char **ret);
+
+int unit_name_template(const char *f, char **ret);
+
+int unit_name_hash_long(const char *name, char **ret);
+bool unit_name_is_hashed(const char *name);
+
+int unit_name_from_path(const char *path, const char *suffix, char **ret);
+int unit_name_from_path_instance(const char *prefix, const char *path, const char *suffix, char **ret);
+int unit_name_to_path(const char *name, char **ret);
+
+typedef enum UnitNameMangle {
+        UNIT_NAME_MANGLE_GLOB = 1 << 0,
+        UNIT_NAME_MANGLE_WARN = 1 << 1,
+} UnitNameMangle;
+
+int unit_name_mangle_with_suffix(const char *name, const char *operation, UnitNameMangle flags, const char *suffix, char **ret);
+
+static inline int unit_name_mangle(const char *name, UnitNameMangle flags, char **ret) {
+        return unit_name_mangle_with_suffix(name, NULL, flags, ".service", ret);
+}
+
+int slice_build_parent_slice(const char *slice, char **ret);
+int slice_build_subslice(const char *slice, const char *name, char **subslice);
+bool slice_name_is_valid(const char *name);
+
+bool unit_name_prefix_equal(const char *a, const char *b);
diff --git a/src/basic/user-util.c b/src/basic/user-util.c
new file mode 100644
index 0000000..519e788
--- /dev/null
+++ b/src/basic/user-util.c
@@ -0,0 +1,1068 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "chase-symlinks.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "random-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "utf8.h"
+
+bool uid_is_valid(uid_t uid) {
+
+        /* Also see POSIX IEEE Std 1003.1-2008, 2016 Edition, 3.436. */
+
+        /* Some libc APIs use UID_INVALID as special placeholder */
+        if (uid == (uid_t) UINT32_C(0xFFFFFFFF))
+                return false;
+
+        /* A long time ago UIDs where 16bit, hence explicitly avoid the 16bit -1 too */
+        if (uid == (uid_t) UINT32_C(0xFFFF))
+                return false;
+
+        return true;
+}
+
+int parse_uid(const char *s, uid_t *ret) {
+        uint32_t uid = 0;
+        int r;
+
+        assert(s);
+
+        assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+
+        /* We are very strict when parsing UIDs, and prohibit +/- as prefix, leading zero as prefix, and
+         * whitespace. We do this, since this call is often used in a context where we parse things as UID
+         * first, and if that doesn't work we fall back to NSS. Thus we really want to make sure that UIDs
+         * are parsed as UIDs only if they really really look like UIDs. */
+        r = safe_atou32_full(s, 10
+                             | SAFE_ATO_REFUSE_PLUS_MINUS
+                             | SAFE_ATO_REFUSE_LEADING_ZERO
+                             | SAFE_ATO_REFUSE_LEADING_WHITESPACE, &uid);
+        if (r < 0)
+                return r;
+
+        if (!uid_is_valid(uid))
+                return -ENXIO; /* we return ENXIO instead of EINVAL
+                                * here, to make it easy to distinguish
+                                * invalid numeric uids from invalid
+                                * strings. */
+
+        if (ret)
+                *ret = uid;
+
+        return 0;
+}
+
+int parse_uid_range(const char *s, uid_t *ret_lower, uid_t *ret_upper) {
+        _cleanup_free_ char *word = NULL;
+        uid_t l, u;
+        int r;
+
+        assert(s);
+        assert(ret_lower);
+        assert(ret_upper);
+
+        r = extract_first_word(&s, &word, "-", EXTRACT_DONT_COALESCE_SEPARATORS);
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return -EINVAL;
+
+        r = parse_uid(word, &l);
+        if (r < 0)
+                return r;
+
+        /* Check for the upper bound and extract it if needed */
+        if (!s)
+                /* Single number with no dash. */
+                u = l;
+        else if (!*s)
+                /* Trailing dash is an error. */
+                return -EINVAL;
+        else {
+                r = parse_uid(s, &u);
+                if (r < 0)
+                        return r;
+
+                if (l > u)
+                        return -EINVAL;
+        }
+
+        *ret_lower = l;
+        *ret_upper = u;
+        return 0;
+}
+
+char* getlogname_malloc(void) {
+        uid_t uid;
+        struct stat st;
+
+        if (isatty(STDIN_FILENO) && fstat(STDIN_FILENO, &st) >= 0)
+                uid = st.st_uid;
+        else
+                uid = getuid();
+
+        return uid_to_name(uid);
+}
+
+char *getusername_malloc(void) {
+        const char *e;
+
+        e = secure_getenv("USER");
+        if (e)
+                return strdup(e);
+
+        return uid_to_name(getuid());
+}
+
+bool is_nologin_shell(const char *shell) {
+        return PATH_IN_SET(shell,
+                           /* 'nologin' is the friendliest way to disable logins for a user account. It prints a nice
+                            * message and exits. Different distributions place the binary at different places though,
+                            * hence let's list them all. */
+                           "/bin/nologin",
+                           "/sbin/nologin",
+                           "/usr/bin/nologin",
+                           "/usr/sbin/nologin",
+                           /* 'true' and 'false' work too for the same purpose, but are less friendly as they don't do
+                            * any message printing. Different distributions place the binary at various places but at
+                            * least not in the 'sbin' directory. */
+                           "/bin/false",
+                           "/usr/bin/false",
+                           "/bin/true",
+                           "/usr/bin/true");
+}
+
+const char* default_root_shell(const char *root) {
+        /* We want to use the preferred shell, i.e. DEFAULT_USER_SHELL, which usually
+         * will be /bin/bash. Fall back to /bin/sh if DEFAULT_USER_SHELL is not found,
+         * or any access errors. */
+
+        int r = chase_symlinks(DEFAULT_USER_SHELL, root, CHASE_PREFIX_ROOT, NULL, NULL);
+        if (r < 0 && r != -ENOENT)
+                log_debug_errno(r, "Failed to look up shell '%s%s%s': %m",
+                                strempty(root), root ? "/" : "", DEFAULT_USER_SHELL);
+        if (r > 0)
+                return DEFAULT_USER_SHELL;
+
+        return "/bin/sh";
+}
+
+static int synthesize_user_creds(
+                const char **username,
+                uid_t *uid, gid_t *gid,
+                const char **home,
+                const char **shell,
+                UserCredsFlags flags) {
+
+        /* We enforce some special rules for uid=0 and uid=65534: in order to avoid NSS lookups for root we hardcode
+         * their user record data. */
+
+        if (STR_IN_SET(*username, "root", "0")) {
+                *username = "root";
+
+                if (uid)
+                        *uid = 0;
+                if (gid)
+                        *gid = 0;
+
+                if (home)
+                        *home = "/root";
+
+                if (shell)
+                        *shell = default_root_shell(NULL);
+
+                return 0;
+        }
+
+        if (STR_IN_SET(*username, NOBODY_USER_NAME, "65534") &&
+            synthesize_nobody()) {
+                *username = NOBODY_USER_NAME;
+
+                if (uid)
+                        *uid = UID_NOBODY;
+                if (gid)
+                        *gid = GID_NOBODY;
+
+                if (home)
+                        *home = FLAGS_SET(flags, USER_CREDS_CLEAN) ? NULL : "/";
+
+                if (shell)
+                        *shell = FLAGS_SET(flags, USER_CREDS_CLEAN) ? NULL : NOLOGIN;
+
+                return 0;
+        }
+
+        return -ENOMEDIUM;
+}
+
+int get_user_creds(
+                const char **username,
+                uid_t *uid, gid_t *gid,
+                const char **home,
+                const char **shell,
+                UserCredsFlags flags) {
+
+        uid_t u = UID_INVALID;
+        struct passwd *p;
+        int r;
+
+        assert(username);
+        assert(*username);
+
+        if (!FLAGS_SET(flags, USER_CREDS_PREFER_NSS) ||
+            (!home && !shell)) {
+
+                /* So here's the deal: normally, we'll try to synthesize all records we can synthesize, and override
+                 * the user database with that. However, if the user specifies USER_CREDS_PREFER_NSS then the
+                 * user database will override the synthetic records instead — except if the user is only interested in
+                 * the UID and/or GID (but not the home directory, or the shell), in which case we'll always override
+                 * the user database (i.e. the USER_CREDS_PREFER_NSS flag has no effect in this case). Why?
+                 * Simply because there are valid usecase where the user might change the home directory or the shell
+                 * of the relevant users, but changing the UID/GID mappings for them is something we explicitly don't
+                 * support. */
+
+                r = synthesize_user_creds(username, uid, gid, home, shell, flags);
+                if (r >= 0)
+                        return 0;
+                if (r != -ENOMEDIUM) /* not a username we can synthesize */
+                        return r;
+        }
+
+        if (parse_uid(*username, &u) >= 0) {
+                errno = 0;
+                p = getpwuid(u);
+
+                /* If there are multiple users with the same id, make sure to leave $USER to the configured value
+                 * instead of the first occurrence in the database. However if the uid was configured by a numeric uid,
+                 * then let's pick the real username from /etc/passwd. */
+                if (p)
+                        *username = p->pw_name;
+                else if (FLAGS_SET(flags, USER_CREDS_ALLOW_MISSING) && !gid && !home && !shell) {
+
+                        /* If the specified user is a numeric UID and it isn't in the user database, and the caller
+                         * passed USER_CREDS_ALLOW_MISSING and was only interested in the UID, then just return that
+                         * and don't complain. */
+
+                        if (uid)
+                                *uid = u;
+
+                        return 0;
+                }
+        } else {
+                errno = 0;
+                p = getpwnam(*username);
+        }
+        if (!p) {
+                r = errno_or_else(ESRCH);
+
+                /* If the user requested that we only synthesize as fallback, do so now */
+                if (FLAGS_SET(flags, USER_CREDS_PREFER_NSS)) {
+                        if (synthesize_user_creds(username, uid, gid, home, shell, flags) >= 0)
+                                return 0;
+                }
+
+                return r;
+        }
+
+        if (uid) {
+                if (!uid_is_valid(p->pw_uid))
+                        return -EBADMSG;
+
+                *uid = p->pw_uid;
+        }
+
+        if (gid) {
+                if (!gid_is_valid(p->pw_gid))
+                        return -EBADMSG;
+
+                *gid = p->pw_gid;
+        }
+
+        if (home) {
+                if (FLAGS_SET(flags, USER_CREDS_CLEAN) &&
+                    (empty_or_root(p->pw_dir) ||
+                     !path_is_valid(p->pw_dir) ||
+                     !path_is_absolute(p->pw_dir)))
+                        *home = NULL; /* Note: we don't insist on normalized paths, since there are setups that have /./ in the path */
+                else
+                        *home = p->pw_dir;
+        }
+
+        if (shell) {
+                if (FLAGS_SET(flags, USER_CREDS_CLEAN) &&
+                    (isempty(p->pw_shell) ||
+                     !path_is_valid(p->pw_dir) ||
+                     !path_is_absolute(p->pw_shell) ||
+                     is_nologin_shell(p->pw_shell)))
+                        *shell = NULL;
+                else
+                        *shell = p->pw_shell;
+        }
+
+        return 0;
+}
+
+int get_group_creds(const char **groupname, gid_t *gid, UserCredsFlags flags) {
+        struct group *g;
+        gid_t id;
+
+        assert(groupname);
+
+        /* We enforce some special rules for gid=0: in order to avoid NSS lookups for root we hardcode its data. */
+
+        if (STR_IN_SET(*groupname, "root", "0")) {
+                *groupname = "root";
+
+                if (gid)
+                        *gid = 0;
+
+                return 0;
+        }
+
+        if (STR_IN_SET(*groupname, NOBODY_GROUP_NAME, "65534") &&
+            synthesize_nobody()) {
+                *groupname = NOBODY_GROUP_NAME;
+
+                if (gid)
+                        *gid = GID_NOBODY;
+
+                return 0;
+        }
+
+        if (parse_gid(*groupname, &id) >= 0) {
+                errno = 0;
+                g = getgrgid(id);
+
+                if (g)
+                        *groupname = g->gr_name;
+                else if (FLAGS_SET(flags, USER_CREDS_ALLOW_MISSING)) {
+                        if (gid)
+                                *gid = id;
+
+                        return 0;
+                }
+        } else {
+                errno = 0;
+                g = getgrnam(*groupname);
+        }
+
+        if (!g)
+                return errno_or_else(ESRCH);
+
+        if (gid) {
+                if (!gid_is_valid(g->gr_gid))
+                        return -EBADMSG;
+
+                *gid = g->gr_gid;
+        }
+
+        return 0;
+}
+
+char* uid_to_name(uid_t uid) {
+        char *ret;
+        int r;
+
+        /* Shortcut things to avoid NSS lookups */
+        if (uid == 0)
+                return strdup("root");
+        if (uid == UID_NOBODY && synthesize_nobody())
+                return strdup(NOBODY_USER_NAME);
+
+        if (uid_is_valid(uid)) {
+                long bufsize;
+
+                bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
+                if (bufsize <= 0)
+                        bufsize = 4096;
+
+                for (;;) {
+                        struct passwd pwbuf, *pw = NULL;
+                        _cleanup_free_ char *buf = NULL;
+
+                        buf = malloc(bufsize);
+                        if (!buf)
+                                return NULL;
+
+                        r = getpwuid_r(uid, &pwbuf, buf, (size_t) bufsize, &pw);
+                        if (r == 0 && pw)
+                                return strdup(pw->pw_name);
+                        if (r != ERANGE)
+                                break;
+
+                        if (bufsize > LONG_MAX/2) /* overflow check */
+                                return NULL;
+
+                        bufsize *= 2;
+                }
+        }
+
+        if (asprintf(&ret, UID_FMT, uid) < 0)
+                return NULL;
+
+        return ret;
+}
+
+char* gid_to_name(gid_t gid) {
+        char *ret;
+        int r;
+
+        if (gid == 0)
+                return strdup("root");
+        if (gid == GID_NOBODY && synthesize_nobody())
+                return strdup(NOBODY_GROUP_NAME);
+
+        if (gid_is_valid(gid)) {
+                long bufsize;
+
+                bufsize = sysconf(_SC_GETGR_R_SIZE_MAX);
+                if (bufsize <= 0)
+                        bufsize = 4096;
+
+                for (;;) {
+                        struct group grbuf, *gr = NULL;
+                        _cleanup_free_ char *buf = NULL;
+
+                        buf = malloc(bufsize);
+                        if (!buf)
+                                return NULL;
+
+                        r = getgrgid_r(gid, &grbuf, buf, (size_t) bufsize, &gr);
+                        if (r == 0 && gr)
+                                return strdup(gr->gr_name);
+                        if (r != ERANGE)
+                                break;
+
+                        if (bufsize > LONG_MAX/2) /* overflow check */
+                                return NULL;
+
+                        bufsize *= 2;
+                }
+        }
+
+        if (asprintf(&ret, GID_FMT, gid) < 0)
+                return NULL;
+
+        return ret;
+}
+
+static bool gid_list_has(const gid_t *list, size_t size, gid_t val) {
+        for (size_t i = 0; i < size; i++)
+                if (list[i] == val)
+                        return true;
+        return false;
+}
+
+int in_gid(gid_t gid) {
+        _cleanup_free_ gid_t *gids = NULL;
+        int ngroups;
+
+        if (getgid() == gid)
+                return 1;
+
+        if (getegid() == gid)
+                return 1;
+
+        if (!gid_is_valid(gid))
+                return -EINVAL;
+
+        ngroups = getgroups_alloc(&gids);
+        if (ngroups < 0)
+                return ngroups;
+
+        return gid_list_has(gids, ngroups, gid);
+}
+
+int in_group(const char *name) {
+        int r;
+        gid_t gid;
+
+        r = get_group_creds(&name, &gid, 0);
+        if (r < 0)
+                return r;
+
+        return in_gid(gid);
+}
+
+int merge_gid_lists(const gid_t *list1, size_t size1, const gid_t *list2, size_t size2, gid_t **ret) {
+        size_t nresult = 0;
+        assert(ret);
+
+        if (size2 > INT_MAX - size1)
+                return -ENOBUFS;
+
+        gid_t *buf = new(gid_t, size1 + size2);
+        if (!buf)
+                return -ENOMEM;
+
+        /* Duplicates need to be skipped on merging, otherwise they'll be passed on and stored in the kernel. */
+        for (size_t i = 0; i < size1; i++)
+                if (!gid_list_has(buf, nresult, list1[i]))
+                        buf[nresult++] = list1[i];
+        for (size_t i = 0; i < size2; i++)
+                if (!gid_list_has(buf, nresult, list2[i]))
+                        buf[nresult++] = list2[i];
+        *ret = buf;
+        return (int)nresult;
+}
+
+int getgroups_alloc(gid_t** gids) {
+        gid_t *allocated;
+        _cleanup_free_  gid_t *p = NULL;
+        int ngroups = 8;
+        unsigned attempt = 0;
+
+        allocated = new(gid_t, ngroups);
+        if (!allocated)
+                return -ENOMEM;
+        p = allocated;
+
+        for (;;) {
+                ngroups = getgroups(ngroups, p);
+                if (ngroups >= 0)
+                        break;
+                if (errno != EINVAL)
+                        return -errno;
+
+                /* Give up eventually */
+                if (attempt++ > 10)
+                        return -EINVAL;
+
+                /* Get actual size needed, and size the array explicitly. Note that this is potentially racy
+                 * to use (in multi-threaded programs), hence let's call this in a loop. */
+                ngroups = getgroups(0, NULL);
+                if (ngroups < 0)
+                        return -errno;
+                if (ngroups == 0)
+                        return false;
+
+                free(allocated);
+
+                p = allocated = new(gid_t, ngroups);
+                if (!allocated)
+                        return -ENOMEM;
+        }
+
+        *gids = TAKE_PTR(p);
+        return ngroups;
+}
+
+int get_home_dir(char **ret) {
+        struct passwd *p;
+        const char *e;
+        char *h;
+        uid_t u;
+
+        assert(ret);
+
+        /* Take the user specified one */
+        e = secure_getenv("HOME");
+        if (e && path_is_valid(e) && path_is_absolute(e))
+                goto found;
+
+        /* Hardcode home directory for root and nobody to avoid NSS */
+        u = getuid();
+        if (u == 0) {
+                e = "/root";
+                goto found;
+        }
+
+        if (u == UID_NOBODY && synthesize_nobody()) {
+                e = "/";
+                goto found;
+        }
+
+        /* Check the database... */
+        errno = 0;
+        p = getpwuid(u);
+        if (!p)
+                return errno_or_else(ESRCH);
+        e = p->pw_dir;
+
+        if (!path_is_valid(e) || !path_is_absolute(e))
+                return -EINVAL;
+
+ found:
+        h = strdup(e);
+        if (!h)
+                return -ENOMEM;
+
+        *ret = path_simplify(h);
+        return 0;
+}
+
+int get_shell(char **ret) {
+        struct passwd *p;
+        const char *e;
+        char *s;
+        uid_t u;
+
+        assert(ret);
+
+        /* Take the user specified one */
+        e = secure_getenv("SHELL");
+        if (e && path_is_valid(e) && path_is_absolute(e))
+                goto found;
+
+        /* Hardcode shell for root and nobody to avoid NSS */
+        u = getuid();
+        if (u == 0) {
+                e = default_root_shell(NULL);
+                goto found;
+        }
+        if (u == UID_NOBODY && synthesize_nobody()) {
+                e = NOLOGIN;
+                goto found;
+        }
+
+        /* Check the database... */
+        errno = 0;
+        p = getpwuid(u);
+        if (!p)
+                return errno_or_else(ESRCH);
+        e = p->pw_shell;
+
+        if (!path_is_valid(e) || !path_is_absolute(e))
+                return -EINVAL;
+
+ found:
+        s = strdup(e);
+        if (!s)
+                return -ENOMEM;
+
+        *ret = path_simplify(s);
+        return 0;
+}
+
+int reset_uid_gid(void) {
+        int r;
+
+        r = maybe_setgroups(0, NULL);
+        if (r < 0)
+                return r;
+
+        if (setresgid(0, 0, 0) < 0)
+                return -errno;
+
+        return RET_NERRNO(setresuid(0, 0, 0));
+}
+
+int take_etc_passwd_lock(const char *root) {
+
+        struct flock flock = {
+                .l_type = F_WRLCK,
+                .l_whence = SEEK_SET,
+                .l_start = 0,
+                .l_len = 0,
+        };
+
+        const char *path;
+        int fd, r;
+
+        /* This is roughly the same as lckpwdf(), but not as awful. We
+         * don't want to use alarm() and signals, hence we implement
+         * our own trivial version of this.
+         *
+         * Note that shadow-utils also takes per-database locks in
+         * addition to lckpwdf(). However, we don't given that they
+         * are redundant as they invoke lckpwdf() first and keep
+         * it during everything they do. The per-database locks are
+         * awfully racy, and thus we just won't do them. */
+
+        if (root)
+                path = prefix_roota(root, ETC_PASSWD_LOCK_PATH);
+        else
+                path = ETC_PASSWD_LOCK_PATH;
+
+        fd = open(path, O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0600);
+        if (fd < 0)
+                return log_debug_errno(errno, "Cannot open %s: %m", path);
+
+        r = fcntl(fd, F_SETLKW, &flock);
+        if (r < 0) {
+                safe_close(fd);
+                return log_debug_errno(errno, "Locking %s failed: %m", path);
+        }
+
+        return fd;
+}
+
+bool valid_user_group_name(const char *u, ValidUserFlags flags) {
+        const char *i;
+
+        /* Checks if the specified name is a valid user/group name. There are two flavours of this call:
+         * strict mode is the default which is POSIX plus some extra rules; and relaxed mode where we accept
+         * pretty much everything except the really worst offending names.
+         *
+         * Whenever we synthesize users ourselves we should use the strict mode. But when we process users
+         * created by other stuff, let's be more liberal. */
+
+        if (isempty(u)) /* An empty user name is never valid */
+                return false;
+
+        if (parse_uid(u, NULL) >= 0) /* Something that parses as numeric UID string is valid exactly when the
+                                      * flag for it is set */
+                return FLAGS_SET(flags, VALID_USER_ALLOW_NUMERIC);
+
+        if (FLAGS_SET(flags, VALID_USER_RELAX)) {
+
+                /* In relaxed mode we just check very superficially. Apparently SSSD and other stuff is
+                 * extremely liberal (way too liberal if you ask me, even inserting "@" in user names, which
+                 * is bound to cause problems for example when used with an MTA), hence only filter the most
+                 * obvious cases, or where things would result in an invalid entry if such a user name would
+                 * show up in /etc/passwd (or equivalent getent output).
+                 *
+                 * Note that we stepped far out of POSIX territory here. It's not our fault though, but
+                 * SSSD's, Samba's and everybody else who ignored POSIX on this. (I mean, I am happy to step
+                 * outside of POSIX' bounds any day, but I must say in this case I probably wouldn't
+                 * have...) */
+
+                if (startswith(u, " ") || endswith(u, " ")) /* At least expect whitespace padding is removed
+                                                             * at front and back (accept in the middle, since
+                                                             * that's apparently a thing on Windows). Note
+                                                             * that this also blocks usernames consisting of
+                                                             * whitespace only. */
+                        return false;
+
+                if (!utf8_is_valid(u)) /* We want to synthesize JSON from this, hence insist on UTF-8 */
+                        return false;
+
+                if (string_has_cc(u, NULL)) /* CC characters are just dangerous (and \n in particular is the
+                                             * record separator in /etc/passwd), so we can't allow that. */
+                        return false;
+
+                if (strpbrk(u, ":/")) /* Colons are the field separator in /etc/passwd, we can't allow
+                                       * that. Slashes are special to file systems paths and user names
+                                       * typically show up in the file system as home directories, hence
+                                       * don't allow slashes. */
+                        return false;
+
+                if (in_charset(u, "0123456789")) /* Don't allow fully numeric strings, they might be confused
+                                                  * with UIDs (note that this test is more broad than
+                                                  * the parse_uid() test above, as it will cover more than
+                                                  * the 32bit range, and it will detect 65535 (which is in
+                                                  * invalid UID, even though in the unsigned 32 bit range) */
+                        return false;
+
+                if (u[0] == '-' && in_charset(u + 1, "0123456789")) /* Don't allow negative fully numeric
+                                                                     * strings either. After all some people
+                                                                     * write 65535 as -1 (even though that's
+                                                                     * not even true on 32bit uid_t
+                                                                     * anyway) */
+                        return false;
+
+                if (dot_or_dot_dot(u)) /* User names typically become home directory names, and these two are
+                                        * special in that context, don't allow that. */
+                        return false;
+
+                /* Compare with strict result and warn if result doesn't match */
+                if (FLAGS_SET(flags, VALID_USER_WARN) && !valid_user_group_name(u, 0))
+                        log_struct(LOG_NOTICE,
+                                   LOG_MESSAGE("Accepting user/group name '%s', which does not match strict user/group name rules.", u),
+                                   "USER_GROUP_NAME=%s", u,
+                                   "MESSAGE_ID=" SD_MESSAGE_UNSAFE_USER_NAME_STR);
+
+                /* Note that we make no restrictions on the length in relaxed mode! */
+        } else {
+                long sz;
+                size_t l;
+
+                /* Also see POSIX IEEE Std 1003.1-2008, 2016 Edition, 3.437. We are a bit stricter here
+                 * however. Specifically we deviate from POSIX rules:
+                 *
+                 * - We don't allow empty user names (see above)
+                 * - We require that names fit into the appropriate utmp field
+                 * - We don't allow any dots (this conflicts with chown syntax which permits dots as user/group name separator)
+                 * - We don't allow dashes or digit as the first character
+                 *
+                 * Note that other systems are even more restrictive, and don't permit underscores or uppercase characters.
+                 */
+
+                if (!ascii_isalpha(u[0]) &&
+                    u[0] != '_')
+                        return false;
+
+                for (i = u+1; *i; i++)
+                        if (!ascii_isalpha(*i) &&
+                            !ascii_isdigit(*i) &&
+                            !IN_SET(*i, '_', '-'))
+                                return false;
+
+                l = i - u;
+
+                sz = sysconf(_SC_LOGIN_NAME_MAX);
+                assert_se(sz > 0);
+
+                if (l > (size_t) sz)
+                        return false;
+                if (l > NAME_MAX) /* must fit in a filename */
+                        return false;
+                if (l > UT_NAMESIZE - 1)
+                        return false;
+        }
+
+        return true;
+}
+
+bool valid_gecos(const char *d) {
+
+        if (!d)
+                return false;
+
+        if (!utf8_is_valid(d))
+                return false;
+
+        if (string_has_cc(d, NULL))
+                return false;
+
+        /* Colons are used as field separators, and hence not OK */
+        if (strchr(d, ':'))
+                return false;
+
+        return true;
+}
+
+char *mangle_gecos(const char *d) {
+        char *mangled;
+
+        /* Makes sure the provided string becomes valid as a GEGOS field, by dropping bad chars. glibc's
+         * putwent() only changes \n and : to spaces. We do more: replace all CC too, and remove invalid
+         * UTF-8 */
+
+        mangled = strdup(d);
+        if (!mangled)
+                return NULL;
+
+        for (char *i = mangled; *i; i++) {
+                int len;
+
+                if ((uint8_t) *i < (uint8_t) ' ' || *i == ':') {
+                        *i = ' ';
+                        continue;
+                }
+
+                len = utf8_encoded_valid_unichar(i, SIZE_MAX);
+                if (len < 0) {
+                        *i = ' ';
+                        continue;
+                }
+
+                i += len - 1;
+        }
+
+        return mangled;
+}
+
+bool valid_home(const char *p) {
+        /* Note that this function is also called by valid_shell(), any
+         * changes must account for that. */
+
+        if (isempty(p))
+                return false;
+
+        if (!utf8_is_valid(p))
+                return false;
+
+        if (string_has_cc(p, NULL))
+                return false;
+
+        if (!path_is_absolute(p))
+                return false;
+
+        if (!path_is_normalized(p))
+                return false;
+
+        /* Colons are used as field separators, and hence not OK */
+        if (strchr(p, ':'))
+                return false;
+
+        return true;
+}
+
+int maybe_setgroups(size_t size, const gid_t *list) {
+        int r;
+
+        /* Check if setgroups is allowed before we try to drop all the auxiliary groups */
+        if (size == 0) { /* Dropping all aux groups? */
+                _cleanup_free_ char *setgroups_content = NULL;
+                bool can_setgroups;
+
+                r = read_one_line_file("/proc/self/setgroups", &setgroups_content);
+                if (r == -ENOENT)
+                        /* Old kernels don't have /proc/self/setgroups, so assume we can use setgroups */
+                        can_setgroups = true;
+                else if (r < 0)
+                        return r;
+                else
+                        can_setgroups = streq(setgroups_content, "allow");
+
+                if (!can_setgroups) {
+                        log_debug("Skipping setgroups(), /proc/self/setgroups is set to 'deny'");
+                        return 0;
+                }
+        }
+
+        return RET_NERRNO(setgroups(size, list));
+}
+
+bool synthesize_nobody(void) {
+        /* Returns true when we shall synthesize the "nobody" user (which we do by default). This can be turned off by
+         * touching /etc/systemd/dont-synthesize-nobody in order to provide upgrade compatibility with legacy systems
+         * that used the "nobody" user name and group name for other UIDs/GIDs than 65534.
+         *
+         * Note that we do not employ any kind of synchronization on the following caching variable. If the variable is
+         * accessed in multi-threaded programs in the worst case it might happen that we initialize twice, but that
+         * shouldn't matter as each initialization should come to the same result. */
+        static int cache = -1;
+
+        if (cache < 0)
+                cache = access("/etc/systemd/dont-synthesize-nobody", F_OK) < 0;
+
+        return cache;
+}
+
+int putpwent_sane(const struct passwd *pw, FILE *stream) {
+        assert(pw);
+        assert(stream);
+
+        errno = 0;
+        if (putpwent(pw, stream) != 0)
+                return errno_or_else(EIO);
+
+        return 0;
+}
+
+int putspent_sane(const struct spwd *sp, FILE *stream) {
+        assert(sp);
+        assert(stream);
+
+        errno = 0;
+        if (putspent(sp, stream) != 0)
+                return errno_or_else(EIO);
+
+        return 0;
+}
+
+int putgrent_sane(const struct group *gr, FILE *stream) {
+        assert(gr);
+        assert(stream);
+
+        errno = 0;
+        if (putgrent(gr, stream) != 0)
+                return errno_or_else(EIO);
+
+        return 0;
+}
+
+#if ENABLE_GSHADOW
+int putsgent_sane(const struct sgrp *sg, FILE *stream) {
+        assert(sg);
+        assert(stream);
+
+        errno = 0;
+        if (putsgent(sg, stream) != 0)
+                return errno_or_else(EIO);
+
+        return 0;
+}
+#endif
+
+int fgetpwent_sane(FILE *stream, struct passwd **pw) {
+        assert(stream);
+        assert(pw);
+
+        errno = 0;
+        struct passwd *p = fgetpwent(stream);
+        if (!p && errno != ENOENT)
+                return errno_or_else(EIO);
+
+        *pw = p;
+        return !!p;
+}
+
+int fgetspent_sane(FILE *stream, struct spwd **sp) {
+        assert(stream);
+        assert(sp);
+
+        errno = 0;
+        struct spwd *s = fgetspent(stream);
+        if (!s && errno != ENOENT)
+                return errno_or_else(EIO);
+
+        *sp = s;
+        return !!s;
+}
+
+int fgetgrent_sane(FILE *stream, struct group **gr) {
+        assert(stream);
+        assert(gr);
+
+        errno = 0;
+        struct group *g = fgetgrent(stream);
+        if (!g && errno != ENOENT)
+                return errno_or_else(EIO);
+
+        *gr = g;
+        return !!g;
+}
+
+#if ENABLE_GSHADOW
+int fgetsgent_sane(FILE *stream, struct sgrp **sg) {
+        assert(stream);
+        assert(sg);
+
+        errno = 0;
+        struct sgrp *s = fgetsgent(stream);
+        if (!s && errno != ENOENT)
+                return errno_or_else(EIO);
+
+        *sg = s;
+        return !!s;
+}
+#endif
+
+int is_this_me(const char *username) {
+        uid_t uid;
+        int r;
+
+        /* Checks if the specified username is our current one. Passed string might be a UID or a user name. */
+
+        r = get_user_creds(&username, &uid, NULL, NULL, NULL, USER_CREDS_ALLOW_MISSING);
+        if (r < 0)
+                return r;
+
+        return uid == getuid();
+}
+
+const char *get_home_root(void) {
+        const char *e;
+
+        /* For debug purposes allow overriding where we look for home dirs */
+        e = secure_getenv("SYSTEMD_HOME_ROOT");
+        if (e && path_is_absolute(e) && path_is_normalized(e))
+                return e;
+
+        return "/home";
+}
diff --git a/src/basic/user-util.h b/src/basic/user-util.h
new file mode 100644
index 0000000..a08683b
--- /dev/null
+++ b/src/basic/user-util.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include 
+#if ENABLE_GSHADOW
+#  include 
+#endif
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* Users managed by systemd-homed. See https://systemd.io/UIDS-GIDS for details how this range fits into the rest of the world */
+#define HOME_UID_MIN ((uid_t) 60001)
+#define HOME_UID_MAX ((uid_t) 60513)
+
+/* Users mapped from host into a container */
+#define MAP_UID_MIN ((uid_t) 60514)
+#define MAP_UID_MAX ((uid_t) 60577)
+
+bool uid_is_valid(uid_t uid);
+
+static inline bool gid_is_valid(gid_t gid) {
+        return uid_is_valid((uid_t) gid);
+}
+
+int parse_uid(const char *s, uid_t* ret_uid);
+int parse_uid_range(const char *s, uid_t *ret_lower, uid_t *ret_upper);
+
+static inline int parse_gid(const char *s, gid_t *ret_gid) {
+        return parse_uid(s, (uid_t*) ret_gid);
+}
+
+char* getlogname_malloc(void);
+char* getusername_malloc(void);
+
+typedef enum UserCredsFlags {
+        USER_CREDS_PREFER_NSS    = 1 << 0,  /* if set, only synthesize user records if database lacks them. Normally we bypass the userdb entirely for the records we can synthesize */
+        USER_CREDS_ALLOW_MISSING = 1 << 1,  /* if a numeric UID string is resolved, be OK if there's no record for it */
+        USER_CREDS_CLEAN         = 1 << 2,  /* try to clean up shell and home fields with invalid data */
+} UserCredsFlags;
+
+int get_user_creds(const char **username, uid_t *uid, gid_t *gid, const char **home, const char **shell, UserCredsFlags flags);
+int get_group_creds(const char **groupname, gid_t *gid, UserCredsFlags flags);
+
+char* uid_to_name(uid_t uid);
+char* gid_to_name(gid_t gid);
+
+int in_gid(gid_t gid);
+int in_group(const char *name);
+
+int merge_gid_lists(const gid_t *list1, size_t size1, const gid_t *list2, size_t size2, gid_t **result);
+int getgroups_alloc(gid_t** gids);
+
+int get_home_dir(char **ret);
+int get_shell(char **ret);
+
+int reset_uid_gid(void);
+
+int take_etc_passwd_lock(const char *root);
+
+#define UID_INVALID ((uid_t) -1)
+#define GID_INVALID ((gid_t) -1)
+
+#define UID_NOBODY ((uid_t) 65534U)
+#define GID_NOBODY ((gid_t) 65534U)
+
+/* If REMOUNT_IDMAPPING_HOST_ROOT is set for remount_idmap() we'll include a mapping here that maps the host
+ * root user accessing the idmapped mount to the this user ID on the backing fs. This is the last valid UID in
+ * the *signed* 32bit range. You might wonder why precisely use this specific UID for this purpose? Well, we
+ * definitely cannot use the first 0…65536 UIDs for that, since in most cases that's precisely the file range
+ * we intend to map to some high UID range, and since UID mappings have to be bijective we thus cannot use
+ * them at all. Furthermore the UID range beyond INT32_MAX (i.e. the range above the signed 32bit range) is
+ * icky, since many APIs cannot use it (example: setfsuid() returns the old UID as signed integer). Following
+ * our usual logic of assigning a 16bit UID range to each container, so that the upper 16bit of a 32bit UID
+ * value indicate kind of a "container ID" and the lower 16bit map directly to the intended user you can read
+ * this specific UID as the "nobody" user of the container with ID 0x7FFF, which is kinda nice. */
+#define UID_MAPPED_ROOT ((uid_t) (INT32_MAX-1))
+#define GID_MAPPED_ROOT ((gid_t) (INT32_MAX-1))
+
+#define ETC_PASSWD_LOCK_PATH "/etc/.pwd.lock"
+
+/* The following macros add 1 when converting things, since UID 0 is a valid UID, while the pointer
+ * NULL is special */
+#define PTR_TO_UID(p) ((uid_t) (((uintptr_t) (p))-1))
+#define UID_TO_PTR(u) ((void*) (((uintptr_t) (u))+1))
+
+#define PTR_TO_GID(p) ((gid_t) (((uintptr_t) (p))-1))
+#define GID_TO_PTR(u) ((void*) (((uintptr_t) (u))+1))
+
+static inline bool userns_supported(void) {
+        return access("/proc/self/uid_map", F_OK) >= 0;
+}
+
+typedef enum ValidUserFlags {
+        VALID_USER_RELAX         = 1 << 0,
+        VALID_USER_WARN          = 1 << 1,
+        VALID_USER_ALLOW_NUMERIC = 1 << 2,
+} ValidUserFlags;
+
+bool valid_user_group_name(const char *u, ValidUserFlags flags);
+bool valid_gecos(const char *d);
+char *mangle_gecos(const char *d);
+bool valid_home(const char *p);
+
+static inline bool valid_shell(const char *p) {
+        /* We have the same requirements, so just piggy-back on the home check.
+         *
+         * Let's ignore /etc/shells because this is only applicable to real and
+         * not system users. It is also incompatible with the idea of empty /etc.
+         */
+        return valid_home(p);
+}
+
+int maybe_setgroups(size_t size, const gid_t *list);
+
+bool synthesize_nobody(void);
+
+int fgetpwent_sane(FILE *stream, struct passwd **pw);
+int fgetspent_sane(FILE *stream, struct spwd **sp);
+int fgetgrent_sane(FILE *stream, struct group **gr);
+int putpwent_sane(const struct passwd *pw, FILE *stream);
+int putspent_sane(const struct spwd *sp, FILE *stream);
+int putgrent_sane(const struct group *gr, FILE *stream);
+#if ENABLE_GSHADOW
+int fgetsgent_sane(FILE *stream, struct sgrp **sg);
+int putsgent_sane(const struct sgrp *sg, FILE *stream);
+#endif
+
+bool is_nologin_shell(const char *shell);
+const char* default_root_shell(const char *root);
+
+int is_this_me(const char *username);
+
+const char *get_home_root(void);
+
+static inline bool hashed_password_is_locked_or_invalid(const char *password) {
+        return password && password[0] != '$';
+}
+
+/* A locked *and* invalid password for "struct spwd"'s .sp_pwdp and "struct passwd"'s .pw_passwd field */
+#define PASSWORD_LOCKED_AND_INVALID "!*"
+
+/* A password indicating "look in shadow file, please!" for "struct passwd"'s .pw_passwd */
+#define PASSWORD_SEE_SHADOW "x"
+
+/* A password indicating "hey, no password required for login" */
+#define PASSWORD_NONE ""
diff --git a/src/basic/utf8.c b/src/basic/utf8.c
new file mode 100644
index 0000000..2532fcf
--- /dev/null
+++ b/src/basic/utf8.c
@@ -0,0 +1,611 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/* Parts of this file are based on the GLIB utf8 validation functions. The
+ * original license text follows. */
+
+/* gutf8.c - Operations on UTF-8 strings.
+ *
+ * Copyright (C) 1999 Tom Tromey
+ * Copyright (C) 2000 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include 
+#include 
+#include 
+
+#include "alloc-util.h"
+#include "gunicode.h"
+#include "hexdecoct.h"
+#include "macro.h"
+#include "string-util.h"
+#include "utf8.h"
+
+bool unichar_is_valid(char32_t ch) {
+
+        if (ch >= 0x110000) /* End of unicode space */
+                return false;
+        if ((ch & 0xFFFFF800) == 0xD800) /* Reserved area for UTF-16 */
+                return false;
+        if ((ch >= 0xFDD0) && (ch <= 0xFDEF)) /* Reserved */
+                return false;
+        if ((ch & 0xFFFE) == 0xFFFE) /* BOM (Byte Order Mark) */
+                return false;
+
+        return true;
+}
+
+static bool unichar_is_control(char32_t ch) {
+
+        /*
+          0 to ' '-1 is the C0 range.
+          DEL=0x7F, and DEL+1 to 0x9F is C1 range.
+          '\t' is in C0 range, but more or less harmless and commonly used.
+        */
+
+        return (ch < ' ' && !IN_SET(ch, '\t', '\n')) ||
+                (0x7F <= ch && ch <= 0x9F);
+}
+
+/* count of characters used to encode one unicode char */
+static size_t utf8_encoded_expected_len(uint8_t c) {
+        if (c < 0x80)
+                return 1;
+        if ((c & 0xe0) == 0xc0)
+                return 2;
+        if ((c & 0xf0) == 0xe0)
+                return 3;
+        if ((c & 0xf8) == 0xf0)
+                return 4;
+        if ((c & 0xfc) == 0xf8)
+                return 5;
+        if ((c & 0xfe) == 0xfc)
+                return 6;
+
+        return 0;
+}
+
+/* decode one unicode char */
+int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar) {
+        char32_t unichar;
+        size_t len;
+
+        assert(str);
+
+        len = utf8_encoded_expected_len(str[0]);
+
+        switch (len) {
+        case 1:
+                *ret_unichar = (char32_t)str[0];
+                return 0;
+        case 2:
+                unichar = str[0] & 0x1f;
+                break;
+        case 3:
+                unichar = (char32_t)str[0] & 0x0f;
+                break;
+        case 4:
+                unichar = (char32_t)str[0] & 0x07;
+                break;
+        case 5:
+                unichar = (char32_t)str[0] & 0x03;
+                break;
+        case 6:
+                unichar = (char32_t)str[0] & 0x01;
+                break;
+        default:
+                return -EINVAL;
+        }
+
+        for (size_t i = 1; i < len; i++) {
+                if (((char32_t)str[i] & 0xc0) != 0x80)
+                        return -EINVAL;
+
+                unichar <<= 6;
+                unichar |= (char32_t)str[i] & 0x3f;
+        }
+
+        *ret_unichar = unichar;
+
+        return 0;
+}
+
+bool utf8_is_printable_newline(const char* str, size_t length, bool allow_newline) {
+        assert(str);
+
+        for (const char *p = str; length > 0;) {
+                int encoded_len, r;
+                char32_t val;
+
+                encoded_len = utf8_encoded_valid_unichar(p, length);
+                if (encoded_len < 0)
+                        return false;
+                assert(encoded_len > 0 && (size_t) encoded_len <= length);
+
+                r = utf8_encoded_to_unichar(p, &val);
+                if (r < 0 ||
+                    unichar_is_control(val) ||
+                    (!allow_newline && val == '\n'))
+                        return false;
+
+                length -= encoded_len;
+                p += encoded_len;
+        }
+
+        return true;
+}
+
+char *utf8_is_valid_n(const char *str, size_t len_bytes) {
+        /* Check if the string is composed of valid utf8 characters. If length len_bytes is given, stop after
+         * len_bytes. Otherwise, stop at NUL. */
+
+        assert(str);
+
+        for (const char *p = str; len_bytes != SIZE_MAX ? (size_t) (p - str) < len_bytes : *p != '\0'; ) {
+                int len;
+
+                if (_unlikely_(*p == '\0') && len_bytes != SIZE_MAX)
+                        return NULL; /* embedded NUL */
+
+                len = utf8_encoded_valid_unichar(p,
+                                                 len_bytes != SIZE_MAX ? len_bytes - (p - str) : SIZE_MAX);
+                if (_unlikely_(len < 0))
+                        return NULL; /* invalid character */
+
+                p += len;
+        }
+
+        return (char*) str;
+}
+
+char *utf8_escape_invalid(const char *str) {
+        char *p, *s;
+
+        assert(str);
+
+        p = s = malloc(strlen(str) * 4 + 1);
+        if (!p)
+                return NULL;
+
+        while (*str) {
+                int len;
+
+                len = utf8_encoded_valid_unichar(str, SIZE_MAX);
+                if (len > 0) {
+                        s = mempcpy(s, str, len);
+                        str += len;
+                } else {
+                        s = stpcpy(s, UTF8_REPLACEMENT_CHARACTER);
+                        str += 1;
+                }
+        }
+
+        *s = '\0';
+        return str_realloc(p);
+}
+
+static int utf8_char_console_width(const char *str) {
+        char32_t c;
+        int r;
+
+        r = utf8_encoded_to_unichar(str, &c);
+        if (r < 0)
+                return r;
+
+        /* TODO: we should detect combining characters */
+
+        return unichar_iswide(c) ? 2 : 1;
+}
+
+char *utf8_escape_non_printable_full(const char *str, size_t console_width, bool force_ellipsis) {
+        char *p, *s, *prev_s;
+        size_t n = 0; /* estimated print width */
+
+        assert(str);
+
+        if (console_width == 0)
+                return strdup("");
+
+        p = s = prev_s = malloc(strlen(str) * 4 + 1);
+        if (!p)
+                return NULL;
+
+        for (;;) {
+                int len;
+                char *saved_s = s;
+
+                if (!*str) { /* done! */
+                        if (force_ellipsis)
+                                goto truncation;
+                        else
+                                goto finish;
+                }
+
+                len = utf8_encoded_valid_unichar(str, SIZE_MAX);
+                if (len > 0) {
+                        if (utf8_is_printable(str, len)) {
+                                int w;
+
+                                w = utf8_char_console_width(str);
+                                assert(w >= 0);
+                                if (n + w > console_width)
+                                        goto truncation;
+
+                                s = mempcpy(s, str, len);
+                                str += len;
+                                n += w;
+
+                        } else {
+                                for (; len > 0; len--) {
+                                        if (n + 4 > console_width)
+                                                goto truncation;
+
+                                        *(s++) = '\\';
+                                        *(s++) = 'x';
+                                        *(s++) = hexchar((int) *str >> 4);
+                                        *(s++) = hexchar((int) *str);
+
+                                        str += 1;
+                                        n += 4;
+                                }
+                        }
+                } else {
+                        if (n + 1 > console_width)
+                                goto truncation;
+
+                        s = mempcpy(s, UTF8_REPLACEMENT_CHARACTER, strlen(UTF8_REPLACEMENT_CHARACTER));
+                        str += 1;
+                        n += 1;
+                }
+
+                prev_s = saved_s;
+        }
+
+ truncation:
+        /* Try to go back one if we don't have enough space for the ellipsis */
+        if (n + 1 > console_width)
+                s = prev_s;
+
+        s = mempcpy(s, "…", strlen("…"));
+
+ finish:
+        *s = '\0';
+        return str_realloc(p);
+}
+
+char *ascii_is_valid(const char *str) {
+        /* Check whether the string consists of valid ASCII bytes,
+         * i.e values between 0 and 127, inclusive. */
+
+        assert(str);
+
+        for (const char *p = str; *p; p++)
+                if ((unsigned char) *p >= 128)
+                        return NULL;
+
+        return (char*) str;
+}
+
+char *ascii_is_valid_n(const char *str, size_t len) {
+        /* Very similar to ascii_is_valid(), but checks exactly len
+         * bytes and rejects any NULs in that range. */
+
+        assert(str);
+
+        for (size_t i = 0; i < len; i++)
+                if ((unsigned char) str[i] >= 128 || str[i] == 0)
+                        return NULL;
+
+        return (char*) str;
+}
+
+int utf8_to_ascii(const char *str, char replacement_char, char **ret) {
+        /* Convert to a string that has only ASCII chars, replacing anything that is not ASCII
+         * by replacement_char. */
+
+        _cleanup_free_ char *ans = new(char, strlen(str) + 1);
+        if (!ans)
+                return -ENOMEM;
+
+        char *q = ans;
+
+        for (const char *p = str; *p; q++) {
+                int l;
+
+                l = utf8_encoded_valid_unichar(p, SIZE_MAX);
+                if (l < 0)  /* Non-UTF-8, let's not even try to propagate the garbage */
+                        return l;
+
+                if (l == 1)
+                        *q = *p;
+                else
+                        /* non-ASCII, we need to replace it */
+                        *q = replacement_char;
+
+                p += l;
+        }
+        *q = '\0';
+
+        *ret = TAKE_PTR(ans);
+        return 0;
+}
+
+/**
+ * utf8_encode_unichar() - Encode single UCS-4 character as UTF-8
+ * @out_utf8: output buffer of at least 4 bytes or NULL
+ * @g: UCS-4 character to encode
+ *
+ * This encodes a single UCS-4 character as UTF-8 and writes it into @out_utf8.
+ * The length of the character is returned. It is not zero-terminated! If the
+ * output buffer is NULL, only the length is returned.
+ *
+ * Returns: The length in bytes that the UTF-8 representation does or would
+ *          occupy.
+ */
+size_t utf8_encode_unichar(char *out_utf8, char32_t g) {
+
+        if (g < (1 << 7)) {
+                if (out_utf8)
+                        out_utf8[0] = g & 0x7f;
+                return 1;
+        } else if (g < (1 << 11)) {
+                if (out_utf8) {
+                        out_utf8[0] = 0xc0 | ((g >> 6) & 0x1f);
+                        out_utf8[1] = 0x80 | (g & 0x3f);
+                }
+                return 2;
+        } else if (g < (1 << 16)) {
+                if (out_utf8) {
+                        out_utf8[0] = 0xe0 | ((g >> 12) & 0x0f);
+                        out_utf8[1] = 0x80 | ((g >> 6) & 0x3f);
+                        out_utf8[2] = 0x80 | (g & 0x3f);
+                }
+                return 3;
+        } else if (g < (1 << 21)) {
+                if (out_utf8) {
+                        out_utf8[0] = 0xf0 | ((g >> 18) & 0x07);
+                        out_utf8[1] = 0x80 | ((g >> 12) & 0x3f);
+                        out_utf8[2] = 0x80 | ((g >> 6) & 0x3f);
+                        out_utf8[3] = 0x80 | (g & 0x3f);
+                }
+                return 4;
+        }
+
+        return 0;
+}
+
+char *utf16_to_utf8(const char16_t *s, size_t length /* bytes! */) {
+        const uint8_t *f;
+        char *r, *t;
+
+        assert(s);
+
+        /* Input length is in bytes, i.e. the shortest possible character takes 2 bytes. Each unicode character may
+         * take up to 4 bytes in UTF-8. Let's also account for a trailing NUL byte. */
+        if (length * 2 < length)
+                return NULL; /* overflow */
+
+        r = new(char, length * 2 + 1);
+        if (!r)
+                return NULL;
+
+        f = (const uint8_t*) s;
+        t = r;
+
+        while (f + 1 < (const uint8_t*) s + length) {
+                char16_t w1, w2;
+
+                /* see RFC 2781 section 2.2 */
+
+                w1 = f[1] << 8 | f[0];
+                f += 2;
+
+                if (!utf16_is_surrogate(w1)) {
+                        t += utf8_encode_unichar(t, w1);
+                        continue;
+                }
+
+                if (utf16_is_trailing_surrogate(w1))
+                        continue; /* spurious trailing surrogate, ignore */
+
+                if (f + 1 >= (const uint8_t*) s + length)
+                        break;
+
+                w2 = f[1] << 8 | f[0];
+                f += 2;
+
+                if (!utf16_is_trailing_surrogate(w2)) {
+                        f -= 2;
+                        continue; /* surrogate missing its trailing surrogate, ignore */
+                }
+
+                t += utf8_encode_unichar(t, utf16_surrogate_pair_to_unichar(w1, w2));
+        }
+
+        *t = 0;
+        return r;
+}
+
+size_t utf16_encode_unichar(char16_t *out, char32_t c) {
+
+        /* Note that this encodes as little-endian. */
+
+        switch (c) {
+
+        case 0 ... 0xd7ffU:
+        case 0xe000U ... 0xffffU:
+                out[0] = htole16(c);
+                return 1;
+
+        case 0x10000U ... 0x10ffffU:
+                c -= 0x10000U;
+                out[0] = htole16((c >> 10) + 0xd800U);
+                out[1] = htole16((c & 0x3ffU) + 0xdc00U);
+                return 2;
+
+        default: /* A surrogate (invalid) */
+                return 0;
+        }
+}
+
+char16_t *utf8_to_utf16(const char *s, size_t length) {
+        char16_t *n, *p;
+        int r;
+
+        assert(s);
+
+        n = new(char16_t, length + 1);
+        if (!n)
+                return NULL;
+
+        p = n;
+
+        for (size_t i = 0; i < length;) {
+                char32_t unichar;
+                size_t e;
+
+                e = utf8_encoded_expected_len(s[i]);
+                if (e <= 1) /* Invalid and single byte characters are copied as they are */
+                        goto copy;
+
+                if (i + e > length) /* sequence longer than input buffer, then copy as-is */
+                        goto copy;
+
+                r = utf8_encoded_to_unichar(s + i, &unichar);
+                if (r < 0) /* sequence invalid, then copy as-is */
+                        goto copy;
+
+                p += utf16_encode_unichar(p, unichar);
+                i += e;
+                continue;
+
+        copy:
+                *(p++) = htole16(s[i++]);
+        }
+
+        *p = 0;
+        return n;
+}
+
+size_t char16_strlen(const char16_t *s) {
+        size_t n = 0;
+
+        assert(s);
+
+        while (*s != 0)
+                n++, s++;
+
+        return n;
+}
+
+/* expected size used to encode one unicode char */
+static int utf8_unichar_to_encoded_len(char32_t unichar) {
+
+        if (unichar < 0x80)
+                return 1;
+        if (unichar < 0x800)
+                return 2;
+        if (unichar < 0x10000)
+                return 3;
+        if (unichar < 0x200000)
+                return 4;
+        if (unichar < 0x4000000)
+                return 5;
+
+        return 6;
+}
+
+/* validate one encoded unicode char and return its length */
+int utf8_encoded_valid_unichar(const char *str, size_t length /* bytes */) {
+        char32_t unichar;
+        size_t len;
+        int r;
+
+        assert(str);
+        assert(length > 0);
+
+        /* We read until NUL, at most length bytes. SIZE_MAX may be used to disable the length check. */
+
+        len = utf8_encoded_expected_len(str[0]);
+        if (len == 0)
+                return -EINVAL;
+
+        /* Do we have a truncated multi-byte character? */
+        if (len > length)
+                return -EINVAL;
+
+        /* ascii is valid */
+        if (len == 1)
+                return 1;
+
+        /* check if expected encoded chars are available */
+        for (size_t i = 0; i < len; i++)
+                if ((str[i] & 0x80) != 0x80)
+                        return -EINVAL;
+
+        r = utf8_encoded_to_unichar(str, &unichar);
+        if (r < 0)
+                return r;
+
+        /* check if encoded length matches encoded value */
+        if (utf8_unichar_to_encoded_len(unichar) != (int) len)
+                return -EINVAL;
+
+        /* check if value has valid range */
+        if (!unichar_is_valid(unichar))
+                return -EINVAL;
+
+        return (int) len;
+}
+
+size_t utf8_n_codepoints(const char *str) {
+        size_t n = 0;
+
+        /* Returns the number of UTF-8 codepoints in this string, or SIZE_MAX if the string is not valid UTF-8. */
+
+        while (*str != 0) {
+                int k;
+
+                k = utf8_encoded_valid_unichar(str, SIZE_MAX);
+                if (k < 0)
+                        return SIZE_MAX;
+
+                str += k;
+                n++;
+        }
+
+        return n;
+}
+
+size_t utf8_console_width(const char *str) {
+        size_t n = 0;
+
+        /* Returns the approximate width a string will take on screen when printed on a character cell
+         * terminal/console. */
+
+        while (*str) {
+                int w;
+
+                w = utf8_char_console_width(str);
+                if (w < 0)
+                        return SIZE_MAX;
+
+                n += w;
+                str = utf8_next_char(str);
+        }
+
+        return n;
+}
diff --git a/src/basic/utf8.h b/src/basic/utf8.h
new file mode 100644
index 0000000..4a06dd6
--- /dev/null
+++ b/src/basic/utf8.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include 
+#include 
+#include 
+#include 
+
+#include "macro.h"
+#include "missing_type.h"
+
+#define UTF8_REPLACEMENT_CHARACTER "\xef\xbf\xbd"
+#define UTF8_BYTE_ORDER_MARK "\xef\xbb\xbf"
+
+bool unichar_is_valid(char32_t c);
+
+char *utf8_is_valid_n(const char *str, size_t len_bytes) _pure_;
+static inline char *utf8_is_valid(const char *s) {
+        return utf8_is_valid_n(s, SIZE_MAX);
+}
+char *ascii_is_valid(const char *s) _pure_;
+char *ascii_is_valid_n(const char *str, size_t len);
+
+int utf8_to_ascii(const char *str, char replacement_char, char **ret);
+
+bool utf8_is_printable_newline(const char* str, size_t length, bool allow_newline) _pure_;
+#define utf8_is_printable(str, length) utf8_is_printable_newline(str, length, true)
+
+char *utf8_escape_invalid(const char *s);
+char *utf8_escape_non_printable_full(const char *str, size_t console_width, bool force_ellipsis);
+static inline char *utf8_escape_non_printable(const char *str) {
+        return utf8_escape_non_printable_full(str, SIZE_MAX, false);
+}
+
+size_t utf8_encode_unichar(char *out_utf8, char32_t g);
+size_t utf16_encode_unichar(char16_t *out, char32_t c);
+
+char *utf16_to_utf8(const char16_t *s, size_t length /* bytes! */);
+char16_t *utf8_to_utf16(const char *s, size_t length);
+
+size_t char16_strlen(const char16_t *s); /* returns the number of 16bit words in the string (not bytes!) */
+
+int utf8_encoded_valid_unichar(const char *str, size_t length);
+int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar);
+
+static inline bool utf16_is_surrogate(char16_t c) {
+        return c >= 0xd800U && c <= 0xdfffU;
+}
+
+static inline bool utf16_is_trailing_surrogate(char16_t c) {
+        return c >= 0xdc00U && c <= 0xdfffU;
+}
+
+static inline char32_t utf16_surrogate_pair_to_unichar(char16_t lead, char16_t trail) {
+        return ((((char32_t) lead - 0xd800U) << 10) + ((char32_t) trail - 0xdc00U) + 0x10000U);
+}
+
+size_t utf8_n_codepoints(const char *str);
+size_t utf8_console_width(const char *str);
diff --git a/src/basic/util.c b/src/basic/util.c
new file mode 100644
index 0000000..e6aaa2d
--- /dev/null
+++ b/src/basic/util.c
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include 
+#include 
+#include 
+
+#include "alloc-util.h"
+#include "build.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hostname-util.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "util.h"
+#include "virt.h"
+
+int saved_argc = 0;
+char **saved_argv = NULL;
+static int saved_in_initrd = -1;
+
+bool kexec_loaded(void) {
+       _cleanup_free_ char *s = NULL;
+
+       if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0)
+               return false;
+
+       return s[0] == '1';
+}
+
+int prot_from_flags(int flags) {
+
+        switch (flags & O_ACCMODE) {
+
+        case O_RDONLY:
+                return PROT_READ;
+
+        case O_WRONLY:
+                return PROT_WRITE;
+
+        case O_RDWR:
+                return PROT_READ|PROT_WRITE;
+
+        default:
+                return -EINVAL;
+        }
+}
+
+bool in_initrd(void) {
+        int r;
+
+        if (saved_in_initrd >= 0)
+                return saved_in_initrd;
+
+        /* If /etc/initrd-release exists, we're in an initrd.
+         * This can be overridden by setting SYSTEMD_IN_INITRD=0|1.
+         */
+
+        r = getenv_bool_secure("SYSTEMD_IN_INITRD");
+        if (r < 0 && r != -ENXIO)
+                log_debug_errno(r, "Failed to parse $SYSTEMD_IN_INITRD, ignoring: %m");
+
+        if (r >= 0)
+                saved_in_initrd = r > 0;
+        else {
+                r = access("/etc/initrd-release", F_OK);
+                if (r < 0 && errno != ENOENT)
+                        log_debug_errno(r, "Failed to check if /etc/initrd-release exists, assuming it does not: %m");
+                saved_in_initrd = r >= 0;
+        }
+
+        return saved_in_initrd;
+}
+
+void in_initrd_force(bool value) {
+        saved_in_initrd = value;
+}
+
+int container_get_leader(const char *machine, pid_t *pid) {
+        _cleanup_free_ char *s = NULL, *class = NULL;
+        const char *p;
+        pid_t leader;
+        int r;
+
+        assert(machine);
+        assert(pid);
+
+        if (streq(machine, ".host")) {
+                *pid = 1;
+                return 0;
+        }
+
+        if (!hostname_is_valid(machine, 0))
+                return -EINVAL;
+
+        p = strjoina("/run/systemd/machines/", machine);
+        r = parse_env_file(NULL, p,
+                           "LEADER", &s,
+                           "CLASS", &class);
+        if (r == -ENOENT)
+                return -EHOSTDOWN;
+        if (r < 0)
+                return r;
+        if (!s)
+                return -EIO;
+
+        if (!streq_ptr(class, "container"))
+                return -EIO;
+
+        r = parse_pid(s, &leader);
+        if (r < 0)
+                return r;
+        if (leader <= 1)
+                return -EIO;
+
+        *pid = leader;
+        return 0;
+}
+
+int version(void) {
+        printf("systemd " STRINGIFY(PROJECT_VERSION) " (" GIT_VERSION ")\n%s\n",
+               systemd_features);
+        return 0;
+}
+
+/* Turn off core dumps but only if we're running outside of a container. */
+void disable_coredumps(void) {
+        int r;
+
+        if (detect_container() > 0)
+                return;
+
+        r = write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", WRITE_STRING_FILE_DISABLE_BUFFER);
+        if (r < 0)
+                log_debug_errno(r, "Failed to turn off coredumps, ignoring: %m");
+}
diff --git a/src/basic/util.h b/src/basic/util.h
new file mode 100644
index 0000000..68ae3b5
--- /dev/null
+++ b/src/basic/util.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include 
+
+#include "macro.h"
+
+extern int saved_argc;
+extern char **saved_argv;
+
+static inline void save_argc_argv(int argc, char **argv) {
+
+        /* Protect against CVE-2021-4034 style attacks */
+        assert_se(argc > 0);
+        assert_se(argv);
+        assert_se(argv[0]);
+
+        saved_argc = argc;
+        saved_argv = argv;
+}
+
+bool kexec_loaded(void);
+
+int prot_from_flags(int flags) _const_;
+
+bool in_initrd(void);
+void in_initrd_force(bool value);
+
+/* Note: log2(0) == log2(1) == 0 here and below. */
+
+#define CONST_LOG2ULL(x) ((x) > 1 ? (unsigned) __builtin_clzll(x) ^ 63U : 0)
+#define NONCONST_LOG2ULL(x) ({                                     \
+                unsigned long long _x = (x);                       \
+                _x > 1 ? (unsigned) __builtin_clzll(_x) ^ 63U : 0; \
+        })
+#define LOG2ULL(x) __builtin_choose_expr(__builtin_constant_p(x), CONST_LOG2ULL(x), NONCONST_LOG2ULL(x))
+
+static inline unsigned log2u64(uint64_t x) {
+#if __SIZEOF_LONG_LONG__ == 8
+        return LOG2ULL(x);
+#else
+#  error "Wut?"
+#endif
+}
+
+static inline unsigned u32ctz(uint32_t n) {
+#if __SIZEOF_INT__ == 4
+        return n != 0 ? __builtin_ctz(n) : 32;
+#else
+#  error "Wut?"
+#endif
+}
+
+#define CONST_LOG2U(x) ((x) > 1 ? __SIZEOF_INT__ * 8 - __builtin_clz(x) - 1 : 0)
+#define NONCONST_LOG2U(x) ({                                             \
+                unsigned _x = (x);                                       \
+                _x > 1 ? __SIZEOF_INT__ * 8 - __builtin_clz(_x) - 1 : 0; \
+        })
+#define LOG2U(x) __builtin_choose_expr(__builtin_constant_p(x), CONST_LOG2U(x), NONCONST_LOG2U(x))
+
+static inline unsigned log2i(int x) {
+        return LOG2U(x);
+}
+
+static inline unsigned log2u(unsigned x) {
+        return LOG2U(x);
+}
+
+static inline unsigned log2u_round_up(unsigned x) {
+        if (x <= 1)
+                return 0;
+
+        return log2u(x - 1) + 1;
+}
+
+int container_get_leader(const char *machine, pid_t *pid);
+
+int version(void);
+
+void disable_coredumps(void);
diff --git a/src/basic/virt.c b/src/basic/virt.c
new file mode 100644
index 0000000..c2ed8d0
--- /dev/null
+++ b/src/basic/virt.c
@@ -0,0 +1,1056 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if defined(__i386__) || defined(__x86_64__)
+#include 
+#endif
+#include 
+#include 
+#include 
+#include 
+
+#include "alloc-util.h"
+#include "cgroup-util.h"
+#include "dirent-util.h"
+#include "env-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "missing_threads.h"
+#include "process-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "virt.h"
+
+enum {
+      SMBIOS_VM_BIT_SET,
+      SMBIOS_VM_BIT_UNSET,
+      SMBIOS_VM_BIT_UNKNOWN,
+};
+
+static Virtualization detect_vm_cpuid(void) {
+
+        /* CPUID is an x86 specific interface. */
+#if defined(__i386__) || defined(__x86_64__)
+
+        static const struct {
+                const char sig[13];
+                Virtualization id;
+        } vm_table[] = {
+                { "XenVMMXenVMM", VIRTUALIZATION_XEN       },
+                { "KVMKVMKVM",    VIRTUALIZATION_KVM       }, /* qemu with KVM */
+                { "Linux KVM Hv", VIRTUALIZATION_KVM       }, /* qemu with KVM + HyperV Enlightenments */
+                { "TCGTCGTCGTCG", VIRTUALIZATION_QEMU      }, /* qemu without KVM */
+                /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
+                { "VMwareVMware", VIRTUALIZATION_VMWARE    },
+                /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
+                { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
+                /* https://wiki.freebsd.org/bhyve */
+                { "bhyve bhyve ", VIRTUALIZATION_BHYVE     },
+                { "QNXQVMBSQG",   VIRTUALIZATION_QNX       },
+                /* https://projectacrn.org */
+                { "ACRNACRNACRN", VIRTUALIZATION_ACRN      },
+                /* https://www.lockheedmartin.com/en-us/products/Hardened-Security-for-Intel-Processors.html */
+                { "SRESRESRESRE", VIRTUALIZATION_SRE       },
+                { "Apple VZ",     VIRTUALIZATION_APPLE     },
+        };
+
+        uint32_t eax, ebx, ecx, edx;
+        bool hypervisor;
+
+        /* http://lwn.net/Articles/301888/ */
+
+        /* First detect whether there is a hypervisor */
+        if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
+                return VIRTUALIZATION_NONE;
+
+        hypervisor = ecx & 0x80000000U;
+
+        if (hypervisor) {
+                union {
+                        uint32_t sig32[3];
+                        char text[13];
+                } sig = {};
+
+                /* There is a hypervisor, see what it is */
+                __cpuid(0x40000000U, eax, ebx, ecx, edx);
+
+                sig.sig32[0] = ebx;
+                sig.sig32[1] = ecx;
+                sig.sig32[2] = edx;
+
+                log_debug("Virtualization found, CPUID=%s", sig.text);
+
+                for (size_t i = 0; i < ELEMENTSOF(vm_table); i++)
+                        if (memcmp_nn(sig.text, sizeof(sig.text),
+                                      vm_table[i].sig, sizeof(vm_table[i].sig)) == 0)
+                                return vm_table[i].id;
+
+                log_debug("Unknown virtualization with CPUID=%s. Add to vm_table[]?", sig.text);
+                return VIRTUALIZATION_VM_OTHER;
+        }
+#endif
+        log_debug("No virtualization found in CPUID");
+
+        return VIRTUALIZATION_NONE;
+}
+
+static Virtualization detect_vm_device_tree(void) {
+#if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
+        _cleanup_free_ char *hvtype = NULL;
+        int r;
+
+        r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
+        if (r == -ENOENT) {
+                _cleanup_closedir_ DIR *dir = NULL;
+                _cleanup_free_ char *compat = NULL;
+
+                if (access("/proc/device-tree/ibm,partition-name", F_OK) == 0 &&
+                    access("/proc/device-tree/hmc-managed?", F_OK) == 0 &&
+                    access("/proc/device-tree/chosen/qemu,graphic-width", F_OK) != 0)
+                        return VIRTUALIZATION_POWERVM;
+
+                dir = opendir("/proc/device-tree");
+                if (!dir) {
+                        if (errno == ENOENT) {
+                                log_debug_errno(errno, "/proc/device-tree: %m");
+                                return VIRTUALIZATION_NONE;
+                        }
+                        return -errno;
+                }
+
+                FOREACH_DIRENT(de, dir, return -errno)
+                        if (strstr(de->d_name, "fw-cfg")) {
+                                log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", de->d_name);
+                                return VIRTUALIZATION_QEMU;
+                        }
+
+                r = read_one_line_file("/proc/device-tree/compatible", &compat);
+                if (r < 0 && r != -ENOENT)
+                        return r;
+                if (r >= 0 && streq(compat, "qemu,pseries")) {
+                        log_debug("Virtualization %s found in /proc/device-tree/compatible", compat);
+                        return VIRTUALIZATION_QEMU;
+                }
+
+                log_debug("No virtualization found in /proc/device-tree/*");
+                return VIRTUALIZATION_NONE;
+        } else if (r < 0)
+                return r;
+
+        log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
+        if (streq(hvtype, "linux,kvm"))
+                return VIRTUALIZATION_KVM;
+        else if (strstr(hvtype, "xen"))
+                return VIRTUALIZATION_XEN;
+        else if (strstr(hvtype, "vmware"))
+                return VIRTUALIZATION_VMWARE;
+        else
+                return VIRTUALIZATION_VM_OTHER;
+#else
+        log_debug("This platform does not support /proc/device-tree");
+        return VIRTUALIZATION_NONE;
+#endif
+}
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch64)
+static Virtualization detect_vm_dmi_vendor(void) {
+        static const char* const dmi_vendors[] = {
+                "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
+                "/sys/class/dmi/id/sys_vendor",
+                "/sys/class/dmi/id/board_vendor",
+                "/sys/class/dmi/id/bios_vendor",
+                "/sys/class/dmi/id/product_version", /* For Hyper-V VMs test */
+                NULL
+        };
+
+        static const struct {
+                const char *vendor;
+                Virtualization id;
+        } dmi_vendor_table[] = {
+                { "KVM",                   VIRTUALIZATION_KVM       },
+                { "OpenStack",             VIRTUALIZATION_KVM       }, /* Detect OpenStack instance as KVM in non x86 architecture */
+                { "KubeVirt",              VIRTUALIZATION_KVM       }, /* Detect KubeVirt instance as KVM in non x86 architecture */
+                { "Amazon EC2",            VIRTUALIZATION_AMAZON    },
+                { "QEMU",                  VIRTUALIZATION_QEMU      },
+                { "VMware",                VIRTUALIZATION_VMWARE    }, /* https://kb.vmware.com/s/article/1009458 */
+                { "VMW",                   VIRTUALIZATION_VMWARE    },
+                { "innotek GmbH",          VIRTUALIZATION_ORACLE    },
+                { "VirtualBox",            VIRTUALIZATION_ORACLE    },
+                { "Xen",                   VIRTUALIZATION_XEN       },
+                { "Bochs",                 VIRTUALIZATION_BOCHS     },
+                { "Parallels",             VIRTUALIZATION_PARALLELS },
+                /* https://wiki.freebsd.org/bhyve */
+                { "BHYVE",                 VIRTUALIZATION_BHYVE     },
+                { "Hyper-V",               VIRTUALIZATION_MICROSOFT },
+                { "Apple Virtualization",  VIRTUALIZATION_APPLE     },
+                { "Google Compute Engine", VIRTUALIZATION_GOOGLE    }, /* https://cloud.google.com/run/docs/container-contract#sandbox */
+        };
+        int r;
+
+        STRV_FOREACH(vendor, dmi_vendors) {
+                _cleanup_free_ char *s = NULL;
+
+                r = read_one_line_file(*vendor, &s);
+                if (r < 0) {
+                        if (r == -ENOENT)
+                                continue;
+
+                        return r;
+                }
+
+                for (size_t i = 0; i < ELEMENTSOF(dmi_vendor_table); i++)
+                        if (startswith(s, dmi_vendor_table[i].vendor)) {
+                                log_debug("Virtualization %s found in DMI (%s)", s, *vendor);
+                                return dmi_vendor_table[i].id;
+                        }
+        }
+        log_debug("No virtualization found in DMI vendor table.");
+        return VIRTUALIZATION_NONE;
+}
+
+static int detect_vm_smbios(void) {
+        /* The SMBIOS BIOS Charateristics Extension Byte 2 (Section 2.1.2.2 of
+         * https://www.dmtf.org/sites/default/files/standards/documents/DSP0134_3.4.0.pdf), specifies that
+         * the 4th bit being set indicates a VM. The BIOS Characteristics table is exposed via the kernel in
+         * /sys/firmware/dmi/entries/0-0. Note that in the general case, this bit being unset should not
+         * imply that the system is running on bare-metal.  For example, QEMU 3.1.0 (with or without KVM)
+         * with SeaBIOS does not set this bit. */
+        _cleanup_free_ char *s = NULL;
+        size_t readsize;
+        int r;
+
+        r = read_full_virtual_file("/sys/firmware/dmi/entries/0-0/raw", &s, &readsize);
+        if (r < 0) {
+                log_debug_errno(r, "Unable to read /sys/firmware/dmi/entries/0-0/raw, "
+                                "using the virtualization information found in DMI vendor table, ignoring: %m");
+                return SMBIOS_VM_BIT_UNKNOWN;
+        }
+        if (readsize < 20 || s[1] < 20) {
+                /* The spec indicates that byte 1 contains the size of the table, 0x12 + the number of
+                 * extension bytes. The data we're interested in is in extension byte 2, which would be at
+                 * 0x13. If we didn't read that much data, or if the BIOS indicates that we don't have that
+                 * much data, we don't infer anything from the SMBIOS. */
+                log_debug("Only read %zu bytes from /sys/firmware/dmi/entries/0-0/raw (expected 20). "
+                          "Using the virtualization information found in DMI vendor table.", readsize);
+                return SMBIOS_VM_BIT_UNKNOWN;
+        }
+
+        uint8_t byte = (uint8_t) s[19];
+        if (byte & (1U<<4)) {
+                log_debug("DMI BIOS Extension table indicates virtualization.");
+                return SMBIOS_VM_BIT_SET;
+        }
+        log_debug("DMI BIOS Extension table does not indicate virtualization.");
+        return SMBIOS_VM_BIT_UNSET;
+}
+#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch64) */
+
+static Virtualization detect_vm_dmi(void) {
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch64)
+
+        int r;
+        r = detect_vm_dmi_vendor();
+
+        /* The DMI vendor tables in /sys/class/dmi/id don't help us distinguish between Amazon EC2
+         * virtual machines and bare-metal instances, so we need to look at SMBIOS. */
+        if (r == VIRTUALIZATION_AMAZON) {
+                switch (detect_vm_smbios()) {
+                case SMBIOS_VM_BIT_SET:
+                        return VIRTUALIZATION_AMAZON;
+                case SMBIOS_VM_BIT_UNSET:
+                        return VIRTUALIZATION_NONE;
+                case SMBIOS_VM_BIT_UNKNOWN: {
+                        /* The DMI information we are after is only accessible to the root user,
+                         * so we fallback to using the product name which is less restricted
+                         * to distinguish metal systems from virtualized instances */
+                        _cleanup_free_ char *s = NULL;
+                        const char *e;
+
+                        r = read_full_virtual_file("/sys/class/dmi/id/product_name", &s, NULL);
+                        /* In EC2, virtualized is much more common than metal, so if for some reason
+                         * we fail to read the DMI data, assume we are virtualized. */
+                        if (r < 0) {
+                                log_debug_errno(r, "Can't read /sys/class/dmi/id/product_name,"
+                                                " assuming virtualized: %m");
+                                return VIRTUALIZATION_AMAZON;
+                        }
+                        e = strstrafter(truncate_nl(s), ".metal");
+                        if (e && IN_SET(*e, 0, '-')) {
+                                log_debug("DMI product name has '.metal', assuming no virtualization");
+                                return VIRTUALIZATION_NONE;
+                        } else
+                                return VIRTUALIZATION_AMAZON;
+                }
+                default:
+                        assert_not_reached();
+              }
+        }
+
+        /* If we haven't identified a VM, but the firmware indicates that there is one, indicate as much. We
+         * have no further information about what it is. */
+        if (r == VIRTUALIZATION_NONE && detect_vm_smbios() == SMBIOS_VM_BIT_SET)
+                return VIRTUALIZATION_VM_OTHER;
+        return r;
+#else
+        return VIRTUALIZATION_NONE;
+#endif
+}
+
+#define XENFEAT_dom0 11 /* xen/include/public/features.h */
+#define PATH_FEATURES "/sys/hypervisor/properties/features"
+/* Returns -errno, or 0 for domU, or 1 for dom0 */
+static int detect_vm_xen_dom0(void) {
+        _cleanup_free_ char *domcap = NULL;
+        int r;
+
+        r = read_one_line_file(PATH_FEATURES, &domcap);
+        if (r < 0 && r != -ENOENT)
+                return r;
+        if (r >= 0) {
+                unsigned long features;
+
+                /* Here, we need to use sscanf() instead of safe_atoul()
+                 * as the string lacks the leading "0x". */
+                r = sscanf(domcap, "%lx", &features);
+                if (r == 1) {
+                        r = !!(features & (1U << XENFEAT_dom0));
+                        log_debug("Virtualization XEN, found %s with value %08lx, "
+                                  "XENFEAT_dom0 (indicating the 'hardware domain') is%s set.",
+                                  PATH_FEATURES, features, r ? "" : " not");
+                        return r;
+                }
+                log_debug("Virtualization XEN, found %s, unhandled content '%s'",
+                          PATH_FEATURES, domcap);
+        }
+
+        r = read_one_line_file("/proc/xen/capabilities", &domcap);
+        if (r == -ENOENT) {
+                log_debug("Virtualization XEN because /proc/xen/capabilities does not exist");
+                return 0;
+        }
+        if (r < 0)
+                return r;
+
+        for (const char *i = domcap;;) {
+                _cleanup_free_ char *cap = NULL;
+
+                r = extract_first_word(&i, &cap, ",", 0);
+                if (r < 0)
+                        return r;
+                if (r == 0) {
+                        log_debug("Virtualization XEN DomU found (/proc/xen/capabilities)");
+                        return 0;
+                }
+
+                if (streq(cap, "control_d")) {
+                        log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
+                        return 1;
+                }
+        }
+}
+
+static Virtualization detect_vm_xen(void) {
+        /* The presence of /proc/xen indicates some form of a Xen domain
+           The check for Dom0 is handled outside this function */
+        if (access("/proc/xen", F_OK) < 0) {
+                log_debug("Virtualization XEN not found, /proc/xen does not exist");
+                return VIRTUALIZATION_NONE;
+        }
+        log_debug("Virtualization XEN found (/proc/xen exists)");
+        return VIRTUALIZATION_XEN;
+}
+
+static Virtualization detect_vm_hypervisor(void) {
+        _cleanup_free_ char *hvtype = NULL;
+        int r;
+
+        r = read_one_line_file("/sys/hypervisor/type", &hvtype);
+        if (r == -ENOENT)
+                return VIRTUALIZATION_NONE;
+        if (r < 0)
+                return r;
+
+        log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
+
+        if (streq(hvtype, "xen"))
+                return VIRTUALIZATION_XEN;
+        else
+                return VIRTUALIZATION_VM_OTHER;
+}
+
+static Virtualization detect_vm_uml(void) {
+        _cleanup_fclose_ FILE *f = NULL;
+        int r;
+
+        /* Detect User-Mode Linux by reading /proc/cpuinfo */
+        f = fopen("/proc/cpuinfo", "re");
+        if (!f) {
+                if (errno == ENOENT) {
+                        log_debug("/proc/cpuinfo not found, assuming no UML virtualization.");
+                        return VIRTUALIZATION_NONE;
+                }
+                return -errno;
+        }
+
+        for (;;) {
+                _cleanup_free_ char *line = NULL;
+                const char *t;
+
+                r = read_line(f, LONG_LINE_MAX, &line);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        break;
+
+                t = startswith(line, "vendor_id\t: ");
+                if (t) {
+                        if (startswith(t, "User Mode Linux")) {
+                                log_debug("UML virtualization found in /proc/cpuinfo");
+                                return VIRTUALIZATION_UML;
+                        }
+
+                        break;
+                }
+        }
+
+        log_debug("UML virtualization not found in /proc/cpuinfo.");
+        return VIRTUALIZATION_NONE;
+}
+
+static Virtualization detect_vm_zvm(void) {
+
+#if defined(__s390__)
+        _cleanup_free_ char *t = NULL;
+        int r;
+
+        r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
+        if (r == -ENOENT)
+                return VIRTUALIZATION_NONE;
+        if (r < 0)
+                return r;
+
+        log_debug("Virtualization %s found in /proc/sysinfo", t);
+        if (streq(t, "z/VM"))
+                return VIRTUALIZATION_ZVM;
+        else
+                return VIRTUALIZATION_KVM;
+#else
+        log_debug("This platform does not support /proc/sysinfo");
+        return VIRTUALIZATION_NONE;
+#endif
+}
+
+/* Returns a short identifier for the various VM implementations */
+Virtualization detect_vm(void) {
+        static thread_local Virtualization cached_found = _VIRTUALIZATION_INVALID;
+        bool other = false;
+        int xen_dom0 = 0;
+        Virtualization v, dmi;
+
+        if (cached_found >= 0)
+                return cached_found;
+
+        /* We have to use the correct order here:
+         *
+         * → First, try to detect Oracle Virtualbox, Amazon EC2 Nitro, and Parallels, even if they use KVM,
+         *   as well as Xen even if it cloaks as Microsoft Hyper-V. Attempt to detect uml at this stage also
+         *   since it runs as a user-process nested inside other VMs. Also check for Xen now, because Xen PV
+         *   mode does not override CPUID when nested inside another hypervisor.
+         *
+         * → Second, try to detect from CPUID, this will report KVM for whatever software is used even if
+         *   info in DMI is overwritten.
+         *
+         * → Third, try to detect from DMI. */
+
+        dmi = detect_vm_dmi();
+        if (IN_SET(dmi,
+                   VIRTUALIZATION_ORACLE,
+                   VIRTUALIZATION_XEN,
+                   VIRTUALIZATION_AMAZON,
+                   VIRTUALIZATION_PARALLELS)) {
+                v = dmi;
+                goto finish;
+        }
+
+        /* Detect UML */
+        v = detect_vm_uml();
+        if (v < 0)
+                return v;
+        if (v != VIRTUALIZATION_NONE)
+                goto finish;
+
+        /* Detect Xen */
+        v = detect_vm_xen();
+        if (v < 0)
+                return v;
+        if (v == VIRTUALIZATION_XEN) {
+                 /* If we are Dom0, then we expect to not report as a VM. However, as we might be nested
+                  * inside another hypervisor which can be detected via the CPUID check, wait to report this
+                  * until after the CPUID check. */
+                xen_dom0 = detect_vm_xen_dom0();
+                if (xen_dom0 < 0)
+                        return xen_dom0;
+                if (xen_dom0 == 0)
+                        goto finish;
+
+                v = VIRTUALIZATION_NONE;
+        } else if (v != VIRTUALIZATION_NONE)
+                assert_not_reached();
+
+        /* Detect from CPUID */
+        v = detect_vm_cpuid();
+        if (v < 0)
+                return v;
+        if (v == VIRTUALIZATION_VM_OTHER)
+                other = true;
+        else if (v != VIRTUALIZATION_NONE)
+                goto finish;
+
+        /* If we are in Dom0 and have not yet finished, finish with the result of detect_vm_cpuid */
+        if (xen_dom0 > 0)
+                goto finish;
+
+        /* Now, let's get back to DMI */
+        if (dmi < 0)
+                return dmi;
+        if (dmi == VIRTUALIZATION_VM_OTHER)
+                other = true;
+        else if (dmi != VIRTUALIZATION_NONE) {
+                v = dmi;
+                goto finish;
+        }
+
+        /* Check high-level hypervisor sysfs file */
+        v = detect_vm_hypervisor();
+        if (v < 0)
+                return v;
+        if (v == VIRTUALIZATION_VM_OTHER)
+                other = true;
+        else if (v != VIRTUALIZATION_NONE)
+                goto finish;
+
+        v = detect_vm_device_tree();
+        if (v < 0)
+                return v;
+        if (v == VIRTUALIZATION_VM_OTHER)
+                other = true;
+        else if (v != VIRTUALIZATION_NONE)
+                goto finish;
+
+        v = detect_vm_zvm();
+        if (v < 0)
+                return v;
+
+finish:
+        if (v == VIRTUALIZATION_NONE && other)
+                v = VIRTUALIZATION_VM_OTHER;
+
+        cached_found = v;
+        log_debug("Found VM virtualization %s", virtualization_to_string(v));
+        return v;
+}
+
+static const char *const container_table[_VIRTUALIZATION_MAX] = {
+        [VIRTUALIZATION_LXC]            = "lxc",
+        [VIRTUALIZATION_LXC_LIBVIRT]    = "lxc-libvirt",
+        [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
+        [VIRTUALIZATION_DOCKER]         = "docker",
+        [VIRTUALIZATION_PODMAN]         = "podman",
+        [VIRTUALIZATION_RKT]            = "rkt",
+        [VIRTUALIZATION_WSL]            = "wsl",
+        [VIRTUALIZATION_PROOT]          = "proot",
+        [VIRTUALIZATION_POUCH]          = "pouch",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(container, int);
+
+static int running_in_cgroupns(void) {
+        int r;
+
+        if (!cg_ns_supported())
+                return false;
+
+        r = cg_all_unified();
+        if (r < 0)
+                return r;
+
+        if (r) {
+                /* cgroup v2 */
+
+                r = access("/sys/fs/cgroup/cgroup.events", F_OK);
+                if (r < 0) {
+                        if (errno != ENOENT)
+                                return -errno;
+                        /* All kernel versions have cgroup.events in nested cgroups. */
+                        return false;
+                }
+
+                /* There's no cgroup.type in the root cgroup, and future kernel versions
+                 * are unlikely to add it since cgroup.type is something that makes no sense
+                 * whatsoever in the root cgroup. */
+                r = access("/sys/fs/cgroup/cgroup.type", F_OK);
+                if (r == 0)
+                        return true;
+                if (r < 0 && errno != ENOENT)
+                        return -errno;
+
+                /* On older kernel versions, there's no cgroup.type */
+                r = access("/sys/kernel/cgroup/features", F_OK);
+                if (r < 0) {
+                        if (errno != ENOENT)
+                                return -errno;
+                        /* This is an old kernel that we know for sure has cgroup.events
+                         * only in nested cgroups. */
+                        return true;
+                }
+
+                /* This is a recent kernel, and cgroup.type doesn't exist, so we must be
+                 * in the root cgroup. */
+                return false;
+        } else {
+                /* cgroup v1 */
+
+                /* If systemd controller is not mounted, do not even bother. */
+                r = access("/sys/fs/cgroup/systemd", F_OK);
+                if (r < 0) {
+                        if (errno != ENOENT)
+                                return -errno;
+                        return false;
+                }
+
+                /* release_agent only exists in the root cgroup. */
+                r = access("/sys/fs/cgroup/systemd/release_agent", F_OK);
+                if (r < 0) {
+                        if (errno != ENOENT)
+                                return -errno;
+                        return true;
+                }
+
+                return false;
+        }
+}
+
+static Virtualization detect_container_files(void) {
+        static const struct {
+                const char *file_path;
+                Virtualization id;
+        } container_file_table[] = {
+                /* https://github.com/containers/podman/issues/6192 */
+                /* https://github.com/containers/podman/issues/3586#issuecomment-661918679 */
+                { "/run/.containerenv", VIRTUALIZATION_PODMAN },
+                /* https://github.com/moby/moby/issues/18355 */
+                /* Docker must be the last in this table, see below. */
+                { "/.dockerenv",        VIRTUALIZATION_DOCKER },
+        };
+
+        for (size_t i = 0; i < ELEMENTSOF(container_file_table); i++) {
+                if (access(container_file_table[i].file_path, F_OK) >= 0)
+                        return container_file_table[i].id;
+
+                if (errno != ENOENT)
+                        log_debug_errno(errno,
+                                        "Checking if %s exists failed, ignoring: %m",
+                                        container_file_table[i].file_path);
+        }
+
+        return VIRTUALIZATION_NONE;
+}
+
+Virtualization detect_container(void) {
+        static thread_local Virtualization cached_found = _VIRTUALIZATION_INVALID;
+        _cleanup_free_ char *m = NULL, *o = NULL, *p = NULL;
+        const char *e = NULL;
+        Virtualization v;
+        int r;
+
+        if (cached_found >= 0)
+                return cached_found;
+
+        /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
+        if (access("/proc/vz", F_OK) < 0) {
+                if (errno != ENOENT)
+                        log_debug_errno(errno, "Failed to check if /proc/vz exists, ignoring: %m");
+        } else if (access("/proc/bc", F_OK) < 0) {
+                if (errno == ENOENT) {
+                        v = VIRTUALIZATION_OPENVZ;
+                        goto finish;
+                }
+
+                log_debug_errno(errno, "Failed to check if /proc/bc exists, ignoring: %m");
+        }
+
+        /* "Official" way of detecting WSL https://github.com/Microsoft/WSL/issues/423#issuecomment-221627364 */
+        r = read_one_line_file("/proc/sys/kernel/osrelease", &o);
+        if (r < 0)
+                log_debug_errno(r, "Failed to read /proc/sys/kernel/osrelease, ignoring: %m");
+        else if (strstr(o, "Microsoft") || strstr(o, "WSL")) {
+                v = VIRTUALIZATION_WSL;
+                goto finish;
+        }
+
+        /* proot doesn't use PID namespacing, so we can just check if we have a matching tracer for this
+         * invocation without worrying about it being elsewhere.
+         */
+        r = get_proc_field("/proc/self/status", "TracerPid", WHITESPACE, &p);
+        if (r < 0)
+                log_debug_errno(r, "Failed to read our own trace PID, ignoring: %m");
+        else if (!streq(p, "0")) {
+                pid_t ptrace_pid;
+
+                r = parse_pid(p, &ptrace_pid);
+                if (r < 0)
+                        log_debug_errno(r, "Failed to parse our own tracer PID, ignoring: %m");
+                else {
+                        _cleanup_free_ char *ptrace_comm = NULL;
+                        const char *pf;
+
+                        pf = procfs_file_alloca(ptrace_pid, "comm");
+                        r = read_one_line_file(pf, &ptrace_comm);
+                        if (r < 0)
+                                log_debug_errno(r, "Failed to read %s, ignoring: %m", pf);
+                        else if (startswith(ptrace_comm, "proot")) {
+                                v = VIRTUALIZATION_PROOT;
+                                goto finish;
+                        }
+                }
+        }
+
+        /* The container manager might have placed this in the /run/host/ hierarchy for us, which is best
+         * because we can be consumed just like that, without special privileges. */
+        r = read_one_line_file("/run/host/container-manager", &m);
+        if (r > 0) {
+                e = m;
+                goto translate_name;
+        }
+        if (!IN_SET(r, -ENOENT, 0))
+                return log_debug_errno(r, "Failed to read /run/host/container-manager: %m");
+
+        if (getpid_cached() == 1) {
+                /* If we are PID 1 we can just check our own environment variable, and that's authoritative.
+                 * We distinguish three cases:
+                 * - the variable is not defined → we jump to other checks
+                 * - the variable is defined to an empty value → we are not in a container
+                 * - anything else → some container, either one of the known ones or "container-other"
+                 */
+                e = getenv("container");
+                if (!e)
+                        goto check_files;
+                if (isempty(e)) {
+                        v = VIRTUALIZATION_NONE;
+                        goto finish;
+                }
+
+                goto translate_name;
+        }
+
+        /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
+         * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
+        r = read_one_line_file("/run/systemd/container", &m);
+        if (r > 0) {
+                e = m;
+                goto translate_name;
+        }
+        if (!IN_SET(r, -ENOENT, 0))
+                return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
+
+        /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */
+        r = getenv_for_pid(1, "container", &m);
+        if (r > 0) {
+                e = m;
+                goto translate_name;
+        }
+        if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
+                log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
+
+check_files:
+        /* Check for existence of some well-known files. We only do this after checking
+         * for other specific container managers, otherwise we risk mistaking another
+         * container manager for Docker: the /.dockerenv file could inadvertently end up
+         * in a file system image. */
+        v = detect_container_files();
+        if (v < 0)
+                return v;
+        if (v != VIRTUALIZATION_NONE)
+                goto finish;
+
+        r = running_in_cgroupns();
+        if (r > 0) {
+                v = VIRTUALIZATION_CONTAINER_OTHER;
+                goto finish;
+        }
+        if (r < 0)
+                log_debug_errno(r, "Failed to detect cgroup namespace: %m");
+
+        /* If none of that worked, give up, assume no container manager. */
+        v = VIRTUALIZATION_NONE;
+        goto finish;
+
+translate_name:
+        if (streq(e, "oci")) {
+                /* Some images hardcode container=oci, but OCI is not a specific container manager.
+                 * Try to detect one based on well-known files. */
+                v = detect_container_files();
+                if (v == VIRTUALIZATION_NONE)
+                        v = VIRTUALIZATION_CONTAINER_OTHER;
+                goto finish;
+        }
+        v = container_from_string(e);
+        if (v < 0)
+                v = VIRTUALIZATION_CONTAINER_OTHER;
+
+finish:
+        log_debug("Found container virtualization %s.", virtualization_to_string(v));
+        cached_found = v;
+        return v;
+}
+
+Virtualization detect_virtualization(void) {
+        int v;
+
+        v = detect_container();
+        if (v != VIRTUALIZATION_NONE)
+                return v;
+
+        return detect_vm();
+}
+
+static int userns_has_mapping(const char *name) {
+        _cleanup_fclose_ FILE *f = NULL;
+        uid_t a, b, c;
+        int r;
+
+        f = fopen(name, "re");
+        if (!f) {
+                log_debug_errno(errno, "Failed to open %s: %m", name);
+                return errno == ENOENT ? false : -errno;
+        }
+
+        errno = 0;
+        r = fscanf(f, UID_FMT " " UID_FMT " " UID_FMT "\n", &a, &b, &c);
+        if (r == EOF) {
+                if (ferror(f))
+                        return log_debug_errno(errno_or_else(EIO), "Failed to read %s: %m", name);
+
+                log_debug("%s is empty, we're in an uninitialized user namespace", name);
+                return true;
+        }
+        if (r != 3)
+                return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "Failed to parse %s: %m", name);
+
+        if (a == 0 && b == 0 && c == UINT32_MAX) {
+                /* The kernel calls mappings_overlap() and does not allow overlaps */
+                log_debug("%s has a full 1:1 mapping", name);
+                return false;
+        }
+
+        /* Anything else implies that we are in a user namespace */
+        log_debug("Mapping found in %s, we're in a user namespace", name);
+        return true;
+}
+
+int running_in_userns(void) {
+        _cleanup_free_ char *line = NULL;
+        int r;
+
+        r = userns_has_mapping("/proc/self/uid_map");
+        if (r != 0)
+                return r;
+
+        r = userns_has_mapping("/proc/self/gid_map");
+        if (r != 0)
+                return r;
+
+        /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also possible to compile a
+         * kernel without CONFIG_USER_NS, in which case "setgroups" also does not exist. We cannot
+         * distinguish those two cases, so assume that we're running on a stripped-down recent kernel, rather
+         * than on an old one, and if the file is not found, return false. */
+        r = read_virtual_file("/proc/self/setgroups", SIZE_MAX, &line, NULL);
+        if (r < 0) {
+                log_debug_errno(r, "/proc/self/setgroups: %m");
+                return r == -ENOENT ? false : r;
+        }
+
+        strstrip(line); /* remove trailing newline */
+
+        r = streq(line, "deny");
+        /* See user_namespaces(7) for a description of this "setgroups" contents. */
+        log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
+        return r;
+}
+
+int running_in_chroot(void) {
+        int r;
+
+        if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
+                return 0;
+
+        r = files_same("/proc/1/root", "/", 0);
+        if (r < 0)
+                return r;
+
+        return r == 0;
+}
+
+#if defined(__i386__) || defined(__x86_64__)
+struct cpuid_table_entry {
+        uint32_t flag_bit;
+        const char *name;
+};
+
+static const struct cpuid_table_entry leaf1_edx[] = {
+        {  0, "fpu"     },
+        {  1, "vme"     },
+        {  2, "de"      },
+        {  3, "pse"     },
+        {  4, "tsc"     },
+        {  5, "msr"     },
+        {  6, "pae"     },
+        {  7, "mce"     },
+        {  8, "cx8"     },
+        {  9, "apic"    },
+        { 11, "sep"     },
+        { 12, "mtrr"    },
+        { 13, "pge"     },
+        { 14, "mca"     },
+        { 15, "cmov"    },
+        { 16, "pat"     },
+        { 17, "pse36"   },
+        { 19, "clflush" },
+        { 23, "mmx"     },
+        { 24, "fxsr"    },
+        { 25, "sse"     },
+        { 26, "sse2"    },
+        { 28, "ht"      },
+};
+
+static const struct cpuid_table_entry leaf1_ecx[] = {
+        {  0, "pni"     },
+        {  1, "pclmul"  },
+        {  3, "monitor" },
+        {  9, "ssse3"   },
+        { 12, "fma3"    },
+        { 13, "cx16"    },
+        { 19, "sse4_1"  },
+        { 20, "sse4_2"  },
+        { 22, "movbe"   },
+        { 23, "popcnt"  },
+        { 25, "aes"     },
+        { 26, "xsave"   },
+        { 27, "osxsave" },
+        { 28, "avx"     },
+        { 29, "f16c"    },
+        { 30, "rdrand"  },
+};
+
+static const struct cpuid_table_entry leaf7_ebx[] = {
+        {  3, "bmi1"   },
+        {  5, "avx2"   },
+        {  8, "bmi2"   },
+        { 18, "rdseed" },
+        { 19, "adx"    },
+        { 29, "sha_ni" },
+};
+
+static const struct cpuid_table_entry leaf81_edx[] = {
+        { 11, "syscall" },
+        { 27, "rdtscp"  },
+        { 29, "lm"      },
+};
+
+static const struct cpuid_table_entry leaf81_ecx[] = {
+        {  0, "lahf_lm" },
+        {  5, "abm"     },
+};
+
+static const struct cpuid_table_entry leaf87_edx[] = {
+        {  8, "constant_tsc" },
+};
+
+static bool given_flag_in_set(const char *flag, const struct cpuid_table_entry *set, size_t set_size, uint32_t val) {
+        for (size_t i = 0; i < set_size; i++) {
+                if ((UINT32_C(1) << set[i].flag_bit) & val &&
+                                streq(flag, set[i].name))
+                        return true;
+        }
+        return false;
+}
+
+static bool real_has_cpu_with_flag(const char *flag) {
+        uint32_t eax, ebx, ecx, edx;
+
+        if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
+                if (given_flag_in_set(flag, leaf1_ecx, ELEMENTSOF(leaf1_ecx), ecx))
+                        return true;
+
+                if (given_flag_in_set(flag, leaf1_edx, ELEMENTSOF(leaf1_edx), edx))
+                        return true;
+        }
+
+        if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
+                if (given_flag_in_set(flag, leaf7_ebx, ELEMENTSOF(leaf7_ebx), ebx))
+                        return true;
+        }
+
+        if (__get_cpuid(0x80000001U, &eax, &ebx, &ecx, &edx)) {
+                if (given_flag_in_set(flag, leaf81_ecx, ELEMENTSOF(leaf81_ecx), ecx))
+                        return true;
+
+                if (given_flag_in_set(flag, leaf81_edx, ELEMENTSOF(leaf81_edx), edx))
+                        return true;
+        }
+
+        if (__get_cpuid(0x80000007U, &eax, &ebx, &ecx, &edx))
+                if (given_flag_in_set(flag, leaf87_edx, ELEMENTSOF(leaf87_edx), edx))
+                        return true;
+
+        return false;
+}
+#endif
+
+bool has_cpu_with_flag(const char *flag) {
+        /* CPUID is an x86 specific interface. Assume on all others that no CPUs have those flags. */
+#if defined(__i386__) || defined(__x86_64__)
+        return real_has_cpu_with_flag(flag);
+#else
+        return false;
+#endif
+}
+
+static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
+        [VIRTUALIZATION_NONE]            = "none",
+        [VIRTUALIZATION_KVM]             = "kvm",
+        [VIRTUALIZATION_AMAZON]          = "amazon",
+        [VIRTUALIZATION_QEMU]            = "qemu",
+        [VIRTUALIZATION_BOCHS]           = "bochs",
+        [VIRTUALIZATION_XEN]             = "xen",
+        [VIRTUALIZATION_UML]             = "uml",
+        [VIRTUALIZATION_VMWARE]          = "vmware",
+        [VIRTUALIZATION_ORACLE]          = "oracle",
+        [VIRTUALIZATION_MICROSOFT]       = "microsoft",
+        [VIRTUALIZATION_ZVM]             = "zvm",
+        [VIRTUALIZATION_PARALLELS]       = "parallels",
+        [VIRTUALIZATION_BHYVE]           = "bhyve",
+        [VIRTUALIZATION_QNX]             = "qnx",
+        [VIRTUALIZATION_ACRN]            = "acrn",
+        [VIRTUALIZATION_POWERVM]         = "powervm",
+        [VIRTUALIZATION_APPLE]           = "apple",
+        [VIRTUALIZATION_SRE]             = "sre",
+        [VIRTUALIZATION_GOOGLE]          = "google",
+        [VIRTUALIZATION_VM_OTHER]        = "vm-other",
+
+        [VIRTUALIZATION_SYSTEMD_NSPAWN]  = "systemd-nspawn",
+        [VIRTUALIZATION_LXC_LIBVIRT]     = "lxc-libvirt",
+        [VIRTUALIZATION_LXC]             = "lxc",
+        [VIRTUALIZATION_OPENVZ]          = "openvz",
+        [VIRTUALIZATION_DOCKER]          = "docker",
+        [VIRTUALIZATION_PODMAN]          = "podman",
+        [VIRTUALIZATION_RKT]             = "rkt",
+        [VIRTUALIZATION_WSL]             = "wsl",
+        [VIRTUALIZATION_PROOT]           = "proot",
+        [VIRTUALIZATION_POUCH]           = "pouch",
+        [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(virtualization, Virtualization);
diff --git a/src/basic/virt.h b/src/basic/virt.h
new file mode 100644
index 0000000..dea39e4
--- /dev/null
+++ b/src/basic/virt.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include 
+
+#include "errno-list.h"
+#include "macro.h"
+
+typedef enum Virtualization {
+        VIRTUALIZATION_NONE = 0,
+
+        VIRTUALIZATION_VM_FIRST,
+        VIRTUALIZATION_KVM = VIRTUALIZATION_VM_FIRST,
+        VIRTUALIZATION_AMAZON,
+        VIRTUALIZATION_QEMU,
+        VIRTUALIZATION_BOCHS,
+        VIRTUALIZATION_XEN,
+        VIRTUALIZATION_UML,
+        VIRTUALIZATION_VMWARE,
+        VIRTUALIZATION_ORACLE,
+        VIRTUALIZATION_MICROSOFT,
+        VIRTUALIZATION_ZVM,
+        VIRTUALIZATION_PARALLELS,
+        VIRTUALIZATION_BHYVE,
+        VIRTUALIZATION_QNX,
+        VIRTUALIZATION_ACRN,
+        VIRTUALIZATION_POWERVM,
+        VIRTUALIZATION_APPLE,
+        VIRTUALIZATION_SRE,
+        VIRTUALIZATION_GOOGLE,
+        VIRTUALIZATION_VM_OTHER,
+        VIRTUALIZATION_VM_LAST = VIRTUALIZATION_VM_OTHER,
+
+        VIRTUALIZATION_CONTAINER_FIRST,
+        VIRTUALIZATION_SYSTEMD_NSPAWN = VIRTUALIZATION_CONTAINER_FIRST,
+        VIRTUALIZATION_LXC_LIBVIRT,
+        VIRTUALIZATION_LXC,
+        VIRTUALIZATION_OPENVZ,
+        VIRTUALIZATION_DOCKER,
+        VIRTUALIZATION_PODMAN,
+        VIRTUALIZATION_RKT,
+        VIRTUALIZATION_WSL,
+        VIRTUALIZATION_PROOT,
+        VIRTUALIZATION_POUCH,
+        VIRTUALIZATION_CONTAINER_OTHER,
+        VIRTUALIZATION_CONTAINER_LAST = VIRTUALIZATION_CONTAINER_OTHER,
+
+        _VIRTUALIZATION_MAX,
+        _VIRTUALIZATION_INVALID = -EINVAL,
+        _VIRTUALIZATION_ERRNO_MAX = -ERRNO_MAX, /* ensure full range of errno fits into this enum */
+} Virtualization;
+
+static inline bool VIRTUALIZATION_IS_VM(Virtualization x) {
+        return x >= VIRTUALIZATION_VM_FIRST && x <= VIRTUALIZATION_VM_LAST;
+}
+
+static inline bool VIRTUALIZATION_IS_CONTAINER(Virtualization x) {
+        return x >= VIRTUALIZATION_CONTAINER_FIRST && x <= VIRTUALIZATION_CONTAINER_LAST;
+}
+
+Virtualization detect_vm(void);
+Virtualization detect_container(void);
+Virtualization detect_virtualization(void);
+
+int running_in_userns(void);
+int running_in_chroot(void);
+
+const char *virtualization_to_string(Virtualization v) _const_;
+Virtualization virtualization_from_string(const char *s) _pure_;
+bool has_cpu_with_flag(const char *flag);
diff --git a/src/basic/xattr-util.c b/src/basic/xattr-util.c
new file mode 100644
index 0000000..0b661d9
--- /dev/null
+++ b/src/basic/xattr-util.c
@@ -0,0 +1,295 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "macro.h"
+#include "missing_syscall.h"
+#include "sparse-endian.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "xattr-util.h"
+
+int getxattr_at_malloc(
+                int fd,
+                const char *path,
+                const char *name,
+                int flags,
+                char **ret) {
+
+        _cleanup_close_ int opened_fd = -1;
+        unsigned n_attempts = 7;
+        bool by_procfs = false;
+        size_t l = 100;
+
+        assert(fd >= 0 || fd == AT_FDCWD);
+        assert(name);
+        assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
+        assert(ret);
+
+        /* So, this is single function that does what getxattr()/lgetxattr()/fgetxattr() does, but in one go,
+         * and with additional bells and whistles. Specifically:
+         *
+         * 1. This works on O_PATH fds (which fgetxattr() does not)
+         * 2. Provides full openat()-style semantics, i.e. by-fd, by-path and combination thereof
+         * 3. As extension to openat()-style semantics implies AT_EMPTY_PATH if path is NULL.
+         * 4. Does a malloc() loop, automatically sizing the allocation
+         * 5. NUL-terminates the returned buffer (for safety)
+         */
+
+        if (!path) /* If path is NULL, imply AT_EMPTY_PATH. – But if it's "", don't — for safety reasons. */
+                flags |= AT_EMPTY_PATH;
+
+        if (isempty(path)) {
+                if (!FLAGS_SET(flags, AT_EMPTY_PATH))
+                        return -EINVAL;
+
+                if (fd == AT_FDCWD) /* Both unspecified? Then operate on current working directory */
+                        path = ".";
+                else
+                        path = NULL;
+
+        } else if (fd != AT_FDCWD) {
+
+                /* If both have been specified, then we go via O_PATH */
+                opened_fd = openat(fd, path, O_PATH|O_CLOEXEC|(FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? 0 : O_NOFOLLOW));
+                if (opened_fd < 0)
+                        return -errno;
+
+                fd = opened_fd;
+                path = NULL;
+                by_procfs = true; /* fgetxattr() is not going to work, go via /proc/ link right-away */
+        }
+
+        for (;;) {
+                _cleanup_free_ char *v = NULL;
+                ssize_t n;
+
+                if (n_attempts == 0) /* If someone is racing against us, give up eventually */
+                        return -EBUSY;
+                n_attempts--;
+
+                v = new0(char, l+1);
+                if (!v)
+                        return -ENOMEM;
+
+                l = MALLOC_ELEMENTSOF(v) - 1;
+
+                if (path)
+                        n = FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? getxattr(path, name, v, l) : lgetxattr(path, name, v, l);
+                else
+                        n = by_procfs ? getxattr(FORMAT_PROC_FD_PATH(fd), name, v, l) : fgetxattr(fd, name, v, l);
+                if (n < 0) {
+                        if (errno == EBADF) {
+                                if (by_procfs || path)
+                                        return -EBADF;
+
+                                by_procfs = true; /* Might be an O_PATH fd, try again via /proc/ link */
+                                continue;
+                        }
+
+                        if (errno != ERANGE)
+                                return -errno;
+                } else {
+                        v[n] = 0; /* NUL terminate */
+                        *ret = TAKE_PTR(v);
+                        return (int) n;
+                }
+
+                if (path)
+                        n = FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? getxattr(path, name, NULL, 0) : lgetxattr(path, name, NULL, 0);
+                else
+                        n = by_procfs ? getxattr(FORMAT_PROC_FD_PATH(fd), name, NULL, 0) : fgetxattr(fd, name, NULL, 0);
+                if (n < 0)
+                        return -errno;
+                if (n > INT_MAX) /* We couldn't return this as 'int' anymore */
+                        return -E2BIG;
+
+                l = (size_t) n;
+        }
+}
+
+static int parse_crtime(le64_t le, usec_t *usec) {
+        uint64_t u;
+
+        assert(usec);
+
+        u = le64toh(le);
+        if (IN_SET(u, 0, UINT64_MAX))
+                return -EIO;
+
+        *usec = (usec_t) u;
+        return 0;
+}
+
+int fd_getcrtime_at(
+                int fd,
+                const char *path,
+                int flags,
+                usec_t *ret) {
+
+        _cleanup_free_ le64_t *le = NULL;
+        STRUCT_STATX_DEFINE(sx);
+        usec_t a, b;
+        int r;
+
+        assert(fd >= 0 || fd == AT_FDCWD);
+        assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
+        assert(ret);
+
+        if (!path)
+                flags |= AT_EMPTY_PATH;
+
+        /* So here's the deal: the creation/birth time (crtime/btime) of a file is a relatively newly supported concept
+         * on Linux (or more strictly speaking: a concept that only recently got supported in the API, it was
+         * implemented on various file systems on the lower level since a while, but never was accessible). However, we
+         * needed a concept like that for vaccuuming algorithms and such, hence we emulated it via a user xattr for a
+         * long time. Starting with Linux 4.11 there's statx() which exposes the timestamp to userspace for the first
+         * time, where it is available. Thius function will read it, but it tries to keep some compatibility with older
+         * systems: we try to read both the crtime/btime and the xattr, and then use whatever is older. After all the
+         * concept is useful for determining how "old" a file really is, and hence using the older of the two makes
+         * most sense. */
+
+        if (statx(fd, strempty(path),
+                  (flags & ~AT_SYMLINK_FOLLOW)|(FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? 0 : AT_SYMLINK_NOFOLLOW)|AT_STATX_DONT_SYNC,
+                  STATX_BTIME,
+                  &sx) >= 0 &&
+            (sx.stx_mask & STATX_BTIME) &&
+            sx.stx_btime.tv_sec != 0)
+                a = (usec_t) sx.stx_btime.tv_sec * USEC_PER_SEC +
+                        (usec_t) sx.stx_btime.tv_nsec / NSEC_PER_USEC;
+        else
+                a = USEC_INFINITY;
+
+        r = getxattr_at_malloc(fd, path, "user.crtime_usec", flags, (char**) &le);
+        if (r >= 0) {
+                if (r != sizeof(*le))
+                        r = -EIO;
+                else
+                        r = parse_crtime(*le, &b);
+        }
+        if (r < 0) {
+                if (a != USEC_INFINITY) {
+                        *ret = a;
+                        return 0;
+                }
+
+                return r;
+        }
+
+        if (a != USEC_INFINITY)
+                *ret = MIN(a, b);
+        else
+                *ret = b;
+
+        return 0;
+}
+
+int fd_setcrtime(int fd, usec_t usec) {
+        le64_t le;
+
+        assert(fd >= 0);
+
+        if (!timestamp_is_set(usec))
+                usec = now(CLOCK_REALTIME);
+
+        le = htole64((uint64_t) usec);
+        return RET_NERRNO(fsetxattr(fd, "user.crtime_usec", &le, sizeof(le), 0));
+}
+
+int listxattr_at_malloc(
+                int fd,
+                const char *path,
+                int flags,
+                char **ret) {
+
+        _cleanup_close_ int opened_fd = -1;
+        bool by_procfs = false;
+        unsigned n_attempts = 7;
+        size_t l = 100;
+
+        assert(fd >= 0 || fd == AT_FDCWD);
+        assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
+        assert(ret);
+
+        /* This is to listxattr()/llistattr()/flistattr() what getxattr_at_malloc() is to getxattr()/… */
+
+        if (!path) /* If path is NULL, imply AT_EMPTY_PATH. – But if it's "", don't. */
+                flags |= AT_EMPTY_PATH;
+
+        if (isempty(path)) {
+                if (!FLAGS_SET(flags, AT_EMPTY_PATH))
+                        return -EINVAL;
+
+                if (fd == AT_FDCWD) /* Both unspecified? Then operate on current working directory */
+                        path = ".";
+                else
+                        path = NULL;
+
+        } else if (fd != AT_FDCWD) {
+                /* If both have been specified, then we go via O_PATH */
+                opened_fd = openat(fd, path, O_PATH|O_CLOEXEC|(FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? 0 : O_NOFOLLOW));
+                if (opened_fd < 0)
+                        return -errno;
+
+                fd = opened_fd;
+                path = NULL;
+                by_procfs = true;
+        }
+
+        for (;;) {
+                _cleanup_free_ char *v = NULL;
+                ssize_t n;
+
+                if (n_attempts == 0) /* If someone is racing against us, give up eventually */
+                        return -EBUSY;
+                n_attempts--;
+
+                v = new(char, l+1);
+                if (!v)
+                        return -ENOMEM;
+
+                l = MALLOC_ELEMENTSOF(v) - 1;
+
+                if (path)
+                        n = FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? listxattr(path, v, l) : llistxattr(path, v, l);
+                else
+                        n = by_procfs ? listxattr(FORMAT_PROC_FD_PATH(fd), v, l) : flistxattr(fd, v, l);
+                if (n < 0) {
+                        if (errno == EBADF) {
+                                if (by_procfs || path)
+                                        return -EBADF;
+
+                                by_procfs = true; /* Might be an O_PATH fd, try again via /proc/ link */
+                                continue;
+                        }
+
+                        if (errno != ERANGE)
+                                return -errno;
+                } else {
+                        v[n] = 0; /* NUL terminate */
+                        *ret = TAKE_PTR(v);
+                        return (int) n;
+                }
+
+                if (path)
+                        n = FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? listxattr(path, NULL, 0) : llistxattr(path, NULL, 0);
+                else
+                        n = by_procfs ? listxattr(FORMAT_PROC_FD_PATH(fd), NULL, 0) : flistxattr(fd, NULL, 0);
+                if (n < 0)
+                        return -errno;
+                if (n > INT_MAX) /* We couldn't return this as 'int' anymore */
+                        return -E2BIG;
+
+                l = (size_t) n;
+        }
+}
diff --git a/src/basic/xattr-util.h b/src/basic/xattr-util.h
new file mode 100644
index 0000000..0eb745a
--- /dev/null
+++ b/src/basic/xattr-util.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include 
+#include 
+#include 
+
+#include "time-util.h"
+
+int getxattr_at_malloc(int fd, const char *path, const char *name, int flags, char **ret);
+static inline int getxattr_malloc(const char *path, const char *name, char **ret) {
+        return getxattr_at_malloc(AT_FDCWD, path, name, AT_SYMLINK_FOLLOW, ret);
+}
+static inline int lgetxattr_malloc(const char *path, const char *name, char **ret) {
+        return getxattr_at_malloc(AT_FDCWD, path, name, 0, ret);
+}
+static inline int fgetxattr_malloc(int fd, const char *name, char **ret) {
+        return getxattr_at_malloc(fd, NULL, name, AT_EMPTY_PATH, ret);
+}
+
+int fd_setcrtime(int fd, usec_t usec);
+
+int fd_getcrtime_at(int fd, const char *name, int flags, usec_t *ret);
+static inline int fd_getcrtime(int fd, usec_t *ret) {
+        return fd_getcrtime_at(fd, NULL, 0, ret);
+}
+
+
+int listxattr_at_malloc(int fd, const char *path, int flags, char **ret);
+static inline int listxattr_malloc(const char *path, char **ret) {
+        return listxattr_at_malloc(AT_FDCWD, path, AT_SYMLINK_FOLLOW, ret);
+}
+static inline int llistxattr_malloc(const char *path, char **ret) {
+        return listxattr_at_malloc(AT_FDCWD, path, 0, ret);
+}
+static inline int flistxattr_malloc(int fd, char **ret) {
+        return listxattr_at_malloc(fd, NULL, AT_EMPTY_PATH, ret);
+}
-- 
cgit v1.2.3