#!/usr/bin/env bash
# SPDX-License-Identifier: LGPL-2.1-or-later
# shellcheck disable=SC2016
#
# Origin: test/units/testsuite-07.exec-context.sh (mode 100755), introduced by
# commit 55944e5e40b1be2afc4855d8d2baf4b73d1876b5 ("Adding upstream version
# 255.4.", Daniel Baumann, Wed, 10 Apr 2024 22:49:52 +0200).
set -eux
set -o pipefail

# shellcheck source=test/units/util.sh
. "$(dirname "$0")"/util.sh

# Make sure the unit's exec context matches its configuration
# See: https://github.com/systemd/systemd/pull/29552

# Probe whether procfs can be mounted with the given mount option.
#
# Even though hidepid= was introduced in kernel 3.3, only the post-5.8
# implementation is supported here, because it allows the option to be applied
# per instance instead of to the whole namespace. To tell the two
# implementations apart, check whether procfs can be mounted with a named value
# (e.g. hidepid=off): support for named values was introduced in the same commit
# as the per-instance behaviour.
#
# Arguments: $1 - procfs mount option to try (required, must be non-empty)
# Returns:   0 if the mount succeeded, otherwise the exit status of mount
proc_supports_option() {
    local mount_opt="${1:?}"
    local scratch_dir
    local status=0

    scratch_dir="$(mktemp -d)"

    # Remember how the mount attempt went; the cleanup below must run either way
    mount -t proc -o "$mount_opt" proc "$scratch_dir" || status=$?

    # Only unmount if the probe mount actually happened
    if mountpoint -q "$scratch_dir"; then
        umount -q "$scratch_dir"
    fi
    rm -rf "$scratch_dir"

    return "$status"
}

# In coverage builds we disable ProtectSystem= and ProtectHome= via a service.d
# dropin in /etc. This dropin has, unfortunately, higher priority than
# the transient stuff from systemd-run.
Let's just skip the following tests +# in that case instead of complicating the test setup even more */ +if [[ -z "${COVERAGE_BUILD_DIR:-}" ]]; then + systemd-run --wait --pipe -p ProtectSystem=yes \ + bash -xec "test ! -w /usr; test ! -w /boot; test -w /etc; test -w /var" + systemd-run --wait --pipe -p ProtectSystem=full \ + bash -xec "test ! -w /usr; test ! -w /boot; test ! -w /etc; test -w /var" + systemd-run --wait --pipe -p ProtectSystem=strict \ + bash -xec "test ! -w /; test ! -w /etc; test ! -w /var; test -w /dev; test -w /proc" + systemd-run --wait --pipe -p ProtectSystem=no \ + bash -xec "test -w /; test -w /etc; test -w /var; test -w /dev; test -w /proc" + + MARK="$(mktemp /root/.exec-context.XXX)" + systemd-run --wait --pipe -p ProtectHome=yes \ + bash -xec "test ! -w /home; test ! -w /root; test ! -w /run/user; test ! -e $MARK" + systemd-run --wait --pipe -p ProtectHome=read-only \ + bash -xec "test ! -w /home; test ! -w /root; test ! -w /run/user; test -e $MARK" + systemd-run --wait --pipe -p ProtectHome=tmpfs \ + bash -xec "test -w /home; test -w /root; test -w /run/user; test ! -e $MARK" + systemd-run --wait --pipe -p ProtectHome=no \ + bash -xec "test -w /home; test -w /root; test -w /run/user; test -e $MARK" + rm -f "$MARK" +fi + +if proc_supports_option "hidepid=off"; then + systemd-run --wait --pipe -p ProtectProc=noaccess -p User=testuser \ + bash -xec 'test -e /proc/1; test ! -r /proc/1; test -r /proc/$$$$/comm' + systemd-run --wait --pipe -p ProtectProc=invisible -p User=testuser \ + bash -xec 'test ! -e /proc/1; test -r /proc/$$$$/comm' + systemd-run --wait --pipe -p ProtectProc=ptraceable -p User=testuser \ + bash -xec 'test ! 
-e /proc/1; test -r /proc/$$$$/comm' + systemd-run --wait --pipe -p ProtectProc=ptraceable -p User=testuser -p AmbientCapabilities=CAP_SYS_PTRACE \ + bash -xec 'test -r /proc/1; test -r /proc/$$$$/comm' + systemd-run --wait --pipe -p ProtectProc=default -p User=testuser \ + bash -xec 'test -r /proc/1; test -r /proc/$$$$/comm' +fi + +if proc_supports_option "subset=pid"; then + systemd-run --wait --pipe -p ProcSubset=pid -p User=testuser \ + bash -xec "test -r /proc/1/comm; test ! -e /proc/cpuinfo" + systemd-run --wait --pipe -p ProcSubset=all -p User=testuser \ + bash -xec "test -r /proc/1/comm; test -r /proc/cpuinfo" +fi + +if ! systemd-detect-virt -cq; then + systemd-run --wait --pipe -p ProtectKernelLogs=yes -p User=testuser \ + bash -xec "test ! -r /dev/kmsg" + systemd-run --wait --pipe -p ProtectKernelLogs=no -p User=testuser \ + bash -xec "test -r /dev/kmsg" +fi + +systemd-run --wait --pipe -p BindPaths="/etc /home:/mnt:norbind -/foo/bar/baz:/usr:rbind" \ + bash -xec "mountpoint /etc; test -d /etc/systemd; mountpoint /mnt; ! mountpoint /usr" +systemd-run --wait --pipe -p BindReadOnlyPaths="/etc /home:/mnt:norbind -/foo/bar/baz:/usr:rbind" \ + bash -xec "test ! -w /etc; test ! -w /mnt; ! 
mountpoint /usr" +# Make sure we properly serialize/deserialize paths with spaces +# See: https://github.com/systemd/systemd/issues/30747 +touch "/tmp/test file with spaces" +systemd-run --wait --pipe -p TemporaryFileSystem="/tmp" -p BindPaths="/etc /home:/mnt:norbind /tmp/test\ file\ with\ spaces" \ + bash -xec "mountpoint /etc; test -d /etc/systemd; mountpoint /mnt; stat '/tmp/test file with spaces'" +systemd-run --wait --pipe -p TemporaryFileSystem="/tmp" -p BindPaths="/etc /home:/mnt:norbind /tmp/test\ file\ with\ spaces:/tmp/destination\ wi\:th\ spaces" \ + bash -xec "mountpoint /etc; test -d /etc/systemd; mountpoint /mnt; stat '/tmp/destination wi:th spaces'" + +# Check if we correctly serialize, deserialize, and set directives that +# have more complex internal handling +if ! systemd-detect-virt -cq; then + # Funny detail: this originally used the underlying rootfs device, but that, + # for some reason, caused "divide error" in kernel, followed by a kernel panic + TEMPFILE="$(mktemp)" + LODEV="$(losetup --show -f "$TEMPFILE")" + ROOT_DEV_MAJ_MIN="$(lsblk -nro MAJ:MIN "$LODEV")" + EXPECTED_IO_MAX="$ROOT_DEV_MAJ_MIN rbps=1000 wbps=1000000000000 riops=2000000000 wiops=4000" + EXPECTED_IO_LATENCY="$ROOT_DEV_MAJ_MIN target=69000" + SERVICE_NAME="test-io-directives-$RANDOM.service" + CGROUP_PATH="/sys/fs/cgroup/system.slice/$SERVICE_NAME" + + # IO*= + ARGUMENTS=( + # Throw in a couple of invalid entries just to test things out + -p IOReadBandwidthMax="/foo/bar 1M" + -p IOReadBandwidthMax="/foo/baz 1M" + -p IOReadBandwidthMax="$LODEV 1M" + -p IOReadBandwidthMax="$LODEV 1K" + -p IOWriteBandwidthMax="$LODEV 1G" + -p IOWriteBandwidthMax="$LODEV 1T" + -p IOReadIOPSMax="$LODEV 2G" + -p IOWriteIOPSMax="$LODEV 4K" + -p IODeviceLatencyTargetSec="$LODEV 666ms" + -p IODeviceLatencyTargetSec="/foo/bar 69ms" + -p IODeviceLatencyTargetSec="$LODEV 69ms" + -p IOReadBandwidthMax="/foo/bar 1M" + -p IOReadBandwidthMax="/foo/baz 1M" + # TODO: IODeviceWeight= doesn't work on loop 
devices and virtual disks + -p IODeviceWeight="$LODEV 999" + -p IODeviceWeight="/foo/bar 999" + ) + + systemctl set-property system.slice IOAccounting=yes + # io.latency not available by default on Debian stable + if [[ -e /sys/fs/cgroup/system.slice/io.latency ]]; then + systemd-run --wait --pipe --unit "$SERVICE_NAME" "${ARGUMENTS[@]}" \ + bash -xec "diff <(echo $EXPECTED_IO_MAX) $CGROUP_PATH/io.max; diff <(echo $EXPECTED_IO_LATENCY) $CGROUP_PATH/io.latency" + fi + + # CPUScheduling= + ARGUMENTS=( + -p CPUSchedulingPolicy=rr # ID: 2 + -p CPUSchedulingPolicy=fifo # ID: 1 + -p CPUSchedulingPriority=5 # Actual prio: 94 (99 - prio) + -p CPUSchedulingPriority=10 # Actual prio: 89 (99 - prio) + ) + + systemd-run --wait --pipe --unit "$SERVICE_NAME" "${ARGUMENTS[@]}" \ + bash -xec 'grep -E "^policy\s*:\s*1$" /proc/self/sched; grep -E "^prio\s*:\s*89$" /proc/self/sched' + + # Device*= + ARGUMENTS=( + -p DevicePolicy=closed + -p DevicePolicy=strict + -p DeviceAllow="char-mem rm" # Allow read & mknod for /dev/{null,zero,...} + -p DeviceAllow="/dev/loop0 rw" + -p DeviceAllow="/dev/loop0 w" # Allow write for /dev/loop0 + # Everything else should be disallowed per the strict policy + ) + + systemd-run --wait --pipe --unit "$SERVICE_NAME" "${ARGUMENTS[@]}" \ + bash -xec 'test -r /dev/null; test ! -w /dev/null; test ! -r /dev/loop0; test -w /dev/loop0; test ! -r /dev/tty; test ! -w /dev/tty' + + if ! 
systemctl --version | grep -qF -- "-BPF_FRAMEWORK"; then + # SocketBind*= + ARGUMENTS=( + -p SocketBindAllow= + -p SocketBindAllow=1234 + -p SocketBindAllow=ipv4:udp:any + -p SocketBindAllow=ipv6:6666 + # Everything but the last assignment is superfluous, but it still exercises + # the parsing machinery + -p SocketBindDeny= + -p SocketBindDeny=1111 + -p SocketBindDeny=ipv4:1111 + -p SocketBindDeny=ipv4:any + -p SocketBindDeny=ipv4:tcp:any + -p SocketBindDeny=ipv4:udp:10000-11000 + -p SocketBindDeny=ipv6:1111 + -p SocketBindDeny=any + ) + + # We should fail with EPERM when trying to bind to a socket not on the allow list + # (nc exits with 2 in that case) + systemd-run --wait -p SuccessExitStatus="1 2" --pipe "${ARGUMENTS[@]}" \ + bash -xec 'timeout 1s nc -l 127.0.0.1 9999; exit 42' + systemd-run --wait -p SuccessExitStatus="1 2" --pipe "${ARGUMENTS[@]}" \ + bash -xec 'timeout 1s nc -l ::1 9999; exit 42' + systemd-run --wait -p SuccessExitStatus="1 2" --pipe "${ARGUMENTS[@]}" \ + bash -xec 'timeout 1s nc -6 -u -l ::1 9999; exit 42' + systemd-run --wait -p SuccessExitStatus="1 2" --pipe "${ARGUMENTS[@]}" \ + bash -xec 'timeout 1s nc -4 -l 127.0.0.1 6666; exit 42' + # Consequently, we should succeed when binding to a socket on the allow list + # and keep listening on it until we're killed by `timeout` (EC 124) + systemd-run --wait --pipe -p SuccessExitStatus=124 "${ARGUMENTS[@]}" \ + bash -xec 'timeout 1s nc -4 -l 127.0.0.1 1234; exit 1' + systemd-run --wait --pipe -p SuccessExitStatus=124 "${ARGUMENTS[@]}" \ + bash -xec 'timeout 1s nc -4 -u -l 127.0.0.1 5678; exit 1' + systemd-run --wait --pipe -p SuccessExitStatus=124 "${ARGUMENTS[@]}" \ + bash -xec 'timeout 1s nc -6 -l ::1 1234; exit 1' + systemd-run --wait --pipe -p SuccessExitStatus=124 "${ARGUMENTS[@]}" \ + bash -xec 'timeout 1s nc -6 -l ::1 6666; exit 1' + fi + + losetup -d "$LODEV" + rm -f "$TEMPFILE" +fi + +# {Cache,Configuration,Logs,Runtime,State}Directory= +ARGUMENTS=( + -p CacheDirectory="foo/bar/baz 
also\ with\ spaces" + -p CacheDirectory="foo" + -p CacheDirectory="context" + -p CacheDirectoryMode="0123" + -p CacheDirectoryMode="0666" + -p ConfigurationDirectory="context/foo also_context/bar context/nested/baz context/semi\:colon" + -p ConfigurationDirectoryMode="0400" + -p LogsDirectory="context/foo" + -p LogsDirectory="" + -p LogsDirectory="context/a/very/nested/logs/dir" + -p RuntimeDirectory="context/with\ spaces" + # Note: {Runtime,State,Cache,Logs}Directory= directives support the directory:symlink syntax, which + # requires an additional level of escaping for the colon character + -p RuntimeDirectory="also_context:a\ symlink\ with\ \\\:\ col\\\:ons\ and\ \ spaces" + -p RuntimeDirectoryPreserve=yes + -p StateDirectory="context" + -p StateDirectory="./././././././context context context" + -p StateDirectoryMode="0000" +) + +rm -rf /run/context +systemd-run --wait --pipe "${ARGUMENTS[@]}" \ + bash -xec '[[ $CACHE_DIRECTORY == "/var/cache/also with spaces:/var/cache/context:/var/cache/foo:/var/cache/foo/bar/baz" ]]; + [[ $(stat -c "%a" "${CACHE_DIRECTORY##*:}") == 666 ]]' +systemd-run --wait --pipe "${ARGUMENTS[@]}" \ + bash -xec '[[ $CONFIGURATION_DIRECTORY == /etc/also_context/bar:/etc/context/foo:/etc/context/nested/baz:/etc/context/semi:colon ]]; + [[ $(stat -c "%a" "${CONFIGURATION_DIRECTORY%%:*}") == 400 ]]' +systemd-run --wait --pipe "${ARGUMENTS[@]}" \ + bash -xec '[[ $LOGS_DIRECTORY == /var/log/context/a/very/nested/logs/dir:/var/log/context/foo ]]; + [[ $(stat -c "%a" "${LOGS_DIRECTORY##*:}") == 755 ]]' +systemd-run --wait --pipe "${ARGUMENTS[@]}" \ + bash -xec '[[ $RUNTIME_DIRECTORY == "/run/also_context:/run/context/with spaces" ]]; + [[ $(stat -c "%a" "${RUNTIME_DIRECTORY##*:}") == 755 ]]; + [[ $(stat -c "%a" "${RUNTIME_DIRECTORY%%:*}") == 755 ]]' +systemd-run --wait --pipe "${ARGUMENTS[@]}" \ + bash -xec '[[ $STATE_DIRECTORY == /var/lib/context ]]; [[ $(stat -c "%a" $STATE_DIRECTORY) == 0 ]]' +test -d "/run/context/with spaces" +test -s 
"/run/a symlink with : col:ons and spaces" +rm -rf /var/{cache,lib,log}/context /etc/{also_,}context + +# Limit*= +# +# Note: keep limits of LimitDATA= and LimitAS= unlimited, otherwise ASan (LSan) +# won't be able to mmap the shadow maps +ARGUMENTS=( + -p LimitCPU=15 + -p LimitCPU=10:15 # ulimit -t + -p LimitFSIZE=96G # ulimit -f + -p LimitDATA=8T:infinity + -p LimitDATA=infinity # ulimit -d + -p LimitSTACK=8M # ulimit -s + -p LimitCORE=infinity + -p LimitCORE=17M # ulimit -c + -p LimitRSS=27G # ulimit -m + -p LimitNOFILE=7:127 # ulimit -n + -p LimitAS=infinity # ulimit -v + -p LimitNPROC=1 + -p LimitNPROC=64:infinity # ulimit -u + -p LimitMEMLOCK=37M # ulimit -l + -p LimitLOCKS=19:1021 # ulimit -x + -p LimitSIGPENDING=21 # ulimit -i + -p LimitMSGQUEUE=666 # ulimit -q + -p LimitNICE=4 # ulimit -e + -p LimitRTPRIO=8 # ulimit -r + -p LimitRTTIME=666666 # ulimit -R +) +# Do all the checks in one giant inline shell blob to avoid the overhead of spawning +# a new service for each check +# +# Note: ulimit shows storage-related values in 1024-byte increments* +# Note2: ulimit -R requires bash >= 5.1 +# +# * in POSIX mode -c a -f options show values in 512-byte increments; let's hope +# we never run in the POSIX mode +systemd-run --wait --pipe "${ARGUMENTS[@]}" \ + bash -xec 'KB=1; MB=$((KB * 1024)); GB=$((MB * 1024)); TB=$((GB * 1024)); + : CPU; [[ $(ulimit -St) -eq 10 ]]; [[ $(ulimit -Ht) -eq 15 ]]; + : FSIZE; [[ $(ulimit -Sf) -eq $((96 * GB)) ]]; [[ $(ulimit -Hf) -eq $((96 * GB)) ]]; + : DATA; [[ $(ulimit -Sd) == unlimited ]]; [[ $(ulimit -Hd) == unlimited ]]; + : STACK; [[ $(ulimit -Ss) -eq $((8 * MB)) ]]; [[ $(ulimit -Hs) -eq $((8 * MB)) ]]; + : CORE; [[ $(ulimit -Sc) -eq $((17 * MB)) ]]; [[ $(ulimit -Hc) -eq $((17 * MB)) ]]; + : RSS; [[ $(ulimit -Sm) -eq $((27 * GB)) ]]; [[ $(ulimit -Hm) -eq $((27 * GB)) ]]; + : NOFILE; [[ $(ulimit -Sn) -eq 7 ]]; [[ $(ulimit -Hn) -eq 127 ]]; + : AS; [[ $(ulimit -Sv) == unlimited ]]; [[ $(ulimit -Hv) == unlimited ]]; + : NPROC; [[ 
$(ulimit -Su) -eq 64 ]]; [[ $(ulimit -Hu) == unlimited ]]; + : MEMLOCK; [[ $(ulimit -Sl) -eq $((37 * MB)) ]]; [[ $(ulimit -Hl) -eq $((37 * MB)) ]]; + : LOCKS; [[ $(ulimit -Sx) -eq 19 ]]; [[ $(ulimit -Hx) -eq 1021 ]]; + : SIGPENDING; [[ $(ulimit -Si) -eq 21 ]]; [[ $(ulimit -Hi) -eq 21 ]]; + : MSGQUEUE; [[ $(ulimit -Sq) -eq 666 ]]; [[ $(ulimit -Hq) -eq 666 ]]; + : NICE; [[ $(ulimit -Se) -eq 4 ]]; [[ $(ulimit -He) -eq 4 ]]; + : RTPRIO; [[ $(ulimit -Sr) -eq 8 ]]; [[ $(ulimit -Hr) -eq 8 ]]; + ulimit -R || exit 0; + : RTTIME; [[ $(ulimit -SR) -eq 666666 ]]; [[ $(ulimit -HR) -eq 666666 ]];' + +# RestrictFileSystems= +# +# Note: running instrumented binaries requires at least /proc to be accessible, so let's +# skip the test when we're running under sanitizers +# +# Note: $GCOV_ERROR_LOG is used during coverage runs to suppress errors when creating *.gcda files, +# since gcov can't access the restricted filesystem (as expected) +if [[ ! -v ASAN_OPTIONS ]] && systemctl --version | grep "+BPF_FRAMEWORK" && kernel_supports_lsm bpf; then + ROOTFS="$(df --output=fstype /usr/bin | sed --quiet 2p)" + systemd-run --wait --pipe -p RestrictFileSystems="" ls / + systemd-run --wait --pipe -p RestrictFileSystems="$ROOTFS foo bar" ls / + (! systemd-run --wait --pipe -p RestrictFileSystems="$ROOTFS" ls /proc) + (! systemd-run --wait --pipe -p GCOV_ERROR_LOG=/dev/null -p RestrictFileSystems="foo" ls /) + systemd-run --wait --pipe -p RestrictFileSystems="$ROOTFS foo bar baz proc" ls /proc + systemd-run --wait --pipe -p RestrictFileSystems="$ROOTFS @foo @basic-api" ls /proc + systemd-run --wait --pipe -p RestrictFileSystems="$ROOTFS @foo @basic-api" ls /sys/fs/cgroup + + systemd-run --wait --pipe -p RestrictFileSystems="~" ls / + systemd-run --wait --pipe -p RestrictFileSystems="~proc" ls / + systemd-run --wait --pipe -p RestrictFileSystems="~@basic-api" ls / + (! systemd-run --wait --pipe -p GCOV_ERROR_LOG=/dev/null -p RestrictFileSystems="~$ROOTFS" ls /) + (! 
systemd-run --wait --pipe -p RestrictFileSystems="~proc" ls /proc) + (! systemd-run --wait --pipe -p RestrictFileSystems="~@basic-api" ls /proc) + (! systemd-run --wait --pipe -p RestrictFileSystems="~proc foo @bar @basic-api" ls /proc) + (! systemd-run --wait --pipe -p RestrictFileSystems="~proc foo @bar @basic-api" ls /sys) + systemd-run --wait --pipe -p RestrictFileSystems="~proc devtmpfs sysfs" ls / + (! systemd-run --wait --pipe -p RestrictFileSystems="~proc devtmpfs sysfs" ls /proc) + (! systemd-run --wait --pipe -p RestrictFileSystems="~proc devtmpfs sysfs" ls /dev) + (! systemd-run --wait --pipe -p RestrictFileSystems="~proc devtmpfs sysfs" ls /sys) +fi + +# Make sure we properly (de)serialize various string arrays, including whitespaces +# See: https://github.com/systemd/systemd/issues/31214 +systemd-run --wait --pipe -p Environment="FOO='bar4 '" \ + bash -xec '[[ $FOO == "bar4 " ]]' +systemd-run --wait --pipe -p Environment="FOO='bar4 ' BAR='\n\n'" \ + bash -xec "[[ \$FOO == 'bar4 ' && \$BAR == $'\n\n' ]]" +systemd-run --wait --pipe -p Environment='FOO="bar4 \\ "' -p Environment="BAR='\n\t'" \ + bash -xec "[[ \$FOO == 'bar4 \\ ' && \$BAR == $'\n\t' ]]" +TEST_ENV_FILE="/tmp/test-env-file-$RANDOM- " +cat >"$TEST_ENV_FILE" <