diff options
Diffstat (limited to 'test/units/TEST-55-OOMD.sh')
-rwxr-xr-x | test/units/TEST-55-OOMD.sh | 190 |
1 files changed, 190 insertions, 0 deletions
#!/usr/bin/env bash
# SPDX-License-Identifier: LGPL-2.1-or-later
#
# Integration test for systemd-oomd.
#
# Flow:
#   1. Skip (exit 77, reason appended to /skipped) when prerequisites are missing.
#   2. Set up swap (VMs only) and install drop-ins configuring oomd for the test.
#   3. Start a well-behaved unit (testchill) next to a memory hog (testbloat) and
#      expect only the hog to go away - first as system units, then as user units
#      of "testuser".
#   4. If the cgroupfs supports user xattrs, mark testbloat with
#      ManagedOOMPreference=avoid and expect testmunch to be the one that goes
#      away instead.
#
# Exit codes used for failed expectations:
#   24 - testchill is no longer running
#   25 - testbloat is no longer running although it carried the "avoid" preference
#   42 - testbloat is still running
#   43 - testmunch is still running
set -eux -o pipefail

# shellcheck source=test/units/util.sh
source "$(dirname "$0")/util.sh"

# Create the drop-in directory $1 (written with its trailing slash) and store
# stdin as the file $2 inside it.
write_dropin() {
    local dropin_dir="$1"
    local dropin_name="$2"

    mkdir -p "$dropin_dir"
    cat >"${dropin_dir}${dropin_name}"
}

# Block until oomctl lists the workload slice, giving up after one minute.
# Polling avoids racing against systemd-oomd picking up the freshly started units.
wait_for_oomd_monitoring() {
    timeout 1m bash -xec 'until oomctl | grep "/TEST-55-OOMD-workload.slice"; do sleep 1; done'
}

# Check the settings oomctl reports: the 20.00% value (presumably the pressure
# limit of the workload slice, which is defined outside this script) and the
# 2s default pressure duration configured by this script.
check_oomd_settings() {
    local expected

    for expected in "20.00%" "Default Memory Pressure Duration: 2s"; do
        oomctl | grep "$expected"
    done
}

# Poll "systemctl [OPTIONS...] status UNIT" until it reports failure, dumping the
# oomctl state between attempts. systemd-oomd needs 2 seconds of elevated
# pressure before it acts and the pressure itself takes time to build up, so
# give it up to 60 attempts, 2 seconds apart (about 2 minutes).
#
# $1:  unit to watch
# $2+: extra options placed in front of the "status" verb
wait_for_oom_kill() {
    local watched_unit="$1"
    shift

    for _ in {0..59}; do
        systemctl "$@" status "$watched_unit" || break
        oomctl
        sleep 2
    done
}

source /etc/os-release
# The stress tool is not packaged for OpenSUSE. stress-ng is available there, but
# it lacks the --vm-stride option this test relies on.
case "$ID" in
    *opensuse*)
        echo "Skipping due to missing stress package in OpenSUSE" >>/skipped
        exit 77
        ;;
esac

systemd-analyze log-level debug

# The init.scope.d drop-in must have been applied on boot
[[ "$(cat /sys/fs/cgroup/init.scope/memory.high)" != "max" ]]

# Rough checks that the environment offers what systemd-oomd needs. Every missing
# feature is recorded, then the test is skipped if anything was recorded.
if [[ ! -e /proc/pressure ]]; then
    echo "no PSI" >>/skipped
fi
if [[ "$(get_cgroup_hierarchy)" != "unified" ]]; then
    echo "no cgroupsv2" >>/skipped
fi
if [[ ! -x /usr/lib/systemd/systemd-oomd ]]; then
    echo "no oomd" >>/skipped
fi
[[ ! -s /skipped ]] || exit 77

rm -rf /run/systemd/system/TEST-55-OOMD-testbloat.service.d

# In a VM, replace whatever swap is active with a 64M swap file
if systemd-detect-virt --vm --quiet; then
    swapoff --all
    case "$(findmnt -n -o FSTYPE /)" in
        btrfs)
            btrfs filesystem mkswapfile -s 64M /swapfile
            ;;
        *)
            dd if=/dev/zero of=/swapfile bs=1M count=64
            chmod 0600 /swapfile
            mkswap /swapfile
            ;;
    esac

    swapon /swapfile
    swapon --show
fi

# Spell out the oomd configuration so that distro drop-ins cannot interfere
write_dropin /run/systemd/oomd.conf.d/ 99-oomd-test.conf <<EOF
[OOM]
DefaultMemoryPressureDurationSec=2s
EOF

write_dropin /run/systemd/system/-.slice.d/ 99-oomd-test.conf <<EOF
[Slice]
ManagedOOMSwap=auto
EOF

write_dropin /run/systemd/system/user@.service.d/ 99-oomd-test.conf <<EOF
[Service]
ManagedOOMMemoryPressure=auto
ManagedOOMMemoryPressureLimit=0%
EOF

write_dropin /run/systemd/system/systemd-oomd.service.d/ debug.conf <<EOF
[Service]
Environment=SYSTEMD_LOG_LEVEL=debug
EOF

systemctl daemon-reload

# Enabling the service makes sure dbus-org.freedesktop.oom1.service exists and
# that D-Bus activation works
systemctl enable systemd-oomd.service

# Should oomd already be running for whatever reason, restart it so that the
# settings above take effect
if systemctl is-active systemd-oomd.service; then
    systemctl restart systemd-oomd.service
fi

if [[ ! -v ASAN_OPTIONS && ! -v UBSAN_OPTIONS ]]; then
    # Starting a service must work without oom-kills even under a very low hard
    # memory cap. Not checked under sanitizers, which balloon memory usage.
    systemd-run -t -p MemoryMax=10M -p MemorySwapMax=0 -p MemoryZSwapMax=0 /bin/true
else
    # With sanitizers, sd-executor may pull in a significant chunk of shared
    # libraries. The resulting pressure can put us first in line when sd-oomd
    # starts killing. Arch Linux makes this worse by shipping unstripped gcc-libs,
    # so sd-executor loads over 30M of libraries on startup. Use a more generous
    # MemoryHigh= limit in that case to keep the test happy.
    systemctl edit --runtime --stdin --drop-in=99-MemoryHigh.conf TEST-55-OOMD-testchill.service <<EOF
[Service]
MemoryHigh=60M
EOF
    # The user instance gets the same drop-in
    mkdir -p /run/systemd/user/
    cp -rfv /run/systemd/system/TEST-55-OOMD-testchill.service.d/ /run/systemd/user/
fi

for unit in TEST-55-OOMD-testchill.service TEST-55-OOMD-testbloat.service; do
    systemctl start "$unit"
done

# systemd-oomd has to be monitoring the expected units
wait_for_oomd_monitoring
oomctl | grep "/TEST-55-OOMD-workload.slice"
check_oomd_settings

systemctl status TEST-55-OOMD-testchill.service

wait_for_oom_kill TEST-55-OOMD-testbloat.service

# Expect testbloat to be gone and testchill to be untouched
! systemctl status TEST-55-OOMD-testbloat.service || exit 42
systemctl status TEST-55-OOMD-testchill.service || exit 24

# Repeat the scenario with user units
loginctl enable-linger testuser

for unit in TEST-55-OOMD-testchill.service TEST-55-OOMD-testbloat.service; do
    systemctl start --machine "testuser@.host" --user "$unit"
done

# systemd-oomd has to be monitoring the expected units, this time below user.slice
wait_for_oomd_monitoring
oomctl | grep -E "/user.slice.*/TEST-55-OOMD-workload.slice"
check_oomd_settings

systemctl --machine "testuser@.host" --user status TEST-55-OOMD-testchill.service

wait_for_oom_kill TEST-55-OOMD-testbloat.service --machine "testuser@.host" --user

# Expect testbloat to be gone and testchill to be untouched
! systemctl --machine "testuser@.host" --user status TEST-55-OOMD-testbloat.service || exit 42
systemctl --machine "testuser@.host" --user status TEST-55-OOMD-testchill.service || exit 24

loginctl disable-linger testuser

# This part is only meaningful when xattrs can be set
if cgroupfs_supports_user_xattrs; then
    sleep 120 # wait for systemd-oomd kill cool down and elevated memory pressure to come down

    write_dropin /run/systemd/system/TEST-55-OOMD-testbloat.service.d/ override.conf <<EOF
[Service]
ManagedOOMPreference=avoid
EOF

    systemctl daemon-reload
    for unit in TEST-55-OOMD-testchill.service TEST-55-OOMD-testmunch.service TEST-55-OOMD-testbloat.service; do
        systemctl start "$unit"
    done

    wait_for_oom_kill TEST-55-OOMD-testmunch.service

    # testbloat carries the avoid xattr, so testmunch is the one expected to be gone
    systemctl status TEST-55-OOMD-testbloat.service || exit 25
    ! systemctl status TEST-55-OOMD-testmunch.service || exit 43
    systemctl status TEST-55-OOMD-testchill.service || exit 24
fi

systemd-analyze log-level info

touch /testok