diff options
Diffstat (limited to 'src/test/test-barrier.c')
-rw-r--r-- | src/test/test-barrier.c | 448 |
1 files changed, 448 insertions, 0 deletions
diff --git a/src/test/test-barrier.c b/src/test/test-barrier.c new file mode 100644 index 0000000..50ceb3a --- /dev/null +++ b/src/test/test-barrier.c @@ -0,0 +1,448 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +/* + * IPC barrier tests + * These tests verify the correct behavior of the IPC Barrier implementation. + * Note that the tests use alarm-timers to verify dead-locks and timeouts. These + * might not work on slow machines where 20ms are too short to perform specific + * operations (though, very unlikely). In case that turns out true, we have to + * increase it at the slightly cost of lengthen test-duration on other machines. + */ + +#include <stdio.h> +#include <sys/time.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "barrier.h" +#include "errno-util.h" +#include "tests.h" +#include "time-util.h" +#include "util.h" +#include "virt.h" + +/* 20ms to test deadlocks; All timings use multiples of this constant as + * alarm/sleep timers. If this timeout is too small for slow machines to perform + * the requested operations, we have to increase it. On an i7 this works fine + * with 1ms base-time, so 20ms should be just fine for everyone. */ +#define BASE_TIME (20 * USEC_PER_MSEC) + +static void set_alarm(usec_t usecs) { + struct itimerval v = { }; + + timeval_store(&v.it_value, usecs); + assert_se(setitimer(ITIMER_REAL, &v, NULL) >= 0); +} + +static void sleep_for(usec_t usecs) { + /* stupid usleep() might fail if >1000000 */ + assert_se(usecs < USEC_PER_SEC); + usleep(usecs); +} + +#define TEST_BARRIER(_FUNCTION, _CHILD_CODE, _WAIT_CHILD, _PARENT_CODE, _WAIT_PARENT) \ + TEST(_FUNCTION) { \ + Barrier b = BARRIER_NULL; \ + pid_t pid1, pid2; \ + \ + assert_se(barrier_create(&b) >= 0); \ + assert_se(b.me > 0); \ + assert_se(b.them > 0); \ + assert_se(b.pipe[0] > 0); \ + assert_se(b.pipe[1] > 0); \ + \ + pid1 = fork(); \ + assert_se(pid1 >= 0); \ + if (pid1 == 0) { \ + barrier_set_role(&b, BARRIER_CHILD); \ + { _CHILD_CODE; } \ + exit(42); \ + } \ + \ + pid2 = fork(); \ + assert_se(pid2 >= 0); \ + if (pid2 == 0) { \ + barrier_set_role(&b, BARRIER_PARENT); \ + { _PARENT_CODE; } \ + exit(42); \ + } \ + \ + barrier_destroy(&b); \ + set_alarm(999999); \ + { _WAIT_CHILD; } \ + { _WAIT_PARENT; } \ + set_alarm(0); \ + } + +#define TEST_BARRIER_WAIT_SUCCESS(_pid) \ + ({ \ + int pidr, status; \ + pidr = waitpid(_pid, &status, 0); \ + assert_se(pidr == _pid); \ + assert_se(WIFEXITED(status)); \ + assert_se(WEXITSTATUS(status) == 42); \ + }) + +#define TEST_BARRIER_WAIT_ALARM(_pid) \ + ({ \ + int pidr, status; \ + pidr = waitpid(_pid, &status, 0); \ + assert_se(pidr == _pid); \ + assert_se(WIFSIGNALED(status)); \ + assert_se(WTERMSIG(status) == SIGALRM); \ + }) + +/* + * Test basic sync points + * This places a barrier in both processes and waits synchronously for them. + * The timeout makes sure the sync works as expected. The sleep_for() on one side + * makes sure the exit of the parent does not overwrite previous barriers. Due + * to the sleep_for(), we know that the parent already exited, thus there's a + * pending HUP on the pipe. However, the barrier_sync() prefers reads on the + * eventfd, thus we can safely wait on the barrier. + */ +TEST_BARRIER(barrier_sync, + ({ + set_alarm(BASE_TIME * 10); + assert_se(barrier_place(&b)); + sleep_for(BASE_TIME * 2); + assert_se(barrier_sync(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid1), + ({ + set_alarm(BASE_TIME * 10); + assert_se(barrier_place(&b)); + assert_se(barrier_sync(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid2)); + +/* + * Test wait_next() + * This places a barrier in the parent and syncs on it. The child sleeps while + * the parent places the barrier and then waits for a barrier. The wait will + * succeed as the child hasn't read the parent's barrier, yet. The following + * barrier and sync synchronize the exit. + */ +TEST_BARRIER(barrier_wait_next, + ({ + sleep_for(BASE_TIME); + set_alarm(BASE_TIME * 10); + assert_se(barrier_wait_next(&b)); + assert_se(barrier_place(&b)); + assert_se(barrier_sync(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid1), + ({ + set_alarm(BASE_TIME * 4); + assert_se(barrier_place(&b)); + assert_se(barrier_sync(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid2)); + +/* + * Test wait_next() multiple times + * This places two barriers in the parent and waits for the child to exit. The + * child sleeps 20ms so both barriers _should_ be in place. It then waits for + * the parent to place the next barrier twice. The first call will fetch both + * barriers and return. However, the second call will stall as the parent does + * not place a 3rd barrier (the sleep caught two barriers). wait_next() is does + * not look at barrier-links so this stall is expected. Thus this test times + * out. + */ +TEST_BARRIER(barrier_wait_next_twice, + ({ + sleep_for(BASE_TIME); + set_alarm(BASE_TIME); + assert_se(barrier_wait_next(&b)); + assert_se(barrier_wait_next(&b)); + assert_se(0); + }), + TEST_BARRIER_WAIT_ALARM(pid1), + ({ + set_alarm(BASE_TIME * 10); + assert_se(barrier_place(&b)); + assert_se(barrier_place(&b)); + sleep_for(BASE_TIME * 4); + }), + TEST_BARRIER_WAIT_SUCCESS(pid2)); + +/* + * Test wait_next() with local barriers + * This is the same as test_barrier_wait_next_twice, but places local barriers + * between both waits. This does not have any effect on the wait so it times out + * like the other test. + */ +TEST_BARRIER(barrier_wait_next_twice_local, + ({ + sleep_for(BASE_TIME); + set_alarm(BASE_TIME); + assert_se(barrier_wait_next(&b)); + assert_se(barrier_place(&b)); + assert_se(barrier_place(&b)); + assert_se(barrier_wait_next(&b)); + assert_se(0); + }), + TEST_BARRIER_WAIT_ALARM(pid1), + ({ + set_alarm(BASE_TIME * 10); + assert_se(barrier_place(&b)); + assert_se(barrier_place(&b)); + sleep_for(BASE_TIME * 4); + }), + TEST_BARRIER_WAIT_SUCCESS(pid2)); + +/* + * Test wait_next() with sync_next() + * This is again the same as test_barrier_wait_next_twice but uses a + * synced wait as the second wait. This works just fine because the local state + * has no barriers placed, therefore, the remote is always in sync. + */ +TEST_BARRIER(barrier_wait_next_twice_sync, + ({ + sleep_for(BASE_TIME); + set_alarm(BASE_TIME); + assert_se(barrier_wait_next(&b)); + assert_se(barrier_sync_next(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid1), + ({ + set_alarm(BASE_TIME * 10); + assert_se(barrier_place(&b)); + assert_se(barrier_place(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid2)); + +/* + * Test wait_next() with sync_next() and local barriers + * This is again the same as test_barrier_wait_next_twice_local but uses a + * synced wait as the second wait. This works just fine because the local state + * is in sync with the remote. + */ +TEST_BARRIER(barrier_wait_next_twice_local_sync, + ({ + sleep_for(BASE_TIME); + set_alarm(BASE_TIME); + assert_se(barrier_wait_next(&b)); + assert_se(barrier_place(&b)); + assert_se(barrier_place(&b)); + assert_se(barrier_sync_next(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid1), + ({ + set_alarm(BASE_TIME * 10); + assert_se(barrier_place(&b)); + assert_se(barrier_place(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid2)); + +/* + * Test sync_next() and sync() + * This tests sync_*() synchronizations and makes sure they work fine if the + * local state is behind the remote state. + */ +TEST_BARRIER(barrier_sync_next, + ({ + set_alarm(BASE_TIME * 10); + assert_se(barrier_sync_next(&b)); + assert_se(barrier_sync(&b)); + assert_se(barrier_place(&b)); + assert_se(barrier_place(&b)); + assert_se(barrier_sync_next(&b)); + assert_se(barrier_sync_next(&b)); + assert_se(barrier_sync(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid1), + ({ + set_alarm(BASE_TIME * 10); + sleep_for(BASE_TIME); + assert_se(barrier_place(&b)); + assert_se(barrier_place(&b)); + assert_se(barrier_sync(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid2)); + +/* + * Test sync_next() and sync() with local barriers + * This tests timeouts if sync_*() is used if local barriers are placed but the + * remote didn't place any. + */ +TEST_BARRIER(barrier_sync_next_local, + ({ + set_alarm(BASE_TIME); + assert_se(barrier_place(&b)); + assert_se(barrier_sync_next(&b)); + assert_se(0); + }), + TEST_BARRIER_WAIT_ALARM(pid1), + ({ + sleep_for(BASE_TIME * 2); + }), + TEST_BARRIER_WAIT_SUCCESS(pid2)); + +/* + * Test sync_next() and sync() with local barriers and abortion + * This is the same as test_barrier_sync_next_local but aborts the sync in the + * parent. Therefore, the sync_next() succeeds just fine due to the abortion. + */ +TEST_BARRIER(barrier_sync_next_local_abort, + ({ + set_alarm(BASE_TIME * 10); + assert_se(barrier_place(&b)); + assert_se(!barrier_sync_next(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid1), + ({ + assert_se(barrier_abort(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid2)); + +/* + * Test matched wait_abortion() + * This runs wait_abortion() with remote abortion. + */ +TEST_BARRIER(barrier_wait_abortion, + ({ + set_alarm(BASE_TIME * 10); + assert_se(barrier_wait_abortion(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid1), + ({ + assert_se(barrier_abort(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid2)); + +/* + * Test unmatched wait_abortion() + * This runs wait_abortion() without any remote abortion going on. It thus must + * timeout. + */ +TEST_BARRIER(barrier_wait_abortion_unmatched, + ({ + set_alarm(BASE_TIME); + assert_se(barrier_wait_abortion(&b)); + assert_se(0); + }), + TEST_BARRIER_WAIT_ALARM(pid1), + ({ + sleep_for(BASE_TIME * 2); + }), + TEST_BARRIER_WAIT_SUCCESS(pid2)); + +/* + * Test matched wait_abortion() with local abortion + * This runs wait_abortion() with local and remote abortion. + */ +TEST_BARRIER(barrier_wait_abortion_local, + ({ + set_alarm(BASE_TIME * 10); + assert_se(barrier_abort(&b)); + assert_se(!barrier_wait_abortion(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid1), + ({ + assert_se(barrier_abort(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid2)); + +/* + * Test unmatched wait_abortion() with local abortion + * This runs wait_abortion() with only local abortion. This must time out. + */ +TEST_BARRIER(barrier_wait_abortion_local_unmatched, + ({ + set_alarm(BASE_TIME); + assert_se(barrier_abort(&b)); + assert_se(!barrier_wait_abortion(&b)); + assert_se(0); + }), + TEST_BARRIER_WAIT_ALARM(pid1), + ({ + sleep_for(BASE_TIME * 2); + }), + TEST_BARRIER_WAIT_SUCCESS(pid2)); + +/* + * Test child exit + * Place barrier and sync with the child. The child only exits()s, which should + * cause an implicit abortion and wake the parent. + */ +TEST_BARRIER(barrier_exit, + ({ + }), + TEST_BARRIER_WAIT_SUCCESS(pid1), + ({ + set_alarm(BASE_TIME * 10); + assert_se(barrier_place(&b)); + assert_se(!barrier_sync(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid2)); + +/* + * Test child exit with sleep + * Same as test_barrier_exit but verifies the test really works due to the + * child-exit. We add a usleep() which triggers the alarm in the parent and + * causes the test to time out. + */ +TEST_BARRIER(barrier_no_exit, + ({ + sleep_for(BASE_TIME * 2); + }), + TEST_BARRIER_WAIT_SUCCESS(pid1), + ({ + set_alarm(BASE_TIME); + assert_se(barrier_place(&b)); + assert_se(!barrier_sync(&b)); + }), + TEST_BARRIER_WAIT_ALARM(pid2)); + +/* + * Test pending exit against sync + * The parent places a barrier *and* exits. The 20ms wait in the child + * guarantees both are pending. However, our logic prefers pending barriers over + * pending exit-abortions (unlike normal abortions), thus the wait_next() must + * succeed, same for the sync_next() as our local barrier-count is smaller than + * the remote. Once we place a barrier our count is equal, so the sync still + * succeeds. Only if we place one more barrier, we're ahead of the remote, thus + * we will fail due to HUP on the pipe. + */ +TEST_BARRIER(barrier_pending_exit, + ({ + set_alarm(BASE_TIME * 4); + sleep_for(BASE_TIME * 2); + assert_se(barrier_wait_next(&b)); + assert_se(barrier_sync_next(&b)); + assert_se(barrier_place(&b)); + assert_se(barrier_sync_next(&b)); + assert_se(barrier_place(&b)); + assert_se(!barrier_sync_next(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid1), + ({ + assert_se(barrier_place(&b)); + }), + TEST_BARRIER_WAIT_SUCCESS(pid2)); + + +static int intro(void) { + if (!slow_tests_enabled()) + return log_tests_skipped("slow tests are disabled"); + + /* + * This test uses real-time alarms and sleeps to test for CPU races explicitly. This is highly + * fragile if your system is under load. We already increased the BASE_TIME value to make the tests + * more robust, but that just makes the test take significantly longer. Given the recent issues when + * running the test in a virtualized environments, limit it to bare metal machines only, to minimize + * false-positives in CIs. + */ + + Virtualization v = detect_virtualization(); + if (v < 0 && ERRNO_IS_PRIVILEGE(v)) + return log_tests_skipped("Cannot detect virtualization"); + + if (v != VIRTUALIZATION_NONE) + return log_tests_skipped("This test requires a baremetal machine"); + + return EXIT_SUCCESS; + } + +DEFINE_TEST_MAIN_WITH_INTRO(LOG_INFO, intro); |