diff options
Diffstat (limited to 'tests/integration/shutdown.tcl')
-rw-r--r-- | tests/integration/shutdown.tcl | 238 |
1 files changed, 238 insertions, 0 deletions
# tests/integration/shutdown.tcl
#
# This test suite tests shutdown when there are lagging replicas connected.
#
# Every test starts two servers. Inside the nested start_server blocks the
# master is addressed with server index -1 and the replica with index 0.
# A lagging replica is simulated by stopping the replica process with SIGSTOP
# and later resuming it with SIGCONT.

# Fill up the OS socket send buffer for the replica connection 1M at a time.
# When the replication buffer memory increases beyond 2M (often after writing 4M
# or so), we assume it's because the OS socket send buffer can't swallow
# anymore.
#
# Arguments:
#   idx - server index of the master, passed through to `populate` and `s`.
proc fill_up_os_socket_send_buffer_for_repl {idx} {
    set i 0
    while {1} {
        incr i
        # A distinct key prefix per round so that every round adds new data.
        populate 1024 junk$i: 1024 $idx
        after 10
        set buf_size [s $idx mem_total_replication_buffers]
        if {$buf_size > 2*1024*1024} {
            break
        }
    }
}

# The same scenario is run twice: once the shutdown is triggered by SIGTERM
# and once by the SHUTDOWN command sent from a deferring client.
foreach how {sigterm shutdown} {
    test "Shutting down master waits for replica to catch up ($how)" {
        start_server {} {
            start_server {} {
                set master [srv -1 client]
                set master_host [srv -1 host]
                set master_port [srv -1 port]
                set master_pid [srv -1 pid]
                set replica [srv 0 client]
                set replica_pid [srv 0 pid]

                # Config master.
                $master config set shutdown-timeout 300; # 5min for slow CI
                $master config set repl-backlog-size 1; # small as possible
                $master config set hz 100; # cron runs every 10ms

                # Config replica.
                $replica replicaof $master_host $master_port
                wait_for_sync $replica

                # Preparation: Set k to 1 on both master and replica.
                $master set k 1
                wait_for_ofs_sync $master $replica

                # Pause the replica.
                exec kill -SIGSTOP $replica_pid
                after 10

                # Fill up the OS socket send buffer for the replica connection
                # to prevent the following INCR from reaching the replica via
                # the OS.
                fill_up_os_socket_send_buffer_for_repl -1

                # Incr k and immediately shutdown master.
                $master incr k
                switch $how {
                    sigterm {
                        exec kill -SIGTERM $master_pid
                    }
                    shutdown {
                        set rd [redis_deferring_client -1]
                        $rd shutdown
                    }
                }
                wait_for_condition 50 100 {
                    [s -1 shutdown_in_milliseconds] > 0
                } else {
                    fail "Master not indicating ongoing shutdown."
                }

                # Wake up replica and check if master has waited for it.
                after 20; # 2 cron intervals
                exec kill -SIGCONT $replica_pid
                wait_for_condition 300 1000 {
                    [$replica get k] eq 2
                } else {
                    fail "Master exited before replica could catch up."
                }

                # Check shutdown log messages on master
                wait_for_log_messages -1 {"*ready to exit, bye bye*"} 0 100 500
                assert_equal 0 [count_log_message -1 "*Lagging replica*"]
                verify_log_message -1 "*1 of 1 replicas are in sync*" 0

                # The deferring client only exists in the "shutdown" variant.
                # Close it so that its channel is not left open in the test
                # runner once the master is gone.
                if {$how eq "shutdown"} {
                    $rd close
                }
            }
        }
    } {} {repl external:skip}
}

# With a 1 second shutdown-timeout and a replica that stays paused, the master
# is expected to give up waiting, log the lagging replica and exit anyway.
test {Shutting down master waits for replica timeout} {
    start_server {} {
        start_server {} {
            set master [srv -1 client]
            set master_host [srv -1 host]
            set master_port [srv -1 port]
            set master_pid [srv -1 pid]
            set replica [srv 0 client]
            set replica_pid [srv 0 pid]

            # Config master.
            $master config set shutdown-timeout 1; # second

            # Config replica.
            $replica replicaof $master_host $master_port
            wait_for_sync $replica

            # Preparation: Set k to 1 on both master and replica.
            $master set k 1
            wait_for_ofs_sync $master $replica

            # Pause the replica.
            exec kill -SIGSTOP $replica_pid
            after 10

            # Fill up the OS socket send buffer for the replica connection to
            # prevent the following INCR k from reaching the replica via the OS.
            fill_up_os_socket_send_buffer_for_repl -1

            # Incr k and immediately shutdown master.
            $master incr k
            exec kill -SIGTERM $master_pid
            wait_for_condition 50 100 {
                [s -1 shutdown_in_milliseconds] > 0
            } else {
                fail "Master not indicating ongoing shutdown."
            }

            # Let master finish shutting down and check log.
            wait_for_log_messages -1 {"*ready to exit, bye bye*"} 0 100 100
            verify_log_message -1 "*Lagging replica*" 0
            verify_log_message -1 "*0 of 1 replicas are in sync*" 0

            # Wake up replica. It must still hold the old value, because the
            # INCR never reached it.
            exec kill -SIGCONT $replica_pid
            assert_equal 1 [$replica get k]
        }
    }
} {} {repl external:skip}

# Two clients block on SHUTDOWN while the replica is paused. Once the replica
# has caught up, the shutdown itself fails because an initial AOF rewrite is
# still in progress, and both clients must receive an error.
test "Shutting down master waits for replica then fails" {
    start_server {} {
        start_server {} {
            set master [srv -1 client]
            set master_host [srv -1 host]
            set master_port [srv -1 port]
            set replica [srv 0 client]
            set replica_pid [srv 0 pid]

            # Config master and replica.
            $replica replicaof $master_host $master_port
            wait_for_sync $replica

            # Pause the replica and write a key on master.
            exec kill -SIGSTOP $replica_pid
            after 10
            $master incr k

            # Two clients call blocking SHUTDOWN in parallel.
            set rd1 [redis_deferring_client -1]
            set rd2 [redis_deferring_client -1]
            $rd1 shutdown
            $rd2 shutdown

            # The SHUTDOWN commands travel on other connections than the INFO
            # below, so wait until the master reports both clients as blocked
            # before taking the snapshot that is asserted on.
            wait_for_condition 50 100 {
                [s -1 blocked_clients] == 2
            } else {
                fail "Clients are not blocked on SHUTDOWN."
            }
            set info_clients [$master info clients]
            assert_match "*connected_clients:3*" $info_clients
            assert_match "*blocked_clients:2*" $info_clients

            # Start a very slow initial AOFRW, which will prevent shutdown.
            $master config set rdb-key-save-delay 30000000; # 30 seconds
            $master config set appendonly yes

            # Wake up replica, causing master to continue shutting down.
            exec kill -SIGCONT $replica_pid

            # SHUTDOWN returns an error to both clients blocking on SHUTDOWN.
            catch { $rd1 read } e1
            catch { $rd2 read } e2
            assert_match "*Errors trying to SHUTDOWN. Check logs*" $e1
            assert_match "*Errors trying to SHUTDOWN. Check logs*" $e2
            $rd1 close
            $rd2 close

            # Check shutdown log messages on master.
            verify_log_message -1 "*1 of 1 replicas are in sync*" 0
            verify_log_message -1 "*Writing initial AOF, can't exit*" 0
            verify_log_message -1 "*Errors trying to shut down*" 0

            # Let master to exit fast, without waiting for the very slow AOFRW.
            # The command is wrapped in catch because the connection may be
            # dropped before a reply is read.
            catch {$master shutdown nosave force}
        }
    }
} {} {repl external:skip}

# Two clients block on SHUTDOWN while the replica is paused, then a third
# client aborts the shutdown. Both blocked clients must receive an error and
# the master must log that the shutdown was aborted.
test "Shutting down master waits for replica then aborted" {
    start_server {} {
        start_server {} {
            set master [srv -1 client]
            set master_host [srv -1 host]
            set master_port [srv -1 port]
            set replica [srv 0 client]
            set replica_pid [srv 0 pid]

            # Config master and replica.
            $replica replicaof $master_host $master_port
            wait_for_sync $replica

            # Pause the replica and write a key on master.
            exec kill -SIGSTOP $replica_pid
            after 10
            $master incr k

            # Two clients call blocking SHUTDOWN in parallel.
            set rd1 [redis_deferring_client -1]
            set rd2 [redis_deferring_client -1]
            $rd1 shutdown
            $rd2 shutdown

            # The SHUTDOWN commands travel on other connections than the INFO
            # below, so wait until the master reports both clients as blocked
            # before taking the snapshot that is asserted on.
            wait_for_condition 50 100 {
                [s -1 blocked_clients] == 2
            } else {
                fail "Clients are not blocked on SHUTDOWN."
            }
            set info_clients [$master info clients]
            assert_match "*connected_clients:3*" $info_clients
            assert_match "*blocked_clients:2*" $info_clients

            # Abort the shutdown
            $master shutdown abort

            # Wake up replica so that it is not left stopped. The shutdown has
            # already been aborted above.
            exec kill -SIGCONT $replica_pid

            # SHUTDOWN returns an error to both clients blocking on SHUTDOWN.
            catch { $rd1 read } e1
            catch { $rd2 read } e2
            assert_match "*Errors trying to SHUTDOWN. Check logs*" $e1
            assert_match "*Errors trying to SHUTDOWN. Check logs*" $e2
            $rd1 close
            $rd2 close

            # Check shutdown log messages on master.
            verify_log_message -1 "*Shutdown manually aborted*" 0
        }
    }
} {} {repl external:skip}