summaryrefslogtreecommitdiffstats
path: root/tests/integration/shutdown.tcl
diff options
context:
space:
mode:
Diffstat (limited to 'tests/integration/shutdown.tcl')
-rw-r--r--tests/integration/shutdown.tcl238
1 files changed, 238 insertions, 0 deletions
diff --git a/tests/integration/shutdown.tcl b/tests/integration/shutdown.tcl
new file mode 100644
index 0000000..60afc5c
--- /dev/null
+++ b/tests/integration/shutdown.tcl
@@ -0,0 +1,238 @@
+# This test suite tests shutdown when there are lagging replicas connected.
+
+# Fill up the OS socket send buffer for the replica connection 1M at a time.
+# When the replication buffer memory increases beyond 2M (often after writing 4M
+# or so), we assume it's because the OS socket send buffer can't swallow
+# anymore.
+proc fill_up_os_socket_send_buffer_for_repl {idx} {
+ set i 0
+ while {1} {
+ incr i
+ populate 1024 junk$i: 1024 $idx
+ after 10
+ set buf_size [s $idx mem_total_replication_buffers]
+ if {$buf_size > 2*1024*1024} {
+ break
+ }
+ }
+}
+
+foreach how {sigterm shutdown} {
+ test "Shutting down master waits for replica to catch up ($how)" {
+ start_server {} {
+ start_server {} {
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+ set master_pid [srv -1 pid]
+ set replica [srv 0 client]
+ set replica_pid [srv 0 pid]
+
+ # Config master.
+ $master config set shutdown-timeout 300; # 5min for slow CI
+ $master config set repl-backlog-size 1; # small as possible
+ $master config set hz 100; # cron runs every 10ms
+
+ # Config replica.
+ $replica replicaof $master_host $master_port
+ wait_for_sync $replica
+
+ # Preparation: Set k to 1 on both master and replica.
+ $master set k 1
+ wait_for_ofs_sync $master $replica
+
+ # Pause the replica.
+ exec kill -SIGSTOP $replica_pid
+ after 10
+
+ # Fill up the OS socket send buffer for the replica connection
+ # to prevent the following INCR from reaching the replica via
+ # the OS.
+ fill_up_os_socket_send_buffer_for_repl -1
+
+ # Incr k and immediately shutdown master.
+ $master incr k
+ switch $how {
+ sigterm {
+ exec kill -SIGTERM $master_pid
+ }
+ shutdown {
+ set rd [redis_deferring_client -1]
+ $rd shutdown
+ }
+ }
+ wait_for_condition 50 100 {
+ [s -1 shutdown_in_milliseconds] > 0
+ } else {
+ fail "Master not indicating ongoing shutdown."
+ }
+
+ # Wake up replica and check if master has waited for it.
+ after 20; # 2 cron intervals
+ exec kill -SIGCONT $replica_pid
+ wait_for_condition 300 1000 {
+ [$replica get k] eq 2
+ } else {
+ fail "Master exited before replica could catch up."
+ }
+
+ # Check shutdown log messages on master
+ wait_for_log_messages -1 {"*ready to exit, bye bye*"} 0 100 500
+ assert_equal 0 [count_log_message -1 "*Lagging replica*"]
+ verify_log_message -1 "*1 of 1 replicas are in sync*" 0
+ }
+ }
+ } {} {repl external:skip}
+}
+
+test {Shutting down master waits for replica timeout} {
+ start_server {} {
+ start_server {} {
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+ set master_pid [srv -1 pid]
+ set replica [srv 0 client]
+ set replica_pid [srv 0 pid]
+
+ # Config master.
+ $master config set shutdown-timeout 1; # second
+
+ # Config replica.
+ $replica replicaof $master_host $master_port
+ wait_for_sync $replica
+
+ # Preparation: Set k to 1 on both master and replica.
+ $master set k 1
+ wait_for_ofs_sync $master $replica
+
+ # Pause the replica.
+ exec kill -SIGSTOP $replica_pid
+ after 10
+
+ # Fill up the OS socket send buffer for the replica connection to
+ # prevent the following INCR k from reaching the replica via the OS.
+ fill_up_os_socket_send_buffer_for_repl -1
+
+ # Incr k and immediately shutdown master.
+ $master incr k
+ exec kill -SIGTERM $master_pid
+ wait_for_condition 50 100 {
+ [s -1 shutdown_in_milliseconds] > 0
+ } else {
+ fail "Master not indicating ongoing shutdown."
+ }
+
+ # Let master finish shutting down and check log.
+ wait_for_log_messages -1 {"*ready to exit, bye bye*"} 0 100 100
+ verify_log_message -1 "*Lagging replica*" 0
+ verify_log_message -1 "*0 of 1 replicas are in sync*" 0
+
+ # Wake up replica.
+ exec kill -SIGCONT $replica_pid
+ assert_equal 1 [$replica get k]
+ }
+ }
+} {} {repl external:skip}
+
+test "Shutting down master waits for replica then fails" {
+ start_server {} {
+ start_server {} {
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+ set master_pid [srv -1 pid]
+ set replica [srv 0 client]
+ set replica_pid [srv 0 pid]
+
+ # Config master and replica.
+ $replica replicaof $master_host $master_port
+ wait_for_sync $replica
+
+ # Pause the replica and write a key on master.
+ exec kill -SIGSTOP $replica_pid
+ after 10
+ $master incr k
+
+ # Two clients call blocking SHUTDOWN in parallel.
+ set rd1 [redis_deferring_client -1]
+ set rd2 [redis_deferring_client -1]
+ $rd1 shutdown
+ $rd2 shutdown
+ set info_clients [$master info clients]
+ assert_match "*connected_clients:3*" $info_clients
+ assert_match "*blocked_clients:2*" $info_clients
+
+ # Start a very slow initial AOFRW, which will prevent shutdown.
+ $master config set rdb-key-save-delay 30000000; # 30 seconds
+ $master config set appendonly yes
+
+ # Wake up replica, causing master to continue shutting down.
+ exec kill -SIGCONT $replica_pid
+
+ # SHUTDOWN returns an error to both clients blocking on SHUTDOWN.
+ catch { $rd1 read } e1
+ catch { $rd2 read } e2
+ assert_match "*Errors trying to SHUTDOWN. Check logs*" $e1
+ assert_match "*Errors trying to SHUTDOWN. Check logs*" $e2
+ $rd1 close
+ $rd2 close
+
+ # Check shutdown log messages on master.
+ verify_log_message -1 "*1 of 1 replicas are in sync*" 0
+ verify_log_message -1 "*Writing initial AOF, can't exit*" 0
+ verify_log_message -1 "*Errors trying to shut down*" 0
+
+ # Let master to exit fast, without waiting for the very slow AOFRW.
+ catch {$master shutdown nosave force}
+ }
+ }
+} {} {repl external:skip}
+
+test "Shutting down master waits for replica then aborted" {
+ start_server {} {
+ start_server {} {
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+ set master_pid [srv -1 pid]
+ set replica [srv 0 client]
+ set replica_pid [srv 0 pid]
+
+ # Config master and replica.
+ $replica replicaof $master_host $master_port
+ wait_for_sync $replica
+
+ # Pause the replica and write a key on master.
+ exec kill -SIGSTOP $replica_pid
+ after 10
+ $master incr k
+
+ # Two clients call blocking SHUTDOWN in parallel.
+ set rd1 [redis_deferring_client -1]
+ set rd2 [redis_deferring_client -1]
+ $rd1 shutdown
+ $rd2 shutdown
+ set info_clients [$master info clients]
+ assert_match "*connected_clients:3*" $info_clients
+ assert_match "*blocked_clients:2*" $info_clients
+
+ # Abort the shutdown
+ $master shutdown abort
+
+ # Wake up replica, causing master to continue shutting down.
+ exec kill -SIGCONT $replica_pid
+
+ # SHUTDOWN returns an error to both clients blocking on SHUTDOWN.
+ catch { $rd1 read } e1
+ catch { $rd2 read } e2
+ assert_match "*Errors trying to SHUTDOWN. Check logs*" $e1
+ assert_match "*Errors trying to SHUTDOWN. Check logs*" $e2
+ $rd1 close
+ $rd2 close
+
+ # Check shutdown log messages on master.
+ verify_log_message -1 "*Shutdown manually aborted*" 0
+ }
+ }
+} {} {repl external:skip}