summaryrefslogtreecommitdiffstats
path: root/tests/integration/psync2-master-restart.tcl
diff options
context:
space:
mode:
Diffstat (limited to 'tests/integration/psync2-master-restart.tcl')
-rw-r--r--tests/integration/psync2-master-restart.tcl218
1 files changed, 218 insertions, 0 deletions
diff --git a/tests/integration/psync2-master-restart.tcl b/tests/integration/psync2-master-restart.tcl
new file mode 100644
index 0000000..a9e21d1
--- /dev/null
+++ b/tests/integration/psync2-master-restart.tcl
@@ -0,0 +1,218 @@
+start_server {tags {"psync2 external:skip"}} {
+start_server {} {
+start_server {} {
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+
+ set replica [srv -1 client]
+ set replica_host [srv -1 host]
+ set replica_port [srv -1 port]
+
+ set sub_replica [srv -2 client]
+
+ # Make sure the server saves an RDB on shutdown
+ $master config set save "3600 1"
+
+ # Because we will test partial resync later, we don’t want a timeout to cause
+ # the master-replica disconnect, then the extra reconnections will break the
+ # sync_partial_ok stat test
+ $master config set repl-timeout 3600
+ $replica config set repl-timeout 3600
+ $sub_replica config set repl-timeout 3600
+
+ # Avoid PINGs
+ $master config set repl-ping-replica-period 3600
+ $master config rewrite
+
+ # Build replication chain
+ $replica replicaof $master_host $master_port
+ $sub_replica replicaof $replica_host $replica_port
+
+ wait_for_condition 50 100 {
+ [status $replica master_link_status] eq {up} &&
+ [status $sub_replica master_link_status] eq {up}
+ } else {
+ fail "Replication not started."
+ }
+
+ test "PSYNC2: Partial resync after Master restart using RDB aux fields when offset is 0" {
+ assert {[status $master master_repl_offset] == 0}
+
+ set replid [status $master master_replid]
+ $replica config resetstat
+
+ catch {
+ restart_server 0 true false true now
+ set master [srv 0 client]
+ }
+ wait_for_condition 50 1000 {
+ [status $replica master_link_status] eq {up} &&
+ [status $sub_replica master_link_status] eq {up}
+ } else {
+ fail "Replicas didn't sync after master restart"
+ }
+
+ # Make sure master restore replication info correctly
+ assert {[status $master master_replid] != $replid}
+ assert {[status $master master_repl_offset] == 0}
+ assert {[status $master master_replid2] eq $replid}
+ assert {[status $master second_repl_offset] == 1}
+
+ # Make sure master set replication backlog correctly
+ assert {[status $master repl_backlog_active] == 1}
+ assert {[status $master repl_backlog_first_byte_offset] == 1}
+ assert {[status $master repl_backlog_histlen] == 0}
+
+ # Partial resync after Master restart
+ assert {[status $master sync_partial_ok] == 1}
+ assert {[status $replica sync_partial_ok] == 1}
+ }
+
+ # Generate some data
+ createComplexDataset $master 1000
+
+ test "PSYNC2: Partial resync after Master restart using RDB aux fields with data" {
+ wait_for_condition 500 100 {
+ [status $master master_repl_offset] == [status $replica master_repl_offset] &&
+ [status $master master_repl_offset] == [status $sub_replica master_repl_offset]
+ } else {
+ fail "Replicas and master offsets were unable to match *exactly*."
+ }
+
+ set replid [status $master master_replid]
+ set offset [status $master master_repl_offset]
+ $replica config resetstat
+
+ catch {
+ # SHUTDOWN NOW ensures master doesn't send GETACK to replicas before
+ # shutting down which would affect the replication offset.
+ restart_server 0 true false true now
+ set master [srv 0 client]
+ }
+ wait_for_condition 50 1000 {
+ [status $replica master_link_status] eq {up} &&
+ [status $sub_replica master_link_status] eq {up}
+ } else {
+ fail "Replicas didn't sync after master restart"
+ }
+
+ # Make sure master restore replication info correctly
+ assert {[status $master master_replid] != $replid}
+ assert {[status $master master_repl_offset] == $offset}
+ assert {[status $master master_replid2] eq $replid}
+ assert {[status $master second_repl_offset] == [expr $offset+1]}
+
+ # Make sure master set replication backlog correctly
+ assert {[status $master repl_backlog_active] == 1}
+ assert {[status $master repl_backlog_first_byte_offset] == [expr $offset+1]}
+ assert {[status $master repl_backlog_histlen] == 0}
+
+ # Partial resync after Master restart
+ assert {[status $master sync_partial_ok] == 1}
+ assert {[status $replica sync_partial_ok] == 1}
+ }
+
+ test "PSYNC2: Partial resync after Master restart using RDB aux fields with expire" {
+ $master debug set-active-expire 0
+ for {set j 0} {$j < 1024} {incr j} {
+ $master select [expr $j%16]
+ $master set $j somevalue px 10
+ }
+
+ after 20
+
+ # Wait until master has received ACK from replica. If the master thinks
+ # that any replica is lagging when it shuts down, master would send
+ # GETACK to the replicas, affecting the replication offset.
+ set offset [status $master master_repl_offset]
+ wait_for_condition 500 100 {
+ [string match "*slave0:*,offset=$offset,*" [$master info replication]] &&
+ $offset == [status $replica master_repl_offset] &&
+ $offset == [status $sub_replica master_repl_offset]
+ } else {
+ show_cluster_status
+ fail "Replicas and master offsets were unable to match *exactly*."
+ }
+
+ set offset [status $master master_repl_offset]
+ $replica config resetstat
+
+ catch {
+ # Unlike the test above, here we use SIGTERM, which behaves
+ # differently compared to SHUTDOWN NOW if there are lagging
+ # replicas. This is just to increase coverage and let each test use
+ # a different shutdown approach. In this case there are no lagging
+ # replicas though.
+ restart_server 0 true false
+ set master [srv 0 client]
+ }
+ wait_for_condition 50 1000 {
+ [status $replica master_link_status] eq {up} &&
+ [status $sub_replica master_link_status] eq {up}
+ } else {
+ fail "Replicas didn't sync after master restart"
+ }
+
+ set expired_offset [status $master repl_backlog_histlen]
+ # Stale keys expired and master_repl_offset grows correctly
+ assert {[status $master rdb_last_load_keys_expired] == 1024}
+ assert {[status $master master_repl_offset] == [expr $offset+$expired_offset]}
+
+ # Partial resync after Master restart
+ assert {[status $master sync_partial_ok] == 1}
+ assert {[status $replica sync_partial_ok] == 1}
+
+ set digest [$master debug digest]
+ assert {$digest eq [$replica debug digest]}
+ assert {$digest eq [$sub_replica debug digest]}
+ }
+
+ test "PSYNC2: Full resync after Master restart when too many key expired" {
+ $master config set repl-backlog-size 16384
+ $master config rewrite
+
+ $master debug set-active-expire 0
+ # Make sure replication backlog is full and will be trimmed.
+ for {set j 0} {$j < 2048} {incr j} {
+ $master select [expr $j%16]
+ $master set $j somevalue px 10
+ }
+
+ after 20
+
+ wait_for_condition 500 100 {
+ [status $master master_repl_offset] == [status $replica master_repl_offset] &&
+ [status $master master_repl_offset] == [status $sub_replica master_repl_offset]
+ } else {
+ fail "Replicas and master offsets were unable to match *exactly*."
+ }
+
+ $replica config resetstat
+
+ catch {
+ # Unlike the test above, here we use SIGTERM. This is just to
+ # increase coverage and let each test use a different shutdown
+ # approach.
+ restart_server 0 true false
+ set master [srv 0 client]
+ }
+ wait_for_condition 50 1000 {
+ [status $replica master_link_status] eq {up} &&
+ [status $sub_replica master_link_status] eq {up}
+ } else {
+ fail "Replicas didn't sync after master restart"
+ }
+
+ # Replication backlog is full
+ assert {[status $master repl_backlog_first_byte_offset] > [status $master second_repl_offset]}
+ assert {[status $master sync_partial_ok] == 0}
+ assert {[status $master sync_full] == 1}
+ assert {[status $master rdb_last_load_keys_expired] == 2048}
+ assert {[status $replica sync_full] == 1}
+
+ set digest [$master debug digest]
+ assert {$digest eq [$replica debug digest]}
+ assert {$digest eq [$sub_replica debug digest]}
+ }
+}}}