diff options
Diffstat (limited to 'tests/integration/psync2-master-restart.tcl')
-rw-r--r-- | tests/integration/psync2-master-restart.tcl | 218 |
1 files changed, 218 insertions, 0 deletions
diff --git a/tests/integration/psync2-master-restart.tcl b/tests/integration/psync2-master-restart.tcl new file mode 100644 index 0000000..a9e21d1 --- /dev/null +++ b/tests/integration/psync2-master-restart.tcl @@ -0,0 +1,218 @@ +start_server {tags {"psync2 external:skip"}} { +start_server {} { +start_server {} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + set replica [srv -1 client] + set replica_host [srv -1 host] + set replica_port [srv -1 port] + + set sub_replica [srv -2 client] + + # Make sure the server saves an RDB on shutdown + $master config set save "3600 1" + + # Because we will test partial resync later, we don’t want a timeout to cause + # the master-replica disconnect, then the extra reconnections will break the + # sync_partial_ok stat test + $master config set repl-timeout 3600 + $replica config set repl-timeout 3600 + $sub_replica config set repl-timeout 3600 + + # Avoid PINGs + $master config set repl-ping-replica-period 3600 + $master config rewrite + + # Build replication chain + $replica replicaof $master_host $master_port + $sub_replica replicaof $replica_host $replica_port + + wait_for_condition 50 100 { + [status $replica master_link_status] eq {up} && + [status $sub_replica master_link_status] eq {up} + } else { + fail "Replication not started." + } + + test "PSYNC2: Partial resync after Master restart using RDB aux fields when offset is 0" { + assert {[status $master master_repl_offset] == 0} + + set replid [status $master master_replid] + $replica config resetstat + + catch { + restart_server 0 true false true now + set master [srv 0 client] + } + wait_for_condition 50 1000 { + [status $replica master_link_status] eq {up} && + [status $sub_replica master_link_status] eq {up} + } else { + fail "Replicas didn't sync after master restart" + } + + # Make sure master restore replication info correctly + assert {[status $master master_replid] != $replid} + assert {[status $master master_repl_offset] == 0} + assert {[status $master master_replid2] eq $replid} + assert {[status $master second_repl_offset] == 1} + + # Make sure master set replication backlog correctly + assert {[status $master repl_backlog_active] == 1} + assert {[status $master repl_backlog_first_byte_offset] == 1} + assert {[status $master repl_backlog_histlen] == 0} + + # Partial resync after Master restart + assert {[status $master sync_partial_ok] == 1} + assert {[status $replica sync_partial_ok] == 1} + } + + # Generate some data + createComplexDataset $master 1000 + + test "PSYNC2: Partial resync after Master restart using RDB aux fields with data" { + wait_for_condition 500 100 { + [status $master master_repl_offset] == [status $replica master_repl_offset] && + [status $master master_repl_offset] == [status $sub_replica master_repl_offset] + } else { + fail "Replicas and master offsets were unable to match *exactly*." + } + + set replid [status $master master_replid] + set offset [status $master master_repl_offset] + $replica config resetstat + + catch { + # SHUTDOWN NOW ensures master doesn't send GETACK to replicas before + # shutting down which would affect the replication offset. + restart_server 0 true false true now + set master [srv 0 client] + } + wait_for_condition 50 1000 { + [status $replica master_link_status] eq {up} && + [status $sub_replica master_link_status] eq {up} + } else { + fail "Replicas didn't sync after master restart" + } + + # Make sure master restore replication info correctly + assert {[status $master master_replid] != $replid} + assert {[status $master master_repl_offset] == $offset} + assert {[status $master master_replid2] eq $replid} + assert {[status $master second_repl_offset] == [expr $offset+1]} + + # Make sure master set replication backlog correctly + assert {[status $master repl_backlog_active] == 1} + assert {[status $master repl_backlog_first_byte_offset] == [expr $offset+1]} + assert {[status $master repl_backlog_histlen] == 0} + + # Partial resync after Master restart + assert {[status $master sync_partial_ok] == 1} + assert {[status $replica sync_partial_ok] == 1} + } + + test "PSYNC2: Partial resync after Master restart using RDB aux fields with expire" { + $master debug set-active-expire 0 + for {set j 0} {$j < 1024} {incr j} { + $master select [expr $j%16] + $master set $j somevalue px 10 + } + + after 20 + + # Wait until master has received ACK from replica. If the master thinks + # that any replica is lagging when it shuts down, master would send + # GETACK to the replicas, affecting the replication offset. + set offset [status $master master_repl_offset] + wait_for_condition 500 100 { + [string match "*slave0:*,offset=$offset,*" [$master info replication]] && + $offset == [status $replica master_repl_offset] && + $offset == [status $sub_replica master_repl_offset] + } else { + show_cluster_status + fail "Replicas and master offsets were unable to match *exactly*." + } + + set offset [status $master master_repl_offset] + $replica config resetstat + + catch { + # Unlike the test above, here we use SIGTERM, which behaves + # differently compared to SHUTDOWN NOW if there are lagging + # replicas. This is just to increase coverage and let each test use + # a different shutdown approach. In this case there are no lagging + # replicas though. + restart_server 0 true false + set master [srv 0 client] + } + wait_for_condition 50 1000 { + [status $replica master_link_status] eq {up} && + [status $sub_replica master_link_status] eq {up} + } else { + fail "Replicas didn't sync after master restart" + } + + set expired_offset [status $master repl_backlog_histlen] + # Stale keys expired and master_repl_offset grows correctly + assert {[status $master rdb_last_load_keys_expired] == 1024} + assert {[status $master master_repl_offset] == [expr $offset+$expired_offset]} + + # Partial resync after Master restart + assert {[status $master sync_partial_ok] == 1} + assert {[status $replica sync_partial_ok] == 1} + + set digest [$master debug digest] + assert {$digest eq [$replica debug digest]} + assert {$digest eq [$sub_replica debug digest]} + } + + test "PSYNC2: Full resync after Master restart when too many key expired" { + $master config set repl-backlog-size 16384 + $master config rewrite + + $master debug set-active-expire 0 + # Make sure replication backlog is full and will be trimmed. + for {set j 0} {$j < 2048} {incr j} { + $master select [expr $j%16] + $master set $j somevalue px 10 + } + + after 20 + + wait_for_condition 500 100 { + [status $master master_repl_offset] == [status $replica master_repl_offset] && + [status $master master_repl_offset] == [status $sub_replica master_repl_offset] + } else { + fail "Replicas and master offsets were unable to match *exactly*." + } + + $replica config resetstat + + catch { + # Unlike the test above, here we use SIGTERM. This is just to + # increase coverage and let each test use a different shutdown + # approach. + restart_server 0 true false + set master [srv 0 client] + } + wait_for_condition 50 1000 { + [status $replica master_link_status] eq {up} && + [status $sub_replica master_link_status] eq {up} + } else { + fail "Replicas didn't sync after master restart" + } + + # Replication backlog is full + assert {[status $master repl_backlog_first_byte_offset] > [status $master second_repl_offset]} + assert {[status $master sync_partial_ok] == 0} + assert {[status $master sync_full] == 1} + assert {[status $master rdb_last_load_keys_expired] == 2048} + assert {[status $replica sync_full] == 1} + + set digest [$master debug digest] + assert {$digest eq [$replica debug digest]} + assert {$digest eq [$sub_replica debug digest]} + } +}}} |