diff options
Diffstat (limited to 'tests/cluster/tests/03-failover-loop.tcl')
-rw-r--r-- | tests/cluster/tests/03-failover-loop.tcl | 117 |
1 files changed, 117 insertions, 0 deletions
diff --git a/tests/cluster/tests/03-failover-loop.tcl b/tests/cluster/tests/03-failover-loop.tcl new file mode 100644 index 0000000..46c22a9 --- /dev/null +++ b/tests/cluster/tests/03-failover-loop.tcl @@ -0,0 +1,117 @@ +# Failover stress test. +# In this test a different node is killed in a loop for N +# iterations. The test checks that certain properties +# are preserved across iterations. + +source "../tests/includes/init-tests.tcl" + +test "Create a 5 nodes cluster" { + create_cluster 5 5 +} + +test "Cluster is up" { + assert_cluster_state ok +} + +set iterations 20 +set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]] + +while {[incr iterations -1]} { + set tokill [randomInt 10] + set other [expr {($tokill+1)%10}] ; # Some other instance. + set key [randstring 20 20 alpha] + set val [randstring 20 20 alpha] + set role [RI $tokill role] + if {$role eq {master}} { + set slave {} + set myid [dict get [get_myself $tokill] id] + foreach_redis_id id { + if {$id == $tokill} continue + if {[dict get [get_myself $id] slaveof] eq $myid} { + set slave $id + } + } + if {$slave eq {}} { + fail "Unable to retrieve slave's ID for master #$tokill" + } + } + + puts "--- Iteration $iterations ---" + + if {$role eq {master}} { + test "Wait for slave of #$tokill to sync" { + wait_for_condition 1000 50 { + [string match {*state=online*} [RI $tokill slave0]] + } else { + fail "Slave of node #$tokill is not ok" + } + } + set slave_config_epoch [CI $slave cluster_my_epoch] + } + + test "Cluster is writable before failover" { + for {set i 0} {$i < 100} {incr i} { + catch {$cluster set $key:$i $val:$i} err + assert {$err eq {OK}} + } + # Wait for the write to propagate to the slave if we + # are going to kill a master. + if {$role eq {master}} { + R $tokill wait 1 20000 + } + } + + test "Terminating node #$tokill" { + # Stop AOF so that an initial AOFRW won't prevent the instance from terminating + R $tokill config set appendonly no + kill_instance redis $tokill + } + + if {$role eq {master}} { + test "Wait failover by #$slave with old epoch $slave_config_epoch" { + wait_for_condition 1000 50 { + [CI $slave cluster_my_epoch] > $slave_config_epoch + } else { + fail "No failover detected, epoch is still [CI $slave cluster_my_epoch]" + } + } + } + + test "Cluster should eventually be up again" { + assert_cluster_state ok + } + + test "Cluster is writable again" { + for {set i 0} {$i < 100} {incr i} { + catch {$cluster set $key:$i:2 $val:$i:2} err + assert {$err eq {OK}} + } + } + + test "Restarting node #$tokill" { + restart_instance redis $tokill + } + + test "Instance #$tokill is now a slave" { + wait_for_condition 1000 50 { + [RI $tokill role] eq {slave} + } else { + fail "Restarted instance is not a slave" + } + } + + test "We can read back the value we set before" { + for {set i 0} {$i < 100} {incr i} { + catch {$cluster get $key:$i} err + assert {$err eq "$val:$i"} + catch {$cluster get $key:$i:2} err + assert {$err eq "$val:$i:2"} + } + } +} + +test "Post condition: current_epoch >= my_epoch everywhere" { + foreach_redis_id id { + assert {[CI $id cluster_current_epoch] >= [CI $id cluster_my_epoch]} + } +} |