summaryrefslogtreecommitdiffstats
path: root/tests/cluster/tests/03-failover-loop.tcl
diff options
context:
space:
mode:
Diffstat (limited to 'tests/cluster/tests/03-failover-loop.tcl')
-rw-r--r--tests/cluster/tests/03-failover-loop.tcl117
1 files changed, 117 insertions, 0 deletions
diff --git a/tests/cluster/tests/03-failover-loop.tcl b/tests/cluster/tests/03-failover-loop.tcl
new file mode 100644
index 0000000..46c22a9
--- /dev/null
+++ b/tests/cluster/tests/03-failover-loop.tcl
@@ -0,0 +1,117 @@
+# Failover stress test.
+# In this test a different node is killed in a loop for N
+# iterations. The test checks that certain properties
+# are preserved across iterations.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+ create_cluster 5 5
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+set iterations 20
+set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+
+while {[incr iterations -1]} {
+ set tokill [randomInt 10]
+ set other [expr {($tokill+1)%10}] ; # Some other instance.
+ set key [randstring 20 20 alpha]
+ set val [randstring 20 20 alpha]
+ set role [RI $tokill role]
+ if {$role eq {master}} {
+ set slave {}
+ set myid [dict get [get_myself $tokill] id]
+ foreach_redis_id id {
+ if {$id == $tokill} continue
+ if {[dict get [get_myself $id] slaveof] eq $myid} {
+ set slave $id
+ }
+ }
+ if {$slave eq {}} {
+ fail "Unable to retrieve slave's ID for master #$tokill"
+ }
+ }
+
+ puts "--- Iteration $iterations ---"
+
+ if {$role eq {master}} {
+ test "Wait for slave of #$tokill to sync" {
+ wait_for_condition 1000 50 {
+ [string match {*state=online*} [RI $tokill slave0]]
+ } else {
+ fail "Slave of node #$tokill is not ok"
+ }
+ }
+ set slave_config_epoch [CI $slave cluster_my_epoch]
+ }
+
+ test "Cluster is writable before failover" {
+ for {set i 0} {$i < 100} {incr i} {
+ catch {$cluster set $key:$i $val:$i} err
+ assert {$err eq {OK}}
+ }
+ # Wait for the write to propagate to the slave if we
+ # are going to kill a master.
+ if {$role eq {master}} {
+ R $tokill wait 1 20000
+ }
+ }
+
+ test "Terminating node #$tokill" {
+ # Stop AOF so that an initial AOFRW won't prevent the instance from terminating
+ R $tokill config set appendonly no
+ kill_instance redis $tokill
+ }
+
+ if {$role eq {master}} {
+ test "Wait failover by #$slave with old epoch $slave_config_epoch" {
+ wait_for_condition 1000 50 {
+ [CI $slave cluster_my_epoch] > $slave_config_epoch
+ } else {
+ fail "No failover detected, epoch is still [CI $slave cluster_my_epoch]"
+ }
+ }
+ }
+
+ test "Cluster should eventually be up again" {
+ assert_cluster_state ok
+ }
+
+ test "Cluster is writable again" {
+ for {set i 0} {$i < 100} {incr i} {
+ catch {$cluster set $key:$i:2 $val:$i:2} err
+ assert {$err eq {OK}}
+ }
+ }
+
+ test "Restarting node #$tokill" {
+ restart_instance redis $tokill
+ }
+
+ test "Instance #$tokill is now a slave" {
+ wait_for_condition 1000 50 {
+ [RI $tokill role] eq {slave}
+ } else {
+ fail "Restarted instance is not a slave"
+ }
+ }
+
+ test "We can read back the value we set before" {
+ for {set i 0} {$i < 100} {incr i} {
+ catch {$cluster get $key:$i} err
+ assert {$err eq "$val:$i"}
+ catch {$cluster get $key:$i:2} err
+ assert {$err eq "$val:$i:2"}
+ }
+ }
+}
+
+test "Post condition: current_epoch >= my_epoch everywhere" {
+ foreach_redis_id id {
+ assert {[CI $id cluster_current_epoch] >= [CI $id cluster_my_epoch]}
+ }
+}