summaryrefslogtreecommitdiffstats
path: root/tests/cluster/tests/04-resharding.tcl
diff options
context:
space:
mode:
Diffstat (limited to 'tests/cluster/tests/04-resharding.tcl')
-rw-r--r--tests/cluster/tests/04-resharding.tcl196
1 files changed, 196 insertions, 0 deletions
diff --git a/tests/cluster/tests/04-resharding.tcl b/tests/cluster/tests/04-resharding.tcl
new file mode 100644
index 0000000..18a26bd
--- /dev/null
+++ b/tests/cluster/tests/04-resharding.tcl
@@ -0,0 +1,196 @@
+# Failover stress test.
+# In this test a different node is killed in a loop for N
+# iterations. The test checks that certain properties
+# are preserved across iterations.
+
+source "../tests/includes/init-tests.tcl"
+source "../../../tests/support/cli.tcl"
+
+test "Create a 5 nodes cluster" {
+ create_cluster 5 5
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Enable AOF in all the instances" {
+ foreach_redis_id id {
+ R $id config set appendonly yes
+ # We use "appendfsync no" because it's fast but also guarantees that
+ # write(2) is performed before replying to client.
+ R $id config set appendfsync no
+ }
+
+ foreach_redis_id id {
+ wait_for_condition 1000 500 {
+ [RI $id aof_rewrite_in_progress] == 0 &&
+ [RI $id aof_enabled] == 1
+ } else {
+ fail "Failed to enable AOF on instance #$id"
+ }
+ }
+}
+
+# Return non-zero if the specified PID is about a process still in execution,
+# otherwise 0 is returned.
+proc process_is_running {pid} {
+ # PS should return with an error if PID is non existing,
+ # and catch will return non-zero. We want to return non-zero if
+ # the PID exists, so we invert the return value with expr not operator.
+ expr {![catch {exec ps -p $pid}]}
+}
+
+# Our resharding test performs the following actions:
+#
+# - N commands are sent to the cluster in the course of the test.
+# - Every command selects a random key from key:0 to key:MAX-1.
+# - The operation RPUSH key <randomvalue> is performed.
+# - Tcl remembers into an array all the values pushed to each list.
+# - After N/2 commands, the resharding process is started in background.
+# - The test continues while the resharding is in progress.
+# - At the end of the test, we wait for the resharding process to stop.
+# - Finally the keys are checked to see if they contain the value they should.
+
+set numkeys 50000
+set numops 200000
+set start_node_port [get_instance_attrib redis 0 port]
+set cluster [redis_cluster 127.0.0.1:$start_node_port]
+if {$::tls} {
+ # setup a non-TLS cluster client to the TLS cluster
+ set plaintext_port [get_instance_attrib redis 0 plaintext-port]
+ set cluster_plaintext [redis_cluster 127.0.0.1:$plaintext_port 0]
+ puts "Testing TLS cluster on start node 127.0.0.1:$start_node_port, plaintext port $plaintext_port"
+} else {
+ set cluster_plaintext $cluster
+ puts "Testing using non-TLS cluster"
+}
+catch {unset content}
+array set content {}
+set tribpid {}
+
+test "Cluster consistency during live resharding" {
+ set ele 0
+ for {set j 0} {$j < $numops} {incr j} {
+ # Trigger the resharding once we execute half the ops.
+ if {$tribpid ne {} &&
+ ($j % 10000) == 0 &&
+ ![process_is_running $tribpid]} {
+ set tribpid {}
+ }
+
+ if {$j >= $numops/2 && $tribpid eq {}} {
+ puts -nonewline "...Starting resharding..."
+ flush stdout
+ set target [dict get [get_myself [randomInt 5]] id]
+ set tribpid [lindex [exec \
+ ../../../src/redis-cli --cluster reshard \
+ 127.0.0.1:[get_instance_attrib redis 0 port] \
+ --cluster-from all \
+ --cluster-to $target \
+ --cluster-slots 100 \
+ --cluster-yes \
+ {*}[rediscli_tls_config "../../../tests"] \
+ | [info nameofexecutable] \
+ ../tests/helpers/onlydots.tcl \
+ &] 0]
+ }
+
+ # Write random data to random list.
+ set listid [randomInt $numkeys]
+ set key "key:$listid"
+ incr ele
+ # We write both with Lua scripts and with plain commands.
+ # This way we are able to stress Lua -> Redis command invocation
+ # as well, that has tests to prevent Lua to write into wrong
+ # hash slots.
+ # We also use both TLS and plaintext connections.
+ if {$listid % 3 == 0} {
+ $cluster rpush $key $ele
+ } elseif {$listid % 3 == 1} {
+ $cluster_plaintext rpush $key $ele
+ } else {
+ $cluster eval {redis.call("rpush",KEYS[1],ARGV[1])} 1 $key $ele
+ }
+ lappend content($key) $ele
+
+ if {($j % 1000) == 0} {
+ puts -nonewline W; flush stdout
+ }
+ }
+
+ # Wait for the resharding process to end
+ wait_for_condition 1000 500 {
+ [process_is_running $tribpid] == 0
+ } else {
+ fail "Resharding is not terminating after some time."
+ }
+
+}
+
+test "Verify $numkeys keys for consistency with logical content" {
+ # Check that the Redis Cluster content matches our logical content.
+ foreach {key value} [array get content] {
+ if {[$cluster lrange $key 0 -1] ne $value} {
+ fail "Key $key expected to hold '$value' but actual content is [$cluster lrange $key 0 -1]"
+ }
+ }
+}
+
+test "Terminate and restart all the instances" {
+ foreach_redis_id id {
+ # Stop AOF so that an initial AOFRW won't prevent the instance from terminating
+ R $id config set appendonly no
+ kill_instance redis $id
+ restart_instance redis $id
+ }
+}
+
+test "Cluster should eventually be up again" {
+ assert_cluster_state ok
+}
+
+test "Verify $numkeys keys after the restart" {
+ # Check that the Redis Cluster content matches our logical content.
+ foreach {key value} [array get content] {
+ if {[$cluster lrange $key 0 -1] ne $value} {
+ fail "Key $key expected to hold '$value' but actual content is [$cluster lrange $key 0 -1]"
+ }
+ }
+}
+
+test "Disable AOF in all the instances" {
+ foreach_redis_id id {
+ R $id config set appendonly no
+ }
+}
+
+test "Verify slaves consistency" {
+ set verified_masters 0
+ foreach_redis_id id {
+ set role [R $id role]
+ lassign $role myrole myoffset slaves
+ if {$myrole eq {slave}} continue
+ set masterport [get_instance_attrib redis $id port]
+ set masterdigest [R $id debug digest]
+ foreach_redis_id sid {
+ set srole [R $sid role]
+ if {[lindex $srole 0] eq {master}} continue
+ if {[lindex $srole 2] != $masterport} continue
+ wait_for_condition 1000 500 {
+ [R $sid debug digest] eq $masterdigest
+ } else {
+ fail "Master and slave data digest are different"
+ }
+ incr verified_masters
+ }
+ }
+ assert {$verified_masters >= 5}
+}
+
+test "Dump sanitization was skipped for migrations" {
+ set verified_masters 0
+ foreach_redis_id id {
+ assert {[RI $id dump_payload_sanitizations] == 0}
+ }
+}