Diffstat (limited to 'tests/cluster/tests/28-cluster-shards.tcl')
-rw-r--r-- tests/cluster/tests/28-cluster-shards.tcl 287
1 file changed, 287 insertions, 0 deletions
diff --git a/tests/cluster/tests/28-cluster-shards.tcl b/tests/cluster/tests/28-cluster-shards.tcl
new file mode 100644
index 0000000..f24b917
--- /dev/null
+++ b/tests/cluster/tests/28-cluster-shards.tcl
@@ -0,0 +1,287 @@
+source "../tests/includes/init-tests.tcl"
+
+# Initial slot distribution.
+set ::slot0 [list 0 1000 1002 5459 5461 5461 10926 10926]
+set ::slot1 [list 5460 5460 5462 10922 10925 10925]
+set ::slot2 [list 10923 10924 10927 16383]
+set ::slot3 [list 1001 1001]
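+# (Each list holds start/end pairs of slot ranges: ::slot0 covers
+# 0-1000, 1002-5459, 5461-5461 and 10926-10926. Across the four lists
+# all 16384 slots are assigned, with slot 1001 alone in shard 3.)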
+
+proc cluster_create_with_split_slots {masters replicas} {
+ for {set j 0} {$j < $masters} {incr j} {
+ R $j cluster ADDSLOTSRANGE {*}[set ::slot${j}]
+ }
+ if {$replicas} {
+ cluster_allocate_slaves $masters $replicas
+ }
+ set ::cluster_master_nodes $masters
+ set ::cluster_replica_nodes $replicas
+}
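+
+# For example, with the lists above the first loop iteration expands to:
+#   R 0 cluster ADDSLOTSRANGE 0 1000 1002 5459 5461 5461 10926 10926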
+
+# Get the info for the node with the given node_id, as seen from the
+# given reference node. Valid type options are "node" (return the node
+# entry itself) and "shard" (return the shard containing that node).
+proc get_node_info_from_shard {id reference {type node}} {
+ set shards_response [R $reference CLUSTER SHARDS]
+ foreach shard_response $shards_response {
+ set nodes [dict get $shard_response nodes]
+ foreach node $nodes {
+ if {[dict get $node id] eq $id} {
+ if {$type eq "node"} {
+ return $node
+ } elseif {$type eq "shard"} {
+ return $shard_response
+ } else {
+ return {}
+ }
+ }
+ }
+ }
+ # No shard found, return nothing
+ return {}
+}
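+
+# For reference, each element of the CLUSTER SHARDS reply parses as a
+# dict of roughly this shape (a sketch; exact fields vary by version):
+#   slots {0 1000 1002 5459 ...}
+#   nodes {{id <40-char-hex-id> port <port> ip <ip> endpoint <endpoint>
+#           role master health online ...} ...}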
+
+proc cluster_ensure_master {id} {
+ if { [regexp "master" [R $id role]] == 0 } {
+ assert_equal {OK} [R $id CLUSTER FAILOVER]
+ wait_for_condition 50 100 {
+ [regexp "master" [R $id role]] == 1
+ } else {
+ fail "instance $id is not master"
+ }
+ }
+}
+
+test "Create a 8 nodes cluster with 4 shards" {
+ cluster_create_with_split_slots 4 4
+}
+
+test "Cluster should start ok" {
+ assert_cluster_state ok
+}
+
+test "Set cluster hostnames and verify they are propagated" {
+ for {set j 0} {$j < $::cluster_master_nodes + $::cluster_replica_nodes} {incr j} {
+ R $j config set cluster-announce-hostname "host-$j.com"
+ }
+
+ # Wait for everyone to agree about the state
+ wait_for_cluster_propagation
+}
+
+test "Verify information about the shards" {
+ set ids {}
+ for {set j 0} {$j < $::cluster_master_nodes + $::cluster_replica_nodes} {incr j} {
+ lappend ids [R $j CLUSTER MYID]
+ }
+ set slots [list $::slot0 $::slot1 $::slot2 $::slot3 $::slot0 $::slot1 $::slot2 $::slot3]
+
+    # Verify that on each node (primary and replica) the response of the
+    # `CLUSTER SHARDS` command is consistent.
+ for {set ref 0} {$ref < $::cluster_master_nodes + $::cluster_replica_nodes} {incr ref} {
+ for {set i 0} {$i < $::cluster_master_nodes + $::cluster_replica_nodes} {incr i} {
+ assert_equal [lindex $slots $i] [dict get [get_node_info_from_shard [lindex $ids $i] $ref "shard"] slots]
+ assert_equal "host-$i.com" [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] hostname]
+ assert_equal "127.0.0.1" [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] ip]
+ # Default value of 'cluster-preferred-endpoint-type' is ip.
+ assert_equal "127.0.0.1" [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] endpoint]
+
+ if {$::tls} {
+ assert_equal [get_instance_attrib redis $i plaintext-port] [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] port]
+ assert_equal [get_instance_attrib redis $i port] [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] tls-port]
+ } else {
+ assert_equal [get_instance_attrib redis $i port] [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] port]
+ }
+
+ if {$i < 4} {
+ assert_equal "master" [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] role]
+ assert_equal "online" [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] health]
+ } else {
+ assert_equal "replica" [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] role]
+                # The replica could be either online or loading, so its
+                # health is not asserted here.
+ }
+ }
+ }
+}
+
+test "Verify no slot shard" {
+ # Node 8 has no slots assigned
+ set node_8_id [R 8 CLUSTER MYID]
+ assert_equal {} [dict get [get_node_info_from_shard $node_8_id 8 "shard"] slots]
+ assert_equal {} [dict get [get_node_info_from_shard $node_8_id 0 "shard"] slots]
+}
+
+set node_0_id [R 0 CLUSTER MYID]
+
+test "Kill a node and tell the replica to immediately takeover" {
+ kill_instance redis 0
+ R 4 cluster failover force
+}
+
+# The killed primary (node 0) should be reported as failed; wait until the new primary acknowledges it.
+test "Verify health as fail for killed node" {
+ wait_for_condition 50 100 {
+ "fail" eq [dict get [get_node_info_from_shard $node_0_id 4 "node"] "health"]
+ } else {
+ fail "New primary never detected the node failed"
+ }
+}
+
+set primary_id 4
+set replica_id 0
+
+test "Restarting primary node" {
+ restart_instance redis $replica_id
+}
+
+test "Instance #0 gets converted into a replica" {
+ wait_for_condition 1000 50 {
+ [RI $replica_id role] eq {slave}
+ } else {
+ fail "Old primary was not converted into replica"
+ }
+}
+
+test "Test the replica reports a loading state while it's loading" {
+    # First verify everything has settled into a happy (online) state.
+ set replica_cluster_id [R $replica_id CLUSTER MYID]
+ wait_for_condition 50 1000 {
+ [dict get [get_node_info_from_shard $replica_cluster_id $primary_id "node"] health] eq "online"
+ } else {
+ fail "Replica never transitioned to online"
+ }
+
+    # Populate ~1 MB of data (1000 keys with 1000-byte values), so there is something to load on full sync.
+ R $primary_id debug populate 1000 key 1000
+
+    # Shrink the replication backlog so that, once its connection is
+    # killed below, the replica cannot partially resync and is forced
+    # into a full sync.
+    R $primary_id config set repl-backlog-size 100
+
+    # Set the per-key load delay (in microseconds) so that it will take
+    # at least 2 seconds to fully load the data.
+ R $replica_id config set key-load-delay 4000
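+    # (With ~1100 keys to load at 4000 us each, loading takes ~4.4s.)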
+
+    # Trigger event loop processing every 1024 bytes loaded. This lets
+    # the loading node send and receive cluster bus messages, so we set
+    # it low to exchange cluster messages more frequently.
+ R $replica_id config set loading-process-events-interval-bytes 1024
+
+ R $primary_id multi
+ R $primary_id client kill type replica
+    # Write ~100 KB of new data, far exceeding the 100-byte backlog, so
+    # the replica must full sync when it reconnects.
+ set num 100
+ set value [string repeat A 1024]
+ for {set j 0} {$j < $num} {incr j} {
+        # Use a hashtag that maps to a slot owned by shard #0.
+ set key "{ch3}$j"
+ R $primary_id set $key $value
+ }
+ R $primary_id exec
+
+    # The replica should reconnect and start a full sync; it will gossip its health to the primary.
+ wait_for_condition 50 1000 {
+ "loading" eq [dict get [get_node_info_from_shard $replica_cluster_id $primary_id "node"] health]
+ } else {
+ fail "Replica never transitioned to loading"
+ }
+
+    # Verify that the CLUSTER SHARDS and (deprecated) CLUSTER SLOTS APIs respond while the node is loading data.
+ R $replica_id CLUSTER SHARDS
+ R $replica_id CLUSTER SLOTS
+
+ # Speed up the key loading and verify everything resumes
+ R $replica_id config set key-load-delay 0
+
+ wait_for_condition 50 1000 {
+ "online" eq [dict get [get_node_info_from_shard $replica_cluster_id $primary_id "node"] health]
+ } else {
+ fail "Replica never transitioned to online"
+ }
+
+ # Final sanity, the replica agrees it is online.
+ assert_equal "online" [dict get [get_node_info_from_shard $replica_cluster_id $replica_id "node"] health]
+}
+
+test "Regression test for a crash when calling SHARDS during handshake" {
+    # Reset and forget a node, so we can use it to establish handshake connections.
+ set id [R 19 CLUSTER MYID]
+ R 19 CLUSTER RESET HARD
+ for {set i 0} {$i < 19} {incr i} {
+ R $i CLUSTER FORGET $id
+ }
+ R 19 cluster meet 127.0.0.1 [get_instance_attrib redis 0 port]
+    # This line would previously crash, since all the outbound
+    # connections were in handshake state.
+ R 19 CLUSTER SHARDS
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Shard ids are unique" {
+ set shard_ids {}
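+    # Track seen shard ids in a dict used as a set to catch duplicates.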
+ for {set i 0} {$i < 4} {incr i} {
+ set shard_id [R $i cluster myshardid]
+ assert_equal [dict exists $shard_ids $shard_id] 0
+ dict set shard_ids $shard_id 1
+ }
+}
+
+test "CLUSTER MYSHARDID reports same id for both primary and replica" {
+ for {set i 0} {$i < 4} {incr i} {
+ assert_equal [R $i cluster myshardid] [R [expr $i+4] cluster myshardid]
+ assert_equal [string length [R $i cluster myshardid]] 40
+ }
+}
+
+test "New replica receives primary's shard id" {
+    # Find a primary; the earlier failover may have shuffled roles.
+ set id 0
+ for {} {$id < 8} {incr id} {
+ if {[regexp "master" [R $id role]]} {
+ break
+ }
+ }
+ assert_not_equal [R 8 cluster myshardid] [R $id cluster myshardid]
+ assert_equal {OK} [R 8 cluster replicate [R $id cluster myid]]
+ assert_equal [R 8 cluster myshardid] [R $id cluster myshardid]
+}
+
+test "CLUSTER MYSHARDID reports same shard id after shard restart" {
+ set node_ids {}
+ for {set i 0} {$i < 8} {incr i 4} {
+ dict set node_ids $i [R $i cluster myshardid]
+ kill_instance redis $i
+ wait_for_condition 50 100 {
+ [instance_is_killed redis $i]
+ } else {
+ fail "instance $i is not killed"
+ }
+ }
+ for {set i 0} {$i < 8} {incr i 4} {
+ restart_instance redis $i
+ }
+ assert_cluster_state ok
+ for {set i 0} {$i < 8} {incr i 4} {
+ assert_equal [dict get $node_ids $i] [R $i cluster myshardid]
+ }
+}
+
+test "CLUSTER MYSHARDID reports same shard id after cluster restart" {
+ set node_ids {}
+ for {set i 0} {$i < 8} {incr i} {
+ dict set node_ids $i [R $i cluster myshardid]
+ }
+ for {set i 0} {$i < 8} {incr i} {
+ kill_instance redis $i
+ wait_for_condition 50 100 {
+ [instance_is_killed redis $i]
+ } else {
+ fail "instance $i is not killed"
+ }
+ }
+ for {set i 0} {$i < 8} {incr i} {
+ restart_instance redis $i
+ }
+ assert_cluster_state ok
+ for {set i 0} {$i < 8} {incr i} {
+ assert_equal [dict get $node_ids $i] [R $i cluster myshardid]
+ }
+}