Adding upstream version 5:7.2.4.upstream/5%7.2.4

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-14 13:40:54 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-14 13:40:54 +0000
commit: 317c0644ccf108aa23ef3fd8358bd66c2840bfc0 (patch)
tree: c417b3d25c86b775989cb5ac042f37611b626c8a /tests/cluster
parent: Initial commit. (diff)
download: redis-317c0644ccf108aa23ef3fd8358bd66c2840bfc0.tar.xz
redis-317c0644ccf108aa23ef3fd8358bd66c2840bfc0.zip
34 files changed, 3294 insertions, 0 deletions
diff --git a/tests/cluster/cluster.tcl b/tests/cluster/cluster.tcl
new file mode 100644
index 0000000..9931eac
--- /dev/null
+++ b/tests/cluster/cluster.tcl
@@ -0,0 +1,222 @@
+# Cluster-specific test functions.
+#
+# Copyright (C) 2014 Salvatore Sanfilippo antirez@gmail.com
+# This software is released under the BSD License. See the COPYING file for
+# more information.
+
+# Track cluster configuration as created by create_cluster below
+set ::cluster_master_nodes 0
+set ::cluster_replica_nodes 0
+
+# Parse the CLUSTER NODES reply of instance 'id' into a list of dictionaries.
+# Only entries whose link-state field glob-matches 'status' are returned.
+proc get_cluster_nodes {id {status "*"}} {
+    set result {}
+    foreach line [split [R $id cluster nodes] "\r\n"] {
+        set line [string trim $line]
+        # Splitting on both CR and LF leaves empty elements behind: skip them.
+        if {$line eq {}} continue
+        set fields [split $line]
+        lassign $fields node_id addr flags slaveof \
+            ping_sent pong_recv config_epoch linkstate
+        # The filter only looks at the link state, so test it first.
+        if {![string match $status $linkstate]} continue
+        lappend result [dict create \
+            id $node_id \
+            addr $addr \
+            flags [split $flags ,] \
+            slaveof $slaveof \
+            ping_sent $ping_sent \
+            pong_recv $pong_recv \
+            config_epoch $config_epoch \
+            linkstate $linkstate \
+            slots [lrange $fields 8 end]]
+    }
+    return $result
+}
+
+# Return 1 if 'flag' is one of the flags of the parsed node entry, else 0.
+proc has_flag {node flag} {
+    expr {$flag in [dict get $node flags]}
+}
+
+# Return the parsed entry of instance 'id' itself (flag "myself"), or {}.
+proc get_myself id {
+    foreach entry [get_cluster_nodes $id] {
+        if {![has_flag $entry myself]} continue
+        return $entry
+    }
+    return {}
+}
+
+# Look up the node whose ID is 'node_id' in the parsed CLUSTER NODES output
+# of instance number 'instance_id'. Return {} if there is no such node.
+proc get_node_by_id {instance_id node_id} {
+    foreach entry [get_cluster_nodes $instance_id] {
+        if {[dict get $entry id] ne $node_id} continue
+        return $entry
+    }
+    return {}
+}
+
+# Return the value of 'field' in the CLUSTER INFO output of instance 'n'.
+proc CI {n field} {
+    return [get_info_field [R $n cluster info] $field]
+}
+
+# Return the value of 'field' in the INFO output of instance 'n'.
+proc s {n field} {
+    return [get_info_field [R $n info] $field]
+}
+
+# Assuming nodes are reset, assign each of the 16384 hash slots to a node
+# picked with randomInt among the first 'n', then send CLUSTER ADDSLOTS.
+proc cluster_allocate_slots {n} {
+    for {set slot 16383} {$slot >= 0} {incr slot -1} {
+        # One list variable per node (slots_0, slots_1, ...) collects slots.
+        lappend slots_[randomInt $n] $slot
+    }
+    set owner 0
+    while {$owner < $n} {
+        set assigned [set slots_$owner]
+        R $owner cluster addslots {*}$assigned
+        incr owner
+    }
+}
+
+# Wait for every non-killed node to report cluster_state 'state', or fail.
+proc assert_cluster_state {state} {
+    foreach_redis_id id {
+        if {[instance_is_killed redis $id]} continue
+        wait_for_condition 1000 50 {
+            [CI $id cluster_state] eq $state
+        } else {
+            fail "Cluster node $id cluster_state:[CI $id cluster_state]"
+        }
+    }
+}
+
+# Return the ID of the first live instance, scanning from ID $first, whose
+# own CLUSTER NODES entry has no master ("-"). Fail if there is none.
+proc cluster_find_available_slave {first} {
+    foreach_redis_id id {
+        # Skip the instances below the starting ID and the killed ones.
+        if {$id < $first || [instance_is_killed redis $id]} continue
+        set master [dict get [get_myself $id] slaveof]
+        if {$master eq {-}} {return $id}
+    }
+    fail "No available slaves"
+}
+
+# Attach 'slaves' replicas to a cluster made of 'masters' masters, which are
+# assumed to be the instances with ID from 0 to masters-1. Masters receive
+# the new replicas in round-robin order.
+proc cluster_allocate_slaves {masters slaves} {
+    for {set k 0} {$k < $slaves} {incr k} {
+        # The replica is searched starting from the first non-master ID.
+        set replica [cluster_find_available_slave $masters]
+        set owner [get_myself [expr {$k % $masters}]]
+        R $replica cluster replicate [dict get $owner id]
+    }
+}
+
+# Create a cluster of 'masters' masters and 'slaves' slaves with random slots.
+proc create_cluster {masters slaves} {
+    global cluster_master_nodes cluster_replica_nodes
+    cluster_allocate_slots $masters
+    if {$slaves} {cluster_allocate_slaves $masters $slaves}
+    assert_cluster_state ok
+
+    # Remember the topology: the consistency helpers iterate over it.
+    set cluster_master_nodes $masters
+    set cluster_replica_nodes $slaves
+}
+
+# Give each of the first 'n' nodes a contiguous range of 16384/n slots; the
+# last node also takes the remainder. Expressions are braced on purpose.
+proc cluster_allocate_with_continuous_slots {n} {
+    set avg [expr {16384 / $n}]
+    for {set slot 16383} {$slot >= 0} {incr slot -1} {
+        set node [expr {$slot/$avg >= $n ? $n-1 : $slot/$avg}]
+        lappend slots_$node $slot
+    }
+    for {set j 0} {$j < $n} {incr j} {
+        R $j cluster addslots {*}[set slots_${j}]
+    }
+}
+
+# Same as create_cluster, but every master is given a contiguous range of
+# slots instead of randomly picked ones.
+proc cluster_create_with_continuous_slots {masters slaves} {
+    global cluster_master_nodes cluster_replica_nodes
+    cluster_allocate_with_continuous_slots $masters
+    if {$slaves} {cluster_allocate_slaves $masters $slaves}
+    assert_cluster_state ok
+
+    # Record the topology in the global counters, as create_cluster does.
+    set cluster_master_nodes $masters
+    set cluster_replica_nodes $slaves
+}
+
+
+# Set cluster-node-timeout to 'to' on every node; errors (e.g. unreachable nodes) are ignored.
+proc set_cluster_node_timeout {to} {
+    foreach_redis_id id {
+        catch {R $id CONFIG SET cluster-node-timeout $to}
+    }
+}
+
+# Write 100 keys through a cluster client bootstrapped from node "id" and
+# read them back, asserting that every value is the one just written.
+proc cluster_write_test {id} {
+    set prefix [randstring 20 20 alpha]
+    set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis $id port]]
+    set j -1
+    while {[incr j] < 100} {
+        $cluster set key.$j $prefix.$j
+    }
+    for {set j 0} {$j < 100} {incr j} {
+        assert {[$cluster get key.$j] eq "$prefix.$j"}
+    }
+    $cluster close
+}
+
+# Return 1 if every node counted in ::cluster_master_nodes and
+# ::cluster_replica_nodes reports the same CLUSTER SLOTS as node 0, else 0.
+proc cluster_config_consistent {} {
+    set total [expr {$::cluster_master_nodes + $::cluster_replica_nodes}]
+    for {set j 0} {$j < $total} {incr j} {
+        set cfg [R $j cluster slots]
+        if {$j == 0} {
+            # Node 0 is the reference every other node is compared to.
+            set base_cfg $cfg
+        } elseif {$cfg != $base_cfg} {
+            return 0
+        }
+    }
+    return 1
+}
+
+# Wait until cluster_config_consistent reports agreement among nodes, or fail.
+proc wait_for_cluster_propagation {} {
+    wait_for_condition 50 100 {
+        [cluster_config_consistent] eq 1
+    } else {
+        fail "cluster config did not reach a consistent state"
+    }
+}
+
+# Return 1 if every hostname found in CLUSTER SLOTS on all nodes glob-matches 'match_string', else 0.
+proc are_hostnames_propagated {match_string} {
+    set total [expr {$::cluster_master_nodes + $::cluster_replica_nodes}]
+    for {set j 0} {$j < $total} {incr j} {
+        foreach range [R $j cluster slots] {
+            # Node entries start at index 2 of each slot range element.
+            foreach entry [lrange $range 2 end] {
+                set hostname [lindex $entry 3 1]
+                if {![string match $match_string $hostname]} {return 0}
+            }
+        }
+    }
+    return 1
+}
diff --git a/tests/cluster/run.tcl b/tests/cluster/run.tcl
new file mode 100644
index 0000000..86c5f58
--- /dev/null
+++ b/tests/cluster/run.tcl
@@ -0,0 +1,32 @@
+# Cluster test suite. Copyright (C) 2014 Salvatore Sanfilippo antirez@gmail.com
+# This software is released under the BSD License. See the COPYING file for
+# more information.
+
+cd tests/cluster
+source cluster.tcl
+source ../instances.tcl
+source ../../support/cluster.tcl ; # Redis Cluster client.
+
+set ::instances_count 20 ; # How many instances we use at max.
+set ::tlsdir "../../tls"
+
+proc main {} {
+    parse_options ; # Runs first, before any instance is spawned.
+    spawn_instance redis $::redis_base_port $::instances_count {
+        "cluster-enabled yes"
+        "appendonly yes"
+        "enable-protected-configs yes"
+        "enable-debug-command yes"
+        "save ''"
+    }
+    run_tests
+    cleanup ; # Also invoked by the error handler below when main raises.
+    end_tests
+}
+
+if {[catch main e]} {
+    puts $::errorInfo
+    if {$::pause_on_error} pause_on_error
+    cleanup
+    exit 1
+}
diff --git a/tests/cluster/tests/00-base.tcl b/tests/cluster/tests/00-base.tcl
new file mode 100644
index 0000000..693dded
--- /dev/null
+++ b/tests/cluster/tests/00-base.tcl
@@ -0,0 +1,89 @@
+# Check the basic monitoring and failover capabilities.
+
+source "../tests/includes/init-tests.tcl"
+
+if {$::simulate_error} {
+    test "This test will fail" {
+        fail "Simulated error"
+    }
+}
+
+test "Different nodes have different IDs" {
+    set ids {}
+    set numnodes 0
+    foreach_redis_id id {
+        incr numnodes
+        # Every node should just know itself.
+        set nodeid [dict get [get_myself $id] id]
+        assert {$nodeid ne {}}
+        lappend ids $nodeid
+    }
+    set numids [llength [lsort -unique $ids]]
+    assert {$numids == $numnodes}
+}
+
+test "It is possible to perform slot allocation" {
+    cluster_allocate_slots 5
+}
+
+test "After the join, every node gets a different config epoch" {
+    set trynum 60
+    while {[incr trynum -1] != 0} {
+        # We check that this condition is true for *all* the nodes.
+        set ok 1 ; # Will be set to 0 every time a node is not ok.
+        foreach_redis_id id {
+            set epochs {}
+            foreach n [get_cluster_nodes $id] {
+                lappend epochs [dict get $n config_epoch]
+            }
+            if {[lsort $epochs] != [lsort -unique $epochs]} {
+                set ok 0 ; # At least one collision!
+            }
+        }
+        if {$ok} break
+        after 1000
+        puts -nonewline .
+        flush stdout
+    }
+    if {$trynum == 0} {
+        fail "Config epoch conflict resolution is not working."
+    }
+}
+
+test "Nodes should report cluster_state is ok now" {
+    assert_cluster_state ok
+}
+
+test "Sanity for CLUSTER COUNTKEYSINSLOT" {
+    set reply [R 0 CLUSTER COUNTKEYSINSLOT 0]
+    assert {$reply eq 0}
+}
+
+test "It is possible to write and read from the cluster" {
+    cluster_write_test 0
+}
+
+test "CLUSTER RESET SOFT test" {
+    set last_epoch_node0 [get_info_field [R 0 cluster info] cluster_current_epoch]
+    R 0 FLUSHALL
+    R 0 CLUSTER RESET
+    assert {[get_info_field [R 0 cluster info] cluster_current_epoch] eq $last_epoch_node0}
+
+    set last_epoch_node1 [get_info_field [R 1 cluster info] cluster_current_epoch]
+    R 1 FLUSHALL
+    R 1 CLUSTER RESET SOFT
+    assert {[get_info_field [R 1 cluster info] cluster_current_epoch] eq $last_epoch_node1}
+}
+
+test "Coverage: CLUSTER HELP" {
+    assert_match "*CLUSTER <subcommand> *" [R 0 CLUSTER HELP]
+}
+
+test "Coverage: ASKING" {
+    assert_equal {OK} [R 0 ASKING]
+}
+
+test "CLUSTER SLAVES and CLUSTER REPLICAS with zero replicas" {
+    assert_equal {} [R 0 cluster slaves [R 0 CLUSTER MYID]]
+    assert_equal {} [R 0 cluster replicas [R 0 CLUSTER MYID]]
+}
diff --git a/tests/cluster/tests/01-faildet.tcl b/tests/cluster/tests/01-faildet.tcl
new file mode 100644
index 0000000..8fe87c9
--- /dev/null
+++ b/tests/cluster/tests/01-faildet.tcl
@@ -0,0 +1,38 @@
+# Check the basic monitoring and failover capabilities.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster should start ok" {
+    assert_cluster_state ok
+}
+
+test "Killing two slave nodes" {
+    kill_instance redis 5
+    kill_instance redis 6
+}
+
+test "Cluster should be still up" {
+    assert_cluster_state ok
+}
+
+test "Killing one master node" {
+    kill_instance redis 0
+}
+
+# Note: the only slave of instance 0 is already down so no
+# failover is possible, that would change the state back to ok.
+test "Cluster should be down now" {
+    assert_cluster_state fail
+}
+
+test "Restarting master node" {
+    restart_instance redis 0
+}
+
+test "Cluster should be up again" {
+    assert_cluster_state ok
+}
diff --git a/tests/cluster/tests/02-failover.tcl b/tests/cluster/tests/02-failover.tcl
new file mode 100644
index 0000000..6b2fd09
--- /dev/null
+++ b/tests/cluster/tests/02-failover.tcl
@@ -0,0 +1,65 @@
+# Check the basic monitoring and failover capabilities.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+    assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+    wait_for_condition 1000 50 {
+        [RI 5 master_link_status] eq {up}
+    } else {
+        fail "Instance #5 master link status is not up"
+    }
+}
+
+set current_epoch [CI 1 cluster_current_epoch]
+
+test "Killing one master node" {
+    kill_instance redis 0
+}
+
+test "Wait for failover" {
+    wait_for_condition 1000 50 {
+        [CI 1 cluster_current_epoch] > $current_epoch
+    } else {
+        fail "No failover detected"
+    }
+}
+
+test "Cluster should eventually be up again" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 1
+}
+
+test "Instance #5 is now a master" {
+    assert {[RI 5 role] eq {master}}
+}
+
+test "Restarting the previously killed master node" {
+    restart_instance redis 0
+}
+
+test "Instance #0 gets converted into a slave" {
+    wait_for_condition 1000 50 {
+        [RI 0 role] eq {slave}
+    } else {
+        fail "Old master was not converted into slave"
+    }
+}
diff --git a/tests/cluster/tests/03-failover-loop.tcl b/tests/cluster/tests/03-failover-loop.tcl
new file mode 100644
index 0000000..46c22a9
--- /dev/null
+++ b/tests/cluster/tests/03-failover-loop.tcl
@@ -0,0 +1,117 @@
+# Failover stress test.
+# In this test a different node is killed in a loop for N
+# iterations. The test checks that certain properties
+# are preserved across iterations.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+set iterations 20
+set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+
+while {[incr iterations -1]} {
+    set tokill [randomInt 10]
+    set other [expr {($tokill+1)%10}] ; # Some other instance.
+    set key [randstring 20 20 alpha]
+    set val [randstring 20 20 alpha]
+    set role [RI $tokill role]
+    if {$role eq {master}} {
+        set slave {}
+        set myid [dict get [get_myself $tokill] id]
+        foreach_redis_id id {
+            if {$id == $tokill} continue
+            if {[dict get [get_myself $id] slaveof] eq $myid} {
+                set slave $id
+            }
+        }
+        if {$slave eq {}} {
+            fail "Unable to retrieve slave's ID for master #$tokill"
+        }
+    }
+
+    puts "--- Iteration $iterations ---"
+
+    if {$role eq {master}} {
+        test "Wait for slave of #$tokill to sync" {
+            wait_for_condition 1000 50 {
+                [string match {*state=online*} [RI $tokill slave0]]
+            } else {
+                fail "Slave of node #$tokill is not ok"
+            }
+        }
+        set slave_config_epoch [CI $slave cluster_my_epoch]
+    }
+
+    test "Cluster is writable before failover" {
+        for {set i 0} {$i < 100} {incr i} {
+            catch {$cluster set $key:$i $val:$i} err
+            assert {$err eq {OK}}
+        }
+        # Wait for the write to propagate to the slave if we
+        # are going to kill a master.
+        if {$role eq {master}} {
+            R $tokill wait 1 20000
+        }
+    }
+
+    test "Terminating node #$tokill" {
+        # Stop AOF so that an initial AOFRW won't prevent the instance from terminating
+        R $tokill config set appendonly no
+        kill_instance redis $tokill
+    }
+
+    if {$role eq {master}} {
+        test "Wait failover by #$slave with old epoch $slave_config_epoch" {
+            wait_for_condition 1000 50 {
+                [CI $slave cluster_my_epoch] > $slave_config_epoch
+            } else {
+                fail "No failover detected, epoch is still [CI $slave cluster_my_epoch]"
+            }
+        }
+    }
+
+    test "Cluster should eventually be up again" {
+        assert_cluster_state ok
+    }
+
+    test "Cluster is writable again" {
+        for {set i 0} {$i < 100} {incr i} {
+            catch {$cluster set $key:$i:2 $val:$i:2} err
+            assert {$err eq {OK}}
+        }
+    }
+
+    test "Restarting node #$tokill" {
+        restart_instance redis $tokill
+    }
+
+    test "Instance #$tokill is now a slave" {
+        wait_for_condition 1000 50 {
+            [RI $tokill role] eq {slave}
+        } else {
+            fail "Restarted instance is not a slave"
+        }
+    }
+
+    test "We can read back the value we set before" {
+        for {set i 0} {$i < 100} {incr i} {
+            catch {$cluster get $key:$i} err
+            assert {$err eq "$val:$i"}
+            catch {$cluster get $key:$i:2} err
+            assert {$err eq "$val:$i:2"}
+        }
+    }
+}
+
+test "Post condition: current_epoch >= my_epoch everywhere" {
+    foreach_redis_id id {
+        assert {[CI $id cluster_current_epoch] >= [CI $id cluster_my_epoch]}
+    }
+}
diff --git a/tests/cluster/tests/04-resharding.tcl b/tests/cluster/tests/04-resharding.tcl
new file mode 100644
index 0000000..18a26bd
--- /dev/null
+++ b/tests/cluster/tests/04-resharding.tcl
@@ -0,0 +1,196 @@
+# Resharding consistency test.
+# In this test slots are moved between nodes with redis-cli while a
+# write load is running. The test checks that the content of the cluster
+# matches what was written, also after restarting all the instances.
+
+source "../tests/includes/init-tests.tcl"
+source "../../../tests/support/cli.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Enable AOF in all the instances" {
+    foreach_redis_id id {
+        R $id config set appendonly yes
+        # We use "appendfsync no" because it's fast but also guarantees that
+        # write(2) is performed before replying to client.
+        R $id config set appendfsync no
+    }
+
+    foreach_redis_id id {
+        wait_for_condition 1000 500 {
+            [RI $id aof_rewrite_in_progress] == 0 &&
+            [RI $id aof_enabled] == 1
+        } else {
+            fail "Failed to enable AOF on instance #$id"
+        }
+    }
+}
+
+# Return 1 if 'ps -p' succeeds for the specified PID, that is, if the process
+# is still in execution, otherwise return 0.
+proc process_is_running {pid} {
+    # exec raises an error when ps exits with a non-zero status (as it should
+    # for a PID that does not exist), and catch maps that error to a non-zero
+    # result: a zero result therefore means that the process was found.
+    expr {[catch {exec ps -p $pid}] == 0}
+}
+
+# Our resharding test performs the following actions:
+#
+# - N commands are sent to the cluster in the course of the test.
+# - Every command selects a random key from key:0 to key:MAX-1.
+# - The operation RPUSH key <randomvalue> is performed.
+# - Tcl remembers into an array all the values pushed to each list.
+# - After N/2 commands, the resharding process is started in background.
+# - The test continues while the resharding is in progress.
+# - At the end of the test, we wait for the resharding process to stop.
+# - Finally the keys are checked to see if they contain the value they should.
+
+set numkeys 50000
+set numops 200000
+set start_node_port [get_instance_attrib redis 0 port]
+set cluster [redis_cluster 127.0.0.1:$start_node_port]
+if {$::tls} {
+    # setup a non-TLS cluster client to the TLS cluster
+    set plaintext_port [get_instance_attrib redis 0 plaintext-port]
+    set cluster_plaintext [redis_cluster 127.0.0.1:$plaintext_port 0]
+    puts "Testing TLS cluster on start node 127.0.0.1:$start_node_port, plaintext port $plaintext_port"
+} else {
+    set cluster_plaintext $cluster
+    puts "Testing using non-TLS cluster"
+}
+catch {unset content}
+array set content {}
+set tribpid {}
+
+test "Cluster consistency during live resharding" {
+    set ele 0
+    for {set j 0} {$j < $numops} {incr j} {
+        # Every 10000 ops check if the reshard process exited, so a new one can start.
+        if {$tribpid ne {} &&
+            ($j % 10000) == 0 &&
+            ![process_is_running $tribpid]} {
+            set tribpid {}
+        }
+
+        if {$j >= $numops/2 && $tribpid eq {}} {
+            puts -nonewline "...Starting resharding..."
+            flush stdout
+            set target [dict get [get_myself [randomInt 5]] id]
+            set tribpid [lindex [exec \
+                ../../../src/redis-cli --cluster reshard \
+                127.0.0.1:[get_instance_attrib redis 0 port] \
+                --cluster-from all \
+                --cluster-to $target \
+                --cluster-slots 100 \
+                --cluster-yes \
+                {*}[rediscli_tls_config "../../../tests"] \
+                | [info nameofexecutable] \
+                ../tests/helpers/onlydots.tcl \
+                &] 0]
+        }
+
+        # Write random data to random list.
+        set listid [randomInt $numkeys]
+        set key "key:$listid"
+        incr ele
+        # We write both with Lua scripts and with plain commands.
+        # This way we are able to stress Lua -> Redis command invocation
+        # as well, that has tests to prevent Lua to write into wrong
+        # hash slots.
+        # We also use both TLS and plaintext connections.
+        if {$listid % 3 == 0} {
+            $cluster rpush $key $ele
+        } elseif {$listid % 3 == 1} {
+            $cluster_plaintext rpush $key $ele
+        } else {
+            $cluster eval {redis.call("rpush",KEYS[1],ARGV[1])} 1 $key $ele
+        }
+        lappend content($key) $ele
+
+        if {($j % 1000) == 0} {
+            puts -nonewline W; flush stdout
+        }
+    }
+
+    # Wait for the resharding process to end
+    wait_for_condition 1000 500 {
+        [process_is_running $tribpid] == 0
+    } else {
+        fail "Resharding is not terminating after some time."
+    }
+
+}
+
+test "Verify $numkeys keys for consistency with logical content" {
+    # Check that the Redis Cluster content matches our logical content.
+    foreach {key value} [array get content] {
+        if {[$cluster lrange $key 0 -1] ne $value} {
+            fail "Key $key expected to hold '$value' but actual content is [$cluster lrange $key 0 -1]"
+        }
+    }
+}
+
+test "Terminate and restart all the instances" {
+    foreach_redis_id id {
+        # Stop AOF so that an initial AOFRW won't prevent the instance from terminating
+        R $id config set appendonly no
+        kill_instance redis $id
+        restart_instance redis $id
+    }
+}
+
+test "Cluster should eventually be up again" {
+    assert_cluster_state ok
+}
+
+test "Verify $numkeys keys after the restart" {
+    # Check that the Redis Cluster content matches our logical content.
+    foreach {key value} [array get content] {
+        if {[$cluster lrange $key 0 -1] ne $value} {
+            fail "Key $key expected to hold '$value' but actual content is [$cluster lrange $key 0 -1]"
+        }
+    }
+}
+
+test "Disable AOF in all the instances" {
+    foreach_redis_id id {
+        R $id config set appendonly no
+    }
+}
+
+test "Verify slaves consistency" {
+    set verified_masters 0
+    foreach_redis_id id {
+        set role [R $id role]
+        lassign $role myrole myoffset slaves
+        if {$myrole eq {slave}} continue
+        set masterport [get_instance_attrib redis $id port]
+        set masterdigest [R $id debug digest]
+        foreach_redis_id sid {
+            set srole [R $sid role]
+            if {[lindex $srole 0] eq {master}} continue
+            if {[lindex $srole 2] != $masterport} continue
+            wait_for_condition 1000 500 {
+                [R $sid debug digest] eq $masterdigest
+            } else {
+                fail "Master and slave data digest are different"
+            }
+            incr verified_masters
+        }
+    }
+    assert {$verified_masters >= 5}
+}
+
+test "Dump sanitization was skipped for migrations" {
+    # No instance is expected to have sanitized any DUMP payload.
+    foreach_redis_id id {
+        assert {[RI $id dump_payload_sanitizations] == 0}
+    }
+}
diff --git a/tests/cluster/tests/05-slave-selection.tcl b/tests/cluster/tests/05-slave-selection.tcl
new file mode 100644
index 0000000..bdb20a3
--- /dev/null
+++ b/tests/cluster/tests/05-slave-selection.tcl
@@ -0,0 +1,188 @@
+# Slave selection test
+# Check the algorithm trying to pick the slave with the most complete history.
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 5 master and 10 slaves, so that we have 2
+# slaves for each master.
+test "Create a 5 nodes cluster" {
+    create_cluster 5 10
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "The first master has actually two slaves" {
+    wait_for_condition 1000 50 {
+        [llength [lindex [R 0 role] 2]] == 2
+        && [llength [R 0 cluster replicas [R 0 CLUSTER MYID]]] == 2
+    } else {
+        fail "replicas didn't connect"
+    }
+}
+
+test "CLUSTER SLAVES and CLUSTER REPLICAS output is consistent" {
+    # Because we already have command output that cover CLUSTER REPLICAS elsewhere,
+    # here we simply judge whether their output is consistent to cover CLUSTER SLAVES.
+    set res [R 0 cluster slaves [R 0 CLUSTER MYID]]
+    set res2 [R 0 cluster replicas [R 0 CLUSTER MYID]]
+    assert_equal $res $res2
+}
+
+test {Slaves of #0 are instance #5 and #10 as expected} {
+    set port0 [get_instance_attrib redis 0 port]
+    assert {[lindex [R 5 role] 2] == $port0}
+    assert {[lindex [R 10 role] 2] == $port0}
+}
+
+test "Instance #5 and #10 synced with the master" {
+    wait_for_condition 1000 50 {
+        [RI 5 master_link_status] eq {up} &&
+        [RI 10 master_link_status] eq {up}
+    } else {
+        fail "Instance #5 or #10 master link status is not up"
+    }
+}
+
+set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+
+test "Slaves are both able to receive and acknowledge writes" {
+    for {set j 0} {$j < 100} {incr j} {
+        $cluster set $j $j
+    }
+    assert {[R 0 wait 2 60000] == 2}
+}
+
+test "Write data while slave #10 is paused and can't receive it" {
+    # Stop the slave with a multi/exec transaction so that the master will
+    # be killed as soon as it can accept writes again.
+    R 10 multi
+    R 10 debug sleep 10
+    R 10 client kill 127.0.0.1:$port0
+    R 10 deferred 1
+    R 10 exec
+
+    # Write some data the slave can't receive.
+    for {set j 0} {$j < 100} {incr j} {
+        $cluster set $j $j
+    }
+
+    # Prevent the master from accepting new slaves.
+    # Use a large pause value since we'll kill it anyway.
+    R 0 CLIENT PAUSE 60000
+
+    # Wait for the slave to return available again
+    R 10 deferred 0
+    assert {[R 10 read] eq {OK OK}}
+
+    # Kill the master so that a reconnection will not be possible.
+    kill_instance redis 0
+}
+
+test "Wait for instance #5 (and not #10) to turn into a master" {
+    wait_for_condition 1000 50 {
+        [RI 5 role] eq {master}
+    } else {
+        fail "No failover detected"
+    }
+}
+
+test "Wait for the node #10 to return alive before ending the test" {
+    R 10 ping
+}
+
+test "Cluster should eventually be up again" {
+    assert_cluster_state ok
+}
+
+test "Node #10 should eventually replicate node #5" {
+    set port5 [get_instance_attrib redis 5 port]
+    wait_for_condition 1000 50 {
+        ([lindex [R 10 role] 2] == $port5) &&
+        ([lindex [R 10 role] 3] eq {connected})
+    } else {
+        fail "#10 didn't became slave of #5"
+    }
+}
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 3 master and 15 slaves, so that we have 5
+# slaves for each master.
+test "Create a 3 nodes cluster" {
+    create_cluster 3 15
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "The first master has actually 5 slaves" {
+    wait_for_condition 1000 50 {
+        [llength [lindex [R 0 role] 2]] == 5
+    } else {
+        fail "replicas didn't connect"
+    }
+}
+
+test {Slaves of #0 are instance #3, #6, #9, #12 and #15 as expected} {
+    set port0 [get_instance_attrib redis 0 port]
+    assert {[lindex [R 3 role] 2] == $port0}
+    assert {[lindex [R 6 role] 2] == $port0}
+    assert {[lindex [R 9 role] 2] == $port0}
+    assert {[lindex [R 12 role] 2] == $port0}
+    assert {[lindex [R 15 role] 2] == $port0}
+}
+
+test {Instance #3, #6, #9, #12 and #15 synced with the master} {
+    wait_for_condition 1000 50 {
+        [RI 3 master_link_status] eq {up} &&
+        [RI 6 master_link_status] eq {up} &&
+        [RI 9 master_link_status] eq {up} &&
+        [RI 12 master_link_status] eq {up} &&
+        [RI 15 master_link_status] eq {up}
+    } else {
+        fail "Instance #3 or #6 or #9 or #12 or #15 master link status is not up"
+    }
+}
+
+# Return true if [RI $instance role] is "master" for at least one of the
+# instance IDs used as keys of the 'instances' dictionary, else false.
+proc master_detected {instances} {
+    dict for {instance _} $instances {
+        if {[RI $instance role] ne {master}} continue
+        return true
+    }
+    return false
+}
+
+test "New Master down consecutively" {
+    set instances [dict create 0 1 3 1 6 1 9 1 12 1 15 1]
+
+    set loops [expr {[dict size $instances]-1}]
+    for {set i 0} {$i < $loops} {incr i} {
+        set master_id -1
+        foreach instance [dict keys $instances] {
+            if {[RI $instance role] eq {master}} {
+                set master_id $instance
+                break;
+            }
+        }
+
+        if {$master_id eq -1} {
+            fail "no master detected, #loop $i"
+        }
+
+        set instances [dict remove $instances $master_id]
+
+        kill_instance redis $master_id
+        wait_for_condition 1000 50 {
+            [master_detected $instances]
+        } else {
+            fail "No failover detected when master $master_id fails"
+        }
+
+        assert_cluster_state ok
+    }
+}
diff --git a/tests/cluster/tests/06-slave-stop-cond.tcl b/tests/cluster/tests/06-slave-stop-cond.tcl
new file mode 100644
index 0000000..80a2d17
--- /dev/null
+++ b/tests/cluster/tests/06-slave-stop-cond.tcl
@@ -0,0 +1,77 @@
+# Slave stop condition test
+# Check that if there is a disconnection time limit, the slave will not try
+# to failover its master.
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 5 master and 5 slaves.
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "The first master has actually one slave" {
+    wait_for_condition 1000 50 {
+        [llength [lindex [R 0 role] 2]] == 1
+    } else {
+        fail "replicas didn't connect"
+    }
+}
+
+test {Slaves of #0 is instance #5 as expected} {
+    set port0 [get_instance_attrib redis 0 port]
+    assert {[lindex [R 5 role] 2] == $port0}
+}
+
+test "Instance #5 synced with the master" {
+    wait_for_condition 1000 50 {
+        [RI 5 master_link_status] eq {up}
+    } else {
+        fail "Instance #5 master link status is not up"
+    }
+}
+
+test "Lower the slave validity factor of #5 to the value of 2" {
+    assert {[R 5 config set cluster-slave-validity-factor 2] eq {OK}}
+}
+
+test "Break master-slave link and prevent further reconnections" {
+    # Stop the slave with a multi/exec transaction so that the master will
+    # be killed as soon as it can accept writes again.
+    R 5 multi
+    R 5 client kill 127.0.0.1:$port0
+    # here we should sleep 6 or more seconds (node_timeout * slave_validity)
+    # but the actual validity time is actually incremented by the
+    # repl-ping-slave-period value which is 10 seconds by default. So we
+    # need to wait more than 16 seconds.
+    R 5 debug sleep 20
+    R 5 deferred 1
+    R 5 exec
+
+    # Prevent the master from accepting new slaves.
+    # Use a large pause value since we'll kill it anyway.
+    R 0 CLIENT PAUSE 60000
+
+    # Wait for the slave to return available again
+    R 5 deferred 0
+    assert {[R 5 read] eq {OK OK}}
+
+    # Kill the master so that a reconnection will not be possible.
+    kill_instance redis 0
+}
+
+test "Slave #5 is reachable and alive" {
+    assert {[R 5 ping] eq {PONG}}
+}
+
+test "Slave #5 should not be able to failover" {
+    after 10000
+    assert {[RI 5 role] eq {slave}}
+}
+
+test "Cluster should be down" {
+    assert_cluster_state fail
+}
diff --git a/tests/cluster/tests/07-replica-migration.tcl b/tests/cluster/tests/07-replica-migration.tcl
new file mode 100644
index 0000000..c4e9985
--- /dev/null
+++ b/tests/cluster/tests/07-replica-migration.tcl
@@ -0,0 +1,103 @@
+# Replica migration test.
+# Check that orphaned masters are joined by replicas of masters having
+# multiple replicas attached, according to the migration barrier settings.
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 5 master and 10 slaves, so that we have 2
+# slaves for each master.
+test "Create a 5 nodes cluster" {
+    create_cluster 5 10
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Each master should have two replicas attached" {
+    foreach_redis_id id {
+        if {$id < 5} {
+            wait_for_condition 1000 50 {
+                [llength [lindex [R $id role] 2]] == 2
+            } else {
+                fail "Master #$id does not have 2 slaves as expected"
+            }
+        }
+    }
+}
+
+test "Killing all the slaves of master #0 and #1" {
+    kill_instance redis 5
+    kill_instance redis 10
+    kill_instance redis 6
+    kill_instance redis 11
+    after 4000
+}
+
+foreach_redis_id id {
+    if {$id < 5} {
+        test "Master #$id should have at least one replica" {
+            wait_for_condition 1000 50 {
+                [llength [lindex [R $id role] 2]] >= 1
+            } else {
+                fail "Master #$id has no replicas"
+            }
+        }
+    }
+}
+
+# Now test the migration to a master which used to be a slave, after
+# a failover.
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 5 master and 10 slaves, so that we have 2
+# slaves for each master.
+test "Create a 5 nodes cluster" {
+    create_cluster 5 10
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Kill slave #7 of master #2. Only slave left is #12 now" {
+    kill_instance redis 7
+}
+
+set current_epoch [CI 1 cluster_current_epoch]
+
+test "Killing master node #2, #12 should failover" {
+    kill_instance redis 2
+}
+
+test "Wait for failover" {
+    wait_for_condition 1000 50 {
+        [CI 1 cluster_current_epoch] > $current_epoch
+    } else {
+        fail "No failover detected"
+    }
+}
+
+test "Cluster should eventually be up again" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 1
+}
+
+test "Instance 12 is now a master without slaves" {
+    assert {[RI 12 role] eq {master}}
+}
+
+# The remaining instance is now without slaves. Some other slave
+# should migrate to it.
+
+test "Master #12 should get at least one migrated replica" {
+    wait_for_condition 1000 50 {
+        [llength [lindex [R 12 role] 2]] >= 1
+    } else {
+        fail "Master #12 has no replicas"
+    }
+}
diff --git a/tests/cluster/tests/08-update-msg.tcl b/tests/cluster/tests/08-update-msg.tcl
new file mode 100644
index 0000000..9011f32
--- /dev/null
+++ b/tests/cluster/tests/08-update-msg.tcl
@@ -0,0 +1,90 @@
+# Test UPDATE messages sent by other nodes when the currently authoritative
+# master is unavailable. The test is performed in the following steps:
+#
+# 1) Master goes down.
+# 2) Slave failover and becomes new master.
+# 3) New master is partitioned away.
+# 4) Old master returns.
+# 5) At this point we expect the old master to turn into a slave ASAP because
+#    of the UPDATE messages it will receive from the other nodes when its
+#    configuration will be found to be outdated.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+    assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+    wait_for_condition 1000 50 {
+        [RI 5 master_link_status] eq {up}
+    } else {
+        fail "Instance #5 master link status is not up"
+    }
+}
+
+set current_epoch [CI 1 cluster_current_epoch]
+
+test "Killing one master node" {
+    kill_instance redis 0
+}
+
+test "Wait for failover" {
+    wait_for_condition 1000 50 {
+        [CI 1 cluster_current_epoch] > $current_epoch
+    } else {
+        fail "No failover detected"
+    }
+}
+
+test "Cluster should eventually be up again" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 1
+}
+
+test "Instance #5 is now a master" {
+    assert {[RI 5 role] eq {master}}
+}
+
+test "Killing the new master #5" {
+    kill_instance redis 5
+}
+
+test "Cluster should be down now" {
+    assert_cluster_state fail
+}
+
+test "Restarting the old master node" {
+    restart_instance redis 0
+}
+
+test "Instance #0 gets converted into a slave" {
+    wait_for_condition 1000 50 {
+        [RI 0 role] eq {slave}
+    } else {
+        fail "Old master was not converted into slave"
+    }
+}
+
+test "Restarting the new master node" {
+    restart_instance redis 5
+}
+
+test "Cluster is up again" {
+    assert_cluster_state ok
+}
diff --git a/tests/cluster/tests/09-pubsub.tcl b/tests/cluster/tests/09-pubsub.tcl
new file mode 100644
index 0000000..e62b91c
--- /dev/null
+++ b/tests/cluster/tests/09-pubsub.tcl
@@ -0,0 +1,40 @@
+# Test PUBLISH propagation across the cluster.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+# Publish a random value through node $instance and check that every other
+# node in 0..$instances-1 receives it on "testchannel".
+proc test_cluster_publish {instance instances} {
+    # Every node except the publisher subscribes in deferred mode.
+    for {set node 0} {$node < $instances} {incr node} {
+        if {$node == $instance} continue
+        R $node deferred 1
+        R $node subscribe testchannel
+        R $node read; # Consume the subscribe reply
+    }
+
+    set data [randomValue]
+    R $instance PUBLISH testchannel $data
+
+    # Each subscriber must see the payload; then restore its normal mode.
+    for {set node 0} {$node < $instances} {incr node} {
+        if {$node == $instance} continue
+        set msg [R $node read]
+        assert {$data eq [lindex $msg 2]}
+        R $node unsubscribe testchannel
+        R $node read; # Consume the unsubscribe reply
+        R $node deferred 0
+    }
+}
+
+test "Test publishing to master" {
+    test_cluster_publish 0 10
+}
+
+test "Test publishing to slave" {
+    test_cluster_publish 5 10
+}
diff --git a/tests/cluster/tests/10-manual-failover.tcl b/tests/cluster/tests/10-manual-failover.tcl
new file mode 100644
index 0000000..5441b79
--- /dev/null
+++ b/tests/cluster/tests/10-manual-failover.tcl
@@ -0,0 +1,192 @@
+# Check the manual failover
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+    assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+    wait_for_condition 1000 50 {
+        [RI 5 master_link_status] eq {up}
+    } else {
+        fail "Instance #5 master link status is not up"
+    }
+}
+
+set current_epoch [CI 1 cluster_current_epoch]
+
+set numkeys 50000
+set numops 10000
+set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+catch {unset content}
+array set content {}
+
+test "Send CLUSTER FAILOVER to #5, during load" {
+    for {set j 0} {$j < $numops} {incr j} {
+        # Write random data to random list.
+        set listid [randomInt $numkeys]
+        set key "key:$listid"
+        set ele [randomValue]
+        # We write both with Lua scripts and with plain commands.
+        # This way we are able to stress Lua -> Redis command invocation
+        # as well, that has tests to prevent Lua to write into wrong
+        # hash slots.
+        if {$listid % 2} {
+            $cluster rpush $key $ele
+        } else {
+           $cluster eval {redis.call("rpush",KEYS[1],ARGV[1])} 1 $key $ele
+        }
+        lappend content($key) $ele
+
+        if {($j % 1000) == 0} {
+            puts -nonewline W; flush stdout
+        }
+
+        if {$j == $numops/2} {R 5 cluster failover}
+    }
+}
+
+test "Wait for failover" {
+    wait_for_condition 1000 50 {
+        [CI 1 cluster_current_epoch] > $current_epoch
+    } else {
+        fail "No failover detected"
+    }
+}
+
+test "Cluster should eventually be up again" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 1
+}
+
+test "Instance #5 is now a master" {
+    assert {[RI 5 role] eq {master}}
+}
+
+test "Verify $numkeys keys for consistency with logical content" {
+    # Check that the Redis Cluster content matches our logical content.
+    foreach {key value} [array get content] {
+        assert {[$cluster lrange $key 0 -1] eq $value}
+    }
+}
+
+test "Instance #0 gets converted into a slave" {
+    wait_for_condition 1000 50 {
+        [RI 0 role] eq {slave}
+    } else {
+        fail "Old master was not converted into slave"
+    }
+}
+
+## Check that manual failover does not happen if we can't talk with the master.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+    assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+    wait_for_condition 1000 50 {
+        [RI 5 master_link_status] eq {up}
+    } else {
+        fail "Instance #5 master link status is not up"
+    }
+}
+
+test "Make instance #0 unreachable without killing it" {
+    R 0 deferred 1
+    R 0 DEBUG SLEEP 10
+}
+
+test "Send CLUSTER FAILOVER to instance #5" {
+    R 5 cluster failover
+}
+
+test "Instance #5 is still a slave after some time (no failover)" {
+    after 5000
+    assert {[RI 5 role] eq {master}} ;# NOTE(review): title says "still a slave" but this expects master - confirm intent
+}
+
+test "Wait for instance #0 to return back alive" {
+    R 0 deferred 0
+    assert {[R 0 read] eq {OK}}
+}
+
+## Check with "force" failover happens anyway.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+    assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+    wait_for_condition 1000 50 {
+        [RI 5 master_link_status] eq {up}
+    } else {
+        fail "Instance #5 master link status is not up"
+    }
+}
+
+test "Make instance #0 unreachable without killing it" {
+    R 0 deferred 1
+    R 0 DEBUG SLEEP 10
+}
+
+test "Send CLUSTER FAILOVER to instance #5" {
+    R 5 cluster failover force
+}
+
+test "Instance #5 is a master after some time" {
+    wait_for_condition 1000 50 {
+        [RI 5 role] eq {master}
+    } else {
+        fail "Instance #5 is not a master after some time regardless of FORCE"
+    }
+}
+
+test "Wait for instance #0 to return back alive" {
+    R 0 deferred 0
+    assert {[R 0 read] eq {OK}}
+}
diff --git a/tests/cluster/tests/11-manual-takeover.tcl b/tests/cluster/tests/11-manual-takeover.tcl
new file mode 100644
index 0000000..78a0f85
--- /dev/null
+++ b/tests/cluster/tests/11-manual-takeover.tcl
@@ -0,0 +1,71 @@
+# Manual takeover test
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 0
+}
+
+# For this test, disable replica failover until
+# all of the primaries are confirmed killed. Otherwise
+# there might be enough time to elect a replica.
+set replica_ids { 5 6 7 }
+foreach id $replica_ids {
+    R $id config set cluster-replica-no-failover yes
+}
+
+test "Killing majority of master nodes" {
+    kill_instance redis 0
+    kill_instance redis 1
+    kill_instance redis 2
+}
+
+foreach id $replica_ids {
+    R $id config set cluster-replica-no-failover no
+}
+
+test "Cluster should eventually be down" {
+    assert_cluster_state fail
+}
+
+test "Use takeover to bring slaves back" {
+    foreach id $replica_ids {
+        R $id cluster failover takeover
+    }
+}
+
+test "Cluster should eventually be up again" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 4
+}
+
+test "Instance #5, #6, #7 are now masters" {
+    foreach id $replica_ids {
+        assert {[RI $id role] eq {master}}
+    }
+}
+
+test "Restarting the previously killed master nodes" {
+    restart_instance redis 0
+    restart_instance redis 1
+    restart_instance redis 2
+}
+
+test "Instance #0, #1, #2 gets converted into a slaves" {
+    wait_for_condition 1000 50 {
+        [RI 0 role] eq {slave} && [RI 1 role] eq {slave} && [RI 2 role] eq {slave}
+    } else {
+        fail "Old masters not converted into slaves"
+    }
+}
diff --git a/tests/cluster/tests/12-replica-migration-2.tcl b/tests/cluster/tests/12-replica-migration-2.tcl
new file mode 100644
index 0000000..ed68006
--- /dev/null
+++ b/tests/cluster/tests/12-replica-migration-2.tcl
@@ -0,0 +1,75 @@
+# Replica migration test #2.
+#
+# Check that a master regains the status of valid target for replica
+# migration after getting slots again, in a cluster where the
+# other masters have slaves.
+
+source "../tests/includes/init-tests.tcl"
+source "../../../tests/support/cli.tcl"
+
+# Create a cluster with 5 master and 15 slaves, to make sure there are no
+# empty masters and make rebalancing simpler to handle during the test.
+test "Create a 5 nodes cluster" {
+    cluster_create_with_continuous_slots 5 15
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Each master should have at least two replicas attached" {
+    foreach_redis_id id {
+        if {$id < 5} {
+            wait_for_condition 1000 50 {
+                [llength [lindex [R $id role] 2]] >= 2
+            } else {
+                fail "Master #$id does not have 2 slaves as expected"
+            }
+        }
+    }
+}
+
+test "Set allow-replica-migration yes" {
+    foreach_redis_id id {
+        R $id CONFIG SET cluster-allow-replica-migration yes
+    }
+}
+
+set master0_id [dict get [get_myself 0] id]
+test "Resharding all the master #0 slots away from it" {
+    set output [exec \
+        ../../../src/redis-cli --cluster rebalance \
+        127.0.0.1:[get_instance_attrib redis 0 port] \
+        {*}[rediscli_tls_config "../../../tests"] \
+        --cluster-weight ${master0_id}=0 >@ stdout ]
+
+}
+
+test "Master #0 who lost all slots should turn into a replica without replicas" {
+    wait_for_condition 1000 50 {
+        [RI 0 role] == "slave" && [RI 0 connected_slaves] == 0
+    } else {
+        puts [R 0 info replication]
+        fail "Master #0 didn't turn itself into a replica"
+    }
+}
+
+test "Resharding back some slot to master #0" {
+    # Wait for the cluster config to propagate before attempting a
+    # new resharding.
+    after 10000
+    set output [exec \
+        ../../../src/redis-cli --cluster rebalance \
+        127.0.0.1:[get_instance_attrib redis 0 port] \
+        {*}[rediscli_tls_config "../../../tests"] \
+        --cluster-weight ${master0_id}=.01 \
+        --cluster-use-empty-masters  >@ stdout]
+}
+
+test "Master #0 should re-acquire one or more replicas" {
+    wait_for_condition 1000 50 {
+        [llength [lindex [R 0 role] 2]] >= 1
+    } else {
+        fail "Master #0 has no replicas"
+    }
+}
diff --git a/tests/cluster/tests/12.1-replica-migration-3.tcl b/tests/cluster/tests/12.1-replica-migration-3.tcl
new file mode 100644
index 0000000..790c732
--- /dev/null
+++ b/tests/cluster/tests/12.1-replica-migration-3.tcl
@@ -0,0 +1,65 @@
+# Replica migration test #3.
+#
+# Check that if 'cluster-allow-replica-migration' is set to 'no', slaves do not
+# migrate when master becomes empty.
+
+source "../tests/includes/init-tests.tcl"
+source "../tests/includes/utils.tcl"
+
+# Create a cluster with 5 master and 15 slaves, to make sure there are no
+# empty masters and make rebalancing simpler to handle during the test.
+test "Create a 5 nodes cluster" {
+    cluster_create_with_continuous_slots 5 15
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Each master should have at least two replicas attached" {
+    foreach_redis_id id {
+        if {$id < 5} {
+            wait_for_condition 1000 50 {
+                [llength [lindex [R $id role] 2]] >= 2
+            } else {
+                fail "Master #$id does not have 2 slaves as expected"
+            }
+        }
+    }
+}
+
+test "Set allow-replica-migration no" {
+    foreach_redis_id id {
+        R $id CONFIG SET cluster-allow-replica-migration no
+    }
+}
+
+set master0_id [dict get [get_myself 0] id]
+test "Resharding all the master #0 slots away from it" {
+    set output [exec \
+        ../../../src/redis-cli --cluster rebalance \
+        127.0.0.1:[get_instance_attrib redis 0 port] \
+        {*}[rediscli_tls_config "../../../tests"] \
+        --cluster-weight ${master0_id}=0 >@ stdout ]
+}
+
+test "Wait cluster to be stable" {
+    wait_cluster_stable
+}
+
+test "Master #0 still should have its replicas" {
+    assert { [llength [lindex [R 0 role] 2]] >= 2 }
+}
+
+test "Each master should have at least two replicas attached" {
+    foreach_redis_id id {
+        if {$id < 5} {
+            wait_for_condition 1000 50 {
+                [llength [lindex [R $id role] 2]] >= 2
+            } else {
+                fail "Master #$id does not have 2 slaves as expected"
+            }
+        }
+    }
+}
+
diff --git a/tests/cluster/tests/13-no-failover-option.tcl b/tests/cluster/tests/13-no-failover-option.tcl
new file mode 100644
index 0000000..befa598
--- /dev/null
+++ b/tests/cluster/tests/13-no-failover-option.tcl
@@ -0,0 +1,61 @@
+# Check that the no-failover option works
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+    assert {[RI 5 role] eq {slave}}
+
+    # Configure it to never failover the master
+    R 5 CONFIG SET cluster-slave-no-failover yes
+}
+
+test "Instance #5 synced with the master" {
+    wait_for_condition 1000 50 {
+        [RI 5 master_link_status] eq {up}
+    } else {
+        fail "Instance #5 master link status is not up"
+    }
+}
+
+test "The nofailover flag is propagated" {
+    set slave5_id [dict get [get_myself 5] id]
+
+    foreach_redis_id id {
+        wait_for_condition 1000 50 {
+            [has_flag [get_node_by_id $id $slave5_id] nofailover]
+        } else {
+            fail "Instance $id can't see the nofailover flag of slave"
+        }
+    }
+}
+
+set current_epoch [CI 1 cluster_current_epoch]
+
+test "Killing one master node" {
+    kill_instance redis 0
+}
+
+test "Cluster should be still down after some time" {
+    after 10000
+    assert_cluster_state fail
+}
+
+test "Instance #5 is still a slave" {
+    assert {[RI 5 role] eq {slave}}
+}
+
+test "Restarting the previously killed master node" {
+    restart_instance redis 0
+}
diff --git a/tests/cluster/tests/14-consistency-check.tcl b/tests/cluster/tests/14-consistency-check.tcl
new file mode 100644
index 0000000..e3b9a19
--- /dev/null
+++ b/tests/cluster/tests/14-consistency-check.tcl
@@ -0,0 +1,124 @@
+source "../tests/includes/init-tests.tcl"
+source "../../../tests/support/cli.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster should start ok" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 0
+}
+
+# Return the id of the first non-empty master, or {} if there is none.
+proc find_non_empty_master {} {
+    set found {}
+    foreach_redis_id id {
+        if {[RI $id role] eq {master}} {
+            if {[R $id dbsize] > 0} { set found $id; break }
+        }
+    }
+    return $found
+}
+
+# Wait until master $id lists a replica in ROLE, then return its instance id.
+proc get_one_of_my_replica {id} {
+    wait_for_condition 1000 50 {
+        [llength [lindex [R $id role] 2]] > 0
+    } else {
+        fail "replicas didn't connect"
+    }
+    set replica_port [lindex [R $id role] 2 0 1]
+    return [get_instance_id_by_port redis $replica_port]
+}
+
+# Write key_expire.100 .. key_expire.199 with a TTL of $ttl via node $id.
+proc cluster_write_keys_with_expire {id ttl} {
+    set prefix [randstring 20 20 alpha]
+    set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis $id port]]
+    for {set n 100} {$n < 200} {incr n} {
+        $cluster setex key_expire.$n $ttl $prefix.$n
+    }
+    $cluster close
+}
+
+# Make sure that a replica restarting from persistence still loads keys
+# that have already expired: this is critical for the correct execution of
+# commands that later arrive from the master.
+proc test_slave_load_expired_keys {aof} {
+    test "Slave expired keys is loaded when restarted: appendonly=$aof" {
+        set master_id [find_non_empty_master]
+        set replica_id [get_one_of_my_replica $master_id]
+
+        set master_dbsize_0 [R $master_id dbsize]
+        set replica_dbsize_0 [R $replica_id dbsize]
+        assert_equal $master_dbsize_0 $replica_dbsize_0
+
+        # Configure the replica persistence and rewrite the config file so it
+        # survives the restart. This must happen before populating the volatile
+        # keys, since it triggers an AOFRW and we want the AOF file to contain
+        # 'SET PXAT' commands rather than an RDB with volatile keys.
+        R $replica_id config set appendonly $aof
+        R $replica_id config rewrite
+
+        # fill with 100 keys with 3 second TTL
+        set data_ttl 3
+        cluster_write_keys_with_expire $master_id $data_ttl
+
+        # wait for replica to be in sync with master
+        wait_for_condition 500 10 {
+            [R $replica_id dbsize] eq [R $master_id dbsize]
+        } else {
+            fail "replica didn't sync"
+        }
+
+        set replica_dbsize_1 [R $replica_id dbsize]
+        assert {$replica_dbsize_1 > $replica_dbsize_0}
+
+        # make replica create persistence file
+        if {$aof == "yes"} {
+            # We need to wait for the initial AOFRW to be done, otherwise
+            # kill_instance (which now uses SIGTERM) will fail ("Writing initial AOF, can't exit")
+            wait_for_condition 100 10 {
+                [RI $replica_id aof_rewrite_scheduled] eq 0 &&
+                [RI $replica_id aof_rewrite_in_progress] eq 0
+            } else {
+                fail "AOFRW didn't finish"
+            }
+        } else {
+            R $replica_id save
+        }
+
+        # kill the replica (would stay down until re-started)
+        kill_instance redis $replica_id
+
+        # Make sure the master doesn't do active expire (sending DELs to the replica)
+        R $master_id DEBUG SET-ACTIVE-EXPIRE 0
+
+        # Wait for all the keys to get logically expired (TTL in milliseconds).
+        after [expr {$data_ttl*1000}]
+
+        # start the replica again (loading an RDB or AOF file)
+        restart_instance redis $replica_id
+
+        # make sure the keys are still there
+        set replica_dbsize_3 [R $replica_id dbsize]
+        assert {$replica_dbsize_3 > $replica_dbsize_0}
+
+        # restore settings
+        R $master_id DEBUG SET-ACTIVE-EXPIRE 1
+
+        # wait for the master to expire all keys and replica to get the DELs
+        wait_for_condition 500 10 {
+            [R $replica_id dbsize] eq $master_dbsize_0
+        } else {
+            fail "keys didn't expire"
+        }
+    }
+}
+
+test_slave_load_expired_keys no
+test_slave_load_expired_keys yes
diff --git a/tests/cluster/tests/15-cluster-slots.tcl b/tests/cluster/tests/15-cluster-slots.tcl
new file mode 100644
index 0000000..892e904
--- /dev/null
+++ b/tests/cluster/tests/15-cluster-slots.tcl
@@ -0,0 +1,128 @@
+source "../tests/includes/init-tests.tcl"
+
+# Assign all 16384 hash slots to nodes 0..n-1 in an interleaved layout:
+# slot s goes to node (s mod n).
+proc cluster_allocate_mixedSlots {n} {
+    for {set slot 16383} {$slot >= 0} {incr slot -1} {
+        lappend owned([expr {$slot % $n}]) $slot
+    }
+    # Slots were collected from 16383 downwards; pass them in that order.
+    for {set node 0} {$node < $n} {incr node} {
+        R $node cluster addslots {*}$owned($node)
+    }
+}
+
+# Create a cluster whose $masters masters own interleaved slots, attach
+# $slaves replicas when non-zero, then assert the cluster state is ok.
+proc create_cluster_with_mixedSlot {masters slaves} {
+    cluster_allocate_mixedSlots $masters
+    if {$slaves} { cluster_allocate_slaves $masters $slaves }
+    assert_cluster_state ok
+}
+
+test "Create a 5 nodes cluster" {
+    create_cluster_with_mixedSlot 5 15
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+    assert {[RI 5 role] eq {slave}}
+}
+
+test "client do not break when cluster slot" {
+    R 0 config set client-output-buffer-limit "normal 33554432 16777216 60"
+    if { [catch {R 0 cluster slots}] } {
+        fail "output overflow when cluster slots"
+    }
+}
+
+test "client can handle keys with hash tag" {
+    set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+    $cluster set foo{tag} bar
+    $cluster close
+}
+
+test "slot migration is valid from primary to another primary" {
+    set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+    set key order1
+    set slot [$cluster cluster keyslot $key]
+    array set nodefrom [$cluster masternode_for_slot $slot]
+    array set nodeto [$cluster masternode_notfor_slot $slot]
+
+    assert_equal {OK} [$nodefrom(link) cluster setslot $slot node $nodeto(id)]
+    assert_equal {OK} [$nodeto(link) cluster setslot $slot node $nodeto(id)]
+}
+
+test "slot migration is invalid from primary to replica" {
+    set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+    set key order1
+    set slot [$cluster cluster keyslot $key]
+    array set nodefrom [$cluster masternode_for_slot $slot]
+
+    # Get replica node serving slot.
+    set replicanodeinfo [$cluster cluster replicas $nodefrom(id)]
+    puts $replicanodeinfo
+    set args [split $replicanodeinfo " "]
+    set replicaid [lindex [split [lindex $args 0] \{] 1]
+    puts $replicaid
+    # No [] inside catch: a successful reply must not be run as a command.
+    catch {$nodefrom(link) cluster setslot $slot node $replicaid} err
+    assert_match "*Target node is not a master" $err
+}
+
+# Return how many slots the client command $n reports in CLUSTER SLOTS.
+proc count_bound_slots {n} {
+    set slot_count 0
+    foreach slot_range_mapping [$n cluster slots] {
+        lassign $slot_range_mapping start_slot end_slot
+        incr slot_count [expr {$end_slot - $start_slot + 1}]
+    }
+    return $slot_count
+}
+
+ test "slot must be unbound on the owner when it is deleted" {
+     set node0 [Rn 0]
+     set node1 [Rn 1]
+     assert {[count_bound_slots $node0] eq 16384}
+     assert {[count_bound_slots $node1] eq 16384}
+
+     set slot_to_delete 0
+     # Delete
+     $node0 CLUSTER DELSLOTS $slot_to_delete
+
+     # Verify
+     # The node that owns the slot must unbind the slot that was deleted
+     wait_for_condition 1000 50 {
+         [count_bound_slots $node0] == 16383
+     } else {
+         fail "Cluster slot deletion was not recorded on the node that owns the slot"
+     }
+
+     # We don't propagate slot deletion across all nodes in the cluster.
+     # This can lead to extra redirect before the clients find out that the slot is unbound.
+     wait_for_condition 1000 50 {
+         [count_bound_slots $node1] == 16384
+     } else {
+         fail "Cluster slot deletion should not be propagated to all nodes in the cluster"
+     }
+ }
+
+if {$::tls} {
+    test {CLUSTER SLOTS from non-TLS client in TLS cluster} {
+        set slots_tls [R 0 cluster slots]
+        set host [get_instance_attrib redis 0 host]
+        set plaintext_port [get_instance_attrib redis 0 plaintext-port]
+        set client_plain [redis $host $plaintext_port 0 0]
+        set slots_plain [$client_plain cluster slots]
+        $client_plain close
+        # Compare the ports in the first row
+        assert_no_match [lindex $slots_tls 0 3 1] [lindex $slots_plain 0 3 1]
+    }
+}
+\ No newline at end of file
diff --git a/tests/cluster/tests/16-transactions-on-replica.tcl b/tests/cluster/tests/16-transactions-on-replica.tcl
new file mode 100644
index 0000000..8bec06e
--- /dev/null
+++ b/tests/cluster/tests/16-transactions-on-replica.tcl
@@ -0,0 +1,85 @@
+# Check basic transactions on a replica.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a primary with a replica" {
+    create_cluster 1 1
+}
+
+test "Cluster should start ok" {
+    assert_cluster_state ok
+}
+
+set primary [Rn 0]
+set replica [Rn 1]
+
+test "Can't read from replica without READONLY" {
+    $primary SET a 1
+    wait_for_ofs_sync $primary $replica
+    catch {$replica GET a} err
+    assert {[string range $err 0 4] eq {MOVED}}
+}
+
+test "Can't read from replica after READWRITE" {
+    $replica READWRITE
+    catch {$replica GET a} err
+    assert {[string range $err 0 4] eq {MOVED}}
+}
+
+test "Can read from replica after READONLY" {
+    $replica READONLY
+    assert {[$replica GET a] eq {1}}
+}
+
+test "Can perform HSET primary and HGET from replica" {
+    $primary HSET h a 1
+    $primary HSET h b 2
+    $primary HSET h c 3
+    wait_for_ofs_sync $primary $replica
+    assert {[$replica HGET h a] eq {1}}
+    assert {[$replica HGET h b] eq {2}}
+    assert {[$replica HGET h c] eq {3}}
+}
+
+test "Can MULTI-EXEC transaction of HGET operations from replica" {
+    $replica MULTI
+    assert {[$replica HGET h a] eq {QUEUED}}
+    assert {[$replica HGET h b] eq {QUEUED}}
+    assert {[$replica HGET h c] eq {QUEUED}}
+    assert {[$replica EXEC] eq {1 2 3}}
+}
+
+test "MULTI-EXEC with write operations is MOVED" {
+    $replica MULTI
+    catch {$replica HSET h b 4} err
+    assert {[string range $err 0 4] eq {MOVED}}
+    catch {$replica exec} err
+    assert {[string range $err 0 8] eq {EXECABORT}}
+}
+
+test "read-only blocking operations from replica" {
+    set rd [redis_deferring_client redis 1]
+    $rd readonly
+    $rd read
+    $rd XREAD BLOCK 0 STREAMS k 0
+
+    wait_for_condition 1000 50 {
+        [RI 1 blocked_clients] eq {1}
+    } else {
+        fail "client wasn't blocked"
+    }
+
+    $primary XADD k * foo bar
+    set res [$rd read]
+    set res [lindex [lindex [lindex [lindex $res 0] 1] 0] 1]
+    assert {$res eq {foo bar}}
+    $rd close
+}
+
+test "reply MOVED when eval from replica for update" {
+    # No [] inside catch: a successful reply must not be run as a command.
+    catch {$replica eval {#!lua
+        return redis.call('del','a')
+        } 1 a} err
+    assert {[string range $err 0 4] eq {MOVED}}
+}
+\ No newline at end of file
diff --git a/tests/cluster/tests/17-diskless-load-swapdb.tcl b/tests/cluster/tests/17-diskless-load-swapdb.tcl
new file mode 100644
index 0000000..7a56ec7
--- /dev/null
+++ b/tests/cluster/tests/17-diskless-load-swapdb.tcl
@@ -0,0 +1,86 @@
+# Check that replica keys and keys to slots map are right after failing to diskless load using SWAPDB.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a primary with a replica" {
+    create_cluster 1 1
+}
+
+test "Cluster should start ok" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 0
+}
+
+test "Main db not affected when fail to diskless load" {
+    set master [Rn 0]
+    set replica [Rn 1]
+    set master_id 0
+    set replica_id 1
+
+    $replica READONLY
+    $replica config set repl-diskless-load swapdb
+    $replica config set appendonly no
+    $replica config set save ""
+    $replica config rewrite
+    $master config set repl-backlog-size 1024
+    $master config set repl-diskless-sync yes
+    $master config set repl-diskless-sync-delay 0
+    $master config set rdb-key-save-delay 10000
+    $master config set rdbcompression no
+    $master config set appendonly no
+    $master config set save ""
+
+    # Write a key that belongs to slot 0
+    set slot0_key "06S"
+    $master set $slot0_key 1
+    wait_for_ofs_sync $master $replica
+    assert_equal {1} [$replica get $slot0_key]
+    assert_equal $slot0_key [$replica CLUSTER GETKEYSINSLOT 0 1]
+
+    # Save an RDB and kill the replica, then delete the key from the master
+    # so the replica's saved dataset and the master's dataset now differ.
+    $replica save
+    kill_instance redis $replica_id
+    $master del $slot0_key
+
+    # The replica must full sync with the master when it starts, because the
+    # replication backlog is very small and dumping the RDB takes a while.
+    set num 10000
+    set value [string repeat A 1024]
+    set rd [redis_deferring_client redis $master_id]
+    for {set j 0} {$j < $num} {incr j} {
+        $rd set $j $value
+    }
+    for {set j 0} {$j < $num} {incr j} {
+        $rd read
+    }
+    $rd close
+
+    # Start the replica again
+    restart_instance redis $replica_id
+    $replica READONLY
+
+    # Start full sync, wait till after db started loading in background
+    wait_for_condition 500 10 {
+        [s $replica_id async_loading] eq 1
+    } else {
+        fail "Fail to full sync"
+    }
+
+    # Kill master, abort full sync
+    kill_instance redis $master_id
+
+    # Wait till the replica detects the disconnection and stops loading
+    wait_for_condition 500 10 {
+        [s $replica_id async_loading] eq 0
+    } else {
+        fail "Replica didn't abort the full sync"
+    }
+
+    # Replica keys and keys to slots map still both are right
+    assert_equal {1} [$replica get $slot0_key]
+    assert_equal $slot0_key [$replica CLUSTER GETKEYSINSLOT 0 1]
+}
diff --git a/tests/cluster/tests/18-info.tcl b/tests/cluster/tests/18-info.tcl
new file mode 100644
index 0000000..68c62d3
--- /dev/null
+++ b/tests/cluster/tests/18-info.tcl
@@ -0,0 +1,45 @@
+# Check cluster info stats
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a cluster with 2 primaries and no replicas" {
+    create_cluster 2 0
+}
+
+test "Cluster should start ok" {
+    assert_cluster_state ok
+}
+
+set primary1 [Rn 0]
+set primary2 [Rn 1]
+
+proc cmdstat {instance cmd} {
+    cmdrstat $cmd $instance ;# same lookup, arguments passed as (cmd, instance)
+}
+
+proc errorstat {instance cmd} {
+    errorrstat $cmd $instance ;# same lookup, arguments passed as (cmd, instance)
+}
+
+test "errorstats: rejected call due to MOVED Redirection" {
+    # Both primaries start with clean stats and no MOVED error recorded.
+    $primary1 config resetstat
+    assert_match {} [errorstat $primary1 MOVED]
+    $primary2 config resetstat
+    assert_match {} [errorstat $primary2 MOVED]
+    # Send the same SET to both: one primary is expected to accept it and
+    # the other to answer with MOVED.
+    catch {$primary1 set key b} replyP1
+    catch {$primary2 set key b} replyP2
+    # Work out which primary accepted the write (pok) and which rejected it.
+    if {$replyP1 ne {OK}} {
+        assert_match {MOVED*} $replyP1
+        set pok $primary2; set perr $primary1
+    } else {
+        assert_match {MOVED*} $replyP2
+        set pok $primary1; set perr $primary2
+    }
+    assert_match {} [errorstat $pok MOVED]
+    assert_match {*count=1*} [errorstat $perr MOVED]
+    assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat $perr set]
+}
diff --git a/tests/cluster/tests/19-cluster-nodes-slots.tcl b/tests/cluster/tests/19-cluster-nodes-slots.tcl
new file mode 100644
index 0000000..77faec9
--- /dev/null
+++ b/tests/cluster/tests/19-cluster-nodes-slots.tcl
@@ -0,0 +1,50 @@
+# Optimize CLUSTER NODES command by generating all nodes slot topology firstly
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 2 nodes cluster" {
+    cluster_create_with_continuous_slots 2 2
+}
+
+test "Cluster should start ok" {
+    assert_cluster_state ok
+}
+
+set master1 [Rn 0]
+set master2 [Rn 1]
+
+test "Continuous slots distribution" {
+    assert_match "* 0-8191*" [$master1 CLUSTER NODES]
+    assert_match "* 8192-16383*" [$master2 CLUSTER NODES]
+    assert_match "*0 8191*" [$master1 CLUSTER SLOTS]
+    assert_match "*8192 16383*" [$master2 CLUSTER SLOTS]
+
+    $master1 CLUSTER DELSLOTS 4096
+    assert_match "* 0-4095 4097-8191*" [$master1 CLUSTER NODES]
+    assert_match "*0 4095*4097 8191*" [$master1 CLUSTER SLOTS]
+
+
+    $master2 CLUSTER DELSLOTS 12288
+    assert_match "* 8192-12287 12289-16383*" [$master2 CLUSTER NODES]
+    assert_match "*8192 12287*12289 16383*" [$master2 CLUSTER SLOTS]
+}
+
+test "Discontinuous slots distribution" {
+    # Remove middle slots
+    $master1 CLUSTER DELSLOTS 4092 4094
+    assert_match "* 0-4091 4093 4095 4097-8191*" [$master1 CLUSTER NODES]
+    assert_match "*0 4091*4093 4093*4095 4095*4097 8191*" [$master1 CLUSTER SLOTS]
+    $master2 CLUSTER DELSLOTS 12284 12286
+    assert_match "* 8192-12283 12285 12287 12289-16383*" [$master2 CLUSTER NODES]
+    assert_match "*8192 12283*12285 12285*12287 12287*12289 16383*" [$master2 CLUSTER SLOTS]
+
+    # Remove head slots
+    $master1 CLUSTER DELSLOTS 0 2
+    assert_match "* 1 3-4091 4093 4095 4097-8191*" [$master1 CLUSTER NODES]
+    assert_match "*1 1*3 4091*4093 4093*4095 4095*4097 8191*" [$master1 CLUSTER SLOTS]
+
+    # Remove tail slots
+    $master2 CLUSTER DELSLOTS 16380 16382 16383
+    assert_match "* 8192-12283 12285 12287 12289-16379 16381*" [$master2 CLUSTER NODES]
+    assert_match "*8192 12283*12285 12285*12287 12287*12289 16379*16381 16381*" [$master2 CLUSTER SLOTS]
+}
diff --git a/tests/cluster/tests/20-half-migrated-slot.tcl b/tests/cluster/tests/20-half-migrated-slot.tcl
new file mode 100644
index 0000000..229b3a8
--- /dev/null
+++ b/tests/cluster/tests/20-half-migrated-slot.tcl
@@ -0,0 +1,98 @@
+# Tests for fixing migrating slot at all stages:
+# 1. when migration is half inited on "migrating" node
+# 2. when migration is half inited on "importing" node
+# 3. migration inited, but not finished
+# 4. migration is half finished on "migrating" node
+# 5. migration is half finished on "importing" node
+
+# TODO: Test is currently disabled until it is stabilized (fixing the test
+# itself or real issues in Redis).
+
+if {false} {
+source "../tests/includes/init-tests.tcl"
+source "../tests/includes/utils.tcl"
+
+test "Create a 2 nodes cluster" {
+    create_cluster 2 0
+    config_set_all_nodes cluster-allow-replica-migration no
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+catch {unset nodefrom}
+catch {unset nodeto}
+
+proc reset_cluster {} {
+    uplevel 1 {
+        $cluster refresh_nodes_map
+        array set nodefrom [$cluster masternode_for_slot 609]
+        array set nodeto [$cluster masternode_notfor_slot 609]
+    }
+}
+
+reset_cluster
+
+$cluster set aga xyz
+
+test "Half init migration in 'migrating' is fixable" {
+    assert_equal {OK} [$nodefrom(link) cluster setslot 609 migrating $nodeto(id)]
+    fix_cluster $nodefrom(addr)
+    assert_equal "xyz" [$cluster get aga]
+}
+
+test "Half init migration in 'importing' is fixable" {
+    assert_equal {OK} [$nodeto(link) cluster setslot 609 importing $nodefrom(id)]
+    fix_cluster $nodefrom(addr)
+    assert_equal "xyz" [$cluster get aga]
+}
+
+test "Init migration and move key" {
+    assert_equal {OK} [$nodefrom(link) cluster setslot 609 migrating $nodeto(id)]
+    assert_equal {OK} [$nodeto(link) cluster setslot 609 importing $nodefrom(id)]
+    assert_equal {OK} [$nodefrom(link) migrate $nodeto(host) $nodeto(port) aga 0 10000]
+    wait_for_cluster_propagation
+    assert_equal "xyz" [$cluster get aga]
+    fix_cluster $nodefrom(addr)
+    assert_equal "xyz" [$cluster get aga]
+}
+
+reset_cluster
+
+test "Move key again" {
+    wait_for_cluster_propagation
+    assert_equal {OK} [$nodefrom(link) cluster setslot 609 migrating $nodeto(id)]
+    assert_equal {OK} [$nodeto(link) cluster setslot 609 importing $nodefrom(id)]
+    assert_equal {OK} [$nodefrom(link) migrate $nodeto(host) $nodeto(port) aga 0 10000]
+    wait_for_cluster_propagation
+    assert_equal "xyz" [$cluster get aga]
+}
+
+test "Half-finish migration" {
+    # half finish migration on 'migrating' node
+    assert_equal {OK} [$nodefrom(link) cluster setslot 609 node $nodeto(id)]
+    fix_cluster $nodefrom(addr)
+    assert_equal "xyz" [$cluster get aga]
+}
+
+reset_cluster
+
+test "Move key back" {
+    # 'aga' key is in 609 slot
+    assert_equal {OK} [$nodefrom(link) cluster setslot 609 migrating $nodeto(id)]
+    assert_equal {OK} [$nodeto(link) cluster setslot 609 importing $nodefrom(id)]
+    assert_equal {OK} [$nodefrom(link) migrate $nodeto(host) $nodeto(port) aga 0 10000]
+    assert_equal "xyz" [$cluster get aga]
+}
+
+test "Half-finish importing" {
+    # Now we half finish 'importing' node
+    assert_equal {OK} [$nodeto(link) cluster setslot 609 node $nodeto(id)]
+    fix_cluster $nodefrom(addr)
+    assert_equal "xyz" [$cluster get aga]
+}
+
+config_set_all_nodes cluster-allow-replica-migration yes
+}
diff --git a/tests/cluster/tests/21-many-slot-migration.tcl b/tests/cluster/tests/21-many-slot-migration.tcl
new file mode 100644
index 0000000..1ac73dc
--- /dev/null
+++ b/tests/cluster/tests/21-many-slot-migration.tcl
@@ -0,0 +1,64 @@
+# Tests for many simultaneous migrations.
+
+# TODO: Test is currently disabled until it is stabilized (fixing the test
+# itself or real issues in Redis).
+
+if {false} {
+
+source "../tests/includes/init-tests.tcl"
+source "../tests/includes/utils.tcl"
+
+# TODO: This test currently runs without replicas, as failovers (which may
+# happen on lower-end CI platforms) are still not handled properly by the
+# cluster during slot migration (related to #6339).
+
+test "Create a 10 nodes cluster" {
+    create_cluster 10 0
+    config_set_all_nodes cluster-allow-replica-migration no
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+catch {unset nodefrom}
+catch {unset nodeto}
+
+$cluster refresh_nodes_map
+
+test "Set many keys" {
+    for {set i 0} {$i < 40000} {incr i} {
+        $cluster set key:$i val:$i
+    }
+}
+
+test "Keys are accessible" {
+    for {set i 0} {$i < 40000} {incr i} {
+        assert { [$cluster get key:$i] eq "val:$i" }
+    }
+}
+
+test "Init migration of many slots" {
+    for {set slot 0} {$slot < 1000} {incr slot} {
+        array set nodefrom [$cluster masternode_for_slot $slot]
+        array set nodeto [$cluster masternode_notfor_slot $slot]
+
+        $nodefrom(link) cluster setslot $slot migrating $nodeto(id)
+        $nodeto(link) cluster setslot $slot importing $nodefrom(id)
+    }
+}
+
+test "Fix cluster" {
+    wait_for_cluster_propagation
+    fix_cluster $nodefrom(addr)
+}
+
+test "Keys are accessible" {
+    for {set i 0} {$i < 40000} {incr i} {
+        assert { [$cluster get key:$i] eq "val:$i" }
+    }
+}
+
+config_set_all_nodes cluster-allow-replica-migration yes
+}
diff --git a/tests/cluster/tests/22-replica-in-sync.tcl b/tests/cluster/tests/22-replica-in-sync.tcl
new file mode 100644
index 0000000..b5645aa
--- /dev/null
+++ b/tests/cluster/tests/22-replica-in-sync.tcl
@@ -0,0 +1,146 @@
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 1 node cluster" {
+    create_cluster 1 0
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 0
+}
+
+# Return 1 if the string $replica occurs anywhere in the CLUSTER SLOTS reply
+# obtained from instance $master_id, 0 otherwise.
+proc is_in_slots {master_id replica} {
+    set reply [R $master_id cluster slots]
+    return [expr {[string first $replica $reply] >= 0}]
+}
+
+# Return 1 if the text in $info_repl contains "state=online", 0 otherwise.
+proc is_replica_online {info_repl} {
+    set idx [string first "state=online" $info_repl]
+    return [expr {$idx >= 0}]
+}
+
+# Pong-received field of $target_cid's CLUSTER NODES line as seen by $node_id.
+proc get_last_pong_time {node_id target_cid} {
+    foreach line [split [R $node_id cluster nodes] \n] {
+        set fields [split $line " "]
+        if {[lindex $fields 0] ne $target_cid} continue
+        return [lindex $fields 5]
+    }
+    fail "Target node ID was not present"
+}
+
+set master_id 0
+
+test "Fill up primary with data" {
+    # Set 1 MB of data
+    R $master_id debug populate 1000 key 1000
+}
+
+test "Add new node as replica" {
+    set replica_id 1
+    set replica [R $replica_id CLUSTER MYID]
+    R $replica_id cluster replicate [R $master_id CLUSTER MYID]
+}
+
+test "Check digest and replica state" {
+    wait_for_condition 1000 50 {
+        [is_in_slots $master_id $replica]
+    } else {
+        fail "New replica didn't appear in the slots"
+    }
+
+    wait_for_condition 100 50 {
+        [is_replica_online [R $master_id info replication]]
+    } else {
+        fail "Replica is down for too long"
+    }
+    set replica_digest [R $replica_id debug digest]
+    assert {$replica_digest ne 0}
+}
+
+test "Replica in loading state is hidden" {
+    # Kill replica client for master and load new data to the primary
+    R $master_id config set repl-backlog-size 100
+
+    # Set the key load delay so that it will take at least
+    # 2 seconds to fully load the data.
+    R $replica_id config set key-load-delay 4000
+
+    # Trigger event loop processing every 1024 bytes, this trigger
+    # allows us to send and receive cluster messages, so we are setting
+    # it low so that the cluster messages are sent more frequently.
+    R $replica_id config set loading-process-events-interval-bytes 1024
+
+    R $master_id multi
+    R $master_id client kill type replica
+    set num 100
+    set value [string repeat A 1024]
+    for {set j 0} {$j < $num} {incr j} {
+        set key "{0}"
+        append key $j
+        R $master_id set $key $value
+    }
+    R $master_id exec
+
+    # The master will be the last to know the replica
+    # is loading, so we will wait on that and assert
+    # the replica is loading afterwards. 
+    wait_for_condition 100 50 {
+        ![is_in_slots $master_id $replica]
+    } else {
+        fail "Replica was always present in cluster slots"
+    }
+    assert_equal 1 [s $replica_id loading]
+
+    # Wait for the replica to finish full-sync and become online
+    wait_for_condition 200 50 {
+        [s $replica_id master_link_status] eq "up"
+    } else {
+        fail "Replica didn't finish loading"
+    }
+
+    # Return configs to default values
+    R $replica_id config set loading-process-events-interval-bytes 2097152
+    R $replica_id config set key-load-delay 0
+
+    # Check replica is back in cluster slots
+    wait_for_condition 100 50 {
+        [is_in_slots $master_id $replica] 
+    } else {
+        fail "Replica is not back to slots"
+    }
+    assert_equal 1 [is_in_slots $replica_id $replica] 
+}
+
+test "Check disconnected replica not hidden from slots" {
+    # We want to disconnect the replica, but keep it alive so it can still gossip
+
+    # Make sure that the replica will not be able to re-connect to the master
+    R $master_id config set requirepass asdf
+
+    # Disconnect replica from primary
+    R $master_id client kill type replica
+
+    # Check master to have no replicas
+    assert {[s $master_id connected_slaves] == 0}
+
+    set replica_cid [R $replica_id cluster myid]
+    set initial_pong [get_last_pong_time $master_id $replica_cid]
+    wait_for_condition 50 100 {
+        $initial_pong != [get_last_pong_time $master_id $replica_cid]
+    } else {
+        fail "Primary never received gossip from replica"
+    }
+
+    # Check that replica is still in the cluster slots
+    assert {[is_in_slots $master_id $replica]}
+
+    # undo config
+    R $master_id config set requirepass ""
+}
diff --git a/tests/cluster/tests/25-pubsubshard-slot-migration.tcl b/tests/cluster/tests/25-pubsubshard-slot-migration.tcl
new file mode 100644
index 0000000..0f59ffe
--- /dev/null
+++ b/tests/cluster/tests/25-pubsubshard-slot-migration.tcl
@@ -0,0 +1,171 @@
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 3 nodes cluster" {
+    cluster_create_with_continuous_slots 3 3
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+
+test "Migrate a slot, verify client receives sunsubscribe on primary serving the slot." {
+
+    # Setup the to and from node
+    set channelname mychannel
+    set slot [$cluster cluster keyslot $channelname]
+    array set nodefrom [$cluster masternode_for_slot $slot]
+    array set nodeto [$cluster masternode_notfor_slot $slot]
+
+    set subscribeclient [redis_deferring_client_by_addr $nodefrom(host) $nodefrom(port)]
+
+    $subscribeclient deferred 1
+    $subscribeclient ssubscribe $channelname
+    $subscribeclient read
+
+    assert_equal {OK} [$nodefrom(link) cluster setslot $slot migrating $nodeto(id)]
+    assert_equal {OK} [$nodeto(link) cluster setslot $slot importing $nodefrom(id)]
+
+    # Verify subscribe is still valid, able to receive messages.
+    $nodefrom(link) spublish $channelname hello
+    assert_equal {smessage mychannel hello} [$subscribeclient read]
+
+    assert_equal {OK} [$nodefrom(link) cluster setslot $slot node $nodeto(id)]
+   
+    set msg [$subscribeclient read]
+    assert {"sunsubscribe" eq [lindex $msg 0]}
+    assert {$channelname eq [lindex $msg 1]}
+    assert {"0" eq [lindex $msg 2]}
+
+    assert_equal {OK} [$nodeto(link) cluster setslot $slot node $nodeto(id)]
+
+    $subscribeclient close
+}
+
+test "Client subscribes to multiple channels, migrate a slot, verify client receives sunsubscribe on primary serving the slot." {
+
+    # Setup the to and from node
+    set channelname ch3
+    set anotherchannelname ch7
+    set slot [$cluster cluster keyslot $channelname]
+    array set nodefrom [$cluster masternode_for_slot $slot]
+    array set nodeto [$cluster masternode_notfor_slot $slot]
+
+    set subscribeclient [redis_deferring_client_by_addr $nodefrom(host) $nodefrom(port)]
+
+    $subscribeclient deferred 1
+    $subscribeclient ssubscribe $channelname
+    $subscribeclient read
+
+    $subscribeclient ssubscribe $anotherchannelname
+    $subscribeclient read
+
+    assert_equal {OK} [$nodefrom(link) cluster setslot $slot migrating $nodeto(id)]
+    assert_equal {OK} [$nodeto(link) cluster setslot $slot importing $nodefrom(id)]
+
+    # Verify subscribe is still valid, able to receive messages.
+    $nodefrom(link) spublish $channelname hello
+    assert_equal {smessage ch3 hello} [$subscribeclient read]
+
+    assert_equal {OK} [$nodefrom(link) cluster setslot $slot node $nodeto(id)]
+
+    # Verify the client receives sunsubscribe message for the channel(slot) which got migrated.
+    set msg [$subscribeclient read]
+    assert {"sunsubscribe" eq [lindex $msg 0]}
+    assert {$channelname eq [lindex $msg 1]}
+    assert {"1" eq [lindex $msg 2]}
+
+    assert_equal {OK} [$nodeto(link) cluster setslot $slot node $nodeto(id)]
+
+    $nodefrom(link) spublish $anotherchannelname hello
+
+    # Verify the client is still connected and receives message from the other channel.
+    set msg [$subscribeclient read]
+    assert {"smessage" eq [lindex $msg 0]}
+    assert {$anotherchannelname eq [lindex $msg 1]}
+    assert {"hello" eq [lindex $msg 2]}
+
+    $subscribeclient close
+}
+
+test "Migrate a slot, verify client receives sunsubscribe on replica serving the slot." {
+
+    # Setup the to and from node
+    set channelname mychannel1
+    set slot [$cluster cluster keyslot $channelname]
+    array set nodefrom [$cluster masternode_for_slot $slot]
+    array set nodeto [$cluster masternode_notfor_slot $slot]
+
+    # Get replica node serving slot (mychannel) to connect a client.
+    set replicanodeinfo [$cluster cluster replicas $nodefrom(id)]
+    set args [split $replicanodeinfo " "]
+    set addr [lindex [split [lindex $args 1] @] 0]
+    set replicahost [lindex [split $addr :] 0]
+    set replicaport [lindex [split $addr :] 1]
+    set subscribeclient [redis_deferring_client_by_addr $replicahost $replicaport]
+
+    $subscribeclient deferred 1
+    $subscribeclient ssubscribe $channelname
+    $subscribeclient read
+
+    assert_equal {OK} [$nodefrom(link) cluster setslot $slot migrating $nodeto(id)]
+    assert_equal {OK} [$nodeto(link) cluster setslot $slot importing $nodefrom(id)]
+
+    # Verify subscribe is still valid, able to receive messages.
+    $nodefrom(link) spublish $channelname hello
+    assert_equal {smessage mychannel1 hello} [$subscribeclient read]
+
+    assert_equal {OK} [$nodefrom(link) cluster setslot $slot node $nodeto(id)]
+    assert_equal {OK} [$nodeto(link) cluster setslot $slot node $nodeto(id)]
+
+    set msg [$subscribeclient read]
+    assert {"sunsubscribe" eq [lindex $msg 0]}
+    assert {$channelname eq [lindex $msg 1]}
+    assert {"0" eq [lindex $msg 2]}
+
+    $subscribeclient close
+}
+
+test "Delete a slot, verify sunsubscribe message" {
+    set channelname ch2
+    set slot [$cluster cluster keyslot $channelname]
+
+    array set primary_client [$cluster masternode_for_slot $slot]
+
+    set subscribeclient [redis_deferring_client_by_addr $primary_client(host) $primary_client(port)]
+    $subscribeclient deferred 1
+    $subscribeclient ssubscribe $channelname
+    $subscribeclient read
+
+    $primary_client(link) cluster DELSLOTS $slot
+
+    set msg [$subscribeclient read]
+    assert {"sunsubscribe" eq [lindex $msg 0]}
+    assert {$channelname eq [lindex $msg 1]}
+    assert {"0" eq [lindex $msg 2]}
+    
+    $subscribeclient close
+}
+
+test "Reset cluster, verify sunsubscribe message" {
+    set channelname ch4
+    set slot [$cluster cluster keyslot $channelname]
+
+    array set primary_client [$cluster masternode_for_slot $slot]
+
+    set subscribeclient [redis_deferring_client_by_addr $primary_client(host) $primary_client(port)]
+    $subscribeclient deferred 1
+    $subscribeclient ssubscribe $channelname
+    $subscribeclient read
+
+    $cluster cluster reset HARD
+
+    set msg [$subscribeclient read]
+    assert {"sunsubscribe" eq [lindex $msg 0]}
+    assert {$channelname eq [lindex $msg 1]}
+    assert {"0" eq [lindex $msg 2]}
+    
+    $cluster close
+    $subscribeclient close
+}
+\ No newline at end of file
diff --git a/tests/cluster/tests/26-pubsubshard.tcl b/tests/cluster/tests/26-pubsubshard.tcl
new file mode 100644
index 0000000..2619eda
--- /dev/null
+++ b/tests/cluster/tests/26-pubsubshard.tcl
@@ -0,0 +1,94 @@
+# Test PUBSUB shard propagation in a cluster slot.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 3 nodes cluster" {
+    cluster_create_with_continuous_slots 3 3
+}
+
+set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+test "Pub/Sub shard basics" {
+
+    set slot [$cluster cluster keyslot "channel.0"]
+    array set publishnode [$cluster masternode_for_slot $slot]
+    array set notshardnode [$cluster masternode_notfor_slot $slot]
+
+    set publishclient [redis_client_by_addr $publishnode(host) $publishnode(port)]
+    set subscribeclient [redis_deferring_client_by_addr $publishnode(host) $publishnode(port)]
+    set subscribeclient2 [redis_deferring_client_by_addr $publishnode(host) $publishnode(port)]
+    set anotherclient [redis_deferring_client_by_addr $notshardnode(host) $notshardnode(port)]
+
+    $subscribeclient ssubscribe channel.0
+    $subscribeclient read
+
+    $subscribeclient2 ssubscribe channel.0
+    $subscribeclient2 read
+
+    $anotherclient ssubscribe channel.0
+    catch {$anotherclient read} err
+    assert_match {MOVED *} $err
+
+    set data [randomValue]
+    $publishclient spublish channel.0 $data
+
+    set msg [$subscribeclient read]
+    assert_equal $data [lindex $msg 2]
+
+    set msg [$subscribeclient2 read]
+    assert_equal $data [lindex $msg 2]
+
+    $publishclient close
+    $subscribeclient close
+    $subscribeclient2 close
+    $anotherclient close
+}
+
+test "client can't subscribe to multiple shard channels across different slots in same call" {
+    catch {$cluster ssubscribe channel.0 channel.1} err
+    assert_match {CROSSSLOT Keys*} $err
+}
+
+test "client can subscribe to multiple shard channels across different slots in separate call" {
+    $cluster ssubscribe ch3
+    $cluster ssubscribe ch7
+
+    $cluster sunsubscribe ch3
+    $cluster sunsubscribe ch7
+}
+
+
+test "Verify Pub/Sub and Pub/Sub shard no overlap" {
+    # Every client below connects to the node that serves channel.0's slot.
+    set slot [$cluster cluster keyslot "channel.0"]
+    array set publishnode [$cluster masternode_for_slot $slot]
+
+    set publishshardclient [redis_client_by_addr $publishnode(host) $publishnode(port)]
+    set publishclient [redis_deferring_client_by_addr $publishnode(host) $publishnode(port)]
+    set subscribeshardclient [redis_deferring_client_by_addr $publishnode(host) $publishnode(port)]
+    set subscribeclient [redis_deferring_client_by_addr $publishnode(host) $publishnode(port)]
+
+    $subscribeshardclient deferred 1
+    $subscribeshardclient ssubscribe channel.0
+    $subscribeshardclient read
+
+    $subscribeclient deferred 1
+    $subscribeclient subscribe channel.0
+    $subscribeclient read
+
+    set sharddata "testingpubsubdata"
+    $publishshardclient spublish channel.0 $sharddata
+
+    set data "somemoredata"
+    $publishclient publish channel.0 $data
+    # The first message each subscriber reads must be its own kind of publish.
+    set msg [$subscribeshardclient read]
+    assert_equal $sharddata [lindex $msg 2]
+    set msg [$subscribeclient read]
+    assert_equal $data [lindex $msg 2]
+
+    $cluster close
+    $publishshardclient close
+    $publishclient close
+    $subscribeclient close
+    $subscribeshardclient close
+}
+\ No newline at end of file
diff --git a/tests/cluster/tests/28-cluster-shards.tcl b/tests/cluster/tests/28-cluster-shards.tcl
new file mode 100644
index 0000000..f24b917
--- /dev/null
+++ b/tests/cluster/tests/28-cluster-shards.tcl
@@ -0,0 +1,287 @@
+source "../tests/includes/init-tests.tcl"
+
+# Initial slot distribution.
+# Each ::slotN list holds the arguments that cluster_create_with_split_slots
+# expands into CLUSTER ADDSLOTSRANGE for instance N (flattened start/end pairs).
+set ::slot0 [list 0 1000 1002 5459 5461 5461 10926 10926]
+set ::slot1 [list 5460 5460 5462 10922 10925 10925]
+set ::slot2 [list 10923 10924 10927 16383]
+set ::slot3 [list 1001 1001]
+
+# Assign the ranges stored in ::slot0 .. ::slot<masters-1> to instances
+# 0 .. masters-1, optionally attach replicas, and record the resulting
+# topology in ::cluster_master_nodes / ::cluster_replica_nodes.
+#
+# masters  - number of instances that receive slot ranges
+# replicas - number of replicas handed to cluster_allocate_slaves; when it
+#            evaluates to false no replicas are allocated
+proc cluster_create_with_split_slots {masters replicas} {
+    set master_id 0
+    while {$master_id < $masters} {
+        # Look up the per-instance range list by name.
+        set ranges [set ::slot$master_id]
+        R $master_id cluster ADDSLOTSRANGE {*}$ranges
+        incr master_id
+    }
+
+    if {$replicas} {
+        cluster_allocate_slaves $masters $replicas
+    }
+
+    set ::cluster_master_nodes $masters
+    set ::cluster_replica_nodes $replicas
+}
+
+# Look up a node by its cluster node id in the CLUSTER SHARDS reply of
+# instance `reference`.
+#
+# id        - cluster node id to search for
+# reference - instance whose CLUSTER SHARDS output is inspected
+# type      - "node" (default) returns the matching node entry, "shard"
+#             returns the whole shard entry containing it; any other value
+#             yields an empty result
+#
+# Returns an empty value when no shard lists the requested id.
+proc get_node_info_from_shard {id reference {type node}} {
+    foreach shard [R $reference CLUSTER SHARDS] {
+        foreach member [dict get $shard nodes] {
+            if {[dict get $member id] ne $id} continue
+            switch -exact -- $type {
+                node    { return $member }
+                shard   { return $shard }
+                default { return {} }
+            }
+        }
+    }
+    # The id was not listed in any shard.
+    return {}
+}
+
+# Make sure instance `id` reports itself as a master. When its ROLE reply
+# does not contain "master", request CLUSTER FAILOVER and poll (50 tries,
+# 100 ms apart) until it does; fail the test otherwise.
+proc cluster_ensure_master {id} {
+    # Nothing to do when the instance already is a master.
+    if {[regexp "master" [R $id role]]} {
+        return
+    }
+
+    assert_equal {OK} [R $id CLUSTER FAILOVER]
+    wait_for_condition 50 100 {
+        [regexp "master" [R $id role]]
+    } else {
+        fail "instance $id is not master"
+    }
+}
+
+# Give instances 0-3 the split slot layout defined at the top of this file
+# and request 4 replicas.
+test "Create a 8 nodes cluster with 4 shards" {
+    cluster_create_with_split_slots 4 4
+}
+
+test "Cluster should start ok" {
+    assert_cluster_state ok
+}
+
+test "Set cluster hostnames and verify they are propagated" {
+    # Announce a distinct hostname, host-<instance id>.com, on every primary
+    # and replica.
+    for {set j 0} {$j < $::cluster_master_nodes + $::cluster_replica_nodes} {incr j} {
+        R $j config set cluster-announce-hostname "host-$j.com"
+    }
+
+    # Wait for everyone to agree about the state
+    wait_for_cluster_propagation
+}
+
+# Cross-checks the CLUSTER SHARDS reply of every instance against the expected
+# slot ranges, hostname, ip, endpoint, port(s) and role of every instance.
+test "Verify information about the shards" {
+    # Cluster node id of each instance, indexed by instance number.
+    set ids {}
+    for {set j 0} {$j < $::cluster_master_nodes + $::cluster_replica_nodes} {incr j} {
+        lappend ids [R $j CLUSTER MYID]
+    }
+    # Expected slot ranges per instance: the layout is listed twice because
+    # instance i+4 is expected to report the same shard slots as instance i.
+    set slots [list $::slot0 $::slot1 $::slot2 $::slot3 $::slot0 $::slot1 $::slot2 $::slot3]
+
+    # Verify on each node (primary/replica), the response of the `CLUSTER SHARDS` command is consistent.
+    for {set ref 0} {$ref < $::cluster_master_nodes + $::cluster_replica_nodes} {incr ref} {
+        for {set i 0} {$i < $::cluster_master_nodes + $::cluster_replica_nodes} {incr i} {
+            assert_equal [lindex $slots $i] [dict get [get_node_info_from_shard [lindex $ids $i] $ref "shard"] slots]
+            assert_equal "host-$i.com" [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] hostname]
+            assert_equal "127.0.0.1"  [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] ip]
+            # Default value of 'cluster-preferred-endpoint-type' is ip.
+            assert_equal "127.0.0.1"  [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] endpoint]
+
+            # With TLS enabled the instance's "port" attribute is expected in
+            # the tls-port field and its plaintext-port attribute in "port".
+            if {$::tls} {
+                assert_equal [get_instance_attrib redis $i plaintext-port] [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] port]
+                assert_equal [get_instance_attrib redis $i port] [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] tls-port]
+            } else {
+                assert_equal [get_instance_attrib redis $i port] [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] port]
+            }
+
+            # The first ::cluster_master_nodes instances are the primaries.
+            if {$i < $::cluster_master_nodes} {
+                assert_equal "master" [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] role]
+                assert_equal "online" [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] health]
+            } else {
+                assert_equal "replica" [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] role]
+                # Replica could be in online or loading
+            }
+        }
+    }
+}
+
+test "Verify no slot shard" {
+    # Node 8 has no slots assigned
+    set node_8_id [R 8 CLUSTER MYID]
+    # Its shard must show an empty slot list both in its own view and in the
+    # view of instance 0.
+    assert_equal {} [dict get [get_node_info_from_shard $node_8_id 8 "shard"] slots]
+    assert_equal {} [dict get [get_node_info_from_shard $node_8_id 0 "shard"] slots]
+}
+
+# Remember the node id of instance 0 before it is killed, since it cannot be
+# queried afterwards.
+set node_0_id [R 0 CLUSTER MYID]
+
+test "Kill a node and tell the replica to immediately takeover" {
+    kill_instance redis 0
+    # Instance 4 is asked to fail over with the FORCE option.
+    R 4 cluster failover force
+}
+
+# Primary 0 node should report as fail, wait until the new primary acknowledges it.
+test "Verify health as fail for killed node" {
+    # Poll the CLUSTER SHARDS view of instance 4 up to 50 times, 100 ms apart.
+    wait_for_condition 50 100 {
+        "fail" eq [dict get [get_node_info_from_shard $node_0_id 4 "node"] "health"]
+    } else {
+        fail "New primary never detected the node failed"
+    }
+}
+
+# Roles after the failover requested above: instance 4 is addressed as the
+# primary and instance 0 as its replica by the tests that follow.
+set primary_id 4
+set replica_id 0
+
+# Despite the test name, this restarts instance 0, i.e. the former primary
+# that was killed earlier in this file.
+test "Restarting primary node" {
+    restart_instance redis $replica_id
+}
+
+test "Instance #0 gets converted into a replica" {
+    # Poll up to 1000 times, 50 ms apart, for the role to read "slave".
+    wait_for_condition 1000 50 {
+        [RI $replica_id role] eq {slave}
+    } else {
+        fail "Old primary was not converted into replica"
+    }
+}
+
+# Forces the replica through a slow full sync and checks that the primary's
+# CLUSTER SHARDS view shows its health going online -> loading -> online.
+test "Test the replica reports a loading state while it's loading" {
+    # Test the command is good for verifying everything moves to a happy state
+    set replica_cluster_id [R $replica_id CLUSTER MYID]
+    wait_for_condition 50 1000 {
+        [dict get [get_node_info_from_shard $replica_cluster_id $primary_id "node"] health] eq "online"
+    } else {
+        fail "Replica never transitioned to online"
+    }
+
+    # Set 1 MB of data, so there is something to load on full sync
+    R $primary_id debug populate 1000 key 1000
+
+    # Shrink the replication backlog on the primary (presumably so that the
+    # replica, once disconnected below, cannot catch up with a partial resync).
+    R $primary_id config set repl-backlog-size 100
+
+    # Set the key load delay so that it will take at least
+    # 2 seconds to fully load the data.
+    R $replica_id config set key-load-delay 4000
+
+    # Trigger event loop processing every 1024 bytes, this trigger
+    # allows us to send and receive cluster messages, so we are setting
+    # it low so that the cluster messages are sent more frequently.
+    R $replica_id config set loading-process-events-interval-bytes 1024
+
+    # In a single MULTI/EXEC transaction: kill the replica's connection to
+    # the primary and write new data to the primary.
+    R $primary_id multi
+    R $primary_id client kill type replica
+    # populate the correct data
+    set num 100
+    set value [string repeat A 1024]
+    for {set j 0} {$j < $num} {incr j} {
+        # Use hashtag valid for shard #0
+        set key "{ch3}$j"
+        R $primary_id set $key $value
+    }
+    R $primary_id exec
+
+    # The replica should reconnect and start a full sync, it will gossip about its health to the primary.
+    wait_for_condition 50 1000 {
+        "loading" eq [dict get [get_node_info_from_shard $replica_cluster_id $primary_id "node"] health]
+    } else {
+        fail "Replica never transitioned to loading"
+    }
+
+    # Verify cluster shards and cluster slots (deprecated) API responds while the node is loading data.
+    R $replica_id CLUSTER SHARDS
+    R $replica_id CLUSTER SLOTS
+
+    # Speed up the key loading and verify everything resumes
+    R $replica_id config set key-load-delay 0
+
+    wait_for_condition 50 1000 {
+        "online" eq [dict get [get_node_info_from_shard $replica_cluster_id $primary_id "node"] health]
+    } else {
+        fail "Replica never transitioned to online"
+    }
+
+    # Final sanity, the replica agrees it is online.
+    assert_equal "online" [dict get [get_node_info_from_shard $replica_cluster_id $replica_id "node"] health]
+}
+
+test "Regression test for a crash when calling SHARDS during handshake" {
+    # Reset and forget a node, so we can use it to establish handshaking connections
+    set id [R 19 CLUSTER MYID]
+    R 19 CLUSTER RESET HARD
+    # Make instances 0-18 drop the old identity of instance 19.
+    for {set i 0} {$i < 19} {incr i} {
+        R $i CLUSTER FORGET $id
+    }
+    R 19 cluster meet 127.0.0.1 [get_instance_attrib redis 0 port]
+    # This line would previously crash, since all the outbound
+    # connections were in handshake state.
+    R 19 CLUSTER SHARDS
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+test "Shard ids are unique" {
+    # shard_ids is used as a set: its keys are the shard ids seen so far.
+    set shard_ids {}
+    for {set i 0} {$i < 4} {incr i} {
+        set shard_id [R $i cluster myshardid]
+        # The id reported by instance i must not have been reported by an
+        # earlier instance.
+        assert_equal [dict exists $shard_ids $shard_id] 0
+        dict set shard_ids $shard_id 1
+    }
+}
+
+# Instance i and instance i+4 are expected to belong to the same shard, so
+# both must report the same 40-character shard id.
+test "CLUSTER MYSHARDID reports same id for both primary and replica" {
+    for {set i 0} {$i < 4} {incr i} {
+        # The expression is braced so it is substituted only once, by expr.
+        assert_equal [R $i cluster myshardid] [R [expr {$i+4}] cluster myshardid]
+        assert_equal [string length [R $i cluster myshardid]] 40
+    }
+}
+
+test "New replica receives primary's shard id" {
+    # Find a primary: stop at the first instance in 0..7 whose ROLE reply
+    # contains "master".
+    set id 0
+    for {} {$id < 8} {incr id} {
+        if {[regexp "master" [R $id role]]} {
+            break
+        }
+    }
+    # Instance 8 must start with a different shard id than that primary...
+    assert_not_equal [R 8 cluster myshardid] [R $id cluster myshardid]
+    assert_equal {OK} [R 8 cluster replicate [R $id cluster myid]]
+    # ...and report the primary's shard id once CLUSTER REPLICATE has returned OK.
+    assert_equal [R 8 cluster myshardid] [R $id cluster myshardid]
+}
+
+# Kills and restarts instances 0 and 4 (the loops step by 4) and checks that
+# each reports the same shard id as before the restart.
+test "CLUSTER MYSHARDID reports same shard id after shard restart" {
+    # Maps instance number -> shard id recorded before the kill.
+    set node_ids {}
+    for {set i 0} {$i < 8} {incr i 4} {
+        dict set node_ids $i [R $i cluster myshardid]
+        kill_instance redis $i
+        # Wait until the instance is reported as killed before moving on.
+        wait_for_condition 50 100 {
+            [instance_is_killed redis $i]
+        } else {
+            fail "instance $i is not killed"
+        }
+    }
+    for {set i 0} {$i < 8} {incr i 4} {
+        restart_instance redis $i
+    }
+    assert_cluster_state ok
+    for {set i 0} {$i < 8} {incr i 4} {
+        assert_equal [dict get $node_ids $i] [R $i cluster myshardid]
+    }
+}
+
+# Records the shard id of instances 0-7, kills all of them, restarts all of
+# them, and checks that every instance still reports its recorded shard id.
+test "CLUSTER MYSHARDID reports same shard id after cluster restart" {
+    # Maps instance number -> shard id recorded before the kill.
+    set node_ids {}
+    for {set i 0} {$i < 8} {incr i} {
+        dict set node_ids $i [R $i cluster myshardid]
+    }
+    for {set i 0} {$i < 8} {incr i} {
+        kill_instance redis $i
+        # Wait until the instance is reported as killed before moving on.
+        wait_for_condition 50 100 {
+            [instance_is_killed redis $i]
+        } else {
+            fail "instance $i is not killed"
+        }
+    }
+    for {set i 0} {$i < 8} {incr i} {
+        restart_instance redis $i
+    }
+    assert_cluster_state ok
+    for {set i 0} {$i < 8} {incr i} {
+        assert_equal [dict get $node_ids $i] [R $i cluster myshardid]
+    }
+}
diff --git a/tests/cluster/tests/29-slot-migration-response.tcl b/tests/cluster/tests/29-slot-migration-response.tcl
new file mode 100644
index 0000000..060cc8d
--- /dev/null
+++ b/tests/cluster/tests/29-slot-migration-response.tcl
@@ -0,0 +1,50 @@
+# Tests for the response of slot migrations.
+
+source "../tests/includes/init-tests.tcl"
+source "../tests/includes/utils.tcl"
+
+test "Create a 2 nodes cluster" {
+    create_cluster 2 0
+    # Switched back to "yes" by the last line of this file.
+    config_set_all_nodes cluster-allow-replica-migration no
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+# Cluster client bootstrapped from the address of instance 0.
+set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+# Drop any nodefrom/nodeto that may already exist (presumably left behind by
+# earlier code) before the migration test fills them with "array set".
+catch {unset nodefrom}
+catch {unset nodeto}
+
+$cluster refresh_nodes_map
+
+test "Set many keys in the cluster" {
+    # Write keys 0..4999 with the key name as value and read each one back.
+    for {set i 0} {$i < 5000} {incr i} {
+        $cluster set $i $i
+        assert { [$cluster get $i] eq $i }
+    }
+}
+
+# Puts slot 10 into importing/migrating state and checks which error reply
+# each side gives to writes touching that slot.
+test "Test cluster responses during migration of slot x" {
+
+    set slot 10
+    # nodefrom: the master returned for the slot; nodeto: a master returned
+    # as not serving it.
+    array set nodefrom [$cluster masternode_for_slot $slot]
+    array set nodeto [$cluster masternode_notfor_slot $slot]
+
+    $nodeto(link) cluster setslot $slot importing $nodefrom(id)
+    $nodefrom(link) cluster setslot $slot migrating $nodeto(id)
+
+    # Get a key from that slot
+    set key [$nodefrom(link) cluster GETKEYSINSLOT $slot "1"]
+
+    # MOVED REPLY
+    # A write to the existing key sent to the importing node must be answered
+    # with a MOVED error.
+    assert_error "*MOVED*" {$nodeto(link) set $key "newVal"}
+
+    # ASK REPLY
+    # "abc{$key}" uses $key as its hash tag (presumably so it lands in the same
+    # slot); writing it on the migrating node must be answered with ASK.
+    assert_error "*ASK*" {$nodefrom(link) set "abc{$key}" "newVal"}
+
+    # UNSTABLE REPLY
+    # A multi-key write combining a hash-tagged new name with the existing key
+    # must be answered with TRYAGAIN by the migrating node.
+    assert_error "*TRYAGAIN*" {$nodefrom(link) mset "a{$key}" "newVal" $key "newVal2"}
+}
+
+# Restore the setting that was switched to "no" when the cluster was created.
+config_set_all_nodes cluster-allow-replica-migration yes
diff --git a/tests/cluster/tests/helpers/onlydots.tcl b/tests/cluster/tests/helpers/onlydots.tcl
new file mode 100644
index 0000000..4a6d1ae
--- /dev/null
+++ b/tests/cluster/tests/helpers/onlydots.tcl
@@ -0,0 +1,16 @@
+# Copy only the '.' characters from standard input to standard output and
+# drop everything else. Input is read unbuffered, one character at a time,
+# and output is flushed after every dot, so that the dots printed by
+# redis-trib show up as soon as redis-trib emits them.
+
+fconfigure stdin -buffering none
+
+# An empty read means EOF: leave the loop and exit successfully.
+while {[set ch [read stdin 1]] ne {}} {
+    if {$ch ne {.}} continue
+    puts -nonewline .
+    flush stdout
+}
+exit 0; # EOF
diff --git a/tests/cluster/tests/includes/init-tests.tcl b/tests/cluster/tests/includes/init-tests.tcl
new file mode 100644
index 0000000..4875a01
--- /dev/null
+++ b/tests/cluster/tests/includes/init-tests.tcl
@@ -0,0 +1,91 @@
+# Initialization tests -- most units will start including this.
+
+test "(init) Restart killed instances" {
+    foreach type {redis} {
+        # Expands to foreach_redis_id for the only type listed above.
+        foreach_${type}_id id {
+            # A pid attribute of -1 is taken to mean the instance is not
+            # running (per the test name); print which one gets restarted.
+            if {[get_instance_attrib $type $id pid] == -1} {
+                puts -nonewline "$type/$id "
+                flush stdout
+                restart_instance $type $id
+            }
+        }
+    }
+}
+
+test "Cluster nodes are reachable" {
+    foreach_redis_id id {
+        # Every node should be reachable.
+        # Poll up to 1000 times, 50 ms apart, until PING succeeds with PONG.
+        wait_for_condition 1000 50 {
+            ([catch {R $id ping} ping_reply] == 0) &&
+            ($ping_reply eq {PONG})
+        } else {
+            # Ping once more to capture what the node answers for the
+            # failure message.
+            catch {R $id ping} err
+            fail "Node #$id keeps replying '$err' to PING."
+        }
+    }
+}
+
+# Bring every instance back to a known baseline: flush data, hard-reset the
+# cluster state with a per-instance config epoch, restore the configuration
+# values set below, remove any packet filter and persist the config.
+test "Cluster nodes hard reset" {
+    # Use a longer node timeout when running under valgrind.
+    set node_timeout [expr {$::valgrind ? 10000 : 3000}]
+
+    foreach_redis_id id {
+        catch {R $id flushall} ; # May fail for readonly slaves.
+
+        # Reset and epoch assignment are sent as one transaction.
+        R $id MULTI
+        R $id cluster reset hard
+        R $id cluster set-config-epoch [expr {$id+1}]
+        R $id EXEC
+
+        R $id config set cluster-node-timeout $node_timeout
+        R $id config set cluster-slave-validity-factor 10
+        R $id config set loading-process-events-interval-bytes 2097152
+        R $id config set key-load-delay 0
+        R $id config set repl-diskless-load disabled
+        R $id config set cluster-announce-hostname ""
+        R $id DEBUG DROP-CLUSTER-PACKET-FILTER -1
+        R $id config rewrite
+    }
+}
+
+# Helper function to attempt to have each node in a cluster
+# meet each other.
+#
+# Returns 1 when every instance lists as many "connected" nodes as there are
+# instances, and 0 as soon as one instance fails to get there within the
+# polling budget (1000 tries, 50 ms apart).
+proc join_nodes_in_cluster {} {
+    # Join node 0 with 1, 1 with 2, ... and so forth.
+    # If auto-discovery works all nodes will know every other node
+    # eventually.
+    set ids {}
+    foreach_redis_id id {lappend ids $id}
+    # The loop condition is already an expression, so no nested [expr] is
+    # needed; the remaining expr is braced to avoid double substitution.
+    for {set j 0} {$j < [llength $ids] - 1} {incr j} {
+        set a [lindex $ids $j]
+        set b [lindex $ids [expr {$j+1}]]
+        set b_port [get_instance_attrib redis $b port]
+        R $a cluster meet 127.0.0.1 $b_port
+    }
+
+    foreach_redis_id id {
+        wait_for_condition 1000 50 {
+            [llength [get_cluster_nodes $id connected]] == [llength $ids]
+        } else {
+            return 0
+        }
+    }
+    return 1
+}
+
+test "Cluster Join and auto-discovery test" {
+    # Use multiple attempts since sometimes nodes timeout
+    # while attempting to connect.
+    for {set attempts 3} {$attempts > 0} {incr attempts -1} {
+        if {[join_nodes_in_cluster] == 1} {
+            break
+        }
+    }
+    # attempts only reaches 0 when no attempt hit the break above, i.e. all
+    # three attempts failed.
+    if {$attempts == 0} {
+        fail "Cluster failed to form full mesh"
+    }
+}
+
+# No slots have been assigned at this point, so the expected state is "fail".
+test "Before slots allocation, all nodes report cluster failure" {
+    assert_cluster_state fail
+}
diff --git a/tests/cluster/tests/includes/utils.tcl b/tests/cluster/tests/includes/utils.tcl
new file mode 100644
index 0000000..c1b0fe6
--- /dev/null
+++ b/tests/cluster/tests/includes/utils.tcl
@@ -0,0 +1,36 @@
+source "../../../tests/support/cli.tcl"
+
+# Run "CONFIG SET <keyword> <value>" on every redis instance visited by
+# foreach_redis_id.
+proc config_set_all_nodes {keyword value} {
+    foreach_redis_id node_id {
+        R $node_id config set $keyword $value
+    }
+}
+
+# Run "redis-cli --cluster fix" against addr (answering "yes" on stdin), then
+# require the cluster state to be ok and poll "redis-cli --cluster check"
+# (100 tries, 100 ms apart) until it exits with status zero.
+#
+# addr - address handed to redis-cli for both the fix and the check command
+proc fix_cluster {addr} {
+    # A failing fix is only reported, not treated as fatal:
+    # redis-cli --cluster fix may return a non-zero exit code if nodes don't
+    # agree, but we can ignore that and rely on the check below.
+    if {[catch {
+        exec ../../../src/redis-cli {*}[rediscli_tls_config "../../../tests"] --cluster fix $addr << yes
+    } result]} {
+        puts "redis-cli --cluster fix returns non-zero exit code, output below:\n$result"
+    }
+
+    assert_cluster_state ok
+    wait_for_condition 100 100 {
+        ![catch {exec ../../../src/redis-cli {*}[rediscli_tls_config "../../../tests"] --cluster check $addr} result]
+    } else {
+        puts "redis-cli --cluster check returns non-zero exit code, output below:\n$result"
+        fail "Cluster could not settle with configuration"
+    }
+}
+
+# Poll "redis-cli --cluster check" against instance 0 (1000 tries, 50 ms
+# apart) until the command exits with status zero; fail the test otherwise.
+proc wait_cluster_stable {} {
+    wait_for_condition 1000 50 {
+        ![catch {
+            exec ../../../src/redis-cli --cluster check \
+                127.0.0.1:[get_instance_attrib redis 0 port] \
+                {*}[rediscli_tls_config "../../../tests"]
+        }]
+    } else {
+        fail "Cluster doesn't stabilize"
+    }
+}
+\ No newline at end of file
diff --git a/tests/cluster/tmp/.gitignore b/tests/cluster/tmp/.gitignore
new file mode 100644
index 0000000..f581f73
--- /dev/null
+++ b/tests/cluster/tmp/.gitignore
@@ -0,0 +1,2 @@
+redis_*
+sentinel_*
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-14 13:40:54 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-14 13:40:54 +0000
commit	317c0644ccf108aa23ef3fd8358bd66c2840bfc0 (patch)
tree	c417b3d25c86b775989cb5ac042f37611b626c8a /tests/cluster
parent	Initial commit. (diff)
download	redis-317c0644ccf108aa23ef3fd8358bd66c2840bfc0.tar.xz redis-317c0644ccf108aa23ef3fd8358bd66c2840bfc0.zip