summaryrefslogtreecommitdiffstats
path: root/tests/sentinel
diff options
context:
space:
mode:
Diffstat (limited to 'tests/sentinel')
-rw-r--r--tests/sentinel/run.tcl35
-rw-r--r--tests/sentinel/tests/00-base.tcl139
-rw-r--r--tests/sentinel/tests/01-conf-update.tcl39
-rw-r--r--tests/sentinel/tests/02-slaves-reconf.tcl91
-rw-r--r--tests/sentinel/tests/03-runtime-reconf.tcl225
-rw-r--r--tests/sentinel/tests/04-slave-selection.tcl5
-rw-r--r--tests/sentinel/tests/05-manual.tcl50
-rw-r--r--tests/sentinel/tests/06-ckquorum.tcl42
-rw-r--r--tests/sentinel/tests/07-down-conditions.tcl102
-rw-r--r--tests/sentinel/tests/08-hostname-conf.tcl69
-rw-r--r--tests/sentinel/tests/09-acl-support.tcl56
-rw-r--r--tests/sentinel/tests/10-replica-priority.tcl76
-rw-r--r--tests/sentinel/tests/11-port-0.tcl33
-rw-r--r--tests/sentinel/tests/12-master-reboot.tcl103
-rw-r--r--tests/sentinel/tests/13-info-command.tcl47
-rwxr-xr-xtests/sentinel/tests/helpers/check_leaked_fds.tcl79
-rw-r--r--tests/sentinel/tests/includes/init-tests.tcl63
-rw-r--r--tests/sentinel/tests/includes/sentinel.conf9
-rw-r--r--tests/sentinel/tests/includes/start-init-tests.tcl18
-rw-r--r--tests/sentinel/tests/includes/utils.tcl22
-rw-r--r--tests/sentinel/tmp/.gitignore2
21 files changed, 1305 insertions, 0 deletions
diff --git a/tests/sentinel/run.tcl b/tests/sentinel/run.tcl
new file mode 100644
index 0000000..98c4c11
--- /dev/null
+++ b/tests/sentinel/run.tcl
@@ -0,0 +1,35 @@
+# Sentinel test suite. Copyright (C) 2014 Salvatore Sanfilippo antirez@gmail.com
+# This software is released under the BSD License. See the COPYING file for
+# more information.
+
+cd tests/sentinel
+source ../instances.tcl
+
+set ::instances_count 5 ; # How many instances we use at max.
+set ::tlsdir "../../tls"
+
+proc main {} {
+ parse_options
+ if {$::leaked_fds_file != ""} {
+ set ::env(LEAKED_FDS_FILE) $::leaked_fds_file
+ }
+ spawn_instance sentinel $::sentinel_base_port $::instances_count {
+ "sentinel deny-scripts-reconfig no"
+ "enable-protected-configs yes"
+ "enable-debug-command yes"
+ } "../tests/includes/sentinel.conf"
+
+ spawn_instance redis $::redis_base_port $::instances_count {
+ "enable-protected-configs yes"
+ "enable-debug-command yes"
+ }
+ run_tests
+ cleanup
+ end_tests
+}
+
+if {[catch main e]} {
+ puts $::errorInfo
+ cleanup
+ exit 1
+}
diff --git a/tests/sentinel/tests/00-base.tcl b/tests/sentinel/tests/00-base.tcl
new file mode 100644
index 0000000..761ee82
--- /dev/null
+++ b/tests/sentinel/tests/00-base.tcl
@@ -0,0 +1,139 @@
+# Check the basic monitoring and failover capabilities.
+source "../tests/includes/start-init-tests.tcl"
+source "../tests/includes/init-tests.tcl"
+
+foreach_sentinel_id id {
+ S $id sentinel debug default-down-after 1000
+}
+
+if {$::simulate_error} {
+ test "This test will fail" {
+ fail "Simulated error"
+ }
+}
+
+test "Sentinel commands sanity check" {
+ foreach_sentinel_id id {
+ assert_equal {72} [llength [S $id command list]]
+ assert_equal {15} [S $id command count]
+ }
+}
+
+test "Basic failover works if the master is down" {
+ set old_port [RPort $master_id]
+ set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ assert {[lindex $addr 1] == $old_port}
+ kill_instance redis $master_id
+ foreach_sentinel_id id {
+ S $id sentinel debug ping-period 500
+ S $id sentinel debug ask-period 500
+ wait_for_condition 1000 100 {
+ [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+ } else {
+ fail "At least one Sentinel did not receive failover info"
+ }
+ }
+ restart_instance redis $master_id
+ set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+}
+
+test "New master [join $addr {:}] role matches" {
+ assert {[RI $master_id role] eq {master}}
+}
+
+test "All the other slaves now point to the new master" {
+ foreach_redis_id id {
+ if {$id != $master_id && $id != 0} {
+ wait_for_condition 1000 50 {
+ [RI $id master_port] == [lindex $addr 1]
+ } else {
+ fail "Redis ID $id not configured to replicate with new master"
+ }
+ }
+ }
+}
+
+test "The old master eventually gets reconfigured as a slave" {
+ wait_for_condition 1000 50 {
+ [RI 0 master_port] == [lindex $addr 1]
+ } else {
+ fail "Old master not reconfigured as slave of new master"
+ }
+}
+
+test "ODOWN is not possible without N (quorum) Sentinels reports" {
+    foreach_sentinel_id id {
+        S $id SENTINEL SET mymaster quorum [expr {$sentinels+1}]
+    }
+    set old_port [RPort $master_id]
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    assert {[lindex $addr 1] == $old_port}
+    kill_instance redis $master_id
+
+    # Make sure failover did not happen: the quorum was set above the Sentinel count.
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    assert {[lindex $addr 1] == $old_port}
+    restart_instance redis $master_id
+}
+
+test "Failover is not possible without majority agreement" {
+ foreach_sentinel_id id {
+ S $id SENTINEL SET mymaster quorum $quorum
+ }
+
+ # Crash majority of sentinels
+ for {set id 0} {$id < $quorum} {incr id} {
+ kill_instance sentinel $id
+ }
+
+ # Kill the current master
+ kill_instance redis $master_id
+
+ # Make sure failover did not happen.
+ set addr [S $quorum SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ assert {[lindex $addr 1] == $old_port}
+ restart_instance redis $master_id
+
+ # Cleanup: restart Sentinels to monitor the master.
+ for {set id 0} {$id < $quorum} {incr id} {
+ restart_instance sentinel $id
+ }
+}
+
+test "Failover works if we configure for absolute agreement" {
+ foreach_sentinel_id id {
+ S $id SENTINEL SET mymaster quorum $sentinels
+ }
+
+ # Wait for Sentinels to monitor the master again
+ foreach_sentinel_id id {
+ wait_for_condition 1000 100 {
+ [dict get [S $id SENTINEL MASTER mymaster] info-refresh] < 100000
+ } else {
+ fail "At least one Sentinel is not monitoring the master"
+ }
+ }
+
+ kill_instance redis $master_id
+
+ foreach_sentinel_id id {
+ wait_for_condition 1000 100 {
+ [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+ } else {
+ fail "At least one Sentinel did not receive failover info"
+ }
+ }
+ restart_instance redis $master_id
+ set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+
+ # Set the min ODOWN agreement back to strict majority.
+ foreach_sentinel_id id {
+ S $id SENTINEL SET mymaster quorum $quorum
+ }
+}
+
+test "New master [join $addr {:}] role matches" {
+ assert {[RI $master_id role] eq {master}}
+}
diff --git a/tests/sentinel/tests/01-conf-update.tcl b/tests/sentinel/tests/01-conf-update.tcl
new file mode 100644
index 0000000..5dca556
--- /dev/null
+++ b/tests/sentinel/tests/01-conf-update.tcl
@@ -0,0 +1,39 @@
+# Test Sentinel configuration consistency after partitions heal.
+
+source "../tests/includes/init-tests.tcl"
+
+test "We can failover with Sentinel 1 crashed" {
+ set old_port [RPort $master_id]
+ set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ assert {[lindex $addr 1] == $old_port}
+
+ # Crash Sentinel 1
+ kill_instance sentinel 1
+
+ kill_instance redis $master_id
+ foreach_sentinel_id id {
+ if {$id != 1} {
+ wait_for_condition 1000 50 {
+ [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+ } else {
+ fail "Sentinel $id did not receive failover info"
+ }
+ }
+ }
+ restart_instance redis $master_id
+ set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+}
+
+test "After Sentinel 1 is restarted, its config gets updated" {
+ restart_instance sentinel 1
+ wait_for_condition 1000 50 {
+ [lindex [S 1 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+ } else {
+ fail "Restarted Sentinel did not receive failover info"
+ }
+}
+
+test "New master [join $addr {:}] role matches" {
+ assert {[RI $master_id role] eq {master}}
+}
diff --git a/tests/sentinel/tests/02-slaves-reconf.tcl b/tests/sentinel/tests/02-slaves-reconf.tcl
new file mode 100644
index 0000000..8196b60
--- /dev/null
+++ b/tests/sentinel/tests/02-slaves-reconf.tcl
@@ -0,0 +1,91 @@
+# Check that slaves are reconfigured at a later time if they are partitioned.
+#
+# Here we should test:
+# 1) That slaves point to the new master after failover.
+# 2) That partitioned slaves point to new master when they are partitioned
+# away during failover and return at a later time.
+
+source "../tests/includes/init-tests.tcl"
+
+proc 02_test_slaves_replication {} {
+ uplevel 1 {
+ test "Check that slaves replicate from current master" {
+ set master_port [RPort $master_id]
+ foreach_redis_id id {
+ if {$id == $master_id} continue
+ if {[instance_is_killed redis $id]} continue
+ wait_for_condition 1000 50 {
+ ([RI $id master_port] == $master_port) &&
+ ([RI $id master_link_status] eq {up})
+ } else {
+ fail "Redis slave $id is replicating from wrong master"
+ }
+ }
+ }
+ }
+}
+
+proc 02_crash_and_failover {} {
+ uplevel 1 {
+ test "Crash the master and force a failover" {
+ set old_port [RPort $master_id]
+ set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ assert {[lindex $addr 1] == $old_port}
+ kill_instance redis $master_id
+ foreach_sentinel_id id {
+ wait_for_condition 1000 50 {
+ [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+ } else {
+ fail "At least one Sentinel did not receive failover info"
+ }
+ }
+ restart_instance redis $master_id
+ set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+ }
+ }
+}
+
+02_test_slaves_replication
+02_crash_and_failover
+
+foreach_sentinel_id id {
+ S $id sentinel debug info-period 100
+ S $id sentinel debug default-down-after 1000
+ S $id sentinel debug publish-period 100
+}
+
+02_test_slaves_replication
+
+test "Kill a slave instance" {
+ foreach_redis_id id {
+ if {$id == $master_id} continue
+ set killed_slave_id $id
+ kill_instance redis $id
+ break
+ }
+}
+
+02_crash_and_failover
+02_test_slaves_replication
+
+# Wait, with a bounded number of retries, until no Sentinel reports a
+# failover-state field for mymaster, i.e. no failover is still in progress.
+# A stuck failover now fails the test instead of hanging the suite forever.
+test "Wait for failover to end" {
+    foreach_sentinel_id id {
+        wait_for_condition 1000 100 {
+            ![dict exists [S $id SENTINEL MASTER mymaster] failover-state]
+        } else {
+            fail "Sentinel $id still reports a failover in progress"
+        }
+    }
+}
+
+test "Restart killed slave and test replication of slaves again..." {
+ restart_instance redis $killed_slave_id
+}
+
+# Now we check if the slave rejoining the partition is reconfigured even
+# if the failover finished.
+02_test_slaves_replication
diff --git a/tests/sentinel/tests/03-runtime-reconf.tcl b/tests/sentinel/tests/03-runtime-reconf.tcl
new file mode 100644
index 0000000..bd6eecc
--- /dev/null
+++ b/tests/sentinel/tests/03-runtime-reconf.tcl
@@ -0,0 +1,225 @@
+# Test runtime reconfiguration command SENTINEL SET.
+source "../tests/includes/init-tests.tcl"
+set num_sentinels [llength $::sentinel_instances]
+
+set ::user "testuser"
+set ::password "secret"
+
+proc server_set_password {} {
+ foreach_redis_id id {
+ assert_equal {OK} [R $id CONFIG SET requirepass $::password]
+ assert_equal {OK} [R $id AUTH $::password]
+ assert_equal {OK} [R $id CONFIG SET masterauth $::password]
+ }
+}
+
+proc server_reset_password {} {
+ foreach_redis_id id {
+ assert_equal {OK} [R $id CONFIG SET requirepass ""]
+ assert_equal {OK} [R $id CONFIG SET masterauth ""]
+ }
+}
+
+proc server_set_acl {id} {
+ assert_equal {OK} [R $id ACL SETUSER $::user on >$::password allchannels +@all]
+ assert_equal {OK} [R $id ACL SETUSER default off]
+
+ R $id CLIENT KILL USER default SKIPME no
+ assert_equal {OK} [R $id AUTH $::user $::password]
+ assert_equal {OK} [R $id CONFIG SET masteruser $::user]
+ assert_equal {OK} [R $id CONFIG SET masterauth $::password]
+}
+
+proc server_reset_acl {id} {
+ assert_equal {OK} [R $id ACL SETUSER default on]
+ assert_equal {1} [R $id ACL DELUSER $::user]
+
+ assert_equal {OK} [R $id CONFIG SET masteruser ""]
+ assert_equal {OK} [R $id CONFIG SET masterauth ""]
+}
+
+# Return 1 when Sentinel $id flags no replica of mymaster as disconnected, else 0.
+proc verify_sentinel_connect_replicas {id} {
+    foreach info [S $id SENTINEL REPLICAS mymaster] {
+        set flags [dict get $info flags]
+        if {[string match "*disconnected*" $flags]} {return 0}
+    }
+    return 1
+}
+
+proc wait_for_sentinels_connect_servers { {is_connect 1} } {
+ foreach_sentinel_id id {
+ wait_for_condition 1000 50 {
+ [string match "*disconnected*" [dict get [S $id SENTINEL MASTER mymaster] flags]] != $is_connect
+ } else {
+ fail "At least some sentinel can't connect to master"
+ }
+
+ wait_for_condition 1000 50 {
+ [verify_sentinel_connect_replicas $id] == $is_connect
+ } else {
+ fail "At least some sentinel can't connect to replica"
+ }
+ }
+}
+
+test "Sentinels (re)connection following SENTINEL SET mymaster auth-pass" {
+ # 3 types of sentinels to test:
+ # (re)started while master changed pwd. Manage to connect only after setting pwd
+ set sent2re 0
+ # (up)dated in advance with master new password
+ set sent2up 1
+ # (un)touched. Yet manage to maintain (old) connection
+ set sent2un 2
+
+ wait_for_sentinels_connect_servers
+ kill_instance sentinel $sent2re
+ server_set_password
+ assert_equal {OK} [S $sent2up SENTINEL SET mymaster auth-pass $::password]
+ restart_instance sentinel $sent2re
+
+ # Verify sentinel that restarted failed to connect master
+ wait_for_condition 100 50 {
+ [string match "*disconnected*" [dict get [S $sent2re SENTINEL MASTER mymaster] flags]] != 0
+ } else {
+ fail "Expected to be disconnected from master due to wrong password"
+ }
+
+ # Update restarted sentinel with master password
+ assert_equal {OK} [S $sent2re SENTINEL SET mymaster auth-pass $::password]
+
+ # All sentinels expected to connect successfully
+ wait_for_sentinels_connect_servers
+
+ # remove requirepass and verify sentinels manage to connect servers
+ server_reset_password
+ wait_for_sentinels_connect_servers
+ # Sanity check
+ verify_sentinel_auto_discovery
+}
+
+test "Sentinels (re)connection following master ACL change" {
+ # Three types of sentinels to test during ACL change:
+ # 1. (re)started Sentinel. Manage to connect only after setting new pwd
+ # 2. (up)dated Sentinel, get just before ACL change the new password
+ # 3. (un)touched Sentinel that kept old connection with master and didn't
+ # set new ACL password won't persist ACL pwd change (unlike legacy auth-pass)
+ set sent2re 0
+ set sent2up 1
+ set sent2un 2
+
+ wait_for_sentinels_connect_servers
+ # kill sentinel 'sent2re' and restart it after ACL change
+ kill_instance sentinel $sent2re
+
+ # Update sentinel 'sent2up' with new user and pwd
+ assert_equal {OK} [S $sent2up SENTINEL SET mymaster auth-user $::user]
+ assert_equal {OK} [S $sent2up SENTINEL SET mymaster auth-pass $::password]
+
+ foreach_redis_id id {
+ server_set_acl $id
+ }
+
+ restart_instance sentinel $sent2re
+
+ # Verify sentinel that restarted failed to reconnect master
+ wait_for_condition 100 50 {
+ [string match "*disconnected*" [dict get [S $sent2re SENTINEL MASTER mymaster] flags]] != 0
+ } else {
+ fail "Expected: Restarted sentinel to be disconnected from master due to obsolete password"
+ }
+
+ # Verify sentinel with updated password managed to connect (wait for sentinelTimer to reconnect)
+ wait_for_condition 100 50 {
+ [string match "*disconnected*" [dict get [S $sent2up SENTINEL MASTER mymaster] flags]] == 0
+ } else {
+ fail "Expected: Sentinel to be connected to master"
+ }
+
+ # Verify sentinel untouched gets failed to connect master
+ wait_for_condition 100 50 {
+ [string match "*disconnected*" [dict get [S $sent2un SENTINEL MASTER mymaster] flags]] != 0
+ } else {
+ fail "Expected: Sentinel to be disconnected from master due to obsolete password"
+ }
+
+ # Now update all sentinels with new password
+ foreach_sentinel_id id {
+ assert_equal {OK} [S $id SENTINEL SET mymaster auth-user $::user]
+ assert_equal {OK} [S $id SENTINEL SET mymaster auth-pass $::password]
+ }
+
+ # All sentinels expected to connect successfully
+ wait_for_sentinels_connect_servers
+
+ # remove requirepass and verify sentinels manage to connect servers
+ foreach_redis_id id {
+ server_reset_acl $id
+ }
+
+ wait_for_sentinels_connect_servers
+ # Sanity check
+ verify_sentinel_auto_discovery
+}
+
+test "Set parameters in normal case" {
+    # Change quorum and down-after-milliseconds, verify them, then restore.
+    set info [S 0 SENTINEL master mymaster]
+    set origin_quorum [dict get $info quorum]
+    set origin_down_after_milliseconds [dict get $info down-after-milliseconds]
+    set update_quorum [expr {$origin_quorum+1}]
+    set update_down_after_milliseconds [expr {$origin_down_after_milliseconds+1000}]
+
+    assert_equal [S 0 SENTINEL SET mymaster quorum $update_quorum] "OK"
+    assert_equal [S 0 SENTINEL SET mymaster down-after-milliseconds $update_down_after_milliseconds] "OK"
+    # The reported values must be exactly the ones just set, not merely different.
+    set update_info [S 0 SENTINEL master mymaster]
+    assert_equal [dict get $update_info quorum] $update_quorum
+    assert_equal [dict get $update_info down-after-milliseconds] $update_down_after_milliseconds
+
+    # Restore the original config parameters.
+    assert_equal [S 0 SENTINEL SET mymaster quorum $origin_quorum] "OK"
+    assert_equal [S 0 SENTINEL SET mymaster down-after-milliseconds $origin_down_after_milliseconds] "OK"
+}
+
+test "Set parameters in normal case with bad format" {
+
+ set info [S 0 SENTINEL master mymaster]
+ set origin_down_after_milliseconds [dict get $info down-after-milliseconds]
+
+ assert_error "ERR Invalid argument '-20' for SENTINEL SET 'down-after-milliseconds'*" {S 0 SENTINEL SET mymaster down-after-milliseconds -20}
+ assert_error "ERR Invalid argument 'abc' for SENTINEL SET 'down-after-milliseconds'*" {S 0 SENTINEL SET mymaster down-after-milliseconds "abc"}
+
+ set current_info [S 0 SENTINEL master mymaster]
+ assert {[dict get $current_info down-after-milliseconds] == $origin_down_after_milliseconds}
+}
+
+test "Sentinel Set with other error situations" {
+    # Each case below checks the error text returned by a failing SENTINEL SET.
+    # non-existing script
+    assert_error "ERR Notification script seems non existing*" {S 0 SENTINEL SET mymaster notification-script test.txt}
+
+    # wrong parameter number
+    assert_error "ERR wrong number of arguments for 'sentinel|set' command" {S 0 SENTINEL SET mymaster fakeoption}
+
+    # unknown parameter option
+    assert_error "ERR Unknown option or number of arguments for SENTINEL SET 'fakeoption'" {S 0 SENTINEL SET mymaster fakeoption fakevalue}
+
+    # save new config to disk failed
+    set info [S 0 SENTINEL master mymaster]
+    set origin_quorum [dict get $info quorum]
+    set update_quorum [expr {$origin_quorum+1}]
+    set sentinel_id 0
+    set configfilename [file join "sentinel_$sentinel_id" "sentinel.conf"]
+    set configfilename_bak [file join "sentinel_$sentinel_id" "sentinel.conf.bak"]
+    # Move the config file away and put a directory at its path.
+    file rename $configfilename $configfilename_bak
+    file mkdir $configfilename
+    # Run the command directly in catch; brackets would execute its reply as a command.
+    catch {S 0 SENTINEL SET mymaster quorum $update_quorum} err
+
+    file delete $configfilename
+    file rename $configfilename_bak $configfilename
+
+    assert_match "ERR Failed to save config file*" $err
+}
diff --git a/tests/sentinel/tests/04-slave-selection.tcl b/tests/sentinel/tests/04-slave-selection.tcl
new file mode 100644
index 0000000..3d2ca64
--- /dev/null
+++ b/tests/sentinel/tests/04-slave-selection.tcl
@@ -0,0 +1,5 @@
+# Test slave selection algorithm.
+#
+# This unit should test:
+# 1) That when there are no suitable slaves no failover is performed.
+# 2) That among the available slaves, the one with better offset is picked.
diff --git a/tests/sentinel/tests/05-manual.tcl b/tests/sentinel/tests/05-manual.tcl
new file mode 100644
index 0000000..a0004eb
--- /dev/null
+++ b/tests/sentinel/tests/05-manual.tcl
@@ -0,0 +1,50 @@
+# Test manual failover
+
+source "../tests/includes/init-tests.tcl"
+
+foreach_sentinel_id id {
+ S $id sentinel debug info-period 2000
+ S $id sentinel debug default-down-after 6000
+ S $id sentinel debug publish-period 1000
+}
+
+test "Manual failover works" {
+ set old_port [RPort $master_id]
+ set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ assert {[lindex $addr 1] == $old_port}
+ catch {S 0 SENTINEL FAILOVER mymaster} reply
+ assert {$reply eq "OK"}
+ foreach_sentinel_id id {
+ wait_for_condition 1000 50 {
+ [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+ } else {
+ fail "At least one Sentinel did not receive failover info"
+ }
+ }
+ set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+}
+
+test "New master [join $addr {:}] role matches" {
+ assert {[RI $master_id role] eq {master}}
+}
+
+test "All the other slaves now point to the new master" {
+ foreach_redis_id id {
+ if {$id != $master_id && $id != 0} {
+ wait_for_condition 1000 50 {
+ [RI $id master_port] == [lindex $addr 1]
+ } else {
+ fail "Redis ID $id not configured to replicate with new master"
+ }
+ }
+ }
+}
+
+test "The old master eventually gets reconfigured as a slave" {
+ wait_for_condition 1000 50 {
+ [RI 0 master_port] == [lindex $addr 1]
+ } else {
+ fail "Old master not reconfigured as slave of new master"
+ }
+}
diff --git a/tests/sentinel/tests/06-ckquorum.tcl b/tests/sentinel/tests/06-ckquorum.tcl
new file mode 100644
index 0000000..36c3dc6
--- /dev/null
+++ b/tests/sentinel/tests/06-ckquorum.tcl
@@ -0,0 +1,42 @@
+# Test for the SENTINEL CKQUORUM command
+
+source "../tests/includes/init-tests.tcl"
+set num_sentinels [llength $::sentinel_instances]
+
+test "CKQUORUM reports OK and the right amount of Sentinels" {
+ foreach_sentinel_id id {
+ assert_match "*OK $num_sentinels usable*" [S $id SENTINEL CKQUORUM mymaster]
+ }
+}
+
+test "CKQUORUM detects quorum cannot be reached" {
+    set orig_quorum [expr {$num_sentinels/2+1}]
+    S 0 SENTINEL SET mymaster quorum [expr {$num_sentinels+1}]
+    catch {S 0 SENTINEL CKQUORUM mymaster} err ;# err is the error text, or the reply on success
+    assert_match "*NOQUORUM*" $err
+    S 0 SENTINEL SET mymaster quorum $orig_quorum
+}
+
+test "CKQUORUM detects failover authorization cannot be reached" {
+ set orig_quorum [expr {$num_sentinels/2+1}]
+ S 0 SENTINEL SET mymaster quorum 1
+ for {set i 0} {$i < $orig_quorum} {incr i} {
+ kill_instance sentinel [expr {$i + 1}]
+ }
+
+ # We need to make sure that the other sentinels are in the `DOWN` state
+ # from the point of view of S 0 before executing `CKQUORUM`.
+ wait_for_condition 300 50 {
+ [catch {S 0 SENTINEL CKQUORUM mymaster}] == 1
+ } else {
+ fail "At least $orig_quorum sentinels did not enter the down state."
+ }
+
+ assert_error "*NOQUORUM*" {S 0 SENTINEL CKQUORUM mymaster}
+
+ S 0 SENTINEL SET mymaster quorum $orig_quorum
+ for {set i 0} {$i < $orig_quorum} {incr i} {
+ restart_instance sentinel [expr {$i + 1}]
+ }
+}
+
diff --git a/tests/sentinel/tests/07-down-conditions.tcl b/tests/sentinel/tests/07-down-conditions.tcl
new file mode 100644
index 0000000..bb24d6d
--- /dev/null
+++ b/tests/sentinel/tests/07-down-conditions.tcl
@@ -0,0 +1,102 @@
+# Test conditions where an instance is considered to be down
+
+source "../tests/includes/init-tests.tcl"
+source "../../../tests/support/cli.tcl"
+
+foreach_sentinel_id id {
+ S $id sentinel debug info-period 1000
+ S $id sentinel debug ask-period 100
+ S $id sentinel debug default-down-after 3000
+ S $id sentinel debug publish-period 200
+ S $id sentinel debug ping-period 100
+}
+
+set ::alive_sentinel [expr {$::instances_count/2+2}]
+proc ensure_master_up {} {
+ S $::alive_sentinel sentinel debug info-period 1000
+ S $::alive_sentinel sentinel debug ping-period 100
+ S $::alive_sentinel sentinel debug ask-period 100
+ S $::alive_sentinel sentinel debug publish-period 100
+ wait_for_condition 1000 50 {
+ [dict get [S $::alive_sentinel sentinel master mymaster] flags] eq "master"
+ } else {
+ fail "Master flags are not just 'master'"
+ }
+}
+
+proc ensure_master_down {} {
+ S $::alive_sentinel sentinel debug info-period 1000
+ S $::alive_sentinel sentinel debug ping-period 100
+ S $::alive_sentinel sentinel debug ask-period 100
+ S $::alive_sentinel sentinel debug publish-period 100
+ wait_for_condition 1000 50 {
+ [string match *down* \
+ [dict get [S $::alive_sentinel sentinel master mymaster] flags]]
+ } else {
+ fail "Master is not flagged SDOWN"
+ }
+}
+
+test "Crash the majority of Sentinels to prevent failovers for this unit" {
+ for {set id 0} {$id < $quorum} {incr id} {
+ kill_instance sentinel $id
+ }
+}
+
+test "SDOWN is triggered by non-responding but not crashed instance" {
+ ensure_master_up
+ set master_addr [S $::alive_sentinel SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ set master_id [get_instance_id_by_port redis [lindex $master_addr 1]]
+
+ set pid [get_instance_attrib redis $master_id pid]
+ exec kill -SIGSTOP $pid
+ ensure_master_down
+ exec kill -SIGCONT $pid
+ ensure_master_up
+}
+
+test "SDOWN is triggered by crashed instance" {
+ lassign [S $::alive_sentinel SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] host port
+ ensure_master_up
+ kill_instance redis 0
+ ensure_master_down
+ restart_instance redis 0
+ ensure_master_up
+}
+
+test "SDOWN is triggered by masters advertising as slaves" {
+ ensure_master_up
+ R 0 slaveof 127.0.0.1 34567
+ ensure_master_down
+ R 0 slaveof no one
+ ensure_master_up
+}
+
+test "SDOWN is triggered by misconfigured instance replying with errors" {
+ ensure_master_up
+ set orig_dir [lindex [R 0 config get dir] 1]
+ set orig_save [lindex [R 0 config get save] 1]
+ # Set dir to / and filename to "tmp" to make sure it will fail.
+ R 0 config set dir /
+ R 0 config set dbfilename tmp
+ R 0 config set save "1000000 1000000"
+ after 5000
+ R 0 bgsave
+ after 5000
+ ensure_master_down
+ R 0 config set save $orig_save
+ R 0 config set dir $orig_dir
+ R 0 config set dbfilename dump.rdb
+ R 0 bgsave
+ ensure_master_up
+}
+
+# We use this test setup to also test command renaming, as a side
+# effect of the master going down if we send PONG instead of PING
+test "SDOWN is triggered if we rename PING to PONG" {
+ ensure_master_up
+ S $::alive_sentinel SENTINEL SET mymaster rename-command PING PONG
+ ensure_master_down
+ S $::alive_sentinel SENTINEL SET mymaster rename-command PING PING
+ ensure_master_up
+}
diff --git a/tests/sentinel/tests/08-hostname-conf.tcl b/tests/sentinel/tests/08-hostname-conf.tcl
new file mode 100644
index 0000000..263b06f
--- /dev/null
+++ b/tests/sentinel/tests/08-hostname-conf.tcl
@@ -0,0 +1,69 @@
+source "../tests/includes/utils.tcl"
+
+proc set_redis_announce_ip {addr} {
+ foreach_redis_id id {
+ R $id config set replica-announce-ip $addr
+ }
+}
+
+proc set_sentinel_config {keyword value} {
+ foreach_sentinel_id id {
+ S $id sentinel config set $keyword $value
+ }
+}
+
+proc set_all_instances_hostname {hostname} {
+ foreach_sentinel_id id {
+ set_instance_attrib sentinel $id host $hostname
+ }
+ foreach_redis_id id {
+ set_instance_attrib redis $id host $hostname
+ }
+}
+
+test "(pre-init) Configure instances and sentinel for hostname use" {
+ set ::host "localhost"
+ restart_killed_instances
+ set_all_instances_hostname $::host
+ set_redis_announce_ip $::host
+ set_sentinel_config resolve-hostnames yes
+ set_sentinel_config announce-hostnames yes
+}
+
+source "../tests/includes/init-tests.tcl"
+
+# Return 0 as soon as any Sentinel reports an address other than $hostname.
+proc verify_hostname_announced {hostname} {
+    foreach_sentinel_id id {
+        # The first element of the master address must be the given hostname
+        set master_host [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 0]
+        if {$master_host ne $hostname} {
+            return 0
+        }
+
+        # The ip field of every replica must be the given hostname as well
+        foreach info [S $id SENTINEL REPLICAS mymaster] {
+            if {[dict get $info ip] ne $hostname} {return 0}
+        }
+    }
+    return 1
+}
+
+test "Sentinel announces hostnames" {
+    # Check initial state: the result must be asserted, not discarded
+    assert {[verify_hostname_announced $::host]}
+
+    # Disable announce-hostnames and confirm IPs are used
+    set_sentinel_config announce-hostnames no
+    assert {[verify_hostname_announced "127.0.0.1"] || [verify_hostname_announced "::1"]}
+}
+
+# We need to revert any special configuration because all tests currently
+# share the same instances.
+test "(post-cleanup) Configure instances and sentinel for IPs" {
+ set ::host "127.0.0.1"
+ set_all_instances_hostname $::host
+ set_redis_announce_ip $::host
+ set_sentinel_config resolve-hostnames no
+ set_sentinel_config announce-hostnames no
+}
\ No newline at end of file
diff --git a/tests/sentinel/tests/09-acl-support.tcl b/tests/sentinel/tests/09-acl-support.tcl
new file mode 100644
index 0000000..a754dac
--- /dev/null
+++ b/tests/sentinel/tests/09-acl-support.tcl
@@ -0,0 +1,56 @@
+
+source "../tests/includes/init-tests.tcl"
+
+set ::user "testuser"
+set ::password "secret"
+
+proc setup_acl {} {
+ foreach_sentinel_id id {
+ assert_equal {OK} [S $id ACL SETUSER $::user >$::password +@all on]
+ assert_equal {OK} [S $id ACL SETUSER default off]
+
+ S $id CLIENT KILL USER default SKIPME no
+ assert_equal {OK} [S $id AUTH $::user $::password]
+ }
+}
+
+proc teardown_acl {} {
+ foreach_sentinel_id id {
+ assert_equal {OK} [S $id ACL SETUSER default on]
+ assert_equal {1} [S $id ACL DELUSER $::user]
+
+ S $id SENTINEL CONFIG SET sentinel-user ""
+ S $id SENTINEL CONFIG SET sentinel-pass ""
+ }
+}
+
+test "(post-init) Set up ACL configuration" {
+ setup_acl
+ assert_equal $::user [S 1 ACL WHOAMI]
+}
+
+test "SENTINEL CONFIG SET handles on-the-fly credentials reconfiguration" {
+ # Make sure we're starting with a broken state...
+ wait_for_condition 200 50 {
+ [catch {S 1 SENTINEL CKQUORUM mymaster}] == 1
+ } else {
+ fail "Expected: Sentinel to be disconnected from master due to wrong password"
+ }
+ assert_error "*NOQUORUM*" {S 1 SENTINEL CKQUORUM mymaster}
+
+ foreach_sentinel_id id {
+ assert_equal {OK} [S $id SENTINEL CONFIG SET sentinel-user $::user]
+ assert_equal {OK} [S $id SENTINEL CONFIG SET sentinel-pass $::password]
+ }
+
+ wait_for_condition 200 50 {
+ [catch {S 1 SENTINEL CKQUORUM mymaster}] == 0
+ } else {
+ fail "Expected: Sentinel to be connected to master after setting password"
+ }
+ assert_match {*OK*} [S 1 SENTINEL CKQUORUM mymaster]
+}
+
+test "(post-cleanup) Tear down ACL configuration" {
+ teardown_acl
+}
diff --git a/tests/sentinel/tests/10-replica-priority.tcl b/tests/sentinel/tests/10-replica-priority.tcl
new file mode 100644
index 0000000..d3f868a
--- /dev/null
+++ b/tests/sentinel/tests/10-replica-priority.tcl
@@ -0,0 +1,76 @@
+source "../tests/includes/init-tests.tcl"
+
+test "Check acceptable replica-priority values" {
+ foreach_redis_id id {
+ if {$id == $master_id} continue
+
+ # ensure replica-announced accepts yes and no
+ catch {R $id CONFIG SET replica-announced no} e
+ if {$e ne "OK"} {
+ fail "Unable to set replica-announced to no"
+ }
+ catch {R $id CONFIG SET replica-announced yes} e
+ if {$e ne "OK"} {
+ fail "Unable to set replica-announced to yes"
+ }
+
+ # ensure a random value throws an error
+ catch {R $id CONFIG SET replica-announced 321} e
+ if {$e eq "OK"} {
+ fail "Able to set replica-announced with something else than yes or no (321) whereas it should not be possible"
+ }
+ catch {R $id CONFIG SET replica-announced a3b2c1} e
+ if {$e eq "OK"} {
+ fail "Able to set replica-announced with something else than yes or no (a3b2c1) whereas it should not be possible"
+ }
+
+ # test only the first redis replica, no need to double test
+ break
+ }
+}
+
+proc 10_test_number_of_replicas {n_replicas_expected} {
+    test "Check sentinel replies with $n_replicas_expected replicas" {
+        # ensure every sentinel replies with the expected number of replicas
+        foreach_sentinel_id id {
+            S $id sentinel debug info-period 100
+            S $id sentinel debug default-down-after 1000
+            S $id sentinel debug publish-period 100
+            # The failure message re-queries the count so it shows the final state.
+            wait_for_condition 200 100 {
+                [llength [S $id SENTINEL REPLICAS mymaster]] == $n_replicas_expected
+            } else {
+                fail "Sentinel replies with a wrong number of replicas (expected $n_replicas_expected but got [llength [S $id SENTINEL REPLICAS mymaster]]) on sentinel $id"
+            }
+        }
+    }
+}
+
+proc 10_set_replica_announced {master_id announced n_replicas} {
+ test "Set replica-announced=$announced on $n_replicas replicas" {
+ set i 0
+ foreach_redis_id id {
+ if {$id == $master_id} continue
+ #puts "set replica-announce=$announced on redis #$id"
+ R $id CONFIG SET replica-announced "$announced"
+ incr i
+ if { $n_replicas!="all" && $i >= $n_replicas } { break }
+ }
+ }
+}
+
+# ensure all replicas are announced
+10_set_replica_announced $master_id "yes" "all"
+# ensure all replicas are announced by sentinels
+10_test_number_of_replicas 4
+
+# ensure the first 2 replicas are not announced
+10_set_replica_announced $master_id "no" 2
+# ensure sentinels are not announcing the first 2 replicas that have been set unannounced
+10_test_number_of_replicas 2
+
+# ensure all replicas are announced
+10_set_replica_announced $master_id "yes" "all"
+# ensure all replicas are announced again by sentinels
+10_test_number_of_replicas 4
+
diff --git a/tests/sentinel/tests/11-port-0.tcl b/tests/sentinel/tests/11-port-0.tcl
new file mode 100644
index 0000000..a3e8bdb
--- /dev/null
+++ b/tests/sentinel/tests/11-port-0.tcl
@@ -0,0 +1,33 @@
+source "../tests/includes/init-tests.tcl"
+
+# Restart the last sentinel on the same port after stripping its "myid" line
+# from the config file, then check that sentinel 0 still reports the same
+# number of other sentinels for "mymaster".
+test "Start/Stop sentinel on same port with a different runID should not change the total number of sentinels" {
+    set sentinel_id [expr {$::instances_count-1}]
+    # Kill sentinel instance
+    kill_instance sentinel $sentinel_id
+
+    # Delete line with myid in sentinels config file
+    set orgfilename [file join "sentinel_$sentinel_id" "sentinel.conf"]
+    set tmpfilename "sentinel.conf_tmp"
+    set dirname "sentinel_$sentinel_id"
+
+    delete_lines_with_pattern $orgfilename $tmpfilename "myid"
+
+    # Get count of total sentinels. The reply is a flat field/value list, so
+    # look the field up by name: a fixed list index silently points at a
+    # different field whenever the reply carries extra optional fields.
+    set a [S 0 SENTINEL master mymaster]
+    set original_count [dict get $a num-other-sentinels]
+
+    # Restart sentinel with the modified config file
+    set pid [exec_instance "sentinel" $dirname $orgfilename]
+    lappend ::pids $pid
+
+    # Give the restarted sentinel some time to announce itself.
+    after 1000
+
+    # Get new count of total sentinel
+    set b [S 0 SENTINEL master mymaster]
+    set curr_count [dict get $b num-other-sentinels]
+
+    # If the count is not the same then fail the test
+    if {$original_count != $curr_count} {
+        fail "Sentinel count is incorrect, original count being $original_count and current count is $curr_count"
+    }
+}
diff --git a/tests/sentinel/tests/12-master-reboot.tcl b/tests/sentinel/tests/12-master-reboot.tcl
new file mode 100644
index 0000000..1fdd91d
--- /dev/null
+++ b/tests/sentinel/tests/12-master-reboot.tcl
@@ -0,0 +1,103 @@
+# Check that Sentinels fail over a master that is rebooted in a very short
+# time and is slow to load its dataset afterwards.
+source "../tests/includes/init-tests.tcl"
+
+
+# NOTE(review): $::simulate_error is presumably set by a test-runner option to
+# force one failing test -- confirm against the harness.
+if {$::simulate_error} {
+ test "This test will fail" {
+ fail "Simulated error"
+ }
+}
+
+
+# Start instance <type>/<id> again from its existing directory and config
+# file, register the new pid and a fresh client link, and return as soon as
+# the port answers. Unlike a regular restart, no check is made for whether
+# the instance is still loading its dataset.
+proc reboot_instance {type id} {
+    set instance_dir "${type}_${id}"
+    set conf_path [file join $instance_dir $type.conf]
+    set listen_port [get_instance_attrib $type $id port]
+
+    # Launch with the old setup; the pid is also queued in ::pids for cleanup.
+    set new_pid [exec_instance $type $instance_dir $conf_path]
+    set_instance_attrib $type $id pid $new_pid
+    lappend ::pids $new_pid
+
+    # Abort the whole run if the instance never comes up, after dumping the
+    # tail of its log.
+    if {[server_is_up 127.0.0.1 $listen_port 100] == 0} {
+        set logfile [file join $instance_dir log.txt]
+        puts [exec tail $logfile]
+        abort_sentinel_test "Problems starting $type #$id: ping timeout, maybe server start failed, check $logfile"
+    }
+
+    # Replace the stored link with a new auto-reconnecting connection.
+    set fresh_link [redis 127.0.0.1 $listen_port 0 $::tls]
+    $fresh_link reconnect 1
+    set_instance_attrib $type $id link $fresh_link
+}
+
+
+# Kill and immediately reboot the master, wait until every sentinel reports a
+# different master address, then wait for the rebooted instance to finish
+# loading. Leaves addr and master_id pointing at the new master for the
+# tests that follow.
+test "Master reboot in very short time" {
+    set old_port [RPort $master_id]
+    # master_id is reassigned to the promoted instance further down, so keep
+    # the id of the instance that is actually rebooted.
+    set rebooted_id $master_id
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    assert {[lindex $addr 1] == $old_port}
+
+    # Create a dataset, save it, and persist a per-key load delay in the
+    # config file (presumably so the rebooted master stays in the loading
+    # state long enough for the sentinels to react).
+    R $master_id debug populate 10000
+    R $master_id bgsave
+    R $master_id config set key-load-delay 1500
+    R $master_id config set loading-process-events-interval-bytes 1024
+    R $master_id config rewrite
+
+    foreach_sentinel_id id {
+        S $id SENTINEL SET mymaster master-reboot-down-after-period 5000
+        S $id sentinel debug ping-period 500
+        S $id sentinel debug ask-period 500
+    }
+
+    kill_instance redis $master_id
+    reboot_instance redis $master_id
+
+    # Every sentinel must end up advertising a master on another port.
+    foreach_sentinel_id id {
+        wait_for_condition 1000 100 {
+            [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+        } else {
+            fail "At least one Sentinel did not receive failover info"
+        }
+    }
+
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+
+    # Make sure the instance load all the dataset.
+    # Poll the rebooted instance through R, which uses the link that
+    # reboot_instance stored for it; a LOADING error captured by catch means
+    # it is still busy, anything else ends the wait.
+    while 1 {
+        catch {R $rebooted_id ping} retval
+        if {[string match {*LOADING*} $retval]} {
+            after 100
+            continue
+        } else {
+            break
+        }
+    }
+}
+
+# The instance the sentinels now advertise must report the master role.
+test "New master [join $addr {:}] role matches" {
+ assert {[RI $master_id role] eq {master}}
+}
+
+# Every instance other than the new master and instance 0 (the master that
+# was rebooted) must replicate from the new master's port.
+test "All the other slaves now point to the new master" {
+ foreach_redis_id id {
+ if {$id != $master_id && $id != 0} {
+ wait_for_condition 1000 50 {
+ [RI $id master_port] == [lindex $addr 1]
+ } else {
+ fail "Redis ID $id not configured to replicate with new master"
+ }
+ }
+ }
+}
+
+# Instance 0 is the original master; it must end up replicating from the new
+# master as well.
+test "The old master eventually gets reconfigured as a slave" {
+ wait_for_condition 1000 50 {
+ [RI 0 master_port] == [lindex $addr 1]
+ } else {
+ fail "Old master not reconfigured as slave of new master"
+ }
+} \ No newline at end of file
diff --git a/tests/sentinel/tests/13-info-command.tcl b/tests/sentinel/tests/13-info-command.tcl
new file mode 100644
index 0000000..ef9dc01
--- /dev/null
+++ b/tests/sentinel/tests/13-info-command.tcl
@@ -0,0 +1,47 @@
+source "../tests/includes/init-tests.tcl"
+
+# INFO with no argument, or with "all", "default" or "everything", must return
+# the server, clients, cpu and sentinel fields checked below and none of the
+# memory, persistence, replication or cluster fields.
+test "info command with at most one argument" {
+    foreach arg {"" "all" "default" "everything"} {
+        # {*} expands the empty string to no words at all, so the first
+        # iteration issues a plain INFO without an argument.
+        set info [S 0 info {*}$arg]
+        assert { [string match "*redis_version*" $info] }
+        assert { [string match "*maxclients*" $info] }
+        assert { [string match "*used_cpu_user*" $info] }
+        assert { [string match "*sentinel_tilt*" $info] }
+        assert { ![string match "*used_memory*" $info] }
+        assert { ![string match "*rdb_last_bgsave*" $info] }
+        assert { ![string match "*master_repl_offset*" $info] }
+        assert { ![string match "*cluster_enabled*" $info] }
+    }
+}
+
+# Asking for a single section must return that section's field and neither of
+# the two other fields checked here.
+test "info command with one sub-section" {
+ set info [S 0 info cpu]
+ assert { [string match "*used_cpu_user*" $info] }
+ assert { ![string match "*sentinel_tilt*" $info] }
+ assert { ![string match "*redis_version*" $info] }
+
+ set info [S 0 info sentinel]
+ assert { [string match "*sentinel_tilt*" $info] }
+ assert { ![string match "*used_cpu_user*" $info] }
+ assert { ![string match "*redis_version*" $info] }
+}
+
+# Several section names may be passed in a single INFO call.
+test "info command with multiple sub-sections" {
+ # server + sentinel fields are expected; no memory or cpu field may appear
+ # even though "replication" is part of the request.
+ set info [S 0 info server sentinel replication]
+ assert { [string match "*redis_version*" $info] }
+ assert { [string match "*sentinel_tilt*" $info] }
+ assert { ![string match "*used_memory*" $info] }
+ assert { ![string match "*used_cpu_user*" $info] }
+
+ # A section name combined with "all": cpu, sentinel and server fields are
+ # expected, memory and replication fields are not.
+ set info [S 0 info cpu all]
+ assert { [string match "*used_cpu_user*" $info] }
+ assert { [string match "*sentinel_tilt*" $info] }
+ assert { [string match "*redis_version*" $info] }
+ assert { ![string match "*used_memory*" $info] }
+ assert { ![string match "*master_repl_offset*" $info] }
+}
diff --git a/tests/sentinel/tests/helpers/check_leaked_fds.tcl b/tests/sentinel/tests/helpers/check_leaked_fds.tcl
new file mode 100755
index 0000000..482b3e0
--- /dev/null
+++ b/tests/sentinel/tests/helpers/check_leaked_fds.tcl
@@ -0,0 +1,79 @@
+#!/usr/bin/env tclsh
+#
+# This script detects file descriptors that have leaked from a parent process.
+#
+# Our goal is to detect file descriptors that were opened by the parent and
+# not cleaned up prior to exec(), but not file descriptors that were inherited
+# from the grandparent which the parent knows nothing about. To do that, we
+# look up every potential leak and try to match it against open files by the
+# grandparent process.
+
+# Get PID of parent process
+proc get_parent_pid {_pid} {
+    # /proc/<pid>/status carries a "PPid:" line holding the parent's pid.
+    set status_fh [open "/proc/$_pid/status" "r"]
+    set status_text [read $status_fh]
+    close $status_fh
+
+    # Raise an error when the line is missing instead of returning a bogus pid.
+    if {![regexp {\nPPid:\s+(\d+)} $status_text -> ppid]} {
+        error "failed to get parent pid"
+    }
+    return $ppid
+}
+
+# Read symlink to get info about the specified fd of the specified process.
+# The result can be the file name or an arbitrary string that identifies it.
+# When not able to read, an empty string is returned.
+proc get_fdlink {_pid fd} {
+    set link_path "/proc/$_pid/fd/$fd"
+    # Any failure to read the symlink is reported as an empty string.
+    if {[catch {file readlink $link_path} target]} {
+        return ""
+    }
+    return $target
+}
+
+# Linux only: the checks below read /proc. On any other OS the script exits
+# successfully without producing a report.
+set os [exec uname]
+if {$os != "Linux"} {
+ puts "Only Linux is supported."
+ exit 0
+}
+
+# Without a report file name there is nowhere to write to; this is also
+# treated as a successful no-op.
+if {![info exists env(LEAKED_FDS_FILE)]} {
+ puts "Missing LEAKED_FDS_FILE environment variable."
+ exit 0
+}
+
+# Report destination, plus the two ancestors whose fd tables are compared.
+set outfile $::env(LEAKED_FDS_FILE)
+set parent_pid [get_parent_pid [pid]]
+set grandparent_pid [get_parent_pid $parent_pid]
+set leaked_fds {}
+
+# Walk our own fd table and collect every descriptor that came from the
+# parent but was not already open in the grandparent (tcl).
+foreach fd [glob -tails -directory "/proc/self/fd" *] {
+    # stdin/stdout/stderr are never reported
+    if {$fd == 0 || $fd == 1 || $fd == 2} { continue }
+
+    # Descriptors whose link cannot be read are skipped
+    set fdlink [get_fdlink "self" $fd]
+    if {$fdlink == ""} { continue }
+
+    # Same target in the grandparent: inherited from further up, not a leak
+    set in_grandparent [expr {[get_fdlink $grandparent_pid $fd] == $fdlink}]
+    if {$in_grandparent} { continue }
+
+    # Different (or no) target in the parent (Sentinel process): not
+    # something the parent handed down to us
+    set in_parent [expr {[get_fdlink $parent_pid $fd] == $fdlink}]
+    if {!$in_parent} { continue }
+
+    lappend leaked_fds [list $fd $fdlink]
+}
+
+# The report file is only written when at least one leak was found; each
+# line holds one "fd target" pair.
+if {[llength $leaked_fds]} {
+    set report [open $outfile "w"]
+    puts $report [join $leaked_fds "\n"]
+    close $report
+}
diff --git a/tests/sentinel/tests/includes/init-tests.tcl b/tests/sentinel/tests/includes/init-tests.tcl
new file mode 100644
index 0000000..ddb1319
--- /dev/null
+++ b/tests/sentinel/tests/includes/init-tests.tcl
@@ -0,0 +1,63 @@
+# Initialization tests -- most units will start including this.
+source "../tests/includes/utils.tcl"
+
+# Bring back every instance whose pid attribute marks it as killed.
+test "(init) Restart killed instances" {
+ restart_killed_instances
+}
+
+# Errors from SENTINEL REMOVE are deliberately ignored (presumably because a
+# sentinel may not be monitoring "mymaster" at this point).
+test "(init) Remove old master entry from sentinels" {
+ foreach_sentinel_id id {
+ catch {S $id SENTINEL REMOVE mymaster}
+ }
+}
+
+# One instance acts as master, all the others as its slaves. The expr
+# arguments are braced so they are substituted only once, by expr itself.
+set redis_slaves [expr {$::instances_count - 1}]
+test "(init) Create a master-slaves cluster of [expr {$redis_slaves+1}] instances" {
+    create_redis_master_slave_cluster [expr {$redis_slaves+1}]
+}
+# Instance 0 is used as the master by the tests below.
+set master_id 0
+
+# Point every sentinel at the master with a majority quorum, then apply the
+# timing settings used by the test units.
+test "(init) Sentinels can start monitoring a master" {
+    set sentinels [llength $::sentinel_instances]
+    # Majority of the sentinels.
+    set quorum [expr {$sentinels/2+1}]
+    foreach_sentinel_id id {
+        S $id SENTINEL MONITOR mymaster \
+            [get_instance_attrib redis $master_id host] \
+            [get_instance_attrib redis $master_id port] $quorum
+    }
+
+    # Whether to install the fd-leak helper is the same for every sentinel,
+    # so decide once instead of spawning `uname` on each iteration.
+    set install_fd_check [expr {$::leaked_fds_file != "" && [exec uname] == "Linux"}]
+
+    foreach_sentinel_id id {
+        assert {[S $id sentinel master mymaster] ne {}}
+        S $id SENTINEL SET mymaster down-after-milliseconds 2000
+        S $id SENTINEL SET mymaster failover-timeout 10000
+        S $id SENTINEL debug tilt-period 5000
+        S $id SENTINEL SET mymaster parallel-syncs 10
+        if {$install_fd_check} {
+            # The same helper is registered for both script hooks.
+            S $id SENTINEL SET mymaster notification-script ../../tests/helpers/check_leaked_fds.tcl
+            S $id SENTINEL SET mymaster client-reconfig-script ../../tests/helpers/check_leaked_fds.tcl
+        }
+    }
+}
+
+# Each sentinel must answer GET-MASTER-ADDR-BY-NAME without raising an error.
+test "(init) Sentinels can talk with the master" {
+ foreach_sentinel_id id {
+ wait_for_condition 1000 50 {
+ [catch {S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster}] == 0
+ } else {
+ fail "Sentinel $id can't talk with the master."
+ }
+ }
+}
+
+test "(init) Sentinels are able to auto-discover other sentinels" {
+ verify_sentinel_auto_discovery
+}
+
+# Each sentinel must report num-slaves equal to the number of slaves created
+# above.
+test "(init) Sentinels are able to auto-discover slaves" {
+ foreach_sentinel_id id {
+ wait_for_condition 1000 50 {
+ [dict get [S $id SENTINEL MASTER mymaster] num-slaves] == $redis_slaves
+ } else {
+ fail "At least some sentinel can't detect some slave"
+ }
+ }
+}
diff --git a/tests/sentinel/tests/includes/sentinel.conf b/tests/sentinel/tests/includes/sentinel.conf
new file mode 100644
index 0000000..1275236
--- /dev/null
+++ b/tests/sentinel/tests/includes/sentinel.conf
@@ -0,0 +1,9 @@
+# assume master is down after being unresponsive for 20s
+sentinel down-after-milliseconds setmaster 20000
+# reconfigure at most two slaves at a time
+sentinel parallel-syncs setmaster 2
+# wait for 4m before assuming failover went wrong
+sentinel failover-timeout setmaster 240000
+# monitoring set
+sentinel monitor setmaster 10.0.0.1 30000 2
+
diff --git a/tests/sentinel/tests/includes/start-init-tests.tcl b/tests/sentinel/tests/includes/start-init-tests.tcl
new file mode 100644
index 0000000..b052350
--- /dev/null
+++ b/tests/sentinel/tests/includes/start-init-tests.tcl
@@ -0,0 +1,18 @@
+# After SENTINEL FLUSHCONFIG, every line of the template config must still be
+# found, in the same position, at the top of each sentinel's rewritten
+# config file.
+test "(start-init) Flush config and compare rewrite config file lines" {
+    foreach_sentinel_id id {
+        assert_match "OK" [S $id SENTINEL FLUSHCONFIG]
+        set file1 ../tests/includes/sentinel.conf
+        set file2 [file join "sentinel_${id}" "sentinel.conf"]
+        set fh1 [open $file1 r]
+        set fh2 [open $file2 r]
+        # gets returns the line length, i.e. 0 for an empty line and -1 at
+        # end of file. Testing the raw value would stop at the first empty
+        # line and never stop at EOF, so compare against 0 explicitly.
+        while {[gets $fh1 line1] >= 0} {
+            if {[gets $fh2 line2] >= 0} {
+                assert [string equal $line1 $line2]
+            } else {
+                # The rewritten file ran out of lines before the template.
+                fail "sentinel config file rewrite sequence changed"
+            }
+        }
+        close $fh1
+        close $fh2
+    }
+} \ No newline at end of file
diff --git a/tests/sentinel/tests/includes/utils.tcl b/tests/sentinel/tests/includes/utils.tcl
new file mode 100644
index 0000000..adfd91c
--- /dev/null
+++ b/tests/sentinel/tests/includes/utils.tcl
@@ -0,0 +1,22 @@
+# Restart every redis and sentinel instance whose pid attribute is -1,
+# printing "<type>/<id> " for each one as it is restarted.
+proc restart_killed_instances {} {
+    foreach type {redis sentinel} {
+        # Iterator command for this instance type.
+        set iterate "foreach_${type}_id"
+        $iterate id {
+            set current_pid [get_instance_attrib $type $id pid]
+            if {$current_pid == -1} {
+                puts -nonewline "$type/$id "
+                flush stdout
+                restart_instance $type $id
+            }
+        }
+    }
+}
+
+# Wait until each sentinel reports, for "mymaster", a num-other-sentinels
+# value equal to the number of sentinel instances minus itself.
+proc verify_sentinel_auto_discovery {} {
+    set expected_peers [expr {[llength $::sentinel_instances] - 1}]
+    foreach_sentinel_id id {
+        wait_for_condition 1000 50 {
+            [dict get [S $id SENTINEL MASTER mymaster] num-other-sentinels] == $expected_peers
+        } else {
+            fail "At least some sentinel can't detect some other sentinel"
+        }
+    }
+}
diff --git a/tests/sentinel/tmp/.gitignore b/tests/sentinel/tmp/.gitignore
new file mode 100644
index 0000000..f581f73
--- /dev/null
+++ b/tests/sentinel/tmp/.gitignore
@@ -0,0 +1,2 @@
+redis_*
+sentinel_*