summaryrefslogtreecommitdiffstats
path: root/utils/cluster_fail_time.tcl
blob: 87399495f8a988b2a17be4a8f1e7d84fa0253a2c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# This simple script is used in order to estimate the average PFAIL->FAIL
# state switch after a failure.

set ::sleep_time 10     ; # How much to sleep to trigger PFAIL.
set ::fail_port 30016   ; # Node to put in sleep.
set ::other_port 30001  ; # Node to use to monitor the flag switch.

proc avg vector {
    set sum 0.0
    foreach x $vector {
        set sum [expr {$sum+$x}]
    }
    expr {$sum/[llength $vector]}
}

set samples {}
while 1 {
    exec redis-cli -p $::fail_port debug sleep $::sleep_time > /dev/null &

    # Wait for fail? to appear.
    while 1 {
        set output [exec redis-cli -p $::other_port cluster nodes]
        if {[string match {*fail\?*} $output]} break
        after 100
    }

    puts "FAIL?"
    set start [clock milliseconds]

    # Wait for fail? to disappear.
    while 1 {
        set output [exec redis-cli -p $::other_port cluster nodes]
        if {![string match {*fail\?*} $output]} break
        after 100
    }

    puts "FAIL"
    set now [clock milliseconds]
    set elapsed [expr {$now-$start}]
    puts $elapsed
    lappend samples $elapsed

    puts "AVG([llength $samples]): [avg $samples]"

    # Wait for the instance to be available again.
    exec redis-cli -p $::fail_port ping

    # Wait for the fail flag to be cleared.
    after 2000
}