summaryrefslogtreecommitdiffstats
path: root/tests/integration/psync2.tcl
blob: a258f1b831900993d640d1fc074ddcac4fbab091 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
proc show_cluster_status {} {
    uplevel 1 {
        # The following is the regexp we use to match the log line
        # time info. Logs are in the following form:
        #
        # 11296:M 25 May 2020 17:37:14.652 # Server initialized
        set log_regexp {^[0-9]+:[A-Z] [0-9]+ [A-z]+ [0-9]+ ([0-9:.]+) .*}
        set repl_regexp {(master|repl|sync|backlog|meaningful|offset)}

        puts "Master ID is $master_id"
        for {set j 0} {$j < 5} {incr j} {
            puts "$j: sync_full: [status $R($j) sync_full]"
            puts "$j: id1      : [status $R($j) master_replid]:[status $R($j) master_repl_offset]"
            puts "$j: id2      : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]"
            puts "$j: backlog  : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]"
            puts "$j: x var is : [$R($j) GET x]"
            puts "---"
        }

        # Show the replication logs of every instance, interleaving
        # them by the log date.
        #
        # First: load the lines as lists for each instance.
        array set log {}
        for {set j 0} {$j < 5} {incr j} {
            set fd [open $R_log($j)]
            while {[gets $fd l] >= 0} {
                if {[regexp $log_regexp $l] &&
                    [regexp -nocase $repl_regexp $l]} {
                    lappend log($j) $l
                }
            }
            close $fd
        }

        # To interleave the lines, at every step consume the element of
        # the list with the lowest time and remove it. Do it until
        # all the lists are empty.
        #
        # regexp {^[0-9]+:[A-Z] [0-9]+ [A-z]+ [0-9]+ ([0-9:.]+) .*} $l - logdate
        while 1 {
            # Find the log with smallest time.
            set empty 0
            set best 0
            set bestdate {}
            for {set j 0} {$j < 5} {incr j} {
                if {[llength $log($j)] == 0} {
                    incr empty
                    continue
                }
                regexp $log_regexp [lindex $log($j) 0] - date
                if {$bestdate eq {}} {
                    set best $j
                    set bestdate $date
                } else {
                    if {[string compare $bestdate $date] > 0} {
                        set best $j
                        set bestdate $date
                    }
                }
            }
            if {$empty == 5} break ; # Our exit condition: no more logs

            # Emit the one with the smallest time (that is the first
            # event in the time line).
            puts "\[$best port $R_port($best)\] [lindex $log($best) 0]"
            set log($best) [lrange $log($best) 1 end]
        }
    }
}

start_server {tags {"psync2 external:skip"}} {
start_server {} {
start_server {} {
start_server {} {
start_server {} {
    set master_id 0                 ; # Current master
    set start_time [clock seconds]  ; # Test start time
    set counter_value 0             ; # Current value of the Redis counter "x"

    # Config
    set debug_msg 0                 ; # Enable additional debug messages

    set no_exit 0                   ; # Do not exit at end of the test

    set duration 40                 ; # Total test seconds

    set genload 1                   ; # Load master with writes at every cycle

    set genload_time 5000           ; # Writes duration time in ms

    set disconnect 1                ; # Break replication link between random
                                      # master and slave instances while the
                                      # master is loaded with writes.

    set disconnect_period 1000      ; # Disconnect repl link every N ms.

    for {set j 0} {$j < 5} {incr j} {
        set R($j) [srv [expr 0-$j] client]
        set R_host($j) [srv [expr 0-$j] host]
        set R_port($j) [srv [expr 0-$j] port]
        set R_id_from_port($R_port($j)) $j ;# To get a replica index by port
        set R_log($j) [srv [expr 0-$j] stdout]
        if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"}
    }

    set cycle 0
    while {([clock seconds]-$start_time) < $duration} {
        incr cycle
        test "PSYNC2: --- CYCLE $cycle ---" {}

        # Create a random replication layout.
        # Start with switching master (this simulates a failover).

        # 1) Select the new master.
        set master_id [randomInt 5]
        set used [list $master_id]
        test "PSYNC2: \[NEW LAYOUT\] Set #$master_id as master" {
            $R($master_id) slaveof no one
            $R($master_id) config set repl-ping-replica-period 1 ;# increase the chance that random ping will cause issues
            if {$counter_value == 0} {
                $R($master_id) set x $counter_value
            }
        }

        # Build a lookup with the root master of each replica (head of the chain).
        array set root_master {}
        for {set j 0} {$j < 5} {incr j} {
            set r $j
            while {1} {
                set r_master_port [status $R($r) master_port]
                if {$r_master_port == ""} {
                    set root_master($j) $r
                    break
                }
                set r_master_id $R_id_from_port($r_master_port)
                set r $r_master_id
            }
        }

        # Wait for the newly detached master-replica chain (new master and existing replicas that were
        # already connected to it, to get updated on the new replication id.
        # This is needed to avoid a race that can result in a full sync when a replica that already
        # got an updated repl id, tries to psync from one that's not yet aware of it.
        wait_for_condition 50 1000 {
            ([status $R(0) master_replid] == [status $R($root_master(0)) master_replid]) &&
            ([status $R(1) master_replid] == [status $R($root_master(1)) master_replid]) &&
            ([status $R(2) master_replid] == [status $R($root_master(2)) master_replid]) &&
            ([status $R(3) master_replid] == [status $R($root_master(3)) master_replid]) &&
            ([status $R(4) master_replid] == [status $R($root_master(4)) master_replid])
        } else {
            show_cluster_status
            fail "Replica did not inherit the new replid."
        }

        # Build a lookup with the direct connection master of each replica.
        # First loop that uses random to decide who replicates from who.
        array set slave_to_master {}
        while {[llength $used] != 5} {
            while 1 {
                set slave_id [randomInt 5]
                if {[lsearch -exact $used $slave_id] == -1} break
            }
            set rand [randomInt [llength $used]]
            set mid [lindex $used $rand]
            set slave_to_master($slave_id) $mid
            lappend used $slave_id
        }

        # 2) Attach all the slaves to a random instance
        # Second loop that does the actual SLAVEOF command and make sure execute it in the right order.
        while {[array size slave_to_master] > 0} {
            foreach slave_id [array names slave_to_master] {
                set mid $slave_to_master($slave_id)

                # We only attach the replica to a random instance that already in the old/new chain.
                if {$root_master($mid) == $root_master($master_id)} {
                    # Find a replica that can be attached to the new chain already attached to the new master.
                    # My new master is in the new chain.
                } elseif {$root_master($mid) == $root_master($slave_id)} {
                    # My new master and I are in the old chain.
                } else {
                    # In cycle 1, we do not care about the order.
                    if {$cycle != 1} {
                        # skipping this replica for now to avoid attaching in a bad order
                        # this is done to avoid an unexpected full sync, when we take a
                        # replica that already reconnected to the new chain and got a new replid
                        # and is then set to connect to a master that's still not aware of that new replid
                        continue
                    }
                }

                set master_host $R_host($master_id)
                set master_port $R_port($master_id)

                test "PSYNC2: Set #$slave_id to replicate from #$mid" {
                    $R($slave_id) slaveof $master_host $master_port
                }

                # Wait for replica to be connected before we proceed.
                wait_for_condition 50 1000 {
                    [status $R($slave_id) master_link_status] == "up"
                } else {
                    show_cluster_status
                    fail "Replica not reconnecting."
                }

                set root_master($slave_id) $root_master($mid)
                unset slave_to_master($slave_id)
                break
            }
        }

        # Wait for replicas to sync. so next loop won't get -LOADING error
        wait_for_condition 50 1000 {
            [status $R([expr {($master_id+1)%5}]) master_link_status] == "up" &&
            [status $R([expr {($master_id+2)%5}]) master_link_status] == "up" &&
            [status $R([expr {($master_id+3)%5}]) master_link_status] == "up" &&
            [status $R([expr {($master_id+4)%5}]) master_link_status] == "up"
        } else {
            show_cluster_status
            fail "Replica not reconnecting"
        }

        # 3) Increment the counter and wait for all the instances
        # to converge.
        test "PSYNC2: cluster is consistent after failover" {
            $R($master_id) incr x; incr counter_value
            for {set j 0} {$j < 5} {incr j} {
                wait_for_condition 50 1000 {
                    [$R($j) get x] == $counter_value
                } else {
                    show_cluster_status
                    fail "Instance #$j x variable is inconsistent"
                }
            }
        }

        # 4) Generate load while breaking the connection of random
        # slave-master pairs.
        test "PSYNC2: generate load while killing replication links" {
            set t [clock milliseconds]
            set next_break [expr {$t+$disconnect_period}]
            while {[clock milliseconds]-$t < $genload_time} {
                if {$genload} {
                    $R($master_id) incr x; incr counter_value
                }
                if {[clock milliseconds] == $next_break} {
                    set next_break \
                        [expr {[clock milliseconds]+$disconnect_period}]
                    set slave_id [randomInt 5]
                    if {$disconnect} {
                        $R($slave_id) client kill type master
                        if {$debug_msg} {
                            puts "+++ Breaking link for replica #$slave_id"
                        }
                    }
                }
            }
        }

        # 5) Increment the counter and wait for all the instances
        set x [$R($master_id) get x]
        test "PSYNC2: cluster is consistent after load (x = $x)" {
            for {set j 0} {$j < 5} {incr j} {
                wait_for_condition 50 1000 {
                    [$R($j) get x] == $counter_value
                } else {
                    show_cluster_status
                    fail "Instance #$j x variable is inconsistent"
                }
            }
        }

        # wait for all the slaves to be in sync.
        set masteroff [status $R($master_id) master_repl_offset]
        wait_for_condition 500 100 {
            [status $R(0) master_repl_offset] >= $masteroff &&
            [status $R(1) master_repl_offset] >= $masteroff &&
            [status $R(2) master_repl_offset] >= $masteroff &&
            [status $R(3) master_repl_offset] >= $masteroff &&
            [status $R(4) master_repl_offset] >= $masteroff
        } else {
            show_cluster_status
            fail "Replicas offsets didn't catch up with the master after too long time."
        }

        if {$debug_msg} {
            show_cluster_status
        }

        test "PSYNC2: total sum of full synchronizations is exactly 4" {
            set sum 0
            for {set j 0} {$j < 5} {incr j} {
                incr sum [status $R($j) sync_full]
            }
            if {$sum != 4} {
                show_cluster_status
                assert {$sum == 4}
            }
        }

        # In absence of pings, are the instances really able to have
        # the exact same offset?
        $R($master_id) config set repl-ping-replica-period 3600
        for {set j 0} {$j < 5} {incr j} {
            if {$j == $master_id} continue
            $R($j) config set repl-timeout 10000
        }
        wait_for_condition 500 100 {
            [status $R($master_id) master_repl_offset] == [status $R(0) master_repl_offset] &&
            [status $R($master_id) master_repl_offset] == [status $R(1) master_repl_offset] &&
            [status $R($master_id) master_repl_offset] == [status $R(2) master_repl_offset] &&
            [status $R($master_id) master_repl_offset] == [status $R(3) master_repl_offset] &&
            [status $R($master_id) master_repl_offset] == [status $R(4) master_repl_offset]
        } else {
            show_cluster_status
            fail "Replicas and master offsets were unable to match *exactly*."
        }

        # Limit anyway the maximum number of cycles. This is useful when the
        # test is skipped via --only option of the test suite. In that case
        # we don't want to see many seconds of this test being just skipped.
        if {$cycle > 50} break
    }

    test "PSYNC2: Bring the master back again for next test" {
        $R($master_id) slaveof no one
        set master_host $R_host($master_id)
        set master_port $R_port($master_id)
        for {set j 0} {$j < 5} {incr j} {
            if {$j == $master_id} continue
            $R($j) slaveof $master_host $master_port
        }

        # Wait for replicas to sync. it is not enough to just wait for connected_slaves==4
        # since we might do the check before the master realized that they're disconnected
        wait_for_condition 50 1000 {
            [status $R($master_id) connected_slaves] == 4 &&
            [status $R([expr {($master_id+1)%5}]) master_link_status] == "up" &&
            [status $R([expr {($master_id+2)%5}]) master_link_status] == "up" &&
            [status $R([expr {($master_id+3)%5}]) master_link_status] == "up" &&
            [status $R([expr {($master_id+4)%5}]) master_link_status] == "up"
        } else {
            show_cluster_status
            fail "Replica not reconnecting"
        }
    }

    test "PSYNC2: Partial resync after restart using RDB aux fields" {
        # Pick a random slave
        set slave_id [expr {($master_id+1)%5}]
        set sync_count [status $R($master_id) sync_full]
        set sync_partial [status $R($master_id) sync_partial_ok]
        set sync_partial_err [status $R($master_id) sync_partial_err]
        catch {
            $R($slave_id) config rewrite
            restart_server [expr {0-$slave_id}] true false
            set R($slave_id) [srv [expr {0-$slave_id}] client]
        }
        # note: just waiting for connected_slaves==4 has a race condition since
        # we might do the check before the master realized that the slave disconnected
        wait_for_condition 50 1000 {
            [status $R($master_id) sync_partial_ok] == $sync_partial + 1
        } else {
            puts "prev sync_full: $sync_count"
            puts "prev sync_partial_ok: $sync_partial"
            puts "prev sync_partial_err: $sync_partial_err"
            puts [$R($master_id) info stats]
            show_cluster_status
            fail "Replica didn't partial sync"
        }
        set new_sync_count [status $R($master_id) sync_full]
        assert {$sync_count == $new_sync_count}
    }

    if {$no_exit} {
        while 1 { puts -nonewline .; flush stdout; after 1000}
    }

}}}}}