summaryrefslogtreecommitdiffstats
path: root/tests/integration/psync2-pingoff.tcl
blob: 3589d07e75fb5772d79a3f4cf4c997f317b7f1f0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# These tests were added together with the meaningful offset implementation
# in redis 6.0.0, which was later abandoned in 6.0.4. They used to test that
# servers are able to PSYNC with replicas even if the replication stream has
# PINGs at the end which are present on one server and missing on another.
# We keep these tests just because they reproduce edge cases in the replication
# logic, in the hope that they'll be able to spot some problem in the future.

# Two-server scenario: the replica is stalled while its master keeps sending
# PINGs, then it is promoted and the old master has to sync from it.
start_server {tags {"psync2 external:skip"}} {
start_server {} {
    # Config
    set debug_msg 0                 ; # Enable additional debug messages

    # Collect a client handle plus host/port for each of the two servers and
    # make both of them send replication PINGs every second.
    for {set j 0} {$j < 2} {incr j} {
        # Server level handed to srv (0, then -1). The expression is braced so
        # that it is substituted exactly once, by expr itself, and it is
        # computed a single time per iteration instead of once per lookup.
        set srv_level [expr {0 - $j}]
        set R($j) [srv $srv_level client]
        set R_host($j) [srv $srv_level host]
        set R_port($j) [srv $srv_level port]
        $R($j) CONFIG SET repl-ping-replica-period 1
        if {$debug_msg} {puts "Log file: [srv $srv_level stdout]"}
    }

    # Setup replication: R(1) becomes a replica of R(0)
    test "PSYNC2 pingoff: setup" {
        $R(1) replicaof $R_host(0) $R_port(0)
        $R(0) set foo bar
        # Done once the link is up and the key is visible on both sides
        wait_for_condition 50 1000 {
            [status $R(1) master_link_status] eq "up" &&
            [$R(0) dbsize] == 1 && [$R(1) dbsize] == 1
        } else {
            fail "Replicas not replicating from master"
        }
    }

    test "PSYNC2 pingoff: write and wait replication" {
        $R(0) INCR counter
        $R(0) INCR counter
        $R(0) INCR counter
        wait_for_condition 50 1000 {
            [$R(0) GET counter] eq [$R(1) GET counter]
        } else {
            fail "Master and replica don't agree about counter"
        }
    }

    # In this test we'll make sure the replica will get stuck, but with
    # an active connection: this way the master will continue to send PINGs
    # every second (we modified the PING period earlier)
    test "PSYNC2 pingoff: pause replica and promote it" {
        # The sleep and the promotion are queued in one transaction, so the
        # promotion is executed right after the sleep within the same EXEC.
        $R(1) MULTI
        $R(1) DEBUG SLEEP 5
        $R(1) SLAVEOF NO ONE
        $R(1) EXEC
        $R(1) ping ; # Returns once the server is responsive again
    }

    test "Make the old master a replica of the new one and check conditions" {
        # We set the new master's ping period to a high value, so that there's
        # no chance for a race condition of sending a PING in between the two
        # INFO calls in the assert for master_repl_offset match below.
        $R(1) CONFIG SET repl-ping-replica-period 1000

        # The promoted server must not have served any full sync so far
        assert_equal [status $R(1) sync_full] 0
        $R(0) REPLICAOF $R_host(1) $R_port(1)

        wait_for_condition 50 1000 {
            [status $R(0) master_link_status] eq "up"
        } else {
            fail "The new master was not able to sync"
        }

        # make sure replication is still alive and kicking
        $R(1) incr x
        wait_for_condition 50 1000 {
            [status $R(0) loading] == 0 &&
            [$R(0) get x] == 1
        } else {
            fail "replica didn't get incr"
        }
        assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset]
    }
}}


# Five-server scenario: one master with four replicas whose topology is
# reshuffled after only some of the replicas received the trailing PING.
start_server {tags {"psync2 external:skip"}} {
start_server {} {
start_server {} {
start_server {} {
start_server {} {
    test {test various edge cases of repl topology changes with missing pings at the end} {
        # Server at level -4 starts out as the master, the others as replicas
        set master [srv -4 client]
        set master_host [srv -4 host]
        set master_port [srv -4 port]
        set replica1 [srv -3 client]
        set replica2 [srv -2 client]
        set replica3 [srv -1 client]
        set replica4 [srv -0 client]

        # Attach all four replicas to the master, in order
        foreach node [list $replica1 $replica2 $replica3 $replica4] {
            $node replicaof $master_host $master_port
        }
        wait_for_condition 50 1000 {
            [status $master connected_slaves] == 4
        } else {
            fail "replicas didn't connect"
        }

        # A first write has to reach every replica
        $master incr x
        wait_for_condition 50 1000 {
            [$replica1 get x] == 1 && [$replica2 get x] == 1 &&
            [$replica3 get x] == 1 && [$replica4 get x] == 1
        } else {
            fail "replicas didn't get incr"
        }

        # Detach replica1 and replica2, then let the master ping the two
        # replicas that are still attached (replica3 and replica4).
        # replica2 is pointed at an unreachable address rather than promoted,
        # because promoting it to master would cycle the replication id.
        $replica1 replicaof no one
        $replica2 replicaof 127.0.0.1 1
        $master config set repl-ping-replica-period 1
        set base_offset [status $master master_repl_offset]
        wait_for_condition 50 100 {
            [status $replica3 master_repl_offset] > $base_offset &&
            [status $replica4 master_repl_offset] > $base_offset
        } else {
            fail "replica didn't sync in time"
        }

        # Everyone now syncs from replica1, which never saw the last ping of
        # the old master:
        #  - replica3 and the old master attach directly to replica1
        #  - replica4 stays attached to the old master
        #  - replica2 re-connects to the old master (which went back in time)
        set promoted_host [srv -3 host]
        set promoted_port [srv -3 port]
        foreach node [list $replica3 $master] {
            $node replicaof $promoted_host $promoted_port
        }
        $replica2 replicaof $master_host $master_port
        wait_for_condition 50 1000 {
            [status $replica2 master_link_status] == "up" &&
            [status $replica3 master_link_status] == "up" &&
            [status $replica4 master_link_status] == "up" &&
            [status $master master_link_status] == "up"
        } else {
            fail "replicas didn't connect"
        }

        # A write on the new master must propagate through the new topology
        $replica1 incr x
        wait_for_condition 50 1000 {
            [$replica2 get x] == 2 &&
            [$replica3 get x] == 2 &&
            [$replica4 get x] == 2 &&
            [$master get x] == 2
        } else {
            fail "replicas didn't get incr"
        }

        # Check the number of full syncs each server has served so far
        assert_equal [status $master sync_full] 6
        assert_equal [status $replica1 sync_full] 2
        assert_equal [status $replica2 sync_full] 0
        assert_equal [status $replica3 sync_full] 0
        assert_equal [status $replica4 sync_full] 0

        # Drop every master link so that each replica has to PSYNC again
        foreach node [list $master $replica2 $replica3 $replica4] {
            $node client kill type master
        }

        # Replication must resume after the forced reconnects
        $replica1 incr x
        wait_for_condition 50 1000 {
            [$replica2 get x] == 3 &&
            [$replica3 get x] == 3 &&
            [$replica4 get x] == 3 &&
            [$master get x] == 3
        } else {
            fail "replicas didn't get incr"
        }

        # The reconnects must not have added any full sync
        assert_equal [status $master sync_full] 6
        assert_equal [status $replica1 sync_full] 2
        assert_equal [status $replica2 sync_full] 0
        assert_equal [status $replica3 sync_full] 0
        assert_equal [status $replica4 sync_full] 0
    }
}}}}}

# Three-server chain R(0) -> R(1) -> R(2): the link between the master and its
# direct replica is killed and has to be re-established with a partial sync.
start_server {tags {"psync2 external:skip"}} {
start_server {} {
start_server {} {

    # Collect a client handle plus host/port for each of the three servers and
    # make all of them send replication PINGs every second.
    for {set j 0} {$j < 3} {incr j} {
        # Server level handed to srv (0, -1, -2). The expression is braced so
        # that it is substituted exactly once, by expr itself, and it is
        # computed a single time per iteration instead of once per lookup.
        set srv_level [expr {0 - $j}]
        set R($j) [srv $srv_level client]
        set R_host($j) [srv $srv_level host]
        set R_port($j) [srv $srv_level port]
        $R($j) CONFIG SET repl-ping-replica-period 1
    }

    test "Chained replicas disconnect when replica re-connect with the same master" {
        # Add a second replica as a chained replica of the current replica
        $R(1) replicaof $R_host(0) $R_port(0)
        $R(2) replicaof $R_host(1) $R_port(1)
        wait_for_condition 50 1000 {
            [status $R(2) master_link_status] eq "up"
        } else {
            fail "Chained replica not replicating from its master"
        }

        # Do a write on the master, and wait for the master to
        # send some PINGs to its replica
        $R(0) INCR counter2
        set replofs [status $R(0) master_repl_offset]
        wait_for_condition 50 100 {
            [status $R(1) master_repl_offset] > $replofs &&
            [status $R(2) master_repl_offset] > $replofs
        } else {
            fail "replica didn't sync in time"
        }
        # Remember the partial sync counters so the reconnect can be verified
        set sync_partial_master [status $R(0) sync_partial_ok]
        set sync_partial_replica [status $R(1) sync_partial_ok]
        # NOTE(review): presumably raised so that no further PING changes the
        # offsets that are compared below - confirm.
        $R(0) CONFIG SET repl-ping-replica-period 100

        # Disconnect the master's direct replica
        $R(0) client kill type replica
        # Expect exactly one new partial sync on the master and none on the
        # middle replica, with both links up again.
        wait_for_condition 50 1000 {
            [status $R(1) master_link_status] eq "up" &&
            [status $R(2) master_link_status] eq "up" &&
            [status $R(0) sync_partial_ok] == $sync_partial_master + 1 &&
            [status $R(1) sync_partial_ok] == $sync_partial_replica
        } else {
            fail "Disconnected replica failed to PSYNC with master"
        }

        # Verify that the replication offsets of the replica and of the
        # chained replica match the master's
        assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset]
        assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset]

        # make sure replication is still alive and kicking
        $R(0) incr counter2
        wait_for_condition 50 1000 {
            [$R(1) get counter2] == 2 && [$R(2) get counter2] == 2
        } else {
            fail "replicas didn't get incr"
        }
        assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset]
        assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset]
    }
}}}