tests/integration/replication-buffer.tcl


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307

# This test group checks that all replicas share one global replication buffer:
# attaching two replicas must not double the replication buffer size, and the
# replication buffer must shrink once no replica uses it any more.
start_server {tags {"repl external:skip"}} {
start_server {} {
start_server {} {
start_server {} {
    set replica1 [srv -3 client]
    set replica2 [srv -2 client]
    set replica3 [srv -1 client]

    set master [srv 0 client]
    set master_host [srv 0 host]
    set master_port [srv 0 port]

    $master config set save ""
    $master config set repl-backlog-size 16384
    $master config set repl-diskless-sync-delay 5
    $master config set repl-diskless-sync-max-replicas 1
    $master config set client-output-buffer-limit "replica 0 0 0"

    # Make sure replica3 is synchronized with master
    $replica3 replicaof $master_host $master_port
    wait_for_sync $replica3

    # Generating RDB will take some 100 seconds (100 keys, each one delayed
    # by rdb-key-save-delay), so the bgsave below stays in progress while
    # the tests run.
    $master config set rdb-key-save-delay 1000000
    populate 100 "" 16

    # Make sure replica1 and replica2 are waiting bgsave
    $master config set repl-diskless-sync-max-replicas 2
    $replica1 replicaof $master_host $master_port
    $replica2 replicaof $master_host $master_port
    wait_for_condition 50 100 {
        ([s rdb_bgsave_in_progress] == 1) &&
        [lindex [$replica1 role] 3] eq {sync} &&
        [lindex [$replica2 role] 3] eq {sync}
    } else {
        fail "fail to sync with replicas"
    }

    test {All replicas share one global replication buffer} {
        set before_used [s used_memory]
        populate 1024 "" 1024 ; # Write extra 1M data
        # New data uses 1M memory, but all replicas use only one
        # replication buffer, so the output memory of all replicas together
        # is not more than double the replication buffer.
        set repl_buf_mem [s mem_total_replication_buffers]
        set extra_mem [expr {[s used_memory]-$before_used-1024*1024}]
        assert {$extra_mem < 2*$repl_buf_mem}

        # Kill replica1, replication_buffer will not become smaller.
        # Two replicas (replica2 and replica3) stay attached to the master.
        catch {$replica1 shutdown nosave}
        wait_for_condition 50 100 {
            [s connected_slaves] eq {2}
        } else {
            fail "replica doesn't disconnect with master"
        }
        assert_equal $repl_buf_mem [s mem_total_replication_buffers]
    }

    test {Replication buffer will become smaller when no replica uses} {
        # Make sure replica3 catches up with the master
        wait_for_ofs_sync $master $replica3

        set repl_buf_mem [s mem_total_replication_buffers]
        # Kill replica2, replication_buffer will become smaller
        catch {$replica2 shutdown nosave}
        wait_for_condition 50 100 {
            [s connected_slaves] eq {1}
        } else {
            fail "replica2 doesn't disconnect with master"
        }
        # More than the 1M written above must have been released.
        assert {[expr {$repl_buf_mem - 1024*1024}] > [s mem_total_replication_buffers]}
    }
}
}
}
}

# This test group checks that the replication backlog can outgrow the backlog
# limit config when a slow replica keeps massive replication buffers, and that
# replicas can use this replication buffer (beyond the backlog config) for
# partial re-synchronization. The replication backlog memory must also become
# smaller again once the master disconnects the slow replica because its
# output buffer limit is reached.
start_server {tags {"repl external:skip"}} {
start_server {} {
start_server {} {
    set replica1 [srv -2 client]
    set replica1_pid [s -2 process_id]
    set replica2 [srv -1 client]
    set replica2_pid [s -1 process_id]

    set master [srv 0 client]
    set master_host [srv 0 host]
    set master_port [srv 0 port]

    $master config set save ""
    $master config set repl-backlog-size 16384
    $master config set client-output-buffer-limit "replica 0 0 0"

    # Executing 'debug digest' on a master which has many keys costs much time
    # (especially in valgrind), which can make replica1 and replica2 disconnect
    # from the master, so use a large repl-timeout on all instances.
    $master config set repl-timeout 1000
    $replica1 config set repl-timeout 1000
    $replica2 config set repl-timeout 1000

    $replica1 replicaof $master_host $master_port
    wait_for_sync $replica1

    test {Replication backlog size can outgrow the backlog limit config} {
        # Generating RDB will take 1000 seconds
        $master config set rdb-key-save-delay 1000000
        populate 1000 master 10000
        $replica2 replicaof $master_host $master_port
        # Make sure replica2 is waiting bgsave
        wait_for_condition 5000 100 {
            ([s rdb_bgsave_in_progress] == 1) &&
            [lindex [$replica2 role] 3] eq {sync}
        } else {
            fail "fail to sync with replicas"
        }
        # The actual replication backlog grows beyond the backlog setting
        # since the slow replica2 keeps the replication buffer.
        populate 10000 master 10000
        assert {[s repl_backlog_histlen] > [expr {10000*10000}]}
    }

    # Wait for replica1 to catch up with the master
    wait_for_condition 1000 100 {
        [s -2 master_repl_offset] eq [s master_repl_offset]
    } else {
        fail "Replica offset didn't catch up with the master after too long time"
    }

    test {Replica could use replication buffer (beyond backlog config) for partial resynchronization} {
        # replica1 disconnects from master (it is pointed at replica2 instead)
        $replica1 replicaof [srv -1 host] [srv -1 port]
        # Write a mass of data that exceeds repl-backlog-size
        populate 10000 master 10000
        # replica1 reconnects with master
        $replica1 replicaof $master_host $master_port
        wait_for_condition 1000 100 {
            [s -2 master_repl_offset] eq [s master_repl_offset]
        } else {
            fail "Replica offset didn't catch up with the master after too long time"
        }

        # replica2 still waits for bgsave ending
        assert {[s rdb_bgsave_in_progress] eq {1} && [lindex [$replica2 role] 3] eq {sync}}
        # master accepted replica1 partial resync
        assert_equal [s sync_partial_ok] {1}
        assert_equal [$master debug digest] [$replica1 debug digest]
    }

    test {Replication backlog memory will become smaller if disconnecting with replica} {
        assert {[s repl_backlog_histlen] > [expr {2*10000*10000}]}
        assert_equal [s connected_slaves] {2}

        # Stop replica2 before lowering the limit and writing to the master.
        pause_process $replica2_pid
        $master config set client-output-buffer-limit "replica 128k 0 0"
        # trigger output buffer limit check
        $master set key [string repeat A [expr {64*1024}]]
        # master will close replica2's connection since replica2's output
        # buffer limit is reached, so there only is replica1.
        wait_for_condition 100 100 {
            [s connected_slaves] eq {1}
        } else {
            fail "master didn't disconnect with replica2"
        }

        # Since we trim replication backlog incrementally, replication backlog
        # memory may take time to be reclaimed.
        wait_for_condition 1000 100 {
            [s repl_backlog_histlen] < [expr {10000*10000}]
        } else {
            fail "Replication backlog memory is not smaller"
        }
        resume_process $replica2_pid
    }
    # speed up termination
    $master config set shutdown-timeout 0
}
}
}

# Checks that a replica reconnecting after being killed is served with a
# partial resync, and that the master keeps the replica client open even though
# its output buffer exceeds client-output-buffer-limit, as long as it stays
# within repl-backlog-size.
test {Partial resynchronization is successful even client-output-buffer-limit is less than repl-backlog-size} {
    start_server {tags {"repl external:skip"}} {
        start_server {} {
            r config set save ""
            r config set repl-backlog-size 100mb
            r config set client-output-buffer-limit "replica 512k 0 0"

            set replica [srv -1 client]
            $replica replicaof [srv 0 host] [srv 0 port]
            wait_for_sync $replica

            set big_str [string repeat A [expr {10*1024*1024}]] ;# 10mb big string
            # In a single transaction: drop the replica link, write 20mb of
            # data, and sleep so the replica reconnects afterwards.
            r multi
            r client kill type replica
            r set key $big_str
            r set key $big_str
            r debug sleep 2 ;# wait for replica reconnecting
            r exec
            # When the replica reconnects with the master, the master accepts
            # partial resync, and doesn't close the replica client even if the
            # client output buffer limit is reached.
            r set key $big_str ;# trigger output buffer limit check
            wait_for_ofs_sync r $replica
            # master accepted replica partial resync: still only the initial
            # full sync, plus one partial sync
            assert_equal [s sync_full] {1}
            assert_equal [s sync_partial_ok] {1}

            r multi
            r set key $big_str
            r set key $big_str
            r exec
            # replica's reply buffer size is more than client-output-buffer-limit but
            # doesn't exceed repl-backlog-size, we don't close replica client.
            wait_for_condition 1000 100 {
                [s -1 master_repl_offset] eq [s master_repl_offset]
            } else {
                fail "Replica offset didn't catch up with the master after too long time"
            }
            # No additional full or partial sync happened.
            assert_equal [s sync_full] {1}
            assert_equal [s sync_partial_ok] {1}
        }
    }
}

# This test was added to make sure big keys added to the backlog do not trigger psync loop.
test {Replica client-output-buffer size is limited to backlog_limit/16 when no replication data is pending} {
    # Return the value of field `f` from the CLIENT LIST output of client `r`,
    # restricted to clients of the given `type`. Only the first match in the
    # output is returned, and the value must consist of letters, digits and
    # dashes. Raises an error when the field is not found.
    proc client_field {r type f} {
        set client [$r client list type $type]
        if {![regexp $f=(\[a-zA-Z0-9-\]+) $client - res]} {
            error "field $f not found for in $client"
        }
        return $res
    }

    start_server {tags {"repl external:skip"}} {
        start_server {} {
            set replica [srv -1 client]
            set replica_host [srv -1 host]
            set replica_port [srv -1 port]
            set master [srv 0 client]
            set master_host [srv 0 host]
            set master_port [srv 0 port]
            $master config set maxmemory-policy allkeys-lru

            $master config set repl-backlog-size 16384
            $master config set client-output-buffer-limit "replica 32768 32768 60"
            # Key has to be larger than replica client-output-buffer limit.
            set keysize [expr {256*1024}]

            $replica replicaof $master_host $master_port
            wait_for_condition 50 100 {
                [lindex [$replica role] 0] eq {slave} &&
                [string match {*master_link_status:up*} [$replica info replication]]
            } else {
                fail "Can't turn the instance into a replica"
            }

            # Write a big key that is gonna breach the obuf limit and cause the replica to disconnect,
            # then in the same event loop, add at least 16 more keys, and enable eviction, so that the
            # eviction code has a chance to call flushSlavesOutputBuffers, and then run PING to trigger the eviction code
            set _v [prepare_value $keysize]
            $master write "[format_command mset key $_v k1 1 k2 2 k3 3 k4 4 k5 5 k6 6 k7 7 k8 8 k9 9 ka a kb b kc c kd d ke e kf f kg g kh h]config set maxmemory 1\r\nping\r\n"
            $master flush
            # One reply per pipelined command: MSET, CONFIG SET and PING.
            $master read
            $master read
            $master read
            wait_for_ofs_sync $master $replica

            # Write another key to force the test to wait for another event loop iteration so that we
            # give the serverCron a chance to disconnect replicas with COB size exceeding the limits
            $master config set maxmemory 0
            $master set key1 1
            wait_for_ofs_sync $master $replica

            assert {[status $master connected_slaves] == 1}

            wait_for_condition 50 100 {
                [client_field $master replica tot-mem] < $keysize
            } else {
                fail "replica client-output-buffer usage is higher than expected."
            }

            # now we expect the replica to re-connect but fail partial sync (it doesn't have large
            # enough COB limit and must result in a full-sync)
            assert {[status $master sync_partial_ok] == 0}

            # Before this fix (#11905), the test would trigger an assertion in 'o->used >= c->ref_block_pos'
            test {The update of replBufBlock's repl_offset is ok - Regression test for #11666} {
                set rd [redis_deferring_client]
                set replid [status $master master_replid]
                set offset [status $master repl_backlog_first_byte_offset]
                # Request a PSYNC starting at the first byte of the backlog.
                $rd psync $replid $offset
                assert_equal {PONG} [$master ping] ;# Make sure the master doesn't crash.
                $rd close
            }
        }
    }
}