summaryrefslogtreecommitdiffstats
path: root/tests/integration/shutdown.tcl
blob: 60afc5c7f21345ef9098730bf0c13ef9ac439829 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# This test suite tests shutdown when there are lagging replicas connected.

# Saturate the OS socket send buffer of the replica connection on the server
# at index idx.
#
# Data is written in rounds; each populate call is expected to add about 1M
# (presumably 1024 keys of 1024 bytes each -- see populate). After every round
# we pause 10ms and sample mem_total_replication_buffers. As soon as that
# figure exceeds 2M (often after 4M or so has been written) we assume the OS
# socket send buffer cannot swallow any more, and stop.
proc fill_up_os_socket_send_buffer_for_repl {idx} {
    for {set round 1} {1} {incr round} {
        # A distinct key prefix per round, so each round adds new keys.
        populate 1024 junk$round: 1024 $idx
        after 10
        if {[s $idx mem_total_replication_buffers] > 2*1024*1024} {
            break
        }
    }
}

# Run the catch-up scenario once per way of triggering the shutdown: by
# sending SIGTERM to the master process and by issuing the SHUTDOWN command.
# In both cases the master is expected to delay its exit until the lagging
# replica has received the last write.
foreach how {sigterm shutdown} {
    test "Shutting down master waits for replica to catch up ($how)" {
        start_server {} {
            start_server {} {
                # Server -1 (outer) acts as master, server 0 (inner) as replica.
                set master [srv -1 client]
                set master_host [srv -1 host]
                set master_port [srv -1 port]
                set master_pid [srv -1 pid]
                set replica [srv 0 client]
                set replica_pid [srv 0 pid]

                # Config master.
                $master config set shutdown-timeout 300; # 5min for slow CI
                $master config set repl-backlog-size 1;  # small as possible
                $master config set hz 100;               # cron runs every 10ms

                # Config replica.
                $replica replicaof $master_host $master_port
                wait_for_sync $replica

                # Preparation: Set k to 1 on both master and replica.
                $master set k 1
                wait_for_ofs_sync $master $replica

                # Pause the replica.
                exec kill -SIGSTOP $replica_pid
                after 10

                # Fill up the OS socket send buffer for the replica connection
                # to prevent the following INCR from reaching the replica via
                # the OS.
                fill_up_os_socket_send_buffer_for_repl -1

                # Incr k and immediately shutdown master. The SHUTDOWN variant
                # uses a deferring client because the command is not expected
                # to reply while the master waits for the replica.
                $master incr k
                switch $how {
                    sigterm {
                        exec kill -SIGTERM $master_pid
                    }
                    shutdown {
                        set rd [redis_deferring_client -1]
                        $rd shutdown
                    }
                }
                wait_for_condition 50 100 {
                    [s -1 shutdown_in_milliseconds] > 0
                } else {
                    fail "Master not indicating ongoing shutdown."
                }

                # Wake up replica and check if master has waited for it.
                after 20; # 2 cron intervals
                exec kill -SIGCONT $replica_pid
                wait_for_condition 300 1000 {
                    [$replica get k] eq 2
                } else {
                    fail "Master exited before replica could catch up."
                }

                # Check shutdown log messages on master
                wait_for_log_messages -1 {"*ready to exit, bye bye*"} 0 100 500
                assert_equal 0 [count_log_message -1 "*Lagging replica*"]
                verify_log_message -1 "*1 of 1 replicas are in sync*" 0

                # Release the deferring client that sent SHUTDOWN. It is never
                # read from, and without this it would leak in the test
                # process. It only exists in the shutdown variant.
                if {$how eq "shutdown"} {
                    $rd close
                }
            }
        }
    } {} {repl external:skip}
}

# Scenario: the replica stays paused for longer than the master's
# shutdown-timeout. The master is expected to give up waiting, log the lagging
# replica and exit, leaving the replica without the last write.
test {Shutting down master waits for replica timeout} {
    start_server {} {
        start_server {} {
            # Server -1 (outer) acts as master, server 0 (inner) as replica.
            set master [srv -1 client]
            set master_host [srv -1 host]
            set master_port [srv -1 port]
            set master_pid [srv -1 pid]
            set replica [srv 0 client]
            set replica_pid [srv 0 pid]

            # Config master.
            $master config set shutdown-timeout 1; # second

            # Config replica.
            $replica replicaof $master_host $master_port
            wait_for_sync $replica

            # Preparation: Set k to 1 on both master and replica.
            $master set k 1
            wait_for_ofs_sync $master $replica

            # Pause the replica.
            exec kill -SIGSTOP $replica_pid
            after 10

            # Fill up the OS socket send buffer for the replica connection to
            # prevent the following INCR k from reaching the replica via the OS.
            fill_up_os_socket_send_buffer_for_repl -1

            # Incr k and immediately shutdown master.
            $master incr k
            exec kill -SIGTERM $master_pid
            wait_for_condition 50 100 {
                [s -1 shutdown_in_milliseconds] > 0
            } else {
                fail "Master not indicating ongoing shutdown."
            }

            # Let master finish shutting down and check log. The replica is
            # still paused here, so the master must have exited on its own
            # and reported the replica as lagging.
            wait_for_log_messages -1 {"*ready to exit, bye bye*"} 0 100 100
            verify_log_message -1 "*Lagging replica*" 0
            verify_log_message -1 "*0 of 1 replicas are in sync*" 0

            # Wake up replica. It is expected to still hold the value from
            # before the INCR, which never reached it.
            exec kill -SIGCONT $replica_pid
            assert_equal 1 [$replica get k]
        }
    }
} {} {repl external:skip}

# Scenario: two clients block on SHUTDOWN while the replica is paused. Once the
# replica catches up, the shutdown itself fails because an initial AOF rewrite
# is still in progress, and both blocked clients must receive an error.
test "Shutting down master waits for replica then fails" {
    start_server {} {
        start_server {} {
            # Server -1 (outer) acts as master, server 0 (inner) as replica.
            set master [srv -1 client]
            set master_host [srv -1 host]
            set master_port [srv -1 port]
            set master_pid [srv -1 pid]
            set replica [srv 0 client]
            set replica_pid [srv 0 pid]

            # Attach the replica to the master and wait for the initial sync.
            $replica replicaof $master_host $master_port
            wait_for_sync $replica

            # Pause the replica and write a key on master.
            exec kill -SIGSTOP $replica_pid
            after 10
            $master incr k

            # Two clients call blocking SHUTDOWN in parallel.
            set rd1 [redis_deferring_client -1]
            set rd2 [redis_deferring_client -1]
            $rd1 shutdown
            $rd2 shutdown
            # Three connections in total: $master plus the two deferring
            # clients, both of which must be reported as blocked.
            set info_clients [$master info clients]
            assert_match "*connected_clients:3*" $info_clients
            assert_match "*blocked_clients:2*" $info_clients

            # Start a very slow initial AOFRW, which will prevent shutdown.
            $master config set rdb-key-save-delay 30000000; # 30 seconds
            $master config set appendonly yes

            # Wake up replica, causing master to continue shutting down.
            exec kill -SIGCONT $replica_pid

            # SHUTDOWN returns an error to both clients blocking on SHUTDOWN.
            catch { $rd1 read } e1
            catch { $rd2 read } e2
            assert_match "*Errors trying to SHUTDOWN. Check logs*" $e1
            assert_match "*Errors trying to SHUTDOWN. Check logs*" $e2
            $rd1 close
            $rd2 close

            # Check shutdown log messages on master.
            verify_log_message -1 "*1 of 1 replicas are in sync*" 0
            verify_log_message -1 "*Writing initial AOF, can't exit*" 0
            verify_log_message -1 "*Errors trying to shut down*" 0

            # Let the master exit fast, without waiting for the very slow
            # AOFRW. The command is wrapped in catch because an error from it
            # is deliberately ignored here.
            catch {$master shutdown nosave force}
        }
    }
} {} {repl external:skip}

# Scenario: two clients block on SHUTDOWN while the replica is paused, then a
# third client issues SHUTDOWN ABORT. Both blocked clients must receive an
# error and the master must log that the shutdown was manually aborted.
test "Shutting down master waits for replica then aborted" {
    start_server {} {
        start_server {} {
            # Server -1 (outer) acts as master, server 0 (inner) as replica.
            set master [srv -1 client]
            set master_host [srv -1 host]
            set master_port [srv -1 port]
            set master_pid [srv -1 pid]
            set replica [srv 0 client]
            set replica_pid [srv 0 pid]

            # Attach the replica to the master and wait for the initial sync.
            $replica replicaof $master_host $master_port
            wait_for_sync $replica

            # Pause the replica and write a key on master.
            exec kill -SIGSTOP $replica_pid
            after 10
            $master incr k

            # Two clients call blocking SHUTDOWN in parallel.
            set rd1 [redis_deferring_client -1]
            set rd2 [redis_deferring_client -1]
            $rd1 shutdown
            $rd2 shutdown
            # Three connections in total: $master plus the two deferring
            # clients, both of which must be reported as blocked.
            set info_clients [$master info clients]
            assert_match "*connected_clients:3*" $info_clients
            assert_match "*blocked_clients:2*" $info_clients

            # Abort the shutdown
            $master shutdown abort

            # Wake up replica. The shutdown has already been aborted above, so
            # this only lets the paused replica process resume.
            exec kill -SIGCONT $replica_pid

            # SHUTDOWN returns an error to both clients blocking on SHUTDOWN.
            catch { $rd1 read } e1
            catch { $rd2 read } e2
            assert_match "*Errors trying to SHUTDOWN. Check logs*" $e1
            assert_match "*Errors trying to SHUTDOWN. Check logs*" $e2
            $rd1 close
            $rd2 close

            # Check shutdown log messages on master.
            verify_log_message -1 "*Shutdown manually aborted*" 0
        }
    }
} {} {repl external:skip}