summaryrefslogtreecommitdiffstats
path: root/tests/integration
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-14 13:40:54 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-14 13:40:54 +0000
commit317c0644ccf108aa23ef3fd8358bd66c2840bfc0 (patch)
treec417b3d25c86b775989cb5ac042f37611b626c8a /tests/integration
parentInitial commit. (diff)
downloadredis-317c0644ccf108aa23ef3fd8358bd66c2840bfc0.tar.xz
redis-317c0644ccf108aa23ef3fd8358bd66c2840bfc0.zip
Adding upstream version 5:7.2.4.upstream/5%7.2.4upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tests/integration')
-rw-r--r--tests/integration/aof-multi-part.tcl1332
-rw-r--r--tests/integration/aof-race.tcl37
-rw-r--r--tests/integration/aof.tcl681
-rw-r--r--tests/integration/block-repl.tcl51
-rw-r--r--tests/integration/convert-ziplist-hash-on-load.tcl28
-rw-r--r--tests/integration/convert-ziplist-zset-on-load.tcl28
-rw-r--r--tests/integration/convert-zipmap-hash-on-load.tcl39
-rw-r--r--tests/integration/corrupt-dump-fuzzer.tcl230
-rw-r--r--tests/integration/corrupt-dump.tcl833
-rw-r--r--tests/integration/dismiss-mem.tcl101
-rw-r--r--tests/integration/failover.tcl294
-rw-r--r--tests/integration/logging.tcl61
-rw-r--r--tests/integration/psync2-master-restart.tcl218
-rw-r--r--tests/integration/psync2-pingoff.tcl250
-rw-r--r--tests/integration/psync2-reg.tcl82
-rw-r--r--tests/integration/psync2.tcl384
-rw-r--r--tests/integration/rdb.tcl419
-rw-r--r--tests/integration/redis-benchmark.tcl171
-rw-r--r--tests/integration/redis-cli.tcl609
-rw-r--r--tests/integration/replication-2.tcl93
-rw-r--r--tests/integration/replication-3.tcl130
-rw-r--r--tests/integration/replication-4.tcl295
-rw-r--r--tests/integration/replication-buffer.tcl307
-rw-r--r--tests/integration/replication-psync.tcl143
-rw-r--r--tests/integration/replication.tcl1456
-rw-r--r--tests/integration/shutdown.tcl234
26 files changed, 8506 insertions, 0 deletions
diff --git a/tests/integration/aof-multi-part.tcl b/tests/integration/aof-multi-part.tcl
new file mode 100644
index 0000000..1d41a8a
--- /dev/null
+++ b/tests/integration/aof-multi-part.tcl
@@ -0,0 +1,1332 @@
+# Shared fixtures for the multi-part AOF tests: helper procs, the default
+# server config, and the paths of the AOF / manifest files the tests build.
+# NOTE(review): the $::*_sufix / $::*_suffix globals (note the "sufix" spelling)
+# are presumably defined in tests/support/aofmanifest.tcl - confirm.
+source tests/support/aofmanifest.tcl
+set defaults {appendonly {yes} appendfilename {appendonly.aof} appenddirname {appendonlydir} auto-aof-rewrite-percentage {0}}
+set server_path [tmpdir server.multi.aof]
+set aof_dirname "appendonlydir"
+set aof_basename "appendonly.aof"
+set aof_dirpath "$server_path/$aof_dirname"
+# BASE and INCR files inside the AOF directory, numbered by sequence.
+set aof_base1_file "$server_path/$aof_dirname/${aof_basename}.1$::base_aof_sufix$::aof_format_suffix"
+set aof_base2_file "$server_path/$aof_dirname/${aof_basename}.2$::base_aof_sufix$::aof_format_suffix"
+set aof_incr1_file "$server_path/$aof_dirname/${aof_basename}.1$::incr_aof_sufix$::aof_format_suffix"
+set aof_incr2_file "$server_path/$aof_dirname/${aof_basename}.2$::incr_aof_sufix$::aof_format_suffix"
+set aof_incr3_file "$server_path/$aof_dirname/${aof_basename}.3$::incr_aof_sufix$::aof_format_suffix"
+set aof_manifest_file "$server_path/$aof_dirname/${aof_basename}$::manifest_suffix"
+# Single-file AOF named like the basename: directly in the server dir
+# ("old path") or inside the AOF directory ("new path").
+set aof_old_name_old_path "$server_path/$aof_basename"
+set aof_old_name_new_path "$aof_dirpath/$aof_basename"
+# Same, for a second server that uses the file name "appendonly.aof2".
+set aof_old_name_old_path2 "$server_path/${aof_basename}2"
+set aof_manifest_file2 "$server_path/$aof_dirname/${aof_basename}2$::manifest_suffix"
+
+tags {"external:skip"} {
+
+ # Test Part 1
+
+ # To test the loading logic of redis under different combinations of manifest and AOF files,
+ # we manually construct the manifest file and the AOFs, and then start redis to verify that
+ # its behavior is as expected.
+
+ # The manifest lists appendonly.aof.1.incr.aof, but only the base file and
+ # appendonly.aof.2.incr.aof are created on disk, so loading must fail.
+ test {Multi Part AOF can't load data when some file missing} {
+ create_aof $aof_dirpath $aof_base1_file {
+ append_to_aof [formatCommand set k1 v1]
+ }
+
+ create_aof $aof_dirpath $aof_incr2_file {
+ append_to_aof [formatCommand set k2 v2]
+ }
+
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n"
+ append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n"
+ append_to_manifest "file appendonly.aof.2.incr.aof seq 2 type i\n"
+ }
+
+ start_server_aof [list dir $server_path] {
+ # The server is expected to exit on its own while loading the AOF.
+ wait_for_condition 100 50 {
+ ! [is_alive $srv]
+ } else {
+ fail "AOF loading didn't fail"
+ }
+
+ # The log must name the missing file.
+ assert_equal 1 [count_message_lines $server_path/stdout "appendonly.aof.1.incr.aof .*No such file or directory"]
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # The manifest lists the INCR file with seq 2 before the one with seq 1;
+ # the server must refuse to load it.
+ test {Multi Part AOF can't load data when the sequence not increase monotonically} {
+ create_aof $aof_dirpath $aof_incr1_file {
+ append_to_aof [formatCommand set k1 v1]
+ }
+
+ create_aof $aof_dirpath $aof_incr2_file {
+ append_to_aof [formatCommand set k2 v2]
+ }
+
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof.2.incr.aof seq 2 type i\n"
+ append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n"
+ }
+
+ start_server_aof [list dir $server_path] {
+ # The server is expected to exit on its own while loading the AOF.
+ wait_for_condition 100 50 {
+ ! [is_alive $srv]
+ } else {
+ fail "AOF loading didn't fail"
+ }
+
+ assert_equal 1 [count_message_lines $server_path/stdout "Found a non-monotonic sequence number"]
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # An empty line between two otherwise valid manifest entries must be
+ # rejected as an invalid manifest format.
+ test {Multi Part AOF can't load data when there are blank lines in the manifest file} {
+ create_aof $aof_dirpath $aof_incr1_file {
+ append_to_aof [formatCommand set k1 v1]
+ }
+
+ create_aof $aof_dirpath $aof_incr3_file {
+ append_to_aof [formatCommand set k2 v2]
+ }
+
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n"
+ append_to_manifest "\n"
+ append_to_manifest "file appendonly.aof.3.incr.aof seq 3 type i\n"
+ }
+
+ start_server_aof [list dir $server_path] {
+ # The server is expected to exit on its own while loading the AOF.
+ wait_for_condition 100 50 {
+ ! [is_alive $srv]
+ } else {
+ fail "AOF loading didn't fail"
+ }
+
+ assert_equal 1 [count_message_lines $server_path/stdout "Invalid AOF manifest file format"]
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # The manifest declares two entries of type b (base); loading must fail.
+ test {Multi Part AOF can't load data when there is a duplicate base file} {
+ create_aof $aof_dirpath $aof_base1_file {
+ append_to_aof [formatCommand set k1 v1]
+ }
+
+ create_aof $aof_dirpath $aof_base2_file {
+ append_to_aof [formatCommand set k2 v2]
+ }
+
+ create_aof $aof_dirpath $aof_incr1_file {
+ append_to_aof [formatCommand set k3 v3]
+ }
+
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n"
+ append_to_manifest "file appendonly.aof.2.base.aof seq 2 type b\n"
+ append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n"
+ }
+
+ start_server_aof [list dir $server_path] {
+ # The server is expected to exit on its own while loading the AOF.
+ wait_for_condition 100 50 {
+ ! [is_alive $srv]
+ } else {
+ fail "AOF loading didn't fail"
+ }
+
+ assert_equal 1 [count_message_lines $server_path/stdout "Found duplicate base file information"]
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # The first manifest entry uses the file type "x" instead of b or i;
+ # loading must fail with an unknown-type error.
+ test {Multi Part AOF can't load data when the manifest format is wrong (type unknown)} {
+ create_aof $aof_dirpath $aof_base1_file {
+ append_to_aof [formatCommand set k1 v1]
+ }
+
+ create_aof $aof_dirpath $aof_incr1_file {
+ append_to_aof [formatCommand set k3 v3]
+ }
+
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof.1.base.aof seq 1 type x\n"
+ append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n"
+ }
+
+ start_server_aof [list dir $server_path] {
+ # The server is expected to exit on its own while loading the AOF.
+ wait_for_condition 100 50 {
+ ! [is_alive $srv]
+ } else {
+ fail "AOF loading didn't fail"
+ }
+
+ assert_equal 1 [count_message_lines $server_path/stdout "Unknown AOF file type"]
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # The first manifest entry misspells the "file" key as "filx", so the
+ # entry has no file name; loading must fail.
+ test {Multi Part AOF can't load data when the manifest format is wrong (missing key)} {
+ create_aof $aof_dirpath $aof_base1_file {
+ append_to_aof [formatCommand set k1 v1]
+ }
+
+ create_aof $aof_dirpath $aof_incr1_file {
+ append_to_aof [formatCommand set k3 v3]
+ }
+
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "filx appendonly.aof.1.base.aof seq 1 type b\n"
+ append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n"
+ }
+
+ start_server_aof [list dir $server_path] {
+ # The server is expected to exit on its own while loading the AOF.
+ wait_for_condition 100 50 {
+ ! [is_alive $srv]
+ } else {
+ fail "AOF loading didn't fail"
+ }
+
+ # NOTE(review): the expected count is 2, not 1 - presumably
+ # $server_path/stdout accumulates across the tests in this file and an
+ # earlier test already logged this message once. Confirm before
+ # reordering or removing tests.
+ assert_equal 2 [count_message_lines $server_path/stdout "Invalid AOF manifest file format"]
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # The second manifest entry omits the "seq <n>" pair; loading must fail.
+ test {Multi Part AOF can't load data when the manifest format is wrong (line too short)} {
+ create_aof $aof_dirpath $aof_base1_file {
+ append_to_aof [formatCommand set k1 v1]
+ }
+
+ create_aof $aof_dirpath $aof_incr1_file {
+ append_to_aof [formatCommand set k3 v3]
+ }
+
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n"
+ append_to_manifest "file appendonly.aof.1.incr.aof type i\n"
+ }
+
+ start_server_aof [list dir $server_path] {
+ # The server is expected to exit on its own while loading the AOF.
+ wait_for_condition 100 50 {
+ ! [is_alive $srv]
+ } else {
+ fail "AOF loading didn't fail"
+ }
+
+ # NOTE(review): the expected count is 3 - presumably
+ # $server_path/stdout accumulates across the tests in this file and
+ # two earlier tests already logged this message. Confirm before
+ # reordering or removing tests.
+ assert_equal 3 [count_message_lines $server_path/stdout "Invalid AOF manifest file format"]
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # A manifest line longer than the parser accepts must abort loading with a
+ # "too long line" error.
+ test {Multi Part AOF can't load data when the manifest format is wrong (line too long)} {
+ create_aof $aof_dirpath $aof_base1_file {
+ append_to_aof [formatCommand set k1 v1]
+ }
+
+ create_aof $aof_dirpath $aof_incr1_file {
+ append_to_aof [formatCommand set k3 v3]
+ }
+
+ # Build one oversized line (44 space-separated copies of a valid entry,
+ # about 1.9KB) instead of spelling the whole literal out by hand.
+ # NOTE(review): the line presumably has to exceed MANIFEST_MAX_LINE (1024)
+ # in src/aof.c - confirm if that limit ever changes.
+ set oversized_line [join [lrepeat 44 {file appendonly.aof.1.base.aof seq 1 type b}] { }]
+
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "$oversized_line\n"
+ append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n"
+ }
+
+ start_server_aof [list dir $server_path] {
+ # The server is expected to exit on its own while loading the AOF.
+ wait_for_condition 100 50 {
+ ! [is_alive $srv]
+ } else {
+ fail "AOF loading didn't fail"
+ }
+
+ assert_equal 1 [count_message_lines $server_path/stdout "The AOF manifest file contains too long line"]
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # The second manifest entry ends with a key ("newkey") that has no value,
+ # giving an odd number of fields; loading must fail.
+ test {Multi Part AOF can't load data when the manifest format is wrong (odd parameter)} {
+ create_aof $aof_dirpath $aof_base1_file {
+ append_to_aof [formatCommand set k1 v1]
+ }
+
+ create_aof $aof_dirpath $aof_incr1_file {
+ append_to_aof [formatCommand set k3 v3]
+ }
+
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n"
+ append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i newkey\n"
+ }
+
+ start_server_aof [list dir $server_path] {
+ # The server is expected to exit on its own while loading the AOF.
+ wait_for_condition 100 50 {
+ ! [is_alive $srv]
+ } else {
+ fail "AOF loading didn't fail"
+ }
+
+ # NOTE(review): the expected count is 4 - presumably
+ # $server_path/stdout accumulates across the tests in this file and
+ # three earlier tests already logged this message. Confirm before
+ # reordering or removing tests.
+ assert_equal 4 [count_message_lines $server_path/stdout "Invalid AOF manifest file format"]
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # A manifest file that exists but has no entries must abort loading.
+ test {Multi Part AOF can't load data when the manifest file is empty} {
+ # Empty body: the manifest is created with nothing appended to it.
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ }
+
+ start_server_aof [list dir $server_path] {
+ # The server is expected to exit on its own while loading the AOF.
+ wait_for_condition 100 50 {
+ ! [is_alive $srv]
+ } else {
+ fail "AOF loading didn't fail"
+ }
+
+ assert_equal 1 [count_message_lines $server_path/stdout "Found an empty AOF manifest"]
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # With nothing prepared on disk the server must start and serve a basic
+ # SET / GET round trip.
+ test {Multi Part AOF can start when no aof and no manifest} {
+ start_server_aof [list dir $server_path] {
+ assert_equal 1 [is_alive $srv]
+
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+
+ assert_equal OK [$client set k1 v1]
+ assert_equal v1 [$client get k1]
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # An AOF directory that exists but holds no manifest and no AOF files
+ # must not prevent the server from starting.
+ test {Multi Part AOF can start when we have en empty AOF dir} {
+ create_aof_dir $aof_dirpath
+
+ start_server_aof [list dir $server_path] {
+ assert_equal 1 [is_alive $srv]
+ }
+
+ # Remove the AOF directory again so the following test starts from a
+ # clean state, as the other tests in this part do.
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # INCR sequence numbers may skip values (1 then 3) as long as they
+ # increase; all three files must be loaded.
+ test {Multi Part AOF can load data discontinuously increasing sequence} {
+ create_aof $aof_dirpath $aof_base1_file {
+ append_to_aof [formatCommand set k1 v1]
+ }
+
+ create_aof $aof_dirpath $aof_incr1_file {
+ append_to_aof [formatCommand set k2 v2]
+ }
+
+ create_aof $aof_dirpath $aof_incr3_file {
+ append_to_aof [formatCommand set k3 v3]
+ }
+
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n"
+ append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n"
+ append_to_manifest "file appendonly.aof.3.incr.aof seq 3 type i\n"
+ }
+
+ start_server_aof [list dir $server_path] {
+ assert_equal 1 [is_alive $srv]
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $client
+
+ # One key per file: base, incr 1 and incr 3.
+ assert_equal v1 [$client get k1]
+ assert_equal v2 [$client get k2]
+ assert_equal v3 [$client get k3]
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # An extra, unrecognised key/value pair ("newkey newvalue") on a manifest
+ # line must be tolerated and the data must still load.
+ test {Multi Part AOF can load data when manifest add new k-v} {
+ create_aof $aof_dirpath $aof_base1_file {
+ append_to_aof [formatCommand set k1 v1]
+ }
+
+ create_aof $aof_dirpath $aof_incr1_file {
+ append_to_aof [formatCommand set k2 v2]
+ }
+
+ create_aof $aof_dirpath $aof_incr3_file {
+ append_to_aof [formatCommand set k3 v3]
+ }
+
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b newkey newvalue\n"
+ append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n"
+ append_to_manifest "file appendonly.aof.3.incr.aof seq 3 type i\n"
+ }
+
+ start_server_aof [list dir $server_path] {
+ assert_equal 1 [is_alive $srv]
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $client
+
+ assert_equal v1 [$client get k1]
+ assert_equal v2 [$client get k2]
+ assert_equal v3 [$client get k3]
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # An INCR file listed in the manifest may be empty; the other files must
+ # still be loaded.
+ test {Multi Part AOF can load data when some AOFs are empty} {
+ create_aof $aof_dirpath $aof_base1_file {
+ append_to_aof [formatCommand set k1 v1]
+ }
+
+ # Created with an empty body: nothing is appended to this INCR file.
+ create_aof $aof_dirpath $aof_incr1_file {
+ }
+
+ create_aof $aof_dirpath $aof_incr3_file {
+ append_to_aof [formatCommand set k3 v3]
+ }
+
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n"
+ append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n"
+ append_to_manifest "file appendonly.aof.3.incr.aof seq 3 type i\n"
+ }
+
+ start_server_aof [list dir $server_path] {
+ assert_equal 1 [is_alive $srv]
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $client
+
+ assert_equal v1 [$client get k1]
+ # k2 was never written to any file, so GET yields an empty reply.
+ assert_equal "" [$client get k2]
+ assert_equal v3 [$client get k3]
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # Upgrade path: a single plain-command AOF sits directly in the server dir
+ # (no AOF directory, no manifest). The server must load it, move it into the
+ # AOF directory and describe it in a new manifest.
+ test {Multi Part AOF can load data from old version redis (rdb preamble no)} {
+ create_aof $server_path $aof_old_name_old_path {
+ append_to_aof [formatCommand set k1 v1]
+ append_to_aof [formatCommand set k2 v2]
+ append_to_aof [formatCommand set k3 v3]
+ }
+
+ start_server_aof [list dir $server_path] {
+ assert_equal 1 [is_alive $srv]
+
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $client
+
+ assert_equal v1 [$client get k1]
+ assert_equal v2 [$client get k2]
+ assert_equal v3 [$client get k3]
+
+ # The old file is gone from the server dir and present in the AOF dir.
+ assert_equal 0 [check_file_exist $server_path $aof_basename]
+ assert_equal 1 [check_file_exist $aof_dirpath $aof_basename]
+
+ # The old file is recorded as the BASE, next to a fresh INCR file.
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof seq 1 type b}
+ {file appendonly.aof.1.incr.aof seq 1 type i}
+ }
+
+ assert_equal OK [$client set k4 v4]
+
+ $client bgrewriteaof
+ waitForBgrewriteaof $client
+
+ assert_equal OK [$client set k5 v5]
+
+ # After a rewrite the manifest holds a new BASE / INCR pair at seq 2.
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.2.base.rdb seq 2 type b}
+ {file appendonly.aof.2.incr.aof seq 2 type i}
+ }
+
+ # Reloading from the AOF must reproduce the same dataset digest.
+ set d1 [$client debug digest]
+ $client debug loadaof
+ set d2 [$client debug digest]
+ assert {$d1 eq $d2}
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # Same upgrade path as the "rdb preamble no" case, but the old single-file
+ # AOF is the bundled asset tests/assets/rdb-preamble.aof.
+ test {Multi Part AOF can load data from old version redis (rdb preamble yes)} {
+ exec cp tests/assets/rdb-preamble.aof $aof_old_name_old_path
+ start_server_aof [list dir $server_path] {
+ assert_equal 1 [is_alive $srv]
+
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $client
+
+ # k1 k2 in rdb header and k3 in AOF tail
+ assert_equal v1 [$client get k1]
+ assert_equal v2 [$client get k2]
+ assert_equal v3 [$client get k3]
+
+ # The old file is gone from the server dir and present in the AOF dir.
+ assert_equal 0 [check_file_exist $server_path $aof_basename]
+ assert_equal 1 [check_file_exist $aof_dirpath $aof_basename]
+
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof seq 1 type b}
+ {file appendonly.aof.1.incr.aof seq 1 type i}
+ }
+
+ assert_equal OK [$client set k4 v4]
+
+ $client bgrewriteaof
+ waitForBgrewriteaof $client
+
+ assert_equal OK [$client set k5 v5]
+
+ # After a rewrite the manifest holds a new BASE / INCR pair at seq 2.
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.2.base.rdb seq 2 type b}
+ {file appendonly.aof.2.incr.aof seq 2 type i}
+ }
+
+ # Reloading from the AOF must reproduce the same dataset digest.
+ set d1 [$client debug digest]
+ $client debug loadaof
+ set d2 [$client debug digest]
+ assert {$d1 eq $d2}
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # The manifest in the AOF directory already names appendonly.aof as BASE,
+ # but the file itself is still in the server dir. The server must finish
+ # the move and load the data.
+ test {Multi Part AOF can continue the upgrade from the interrupted upgrade state} {
+ create_aof $server_path $aof_old_name_old_path {
+ append_to_aof [formatCommand set k1 v1]
+ append_to_aof [formatCommand set k2 v2]
+ append_to_aof [formatCommand set k3 v3]
+ }
+
+ # Create a layout of an interrupted upgrade (interrupted before the rename).
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof seq 1 type b\n"
+ }
+
+ start_server_aof [list dir $server_path] {
+ assert_equal 1 [is_alive $srv]
+
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $client
+
+ assert_equal v1 [$client get k1]
+ assert_equal v2 [$client get k2]
+ assert_equal v3 [$client get k3]
+
+ # The file has been moved out of the server dir into the AOF dir.
+ assert_equal 0 [check_file_exist $server_path $aof_basename]
+ assert_equal 1 [check_file_exist $aof_dirpath $aof_basename]
+
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof seq 1 type b}
+ {file appendonly.aof.1.incr.aof seq 1 type i}
+ }
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # A file named appendonly.aof exists both in the server dir (k1..k3) and in
+ # the AOF directory (k4..k6), and the manifest names it as BASE. Only the
+ # copy inside the AOF directory must be loaded; the other is left alone.
+ test {Multi Part AOF can be loaded correctly when both server dir and aof dir contain old AOF} {
+ create_aof $server_path $aof_old_name_old_path {
+ append_to_aof [formatCommand set k1 v1]
+ append_to_aof [formatCommand set k2 v2]
+ append_to_aof [formatCommand set k3 v3]
+ }
+
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof seq 1 type b\n"
+ }
+
+ create_aof $aof_dirpath $aof_old_name_new_path {
+ append_to_aof [formatCommand set k4 v4]
+ append_to_aof [formatCommand set k5 v5]
+ append_to_aof [formatCommand set k6 v6]
+ }
+
+ start_server_aof [list dir $server_path] {
+ assert_equal 1 [is_alive $srv]
+
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $client
+
+ # Keys from the server-dir copy must not have been loaded.
+ assert_equal 0 [$client exists k1]
+ assert_equal 0 [$client exists k2]
+ assert_equal 0 [$client exists k3]
+
+ assert_equal v4 [$client get k4]
+ assert_equal v5 [$client get k5]
+ assert_equal v6 [$client get k6]
+
+ # Both copies are still on disk.
+ assert_equal 1 [check_file_exist $server_path $aof_basename]
+ assert_equal 1 [check_file_exist $aof_dirpath $aof_basename]
+
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof seq 1 type b}
+ {file appendonly.aof.1.incr.aof seq 1 type i}
+ }
+ }
+
+ clean_aof_persistence $aof_dirpath
+ # The server-dir copy lives outside the AOF directory, so remove it separately.
+ catch {exec rm -rf $aof_old_name_old_path}
+ }
+
+ # The manifest names appendonly.aof as BASE, but no such file was created
+ # in either directory; loading must fail.
+ test {Multi Part AOF can't load data when the manifest contains the old AOF file name but the file does not exist in server dir and aof dir} {
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof seq 1 type b\n"
+ }
+
+ start_server_aof [list dir $server_path] {
+ # The server is expected to exit on its own while loading the AOF.
+ wait_for_condition 100 50 {
+ ! [is_alive $srv]
+ } else {
+ fail "AOF loading didn't fail"
+ }
+
+ assert_equal 1 [count_message_lines $server_path/stdout "appendonly.aof .*No such file or directory"]
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # Two servers run against the same dir with different appendfilename values
+ # (appendonly.aof and appendonly.aof2). Each must load only its own old
+ # AOF and maintain its own manifest.
+ test {Multi Part AOF can upgrade when when two redis share the same server dir} {
+ create_aof $server_path $aof_old_name_old_path {
+ append_to_aof [formatCommand set k1 v1]
+ append_to_aof [formatCommand set k2 v2]
+ append_to_aof [formatCommand set k3 v3]
+ }
+
+ create_aof $server_path $aof_old_name_old_path2 {
+ append_to_aof [formatCommand set k4 v4]
+ append_to_aof [formatCommand set k5 v5]
+ append_to_aof [formatCommand set k6 v6]
+ }
+
+ start_server_aof [list dir $server_path] {
+ set redis1 [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+
+ # Second server: same dir, but its AOF file name is appendonly.aof2.
+ start_server [list overrides [list dir $server_path appendonly yes appendfilename appendonly.aof2]] {
+ set redis2 [redis [srv host] [srv port] 0 $::tls]
+
+ test "Multi Part AOF can upgrade when when two redis share the same server dir (redis1)" {
+ wait_done_loading $redis1
+ assert_equal v1 [$redis1 get k1]
+ assert_equal v2 [$redis1 get k2]
+ assert_equal v3 [$redis1 get k3]
+
+ # Keys belonging to the other server's AOF must be absent.
+ assert_equal 0 [$redis1 exists k4]
+ assert_equal 0 [$redis1 exists k5]
+ assert_equal 0 [$redis1 exists k6]
+
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof seq 1 type b}
+ {file appendonly.aof.1.incr.aof seq 1 type i}
+ }
+
+ $redis1 bgrewriteaof
+ waitForBgrewriteaof $redis1
+
+ assert_equal OK [$redis1 set k v]
+
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.2.base.rdb seq 2 type b}
+ {file appendonly.aof.2.incr.aof seq 2 type i}
+ }
+
+ # Reloading from the AOF must reproduce the same dataset digest.
+ set d1 [$redis1 debug digest]
+ $redis1 debug loadaof
+ set d2 [$redis1 debug digest]
+ assert {$d1 eq $d2}
+ }
+
+ test "Multi Part AOF can upgrade when when two redis share the same server dir (redis2)" {
+ wait_done_loading $redis2
+
+ # Keys belonging to the other server's AOF must be absent.
+ assert_equal 0 [$redis2 exists k1]
+ assert_equal 0 [$redis2 exists k2]
+ assert_equal 0 [$redis2 exists k3]
+
+ assert_equal v4 [$redis2 get k4]
+ assert_equal v5 [$redis2 get k5]
+ assert_equal v6 [$redis2 get k6]
+
+ assert_aof_manifest_content $aof_manifest_file2 {
+ {file appendonly.aof2 seq 1 type b}
+ {file appendonly.aof2.1.incr.aof seq 1 type i}
+ }
+
+ $redis2 bgrewriteaof
+ waitForBgrewriteaof $redis2
+
+ assert_equal OK [$redis2 set k v]
+
+ assert_aof_manifest_content $aof_manifest_file2 {
+ {file appendonly.aof2.2.base.rdb seq 2 type b}
+ {file appendonly.aof2.2.incr.aof seq 2 type i}
+ }
+
+ # Reloading from the AOF must reproduce the same dataset digest.
+ set d1 [$redis2 debug digest]
+ $redis2 debug loadaof
+ set d2 [$redis2 debug digest]
+ assert {$d1 eq $d2}
+ }
+ }
+ }
+ }
+
+ # The configured appendfilename contains spaces and newlines. The manifest
+ # entries written after a rewrite are expected to carry the file name in
+ # double quotes with the newlines escaped.
+ test {Multi Part AOF can handle appendfilename contains whitespaces} {
+ start_server [list overrides [list appendonly yes appendfilename "\" file seq \\n\\n.aof \""]] {
+ set dir [get_redis_dir]
+ # Manifest path built from the same file name, here with real newlines.
+ set aof_manifest_name [format "%s/%s/%s%s" $dir "appendonlydir" " file seq \n\n.aof " $::manifest_suffix]
+ set redis [redis [srv host] [srv port] 0 $::tls]
+
+ assert_equal OK [$redis set k1 v1]
+
+ $redis bgrewriteaof
+ waitForBgrewriteaof $redis
+
+ assert_aof_manifest_content $aof_manifest_name {
+ {file " file seq \n\n.aof .2.base.rdb" seq 2 type b}
+ {file " file seq \n\n.aof .2.incr.aof" seq 2 type i}
+ }
+
+ # Reloading from the AOF must reproduce the same dataset digest.
+ set d1 [$redis debug digest]
+ $redis debug loadaof
+ set d2 [$redis debug digest]
+ assert {$d1 eq $d2}
+ }
+
+ # NOTE(review): this cleans the shared $aof_dirpath under $server_path, not
+ # the directory of the server started above (its path is in $dir) - confirm
+ # that is intended.
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # Starting from an empty dir with the default settings, the server must
+ # create a BASE file with the RDB suffix plus an INCR file, both at seq 1.
+ test {Multi Part AOF can create BASE (RDB format) when redis starts from empty} {
+ start_server_aof [list dir $server_path] {
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $client
+
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.1${::base_aof_sufix}${::rdb_format_suffix}"]
+
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.1.base.rdb seq 1 type b}
+ {file appendonly.aof.1.incr.aof seq 1 type i}
+ }
+
+ $client set foo behavior
+
+ # Reloading from the AOF must reproduce the same dataset digest.
+ set d1 [$client debug digest]
+ $client debug loadaof
+ set d2 [$client debug digest]
+ assert {$d1 eq $d2}
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # Starting from an empty dir with aof-use-rdb-preamble set to no, the BASE
+ # file must carry the AOF suffix instead of the RDB one.
+ test {Multi Part AOF can create BASE (AOF format) when redis starts from empty} {
+ start_server_aof [list dir $server_path aof-use-rdb-preamble no] {
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $client
+
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.1${::base_aof_sufix}${::aof_format_suffix}"]
+
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.1.base.aof seq 1 type b}
+ {file appendonly.aof.1.incr.aof seq 1 type i}
+ }
+
+ $client set foo behavior
+
+ # Reloading from the AOF must reproduce the same dataset digest.
+ set d1 [$client debug digest]
+ $client debug loadaof
+ set d2 [$client debug digest]
+ assert {$d1 eq $d2}
+ }
+
+ clean_aof_persistence $aof_dirpath
+ }
+
+ # Test Part 2
+ #
+ # To test whether AOFRW behaves as expected while redis is running, we start redis first,
+ # then apply write load, enable and disable AOF, and trigger bgrewriteaof both manually and
+ # automatically, checking whether the correct AOF files are created, whether the correct
+ # manifest is generated, whether the data can be reloaded correctly under continuous
+ # write pressure, etc.
+
+
+ start_server {tags {"Multi Part AOF"} overrides {aof-use-rdb-preamble {yes} appendonly {no} save {}}} {
+ # Per-server fixtures for Part 2: the AOF paths are re-derived from this
+ # server's own dir rather than from $server_path.
+ set dir [get_redis_dir]
+ set aof_basename "appendonly.aof"
+ set aof_dirname "appendonlydir"
+ set aof_dirpath "$dir/$aof_dirname"
+ set aof_manifest_name "$aof_basename$::manifest_suffix"
+ set aof_manifest_file "$dir/$aof_dirname/$aof_manifest_name"
+
+ # Client handle and address of the server, used to attach write load.
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+
+ # Best-effort removal so the first test below starts without a manifest.
+ catch {exec rm -rf $aof_manifest_file}
+
+ # Precondition for the tests that follow: no manifest file exists yet.
+ test "Make sure aof manifest $aof_manifest_name not in aof directory" {
+ assert_equal 0 [file exists $aof_manifest_file]
+ }
+
+ # Turning appendonly on at runtime must produce a manifest with a BASE /
+ # INCR pair at seq 1; a later manual rewrite must replace it with a pair at
+ # seq 2 and delete the seq 1 files.
+ test "AOF enable will create manifest file" {
+ r config set appendonly yes ; # Will create manifest and new INCR aof
+ r config set auto-aof-rewrite-percentage 0 ; # Disable auto-rewrite.
+ waitForBgrewriteaof r
+
+ # Start write load
+ set load_handle0 [start_write_load $master_host $master_port 10]
+
+ wait_for_condition 50 100 {
+ [r dbsize] > 0
+ } else {
+ fail "No write load detected."
+ }
+
+ # First AOFRW done
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.1.base.rdb seq 1 type b}
+ {file appendonly.aof.1.incr.aof seq 1 type i}
+ }
+
+ # Check we really have these files
+ assert_equal 1 [check_file_exist $aof_dirpath $aof_manifest_name]
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.1${::base_aof_sufix}${::rdb_format_suffix}"]
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.1${::incr_aof_sufix}${::aof_format_suffix}"]
+
+ r bgrewriteaof
+ waitForBgrewriteaof r
+
+ # The second AOFRW done
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.2.base.rdb seq 2 type b}
+ {file appendonly.aof.2.incr.aof seq 2 type i}
+ }
+
+ assert_equal 1 [check_file_exist $aof_dirpath $aof_manifest_name]
+ # Wait bio delete history
+ wait_for_condition 1000 10 {
+ [check_file_exist $aof_dirpath "${aof_basename}.1${::base_aof_sufix}${::rdb_format_suffix}"] == 0 &&
+ [check_file_exist $aof_dirpath "${aof_basename}.1${::incr_aof_sufix}${::aof_format_suffix}"] == 0
+ } else {
+ fail "Failed to delete history AOF"
+ }
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.2${::base_aof_sufix}${::rdb_format_suffix}"]
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.2${::incr_aof_sufix}${::aof_format_suffix}"]
+
+ # Stop the writers before comparing digests so the dataset is stable.
+ stop_write_load $load_handle0
+ wait_load_handlers_disconnected
+
+ # Reloading from the AOF must reproduce the same dataset digest.
+ set d1 [r debug digest]
+ r debug loadaof
+ set d2 [r debug digest]
+ assert {$d1 eq $d2}
+ }
+
+ # Each of three rewrite attempts is killed; the manifest must then list the
+ # old BASE plus four INCR files (seq 2..5). A later successful rewrite must
+ # collapse them into one new BASE / INCR pair and delete the old files.
+ test "AOF multiple rewrite failures will open multiple INCR AOFs" {
+ # NOTE(review): the large per-key delay presumably keeps the rewrite
+ # child alive long enough to be killed below - confirm.
+ r config set rdb-key-save-delay 10000000
+
+ # Start write load
+ set orig_size [r dbsize]
+ set load_handle0 [start_write_load $master_host $master_port 10]
+
+ wait_for_condition 50 100 {
+ [r dbsize] > $orig_size
+ } else {
+ fail "No write load detected."
+ }
+
+ # Let AOFRW fail three times
+ r bgrewriteaof
+ set pid1 [get_child_pid 0]
+ catch {exec kill -9 $pid1}
+ waitForBgrewriteaof r
+
+ r bgrewriteaof
+ set pid2 [get_child_pid 0]
+ catch {exec kill -9 $pid2}
+ waitForBgrewriteaof r
+
+ r bgrewriteaof
+ set pid3 [get_child_pid 0]
+ catch {exec kill -9 $pid3}
+ waitForBgrewriteaof r
+
+ # No temp rewrite file of a killed child may be left behind.
+ assert_equal 0 [check_file_exist $dir "temp-rewriteaof-bg-$pid1.aof"]
+ assert_equal 0 [check_file_exist $dir "temp-rewriteaof-bg-$pid2.aof"]
+ assert_equal 0 [check_file_exist $dir "temp-rewriteaof-bg-$pid3.aof"]
+
+ # We will have four INCR AOFs
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.2.base.rdb seq 2 type b}
+ {file appendonly.aof.2.incr.aof seq 2 type i}
+ {file appendonly.aof.3.incr.aof seq 3 type i}
+ {file appendonly.aof.4.incr.aof seq 4 type i}
+ {file appendonly.aof.5.incr.aof seq 5 type i}
+ }
+
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.2${::base_aof_sufix}${::rdb_format_suffix}"]
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.2${::incr_aof_sufix}${::aof_format_suffix}"]
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.3${::incr_aof_sufix}${::aof_format_suffix}"]
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.4${::incr_aof_sufix}${::aof_format_suffix}"]
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.5${::incr_aof_sufix}${::aof_format_suffix}"]
+
+ # Stop the writers before comparing digests so the dataset is stable.
+ stop_write_load $load_handle0
+ wait_load_handlers_disconnected
+
+ # Reloading from the AOF must reproduce the same dataset digest.
+ set d1 [r debug digest]
+ r debug loadaof
+ set d2 [r debug digest]
+ assert {$d1 eq $d2}
+
+ # Remove the artificial delay and kill any child that is still running.
+ # NOTE(review): the wait below polls rdb_bgsave_in_progress although the
+ # children killed above were started by bgrewriteaof - confirm this is
+ # the intended field.
+ r config set rdb-key-save-delay 0
+ catch {exec kill -9 [get_child_pid 0]}
+ wait_for_condition 1000 10 {
+ [s rdb_bgsave_in_progress] eq 0
+ } else {
+ fail "bgsave did not stop in time"
+ }
+
+ # AOFRW success
+ r bgrewriteaof
+ waitForBgrewriteaof r
+
+ # All previous INCR AOFs have become history
+ # and have been deleted
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.3.base.rdb seq 3 type b}
+ {file appendonly.aof.6.incr.aof seq 6 type i}
+ }
+
+ # Wait bio delete history
+ wait_for_condition 1000 10 {
+ [check_file_exist $aof_dirpath "${aof_basename}.2${::base_aof_sufix}${::rdb_format_suffix}"] == 0 &&
+ [check_file_exist $aof_dirpath "${aof_basename}.2${::incr_aof_sufix}${::aof_format_suffix}"] == 0 &&
+ [check_file_exist $aof_dirpath "${aof_basename}.3${::incr_aof_sufix}${::aof_format_suffix}"] == 0 &&
+ [check_file_exist $aof_dirpath "${aof_basename}.4${::incr_aof_sufix}${::aof_format_suffix}"] == 0 &&
+ [check_file_exist $aof_dirpath "${aof_basename}.5${::incr_aof_sufix}${::aof_format_suffix}"] == 0
+ } else {
+ fail "Failed to delete history AOF"
+ }
+
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.3${::base_aof_sufix}${::rdb_format_suffix}"]
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.6${::incr_aof_sufix}${::aof_format_suffix}"]
+
+ # Reloading from the AOF must reproduce the same dataset digest.
+ set d1 [r debug digest]
+ r debug loadaof
+ set d2 [r debug digest]
+ assert {$d1 eq $d2}
+ }
+
+ # Checks that an AOFRW executed while appendonly is off only leaves a BASE
+ # file in the manifest, and that turning appendonly back on ends with a new
+ # BASE plus an INCR file in the manifest.
+ test "AOF rewrite doesn't open new aof when AOF turn off" {
+ r config set appendonly no
+
+ r bgrewriteaof
+ waitForBgrewriteaof r
+
+ # We only have BASE AOF, no INCR AOF
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.4.base.rdb seq 4 type b}
+ }
+
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.4${::base_aof_sufix}${::rdb_format_suffix}"]
+ # Neither an INCR file with seq 6 nor one with seq 7 may remain on disk
+ wait_for_condition 1000 10 {
+ [check_file_exist $aof_dirpath "${aof_basename}.6${::incr_aof_sufix}${::aof_format_suffix}"] == 0 &&
+ [check_file_exist $aof_dirpath "${aof_basename}.7${::incr_aof_sufix}${::aof_format_suffix}"] == 0
+ } else {
+ fail "Failed to delete history AOF"
+ }
+
+ # Reloading from the AOF must reproduce the same dataset digest
+ set d1 [r debug digest]
+ r debug loadaof
+ set d2 [r debug digest]
+ assert {$d1 eq $d2}
+
+ # Turn on AOF again
+ r config set appendonly yes
+ waitForBgrewriteaof r
+
+ # A new INCR AOF was created
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.5.base.rdb seq 5 type b}
+ {file appendonly.aof.1.incr.aof seq 1 type i}
+ }
+
+ # Wait bio delete history
+ wait_for_condition 1000 10 {
+ [check_file_exist $aof_dirpath "${aof_basename}.4${::base_aof_sufix}${::rdb_format_suffix}"] == 0
+ } else {
+ fail "Failed to delete history AOF"
+ }
+
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.5${::base_aof_sufix}${::rdb_format_suffix}"]
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.1${::incr_aof_sufix}${::aof_format_suffix}"]
+ }
+
+ # With aof-disable-auto-gc set to yes, two rewrites are expected to leave the
+ # replaced BASE/INCR files listed in the manifest as history (type h) and
+ # present on disk. After setting the option back to no, the manifest should
+ # list only the current BASE/INCR pair and the history files should be deleted.
+ test "AOF enable/disable auto gc" {
+ r config set aof-disable-auto-gc yes
+
+ r bgrewriteaof
+ waitForBgrewriteaof r
+
+ r bgrewriteaof
+ waitForBgrewriteaof r
+
+ # We can see four history AOFs (Evolved from two BASE and two INCR)
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.7.base.rdb seq 7 type b}
+ {file appendonly.aof.2.incr.aof seq 2 type h}
+ {file appendonly.aof.6.base.rdb seq 6 type h}
+ {file appendonly.aof.1.incr.aof seq 1 type h}
+ {file appendonly.aof.5.base.rdb seq 5 type h}
+ {file appendonly.aof.3.incr.aof seq 3 type i}
+ }
+
+ # The four history files must still exist on disk while auto gc is disabled
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.5${::base_aof_sufix}${::rdb_format_suffix}"]
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.6${::base_aof_sufix}${::rdb_format_suffix}"]
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.1${::incr_aof_sufix}${::aof_format_suffix}"]
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.2${::incr_aof_sufix}${::aof_format_suffix}"]
+
+ r config set aof-disable-auto-gc no
+
+ # Auto gc success
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.7.base.rdb seq 7 type b}
+ {file appendonly.aof.3.incr.aof seq 3 type i}
+ }
+
+ # wait bio delete history
+ wait_for_condition 1000 10 {
+ [check_file_exist $aof_dirpath "${aof_basename}.5${::base_aof_sufix}${::rdb_format_suffix}"] == 0 &&
+ [check_file_exist $aof_dirpath "${aof_basename}.6${::base_aof_sufix}${::rdb_format_suffix}"] == 0 &&
+ [check_file_exist $aof_dirpath "${aof_basename}.1${::incr_aof_sufix}${::aof_format_suffix}"] == 0 &&
+ [check_file_exist $aof_dirpath "${aof_basename}.2${::incr_aof_sufix}${::aof_format_suffix}"] == 0
+ } else {
+ fail "Failed to delete history AOF"
+ }
+ }
+
+ # After reloading the dataset with DEBUG LOADAOF, the next rewrite must
+ # continue the numbering: BASE 7 -> 8 and INCR 3 -> 4.
+ test "AOF can produce consecutive sequence number after reload" {
+ # Current manifest, BASE seq 7 and INCR seq 3
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.7.base.rdb seq 7 type b}
+ {file appendonly.aof.3.incr.aof seq 3 type i}
+ }
+
+ r debug loadaof
+
+ # Trigger AOFRW
+ r bgrewriteaof
+ waitForBgrewriteaof r
+
+ # Now BASE seq is 8 and INCR seq is 4
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.8.base.rdb seq 8 type b}
+ {file appendonly.aof.4.incr.aof seq 4 type i}
+ }
+ }
+
+ # Enables appendonly while a (deliberately slowed) BGSAVE child is running.
+ # The test expects the rewrite to be only scheduled, the manifest to stay
+ # untouched, and writes made in the meantime to be absent after DEBUG LOADAOF.
+ # Once the BGSAVE child is killed, the scheduled AOFRW must run and later
+ # writes must survive a reload.
+ test "AOF enable during BGSAVE will not write data util AOFRW finish" {
+ r config set appendonly no
+ r config set save ""
+ # Large per-key delay so the BGSAVE child stays alive for the whole test
+ r config set rdb-key-save-delay 10000000
+
+ r set k1 v1
+ r bgsave
+
+ wait_for_condition 1000 10 {
+ [s rdb_bgsave_in_progress] eq 1
+ } else {
+ fail "bgsave did not start in time"
+ }
+
+ # Make server.aof_rewrite_scheduled = 1
+ r config set appendonly yes
+ assert_equal [s aof_rewrite_scheduled] 1
+
+ # Not open new INCR aof
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.8.base.rdb seq 8 type b}
+ {file appendonly.aof.4.incr.aof seq 4 type i}
+ }
+
+ r set k2 v2
+ r debug loadaof
+
+ # Both k1 and k2 lost
+ assert_equal 0 [r exists k1]
+ assert_equal 0 [r exists k2]
+
+ # Remember the fork counter so the scheduled AOFRW's fork can be detected below
+ set total_forks [s total_forks]
+ assert_equal [s rdb_bgsave_in_progress] 1
+ r config set rdb-key-save-delay 0
+ catch {exec kill -9 [get_child_pid 0]}
+ wait_for_condition 1000 10 {
+ [s rdb_bgsave_in_progress] eq 0
+ } else {
+ fail "bgsave did not stop in time"
+ }
+
+ # Make sure AOFRW was scheduled
+ wait_for_condition 1000 10 {
+ [s total_forks] == [expr $total_forks + 1]
+ } else {
+ fail "aof rewrite did not scheduled"
+ }
+ waitForBgrewriteaof r
+
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.9.base.rdb seq 9 type b}
+ {file appendonly.aof.5.incr.aof seq 5 type i}
+ }
+
+ # Writes issued after the rewrite completed must be recoverable from the AOF
+ r set k3 v3
+ r debug loadaof
+ assert_equal v3 [r get k3]
+ }
+
+ # Kills three consecutive AOFRW children, then writes enough data to make an
+ # automatic rewrite due. The test expects a "triggered the limit" log line, no
+ # rewrite in progress and an unchanged manifest, while a manual BGREWRITEAOF
+ # afterwards must still succeed and collapse the INCR files.
+ test "AOF will trigger limit when AOFRW fails many times" {
+ # Clear all data and trigger a successful AOFRW, so we can let
+ # server.aof_current_size equal to 0
+ r flushall
+ r bgrewriteaof
+ waitForBgrewriteaof r
+
+ r config set rdb-key-save-delay 10000000
+ # Let us trigger AOFRW easily
+ r config set auto-aof-rewrite-percentage 1
+ r config set auto-aof-rewrite-min-size 1kb
+
+ # Set a key so that AOFRW can be delayed
+ r set k v
+
+ # Let AOFRW fail 3 times, this will trigger AOFRW limit
+ r bgrewriteaof
+ catch {exec kill -9 [get_child_pid 0]}
+ waitForBgrewriteaof r
+
+ r bgrewriteaof
+ catch {exec kill -9 [get_child_pid 0]}
+ waitForBgrewriteaof r
+
+ r bgrewriteaof
+ catch {exec kill -9 [get_child_pid 0]}
+ waitForBgrewriteaof r
+
+ # Expected manifest after the three killed rewrites: INCR files 6 through 9
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.10.base.rdb seq 10 type b}
+ {file appendonly.aof.6.incr.aof seq 6 type i}
+ {file appendonly.aof.7.incr.aof seq 7 type i}
+ {file appendonly.aof.8.incr.aof seq 8 type i}
+ {file appendonly.aof.9.incr.aof seq 9 type i}
+ }
+
+ # Write 1KB data to trigger AOFRW
+ r set x [string repeat x 1024]
+
+ # Make sure we have limit log
+ wait_for_condition 1000 50 {
+ [count_log_message 0 "triggered the limit"] == 1
+ } else {
+ fail "aof rewrite did not trigger limit"
+ }
+ assert_equal [status r aof_rewrite_in_progress] 0
+
+ # No new INCR AOF be created
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.10.base.rdb seq 10 type b}
+ {file appendonly.aof.6.incr.aof seq 6 type i}
+ {file appendonly.aof.7.incr.aof seq 7 type i}
+ {file appendonly.aof.8.incr.aof seq 8 type i}
+ {file appendonly.aof.9.incr.aof seq 9 type i}
+ }
+
+ # Turn off auto rewrite
+ r config set auto-aof-rewrite-percentage 0
+ r config set rdb-key-save-delay 0
+ # Best effort: kill a child if one exists; catch swallows the error otherwise
+ catch {exec kill -9 [get_child_pid 0]}
+ wait_for_condition 1000 10 {
+ [s aof_rewrite_in_progress] eq 0
+ } else {
+ fail "aof rewrite did not stop in time"
+ }
+
+ # We can still manually execute AOFRW immediately
+ r bgrewriteaof
+ waitForBgrewriteaof r
+
+ # Can create New INCR AOF
+ assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.10${::incr_aof_sufix}${::aof_format_suffix}"]
+
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.11.base.rdb seq 11 type b}
+ {file appendonly.aof.10.incr.aof seq 10 type i}
+ }
+
+ # Reloading from the AOF must reproduce the same dataset digest
+ set d1 [r debug digest]
+ r debug loadaof
+ set d2 [r debug digest]
+ assert {$d1 eq $d2}
+ }
+
+ # Nested server started with appendonly off; AOF is switched on and off at
+ # runtime while a write load is running, and the initial rewrite is killed on
+ # purpose. The "Removing the temp incr aof file" log line is counted
+ # cumulatively (1, 2, 3, 4) across the phases of the test.
+ start_server {overrides {aof-use-rdb-preamble {yes} appendonly {no} save {}}} {
+ set dir [get_redis_dir]
+ set aof_basename "appendonly.aof"
+ set aof_dirname "appendonlydir"
+ set aof_dirpath "$dir/$aof_dirname"
+ set aof_manifest_name "$aof_basename$::manifest_suffix"
+ set aof_manifest_file "$dir/$aof_dirname/$aof_manifest_name"
+
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+
+ test "AOF will open a temporary INCR AOF to accumulate data until the first AOFRW success when AOF is dynamically enabled" {
+ r config set save ""
+ # Increase AOFRW execution time to give us enough time to kill it
+ r config set rdb-key-save-delay 10000000
+
+ # Start write load
+ set load_handle0 [start_write_load $master_host $master_port 10]
+
+ wait_for_condition 50 100 {
+ [r dbsize] > 0
+ } else {
+ fail "No write load detected."
+ }
+
+ # Enable AOF will trigger an initialized AOFRW
+ r config set appendonly yes
+ # Let AOFRW fail
+ assert_equal 1 [s aof_rewrite_in_progress]
+ set pid1 [get_child_pid 0]
+ catch {exec kill -9 $pid1}
+
+ # Wait for AOFRW to exit and delete temp incr aof
+ wait_for_condition 1000 100 {
+ [count_log_message 0 "Removing the temp incr aof file"] == 1
+ } else {
+ fail "temp aof did not delete"
+ }
+
+ # Make sure manifest file is not created
+ assert_equal 0 [check_file_exist $aof_dirpath $aof_manifest_name]
+ # Make sure BASE AOF is not created
+ assert_equal 0 [check_file_exist $aof_dirpath "${aof_basename}.1${::base_aof_sufix}${::rdb_format_suffix}"]
+
+ # Make sure the next AOFRW has started
+ wait_for_condition 1000 50 {
+ [s aof_rewrite_in_progress] == 1
+ } else {
+ fail "aof rewrite did not scheduled"
+ }
+
+ # Do a successful AOFRW
+ set total_forks [s total_forks]
+ r config set rdb-key-save-delay 0
+ catch {exec kill -9 [get_child_pid 0]}
+
+ # Wait until one more fork has happened, i.e. the follow-up AOFRW was started
+ wait_for_condition 1000 10 {
+ [s total_forks] == [expr $total_forks + 1]
+ } else {
+ fail "aof rewrite did not scheduled"
+ }
+ waitForBgrewriteaof r
+
+ # Second removal message: one per killed rewrite so far
+ assert_equal 2 [count_log_message 0 "Removing the temp incr aof file"]
+
+ # BASE and INCR AOF are successfully created
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.1.base.rdb seq 1 type b}
+ {file appendonly.aof.1.incr.aof seq 1 type i}
+ }
+
+ stop_write_load $load_handle0
+ wait_load_handlers_disconnected
+
+ set d1 [r debug digest]
+ r debug loadaof
+ set d2 [r debug digest]
+ assert {$d1 eq $d2}
+
+ # Dynamic disable AOF again
+ r config set appendonly no
+
+ # Disabling AOF does not delete previous AOF files
+ r debug loadaof
+ set d2 [r debug digest]
+ assert {$d1 eq $d2}
+
+ # Second round: start from a clean change counter and generate new writes
+ assert_equal 0 [s rdb_changes_since_last_save]
+ r config set rdb-key-save-delay 10000000
+ set load_handle0 [start_write_load $master_host $master_port 10]
+ wait_for_condition 50 100 {
+ [s rdb_changes_since_last_save] > 0
+ } else {
+ fail "No write load detected."
+ }
+
+ # Re-enable AOF
+ r config set appendonly yes
+
+ # Let AOFRW fail
+ assert_equal 1 [s aof_rewrite_in_progress]
+ set pid1 [get_child_pid 0]
+ catch {exec kill -9 $pid1}
+
+ # Wait for AOFRW to exit and delete temp incr aof
+ wait_for_condition 1000 100 {
+ [count_log_message 0 "Removing the temp incr aof file"] == 3
+ } else {
+ fail "temp aof did not delete 3 times"
+ }
+
+ # Make sure no new incr AOF was created
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.1.base.rdb seq 1 type b}
+ {file appendonly.aof.1.incr.aof seq 1 type i}
+ }
+
+ # Make sure the next AOFRW has started
+ wait_for_condition 1000 50 {
+ [s aof_rewrite_in_progress] == 1
+ } else {
+ fail "aof rewrite did not scheduled"
+ }
+
+ # Do a successful AOFRW
+ set total_forks [s total_forks]
+ r config set rdb-key-save-delay 0
+ catch {exec kill -9 [get_child_pid 0]}
+
+ # Wait until one more fork has happened, i.e. the follow-up AOFRW was started
+ wait_for_condition 1000 10 {
+ [s total_forks] == [expr $total_forks + 1]
+ } else {
+ fail "aof rewrite did not scheduled"
+ }
+ waitForBgrewriteaof r
+
+ assert_equal 4 [count_log_message 0 "Removing the temp incr aof file"]
+
+ # New BASE and INCR AOF are successfully created
+ assert_aof_manifest_content $aof_manifest_file {
+ {file appendonly.aof.2.base.rdb seq 2 type b}
+ {file appendonly.aof.2.incr.aof seq 2 type i}
+ }
+
+ stop_write_load $load_handle0
+ wait_load_handlers_disconnected
+
+ set d1 [r debug digest]
+ r debug loadaof
+ set d2 [r debug digest]
+ assert {$d1 eq $d2}
+ }
+ }
+ }
+}
diff --git a/tests/integration/aof-race.tcl b/tests/integration/aof-race.tcl
new file mode 100644
index 0000000..32f3a74
--- /dev/null
+++ b/tests/integration/aof-race.tcl
@@ -0,0 +1,37 @@
+source tests/support/aofmanifest.tcl
+set defaults { appendonly {yes} appendfilename {appendonly.aof} appenddirname {appendonlydir} aof-use-rdb-preamble {no} }
+set server_path [tmpdir server.aof]
+
+tags {"aof external:skip"} {
+ # Specific test for a regression where internal buffers were not properly
+ # cleaned after a child responsible for an AOF rewrite exited. This buffer
+ # was subsequently appended to the new AOF, resulting in duplicate commands.
+ start_server_aof [list dir $server_path] {
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ # Run redis-benchmark as a command pipeline issuing 20000 INCRs on foo
+ set bench [open "|src/redis-benchmark -q -s [dict get $srv unixsocket] -c 20 -n 20000 incr foo" "r+"]
+
+ # Give the benchmark a chance to start writing before the rewrite begins
+ wait_for_condition 100 1 {
+ [$client get foo] > 0
+ } else {
+ # Don't care if it fails.
+ }
+
+ # Benchmark should be running by now: start background rewrite
+ $client bgrewriteaof
+
+ # Read until benchmark pipe reaches EOF
+ while {[string length [read $bench]] > 0} {}
+
+ # Close the pipeline so its file descriptors are released and the
+ # benchmark process is reaped. close reports an error when the child
+ # exited abnormally or wrote to stderr; that is ignored here because
+ # the assertion on foo below is what decides the test result.
+ catch {close $bench}
+
+ waitForBgrewriteaof $client
+
+ # Check contents of foo
+ assert_equal 20000 [$client get foo]
+ }
+
+ # Restart server to replay AOF
+ start_server_aof [list dir $server_path] {
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $client
+ # Duplicate commands in the AOF would make the replayed counter exceed 20000
+ assert_equal 20000 [$client get foo]
+ }
+}
diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl
new file mode 100644
index 0000000..1f73fc3
--- /dev/null
+++ b/tests/integration/aof.tcl
@@ -0,0 +1,681 @@
+# Shared setup for the AOF tests below: default server options plus the paths
+# of the AOF directory, the first BASE file, the first INCR file and the
+# manifest, all located under a temporary server directory.
+source tests/support/aofmanifest.tcl
+set defaults { appendonly {yes} appendfilename {appendonly.aof} appenddirname {appendonlydir} auto-aof-rewrite-percentage {0}}
+set server_path [tmpdir server.aof]
+set aof_dirname "appendonlydir"
+set aof_basename "appendonly.aof"
+set aof_dirpath "$server_path/$aof_dirname"
+# BASE file with sequence 1 (uses the AOF, not the RDB, format suffix)
+set aof_base_file "$server_path/$aof_dirname/${aof_basename}.1$::base_aof_sufix$::aof_format_suffix"
+# INCR file with sequence 1; this is the file most tests below overwrite
+set aof_file "$server_path/$aof_dirname/${aof_basename}.1$::incr_aof_sufix$::aof_format_suffix"
+set aof_manifest_file "$server_path/$aof_dirname/$aof_basename$::manifest_suffix"
+
+tags {"aof external:skip"} {
+ # Server can start when aof-load-truncated is set to yes and AOF
+ # is truncated, with an incomplete MULTI block.
+ # The file below contains a MULTI that is never followed by EXEC.
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand set foo hello]
+ append_to_aof [formatCommand multi]
+ append_to_aof [formatCommand set bar world]
+ }
+
+ # Manifest listing that single file as the INCR file with sequence 1
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n"
+ }
+
+ start_server_aof [list dir $server_path aof-load-truncated yes] {
+ test "Unfinished MULTI: Server should start if load-truncated is yes" {
+ assert_equal 1 [is_alive $srv]
+ }
+ }
+
+ ## Should also start with truncated AOF without incomplete MULTI block.
+ # Five complete INCRs followed by a sixth one with its last byte cut off.
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand incr foo]
+ append_to_aof [formatCommand incr foo]
+ append_to_aof [formatCommand incr foo]
+ append_to_aof [formatCommand incr foo]
+ append_to_aof [formatCommand incr foo]
+ append_to_aof [string range [formatCommand incr foo] 0 end-1]
+ }
+
+ start_server_aof [list dir $server_path aof-load-truncated yes] {
+ test "Short read: Server should start if load-truncated is yes" {
+ assert_equal 1 [is_alive $srv]
+ }
+
+ test "Truncated AOF loaded: we expect foo to be equal to 5" {
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $client
+ assert {[$client get foo] eq "5"}
+ }
+
+ # Reuses $client created by the previous test; the next server start
+ # below expects this extra INCR to have been persisted (foo == 6).
+ test "Append a new command after loading an incomplete AOF" {
+ $client incr foo
+ }
+ }
+
+ # Now the AOF file is expected to be correct
+ # (no create_aof here: the server restarts on the files left by the previous run)
+ start_server_aof [list dir $server_path aof-load-truncated yes] {
+ test "Short read + command: Server should start" {
+ assert_equal 1 [is_alive $srv]
+ }
+
+ test "Truncated AOF loaded: we expect foo to be equal to 6 now" {
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $client
+ assert {[$client get foo] eq "6"}
+ }
+ }
+
+ ## Test that the server exits when the AOF contains a format error
+ # "!!!" between two valid commands is not valid command framing.
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand set foo hello]
+ append_to_aof "!!!"
+ append_to_aof [formatCommand set foo hello]
+ }
+
+ start_server_aof [list dir $server_path aof-load-truncated yes] {
+ test "Bad format: Server should have logged an error" {
+ set pattern "*Bad file format reading the append only file*"
+ # Poll the last line of the server's stdout log: up to 10 tries, 1000 ms apart
+ set retry 10
+ while {$retry} {
+ set result [exec tail -1 < [dict get $srv stdout]]
+ if {[string match $pattern $result]} {
+ break
+ }
+ incr retry -1
+ after 1000
+ }
+ if {$retry == 0} {
+ # The text searched is the server log, so the failure message says so.
+ # The "assertion:" prefix is kept unchanged.
+ error "assertion:expected error not found on log file"
+ }
+ }
+ }
+
+ ## Test the server doesn't start when the AOF contains an unfinished MULTI
+ # Same file content as the first MULTI case, but loaded with aof-load-truncated no.
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand set foo hello]
+ append_to_aof [formatCommand multi]
+ append_to_aof [formatCommand set bar world]
+ }
+
+ start_server_aof [list dir $server_path aof-load-truncated no] {
+ test "Unfinished MULTI: Server should have logged an error" {
+ set pattern "*Unexpected end of file reading the append only file*"
+ # Poll the last line of the server's stdout log: up to 10 tries, 1000 ms apart
+ set retry 10
+ while {$retry} {
+ set result [exec tail -1 < [dict get $srv stdout]]
+ if {[string match $pattern $result]} {
+ break
+ }
+ incr retry -1
+ after 1000
+ }
+ if {$retry == 0} {
+ # The text searched is the server log, so the failure message says so.
+ # The "assertion:" prefix is kept unchanged.
+ error "assertion:expected error not found on log file"
+ }
+ }
+ }
+
+ ## Test that the server exits when the AOF contains a short read
+ # The second command is written with its last byte cut off.
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand set foo hello]
+ append_to_aof [string range [formatCommand set bar world] 0 end-1]
+ }
+
+ start_server_aof [list dir $server_path aof-load-truncated no] {
+ test "Short read: Server should have logged an error" {
+ set pattern "*Unexpected end of file reading the append only file*"
+ # Poll the last line of the server's stdout log: up to 10 tries, 1000 ms apart
+ set retry 10
+ while {$retry} {
+ set result [exec tail -1 < [dict get $srv stdout]]
+ if {[string match $pattern $result]} {
+ break
+ }
+ incr retry -1
+ after 1000
+ }
+ if {$retry == 0} {
+ # The text searched is the server log, so the failure message says so.
+ # The "assertion:" prefix is kept unchanged.
+ error "assertion:expected error not found on log file"
+ }
+ }
+ }
+
+ ## Test that redis-check-aof indeed sees this AOF is not valid
+ # Runs against the truncated file written just above. catch keeps a failing
+ # exec from aborting the test and stores its message in $result.
+ test "Short read: Utility should confirm the AOF is not valid" {
+ catch {
+ exec src/redis-check-aof $aof_manifest_file
+ } result
+ assert_match "*not valid*" $result
+ }
+
+ # Rewrites the INCR file as one valid SET followed by "!!!" and expects the
+ # checker output to contain ok_up_to_line=8.
+ test "Short read: Utility should show the abnormal line num in AOF" {
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand set foo hello]
+ append_to_aof "!!!"
+ }
+
+ catch {
+ exec src/redis-check-aof $aof_manifest_file
+ } result
+ assert_match "*ok_up_to_line=8*" $result
+ }
+
+ # Runs the checker in --fix mode; "y\n" is supplied on its standard input
+ # (presumably answering a confirmation prompt — TODO confirm).
+ test "Short read: Utility should be able to fix the AOF" {
+ set result [exec src/redis-check-aof --fix $aof_manifest_file << "y\n"]
+ assert_match "*Successfully truncated AOF*" $result
+ }
+
+ ## Test that the server can be started using the truncated AOF
+ # Only the valid "set foo hello" is expected to remain after the --fix above.
+ start_server_aof [list dir $server_path aof-load-truncated no] {
+ test "Fixed AOF: Server should have been started" {
+ assert_equal 1 [is_alive $srv]
+ }
+
+ test "Fixed AOF: Keyspace should contain values that were parseable" {
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $client
+ assert_equal "hello" [$client get foo]
+ assert_equal "" [$client get bar]
+ }
+ }
+
+ ## Test that SPOP (that modifies the client's argc/argv) is correctly free'd
+ # Two members added, one popped: one member must remain after loading.
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand sadd set foo]
+ append_to_aof [formatCommand sadd set bar]
+ append_to_aof [formatCommand spop set]
+ }
+
+ start_server_aof [list dir $server_path aof-load-truncated no] {
+ test "AOF+SPOP: Server should have been started" {
+ assert_equal 1 [is_alive $srv]
+ }
+
+ test "AOF+SPOP: Set should have 1 member" {
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $client
+ assert_equal 1 [$client scard set]
+ }
+ }
+
+ ## Uses the alsoPropagate() API.
+ # SPOP with a count: three members added, two popped, one must remain.
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand sadd set foo]
+ append_to_aof [formatCommand sadd set bar]
+ append_to_aof [formatCommand sadd set gah]
+ append_to_aof [formatCommand spop set 2]
+ }
+
+ start_server_aof [list dir $server_path] {
+ test "AOF+SPOP: Server should have been started" {
+ assert_equal 1 [is_alive $srv]
+ }
+
+ test "AOF+SPOP: Set should have 1 member" {
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $client
+ assert_equal 1 [$client scard set]
+ }
+ }
+
+ ## Test that PEXPIREAT is loaded correctly
+ # The list gets a PEXPIREAT of 1000 between two pushes; after loading, the
+ # list is expected to be empty.
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand rpush list foo]
+ append_to_aof [formatCommand pexpireat list 1000]
+ append_to_aof [formatCommand rpush list bar]
+ }
+
+ start_server_aof [list dir $server_path aof-load-truncated no] {
+ test "AOF+EXPIRE: Server should have been started" {
+ assert_equal 1 [is_alive $srv]
+ }
+
+ test "AOF+EXPIRE: List should be empty" {
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $client
+ assert_equal 0 [$client llen list]
+ }
+ }
+
+ # Smoke test: with appendonly on, EXPIRE with a negative TTL must simply be
+ # accepted. There is no explicit assertion; the test passes if neither
+ # command raises an error.
+ start_server {overrides {appendonly {yes}}} {
+ test {Redis should not try to convert DEL into EXPIREAT for EXPIRE -1} {
+ r set x 10
+ r expire x -1
+ }
+ }
+
+ # Checks, with appendfsync always, that the reply to a write is not observed
+ # before the INCR AOF file has grown.
+ start_server {overrides {appendonly {yes} appendfsync always}} {
+ test {AOF fsync always barrier issue} {
+ set rd [redis_deferring_client]
+ # Set a sleep when aof is flushed, so that we have a chance to look
+ # at the aof size and detect if the response of an incr command
+ # arrives before the data was written (and hopefully fsynced)
+ # We create a big reply, which will hopefully not have room in the
+ # socket buffers, and will install a write handler, then we sleep
+ # a bit and issue the incr command, hoping that the last portion of
+ # the output buffer write, and the processing of the incr will happen
+ # in the same event loop cycle.
+ # Since the socket buffers and timing are unpredictable, we fuzz this
+ # test with slightly different sizes and sleeps a few times.
+ for {set i 0} {$i < 10} {incr i} {
+ r debug aof-flush-sleep 0
+ r del x
+ r setrange x [expr {int(rand()*5000000)+10000000}] x
+ r debug aof-flush-sleep 500000
+ set aof [get_last_incr_aof_path r]
+ set size1 [file size $aof]
+ $rd get x
+ after [expr {int(rand()*30)}]
+ $rd incr new_value
+ # One read per deferred command sent above (GET, then INCR)
+ $rd read
+ $rd read
+ set size2 [file size $aof]
+ assert {$size1 != $size2}
+ }
+ # Release the deferring client's connection once the fuzz loop is done
+ $rd close
+ }
+ }
+
+ # A plain GETEX (no expiry option) must leave the INCR AOF file size unchanged.
+ start_server {overrides {appendonly {yes}}} {
+ test {GETEX should not append to AOF} {
+ set aof [get_last_incr_aof_path r]
+ r set foo bar
+ # Size is sampled after the SET so that only GETEX's effect is measured
+ set before [file size $aof]
+ r getex foo
+ set after [file size $aof]
+ assert_equal $before $after
+ }
+ }
+
+ ## Test that the server exits when the AOF contains a unknown command
+ # "bla" is not a Redis command; it sits between two valid SETs.
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand set foo hello]
+ append_to_aof [formatCommand bla foo hello]
+ append_to_aof [formatCommand set foo hello]
+ }
+
+ start_server_aof [list dir $server_path aof-load-truncated yes] {
+ test "Unknown command: Server should have logged an error" {
+ set pattern "*Unknown command 'bla' reading the append only file*"
+ # Poll the last line of the server's stdout log: up to 10 tries, 1000 ms apart
+ set retry 10
+ while {$retry} {
+ set result [exec tail -1 < [dict get $srv stdout]]
+ if {[string match $pattern $result]} {
+ break
+ }
+ incr retry -1
+ after 1000
+ }
+ if {$retry == 0} {
+ # The text searched is the server log, so the failure message says so.
+ # The "assertion:" prefix is kept unchanged.
+ error "assertion:expected error not found on log file"
+ }
+ }
+ }
+
+ # Test that LMPOP/BLMPOP work fine with AOF.
+ # Seed three lists: mylist (3 elements), mylist2 (3) and mylist3 (5).
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand lpush mylist a b c]
+ append_to_aof [formatCommand rpush mylist2 1 2 3]
+ append_to_aof [formatCommand lpush mylist3 a b c d e]
+ }
+
+ # This server only issues the pops; the resulting state is verified by the
+ # next server start, which reloads the AOF written here.
+ start_server_aof [list dir $server_path aof-load-truncated no] {
+ test "AOF+LMPOP/BLMPOP: pop elements from the list" {
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ # NOTE(review): client2 passes 1 where client passes 0 — presumably the
+ # deferred-reply mode, so the blocking call below does not stall the test
+ set client2 [redis [dict get $srv host] [dict get $srv port] 1 $::tls]
+ wait_done_loading $client
+
+ # Pop all elements from mylist, should be blmpop delete mylist.
+ $client lmpop 1 mylist left count 1
+ $client blmpop 0 1 mylist left count 10
+
+ # Pop all elements from mylist2, should be lmpop delete mylist2.
+ $client blmpop 0 2 mylist mylist2 right count 10
+ $client lmpop 2 mylist mylist2 right count 2
+
+ # Blocking path, be blocked and then released.
+ $client2 blmpop 0 2 mylist mylist2 left count 2
+ after 100
+ $client lpush mylist2 a b c
+
+ # Pop up the last element in mylist2
+ $client blmpop 0 3 mylist mylist2 mylist3 left count 1
+
+ # Leave two elements in mylist3.
+ $client blmpop 0 3 mylist mylist2 mylist3 right count 3
+ }
+ }
+
+ # Restart on the same directory and check the state left by the pops above.
+ start_server_aof [list dir $server_path aof-load-truncated no] {
+ test "AOF+LMPOP/BLMPOP: after pop elements from the list" {
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $client
+
+ # mylist and mylist2 no longer exist.
+ assert_equal 0 [$client exists mylist mylist2]
+
+ # Length of mylist3 is two.
+ assert_equal 2 [$client llen mylist3]
+ }
+ }
+
+ # Test that ZMPOP/BZMPOP work fine with AOF.
+ # Seed three sorted sets: myzset (3 members), myzset2 (3) and myzset3 (5).
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand zadd myzset 1 one 2 two 3 three]
+ append_to_aof [formatCommand zadd myzset2 4 four 5 five 6 six]
+ append_to_aof [formatCommand zadd myzset3 1 one 2 two 3 three 4 four 5 five]
+ }
+
+ # This server only issues the pops; the resulting state is verified by the
+ # next server start, which reloads the AOF written here.
+ start_server_aof [list dir $server_path aof-load-truncated no] {
+ test "AOF+ZMPOP/BZMPOP: pop elements from the zset" {
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ # NOTE(review): client2 passes 1 where client passes 0 — presumably the
+ # deferred-reply mode, so the blocking call below does not stall the test
+ set client2 [redis [dict get $srv host] [dict get $srv port] 1 $::tls]
+ wait_done_loading $client
+
+ # Pop all elements from myzset, should be bzmpop delete myzset.
+ $client zmpop 1 myzset min count 1
+ $client bzmpop 0 1 myzset min count 10
+
+ # Pop all elements from myzset2, should be zmpop delete myzset2.
+ $client bzmpop 0 2 myzset myzset2 max count 10
+ $client zmpop 2 myzset myzset2 max count 2
+
+ # Blocking path, be blocked and then released.
+ $client2 bzmpop 0 2 myzset myzset2 min count 2
+ after 100
+ $client zadd myzset2 1 one 2 two 3 three
+
+ # Pop up the last element in myzset2
+ $client bzmpop 0 3 myzset myzset2 myzset3 min count 1
+
+ # Leave two elements in myzset3.
+ $client bzmpop 0 3 myzset myzset2 myzset3 max count 3
+ }
+ }
+
+ # Restart on the same directory and check the state left by the pops above.
+ start_server_aof [list dir $server_path aof-load-truncated no] {
+ test "AOF+ZMPOP/BZMPOP: after pop elements from the zset" {
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $client
+
+ # myzset and myzset2 no longer exist.
+ assert_equal 0 [$client exists myzset myzset2]
+
+ # Length of myzset3 is two.
+ assert_equal 2 [$client zcard myzset3]
+ }
+ }
+
+ # With aof-timestamp-enabled on and the RDB preamble off, the first line of
+ # both the INCR file (after a write) and the BASE file (after a rewrite) must
+ # start with "#TS:".
+ test {Generate timestamp annotations in AOF} {
+ start_server {overrides {appendonly {yes}}} {
+ r config set aof-timestamp-enabled yes
+ r config set aof-use-rdb-preamble no
+ set aof [get_last_incr_aof_path r]
+
+ r set foo bar
+ assert_match "#TS:*" [exec head -n 1 $aof]
+
+ r bgrewriteaof
+ waitForBgrewriteaof r
+
+ # $aof now refers to the BASE file produced by the rewrite
+ set aof [get_base_aof_path r]
+ assert_match "#TS:*" [exec head -n 1 $aof]
+ }
+ }
+
+ # redis could load AOF which has timestamp annotations inside
+ # The file mixes "#TS:" lines (including two consecutive ones and a trailing
+ # one) with three SET commands; the truncation test below works on this file.
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof "#TS:1628217470\r\n"
+ append_to_aof [formatCommand set foo1 bar1]
+ append_to_aof "#TS:1628217471\r\n"
+ append_to_aof [formatCommand set foo2 bar2]
+ append_to_aof "#TS:1628217472\r\n"
+ append_to_aof "#TS:1628217473\r\n"
+ append_to_aof [formatCommand set foo3 bar3]
+ append_to_aof "#TS:1628217474\r\n"
+ }
+ start_server_aof [list dir $server_path] {
+ test {Successfully load AOF which has timestamp annotations inside} {
+ set c [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $c
+ assert_equal "bar1" [$c get foo1]
+ assert_equal "bar2" [$c get foo2]
+ assert_equal "bar3" [$c get foo3]
+ }
+ }
+
+ # Truncates the annotated AOF to successively earlier timestamps and checks
+ # which keys survive each step. The steps are cumulative: each one runs on
+ # the file left by the previous truncation.
+ test {Truncate AOF to specific timestamp} {
+ # truncate to timestamp 1628217473
+ exec src/redis-check-aof --truncate-to-timestamp 1628217473 $aof_manifest_file
+ start_server_aof [list dir $server_path] {
+ set c [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $c
+ assert_equal "bar1" [$c get foo1]
+ assert_equal "bar2" [$c get foo2]
+ assert_equal "bar3" [$c get foo3]
+ }
+
+ # truncate to timestamp 1628217471
+ exec src/redis-check-aof --truncate-to-timestamp 1628217471 $aof_manifest_file
+ start_server_aof [list dir $server_path] {
+ set c [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $c
+ assert_equal "bar1" [$c get foo1]
+ assert_equal "bar2" [$c get foo2]
+ assert_equal "" [$c get foo3]
+ }
+
+ # truncate to timestamp 1628217470
+ exec src/redis-check-aof --truncate-to-timestamp 1628217470 $aof_manifest_file
+ start_server_aof [list dir $server_path] {
+ set c [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
+ wait_done_loading $c
+ assert_equal "bar1" [$c get foo1]
+ assert_equal "" [$c get foo2]
+ }
+
+ # truncate to timestamp 1628217469
+ # This is earlier than the first annotation in the file; the tool is
+ # expected to fail with a message containing "aborting".
+ catch {exec src/redis-check-aof --truncate-to-timestamp 1628217469 $aof_manifest_file} e
+ assert_match {*aborting*} $e
+ }
+
+ # Loads an AOF holding a long-running EVAL with lua-time-limit 1 and checks
+ # that the server answers LOADING while the script runs, logs "Slow script
+ # detected", and still applies the script's write.
+ test {EVAL timeout with slow verbatim Lua script from AOF} {
+ start_server [list overrides [list dir $server_path appendonly yes lua-time-limit 1 aof-use-rdb-preamble no]] {
+ # generate a long running script that is propagated to the AOF as script
+ # make sure that the script times out during loading
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand select 9]
+ append_to_aof [formatCommand eval {redis.call('set',KEYS[1],'y'); for i=1,1500000 do redis.call('ping') end return 'ok'} 1 x]
+ }
+ # DEBUG LOADAOF is sent on a deferring client so the test can keep
+ # probing the server with r while the load is in progress
+ set rd [redis_deferring_client]
+ $rd debug loadaof
+ $rd flush
+ wait_for_condition 100 10 {
+ [catch {r ping} e] == 1
+ } else {
+ fail "server didn't start loading"
+ }
+ assert_error {LOADING*} {r ping}
+ # Collect the DEBUG LOADAOF reply, then drop the extra connection
+ $rd read
+ $rd close
+ wait_for_log_messages 0 {"*Slow script detected*"} 0 100 100
+ assert_equal [r get x] y
+ }
+ }
+
+ # Starts the server as a read-only replica (replicaof 127.0.0.1 0) on an AOF
+ # containing write scripts, and expects those writes to have been applied
+ # during loading: foo is set to 100 and incremented twice.
+ test {EVAL can process writes from AOF in read-only replicas} {
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n"
+ }
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand select 9]
+ append_to_aof [formatCommand eval {redis.call("set",KEYS[1],"100")} 1 foo]
+ append_to_aof [formatCommand eval {redis.call("incr",KEYS[1])} 1 foo]
+ append_to_aof [formatCommand eval {redis.call("incr",KEYS[1])} 1 foo]
+ }
+ start_server [list overrides [list dir $server_path appendonly yes replica-read-only yes replicaof "127.0.0.1 0"]] {
+ assert_equal [r get foo] 102
+ }
+ }
+
+ # Passes the AOF file itself (not the manifest) to redis-check-aof and
+ # expects it to be checked as an Old-Style AOF and reported valid.
+ test {Test redis-check-aof for old style resp AOF} {
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand set foo hello]
+ append_to_aof [formatCommand set bar world]
+ }
+
+ catch {
+ exec src/redis-check-aof $aof_file
+ } result
+ assert_match "*Start checking Old-Style AOF*is valid*" $result
+ }
+
+ # Same check against the bundled asset tests/assets/rdb-preamble.aof; the
+ # output must additionally report that the RDB preamble is OK.
+ test {Test redis-check-aof for old style rdb-preamble AOF} {
+ catch {
+ exec src/redis-check-aof tests/assets/rdb-preamble.aof
+ } result
+ assert_match "*Start checking Old-Style AOF*RDB preamble is OK, proceeding with AOF tail*is valid*" $result
+ }
+
+ test {Test redis-check-aof for Multi Part AOF with resp AOF base} {
+ create_aof $aof_dirpath $aof_base_file {
+ append_to_aof [formatCommand set foo hello]
+ append_to_aof [formatCommand set bar world]
+ }
+
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand set foo hello]
+ append_to_aof [formatCommand set bar world]
+ }
+
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n"
+ append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n"
+ }
+
+ catch {
+ exec src/redis-check-aof $aof_manifest_file
+ } result
+ assert_match "*Start checking Multi Part AOF*Start to check BASE AOF (RESP format)*BASE AOF*is valid*Start to check INCR files*INCR AOF*is valid*All AOF files and manifest are valid*" $result
+ }
+
+ test {Test redis-check-aof for Multi Part AOF with rdb-preamble AOF base} {
+ exec cp tests/assets/rdb-preamble.aof $aof_base_file
+
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand set foo hello]
+ append_to_aof [formatCommand set bar world]
+ }
+
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n"
+ append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n"
+ }
+
+ catch {
+ exec src/redis-check-aof $aof_manifest_file
+ } result
+ assert_match "*Start checking Multi Part AOF*Start to check BASE AOF (RDB format)*DB preamble is OK, proceeding with AOF tail*BASE AOF*is valid*Start to check INCR files*INCR AOF*is valid*All AOF files and manifest are valid*" $result
+ }
+
+ test {Test redis-check-aof only truncates the last file for Multi Part AOF in fix mode} {
+ create_aof $aof_dirpath $aof_base_file {
+ append_to_aof [formatCommand set foo hello]
+ append_to_aof [formatCommand multi]
+ append_to_aof [formatCommand set bar world]
+ }
+
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand set foo hello]
+ append_to_aof [formatCommand set bar world]
+ }
+
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n"
+ append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n"
+ }
+
+ catch {
+ exec src/redis-check-aof $aof_manifest_file
+ } result
+ assert_match "*not valid*" $result
+
+ catch {
+ exec src/redis-check-aof --fix $aof_manifest_file
+ } result
+ assert_match "*Failed to truncate AOF*because it is not the last file*" $result
+ }
+
+ test {Test redis-check-aof only truncates the last file for Multi Part AOF in truncate-to-timestamp mode} {
+ create_aof $aof_dirpath $aof_base_file {
+ append_to_aof "#TS:1628217470\r\n"
+ append_to_aof [formatCommand set foo1 bar1]
+ append_to_aof "#TS:1628217471\r\n"
+ append_to_aof [formatCommand set foo2 bar2]
+ append_to_aof "#TS:1628217472\r\n"
+ append_to_aof "#TS:1628217473\r\n"
+ append_to_aof [formatCommand set foo3 bar3]
+ append_to_aof "#TS:1628217474\r\n"
+ }
+
+ create_aof $aof_dirpath $aof_file {
+ append_to_aof [formatCommand set foo hello]
+ append_to_aof [formatCommand set bar world]
+ }
+
+ create_aof_manifest $aof_dirpath $aof_manifest_file {
+ append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n"
+ append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n"
+ }
+
+ catch {
+ exec src/redis-check-aof --truncate-to-timestamp 1628217473 $aof_manifest_file
+ } result
+ assert_match "*Failed to truncate AOF*to timestamp*because it is not the last file*" $result
+ }
+
+ start_server {overrides {appendonly yes appendfsync always}} {
+ test {FLUSHDB / FLUSHALL should persist in AOF} {
+ set aof [get_last_incr_aof_path r]
+
+ r set key value
+ r flushdb
+ r set key value2
+ r flushdb
+
+ # DB is empty
+ r flushdb
+ r flushdb
+ r flushdb
+
+ r set key value
+ r flushall
+ r set key value2
+ r flushall
+
+ # DBs are empty.
+ r flushall
+ r flushall
+ r flushall
+
+ # Assert that each FLUSHDB command is persisted even if the DB is empty.
+ # Assert that each FLUSHALL command is persisted even if the DBs are empty.
+ assert_aof_content $aof {
+ {select *}
+ {set key value}
+ {flushdb}
+ {set key value2}
+ {flushdb}
+ {flushdb}
+ {flushdb}
+ {flushdb}
+ {set key value}
+ {flushall}
+ {set key value2}
+ {flushall}
+ {flushall}
+ {flushall}
+ {flushall}
+ }
+ }
+ }
+}
diff --git a/tests/integration/block-repl.tcl b/tests/integration/block-repl.tcl
new file mode 100644
index 0000000..52b4a53
--- /dev/null
+++ b/tests/integration/block-repl.tcl
@@ -0,0 +1,51 @@
+# Test replication of blocking lists and zset operations.
+# Unlike stream operations such operations are "pop" style, so they consume
+# the list or sorted set, and must be replicated correctly.
+
+proc start_bg_block_op {host port db ops tls} {
+    # Run the helper script under the current interpreter, in the background; the result of exec is returned.
+    exec [info nameofexecutable] tests/helpers/bg_block_op.tcl $host $port $db $ops $tls &
+}
+
+proc stop_bg_block_op {handle} {
+    catch [list exec /bin/kill -9 $handle] ;# best effort: any error from kill is swallowed
+}
+
+start_server {tags {"repl" "external:skip"}} {
+ start_server {overrides {save {}}} {
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+ set slave [srv 0 client]
+ # Start three background helpers against the master (db 9, 100000 ops each).
+ set load_handle0 [start_bg_block_op $master_host $master_port 9 100000 $::tls]
+ set load_handle1 [start_bg_block_op $master_host $master_port 9 100000 $::tls]
+ set load_handle2 [start_bg_block_op $master_host $master_port 9 100000 $::tls]
+ # Attach the replica while the background load is already running.
+ test {First server should have role slave after SLAVEOF} {
+ $slave slaveof $master_host $master_port
+ after 1000
+ s 0 role
+ } {slave}
+ # Let the load run for 25 seconds, stop it, then require both digests to converge.
+ test {Test replication with blocking lists and sorted sets operations} {
+ after 25000
+ stop_bg_block_op $load_handle0
+ stop_bg_block_op $load_handle1
+ stop_bg_block_op $load_handle2
+ wait_for_condition 100 100 {
+ [$master debug digest] == [$slave debug digest]
+ } else {
+ set csv1 [csvdump r]
+ set csv2 [csvdump {r -1}]
+ set fd [open /tmp/repldump1.txt w]
+ puts -nonewline $fd $csv1
+ close $fd
+ set fd [open /tmp/repldump2.txt w]
+ puts -nonewline $fd $csv2
+ close $fd
+ fail "Master - Replica inconsistency, Run diff -u against /tmp/repldump*.txt for more info"
+ }
+ }
+ }
+}
diff --git a/tests/integration/convert-ziplist-hash-on-load.tcl b/tests/integration/convert-ziplist-hash-on-load.tcl
new file mode 100644
index 0000000..c8265b2
--- /dev/null
+++ b/tests/integration/convert-ziplist-hash-on-load.tcl
@@ -0,0 +1,28 @@
+tags {"external:skip"} {
+
+# Copy RDB with ziplist encoded hash to server path
+set server_path [tmpdir "server.convert-ziplist-hash-on-load"]
+
+exec cp -f tests/assets/hash-ziplist.rdb $server_path
+start_server [list overrides [list "dir" $server_path "dbfilename" "hash-ziplist.rdb"]] {
+ test "RDB load ziplist hash: converts to listpack when RDB loading" {
+ r select 0
+ # no size override: the loaded hash must be listpack encoded with both fields intact
+ assert_encoding listpack hash
+ assert_equal 2 [r hlen hash]
+ assert_match {v1 v2} [r hmget hash f1 f2]
+ }
+}
+
+exec cp -f tests/assets/hash-ziplist.rdb $server_path
+start_server [list overrides [list "dir" $server_path "dbfilename" "hash-ziplist.rdb" "hash-max-ziplist-entries" 1]] {
+ test "RDB load ziplist hash: converts to hash table when hash-max-ziplist-entries is exceeded" {
+ r select 0
+ # with the entry limit at 1, the 2-field hash must be loaded as a hashtable
+ assert_encoding hashtable hash
+ assert_equal 2 [r hlen hash]
+ assert_match {v1 v2} [r hmget hash f1 f2]
+ }
+}
+
+}
diff --git a/tests/integration/convert-ziplist-zset-on-load.tcl b/tests/integration/convert-ziplist-zset-on-load.tcl
new file mode 100644
index 0000000..0fbb201
--- /dev/null
+++ b/tests/integration/convert-ziplist-zset-on-load.tcl
@@ -0,0 +1,28 @@
+tags {"external:skip"} {
+
+# Copy RDB with ziplist encoded zset to server path
+set server_path [tmpdir "server.convert-ziplist-zset-on-load"]
+
+exec cp -f tests/assets/zset-ziplist.rdb $server_path
+start_server [list overrides [list "dir" $server_path "dbfilename" "zset-ziplist.rdb"]] {
+    test "RDB load ziplist zset: converts to listpack when RDB loading" {
+        r select 0
+        # no size override: the loaded zset must be listpack encoded with both members intact
+        assert_encoding listpack zset
+        assert_equal 2 [r zcard zset]
+        assert_match {one 1 two 2} [r zrange zset 0 -1 withscores]
+    }
+}
+
+exec cp -f tests/assets/zset-ziplist.rdb $server_path
+start_server [list overrides [list "dir" $server_path "dbfilename" "zset-ziplist.rdb" "zset-max-ziplist-entries" 1]] {
+    test "RDB load ziplist zset: converts to skiplist when zset-max-ziplist-entries is exceeded" {
+        r select 0
+        # with the entry limit at 1, the 2-member zset must be loaded as a skiplist
+        assert_encoding skiplist zset
+        assert_equal 2 [r zcard zset]
+        assert_match {one 1 two 2} [r zrange zset 0 -1 withscores]
+    }
+}
+
+}
diff --git a/tests/integration/convert-zipmap-hash-on-load.tcl b/tests/integration/convert-zipmap-hash-on-load.tcl
new file mode 100644
index 0000000..f7eda0e
--- /dev/null
+++ b/tests/integration/convert-zipmap-hash-on-load.tcl
@@ -0,0 +1,39 @@
+tags {"external:skip"} {
+
+# Copy RDB with zipmap encoded hash to server path
+set server_path [tmpdir "server.convert-zipmap-hash-on-load"]
+
+exec cp -f tests/assets/hash-zipmap.rdb $server_path
+start_server [list overrides [list "dir" $server_path "dbfilename" "hash-zipmap.rdb"]] {
+ test "RDB load zipmap hash: converts to listpack" {
+ r select 0
+ # no size override: DEBUG OBJECT must report a listpack and both fields must be intact
+ assert_match "*listpack*" [r debug object hash]
+ assert_equal 2 [r hlen hash]
+ assert_match {v1 v2} [r hmget hash f1 f2]
+ }
+}
+
+exec cp -f tests/assets/hash-zipmap.rdb $server_path
+start_server [list overrides [list "dir" $server_path "dbfilename" "hash-zipmap.rdb" "hash-max-ziplist-entries" 1]] {
+ test "RDB load zipmap hash: converts to hash table when hash-max-ziplist-entries is exceeded" {
+ r select 0
+ # with the entry limit at 1, the 2-field hash must be reported as a hashtable
+ assert_match "*hashtable*" [r debug object hash]
+ assert_equal 2 [r hlen hash]
+ assert_match {v1 v2} [r hmget hash f1 f2]
+ }
+}
+
+exec cp -f tests/assets/hash-zipmap.rdb $server_path
+start_server [list overrides [list "dir" $server_path "dbfilename" "hash-zipmap.rdb" "hash-max-ziplist-value" 1]] {
+ test "RDB load zipmap hash: converts to hash table when hash-max-ziplist-value is exceeded" {
+ r select 0
+ # with the value-size limit at 1, the hash must be reported as a hashtable
+ assert_match "*hashtable*" [r debug object hash]
+ assert_equal 2 [r hlen hash]
+ assert_match {v1 v2} [r hmget hash f1 f2]
+ }
+}
+
+}
diff --git a/tests/integration/corrupt-dump-fuzzer.tcl b/tests/integration/corrupt-dump-fuzzer.tcl
new file mode 100644
index 0000000..9cd4ff9
--- /dev/null
+++ b/tests/integration/corrupt-dump-fuzzer.tcl
@@ -0,0 +1,230 @@
+# tests of corrupt listpack payload with valid CRC
+
+tags {"dump" "corruption" "external:skip"} {
+
+# catch sigterm so that in case one of the random commands hangs the test,
+# usually due to redis not putting a response in the output buffers,
+# we'll know which command it was
+if { ! [ catch {
+ package require Tclx
+} err ] } {
+ signal error SIGTERM
+}
+
+proc generate_collections {suffix elements} {
+    set client [redis_deferring_client]
+    for {set i 0} {$i < $elements} {incr i} {
+        # even indexes are stored as integers, odd ones as "_<index>" strings
+        set item [expr {$i % 2 == 0 ? $i : "_$i"}]
+        $client hset hash$suffix $i $item
+        $client lpush list$suffix $item
+        $client zadd zset$suffix $i $item
+        $client sadd set$suffix $item
+        $client xadd stream$suffix * item 1 value $item
+    }
+    for {set pending [expr {$elements * 5}]} {$pending > 0} {incr pending -1} {
+        $client read ;# discard one reply (five commands were sent per element)
+    }
+    $client close
+}
+
+# Populate the current DB with keys of assorted types and encodings.
+proc generate_types {} {
+    # set each of these size limits to 5
+    foreach limit {
+        list-max-ziplist-size hash-max-ziplist-entries set-max-listpack-entries
+        zset-max-ziplist-entries stream-node-max-entries
+    } { r config set $limit 5 }
+
+    # small (ziplist / listpack encoded) objects holding 3 items each
+    generate_collections "" 3
+
+    # attach some consumer-group metadata to the stream
+    r xgroup create stream mygroup 0
+    set entries [lindex [r xreadgroup GROUP mygroup Alice COUNT 2 STREAMS stream >] 0 1]
+    r xdel stream [lindex $entries 1 0]
+    r xack stream mygroup [lindex $entries 0 0]
+
+    # plain, non-collection values
+    r incr int
+    r set string str
+
+    # bigger objects with 10 items (more than a single ziplist / listpack)
+    generate_collections big 10
+
+    # give the big stream a listpack record whose field names differ from
+    # the ones the master entry recorded
+    r xadd streambig * item 1 value 1
+    r xadd streambig * item 1 unique value
+}
+
+proc corrupt_payload {payload} {
+    # Return a copy of payload with one (90% of the time) or two randomly chosen bytes overwritten.
+    set len [string length $payload]
+    set count [expr {rand() > 0.9 ? 2 : 1}] ;# usually corrupt only one byte
+    while { $count > 0 } {
+        set idx [expr {int(rand() * $len)}] ;# random offset in [0, len)
+        set ch [binary format c [expr {int(rand()*256)}]] ;# any byte 0..255; *255 could never yield 0xFF
+        set payload [string replace $payload $idx $idx $ch]
+        incr count -1
+    }
+    return $payload
+}
+
+# fuzzy tester for corrupt RESTORE payloads
+# valgrind will make sure there were no leaks in the rdb loader error handling code
+foreach sanitize_dump {no yes} {
+ if {$::accurate} {
+ set min_duration [expr {60 * 10}] ;# run at least 10 minutes
+ set min_cycles 1000 ;# run at least 1k cycles (max 16 minutes)
+ } else {
+ set min_duration 10 ; # run at least 10 seconds
+ set min_cycles 10 ; # run at least 10 cycles
+ }
+
+ # Don't execute this on FreeBSD due to a yet-undiscovered memory issue
+ # which causes tclsh to bloat.
+ if {[exec uname] == "FreeBSD"} {
+ set min_cycles 1
+ set min_duration 1
+ }
+
+ test "Fuzzer corrupt restore payloads - sanitize_dump: $sanitize_dump" {
+ if {$min_duration * 2 > $::timeout} {
+ fail "insufficient timeout"
+ }
+ # start a server, fill with data and save an RDB file once (avoid re-save)
+ start_server [list overrides [list "save" "" use-exit-on-panic yes crash-memcheck-enabled no loglevel verbose] ] {
+ set stdout [srv 0 stdout]
+ r config set sanitize-dump-payload $sanitize_dump
+ r debug set-skip-checksum-validation 1
+ set start_time [clock seconds]
+ generate_types
+ set dbsize [r dbsize]
+ r save
+ set cycle 0
+ set stat_terminated_in_restore 0
+ set stat_terminated_in_traffic 0
+ set stat_terminated_by_signal 0
+ set stat_successful_restore 0
+ set stat_rejected_restore 0
+ set stat_traffic_commands_sent 0
+ # repeatedly DUMP a random key, corrupt it and try RESTORE into a new key
+ while true {
+ set k [r randomkey]
+ set dump [r dump $k]
+ set dump [corrupt_payload $dump]
+ set printable_dump [string2printable $dump]
+ set restore_failed false
+ set report_and_restart false
+ set sent {}
+ # RESTORE can fail, but hopefully not terminate
+ if { [catch { r restore "_$k" 0 $dump REPLACE } err] } {
+ set restore_failed true
+ # skip if return failed with an error response.
+ if {[string match "ERR*" $err]} {
+ incr stat_rejected_restore
+ } else {
+ set report_and_restart true
+ incr stat_terminated_in_restore
+ write_log_line 0 "corrupt payload: $printable_dump"
+ if {$sanitize_dump == yes} {
+ puts "Server crashed in RESTORE with payload: $printable_dump"
+ }
+ }
+ } else {
+ r ping ;# an attempt to check if the server didn't terminate (this will throw an error that will terminate the tests)
+ }
+
+ set print_commands false
+ if {!$restore_failed} {
+ # if RESTORE didn't fail or terminate, run some random traffic on the new key
+ incr stat_successful_restore
+ if { [ catch {
+ set sent [generate_fuzzy_traffic_on_key "_$k" 1] ;# traffic for 1 second
+ incr stat_traffic_commands_sent [llength $sent]
+ r del "_$k" ;# in case the server terminated, here's where we'll detect it.
+ if {$dbsize != [r dbsize]} {
+ puts "unexpected keys"
+ puts "keys: [r keys *]"
+ puts "commands leading to it:"
+ foreach cmd $sent {
+ foreach arg $cmd {
+ puts -nonewline "[string2printable $arg] "
+ }
+ puts ""
+ }
+ exit 1
+ }
+ } err ] } {
+ set err [format "%s" $err] ;# convert to string for pattern matching
+ if {[string match "*SIGTERM*" $err]} {
+ puts "payload that caused test to hang: $printable_dump"
+ exit 1
+ }
+ # if the server terminated update stats and restart it
+ set report_and_restart true
+ incr stat_terminated_in_traffic
+ set by_signal [count_log_message 0 "crashed by signal"]
+ incr stat_terminated_by_signal $by_signal
+
+ if {$by_signal != 0 || $sanitize_dump == yes} {
+ puts "Server crashed (by signal: $by_signal), with payload: $printable_dump"
+ set print_commands true
+ }
+ }
+ }
+
+ # check valgrind report for invalid reads after each RESTORE
+ # payload so that we have a report that is easier to reproduce
+ set valgrind_errors [find_valgrind_errors [srv 0 stderr] false]
+ set asan_errors [sanitizer_errors_from_file [srv 0 stderr]]
+ if {$valgrind_errors != "" || $asan_errors != ""} {
+ puts "valgrind or asan found an issue for payload: $printable_dump"
+ set report_and_restart true
+ set print_commands true
+ }
+
+ if {$report_and_restart} {
+ if {$print_commands} {
+ puts "violating commands:"
+ foreach cmd $sent {
+ foreach arg $cmd {
+ puts -nonewline "[string2printable $arg] "
+ }
+ puts ""
+ }
+ }
+
+ # restart the server and re-apply debug configuration
+ write_log_line 0 "corrupt payload: $printable_dump"
+ restart_server 0 true true
+ r config set sanitize-dump-payload $sanitize_dump
+ r debug set-skip-checksum-validation 1
+ }
+
+ incr cycle
+ if { ([clock seconds]-$start_time) >= $min_duration && $cycle >= $min_cycles} {
+ break
+ }
+ }
+ if {$::verbose} {
+ puts "Done $cycle cycles in [expr {[clock seconds]-$start_time}] seconds."
+ puts "RESTORE: successful: $stat_successful_restore, rejected: $stat_rejected_restore"
+ puts "Total commands sent in traffic: $stat_traffic_commands_sent, crashes during traffic: $stat_terminated_in_traffic ($stat_terminated_by_signal by signal)."
+ }
+ }
+ # if we run sanitization we never expect the server to crash at runtime
+ if {$sanitize_dump == yes} {
+ assert_equal $stat_terminated_in_restore 0
+ assert_equal $stat_terminated_in_traffic 0
+ }
+ # make sure all terminations were due to an assertion and not a SIGSEGV
+ assert_equal $stat_terminated_by_signal 0
+ }
+}
+
+
+
+} ;# tags
+
diff --git a/tests/integration/corrupt-dump.tcl b/tests/integration/corrupt-dump.tcl
new file mode 100644
index 0000000..3c9e5ce
--- /dev/null
+++ b/tests/integration/corrupt-dump.tcl
@@ -0,0 +1,833 @@
+# tests of corrupt ziplist payload with valid CRC
+# * setting crash-memcheck-enabled to no to avoid issues with valgrind
+# * setting use-exit-on-panic to yes so that valgrind can search for leaks
+# * setting debug set-skip-checksum-validation to 1 on some tests for which we
+# didn't bother to fake a valid checksum
+# * some tests set sanitize-dump-payload to no and some to yes, depending on
+# what we want to test
+
+tags {"dump" "corruption" "external:skip"} {
+
+# We only run OOM related tests on x86_64 and aarch64, as jemalloc on other
+# platforms (notably s390x) may actually succeed very large allocations. As
+# a result the test may hang for a very long time at the cleanup phase,
+# iterating as many as 2^61 hash table slots.
+
+set arch_name [exec uname -m]
+set run_oom_tests [expr {$arch_name == "x86_64" || $arch_name == "aarch64"}]
+
+set corrupt_payload_7445 "\x0E\x01\x1D\x1D\x00\x00\x00\x16\x00\x00\x00\x03\x00\x00\x04\x43\x43\x43\x43\x06\x04\x42\x42\x42\x42\x06\x3F\x41\x41\x41\x41\xFF\x09\x00\x88\xA5\xCA\xA8\xC5\x41\xF4\x35"
+
+test {corrupt payload: #7445 - with sanitize} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload yes
+ catch {
+ r restore key 0 $corrupt_payload_7445
+ } err
+ assert_match "*Bad data format*" $err
+ verify_log_message 0 "*integrity check failed*" 0
+ }
+}
+
+test {corrupt payload: hash with valid zip list header, invalid entry len} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ catch {
+ r restore key 0 "\x0D\x1B\x1B\x00\x00\x00\x16\x00\x00\x00\x04\x00\x00\x02\x61\x00\x04\x02\x62\x00\x04\x14\x63\x00\x04\x02\x64\x00\xFF\x09\x00\xD9\x10\x54\x92\x15\xF5\x5F\x52"
+ } err
+ assert_match "*Bad data format*" $err
+ verify_log_message 0 "*integrity check failed*" 0
+ }
+}
+
+test {corrupt payload: invalid zlbytes header} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ catch {
+ r restore key 0 "\x0D\x1B\x25\x00\x00\x00\x16\x00\x00\x00\x04\x00\x00\x02\x61\x00\x04\x02\x62\x00\x04\x02\x63\x00\x04\x02\x64\x00\xFF\x09\x00\xB7\xF7\x6E\x9F\x43\x43\x14\xC6"
+ } err
+ assert_match "*Bad data format*" $err
+ verify_log_message 0 "*integrity check failed*" 0
+ }
+}
+
+test {corrupt payload: valid zipped hash header, dup records} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ catch {
+ r restore key 0 "\x0D\x1B\x1B\x00\x00\x00\x16\x00\x00\x00\x04\x00\x00\x02\x61\x00\x04\x02\x62\x00\x04\x02\x61\x00\x04\x02\x64\x00\xFF\x09\x00\xA1\x98\x36\x78\xCC\x8E\x93\x2E"
+ } err
+ assert_match "*Bad data format*" $err
+ verify_log_message 0 "*integrity check failed*" 0
+ }
+}
+
+test {corrupt payload: quicklist big ziplist prev len} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ catch {r restore key 0 "\x0E\x01\x13\x13\x00\x00\x00\x0E\x00\x00\x00\x02\x00\x00\x02\x61\x00\x0E\x02\x62\x00\xFF\x09\x00\x49\x97\x30\xB2\x0D\xA1\xED\xAA"} err
+ assert_match "*Bad data format*" $err
+ verify_log_message 0 "*integrity check failed*" 0
+ }
+}
+
+test {corrupt payload: quicklist small ziplist prev len} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload yes
+ catch {
+ r restore key 0 "\x0E\x01\x13\x13\x00\x00\x00\x0E\x00\x00\x00\x02\x00\x00\x02\x61\x00\x02\x02\x62\x00\xFF\x09\x00\xC7\x71\x03\x97\x07\x75\xB0\x63"
+ } err
+ assert_match "*Bad data format*" $err
+ verify_log_message 0 "*integrity check failed*" 0
+ }
+}
+
+test {corrupt payload: quicklist ziplist wrong count} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ catch {r restore key 0 "\x0E\x01\x13\x13\x00\x00\x00\x0E\x00\x00\x00\x03\x00\x00\x02\x61\x00\x04\x02\x62\x00\xFF\x09\x00\x4D\xE2\x0A\x2F\x08\x25\xDF\x91"} err
+ assert_match "*Bad data format*" $err
+ verify_log_message 0 "*integrity check failed*" 0
+ }
+}
+
+test {corrupt payload: #3080 - quicklist} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ catch {
+ r RESTORE key 0 "\x0E\x01\x80\x00\x00\x00\x10\x41\x41\x41\x41\x41\x41\x41\x41\x02\x00\x00\x80\x41\x41\x41\x41\x07\x00\x03\xC7\x1D\xEF\x54\x68\xCC\xF3"
+ r DUMP key ;# DUMP was used in the original issue, but now even with shallow sanitization restore safely fails, so this is dead code
+ } err
+ assert_match "*Bad data format*" $err
+ verify_log_message 0 "*integrity check failed*" 0
+ }
+}
+
+test {corrupt payload: quicklist with empty ziplist} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ r debug set-skip-checksum-validation 1
+ catch {r restore key 0 "\x0E\x01\x0B\x0B\x00\x00\x00\x0A\x00\x00\x00\x00\x00\xFF\x09\x00\xC2\x69\x37\x83\x3C\x7F\xFE\x6F" replace} err
+ assert_match "*Bad data format*" $err
+ r ping
+ }
+}
+
+test {corrupt payload: quicklist encoded_len is 0} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ catch { r restore _list 0 "\x12\x01\x01\x00\x0a\x00\x8f\xc6\xc0\x57\x1c\x0a\xb3\x3c" replace } err
+ assert_match "*Bad data format*" $err
+ r ping
+ }
+}
+
+test {corrupt payload: quicklist listpack entry start with EOF} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload yes
+ catch { r restore _list 0 "\x12\x01\x02\x0b\x0b\x00\x00\x00\x01\x00\x81\x61\x02\xff\xff\x0a\x00\x7e\xd8\xde\x5b\x0d\xd7\x70\xb8" replace } err
+ assert_match "*Bad data format*" $err
+ r ping
+ }
+}
+
+test {corrupt payload: #3080 - ziplist} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ # shallow sanitization is enough for restore to safely reject the payload with wrong size
+ r config set sanitize-dump-payload no
+ catch {
+ r RESTORE key 0 "\x0A\x80\x00\x00\x00\x10\x41\x41\x41\x41\x41\x41\x41\x41\x02\x00\x00\x80\x41\x41\x41\x41\x07\x00\x39\x5B\x49\xE0\xC1\xC6\xDD\x76"
+ } err
+ assert_match "*Bad data format*" $err
+ verify_log_message 0 "*integrity check failed*" 0
+ }
+}
+
+test {corrupt payload: load corrupted rdb with no CRC - #3505} {
+ set server_path [tmpdir "server.rdb-corruption-test"]
+ exec cp tests/assets/corrupt_ziplist.rdb $server_path
+ set srv [start_server [list overrides [list "dir" $server_path "dbfilename" "corrupt_ziplist.rdb" loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no sanitize-dump-payload no]]]
+
+ # wait for termination
+ wait_for_condition 100 50 {
+ ! [is_alive $srv]
+ } else {
+ fail "rdb loading didn't fail"
+ }
+
+ set stdout [dict get $srv stdout]
+ assert_equal [count_message_lines $stdout "Terminating server after rdb file reading failure."] 1
+ assert_lessthan 1 [count_message_lines $stdout "integrity check failed"]
+ kill_server $srv ;# let valgrind look for issues
+}
+
+foreach sanitize_dump {no yes} {
+ test {corrupt payload: load corrupted rdb with empty keys} {
+ set server_path [tmpdir "server.rdb-corruption-empty-keys-test"]
+ exec cp tests/assets/corrupt_empty_keys.rdb $server_path
+ start_server [list overrides [list "dir" $server_path "dbfilename" "corrupt_empty_keys.rdb" "sanitize-dump-payload" $sanitize_dump]] {
+ r select 0
+ assert_equal [r dbsize] 0
+
+ verify_log_message 0 "*skipping empty key: set*" 0
+ verify_log_message 0 "*skipping empty key: list_quicklist*" 0
+ verify_log_message 0 "*skipping empty key: list_quicklist_empty_ziplist*" 0
+ verify_log_message 0 "*skipping empty key: list_ziplist*" 0
+ verify_log_message 0 "*skipping empty key: hash*" 0
+ verify_log_message 0 "*skipping empty key: hash_ziplist*" 0
+ verify_log_message 0 "*skipping empty key: zset*" 0
+ verify_log_message 0 "*skipping empty key: zset_ziplist*" 0
+ verify_log_message 0 "*skipping empty key: zset_listpack*" 0
+ verify_log_message 0 "*empty keys skipped: 9*" 0
+ }
+ }
+}
+
+test {corrupt payload: listpack invalid size header} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ catch {
+ r restore key 0 "\x0F\x01\x10\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x40\x55\x5F\x00\x00\x00\x0F\x00\x01\x01\x00\x01\x02\x01\x88\x31\x00\x00\x00\x00\x00\x00\x00\x09\x88\x32\x00\x00\x00\x00\x00\x00\x00\x09\x00\x01\x00\x01\x00\x01\x00\x01\x02\x02\x88\x31\x00\x00\x00\x00\x00\x00\x00\x09\x88\x61\x00\x00\x00\x00\x00\x00\x00\x09\x88\x32\x00\x00\x00\x00\x00\x00\x00\x09\x88\x62\x00\x00\x00\x00\x00\x00\x00\x09\x08\x01\xFF\x0A\x01\x00\x00\x09\x00\x45\x91\x0A\x87\x2F\xA5\xF9\x2E"
+ } err
+ assert_match "*Bad data format*" $err
+ verify_log_message 0 "*Stream listpack integrity check failed*" 0
+ }
+}
+
+test {corrupt payload: listpack too long entry len} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ catch {
+ r restore key 0 "\x0F\x01\x10\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x40\x55\x55\x00\x00\x00\x0F\x00\x01\x01\x00\x01\x02\x01\x88\x31\x00\x00\x00\x00\x00\x00\x00\x09\x88\x32\x00\x00\x00\x00\x00\x00\x00\x09\x00\x01\x00\x01\x00\x01\x00\x01\x02\x02\x89\x31\x00\x00\x00\x00\x00\x00\x00\x09\x88\x61\x00\x00\x00\x00\x00\x00\x00\x09\x88\x32\x00\x00\x00\x00\x00\x00\x00\x09\x88\x62\x00\x00\x00\x00\x00\x00\x00\x09\x08\x01\xFF\x0A\x01\x00\x00\x09\x00\x40\x63\xC9\x37\x03\xA2\xE5\x68"
+ } err
+ assert_equal [count_log_message 0 "crashed by signal"] 0
+ assert_equal [count_log_message 0 "ASSERTION FAILED"] 1
+ }
+}
+
+test {corrupt payload: listpack very long entry len} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ catch {
+ # This will catch migrated payloads from v6.2.x
+ r restore key 0 "\x0F\x01\x10\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x40\x55\x55\x00\x00\x00\x0F\x00\x01\x01\x00\x01\x02\x01\x88\x31\x00\x00\x00\x00\x00\x00\x00\x09\x88\x32\x00\x00\x00\x00\x00\x00\x00\x09\x00\x01\x00\x01\x00\x01\x00\x01\x02\x02\x88\x31\x00\x00\x00\x00\x00\x00\x00\x09\x88\x61\x00\x00\x00\x00\x00\x00\x00\x09\x88\x32\x00\x00\x00\x00\x00\x00\x00\x09\x9C\x62\x00\x00\x00\x00\x00\x00\x00\x09\x08\x01\xFF\x0A\x01\x00\x00\x09\x00\x63\x6F\x42\x8E\x7C\xB5\xA2\x9D"
+ } err
+ assert_equal [count_log_message 0 "crashed by signal"] 0
+ assert_equal [count_log_message 0 "ASSERTION FAILED"] 1
+ }
+}
+
+test {corrupt payload: listpack too long entry prev len} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload yes
+ catch {
+ r restore key 0 "\x0F\x01\x10\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x40\x55\x55\x00\x00\x00\x0F\x00\x01\x01\x00\x15\x02\x01\x88\x31\x00\x00\x00\x00\x00\x00\x00\x09\x88\x32\x00\x00\x00\x00\x00\x00\x00\x09\x00\x01\x00\x01\x00\x01\x00\x01\x02\x02\x88\x31\x00\x00\x00\x00\x00\x00\x00\x09\x88\x61\x00\x00\x00\x00\x00\x00\x00\x09\x88\x32\x00\x00\x00\x00\x00\x00\x00\x09\x88\x62\x00\x00\x00\x00\x00\x00\x00\x09\x08\x01\xFF\x0A\x01\x00\x00\x09\x00\x06\xFB\x44\x24\x0A\x8E\x75\xEA"
+ } err
+ assert_match "*Bad data format*" $err
+ verify_log_message 0 "*Stream listpack integrity check failed*" 0
+ }
+}
+
+test {corrupt payload: stream with duplicate consumers} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ catch {
+ r restore key 0 "\x0F\x00\x00\x00\x00\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x00\x00\x00\x02\x04\x6E\x61\x6D\x65\x2A\x4C\xAA\x9A\x7D\x01\x00\x00\x00\x04\x6E\x61\x6D\x65\x2B\x4C\xAA\x9A\x7D\x01\x00\x00\x00\x0A\x00\xCC\xED\x8C\xA7\x62\xEE\xC7\xC8"
+ } err
+ assert_match "*Bad data format*" $err
+ verify_log_message 0 "*Duplicate stream consumer detected*" 0
+ r ping
+ }
+}
+
+test {corrupt payload: hash ziplist with duplicate records} {
+ # when we do perform full sanitization, we expect duplicate records to fail the restore
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload yes
+ r debug set-skip-checksum-validation 1
+ catch { r RESTORE _hash 0 "\x0D\x3D\x3D\x00\x00\x00\x3A\x00\x00\x00\x14\x13\x00\xF5\x02\xF5\x02\xF2\x02\x53\x5F\x31\x04\xF3\x02\xF3\x02\xF7\x02\xF7\x02\xF8\x02\x02\x5F\x37\x04\xF1\x02\xF1\x02\xF6\x02\x02\x5F\x35\x04\xF4\x02\x02\x5F\x33\x04\xFA\x02\x02\x5F\x39\x04\xF9\x02\xF9\xFF\x09\x00\xB5\x48\xDE\x62\x31\xD0\xE5\x63" } err
+ assert_match "*Bad data format*" $err
+ }
+}
+
+test {corrupt payload: hash listpack with duplicate records} {
+ # when we do perform full sanitization, we expect duplicate records to fail the restore
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload yes
+ r debug set-skip-checksum-validation 1
+ catch { r RESTORE _hash 0 "\x10\x17\x17\x00\x00\x00\x04\x00\x82\x61\x00\x03\x82\x62\x00\x03\x82\x61\x00\x03\x82\x64\x00\x03\xff\x0a\x00\xc0\xcf\xa6\x87\xe5\xa7\xc5\xbe" } err
+ assert_match "*Bad data format*" $err
+ }
+}
+
+test {corrupt payload: hash listpack with duplicate records - convert} {
+ # when we do NOT perform full sanitization, but we convert to a hash table, we expect duplicate records to cause a panic
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ r config set hash-max-listpack-entries 1
+ r debug set-skip-checksum-validation 1
+ catch { r RESTORE _hash 0 "\x10\x17\x17\x00\x00\x00\x04\x00\x82\x61\x00\x03\x82\x62\x00\x03\x82\x61\x00\x03\x82\x64\x00\x03\xff\x0a\x00\xc0\xcf\xa6\x87\xe5\xa7\xc5\xbe" } err
+ assert_equal [count_log_message 0 "crashed by signal"] 0
+ assert_equal [count_log_message 0 "listpack with dup elements"] 1
+ }
+}
+
+test {corrupt payload: hash ziplist uneven record count} {
+    # when we do NOT perform full sanitization, but shallow sanitization can detect uneven count
+    #
+    # Payload note: a byte that is followed by a hex-digit character is written
+    # as its own two-digit \x escape (\x02\x61 rather than \x02a). Tcl 8.5 lets
+    # \x consume any number of hex digits and keeps only the low byte, whereas
+    # Tcl 8.6 reads at most two digits, so the short form would send different
+    # bytes depending on the interpreter version.
+    start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+        r config set sanitize-dump-payload no
+        r debug set-skip-checksum-validation 1
+        catch { r RESTORE _hash 0 "\r\x1b\x1b\x00\x00\x00\x16\x00\x00\x00\x04\x00\x00\x02\x61\x00\x04\x02\x62\x00\x04\x02\x61\x00\x04\x02\x64\x00\xff\t\x00\xa1\x98\x36x\xcc\x8e\x93\x2e" } err
+        # The restore must be refused.
+        assert_match "*Bad data format*" $err
+    }
+}
+
+test {corrupt payload: hash duplicate records} {
+    # when we do perform full sanitization, we expect duplicate records to fail the restore
+    #
+    # Payload note: a byte that is followed by a hex-digit character is written
+    # as its own two-digit \x escape (\x01\x61 rather than \x01a). Tcl 8.5 lets
+    # \x consume any number of hex digits and keeps only the low byte, whereas
+    # Tcl 8.6 reads at most two digits, so the short form would send different
+    # bytes depending on the interpreter version.
+    start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+        r config set sanitize-dump-payload yes
+        r debug set-skip-checksum-validation 1
+        catch { r RESTORE _hash 0 "\x04\x02\x01\x61\x01\x62\x01\x61\x01\x64\t\x00\xc6\x9c\xab\xbc\bk\x0c\x06" } err
+        # The restore must be refused.
+        assert_match "*Bad data format*" $err
+    }
+}
+
+test {corrupt payload: hash empty zipmap} {
+    # Sanitization off: the RESTORE must still be refused, and the log is
+    # checked for the zipmap integrity-check message.
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload no
+        r debug set-skip-checksum-validation 1
+        catch {
+            r RESTORE _hash 0 "\x09\x02\x00\xFF\x09\x00\xC0\xF1\xB8\x67\x4C\x16\xAC\xE3"
+        } err
+        assert_match {*Bad data format*} $err
+        verify_log_message 0 {*Zipmap integrity check failed*} 0
+    }
+}
+
+test {corrupt payload: fuzzer findings - NPD in streamIteratorGetID} {
+ # Fuzzer regression case: restore a stream payload with sanitization off, then query it.
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ r debug set-skip-checksum-validation 1
+ # Both commands run under catch, so an error raised by either one is swallowed here.
+ catch {
+ r RESTORE key 0 "\x0F\x01\x10\x00\x00\x01\x73\xBD\x68\x48\x71\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x03\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x05\x01\x02\x01\x00\x01\x01\x01\x01\x01\x82\x5F\x31\x03\x05\x01\x02\x01\x00\x01\x02\x01\x01\x01\x02\x01\x48\x01\xFF\x03\x81\x00\x00\x01\x73\xBD\x68\x48\x71\x02\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x73\xBD\x68\x48\x71\x00\x01\x00\x00\x01\x73\xBD\x68\x48\x71\x00\x00\x00\x00\x00\x00\x00\x00\x72\x48\x68\xBD\x73\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\x72\x48\x68\xBD\x73\x01\x00\x00\x01\x00\x00\x01\x73\xBD\x68\x48\x71\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\x80\xCD\xB0\xD5\x1A\xCE\xFF\x10"
+ r XREVRANGE key 725 233
+ }
+ # The log must report no crash by signal and one "ASSERTION FAILED" message.
+ assert_equal [count_log_message 0 "crashed by signal"] 0
+ assert_equal [count_log_message 0 "ASSERTION FAILED"] 1
+ }
+}
+
+test {corrupt payload: fuzzer findings - listpack NPD on invalid stream} {
+ # Fuzzer regression case: restore a stream payload with sanitization off, then read from it.
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ r debug set-skip-checksum-validation 1
+ # Both commands run under catch, so an error raised by either one is swallowed here.
+ catch {
+ r RESTORE _stream 0 "\x0F\x01\x10\x00\x00\x01\x73\xDC\xB6\x6B\xF1\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x03\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x05\x01\x02\x01\x1F\x01\x00\x01\x01\x01\x6D\x5F\x31\x03\x05\x01\x02\x01\x29\x01\x00\x01\x01\x01\x02\x01\x05\x01\xFF\x03\x81\x00\x00\x01\x73\xDC\xB6\x6C\x1A\x00\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x73\xDC\xB6\x6B\xF1\x00\x01\x00\x00\x01\x73\xDC\xB6\x6B\xF1\x00\x00\x00\x00\x00\x00\x00\x00\x4B\x6C\xB6\xDC\x73\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\x3D\x6C\xB6\xDC\x73\x01\x00\x00\x01\x00\x00\x01\x73\xDC\xB6\x6B\xF1\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\xC7\x7D\x1C\xD7\x04\xFF\xE6\x9D"
+ r XREAD STREAMS _stream 519389898758
+ }
+ # The log must report no crash by signal and one "ASSERTION FAILED" message.
+ assert_equal [count_log_message 0 "crashed by signal"] 0
+ assert_equal [count_log_message 0 "ASSERTION FAILED"] 1
+ }
+}
+
+test {corrupt payload: fuzzer findings - NPD in quicklistIndex} {
+    # Sanitization off: the RESTORE must be refused and the log is checked
+    # for an integrity-check failure message.
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload no
+        r debug set-skip-checksum-validation 1
+        catch { r RESTORE key 0 "\x0E\x01\x13\x13\x00\x00\x00\x10\x00\x00\x00\x03\x12\x00\xF3\x02\x02\x5F\x31\x04\xF1\xFF\x09\x00\xC9\x4B\x31\xFE\x61\xC0\x96\xFE" } err
+        assert_match {*Bad data format*} $err
+        verify_log_message 0 {*integrity check failed*} 0
+    }
+}
+
+test {corrupt payload: fuzzer findings - encoded entry header reach outside the allocation} {
+    # sanitize-dump-payload is left at whatever the server starts with; only
+    # checksum validation is disabled. The RESTORE must be refused and the
+    # log is checked for an integrity-check failure message.
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r debug set-skip-checksum-validation 1
+        catch { r RESTORE key 0 "\x0D\x19\x19\x00\x00\x00\x16\x00\x00\x00\x06\x00\x00\xF1\x02\xF1\x02\xF2\x02\x02\x5F\x31\x04\x99\x02\xF3\xFF\x09\x00\xC5\xB8\x10\xC0\x8A\xF9\x16\xDF" } err
+        assert_match {*Bad data format*} $err
+        verify_log_message 0 {*integrity check failed*} 0
+    }
+}
+
+
+test {corrupt payload: fuzzer findings - invalid ziplist encoding} {
+    # Sanitization on: the RESTORE must be refused and the log is checked
+    # for an integrity-check failure message.
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload yes
+        r debug set-skip-checksum-validation 1
+        catch { r RESTORE _listbig 0 "\x0E\x02\x1B\x1B\x00\x00\x00\x16\x00\x00\x00\x05\x00\x00\x02\x5F\x39\x04\xF9\x02\x86\x5F\x37\x04\xF7\x02\x02\x5F\x35\xFF\x19\x19\x00\x00\x00\x16\x00\x00\x00\x05\x00\x00\xF5\x02\x02\x5F\x33\x04\xF3\x02\x02\x5F\x31\x04\xF1\xFF\x09\x00\x0C\xFC\x99\x2C\x23\x45\x15\x60" } err
+        assert_match {*Bad data format*} $err
+        verify_log_message 0 {*integrity check failed*} 0
+    }
+}
+
+test {corrupt payload: fuzzer findings - hash crash} {
+    # No explicit assertions and no catch: the RESTORE and the two hash
+    # commands that follow simply have to complete without raising an error.
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload yes
+        r debug set-skip-checksum-validation 1
+
+        r RESTORE _hash 0 "\x0D\x19\x19\x00\x00\x00\x16\x00\x00\x00\x06\x00\x00\xF1\x02\xF1\x02\xF2\x02\x02\x5F\x31\x04\xF3\x02\xF3\xFF\x09\x00\x38\xB8\x10\xC0\x8A\xF9\x16\xDF"
+
+        r HSET _hash 394891450 1635910264
+        r HMGET _hash 887312884855
+    }
+}
+
+test {corrupt payload: fuzzer findings - uneven entry count in hash} {
+    # Only checksum validation is disabled here. The RESTORE must be refused
+    # and the log is checked for an integrity-check failure message.
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r debug set-skip-checksum-validation 1
+        catch { r RESTORE _hashbig 0 "\x0D\x3D\x3D\x00\x00\x00\x38\x00\x00\x00\x14\x00\x00\xF2\x02\x02\x5F\x31\x04\x1C\x02\xF7\x02\xF1\x02\xF1\x02\xF5\x02\xF5\x02\xF4\x02\x02\x5F\x33\x04\xF6\x02\x02\x5F\x35\x04\xF8\x02\x02\x5F\x37\x04\xF9\x02\xF9\x02\xF3\x02\xF3\x02\xFA\x02\x02\x5F\x39\xFF\x09\x00\x73\xB7\x68\xC8\x97\x24\x8E\x88" } err
+        assert_match {*Bad data format*} $err
+        verify_log_message 0 {*integrity check failed*} 0
+    }
+}
+
+test {corrupt payload: fuzzer findings - invalid read in lzf_decompress} {
+    # Sanitization off: the RESTORE must be refused with "Bad data format".
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload no
+        r debug set-skip-checksum-validation 1
+        catch {
+            r RESTORE _setbig 0 "\x02\x03\x02\x5F\x31\xC0\x02\xC3\x00\x09\x00\xE6\xDC\x76\x44\xFF\xEB\x3D\xFE"
+        } err
+        assert_match {*Bad data format*} $err
+    }
+}
+
+test {corrupt payload: fuzzer findings - leak in rdbloading due to dup entry in set} {
+    # Sanitization off: the RESTORE must be refused with "Bad data format".
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload no
+        r debug set-skip-checksum-validation 1
+        catch {
+            r RESTORE _setbig 0 "\x02\x0A\x02\x5F\x39\xC0\x06\x02\x5F\x31\xC0\x00\xC0\x04\x02\x5F\x35\xC0\x02\xC0\x08\x02\x5F\x31\x02\x5F\x33\x09\x00\x7A\x5A\xFB\x90\x3A\xE9\x3C\xBE"
+        } err
+        assert_match {*Bad data format*} $err
+    }
+}
+
+test {corrupt payload: fuzzer findings - empty intset} {
+    # Sanitization off: the RESTORE must be refused, and a ping is issued
+    # afterwards (an error from it would fail the test).
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload no
+        r debug set-skip-checksum-validation 1
+        catch {
+            r RESTORE _setbig 0 "\x02\xC0\xC0\x06\x02\x5F\x39\xC0\x02\x02\x5F\x33\xC0\x00\x02\x5F\x31\xC0\x04\xC0\x08\x02\x5F\x37\x02\x5F\x35\x09\x00\xC5\xD4\x6D\xBA\xAD\x14\xB7\xE7"
+        } err
+        assert_match {*Bad data format*} $err
+        r ping
+    }
+}
+
+test {corrupt payload: fuzzer findings - zset ziplist entry lensize is 0} {
+    # Sanitization off: the RESTORE must be refused and the log is checked
+    # for the zset ziplist integrity-check message.
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload no
+        r debug set-skip-checksum-validation 1
+        catch {
+            r RESTORE _zsetbig 0 "\x0C\x3D\x3D\x00\x00\x00\x3A\x00\x00\x00\x14\x00\x00\xF1\x02\xF1\x02\x02\x5F\x31\x04\xF2\x02\xF3\x02\xF3\x02\x02\x5F\x33\x04\xF4\x02\xEE\x02\xF5\x02\x02\x5F\x35\x04\xF6\x02\xF7\x02\xF7\x02\x02\x5F\x37\x04\xF8\x02\xF9\x02\xF9\x02\x02\x5F\x39\x04\xFA\xFF\x09\x00\xAE\xF9\x77\x2A\x47\x24\x33\xF6"
+        } err
+        assert_match {*Bad data format*} $err
+        verify_log_message 0 {*Zset ziplist integrity check failed*} 0
+    }
+}
+
+test {corrupt payload: fuzzer findings - valgrind ziplist prevlen reaches outside the ziplist} {
+    # Sanitization off: the RESTORE must be refused and the log is checked
+    # for an integrity-check failure message.
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload no
+        r debug set-skip-checksum-validation 1
+        catch {
+            r RESTORE _listbig 0 "\x0E\x02\x1B\x1B\x00\x00\x00\x16\x00\x00\x00\x05\x00\x00\x02\x5F\x39\x04\xF9\x02\x02\x5F\x37\x04\xF7\x02\x02\x5F\x35\xFF\x19\x19\x00\x00\x00\x16\x00\x00\x00\x05\x00\x00\xF5\x02\x02\x5F\x33\x04\xF3\x95\x02\x5F\x31\x04\xF1\xFF\x09\x00\x0C\xFC\x99\x2C\x23\x45\x15\x60"
+        } err
+        assert_match {*Bad data format*} $err
+        verify_log_message 0 {*integrity check failed*} 0
+    }
+}
+
+test {corrupt payload: fuzzer findings - valgrind - bad rdbLoadDoubleValue} {
+    # Sanitization off: the RESTORE must be refused with "Bad data format".
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload no
+        r debug set-skip-checksum-validation 1
+        catch {
+            r RESTORE _list 0 "\x03\x01\x11\x11\x00\x00\x00\x0A\x00\x00\x00\x01\x00\x00\xD0\x07\x1A\xE9\x02\xFF\x09\x00\x1A\x06\x07\x32\x41\x28\x3A\x46"
+        } err
+        assert_match {*Bad data format*} $err
+    }
+}
+
+test {corrupt payload: fuzzer findings - valgrind ziplist prev too big} {
+    # Sanitization off: the RESTORE must be refused and the log is checked
+    # for an integrity-check failure message.
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload no
+        r debug set-skip-checksum-validation 1
+        catch {
+            r RESTORE _list 0 "\x0E\x01\x13\x13\x00\x00\x00\x10\x00\x00\x00\x03\x00\x00\xF3\x02\x02\x5F\x31\xC1\xF1\xFF\x09\x00\xC9\x4B\x31\xFE\x61\xC0\x96\xFE"
+        } err
+        assert_match {*Bad data format*} $err
+        verify_log_message 0 {*integrity check failed*} 0
+    }
+}
+
+test {corrupt payload: fuzzer findings - lzf decompression fails, avoid valgrind invalid read} {
+ # Fuzzer regression case: with sanitization off, RESTORE of this stream payload must be refused.
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ r debug set-skip-checksum-validation 1
+ # catch stores the error reply of the failed RESTORE in err.
+ catch {r RESTORE _stream 0 "\x0F\x02\x10\x00\x00\x01\x73\xDD\xAA\x2A\xB9\x00\x00\x00\x00\x00\x00\x00\x00\xC3\x40\x4B\x40\x5C\x18\x5C\x00\x00\x00\x24\x00\x05\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x00\x01\x20\x03\x00\x05\x20\x1C\x40\x07\x05\x01\x01\x82\x5F\x31\x03\x80\x0D\x40\x00\x00\x02\x60\x19\x40\x27\x40\x19\x00\x33\x60\x19\x40\x29\x02\x01\x01\x04\x20\x19\x00\xFF\x10\x00\x00\x01\x73\xDD\xAA\x2A\xBC\x00\x00\x00\x00\x00\x00\x00\x00\xC3\x40\x4D\x40\x5E\x18\x5E\x00\x00\x00\x24\x00\x05\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x06\x01\x01\x82\x5F\x35\x03\x05\x20\x1E\x17\x0B\x03\x01\x01\x06\x01\x40\x0B\x00\x01\x60\x0D\x02\x82\x5F\x37\x60\x19\x80\x00\x00\x08\x60\x19\x80\x27\x02\x82\x5F\x39\x20\x19\x00\xFF\x0A\x81\x00\x00\x01\x73\xDD\xAA\x2A\xBE\x00\x00\x09\x00\x21\x85\x77\x43\x71\x7B\x17\x88"} err
+ assert_match "*Bad data format*" $err
+ }
+}
+
+test {corrupt payload: fuzzer findings - stream bad lp_count} {
+ # Fuzzer regression case: with sanitization on, RESTORE of this stream payload must be refused.
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload yes
+ r debug set-skip-checksum-validation 1
+ # catch stores the error reply of the failed RESTORE in err.
+ catch { r RESTORE _stream 0 "\x0F\x01\x10\x00\x00\x01\x73\xDE\xDF\x7D\x9B\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x03\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x56\x01\x02\x01\x22\x01\x00\x01\x01\x01\x82\x5F\x31\x03\x05\x01\x02\x01\x2C\x01\x00\x01\x01\x01\x02\x01\x05\x01\xFF\x03\x81\x00\x00\x01\x73\xDE\xDF\x7D\xC7\x00\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x73\xDE\xDF\x7D\x9B\x00\x01\x00\x00\x01\x73\xDE\xDF\x7D\x9B\x00\x00\x00\x00\x00\x00\x00\x00\xF9\x7D\xDF\xDE\x73\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\xEB\x7D\xDF\xDE\x73\x01\x00\x00\x01\x00\x00\x01\x73\xDE\xDF\x7D\x9B\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\xB2\xA8\xA7\x5F\x1B\x61\x72\xD5"} err
+ assert_match "*Bad data format*" $err
+ # Ping after the rejected restore; an error from it would fail the test.
+ r ping
+ }
+}
+
+test {corrupt payload: fuzzer findings - stream bad lp_count - unsanitized} {
+ # Fuzzer regression case: with sanitization off the RESTORE is issued without catch, then the stream is queried.
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ r debug set-skip-checksum-validation 1
+ r RESTORE _stream 0 "\x0F\x01\x10\x00\x00\x01\x73\xDE\xDF\x7D\x9B\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x03\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x56\x01\x02\x01\x22\x01\x00\x01\x01\x01\x82\x5F\x31\x03\x05\x01\x02\x01\x2C\x01\x00\x01\x01\x01\x02\x01\x05\x01\xFF\x03\x81\x00\x00\x01\x73\xDE\xDF\x7D\xC7\x00\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x73\xDE\xDF\x7D\x9B\x00\x01\x00\x00\x01\x73\xDE\xDF\x7D\x9B\x00\x00\x00\x00\x00\x00\x00\x00\xF9\x7D\xDF\xDE\x73\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\xEB\x7D\xDF\xDE\x73\x01\x00\x00\x01\x00\x00\x01\x73\xDE\xDF\x7D\x9B\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\xB2\xA8\xA7\x5F\x1B\x61\x72\xD5"
+ # The query runs under catch, so an error raised by it is swallowed here.
+ catch { r XREVRANGE _stream 638932639 738}
+ # The log must report no crash by signal and one "ASSERTION FAILED" message.
+ assert_equal [count_log_message 0 "crashed by signal"] 0
+ assert_equal [count_log_message 0 "ASSERTION FAILED"] 1
+ }
+}
+
+test {corrupt payload: fuzzer findings - stream integrity check issue} {
+ # Fuzzer regression case: with sanitization on, RESTORE of this stream payload must be refused.
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload yes
+ r debug set-skip-checksum-validation 1
+ # catch stores the error reply of the failed RESTORE in err.
+ catch { r RESTORE _stream 0 "\x0F\x02\x10\x00\x00\x01\x75\x2D\xA2\x90\x67\x00\x00\x00\x00\x00\x00\x00\x00\xC3\x40\x4F\x40\x5C\x18\x5C\x00\x00\x00\x24\x00\x05\x01\x00\x01\x4A\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x00\x01\x20\x03\x00\x05\x20\x1C\x40\x09\x05\x01\x01\x82\x5F\x31\x03\x80\x0D\x00\x02\x20\x0D\x00\x02\xA0\x19\x00\x03\x20\x0B\x02\x82\x5F\x33\xA0\x19\x00\x04\x20\x0D\x00\x04\x20\x19\x00\xFF\x10\x00\x00\x01\x75\x2D\xA2\x90\x67\x00\x00\x00\x00\x00\x00\x00\x05\xC3\x40\x56\x40\x60\x18\x60\x00\x00\x00\x24\x00\x05\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x06\x01\x01\x82\x5F\x35\x03\x05\x20\x1E\x40\x0B\x03\x01\x01\x06\x01\x80\x0B\x00\x02\x20\x0B\x02\x82\x5F\x37\x60\x19\x03\x01\x01\xDF\xFB\x20\x05\x00\x08\x60\x1A\x20\x0C\x00\xFC\x20\x05\x02\x82\x5F\x39\x20\x1B\x00\xFF\x0A\x81\x00\x00\x01\x75\x2D\xA2\x90\x68\x01\x00\x09\x00\x1D\x6F\xC0\x69\x8A\xDE\xF7\x92" } err
+ assert_match "*Bad data format*" $err
+ }
+}
+
+test {corrupt payload: fuzzer findings - infinite loop} {
+ # Fuzzer regression case: with sanitization off the RESTORE is issued without catch, then the stream is queried.
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ r debug set-skip-checksum-validation 1
+ r RESTORE _stream 0 "\x0F\x01\x10\x00\x00\x01\x75\x3A\xA6\xD0\x93\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x03\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x05\x01\x02\x01\x00\x01\x01\x01\x01\x01\x82\x5F\x31\x03\xFD\x01\x02\x01\x00\x01\x02\x01\x01\x01\x02\x01\x05\x01\xFF\x03\x81\x00\x00\x01\x75\x3A\xA6\xD0\x93\x02\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x75\x3A\xA6\xD0\x93\x00\x01\x00\x00\x01\x75\x3A\xA6\xD0\x93\x00\x00\x00\x00\x00\x00\x00\x00\x94\xD0\xA6\x3A\x75\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\x94\xD0\xA6\x3A\x75\x01\x00\x00\x01\x00\x00\x01\x75\x3A\xA6\xD0\x93\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\xC4\x09\xAD\x69\x7E\xEE\xA6\x2F"
+ # The query runs under catch, so an error raised by it is swallowed here.
+ catch { r XREVRANGE _stream 288270516 971031845 }
+ # The log must report no crash by signal and one "ASSERTION FAILED" message.
+ assert_equal [count_log_message 0 "crashed by signal"] 0
+ assert_equal [count_log_message 0 "ASSERTION FAILED"] 1
+ }
+}
+
+test {corrupt payload: fuzzer findings - hash ziplist too long entry len} {
+    # Only checksum validation is disabled here. The RESTORE must be refused
+    # and the log is checked for an integrity-check failure message.
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r debug set-skip-checksum-validation 1
+        catch { r RESTORE _hash 0 "\x0D\x3D\x3D\x00\x00\x00\x3A\x00\x00\x00\x14\x13\x00\xF5\x02\xF5\x02\xF2\x02\x53\x5F\x31\x04\xF3\x02\xF3\x02\xF7\x02\xF7\x02\xF8\x02\x02\x5F\x37\x04\xF1\x02\xF1\x02\xF6\x02\x02\x5F\x35\x04\xF4\x02\x02\x5F\x33\x04\xFA\x02\x02\x5F\x39\x04\xF9\x02\xF9\xFF\x09\x00\xB5\x48\xDE\x62\x31\xD0\xE5\x63" } err
+        assert_match {*Bad data format*} $err
+        verify_log_message 0 {*integrity check failed*} 0
+    }
+}
+
+if {$run_oom_tests} {
+
+# The two tests below are defined only when run_oom_tests is true.
+
+test {corrupt payload: OOM in rdbGenericLoadStringObject} {
+    # Sanitization off, checksum validation left untouched. The RESTORE must
+    # be refused and a ping is issued afterwards.
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload no
+        catch {
+            r RESTORE x 0 "\x0A\x81\x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x13\x00\x00\x00\x0E\x00\x00\x00\x02\x00\x00\x02\x61\x00\x04\x02\x62\x00\xFF\x09\x00\x57\x04\xE5\xCD\xD4\x37\x6C\x57"
+        } err
+        assert_match {*Bad data format*} $err
+        r ping
+    }
+}
+
+test {corrupt payload: fuzzer findings - OOM in dictExpand} {
+    # Sanitization off, checksum validation skipped. The RESTORE must be
+    # refused and a ping is issued afterwards.
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload no
+        r debug set-skip-checksum-validation 1
+        catch {
+            r RESTORE x 0 "\x02\x81\x02\x5F\x31\xC0\x00\xC0\x02\x09\x00\xCD\x84\x2C\xB7\xE8\xA4\x49\x57"
+        } err
+        assert_match {*Bad data format*} $err
+        r ping
+    }
+}
+
+}
+
+test {corrupt payload: fuzzer findings - zset ziplist invalid tail offset} {
+    # Sanitization off: the RESTORE must be refused and the log is checked
+    # for the zset ziplist integrity-check message.
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload no
+        r debug set-skip-checksum-validation 1
+        catch {
+            r RESTORE _zset 0 "\x0C\x19\x19\x00\x00\x00\x02\x00\x00\x00\x06\x00\x00\xF1\x02\xF1\x02\x02\x5F\x31\x04\xF2\x02\xF3\x02\xF3\xFF\x09\x00\x4D\x72\x7B\x97\xCD\x9A\x70\xC1"
+        } err
+        assert_match {*Bad data format*} $err
+        verify_log_message 0 {*Zset ziplist integrity check failed*} 0
+    }
+}
+
+test {corrupt payload: fuzzer findings - negative reply length} {
+ # Fuzzer regression case: with sanitization off the RESTORE is issued without catch, then the stream is written to and inspected.
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ r debug set-skip-checksum-validation 1
+ r RESTORE _stream 0 "\x0F\x01\x10\x00\x00\x01\x75\xCF\xA1\x16\xA7\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x03\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x05\x01\x02\x01\x00\x01\x01\x01\x01\x01\x14\x5F\x31\x03\x05\x01\x02\x01\x00\x01\x02\x01\x01\x01\x02\x01\x05\x01\xFF\x03\x81\x00\x00\x01\x75\xCF\xA1\x16\xA7\x02\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x75\xCF\xA1\x16\xA7\x01\x01\x00\x00\x01\x75\xCF\xA1\x16\xA7\x00\x00\x00\x00\x00\x00\x00\x01\xA7\x16\xA1\xCF\x75\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\xA7\x16\xA1\xCF\x75\x01\x00\x00\x01\x00\x00\x01\x75\xCF\xA1\x16\xA7\x00\x00\x00\x00\x00\x00\x00\x01\x09\x00\x1B\x42\x52\xB8\xDD\x5C\xE5\x4E"
+ # Each follow-up command runs under its own catch, so an error from either is swallowed here.
+ catch {r XADD _stream * -956 -2601503852}
+ catch {r XINFO STREAM _stream FULL}
+ # The log must report no crash by signal and one "ASSERTION FAILED" message.
+ assert_equal [count_log_message 0 "crashed by signal"] 0
+ assert_equal [count_log_message 0 "ASSERTION FAILED"] 1
+ }
+}
+
+test {corrupt payload: fuzzer findings - valgrind negative malloc} {
+ # Fuzzer regression case: with sanitization on, RESTORE of this payload must be refused.
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload yes
+ r debug set-skip-checksum-validation 1
+ # catch stores the error reply of the failed RESTORE in err.
+ catch {r RESTORE _key 0 "\x0E\x01\x81\xD6\xD6\x00\x00\x00\x0A\x00\x00\x00\x01\x00\x00\x40\xC8\x6F\x2F\x36\xE2\xDF\xE3\x2E\x26\x64\x8B\x87\xD1\x7A\xBD\xFF\xEF\xEF\x63\x65\xF6\xF8\x8C\x4E\xEC\x96\x89\x56\x88\xF8\x3D\x96\x5A\x32\xBD\xD1\x36\xD8\x02\xE6\x66\x37\xCB\x34\x34\xC4\x52\xA7\x2A\xD5\x6F\x2F\x7E\xEE\xA2\x94\xD9\xEB\xA9\x09\x38\x3B\xE1\xA9\x60\xB6\x4E\x09\x44\x1F\x70\x24\xAA\x47\xA8\x6E\x30\xE1\x13\x49\x4E\xA1\x92\xC4\x6C\xF0\x35\x83\xD9\x4F\xD9\x9C\x0A\x0D\x7A\xE7\xB1\x61\xF5\xC1\x2D\xDC\xC3\x0E\x87\xA6\x80\x15\x18\xBA\x7F\x72\xDD\x14\x75\x46\x44\x0B\xCA\x9C\x8F\x1C\x3C\xD7\xDA\x06\x62\x18\x7E\x15\x17\x24\xAB\x45\x21\x27\xC2\xBC\xBB\x86\x6E\xD8\xBD\x8E\x50\xE0\xE0\x88\xA4\x9B\x9D\x15\x2A\x98\xFF\x5E\x78\x6C\x81\xFC\xA8\xC9\xC8\xE6\x61\xC8\xD1\x4A\x7F\x81\xD6\xA6\x1A\xAD\x4C\xC1\xA2\x1C\x90\x68\x15\x2A\x8A\x36\xC0\x58\xC3\xCC\xA6\x54\x19\x12\x0F\xEB\x46\xFF\x6E\xE3\xA7\x92\xF8\xFF\x09\x00\xD0\x71\xF7\x9F\xF7\x6A\xD6\x2E"} err
+ assert_match "*Bad data format*" $err
+ # Ping after the rejected restore; an error from it would fail the test.
+ r ping
+ }
+}
+
+test {corrupt payload: fuzzer findings - valgrind invalid read} {
+    # Sanitization on: the RESTORE must be refused, and a ping is issued
+    # afterwards (an error from it would fail the test).
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload yes
+        r debug set-skip-checksum-validation 1
+        catch {
+            r RESTORE _key 0 "\x05\x0A\x02\x5F\x39\x00\x00\x00\x00\x00\x00\x22\x40\xC0\x08\x00\x00\x00\x00\x00\x00\x20\x40\x02\x5F\x37\x00\x00\x00\x00\x00\x00\x1C\x40\xC0\x06\x00\x00\x00\x00\x00\x00\x18\x40\x02\x5F\x33\x00\x00\x00\x00\x00\x00\x14\x40\xC0\x04\x00\x00\x00\x00\x00\x00\x10\x40\x02\x5F\x33\x00\x00\x00\x00\x00\x00\x08\x40\xC0\x02\x00\x00\x00\x00\x00\x00\x00\x40\x02\x5F\x31\x00\x00\x00\x00\x00\x00\xF0\x3F\xC0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\x3C\x66\xD7\x14\xA9\xDA\x3C\x69"
+        } err
+        assert_match {*Bad data format*} $err
+        r ping
+    }
+}
+
+test {corrupt payload: fuzzer findings - empty hash ziplist} {
+    # Sanitization on: the RESTORE must be refused, and a ping is issued
+    # afterwards (an error from it would fail the test).
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload yes
+        r debug set-skip-checksum-validation 1
+        catch {
+            r RESTORE _int 0 "\x04\xC0\x01\x09\x00\xF6\x8A\xB6\x7A\x85\x87\x72\x4D"
+        } err
+        assert_match {*Bad data format*} $err
+        r ping
+    }
+}
+
+test {corrupt payload: fuzzer findings - stream with no records} {
+ # Fuzzer regression case: with sanitization off the restore is issued without catch, then the stream is read.
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ r debug set-skip-checksum-validation 1
+ r restore _stream 0 "\x0F\x01\x10\x00\x00\x01\x78\x4D\x55\x68\x09\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x02\x01\x01\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x05\x01\x03\x01\x3E\x01\x00\x01\x01\x01\x82\x5F\x31\x03\x05\x01\x02\x01\x50\x01\x00\x01\x01\x01\x02\x01\x05\x23\xFF\x02\x81\x00\x00\x01\x78\x4D\x55\x68\x59\x00\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x78\x4D\x55\x68\x47\x00\x01\x00\x00\x01\x78\x4D\x55\x68\x47\x00\x00\x00\x00\x00\x00\x00\x00\x9F\x68\x55\x4D\x78\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\x85\x68\x55\x4D\x78\x01\x00\x00\x01\x00\x00\x01\x78\x4D\x55\x68\x47\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\xF1\xC0\x72\x70\x39\x40\x1E\xA9" replace
+ # The read runs under catch, so an error raised by it is swallowed here.
+ catch {r XREAD STREAMS _stream $}
+ # The log must report no crash by signal and one "Guru Meditation" message.
+ assert_equal [count_log_message 0 "crashed by signal"] 0
+ assert_equal [count_log_message 0 "Guru Meditation"] 1
+ }
+}
+
+test {corrupt payload: fuzzer findings - quicklist ziplist tail followed by extra data which start with 0xff} {
+    # Sanitization on: the restore must be refused and the log is checked
+    # for an integrity-check failure message.
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload yes
+        r debug set-skip-checksum-validation 1
+        catch { r restore key 0 "\x0E\x01\x11\x11\x00\x00\x00\x0A\x00\x00\x00\x01\x00\x00\xF6\xFF\xB0\x6C\x9C\xFF\x09\x00\x9C\x37\x47\x49\x4D\xDE\x94\xF5" replace } err
+        assert_match {*Bad data format*} $err
+        verify_log_message 0 {*integrity check failed*} 0
+    }
+}
+
+test {corrupt payload: fuzzer findings - dict init to huge size} {
+    # Sanitization off: the restore must be refused, and a ping is issued
+    # afterwards (an error from it would fail the test).
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload no
+        r debug set-skip-checksum-validation 1
+        catch {
+            r restore key 0 "\x02\x81\xC0\x00\x02\x5F\x31\xC0\x02\x09\x00\xB2\x1B\xE5\x17\x2E\x15\xF4\x6C" replace
+        } err
+        assert_match {*Bad data format*} $err
+        r ping
+    }
+}
+
+test {corrupt payload: fuzzer findings - huge string} {
+    # Sanitization on: the restore must be refused, and a ping is issued
+    # afterwards (an error from it would fail the test).
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload yes
+        r debug set-skip-checksum-validation 1
+        catch {
+            r restore key 0 "\x00\x81\x01\x09\x00\xF6\x2B\xB6\x7A\x85\x87\x72\x4D"
+        } err
+        assert_match {*Bad data format*} $err
+        r ping
+    }
+}
+
+test {corrupt payload: fuzzer findings - stream PEL without consumer} {
+ # Fuzzer regression case: with sanitization on, restore of this stream payload must be refused.
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload yes
+ r debug set-skip-checksum-validation 1
+ # catch stores the error reply of the failed restore in err.
+ catch {r restore _stream 0 "\x0F\x01\x10\x00\x00\x01\x7B\x08\xF0\xB2\x34\x00\x00\x00\x00\x00\x00\x00\x00\xC3\x3B\x40\x42\x19\x42\x00\x00\x00\x18\x00\x02\x01\x01\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x20\x10\x00\x00\x20\x01\x00\x01\x20\x03\x02\x05\x01\x03\x20\x05\x40\x00\x04\x82\x5F\x31\x03\x05\x60\x19\x80\x32\x02\x05\x01\xFF\x02\x81\x00\x00\x01\x7B\x08\xF0\xB2\x34\x02\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x7B\x08\xF0\xB2\x34\x01\x01\x00\x00\x01\x7B\x08\xF0\xB2\x34\x00\x00\x00\x00\x00\x00\x00\x01\x35\xB2\xF0\x08\x7B\x01\x00\x00\x01\x01\x13\x41\x6C\x69\x63\x65\x35\xB2\xF0\x08\x7B\x01\x00\x00\x01\x00\x00\x01\x7B\x08\xF0\xB2\x34\x00\x00\x00\x00\x00\x00\x00\x01\x09\x00\x28\x2F\xE0\xC5\x04\xBB\xA7\x31"} err
+ assert_match "*Bad data format*" $err
+ # Ping after the rejected restore; an error from it would fail the test.
+ r ping
+ }
+}
+
+test {corrupt payload: fuzzer findings - stream listpack valgrind issue} {
+ # Fuzzer regression case: with sanitization off the restore is issued without catch, then the stream is inspected.
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ r debug set-skip-checksum-validation 1
+ r restore _stream 0 "\x0F\x01\x10\x00\x00\x01\x7B\x09\x5E\x94\xFF\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x02\x01\x01\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x05\x01\x03\x01\x25\x01\x00\x01\x01\x01\x82\x5F\x31\x03\x05\x01\x02\x01\x32\x01\x00\x01\x01\x01\x02\x01\xF0\x01\xFF\x02\x81\x00\x00\x01\x7B\x09\x5E\x95\x31\x00\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x7B\x09\x5E\x95\x24\x00\x01\x00\x00\x01\x7B\x09\x5E\x95\x24\x00\x00\x00\x00\x00\x00\x00\x00\x5C\x95\x5E\x09\x7B\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\x4B\x95\x5E\x09\x7B\x01\x00\x00\x01\x00\x00\x01\x7B\x09\x5E\x95\x24\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\x19\x29\x94\xDF\x76\xF8\x1A\xC6"
+ # The inspection runs under catch, so an error raised by it is swallowed here.
+ catch {r XINFO STREAM _stream FULL }
+ # The log must report no crash by signal and one "ASSERTION FAILED" message.
+ assert_equal [count_log_message 0 "crashed by signal"] 0
+ assert_equal [count_log_message 0 "ASSERTION FAILED"] 1
+ }
+}
+
+test {corrupt payload: fuzzer findings - stream with bad lpFirst} {
+ # Fuzzer regression case: with sanitization on, restore of this stream payload must be refused.
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload yes
+ r debug set-skip-checksum-validation 1
+ # catch stores the error reply of the failed restore in err.
+ catch {r restore _stream 0 "\x0F\x01\x10\x00\x00\x01\x7B\x0E\x52\xD2\xEC\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x02\xF7\x01\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x05\x01\x03\x01\x01\x01\x00\x01\x01\x01\x82\x5F\x31\x03\x05\x01\x02\x01\x01\x01\x01\x01\x01\x01\x02\x01\x05\x01\xFF\x02\x81\x00\x00\x01\x7B\x0E\x52\xD2\xED\x01\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x7B\x0E\x52\xD2\xED\x00\x01\x00\x00\x01\x7B\x0E\x52\xD2\xED\x00\x00\x00\x00\x00\x00\x00\x00\xED\xD2\x52\x0E\x7B\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\xED\xD2\x52\x0E\x7B\x01\x00\x00\x01\x00\x00\x01\x7B\x0E\x52\xD2\xED\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\xAC\x05\xC9\x97\x5D\x45\x80\xB3"} err
+ assert_match "*Bad data format*" $err
+ # Ping after the rejected restore; an error from it would fail the test.
+ r ping
+ }
+}
+
+test {corrupt payload: fuzzer findings - stream listpack lpPrev valgrind issue} {
+ # Fuzzer regression case: with sanitization off the restore is issued without catch, then the stream is queried.
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ r debug set-skip-checksum-validation 1
+ r restore _stream 0 "\x0F\x01\x10\x00\x00\x01\x7B\x0E\xAE\x66\x36\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x02\x01\x01\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x1D\x01\x03\x01\x24\x01\x00\x01\x01\x69\x82\x5F\x31\x03\x05\x01\x02\x01\x33\x01\x00\x01\x01\x01\x02\x01\x05\x01\xFF\x02\x81\x00\x00\x01\x7B\x0E\xAE\x66\x69\x00\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x7B\x0E\xAE\x66\x5A\x00\x01\x00\x00\x01\x7B\x0E\xAE\x66\x5A\x00\x00\x00\x00\x00\x00\x00\x00\x94\x66\xAE\x0E\x7B\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\x83\x66\xAE\x0E\x7B\x01\x00\x00\x01\x00\x00\x01\x7B\x0E\xAE\x66\x5A\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\xD5\xD7\xA5\x5C\x63\x1C\x09\x40"
+ # The query runs under catch, so an error raised by it is swallowed here.
+ catch {r XREVRANGE _stream 1618622681 606195012389}
+ # The log must report no crash by signal and one "ASSERTION FAILED" message.
+ assert_equal [count_log_message 0 "crashed by signal"] 0
+ assert_equal [count_log_message 0 "ASSERTION FAILED"] 1
+ }
+}
+
+test {corrupt payload: fuzzer findings - stream with non-integer entry id} {
+ # Fuzzer regression case: with sanitization on, restore of this stream payload must be refused.
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload yes
+ r debug set-skip-checksum-validation 1
+ # catch stores the error reply of the failed restore in err.
+ catch {r restore _streambig 0 "\x0F\x03\x10\x00\x00\x01\x7B\x13\x34\xC3\xB2\x00\x00\x00\x00\x00\x00\x00\x00\xC3\x40\x4F\x40\x5C\x18\x5C\x00\x00\x00\x24\x00\x05\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x80\x20\x01\x00\x01\x20\x03\x00\x05\x20\x1C\x40\x09\x05\x01\x01\x82\x5F\x31\x03\x80\x0D\x00\x02\x20\x0D\x00\x02\xA0\x19\x00\x03\x20\x0B\x02\x82\x5F\x33\xA0\x19\x00\x04\x20\x0D\x00\x04\x20\x19\x00\xFF\x10\x00\x00\x01\x7B\x13\x34\xC3\xB2\x00\x00\x00\x00\x00\x00\x00\x05\xC3\x40\x56\x40\x61\x18\x61\x00\x00\x00\x24\x00\x05\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x06\x01\x01\x82\x5F\x35\x03\x05\x20\x1E\x40\x0B\x03\x01\x01\x06\x01\x40\x0B\x03\x01\x01\xDF\xFB\x20\x05\x02\x82\x5F\x37\x60\x1A\x20\x0E\x00\xFC\x20\x05\x00\x08\xC0\x1B\x00\xFD\x20\x0C\x02\x82\x5F\x39\x20\x1B\x00\xFF\x10\x00\x00\x01\x7B\x13\x34\xC3\xB3\x00\x00\x00\x00\x00\x00\x00\x03\xC3\x3D\x40\x4A\x18\x4A\x00\x00\x00\x15\x00\x02\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x40\x00\x00\x05\x60\x07\x02\xDF\xFD\x02\xC0\x23\x09\x01\x01\x86\x75\x6E\x69\x71\x75\x65\x07\xA0\x2D\x02\x08\x01\xFF\x0C\x81\x00\x00\x01\x7B\x13\x34\xC3\xB4\x00\x00\x09\x00\x9D\xBD\xD5\xB9\x33\xC4\xC5\xFF"} err
+ assert_match "*Bad data format*" $err
+ # Ping after the rejected restore; an error from it would fail the test.
+ r ping
+ }
+}
+
+test {corrupt payload: fuzzer findings - empty quicklist} {
+    # Sanitization on: the restore must be refused, and a ping is issued
+    # afterwards (an error from it would fail the test).
+    start_server {overrides {loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no}} {
+        r config set sanitize-dump-payload yes
+        r debug set-skip-checksum-validation 1
+        catch { r restore key 0 "\x0E\xC0\x2B\x15\x00\x00\x00\x0A\x00\x00\x00\x01\x00\x00\xE0\x62\x58\xEA\xDF\x22\x00\x00\x00\xFF\x09\x00\xDF\x35\xD2\x67\xDC\x0E\x89\xAB" replace } err
+        assert_match {*Bad data format*} $err
+        r ping
+    }
+}
+
+# Regression test: RESTORE of this fuzzer-found zset payload must be refused
+# with a "Bad data format" error and the server must stay responsive.
+test {corrupt payload: fuzzer findings - empty zset} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ # Payload sanitization is on; checksum validation is skipped, presumably
+ # so the hand-modified bytes are not rejected by the checksum first.
+ r config set sanitize-dump-payload yes
+ r debug set-skip-checksum-validation 1
+ # catch stores the error reply of RESTORE in err instead of raising it.
+ catch {r restore key 0 "\x05\xC0\x01\x09\x00\xF6\x8A\xB6\x7A\x85\x87\x72\x4D"} err
+ assert_match "*Bad data format*" $err
+ # The server must still answer commands after refusing the payload.
+ r ping
+ }
+}
+
+# Regression test: RESTORE of this fuzzer-found hash payload must be refused
+# with a "Bad data format" error and the server must stay responsive.
+test {corrupt payload: fuzzer findings - hash with len of 0} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ # Payload sanitization is on; checksum validation is skipped, presumably
+ # so the hand-modified bytes are not rejected by the checksum first.
+ r config set sanitize-dump-payload yes
+ r debug set-skip-checksum-validation 1
+ # catch stores the error reply of RESTORE in err instead of raising it.
+ catch {r restore key 0 "\x04\xC0\x21\x09\x00\xF6\x8A\xB6\x7A\x85\x87\x72\x4D"} err
+ assert_match "*Bad data format*" $err
+ # The server must still answer commands after refusing the payload.
+ r ping
+ }
+}
+
+# Regression test: RESTORE of this fuzzer-found hash payload must be refused
+# with a "Bad data format" error, and the server log is checked for the
+# "integrity check failed" message.
+test {corrupt payload: fuzzer findings - hash listpack first element too long entry len} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ # Checksum validation is skipped and payload sanitization is on.
+ r debug set-skip-checksum-validation 1
+ r config set sanitize-dump-payload yes
+ # catch stores the error reply of RESTORE in err instead of raising it.
+ catch { r restore _hash 0 "\x10\x15\x15\x00\x00\x00\x06\x00\xF0\x01\x00\x01\x01\x01\x82\x5F\x31\x03\x02\x01\x02\x01\xFF\x0A\x00\x94\x21\x0A\xFA\x06\x52\x9F\x44" replace } err
+ assert_match "*Bad data format*" $err
+ # NOTE(review): presumably asserts that server 0 logged the pattern,
+ # scanning from log line 0 - confirm against verify_log_message.
+ verify_log_message 0 "*integrity check failed*" 0
+ }
+}
+
+test {corrupt payload: fuzzer findings - stream double free listpack when insert dup node to rax returns 0} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r debug set-skip-checksum-validation 1
+ r config set sanitize-dump-payload yes
+ catch { r restore _stream 0 "\x0F\x03\x10\x00\x00\x01\x7B\x60\x5A\x23\x79\x00\x00\x00\x00\x00\x00\x00\x00\xC3\x40\x4F\x40\x5C\x18\x5C\x00\x00\x00\x24\x00\x05\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x00\x01\x20\x03\x00\x05\x20\x1C\x40\x09\x05\x01\x01\x82\x5F\x31\x03\x80\x0D\x00\x02\x20\x0D\x00\x02\xA0\x19\x00\x03\x20\x0B\x02\x82\x5F\x33\xA0\x19\x00\x04\x20\x0D\x00\x04\x20\x19\x00\xFF\x10\x00\x00\x01\x7B\x60\x5A\x23\x79\x00\x00\x00\x00\x00\x00\x00\x05\xC3\x40\x51\x40\x5E\x18\x5E\x00\x00\x00\x24\x00\x05\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x06\x01\x01\x82\x5F\x35\x03\x05\x20\x1E\x40\x0B\x03\x01\x01\x06\x01\x80\x0B\x00\x02\x20\x0B\x02\x82\x5F\x37\xA0\x19\x00\x03\x20\x0D\x00\x08\xA0\x19\x00\x04\x20\x0B\x02\x82\x5F\x39\x20\x19\x00\xFF\x10\x00\x00\x01\x7B\x60\x5A\x23\x79\x00\x00\x00\x00\x00\x00\x00\x00\xC3\x3B\x40\x49\x18\x49\x00\x00\x00\x15\x00\x02\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x40\x00\x00\x05\x20\x07\x40\x09\xC0\x22\x09\x01\x01\x86\x75\x6E\x69\x71\x75\x65\x07\xA0\x2C\x02\x08\x01\xFF\x0C\x81\x00\x00\x01\x7B\x60\x5A\x23\x7A\x01\x00\x0A\x00\x9C\x8F\x1E\xBF\x2E\x05\x59\x09" replace } err
+ assert_match "*Bad data format*" $err
+ r ping
+ }
+}
+
+# Regression test: LCS of a large string against itself must fail with an
+# "Insufficient memory" error, and the server must stay responsive.
+test {corrupt payload: fuzzer findings - LCS OOM} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ # Writing at offset 423324 of a missing key creates a string at least
+ # that long.
+ r SETRANGE _int 423324 1450173551
+ # catch stores the error reply of LCS in err instead of raising it.
+ catch {r LCS _int _int} err
+ assert_match "*Insufficient memory*" $err
+ # The server must still answer commands after refusing the request.
+ r ping
+ }
+}
+
+# Regression test: with sanitization disabled a corrupt list payload is
+# restored, and the later LPOP is expected to end in exactly one logged
+# assertion failure and no signal crash.
+test {corrupt payload: fuzzer findings - gcc asan reports false leak on assert} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r debug set-skip-checksum-validation 1
+ # Sanitization is off, so the payload is not deep-validated on RESTORE.
+ r config set sanitize-dump-payload no
+ # Both commands run under catch: their outcome is judged from the log.
+ catch { r restore _list 0 "\x12\x01\x02\x13\x13\x00\x00\x00\x10\x00\x00\x00\x03\x00\x00\xF3\xFE\x02\x5F\x31\x04\xF1\xFF\x0A\x00\x19\x8D\x3D\x74\x85\x94\x29\xBD" }
+ catch { r LPOP _list } err
+ # The log must show no signal crash and exactly one assertion failure.
+ assert_equal [count_log_message 0 "crashed by signal"] 0
+ assert_equal [count_log_message 0 "ASSERTION FAILED"] 1
+ }
+}
+
+# Regression test: with sanitization disabled a corrupt hash listpack is
+# restored, and commands that walk it must end in a controlled assertion
+# rather than a signal crash.
+test {corrupt payload: fuzzer findings - lpFind invalid access} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r debug set-skip-checksum-validation 1
+ # Sanitization is off, so the payload is not deep-validated on RESTORE.
+ r config set sanitize-dump-payload no
+ r restore _hashbig 0 "\x10\x39\x39\x00\x00\x00\x14\x00\x06\x01\x06\x01\x03\x01\x82\x5F\x33\x03\x07\x01\x82\x5F\x37\x03\x00\x01\x00\x01\x04\x01\x04\x01\x09\x01\x82\x5F\x39\x03\x05\x01\x82\x5F\x35\x03\x08\x01\x08\x01\x01\x01\x82\x5F\x31\x03\x02\x01\xF0\x01\xFF\x0A\x00\x29\xD7\xE4\x52\x79\x7A\x95\x82"
+ # Both commands run under catch: their outcome is judged from the log.
+ catch { r HLEN _hashbig }
+ catch { r HSETNX _hashbig 513072881620 "\x9A\x4B\x1F\xF2\x99\x74\x6E\x96\x84\x7F\xB9\x85\xBE\xD6\x1A\x93\x0A\xED\xAE\x19\xA0\x5A\x67\xD6\x89\xA8\xF9\xF2\xB8\xBD\x3E\x5A\xCF\xD2\x5B\x17\xA4\xBB\xB2\xA9\x56\x67\x6E\x0B\xED\xCD\x36\x49\xC6\x84\xFF\xC2\x76\x9B\xF3\x49\x88\x97\x92\xD2\x54\xE9\x08\x19\x86\x40\x96\x24\x68\x25\x9D\xF7\x0E\xB7\x36\x85\x68\x6B\x2A\x97\x64\x30\xE6\xFF\x9A\x2A\x42\x2B\x31\x01\x32\xB3\xEE\x78\x1A\x26\x94\xE2\x07\x34\x50\x8A\xFF\xF9\xAE\xEA\xEC\x59\x42\xF5\x39\x40\x65\xDE\x55\xCC\x77\x1B\x32\x02\x19\xEE\x3C\xD4\x79\x48\x01\x4F\x51\xFE\x22\xE0\x0C\xF4\x07\x06\xCD\x55\x30\xC0\x24\x32\xD4\xCC\xAF\x82\x05\x48\x14\x10\x55\xA1\x3D\xF6\x81\x45\x54\xEA\x71\x24\x27\x06\xDC\xFA\xE4\xE4\x87\xCC\x81\xA0\x47\xA5\xAF\xD1\x89\xE7\x42\xC3\x24\xD0\x32\x7A\xDE\x44\x47\x6E\x1F\xCB\xEE\xA6\x46\xDE\x0D\xE6\xD5\x16\x03\x2A\xD6\x9E\xFD\x94\x02\x2C\xDB\x1F\xD0\xBE\x98\x10\xE3\xEB\xEA\xBE\xE5\xD1" }
+ # Without these checks the test could never fail. Same verification as
+ # the other sanitize-off assertion test in this file: no signal crash,
+ # exactly one assertion failure.
+ assert_equal [count_log_message 0 "crashed by signal"] 0
+ assert_equal [count_log_message 0 "ASSERTION FAILED"] 1
+ }
+}
+
+test {corrupt payload: fuzzer findings - invalid access in ziplist tail prevlen decoding} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r debug set-skip-checksum-validation 1
+ r config set sanitize-dump-payload no
+ catch {r restore _listbig 0 "\x0e\x02\x1B\x1B\x00\x00\x00\x16\x00\x00\x00\x05\x00\x00\x02\x5F\x39\x04\xF9\x02\x02\x5F\x37\x04\xF7\x02\x02\x5F\x35\xFF\x19\x19\x00\x00\x00\x16\x00\x00\x00\x05\x00\x00\xF5\x02\x02\x5F\x33\x04\xF3\x02\x02\x5F\x31\xFE\xF1\xFF\x0A\x00\x6B\x43\x32\x2F\xBB\x29\x0a\xBE"} err
+ assert_match "*Bad data format*" $err
+ r ping
+ }
+}
+
+test {corrupt payload: fuzzer findings - zset zslInsert with a NAN score} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ r debug set-skip-checksum-validation 1
+ catch {r restore _nan_zset 0 "\x05\x0A\x02\x5F\x39\x00\x00\x00\x00\x00\x00\x22\x40\xC0\x08\x00\x00\x00\x00\x00\x00\x20\x40\x02\x5F\x37\x00\x00\x00\x00\x00\x00\x1C\x40\xC0\x06\x00\x00\x00\x00\x00\x00\x18\x40\x02\x5F\x35\x00\x00\x00\x00\x00\x00\x14\x40\xC0\x04\x00\x00\x00\x00\x00\x00\x10\x40\x02\x5F\x33\x00\x00\x00\x00\x00\x00\x08\x40\xC0\x02\x00\x00\x00\x00\x00\x00\x00\x40\x02\x5F\x31\x00\x00\x00\x00\x00\x55\xF0\x7F\xC0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0A\x00\xEC\x94\x86\xD8\xFD\x5C\x5F\xD8"} err
+ assert_match "*Bad data format*" $err
+ r ping
+ }
+}
+
+test {corrupt payload: fuzzer findings - streamLastValidID panic} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload yes
+ r debug set-skip-checksum-validation 1
+ catch {r restore _streambig 0 "\x13\xC0\x10\x00\x00\x01\x80\x20\x48\xA0\x33\x00\x00\x00\x00\x00\x00\x00\x00\xC3\x40\x4F\x40\x5C\x18\x5C\x00\x00\x00\x24\x00\x05\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x00\x01\x20\x03\x00\x05\x20\x1C\x40\x09\x05\x01\x01\x82\x5F\x31\x03\x80\x0D\x00\x02\x20\x0D\x00\x02\xA0\x19\x00\x03\x20\x0B\x02\x82\x5F\x33\x60\x19\x40\x2F\x02\x01\x01\x04\x20\x19\x00\xFF\x10\x00\x00\x01\x80\x20\x48\xA0\x34\x00\x00\x00\x00\x00\x00\x00\x01\xC3\x40\x51\x40\x5E\x18\x5E\x00\x00\x00\x24\x00\x05\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x06\x01\x01\x82\x5F\x35\x03\x05\x20\x1E\x40\x0B\x03\x01\x01\x06\x01\x80\x0B\x00\x02\x20\x0B\x02\x82\x5F\x37\xA0\x19\x00\x03\x20\x0D\x00\x08\xA0\x19\x00\x04\x20\x0B\x02\x82\x5F\x39\x20\x19\x00\xFF\x10\x00\x00\x01\x80\x20\x48\xA0\x34\x00\x00\x00\x00\x00\x00\x00\x06\xC3\x3D\x40\x4A\x18\x4A\x00\x00\x00\x15\x00\x02\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x40\x00\x00\x05\x60\x07\x02\xDF\xFA\x02\xC0\x23\x09\x01\x01\x86\x75\x6E\x69\x71\x75\x65\x07\xA0\x2D\x02\x08\x01\xFF\x0C\x81\x00\x00\x01\x80\x20\x48\xA0\x35\x00\x81\x00\x00\x01\x80\x20\x48\xA0\x33\x00\x00\x00\x0C\x00\x0A\x00\x34\x8B\x0E\x5B\x42\xCD\xD6\x08"} err
+ assert_match "*Bad data format*" $err
+ r ping
+ }
+}
+
+test {corrupt payload: fuzzer findings - valgrind fishy value warning} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload yes
+ r debug set-skip-checksum-validation 1
+ catch {r restore _key 0 "\x13\x01\x10\x00\x00\x01\x81\xCC\x07\xDC\xF2\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x02\x01\x01\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x05\x01\x03\x01\x2C\x01\x00\x01\x01\x01\x82\x5F\x31\x03\x05\x01\x02\x01\x3C\x01\x00\x01\x01\x01\x02\x01\x05\x01\xFF\x02\xD0\x00\x00\x01\x81\xCC\x07\xDD\x2E\x00\x81\x00\x00\x01\x81\xCC\x07\xDC\xF2\x00\x81\x00\x00\x01\x81\xCC\x07\xDD\x1E\x00\x03\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x81\xCC\x07\xDD\x1E\x00\x02\x01\x00\x00\x01\x81\xCC\x07\xDD\x1E\x00\x00\x00\x00\x00\x00\x00\x00\x71\xDD\x07\xCC\x81\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\x58\xDD\x07\xCC\x81\x01\x00\x00\x01\x00\x00\x01\x81\xCC\x07\xDD\x1E\x00\x00\x00\x00\x00\x00\x00\x00\x0A\x00\x2F\xB0\xD1\x15\x0A\x97\x87\x6B"} err
+ assert_match "*Bad data format*" $err
+ r ping
+ }
+}
+
+test {corrupt payload: fuzzer findings - empty set listpack} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload no
+ r debug set-skip-checksum-validation 1
+ catch {r restore _key 0 "\x14\x25\x25\x00\x00\x00\x00\x00\x02\x01\x82\x5F\x37\x03\x06\x01\x82\x5F\x35\x03\x82\x5F\x33\x03\x00\x01\x82\x5F\x31\x03\x82\x5F\x39\x03\x04\xA9\x08\x01\xFF\x0B\x00\xA3\x26\x49\xB4\x86\xB0\x0F\x41"} err
+ assert_match "*Bad data format*" $err
+ r ping
+ }
+}
+
+test {corrupt payload: fuzzer findings - set with duplicate elements causes sdiff to hang} {
+ start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
+ r config set sanitize-dump-payload yes
+ r debug set-skip-checksum-validation 1
+ catch {r restore _key 0 "\x14\x25\x25\x00\x00\x00\x0A\x00\x06\x01\x82\x5F\x35\x03\x04\x01\x82\x5F\x31\x03\x82\x5F\x33\x03\x00\x01\x82\x5F\x39\x03\x82\x5F\x33\x03\x08\x01\x02\x01\xFF\x0B\x00\x31\xBE\x7D\x41\x01\x03\x5B\xEC" replace} err
+ assert_match "*Bad data format*" $err
+ r ping
+
+ # In the past, it generated a broken protocol and left the client hung in sdiff
+ r config set sanitize-dump-payload no
+ assert_equal {OK} [r restore _key 0 "\x14\x25\x25\x00\x00\x00\x0A\x00\x06\x01\x82\x5F\x35\x03\x04\x01\x82\x5F\x31\x03\x82\x5F\x33\x03\x00\x01\x82\x5F\x39\x03\x82\x5F\x33\x03\x08\x01\x02\x01\xFF\x0B\x00\x31\xBE\x7D\x41\x01\x03\x5B\xEC" replace]
+ assert_type set _key
+ assert_encoding listpack _key
+ assert_equal 10 [r scard _key]
+ assert_equal {0 2 4 6 8 _1 _3 _3 _5 _9} [lsort [r smembers _key]]
+ assert_equal {0 2 4 6 8 _1 _3 _5 _9} [lsort [r sdiff _key]]
+ }
+} {} {logreqres:skip} ;# This test violates {"uniqueItems": true}
+
+} ;# tags
+
diff --git a/tests/integration/dismiss-mem.tcl b/tests/integration/dismiss-mem.tcl
new file mode 100644
index 0000000..87f6e1d
--- /dev/null
+++ b/tests/integration/dismiss-mem.tcl
@@ -0,0 +1,101 @@
+# The tests in this file aim to get coverage of all the "dismiss" methods
+# that dismiss all data-type memory in the fork child, such as the client query
+# buffer, the client output buffer and the replication backlog.
+# We may not have many asserts in these tests, since we just check for
+# crashes and dump file inconsistencies.
+
+start_server {tags {"dismiss external:skip"}} {
+ # Other tests do exercise the child process dumping an RDB file, but their
+ # key/value memory allocations are usually small, so they cannot cover
+ # the "dismiss" object methods. In this test we create big key/values
+ # to satisfy the conditions for releasing memory pages; in some cases we
+ # assume the OS page size is 4KB.
+ # Populate one big value per data type and encoding, rewrite the AOF in a
+ # child process, reload it, and require the dataset digest to be unchanged.
+ test {dismiss all data types memory} {
+ set bigstr [string repeat A 8192]
+ set 64bytes [string repeat A 64]
+
+ # string
+ populate 100 bigstring 8192
+
+ # list
+ r lpush biglist1 $bigstr ; # uncompressed ziplist node
+ r config set list-compress-depth 1 ; # compressed ziplist nodes
+ for {set i 0} {$i < 16} {incr i} {
+ r lpush biglist2 $bigstr
+ }
+
+ # set
+ r sadd bigset1 $bigstr ; # hash encoding
+ set biginteger [string repeat 1 19]
+ for {set i 0} {$i < 512} {incr i} {
+ # Add a distinct 19-digit integer each time. Re-adding the same
+ # member would leave the set with a single element instead of 512.
+ # NOTE(review): assumes 512 entries still fit the intset limit - confirm.
+ r sadd bigset2 [expr {$biginteger + $i}] ; # intset encoding
+ }
+
+ # zset
+ r zadd bigzset1 1.0 $bigstr ; # skiplist encoding
+ for {set i 0} {$i < 128} {incr i} {
+ # Distinct 64-byte members. Re-adding the same member would leave
+ # the sorted set with a single element instead of 128.
+ r zadd bigzset2 1.0 [format %064d $i] ; # ziplist encoding
+ }
+
+ # hash
+ r hset bighash1 field1 $bigstr ; # hash encoding
+ for {set i 0} {$i < 128} {incr i} {
+ r hset bighash2 $i $64bytes ; # ziplist encoding
+ }
+
+ # stream
+ r xadd bigstream * entry1 $bigstr entry2 $bigstr
+
+ # Digest before the rewrite, then rewrite without the RDB preamble,
+ # reload from the AOF and compare.
+ set digest [debug_digest]
+ r config set aof-use-rdb-preamble no
+ r bgrewriteaof
+ waitForBgrewriteaof r
+ r debug loadaof
+ set newdigest [debug_digest]
+ assert {$digest eq $newdigest}
+ }
+
+ # Leave a large unread reply pending on a second client while a background
+ # save runs, then check the list data is intact.
+ test {dismiss client output buffer} {
+ # Big output buffer
+ set item [string repeat "x" 100000]
+ for {set i 0} {$i < 100} {incr i} {
+ r lpush mylist $item
+ }
+ # The deferring client requests the whole list but never reads the reply.
+ set rd [redis_deferring_client]
+ $rd lrange mylist 0 -1
+ $rd flush
+ after 100
+
+ r bgsave
+ waitForBgsave r
+ assert_equal $item [r lpop mylist]
+
+ # Release the extra connection instead of leaking it into later tests.
+ $rd close
+ }
+
+ # Leave a large, incomplete command pending on a second client while a
+ # background save runs.
+ test {dismiss client query buffer} {
+ # Big pending query buffer
+ set bigstr [string repeat A 8192]
+ set rd [redis_deferring_client]
+ # The header announces two arguments but only the first is sent, so the
+ # command stays incomplete.
+ $rd write "*2\r\n\$8192\r\n"
+ $rd write $bigstr\r\n
+ $rd flush
+ after 100
+
+ r bgsave
+ waitForBgsave r
+
+ # Release the extra connection instead of leaking it into later tests.
+ $rd close
+ }
+
+ # Attach a replica, write big values on the master, and run a background
+ # save on the master.
+ test {dismiss replication backlog} {
+ # Keep a handle on the outer server before starting the nested one.
+ set master [srv 0 client]
+ start_server {} {
+ # Inside this block r is the new server; it replicates from the
+ # outer one, addressed as srv -1.
+ r slaveof [srv -1 host] [srv -1 port]
+ wait_for_sync r
+
+ set bigstr [string repeat A 8192]
+ for {set i 0} {$i < 20} {incr i} {
+ $master set $i $bigstr
+ }
+ $master bgsave
+ waitForBgsave $master
+ }
+ }
+}
diff --git a/tests/integration/failover.tcl b/tests/integration/failover.tcl
new file mode 100644
index 0000000..21fa3d2
--- /dev/null
+++ b/tests/integration/failover.tcl
@@ -0,0 +1,294 @@
+start_server {tags {"failover external:skip"} overrides {save {}}} {
+start_server {overrides {save {}}} {
+start_server {overrides {save {}}} {
+ # Client handle, host, port and pid of each of the three servers.
+ # NOTE(review): srv levels 0, -1 and -2 presumably map to the innermost,
+ # middle and outermost start_server - confirm.
+ set node_0 [srv 0 client]
+ set node_0_host [srv 0 host]
+ set node_0_port [srv 0 port]
+ set node_0_pid [srv 0 pid]
+
+ set node_1 [srv -1 client]
+ set node_1_host [srv -1 host]
+ set node_1_port [srv -1 port]
+ set node_1_pid [srv -1 pid]
+
+ set node_2 [srv -2 client]
+ set node_2_host [srv -2 host]
+ set node_2_port [srv -2 port]
+ set node_2_pid [srv -2 pid]
+
+ # Assert that the three given clients report the same DEBUG DIGEST,
+ # comparing n1 with n2 and then n2 with n3.
+ proc assert_digests_match {n1 n2 n3} {
+ assert_equal [$n1 debug digest] [$n2 debug digest]
+ assert_equal [$n2 debug digest] [$n3 debug digest]
+ }
+
+ # Runs before replication is set up, so node 0 has no replica yet.
+ test {failover command fails without connected replica} {
+ catch { $node_0 failover to $node_1_host $node_1_port } err
+ if {! [string match "ERR*" $err]} {
+ fail "failover command succeeded when replica not connected"
+ }
+ }
+
+ # Make node 1 and node 2 replicas of node 0 and wait for both to sync.
+ test {setup replication for following tests} {
+ $node_1 replicaof $node_0_host $node_0_port
+ $node_2 replicaof $node_0_host $node_0_port
+ wait_for_sync $node_1
+ wait_for_sync $node_2
+ }
+
+ # Each test below sends a FAILOVER command and expects an ERR reply.
+ test {failover command fails with invalid host} {
+ catch { $node_0 failover to invalidhost $node_1_port } err
+ assert_match "ERR*" $err
+ }
+
+ test {failover command fails with invalid port} {
+ catch { $node_0 failover to $node_1_host invalidport } err
+ assert_match "ERR*" $err
+ }
+
+ # FORCE and TIMEOUT are given without a target host and port.
+ test {failover command fails with just force and timeout} {
+ catch { $node_0 FAILOVER FORCE TIMEOUT 100} err
+ assert_match "ERR*" $err
+ }
+
+ # Node 1 is a replica at this point, so it must refuse the command.
+ test {failover command fails when sent to a replica} {
+ catch { $node_1 failover to $node_1_host $node_1_port } err
+ assert_match "ERR*" $err
+ }
+
+ test {failover command fails with force without timeout} {
+ catch { $node_0 failover to $node_1_host $node_1_port FORCE } err
+ assert_match "ERR*" $err
+ }
+
+ # Fail over from node 0 to node 1 under write load, then check roles, that
+ # both other nodes partially resynced with node 1, and matching digests.
+ test {failover command to specific replica works} {
+ # Baselines of node 1 counters, taken before it becomes the master.
+ set initial_psyncs [s -1 sync_partial_ok]
+ set initial_syncs [s -1 sync_full]
+
+ # Generate a delta between primary and replica
+ set load_handler [start_write_load $node_0_host $node_0_port 5]
+ pause_process [srv -1 pid]
+ wait_for_condition 50 100 {
+ [s 0 total_commands_processed] > 100
+ } else {
+ fail "Node 0 did not accept writes"
+ }
+ resume_process [srv -1 pid]
+
+ # Execute the failover
+ $node_0 failover to $node_1_host $node_1_port
+
+ # Wait for failover to end
+ wait_for_condition 50 100 {
+ [s 0 master_failover_state] == "no-failover"
+ } else {
+ fail "Failover from node 0 to node 1 did not finish"
+ }
+
+ # stop the write load and make sure no more commands processed
+ stop_write_load $load_handler
+ wait_load_handlers_disconnected
+
+ $node_2 replicaof $node_1_host $node_1_port
+ wait_for_sync $node_0
+ wait_for_sync $node_2
+
+ assert_match *slave* [$node_0 role]
+ assert_match *master* [$node_1 role]
+ assert_match *slave* [$node_2 role]
+
+ # We should accept psyncs from both nodes
+ assert_equal [expr {[s -1 sync_partial_ok] - $initial_psyncs}] 2
+ # The full sync delta is measured against the sync_full baseline, not
+ # the partial sync one.
+ assert_equal [expr {[s -1 sync_full] - $initial_syncs}] 0
+ assert_digests_match $node_0 $node_1 $node_2
+ }
+
+ # Fail over from node 1 with no explicit target while node 0 is paused, so
+ # node 2 is the only candidate; then check roles, syncs and digests.
+ test {failover command to any replica works} {
+ # Baselines of node 2 counters, taken before it becomes the master.
+ set initial_psyncs [s -2 sync_partial_ok]
+ set initial_syncs [s -2 sync_full]
+
+ wait_for_ofs_sync $node_1 $node_2
+ # We stop node 0 to make sure node 2 is selected
+ pause_process $node_0_pid
+ $node_1 set CASE 1
+ $node_1 FAILOVER
+
+ # Wait for failover to end
+ wait_for_condition 50 100 {
+ [s -1 master_failover_state] == "no-failover"
+ } else {
+ fail "Failover from node 1 to node 2 did not finish"
+ }
+ resume_process $node_0_pid
+ $node_0 replicaof $node_2_host $node_2_port
+
+ wait_for_sync $node_0
+ wait_for_sync $node_1
+
+ assert_match *slave* [$node_0 role]
+ assert_match *slave* [$node_1 role]
+ assert_match *master* [$node_2 role]
+
+ # We should accept Psyncs from both nodes
+ assert_equal [expr {[s -2 sync_partial_ok] - $initial_psyncs}] 2
+ # The full sync delta is read from node 2, where both baselines were
+ # taken, and compared with the sync_full baseline.
+ assert_equal [expr {[s -2 sync_full] - $initial_syncs}] 0
+ assert_digests_match $node_0 $node_1 $node_2
+ }
+
+ # Forced failover from node 2 to a paused node 0: node 2 must proceed once
+ # the timeout expires, and node 0 must end up as master after it resumes.
+ test {failover to a replica with force works} {
+ # Baselines of node 0 counters, taken before it becomes the master.
+ set initial_psyncs [s 0 sync_partial_ok]
+ set initial_syncs [s 0 sync_full]
+
+ pause_process $node_0_pid
+ # node 0 will never acknowledge this write
+ $node_2 set case 2
+ $node_2 failover to $node_0_host $node_0_port TIMEOUT 100 FORCE
+
+ # Wait for node 2 to give up waiting for node 0 to sync and start the failover
+ wait_for_condition 50 100 {
+ [s -2 master_failover_state] == "failover-in-progress"
+ } else {
+ fail "Failover from node 2 to node 0 did not timeout"
+ }
+
+ # Quick check that everyone is a replica, we never want a
+ # state where there are two masters.
+ assert_match *slave* [$node_1 role]
+ assert_match *slave* [$node_2 role]
+
+ resume_process $node_0_pid
+
+ # Wait for failover to end
+ wait_for_condition 50 100 {
+ [s -2 master_failover_state] == "no-failover"
+ } else {
+ fail "Failover from node 2 to node 0 did not finish"
+ }
+ $node_1 replicaof $node_0_host $node_0_port
+
+ wait_for_sync $node_1
+ wait_for_sync $node_2
+
+ assert_match *master* [$node_0 role]
+ assert_match *slave* [$node_1 role]
+ assert_match *slave* [$node_2 role]
+
+ # Node 2 must have logged the timeout exactly once.
+ assert_equal [count_log_message -2 "time out exceeded, failing over."] 1
+
+ # We should accept both psyncs, although this is the condition we might not
+ # since we didn't catch up.
+ assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 2
+ assert_equal [expr [s 0 sync_full] - $initial_syncs] 0
+ assert_digests_match $node_0 $node_1 $node_2
+ }
+
+ # Failover with a timeout and no FORCE to a paused replica: node 0 must go
+ # back to the no-failover state and stay master, with no new syncs.
+ test {failover with timeout aborts if replica never catches up} {
+ set initial_psyncs [s 0 sync_partial_ok]
+ set initial_syncs [s 0 sync_full]
+
+ # Stop replica so it never catches up
+ pause_process [srv -1 pid]
+ $node_0 SET CASE 1
+
+ $node_0 failover to [srv -1 host] [srv -1 port] TIMEOUT 500
+ # Wait for failover to end
+ wait_for_condition 50 20 {
+ [s 0 master_failover_state] == "no-failover"
+ } else {
+ fail "Failover from node_0 to replica did not finish"
+ }
+
+ resume_process [srv -1 pid]
+
+ # We need to make sure the nodes actually sync back up
+ wait_for_ofs_sync $node_0 $node_1
+ wait_for_ofs_sync $node_0 $node_2
+
+ assert_match *master* [$node_0 role]
+ assert_match *slave* [$node_1 role]
+ assert_match *slave* [$node_2 role]
+
+ # Since we never caught up, there should be no syncs
+ assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 0
+ assert_equal [expr [s 0 sync_full] - $initial_syncs] 0
+ assert_digests_match $node_0 $node_1 $node_2
+ }
+
+ # Start a failover to a paused replica with a long timeout, abort it with
+ # FAILOVER ABORT, and check node 0 stays master with no new syncs.
+ test {failovers can be aborted} {
+ set initial_psyncs [s 0 sync_partial_ok]
+ set initial_syncs [s 0 sync_full]
+
+ # Stop replica so it never catches up
+ pause_process [srv -1 pid]
+ $node_0 SET CASE 2
+
+ $node_0 failover to [srv -1 host] [srv -1 port] TIMEOUT 60000
+ # Pattern first, value second, as in the other assert_match calls.
+ assert_match "waiting-for-sync" [s 0 master_failover_state]
+
+ # Sanity check that read commands are still accepted
+ $node_0 GET CASE
+
+ $node_0 failover abort
+ assert_match "no-failover" [s 0 master_failover_state]
+
+ resume_process [srv -1 pid]
+
+ # Just make sure everything is still synced
+ wait_for_ofs_sync $node_0 $node_1
+ wait_for_ofs_sync $node_0 $node_2
+
+ assert_match *master* [$node_0 role]
+ assert_match *slave* [$node_1 role]
+ assert_match *slave* [$node_2 role]
+
+ # Since we never caught up, there should be no syncs
+ assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 0
+ assert_equal [expr [s 0 sync_full] - $initial_syncs] 0
+ assert_digests_match $node_0 $node_1 $node_2
+ }
+
+ # Remove the psync permission on the target, attempt a failover, and check
+ # that node 0 stays master and logs the rejection.
+ test {failover aborts if target rejects sync request} {
+ set initial_psyncs [s 0 sync_partial_ok]
+ set initial_syncs [s 0 sync_full]
+
+ # We block psync, so the failover will fail
+ $node_1 acl setuser default -psync
+
+ # We pause the target long enough to send a write command
+ # during the pause. This write will not be interrupted.
+ pause_process [srv -1 pid]
+ set rd [redis_deferring_client]
+ $rd SET FOO BAR
+ $node_0 failover to $node_1_host $node_1_port
+ resume_process [srv -1 pid]
+
+ # Wait for failover to end
+ wait_for_condition 50 100 {
+ [s 0 master_failover_state] == "no-failover"
+ } else {
+ fail "Failover from node_0 to replica did not finish"
+ }
+
+ # The deferred write must have completed successfully.
+ assert_equal [$rd read] "OK"
+ $rd close
+
+ # restore access to psync
+ $node_1 acl setuser default +psync
+
+ # We need to make sure the nodes actually sync back up
+ wait_for_sync $node_1
+ wait_for_sync $node_2
+
+ assert_match *master* [$node_0 role]
+ assert_match *slave* [$node_1 role]
+ assert_match *slave* [$node_2 role]
+
+ # We will cycle all of our replicas here and force a psync.
+ assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 2
+ assert_equal [expr [s 0 sync_full] - $initial_syncs] 0
+
+ # Node 0 must have logged the rejection exactly once.
+ assert_equal [count_log_message 0 "Failover target rejected psync request"] 1
+ assert_digests_match $node_0 $node_1 $node_2
+ }
+}
+}
+}
diff --git a/tests/integration/logging.tcl b/tests/integration/logging.tcl
new file mode 100644
index 0000000..4f8639b
--- /dev/null
+++ b/tests/integration/logging.tcl
@@ -0,0 +1,61 @@
+tags {"external:skip"} {
+
+# Decide whether stack traces can be expected on this platform; the result is
+# stored in backtrace_supported, which starts at 0.
+set system_name [string tolower [exec uname -s]]
+set backtrace_supported 0
+
+# We only support darwin or Linux with glibc
+if {$system_name eq {darwin}} {
+ set backtrace_supported 1
+} elseif {$system_name eq {linux}} {
+ # Avoid the test on libmusl, which does not support backtrace
+ # and on static binaries (ldd exit code 1) where we can't detect libmusl
+ # Any error from ldd is swallowed by catch, leaving the flag at 0.
+ catch {
+ set ldd [exec ldd src/redis-server]
+ if {![string match {*libc.*musl*} $ldd]} {
+ set backtrace_supported 1
+ }
+ }
+}
+
+# Only run where a backtrace is expected: enable the watchdog, block the
+# server with DEBUG SLEEP, and wait for a log line naming debugCommand.
+if {$backtrace_supported} {
+ set server_path [tmpdir server.log]
+ start_server [list overrides [list dir $server_path]] {
+ test "Server is able to generate a stack trace on selected systems" {
+ # The one second sleep is longer than the 200 watchdog period.
+ r config set watchdog-period 200
+ r debug sleep 1
+ set pattern "*debugCommand*"
+ # The escaped quotes wrap the pattern in literal double quotes.
+ set res [wait_for_log_messages 0 \"$pattern\" 0 100 100]
+ if {$::verbose} { puts $res }
+ }
+ }
+}
+
+# Valgrind will complain that the process terminated by a signal, skip it.
+if {!$::valgrind} {
+ # With backtrace support the log is expected to contain a stack trace,
+ # otherwise only the crash notice.
+ if {$backtrace_supported} {
+ set crash_pattern "*STACK TRACE*"
+ } else {
+ set crash_pattern "*crashed by signal*"
+ }
+
+ # Crash the server from outside with SIGABRT and wait for the report.
+ set server_path [tmpdir server1.log]
+ start_server [list overrides [list dir $server_path crash-memcheck-enabled no]] {
+ test "Crash report generated on SIGABRT" {
+ set pid [s process_id]
+ exec kill -SIGABRT $pid
+ set res [wait_for_log_messages 0 \"$crash_pattern\" 0 50 100]
+ if {$::verbose} { puts $res }
+ }
+ }
+
+ # Crash the server with DEBUG SEGFAULT; the command runs under catch and
+ # its outcome is ignored.
+ set server_path [tmpdir server2.log]
+ start_server [list overrides [list dir $server_path crash-memcheck-enabled no]] {
+ test "Crash report generated on DEBUG SEGFAULT" {
+ catch {r debug segfault}
+ set res [wait_for_log_messages 0 \"$crash_pattern\" 0 50 100]
+ if {$::verbose} { puts $res }
+ }
+ }
+}
+
+}
diff --git a/tests/integration/psync2-master-restart.tcl b/tests/integration/psync2-master-restart.tcl
new file mode 100644
index 0000000..a9e21d1
--- /dev/null
+++ b/tests/integration/psync2-master-restart.tcl
@@ -0,0 +1,218 @@
+start_server {tags {"psync2 external:skip"}} {
+start_server {} {
+start_server {} {
+ # Handles for the three servers: master (srv 0), its replica (srv -1) and
+ # a sub-replica (srv -2) that replicates from the replica.
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+
+ set replica [srv -1 client]
+ set replica_host [srv -1 host]
+ set replica_port [srv -1 port]
+
+ set sub_replica [srv -2 client]
+
+ # Make sure the server saves an RDB on shutdown
+ $master config set save "3600 1"
+
+ # Because we will test partial resync later, we don't want a timeout to cause
+ # the master-replica disconnect, then the extra reconnections will break the
+ # sync_partial_ok stat test
+ $master config set repl-timeout 3600
+ $replica config set repl-timeout 3600
+ $sub_replica config set repl-timeout 3600
+
+ # Avoid PINGs
+ $master config set repl-ping-replica-period 3600
+ # Write the settings to the config file.
+ # NOTE(review): presumably so they survive the restarts below - confirm.
+ $master config rewrite
+
+ # Build replication chain
+ $replica replicaof $master_host $master_port
+ $sub_replica replicaof $replica_host $replica_port
+
+ wait_for_condition 50 100 {
+ [status $replica master_link_status] eq {up} &&
+ [status $sub_replica master_link_status] eq {up}
+ } else {
+ fail "Replication not started."
+ }
+
+ # Restart the master while its replication offset is still 0 and check it
+ # restores its replication info and accepts a partial resync.
+ test "PSYNC2: Partial resync after Master restart using RDB aux fields when offset is 0" {
+ assert {[status $master master_repl_offset] == 0}
+
+ set replid [status $master master_replid]
+ $replica config resetstat
+
+ # The client handle is refreshed after the restart; errors raised while
+ # restarting are swallowed by catch.
+ catch {
+ restart_server 0 true false true now
+ set master [srv 0 client]
+ }
+ wait_for_condition 50 1000 {
+ [status $replica master_link_status] eq {up} &&
+ [status $sub_replica master_link_status] eq {up}
+ } else {
+ fail "Replicas didn't sync after master restart"
+ }
+
+ # Make sure master restore replication info correctly
+ # Replication IDs are strings, so compare them with ne / eq.
+ assert {[status $master master_replid] ne $replid}
+ assert {[status $master master_repl_offset] == 0}
+ assert {[status $master master_replid2] eq $replid}
+ assert {[status $master second_repl_offset] == 1}
+
+ # Make sure master set replication backlog correctly
+ assert {[status $master repl_backlog_active] == 1}
+ assert {[status $master repl_backlog_first_byte_offset] == 1}
+ assert {[status $master repl_backlog_histlen] == 0}
+
+ # Partial resync after Master restart
+ assert {[status $master sync_partial_ok] == 1}
+ assert {[status $replica sync_partial_ok] == 1}
+ }
+
+ # Generate some data
+ createComplexDataset $master 1000
+
+ # Restart the master after data was written and check it restores its
+ # replication offset and ID and accepts a partial resync.
+ test "PSYNC2: Partial resync after Master restart using RDB aux fields with data" {
+ wait_for_condition 500 100 {
+ [status $master master_repl_offset] == [status $replica master_repl_offset] &&
+ [status $master master_repl_offset] == [status $sub_replica master_repl_offset]
+ } else {
+ fail "Replicas and master offsets were unable to match *exactly*."
+ }
+
+ # Remember the replication ID and offset from before the restart.
+ set replid [status $master master_replid]
+ set offset [status $master master_repl_offset]
+ $replica config resetstat
+
+ catch {
+ # SHUTDOWN NOW ensures master doesn't send GETACK to replicas before
+ # shutting down which would affect the replication offset.
+ restart_server 0 true false true now
+ set master [srv 0 client]
+ }
+ wait_for_condition 50 1000 {
+ [status $replica master_link_status] eq {up} &&
+ [status $sub_replica master_link_status] eq {up}
+ } else {
+ fail "Replicas didn't sync after master restart"
+ }
+
+ # Make sure master restore replication info correctly
+ # Replication IDs are strings, so compare them with ne / eq.
+ assert {[status $master master_replid] ne $replid}
+ assert {[status $master master_repl_offset] == $offset}
+ assert {[status $master master_replid2] eq $replid}
+ assert {[status $master second_repl_offset] == [expr {$offset+1}]}
+
+ # Make sure master set replication backlog correctly
+ assert {[status $master repl_backlog_active] == 1}
+ assert {[status $master repl_backlog_first_byte_offset] == [expr {$offset+1}]}
+ assert {[status $master repl_backlog_histlen] == 0}
+
+ # Partial resync after Master restart
+ assert {[status $master sync_partial_ok] == 1}
+ assert {[status $replica sync_partial_ok] == 1}
+ }
+
+ # Restart the master holding 1024 already-expired keys and check the keys
+ # are expired on load, the offset grows accordingly, and the replicas
+ # still resync partially.
+ test "PSYNC2: Partial resync after Master restart using RDB aux fields with expire" {
+ # Active expiry is off, so the keys stay in the dataset after the
+ # 10 ms TTL; they are spread over 16 databases.
+ $master debug set-active-expire 0
+ for {set j 0} {$j < 1024} {incr j} {
+ $master select [expr $j%16]
+ $master set $j somevalue px 10
+ }
+
+ # Sleep past the 10 ms TTL.
+ after 20
+
+ # Wait until master has received ACK from replica. If the master thinks
+ # that any replica is lagging when it shuts down, master would send
+ # GETACK to the replicas, affecting the replication offset.
+ set offset [status $master master_repl_offset]
+ wait_for_condition 500 100 {
+ [string match "*slave0:*,offset=$offset,*" [$master info replication]] &&
+ $offset == [status $replica master_repl_offset] &&
+ $offset == [status $sub_replica master_repl_offset]
+ } else {
+ show_cluster_status
+ fail "Replicas and master offsets were unable to match *exactly*."
+ }
+
+ set offset [status $master master_repl_offset]
+ $replica config resetstat
+
+ catch {
+ # Unlike the test above, here we use SIGTERM, which behaves
+ # differently compared to SHUTDOWN NOW if there are lagging
+ # replicas. This is just to increase coverage and let each test use
+ # a different shutdown approach. In this case there are no lagging
+ # replicas though.
+ restart_server 0 true false
+ set master [srv 0 client]
+ }
+ wait_for_condition 50 1000 {
+ [status $replica master_link_status] eq {up} &&
+ [status $sub_replica master_link_status] eq {up}
+ } else {
+ fail "Replicas didn't sync after master restart"
+ }
+
+ # The backlog history length after the restart is taken as the number
+ # of bytes added for the expired keys.
+ set expired_offset [status $master repl_backlog_histlen]
+ # Stale keys expired and master_repl_offset grows correctly
+ assert {[status $master rdb_last_load_keys_expired] == 1024}
+ assert {[status $master master_repl_offset] == [expr $offset+$expired_offset]}
+
+ # Partial resync after Master restart
+ assert {[status $master sync_partial_ok] == 1}
+ assert {[status $replica sync_partial_ok] == 1}
+
+ # All three servers must hold the same dataset.
+ set digest [$master debug digest]
+ assert {$digest eq [$replica debug digest]}
+ assert {$digest eq [$sub_replica debug digest]}
+ }
+
+ # Restart the master with a small backlog and 2048 already-expired keys,
+ # and check that the replica needs a full resync.
+ test "PSYNC2: Full resync after Master restart when too many key expired" {
+ # Shrink the backlog and write the setting to the config file.
+ $master config set repl-backlog-size 16384
+ $master config rewrite
+
+ $master debug set-active-expire 0
+ # Make sure replication backlog is full and will be trimmed.
+ for {set j 0} {$j < 2048} {incr j} {
+ $master select [expr $j%16]
+ $master set $j somevalue px 10
+ }
+
+ # Sleep past the 10 ms TTL.
+ after 20
+
+ wait_for_condition 500 100 {
+ [status $master master_repl_offset] == [status $replica master_repl_offset] &&
+ [status $master master_repl_offset] == [status $sub_replica master_repl_offset]
+ } else {
+ fail "Replicas and master offsets were unable to match *exactly*."
+ }
+
+ $replica config resetstat
+
+ catch {
+ # As in the previous test, here we use SIGTERM rather than the
+ # SHUTDOWN NOW used by the first two tests. This is just to
+ # increase coverage of the different shutdown approaches.
+ restart_server 0 true false
+ set master [srv 0 client]
+ }
+ wait_for_condition 50 1000 {
+ [status $replica master_link_status] eq {up} &&
+ [status $sub_replica master_link_status] eq {up}
+ } else {
+ fail "Replicas didn't sync after master restart"
+ }
+
+ # Replication backlog is full
+ assert {[status $master repl_backlog_first_byte_offset] > [status $master second_repl_offset]}
+ assert {[status $master sync_partial_ok] == 0}
+ assert {[status $master sync_full] == 1}
+ assert {[status $master rdb_last_load_keys_expired] == 2048}
+ assert {[status $replica sync_full] == 1}
+
+ # All three servers must hold the same dataset.
+ set digest [$master debug digest]
+ assert {$digest eq [$replica debug digest]}
+ assert {$digest eq [$sub_replica debug digest]}
+ }
+}}}
diff --git a/tests/integration/psync2-pingoff.tcl b/tests/integration/psync2-pingoff.tcl
new file mode 100644
index 0000000..3589d07
--- /dev/null
+++ b/tests/integration/psync2-pingoff.tcl
@@ -0,0 +1,250 @@
+# These tests were added together with the meaningful offset implementation
+# in redis 6.0.0, which was later abandoned in 6.0.4, they used to test that
+# servers are able to PSYNC with replicas even if the replication stream has
+# PINGs at the end which are present in one server and missing on another.
+# We keep these tests just because they reproduce edge cases in the replication
+# logic in hope they'll be able to spot some problem in the future.
+
+start_server {tags {"psync2 external:skip"}} {
+start_server {} {
+ # Config
+ set debug_msg 0 ; # Enable additional debug messages
+
+ for {set j 0} {$j < 2} {incr j} {
+ set R($j) [srv [expr 0-$j] client]
+ set R_host($j) [srv [expr 0-$j] host]
+ set R_port($j) [srv [expr 0-$j] port]
+ $R($j) CONFIG SET repl-ping-replica-period 1
+ if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"}
+ }
+
+ # Setup replication
+ test "PSYNC2 pingoff: setup" {
+ $R(1) replicaof $R_host(0) $R_port(0)
+ $R(0) set foo bar
+ wait_for_condition 50 1000 {
+ [status $R(1) master_link_status] == "up" &&
+ [$R(0) dbsize] == 1 && [$R(1) dbsize] == 1
+ } else {
+ fail "Replicas not replicating from master"
+ }
+ }
+
+ test "PSYNC2 pingoff: write and wait replication" {
+ $R(0) INCR counter
+ $R(0) INCR counter
+ $R(0) INCR counter
+ wait_for_condition 50 1000 {
+ [$R(0) GET counter] eq [$R(1) GET counter]
+ } else {
+ fail "Master and replica don't agree about counter"
+ }
+ }
+
+ # In this test we'll make sure the replica will get stuck, but with
+ # an active connection: this way the master will continue to send PINGs
+ # every second (we modified the PING period earlier)
+ test "PSYNC2 pingoff: pause replica and promote it" {
+ $R(1) MULTI
+ $R(1) DEBUG SLEEP 5
+ $R(1) SLAVEOF NO ONE
+ $R(1) EXEC
+ $R(1) ping ; # Wait for it to return back available
+ }
+
+ test "Make the old master a replica of the new one and check conditions" {
+ # We set the new master's ping period to a high value, so that there's
+ # no chance for a race condition of sending a PING in between the two
+ # INFO calls in the assert for master_repl_offset match below.
+ $R(1) CONFIG SET repl-ping-replica-period 1000
+
+ assert_equal [status $R(1) sync_full] 0
+ $R(0) REPLICAOF $R_host(1) $R_port(1)
+
+ wait_for_condition 50 1000 {
+ [status $R(0) master_link_status] == "up"
+ } else {
+ fail "The new master was not able to sync"
+ }
+
+ # make sure replication is still alive and kicking
+ $R(1) incr x
+ wait_for_condition 50 1000 {
+ [status $R(0) loading] == 0 &&
+ [$R(0) get x] == 1
+ } else {
+ fail "replica didn't get incr"
+ }
+ assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset]
+ }
+}}
+
+
+start_server {tags {"psync2 external:skip"}} {
+start_server {} {
+start_server {} {
+start_server {} {
+start_server {} {
+ test {test various edge cases of repl topology changes with missing pings at the end} {
+ set master [srv -4 client]
+ set master_host [srv -4 host]
+ set master_port [srv -4 port]
+ set replica1 [srv -3 client]
+ set replica2 [srv -2 client]
+ set replica3 [srv -1 client]
+ set replica4 [srv -0 client]
+
+ $replica1 replicaof $master_host $master_port
+ $replica2 replicaof $master_host $master_port
+ $replica3 replicaof $master_host $master_port
+ $replica4 replicaof $master_host $master_port
+ wait_for_condition 50 1000 {
+ [status $master connected_slaves] == 4
+ } else {
+ fail "replicas didn't connect"
+ }
+
+ $master incr x
+ wait_for_condition 50 1000 {
+ [$replica1 get x] == 1 && [$replica2 get x] == 1 &&
+ [$replica3 get x] == 1 && [$replica4 get x] == 1
+ } else {
+ fail "replicas didn't get incr"
+ }
+
+ # disconnect replica1 and replica2
+ # and wait for the master to send a ping to replica3 and replica4
+ $replica1 replicaof no one
+ $replica2 replicaof 127.0.0.1 1 ;# we can't promote it to master since that will cycle the replication id
+ $master config set repl-ping-replica-period 1
+ set replofs [status $master master_repl_offset]
+ wait_for_condition 50 100 {
+ [status $replica3 master_repl_offset] > $replofs &&
+ [status $replica4 master_repl_offset] > $replofs
+ } else {
+ fail "replica didn't sync in time"
+ }
+
+ # make everyone sync from the replica1 that didn't get the last ping from the old master
+ # replica4 will keep syncing from the old master which now syncs from replica1
+ # and replica2 will re-connect to the old master (which went back in time)
+ set new_master_host [srv -3 host]
+ set new_master_port [srv -3 port]
+ $replica3 replicaof $new_master_host $new_master_port
+ $master replicaof $new_master_host $new_master_port
+ $replica2 replicaof $master_host $master_port
+ wait_for_condition 50 1000 {
+ [status $replica2 master_link_status] == "up" &&
+ [status $replica3 master_link_status] == "up" &&
+ [status $replica4 master_link_status] == "up" &&
+ [status $master master_link_status] == "up"
+ } else {
+ fail "replicas didn't connect"
+ }
+
+ # make sure replication is still alive and kicking
+ $replica1 incr x
+ wait_for_condition 50 1000 {
+ [$replica2 get x] == 2 &&
+ [$replica3 get x] == 2 &&
+ [$replica4 get x] == 2 &&
+ [$master get x] == 2
+ } else {
+ fail "replicas didn't get incr"
+ }
+
+ # make sure we have the right amount of full syncs
+ assert_equal [status $master sync_full] 6
+ assert_equal [status $replica1 sync_full] 2
+ assert_equal [status $replica2 sync_full] 0
+ assert_equal [status $replica3 sync_full] 0
+ assert_equal [status $replica4 sync_full] 0
+
+ # force psync
+ $master client kill type master
+ $replica2 client kill type master
+ $replica3 client kill type master
+ $replica4 client kill type master
+
+ # make sure replication is still alive and kicking
+ $replica1 incr x
+ wait_for_condition 50 1000 {
+ [$replica2 get x] == 3 &&
+ [$replica3 get x] == 3 &&
+ [$replica4 get x] == 3 &&
+ [$master get x] == 3
+ } else {
+ fail "replicas didn't get incr"
+ }
+
+ # make sure we have the right amount of full syncs
+ assert_equal [status $master sync_full] 6
+ assert_equal [status $replica1 sync_full] 2
+ assert_equal [status $replica2 sync_full] 0
+ assert_equal [status $replica3 sync_full] 0
+ assert_equal [status $replica4 sync_full] 0
+}
+}}}}}
+
+start_server {tags {"psync2 external:skip"}} {
+start_server {} {
+start_server {} {
+
+ for {set j 0} {$j < 3} {incr j} {
+ set R($j) [srv [expr 0-$j] client]
+ set R_host($j) [srv [expr 0-$j] host]
+ set R_port($j) [srv [expr 0-$j] port]
+ $R($j) CONFIG SET repl-ping-replica-period 1
+ }
+
+ test "Chained replicas disconnect when replica re-connect with the same master" {
+ # Add a second replica as a chained replica of the current replica
+ $R(1) replicaof $R_host(0) $R_port(0)
+ $R(2) replicaof $R_host(1) $R_port(1)
+ wait_for_condition 50 1000 {
+ [status $R(2) master_link_status] == "up"
+ } else {
+ fail "Chained replica not replicating from its master"
+ }
+
+ # Do a write on the master, and wait for the master to
+ # send some PINGs to its replica
+ $R(0) INCR counter2
+ set replofs [status $R(0) master_repl_offset]
+ wait_for_condition 50 100 {
+ [status $R(1) master_repl_offset] > $replofs &&
+ [status $R(2) master_repl_offset] > $replofs
+ } else {
+ fail "replica didn't sync in time"
+ }
+ set sync_partial_master [status $R(0) sync_partial_ok]
+ set sync_partial_replica [status $R(1) sync_partial_ok]
+ $R(0) CONFIG SET repl-ping-replica-period 100
+
+ # Disconnect the master's direct replica
+ $R(0) client kill type replica
+ wait_for_condition 50 1000 {
+ [status $R(1) master_link_status] == "up" &&
+ [status $R(2) master_link_status] == "up" &&
+ [status $R(0) sync_partial_ok] == $sync_partial_master + 1 &&
+ [status $R(1) sync_partial_ok] == $sync_partial_replica
+ } else {
+ fail "Disconnected replica failed to PSYNC with master"
+ }
+
+ # Verify that the replica and its replica's meaningful and real
+ # offsets match with the master
+ assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset]
+ assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset]
+
+ # make sure replication is still alive and kicking
+ $R(0) incr counter2
+ wait_for_condition 50 1000 {
+ [$R(1) get counter2] == 2 && [$R(2) get counter2] == 2
+ } else {
+ fail "replicas didn't get incr"
+ }
+ assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset]
+ assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset]
+ }
+}}}
diff --git a/tests/integration/psync2-reg.tcl b/tests/integration/psync2-reg.tcl
new file mode 100644
index 0000000..b8dd101
--- /dev/null
+++ b/tests/integration/psync2-reg.tcl
@@ -0,0 +1,82 @@
+# Issue 3899 regression test.
+# We create a chain of three instances: master -> slave -> slave2
+# and continuously break the link while traffic is generated by
+# redis-benchmark. At the end we check that the data is the same
+# everywhere.
+
+start_server {tags {"psync2 external:skip"}} {
+start_server {} {
+start_server {} {
+ # Config
+ set debug_msg 0 ; # Enable additional debug messages
+
+ set no_exit 0 ; # Do not exit at end of the test
+
+ set duration 20 ; # Total test seconds
+
+ for {set j 0} {$j < 3} {incr j} {
+ set R($j) [srv [expr 0-$j] client]
+ set R_host($j) [srv [expr 0-$j] host]
+ set R_port($j) [srv [expr 0-$j] port]
+ set R_unixsocket($j) [srv [expr 0-$j] unixsocket]
+ if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"}
+ }
+
+ # Setup the replication and backlog parameters
+ test "PSYNC2 #3899 regression: setup" {
+ $R(1) slaveof $R_host(0) $R_port(0)
+ $R(2) slaveof $R_host(1) $R_port(1) ;# chained replica: R(0) -> R(1) -> R(2)
+ $R(0) set foo bar
+ wait_for_condition 50 1000 {
+ [status $R(1) master_link_status] == "up" &&
+ [status $R(2) master_link_status] == "up" &&
+ [$R(1) dbsize] == 1 &&
+ [$R(2) dbsize] == 1
+ } else {
+ fail "Replicas not replicating from master"
+ }
+ $R(0) config set repl-backlog-size 10mb
+ $R(1) config set repl-backlog-size 10mb
+ }
+
+ set cycle_start_time [clock milliseconds]
+ set bench_pid [exec src/redis-benchmark -s $R_unixsocket(0) -n 10000000 -r 1000 incr __rand_int__ > /dev/null &]
+ while 1 {
+ set elapsed [expr {[clock milliseconds]-$cycle_start_time}]
+ if {$elapsed > $duration*1000} break
+ if {rand() < .05} {
+ test "PSYNC2 #3899 regression: kill first replica" {
+ $R(1) client kill type master
+ }
+ }
+ if {rand() < .05} {
+ test "PSYNC2 #3899 regression: kill chained replica" {
+ $R(2) client kill type master
+ }
+ }
+ after 100
+ }
+ exec kill -9 $bench_pid
+
+ if {$debug_msg} {
+ for {set j 0} {$j < 100} {incr j} {
+ if {
+ [$R(0) debug digest] eq [$R(1) debug digest] &&
+ [$R(1) debug digest] eq [$R(2) debug digest]
+ } break
+ puts [$R(0) debug digest]
+ puts [$R(1) debug digest]
+ puts [$R(2) debug digest]
+ after 1000
+ }
+ }
+
+ test "PSYNC2 #3899 regression: verify consistency" {
+ wait_for_condition 50 1000 {
+ ([$R(0) debug digest] eq [$R(1) debug digest]) &&
+ ([$R(1) debug digest] eq [$R(2) debug digest])
+ } else {
+ fail "The three instances have different data sets"
+ }
+ }
+}}}
diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl
new file mode 100644
index 0000000..4abe059
--- /dev/null
+++ b/tests/integration/psync2.tcl
@@ -0,0 +1,384 @@
+
+proc show_cluster_status {} {
+ uplevel 1 {
+ # The following is the regexp we use to match the log line
+ # time info. Logs are in the following form:
+ #
+ # 11296:M 25 May 2020 17:37:14.652 # Server initialized
+ set log_regexp {^[0-9]+:[A-Z] [0-9]+ [A-Za-z]+ [0-9]+ ([0-9:.]+) .*}
+ set repl_regexp {(master|repl|sync|backlog|meaningful|offset)}
+
+ puts "Master ID is $master_id"
+ for {set j 0} {$j < 5} {incr j} {
+ puts "$j: sync_full: [status $R($j) sync_full]"
+ puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]"
+ puts "$j: id2 : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]"
+ puts "$j: backlog : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]"
+ puts "$j: x var is : [$R($j) GET x]"
+ puts "---"
+ }
+
+ # Show the replication logs of every instance, interleaving
+ # them by the log date.
+ #
+ # First: load the lines as lists for each instance.
+ array set log {0 {} 1 {} 2 {} 3 {} 4 {}} ;# every instance starts with an empty list, even if no line matches
+ for {set j 0} {$j < 5} {incr j} {
+ set fd [open $R_log($j)]
+ while {[gets $fd l] >= 0} {
+ if {[regexp $log_regexp $l] &&
+ [regexp -nocase $repl_regexp $l]} {
+ lappend log($j) $l
+ }
+ }
+ close $fd
+ }
+
+ # To interleave the lines, at every step consume the element of
+ # the list with the lowest time and remove it. Do it until
+ # all the lists are empty.
+ #
+ # regexp {^[0-9]+:[A-Z] [0-9]+ [A-Za-z]+ [0-9]+ ([0-9:.]+) .*} $l - logdate
+ while 1 {
+ # Find the log with smallest time.
+ set empty 0
+ set best 0
+ set bestdate {}
+ for {set j 0} {$j < 5} {incr j} {
+ if {[llength $log($j)] == 0} {
+ incr empty
+ continue
+ }
+ regexp $log_regexp [lindex $log($j) 0] - date
+ if {$bestdate eq {}} {
+ set best $j
+ set bestdate $date
+ } else {
+ if {[string compare $bestdate $date] > 0} {
+ set best $j
+ set bestdate $date
+ }
+ }
+ }
+ if {$empty == 5} break ; # Our exit condition: no more logs
+
+ # Emit the one with the smallest time (that is the first
+ # event in the time line).
+ puts "\[$best port $R_port($best)\] [lindex $log($best) 0]"
+ set log($best) [lrange $log($best) 1 end]
+ }
+ }
+}
+
+start_server {tags {"psync2 external:skip"}} {
+start_server {} {
+start_server {} {
+start_server {} {
+start_server {} {
+ set master_id 0 ; # Current master
+ set start_time [clock seconds] ; # Test start time
+ set counter_value 0 ; # Current value of the Redis counter "x"
+
+ # Config
+ set debug_msg 0 ; # Enable additional debug messages
+
+ set no_exit 0 ; # Do not exit at end of the test
+
+ set duration 40 ; # Total test seconds
+
+ set genload 1 ; # Load master with writes at every cycle
+
+ set genload_time 5000 ; # Writes duration time in ms
+
+ set disconnect 1 ; # Break replication link between random
+ # master and slave instances while the
+ # master is loaded with writes.
+
+ set disconnect_period 1000 ; # Disconnect repl link every N ms.
+
+ for {set j 0} {$j < 5} {incr j} {
+ set R($j) [srv [expr 0-$j] client]
+ set R_host($j) [srv [expr 0-$j] host]
+ set R_port($j) [srv [expr 0-$j] port]
+ set R_id_from_port($R_port($j)) $j ;# To get a replica index by port
+ set R_log($j) [srv [expr 0-$j] stdout]
+ if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"}
+ }
+
+ set cycle 0
+ while {([clock seconds]-$start_time) < $duration} {
+ incr cycle
+ test "PSYNC2: --- CYCLE $cycle ---" {}
+
+ # Create a random replication layout.
+ # Start with switching master (this simulates a failover).
+
+ # 1) Select the new master.
+ set master_id [randomInt 5]
+ set used [list $master_id]
+ test "PSYNC2: \[NEW LAYOUT\] Set #$master_id as master" {
+ $R($master_id) slaveof no one
+ $R($master_id) config set repl-ping-replica-period 1 ;# increase the chance that random ping will cause issues
+ if {$counter_value == 0} {
+ $R($master_id) set x $counter_value
+ }
+ }
+
+ # Build a lookup with the root master of each replica (head of the chain).
+ array set root_master {}
+ for {set j 0} {$j < 5} {incr j} {
+ set r $j
+ while {1} {
+ set r_master_port [status $R($r) master_port]
+ if {$r_master_port == ""} {
+ set root_master($j) $r
+ break
+ }
+ set r_master_id $R_id_from_port($r_master_port)
+ set r $r_master_id
+ }
+ }
+
+ # Wait for the newly detached master-replica chain (new master and existing replicas that were
+ # already connected to it) to get updated on the new replication id.
+ # This is needed to avoid a race that can result in a full sync when a replica that already
+ # got an updated repl id, tries to psync from one that's not yet aware of it.
+ wait_for_condition 50 1000 {
+ ([status $R(0) master_replid] == [status $R($root_master(0)) master_replid]) &&
+ ([status $R(1) master_replid] == [status $R($root_master(1)) master_replid]) &&
+ ([status $R(2) master_replid] == [status $R($root_master(2)) master_replid]) &&
+ ([status $R(3) master_replid] == [status $R($root_master(3)) master_replid]) &&
+ ([status $R(4) master_replid] == [status $R($root_master(4)) master_replid])
+ } else {
+ show_cluster_status
+ fail "Replica did not inherit the new replid."
+ }
+
+ # Build a lookup with the direct connection master of each replica.
+ # First loop that uses random to decide who replicates from who.
+ array set slave_to_master {}
+ while {[llength $used] != 5} {
+ while 1 {
+ set slave_id [randomInt 5]
+ if {[lsearch -exact $used $slave_id] == -1} break
+ }
+ set rand [randomInt [llength $used]]
+ set mid [lindex $used $rand]
+ set slave_to_master($slave_id) $mid
+ lappend used $slave_id
+ }
+
+ # 2) Attach all the slaves to a random instance
+ # Second loop that does the actual SLAVEOF command and make sure execute it in the right order.
+ while {[array size slave_to_master] > 0} {
+ foreach slave_id [array names slave_to_master] {
+ set mid $slave_to_master($slave_id)
+
+ # We only attach the replica to a random instance that already in the old/new chain.
+ if {$root_master($mid) == $root_master($master_id)} {
+ # Find a replica that can be attached to the new chain already attached to the new master.
+ # My new master is in the new chain.
+ } elseif {$root_master($mid) == $root_master($slave_id)} {
+ # My new master and I are in the old chain.
+ } else {
+ # In cycle 1, we do not care about the order.
+ if {$cycle != 1} {
+ # skipping this replica for now to avoid attaching in a bad order
+ # this is done to avoid an unexpected full sync, when we take a
+ # replica that already reconnected to the new chain and got a new replid
+ # and is then set to connect to a master that's still not aware of that new replid
+ continue
+ }
+ }
+
+ set master_host $R_host($mid)
+ set master_port $R_port($mid)
+
+ test "PSYNC2: Set #$slave_id to replicate from #$mid" {
+ $R($slave_id) slaveof $master_host $master_port
+ }
+
+ # Wait for replica to be connected before we proceed.
+ wait_for_condition 50 1000 {
+ [status $R($slave_id) master_link_status] == "up"
+ } else {
+ show_cluster_status
+ fail "Replica not reconnecting."
+ }
+
+ set root_master($slave_id) $root_master($mid)
+ unset slave_to_master($slave_id)
+ break
+ }
+ }
+
+ # Wait for replicas to sync. so next loop won't get -LOADING error
+ wait_for_condition 50 1000 {
+ [status $R([expr {($master_id+1)%5}]) master_link_status] == "up" &&
+ [status $R([expr {($master_id+2)%5}]) master_link_status] == "up" &&
+ [status $R([expr {($master_id+3)%5}]) master_link_status] == "up" &&
+ [status $R([expr {($master_id+4)%5}]) master_link_status] == "up"
+ } else {
+ show_cluster_status
+ fail "Replica not reconnecting"
+ }
+
+ # 3) Increment the counter and wait for all the instances
+ # to converge.
+ test "PSYNC2: cluster is consistent after failover" {
+ $R($master_id) incr x; incr counter_value
+ for {set j 0} {$j < 5} {incr j} {
+ wait_for_condition 50 1000 {
+ [$R($j) get x] == $counter_value
+ } else {
+ show_cluster_status
+ fail "Instance #$j x variable is inconsistent"
+ }
+ }
+ }
+
+ # 4) Generate load while breaking the connection of random
+ # slave-master pairs.
+ test "PSYNC2: generate load while killing replication links" {
+ set t [clock milliseconds]
+ set next_break [expr {$t+$disconnect_period}]
+ while {[clock milliseconds]-$t < $genload_time} {
+ if {$genload} {
+ $R($master_id) incr x; incr counter_value
+ }
+ if {[clock milliseconds] >= $next_break} { ;# >= so a missed millisecond cannot disable further breaks
+ set next_break \
+ [expr {[clock milliseconds]+$disconnect_period}]
+ set slave_id [randomInt 5]
+ if {$disconnect} {
+ $R($slave_id) client kill type master
+ if {$debug_msg} {
+ puts "+++ Breaking link for replica #$slave_id"
+ }
+ }
+ }
+ }
+ }
+
+ # 5) Wait for all the instances to converge on the counter value reached under load
+ set x [$R($master_id) get x]
+ test "PSYNC2: cluster is consistent after load (x = $x)" {
+ for {set j 0} {$j < 5} {incr j} {
+ wait_for_condition 50 1000 {
+ [$R($j) get x] == $counter_value
+ } else {
+ show_cluster_status
+ fail "Instance #$j x variable is inconsistent"
+ }
+ }
+ }
+
+ # wait for all the slaves to be in sync.
+ set masteroff [status $R($master_id) master_repl_offset]
+ wait_for_condition 500 100 {
+ [status $R(0) master_repl_offset] >= $masteroff &&
+ [status $R(1) master_repl_offset] >= $masteroff &&
+ [status $R(2) master_repl_offset] >= $masteroff &&
+ [status $R(3) master_repl_offset] >= $masteroff &&
+ [status $R(4) master_repl_offset] >= $masteroff
+ } else {
+ show_cluster_status
+ fail "Replicas offsets didn't catch up with the master after too long time."
+ }
+
+ if {$debug_msg} {
+ show_cluster_status
+ }
+
+ test "PSYNC2: total sum of full synchronizations is exactly 4" {
+ set sum 0
+ for {set j 0} {$j < 5} {incr j} {
+ incr sum [status $R($j) sync_full]
+ }
+ if {$sum != 4} {
+ show_cluster_status
+ assert {$sum == 4}
+ }
+ }
+
+ # In absence of pings, are the instances really able to have
+ # the exact same offset?
+ $R($master_id) config set repl-ping-replica-period 3600
+ for {set j 0} {$j < 5} {incr j} {
+ if {$j == $master_id} continue
+ $R($j) config set repl-timeout 10000
+ }
+ wait_for_condition 500 100 {
+ [status $R($master_id) master_repl_offset] == [status $R(0) master_repl_offset] &&
+ [status $R($master_id) master_repl_offset] == [status $R(1) master_repl_offset] &&
+ [status $R($master_id) master_repl_offset] == [status $R(2) master_repl_offset] &&
+ [status $R($master_id) master_repl_offset] == [status $R(3) master_repl_offset] &&
+ [status $R($master_id) master_repl_offset] == [status $R(4) master_repl_offset]
+ } else {
+ show_cluster_status
+ fail "Replicas and master offsets were unable to match *exactly*."
+ }
+
+ # Limit anyway the maximum number of cycles. This is useful when the
+ # test is skipped via --only option of the test suite. In that case
+ # we don't want to see many seconds of this test being just skipped.
+ if {$cycle > 50} break
+ }
+
+ test "PSYNC2: Bring the master back again for next test" {
+ $R($master_id) slaveof no one
+ set master_host $R_host($master_id)
+ set master_port $R_port($master_id)
+ for {set j 0} {$j < 5} {incr j} {
+ if {$j == $master_id} continue
+ $R($j) slaveof $master_host $master_port
+ }
+
+ # Wait for replicas to sync. it is not enough to just wait for connected_slaves==4
+ # since we might do the check before the master realized that they're disconnected
+ wait_for_condition 50 1000 {
+ [status $R($master_id) connected_slaves] == 4 &&
+ [status $R([expr {($master_id+1)%5}]) master_link_status] == "up" &&
+ [status $R([expr {($master_id+2)%5}]) master_link_status] == "up" &&
+ [status $R([expr {($master_id+3)%5}]) master_link_status] == "up" &&
+ [status $R([expr {($master_id+4)%5}]) master_link_status] == "up"
+ } else {
+ show_cluster_status
+ fail "Replica not reconnecting"
+ }
+ }
+
+ test "PSYNC2: Partial resync after restart using RDB aux fields" {
+ # Pick the slave to restart: the instance right after the master (not actually random)
+ set slave_id [expr {($master_id+1)%5}]
+ set sync_count [status $R($master_id) sync_full]
+ set sync_partial [status $R($master_id) sync_partial_ok]
+ set sync_partial_err [status $R($master_id) sync_partial_err]
+ catch {
+ # Make sure the server saves an RDB on shutdown
+ $R($slave_id) config set save "900 1"
+ $R($slave_id) config rewrite
+ restart_server [expr {0-$slave_id}] true false
+ set R($slave_id) [srv [expr {0-$slave_id}] client]
+ }
+ # note: just waiting for connected_slaves==4 has a race condition since
+ # we might do the check before the master realized that the slave disconnected
+ wait_for_condition 50 1000 {
+ [status $R($master_id) sync_partial_ok] == $sync_partial + 1
+ } else {
+ puts "prev sync_full: $sync_count"
+ puts "prev sync_partial_ok: $sync_partial"
+ puts "prev sync_partial_err: $sync_partial_err"
+ puts [$R($master_id) info stats]
+ show_cluster_status
+ fail "Replica didn't partial sync"
+ }
+ set new_sync_count [status $R($master_id) sync_full]
+ assert {$sync_count == $new_sync_count}
+ }
+
+ if {$no_exit} {
+ while 1 { puts -nonewline .; flush stdout; after 1000}
+ }
+
+}}}}}
diff --git a/tests/integration/rdb.tcl b/tests/integration/rdb.tcl
new file mode 100644
index 0000000..cce2167
--- /dev/null
+++ b/tests/integration/rdb.tcl
@@ -0,0 +1,419 @@
+tags {"rdb external:skip"} {
+
+set server_path [tmpdir "server.rdb-encoding-test"]
+
+# Copy RDB with different encodings in server path
+exec cp tests/assets/encodings.rdb $server_path
+exec cp tests/assets/list-quicklist.rdb $server_path
+
+start_server [list overrides [list "dir" $server_path "dbfilename" "list-quicklist.rdb" save ""]] {
+ test "test old version rdb file" {
+ r select 0
+ assert_equal [r get x] 7
+ assert_encoding listpack list
+ r lpop list
+ } {7}
+}
+
+start_server [list overrides [list "dir" $server_path "dbfilename" "encodings.rdb"]] {
+ test "RDB encoding loading test" {
+ r select 0
+ csvdump r
+ } {"0","compressible","string","aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+"0","hash","hash","a","1","aa","10","aaa","100","b","2","bb","20","bbb","200","c","3","cc","30","ccc","300","ddd","400","eee","5000000000",
+"0","hash_zipped","hash","a","1","b","2","c","3",
+"0","list","list","1","2","3","a","b","c","100000","6000000000","1","2","3","a","b","c","100000","6000000000","1","2","3","a","b","c","100000","6000000000",
+"0","list_zipped","list","1","2","3","a","b","c","100000","6000000000",
+"0","number","string","10"
+"0","set","set","1","100000","2","3","6000000000","a","b","c",
+"0","set_zipped_1","set","1","2","3","4",
+"0","set_zipped_2","set","100000","200000","300000","400000",
+"0","set_zipped_3","set","1000000000","2000000000","3000000000","4000000000","5000000000","6000000000",
+"0","string","string","Hello World"
+"0","zset","zset","a","1","b","2","c","3","aa","10","bb","20","cc","30","aaa","100","bbb","200","ccc","300","aaaa","1000","cccc","123456789","bbbb","5000000000",
+"0","zset_zipped","zset","a","1","b","2","c","3",
+}
+}
+
+set server_path [tmpdir "server.rdb-startup-test"]
+
+start_server [list overrides [list "dir" $server_path] keep_persistence true] {
+ test {Server started empty with non-existing RDB file} {
+ debug_digest
+ } {0000000000000000000000000000000000000000}
+ # Save an RDB file, needed for the next test.
+ r save
+}
+
+start_server [list overrides [list "dir" $server_path] keep_persistence true] {
+ test {Server started empty with empty RDB file} {
+ debug_digest
+ } {0000000000000000000000000000000000000000}
+}
+
+start_server [list overrides [list "dir" $server_path] keep_persistence true] {
+ test {Test RDB stream encoding} {
+ for {set j 0} {$j < 1000} {incr j} {
+ if {rand() < 0.9} {
+ r xadd stream * foo abc
+ } else {
+ r xadd stream * bar $j
+ }
+ }
+ r xgroup create stream mygroup 0
+ set records [r xreadgroup GROUP mygroup Alice COUNT 2 STREAMS stream >]
+ r xdel stream [lindex [lindex [lindex [lindex $records 0] 1] 1] 0]
+ r xack stream mygroup [lindex [lindex [lindex [lindex $records 0] 1] 0] 0]
+ set digest [debug_digest]
+ r config set sanitize-dump-payload no
+ r debug reload
+ set newdigest [debug_digest]
+ assert {$digest eq $newdigest}
+ }
+ test {Test RDB stream encoding - sanitize dump} {
+ r config set sanitize-dump-payload yes
+ r debug reload
+ set newdigest [debug_digest]
+ assert {$digest eq $newdigest}
+ }
+ # delete the stream, maybe valgrind will find something
+ r del stream
+}
+
+# Helper function to start a server and kill it, just to check the error
+# logged.
+set defaults {}
+proc start_server_and_kill_it {overrides code} {
+ upvar defaults defaults srv srv server_path server_path
+ set config [concat $defaults $overrides]
+ set srv [start_server [list overrides $config keep_persistence true]]
+ uplevel 1 $code
+ kill_server $srv
+}
+
+# Make the RDB file unreadable
+file attributes [file join $server_path dump.rdb] -permissions 0222
+
+# Detect root account (it is able to read the file even with 0222 permissions)
+set isroot 0
+catch {
+ close [open [file join $server_path dump.rdb]] ;# only probing readability: do not leak the channel
+ set isroot 1
+}
+
+# Now make sure the server aborted with an error
+if {!$isroot} {
+ start_server_and_kill_it [list "dir" $server_path] {
+ test {Server should not start if RDB file can't be open} {
+ wait_for_condition 50 100 {
+ [string match {*Fatal error loading*} \
+ [exec tail -1 < [dict get $srv stdout]]]
+ } else {
+ fail "Server started even if RDB was unreadable!"
+ }
+ }
+ }
+}
+
+# Fix permissions of the RDB file.
+file attributes [file join $server_path dump.rdb] -permissions 0666
+
+# Corrupt its CRC64 checksum.
+set filesize [file size [file join $server_path dump.rdb]]
+set fd [open [file join $server_path dump.rdb] r+]
+fconfigure $fd -translation binary
+seek $fd -8 end
+puts -nonewline $fd "foobar00"; # Corrupt the checksum
+close $fd
+
+# Now make sure the server aborted with an error
+start_server_and_kill_it [list "dir" $server_path] {
+ test {Server should not start if RDB is corrupted} {
+ wait_for_condition 50 100 {
+ [string match {*CRC error*} \
+ [exec tail -10 < [dict get $srv stdout]]]
+ } else {
+ fail "Server started even if RDB was corrupted!"
+ }
+ }
+}
+
+start_server {} {
+ test {Test FLUSHALL aborts bgsave} {
+ r config set save ""
+ # 5000 keys with 1ms sleep per key should take 5 seconds
+ r config set rdb-key-save-delay 1000
+ populate 5000
+ assert_lessthan 999 [s rdb_changes_since_last_save]
+ r bgsave
+ assert_equal [s rdb_bgsave_in_progress] 1
+ r flushall
+ # wait a second max (bgsave should take 5)
+ wait_for_condition 10 100 {
+ [s rdb_bgsave_in_progress] == 0
+ } else {
+ fail "bgsave not aborted"
+ }
+ # verify that bgsave failed, by checking that the change counter is still high
+ assert_lessthan 999 [s rdb_changes_since_last_save]
+ # make sure the server is still writable
+ r set x xx
+ }
+
+ test {bgsave resets the change counter} {
+ r config set rdb-key-save-delay 0
+ r bgsave
+ wait_for_condition 50 100 {
+ [s rdb_bgsave_in_progress] == 0
+ } else {
+ fail "bgsave not done"
+ }
+ assert_equal [s rdb_changes_since_last_save] 0
+ }
+}
+
+test {client freed during loading} {
+ start_server [list overrides [list key-load-delay 50 loading-process-events-interval-bytes 1024 rdbcompression no save "900 1"]] {
+ # create a big rdb that will take long to load. it is important
+ # for keys to be big since the server processes events only once in 2mb.
+ # 100mb of rdb, 100k keys will load in more than 5 seconds
+ r debug populate 100000 key 1000
+
+ restart_server 0 false false
+
+ # make sure it's still loading
+ assert_equal [s loading] 1
+
+ # connect and disconnect 5 clients
+ set clients {}
+ for {set j 0} {$j < 5} {incr j} {
+ lappend clients [redis_deferring_client]
+ }
+ foreach rd $clients {
+ $rd debug log bla
+ }
+ foreach rd $clients {
+ $rd read
+ }
+ foreach rd $clients {
+ $rd close
+ }
+
+ # make sure the server freed the clients
+ wait_for_condition 100 100 {
+ [s connected_clients] < 3
+ } else {
+ fail "clients didn't disconnect"
+ }
+
+ # make sure it's still loading
+ assert_equal [s loading] 1
+
+ # no need to keep waiting for loading to complete
+ exec kill [srv 0 pid]
+ }
+}
+
+start_server {} {
+ test {Test RDB load info} {
+ r debug populate 1000
+ r save
+ assert {[r lastsave] <= [lindex [r time] 0]}
+ restart_server 0 true false
+ wait_done_loading r
+ assert {[s rdb_last_load_keys_expired] == 0}
+ assert {[s rdb_last_load_keys_loaded] == 1000}
+
+ r debug set-active-expire 0
+ for {set j 0} {$j < 1024} {incr j} {
+ r select [expr $j%16]
+ r set $j somevalue px 10
+ }
+ after 20
+
+ r save
+ restart_server 0 true false
+ wait_done_loading r
+ assert {[s rdb_last_load_keys_expired] == 1024}
+ assert {[s rdb_last_load_keys_loaded] == 1000}
+ }
+}
+
+# Our COW metrics (Private_Dirty) work only on Linux
+set system_name [string tolower [exec uname -s]]
+set page_size [exec getconf PAGESIZE]
+if {$system_name eq {linux} && $page_size == 4096} {
+
+start_server {overrides {save ""}} {
+ test {Test child sending info} {
+ # make sure that rdb_last_cow_size and current_cow_size are zero (the test using new server),
+ # so that the comparisons during the test will be valid
+ assert {[s current_cow_size] == 0}
+ assert {[s current_save_keys_processed] == 0}
+ assert {[s current_save_keys_total] == 0}
+
+ assert {[s rdb_last_cow_size] == 0}
+
+ # using a 200us delay, the bgsave is empirically taking about 10 seconds.
+ # we need it to take more than some 5 seconds, since redis only reports COW once a second.
+ r config set rdb-key-save-delay 200
+ r config set loglevel debug
+
+ # populate the db with 10k keys of 512B each (since we want to measure the COW size by
+ # changing some keys and read the reported COW size, we are using small key size to prevent from
+ # the "dismiss mechanism" free memory and reduce the COW size)
+ set rd [redis_deferring_client 0]
+ set size 500 ;# aim for the 512 bin (sds overhead)
+ set cmd_count 10000
+ for {set k 0} {$k < $cmd_count} {incr k} {
+ $rd set key$k [string repeat A $size]
+ }
+
+ for {set k 0} {$k < $cmd_count} {incr k} {
+ catch { $rd read }
+ }
+
+ $rd close
+
+ # start background rdb save
+ r bgsave
+
+ set current_save_keys_total [s current_save_keys_total]
+ if {$::verbose} {
+ puts "Keys before bgsave start: $current_save_keys_total"
+ }
+
+ # on each iteration, we will write some key to the server to trigger copy-on-write, and
+ # wait to see that it reflected in INFO.
+ set iteration 1
+ set key_idx 0
+ while 1 {
+ # take samples before writing new data to the server
+ set cow_size [s current_cow_size]
+ if {$::verbose} {
+ puts "COW info before copy-on-write: $cow_size"
+ }
+
+ set keys_processed [s current_save_keys_processed]
+ if {$::verbose} {
+ puts "current_save_keys_processed info : $keys_processed"
+ }
+
+ # trigger copy-on-write
+ set modified_keys 16
+ for {set k 0} {$k < $modified_keys} {incr k} {
+ r setrange key$key_idx 0 [string repeat B $size]
+ incr key_idx 1
+ }
+
+ # changing 16 keys (512B each) will create at least 8192 COW (2 pages), but we don't want the test
+ # to be too strict, so we check for a change of at least 4096 bytes
+ set exp_cow [expr $cow_size + 4096]
+ # wait to see that current_cow_size value updated (as long as the child is in progress)
+ wait_for_condition 80 100 {
+ [s rdb_bgsave_in_progress] == 0 ||
+ [s current_cow_size] >= $exp_cow &&
+ [s current_save_keys_processed] > $keys_processed &&
+ [s current_fork_perc] > 0
+ } else {
+ if {$::verbose} {
+ puts "COW info on fail: [s current_cow_size]"
+ puts [exec tail -n 100 < [srv 0 stdout]]
+ }
+ fail "COW info wasn't reported"
+ }
+
+ # assert that $keys_processed is not greater than total keys.
+ assert_morethan_equal $current_save_keys_total $keys_processed
+
+ # when not running in accurate mode, stop after 2 iterations
+ if {!$::accurate && $iteration == 2} {
+ break
+ }
+
+ # stop iterating if the bgsave completed
+ if { [s rdb_bgsave_in_progress] == 0 } {
+ break
+ }
+
+ incr iteration 1
+ }
+
+ # make sure we saw report of current_cow_size
+ if {$iteration < 2 && $::verbose} {
+ puts [exec tail -n 100 < [srv 0 stdout]]
+ }
+ assert_morethan_equal $iteration 2
+
+ # if bgsave completed, check that rdb_last_cow_size (fork exit report)
+ # is at least 90% of the last sampled current_cow_size.
+ if { [s rdb_bgsave_in_progress] == 0 } {
+ set final_cow [s rdb_last_cow_size]
+ set cow_size [expr $cow_size * 0.9]
+ if {$final_cow < $cow_size && $::verbose} {
+ puts [exec tail -n 100 < [srv 0 stdout]]
+ }
+ assert_morethan_equal $final_cow $cow_size
+ }
+ }
+}
+} ;# system_name
+
+exec cp -f tests/assets/scriptbackup.rdb $server_path
+start_server [list overrides [list "dir" $server_path "dbfilename" "scriptbackup.rdb" "appendonly" "no"]] {
+ # the script is: "return redis.call('set', 'foo', 'bar')"
+ # its sha1 is: a0c38691e9fffe4563723c32ba77a34398e090e6
+ test {script won't load anymore if it's in rdb} {
+ assert_equal [r script exists a0c38691e9fffe4563723c32ba77a34398e090e6] 0
+ }
+}
+
+start_server {} {
+ test "failed bgsave prevents writes" {
+ # Make sure the server saves an RDB on shutdown
+ r config set save "900 1"
+
+ r config set rdb-key-save-delay 10000000
+ populate 1000
+ r set x x
+ r bgsave
+ set pid1 [get_child_pid 0]
+ catch {exec kill -9 $pid1}
+ waitForBgsave r
+
+ # make sure a read command succeeds
+ assert_equal [r get x] x
+
+ # make sure a write command fails
+ assert_error {MISCONF *} {r set x y}
+
+ # repeat with script
+ assert_error {MISCONF *} {r eval {
+ return redis.call('set','x',1)
+ } 1 x
+ }
+ assert_equal {x} [r eval {
+ return redis.call('get','x')
+ } 1 x
+ ]
+
+ # again with script using shebang
+ assert_error {MISCONF *} {r eval {#!lua
+ return redis.call('set','x',1)
+ } 1 x
+ }
+ assert_equal {x} [r eval {#!lua flags=no-writes
+ return redis.call('get','x')
+ } 1 x
+ ]
+
+ r config set rdb-key-save-delay 0
+ r bgsave
+ waitForBgsave r
+
+ # server is writable again
+ r set x y
+ } {OK}
+}
+
+} ;# tags
diff --git a/tests/integration/redis-benchmark.tcl b/tests/integration/redis-benchmark.tcl
new file mode 100644
index 0000000..8035632
--- /dev/null
+++ b/tests/integration/redis-benchmark.tcl
@@ -0,0 +1,171 @@
+source tests/support/benchmark.tcl
+
+
+# Thin wrapper around cmdrstat bound to the default `r` client.
+# Callers match the result against patterns such as `*calls=10,*`, or against
+# the empty string for a command that was never executed.
+proc cmdstat {cmd} {
+ cmdrstat $cmd r
+}
+
+# Shared preamble of the benchmark tests: reset the server stats, flush every
+# database, then execute the redis-benchmark command line held in $cmd (a list).
+# When exec reports an error, the first line of its message is printed in red
+# and the running test is failed with the same text.
+proc common_bench_setup {cmd} {
+ r config resetstat
+ r flushall
+ if {![catch { exec {*}$cmd } error]} {
+ return
+ }
+ set first_line [lindex [split $error "\n"] 0]
+ set msg "redis-benchmark non zero code. first line: $first_line"
+ puts [colorstr red $msg]
+ fail $msg
+}
+
+# Extra assertions shared by the simple set,get tests (uri parsing and other
+# simple flag related tests): each benchmarked command must report calls=10.
+proc default_set_get_checks {} {
+ foreach benchmarked {set get} {
+ assert_match {*calls=10,*} [cmdstat $benchmarked]
+ }
+ # a command that was not benchmarked must not show up in the stats
+ assert_match {} [cmdstat lrange]
+}
+
+start_server {tags {"benchmark network external:skip logreqres:skip"}} {
+ start_server {} {
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+
+ test {benchmark: set,get} {
+ set cmd [redisbenchmark $master_host $master_port "-c 5 -n 10 -t set,get"]
+ common_bench_setup $cmd
+ default_set_get_checks
+ }
+
+ test {benchmark: connecting using URI set,get} {
+ set cmd [redisbenchmarkuri $master_host $master_port "-c 5 -n 10 -t set,get"]
+ common_bench_setup $cmd
+ default_set_get_checks
+ }
+
+ test {benchmark: connecting using URI with authentication set,get} {
+ r config set masterauth pass
+ set cmd [redisbenchmarkuriuserpass $master_host $master_port "default" pass "-c 5 -n 10 -t set,get"]
+ common_bench_setup $cmd
+ default_set_get_checks
+ }
+
+ test {benchmark: full test suite} {
+ set cmd [redisbenchmark $master_host $master_port "-c 10 -n 100"]
+ common_bench_setup $cmd
+
+ # ping total calls are 2*issued commands per test due to PING_INLINE and PING_MBULK
+ assert_match {*calls=200,*} [cmdstat ping]
+ assert_match {*calls=100,*} [cmdstat set]
+ assert_match {*calls=100,*} [cmdstat get]
+ assert_match {*calls=100,*} [cmdstat incr]
+ # lpush total calls are 2*issued commands per test due to the lrange tests
+ assert_match {*calls=200,*} [cmdstat lpush]
+ assert_match {*calls=100,*} [cmdstat rpush]
+ assert_match {*calls=100,*} [cmdstat lpop]
+ assert_match {*calls=100,*} [cmdstat rpop]
+ assert_match {*calls=100,*} [cmdstat sadd]
+ assert_match {*calls=100,*} [cmdstat hset]
+ assert_match {*calls=100,*} [cmdstat spop]
+ assert_match {*calls=100,*} [cmdstat zadd]
+ assert_match {*calls=100,*} [cmdstat zpopmin]
+ assert_match {*calls=400,*} [cmdstat lrange]
+ assert_match {*calls=100,*} [cmdstat mset]
+ # assert one of the non benchmarked commands is not present
+ assert_match {} [cmdstat rpoplpush]
+ }
+
+ test {benchmark: multi-thread set,get} {
+ set cmd [redisbenchmark $master_host $master_port "--threads 10 -c 5 -n 10 -t set,get"]
+ common_bench_setup $cmd
+ default_set_get_checks
+
+ # ensure only one key was populated
+ assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d]
+ }
+
+ test {benchmark: pipelined full set,get} {
+ set cmd [redisbenchmark $master_host $master_port "-P 5 -c 10 -n 10010 -t set,get"]
+ common_bench_setup $cmd
+ assert_match {*calls=10010,*} [cmdstat set]
+ assert_match {*calls=10010,*} [cmdstat get]
+ # assert one of the non benchmarked commands is not present
+ assert_match {} [cmdstat lrange]
+
+ # ensure only one key was populated
+ assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d]
+ }
+
+ test {benchmark: arbitrary command} {
+ set cmd [redisbenchmark $master_host $master_port "-c 5 -n 150 INCRBYFLOAT mykey 10.0"]
+ common_bench_setup $cmd
+ assert_match {*calls=150,*} [cmdstat incrbyfloat]
+ # assert one of the non benchmarked commands is not present
+ assert_match {} [cmdstat get]
+
+ # ensure only one key was populated
+ assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d]
+ }
+
+ test {benchmark: keyspace length} {
+ set cmd [redisbenchmark $master_host $master_port "-r 50 -t set -n 1000"]
+ common_bench_setup $cmd
+ assert_match {*calls=1000,*} [cmdstat set]
+ # assert one of the non benchmarked commands is not present
+ assert_match {} [cmdstat get]
+
+ # ensure the keyspace has the desired size
+ assert_match {50} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d]
+ }
+
+ test {benchmark: clients idle mode should return error when reached maxclients limit} {
+ # ask for more idle connections (-c 10 -I) than the server will accept
+ set cmd [redisbenchmark $master_host $master_port "-c 10 -I"]
+ set original_maxclients [lindex [r config get maxclients] 1]
+ r config set maxclients 5
+ catch { exec {*}$cmd } error
+ # restore the limit before asserting, so that a failed assertion does
+ # not leave maxclients=5 in place for the tests that follow
+ r config set maxclients $original_maxclients
+ assert_match "*Error*" $error
+ }
+
+ # tls specific tests
+ if {$::tls} {
+ test {benchmark: specific tls-ciphers} {
+ set cmd [redisbenchmark $master_host $master_port "-r 50 -t set -n 1000 --tls-ciphers \"DEFAULT:-AES128-SHA256\""]
+ common_bench_setup $cmd
+ assert_match {*calls=1000,*} [cmdstat set]
+ # assert one of the non benchmarked commands is not present
+ assert_match {} [cmdstat get]
+ }
+
+ test {benchmark: tls connecting using URI with authentication set,get} {
+ r config set masterauth pass
+ set cmd [redisbenchmarkuriuserpass $master_host $master_port "default" pass "-c 5 -n 10 -t set,get"]
+ common_bench_setup $cmd
+ default_set_get_checks
+ }
+
+ test {benchmark: specific tls-ciphersuites} {
+ r flushall
+ r config resetstat
+ set ciphersuites_supported 1
+ set cmd [redisbenchmark $master_host $master_port "-r 50 -t set -n 1000 --tls-ciphersuites \"TLS_AES_128_GCM_SHA256\""]
+ if {[catch { exec {*}$cmd } error]} {
+ set first_line [lindex [split $error "\n"] 0]
+ if {[string match "*Invalid option*" $first_line]} {
+ set ciphersuites_supported 0
+ if {$::verbose} {
+ puts "Skipping test, TLSv1.3 not supported."
+ }
+ } else {
+ puts [colorstr red "redis-benchmark non zero code. first line: $first_line"]
+ fail "redis-benchmark non zero code. first line: $first_line"
+ }
+ }
+ if {$ciphersuites_supported} {
+ assert_match {*calls=1000,*} [cmdstat set]
+ # assert one of the non benchmarked commands is not present
+ assert_match {} [cmdstat get]
+ }
+ }
+ }
+ }
+}
diff --git a/tests/integration/redis-cli.tcl b/tests/integration/redis-cli.tcl
new file mode 100644
index 0000000..da82dda
--- /dev/null
+++ b/tests/integration/redis-cli.tcl
@@ -0,0 +1,609 @@
+source tests/support/cli.tcl
+
+if {$::singledb} {
+ set ::dbnum 0
+} else {
+ set ::dbnum 9
+}
+
+start_server {tags {"cli"}} {
+ # Spawn redis-cli against the current server and return a channel to it.
+ # opts: command line options; an empty value selects "-n $::dbnum".
+ # infile: optional file redirected to the cli's stdin. When given the
+ # channel is opened read-only, otherwise it is opened read-write.
+ # The returned channel is unbuffered, non-blocking and binary.
+ # Side effect: TERM is set to "dumb" in the environment.
+ proc open_cli {{opts ""} {infile ""}} {
+ if { $opts == "" } {
+ set opts "-n $::dbnum"
+ }
+ set ::env(TERM) dumb
+ set cmdline [rediscli [srv host] [srv port] $opts]
+ set mode "r+"
+ if {$infile ne ""} {
+ append cmdline " < $infile"
+ set mode "r"
+ }
+ set fd [open "|$cmdline" $mode]
+ fconfigure $fd -buffering none -blocking false -translation binary
+ return $fd
+ }
+
+ # Close a cli channel (such as the one returned by open_cli).
+ proc close_cli {fd} {
+ close $fd
+ }
+
+ # Collect the output currently available from the cli channel $fd.
+ # Polls every 10ms until some data shows up, then keeps reading until five
+ # consecutive reads come back empty (to cope with short reads).
+ # Returns the accumulated data. If the cli reaches end of file before
+ # producing any output (e.g. it exited), an empty string is returned
+ # instead of polling forever.
+ proc read_cli {fd} {
+ set ret [read $fd]
+ while {[string length $ret] == 0} {
+ # once EOF was hit every further read returns an empty string,
+ # so waiting for data would never terminate
+ if {[eof $fd]} {
+ return $ret
+ }
+ after 10
+ set ret [read $fd]
+ }
+
+ # We may have a short read, try to read some more.
+ set empty_reads 0
+ while {$empty_reads < 5} {
+ set buf [read $fd]
+ if {[string length $buf] == 0} {
+ after 10
+ incr empty_reads
+ } else {
+ append ret $buf
+ set empty_reads 0
+ }
+ }
+ return $ret
+ }
+
+ # Send $buf (puts appends a newline) to the cli channel and flush it.
+ proc write_cli {fd buf} {
+ puts $fd $buf
+ flush $fd
+ }
+
+ # Helpers to run tests in interactive mode
+
+ # Return $output with all trailing newline characters removed.
+ proc format_output {output} {
+ return [string trimright $output "\n"]
+ }
+
+ # Send one command line to the cli on channel $fd and return its reply
+ # with trailing newlines stripped.
+ proc run_command {fd cmd} {
+ write_cli $fd $cmd
+ set reply [read_cli $fd]
+ return [format_output $reply]
+ }
+
+ # Run $code as a test named "Interactive CLI: $name" against a freshly
+ # opened cli. FAKETTY is set in the environment before the cli is spawned
+ # and removed after the test. The test bodies passed in refer to the cli
+ # channel as $fd (presumably because `test` evaluates $code in this proc's
+ # scope - confirm against the test helper).
+ proc test_interactive_cli {name code} {
+ set ::env(FAKETTY) 1
+ set fd [open_cli]
+ test "Interactive CLI: $name" $code
+ close_cli $fd
+ unset ::env(FAKETTY)
+ }
+
+ # Same as test_interactive_cli but without setting FAKETTY: runs $code as
+ # a test named "Interactive non-TTY CLI: $name" against a freshly opened
+ # cli, whose channel the test bodies refer to as $fd.
+ proc test_interactive_nontty_cli {name code} {
+ set fd [open_cli]
+ test "Interactive non-TTY CLI: $name" $code
+ close_cli $fd
+ }
+
+ # Helpers to run tests where stdout is not a tty
+ # Store $contents verbatim (no newline is appended) in a new temporary
+ # file obtained from `tmpfile` and return the path of that file.
+ proc write_tmpfile {contents} {
+ set path [tmpfile "cli"]
+ set out [open $path "w"]
+ puts -nonewline $out $contents
+ close $out
+ return $path
+ }
+
+ # Run redis-cli non-interactively and return its output with trailing
+ # newlines stripped (at most 1048576 bytes are read).
+ # host, port, db: server to connect to and the database passed with -n.
+ # opts: list of key/value pairs:
+ # pipe <shell cmd> - run `<shell cmd> | redis-cli ...` through sh -c
+ # path <file> - redirect <file> to the cli's stdin
+ # args: remaining arguments, appended to the cli command line.
+ # NOTE: closing the command pipeline may raise an error; callers rely on
+ # this to detect a failing cli (they catch it and match messages such as
+ # "*exited abnormally*").
+ proc _run_cli {host port db opts args} {
+ set cmd [rediscli $host $port [list -n $db {*}$args]]
+ foreach {key value} $opts {
+ if {$key eq "pipe"} {
+ set cmd "sh -c \"$value | $cmd\""
+ }
+ if {$key eq "path"} {
+ set cmd "$cmd < $value"
+ }
+ }
+
+ set fd [open "|$cmd" "r"]
+ fconfigure $fd -buffering none
+ fconfigure $fd -translation binary
+ set resp [read $fd 1048576]
+ close $fd
+ set _ [format_output $resp]
+ }
+
+ # Run redis-cli with $args against the current server and database
+ # $::dbnum, with no stdin redirection, and return its formatted output.
+ proc run_cli {args} {
+ _run_cli [srv host] [srv port] $::dbnum {} {*}$args
+ }
+
+ # Run redis-cli with its stdin fed by the output of the shell command $cmd.
+ # mode: "x" passes -x to the cli; "X" passes `-X tag`, in which case the
+ # callers put the literal word `tag` in $args where the input goes.
+ # Any other mode is a programming error and raises an error (it
+ # used to be silently ignored, returning an empty string).
+ # args: remaining cli arguments.
+ # Returns the formatted cli output.
+ proc run_cli_with_input_pipe {mode cmd args} {
+ switch -exact -- $mode {
+ x { set input_opts [list -x] }
+ X { set input_opts [list -X tag] }
+ default {
+ error "run_cli_with_input_pipe: unknown mode '$mode' (expected x or X)"
+ }
+ }
+ _run_cli [srv host] [srv port] $::dbnum [list pipe $cmd] {*}$input_opts {*}$args
+ }
+
+ # Run redis-cli with the file $path redirected to its stdin.
+ # mode: "x" passes -x to the cli; "X" passes `-X tag`, in which case the
+ # callers put the literal word `tag` in $args where the input goes.
+ # Any other mode is a programming error and raises an error (it
+ # used to be silently ignored, returning an empty string).
+ # args: remaining cli arguments.
+ # Returns the formatted cli output.
+ proc run_cli_with_input_file {mode path args} {
+ switch -exact -- $mode {
+ x { set input_opts [list -x] }
+ X { set input_opts [list -X tag] }
+ default {
+ error "run_cli_with_input_file: unknown mode '$mode' (expected x or X)"
+ }
+ }
+ _run_cli [srv host] [srv port] $::dbnum [list path $path] {*}$input_opts {*}$args
+ }
+
+ # Like run_cli, but against an explicitly given host, port and database
+ # instead of the current server.
+ proc run_cli_host_port_db {host port db args} {
+ _run_cli $host $port $db {} {*}$args
+ }
+
+ # Run $code as a test named "Non-interactive non-TTY CLI: $name".
+ # No cli is opened here; the test body spawns it through the run_cli helpers.
+ proc test_nontty_cli {name code} {
+ test "Non-interactive non-TTY CLI: $name" $code
+ }
+
+ # Helpers to run tests where stdout is a tty (fake it)
+ # Runs $code as a test named "Non-interactive TTY CLI: $name" with FAKETTY
+ # set in the environment for the duration of the test, and removes it
+ # afterwards.
+ proc test_tty_cli {name code} {
+ set ::env(FAKETTY) 1
+ test "Non-interactive TTY CLI: $name" $code
+ unset ::env(FAKETTY)
+ }
+
+ test_interactive_cli "INFO response should be printed raw" {
+ set lines [split [run_command $fd info] "\n"]
+ foreach line $lines {
+ # Info lines end in \r\n, so they now end in \r.
+ if {![regexp {^\r$|^#|^[^#:]+:} $line]} {
+ fail "Malformed info line: $line"
+ }
+ }
+ }
+
+ test_interactive_cli "Status reply" {
+ assert_equal "OK" [run_command $fd "set key foo"]
+ }
+
+ test_interactive_cli "Integer reply" {
+ assert_equal "(integer) 1" [run_command $fd "incr counter"]
+ }
+
+ test_interactive_cli "Bulk reply" {
+ r set key foo
+ assert_equal "\"foo\"" [run_command $fd "get key"]
+ }
+
+ test_interactive_cli "Multi-bulk reply" {
+ r rpush list foo
+ r rpush list bar
+ assert_equal "1) \"foo\"\n2) \"bar\"" [run_command $fd "lrange list 0 -1"]
+ }
+
+ test_interactive_cli "Parsing quotes" {
+ assert_equal "OK" [run_command $fd "set key \"bar\""]
+ assert_equal "bar" [r get key]
+ assert_equal "OK" [run_command $fd "set key \" bar \""]
+ assert_equal " bar " [r get key]
+ assert_equal "OK" [run_command $fd "set key \"\\\"bar\\\"\""]
+ assert_equal "\"bar\"" [r get key]
+ assert_equal "OK" [run_command $fd "set key \"\tbar\t\""]
+ assert_equal "\tbar\t" [r get key]
+
+ # invalid quotation
+ assert_equal "Invalid argument(s)" [run_command $fd "get \"\"key"]
+ assert_equal "Invalid argument(s)" [run_command $fd "get \"key\"x"]
+
+ # quotes after the argument are weird, but should be allowed
+ assert_equal "OK" [run_command $fd "set key\"\" bar"]
+ assert_equal "bar" [r get key]
+ }
+
+ test_interactive_cli "Subscribed mode" {
+ if {$::force_resp3} {
+ run_command $fd "hello 3"
+ }
+
+ set reading "Reading messages... (press Ctrl-C to quit or any key to type command)\r"
+ set erase "\033\[K"; # Erases the "Reading messages..." line.
+
+ # Subscribe to some channels.
+ set sub1 "1) \"subscribe\"\n2) \"ch1\"\n3) (integer) 1\n"
+ set sub2 "1) \"subscribe\"\n2) \"ch2\"\n3) (integer) 2\n"
+ set sub3 "1) \"subscribe\"\n2) \"ch3\"\n3) (integer) 3\n"
+ assert_equal $sub1$sub2$sub3$reading \
+ [run_command $fd "subscribe ch1 ch2 ch3"]
+
+ # Receive pubsub message.
+ r publish ch2 hello
+ set message "1) \"message\"\n2) \"ch2\"\n3) \"hello\"\n"
+ assert_equal $erase$message$reading [read_cli $fd]
+
+ # Unsubscribe some.
+ set unsub1 "1) \"unsubscribe\"\n2) \"ch1\"\n3) (integer) 2\n"
+ set unsub2 "1) \"unsubscribe\"\n2) \"ch2\"\n3) (integer) 1\n"
+ assert_equal $erase$unsub1$unsub2$reading \
+ [run_command $fd "unsubscribe ch1 ch2"]
+
+ run_command $fd "hello 2"
+
+ # Command forbidden in subscribed mode (RESP2).
+ set err "(error) ERR Can't execute 'get': only (P|S)SUBSCRIBE / (P|S)UNSUBSCRIBE / PING / QUIT / RESET are allowed in this context\n"
+ assert_equal $erase$err$reading [run_command $fd "get k"]
+
+ # Command allowed in subscribed mode.
+ set pong "1) \"pong\"\n2) \"\"\n"
+ assert_equal $erase$pong$reading [run_command $fd "ping"]
+
+ # Reset exits subscribed mode.
+ assert_equal ${erase}RESET [run_command $fd "reset"]
+ assert_equal PONG [run_command $fd "ping"]
+
+ # Check TTY output of push messages in RESP3 has ")" prefix (to be changed to ">" in the future).
+ assert_match "1#*" [run_command $fd "hello 3"]
+ set sub1 "1) \"subscribe\"\n2) \"ch1\"\n3) (integer) 1\n"
+ assert_equal $sub1$reading \
+ [run_command $fd "subscribe ch1"]
+ }
+
+ test_interactive_nontty_cli "Subscribed mode" {
+ # Raw output and no "Reading messages..." info message.
+ # Use RESP3 in this test case.
+ assert_match {*proto 3*} [run_command $fd "hello 3"]
+
+ # Subscribe to some channels.
+ set sub1 "subscribe\nch1\n1"
+ set sub2 "subscribe\nch2\n2"
+ assert_equal $sub1\n$sub2 \
+ [run_command $fd "subscribe ch1 ch2"]
+
+ assert_equal OK [run_command $fd "client tracking on"]
+ assert_equal OK [run_command $fd "set k 42"]
+ assert_equal 42 [run_command $fd "get k"]
+
+ # Interleaving invalidate and pubsub messages.
+ r publish ch1 hello
+ r del k
+ r publish ch2 world
+ set message1 "message\nch1\nhello"
+ set invalidate "invalidate\nk"
+ set message2 "message\nch2\nworld"
+ assert_equal $message1\n$invalidate\n$message2\n [read_cli $fd]
+
+ # Unsubscribe all.
+ set unsub1 "unsubscribe\nch1\n1"
+ set unsub2 "unsubscribe\nch2\n0"
+ assert_equal $unsub1\n$unsub2 [run_command $fd "unsubscribe ch1 ch2"]
+ }
+
+ test_tty_cli "Status reply" {
+ assert_equal "OK" [run_cli set key bar]
+ assert_equal "bar" [r get key]
+ }
+
+ test_tty_cli "Integer reply" {
+ r del counter
+ assert_equal "(integer) 1" [run_cli incr counter]
+ }
+
+ test_tty_cli "Bulk reply" {
+ r set key "tab\tnewline\n"
+ assert_equal "\"tab\\tnewline\\n\"" [run_cli get key]
+ }
+
+ test_tty_cli "Multi-bulk reply" {
+ r del list
+ r rpush list foo
+ r rpush list bar
+ assert_equal "1) \"foo\"\n2) \"bar\"" [run_cli lrange list 0 -1]
+ }
+
+ test_tty_cli "Read last argument from pipe" {
+ assert_equal "OK" [run_cli_with_input_pipe x "echo foo" set key]
+ assert_equal "foo\n" [r get key]
+
+ assert_equal "OK" [run_cli_with_input_pipe X "echo foo" set key2 tag]
+ assert_equal "foo\n" [r get key2]
+ }
+
+ test_tty_cli "Read last argument from file" {
+ set tmpfile [write_tmpfile "from file"]
+
+ assert_equal "OK" [run_cli_with_input_file x $tmpfile set key]
+ assert_equal "from file" [r get key]
+
+ assert_equal "OK" [run_cli_with_input_file X $tmpfile set key2 tag]
+ assert_equal "from file" [r get key2]
+
+ file delete $tmpfile
+ }
+
+ # Checks how the cli escapes special characters in its default, --json and
+ # --quoted-json output modes.
+ test_tty_cli "Escape character in JSON mode" {
+ # reverse solidus
+ # NOTE(review): the two assert_equal calls in this section receive three
+ # arguments, unlike the other calls in this file. If the third parameter
+ # of assert_equal is a failure detail rather than a value, they compare
+ # a literal with itself and never inspect the cli output. The second one
+ # also queries key `/` although the hash is stored under `solidus`.
+ # Confirm the intent against the assert_equal helper.
+ r hset solidus \/ \/
+ assert_equal \/ \/ [run_cli hgetall solidus]
+ set escaped_reverse_solidus \"\\"
+ assert_equal $escaped_reverse_solidus $escaped_reverse_solidus [run_cli --json hgetall \/]
+ # non printable (0xF0 in ISO-8859-1, not UTF-8(0xC3 0xB0))
+ set eth "\u00f0\u0065"
+ r hset eth test $eth
+ assert_equal \"\\xf0e\" [run_cli hget eth test]
+ assert_equal \"\u00f0e\" [run_cli --json hget eth test]
+ assert_equal \"\\\\xf0e\" [run_cli --quoted-json hget eth test]
+ # control characters
+ r hset control test "Hello\x00\x01\x02\x03World"
+ assert_equal \"Hello\\u0000\\u0001\\u0002\\u0003World" [run_cli --json hget control test]
+ # non-string keys
+ r hset numkey 1 One
+ assert_equal \{\"1\":\"One\"\} [run_cli --json hgetall numkey]
+ # non-string, non-printable keys
+ r hset npkey "K\u0000\u0001ey" "V\u0000\u0001alue"
+ assert_equal \{\"K\\u0000\\u0001ey\":\"V\\u0000\\u0001alue\"\} [run_cli --json hgetall npkey]
+ assert_equal \{\"K\\\\x00\\\\x01ey\":\"V\\\\x00\\\\x01alue\"\} [run_cli --quoted-json hgetall npkey]
+ }
+
+ test_nontty_cli "Status reply" {
+ assert_equal "OK" [run_cli set key bar]
+ assert_equal "bar" [r get key]
+ }
+
+ test_nontty_cli "Integer reply" {
+ r del counter
+ assert_equal "1" [run_cli incr counter]
+ }
+
+ test_nontty_cli "Bulk reply" {
+ r set key "tab\tnewline\n"
+ assert_equal "tab\tnewline" [run_cli get key]
+ }
+
+ test_nontty_cli "Multi-bulk reply" {
+ r del list
+ r rpush list foo
+ r rpush list bar
+ assert_equal "foo\nbar" [run_cli lrange list 0 -1]
+ }
+
+if {!$::tls} { ;# fake_redis_node doesn't support TLS
+ test_nontty_cli "ASK redirect test" {
+ # Set up two fake Redis nodes.
+ set tclsh [info nameofexecutable]
+ set script "tests/helpers/fake_redis_node.tcl"
+ set port1 [find_available_port $::baseport $::portcount]
+ set port2 [find_available_port $::baseport $::portcount]
+ set p1 [exec $tclsh $script $port1 \
+ "SET foo bar" "-ASK 12182 127.0.0.1:$port2" &]
+ set p2 [exec $tclsh $script $port2 \
+ "ASKING" "+OK" \
+ "SET foo bar" "+OK" &]
+ # Make sure both fake nodes have started listening
+ wait_for_condition 50 50 {
+ [catch {close [socket "127.0.0.1" $port1]}] == 0 && \
+ [catch {close [socket "127.0.0.1" $port2]}] == 0
+ } else {
+ fail "Failed to start fake Redis nodes"
+ }
+ # Run the cli
+ assert_equal "OK" [run_cli_host_port_db "127.0.0.1" $port1 0 -c SET foo bar]
+ }
+}
+
+ test_nontty_cli "Quoted input arguments" {
+ r set "\x00\x00" "value"
+ assert_equal "value" [run_cli --quoted-input get {"\x00\x00"}]
+ }
+
+ test_nontty_cli "No accidental unquoting of input arguments" {
+ run_cli --quoted-input set {"\x41\x41"} quoted-val
+ run_cli set {"\x41\x41"} unquoted-val
+ assert_equal "quoted-val" [r get AA]
+ assert_equal "unquoted-val" [r get {"\x41\x41"}]
+ }
+
+ test_nontty_cli "Invalid quoted input arguments" {
+ catch {run_cli --quoted-input set {"Unterminated}} err
+ assert_match {*exited abnormally*} $err
+
+ # A single arg that unquotes to two arguments is also not expected
+ catch {run_cli --quoted-input set {"arg1" "arg2"}} err
+ assert_match {*exited abnormally*} $err
+ }
+
+ test_nontty_cli "Read last argument from pipe" {
+ assert_equal "OK" [run_cli_with_input_pipe x "echo foo" set key]
+ assert_equal "foo\n" [r get key]
+
+ assert_equal "OK" [run_cli_with_input_pipe X "echo foo" set key2 tag]
+ assert_equal "foo\n" [r get key2]
+ }
+
+ test_nontty_cli "Read last argument from file" {
+ set tmpfile [write_tmpfile "from file"]
+
+ assert_equal "OK" [run_cli_with_input_file x $tmpfile set key]
+ assert_equal "from file" [r get key]
+
+ assert_equal "OK" [run_cli_with_input_file X $tmpfile set key2 tag]
+ assert_equal "from file" [r get key2]
+
+ file delete $tmpfile
+ }
+
+ test_nontty_cli "Test command-line hinting - latest server" {
+ # cli will connect to the running server and will use COMMAND DOCS
+ catch {run_cli --test_hint_file tests/assets/test_cli_hint_suite.txt} output
+ assert_match "*SUCCESS*" $output
+ }
+
+ test_nontty_cli "Test command-line hinting - no server" {
+ # cli will fail to connect to the server and will use the cached commands.c
+ catch {run_cli -p 123 --test_hint_file tests/assets/test_cli_hint_suite.txt} output
+ assert_match "*SUCCESS*" $output
+ }
+
+ test_nontty_cli "Test command-line hinting - old server" {
+ # cli will connect to the server but will not use COMMAND DOCS,
+ # and complete the missing info from the cached commands.c
+ r ACL setuser clitest on nopass +@all -command|docs
+ catch {run_cli --user clitest -a nopass --no-auth-warning --test_hint_file tests/assets/test_cli_hint_suite.txt} output
+ assert_match "*SUCCESS*" $output
+ r acl deluser clitest
+ }
+
+ # Fetch an RDB from the server with redis-cli and verify that loading it
+ # back yields the expected content.
+ # functions_only: boolean (the caller passes yes / no). When true the cli
+ # is run with --functions-rdb and the reloaded server must hold the
+ # function library but no keys; otherwise --rdb is used and all
+ # 100000 keys must be present as well.
+ proc test_redis_cli_rdb_dump {functions_only} {
+ r flushdb
+ r function flush
+
+ set dir [lindex [r config get dir] 1]
+
+ # content that is expected to end up in the dump taken by the cli
+ assert_equal "OK" [r debug populate 100000 key 1000]
+ assert_equal "lib1" [r function load "#!lua name=lib1\nredis.register_function('func1', function() return 123 end)"]
+ if {$functions_only} {
+ set args "--functions-rdb $dir/cli.rdb"
+ } else {
+ set args "--rdb $dir/cli.rdb"
+ }
+ catch {run_cli {*}$args} output
+ assert_match {*Transfer finished with success*} $output
+
+ # put the file written by the cli in place of dump.rdb in the server dir
+ file delete "$dir/dump.rdb"
+ file rename "$dir/cli.rdb" "$dir/dump.rdb"
+
+ # create a key and a library that are not part of the dump, then reload
+ # without saving: they must be gone and only the dumped content remain
+ assert_equal "OK" [r set should-not-exist 1]
+ assert_equal "should_not_exist_func" [r function load "#!lua name=should_not_exist_func\nredis.register_function('should_not_exist_func', function() return 456 end)"]
+ assert_equal "OK" [r debug reload nosave]
+ assert_equal {} [r get should-not-exist]
+ assert_equal {{library_name lib1 engine LUA functions {{name func1 description {} flags {}}}}} [r function list]
+ if {$functions_only} {
+ assert_equal 0 [r dbsize]
+ } else {
+ assert_equal 100000 [r dbsize]
+ }
+ }
+
+ foreach {functions_only} {no yes} {
+
+ test "Dumping an RDB - functions only: $functions_only" {
+ # Disk-based master
+ assert_match "OK" [r config set repl-diskless-sync no]
+ test_redis_cli_rdb_dump $functions_only
+
+ # Disk-less master
+ assert_match "OK" [r config set repl-diskless-sync yes]
+ assert_match "OK" [r config set repl-diskless-sync-delay 0]
+ test_redis_cli_rdb_dump $functions_only
+ } {} {needs:repl needs:debug}
+
+ } ;# foreach functions_only
+
+ test "Scan mode" {
+ r flushdb
+ populate 1000 key: 1
+
+ # basic use
+ assert_equal 1000 [llength [split [run_cli --scan]]]
+
+ # pattern
+ assert_equal {key:2} [run_cli --scan --pattern "*:2"]
+
+ # pattern matching with a quoted string
+ assert_equal {key:2} [run_cli --scan --quoted-pattern {"*:\x32"}]
+ }
+
+ # Start redis-cli with --replica, check that the server reports it as an
+ # online replica, that the cli prints the commands written to the server,
+ # and that it reports the disconnection once the server kills the link.
+ proc test_redis_cli_repl {} {
+ set fd [open_cli "--replica"]
+ wait_for_condition 500 100 {
+ [string match {*slave0:*state=online*} [r info]]
+ } else {
+ fail "redis-cli --replica did not connect"
+ }
+
+ for {set i 0} {$i < 100} {incr i} {
+ r set test-key test-value-$i
+ }
+
+ # the last written value must eventually show up in the cli output
+ wait_for_condition 500 100 {
+ [string match {*test-value-99*} [read_cli $fd]]
+ } else {
+ fail "redis-cli --replica didn't read commands"
+ }
+
+ # switch to blocking mode before killing the replica link; the error
+ # raised when closing the channel is captured and checked below
+ fconfigure $fd -blocking true
+ r client kill type slave
+ catch { close_cli $fd } err
+ assert_match {*Server closed the connection*} $err
+ }
+
+ test "Connecting as a replica" {
+ # Disk-based master
+ assert_match "OK" [r config set repl-diskless-sync no]
+ test_redis_cli_repl
+
+ # Disk-less master
+ assert_match "OK" [r config set repl-diskless-sync yes]
+ assert_match "OK" [r config set repl-diskless-sync-delay 0]
+ test_redis_cli_repl
+ } {} {needs:repl}
+
+ # Feed a file of raw protocol commands to `redis-cli --pipe` and check that
+ # all of them were transferred and replied to without errors.
+ test "Piping raw protocol" {
+ set cmds [tmpfile "cli_cmds"]
+ set cmds_fd [open $cmds "w"]
+
+ # expected replies: 1 DEL + 1000 * (INCR + SET) + 100 SET
+ set cmds_count 2101
+
+ if {!$::singledb} {
+ puts $cmds_fd [formatCommand select 9]
+ incr cmds_count
+ }
+ puts $cmds_fd [formatCommand del test-counter]
+
+ for {set i 0} {$i < 1000} {incr i} {
+ puts $cmds_fd [formatCommand incr test-counter]
+ puts $cmds_fd [formatCommand set large-key [string repeat "x" 20000]]
+ }
+
+ for {set i 0} {$i < 100} {incr i} {
+ puts $cmds_fd [formatCommand set very-large-key [string repeat "x" 512000]]
+ }
+ close $cmds_fd
+
+ set cli_fd [open_cli "--pipe" $cmds]
+ fconfigure $cli_fd -blocking true
+ set output [read_cli $cli_fd]
+ # the channel is no longer needed once the output was read: close it
+ # instead of leaking it (and the cli process) for the rest of the run.
+ # errors from close are ignored, success is judged from $output below.
+ catch {close_cli $cli_fd}
+
+ assert_equal {1000} [r get test-counter]
+ assert_match "*All data transferred*errors: 0*replies: ${cmds_count}*" $output
+
+ file delete $cmds
+ }
+
+ test "Options -X with illegal argument" {
+ assert_error "*-x and -X are mutually exclusive*" {run_cli -x -X tag}
+
+ assert_error "*Unrecognized option or bad number*" {run_cli -X}
+
+ assert_error "*tag not match*" {run_cli_with_input_pipe X "echo foo" set key wrong_tag}
+ }
+
+ test "DUMP RESTORE with -x option" {
+ set cmdline [rediscli [srv host] [srv port]]
+
+ exec {*}$cmdline DEL set new_set
+ exec {*}$cmdline SADD set 1 2 3 4 5 6
+ assert_equal 6 [exec {*}$cmdline SCARD set]
+
+ assert_equal "OK" [exec {*}$cmdline -D "" --raw DUMP set | \
+ {*}$cmdline -x RESTORE new_set 0]
+
+ assert_equal 6 [exec {*}$cmdline SCARD new_set]
+ assert_equal "1\n2\n3\n4\n5\n6" [exec {*}$cmdline SMEMBERS new_set]
+ }
+
+ test "DUMP RESTORE with -X option" {
+ set cmdline [rediscli [srv host] [srv port]]
+
+ exec {*}$cmdline DEL zset new_zset
+ exec {*}$cmdline ZADD zset 1 a 2 b 3 c
+ assert_equal 3 [exec {*}$cmdline ZCARD zset]
+
+ assert_equal "OK" [exec {*}$cmdline -D "" --raw DUMP zset | \
+ {*}$cmdline -X dump_tag RESTORE new_zset 0 dump_tag REPLACE]
+
+ assert_equal 3 [exec {*}$cmdline ZCARD new_zset]
+ assert_equal "a\n1\nb\n2\nc\n3" [exec {*}$cmdline ZRANGE new_zset 0 -1 WITHSCORES]
+ }
+}
diff --git a/tests/integration/replication-2.tcl b/tests/integration/replication-2.tcl
new file mode 100644
index 0000000..c18ff24
--- /dev/null
+++ b/tests/integration/replication-2.tcl
@@ -0,0 +1,93 @@
+start_server {tags {"repl external:skip"}} {
+ start_server {} {
+ test {First server should have role slave after SLAVEOF} {
+ r -1 slaveof [srv 0 host] [srv 0 port]
+ wait_replica_online r
+ wait_for_condition 50 100 {
+ [s -1 master_link_status] eq {up}
+ } else {
+ fail "Replication not started."
+ }
+ }
+
+ test {If min-slaves-to-write is honored, write is accepted} {
+ r config set min-slaves-to-write 1
+ r config set min-slaves-max-lag 10
+ r set foo 12345
+ wait_for_condition 50 100 {
+ [r -1 get foo] eq {12345}
+ } else {
+ fail "Write did not reached replica"
+ }
+ }
+
+ test {No write if min-slaves-to-write is < attached slaves} {
+ r config set min-slaves-to-write 2
+ r config set min-slaves-max-lag 10
+ catch {r set foo 12345} err
+ set err
+ } {NOREPLICAS*}
+
+ test {If min-slaves-to-write is honored, write is accepted (again)} {
+ r config set min-slaves-to-write 1
+ r config set min-slaves-max-lag 10
+ r set foo 12345
+ wait_for_condition 50 100 {
+ [r -1 get foo] eq {12345}
+ } else {
+ fail "Write did not reached replica"
+ }
+ }
+
+ test {No write if min-slaves-max-lag is > of the slave lag} {
+ r config set min-slaves-to-write 1
+ r config set min-slaves-max-lag 2
+ pause_process [srv -1 pid]
+ assert {[r set foo 12345] eq {OK}}
+ wait_for_condition 100 100 {
+ [catch {r set foo 12345}] != 0
+ } else {
+ fail "Master didn't become readonly"
+ }
+ catch {r set foo 12345} err
+ assert_match {NOREPLICAS*} $err
+ }
+ resume_process [srv -1 pid]
+
+ test {min-slaves-to-write is ignored by slaves} {
+ r config set min-slaves-to-write 1
+ r config set min-slaves-max-lag 10
+ r -1 config set min-slaves-to-write 1
+ r -1 config set min-slaves-max-lag 10
+ r set foo aaabbb
+ wait_for_condition 50 100 {
+ [r -1 get foo] eq {aaabbb}
+ } else {
+ fail "Write did not reached replica"
+ }
+ }
+
+ # Fix parameters for the next test to work
+ r config set min-slaves-to-write 0
+ r -1 config set min-slaves-to-write 0
+ r flushall
+
+ # Build a complex dataset on the master and verify that the replica ends
+ # up with the same digest; on mismatch both datasets are dumped as CSV
+ # under /tmp to help debugging.
+ test {MASTER and SLAVE dataset should be identical after complex ops} {
+ createComplexDataset r 10000
+ # wait until the replica offset matches the master's instead of
+ # sleeping for a fixed amount of time, which is timing-dependent
+ wait_for_ofs_sync [srv 0 client] [srv -1 client]
+ if {[r debug digest] ne [r -1 debug digest]} {
+ set csv1 [csvdump r]
+ set csv2 [csvdump {r -1}]
+ set fd [open /tmp/repldump1.txt w]
+ puts -nonewline $fd $csv1
+ close $fd
+ set fd [open /tmp/repldump2.txt w]
+ puts -nonewline $fd $csv2
+ close $fd
+ puts "Master - Replica inconsistency"
+ puts "Run diff -u against /tmp/repldump*.txt for more info"
+ }
+ assert_equal [r debug digest] [r -1 debug digest]
+ }
+ }
+}
diff --git a/tests/integration/replication-3.tcl b/tests/integration/replication-3.tcl
new file mode 100644
index 0000000..f53a05a
--- /dev/null
+++ b/tests/integration/replication-3.tcl
@@ -0,0 +1,130 @@
+start_server {tags {"repl external:skip"}} {
+ start_server {} {
+ test {First server should have role slave after SLAVEOF} {
+ r -1 slaveof [srv 0 host] [srv 0 port]
+ wait_for_condition 50 100 {
+ [s -1 master_link_status] eq {up}
+ } else {
+ fail "Replication not started."
+ }
+ }
+
+ if {$::accurate} {set numops 50000} else {set numops 5000}
+
+ test {MASTER and SLAVE consistency with expire} {
+ createComplexDataset r $numops useexpire
+
+ # Make sure everything expired before taking the digest
+ # createComplexDataset uses max expire time of 2 seconds
+ wait_for_condition 50 100 {
+ 0 == [scan [regexp -inline {expires\=([\d]*)} [r -1 info keyspace]] expires=%d]
+ } else {
+ fail "expire didn't end"
+ }
+
+ # make sure the replica got all the DELs
+ wait_for_ofs_sync [srv 0 client] [srv -1 client]
+
+ if {[r debug digest] ne [r -1 debug digest]} {
+ set csv1 [csvdump r]
+ set csv2 [csvdump {r -1}]
+ set fd [open /tmp/repldump1.txt w]
+ puts -nonewline $fd $csv1
+ close $fd
+ set fd [open /tmp/repldump2.txt w]
+ puts -nonewline $fd $csv2
+ close $fd
+ puts "Master - Replica inconsistency"
+ puts "Run diff -u against /tmp/repldump*.txt for more info"
+ }
+ assert_equal [r debug digest] [r -1 debug digest]
+ }
+
+ test {Master can replicate command longer than client-query-buffer-limit on replica} {
+ # Configure the master to have a bigger query buffer limit
+ r config set client-query-buffer-limit 2000000
+ r -1 config set client-query-buffer-limit 1048576
+ # Write a very large command onto the master
+ r set key [string repeat "x" 1100000]
+ wait_for_condition 300 100 {
+ [r -1 get key] eq [string repeat "x" 1100000]
+ } else {
+ fail "Unable to replicate command longer than client-query-buffer-limit"
+ }
+ }
+
+ test {Slave is able to evict keys created in writable slaves} {
+ r -1 select 5
+ assert {[r -1 dbsize] == 0}
+ r -1 config set slave-read-only no
+ r -1 set key1 1 ex 5
+ r -1 set key2 2 ex 5
+ r -1 set key3 3 ex 5
+ assert {[r -1 dbsize] == 3}
+ after 6000
+ r -1 dbsize
+ } {0}
+
+ test {Writable replica doesn't return expired keys} {
+ r select 5
+ assert {[r dbsize] == 0}
+ r debug set-active-expire 0
+ r set key1 5 px 10
+ r set key2 5 px 10
+ r -1 select 5
+ wait_for_condition 50 100 {
+ [r -1 dbsize] == 2 && [r -1 exists key1 key2] == 0
+ } else {
+ fail "Keys didn't replicate or didn't expire."
+ }
+ r -1 config set slave-read-only no
+ assert_equal 2 [r -1 dbsize] ; # active expire is off
+ assert_equal 1 [r -1 incr key1] ; # incr expires and re-creates key1
+ assert_equal -1 [r -1 ttl key1] ; # incr created key1 without TTL
+ assert_equal {} [r -1 get key2] ; # key2 expired but not deleted
+ assert_equal 2 [r -1 dbsize]
+ # cleanup
+ r debug set-active-expire 1
+ r -1 del key1 key2
+ r -1 config set slave-read-only yes
+ r del key1 key2
+ }
+
+ test {PFCOUNT updates cache on readonly replica} {
+ r select 5
+ assert {[r dbsize] == 0}
+ r pfadd key a b c d e f g h i j k l m n o p q
+ set strval [r get key]
+ r -1 select 5
+ wait_for_condition 50 100 {
+ [r -1 dbsize] == 1
+ } else {
+ fail "Replication timeout."
+ }
+ assert {$strval == [r -1 get key]}
+ assert_equal 17 [r -1 pfcount key]
+ assert {$strval != [r -1 get key]}; # cache updated
+ # cleanup
+ r del key
+ }
+
+ test {PFCOUNT doesn't use expired key on readonly replica} {
+ r select 5
+ assert {[r dbsize] == 0}
+ r debug set-active-expire 0
+ r pfadd key a b c d e f g h i j k l m n o p q
+ r pexpire key 10
+ r -1 select 5
+ wait_for_condition 50 100 {
+ [r -1 dbsize] == 1 && [r -1 exists key] == 0
+ } else {
+ fail "Key didn't replicate or didn't expire."
+ }
+ assert_equal [r -1 pfcount key] 0 ; # expired key not used
+ assert_equal [r -1 dbsize] 1 ; # but it's also not deleted
+ # cleanup
+ r debug set-active-expire 1
+ r del key
+ }
+ }
+}
diff --git a/tests/integration/replication-4.tcl b/tests/integration/replication-4.tcl
new file mode 100644
index 0000000..4370080
--- /dev/null
+++ b/tests/integration/replication-4.tcl
@@ -0,0 +1,295 @@
+start_server {tags {"repl network external:skip singledb:skip"} overrides {save {}}} {
+ start_server { overrides {save {}}} {
+
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+ set slave [srv 0 client]
+
+ set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000]
+ set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000]
+ set load_handle2 [start_bg_complex_data $master_host $master_port 12 100000]
+
+ test {First server should have role slave after SLAVEOF} {
+ $slave slaveof $master_host $master_port
+ wait_for_condition 50 100 {
+ [s 0 role] eq {slave}
+ } else {
+ fail "Replication not started."
+ }
+ }
+
+ test {Test replication with parallel clients writing in different DBs} {
+ # Gives the random workloads a chance to add some complex commands.
+ after 5000
+
+ # Make sure all parallel clients have written data.
+ wait_for_condition 1000 50 {
+ [$master select 9] == {OK} && [$master dbsize] > 0 &&
+ [$master select 11] == {OK} && [$master dbsize] > 0 &&
+ [$master select 12] == {OK} && [$master dbsize] > 0
+ } else {
+ fail "Parallel clients are not writing in different DBs."
+ }
+
+ stop_bg_complex_data $load_handle0
+ stop_bg_complex_data $load_handle1
+ stop_bg_complex_data $load_handle2
+ wait_for_condition 100 100 {
+ [$master debug digest] == [$slave debug digest]
+ } else {
+ set csv1 [csvdump r]
+ set csv2 [csvdump {r -1}]
+ set fd [open /tmp/repldump1.txt w]
+ puts -nonewline $fd $csv1
+ close $fd
+ set fd [open /tmp/repldump2.txt w]
+ puts -nonewline $fd $csv2
+ close $fd
+ fail "Master - Replica inconsistency, Run diff -u against /tmp/repldump*.txt for more info"
+ }
+ }
+ }
+}
+
+start_server {tags {"repl external:skip"}} {
+ start_server {} {
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+ set slave [srv 0 client]
+
+ # Load some functions to be used later
+ $master FUNCTION load replace {#!lua name=test
+ redis.register_function{function_name='f_default_flags', callback=function(keys, args) return redis.call('get',keys[1]) end, flags={}}
+ redis.register_function{function_name='f_no_writes', callback=function(keys, args) return redis.call('get',keys[1]) end, flags={'no-writes'}}
+ }
+
+ test {First server should have role slave after SLAVEOF} {
+ $slave slaveof $master_host $master_port
+ wait_replica_online $master
+ }
+
+ test {With min-slaves-to-write (1,3): master should be writable} {
+ $master config set min-slaves-max-lag 3
+ $master config set min-slaves-to-write 1
+ assert_equal OK [$master set foo 123]
+ assert_equal OK [$master eval "return redis.call('set','foo',12345)" 0]
+ }
+
+ test {With min-slaves-to-write (2,3): master should not be writable} {
+ $master config set min-slaves-max-lag 3
+ $master config set min-slaves-to-write 2
+ assert_error "*NOREPLICAS*" {$master set foo bar}
+ assert_error "*NOREPLICAS*" {$master eval "redis.call('set','foo','bar')" 0}
+ }
+
+ test {With min-slaves-to-write function without no-write flag} {
+ assert_error "*NOREPLICAS*" {$master fcall f_default_flags 1 foo}
+ assert_equal "12345" [$master fcall f_no_writes 1 foo]
+ }
+
+ test {With not enough good slaves, read in Lua script is still accepted} {
+ $master config set min-slaves-max-lag 3
+ $master config set min-slaves-to-write 1
+ $master eval "redis.call('set','foo','bar')" 0
+
+ $master config set min-slaves-to-write 2
+ $master eval "return redis.call('get','foo')" 0
+ } {bar}
+
+ test {With min-slaves-to-write: master not writable with lagged slave} {
+ $master config set min-slaves-max-lag 2
+ $master config set min-slaves-to-write 1
+ assert_equal OK [$master set foo 123]
+ assert_equal OK [$master eval "return redis.call('set','foo',12345)" 0]
+ # Pause (not kill) the replica process so that it becomes a lagged replica; it is resumed below.
+ pause_process [srv 0 pid]
+ # Poll (up to 100 tries, 100 ms apart) until the master starts rejecting writes.
+ wait_for_condition 100 100 {
+ [catch {$master set foo 123}] != 0
+ } else {
+ fail "Master didn't become readonly"
+ }
+ assert_error "*NOREPLICAS*" {$master set foo 123}
+ assert_error "*NOREPLICAS*" {$master eval "return redis.call('set','foo',12345)" 0}
+ resume_process [srv 0 pid]
+ }
+ }
+}
+
+start_server {tags {"repl external:skip"}} {
+ start_server {} {
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+ set slave [srv 0 client]
+
+ test {First server should have role slave after SLAVEOF} {
+ $slave slaveof $master_host $master_port
+ wait_for_condition 50 100 {
+ [s 0 master_link_status] eq {up}
+ } else {
+ fail "Replication not started."
+ }
+ }
+
+ test {Replication of an expired key does not delete the expired key} {
+ # This test is likely to fail spuriously if a wait_for_ofs_sync call takes
+ # longer than the expiration time, so give it a few more chances.
+ # Go with 5 attempts of increasing expire time, i.e. start with 500ms, then go
+ # to 1000ms, 2000ms, 4000ms, 8000ms.
+ set px_ms 500
+ for {set i 0} {$i < 5} {incr i} {
+
+ wait_for_ofs_sync $master $slave
+ $master debug set-active-expire 0
+ $master set k 1 px $px_ms
+ wait_for_ofs_sync $master $slave
+ pause_process [srv 0 pid]
+ $master incr k
+ # Keep the replica stopped for longer than the key's TTL to make sure the
+ # INCR arrives to the replica after the key is logically expired.
+ after [expr {$px_ms + 1}]
+ resume_process [srv 0 pid]
+ wait_for_ofs_sync $master $slave
+ # Check that k is logically expired but is present in the replica.
+ set res [$slave exists k]
+ set errcode [catch {$slave debug object k} err] ; # Raises exception if k is gone.
+ if {$res == 0 && $errcode == 0} { break }
+ set px_ms [expr {$px_ms * 2}]
+
+ } ;# for
+
+ if {$::verbose} { puts "Replication of an expired key does not delete the expired key test attempts: $i" }
+ assert_equal $res 0
+ assert_equal $errcode 0
+ }
+ }
+}
+
+start_server {tags {"repl external:skip"}} {
+ start_server {} {
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+ set slave [srv 0 client]
+
+ test {First server should have role slave after SLAVEOF} {
+ $slave slaveof $master_host $master_port
+ wait_for_condition 50 100 {
+ [s 0 role] eq {slave}
+ } else {
+ fail "Replication not started."
+ }
+ }
+
+ test {Replication: commands with many arguments (issue #1221)} {
+ # We now issue large MSET commands, that may trigger a specific
+ # class of bugs, see issue #1221.
+ for {set j 0} {$j < 100} {incr j} {
+ set cmd [list mset]
+ for {set x 0} {$x < 1000} {incr x} {
+ lappend cmd [randomKey] [randomValue]
+ }
+ $master {*}$cmd
+ }
+
+ # Fail the test (instead of silently giving up) if the datasets never converge.
+ wait_for_condition 10 1000 {
+ [$master debug digest] eq [$slave debug digest]
+ } else {
+ fail "Master - Replica inconsistency after large MSET commands"
+ }
+ assert {[$master dbsize] > 0}
+ }
+
+ test {spopwithcount rewrite srem command} {
+ $master del myset
+
+ set content {}
+ for {set j 0} {$j < 4000} {} {
+ lappend content [incr j]
+ }
+ $master sadd myset {*}$content
+ $master spop myset 1023
+ $master spop myset 1024
+ $master spop myset 1025
+
+ assert_match 928 [$master scard myset]
+ assert_match {*calls=3,*} [cmdrstat spop $master]
+
+ wait_for_condition 50 100 {
+ [status $slave master_repl_offset] == [status $master master_repl_offset]
+ } else {
+ fail "SREM replication inconsistency."
+ }
+ assert_match {*calls=4,*} [cmdrstat srem $slave]
+ assert_match 928 [$slave scard myset]
+ }
+
+ test {Replication of SPOP command -- alsoPropagate() API} {
+ $master del myset
+ set size [expr {1 + [randomInt 100]}]
+ set content {}
+ for {set j 0} {$j < $size} {incr j} {
+ lappend content [randomValue]
+ }
+ $master sadd myset {*}$content
+ # Pop a random number of members; the reply is not needed, only its replication.
+ set count [randomInt 100]
+ $master spop myset $count
+ # The replica must end up with exactly the same dataset as the master.
+ wait_for_condition 50 100 {
+ [$master debug digest] eq [$slave debug digest]
+ } else {
+ fail "SPOP replication inconsistency"
+ }
+ }
+ }
+}
+
+start_server {tags {"repl external:skip"}} {
+ start_server {} {
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+ set replica [srv 0 client]
+
+ test {First server should have role slave after SLAVEOF} {
+ $replica slaveof $master_host $master_port
+ wait_for_condition 50 100 {
+ [s 0 role] eq {slave}
+ } else {
+ fail "Replication not started."
+ }
+ wait_for_sync $replica
+ }
+
+ test {Data divergence can happen under default conditions} {
+ $replica config set propagation-error-behavior ignore
+ $master debug replicate fake-command-1
+
+ # Wait for replication to normalize
+ $master set foo bar2
+ $master wait 1 2000
+
+ # Make sure we triggered the error, by finding the critical
+ # message and the fake command.
+ assert_equal [count_log_message 0 "fake-command-1"] 1
+ assert_equal [count_log_message 0 "== CRITICAL =="] 1
+ }
+
+ test {Data divergence is allowed on writable replicas} {
+ $replica config set replica-read-only no
+ $replica set number2 foo
+ $master incrby number2 1
+ $master wait 1 2000
+
+ assert_equal [$master get number2] 1
+ assert_equal [$replica get number2] foo
+
+ assert_equal [count_log_message 0 "incrby"] 1
+ }
+ }
+}
diff --git a/tests/integration/replication-buffer.tcl b/tests/integration/replication-buffer.tcl
new file mode 100644
index 0000000..64b26ca
--- /dev/null
+++ b/tests/integration/replication-buffer.tcl
@@ -0,0 +1,307 @@
+# This test group verifies that all replicas share one global replication buffer:
+# attaching two replicas does not double the replication buffer size, and once no
+# replica uses the buffer any more, the replication buffer shrinks.
+start_server {tags {"repl external:skip"}} {
+start_server {} {
+start_server {} {
+start_server {} {
+ set replica1 [srv -3 client]
+ set replica2 [srv -2 client]
+ set replica3 [srv -1 client]
+
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+
+ $master config set save ""
+ $master config set repl-backlog-size 16384
+ $master config set repl-diskless-sync-delay 5
+ $master config set repl-diskless-sync-max-replicas 1
+ $master config set client-output-buffer-limit "replica 0 0 0"
+
+ # Make sure replica3 is synchronized with master
+ $replica3 replicaof $master_host $master_port
+ wait_for_sync $replica3
+
+ # Generating RDB will take some 100 seconds
+ $master config set rdb-key-save-delay 1000000
+ populate 100 "" 16
+
+ # Make sure replica1 and replica2 are waiting bgsave
+ $master config set repl-diskless-sync-max-replicas 2
+ $replica1 replicaof $master_host $master_port
+ $replica2 replicaof $master_host $master_port
+ wait_for_condition 50 100 {
+ ([s rdb_bgsave_in_progress] == 1) &&
+ [lindex [$replica1 role] 3] eq {sync} &&
+ [lindex [$replica2 role] 3] eq {sync}
+ } else {
+ fail "fail to sync with replicas"
+ }
+
+ test {All replicas share one global replication buffer} {
+ set before_used [s used_memory]
+ populate 1024 "" 1024 ; # Write extra 1M data
+ # New data uses 1M memory, but all replicas use only one
+ # replication buffer, so all replicas output memory is not
+ # more than double of replication buffer.
+ set repl_buf_mem [s mem_total_replication_buffers]
+ set extra_mem [expr {[s used_memory]-$before_used-1024*1024}]
+ assert {$extra_mem < 2*$repl_buf_mem}
+
+ # Kill replica1, replication_buffer will not become smaller
+ catch {$replica1 shutdown nosave}
+ wait_for_condition 50 100 {
+ [s connected_slaves] eq {2}
+ } else {
+ fail "replica doesn't disconnect with master"
+ }
+ assert_equal $repl_buf_mem [s mem_total_replication_buffers]
+ }
+
+ test {Replication buffer will become smaller when no replica uses} {
+ # Make sure replica3 has caught up with the master
+ wait_for_ofs_sync $master $replica3
+ # Remember the buffer size while replica2 is still attached to the master.
+ set repl_buf_mem [s mem_total_replication_buffers]
+ # Shut down replica2, replication_buffer will become smaller
+ catch {$replica2 shutdown nosave}
+ wait_for_condition 50 100 {
+ [s connected_slaves] eq {1}
+ } else {
+ fail "replica2 doesn't disconnect with master"
+ }
+ assert {$repl_buf_mem - 1024*1024 > [s mem_total_replication_buffers]}
+ }
+}
+}
+}
+}
+
+# This test group verifies that the replication backlog can outgrow the configured
+# backlog limit when a slow replica keeps massive replication buffers alive, and
+# that replicas can use this replication buffer (beyond the backlog config) for
+# partial resynchronization. The replication backlog memory must also shrink
+# again once the master disconnects slow replicas because their output buffer
+# limit is reached.
+start_server {tags {"repl external:skip"}} {
+start_server {} {
+start_server {} {
+ set replica1 [srv -2 client]
+ set replica1_pid [s -2 process_id]
+ set replica2 [srv -1 client]
+ set replica2_pid [s -1 process_id]
+
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+
+ $master config set save ""
+ $master config set repl-backlog-size 16384
+ $master config set client-output-buffer-limit "replica 0 0 0"
+
+ # Executing 'debug digest' on master which has many keys costs much time
+ # (especially in valgrind), this causes that replica1 and replica2 disconnect
+ # with master.
+ $master config set repl-timeout 1000
+ $replica1 config set repl-timeout 1000
+ $replica2 config set repl-timeout 1000
+
+ $replica1 replicaof $master_host $master_port
+ wait_for_sync $replica1
+
+ test {Replication backlog size can outgrow the backlog limit config} {
+ # Generating RDB will take 1000 seconds
+ $master config set rdb-key-save-delay 1000000
+ populate 1000 master 10000
+ $replica2 replicaof $master_host $master_port
+ # Make sure replica2 is waiting for the bgsave to finish
+ wait_for_condition 5000 100 {
+ ([s rdb_bgsave_in_progress] == 1) &&
+ [lindex [$replica2 role] 3] eq {sync}
+ } else {
+ fail "fail to sync with replicas"
+ }
+ # The actual replication backlog grows beyond the backlog setting since
+ # the slow replica2 keeps the replication buffer referenced.
+ populate 10000 master 10000
+ assert {[s repl_backlog_histlen] > 10000*10000}
+ }
+
+ # Wait replica1 catch up with the master
+ wait_for_condition 1000 100 {
+ [s -2 master_repl_offset] eq [s master_repl_offset]
+ } else {
+ fail "Replica offset didn't catch up with the master after too long time"
+ }
+
+ test {Replica could use replication buffer (beyond backlog config) for partial resynchronization} {
+ # replica1 disconnects with master
+ $replica1 replicaof [srv -1 host] [srv -1 port]
+ # Write a mass of data that exceeds repl-backlog-size
+ populate 10000 master 10000
+ # replica1 reconnects with master
+ $replica1 replicaof $master_host $master_port
+ wait_for_condition 1000 100 {
+ [s -2 master_repl_offset] eq [s master_repl_offset]
+ } else {
+ fail "Replica offset didn't catch up with the master after too long time"
+ }
+
+ # replica2 still waits for bgsave ending
+ assert {[s rdb_bgsave_in_progress] eq {1} && [lindex [$replica2 role] 3] eq {sync}}
+ # master accepted replica1 partial resync
+ assert_equal [s sync_partial_ok] {1}
+ assert_equal [$master debug digest] [$replica1 debug digest]
+ }
+
+ test {Replication backlog memory will become smaller if disconnecting with replica} {
+ assert {[s repl_backlog_histlen] > 2*10000*10000}
+ assert_equal [s connected_slaves] {2}
+ # Pause replica2 and give replicas a hard output buffer limit of 128k.
+ pause_process $replica2_pid
+ r config set client-output-buffer-limit "replica 128k 0 0"
+ # trigger output buffer limit check
+ r set key [string repeat A [expr {64*1024}]]
+ # master will close replica2's connection since replica2's output
+ # buffer limit is reached, so only replica1 remains connected.
+ wait_for_condition 100 100 {
+ [s connected_slaves] eq {1}
+ } else {
+ fail "master didn't disconnect with replica2"
+ }
+
+ # Since we trim replication backlog incrementally, replication backlog
+ # memory may take time to be reclaimed.
+ wait_for_condition 1000 100 {
+ [s repl_backlog_histlen] < 10000*10000
+ } else {
+ fail "Replication backlog memory is not smaller"
+ }
+ resume_process $replica2_pid
+ }
+ # speed up termination
+ $master config set shutdown-timeout 0
+}
+}
+}
+
+test {Partial resynchronization is successful even client-output-buffer-limit is less than repl-backlog-size} {
+ start_server {tags {"repl external:skip"}} {
+ start_server {} {
+ r config set save ""
+ r config set repl-backlog-size 100mb
+ r config set client-output-buffer-limit "replica 512k 0 0"
+
+ set replica [srv -1 client]
+ $replica replicaof [srv 0 host] [srv 0 port]
+ wait_for_sync $replica
+
+ set big_str [string repeat A [expr 10*1024*1024]] ;# 10mb big string
+ r multi
+ r client kill type replica
+ r set key $big_str
+ r set key $big_str
+ r debug sleep 2 ;# wait for replica reconnecting
+ r exec
+ # When replica reconnects with master, master accepts partial resync,
+ # and don't close replica client even client output buffer limit is
+ # reached.
+ r set key $big_str ;# trigger output buffer limit check
+ wait_for_ofs_sync r $replica
+ # master accepted replica partial resync
+ assert_equal [s sync_full] {1}
+ assert_equal [s sync_partial_ok] {1}
+
+ r multi
+ r set key $big_str
+ r set key $big_str
+ r exec
+ # replica's reply buffer size is more than client-output-buffer-limit but
+ # doesn't exceed repl-backlog-size, we don't close replica client.
+ wait_for_condition 1000 100 {
+ [s -1 master_repl_offset] eq [s master_repl_offset]
+ } else {
+ fail "Replica offset didn't catch up with the master after too long time"
+ }
+ assert_equal [s sync_full] {1}
+ assert_equal [s sync_partial_ok] {1}
+ }
+ }
+}
+
+# This test was added to make sure big keys added to the backlog do not trigger psync loop.
+test {Replica client-output-buffer size is limited to backlog_limit/16 when no replication data is pending} {
+ proc client_field {r type f} {
+ # Return the value of field $f from the output of "$r client list type $type";
+ # raise an error when the field is absent. The field name must start a token.
+ set client [$r client list type $type]
+ if {![regexp "(?:^|\\s)$f=(\[a-zA-Z0-9-\]+)" $client - res]} {error "field $f not found in $client"}
+ return $res
+ }
+
+ start_server {tags {"repl external:skip"}} {
+ start_server {} {
+ set replica [srv -1 client]
+ set replica_host [srv -1 host]
+ set replica_port [srv -1 port]
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+ $master config set maxmemory-policy allkeys-lru
+
+ $master config set repl-backlog-size 16384
+ $master config set client-output-buffer-limit "replica 32768 32768 60"
+ # The key has to be larger than the replica client-output-buffer limit.
+ set keysize [expr 256*1024]
+
+ $replica replicaof $master_host $master_port
+ wait_for_condition 50 100 {
+ [lindex [$replica role] 0] eq {slave} &&
+ [string match {*master_link_status:up*} [$replica info replication]]
+ } else {
+ fail "Can't turn the instance into a replica"
+ }
+
+ # Write a big key that is gonna breach the obuf limit and cause the replica to disconnect,
+ # then in the same event loop, add at least 16 more keys, and enable eviction, so that the
+ # eviction code has a chance to call flushSlavesOutputBuffers, and then run PING to trigger the eviction code
+ set _v [prepare_value $keysize]
+ $master write "[format_command mset key $_v k1 1 k2 2 k3 3 k4 4 k5 5 k6 6 k7 7 k8 8 k9 9 ka a kb b kc c kd d ke e kf f kg g kh h]config set maxmemory 1\r\nping\r\n"
+ $master flush
+ $master read
+ $master read
+ $master read
+ wait_for_ofs_sync $master $replica
+
+ # Write another key to force the test to wait for another event loop iteration so that we
+ # give the serverCron a chance to disconnect replicas with COB size exceeding the limits
+ $master config set maxmemory 0
+ $master set key1 1
+ wait_for_ofs_sync $master $replica
+
+ assert {[status $master connected_slaves] == 1}
+
+ wait_for_condition 50 100 {
+ [client_field $master replica tot-mem] < $keysize
+ } else {
+ fail "replica client-output-buffer usage is higher than expected."
+ }
+
+ # now we expect the replica to re-connect but fail partial sync (it doesn't have large
+ # enough COB limit and must result in a full-sync)
+ assert {[status $master sync_partial_ok] == 0}
+
+ # Before this fix (#11905), the test would trigger an assertion in 'o->used >= c->ref_block_pos'
+ test {The update of replBufBlock's repl_offset is ok - Regression test for #11666} {
+ set rd [redis_deferring_client]
+ set replid [status $master master_replid]
+ set offset [status $master repl_backlog_first_byte_offset]
+ $rd psync $replid $offset
+ assert_equal {PONG} [$master ping] ;# Make sure the master doesn't crash.
+ $rd close
+ }
+ }
+ }
+}
+
diff --git a/tests/integration/replication-psync.tcl b/tests/integration/replication-psync.tcl
new file mode 100644
index 0000000..dc1df0f
--- /dev/null
+++ b/tests/integration/replication-psync.tcl
@@ -0,0 +1,143 @@
+# Creates a master-slave pair and breaks the link continuously to force
+# partial resync attempts, all this while flooding the master with
+# write queries.
+#
+# descr names the test; duration is the test length in seconds; backlog_size and
+# backlog_ttl configure the master backlog; delay is how long the replica sleeps
+# after each link break; cond is a script evaluated at the end of the test; mdl
+# and sdl are the master repl-diskless-sync / replica repl-diskless-load values.
+# If reconnect is true the test actually breaks the link and reconnects to the
+# master, otherwise only the initial synchronization is checked for consistency.
+proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reconnect} {
+ start_server {tags {"repl"} overrides {save {}}} {
+ start_server {overrides {save {}}} {
+
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+ set slave [srv 0 client]
+
+ $master config set repl-backlog-size $backlog_size
+ $master config set repl-backlog-ttl $backlog_ttl
+ $master config set repl-diskless-sync $mdl
+ $master config set repl-diskless-sync-delay 1
+ $slave config set repl-diskless-load $sdl
+
+ set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000]
+ set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000]
+ set load_handle2 [start_bg_complex_data $master_host $master_port 12 100000]
+
+ test {Slave should be able to synchronize with the master} {
+ $slave slaveof $master_host $master_port
+ wait_for_condition 50 100 {
+ [lindex [r role] 0] eq {slave} &&
+ [lindex [r role] 3] eq {connected}
+ } else {
+ fail "Replication not started."
+ }
+ }
+
+ # Check that the background clients are actually writing.
+ test {Detect write load to master} {
+ wait_for_condition 50 1000 {
+ [$master dbsize] > 100
+ } else {
+ fail "Can't detect write load from background clients."
+ }
+ }
+
+ test "Test replication partial resync: $descr (diskless: $mdl, $sdl, reconnect: $reconnect)" {
+ # Now while the clients are writing data, break the master-slave
+ # link multiple times.
+ if {$reconnect} {
+ for {set j 0} {$j < $duration*10} {incr j} {
+ after 100
+ # catch {puts "MASTER [$master dbsize] keys, REPLICA [$slave dbsize] keys"}
+
+ if {($j % 20) == 0} {
+ catch {
+ if {$delay} {
+ $slave multi
+ $slave client kill $master_host:$master_port
+ $slave debug sleep $delay
+ $slave exec
+ } else {
+ $slave client kill $master_host:$master_port
+ }
+ }
+ }
+ }
+ }
+ stop_bg_complex_data $load_handle0
+ stop_bg_complex_data $load_handle1
+ stop_bg_complex_data $load_handle2
+
+ # Wait for the slave to reach the "online"
+ # state from the POV of the master.
+ set retry 5000
+ while {$retry} {
+ set info [$master info]
+ if {[string match {*slave0:*state=online*} $info]} {
+ break
+ } else {
+ incr retry -1
+ after 100
+ }
+ }
+ if {$retry == 0} {
+ error "assertion:Slave not correctly synchronized"
+ }
+
+ # Wait for the slave to acknowledge that it is online, so
+ # we are sure that DBSIZE and DEBUG DIGEST will not
+ # fail because of timing issues. (-LOADING error)
+ wait_for_condition 5000 100 {
+ [lindex [$slave role] 3] eq {connected}
+ } else {
+ fail "Slave still not connected after some time"
+ }
+
+ wait_for_condition 100 100 {
+ [$master debug digest] == [$slave debug digest]
+ } else {
+ set csv1 [csvdump r]
+ set csv2 [csvdump {r -1}]
+ set fd [open /tmp/repldump1.txt w]
+ puts -nonewline $fd $csv1
+ close $fd
+ set fd [open /tmp/repldump2.txt w]
+ puts -nonewline $fd $csv2
+ close $fd
+ fail "Master - Replica inconsistency, Run diff -u against /tmp/repldump*.txt for more info"
+ }
+ assert {[$master dbsize] > 0}
+ eval $cond
+ }
+ }
+ }
+}
+
+tags {"external:skip"} {
+foreach mdl {no yes} {
+ foreach sdl {disabled swapdb} {
+ test_psync {no reconnection, just sync} 6 1000000 3600 0 {
+ } $mdl $sdl 0
+
+ test_psync {ok psync} 6 100000000 3600 0 {
+ assert {[s -1 sync_partial_ok] > 0}
+ } $mdl $sdl 1
+
+ test_psync {no backlog} 6 100 3600 0.5 {
+ assert {[s -1 sync_partial_err] > 0}
+ } $mdl $sdl 1
+
+ test_psync {ok after delay} 3 100000000 3600 3 {
+ assert {[s -1 sync_partial_ok] > 0}
+ } $mdl $sdl 1
+
+ test_psync {backlog expired} 3 100000000 1 3 {
+ assert {[s -1 sync_partial_err] > 0}
+ } $mdl $sdl 1
+ }
+}
+}
diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl
new file mode 100644
index 0000000..de4d527
--- /dev/null
+++ b/tests/integration/replication.tcl
@@ -0,0 +1,1456 @@
+# Return the result of glob-matching the whole content of a log file.
+#
+# log     - path of the file to read.
+# pattern - string match (glob-style) pattern applied to the full content.
+#
+# Returns 1 when the content matches the pattern, 0 otherwise.
+proc log_file_matches {log pattern} {
+    # Slurp the file in one go and release the channel before matching.
+    set handle [open $log r]
+    set text [read $handle]
+    close $handle
+    return [string match $pattern $text]
+}
+
+# Check that a replica stuck in the replication handshake detects a timeout.
+# The outer server is used as the replica and the inner one as its master.
+start_server {tags {"repl network external:skip"}} {
+ set slave [srv 0 client]
+ set slave_host [srv 0 host]
+ set slave_port [srv 0 port]
+ set slave_log [srv 0 stdout]
+ start_server {} {
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+
+ # Configure the master in order to hang waiting for the BGSAVE
+ # operation, so that the slave remains in the handshake state.
+ $master config set repl-diskless-sync yes
+ $master config set repl-diskless-sync-delay 1000
+
+ # Start the replication process...
+ $slave slaveof $master_host $master_port
+
+ test {Slave enters handshake} {
+ wait_for_condition 50 1000 {
+ [string match *handshake* [$slave role]]
+ } else {
+ fail "Replica does not enter handshake state"
+ }
+ }
+
+ test {Slave enters wait_bgsave} {
+ wait_for_condition 50 1000 {
+ [string match *state=wait_bgsave* [$master info replication]]
+ } else {
+ fail "Replica does not enter wait_bgsave state"
+ }
+ }
+
+ # Use a short replication timeout on the slave, so that if there
+ # are no bugs the timeout is triggered in a reasonable amount
+ # of time.
+ $slave config set repl-timeout 5
+
+ # But make the master unable to send
+ # the periodic newlines to refresh the connection. The slave
+ # should detect the timeout.
+ $master debug sleep 10
+
+ # The timeout is detected by looking for the message in the replica log.
+ test {Slave is able to detect timeout during handshake} {
+ wait_for_condition 50 1000 {
+ [log_file_matches $slave_log "*Timeout connecting to the MASTER*"]
+ } else {
+ fail "Replica is not able to detect timeout"
+ }
+ }
+ }
+}
+
+# Command propagation tests between two instances. A (outer server) is first
+# turned into a replica of B (inner server); the last test swaps the roles.
+# NOTE(review): the bare "r" command appears to address B, the innermost
+# server, since writes issued through it are expected to show up on A.
+#
+# Every deferring client created by a test is closed at the end of that test,
+# so connections do not pile up on B as the tests run.
+start_server {tags {"repl external:skip"}} {
+    set A [srv 0 client]
+    set A_host [srv 0 host]
+    set A_port [srv 0 port]
+    start_server {} {
+        set B [srv 0 client]
+        set B_host [srv 0 host]
+        set B_port [srv 0 port]
+
+        test {Set instance A as slave of B} {
+            $A slaveof $B_host $B_port
+            wait_for_condition 50 100 {
+                [lindex [$A role] 0] eq {slave} &&
+                [string match {*master_link_status:up*} [$A info replication]]
+            } else {
+                fail "Can't turn the instance into a replica"
+            }
+        }
+
+        test {INCRBYFLOAT replication, should not remove expire} {
+            r set test 1 EX 100
+            r incrbyfloat test 0.1
+            wait_for_ofs_sync $A $B
+            assert_equal [$A debug digest] [$B debug digest]
+        }
+
+        test {GETSET replication} {
+            $A config resetstat
+            $A config set loglevel debug
+            $B config set loglevel debug
+            r set test foo
+            assert_equal [r getset test bar] foo
+            wait_for_condition 500 10 {
+                [$A get test] eq "bar"
+            } else {
+                fail "getset wasn't propagated"
+            }
+            assert_equal [r set test vaz get] bar
+            wait_for_condition 500 10 {
+                [$A get test] eq "vaz"
+            } else {
+                fail "set get wasn't propagated"
+            }
+            # The replica must have seen three SET commands and no GETSET.
+            assert_match {*calls=3,*} [cmdrstat set $A]
+            assert_match {} [cmdrstat getset $A]
+        }
+
+        test {BRPOPLPUSH replication, when blocking against empty list} {
+            $A config resetstat
+            set rd [redis_deferring_client]
+            $rd brpoplpush a b 5
+            r lpush a foo
+            wait_for_condition 50 100 {
+                [$A debug digest] eq [$B debug digest]
+            } else {
+                fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]"
+            }
+            # The replica must have seen RPOPLPUSH, not LMOVE.
+            assert_match {*calls=1,*} [cmdrstat rpoplpush $A]
+            assert_match {} [cmdrstat lmove $A]
+            # Do not leak the deferring client connection.
+            $rd close
+        }
+
+        test {BRPOPLPUSH replication, list exists} {
+            $A config resetstat
+            set rd [redis_deferring_client]
+            r lpush c 1
+            r lpush c 2
+            r lpush c 3
+            $rd brpoplpush c d 5
+            # The deferred command is not waited for explicitly; give it time
+            # to be executed and propagated before comparing the datasets.
+            after 1000
+            assert_equal [$A debug digest] [$B debug digest]
+            assert_match {*calls=1,*} [cmdrstat rpoplpush $A]
+            assert_match {} [cmdrstat lmove $A]
+            # Do not leak the deferring client connection.
+            $rd close
+        }
+
+        foreach wherefrom {left right} {
+            foreach whereto {left right} {
+                test "BLMOVE ($wherefrom, $whereto) replication, when blocking against empty list" {
+                    $A config resetstat
+                    set rd [redis_deferring_client]
+                    $rd blmove a b $wherefrom $whereto 5
+                    r lpush a foo
+                    wait_for_condition 50 100 {
+                        [$A debug digest] eq [$B debug digest]
+                    } else {
+                        fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]"
+                    }
+                    # The replica must have seen LMOVE, not RPOPLPUSH.
+                    assert_match {*calls=1,*} [cmdrstat lmove $A]
+                    assert_match {} [cmdrstat rpoplpush $A]
+                    # Do not leak the deferring client connection.
+                    $rd close
+                }
+
+                test "BLMOVE ($wherefrom, $whereto) replication, list exists" {
+                    $A config resetstat
+                    set rd [redis_deferring_client]
+                    r lpush c 1
+                    r lpush c 2
+                    r lpush c 3
+                    $rd blmove c d $wherefrom $whereto 5
+                    # Same as above: leave time for the deferred command to run.
+                    after 1000
+                    assert_equal [$A debug digest] [$B debug digest]
+                    assert_match {*calls=1,*} [cmdrstat lmove $A]
+                    assert_match {} [cmdrstat rpoplpush $A]
+                    # Do not leak the deferring client connection.
+                    $rd close
+                }
+            }
+        }
+
+        test {BLPOP followed by role change, issue #2473} {
+            set rd [redis_deferring_client]
+            $rd blpop foo 0 ; # Block while B is a master
+
+            # Turn B into master of A
+            $A slaveof no one
+            $B slaveof $A_host $A_port
+            wait_for_condition 50 100 {
+                [lindex [$B role] 0] eq {slave} &&
+                [string match {*master_link_status:up*} [$B info replication]]
+            } else {
+                fail "Can't turn the instance into a replica"
+            }
+
+            # Push elements into the "foo" list of the new replica.
+            # If the client is still attached to the instance, we'll get
+            # a desync between the two instances.
+            $A rpush foo a b c
+            after 100
+
+            wait_for_condition 50 100 {
+                [$A debug digest] eq [$B debug digest] &&
+                [$A lrange foo 0 -1] eq {a b c} &&
+                [$B lrange foo 0 -1] eq {a b c}
+            } else {
+                fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]"
+            }
+            assert_match {*calls=1,*,rejected_calls=0,failed_calls=1*} [cmdrstat blpop $B]
+            # Do not leak the deferring client connection.
+            $rd close
+        }
+    }
+}
+
+# Basic replication checks. The outer server is the master (addressed with
+# index -1) and the inner server (index 0, or no index) is turned into its
+# replica by the SLAVEOF test below.
+start_server {tags {"repl external:skip"}} {
+ r set mykey foo
+
+ start_server {} {
+ test {Second server should have role master at first} {
+ s role
+ } {master}
+
+ test {SLAVEOF should start with link status "down"} {
+ r multi
+ r slaveof [srv -1 host] [srv -1 port]
+ r info replication
+ r exec
+ } {*master_link_status:down*}
+
+ test {The role should immediately be changed to "replica"} {
+ s role
+ } {slave}
+
+ wait_for_sync r
+ test {Sync should have transferred keys from master} {
+ r get mykey
+ } {foo}
+
+ test {The link status should be up} {
+ s master_link_status
+ } {up}
+
+ test {SET on the master should immediately propagate} {
+ r -1 set mykey bar
+
+ wait_for_condition 500 100 {
+ [r 0 get mykey] eq {bar}
+ } else {
+ fail "SET on master did not propagated on replica"
+ }
+ }
+
+ test {FLUSHDB / FLUSHALL should replicate} {
+ # we're attaching to a sub-replica, so we need to stop pings on the real master
+ r -1 config set repl-ping-replica-period 3600
+
+ set repl [attach_to_replication_stream]
+
+ r -1 set key value
+ r -1 flushdb
+
+ r -1 set key value2
+ r -1 flushall
+
+ wait_for_ofs_sync [srv 0 client] [srv -1 client]
+ assert_equal [r -1 dbsize] 0
+ assert_equal [r 0 dbsize] 0
+
+ # DB is empty.
+ r -1 flushdb
+ r -1 flushdb
+ r -1 eval {redis.call("flushdb")} 0
+
+ # DBs are empty.
+ r -1 flushall
+ r -1 flushall
+ r -1 eval {redis.call("flushall")} 0
+
+ # add another command to check nothing else was propagated after the above
+ r -1 incr x
+
+ # Assert that each FLUSHDB command is replicated even the DB is empty.
+ # Assert that each FLUSHALL command is replicated even the DBs are empty.
+ assert_replication_stream $repl {
+ {set key value}
+ {flushdb}
+ {set key value2}
+ {flushall}
+ {flushdb}
+ {flushdb}
+ {flushdb}
+ {flushall}
+ {flushall}
+ {flushall}
+ {incr x}
+ }
+ close_replication_stream $repl
+ }
+
+ test {ROLE in master reports master with a slave} {
+ set res [r -1 role]
+ lassign $res role offset slaves
+ assert {$role eq {master}}
+ assert {$offset > 0}
+ assert {[llength $slaves] == 1}
+ # NOTE(review): the first two fields of the entry are stored in variables
+ # named master_host/master_port although they come from the master's list
+ # of replicas; only slave_offset is checked.
+ lassign [lindex $slaves 0] master_host master_port slave_offset
+ assert {$slave_offset <= $offset}
+ }
+
+ test {ROLE in slave reports slave in connected state} {
+ set res [r role]
+ lassign $res role master_host master_port slave_state slave_offset
+ assert {$role eq {slave}}
+ assert {$slave_state eq {connected}}
+ }
+ }
+}
+
+# Attach three replicas to a master under write load at the same time, for
+# every combination of the master repl-diskless-sync value (mdl) and the
+# replica repl-diskless-load value (sdl), then verify that all of them end up
+# with the same dataset digest as the master.
+foreach mdl {no yes} {
+    foreach sdl {disabled swapdb} {
+        start_server {tags {"repl external:skip"} overrides {save {}}} {
+            set master [srv 0 client]
+            $master config set repl-diskless-sync $mdl
+            $master config set repl-diskless-sync-delay 5
+            $master config set repl-diskless-sync-max-replicas 3
+            set master_host [srv 0 host]
+            set master_port [srv 0 port]
+            set slaves {}
+            start_server {overrides {save {}}} {
+                lappend slaves [srv 0 client]
+                start_server {overrides {save {}}} {
+                    lappend slaves [srv 0 client]
+                    start_server {overrides {save {}}} {
+                        lappend slaves [srv 0 client]
+                        test "Connect multiple replicas at the same time (issue #141), master diskless=$mdl, replica diskless=$sdl" {
+                            # start load handles only inside the test, so that the test can be skipped
+                            set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000000]
+                            set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000000]
+                            set load_handle2 [start_bg_complex_data $master_host $master_port 12 100000000]
+                            set load_handle3 [start_write_load $master_host $master_port 8]
+                            set load_handle4 [start_write_load $master_host $master_port 4]
+                            after 5000 ;# wait for some data to accumulate so that we have RDB part for the fork
+
+                            # Send SLAVEOF commands to slaves: configure all
+                            # of them first, then attach all of them.
+                            foreach replica $slaves {
+                                $replica config set repl-diskless-load $sdl
+                            }
+                            foreach replica $slaves {
+                                $replica slaveof $master_host $master_port
+                            }
+
+                            # Wait for all the three slaves to reach the "online"
+                            # state from the POV of the master (server -3 here).
+                            wait_for_condition 500 100 {
+                                [string match {*slave0:*state=online*slave1:*state=online*slave2:*state=online*} [r -3 info]]
+                            } else {
+                                fail "Slaves not correctly synchronized"
+                            }
+
+                            # Wait that slaves acknowledge they are online so
+                            # we are sure that DBSIZE and DEBUG DIGEST will not
+                            # fail because of timing issues.
+                            wait_for_condition 500 100 {
+                                [lindex [[lindex $slaves 0] role] 3] eq {connected} &&
+                                [lindex [[lindex $slaves 1] role] 3] eq {connected} &&
+                                [lindex [[lindex $slaves 2] role] 3] eq {connected}
+                            } else {
+                                fail "Slaves still not connected after some time"
+                            }
+
+                            # Stop the write load
+                            stop_bg_complex_data $load_handle0
+                            stop_bg_complex_data $load_handle1
+                            stop_bg_complex_data $load_handle2
+                            stop_write_load $load_handle3
+                            stop_write_load $load_handle4
+
+                            # Make sure no more commands processed
+                            wait_load_handlers_disconnected -3
+
+                            foreach replica $slaves {
+                                wait_for_ofs_sync $master $replica
+                            }
+
+                            # Check digests: the master must hold data and every
+                            # replica must match it.
+                            set digest [$master debug digest]
+                            set digest0 [[lindex $slaves 0] debug digest]
+                            set digest1 [[lindex $slaves 1] debug digest]
+                            set digest2 [[lindex $slaves 2] debug digest]
+                            assert {$digest ne 0000000000000000000000000000000000000000}
+                            assert {$digest eq $digest0}
+                            assert {$digest eq $digest1}
+                            assert {$digest eq $digest2}
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+# Check that a replica which spent time inside a long running script still
+# ends up with the same dataset as a master that was under write load.
+# The outer server is the master, the inner one the replica.
+start_server {tags {"repl external:skip"} overrides {save {}}} {
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+ start_server {overrides {save {}}} {
+ test "Master stream is correctly processed while the replica has a script in -BUSY state" {
+ set load_handle0 [start_write_load $master_host $master_port 3]
+ set slave [srv 0 client]
+ $slave config set lua-time-limit 500
+ $slave slaveof $master_host $master_port
+
+ # Wait for the slave to be online
+ wait_for_condition 500 100 {
+ [lindex [$slave role] 3] eq {connected}
+ } else {
+ fail "Replica still not connected after some time"
+ }
+
+ # Wait some time to make sure the master is sending data
+ # to the slave.
+ after 5000
+
+ # Stop the ability of the slave to process data by sending
+ # a script that will put it in BUSY state.
+ $slave eval {for i=1,3000000000 do end} 0
+
+ # Wait some time again so that more master stream will
+ # be processed.
+ after 2000
+
+ # Stop the write load
+ stop_write_load $load_handle0
+
+ # Wait until master and replica report the same dataset digest.
+ wait_for_condition 500 100 {
+ [$master debug digest] eq [$slave debug digest]
+ } else {
+ fail "Different datasets between replica and master"
+ }
+ }
+ }
+}
+
+# Diskless load swapdb when NOT async_loading (different master replid)
+# Runs twice. "Successful" lets the full sync complete and checks that the
+# replica exposes the master's dataset. "Aborted" kills the replica link on
+# the master while the replica is loading and checks that the replica's old
+# dataset is still served afterwards.
+# The outer server is the replica (index -1 from inside), the inner one the
+# master.
+foreach testType {Successful Aborted} {
+ start_server {tags {"repl external:skip"}} {
+ set replica [srv 0 client]
+ set replica_host [srv 0 host]
+ set replica_port [srv 0 port]
+ set replica_log [srv 0 stdout]
+ start_server {} {
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+
+ # Set master and replica to use diskless replication on swapdb mode
+ $master config set repl-diskless-sync yes
+ $master config set repl-diskless-sync-delay 0
+ $master config set save ""
+ $replica config set repl-diskless-load swapdb
+ $replica config set save ""
+
+ # Put different data sets on the master and replica
+ # We need to put large keys on the master since the replica replies to info only once in 2mb
+ $replica debug populate 200 slave 10
+ $master debug populate 1000 master 100000
+ $master config set rdbcompression no
+
+ # Set a key value on replica to check status on failure and after swapping db
+ $replica set mykey myvalue
+
+ switch $testType {
+ "Aborted" {
+ # Set master with a slow rdb generation, so that we can easily intercept loading
+ # 10ms per key, with 1000 keys is 10 seconds
+ $master config set rdb-key-save-delay 10000
+
+ # Start the replication process
+ $replica replicaof $master_host $master_port
+
+ test {Diskless load swapdb (different replid): replica enter loading} {
+ # Wait for the replica to start reading the rdb
+ wait_for_condition 100 100 {
+ [s -1 loading] eq 1
+ } else {
+ fail "Replica didn't get into loading mode"
+ }
+
+ assert_equal [s -1 async_loading] 0
+ }
+
+ # Make sure that next sync will not start immediately so that we can catch the replica in between syncs
+ $master config set repl-diskless-sync-delay 5
+
+ # Kill the replica connection on the master
+ set killed [$master client kill type replica]
+
+ # Wait for loading to stop (fail)
+ wait_for_condition 100 100 {
+ [s -1 loading] eq 0
+ } else {
+ fail "Replica didn't disconnect"
+ }
+
+ test {Diskless load swapdb (different replid): old database is exposed after replication fails} {
+ # Ensure we see old values from replica
+ assert_equal [$replica get mykey] "myvalue"
+
+ # Make sure amount of replica keys didn't change
+ # (200 populated keys plus mykey)
+ assert_equal [$replica dbsize] 201
+ }
+
+ # Speed up shutdown
+ $master config set rdb-key-save-delay 0
+ }
+ "Successful" {
+ # Start the replication process
+ $replica replicaof $master_host $master_port
+
+ # Let replica finish sync with master
+ wait_for_condition 100 100 {
+ [s -1 master_link_status] eq "up"
+ } else {
+ fail "Master <-> Replica didn't finish sync"
+ }
+
+ test {Diskless load swapdb (different replid): new database is exposed after swapping} {
+ # Ensure we don't see anymore the key that was stored only to replica and also that we don't get LOADING status
+ assert_equal [$replica GET mykey] ""
+
+ # Make sure amount of keys matches master
+ assert_equal [$replica dbsize] 1000
+ }
+ }
+ }
+ }
+ }
+}
+
+# Diskless load swapdb when async_loading (matching master replid)
+# Runs twice. Both variants first complete an initial sync, then force a
+# second full sync. "Successful" lets it finish and checks that the master's
+# keys and function replaced the replica's own. "Aborted" slows down the RDB
+# generation, checks what the replica serves while async_loading is set, then
+# kills the link and checks that the replica's old data is still served.
+# The outer server is the replica (index -1 from inside), the inner one the
+# master (index 0).
+foreach testType {Successful Aborted} {
+ start_server {tags {"repl external:skip"}} {
+ set replica [srv 0 client]
+ set replica_host [srv 0 host]
+ set replica_port [srv 0 port]
+ set replica_log [srv 0 stdout]
+ start_server {} {
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+
+ # Set master and replica to use diskless replication on swapdb mode
+ $master config set repl-diskless-sync yes
+ $master config set repl-diskless-sync-delay 0
+ $master config set save ""
+ $replica config set repl-diskless-load swapdb
+ $replica config set save ""
+
+ # Set replica writable so we can check that a key we manually added is served
+ # during replication and after failure, but disappears on success
+ $replica config set replica-read-only no
+
+ # Initial sync to have matching replids between master and replica
+ $replica replicaof $master_host $master_port
+
+ # Let replica finish initial sync with master
+ wait_for_condition 100 100 {
+ [s -1 master_link_status] eq "up"
+ } else {
+ fail "Master <-> Replica didn't finish sync"
+ }
+
+ # Put different data sets on the master and replica
+ # We need to put large keys on the master since the replica replies to info only once in 2mb
+ $replica debug populate 2000 slave 10
+ $master debug populate 2000 master 100000
+ $master config set rdbcompression no
+
+ # Set a key value on replica to check status during loading, on failure and after swapping db
+ $replica set mykey myvalue
+
+ # Set a function value on replica to check status during loading, on failure and after swapping db
+ $replica function load {#!lua name=test
+ redis.register_function('test', function() return 'hello1' end)
+ }
+
+ # Set a function value on master to check it reaches the replica when replication ends
+ $master function load {#!lua name=test
+ redis.register_function('test', function() return 'hello2' end)
+ }
+
+ # Remember the sync_full stat before the client kill.
+ set sync_full [s 0 sync_full]
+
+ if {$testType == "Aborted"} {
+ # Set master with a slow rdb generation, so that we can easily intercept loading
+ # 10ms per key, with 2000 keys is 20 seconds
+ $master config set rdb-key-save-delay 10000
+ }
+
+ # Force the replica to try another full sync (this time it will have matching master replid)
+ $master multi
+ $master client kill type replica
+ # Fill replication backlog with new content
+ $master config set repl-backlog-size 16384
+ for {set keyid 0} {$keyid < 10} {incr keyid} {
+ $master set "$keyid string_$keyid" [string repeat A 16384]
+ }
+ $master exec
+
+ # Wait for sync_full to get incremented from the previous value.
+ # After the client kill, make sure we do a reconnect, and do a FULL SYNC.
+ wait_for_condition 100 100 {
+ [s 0 sync_full] > $sync_full
+ } else {
+ fail "Master <-> Replica didn't start the full sync"
+ }
+
+ switch $testType {
+ "Aborted" {
+ test {Diskless load swapdb (async_loading): replica enter async_loading} {
+ # Wait for the replica to start reading the rdb
+ wait_for_condition 100 100 {
+ [s -1 async_loading] eq 1
+ } else {
+ fail "Replica didn't get into async_loading mode"
+ }
+
+ assert_equal [s -1 loading] 0
+ }
+
+ test {Diskless load swapdb (async_loading): old database is exposed while async replication is in progress} {
+ # Ensure we still see old values while async_loading is in progress and also not LOADING status
+ assert_equal [$replica get mykey] "myvalue"
+
+ # Ensure we still can call old function while async_loading is in progress
+ assert_equal [$replica fcall test 0] "hello1"
+
+ # Make sure we're still async_loading to validate previous assertion
+ assert_equal [s -1 async_loading] 1
+
+ # Make sure amount of replica keys didn't change
+ # (2000 populated keys plus mykey)
+ assert_equal [$replica dbsize] 2001
+ }
+
+ test {Busy script during async loading} {
+ set rd_replica [redis_deferring_client -1]
+ $replica config set lua-time-limit 10
+ $rd_replica eval {while true do end} 0
+ after 200
+ assert_error {BUSY*} {$replica ping}
+ $replica script kill
+ after 200 ; # Give some time to Lua to call the hook again...
+ assert_equal [$replica ping] "PONG"
+ $rd_replica close
+ }
+
+ test {Blocked commands and configs during async-loading} {
+ assert_error {LOADING*} {$replica config set appendonly no}
+ assert_error {LOADING*} {$replica REPLICAOF no one}
+ }
+
+ # Make sure that next sync will not start immediately so that we can catch the replica in between syncs
+ $master config set repl-diskless-sync-delay 5
+
+ # Kill the replica connection on the master
+ set killed [$master client kill type replica]
+
+ # Wait for loading to stop (fail)
+ wait_for_condition 100 100 {
+ [s -1 async_loading] eq 0
+ } else {
+ fail "Replica didn't disconnect"
+ }
+
+ test {Diskless load swapdb (async_loading): old database is exposed after async replication fails} {
+ # Ensure we see old values from replica
+ assert_equal [$replica get mykey] "myvalue"
+
+ # Ensure we still can call old function
+ assert_equal [$replica fcall test 0] "hello1"
+
+ # Make sure amount of replica keys didn't change
+ assert_equal [$replica dbsize] 2001
+ }
+
+ # Speed up shutdown
+ $master config set rdb-key-save-delay 0
+ }
+ "Successful" {
+ # Let replica finish sync with master
+ wait_for_condition 100 100 {
+ [s -1 master_link_status] eq "up"
+ } else {
+ fail "Master <-> Replica didn't finish sync"
+ }
+
+ test {Diskless load swapdb (async_loading): new database is exposed after swapping} {
+ # Ensure we don't see anymore the key that was stored only to replica and also that we don't get LOADING status
+ assert_equal [$replica GET mykey] ""
+
+ # Ensure we got the new function
+ assert_equal [$replica fcall test 0] "hello2"
+
+ # Make sure amount of keys matches master
+ # (2000 populated keys plus the 10 keys written inside the MULTI above)
+ assert_equal [$replica dbsize] 2010
+ }
+ }
+ }
+ }
+ }
+}
+
+# Repeatedly interrupt a diskless (swapdb) load at random points by killing
+# the replica link on the master, to exercise the replica's handling of a
+# truncated RDB stream. The outer server is the replica (index -1 from
+# inside), the inner one the master.
+test {diskless loading short read} {
+ start_server {tags {"repl"} overrides {save ""}} {
+ set replica [srv 0 client]
+ set replica_host [srv 0 host]
+ set replica_port [srv 0 port]
+ start_server {overrides {save ""}} {
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+
+ # Set master and replica to use diskless replication
+ $master config set repl-diskless-sync yes
+ $master config set rdbcompression no
+ $replica config set repl-diskless-load swapdb
+ $master config set hz 500
+ $replica config set hz 500
+ $master config set dynamic-hz no
+ $replica config set dynamic-hz no
+ # Try to fill the master with all types of data types / encodings
+ set start [clock clicks -milliseconds]
+
+ # Set a function value to check short read handling on functions
+ r function load {#!lua name=test
+ redis.register_function('test', function() return 'hello1' end)
+ }
+
+ for {set k 0} {$k < 3} {incr k} {
+ for {set i 0} {$i < 10} {incr i} {
+ r set "$k int_$i" [expr {int(rand()*10000)}]
+ r expire "$k int_$i" [expr {int(rand()*10000)}]
+ r set "$k string_$i" [string repeat A [expr {int(rand()*1000000)}]]
+ r hset "$k hash_small" [string repeat A [expr {int(rand()*10)}]] 0[string repeat A [expr {int(rand()*10)}]]
+ r hset "$k hash_large" [string repeat A [expr {int(rand()*10000)}]] [string repeat A [expr {int(rand()*1000000)}]]
+ r sadd "$k set_small" [string repeat A [expr {int(rand()*10)}]]
+ r sadd "$k set_large" [string repeat A [expr {int(rand()*1000000)}]]
+ r zadd "$k zset_small" [expr {rand()}] [string repeat A [expr {int(rand()*10)}]]
+ r zadd "$k zset_large" [expr {rand()}] [string repeat A [expr {int(rand()*1000000)}]]
+ r lpush "$k list_small" [string repeat A [expr {int(rand()*10)}]]
+ r lpush "$k list_large" [string repeat A [expr {int(rand()*1000000)}]]
+ for {set j 0} {$j < 10} {incr j} {
+ r xadd "$k stream" * foo "asdf" bar "1234"
+ }
+ r xgroup create "$k stream" "mygroup_$i" 0
+ r xreadgroup GROUP "mygroup_$i" Alice COUNT 1 STREAMS "$k stream" >
+ }
+ }
+
+ if {$::verbose} {
+ set end [clock clicks -milliseconds]
+ set duration [expr $end - $start]
+ puts "filling took $duration ms (TODO: use pipeline)"
+ set start [clock clicks -milliseconds]
+ }
+
+ # Start the replication process...
+ set loglines [count_log_lines -1]
+ $master config set repl-diskless-sync-delay 0
+ $replica replicaof $master_host $master_port
+
+ # kill the replication at various points
+ set attempts 100
+ if {$::accurate} { set attempts 500 }
+ for {set i 0} {$i < $attempts} {incr i} {
+ # wait for the replica to start reading the rdb
+ # using the log file since the replica only responds to INFO once in 2mb
+ set res [wait_for_log_messages -1 {"*Loading DB in memory*"} $loglines 2000 1]
+ set loglines [lindex $res 1]
+
+ # add some additional random sleep so that we kill the master on a different place each time
+ after [expr {int(rand()*50)}]
+
+ # kill the replica connection on the master
+ set killed [$master client kill type replica]
+
+ set res [wait_for_log_messages -1 {"*Internal error in RDB*" "*Finished with success*" "*Successful partial resynchronization*"} $loglines 500 10]
+ if {$::verbose} { puts $res }
+ set log_text [lindex $res 0]
+ set loglines [lindex $res 1]
+ # Anything other than a load error means the replica got in sync, so
+ # another full sync has to be provoked for the next iteration.
+ if {![string match "*Internal error in RDB*" $log_text]} {
+ # force the replica to try another full sync
+ $master multi
+ $master client kill type replica
+ $master set asdf asdf
+ # fill replication backlog with new content
+ $master config set repl-backlog-size 16384
+ for {set keyid 0} {$keyid < 10} {incr keyid} {
+ $master set "$keyid string_$keyid" [string repeat A 16384]
+ }
+ $master exec
+ }
+
+ # wait for loading to stop (fail)
+ # After a loading successfully, next loop will enter `async_loading`
+ wait_for_condition 1000 1 {
+ [s -1 async_loading] eq 0 &&
+ [s -1 loading] eq 0
+ } else {
+ fail "Replica didn't disconnect"
+ }
+ }
+ if {$::verbose} {
+ set end [clock clicks -milliseconds]
+ set duration [expr $end - $start]
+ puts "test took $duration ms"
+ }
+ # enable fast shutdown
+ # NOTE(review): rdb-key-save-delay is not set to a non-zero value anywhere
+ # in this test, so this reset looks like a leftover -- confirm.
+ $master config set rdb-key-save-delay 0
+ }
+ }
+} {} {external:skip}
+
+# Get the current utime and stime counters of a process or thread (accumulated
+# since its creation) from a /proc/<pid>/stat style file.
+#
+# statfile - path of the stat file to read.
+#
+# Returns a three element list: the wall-clock time in milliseconds at which
+# the sample was taken, utime and stime (both in clock ticks).
+# Raises "assertion:can't parse /proc: ..." when the file cannot be read.
+proc get_cpu_metrics { statfile } {
+    set fid ""
+    if { [ catch {
+        set fid [ open $statfile r ]
+        set data [ read $fid 1024 ]
+        ::close $fid
+        set fid ""
+
+        # The second field (comm) is wrapped in parentheses and may itself
+        # contain spaces or parentheses, so only tokenize what follows the
+        # LAST closing parenthesis. That remainder starts at field 3 (state),
+        # which puts utime (field 14) and stime (field 15) at indexes 11, 12.
+        set rparen [ string last ")" $data ]
+        if { $rparen >= 0 } {
+            set rest [ string range $data [ expr { $rparen + 1 } ] end ]
+            set fields [ regexp -all -inline {\S+} $rest ]
+            set utime [ lindex $fields 11 ]
+            set stime [ lindex $fields 12 ]
+        } else {
+            # No comm field found: keep the plain positional parsing.
+            set fields [ split $data ]
+            set utime [ lindex $fields 13 ]
+            set stime [ lindex $fields 14 ]
+        }
+    } err ] } {
+        # Do not leak the channel when reading fails after a successful open.
+        if { $fid ne "" } { catch { ::close $fid } }
+        error "assertion:can't parse /proc: $err"
+    }
+    set mstime [clock milliseconds]
+    return [ list $mstime $utime $stime ]
+}
+
+# Compute the user and system CPU usage percentages of a thread between two
+# samples, each being a list of: time in milliseconds, utime, stime.
+#
+# start - the earlier sample.
+# end   - the later sample.
+#
+# Returns a two element list: %utime and %stime over the sampled interval.
+proc compute_cpu_usage {start end} {
+    set hz [exec getconf CLK_TCK]
+    lassign $start t0 u0 s0
+    lassign $end t1 u1 s1
+    # Express the elapsed milliseconds in clock ticks, as a floating point
+    # value so that the divisions below are not truncated.
+    set elapsed [expr {($t1 - $t0) * double($hz) / 1000}]
+    set user_pct [expr {(($u1 - $u0) / $elapsed) * 100}]
+    set sys_pct [expr {(($s1 - $s0) / $elapsed) * 100}]
+    return [list $user_pct $sys_pct]
+}
+
+
+# test diskless rdb pipe with multiple replicas, which may drop half way
+#
+# One master (the outer server) feeds two replicas started inside each test:
+# replica 0 is slowed down with key-load-delay, replica 1 reads at full speed.
+# Depending on all_drop, none, one or both replicas are killed (or the slow
+# one is paused until the master times it out) during the transfer, and the
+# master log is checked for the matching outcome.
+# On Linux the master's CPU usage during the transfer is sampled as well, to
+# catch busy loops in the I/O handling.
+start_server {tags {"repl external:skip"} overrides {save ""}} {
+    set master [srv 0 client]
+    $master config set repl-diskless-sync yes
+    $master config set repl-diskless-sync-delay 5
+    $master config set repl-diskless-sync-max-replicas 2
+    set master_host [srv 0 host]
+    set master_port [srv 0 port]
+    set master_pid [srv 0 pid]
+    # put enough data in the db that the rdb file will be bigger than the socket buffers
+    # and since we'll have key-load-delay of 100, 20000 keys will take at least 2 seconds
+    # we also need the replica to process requests during transfer (which it does only once in 2mb)
+    $master debug populate 20000 test 10000
+    $master config set rdbcompression no
+    # If running on Linux, we also measure utime/stime to detect possible I/O handling issues
+    # catch returns only a status code; the output of uname is stored in the
+    # variable given as its second argument.
+    if {[catch {exec uname} os]} {
+        set os "unknown"
+    }
+    set measure_time [expr {$os eq "Linux" ? 1 : 0}]
+    foreach all_drop {no slow fast all timeout} {
+        test "diskless $all_drop replicas drop during rdb pipe" {
+            set replicas {}
+            set replicas_alive {}
+            # start one replica that will read the rdb fast, and one that will be slow
+            start_server {overrides {save ""}} {
+                lappend replicas [srv 0 client]
+                lappend replicas_alive [srv 0 client]
+                start_server {overrides {save ""}} {
+                    lappend replicas [srv 0 client]
+                    lappend replicas_alive [srv 0 client]
+
+                    # From here on: server 0 is the fast replica, server -1
+                    # the slow replica and server -2 the master.
+
+                    # start replication
+                    # it's enough for just one replica to be slow, and have its write handler enabled
+                    # so that the whole rdb generation process is bound to that
+                    set loglines [count_log_lines -2]
+                    [lindex $replicas 0] config set repl-diskless-load swapdb
+                    [lindex $replicas 0] config set key-load-delay 100 ;# 20k keys and 100 microseconds sleep means at least 2 seconds
+                    [lindex $replicas 0] replicaof $master_host $master_port
+                    [lindex $replicas 1] replicaof $master_host $master_port
+
+                    # wait for the replicas to start reading the rdb
+                    # using the log file since the replica only responds to INFO once in 2mb
+                    wait_for_log_messages -1 {"*Loading DB in memory*"} 0 1500 10
+
+                    if {$measure_time} {
+                        set master_statfile "/proc/$master_pid/stat"
+                        set master_start_metrics [get_cpu_metrics $master_statfile]
+                        set start_time [clock seconds]
+                    }
+
+                    # wait a while so that the pipe socket writer will be
+                    # blocked on write (since replica 0 is slow to read from the socket)
+                    after 500
+
+                    # add some command to be present in the command stream after the rdb.
+                    $master incr $all_drop
+
+                    # disconnect replicas depending on the current test
+                    if {$all_drop == "all" || $all_drop == "fast"} {
+                        exec kill [srv 0 pid]
+                        set replicas_alive [lreplace $replicas_alive 1 1]
+                    }
+                    if {$all_drop == "all" || $all_drop == "slow"} {
+                        exec kill [srv -1 pid]
+                        set replicas_alive [lreplace $replicas_alive 0 0]
+                    }
+                    if {$all_drop == "timeout"} {
+                        $master config set repl-timeout 2
+                        # we want the slow replica to hang on a key for very long so it'll reach repl-timeout
+                        pause_process [srv -1 pid]
+                        after 2000
+                    }
+
+                    # wait for rdb child to exit
+                    wait_for_condition 500 100 {
+                        [s -2 rdb_bgsave_in_progress] == 0
+                    } else {
+                        fail "rdb child didn't terminate"
+                    }
+
+                    # make sure we got what we were aiming for, by looking for the message in the log file
+                    if {$all_drop == "all"} {
+                        wait_for_log_messages -2 {"*Diskless rdb transfer, last replica dropped, killing fork child*"} $loglines 1 1
+                    }
+                    if {$all_drop == "no"} {
+                        wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 2 replicas still up*"} $loglines 1 1
+                    }
+                    if {$all_drop == "slow" || $all_drop == "fast"} {
+                        wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 1 replicas still up*"} $loglines 1 1
+                    }
+                    if {$all_drop == "timeout"} {
+                        wait_for_log_messages -2 {"*Disconnecting timedout replica (full sync)*"} $loglines 1 1
+                        wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 1 replicas still up*"} $loglines 1 1
+                        # master disconnected the slow replica, remove from array
+                        set replicas_alive [lreplace $replicas_alive 0 0]
+                        # release it
+                        resume_process [srv -1 pid]
+                    }
+
+                    # make sure we don't have a busy loop going through epoll_wait
+                    if {$measure_time} {
+                        set master_end_metrics [get_cpu_metrics $master_statfile]
+                        set time_elapsed [expr {[clock seconds]-$start_time}]
+                        set master_cpu [compute_cpu_usage $master_start_metrics $master_end_metrics]
+                        set master_utime [lindex $master_cpu 0]
+                        set master_stime [lindex $master_cpu 1]
+                        if {$::verbose} {
+                            puts "elapsed: $time_elapsed"
+                            puts "master utime: $master_utime"
+                            puts "master stime: $master_stime"
+                        }
+                        # The thresholds are skipped when latency sensitive
+                        # checks are disabled through ::no_latency.
+                        if {!$::no_latency && ($all_drop == "all" || $all_drop == "slow" || $all_drop == "timeout")} {
+                            assert {$master_utime < 70}
+                            assert {$master_stime < 70}
+                        }
+                        # "no" is the loop value for the scenario in which no
+                        # replica is dropped.
+                        if {!$::no_latency && ($all_drop == "no" || $all_drop == "fast")} {
+                            assert {$master_utime < 15}
+                            assert {$master_stime < 15}
+                        }
+                    }
+
+                    # verify the data integrity
+                    foreach replica $replicas_alive {
+                        # Wait that replicas acknowledge they are online so
+                        # we are sure that DBSIZE and DEBUG DIGEST will not
+                        # fail because of timing issues.
+                        wait_for_condition 150 100 {
+                            [lindex [$replica role] 3] eq {connected}
+                        } else {
+                            fail "replicas still not connected after some time"
+                        }
+
+                        # Make sure that replicas and master have same
+                        # number of keys
+                        wait_for_condition 50 100 {
+                            [$master dbsize] == [$replica dbsize]
+                        } else {
+                            fail "Different number of keys between master and replicas after too long time."
+                        }
+
+                        # Check digests
+                        set digest [$master debug digest]
+                        set digest0 [$replica debug digest]
+                        assert {$digest ne 0000000000000000000000000000000000000000}
+                        assert {$digest eq $digest0}
+                    }
+                }
+            }
+        }
+    }
+}
+
+test "diskless replication child being killed is collected" {
+ # when a diskless master is waiting for the replica to become writable,
+ # it removes the read event from the rdb pipe, so if the child gets killed
+ # the replica will hang, and the master may not collect the pid with waitpid
+ start_server {tags {"repl"} overrides {save ""}} {
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+ set master_pid [srv 0 pid]
+ $master config set repl-diskless-sync yes
+ $master config set repl-diskless-sync-delay 0
+ # put enough data in the db that the rdb file will be bigger than the socket buffers
+ $master debug populate 20000 test 10000
+ $master config set rdbcompression no
+ start_server {overrides {save ""}} {
+ set replica [srv 0 client]
+ set loglines [count_log_lines 0]
+ $replica config set repl-diskless-load swapdb
+ $replica config set key-load-delay 1000000
+ $replica config set loading-process-events-interval-bytes 1024
+ $replica replicaof $master_host $master_port
+
+ # wait for the replica to start reading the rdb
+ wait_for_log_messages 0 {"*Loading DB in memory*"} $loglines 1500 10
+
+ # wait to be sure the replica is hung and the master is blocked on write
+ after 500
+
+ # simulate the OOM killer (or anyone else) killing the rdb child of the master
+ set fork_child_pid [get_child_pid -1]
+ exec kill -9 $fork_child_pid
+
+ # wait for the parent (the master, srv -1) to notice the child has exited
+ wait_for_condition 50 100 {
+ [s -1 rdb_bgsave_in_progress] == 0
+ } else {
+ fail "rdb child didn't terminate"
+ }
+
+ # Speed up shutdown
+ $replica config set key-load-delay 0
+ }
+ }
+} {} {external:skip}
+
+foreach mdl {yes no} {
+ test "replication child dies when parent is killed - diskless: $mdl" {
+ # when master is killed, make sure the fork child can detect that and exit
+ start_server {tags {"repl"} overrides {save ""}} {
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+ set master_pid [srv 0 pid]
+ $master config set repl-diskless-sync $mdl
+ $master config set repl-diskless-sync-delay 0
+ # create keys that will take 10 seconds to save (10000 keys, rdb-key-save-delay 1000)
+ $master config set rdb-key-save-delay 1000
+ $master debug populate 10000
+ start_server {overrides {save ""}} {
+ set replica [srv 0 client]
+ $replica replicaof $master_host $master_port
+
+ # wait for the rdb child of the master (srv -1) to start
+ wait_for_condition 5000 10 {
+ [s -1 rdb_bgsave_in_progress] == 1
+ } else {
+ fail "rdb child didn't start"
+ }
+ set fork_child_pid [get_child_pid -1]
+
+ # simulate the OOM killer (or anyone else) killing the parent
+ exec kill -9 $master_pid
+
+ # wait for the child to notice that the parent died, and to exit
+ wait_for_condition 500 10 {
+ [process_is_alive $fork_child_pid] == 0
+ } else {
+ fail "rdb child didn't terminate"
+ }
+ }
+ }
+ } {} {external:skip}
+}
+
+test "diskless replication read pipe cleanup" {
+ # In diskless replication, we create a read pipe for the RDB, between the child and the parent.
+ # When we close this pipe (fd), the read handler also needs to be removed from the event loop (if it is still registered).
+ # Otherwise, the next time we use the same fd, the registration will fail (panic), because
+ # we will use EPOLL_CTL_MOD (the fd is still registered in the event loop) on an fd that was already removed from epoll_ctl
+ start_server {tags {"repl"} overrides {save ""}} {
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+ set master_pid [srv 0 pid]
+ $master config set repl-diskless-sync yes
+ $master config set repl-diskless-sync-delay 0
+
+ # put enough data in the db, and slow down the save, to keep the parent busy at the read process
+ $master config set rdb-key-save-delay 100000
+ $master debug populate 20000 test 10000
+ $master config set rdbcompression no
+ start_server {overrides {save ""}} {
+ set replica [srv 0 client]
+ set loglines [count_log_lines 0]
+ $replica config set repl-diskless-load swapdb
+ $replica replicaof $master_host $master_port
+
+ # wait for the replica to start reading the rdb
+ wait_for_log_messages 0 {"*Loading DB in memory*"} $loglines 1500 10
+
+ set loglines [count_log_lines -1]
+ # send FLUSHALL so the RDB child will be killed
+ $master flushall
+
+ # wait for another RDB child process to be started (searching the master log from $loglines on)
+ wait_for_log_messages -1 {"*Background RDB transfer started by pid*"} $loglines 800 10
+
+ # make sure master is alive
+ $master ping
+ }
+ }
+} {} {external:skip}
+
+test {replicaof right after disconnection} {
+    # this is a rare race condition that was reproduced sporadically by the psync2 unit.
+    # see details in #7205
+    start_server {tags {"repl"} overrides {save ""}} {
+        set replica1 [srv 0 client]
+        set replica1_host [srv 0 host]
+        set replica1_port [srv 0 port]
+        start_server {overrides {save ""}} {
+            set replica2 [srv 0 client]
+            start_server {overrides {save ""}} {
+                set master [srv 0 client]
+                set master_host [srv 0 host]
+                set master_port [srv 0 port]
+                $replica1 replicaof $master_host $master_port
+                $replica2 replicaof $master_host $master_port
+
+                wait_for_condition 50 100 {
+                    [string match {*master_link_status:up*} [$replica1 info replication]] &&
+                    [string match {*master_link_status:up*} [$replica2 info replication]]
+                } else {
+                    fail "Can't turn the instance into a replica"
+                }
+
+                # block replica2 (srv -1) in DEBUG SLEEP through a deferring client, and
+                # give the command 100ms to reach it before touching the master
+                set rd [redis_deferring_client -1]
+                $rd debug sleep 1
+                after 100
+
+                # when replica2 wakes up from the sleep it will find both the disconnection
+                # from its master and a replicaof command in the same event loop iteration
+                $master client kill type replica
+                $replica2 replicaof $replica1_host $replica1_port
+                $rd read
+                # the deferring client has served its purpose, don't leak its connection
+                $rd close
+
+                wait_for_condition 50 100 {
+                    [string match {*master_link_status:up*} [$replica2 info replication]]
+                } else {
+                    fail "role change failed."
+                }
+
+                # make sure psync succeeded, and there were no unexpected full syncs.
+                assert_equal [status $master sync_full] 2
+                assert_equal [status $replica1 sync_full] 0
+                assert_equal [status $replica2 sync_full] 0
+            }
+        }
+    }
+} {} {external:skip}
+
+test {Kill rdb child process if its dumping RDB is not useful} {
+ start_server {tags {"repl"}} {
+ set slave1 [srv 0 client]
+ start_server {} {
+ set slave2 [srv 0 client]
+ start_server {} {
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+ for {set i 0} {$i < 10} {incr i} {
+ $master set $i $i
+ }
+ # Generating the RDB will take 10s (10 keys * 1s each)
+ $master config set rdb-key-save-delay 1000000
+ $master config set repl-diskless-sync no
+ $master config set save ""
+
+ $slave1 slaveof $master_host $master_port
+ $slave2 slaveof $master_host $master_port
+
+ # Wait for the child to start, with both slaves waiting for its bgsave
+ wait_for_condition 50 100 {
+ ([s 0 rdb_bgsave_in_progress] == 1) &&
+ ([string match "*wait_bgsave*" [s 0 slave0]]) &&
+ ([string match "*wait_bgsave*" [s 0 slave1]])
+ } else {
+ fail "rdb child didn't start"
+ }
+
+ # Slave1 disconnects from master
+ $slave1 slaveof no one
+ # Shouldn't kill child since another slave still waits for the rdb
+ after 100
+ assert {[s 0 rdb_bgsave_in_progress] == 1}
+
+ # Slave2 disconnects from master
+ $slave2 slaveof no one
+ # Should kill child
+ wait_for_condition 100 10 {
+ [s 0 rdb_bgsave_in_progress] eq 0
+ } else {
+ fail "can't kill rdb child"
+ }
+
+ # If save parameters are configured, the child won't be killed
+ $master config set save "900 1"
+ $slave1 slaveof $master_host $master_port
+ $slave2 slaveof $master_host $master_port
+ wait_for_condition 50 100 {
+ ([s 0 rdb_bgsave_in_progress] == 1) &&
+ ([string match "*wait_bgsave*" [s 0 slave0]]) &&
+ ([string match "*wait_bgsave*" [s 0 slave1]])
+ } else {
+ fail "rdb child didn't start"
+ }
+ $slave1 slaveof no one
+ $slave2 slaveof no one
+ after 200
+ assert {[s 0 rdb_bgsave_in_progress] == 1}
+ catch {$master shutdown nosave}
+ }
+ }
+ }
+} {} {external:skip}
+
+start_server {tags {"repl external:skip"}} {
+    set master1_host [srv 0 host]
+    set master1_port [srv 0 port]
+    r set a b
+
+    start_server {} {
+        set master2 [srv 0 client]
+        set master2_host [srv 0 host]
+        set master2_port [srv 0 port]
+        # Dumping the RDB takes 10s (10 keys * 1s each)
+        $master2 debug populate 10 master2 10
+        $master2 config set rdb-key-save-delay 1000000
+
+        start_server {} {
+            set sub_replica [srv 0 client]
+            # Levels below: 0 = replica under test (r), -1 = sub_replica, -2 = master2, -3 = master1
+            start_server {} {
+                # Full sync with master1
+                r slaveof $master1_host $master1_port
+                wait_for_sync r
+                assert_equal "b" [r get a]
+
+                # Let the sub-replica sync with this replica
+                $sub_replica slaveof [srv 0 host] [srv 0 port]
+                wait_for_sync $sub_replica
+                assert_equal "b" [$sub_replica get a]
+
+                # Full sync with master2, and then kill master2 before finishing dumping RDB
+                r slaveof $master2_host $master2_port
+                wait_for_condition 50 100 {
+                    ([s -2 rdb_bgsave_in_progress] == 1) &&
+                    ([string match "*wait_bgsave*" [s -2 slave0]])
+                } else {
+                    fail "full sync didn't start"
+                }
+                catch {$master2 shutdown nosave}
+
+                test {Don't disconnect with replicas before loading transferred RDB when full sync} {
+                    assert ![log_file_matches [srv -1 stdout] "*Connection with master lost*"]
+                    # The replication id is not changed in the entire replication chain
+                    assert_equal [s master_replid] [s -3 master_replid]
+                    assert_equal [s master_replid] [s -1 master_replid]
+                }
+
+                test {Discard cache master before loading transferred RDB when full sync} {
+                    set full_sync [s -3 sync_full]
+                    set partial_sync [s -3 sync_partial_ok]
+                    # Partial sync with master1
+                    r slaveof $master1_host $master1_port
+                    wait_for_sync r
+                    # master1 accepts partial sync instead of full sync
+                    assert_equal $full_sync [s -3 sync_full]
+                    assert_equal [expr {$partial_sync + 1}] [s -3 sync_partial_ok]
+
+                    # Since master only partially synced the replica, and repl id is not changed,
+                    # the replica doesn't disconnect from its sub-replicas
+                    assert_equal [s master_replid] [s -3 master_replid]
+                    assert_equal [s master_replid] [s -1 master_replid]
+                    assert ![log_file_matches [srv -1 stdout] "*Connection with master lost*"]
+                    # This replica served its sub-replica exactly one full sync, no partial resync.
+                    assert_equal 1 [s sync_full]
+                    assert_equal 0 [s sync_partial_ok]
+                }
+            }
+        }
+    }
+}
+
+test {replica can handle EINTR if use diskless load} {
+ start_server {tags {"repl"}} {
+ set replica [srv 0 client]
+ set replica_log [srv 0 stdout]
+ start_server {} {
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+
+ $master debug populate 100 master 100000
+ $master config set rdbcompression no
+ $master config set repl-diskless-sync yes
+ $master config set repl-diskless-sync-delay 0
+ $replica config set repl-diskless-load on-empty-db
+ # Construct EINTR error by using the built-in watchdog
+ $replica config set watchdog-period 200
+ # Block replica in read()
+ $master config set rdb-key-save-delay 10000
+ # set speedy shutdown
+ $master config set save ""
+ # Start the replication process...
+ $replica replicaof $master_host $master_port
+
+ # Wait for the replica (srv -1) to start reading the rdb
+ set res [wait_for_log_messages -1 {"*Loading DB in memory*"} 0 200 10]
+ set loglines [lindex $res 1]
+
+ # Wait till we see the watchdog log line AFTER the loading started
+ wait_for_log_messages -1 {"*WATCHDOG TIMER EXPIRED*"} $loglines 200 10
+
+ # Make sure we're still loading, and that there was just one full sync attempt
+ assert ![log_file_matches [srv -1 stdout] "*Reconnecting to MASTER*"]
+ assert_equal 1 [s 0 sync_full]
+ assert_equal 1 [s -1 loading]
+ }
+ }
+} {} {external:skip}
+
+start_server {tags {"repl" "external:skip"}} {
+    test "replica do not write the reply to the replication link - SYNC (_addReplyToBufferOrList)" {
+        set rd [redis_deferring_client]
+        set lines [count_log_lines 0]
+
+        $rd sync
+        $rd ping
+        catch {$rd read} e
+        if {$::verbose} { puts "SYNC _addReplyToBufferOrList: $e" }
+        assert_equal "PONG" [r ping]
+
+        # Check we got the warning logs about the PING command.
+        verify_log_message 0 "*Replica generated a reply to command 'ping', disconnecting it: *" $lines
+
+        $rd close
+        waitForBgsave r
+    }
+
+    test "replica do not write the reply to the replication link - SYNC (addReplyDeferredLen)" {
+        set rd [redis_deferring_client]
+        set lines [count_log_lines 0]
+
+        $rd sync
+        $rd xinfo help
+        catch {$rd read} e
+        if {$::verbose} { puts "SYNC addReplyDeferredLen: $e" }
+        assert_equal "PONG" [r ping]
+
+        # Check we got the warning logs about the XINFO HELP command.
+        verify_log_message 0 "*Replica generated a reply to command 'xinfo|help', disconnecting it: *" $lines
+
+        $rd close
+        waitForBgsave r
+    }
+
+    test "replica do not write the reply to the replication link - PSYNC (_addReplyToBufferOrList)" {
+        set rd [redis_deferring_client]
+        set lines [count_log_lines 0]
+
+        $rd psync replicationid -1
+        assert_match {FULLRESYNC * 0} [$rd read]
+        $rd get foo
+        catch {$rd read} e
+        if {$::verbose} { puts "PSYNC _addReplyToBufferOrList: $e" }
+        assert_equal "PONG" [r ping]
+
+        # Check we got the warning logs about the GET command.
+        verify_log_message 0 "*Replica generated a reply to command 'get', disconnecting it: *" $lines
+        verify_log_message 0 "*== CRITICAL == This master is sending an error to its replica: *" $lines
+        verify_log_message 0 "*Replica can't interact with the keyspace*" $lines
+
+        $rd close
+        waitForBgsave r
+    }
+
+    test "replica do not write the reply to the replication link - PSYNC (addReplyDeferredLen)" {
+        set rd [redis_deferring_client]
+        set lines [count_log_lines 0]
+
+        $rd psync replicationid -1
+        assert_match {FULLRESYNC * 0} [$rd read]
+        $rd slowlog get
+        catch {$rd read} e
+        if {$::verbose} { puts "PSYNC addReplyDeferredLen: $e" }
+        assert_equal "PONG" [r ping]
+
+        # Check we got the warning logs about the SLOWLOG GET command.
+        verify_log_message 0 "*Replica generated a reply to command 'slowlog|get', disconnecting it: *" $lines
+
+        $rd close
+        waitForBgsave r
+    }
+
+    test "PSYNC with wrong offset should throw error" {
+        # It used to accept the FULL SYNC, but also replied with an error.
+        set lines [count_log_lines 0]
+        assert_error {ERR value is not an integer or out of range} {r psync replicationid offset_str}
+        verify_log_message 0 "*Replica * asks for synchronization but with a wrong offset*" $lines
+        assert_equal "PONG" [r ping]
+    }
+}
+
+start_server {tags {"repl external:skip"}} {
+    # Outer server is the master; SET-ACTIVE-EXPIRE 0 is applied to both servers.
+    set primary [srv 0 client]
+    $primary debug SET-ACTIVE-EXPIRE 0
+    start_server {} {
+        set replica [srv 0 client]
+        $replica debug SET-ACTIVE-EXPIRE 0
+        $replica slaveof [srv -1 host] [srv -1 port]
+
+        test "Test replication with lazy expire" {
+            # the replication link must be up before anything is written
+            wait_for_condition 50 100 {
+                [lindex [$replica role] 0] eq {slave} &&
+                [string match {*master_link_status:up*} [$replica info replication]]
+            } else {
+                fail "Can't turn the instance into a replica"
+            }
+
+            # s gets a 1ms TTL, is left alone for 10ms, then is written to again
+            $primary sadd s foo
+            $primary pexpire s 1
+            after 10
+            $primary sadd s foo
+            assert_equal 1 [$primary wait 1 0]
+
+            assert_equal "set" [$primary type s]
+            assert_equal "set" [$replica type s]
+        }
+    }
+}
diff --git a/tests/integration/shutdown.tcl b/tests/integration/shutdown.tcl
new file mode 100644
index 0000000..b2ec32c
--- /dev/null
+++ b/tests/integration/shutdown.tcl
@@ -0,0 +1,234 @@
+# This test suite tests shutdown when there are lagging replicas connected.
+
+# Fill up the OS socket send buffer for the replica connection 1M at a time.
+# When the replication buffer memory increases beyond 2M (often after writing 4M
+# or so), we assume it's because the OS socket send buffer can't swallow
+# anymore. If that never happens within max_rounds rounds, the test is failed
+# instead of looping forever. idx is the server level passed to populate and s.
+proc fill_up_os_socket_send_buffer_for_repl {idx {max_rounds 256}} {
+    for {set i 1} {$i <= $max_rounds} {incr i} {
+        # one round: junk keys under a prefix that differs per round
+        populate 1024 junk$i: 1024 $idx
+        after 10
+        if {[s $idx mem_total_replication_buffers] > 2*1024*1024} {
+            return
+        }
+    }
+    fail "Replication buffers of server $idx didn't grow beyond 2M after $max_rounds rounds"
+}
+
+foreach how {sigterm shutdown} {
+ test "Shutting down master waits for replica to catch up ($how)" {
+ start_server {overrides {save ""}} {
+ start_server {overrides {save ""}} {
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+ set master_pid [srv -1 pid]
+ set replica [srv 0 client]
+ set replica_pid [srv 0 pid]
+
+ # Config master.
+ $master config set shutdown-timeout 300; # 5min for slow CI
+ $master config set repl-backlog-size 1; # small as possible
+ $master config set hz 100; # cron runs every 10ms
+
+ # Config replica.
+ $replica replicaof $master_host $master_port
+ wait_for_sync $replica
+
+ # Preparation: Set k to 1 on both master and replica.
+ $master set k 1
+ wait_for_ofs_sync $master $replica
+
+ # Pause the replica.
+ pause_process $replica_pid
+
+ # Fill up the OS socket send buffer for the replica connection
+ # to prevent the following INCR from reaching the replica via
+ # the OS.
+ fill_up_os_socket_send_buffer_for_repl -1
+
+ # Incr k and immediately shut down master, by signal or by a SHUTDOWN command.
+ $master incr k
+ switch $how {
+ sigterm {
+ exec kill -SIGTERM $master_pid
+ }
+ shutdown {
+ set rd [redis_deferring_client -1]
+ $rd shutdown
+ }
+ }
+ wait_for_condition 50 100 {
+ [s -1 shutdown_in_milliseconds] > 0
+ } else {
+ fail "Master not indicating ongoing shutdown."
+ }
+
+ # Wake up replica and check if master has waited for it: k must become 2 there.
+ after 20; # 2 cron intervals
+ resume_process $replica_pid
+ wait_for_condition 300 1000 {
+ [$replica get k] eq 2
+ } else {
+ fail "Master exited before replica could catch up."
+ }
+
+ # Check shutdown log messages on master: it exited with no lagging replica.
+ wait_for_log_messages -1 {"*ready to exit, bye bye*"} 0 100 500
+ assert_equal 0 [count_log_message -1 "*Lagging replica*"]
+ verify_log_message -1 "*1 of 1 replicas are in sync*" 0
+ }
+ }
+ } {} {repl external:skip}
+}
+
+test {Shutting down master waits for replica timeout} {
+ start_server {overrides {save ""}} {
+ start_server {overrides {save ""}} {
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+ set master_pid [srv -1 pid]
+ set replica [srv 0 client]
+ set replica_pid [srv 0 pid]
+
+ # Config master.
+ $master config set shutdown-timeout 1; # second
+
+ # Config replica.
+ $replica replicaof $master_host $master_port
+ wait_for_sync $replica
+
+ # Preparation: Set k to 1 on both master and replica.
+ $master set k 1
+ wait_for_ofs_sync $master $replica
+
+ # Pause the replica.
+ pause_process $replica_pid
+
+ # Fill up the OS socket send buffer for the replica connection to
+ # prevent the following INCR k from reaching the replica via the OS.
+ fill_up_os_socket_send_buffer_for_repl -1
+
+ # Incr k and immediately shut down master.
+ $master incr k
+ exec kill -SIGTERM $master_pid
+ wait_for_condition 50 100 {
+ [s -1 shutdown_in_milliseconds] > 0
+ } else {
+ fail "Master not indicating ongoing shutdown."
+ }
+
+ # Let master finish shutting down and check its log: the replica is reported as lagging.
+ wait_for_log_messages -1 {"*ready to exit, bye bye*"} 0 100 100
+ verify_log_message -1 "*Lagging replica*" 0
+ verify_log_message -1 "*0 of 1 replicas are in sync*" 0
+
+ # Wake up replica. It is expected not to have received the INCR: k is still 1.
+ resume_process $replica_pid
+ assert_equal 1 [$replica get k]
+ }
+ }
+} {} {repl external:skip}
+
+test "Shutting down master waits for replica then fails" {
+ start_server {overrides {save ""}} {
+ start_server {overrides {save ""}} {
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+ set master_pid [srv -1 pid]
+ set replica [srv 0 client]
+ set replica_pid [srv 0 pid]
+
+ # Config master and replica.
+ $replica replicaof $master_host $master_port
+ wait_for_sync $replica
+
+ # Pause the replica and write a key on master.
+ pause_process $replica_pid
+ $master incr k
+
+ # Two clients call blocking SHUTDOWN in parallel.
+ set rd1 [redis_deferring_client -1]
+ set rd2 [redis_deferring_client -1]
+ $rd1 shutdown
+ $rd2 shutdown
+ set info_clients [$master info clients]
+ assert_match "*connected_clients:3*" $info_clients
+ assert_match "*blocked_clients:2*" $info_clients
+
+ # Start a very slow initial AOFRW, which will prevent shutdown.
+ $master config set rdb-key-save-delay 30000000; # 30 seconds
+ $master config set appendonly yes
+
+ # Wake up replica, causing master to continue shutting down.
+ resume_process $replica_pid
+
+ # SHUTDOWN returns an error to both clients blocking on SHUTDOWN.
+ catch { $rd1 read } e1
+ catch { $rd2 read } e2
+ assert_match "*Errors trying to SHUTDOWN. Check logs*" $e1
+ assert_match "*Errors trying to SHUTDOWN. Check logs*" $e2
+ $rd1 close
+ $rd2 close
+
+ # Check shutdown log messages on master.
+ verify_log_message -1 "*1 of 1 replicas are in sync*" 0
+ verify_log_message -1 "*Writing initial AOF, can't exit*" 0
+ verify_log_message -1 "*Errors trying to shut down*" 0
+
+ # Let master exit fast, without waiting for the very slow AOFRW.
+ catch {$master shutdown nosave force}
+ }
+ }
+} {} {repl external:skip}
+
+test "Shutting down master waits for replica then aborted" {
+ start_server {overrides {save ""}} {
+ start_server {overrides {save ""}} {
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+ set master_pid [srv -1 pid]
+ set replica [srv 0 client]
+ set replica_pid [srv 0 pid]
+
+ # Config master and replica.
+ $replica replicaof $master_host $master_port
+ wait_for_sync $replica
+
+ # Pause the replica and write a key on master.
+ pause_process $replica_pid
+ $master incr k
+
+ # Two clients call blocking SHUTDOWN in parallel.
+ set rd1 [redis_deferring_client -1]
+ set rd2 [redis_deferring_client -1]
+ $rd1 shutdown
+ $rd2 shutdown
+ set info_clients [$master info clients]
+ assert_match "*connected_clients:3*" $info_clients
+ assert_match "*blocked_clients:2*" $info_clients
+
+ # Abort the shutdown
+ $master shutdown abort
+
+ # Wake up replica (the shutdown itself has already been aborted above).
+ resume_process $replica_pid
+
+ # SHUTDOWN returns an error to both clients that were blocking on SHUTDOWN.
+ catch { $rd1 read } e1
+ catch { $rd2 read } e2
+ assert_match "*Errors trying to SHUTDOWN. Check logs*" $e1
+ assert_match "*Errors trying to SHUTDOWN. Check logs*" $e2
+ $rd1 close
+ $rd2 close
+
+ # Check that the master logged the manual abort.
+ verify_log_message -1 "*Shutdown manually aborted*" 0
+ }
+ }
+} {} {repl external:skip}