diff options
Diffstat (limited to 'tests/integration')
26 files changed, 8248 insertions, 0 deletions
diff --git a/tests/integration/aof-multi-part.tcl b/tests/integration/aof-multi-part.tcl new file mode 100644 index 0000000..74f6b49 --- /dev/null +++ b/tests/integration/aof-multi-part.tcl @@ -0,0 +1,1332 @@ +source tests/support/aofmanifest.tcl +set defaults {appendonly {yes} appendfilename {appendonly.aof} appenddirname {appendonlydir} auto-aof-rewrite-percentage {0}} +set server_path [tmpdir server.multi.aof] +set aof_dirname "appendonlydir" +set aof_basename "appendonly.aof" +set aof_dirpath "$server_path/$aof_dirname" +set aof_base1_file "$server_path/$aof_dirname/${aof_basename}.1$::base_aof_sufix$::aof_format_suffix" +set aof_base2_file "$server_path/$aof_dirname/${aof_basename}.2$::base_aof_sufix$::aof_format_suffix" +set aof_incr1_file "$server_path/$aof_dirname/${aof_basename}.1$::incr_aof_sufix$::aof_format_suffix" +set aof_incr2_file "$server_path/$aof_dirname/${aof_basename}.2$::incr_aof_sufix$::aof_format_suffix" +set aof_incr3_file "$server_path/$aof_dirname/${aof_basename}.3$::incr_aof_sufix$::aof_format_suffix" +set aof_manifest_file "$server_path/$aof_dirname/${aof_basename}$::manifest_suffix" +set aof_old_name_old_path "$server_path/$aof_basename" +set aof_old_name_new_path "$aof_dirpath/$aof_basename" +set aof_old_name_old_path2 "$server_path/${aof_basename}2" +set aof_manifest_file2 "$server_path/$aof_dirname/${aof_basename}2$::manifest_suffix" + +tags {"external:skip"} { + + # Test Part 1 + + # In order to test the loading logic of redis under different combinations of manifest and AOF. + # We will manually construct the manifest file and AOF, and then start redis to verify whether + # the redis behavior is as expected. + + test {Multi Part AOF can't load data when some file missing} { + create_aof $aof_dirpath $aof_base1_file { + append_to_aof [formatCommand set k1 v1] + } + + create_aof $aof_dirpath $aof_incr2_file { + append_to_aof [formatCommand set k2 v2] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + append_to_manifest "file appendonly.aof.2.incr.aof seq 2 type i\n" + } + + start_server_aof [list dir $server_path] { + wait_for_condition 100 50 { + ! [is_alive $srv] + } else { + fail "AOF loading didn't fail" + } + + assert_equal 1 [count_message_lines $server_path/stdout "appendonly.aof.1.incr.aof .*No such file or directory"] + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can't load data when the sequence not increase monotonically} { + create_aof $aof_dirpath $aof_incr1_file { + append_to_aof [formatCommand set k1 v1] + } + + create_aof $aof_dirpath $aof_incr2_file { + append_to_aof [formatCommand set k2 v2] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.2.incr.aof seq 2 type i\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + } + + start_server_aof [list dir $server_path] { + wait_for_condition 100 50 { + ! [is_alive $srv] + } else { + fail "AOF loading didn't fail" + } + + assert_equal 1 [count_message_lines $server_path/stdout "Found a non-monotonic sequence number"] + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can't load data when there are blank lines in the manifest file} { + create_aof $aof_dirpath $aof_incr1_file { + append_to_aof [formatCommand set k1 v1] + } + + create_aof $aof_dirpath $aof_incr3_file { + append_to_aof [formatCommand set k2 v2] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + append_to_manifest "\n" + append_to_manifest "file appendonly.aof.3.incr.aof seq 3 type i\n" + } + + start_server_aof [list dir $server_path] { + wait_for_condition 100 50 { + ! [is_alive $srv] + } else { + fail "AOF loading didn't fail" + } + + assert_equal 1 [count_message_lines $server_path/stdout "Invalid AOF manifest file format"] + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can't load data when there is a duplicate base file} { + create_aof $aof_dirpath $aof_base1_file { + append_to_aof [formatCommand set k1 v1] + } + + create_aof $aof_dirpath $aof_base2_file { + append_to_aof [formatCommand set k2 v2] + } + + create_aof $aof_dirpath $aof_incr1_file { + append_to_aof [formatCommand set k3 v3] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n" + append_to_manifest "file appendonly.aof.2.base.aof seq 2 type b\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + } + + start_server_aof [list dir $server_path] { + wait_for_condition 100 50 { + ! [is_alive $srv] + } else { + fail "AOF loading didn't fail" + } + + assert_equal 1 [count_message_lines $server_path/stdout "Found duplicate base file information"] + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can't load data when the manifest format is wrong (type unknown)} { + create_aof $aof_dirpath $aof_base1_file { + append_to_aof [formatCommand set k1 v1] + } + + create_aof $aof_dirpath $aof_incr1_file { + append_to_aof [formatCommand set k3 v3] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.base.aof seq 1 type x\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + } + + start_server_aof [list dir $server_path] { + wait_for_condition 100 50 { + ! [is_alive $srv] + } else { + fail "AOF loading didn't fail" + } + + assert_equal 1 [count_message_lines $server_path/stdout "Unknown AOF file type"] + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can't load data when the manifest format is wrong (missing key)} { + create_aof $aof_dirpath $aof_base1_file { + append_to_aof [formatCommand set k1 v1] + } + + create_aof $aof_dirpath $aof_incr1_file { + append_to_aof [formatCommand set k3 v3] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "filx appendonly.aof.1.base.aof seq 1 type b\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + } + + start_server_aof [list dir $server_path] { + wait_for_condition 100 50 { + ! [is_alive $srv] + } else { + fail "AOF loading didn't fail" + } + + assert_equal 2 [count_message_lines $server_path/stdout "Invalid AOF manifest file format"] + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can't load data when the manifest format is wrong (line too short)} { + create_aof $aof_dirpath $aof_base1_file { + append_to_aof [formatCommand set k1 v1] + } + + create_aof $aof_dirpath $aof_incr1_file { + append_to_aof [formatCommand set k3 v3] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n" + append_to_manifest "file appendonly.aof.1.incr.aof type i\n" + } + + start_server_aof [list dir $server_path] { + wait_for_condition 100 50 { + ! [is_alive $srv] + } else { + fail "AOF loading didn't fail" + } + + assert_equal 3 [count_message_lines $server_path/stdout "Invalid AOF manifest file format"] + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can't load data when the manifest format is wrong (line too long)} { + create_aof $aof_dirpath $aof_base1_file { + append_to_aof [formatCommand set k1 v1] + } + + create_aof $aof_dirpath $aof_incr1_file { + append_to_aof [formatCommand set k3 v3] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b file appendonly.aof.1.base.aof seq 1 type b\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + } + + start_server_aof [list dir $server_path] { + wait_for_condition 100 50 { + ! [is_alive $srv] + } else { + fail "AOF loading didn't fail" + } + + assert_equal 1 [count_message_lines $server_path/stdout "The AOF manifest file contains too long line"] + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can't load data when the manifest format is wrong (odd parameter)} { + create_aof $aof_dirpath $aof_base1_file { + append_to_aof [formatCommand set k1 v1] + } + + create_aof $aof_dirpath $aof_incr1_file { + append_to_aof [formatCommand set k3 v3] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i newkey\n" + } + + start_server_aof [list dir $server_path] { + wait_for_condition 100 50 { + ! [is_alive $srv] + } else { + fail "AOF loading didn't fail" + } + + assert_equal 4 [count_message_lines $server_path/stdout "Invalid AOF manifest file format"] + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can't load data when the manifest file is empty} { + create_aof_manifest $aof_dirpath $aof_manifest_file { + } + + start_server_aof [list dir $server_path] { + wait_for_condition 100 50 { + ! [is_alive $srv] + } else { + fail "AOF loading didn't fail" + } + + assert_equal 1 [count_message_lines $server_path/stdout "Found an empty AOF manifest"] + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can start when no aof and no manifest} { + start_server_aof [list dir $server_path] { + assert_equal 1 [is_alive $srv] + + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + + assert_equal OK [$client set k1 v1] + assert_equal v1 [$client get k1] + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can start when we have en empty AOF dir} { + create_aof_dir $aof_dirpath + + start_server_aof [list dir $server_path] { + assert_equal 1 [is_alive $srv] + } + } + + test {Multi Part AOF can load data discontinuously increasing sequence} { + create_aof $aof_dirpath $aof_base1_file { + append_to_aof [formatCommand set k1 v1] + } + + create_aof $aof_dirpath $aof_incr1_file { + append_to_aof [formatCommand set k2 v2] + } + + create_aof $aof_dirpath $aof_incr3_file { + append_to_aof [formatCommand set k3 v3] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + append_to_manifest "file appendonly.aof.3.incr.aof seq 3 type i\n" + } + + start_server_aof [list dir $server_path] { + assert_equal 1 [is_alive $srv] + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $client + + assert_equal v1 [$client get k1] + assert_equal v2 [$client get k2] + assert_equal v3 [$client get k3] + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can load data when manifest add new k-v} { + create_aof $aof_dirpath $aof_base1_file { + append_to_aof [formatCommand set k1 v1] + } + + create_aof $aof_dirpath $aof_incr1_file { + append_to_aof [formatCommand set k2 v2] + } + + create_aof $aof_dirpath $aof_incr3_file { + append_to_aof [formatCommand set k3 v3] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b newkey newvalue\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + append_to_manifest "file appendonly.aof.3.incr.aof seq 3 type i\n" + } + + start_server_aof [list dir $server_path] { + assert_equal 1 [is_alive $srv] + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $client + + assert_equal v1 [$client get k1] + assert_equal v2 [$client get k2] + assert_equal v3 [$client get k3] + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can load data when some AOFs are empty} { + create_aof $aof_dirpath $aof_base1_file { + append_to_aof [formatCommand set k1 v1] + } + + create_aof $aof_dirpath $aof_incr1_file { + } + + create_aof $aof_dirpath $aof_incr3_file { + append_to_aof [formatCommand set k3 v3] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + append_to_manifest "file appendonly.aof.3.incr.aof seq 3 type i\n" + } + + start_server_aof [list dir $server_path] { + assert_equal 1 [is_alive $srv] + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $client + + assert_equal v1 [$client get k1] + assert_equal "" [$client get k2] + assert_equal v3 [$client get k3] + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can load data from old version redis (rdb preamble no)} { + create_aof $server_path $aof_old_name_old_path { + append_to_aof [formatCommand set k1 v1] + append_to_aof [formatCommand set k2 v2] + append_to_aof [formatCommand set k3 v3] + } + + start_server_aof [list dir $server_path] { + assert_equal 1 [is_alive $srv] + + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $client + + assert_equal v1 [$client get k1] + assert_equal v2 [$client get k2] + assert_equal v3 [$client get k3] + + assert_equal 0 [check_file_exist $server_path $aof_basename] + assert_equal 1 [check_file_exist $aof_dirpath $aof_basename] + + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof seq 1 type b} + {file appendonly.aof.1.incr.aof seq 1 type i} + } + + assert_equal OK [$client set k4 v4] + + $client bgrewriteaof + waitForBgrewriteaof $client + + assert_equal OK [$client set k5 v5] + + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.2.base.rdb seq 2 type b} + {file appendonly.aof.2.incr.aof seq 2 type i} + } + + set d1 [$client debug digest] + $client debug loadaof + set d2 [$client debug digest] + assert {$d1 eq $d2} + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can load data from old version redis (rdb preamble yes)} { + exec cp tests/assets/rdb-preamble.aof $aof_old_name_old_path + start_server_aof [list dir $server_path] { + assert_equal 1 [is_alive $srv] + + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $client + + # k1 k2 in rdb header and k3 in AOF tail + assert_equal v1 [$client get k1] + assert_equal v2 [$client get k2] + assert_equal v3 [$client get k3] + + assert_equal 0 [check_file_exist $server_path $aof_basename] + assert_equal 1 [check_file_exist $aof_dirpath $aof_basename] + + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof seq 1 type b} + {file appendonly.aof.1.incr.aof seq 1 type i} + } + + assert_equal OK [$client set k4 v4] + + $client bgrewriteaof + waitForBgrewriteaof $client + + assert_equal OK [$client set k5 v5] + + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.2.base.rdb seq 2 type b} + {file appendonly.aof.2.incr.aof seq 2 type i} + } + + set d1 [$client debug digest] + $client debug loadaof + set d2 [$client debug digest] + assert {$d1 eq $d2} + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can continue the upgrade from the interrupted upgrade state} { + create_aof $server_path $aof_old_name_old_path { + append_to_aof [formatCommand set k1 v1] + append_to_aof [formatCommand set k2 v2] + append_to_aof [formatCommand set k3 v3] + } + + # Create a layout of an interrupted upgrade (interrupted before the rename). + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof seq 1 type b\n" + } + + start_server_aof [list dir $server_path] { + assert_equal 1 [is_alive $srv] + + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $client + + assert_equal v1 [$client get k1] + assert_equal v2 [$client get k2] + assert_equal v3 [$client get k3] + + assert_equal 0 [check_file_exist $server_path $aof_basename] + assert_equal 1 [check_file_exist $aof_dirpath $aof_basename] + + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof seq 1 type b} + {file appendonly.aof.1.incr.aof seq 1 type i} + } + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can be loaded correctly when both server dir and aof dir contain old AOF} { + create_aof $server_path $aof_old_name_old_path { + append_to_aof [formatCommand set k1 v1] + append_to_aof [formatCommand set k2 v2] + append_to_aof [formatCommand set k3 v3] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof seq 1 type b\n" + } + + create_aof $aof_dirpath $aof_old_name_new_path { + append_to_aof [formatCommand set k4 v4] + append_to_aof [formatCommand set k5 v5] + append_to_aof [formatCommand set k6 v6] + } + + start_server_aof [list dir $server_path] { + assert_equal 1 [is_alive $srv] + + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $client + + assert_equal 0 [$client exists k1] + assert_equal 0 [$client exists k2] + assert_equal 0 [$client exists k3] + + assert_equal v4 [$client get k4] + assert_equal v5 [$client get k5] + assert_equal v6 [$client get k6] + + assert_equal 1 [check_file_exist $server_path $aof_basename] + assert_equal 1 [check_file_exist $aof_dirpath $aof_basename] + + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof seq 1 type b} + {file appendonly.aof.1.incr.aof seq 1 type i} + } + } + + clean_aof_persistence $aof_dirpath + catch {exec rm -rf $aof_old_name_old_path} + } + + test {Multi Part AOF can't load data when the manifest contains the old AOF file name but the file does not exist in server dir and aof dir} { + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof seq 1 type b\n" + } + + start_server_aof [list dir $server_path] { + wait_for_condition 100 50 { + ! [is_alive $srv] + } else { + fail "AOF loading didn't fail" + } + + assert_equal 1 [count_message_lines $server_path/stdout "appendonly.aof .*No such file or directory"] + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can upgrade when when two redis share the same server dir} { + create_aof $server_path $aof_old_name_old_path { + append_to_aof [formatCommand set k1 v1] + append_to_aof [formatCommand set k2 v2] + append_to_aof [formatCommand set k3 v3] + } + + create_aof $server_path $aof_old_name_old_path2 { + append_to_aof [formatCommand set k4 v4] + append_to_aof [formatCommand set k5 v5] + append_to_aof [formatCommand set k6 v6] + } + + start_server_aof [list dir $server_path] { + set redis1 [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + + start_server [list overrides [list dir $server_path appendonly yes appendfilename appendonly.aof2]] { + set redis2 [redis [srv host] [srv port] 0 $::tls] + + test "Multi Part AOF can upgrade when when two redis share the same server dir (redis1)" { + wait_done_loading $redis1 + assert_equal v1 [$redis1 get k1] + assert_equal v2 [$redis1 get k2] + assert_equal v3 [$redis1 get k3] + + assert_equal 0 [$redis1 exists k4] + assert_equal 0 [$redis1 exists k5] + assert_equal 0 [$redis1 exists k6] + + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof seq 1 type b} + {file appendonly.aof.1.incr.aof seq 1 type i} + } + + $redis1 bgrewriteaof + waitForBgrewriteaof $redis1 + + assert_equal OK [$redis1 set k v] + + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.2.base.rdb seq 2 type b} + {file appendonly.aof.2.incr.aof seq 2 type i} + } + + set d1 [$redis1 debug digest] + $redis1 debug loadaof + set d2 [$redis1 debug digest] + assert {$d1 eq $d2} + } + + test "Multi Part AOF can upgrade when when two redis share the same server dir (redis2)" { + wait_done_loading $redis2 + + assert_equal 0 [$redis2 exists k1] + assert_equal 0 [$redis2 exists k2] + assert_equal 0 [$redis2 exists k3] + + assert_equal v4 [$redis2 get k4] + assert_equal v5 [$redis2 get k5] + assert_equal v6 [$redis2 get k6] + + assert_aof_manifest_content $aof_manifest_file2 { + {file appendonly.aof2 seq 1 type b} + {file appendonly.aof2.1.incr.aof seq 1 type i} + } + + $redis2 bgrewriteaof + waitForBgrewriteaof $redis2 + + assert_equal OK [$redis2 set k v] + + assert_aof_manifest_content $aof_manifest_file2 { + {file appendonly.aof2.2.base.rdb seq 2 type b} + {file appendonly.aof2.2.incr.aof seq 2 type i} + } + + set d1 [$redis2 debug digest] + $redis2 debug loadaof + set d2 [$redis2 debug digest] + assert {$d1 eq $d2} + } + } + } + } + + test {Multi Part AOF can handle appendfilename contains whitespaces} { + start_server [list overrides [list appendonly yes appendfilename "\" file seq \\n\\n.aof \""]] { + set dir [get_redis_dir] + set aof_manifest_name [format "%s/%s/%s%s" $dir "appendonlydir" " file seq \n\n.aof " $::manifest_suffix] + set redis [redis [srv host] [srv port] 0 $::tls] + + assert_equal OK [$redis set k1 v1] + + $redis bgrewriteaof + waitForBgrewriteaof $redis + + assert_aof_manifest_content $aof_manifest_name { + {file " file seq \n\n.aof .2.base.rdb" seq 2 type b} + {file " file seq \n\n.aof .2.incr.aof" seq 2 type i} + } + + set d1 [$redis debug digest] + $redis debug loadaof + set d2 [$redis debug digest] + assert {$d1 eq $d2} + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can create BASE (RDB format) when redis starts from empty} { + start_server_aof [list dir $server_path] { + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $client + + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.1${::base_aof_sufix}${::rdb_format_suffix}"] + + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.1.base.rdb seq 1 type b} + {file appendonly.aof.1.incr.aof seq 1 type i} + } + + $client set foo behavior + + set d1 [$client debug digest] + $client debug loadaof + set d2 [$client debug digest] + assert {$d1 eq $d2} + } + + clean_aof_persistence $aof_dirpath + } + + test {Multi Part AOF can create BASE (AOF format) when redis starts from empty} { + start_server_aof [list dir $server_path aof-use-rdb-preamble no] { + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $client + + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.1${::base_aof_sufix}${::aof_format_suffix}"] + + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.1.base.aof seq 1 type b} + {file appendonly.aof.1.incr.aof seq 1 type i} + } + + $client set foo behavior + + set d1 [$client debug digest] + $client debug loadaof + set d2 [$client debug digest] + assert {$d1 eq $d2} + } + + clean_aof_persistence $aof_dirpath + } + + # Test Part 2 + # + # To test whether the AOFRW behaves as expected during the redis run. + # We will start redis first, then perform pressure writing, enable and disable AOF, and manually + # and automatically run bgrewrite and other actions, to test whether the correct AOF file is created, + # whether the correct manifest is generated, whether the data can be reload correctly under continuous + # writing pressure, etc. + + + start_server {tags {"Multi Part AOF"} overrides {aof-use-rdb-preamble {yes} appendonly {no}}} { + set dir [get_redis_dir] + set aof_basename "appendonly.aof" + set aof_dirname "appendonlydir" + set aof_dirpath "$dir/$aof_dirname" + set aof_manifest_name "$aof_basename$::manifest_suffix" + set aof_manifest_file "$dir/$aof_dirname/$aof_manifest_name" + + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + catch {exec rm -rf $aof_manifest_file} + + test "Make sure aof manifest $aof_manifest_name not in aof directory" { + assert_equal 0 [file exists $aof_manifest_file] + } + + test "AOF enable will create manifest file" { + r config set appendonly yes ; # Will create manifest and new INCR aof + r config set auto-aof-rewrite-percentage 0 ; # Disable auto-rewrite. + waitForBgrewriteaof r + + # Start write load + set load_handle0 [start_write_load $master_host $master_port 10] + + wait_for_condition 50 100 { + [r dbsize] > 0 + } else { + fail "No write load detected." + } + + # First AOFRW done + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.1.base.rdb seq 1 type b} + {file appendonly.aof.1.incr.aof seq 1 type i} + } + + # Check we really have these files + assert_equal 1 [check_file_exist $aof_dirpath $aof_manifest_name] + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.1${::base_aof_sufix}${::rdb_format_suffix}"] + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.1${::incr_aof_sufix}${::aof_format_suffix}"] + + r bgrewriteaof + waitForBgrewriteaof r + + # The second AOFRW done + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.2.base.rdb seq 2 type b} + {file appendonly.aof.2.incr.aof seq 2 type i} + } + + assert_equal 1 [check_file_exist $aof_dirpath $aof_manifest_name] + # Wait bio delete history + wait_for_condition 1000 10 { + [check_file_exist $aof_dirpath "${aof_basename}.1${::base_aof_sufix}${::rdb_format_suffix}"] == 0 && + [check_file_exist $aof_dirpath "${aof_basename}.1${::incr_aof_sufix}${::aof_format_suffix}"] == 0 + } else { + fail "Failed to delete history AOF" + } + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.2${::base_aof_sufix}${::rdb_format_suffix}"] + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.2${::incr_aof_sufix}${::aof_format_suffix}"] + + stop_write_load $load_handle0 + wait_load_handlers_disconnected + + set d1 [r debug digest] + r debug loadaof + set d2 [r debug digest] + assert {$d1 eq $d2} + } + + test "AOF multiple rewrite failures will open multiple INCR AOFs" { + # Start write load + r config set rdb-key-save-delay 10000000 + + set orig_size [r dbsize] + set load_handle0 [start_write_load $master_host $master_port 10] + + wait_for_condition 50 100 { + [r dbsize] > $orig_size + } else { + fail "No write load detected." + } + + # Let AOFRW fail three times + r bgrewriteaof + set pid1 [get_child_pid 0] + catch {exec kill -9 $pid1} + waitForBgrewriteaof r + + r bgrewriteaof + set pid2 [get_child_pid 0] + catch {exec kill -9 $pid2} + waitForBgrewriteaof r + + r bgrewriteaof + set pid3 [get_child_pid 0] + catch {exec kill -9 $pid3} + waitForBgrewriteaof r + + assert_equal 0 [check_file_exist $dir "temp-rewriteaof-bg-$pid1.aof"] + assert_equal 0 [check_file_exist $dir "temp-rewriteaof-bg-$pid2.aof"] + assert_equal 0 [check_file_exist $dir "temp-rewriteaof-bg-$pid3.aof"] + + # We will have four INCR AOFs + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.2.base.rdb seq 2 type b} + {file appendonly.aof.2.incr.aof seq 2 type i} + {file appendonly.aof.3.incr.aof seq 3 type i} + {file appendonly.aof.4.incr.aof seq 4 type i} + {file appendonly.aof.5.incr.aof seq 5 type i} + } + + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.2${::base_aof_sufix}${::rdb_format_suffix}"] + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.2${::incr_aof_sufix}${::aof_format_suffix}"] + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.3${::incr_aof_sufix}${::aof_format_suffix}"] + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.4${::incr_aof_sufix}${::aof_format_suffix}"] + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.5${::incr_aof_sufix}${::aof_format_suffix}"] + + stop_write_load $load_handle0 + wait_load_handlers_disconnected + + set d1 [r debug digest] + r debug loadaof + set d2 [r debug digest] + assert {$d1 eq $d2} + + r config set rdb-key-save-delay 0 + catch {exec kill -9 [get_child_pid 0]} + wait_for_condition 1000 10 { + [s rdb_bgsave_in_progress] eq 0 + } else { + fail "bgsave did not stop in time" + } + + # AOFRW success + r bgrewriteaof + waitForBgrewriteaof r + + # All previous INCR AOFs have become history + # and have be deleted + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.3.base.rdb seq 3 type b} + {file appendonly.aof.6.incr.aof seq 6 type i} + } + + # Wait bio delete history + wait_for_condition 1000 10 { + [check_file_exist $aof_dirpath "${aof_basename}.2${::base_aof_sufix}${::rdb_format_suffix}"] == 0 && + [check_file_exist $aof_dirpath "${aof_basename}.2${::incr_aof_sufix}${::aof_format_suffix}"] == 0 && + [check_file_exist $aof_dirpath "${aof_basename}.3${::incr_aof_sufix}${::aof_format_suffix}"] == 0 && + [check_file_exist $aof_dirpath "${aof_basename}.4${::incr_aof_sufix}${::aof_format_suffix}"] == 0 && + [check_file_exist $aof_dirpath "${aof_basename}.5${::incr_aof_sufix}${::aof_format_suffix}"] == 0 + } else { + fail "Failed to delete history AOF" + } + + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.3${::base_aof_sufix}${::rdb_format_suffix}"] + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.6${::incr_aof_sufix}${::aof_format_suffix}"] + + set d1 [r debug digest] + r debug loadaof + set d2 [r debug digest] + assert {$d1 eq $d2} + } + + test "AOF rewrite doesn't open new aof when AOF turn off" { + r config set appendonly no + + r bgrewriteaof + waitForBgrewriteaof r + + # We only have BASE AOF, no INCR AOF + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.4.base.rdb seq 4 type b} + } + + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.4${::base_aof_sufix}${::rdb_format_suffix}"] + wait_for_condition 1000 10 { + [check_file_exist $aof_dirpath "${aof_basename}.6${::incr_aof_sufix}${::aof_format_suffix}"] == 0 && + [check_file_exist $aof_dirpath "${aof_basename}.7${::incr_aof_sufix}${::aof_format_suffix}"] == 0 + } else { + fail "Failed to delete history AOF" + } + + set d1 [r debug digest] + r debug loadaof + set d2 [r debug digest] + assert {$d1 eq $d2} + + # Turn on AOF again + r config set appendonly yes + waitForBgrewriteaof r + + # A new INCR AOF was created + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.5.base.rdb seq 5 type b} + {file appendonly.aof.1.incr.aof seq 1 type i} + } + + # Wait bio delete history + wait_for_condition 1000 10 { + [check_file_exist $aof_dirpath "${aof_basename}.4${::base_aof_sufix}${::rdb_format_suffix}"] == 0 + } else { + fail "Failed to delete history AOF" + } + + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.5${::base_aof_sufix}${::rdb_format_suffix}"] + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.1${::incr_aof_sufix}${::aof_format_suffix}"] + } + + test "AOF enable/disable auto gc" { + r config set aof-disable-auto-gc yes + + r bgrewriteaof + waitForBgrewriteaof r + + r bgrewriteaof + waitForBgrewriteaof r + + # We can see four history AOFs (Evolved from two BASE and two INCR) + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.7.base.rdb seq 7 type b} + {file appendonly.aof.2.incr.aof seq 2 type h} + {file appendonly.aof.6.base.rdb seq 6 type h} + {file appendonly.aof.1.incr.aof seq 1 type h} + {file appendonly.aof.5.base.rdb seq 5 type h} + {file appendonly.aof.3.incr.aof seq 3 type i} + } + + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.5${::base_aof_sufix}${::rdb_format_suffix}"] + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.6${::base_aof_sufix}${::rdb_format_suffix}"] + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.1${::incr_aof_sufix}${::aof_format_suffix}"] + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.2${::incr_aof_sufix}${::aof_format_suffix}"] + + r config set aof-disable-auto-gc no + + # Auto gc success + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.7.base.rdb seq 7 type b} + {file appendonly.aof.3.incr.aof seq 3 type i} + } + + # wait bio delete history + wait_for_condition 1000 10 { + [check_file_exist $aof_dirpath "${aof_basename}.5${::base_aof_sufix}${::rdb_format_suffix}"] == 0 && + [check_file_exist $aof_dirpath "${aof_basename}.6${::base_aof_sufix}${::rdb_format_suffix}"] == 0 && + [check_file_exist $aof_dirpath "${aof_basename}.1${::incr_aof_sufix}${::aof_format_suffix}"] == 0 && + [check_file_exist $aof_dirpath "${aof_basename}.2${::incr_aof_sufix}${::aof_format_suffix}"] == 0 + } else { + fail "Failed to delete history AOF" + } + } + + test "AOF can produce consecutive sequence number after reload" { + # Current manifest, BASE seq 7 and INCR seq 3 + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.7.base.rdb seq 7 type b} + {file appendonly.aof.3.incr.aof seq 3 type i} + } + + r debug loadaof + + # Trigger AOFRW + r bgrewriteaof + waitForBgrewriteaof r + + # Now BASE seq is 8 and INCR seq is 4 + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.8.base.rdb seq 8 type b} + {file appendonly.aof.4.incr.aof seq 4 type i} + } + } + + test "AOF enable during BGSAVE will not write data util AOFRW finish" { + r config set appendonly no + r config set save "" + r config set rdb-key-save-delay 10000000 + + r set k1 v1 + r bgsave + + wait_for_condition 1000 10 { + [s rdb_bgsave_in_progress] eq 1 + } else { + fail "bgsave did not start in time" + } + + # Make server.aof_rewrite_scheduled = 1 + r config set appendonly yes + assert_equal [s aof_rewrite_scheduled] 1 + + # Not open new INCR aof + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.8.base.rdb seq 8 type b} + {file appendonly.aof.4.incr.aof seq 4 type i} + } + + r set k2 v2 + r debug loadaof + + # Both k1 and k2 lost + assert_equal 0 [r exists k1] + assert_equal 0 [r exists k2] + + set total_forks [s total_forks] + assert_equal [s rdb_bgsave_in_progress] 1 + r config set rdb-key-save-delay 0 + catch {exec kill -9 [get_child_pid 0]} + wait_for_condition 1000 10 { + [s rdb_bgsave_in_progress] eq 0 + } else { + fail "bgsave did not stop in time" + } + + # Make sure AOFRW was scheduled + wait_for_condition 1000 10 { + [s total_forks] == [expr $total_forks + 1] + } else { + fail "aof rewrite did not scheduled" + } + waitForBgrewriteaof r + + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.9.base.rdb seq 9 type b} + {file appendonly.aof.5.incr.aof seq 5 type i} + } + + r set k3 v3 + r debug loadaof + assert_equal v3 [r get k3] + } + + test "AOF will trigger limit when AOFRW fails many times" { + # Clear all data and trigger a successful AOFRW, so we can let + # server.aof_current_size equal to 0 + r flushall + r bgrewriteaof + waitForBgrewriteaof r + + r config set rdb-key-save-delay 10000000 + # Let us trigger AOFRW easily + r config set auto-aof-rewrite-percentage 1 + r config set auto-aof-rewrite-min-size 1kb + + # Set a key so that AOFRW can be delayed + r set k v + + # Let AOFRW fail 3 times, this will trigger AOFRW limit + r bgrewriteaof + catch {exec kill -9 [get_child_pid 0]} + waitForBgrewriteaof r + + r bgrewriteaof + catch {exec kill -9 [get_child_pid 0]} + waitForBgrewriteaof r + + r bgrewriteaof + catch {exec kill -9 [get_child_pid 0]} + waitForBgrewriteaof r + + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.10.base.rdb seq 10 type b} + {file appendonly.aof.6.incr.aof seq 6 type i} + {file appendonly.aof.7.incr.aof seq 7 type i} + {file appendonly.aof.8.incr.aof seq 8 type i} + {file appendonly.aof.9.incr.aof seq 9 type i} + } + + # Write 1KB data to trigger AOFRW + r set x [string repeat x 1024] + + # Make sure we have limit log + wait_for_condition 1000 50 { + [count_log_message 0 "triggered the limit"] == 1 + } else { + fail "aof rewrite did not trigger limit" + } + assert_equal [status r aof_rewrite_in_progress] 0 + + # No new INCR AOF be created + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.10.base.rdb seq 10 type b} + {file appendonly.aof.6.incr.aof seq 6 type i} + {file appendonly.aof.7.incr.aof seq 7 type i} + {file appendonly.aof.8.incr.aof seq 8 type i} + {file appendonly.aof.9.incr.aof seq 9 type i} + } + + # Turn off auto rewrite + r config set auto-aof-rewrite-percentage 0 + r config set rdb-key-save-delay 0 + catch {exec kill -9 [get_child_pid 0]} + wait_for_condition 1000 10 { + [s aof_rewrite_in_progress] eq 0 + } else { + fail "aof rewrite did not stop in time" + } + + # We can still manually execute AOFRW immediately + r bgrewriteaof + waitForBgrewriteaof r + + # Can create New INCR AOF + assert_equal 1 [check_file_exist $aof_dirpath "${aof_basename}.10${::incr_aof_sufix}${::aof_format_suffix}"] + + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.11.base.rdb seq 11 type b} + {file appendonly.aof.10.incr.aof seq 10 type i} + } + + set d1 [r debug digest] + r debug loadaof + set d2 [r debug digest] + assert {$d1 eq $d2} + } + + start_server {overrides {aof-use-rdb-preamble {yes} appendonly {no}}} { + set dir [get_redis_dir] + set aof_basename "appendonly.aof" + set aof_dirname "appendonlydir" + set aof_dirpath "$dir/$aof_dirname" + set aof_manifest_name "$aof_basename$::manifest_suffix" + set aof_manifest_file "$dir/$aof_dirname/$aof_manifest_name" + + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + test "AOF will open a temporary INCR AOF to accumulate data until the first AOFRW success when AOF is dynamically enabled" { + r config set save "" + # Increase AOFRW execution time to give us enough time to kill it + r config set rdb-key-save-delay 10000000 + + # Start write load + set load_handle0 [start_write_load $master_host $master_port 10] + + wait_for_condition 50 100 { + [r dbsize] > 0 + } else { + fail "No write load detected." + } + + # Enable AOF will trigger an initialized AOFRW + r config set appendonly yes + # Let AOFRW fail + assert_equal 1 [s aof_rewrite_in_progress] + set pid1 [get_child_pid 0] + catch {exec kill -9 $pid1} + + # Wait for AOFRW to exit and delete temp incr aof + wait_for_condition 1000 100 { + [count_log_message 0 "Removing the temp incr aof file"] == 1 + } else { + fail "temp aof did not delete" + } + + # Make sure manifest file is not created + assert_equal 0 [check_file_exist $aof_dirpath $aof_manifest_name] + # Make sure BASE AOF is not created + assert_equal 0 [check_file_exist $aof_dirpath "${aof_basename}.1${::base_aof_sufix}${::rdb_format_suffix}"] + + # Make sure the next AOFRW has started + wait_for_condition 1000 50 { + [s aof_rewrite_in_progress] == 1 + } else { + fail "aof rewrite did not scheduled" + } + + # Do a successful AOFRW + set total_forks [s total_forks] + r config set rdb-key-save-delay 0 + catch {exec kill -9 [get_child_pid 0]} + + # Make sure the next AOFRW has started + wait_for_condition 1000 10 { + [s total_forks] == [expr $total_forks + 1] + } else { + fail "aof rewrite did not scheduled" + } + waitForBgrewriteaof r + + assert_equal 2 [count_log_message 0 "Removing the temp incr aof file"] + + # BASE and INCR AOF are successfully created + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.1.base.rdb seq 1 type b} + {file appendonly.aof.1.incr.aof seq 1 type i} + } + + stop_write_load $load_handle0 + wait_load_handlers_disconnected + + set d1 [r debug digest] + r debug loadaof + set d2 [r debug digest] + assert {$d1 eq $d2} + + # Dynamic disable AOF again + r config set appendonly no + + # Disabling AOF does not delete previous AOF files + r debug loadaof + set d2 [r debug digest] + assert {$d1 eq $d2} + + assert_equal 0 [s rdb_changes_since_last_save] + r config set rdb-key-save-delay 10000000 + set load_handle0 [start_write_load $master_host $master_port 10] + wait_for_condition 50 100 { + [s rdb_changes_since_last_save] > 0 + } else { + fail "No write load detected." + } + + # Re-enable AOF + r config set appendonly yes + + # Let AOFRW fail + assert_equal 1 [s aof_rewrite_in_progress] + set pid1 [get_child_pid 0] + catch {exec kill -9 $pid1} + + # Wait for AOFRW to exit and delete temp incr aof + wait_for_condition 1000 100 { + [count_log_message 0 "Removing the temp incr aof file"] == 3 + } else { + fail "temp aof did not delete 3 times" + } + + # Make sure no new incr AOF was created + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.1.base.rdb seq 1 type b} + {file appendonly.aof.1.incr.aof seq 1 type i} + } + + # Make sure the next AOFRW has started + wait_for_condition 1000 50 { + [s aof_rewrite_in_progress] == 1 + } else { + fail "aof rewrite did not scheduled" + } + + # Do a successful AOFRW + set total_forks [s total_forks] + r config set rdb-key-save-delay 0 + catch {exec kill -9 [get_child_pid 0]} + + wait_for_condition 1000 10 { + [s total_forks] == [expr $total_forks + 1] + } else { + fail "aof rewrite did not scheduled" + } + waitForBgrewriteaof r + + assert_equal 4 [count_log_message 0 "Removing the temp incr aof file"] + + # New BASE and INCR AOF are successfully created + assert_aof_manifest_content $aof_manifest_file { + {file appendonly.aof.2.base.rdb seq 2 type b} + {file appendonly.aof.2.incr.aof seq 2 type i} + } + + stop_write_load $load_handle0 + wait_load_handlers_disconnected + + set d1 [r debug digest] + r debug loadaof + set d2 [r debug digest] + assert {$d1 eq $d2} + } + } + } +} diff --git a/tests/integration/aof-race.tcl b/tests/integration/aof-race.tcl new file mode 100644 index 0000000..927896d --- /dev/null +++ b/tests/integration/aof-race.tcl @@ -0,0 +1,35 @@ +set defaults { appendonly {yes} appendfilename {appendonly.aof} appenddirname {appendonlydir} aof-use-rdb-preamble {no} } +set server_path [tmpdir server.aof] + +proc start_server_aof {overrides code} { + upvar defaults defaults srv srv server_path server_path + set config [concat $defaults $overrides] + start_server [list overrides $config] $code +} + +tags {"aof"} { + # Specific test for a regression where internal buffers were not properly + # cleaned after a child responsible for an AOF rewrite exited. This buffer + # was subsequently appended to the new AOF, resulting in duplicate commands. + start_server_aof [list dir $server_path] { + set client [redis [srv host] [srv port] 0 $::tls] + set bench [open "|src/redis-benchmark -q -s [srv unixsocket] -c 20 -n 20000 incr foo" "r+"] + + after 100 + + # Benchmark should be running by now: start background rewrite + $client bgrewriteaof + + # Read until benchmark pipe reaches EOF + while {[string length [read $bench]] > 0} {} + + # Check contents of foo + assert_equal 20000 [$client get foo] + } + + # Restart server to replay AOF + start_server_aof [list dir $server_path] { + set client [redis [srv host] [srv port] 0 $::tls] + assert_equal 20000 [$client get foo] + } +} diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl new file mode 100644 index 0000000..1f73fc3 --- /dev/null +++ b/tests/integration/aof.tcl @@ -0,0 +1,681 @@ +source tests/support/aofmanifest.tcl +set defaults { appendonly {yes} appendfilename {appendonly.aof} appenddirname {appendonlydir} auto-aof-rewrite-percentage {0}} +set server_path [tmpdir server.aof] +set aof_dirname "appendonlydir" +set aof_basename "appendonly.aof" +set aof_dirpath "$server_path/$aof_dirname" +set aof_base_file "$server_path/$aof_dirname/${aof_basename}.1$::base_aof_sufix$::aof_format_suffix" +set aof_file "$server_path/$aof_dirname/${aof_basename}.1$::incr_aof_sufix$::aof_format_suffix" +set aof_manifest_file "$server_path/$aof_dirname/$aof_basename$::manifest_suffix" + +tags {"aof external:skip"} { + # Server can start when aof-load-truncated is set to yes and AOF + # is truncated, with an incomplete MULTI block. + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand set foo hello] + append_to_aof [formatCommand multi] + append_to_aof [formatCommand set bar world] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + } + + start_server_aof [list dir $server_path aof-load-truncated yes] { + test "Unfinished MULTI: Server should start if load-truncated is yes" { + assert_equal 1 [is_alive $srv] + } + } + + ## Should also start with truncated AOF without incomplete MULTI block. + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand incr foo] + append_to_aof [formatCommand incr foo] + append_to_aof [formatCommand incr foo] + append_to_aof [formatCommand incr foo] + append_to_aof [formatCommand incr foo] + append_to_aof [string range [formatCommand incr foo] 0 end-1] + } + + start_server_aof [list dir $server_path aof-load-truncated yes] { + test "Short read: Server should start if load-truncated is yes" { + assert_equal 1 [is_alive $srv] + } + + test "Truncated AOF loaded: we expect foo to be equal to 5" { + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $client + assert {[$client get foo] eq "5"} + } + + test "Append a new command after loading an incomplete AOF" { + $client incr foo + } + } + + # Now the AOF file is expected to be correct + start_server_aof [list dir $server_path aof-load-truncated yes] { + test "Short read + command: Server should start" { + assert_equal 1 [is_alive $srv] + } + + test "Truncated AOF loaded: we expect foo to be equal to 6 now" { + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $client + assert {[$client get foo] eq "6"} + } + } + + ## Test that the server exits when the AOF contains a format error + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand set foo hello] + append_to_aof "!!!" + append_to_aof [formatCommand set foo hello] + } + + start_server_aof [list dir $server_path aof-load-truncated yes] { + test "Bad format: Server should have logged an error" { + set pattern "*Bad file format reading the append only file*" + set retry 10 + while {$retry} { + set result [exec tail -1 < [dict get $srv stdout]] + if {[string match $pattern $result]} { + break + } + incr retry -1 + after 1000 + } + if {$retry == 0} { + error "assertion:expected error not found on config file" + } + } + } + + ## Test the server doesn't start when the AOF contains an unfinished MULTI + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand set foo hello] + append_to_aof [formatCommand multi] + append_to_aof [formatCommand set bar world] + } + + start_server_aof [list dir $server_path aof-load-truncated no] { + test "Unfinished MULTI: Server should have logged an error" { + set pattern "*Unexpected end of file reading the append only file*" + set retry 10 + while {$retry} { + set result [exec tail -1 < [dict get $srv stdout]] + if {[string match $pattern $result]} { + break + } + incr retry -1 + after 1000 + } + if {$retry == 0} { + error "assertion:expected error not found on config file" + } + } + } + + ## Test that the server exits when the AOF contains a short read + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand set foo hello] + append_to_aof [string range [formatCommand set bar world] 0 end-1] + } + + start_server_aof [list dir $server_path aof-load-truncated no] { + test "Short read: Server should have logged an error" { + set pattern "*Unexpected end of file reading the append only file*" + set retry 10 + while {$retry} { + set result [exec tail -1 < [dict get $srv stdout]] + if {[string match $pattern $result]} { + break + } + incr retry -1 + after 1000 + } + if {$retry == 0} { + error "assertion:expected error not found on config file" + } + } + } + + ## Test that redis-check-aof indeed sees this AOF is not valid + test "Short read: Utility should confirm the AOF is not valid" { + catch { + exec src/redis-check-aof $aof_manifest_file + } result + assert_match "*not valid*" $result + } + + test "Short read: Utility should show the abnormal line num in AOF" { + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand set foo hello] + append_to_aof "!!!" + } + + catch { + exec src/redis-check-aof $aof_manifest_file + } result + assert_match "*ok_up_to_line=8*" $result + } + + test "Short read: Utility should be able to fix the AOF" { + set result [exec src/redis-check-aof --fix $aof_manifest_file << "y\n"] + assert_match "*Successfully truncated AOF*" $result + } + + ## Test that the server can be started using the truncated AOF + start_server_aof [list dir $server_path aof-load-truncated no] { + test "Fixed AOF: Server should have been started" { + assert_equal 1 [is_alive $srv] + } + + test "Fixed AOF: Keyspace should contain values that were parseable" { + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $client + assert_equal "hello" [$client get foo] + assert_equal "" [$client get bar] + } + } + + ## Test that SPOP (that modifies the client's argc/argv) is correctly free'd + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand sadd set foo] + append_to_aof [formatCommand sadd set bar] + append_to_aof [formatCommand spop set] + } + + start_server_aof [list dir $server_path aof-load-truncated no] { + test "AOF+SPOP: Server should have been started" { + assert_equal 1 [is_alive $srv] + } + + test "AOF+SPOP: Set should have 1 member" { + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $client + assert_equal 1 [$client scard set] + } + } + + ## Uses the alsoPropagate() API. + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand sadd set foo] + append_to_aof [formatCommand sadd set bar] + append_to_aof [formatCommand sadd set gah] + append_to_aof [formatCommand spop set 2] + } + + start_server_aof [list dir $server_path] { + test "AOF+SPOP: Server should have been started" { + assert_equal 1 [is_alive $srv] + } + + test "AOF+SPOP: Set should have 1 member" { + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $client + assert_equal 1 [$client scard set] + } + } + + ## Test that PEXPIREAT is loaded correctly + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand rpush list foo] + append_to_aof [formatCommand pexpireat list 1000] + append_to_aof [formatCommand rpush list bar] + } + + start_server_aof [list dir $server_path aof-load-truncated no] { + test "AOF+EXPIRE: Server should have been started" { + assert_equal 1 [is_alive $srv] + } + + test "AOF+EXPIRE: List should be empty" { + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $client + assert_equal 0 [$client llen list] + } + } + + start_server {overrides {appendonly {yes}}} { + test {Redis should not try to convert DEL into EXPIREAT for EXPIRE -1} { + r set x 10 + r expire x -1 + } + } + + start_server {overrides {appendonly {yes} appendfsync always}} { + test {AOF fsync always barrier issue} { + set rd [redis_deferring_client] + # Set a sleep when aof is flushed, so that we have a chance to look + # at the aof size and detect if the response of an incr command + # arrives before the data was written (and hopefully fsynced) + # We create a big reply, which will hopefully not have room in the + # socket buffers, and will install a write handler, then we sleep + # a big and issue the incr command, hoping that the last portion of + # the output buffer write, and the processing of the incr will happen + # in the same event loop cycle. + # Since the socket buffers and timing are unpredictable, we fuzz this + # test with slightly different sizes and sleeps a few times. + for {set i 0} {$i < 10} {incr i} { + r debug aof-flush-sleep 0 + r del x + r setrange x [expr {int(rand()*5000000)+10000000}] x + r debug aof-flush-sleep 500000 + set aof [get_last_incr_aof_path r] + set size1 [file size $aof] + $rd get x + after [expr {int(rand()*30)}] + $rd incr new_value + $rd read + $rd read + set size2 [file size $aof] + assert {$size1 != $size2} + } + } + } + + start_server {overrides {appendonly {yes}}} { + test {GETEX should not append to AOF} { + set aof [get_last_incr_aof_path r] + r set foo bar + set before [file size $aof] + r getex foo + set after [file size $aof] + assert_equal $before $after + } + } + + ## Test that the server exits when the AOF contains a unknown command + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand set foo hello] + append_to_aof [formatCommand bla foo hello] + append_to_aof [formatCommand set foo hello] + } + + start_server_aof [list dir $server_path aof-load-truncated yes] { + test "Unknown command: Server should have logged an error" { + set pattern "*Unknown command 'bla' reading the append only file*" + set retry 10 + while {$retry} { + set result [exec tail -1 < [dict get $srv stdout]] + if {[string match $pattern $result]} { + break + } + incr retry -1 + after 1000 + } + if {$retry == 0} { + error "assertion:expected error not found on config file" + } + } + } + + # Test that LMPOP/BLMPOP work fine with AOF. + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand lpush mylist a b c] + append_to_aof [formatCommand rpush mylist2 1 2 3] + append_to_aof [formatCommand lpush mylist3 a b c d e] + } + + start_server_aof [list dir $server_path aof-load-truncated no] { + test "AOF+LMPOP/BLMPOP: pop elements from the list" { + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + set client2 [redis [dict get $srv host] [dict get $srv port] 1 $::tls] + wait_done_loading $client + + # Pop all elements from mylist, should be blmpop delete mylist. + $client lmpop 1 mylist left count 1 + $client blmpop 0 1 mylist left count 10 + + # Pop all elements from mylist2, should be lmpop delete mylist2. + $client blmpop 0 2 mylist mylist2 right count 10 + $client lmpop 2 mylist mylist2 right count 2 + + # Blocking path, be blocked and then released. + $client2 blmpop 0 2 mylist mylist2 left count 2 + after 100 + $client lpush mylist2 a b c + + # Pop up the last element in mylist2 + $client blmpop 0 3 mylist mylist2 mylist3 left count 1 + + # Leave two elements in mylist3. + $client blmpop 0 3 mylist mylist2 mylist3 right count 3 + } + } + + start_server_aof [list dir $server_path aof-load-truncated no] { + test "AOF+LMPOP/BLMPOP: after pop elements from the list" { + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $client + + # mylist and mylist2 no longer exist. + assert_equal 0 [$client exists mylist mylist2] + + # Length of mylist3 is two. + assert_equal 2 [$client llen mylist3] + } + } + + # Test that ZMPOP/BZMPOP work fine with AOF. + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand zadd myzset 1 one 2 two 3 three] + append_to_aof [formatCommand zadd myzset2 4 four 5 five 6 six] + append_to_aof [formatCommand zadd myzset3 1 one 2 two 3 three 4 four 5 five] + } + + start_server_aof [list dir $server_path aof-load-truncated no] { + test "AOF+ZMPOP/BZMPOP: pop elements from the zset" { + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + set client2 [redis [dict get $srv host] [dict get $srv port] 1 $::tls] + wait_done_loading $client + + # Pop all elements from myzset, should be bzmpop delete myzset. + $client zmpop 1 myzset min count 1 + $client bzmpop 0 1 myzset min count 10 + + # Pop all elements from myzset2, should be zmpop delete myzset2. + $client bzmpop 0 2 myzset myzset2 max count 10 + $client zmpop 2 myzset myzset2 max count 2 + + # Blocking path, be blocked and then released. + $client2 bzmpop 0 2 myzset myzset2 min count 2 + after 100 + $client zadd myzset2 1 one 2 two 3 three + + # Pop up the last element in myzset2 + $client bzmpop 0 3 myzset myzset2 myzset3 min count 1 + + # Leave two elements in myzset3. + $client bzmpop 0 3 myzset myzset2 myzset3 max count 3 + } + } + + start_server_aof [list dir $server_path aof-load-truncated no] { + test "AOF+ZMPOP/BZMPOP: after pop elements from the zset" { + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $client + + # myzset and myzset2 no longer exist. + assert_equal 0 [$client exists myzset myzset2] + + # Length of myzset3 is two. + assert_equal 2 [$client zcard myzset3] + } + } + + test {Generate timestamp annotations in AOF} { + start_server {overrides {appendonly {yes}}} { + r config set aof-timestamp-enabled yes + r config set aof-use-rdb-preamble no + set aof [get_last_incr_aof_path r] + + r set foo bar + assert_match "#TS:*" [exec head -n 1 $aof] + + r bgrewriteaof + waitForBgrewriteaof r + + set aof [get_base_aof_path r] + assert_match "#TS:*" [exec head -n 1 $aof] + } + } + + # redis could load AOF which has timestamp annotations inside + create_aof $aof_dirpath $aof_file { + append_to_aof "#TS:1628217470\r\n" + append_to_aof [formatCommand set foo1 bar1] + append_to_aof "#TS:1628217471\r\n" + append_to_aof [formatCommand set foo2 bar2] + append_to_aof "#TS:1628217472\r\n" + append_to_aof "#TS:1628217473\r\n" + append_to_aof [formatCommand set foo3 bar3] + append_to_aof "#TS:1628217474\r\n" + } + start_server_aof [list dir $server_path] { + test {Successfully load AOF which has timestamp annotations inside} { + set c [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $c + assert_equal "bar1" [$c get foo1] + assert_equal "bar2" [$c get foo2] + assert_equal "bar3" [$c get foo3] + } + } + + test {Truncate AOF to specific timestamp} { + # truncate to timestamp 1628217473 + exec src/redis-check-aof --truncate-to-timestamp 1628217473 $aof_manifest_file + start_server_aof [list dir $server_path] { + set c [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $c + assert_equal "bar1" [$c get foo1] + assert_equal "bar2" [$c get foo2] + assert_equal "bar3" [$c get foo3] + } + + # truncate to timestamp 1628217471 + exec src/redis-check-aof --truncate-to-timestamp 1628217471 $aof_manifest_file + start_server_aof [list dir $server_path] { + set c [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $c + assert_equal "bar1" [$c get foo1] + assert_equal "bar2" [$c get foo2] + assert_equal "" [$c get foo3] + } + + # truncate to timestamp 1628217470 + exec src/redis-check-aof --truncate-to-timestamp 1628217470 $aof_manifest_file + start_server_aof [list dir $server_path] { + set c [redis [dict get $srv host] [dict get $srv port] 0 $::tls] + wait_done_loading $c + assert_equal "bar1" [$c get foo1] + assert_equal "" [$c get foo2] + } + + # truncate to timestamp 1628217469 + catch {exec src/redis-check-aof --truncate-to-timestamp 1628217469 $aof_manifest_file} e + assert_match {*aborting*} $e + } + + test {EVAL timeout with slow verbatim Lua script from AOF} { + start_server [list overrides [list dir $server_path appendonly yes lua-time-limit 1 aof-use-rdb-preamble no]] { + # generate a long running script that is propagated to the AOF as script + # make sure that the script times out during loading + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand select 9] + append_to_aof [formatCommand eval {redis.call('set',KEYS[1],'y'); for i=1,1500000 do redis.call('ping') end return 'ok'} 1 x] + } + set rd [redis_deferring_client] + $rd debug loadaof + $rd flush + wait_for_condition 100 10 { + [catch {r ping} e] == 1 + } else { + fail "server didn't start loading" + } + assert_error {LOADING*} {r ping} + $rd read + $rd close + wait_for_log_messages 0 {"*Slow script detected*"} 0 100 100 + assert_equal [r get x] y + } + } + + test {EVAL can process writes from AOF in read-only replicas} { + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + } + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand select 9] + append_to_aof [formatCommand eval {redis.call("set",KEYS[1],"100")} 1 foo] + append_to_aof [formatCommand eval {redis.call("incr",KEYS[1])} 1 foo] + append_to_aof [formatCommand eval {redis.call("incr",KEYS[1])} 1 foo] + } + start_server [list overrides [list dir $server_path appendonly yes replica-read-only yes replicaof "127.0.0.1 0"]] { + assert_equal [r get foo] 102 + } + } + + test {Test redis-check-aof for old style resp AOF} { + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand set foo hello] + append_to_aof [formatCommand set bar world] + } + + catch { + exec src/redis-check-aof $aof_file + } result + assert_match "*Start checking Old-Style AOF*is valid*" $result + } + + test {Test redis-check-aof for old style rdb-preamble AOF} { + catch { + exec src/redis-check-aof tests/assets/rdb-preamble.aof + } result + assert_match "*Start checking Old-Style AOF*RDB preamble is OK, proceeding with AOF tail*is valid*" $result + } + + test {Test redis-check-aof for Multi Part AOF with resp AOF base} { + create_aof $aof_dirpath $aof_base_file { + append_to_aof [formatCommand set foo hello] + append_to_aof [formatCommand set bar world] + } + + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand set foo hello] + append_to_aof [formatCommand set bar world] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + } + + catch { + exec src/redis-check-aof $aof_manifest_file + } result + assert_match "*Start checking Multi Part AOF*Start to check BASE AOF (RESP format)*BASE AOF*is valid*Start to check INCR files*INCR AOF*is valid*All AOF files and manifest are valid*" $result + } + + test {Test redis-check-aof for Multi Part AOF with rdb-preamble AOF base} { + exec cp tests/assets/rdb-preamble.aof $aof_base_file + + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand set foo hello] + append_to_aof [formatCommand set bar world] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + } + + catch { + exec src/redis-check-aof $aof_manifest_file + } result + assert_match "*Start checking Multi Part AOF*Start to check BASE AOF (RDB format)*DB preamble is OK, proceeding with AOF tail*BASE AOF*is valid*Start to check INCR files*INCR AOF*is valid*All AOF files and manifest are valid*" $result + } + + test {Test redis-check-aof only truncates the last file for Multi Part AOF in fix mode} { + create_aof $aof_dirpath $aof_base_file { + append_to_aof [formatCommand set foo hello] + append_to_aof [formatCommand multi] + append_to_aof [formatCommand set bar world] + } + + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand set foo hello] + append_to_aof [formatCommand set bar world] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + } + + catch { + exec src/redis-check-aof $aof_manifest_file + } result + assert_match "*not valid*" $result + + catch { + exec src/redis-check-aof --fix $aof_manifest_file + } result + assert_match "*Failed to truncate AOF*because it is not the last file*" $result + } + + test {Test redis-check-aof only truncates the last file for Multi Part AOF in truncate-to-timestamp mode} { + create_aof $aof_dirpath $aof_base_file { + append_to_aof "#TS:1628217470\r\n" + append_to_aof [formatCommand set foo1 bar1] + append_to_aof "#TS:1628217471\r\n" + append_to_aof [formatCommand set foo2 bar2] + append_to_aof "#TS:1628217472\r\n" + append_to_aof "#TS:1628217473\r\n" + append_to_aof [formatCommand set foo3 bar3] + append_to_aof "#TS:1628217474\r\n" + } + + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand set foo hello] + append_to_aof [formatCommand set bar world] + } + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + } + + catch { + exec src/redis-check-aof --truncate-to-timestamp 1628217473 $aof_manifest_file + } result + assert_match "*Failed to truncate AOF*to timestamp*because it is not the last file*" $result + } + + start_server {overrides {appendonly yes appendfsync always}} { + test {FLUSHDB / FLUSHALL should persist in AOF} { + set aof [get_last_incr_aof_path r] + + r set key value + r flushdb + r set key value2 + r flushdb + + # DB is empty + r flushdb + r flushdb + r flushdb + + r set key value + r flushall + r set key value2 + r flushall + + # DBs are empty. + r flushall + r flushall + r flushall + + # Assert that each FLUSHDB command is persisted even the DB is empty. + # Assert that each FLUSHALL command is persisted even the DBs are empty. + assert_aof_content $aof { + {select *} + {set key value} + {flushdb} + {set key value2} + {flushdb} + {flushdb} + {flushdb} + {flushdb} + {set key value} + {flushall} + {set key value2} + {flushall} + {flushall} + {flushall} + {flushall} + } + } + } +} diff --git a/tests/integration/block-repl.tcl b/tests/integration/block-repl.tcl new file mode 100644 index 0000000..3f3a86e --- /dev/null +++ b/tests/integration/block-repl.tcl @@ -0,0 +1,51 @@ +# Test replication of blocking lists and zset operations. +# Unlike stream operations such operations are "pop" style, so they consume +# the list or sorted set, and must be replicated correctly. + +proc start_bg_block_op {host port db ops tls} { + set tclsh [info nameofexecutable] + exec $tclsh tests/helpers/bg_block_op.tcl $host $port $db $ops $tls & +} + +proc stop_bg_block_op {handle} { + catch {exec /bin/kill -9 $handle} +} + +start_server {tags {"repl" "external:skip"}} { + start_server {} { + set master [srv -1 client] + set master_host [srv -1 host] + set master_port [srv -1 port] + set slave [srv 0 client] + + set load_handle0 [start_bg_block_op $master_host $master_port 9 100000 $::tls] + set load_handle1 [start_bg_block_op $master_host $master_port 9 100000 $::tls] + set load_handle2 [start_bg_block_op $master_host $master_port 9 100000 $::tls] + + test {First server should have role slave after SLAVEOF} { + $slave slaveof $master_host $master_port + after 1000 + s 0 role + } {slave} + + test {Test replication with blocking lists and sorted sets operations} { + after 25000 + stop_bg_block_op $load_handle0 + stop_bg_block_op $load_handle1 + stop_bg_block_op $load_handle2 + wait_for_condition 100 100 { + [$master debug digest] == [$slave debug digest] + } else { + set csv1 [csvdump r] + set csv2 [csvdump {r -1}] + set fd [open /tmp/repldump1.txt w] + puts -nonewline $fd $csv1 + close $fd + set fd [open /tmp/repldump2.txt w] + puts -nonewline $fd $csv2 + close $fd + fail "Master - Replica inconsistency, Run diff -u against /tmp/repldump*.txt for more info" + } + } + } +} diff --git a/tests/integration/convert-ziplist-hash-on-load.tcl b/tests/integration/convert-ziplist-hash-on-load.tcl new file mode 100644 index 0000000..c8265b2 --- /dev/null +++ b/tests/integration/convert-ziplist-hash-on-load.tcl @@ -0,0 +1,28 @@ +tags {"external:skip"} { + +# Copy RDB with ziplist encoded hash to server path +set server_path [tmpdir "server.convert-ziplist-hash-on-load"] + +exec cp -f tests/assets/hash-ziplist.rdb $server_path +start_server [list overrides [list "dir" $server_path "dbfilename" "hash-ziplist.rdb"]] { + test "RDB load ziplist hash: converts to listpack when RDB loading" { + r select 0 + + assert_encoding listpack hash + assert_equal 2 [r hlen hash] + assert_match {v1 v2} [r hmget hash f1 f2] + } +} + +exec cp -f tests/assets/hash-ziplist.rdb $server_path +start_server [list overrides [list "dir" $server_path "dbfilename" "hash-ziplist.rdb" "hash-max-ziplist-entries" 1]] { + test "RDB load ziplist hash: converts to hash table when hash-max-ziplist-entries is exceeded" { + r select 0 + + assert_encoding hashtable hash + assert_equal 2 [r hlen hash] + assert_match {v1 v2} [r hmget hash f1 f2] + } +} + +} diff --git a/tests/integration/convert-ziplist-zset-on-load.tcl b/tests/integration/convert-ziplist-zset-on-load.tcl new file mode 100644 index 0000000..0fbb201 --- /dev/null +++ b/tests/integration/convert-ziplist-zset-on-load.tcl @@ -0,0 +1,28 @@ +tags {"external:skip"} { + +# Copy RDB with ziplist encoded hash to server path +set server_path [tmpdir "server.convert-ziplist-hash-on-load"] + +exec cp -f tests/assets/zset-ziplist.rdb $server_path +start_server [list overrides [list "dir" $server_path "dbfilename" "zset-ziplist.rdb"]] { + test "RDB load ziplist zset: converts to listpack when RDB loading" { + r select 0 + + assert_encoding listpack zset + assert_equal 2 [r zcard zset] + assert_match {one 1 two 2} [r zrange zset 0 -1 withscores] + } +} + +exec cp -f tests/assets/zset-ziplist.rdb $server_path +start_server [list overrides [list "dir" $server_path "dbfilename" "zset-ziplist.rdb" "zset-max-ziplist-entries" 1]] { + test "RDB load ziplist zset: converts to skiplist when zset-max-ziplist-entries is exceeded" { + r select 0 + + assert_encoding skiplist zset + assert_equal 2 [r zcard zset] + assert_match {one 1 two 2} [r zrange zset 0 -1 withscores] + } +} + +} diff --git a/tests/integration/convert-zipmap-hash-on-load.tcl b/tests/integration/convert-zipmap-hash-on-load.tcl new file mode 100644 index 0000000..f7eda0e --- /dev/null +++ b/tests/integration/convert-zipmap-hash-on-load.tcl @@ -0,0 +1,39 @@ +tags {"external:skip"} { + +# Copy RDB with zipmap encoded hash to server path +set server_path [tmpdir "server.convert-zipmap-hash-on-load"] + +exec cp -f tests/assets/hash-zipmap.rdb $server_path +start_server [list overrides [list "dir" $server_path "dbfilename" "hash-zipmap.rdb"]] { + test "RDB load zipmap hash: converts to listpack" { + r select 0 + + assert_match "*listpack*" [r debug object hash] + assert_equal 2 [r hlen hash] + assert_match {v1 v2} [r hmget hash f1 f2] + } +} + +exec cp -f tests/assets/hash-zipmap.rdb $server_path +start_server [list overrides [list "dir" $server_path "dbfilename" "hash-zipmap.rdb" "hash-max-ziplist-entries" 1]] { + test "RDB load zipmap hash: converts to hash table when hash-max-ziplist-entries is exceeded" { + r select 0 + + assert_match "*hashtable*" [r debug object hash] + assert_equal 2 [r hlen hash] + assert_match {v1 v2} [r hmget hash f1 f2] + } +} + +exec cp -f tests/assets/hash-zipmap.rdb $server_path +start_server [list overrides [list "dir" $server_path "dbfilename" "hash-zipmap.rdb" "hash-max-ziplist-value" 1]] { + test "RDB load zipmap hash: converts to hash table when hash-max-ziplist-value is exceeded" { + r select 0 + + assert_match "*hashtable*" [r debug object hash] + assert_equal 2 [r hlen hash] + assert_match {v1 v2} [r hmget hash f1 f2] + } +} + +} diff --git a/tests/integration/corrupt-dump-fuzzer.tcl b/tests/integration/corrupt-dump-fuzzer.tcl new file mode 100644 index 0000000..011c314 --- /dev/null +++ b/tests/integration/corrupt-dump-fuzzer.tcl @@ -0,0 +1,218 @@ +# tests of corrupt ziplist payload with valid CRC + +tags {"dump" "corruption" "external:skip"} { + +# catch sigterm so that in case one of the random command hangs the test, +# usually due to redis not putting a response in the output buffers, +# we'll know which command it was +if { ! [ catch { + package require Tclx +} err ] } { + signal error SIGTERM +} + +proc generate_collections {suffix elements} { + set rd [redis_deferring_client] + for {set j 0} {$j < $elements} {incr j} { + # add both string values and integers + if {$j % 2 == 0} {set val $j} else {set val "_$j"} + $rd hset hash$suffix $j $val + $rd lpush list$suffix $val + $rd zadd zset$suffix $j $val + $rd sadd set$suffix $val + $rd xadd stream$suffix * item 1 value $val + } + for {set j 0} {$j < $elements * 5} {incr j} { + $rd read ; # Discard replies + } + $rd close +} + +# generate keys with various types and encodings +proc generate_types {} { + r config set list-max-ziplist-size 5 + r config set hash-max-ziplist-entries 5 + r config set zset-max-ziplist-entries 5 + r config set stream-node-max-entries 5 + + # create small (ziplist / listpack encoded) objects with 3 items + generate_collections "" 3 + + # add some metadata to the stream + r xgroup create stream mygroup 0 + set records [r xreadgroup GROUP mygroup Alice COUNT 2 STREAMS stream >] + r xdel stream [lindex [lindex [lindex [lindex $records 0] 1] 1] 0] + r xack stream mygroup [lindex [lindex [lindex [lindex $records 0] 1] 0] 0] + + # create other non-collection types + r incr int + r set string str + + # create bigger objects with 10 items (more than a single ziplist / listpack) + generate_collections big 10 + + # make sure our big stream also has a listpack record that has different + # field names than the master recorded + r xadd streambig * item 1 value 1 + r xadd streambig * item 1 unique value +} + +proc corrupt_payload {payload} { + set len [string length $payload] + set count 1 ;# usually corrupt only one byte + if {rand() > 0.9} { set count 2 } + while { $count > 0 } { + set idx [expr {int(rand() * $len)}] + set ch [binary format c [expr {int(rand()*255)}]] + set payload [string replace $payload $idx $idx $ch] + incr count -1 + } + return $payload +} + +# fuzzy tester for corrupt RESTORE payloads +# valgrind will make sure there were no leaks in the rdb loader error handling code +foreach sanitize_dump {no yes} { + if {$::accurate} { + set min_duration [expr {60 * 10}] ;# run at least 10 minutes + set min_cycles 1000 ;# run at least 1k cycles (max 16 minutes) + } else { + set min_duration 10 ; # run at least 10 seconds + set min_cycles 10 ; # run at least 10 cycles + } + + # Don't execute this on FreeBSD due to a yet-undiscovered memory issue + # which causes tclsh to bloat. + if {[exec uname] == "FreeBSD"} { + set min_cycles 1 + set min_duration 1 + } + + test "Fuzzer corrupt restore payloads - sanitize_dump: $sanitize_dump" { + if {$min_duration * 2 > $::timeout} { + fail "insufficient timeout" + } + # start a server, fill with data and save an RDB file once (avoid re-save) + start_server [list overrides [list "save" "" use-exit-on-panic yes crash-memcheck-enabled no loglevel verbose] ] { + set stdout [srv 0 stdout] + r config set sanitize-dump-payload $sanitize_dump + r debug set-skip-checksum-validation 1 + set start_time [clock seconds] + generate_types + set dbsize [r dbsize] + r save + set cycle 0 + set stat_terminated_in_restore 0 + set stat_terminated_in_traffic 0 + set stat_terminated_by_signal 0 + set stat_successful_restore 0 + set stat_rejected_restore 0 + set stat_traffic_commands_sent 0 + # repeatedly DUMP a random key, corrupt it and try RESTORE into a new key + while true { + set k [r randomkey] + set dump [r dump $k] + set dump [corrupt_payload $dump] + set printable_dump [string2printable $dump] + set restore_failed false + set report_and_restart false + set sent {} + # RESTORE can fail, but hopefully not terminate + if { [catch { r restore "_$k" 0 $dump REPLACE } err] } { + set restore_failed true + # skip if return failed with an error response. + if {[string match "ERR*" $err]} { + incr stat_rejected_restore + } else { + set report_and_restart true + incr stat_terminated_in_restore + write_log_line 0 "corrupt payload: $printable_dump" + if {$sanitize_dump == yes} { + puts "Server crashed in RESTORE with payload: $printable_dump" + } + } + } else { + r ping ;# an attempt to check if the server didn't terminate (this will throw an error that will terminate the tests) + } + + set print_commands false + if {!$restore_failed} { + # if RESTORE didn't fail or terminate, run some random traffic on the new key + incr stat_successful_restore + if { [ catch { + set sent [generate_fuzzy_traffic_on_key "_$k" 1] ;# traffic for 1 second + incr stat_traffic_commands_sent [llength $sent] + r del "_$k" ;# in case the server terminated, here's where we'll detect it. + if {$dbsize != [r dbsize]} { + puts "unexpected keys" + puts "keys: [r keys *]" + puts $sent + exit 1 + } + } err ] } { + # if the server terminated update stats and restart it + set report_and_restart true + incr stat_terminated_in_traffic + set by_signal [count_log_message 0 "crashed by signal"] + incr stat_terminated_by_signal $by_signal + + if {$by_signal != 0 || $sanitize_dump == yes} { + puts "Server crashed (by signal: $by_signal), with payload: $printable_dump" + set print_commands true + } + } + } + + # check valgrind report for invalid reads after each RESTORE + # payload so that we have a report that is easier to reproduce + set valgrind_errors [find_valgrind_errors [srv 0 stderr] false] + set asan_errors [sanitizer_errors_from_file [srv 0 stderr]] + if {$valgrind_errors != "" || $asan_errors != ""} { + puts "valgrind or asan found an issue for payload: $printable_dump" + set report_and_restart true + set print_commands true + } + + if {$report_and_restart} { + if {$print_commands} { + puts "violating commands:" + foreach cmd $sent { + foreach arg $cmd { + puts -nonewline "[string2printable $arg] " + } + puts "" + } + } + + # restart the server and re-apply debug configuration + write_log_line 0 "corrupt payload: $printable_dump" + restart_server 0 true true + r config set sanitize-dump-payload $sanitize_dump + r debug set-skip-checksum-validation 1 + } + + incr cycle + if { ([clock seconds]-$start_time) >= $min_duration && $cycle >= $min_cycles} { + break + } + } + if {$::verbose} { + puts "Done $cycle cycles in [expr {[clock seconds]-$start_time}] seconds." + puts "RESTORE: successful: $stat_successful_restore, rejected: $stat_rejected_restore" + puts "Total commands sent in traffic: $stat_traffic_commands_sent, crashes during traffic: $stat_terminated_in_traffic ($stat_terminated_by_signal by signal)." + } + } + # if we run sanitization we never expect the server to crash at runtime + if {$sanitize_dump == yes} { + assert_equal $stat_terminated_in_restore 0 + assert_equal $stat_terminated_in_traffic 0 + } + # make sure all terminations where due to assertion and not a SIGSEGV + assert_equal $stat_terminated_by_signal 0 + } +} + + + +} ;# tags + diff --git a/tests/integration/corrupt-dump.tcl b/tests/integration/corrupt-dump.tcl new file mode 100644 index 0000000..1b9be67 --- /dev/null +++ b/tests/integration/corrupt-dump.tcl @@ -0,0 +1,805 @@ +# tests of corrupt ziplist payload with valid CRC +# * setting crash-memcheck-enabled to no to avoid issues with valgrind +# * setting use-exit-on-panic to yes so that valgrind can search for leaks +# * setting debug set-skip-checksum-validation to 1 on some tests for which we +# didn't bother to fake a valid checksum +# * some tests set sanitize-dump-payload to no and some to yet, depending on +# what we want to test + +tags {"dump" "corruption" "external:skip"} { + +# We only run OOM related tests on x86_64 and aarch64, as jemalloc on other +# platforms (notably s390x) may actually succeed very large allocations. As +# a result the test may hang for a very long time at the cleanup phase, +# iterating as many as 2^61 hash table slots. + +set arch_name [exec uname -m] +set run_oom_tests [expr {$arch_name == "x86_64" || $arch_name == "aarch64"}] + +set corrupt_payload_7445 "\x0E\x01\x1D\x1D\x00\x00\x00\x16\x00\x00\x00\x03\x00\x00\x04\x43\x43\x43\x43\x06\x04\x42\x42\x42\x42\x06\x3F\x41\x41\x41\x41\xFF\x09\x00\x88\xA5\xCA\xA8\xC5\x41\xF4\x35" + +test {corrupt payload: #7445 - with sanitize} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + catch { + r restore key 0 $corrupt_payload_7445 + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: hash with valid zip list header, invalid entry len} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + catch { + r restore key 0 "\x0D\x1B\x1B\x00\x00\x00\x16\x00\x00\x00\x04\x00\x00\x02\x61\x00\x04\x02\x62\x00\x04\x14\x63\x00\x04\x02\x64\x00\xFF\x09\x00\xD9\x10\x54\x92\x15\xF5\x5F\x52" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: invalid zlbytes header} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + catch { + r restore key 0 "\x0D\x1B\x25\x00\x00\x00\x16\x00\x00\x00\x04\x00\x00\x02\x61\x00\x04\x02\x62\x00\x04\x02\x63\x00\x04\x02\x64\x00\xFF\x09\x00\xB7\xF7\x6E\x9F\x43\x43\x14\xC6" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: valid zipped hash header, dup records} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + catch { + r restore key 0 "\x0D\x1B\x1B\x00\x00\x00\x16\x00\x00\x00\x04\x00\x00\x02\x61\x00\x04\x02\x62\x00\x04\x02\x61\x00\x04\x02\x64\x00\xFF\x09\x00\xA1\x98\x36\x78\xCC\x8E\x93\x2E" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: quicklist big ziplist prev len} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + catch {r restore key 0 "\x0E\x01\x13\x13\x00\x00\x00\x0E\x00\x00\x00\x02\x00\x00\x02\x61\x00\x0E\x02\x62\x00\xFF\x09\x00\x49\x97\x30\xB2\x0D\xA1\xED\xAA"} err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: quicklist small ziplist prev len} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + catch { + r restore key 0 "\x0E\x01\x13\x13\x00\x00\x00\x0E\x00\x00\x00\x02\x00\x00\x02\x61\x00\x02\x02\x62\x00\xFF\x09\x00\xC7\x71\x03\x97\x07\x75\xB0\x63" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: quicklist ziplist wrong count} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + catch {r restore key 0 "\x0E\x01\x13\x13\x00\x00\x00\x0E\x00\x00\x00\x03\x00\x00\x02\x61\x00\x04\x02\x62\x00\xFF\x09\x00\x4D\xE2\x0A\x2F\x08\x25\xDF\x91"} err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: #3080 - quicklist} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + catch { + r RESTORE key 0 "\x0E\x01\x80\x00\x00\x00\x10\x41\x41\x41\x41\x41\x41\x41\x41\x02\x00\x00\x80\x41\x41\x41\x41\x07\x00\x03\xC7\x1D\xEF\x54\x68\xCC\xF3" + r DUMP key ;# DUMP was used in the original issue, but now even with shallow sanitization restore safely fails, so this is dead code + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: quicklist with empty ziplist} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch {r restore key 0 "\x0E\x01\x0B\x0B\x00\x00\x00\x0A\x00\x00\x00\x00\x00\xFF\x09\x00\xC2\x69\x37\x83\x3C\x7F\xFE\x6F" replace} err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: quicklist encoded_len is 0} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + catch { r restore _list 0 "\x12\x01\x01\x00\x0a\x00\x8f\xc6\xc0\x57\x1c\x0a\xb3\x3c" replace } err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: quicklist listpack entry start with EOF} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + catch { r restore _list 0 "\x12\x01\x02\x0b\x0b\x00\x00\x00\x01\x00\x81\x61\x02\xff\xff\x0a\x00\x7e\xd8\xde\x5b\x0d\xd7\x70\xb8" replace } err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: #3080 - ziplist} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + # shallow sanitization is enough for restore to safely reject the payload with wrong size + r config set sanitize-dump-payload no + catch { + r RESTORE key 0 "\x0A\x80\x00\x00\x00\x10\x41\x41\x41\x41\x41\x41\x41\x41\x02\x00\x00\x80\x41\x41\x41\x41\x07\x00\x39\x5B\x49\xE0\xC1\xC6\xDD\x76" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: load corrupted rdb with no CRC - #3505} { + set server_path [tmpdir "server.rdb-corruption-test"] + exec cp tests/assets/corrupt_ziplist.rdb $server_path + set srv [start_server [list overrides [list "dir" $server_path "dbfilename" "corrupt_ziplist.rdb" loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no sanitize-dump-payload no]]] + + # wait for termination + wait_for_condition 100 50 { + ! [is_alive $srv] + } else { + fail "rdb loading didn't fail" + } + + set stdout [dict get $srv stdout] + assert_equal [count_message_lines $stdout "Terminating server after rdb file reading failure."] 1 + assert_lessthan 1 [count_message_lines $stdout "integrity check failed"] + kill_server $srv ;# let valgrind look for issues +} + +foreach sanitize_dump {no yes} { + test {corrupt payload: load corrupted rdb with empty keys} { + set server_path [tmpdir "server.rdb-corruption-empty-keys-test"] + exec cp tests/assets/corrupt_empty_keys.rdb $server_path + start_server [list overrides [list "dir" $server_path "dbfilename" "corrupt_empty_keys.rdb" "sanitize-dump-payload" $sanitize_dump]] { + r select 0 + assert_equal [r dbsize] 0 + + verify_log_message 0 "*skipping empty key: set*" 0 + verify_log_message 0 "*skipping empty key: list_quicklist*" 0 + verify_log_message 0 "*skipping empty key: list_quicklist_empty_ziplist*" 0 + verify_log_message 0 "*skipping empty key: list_ziplist*" 0 + verify_log_message 0 "*skipping empty key: hash*" 0 + verify_log_message 0 "*skipping empty key: hash_ziplist*" 0 + verify_log_message 0 "*skipping empty key: zset*" 0 + verify_log_message 0 "*skipping empty key: zset_ziplist*" 0 + verify_log_message 0 "*skipping empty key: zset_listpack*" 0 + verify_log_message 0 "*empty keys skipped: 9*" 0 + } + } +} + +test {corrupt payload: listpack invalid size header} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + catch { + r restore key 0 "\x0F\x01\x10\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x40\x55\x5F\x00\x00\x00\x0F\x00\x01\x01\x00\x01\x02\x01\x88\x31\x00\x00\x00\x00\x00\x00\x00\x09\x88\x32\x00\x00\x00\x00\x00\x00\x00\x09\x00\x01\x00\x01\x00\x01\x00\x01\x02\x02\x88\x31\x00\x00\x00\x00\x00\x00\x00\x09\x88\x61\x00\x00\x00\x00\x00\x00\x00\x09\x88\x32\x00\x00\x00\x00\x00\x00\x00\x09\x88\x62\x00\x00\x00\x00\x00\x00\x00\x09\x08\x01\xFF\x0A\x01\x00\x00\x09\x00\x45\x91\x0A\x87\x2F\xA5\xF9\x2E" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*Stream listpack integrity check failed*" 0 + } +} + +test {corrupt payload: listpack too long entry len} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + catch { + r restore key 0 "\x0F\x01\x10\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x40\x55\x55\x00\x00\x00\x0F\x00\x01\x01\x00\x01\x02\x01\x88\x31\x00\x00\x00\x00\x00\x00\x00\x09\x88\x32\x00\x00\x00\x00\x00\x00\x00\x09\x00\x01\x00\x01\x00\x01\x00\x01\x02\x02\x89\x31\x00\x00\x00\x00\x00\x00\x00\x09\x88\x61\x00\x00\x00\x00\x00\x00\x00\x09\x88\x32\x00\x00\x00\x00\x00\x00\x00\x09\x88\x62\x00\x00\x00\x00\x00\x00\x00\x09\x08\x01\xFF\x0A\x01\x00\x00\x09\x00\x40\x63\xC9\x37\x03\xA2\xE5\x68" + } err + assert_equal [count_log_message 0 "crashed by signal"] 0 + assert_equal [count_log_message 0 "ASSERTION FAILED"] 1 + } +} + +test {corrupt payload: listpack very long entry len} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + catch { + # This will catch migrated payloads from v6.2.x + r restore key 0 "\x0F\x01\x10\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x40\x55\x55\x00\x00\x00\x0F\x00\x01\x01\x00\x01\x02\x01\x88\x31\x00\x00\x00\x00\x00\x00\x00\x09\x88\x32\x00\x00\x00\x00\x00\x00\x00\x09\x00\x01\x00\x01\x00\x01\x00\x01\x02\x02\x88\x31\x00\x00\x00\x00\x00\x00\x00\x09\x88\x61\x00\x00\x00\x00\x00\x00\x00\x09\x88\x32\x00\x00\x00\x00\x00\x00\x00\x09\x9C\x62\x00\x00\x00\x00\x00\x00\x00\x09\x08\x01\xFF\x0A\x01\x00\x00\x09\x00\x63\x6F\x42\x8E\x7C\xB5\xA2\x9D" + } err + assert_equal [count_log_message 0 "crashed by signal"] 0 + assert_equal [count_log_message 0 "ASSERTION FAILED"] 1 + } +} + +test {corrupt payload: listpack too long entry prev len} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + catch { + r restore key 0 "\x0F\x01\x10\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x40\x55\x55\x00\x00\x00\x0F\x00\x01\x01\x00\x15\x02\x01\x88\x31\x00\x00\x00\x00\x00\x00\x00\x09\x88\x32\x00\x00\x00\x00\x00\x00\x00\x09\x00\x01\x00\x01\x00\x01\x00\x01\x02\x02\x88\x31\x00\x00\x00\x00\x00\x00\x00\x09\x88\x61\x00\x00\x00\x00\x00\x00\x00\x09\x88\x32\x00\x00\x00\x00\x00\x00\x00\x09\x88\x62\x00\x00\x00\x00\x00\x00\x00\x09\x08\x01\xFF\x0A\x01\x00\x00\x09\x00\x06\xFB\x44\x24\x0A\x8E\x75\xEA" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*Stream listpack integrity check failed*" 0 + } +} + +test {corrupt payload: stream with duplicate consumers} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + catch { + r restore key 0 "\x0F\x00\x00\x00\x00\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x00\x00\x00\x02\x04\x6E\x61\x6D\x65\x2A\x4C\xAA\x9A\x7D\x01\x00\x00\x00\x04\x6E\x61\x6D\x65\x2B\x4C\xAA\x9A\x7D\x01\x00\x00\x00\x0A\x00\xCC\xED\x8C\xA7\x62\xEE\xC7\xC8" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*Duplicate stream consumer detected*" 0 + r ping + } +} + +test {corrupt payload: hash ziplist with duplicate records} { + # when we do perform full sanitization, we expect duplicate records to fail the restore + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch { r RESTORE _hash 0 "\x0D\x3D\x3D\x00\x00\x00\x3A\x00\x00\x00\x14\x13\x00\xF5\x02\xF5\x02\xF2\x02\x53\x5F\x31\x04\xF3\x02\xF3\x02\xF7\x02\xF7\x02\xF8\x02\x02\x5F\x37\x04\xF1\x02\xF1\x02\xF6\x02\x02\x5F\x35\x04\xF4\x02\x02\x5F\x33\x04\xFA\x02\x02\x5F\x39\x04\xF9\x02\xF9\xFF\x09\x00\xB5\x48\xDE\x62\x31\xD0\xE5\x63" } err + assert_match "*Bad data format*" $err + } +} + +test {corrupt payload: hash listpack with duplicate records} { + # when we do perform full sanitization, we expect duplicate records to fail the restore + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch { r RESTORE _hash 0 "\x10\x17\x17\x00\x00\x00\x04\x00\x82\x61\x00\x03\x82\x62\x00\x03\x82\x61\x00\x03\x82\x64\x00\x03\xff\x0a\x00\xc0\xcf\xa6\x87\xe5\xa7\xc5\xbe" } err + assert_match "*Bad data format*" $err + } +} + +test {corrupt payload: hash listpack with duplicate records - convert} { + # when we do NOT perform full sanitization, but we convert to hash, we expect duplicate records panic + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r config set hash-max-listpack-entries 1 + r debug set-skip-checksum-validation 1 + catch { r RESTORE _hash 0 "\x10\x17\x17\x00\x00\x00\x04\x00\x82\x61\x00\x03\x82\x62\x00\x03\x82\x61\x00\x03\x82\x64\x00\x03\xff\x0a\x00\xc0\xcf\xa6\x87\xe5\xa7\xc5\xbe" } err + assert_equal [count_log_message 0 "crashed by signal"] 0 + assert_equal [count_log_message 0 "listpack with dup elements"] 1 + } +} + +test {corrupt payload: hash ziplist uneven record count} { + # when we do NOT perform full sanitization, but shallow sanitization can detect uneven count + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch { r RESTORE _hash 0 "\r\x1b\x1b\x00\x00\x00\x16\x00\x00\x00\x04\x00\x00\x02a\x00\x04\x02b\x00\x04\x02a\x00\x04\x02d\x00\xff\t\x00\xa1\x98\x36x\xcc\x8e\x93\x2e" } err + assert_match "*Bad data format*" $err + } +} + +test {corrupt payload: hash duplicate records} { + # when we do perform full sanitization, we expect duplicate records to fail the restore + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch { r RESTORE _hash 0 "\x04\x02\x01a\x01b\x01a\x01d\t\x00\xc6\x9c\xab\xbc\bk\x0c\x06" } err + assert_match "*Bad data format*" $err + } +} + +test {corrupt payload: hash empty zipmap} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch { r RESTORE _hash 0 "\x09\x02\x00\xFF\x09\x00\xC0\xF1\xB8\x67\x4C\x16\xAC\xE3" } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*Zipmap integrity check failed*" 0 + } +} + +test {corrupt payload: fuzzer findings - NPD in streamIteratorGetID} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch { + r RESTORE key 0 "\x0F\x01\x10\x00\x00\x01\x73\xBD\x68\x48\x71\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x03\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x05\x01\x02\x01\x00\x01\x01\x01\x01\x01\x82\x5F\x31\x03\x05\x01\x02\x01\x00\x01\x02\x01\x01\x01\x02\x01\x48\x01\xFF\x03\x81\x00\x00\x01\x73\xBD\x68\x48\x71\x02\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x73\xBD\x68\x48\x71\x00\x01\x00\x00\x01\x73\xBD\x68\x48\x71\x00\x00\x00\x00\x00\x00\x00\x00\x72\x48\x68\xBD\x73\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\x72\x48\x68\xBD\x73\x01\x00\x00\x01\x00\x00\x01\x73\xBD\x68\x48\x71\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\x80\xCD\xB0\xD5\x1A\xCE\xFF\x10" + r XREVRANGE key 725 233 + } + assert_equal [count_log_message 0 "crashed by signal"] 0 + assert_equal [count_log_message 0 "ASSERTION FAILED"] 1 + } +} + +test {corrupt payload: fuzzer findings - listpack NPD on invalid stream} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch { + r RESTORE _stream 0 "\x0F\x01\x10\x00\x00\x01\x73\xDC\xB6\x6B\xF1\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x03\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x05\x01\x02\x01\x1F\x01\x00\x01\x01\x01\x6D\x5F\x31\x03\x05\x01\x02\x01\x29\x01\x00\x01\x01\x01\x02\x01\x05\x01\xFF\x03\x81\x00\x00\x01\x73\xDC\xB6\x6C\x1A\x00\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x73\xDC\xB6\x6B\xF1\x00\x01\x00\x00\x01\x73\xDC\xB6\x6B\xF1\x00\x00\x00\x00\x00\x00\x00\x00\x4B\x6C\xB6\xDC\x73\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\x3D\x6C\xB6\xDC\x73\x01\x00\x00\x01\x00\x00\x01\x73\xDC\xB6\x6B\xF1\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\xC7\x7D\x1C\xD7\x04\xFF\xE6\x9D" + r XREAD STREAMS _stream 519389898758 + } + assert_equal [count_log_message 0 "crashed by signal"] 0 + assert_equal [count_log_message 0 "ASSERTION FAILED"] 1 + } +} + +test {corrupt payload: fuzzer findings - NPD in quicklistIndex} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch { + r RESTORE key 0 "\x0E\x01\x13\x13\x00\x00\x00\x10\x00\x00\x00\x03\x12\x00\xF3\x02\x02\x5F\x31\x04\xF1\xFF\x09\x00\xC9\x4B\x31\xFE\x61\xC0\x96\xFE" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: fuzzer findings - encoded entry header reach outside the allocation} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r debug set-skip-checksum-validation 1 + catch { + r RESTORE key 0 "\x0D\x19\x19\x00\x00\x00\x16\x00\x00\x00\x06\x00\x00\xF1\x02\xF1\x02\xF2\x02\x02\x5F\x31\x04\x99\x02\xF3\xFF\x09\x00\xC5\xB8\x10\xC0\x8A\xF9\x16\xDF" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + + +test {corrupt payload: fuzzer findings - invalid ziplist encoding} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch { + r RESTORE _listbig 0 "\x0E\x02\x1B\x1B\x00\x00\x00\x16\x00\x00\x00\x05\x00\x00\x02\x5F\x39\x04\xF9\x02\x86\x5F\x37\x04\xF7\x02\x02\x5F\x35\xFF\x19\x19\x00\x00\x00\x16\x00\x00\x00\x05\x00\x00\xF5\x02\x02\x5F\x33\x04\xF3\x02\x02\x5F\x31\x04\xF1\xFF\x09\x00\x0C\xFC\x99\x2C\x23\x45\x15\x60" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: fuzzer findings - hash crash} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + r RESTORE _hash 0 "\x0D\x19\x19\x00\x00\x00\x16\x00\x00\x00\x06\x00\x00\xF1\x02\xF1\x02\xF2\x02\x02\x5F\x31\x04\xF3\x02\xF3\xFF\x09\x00\x38\xB8\x10\xC0\x8A\xF9\x16\xDF" + r HSET _hash 394891450 1635910264 + r HMGET _hash 887312884855 + } +} + +test {corrupt payload: fuzzer findings - uneven entry count in hash} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r debug set-skip-checksum-validation 1 + catch { + r RESTORE _hashbig 0 "\x0D\x3D\x3D\x00\x00\x00\x38\x00\x00\x00\x14\x00\x00\xF2\x02\x02\x5F\x31\x04\x1C\x02\xF7\x02\xF1\x02\xF1\x02\xF5\x02\xF5\x02\xF4\x02\x02\x5F\x33\x04\xF6\x02\x02\x5F\x35\x04\xF8\x02\x02\x5F\x37\x04\xF9\x02\xF9\x02\xF3\x02\xF3\x02\xFA\x02\x02\x5F\x39\xFF\x09\x00\x73\xB7\x68\xC8\x97\x24\x8E\x88" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: fuzzer findings - invalid read in lzf_decompress} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch { r RESTORE _setbig 0 "\x02\x03\x02\x5F\x31\xC0\x02\xC3\x00\x09\x00\xE6\xDC\x76\x44\xFF\xEB\x3D\xFE" } err + assert_match "*Bad data format*" $err + } +} + +test {corrupt payload: fuzzer findings - leak in rdbloading due to dup entry in set} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch { r RESTORE _setbig 0 "\x02\x0A\x02\x5F\x39\xC0\x06\x02\x5F\x31\xC0\x00\xC0\x04\x02\x5F\x35\xC0\x02\xC0\x08\x02\x5F\x31\x02\x5F\x33\x09\x00\x7A\x5A\xFB\x90\x3A\xE9\x3C\xBE" } err + assert_match "*Bad data format*" $err + } +} + +test {corrupt payload: fuzzer findings - empty intset} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch {r RESTORE _setbig 0 "\x02\xC0\xC0\x06\x02\x5F\x39\xC0\x02\x02\x5F\x33\xC0\x00\x02\x5F\x31\xC0\x04\xC0\x08\x02\x5F\x37\x02\x5F\x35\x09\x00\xC5\xD4\x6D\xBA\xAD\x14\xB7\xE7"} err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - zset ziplist entry lensize is 0} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch {r RESTORE _zsetbig 0 "\x0C\x3D\x3D\x00\x00\x00\x3A\x00\x00\x00\x14\x00\x00\xF1\x02\xF1\x02\x02\x5F\x31\x04\xF2\x02\xF3\x02\xF3\x02\x02\x5F\x33\x04\xF4\x02\xEE\x02\xF5\x02\x02\x5F\x35\x04\xF6\x02\xF7\x02\xF7\x02\x02\x5F\x37\x04\xF8\x02\xF9\x02\xF9\x02\x02\x5F\x39\x04\xFA\xFF\x09\x00\xAE\xF9\x77\x2A\x47\x24\x33\xF6"} err + assert_match "*Bad data format*" $err + verify_log_message 0 "*Zset ziplist integrity check failed*" 0 + } +} + +test {corrupt payload: fuzzer findings - valgrind ziplist prevlen reaches outside the ziplist} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch {r RESTORE _listbig 0 "\x0E\x02\x1B\x1B\x00\x00\x00\x16\x00\x00\x00\x05\x00\x00\x02\x5F\x39\x04\xF9\x02\x02\x5F\x37\x04\xF7\x02\x02\x5F\x35\xFF\x19\x19\x00\x00\x00\x16\x00\x00\x00\x05\x00\x00\xF5\x02\x02\x5F\x33\x04\xF3\x95\x02\x5F\x31\x04\xF1\xFF\x09\x00\x0C\xFC\x99\x2C\x23\x45\x15\x60"} err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: fuzzer findings - valgrind - bad rdbLoadDoubleValue} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch { r RESTORE _list 0 "\x03\x01\x11\x11\x00\x00\x00\x0A\x00\x00\x00\x01\x00\x00\xD0\x07\x1A\xE9\x02\xFF\x09\x00\x1A\x06\x07\x32\x41\x28\x3A\x46" } err + assert_match "*Bad data format*" $err + } +} + +test {corrupt payload: fuzzer findings - valgrind ziplist prev too big} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch {r RESTORE _list 0 "\x0E\x01\x13\x13\x00\x00\x00\x10\x00\x00\x00\x03\x00\x00\xF3\x02\x02\x5F\x31\xC1\xF1\xFF\x09\x00\xC9\x4B\x31\xFE\x61\xC0\x96\xFE"} err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: fuzzer findings - lzf decompression fails, avoid valgrind invalid read} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch {r RESTORE _stream 0 "\x0F\x02\x10\x00\x00\x01\x73\xDD\xAA\x2A\xB9\x00\x00\x00\x00\x00\x00\x00\x00\xC3\x40\x4B\x40\x5C\x18\x5C\x00\x00\x00\x24\x00\x05\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x00\x01\x20\x03\x00\x05\x20\x1C\x40\x07\x05\x01\x01\x82\x5F\x31\x03\x80\x0D\x40\x00\x00\x02\x60\x19\x40\x27\x40\x19\x00\x33\x60\x19\x40\x29\x02\x01\x01\x04\x20\x19\x00\xFF\x10\x00\x00\x01\x73\xDD\xAA\x2A\xBC\x00\x00\x00\x00\x00\x00\x00\x00\xC3\x40\x4D\x40\x5E\x18\x5E\x00\x00\x00\x24\x00\x05\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x06\x01\x01\x82\x5F\x35\x03\x05\x20\x1E\x17\x0B\x03\x01\x01\x06\x01\x40\x0B\x00\x01\x60\x0D\x02\x82\x5F\x37\x60\x19\x80\x00\x00\x08\x60\x19\x80\x27\x02\x82\x5F\x39\x20\x19\x00\xFF\x0A\x81\x00\x00\x01\x73\xDD\xAA\x2A\xBE\x00\x00\x09\x00\x21\x85\x77\x43\x71\x7B\x17\x88"} err + assert_match "*Bad data format*" $err + } +} + +test {corrupt payload: fuzzer findings - stream bad lp_count} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch { r RESTORE _stream 0 "\x0F\x01\x10\x00\x00\x01\x73\xDE\xDF\x7D\x9B\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x03\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x56\x01\x02\x01\x22\x01\x00\x01\x01\x01\x82\x5F\x31\x03\x05\x01\x02\x01\x2C\x01\x00\x01\x01\x01\x02\x01\x05\x01\xFF\x03\x81\x00\x00\x01\x73\xDE\xDF\x7D\xC7\x00\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x73\xDE\xDF\x7D\x9B\x00\x01\x00\x00\x01\x73\xDE\xDF\x7D\x9B\x00\x00\x00\x00\x00\x00\x00\x00\xF9\x7D\xDF\xDE\x73\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\xEB\x7D\xDF\xDE\x73\x01\x00\x00\x01\x00\x00\x01\x73\xDE\xDF\x7D\x9B\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\xB2\xA8\xA7\x5F\x1B\x61\x72\xD5"} err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - stream bad lp_count - unsanitized} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + r RESTORE _stream 0 "\x0F\x01\x10\x00\x00\x01\x73\xDE\xDF\x7D\x9B\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x03\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x56\x01\x02\x01\x22\x01\x00\x01\x01\x01\x82\x5F\x31\x03\x05\x01\x02\x01\x2C\x01\x00\x01\x01\x01\x02\x01\x05\x01\xFF\x03\x81\x00\x00\x01\x73\xDE\xDF\x7D\xC7\x00\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x73\xDE\xDF\x7D\x9B\x00\x01\x00\x00\x01\x73\xDE\xDF\x7D\x9B\x00\x00\x00\x00\x00\x00\x00\x00\xF9\x7D\xDF\xDE\x73\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\xEB\x7D\xDF\xDE\x73\x01\x00\x00\x01\x00\x00\x01\x73\xDE\xDF\x7D\x9B\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\xB2\xA8\xA7\x5F\x1B\x61\x72\xD5" + catch { r XREVRANGE _stream 638932639 738} + assert_equal [count_log_message 0 "crashed by signal"] 0 + assert_equal [count_log_message 0 "ASSERTION FAILED"] 1 + } +} + +test {corrupt payload: fuzzer findings - stream integrity check issue} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch { r RESTORE _stream 0 "\x0F\x02\x10\x00\x00\x01\x75\x2D\xA2\x90\x67\x00\x00\x00\x00\x00\x00\x00\x00\xC3\x40\x4F\x40\x5C\x18\x5C\x00\x00\x00\x24\x00\x05\x01\x00\x01\x4A\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x00\x01\x20\x03\x00\x05\x20\x1C\x40\x09\x05\x01\x01\x82\x5F\x31\x03\x80\x0D\x00\x02\x20\x0D\x00\x02\xA0\x19\x00\x03\x20\x0B\x02\x82\x5F\x33\xA0\x19\x00\x04\x20\x0D\x00\x04\x20\x19\x00\xFF\x10\x00\x00\x01\x75\x2D\xA2\x90\x67\x00\x00\x00\x00\x00\x00\x00\x05\xC3\x40\x56\x40\x60\x18\x60\x00\x00\x00\x24\x00\x05\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x06\x01\x01\x82\x5F\x35\x03\x05\x20\x1E\x40\x0B\x03\x01\x01\x06\x01\x80\x0B\x00\x02\x20\x0B\x02\x82\x5F\x37\x60\x19\x03\x01\x01\xDF\xFB\x20\x05\x00\x08\x60\x1A\x20\x0C\x00\xFC\x20\x05\x02\x82\x5F\x39\x20\x1B\x00\xFF\x0A\x81\x00\x00\x01\x75\x2D\xA2\x90\x68\x01\x00\x09\x00\x1D\x6F\xC0\x69\x8A\xDE\xF7\x92" } err + assert_match "*Bad data format*" $err + } +} + +test {corrupt payload: fuzzer findings - infinite loop} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + r RESTORE _stream 0 "\x0F\x01\x10\x00\x00\x01\x75\x3A\xA6\xD0\x93\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x03\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x05\x01\x02\x01\x00\x01\x01\x01\x01\x01\x82\x5F\x31\x03\xFD\x01\x02\x01\x00\x01\x02\x01\x01\x01\x02\x01\x05\x01\xFF\x03\x81\x00\x00\x01\x75\x3A\xA6\xD0\x93\x02\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x75\x3A\xA6\xD0\x93\x00\x01\x00\x00\x01\x75\x3A\xA6\xD0\x93\x00\x00\x00\x00\x00\x00\x00\x00\x94\xD0\xA6\x3A\x75\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\x94\xD0\xA6\x3A\x75\x01\x00\x00\x01\x00\x00\x01\x75\x3A\xA6\xD0\x93\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\xC4\x09\xAD\x69\x7E\xEE\xA6\x2F" + catch { r XREVRANGE _stream 288270516 971031845 } + assert_equal [count_log_message 0 "crashed by signal"] 0 + assert_equal [count_log_message 0 "ASSERTION FAILED"] 1 + } +} + +test {corrupt payload: fuzzer findings - hash ziplist too long entry len} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r debug set-skip-checksum-validation 1 + catch { + r RESTORE _hash 0 "\x0D\x3D\x3D\x00\x00\x00\x3A\x00\x00\x00\x14\x13\x00\xF5\x02\xF5\x02\xF2\x02\x53\x5F\x31\x04\xF3\x02\xF3\x02\xF7\x02\xF7\x02\xF8\x02\x02\x5F\x37\x04\xF1\x02\xF1\x02\xF6\x02\x02\x5F\x35\x04\xF4\x02\x02\x5F\x33\x04\xFA\x02\x02\x5F\x39\x04\xF9\x02\xF9\xFF\x09\x00\xB5\x48\xDE\x62\x31\xD0\xE5\x63" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +if {$run_oom_tests} { + +test {corrupt payload: OOM in rdbGenericLoadStringObject} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + catch { r RESTORE x 0 "\x0A\x81\x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x13\x00\x00\x00\x0E\x00\x00\x00\x02\x00\x00\x02\x61\x00\x04\x02\x62\x00\xFF\x09\x00\x57\x04\xE5\xCD\xD4\x37\x6C\x57" } err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - OOM in dictExpand} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch { r RESTORE x 0 "\x02\x81\x02\x5F\x31\xC0\x00\xC0\x02\x09\x00\xCD\x84\x2C\xB7\xE8\xA4\x49\x57" } err + assert_match "*Bad data format*" $err + r ping + } +} + +} + +test {corrupt payload: fuzzer findings - zset ziplist invalid tail offset} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch {r RESTORE _zset 0 "\x0C\x19\x19\x00\x00\x00\x02\x00\x00\x00\x06\x00\x00\xF1\x02\xF1\x02\x02\x5F\x31\x04\xF2\x02\xF3\x02\xF3\xFF\x09\x00\x4D\x72\x7B\x97\xCD\x9A\x70\xC1"} err + assert_match "*Bad data format*" $err + verify_log_message 0 "*Zset ziplist integrity check failed*" 0 + } +} + +test {corrupt payload: fuzzer findings - negative reply length} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + r RESTORE _stream 0 "\x0F\x01\x10\x00\x00\x01\x75\xCF\xA1\x16\xA7\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x03\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x05\x01\x02\x01\x00\x01\x01\x01\x01\x01\x14\x5F\x31\x03\x05\x01\x02\x01\x00\x01\x02\x01\x01\x01\x02\x01\x05\x01\xFF\x03\x81\x00\x00\x01\x75\xCF\xA1\x16\xA7\x02\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x75\xCF\xA1\x16\xA7\x01\x01\x00\x00\x01\x75\xCF\xA1\x16\xA7\x00\x00\x00\x00\x00\x00\x00\x01\xA7\x16\xA1\xCF\x75\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\xA7\x16\xA1\xCF\x75\x01\x00\x00\x01\x00\x00\x01\x75\xCF\xA1\x16\xA7\x00\x00\x00\x00\x00\x00\x00\x01\x09\x00\x1B\x42\x52\xB8\xDD\x5C\xE5\x4E" + catch {r XADD _stream * -956 -2601503852} + catch {r XINFO STREAM _stream FULL} + assert_equal [count_log_message 0 "crashed by signal"] 0 + assert_equal [count_log_message 0 "ASSERTION FAILED"] 1 + } +} + +test {corrupt payload: fuzzer findings - valgrind negative malloc} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch {r RESTORE _key 0 "\x0E\x01\x81\xD6\xD6\x00\x00\x00\x0A\x00\x00\x00\x01\x00\x00\x40\xC8\x6F\x2F\x36\xE2\xDF\xE3\x2E\x26\x64\x8B\x87\xD1\x7A\xBD\xFF\xEF\xEF\x63\x65\xF6\xF8\x8C\x4E\xEC\x96\x89\x56\x88\xF8\x3D\x96\x5A\x32\xBD\xD1\x36\xD8\x02\xE6\x66\x37\xCB\x34\x34\xC4\x52\xA7\x2A\xD5\x6F\x2F\x7E\xEE\xA2\x94\xD9\xEB\xA9\x09\x38\x3B\xE1\xA9\x60\xB6\x4E\x09\x44\x1F\x70\x24\xAA\x47\xA8\x6E\x30\xE1\x13\x49\x4E\xA1\x92\xC4\x6C\xF0\x35\x83\xD9\x4F\xD9\x9C\x0A\x0D\x7A\xE7\xB1\x61\xF5\xC1\x2D\xDC\xC3\x0E\x87\xA6\x80\x15\x18\xBA\x7F\x72\xDD\x14\x75\x46\x44\x0B\xCA\x9C\x8F\x1C\x3C\xD7\xDA\x06\x62\x18\x7E\x15\x17\x24\xAB\x45\x21\x27\xC2\xBC\xBB\x86\x6E\xD8\xBD\x8E\x50\xE0\xE0\x88\xA4\x9B\x9D\x15\x2A\x98\xFF\x5E\x78\x6C\x81\xFC\xA8\xC9\xC8\xE6\x61\xC8\xD1\x4A\x7F\x81\xD6\xA6\x1A\xAD\x4C\xC1\xA2\x1C\x90\x68\x15\x2A\x8A\x36\xC0\x58\xC3\xCC\xA6\x54\x19\x12\x0F\xEB\x46\xFF\x6E\xE3\xA7\x92\xF8\xFF\x09\x00\xD0\x71\xF7\x9F\xF7\x6A\xD6\x2E"} err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - valgrind invalid read} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch {r RESTORE _key 0 "\x05\x0A\x02\x5F\x39\x00\x00\x00\x00\x00\x00\x22\x40\xC0\x08\x00\x00\x00\x00\x00\x00\x20\x40\x02\x5F\x37\x00\x00\x00\x00\x00\x00\x1C\x40\xC0\x06\x00\x00\x00\x00\x00\x00\x18\x40\x02\x5F\x33\x00\x00\x00\x00\x00\x00\x14\x40\xC0\x04\x00\x00\x00\x00\x00\x00\x10\x40\x02\x5F\x33\x00\x00\x00\x00\x00\x00\x08\x40\xC0\x02\x00\x00\x00\x00\x00\x00\x00\x40\x02\x5F\x31\x00\x00\x00\x00\x00\x00\xF0\x3F\xC0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\x3C\x66\xD7\x14\xA9\xDA\x3C\x69"} err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - empty hash ziplist} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch {r RESTORE _int 0 "\x04\xC0\x01\x09\x00\xF6\x8A\xB6\x7A\x85\x87\x72\x4D"} err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - stream with no records} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + r restore _stream 0 "\x0F\x01\x10\x00\x00\x01\x78\x4D\x55\x68\x09\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x02\x01\x01\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x05\x01\x03\x01\x3E\x01\x00\x01\x01\x01\x82\x5F\x31\x03\x05\x01\x02\x01\x50\x01\x00\x01\x01\x01\x02\x01\x05\x23\xFF\x02\x81\x00\x00\x01\x78\x4D\x55\x68\x59\x00\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x78\x4D\x55\x68\x47\x00\x01\x00\x00\x01\x78\x4D\x55\x68\x47\x00\x00\x00\x00\x00\x00\x00\x00\x9F\x68\x55\x4D\x78\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\x85\x68\x55\x4D\x78\x01\x00\x00\x01\x00\x00\x01\x78\x4D\x55\x68\x47\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\xF1\xC0\x72\x70\x39\x40\x1E\xA9" replace + catch {r XREAD STREAMS _stream $} + assert_equal [count_log_message 0 "crashed by signal"] 0 + assert_equal [count_log_message 0 "Guru Meditation"] 1 + } +} + +test {corrupt payload: fuzzer findings - quicklist ziplist tail followed by extra data which start with 0xff} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch { + r restore key 0 "\x0E\x01\x11\x11\x00\x00\x00\x0A\x00\x00\x00\x01\x00\x00\xF6\xFF\xB0\x6C\x9C\xFF\x09\x00\x9C\x37\x47\x49\x4D\xDE\x94\xF5" replace + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: fuzzer findings - dict init to huge size} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch {r restore key 0 "\x02\x81\xC0\x00\x02\x5F\x31\xC0\x02\x09\x00\xB2\x1B\xE5\x17\x2E\x15\xF4\x6C" replace} err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - huge string} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch {r restore key 0 "\x00\x81\x01\x09\x00\xF6\x2B\xB6\x7A\x85\x87\x72\x4D"} err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - stream PEL without consumer} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch {r restore _stream 0 "\x0F\x01\x10\x00\x00\x01\x7B\x08\xF0\xB2\x34\x00\x00\x00\x00\x00\x00\x00\x00\xC3\x3B\x40\x42\x19\x42\x00\x00\x00\x18\x00\x02\x01\x01\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x20\x10\x00\x00\x20\x01\x00\x01\x20\x03\x02\x05\x01\x03\x20\x05\x40\x00\x04\x82\x5F\x31\x03\x05\x60\x19\x80\x32\x02\x05\x01\xFF\x02\x81\x00\x00\x01\x7B\x08\xF0\xB2\x34\x02\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x7B\x08\xF0\xB2\x34\x01\x01\x00\x00\x01\x7B\x08\xF0\xB2\x34\x00\x00\x00\x00\x00\x00\x00\x01\x35\xB2\xF0\x08\x7B\x01\x00\x00\x01\x01\x13\x41\x6C\x69\x63\x65\x35\xB2\xF0\x08\x7B\x01\x00\x00\x01\x00\x00\x01\x7B\x08\xF0\xB2\x34\x00\x00\x00\x00\x00\x00\x00\x01\x09\x00\x28\x2F\xE0\xC5\x04\xBB\xA7\x31"} err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - stream listpack valgrind issue} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + r restore _stream 0 "\x0F\x01\x10\x00\x00\x01\x7B\x09\x5E\x94\xFF\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x02\x01\x01\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x05\x01\x03\x01\x25\x01\x00\x01\x01\x01\x82\x5F\x31\x03\x05\x01\x02\x01\x32\x01\x00\x01\x01\x01\x02\x01\xF0\x01\xFF\x02\x81\x00\x00\x01\x7B\x09\x5E\x95\x31\x00\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x7B\x09\x5E\x95\x24\x00\x01\x00\x00\x01\x7B\x09\x5E\x95\x24\x00\x00\x00\x00\x00\x00\x00\x00\x5C\x95\x5E\x09\x7B\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\x4B\x95\x5E\x09\x7B\x01\x00\x00\x01\x00\x00\x01\x7B\x09\x5E\x95\x24\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\x19\x29\x94\xDF\x76\xF8\x1A\xC6" + catch {r XINFO STREAM _stream FULL } + assert_equal [count_log_message 0 "crashed by signal"] 0 + assert_equal [count_log_message 0 "ASSERTION FAILED"] 1 + } +} + +test {corrupt payload: fuzzer findings - stream with bad lpFirst} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch {r restore _stream 0 "\x0F\x01\x10\x00\x00\x01\x7B\x0E\x52\xD2\xEC\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x02\xF7\x01\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x05\x01\x03\x01\x01\x01\x00\x01\x01\x01\x82\x5F\x31\x03\x05\x01\x02\x01\x01\x01\x01\x01\x01\x01\x02\x01\x05\x01\xFF\x02\x81\x00\x00\x01\x7B\x0E\x52\xD2\xED\x01\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x7B\x0E\x52\xD2\xED\x00\x01\x00\x00\x01\x7B\x0E\x52\xD2\xED\x00\x00\x00\x00\x00\x00\x00\x00\xED\xD2\x52\x0E\x7B\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\xED\xD2\x52\x0E\x7B\x01\x00\x00\x01\x00\x00\x01\x7B\x0E\x52\xD2\xED\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\xAC\x05\xC9\x97\x5D\x45\x80\xB3"} err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - stream listpack lpPrev valgrind issue} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + r restore _stream 0 "\x0F\x01\x10\x00\x00\x01\x7B\x0E\xAE\x66\x36\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x02\x01\x01\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x1D\x01\x03\x01\x24\x01\x00\x01\x01\x69\x82\x5F\x31\x03\x05\x01\x02\x01\x33\x01\x00\x01\x01\x01\x02\x01\x05\x01\xFF\x02\x81\x00\x00\x01\x7B\x0E\xAE\x66\x69\x00\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x7B\x0E\xAE\x66\x5A\x00\x01\x00\x00\x01\x7B\x0E\xAE\x66\x5A\x00\x00\x00\x00\x00\x00\x00\x00\x94\x66\xAE\x0E\x7B\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\x83\x66\xAE\x0E\x7B\x01\x00\x00\x01\x00\x00\x01\x7B\x0E\xAE\x66\x5A\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\xD5\xD7\xA5\x5C\x63\x1C\x09\x40" + catch {r XREVRANGE _stream 1618622681 606195012389} + assert_equal [count_log_message 0 "crashed by signal"] 0 + assert_equal [count_log_message 0 "ASSERTION FAILED"] 1 + } +} + +test {corrupt payload: fuzzer findings - stream with non-integer entry id} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch {r restore _streambig 0 "\x0F\x03\x10\x00\x00\x01\x7B\x13\x34\xC3\xB2\x00\x00\x00\x00\x00\x00\x00\x00\xC3\x40\x4F\x40\x5C\x18\x5C\x00\x00\x00\x24\x00\x05\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x80\x20\x01\x00\x01\x20\x03\x00\x05\x20\x1C\x40\x09\x05\x01\x01\x82\x5F\x31\x03\x80\x0D\x00\x02\x20\x0D\x00\x02\xA0\x19\x00\x03\x20\x0B\x02\x82\x5F\x33\xA0\x19\x00\x04\x20\x0D\x00\x04\x20\x19\x00\xFF\x10\x00\x00\x01\x7B\x13\x34\xC3\xB2\x00\x00\x00\x00\x00\x00\x00\x05\xC3\x40\x56\x40\x61\x18\x61\x00\x00\x00\x24\x00\x05\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x06\x01\x01\x82\x5F\x35\x03\x05\x20\x1E\x40\x0B\x03\x01\x01\x06\x01\x40\x0B\x03\x01\x01\xDF\xFB\x20\x05\x02\x82\x5F\x37\x60\x1A\x20\x0E\x00\xFC\x20\x05\x00\x08\xC0\x1B\x00\xFD\x20\x0C\x02\x82\x5F\x39\x20\x1B\x00\xFF\x10\x00\x00\x01\x7B\x13\x34\xC3\xB3\x00\x00\x00\x00\x00\x00\x00\x03\xC3\x3D\x40\x4A\x18\x4A\x00\x00\x00\x15\x00\x02\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x40\x00\x00\x05\x60\x07\x02\xDF\xFD\x02\xC0\x23\x09\x01\x01\x86\x75\x6E\x69\x71\x75\x65\x07\xA0\x2D\x02\x08\x01\xFF\x0C\x81\x00\x00\x01\x7B\x13\x34\xC3\xB4\x00\x00\x09\x00\x9D\xBD\xD5\xB9\x33\xC4\xC5\xFF"} err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - empty quicklist} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch { + r restore key 0 "\x0E\xC0\x2B\x15\x00\x00\x00\x0A\x00\x00\x00\x01\x00\x00\xE0\x62\x58\xEA\xDF\x22\x00\x00\x00\xFF\x09\x00\xDF\x35\xD2\x67\xDC\x0E\x89\xAB" replace + } err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - empty zset} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch {r restore key 0 "\x05\xC0\x01\x09\x00\xF6\x8A\xB6\x7A\x85\x87\x72\x4D"} err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - hash with len of 0} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch {r restore key 0 "\x04\xC0\x21\x09\x00\xF6\x8A\xB6\x7A\x85\x87\x72\x4D"} err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - hash listpack first element too long entry len} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r debug set-skip-checksum-validation 1 + r config set sanitize-dump-payload yes + catch { r restore _hash 0 "\x10\x15\x15\x00\x00\x00\x06\x00\xF0\x01\x00\x01\x01\x01\x82\x5F\x31\x03\x02\x01\x02\x01\xFF\x0A\x00\x94\x21\x0A\xFA\x06\x52\x9F\x44" replace } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: fuzzer findings - stream double free listpack when insert dup node to rax returns 0} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r debug set-skip-checksum-validation 1 + r config set sanitize-dump-payload yes + catch { r restore _stream 0 "\x0F\x03\x10\x00\x00\x01\x7B\x60\x5A\x23\x79\x00\x00\x00\x00\x00\x00\x00\x00\xC3\x40\x4F\x40\x5C\x18\x5C\x00\x00\x00\x24\x00\x05\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x00\x01\x20\x03\x00\x05\x20\x1C\x40\x09\x05\x01\x01\x82\x5F\x31\x03\x80\x0D\x00\x02\x20\x0D\x00\x02\xA0\x19\x00\x03\x20\x0B\x02\x82\x5F\x33\xA0\x19\x00\x04\x20\x0D\x00\x04\x20\x19\x00\xFF\x10\x00\x00\x01\x7B\x60\x5A\x23\x79\x00\x00\x00\x00\x00\x00\x00\x05\xC3\x40\x51\x40\x5E\x18\x5E\x00\x00\x00\x24\x00\x05\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x06\x01\x01\x82\x5F\x35\x03\x05\x20\x1E\x40\x0B\x03\x01\x01\x06\x01\x80\x0B\x00\x02\x20\x0B\x02\x82\x5F\x37\xA0\x19\x00\x03\x20\x0D\x00\x08\xA0\x19\x00\x04\x20\x0B\x02\x82\x5F\x39\x20\x19\x00\xFF\x10\x00\x00\x01\x7B\x60\x5A\x23\x79\x00\x00\x00\x00\x00\x00\x00\x00\xC3\x3B\x40\x49\x18\x49\x00\x00\x00\x15\x00\x02\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x40\x00\x00\x05\x20\x07\x40\x09\xC0\x22\x09\x01\x01\x86\x75\x6E\x69\x71\x75\x65\x07\xA0\x2C\x02\x08\x01\xFF\x0C\x81\x00\x00\x01\x7B\x60\x5A\x23\x7A\x01\x00\x0A\x00\x9C\x8F\x1E\xBF\x2E\x05\x59\x09" replace } err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - LCS OOM} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r SETRANGE _int 423324 1450173551 + catch {r LCS _int _int} err + assert_match "*Insufficient memory*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - gcc asan reports false leak on assert} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r debug set-skip-checksum-validation 1 + r config set sanitize-dump-payload no + catch { r restore _list 0 "\x12\x01\x02\x13\x13\x00\x00\x00\x10\x00\x00\x00\x03\x00\x00\xF3\xFE\x02\x5F\x31\x04\xF1\xFF\x0A\x00\x19\x8D\x3D\x74\x85\x94\x29\xBD" } + catch { r LPOP _list } err + assert_equal [count_log_message 0 "crashed by signal"] 0 + assert_equal [count_log_message 0 "ASSERTION FAILED"] 1 + } +} + +test {corrupt payload: fuzzer findings - lpFind invalid access} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r debug set-skip-checksum-validation 1 + r config set sanitize-dump-payload no + r restore _hashbig 0 "\x10\x39\x39\x00\x00\x00\x14\x00\x06\x01\x06\x01\x03\x01\x82\x5F\x33\x03\x07\x01\x82\x5F\x37\x03\x00\x01\x00\x01\x04\x01\x04\x01\x09\x01\x82\x5F\x39\x03\x05\x01\x82\x5F\x35\x03\x08\x01\x08\x01\x01\x01\x82\x5F\x31\x03\x02\x01\xF0\x01\xFF\x0A\x00\x29\xD7\xE4\x52\x79\x7A\x95\x82" + catch { r HLEN _hashbig } + catch { r HSETNX _hashbig 513072881620 "\x9A\x4B\x1F\xF2\x99\x74\x6E\x96\x84\x7F\xB9\x85\xBE\xD6\x1A\x93\x0A\xED\xAE\x19\xA0\x5A\x67\xD6\x89\xA8\xF9\xF2\xB8\xBD\x3E\x5A\xCF\xD2\x5B\x17\xA4\xBB\xB2\xA9\x56\x67\x6E\x0B\xED\xCD\x36\x49\xC6\x84\xFF\xC2\x76\x9B\xF3\x49\x88\x97\x92\xD2\x54\xE9\x08\x19\x86\x40\x96\x24\x68\x25\x9D\xF7\x0E\xB7\x36\x85\x68\x6B\x2A\x97\x64\x30\xE6\xFF\x9A\x2A\x42\x2B\x31\x01\x32\xB3\xEE\x78\x1A\x26\x94\xE2\x07\x34\x50\x8A\xFF\xF9\xAE\xEA\xEC\x59\x42\xF5\x39\x40\x65\xDE\x55\xCC\x77\x1B\x32\x02\x19\xEE\x3C\xD4\x79\x48\x01\x4F\x51\xFE\x22\xE0\x0C\xF4\x07\x06\xCD\x55\x30\xC0\x24\x32\xD4\xCC\xAF\x82\x05\x48\x14\x10\x55\xA1\x3D\xF6\x81\x45\x54\xEA\x71\x24\x27\x06\xDC\xFA\xE4\xE4\x87\xCC\x81\xA0\x47\xA5\xAF\xD1\x89\xE7\x42\xC3\x24\xD0\x32\x7A\xDE\x44\x47\x6E\x1F\xCB\xEE\xA6\x46\xDE\x0D\xE6\xD5\x16\x03\x2A\xD6\x9E\xFD\x94\x02\x2C\xDB\x1F\xD0\xBE\x98\x10\xE3\xEB\xEA\xBE\xE5\xD1" } + } +} + +test {corrupt payload: fuzzer findings - invalid access in ziplist tail prevlen decoding} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r debug set-skip-checksum-validation 1 + r config set sanitize-dump-payload no + catch {r restore _listbig 0 "\x0e\x02\x1B\x1B\x00\x00\x00\x16\x00\x00\x00\x05\x00\x00\x02\x5F\x39\x04\xF9\x02\x02\x5F\x37\x04\xF7\x02\x02\x5F\x35\xFF\x19\x19\x00\x00\x00\x16\x00\x00\x00\x05\x00\x00\xF5\x02\x02\x5F\x33\x04\xF3\x02\x02\x5F\x31\xFE\xF1\xFF\x0A\x00\x6B\x43\x32\x2F\xBB\x29\x0a\xBE"} err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - zset zslInsert with a NAN score} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch {r restore _nan_zset 0 "\x05\x0A\x02\x5F\x39\x00\x00\x00\x00\x00\x00\x22\x40\xC0\x08\x00\x00\x00\x00\x00\x00\x20\x40\x02\x5F\x37\x00\x00\x00\x00\x00\x00\x1C\x40\xC0\x06\x00\x00\x00\x00\x00\x00\x18\x40\x02\x5F\x35\x00\x00\x00\x00\x00\x00\x14\x40\xC0\x04\x00\x00\x00\x00\x00\x00\x10\x40\x02\x5F\x33\x00\x00\x00\x00\x00\x00\x08\x40\xC0\x02\x00\x00\x00\x00\x00\x00\x00\x40\x02\x5F\x31\x00\x00\x00\x00\x00\x55\xF0\x7F\xC0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0A\x00\xEC\x94\x86\xD8\xFD\x5C\x5F\xD8"} err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - streamLastValidID panic} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch {r restore _streambig 0 "\x13\xC0\x10\x00\x00\x01\x80\x20\x48\xA0\x33\x00\x00\x00\x00\x00\x00\x00\x00\xC3\x40\x4F\x40\x5C\x18\x5C\x00\x00\x00\x24\x00\x05\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x00\x01\x20\x03\x00\x05\x20\x1C\x40\x09\x05\x01\x01\x82\x5F\x31\x03\x80\x0D\x00\x02\x20\x0D\x00\x02\xA0\x19\x00\x03\x20\x0B\x02\x82\x5F\x33\x60\x19\x40\x2F\x02\x01\x01\x04\x20\x19\x00\xFF\x10\x00\x00\x01\x80\x20\x48\xA0\x34\x00\x00\x00\x00\x00\x00\x00\x01\xC3\x40\x51\x40\x5E\x18\x5E\x00\x00\x00\x24\x00\x05\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x06\x01\x01\x82\x5F\x35\x03\x05\x20\x1E\x40\x0B\x03\x01\x01\x06\x01\x80\x0B\x00\x02\x20\x0B\x02\x82\x5F\x37\xA0\x19\x00\x03\x20\x0D\x00\x08\xA0\x19\x00\x04\x20\x0B\x02\x82\x5F\x39\x20\x19\x00\xFF\x10\x00\x00\x01\x80\x20\x48\xA0\x34\x00\x00\x00\x00\x00\x00\x00\x06\xC3\x3D\x40\x4A\x18\x4A\x00\x00\x00\x15\x00\x02\x01\x00\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x40\x10\x00\x00\x20\x01\x40\x00\x00\x05\x60\x07\x02\xDF\xFA\x02\xC0\x23\x09\x01\x01\x86\x75\x6E\x69\x71\x75\x65\x07\xA0\x2D\x02\x08\x01\xFF\x0C\x81\x00\x00\x01\x80\x20\x48\xA0\x35\x00\x81\x00\x00\x01\x80\x20\x48\xA0\x33\x00\x00\x00\x0C\x00\x0A\x00\x34\x8B\x0E\x5B\x42\xCD\xD6\x08"} err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: fuzzer findings - valgrind fishy value warning} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + catch {r restore _key 0 "\x13\x01\x10\x00\x00\x01\x81\xCC\x07\xDC\xF2\x00\x00\x00\x00\x00\x00\x00\x00\x40\x42\x42\x00\x00\x00\x18\x00\x02\x01\x01\x01\x02\x01\x84\x69\x74\x65\x6D\x05\x85\x76\x61\x6C\x75\x65\x06\x00\x01\x02\x01\x00\x01\x00\x01\x01\x01\x00\x01\x05\x01\x03\x01\x2C\x01\x00\x01\x01\x01\x82\x5F\x31\x03\x05\x01\x02\x01\x3C\x01\x00\x01\x01\x01\x02\x01\x05\x01\xFF\x02\xD0\x00\x00\x01\x81\xCC\x07\xDD\x2E\x00\x81\x00\x00\x01\x81\xCC\x07\xDC\xF2\x00\x81\x00\x00\x01\x81\xCC\x07\xDD\x1E\x00\x03\x01\x07\x6D\x79\x67\x72\x6F\x75\x70\x81\x00\x00\x01\x81\xCC\x07\xDD\x1E\x00\x02\x01\x00\x00\x01\x81\xCC\x07\xDD\x1E\x00\x00\x00\x00\x00\x00\x00\x00\x71\xDD\x07\xCC\x81\x01\x00\x00\x01\x01\x05\x41\x6C\x69\x63\x65\x58\xDD\x07\xCC\x81\x01\x00\x00\x01\x00\x00\x01\x81\xCC\x07\xDD\x1E\x00\x00\x00\x00\x00\x00\x00\x00\x0A\x00\x2F\xB0\xD1\x15\x0A\x97\x87\x6B"} err + assert_match "*Bad data format*" $err + r ping + } +} + + +} ;# tags + diff --git a/tests/integration/dismiss-mem.tcl b/tests/integration/dismiss-mem.tcl new file mode 100644 index 0000000..87f6e1d --- /dev/null +++ b/tests/integration/dismiss-mem.tcl @@ -0,0 +1,101 @@ +# The tests of this file aim to get coverage on all the "dismiss" methods +# that dismiss all data-types memory in the fork child. like client query +# buffer, client output buffer and replication backlog. +# Actually, we may not have many asserts in the test, since we just check for +# crashes and the dump file inconsistencies. + +start_server {tags {"dismiss external:skip"}} { + # In other tests, although we test child process dumping RDB file, but + # memory allocations of key/values are usually small, they couldn't cover + # the "dismiss" object methods, in this test, we create big size key/values + # to satisfy the conditions for release memory pages, especially, we assume + # the page size of OS is 4KB in some cases. + test {dismiss all data types memory} { + set bigstr [string repeat A 8192] + set 64bytes [string repeat A 64] + + # string + populate 100 bigstring 8192 + + # list + r lpush biglist1 $bigstr ; # uncompressed ziplist node + r config set list-compress-depth 1 ; # compressed ziplist nodes + for {set i 0} {$i < 16} {incr i} { + r lpush biglist2 $bigstr + } + + # set + r sadd bigset1 $bigstr ; # hash encoding + set biginteger [string repeat 1 19] + for {set i 0} {$i < 512} {incr i} { + r sadd bigset2 $biginteger ; # intset encoding + } + + # zset + r zadd bigzset1 1.0 $bigstr ; # skiplist encoding + for {set i 0} {$i < 128} {incr i} { + r zadd bigzset2 1.0 $64bytes ; # ziplist encoding + } + + # hash + r hset bighash1 field1 $bigstr ; # hash encoding + for {set i 0} {$i < 128} {incr i} { + r hset bighash2 $i $64bytes ; # ziplist encoding + } + + # stream + r xadd bigstream * entry1 $bigstr entry2 $bigstr + + set digest [debug_digest] + r config set aof-use-rdb-preamble no + r bgrewriteaof + waitForBgrewriteaof r + r debug loadaof + set newdigest [debug_digest] + assert {$digest eq $newdigest} + } + + test {dismiss client output buffer} { + # Big output buffer + set item [string repeat "x" 100000] + for {set i 0} {$i < 100} {incr i} { + r lpush mylist $item + } + set rd [redis_deferring_client] + $rd lrange mylist 0 -1 + $rd flush + after 100 + + r bgsave + waitForBgsave r + assert_equal $item [r lpop mylist] + } + + test {dismiss client query buffer} { + # Big pending query buffer + set bigstr [string repeat A 8192] + set rd [redis_deferring_client] + $rd write "*2\r\n\$8192\r\n" + $rd write $bigstr\r\n + $rd flush + after 100 + + r bgsave + waitForBgsave r + } + + test {dismiss replication backlog} { + set master [srv 0 client] + start_server {} { + r slaveof [srv -1 host] [srv -1 port] + wait_for_sync r + + set bigstr [string repeat A 8192] + for {set i 0} {$i < 20} {incr i} { + $master set $i $bigstr + } + $master bgsave + waitForBgsave $master + } + } +} diff --git a/tests/integration/failover.tcl b/tests/integration/failover.tcl new file mode 100644 index 0000000..2cd9448 --- /dev/null +++ b/tests/integration/failover.tcl @@ -0,0 +1,294 @@ +start_server {tags {"failover external:skip"}} { +start_server {} { +start_server {} { + set node_0 [srv 0 client] + set node_0_host [srv 0 host] + set node_0_port [srv 0 port] + set node_0_pid [srv 0 pid] + + set node_1 [srv -1 client] + set node_1_host [srv -1 host] + set node_1_port [srv -1 port] + set node_1_pid [srv -1 pid] + + set node_2 [srv -2 client] + set node_2_host [srv -2 host] + set node_2_port [srv -2 port] + set node_2_pid [srv -2 pid] + + proc assert_digests_match {n1 n2 n3} { + assert_equal [$n1 debug digest] [$n2 debug digest] + assert_equal [$n2 debug digest] [$n3 debug digest] + } + + test {failover command fails without connected replica} { + catch { $node_0 failover to $node_1_host $node_1_port } err + if {! [string match "ERR*" $err]} { + fail "failover command succeeded when replica not connected" + } + } + + test {setup replication for following tests} { + $node_1 replicaof $node_0_host $node_0_port + $node_2 replicaof $node_0_host $node_0_port + wait_for_sync $node_1 + wait_for_sync $node_2 + } + + test {failover command fails with invalid host} { + catch { $node_0 failover to invalidhost $node_1_port } err + assert_match "ERR*" $err + } + + test {failover command fails with invalid port} { + catch { $node_0 failover to $node_1_host invalidport } err + assert_match "ERR*" $err + } + + test {failover command fails with just force and timeout} { + catch { $node_0 FAILOVER FORCE TIMEOUT 100} err + assert_match "ERR*" $err + } + + test {failover command fails when sent to a replica} { + catch { $node_1 failover to $node_1_host $node_1_port } err + assert_match "ERR*" $err + } + + test {failover command fails with force without timeout} { + catch { $node_0 failover to $node_1_host $node_1_port FORCE } err + assert_match "ERR*" $err + } + + test {failover command to specific replica works} { + set initial_psyncs [s -1 sync_partial_ok] + set initial_syncs [s -1 sync_full] + + # Generate a delta between primary and replica + set load_handler [start_write_load $node_0_host $node_0_port 5] + exec kill -SIGSTOP [srv -1 pid] + wait_for_condition 50 100 { + [s 0 total_commands_processed] > 100 + } else { + fail "Node 0 did not accept writes" + } + exec kill -SIGCONT [srv -1 pid] + + # Execute the failover + $node_0 failover to $node_1_host $node_1_port + + # Wait for failover to end + wait_for_condition 50 100 { + [s 0 master_failover_state] == "no-failover" + } else { + fail "Failover from node 0 to node 1 did not finish" + } + + # stop the write load and make sure no more commands processed + stop_write_load $load_handler + wait_load_handlers_disconnected + + $node_2 replicaof $node_1_host $node_1_port + wait_for_sync $node_0 + wait_for_sync $node_2 + + assert_match *slave* [$node_0 role] + assert_match *master* [$node_1 role] + assert_match *slave* [$node_2 role] + + # We should accept psyncs from both nodes + assert_equal [expr [s -1 sync_partial_ok] - $initial_psyncs] 2 + assert_equal [expr [s -1 sync_full] - $initial_psyncs] 0 + assert_digests_match $node_0 $node_1 $node_2 + } + + test {failover command to any replica works} { + set initial_psyncs [s -2 sync_partial_ok] + set initial_syncs [s -2 sync_full] + + wait_for_ofs_sync $node_1 $node_2 + # We stop node 0 to and make sure node 2 is selected + exec kill -SIGSTOP $node_0_pid + $node_1 set CASE 1 + $node_1 FAILOVER + + # Wait for failover to end + wait_for_condition 50 100 { + [s -1 master_failover_state] == "no-failover" + } else { + fail "Failover from node 1 to node 2 did not finish" + } + exec kill -SIGCONT $node_0_pid + $node_0 replicaof $node_2_host $node_2_port + + wait_for_sync $node_0 + wait_for_sync $node_1 + + assert_match *slave* [$node_0 role] + assert_match *slave* [$node_1 role] + assert_match *master* [$node_2 role] + + # We should accept Psyncs from both nodes + assert_equal [expr [s -2 sync_partial_ok] - $initial_psyncs] 2 + assert_equal [expr [s -1 sync_full] - $initial_psyncs] 0 + assert_digests_match $node_0 $node_1 $node_2 + } + + test {failover to a replica with force works} { + set initial_psyncs [s 0 sync_partial_ok] + set initial_syncs [s 0 sync_full] + + exec kill -SIGSTOP $node_0_pid + # node 0 will never acknowledge this write + $node_2 set case 2 + $node_2 failover to $node_0_host $node_0_port TIMEOUT 100 FORCE + + # Wait for node 0 to give up on sync attempt and start failover + wait_for_condition 50 100 { + [s -2 master_failover_state] == "failover-in-progress" + } else { + fail "Failover from node 2 to node 0 did not timeout" + } + + # Quick check that everyone is a replica, we never want a + # state where there are two masters. + assert_match *slave* [$node_1 role] + assert_match *slave* [$node_2 role] + + exec kill -SIGCONT $node_0_pid + + # Wait for failover to end + wait_for_condition 50 100 { + [s -2 master_failover_state] == "no-failover" + } else { + fail "Failover from node 2 to node 0 did not finish" + } + $node_1 replicaof $node_0_host $node_0_port + + wait_for_sync $node_1 + wait_for_sync $node_2 + + assert_match *master* [$node_0 role] + assert_match *slave* [$node_1 role] + assert_match *slave* [$node_2 role] + + assert_equal [count_log_message -2 "time out exceeded, failing over."] 1 + + # We should accept both psyncs, although this is the condition we might not + # since we didn't catch up. + assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 2 + assert_equal [expr [s 0 sync_full] - $initial_syncs] 0 + assert_digests_match $node_0 $node_1 $node_2 + } + + test {failover with timeout aborts if replica never catches up} { + set initial_psyncs [s 0 sync_partial_ok] + set initial_syncs [s 0 sync_full] + + # Stop replica so it never catches up + exec kill -SIGSTOP [srv -1 pid] + $node_0 SET CASE 1 + + $node_0 failover to [srv -1 host] [srv -1 port] TIMEOUT 500 + # Wait for failover to end + wait_for_condition 50 20 { + [s 0 master_failover_state] == "no-failover" + } else { + fail "Failover from node_0 to replica did not finish" + } + + exec kill -SIGCONT [srv -1 pid] + + # We need to make sure the nodes actually sync back up + wait_for_ofs_sync $node_0 $node_1 + wait_for_ofs_sync $node_0 $node_2 + + assert_match *master* [$node_0 role] + assert_match *slave* [$node_1 role] + assert_match *slave* [$node_2 role] + + # Since we never caught up, there should be no syncs + assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 0 + assert_equal [expr [s 0 sync_full] - $initial_syncs] 0 + assert_digests_match $node_0 $node_1 $node_2 + } + + test {failovers can be aborted} { + set initial_psyncs [s 0 sync_partial_ok] + set initial_syncs [s 0 sync_full] + + # Stop replica so it never catches up + exec kill -SIGSTOP [srv -1 pid] + $node_0 SET CASE 2 + + $node_0 failover to [srv -1 host] [srv -1 port] TIMEOUT 60000 + assert_match [s 0 master_failover_state] "waiting-for-sync" + + # Sanity check that read commands are still accepted + $node_0 GET CASE + + $node_0 failover abort + assert_match [s 0 master_failover_state] "no-failover" + + exec kill -SIGCONT [srv -1 pid] + + # Just make sure everything is still synced + wait_for_ofs_sync $node_0 $node_1 + wait_for_ofs_sync $node_0 $node_2 + + assert_match *master* [$node_0 role] + assert_match *slave* [$node_1 role] + assert_match *slave* [$node_2 role] + + # Since we never caught up, there should be no syncs + assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 0 + assert_equal [expr [s 0 sync_full] - $initial_syncs] 0 + assert_digests_match $node_0 $node_1 $node_2 + } + + test {failover aborts if target rejects sync request} { + set initial_psyncs [s 0 sync_partial_ok] + set initial_syncs [s 0 sync_full] + + # We block psync, so the failover will fail + $node_1 acl setuser default -psync + + # We pause the target long enough to send a write command + # during the pause. This write will not be interrupted. + exec kill -SIGSTOP [srv -1 pid] + set rd [redis_deferring_client] + $rd SET FOO BAR + $node_0 failover to $node_1_host $node_1_port + exec kill -SIGCONT [srv -1 pid] + + # Wait for failover to end + wait_for_condition 50 100 { + [s 0 master_failover_state] == "no-failover" + } else { + fail "Failover from node_0 to replica did not finish" + } + + assert_equal [$rd read] "OK" + $rd close + + # restore access to psync + $node_1 acl setuser default +psync + + # We need to make sure the nodes actually sync back up + wait_for_sync $node_1 + wait_for_sync $node_2 + + assert_match *master* [$node_0 role] + assert_match *slave* [$node_1 role] + assert_match *slave* [$node_2 role] + + # We will cycle all of our replicas here and force a psync. + assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 2 + assert_equal [expr [s 0 sync_full] - $initial_syncs] 0 + + assert_equal [count_log_message 0 "Failover target rejected psync request"] 1 + assert_digests_match $node_0 $node_1 $node_2 + } +} +} +} diff --git a/tests/integration/logging.tcl b/tests/integration/logging.tcl new file mode 100644 index 0000000..4f8639b --- /dev/null +++ b/tests/integration/logging.tcl @@ -0,0 +1,61 @@ +tags {"external:skip"} { + +set system_name [string tolower [exec uname -s]] +set backtrace_supported 0 + +# We only support darwin or Linux with glibc +if {$system_name eq {darwin}} { + set backtrace_supported 1 +} elseif {$system_name eq {linux}} { + # Avoid the test on libmusl, which does not support backtrace + # and on static binaries (ldd exit code 1) where we can't detect libmusl + catch { + set ldd [exec ldd src/redis-server] + if {![string match {*libc.*musl*} $ldd]} { + set backtrace_supported 1 + } + } +} + +if {$backtrace_supported} { + set server_path [tmpdir server.log] + start_server [list overrides [list dir $server_path]] { + test "Server is able to generate a stack trace on selected systems" { + r config set watchdog-period 200 + r debug sleep 1 + set pattern "*debugCommand*" + set res [wait_for_log_messages 0 \"$pattern\" 0 100 100] + if {$::verbose} { puts $res } + } + } +} + +# Valgrind will complain that the process terminated by a signal, skip it. +if {!$::valgrind} { + if {$backtrace_supported} { + set crash_pattern "*STACK TRACE*" + } else { + set crash_pattern "*crashed by signal*" + } + + set server_path [tmpdir server1.log] + start_server [list overrides [list dir $server_path crash-memcheck-enabled no]] { + test "Crash report generated on SIGABRT" { + set pid [s process_id] + exec kill -SIGABRT $pid + set res [wait_for_log_messages 0 \"$crash_pattern\" 0 50 100] + if {$::verbose} { puts $res } + } + } + + set server_path [tmpdir server2.log] + start_server [list overrides [list dir $server_path crash-memcheck-enabled no]] { + test "Crash report generated on DEBUG SEGFAULT" { + catch {r debug segfault} + set res [wait_for_log_messages 0 \"$crash_pattern\" 0 50 100] + if {$::verbose} { puts $res } + } + } +} + +} diff --git a/tests/integration/psync2-master-restart.tcl b/tests/integration/psync2-master-restart.tcl new file mode 100644 index 0000000..a9e21d1 --- /dev/null +++ b/tests/integration/psync2-master-restart.tcl @@ -0,0 +1,218 @@ +start_server {tags {"psync2 external:skip"}} { +start_server {} { +start_server {} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + set replica [srv -1 client] + set replica_host [srv -1 host] + set replica_port [srv -1 port] + + set sub_replica [srv -2 client] + + # Make sure the server saves an RDB on shutdown + $master config set save "3600 1" + + # Because we will test partial resync later, we don’t want a timeout to cause + # the master-replica disconnect, then the extra reconnections will break the + # sync_partial_ok stat test + $master config set repl-timeout 3600 + $replica config set repl-timeout 3600 + $sub_replica config set repl-timeout 3600 + + # Avoid PINGs + $master config set repl-ping-replica-period 3600 + $master config rewrite + + # Build replication chain + $replica replicaof $master_host $master_port + $sub_replica replicaof $replica_host $replica_port + + wait_for_condition 50 100 { + [status $replica master_link_status] eq {up} && + [status $sub_replica master_link_status] eq {up} + } else { + fail "Replication not started." + } + + test "PSYNC2: Partial resync after Master restart using RDB aux fields when offset is 0" { + assert {[status $master master_repl_offset] == 0} + + set replid [status $master master_replid] + $replica config resetstat + + catch { + restart_server 0 true false true now + set master [srv 0 client] + } + wait_for_condition 50 1000 { + [status $replica master_link_status] eq {up} && + [status $sub_replica master_link_status] eq {up} + } else { + fail "Replicas didn't sync after master restart" + } + + # Make sure master restore replication info correctly + assert {[status $master master_replid] != $replid} + assert {[status $master master_repl_offset] == 0} + assert {[status $master master_replid2] eq $replid} + assert {[status $master second_repl_offset] == 1} + + # Make sure master set replication backlog correctly + assert {[status $master repl_backlog_active] == 1} + assert {[status $master repl_backlog_first_byte_offset] == 1} + assert {[status $master repl_backlog_histlen] == 0} + + # Partial resync after Master restart + assert {[status $master sync_partial_ok] == 1} + assert {[status $replica sync_partial_ok] == 1} + } + + # Generate some data + createComplexDataset $master 1000 + + test "PSYNC2: Partial resync after Master restart using RDB aux fields with data" { + wait_for_condition 500 100 { + [status $master master_repl_offset] == [status $replica master_repl_offset] && + [status $master master_repl_offset] == [status $sub_replica master_repl_offset] + } else { + fail "Replicas and master offsets were unable to match *exactly*." + } + + set replid [status $master master_replid] + set offset [status $master master_repl_offset] + $replica config resetstat + + catch { + # SHUTDOWN NOW ensures master doesn't send GETACK to replicas before + # shutting down which would affect the replication offset. + restart_server 0 true false true now + set master [srv 0 client] + } + wait_for_condition 50 1000 { + [status $replica master_link_status] eq {up} && + [status $sub_replica master_link_status] eq {up} + } else { + fail "Replicas didn't sync after master restart" + } + + # Make sure master restore replication info correctly + assert {[status $master master_replid] != $replid} + assert {[status $master master_repl_offset] == $offset} + assert {[status $master master_replid2] eq $replid} + assert {[status $master second_repl_offset] == [expr $offset+1]} + + # Make sure master set replication backlog correctly + assert {[status $master repl_backlog_active] == 1} + assert {[status $master repl_backlog_first_byte_offset] == [expr $offset+1]} + assert {[status $master repl_backlog_histlen] == 0} + + # Partial resync after Master restart + assert {[status $master sync_partial_ok] == 1} + assert {[status $replica sync_partial_ok] == 1} + } + + test "PSYNC2: Partial resync after Master restart using RDB aux fields with expire" { + $master debug set-active-expire 0 + for {set j 0} {$j < 1024} {incr j} { + $master select [expr $j%16] + $master set $j somevalue px 10 + } + + after 20 + + # Wait until master has received ACK from replica. If the master thinks + # that any replica is lagging when it shuts down, master would send + # GETACK to the replicas, affecting the replication offset. + set offset [status $master master_repl_offset] + wait_for_condition 500 100 { + [string match "*slave0:*,offset=$offset,*" [$master info replication]] && + $offset == [status $replica master_repl_offset] && + $offset == [status $sub_replica master_repl_offset] + } else { + show_cluster_status + fail "Replicas and master offsets were unable to match *exactly*." + } + + set offset [status $master master_repl_offset] + $replica config resetstat + + catch { + # Unlike the test above, here we use SIGTERM, which behaves + # differently compared to SHUTDOWN NOW if there are lagging + # replicas. This is just to increase coverage and let each test use + # a different shutdown approach. In this case there are no lagging + # replicas though. + restart_server 0 true false + set master [srv 0 client] + } + wait_for_condition 50 1000 { + [status $replica master_link_status] eq {up} && + [status $sub_replica master_link_status] eq {up} + } else { + fail "Replicas didn't sync after master restart" + } + + set expired_offset [status $master repl_backlog_histlen] + # Stale keys expired and master_repl_offset grows correctly + assert {[status $master rdb_last_load_keys_expired] == 1024} + assert {[status $master master_repl_offset] == [expr $offset+$expired_offset]} + + # Partial resync after Master restart + assert {[status $master sync_partial_ok] == 1} + assert {[status $replica sync_partial_ok] == 1} + + set digest [$master debug digest] + assert {$digest eq [$replica debug digest]} + assert {$digest eq [$sub_replica debug digest]} + } + + test "PSYNC2: Full resync after Master restart when too many key expired" { + $master config set repl-backlog-size 16384 + $master config rewrite + + $master debug set-active-expire 0 + # Make sure replication backlog is full and will be trimmed. + for {set j 0} {$j < 2048} {incr j} { + $master select [expr $j%16] + $master set $j somevalue px 10 + } + + after 20 + + wait_for_condition 500 100 { + [status $master master_repl_offset] == [status $replica master_repl_offset] && + [status $master master_repl_offset] == [status $sub_replica master_repl_offset] + } else { + fail "Replicas and master offsets were unable to match *exactly*." + } + + $replica config resetstat + + catch { + # Unlike the test above, here we use SIGTERM. This is just to + # increase coverage and let each test use a different shutdown + # approach. + restart_server 0 true false + set master [srv 0 client] + } + wait_for_condition 50 1000 { + [status $replica master_link_status] eq {up} && + [status $sub_replica master_link_status] eq {up} + } else { + fail "Replicas didn't sync after master restart" + } + + # Replication backlog is full + assert {[status $master repl_backlog_first_byte_offset] > [status $master second_repl_offset]} + assert {[status $master sync_partial_ok] == 0} + assert {[status $master sync_full] == 1} + assert {[status $master rdb_last_load_keys_expired] == 2048} + assert {[status $replica sync_full] == 1} + + set digest [$master debug digest] + assert {$digest eq [$replica debug digest]} + assert {$digest eq [$sub_replica debug digest]} + } +}}} diff --git a/tests/integration/psync2-pingoff.tcl b/tests/integration/psync2-pingoff.tcl new file mode 100644 index 0000000..3589d07 --- /dev/null +++ b/tests/integration/psync2-pingoff.tcl @@ -0,0 +1,250 @@ +# These tests were added together with the meaningful offset implementation +# in redis 6.0.0, which was later abandoned in 6.0.4, they used to test that +# servers are able to PSYNC with replicas even if the replication stream has +# PINGs at the end which present in one sever and missing on another. +# We keep these tests just because they reproduce edge cases in the replication +# logic in hope they'll be able to spot some problem in the future. + +start_server {tags {"psync2 external:skip"}} { +start_server {} { + # Config + set debug_msg 0 ; # Enable additional debug messages + + for {set j 0} {$j < 2} {incr j} { + set R($j) [srv [expr 0-$j] client] + set R_host($j) [srv [expr 0-$j] host] + set R_port($j) [srv [expr 0-$j] port] + $R($j) CONFIG SET repl-ping-replica-period 1 + if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"} + } + + # Setup replication + test "PSYNC2 pingoff: setup" { + $R(1) replicaof $R_host(0) $R_port(0) + $R(0) set foo bar + wait_for_condition 50 1000 { + [status $R(1) master_link_status] == "up" && + [$R(0) dbsize] == 1 && [$R(1) dbsize] == 1 + } else { + fail "Replicas not replicating from master" + } + } + + test "PSYNC2 pingoff: write and wait replication" { + $R(0) INCR counter + $R(0) INCR counter + $R(0) INCR counter + wait_for_condition 50 1000 { + [$R(0) GET counter] eq [$R(1) GET counter] + } else { + fail "Master and replica don't agree about counter" + } + } + + # In this test we'll make sure the replica will get stuck, but with + # an active connection: this way the master will continue to send PINGs + # every second (we modified the PING period earlier) + test "PSYNC2 pingoff: pause replica and promote it" { + $R(1) MULTI + $R(1) DEBUG SLEEP 5 + $R(1) SLAVEOF NO ONE + $R(1) EXEC + $R(1) ping ; # Wait for it to return back available + } + + test "Make the old master a replica of the new one and check conditions" { + # We set the new master's ping period to a high value, so that there's + # no chance for a race condition of sending a PING in between the two + # INFO calls in the assert for master_repl_offset match below. + $R(1) CONFIG SET repl-ping-replica-period 1000 + + assert_equal [status $R(1) sync_full] 0 + $R(0) REPLICAOF $R_host(1) $R_port(1) + + wait_for_condition 50 1000 { + [status $R(0) master_link_status] == "up" + } else { + fail "The new master was not able to sync" + } + + # make sure replication is still alive and kicking + $R(1) incr x + wait_for_condition 50 1000 { + [status $R(0) loading] == 0 && + [$R(0) get x] == 1 + } else { + fail "replica didn't get incr" + } + assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset] + } +}} + + +start_server {tags {"psync2 external:skip"}} { +start_server {} { +start_server {} { +start_server {} { +start_server {} { + test {test various edge cases of repl topology changes with missing pings at the end} { + set master [srv -4 client] + set master_host [srv -4 host] + set master_port [srv -4 port] + set replica1 [srv -3 client] + set replica2 [srv -2 client] + set replica3 [srv -1 client] + set replica4 [srv -0 client] + + $replica1 replicaof $master_host $master_port + $replica2 replicaof $master_host $master_port + $replica3 replicaof $master_host $master_port + $replica4 replicaof $master_host $master_port + wait_for_condition 50 1000 { + [status $master connected_slaves] == 4 + } else { + fail "replicas didn't connect" + } + + $master incr x + wait_for_condition 50 1000 { + [$replica1 get x] == 1 && [$replica2 get x] == 1 && + [$replica3 get x] == 1 && [$replica4 get x] == 1 + } else { + fail "replicas didn't get incr" + } + + # disconnect replica1 and replica2 + # and wait for the master to send a ping to replica3 and replica4 + $replica1 replicaof no one + $replica2 replicaof 127.0.0.1 1 ;# we can't promote it to master since that will cycle the replication id + $master config set repl-ping-replica-period 1 + set replofs [status $master master_repl_offset] + wait_for_condition 50 100 { + [status $replica3 master_repl_offset] > $replofs && + [status $replica4 master_repl_offset] > $replofs + } else { + fail "replica didn't sync in time" + } + + # make everyone sync from the replica1 that didn't get the last ping from the old master + # replica4 will keep syncing from the old master which now syncs from replica1 + # and replica2 will re-connect to the old master (which went back in time) + set new_master_host [srv -3 host] + set new_master_port [srv -3 port] + $replica3 replicaof $new_master_host $new_master_port + $master replicaof $new_master_host $new_master_port + $replica2 replicaof $master_host $master_port + wait_for_condition 50 1000 { + [status $replica2 master_link_status] == "up" && + [status $replica3 master_link_status] == "up" && + [status $replica4 master_link_status] == "up" && + [status $master master_link_status] == "up" + } else { + fail "replicas didn't connect" + } + + # make sure replication is still alive and kicking + $replica1 incr x + wait_for_condition 50 1000 { + [$replica2 get x] == 2 && + [$replica3 get x] == 2 && + [$replica4 get x] == 2 && + [$master get x] == 2 + } else { + fail "replicas didn't get incr" + } + + # make sure we have the right amount of full syncs + assert_equal [status $master sync_full] 6 + assert_equal [status $replica1 sync_full] 2 + assert_equal [status $replica2 sync_full] 0 + assert_equal [status $replica3 sync_full] 0 + assert_equal [status $replica4 sync_full] 0 + + # force psync + $master client kill type master + $replica2 client kill type master + $replica3 client kill type master + $replica4 client kill type master + + # make sure replication is still alive and kicking + $replica1 incr x + wait_for_condition 50 1000 { + [$replica2 get x] == 3 && + [$replica3 get x] == 3 && + [$replica4 get x] == 3 && + [$master get x] == 3 + } else { + fail "replicas didn't get incr" + } + + # make sure we have the right amount of full syncs + assert_equal [status $master sync_full] 6 + assert_equal [status $replica1 sync_full] 2 + assert_equal [status $replica2 sync_full] 0 + assert_equal [status $replica3 sync_full] 0 + assert_equal [status $replica4 sync_full] 0 +} +}}}}} + +start_server {tags {"psync2 external:skip"}} { +start_server {} { +start_server {} { + + for {set j 0} {$j < 3} {incr j} { + set R($j) [srv [expr 0-$j] client] + set R_host($j) [srv [expr 0-$j] host] + set R_port($j) [srv [expr 0-$j] port] + $R($j) CONFIG SET repl-ping-replica-period 1 + } + + test "Chained replicas disconnect when replica re-connect with the same master" { + # Add a second replica as a chained replica of the current replica + $R(1) replicaof $R_host(0) $R_port(0) + $R(2) replicaof $R_host(1) $R_port(1) + wait_for_condition 50 1000 { + [status $R(2) master_link_status] == "up" + } else { + fail "Chained replica not replicating from its master" + } + + # Do a write on the master, and wait for the master to + # send some PINGs to its replica + $R(0) INCR counter2 + set replofs [status $R(0) master_repl_offset] + wait_for_condition 50 100 { + [status $R(1) master_repl_offset] > $replofs && + [status $R(2) master_repl_offset] > $replofs + } else { + fail "replica didn't sync in time" + } + set sync_partial_master [status $R(0) sync_partial_ok] + set sync_partial_replica [status $R(1) sync_partial_ok] + $R(0) CONFIG SET repl-ping-replica-period 100 + + # Disconnect the master's direct replica + $R(0) client kill type replica + wait_for_condition 50 1000 { + [status $R(1) master_link_status] == "up" && + [status $R(2) master_link_status] == "up" && + [status $R(0) sync_partial_ok] == $sync_partial_master + 1 && + [status $R(1) sync_partial_ok] == $sync_partial_replica + } else { + fail "Disconnected replica failed to PSYNC with master" + } + + # Verify that the replica and its replica's meaningful and real + # offsets match with the master + assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset] + assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset] + + # make sure replication is still alive and kicking + $R(0) incr counter2 + wait_for_condition 50 1000 { + [$R(1) get counter2] == 2 && [$R(2) get counter2] == 2 + } else { + fail "replicas didn't get incr" + } + assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset] + assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset] + } +}}} diff --git a/tests/integration/psync2-reg.tcl b/tests/integration/psync2-reg.tcl new file mode 100644 index 0000000..b8dd101 --- /dev/null +++ b/tests/integration/psync2-reg.tcl @@ -0,0 +1,82 @@ +# Issue 3899 regression test. +# We create a chain of three instances: master -> slave -> slave2 +# and continuously break the link while traffic is generated by +# redis-benchmark. At the end we check that the data is the same +# everywhere. + +start_server {tags {"psync2 external:skip"}} { +start_server {} { +start_server {} { + # Config + set debug_msg 0 ; # Enable additional debug messages + + set no_exit 0 ; # Do not exit at end of the test + + set duration 20 ; # Total test seconds + + for {set j 0} {$j < 3} {incr j} { + set R($j) [srv [expr 0-$j] client] + set R_host($j) [srv [expr 0-$j] host] + set R_port($j) [srv [expr 0-$j] port] + set R_unixsocket($j) [srv [expr 0-$j] unixsocket] + if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"} + } + + # Setup the replication and backlog parameters + test "PSYNC2 #3899 regression: setup" { + $R(1) slaveof $R_host(0) $R_port(0) + $R(2) slaveof $R_host(0) $R_port(0) + $R(0) set foo bar + wait_for_condition 50 1000 { + [status $R(1) master_link_status] == "up" && + [status $R(2) master_link_status] == "up" && + [$R(1) dbsize] == 1 && + [$R(2) dbsize] == 1 + } else { + fail "Replicas not replicating from master" + } + $R(0) config set repl-backlog-size 10mb + $R(1) config set repl-backlog-size 10mb + } + + set cycle_start_time [clock milliseconds] + set bench_pid [exec src/redis-benchmark -s $R_unixsocket(0) -n 10000000 -r 1000 incr __rand_int__ > /dev/null &] + while 1 { + set elapsed [expr {[clock milliseconds]-$cycle_start_time}] + if {$elapsed > $duration*1000} break + if {rand() < .05} { + test "PSYNC2 #3899 regression: kill first replica" { + $R(1) client kill type master + } + } + if {rand() < .05} { + test "PSYNC2 #3899 regression: kill chained replica" { + $R(2) client kill type master + } + } + after 100 + } + exec kill -9 $bench_pid + + if {$debug_msg} { + for {set j 0} {$j < 100} {incr j} { + if { + [$R(0) debug digest] == [$R(1) debug digest] && + [$R(1) debug digest] == [$R(2) debug digest] + } break + puts [$R(0) debug digest] + puts [$R(1) debug digest] + puts [$R(2) debug digest] + after 1000 + } + } + + test "PSYNC2 #3899 regression: verify consistency" { + wait_for_condition 50 1000 { + ([$R(0) debug digest] eq [$R(1) debug digest]) && + ([$R(1) debug digest] eq [$R(2) debug digest]) + } else { + fail "The three instances have different data sets" + } + } +}}} diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl new file mode 100644 index 0000000..a258f1b --- /dev/null +++ b/tests/integration/psync2.tcl @@ -0,0 +1,382 @@ + +proc show_cluster_status {} { + uplevel 1 { + # The following is the regexp we use to match the log line + # time info. Logs are in the following form: + # + # 11296:M 25 May 2020 17:37:14.652 # Server initialized + set log_regexp {^[0-9]+:[A-Z] [0-9]+ [A-z]+ [0-9]+ ([0-9:.]+) .*} + set repl_regexp {(master|repl|sync|backlog|meaningful|offset)} + + puts "Master ID is $master_id" + for {set j 0} {$j < 5} {incr j} { + puts "$j: sync_full: [status $R($j) sync_full]" + puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]" + puts "$j: id2 : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]" + puts "$j: backlog : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]" + puts "$j: x var is : [$R($j) GET x]" + puts "---" + } + + # Show the replication logs of every instance, interleaving + # them by the log date. + # + # First: load the lines as lists for each instance. + array set log {} + for {set j 0} {$j < 5} {incr j} { + set fd [open $R_log($j)] + while {[gets $fd l] >= 0} { + if {[regexp $log_regexp $l] && + [regexp -nocase $repl_regexp $l]} { + lappend log($j) $l + } + } + close $fd + } + + # To interleave the lines, at every step consume the element of + # the list with the lowest time and remove it. Do it until + # all the lists are empty. + # + # regexp {^[0-9]+:[A-Z] [0-9]+ [A-z]+ [0-9]+ ([0-9:.]+) .*} $l - logdate + while 1 { + # Find the log with smallest time. + set empty 0 + set best 0 + set bestdate {} + for {set j 0} {$j < 5} {incr j} { + if {[llength $log($j)] == 0} { + incr empty + continue + } + regexp $log_regexp [lindex $log($j) 0] - date + if {$bestdate eq {}} { + set best $j + set bestdate $date + } else { + if {[string compare $bestdate $date] > 0} { + set best $j + set bestdate $date + } + } + } + if {$empty == 5} break ; # Our exit condition: no more logs + + # Emit the one with the smallest time (that is the first + # event in the time line). + puts "\[$best port $R_port($best)\] [lindex $log($best) 0]" + set log($best) [lrange $log($best) 1 end] + } + } +} + +start_server {tags {"psync2 external:skip"}} { +start_server {} { +start_server {} { +start_server {} { +start_server {} { + set master_id 0 ; # Current master + set start_time [clock seconds] ; # Test start time + set counter_value 0 ; # Current value of the Redis counter "x" + + # Config + set debug_msg 0 ; # Enable additional debug messages + + set no_exit 0 ; # Do not exit at end of the test + + set duration 40 ; # Total test seconds + + set genload 1 ; # Load master with writes at every cycle + + set genload_time 5000 ; # Writes duration time in ms + + set disconnect 1 ; # Break replication link between random + # master and slave instances while the + # master is loaded with writes. + + set disconnect_period 1000 ; # Disconnect repl link every N ms. + + for {set j 0} {$j < 5} {incr j} { + set R($j) [srv [expr 0-$j] client] + set R_host($j) [srv [expr 0-$j] host] + set R_port($j) [srv [expr 0-$j] port] + set R_id_from_port($R_port($j)) $j ;# To get a replica index by port + set R_log($j) [srv [expr 0-$j] stdout] + if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"} + } + + set cycle 0 + while {([clock seconds]-$start_time) < $duration} { + incr cycle + test "PSYNC2: --- CYCLE $cycle ---" {} + + # Create a random replication layout. + # Start with switching master (this simulates a failover). + + # 1) Select the new master. + set master_id [randomInt 5] + set used [list $master_id] + test "PSYNC2: \[NEW LAYOUT\] Set #$master_id as master" { + $R($master_id) slaveof no one + $R($master_id) config set repl-ping-replica-period 1 ;# increase the chance that random ping will cause issues + if {$counter_value == 0} { + $R($master_id) set x $counter_value + } + } + + # Build a lookup with the root master of each replica (head of the chain). + array set root_master {} + for {set j 0} {$j < 5} {incr j} { + set r $j + while {1} { + set r_master_port [status $R($r) master_port] + if {$r_master_port == ""} { + set root_master($j) $r + break + } + set r_master_id $R_id_from_port($r_master_port) + set r $r_master_id + } + } + + # Wait for the newly detached master-replica chain (new master and existing replicas that were + # already connected to it, to get updated on the new replication id. + # This is needed to avoid a race that can result in a full sync when a replica that already + # got an updated repl id, tries to psync from one that's not yet aware of it. + wait_for_condition 50 1000 { + ([status $R(0) master_replid] == [status $R($root_master(0)) master_replid]) && + ([status $R(1) master_replid] == [status $R($root_master(1)) master_replid]) && + ([status $R(2) master_replid] == [status $R($root_master(2)) master_replid]) && + ([status $R(3) master_replid] == [status $R($root_master(3)) master_replid]) && + ([status $R(4) master_replid] == [status $R($root_master(4)) master_replid]) + } else { + show_cluster_status + fail "Replica did not inherit the new replid." + } + + # Build a lookup with the direct connection master of each replica. + # First loop that uses random to decide who replicates from who. + array set slave_to_master {} + while {[llength $used] != 5} { + while 1 { + set slave_id [randomInt 5] + if {[lsearch -exact $used $slave_id] == -1} break + } + set rand [randomInt [llength $used]] + set mid [lindex $used $rand] + set slave_to_master($slave_id) $mid + lappend used $slave_id + } + + # 2) Attach all the slaves to a random instance + # Second loop that does the actual SLAVEOF command and make sure execute it in the right order. + while {[array size slave_to_master] > 0} { + foreach slave_id [array names slave_to_master] { + set mid $slave_to_master($slave_id) + + # We only attach the replica to a random instance that already in the old/new chain. + if {$root_master($mid) == $root_master($master_id)} { + # Find a replica that can be attached to the new chain already attached to the new master. + # My new master is in the new chain. + } elseif {$root_master($mid) == $root_master($slave_id)} { + # My new master and I are in the old chain. + } else { + # In cycle 1, we do not care about the order. + if {$cycle != 1} { + # skipping this replica for now to avoid attaching in a bad order + # this is done to avoid an unexpected full sync, when we take a + # replica that already reconnected to the new chain and got a new replid + # and is then set to connect to a master that's still not aware of that new replid + continue + } + } + + set master_host $R_host($master_id) + set master_port $R_port($master_id) + + test "PSYNC2: Set #$slave_id to replicate from #$mid" { + $R($slave_id) slaveof $master_host $master_port + } + + # Wait for replica to be connected before we proceed. + wait_for_condition 50 1000 { + [status $R($slave_id) master_link_status] == "up" + } else { + show_cluster_status + fail "Replica not reconnecting." + } + + set root_master($slave_id) $root_master($mid) + unset slave_to_master($slave_id) + break + } + } + + # Wait for replicas to sync. so next loop won't get -LOADING error + wait_for_condition 50 1000 { + [status $R([expr {($master_id+1)%5}]) master_link_status] == "up" && + [status $R([expr {($master_id+2)%5}]) master_link_status] == "up" && + [status $R([expr {($master_id+3)%5}]) master_link_status] == "up" && + [status $R([expr {($master_id+4)%5}]) master_link_status] == "up" + } else { + show_cluster_status + fail "Replica not reconnecting" + } + + # 3) Increment the counter and wait for all the instances + # to converge. + test "PSYNC2: cluster is consistent after failover" { + $R($master_id) incr x; incr counter_value + for {set j 0} {$j < 5} {incr j} { + wait_for_condition 50 1000 { + [$R($j) get x] == $counter_value + } else { + show_cluster_status + fail "Instance #$j x variable is inconsistent" + } + } + } + + # 4) Generate load while breaking the connection of random + # slave-master pairs. + test "PSYNC2: generate load while killing replication links" { + set t [clock milliseconds] + set next_break [expr {$t+$disconnect_period}] + while {[clock milliseconds]-$t < $genload_time} { + if {$genload} { + $R($master_id) incr x; incr counter_value + } + if {[clock milliseconds] == $next_break} { + set next_break \ + [expr {[clock milliseconds]+$disconnect_period}] + set slave_id [randomInt 5] + if {$disconnect} { + $R($slave_id) client kill type master + if {$debug_msg} { + puts "+++ Breaking link for replica #$slave_id" + } + } + } + } + } + + # 5) Increment the counter and wait for all the instances + set x [$R($master_id) get x] + test "PSYNC2: cluster is consistent after load (x = $x)" { + for {set j 0} {$j < 5} {incr j} { + wait_for_condition 50 1000 { + [$R($j) get x] == $counter_value + } else { + show_cluster_status + fail "Instance #$j x variable is inconsistent" + } + } + } + + # wait for all the slaves to be in sync. + set masteroff [status $R($master_id) master_repl_offset] + wait_for_condition 500 100 { + [status $R(0) master_repl_offset] >= $masteroff && + [status $R(1) master_repl_offset] >= $masteroff && + [status $R(2) master_repl_offset] >= $masteroff && + [status $R(3) master_repl_offset] >= $masteroff && + [status $R(4) master_repl_offset] >= $masteroff + } else { + show_cluster_status + fail "Replicas offsets didn't catch up with the master after too long time." + } + + if {$debug_msg} { + show_cluster_status + } + + test "PSYNC2: total sum of full synchronizations is exactly 4" { + set sum 0 + for {set j 0} {$j < 5} {incr j} { + incr sum [status $R($j) sync_full] + } + if {$sum != 4} { + show_cluster_status + assert {$sum == 4} + } + } + + # In absence of pings, are the instances really able to have + # the exact same offset? + $R($master_id) config set repl-ping-replica-period 3600 + for {set j 0} {$j < 5} {incr j} { + if {$j == $master_id} continue + $R($j) config set repl-timeout 10000 + } + wait_for_condition 500 100 { + [status $R($master_id) master_repl_offset] == [status $R(0) master_repl_offset] && + [status $R($master_id) master_repl_offset] == [status $R(1) master_repl_offset] && + [status $R($master_id) master_repl_offset] == [status $R(2) master_repl_offset] && + [status $R($master_id) master_repl_offset] == [status $R(3) master_repl_offset] && + [status $R($master_id) master_repl_offset] == [status $R(4) master_repl_offset] + } else { + show_cluster_status + fail "Replicas and master offsets were unable to match *exactly*." + } + + # Limit anyway the maximum number of cycles. This is useful when the + # test is skipped via --only option of the test suite. In that case + # we don't want to see many seconds of this test being just skipped. + if {$cycle > 50} break + } + + test "PSYNC2: Bring the master back again for next test" { + $R($master_id) slaveof no one + set master_host $R_host($master_id) + set master_port $R_port($master_id) + for {set j 0} {$j < 5} {incr j} { + if {$j == $master_id} continue + $R($j) slaveof $master_host $master_port + } + + # Wait for replicas to sync. it is not enough to just wait for connected_slaves==4 + # since we might do the check before the master realized that they're disconnected + wait_for_condition 50 1000 { + [status $R($master_id) connected_slaves] == 4 && + [status $R([expr {($master_id+1)%5}]) master_link_status] == "up" && + [status $R([expr {($master_id+2)%5}]) master_link_status] == "up" && + [status $R([expr {($master_id+3)%5}]) master_link_status] == "up" && + [status $R([expr {($master_id+4)%5}]) master_link_status] == "up" + } else { + show_cluster_status + fail "Replica not reconnecting" + } + } + + test "PSYNC2: Partial resync after restart using RDB aux fields" { + # Pick a random slave + set slave_id [expr {($master_id+1)%5}] + set sync_count [status $R($master_id) sync_full] + set sync_partial [status $R($master_id) sync_partial_ok] + set sync_partial_err [status $R($master_id) sync_partial_err] + catch { + $R($slave_id) config rewrite + restart_server [expr {0-$slave_id}] true false + set R($slave_id) [srv [expr {0-$slave_id}] client] + } + # note: just waiting for connected_slaves==4 has a race condition since + # we might do the check before the master realized that the slave disconnected + wait_for_condition 50 1000 { + [status $R($master_id) sync_partial_ok] == $sync_partial + 1 + } else { + puts "prev sync_full: $sync_count" + puts "prev sync_partial_ok: $sync_partial" + puts "prev sync_partial_err: $sync_partial_err" + puts [$R($master_id) info stats] + show_cluster_status + fail "Replica didn't partial sync" + } + set new_sync_count [status $R($master_id) sync_full] + assert {$sync_count == $new_sync_count} + } + + if {$no_exit} { + while 1 { puts -nonewline .; flush stdout; after 1000} + } + +}}}}} diff --git a/tests/integration/rdb.tcl b/tests/integration/rdb.tcl new file mode 100644 index 0000000..104d372 --- /dev/null +++ b/tests/integration/rdb.tcl @@ -0,0 +1,414 @@ +tags {"rdb external:skip"} { + +set server_path [tmpdir "server.rdb-encoding-test"] + +# Copy RDB with different encodings in server path +exec cp tests/assets/encodings.rdb $server_path +exec cp tests/assets/list-quicklist.rdb $server_path + +start_server [list overrides [list "dir" $server_path "dbfilename" "list-quicklist.rdb"]] { + test "test old version rdb file" { + r select 0 + assert_equal [r get x] 7 + r lpop list + } {7} +} + +start_server [list overrides [list "dir" $server_path "dbfilename" "encodings.rdb"]] { + test "RDB encoding loading test" { + r select 0 + csvdump r + } {"0","compressible","string","aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +"0","hash","hash","a","1","aa","10","aaa","100","b","2","bb","20","bbb","200","c","3","cc","30","ccc","300","ddd","400","eee","5000000000", +"0","hash_zipped","hash","a","1","b","2","c","3", +"0","list","list","1","2","3","a","b","c","100000","6000000000","1","2","3","a","b","c","100000","6000000000","1","2","3","a","b","c","100000","6000000000", +"0","list_zipped","list","1","2","3","a","b","c","100000","6000000000", +"0","number","string","10" +"0","set","set","1","100000","2","3","6000000000","a","b","c", +"0","set_zipped_1","set","1","2","3","4", +"0","set_zipped_2","set","100000","200000","300000","400000", +"0","set_zipped_3","set","1000000000","2000000000","3000000000","4000000000","5000000000","6000000000", +"0","string","string","Hello World" +"0","zset","zset","a","1","b","2","c","3","aa","10","bb","20","cc","30","aaa","100","bbb","200","ccc","300","aaaa","1000","cccc","123456789","bbbb","5000000000", +"0","zset_zipped","zset","a","1","b","2","c","3", +} +} + +set server_path [tmpdir "server.rdb-startup-test"] + +start_server [list overrides [list "dir" $server_path] keep_persistence true] { + test {Server started empty with non-existing RDB file} { + debug_digest + } {0000000000000000000000000000000000000000} + # Save an RDB file, needed for the next test. + r save +} + +start_server [list overrides [list "dir" $server_path] keep_persistence true] { + test {Server started empty with empty RDB file} { + debug_digest + } {0000000000000000000000000000000000000000} +} + +start_server [list overrides [list "dir" $server_path] keep_persistence true] { + test {Test RDB stream encoding} { + for {set j 0} {$j < 1000} {incr j} { + if {rand() < 0.9} { + r xadd stream * foo abc + } else { + r xadd stream * bar $j + } + } + r xgroup create stream mygroup 0 + set records [r xreadgroup GROUP mygroup Alice COUNT 2 STREAMS stream >] + r xdel stream [lindex [lindex [lindex [lindex $records 0] 1] 1] 0] + r xack stream mygroup [lindex [lindex [lindex [lindex $records 0] 1] 0] 0] + set digest [debug_digest] + r config set sanitize-dump-payload no + r debug reload + set newdigest [debug_digest] + assert {$digest eq $newdigest} + } + test {Test RDB stream encoding - sanitize dump} { + r config set sanitize-dump-payload yes + r debug reload + set newdigest [debug_digest] + assert {$digest eq $newdigest} + } + # delete the stream, maybe valgrind will find something + r del stream +} + +# Helper function to start a server and kill it, just to check the error +# logged. +set defaults {} +proc start_server_and_kill_it {overrides code} { + upvar defaults defaults srv srv server_path server_path + set config [concat $defaults $overrides] + set srv [start_server [list overrides $config keep_persistence true]] + uplevel 1 $code + kill_server $srv +} + +# Make the RDB file unreadable +file attributes [file join $server_path dump.rdb] -permissions 0222 + +# Detect root account (it is able to read the file even with 002 perm) +set isroot 0 +catch { + open [file join $server_path dump.rdb] + set isroot 1 +} + +# Now make sure the server aborted with an error +if {!$isroot} { + start_server_and_kill_it [list "dir" $server_path] { + test {Server should not start if RDB file can't be open} { + wait_for_condition 50 100 { + [string match {*Fatal error loading*} \ + [exec tail -1 < [dict get $srv stdout]]] + } else { + fail "Server started even if RDB was unreadable!" + } + } + } +} + +# Fix permissions of the RDB file. +file attributes [file join $server_path dump.rdb] -permissions 0666 + +# Corrupt its CRC64 checksum. +set filesize [file size [file join $server_path dump.rdb]] +set fd [open [file join $server_path dump.rdb] r+] +fconfigure $fd -translation binary +seek $fd -8 end +puts -nonewline $fd "foobar00"; # Corrupt the checksum +close $fd + +# Now make sure the server aborted with an error +start_server_and_kill_it [list "dir" $server_path] { + test {Server should not start if RDB is corrupted} { + wait_for_condition 50 100 { + [string match {*CRC error*} \ + [exec tail -10 < [dict get $srv stdout]]] + } else { + fail "Server started even if RDB was corrupted!" + } + } +} + +start_server {} { + test {Test FLUSHALL aborts bgsave} { + r config set save "" + # 5000 keys with 1ms sleep per key should take 5 second + r config set rdb-key-save-delay 1000 + populate 5000 + assert_lessthan 999 [s rdb_changes_since_last_save] + r bgsave + assert_equal [s rdb_bgsave_in_progress] 1 + r flushall + # wait a second max (bgsave should take 5) + wait_for_condition 10 100 { + [s rdb_bgsave_in_progress] == 0 + } else { + fail "bgsave not aborted" + } + # verify that bgsave failed, by checking that the change counter is still high + assert_lessthan 999 [s rdb_changes_since_last_save] + # make sure the server is still writable + r set x xx + } + + test {bgsave resets the change counter} { + r config set rdb-key-save-delay 0 + r bgsave + wait_for_condition 50 100 { + [s rdb_bgsave_in_progress] == 0 + } else { + fail "bgsave not done" + } + assert_equal [s rdb_changes_since_last_save] 0 + } +} + +test {client freed during loading} { + start_server [list overrides [list key-load-delay 50 loading-process-events-interval-bytes 1024 rdbcompression no]] { + # create a big rdb that will take long to load. it is important + # for keys to be big since the server processes events only once in 2mb. + # 100mb of rdb, 100k keys will load in more than 5 seconds + r debug populate 100000 key 1000 + + restart_server 0 false false + + # make sure it's still loading + assert_equal [s loading] 1 + + # connect and disconnect 5 clients + set clients {} + for {set j 0} {$j < 5} {incr j} { + lappend clients [redis_deferring_client] + } + foreach rd $clients { + $rd debug log bla + } + foreach rd $clients { + $rd read + } + foreach rd $clients { + $rd close + } + + # make sure the server freed the clients + wait_for_condition 100 100 { + [s connected_clients] < 3 + } else { + fail "clients didn't disconnect" + } + + # make sure it's still loading + assert_equal [s loading] 1 + + # no need to keep waiting for loading to complete + exec kill [srv 0 pid] + } +} + +start_server {} { + test {Test RDB load info} { + r debug populate 1000 + r save + restart_server 0 true false + wait_done_loading r + assert {[s rdb_last_load_keys_expired] == 0} + assert {[s rdb_last_load_keys_loaded] == 1000} + + r debug set-active-expire 0 + for {set j 0} {$j < 1024} {incr j} { + r select [expr $j%16] + r set $j somevalue px 10 + } + after 20 + + r save + restart_server 0 true false + wait_done_loading r + assert {[s rdb_last_load_keys_expired] == 1024} + assert {[s rdb_last_load_keys_loaded] == 1000} + } +} + +# Our COW metrics (Private_Dirty) work only on Linux +set system_name [string tolower [exec uname -s]] +set page_size [exec getconf PAGESIZE] +if {$system_name eq {linux} && $page_size == 4096} { + +start_server {overrides {save ""}} { + test {Test child sending info} { + # make sure that rdb_last_cow_size and current_cow_size are zero (the test using new server), + # so that the comparisons during the test will be valid + assert {[s current_cow_size] == 0} + assert {[s current_save_keys_processed] == 0} + assert {[s current_save_keys_total] == 0} + + assert {[s rdb_last_cow_size] == 0} + + # using a 200us delay, the bgsave is empirically taking about 10 seconds. + # we need it to take more than some 5 seconds, since redis only report COW once a second. + r config set rdb-key-save-delay 200 + r config set loglevel debug + + # populate the db with 10k keys of 512B each (since we want to measure the COW size by + # changing some keys and read the reported COW size, we are using small key size to prevent from + # the "dismiss mechanism" free memory and reduce the COW size) + set rd [redis_deferring_client 0] + set size 500 ;# aim for the 512 bin (sds overhead) + set cmd_count 10000 + for {set k 0} {$k < $cmd_count} {incr k} { + $rd set key$k [string repeat A $size] + } + + for {set k 0} {$k < $cmd_count} {incr k} { + catch { $rd read } + } + + $rd close + + # start background rdb save + r bgsave + + set current_save_keys_total [s current_save_keys_total] + if {$::verbose} { + puts "Keys before bgsave start: $current_save_keys_total" + } + + # on each iteration, we will write some key to the server to trigger copy-on-write, and + # wait to see that it reflected in INFO. + set iteration 1 + set key_idx 0 + while 1 { + # take samples before writing new data to the server + set cow_size [s current_cow_size] + if {$::verbose} { + puts "COW info before copy-on-write: $cow_size" + } + + set keys_processed [s current_save_keys_processed] + if {$::verbose} { + puts "current_save_keys_processed info : $keys_processed" + } + + # trigger copy-on-write + set modified_keys 16 + for {set k 0} {$k < $modified_keys} {incr k} { + r setrange key$key_idx 0 [string repeat B $size] + incr key_idx 1 + } + + # changing 16 keys (512B each) will create at least 8192 COW (2 pages), but we don't want the test + # to be too strict, so we check for a change of at least 4096 bytes + set exp_cow [expr $cow_size + 4096] + # wait to see that current_cow_size value updated (as long as the child is in progress) + wait_for_condition 80 100 { + [s rdb_bgsave_in_progress] == 0 || + [s current_cow_size] >= $exp_cow && + [s current_save_keys_processed] > $keys_processed && + [s current_fork_perc] > 0 + } else { + if {$::verbose} { + puts "COW info on fail: [s current_cow_size]" + puts [exec tail -n 100 < [srv 0 stdout]] + } + fail "COW info wasn't reported" + } + + # assert that $keys_processed is not greater than total keys. + assert_morethan_equal $current_save_keys_total $keys_processed + + # for no accurate, stop after 2 iterations + if {!$::accurate && $iteration == 2} { + break + } + + # stop iterating if the bgsave completed + if { [s rdb_bgsave_in_progress] == 0 } { + break + } + + incr iteration 1 + } + + # make sure we saw report of current_cow_size + if {$iteration < 2 && $::verbose} { + puts [exec tail -n 100 < [srv 0 stdout]] + } + assert_morethan_equal $iteration 2 + + # if bgsave completed, check that rdb_last_cow_size (fork exit report) + # is at least 90% of last rdb_active_cow_size. + if { [s rdb_bgsave_in_progress] == 0 } { + set final_cow [s rdb_last_cow_size] + set cow_size [expr $cow_size * 0.9] + if {$final_cow < $cow_size && $::verbose} { + puts [exec tail -n 100 < [srv 0 stdout]] + } + assert_morethan_equal $final_cow $cow_size + } + } +} +} ;# system_name + +exec cp -f tests/assets/scriptbackup.rdb $server_path +start_server [list overrides [list "dir" $server_path "dbfilename" "scriptbackup.rdb" "appendonly" "no"]] { + # the script is: "return redis.call('set', 'foo', 'bar')"" + # its sha1 is: a0c38691e9fffe4563723c32ba77a34398e090e6 + test {script won't load anymore if it's in rdb} { + assert_equal [r script exists a0c38691e9fffe4563723c32ba77a34398e090e6] 0 + } +} + +start_server {} { + test "failed bgsave prevents writes" { + r config set rdb-key-save-delay 10000000 + populate 1000 + r set x x + r bgsave + set pid1 [get_child_pid 0] + catch {exec kill -9 $pid1} + waitForBgsave r + + # make sure a read command succeeds + assert_equal [r get x] x + + # make sure a write command fails + assert_error {MISCONF *} {r set x y} + + # repeate with script + assert_error {MISCONF *} {r eval { + return redis.call('set','x',1) + } 1 x + } + assert_equal {x} [r eval { + return redis.call('get','x') + } 1 x + ] + + # again with script using shebang + assert_error {MISCONF *} {r eval {#!lua + return redis.call('set','x',1) + } 1 x + } + assert_equal {x} [r eval {#!lua flags=no-writes + return redis.call('get','x') + } 1 x + ] + + r config set rdb-key-save-delay 0 + r bgsave + waitForBgsave r + + # server is writable again + r set x y + } {OK} +} + +} ;# tags diff --git a/tests/integration/redis-benchmark.tcl b/tests/integration/redis-benchmark.tcl new file mode 100644 index 0000000..5e8555b --- /dev/null +++ b/tests/integration/redis-benchmark.tcl @@ -0,0 +1,171 @@ +source tests/support/benchmark.tcl + + +proc cmdstat {cmd} { + return [cmdrstat $cmd r] +} + +# common code to reset stats, flush the db and run redis-benchmark +proc common_bench_setup {cmd} { + r config resetstat + r flushall + if {[catch { exec {*}$cmd } error]} { + set first_line [lindex [split $error "\n"] 0] + puts [colorstr red "redis-benchmark non zero code. first line: $first_line"] + fail "redis-benchmark non zero code. first line: $first_line" + } +} + +# we use this extra asserts on a simple set,get test for features like uri parsing +# and other simple flag related tests +proc default_set_get_checks {} { + assert_match {*calls=10,*} [cmdstat set] + assert_match {*calls=10,*} [cmdstat get] + # assert one of the non benchmarked commands is not present + assert_match {} [cmdstat lrange] +} + +start_server {tags {"benchmark network external:skip"}} { + start_server {} { + set master_host [srv 0 host] + set master_port [srv 0 port] + + test {benchmark: set,get} { + set cmd [redisbenchmark $master_host $master_port "-c 5 -n 10 -t set,get"] + common_bench_setup $cmd + default_set_get_checks + } + + test {benchmark: connecting using URI set,get} { + set cmd [redisbenchmarkuri $master_host $master_port "-c 5 -n 10 -t set,get"] + common_bench_setup $cmd + default_set_get_checks + } + + test {benchmark: connecting using URI with authentication set,get} { + r config set masterauth pass + set cmd [redisbenchmarkuriuserpass $master_host $master_port "default" pass "-c 5 -n 10 -t set,get"] + common_bench_setup $cmd + default_set_get_checks + } + + test {benchmark: full test suite} { + set cmd [redisbenchmark $master_host $master_port "-c 10 -n 100"] + common_bench_setup $cmd + + # ping total calls are 2*issued commands per test due to PING_INLINE and PING_MBULK + assert_match {*calls=200,*} [cmdstat ping] + assert_match {*calls=100,*} [cmdstat set] + assert_match {*calls=100,*} [cmdstat get] + assert_match {*calls=100,*} [cmdstat incr] + # lpush total calls are 2*issued commands per test due to the lrange tests + assert_match {*calls=200,*} [cmdstat lpush] + assert_match {*calls=100,*} [cmdstat rpush] + assert_match {*calls=100,*} [cmdstat lpop] + assert_match {*calls=100,*} [cmdstat rpop] + assert_match {*calls=100,*} [cmdstat sadd] + assert_match {*calls=100,*} [cmdstat hset] + assert_match {*calls=100,*} [cmdstat spop] + assert_match {*calls=100,*} [cmdstat zadd] + assert_match {*calls=100,*} [cmdstat zpopmin] + assert_match {*calls=400,*} [cmdstat lrange] + assert_match {*calls=100,*} [cmdstat mset] + # assert one of the non benchmarked commands is not present + assert_match {} [cmdstat rpoplpush] + } + + test {benchmark: multi-thread set,get} { + set cmd [redisbenchmark $master_host $master_port "--threads 10 -c 5 -n 10 -t set,get"] + common_bench_setup $cmd + default_set_get_checks + + # ensure only one key was populated + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + } + + test {benchmark: pipelined full set,get} { + set cmd [redisbenchmark $master_host $master_port "-P 5 -c 10 -n 10010 -t set,get"] + common_bench_setup $cmd + assert_match {*calls=10010,*} [cmdstat set] + assert_match {*calls=10010,*} [cmdstat get] + # assert one of the non benchmarked commands is not present + assert_match {} [cmdstat lrange] + + # ensure only one key was populated + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + } + + test {benchmark: arbitrary command} { + set cmd [redisbenchmark $master_host $master_port "-c 5 -n 150 INCRBYFLOAT mykey 10.0"] + common_bench_setup $cmd + assert_match {*calls=150,*} [cmdstat incrbyfloat] + # assert one of the non benchmarked commands is not present + assert_match {} [cmdstat get] + + # ensure only one key was populated + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + } + + test {benchmark: keyspace length} { + set cmd [redisbenchmark $master_host $master_port "-r 50 -t set -n 1000"] + common_bench_setup $cmd + assert_match {*calls=1000,*} [cmdstat set] + # assert one of the non benchmarked commands is not present + assert_match {} [cmdstat get] + + # ensure the keyspace has the desired size + assert_match {50} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + } + + test {benchmark: clients idle mode should return error when reached maxclients limit} { + set cmd [redisbenchmark $master_host $master_port "-c 10 -I"] + set original_maxclients [lindex [r config get maxclients] 1] + r config set maxclients 5 + catch { exec {*}$cmd } error + assert_match "*Error*" $error + r config set maxclients $original_maxclients + } + + # tls specific tests + if {$::tls} { + test {benchmark: specific tls-ciphers} { + set cmd [redisbenchmark $master_host $master_port "-r 50 -t set -n 1000 --tls-ciphers \"DEFAULT:-AES128-SHA256\""] + common_bench_setup $cmd + assert_match {*calls=1000,*} [cmdstat set] + # assert one of the non benchmarked commands is not present + assert_match {} [cmdstat get] + } + + test {benchmark: tls connecting using URI with authentication set,get} { + r config set masterauth pass + set cmd [redisbenchmarkuriuserpass $master_host $master_port "default" pass "-c 5 -n 10 -t set,get"] + common_bench_setup $cmd + default_set_get_checks + } + + test {benchmark: specific tls-ciphersuites} { + r flushall + r config resetstat + set ciphersuites_supported 1 + set cmd [redisbenchmark $master_host $master_port "-r 50 -t set -n 1000 --tls-ciphersuites \"TLS_AES_128_GCM_SHA256\""] + if {[catch { exec {*}$cmd } error]} { + set first_line [lindex [split $error "\n"] 0] + if {[string match "*Invalid option*" $first_line]} { + set ciphersuites_supported 0 + if {$::verbose} { + puts "Skipping test, TLSv1.3 not supported." + } + } else { + puts [colorstr red "redis-benchmark non zero code. first line: $first_line"] + fail "redis-benchmark non zero code. first line: $first_line" + } + } + if {$ciphersuites_supported} { + assert_match {*calls=1000,*} [cmdstat set] + # assert one of the non benchmarked commands is not present + assert_match {} [cmdstat get] + } + } + } + } +} diff --git a/tests/integration/redis-cli.tcl b/tests/integration/redis-cli.tcl new file mode 100644 index 0000000..e159fb1 --- /dev/null +++ b/tests/integration/redis-cli.tcl @@ -0,0 +1,504 @@ +source tests/support/cli.tcl + +if {$::singledb} { + set ::dbnum 0 +} else { + set ::dbnum 9 +} + +start_server {tags {"cli"}} { + proc open_cli {{opts ""} {infile ""}} { + if { $opts == "" } { + set opts "-n $::dbnum" + } + set ::env(TERM) dumb + set cmdline [rediscli [srv host] [srv port] $opts] + if {$infile ne ""} { + set cmdline "$cmdline < $infile" + set mode "r" + } else { + set mode "r+" + } + set fd [open "|$cmdline" $mode] + fconfigure $fd -buffering none + fconfigure $fd -blocking false + fconfigure $fd -translation binary + set _ $fd + } + + proc close_cli {fd} { + close $fd + } + + proc read_cli {fd} { + set ret [read $fd] + while {[string length $ret] == 0} { + after 10 + set ret [read $fd] + } + + # We may have a short read, try to read some more. + set empty_reads 0 + while {$empty_reads < 5} { + set buf [read $fd] + if {[string length $buf] == 0} { + after 10 + incr empty_reads + } else { + append ret $buf + set empty_reads 0 + } + } + return $ret + } + + proc write_cli {fd buf} { + puts $fd $buf + flush $fd + } + + # Helpers to run tests in interactive mode + + proc format_output {output} { + set _ [string trimright [regsub -all "\r" $output ""] "\n"] + } + + proc run_command {fd cmd} { + write_cli $fd $cmd + set _ [format_output [read_cli $fd]] + } + + proc test_interactive_cli {name code} { + set ::env(FAKETTY) 1 + set fd [open_cli] + test "Interactive CLI: $name" $code + close_cli $fd + unset ::env(FAKETTY) + } + + # Helpers to run tests where stdout is not a tty + proc write_tmpfile {contents} { + set tmp [tmpfile "cli"] + set tmpfd [open $tmp "w"] + puts -nonewline $tmpfd $contents + close $tmpfd + set _ $tmp + } + + proc _run_cli {host port db opts args} { + set cmd [rediscli $host $port [list -n $db {*}$args]] + foreach {key value} $opts { + if {$key eq "pipe"} { + set cmd "sh -c \"$value | $cmd\"" + } + if {$key eq "path"} { + set cmd "$cmd < $value" + } + } + + set fd [open "|$cmd" "r"] + fconfigure $fd -buffering none + fconfigure $fd -translation binary + set resp [read $fd 1048576] + close $fd + set _ [format_output $resp] + } + + proc run_cli {args} { + _run_cli [srv host] [srv port] $::dbnum {} {*}$args + } + + proc run_cli_with_input_pipe {mode cmd args} { + if {$mode == "x" } { + _run_cli [srv host] [srv port] $::dbnum [list pipe $cmd] -x {*}$args + } elseif {$mode == "X"} { + _run_cli [srv host] [srv port] $::dbnum [list pipe $cmd] -X tag {*}$args + } + } + + proc run_cli_with_input_file {mode path args} { + if {$mode == "x" } { + _run_cli [srv host] [srv port] $::dbnum [list path $path] -x {*}$args + } elseif {$mode == "X"} { + _run_cli [srv host] [srv port] $::dbnum [list path $path] -X tag {*}$args + } + } + + proc run_cli_host_port_db {host port db args} { + _run_cli $host $port $db {} {*}$args + } + + proc test_nontty_cli {name code} { + test "Non-interactive non-TTY CLI: $name" $code + } + + # Helpers to run tests where stdout is a tty (fake it) + proc test_tty_cli {name code} { + set ::env(FAKETTY) 1 + test "Non-interactive TTY CLI: $name" $code + unset ::env(FAKETTY) + } + + test_interactive_cli "INFO response should be printed raw" { + set lines [split [run_command $fd info] "\n"] + foreach line $lines { + if {![regexp {^$|^#|^[^#:]+:} $line]} { + fail "Malformed info line: $line" + } + } + } + + test_interactive_cli "Status reply" { + assert_equal "OK" [run_command $fd "set key foo"] + } + + test_interactive_cli "Integer reply" { + assert_equal "(integer) 1" [run_command $fd "incr counter"] + } + + test_interactive_cli "Bulk reply" { + r set key foo + assert_equal "\"foo\"" [run_command $fd "get key"] + } + + test_interactive_cli "Multi-bulk reply" { + r rpush list foo + r rpush list bar + assert_equal "1) \"foo\"\n2) \"bar\"" [run_command $fd "lrange list 0 -1"] + } + + test_interactive_cli "Parsing quotes" { + assert_equal "OK" [run_command $fd "set key \"bar\""] + assert_equal "bar" [r get key] + assert_equal "OK" [run_command $fd "set key \" bar \""] + assert_equal " bar " [r get key] + assert_equal "OK" [run_command $fd "set key \"\\\"bar\\\"\""] + assert_equal "\"bar\"" [r get key] + assert_equal "OK" [run_command $fd "set key \"\tbar\t\""] + assert_equal "\tbar\t" [r get key] + + # invalid quotation + assert_equal "Invalid argument(s)" [run_command $fd "get \"\"key"] + assert_equal "Invalid argument(s)" [run_command $fd "get \"key\"x"] + + # quotes after the argument are weird, but should be allowed + assert_equal "OK" [run_command $fd "set key\"\" bar"] + assert_equal "bar" [r get key] + } + + test_tty_cli "Status reply" { + assert_equal "OK" [run_cli set key bar] + assert_equal "bar" [r get key] + } + + test_tty_cli "Integer reply" { + r del counter + assert_equal "(integer) 1" [run_cli incr counter] + } + + test_tty_cli "Bulk reply" { + r set key "tab\tnewline\n" + assert_equal "\"tab\\tnewline\\n\"" [run_cli get key] + } + + test_tty_cli "Multi-bulk reply" { + r del list + r rpush list foo + r rpush list bar + assert_equal "1) \"foo\"\n2) \"bar\"" [run_cli lrange list 0 -1] + } + + test_tty_cli "Read last argument from pipe" { + assert_equal "OK" [run_cli_with_input_pipe x "echo foo" set key] + assert_equal "foo\n" [r get key] + + assert_equal "OK" [run_cli_with_input_pipe X "echo foo" set key2 tag] + assert_equal "foo\n" [r get key2] + } + + test_tty_cli "Read last argument from file" { + set tmpfile [write_tmpfile "from file"] + + assert_equal "OK" [run_cli_with_input_file x $tmpfile set key] + assert_equal "from file" [r get key] + + assert_equal "OK" [run_cli_with_input_file X $tmpfile set key2 tag] + assert_equal "from file" [r get key2] + + file delete $tmpfile + } + + test_tty_cli "Escape character in JSON mode" { + # reverse solidus + r hset solidus \/ \/ + assert_equal \/ \/ [run_cli hgetall solidus] + set escaped_reverse_solidus \"\\" + assert_equal $escaped_reverse_solidus $escaped_reverse_solidus [run_cli --json hgetall \/] + # non printable (0xF0 in ISO-8859-1, not UTF-8(0xC3 0xB0)) + set eth "\u00f0\u0065" + r hset eth test $eth + assert_equal \"\\xf0e\" [run_cli hget eth test] + assert_equal \"\u00f0e\" [run_cli --json hget eth test] + assert_equal \"\\\\xf0e\" [run_cli --quoted-json hget eth test] + # control characters + r hset control test "Hello\x00\x01\x02\x03World" + assert_equal \"Hello\\u0000\\u0001\\u0002\\u0003World" [run_cli --json hget control test] + # non-string keys + r hset numkey 1 One + assert_equal \{\"1\":\"One\"\} [run_cli --json hgetall numkey] + # non-string, non-printable keys + r hset npkey "K\u0000\u0001ey" "V\u0000\u0001alue" + assert_equal \{\"K\\u0000\\u0001ey\":\"V\\u0000\\u0001alue\"\} [run_cli --json hgetall npkey] + assert_equal \{\"K\\\\x00\\\\x01ey\":\"V\\\\x00\\\\x01alue\"\} [run_cli --quoted-json hgetall npkey] + } + + test_nontty_cli "Status reply" { + assert_equal "OK" [run_cli set key bar] + assert_equal "bar" [r get key] + } + + test_nontty_cli "Integer reply" { + r del counter + assert_equal "1" [run_cli incr counter] + } + + test_nontty_cli "Bulk reply" { + r set key "tab\tnewline\n" + assert_equal "tab\tnewline" [run_cli get key] + } + + test_nontty_cli "Multi-bulk reply" { + r del list + r rpush list foo + r rpush list bar + assert_equal "foo\nbar" [run_cli lrange list 0 -1] + } + +if {!$::tls} { ;# fake_redis_node doesn't support TLS + test_nontty_cli "ASK redirect test" { + # Set up two fake Redis nodes. + set tclsh [info nameofexecutable] + set script "tests/helpers/fake_redis_node.tcl" + set port1 [find_available_port $::baseport $::portcount] + set port2 [find_available_port $::baseport $::portcount] + set p1 [exec $tclsh $script $port1 \ + "SET foo bar" "-ASK 12182 127.0.0.1:$port2" &] + set p2 [exec $tclsh $script $port2 \ + "ASKING" "+OK" \ + "SET foo bar" "+OK" &] + # Make sure both fake nodes have started listening + wait_for_condition 50 50 { + [catch {close [socket "127.0.0.1" $port1]}] == 0 && \ + [catch {close [socket "127.0.0.1" $port2]}] == 0 + } else { + fail "Failed to start fake Redis nodes" + } + # Run the cli + assert_equal "OK" [run_cli_host_port_db "127.0.0.1" $port1 0 -c SET foo bar] + } +} + + test_nontty_cli "Quoted input arguments" { + r set "\x00\x00" "value" + assert_equal "value" [run_cli --quoted-input get {"\x00\x00"}] + } + + test_nontty_cli "No accidental unquoting of input arguments" { + run_cli --quoted-input set {"\x41\x41"} quoted-val + run_cli set {"\x41\x41"} unquoted-val + assert_equal "quoted-val" [r get AA] + assert_equal "unquoted-val" [r get {"\x41\x41"}] + } + + test_nontty_cli "Invalid quoted input arguments" { + catch {run_cli --quoted-input set {"Unterminated}} err + assert_match {*exited abnormally*} $err + + # A single arg that unquotes to two arguments is also not expected + catch {run_cli --quoted-input set {"arg1" "arg2"}} err + assert_match {*exited abnormally*} $err + } + + test_nontty_cli "Read last argument from pipe" { + assert_equal "OK" [run_cli_with_input_pipe x "echo foo" set key] + assert_equal "foo\n" [r get key] + + assert_equal "OK" [run_cli_with_input_pipe X "echo foo" set key2 tag] + assert_equal "foo\n" [r get key2] + } + + test_nontty_cli "Read last argument from file" { + set tmpfile [write_tmpfile "from file"] + + assert_equal "OK" [run_cli_with_input_file x $tmpfile set key] + assert_equal "from file" [r get key] + + assert_equal "OK" [run_cli_with_input_file X $tmpfile set key2 tag] + assert_equal "from file" [r get key2] + + file delete $tmpfile + } + + proc test_redis_cli_rdb_dump {functions_only} { + r flushdb + r function flush + + set dir [lindex [r config get dir] 1] + + assert_equal "OK" [r debug populate 100000 key 1000] + assert_equal "lib1" [r function load "#!lua name=lib1\nredis.register_function('func1', function() return 123 end)"] + if {$functions_only} { + set args "--functions-rdb $dir/cli.rdb" + } else { + set args "--rdb $dir/cli.rdb" + } + catch {run_cli {*}$args} output + assert_match {*Transfer finished with success*} $output + + file delete "$dir/dump.rdb" + file rename "$dir/cli.rdb" "$dir/dump.rdb" + + assert_equal "OK" [r set should-not-exist 1] + assert_equal "should_not_exist_func" [r function load "#!lua name=should_not_exist_func\nredis.register_function('should_not_exist_func', function() return 456 end)"] + assert_equal "OK" [r debug reload nosave] + assert_equal {} [r get should-not-exist] + assert_equal {{library_name lib1 engine LUA functions {{name func1 description {} flags {}}}}} [r function list] + if {$functions_only} { + assert_equal 0 [r dbsize] + } else { + assert_equal 100000 [r dbsize] + } + } + + foreach {functions_only} {no yes} { + + test "Dumping an RDB - functions only: $functions_only" { + # Disk-based master + assert_match "OK" [r config set repl-diskless-sync no] + test_redis_cli_rdb_dump $functions_only + + # Disk-less master + assert_match "OK" [r config set repl-diskless-sync yes] + assert_match "OK" [r config set repl-diskless-sync-delay 0] + test_redis_cli_rdb_dump $functions_only + } {} {needs:repl needs:debug} + + } ;# foreach functions_only + + test "Scan mode" { + r flushdb + populate 1000 key: 1 + + # basic use + assert_equal 1000 [llength [split [run_cli --scan]]] + + # pattern + assert_equal {key:2} [run_cli --scan --pattern "*:2"] + + # pattern matching with a quoted string + assert_equal {key:2} [run_cli --scan --quoted-pattern {"*:\x32"}] + } + + proc test_redis_cli_repl {} { + set fd [open_cli "--replica"] + wait_for_condition 500 100 { + [string match {*slave0:*state=online*} [r info]] + } else { + fail "redis-cli --replica did not connect" + } + + for {set i 0} {$i < 100} {incr i} { + r set test-key test-value-$i + } + + wait_for_condition 500 100 { + [string match {*test-value-99*} [read_cli $fd]] + } else { + fail "redis-cli --replica didn't read commands" + } + + fconfigure $fd -blocking true + r client kill type slave + catch { close_cli $fd } err + assert_match {*Server closed the connection*} $err + } + + test "Connecting as a replica" { + # Disk-based master + assert_match "OK" [r config set repl-diskless-sync no] + test_redis_cli_repl + + # Disk-less master + assert_match "OK" [r config set repl-diskless-sync yes] + assert_match "OK" [r config set repl-diskless-sync-delay 0] + test_redis_cli_repl + } {} {needs:repl} + + test "Piping raw protocol" { + set cmds [tmpfile "cli_cmds"] + set cmds_fd [open $cmds "w"] + + set cmds_count 2101 + + if {!$::singledb} { + puts $cmds_fd [formatCommand select 9] + incr cmds_count + } + puts $cmds_fd [formatCommand del test-counter] + + for {set i 0} {$i < 1000} {incr i} { + puts $cmds_fd [formatCommand incr test-counter] + puts $cmds_fd [formatCommand set large-key [string repeat "x" 20000]] + } + + for {set i 0} {$i < 100} {incr i} { + puts $cmds_fd [formatCommand set very-large-key [string repeat "x" 512000]] + } + close $cmds_fd + + set cli_fd [open_cli "--pipe" $cmds] + fconfigure $cli_fd -blocking true + set output [read_cli $cli_fd] + + assert_equal {1000} [r get test-counter] + assert_match "*All data transferred*errors: 0*replies: ${cmds_count}*" $output + + file delete $cmds + } + + test "Options -X with illegal argument" { + assert_error "*-x and -X are mutually exclusive*" {run_cli -x -X tag} + + assert_error "*Unrecognized option or bad number*" {run_cli -X} + + assert_error "*tag not match*" {run_cli_with_input_pipe X "echo foo" set key wrong_tag} + } + + test "DUMP RESTORE with -x option" { + set cmdline [rediscli [srv host] [srv port]] + + exec {*}$cmdline DEL set new_set + exec {*}$cmdline SADD set 1 2 3 4 5 6 + assert_equal 6 [exec {*}$cmdline SCARD set] + + assert_equal "OK" [exec {*}$cmdline -D "" --raw DUMP set | \ + {*}$cmdline -x RESTORE new_set 0] + + assert_equal 6 [exec {*}$cmdline SCARD new_set] + assert_equal "1\n2\n3\n4\n5\n6" [exec {*}$cmdline SMEMBERS new_set] + } + + test "DUMP RESTORE with -X option" { + set cmdline [rediscli [srv host] [srv port]] + + exec {*}$cmdline DEL zset new_zset + exec {*}$cmdline ZADD zset 1 a 2 b 3 c + assert_equal 3 [exec {*}$cmdline ZCARD zset] + + assert_equal "OK" [exec {*}$cmdline -D "" --raw DUMP zset | \ + {*}$cmdline -X dump_tag RESTORE new_zset 0 dump_tag REPLACE] + + assert_equal 3 [exec {*}$cmdline ZCARD new_zset] + assert_equal "a\n1\nb\n2\nc\n3" [exec {*}$cmdline ZRANGE new_zset 0 -1 WITHSCORES] + } +} diff --git a/tests/integration/replication-2.tcl b/tests/integration/replication-2.tcl new file mode 100644 index 0000000..f9f2592 --- /dev/null +++ b/tests/integration/replication-2.tcl @@ -0,0 +1,93 @@ +start_server {tags {"repl external:skip"}} { + start_server {} { + test {First server should have role slave after SLAVEOF} { + r -1 slaveof [srv 0 host] [srv 0 port] + wait_replica_online r + wait_for_condition 50 100 { + [s -1 master_link_status] eq {up} + } else { + fail "Replication not started." + } + } + + test {If min-slaves-to-write is honored, write is accepted} { + r config set min-slaves-to-write 1 + r config set min-slaves-max-lag 10 + r set foo 12345 + wait_for_condition 50 100 { + [r -1 get foo] eq {12345} + } else { + fail "Write did not reached replica" + } + } + + test {No write if min-slaves-to-write is < attached slaves} { + r config set min-slaves-to-write 2 + r config set min-slaves-max-lag 10 + catch {r set foo 12345} err + set err + } {NOREPLICAS*} + + test {If min-slaves-to-write is honored, write is accepted (again)} { + r config set min-slaves-to-write 1 + r config set min-slaves-max-lag 10 + r set foo 12345 + wait_for_condition 50 100 { + [r -1 get foo] eq {12345} + } else { + fail "Write did not reached replica" + } + } + + test {No write if min-slaves-max-lag is > of the slave lag} { + r config set min-slaves-to-write 1 + r config set min-slaves-max-lag 2 + exec kill -SIGSTOP [srv -1 pid] + assert {[r set foo 12345] eq {OK}} + wait_for_condition 100 100 { + [catch {r set foo 12345}] != 0 + } else { + fail "Master didn't become readonly" + } + catch {r set foo 12345} err + assert_match {NOREPLICAS*} $err + } + exec kill -SIGCONT [srv -1 pid] + + test {min-slaves-to-write is ignored by slaves} { + r config set min-slaves-to-write 1 + r config set min-slaves-max-lag 10 + r -1 config set min-slaves-to-write 1 + r -1 config set min-slaves-max-lag 10 + r set foo aaabbb + wait_for_condition 50 100 { + [r -1 get foo] eq {aaabbb} + } else { + fail "Write did not reached replica" + } + } + + # Fix parameters for the next test to work + r config set min-slaves-to-write 0 + r -1 config set min-slaves-to-write 0 + r flushall + + test {MASTER and SLAVE dataset should be identical after complex ops} { + createComplexDataset r 10000 + after 500 + if {[r debug digest] ne [r -1 debug digest]} { + set csv1 [csvdump r] + set csv2 [csvdump {r -1}] + set fd [open /tmp/repldump1.txt w] + puts -nonewline $fd $csv1 + close $fd + set fd [open /tmp/repldump2.txt w] + puts -nonewline $fd $csv2 + close $fd + puts "Master - Replica inconsistency" + puts "Run diff -u against /tmp/repldump*.txt for more info" + } + assert_equal [r debug digest] [r -1 debug digest] + } + } +} diff --git a/tests/integration/replication-3.tcl b/tests/integration/replication-3.tcl new file mode 100644 index 0000000..f53a05a --- /dev/null +++ b/tests/integration/replication-3.tcl @@ -0,0 +1,130 @@ +start_server {tags {"repl external:skip"}} { + start_server {} { + test {First server should have role slave after SLAVEOF} { + r -1 slaveof [srv 0 host] [srv 0 port] + wait_for_condition 50 100 { + [s -1 master_link_status] eq {up} + } else { + fail "Replication not started." + } + } + + if {$::accurate} {set numops 50000} else {set numops 5000} + + test {MASTER and SLAVE consistency with expire} { + createComplexDataset r $numops useexpire + + # Make sure everything expired before taking the digest + # createComplexDataset uses max expire time of 2 seconds + wait_for_condition 50 100 { + 0 == [scan [regexp -inline {expires\=([\d]*)} [r -1 info keyspace]] expires=%d] + } else { + fail "expire didn't end" + } + + # make sure the replica got all the DELs + wait_for_ofs_sync [srv 0 client] [srv -1 client] + + if {[r debug digest] ne [r -1 debug digest]} { + set csv1 [csvdump r] + set csv2 [csvdump {r -1}] + set fd [open /tmp/repldump1.txt w] + puts -nonewline $fd $csv1 + close $fd + set fd [open /tmp/repldump2.txt w] + puts -nonewline $fd $csv2 + close $fd + puts "Master - Replica inconsistency" + puts "Run diff -u against /tmp/repldump*.txt for more info" + } + assert_equal [r debug digest] [r -1 debug digest] + } + + test {Master can replicate command longer than client-query-buffer-limit on replica} { + # Configure the master to have a bigger query buffer limit + r config set client-query-buffer-limit 2000000 + r -1 config set client-query-buffer-limit 1048576 + # Write a very large command onto the master + r set key [string repeat "x" 1100000] + wait_for_condition 300 100 { + [r -1 get key] eq [string repeat "x" 1100000] + } else { + fail "Unable to replicate command longer than client-query-buffer-limit" + } + } + + test {Slave is able to evict keys created in writable slaves} { + r -1 select 5 + assert {[r -1 dbsize] == 0} + r -1 config set slave-read-only no + r -1 set key1 1 ex 5 + r -1 set key2 2 ex 5 + r -1 set key3 3 ex 5 + assert {[r -1 dbsize] == 3} + after 6000 + r -1 dbsize + } {0} + + test {Writable replica doesn't return expired keys} { + r select 5 + assert {[r dbsize] == 0} + r debug set-active-expire 0 + r set key1 5 px 10 + r set key2 5 px 10 + r -1 select 5 + wait_for_condition 50 100 { + [r -1 dbsize] == 2 && [r -1 exists key1 key2] == 0 + } else { + fail "Keys didn't replicate or didn't expire." + } + r -1 config set slave-read-only no + assert_equal 2 [r -1 dbsize] ; # active expire is off + assert_equal 1 [r -1 incr key1] ; # incr expires and re-creates key1 + assert_equal -1 [r -1 ttl key1] ; # incr created key1 without TTL + assert_equal {} [r -1 get key2] ; # key2 expired but not deleted + assert_equal 2 [r -1 dbsize] + # cleanup + r debug set-active-expire 1 + r -1 del key1 key2 + r -1 config set slave-read-only yes + r del key1 key2 + } + + test {PFCOUNT updates cache on readonly replica} { + r select 5 + assert {[r dbsize] == 0} + r pfadd key a b c d e f g h i j k l m n o p q + set strval [r get key] + r -1 select 5 + wait_for_condition 50 100 { + [r -1 dbsize] == 1 + } else { + fail "Replication timeout." + } + assert {$strval == [r -1 get key]} + assert_equal 17 [r -1 pfcount key] + assert {$strval != [r -1 get key]}; # cache updated + # cleanup + r del key + } + + test {PFCOUNT doesn't use expired key on readonly replica} { + r select 5 + assert {[r dbsize] == 0} + r debug set-active-expire 0 + r pfadd key a b c d e f g h i j k l m n o p q + r pexpire key 10 + r -1 select 5 + wait_for_condition 50 100 { + [r -1 dbsize] == 1 && [r -1 exists key] == 0 + } else { + fail "Key didn't replicate or didn't expire." + } + assert_equal [r -1 pfcount key] 0 ; # expired key not used + assert_equal [r -1 dbsize] 1 ; # but it's also not deleted + # cleanup + r debug set-active-expire 1 + r del key + } + } +} diff --git a/tests/integration/replication-4.tcl b/tests/integration/replication-4.tcl new file mode 100644 index 0000000..d7a8b92 --- /dev/null +++ b/tests/integration/replication-4.tcl @@ -0,0 +1,258 @@ +start_server {tags {"repl network external:skip"}} { + start_server {} { + + set master [srv -1 client] + set master_host [srv -1 host] + set master_port [srv -1 port] + set slave [srv 0 client] + + set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000] + set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000] + set load_handle2 [start_bg_complex_data $master_host $master_port 12 100000] + + test {First server should have role slave after SLAVEOF} { + $slave slaveof $master_host $master_port + after 1000 + s 0 role + } {slave} + + test {Test replication with parallel clients writing in different DBs} { + after 5000 + stop_bg_complex_data $load_handle0 + stop_bg_complex_data $load_handle1 + stop_bg_complex_data $load_handle2 + wait_for_condition 100 100 { + [$master debug digest] == [$slave debug digest] + } else { + set csv1 [csvdump r] + set csv2 [csvdump {r -1}] + set fd [open /tmp/repldump1.txt w] + puts -nonewline $fd $csv1 + close $fd + set fd [open /tmp/repldump2.txt w] + puts -nonewline $fd $csv2 + close $fd + fail "Master - Replica inconsistency, Run diff -u against /tmp/repldump*.txt for more info" + } + assert {[$master dbsize] > 0} + } + } +} + +start_server {tags {"repl external:skip"}} { + start_server {} { + set master [srv -1 client] + set master_host [srv -1 host] + set master_port [srv -1 port] + set slave [srv 0 client] + + # Load some functions to be used later + $master FUNCTION load replace {#!lua name=test + redis.register_function{function_name='f_default_flags', callback=function(keys, args) return redis.call('get',keys[1]) end, flags={}} + redis.register_function{function_name='f_no_writes', callback=function(keys, args) return redis.call('get',keys[1]) end, flags={'no-writes'}} + } + + test {First server should have role slave after SLAVEOF} { + $slave slaveof $master_host $master_port + wait_replica_online $master + } + + test {With min-slaves-to-write (1,3): master should be writable} { + $master config set min-slaves-max-lag 3 + $master config set min-slaves-to-write 1 + assert_equal OK [$master set foo 123] + assert_equal OK [$master eval "return redis.call('set','foo',12345)" 0] + } + + test {With min-slaves-to-write (2,3): master should not be writable} { + $master config set min-slaves-max-lag 3 + $master config set min-slaves-to-write 2 + assert_error "*NOREPLICAS*" {$master set foo bar} + assert_error "*NOREPLICAS*" {$master eval "redis.call('set','foo','bar')" 0} + } + + test {With min-slaves-to-write function without no-write flag} { + assert_error "*NOREPLICAS*" {$master fcall f_default_flags 1 foo} + assert_equal "12345" [$master fcall f_no_writes 1 foo] + } + + test {With not enough good slaves, read in Lua script is still accepted} { + $master config set min-slaves-max-lag 3 + $master config set min-slaves-to-write 1 + $master eval "redis.call('set','foo','bar')" 0 + + $master config set min-slaves-to-write 2 + $master eval "return redis.call('get','foo')" 0 + } {bar} + + test {With min-slaves-to-write: master not writable with lagged slave} { + $master config set min-slaves-max-lag 2 + $master config set min-slaves-to-write 1 + assert_equal OK [$master set foo 123] + assert_equal OK [$master eval "return redis.call('set','foo',12345)" 0] + # Killing a slave to make it become a lagged slave. + exec kill -SIGSTOP [srv 0 pid] + # Waiting for slave kill. + wait_for_condition 100 100 { + [catch {$master set foo 123}] != 0 + } else { + fail "Master didn't become readonly" + } + assert_error "*NOREPLICAS*" {$master set foo 123} + assert_error "*NOREPLICAS*" {$master eval "return redis.call('set','foo',12345)" 0} + exec kill -SIGCONT [srv 0 pid] + } + } +} + +start_server {tags {"repl external:skip"}} { + start_server {} { + set master [srv -1 client] + set master_host [srv -1 host] + set master_port [srv -1 port] + set slave [srv 0 client] + + test {First server should have role slave after SLAVEOF} { + $slave slaveof $master_host $master_port + wait_for_condition 50 100 { + [s 0 master_link_status] eq {up} + } else { + fail "Replication not started." + } + } + + test {Replication of an expired key does not delete the expired key} { + # This test is very likely to do a false positive if the wait_for_ofs_sync + # takes longer than the expiration time, so give it a few more chances. + # Go with 5 retries of increasing timeout, i.e. start with 500ms, then go + # to 1000ms, 2000ms, 4000ms, 8000ms. + set px_ms 500 + for {set i 0} {$i < 5} {incr i} { + + wait_for_ofs_sync $master $slave + $master debug set-active-expire 0 + $master set k 1 px $px_ms + wait_for_ofs_sync $master $slave + exec kill -SIGSTOP [srv 0 pid] + $master incr k + after [expr $px_ms + 1] + # Stopping the replica for one second to makes sure the INCR arrives + # to the replica after the key is logically expired. + exec kill -SIGCONT [srv 0 pid] + wait_for_ofs_sync $master $slave + # Check that k is logically expired but is present in the replica. + set res [$slave exists k] + set errcode [catch {$slave debug object k} err] ; # Raises exception if k is gone. + if {$res == 0 && $errcode == 0} { break } + set px_ms [expr $px_ms * 2] + + } ;# for + + if {$::verbose} { puts "Replication of an expired key does not delete the expired key test attempts: $i" } + assert_equal $res 0 + assert_equal $errcode 0 + } + } +} + +start_server {tags {"repl external:skip"}} { + start_server {} { + set master [srv -1 client] + set master_host [srv -1 host] + set master_port [srv -1 port] + set slave [srv 0 client] + + test {First server should have role slave after SLAVEOF} { + $slave slaveof $master_host $master_port + wait_for_condition 50 100 { + [s 0 role] eq {slave} + } else { + fail "Replication not started." + } + } + + test {Replication: commands with many arguments (issue #1221)} { + # We now issue large MSET commands, that may trigger a specific + # class of bugs, see issue #1221. + for {set j 0} {$j < 100} {incr j} { + set cmd [list mset] + for {set x 0} {$x < 1000} {incr x} { + lappend cmd [randomKey] [randomValue] + } + $master {*}$cmd + } + + set retry 10 + while {$retry && ([$master debug digest] ne [$slave debug digest])}\ + { + after 1000 + incr retry -1 + } + assert {[$master dbsize] > 0} + } + + test {Replication of SPOP command -- alsoPropagate() API} { + $master del myset + set size [expr 1+[randomInt 100]] + set content {} + for {set j 0} {$j < $size} {incr j} { + lappend content [randomValue] + } + $master sadd myset {*}$content + + set count [randomInt 100] + set result [$master spop myset $count] + + wait_for_condition 50 100 { + [$master debug digest] eq [$slave debug digest] + } else { + fail "SPOP replication inconsistency" + } + } + } +} + +start_server {tags {"repl external:skip"}} { + start_server {} { + set master [srv -1 client] + set master_host [srv -1 host] + set master_port [srv -1 port] + set replica [srv 0 client] + + test {First server should have role slave after SLAVEOF} { + $replica slaveof $master_host $master_port + wait_for_condition 50 100 { + [s 0 role] eq {slave} + } else { + fail "Replication not started." + } + wait_for_sync $replica + } + + test {Data divergence can happen under default conditions} { + $replica config set propagation-error-behavior ignore + $master debug replicate fake-command-1 + + # Wait for replication to normalize + $master set foo bar2 + $master wait 1 2000 + + # Make sure we triggered the error, by finding the critical + # message and the fake command. + assert_equal [count_log_message 0 "fake-command-1"] 1 + assert_equal [count_log_message 0 "== CRITICAL =="] 1 + } + + test {Data divergence is allowed on writable replicas} { + $replica config set replica-read-only no + $replica set number2 foo + $master incrby number2 1 + $master wait 1 2000 + + assert_equal [$master get number2] 1 + assert_equal [$replica get number2] foo + + assert_equal [count_log_message 0 "incrby"] 1 + } + } +} diff --git a/tests/integration/replication-buffer.tcl b/tests/integration/replication-buffer.tcl new file mode 100644 index 0000000..2e40248 --- /dev/null +++ b/tests/integration/replication-buffer.tcl @@ -0,0 +1,296 @@ +# This test group aims to test that all replicas share one global replication buffer, +# two replicas don't make replication buffer size double, and when there is no replica, +# replica buffer will shrink. +start_server {tags {"repl external:skip"}} { +start_server {} { +start_server {} { +start_server {} { + set replica1 [srv -3 client] + set replica2 [srv -2 client] + set replica3 [srv -1 client] + + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + $master config set save "" + $master config set repl-backlog-size 16384 + $master config set repl-diskless-sync-delay 5 + $master config set repl-diskless-sync-max-replicas 1 + $master config set client-output-buffer-limit "replica 0 0 0" + + # Make sure replica3 is synchronized with master + $replica3 replicaof $master_host $master_port + wait_for_sync $replica3 + + # Generating RDB will take some 100 seconds + $master config set rdb-key-save-delay 1000000 + populate 100 "" 16 + + # Make sure replica1 and replica2 are waiting bgsave + $master config set repl-diskless-sync-max-replicas 2 + $replica1 replicaof $master_host $master_port + $replica2 replicaof $master_host $master_port + wait_for_condition 50 100 { + ([s rdb_bgsave_in_progress] == 1) && + [lindex [$replica1 role] 3] eq {sync} && + [lindex [$replica2 role] 3] eq {sync} + } else { + fail "fail to sync with replicas" + } + + test {All replicas share one global replication buffer} { + set before_used [s used_memory] + populate 1024 "" 1024 ; # Write extra 1M data + # New data uses 1M memory, but all replicas use only one + # replication buffer, so all replicas output memory is not + # more than double of replication buffer. + set repl_buf_mem [s mem_total_replication_buffers] + set extra_mem [expr {[s used_memory]-$before_used-1024*1024}] + assert {$extra_mem < 2*$repl_buf_mem} + + # Kill replica1, replication_buffer will not become smaller + catch {$replica1 shutdown nosave} + wait_for_condition 50 100 { + [s connected_slaves] eq {2} + } else { + fail "replica doesn't disconnect with master" + } + assert_equal $repl_buf_mem [s mem_total_replication_buffers] + } + + test {Replication buffer will become smaller when no replica uses} { + # Make sure replica3 catch up with the master + wait_for_ofs_sync $master $replica3 + + set repl_buf_mem [s mem_total_replication_buffers] + # Kill replica2, replication_buffer will become smaller + catch {$replica2 shutdown nosave} + wait_for_condition 50 100 { + [s connected_slaves] eq {1} + } else { + fail "replica2 doesn't disconnect with master" + } + assert {[expr $repl_buf_mem - 1024*1024] > [s mem_total_replication_buffers]} + } +} +} +} +} + +# This test group aims to test replication backlog size can outgrow the backlog +# limit config if there is a slow replica which keep massive replication buffers, +# and replicas could use this replication buffer (beyond backlog config) for +# partial re-synchronization. Of course, replication backlog memory also can +# become smaller when master disconnects with slow replicas since output buffer +# limit is reached. +start_server {tags {"repl external:skip"}} { +start_server {} { +start_server {} { + set replica1 [srv -2 client] + set replica1_pid [s -2 process_id] + set replica2 [srv -1 client] + set replica2_pid [s -1 process_id] + + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + $master config set save "" + $master config set repl-backlog-size 16384 + $master config set client-output-buffer-limit "replica 0 0 0" + + # Executing 'debug digest' on master which has many keys costs much time + # (especially in valgrind), this causes that replica1 and replica2 disconnect + # with master. + $master config set repl-timeout 1000 + $replica1 config set repl-timeout 1000 + $replica2 config set repl-timeout 1000 + + $replica1 replicaof $master_host $master_port + wait_for_sync $replica1 + + test {Replication backlog size can outgrow the backlog limit config} { + # Generating RDB will take 1000 seconds + $master config set rdb-key-save-delay 1000000 + populate 1000 master 10000 + $replica2 replicaof $master_host $master_port + # Make sure replica2 is waiting bgsave + wait_for_condition 5000 100 { + ([s rdb_bgsave_in_progress] == 1) && + [lindex [$replica2 role] 3] eq {sync} + } else { + fail "fail to sync with replicas" + } + # Replication actual backlog grow more than backlog setting since + # the slow replica2 kept replication buffer. + populate 10000 master 10000 + assert {[s repl_backlog_histlen] > [expr 10000*10000]} + } + + # Wait replica1 catch up with the master + wait_for_condition 1000 100 { + [s -2 master_repl_offset] eq [s master_repl_offset] + } else { + fail "Replica offset didn't catch up with the master after too long time" + } + + test {Replica could use replication buffer (beyond backlog config) for partial resynchronization} { + # replica1 disconnects with master + $replica1 replicaof [srv -1 host] [srv -1 port] + # Write a mass of data that exceeds repl-backlog-size + populate 10000 master 10000 + # replica1 reconnects with master + $replica1 replicaof $master_host $master_port + wait_for_condition 1000 100 { + [s -2 master_repl_offset] eq [s master_repl_offset] + } else { + fail "Replica offset didn't catch up with the master after too long time" + } + + # replica2 still waits for bgsave ending + assert {[s rdb_bgsave_in_progress] eq {1} && [lindex [$replica2 role] 3] eq {sync}} + # master accepted replica1 partial resync + assert_equal [s sync_partial_ok] {1} + assert_equal [$master debug digest] [$replica1 debug digest] + } + + test {Replication backlog memory will become smaller if disconnecting with replica} { + assert {[s repl_backlog_histlen] > [expr 2*10000*10000]} + assert_equal [s connected_slaves] {2} + + exec kill -SIGSTOP $replica2_pid + r config set client-output-buffer-limit "replica 128k 0 0" + # trigger output buffer limit check + r set key [string repeat A [expr 64*1024]] + # master will close replica2's connection since replica2's output + # buffer limit is reached, so there only is replica1. + wait_for_condition 100 100 { + [s connected_slaves] eq {1} + } else { + fail "master didn't disconnect with replica2" + } + + # Since we trim replication backlog inrementally, replication backlog + # memory may take time to be reclaimed. + wait_for_condition 1000 100 { + [s repl_backlog_histlen] < [expr 10000*10000] + } else { + fail "Replication backlog memory is not smaller" + } + exec kill -SIGCONT $replica2_pid + } + # speed up termination + $master config set shutdown-timeout 0 +} +} +} + +test {Partial resynchronization is successful even client-output-buffer-limit is less than repl-backlog-size} { + start_server {tags {"repl external:skip"}} { + start_server {} { + r config set save "" + r config set repl-backlog-size 100mb + r config set client-output-buffer-limit "replica 512k 0 0" + + set replica [srv -1 client] + $replica replicaof [srv 0 host] [srv 0 port] + wait_for_sync $replica + + set big_str [string repeat A [expr 10*1024*1024]] ;# 10mb big string + r multi + r client kill type replica + r set key $big_str + r set key $big_str + r debug sleep 2 ;# wait for replica reconnecting + r exec + # When replica reconnects with master, master accepts partial resync, + # and don't close replica client even client output buffer limit is + # reached. + r set key $big_str ;# trigger output buffer limit check + wait_for_ofs_sync r $replica + # master accepted replica partial resync + assert_equal [s sync_full] {1} + assert_equal [s sync_partial_ok] {1} + + r multi + r set key $big_str + r set key $big_str + r exec + # replica's reply buffer size is more than client-output-buffer-limit but + # doesn't exceed repl-backlog-size, we don't close replica client. + wait_for_condition 1000 100 { + [s -1 master_repl_offset] eq [s master_repl_offset] + } else { + fail "Replica offset didn't catch up with the master after too long time" + } + assert_equal [s sync_full] {1} + assert_equal [s sync_partial_ok] {1} + } + } +} + +# This test was added to make sure big keys added to the backlog do not trigger psync loop. +test {Replica client-output-buffer size is limited to backlog_limit/16 when no replication data is pending} { + proc client_field {r type f} { + set client [$r client list type $type] + if {![regexp $f=(\[a-zA-Z0-9-\]+) $client - res]} { + error "field $f not found for in $client" + } + return $res + } + + start_server {tags {"repl external:skip"}} { + start_server {} { + set replica [srv -1 client] + set replica_host [srv -1 host] + set replica_port [srv -1 port] + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + $master config set repl-backlog-size 16384 + $master config set client-output-buffer-limit "replica 32768 32768 60" + # Key has has to be larger than replica client-output-buffer limit. + set keysize [expr 256*1024] + + $replica replicaof $master_host $master_port + wait_for_condition 50 100 { + [lindex [$replica role] 0] eq {slave} && + [string match {*master_link_status:up*} [$replica info replication]] + } else { + fail "Can't turn the instance into a replica" + } + + set _v [prepare_value $keysize] + $master set key $_v + wait_for_ofs_sync $master $replica + + # Write another key to force the test to wait for another event loop iteration + # to give the serverCron a chance to disconnect replicas with COB size exceeding the limits + $master set key1 "1" + wait_for_ofs_sync $master $replica + + assert {[status $master connected_slaves] == 1} + + wait_for_condition 50 100 { + [client_field $master replica tot-mem] < $keysize + } else { + fail "replica client-output-buffer usage is higher than expected." + } + + assert {[status $master sync_partial_ok] == 0} + + # Before this fix (#11905), the test would trigger an assertion in 'o->used >= c->ref_block_pos' + test {The update of replBufBlock's repl_offset is ok - Regression test for #11666} { + set rd [redis_deferring_client] + set replid [status $master master_replid] + set offset [status $master repl_backlog_first_byte_offset] + $rd psync $replid $offset + assert_equal {PONG} [$master ping] ;# Make sure the master doesn't crash. + $rd close + } + } + } +} + diff --git a/tests/integration/replication-psync.tcl b/tests/integration/replication-psync.tcl new file mode 100644 index 0000000..16f3b88 --- /dev/null +++ b/tests/integration/replication-psync.tcl @@ -0,0 +1,143 @@ +# Creates a master-slave pair and breaks the link continuously to force +# partial resyncs attempts, all this while flooding the master with +# write queries. +# +# You can specify backlog size, ttl, delay before reconnection, test duration +# in seconds, and an additional condition to verify at the end. +# +# If reconnect is > 0, the test actually try to break the connection and +# reconnect with the master, otherwise just the initial synchronization is +# checked for consistency. +proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reconnect} { + start_server {tags {"repl"}} { + start_server {} { + + set master [srv -1 client] + set master_host [srv -1 host] + set master_port [srv -1 port] + set slave [srv 0 client] + + $master config set repl-backlog-size $backlog_size + $master config set repl-backlog-ttl $backlog_ttl + $master config set repl-diskless-sync $mdl + $master config set repl-diskless-sync-delay 1 + $slave config set repl-diskless-load $sdl + + set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000] + set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000] + set load_handle2 [start_bg_complex_data $master_host $master_port 12 100000] + + test {Slave should be able to synchronize with the master} { + $slave slaveof $master_host $master_port + wait_for_condition 50 100 { + [lindex [r role] 0] eq {slave} && + [lindex [r role] 3] eq {connected} + } else { + fail "Replication not started." + } + } + + # Check that the background clients are actually writing. + test {Detect write load to master} { + wait_for_condition 50 1000 { + [$master dbsize] > 100 + } else { + fail "Can't detect write load from background clients." + } + } + + test "Test replication partial resync: $descr (diskless: $mdl, $sdl, reconnect: $reconnect)" { + # Now while the clients are writing data, break the maste-slave + # link multiple times. + if ($reconnect) { + for {set j 0} {$j < $duration*10} {incr j} { + after 100 + # catch {puts "MASTER [$master dbsize] keys, REPLICA [$slave dbsize] keys"} + + if {($j % 20) == 0} { + catch { + if {$delay} { + $slave multi + $slave client kill $master_host:$master_port + $slave debug sleep $delay + $slave exec + } else { + $slave client kill $master_host:$master_port + } + } + } + } + } + stop_bg_complex_data $load_handle0 + stop_bg_complex_data $load_handle1 + stop_bg_complex_data $load_handle2 + + # Wait for the slave to reach the "online" + # state from the POV of the master. + set retry 5000 + while {$retry} { + set info [$master info] + if {[string match {*slave0:*state=online*} $info]} { + break + } else { + incr retry -1 + after 100 + } + } + if {$retry == 0} { + error "assertion:Slave not correctly synchronized" + } + + # Wait that slave acknowledge it is online so + # we are sure that DBSIZE and DEBUG DIGEST will not + # fail because of timing issues. (-LOADING error) + wait_for_condition 5000 100 { + [lindex [$slave role] 3] eq {connected} + } else { + fail "Slave still not connected after some time" + } + + wait_for_condition 100 100 { + [$master debug digest] == [$slave debug digest] + } else { + set csv1 [csvdump r] + set csv2 [csvdump {r -1}] + set fd [open /tmp/repldump1.txt w] + puts -nonewline $fd $csv1 + close $fd + set fd [open /tmp/repldump2.txt w] + puts -nonewline $fd $csv2 + close $fd + fail "Master - Replica inconsistency, Run diff -u against /tmp/repldump*.txt for more info" + } + assert {[$master dbsize] > 0} + eval $cond + } + } + } +} + +tags {"external:skip"} { +foreach mdl {no yes} { + foreach sdl {disabled swapdb} { + test_psync {no reconnection, just sync} 6 1000000 3600 0 { + } $mdl $sdl 0 + + test_psync {ok psync} 6 100000000 3600 0 { + assert {[s -1 sync_partial_ok] > 0} + } $mdl $sdl 1 + + test_psync {no backlog} 6 100 3600 0.5 { + assert {[s -1 sync_partial_err] > 0} + } $mdl $sdl 1 + + test_psync {ok after delay} 3 100000000 3600 3 { + assert {[s -1 sync_partial_ok] > 0} + } $mdl $sdl 1 + + test_psync {backlog expired} 3 100000000 1 3 { + assert {[s -1 sync_partial_err] > 0} + } $mdl $sdl 1 + } +} +} diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl new file mode 100644 index 0000000..4130080 --- /dev/null +++ b/tests/integration/replication.tcl @@ -0,0 +1,1396 @@ +proc log_file_matches {log pattern} { + set fp [open $log r] + set content [read $fp] + close $fp + string match $pattern $content +} + +start_server {tags {"repl network external:skip"}} { + set slave [srv 0 client] + set slave_host [srv 0 host] + set slave_port [srv 0 port] + set slave_log [srv 0 stdout] + start_server {} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + # Configure the master in order to hang waiting for the BGSAVE + # operation, so that the slave remains in the handshake state. + $master config set repl-diskless-sync yes + $master config set repl-diskless-sync-delay 1000 + + # Use a short replication timeout on the slave, so that if there + # are no bugs the timeout is triggered in a reasonable amount + # of time. + $slave config set repl-timeout 5 + + # Start the replication process... + $slave slaveof $master_host $master_port + + test {Slave enters handshake} { + wait_for_condition 50 1000 { + [string match *handshake* [$slave role]] + } else { + fail "Replica does not enter handshake state" + } + } + + # But make the master unable to send + # the periodic newlines to refresh the connection. The slave + # should detect the timeout. + $master debug sleep 10 + + test {Slave is able to detect timeout during handshake} { + wait_for_condition 50 1000 { + [log_file_matches $slave_log "*Timeout connecting to the MASTER*"] + } else { + fail "Replica is not able to detect timeout" + } + } + } +} + +start_server {tags {"repl external:skip"}} { + set A [srv 0 client] + set A_host [srv 0 host] + set A_port [srv 0 port] + start_server {} { + set B [srv 0 client] + set B_host [srv 0 host] + set B_port [srv 0 port] + + test {Set instance A as slave of B} { + $A slaveof $B_host $B_port + wait_for_condition 50 100 { + [lindex [$A role] 0] eq {slave} && + [string match {*master_link_status:up*} [$A info replication]] + } else { + fail "Can't turn the instance into a replica" + } + } + + test {INCRBYFLOAT replication, should not remove expire} { + r set test 1 EX 100 + r incrbyfloat test 0.1 + wait_for_ofs_sync $A $B + assert_equal [$A debug digest] [$B debug digest] + } + + test {GETSET replication} { + $A config resetstat + $A config set loglevel debug + $B config set loglevel debug + r set test foo + assert_equal [r getset test bar] foo + wait_for_condition 500 10 { + [$A get test] eq "bar" + } else { + fail "getset wasn't propagated" + } + assert_equal [r set test vaz get] bar + wait_for_condition 500 10 { + [$A get test] eq "vaz" + } else { + fail "set get wasn't propagated" + } + assert_match {*calls=3,*} [cmdrstat set $A] + assert_match {} [cmdrstat getset $A] + } + + test {BRPOPLPUSH replication, when blocking against empty list} { + $A config resetstat + set rd [redis_deferring_client] + $rd brpoplpush a b 5 + r lpush a foo + wait_for_condition 50 100 { + [$A debug digest] eq [$B debug digest] + } else { + fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]" + } + assert_match {*calls=1,*} [cmdrstat rpoplpush $A] + assert_match {} [cmdrstat lmove $A] + } + + test {BRPOPLPUSH replication, list exists} { + $A config resetstat + set rd [redis_deferring_client] + r lpush c 1 + r lpush c 2 + r lpush c 3 + $rd brpoplpush c d 5 + after 1000 + assert_equal [$A debug digest] [$B debug digest] + assert_match {*calls=1,*} [cmdrstat rpoplpush $A] + assert_match {} [cmdrstat lmove $A] + } + + foreach wherefrom {left right} { + foreach whereto {left right} { + test "BLMOVE ($wherefrom, $whereto) replication, when blocking against empty list" { + $A config resetstat + set rd [redis_deferring_client] + $rd blmove a b $wherefrom $whereto 5 + r lpush a foo + wait_for_condition 50 100 { + [$A debug digest] eq [$B debug digest] + } else { + fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]" + } + assert_match {*calls=1,*} [cmdrstat lmove $A] + assert_match {} [cmdrstat rpoplpush $A] + } + + test "BLMOVE ($wherefrom, $whereto) replication, list exists" { + $A config resetstat + set rd [redis_deferring_client] + r lpush c 1 + r lpush c 2 + r lpush c 3 + $rd blmove c d $wherefrom $whereto 5 + after 1000 + assert_equal [$A debug digest] [$B debug digest] + assert_match {*calls=1,*} [cmdrstat lmove $A] + assert_match {} [cmdrstat rpoplpush $A] + } + } + } + + test {BLPOP followed by role change, issue #2473} { + set rd [redis_deferring_client] + $rd blpop foo 0 ; # Block while B is a master + + # Turn B into master of A + $A slaveof no one + $B slaveof $A_host $A_port + wait_for_condition 50 100 { + [lindex [$B role] 0] eq {slave} && + [string match {*master_link_status:up*} [$B info replication]] + } else { + fail "Can't turn the instance into a replica" + } + + # Push elements into the "foo" list of the new replica. + # If the client is still attached to the instance, we'll get + # a desync between the two instances. + $A rpush foo a b c + after 100 + + wait_for_condition 50 100 { + [$A debug digest] eq [$B debug digest] && + [$A lrange foo 0 -1] eq {a b c} && + [$B lrange foo 0 -1] eq {a b c} + } else { + fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]" + } + } + } +} + +start_server {tags {"repl external:skip"}} { + r set mykey foo + + start_server {} { + test {Second server should have role master at first} { + s role + } {master} + + test {SLAVEOF should start with link status "down"} { + r multi + r slaveof [srv -1 host] [srv -1 port] + r info replication + r exec + } {*master_link_status:down*} + + test {The role should immediately be changed to "replica"} { + s role + } {slave} + + wait_for_sync r + test {Sync should have transferred keys from master} { + r get mykey + } {foo} + + test {The link status should be up} { + s master_link_status + } {up} + + test {SET on the master should immediately propagate} { + r -1 set mykey bar + + wait_for_condition 500 100 { + [r 0 get mykey] eq {bar} + } else { + fail "SET on master did not propagated on replica" + } + } + + test {FLUSHDB / FLUSHALL should replicate} { + set repl [attach_to_replication_stream] + + r -1 set key value + r -1 flushdb + + r -1 set key value2 + r -1 flushall + + wait_for_ofs_sync [srv 0 client] [srv -1 client] + assert_equal [r -1 dbsize] 0 + assert_equal [r 0 dbsize] 0 + + # DB is empty. + r -1 flushdb + r -1 flushdb + r -1 flushdb + + # DBs are empty. + r -1 flushall + r -1 flushall + r -1 flushall + + # Assert that each FLUSHDB command is replicated even the DB is empty. + # Assert that each FLUSHALL command is replicated even the DBs are empty. + assert_replication_stream $repl { + {set key value} + {flushdb} + {set key value2} + {flushall} + {flushdb} + {flushdb} + {flushdb} + {flushall} + {flushall} + {flushall} + } + close_replication_stream $repl + } + + test {ROLE in master reports master with a slave} { + set res [r -1 role] + lassign $res role offset slaves + assert {$role eq {master}} + assert {$offset > 0} + assert {[llength $slaves] == 1} + lassign [lindex $slaves 0] master_host master_port slave_offset + assert {$slave_offset <= $offset} + } + + test {ROLE in slave reports slave in connected state} { + set res [r role] + lassign $res role master_host master_port slave_state slave_offset + assert {$role eq {slave}} + assert {$slave_state eq {connected}} + } + } +} + +foreach mdl {no yes} { + foreach sdl {disabled swapdb} { + start_server {tags {"repl external:skip"}} { + set master [srv 0 client] + $master config set repl-diskless-sync $mdl + $master config set repl-diskless-sync-delay 5 + $master config set repl-diskless-sync-max-replicas 3 + set master_host [srv 0 host] + set master_port [srv 0 port] + set slaves {} + start_server {} { + lappend slaves [srv 0 client] + start_server {} { + lappend slaves [srv 0 client] + start_server {} { + lappend slaves [srv 0 client] + test "Connect multiple replicas at the same time (issue #141), master diskless=$mdl, replica diskless=$sdl" { + # start load handles only inside the test, so that the test can be skipped + set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000000] + set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000000] + set load_handle2 [start_bg_complex_data $master_host $master_port 12 100000000] + set load_handle3 [start_write_load $master_host $master_port 8] + set load_handle4 [start_write_load $master_host $master_port 4] + after 5000 ;# wait for some data to accumulate so that we have RDB part for the fork + + # Send SLAVEOF commands to slaves + [lindex $slaves 0] config set repl-diskless-load $sdl + [lindex $slaves 1] config set repl-diskless-load $sdl + [lindex $slaves 2] config set repl-diskless-load $sdl + [lindex $slaves 0] slaveof $master_host $master_port + [lindex $slaves 1] slaveof $master_host $master_port + [lindex $slaves 2] slaveof $master_host $master_port + + # Wait for all the three slaves to reach the "online" + # state from the POV of the master. + set retry 500 + while {$retry} { + set info [r -3 info] + if {[string match {*slave0:*state=online*slave1:*state=online*slave2:*state=online*} $info]} { + break + } else { + incr retry -1 + after 100 + } + } + if {$retry == 0} { + error "assertion:Slaves not correctly synchronized" + } + + # Wait that slaves acknowledge they are online so + # we are sure that DBSIZE and DEBUG DIGEST will not + # fail because of timing issues. + wait_for_condition 500 100 { + [lindex [[lindex $slaves 0] role] 3] eq {connected} && + [lindex [[lindex $slaves 1] role] 3] eq {connected} && + [lindex [[lindex $slaves 2] role] 3] eq {connected} + } else { + fail "Slaves still not connected after some time" + } + + # Stop the write load + stop_bg_complex_data $load_handle0 + stop_bg_complex_data $load_handle1 + stop_bg_complex_data $load_handle2 + stop_write_load $load_handle3 + stop_write_load $load_handle4 + + # Make sure no more commands processed + wait_load_handlers_disconnected -3 + + wait_for_ofs_sync $master [lindex $slaves 0] + wait_for_ofs_sync $master [lindex $slaves 1] + wait_for_ofs_sync $master [lindex $slaves 2] + + # Check digests + set digest [$master debug digest] + set digest0 [[lindex $slaves 0] debug digest] + set digest1 [[lindex $slaves 1] debug digest] + set digest2 [[lindex $slaves 2] debug digest] + assert {$digest ne 0000000000000000000000000000000000000000} + assert {$digest eq $digest0} + assert {$digest eq $digest1} + assert {$digest eq $digest2} + } + } + } + } + } + } +} + +start_server {tags {"repl external:skip"}} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + start_server {} { + test "Master stream is correctly processed while the replica has a script in -BUSY state" { + set load_handle0 [start_write_load $master_host $master_port 3] + set slave [srv 0 client] + $slave config set lua-time-limit 500 + $slave slaveof $master_host $master_port + + # Wait for the slave to be online + wait_for_condition 500 100 { + [lindex [$slave role] 3] eq {connected} + } else { + fail "Replica still not connected after some time" + } + + # Wait some time to make sure the master is sending data + # to the slave. + after 5000 + + # Stop the ability of the slave to process data by sendig + # a script that will put it in BUSY state. + $slave eval {for i=1,3000000000 do end} 0 + + # Wait some time again so that more master stream will + # be processed. + after 2000 + + # Stop the write load + stop_write_load $load_handle0 + + # number of keys + wait_for_condition 500 100 { + [$master debug digest] eq [$slave debug digest] + } else { + fail "Different datasets between replica and master" + } + } + } +} + +# Diskless load swapdb when NOT async_loading (different master replid) +foreach testType {Successful Aborted} { + start_server {tags {"repl external:skip"}} { + set replica [srv 0 client] + set replica_host [srv 0 host] + set replica_port [srv 0 port] + set replica_log [srv 0 stdout] + start_server {} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + # Set master and replica to use diskless replication on swapdb mode + $master config set repl-diskless-sync yes + $master config set repl-diskless-sync-delay 0 + $master config set save "" + $replica config set repl-diskless-load swapdb + $replica config set save "" + + # Put different data sets on the master and replica + # We need to put large keys on the master since the replica replies to info only once in 2mb + $replica debug populate 200 slave 10 + $master debug populate 1000 master 100000 + $master config set rdbcompression no + + # Set a key value on replica to check status on failure and after swapping db + $replica set mykey myvalue + + switch $testType { + "Aborted" { + # Set master with a slow rdb generation, so that we can easily intercept loading + # 10ms per key, with 1000 keys is 10 seconds + $master config set rdb-key-save-delay 10000 + + # Start the replication process + $replica replicaof $master_host $master_port + + test {Diskless load swapdb (different replid): replica enter loading} { + # Wait for the replica to start reading the rdb + wait_for_condition 100 100 { + [s -1 loading] eq 1 + } else { + fail "Replica didn't get into loading mode" + } + + assert_equal [s -1 async_loading] 0 + } + + # Make sure that next sync will not start immediately so that we can catch the replica in between syncs + $master config set repl-diskless-sync-delay 5 + + # Kill the replica connection on the master + set killed [$master client kill type replica] + + # Wait for loading to stop (fail) + wait_for_condition 100 100 { + [s -1 loading] eq 0 + } else { + fail "Replica didn't disconnect" + } + + test {Diskless load swapdb (different replid): old database is exposed after replication fails} { + # Ensure we see old values from replica + assert_equal [$replica get mykey] "myvalue" + + # Make sure amount of replica keys didn't change + assert_equal [$replica dbsize] 201 + } + + # Speed up shutdown + $master config set rdb-key-save-delay 0 + } + "Successful" { + # Start the replication process + $replica replicaof $master_host $master_port + + # Let replica finish sync with master + wait_for_condition 100 100 { + [s -1 master_link_status] eq "up" + } else { + fail "Master <-> Replica didn't finish sync" + } + + test {Diskless load swapdb (different replid): new database is exposed after swapping} { + # Ensure we don't see anymore the key that was stored only to replica and also that we don't get LOADING status + assert_equal [$replica GET mykey] "" + + # Make sure amount of keys matches master + assert_equal [$replica dbsize] 1000 + } + } + } + } + } +} + +# Diskless load swapdb when async_loading (matching master replid) +foreach testType {Successful Aborted} { + start_server {tags {"repl external:skip"}} { + set replica [srv 0 client] + set replica_host [srv 0 host] + set replica_port [srv 0 port] + set replica_log [srv 0 stdout] + start_server {} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + # Set master and replica to use diskless replication on swapdb mode + $master config set repl-diskless-sync yes + $master config set repl-diskless-sync-delay 0 + $master config set save "" + $replica config set repl-diskless-load swapdb + $replica config set save "" + + # Set replica writable so we can check that a key we manually added is served + # during replication and after failure, but disappears on success + $replica config set replica-read-only no + + # Initial sync to have matching replids between master and replica + $replica replicaof $master_host $master_port + + # Let replica finish initial sync with master + wait_for_condition 100 100 { + [s -1 master_link_status] eq "up" + } else { + fail "Master <-> Replica didn't finish sync" + } + + # Put different data sets on the master and replica + # We need to put large keys on the master since the replica replies to info only once in 2mb + $replica debug populate 2000 slave 10 + $master debug populate 2000 master 100000 + $master config set rdbcompression no + + # Set a key value on replica to check status during loading, on failure and after swapping db + $replica set mykey myvalue + + # Set a function value on replica to check status during loading, on failure and after swapping db + $replica function load {#!lua name=test + redis.register_function('test', function() return 'hello1' end) + } + + # Set a function value on master to check it reaches the replica when replication ends + $master function load {#!lua name=test + redis.register_function('test', function() return 'hello2' end) + } + + # Force the replica to try another full sync (this time it will have matching master replid) + $master multi + $master client kill type replica + # Fill replication backlog with new content + $master config set repl-backlog-size 16384 + for {set keyid 0} {$keyid < 10} {incr keyid} { + $master set "$keyid string_$keyid" [string repeat A 16384] + } + $master exec + + switch $testType { + "Aborted" { + # Set master with a slow rdb generation, so that we can easily intercept loading + # 10ms per key, with 2000 keys is 20 seconds + $master config set rdb-key-save-delay 10000 + + test {Diskless load swapdb (async_loading): replica enter async_loading} { + # Wait for the replica to start reading the rdb + wait_for_condition 100 100 { + [s -1 async_loading] eq 1 + } else { + fail "Replica didn't get into async_loading mode" + } + + assert_equal [s -1 loading] 0 + } + + test {Diskless load swapdb (async_loading): old database is exposed while async replication is in progress} { + # Ensure we still see old values while async_loading is in progress and also not LOADING status + assert_equal [$replica get mykey] "myvalue" + + # Ensure we still can call old function while async_loading is in progress + assert_equal [$replica fcall test 0] "hello1" + + # Make sure we're still async_loading to validate previous assertion + assert_equal [s -1 async_loading] 1 + + # Make sure amount of replica keys didn't change + assert_equal [$replica dbsize] 2001 + } + + test {Busy script during async loading} { + set rd_replica [redis_deferring_client -1] + $replica config set lua-time-limit 10 + $rd_replica eval {while true do end} 0 + after 200 + assert_error {BUSY*} {$replica ping} + $replica script kill + after 200 ; # Give some time to Lua to call the hook again... + assert_equal [$replica ping] "PONG" + $rd_replica close + } + + test {Blocked commands and configs during async-loading} { + assert_error {LOADING*} {$replica config set appendonly no} + assert_error {LOADING*} {$replica REPLICAOF no one} + } + + # Make sure that next sync will not start immediately so that we can catch the replica in between syncs + $master config set repl-diskless-sync-delay 5 + + # Kill the replica connection on the master + set killed [$master client kill type replica] + + # Wait for loading to stop (fail) + wait_for_condition 100 100 { + [s -1 async_loading] eq 0 + } else { + fail "Replica didn't disconnect" + } + + test {Diskless load swapdb (async_loading): old database is exposed after async replication fails} { + # Ensure we see old values from replica + assert_equal [$replica get mykey] "myvalue" + + # Ensure we still can call old function + assert_equal [$replica fcall test 0] "hello1" + + # Make sure amount of replica keys didn't change + assert_equal [$replica dbsize] 2001 + } + + # Speed up shutdown + $master config set rdb-key-save-delay 0 + } + "Successful" { + # Let replica finish sync with master + wait_for_condition 100 100 { + [s -1 master_link_status] eq "up" + } else { + fail "Master <-> Replica didn't finish sync" + } + + test {Diskless load swapdb (async_loading): new database is exposed after swapping} { + # Ensure we don't see anymore the key that was stored only to replica and also that we don't get LOADING status + assert_equal [$replica GET mykey] "" + + # Ensure we got the new function + assert_equal [$replica fcall test 0] "hello2" + + # Make sure amount of keys matches master + assert_equal [$replica dbsize] 2010 + } + } + } + } + } +} + +test {diskless loading short read} { + start_server {tags {"repl"}} { + set replica [srv 0 client] + set replica_host [srv 0 host] + set replica_port [srv 0 port] + start_server {} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + # Set master and replica to use diskless replication + $master config set repl-diskless-sync yes + $master config set rdbcompression no + $replica config set repl-diskless-load swapdb + $master config set hz 500 + $replica config set hz 500 + $master config set dynamic-hz no + $replica config set dynamic-hz no + # Try to fill the master with all types of data types / encodings + set start [clock clicks -milliseconds] + + # Set a function value to check short read handling on functions + r function load {#!lua name=test + redis.register_function('test', function() return 'hello1' end) + } + + for {set k 0} {$k < 3} {incr k} { + for {set i 0} {$i < 10} {incr i} { + r set "$k int_$i" [expr {int(rand()*10000)}] + r expire "$k int_$i" [expr {int(rand()*10000)}] + r set "$k string_$i" [string repeat A [expr {int(rand()*1000000)}]] + r hset "$k hash_small" [string repeat A [expr {int(rand()*10)}]] 0[string repeat A [expr {int(rand()*10)}]] + r hset "$k hash_large" [string repeat A [expr {int(rand()*10000)}]] [string repeat A [expr {int(rand()*1000000)}]] + r sadd "$k set_small" [string repeat A [expr {int(rand()*10)}]] + r sadd "$k set_large" [string repeat A [expr {int(rand()*1000000)}]] + r zadd "$k zset_small" [expr {rand()}] [string repeat A [expr {int(rand()*10)}]] + r zadd "$k zset_large" [expr {rand()}] [string repeat A [expr {int(rand()*1000000)}]] + r lpush "$k list_small" [string repeat A [expr {int(rand()*10)}]] + r lpush "$k list_large" [string repeat A [expr {int(rand()*1000000)}]] + for {set j 0} {$j < 10} {incr j} { + r xadd "$k stream" * foo "asdf" bar "1234" + } + r xgroup create "$k stream" "mygroup_$i" 0 + r xreadgroup GROUP "mygroup_$i" Alice COUNT 1 STREAMS "$k stream" > + } + } + + if {$::verbose} { + set end [clock clicks -milliseconds] + set duration [expr $end - $start] + puts "filling took $duration ms (TODO: use pipeline)" + set start [clock clicks -milliseconds] + } + + # Start the replication process... + set loglines [count_log_lines -1] + $master config set repl-diskless-sync-delay 0 + $replica replicaof $master_host $master_port + + # kill the replication at various points + set attempts 100 + if {$::accurate} { set attempts 500 } + for {set i 0} {$i < $attempts} {incr i} { + # wait for the replica to start reading the rdb + # using the log file since the replica only responds to INFO once in 2mb + set res [wait_for_log_messages -1 {"*Loading DB in memory*"} $loglines 2000 1] + set loglines [lindex $res 1] + + # add some additional random sleep so that we kill the master on a different place each time + after [expr {int(rand()*50)}] + + # kill the replica connection on the master + set killed [$master client kill type replica] + + set res [wait_for_log_messages -1 {"*Internal error in RDB*" "*Finished with success*" "*Successful partial resynchronization*"} $loglines 500 10] + if {$::verbose} { puts $res } + set log_text [lindex $res 0] + set loglines [lindex $res 1] + if {![string match "*Internal error in RDB*" $log_text]} { + # force the replica to try another full sync + $master multi + $master client kill type replica + $master set asdf asdf + # fill replication backlog with new content + $master config set repl-backlog-size 16384 + for {set keyid 0} {$keyid < 10} {incr keyid} { + $master set "$keyid string_$keyid" [string repeat A 16384] + } + $master exec + } + + # wait for loading to stop (fail) + # After a loading successfully, next loop will enter `async_loading` + wait_for_condition 1000 1 { + [s -1 async_loading] eq 0 && + [s -1 loading] eq 0 + } else { + fail "Replica didn't disconnect" + } + } + if {$::verbose} { + set end [clock clicks -milliseconds] + set duration [expr $end - $start] + puts "test took $duration ms" + } + # enable fast shutdown + $master config set rdb-key-save-delay 0 + } + } +} {} {external:skip} + +# get current stime and utime metrics for a thread (since it's creation) +proc get_cpu_metrics { statfile } { + if { [ catch { + set fid [ open $statfile r ] + set data [ read $fid 1024 ] + ::close $fid + set data [ split $data ] + + ;## number of jiffies it has been scheduled... + set utime [ lindex $data 13 ] + set stime [ lindex $data 14 ] + } err ] } { + error "assertion:can't parse /proc: $err" + } + set mstime [clock milliseconds] + return [ list $mstime $utime $stime ] +} + +# compute %utime and %stime of a thread between two measurements +proc compute_cpu_usage {start end} { + set clock_ticks [exec getconf CLK_TCK] + # convert ms time to jiffies and calc delta + set dtime [ expr { ([lindex $end 0] - [lindex $start 0]) * double($clock_ticks) / 1000 } ] + set utime [ expr { [lindex $end 1] - [lindex $start 1] } ] + set stime [ expr { [lindex $end 2] - [lindex $start 2] } ] + set pucpu [ expr { ($utime / $dtime) * 100 } ] + set pscpu [ expr { ($stime / $dtime) * 100 } ] + return [ list $pucpu $pscpu ] +} + + +# test diskless rdb pipe with multiple replicas, which may drop half way +start_server {tags {"repl external:skip"}} { + set master [srv 0 client] + $master config set repl-diskless-sync yes + $master config set repl-diskless-sync-delay 5 + $master config set repl-diskless-sync-max-replicas 2 + set master_host [srv 0 host] + set master_port [srv 0 port] + set master_pid [srv 0 pid] + # put enough data in the db that the rdb file will be bigger than the socket buffers + # and since we'll have key-load-delay of 100, 20000 keys will take at least 2 seconds + # we also need the replica to process requests during transfer (which it does only once in 2mb) + $master debug populate 20000 test 10000 + $master config set rdbcompression no + # If running on Linux, we also measure utime/stime to detect possible I/O handling issues + set os [catch {exec uname}] + set measure_time [expr {$os == "Linux"} ? 1 : 0] + foreach all_drop {no slow fast all timeout} { + test "diskless $all_drop replicas drop during rdb pipe" { + set replicas {} + set replicas_alive {} + # start one replica that will read the rdb fast, and one that will be slow + start_server {} { + lappend replicas [srv 0 client] + lappend replicas_alive [srv 0 client] + start_server {} { + lappend replicas [srv 0 client] + lappend replicas_alive [srv 0 client] + + # start replication + # it's enough for just one replica to be slow, and have it's write handler enabled + # so that the whole rdb generation process is bound to that + set loglines [count_log_lines -2] + [lindex $replicas 0] config set repl-diskless-load swapdb + [lindex $replicas 0] config set key-load-delay 100 ;# 20k keys and 100 microseconds sleep means at least 2 seconds + [lindex $replicas 0] replicaof $master_host $master_port + [lindex $replicas 1] replicaof $master_host $master_port + + # wait for the replicas to start reading the rdb + # using the log file since the replica only responds to INFO once in 2mb + wait_for_log_messages -1 {"*Loading DB in memory*"} 0 800 10 + + if {$measure_time} { + set master_statfile "/proc/$master_pid/stat" + set master_start_metrics [get_cpu_metrics $master_statfile] + set start_time [clock seconds] + } + + # wait a while so that the pipe socket writer will be + # blocked on write (since replica 0 is slow to read from the socket) + after 500 + + # add some command to be present in the command stream after the rdb. + $master incr $all_drop + + # disconnect replicas depending on the current test + if {$all_drop == "all" || $all_drop == "fast"} { + exec kill [srv 0 pid] + set replicas_alive [lreplace $replicas_alive 1 1] + } + if {$all_drop == "all" || $all_drop == "slow"} { + exec kill [srv -1 pid] + set replicas_alive [lreplace $replicas_alive 0 0] + } + if {$all_drop == "timeout"} { + $master config set repl-timeout 2 + # we want the slow replica to hang on a key for very long so it'll reach repl-timeout + exec kill -SIGSTOP [srv -1 pid] + after 2000 + } + + # wait for rdb child to exit + wait_for_condition 500 100 { + [s -2 rdb_bgsave_in_progress] == 0 + } else { + fail "rdb child didn't terminate" + } + + # make sure we got what we were aiming for, by looking for the message in the log file + if {$all_drop == "all"} { + wait_for_log_messages -2 {"*Diskless rdb transfer, last replica dropped, killing fork child*"} $loglines 1 1 + } + if {$all_drop == "no"} { + wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 2 replicas still up*"} $loglines 1 1 + } + if {$all_drop == "slow" || $all_drop == "fast"} { + wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 1 replicas still up*"} $loglines 1 1 + } + if {$all_drop == "timeout"} { + wait_for_log_messages -2 {"*Disconnecting timedout replica (full sync)*"} $loglines 1 1 + wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 1 replicas still up*"} $loglines 1 1 + # master disconnected the slow replica, remove from array + set replicas_alive [lreplace $replicas_alive 0 0] + # release it + exec kill -SIGCONT [srv -1 pid] + } + + # make sure we don't have a busy loop going thought epoll_wait + if {$measure_time} { + set master_end_metrics [get_cpu_metrics $master_statfile] + set time_elapsed [expr {[clock seconds]-$start_time}] + set master_cpu [compute_cpu_usage $master_start_metrics $master_end_metrics] + set master_utime [lindex $master_cpu 0] + set master_stime [lindex $master_cpu 1] + if {$::verbose} { + puts "elapsed: $time_elapsed" + puts "master utime: $master_utime" + puts "master stime: $master_stime" + } + if {!$::no_latency && ($all_drop == "all" || $all_drop == "slow" || $all_drop == "timeout")} { + assert {$master_utime < 70} + assert {$master_stime < 70} + } + if {!$::no_latency && ($all_drop == "none" || $all_drop == "fast")} { + assert {$master_utime < 15} + assert {$master_stime < 15} + } + } + + # verify the data integrity + foreach replica $replicas_alive { + # Wait that replicas acknowledge they are online so + # we are sure that DBSIZE and DEBUG DIGEST will not + # fail because of timing issues. + wait_for_condition 150 100 { + [lindex [$replica role] 3] eq {connected} + } else { + fail "replicas still not connected after some time" + } + + # Make sure that replicas and master have same + # number of keys + wait_for_condition 50 100 { + [$master dbsize] == [$replica dbsize] + } else { + fail "Different number of keys between master and replicas after too long time." + } + + # Check digests + set digest [$master debug digest] + set digest0 [$replica debug digest] + assert {$digest ne 0000000000000000000000000000000000000000} + assert {$digest eq $digest0} + } + } + } + } + } +} + +test "diskless replication child being killed is collected" { + # when diskless master is waiting for the replica to become writable + # it removes the read event from the rdb pipe so if the child gets killed + # the replica will hung. and the master may not collect the pid with waitpid + start_server {tags {"repl"}} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + set master_pid [srv 0 pid] + $master config set repl-diskless-sync yes + $master config set repl-diskless-sync-delay 0 + # put enough data in the db that the rdb file will be bigger than the socket buffers + $master debug populate 20000 test 10000 + $master config set rdbcompression no + start_server {} { + set replica [srv 0 client] + set loglines [count_log_lines 0] + $replica config set repl-diskless-load swapdb + $replica config set key-load-delay 1000000 + $replica config set loading-process-events-interval-bytes 1024 + $replica replicaof $master_host $master_port + + # wait for the replicas to start reading the rdb + wait_for_log_messages 0 {"*Loading DB in memory*"} $loglines 800 10 + + # wait to be sure the replica is hung and the master is blocked on write + after 500 + + # simulate the OOM killer or anyone else kills the child + set fork_child_pid [get_child_pid -1] + exec kill -9 $fork_child_pid + + # wait for the parent to notice the child have exited + wait_for_condition 50 100 { + [s -1 rdb_bgsave_in_progress] == 0 + } else { + fail "rdb child didn't terminate" + } + + # Speed up shutdown + $replica config set key-load-delay 0 + } + } +} {} {external:skip} + +foreach mdl {yes no} { + test "replication child dies when parent is killed - diskless: $mdl" { + # when master is killed, make sure the fork child can detect that and exit + start_server {tags {"repl"}} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + set master_pid [srv 0 pid] + $master config set repl-diskless-sync $mdl + $master config set repl-diskless-sync-delay 0 + # create keys that will take 10 seconds to save + $master config set rdb-key-save-delay 1000 + $master debug populate 10000 + start_server {} { + set replica [srv 0 client] + $replica replicaof $master_host $master_port + + # wait for rdb child to start + wait_for_condition 5000 10 { + [s -1 rdb_bgsave_in_progress] == 1 + } else { + fail "rdb child didn't start" + } + set fork_child_pid [get_child_pid -1] + + # simulate the OOM killer or anyone else kills the parent + exec kill -9 $master_pid + + # wait for the child to notice the parent died have exited + wait_for_condition 500 10 { + [process_is_alive $fork_child_pid] == 0 + } else { + fail "rdb child didn't terminate" + } + } + } + } {} {external:skip} +} + +test "diskless replication read pipe cleanup" { + # In diskless replication, we create a read pipe for the RDB, between the child and the parent. + # When we close this pipe (fd), the read handler also needs to be removed from the event loop (if it still registered). + # Otherwise, next time we will use the same fd, the registration will be fail (panic), because + # we will use EPOLL_CTL_MOD (the fd still register in the event loop), on fd that already removed from epoll_ctl + start_server {tags {"repl"}} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + set master_pid [srv 0 pid] + $master config set repl-diskless-sync yes + $master config set repl-diskless-sync-delay 0 + + # put enough data in the db, and slowdown the save, to keep the parent busy at the read process + $master config set rdb-key-save-delay 100000 + $master debug populate 20000 test 10000 + $master config set rdbcompression no + start_server {} { + set replica [srv 0 client] + set loglines [count_log_lines 0] + $replica config set repl-diskless-load swapdb + $replica replicaof $master_host $master_port + + # wait for the replicas to start reading the rdb + wait_for_log_messages 0 {"*Loading DB in memory*"} $loglines 800 10 + + set loglines [count_log_lines -1] + # send FLUSHALL so the RDB child will be killed + $master flushall + + # wait for another RDB child process to be started + wait_for_log_messages -1 {"*Background RDB transfer started by pid*"} $loglines 800 10 + + # make sure master is alive + $master ping + } + } +} {} {external:skip} + +test {replicaof right after disconnection} { + # this is a rare race condition that was reproduced sporadically by the psync2 unit. + # see details in #7205 + start_server {tags {"repl"}} { + set replica1 [srv 0 client] + set replica1_host [srv 0 host] + set replica1_port [srv 0 port] + set replica1_log [srv 0 stdout] + start_server {} { + set replica2 [srv 0 client] + set replica2_host [srv 0 host] + set replica2_port [srv 0 port] + set replica2_log [srv 0 stdout] + start_server {} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + $replica1 replicaof $master_host $master_port + $replica2 replicaof $master_host $master_port + + wait_for_condition 50 100 { + [string match {*master_link_status:up*} [$replica1 info replication]] && + [string match {*master_link_status:up*} [$replica2 info replication]] + } else { + fail "Can't turn the instance into a replica" + } + + set rd [redis_deferring_client -1] + $rd debug sleep 1 + after 100 + + # when replica2 will wake up from the sleep it will find both disconnection + # from it's master and also a replicaof command at the same event loop + $master client kill type replica + $replica2 replicaof $replica1_host $replica1_port + $rd read + + wait_for_condition 50 100 { + [string match {*master_link_status:up*} [$replica2 info replication]] + } else { + fail "role change failed." + } + + # make sure psync succeeded, and there were no unexpected full syncs. + assert_equal [status $master sync_full] 2 + assert_equal [status $replica1 sync_full] 0 + assert_equal [status $replica2 sync_full] 0 + } + } + } +} {} {external:skip} + +test {Kill rdb child process if its dumping RDB is not useful} { + start_server {tags {"repl"}} { + set slave1 [srv 0 client] + start_server {} { + set slave2 [srv 0 client] + start_server {} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + for {set i 0} {$i < 10} {incr i} { + $master set $i $i + } + # Generating RDB will cost 10s(10 * 1s) + $master config set rdb-key-save-delay 1000000 + $master config set repl-diskless-sync no + $master config set save "" + + $slave1 slaveof $master_host $master_port + $slave2 slaveof $master_host $master_port + + # Wait for starting child + wait_for_condition 50 100 { + ([s 0 rdb_bgsave_in_progress] == 1) && + ([string match "*wait_bgsave*" [s 0 slave0]]) && + ([string match "*wait_bgsave*" [s 0 slave1]]) + } else { + fail "rdb child didn't start" + } + + # Slave1 disconnect with master + $slave1 slaveof no one + # Shouldn't kill child since another slave wait for rdb + after 100 + assert {[s 0 rdb_bgsave_in_progress] == 1} + + # Slave2 disconnect with master + $slave2 slaveof no one + # Should kill child + wait_for_condition 100 10 { + [s 0 rdb_bgsave_in_progress] eq 0 + } else { + fail "can't kill rdb child" + } + + # If have save parameters, won't kill child + $master config set save "900 1" + $slave1 slaveof $master_host $master_port + $slave2 slaveof $master_host $master_port + wait_for_condition 50 100 { + ([s 0 rdb_bgsave_in_progress] == 1) && + ([string match "*wait_bgsave*" [s 0 slave0]]) && + ([string match "*wait_bgsave*" [s 0 slave1]]) + } else { + fail "rdb child didn't start" + } + $slave1 slaveof no one + $slave2 slaveof no one + after 200 + assert {[s 0 rdb_bgsave_in_progress] == 1} + catch {$master shutdown nosave} + } + } + } +} {} {external:skip} + +start_server {tags {"repl external:skip"}} { + set master1_host [srv 0 host] + set master1_port [srv 0 port] + r set a b + + start_server {} { + set master2 [srv 0 client] + set master2_host [srv 0 host] + set master2_port [srv 0 port] + # Take 10s for dumping RDB + $master2 debug populate 10 master2 10 + $master2 config set rdb-key-save-delay 1000000 + + start_server {} { + set sub_replica [srv 0 client] + + start_server {} { + # Full sync with master1 + r slaveof $master1_host $master1_port + wait_for_sync r + assert_equal "b" [r get a] + + # Let sub replicas sync with me + $sub_replica slaveof [srv 0 host] [srv 0 port] + wait_for_sync $sub_replica + assert_equal "b" [$sub_replica get a] + + # Full sync with master2, and then kill master2 before finishing dumping RDB + r slaveof $master2_host $master2_port + wait_for_condition 50 100 { + ([s -2 rdb_bgsave_in_progress] == 1) && + ([string match "*wait_bgsave*" [s -2 slave0]]) + } else { + fail "full sync didn't start" + } + catch {$master2 shutdown nosave} + + test {Don't disconnect with replicas before loading transferred RDB when full sync} { + assert ![log_file_matches [srv -1 stdout] "*Connection with master lost*"] + # The replication id is not changed in entire replication chain + assert_equal [s master_replid] [s -3 master_replid] + assert_equal [s master_replid] [s -1 master_replid] + } + + test {Discard cache master before loading transferred RDB when full sync} { + set full_sync [s -3 sync_full] + set partial_sync [s -3 sync_partial_ok] + # Partial sync with master1 + r slaveof $master1_host $master1_port + wait_for_sync r + # master1 accepts partial sync instead of full sync + assert_equal $full_sync [s -3 sync_full] + assert_equal [expr $partial_sync+1] [s -3 sync_partial_ok] + + # Since master only partially sync replica, and repl id is not changed, + # the replica doesn't disconnect with its sub-replicas + assert_equal [s master_replid] [s -3 master_replid] + assert_equal [s master_replid] [s -1 master_replid] + assert ![log_file_matches [srv -1 stdout] "*Connection with master lost*"] + # Sub replica just has one full sync, no partial resync. + assert_equal 1 [s sync_full] + assert_equal 0 [s sync_partial_ok] + } + } + } + } +} + +test {replica can handle EINTR if use diskless load} { + start_server {tags {"repl"}} { + set replica [srv 0 client] + set replica_log [srv 0 stdout] + start_server {} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + $master debug populate 100 master 100000 + $master config set rdbcompression no + $master config set repl-diskless-sync yes + $master config set repl-diskless-sync-delay 0 + $replica config set repl-diskless-load on-empty-db + # Construct EINTR error by using the built in watchdog + $replica config set watchdog-period 200 + # Block replica in read() + $master config set rdb-key-save-delay 10000 + # set speedy shutdown + $master config set save "" + # Start the replication process... + $replica replicaof $master_host $master_port + + # Wait for the replica to start reading the rdb + set res [wait_for_log_messages -1 {"*Loading DB in memory*"} 0 200 10] + set loglines [lindex $res 1] + + # Wait till we see the watchgod log line AFTER the loading started + wait_for_log_messages -1 {"*WATCHDOG TIMER EXPIRED*"} $loglines 200 10 + + # Make sure we're still loading, and that there was just one full sync attempt + assert ![log_file_matches [srv -1 stdout] "*Reconnecting to MASTER*"] + assert_equal 1 [s 0 sync_full] + assert_equal 1 [s -1 loading] + } + } +} {} {external:skip} + +start_server {tags {"repl" "external:skip"}} { + test "replica do not write the reply to the replication link - SYNC (_addReplyToBufferOrList)" { + set rd [redis_deferring_client] + set lines [count_log_lines 0] + + $rd sync + $rd ping + catch {$rd read} e + if {$::verbose} { puts "SYNC _addReplyToBufferOrList: $e" } + assert_equal "PONG" [r ping] + + # Check we got the warning logs about the PING command. + verify_log_message 0 "*Replica generated a reply to command 'ping', disconnecting it: *" $lines + + $rd close + waitForBgsave r + } + + test "replica do not write the reply to the replication link - SYNC (addReplyDeferredLen)" { + set rd [redis_deferring_client] + set lines [count_log_lines 0] + + $rd sync + $rd xinfo help + catch {$rd read} e + if {$::verbose} { puts "SYNC addReplyDeferredLen: $e" } + assert_equal "PONG" [r ping] + + # Check we got the warning logs about the XINFO HELP command. + verify_log_message 0 "*Replica generated a reply to command 'xinfo|help', disconnecting it: *" $lines + + $rd close + waitForBgsave r + } + + test "replica do not write the reply to the replication link - PSYNC (_addReplyToBufferOrList)" { + set rd [redis_deferring_client] + set lines [count_log_lines 0] + + $rd psync replicationid -1 + assert_match {FULLRESYNC * 0} [$rd read] + $rd get foo + catch {$rd read} e + if {$::verbose} { puts "PSYNC _addReplyToBufferOrList: $e" } + assert_equal "PONG" [r ping] + + # Check we got the warning logs about the GET command. + verify_log_message 0 "*Replica generated a reply to command 'get', disconnecting it: *" $lines + verify_log_message 0 "*== CRITICAL == This master is sending an error to its replica: *" $lines + verify_log_message 0 "*Replica can't interact with the keyspace*" $lines + + $rd close + waitForBgsave r + } + + test "replica do not write the reply to the replication link - PSYNC (addReplyDeferredLen)" { + set rd [redis_deferring_client] + set lines [count_log_lines 0] + + $rd psync replicationid -1 + assert_match {FULLRESYNC * 0} [$rd read] + $rd slowlog get + catch {$rd read} e + if {$::verbose} { puts "PSYNC addReplyDeferredLen: $e" } + assert_equal "PONG" [r ping] + + # Check we got the warning logs about the SLOWLOG GET command. + verify_log_message 0 "*Replica generated a reply to command 'slowlog|get', disconnecting it: *" $lines + + $rd close + waitForBgsave r + } + + test "PSYNC with wrong offset should throw error" { + # It used to accept the FULL SYNC, but also replied with an error. + assert_error {ERR value is not an integer or out of range} {r psync replicationid offset_str} + set logs [exec tail -n 100 < [srv 0 stdout]] + assert_match {*Replica * asks for synchronization but with a wrong offset} $logs + assert_equal "PONG" [r ping] + } +} diff --git a/tests/integration/shutdown.tcl b/tests/integration/shutdown.tcl new file mode 100644 index 0000000..60afc5c --- /dev/null +++ b/tests/integration/shutdown.tcl @@ -0,0 +1,238 @@ +# This test suite tests shutdown when there are lagging replicas connected. + +# Fill up the OS socket send buffer for the replica connection 1M at a time. +# When the replication buffer memory increases beyond 2M (often after writing 4M +# or so), we assume it's because the OS socket send buffer can't swallow +# anymore. +proc fill_up_os_socket_send_buffer_for_repl {idx} { + set i 0 + while {1} { + incr i + populate 1024 junk$i: 1024 $idx + after 10 + set buf_size [s $idx mem_total_replication_buffers] + if {$buf_size > 2*1024*1024} { + break + } + } +} + +foreach how {sigterm shutdown} { + test "Shutting down master waits for replica to catch up ($how)" { + start_server {} { + start_server {} { + set master [srv -1 client] + set master_host [srv -1 host] + set master_port [srv -1 port] + set master_pid [srv -1 pid] + set replica [srv 0 client] + set replica_pid [srv 0 pid] + + # Config master. + $master config set shutdown-timeout 300; # 5min for slow CI + $master config set repl-backlog-size 1; # small as possible + $master config set hz 100; # cron runs every 10ms + + # Config replica. + $replica replicaof $master_host $master_port + wait_for_sync $replica + + # Preparation: Set k to 1 on both master and replica. + $master set k 1 + wait_for_ofs_sync $master $replica + + # Pause the replica. + exec kill -SIGSTOP $replica_pid + after 10 + + # Fill up the OS socket send buffer for the replica connection + # to prevent the following INCR from reaching the replica via + # the OS. + fill_up_os_socket_send_buffer_for_repl -1 + + # Incr k and immediately shutdown master. + $master incr k + switch $how { + sigterm { + exec kill -SIGTERM $master_pid + } + shutdown { + set rd [redis_deferring_client -1] + $rd shutdown + } + } + wait_for_condition 50 100 { + [s -1 shutdown_in_milliseconds] > 0 + } else { + fail "Master not indicating ongoing shutdown." + } + + # Wake up replica and check if master has waited for it. + after 20; # 2 cron intervals + exec kill -SIGCONT $replica_pid + wait_for_condition 300 1000 { + [$replica get k] eq 2 + } else { + fail "Master exited before replica could catch up." + } + + # Check shutdown log messages on master + wait_for_log_messages -1 {"*ready to exit, bye bye*"} 0 100 500 + assert_equal 0 [count_log_message -1 "*Lagging replica*"] + verify_log_message -1 "*1 of 1 replicas are in sync*" 0 + } + } + } {} {repl external:skip} +} + +test {Shutting down master waits for replica timeout} { + start_server {} { + start_server {} { + set master [srv -1 client] + set master_host [srv -1 host] + set master_port [srv -1 port] + set master_pid [srv -1 pid] + set replica [srv 0 client] + set replica_pid [srv 0 pid] + + # Config master. + $master config set shutdown-timeout 1; # second + + # Config replica. + $replica replicaof $master_host $master_port + wait_for_sync $replica + + # Preparation: Set k to 1 on both master and replica. + $master set k 1 + wait_for_ofs_sync $master $replica + + # Pause the replica. + exec kill -SIGSTOP $replica_pid + after 10 + + # Fill up the OS socket send buffer for the replica connection to + # prevent the following INCR k from reaching the replica via the OS. + fill_up_os_socket_send_buffer_for_repl -1 + + # Incr k and immediately shutdown master. + $master incr k + exec kill -SIGTERM $master_pid + wait_for_condition 50 100 { + [s -1 shutdown_in_milliseconds] > 0 + } else { + fail "Master not indicating ongoing shutdown." + } + + # Let master finish shutting down and check log. + wait_for_log_messages -1 {"*ready to exit, bye bye*"} 0 100 100 + verify_log_message -1 "*Lagging replica*" 0 + verify_log_message -1 "*0 of 1 replicas are in sync*" 0 + + # Wake up replica. + exec kill -SIGCONT $replica_pid + assert_equal 1 [$replica get k] + } + } +} {} {repl external:skip} + +test "Shutting down master waits for replica then fails" { + start_server {} { + start_server {} { + set master [srv -1 client] + set master_host [srv -1 host] + set master_port [srv -1 port] + set master_pid [srv -1 pid] + set replica [srv 0 client] + set replica_pid [srv 0 pid] + + # Config master and replica. + $replica replicaof $master_host $master_port + wait_for_sync $replica + + # Pause the replica and write a key on master. + exec kill -SIGSTOP $replica_pid + after 10 + $master incr k + + # Two clients call blocking SHUTDOWN in parallel. + set rd1 [redis_deferring_client -1] + set rd2 [redis_deferring_client -1] + $rd1 shutdown + $rd2 shutdown + set info_clients [$master info clients] + assert_match "*connected_clients:3*" $info_clients + assert_match "*blocked_clients:2*" $info_clients + + # Start a very slow initial AOFRW, which will prevent shutdown. + $master config set rdb-key-save-delay 30000000; # 30 seconds + $master config set appendonly yes + + # Wake up replica, causing master to continue shutting down. + exec kill -SIGCONT $replica_pid + + # SHUTDOWN returns an error to both clients blocking on SHUTDOWN. + catch { $rd1 read } e1 + catch { $rd2 read } e2 + assert_match "*Errors trying to SHUTDOWN. Check logs*" $e1 + assert_match "*Errors trying to SHUTDOWN. Check logs*" $e2 + $rd1 close + $rd2 close + + # Check shutdown log messages on master. + verify_log_message -1 "*1 of 1 replicas are in sync*" 0 + verify_log_message -1 "*Writing initial AOF, can't exit*" 0 + verify_log_message -1 "*Errors trying to shut down*" 0 + + # Let master to exit fast, without waiting for the very slow AOFRW. + catch {$master shutdown nosave force} + } + } +} {} {repl external:skip} + +test "Shutting down master waits for replica then aborted" { + start_server {} { + start_server {} { + set master [srv -1 client] + set master_host [srv -1 host] + set master_port [srv -1 port] + set master_pid [srv -1 pid] + set replica [srv 0 client] + set replica_pid [srv 0 pid] + + # Config master and replica. + $replica replicaof $master_host $master_port + wait_for_sync $replica + + # Pause the replica and write a key on master. + exec kill -SIGSTOP $replica_pid + after 10 + $master incr k + + # Two clients call blocking SHUTDOWN in parallel. + set rd1 [redis_deferring_client -1] + set rd2 [redis_deferring_client -1] + $rd1 shutdown + $rd2 shutdown + set info_clients [$master info clients] + assert_match "*connected_clients:3*" $info_clients + assert_match "*blocked_clients:2*" $info_clients + + # Abort the shutdown + $master shutdown abort + + # Wake up replica, causing master to continue shutting down. + exec kill -SIGCONT $replica_pid + + # SHUTDOWN returns an error to both clients blocking on SHUTDOWN. + catch { $rd1 read } e1 + catch { $rd2 read } e2 + assert_match "*Errors trying to SHUTDOWN. Check logs*" $e1 + assert_match "*Errors trying to SHUTDOWN. Check logs*" $e2 + $rd1 close + $rd2 close + + # Check shutdown log messages on master. + verify_log_message -1 "*Shutdown manually aborted*" 0 + } + } +} {} {repl external:skip} |