diff options
Diffstat (limited to 'mysql-test/suite/rpl/t/rpl_parallel_retry.test')
-rw-r--r-- | mysql-test/suite/rpl/t/rpl_parallel_retry.test | 514 |
1 files changed, 514 insertions, 0 deletions
diff --git a/mysql-test/suite/rpl/t/rpl_parallel_retry.test b/mysql-test/suite/rpl/t/rpl_parallel_retry.test new file mode 100644 index 00000000..8b2affed --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_parallel_retry.test @@ -0,0 +1,514 @@ +--source include/have_innodb.inc +--source include/have_perfschema.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--let $rpl_topology=1->2 +--source include/rpl_init.inc + +--echo *** Test retry of transactions that fail to replicate due to deadlock or similar temporary error. *** + +--connection server_1 +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CREATE TABLE t1 (a int PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t1 VALUES (1,1); +--save_master_pos + +# Use a stored function to inject a debug_sync into the appropriate THD. +# The function does nothing on the master, and on the slave it injects the +# desired debug_sync action(s). +SET sql_log_bin=0; +--delimiter || +CREATE FUNCTION foo(x INT, d1 VARCHAR(500), d2 VARCHAR(500)) + RETURNS INT DETERMINISTIC + BEGIN + RETURN x; + END +|| +--delimiter ; +SET sql_log_bin=1; + +--connection server_2 +SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +--source include/stop_slave.inc +SET GLOBAL slave_parallel_threads=5; +--source include/start_slave.inc +--sync_with_master +SET sql_log_bin=0; +--delimiter || +CREATE FUNCTION foo(x INT, d1 VARCHAR(500), d2 VARCHAR(500)) + RETURNS INT DETERMINISTIC + BEGIN + IF d1 != '' THEN + SET debug_sync = d1; + END IF; + IF d2 != '' THEN + SET debug_sync = d2; + END IF; + RETURN x; + END +|| +--delimiter ; +SET sql_log_bin=1; +--source include/stop_slave.inc + +--connection server_1 +SET gtid_seq_no = 100; +BEGIN; +INSERT INTO t1 VALUES (2,1); +UPDATE t1 SET b=b+1 WHERE a=1; +INSERT INTO t1 VALUES (3,1); +COMMIT; +SELECT * FROM t1 ORDER BY a; +--save_master_pos + +--connection server_2 +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100"; +let $old_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1); +--source include/start_slave.inc +--sync_with_master +SET GLOBAL debug_dbug=@old_dbug; +let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1); +let $ps_value= query_get_value(select last_trans_retry_count from + performance_schema.replication_applier_status_by_worker where + last_trans_retry_count > 0, last_trans_retry_count, 1); +let $assert_text= Performance Schema retries should match with actual retries; +let $assert_cond= "$ps_value" = $new_retry - $old_retry; +source include/assert.inc; + + +SELECT * FROM t1 ORDER BY a; + + +--echo *** Test that double retry works when the first retry also fails with temp error *** +--source include/stop_slave.inc + +--connection server_1 +SET gtid_seq_no = 100; +SET @old_server_id = @@server_id; +SET server_id = 10; +BEGIN; +INSERT INTO t1 VALUES (4,1); +UPDATE t1 SET b=b+1 WHERE a=1; +INSERT INTO t1 VALUES (5,1); +INSERT INTO t1 VALUES (6,1); +COMMIT; +SET server_id = @old_server_id; +SELECT * FROM t1 ORDER BY a; +--save_master_pos + +--connection server_2 +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100,rpl_parallel_simulate_double_temp_err_gtid_0_x_100"; +let $old_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1); +--source include/start_slave.inc +--sync_with_master +SET GLOBAL debug_dbug=@old_dbug; +let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1); +let $ps_value= query_get_value(select last_trans_retry_count from + performance_schema.replication_applier_status_by_worker where + last_trans_retry_count > 0, last_trans_retry_count, 1); +let $assert_text= Performance Schema retries should match with actual retries; +let $assert_cond= "$ps_value" = $new_retry - $old_retry; +source include/assert.inc; + +SELECT * FROM t1 ORDER BY a; + + +--echo *** Test too many retries, eventually causing failure. *** +--source include/stop_slave.inc + +--connection server_1 +SET gtid_seq_no = 100; +SET @old_server_id = @@server_id; +SET server_id = 11; +BEGIN; +INSERT INTO t1 VALUES (7,1); +UPDATE t1 SET b=b+1 WHERE a=1; +INSERT INTO t1 VALUES (8,1); +INSERT INTO t1 VALUES (9,1); +COMMIT; +SET server_id = @old_server_id; +SELECT * FROM t1 ORDER BY a; +--save_master_pos + +--connection server_2 +SET sql_log_bin=0; +CALL mtr.add_suppression("Slave worker thread retried transaction 10 time\\(s\\) in vain, giving up"); +CALL mtr.add_suppression("Slave: Deadlock found when trying to get lock; try restarting transaction"); +CALL mtr.add_suppression("Slave worker thread retried transaction .* in vain, giving up"); +SET sql_log_bin=1; + +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100,rpl_parallel_simulate_infinite_temp_err_gtid_0_x_100"; +let $old_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1); +START SLAVE; +--let $slave_sql_errno= 1213 +--source include/wait_for_slave_sql_error.inc +SET GLOBAL debug_dbug=@old_dbug; +let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1); +--disable_query_log +eval SELECT $new_retry - $old_retry AS retries; +--enable_query_log +let $ps_value= query_get_value(select last_trans_retry_count from + performance_schema.replication_applier_status_by_worker where + last_trans_retry_count > 0, last_trans_retry_count, 1); +let $assert_text= Performance Schema retries should match with actual retries; +let $assert_cond= "$ps_value" = $new_retry - $old_retry; +source include/assert.inc; + +SELECT * FROM t1 ORDER BY a; +STOP SLAVE IO_THREAD; +--source include/start_slave.inc +--sync_with_master +SELECT * FROM t1 ORDER BY a; + +--echo *** Test retry of event group that spans multiple relay log files. *** + +--connection server_1 +CREATE TABLE t2 (a int PRIMARY KEY, b BLOB) ENGINE=InnoDB; +INSERT INTO t2 VALUES (1,"Hulubullu"); +--save_master_pos + +--connection server_2 +--sync_with_master +--source include/stop_slave.inc +SET @old_max= @@GLOBAL.max_relay_log_size; +SET GLOBAL max_relay_log_size=4096; + +--connection server_1 +--let $big= `SELECT REPEAT("*", 5000)` +SET gtid_seq_no = 100; +SET @old_server_id = @@server_id; +SET server_id = 12; +BEGIN; +--disable_query_log +eval INSERT INTO t2 VALUES (2, CONCAT("Hello ", "$big")); +eval INSERT INTO t2 VALUES (3, CONCAT("Long data: ", "$big")); +--enable_query_log +INSERT INTO t1 VALUES (10, 4); +COMMIT; +SET server_id = @old_server_id; +SELECT * FROM t1 WHERE a >= 10 ORDER BY a; +SELECT a, LENGTH(b) FROM t2 ORDER BY a; +--save_master_pos + +--connection server_2 +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100"; +let $old_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1); +--source include/start_slave.inc +--sync_with_master +SET GLOBAL debug_dbug=@old_dbug; +let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1); +let $ps_value= query_get_value(select last_trans_retry_count from + performance_schema.replication_applier_status_by_worker where + last_trans_retry_count > 0, last_trans_retry_count, 1); +let $assert_text= Performance Schema retries should match with actual retries; +let $assert_cond= "$ps_value" = $new_retry - $old_retry; +source include/assert.inc; + +SELECT * FROM t1 WHERE a >= 10 ORDER BY a; +SELECT a, LENGTH(b) FROM t2 ORDER BY a; + +--connection server_1 +INSERT INTO t1 VALUES (11,11); +--disable_query_log +eval INSERT INTO t2 VALUES (4, "$big"); +--enable_query_log +--save_master_pos + +--connection server_2 +--sync_with_master +SELECT * FROM t1 WHERE a >= 10 ORDER BY a; +SELECT a, LENGTH(b) FROM t2 ORDER BY a; +SET GLOBAL max_relay_log_size=@old_max; + + +--echo *** MDEV-7065: Incorrect relay log position in parallel replication after retry of transaction *** + +--connection server_2 +--source include/stop_slave.inc + +--connection server_1 +BEGIN; +INSERT INTO t1 VALUES (100, 0); +INSERT INTO t1 VALUES (101, 0); +INSERT INTO t1 VALUES (102, 0); +INSERT INTO t1 VALUES (103, 0); +COMMIT; +SELECT * FROM t1 WHERE a >= 100 ORDER BY a; +--save_master_pos + +--connection server_2 +# Inject a DBUG error insert to cause the XID event of the single transaction +# from the master to fail with a deadlock error and be retried. +# The bug was that the retry of the XID would leave the relay log position +# incorrect (off by the size of XID event). +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_xid"; +let $old_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1); +--source include/start_slave.inc +--sync_with_master +SET GLOBAL debug_dbug=@old_dbug; +let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1); +let $ps_value= query_get_value(select last_trans_retry_count from + performance_schema.replication_applier_status_by_worker where + last_trans_retry_count > 0, last_trans_retry_count, 1); +let $assert_text= Performance Schema retries should match with actual retries; +let $assert_cond= "$ps_value" = $new_retry - $old_retry; +source include/assert.inc; + +SELECT * FROM t1 WHERE a >= 100 ORDER BY a; +# Stop the SQL thread. When the bug was there to give the incorrect relay log +# position, the restart of the SQL thread would read garbage data from the +# middle of an event and fail with relay log IO error. +--source include/stop_slave_sql.inc + +--connection server_1 +INSERT INTO t1 VALUES (104, 1); +INSERT INTO t1 VALUES (105, 1); +INSERT INTO t1 VALUES (106, 1); +INSERT INTO t1 VALUES (107, 1); +INSERT INTO t1 VALUES (108, 1); +INSERT INTO t1 VALUES (109, 1); +--save_master_pos + +--connection server_2 +--source include/start_slave.inc +--sync_with_master +SELECT * FROM t1 WHERE a >= 100 ORDER BY a; + + +--echo *** MDEV-6917: Parallel replication: "Commit failed due to failure of an earlier commit on which this one depends", but no prior failure seen ** + +--connection server_1 +CREATE TABLE t3 (a INT PRIMARY KEY, b INT, KEY b_idx(b)) ENGINE=InnoDB; +INSERT INTO t3 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6); +CREATE TABLE t4 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +# MDEV-515 takes X-lock on the table for the first insert. +# So concurrent insert won't happen on the table +INSERT INTO t4 VALUES(100, 100); + +# We need statement binlog format to be able to inject debug_sync statements +# on the slave with calls to foo(). +SET @old_format= @@SESSION.binlog_format; +SET binlog_format='statement'; +--save_master_pos + +--connection server_2 +--sync_with_master +--source include/stop_slave.inc +CHANGE MASTER TO master_use_gtid=no; + +--connection server_1 + +# Create a group commit with three transactions T1, T2, T3. +# T2 will block T1 on the slave where we will make it run first, so it will be +# deadlock killed. +# The bug was that in this case, T3 was signalled to fail due to T2 failing, +# even though the retry of T2 was later successful. + +--connect (con1,127.0.0.1,root,,test,$SERVER_MYPORT_1,) +SET @old_format= @@SESSION.binlog_format; +SET binlog_format='statement'; +BEGIN; +INSERT INTO t4 VALUES (10, foo(1, 'before_execute_sql_command WAIT_FOR t1_start', '')); +UPDATE t3 SET b=NULL WHERE a=6; +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1'; +send COMMIT; +--connection server_1 +SET debug_sync='now WAIT_FOR master_queued1'; + +--connect (con2,127.0.0.1,root,,test,$SERVER_MYPORT_1,) +SET @old_format= @@SESSION.binlog_format; +SET binlog_format='statement'; +BEGIN; +INSERT INTO t4 VALUES (20, foo(2, 'group_commit_waiting_for_prior SIGNAL t2_waiting', '')); +DELETE FROM t3 WHERE b <= 3; +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2'; +send COMMIT; + +--connection server_1 +SET debug_sync='now WAIT_FOR master_queued2'; + +--connect (con3,127.0.0.1,root,,test,$SERVER_MYPORT_1,) +SET @old_format= @@SESSION.binlog_format; +SET binlog_format='statement'; +BEGIN; +INSERT INTO t4 VALUES (30, foo(3, 'before_execute_sql_command WAIT_FOR t3_start', 'group_commit_waiting_for_prior SIGNAL t3_waiting')); +INSERT INTO t3 VALUES (7,7); +SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued3'; +send COMMIT; + +--connection server_1 +SET debug_sync='now WAIT_FOR master_queued3'; +SET debug_sync='now SIGNAL master_cont1'; + +--connection con1 +REAP; +SET binlog_format=@old_format; +--connection con2 +REAP; +SET binlog_format=@old_format; +--connection con3 +REAP; +SET debug_sync='RESET'; +SET binlog_format=@old_format; + +--connection server_1 +--save_master_pos +SELECT * FROM t3 ORDER BY a; + + +--connection server_2 +let $old_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1); +SET @old_dbug=@@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug="+d,thd_need_ordering_with_force"; +--source include/start_slave.inc +# First, wait for T2 to complete up to where it is waiting for T1 to group +# commit for both of them. This will set locks that will block T1, causing +# a deadlock kill and retry of T2. T1 and T3 are still blocked at the start +# of each their SQL statements. +SET debug_sync='now WAIT_FOR t2_waiting'; +# Now let T3 move on until the point where it is itself ready to commit. +SET debug_sync='now SIGNAL t3_start'; +SET debug_sync='now WAIT_FOR t3_waiting'; +# Now T2 and T3 are set up, so we can let T1 proceed. +SET debug_sync='now SIGNAL t1_start'; +# Now we can wait for the slave to catch up. +# We should see T2 being deadlock killed and retried. +# The bug was that T2 deadlock kill would cause T3 to fail due to failure +# of an earlier commit. This is wrong as T2 did not fail, it was only +# retried. +--sync_with_master +SET GLOBAL debug_dbug=@old_dbug; +SET debug_sync='RESET'; +let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1); +let $ps_value= query_get_value(select last_trans_retry_count from + performance_schema.replication_applier_status_by_worker where + last_trans_retry_count > 0, last_trans_retry_count, 1); +let $assert_text= Performance Schema retries should match with actual retries; +let $assert_cond= "$ps_value" = $new_retry - $old_retry; +source include/assert.inc; +SELECT * FROM t3 ORDER BY a; + + +--connection server_1 +SET binlog_format=@old_format; + + +# Clean up of the above part. +--connection server_2 +--source include/stop_slave.inc +SET GLOBAL slave_parallel_threads=@old_parallel_threads; +--source include/start_slave.inc + +--connection server_1 +DROP TABLE t1, t2, t3, t4; +DROP function foo; + +--sync_slave_with_master server_2 + +# +# MDEV-12746 rpl.rpl_parallel_optimistic_nobinlog fails committing out of order at retry +# + +--connection server_1 +CREATE TABLE t1 (a int PRIMARY KEY, b INT) ENGINE=InnoDB; +# MDEV-515 takes X-lock on the table for the first insert. +# So concurrent insert won't happen on the table +INSERT INTO t1 VALUES(100, 100); + +# Replicate create-t1 and prepare to re-start slave in optimistic mode +--sync_slave_with_master server_2 +--source include/stop_slave.inc +SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +SET @@GLOBAL.slave_parallel_threads=5; +SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode; +SET @@GLOBAL.slave_parallel_mode='aggressive'; +SET @old_lock_wait_timeout=@@GLOBAL.innodb_lock_wait_timeout; +SET @@GLOBAL.innodb_lock_wait_timeout=2; +SET @old_slave_transaction_retries=@@GLOBAL.slave_transaction_retries; +SET @@GLOBAL.slave_transaction_retries=1; + +--echo # Spoilers on the slave side causing temporary errors +--connect (spoiler_21,127.0.0.1,root,,test,$SLAVE_MYPORT) +BEGIN; + INSERT INTO t1 SET a=1,b=2; + +--connect (spoiler_22,127.0.0.1,root,,test,$SLAVE_MYPORT) +BEGIN; + INSERT INTO t1 SET a=2,b=2; + +--echo # Master payload +--connection server_1 +SET @@SESSION.GTID_SEQ_NO=1000; +INSERT INTO t1 SET a=1,b=1; +SET @@SESSION.GTID_SEQ_NO=1001; +INSERT INTO t1 SET a=2,b=1; + +--echo # Start slave whose both appliers is destined to being blocked +--connection server_2 +SET @old_dbug= @@GLOBAL.debug_dbug; +SET @@GLOBAL.debug_dbug="+d,rpl_parallel_simulate_wait_at_retry"; +--source include/start_slave.inc + +--echo # Make sure both workers are waiting at their sync points +--let $wait_condition= SELECT count(*)=2 FROM information_schema.processlist WHERE state LIKE '%debug sync point%'; +--source include/wait_condition.inc + + +--echo # Signal to the 1st to proceed after it has reached termination state +SET @@DEBUG_SYNC='now SIGNAL proceed_by_1000'; +--connection spoiler_21 +ROLLBACK; + +--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE '%debug sync point%'; +--source include/wait_condition.inc + +--echo # Release the 2nd worker to proceed +--connection spoiler_22 +ROLLBACK; +--connection server_2 +SET @@DEBUG_SYNC='now SIGNAL proceed_by_1001'; + +--echo # observe how it all ends +if (`SELECT count(*) = 1 FROM t1 WHERE a = 1`) +{ + --echo "*** Unexpected commit by the first Worker ***" + SELECT * from t1; + --die +} + +--echo # Wait for the workers to go home and check the result of applying +--let $wait_condition=SELECT count(*) = 0 FROM information_schema.processlist WHERE command = 'Slave_worker' +--source include/wait_condition.inc +if (`SELECT count(*) = 1 FROM t1 WHERE a = 2`) +{ + --echo + --echo "*** Error: congrats, you hit MDEV-12746 issue. ***" + --echo + --die +} +--echo # which is OK + +# +# Clean up +# +--connection server_2 +--source include/stop_slave.inc +SET @@GLOBAL.slave_parallel_threads=@old_parallel_threads; +SET @@GLOBAL.slave_parallel_mode=@old_parallel_mode; +SET @@GLOBAL.innodb_lock_wait_timeout=@old_lock_wait_timeout; +SET @@GLOBAL.slave_transaction_retries=@old_slave_transaction_retries; +SET @@GLOBAL.debug_dbug=@old_dbug; +SET debug_sync='RESET'; +--source include/start_slave.inc + +--connection server_1 +DROP TABLE t1; + +--sync_slave_with_master server_2 + +--source include/rpl_end.inc |