diff options
Diffstat (limited to 'mysql-test/suite/rpl/t')
53 files changed, 1571 insertions, 195 deletions
diff --git a/mysql-test/suite/rpl/t/parallel_backup_xa.inc b/mysql-test/suite/rpl/t/parallel_backup_xa.inc index 83a6fb79..5f287aa0 100644 --- a/mysql-test/suite/rpl/t/parallel_backup_xa.inc +++ b/mysql-test/suite/rpl/t/parallel_backup_xa.inc @@ -41,7 +41,7 @@ if ($slave_ooo_error) { SET @sav_innodb_lock_wait_timeout = @@global.innodb_lock_wait_timeout; SET @sav_slave_transaction_retries = @@global.slave_transaction_retries; - SET @@global.innodb_lock_wait_timeout =1; + SET @@global.innodb_lock_wait_timeout =5; SET @@global.slave_transaction_retries=0; } --source include/start_slave.inc diff --git a/mysql-test/suite/rpl/t/rpl_auditing.test b/mysql-test/suite/rpl/t/rpl_auditing.test new file mode 100644 index 00000000..3d3cee9a --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_auditing.test @@ -0,0 +1,77 @@ +if (!$SERVER_AUDIT2_SO) { + skip No SERVER_AUDIT2 plugin; +} + +source include/master-slave.inc; + +--disable_warnings +drop table if exists t1; +sync_slave_with_master; +reset master; +--enable_warnings + +--disable_warnings +CREATE TABLE IF NOT EXISTS mysql.server_audit_filters ( + filtername char(80) COLLATE utf8_bin NOT NULL DEFAULT '', + rule longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL DEFAULT 'true' CHECK (json_valid(rule)), + CONSTRAINT c_filtername UNIQUE (filtername) +) ENGINE=Aria; + +CREATE TABLE IF NOT EXISTS mysql.server_audit_users (host char(60) COLLATE utf8_bin NOT NULL DEFAULT '', + user char(80) COLLATE utf8_bin NOT NULL DEFAULT '', + filtername char(80) NOT NULL DEFAULT '', + CONSTRAINT c_host_user UNIQUE (host, user) +) ENGINE=Aria; +--enable_warnings + +INSERT INTO mysql.server_audit_filters VALUES ('ignore_sys', '{"ignore_tables" : "mysql.*"}'); +INSERT INTO mysql.server_audit_users VALUES ('%','<replication_slave>','ignore_sys'); +INSERT INTO mysql.server_audit_users VALUES ('%','root','ignore_sys'); + +install plugin server_audit soname 'server_audit2'; +set global server_audit_logging=on; + +# this is done to make test deterministic +# so the above 'set' command is always logged before the 'create table t1' +-- disable_query_log +-- disable_result_log +select * from mysql.server_audit_filters; +select * from mysql.server_audit_users; +-- enable_result_log +-- enable_query_log + +connection master; +create table t1 (a int); +insert into t1 values (1); +truncate t1; +drop table t1; +sync_slave_with_master; + +set global server_audit_logging=off; + +truncate mysql.server_audit_filters; +truncate mysql.server_audit_users; +INSERT INTO mysql.server_audit_filters VALUES ('no_logging','false'); +INSERT INTO mysql.server_audit_users VALUES ('%','<replication_slave>','no_logging'); + +set global server_audit_logging=on; + +connection master; +create table t1 (a int); +insert into t1 values (1); +truncate t1; +drop table t1; +sync_slave_with_master; + +set global server_audit_logging=off; +uninstall plugin server_audit; +truncate mysql.server_audit_filters; +truncate mysql.server_audit_users; +let $MYSQLD_DATADIR= `SELECT @@datadir`; +# replace the timestamp and the hostname with constant values +--replace_regex /[0-9]* [0-9][0-9]:[0-9][0-9]:[0-9][0-9]\,[^,]*\,/TIME,HOSTNAME,/ /\,[1-9][0-9]*\,/,1,/ /\,[1-9][0-9]*/,ID/ /000001\\', [0-9]*,/#', POS,/ +cat_file $MYSQLD_DATADIR/server_audit.log; +remove_file $MYSQLD_DATADIR/server_audit.log; + +connection master; +--source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_binlog_dump_slave_gtid_state_info.test b/mysql-test/suite/rpl/t/rpl_binlog_dump_slave_gtid_state_info.test index 02b31c06..4f0eafc4 100644 --- a/mysql-test/suite/rpl/t/rpl_binlog_dump_slave_gtid_state_info.test +++ b/mysql-test/suite/rpl/t/rpl_binlog_dump_slave_gtid_state_info.test @@ -39,6 +39,7 @@ --source include/master-slave.inc --connection master +SET @org_log_warnings=@@GLOBAL.LOG_WARNINGS; SET GLOBAL LOG_WARNINGS=2; --connection slave @@ -110,6 +111,7 @@ CHANGE MASTER TO MASTER_USE_GTID=slave_pos; --source include/wait_for_pattern_in_file.inc --echo "===== Clean up =====" +SET GLOBAL LOG_WARNINGS=@org_log_warnings; --connection slave --source include/stop_slave.inc CHANGE MASTER TO MASTER_USE_GTID=no; @@ -117,5 +119,4 @@ CHANGE MASTER TO MASTER_USE_GTID=no; --connection master DROP TABLE t; -SET GLOBAL LOG_WARNINGS=default; --source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_change_master.test b/mysql-test/suite/rpl/t/rpl_change_master.test index 2758f9d6..992e2390 100644 --- a/mysql-test/suite/rpl/t/rpl_change_master.test +++ b/mysql-test/suite/rpl/t/rpl_change_master.test @@ -109,9 +109,4 @@ CHANGE MASTER TO MASTER_USER='root', MASTER_SSL=0, MASTER_SSL_CA='', MASTER_SSL_ CHANGE MASTER TO MASTER_USER='root', MASTER_PASSWORD='', MASTER_SSL=0; -# MDEV-20122: Deprecate MASTER_USE_GTID=Current_Pos to favor new MASTER_DEMOTE_TO_SLAVE option ---echo "Usage of CURRENT_POS in CHANGE MASTER MASTER_USE_GTID is dreprecated. -CHANGE MASTER TO MASTER_USE_GTID=CURRENT_POS; -CHANGE MASTER TO MASTER_USE_GTID=SLAVE_POS; - --source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_change_master_demote.test b/mysql-test/suite/rpl/t/rpl_change_master_demote.test index 15b55014..6304d352 100644 --- a/mysql-test/suite/rpl/t/rpl_change_master_demote.test +++ b/mysql-test/suite/rpl/t/rpl_change_master_demote.test @@ -73,6 +73,9 @@ # non-boolean value. # # +# Additionally ensure MASTER_DEMOTE_TO_REPLICA aliases MASTER_DEMOTE_TO_SLAVE +# +# # References: # MDEV-19801: Change defaults for CHANGE MASTER TO so that GTID-based # replication is used by default if master supports it @@ -455,6 +458,37 @@ EOF --echo # +--echo # MDEV-31768 +--echo # Ensure MASTER_DEMOTE_TO_REPLICA aliases MASTER_DEMOTE_TO_SLAVE +--echo # +--connection slave +RESET MASTER; +--source include/reset_slave.inc +CREATE TABLE t_mdev_31768 (a int); +--let $gtid_binlog_pos= `SELECT @@GLOBAL.gtid_binlog_pos` +CHANGE MASTER TO master_use_gtid=Replica_Pos, master_demote_to_replica=1; +--let $gtid_slave_pos= `SELECT @@GLOBAL.gtid_slave_pos` + +--echo # Validating alias MASTER_DEMOTE_TO_REPLICA provides intended behavior.. +if (`SELECT strcmp("$gtid_binlog_pos","$gtid_slave_pos") != 0`) +{ + --echo # ..failed + --echo # Binlog pos: $gtid_binlog_pos + --echo # Replica pos: $gtid_slave_pos + die MASTER_DEMOTE_TO_REPLICA does not alias MASTER_DEMOTE_TO_SLAVE correctly; +} +--echo # ..success + +DROP TABLE t_mdev_31768; +RESET MASTER; +--source include/reset_slave.inc + +--echo # Clear primary binlog state to match replica +--connection master +RESET MASTER; + + +--echo # --echo # Cleanup --echo # --connection master diff --git a/mysql-test/suite/rpl/t/rpl_domain_id_filter_io_crash.test b/mysql-test/suite/rpl/t/rpl_domain_id_filter_io_crash.test index 95fac6c2..c7ef3a4e 100644 --- a/mysql-test/suite/rpl/t/rpl_domain_id_filter_io_crash.test +++ b/mysql-test/suite/rpl/t/rpl_domain_id_filter_io_crash.test @@ -9,6 +9,7 @@ CREATE TABLE t1(i INT) ENGINE=INNODB; INSERT INTO t1 VALUES(1); SELECT * FROM t1; sync_slave_with_master; +--source include/save_master_gtid.inc connection slave; @@ -28,7 +29,7 @@ let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Igno CHANGE MASTER TO IGNORE_DOMAIN_IDS=(), MASTER_USE_GTID=slave_pos; --source include/start_slave.inc -sync_with_master; +--source include/sync_with_master_gtid.inc let $do_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1); let $ignore_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1); @@ -44,7 +45,7 @@ START TRANSACTION; INSERT INTO t1 VALUES(2); INSERT INTO t1 VALUES(3); COMMIT; -save_master_pos; +--source include/save_master_gtid.inc SELECT * FROM t1; connection slave; @@ -55,7 +56,7 @@ SET @@global.debug_dbug=@saved_dbug; START SLAVE io_thread; --source include/wait_for_slave_io_to_start.inc -sync_with_master; +--source include/sync_with_master_gtid.inc SELECT * FROM t1; --echo # Case 1 : Start slave with IGNORE_DOMAIN_IDS=(1), then restart @@ -70,7 +71,7 @@ let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Igno CHANGE MASTER TO IGNORE_DOMAIN_IDS=(1), MASTER_USE_GTID=slave_pos; --source include/start_slave.inc -sync_with_master; +--source include/sync_with_master_gtid.inc let $do_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1); let $ignore_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1); @@ -86,7 +87,7 @@ INSERT INTO t1 VALUES(4); INSERT INTO t1 VALUES(5); COMMIT; -save_master_pos; +--source include/save_master_gtid.inc SELECT * FROM t1; connection slave; @@ -97,7 +98,7 @@ SET @@global.debug_dbug=@saved_dbug; START SLAVE io_thread; --source include/wait_for_slave_io_to_start.inc -sync_with_master; +--source include/sync_with_master_gtid.inc SELECT * FROM t1; --echo # Case 2 : Start slave with IGNORE_DOMAIN_IDS=(), then restart @@ -112,7 +113,7 @@ let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Igno CHANGE MASTER TO IGNORE_DOMAIN_IDS=(), MASTER_USE_GTID=slave_pos; --source include/start_slave.inc -sync_with_master; +--source include/sync_with_master_gtid.inc let $do_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1); let $ignore_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1); @@ -140,7 +141,7 @@ INSERT INTO t1 VALUES(10); INSERT INTO t1 VALUES(11); COMMIT; -save_master_pos; +--source include/save_master_gtid.inc SELECT * FROM t1; connection slave; @@ -157,7 +158,7 @@ let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Igno CHANGE MASTER TO IGNORE_DOMAIN_IDS=(1), MASTER_USE_GTID=slave_pos; --source include/start_slave.inc -sync_with_master; +--source include/sync_with_master_gtid.inc let $do_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1); let $ignore_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1); @@ -178,7 +179,7 @@ let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Igno CHANGE MASTER TO IGNORE_DOMAIN_IDS=(1), MASTER_USE_GTID=slave_pos; --source include/start_slave.inc -sync_with_master; +--source include/sync_with_master_gtid.inc let $do_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1); let $ignore_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1); @@ -196,6 +197,15 @@ INSERT INTO t1 VALUES(12); INSERT INTO t1 VALUES(13); COMMIT; # IO thread gets killed here. +# MDEV-14357 +# As the prior transaction will be ignored on slave because its domain id is +# ignored, the replica's gtid_slave_pos will be updated to have seen it, +# despite its eventual failure to queue the whole transaction to the relay log. +# So for test consistency, we need to synchronize the SQL thread with this +# position; otherwise, when restarting the server after resetting +# IGNORE_DOMAIN_IDS, we will re-fetch this event and execute it. +--source include/save_master_gtid.inc + START TRANSACTION; INSERT INTO t1 VALUES(14); INSERT INTO t1 VALUES(15); @@ -207,7 +217,6 @@ INSERT INTO t1 VALUES(16); INSERT INTO t1 VALUES(17); COMMIT; -save_master_pos; SELECT * FROM t1; connection slave; @@ -217,6 +226,11 @@ SELECT * FROM t1; SET @@global.debug_dbug=@saved_dbug; +# MDEV-14357 +# Ensure the SQL thread is updated with the GTID of the ignored transaction +# so we don't fetch it and execute it after restarting without any ignored +# domain ids. +--source include/sync_with_master_gtid.inc --source include/stop_slave_sql.inc let $do_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1); let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1); @@ -224,8 +238,12 @@ let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Igno --echo IGNORE_DOMAIN_IDS (BEFORE) : $ignore_domain_ids_before CHANGE MASTER TO IGNORE_DOMAIN_IDS=(), MASTER_USE_GTID=slave_pos; + +--connection master +--source include/save_master_gtid.inc +--connection slave --source include/start_slave.inc -sync_with_master; +--source include/sync_with_master_gtid.inc let $do_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1); let $ignore_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1); @@ -246,7 +264,7 @@ let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Igno CHANGE MASTER TO IGNORE_DOMAIN_IDS=(1), MASTER_USE_GTID=slave_pos; --source include/start_slave.inc -sync_with_master; +--source include/sync_with_master_gtid.inc let $do_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1); let $ignore_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1); @@ -264,6 +282,11 @@ INSERT INTO t1 VALUES(18); INSERT INTO t1 VALUES(19); # IO thread gets killed here. COMMIT; +# MDEV-14357 +# Synchronize gtid_slave_pos with the ignored event. See prior comments about +# MDEV-14357 for details. +--source include/save_master_gtid.inc + START TRANSACTION; INSERT INTO t1 VALUES(20); INSERT INTO t1 VALUES(21); @@ -275,7 +298,6 @@ INSERT INTO t1 VALUES(22); INSERT INTO t1 VALUES(23); COMMIT; -save_master_pos; SELECT * FROM t1; connection slave; @@ -285,6 +307,10 @@ SELECT * FROM t1; SET @@global.debug_dbug=@saved_dbug; +# MDEV-14357 +# Synchronize gtid_slave_pos with the ignored event. See prior comments about +# MDEV-14357 for details. +--source include/sync_with_master_gtid.inc --source include/stop_slave_sql.inc let $do_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1); let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1); @@ -292,8 +318,12 @@ let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Igno --echo IGNORE_DOMAIN_IDS (BEFORE) : $ignore_domain_ids_before CHANGE MASTER TO IGNORE_DOMAIN_IDS=(), MASTER_USE_GTID=slave_pos; + +--connection master +--source include/save_master_gtid.inc +--connection slave --source include/start_slave.inc -sync_with_master; +--source include/sync_with_master_gtid.inc let $do_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1); let $ignore_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1); @@ -314,7 +344,7 @@ let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Igno CHANGE MASTER TO IGNORE_DOMAIN_IDS=(), MASTER_USE_GTID=slave_pos; --source include/start_slave.inc -sync_with_master; +--source include/sync_with_master_gtid.inc let $do_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1); let $ignore_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1); @@ -343,7 +373,7 @@ INSERT INTO t1 VALUES(28); INSERT INTO t1 VALUES(29); COMMIT; -save_master_pos; +--source include/save_master_gtid.inc SELECT * FROM t1; connection slave; @@ -361,7 +391,7 @@ let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Igno CHANGE MASTER TO IGNORE_DOMAIN_IDS=(1), MASTER_USE_GTID=slave_pos; --source include/start_slave.inc -sync_with_master; +--source include/sync_with_master_gtid.inc let $do_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1); let $ignore_domain_ids_after= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1); diff --git a/mysql-test/suite/rpl/t/rpl_domain_id_filter_master_crash.test b/mysql-test/suite/rpl/t/rpl_domain_id_filter_master_crash.test index cdfdc098..19074ce9 100644 --- a/mysql-test/suite/rpl/t/rpl_domain_id_filter_master_crash.test +++ b/mysql-test/suite/rpl/t/rpl_domain_id_filter_master_crash.test @@ -6,9 +6,12 @@ connection master; +--disable_query_log call mtr.add_suppression("mysqld: Table '.*gtid_slave_pos' is marked as crashed and should be repaired"); call mtr.add_suppression("Checking table: './mysql/gtid_slave_pos'"); call mtr.add_suppression("mysql.gtid_slave_pos: 1 client is using or hasn't closed the table properly"); +call mtr.add_suppression("Could not read packet:.* errno: 11"); +--enable_query_log SET @@session.gtid_domain_id= 0; create table ti (a int auto_increment primary key) engine=innodb; @@ -39,7 +42,7 @@ insert into ti set a=null; insert into tm set a=null; set @@global.debug_dbug="+d,crash_before_send_xid"; ---exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--write_line wait $MYSQLTEST_VARDIR/tmp/mysqld.1.expect connection slave; let $do_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1); @@ -61,7 +64,7 @@ connection master; --enable_reconnect --let $rpl_server_number=1 --source include/rpl_start_server.inc -#--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +#--write_line restart $MYSQLTEST_VARDIR/tmp/mysqld.1.expect --source include/wait_until_connected_again.inc --echo # Master has restarted successfully save_master_pos; diff --git a/mysql-test/suite/rpl/t/rpl_domain_id_filter_restart.test b/mysql-test/suite/rpl/t/rpl_domain_id_filter_restart.test index 8083b8f2..38cbbdec 100644 --- a/mysql-test/suite/rpl/t/rpl_domain_id_filter_restart.test +++ b/mysql-test/suite/rpl/t/rpl_domain_id_filter_restart.test @@ -35,9 +35,10 @@ SET @@session.gtid_domain_id= 1; INSERT INTO t2 VALUES(1); SELECT * FROM t2; -sync_slave_with_master; +source include/save_master_gtid.inc; connection slave; +source include/sync_with_master_gtid.inc; SELECT * FROM t1; SELECT * FROM t2; diff --git a/mysql-test/suite/rpl/t/rpl_dump_request_retry_warning.test b/mysql-test/suite/rpl/t/rpl_dump_request_retry_warning.test index 1ee04362..633071a8 100644 --- a/mysql-test/suite/rpl/t/rpl_dump_request_retry_warning.test +++ b/mysql-test/suite/rpl/t/rpl_dump_request_retry_warning.test @@ -30,6 +30,10 @@ --let $rpl_skip_start_slave=1 --source include/master-slave.inc +--disable_query_log +call mtr.add_suppression("Could not read packet:.* errno: 11"); +--enable_query_log + # Do an insert on master CREATE TABLE t1(a int); INSERT INTO t1 VALUES(1); diff --git a/mysql-test/suite/rpl/t/rpl_get_lock.test b/mysql-test/suite/rpl/t/rpl_get_lock.test index b5c08858..c6f2f6ec 100644 --- a/mysql-test/suite/rpl/t/rpl_get_lock.test +++ b/mysql-test/suite/rpl/t/rpl_get_lock.test @@ -1,6 +1,17 @@ source include/master-slave.inc; +--disable_query_log CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT"); +call mtr.add_suppression("Could not read packet:.* errno: 11 "); +# The following one comes from calling dirty_close on client side +call mtr.add_suppression("Could not read packet:.* errno: 2 "); +call mtr.add_suppression("Could not read packet:.* errno: 35 "); +--enable_query_log + +let $org_log_warnings=`select @@global.log_warnings`; + +# Test extended warnings +SET GLOBAL LOG_WARNINGS=4; create table t1(n int); # Use of get_lock gives a warning for unsafeness if binlog_format=statement @@ -41,6 +52,10 @@ connection master1; drop table t1; sync_slave_with_master; +connection default; +--disable_query_log +--eval SET GLOBAL LOG_WARNINGS=$org_log_warnings; +--enable_query_log --source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_gtid_crash-slave.opt b/mysql-test/suite/rpl/t/rpl_gtid_crash-slave.opt index 69c1a64e..5b3fb44c 100644 --- a/mysql-test/suite/rpl/t/rpl_gtid_crash-slave.opt +++ b/mysql-test/suite/rpl/t/rpl_gtid_crash-slave.opt @@ -1 +1 @@ ---master-retry-count=100 +--master-retry-count=100 --slave-net-timeout=10 diff --git a/mysql-test/suite/rpl/t/rpl_gtid_crash.test b/mysql-test/suite/rpl/t/rpl_gtid_crash.test index 28329831..e1a57f4b 100644 --- a/mysql-test/suite/rpl/t/rpl_gtid_crash.test +++ b/mysql-test/suite/rpl/t/rpl_gtid_crash.test @@ -12,6 +12,7 @@ call mtr.add_suppression("Checking table:"); call mtr.add_suppression("client is using or hasn't closed the table properly"); call mtr.add_suppression("Table .* is marked as crashed and should be repaired"); +call mtr.add_suppression("Could not read packet:.* errno: 11"); flush tables; ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; diff --git a/mysql-test/suite/rpl/t/rpl_gtid_crash_myisam.test b/mysql-test/suite/rpl/t/rpl_gtid_crash_myisam.test index faf388f5..e113e17b 100644 --- a/mysql-test/suite/rpl/t/rpl_gtid_crash_myisam.test +++ b/mysql-test/suite/rpl/t/rpl_gtid_crash_myisam.test @@ -5,6 +5,10 @@ --let $rpl_topology=1->2 --source include/rpl_init.inc +--disable_query_log +call mtr.add_suppression("Could not read packet:.* errno: 11"); +--enable_query_log + --echo *** Test crashing master with InnoDB disabled, the binlog gtid state should still be correctly recovered. *** --connection server_1 diff --git a/mysql-test/suite/rpl/t/rpl_gtid_grouping.test b/mysql-test/suite/rpl/t/rpl_gtid_grouping.test index 66448c4f..bb1057a6 100644 --- a/mysql-test/suite/rpl/t/rpl_gtid_grouping.test +++ b/mysql-test/suite/rpl/t/rpl_gtid_grouping.test @@ -93,5 +93,4 @@ CHANGE MASTER TO MASTER_USE_GTID=no; --connection master DROP TABLE t; -SET GLOBAL LOG_WARNINGS=default; --source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_gtid_header_valid.test b/mysql-test/suite/rpl/t/rpl_gtid_header_valid.test new file mode 100644 index 00000000..d4d75ba8 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_gtid_header_valid.test @@ -0,0 +1,213 @@ +# +# This test ensures that, when a GTID event is constructed by reading its +# content from a binlog file, the reader (e.g. replica, in this test) cannot +# read beyond the length of the GTID event. That is, we ensure that the +# structure indicated by its flags and extra_flags are consistent with the +# actual content of the event. +# +# To spoof a broken GTID log event, we use the DEBUG_DBUG mechanism to inject +# the master to write invalid GTID events for each flag. The transaction is +# given a commit id to ensure the event is not shorter than GTID_HEADER_LEN, +# which would result in zero padding up to GTID_HEADER_LEN. +# +# +# References: +# MDEV-33672: Gtid_log_event Construction from File Should Ensure Event +# Length When Using Extra Flags +# + +--source include/have_debug.inc + +# GTID event extra_flags are format independent +--source include/have_binlog_format_row.inc +--source include/have_innodb.inc +--source include/master-slave.inc + +--echo # +--echo # Initialize test data +--connection master +create table t1 (a int) engine=innodb; +--source include/save_master_gtid.inc +set @@SESSION.debug_dbug= "+d,binlog_force_commit_id"; + +--connection slave +set SQL_LOG_BIN= 0; +call mtr.add_suppression('Found invalid event in binary log'); +call mtr.add_suppression('Slave SQL.*Relay log read failure: Could not parse relay log event entry.* 1594'); +set SQL_LOG_BIN= 1; +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +--source include/start_slave.inc +--let $cid_ctr= 100 + + +--echo # +--echo # Test FL_PREPARED_XA +--connection master +set @@SESSION.debug_dbug= "+d,negate_xid_from_gtid"; +--eval set @commit_id= $cid_ctr + +XA START 'x1'; +insert into t1 values (1); +XA END 'x1'; +XA PREPARE 'x1'; +set @@SESSION.debug_dbug= "-d,negate_xid_from_gtid"; +XA COMMIT 'x1'; +--source include/save_master_gtid.inc + +--echo # Waiting for slave to find invalid event.. +--connection slave +let $slave_sql_errno= 1594; # ER_SLAVE_RELAY_LOG_READ_FAILURE +source include/wait_for_slave_sql_error.inc; +STOP SLAVE IO_THREAD; + +--echo # Reset master binlogs (as there is an invalid event) and slave state +--connection master +RESET MASTER; +--connection slave +RESET MASTER; +RESET SLAVE; +set @@global.gtid_slave_pos=""; +--source include/start_slave.inc + + +--echo # +--echo # Test FL_COMPLETED_XA +--connection master +--inc $cid_ctr +--eval set @commit_id= $cid_ctr +XA START 'x1'; +--let $next_val = `SELECT max(a)+1 FROM t1` +--eval insert into t1 values ($next_val) +XA END 'x1'; +XA PREPARE 'x1'; +set @@SESSION.debug_dbug= "+d,negate_xid_from_gtid"; +XA COMMIT 'x1'; +set @@SESSION.debug_dbug= "-d,negate_xid_from_gtid"; +--source include/save_master_gtid.inc + +--echo # Waiting for slave to find invalid event.. +--connection slave +let $slave_sql_errno= 1594; # ER_SLAVE_RELAY_LOG_READ_FAILURE +source include/wait_for_slave_sql_error.inc; +STOP SLAVE IO_THREAD; + +--echo # Cleanup hanging XA PREPARE on slave +set statement SQL_LOG_BIN=0 for XA COMMIT 'x1'; + +--echo # Reset master binlogs (as there is an invalid event) and slave state +--connection master +RESET MASTER; +--connection slave +RESET MASTER; +RESET SLAVE; +set @@global.gtid_slave_pos=""; +--source include/start_slave.inc + + +--echo # +--echo # Test Missing xid.data (but has format id and length description parts) + +--connection master +--eval set @commit_id= $cid_ctr + +XA START 'x1'; +insert into t1 values (1); +XA END 'x1'; +XA PREPARE 'x1'; +set @@SESSION.debug_dbug= "+d,negate_xid_data_from_gtid"; +XA COMMIT 'x1'; +set @@SESSION.debug_dbug= "-d,negate_xid_data_from_gtid"; +--source include/save_master_gtid.inc + +--echo # Waiting for slave to find invalid event.. +--connection slave +let $slave_sql_errno= 1594; # ER_SLAVE_RELAY_LOG_READ_FAILURE +source include/wait_for_slave_sql_error.inc; +STOP SLAVE IO_THREAD; + +--echo # Cleanup hanging XA PREPARE on slave +set statement SQL_LOG_BIN=0 for XA COMMIT 'x1'; + +--echo # Reset master binlogs (as there is an invalid event) and slave state +--connection master +RESET MASTER; +--connection slave +RESET MASTER; +RESET SLAVE; +set @@global.gtid_slave_pos=""; +--source include/start_slave.inc + + +--echo # +--echo # Test FL_EXTRA_MULTI_ENGINE +--connection master +set @old_dbug= @@SESSION.debug_dbug; +set @@SESSION.debug_dbug= "+d,inject_fl_extra_multi_engine_into_gtid"; +--inc $cid_ctr +--eval set @commit_id= $cid_ctr +--let $next_val = `SELECT max(a)+1 FROM t1` +--eval insert into t1 values ($next_val) +--source include/save_master_gtid.inc +set @@SESSION.debug_dbug=@old_dbug; + +--connection slave +--echo # Waiting for slave to find invalid event.. +let $slave_sql_errno= 1594; # ER_SLAVE_RELAY_LOG_READ_FAILURE +source include/wait_for_slave_sql_error.inc; +STOP SLAVE IO_THREAD; + +--echo # Reset master binlogs (as there is an invalid event) and slave state +--connection master +RESET MASTER; + +--connection slave +RESET SLAVE; +RESET MASTER; +set @@global.gtid_slave_pos=""; +--source include/start_slave.inc + + +--echo # +--echo # Test FL_COMMIT_ALTER +--connection master +set @old_dbug= @@SESSION.debug_dbug; +set @@SESSION.debug_dbug= "+d,negate_alter_fl_from_gtid"; +set @old_alter_tp= @@SESSION.binlog_alter_two_phase; +set @@SESSION.binlog_alter_two_phase= 1; +alter table t1 add column (nc int); +--source include/save_master_gtid.inc +set @@SESSION.debug_dbug=@old_dbug; +set @@SESSION.binlog_alter_two_phase=@old_alter_tp; + +--connection slave +--echo # Waiting for slave to find invalid event.. +let $slave_sql_errno= 1594; # ER_SLAVE_RELAY_LOG_READ_FAILURE +source include/wait_for_slave_sql_error.inc; +STOP SLAVE IO_THREAD; + +--echo # Reset master binlogs (as there is an invalid event) and slave state +--connection master +RESET MASTER; + +--connection slave +# Just to keep tables consistent between master/slave +SET STATEMENT sql_log_bin=0 FOR alter table t1 add column (nc int); +RESET SLAVE; +RESET MASTER; +set @@global.gtid_slave_pos=""; +--source include/start_slave.inc + + +--echo # +--echo # Cleanup + +--connection master +drop table t1; +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc + +--source include/rpl_end.inc +--echo # End of rpl_gtid_header_valid.test diff --git a/mysql-test/suite/rpl/t/rpl_gtid_stop_start.test b/mysql-test/suite/rpl/t/rpl_gtid_stop_start.test index b5ff2949..acce6651 100644 --- a/mysql-test/suite/rpl/t/rpl_gtid_stop_start.test +++ b/mysql-test/suite/rpl/t/rpl_gtid_stop_start.test @@ -173,6 +173,24 @@ SELECT * FROM t1 ORDER BY a; --echo *** MDEV-4486: Allow to start old-style replication even if mysql.gtid_slave_pos is unavailable +# In GTID mode, the old-style replication position is also updated. But during +# GTID connect, the old-style position is not known until receiving the fake +# GTID list event, which contains the required position value. If we happened +# to stop the slave above before this fake GTID list event, the test could fail +# with duplicate key errors due to switching to non-GTID mode at a wrong +# position too far back in the binlog. +# +# Work-around this by injecting an extra dummt event and syncing the slave to +# it, ensuring the old-style position will be updated. +# +# This work-around could be removed after MDEV-33996 is fixed. +--connection server_1 +INSERT INTO t1 VALUES (8); +DELETE FROM t1 WHERE a=8; +--save_master_pos +--connection server_2 +--sync_with_master + --connection server_2 --source include/stop_slave.inc CHANGE MASTER TO master_use_gtid= no; diff --git a/mysql-test/suite/rpl/t/rpl_heartbeat.test b/mysql-test/suite/rpl/t/rpl_heartbeat.test index e4753b7b..8a852fd3 100644 --- a/mysql-test/suite/rpl/t/rpl_heartbeat.test +++ b/mysql-test/suite/rpl/t/rpl_heartbeat.test @@ -8,14 +8,13 @@ # - SHOW STATUS like 'Slave_heartbeat_period' report -- source include/have_log_bin.inc - -connect (master,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK); -connect (slave,localhost,root,,test,$SLAVE_MYPORT,$SLAVE_MYSOCK); +-- source include/master-slave.inc connection master; reset master; connection slave; +-- source include/stop_slave.inc set @restore_slave_net_timeout= @@global.slave_net_timeout; --disable_warnings set @@global.slave_net_timeout= 10; @@ -170,6 +169,5 @@ set @@global.slave_net_timeout= @restore_slave_net_timeout; --disable_prepare_warnings ---source include/stop_slave.inc - +--source include/rpl_end.inc --echo End of tests diff --git a/mysql-test/suite/rpl/t/rpl_heartbeat_debug.test b/mysql-test/suite/rpl/t/rpl_heartbeat_debug.test index bd66a249..e5937866 100644 --- a/mysql-test/suite/rpl/t/rpl_heartbeat_debug.test +++ b/mysql-test/suite/rpl/t/rpl_heartbeat_debug.test @@ -3,6 +3,12 @@ --source include/have_debug.inc --source include/master-slave.inc +--disable_query_log +CALL mtr.add_suppression('SET @master_heartbeat_period to master failed with error'); +CALL mtr.add_suppression('Master command COM_REGISTER_SLAVE failed: failed registering on master, reconnecting to try again'); +call mtr.add_suppression("Could not read packet:.* errno: 11"); +--enable_query_log + connection slave; --source include/stop_slave.inc set @restore_slave_net_timeout= @@global.slave_net_timeout; @@ -14,14 +20,13 @@ set @@global.slave_net_timeout= 10; ### Checking the range ### -# + # default period slave_net_timeout/2 # --query_vertical show status like 'Slave_heartbeat_period'; SET @saved_dbug= @@GLOBAL.debug_dbug; SET GLOBAL debug_dbug="+d,simulate_slave_heartbeat_network_error"; -CALL mtr.add_suppression('SET @master_heartbeat_period to master failed with error'); -CALL mtr.add_suppression('Master command COM_REGISTER_SLAVE failed: failed registering on master, reconnecting to try again'); + --source include/start_slave.inc diff --git a/mysql-test/suite/rpl/t/rpl_mdev33798.cnf b/mysql-test/suite/rpl/t/rpl_mdev33798.cnf new file mode 100644 index 00000000..8e5125ea --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_mdev33798.cnf @@ -0,0 +1,17 @@ +!include suite/rpl/my.cnf + +[mysqld.1] +log-slave-updates +loose-innodb + +[mysqld.2] +log-slave-updates +loose-innodb + +[mysqld.3] +log-slave-updates +loose-innodb + +[ENV] +SERVER_MYPORT_3= @mysqld.3.port +SERVER_MYSOCK_3= @mysqld.3.socket diff --git a/mysql-test/suite/rpl/t/rpl_mdev33798.test b/mysql-test/suite/rpl/t/rpl_mdev33798.test new file mode 100644 index 00000000..1448ed91 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_mdev33798.test @@ -0,0 +1,182 @@ +--source include/have_innodb.inc +--source include/have_log_bin.inc +--let $rpl_topology=1->2,1->3 +--source include/rpl_init.inc +--connect (server_2b,127.0.0.1,root,,,$SERVER_MYPORT_2) + +--connection server_2 +SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +SET @old_parallel_mode= @@GLOBAL.slave_parallel_mode; +SET @old_timeout= @@GLOBAL.lock_wait_timeout; +SET @old_innodb_timeout= @@GLOBAL.innodb_lock_wait_timeout; +--source include/stop_slave.inc +SET GLOBAL slave_parallel_threads=5; +set global slave_parallel_mode= aggressive; +# High timeout so we get replication sync error and test failure if the +# conflict handling is insufficient and lock wait timeout occurs. +SET GLOBAL lock_wait_timeout= 86400; +SET GLOBAL innodb_lock_wait_timeout= 86400; +SET STATEMENT sql_log_bin=0 FOR ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +--source include/start_slave.inc + +--connection server_1 +CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t1 VALUES (1, 0), (2, 0), (3, 0), (4, 0), (5, 0), (6, 0), (7, 0), (8, 0); +--save_master_pos + +--connection server_2 +--sync_with_master +--source include/stop_slave.inc + +# Test the following scenario: +# +# Transactions T1, T2 in domain 1, U1, U2 in domain 2. +# Wait cycle T1->U2->U1->T2->T1 as follows: +# T1 row lock wait on U2 +# U2 wait_for_prior_commit on U1 +# U1 row lock wait on T2 +# T2 wait_for_prior_commit on T1 +# +# Test that the wait cycle is broken correctly with deadlock kill. + +--connection server_2b +# Temporarily block T1 and U1. +BEGIN; +SELECT * FROM t1 WHERE a=1 FOR UPDATE; +SELECT * FROM t1 WHERE a=5 FOR UPDATE; + +--connection server_1 + +SET SESSION gtid_domain_id= 1; +# T1 in domain 1 +BEGIN; +UPDATE t1 SET b=1 WHERE a=1; +UPDATE t1 SET b=1 WHERE a=7; +COMMIT; +# T2 in domain 1 +UPDATE t1 SET b=2 WHERE a=3; + +SET SESSION gtid_domain_id=2; +# U1 in domain 2 +BEGIN; +UPDATE t1 SET b=3 WHERE a=5; +UPDATE t1 SET b=3 WHERE a=3; +COMMIT; +# U2 in domain 2 +UPDATE t1 SET b=4 WHERE a=7; +SET SESSION gtid_domain_id= 0; +--source include/save_master_gtid.inc + +--connection server_2 +--source include/start_slave.inc +# Wait until T2, U2 are holding the row locks. +--let $wait_condition= SELECT COUNT(*)=2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE state LIKE '%Waiting for prior transaction to commit%' +--source include/wait_condition.inc + +# Then let T1, U1 continue to conflict on the row locks, and check that +# replication correctly handles the conflict. +--connection server_2b +ROLLBACK; + +--connection server_2 +--source include/sync_with_master_gtid.inc + +# Allow either domain to "win" on the conflicting updates. +SELECT a, ( + (a=1 AND b=1) OR + (a=3 AND (b=2 OR b=3)) OR + (a=5 AND b=3) OR + (a=7 AND (b=1 OR b=4)) OR + ((a MOD 2)=0 AND b=0)) AS `ok` + FROM t1 + ORDER BY a; + +# Now try the same thing with multi-source replication. + +# Make server_3 a second master +--connection server_3 +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc + +--connection server_2 +--source include/stop_slave.inc +--replace_result $SERVER_MYPORT_3 MYPORT_3 +eval CHANGE MASTER 'm2' to master_port=$SERVER_MYPORT_3 , master_host='127.0.0.1', master_user='root', master_use_gtid=slave_pos; + +--connection server_1 + +SET SESSION gtid_domain_id= 1; +# T1 in domain 1 +BEGIN; +UPDATE t1 SET b=11 WHERE a=1; +UPDATE t1 SET b=11 WHERE a=7; +COMMIT; +# T2 in domain 1 +UPDATE t1 SET b=12 WHERE a=3; +SET SESSION gtid_domain_id= 1; + +--connection server_3 +SET SESSION gtid_domain_id=3; +# U1 in domain 3 +BEGIN; +UPDATE t1 SET b=13 WHERE a=5; +UPDATE t1 SET b=13 WHERE a=3; +COMMIT; +# U2 in domain 3 +UPDATE t1 SET b=14 WHERE a=7; +--source include/save_master_gtid.inc + +--connection server_2b +# Temporarily block T1 and U1. +BEGIN; +SELECT * FROM t1 WHERE a=1 FOR UPDATE; +SELECT * FROM t1 WHERE a=5 FOR UPDATE; + +START ALL SLAVES; +# Wait until T2, U2 are holding the row locks. +--let $wait_condition= SELECT COUNT(*)=2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE state LIKE '%Waiting for prior transaction to commit%' +--source include/wait_condition.inc + +--connection server_2b +ROLLBACK; + +--connection server_1 +--source include/save_master_gtid.inc +--connection server_2 +--source include/sync_with_master_gtid.inc +--connection server_3 +--source include/save_master_gtid.inc +--connection server_2 +--source include/sync_with_master_gtid.inc + +SELECT a, ( + (a=1 AND b=11) OR + (a=3 AND (b=12 OR b=13)) OR + (a=5 AND b=13) OR + (a=7 AND (b=11 OR b=14)) OR + ((a MOD 2)=0 AND b=0)) AS `ok` + FROM t1 + ORDER BY a; + +SET default_master_connection = 'm2'; +--source include/stop_slave.inc +RESET SLAVE 'm2' ALL; +SET default_master_connection = ''; + +--connection server_3 +--source include/start_slave.inc + +# Cleanup + +--disconnect server_2b +--connection server_1 +DROP TABLE t1; +--connection server_2 +--source include/stop_slave.inc +SET GLOBAL slave_parallel_threads=@old_parallel_threads; +set global slave_parallel_mode= @old_parallel_mode; +SET GLOBAL lock_wait_timeout= @old_timeout; +SET GLOBAL innodb_lock_wait_timeout= @old_innodb_timeout; +--source include/start_slave.inc + +--source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_mysql_manager_race_condition.test b/mysql-test/suite/rpl/t/rpl_mysql_manager_race_condition.test index 751da315..dd4a0fbb 100644 --- a/mysql-test/suite/rpl/t/rpl_mysql_manager_race_condition.test +++ b/mysql-test/suite/rpl/t/rpl_mysql_manager_race_condition.test @@ -18,8 +18,14 @@ # associated with this test should enforce that the binlog background thread is # not created before the handle manager is initialized. # +# Addendum 1) This test is extended for MDEV-33799, as the original fix +# left out the possibility that the binlog background thread can be +# started during recovery if the binary log is used as the transaction +# coordinator. This resulted in similar segfaults as seen by MDEV-26473. +# # References: # MDEV-26473 mysqld got exception 0xc0000005 (rpl_slave_state/rpl_load_gtid_slave_state) +# MDEV-33799 mysql_manager_submit Segfault at Startup Still Possible During Recovery # --source include/have_debug.inc @@ -54,6 +60,63 @@ create table t1 (a int); --echo # +--echo # MDEV-33799 +--echo # Ensure that when the binary log is used for recovery (as tc log), that +--echo # the recovery process cannot start the binlog background thread before +--echo # the mysql handle manager has started. +--connection slave + +--echo # Add test suppresssions so crash recovery messages don't fail the test +set session sql_log_bin=0; +call mtr.add_suppression("mariadbd: Got error '145.*"); +call mtr.add_suppression("Checking table:.*"); +call mtr.add_suppression("mysql.gtid_slave_pos:.*hasn't closed the table properly"); +call mtr.add_suppression("Can't init tc log"); +call mtr.add_suppression("Aborting"); +set session sql_log_bin=1; + +--echo # Create slave-side only table +create table t2 (a int) engine=innodb; + +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +wait +EOF + +--echo # Crash mariadbd when binlogging transaction to corrupt database state +--connection slave1 +set @@session.debug_dbug="+d,crash_before_writing_xid"; +--send insert into t2 values (1) + +--connection slave +--source include/wait_until_disconnected.inc + +--connection slave1 +--error 2013,ER_CONNECTION_KILLED +--reap + +--echo # Restart mariadbd in recovery mode. Note --tc-heuristic-recover +--echo # forces mysqld to exit with error, so we run mariadbd via CLI +--echo # MYSQLD_LAST_CMD --debug_dbug="+d,delay_start_handle_manager" --tc-heuristic-recover=COMMIT +--error 1 +--exec $MYSQLD_LAST_CMD --debug_dbug="+d,delay_start_handle_manager" --tc-heuristic-recover=COMMIT + +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +restart +EOF + +--connection server_2 +--enable_reconnect +--source include/wait_until_connected_again.inc +--connection slave1 +--enable_reconnect +--source include/wait_until_connected_again.inc +--connection slave +--enable_reconnect +--source include/wait_until_connected_again.inc +--source include/start_slave.inc + + +--echo # --echo # Cleanup --echo # @@ -63,5 +126,6 @@ drop table t1; --connection slave --source include/sync_with_master_gtid.inc +drop table t2; --source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_packet.test b/mysql-test/suite/rpl/t/rpl_packet.test index cbde486b..f1814e61 100644 --- a/mysql-test/suite/rpl/t/rpl_packet.test +++ b/mysql-test/suite/rpl/t/rpl_packet.test @@ -20,6 +20,9 @@ source include/master-slave.inc; call mtr.add_suppression("Slave I/O: Got a packet bigger than 'slave_max_allowed_packet' bytes, .*error.* 1153"); call mtr.add_suppression("Log entry on master is longer than slave_max_allowed_packet"); +call mtr.add_suppression("Could not write packet:"); +call mtr.add_suppression("Got a packet bigger than 'max_allowed_packet' bytes"); + let $db= DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________; disable_warnings; eval drop database if exists $db; diff --git a/mysql-test/suite/rpl/t/rpl_parallel_gco_wait_kill.test b/mysql-test/suite/rpl/t/rpl_parallel_gco_wait_kill.test index c5d2b85c..c34c4f20 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_gco_wait_kill.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_gco_wait_kill.test @@ -300,7 +300,7 @@ SET GLOBAL slave_parallel_threads=10; # Find the thread id of the driver SQL thread that we want to kill. --let $wait_condition= SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE LIKE '%Slave has read all relay log%' --source include/wait_condition.inc ---let $thd_id= `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE LIKE '%Slave has read all relay log%'` +--let $thd_id= `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE LIKE '%relay log%'` SET @old_max_queued= @@GLOBAL.slave_parallel_max_queued; SET GLOBAL slave_parallel_max_queued=9000; diff --git a/mysql-test/suite/rpl/t/rpl_parallel_multi_domain_xa.test b/mysql-test/suite/rpl/t/rpl_parallel_multi_domain_xa.test new file mode 100644 index 00000000..f17634d4 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_parallel_multi_domain_xa.test @@ -0,0 +1,181 @@ +# Similar to rpl_parallel_optimistic_xa to verify XA +# parallel execution with multiple gtid domain. +# References: +# MDEV-33668 Adapt parallel slave's round-robin scheduling to XA events + +--source include/have_innodb.inc +--source include/have_perfschema.inc +--source include/master-slave.inc + +# Tests' global declarations +--let $trx = _trx_ +--let $slave_timeout= -1 + +call mtr.add_suppression("Deadlock found when trying to get lock; try restarting transaction"); +call mtr.add_suppression("WSREP: handlerton rollback failed"); + +--connection master +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +--save_master_pos + +# Prepare to restart slave into optimistic parallel mode +--connection slave +--sync_with_master +--source include/stop_slave.inc +# This test runs huge number of transactions independently in parallel that +# all conflict on a single row. This requires a large number of retries, as a +# transaction can repeatedly conflict/deadlock with a large number of other +# transactions (in a different domain) one by one. +SET @old_transaction_retries = @@GLOBAL.slave_transaction_retries; +SET @@global.slave_transaction_retries = 1000; +SET @old_parallel_threads = @@GLOBAL.slave_parallel_threads; +SET @old_slave_domain_parallel_threads = @@GLOBAL.slave_domain_parallel_threads; +SET @@global.slave_parallel_threads = 5; +SET @@global.slave_domain_parallel_threads = 3; +SET @old_parallel_mode = @@GLOBAL.slave_parallel_mode; + +CHANGE MASTER TO master_use_gtid=slave_pos; + +--connection master +CREATE TABLE t1 (a int PRIMARY KEY, b INT) ENGINE=InnoDB; +INSERT INTO t1 VALUES (1, 0); +--source include/save_master_gtid.inc + +--connection slave +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc + +--let $mode = 2 +# mode = 2 is optimistic +SET @@global.slave_parallel_mode ='optimistic'; +while ($mode) +{ + --connection master + # + # create XA events alternating gtid domains to run them in parallel on slave. + # + --let $domain_num = 3 + --let $trx_num = 777 + --let $i = $trx_num + --let $conn = master + --disable_query_log + while($i > 0) + { + --let $domain_id = `SELECT $i % $domain_num` + --eval set @@gtid_domain_id = $domain_id + # 'decision' to commit 0, or rollback 1 + --let $decision = `SELECT $i % 2` + --eval XA START '$conn$trx$i' + --eval UPDATE t1 SET b = 1 - 2 * $decision WHERE a = 1 + --eval XA END '$conn$trx$i' + --eval XA PREPARE '$conn$trx$i' + --let $term = COMMIT + if ($decision) + { + --let $term = ROLLBACK + } + --eval XA $term '$conn$trx$i' + + --dec $i + } + --enable_query_log + --source include/save_master_gtid.inc + + --connection slave + if (`select $mode = 1`) + { + SET @@global.slave_parallel_mode ='conservative'; + } + --source include/start_slave.inc + --source include/sync_with_master_gtid.inc + --source include/stop_slave.inc + + --dec $mode +} + + +# Generations test. +# Create few ranges of XAP groups length of greater than +# 3 * slave_parallel_threads + 1 +# terminated upon each range. +--let $iter = 3 +--let $generation_len = @@global.slave_parallel_threads +--let $domain_num = 3 +--disable_query_log +--connection master +while ($iter) +{ + --let $k = `select 3 * 3 * $generation_len` + --let $_k = $k + while ($k) + { + --source include/count_sessions.inc + --connect(con$k,localhost,root,,) + # + # create XA events alternating gtid domains to run them in parallel on slave. + # + --let $domain_id = `SELECT $k % $domain_num` + --eval set @@gtid_domain_id = $domain_id + --eval XA START '$trx$k' + --eval INSERT INTO t1 VALUES ($k + 1, $iter) + --eval XA END '$trx$k' + --eval XA PREPARE '$trx$k' + + --disconnect con$k + --connection master + --source include/wait_until_count_sessions.inc + + --dec $k + } + + --connection master + --let $k = $_k + while ($k) + { + --let $term = COMMIT + --let $decision = `SELECT $k % 2` + if ($decision) + { + --let $term = ROLLBACK + } + --eval XA $term '$trx$k' + } + --dec $iter +} +--enable_query_log +--source include/save_master_gtid.inc + +--connection slave +SET @@global.slave_parallel_mode = 'optimistic'; +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc + + +# +# Overall consistency check +# +--let $diff_tables= master:t1, slave:t1 +--source include/diff_tables.inc + + +# +# Clean up. +# +--connection slave +--source include/stop_slave.inc +SET @@global.slave_parallel_mode = @old_parallel_mode; +SET @@global.slave_parallel_threads = @old_parallel_threads; +SET @@global.slave_domain_parallel_threads = @old_slave_domain_parallel_threads; +SET @@global.slave_transaction_retries = @old_transaction_retries; +--source include/start_slave.inc + +--connection master +DROP TABLE t1; +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc + +--connection master +--source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test index f5e48282..09da8c2d 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test @@ -508,6 +508,7 @@ DELETE FROM t2; # The 1st of the following two trx:s a blocker on slave --connection server_2 +SET @org_log_warnings=@@GLOBAL.LOG_WARNINGS; set global log_warnings=2; BEGIN; INSERT INTO t1 SET a=1; @@ -555,7 +556,7 @@ DELETE FROM t2; # --connection server_2 --source include/stop_slave.inc -set global log_warnings=default; +set global log_warnings=@org_log_warnings; SET GLOBAL slave_parallel_mode=@old_parallel_mode; SET GLOBAL slave_parallel_threads=@old_parallel_threads; --source include/start_slave.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_optimistic_xa.test b/mysql-test/suite/rpl/t/rpl_parallel_optimistic_xa.test index 35c22d1e..6e5cf436 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_optimistic_xa.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_optimistic_xa.test @@ -206,7 +206,6 @@ while($i > 0) # --connection slave --source include/stop_slave.inc -set global log_warnings=default; SET GLOBAL slave_parallel_mode=@old_parallel_mode; SET GLOBAL slave_parallel_threads=@old_parallel_threads; --source include/start_slave.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_retry.test b/mysql-test/suite/rpl/t/rpl_parallel_retry.test index fe6f40d2..1e87c85c 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_retry.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_retry.test @@ -411,6 +411,44 @@ DROP function foo; --sync_slave_with_master server_2 # +# MDEV-33303: slave_parallel_mode=optimistic should not report the mode's +# specific temporary errors. +# + +--connection server_2 +--source include/stop_slave.inc +SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; +SET GLOBAL slave_parallel_threads=4; + +--connection server_1 +# The problem occurred in the code path for row-based updates in tables +# with no primary/unique key, where a scan is needed. +CREATE TABLE t1 (a INT, b VARCHAR(123)) ENGINE=InnoDB; +INSERT INTO t1 VALUES(1, 'asdf'); +UPDATE t1 SET b='zxf1' WHERE a=1; +UPDATE t1 SET b='\n' WHERE a=1; + +--connection server_2 +# Inject a small sleep in the code that makes the race easier to hit. +SET @old_dbug=@@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug="+d,write_row_inject_sleep_before_ha_write_row"; +--source include/start_slave.inc + +--connection server_1 +# Here, we would get errors in the slave's error log: +# [ERROR] mariadbd: Can't find record in 't1' +--sync_slave_with_master server_2 + +--connection server_1 +DROP TABLE t1; +--sync_slave_with_master server_2 +--source include/stop_slave.inc +SET GLOBAL debug_dbug=@old_dbug; +SET GLOBAL slave_parallel_threads=@old_parallel_threads; +--source include/start_slave.inc + + +# # MDEV-12746 rpl.rpl_parallel_optimistic_nobinlog fails committing out of order at retry # diff --git a/mysql-test/suite/rpl/t/rpl_parallel_sbm.test b/mysql-test/suite/rpl/t/rpl_parallel_sbm.test index 58c0db15..90753caf 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_sbm.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_sbm.test @@ -1,9 +1,9 @@ # # Ensure that Seconds_Behind_Master works correctly on the parallel replica. # ---source include/master-slave.inc --source include/have_log_bin.inc --source include/have_debug.inc +--source include/master-slave.inc --echo # --echo # MDEV-29639: Seconds_Behind_Master is incorrect for Delayed, Parallel Replicas @@ -67,11 +67,18 @@ if (`SELECT $sbm_trx1_arrive > ($seconds_since_idling + 1)`) --echo # if the event is still to be delayed, SBM should resume accordingly --source include/stop_slave.inc + +--echo # Lock t1 on slave to ensure the event can't finish (and thereby update +--echo # Seconds_Behind_Master) so slow running servers don't accidentally +--echo # catch up to the master before checking SBM. +--connection server_2 +LOCK TABLES t1 WRITE; + --source include/start_slave.inc --connection slave ---echo # Waiting for replica to resume the delay for the transaction ---let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Waiting until MASTER_DELAY seconds after master executed event'; +--echo # Waiting for replica to get blocked by the table lock +--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Waiting for table metadata lock'; --source include/wait_condition.inc --echo # Sleeping 1s to increment SBM @@ -86,6 +93,9 @@ if (`SELECT $sbm_trx1_after_1s_sleep <= $sbm_trx1_arrive`) } --echo # ..done +--connection server_2 +UNLOCK TABLES; + --source include/sync_with_master_gtid.inc --echo # diff --git a/mysql-test/suite/rpl/t/rpl_parallel_seq.test b/mysql-test/suite/rpl/t/rpl_parallel_seq.test index b1b15412..cc361a7b 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_seq.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_seq.test @@ -195,4 +195,22 @@ DROP TABLE ti; --sync_slave_with_master +# MDEV-31779 server crash in Rows_log_event::update_sequence at replaying binlog +--connection master +--let $binlog_file = query_get_value(SHOW MASTER STATUS, File, 1) +--let $binlog_start = query_get_value(SHOW MASTER STATUS, Position, 1) +CREATE SEQUENCE s; +--disable_ps2_protocol +SELECT NEXTVAL(s); +--enable_ps2_protocol +flush binary logs; +DROP SEQUENCE s; +--exec $MYSQL_BINLOG $datadir/$binlog_file | $MYSQL test +DROP SEQUENCE s; + +--sync_slave_with_master + +--connection master + + --source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_slave_bgc_kill.test b/mysql-test/suite/rpl/t/rpl_parallel_slave_bgc_kill.test index efb998b0..94f19a6c 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_slave_bgc_kill.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_slave_bgc_kill.test @@ -295,13 +295,14 @@ CREATE FUNCTION foo(x INT, d1 VARCHAR(500), d2 VARCHAR(500)) || --delimiter ; SET sql_log_bin=1; - +--source include/stop_slave_io.inc --connection server_1 INSERT INTO t3 VALUES (49,0); --save_master_pos --connection server_2 -START SLAVE SQL_THREAD; +CHANGE MASTER TO master_use_gtid=no; +--source include/start_slave.inc --sync_with_master SELECT * FROM t3 WHERE a >= 40 ORDER BY a; # Restore the foo() function. @@ -334,11 +335,6 @@ SET GLOBAL slave_parallel_threads=10; --echo *** 3. Same as (2), but not using gtid mode *** ---connection server_2 ---source include/stop_slave.inc -CHANGE MASTER TO master_use_gtid=no; ---source include/start_slave.inc - --connection server_1 # Set up three transactions on the master that will be group-committed # together so they can be replicated in parallel on the slave. diff --git a/mysql-test/suite/rpl/t/rpl_parallel_stop_slave.test b/mysql-test/suite/rpl/t/rpl_parallel_stop_slave.test index 35879e98..1610292e 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_stop_slave.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_stop_slave.test @@ -57,6 +57,7 @@ COMMIT; INSERT INTO t3 VALUES(21, 21); INSERT INTO t3 VALUES(22, 22); --save_master_pos +--let $master_pos= query_get_value(SHOW MASTER STATUS, Position, 1) # Start a connection that will block the replicated transaction halfway. --connection con_temp1 @@ -64,7 +65,27 @@ BEGIN; INSERT INTO t2 VALUES (21); --connection server_2 -START SLAVE; + +# +# Parallel replication will complete any in-progress event group at STOP SLAVE, +# but only if the event group is already queued up for the worker thread. If +# the SQL driver thread is delayed in queueing up events, the parallel worker +# thread can abort the event group, leaving the non-transactional update to the +# MyISAM table that cannot be rolled back (MDEV-7432). If this happens the test +# would fail with duplicate key error after slave restart. +# +# To avoid this, we here wait for the IO thread to read all master events, and +# for the SQL driver thread to queue all the events for workers. This wait +# should be removed if/when MDEV-7432 is fixed. +# +START SLAVE IO_THREAD; +--let $slave_param= Read_Master_Log_Pos +--let $slave_param_value= $master_pos +--source include/wait_for_slave_param.inc +START SLAVE SQL_THREAD; +--let $wait_condition= SELECT COUNT(*)=1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE State LIKE '%Slave has read all relay log; waiting for more updates%' +--source include/wait_condition.inc + # Wait for the MyISAM change to be visible, after which replication will wait # for con_temp1 to roll back. --let $wait_condition= SELECT COUNT(*) = 1 FROM t1 WHERE a=20 diff --git a/mysql-test/suite/rpl/t/rpl_parallel_temptable.test b/mysql-test/suite/rpl/t/rpl_parallel_temptable.test index 3684763d..8bab4af2 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_temptable.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_temptable.test @@ -264,6 +264,30 @@ SET GLOBAL slave_parallel_mode=@old_mode; --source include/start_slave.inc +--echo *** MDEV33426: Memory allocation accounting incorrect for replicated temptable +--connection server_1 +CREATE TEMPORARY TABLE t5 (a int) ENGINE=Aria; +CREATE TEMPORARY TABLE t6 (a int) ENGINE=Heap; +INSERT INTO t5 VALUES (1); +INSERT INTO t6 VALUES (2); +--save_master_pos + +--connection server_2 +--sync_with_master +--source include/stop_slave.inc + +--connection server_1 +INSERT INTO t1 SELECT a+40, 5 FROM t5; +INSERT INTO t1 SELECT a+40, 6 FROM t6; +DROP TABLE t5, t6; + +--save_master_pos + +--connection server_2 +--source include/start_slave.inc +--sync_with_master +SELECT * FROM t1 WHERE a>=40 ORDER BY a; + # Clean up. --connection server_2 diff --git a/mysql-test/suite/rpl/t/rpl_relay_max_extension.test b/mysql-test/suite/rpl/t/rpl_relay_max_extension.test index acca2f69..b6a8ccb5 100644 --- a/mysql-test/suite/rpl/t/rpl_relay_max_extension.test +++ b/mysql-test/suite/rpl/t/rpl_relay_max_extension.test @@ -47,7 +47,7 @@ RESET SLAVE; --echo # --let $datadir = `select @@datadir` ---exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +--write_line wait $MYSQLTEST_VARDIR/tmp/mysqld.2.expect --shutdown_server 10 --source include/wait_until_disconnected.inc @@ -64,7 +64,7 @@ RESET SLAVE; --echo # Restart slave server --echo # ---exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +--write_line restart $MYSQLTEST_VARDIR/tmp/mysqld.2.expect --enable_reconnect --source include/wait_until_connected_again.inc SET @save_slave_parallel_threads= @@GLOBAL.slave_parallel_threads; diff --git a/mysql-test/suite/rpl/t/rpl_rewrite_db_sys_vars.test b/mysql-test/suite/rpl/t/rpl_rewrite_db_sys_vars.test index 27710063..fab78a92 100644 --- a/mysql-test/suite/rpl/t/rpl_rewrite_db_sys_vars.test +++ b/mysql-test/suite/rpl/t/rpl_rewrite_db_sys_vars.test @@ -19,15 +19,8 @@ create database replica_db1; create database y; # This DB will be rewrited from test case create database test_replica; -SELECT @@GLOBAL.replicate_rewrite_db; let $rewrite_db_sss= query_get_value(SHOW SLAVE STATUS, Replicate_Rewrite_DB, 1); ---echo # Ensuring SHOW SLAVE STATUS produces correct value for Replicate_Rewrite_DB... -if (`SELECT strcmp(@@global.replicate_rewrite_db, "$rewrite_db_sss") != 0`) -{ - die SHOW SLAVE STATUS Replicate_Rewrite_DB value $rewrite_db_sss does not match variable @@GLOBAL.replicate_rewrite_db; -} ---echo # ...success - +eval SELECT @@GLOBAL.replicate_rewrite_db, '$rewrite_db_sss' as 'Replicate_Rewrite_DB from SHOW SLAVE STATUS'; --echo # Create DBs and tables on primary connection master; @@ -63,16 +56,10 @@ source include/stop_slave.inc; SET @save_replicate_rewrite_db = @@GLOBAL.replicate_rewrite_db; SELECT @@GLOBAL.replicate_rewrite_db; SET @@GLOBAL.replicate_rewrite_db="test_master->test_replica"; -SELECT @@GLOBAL.replicate_rewrite_db; SHOW DATABASES like 'test_replica'; source include/start_slave.inc; let $rewrite_db_sss= query_get_value(SHOW SLAVE STATUS, Replicate_Rewrite_DB, 1); ---echo # Ensuring SHOW SLAVE STATUS produces correct value for Replicate_Rewrite_DB... -if (`SELECT strcmp(@@global.replicate_rewrite_db, "$rewrite_db_sss") != 0`) -{ - die SHOW SLAVE STATUS Replicate_Rewrite_DB value $rewrite_db_sss does not match variable @@GLOBAL.replicate_rewrite_db; -} ---echo # ...success +eval SELECT @@GLOBAL.replicate_rewrite_db, '$rewrite_db_sss' as 'Replicate_Rewrite_DB from SHOW SLAVE STATUS'; --echo # Create DB and tables on primary connection master; @@ -101,6 +88,9 @@ select * from test_replica.my_table; --source include/diff_tables.inc --echo # Update of values on primary for DB not set in replication_rewrite_db +--source include/stop_slave.inc +--source include/reset_slave.inc + connection master; use x; insert into my_table values (314); @@ -108,9 +98,6 @@ select * from my_table; --source include/save_master_gtid.inc connection slave; ---let $rpl_only_running_threads=1 ---source include/stop_slave.inc ---source include/reset_slave.inc --source include/start_slave.inc SELECT @@GLOBAL.replicate_rewrite_db; --source include/sync_with_master_gtid.inc diff --git a/mysql-test/suite/rpl/t/rpl_row_find_row_debug.test b/mysql-test/suite/rpl/t/rpl_row_find_row_debug.test index e3edabe2..f06b2982 100644 --- a/mysql-test/suite/rpl/t/rpl_row_find_row_debug.test +++ b/mysql-test/suite/rpl/t/rpl_row_find_row_debug.test @@ -51,7 +51,9 @@ DROP TABLE t1; # cleanup --source include/stop_slave.inc SET @@GLOBAL.debug_dbug = @saved_dbug; +--disable_query_log --eval SET GLOBAL log_warnings = $log_warnings_save +--enable_query_log --source include/start_slave.inc --source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.cnf b/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.cnf new file mode 100644 index 00000000..e8e03e71 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.cnf @@ -0,0 +1,10 @@ +!include ../my.cnf + +[mysqld.1] +log-warnings=9 +rpl_semi_sync_master_enabled=1 +rpl_semi_sync_master_wait_point=AFTER_COMMIT + +[mysqld.2] +log-warnings=9 +rpl_semi_sync_slave_enabled=1 diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test b/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test new file mode 100644 index 00000000..65b6e8b4 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test @@ -0,0 +1,77 @@ +# +# This test ensures that, when using semi-sync with the wait_point +# AFTER_COMMIT, each thread awaiting an ACK is only woken up when its ACK (or +# an ACK for a later commit in binlog) has been received from the slave. +# +# Prior to MDEV-33551, all threads would be woken up for each ACK received, +# leading to large slowdowns, as each thread would check if the ACK was for it +# in mutual exclusion from the others. +# +# To ensure this, a debug-build-only log warning is added into +# Repl_semi_sync_master::commit_trx() at wakeup time, which will complain if +# the awoken thread's binlog wait coordinates are after the coordinate of the +# last ACK coordinates. Then, we use binlog group commit to commit a series of +# transactions, such that each will await an ACK concurrently. After all +# transactions have been finished (i.e. ACKed and committed), we check the log +# for the expected absence of the added debug warning message. +# +# +# References: +# MDEV-33551: Semi-sync Wait Point AFTER_COMMIT Slow on Workloads with Heavy +# Concurrency +# +--source include/have_binlog_format_row.inc +--source include/have_debug.inc +--source include/master-slave.inc + +--connection master +call mtr.add_suppression("Got an error reading communication packets"); +set @save_bgc_count= @@global.binlog_commit_wait_count; +set @save_bgc_usec= @@global.binlog_commit_wait_usec; +set @save_debug_dbug= @@global.debug_dbug; +set @@global.binlog_commit_wait_count=3; +set @@global.binlog_commit_wait_usec=10000000; +set @@global.debug_dbug="+d,testing_cond_var_per_thd"; + +--echo # Ensure semi-sync is on +--connection slave +let $status_var= rpl_semi_sync_slave_status; +let $status_var_value= ON; +source include/wait_for_status_var.inc; + +--connection master +let $status_var= rpl_semi_sync_master_status; +let $status_var_value= ON; +source include/wait_for_status_var.inc; + +--echo # Create three transactions to binlog group commit together +--connection master +--send create table t1 (a int) +--connection server_1 +--send create table t2 (a int) +--connection default +--send create table t3 (a int) + +--connection master +--reap +--connection server_1 +--reap +--connection default +--reap + +--let $assert_text= Check that there is no 'Thread awaiting semi-sync ACK was awoken before its ACK' warning in error log. +--let $assert_select=Thread awaiting semi-sync ACK was awoken before its ACK +--let $assert_file= $MYSQLTEST_VARDIR/log/mysqld.1.err +--let $assert_count= 0 +--let $assert_only_after=CURRENT_TEST +--source include/assert_grep.inc + +--echo # +--echo # Cleanup +--connection master +set @@global.binlog_commit_wait_count=@save_bgc_count; +set @@global.binlog_commit_wait_usec=@save_bgc_usec; +set @@global.debug_dbug=@save_debug_dbug; +drop table t1, t2, t3; + +--source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc b/mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc index 01b0d0e5..bb705d39 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc @@ -34,14 +34,16 @@ if ($case == 1) if ($case == 2) { SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go"; + SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync WAIT_FOR con1_go1"; --send_eval $query_to_crash --connect (conn_client_2,127.0.0.1,root,,test,$SERVER_MYPORT_2,) # use the same signal with $query_to_crash SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; - SET GLOBAL debug_dbug="d,Notify_binlog_EOF"; + SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL con1_go"; + SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL con2_ready"; --send_eval $query2_to_crash --connection server_$server_to_crash - SET DEBUG_SYNC= "now WAIT_FOR eof_reached"; + SET DEBUG_SYNC= "now WAIT_FOR con2_ready"; --source include/kill_mysqld.inc } @@ -68,9 +70,7 @@ source include/wait_for_slave_param.inc; --error 2003 --source include/stop_slave.inc ---let $assert_cond= COUNT(*) = $expected_rows_on_slave FROM t1 ---let $assert_text= Table t1 should have $expected_rows_on_slave rows. ---source include/assert.inc +--eval select count(*) 'on slave must be $expected_rows_on_slave' from t1 SELECT @@GLOBAL.gtid_current_pos; @@ -81,9 +81,21 @@ SELECT @@GLOBAL.gtid_current_pos; --enable_reconnect --source include/wait_until_connected_again.inc ---let $assert_cond= COUNT(*) = $expected_rows_on_master FROM t1 ---let $assert_text= Table t1 should have $expected_rows_on_master rows. ---source include/assert.inc +--let $slave_semi_sync_enabled= query_get_value(SHOW VARIABLES LIKE 'Rpl_semi_sync_slave_enabled', Value, 1) +--echo # Ensuring variable rpl_semi_sync_slave_enabled is ON.. +if (`SELECT strcmp("ON", "$slave_semi_sync_enabled") != 0`) +{ + --die Slave started with rpl_semi_sync_slave_enabled=1 yet it is OFF in the variable output +} + +--let $slave_semi_sync_status= query_get_value(SHOW STATUS LIKE 'Rpl_semi_sync_slave_status', Value, 1) +--echo # Ensuring status rpl_semi_sync_slave_status is OFF.. +if (`SELECT strcmp("OFF", "$slave_semi_sync_status") != 0`) +{ + --die Slave started with skip-slave-start yet started with rpl_semi_sync_slave_status=ON +} + +--eval select count(*) 'on master must be $expected_rows_on_master' from t1 # Check error log for correct messages. let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.$server_to_crash.err; diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test index 17d7b50d..e4d0bc0a 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test @@ -10,6 +10,11 @@ --source include/have_binlog_format_row.inc --source include/master-slave.inc +--disable_query_log +call mtr.add_suppression("Could not read packet:.* errno: 11"); +flush tables; +--enable_query_log + # Initial slave --connection server_2 --source include/stop_slave.inc @@ -78,7 +83,7 @@ SHOW VARIABLES LIKE 'gtid_slave_pos'; --connection server_1 --sync_with_master ---eval SELECT COUNT(*) = $rows_so_far as 'true' FROM t1 +--eval SELECT COUNT(*) 'must be $rows_so_far' FROM t1 --echo # ... the gtid states on the slave: SHOW VARIABLES LIKE 'gtid_slave_pos'; SHOW VARIABLES LIKE 'gtid_binlog_pos'; @@ -129,7 +134,7 @@ SHOW VARIABLES LIKE 'gtid_slave_pos'; --connection server_2 --sync_with_master ---eval SELECT COUNT(*) = $rows_so_far as 'true' FROM t1 +--eval SELECT COUNT(*) 'must be $rows_so_far as' FROM t1 --echo # ... the gtid states on the slave: SHOW VARIABLES LIKE 'gtid_slave_pos'; SHOW VARIABLES LIKE 'gtid_binlog_pos'; @@ -180,7 +185,7 @@ SHOW VARIABLES LIKE 'gtid_slave_pos'; --connection server_1 --source include/sync_with_master_gtid.inc ---eval SELECT COUNT(*) = $rows_so_far as 'true' FROM t1 +--eval SELECT COUNT(*) 'must be $rows_so_far' FROM t1 --echo # ... the gtid states on the slave: SHOW VARIABLES LIKE 'gtid_slave_pos'; SHOW VARIABLES LIKE 'gtid_binlog_pos'; diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_master_shutdown.test b/mysql-test/suite/rpl/t/rpl_semi_sync_master_shutdown.test index 05e6fcca..d653d84c 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_master_shutdown.test +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_master_shutdown.test @@ -4,19 +4,17 @@ # finishes off as specified in particular trying to connect even to a shut down # master for a semisync firewell routine. -source include/not_embedded.inc; -source include/have_debug.inc; source include/master-slave.inc; --connection master ---let $sav_enabled_master=`SELECT @@GLOBAL.rpl_semi_sync_master_enabled ` +--let $sav_enabled_master=`SELECT @@GLOBAL.rpl_semi_sync_master_enabled` SET @@GLOBAL.rpl_semi_sync_master_enabled = 1; --connection slave source include/stop_slave.inc; ---let $sav_enabled_slave=`SELECT @@GLOBAL.rpl_semi_sync_slave_enabled ` -SET @@GLOBAL. rpl_semi_sync_slave_enabled = 1; +--let $sav_enabled_slave=`SELECT @@GLOBAL.rpl_semi_sync_slave_enabled` +SET @@GLOBAL.rpl_semi_sync_slave_enabled = 1; source include/start_slave.inc; --connection master @@ -52,7 +50,6 @@ source include/rpl_start_server.inc; #--source include/start_slave.inc --connection master -SET @@GLOBAL.debug_dbug=""; --eval SET @@GLOBAL. rpl_semi_sync_master_enabled = $sav_enabled_master --connection master diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_shutdown_await_ack.cnf b/mysql-test/suite/rpl/t/rpl_semi_sync_shutdown_await_ack.cnf index 2cf1b178..5f69b557 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_shutdown_await_ack.cnf +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_shutdown_await_ack.cnf @@ -1,13 +1,13 @@ !include ../my.cnf [mysqld.1] -log_warnings=9 +log_warnings=3 [mysqld.2] -log_warnings=9 +log_warnings=3 [mysqld.3] -log_warnings=9 +log_warnings=3 [ENV] SERVER_MYPORT_3= @mysqld.3.port diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_shutdown_await_ack.inc b/mysql-test/suite/rpl/t/rpl_semi_sync_shutdown_await_ack.inc index 252541ae..d20ef628 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_shutdown_await_ack.inc +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_shutdown_await_ack.inc @@ -3,7 +3,6 @@ # replicas before shutting down. # # Parameters: -# server_1_dbug (string) Debug setting for primary (server 1) # server_2_dbug (string) Debug setting to simulate delay or error on # the first replica (server 2) # server_3_dbug (string) Debug setting to simulate delay or error on @@ -32,8 +31,8 @@ while (`SELECT $i <= $slave_last`) --connection server_$i --sync_with_master - set global rpl_semi_sync_slave_enabled = 1; source include/stop_slave.inc; + set global rpl_semi_sync_slave_enabled = 1; source include/start_slave.inc; show status like 'Rpl_semi_sync_slave_status'; @@ -56,8 +55,6 @@ show status like 'Rpl_semi_sync_master_status'; show status like 'Rpl_semi_sync_master_clients'; --echo #-- Prepare servers to simulate delay or error ---connection server_1 ---eval SET @@GLOBAL.debug_dbug= $server_1_dbug --connection server_2 --eval SET @@GLOBAL.debug_dbug= $server_2_dbug --connection server_3 @@ -66,6 +63,11 @@ show status like 'Rpl_semi_sync_master_clients'; --echo #-- --echo #-- Test begins +--connection server_1_con2 +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait +EOF + --connection server_1 --echo #-- Begin semi-sync transaction --send INSERT INTO t1 VALUES (1) @@ -75,25 +77,13 @@ show status like 'Rpl_semi_sync_master_clients'; let $status_var= Rpl_semi_sync_master_wait_sessions; let $status_var_value= 1; source include/wait_for_status_var.inc; -show status like 'Rpl_semi_sync_master_wait_sessions'; - ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -wait -EOF - ---echo #-- Give enough time after timeout/ack received to query yes_tx/no_tx -SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait"; --echo #-- Begin master shutdown ---send SHUTDOWN WAIT FOR ALL SLAVES +SHUTDOWN WAIT FOR ALL SLAVES; +--source include/wait_until_disconnected.inc --connection server_1 ---reap ---echo #-- Ensure either ACK was received (yes_tx=1) or timeout (no_tx=1) -show status like 'Rpl_semi_sync_master_yes_tx'; -show status like 'Rpl_semi_sync_master_no_tx'; - ---connection server_1_con2 +--error 2013 --reap --source include/wait_until_disconnected.inc @@ -111,19 +101,72 @@ show status like 'Rpl_semi_sync_master_no_tx'; --echo # --echo #-- Re-synchronize slaves with master and disable semi-sync +if (`SELECT ($server_2_expect_row_count + $server_3_expect_row_count) < 2`) +{ +--echo #-- FIXME: workaround for MDEV-28141, preventing errored replicas from +--echo # killing their semi-sync connections +# I.e. we can't create a new kill connection to the primary if we know that the +# primary is shutting down for risk of Packets out of order error. So we wait +# to hit a debug_sync point before the creation of the new kill_connection, and +# don't progress until the primary has been shutdown, so no new connection can +# be formed. +# Note this is only needed in the error case (using corrupt_queue_event), as +# the running io_thread will otherwise automatically detect that the primary +# has shutdown before progressing to the cleanup of the io thread. +} + +if (!$server_2_expect_row_count) +{ + --connection server_2 + set debug_sync= "now wait_for at_semisync_kill_connection"; + set debug_sync= "now signal continue_semisync_kill_connection"; + --echo # Wait for debug_sync signal to have been received before issuing RESET + let $wait_condition= select count(*)=0 from information_schema.processlist where state like "debug sync point%"; + source include/wait_condition.inc; + set debug_sync= "reset"; +} +if (!$server_3_expect_row_count) +{ + --connection server_3 + set debug_sync= "now wait_for at_semisync_kill_connection"; + set debug_sync= "now signal continue_semisync_kill_connection"; + --echo # Wait for debug_sync signal to have been received before issuing RESET + let $wait_condition= select count(*)=0 from information_schema.processlist where state like "debug sync point%"; + source include/wait_condition.inc; + set debug_sync= "reset"; +} + --echo #-- Stop slaves --connection server_2 ---eval SET @@GLOBAL.debug_dbug= "$sav_server_2_dbug" ---eval SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0 ---let $rpl_only_running_threads= 1 -source include/stop_slave.inc; +# If server_2_expect_row_count is 0, we are simulating an error on the replica +# and the IO thread will end with errno 1595. +# Otherwise, we still expect error, because the master has shutdown at this +# point, and the IO thread may or may not have realized the shutdown, and +# started to try to automatically reconnect. This may result in the IO thread +# giving a 2003 error if the slave tries to reconnect to a shutdown master. +# Additionally disable warnings because the slave may have stopped in err +# automatically, and we don't want a sporadic "Slave is already stopped" +# warning. +--disable_warnings +--let $rpl_allow_error= 1 +--source include/stop_slave_io.inc +--enable_warnings +--let $rpl_allow_error= +--source include/stop_slave_sql.inc +SET @@GLOBAL.debug_dbug= @sav_server_2_dbug; +SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_2; --connection server_3 ---eval SET @@GLOBAL.debug_dbug= "$sav_server_3_dbug" ---eval SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0 ---let $rpl_only_running_threads= 1 -source include/stop_slave.inc; +# Expect error for IO thread, see above comment for stopping server_2 +--disable_warnings +--let $rpl_allow_error= 1 +--source include/stop_slave_io.inc +--enable_warnings +--let $rpl_allow_error= +--source include/stop_slave_sql.inc +SET @@GLOBAL.debug_dbug= @sav_server_3_dbug; +SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_3; --echo #-- Bring the master back up --connection server_1_con2 @@ -142,13 +185,6 @@ EOF --enable_reconnect --source include/wait_until_connected_again.inc ---eval SET @@GLOBAL.debug_dbug= "$sav_master_dbug" -let $status_var= Rpl_semi_sync_master_clients; -let $status_var_value= 0; -source include/wait_for_status_var.inc; ---eval SET @@GLOBAL.rpl_semi_sync_master_enabled = 0 -show status like 'Rpl_semi_sync_master_status'; - TRUNCATE TABLE t1; --save_master_pos diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_shutdown_await_ack.test b/mysql-test/suite/rpl/t/rpl_semi_sync_shutdown_await_ack.test index 6c088c57..4ed9ca0a 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_shutdown_await_ack.test +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_shutdown_await_ack.test @@ -5,7 +5,7 @@ # This test validates that data is consistent between a primary and replica # in semi-sync mode when the primary is issued `SHUTDOWN WAIT FOR SLAVES` # during an active communication. More specifically, the primary should not -# kill the connection until it is sure a replica has received all binlog +# kill the Ack_thread until it is sure a replica has received all binlog # data, i.e. once the primary receives the ACK. If a primary is issued a # shutdown before receiving an ACK, it should wait until either 1) the ACK is # received, or 2) the configured timeout (rpl_semi_sync_master_timeout) is @@ -15,23 +15,18 @@ # Using a topology consisting of one primary with two replicas, all in # semi-sync mode, we use DEBUG_DBUG to simulate an error or delay on the # replicas during an active communication while the primary is issued -# `SHUTDOWN WAIT FOR SLAVES`. We create four test cases to ensure the primary -# will correctly wait for the communication to finish, and use the semi-sync -# status variables Rpl_semi_sync_master_yes_tx and Rpl_semi_sync_master_no_tx -# to ensure the connection was not prematurely killed due to the shutdown. +# `SHUTDOWN WAIT FOR SLAVES`. We create four test cases to ensure the +# Ack_thread is not prematurely killed due to the shutdown. # Test Case 1) If both replicas simulate a delay that is within the allowed -# timeout, the primary should delay killing the suspended thread -# until an ACK is received (Rpl_semi_sync_master_yes_tx should -# be 1). +# timeout, the primary should delay killing the Ack_thread +# until an ACK is received. # Test Case 2) If both replicas simulate an error before sending an ACK, the -# primary should delay killing the suspended thread until the -# the timeout is reached (Rpl_semi_sync_master_no_tx should be -# 1). +# primary should delay killing the Ack_thread until the +# the timeout is reached. # Test Case 3) If one replica simulates a delay within the allowed timeout # and the other simulates an error before sending an ACK, the -# primary should delay killing the suspended thread until it -# receives an ACK from the delayed slave -# (Rpl_semi_sync_master_yes_tx should be 1). +# primary should delay killing the Ack_thread until it +# receives an ACK from the delayed slave. # Test Case 4) If a replica errors before sending an ACK, it will cause the # IO thread to stop and handle the error. During error handling, # if semi-sync is active, the replica will form a new connection @@ -41,9 +36,11 @@ # slave should notice this, and not issue a `QUIT` command to # the primary, which would otherwise be sent to kill an active # connection. This test case validates that the slave does not -# send a `QUIT` in this case (Rpl_semi_sync_master_yes_tx should -# be 1 because server_3 will send the ACK within a valid -# timeout). +# send a `QUIT` in this case. +# Test Case 5) If a waiting-for-ACK user thread is killed (disconnected) +# during SHUTDOWN WAIT FOR ALL SLAVES, ensure the primary will +# still await the ACK from the replica before killing the +# Ack_thread. # # References: # MDEV-11853: semisync thread can be killed after sync binlog but before ACK @@ -58,6 +55,7 @@ --echo # Note: Simulated slave delay is hardcoded to 800 milliseconds --echo # Note: Simulated master shutdown delay is hardcoded to 500 milliseconds +--source include/have_innodb.inc --source include/have_debug.inc --let $rpl_topology=1->2, 1->3 --source include/rpl_init.inc @@ -69,9 +67,8 @@ call mtr.add_suppression("Timeout waiting"); call mtr.add_suppression("did not exit"); call mtr.add_suppression("Got an error reading communication packets"); ---let $sav_master_timeout= `SELECT @@global.rpl_semi_sync_master_timeout` +--let $sav_master_timeout= `SELECT @@GLOBAL.rpl_semi_sync_master_timeout` --let $sav_enabled_master= `SELECT @@GLOBAL.rpl_semi_sync_master_enabled` ---let $sav_master_dbug= `SELECT @@GLOBAL.debug_dbug` --echo # Suppress slave errors related to the simulated error --connection server_2 @@ -79,19 +76,19 @@ call mtr.add_suppression("reply failed"); call mtr.add_suppression("Replication event checksum verification"); call mtr.add_suppression("Relay log write failure"); call mtr.add_suppression("Failed to kill the active semi-sync connection"); ---let $sav_enabled_server_2=`SELECT @@GLOBAL.rpl_semi_sync_slave_enabled` ---let $sav_server_2_dbug= `SELECT @@GLOBAL.debug_dbug` +set @sav_enabled_server_2= @@GLOBAL.rpl_semi_sync_slave_enabled; +set @sav_server_2_dbug= @@GLOBAL.debug_dbug; --connection server_3 call mtr.add_suppression("reply failed"); call mtr.add_suppression("Replication event checksum verification"); call mtr.add_suppression("Relay log write failure"); call mtr.add_suppression("Failed to kill the active semi-sync connection"); ---let $sav_enabled_server_3=`SELECT @@GLOBAL.rpl_semi_sync_slave_enabled` ---let $sav_server_3_dbug= `SELECT @@GLOBAL.debug_dbug` +set @sav_enabled_server_3= @@GLOBAL.rpl_semi_sync_slave_enabled; +set @sav_server_3_dbug= @@GLOBAL.debug_dbug; --connection server_1 -CREATE TABLE t1 (a int); +CREATE TABLE t1 (a int) engine=innodb; --save_master_pos --let i= 2 @@ -113,10 +110,9 @@ while (`SELECT $i <= $slave_last`) --echo # --echo # Test Case 1) If both replicas simulate a delay that is within the ---echo # allowed timeout, the primary should delay killing the suspended thread ---echo # until an ACK is received (Rpl_semi_sync_master_yes_tx should be 1). +--echo # allowed timeout, the primary should delay killing the Ack_thread +--echo # until an ACK is received. --echo # ---let server_1_dbug= "" --let server_2_dbug= "+d,simulate_delay_semisync_slave_reply" --let server_3_dbug= "+d,simulate_delay_semisync_slave_reply" --let semisync_timeout= 1600 @@ -126,12 +122,11 @@ while (`SELECT $i <= $slave_last`) --echo # --echo # Test Case 2) If both replicas simulate an error before sending an ACK, ---echo # the primary should delay killing the suspended thread until the ---echo # timeout is reached (Rpl_semi_sync_master_no_tx should be 1). +--echo # the primary should delay killing the Ack_thread until the +--echo # timeout is reached. --echo # ---let server_1_dbug= "+d,mysqld_delay_kill_threads_phase_1" ---let server_2_dbug= "+d,corrupt_queue_event" ---let server_3_dbug= "+d,corrupt_queue_event" +--let server_2_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141" +--let server_3_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141" --let semisync_timeout= 500 --let server_2_expect_row_count= 0 --let server_3_expect_row_count= 0 @@ -140,11 +135,10 @@ while (`SELECT $i <= $slave_last`) --echo # --echo # Test Case 3) If one replica simulates a delay within the allowed --echo # timeout and the other simulates an error before sending an ACK, the ---echo # primary should delay killing the suspended thread until it receives an ---echo # ACK from the delayed slave (Rpl_semi_sync_master_yes_tx should be 1). +--echo # primary should delay killing the Ack_thread until it receives an +--echo # ACK from the delayed slave. --echo # ---let server_1_dbug= "+d,mysqld_delay_kill_threads_phase_1" ---let server_2_dbug= "+d,corrupt_queue_event" +--let server_2_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141" --let server_3_dbug= "+d,simulate_delay_semisync_slave_reply" --let semisync_timeout= 1600 --let server_2_expect_row_count= 0 @@ -160,56 +154,123 @@ while (`SELECT $i <= $slave_last`) --echo # active semi-sync connection in-tact. The slave should notice this, and --echo # not issue a `QUIT` command to the primary, which would otherwise be --echo # sent to kill an active connection. This test case validates that the ---echo # slave does not send a `QUIT` in this case (Rpl_semi_sync_master_yes_tx ---echo # should be 1 because server_3 will send the ACK within a valid timeout). +--echo # slave does not send a `QUIT` in this case. --echo # - -# mysqld_delay_kill_threads_phase1 ensures that server_2 will have enough time -# to start a new connection that has the intent to kill the active semi-sync -# connection ---let server_1_dbug= "+d,mysqld_delay_kill_threads_phase_1" - -# slave_delay_killing_semisync_connection ensures that the primary has force -# killed its current connection before it is able to issue `KILL` ---let server_2_dbug= "+d,corrupt_queue_event,slave_delay_killing_semisync_connection" +--let server_2_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141" --let server_3_dbug= "+d,simulate_delay_semisync_slave_reply" --let semisync_timeout= 1600 --let server_2_expect_row_count= 0 --let server_3_expect_row_count= 1 --source rpl_semi_sync_shutdown_await_ack.inc +# +# Added with MDEV-33551 +# +--echo # +--echo # Test Case 5) If a waiting-for-ACK user thread is killed (disconnected) +--echo # during SHUTDOWN WAIT FOR ALL SLAVES, ensure the primary will still +--echo # await the ACK from the replica before killing the Ack_receiver thread +--echo # +--connection server_1 +insert into t1 values (1); +--source include/save_master_gtid.inc + +--connection server_2 +--source include/sync_with_master_gtid.inc +--source include/stop_slave.inc +SET GLOBAL rpl_semi_sync_slave_enabled= 1; +--source include/start_slave.inc + +--connection server_1 +SET GLOBAL rpl_semi_sync_master_enabled= 1; +SET GLOBAL rpl_semi_sync_master_timeout= 2000; + +--let $status_var= Rpl_semi_sync_master_clients +--let $status_var_value= 1 +source include/wait_for_status_var.inc; + +show status like 'Rpl_semi_sync_master_status'; +show status like 'Rpl_semi_sync_master_clients'; + +--connection server_2 +SET @old_dbug= @@GLOBAL.debug_dbug; +SET GLOBAL debug_dbug="+d,simulate_delay_semisync_slave_reply"; + +--connect(con1, localhost, root,,) +--connect(con2, localhost, root,,) + +--connection con1 +--send insert into t1 values (2) + +--connection server_1 +--echo # Wait for thd to begin semi-sync wait.. +--let $wait_condition= SELECT COUNT(*) = 1 FROM information_schema.processlist WHERE state = 'Waiting for semi-sync ACK from slave' +--source include/wait_condition.inc +--echo # ..done + +--disconnect con1 + +--connection default +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait +EOF + +--connection con2 +SHUTDOWN WAIT FOR ALL SLAVES; +--source include/wait_until_disconnected.inc + +--echo # Ensure the primary waited for the ACK of the killed thread +--let $SEARCH_PATTERN= Delaying shutdown to await semi-sync ACK +--let $SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err +--source include/search_pattern_in_file.inc + +--connection default +--source include/wait_until_disconnected.inc + +--connection server_1 +--source include/wait_until_disconnected.inc + +--connection server_2 +--let $rpl_allow_error= 1 +source include/stop_slave.inc; +--connection server_3 +source include/stop_slave.inc; +--let $rpl_allow_error= + +--connection default +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart +EOF +--enable_reconnect +--source include/wait_until_connected_again.inc + +--connection server_1 +--enable_reconnect +--source include/wait_until_connected_again.inc + + --echo ############################# --echo # Cleanup --echo ############################# --connection server_2 -source include/stop_slave.inc; +SET @@GLOBAL.rpl_semi_sync_slave_enabled = @sav_enabled_server_2; +SET @@GLOBAL.debug_dbug= @sav_server_2_dbug; source include/start_slave.inc; ---disable_query_log ---eval SET @@GLOBAL.rpl_semi_sync_slave_enabled = $sav_enabled_server_2 ---eval SET @@GLOBAL.debug_dbug= "$sav_server_2_dbug" ---enable_query_log - --connection server_3 -source include/stop_slave.inc; +SET @@GLOBAL.rpl_semi_sync_slave_enabled = @sav_enabled_server_3; +SET @@GLOBAL.debug_dbug= @sav_server_3_dbug; source include/start_slave.inc; ---disable_query_log ---eval SET @@GLOBAL.rpl_semi_sync_slave_enabled = $sav_enabled_server_3 ---eval SET @@GLOBAL.debug_dbug= "$sav_server_3_dbug" ---enable_query_log - - --connection server_1 let $status_var= Rpl_semi_sync_master_clients; let $status_var_value= 0; source include/wait_for_status_var.inc; --disable_query_log ---eval SET @@GLOBAL.rpl_semi_sync_master_timeout= $sav_master_timeout ---eval SET @@GLOBAL.rpl_semi_sync_master_enabled= $sav_enabled_master ---eval SET @@GLOBAL.debug_dbug= "$sav_master_dbug" +--eval SET @@GLOBAL.rpl_semi_sync_master_timeout= $sav_master_timeout; +--eval SET @@GLOBAL.rpl_semi_sync_master_enabled= $sav_enabled_master; --enable_query_log drop table t1; diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_slave_enabled_consistent.test b/mysql-test/suite/rpl/t/rpl_semi_sync_slave_enabled_consistent.test index 9e388ab4..ca7e7887 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_slave_enabled_consistent.test +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_slave_enabled_consistent.test @@ -14,6 +14,9 @@ call mtr.add_suppression("Replication event checksum verification failed"); call mtr.add_suppression("could not queue event from master"); +call mtr.add_suppression("Semisync ack receiver.*error reading communication packets"); +call mtr.add_suppression("Semisync ack receiver got hangup"); +--sync_slave_with_master --echo # --echo # Set up a semisync connection diff --git a/mysql-test/suite/rpl/t/rpl_show_slave_status.test b/mysql-test/suite/rpl/t/rpl_show_slave_status.test new file mode 100644 index 00000000..f4bbb5fa --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_show_slave_status.test @@ -0,0 +1,27 @@ +--source include/have_binlog_format_mixed.inc +--source include/master-slave.inc + +--echo * +--echo * The purpose of this test is to prevent incorrect additions to SHOW +--echo * SLAVE STATUS, which has happened several times in the past. +--echo * +--echo * We must never, _ever_, add extra rows to this output of SHOW SLAVE +--echo * STATUS, except at the very end, as this breaks backwards compatibility +--echo * with applications or scripts that parse the output. This also means that +--echo * we cannot add _any_ new rows in a GA version if a different row was +--echo * already added in a later MariaDB version, as this would make it impossible +--echo * to merge the change up while preserving the order of rows. +--echo * + +--connection slave +--replace_column 1 # 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 11 # 12 # 13 # 14 # 15 # 16 # 17 # 18 # 19 # 20 # 21 # 22 # 23 # 24 # 25 # 26 # 27 # 28 # 29 # 30 # 31 # 32 # 33 # 34 # 35 # 36 # 37 # 38 # 39 # 40 # 41 # 42 # 43 # 44 # 45 # 46 # 47 # 48 # 49 # 50 # 51 # 52 # 53 # 54 # +query_vertical +SHOW SLAVE STATUS; + +--echo * +--echo * When modifying this test after adding a column to SHOW SLAVE STATUS, +--echo * _only_ additions at the end are allowed, the column number of existing +--echo * columns must _not_ change! +--echo * + +--source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_shutdown_sighup.test b/mysql-test/suite/rpl/t/rpl_shutdown_sighup.test new file mode 100644 index 00000000..d1940f81 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_shutdown_sighup.test @@ -0,0 +1,154 @@ +# +# The signal handler thread can use various different runtime resources when +# processing a SIGHUP (e.g. master-info information), as the logic calls into +# reload_acl_and_cache(). This test ensures that SIGHUP processing, when +# concurrent with server shutdown, the shutdown logic must wait for the SIGHUP +# processing to finish before cleaning up any resources. +# +# Additionally, the error case is tested such that the signal handler thread +# takes too long processing a SIGHUP, and the main mysqld thread must skip its +# wait and output a warning. +# +# Note the SIGHUP is sent via the command-line kill program via a perl script. +# +# References: +# MDEV-30260: Slave crashed:reload_acl_and_cache during shutdown +# + +--source include/not_windows.inc +--source include/not_embedded.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc + +# Binlog format doesn't matter +--source include/have_binlog_format_statement.inc +--source include/master-slave.inc + +# For error test case which forces timeout +--connection slave +set statement sql_log_bin=0 for call mtr.add_suppression("Signal handler thread did not exit in a timely manner"); + + +--echo # +--echo # Main test +--connection master +create table t1 (a int); +insert into t1 values (1); +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc + +# Make signal handler handle SIGHUP.. +set @@global.debug_dbug= "+d,hold_sighup_log_refresh"; +--let KILL_NODE_PIDFILE = `SELECT @@pid_file` +--perl + my $kill_sig = $ENV{'KILL_SIGNAL_VALUE'}; + my $pid_filename = $ENV{'KILL_NODE_PIDFILE'}; + my $mysqld_pid = `cat $pid_filename`; + chomp($mysqld_pid); + system("kill -HUP $mysqld_pid"); + exit(0); +EOF + +--echo # Waiting for sighup to reach reload_acl_and_cache.. +set debug_sync="now wait_for in_reload_acl_and_cache"; +--echo # Signalling signal handler to proceed to sleep before REFRESH_HOSTS +set debug_sync="now signal refresh_logs"; + +# ..while we are shutting down +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +wait +EOF +--echo # Starting shutdown (note this will take 3+ seconds due to DBUG my_sleep in reload_acl_and_cache) +shutdown; + +--source include/wait_until_disconnected.inc +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +restart: --skip-slave-start=0 +EOF + +--connection server_2 +--enable_reconnect +--source include/wait_until_connected_again.inc + +--connection slave +--enable_reconnect +--source include/wait_until_connected_again.inc + +--let $assert_text= Ensure Mariadbd did not segfault when shutting down +--let $assert_select= got signal 11 +--let $assert_file= $MYSQLTEST_VARDIR/log/mysqld.2.err +--let $assert_count= 0 +--let $assert_only_after = CURRENT_TEST: rpl.rpl_shutdown_sighup +--source include/assert_grep.inc + +--connection master +--sync_slave_with_master + + +--echo # +--echo # Error testcase to ensure an error message is shown if the signal +--echo # takes longer than the timeout while processing the SIGHUP + +--connection slave +set @@global.debug_dbug= "+d,force_sighup_processing_timeout"; +set @@global.debug_dbug= "+d,hold_sighup_log_refresh"; + +--connection master +insert into t1 values (1); +--source include/save_master_gtid.inc + +--connection slave +--source include/sync_with_master_gtid.inc + +# Make signal handler handle SIGHUP.. +--let KILL_NODE_PIDFILE = `SELECT @@pid_file` +--perl + my $kill_sig = $ENV{'KILL_SIGNAL_VALUE'}; + my $pid_filename = $ENV{'KILL_NODE_PIDFILE'}; + my $mysqld_pid = `cat $pid_filename`; + chomp($mysqld_pid); + system("kill -HUP $mysqld_pid"); + exit(0); +EOF +--echo # Waiting for sighup to reach reload_acl_and_cache.. +set debug_sync="now wait_for in_reload_acl_and_cache"; +--echo # Signalling signal handler to proceed to sleep before REFRESH_HOSTS +set debug_sync="now signal refresh_logs"; + +# ..while we are shutting down +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +wait +EOF +--echo # Starting shutdown (note this will take 3+ seconds due to DBUG my_sleep in reload_acl_and_cache) +shutdown; + +--source include/wait_until_disconnected.inc +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +restart: --skip-slave-start=0 +EOF + +--connection server_2 +--enable_reconnect +--source include/wait_until_connected_again.inc + +--connection slave +--enable_reconnect +--source include/wait_until_connected_again.inc + +--let $assert_text= Ensure warning is issued that signal handler thread is still processing +--let $assert_select= Signal handler thread did not exit in a timely manner. +--let $assert_file= $MYSQLTEST_VARDIR/log/mysqld.2.err +--let $assert_count= 1 +--let $assert_only_after = CURRENT_TEST: rpl.rpl_shutdown_sighup +--source include/assert_grep.inc + + +--echo # +--echo # Cleanup +--connection master +drop table t1; + +--source include/rpl_end.inc +--echo # End of rpl_shutdown_sighup.test diff --git a/mysql-test/suite/rpl/t/rpl_skip_error.test b/mysql-test/suite/rpl/t/rpl_skip_error.test index d3ef834e..ee11ce5a 100644 --- a/mysql-test/suite/rpl/t/rpl_skip_error.test +++ b/mysql-test/suite/rpl/t/rpl_skip_error.test @@ -3,7 +3,11 @@ # Verify that --slave-skip-errors works correctly. The error messages # specified by --slave-skip-errors on slave should be ignored. If # such errors occur, they should not be reported and not cause the -# slave to stop. +# slave to stop. If a skipped-due-to-error statement is a part of a +# larger transaction, and the error is not a deadlock error, the rest +# of the transaction should still commit, with just the errored statement +# ignored (note transactions which are skipped due to deadlocks are +# rolled back fully, see rpl_temporary_error2_skip_all.test). # # ==== Method ==== # @@ -164,6 +168,42 @@ let $current_skipped_error= query_get_value(show global status like "Slave_skipp --echo **** We cannot execute a select as there are differences in the --echo **** behavior between STMT and RBR. + +--echo **** +--echo **** Ensure transactions which are skipped due to encountering a +--echo **** non-deadlock error which is present in --slave-skip-errors result +--echo **** in partially committed transactions +# Slave will insert 3 first, and master will insert 3 within a larger trx +--let $value_preexisting_on_slave= 3 + +--connection master +CREATE TABLE t3 (a INT UNIQUE) ENGINE=InnoDB; + +--sync_slave_with_master +--connection slave +--eval INSERT INTO t3 VALUES ($value_preexisting_on_slave) + +--connection master +BEGIN; +INSERT INTO t3 VALUES (1); +INSERT INTO t3 VALUES (2); +--eval INSERT INTO t3 VALUES ($value_preexisting_on_slave) +INSERT INTO t3 VALUES (4); +COMMIT; +--sync_slave_with_master + +--echo **** Master and slave tables should have the same data, due to the +--echo **** partially replicated transaction's data overlapping with the data +--echo **** that pre-existed on the slave. That is, despite the transaction +--echo **** consisting of 4 statements, the errored statement should be ignored +--echo **** and the other 3 should commit successfully. +let $diff_tables=master:t3,slave:t3; +source include/diff_tables.inc; + +--connection master +DROP TABLE t3; + + --echo ==== Clean Up ==== connection master; diff --git a/mysql-test/suite/rpl/t/rpl_sync.test b/mysql-test/suite/rpl/t/rpl_sync.test index 1e2ec2ca..3dd99e73 100644 --- a/mysql-test/suite/rpl/t/rpl_sync.test +++ b/mysql-test/suite/rpl/t/rpl_sync.test @@ -82,7 +82,7 @@ print FILE "failure"; close ($file); EOF ---exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +--write_line restart $MYSQLTEST_VARDIR/tmp/mysqld.2.expect SET SESSION debug_dbug="d,crash_before_rotate_relaylog"; --error 2013 FLUSH LOGS; @@ -130,7 +130,7 @@ print FILE @content; close FILE; EOF ---exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +--write_line restart $MYSQLTEST_VARDIR/tmp/mysqld.2.expect SET SESSION debug_dbug="d,crash_before_rotate_relaylog"; --error 2013 FLUSH LOGS; diff --git a/mysql-test/suite/rpl/t/rpl_temporary_error2.test b/mysql-test/suite/rpl/t/rpl_temporary_error2.test index 49194c5d..3537499d 100644 --- a/mysql-test/suite/rpl/t/rpl_temporary_error2.test +++ b/mysql-test/suite/rpl/t/rpl_temporary_error2.test @@ -64,7 +64,14 @@ ROLLBACK; --connection slave --sync_with_master SELECT * FROM t1 ORDER BY a; ---echo * There will be two rows in t2 due to the retry. +if (!$ignored_db_deadlock) +{ + --echo * There will be two rows in t2 due to the retry. +} +if ($ignored_db_deadlock) +{ + --echo * There will be one row in t2 because the ignored deadlock does not retry. +} SELECT * FROM t2 ORDER BY a; let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1); --disable_query_log diff --git a/mysql-test/suite/rpl/t/rpl_temporary_error2_skip_all-slave.opt b/mysql-test/suite/rpl/t/rpl_temporary_error2_skip_all-slave.opt new file mode 100644 index 00000000..a9ddd735 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_temporary_error2_skip_all-slave.opt @@ -0,0 +1 @@ +--slave-skip-errors=all diff --git a/mysql-test/suite/rpl/t/rpl_temporary_error2_skip_all.test b/mysql-test/suite/rpl/t/rpl_temporary_error2_skip_all.test new file mode 100644 index 00000000..6801bf18 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_temporary_error2_skip_all.test @@ -0,0 +1,3 @@ +--source include/have_binlog_format_row.inc +--let $ignored_db_deadlock= 1 +--source rpl_temporary_error2.test diff --git a/mysql-test/suite/rpl/t/rpl_using_gtid_default.test b/mysql-test/suite/rpl/t/rpl_using_gtid_default.test index c14695f9..1557fcdc 100644 --- a/mysql-test/suite/rpl/t/rpl_using_gtid_default.test +++ b/mysql-test/suite/rpl/t/rpl_using_gtid_default.test @@ -42,10 +42,9 @@ # replication is used by default if master supports it # --source include/have_debug.inc ---source include/master-slave.inc - # Format independent test so just use one --source include/have_binlog_format_mixed.inc +--source include/master-slave.inc --echo # --echo # Slave default configuration should be Slave_Pos diff --git a/mysql-test/suite/rpl/t/show_status_stop_slave_race-7126.test b/mysql-test/suite/rpl/t/show_status_stop_slave_race-7126.test index cd0f8aad..815b2537 100644 --- a/mysql-test/suite/rpl/t/show_status_stop_slave_race-7126.test +++ b/mysql-test/suite/rpl/t/show_status_stop_slave_race-7126.test @@ -3,7 +3,10 @@ # --source include/master-slave.inc +--disable_query_log call mtr.add_suppression("Master is configured to log replication events"); +call mtr.add_suppression("Could not read packet:.* errno: 11"); +--enable_query_log --connection slave |