diff options
author | Jan Lindström <jan.lindstrom@mariadb.com> | 2022-03-03 18:51:12 +0200 |
---|---|---|
committer | Jan Lindström <jan.lindstrom@mariadb.com> | 2022-03-29 07:57:46 +0300 |
commit | 207b64058a734a8a3a5388cd6c205c29924d702b (patch) | |
tree | c2522a0a9898a7a7418ee1757cbf89c3134751c8 | |
parent | b2fa874e462e7352be173a3075a65e49f2fcc404 (diff) | |
download | mariadb-git-bb-10.6-MDEV-24845-galera.tar.gz |
MDEV-24845 : Oddities around innodb_fatal_semaphore_wait_threshold and global.innodb_disallow_writes (bb-10.6-MDEV-24845-galera)
We will remove variable innodb_disallow_writes because it is badly
designed and implemented. Parameter will be marked as removed.
Instead we will be using
* Galera provider is paused i.e. all commits will wait
* FLUSH TABLES WITH READ LOCK (FTWRL) to avoid any DDL during SST, this
global read lock i.e. MDL_BACKUP_FTWRL1 will conflict with MDL_BACKUP_DML
taken by InnoDB background threads doing writes. Similarly, it will conflict
with MDL_BACKUP_START used by mariabackup.
* We force flushing all dirty pages from buffer pool and force InnoDB checkpoint
* Encryption, purge, background statistics and FTS-optimize threads will
acquire MDL_BACKUP_DML before continuing. This will conflict with the lock
acquired by FTWRL. Note that these threads do not wait: if the MDL lock
can't be acquired, the operation is skipped.
handler.cc
handler.h
Add new API function ha_force_checkpoint to force checkpoint
inside InnoDB.
xtrabackup.cc
Remove INNODB_DISALLOW_WRITES code
mdl.cc
New functions to trylock and release global MDL lock.
wsrep_sst.cc
Force flushing all dirty pages from buffer pool and force InnoDB checkpoint
mysqld.cc
Mark innodb-disallow-writes variable as removed.
dict0stats.cc
dict_stats_func
Acquire backup lock and release it after we are done
fil0crypt.cc
fil_crypt_start_encrypting_space()
fil_crypt_thread()
Acquire backup lock and release it after we are done
fts0opt.cc
fts_optimize_sync_table()
fts_optimize_callback()
Acquire backup lock and release it after we are done
ha_innodb.cc
Remove all WITH_INNODB_DISALLOW_WRITES code
wsrep_force_checkpoint()
New API function to flush dirty pages from buffer pool and
force full checkpoint.
trx0purge.cc
trx_purge()
Acquire and release global MDL_BACKUP_DML lock
try
r
32 files changed, 367 insertions, 447 deletions
diff --git a/mysql-test/include/have_innodb_disallow_writes.inc b/mysql-test/include/have_innodb_disallow_writes.inc deleted file mode 100644 index 83b516b7a34..00000000000 --- a/mysql-test/include/have_innodb_disallow_writes.inc +++ /dev/null @@ -1,6 +0,0 @@ ---source include/have_innodb.inc - -if (`SELECT COUNT(*) = 0 from INFORMATION_SCHEMA.GLOBAL_VARIABLES - WHERE VARIABLE_NAME = 'INNODB_DISALLOW_WRITES'`) { - --skip Test requires 'innodb_disallow_writes' -} diff --git a/mysql-test/suite/galera/r/MW-328C.result b/mysql-test/suite/galera/r/MW-328C.result deleted file mode 100644 index 7a00bb718de..00000000000 --- a/mysql-test/suite/galera/r/MW-328C.result +++ /dev/null @@ -1,25 +0,0 @@ -connection node_2; -connection node_1; -CREATE TABLE t1 (f1 INTEGER AUTO_INCREMENT PRIMARY KEY, f2 CHAR(20) DEFAULT 'abc') ENGINE=InnoDB; -INSERT INTO t1 (f1) VALUES (1); -CREATE TABLE t2 (f1 CHAR(20)) ENGINE=InnoDB; -CREATE PROCEDURE proc_update () -BEGIN -DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END; -SET SESSION wsrep_sync_wait = 0; -WHILE 1 DO -UPDATE t1 SET f2 = LEFT(MD5(RAND()), 4); -END WHILE; -END| -connect node_1X, 127.0.0.1, root, , test, $NODE_MYPORT_1; -connection node_1X; -CALL proc_update();; -connection node_2; -SET SESSION wsrep_retry_autocommit = 10000; -connection node_1; -connection node_1X; -Got one of the listed errors -connection node_1; -DROP PROCEDURE proc_update; -DROP TABLE t1, t2; -CALL mtr.add_suppression("conflict state ABORTED after post commit"); diff --git a/mysql-test/suite/galera/r/galera_bf_abort_shutdown.result b/mysql-test/suite/galera/r/galera_bf_abort_shutdown.result index fa2a5c373f2..5707b68e190 100644 --- a/mysql-test/suite/galera/r/galera_bf_abort_shutdown.result +++ b/mysql-test/suite/galera/r/galera_bf_abort_shutdown.result @@ -5,6 +5,7 @@ connection node_2; connection node_1; CREATE TABLE t1 (f1 INT PRIMARY KEY); connection node_2; +call mtr.add_suppression("WSREP: Failed to scan the last segment to the end. 
Last events may be missing. Last recovered event:.*"); SET DEBUG_SYNC = 'wsrep_before_certification WAIT_FOR continue'; INSERT INTO t1 VALUES (1); connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; diff --git a/mysql-test/suite/galera/r/galera_drop_database.result b/mysql-test/suite/galera/r/galera_drop_database.result index 6ab4b3d0c7b..03b55136c6f 100644 --- a/mysql-test/suite/galera/r/galera_drop_database.result +++ b/mysql-test/suite/galera/r/galera_drop_database.result @@ -48,3 +48,6 @@ SHOW TABLES; Tables_in_fts DROP DATABASE fts; connection node_2; +call mtr.add_suppression("WSREP: Failed to scan the last segment to the end. Last events may be missing. Last recovered event:"); +Warnings: +Note 1049 Unknown database 'fts' diff --git a/mysql-test/suite/galera/r/galera_events2.result b/mysql-test/suite/galera/r/galera_events2.result index cd44579af90..26d3a74a7f3 100644 --- a/mysql-test/suite/galera/r/galera_events2.result +++ b/mysql-test/suite/galera/r/galera_events2.result @@ -111,6 +111,7 @@ f1 f2 SELECT EVENT_CATALOG, EVENT_SCHEMA, EVENT_NAME, DEFINER, EVENT_BODY, EVENT_DEFINITION, EVENT_TYPE, EXECUTE_AT, INTERVAL_VALUE, INTERVAL_FIELD, STATUS,ON_COMPLETION, EVENT_COMMENT FROM INFORMATION_SCHEMA.EVENTS WHERE EVENT_NAME='one_event'; EVENT_CATALOG EVENT_SCHEMA EVENT_NAME DEFINER EVENT_BODY EVENT_DEFINITION EVENT_TYPE EXECUTE_AT INTERVAL_VALUE INTERVAL_FIELD STATUS ON_COMPLETION EVENT_COMMENT def test one_event root@localhost SQL SELECT 123 RECURRING NULL 10 SECOND SLAVESIDE_DISABLED NOT PRESERVE +call mtr.add_suppression("WSREP: Failed to scan the last segment to the end. Last events may be missing. 
Last recovered event:.*"); connection node_1; SELECT * FROM t1; f1 f2 diff --git a/mysql-test/suite/galera/r/galera_restart_on_unknown_option.result b/mysql-test/suite/galera/r/galera_restart_on_unknown_option.result index 6e672c2d444..b1ee6f5955f 100644 --- a/mysql-test/suite/galera/r/galera_restart_on_unknown_option.result +++ b/mysql-test/suite/galera/r/galera_restart_on_unknown_option.result @@ -41,6 +41,7 @@ f1 f2 connection node_2; Starting server ... Starting server ... +call mtr.add_suppression("WSREP: Failed to scan the last segment to the end. Last events may be missing. Last recovered event:"); SELECT * FROM t1; f1 f2 1 a diff --git a/mysql-test/suite/galera/r/galera_var_innodb_disallow_writes.result b/mysql-test/suite/galera/r/galera_var_innodb_disallow_writes.result deleted file mode 100644 index 758c34ee62e..00000000000 --- a/mysql-test/suite/galera/r/galera_var_innodb_disallow_writes.result +++ /dev/null @@ -1,30 +0,0 @@ -connection node_2; -connection node_1; -connection node_1a; -SET SESSION wsrep_sync_wait = 0; -connection node_1; -CREATE TABLE t1 (f1 INTEGER, f2 varchar(1024)) Engine=InnoDB; -CREATE TABLE ten (f1 INTEGER) ENGINE=InnoDB; -INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10); -SET GLOBAL innodb_disallow_writes=ON; -INSERT INTO t1 (f2) SELECT 'abcde ' FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4;; -connection node_2; -INSERT INTO t1 (f2) SELECT 'fghij ' FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4; -SELECT COUNT(*) AS EXPECT_10000 FROM t1; -EXPECT_10000 -10000 -connection node_1a; -SET GLOBAL innodb_disallow_writes=OFF; -connection node_1; -SELECT COUNT(*) AS EXPECT_20000 FROM t1; -EXPECT_20000 -20000 -connection node_2; -SELECT COUNT(*) AS EXPECT_20000 FROM t1; -EXPECT_20000 -20000 -connection node_1; -connection node_2; -DROP TABLE t1; -DROP TABLE ten; -disconnect node_1a; diff --git a/mysql-test/suite/galera/t/MW-328C.cnf b/mysql-test/suite/galera/t/MW-328C.cnf deleted file mode 100644 index 
e68f891792c..00000000000 --- a/mysql-test/suite/galera/t/MW-328C.cnf +++ /dev/null @@ -1,7 +0,0 @@ -!include ../galera_2nodes.cnf - -[mysqld.1] -wsrep-debug=SERVER - -[mysqld.2] -wsrep-debug=SERVER diff --git a/mysql-test/suite/galera/t/MW-328C.test b/mysql-test/suite/galera/t/MW-328C.test deleted file mode 100644 index 7241dfbdbca..00000000000 --- a/mysql-test/suite/galera/t/MW-328C.test +++ /dev/null @@ -1,35 +0,0 @@ -# -# MW-328 Fix unnecessary/silent BF aborts -# - -# -# Make sure that a high value of wsrep_retry_autocommit -# masks all deadlock errors -# - ---source include/galera_cluster.inc ---source suite/galera/t/MW-328-header.inc - ---connection node_2 ---let $count = 100 - -SET SESSION wsrep_retry_autocommit = 10000; - ---disable_query_log - -while ($count) -{ - --error 0 - INSERT IGNORE INTO t2 SELECT f2 FROM t1; - - --disable_result_log - --error 0 - SELECT 1 FROM DUAL; - --enable_result_log - - --dec $count -} - ---enable_query_log - ---source suite/galera/t/MW-328-footer.inc diff --git a/mysql-test/suite/galera/t/galera_bf_abort_shutdown.test b/mysql-test/suite/galera/t/galera_bf_abort_shutdown.test index c7af8375b3f..42f85ecf7c2 100644 --- a/mysql-test/suite/galera/t/galera_bf_abort_shutdown.test +++ b/mysql-test/suite/galera/t/galera_bf_abort_shutdown.test @@ -18,6 +18,7 @@ CREATE TABLE t1 (f1 INT PRIMARY KEY); --connection node_2 +call mtr.add_suppression("WSREP: Failed to scan the last segment to the end. Last events may be missing. 
Last recovered event:.*"); SET DEBUG_SYNC = 'wsrep_before_certification WAIT_FOR continue'; --send INSERT INTO t1 VALUES (1) diff --git a/mysql-test/suite/galera/t/galera_drop_database.test b/mysql-test/suite/galera/t/galera_drop_database.test index 8dc73c1ce38..c1a66e1f66c 100644 --- a/mysql-test/suite/galera/t/galera_drop_database.test +++ b/mysql-test/suite/galera/t/galera_drop_database.test @@ -56,6 +56,7 @@ SHOW TABLES; DROP DATABASE fts; --connection node_2 +call mtr.add_suppression("WSREP: Failed to scan the last segment to the end. Last events may be missing. Last recovered event:"); --let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 'fts_t1'; --source include/wait_condition.inc --let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 'fts_t2'; diff --git a/mysql-test/suite/galera/t/galera_events2.test b/mysql-test/suite/galera/t/galera_events2.test index 3dfbe406fc4..b29ad3ba2f2 100644 --- a/mysql-test/suite/galera/t/galera_events2.test +++ b/mysql-test/suite/galera/t/galera_events2.test @@ -137,6 +137,8 @@ SELECT * FROM t1; --echo # node_2 Event should be SERVERSIDE_DISABLED SELECT EVENT_CATALOG, EVENT_SCHEMA, EVENT_NAME, DEFINER, EVENT_BODY, EVENT_DEFINITION, EVENT_TYPE, EXECUTE_AT, INTERVAL_VALUE, INTERVAL_FIELD, STATUS,ON_COMPLETION, EVENT_COMMENT FROM INFORMATION_SCHEMA.EVENTS WHERE EVENT_NAME='one_event'; +call mtr.add_suppression("WSREP: Failed to scan the last segment to the end. Last events may be missing. 
Last recovered event:.*"); + --connection node_1 SELECT * FROM t1; --echo # node_1 Event should be ENABLED diff --git a/mysql-test/suite/galera/t/galera_restart_on_unknown_option.test b/mysql-test/suite/galera/t/galera_restart_on_unknown_option.test index ed7f106c123..6a0f24dbaae 100644 --- a/mysql-test/suite/galera/t/galera_restart_on_unknown_option.test +++ b/mysql-test/suite/galera/t/galera_restart_on_unknown_option.test @@ -125,6 +125,7 @@ SELECT * FROM t1; # Sanity check (node 2 is running now and can perform SQL operators): +call mtr.add_suppression("WSREP: Failed to scan the last segment to the end. Last events may be missing. Last recovered event:"); SELECT * FROM t1; --connection node_1 diff --git a/mysql-test/suite/galera/t/galera_var_innodb_disallow_writes.test b/mysql-test/suite/galera/t/galera_var_innodb_disallow_writes.test deleted file mode 100644 index 10f3815e135..00000000000 --- a/mysql-test/suite/galera/t/galera_var_innodb_disallow_writes.test +++ /dev/null @@ -1,72 +0,0 @@ -# -# This test checks that innodb_disallow_writes works as expected -# -# Note that we need to enable binlog for this test: If the commit -# to InnoDB is done in one phase, the transaction is committed in -# memory before it is persisted to disk. This means that the -# innodb_disallow_writes=ON may not prevent transaction to -# become visible to other readers. On the other hand, if the -# commit is two phase (as it is with binlog), the transaction -# will be blocked in prepare phase. 
-# - ---source include/galera_cluster.inc ---source include/have_innodb.inc ---source include/have_log_bin.inc - ---let $datadir= `SELECT @@datadir` - - -# Open a separate connection to be used to run SHOW PROCESSLIST ---let $galera_connection_name = node_1a ---let $galera_server_number = 1 ---source include/galera_connect.inc ---connection node_1a -SET SESSION wsrep_sync_wait = 0; - ---connection node_1 -CREATE TABLE t1 (f1 INTEGER, f2 varchar(1024)) Engine=InnoDB; -CREATE TABLE ten (f1 INTEGER) ENGINE=InnoDB; -INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10); - -SET GLOBAL innodb_disallow_writes=ON; ---exec find $datadir -type f-exec md5sum {} \; | md5sum >$MYSQLTEST_VARDIR/tmp/innodb_before - -# -# This insert has no effect before innodb_disallow_writes is OFF -# ---send INSERT INTO t1 (f2) SELECT 'abcde ' FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4; - ---connection node_2 -INSERT INTO t1 (f2) SELECT 'fghij ' FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4; -SELECT COUNT(*) AS EXPECT_10000 FROM t1; - ---connection node_1a ---sleep 5 - ---exec find $datadir -type f-exec md5sum {} \; | md5sum >$MYSQLTEST_VARDIR/tmp/innodb_after - -SET GLOBAL innodb_disallow_writes=OFF; - ---connection node_1 ---reap ---let $wait_condition = SELECT COUNT(*) = 20000 FROM t1; ---source include/wait_condition.inc - -SELECT COUNT(*) AS EXPECT_20000 FROM t1; - ---connection node_2 ---let $wait_condition = SELECT COUNT(*) = 20000 FROM t1; ---source include/wait_condition.inc -SELECT COUNT(*) AS EXPECT_20000 FROM t1; - ---connection node_1 ---diff_files $MYSQLTEST_VARDIR/tmp/innodb_before $MYSQLTEST_VARDIR/tmp/innodb_after - ---connection node_2 - -DROP TABLE t1; -DROP TABLE ten; - ---disconnect node_1a - diff --git a/mysql-test/suite/sys_vars/r/innodb_disallow_writes_basic.result b/mysql-test/suite/sys_vars/r/innodb_disallow_writes_basic.result deleted file mode 100644 index bfb6b67b5d8..00000000000 --- a/mysql-test/suite/sys_vars/r/innodb_disallow_writes_basic.result 
+++ /dev/null @@ -1,45 +0,0 @@ -# -# innodb_disallow_writes -# -# save the initial value -SET @innodb_disallow_writes_global_saved = @@global.innodb_disallow_writes; -# default -SELECT @@global.innodb_disallow_writes; -@@global.innodb_disallow_writes -0 - -# scope -SELECT @@session.innodb_disallow_writes; -ERROR HY000: Variable 'innodb_disallow_writes' is a GLOBAL variable -SET @@global.innodb_disallow_writes=OFF; -SELECT @@global.innodb_disallow_writes; -@@global.innodb_disallow_writes -0 -SET @@global.innodb_disallow_writes=ON; -SELECT @@global.innodb_disallow_writes; -@@global.innodb_disallow_writes -1 - -# valid values -SET @@global.innodb_disallow_writes='OFF'; -SELECT @@global.innodb_disallow_writes; -@@global.innodb_disallow_writes -0 -SET @@global.innodb_disallow_writes=ON; -SELECT @@global.innodb_disallow_writes; -@@global.innodb_disallow_writes -1 -SET @@global.innodb_disallow_writes=default; -SELECT @@global.innodb_disallow_writes; -@@global.innodb_disallow_writes -0 - -# invalid values -SET @@global.innodb_disallow_writes=NULL; -ERROR 42000: Variable 'innodb_disallow_writes' can't be set to the value of 'NULL' -SET @@global.innodb_disallow_writes='junk'; -ERROR 42000: Variable 'innodb_disallow_writes' can't be set to the value of 'junk' - -# restore the initial value -SET @@global.innodb_disallow_writes = @innodb_disallow_writes_global_saved; -# End of test diff --git a/mysql-test/suite/sys_vars/t/innodb_disallow_writes_basic.test b/mysql-test/suite/sys_vars/t/innodb_disallow_writes_basic.test deleted file mode 100644 index b8e5c127377..00000000000 --- a/mysql-test/suite/sys_vars/t/innodb_disallow_writes_basic.test +++ /dev/null @@ -1,42 +0,0 @@ ---source include/have_innodb_disallow_writes.inc - ---echo # ---echo # innodb_disallow_writes ---echo # - ---echo # save the initial value -SET @innodb_disallow_writes_global_saved = @@global.innodb_disallow_writes; - ---echo # default -SELECT @@global.innodb_disallow_writes; - ---echo ---echo # scope ---error 
ER_INCORRECT_GLOBAL_LOCAL_VAR -SELECT @@session.innodb_disallow_writes; -SET @@global.innodb_disallow_writes=OFF; -SELECT @@global.innodb_disallow_writes; -SET @@global.innodb_disallow_writes=ON; -SELECT @@global.innodb_disallow_writes; - ---echo ---echo # valid values -SET @@global.innodb_disallow_writes='OFF'; -SELECT @@global.innodb_disallow_writes; -SET @@global.innodb_disallow_writes=ON; -SELECT @@global.innodb_disallow_writes; -SET @@global.innodb_disallow_writes=default; -SELECT @@global.innodb_disallow_writes; - ---echo ---echo # invalid values ---error ER_WRONG_VALUE_FOR_VAR -SET @@global.innodb_disallow_writes=NULL; ---error ER_WRONG_VALUE_FOR_VAR -SET @@global.innodb_disallow_writes='junk'; - ---echo ---echo # restore the initial value -SET @@global.innodb_disallow_writes = @innodb_disallow_writes_global_saved; - ---echo # End of test diff --git a/scripts/wsrep_sst_rsync.sh b/scripts/wsrep_sst_rsync.sh index 67a7afc638f..9228810f611 100644 --- a/scripts/wsrep_sst_rsync.sh +++ b/scripts/wsrep_sst_rsync.sh @@ -3,7 +3,7 @@ set -ue # Copyright (C) 2017-2022 MariaDB -# Copyright (C) 2010-2014 Codership Oy +# Copyright (C) 2010-2022 Codership Oy # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -416,6 +416,8 @@ EOF sync + wsrep_log_info "Tables flushed" + if [ -n "$WSREP_SST_OPT_BINLOG" ]; then # Change the directory to binlog base (if possible): cd "$DATA" @@ -557,6 +559,8 @@ FILTER="-f '- /lost+found' exit $RC fi + wsrep_log_info "Transfer of normal directories done" + # Transfer InnoDB data files rsync ${STUNNEL:+--rsh="$STUNNEL"} \ --owner --group --perms --links --specials \ @@ -570,6 +574,8 @@ FILTER="-f '- /lost+found' exit 255 # unknown error fi + wsrep_log_info "Transfer of InnoDB data files done" + # second, we transfer InnoDB and Aria log files rsync ${STUNNEL:+--rsh="$STUNNEL"} \ --owner --group --perms --links --specials \ @@ -583,6 +589,8 @@ 
FILTER="-f '- /lost+found' exit 255 # unknown error fi + wsrep_log_info "Transfer of InnoDB and Aria log files done" + # then, we parallelize the transfer of database directories, # use '.' so that path concatenation works: @@ -610,6 +618,9 @@ FILTER="-f '- /lost+found' exit 255 # unknown error fi + wsrep_log_info "Transfer of data done" + + else # BYPASS wsrep_log_info "Bypassing state dump." @@ -620,6 +631,7 @@ FILTER="-f '- /lost+found' fi + wsrep_log_info "Sending continue to donor" echo 'continue' # now server can resume updating data echo "$STATE" > "$MAGIC_FILE" diff --git a/sql/handler.cc b/sql/handler.cc index d30c91f2195..9cbb653eaa0 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2016, Oracle and/or its affiliates. - Copyright (c) 2009, 2021, MariaDB Corporation. + Copyright (c) 2009, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -7905,6 +7905,19 @@ int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal) DBUG_RETURN(0); } + +int ha_force_checkpoint(THD * thd) +{ + int res=0; + + handlerton *hton= installed_htons[DB_TYPE_INNODB]; + + if (hton && hton->force_checkpoint) + { + res= hton->force_checkpoint(hton, thd); + } + return (res); +} #endif /* WITH_WSREP */ diff --git a/sql/handler.h b/sql/handler.h index 0434d88aa98..0349b8bd65b 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -2,7 +2,7 @@ #define HANDLER_INCLUDED /* Copyright (c) 2000, 2019, Oracle and/or its affiliates. 
- Copyright (c) 2009, 2021, MariaDB + Copyright (c) 2009, 2022, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License @@ -1606,6 +1606,7 @@ struct handlerton THD *victim_thd, my_bool signal); int (*set_checkpoint)(handlerton *hton, const XID* xid); int (*get_checkpoint)(handlerton *hton, XID* xid); + int (*force_checkpoint)(handlerton *hton, THD* thd); /** Check if the version of the table matches the version in the .frm file. @@ -5437,6 +5438,7 @@ int ha_savepoint(THD *thd, SAVEPOINT *sv); int ha_release_savepoint(THD *thd, SAVEPOINT *sv); #ifdef WITH_WSREP int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal); +int ha_force_checkpoint(THD *thd); #endif /* these are called by storage engines */ diff --git a/sql/mdl.cc b/sql/mdl.cc index bd5d6aa7db3..769f36aa9fb 100644 --- a/sql/mdl.cc +++ b/sql/mdl.cc @@ -1,5 +1,5 @@ /* Copyright (c) 2007, 2012, Oracle and/or its affiliates. - Copyright (c) 2020, MariaDB + Copyright (c) 2020, 2022, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -3290,4 +3290,35 @@ void MDL_ticket::wsrep_report(bool debug) const m_lock->key.name(), psi_stage->m_name); } + +int thd_try_acquire_global_mdl(THD* thd, MDL_ticket **mdl) +{ + MDL_request request; + + WSREP_DEBUG("Trying to acquire global MDL for thread %lu", thd_get_thread_id(thd)); + DBUG_ASSERT(*mdl == NULL); + + MDL_REQUEST_INIT(&request, MDL_key::BACKUP, "", "", MDL_BACKUP_DML,MDL_EXPLICIT); + + if (thd->mdl_context.try_acquire_lock(&request) || + !request.ticket) + { + WSREP_DEBUG("Can't acquire global MDL for thread %lu", thd_get_thread_id(thd)); + return 1; + } + + WSREP_DEBUG("Acquired global MDL for thread %lu", thd_get_thread_id(thd)); + *mdl= request.ticket; + (*mdl)->wsrep_report(wsrep_debug); + return 0; +} + +void thd_release_global_mdl(THD *thd, MDL_ticket **mdl) +{ + 
WSREP_DEBUG("Releasing global MDL for thread %lu", thd_get_thread_id(thd)); + DBUG_ASSERT(*mdl != NULL); + (*mdl)->wsrep_report(wsrep_debug); + thd->mdl_context.release_lock(*mdl); + *mdl= NULL; +} #endif /* WITH_WSREP */ diff --git a/sql/mdl.h b/sql/mdl.h index 31ac4e81377..8b06c066aa7 100644 --- a/sql/mdl.h +++ b/sql/mdl.h @@ -1,7 +1,7 @@ #ifndef MDL_H #define MDL_H /* Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved. - Copyright (c) 2020, 2021, MariaDB + Copyright (c) 2020, 2022, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1130,4 +1130,11 @@ typedef int (*mdl_iterator_callback)(MDL_ticket *ticket, void *arg, bool granted); extern MYSQL_PLUGIN_IMPORT int mdl_iterate(mdl_iterator_callback callback, void *arg); + +#ifdef WITH_WSREP +extern int thd_try_acquire_global_mdl(THD* thd, MDL_ticket **mdl); +extern void thd_release_global_mdl(THD* thd, MDL_ticket **mdl); +extern bool sst_in_progress; +#endif /* WITH_WSREP */ + #endif /* MDL_H */ diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 1ce59dc9348..5a1a1004ef5 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1,6 +1,6 @@ /* Copyright (c) 2000, 2015, Oracle and/or its affiliates. - Copyright (c) 2008, 2021, MariaDB Corporation. + Copyright (c) 2008, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc index 786d8b9bbf5..0e38ad6426f 100644 --- a/sql/wsrep_sst.cc +++ b/sql/wsrep_sst.cc @@ -1,4 +1,5 @@ -/* Copyright 2008-2020 Codership Oy <http://www.codership.com> +/* Copyright (c) 2008-2022, Codership Oy <http://www.codership.com> + Copyright (c) 2008-2022, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,6 +31,8 @@ #include "wsrep_xid.h" #include "wsrep_thd.h" #include "wsrep_mysqld.h" +#include "handler.h" // ha_force_checkpoint +#include "debug_sync.h" #include <cstdio> #include <cstdlib> @@ -55,6 +58,7 @@ my_bool wsrep_sst_donor_rejects_queries= FALSE; bool sst_joiner_completed = false; bool sst_donor_completed = false; +bool sst_in_progress = false; struct sst_thread_arg { @@ -1497,20 +1501,18 @@ static int run_sql_command(THD *thd, const char *query) } mysql_parse(thd, thd->query(), thd->query_length(), &ps); + if (thd->is_error()) { int const err= thd->get_stmt_da()->sql_errno(); - WSREP_WARN ("Error executing '%s': %d (%s)%s", - query, err, thd->get_stmt_da()->message(), - err == ER_UNKNOWN_SYSTEM_VARIABLE ? - ". Was mysqld built with --with-innodb-disallow-writes ?" : ""); + WSREP_WARN ("Error executing '%s': %d (%s)", + query, err, thd->get_stmt_da()->message()); thd->clear_error(); return -1; } return 0; } - static int sst_flush_tables(THD* thd) { WSREP_INFO("Flushing tables for SST..."); @@ -1571,10 +1573,15 @@ static int sst_flush_tables(THD* thd) else { WSREP_INFO("Tables flushed."); - /* - Tables have been flushed. Create a file with cluster state ID and - wsrep_gtid_domain_id. - */ + // Tables have been flushed. + + /* We have already acquired MDL-locks above with FLUSH TABLES WITH READ + LOCK. 
We do following also here because SST script is waiting + flush tables operation to finish. Flush dirty pages from buffer + pool and force checkpoint to decrease time required. */ + ha_force_checkpoint(thd); + + // Create a file with cluster state ID and wsrep_gtid_domain_id. char content[100]; snprintf(content, sizeof(content), "%s:%lld %d\n", wsrep_cluster_state_uuid, (long long)wsrep_locked_seqno, wsrep_gtid_server.domain_id); @@ -1588,6 +1595,7 @@ static int sst_flush_tables(THD* thd) sprintf(tmp_name, "%s.tmp", real_name); FILE* file= fopen(tmp_name, "w+"); + if (0 == file) { err= errno; @@ -1619,34 +1627,6 @@ static int sst_flush_tables(THD* thd) return err; } - -static void sst_disallow_writes (THD* thd, bool yes) -{ - char query_str[64]= { 0, }; - ssize_t const query_max= sizeof(query_str) - 1; - CHARSET_INFO *current_charset; - - current_charset= thd->variables.character_set_client; - - if (!is_supported_parser_charset(current_charset)) - { - /* Do not use non-supported parser character sets */ - WSREP_WARN("Current client character set is non-supported parser character set: %s", current_charset->cs_name.str); - thd->variables.character_set_client= &my_charset_latin1; - WSREP_WARN("For SST temporally setting character set to : %s", - my_charset_latin1.cs_name.str); - } - - snprintf (query_str, query_max, "SET GLOBAL innodb_disallow_writes=%d", - yes ? 
1 : 0); - - if (run_sql_command(thd, query_str)) - { - WSREP_ERROR("Failed to disallow InnoDB writes"); - } - thd->variables.character_set_client= current_charset; -} - static void* sst_donor_thread (void* a) { sst_thread_arg* arg= (sst_thread_arg*)a; @@ -1669,7 +1649,6 @@ static void* sst_donor_thread (void* a) // We also set this SST thread THD as system thread wsp::thd thd(FALSE, true); wsp::process proc(arg->cmd, "r", arg->env); - err= -proc.error(); /* Inform server about SST script startup and release TO isolation */ @@ -1692,9 +1671,9 @@ wait_signal: if (!strcasecmp (out, magic_flush)) { err= sst_flush_tables (thd.ptr); + if (!err) { - sst_disallow_writes (thd.ptr, true); /* Lets also keep statements that modify binary logs (like RESET LOGS, RESET MASTER) from proceeding until the files have been transferred @@ -1705,6 +1684,17 @@ wait_signal: mysql_mutex_lock(mysql_bin_log.get_log_lock()); } + sst_in_progress= true; + DBUG_EXECUTE_IF("sync.wsrep_donor_state", + { + const char act[]= + "now " + "SIGNAL sync.wsrep_donor_state_reached " + "WAIT_FOR signal.wsrep_donor_state"; + assert(!debug_sync_set_action(thd.ptr, + STRING_WITH_LEN(act))); + };); + locked= true; goto wait_signal; } @@ -1713,12 +1703,12 @@ wait_signal: { if (locked) { + sst_in_progress= false; if (mysql_bin_log.is_open()) { mysql_mutex_assert_owner(mysql_bin_log.get_log_lock()); mysql_mutex_unlock(mysql_bin_log.get_log_lock()); } - sst_disallow_writes (thd.ptr, false); thd.ptr->global_read_lock.unlock_global_read_lock(thd.ptr); locked= false; } @@ -1751,12 +1741,12 @@ wait_signal: if (locked) // don't forget to unlock server before return { + sst_in_progress= false; if (mysql_bin_log.is_open()) { mysql_mutex_assert_owner(mysql_bin_log.get_log_lock()); mysql_mutex_unlock(mysql_bin_log.get_log_lock()); } - sst_disallow_writes (thd.ptr, false); thd.ptr->global_read_lock.unlock_global_read_lock(thd.ptr); } diff --git a/storage/innobase/dict/dict0stats_bg.cc b/storage/innobase/dict/dict0stats_bg.cc 
index 2dd83d97687..3a4165aee96 100644 --- a/storage/innobase/dict/dict0stats_bg.cc +++ b/storage/innobase/dict/dict0stats_bg.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2012, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -38,6 +38,7 @@ Created Apr 25, 2012 Vasil Dimov # include "wsrep.h" # include "log.h" # include "wsrep_mysqld.h" +# include "mdl.h" #endif #include <vector> @@ -397,10 +398,49 @@ static std::mutex dict_stats_mutex; static void dict_stats_func(void*) { + bool done= false; THD *thd= innobase_create_background_thd("InnoDB statistics"); set_current_thd(thd); - while (dict_stats_process_entry_from_recalc_pool(thd)) {} - dict_defrag_process_entries_from_defrag_pool(thd); + do + { +#ifdef WITH_WSREP + MDL_ticket *mdl= nullptr; + if (thd_try_acquire_global_mdl(current_thd, &mdl)) + { + goto try_later; + } + else + { +#endif + done= dict_stats_process_entry_from_recalc_pool(thd); +#ifdef WITH_WSREP + thd_release_global_mdl(current_thd, &mdl); + } +#endif + } while (done); + done= true; + do + { +#ifdef WITH_WSREP + MDL_ticket *mdl= nullptr; + if (!thd_try_acquire_global_mdl(current_thd, &mdl)) + { +#endif + dict_defrag_process_entries_from_defrag_pool(thd); + done= false; +#ifdef WITH_WSREP + thd_release_global_mdl(current_thd, &mdl); + } + else + { + goto try_later; + } +#endif + } while(done); + +try_later: + dict_stats_schedule(MIN_RECALC_INTERVAL * 1000); + set_current_thd(nullptr); innobase_destroy_background_thd(thd); } diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index 69f18de9428..a7df124c655 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ 
-1,6 +1,6 @@ /***************************************************************************** Copyright (C) 2013, 2015, Google Inc. All Rights Reserved. -Copyright (c) 2014, 2021, MariaDB Corporation. +Copyright (c) 2014, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -40,6 +40,7 @@ Modified Jan Lindström jan.lindstrom@mariadb.com #include "fsp0fsp.h" #include "fil0pagecompress.h" #include <my_crypt.h> +#include "mdl.h" static bool fil_crypt_threads_inited = false; @@ -952,10 +953,56 @@ func_exit: mtr.commit(); } +/** State of a rotation thread */ +struct rotate_thread_t { + explicit rotate_thread_t(uint no) : thread_no(no) {} + + uint thread_no; + bool first = true; /*!< is position before first space */ + space_list_t::iterator space + = fil_system.space_list.end();/*!< current space or .end() */ + uint32_t offset = 0; /*!< current page number */ + ulint batch = 0; /*!< #pages to rotate */ + uint min_key_version_found = 0; /*!< min key version found but not rotated */ + lsn_t end_lsn = 0; /*!< max lsn when rotating this space */ + + uint estimated_max_iops = 20;/*!< estimation of max iops */ + uint allocated_iops = 0; /*!< allocated iops */ + ulint cnt_waited = 0; /*!< #times waited during this slot */ + uintmax_t sum_waited_us = 0; /*!< wait time during this slot */ + + THD *thd =nullptr; /*!< Thread handle for rotation thread */ + MDL_ticket *mdl=nullptr; /*!< MDL-ticket for global lock */ + + fil_crypt_stat_t crypt_stat; // statistics + + /** @return whether this thread should terminate */ + bool should_shutdown() const + { + mysql_mutex_assert_owner(&fil_crypt_threads_mutex); + switch (srv_shutdown_state) + { + case SRV_SHUTDOWN_NONE: + return thread_no >= srv_n_fil_crypt_threads; + case SRV_SHUTDOWN_EXIT_THREADS: + /* srv_init_abort() must have been invoked */ + case SRV_SHUTDOWN_CLEANUP: + case SRV_SHUTDOWN_INITIATED: + 
return true; + case SRV_SHUTDOWN_LAST_PHASE: + break; + } + ut_ad(0); + return true; + } +}; + + /** Start encrypting a space +@param[in] state Rotate thread state @param[in,out] space Tablespace @return true if a recheck of tablespace is needed by encryption thread. */ -static bool fil_crypt_start_encrypting_space(fil_space_t* space) +static bool fil_crypt_start_encrypting_space(rotate_thread_t* state, fil_space_t* space) { mysql_mutex_lock(&fil_crypt_threads_mutex); @@ -976,6 +1023,20 @@ func_exit: return recheck; } +#ifdef WITH_WSREP + /* Take global MDL-lock to restrict other threads + doing FTWRL or EXPORT. Note that we do not wait here, + it is trylock. Releasing mutex before this call is not safe + because some other thread could start encrypting space then.*/ + if (!state->mdl) { + if (thd_try_acquire_global_mdl(state->thd, &state->mdl)) { + mysql_mutex_unlock(&fil_crypt_threads_mutex); + return recheck; + } + } + + ut_ad(!sst_in_progress); +#endif /* WITH_WSREP */ /* NOTE: we need to write and flush page 0 before publishing * the crypt data. 
This so that after restart there is no * risk of finding encrypted pages without having @@ -1040,6 +1101,11 @@ func_exit: crypt_data->rotate_state.starting = false; fil_crypt_start_converting = false; +#ifdef WITH_WSREP + if (state->mdl) { + thd_release_global_mdl(state->thd, &state->mdl); + } +#endif mysql_mutex_unlock(&fil_crypt_threads_mutex); mysql_mutex_unlock(&crypt_data->mutex); @@ -1050,52 +1116,20 @@ abort: mtr.commit(); mysql_mutex_lock(&fil_crypt_threads_mutex); fil_crypt_start_converting = false; - mysql_mutex_unlock(&fil_crypt_threads_mutex); +#ifdef WITH_WSREP + if (state->mdl) { + thd_release_global_mdl(state->thd, &state->mdl); + } +#endif + + mysql_mutex_unlock(&fil_crypt_threads_mutex); crypt_data->~fil_space_crypt_t(); ut_free(crypt_data); + return false; } -/** State of a rotation thread */ -struct rotate_thread_t { - explicit rotate_thread_t(uint no) : thread_no(no) {} - - uint thread_no; - bool first = true; /*!< is position before first space */ - space_list_t::iterator space - = fil_system.space_list.end();/*!< current space or .end() */ - uint32_t offset = 0; /*!< current page number */ - ulint batch = 0; /*!< #pages to rotate */ - uint min_key_version_found = 0; /*!< min key version found but not rotated */ - lsn_t end_lsn = 0; /*!< max lsn when rotating this space */ - - uint estimated_max_iops = 20;/*!< estimation of max iops */ - uint allocated_iops = 0; /*!< allocated iops */ - ulint cnt_waited = 0; /*!< #times waited during this slot */ - uintmax_t sum_waited_us = 0; /*!< wait time during this slot */ - - fil_crypt_stat_t crypt_stat; // statistics - - /** @return whether this thread should terminate */ - bool should_shutdown() const { - mysql_mutex_assert_owner(&fil_crypt_threads_mutex); - switch (srv_shutdown_state) { - case SRV_SHUTDOWN_NONE: - return thread_no >= srv_n_fil_crypt_threads; - case SRV_SHUTDOWN_EXIT_THREADS: - /* srv_init_abort() must have been invoked */ - case SRV_SHUTDOWN_CLEANUP: - case SRV_SHUTDOWN_INITIATED: - 
return true; - case SRV_SHUTDOWN_LAST_PHASE: - break; - } - ut_ad(0); - return true; - } -}; - /** Avoid the removal of the tablespace from default_encrypt_list only when 1) Another active encryption thread working on tablespace @@ -1150,7 +1184,7 @@ fil_crypt_space_needs_rotation( * space has no crypt data * start encrypting it... */ - *recheck = fil_crypt_start_encrypting_space(space); + *recheck = fil_crypt_start_encrypting_space(state, space); crypt_data = space->crypt_data; if (crypt_data == NULL) { @@ -1921,7 +1955,9 @@ fil_crypt_rotate_pages( if (state->space->is_stopping()) { break; } - +#ifdef WITH_WSREP + ut_ad(!sst_in_progress); +#endif fil_crypt_rotate_page(key_state, state); } } @@ -1940,6 +1976,10 @@ fil_crypt_flush_space( ut_ad(space->referenced()); +#ifdef WITH_WSREP + ut_ad(!sst_in_progress); +#endif + /* flush tablespace pages so that there are no pages left with old key */ lsn_t end_lsn = crypt_data->rotate_state.end_lsn; @@ -2050,8 +2090,11 @@ static void fil_crypt_complete_rotate_space(rotate_thread_t* state) accordingly */ static void fil_crypt_thread() { + /* state of this thread */ + my_thread_init(); mysql_mutex_lock(&fil_crypt_threads_mutex); rotate_thread_t thr(srv_n_fil_crypt_threads_started++); + thr.thd = innobase_create_background_thd("InnoDB encryption rotation thread"); pthread_cond_signal(&fil_crypt_cond); /* signal that we started */ if (!thr.should_shutdown()) { @@ -2080,6 +2123,13 @@ wait_for_work: goto wait_for_work; } +#ifdef WITH_WSREP + if (thd_try_acquire_global_mdl(thr.thd, &thr.mdl)) { + thr.space->release(); + thr.space = fil_system.space_list.end(); + break; + } +#endif /* we found a space to rotate */ mysql_mutex_unlock(&fil_crypt_threads_mutex); fil_crypt_start_rotate_space(&new_state, &thr); @@ -2091,6 +2141,9 @@ wait_for_work: space and stop rotation. 
*/ if (thr.space->is_stopping()) { fil_crypt_complete_rotate_space(&thr); +#ifdef WITH_WSREP + thd_release_global_mdl(thr.thd, &thr.mdl); +#endif thr.space->release(); thr.space = fil_system.space_list.end(); break; @@ -2114,6 +2167,10 @@ wait_for_work: mysql_mutex_lock(&fil_crypt_threads_mutex); /* release iops */ fil_crypt_return_iops(&thr); + +#ifdef WITH_WSREP + thd_release_global_mdl(thr.thd, &thr.mdl); +#endif } if (thr.space != fil_system.space_list.end()) { @@ -2125,7 +2182,16 @@ wait_for_work: fil_crypt_return_iops(&thr); srv_n_fil_crypt_threads_started--; pthread_cond_signal(&fil_crypt_cond); /* signal that we stopped */ + +#ifdef WITH_WSREP + if (thr.mdl) { + thd_release_global_mdl(thr.thd, &thr.mdl); + } +#endif + + innobase_destroy_background_thd(thr.thd); mysql_mutex_unlock(&fil_crypt_threads_mutex); + my_thread_end(); #ifdef UNIV_PFS_THREAD pfs_delete_thread(); diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc index adc44d74e7b..157ac5e511e 100644 --- a/storage/innobase/fts/fts0opt.cc +++ b/storage/innobase/fts/fts0opt.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2007, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2021, MariaDB Corporation. +Copyright (c) 2016, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -37,6 +37,8 @@ Completed 2011/7/10 Sunny and Jimmy Yang #include "zlib.h" #include "fts0opt.h" #include "fts0vlc.h" +#include "mdl.h" +#include "wsrep.h" /** The FTS optimize thread's work queue. 
*/ ib_wqueue_t* fts_optimize_wq; @@ -2780,6 +2782,9 @@ static bool fts_is_sync_needed() static void fts_optimize_sync_table(dict_table_t *table, bool process_message= false) { +#ifdef WITH_WSREP + ut_ad(!sst_in_progress); +#endif MDL_ticket* mdl_ticket= nullptr; dict_table_t *sync_table= dict_acquire_mdl_shared<true>(table, fts_opt_thd, &mdl_ticket); @@ -2826,11 +2831,24 @@ static void fts_optimize_callback(void *) while (!done && srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) { /* If there is no message in the queue and we have tables to optimize then optimize the tables. */ - +#ifdef WITH_WSREP + MDL_ticket* mdl= nullptr; +#endif + fts_msg_t* msg= nullptr; +#ifdef WITH_WSREP + thd_try_acquire_global_mdl(fts_opt_thd, &mdl); +#endif if (!done && ib_wqueue_is_empty(fts_optimize_wq) && n_tables > 0 && n_optimize > 0) { + + /* Message queue is empty but we have tables + to optimize. We should not optimize them if + we are not holding global MDL. */ + if (IF_WSREP(mdl == nullptr, 0)) + goto retry_later; /* Try later */ + fts_slot_t* slot = static_cast<fts_slot_t*>( ib_vector_get(fts_slots, current)); @@ -2845,21 +2863,30 @@ static void fts_optimize_callback(void *) n_optimize = fts_optimize_how_many(); current = 0; } - } else if (n_optimize == 0 || !ib_wqueue_is_empty(fts_optimize_wq)) { - fts_msg_t* msg = static_cast<fts_msg_t*> + msg = static_cast<fts_msg_t*> (ib_wqueue_nowait(fts_optimize_wq)); /* Timeout ? */ if (msg == NULL) { +retry_later: if (fts_is_sync_needed()) { fts_need_sync = true; } +#ifdef WITH_WSREP + if (mdl != nullptr) { + thd_release_global_mdl(fts_opt_thd, &mdl); + } +#endif if (n_tables) timer->set_time(5000, 0); return; } + /* We need to process all those messages + that will not result in any writes, namely + anything else except FTS_MSG_SYNC_TABLE even + when we are not holding global MDL. 
*/ switch (msg->type) { case FTS_MSG_STOP: done = true; @@ -2883,6 +2910,14 @@ static void fts_optimize_callback(void *) break; case FTS_MSG_SYNC_TABLE: + /* If we are not holding global MDL, + add message back to wqueue and try + later. */ + if (IF_WSREP(mdl == nullptr, 0)) { + add_msg(msg); + goto retry_later; + } + DBUG_EXECUTE_IF( "fts_instrument_msg_sync_sleep", std::this_thread::sleep_for( @@ -2901,6 +2936,12 @@ static void fts_optimize_callback(void *) mem_heap_free(msg->heap); n_optimize = done ? 0 : fts_optimize_how_many(); } + +#ifdef WITH_WSREP + if (mdl != nullptr) { + thd_release_global_mdl(fts_opt_thd, &mdl); + } +#endif } /* Server is being shutdown, sync the data from FTS cache to disk @@ -2941,7 +2982,7 @@ fts_optimize_init(void) /* Create FTS optimize work queue */ fts_optimize_wq = ib_wqueue_create(); - ut_a(fts_optimize_wq != NULL); + ut_a(fts_optimize_wq != NULL); timer = srv_thread_pool->create_timer(timer_callback); /* Create FTS vector to store fts_slot_t */ diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 8151352d3ed..104fe454cd8 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1963,6 +1963,7 @@ thd_to_trx_id( static void wsrep_abort_transaction(handlerton*, THD *, THD *, my_bool); static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid); static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid); +static int wsrep_force_checkpoint(handlerton*, THD *); #endif /* WITH_WSREP */ #define normalize_table_name(a,b) \ @@ -3623,11 +3624,6 @@ ha_innobase::init_table_handle_for_HANDLER(void) m_prebuilt->trx->bulk_insert = false; } -#ifdef WITH_INNODB_DISALLOW_WRITES -/** Condition variable for innodb_disallow_writes */ -static pthread_cond_t allow_writes_cond; -#endif /* WITH_INNODB_DISALLOW_WRITES */ - /*********************************************************************//** Free any resources that were allocated and return 
failure. @return always return 1 */ @@ -3645,9 +3641,6 @@ static int innodb_init_abort() } srv_tmp_space.shutdown(); -#ifdef WITH_INNODB_DISALLOW_WRITES - pthread_cond_destroy(&allow_writes_cond); -#endif /* WITH_INNODB_DISALLOW_WRITES */ DBUG_RETURN(1); } @@ -4165,6 +4158,7 @@ static int innodb_init(void* p) innobase_hton->abort_transaction=wsrep_abort_transaction; innobase_hton->set_checkpoint=innobase_wsrep_set_checkpoint; innobase_hton->get_checkpoint=innobase_wsrep_get_checkpoint; + innobase_hton->force_checkpoint=wsrep_force_checkpoint; #endif /* WITH_WSREP */ innobase_hton->check_version = innodb_check_version; @@ -4210,10 +4204,6 @@ static int innodb_init(void* p) /* After this point, error handling has to use innodb_init_abort(). */ -#ifdef WITH_INNODB_DISALLOW_WRITES - pthread_cond_init(&allow_writes_cond, nullptr); -#endif /* WITH_INNODB_DISALLOW_WRITES */ - #ifdef HAVE_PSI_INTERFACE /* Register keys with MySQL performance schema */ int count; @@ -4326,9 +4316,6 @@ innobase_end(handlerton*, ha_panic_function) innodb_shutdown(); -#ifdef WITH_INNODB_DISALLOW_WRITES - pthread_cond_destroy(&allow_writes_cond); -#endif /* WITH_INNODB_DISALLOW_WRITES */ mysql_mutex_destroy(&log_requests.mutex); } @@ -19501,42 +19488,6 @@ static MYSQL_SYSVAR_ULONG(buf_dump_status_frequency, srv_buf_dump_status_frequen "dumped. Default is 0 (only start and end status is printed).", NULL, NULL, 0, 0, 100, 0); -#ifdef WITH_INNODB_DISALLOW_WRITES -my_bool innodb_disallow_writes; - -void innodb_wait_allow_writes() -{ - if (UNIV_UNLIKELY(innodb_disallow_writes)) - { - mysql_mutex_lock(&LOCK_global_system_variables); - while (innodb_disallow_writes) - my_cond_wait(&allow_writes_cond, &LOCK_global_system_variables.m_mutex); - mysql_mutex_unlock(&LOCK_global_system_variables); - } -} - -/************************************************************************** -An "update" method for innobase_disallow_writes variable. 
*/ -static -void -innobase_disallow_writes_update(THD*, st_mysql_sys_var*, - void* var_ptr, const void* save) -{ - const my_bool val = *static_cast<const my_bool*>(save); - *static_cast<my_bool*>(var_ptr) = val; - mysql_mutex_unlock(&LOCK_global_system_variables); - if (!val) { - pthread_cond_broadcast(&allow_writes_cond); - } - mysql_mutex_lock(&LOCK_global_system_variables); -} - -static MYSQL_SYSVAR_BOOL(disallow_writes, innodb_disallow_writes, - PLUGIN_VAR_NOCMDOPT, - "Tell InnoDB to stop any writes to disk", - NULL, innobase_disallow_writes_update, FALSE); -#endif /* WITH_INNODB_DISALLOW_WRITES */ - static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead, PLUGIN_VAR_NOCMDARG, "Whether to use read ahead for random access within an extent.", @@ -19882,9 +19833,6 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(change_buffer_dump), MYSQL_SYSVAR(change_buffering_debug), #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ -#ifdef WITH_INNODB_DISALLOW_WRITES - MYSQL_SYSVAR(disallow_writes), -#endif /* WITH_INNODB_DISALLOW_WRITES */ MYSQL_SYSVAR(random_read_ahead), MYSQL_SYSVAR(read_ahead_threshold), MYSQL_SYSVAR(read_only), @@ -21364,3 +21312,23 @@ buf_pool_size_align( return (ulint)((size / m + 1) * m); } } + +#ifdef WITH_WSREP +/** Helper function for Galera rsync SST to flush all dirty +pages from buffer pool and force log checkpoint. +@param[in] handlerton* hton InnoDB handlerton +@param[in] THD* thd Thread handle */ +static int wsrep_force_checkpoint(handlerton *hton, THD * thd) +{ + DBUG_ASSERT(hton == innodb_hton_ptr); + WSREP_DEBUG("Starting checkpoint for %ld", thd_get_thread_id(thd)); + ut_ad(!sst_in_progress); + // Note that wsrep_on = OFF during rsync SST so we can't check that + // this is called only with WSREP(thd). 
+ // Force InnoDB to checkpoint + log_make_checkpoint(); + + return (0); +} +#endif /* WITH_WSREP */ + diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 623684823e7..dface79c552 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -300,13 +300,6 @@ extern ulong srv_log_write_ahead_size; extern my_bool srv_adaptive_flushing; extern my_bool srv_flush_sync; -#ifdef WITH_INNODB_DISALLOW_WRITES -extern my_bool innodb_disallow_writes; -void innodb_wait_allow_writes(); -#else -# define innodb_wait_allow_writes() do {} while (0) -#endif /* WITH_INNODB_DISALLOW_WRITES */ - /** Requested size in bytes */ extern ulint srv_buf_pool_size; /** Requested buffer pool chunk size. Each buffer pool instance consists diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index c5bfb355392..70fd4862939 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -36,6 +36,9 @@ Created 11/26/1995 Heikki Tuuri #ifdef BTR_CUR_HASH_ADAPT # include "btr0sea.h" #endif +#ifdef WITH_WSREP +#include "mdl.h" +#endif /** Iterate over a memo block in reverse. 
*/ template <typename Functor> @@ -890,6 +893,9 @@ static mtr_t::page_flush_ahead log_close(lsn_t lsn) { mysql_mutex_assert_owner(&log_sys.mutex); ut_ad(lsn == log_sys.get_lsn()); +#ifdef WITH_WSREP + ut_ad(!sst_in_progress); +#endif byte *log_block= static_cast<byte*>(ut_align_down(log_sys.buf + log_sys.buf_free, diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index dbf8eb95e13..f818a6ecfcc 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -82,6 +82,10 @@ Created 10/21/1995 Heikki Tuuri #include <chrono> #include <memory> +#ifdef WITH_WSREP +#include "mdl.h" +#endif + /* Per-IO operation environment*/ class io_slots { @@ -153,8 +157,6 @@ static ulint os_innodb_umask = 0; #endif /* _WIN32 */ -#define WAIT_ALLOW_WRITES() innodb_wait_allow_writes() - Atomic_counter<ulint> os_n_file_reads; static ulint os_bytes_read_since_printout; Atomic_counter<size_t> os_n_file_writes; @@ -376,7 +378,6 @@ FILE* os_file_create_tmpfile() { FILE* file = NULL; - WAIT_ALLOW_WRITES(); File fd = mysql_tmpfile("ib"); if (fd >= 0) { @@ -979,7 +980,6 @@ os_file_flush_func( { int ret; - WAIT_ALLOW_WRITES(); ret = os_file_sync_posix(file); if (ret == 0) { @@ -1031,10 +1031,6 @@ os_file_create_simple_func( int create_flag; const char* mode_str = NULL; - if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) { - WAIT_ALLOW_WRITES(); - } - ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT)); ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT)); @@ -1148,7 +1144,6 @@ os_file_create_directory( { int rcode; - WAIT_ALLOW_WRITES(); rcode = mkdir(pathname, 0770); if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) { @@ -1353,10 +1348,6 @@ os_file_create_simple_no_error_handling_func( os_file_t file; int create_flag; - if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) { - WAIT_ALLOW_WRITES(); - } - ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT)); ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT)); @@ -1431,7 +1422,6 
@@ os_file_delete_if_exists_func( } int ret; - WAIT_ALLOW_WRITES(); ret = unlink(name); @@ -1456,7 +1446,6 @@ os_file_delete_func( const char* name) { int ret; - WAIT_ALLOW_WRITES(); ret = unlink(name); @@ -1495,7 +1484,6 @@ os_file_rename_func( #endif /* UNIV_DEBUG */ int ret; - WAIT_ALLOW_WRITES(); ret = rename(oldpath, newpath); @@ -1670,7 +1658,6 @@ bool os_file_set_eof( FILE* file) /*!< in: file to be truncated */ { - WAIT_ALLOW_WRITES(); return(!ftruncate(fileno(file), ftell(file))); } @@ -2134,10 +2121,6 @@ os_file_create_func( ? FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE : FILE_SHARE_READ | FILE_SHARE_DELETE; - if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) { - WAIT_ALLOW_WRITES(); - } - on_error_no_exit = create_mode & OS_FILE_ON_ERROR_NO_EXIT ? true : false; @@ -2921,8 +2904,6 @@ os_file_write_func( ut_ad(n > 0); - WAIT_ALLOW_WRITES(); - ssize_t n_bytes = os_file_pwrite(type, file, (byte*)buf, n, offset, &err); if ((ulint) n_bytes != n && !os_has_said_disk_full) { @@ -3872,6 +3853,9 @@ func_exit: if (type.is_read()) { ++os_n_file_reads; } else { +#ifdef WITH_WSREP + ut_ad(!sst_in_progress); +#endif ++os_n_file_writes; } diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index a515d7d7e56..1efc53a58ab 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -4141,7 +4141,6 @@ pfs_os_file_t row_merge_file_create_low( const char* path) { - innodb_wait_allow_writes(); if (!path) { path = mysql_tmpdir; } diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index d45f23ea1ea..f0f6a475d6d 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -41,6 +41,7 @@ Created 3/26/1996 Heikki Tuuri #include "trx0rseg.h" #include "trx0trx.h" #include <mysql/service_wsrep.h> +#include "mdl.h" #include <unordered_map> @@ -1163,6 +1164,15 @@ trx_purge_attach_undo_recs(ulint n_purge_threads) while 
(UNIV_LIKELY(srv_undo_sources) || !srv_fast_shutdown) { purge_node_t* node; trx_purge_rec_t purge_rec; +#ifdef WITH_WSREP + MDL_ticket *mdl= nullptr; + + if (thd_try_acquire_global_mdl(current_thd, &mdl)) { + return 0; /* Try again later */ + } + + ut_ad(!sst_in_progress); +#endif /* Get the purge node. */ node = (purge_node_t*) thr->child; @@ -1181,8 +1191,14 @@ trx_purge_attach_undo_recs(ulint n_purge_threads) purge_sys.heap); if (purge_rec.undo_rec == NULL) { +#ifdef WITH_WSREP + thd_release_global_mdl(current_thd, &mdl); +#endif break; } else if (purge_rec.undo_rec == &trx_purge_dummy_rec) { +#ifdef WITH_WSREP + thd_release_global_mdl(current_thd, &mdl); +#endif continue; } @@ -1207,6 +1223,9 @@ trx_purge_attach_undo_recs(ulint n_purge_threads) node->undo_recs.push(purge_rec); +#ifdef WITH_WSREP + thd_release_global_mdl(current_thd, &mdl); +#endif if (n_pages_handled >= batch_size) { break; } |