diff options
author | Jan Lindström <jan.lindstrom@mariadb.com> | 2022-01-28 09:00:38 +0200 |
---|---|---|
committer | Jan Lindström <jan.lindstrom@mariadb.com> | 2022-03-02 12:13:53 +0200 |
commit | 1002f9c7af4a6487a045743d925e0c37fc3134d0 (patch) | |
tree | e5549a625b773fd3ee99a305920e86300b9497eb | |
parent | a1965b80e1ba7811a52fe54fc62a74879551598e (diff) | |
download | mariadb-git-bb-10.5-MDEV-24845-galera.tar.gz |
MDEV-24845 : Oddities around innodb_fatal_semaphore_wait_threshold and global.innodb_disallow_writes
bb-10.5-MDEV-24845-galera
We will remove variable innodb_disallow_writes because it is badly
designed and implemented. Parameter will be marked as removed.
Instead we will be using
* Galera provider is paused i.e. all commits will wait
* FLUSH TABLES WITH READ LOCK (FTWRL) to avoid any DDL during SST
* We set max_dirty_pages_pct to 0.0 to flush all dirty pages from buffer pool
* We force flushing all dirty pages from buffer pool and force InnoDB checkpoint
* We set max_dirty_pages_pct to 99.9 to avoid flushing any pages
* We set server to read-only
* Encryption, purge and FTS-optimize threads will acquire MDL_BACKUP_DML
before continuing. This will conflict with lock acquired in FTWRL.
Note that we will not use waiting. If MDL-lock can't be acquired
we will skip the operation.
xtrabackup.cc
Remove INNODB_DISALLOW_WRITES code
handler.cc
handler.h
Add new API function ha_force_checkpoint to force checkpoint
inside InnoDB.
mysqld.cc
Mark innodb-disallow-writes variable as removed.
sql_class.cc
New functions to get and release backup lock.
wsrep_sst.cc
Add functions to set innodb_max_dirty_pages_pct,
innodb_max_dirty_pages_pct_lwm and read_only.
We set them as 0.0 and then call new API function
to force buffer pool flush and full checkpoint.
After it has finished, we set them as 99.9.
fil0crypt.cc
fil_crypt_start_encrypting_space()
fil_crypt_thread()
Acquire backup lock and release it after we are done
fts0opt.cc
fts_optimize_sync_table()
fts_optimize_callback()
Acquire backup lock and release it after we are done
ha_innodb.cc
Remove all WITH_INNODB_DISALLOW_WRITES code
wsrep_force_checkpoint()
New API function to flush dirty pages from buffer pool and
force full checkpoint.
trx0purge.cc
trx_purge()
Acquire and release global MDL_BACKUP_DML lock
33 files changed, 375 insertions, 425 deletions
diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 03a0a1fd08e..16db852c77a 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -4479,11 +4479,6 @@ fail: trx_pool_init(); recv_sys.create(); -#ifdef WITH_INNODB_DISALLOW_WRITES - srv_allow_writes_event = os_event_create(0); - os_event_set(srv_allow_writes_event); -#endif - xb_filters_init(); xb_fil_io_init(); @@ -5931,10 +5926,6 @@ static bool xtrabackup_prepare_func(char** argv) log_sys.create(); recv_sys.recovery_on = true; -#ifdef WITH_INNODB_DISALLOW_WRITES - srv_allow_writes_event = os_event_create(0); - os_event_set(srv_allow_writes_event); -#endif xb_fil_io_init(); if (dberr_t err = xb_load_tablespaces()) { msg("mariabackup: error: xb_data_files_init() failed " @@ -5957,9 +5948,7 @@ static bool xtrabackup_prepare_func(char** argv) xb_filter_hash_free(&inc_dir_tables_hash); fil_system.close(); -#ifdef WITH_INNODB_DISALLOW_WRITES - os_event_destroy(srv_allow_writes_event); -#endif + innodb_free_param(); log_sys.close(); sync_check_close(); diff --git a/mysql-test/include/have_innodb_disallow_writes.inc b/mysql-test/include/have_innodb_disallow_writes.inc deleted file mode 100644 index 83b516b7a34..00000000000 --- a/mysql-test/include/have_innodb_disallow_writes.inc +++ /dev/null @@ -1,6 +0,0 @@ ---source include/have_innodb.inc - -if (`SELECT COUNT(*) = 0 from INFORMATION_SCHEMA.GLOBAL_VARIABLES - WHERE VARIABLE_NAME = 'INNODB_DISALLOW_WRITES'`) { - --skip Test requires 'innodb_disallow_writes' -} diff --git a/mysql-test/suite/galera/r/galera_bf_abort_shutdown.result b/mysql-test/suite/galera/r/galera_bf_abort_shutdown.result index fa2a5c373f2..5707b68e190 100644 --- a/mysql-test/suite/galera/r/galera_bf_abort_shutdown.result +++ b/mysql-test/suite/galera/r/galera_bf_abort_shutdown.result @@ -5,6 +5,7 @@ connection node_2; connection node_1; CREATE TABLE t1 (f1 INT PRIMARY KEY); connection node_2; +call mtr.add_suppression("WSREP: Failed to scan 
the last segment to the end. Last events may be missing. Last recovered event:.*"); SET DEBUG_SYNC = 'wsrep_before_certification WAIT_FOR continue'; INSERT INTO t1 VALUES (1); connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; diff --git a/mysql-test/suite/galera/r/galera_drop_database.result b/mysql-test/suite/galera/r/galera_drop_database.result index 6ab4b3d0c7b..03b55136c6f 100644 --- a/mysql-test/suite/galera/r/galera_drop_database.result +++ b/mysql-test/suite/galera/r/galera_drop_database.result @@ -48,3 +48,6 @@ SHOW TABLES; Tables_in_fts DROP DATABASE fts; connection node_2; +call mtr.add_suppression("WSREP: Failed to scan the last segment to the end. Last events may be missing. Last recovered event:"); +Warnings: +Note 1049 Unknown database 'fts' diff --git a/mysql-test/suite/galera/r/galera_events2.result b/mysql-test/suite/galera/r/galera_events2.result index cd44579af90..26d3a74a7f3 100644 --- a/mysql-test/suite/galera/r/galera_events2.result +++ b/mysql-test/suite/galera/r/galera_events2.result @@ -111,6 +111,7 @@ f1 f2 SELECT EVENT_CATALOG, EVENT_SCHEMA, EVENT_NAME, DEFINER, EVENT_BODY, EVENT_DEFINITION, EVENT_TYPE, EXECUTE_AT, INTERVAL_VALUE, INTERVAL_FIELD, STATUS,ON_COMPLETION, EVENT_COMMENT FROM INFORMATION_SCHEMA.EVENTS WHERE EVENT_NAME='one_event'; EVENT_CATALOG EVENT_SCHEMA EVENT_NAME DEFINER EVENT_BODY EVENT_DEFINITION EVENT_TYPE EXECUTE_AT INTERVAL_VALUE INTERVAL_FIELD STATUS ON_COMPLETION EVENT_COMMENT def test one_event root@localhost SQL SELECT 123 RECURRING NULL 10 SECOND SLAVESIDE_DISABLED NOT PRESERVE +call mtr.add_suppression("WSREP: Failed to scan the last segment to the end. Last events may be missing. 
Last recovered event:.*"); connection node_1; SELECT * FROM t1; f1 f2 diff --git a/mysql-test/suite/galera/r/galera_restart_on_unknown_option.result b/mysql-test/suite/galera/r/galera_restart_on_unknown_option.result index 6e672c2d444..b1ee6f5955f 100644 --- a/mysql-test/suite/galera/r/galera_restart_on_unknown_option.result +++ b/mysql-test/suite/galera/r/galera_restart_on_unknown_option.result @@ -41,6 +41,7 @@ f1 f2 connection node_2; Starting server ... Starting server ... +call mtr.add_suppression("WSREP: Failed to scan the last segment to the end. Last events may be missing. Last recovered event:"); SELECT * FROM t1; f1 f2 1 a diff --git a/mysql-test/suite/galera/r/galera_var_innodb_disallow_writes.result b/mysql-test/suite/galera/r/galera_var_innodb_disallow_writes.result deleted file mode 100644 index 758c34ee62e..00000000000 --- a/mysql-test/suite/galera/r/galera_var_innodb_disallow_writes.result +++ /dev/null @@ -1,30 +0,0 @@ -connection node_2; -connection node_1; -connection node_1a; -SET SESSION wsrep_sync_wait = 0; -connection node_1; -CREATE TABLE t1 (f1 INTEGER, f2 varchar(1024)) Engine=InnoDB; -CREATE TABLE ten (f1 INTEGER) ENGINE=InnoDB; -INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10); -SET GLOBAL innodb_disallow_writes=ON; -INSERT INTO t1 (f2) SELECT 'abcde ' FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4;; -connection node_2; -INSERT INTO t1 (f2) SELECT 'fghij ' FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4; -SELECT COUNT(*) AS EXPECT_10000 FROM t1; -EXPECT_10000 -10000 -connection node_1a; -SET GLOBAL innodb_disallow_writes=OFF; -connection node_1; -SELECT COUNT(*) AS EXPECT_20000 FROM t1; -EXPECT_20000 -20000 -connection node_2; -SELECT COUNT(*) AS EXPECT_20000 FROM t1; -EXPECT_20000 -20000 -connection node_1; -connection node_2; -DROP TABLE t1; -DROP TABLE ten; -disconnect node_1a; diff --git a/mysql-test/suite/galera/r/galera_vote_rejoin_ddl.result b/mysql-test/suite/galera/r/galera_vote_rejoin_ddl.result index 
6ad7ac75314..9f859f803f5 100644 --- a/mysql-test/suite/galera/r/galera_vote_rejoin_ddl.result +++ b/mysql-test/suite/galera/r/galera_vote_rejoin_ddl.result @@ -1,12 +1,14 @@ connection node_2; connection node_1; +connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3; +connect node_4, 127.0.0.1, root, , test, $NODE_MYPORT_4; connection node_1; connection node_2; -connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3; +connection node_3; +connection node_4; connection node_3; SET GLOBAL wsrep_provider_options = 'gmcast.isolate = 1'; connection node_1; -connect node_4, 127.0.0.1, root, , test, $NODE_MYPORT_4; connection node_4; SET SESSION wsrep_on=OFF; CREATE TABLE t1 (f1 INTEGER); diff --git a/mysql-test/suite/galera/t/galera_bf_abort_shutdown.test b/mysql-test/suite/galera/t/galera_bf_abort_shutdown.test index c7af8375b3f..42f85ecf7c2 100644 --- a/mysql-test/suite/galera/t/galera_bf_abort_shutdown.test +++ b/mysql-test/suite/galera/t/galera_bf_abort_shutdown.test @@ -18,6 +18,7 @@ CREATE TABLE t1 (f1 INT PRIMARY KEY); --connection node_2 +call mtr.add_suppression("WSREP: Failed to scan the last segment to the end. Last events may be missing. Last recovered event:.*"); SET DEBUG_SYNC = 'wsrep_before_certification WAIT_FOR continue'; --send INSERT INTO t1 VALUES (1) diff --git a/mysql-test/suite/galera/t/galera_drop_database.test b/mysql-test/suite/galera/t/galera_drop_database.test index 8dc73c1ce38..c1a66e1f66c 100644 --- a/mysql-test/suite/galera/t/galera_drop_database.test +++ b/mysql-test/suite/galera/t/galera_drop_database.test @@ -56,6 +56,7 @@ SHOW TABLES; DROP DATABASE fts; --connection node_2 +call mtr.add_suppression("WSREP: Failed to scan the last segment to the end. Last events may be missing. 
Last recovered event:"); --let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 'fts_t1'; --source include/wait_condition.inc --let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 'fts_t2'; diff --git a/mysql-test/suite/galera/t/galera_events2.test b/mysql-test/suite/galera/t/galera_events2.test index 3dfbe406fc4..b29ad3ba2f2 100644 --- a/mysql-test/suite/galera/t/galera_events2.test +++ b/mysql-test/suite/galera/t/galera_events2.test @@ -137,6 +137,8 @@ SELECT * FROM t1; --echo # node_2 Event should be SERVERSIDE_DISABLED SELECT EVENT_CATALOG, EVENT_SCHEMA, EVENT_NAME, DEFINER, EVENT_BODY, EVENT_DEFINITION, EVENT_TYPE, EXECUTE_AT, INTERVAL_VALUE, INTERVAL_FIELD, STATUS,ON_COMPLETION, EVENT_COMMENT FROM INFORMATION_SCHEMA.EVENTS WHERE EVENT_NAME='one_event'; +call mtr.add_suppression("WSREP: Failed to scan the last segment to the end. Last events may be missing. Last recovered event:.*"); + --connection node_1 SELECT * FROM t1; --echo # node_1 Event should be ENABLED diff --git a/mysql-test/suite/galera/t/galera_restart_on_unknown_option.test b/mysql-test/suite/galera/t/galera_restart_on_unknown_option.test index ed7f106c123..6a0f24dbaae 100644 --- a/mysql-test/suite/galera/t/galera_restart_on_unknown_option.test +++ b/mysql-test/suite/galera/t/galera_restart_on_unknown_option.test @@ -125,6 +125,7 @@ SELECT * FROM t1; # Sanity check (node 2 is running now and can perform SQL operators): +call mtr.add_suppression("WSREP: Failed to scan the last segment to the end. Last events may be missing. 
Last recovered event:"); SELECT * FROM t1; --connection node_1 diff --git a/mysql-test/suite/galera/t/galera_var_innodb_disallow_writes.test b/mysql-test/suite/galera/t/galera_var_innodb_disallow_writes.test deleted file mode 100644 index 10f3815e135..00000000000 --- a/mysql-test/suite/galera/t/galera_var_innodb_disallow_writes.test +++ /dev/null @@ -1,72 +0,0 @@ -# -# This test checks that innodb_disallow_writes works as expected -# -# Note that we need to enable binlog for this test: If the commit -# to InnoDB is done in one phase, the transaction is committed in -# memory before it is persisted to disk. This means that the -# innodb_disallow_writes=ON may not prevent transaction to -# become visible to other readers. On the other hand, if the -# commit is two phase (as it is with binlog), the transaction -# will be blocked in prepare phase. -# - ---source include/galera_cluster.inc ---source include/have_innodb.inc ---source include/have_log_bin.inc - ---let $datadir= `SELECT @@datadir` - - -# Open a separate connection to be used to run SHOW PROCESSLIST ---let $galera_connection_name = node_1a ---let $galera_server_number = 1 ---source include/galera_connect.inc ---connection node_1a -SET SESSION wsrep_sync_wait = 0; - ---connection node_1 -CREATE TABLE t1 (f1 INTEGER, f2 varchar(1024)) Engine=InnoDB; -CREATE TABLE ten (f1 INTEGER) ENGINE=InnoDB; -INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10); - -SET GLOBAL innodb_disallow_writes=ON; ---exec find $datadir -type f-exec md5sum {} \; | md5sum >$MYSQLTEST_VARDIR/tmp/innodb_before - -# -# This insert has no effect before innodb_disallow_writes is OFF -# ---send INSERT INTO t1 (f2) SELECT 'abcde ' FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4; - ---connection node_2 -INSERT INTO t1 (f2) SELECT 'fghij ' FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4; -SELECT COUNT(*) AS EXPECT_10000 FROM t1; - ---connection node_1a ---sleep 5 - ---exec find $datadir -type f-exec md5sum {} \; | md5sum 
>$MYSQLTEST_VARDIR/tmp/innodb_after - -SET GLOBAL innodb_disallow_writes=OFF; - ---connection node_1 ---reap ---let $wait_condition = SELECT COUNT(*) = 20000 FROM t1; ---source include/wait_condition.inc - -SELECT COUNT(*) AS EXPECT_20000 FROM t1; - ---connection node_2 ---let $wait_condition = SELECT COUNT(*) = 20000 FROM t1; ---source include/wait_condition.inc -SELECT COUNT(*) AS EXPECT_20000 FROM t1; - ---connection node_1 ---diff_files $MYSQLTEST_VARDIR/tmp/innodb_before $MYSQLTEST_VARDIR/tmp/innodb_after - ---connection node_2 - -DROP TABLE t1; -DROP TABLE ten; - ---disconnect node_1a - diff --git a/mysql-test/suite/galera/t/galera_vote_rejoin_ddl.test b/mysql-test/suite/galera/t/galera_vote_rejoin_ddl.test index ca75d33b7d7..b760f0563e7 100644 --- a/mysql-test/suite/galera/t/galera_vote_rejoin_ddl.test +++ b/mysql-test/suite/galera/t/galera_vote_rejoin_ddl.test @@ -6,19 +6,17 @@ --source include/galera_cluster.inc --source include/big_test.inc +--connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 +--connect node_4, 127.0.0.1, root, , test, $NODE_MYPORT_4 + # Save original auto_increment_offset values. --let $node_1=node_1 --let $node_2=node_2 ---source include/auto_increment_offset_save.inc -# The following has to be set hard as these connection doesn't yet exists and -# the auto_increment_offset value changes during the lifetime of the servers. 
--let $node_3=node_3 ---let $auto_increment_offset_node_3 = 3; --let $node_4=node_4 ---let $auto_increment_offset_node_4 = 4; +--source include/auto_increment_offset_save.inc # Isolate node #3 ---connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 --connection node_3 SET GLOBAL wsrep_provider_options = 'gmcast.isolate = 1'; @@ -28,7 +26,6 @@ SET GLOBAL wsrep_provider_options = 'gmcast.isolate = 1'; --source include/wait_condition.inc # Introduce inconsistency on node #4 ---connect node_4, 127.0.0.1, root, , test, $NODE_MYPORT_4 --connection node_4 SET SESSION wsrep_on=OFF; CREATE TABLE t1 (f1 INTEGER); diff --git a/mysql-test/suite/sys_vars/r/innodb_disallow_writes_basic.result b/mysql-test/suite/sys_vars/r/innodb_disallow_writes_basic.result deleted file mode 100644 index bfb6b67b5d8..00000000000 --- a/mysql-test/suite/sys_vars/r/innodb_disallow_writes_basic.result +++ /dev/null @@ -1,45 +0,0 @@ -# -# innodb_disallow_writes -# -# save the initial value -SET @innodb_disallow_writes_global_saved = @@global.innodb_disallow_writes; -# default -SELECT @@global.innodb_disallow_writes; -@@global.innodb_disallow_writes -0 - -# scope -SELECT @@session.innodb_disallow_writes; -ERROR HY000: Variable 'innodb_disallow_writes' is a GLOBAL variable -SET @@global.innodb_disallow_writes=OFF; -SELECT @@global.innodb_disallow_writes; -@@global.innodb_disallow_writes -0 -SET @@global.innodb_disallow_writes=ON; -SELECT @@global.innodb_disallow_writes; -@@global.innodb_disallow_writes -1 - -# valid values -SET @@global.innodb_disallow_writes='OFF'; -SELECT @@global.innodb_disallow_writes; -@@global.innodb_disallow_writes -0 -SET @@global.innodb_disallow_writes=ON; -SELECT @@global.innodb_disallow_writes; -@@global.innodb_disallow_writes -1 -SET @@global.innodb_disallow_writes=default; -SELECT @@global.innodb_disallow_writes; -@@global.innodb_disallow_writes -0 - -# invalid values -SET @@global.innodb_disallow_writes=NULL; -ERROR 42000: Variable 'innodb_disallow_writes' can't be set 
to the value of 'NULL' -SET @@global.innodb_disallow_writes='junk'; -ERROR 42000: Variable 'innodb_disallow_writes' can't be set to the value of 'junk' - -# restore the initial value -SET @@global.innodb_disallow_writes = @innodb_disallow_writes_global_saved; -# End of test diff --git a/mysql-test/suite/sys_vars/t/innodb_disallow_writes_basic.test b/mysql-test/suite/sys_vars/t/innodb_disallow_writes_basic.test deleted file mode 100644 index b8e5c127377..00000000000 --- a/mysql-test/suite/sys_vars/t/innodb_disallow_writes_basic.test +++ /dev/null @@ -1,42 +0,0 @@ ---source include/have_innodb_disallow_writes.inc - ---echo # ---echo # innodb_disallow_writes ---echo # - ---echo # save the initial value -SET @innodb_disallow_writes_global_saved = @@global.innodb_disallow_writes; - ---echo # default -SELECT @@global.innodb_disallow_writes; - ---echo ---echo # scope ---error ER_INCORRECT_GLOBAL_LOCAL_VAR -SELECT @@session.innodb_disallow_writes; -SET @@global.innodb_disallow_writes=OFF; -SELECT @@global.innodb_disallow_writes; -SET @@global.innodb_disallow_writes=ON; -SELECT @@global.innodb_disallow_writes; - ---echo ---echo # valid values -SET @@global.innodb_disallow_writes='OFF'; -SELECT @@global.innodb_disallow_writes; -SET @@global.innodb_disallow_writes=ON; -SELECT @@global.innodb_disallow_writes; -SET @@global.innodb_disallow_writes=default; -SELECT @@global.innodb_disallow_writes; - ---echo ---echo # invalid values ---error ER_WRONG_VALUE_FOR_VAR -SET @@global.innodb_disallow_writes=NULL; ---error ER_WRONG_VALUE_FOR_VAR -SET @@global.innodb_disallow_writes='junk'; - ---echo ---echo # restore the initial value -SET @@global.innodb_disallow_writes = @innodb_disallow_writes_global_saved; - ---echo # End of test diff --git a/scripts/wsrep_sst_rsync.sh b/scripts/wsrep_sst_rsync.sh index 67a7afc638f..47e777cb791 100644 --- a/scripts/wsrep_sst_rsync.sh +++ b/scripts/wsrep_sst_rsync.sh @@ -3,7 +3,7 @@ set -ue # Copyright (C) 2017-2022 MariaDB -# Copyright (C) 2010-2014 
Codership Oy +# Copyright (C) 2010-2022 Codership Oy # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -408,7 +408,7 @@ EOF rm -f "$ERROR" exit 255 fi - sleep 0.2 + sleep 0.5 done STATE=$(cat "$FLUSHED") @@ -416,6 +416,8 @@ EOF sync + wsrep_log_info "Tables flushed" + if [ -n "$WSREP_SST_OPT_BINLOG" ]; then # Change the directory to binlog base (if possible): cd "$DATA" @@ -557,6 +559,8 @@ FILTER="-f '- /lost+found' exit $RC fi + wsrep_log_info "Transfer of normal directories done" + # Transfer InnoDB data files rsync ${STUNNEL:+--rsh="$STUNNEL"} \ --owner --group --perms --links --specials \ @@ -570,6 +574,8 @@ FILTER="-f '- /lost+found' exit 255 # unknown error fi + wsrep_log_info "Transfer of InnoDB data files done" + # second, we transfer InnoDB and Aria log files rsync ${STUNNEL:+--rsh="$STUNNEL"} \ --owner --group --perms --links --specials \ @@ -583,6 +589,8 @@ FILTER="-f '- /lost+found' exit 255 # unknown error fi + wsrep_log_info "Transfer of InnoDB and Aria log files done" + # then, we parallelize the transfer of database directories, # use '.' so that path concatenation works: @@ -610,6 +618,9 @@ FILTER="-f '- /lost+found' exit 255 # unknown error fi + wsrep_log_info "Transfer of data done" + + else # BYPASS wsrep_log_info "Bypassing state dump." @@ -620,6 +631,7 @@ FILTER="-f '- /lost+found' fi + wsrep_log_info "Sending continue to donor" echo 'continue' # now server can resume updating data echo "$STATE" > "$MAGIC_FILE" diff --git a/sql/handler.cc b/sql/handler.cc index 5f5162f3e08..fe21b145de6 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2016, Oracle and/or its affiliates. - Copyright (c) 2009, 2021, MariaDB Corporation. + Copyright (c) 2009, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -7552,6 +7552,19 @@ int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal) DBUG_RETURN(0); } + +int ha_force_checkpoint(THD * thd) +{ + int res=0; + + handlerton *hton= installed_htons[DB_TYPE_INNODB]; + + if (hton && hton->force_checkpoint) + { + res= hton->force_checkpoint(hton, thd); + } + return (res); +} #endif /* WITH_WSREP */ diff --git a/sql/handler.h b/sql/handler.h index b1f59681602..62af011cdb1 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -2,7 +2,7 @@ #define HANDLER_INCLUDED /* Copyright (c) 2000, 2019, Oracle and/or its affiliates. - Copyright (c) 2009, 2021, MariaDB + Copyright (c) 2009, 2022, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License @@ -1556,6 +1556,7 @@ struct handlerton THD *victim_thd, my_bool signal); int (*set_checkpoint)(handlerton *hton, const XID* xid); int (*get_checkpoint)(handlerton *hton, XID* xid); + int (*force_checkpoint)(handlerton *hton, THD* thd); /* Optional clauses in the CREATE/ALTER TABLE */ @@ -5263,6 +5264,7 @@ int ha_savepoint(THD *thd, SAVEPOINT *sv); int ha_release_savepoint(THD *thd, SAVEPOINT *sv); #ifdef WITH_WSREP int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal); +int ha_force_checkpoint(THD *thd); #endif /* these are called by storage engines */ diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 51db77a2f19..45717c2c6c0 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2015, Oracle and/or its affiliates. 
- Copyright (c) 2008, 2021, MariaDB + Copyright (c) 2008, 2022, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -4968,6 +4968,9 @@ static int init_server_components() MARIADB_REMOVED_OPTION("thread-concurrency"), MARIADB_REMOVED_OPTION("timed-mutexes"), + /* Removed in 10.5.14 */ + MARIADB_REMOVED_OPTION("innodb-disallow-writes"), + /* The following options were added after 5.6.10 */ MYSQL_TO_BE_IMPLEMENTED_OPTION("rpl-stop-slave-timeout"), MYSQL_TO_BE_IMPLEMENTED_OPTION("validate-user-plugins"), // NO_EMBEDDED_ACCESS_CHECKS diff --git a/sql/sql_class.cc b/sql/sql_class.cc index d31da032a7b..8fb1bbcc62a 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1,6 +1,6 @@ /* Copyright (c) 2000, 2015, Oracle and/or its affiliates. - Copyright (c) 2008, 2021, MariaDB Corporation. + Copyright (c) 2008, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -8151,3 +8151,25 @@ THD_list_iterator *THD_list_iterator::iterator() { return &server_threads; } + +extern "C" +int thd_get_backup_lock(THD* thd, MDL_ticket **mdl) +{ + MDL_request request; + + MDL_REQUEST_INIT(&request, MDL_key::BACKUP, "", "", MDL_BACKUP_DML, + MDL_EXPLICIT); + + if (thd->mdl_context.try_acquire_lock(&request) || + !request.ticket) + return 1; + + *mdl= request.ticket; + return 0; +} + +extern "C" +void thd_release_backup_lock(THD* thd, MDL_ticket *mdl) +{ + thd->mdl_context.release_lock(mdl); +} diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc index 2d6d8bc4165..dee3126d27f 100644 --- a/sql/wsrep_sst.cc +++ b/sql/wsrep_sst.cc @@ -1,4 +1,5 @@ -/* Copyright 2008-2020 Codership Oy <http://www.codership.com> +/* Copyright (c) 2008-2022, Codership Oy <http://www.codership.com> + Copyright (c) 2008-2022, MariaDB This program is free software; you can redistribute it and/or modify it under the 
terms of the GNU General Public License as published by @@ -30,7 +31,8 @@ #include "wsrep_xid.h" #include "wsrep_thd.h" #include "wsrep_mysqld.h" - +#include "sys_vars_shared.h" // intern_find_sys_var +#include "handler.h" // ha_force_checkpoint #include <cstdio> #include <cstdlib> @@ -55,6 +57,7 @@ my_bool wsrep_sst_donor_rejects_queries= FALSE; bool sst_joiner_completed = false; bool sst_donor_completed = false; +bool sst_in_progress = false; struct sst_thread_arg { @@ -1495,21 +1498,63 @@ static int run_sql_command(THD *thd, const char *query) } mysql_parse(thd, thd->query(), thd->query_length(), &ps, FALSE, FALSE); + if (thd->is_error()) { int const err= thd->get_stmt_da()->sql_errno(); - WSREP_WARN ("Error executing '%s': %d (%s)%s", - query, err, thd->get_stmt_da()->message(), - err == ER_UNKNOWN_SYSTEM_VARIABLE ? - ". Was mysqld built with --with-innodb-disallow-writes ?" : ""); + WSREP_WARN ("Error executing '%s': %d (%s)", + query, err, thd->get_stmt_da()->message()); thd->clear_error(); return -1; } return 0; } +static void sst_set_charset_and_run(THD *thd, const char *query_str) +{ + CHARSET_INFO *current_charset; + current_charset= thd->variables.character_set_client; + + if (!is_supported_parser_charset(current_charset)) + { + /* Do not use non-supported parser character sets */ + WSREP_WARN("Current client character set is non-supported parser character set: %s", current_charset->csname); + thd->variables.character_set_client= &my_charset_latin1; + WSREP_WARN("For SST temporally setting character set to : %s", + my_charset_latin1.csname); + } + + if (run_sql_command(thd, query_str)) + WSREP_ERROR("Failed to execute query: %s", query_str); + + thd->variables.character_set_client= current_charset; +} + +static void sst_set_max_dirty_pages_pct_lwm(THD* thd, double val) +{ + char query_str[64]= { 0, }; + ssize_t const query_max= sizeof(query_str) - 1; + + snprintf (query_str, query_max, "SET GLOBAL innodb_max_dirty_pages_pct_lwm=%f;", val); + + 
sst_set_charset_and_run(thd, query_str); +} -static int sst_flush_tables(THD* thd) +static void sst_set_max_dirty_pages_pct(THD* thd, double val) +{ + char query_str[64]= { 0, }; + ssize_t const query_max= sizeof(query_str) - 1; + + snprintf (query_str, query_max, "SET GLOBAL innodb_max_dirty_pages_pct=%f;", val); + sst_set_charset_and_run(thd, query_str); +} + +static void sst_set_read_only(THD* thd, bool yes) +{ + sst_set_charset_and_run(thd, yes ? "SET GLOBAL read_only=1" : "SET GLOBAL read_only=0"); +} + +static int sst_flush_tables(THD* thd, bool* changed_values) { WSREP_INFO("Flushing tables for SST..."); @@ -1569,10 +1614,25 @@ static int sst_flush_tables(THD* thd) else { WSREP_INFO("Tables flushed."); - /* - Tables have been flushed. Create a file with cluster state ID and - wsrep_gtid_domain_id. - */ + // Tables have been flushed. + + /* We have already acquired MDL-locks above with FLUSH TABLES WITH READ + LOCK. We do following also here because SST script is waiting + flush tables operation to finish. We set max dirty pages to 0 to + force writing dirty buffer pool pages and then we force full + checkpoint. Then, as safety we set max dirty pages as high as + possible to avoid further async writes of buffer pool pages. We + also set server read_only to avoid writes. */ + sst_set_max_dirty_pages_pct_lwm(thd, 0.0); + sst_set_max_dirty_pages_pct(thd, 0.0); + ha_force_checkpoint(thd); + sst_set_max_dirty_pages_pct_lwm(thd, 99.9); + sst_set_max_dirty_pages_pct(thd, 99.9); + sst_set_read_only(thd, true); + *changed_values= true; + sst_in_progress= true; + + // Create a file with cluster state ID and wsrep_gtid_domain_id. 
char content[100]; snprintf(content, sizeof(content), "%s:%lld %d\n", wsrep_cluster_state_uuid, (long long)wsrep_locked_seqno, wsrep_gtid_server.domain_id); @@ -1586,6 +1646,7 @@ static int sst_flush_tables(THD* thd) sprintf(tmp_name, "%s.tmp", real_name); FILE* file= fopen(tmp_name, "w+"); + if (0 == file) { err= errno; @@ -1617,34 +1678,6 @@ static int sst_flush_tables(THD* thd) return err; } - -static void sst_disallow_writes (THD* thd, bool yes) -{ - char query_str[64]= { 0, }; - ssize_t const query_max= sizeof(query_str) - 1; - CHARSET_INFO *current_charset; - - current_charset= thd->variables.character_set_client; - - if (!is_supported_parser_charset(current_charset)) - { - /* Do not use non-supported parser character sets */ - WSREP_WARN("Current client character set is non-supported parser character set: %s", current_charset->csname); - thd->variables.character_set_client= &my_charset_latin1; - WSREP_WARN("For SST temporally setting character set to : %s", - my_charset_latin1.csname); - } - - snprintf (query_str, query_max, "SET GLOBAL innodb_disallow_writes=%d", - yes ? 
1 : 0); - - if (run_sql_command(thd, query_str)) - { - WSREP_ERROR("Failed to disallow InnoDB writes"); - } - thd->variables.character_set_client= current_charset; -} - static void* sst_donor_thread (void* a) { sst_thread_arg* arg= (sst_thread_arg*)a; @@ -1653,6 +1686,7 @@ static void* sst_donor_thread (void* a) int err= 1; bool locked= false; + bool changed_values= false; const char* out= NULL; const size_t out_len= 128; @@ -1668,6 +1702,21 @@ static void* sst_donor_thread (void* a) wsp::thd thd(FALSE, true); wsp::process proc(arg->cmd, "r", arg->env); + sys_var *max_dirty_pages_lwm= + intern_find_sys_var(STRING_WITH_LEN("innodb_max_dirty_pages_pct_lwm")); + sys_var *max_dirty_pages= + intern_find_sys_var(STRING_WITH_LEN("innodb_max_dirty_pages_pct")); + bool is_null; + const LEX_CSTRING dirty_pages_lwm= { STRING_WITH_LEN("innodb_max_dirty_pages_pct_lwm") }; + const LEX_CSTRING dirty_pages= { STRING_WITH_LEN("innodb_max_dirty_pages_pct") }; + + /* QUESTION: what if these change during SST ? 
*/ + double dirty_lwm_value= max_dirty_pages_lwm->val_real(&is_null, thd.ptr, OPT_GLOBAL, + &dirty_pages_lwm); + double dirty_value= max_dirty_pages->val_real(&is_null, thd.ptr, OPT_GLOBAL, + &dirty_pages); + WSREP_DEBUG("InnoDB values pct_lwm=%f pct=%f", dirty_lwm_value, dirty_value); + err= -proc.error(); /* Inform server about SST script startup and release TO isolation */ @@ -1689,10 +1738,10 @@ wait_signal: if (!strcasecmp (out, magic_flush)) { - err= sst_flush_tables (thd.ptr); + err= sst_flush_tables (thd.ptr, &changed_values); + if (!err) { - sst_disallow_writes (thd.ptr, true); /* Lets also keep statements that modify binary logs (like RESET LOGS, RESET MASTER) from proceeding until the files have been transferred @@ -1711,12 +1760,12 @@ wait_signal: { if (locked) { + sst_in_progress= false; if (mysql_bin_log.is_open()) { mysql_mutex_assert_owner(mysql_bin_log.get_log_lock()); mysql_mutex_unlock(mysql_bin_log.get_log_lock()); } - sst_disallow_writes (thd.ptr, false); thd.ptr->global_read_lock.unlock_global_read_lock(thd.ptr); locked= false; } @@ -1749,15 +1798,22 @@ wait_signal: if (locked) // don't forget to unlock server before return { + sst_in_progress= false; if (mysql_bin_log.is_open()) { mysql_mutex_assert_owner(mysql_bin_log.get_log_lock()); mysql_mutex_unlock(mysql_bin_log.get_log_lock()); } - sst_disallow_writes (thd.ptr, false); thd.ptr->global_read_lock.unlock_global_read_lock(thd.ptr); } + if (changed_values) + { + sst_set_read_only(thd.ptr, false); + sst_set_max_dirty_pages_pct_lwm(thd.ptr, dirty_lwm_value); + sst_set_max_dirty_pages_pct(thd.ptr, dirty_value); + } + wsrep::gtid gtid(wsrep::id(ret_uuid.data, sizeof(ret_uuid.data)), wsrep::seqno(err ? 
wsrep::seqno::undefined() : wsrep::seqno(ret_seqno))); diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index 5830634692b..724dfcef3b7 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -1,6 +1,6 @@ /***************************************************************************** Copyright (C) 2013, 2015, Google Inc. All Rights Reserved. -Copyright (c) 2014, 2021, MariaDB Corporation. +Copyright (c) 2014, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -84,6 +84,12 @@ static uint n_fil_crypt_iops_allocated = 0; static fil_crypt_stat_t crypt_stat; static ib_mutex_t crypt_stat_mutex; +extern "C" int thd_get_backup_lock(THD* thd, MDL_ticket **mdl); +extern "C" void thd_release_backup_lock(THD* thd, MDL_ticket *mdl); +#ifdef WITH_WSREP +extern bool sst_in_progress; +#endif + /*********************************************************************** Check if a key needs rotation given a key_state @param[in] crypt_data Encryption information @@ -998,11 +1004,60 @@ func_exit: mtr.commit(); } +/** State of a rotation thread */ +struct rotate_thread_t { + explicit rotate_thread_t(uint no) { + memset(this, 0, sizeof(* this)); + thread_no = no; + first = true; + estimated_max_iops = 20; + thd = innobase_create_background_thd("InnoDB encryption rotation thread"); + mdl = NULL; + } + + uint thread_no; + bool first; /*!< is position before first space */ + fil_space_t* space; /*!< current space or NULL */ + uint32_t offset; /*!< current page number */ + ulint batch; /*!< #pages to rotate */ + uint min_key_version_found;/*!< min key version found but not rotated */ + lsn_t end_lsn; /*!< max lsn when rotating this space */ + + uint estimated_max_iops; /*!< estimation of max iops */ + uint allocated_iops; /*!< allocated iops */ + ulint cnt_waited; /*!< #times waited during this 
slot */ + uintmax_t sum_waited_us; /*!< wait time during this slot */ + + fil_crypt_stat_t crypt_stat; // statistics + THD *thd; /*!< Thread handle for + MDL-locking */ + MDL_ticket *mdl; /*!< MDL-ticket or NULL */ + + /** @return whether this thread should terminate */ + bool should_shutdown() const { + switch (srv_shutdown_state) { + case SRV_SHUTDOWN_NONE: + return thread_no >= srv_n_fil_crypt_threads; + case SRV_SHUTDOWN_EXIT_THREADS: + /* srv_init_abort() must have been invoked */ + case SRV_SHUTDOWN_CLEANUP: + case SRV_SHUTDOWN_INITIATED: + return true; + case SRV_SHUTDOWN_LAST_PHASE: + break; + } + ut_ad(0); + return true; + } +}; + /** Start encrypting a space +@param[in,out] state State of the rotation thread @param[in,out] space Tablespace @return true if a recheck of tablespace is needed by encryption thread. */ -static bool fil_crypt_start_encrypting_space(fil_space_t* space) +static bool fil_crypt_start_encrypting_space(rotate_thread_t* state, fil_space_t* space) { + bool mdl_locked= false; mutex_enter(&fil_crypt_threads_mutex); fil_space_crypt_t *crypt_data = space->crypt_data; @@ -1021,6 +1076,18 @@ static bool fil_crypt_start_encrypting_space(fil_space_t* space) return recheck; } + /* Take global backup MDL-lock to restrict other threads + doing FTWRL or EXPORT. */ + if (!state->mdl) { + if (thd_get_backup_lock(state->thd, &state->mdl)) { + mutex_exit(&fil_crypt_threads_mutex); + return recheck; + } + mdl_locked= true; + } +#ifdef WITH_WSREP + ut_ad(!sst_in_progress); +#endif /* NOTE: we need to write and flush page 0 before publishing * the crypt data. 
This so that after restart there is no * risk of finding encrypted pages without having @@ -1089,6 +1156,11 @@ static bool fil_crypt_start_encrypting_space(fil_space_t* space) mutex_exit(&crypt_data->mutex); mutex_exit(&fil_crypt_threads_mutex); + if (mdl_locked) { + ut_ad(state->mdl); + thd_release_backup_lock(state->thd, state->mdl); + state->mdl= NULL; + } return false; } @@ -1100,51 +1172,13 @@ abort: crypt_data->~fil_space_crypt_t(); ut_free(crypt_data); + if (mdl_locked) { + thd_release_backup_lock(state->thd, state->mdl); + state->mdl = NULL; + } return false; } -/** State of a rotation thread */ -struct rotate_thread_t { - explicit rotate_thread_t(uint no) { - memset(this, 0, sizeof(* this)); - thread_no = no; - first = true; - estimated_max_iops = 20; - } - - uint thread_no; - bool first; /*!< is position before first space */ - fil_space_t* space; /*!< current space or NULL */ - uint32_t offset; /*!< current page number */ - ulint batch; /*!< #pages to rotate */ - uint min_key_version_found;/*!< min key version found but not rotated */ - lsn_t end_lsn; /*!< max lsn when rotating this space */ - - uint estimated_max_iops; /*!< estimation of max iops */ - uint allocated_iops; /*!< allocated iops */ - ulint cnt_waited; /*!< #times waited during this slot */ - uintmax_t sum_waited_us; /*!< wait time during this slot */ - - fil_crypt_stat_t crypt_stat; // statistics - - /** @return whether this thread should terminate */ - bool should_shutdown() const { - switch (srv_shutdown_state) { - case SRV_SHUTDOWN_NONE: - return thread_no >= srv_n_fil_crypt_threads; - case SRV_SHUTDOWN_EXIT_THREADS: - /* srv_init_abort() must have been invoked */ - case SRV_SHUTDOWN_CLEANUP: - case SRV_SHUTDOWN_INITIATED: - return true; - case SRV_SHUTDOWN_LAST_PHASE: - break; - } - ut_ad(0); - return true; - } -}; - /** Avoid the removal of the tablespace from default_encrypt_list only when 1) Another active encryption thread working on tablespace @@ -1201,7 +1235,7 @@ 
fil_crypt_space_needs_rotation( * space has no crypt data * start encrypting it... */ - *recheck = fil_crypt_start_encrypting_space(space); + *recheck = fil_crypt_start_encrypting_space(state, space); crypt_data = space->crypt_data; if (crypt_data == NULL) { @@ -2008,7 +2042,9 @@ fil_crypt_rotate_pages( if (state->space->is_stopping()) { break; } - +#ifdef WITH_WSREP + ut_ad(!sst_in_progress); +#endif fil_crypt_rotate_page(key_state, state); } } @@ -2027,6 +2063,10 @@ fil_crypt_flush_space( ut_ad(space->referenced()); +#ifdef WITH_WSREP + ut_ad(!sst_in_progress); +#endif + /* flush tablespace pages so that there are no pages left with old key */ lsn_t end_lsn = crypt_data->rotate_state.end_lsn; @@ -2137,6 +2177,9 @@ static void fil_crypt_complete_rotate_space(rotate_thread_t* state) } } +extern "C" int thd_get_backup_lock(THD* thd, MDL_ticket **mdl); +extern "C" void thd_release_backup_lock(THD* thd, MDL_ticket *mdl); + /*********************************************************************//** A thread which monitors global key state and rotates tablespaces accordingly @return a dummy parameter */ @@ -2187,6 +2230,14 @@ DECLARE_THREAD(fil_crypt_thread)(void*) fil_crypt_find_space_to_rotate(&new_state, &thr, &recheck)) { /* we found a space to rotate */ + + /* Acquire global MDL BACKUP lock. */ + if (thd_get_backup_lock(thr.thd, &thr.mdl)) { + thr.space->release(); + thr.space = NULL; + break; + } + fil_crypt_start_rotate_space(&new_state, &thr); /* iterate all pages (cooperativly with other threads) */ @@ -2202,6 +2253,8 @@ DECLARE_THREAD(fil_crypt_thread)(void*) space and stop rotation. 
*/ if (thr.space->is_stopping()) { fil_crypt_complete_rotate_space(&thr); + thd_release_backup_lock(thr.thd, thr.mdl); + thr.mdl = NULL; thr.space->release(); thr.space = NULL; break; @@ -2221,6 +2274,11 @@ DECLARE_THREAD(fil_crypt_thread)(void*) /* return iops */ fil_crypt_return_iops(&thr); + + if (thr.mdl) { + thd_release_backup_lock(thr.thd, thr.mdl); + thr.mdl = NULL; + } } } @@ -2233,11 +2291,18 @@ DECLARE_THREAD(fil_crypt_thread)(void*) thr.space = NULL; } + if (thr.mdl) { + thd_release_backup_lock(thr.thd, thr.mdl); + thr.mdl = NULL; + } + mutex_enter(&fil_crypt_threads_mutex); srv_n_fil_crypt_threads_started--; os_event_set(fil_crypt_event); /* signal that we stopped */ mutex_exit(&fil_crypt_threads_mutex); + innobase_destroy_background_thd(thr.thd); + /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc index c288dfee6f8..7d711ff72dd 100644 --- a/storage/innobase/fts/fts0opt.cc +++ b/storage/innobase/fts/fts0opt.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2007, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2021, MariaDB Corporation. +Copyright (c) 2016, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -252,6 +252,10 @@ static const char* fts_end_delete_sql = "DELETE FROM $BEING_DELETED;\n" "DELETE FROM $BEING_DELETED_CACHE;\n"; +extern "C" int thd_get_backup_lock(THD* thd, MDL_ticket **mdl); +extern "C" void thd_release_backup_lock(THD* thd, MDL_ticket *mdl); +extern bool sst_in_progress; + /**********************************************************************//** Initialize fts_zip_t. 
*/ static @@ -2809,6 +2813,10 @@ static void fts_optimize_sync_table(dict_table_t *table, if (!sync_table) return; +#ifdef WITH_WSREP + ut_ad(!sst_in_progress); +#endif + if (sync_table->fts && sync_table->fts->cache && sync_table->is_accessible()) { fts_sync_table(sync_table, false); @@ -2846,11 +2854,19 @@ static void fts_optimize_callback(void *) while (!done && srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) { /* If there is no message in the queue and we have tables to optimize then optimize the tables. */ + MDL_ticket *mdl= NULL; if (!done && ib_wqueue_is_empty(fts_optimize_wq) && n_tables > 0 && n_optimize > 0) { + + if (thd_get_backup_lock(fts_opt_thd, &mdl)) { + if (n_tables) + timer->set_time(5000, 0); + return; + } + fts_slot_t* slot = static_cast<fts_slot_t*>( ib_vector_get(fts_slots, current)); @@ -2866,6 +2882,8 @@ static void fts_optimize_callback(void *) current = 0; } + thd_release_backup_lock(fts_opt_thd, mdl); + } else if (n_optimize == 0 || !ib_wqueue_is_empty(fts_optimize_wq)) { fts_msg_t* msg = static_cast<fts_msg_t*> @@ -2880,6 +2898,12 @@ static void fts_optimize_callback(void *) return; } + if (thd_get_backup_lock(fts_opt_thd, &mdl)) { + if (n_tables) + timer->set_time(5000, 0); + return; + } + switch (msg->type) { case FTS_MSG_STOP: done = TRUE; @@ -2923,6 +2947,8 @@ static void fts_optimize_callback(void *) mem_heap_free(msg->heap); n_optimize = done ? 
0 : fts_optimize_how_many(); + + thd_release_backup_lock(fts_opt_thd, mdl); } } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 5ef7067ed06..071bfb2ad10 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1624,6 +1624,7 @@ thd_to_trx_id( static void wsrep_abort_transaction(handlerton*, THD *, THD *, my_bool); static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid); static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid); +static int wsrep_force_checkpoint(handlerton*, THD *); #endif /* WITH_WSREP */ /********************************************************************//** Converts an InnoDB error code to a MySQL error code and also tells to MySQL @@ -3206,9 +3207,6 @@ static int innodb_init_abort() } srv_tmp_space.shutdown(); -#ifdef WITH_INNODB_DISALLOW_WRITES - os_event_destroy(srv_allow_writes_event); -#endif /* WITH_INNODB_DISALLOW_WRITES */ DBUG_RETURN(1); } @@ -4011,6 +4009,7 @@ static int innodb_init(void* p) innobase_hton->abort_transaction=wsrep_abort_transaction; innobase_hton->set_checkpoint=innobase_wsrep_set_checkpoint; innobase_hton->get_checkpoint=innobase_wsrep_get_checkpoint; + innobase_hton->force_checkpoint=wsrep_force_checkpoint; #endif /* WITH_WSREP */ innobase_hton->tablefile_extensions = ha_innobase_exts; @@ -19727,39 +19726,6 @@ static MYSQL_SYSVAR_ULONG(buf_dump_status_frequency, srv_buf_dump_status_frequen "dumped. Default is 0 (only start and end status is printed).", NULL, NULL, 0, 0, 100, 0); -#ifdef WITH_INNODB_DISALLOW_WRITES -/******************************************************* - * innobase_disallow_writes variable definition * - *******************************************************/ - -/* Must always init to FALSE. 
*/ -static my_bool innobase_disallow_writes = FALSE; - -/************************************************************************** -An "update" method for innobase_disallow_writes variable. */ -static -void -innobase_disallow_writes_update(THD*, st_mysql_sys_var*, - void* var_ptr, const void* save) -{ - const my_bool val = *static_cast<const my_bool*>(save); - *static_cast<my_bool*>(var_ptr) = val; - ut_a(srv_allow_writes_event); - mysql_mutex_unlock(&LOCK_global_system_variables); - if (val) { - os_event_reset(srv_allow_writes_event); - } else { - os_event_set(srv_allow_writes_event); - } - mysql_mutex_lock(&LOCK_global_system_variables); -} - -static MYSQL_SYSVAR_BOOL(disallow_writes, innobase_disallow_writes, - PLUGIN_VAR_NOCMDOPT, - "Tell InnoDB to stop any writes to disk", - NULL, innobase_disallow_writes_update, FALSE); -#endif /* WITH_INNODB_DISALLOW_WRITES */ - static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead, PLUGIN_VAR_NOCMDARG, "Whether to use read ahead for random access within an extent.", @@ -20149,9 +20115,6 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(change_buffer_dump), MYSQL_SYSVAR(change_buffering_debug), #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ -#ifdef WITH_INNODB_DISALLOW_WRITES - MYSQL_SYSVAR(disallow_writes), -#endif /* WITH_INNODB_DISALLOW_WRITES */ MYSQL_SYSVAR(random_read_ahead), MYSQL_SYSVAR(read_ahead_threshold), MYSQL_SYSVAR(read_only), @@ -21659,3 +21622,38 @@ buf_pool_size_align( return (ulint)((size / m + 1) * m); } } + +#ifdef WITH_WSREP +/** Helper function for Galera rsync SST to flush all dirty +pages from buffer pool and force log checkpoint. 
+@param[in] handlerton* hton InnoDB handlerton +@param[in] THD* thd Thread handle */ +static int wsrep_force_checkpoint(handlerton *hton, THD * thd) +{ + DBUG_ASSERT(hton == innodb_hton_ptr); + // Note that wsrep_on = OFF during rsync SST so we can't check that + // this is called only with WSREP(thd) + // Force a dirty pages flush now + ib::info() << "Flushing buffer pool..."; + buf_flush_sync(); + ib::info() << "Flushing buffer pool...done"; + // Force InnoDB to checkpoint + lsn_t lsn; + ib::info() << "Creating checkpoint..."; + while (log_sys.last_checkpoint_lsn.load(std::memory_order_acquire) + + SIZE_OF_FILE_CHECKPOINT + < (lsn= log_sys.get_lsn(std::memory_order_acquire))) + { + log_make_checkpoint(); + log_sys.log.flush(); + } + + if (dberr_t err= fil_write_flushed_lsn(lsn)) + ib::warn() << "Force checkpoint set failed: " << ut_strerr(err); + + ib::info() << "Creating checkpoint...done"; + + return (0); +} +#endif /* WITH_WSREP */ + diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 889652039a9..77887ecf863 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -291,11 +291,6 @@ extern ulong srv_log_write_ahead_size; extern my_bool srv_adaptive_flushing; extern my_bool srv_flush_sync; -#ifdef WITH_INNODB_DISALLOW_WRITES -/* When this event is reset we do not allow any file writes to take place. */ -extern os_event_t srv_allow_writes_event; -#endif /* WITH_INNODB_DISALLOW_WRITES */ - /* If this flag is TRUE, then we will load the indexes' (and tables') metadata even if they are marked as "corrupted". 
Mostly it is for DBA to process corrupted index and table */ diff --git a/storage/innobase/innodb.cmake b/storage/innobase/innodb.cmake index cd8ba4f04d3..300357b2af4 100644 --- a/storage/innobase/innodb.cmake +++ b/storage/innobase/innodb.cmake @@ -149,13 +149,6 @@ ELSE() ADD_DEFINITIONS(-DMUTEX_SYS) ENDIF() -OPTION(WITH_INNODB_DISALLOW_WRITES "InnoDB freeze writes patch from Google" ${WITH_WSREP}) -IF (WITH_INNODB_DISALLOW_WRITES) - ADD_DEFINITIONS(-DWITH_INNODB_DISALLOW_WRITES) -ENDIF() -ADD_FEATURE_INFO(INNODB_DISALLOW_WRITES WITH_INNODB_DISALLOW_WRITES "Expose innodb_disallow_writes switch to stop innodb from writing to disk") - - # Include directories under innobase INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/innobase/include ${CMAKE_SOURCE_DIR}/storage/innobase/handler) diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index ca97a9e77e4..fa0f8d44476 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -32,6 +32,7 @@ Created 11/26/1995 Heikki Tuuri #include "page0types.h" #include "mtr0log.h" #include "log0recv.h" +#include "wsrep.h" /** Iterate over a memo block in reverse. */ template <typename Functor> @@ -872,12 +873,15 @@ static void log_write_low(const void *str, size_t size) while (size); } +extern bool sst_in_progress; + /** Close the log at mini-transaction commit. 
@return whether buffer pool flushing is needed */ static mtr_t::page_flush_ahead log_close(lsn_t lsn) { mysql_mutex_assert_owner(&log_sys.mutex); ut_ad(lsn == log_sys.get_lsn()); + DBUG_ASSERT(IF_WSREP(!sst_in_progress,1)); byte *log_block= static_cast<byte*>(ut_align_down(log_sys.buf + log_sys.buf_free, diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index b584f485629..55ebd9c6c58 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -83,6 +83,8 @@ Created 10/21/1995 Heikki Tuuri #include <thread> #include <chrono> +#include "wsrep.h" + /* Per-IO operation environment*/ class io_slots { @@ -153,14 +155,6 @@ static ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; static ulint os_innodb_umask = 0; #endif /* _WIN32 */ - -#ifdef WITH_INNODB_DISALLOW_WRITES -#define WAIT_ALLOW_WRITES() os_event_wait(srv_allow_writes_event) -#else -#define WAIT_ALLOW_WRITES() do { } while (0) -#endif /* WITH_INNODB_DISALLOW_WRITES */ - - Atomic_counter<ulint> os_n_file_reads; static ulint os_bytes_read_since_printout; ulint os_n_file_writes; @@ -382,7 +376,6 @@ FILE* os_file_create_tmpfile() { FILE* file = NULL; - WAIT_ALLOW_WRITES(); File fd = mysql_tmpfile("ib"); if (fd >= 0) { @@ -990,7 +983,6 @@ os_file_flush_func( { int ret; - WAIT_ALLOW_WRITES(); ret = os_file_sync_posix(file); if (ret == 0) { @@ -1042,10 +1034,6 @@ os_file_create_simple_func( int create_flag; const char* mode_str = NULL; - if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) { - WAIT_ALLOW_WRITES(); - } - ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT)); ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT)); @@ -1163,7 +1151,6 @@ os_file_create_directory( { int rcode; - WAIT_ALLOW_WRITES(); rcode = mkdir(pathname, 0770); if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) { @@ -1370,10 +1357,6 @@ os_file_create_simple_no_error_handling_func( os_file_t file; int create_flag; - if (create_mode != OS_FILE_OPEN && create_mode != 
OS_FILE_OPEN_RAW) { - WAIT_ALLOW_WRITES(); - } - ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT)); ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT)); @@ -1448,7 +1431,6 @@ os_file_delete_if_exists_func( } int ret; - WAIT_ALLOW_WRITES(); ret = unlink(name); @@ -1473,7 +1455,6 @@ os_file_delete_func( const char* name) { int ret; - WAIT_ALLOW_WRITES(); ret = unlink(name); @@ -1512,7 +1493,6 @@ os_file_rename_func( #endif /* UNIV_DEBUG */ int ret; - WAIT_ALLOW_WRITES(); ret = rename(oldpath, newpath); @@ -1682,7 +1662,6 @@ bool os_file_set_eof( FILE* file) /*!< in: file to be truncated */ { - WAIT_ALLOW_WRITES(); return(!ftruncate(fileno(file), ftell(file))); } @@ -2150,10 +2129,6 @@ os_file_create_func( ? FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE : FILE_SHARE_READ | FILE_SHARE_DELETE; - if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) { - WAIT_ALLOW_WRITES(); - } - on_error_no_exit = create_mode & OS_FILE_ON_ERROR_NO_EXIT ? true : false; @@ -2938,8 +2913,6 @@ os_file_write_func( ut_ad(n > 0); - WAIT_ALLOW_WRITES(); - ssize_t n_bytes = os_file_pwrite(type, file, (byte*)buf, n, offset, &err); if ((ulint) n_bytes != n && !os_has_said_disk_full) { @@ -3829,6 +3802,8 @@ void os_aio_wait_until_no_pending_reads() tpool::tpool_wait_end(); } +extern bool sst_in_progress; + /** Request a read or write. 
@param type I/O request @param buf buffer @@ -3876,6 +3851,7 @@ func_exit: if (type.is_read()) { ++os_n_file_reads; } else { + DBUG_ASSERT(IF_WSREP(!sst_in_progress, 1)); ++os_n_file_writes; } diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index b34587ba961..4b8f53b5543 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -4041,9 +4041,6 @@ pfs_os_file_t row_merge_file_create_low( const char* path) { -#ifdef WITH_INNODB_DISALLOW_WRITES - os_event_wait(srv_allow_writes_event); -#endif /* WITH_INNODB_DISALLOW_WRITES */ if (!path) { path = mysql_tmpdir; } diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 09cf547b831..7cdc18e1e84 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -194,10 +194,6 @@ srv_printf_innodb_monitor() will request mutex acquisition with mutex_enter(), which will wait until it gets the mutex. */ #define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT) -#ifdef WITH_INNODB_DISALLOW_WRITES -UNIV_INTERN os_event_t srv_allow_writes_event; -#endif /* WITH_INNODB_DISALLOW_WRITES */ - /** copy of innodb_buffer_pool_size */ ulint srv_buf_pool_size; /** Requested buffer pool chunk size. Each buffer pool instance consists @@ -675,15 +671,6 @@ static void srv_init() mutex_create(LATCH_ID_PAGE_ZIP_STAT_PER_INDEX, &page_zip_stat_per_index_mutex); -#ifdef WITH_INNODB_DISALLOW_WRITES - /* Writes have to be enabled on init or else we hang. Thus, we - always set the event here regardless of innobase_disallow_writes. - That flag will always be 0 at this point because it isn't settable - via my.cnf or command line arg. 
*/ - srv_allow_writes_event = os_event_create(0); - os_event_set(srv_allow_writes_event); -#endif /* WITH_INNODB_DISALLOW_WRITES */ - /* Initialize some INFORMATION SCHEMA internal structures */ trx_i_s_cache_init(trx_i_s_cache); @@ -1341,17 +1328,6 @@ void srv_monitor_task(void*) if (sync_array_print_long_waits(&waiter, &sema) && sema == old_sema && os_thread_eq(waiter, old_waiter)) { -#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES) - if (!os_event_is_set(srv_allow_writes_event)) { - fprintf(stderr, - "WSREP: avoiding InnoDB self crash due to " - "long semaphore wait of > %lu seconds\n" - "Server is processing SST donor operation, " - "fatal_cnt now: " ULINTPF, - srv_fatal_semaphore_wait_threshold, fatal_cnt); - return; - } -#endif /* WITH_WSREP */ if (fatal_cnt++) { ib::fatal() << "Semaphore wait has lasted > " << srv_fatal_semaphore_wait_threshold diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index afb7834c9b9..c028ac0604d 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -2112,10 +2112,6 @@ void innodb_shutdown() } srv_tmp_space.shutdown(); -#ifdef WITH_INNODB_DISALLOW_WRITES - os_event_destroy(srv_allow_writes_event); -#endif /* WITH_INNODB_DISALLOW_WRITES */ - if (srv_was_started && srv_print_verbose_log) { ib::info() << "Shutdown completed; log sequence number " << srv_shutdown_lsn diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index 4d84f295c0b..7ab65add9e4 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -42,7 +42,9 @@ Created 3/26/1996 Heikki Tuuri #include "trx0rseg.h" #include "trx0trx.h" #include <mysql/service_wsrep.h> - +#include <mysqld.h> +#include "sql_class.h" +#include <mysql/service_thd_mdl.h> #include <unordered_map> /** Maximum allowable purge history length. <=0 means 'infinite'. */ @@ -1268,6 +1270,8 @@ static void trx_purge_wait_for_workers_to_complete() ut_ad(srv_get_task_queue_length() == 0); } +extern "C" int thd_get_backup_lock(THD* thd, MDL_ticket **mdl); +extern "C" void thd_release_backup_lock(THD* thd, MDL_ticket *mdl); /** Run a purge batch. @param n_tasks number of purge tasks to submit to the queue @@ -1277,15 +1281,21 @@ ulint trx_purge(ulint n_tasks, bool truncate) { que_thr_t* thr = NULL; ulint n_pages_handled; + MDL_ticket *mdl= NULL; ut_ad(n_tasks > 0); + /* Acquire global MDL_BACKUP_DML lock */ + if (thd_get_backup_lock(current_thd, &mdl)) + return (0); + srv_dml_needed_delay = trx_purge_dml_delay(); purge_sys.clone_oldest_view(); #ifdef UNIV_DEBUG if (srv_purge_view_update_only_debug) { + thd_release_backup_lock(current_thd, mdl); return(0); } #endif /* UNIV_DEBUG */ @@ -1311,6 +1321,8 @@ ulint trx_purge(ulint n_tasks, bool truncate) trx_purge_truncate_history(); } + thd_release_backup_lock(current_thd, mdl); + MONITOR_INC_VALUE(MONITOR_PURGE_INVOKED, 1); MONITOR_INC_VALUE(MONITOR_PURGE_N_PAGE_HANDLED, n_pages_handled); |