diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2021-09-23 10:37:22 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2021-09-23 10:37:22 +0300 |
commit | 1718b9454930c5f02d08f2c62b7c462528c84ed5 (patch) | |
tree | 1454ac01b394fa928f3a70d0911a2fdd549105f4 | |
parent | b740b2356d539a27f8a7a9e9b6455f31bc6c9196 (diff) | |
parent | 2755e86a53cd89729d0cb54424fe83074dccd8ab (diff) | |
download | mariadb-git-1718b9454930c5f02d08f2c62b7c462528c84ed5.tar.gz |
Merge 10.5 into 10.6
-rw-r--r-- | CONTRIBUTING.md | 5 | ||||
-rw-r--r-- | README.md | 3 | ||||
-rw-r--r-- | cmake/cpack_rpm.cmake | 6 | ||||
-rw-r--r-- | mysql-test/suite/innodb/r/undo_truncate.result | 22 | ||||
-rw-r--r-- | mysql-test/suite/innodb/t/undo_truncate.opt | 1 | ||||
-rw-r--r-- | mysql-test/suite/innodb/t/undo_truncate.test | 80 | ||||
-rw-r--r-- | sql/CMakeLists.txt | 8 | ||||
-rw-r--r-- | sql/gen_yy_files.cmake | 3 | ||||
-rw-r--r-- | sql/sql_class.cc | 3 | ||||
-rw-r--r-- | storage/innobase/buf/buf0buf.cc | 1 | ||||
-rw-r--r-- | storage/innobase/buf/buf0flu.cc | 6 | ||||
-rw-r--r-- | storage/innobase/fsp/fsp0fsp.cc | 54 | ||||
-rw-r--r-- | storage/innobase/include/buf0buf.h | 2 | ||||
-rw-r--r-- | storage/innobase/include/log0log.h | 16 | ||||
-rw-r--r-- | storage/innobase/include/mtr0mtr.h | 4 | ||||
-rw-r--r-- | storage/innobase/include/mtr0mtr.ic | 4 | ||||
-rw-r--r-- | storage/innobase/include/sux_lock.h | 3 | ||||
-rw-r--r-- | storage/innobase/log/log0log.cc | 32 | ||||
-rw-r--r-- | storage/innobase/mtr/mtr0mtr.cc | 108 | ||||
-rw-r--r-- | storage/innobase/trx/trx0purge.cc | 488 |
20 files changed, 479 insertions, 370 deletions
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 64af450d29f..37899f37958 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,8 +8,7 @@ MariaDB Server has a vibrant community contributing in a wide range of areas. Th - [maria-developers mailing list](http://launchpad.net/~maria-developers) - [maria-discuss mailing list](http://launchpad.net/~maria-discuss) - [maria-docs mailing list](http://launchpad.net/~maria-docs) -- ircs://chat.freenode.net/maria ([see the IRC page on the Knowledge Base](https://mariadb.com/kb/en/meta/irc-chat-servers-and-zulip-instance/) for help with IRC). -- The MariaDB Foundation and MariaDB Corporation have a presence on Reddit, Twitter, Facebook and Google Plus. See the [social media page](https://mariadb.com/kb/en/mariadb/social-media/). +- The MariaDB Foundation and MariaDB Corporation have a presence on Reddit, Twitter and Facebook. See the [social media page](https://mariadb.com/kb/en/mariadb/social-media/). ### Help document MariaDB ---- @@ -36,7 +35,7 @@ You’re very welcome to support MariaDB Server as an individual, or talk your c ### Live QA for beginner contributors ---- -MariaDB has a dedicated time each week when we answer new contributor questions live on Zulip and IRC. +MariaDB has a dedicated time each week when we answer new contributor questions live on Zulip. From 8:00 to 10:00 UTC on Mondays, and 10:00 to 12:00 UTC on Thursdays, anyone can ask any questions they’d like, and a live developer will be available to assist. New contributors can ask questions any time, but we will provide immediate feedback during that interval. diff --git a/README.md b/README.md index f17a882d66e..58dbf105fb9 100644 --- a/README.md +++ b/README.md @@ -39,11 +39,10 @@ Help More help is available from the Maria Discuss mailing list https://launchpad.net/~maria-discuss, MariaDB's Zulip instance, https://mariadb.zulipchat.com/ -and the #maria IRC channel on Freenode. Live QA for beginner contributors ---- -MariaDB has a dedicated time each week when we answer new contributor questions live on Zulip and IRC. +MariaDB has a dedicated time each week when we answer new contributor questions live on Zulip. From 8:00 to 10:00 UTC on Mondays, and 10:00 to 12:00 UTC on Thursdays, anyone can ask any questions they’d like, and a live developer will be available to assist. diff --git a/cmake/cpack_rpm.cmake b/cmake/cpack_rpm.cmake index 5f954c7a1fe..644ebb3bc73 100644 --- a/cmake/cpack_rpm.cmake +++ b/cmake/cpack_rpm.cmake @@ -257,6 +257,12 @@ ELSEIF(RPM MATCHES "sles") "mariadb-server = %{version}-%{release}" ) ENDIF() + +# MDEV-24629, we need it outside of ELSIFs +IF(RPM MATCHES "fedora3[234]") + ALTERNATIVE_NAME("common" "mariadb-connector-c-config" ${MARIADB_CONNECTOR_C_VERSION}-1) +ENDIF() + SET(PYTHON_SHEBANG "/usr/bin/python3" CACHE STRING "python shebang") # If we want to build build MariaDB-shared-compat, diff --git a/mysql-test/suite/innodb/r/undo_truncate.result b/mysql-test/suite/innodb/r/undo_truncate.result index ad236bdecd4..54eeee9a9df 100644 --- a/mysql-test/suite/innodb/r/undo_truncate.result +++ b/mysql-test/suite/innodb/r/undo_truncate.result @@ -7,28 +7,12 @@ SET @trunc_start= WHERE variable_name = 'innodb_undo_truncations'); create table t1(keyc int primary key, c char(100)) engine = innodb; create table t2(keyc int primary key, c char(100)) engine = innodb; -CREATE PROCEDURE populate_t1() -BEGIN -DECLARE i INT DEFAULT 1; -while (i <= 20000) DO -insert into t1 values (i, 'a'); -SET i = i + 1; -END WHILE; -END | -CREATE PROCEDURE populate_t2() -BEGIN -DECLARE i INT DEFAULT 1; -while (i <= 20000) DO -insert into t2 values (i, 'a'); -SET i = i + 1; -END WHILE; -END | connect con1,localhost,root,,; begin; -call populate_t1(); +insert into t1 select seq,'a' from seq_1_to_20000; connect con2,localhost,root,,; begin; -call populate_t2(); +insert into t2 select seq,'a' from seq_1_to_20000; connection con1; update t1 set c = 'mysql'; connection con2; @@ -50,8 +34,6 @@ commit; disconnect con2; connection default; drop table t1, t2; -drop PROCEDURE populate_t1; -drop PROCEDURE populate_t2; InnoDB 0 transactions not purged SET GLOBAL innodb_purge_rseg_truncate_frequency = @save_frequency; SET GLOBAL innodb_undo_log_truncate = @save_truncate; diff --git a/mysql-test/suite/innodb/t/undo_truncate.opt b/mysql-test/suite/innodb/t/undo_truncate.opt new file mode 100644 index 00000000000..f4d78725c6e --- /dev/null +++ b/mysql-test/suite/innodb/t/undo_truncate.opt @@ -0,0 +1 @@ +--innodb-buffer-pool-size=24M diff --git a/mysql-test/suite/innodb/t/undo_truncate.test b/mysql-test/suite/innodb/t/undo_truncate.test index d2a4e287305..8de93814ed8 100644 --- a/mysql-test/suite/innodb/t/undo_truncate.test +++ b/mysql-test/suite/innodb/t/undo_truncate.test @@ -1,6 +1,11 @@ --source include/have_innodb.inc --source include/innodb_page_size.inc --source include/have_undo_tablespaces.inc +--source include/have_sequence.inc + +--disable_query_log +call mtr.add_suppression("InnoDB: Difficult to find free blocks in the buffer pool"); +--enable_query_log SET @save_frequency = @@GLOBAL.innodb_purge_rseg_truncate_frequency; SET @save_truncate = @@GLOBAL.innodb_undo_log_truncate; @@ -19,37 +24,14 @@ WHERE variable_name = 'innodb_undo_truncations'); create table t1(keyc int primary key, c char(100)) engine = innodb; create table t2(keyc int primary key, c char(100)) engine = innodb; # -delimiter |; -CREATE PROCEDURE populate_t1() -BEGIN - DECLARE i INT DEFAULT 1; - while (i <= 20000) DO - insert into t1 values (i, 'a'); - SET i = i + 1; - END WHILE; -END | -delimiter ;| -# -delimiter |; -CREATE PROCEDURE populate_t2() -BEGIN - DECLARE i INT DEFAULT 1; - while (i <= 20000) DO - insert into t2 values (i, 'a'); - SET i = i + 1; - END WHILE; -END | -delimiter ;| -# -# let DATADIR = `select @@datadir`; connect (con1,localhost,root,,); begin; -send call populate_t1(); +send insert into t1 select seq,'a' from seq_1_to_20000; connect (con2,localhost,root,,); begin; -send call populate_t2(); +send insert into t2 select seq,'a' from seq_1_to_20000; connection con1; reap; send update t1 set c = 'mysql'; connection con2; reap; send update t2 set c = 'mysql'; @@ -59,62 +41,14 @@ connection con1; reap; send delete from t1; connection con2; reap; delete from t2; connection con1; reap; -let CHECKFILE = $MYSQL_TMP_DIR/check.txt; -perl; -($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size1) - = stat("$ENV{DATADIR}/undo001"); -($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size2) - = stat("$ENV{DATADIR}/undo002"); -open(OUT, ">$ENV{CHECKFILE}") || die; -print OUT "let \$size1='$size1,$size2';\n"; -close(OUT); -EOF - SET GLOBAL innodb_undo_log_truncate = 1; commit; disconnect con1; connection con2; commit; disconnect con2; connection default; drop table t1, t2; -drop PROCEDURE populate_t1; -drop PROCEDURE populate_t2; --source include/wait_all_purged.inc -# Truncation will normally not occur with innodb_page_size=64k, -# and occasionally not with innodb_page_size=32k, -# because the undo log will not grow enough. -# TODO: For some reason this does not occur on 4k either! -if (`select @@innodb_page_size IN (8192,16384)`) -{ - let $wait_condition = (SELECT variable_value!=@trunc_start - FROM information_schema.global_status - WHERE variable_name = 'innodb_undo_truncations'); - source include/wait_condition.inc; -} - ---source $CHECKFILE -perl; -($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size1) - = stat("$ENV{DATADIR}/undo001"); -($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size2) - = stat("$ENV{DATADIR}/undo002"); -open(OUT, ">$ENV{CHECKFILE}") || die; -print OUT "let \$size2='$size1,$size2';\n"; -close(OUT); -EOF - ---source $CHECKFILE ---remove_file $CHECKFILE - -if ($size1 == $size2) -{ - # This fails for innodb_page_size=64k, occasionally also for 32k. - if (`select @@innodb_page_size IN (8192,16384)`) - { - echo Truncation did not happen: $size1; - } -} - SET GLOBAL innodb_purge_rseg_truncate_frequency = @save_frequency; SET GLOBAL innodb_undo_log_truncate = @save_truncate; diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index c50229ada1e..854d6cac8aa 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -1,5 +1,5 @@ # Copyright (c) 2006, 2014, Oracle and/or its affiliates. -# Copyright (c) 2010, 2020, MariaDB Corporation. +# Copyright (c) 2010, 2021, MariaDB Corporation. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -65,6 +65,8 @@ ADD_CUSTOM_COMMAND( DEPENDS gen_lex_token ) +FIND_PACKAGE(BISON 2.4) + ADD_CUSTOM_COMMAND( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/yy_mariadb.yy ${CMAKE_CURRENT_BINARY_DIR}/yy_oracle.yy @@ -72,6 +74,7 @@ ADD_CUSTOM_COMMAND( "-DOUT1=${CMAKE_CURRENT_BINARY_DIR}/yy_oracle.yy" "-DOUT2=${CMAKE_CURRENT_BINARY_DIR}/yy_mariadb.yy" "-DIN=${CMAKE_CURRENT_SOURCE_DIR}/sql_yacc.yy" + "-DBISON_VERSION=${BISON_VERSION}" -P ${CMAKE_CURRENT_SOURCE_DIR}/gen_yy_files.cmake COMMENT "Building yy_mariadb.yy and yy_oracle.yy from sql_yacc.yy" DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/sql_yacc.yy @@ -325,9 +328,6 @@ IF(WITH_MYSQLD_LDFLAGS) ENDIF() -FIND_PACKAGE(BISON 2.4) - - # Handle out-of-source build from source package with possibly broken # bison. Copy bison output to from source to build directory, if not already # there diff --git a/sql/gen_yy_files.cmake b/sql/gen_yy_files.cmake index da63c72c37c..3ceb60a95de 100644 --- a/sql/gen_yy_files.cmake +++ b/sql/gen_yy_files.cmake @@ -5,6 +5,9 @@ file(READ "${IN}" data) file(WRITE "${OUT1}" "") file(WRITE "${OUT2}" "") set(where 0) +if(NOT(BISON_VERSION VERSION_LESS "3.0.0")) + string(REPLACE "\n%pure-parser" "\n%define api.pure" data "${data}") +endif() string(REGEX REPLACE "/\\* sql_yacc\\.yy \\*/" "/* DON'T EDIT THIS FILE. IT'S GENERATED. EDIT sql_yacc.yy INSTEAD */" data "${data}") while(NOT data STREQUAL "") string(REGEX MATCH "^(%[ie][^\n]*\n)|((%[^ie\n]|[^%\n])[^\n]*\n)+|\n+" line "${data}") diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 954d53b9d72..7c0879af8c9 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -4929,6 +4929,9 @@ MYSQL_THD create_background_thd() thd->set_command(COM_DAEMON); thd->system_thread= SYSTEM_THREAD_GENERIC; thd->security_ctx->host_or_ip= ""; + thd->real_id= 0; + thd->thread_id= 0; + thd->query_id= 0; return thd; } diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index bab95c5453c..4ec6a61ccb9 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -3390,6 +3390,7 @@ loop: /* Delete possible entries for the page from the insert buffer: such can exist if the page belonged to an index which was dropped */ if (page_id < page_id_t{SRV_SPACE_ID_UPPER_BOUND, 0} && + !srv_is_undo_tablespace(page_id.space()) && !recv_recovery_is_on()) ibuf_merge_or_delete_for_page(nullptr, page_id, zip_size); diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 84b0287d535..2107f0af9ae 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -366,10 +366,12 @@ void buf_page_write_complete(const IORequest &request) const bool temp= fsp_is_system_temporary(bpage->id().space()); mysql_mutex_lock(&buf_pool.mutex); + mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex); buf_pool.stat.n_pages_written++; /* While we do not need any mutex for clearing oldest_modification here, we hope that it will be in the same cache line with io_fix, whose changes must be protected by buf_pool.mutex. */ + ut_ad(temp || bpage->oldest_modification() > 2); bpage->clear_oldest_modification(temp); ut_ad(bpage->io_fix() == BUF_IO_WRITE); bpage->set_io_fix(BUF_IO_NONE); @@ -2234,7 +2236,9 @@ unemployed: mysql_mutex_unlock(&buf_pool.flush_list_mutex); - if (!recv_recovery_is_on() && srv_operation == SRV_OPERATION_NORMAL) + if (!recv_recovery_is_on() && + !srv_startup_is_before_trx_rollback_phase && + srv_operation == SRV_OPERATION_NORMAL) log_checkpoint(); mysql_mutex_lock(&buf_pool.flush_list_mutex); diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index 8477e74c5f1..2581cecddc3 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -564,7 +564,7 @@ void fsp_header_init(fil_space_t* space, uint32_t size, mtr_t* mtr) in order to avoid optimizing away any unchanged most significant bytes of FSP_SIZE. */ mtr->write<4,mtr_t::FORCED>(*block, FSP_HEADER_OFFSET + FSP_SIZE - + block->frame, size); + + block->frame, size); ut_ad(0 == mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT + block->frame)); if (auto f = space->flags & ~FSP_FLAGS_MEM_MASK) { @@ -758,10 +758,12 @@ fsp_try_extend_data_file(fil_space_t *space, buf_block_t *header, mtr_t *mtr) return(0); } - /* We ignore any fragments of a full megabyte when storing the size - to the space header */ + /* For the system tablespace, we ignore any fragments of a + full megabyte when storing the size to the space header */ - space->size_in_header = ut_2pow_round(space->size, (1024 * 1024) / ps); + space->size_in_header = space->id + ? space->size + : ut_2pow_round(space->size, (1024 * 1024) / ps); /* recv_sys_t::parse() expects to find a WRITE record that covers all 4 bytes. Therefore, we must specify mtr_t::FORCED @@ -1045,11 +1047,36 @@ static buf_block_t* fsp_page_create(fil_space_t *space, page_no_t offset, mtr_t *mtr) { - buf_block_t *free_block= buf_LRU_get_free_block(false); - buf_block_t *block= buf_page_create(space, static_cast<uint32_t>(offset), - space->zip_size(), mtr, free_block); + buf_block_t *block, *free_block; + + if (UNIV_UNLIKELY(space->is_being_truncated)) + { + const page_id_t page_id{space->id, offset}; + const ulint fold= page_id.fold(); + mysql_mutex_lock(&buf_pool.mutex); + block= reinterpret_cast<buf_block_t*> + (buf_pool.page_hash_get_low(page_id, fold)); + if (block && block->page.oldest_modification() <= 1) + block= nullptr; + mysql_mutex_unlock(&buf_pool.mutex); + + if (block) + { + ut_ad(block->page.buf_fix_count() >= 1); + ut_ad(block->lock.x_lock_count() == 1); + ut_ad(mtr->have_x_latch(*block)); + free_block= block; + goto got_free_block; + } + } + + free_block= buf_LRU_get_free_block(false); +got_free_block: + block= buf_page_create(space, static_cast<uint32_t>(offset), + space->zip_size(), mtr, free_block); if (UNIV_UNLIKELY(block != free_block)) buf_pool.free_block(free_block); + fsp_init_file_page(space, block, mtr); return block; } @@ -1753,7 +1780,10 @@ fseg_create(fil_space_t *space, ulint byte_offset, mtr_t *mtr, goto funct_exit; } - ut_ad(block->lock.not_recursive()); + ut_d(const auto x = block->lock.x_lock_count()); + ut_ad(x || block->lock.not_recursive()); + ut_ad(x == 1 || space->is_being_truncated); + ut_ad(x <= 2); ut_ad(!fil_page_get_type(block->frame)); mtr->write<1>(*block, FIL_PAGE_TYPE + 1 + block->frame, FIL_PAGE_TYPE_SYS); @@ -2179,14 +2209,14 @@ take_hinted_page: return(NULL); } - if (space->size <= ret_page && !is_system_tablespace(space_id)) { + if (space->size <= ret_page && !is_predefined_tablespace(space_id)) { /* It must be that we are extending a single-table tablespace whose size is still < 64 pages */ if (ret_page >= FSP_EXTENT_SIZE) { - ib::error() << "Error (2): trying to extend" - " a single-table tablespace " << space_id - << " by single page(s) though the" + ib::error() << "Trying to extend '" + << space->chain.start->name + << "' by single page(s) though the" << " space size " << space->size << ". Page no " << ret_page << "."; ut_ad(!has_done_reservation); diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 3f922708ef1..59f4c400653 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -2142,9 +2142,7 @@ inline void buf_page_t::clear_oldest_modification() it from buf_pool.flush_list */ inline void buf_page_t::clear_oldest_modification(bool temporary) { - mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex); ut_ad(temporary == fsp_is_system_temporary(id().space())); - ut_ad(io_fix_ == BUF_IO_WRITE); if (temporary) { ut_ad(oldest_modification() == 2); diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index a651a56e8a0..be28528adb0 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -113,12 +113,16 @@ struct completion_callback; void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key = false, const completion_callback* cb=nullptr); -/** write to the log file up to the last log entry. -@param[in] sync whether we want the written log -also to be flushed to disk. */ -void -log_buffer_flush_to_disk( - bool sync = true); +/** Write to the log file up to the last log entry. +@param sync whether to wait for a durable write to complete */ +void log_buffer_flush_to_disk(bool sync= true); + + +/** Prepare to invoke log_write_and_flush(), before acquiring log_sys.mutex. */ +ATTRIBUTE_COLD void log_write_and_flush_prepare(); + +/** Durably write the log up to log_sys.lsn() and release log_sys.mutex. */ +ATTRIBUTE_COLD void log_write_and_flush(); /** Make a checkpoint */ ATTRIBUTE_COLD void log_make_checkpoint(); diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index 00df3525fe7..5551cb7b587 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -101,6 +101,10 @@ struct mtr_t { /** Commit the mini-transaction. */ void commit(); + /** Commit a mini-transaction that is shrinking a tablespace. + @param space tablespace that is being shrunk */ + ATTRIBUTE_COLD void commit_shrink(fil_space_t &space); + /** Commit a mini-transaction that did not modify any pages, but generated some redo log on a higher level, such as FILE_MODIFY records and an optional FILE_CHECKPOINT marker. diff --git a/storage/innobase/include/mtr0mtr.ic b/storage/innobase/include/mtr0mtr.ic index 3896f2f6715..f1b2f9aba83 100644 --- a/storage/innobase/include/mtr0mtr.ic +++ b/storage/innobase/include/mtr0mtr.ic @@ -48,8 +48,8 @@ mtr_t::memo_push(void* object, mtr_memo_type_t type) /* If this mtr has x-fixed a clean page then we set the made_dirty flag. This tells us if we need to - grab log_flush_order_mutex at mtr_commit so that we - can insert the dirtied page to the flush list. */ + grab log_sys.flush_order_mutex at mtr_t::commit() so that we + can insert the dirtied page into the flush list. */ if ((type == MTR_MEMO_PAGE_X_FIX || type == MTR_MEMO_PAGE_SX_FIX) && !m_made_dirty) { diff --git a/storage/innobase/include/sux_lock.h b/storage/innobase/include/sux_lock.h index c09915cf6de..6d2ddadb9c3 100644 --- a/storage/innobase/include/sux_lock.h +++ b/storage/innobase/include/sux_lock.h @@ -98,6 +98,9 @@ public: ut_ad(recursive); return recursive == RECURSIVE_X || recursive == RECURSIVE_U; } + + /** @return the number of X locks being held (by any thread) */ + unsigned x_lock_count() const { return recursive & RECURSIVE_MAX; } #endif /** Acquire a recursive lock */ diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 2757571b52c..4d927227944 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -833,15 +833,41 @@ void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key, DBUG_EXECUTE_IF("crash_after_log_write_upto", DBUG_SUICIDE();); } -/** write to the log file up to the last log entry. -@param[in] sync whether we want the written log -also to be flushed to disk. */ +/** Write to the log file up to the last log entry. +@param sync whether to wait for a durable write to complete */ void log_buffer_flush_to_disk(bool sync) { ut_ad(!srv_read_only_mode); log_write_up_to(log_sys.get_lsn(std::memory_order_acquire), sync); } +/** Prepare to invoke log_write_and_flush(), before acquiring log_sys.mutex. */ +ATTRIBUTE_COLD void log_write_and_flush_prepare() +{ + mysql_mutex_assert_not_owner(&log_sys.mutex); + + while (flush_lock.acquire(log_sys.get_lsn() + 1, nullptr) != + group_commit_lock::ACQUIRED); + while (write_lock.acquire(log_sys.get_lsn() + 1, nullptr) != + group_commit_lock::ACQUIRED); +} + +/** Durably write the log and release log_sys.mutex */ +ATTRIBUTE_COLD void log_write_and_flush() +{ + ut_ad(!srv_read_only_mode); + auto lsn= log_sys.get_lsn(); + write_lock.set_pending(lsn); + log_write(false); + ut_a(log_sys.write_lsn == lsn); + write_lock.release(lsn); + + lsn= write_lock.value(); + flush_lock.set_pending(lsn); + log_write_flush_to_disk_low(lsn); + flush_lock.release(lsn); +} + /******************************************************************** Tries to establish a big enough margin of free space in the log buffer, such diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 76a703b7496..ae3183703c2 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -461,6 +461,114 @@ void mtr_t::commit() release_resources(); } +/** Shrink a tablespace. */ +struct Shrink +{ + /** the first non-existing page in the tablespace */ + const page_id_t high; + + Shrink(const fil_space_t &space) : high({space.id, space.size}) {} + + bool operator()(mtr_memo_slot_t *slot) const + { + if (!slot->object) + return true; + switch (slot->type) { + default: + ut_ad("invalid type" == 0); + return false; + case MTR_MEMO_SPACE_X_LOCK: + ut_ad(high.space() == static_cast<fil_space_t*>(slot->object)->id); + return true; + case MTR_MEMO_PAGE_X_MODIFY: + case MTR_MEMO_PAGE_SX_MODIFY: + case MTR_MEMO_PAGE_X_FIX: + case MTR_MEMO_PAGE_SX_FIX: + auto &bpage= static_cast<buf_block_t*>(slot->object)->page; + ut_ad(bpage.io_fix() == BUF_IO_NONE); + const auto id= bpage.id(); + if (id < high) + { + ut_ad(id.space() == high.space() || + (id == page_id_t{0, TRX_SYS_PAGE_NO} && + srv_is_undo_tablespace(high.space()))); + break; + } + ut_ad(id.space() == high.space()); + ut_ad(bpage.state() == BUF_BLOCK_FILE_PAGE); + if (bpage.oldest_modification() > 1) + bpage.clear_oldest_modification(false); + slot->type= static_cast<mtr_memo_type_t>(slot->type & ~MTR_MEMO_MODIFY); + } + return true; + } +}; + +/** Commit a mini-transaction that is shrinking a tablespace. +@param space tablespace that is being shrunk */ +void mtr_t::commit_shrink(fil_space_t &space) +{ + ut_ad(is_active()); + ut_ad(!is_inside_ibuf()); + ut_ad(!high_level_read_only); + ut_ad(m_modifications); + ut_ad(m_made_dirty); + ut_ad(!recv_recovery_is_on()); + ut_ad(m_log_mode == MTR_LOG_ALL); + ut_ad(UT_LIST_GET_LEN(space.chain) == 1); + + log_write_and_flush_prepare(); + + const lsn_t start_lsn= finish_write(prepare_write()).first; + + mysql_mutex_lock(&log_sys.flush_order_mutex); + /* Durably write the reduced FSP_SIZE before truncating the data file. */ + log_write_and_flush(); + + os_file_truncate(space.chain.start->name, space.chain.start->handle, + os_offset_t{space.size} << srv_page_size_shift, true); + + if (m_freed_pages) + { + ut_ad(!m_freed_pages->empty()); + ut_ad(m_freed_space == &space); + ut_ad(memo_contains(*m_freed_space)); + ut_ad(is_named_space(m_freed_space)); + m_freed_space->update_last_freed_lsn(m_commit_lsn); + + if (!is_trim_pages()) + for (const auto &range : *m_freed_pages) + m_freed_space->add_free_range(range); + else + m_freed_space->clear_freed_ranges(); + delete m_freed_pages; + m_freed_pages= nullptr; + m_freed_space= nullptr; + /* mtr_t::start() will reset m_trim_pages */ + } + else + ut_ad(!m_freed_space); + + m_memo.for_each_block_in_reverse(CIterate<Shrink>{space}); + + m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks> + (ReleaseBlocks(start_lsn, m_commit_lsn, + m_memo))); + mysql_mutex_unlock(&log_sys.flush_order_mutex); + + mysql_mutex_lock(&fil_system.mutex); + ut_ad(space.is_being_truncated); + ut_ad(space.is_stopping()); + space.clear_stopping(); + space.is_being_truncated= false; + mysql_mutex_unlock(&fil_system.mutex); + + m_memo.for_each_block_in_reverse(CIterate<ReleaseLatches>()); + srv_stats.log_write_requests.inc(); + + release_resources(); +} + /** Commit a mini-transaction that did not modify any pages, but generated some redo log on a higher level, such as FILE_MODIFY records and an optional FILE_CHECKPOINT marker. diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index d78500b1e14..140d7873e4e 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -546,250 +546,254 @@ function is called, the caller must not have any latches on undo log pages! */ static void trx_purge_truncate_history() { - ut_ad(purge_sys.head <= purge_sys.tail); - purge_sys_t::iterator& head = purge_sys.head.trx_no - ? purge_sys.head : purge_sys.tail; - - if (head.trx_no >= purge_sys.low_limit_no()) { - /* This is sometimes necessary. TODO: find out why. */ - head.trx_no = purge_sys.low_limit_no(); - head.undo_no = 0; - } - - for (auto& rseg : trx_sys.rseg_array) { - if (rseg.space) { - trx_purge_truncate_rseg_history(rseg, head); - } - } - - if (srv_undo_tablespaces_active < 2) { - return; - } - - while (srv_undo_log_truncate) { - if (!purge_sys.truncate.current) { - const ulint threshold = ulint(srv_max_undo_log_size - >> srv_page_size_shift); - for (ulint i = purge_sys.truncate.last - ? purge_sys.truncate.last->id - - srv_undo_space_id_start - : 0, j = i;; ) { - ulint space_id = srv_undo_space_id_start + i; - ut_ad(srv_is_undo_tablespace(space_id)); - fil_space_t* space= fil_space_get(space_id); - - if (space && space->get_size() > threshold) { - purge_sys.truncate.current = space; - break; - } - - ++i; - i %= srv_undo_tablespaces_active; - if (i == j) { - break; - } - } - } - - if (!purge_sys.truncate.current) { - return; - } - - fil_space_t& space = *purge_sys.truncate.current; - /* Undo tablespace always are a single file. */ - ut_a(UT_LIST_GET_LEN(space.chain) == 1); - fil_node_t* file = UT_LIST_GET_FIRST(space.chain); - /* The undo tablespace files are never closed. */ - ut_ad(file->is_open()); - - DBUG_LOG("undo", "marking for truncate: " << file->name); - - for (auto& rseg : trx_sys.rseg_array) { - if (rseg.space == &space) { - /* Once set, this rseg will - not be allocated to subsequent - transactions, but we will wait - for existing active - transactions to finish. */ - rseg.set_skip_allocation(); - } - } + ut_ad(purge_sys.head <= purge_sys.tail); + purge_sys_t::iterator &head= purge_sys.head.trx_no + ? purge_sys.head : purge_sys.tail; + + if (head.trx_no >= purge_sys.low_limit_no()) + { + /* This is sometimes necessary. TODO: find out why. */ + head.trx_no= purge_sys.low_limit_no(); + head.undo_no= 0; + } + + for (auto &rseg : trx_sys.rseg_array) + if (rseg.space) + trx_purge_truncate_rseg_history(rseg, head); + + if (srv_undo_tablespaces_active < 2) + return; - for (auto& rseg : trx_sys.rseg_array) { - if (rseg.space != &space) { - continue; - } - ut_ad(rseg.skip_allocation()); - if (rseg.is_referenced()) { - return; - } - rseg.latch.rd_lock(); - ut_ad(rseg.skip_allocation()); - if (rseg.is_referenced()) { + while (srv_undo_log_truncate) + { + if (!purge_sys.truncate.current) + { + const ulint threshold= + ulint(srv_max_undo_log_size >> srv_page_size_shift); + for (ulint i= purge_sys.truncate.last + ? purge_sys.truncate.last->id - srv_undo_space_id_start : 0, + j= i;; ) + { + const auto space_id= srv_undo_space_id_start + i; + ut_ad(srv_is_undo_tablespace(space_id)); + fil_space_t *space= fil_space_get(space_id); + ut_a(UT_LIST_GET_LEN(space->chain) == 1); + + if (space && space->get_size() > threshold) + { + purge_sys.truncate.current= space; + break; + } + + ++i; + i %= srv_undo_tablespaces_active; + if (i == j) + return; + } + } + + fil_space_t &space= *purge_sys.truncate.current; + /* Undo tablespace always are a single file. */ + fil_node_t *file= UT_LIST_GET_FIRST(space.chain); + /* The undo tablespace files are never closed. */ + ut_ad(file->is_open()); + + DBUG_LOG("undo", "marking for truncate: " << file->name); + + for (auto &rseg : trx_sys.rseg_array) + if (rseg.space == &space) + /* Once set, this rseg will not be allocated to subsequent + transactions, but we will wait for existing active + transactions to finish. */ + rseg.set_skip_allocation(); + + for (auto &rseg : trx_sys.rseg_array) + { + if (rseg.space != &space) + continue; + rseg.latch.rd_lock(); + ut_ad(rseg.skip_allocation()); + if (rseg.is_referenced()) + { not_free: - rseg.latch.rd_unlock(); - return; - } - - if (rseg.curr_size != 1) { - /* Check if all segments are - cached and safe to remove. */ - ulint cached = 0; - - for (trx_undo_t* undo = UT_LIST_GET_FIRST( - rseg.undo_cached); - undo; - undo = UT_LIST_GET_NEXT(undo_list, - undo)) { - if (head.trx_no < undo->trx_id) { - goto not_free; - } else { - cached += undo->size; - } - } - - ut_ad(rseg.curr_size > cached); - - if (rseg.curr_size > cached + 1) { - goto not_free; - } - } - - rseg.latch.rd_unlock(); - } - - ib::info() << "Truncating " << file->name; - trx_purge_cleanse_purge_queue(space); - - /* Flush all to-be-discarded pages of the tablespace. - - During truncation, we do not want any writes to the - to-be-discarded area, because we must set the space.size - early in order to have deterministic page allocation. - - If a log checkpoint was completed at LSN earlier than our - mini-transaction commit and the server was killed, then - discarding the to-be-trimmed pages without flushing would - break crash recovery. So, we cannot avoid the write. */ - while (buf_flush_list_space(&space)); - - log_free_check(); - - /* Re-initialize tablespace, in a single mini-transaction. */ - mtr_t mtr; - const ulint size = SRV_UNDO_TABLESPACE_SIZE_IN_PAGES; - mtr.start(); - mtr.x_lock_space(&space); - - /* Adjust the tablespace metadata. */ - mysql_mutex_lock(&fil_system.mutex); - ut_d(bool stopped=) space.set_stopping(); - ut_ad(!stopped); - space.is_being_truncated = true; - if (space.crypt_data) { - space.reacquire(); - mysql_mutex_unlock(&fil_system.mutex); - fil_space_crypt_close_tablespace(&space); - space.release(); - } else { - mysql_mutex_unlock(&fil_system.mutex); - } - - uint i = 60; - - while (space.referenced()) { - if (!--i) { - mtr.commit(); - ib::error() << "Failed to freeze" - " UNDO tablespace " - << file->name; - return; - } - - std::this_thread::sleep_for(std::chrono::seconds(1)); - } - - /* Associate the undo tablespace with mtr. - During mtr::commit(), InnoDB can use the undo - tablespace object to clear all freed ranges */ - mtr.set_named_space(&space); - mtr.trim_pages(page_id_t(space.id, size)); - fsp_header_init(&space, size, &mtr); - mysql_mutex_lock(&fil_system.mutex); - space.size = file->size = size; - mysql_mutex_unlock(&fil_system.mutex); - - buf_block_t* sys_header = trx_sysf_get(&mtr); - - for (auto& rseg : trx_sys.rseg_array) { - if (rseg.space != &space) { - continue; - } - - buf_block_t* rblock = trx_rseg_header_create( - purge_sys.truncate.current, - i, sys_header, &mtr); - ut_ad(rblock); - /* These were written by trx_rseg_header_create(). */ - ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT - + rblock->frame)); - ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_HISTORY_SIZE - + rblock->frame)); - rseg.reinit(rblock - ? rblock->page.id().page_no() : FIL_NULL); - } - - mtr.commit(); - /* Write-ahead the redo log record. */ - log_write_up_to(mtr.commit_lsn(), true); - - /* Trim the file size. */ - os_file_truncate(file->name, file->handle, - os_offset_t(size) << srv_page_size_shift, - true); - - /* This is only executed by srv_purge_coordinator_thread. */ - export_vars.innodb_undo_truncations++; - - /* In MDEV-8319 (10.5) we will PUNCH_HOLE the garbage - (with write-ahead logging). */ - mysql_mutex_lock(&fil_system.mutex); - ut_ad(&space == purge_sys.truncate.current); - ut_ad(space.is_being_truncated); - purge_sys.truncate.current->clear_stopping(); - purge_sys.truncate.current->is_being_truncated = false; - mysql_mutex_unlock(&fil_system.mutex); - - if (purge_sys.rseg != NULL - && purge_sys.rseg->last_page_no == FIL_NULL) { - /* If purge_sys.rseg is pointing to rseg that - was recently truncated then move to next rseg - element. Note: Ideally purge_sys.rseg should - be NULL because purge should complete - processing of all the records but there is - purge_batch_size that can force the purge loop - to exit before all the records are purged and - in this case purge_sys.rseg could point to a - valid rseg waiting for next purge cycle. */ - purge_sys.next_stored = false; - purge_sys.rseg = NULL; - } - - DBUG_EXECUTE_IF("ib_undo_trunc", - ib::info() << "ib_undo_trunc"; - log_buffer_flush_to_disk(); - DBUG_SUICIDE();); - - for (auto& rseg : trx_sys.rseg_array) { - if (rseg.space == &space) { - rseg.clear_skip_allocation(); - } - } - - ib::info() << "Truncated " << file->name; - purge_sys.truncate.last = purge_sys.truncate.current; - purge_sys.truncate.current = NULL; - } + rseg.latch.rd_unlock(); + return; + } + + if (rseg.curr_size != 1) + { + /* Check if all segments are cached and safe to remove. */ + ulint cached= 0; + for (trx_undo_t *undo= UT_LIST_GET_FIRST(rseg.undo_cached); undo; + undo= UT_LIST_GET_NEXT(undo_list, undo)) + { + if (head.trx_no < undo->trx_id) + goto not_free; + else + cached+= undo->size; + } + + ut_ad(rseg.curr_size > cached); + + if (rseg.curr_size > cached + 1) + goto not_free; + } + + rseg.latch.rd_unlock(); + } + + ib::info() << "Truncating " << file->name; + trx_purge_cleanse_purge_queue(space); + + log_free_check(); + + mtr_t mtr; + mtr.start(); + mtr.x_lock_space(&space); + + /* Lock all modified pages of the tablespace. + + During truncation, we do not want any writes to the file. + + If a log checkpoint was completed at LSN earlier than our + mini-transaction commit and the server was killed, then + discarding the to-be-trimmed pages without flushing would + break crash recovery. */ + mysql_mutex_lock(&buf_pool.flush_list_mutex); + + for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.flush_list); bpage; ) + { + ut_ad(bpage->oldest_modification()); + ut_ad(bpage->in_file()); + + buf_page_t *prev= UT_LIST_GET_PREV(list, bpage); + + if (bpage->id().space() == space.id && + bpage->oldest_modification() != 1) + { + ut_ad(bpage->state() == BUF_BLOCK_FILE_PAGE); + auto block= reinterpret_cast<buf_block_t*>(bpage); + block->fix(); + buf_pool.flush_hp.set(prev); + mysql_mutex_unlock(&buf_pool.flush_list_mutex); + +#ifdef BTR_CUR_HASH_ADAPT + ut_ad(!block->index); /* There is no AHI on undo tablespaces. */ +#endif + block->lock.x_lock(); + mysql_mutex_lock(&buf_pool.flush_list_mutex); + ut_ad(bpage->io_fix() == BUF_IO_NONE); + + if (bpage->oldest_modification() > 1) + { + bpage->clear_oldest_modification(false); + mtr.memo_push(block, MTR_MEMO_PAGE_X_FIX); + } + else + { + block->lock.x_unlock(); + block->unfix(); + } + + if (prev != buf_pool.flush_hp.get()) + { + /* Rescan, because we may have lost the position. */ + bpage= UT_LIST_GET_LAST(buf_pool.flush_list); + continue; + } + } + + bpage= prev; + } + + mysql_mutex_unlock(&buf_pool.flush_list_mutex); + + /* Re-initialize tablespace, in a single mini-transaction. */ + const ulint size= SRV_UNDO_TABLESPACE_SIZE_IN_PAGES; + + /* Adjust the tablespace metadata. */ + mysql_mutex_lock(&fil_system.mutex); + space.set_stopping(); + space.is_being_truncated= true; + if (space.crypt_data) + { + space.reacquire(); + mysql_mutex_unlock(&fil_system.mutex); + fil_space_crypt_close_tablespace(&space); + space.release(); + } + else + mysql_mutex_unlock(&fil_system.mutex); + + for (auto i= 6000; space.referenced(); + std::this_thread::sleep_for(std::chrono::milliseconds(10))) + { + if (!--i) + { + mtr.commit(); + ib::error() << "Failed to freeze UNDO tablespace " << file->name; + return; + } + } + + /* Associate the undo tablespace with mtr. + During mtr::commit_shrink(), InnoDB can use the undo + tablespace object to clear all freed ranges */ + mtr.set_named_space(&space); + mtr.trim_pages(page_id_t(space.id, size)); + fsp_header_init(&space, size, &mtr); + mysql_mutex_lock(&fil_system.mutex); + space.size= file->size= size; + mysql_mutex_unlock(&fil_system.mutex); + + buf_block_t *sys_header= trx_sysf_get(&mtr); + + for (auto &rseg : trx_sys.rseg_array) + { + if (rseg.space != &space) + continue; + + buf_block_t *rblock= trx_rseg_header_create(&space, + &rseg - trx_sys.rseg_array, + sys_header, &mtr); + ut_ad(rblock); + /* These were written by trx_rseg_header_create(). */ + ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT + rblock->frame)); + ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_HISTORY_SIZE + + rblock->frame)); + rseg.reinit(rblock ? rblock->page.id().page_no() : FIL_NULL); + } + + mtr.commit_shrink(space); + + /* No mutex; this is only updated by the purge coordinator. */ + export_vars.innodb_undo_truncations++; + + if (purge_sys.rseg && purge_sys.rseg->last_page_no == FIL_NULL) + { + /* If purge_sys.rseg is pointing to rseg that was recently + truncated then move to next rseg element. + + Note: Ideally purge_sys.rseg should be NULL because purge should + complete processing of all the records but srv_purge_batch_size + can force the purge loop to exit before all the records are purged. */ + purge_sys.rseg= nullptr; + purge_sys.next_stored= false; + } + + DBUG_EXECUTE_IF("ib_undo_trunc", ib::info() << "ib_undo_trunc"; + log_buffer_flush_to_disk(); + DBUG_SUICIDE();); + + for (auto &rseg : trx_sys.rseg_array) + if (rseg.space == &space) + rseg.clear_skip_allocation(); + + ib::info() << "Truncated " << file->name; + purge_sys.truncate.last= purge_sys.truncate.current; + ut_ad(&space == purge_sys.truncate.current); + purge_sys.truncate.current= nullptr; + } } /***********************************************************************//** |