From 2a4bd038f77c08c1698b37ab48bccd068f2ffcb6 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Mon, 21 Nov 2022 11:14:54 +0100 Subject: MDEV-30055 - fix race condition in shutdown_debug.test Remove DBUG_ASSERT, that depends on timing/scheduling to succeed. --- sql/sql_connect.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc index 842e5cdb101..c2d94d47f5c 100644 --- a/sql/sql_connect.cc +++ b/sql/sql_connect.cc @@ -1110,7 +1110,6 @@ void setup_connection_thread_globals(THD *thd) { DBUG_EXECUTE_IF("CONNECT_wait", { extern Dynamic_array listen_sockets; - DBUG_ASSERT(listen_sockets.size()); while (listen_sockets.size()) my_sleep(1000); }); -- cgit v1.2.1 From 3e0fd5e8a72ec8c6d48153113fb2987c2b456d17 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Mon, 21 Nov 2022 11:57:19 +0100 Subject: MDEV-30055 shutdown_now_windows.test fails with "Assertion `unix_sock.fd >= 0' failed." Remove DBUG_ASSERT that depends on timing. --- sql/sql_connect.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc index 17beb413e7f..0df727ba00c 100644 --- a/sql/sql_connect.cc +++ b/sql/sql_connect.cc @@ -1110,7 +1110,6 @@ bool setup_connection_thread_globals(THD *thd) DBUG_EXECUTE_IF("CONNECT_wait", { extern MYSQL_SOCKET unix_sock; - DBUG_ASSERT(unix_sock.fd >= 0); while (unix_sock.fd >= 0) my_sleep(1000); }); -- cgit v1.2.1 From 4e5e8166b433d5ccca9ebde6ea6f5dcae6d38c4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 21 Nov 2022 17:55:35 +0200 Subject: MDEV-19514 fixup: Fix recovery with innodb_change_buffering_debug=1 During crash recovery, recv_sys.apply(true) invokes mlog_init.mark_ibuf_exist(), which in turn may invoke recv_sys.apply(true) via the buf_flush_sync() call in buf_page_get_low(). The simplest fix is to disable the innodb_change_buffering_debug=1 instrumentation during crash recovery. 
--- storage/innobase/buf/buf0buf.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index c788a0ff3f2..272c8fadf6b 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -3043,7 +3043,7 @@ evict_from_pool: re_evict: if (mode != BUF_GET_IF_IN_POOL && mode != BUF_GET_IF_IN_POOL_OR_WATCH) { - } else if (!ibuf_debug) { + } else if (!ibuf_debug || recv_recovery_is_on()) { } else if (fil_space_t* space = fil_space_t::get(page_id.space())) { /* Try to evict the block from the buffer pool, to use the insert buffer (change buffer) as much as possible. */ -- cgit v1.2.1 From 46f8c46e940c7156f0c7374acda3540237d9e791 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 22 Nov 2022 15:00:26 +0200 Subject: MDEV-30069 InnoDB: Trying to write ... bytes at ... outside the bounds recv_sys_t::recover_deferred(): If the *.ibd file already exists, adjust the size to the tablespace metadata. It could be that in a multi-batch recovery, we will initially recover an all-zero *.ibd file to a smaller size, and then a fatal error would be reported during the last recovery batch. This bug could be worked around by executing the recovery again. During the initial (failed) recovery attempt, something should have been written to the first page of the file and the file size should be recovered by fil_node_t::read_page0(). 
--- storage/innobase/log/log0recv.cc | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 81ef4a5b680..06c7b192ec9 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -986,6 +986,20 @@ bool recv_sys_t::recover_deferred(recv_sys_t::map::iterator &p, DB_SUCCESS == os_file_punch_hole(node->handle, 0, 4096) && !my_test_if_thinly_provisioned(node->handle); #endif + /* Mimic fil_node_t::read_page0() in case the file exists and + has already been extended to a larger size. */ + ut_ad(node->size == size); + const os_offset_t file_size= os_file_get_size(node->handle); + if (file_size != os_offset_t(-1)) + { + const uint32_t n_pages= + uint32_t(file_size / fil_space_t::physical_size(flags)); + if (n_pages > size) + { + space->size= node->size= n_pages; + space->set_committed_size(); + } + } if (!os_file_set_size(node->name, node->handle, (size * fil_space_t::physical_size(flags)) & ~4095ULL, is_sparse)) -- cgit v1.2.1 From cff9939d098d1fbf795fb473bec0cd28697d1dac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 22 Nov 2022 15:31:12 +0200 Subject: MDEV-30068 Confusing error message when encryption is not available on recovery fil_name_process(): If fil_ibd_load() returns FIL_LOAD_INVALID, display the file name and the tablespace identifier. 
--- .../suite/encryption/r/innodb-redo-nokeys.result | 6 ++-- .../suite/encryption/t/innodb-redo-nokeys.test | 6 ++-- storage/innobase/log/log0recv.cc | 37 +++++++--------------- 3 files changed, 18 insertions(+), 31 deletions(-) diff --git a/mysql-test/suite/encryption/r/innodb-redo-nokeys.result b/mysql-test/suite/encryption/r/innodb-redo-nokeys.result index 23e32698a2d..8c92fd07388 100644 --- a/mysql-test/suite/encryption/r/innodb-redo-nokeys.result +++ b/mysql-test/suite/encryption/r/innodb-redo-nokeys.result @@ -1,6 +1,6 @@ -call mtr.add_suppression("mariadbd.*: File .*"); -call mtr.add_suppression("Plugin 'file_key_management' .*"); -call mtr.add_suppression("InnoDB: We do not continue the crash recovery"); +call mtr.add_suppression("mariadbd.*: File "); +call mtr.add_suppression("Plugin 'file_key_management' "); +call mtr.add_suppression("InnoDB: Recovery cannot access file"); call mtr.add_suppression("InnoDB: Plugin initialization aborted"); call mtr.add_suppression("Plugin 'InnoDB' init function returned error\\."); call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed."); diff --git a/mysql-test/suite/encryption/t/innodb-redo-nokeys.test b/mysql-test/suite/encryption/t/innodb-redo-nokeys.test index 65bd3d2db72..905b4f6333f 100644 --- a/mysql-test/suite/encryption/t/innodb-redo-nokeys.test +++ b/mysql-test/suite/encryption/t/innodb-redo-nokeys.test @@ -3,9 +3,9 @@ # embedded does not support restart -- source include/not_embedded.inc -call mtr.add_suppression("mariadbd.*: File .*"); -call mtr.add_suppression("Plugin 'file_key_management' .*"); -call mtr.add_suppression("InnoDB: We do not continue the crash recovery"); +call mtr.add_suppression("mariadbd.*: File "); +call mtr.add_suppression("Plugin 'file_key_management' "); +call mtr.add_suppression("InnoDB: Recovery cannot access file"); call mtr.add_suppression("InnoDB: Plugin initialization aborted"); call mtr.add_suppression("Plugin 'InnoDB' init function returned 
error\\."); call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed."); diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 826ddf3ff49..8db270b2a9b 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -879,35 +879,22 @@ same_space: case FIL_LOAD_INVALID: ut_ad(space == NULL); if (srv_force_recovery == 0) { - ib::warn() << "We do not continue the crash" - " recovery, because the table may" - " become corrupt if we cannot apply" - " the log records in the InnoDB log to" - " it. To fix the problem and start" - " mysqld:"; - ib::info() << "1) If there is a permission" - " problem in the file and mysqld" - " cannot open the file, you should" - " modify the permissions."; - ib::info() << "2) If the tablespace is not" - " needed, or you can restore an older" - " version from a backup, then you can" - " remove the .ibd file, and use" - " --innodb_force_recovery=1 to force" - " startup without this file."; - ib::info() << "3) If the file system or the" - " disk is broken, and you cannot" - " remove the .ibd file, you can set" - " --innodb_force_recovery."; + sql_print_error("InnoDB: Recovery cannot access" + " file %s (tablespace " + ULINTPF ")", name, space_id); + sql_print_information("InnoDB: You may set " + "innodb_force_recovery=1" + " to ignore this and" + " possibly get a" + " corrupted database."); recv_sys.found_corrupt_fs = true; break; } - ib::info() << "innodb_force_recovery was set to " - << srv_force_recovery << ". 
Continuing crash" - " recovery even though we cannot access the" - " files for tablespace " << space_id << "."; - break; + sql_print_warning("InnoDB: Ignoring changes to" + " file %s (tablespace " ULINTPF ")" + " due to innodb_force_recovery", + name, space_id); } } } -- cgit v1.2.1 From 9d388192c74f65b808702b87eb7dfbd1426717f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 22 Nov 2022 15:32:47 +0200 Subject: Cleanup: Say "mariadbd" instead of "mysqld" in InnoDB messages --- storage/innobase/log/log0log.cc | 10 ++++++---- storage/innobase/row/row0mysql.cc | 4 ++-- storage/innobase/ut/ut0dbg.cc | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 53183605759..61b0d30fec2 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -1180,10 +1180,12 @@ wait_suspend_loop: if (srv_fast_shutdown == 2 || !srv_was_started) { if (!srv_read_only_mode && srv_was_started) { - ib::info() << "MySQL has requested a very fast" - " shutdown without flushing the InnoDB buffer" - " pool to data files. At the next mysqld" - " startup InnoDB will do a crash recovery!"; + sql_print_information( + "InnoDB: Executing innodb_fast_shutdown=2 " + "(without flushing the InnoDB buffer pool" + " to data files)." + " The next mariadbd" + " invocation will perform crash recovery!"); /* In this fastest shutdown we do not flush the buffer pool: diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 1c135f787f6..5b3a56de101 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2000, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2021, MariaDB Corporation. +Copyright (c) 2015, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -743,7 +743,7 @@ handle_new_error: " table. You have to dump + drop + reimport the" " table or, in a case of widespread corruption," " dump all InnoDB tables and recreate the whole" - " tablespace. If the mysqld server crashes after" + " tablespace. If the mariadbd server crashes after" " the startup or when you dump the tables. " << FORCE_RECOVERY_MSG; goto rollback_to_savept; diff --git a/storage/innobase/ut/ut0dbg.cc b/storage/innobase/ut/ut0dbg.cc index fc51cce9500..167cc71403c 100644 --- a/storage/innobase/ut/ut0dbg.cc +++ b/storage/innobase/ut/ut0dbg.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2018, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -50,7 +50,7 @@ ut_dbg_assertion_failed( " to https://jira.mariadb.org/\n" "InnoDB: If you get repeated assertion failures" " or crashes, even\n" - "InnoDB: immediately after the mysqld startup, there may be\n" + "InnoDB: immediately after the mariadbd startup, there may be\n" "InnoDB: corruption in the InnoDB tablespace. 
Please refer to\n" "InnoDB: https://mariadb.com/kb/en/library/innodb-recovery-modes/\n" "InnoDB: about forcing recovery.\n", stderr); -- cgit v1.2.1 From 165564d3c33ae3d677d70644a83afcb744bdbf65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 23 Nov 2022 17:34:05 +0200 Subject: MDEV-30009 InnoDB shutdown hangs when the change buffer is corrupted The InnoDB change buffer (ibuf.index, stored in the system tablespace) and the change buffer bitmaps in persistent tablespaces could get out of sync with each other: According to the bitmap, no changes exist for a page, while there actually exist buffered entries in ibuf.index. InnoDB performs lazy deletion of buffered changes. When a secondary index leaf page is freed (possibly as part of DROP INDEX), any buffered changes will not be deleted. Instead, they would be deleted on a subsequent buf_page_create_low(). One scenario where InnoDB failed to delete buffered changes is as follows: 1. Some changes were buffered for a secondary index leaf page. 2. The index page had been freed. 3. ibuf_read_merge_pages() invoked ibuf_merge_or_delete_for_page(), which noticed that the page had been freed, and reset the change buffer bits, but did not delete the records from ibuf.index. 4. The index page was reallocated for something else. 5. The index page was removed from the buffer pool. 6. Some changes were buffered for the newly created page. 7. Finally, the buffered changes from both 1. and 6. were merged. 8. The index is corrupted. An alternative outcome is: 4. Shutdown with innodb_fast_shutdown=0 gets into an infinite loop. An alternative scenario is: 3. ibuf_set_bitmap_for_bulk_load() reset the IBUF_BITMAP_BUFFERED bit but did not delete the ibuf.index records for that page number. 
A fix for the shutdown hang was already applied once in commit d7a2401750bb29dfdb45929a536539b9f17b347f, refactored for 10.5 in commit 77e8a311e1f919f15845c75d08de4340965c0bc4, and disabled in commit 310dff5d847b3c117ab6bca8e6ccbcc8bca818d9 because the fix itself introduced corruption. We will fix this as follows: ibuf_delete_recs(): Delete all ibuf.index entries for the specified page. ibuf_merge_or_delete_for_page(): When the change buffer bitmap bits were set and the page had been freed, and the page does not belong to ibuf.index itself, invoke ibuf_delete_recs(). This prevents the corruption from occurring when a DML operation is allocating a previously freed page for which changes had been buffered. ibuf_set_bitmap_for_bulk_load(): When the change buffer bitmap bits were set, invoke ibuf_delete_recs(). This prevents the corruption from occurring when CREATE INDEX is reusing a previously freed page. ibuf_read_merge_pages(): On slow shutdown, remove the orphan records by invoking ibuf_delete_recs(). This fixes the hang when the change buffer had become corrupted. We also remove the dops[] accounting, because nothing can monitor it during shutdown. We invoke ibuf_delete_recs() if: (a) buf_page_get_gen() failed to load the page or merge changes (b) the page is not a valid index leaf page (c) the page number is out of tablespace bounds srv_shutdown(): Invoke ibuf_max_size_update(0) to ensure that the race condition that motivated us to disable the code in ibuf_read_merge_pages() in commit 310dff5d847b3c117ab6bca8e6ccbcc8bca818d9 is no longer possible. That is, during slow shutdown, both the rollback of transactions and the purge of history will return early from ibuf_insert_low(). ibuf_merge_space(), ibuf_delete_for_discarded_space(): Cleanup: Do not allocate a memory heap. This was implemented by Thirunarayanan Balathandayuthapani and tested with innodb_change_buffering_debug=1 by Matthias Leich. 
--- storage/innobase/ibuf/ibuf0ibuf.cc | 245 ++++++++++++++++++++++--------------- storage/innobase/srv/srv0srv.cc | 4 + 2 files changed, 149 insertions(+), 100 deletions(-) diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 5ac10b0fb33..ee462bc68c2 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -2274,16 +2274,74 @@ static MY_ATTRIBUTE((warn_unused_result, nonnull)) bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur, const dtuple_t* search_tuple, mtr_t* mtr); +/** Delete the change buffer records for the given page id +@param page_id page identifier */ +static void ibuf_delete_recs(const page_id_t page_id) +{ + if (!ibuf.index || srv_read_only_mode) + return; + dfield_t dfield[IBUF_REC_FIELD_METADATA]; + dtuple_t tuple {0,IBUF_REC_FIELD_METADATA,IBUF_REC_FIELD_METADATA, + dfield,0,nullptr +#ifdef UNIV_DEBUG + ,DATA_TUPLE_MAGIC_N +#endif + }; + byte space_id[4], page_no[4]; + + mach_write_to_4(space_id, page_id.space()); + mach_write_to_4(page_no, page_id.page_no()); + + dfield_set_data(&dfield[0], space_id, 4); + dfield_set_data(&dfield[1], field_ref_zero, 1); + dfield_set_data(&dfield[2], page_no, 4); + dtuple_set_types_binary(&tuple, IBUF_REC_FIELD_METADATA); + + mtr_t mtr; +loop: + btr_pcur_t pcur; + ibuf_mtr_start(&mtr); + if (btr_pcur_open(ibuf.index, &tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, + &pcur, &mtr) != DB_SUCCESS) + goto func_exit; + if (!btr_pcur_is_on_user_rec(&pcur)) + { + ut_ad(btr_pcur_is_after_last_on_page(&pcur)); + goto func_exit; + } + + for (;;) + { + ut_ad(btr_pcur_is_on_user_rec(&pcur)); + const rec_t* ibuf_rec = btr_pcur_get_rec(&pcur); + if (ibuf_rec_get_space(&mtr, ibuf_rec) != page_id.space() + || ibuf_rec_get_page_no(&mtr, ibuf_rec) != page_id.page_no()) + break; + /* Delete the record from ibuf */ + if (ibuf_delete_rec(page_id, &pcur, &tuple, &mtr)) + { + /* Deletion was pessimistic and mtr was committed: + we start from the beginning again */ 
+ ut_ad(mtr.has_committed()); + goto loop; + } + + if (btr_pcur_is_after_last_on_page(&pcur)) + { + ibuf_mtr_commit(&mtr); + btr_pcur_close(&pcur); + goto loop; + } + } +func_exit: + ibuf_mtr_commit(&mtr); + btr_pcur_close(&pcur); +} + /** Merge the change buffer to some pages. */ static void ibuf_read_merge_pages(const uint32_t* space_ids, const uint32_t* page_nos, ulint n_stored) { -#ifndef DBUG_OFF - mem_heap_t* heap = mem_heap_create(512); - ulint dops[IBUF_OP_COUNT]; - memset(dops, 0, sizeof(dops)); -#endif - for (ulint i = 0; i < n_stored; i++) { const ulint space_id = space_ids[i]; fil_space_t* s = fil_space_t::get(space_id); @@ -2306,24 +2364,36 @@ tablespace_deleted: if (UNIV_LIKELY(page_nos[i] < size)) { mtr.start(); dberr_t err; + buf_block_t *block = buf_page_get_gen(page_id_t(space_id, page_nos[i]), zip_size, RW_X_LATCH, nullptr, BUF_GET_POSSIBLY_FREED, __FILE__, __LINE__, &mtr, &err, true); + bool remove = !block + || fil_page_get_type(block->frame) + != FIL_PAGE_INDEX + || !page_is_leaf(block->frame); mtr.commit(); if (err == DB_TABLESPACE_DELETED) { goto tablespace_deleted; } + if (!remove) { + continue; + } + } + + if (srv_shutdown_state == SRV_SHUTDOWN_NONE + || srv_fast_shutdown) { + continue; } -#ifndef DBUG_OFF - DBUG_EXECUTE_IF("ibuf_merge_corruption", goto work_around;); - continue; /* The following code works around a hang when the - change buffer is corrupted, likely due to the race - condition in crash recovery that was fixed in - MDEV-24449. But, it also introduces corruption by - itself in the following scenario: + change buffer is corrupted, likely due to the + failure of ibuf_merge_or_delete_for_page() to + invoke ibuf_delete_recs() if (!bitmap_bits). + + It also introduced corruption by itself in the + following scenario: (1) We merged buffered changes in buf_page_get_gen() (2) We committed the mini-transaction @@ -2332,64 +2402,16 @@ tablespace_deleted: (5) Other threads buffer changes for that page. 
(6) We will wrongly discard those newly buffered changes below. - This code will be available in debug builds, so that - users may try to fix a shutdown hang that occurs due - to a corrupted change buffer. */ + To prevent this scenario, we will only invoke this code + on shutdown. A call to ibuf_max_size_update(0) will cause + ibuf_insert_low() to refuse to insert anything into the + change buffer. */ -work_around: /* Prevent an infinite loop, by removing entries from - the change buffer also in the case the bitmap bits were + the change buffer in the case the bitmap bits were wrongly clear even though buffered changes exist. */ - const dtuple_t* tuple = ibuf_search_tuple_build( - space_id, page_nos[i], heap); -loop: - btr_pcur_t pcur; - ibuf_mtr_start(&mtr); - btr_pcur_open(ibuf.index, tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, - &pcur, &mtr); - if (!btr_pcur_is_on_user_rec(&pcur)) { - ut_ad(btr_pcur_is_after_last_on_page(&pcur)); - goto done; - } - - for (;;) { - ut_ad(btr_pcur_is_on_user_rec(&pcur)); - - const rec_t* ibuf_rec = btr_pcur_get_rec(&pcur); - if (ibuf_rec_get_space(&mtr, ibuf_rec) != space_id - || ibuf_rec_get_page_no(&mtr, ibuf_rec) - != page_nos[i]) { - break; - } - - dops[ibuf_rec_get_op_type(&mtr, ibuf_rec)]++; - /* Delete the record from ibuf */ - if (ibuf_delete_rec(page_id_t(space_id, page_nos[i]), - &pcur, tuple, &mtr)) { - /* Deletion was pessimistic and mtr - was committed: we start from the - beginning again */ - ut_ad(mtr.has_committed()); - goto loop; - } - - if (btr_pcur_is_after_last_on_page(&pcur)) { - ibuf_mtr_commit(&mtr); - btr_pcur_close(&pcur); - goto loop; - } - } -done: - ibuf_mtr_commit(&mtr); - btr_pcur_close(&pcur); - mem_heap_empty(heap); -#endif + ibuf_delete_recs(page_id_t(space_ids[i], page_nos[i])); } - -#ifndef DBUG_OFF - ibuf_add_ops(ibuf.n_discarded_ops, dops); - mem_heap_free(heap); -#endif } /** Contract the change buffer by reading pages to the buffer pool. 
@@ -2455,8 +2477,23 @@ ibuf_merge_space( { mtr_t mtr; btr_pcur_t pcur; - mem_heap_t* heap = mem_heap_create(512); - dtuple_t* tuple = ibuf_search_tuple_build(space, 0, heap); + + dfield_t dfield[IBUF_REC_FIELD_METADATA]; + dtuple_t tuple {0, IBUF_REC_FIELD_METADATA, + IBUF_REC_FIELD_METADATA,dfield,0,nullptr +#ifdef UNIV_DEBUG + , DATA_TUPLE_MAGIC_N +#endif + }; + byte space_id[4]; + + mach_write_to_4(space_id, space); + + dfield_set_data(&dfield[0], space_id, 4); + dfield_set_data(&dfield[1], field_ref_zero, 1); + dfield_set_data(&dfield[2], field_ref_zero, 4); + + dtuple_set_types_binary(&tuple, IBUF_REC_FIELD_METADATA); ulint n_pages = 0; ut_ad(space < SRV_SPACE_ID_UPPER_BOUND); @@ -2467,11 +2504,9 @@ ibuf_merge_space( /* Position the cursor on the first matching record. */ btr_pcur_open( - ibuf.index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, + ibuf.index, &tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); - mem_heap_free(heap); - ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf.index)); ulint sum_sizes = 0; @@ -4194,6 +4229,11 @@ void ibuf_merge_or_delete_for_page(buf_block_t *block, const page_id_t page_id, ibuf_reset_bitmap(block, page_id, zip_size, &mtr); ibuf_mtr_commit(&mtr); bitmap_bits = 0; + if (!block + || btr_page_get_index_id(block->frame) + != DICT_IBUF_ID_MIN + IBUF_SPACE_ID) { + ibuf_delete_recs(page_id); + } } if (!bitmap_bits) { @@ -4440,22 +4480,31 @@ in DISCARD TABLESPACE, IMPORT TABLESPACE, or read-ahead. @param[in] space missing or to-be-discarded tablespace */ void ibuf_delete_for_discarded_space(ulint space) { - mem_heap_t* heap; btr_pcur_t pcur; - dtuple_t* search_tuple; const rec_t* ibuf_rec; mtr_t mtr; /* Counts for discarded operations. 
*/ ulint dops[IBUF_OP_COUNT]; - heap = mem_heap_create(512); + dfield_t dfield[IBUF_REC_FIELD_METADATA]; + dtuple_t search_tuple {0,IBUF_REC_FIELD_METADATA, + IBUF_REC_FIELD_METADATA,dfield,0 + ,nullptr +#ifdef UNIV_DEBUG + ,DATA_TUPLE_MAGIC_N +#endif /* UNIV_DEBUG */ + }; + byte space_id[4]; + mach_write_to_4(space_id, space); + dfield_set_data(&dfield[0], space_id, 4); + dfield_set_data(&dfield[1], field_ref_zero, 1); + dfield_set_data(&dfield[2], field_ref_zero, 4); + dtuple_set_types_binary(&search_tuple, IBUF_REC_FIELD_METADATA); /* Use page number 0 to build the search tuple so that we get the cursor positioned at the first entry for this space id */ - search_tuple = ibuf_search_tuple_build(space, 0, heap); - memset(dops, 0, sizeof(dops)); loop: log_free_check(); @@ -4464,7 +4513,7 @@ loop: /* Position pcur in the insert buffer at the first entry for the space */ btr_pcur_open_on_user_rec( - ibuf.index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, + ibuf.index, &search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &pcur, &mtr); if (!btr_pcur_is_on_user_rec(&pcur)) { @@ -4489,7 +4538,7 @@ loop: /* Delete the record from ibuf */ if (ibuf_delete_rec(page_id_t(space, page_no), - &pcur, search_tuple, &mtr)) { + &pcur, &search_tuple, &mtr)) { /* Deletion was pessimistic and mtr was committed: we start from the beginning again */ @@ -4510,8 +4559,6 @@ leave_loop: btr_pcur_close(&pcur); ibuf_add_ops(ibuf.n_discarded_ops, dops); - - mem_heap_free(heap); } /******************************************************************//** @@ -4682,23 +4729,21 @@ dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space) void ibuf_set_bitmap_for_bulk_load(buf_block_t *block, mtr_t *mtr, bool reset) { - ulint free_val; - - ut_a(page_is_leaf(buf_block_get_frame(block))); - - free_val = ibuf_index_page_calc_free(block); - - buf_block_t* bitmap_page = ibuf_bitmap_get_map_page(block->page.id(), - block->zip_size(), - mtr); - - free_val = reset ? 
0 : ibuf_index_page_calc_free(block); - /* FIXME: update the bitmap byte only once! */ - ibuf_bitmap_page_set_bits( - bitmap_page, block->page.id(), block->physical_size(), - free_val, mtr); - - ibuf_bitmap_page_set_bits( - bitmap_page, block->page.id(), block->physical_size(), - false, mtr); + ut_a(page_is_leaf(block->frame)); + const page_id_t id{block->page.id()}; + const auto zip_size= block->zip_size(); + + if (buf_block_t *bitmap_page= ibuf_bitmap_get_map_page(id, zip_size, mtr)) + { + if (ibuf_bitmap_page_get_bits(bitmap_page->frame, id, zip_size, + IBUF_BITMAP_BUFFERED, mtr)) + ibuf_delete_recs(id); + + ulint free_val= reset ? 0 : ibuf_index_page_calc_free(block); + /* FIXME: update the bitmap byte only once! */ + ibuf_bitmap_page_set_bits + (bitmap_page, id, block->physical_size(), free_val, mtr); + ibuf_bitmap_page_set_bits + (bitmap_page, id, block->physical_size(), false, mtr); + } } diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 5a01b408a4b..a2d98ad88c4 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -1675,6 +1675,10 @@ void srv_shutdown(bool ibuf_merge) if (ibuf_merge) { srv_main_thread_op_info = "doing insert buffer merge"; + /* Disallow the use of change buffer to + avoid a race condition with + ibuf_read_merge_pages() */ + ibuf_max_size_update(0); log_free_check(); n_read = ibuf_contract(); } -- cgit v1.2.1