From 310dff5d847b3c117ab6bca8e6ccbcc8bca818d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 7 Jun 2021 19:07:45 +0300 Subject: MDEV-25783: Change buffer records are lost under heavy load ibuf_read_merge_pages(): Disable some code that was added in MDEV-20394 in order to avoid a server hang if the change buffer is corrupted, presumably due to the race condition in recovery that was later fixed in MDEV-24449. The code will still be available in debug builds when the command line option --debug=d,ibuf_merge_corruption is specified. Due to MDEV-19514, the impact of this code is much worse starting with the 10.5 series. In older versions, the code was only enabled during a shutdown with innodb_fast_shutdown=0, but in 10.5 it was active during the normal operation of the server. --- storage/innobase/ibuf/ibuf0ibuf.cc | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 3c7f2de2a67..9288a496735 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -2296,9 +2296,11 @@ bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur, static void ibuf_read_merge_pages(const uint32_t* space_ids, const uint32_t* page_nos, ulint n_stored) { +#ifndef DBUG_OFF mem_heap_t* heap = mem_heap_create(512); ulint dops[IBUF_OP_COUNT]; memset(dops, 0, sizeof(dops)); +#endif for (ulint i = 0; i < n_stored; i++) { const ulint space_id = space_ids[i]; @@ -2331,6 +2333,28 @@ tablespace_deleted: goto tablespace_deleted; } } +#ifndef DBUG_OFF + DBUG_EXECUTE_IF("ibuf_merge_corruption", goto work_around;); + continue; + + /* The following code works around a hang when the + change buffer is corrupted, likely due to the race + condition in crash recovery that was fixed in + MDEV-24449. 
However, it also introduces corruption by + itself in the following scenario: + + (1) We merged buffered changes in buf_page_get_gen(). + (2) We committed the mini-transaction. + (3) The redo log and the page with the merged changes are written. + (4) A write completion callback thread evicts the page. + (5) Other threads buffer changes for that page. + (6) We will wrongly discard those newly buffered changes below. + + This code will be available in debug builds, so that + users may try to fix a shutdown hang that occurs due + to a corrupted change buffer. */ + +work_around: /* Prevent an infinite loop, by removing entries from the change buffer also in the case the bitmap bits were wrongly clear even though buffered changes exist. */ @@ -2377,10 +2401,13 @@ done: ibuf_mtr_commit(&mtr); btr_pcur_close(&pcur); mem_heap_empty(heap); +#endif } +#ifndef DBUG_OFF ibuf_add_ops(ibuf.n_discarded_ops, dops); mem_heap_free(heap); +#endif } /*********************************************************************//** -- cgit v1.2.1