summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Marko Mäkelä <marko.makela@mariadb.com> 2021-06-07 19:07:45 +0300
committer: Marko Mäkelä <marko.makela@mariadb.com> 2021-06-07 19:07:45 +0300
commit: 310dff5d847b3c117ab6bca8e6ccbcc8bca818d9 (patch)
tree: 79b8469c11e16608ee177513ff70bd14f6d295b5
parent: f456e716feac2fe9a2b1eb5247128c271e1a4e83 (diff)
download: mariadb-git-310dff5d847b3c117ab6bca8e6ccbcc8bca818d9.tar.gz
MDEV-25783: Change buffer records are lost under heavy load
ibuf_read_merge_pages(): Disable some code that MDEV-20394 had added in order to avoid a server hang when the change buffer is corrupted (the corruption presumably being due to the race condition in recovery that was later fixed in MDEV-24449). The code will still be available in debug builds when the command line option --debug=d,ibuf_merge_corruption is specified.

Due to MDEV-19514, the impact of this code is much worse starting with the 10.5 series. In older versions, the code was only enabled during a shutdown with innodb_fast_shutdown=0, but in 10.5 it was active during the normal operation of the server.
-rw-r--r-- storage/innobase/ibuf/ibuf0ibuf.cc 27
1 files changed, 27 insertions, 0 deletions
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
index 3c7f2de2a67..9288a496735 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.cc
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -2296,9 +2296,11 @@ bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur,
static void ibuf_read_merge_pages(const uint32_t* space_ids,
const uint32_t* page_nos, ulint n_stored)
{
+#ifndef DBUG_OFF
mem_heap_t* heap = mem_heap_create(512);
ulint dops[IBUF_OP_COUNT];
memset(dops, 0, sizeof(dops));
+#endif
for (ulint i = 0; i < n_stored; i++) {
const ulint space_id = space_ids[i];
@@ -2331,6 +2333,28 @@ tablespace_deleted:
goto tablespace_deleted;
}
}
+#ifndef DBUG_OFF
+ DBUG_EXECUTE_IF("ibuf_merge_corruption", goto work_around;);
+ continue;
+
+ /* The following code works around a hang when the
+ change buffer is corrupted, likely due to the race
+ condition in crash recovery that was fixed in
+ MDEV-24449. But, it also introduces corruption by
+ itself in the following scenario:
+
+ (1) We merged buffered changes in buf_page_get_gen()
+ (2) We committed the mini-transaction
+ (3) Redo log and the page with the merged changes is written
+ (4) A write completion callback thread evicts the page.
+ (5) Other threads buffer changes for that page.
+ (6) We will wrongly discard those newly buffered changes below.
+
+ This code will be available in debug builds, so that
+ users may try to fix a shutdown hang that occurs due
+ to a corrupted change buffer. */
+
+work_around:
/* Prevent an infinite loop, by removing entries from
the change buffer also in the case the bitmap bits were
wrongly clear even though buffered changes exist. */
@@ -2377,10 +2401,13 @@ done:
ibuf_mtr_commit(&mtr);
btr_pcur_close(&pcur);
mem_heap_empty(heap);
+#endif
}
+#ifndef DBUG_OFF
ibuf_add_ops(ibuf.n_discarded_ops, dops);
mem_heap_free(heap);
+#endif
}
/*********************************************************************//**