summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2020-09-07 15:31:54 +0300
committerMarko Mäkelä <marko.makela@mariadb.com>2020-09-07 15:31:54 +0300
commitf99cace77fbf3076137ba63b024465c6ab6ee25e (patch)
tree971a75a6a0b63e058f6ce60e12ae9c177104e34a
parent9dedba16ab12976487377208afe4721ea7b322ce (diff)
downloadmariadb-git-bb-10.2-MDEV-22924.tar.gz
MDEV-22924 Corruption in MVCC read via secondary indexbb-10.2-MDEV-22924
An unsafe optimization was introduced by commit 2347ffd843b8e4ee9d8eaafab05368435db59ece (MDEV-20301) which is based on mysql/mysql-server@3f3136188f1bd383f77f97823cf6ebd72d5e4d7e or mysql/mysql-server@647a3814a91c3d3bffc70ddff5513398e3f37bd4 in MySQL 8.0.12 or MySQL 8.0.13 (which in turn is based on the contribution in MySQL Bug #84958). Row_sel_get_clust_rec_for_mysql::operator(): In addition to checking that the pointer to the record matches, also check the latest modification of the page (FIL_PAGE_LSN) as well as the page identifier. Only if all three match, it is safe to reuse cached_old_vers. Row_sel_get_clust_rec_for_mysql::check_eq(): Assert that the PRIMARY KEY of the cached old version of the record corresponds to the latest version. We got a test case where CHECK TABLE, UPDATE and purge would be hammering on the same table (with only 6 rows) and a pointer that was originally pointing to a record pk=2 would match a cached_clust_rec that was pointing to a record pk=1. In the diagnosed `rr replay` trace, we would wrongly return an old cached version of the pk=1 record, instead of retrieving the correct version of the pk=2 record. Because of this, CHECK TABLE would fail to count one of the records in a secondary index, and report failure. This bug appears to affect MVCC reads via secondary indexes only. The purge of history in secondary indexes uses a different code path, and so do checks for implicit record locks.
-rw-r--r--storage/innobase/include/dict0mem.h19
-rw-r--r--storage/innobase/row/row0sel.cc63
2 files changed, 70 insertions, 12 deletions
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index 804176e2d5b..21d9e763178 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -1014,6 +1014,25 @@ struct dict_index_t{
/** @return whether the index includes virtual columns */
bool has_virtual() const { return type & DICT_VIRTUAL; }
+ /** @return the position of DB_TRX_ID */
+ uint16_t db_trx_id() const {
+ DBUG_ASSERT(is_primary());
+ DBUG_ASSERT(n_uniq);
+ return n_uniq;
+ }
+ /** @return the position of DB_ROLL_PTR */
+ uint16_t db_roll_ptr() const
+ {
+ return static_cast<uint16_t>(db_trx_id() + 1);
+ }
+
+ /** @return the offset of the metadata BLOB field,
+ or the first user field after the PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR */
+ uint16_t first_user_field() const
+ {
+ return static_cast<uint16_t>(db_trx_id() + 2);
+ }
+
/** @return whether the index is corrupted */
inline bool is_corrupted() const;
diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc
index ae168ad5130..6951e0c7aac 100644
--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -3303,20 +3303,46 @@ row_sel_build_prev_vers_for_mysql(
return(err);
}
-/** Helper class to cache clust_rec and old_ver */
+/** Helper class to cache clust_rec and old_vers */
class Row_sel_get_clust_rec_for_mysql
{
- const rec_t *cached_clust_rec;
- rec_t *cached_old_vers;
+ const rec_t *cached_clust_rec;
+ rec_t *cached_old_vers;
+ lsn_t cached_lsn;
+ page_id_t cached_page_id;
-public:
- Row_sel_get_clust_rec_for_mysql() :
- cached_clust_rec(NULL), cached_old_vers(NULL) {}
+#ifdef UNIV_DEBUG
+ void check_eq(const dict_index_t *index, const rec_offs *offsets) const
+ {
+ rec_offs vers_offs[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS];
+ rec_offs_init(vers_offs);
+ mem_heap_t *heap= nullptr;
+
+ ut_ad(rec_offs_validate(cached_clust_rec, index, offsets));
+ ut_ad(index->first_user_field() <= rec_offs_n_fields(offsets));
+ ut_ad(vers_offs == rec_get_offsets(cached_old_vers, index, vers_offs, true,
+ index->db_trx_id(), &heap));
+ ut_ad(!heap);
+ for (auto n= index->db_trx_id(); n--; )
+ {
+ const dict_col_t *col= dict_index_get_nth_col(index, n);
+ ulint len1, len2;
+ const byte *b1= rec_get_nth_field(cached_clust_rec, offsets, n, &len1);
+ const byte *b2= rec_get_nth_field(cached_old_vers, vers_offs, n, &len2);
+ ut_ad(!cmp_data_data(col->mtype, col->prtype, b1, len1, b2, len2));
+ }
+ }
+#endif
- dberr_t operator()(row_prebuilt_t *prebuilt, dict_index_t *sec_index,
- const rec_t *rec, que_thr_t *thr, const rec_t **out_rec,
- rec_offs **offsets, mem_heap_t **offset_heap,
- dtuple_t **vrow, mtr_t *mtr);
+public:
+ Row_sel_get_clust_rec_for_mysql() :
+ cached_clust_rec(NULL), cached_old_vers(NULL), cached_lsn(0),
+ cached_page_id(page_id_t(0,0)) {}
+
+ dberr_t operator()(row_prebuilt_t *prebuilt, dict_index_t *sec_index,
+ const rec_t *rec, que_thr_t *thr, const rec_t **out_rec,
+ rec_offs **offsets, mem_heap_t **offset_heap,
+ dtuple_t **vrow, mtr_t *mtr);
};
/*********************************************************************//**
@@ -3516,8 +3542,18 @@ Row_sel_get_clust_rec_for_mysql::operator()(
&& !lock_clust_rec_cons_read_sees(
clust_rec, clust_index, *offsets,
trx_get_read_view(trx))) {
+ const buf_page_t& bpage = btr_pcur_get_block(
+ prebuilt->clust_pcur)->page;
+
+ lsn_t lsn = bpage.newest_modification;
+ if (!lsn) {
+ lsn = mach_read_from_8(
+ page_align(clust_rec) + FIL_PAGE_LSN);
+ }
- if (clust_rec != cached_clust_rec) {
+ if (lsn != cached_lsn
+ || bpage.id != cached_page_id
+ || clust_rec != cached_clust_rec) {
/* The following call returns 'offsets' associated with
'old_vers' */
err = row_sel_build_prev_vers_for_mysql(
@@ -3529,6 +3565,8 @@ Row_sel_get_clust_rec_for_mysql::operator()(
goto err_exit;
}
+ cached_lsn = lsn;
+ cached_page_id = bpage.id;
cached_clust_rec = clust_rec;
cached_old_vers = old_vers;
} else {
@@ -3539,7 +3577,8 @@ Row_sel_get_clust_rec_for_mysql::operator()(
version of clust_rec and its old version
old_vers. Re-calculate the offsets for old_vers. */
- if (old_vers != NULL) {
+ if (old_vers) {
+ ut_d(check_eq(clust_index, *offsets));
*offsets = rec_get_offsets(
old_vers, clust_index, *offsets,
true, ULINT_UNDEFINED, offset_heap);