diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2020-09-07 15:31:54 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2020-09-07 15:31:54 +0300 |
commit | f99cace77fbf3076137ba63b024465c6ab6ee25e (patch) | |
tree | 971a75a6a0b63e058f6ce60e12ae9c177104e34a | |
parent | 9dedba16ab12976487377208afe4721ea7b322ce (diff) | |
download | mariadb-git-bb-10.2-MDEV-22924.tar.gz |
MDEV-22924 Corruption in MVCC read via secondary indexbb-10.2-MDEV-22924
An unsafe optimization was introduced by
commit 2347ffd843b8e4ee9d8eaafab05368435db59ece (MDEV-20301)
which is based on
mysql/mysql-server@3f3136188f1bd383f77f97823cf6ebd72d5e4d7e or
mysql/mysql-server@647a3814a91c3d3bffc70ddff5513398e3f37bd4
in MySQL 8.0.12 or MySQL 8.0.13
(which in turn is based on the contribution in MySQL Bug #84958).
Row_sel_get_clust_rec_for_mysql::operator(): In addition to checking
that the pointer to the record matches, also check the latest
modification of the page (FIL_PAGE_LSN) as well as the page identifier.
Only if all three match, it is safe to reuse cached_old_vers.
Row_sel_get_clust_rec_for_mysql::check_eq(): Assert that the PRIMARY KEY
of the cached old version of the record corresponds to the latest version.
We got a test case where CHECK TABLE, UPDATE and purge would be
hammering on the same table (with only 6 rows) and a pointer that
was originally pointing to a record pk=2 would match a cached_clust_rec
that was pointing to a record pk=1. In the diagnosed `rr replay` trace,
we would wrongly return an old cached version of the pk=1 record,
instead of retrieving the correct version of the pk=2 record. Because
of this, CHECK TABLE would fail to count one of the records in a
secondary index, and report failure.
This bug appears to affect MVCC reads via secondary indexes only.
The purge of history in secondary indexes uses a different code path,
and so do checks for implicit record locks.
-rw-r--r-- | storage/innobase/include/dict0mem.h | 19 | ||||
-rw-r--r-- | storage/innobase/row/row0sel.cc | 63 |
2 files changed, 70 insertions, 12 deletions
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 804176e2d5b..21d9e763178 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -1014,6 +1014,25 @@ struct dict_index_t{ /** @return whether the index includes virtual columns */ bool has_virtual() const { return type & DICT_VIRTUAL; } + /** @return the position of DB_TRX_ID */ + uint16_t db_trx_id() const { + DBUG_ASSERT(is_primary()); + DBUG_ASSERT(n_uniq); + return n_uniq; + } + /** @return the position of DB_ROLL_PTR */ + uint16_t db_roll_ptr() const + { + return static_cast<uint16_t>(db_trx_id() + 1); + } + + /** @return the offset of the metadata BLOB field, + or the first user field after the PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR */ + uint16_t first_user_field() const + { + return static_cast<uint16_t>(db_trx_id() + 2); + } + /** @return whether the index is corrupted */ inline bool is_corrupted() const; diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index ae168ad5130..6951e0c7aac 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -3303,20 +3303,46 @@ row_sel_build_prev_vers_for_mysql( return(err); } -/** Helper class to cache clust_rec and old_ver */ +/** Helper class to cache clust_rec and old_vers */ class Row_sel_get_clust_rec_for_mysql { - const rec_t *cached_clust_rec; - rec_t *cached_old_vers; + const rec_t *cached_clust_rec; + rec_t *cached_old_vers; + lsn_t cached_lsn; + page_id_t cached_page_id; -public: - Row_sel_get_clust_rec_for_mysql() : - cached_clust_rec(NULL), cached_old_vers(NULL) {} +#ifdef UNIV_DEBUG + void check_eq(const dict_index_t *index, const rec_offs *offsets) const + { + rec_offs vers_offs[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS]; + rec_offs_init(vers_offs); + mem_heap_t *heap= nullptr; + + ut_ad(rec_offs_validate(cached_clust_rec, index, offsets)); + ut_ad(index->first_user_field() <= rec_offs_n_fields(offsets)); + ut_ad(vers_offs == rec_get_offsets(cached_old_vers, index, vers_offs, true, + index->db_trx_id(), &heap)); + ut_ad(!heap); + for (auto n= index->db_trx_id(); n--; ) + { + const dict_col_t *col= dict_index_get_nth_col(index, n); + ulint len1, len2; + const byte *b1= rec_get_nth_field(cached_clust_rec, offsets, n, &len1); + const byte *b2= rec_get_nth_field(cached_old_vers, vers_offs, n, &len2); + ut_ad(!cmp_data_data(col->mtype, col->prtype, b1, len1, b2, len2)); + } + } +#endif - dberr_t operator()(row_prebuilt_t *prebuilt, dict_index_t *sec_index, - const rec_t *rec, que_thr_t *thr, const rec_t **out_rec, - rec_offs **offsets, mem_heap_t **offset_heap, - dtuple_t **vrow, mtr_t *mtr); +public: + Row_sel_get_clust_rec_for_mysql() : + cached_clust_rec(NULL), cached_old_vers(NULL), cached_lsn(0), + cached_page_id(page_id_t(0,0)) {} + + dberr_t operator()(row_prebuilt_t *prebuilt, dict_index_t *sec_index, + const rec_t *rec, que_thr_t *thr, const rec_t **out_rec, + rec_offs **offsets, mem_heap_t **offset_heap, + dtuple_t **vrow, mtr_t *mtr); }; /*********************************************************************//** @@ -3516,8 +3542,18 @@ Row_sel_get_clust_rec_for_mysql::operator()( && !lock_clust_rec_cons_read_sees( clust_rec, clust_index, *offsets, trx_get_read_view(trx))) { + const buf_page_t& bpage = btr_pcur_get_block( + prebuilt->clust_pcur)->page; + + lsn_t lsn = bpage.newest_modification; + if (!lsn) { + lsn = mach_read_from_8( + page_align(clust_rec) + FIL_PAGE_LSN); + } - if (clust_rec != cached_clust_rec) { + if (lsn != cached_lsn + || bpage.id != cached_page_id + || clust_rec != cached_clust_rec) { /* The following call returns 'offsets' associated with 'old_vers' */ err = row_sel_build_prev_vers_for_mysql( @@ -3529,6 +3565,8 @@ Row_sel_get_clust_rec_for_mysql::operator()( goto err_exit; } + cached_lsn = lsn; + cached_page_id = bpage.id; cached_clust_rec = clust_rec; cached_old_vers = old_vers; } else { @@ -3539,7 +3577,8 @@ Row_sel_get_clust_rec_for_mysql::operator()( version of clust_rec and its old version old_vers. Re-calculate the offsets for old_vers. */ - if (old_vers != NULL) { + if (old_vers) { + ut_d(check_eq(clust_index, *offsets)); *offsets = rec_get_offsets( old_vers, clust_index, *offsets, true, ULINT_UNDEFINED, offset_heap); |