summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Marko Mäkelä <marko.makela@mariadb.com> 2019-04-26 17:55:35 +0300
committer: Marko Mäkelä <marko.makela@mariadb.com> 2019-04-26 18:22:22 +0300
commit: aed761ced539bf5481f8358dfd7ca599da39273b (patch)
tree: 023476c8e1bef8cb54786331fd61f6cd5886094c
parent: 762663d89eda4c83ec0ed179a58618bf4ccabc46 (diff)
download: mariadb-git-10.2-MDEV-17603.tar.gz
WIP: MDEV-17603 REPLACE and INSERT…ON DUPLICATE KEY UPDATE are deadlock-prone (branch: 10.2-MDEV-17603)
Implement an alternative fix for the bug whose original fix mysql/mysql-server@c93b0d9a972cb6f98fd445f2b69d924350f9128a in MySQL 5.7.4 caused problems. This is based on mysql/mysql-server@e0e4bacddf421550baca3578bc0db13693874fdb in MySQL 5.7.26.

When performing a rollback to the start of the current row operation in REPLACE or INSERT...ON DUPLICATE KEY UPDATE, we were not maintaining serializability, because we would release implicit locks that could already have been acquired for some of the indexes.

lock_rec_convert_impl_to_expl_for_trx(): Declare globally.

undo_node_t::convert_impl_to_expl(): Convert an implicit lock to an explicit one during a partial rollback.

row_insert_for_mysql(): Set trx->duplicates=ULINT_UNDEFINED for rolling back the current row operation. This will allow undo_node_t::convert_impl_to_expl() to be effective only for this use case, not for other scenarios, such as rolling back to the start of the statement, or ROLLBACK TO SAVEPOINT.

FIXME: Neither innodb.auto_increment_dup,log-bin nor innodb.iodku from the upstream fix (which we did not add) passes. While undo_node_t::convert_impl_to_expl() is working as intended, what happens in innodb.auto_increment_dup,log-bin is that the newly created explicit record lock for the record heap number 6 on the PRIMARY key root page (3) will be released when that record is deleted moments later, with the following stack trace:

    lock_rec_reset_nth_bit
    lock_rec_reset_and_release_wait_low
    lock_rec_reset_and_release_wait
    lock_update_delete
    btr_cur_optimistic_delete_func
    row_undo_ins_remove_clust_rec
    row_undo_ins
    row_undo
    row_undo_step
    que_thr_step
    que_run_threads_low
    que_run_threads
    trx_rollback_to_savepoint_low
    trx_rollback_to_savepoint
    row_mysql_handle_errors
    row_insert_for_mysql

The idea might work with predicate locks, which we do not have. This entire scenario could also be fixed by MDEV-16232, which could allow the entire operation to be protected with page latches.
-rw-r--r--  storage/innobase/include/lock0lock.h  16
-rw-r--r--  storage/innobase/include/row0undo.h   30
-rw-r--r--  storage/innobase/lock/lock0lock.cc     1
-rw-r--r--  storage/innobase/row/row0mysql.cc      3
-rw-r--r--  storage/innobase/row/row0uins.cc      16
-rw-r--r--  storage/innobase/row/row0umod.cc       5
-rw-r--r--  storage/innobase/row/row0undo.cc      21
-rw-r--r--  storage/innobase/trx/trx0roll.cc      20
-rw-r--r--  storage/innobase/trx/trx0trx.cc        1
9 files changed, 88 insertions, 25 deletions
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
index 79fb30eb3f5..9553c1cc29a 100644
--- a/storage/innobase/include/lock0lock.h
+++ b/storage/innobase/include/lock0lock.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2018, MariaDB Corporation.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -913,6 +913,20 @@ lock_rec_create(
/*!< in: true if caller owns
trx mutex */
+/*********************************************************************//**
+Creates an explicit record lock for a running transaction that currently only
+has an implicit lock on the record. The transaction instance must have a
+reference count > 0 so that it can't be committed and freed before this
+function has completed. */
+void
+lock_rec_convert_impl_to_expl_for_trx(
+/*==================================*/
+ const buf_block_t* block, /*!< in: buffer block of rec */
+ const rec_t* rec, /*!< in: user record on page */
+ dict_index_t* index, /*!< in: index of record */
+ trx_t* trx, /*!< in/out: active transaction */
+ ulint heap_no);/*!< in: rec heap number to lock */
+
/*************************************************************//**
Removes a record lock request, waiting or granted, from the queue. */
void
diff --git a/storage/innobase/include/row0undo.h b/storage/innobase/include/row0undo.h
index abf4f61329a..eee001de4c3 100644
--- a/storage/innobase/include/row0undo.h
+++ b/storage/innobase/include/row0undo.h
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 1997, 2018, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -120,6 +120,32 @@ struct undo_node_t{
mem_heap_t* heap; /*!< memory heap used as auxiliary storage for
row; this must be emptied after undo is tried
on a row */
+
+ /** On rollback, convert an implicit lock into explicit before
+ undoing an insert (or update of delete-marked record), if needed.
+
+ This does nothing unless trx->duplicates == ULINT_UNDEFINED, and
+ it is also skipped for spatial indexes and when the cursor is
+ positioned on the page supremum record. Otherwise the work is
+ delegated to the two-argument overload, passing the heap number
+ of the record that the cursor points to.
+ @param[in] cursor record whose insert is about to be undone */
+ void convert_impl_to_expl(const btr_cur_t& cursor) const
+ {
+ ut_ad(trx->in_rollback); /* only meaningful while trx is rolling back */
+ if (trx->duplicates != ULINT_UNDEFINED
+ || dict_index_is_spatial(cursor.index)) {
+ return; /* not the marked rollback case, or a spatial index */
+ }
+
+ ulint heap_no = page_rec_get_heap_no(btr_cur_get_rec(&cursor));
+
+ if (heap_no != PAGE_HEAP_NO_SUPREMUM) { /* never convert on the supremum */
+ convert_impl_to_expl(cursor, heap_no);
+ }
+ }
+private:
+ /** On a partial rollback, convert an implicit lock into explicit
+ before undoing an insert (or update of delete-marked record).
+
+ Releasing an implicit lock could break the serializability of
+ INSERT...ON DUPLICATE KEY UPDATE and REPLACE statements.
+ @param[in] cursor record whose insert is about to be undone
+ @param[in] heap_no heap number of the record to lock */
+ void convert_impl_to_expl(const btr_cur_t& cursor,ulint heap_no) const;
};
#endif
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index 7f6171241ac..192084ceda0 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -6023,7 +6023,6 @@ Creates an explicit record lock for a running transaction that currently only
has an implicit lock on the record. The transaction instance must have a
reference count > 0 so that it can't be committed and freed before this
function has completed. */
-static
void
lock_rec_convert_impl_to_expl_for_trx(
/*==================================*/
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index c90992cbdc2..0f6f94901a5 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -1424,8 +1424,11 @@ error_exit:
/* FIXME: What's this ? */
thr->lock_state = QUE_THR_LOCK_ROW;
+ ulint duplicates = trx->duplicates;
+ trx->duplicates = ULINT_UNDEFINED;
was_lock_wait = row_mysql_handle_errors(
&err, trx, thr, &savept);
+ trx->duplicates = duplicates;
thr->lock_state = QUE_THR_LOCK_NOLOCK;
diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc
index 22edb7faf89..abe50920d0e 100644
--- a/storage/innobase/row/row0uins.cc
+++ b/storage/innobase/row/row0uins.cc
@@ -137,6 +137,8 @@ row_undo_ins_remove_clust_rec(
ut_a(success);
}
+ node->convert_impl_to_expl(*btr_cur);
+
if (btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
err = DB_SUCCESS;
goto func_exit;
@@ -193,7 +195,8 @@ row_undo_ins_remove_sec_low(
pessimistic descent down the index tree */
dict_index_t* index, /*!< in: index */
dtuple_t* entry, /*!< in: index entry to remove */
- que_thr_t* thr) /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread */
+ undo_node_t* node) /*!< in: undo node */
{
btr_pcur_t pcur;
btr_cur_t* btr_cur;
@@ -251,6 +254,8 @@ row_undo_ins_remove_sec_low(
btr_cur = btr_pcur_get_btr_cur(&pcur);
+ node->convert_impl_to_expl(*btr_cur);
+
if (modify_leaf) {
err = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
? DB_SUCCESS : DB_FAIL;
@@ -281,14 +286,15 @@ row_undo_ins_remove_sec(
/*====================*/
dict_index_t* index, /*!< in: index */
dtuple_t* entry, /*!< in: index entry to insert */
- que_thr_t* thr) /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread */
+ undo_node_t* node)
{
dberr_t err;
ulint n_tries = 0;
/* Try first optimistic descent to the B-tree */
- err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry, thr);
+ err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry, thr, node);
if (err == DB_SUCCESS) {
@@ -299,7 +305,7 @@ row_undo_ins_remove_sec(
retry:
err = row_undo_ins_remove_sec_low(
BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
- index, entry, thr);
+ index, entry, thr, node);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
@@ -453,7 +459,7 @@ row_undo_ins_remove_sec_rec(
assume that the secondary index record does
not exist. */
} else {
- err = row_undo_ins_remove_sec(index, entry, thr);
+ err = row_undo_ins_remove_sec(index, entry, thr, node);
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
goto func_exit;
diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc
index 4ed4e74fce3..a76e28810d4 100644
--- a/storage/innobase/row/row0umod.cc
+++ b/storage/innobase/row/row0umod.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
@@ -217,6 +217,7 @@ row_undo_mod_remove_clust_low(
/* In delete-marked records, DB_TRX_ID must
always refer to an existing update_undo log record. */
ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur->index));
+ node->convert_impl_to_expl(*btr_cur);
if (mode == BTR_MODIFY_LEAF) {
err = btr_cur_optimistic_delete(btr_cur, 0, mtr)
@@ -525,6 +526,8 @@ row_undo_mod_del_mark_or_remove_sec_low(
}
}
+ node->convert_impl_to_expl(*btr_cur);
+
if (modify_leaf) {
success = btr_cur_optimistic_delete(btr_cur, 0, &mtr);
if (success) {
diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc
index a78de7c7e80..c69eb4db2ff 100644
--- a/storage/innobase/row/row0undo.cc
+++ b/storage/innobase/row/row0undo.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
@@ -39,7 +39,24 @@ Created 1/8/1997 Heikki Tuuri
#include "row0umod.h"
#include "row0upd.h"
#include "row0mysql.h"
-#include "srv0srv.h"
+#include "lock0lock.h"
+
+/** On a partial rollback, convert an implicit lock into explicit
+before undoing an insert (or update of delete-marked record).
+
+Releasing an implicit lock could break the serializability of
+INSERT...ON DUPLICATE KEY UPDATE and REPLACE statements.
+
+The transaction reference count is incremented first, because
+lock_rec_convert_impl_to_expl_for_trx() is documented to require
+a reference count > 0 on the transaction.
+@param[in] cursor record whose insert is about to be undone
+@param[in] heap_no heap number of the record to lock */
+void
+undo_node_t::convert_impl_to_expl(const btr_cur_t& cursor, ulint heap_no) const
+{
+ trx_mutex_enter(trx);
+ trx->n_ref++; /* take a reference while holding the trx mutex */
+ trx_mutex_exit(trx);
+ lock_rec_convert_impl_to_expl_for_trx(
+ cursor.page_cur.block, cursor.page_cur.rec, cursor.index,
+ trx, heap_no); /* NOTE(review): presumably the callee releases the reference taken above - confirm */
+}
/* How to undo row operations?
(1) For an insert, we have stored a prefix of the clustered index record
diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc
index 474b627a11e..1afe64ef5c5 100644
--- a/storage/innobase/trx/trx0roll.cc
+++ b/storage/innobase/trx/trx0roll.cc
@@ -141,13 +141,16 @@ trx_rollback_to_savepoint(
partial rollback requested, or NULL for
complete rollback */
{
- ut_ad(!trx_mutex_own(trx));
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)
+ || trx_state_eq(trx, TRX_STATE_NOT_STARTED));
- trx_start_if_not_started_xa(trx, true);
+ trx->error_state = DB_SUCCESS;
- trx_rollback_to_savepoint_low(trx, savept);
+ if (trx_state_eq(trx, TRX_STATE_ACTIVE)) {
+ trx_rollback_to_savepoint_low(trx, savept);
+ }
- return(trx->error_state);
+ return trx->error_state;
}
/*******************************************************************//**
@@ -160,18 +163,9 @@ trx_rollback_for_mysql_low(
trx_t* trx) /*!< in/out: transaction */
{
trx->op_info = "rollback";
-
- /* If we are doing the XA recovery of prepared transactions,
- then the transaction object does not have an InnoDB session
- object, and we set a dummy session that we use for all MySQL
- transactions. */
-
trx_rollback_to_savepoint_low(trx, NULL);
-
trx->op_info = "";
-
ut_a(trx->error_state == DB_SUCCESS);
-
return(trx->error_state);
}
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 91e60571438..5e2857baa4e 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -120,6 +120,7 @@ trx_init(
trx->error_state = DB_SUCCESS;
+ trx->error_info = NULL;
trx->error_key_num = ULINT_UNDEFINED;
trx->undo_no = 0;