diff options
Diffstat (limited to 'storage/innobase/row/row0umod.cc')
-rw-r--r-- | storage/innobase/row/row0umod.cc | 303 |
1 files changed, 177 insertions, 126 deletions
diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc index 3d4065cbab6..2a0779a03ea 100644 --- a/storage/innobase/row/row0umod.cc +++ b/storage/innobase/row/row0umod.cc @@ -30,6 +30,7 @@ Created 2/27/1997 Heikki Tuuri #include "dict0boot.h" #include "trx0undo.h" #include "trx0roll.h" +#include "trx0purge.h" #include "btr0btr.h" #include "mach0data.h" #include "ibuf0ibuf.h" @@ -121,7 +122,8 @@ row_undo_mod_clust_low( } if (mode != BTR_MODIFY_TREE) { - ut_ad((mode & ~BTR_ALREADY_S_LATCHED) == BTR_MODIFY_LEAF); + ut_ad((mode & ulint(~BTR_ALREADY_S_LATCHED)) + == BTR_MODIFY_LEAF); err = btr_cur_optimistic_update( BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG @@ -146,101 +148,56 @@ row_undo_mod_clust_low( return(err); } -/***********************************************************//** -Purges a clustered index record after undo if possible. -This is attempted when the record was inserted by updating a -delete-marked record and there no longer exist transactions -that would see the delete-marked record. -@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_undo_mod_remove_clust_low( -/*==========================*/ - undo_node_t* node, /*!< in: row undo node */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ +/** Get the byte offset of the DB_TRX_ID column +@param[in] rec clustered index record +@param[in] index clustered index +@return the byte offset of DB_TRX_ID, from the start of rec */ +static ulint row_trx_id_offset(const rec_t* rec, const dict_index_t* index) { - btr_cur_t* btr_cur; - dberr_t err; - ulint trx_id_offset; - - ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC); - - /* Find out if the record has been purged already - or if we can remove it. 
*/ - - if (!btr_pcur_restore_position(mode, &node->pcur, mtr) - || row_vers_must_preserve_del_marked(node->new_trx_id, - node->table->name, - mtr)) { - - return(DB_SUCCESS); - } - - btr_cur = btr_pcur_get_btr_cur(&node->pcur); - - trx_id_offset = btr_cur_get_index(btr_cur)->trx_id_offset; - + ut_ad(index->n_uniq <= MAX_REF_PARTS); + ulint trx_id_offset = index->trx_id_offset; if (!trx_id_offset) { - mem_heap_t* heap = NULL; - ulint trx_id_col; - const ulint* offsets; - ulint len; - - trx_id_col = dict_index_get_sys_col_pos( - btr_cur_get_index(btr_cur), DATA_TRX_ID); - ut_ad(trx_id_col > 0); - ut_ad(trx_id_col != ULINT_UNDEFINED); - - offsets = rec_get_offsets( - btr_cur_get_rec(btr_cur), btr_cur_get_index(btr_cur), - NULL, true, trx_id_col + 1, &heap); - + /* Reserve enough offsets for the PRIMARY KEY and 2 columns + so that we can access DB_TRX_ID, DB_ROLL_PTR. */ + ulint offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2]; + rec_offs_init(offsets_); + mem_heap_t* heap = NULL; + const ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1; + ulint* offsets = rec_get_offsets(rec, index, offsets_, true, + trx_id_pos + 1, &heap); + ut_ad(!heap); + ulint len; trx_id_offset = rec_get_nth_field_offs( - offsets, trx_id_col, &len); + offsets, trx_id_pos, &len); ut_ad(len == DATA_TRX_ID_LEN); - mem_heap_free(heap); } - if (trx_read_trx_id(btr_cur_get_rec(btr_cur) + trx_id_offset) - != node->new_trx_id) { - /* The record must have been purged and then replaced - with a different one. */ - return(DB_SUCCESS); - } + return trx_id_offset; +} - /* We are about to remove an old, delete-marked version of the - record that may have been delete-marked by a different transaction - than the rolling-back one. */ - ut_ad(rec_get_deleted_flag(btr_cur_get_rec(btr_cur), - dict_table_is_comp(node->table))); - /* In delete-marked records, DB_TRX_ID must - always refer to an existing update_undo log record. 
*/ - ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur->index)); - - if (mode == BTR_MODIFY_LEAF) { - err = btr_cur_optimistic_delete(btr_cur, 0, mtr) - ? DB_SUCCESS - : DB_FAIL; - } else { - ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE)); +/** Determine if rollback must execute a purge-like operation. +@param[in,out] node row undo +@param[in,out] mtr mini-transaction +@return whether the record should be purged */ +static bool row_undo_mod_must_purge(undo_node_t* node, mtr_t* mtr) +{ + ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC); + ut_ad(!node->table->is_temporary()); - /* This operation is analogous to purge, we can free also - inherited externally stored fields. - We can also assume that the record was complete - (including BLOBs), because it had been delete-marked - after it had been completely inserted. Therefore, we - are passing rollback=false, just like purge does. */ + btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&node->pcur); + ut_ad(btr_cur->index->is_primary()); - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0, - false, mtr); + mtr_s_lock(&purge_sys.latch, mtr); - /* The delete operation may fail if we have little - file space left: TODO: easiest to crash the database - and restart with more file space */ + if (!purge_sys.view.changes_visible(node->new_trx_id, + node->table->name)) { + return false; } - return(err); + const rec_t* rec = btr_cur_get_rec(btr_cur); + + return trx_read_trx_id(rec + row_trx_id_offset(rec, btr_cur->index)) + == node->new_trx_id; } /***********************************************************//** @@ -269,12 +226,13 @@ row_undo_mod_clust( log_free_check(); pcur = &node->pcur; index = btr_cur_get_index(btr_pcur_get_btr_cur(pcur)); + ut_ad(index->is_primary()); mtr.start(); if (index->table->is_temporary()) { mtr.set_log_mode(MTR_LOG_NO_REDO); } else { - mtr.set_named_space(index->space); + index->set_modified(mtr); } online = dict_index_is_online_ddl(index); @@ -308,7 +266,7 @@ row_undo_mod_clust( if 
(index->table->is_temporary()) { mtr.set_log_mode(MTR_LOG_NO_REDO); } else { - mtr.set_named_space(index->space); + index->set_modified(mtr); } err = row_undo_mod_clust_low( @@ -362,44 +320,122 @@ row_undo_mod_clust( btr_pcur_commit_specify_mtr(pcur, &mtr); - if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) { + if (err != DB_SUCCESS) { + goto func_exit; + } + + /* FIXME: Perform the below operations in the above + mini-transaction when possible. */ + + if (node->rec_type == TRX_UNDO_UPD_DEL_REC) { + /* In delete-marked records, DB_TRX_ID must + always refer to an existing update_undo log record. */ + ut_ad(node->new_trx_id); mtr.start(); + if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr)) { + goto mtr_commit_exit; + } + if (index->table->is_temporary()) { mtr.set_log_mode(MTR_LOG_NO_REDO); } else { - mtr.set_named_space(index->space); + if (!row_undo_mod_must_purge(node, &mtr)) { + goto mtr_commit_exit; + } + index->set_modified(mtr); } - /* It is not necessary to call row_log_table, - because the record is delete-marked and would thus - be omitted from the rebuilt copy of the table. 
*/ - err = row_undo_mod_remove_clust_low( - node, &mtr, BTR_MODIFY_LEAF); - if (err != DB_SUCCESS) { - btr_pcur_commit_specify_mtr(pcur, &mtr); + ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur), + dict_table_is_comp(node->table))); + if (btr_cur_optimistic_delete(&pcur->btr_cur, 0, &mtr)) { + goto mtr_commit_exit; + } - /* We may have to modify tree structure: do a - pessimistic descent down the index tree */ + btr_pcur_commit_specify_mtr(pcur, &mtr); - mtr.start(); - if (index->table->is_temporary()) { - mtr.set_log_mode(MTR_LOG_NO_REDO); - } else { - mtr.set_named_space(index->space); + mtr.start(); + if (!btr_pcur_restore_position( + BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, + pcur, &mtr)) { + goto mtr_commit_exit; + } + + if (index->table->is_temporary()) { + mtr.set_log_mode(MTR_LOG_NO_REDO); + } else { + if (!row_undo_mod_must_purge(node, &mtr)) { + goto mtr_commit_exit; } + index->set_modified(mtr); + } - err = row_undo_mod_remove_clust_low( - node, &mtr, - BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE); + ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur), + dict_table_is_comp(node->table))); + + /* This operation is analogous to purge, we can free + also inherited externally stored fields. We can also + assume that the record was complete (including BLOBs), + because it had been delete-marked after it had been + completely inserted. Therefore, we are passing + rollback=false, just like purge does. */ + btr_cur_pessimistic_delete(&err, FALSE, &pcur->btr_cur, 0, + false, &mtr); + ut_ad(err == DB_SUCCESS + || err == DB_OUT_OF_FILE_SPACE); + } else if (!index->table->is_temporary() && node->new_trx_id) { + /* We rolled back a record so that it still exists. + We must reset the DB_TRX_ID if the history is no + longer accessible by any active read view. 
*/ - ut_ad(err == DB_SUCCESS - || err == DB_OUT_OF_FILE_SPACE); + mtr.start(); + if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr)) { + goto mtr_commit_exit; + } + rec_t* rec = btr_pcur_get_rec(pcur); + mtr_s_lock(&purge_sys.latch, &mtr); + if (!purge_sys.view.changes_visible(node->new_trx_id, + node->table->name)) { + goto mtr_commit_exit; } - btr_pcur_commit_specify_mtr(pcur, &mtr); + ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1; + ut_ad(index->n_uniq <= MAX_REF_PARTS); + /* Reserve enough offsets for the PRIMARY KEY and 2 columns + so that we can access DB_TRX_ID, DB_ROLL_PTR. */ + ulint offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2]; + rec_offs_init(offsets_); + offsets = rec_get_offsets( + rec, index, offsets_, true, trx_id_pos + 2, &heap); + ulint len; + ulint trx_id_offset = rec_get_nth_field_offs( + offsets, trx_id_pos, &len); + ut_ad(len == DATA_TRX_ID_LEN); + + if (trx_read_trx_id(rec + trx_id_offset) == node->new_trx_id) { + ut_ad(!rec_get_deleted_flag( + rec, dict_table_is_comp(node->table))); + index->set_modified(mtr); + if (page_zip_des_t* page_zip = buf_block_get_page_zip( + btr_pcur_get_block(&node->pcur))) { + page_zip_write_trx_id_and_roll_ptr( + page_zip, rec, offsets, trx_id_pos, + 0, 1ULL << ROLL_PTR_INSERT_FLAG_POS, + &mtr); + } else { + mlog_write_string(rec + trx_id_offset, + reset_trx_id, + sizeof reset_trx_id, &mtr); + } + } + } else { + goto func_exit; } +mtr_commit_exit: + btr_pcur_commit_specify_mtr(pcur, &mtr); + +func_exit: node->state = UNDO_NODE_FETCH_NEXT; if (offsets_heap) { @@ -502,12 +538,11 @@ row_undo_mod_del_mark_or_remove_sec_low( ut_a(success); /* For temporary table, we can skip to check older version of - clustered index entry. Because the purge won't process - any no-redo rollback segment undo logs. */ - if (dict_table_is_temporary(node->table) + clustered index entry, because there is no MVCC or purge. 
*/ + if (node->table->is_temporary() || row_vers_old_has_index_entry( - false, btr_pcur_get_rec(&(node->pcur)), - &mtr_vers, index, entry, 0, 0)) { + false, btr_pcur_get_rec(&node->pcur), + &mtr_vers, index, entry, 0, 0)) { err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG, btr_cur, TRUE, thr, &mtr); ut_ad(err == DB_SUCCESS); @@ -526,18 +561,14 @@ row_undo_mod_del_mark_or_remove_sec_low( } if (modify_leaf) { - success = btr_cur_optimistic_delete(btr_cur, 0, &mtr); - if (success) { - err = DB_SUCCESS; - } else { - err = DB_FAIL; - } + err = btr_cur_optimistic_delete(btr_cur, 0, &mtr) + ? DB_SUCCESS : DB_FAIL; } else { /* Passing rollback=false, because we are deleting a secondary index record: the distinction only matters when deleting a record that contains externally stored columns. */ - ut_ad(!dict_index_is_clust(index)); + ut_ad(!index->is_primary()); btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0, false, &mtr); @@ -861,8 +892,8 @@ row_undo_mod_upd_del_sec( } /* During online index creation, - HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE should - guarantee that any active transaction has not modified + HA_ALTER_INPLACE_COPY_NO_LOCK or HA_ALTER_INPLACE_NOCOPY_NO_LOCK + should guarantee that any active transaction has not modified indexed columns such that col->ord_part was 0 at the time when the undo log record was written. When we get to roll back an undo log entry TRX_UNDO_DEL_MARK_REC, @@ -927,8 +958,8 @@ row_undo_mod_del_mark_sec( } /* During online index creation, - HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE should - guarantee that any active transaction has not modified + HA_ALTER_INPLACE_COPY_NO_LOCK or HA_ALTER_INPLACE_NOCOPY_NO_LOCK + should guarantee that any active transaction has not modified indexed columns such that col->ord_part was 0 at the time when the undo log record was written. When we get to roll back an undo log entry TRX_UNDO_DEL_MARK_REC, @@ -1030,8 +1061,7 @@ row_undo_mod_upd_exist_sec( format. 
REDUNDANT and COMPACT formats store a local 768-byte prefix of each externally stored column. */ - ut_a(dict_table_get_format(index->table) - >= UNIV_FORMAT_B); + ut_a(dict_table_has_atomic_blobs(index->table)); /* This is only legitimate when rolling back an incomplete transaction @@ -1175,6 +1205,20 @@ close_table: node->heap, &(node->update)); node->new_trx_id = trx_id; node->cmpl_info = cmpl_info; + ut_ad(!node->ref->info_bits); + + if (node->update->info_bits & REC_INFO_MIN_REC_FLAG) { + /* This must be an undo log record for a subsequent + instant ALTER TABLE, extending the metadata record. */ + ut_ad(clust_index->is_instant()); + if (node->update->info_bits != REC_INFO_MIN_REC_FLAG) { + ut_ad(!"wrong info_bits in undo log record"); + goto close_table; + } + node->update->info_bits = REC_INFO_METADATA; + const_cast<dtuple_t*>(node->ref)->info_bits + = REC_INFO_METADATA; + } if (!row_undo_search_clust_to_pcur(node)) { /* As long as this rolling-back transaction exists, @@ -1248,6 +1292,12 @@ row_undo_mod( node->index = dict_table_get_first_index(node->table); ut_ad(dict_index_is_clust(node->index)); + + if (node->ref->info_bits) { + ut_ad(node->ref->info_bits == REC_INFO_METADATA); + goto rollback_clust; + } + /* Skip the clustered index (the first index) */ node->index = dict_table_get_next_index(node->index); @@ -1270,6 +1320,7 @@ row_undo_mod( } if (err == DB_SUCCESS) { +rollback_clust: err = row_undo_mod_clust(node, thr); bool update_statistics |