Diffstat (limited to 'storage/innobase/row')
-rw-r--r--  storage/innobase/row/row0ftsort.cc     29
-rw-r--r--  storage/innobase/row/row0import.cc    616
-rw-r--r--  storage/innobase/row/row0ins.cc       704
-rw-r--r--  storage/innobase/row/row0log.cc      1022
-rw-r--r--  storage/innobase/row/row0merge.cc     651
-rw-r--r--  storage/innobase/row/row0mysql.cc    2442
-rw-r--r--  storage/innobase/row/row0purge.cc     494
-rw-r--r--  storage/innobase/row/row0quiesce.cc    17
-rw-r--r--  storage/innobase/row/row0row.cc        81
-rw-r--r--  storage/innobase/row/row0sel.cc      1527
-rw-r--r--  storage/innobase/row/row0uins.cc      323
-rw-r--r--  storage/innobase/row/row0umod.cc      565
-rw-r--r--  storage/innobase/row/row0undo.cc       70
-rw-r--r--  storage/innobase/row/row0upd.cc       433
-rw-r--r--  storage/innobase/row/row0vers.cc       70
15 files changed, 3811 insertions(+), 5233 deletions(-)
diff --git a/storage/innobase/row/row0ftsort.cc b/storage/innobase/row/row0ftsort.cc
index bc93ca25195..cc8844c3bd4 100644
--- a/storage/innobase/row/row0ftsort.cc
+++ b/storage/innobase/row/row0ftsort.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2021, MariaDB Corporation.
+Copyright (c) 2015, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -216,7 +216,7 @@ row_fts_psort_info_init(
common_info->old_zip_size = old_zip_size;
common_info->trx = trx;
common_info->all_info = psort_info;
- common_info->sort_event = os_event_create(0);
+ pthread_cond_init(&common_info->sort_cond, nullptr);
common_info->opt_doc_id_size = opt_doc_id_size;
if (log_tmp_is_encrypted()) {
@@ -285,7 +285,7 @@ row_fts_psort_info_init(
psort_info[j].psort_common = common_info;
psort_info[j].error = DB_SUCCESS;
psort_info[j].memory_used = 0;
- mutex_create(LATCH_ID_FTS_PLL_TOKENIZE, &psort_info[j].mutex);
+ mysql_mutex_init(0, &psort_info[j].mutex, nullptr);
}
/* Initialize merge_info structures parallel merge and insert
@@ -332,10 +332,10 @@ row_fts_psort_info_destroy(
aligned_free(psort_info[j].crypt_block[i]);
}
- mutex_free(&psort_info[j].mutex);
+ mysql_mutex_destroy(&psort_info[j].mutex);
}
- os_event_destroy(merge_info[0].psort_common->sort_event);
+ pthread_cond_destroy(&merge_info[0].psort_common->sort_cond);
ut_free(merge_info[0].psort_common->dup);
ut_free(merge_info[0].psort_common);
ut_free(psort_info);
@@ -721,7 +721,7 @@ row_merge_fts_get_next_doc_item(
ut_free(*doc_item);
}
- mutex_enter(&psort_info->mutex);
+ mysql_mutex_lock(&psort_info->mutex);
*doc_item = UT_LIST_GET_FIRST(psort_info->fts_doc_list);
if (*doc_item != NULL) {
@@ -733,7 +733,7 @@ row_merge_fts_get_next_doc_item(
+ (*doc_item)->field->len;
}
- mutex_exit(&psort_info->mutex);
+ mysql_mutex_unlock(&psort_info->mutex);
}
/*********************************************************************//**
@@ -917,7 +917,7 @@ loop:
}
if (doc_item == NULL) {
- os_thread_yield();
+ std::this_thread::yield();
}
row_merge_fts_get_next_doc_item(psort_info, &doc_item);
@@ -1032,9 +1032,9 @@ func_exit:
mem_heap_free(blob_heap);
- mutex_enter(&psort_info->mutex);
+ mysql_mutex_lock(&psort_info->mutex);
psort_info->error = error;
- mutex_exit(&psort_info->mutex);
+ mysql_mutex_unlock(&psort_info->mutex);
if (UT_LIST_GET_LEN(psort_info->fts_doc_list) > 0) {
/* child can exit either with error or told by parent. */
@@ -1047,9 +1047,10 @@ func_exit:
row_merge_fts_get_next_doc_item(psort_info, &doc_item);
} while (doc_item != NULL);
+ mysql_mutex_lock(&psort_info->mutex);
psort_info->child_status = FTS_CHILD_COMPLETE;
- os_event_set(psort_info->psort_common->sort_event);
- psort_info->child_status = FTS_CHILD_EXITING;
+ pthread_cond_signal(&psort_info->psort_common->sort_cond);
+ mysql_mutex_unlock(&psort_info->mutex);
}
/*********************************************************************//**
@@ -1632,7 +1633,7 @@ row_fts_merge_insert(
/* Get aux index */
fts_get_table_name(&fts_table, aux_table_name);
- aux_table = dict_table_open_on_name(aux_table_name, FALSE, FALSE,
+ aux_table = dict_table_open_on_name(aux_table_name, false,
DICT_ERR_IGNORE_NONE);
ut_ad(aux_table != NULL);
aux_index = dict_table_get_first_index(aux_table);
@@ -1768,7 +1769,7 @@ exit:
error = ins_ctx.btr_bulk->finish(error);
UT_DELETE(ins_ctx.btr_bulk);
- dict_table_close(aux_table, FALSE, FALSE);
+ aux_table->release();
trx->free();
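
[Editor's note] The row0ftsort.cc hunks above swap InnoDB's home-grown os_event_t/ib_mutex_t primitives for a pthread_cond_t paired with a mysql_mutex_t, and move the FTS_CHILD_COMPLETE status update under that mutex before signalling. A minimal, self-contained sketch of the same signalling pattern, written with plain pthreads (psort_sync, child_complete and parent_wait are illustrative names, not code from the patch):

#include <pthread.h>

enum child_status { FTS_CHILD_RUNNING, FTS_CHILD_COMPLETE };

struct psort_sync {
	pthread_mutex_t	mutex;		/* stand-in for the mysql_mutex_t psort_info->mutex */
	pthread_cond_t	sort_cond;	/* stand-in for psort_common->sort_cond */
	child_status	status;
};

/* Child thread: publish completion and wake the waiter.  The flag is set
and the condition signalled while the mutex is held, so the parent cannot
miss the wakeup or observe a stale status. */
void child_complete(psort_sync *s)
{
	pthread_mutex_lock(&s->mutex);
	s->status = FTS_CHILD_COMPLETE;
	pthread_cond_signal(&s->sort_cond);
	pthread_mutex_unlock(&s->mutex);
}

/* Parent thread: wait until the child reports completion. */
void parent_wait(psort_sync *s)
{
	pthread_mutex_lock(&s->mutex);
	while (s->status != FTS_CHILD_COMPLETE)
		pthread_cond_wait(&s->sort_cond, &s->mutex);
	pthread_mutex_unlock(&s->mutex);
}

This is also why the hunk adds a mysql_mutex_lock() call before setting child_status: os_event_set() could be called with the flag updated outside any mutex, whereas a condition variable is only reliable when the predicate is changed under the associated mutex.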
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
index 3c29461c19d..4afe9e874bb 100644
--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2022, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -32,15 +32,15 @@ Created 2012-02-08 by Sunny Bains.
#include "que0que.h"
#include "dict0boot.h"
#include "dict0load.h"
-#include "ibuf0ibuf.h"
#include "pars0pars.h"
+#include "row0row.h"
#include "row0sel.h"
#include "row0mysql.h"
#include "srv0start.h"
#include "row0quiesce.h"
#include "fil0pagecompress.h"
#include "trx0undo.h"
-#include "row0row.h"
+#include "lock0lock.h"
#ifdef HAVE_LZO
#include "lzo/lzo1x.h"
#endif
@@ -258,19 +258,18 @@ public:
}
/** Position the cursor on the first user record. */
- void open(buf_block_t* block) UNIV_NOTHROW
+ rec_t* open(buf_block_t* block, const dict_index_t* index) noexcept
+ MY_ATTRIBUTE((warn_unused_result))
{
+ m_cur.index = const_cast<dict_index_t*>(index);
page_cur_set_before_first(block, &m_cur);
-
- if (!end()) {
- next();
- }
+ return next();
}
/** Move to the next record. */
- void next() UNIV_NOTHROW
+ rec_t* next() noexcept MY_ATTRIBUTE((warn_unused_result))
{
- page_cur_move_to_next(&m_cur);
+ return page_cur_move_to_next(&m_cur);
}
/**
@@ -292,37 +291,36 @@ public:
/** Remove the current record
@return true on success */
- bool remove(
- const dict_index_t* index,
- rec_offs* offsets) UNIV_NOTHROW
+ bool remove(rec_offs* offsets) UNIV_NOTHROW
{
- ut_ad(page_is_leaf(m_cur.block->frame));
+ const dict_index_t* const index = m_cur.index;
+ ut_ad(page_is_leaf(m_cur.block->page.frame));
/* We can't end up with an empty page unless it is root. */
- if (page_get_n_recs(m_cur.block->frame) <= 1) {
+ if (page_get_n_recs(m_cur.block->page.frame) <= 1) {
return(false);
}
if (!rec_offs_any_extern(offsets)
&& m_cur.block->page.id().page_no() != index->page
- && ((page_get_data_size(m_cur.block->frame)
+ && ((page_get_data_size(m_cur.block->page.frame)
- rec_offs_size(offsets)
< BTR_CUR_PAGE_COMPRESS_LIMIT(index))
- || !page_has_siblings(m_cur.block->frame)
- || (page_get_n_recs(m_cur.block->frame) < 2))) {
+ || !page_has_siblings(m_cur.block->page.frame)
+ || (page_get_n_recs(m_cur.block->page.frame) < 2))) {
return false;
}
#ifdef UNIV_ZIP_DEBUG
page_zip_des_t* page_zip = buf_block_get_page_zip(m_cur.block);
ut_a(!page_zip || page_zip_validate(
- page_zip, m_cur.block->frame, index));
+ page_zip, m_cur.block->page.frame, index));
#endif /* UNIV_ZIP_DEBUG */
- page_cur_delete_rec(&m_cur, index, offsets, &m_mtr);
+ page_cur_delete_rec(&m_cur, offsets, &m_mtr);
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip || page_zip_validate(
- page_zip, m_cur.block->frame, index));
+ page_zip, m_cur.block->page.frame, index));
#endif /* UNIV_ZIP_DEBUG */
return true;
@@ -370,24 +368,23 @@ public:
}
private:
- /** Begin import, position the cursor on the first record. */
- void open() UNIV_NOTHROW;
+ /** Begin import, position the cursor on the first record. */
+ inline bool open() noexcept;
- /** Close the persistent curosr and commit the mini-transaction. */
- void close() UNIV_NOTHROW;
+ /** Close the persistent cursor and commit the mini-transaction. */
+ void close() noexcept { m_mtr.commit(); btr_pcur_close(&m_pcur); }
- /** Position the cursor on the next record.
- @return DB_SUCCESS or error code */
- dberr_t next() UNIV_NOTHROW;
+ /** Position the cursor on the next record.
+ @return DB_SUCCESS or error code */
+ dberr_t next() noexcept;
- /** Store the persistent cursor position and reopen the
- B-tree cursor in BTR_MODIFY_TREE mode, because the
- tree structure may be changed during a pessimistic delete. */
- void purge_pessimistic_delete() UNIV_NOTHROW;
+ /** Store the persistent cursor position and reopen the
+ B-tree cursor in BTR_MODIFY_TREE mode, because the
+ tree structure may be changed during a pessimistic delete. */
+ inline dberr_t purge_pessimistic_delete() noexcept;
- /** Purge delete-marked records.
- @param offsets current row offsets. */
- void purge() UNIV_NOTHROW;
+ /** Purge a delete-marked record. */
+ dberr_t purge() noexcept;
protected:
// Disable copying
@@ -468,7 +465,7 @@ public:
Called for every page in the tablespace. If the page was not
updated then its state must be set to BUF_PAGE_NOT_USED. For
compressed tables the page descriptor memory will be at offset:
- block->frame + srv_page_size;
+ block->page.frame + srv_page_size;
@param block block read from file, note it is not from the buffer pool
@retval DB_SUCCESS or error code. */
virtual dberr_t operator()(buf_block_t* block) UNIV_NOTHROW = 0;
@@ -485,7 +482,7 @@ public:
static byte* get_frame(const buf_block_t* block)
{
return block->page.zip.data
- ? block->page.zip.data : block->frame;
+ ? block->page.zip.data : block->page.frame;
}
/** Invoke the functionality for the callback */
@@ -618,7 +615,7 @@ AbstractCallback::init(
os_offset_t file_size,
const buf_block_t* block) UNIV_NOTHROW
{
- const page_t* page = block->frame;
+ const page_t* page = block->page.frame;
m_space_flags = fsp_header_get_flags(page);
if (!fil_space_t::is_valid_flags(m_space_flags, true)) {
@@ -757,7 +754,7 @@ dberr_t FetchIndexRootPages::operator()(buf_block_t* block) UNIV_NOTHROW
return(DB_CORRUPTION);
}
- if (!page_is_comp(block->frame) !=
+ if (!page_is_comp(block->page.frame) !=
!dict_table_is_comp(m_table)) {
ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
ER_TABLE_SCHEMA_MISMATCH,
@@ -1458,8 +1455,6 @@ row_import::set_root_by_heuristic() UNIV_NOTHROW
" the tablespace has " << m_n_indexes << " indexes";
}
- dict_mutex_enter_for_mysql();
-
ulint i = 0;
dberr_t err = DB_SUCCESS;
@@ -1499,8 +1494,6 @@ row_import::set_root_by_heuristic() UNIV_NOTHROW
}
}
- dict_mutex_exit_for_mysql();
-
return(err);
}
@@ -1510,14 +1503,13 @@ Purge delete marked records.
dberr_t
IndexPurge::garbage_collect() UNIV_NOTHROW
{
- dberr_t err;
ibool comp = dict_table_is_comp(m_index->table);
/* Open the persistent cursor and start the mini-transaction. */
- open();
+ dberr_t err = open() ? next() : DB_CORRUPTION;
- while ((err = next()) == DB_SUCCESS) {
+ for (; err == DB_SUCCESS; err = next()) {
rec_t* rec = btr_pcur_get_rec(&m_pcur);
ibool deleted = rec_get_deleted_flag(rec, comp);
@@ -1525,7 +1517,10 @@ IndexPurge::garbage_collect() UNIV_NOTHROW
if (!deleted) {
++m_n_rows;
} else {
- purge();
+ err = purge();
+ if (err != DB_SUCCESS) {
+ break;
+ }
}
}
@@ -1538,40 +1533,33 @@ IndexPurge::garbage_collect() UNIV_NOTHROW
/**
Begin import, position the cursor on the first record. */
-void
-IndexPurge::open() UNIV_NOTHROW
+inline bool IndexPurge::open() noexcept
{
- mtr_start(&m_mtr);
+ m_mtr.start();
+ m_mtr.set_log_mode(MTR_LOG_NO_REDO);
- mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
+ btr_pcur_init(&m_pcur);
- btr_pcur_open_at_index_side(
- true, m_index, BTR_MODIFY_LEAF, &m_pcur, true, 0, &m_mtr);
- btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr);
- if (rec_is_metadata(btr_pcur_get_rec(&m_pcur), *m_index)) {
- ut_ad(btr_pcur_is_on_user_rec(&m_pcur));
- /* Skip the metadata pseudo-record. */
- } else {
- btr_pcur_move_to_prev_on_page(&m_pcur);
- }
-}
+ if (m_pcur.open_leaf(true, m_index, BTR_MODIFY_LEAF, &m_mtr) != DB_SUCCESS)
+ return false;
-/**
-Close the persistent curosr and commit the mini-transaction. */
-void
-IndexPurge::close() UNIV_NOTHROW
-{
- btr_pcur_close(&m_pcur);
- mtr_commit(&m_mtr);
+ rec_t *rec= page_rec_get_next(btr_pcur_get_rec(&m_pcur));
+ if (!rec)
+ return false;
+ if (rec_is_metadata(rec, *m_index))
+ /* Skip the metadata pseudo-record. */
+ btr_pcur_get_page_cur(&m_pcur)->rec= rec;
+ return true;
}
/**
Position the cursor on the next record.
@return DB_SUCCESS or error code */
-dberr_t
-IndexPurge::next() UNIV_NOTHROW
+dberr_t IndexPurge::next() noexcept
{
- btr_pcur_move_to_next_on_page(&m_pcur);
+ if (UNIV_UNLIKELY(!btr_pcur_move_to_next_on_page(&m_pcur))) {
+ return DB_CORRUPTION;
+ }
/* When switching pages, commit the mini-transaction
in order to release the latch on the old page. */
@@ -1592,9 +1580,12 @@ IndexPurge::next() UNIV_NOTHROW
mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
- btr_pcur_restore_position(BTR_MODIFY_LEAF, &m_pcur, &m_mtr);
+ if (m_pcur.restore_position(BTR_MODIFY_LEAF, &m_mtr)
+ == btr_pcur_t::CORRUPTED) {
+ return DB_CORRUPTION;
+ }
/* The following is based on btr_pcur_move_to_next_user_rec(). */
- m_pcur.old_stored = false;
+ m_pcur.old_rec = nullptr;
ut_ad(m_pcur.latch_mode == BTR_MODIFY_LEAF);
do {
if (btr_pcur_is_after_last_on_page(&m_pcur)) {
@@ -1602,56 +1593,12 @@ IndexPurge::next() UNIV_NOTHROW
return DB_END_OF_INDEX;
}
- buf_block_t* block = btr_pcur_get_block(&m_pcur);
- uint32_t next_page = btr_page_get_next(block->frame);
-
- /* MDEV-13542 FIXME: Make these checks part of
- btr_pcur_move_to_next_page(), and introduce a
- return status that will be checked in all callers! */
- switch (next_page) {
- default:
- if (next_page != block->page.id().page_no()) {
- break;
- }
- /* MDEV-20931 FIXME: Check that
- next_page is within the tablespace
- bounds! Also check that it is not a
- change buffer bitmap page. */
- /* fall through */
- case 0:
- case 1:
- case FIL_NULL:
- return DB_CORRUPTION;
+ if (dberr_t err = btr_pcur_move_to_next_page(&m_pcur,
+ &m_mtr)) {
+ return err;
}
-
- dict_index_t* index = m_pcur.btr_cur.index;
- buf_block_t* next_block = btr_block_get(
- *index, next_page, BTR_MODIFY_LEAF, false,
- &m_mtr);
-
- if (UNIV_UNLIKELY(!next_block
- || !fil_page_index_page_check(
- next_block->frame)
- || !!dict_index_is_spatial(index)
- != (fil_page_get_type(
- next_block->frame)
- == FIL_PAGE_RTREE)
- || page_is_comp(next_block->frame)
- != page_is_comp(block->frame)
- || btr_page_get_prev(
- next_block->frame)
- != block->page.id().page_no())) {
- return DB_CORRUPTION;
- }
-
- btr_leaf_page_release(block, BTR_MODIFY_LEAF, &m_mtr);
-
- page_cur_set_before_first(next_block,
- &m_pcur.btr_cur.page_cur);
-
- ut_d(page_check_dir(next_block->frame));
- } else {
- btr_pcur_move_to_next_on_page(&m_pcur);
+ } else if (!btr_pcur_move_to_next_on_page(&m_pcur)) {
+ return DB_CORRUPTION;
}
} while (!btr_pcur_is_on_user_rec(&m_pcur));
@@ -1662,41 +1609,38 @@ IndexPurge::next() UNIV_NOTHROW
Store the persistent cursor position and reopen the
B-tree cursor in BTR_MODIFY_TREE mode, because the
tree structure may be changed during a pessimistic delete. */
-void
-IndexPurge::purge_pessimistic_delete() UNIV_NOTHROW
+inline dberr_t IndexPurge::purge_pessimistic_delete() noexcept
{
- dberr_t err;
-
- btr_pcur_restore_position(BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
- &m_pcur, &m_mtr);
-
- ut_ad(rec_get_deleted_flag(
- btr_pcur_get_rec(&m_pcur),
- dict_table_is_comp(m_index->table)));
-
- btr_cur_pessimistic_delete(
- &err, FALSE, btr_pcur_get_btr_cur(&m_pcur), 0, false, &m_mtr);
-
- ut_a(err == DB_SUCCESS);
+ dberr_t err;
+ if (m_pcur.restore_position(BTR_PURGE_TREE, &m_mtr) != btr_pcur_t::CORRUPTED)
+ {
+ ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(&m_pcur),
+ m_index->table->not_redundant()));
+ btr_cur_pessimistic_delete(&err, FALSE, btr_pcur_get_btr_cur(&m_pcur), 0,
+ false, &m_mtr);
+ }
+ else
+ err= DB_CORRUPTION;
- /* Reopen the B-tree cursor in BTR_MODIFY_LEAF mode */
- mtr_commit(&m_mtr);
+ m_mtr.commit();
+ return err;
}
-/**
-Purge delete-marked records. */
-void
-IndexPurge::purge() UNIV_NOTHROW
+dberr_t IndexPurge::purge() noexcept
{
- btr_pcur_store_position(&m_pcur, &m_mtr);
-
- purge_pessimistic_delete();
-
- mtr_start(&m_mtr);
-
- mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
-
- btr_pcur_restore_position(BTR_MODIFY_LEAF, &m_pcur, &m_mtr);
+ btr_pcur_store_position(&m_pcur, &m_mtr);
+ m_mtr.commit();
+ m_mtr.start();
+ m_mtr.set_log_mode(MTR_LOG_NO_REDO);
+ dberr_t err= purge_pessimistic_delete();
+
+ m_mtr.start();
+ m_mtr.set_log_mode(MTR_LOG_NO_REDO);
+ if (err == DB_SUCCESS)
+ err= (m_pcur.restore_position(BTR_MODIFY_LEAF, &m_mtr) ==
+ btr_pcur_t::CORRUPTED)
+ ? DB_CORRUPTION : DB_SUCCESS;
+ return err;
}
/** Adjust the BLOB reference for a single column that is externally stored
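
[Editor's note] The IndexPurge rewrite above converts void functions that asserted on failure (ut_a(err == DB_SUCCESS)) into functions returning dberr_t, so a corrupted tablespace aborts IMPORT TABLESPACE instead of crashing the server. A generic, compilable sketch of that conversion, using hypothetical stand-ins (restore_cursor, delete_current_record) for pcur.restore_position() and btr_cur_pessimistic_delete():

enum dberr_t { DB_SUCCESS, DB_CORRUPTION };

/* Hypothetical steps; either may report corruption in the imported file. */
static dberr_t restore_cursor() { return DB_SUCCESS; }
static dberr_t delete_current_record() { return DB_SUCCESS; }

/* Old style: a void purge() ending in ut_a(err == DB_SUCCESS).
New style: every step's status is returned to the caller, which stops the
import with DB_CORRUPTION instead of asserting. */
dberr_t purge_one_record()
{
	if (restore_cursor() != DB_SUCCESS)
		return DB_CORRUPTION;
	return delete_current_record();
}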
@@ -1806,10 +1750,8 @@ re-organising the B+tree.
@return true if purge succeeded */
inline bool PageConverter::purge() UNIV_NOTHROW
{
- const dict_index_t* index = m_index->m_srv_index;
-
/* We can't have a page that is empty and not root. */
- if (m_rec_iter.remove(index, m_offsets)) {
+ if (m_rec_iter.remove(m_offsets)) {
++m_index->m_stats.m_n_purged;
@@ -1873,7 +1815,9 @@ PageConverter::update_records(
/* This will also position the cursor on the first user record. */
- m_rec_iter.open(block);
+ if (!m_rec_iter.open(block, m_index->m_srv_index)) {
+ return DB_CORRUPTION;
+ }
while (!m_rec_iter.end()) {
rec_t* rec = m_rec_iter.current();
@@ -1904,17 +1848,19 @@ PageConverter::update_records(
optimistic delete. */
if (deleted) {
+ ++m_index->m_stats.m_n_deleted;
/* A successful purge will move the cursor to the
next record. */
- if (!purge()) {
- m_rec_iter.next();
+ if (purge()) {
+ continue;
}
-
- ++m_index->m_stats.m_n_deleted;
} else {
++m_index->m_stats.m_n_rows;
- m_rec_iter.next();
+ }
+
+ if (!m_rec_iter.next()) {
+ return DB_CORRUPTION;
}
}
@@ -1934,7 +1880,7 @@ PageConverter::update_index_page(
return(DB_SUCCESS);
}
- buf_frame_t* page = block->frame;
+ buf_frame_t* page = block->page.frame;
const index_id_t id = btr_page_get_index_id(page);
if (id != m_index->m_id) {
@@ -1985,7 +1931,7 @@ PageConverter::update_index_page(
m_index->m_srv_index->id);
if (UNIV_LIKELY_NULL(block->page.zip.data)) {
memcpy(&block->page.zip.data[PAGE_HEADER + PAGE_INDEX_ID],
- &block->frame[PAGE_HEADER + PAGE_INDEX_ID], 8);
+ &block->page.frame[PAGE_HEADER + PAGE_INDEX_ID], 8);
}
if (m_index->m_srv_index->is_clust()) {
@@ -1994,12 +1940,12 @@ PageConverter::update_index_page(
}
} else if (page_is_leaf(page)) {
/* Set PAGE_MAX_TRX_ID on secondary index leaf pages. */
- mach_write_to_8(&block->frame[PAGE_HEADER + PAGE_MAX_TRX_ID],
- m_trx->id);
+ mach_write_to_8(&block->page.frame
+ [PAGE_HEADER + PAGE_MAX_TRX_ID], m_trx->id);
if (UNIV_LIKELY_NULL(block->page.zip.data)) {
memcpy_aligned<8>(&block->page.zip.data
[PAGE_HEADER + PAGE_MAX_TRX_ID],
- &block->frame
+ &block->page.frame
[PAGE_HEADER + PAGE_MAX_TRX_ID], 8);
}
} else {
@@ -2009,7 +1955,8 @@ clear_page_max_trx_id:
in MySQL 5.6, 5.7 and MariaDB 10.0 and 10.1
would set the field to the transaction ID even
on clustered index pages. */
- memset_aligned<8>(&block->frame[PAGE_HEADER + PAGE_MAX_TRX_ID],
+ memset_aligned<8>(&block->page.frame
+ [PAGE_HEADER + PAGE_MAX_TRX_ID],
0, 8);
if (UNIV_LIKELY_NULL(block->page.zip.data)) {
memset_aligned<8>(&block->page.zip.data
@@ -2031,7 +1978,9 @@ clear_page_max_trx_id:
return(DB_SUCCESS);
}
- return page_is_leaf(block->frame) ? update_records(block) : DB_SUCCESS;
+ return page_is_leaf(block->page.frame)
+ ? update_records(block)
+ : DB_SUCCESS;
}
/** Validate the space flags and update tablespace header page.
@@ -2078,8 +2027,8 @@ PageConverter::update_page(buf_block_t* block, uint16_t& page_type)
case FIL_PAGE_INDEX:
case FIL_PAGE_RTREE:
- /* We need to decompress the contents into block->frame
- before we can do any thing with Btree pages. */
+ /* We need to decompress the contents
+ before we can do anything. */
if (is_compressed_table() && !buf_zip_decompress(block, TRUE)) {
return(DB_CORRUPTION);
@@ -2135,9 +2084,9 @@ dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW
/* If we already had an old page with matching number
in the buffer pool, evict it now, because
we no longer evict the pages on DISCARD TABLESPACE. */
- buf_page_get_gen(block->page.id(), get_zip_size(),
- RW_NO_LATCH, NULL, BUF_EVICT_IF_IN_POOL,
- __FILE__, __LINE__, NULL, NULL);
+ buf_page_get_low(block->page.id(), get_zip_size(), RW_NO_LATCH,
+ nullptr, BUF_PEEK_IF_IN_POOL,
+ nullptr, nullptr, false);
uint16_t page_type;
@@ -2151,7 +2100,7 @@ dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW
if (!block->page.zip.data) {
buf_flush_init_for_writing(
- NULL, block->frame, NULL, full_crc32);
+ NULL, block->page.frame, NULL, full_crc32);
} else if (fil_page_type_is_index(page_type)) {
buf_flush_init_for_writing(
NULL, block->page.zip.data, &block->page.zip,
@@ -2173,11 +2122,8 @@ dberr_t
row_import_cleanup(
/*===============*/
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */
- trx_t* trx, /*!< in/out: transaction for import */
dberr_t err) /*!< in: error code */
{
- ut_a(prebuilt->trx != trx);
-
if (err != DB_SUCCESS) {
dict_table_t* table = prebuilt->table;
table->file_unreadable = true;
@@ -2191,10 +2137,6 @@ row_import_cleanup(
ib::info() << "Discarding tablespace of table "
<< table->name << ": " << err;
- if (!trx->dict_operation_lock_mode) {
- row_mysql_lock_data_dictionary(trx);
- }
-
for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
index;
index = UT_LIST_GET_NEXT(indexes, index)) {
@@ -2202,15 +2144,13 @@ row_import_cleanup(
}
}
- ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
-
DBUG_EXECUTE_IF("ib_import_before_commit_crash", DBUG_SUICIDE(););
- trx_commit_for_mysql(trx);
-
- row_mysql_unlock_data_dictionary(trx);
+ prebuilt->trx->commit();
- trx->free();
+ if (prebuilt->trx->dict_operation_lock_mode) {
+ row_mysql_unlock_data_dictionary(prebuilt->trx);
+ }
prebuilt->trx->op_info = "";
@@ -2226,10 +2166,9 @@ dberr_t
row_import_error(
/*=============*/
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */
- trx_t* trx, /*!< in/out: transaction for import */
dberr_t err) /*!< in: error code */
{
- if (!trx_is_interrupted(trx)) {
+ if (!trx_is_interrupted(prebuilt->trx)) {
char table_name[MAX_FULL_NAME_LEN + 1];
innobase_format_name(
@@ -2237,12 +2176,12 @@ row_import_error(
prebuilt->table->name.m_name);
ib_senderrf(
- trx->mysql_thd, IB_LOG_LEVEL_WARN,
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_WARN,
ER_INNODB_IMPORT_ERROR,
table_name, (ulong) err, ut_strerr(err));
}
- return(row_import_cleanup(prebuilt, trx, err));
+ return row_import_cleanup(prebuilt, err);
}
/*****************************************************************//**
@@ -2376,43 +2315,28 @@ row_import_set_sys_max_row_id(
mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
- btr_pcur_open_at_index_side(
- false, // High end
- index,
- BTR_SEARCH_LEAF,
- &pcur,
- true, // Init cursor
- 0, // Leaf level
- &mtr);
-
- btr_pcur_move_to_prev_on_page(&pcur);
- rec = btr_pcur_get_rec(&pcur);
-
- /* Check for empty table. */
- if (page_rec_is_infimum(rec)) {
- /* The table is empty. */
- } else if (rec_is_metadata(rec, *index)) {
- /* The clustered index contains the metadata record only,
- that is, the table is empty. */
- } else {
- row_id = mach_read_from_6(rec);
+ if (pcur.open_leaf(false, index, BTR_SEARCH_LEAF, &mtr)
+ == DB_SUCCESS) {
+ rec = btr_pcur_move_to_prev_on_page(&pcur);
+
+ if (!rec) {
+ /* The table is corrupted. */
+ } else if (page_rec_is_infimum(rec)) {
+ /* The table is empty. */
+ } else if (rec_is_metadata(rec, *index)) {
+ /* The clustered index contains the metadata
+ record only, that is, the table is empty. */
+ } else {
+ row_id = mach_read_from_6(rec);
+ }
}
- btr_pcur_close(&pcur);
mtr_commit(&mtr);
if (row_id) {
/* Update the system row id if the imported index row id is
greater than the max system row id. */
-
- mutex_enter(&dict_sys.mutex);
-
- if (row_id >= dict_sys.row_id) {
- dict_sys.row_id = row_id + 1;
- dict_hdr_flush_row_id();
- }
-
- mutex_exit(&dict_sys.mutex);
+ dict_sys.update_row_id(row_id);
}
}
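
[Editor's note] The hunk above collapses the open-coded "lock dict_sys.mutex, compare, bump, flush" sequence into a single dict_sys.update_row_id() call. A sketch of that encapsulation, using std::mutex as a stand-in for the dictionary latch (this dict_sys_t is illustrative, not the real class definition):

#include <cstdint>
#include <mutex>

class dict_sys_t {
	std::mutex	mutex;		/* stand-in for the dictionary latch */
	uint64_t	row_id = 0;	/* next DB_ROW_ID to assign */
public:
	/* Ensure that row ids assigned after the import are greater than
	any row id found in the imported tablespace. */
	void update_row_id(uint64_t imported)
	{
		std::lock_guard<std::mutex> g(mutex);
		if (imported >= row_id) {
			row_id = imported + 1;
			/* the real code also persists the new value,
			as dict_hdr_flush_row_id() did in the old hunk */
		}
	}
};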
@@ -3161,18 +3085,16 @@ and apply it to dict_table_t
static dberr_t handle_instant_metadata(dict_table_t *table,
const row_import &cfg)
{
- dict_get_and_save_data_dir_path(table, false);
+ dict_get_and_save_data_dir_path(table);
char *filepath;
if (DICT_TF_HAS_DATA_DIR(table->flags))
{
ut_a(table->data_dir_path);
-
- filepath=
- fil_make_filepath(table->data_dir_path, table->name.m_name, IBD, true);
+ filepath= fil_make_filepath(table->data_dir_path, table->name, IBD, true);
}
else
- filepath= fil_make_filepath(nullptr, table->name.m_name, IBD, false);
+ filepath= fil_make_filepath(nullptr, table->name, IBD, false);
if (!filepath)
return DB_OUT_OF_MEMORY;
@@ -3195,9 +3117,8 @@ static dberr_t handle_instant_metadata(dict_table_t *table,
static_cast<byte *>(aligned_malloc(srv_page_size, srv_page_size)),
&aligned_free);
- if (dberr_t err= os_file_read_no_error_handling(IORequestReadPartial,
- file, first_page.get(), 0,
- srv_page_size, nullptr))
+ if (dberr_t err= os_file_read(IORequestReadPartial, file, first_page.get(),
+ 0, srv_page_size, nullptr))
return err;
auto space_flags= fsp_header_get_flags(first_page.get());
@@ -3232,7 +3153,7 @@ static dberr_t handle_instant_metadata(dict_table_t *table,
aligned_malloc(UNIV_PAGE_SIZE_MAX, UNIV_PAGE_SIZE_MAX)),
&aligned_free);
- if (dberr_t err= os_file_read_no_error_handling(
+ if (dberr_t err= os_file_read(
IORequestReadPartial, file, page.get(), 3 * physical_size,
physical_size, nullptr))
return err;
@@ -3249,14 +3170,6 @@ static dberr_t handle_instant_metadata(dict_table_t *table,
{
dict_index_t *index= dict_table_get_first_index(table);
- auto tmp1= table->space_id;
- table->space_id= page_get_space_id(page.get());
- SCOPE_EXIT([tmp1, table]() { table->space_id= tmp1; });
-
- auto tmp2= index->page;
- index->page= page_get_page_no(page.get());
- SCOPE_EXIT([tmp2, index]() { index->page= tmp2; });
-
if (!page_is_comp(page.get()) != !dict_table_is_comp(table))
{
ib_errf(current_thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
@@ -3265,7 +3178,7 @@ static dberr_t handle_instant_metadata(dict_table_t *table,
}
if (btr_cur_instant_root_init(index, page.get()))
- return DB_ERROR;
+ return DB_CORRUPTION;
ut_ad(index->n_core_null_bytes != dict_index_t::NO_CORE_NULL_BYTES);
@@ -3284,6 +3197,8 @@ static dberr_t handle_instant_metadata(dict_table_t *table,
while (btr_page_get_level(page.get()) != 0)
{
const rec_t *rec= page_rec_get_next(page_get_infimum_rec(page.get()));
+ if (!rec)
+ return DB_CORRUPTION;
/* Relax the assertion in rec_init_offsets(). */
ut_ad(!index->in_instant_init);
@@ -3295,10 +3210,8 @@ static dberr_t handle_instant_metadata(dict_table_t *table,
uint64_t child_page_no= btr_node_ptr_get_child_page_no(rec, offsets);
if (dberr_t err=
- os_file_read_no_error_handling(IORequestReadPartial, file,
- page.get(),
- child_page_no * physical_size,
- physical_size, nullptr))
+ os_file_read(IORequestReadPartial, file, page.get(),
+ child_page_no * physical_size, physical_size, nullptr))
return err;
if (dberr_t err= decrypt_decompress(space_crypt, space_flags,
@@ -3308,18 +3221,22 @@ static dberr_t handle_instant_metadata(dict_table_t *table,
return err;
}
- const auto *rec= page_rec_get_next(page_get_infimum_rec(page.get()));
+ const auto *rec= page_rec_get_next_const(page_get_infimum_rec(page.get()));
const auto comp= dict_table_is_comp(index->table);
- const auto info_bits= rec_get_info_bits(rec, comp);
- if (page_rec_is_supremum(rec) || !(info_bits & REC_INFO_MIN_REC_FLAG))
+ if (!rec || page_rec_is_supremum(rec))
{
+ corrupted_metadata:
ib::error() << "Table " << index->table->name
<< " is missing instant ALTER metadata";
index->table->corrupted= true;
return DB_CORRUPTION;
}
+ const auto info_bits= rec_get_info_bits(rec, comp);
+ if (!(info_bits & REC_INFO_MIN_REC_FLAG))
+ goto corrupted_metadata;
+
if ((info_bits & ~REC_INFO_DELETED_FLAG) != REC_INFO_MIN_REC_FLAG ||
(comp && rec_get_status(rec) != REC_STATUS_INSTANT))
{
@@ -3373,11 +3290,10 @@ static dberr_t handle_instant_metadata(dict_table_t *table,
&aligned_free);
if (dberr_t err=
- os_file_read_no_error_handling(IORequestReadPartial, file,
- second_page.get(), physical_size *
- mach_read_from_4(ptr +
- BTR_EXTERN_PAGE_NO),
- srv_page_size, nullptr))
+ os_file_read(IORequestReadPartial, file, second_page.get(),
+ physical_size *
+ mach_read_from_4(ptr + BTR_EXTERN_PAGE_NO),
+ physical_size, nullptr))
return err;
if (dberr_t err= decrypt_decompress(space_crypt, space_flags,
@@ -3585,8 +3501,6 @@ row_import_update_index_root(trx_t* trx, dict_table_t* table, bool reset)
que_thr_t* thr;
- graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
-
ut_a(thr = que_fork_start_command(graph));
que_run_threads(thr);
@@ -3703,7 +3617,7 @@ dberr_t row_import_update_discarded_flag(trx_t* trx, table_id_t table_id,
pars_info_bind_function(
info, "my_func", row_import_set_discarded, &discard);
- dberr_t err = que_eval_sql(info, sql, false, trx);
+ dberr_t err = que_eval_sql(info, sql, trx);
ut_a(discard.n_recs == 1);
ut_a(discard.flags2 != ULINT32_UNDEFINED);
@@ -3784,15 +3698,15 @@ dberr_t FetchIndexRootPages::run(const fil_iterator_t& iter,
const bool encrypted= iter.crypt_data != NULL &&
iter.crypt_data->should_encrypt();
byte* const readptr= iter.io_buffer;
- block->frame= readptr;
+ block->page.frame= readptr;
if (block->page.zip.data)
block->page.zip.data= readptr;
bool page_compressed= false;
- dberr_t err= os_file_read_no_error_handling(
- IORequestReadPartial, iter.file, readptr, 3 * size, size, 0);
+ dberr_t err= os_file_read(IORequestReadPartial, iter.file, readptr,
+ 3 * size, size, nullptr);
if (err != DB_SUCCESS)
{
ib::error() << iter.filepath << ": os_file_read() failed";
@@ -3884,7 +3798,7 @@ static dberr_t fil_iterate(
required by buf_zip_decompress() */
dberr_t err = DB_SUCCESS;
bool page_compressed = false;
- bool punch_hole = true;
+ bool punch_hole = !my_test_if_thinly_provisioned(iter.file);
for (offset = iter.start; offset < iter.end; offset += n_bytes) {
if (callback.is_interrupted()) {
@@ -3893,7 +3807,7 @@ static dberr_t fil_iterate(
}
byte* io_buffer = iter.io_buffer;
- block->frame = io_buffer;
+ block->page.frame = io_buffer;
if (block->page.zip.data) {
/* Zip IO is done in the compressed page buffer. */
@@ -3916,9 +3830,8 @@ static dberr_t fil_iterate(
? iter.crypt_io_buffer : io_buffer;
byte* const writeptr = readptr;
- err = os_file_read_no_error_handling(
- IORequestReadPartial,
- iter.file, readptr, offset, n_bytes, 0);
+ err = os_file_read(IORequestReadPartial, iter.file, readptr,
+ offset, n_bytes, nullptr);
if (err != DB_SUCCESS) {
ib::error() << iter.filepath
<< ": os_file_read() failed";
@@ -3933,7 +3846,7 @@ static dberr_t fil_iterate(
for (ulint i = 0; i < n_pages_read;
++block->page.id_,
- ++i, page_off += size, block->frame += size) {
+ ++i, page_off += size, block->page.frame += size) {
byte* src = readptr + i * size;
const ulint page_no = page_get_page_no(src);
if (!page_no && block->page.id().page_no()) {
@@ -3990,7 +3903,7 @@ page_corrupted:
} else if (!page_compressed
&& type != FIL_PAGE_TYPE_XDES
&& !block->page.zip.data) {
- block->frame = src;
+ block->page.frame = src;
frame_changed = true;
} else {
ut_ad(dst != src);
@@ -4042,8 +3955,7 @@ page_corrupted:
if ((err = callback(block)) != DB_SUCCESS) {
goto func_exit;
} else if (!updated) {
- updated = block->page.state()
- == BUF_BLOCK_FILE_PAGE;
+ updated = !!block->page.frame;
}
/* If tablespace is encrypted we use additional
@@ -4051,10 +3963,10 @@ page_corrupted:
for decrypting readptr == crypt_io_buffer != io_buffer.
Destination for decryption is a buffer pool block
- block->frame == dst == io_buffer that is updated.
+ block->page.frame == dst == io_buffer that is updated.
Pages that did not require decryption even when
tablespace is marked as encrypted are not copied
- instead block->frame is set to src == readptr.
+ instead block->page.frame is set to src == readptr.
For encryption we again use temporary scratch area
writeptr != io_buffer == dst
@@ -4087,7 +3999,7 @@ page_corrupted:
if (block->page.zip.data) {
block->page.zip.data = dst;
} else {
- block->frame = dst;
+ block->page.frame = dst;
}
}
@@ -4203,18 +4115,17 @@ fil_tablespace_iterate(
return(DB_CORRUPTION););
/* Make sure the data_dir_path is set. */
- dict_get_and_save_data_dir_path(table, false);
+ dict_get_and_save_data_dir_path(table);
- if (DICT_TF_HAS_DATA_DIR(table->flags)) {
- ut_a(table->data_dir_path);
+ ut_ad(!DICT_TF_HAS_DATA_DIR(table->flags) || table->data_dir_path);
- filepath = fil_make_filepath(
- table->data_dir_path, table->name.m_name, IBD, true);
- } else {
- filepath = fil_make_filepath(
- NULL, table->name.m_name, IBD, false);
- }
+ const char *data_dir_path = DICT_TF_HAS_DATA_DIR(table->flags)
+ ? table->data_dir_path : nullptr;
+ filepath = fil_make_filepath(data_dir_path,
+ {table->name.m_name,
+ strlen(table->name.m_name)},
+ IBD, data_dir_path != nullptr);
if (!filepath) {
return(DB_OUT_OF_MEMORY);
} else {
@@ -4251,13 +4162,13 @@ fil_tablespace_iterate(
buf_block_t* block = reinterpret_cast<buf_block_t*>
(ut_zalloc_nokey(sizeof *block));
- block->frame = page;
- block->page.init(BUF_BLOCK_FILE_PAGE, page_id_t(~0ULL), 1);
+ block->page.frame = page;
+ block->page.init(buf_page_t::UNFIXED + 1, page_id_t{~0ULL});
- /* Read the first page and determine the page and zip size. */
+ /* Read the first page and determine the page size. */
- err = os_file_read_no_error_handling(IORequestReadPartial,
- file, page, 0, srv_page_size, 0);
+ err = os_file_read(IORequestReadPartial, file, page, 0, srv_page_size,
+ nullptr);
if (err == DB_SUCCESS) {
err = callback.init(file_size, block);
@@ -4306,8 +4217,9 @@ fil_tablespace_iterate(
if (block->page.zip.ssize) {
ut_ad(iter.n_io_buffers == 1);
- block->frame = iter.io_buffer;
- block->page.zip.data = block->frame + srv_page_size;
+ block->page.frame = iter.io_buffer;
+ block->page.zip.data = block->page.frame
+ + srv_page_size;
}
err = callback.run(iter, block);
@@ -4351,9 +4263,9 @@ row_import_for_mysql(
row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL */
{
dberr_t err;
- trx_t* trx;
ib_uint64_t autoinc = 0;
char* filepath = NULL;
+ trx_t* trx = prebuilt->trx;
/* The caller assured that this is not read_only_mode and that no
temporary tablespace is being imported. */
@@ -4362,28 +4274,12 @@ row_import_for_mysql(
ut_ad(table->space_id);
ut_ad(table->space_id < SRV_SPACE_ID_UPPER_BOUND);
- ut_ad(prebuilt->trx);
+ ut_ad(trx);
+ ut_ad(trx->state == TRX_STATE_ACTIVE);
ut_ad(!table->is_readable());
ibuf_delete_for_discarded_space(table->space_id);
- trx_start_if_not_started(prebuilt->trx, true);
-
- trx = trx_create();
-
- /* So that the table is not DROPped during recovery. */
- trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
-
- trx_start_if_not_started(trx, true);
-
- /* So that we can send error messages to the user. */
- trx->mysql_thd = prebuilt->trx->mysql_thd;
-
- /* Ensure that the table will be dropped by trx_rollback_active()
- in case of a crash. */
-
- trx->table_id = table->id;
-
/* Assign an undo segment for the transaction, so that the
transaction will be recovered after a crash. */
@@ -4398,25 +4294,19 @@ row_import_for_mysql(
DBUG_EXECUTE_IF("ib_import_undo_assign_failure",
err = DB_TOO_MANY_CONCURRENT_TRXS;);
- if (err != DB_SUCCESS) {
-
- return(row_import_cleanup(prebuilt, trx, err));
-
- } else if (trx->rsegs.m_redo.undo == 0) {
-
+ if (err == DB_SUCCESS && !trx->has_logged_persistent()) {
err = DB_TOO_MANY_CONCURRENT_TRXS;
- return(row_import_cleanup(prebuilt, trx, err));
+ }
+ if (err != DB_SUCCESS) {
+ return row_import_cleanup(prebuilt, err);
}
- prebuilt->trx->op_info = "read meta-data file";
-
- /* Prevent DDL operations while we are checking. */
-
- rw_lock_s_lock(&dict_sys.latch);
+ trx->op_info = "read meta-data file";
row_import cfg;
+ THD* thd = trx->mysql_thd;
- err = row_import_read_cfg(table, trx->mysql_thd, cfg);
+ err = row_import_read_cfg(table, thd, cfg);
/* Check if the table column definitions match the contents
of the config file. */
@@ -4424,14 +4314,13 @@ row_import_for_mysql(
if (err == DB_SUCCESS) {
if (dberr_t err = handle_instant_metadata(table, cfg)) {
- rw_lock_s_unlock(&dict_sys.latch);
- return row_import_error(prebuilt, trx, err);
+ return row_import_error(prebuilt, err);
}
/* We have a schema file, try and match it with our
data dictionary. */
- err = cfg.match_schema(trx->mysql_thd);
+ err = cfg.match_schema(thd);
/* Update index->page and SYS_INDEXES.PAGE_NO to match the
B-tree root page numbers in the tablespace. Use the index
@@ -4442,15 +4331,10 @@ row_import_for_mysql(
autoinc = cfg.m_autoinc;
}
- rw_lock_s_unlock(&dict_sys.latch);
-
DBUG_EXECUTE_IF("ib_import_set_index_root_failure",
err = DB_TOO_MANY_CONCURRENT_TRXS;);
} else if (cfg.m_missing) {
-
- rw_lock_s_unlock(&dict_sys.latch);
-
/* We don't have a schema file, we will have to discover
the index root pages from the .ibd file and skip the schema
matching step. */
@@ -4460,13 +4344,13 @@ row_import_for_mysql(
cfg.m_zip_size = 0;
if (UT_LIST_GET_LEN(table->indexes) > 1) {
- ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ib_errf(thd, IB_LOG_LEVEL_ERROR,
ER_INTERNAL_ERROR,
"Drop all secondary indexes before importing "
"table %s when .cfg file is missing.",
table->name.m_name);
err = DB_ERROR;
- return row_import_error(prebuilt, trx, err);
+ return row_import_error(prebuilt, err);
}
FetchIndexRootPages fetchIndexRootPages(table, trx);
@@ -4487,24 +4371,18 @@ row_import_for_mysql(
err = cfg.set_root_by_heuristic();
if (err == DB_SUCCESS) {
- if (dberr_t err =
- handle_instant_metadata(table,
- cfg)) {
- return row_import_error(
- prebuilt, trx, err);
- }
+ err = handle_instant_metadata(table,
+ cfg);
}
}
}
- } else {
- rw_lock_s_unlock(&dict_sys.latch);
}
if (err != DB_SUCCESS) {
- return(row_import_error(prebuilt, trx, err));
+ return row_import_error(prebuilt, err);
}
- prebuilt->trx->op_info = "importing tablespace";
+ trx->op_info = "importing tablespace";
ib::info() << "Phase I - Update all pages";
@@ -4546,31 +4424,28 @@ row_import_for_mysql(
if (err != DB_DECRYPTION_FAILED) {
- ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ib_errf(thd, IB_LOG_LEVEL_ERROR,
ER_INTERNAL_ERROR,
"Error importing tablespace for table %s : %s",
table_name, ut_strerr(err));
}
- return(row_import_cleanup(prebuilt, trx, err));
+ return row_import_cleanup(prebuilt, err);
}
- row_mysql_lock_data_dictionary(trx);
-
/* If the table is stored in a remote tablespace, we need to
determine that filepath from the link file and system tables.
Find the space ID in SYS_TABLES since this is an ALTER TABLE. */
- dict_get_and_save_data_dir_path(table, true);
+ dict_get_and_save_data_dir_path(table);
- if (DICT_TF_HAS_DATA_DIR(table->flags)) {
- ut_a(table->data_dir_path);
+ ut_ad(!DICT_TF_HAS_DATA_DIR(table->flags) || table->data_dir_path);
+ const char *data_dir_path = DICT_TF_HAS_DATA_DIR(table->flags)
+ ? table->data_dir_path : nullptr;
+ fil_space_t::name_type name{
+ table->name.m_name, strlen(table->name.m_name)};
- filepath = fil_make_filepath(
- table->data_dir_path, table->name.m_name, IBD, true);
- } else {
- filepath = fil_make_filepath(
- NULL, table->name.m_name, IBD, false);
- }
+ filepath = fil_make_filepath(data_dir_path, name, IBD,
+ data_dir_path != nullptr);
DBUG_EXECUTE_IF(
"ib_import_OOM_15",
@@ -4579,13 +4454,10 @@ row_import_for_mysql(
);
if (filepath == NULL) {
- row_mysql_unlock_data_dictionary(trx);
- return(row_import_cleanup(prebuilt, trx, DB_OUT_OF_MEMORY));
+ return row_import_cleanup(prebuilt, DB_OUT_OF_MEMORY);
}
/* Open the tablespace so that we can access via the buffer pool.
- We set the 2nd param (fix_dict = true) here because we already
- have an x-lock on dict_sys.latch and dict_sys.mutex.
The tablespace is initially opened as a temporary one, because
we will not be writing any redo log for it before we have invoked
fil_space_t::set_imported() to declare it a persistent tablespace. */
@@ -4593,35 +4465,29 @@ row_import_for_mysql(
ulint fsp_flags = dict_tf_to_fsp_flags(table->flags);
table->space = fil_ibd_open(
- true, true, FIL_TYPE_IMPORT, table->space_id,
- fsp_flags, table->name, filepath, &err);
+ 2, FIL_TYPE_IMPORT, table->space_id,
+ fsp_flags, name, filepath, &err);
ut_ad((table->space == NULL) == (err != DB_SUCCESS));
DBUG_EXECUTE_IF("ib_import_open_tablespace_failure",
err = DB_TABLESPACE_NOT_FOUND; table->space = NULL;);
if (!table->space) {
- row_mysql_unlock_data_dictionary(trx);
-
- ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ib_senderrf(thd, IB_LOG_LEVEL_ERROR,
ER_GET_ERRMSG,
err, ut_strerr(err), filepath);
-
- ut_free(filepath);
-
- return(row_import_cleanup(prebuilt, trx, err));
}
- row_mysql_unlock_data_dictionary(trx);
-
ut_free(filepath);
- err = ibuf_check_bitmap_on_import(trx, table->space);
+ if (err == DB_SUCCESS) {
+ err = ibuf_check_bitmap_on_import(trx, table->space);
+ }
DBUG_EXECUTE_IF("ib_import_check_bitmap_failure", err = DB_CORRUPTION;);
if (err != DB_SUCCESS) {
- return(row_import_cleanup(prebuilt, trx, err));
+ return row_import_cleanup(prebuilt, err);
}
/* The first index must always be the clustered index. */
@@ -4629,7 +4495,7 @@ row_import_for_mysql(
dict_index_t* index = dict_table_get_first_index(table);
if (!dict_index_is_clust(index)) {
- return(row_import_error(prebuilt, trx, DB_CORRUPTION));
+ return row_import_error(prebuilt, DB_CORRUPTION);
}
/* Update the Btree segment headers for index node and
@@ -4641,7 +4507,7 @@ row_import_for_mysql(
err = DB_CORRUPTION;);
if (err != DB_SUCCESS) {
- return(row_import_error(prebuilt, trx, err));
+ return row_import_error(prebuilt, err);
} else if (cfg.requires_purge(index->name)) {
/* Purge any delete-marked records that couldn't be
@@ -4660,7 +4526,7 @@ row_import_for_mysql(
DBUG_EXECUTE_IF("ib_import_cluster_failure", err = DB_CORRUPTION;);
if (err != DB_SUCCESS) {
- return(row_import_error(prebuilt, trx, err));
+ return row_import_error(prebuilt, err);
}
/* For secondary indexes, purge any records that couldn't be purged
@@ -4673,7 +4539,7 @@ row_import_for_mysql(
err = DB_CORRUPTION;);
if (err != DB_SUCCESS) {
- return(row_import_error(prebuilt, trx, err));
+ return row_import_error(prebuilt, err);
}
/* Ensure that the next available DB_ROW_ID is not smaller than
@@ -4697,7 +4563,7 @@ row_import_for_mysql(
ib::warn() << "Waiting for flush to complete on "
<< prebuilt->table->name;
}
- os_thread_sleep(20000);
+ std::this_thread::sleep_for(std::chrono::milliseconds(20));
}
ib::info() << "Phase IV - Flush complete";
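
[Editor's note] Note the unit change in the hunk above: os_thread_sleep() took microseconds, so os_thread_sleep(20000) and the new 20-millisecond sleep are the same pause between flush-progress checks. A compilable one-liner for reference:

#include <chrono>
#include <thread>

int main()
{
	/* was: os_thread_sleep(20000);  i.e. 20000 microseconds */
	std::this_thread::sleep_for(std::chrono::milliseconds(20));
}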
@@ -4712,13 +4578,13 @@ row_import_for_mysql(
err = row_import_update_index_root(trx, table, false);
if (err != DB_SUCCESS) {
- return(row_import_error(prebuilt, trx, err));
+ return row_import_error(prebuilt, err);
}
err = row_import_update_discarded_flag(trx, table->id, false);
if (err != DB_SUCCESS) {
- return(row_import_error(prebuilt, trx, err));
+ return row_import_error(prebuilt, err);
}
table->file_unreadable = false;
@@ -4734,5 +4600,5 @@ row_import_for_mysql(
btr_write_autoinc(dict_table_get_first_index(table), autoinc);
}
- return(row_import_cleanup(prebuilt, trx, err));
+ return row_import_cleanup(prebuilt, err);
}
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index 79a255dfc8f..d9bc72bee28 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2022, MariaDB Corporation.
+Copyright (c) 2016, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -35,7 +35,6 @@ Created 4/20/1996 Heikki Tuuri
#include "que0que.h"
#include "row0upd.h"
#include "row0sel.h"
-#include "row0log.h"
#include "rem0cmp.h"
#include "lock0lock.h"
#include "log0log.h"
@@ -44,9 +43,13 @@ Created 4/20/1996 Heikki Tuuri
#include "buf0lru.h"
#include "fts0fts.h"
#include "fts0types.h"
+#ifdef BTR_CUR_HASH_ADAPT
+# include "btr0sea.h"
+#endif
#ifdef WITH_WSREP
#include <wsrep.h>
#include <mysql/service_wsrep.h>
+#include "ha_prototypes.h"
#endif /* WITH_WSREP */
/*************************************************************************
@@ -172,7 +175,7 @@ dberr_t
row_ins_sec_index_entry_by_modify(
/*==============================*/
ulint flags, /*!< in: undo logging and locking flags */
- ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_INSERT_TREE,
depending on whether mtr holds just a leaf
latch or also a tree latch */
btr_cur_t* cursor, /*!< in: B-tree cursor */
@@ -192,8 +195,8 @@ row_ins_sec_index_entry_by_modify(
rec = btr_cur_get_rec(cursor);
- ut_ad(!dict_index_is_clust(cursor->index));
- ut_ad(rec_offs_validate(rec, cursor->index, *offsets));
+ ut_ad(!cursor->index()->is_clust());
+ ut_ad(rec_offs_validate(rec, cursor->index(), *offsets));
ut_ad(!entry->info_bits);
/* We know that in the alphabetical ordering, entry and rec are
@@ -202,7 +205,7 @@ row_ins_sec_index_entry_by_modify(
difference. */
update = row_upd_build_sec_rec_difference_binary(
- rec, cursor->index, *offsets, entry, heap);
+ rec, cursor->index(), *offsets, entry, heap);
if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) {
/* We should never insert in place of a record that
@@ -216,8 +219,8 @@ row_ins_sec_index_entry_by_modify(
returns. After that point, set_committed(true)
would be invoked in commit_inplace_alter_table(). */
ut_a(update->n_fields == 0);
- ut_a(!cursor->index->is_committed());
- ut_ad(!dict_index_is_online_ddl(cursor->index));
+ ut_a(!cursor->index()->is_committed());
+ ut_ad(!dict_index_is_online_ddl(cursor->index()));
return(DB_SUCCESS);
}
@@ -239,7 +242,7 @@ row_ins_sec_index_entry_by_modify(
break;
}
} else {
- ut_a(mode == BTR_MODIFY_TREE);
+ ut_ad(mode == BTR_INSERT_TREE);
if (buf_pool.running_out()) {
return(DB_LOCK_TABLE_FULL);
@@ -286,15 +289,15 @@ row_ins_clust_index_entry_by_modify(
dberr_t err = DB_SUCCESS;
btr_cur_t* cursor = btr_pcur_get_btr_cur(pcur);
TABLE* mysql_table = NULL;
- ut_ad(dict_index_is_clust(cursor->index));
+ ut_ad(cursor->index()->is_clust());
rec = btr_cur_get_rec(cursor);
ut_ad(rec_get_deleted_flag(rec,
- dict_table_is_comp(cursor->index->table)));
+ cursor->index()->table->not_redundant()));
/* In delete-marked records, DB_TRX_ID must
always refer to an existing undo log record. */
- ut_ad(rec_get_trx_id(rec, cursor->index));
+ ut_ad(rec_get_trx_id(rec, cursor->index()));
/* Build an update vector containing all the fields to be modified;
NOTE that this vector may NOT contain system columns trx_id or
@@ -305,15 +308,17 @@ row_ins_clust_index_entry_by_modify(
}
update = row_upd_build_difference_binary(
- cursor->index, entry, rec, NULL, true, true,
+ cursor->index(), entry, rec, NULL, true, true,
thr_get_trx(thr), heap, mysql_table, &err);
if (err != DB_SUCCESS) {
return(err);
}
if (mode != BTR_MODIFY_TREE) {
- ut_ad((mode & ulint(~BTR_ALREADY_S_LATCHED))
- == BTR_MODIFY_LEAF);
+ ut_ad(mode == BTR_MODIFY_LEAF
+ || mode == BTR_MODIFY_LEAF_ALREADY_LATCHED
+ || mode == BTR_MODIFY_ROOT_AND_LEAF
+ || mode == BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED);
/* Try optimistic updating of the record, keeping changes
within the page */
@@ -672,7 +677,7 @@ row_ins_set_detailed(
{
ut_ad(!srv_read_only_mode);
- mutex_enter(&srv_misc_tmpfile_mutex);
+ mysql_mutex_lock(&srv_misc_tmpfile_mutex);
rewind(srv_misc_tmpfile);
if (os_file_set_eof(srv_misc_tmpfile)) {
@@ -686,13 +691,14 @@ row_ins_set_detailed(
trx_set_detailed_error(trx, "temp file operation failed");
}
- mutex_exit(&srv_misc_tmpfile_mutex);
+ mysql_mutex_unlock(&srv_misc_tmpfile_mutex);
}
/*********************************************************************//**
Acquires dict_foreign_err_mutex, rewinds dict_foreign_err_file
and displays information about the given transaction.
The caller must release dict_foreign_err_mutex. */
+TRANSACTIONAL_TARGET
static
void
row_ins_foreign_trx_print(
@@ -705,13 +711,14 @@ row_ins_foreign_trx_print(
ut_ad(!srv_read_only_mode);
- lock_mutex_enter();
- n_rec_locks = lock_number_of_rows_locked(&trx->lock);
- n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
- heap_size = mem_heap_get_size(trx->lock.lock_heap);
- lock_mutex_exit();
+ {
+ TMLockMutexGuard g{SRW_LOCK_CALL};
+ n_rec_locks = trx->lock.n_rec_locks;
+ n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
+ heap_size = mem_heap_get_size(trx->lock.lock_heap);
+ }
- mutex_enter(&dict_foreign_err_mutex);
+ mysql_mutex_lock(&dict_foreign_err_mutex);
rewind(dict_foreign_err_file);
ut_print_timestamp(dict_foreign_err_file);
fputs(" Transaction:\n", dict_foreign_err_file);
@@ -719,7 +726,7 @@ row_ins_foreign_trx_print(
trx_print_low(dict_foreign_err_file, trx, 600,
n_rec_locks, n_trx_locks, heap_size);
- ut_ad(mutex_own(&dict_foreign_err_mutex));
+ mysql_mutex_assert_owner(&dict_foreign_err_mutex);
}
/*********************************************************************//**
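
[Editor's note] In the row_ins_foreign_trx_print() hunk above, the paired lock_mutex_enter()/lock_mutex_exit() calls become a scoped TMLockMutexGuard, MariaDB's RAII guard over lock_sys.latch (which may elide the latch via transactional memory). The idiom, sketched with standard C++ types only (the names below are illustrative):

#include <mutex>

std::mutex lock_sys_latch;			/* stand-in for lock_sys.latch */
unsigned n_rec_locks, n_trx_locks, heap_size;	/* stand-ins for the counters */

void snapshot_lock_stats(unsigned rec_locks, unsigned trx_locks, unsigned heap)
{
	/* The guard acquires the latch here and releases it at the end of
	the block on every exit path -- the replacement for explicit
	enter()/exit() pairs. */
	std::lock_guard<std::mutex> g(lock_sys_latch);
	n_rec_locks = rec_locks;
	n_trx_locks = trx_locks;
	heap_size = heap;
}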
@@ -777,7 +784,7 @@ row_ins_foreign_report_err(
}
putc('\n', ef);
- mutex_exit(&dict_foreign_err_mutex);
+ mysql_mutex_unlock(&dict_foreign_err_mutex);
}
/*********************************************************************//**
@@ -843,7 +850,7 @@ row_ins_foreign_report_add_err(
}
putc('\n', ef);
- mutex_exit(&dict_foreign_err_mutex);
+ mysql_mutex_unlock(&dict_foreign_err_mutex);
}
/*********************************************************************//**
@@ -993,7 +1000,8 @@ row_ins_foreign_check_on_constraint(
{
upd_node_t* node;
upd_node_t* cascade;
- dict_table_t* table = foreign->foreign_table;
+ dict_table_t*const*const fktable = &foreign->foreign_table;
+ dict_table_t* table = *fktable;
dict_index_t* index;
dict_index_t* clust_index;
dtuple_t* ref;
@@ -1013,8 +1021,8 @@ row_ins_foreign_check_on_constraint(
/* Since we are going to delete or update a row, we have to invalidate
the MySQL query cache for table. A deadlock of threads is not possible
here because the caller of this function does not hold any latches with
- the mutex rank above the lock_sys_t::mutex. The query cache mutex
- has a rank just above the lock_sys_t::mutex. */
+ the mutex rank above the lock_sys.latch. The query cache mutex
+ has a rank just above the lock_sys.latch. */
row_ins_invalidate_query_cache(thr, table->name.m_name);
@@ -1106,7 +1114,7 @@ row_ins_foreign_check_on_constraint(
goto nonstandard_exit_func;
}
- index = btr_pcur_get_btr_cur(pcur)->index;
+ index = pcur->index();
ut_a(index == foreign->foreign_index);
@@ -1129,9 +1137,14 @@ row_ins_foreign_check_on_constraint(
ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec,
tmp_heap);
- btr_pcur_open_with_no_init(clust_index, ref,
- PAGE_CUR_LE, BTR_SEARCH_LEAF,
- cascade->pcur, mtr);
+ cascade->pcur->old_rec = nullptr;
+ cascade->pcur->btr_cur.page_cur.index = clust_index;
+ err = btr_pcur_open_with_no_init(ref,
+ PAGE_CUR_LE, BTR_SEARCH_LEAF,
+ cascade->pcur, mtr);
+ if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+ goto nonstandard_exit_func;
+ }
clust_rec = btr_pcur_get_rec(cascade->pcur);
clust_block = btr_pcur_get_block(cascade->pcur);
@@ -1161,7 +1174,7 @@ row_ins_foreign_check_on_constraint(
/* Set an X-lock on the row to delete or update in the child table */
- err = lock_table(0, table, LOCK_IX, thr);
+ err = lock_table(table, fktable, LOCK_IX, thr);
if (err == DB_SUCCESS) {
/* Here it suffices to use a LOCK_REC_NOT_GAP type lock;
@@ -1338,21 +1351,14 @@ row_ins_foreign_check_on_constraint(
err = row_update_cascade_for_mysql(thr, cascade,
foreign->foreign_table);
- /* Release the data dictionary latch for a while, so that we do not
- starve other threads from doing CREATE TABLE etc. if we have a huge
- cascaded operation running. */
-
- row_mysql_unfreeze_data_dictionary(thr_get_trx(thr));
-
- DEBUG_SYNC_C("innodb_dml_cascade_dict_unfreeze");
-
- row_mysql_freeze_data_dictionary(thr_get_trx(thr));
-
mtr_start(mtr);
/* Restore pcur position */
- btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
+ if (pcur->restore_position(BTR_SEARCH_LEAF, mtr)
+ != btr_pcur_t::SAME_ALL) {
+ err = DB_CORRUPTION;
+ }
if (tmp_heap) {
mem_heap_free(tmp_heap);
@@ -1371,7 +1377,10 @@ nonstandard_exit_func:
mtr_commit(mtr);
mtr_start(mtr);
- btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
+ if (pcur->restore_position(BTR_SEARCH_LEAF, mtr)
+ != btr_pcur_t::SAME_ALL && err == DB_SUCCESS) {
+ err = DB_CORRUPTION;
+ }
DBUG_RETURN(err);
}
@@ -1457,10 +1466,7 @@ row_ins_check_foreign_constraint(
dtuple_t* entry, /*!< in: index entry for index */
que_thr_t* thr) /*!< in: query thread */
{
- dberr_t err;
upd_node_t* upd_node;
- dict_table_t* check_table;
- dict_index_t* check_index;
ulint n_fields_cmp;
btr_pcur_t pcur;
int cmp;
@@ -1482,14 +1488,10 @@ row_ins_check_foreign_constraint(
upd_node= NULL;
#endif /* WITH_WSREP */
- ut_ad(rw_lock_own(&dict_sys.latch, RW_LOCK_S));
-
- err = DB_SUCCESS;
-
- if (trx->check_foreigns == FALSE) {
+ if (!trx->check_foreigns) {
/* The user has suppressed foreign key checks currently for
this session */
- goto exit_func;
+ DBUG_RETURN(DB_SUCCESS);
}
/* If any of the foreign key fields in entry is SQL NULL, we
@@ -1498,12 +1500,12 @@ row_ins_check_foreign_constraint(
for (ulint i = 0; i < entry->n_fields; i++) {
dfield_t* field = dtuple_get_nth_field(entry, i);
if (i < foreign->n_fields && dfield_is_null(field)) {
- goto exit_func;
+ DBUG_RETURN(DB_SUCCESS);
}
/* System Versioning: if row_end != Inf, we
suppress the foreign key check */
if (field->type.vers_sys_end() && field->vers_history_row()) {
- goto exit_func;
+ DBUG_RETURN(DB_SUCCESS);
}
}
@@ -1528,7 +1530,7 @@ row_ins_check_foreign_constraint(
another, and the user has problems predicting in
which order they are performed. */
- goto exit_func;
+ DBUG_RETURN(DB_SUCCESS);
}
}
@@ -1540,23 +1542,32 @@ row_ins_check_foreign_constraint(
dfield_t* row_end = dtuple_get_nth_field(
insert_node->row, table->vers_end);
if (row_end->vers_history_row()) {
- goto exit_func;
+ DBUG_RETURN(DB_SUCCESS);
}
}
}
- if (check_ref) {
- check_table = foreign->referenced_table;
- check_index = foreign->referenced_index;
- } else {
- check_table = foreign->foreign_table;
- check_index = foreign->foreign_index;
+ dict_table_t *check_table;
+ dict_index_t *check_index;
+ dberr_t err = DB_SUCCESS;
+
+ {
+ dict_table_t*& fktable = check_ref
+ ? foreign->referenced_table : foreign->foreign_table;
+ check_table = fktable;
+ if (check_table) {
+ err = lock_table(check_table, &fktable, LOCK_IS, thr);
+ if (err != DB_SUCCESS) {
+ goto do_possible_lock_wait;
+ }
+ }
+ check_table = fktable;
}
- if (check_table == NULL
- || !check_table->is_readable()
- || check_index == NULL) {
+ check_index = check_ref
+ ? foreign->referenced_index : foreign->foreign_index;
+ if (!check_table || !check_table->is_readable() || !check_index) {
FILE* ef = dict_foreign_err_file;
std::string fk_str;
@@ -1601,22 +1612,10 @@ row_ins_check_foreign_constraint(
err = DB_ROW_IS_REFERENCED;
}
- mutex_exit(&dict_foreign_err_mutex);
+ mysql_mutex_unlock(&dict_foreign_err_mutex);
goto exit_func;
}
- if (check_table != table) {
- /* We already have a LOCK_IX on table, but not necessarily
- on check_table */
-
- err = lock_table(0, check_table, LOCK_IS, thr);
-
- if (err != DB_SUCCESS) {
-
- goto do_possible_lock_wait;
- }
- }
-
mtr_start(&mtr);
/* Store old value on n_fields_cmp */
@@ -1624,9 +1623,11 @@ row_ins_check_foreign_constraint(
n_fields_cmp = dtuple_get_n_fields_cmp(entry);
dtuple_set_n_fields_cmp(entry, foreign->n_fields);
-
- btr_pcur_open(check_index, entry, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
+ pcur.btr_cur.page_cur.index = check_index;
+ err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr);
+ if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+ goto end_scan;
+ }
/* Scan index records and check if there is a matching record */
@@ -1812,9 +1813,8 @@ row_ins_check_foreign_constraint(
}
end_scan:
- btr_pcur_close(&pcur);
-
mtr_commit(&mtr);
+ ut_free(pcur.old_rec_buf);
/* Restore old value */
dtuple_set_n_fields_cmp(entry, n_fields_cmp);
@@ -1823,29 +1823,19 @@ do_possible_lock_wait:
if (err == DB_LOCK_WAIT) {
trx->error_state = err;
- que_thr_stop_for_mysql(thr);
-
thr->lock_state = QUE_THR_LOCK_ROW;
- check_table->inc_fk_checks();
-
- lock_wait_suspend_thread(thr);
+ err = lock_wait(thr);
thr->lock_state = QUE_THR_LOCK_NOLOCK;
- err = trx->error_state;
- if (err != DB_SUCCESS) {
- } else if (check_table->to_be_dropped) {
- err = DB_LOCK_WAIT_TIMEOUT;
- } else {
+ if (err == DB_SUCCESS) {
err = DB_LOCK_WAIT;
}
-
- check_table->dec_fk_checks();
}
exit_func:
- if (heap != NULL) {
+ if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
@@ -1910,14 +1900,10 @@ row_ins_check_foreign_constraints(
{
dict_foreign_t* foreign;
dberr_t err = DB_SUCCESS;
- trx_t* trx;
- ibool got_s_lock = FALSE;
mem_heap_t* heap = NULL;
DBUG_ASSERT(index->is_primary() == pk);
- trx = thr_get_trx(thr);
-
DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
"foreign_constraint_check_for_ins");
@@ -1960,37 +1946,14 @@ row_ins_check_foreign_constraints(
ref_table = dict_table_open_on_name(
foreign->referenced_table_name_lookup,
- FALSE, FALSE, DICT_ERR_IGNORE_NONE);
- }
-
- if (0 == trx->dict_operation_lock_mode) {
- got_s_lock = TRUE;
-
- row_mysql_freeze_data_dictionary(trx);
+ false, DICT_ERR_IGNORE_NONE);
}
- if (referenced_table) {
- foreign->foreign_table->inc_fk_checks();
- }
-
- /* NOTE that if the thread ends up waiting for a lock
- we will release dict_sys.latch temporarily!
- But the counter on the table protects the referenced
- table from being dropped while the check is running. */
-
err = row_ins_check_foreign_constraint(
TRUE, foreign, table, ref_tuple, thr);
- if (referenced_table) {
- foreign->foreign_table->dec_fk_checks();
- }
-
- if (got_s_lock) {
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
- if (ref_table != NULL) {
- dict_table_close(ref_table, FALSE, FALSE);
+ if (ref_table) {
+ dict_table_close(ref_table);
}
}
}
@@ -2118,7 +2081,6 @@ row_ins_scan_sec_index_for_duplicate(
dict_index_t* index, /*!< in: non-clustered unique index */
dtuple_t* entry, /*!< in: index entry */
que_thr_t* thr, /*!< in: query thread */
- bool s_latch,/*!< in: whether index->lock is being held */
mtr_t* mtr, /*!< in/out: mini-transaction */
mem_heap_t* offsets_heap)
/*!< in/out: memory heap that can be emptied */
@@ -2127,15 +2089,13 @@ row_ins_scan_sec_index_for_duplicate(
int cmp;
ulint n_fields_cmp;
btr_pcur_t pcur;
- dberr_t err = DB_SUCCESS;
rec_offs offsets_[REC_OFFS_SEC_INDEX_SIZE];
rec_offs* offsets = offsets_;
DBUG_ENTER("row_ins_scan_sec_index_for_duplicate");
rec_offs_init(offsets_);
- ut_ad(s_latch == rw_lock_own_flagged(
- &index->lock, RW_LOCK_FLAG_S | RW_LOCK_FLAG_SX));
+ ut_ad(!index->lock.have_any());
n_unique = dict_index_get_n_unique(index);
@@ -2158,14 +2118,13 @@ row_ins_scan_sec_index_for_duplicate(
n_fields_cmp = dtuple_get_n_fields_cmp(entry);
dtuple_set_n_fields_cmp(entry, n_unique);
-
- btr_pcur_open(index, entry, PAGE_CUR_GE,
- s_latch
- ? BTR_SEARCH_LEAF_ALREADY_S_LATCHED
- : BTR_SEARCH_LEAF,
- &pcur, mtr);
-
+ pcur.btr_cur.page_cur.index = index;
trx_t* const trx = thr_get_trx(thr);
+ dberr_t err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF,
+ &pcur, mtr);
+ if (err != DB_SUCCESS) {
+ goto end_scan;
+ }
/* Scan index records and check if there is a duplicate */
@@ -2323,11 +2282,11 @@ row_ins_duplicate_error_in_clust_online(
dberr_t err = DB_SUCCESS;
const rec_t* rec = btr_cur_get_rec(cursor);
- ut_ad(!cursor->index->is_instant());
+ ut_ad(!cursor->index()->is_instant());
if (cursor->low_match >= n_uniq && !page_rec_is_infimum(rec)) {
- *offsets = rec_get_offsets(rec, cursor->index, *offsets,
- cursor->index->n_fields,
+ *offsets = rec_get_offsets(rec, cursor->index(), *offsets,
+ cursor->index()->n_fields,
ULINT_UNDEFINED, heap);
err = row_ins_duplicate_online(n_uniq, entry, rec, *offsets);
if (err != DB_SUCCESS) {
@@ -2335,11 +2294,13 @@ row_ins_duplicate_error_in_clust_online(
}
}
- rec = page_rec_get_next_const(btr_cur_get_rec(cursor));
+ if (!(rec = page_rec_get_next_const(btr_cur_get_rec(cursor)))) {
+ return DB_CORRUPTION;
+ }
if (cursor->up_match >= n_uniq && !page_rec_is_supremum(rec)) {
- *offsets = rec_get_offsets(rec, cursor->index, *offsets,
- cursor->index->n_fields,
+ *offsets = rec_get_offsets(rec, cursor->index(), *offsets,
+ cursor->index()->n_fields,
ULINT_UNDEFINED, heap);
err = row_ins_duplicate_online(n_uniq, entry, rec, *offsets);
}
@@ -2372,7 +2333,7 @@ row_ins_duplicate_error_in_clust(
rec_offs* offsets = offsets_;
rec_offs_init(offsets_);
- ut_ad(dict_index_is_clust(cursor->index));
+ ut_ad(cursor->index()->is_clust());
/* NOTE: For unique non-clustered indexes there may be any number
of delete marked records with the same value for the non-clustered
@@ -2387,15 +2348,17 @@ row_ins_duplicate_error_in_clust(
user records on the leaf level. So, even if low_match would suggest
that a duplicate key violation may occur, this may not be the case. */
- n_unique = dict_index_get_n_unique(cursor->index);
+ n_unique = dict_index_get_n_unique(cursor->index());
if (cursor->low_match >= n_unique) {
rec = btr_cur_get_rec(cursor);
if (!page_rec_is_infimum(rec)) {
- offsets = rec_get_offsets(rec, cursor->index, offsets,
- cursor->index->n_core_fields,
+ offsets = rec_get_offsets(rec, cursor->index(),
+ offsets,
+ cursor->index()
+ ->n_core_fields,
ULINT_UNDEFINED, &heap);
/* We set a lock on the possible duplicate: this
@@ -2416,13 +2379,13 @@ row_ins_duplicate_error_in_clust(
err = row_ins_set_exclusive_rec_lock(
LOCK_REC_NOT_GAP,
btr_cur_get_block(cursor),
- rec, cursor->index, offsets, thr);
+ rec, cursor->index(), offsets, thr);
} else {
err = row_ins_set_shared_rec_lock(
LOCK_REC_NOT_GAP,
btr_cur_get_block(cursor), rec,
- cursor->index, offsets, thr);
+ cursor->index(), offsets, thr);
}
switch (err) {
@@ -2434,11 +2397,11 @@ row_ins_duplicate_error_in_clust(
}
if (row_ins_dupl_error_with_rec(
- rec, entry, cursor->index, offsets)) {
+ rec, entry, cursor->index(), offsets)) {
duplicate:
- trx->error_info = cursor->index;
+ trx->error_info = cursor->index();
err = DB_DUPLICATE_KEY;
- if (cursor->index->table->versioned()
+ if (cursor->index()->table->versioned()
&& entry->vers_history_row())
{
ulint trx_id_len;
@@ -2455,13 +2418,17 @@ duplicate:
}
}
+ err = DB_SUCCESS;
+
if (cursor->up_match >= n_unique) {
rec = page_rec_get_next(btr_cur_get_rec(cursor));
- if (!page_rec_is_supremum(rec)) {
- offsets = rec_get_offsets(rec, cursor->index, offsets,
- cursor->index->n_core_fields,
+ if (rec && !page_rec_is_supremum(rec)) {
+ offsets = rec_get_offsets(rec, cursor->index(),
+ offsets,
+ cursor->index()
+ ->n_core_fields,
ULINT_UNDEFINED, &heap);
if (trx->duplicates) {
@@ -2474,34 +2441,33 @@ duplicate:
err = row_ins_set_exclusive_rec_lock(
LOCK_REC_NOT_GAP,
btr_cur_get_block(cursor),
- rec, cursor->index, offsets, thr);
+ rec, cursor->index(), offsets, thr);
} else {
err = row_ins_set_shared_rec_lock(
LOCK_REC_NOT_GAP,
btr_cur_get_block(cursor),
- rec, cursor->index, offsets, thr);
+ rec, cursor->index(), offsets, thr);
}
switch (err) {
+ default:
+ break;
case DB_SUCCESS_LOCKED_REC:
+ err = DB_SUCCESS;
+ /* fall through */
case DB_SUCCESS:
- break;
- default:
- goto func_exit;
- }
-
- if (row_ins_dupl_error_with_rec(
- rec, entry, cursor->index, offsets)) {
- goto duplicate;
+ if (row_ins_dupl_error_with_rec(
+ rec, entry, cursor->index(),
+ offsets)) {
+ goto duplicate;
+ }
}
}
/* This should never happen */
- ut_error;
+ err = DB_CORRUPTION;
}
-
- err = DB_SUCCESS;
func_exit:
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
@@ -2534,7 +2500,7 @@ row_ins_must_modify_rec(
and a secondary index node pointer contains all index fields. */
return(cursor->low_match
- >= dict_index_get_n_unique_in_tree(cursor->index)
+ >= dict_index_get_n_unique_in_tree(cursor->index())
&& !page_rec_is_infimum(btr_cur_get_rec(cursor)));
}
@@ -2562,9 +2528,9 @@ row_ins_index_entry_big_rec(
mtr_t mtr;
btr_pcur_t pcur;
rec_t* rec;
- dberr_t error;
- ut_ad(dict_index_is_clust(index));
+ pcur.btr_cur.page_cur.index = index;
+ ut_ad(index->is_primary());
DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern_latch");
@@ -2575,8 +2541,12 @@ row_ins_index_entry_big_rec(
index->set_modified(mtr);
}
- btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_MODIFY_TREE,
- &pcur, &mtr);
+ dberr_t error = btr_pcur_open(entry, PAGE_CUR_LE, BTR_MODIFY_TREE,
+ &pcur, &mtr);
+ if (error != DB_SUCCESS) {
+ return error;
+ }
+
rec = btr_pcur_get_rec(&pcur);
offsets = rec_get_offsets(rec, index, offsets, index->n_core_fields,
ULINT_UNDEFINED, heap);
@@ -2586,18 +2556,25 @@ row_ins_index_entry_big_rec(
&pcur, offsets, big_rec, &mtr, BTR_STORE_INSERT);
DEBUG_SYNC_C_IF_THD(thd, "after_row_ins_extern");
- if (error == DB_SUCCESS
- && dict_index_is_online_ddl(index)) {
- row_log_table_insert(btr_pcur_get_rec(&pcur), index, offsets);
- }
-
mtr.commit();
- btr_pcur_close(&pcur);
-
+ ut_free(pcur.old_rec_buf);
return(error);
}
+#if 0
+extern "C" int thd_is_slave(const MYSQL_THD thd);
+#else
+# define thd_is_slave(thd) 0
+#endif
+
+#if defined __aarch64__&&defined __GNUC__&&__GNUC__==4&&!defined __clang__
+/* Avoid GCC 4.8.5 internal compiler error due to srw_mutex::wr_unlock().
+We would only need this for row_ins_clust_index_entry_low(),
+but GCC 4.8.5 does not support pop_options. */
+# pragma GCC optimize ("O0")
+#endif
+
/***************************************************************//**
Tries to insert an entry into a clustered index, ignoring foreign key
constraints. If a record with the same unique key is found, the other
@@ -2613,7 +2590,7 @@ dberr_t
row_ins_clust_index_entry_low(
/*==========================*/
ulint flags, /*!< in: undo logging and locking flags */
- ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ btr_latch_mode mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
depending on whether we wish optimistic or
pessimistic descent down the index tree */
dict_index_t* index, /*!< in: clustered index */
@@ -2623,15 +2600,16 @@ row_ins_clust_index_entry_low(
que_thr_t* thr) /*!< in: query thread */
{
btr_pcur_t pcur;
- btr_cur_t* cursor;
dberr_t err = DB_SUCCESS;
big_rec_t* big_rec = NULL;
mtr_t mtr;
- ib_uint64_t auto_inc = 0;
+ uint64_t auto_inc = 0;
mem_heap_t* offsets_heap = NULL;
rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs* offsets = offsets_;
rec_offs_init(offsets_);
+ trx_t* trx = thr_get_trx(thr);
+ buf_block_t* block;
DBUG_ENTER("row_ins_clust_index_entry_low");
@@ -2639,9 +2617,9 @@ row_ins_clust_index_entry_low(
ut_ad(!dict_index_is_unique(index)
|| n_uniq == dict_index_get_n_unique(index));
ut_ad(!n_uniq || n_uniq == dict_index_get_n_unique(index));
- ut_ad(!thr_get_trx(thr)->in_rollback);
+ ut_ad(!trx->in_rollback);
- mtr_start(&mtr);
+ mtr.start();
if (index->table->is_temporary()) {
/* Disable REDO logging as the lifetime of temp-tables is
@@ -2664,7 +2642,7 @@ row_ins_clust_index_entry_low(
} else {
if (mode == BTR_MODIFY_LEAF
&& dict_index_is_online_ddl(index)) {
- mode = BTR_MODIFY_LEAF_ALREADY_S_LATCHED;
+ mode = BTR_MODIFY_LEAF_ALREADY_LATCHED;
mtr_s_lock_index(index, &mtr);
}
@@ -2681,6 +2659,13 @@ row_ins_clust_index_entry_low(
dfield->type.mtype,
dfield->type.prtype
& DATA_UNSIGNED);
+ if (auto_inc
+ && mode != BTR_MODIFY_TREE) {
+ mode = btr_latch_mode(
+ BTR_MODIFY_ROOT_AND_LEAF
+ ^ BTR_MODIFY_LEAF
+ ^ mode);
+ }
}
}
}
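The mode adjustment in the hunk above relies on an XOR toggle: BTR_MODIFY_ROOT_AND_LEAF ^ BTR_MODIFY_LEAF ^ mode maps BTR_MODIFY_LEAF to BTR_MODIFY_ROOT_AND_LEAF and, provided the *_ALREADY_LATCHED variants differ from the plain modes by the same bit pattern, also maps BTR_MODIFY_LEAF_ALREADY_LATCHED to BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED. A minimal sketch with made-up enum values (the real btr_latch_mode values are defined elsewhere in the tree):

// Sketch with assumed enum values; only the XOR relationship matters.
#include <cassert>

enum btr_latch_mode {
	BTR_MODIFY_LEAF				 = 0,	// assumed value
	BTR_MODIFY_LEAF_ALREADY_LATCHED		 = 1,	// assumed value
	BTR_MODIFY_ROOT_AND_LEAF		 = 2,	// assumed value
	BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED = 3	// assumed value
};

// Upgrade either leaf mode to the matching root-and-leaf mode, preserving
// the "already latched" bit, as done when the AUTO_INCREMENT value must be
// written to the root page.
static btr_latch_mode upgrade(btr_latch_mode mode)
{
	return btr_latch_mode(BTR_MODIFY_ROOT_AND_LEAF
			      ^ BTR_MODIFY_LEAF ^ mode);
}

int main()
{
	assert(upgrade(BTR_MODIFY_LEAF) == BTR_MODIFY_ROOT_AND_LEAF);
	assert(upgrade(BTR_MODIFY_LEAF_ALREADY_LATCHED)
	       == BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED);
	return 0;
}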
@@ -2689,20 +2674,27 @@ row_ins_clust_index_entry_low(
/* Note that we use PAGE_CUR_LE as the search mode, because then
the function will return in both low_match and up_match of the
cursor sensible values */
- err = btr_pcur_open_low(index, 0, entry, PAGE_CUR_LE, mode, &pcur,
- __FILE__, __LINE__, auto_inc, &mtr);
+ pcur.btr_cur.page_cur.index = index;
+ err = btr_pcur_open(entry, PAGE_CUR_LE, mode, &pcur, &mtr);
if (err != DB_SUCCESS) {
index->table->file_unreadable = true;
+err_exit:
mtr.commit();
goto func_exit;
}
- cursor = btr_pcur_get_btr_cur(&pcur);
- cursor->thr = thr;
+ if (auto_inc) {
+ buf_block_t* root
+ = mtr.at_savepoint(mode != BTR_MODIFY_ROOT_AND_LEAF);
+ ut_ad(index->page == root->page.id().page_no());
+ page_set_autoinc(root, auto_inc, &mtr, false);
+ }
+
+ btr_pcur_get_btr_cur(&pcur)->thr = thr;
#ifdef UNIV_DEBUG
{
- page_t* page = btr_cur_get_page(cursor);
+ page_t* page = btr_pcur_get_page(&pcur);
rec_t* first_rec = page_rec_get_next(
page_get_infimum_rec(page));
@@ -2711,31 +2703,91 @@ row_ins_clust_index_entry_low(
}
#endif /* UNIV_DEBUG */
+ block = btr_pcur_get_block(&pcur);
+
+ DBUG_EXECUTE_IF("row_ins_row_level", goto skip_bulk_insert;);
+
+ if (!(flags & BTR_NO_UNDO_LOG_FLAG)
+ && page_is_empty(block->page.frame)
+ && !entry->is_metadata() && !trx->duplicates
+ && !trx->check_unique_secondary && !trx->check_foreigns
+ && !trx->dict_operation
+ && block->page.id().page_no() == index->page
+ && !index->table->skip_alter_undo
+ && !index->table->n_rec_locks
+ && !index->table->is_active_ddl()
+ && !index->table->versioned()
+ && !thd_is_slave(trx->mysql_thd) /* FIXME: MDEV-24622 */) {
+ DEBUG_SYNC_C("empty_root_page_insert");
+
+ if (!index->table->is_temporary()) {
+ err = lock_table(index->table, NULL, LOCK_X, thr);
+
+ if (err != DB_SUCCESS) {
+ trx->error_state = err;
+ goto err_exit;
+ }
+
+ if (index->table->n_rec_locks) {
+ goto skip_bulk_insert;
+ }
+
+#if 0
+ if (trx->is_wsrep())
+ {
+ if (!wsrep_thd_is_local_transaction(trx->mysql_thd))
+ goto skip_bulk_insert;
+ if (wsrep_append_table_key(trx->mysql_thd, *index->table))
+ {
+ trx->error_state = DB_ROLLBACK;
+ goto err_exit;
+ }
+ }
+#endif /* WITH_WSREP */
+
+#ifdef BTR_CUR_HASH_ADAPT
+ if (btr_search_enabled) {
+ btr_search_x_lock_all();
+ index->table->bulk_trx_id = trx->id;
+ btr_search_x_unlock_all();
+ } else {
+ index->table->bulk_trx_id = trx->id;
+ }
+#else /* BTR_CUR_HASH_ADAPT */
+ index->table->bulk_trx_id = trx->id;
+#endif /* BTR_CUR_HASH_ADAPT */
+ }
+
+ trx->bulk_insert = true;
+ }
+
+skip_bulk_insert:
if (UNIV_UNLIKELY(entry->info_bits != 0)) {
ut_ad(entry->is_metadata());
ut_ad(flags == BTR_NO_LOCKING_FLAG);
ut_ad(index->is_instant());
ut_ad(!dict_index_is_online_ddl(index));
- const rec_t* rec = btr_cur_get_rec(cursor);
+ const rec_t* rec = btr_pcur_get_rec(&pcur);
if (rec_get_info_bits(rec, page_rec_is_comp(rec))
& REC_INFO_MIN_REC_FLAG) {
- thr_get_trx(thr)->error_info = index;
+ trx->error_info = index;
err = DB_DUPLICATE_KEY;
goto err_exit;
}
- ut_ad(!row_ins_must_modify_rec(cursor));
+ ut_ad(!row_ins_must_modify_rec(&pcur.btr_cur));
goto do_insert;
}
- if (rec_is_metadata(btr_cur_get_rec(cursor), *index)) {
+ if (rec_is_metadata(btr_pcur_get_rec(&pcur), *index)) {
goto do_insert;
}
if (n_uniq
- && (cursor->up_match >= n_uniq || cursor->low_match >= n_uniq)) {
+ && (pcur.btr_cur.up_match >= n_uniq
+ || pcur.btr_cur.low_match >= n_uniq)) {
if (flags
== (BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG
@@ -2743,7 +2795,7 @@ row_ins_clust_index_entry_low(
/* Set no locks when applying log
in online table rebuild. Only check for duplicates. */
err = row_ins_duplicate_error_in_clust_online(
- n_uniq, entry, cursor,
+ n_uniq, entry, &pcur.btr_cur,
&offsets, &offsets_heap);
switch (err) {
@@ -2754,26 +2806,24 @@ row_ins_clust_index_entry_low(
/* fall through */
case DB_SUCCESS_LOCKED_REC:
case DB_DUPLICATE_KEY:
- thr_get_trx(thr)->error_info = cursor->index;
+ trx->error_info = index;
}
} else {
/* Note that the following may return also
DB_LOCK_WAIT */
err = row_ins_duplicate_error_in_clust(
- flags, cursor, entry, thr);
+ flags, &pcur.btr_cur, entry, thr);
}
if (err != DB_SUCCESS) {
-err_exit:
- mtr_commit(&mtr);
- goto func_exit;
+ goto err_exit;
}
}
/* Note: Allowing duplicates would qualify for modification of
an existing record as the new entry is exactly same as old entry. */
- if (row_ins_must_modify_rec(cursor)) {
+ if (row_ins_must_modify_rec(&pcur.btr_cur)) {
/* There is already an index entry with a long enough common
prefix, we must convert the insert into a modify of an
existing record */
@@ -2783,11 +2833,6 @@ err_exit:
&pcur, flags, mode, &offsets, &offsets_heap,
entry_heap, entry, thr, &mtr);
- if (err == DB_SUCCESS && dict_index_is_online_ddl(index)) {
- row_log_table_insert(btr_cur_get_rec(cursor),
- index, offsets);
- }
-
mtr_commit(&mtr);
mem_heap_free(entry_heap);
} else {
@@ -2796,10 +2841,13 @@ do_insert:
rec_t* insert_rec;
if (mode != BTR_MODIFY_TREE) {
- ut_ad((mode & ulint(~BTR_ALREADY_S_LATCHED))
- == BTR_MODIFY_LEAF);
+ ut_ad(mode == BTR_MODIFY_LEAF
+ || mode == BTR_MODIFY_LEAF_ALREADY_LATCHED
+ || mode == BTR_MODIFY_ROOT_AND_LEAF
+ || mode
+ == BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED);
err = btr_cur_optimistic_insert(
- flags, cursor, &offsets, &offsets_heap,
+ flags, &pcur.btr_cur, &offsets, &offsets_heap,
entry, &insert_rec, &big_rec,
n_ext, thr, &mtr);
} else {
@@ -2808,26 +2856,24 @@ do_insert:
goto err_exit;
}
- DEBUG_SYNC_C("before_insert_pessimitic_row_ins_clust");
-
err = btr_cur_optimistic_insert(
- flags, cursor,
+ flags, &pcur.btr_cur,
&offsets, &offsets_heap,
entry, &insert_rec, &big_rec,
n_ext, thr, &mtr);
if (err == DB_FAIL) {
err = btr_cur_pessimistic_insert(
- flags, cursor,
+ flags, &pcur.btr_cur,
&offsets, &offsets_heap,
entry, &insert_rec, &big_rec,
n_ext, thr, &mtr);
}
}
- if (big_rec != NULL) {
- mtr_commit(&mtr);
+ mtr.commit();
+ if (big_rec) {
/* Online table rebuild could read (and
ignore) the incomplete record at this point.
If online rebuild is in progress, the
@@ -2838,16 +2884,8 @@ do_insert:
log_write_up_to(mtr.commit_lsn(), true););
err = row_ins_index_entry_big_rec(
entry, big_rec, offsets, &offsets_heap, index,
- thr_get_trx(thr)->mysql_thd);
+ trx->mysql_thd);
dtuple_convert_back_big_rec(index, entry, big_rec);
- } else {
- if (err == DB_SUCCESS
- && dict_index_is_online_ddl(index)) {
- row_log_table_insert(
- insert_rec, index, offsets);
- }
-
- mtr_commit(&mtr);
}
}
@@ -2856,24 +2894,14 @@ func_exit:
mem_heap_free(offsets_heap);
}
- btr_pcur_close(&pcur);
-
+ ut_free(pcur.old_rec_buf);
DBUG_RETURN(err);
}
-/** Start a mini-transaction and check if the index will be dropped.
+/** Start a mini-transaction.
@param[in,out] mtr mini-transaction
-@param[in,out] index secondary index
-@param[in] check whether to check
-@param[in] search_mode flags
-@return true if the index is to be dropped */
-static MY_ATTRIBUTE((warn_unused_result))
-bool
-row_ins_sec_mtr_start_and_check_if_aborted(
- mtr_t* mtr,
- dict_index_t* index,
- bool check,
- ulint search_mode)
+@param[in,out] index secondary index */
+static void row_ins_sec_mtr_start(mtr_t *mtr, dict_index_t *index)
{
ut_ad(!dict_index_is_clust(index));
ut_ad(mtr->is_named_space(index->table->space));
@@ -2883,30 +2911,6 @@ row_ins_sec_mtr_start_and_check_if_aborted(
mtr->start();
index->set_modified(*mtr);
mtr->set_log_mode(log_mode);
-
- if (!check) {
- return(false);
- }
-
- if (search_mode & BTR_ALREADY_S_LATCHED) {
- mtr_s_lock_index(index, mtr);
- } else {
- mtr_sx_lock_index(index, mtr);
- }
-
- switch (index->online_status) {
- case ONLINE_INDEX_ABORTED:
- case ONLINE_INDEX_ABORTED_DROPPED:
- ut_ad(!index->is_committed());
- return(true);
- case ONLINE_INDEX_COMPLETE:
- return(false);
- case ONLINE_INDEX_CREATION:
- break;
- }
-
- ut_error;
- return(true);
}
/***************************************************************//**
@@ -2915,13 +2919,13 @@ same fields is found, the other record is necessarily marked deleted.
It is then unmarked. Otherwise, the entry is just inserted to the index.
@retval DB_SUCCESS on success
@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
-@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
+@retval DB_FAIL if retry with BTR_INSERT_TREE is needed
@return error code */
dberr_t
row_ins_sec_index_entry_low(
/*========================*/
ulint flags, /*!< in: undo logging and locking flags */
- ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ btr_latch_mode mode, /*!< in: BTR_MODIFY_LEAF or BTR_INSERT_TREE,
depending on whether we wish optimistic or
pessimistic descent down the index tree */
dict_index_t* index, /*!< in: secondary index */
@@ -2936,8 +2940,8 @@ row_ins_sec_index_entry_low(
DBUG_ENTER("row_ins_sec_index_entry_low");
btr_cur_t cursor;
- ulint search_mode = mode;
- dberr_t err = DB_SUCCESS;
+ btr_latch_mode search_mode = mode;
+ dberr_t err;
ulint n_unique;
mtr_t mtr;
rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
@@ -2946,10 +2950,11 @@ row_ins_sec_index_entry_low(
rtr_info_t rtr_info;
ut_ad(!dict_index_is_clust(index));
- ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE);
+ ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_INSERT_TREE);
cursor.thr = thr;
cursor.rtr_info = NULL;
+ cursor.page_cur.index = index;
ut_ad(thr_get_trx(thr)->id != 0);
mtr.start();
@@ -2961,53 +2966,22 @@ row_ins_sec_index_entry_low(
mtr.set_log_mode(MTR_LOG_NO_REDO);
} else {
index->set_modified(mtr);
- if (!dict_index_is_spatial(index)) {
- search_mode |= BTR_INSERT;
- }
- }
-
- /* Ensure that we acquire index->lock when inserting into an
- index with index->online_status == ONLINE_INDEX_COMPLETE, but
- could still be subject to rollback_inplace_alter_table().
- This prevents a concurrent change of index->online_status.
- The memory object cannot be freed as long as we have an open
- reference to the table, or index->table->n_ref_count > 0. */
- const bool check = !index->is_committed();
- if (check) {
- DEBUG_SYNC_C("row_ins_sec_index_enter");
- if (mode == BTR_MODIFY_LEAF) {
- search_mode |= BTR_ALREADY_S_LATCHED;
- mtr_s_lock_index(index, &mtr);
- } else {
- mtr_sx_lock_index(index, &mtr);
- }
-
- if (row_log_online_op_try(
- index, entry, thr_get_trx(thr)->id)) {
- goto func_exit;
- }
}
/* Note that we use PAGE_CUR_LE as the search mode, because then
the function will return in both low_match and up_match of the
cursor sensible values */
- if (!thr_get_trx(thr)->check_unique_secondary) {
- search_mode |= BTR_IGNORE_SEC_UNIQUE;
- }
-
- if (dict_index_is_spatial(index)) {
- cursor.index = index;
+ if (index->is_spatial()) {
rtr_init_rtr_info(&rtr_info, false, &cursor, index, false);
rtr_info_update_btr(&cursor, &rtr_info);
- err = btr_cur_search_to_nth_level(
- index, 0, entry, PAGE_CUR_RTREE_INSERT,
- search_mode,
- &cursor, __FILE__, __LINE__, &mtr);
+ err = rtr_insert_leaf(&cursor, entry, search_mode, &mtr);
- if (mode == BTR_MODIFY_LEAF && rtr_info.mbr_adj) {
+ if (err == DB_SUCCESS && search_mode == BTR_MODIFY_LEAF
+ && rtr_info.mbr_adj) {
mtr_commit(&mtr);
+ search_mode = mode = BTR_MODIFY_TREE;
rtr_clean_rtr_info(&rtr_info, true);
rtr_init_rtr_info(&rtr_info, false, &cursor,
index, false);
@@ -3018,13 +2992,8 @@ row_ins_sec_index_entry_low(
} else {
index->set_modified(mtr);
}
- search_mode &= ulint(~BTR_MODIFY_LEAF);
- search_mode |= BTR_MODIFY_TREE;
- err = btr_cur_search_to_nth_level(
- index, 0, entry, PAGE_CUR_RTREE_INSERT,
- search_mode,
- &cursor, __FILE__, __LINE__, &mtr);
- mode = BTR_MODIFY_TREE;
+ err = rtr_insert_leaf(&cursor, entry,
+ search_mode, &mtr);
}
DBUG_EXECUTE_IF(
@@ -3032,21 +3001,21 @@ row_ins_sec_index_entry_low(
goto func_exit;});
} else {
- err = btr_cur_search_to_nth_level(
- index, 0, entry, PAGE_CUR_LE,
- search_mode,
- &cursor, __FILE__, __LINE__, &mtr);
+ if (!index->table->is_temporary()) {
+ search_mode = btr_latch_mode(
+ search_mode
+ | (thr_get_trx(thr)->check_unique_secondary
+ ? BTR_INSERT | BTR_IGNORE_SEC_UNIQUE
+ : BTR_INSERT));
+ }
+
+ err = cursor.search_leaf(entry, PAGE_CUR_LE, search_mode,
+ &mtr);
}
if (err != DB_SUCCESS) {
if (err == DB_DECRYPTION_FAILED) {
- ib_push_warning(thr_get_trx(thr)->mysql_thd,
- DB_DECRYPTION_FAILED,
- "Table %s is encrypted but encryption service or"
- " used key_id is not available. "
- " Can't continue reading table.",
- index->table->name.m_name);
- index->table->file_unreadable = true;
+ btr_decryption_failed(*index);
}
goto func_exit;
}
@@ -3076,13 +3045,10 @@ row_ins_sec_index_entry_low(
DEBUG_SYNC_C("row_ins_sec_index_unique");
- if (row_ins_sec_mtr_start_and_check_if_aborted(
- &mtr, index, check, search_mode)) {
- goto func_exit;
- }
+ row_ins_sec_mtr_start(&mtr, index);
err = row_ins_scan_sec_index_for_duplicate(
- flags, index, entry, thr, check, &mtr, offsets_heap);
+ flags, index, entry, thr, &mtr, offsets_heap);
mtr_commit(&mtr);
@@ -3093,9 +3059,7 @@ row_ins_sec_index_entry_low(
if (!index->is_committed()) {
ut_ad(!thr_get_trx(thr)
->dict_operation_lock_mode);
- mutex_enter(&dict_sys.mutex);
- dict_set_corrupted_index_cache_only(index);
- mutex_exit(&dict_sys.mutex);
+ index->type |= DICT_CORRUPT;
/* Do not return any error to the
caller. The duplicate will be reported
by ALTER TABLE or CREATE UNIQUE INDEX.
@@ -3113,10 +3077,7 @@ row_ins_sec_index_entry_low(
DBUG_RETURN(err);
}
- if (row_ins_sec_mtr_start_and_check_if_aborted(
- &mtr, index, check, search_mode)) {
- goto func_exit;
- }
+ row_ins_sec_mtr_start(&mtr, index);
DEBUG_SYNC_C("row_ins_sec_index_entry_dup_locks_created");
@@ -3124,12 +3085,16 @@ row_ins_sec_index_entry_low(
locked with s-locks the necessary records to
prevent any insertion of a duplicate by another
transaction. Let us now reposition the cursor and
- continue the insertion. */
- btr_cur_search_to_nth_level(
- index, 0, entry, PAGE_CUR_LE,
- (search_mode
- & ~(BTR_INSERT | BTR_IGNORE_SEC_UNIQUE)),
- &cursor, __FILE__, __LINE__, &mtr);
+ continue the insertion (bypassing the change buffer). */
+ err = cursor.search_leaf(
+ entry, PAGE_CUR_LE,
+ btr_latch_mode(search_mode
+ & ~(BTR_INSERT
+ | BTR_IGNORE_SEC_UNIQUE)),
+ &mtr);
+ if (err != DB_SUCCESS) {
+ goto func_exit;
+ }
}
if (row_ins_must_modify_rec(&cursor)) {
@@ -3164,7 +3129,6 @@ row_ins_sec_index_entry_low(
err = rtr_ins_enlarge_mbr(&cursor, &mtr);
}
} else {
- ut_ad(mode == BTR_MODIFY_TREE);
if (buf_pool.running_out()) {
err = DB_LOCK_TABLE_FULL;
goto func_exit;
@@ -3367,7 +3331,7 @@ row_ins_sec_index_entry(
log_free_check();
err = row_ins_sec_index_entry_low(
- flags, BTR_MODIFY_TREE, index,
+ flags, BTR_INSERT_TREE, index,
offsets_heap, heap, entry, 0, thr);
}
@@ -3582,22 +3546,9 @@ row_ins_alloc_row_id_step(
/*======================*/
ins_node_t* node) /*!< in: row insert node */
{
- row_id_t row_id;
-
- ut_ad(node->state == INS_NODE_ALLOC_ROW_ID);
-
- if (dict_index_is_unique(dict_table_get_first_index(node->table))) {
-
- /* No row id is stored if the clustered index is unique */
-
- return;
- }
-
- /* Fill in row id value to row */
-
- row_id = dict_sys_get_new_row_id();
-
- dict_sys_write_row_id(node->sys_buf, row_id);
+ ut_ad(node->state == INS_NODE_ALLOC_ROW_ID);
+ if (dict_table_get_first_index(node->table)->is_gen_clust())
+ dict_sys_write_row_id(node->sys_buf, dict_sys.get_new_row_id());
}
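The condensed row_ins_alloc_row_id_step() above generates a DB_ROW_ID only when the clustered index is a generated one, i.e. the table has no user-defined primary key. As a rough sketch of what dict_sys_write_row_id() does with that value, assuming the usual 6-byte big-endian DB_ROW_ID format (the real helper lives in the dict_sys code):

// Sketch only: store a 48-bit row id in big-endian order, on the assumption
// that DB_ROW_ID is a 6-byte system column.
#include <cstdint>
#include <cstddef>

static void write_row_id(unsigned char* buf, uint64_t row_id)
{
	for (std::size_t i = 0; i < 6; i++) {
		buf[5 - i] = static_cast<unsigned char>(row_id >> (8 * i));
	}
}

int main()
{
	unsigned char sys_buf[6];
	write_row_id(sys_buf, 42);	// e.g. the next dict_sys row id
	return sys_buf[5] == 42 ? 0 : 1;
}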
/***********************************************************//**
@@ -3701,23 +3652,14 @@ row_ins(
ut_ad(node->state == INS_NODE_INSERT_ENTRIES);
- while (node->index != NULL) {
- if (!(node->index->type & DICT_FTS)) {
- dberr_t err = row_ins_index_entry_step(node, thr);
-
- if (err != DB_SUCCESS) {
- DBUG_RETURN(err);
- }
+ while (dict_index_t *index = node->index) {
+ if (index->type & (DICT_FTS | DICT_CORRUPT)
+ || !index->is_committed()) {
+ } else if (dberr_t err = row_ins_index_entry_step(node, thr)) {
+ DBUG_RETURN(err);
}
-
- node->index = dict_table_get_next_index(node->index);
+ node->index = dict_table_get_next_index(index);
++node->entry;
-
- /* Skip corrupted secondary index and its entry */
- while (node->index && node->index->is_corrupted()) {
- node->index = dict_table_get_next_index(node->index);
- ++node->entry;
- }
}
ut_ad(node->entry == node->entry_list.end());
@@ -3788,10 +3730,6 @@ row_ins_step(
goto do_insert;
}
- if (UNIV_LIKELY(!node->table->skip_alter_undo)) {
- trx_write_trx_id(&node->sys_buf[DATA_TRX_ID_LEN], trx->id);
- }
-
if (node->state == INS_NODE_SET_IX_LOCK) {
node->state = INS_NODE_ALLOC_ROW_ID;
@@ -3809,13 +3747,13 @@ row_ins_step(
goto same_trx;
}
- err = lock_table(0, node->table, LOCK_IX, thr);
+ err = lock_table(node->table, NULL, LOCK_IX, thr);
DBUG_EXECUTE_IF("ib_row_ins_ix_lock_wait",
err = DB_LOCK_WAIT;);
if (err != DB_SUCCESS) {
-
+ node->state = INS_NODE_SET_IX_LOCK;
goto error_handling;
}
diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc
index b8b4bd56239..b21ff2b9f86 100644
--- a/storage/innobase/row/row0log.cc
+++ b/storage/innobase/row/row0log.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2011, 2018, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -85,79 +85,6 @@ struct row_log_buf_t {
row_log_apply(). */
};
-/** Tracks BLOB allocation during online ALTER TABLE */
-class row_log_table_blob_t {
-public:
- /** Constructor (declaring a BLOB freed)
- @param offset_arg row_log_t::tail::total */
-#ifdef UNIV_DEBUG
- row_log_table_blob_t(ulonglong offset_arg) :
- old_offset (0), free_offset (offset_arg),
- offset (BLOB_FREED) {}
-#else /* UNIV_DEBUG */
- row_log_table_blob_t() :
- offset (BLOB_FREED) {}
-#endif /* UNIV_DEBUG */
-
- /** Declare a BLOB freed again.
- @param offset_arg row_log_t::tail::total */
-#ifdef UNIV_DEBUG
- void blob_free(ulonglong offset_arg)
-#else /* UNIV_DEBUG */
- void blob_free()
-#endif /* UNIV_DEBUG */
- {
- ut_ad(offset < offset_arg);
- ut_ad(offset != BLOB_FREED);
- ut_d(old_offset = offset);
- ut_d(free_offset = offset_arg);
- offset = BLOB_FREED;
- }
- /** Declare a freed BLOB reused.
- @param offset_arg row_log_t::tail::total */
- void blob_alloc(ulonglong offset_arg) {
- ut_ad(free_offset <= offset_arg);
- ut_d(old_offset = offset);
- offset = offset_arg;
- }
- /** Determine if a BLOB was freed at a given log position
- @param offset_arg row_log_t::head::total after the log record
- @return true if freed */
- bool is_freed(ulonglong offset_arg) const {
- /* This is supposed to be the offset at the end of the
- current log record. */
- ut_ad(offset_arg > 0);
- /* We should never get anywhere close the magic value. */
- ut_ad(offset_arg < BLOB_FREED);
- return(offset_arg < offset);
- }
-private:
- /** Magic value for a freed BLOB */
- static const ulonglong BLOB_FREED = ~0ULL;
-#ifdef UNIV_DEBUG
- /** Old offset, in case a page was freed, reused, freed, ... */
- ulonglong old_offset;
- /** Offset of last blob_free() */
- ulonglong free_offset;
-#endif /* UNIV_DEBUG */
- /** Byte offset to the log file */
- ulonglong offset;
-};
-
-/** @brief Map of off-page column page numbers to 0 or log byte offsets.
-
-If there is no mapping for a page number, it is safe to access.
-If a page number maps to 0, it is an off-page column that has been freed.
-If a page number maps to a nonzero number, the number is a byte offset
-into the index->online_log, indicating that the page is safe to access
-when applying log records starting from that offset. */
-typedef std::map<
- ulint,
- row_log_table_blob_t,
- std::less<ulint>,
- ut_allocator<std::pair<const ulint, row_log_table_blob_t> > >
- page_no_map;
-
/** @brief Buffer for logging modifications during online index creation
All modifications to an index that is being created will be logged by
@@ -172,12 +99,8 @@ directly. When also head.bytes == tail.bytes, both counts will be
reset to 0 and the file will be truncated. */
struct row_log_t {
pfs_os_file_t fd; /*!< file descriptor */
- ib_mutex_t mutex; /*!< mutex protecting error,
+ mysql_mutex_t mutex; /*!< mutex protecting error,
max_trx and tail */
- page_no_map* blobs; /*!< map of page numbers of off-page columns
- that have been freed during table-rebuilding
- ALTER TABLE (row_log_table_*); protected by
- index->lock X-latch only */
dict_table_t* table; /*!< table that is being rebuilt,
or NULL when this is a secondary
index that is being created online */
@@ -237,6 +160,11 @@ struct row_log_t {
const TABLE* old_table; /*< Use old table in case of error. */
uint64_t n_rows; /*< Number of rows read from the table */
+
+	/** ALTER TABLE transaction. It can be used to apply the DML log
+	to the table */
+ const trx_t* alter_trx;
+
/** Determine whether the log should be in the 'instant ADD' format
@param[in] index the clustered index of the source table
@return whether to use the 'instant ADD COLUMN' format */
@@ -322,15 +250,14 @@ row_log_block_free(
DBUG_VOID_RETURN;
}
-/******************************************************//**
-Logs an operation to a secondary index that is (or was) being created. */
-void
-row_log_online_op(
-/*==============*/
- dict_index_t* index, /*!< in/out: index, S or X latched */
- const dtuple_t* tuple, /*!< in: index tuple */
- trx_id_t trx_id) /*!< in: transaction ID for insert,
- or 0 for delete */
+/** Logs an operation to a secondary index that is (or was) being created.
+@param index index, S or X latched
+@param tuple index tuple
+@param trx_id transaction ID for insert, or 0 for delete
+@retval false if row_log_apply() fails
+@retval true otherwise */
+bool row_log_online_op(dict_index_t *index, const dtuple_t *tuple,
+ trx_id_t trx_id)
{
byte* b;
ulint extra_size;
@@ -338,17 +265,19 @@ row_log_online_op(
ulint mrec_size;
ulint avail_size;
row_log_t* log;
+ bool success= true;
ut_ad(dtuple_validate(tuple));
ut_ad(dtuple_get_n_fields(tuple) == dict_index_get_n_fields(index));
- ut_ad(rw_lock_own_flagged(&index->lock,
- RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
+ ut_ad(index->lock.have_x() || index->lock.have_s());
if (index->is_corrupted()) {
- return;
+ return success;
}
- ut_ad(dict_index_is_online_ddl(index));
+ ut_ad(dict_index_is_online_ddl(index)
+ || (index->online_log
+ && index->online_status == ONLINE_INDEX_COMPLETE));
/* Compute the size of the record. This differs from
row_merge_buf_encode(), because here we do not encode
@@ -364,8 +293,9 @@ row_log_online_op(
+ (trx_id ? DATA_TRX_ID_LEN : 0);
log = index->online_log;
- mutex_enter(&log->mutex);
+ mysql_mutex_lock(&log->mutex);
+start_log:
if (trx_id > log->max_trx) {
log->max_trx = trx_id;
}
@@ -404,6 +334,7 @@ row_log_online_op(
rec_convert_dtuple_to_temp<false>(
b + extra_size, index, tuple->fields, tuple->n_fields);
+
b += size;
if (mrec_size >= avail_size) {
@@ -413,7 +344,28 @@ row_log_online_op(
byte* buf = log->tail.block;
if (byte_offset + srv_sort_buf_size >= srv_online_max_size) {
- goto write_failed;
+ if (index->online_status != ONLINE_INDEX_COMPLETE)
+ goto write_failed;
+			/* The log is about to run out of space;
+			apply the online log for the completed index */
+ index->lock.s_unlock();
+ dberr_t error= row_log_apply(
+ log->alter_trx, index, nullptr, nullptr);
+ index->lock.s_lock(SRW_LOCK_CALL);
+ if (error != DB_SUCCESS) {
+ /* Mark all newly added indexes
+ as corrupted */
+ log->error = error;
+ success = false;
+ goto err_exit;
+ }
+
+			/* Recheck whether the index online log still exists */
+ if (!index->online_log) {
+ goto err_exit;
+ }
+
+ goto start_log;
}
if (mrec_size == avail_size) {
@@ -453,9 +405,6 @@ row_log_online_op(
buf, byte_offset, srv_sort_buf_size)
!= DB_SUCCESS) {
write_failed:
- /* We set the flag directly instead of invoking
- dict_set_corrupted_index_cache_only(index) here,
- because the index is not "public" yet. */
index->type |= DICT_CORRUPT;
}
@@ -472,7 +421,8 @@ write_failed:
MEM_UNDEFINED(log->tail.buf, sizeof log->tail.buf);
err_exit:
- mutex_exit(&log->mutex);
+ mysql_mutex_unlock(&log->mutex);
+ return success;
}
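The start_log retry added above is the interesting part of this function: when the log for an already-completed index is about to exceed srv_online_max_size, the writer releases the S latch, applies the accumulated log itself via row_log_apply(), re-latches, and retries the append instead of marking the index corrupted. A schematic sketch of that loop, with stub helpers that are not the real InnoDB functions:

// Schematic sketch of "apply the log when it is about to overflow, then
// retry the append"; assumes each record is smaller than the log capacity.
#include <cstdint>

enum err_t { OK, LOG_FULL, FAIL };

struct log_t { uint64_t used = 0, capacity = 4; };

static err_t apply_log(log_t& log) { log.used = 0; return OK; }

static err_t append(log_t& log, uint64_t size)
{
	if (log.used + size >= log.capacity) {
		return LOG_FULL;
	}
	log.used += size;
	return OK;
}

static err_t log_op(log_t& log, uint64_t size)
{
	for (;;) {
		err_t err = append(log, size);
		if (err != LOG_FULL) {
			return err;
		}
		// release the index latch, apply the log, re-latch, retry
		if (apply_log(log) != OK) {
			return FAIL;
		}
	}
}

int main()
{
	log_t log;
	return log_op(log, 2) == OK && log_op(log, 2) == OK ? 0 : 1;
}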
/******************************************************//**
@@ -500,13 +450,13 @@ row_log_table_open(
ulint size, /*!< in: size of log record */
ulint* avail) /*!< out: available size for log record */
{
- mutex_enter(&log->mutex);
+ mysql_mutex_lock(&log->mutex);
MEM_UNDEFINED(log->tail.buf, sizeof log->tail.buf);
if (log->error != DB_SUCCESS) {
err_exit:
- mutex_exit(&log->mutex);
+ mysql_mutex_unlock(&log->mutex);
return(NULL);
}
@@ -542,7 +492,7 @@ row_log_table_close_func(
{
row_log_t* log = index->online_log;
- ut_ad(mutex_own(&log->mutex));
+ mysql_mutex_assert_owner(&log->mutex);
if (size >= avail) {
const os_offset_t byte_offset
@@ -606,7 +556,7 @@ write_failed:
log->tail.total += size;
MEM_UNDEFINED(log->tail.buf, sizeof log->tail.buf);
err_exit:
- mutex_exit(&log->mutex);
+ mysql_mutex_unlock(&log->mutex);
onlineddl_rowlog_rows++;
/* 10000 means 100.00%, 4525 means 45.25% */
@@ -660,9 +610,7 @@ row_log_table_delete(
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(rec_offs_n_fields(offsets) == dict_index_get_n_fields(index));
ut_ad(rec_offs_size(offsets) <= sizeof index->online_log->tail.buf);
- ut_ad(rw_lock_own_flagged(
- &index->lock,
- RW_LOCK_FLAG_S | RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
+ ut_ad(index->lock.have_any());
if (index->online_status != ONLINE_INDEX_CREATION
|| (index->type & DICT_CORRUPT) || index->table->corrupted
@@ -798,7 +746,6 @@ row_log_table_low_redundant(
dtuple_t* tuple;
const ulint n_fields = rec_get_n_fields_old(rec);
- ut_ad(!page_is_comp(page_align(rec)));
ut_ad(index->n_fields >= n_fields);
ut_ad(index->n_fields == n_fields || index->is_instant());
ut_ad(dict_tf2_is_valid(index->table->flags, index->table->flags2));
@@ -957,25 +904,8 @@ row_log_table_low(
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(rec_offs_n_fields(offsets) == dict_index_get_n_fields(index));
ut_ad(rec_offs_size(offsets) <= sizeof log->tail.buf);
- ut_ad(rw_lock_own_flagged(
- &index->lock,
- RW_LOCK_FLAG_S | RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
-#ifdef UNIV_DEBUG
- switch (fil_page_get_type(page_align(rec))) {
- case FIL_PAGE_INDEX:
- break;
- case FIL_PAGE_TYPE_INSTANT:
- ut_ad(index->is_instant());
- ut_ad(!page_has_siblings(page_align(rec)));
- ut_ad(page_get_page_no(page_align(rec)) == index->page);
- break;
- default:
- ut_ad("wrong page type" == 0);
- }
-#endif /* UNIV_DEBUG */
- ut_ad(!rec_is_metadata(rec, *index));
- ut_ad(page_rec_is_leaf(rec));
- ut_ad(!page_is_comp(page_align(rec)) == !rec_offs_comp(offsets));
+ ut_ad(index->lock.have_any());
+
/* old_pk=row_log_table_get_pk() [not needed in INSERT] is a prefix
of the clustered index record (PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR),
with no information on virtual columns */
@@ -994,7 +924,6 @@ row_log_table_low(
return;
}
- ut_ad(page_is_comp(page_align(rec)));
ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY
|| rec_get_status(rec) == REC_STATUS_INSTANT);
@@ -1239,10 +1168,7 @@ row_log_table_get_pk(
ut_ad(dict_index_is_clust(index));
ut_ad(dict_index_is_online_ddl(index));
ut_ad(!offsets || rec_offs_validate(rec, index, offsets));
- ut_ad(rw_lock_own_flagged(
- &index->lock,
- RW_LOCK_FLAG_S | RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
-
+ ut_ad(index->lock.have_any());
ut_ad(log);
ut_ad(log->table);
ut_ad(log->min_trx);
@@ -1280,7 +1206,7 @@ row_log_table_get_pk(
return(NULL);
}
- mutex_enter(&log->mutex);
+ mysql_mutex_lock(&log->mutex);
/* log->error is protected by log->mutex. */
if (log->error == DB_SUCCESS) {
@@ -1420,7 +1346,7 @@ err_exit:
}
func_exit:
- mutex_exit(&log->mutex);
+ mysql_mutex_unlock(&log->mutex);
return(tuple);
}
@@ -1440,83 +1366,6 @@ row_log_table_insert(
}
/******************************************************//**
-Notes that a BLOB is being freed during online ALTER TABLE. */
-void
-row_log_table_blob_free(
-/*====================*/
- dict_index_t* index, /*!< in/out: clustered index, X-latched */
- ulint page_no)/*!< in: starting page number of the BLOB */
-{
- ut_ad(dict_index_is_clust(index));
- ut_ad(dict_index_is_online_ddl(index));
- ut_ad(rw_lock_own_flagged(
- &index->lock,
- RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
- ut_ad(page_no != FIL_NULL);
-
- if (index->online_log->error != DB_SUCCESS) {
- return;
- }
-
- page_no_map* blobs = index->online_log->blobs;
-
- if (blobs == NULL) {
- index->online_log->blobs = blobs = UT_NEW_NOKEY(page_no_map());
- }
-
-#ifdef UNIV_DEBUG
- const ulonglong log_pos = index->online_log->tail.total;
-#else
-# define log_pos /* empty */
-#endif /* UNIV_DEBUG */
-
- const page_no_map::value_type v(page_no,
- row_log_table_blob_t(log_pos));
-
- std::pair<page_no_map::iterator,bool> p = blobs->insert(v);
-
- if (!p.second) {
- /* Update the existing mapping. */
- ut_ad(p.first->first == page_no);
- p.first->second.blob_free(log_pos);
- }
-#undef log_pos
-}
-
-/******************************************************//**
-Notes that a BLOB is being allocated during online ALTER TABLE. */
-void
-row_log_table_blob_alloc(
-/*=====================*/
- dict_index_t* index, /*!< in/out: clustered index, X-latched */
- ulint page_no)/*!< in: starting page number of the BLOB */
-{
- ut_ad(dict_index_is_clust(index));
- ut_ad(dict_index_is_online_ddl(index));
-
- ut_ad(rw_lock_own_flagged(
- &index->lock,
- RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));
-
- ut_ad(page_no != FIL_NULL);
-
- if (index->online_log->error != DB_SUCCESS) {
- return;
- }
-
- /* Only track allocations if the same page has been freed
- earlier. Double allocation without a free is not allowed. */
- if (page_no_map* blobs = index->online_log->blobs) {
- page_no_map::iterator p = blobs->find(page_no);
-
- if (p != blobs->end()) {
- ut_ad(p->first == page_no);
- p->second.blob_alloc(index->online_log->tail.total);
- }
- }
-}
-
-/******************************************************//**
Converts a log record to a table row.
@return converted row, or NULL if the conversion fails */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
@@ -1590,28 +1439,7 @@ row_log_table_apply_convert_mrec(
if (rec_offs_nth_extern(offsets, i)) {
ut_ad(rec_offs_any_extern(offsets));
- rw_lock_x_lock(dict_index_get_lock(index));
-
- if (const page_no_map* blobs = log->blobs) {
- data = rec_get_nth_field(
- mrec, offsets, i, &len);
- ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
-
- ulint page_no = mach_read_from_4(
- data + len - (BTR_EXTERN_FIELD_REF_SIZE
- - BTR_EXTERN_PAGE_NO));
- page_no_map::const_iterator p = blobs->find(
- page_no);
- if (p != blobs->end()
- && p->second.is_freed(log->head.total)) {
- /* This BLOB has been freed.
- We must not access the row. */
- *error = DB_MISSING_HISTORY;
- dfield_set_data(dfield, data, len);
- dfield_set_ext(dfield);
- goto blob_done;
- }
- }
+ index->lock.x_lock(SRW_LOCK_CALL);
data = btr_rec_copy_externally_stored_field(
mrec, offsets,
@@ -1619,8 +1447,8 @@ row_log_table_apply_convert_mrec(
i, &len, heap);
ut_a(data);
dfield_set_data(dfield, data, len);
-blob_done:
- rw_lock_x_unlock(dict_index_get_lock(index));
+
+ index->lock.x_unlock();
} else {
data = rec_get_nth_field(mrec, offsets, i, &len);
if (len == UNIV_SQL_DEFAULT) {
@@ -1667,6 +1495,12 @@ blob_done:
if ((new_col->prtype & DATA_NOT_NULL)
&& dfield_is_null(dfield)) {
+ if (!log->allow_not_null) {
+ /* We got a NULL value for a NOT NULL column. */
+ *error = DB_INVALID_NULL;
+ return NULL;
+ }
+
const dfield_t& default_field
= log->defaults->fields[col_no];
@@ -1676,12 +1510,6 @@ blob_done:
WARN_DATA_TRUNCATED, 1,
ulong(log->n_rows));
- if (!log->allow_not_null) {
- /* We got a NULL value for a NOT NULL column. */
- *error = DB_INVALID_NULL;
- return NULL;
- }
-
*dfield = default_field;
}
@@ -1755,7 +1583,7 @@ row_log_table_apply_insert_low(
entry = row_build_index_entry(row, NULL, index, heap);
error = row_ins_sec_index_entry_low(
- flags, BTR_MODIFY_TREE,
+ flags, BTR_INSERT_TREE,
index, offsets_heap, heap, entry,
thr_get_trx(thr)->id, thr);
@@ -1792,15 +1620,6 @@ row_log_table_apply_insert(
mrec, dup->index, offsets, log, heap, &error);
switch (error) {
- case DB_MISSING_HISTORY:
- ut_ad(log->blobs);
- /* Because some BLOBs are missing, we know that the
- transaction was rolled back later (a rollback of
- an insert can free BLOBs).
- We can simply skip the insert: the subsequent
- ROW_T_DELETE will be ignored, or a ROW_T_UPDATE will
- be interpreted as ROW_T_INSERT. */
- return(DB_SUCCESS);
case DB_SUCCESS:
ut_ad(row != NULL);
break;
@@ -1839,7 +1658,7 @@ row_log_table_apply_delete_low(
dberr_t error;
row_ext_t* ext;
dtuple_t* row;
- dict_index_t* index = btr_pcur_get_btr_cur(pcur)->index;
+ dict_index_t* index = pcur->index();
ut_ad(dict_index_is_clust(index));
@@ -1859,12 +1678,14 @@ row_log_table_apply_delete_low(
btr_cur_pessimistic_delete(&error, FALSE, btr_pcur_get_btr_cur(pcur),
BTR_CREATE_FLAG, false, mtr);
- mtr_commit(mtr);
-
if (error != DB_SUCCESS) {
- return(error);
+err_exit:
+ mtr->commit();
+ return error;
}
+ mtr->commit();
+
while ((index = dict_table_get_next_index(index)) != NULL) {
if (index->type & DICT_FTS) {
continue;
@@ -1874,9 +1695,12 @@ row_log_table_apply_delete_low(
row, ext, index, heap);
mtr->start();
index->set_modified(*mtr);
- btr_pcur_open(index, entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
- pcur, mtr);
+ pcur->btr_cur.page_cur.index = index;
+ error = btr_pcur_open(entry, PAGE_CUR_LE, BTR_PURGE_TREE, pcur,
+ mtr);
+ if (error) {
+ goto err_exit;
+ }
#ifdef UNIV_DEBUG
switch (btr_pcur_get_btr_cur(pcur)->flag) {
case BTR_CUR_DELETE_REF:
@@ -1937,6 +1761,7 @@ row_log_table_apply_delete(
btr_pcur_t pcur;
rec_offs* offsets;
+ pcur.btr_cur.page_cur.index = index;
ut_ad(rec_offs_n_fields(moffsets) == index->first_user_field());
ut_ad(!rec_offs_any_extern(moffsets));
@@ -1955,9 +1780,11 @@ row_log_table_apply_delete(
mtr_start(&mtr);
index->set_modified(mtr);
- btr_pcur_open(index, old_pk, PAGE_CUR_LE,
- BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
- &pcur, &mtr);
+ dberr_t err = btr_pcur_open(old_pk, PAGE_CUR_LE, BTR_PURGE_TREE, &pcur,
+ &mtr);
+ if (err != DB_SUCCESS) {
+ goto all_done;
+ }
#ifdef UNIV_DEBUG
switch (btr_pcur_get_btr_cur(&pcur)->flag) {
case BTR_CUR_DELETE_REF:
@@ -1984,7 +1811,7 @@ all_done:
ROW_T_INSERT was skipped or
ROW_T_UPDATE was interpreted as ROW_T_DELETE
due to BLOBs having been freed by rollback. */
- return(DB_SUCCESS);
+ return err;
}
offsets = rec_get_offsets(btr_pcur_get_rec(&pcur), index, nullptr,
@@ -2066,6 +1893,8 @@ row_log_table_apply_update(
dberr_t error;
ulint n_index = 0;
+ pcur.btr_cur.page_cur.index = index;
+
ut_ad(dtuple_get_n_fields_cmp(old_pk)
== dict_index_get_n_unique(index));
ut_ad(dtuple_get_n_fields(old_pk) - (log->same_pk ? 0 : 2)
@@ -2075,20 +1904,6 @@ row_log_table_apply_update(
mrec, dup->index, offsets, log, heap, &error);
switch (error) {
- case DB_MISSING_HISTORY:
- /* The record contained BLOBs that are now missing. */
- ut_ad(log->blobs);
- /* Whether or not we are updating the PRIMARY KEY, we
- know that there should be a subsequent
- ROW_T_DELETE for rolling back a preceding ROW_T_INSERT,
- overriding this ROW_T_UPDATE record. (*1)
-
- This allows us to interpret this ROW_T_UPDATE
- as ROW_T_DELETE.
-
- When applying the subsequent ROW_T_DELETE, no matching
- record will be found. */
- /* fall through */
case DB_SUCCESS:
ut_ad(row != NULL);
break;
@@ -2100,10 +1915,25 @@ row_log_table_apply_update(
return(error);
}
- mtr_start(&mtr);
+ mtr.start();
index->set_modified(mtr);
- btr_pcur_open(index, old_pk, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &pcur, &mtr);
+ error = btr_pcur_open(old_pk, PAGE_CUR_LE, BTR_MODIFY_TREE, &pcur,
+ &mtr);
+ if (error != DB_SUCCESS) {
+func_exit:
+ mtr.commit();
+func_exit_committed:
+ ut_ad(mtr.has_committed());
+ ut_free(pcur.old_rec_buf);
+
+ if (error != DB_SUCCESS) {
+ /* Report the erroneous row using the new
+ version of the table. */
+ innobase_row_to_mysql(dup->table, log->table, row);
+ }
+
+ return error;
+ }
#ifdef UNIV_DEBUG
switch (btr_pcur_get_btr_cur(&pcur)->flag) {
case BTR_CUR_DELETE_REF:
@@ -2118,80 +1948,16 @@ row_log_table_apply_update(
}
#endif /* UNIV_DEBUG */
- if (page_rec_is_infimum(btr_pcur_get_rec(&pcur))
- || btr_pcur_get_low_match(&pcur) < index->n_uniq) {
- /* The record was not found. This should only happen
- when an earlier ROW_T_INSERT or ROW_T_UPDATE was
- diverted because BLOBs were freed when the insert was
- later rolled back. */
-
- ut_ad(log->blobs);
-
- if (error == DB_SUCCESS) {
- /* An earlier ROW_T_INSERT could have been
- skipped because of a missing BLOB, like this:
-
- BEGIN;
- INSERT INTO t SET blob_col='blob value';
- UPDATE t SET blob_col='';
- ROLLBACK;
-
- This would generate the following records:
- ROW_T_INSERT (referring to 'blob value')
- ROW_T_UPDATE
- ROW_T_UPDATE (referring to 'blob value')
- ROW_T_DELETE
- [ROLLBACK removes the 'blob value']
-
- The ROW_T_INSERT would have been skipped
- because of a missing BLOB. Now we are
- executing the first ROW_T_UPDATE.
- The second ROW_T_UPDATE (for the ROLLBACK)
- would be interpreted as ROW_T_DELETE, because
- the BLOB would be missing.
-
- We could probably assume that the transaction
- has been rolled back and simply skip the
- 'insert' part of this ROW_T_UPDATE record.
- However, there might be some complex scenario
- that could interfere with such a shortcut.
- So, we will insert the row (and risk
- introducing a bogus duplicate key error
- for the ALTER TABLE), and a subsequent
- ROW_T_UPDATE or ROW_T_DELETE will delete it. */
- mtr_commit(&mtr);
- error = row_log_table_apply_insert_low(
- thr, row, offsets_heap, heap, dup);
- } else {
- /* Some BLOBs are missing, so we are interpreting
- this ROW_T_UPDATE as ROW_T_DELETE (see *1).
- Because the record was not found, we do nothing. */
- ut_ad(error == DB_MISSING_HISTORY);
- error = DB_SUCCESS;
-func_exit:
- mtr_commit(&mtr);
- }
-func_exit_committed:
- ut_ad(mtr.has_committed());
- ut_free(pcur.old_rec_buf);
-
- if (error != DB_SUCCESS) {
- /* Report the erroneous row using the new
- version of the table. */
- innobase_row_to_mysql(dup->table, log->table, row);
- }
-
- return(error);
- }
+ ut_ad(!page_rec_is_infimum(btr_pcur_get_rec(&pcur))
+ && btr_pcur_get_low_match(&pcur) >= index->n_uniq);
/* Prepare to update (or delete) the record. */
rec_offs* cur_offsets = rec_get_offsets(
btr_pcur_get_rec(&pcur), index, nullptr, index->n_core_fields,
ULINT_UNDEFINED, &offsets_heap);
+#ifdef UNIV_DEBUG
if (!log->same_pk) {
- /* Only update the record if DB_TRX_ID,DB_ROLL_PTR match what
- was buffered. */
ulint len;
const byte* rec_trx_id
= rec_get_nth_field(btr_pcur_get_rec(&pcur),
@@ -2206,59 +1972,17 @@ func_exit_committed:
+ static_cast<const char*>(old_pk_trx_id->data)
== old_pk_trx_id[1].data);
ut_d(trx_id_check(old_pk_trx_id->data, log->min_trx));
-
- if (memcmp(rec_trx_id, old_pk_trx_id->data,
- DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)) {
- /* The ROW_T_UPDATE was logged for a different
- DB_TRX_ID,DB_ROLL_PTR. This is possible if an
- earlier ROW_T_INSERT or ROW_T_UPDATE was diverted
- because some BLOBs were missing due to rolling
- back the initial insert or due to purging
- the old BLOB values of an update. */
- ut_ad(log->blobs);
- if (error != DB_SUCCESS) {
- ut_ad(error == DB_MISSING_HISTORY);
- /* Some BLOBs are missing, so we are
- interpreting this ROW_T_UPDATE as
- ROW_T_DELETE (see *1).
- Because this is a different row,
- we will do nothing. */
- error = DB_SUCCESS;
- } else {
- /* Because the user record is missing due to
- BLOBs that were missing when processing
- an earlier log record, we should
- interpret the ROW_T_UPDATE as ROW_T_INSERT.
- However, there is a different user record
- with the same PRIMARY KEY value already. */
- error = DB_DUPLICATE_KEY;
- }
-
- goto func_exit;
- }
- }
-
- if (error != DB_SUCCESS) {
- ut_ad(error == DB_MISSING_HISTORY);
- ut_ad(log->blobs);
- /* Some BLOBs are missing, so we are interpreting
- this ROW_T_UPDATE as ROW_T_DELETE (see *1). */
- error = row_log_table_apply_delete_low(
- &pcur, cur_offsets, heap, &mtr);
- goto func_exit_committed;
+ ut_ad(!memcmp(rec_trx_id, old_pk_trx_id->data,
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
}
+#endif
dtuple_t* entry = row_build_index_entry_low(
row, NULL, index, heap, ROW_BUILD_NORMAL);
upd_t* update = row_upd_build_difference_binary(
index, entry, btr_pcur_get_rec(&pcur), cur_offsets,
false, false, NULL, heap, dup->table, &error);
- if (error != DB_SUCCESS) {
- goto func_exit;
- }
-
- if (!update->n_fields) {
- /* Nothing to do. */
+ if (error != DB_SUCCESS || !update->n_fields) {
goto func_exit;
}
@@ -2339,7 +2063,7 @@ func_exit_committed:
for (n_index += index->type != DICT_CLUSTERED;
(index = dict_table_get_next_index(index)); n_index++) {
- if (index->type & DICT_FTS) {
+ if (!index->is_btree()) {
continue;
}
@@ -2356,7 +2080,7 @@ func_exit_committed:
dtuple_copy_v_fields(old_row, old_pk);
}
- mtr_commit(&mtr);
+ mtr.commit();
entry = row_build_index_entry(old_row, old_ext, index, heap);
if (!entry) {
@@ -2365,13 +2089,15 @@ func_exit_committed:
goto func_exit_committed;
}
- mtr_start(&mtr);
+ mtr.start();
index->set_modified(mtr);
+ pcur.btr_cur.page_cur.index = index;
ut_free(pcur.old_rec_buf);
+ pcur.old_rec_buf = nullptr;
if (ROW_FOUND != row_search_index_entry(
- index, entry, BTR_MODIFY_TREE, &pcur, &mtr)) {
+ entry, BTR_MODIFY_TREE, &pcur, &mtr)) {
ut_ad(0);
error = DB_CORRUPTION;
break;
@@ -2385,13 +2111,13 @@ func_exit_committed:
break;
}
- mtr_commit(&mtr);
+ mtr.commit();
entry = row_build_index_entry(row, NULL, index, heap);
error = row_ins_sec_index_entry_low(
BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG
| BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG,
- BTR_MODIFY_TREE, index, offsets_heap, heap,
+ BTR_INSERT_TREE, index, offsets_heap, heap,
entry, thr_get_trx(thr)->id, thr);
/* Report correct index name for duplicate key error. */
@@ -2399,7 +2125,7 @@ func_exit_committed:
thr_get_trx(thr)->error_key_num = n_index;
}
- mtr_start(&mtr);
+ mtr.start();
index->set_modified(mtr);
}
@@ -2441,11 +2167,6 @@ row_log_table_apply_op(
*error = DB_SUCCESS;
- /* 3 = 1 (op type) + 1 (extra_size) + at least 1 byte payload */
- if (mrec + 3 >= mrec_end) {
- return(NULL);
- }
-
const bool is_instant = log->is_instant(dup->index);
const mrec_t* const mrec_start = mrec;
@@ -2717,7 +2438,8 @@ ulint
row_log_estimate_work(
const dict_index_t* index)
{
- if (index == NULL || index->online_log == NULL) {
+ if (index == NULL || index->online_log == NULL
+ || index->online_log_is_dummy()) {
return(0);
}
@@ -2775,7 +2497,7 @@ row_log_table_apply_ops(
ut_ad(dict_index_is_clust(index));
ut_ad(dict_index_is_online_ddl(index));
ut_ad(trx->mysql_thd);
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X));
+ ut_ad(index->lock.have_x());
ut_ad(!dict_index_is_online_ddl(new_index));
ut_ad(dict_col_get_clust_pos(
dict_table_get_sys_col(index->table, DATA_TRX_ID), index)
@@ -2795,7 +2517,7 @@ row_log_table_apply_ops(
next_block:
ut_ad(has_index_lock);
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X));
+ ut_ad(index->lock.have_u_or_x());
ut_ad(index->online_log->head.bytes == 0);
stage->inc(row_log_progress_inc_per_block());
@@ -2868,7 +2590,7 @@ all_done:
ut_ad(has_index_lock);
has_index_lock = false;
- rw_lock_x_unlock(dict_index_get_lock(index));
+ index->lock.x_unlock();
log_free_check();
@@ -2881,9 +2603,9 @@ all_done:
byte* buf = index->online_log->head.block;
- if (os_file_read_no_error_handling(
- IORequestRead, index->online_log->fd,
- buf, ofs, srv_sort_buf_size, 0) != DB_SUCCESS) {
+ if (DB_SUCCESS
+ != os_file_read(IORequestRead, index->online_log->fd,
+ buf, ofs, srv_sort_buf_size, nullptr)) {
ib::error()
<< "Unable to read temporary file"
" for table " << index->table->name;
@@ -3059,7 +2781,7 @@ all_done:
mrec = NULL;
process_next_block:
- rw_lock_x_lock(dict_index_get_lock(index));
+ index->lock.x_lock(SRW_LOCK_CALL);
has_index_lock = true;
index->online_log->head.bytes = 0;
@@ -3091,7 +2813,7 @@ interrupted:
error = DB_INTERRUPTED;
func_exit:
if (!has_index_lock) {
- rw_lock_x_lock(dict_index_get_lock(index));
+ index->lock.x_lock(SRW_LOCK_CALL);
}
mem_heap_free(offsets_heap);
@@ -3127,14 +2849,13 @@ row_log_table_apply(
stage->begin_phase_log_table();
- ut_ad(!rw_lock_own(&dict_sys.latch, RW_LOCK_S));
clust_index = dict_table_get_first_index(old_table);
if (clust_index->online_log->n_rows == 0) {
clust_index->online_log->n_rows = new_table->stat_n_rows;
}
- rw_lock_x_lock(dict_index_get_lock(clust_index));
+ clust_index->lock.x_lock(SRW_LOCK_CALL);
if (!clust_index->online_log) {
ut_ad(dict_index_get_online_status(clust_index)
@@ -3157,7 +2878,7 @@ row_log_table_apply(
== clust_index->online_log->tail.total);
}
- rw_lock_x_unlock(dict_index_get_lock(clust_index));
+ clust_index->lock.x_unlock();
DBUG_EXECUTE_IF("innodb_trx_duplicates",
thr_get_trx(thr)->duplicates = 0;);
@@ -3196,7 +2917,7 @@ row_log_allocate(
ut_ad(same_pk || table);
ut_ad(!table || col_map);
ut_ad(!defaults || col_map);
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X));
+ ut_ad(index->lock.have_u_or_x());
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
ut_ad(trx->id);
@@ -3207,9 +2928,8 @@ row_log_allocate(
}
log->fd = OS_FILE_CLOSED;
- mutex_create(LATCH_ID_INDEX_ONLINE_LOG, &log->mutex);
+ mysql_mutex_init(index_online_log_key, &log->mutex, nullptr);
- log->blobs = NULL;
log->table = table;
log->same_pk = same_pk;
log->defaults = defaults;
@@ -3259,6 +2979,15 @@ row_log_allocate(
}
index->online_log = log;
+
+ if (!table) {
+		/* Assign a dummy online log to the clustered index.
+		Concurrent DML can use it to detect whether the table
+		has any online DDL in progress. */
+ index->table->indexes.start->online_log_make_dummy();
+ log->alter_trx = trx;
+ }
+
/* While we might be holding an exclusive data dictionary lock
here, in row_log_abort_sec() we will not always be holding it. Use
atomic operations in both cases. */
@@ -3276,7 +3005,6 @@ row_log_free(
{
MONITOR_ATOMIC_DEC(MONITOR_ONLINE_CREATE_INDEX);
- UT_DELETE(log->blobs);
UT_DELETE_ARRAY(log->non_core_fields);
row_log_block_free(log->tail);
row_log_block_free(log->head);
@@ -3290,7 +3018,7 @@ row_log_free(
my_large_free(log->crypt_tail, log->crypt_tail_size);
}
- mutex_free(&log->mutex);
+ mysql_mutex_destroy(&log->mutex);
ut_free(log);
}
@@ -3304,11 +3032,11 @@ row_log_get_max_trx(
dict_index_t* index) /*!< in: index, must be locked */
{
ut_ad(dict_index_get_online_status(index) == ONLINE_INDEX_CREATION);
-
- ut_ad((rw_lock_own(dict_index_get_lock(index), RW_LOCK_S)
- && mutex_own(&index->online_log->mutex))
- || rw_lock_own(dict_index_get_lock(index), RW_LOCK_X));
-
+#ifdef SAFE_MUTEX
+ ut_ad(index->lock.have_x()
+ || (index->lock.have_s()
+ && mysql_mutex_is_owner(&index->online_log->mutex)));
+#endif
return(index->online_log->max_trx);
}
@@ -3336,8 +3064,7 @@ row_log_apply_op_low(
ut_ad(!dict_index_is_clust(index));
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X)
- == has_index_lock);
+ ut_ad(index->lock.have_x() == has_index_lock);
ut_ad(!index->is_corrupted());
ut_ad(trx_id != 0 || op == ROW_OP_DELETE);
@@ -3350,18 +3077,22 @@ row_log_apply_op_low(
mtr_start(&mtr);
index->set_modified(mtr);
+ cursor.page_cur.index = index;
+ if (has_index_lock) {
+ mtr_x_lock_index(index, &mtr);
+ }
/* We perform the pessimistic variant of the operations if we
already hold index->lock exclusively. First, search the
record. The operation may already have been performed,
depending on when the row in the clustered index was
scanned. */
- btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
- has_index_lock
- ? BTR_MODIFY_TREE
- : BTR_MODIFY_LEAF,
- &cursor, __FILE__, __LINE__,
- &mtr);
+ *error = cursor.search_leaf(entry, PAGE_CUR_LE, has_index_lock
+ ? BTR_MODIFY_TREE_ALREADY_LATCHED
+ : BTR_MODIFY_LEAF, &mtr);
+ if (UNIV_UNLIKELY(*error != DB_SUCCESS)) {
+ goto func_exit;
+ }
ut_ad(dict_index_get_n_unique(index) > 0);
/* This test is somewhat similar to row_ins_must_modify_rec(),
@@ -3395,9 +3126,10 @@ row_log_apply_op_low(
goto func_exit;
}
- if (btr_cur_optimistic_delete(
- &cursor, BTR_CREATE_FLAG, &mtr)) {
- *error = DB_SUCCESS;
+ *error = btr_cur_optimistic_delete(
+ &cursor, BTR_CREATE_FLAG, &mtr);
+
+ if (*error != DB_FAIL) {
break;
}
@@ -3407,11 +3139,12 @@ row_log_apply_op_low(
mtr_commit(&mtr);
mtr_start(&mtr);
index->set_modified(mtr);
- btr_cur_search_to_nth_level(
- index, 0, entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &cursor,
- __FILE__, __LINE__, &mtr);
-
+ *error = cursor.search_leaf(entry, PAGE_CUR_LE,
+ BTR_MODIFY_TREE,
+ &mtr);
+ if (UNIV_UNLIKELY(*error != DB_SUCCESS)) {
+ goto func_exit;
+ }
/* No other thread than the current one
is allowed to modify the index tree.
Thus, the record should still exist. */
@@ -3510,10 +3243,12 @@ insert_the_rec:
mtr_commit(&mtr);
mtr_start(&mtr);
index->set_modified(mtr);
- btr_cur_search_to_nth_level(
- index, 0, entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &cursor,
- __FILE__, __LINE__, &mtr);
+ *error = cursor.search_leaf(entry, PAGE_CUR_LE,
+ BTR_MODIFY_TREE,
+ &mtr);
+ if (*error != DB_SUCCESS) {
+ break;
+ }
}
/* We already determined that the
@@ -3579,8 +3314,7 @@ row_log_apply_op(
/* Online index creation is only used for secondary indexes. */
ut_ad(!dict_index_is_clust(index));
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X)
- == has_index_lock);
+ ut_ad(index->lock.have_x() == has_index_lock);
if (index->is_corrupted()) {
*error = DB_INDEX_CORRUPT;
@@ -3667,7 +3401,8 @@ interrupted)
@param[in,out] dup for reporting duplicate key errors
@param[in,out] stage performance schema accounting object, used by
ALTER TABLE. If not NULL, then stage->inc() will be called for each block
-of log that is applied.
+of log that is applied. It is nullptr when the row log is applied by a
+DML thread.
@return DB_SUCCESS, or error code on failure */
static
dberr_t
@@ -3689,9 +3424,11 @@ row_log_apply_ops(
const ulint i = 1 + REC_OFFS_HEADER_SIZE
+ dict_index_get_n_fields(index);
- ut_ad(dict_index_is_online_ddl(index));
+ ut_ad(dict_index_is_online_ddl(index)
+ || (index->online_log
+ && index->online_status == ONLINE_INDEX_COMPLETE));
ut_ad(!index->is_committed());
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X));
+ ut_ad(index->lock.have_x());
ut_ad(index->online_log);
MEM_UNDEFINED(&mrec_end, sizeof mrec_end);
@@ -3706,10 +3443,12 @@ row_log_apply_ops(
next_block:
ut_ad(has_index_lock);
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_X));
+ ut_ad(index->lock.have_x());
ut_ad(index->online_log->head.bytes == 0);
- stage->inc(row_log_progress_inc_per_block());
+ if (stage) {
+ stage->inc(row_log_progress_inc_per_block());
+ }
if (trx_is_interrupted(trx)) {
goto interrupted;
@@ -3763,6 +3502,8 @@ all_done:
ut_ad(has_index_lock);
ut_ad(index->online_log->head.blocks == 0);
ut_ad(index->online_log->tail.blocks == 0);
+ index->online_log->tail.bytes = 0;
+ index->online_log->head.bytes = 0;
error = DB_SUCCESS;
goto func_exit;
}
@@ -3772,7 +3513,7 @@ all_done:
* srv_sort_buf_size;
ut_ad(has_index_lock);
has_index_lock = false;
- rw_lock_x_unlock(dict_index_get_lock(index));
+ index->lock.x_unlock();
log_free_check();
@@ -3783,9 +3524,9 @@ all_done:
byte* buf = index->online_log->head.block;
- if (os_file_read_no_error_handling(
- IORequestRead, index->online_log->fd,
- buf, ofs, srv_sort_buf_size, 0) != DB_SUCCESS) {
+ if (DB_SUCCESS
+ != os_file_read(IORequestRead, index->online_log->fd,
+ buf, ofs, srv_sort_buf_size, nullptr)) {
ib::error()
<< "Unable to read temporary file"
" for index " << index->name;
@@ -3932,7 +3673,7 @@ all_done:
mrec = NULL;
process_next_block:
- rw_lock_x_lock(dict_index_get_lock(index));
+ index->lock.x_lock(SRW_LOCK_CALL);
has_index_lock = true;
index->online_log->head.bytes = 0;
@@ -3964,7 +3705,7 @@ interrupted:
error = DB_INTERRUPTED;
func_exit:
if (!has_index_lock) {
- rw_lock_x_lock(dict_index_get_lock(index));
+ index->lock.x_lock(SRW_LOCK_CALL);
}
switch (error) {
@@ -3978,9 +3719,6 @@ func_exit:
}
/* fall through */
default:
- /* We set the flag directly instead of invoking
- dict_set_corrupted_index_cache_only(index) here,
- because the index is not "public" yet. */
index->type |= DICT_CORRUPT;
}
@@ -3998,7 +3736,8 @@ interrupted)
@param[in,out] table MySQL table (for reporting duplicates)
@param[in,out] stage performance schema accounting object, used by
ALTER TABLE. stage->begin_phase_log_index() will be called initially and then
-stage->inc() will be called for each block of log that is applied.
+stage->inc() will be called for each block of log that is applied. It is
+nullptr when the row log is applied by a DML thread.
@return DB_SUCCESS, or error code on failure */
dberr_t
row_log_apply(
@@ -4008,20 +3747,23 @@ row_log_apply(
ut_stage_alter_t* stage)
{
dberr_t error;
- row_log_t* log;
row_merge_dup_t dup = { index, table, NULL, 0 };
DBUG_ENTER("row_log_apply");
- ut_ad(dict_index_is_online_ddl(index));
+ ut_ad(dict_index_is_online_ddl(index)
+ || (index->online_log
+ && index->online_status == ONLINE_INDEX_COMPLETE));
ut_ad(!dict_index_is_clust(index));
- stage->begin_phase_log_index();
+ if (stage) {
+ stage->begin_phase_log_index();
+ }
log_free_check();
- rw_lock_x_lock(dict_index_get_lock(index));
+ index->lock.x_lock(SRW_LOCK_CALL);
- if (!dict_table_is_corrupted(index->table)) {
+ if (index->online_log && !index->table->corrupted) {
error = row_log_apply_ops(trx, index, &dup, stage);
} else {
error = DB_SUCCESS;
@@ -4029,23 +3771,18 @@ row_log_apply(
if (error != DB_SUCCESS) {
ut_ad(index->table->space);
- /* We set the flag directly instead of invoking
- dict_set_corrupted_index_cache_only(index) here,
- because the index is not "public" yet. */
index->type |= DICT_CORRUPT;
index->table->drop_aborted = TRUE;
dict_index_set_online_status(index, ONLINE_INDEX_ABORTED);
- } else {
+ } else if (stage) {
+		/* Mark the index as completed only when this is
+		invoked by the DDL thread. */
ut_ad(dup.n_dup == 0);
dict_index_set_online_status(index, ONLINE_INDEX_COMPLETE);
}
- log = index->online_log;
- index->online_log = NULL;
- rw_lock_x_unlock(dict_index_get_lock(index));
-
- row_log_free(log);
+ index->lock.x_unlock();
DBUG_RETURN(error);
}
@@ -4055,3 +3792,338 @@ unsigned row_log_get_n_core_fields(const dict_index_t *index)
ut_ad(index->online_log);
return index->online_log->n_core_fields;
}
+
+dberr_t row_log_get_error(const dict_index_t *index)
+{
+ ut_ad(index->online_log);
+ return index->online_log->error;
+}
+
+dberr_t dict_table_t::clear(que_thr_t *thr)
+{
+ dberr_t err= DB_SUCCESS;
+ for (dict_index_t *index= UT_LIST_GET_FIRST(indexes); index;
+ index= UT_LIST_GET_NEXT(indexes, index))
+ {
+ if (index->type & DICT_FTS)
+ continue;
+
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ continue;
+ case ONLINE_INDEX_COMPLETE:
+ break;
+ case ONLINE_INDEX_CREATION:
+ ut_ad("invalid type" == 0);
+ MY_ASSERT_UNREACHABLE();
+ break;
+ }
+ if (dberr_t err_index= index->clear(thr))
+ err= err_index;
+ }
+ return err;
+}
+
+const rec_t *
+UndorecApplier::get_old_rec(const dtuple_t &tuple, dict_index_t *index,
+ const rec_t **clust_rec, rec_offs **offsets)
+{
+ ut_ad(index->is_primary());
+ btr_pcur_t pcur;
+
+ bool found= row_search_on_row_ref(&pcur, BTR_MODIFY_LEAF,
+ index->table, &tuple, &mtr);
+ ut_a(found);
+ *clust_rec= btr_pcur_get_rec(&pcur);
+
+ ulint len= 0;
+ rec_t *prev_version;
+ const rec_t *version= *clust_rec;
+ do
+ {
+ *offsets= rec_get_offsets(version, index, *offsets,
+ index->n_core_fields, ULINT_UNDEFINED,
+ &heap);
+ roll_ptr_t roll_ptr= trx_read_roll_ptr(
+ rec_get_nth_field(version, *offsets, index->db_roll_ptr(), &len));
+ ut_ad(len == DATA_ROLL_PTR_LEN);
+ if (is_same(roll_ptr))
+ return version;
+ trx_undo_prev_version_build(version, index, *offsets, heap, &prev_version,
+ nullptr, nullptr, 0);
+ version= prev_version;
+ }
+ while (version);
+
+ return nullptr;
+}
+
+/** Clear out the online logs of all other online indexes after
+encountering an error during row_log_apply() in a DML thread
+@param table table on which online DDL is in progress */
+static void row_log_mark_other_online_index_abort(dict_table_t *table)
+{
+ dict_index_t *clust_index= dict_table_get_first_index(table);
+ for (dict_index_t *index= dict_table_get_next_index(clust_index);
+ index; index= dict_table_get_next_index(index))
+ {
+ if (index->online_log &&
+ index->online_status <= ONLINE_INDEX_CREATION &&
+ !index->is_corrupted())
+ {
+ index->lock.x_lock(SRW_LOCK_CALL);
+ row_log_abort_sec(index);
+ index->type|= DICT_CORRUPT;
+ index->lock.x_unlock();
+ MONITOR_ATOMIC_INC(MONITOR_BACKGROUND_DROP_INDEX);
+ }
+ }
+
+ clust_index->lock.x_lock(SRW_LOCK_CALL);
+ clust_index->online_log= nullptr;
+ clust_index->lock.x_unlock();
+ table->drop_aborted= TRUE;
+}
+
+void dtype_t::assign(const dict_col_t &col)
+{
+ prtype= col.prtype;
+ mtype= col.mtype;
+ len= col.len;
+ mbminlen= col.mbminlen;
+ mbmaxlen= col.mbmaxlen;
+}
+
+inline void dtuple_t::copy_field_types(const dict_index_t &index)
+{
+ ut_ad(index.n_fields == n_fields);
+ if (UNIV_LIKELY_NULL(index.change_col_info))
+ for (ulint i= 0; i < n_fields; i++)
+ fields[i].type.assign(*index.fields[i].col);
+}
+
+void UndorecApplier::log_insert(const dtuple_t &tuple,
+ dict_index_t *clust_index)
+{
+ DEBUG_SYNC_C("row_log_insert_handle");
+ ut_ad(clust_index->is_primary());
+ rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs *offsets= offsets_;
+
+ rec_offs_init(offsets_);
+ mtr.start();
+ const rec_t *rec;
+ const rec_t *match_rec= get_old_rec(tuple, clust_index, &rec, &offsets);
+ if (!match_rec)
+ {
+ mtr.commit();
+ return;
+ }
+ const rec_t *copy_rec= match_rec;
+ if (match_rec == rec)
+ {
+ copy_rec= rec_copy(mem_heap_alloc(
+ heap, rec_offs_size(offsets)), match_rec, offsets);
+ rec_offs_make_valid(copy_rec, clust_index, true, offsets);
+ }
+ mtr.commit();
+
+ dict_table_t *table= clust_index->table;
+ clust_index->lock.s_lock(SRW_LOCK_CALL);
+ if (clust_index->online_log &&
+ !clust_index->online_log_is_dummy() &&
+ clust_index->online_status <= ONLINE_INDEX_CREATION)
+ {
+ row_log_table_insert(copy_rec, clust_index, offsets);
+ clust_index->lock.s_unlock();
+ }
+ else
+ {
+ clust_index->lock.s_unlock();
+ row_ext_t *ext;
+ dtuple_t *row= row_build(ROW_COPY_POINTERS, clust_index,
+ copy_rec, offsets, table, nullptr, nullptr, &ext, heap);
+
+ if (table->n_v_cols)
+ {
+ /* Update the row with virtual column values present
+ in the undo log or update vector */
+ if (type == TRX_UNDO_UPD_DEL_REC)
+ row_upd_replace_vcol(row, table, update, false,
+ nullptr,
+ (cmpl_info & UPD_NODE_NO_ORD_CHANGE)
+ ? nullptr : undo_rec);
+ else
+ trx_undo_read_v_cols(table, undo_rec, row, false);
+ }
+
+ bool success= true;
+ for (dict_index_t *index= clust_index;
+ (index= dict_table_get_next_index(index)) != nullptr; )
+ {
+ index->lock.s_lock(SRW_LOCK_CALL);
+ if (index->online_log &&
+ index->online_status <= ONLINE_INDEX_CREATION &&
+ !index->is_corrupted())
+ {
+ dtuple_t *entry= row_build_index_entry_low(row, ext, index,
+ heap, ROW_BUILD_NORMAL);
+ entry->copy_field_types(*index);
+ success= row_log_online_op(index, entry, trx_id);
+ }
+
+ index->lock.s_unlock();
+ if (!success)
+ {
+ row_log_mark_other_online_index_abort(index->table);
+ return;
+ }
+ }
+ }
+}
+
+void UndorecApplier::log_update(const dtuple_t &tuple,
+ dict_index_t *clust_index)
+{
+ rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs offsets2_[REC_OFFS_NORMAL_SIZE];
+ rec_offs *offsets= offsets_;
+ rec_offs *prev_offsets= offsets2_;
+
+ rec_offs_init(offsets_);
+ rec_offs_init(offsets2_);
+
+ dict_table_t *table= clust_index->table;
+
+ clust_index->lock.s_lock(SRW_LOCK_CALL);
+ bool table_rebuild=
+ (clust_index->online_log
+ && !clust_index->online_log_is_dummy()
+ && clust_index->online_status <= ONLINE_INDEX_CREATION);
+ clust_index->lock.s_unlock();
+
+ mtr.start();
+ const rec_t *rec;
+ rec_t *prev_version;
+ bool is_update= (type == TRX_UNDO_UPD_EXIST_REC);
+ const rec_t *match_rec= get_old_rec(tuple, clust_index, &rec, &offsets);
+ if (!match_rec)
+ {
+ mtr.commit();
+ return;
+ }
+
+ if (table_rebuild)
+ {
+ const rec_t *copy_rec= match_rec;
+ if (match_rec == rec)
+ copy_rec= rec_copy(mem_heap_alloc(
+ heap, rec_offs_size(offsets)), match_rec, offsets);
+ trx_undo_prev_version_build(match_rec, clust_index, offsets, heap,
+ &prev_version, nullptr, nullptr, 0);
+
+ prev_offsets= rec_get_offsets(prev_version, clust_index, prev_offsets,
+ clust_index->n_core_fields,
+ ULINT_UNDEFINED, &heap);
+ rec_offs_make_valid(copy_rec, clust_index, true, offsets);
+ mtr.commit();
+
+ clust_index->lock.s_lock(SRW_LOCK_CALL);
+ /* Recheck whether clustered index online log has been cleared */
+ if (clust_index->online_log)
+ {
+ if (is_update)
+ {
+ const dtuple_t *rebuilt_old_pk= row_log_table_get_pk(
+ prev_version, clust_index, prev_offsets, nullptr, &heap);
+ row_log_table_update(copy_rec, clust_index, offsets, rebuilt_old_pk);
+ }
+ else
+ row_log_table_delete(prev_version, clust_index, prev_offsets, nullptr);
+ }
+ clust_index->lock.s_unlock();
+ return;
+ }
+
+ dtuple_t *row= nullptr;
+ row_ext_t *new_ext;
+ if (match_rec != rec)
+ row= row_build(ROW_COPY_POINTERS, clust_index, match_rec, offsets,
+ clust_index->table, NULL, NULL, &new_ext, heap);
+ else
+ row= row_build(ROW_COPY_DATA, clust_index, rec, offsets,
+ clust_index->table, NULL, NULL, &new_ext, heap);
+ mtr.commit();
+ row_ext_t *old_ext;
+ dtuple_t *old_row= nullptr;
+ if (!(this->cmpl_info & UPD_NODE_NO_ORD_CHANGE))
+ {
+ for (ulint i = 0; i < dict_table_get_n_v_cols(table); i++)
+ dfield_get_type(
+ dtuple_get_nth_v_field(row, i))->mtype = DATA_MISSING;
+ }
+
+ if (is_update)
+ {
+ old_row= dtuple_copy(row, heap);
+ row_upd_replace(old_row, &old_ext, clust_index, update, heap);
+ }
+
+ if (table->n_v_cols)
+ row_upd_replace_vcol(row, table, update, false, nullptr,
+ (cmpl_info & UPD_NODE_NO_ORD_CHANGE)
+ ? nullptr : this->undo_rec);
+
+ bool success= true;
+ dict_index_t *index= dict_table_get_next_index(clust_index);
+ while (index)
+ {
+ index->lock.s_lock(SRW_LOCK_CALL);
+ if (index->online_log &&
+ index->online_status <= ONLINE_INDEX_CREATION &&
+ !index->is_corrupted())
+ {
+ if (is_update)
+ {
+ /* Ignore the index if the update doesn't affect the index */
+ if (!row_upd_changes_ord_field_binary(index, update,
+ nullptr,
+ row, new_ext))
+ goto next_index;
+ dtuple_t *old_entry= row_build_index_entry_low(
+ old_row, old_ext, index, heap, ROW_BUILD_NORMAL);
+
+ old_entry->copy_field_types(*index);
+
+ success= row_log_online_op(index, old_entry, 0);
+
+ dtuple_t *new_entry= row_build_index_entry_low(
+ row, new_ext, index, heap, ROW_BUILD_NORMAL);
+
+ new_entry->copy_field_types(*index);
+
+ if (success)
+ success= row_log_online_op(index, new_entry, trx_id);
+ }
+ else
+ {
+ dtuple_t *old_entry= row_build_index_entry_low(
+ row, new_ext, index, heap, ROW_BUILD_NORMAL);
+
+ old_entry->copy_field_types(*index);
+
+ success= row_log_online_op(index, old_entry, 0);
+ }
+ }
+next_index:
+ index->lock.s_unlock();
+ if (!success)
+ {
+ row_log_mark_other_online_index_abort(index->table);
+ return;
+ }
+ index= dict_table_get_next_index(index);
+ }
+}
+
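The UndorecApplier functions added above share one pattern: take the index latch in shared mode, re-check that an online log is still attached and the index is not corrupted, append the logical operation, and abort the remaining online indexes if the append fails. What follows is a minimal standalone sketch of that latch/recheck/append/abort shape in plain C++17; OnlineLog, Index and log_op are invented stand-ins for illustration, not InnoDB types.

#include <memory>
#include <mutex>
#include <shared_mutex>
#include <string>
#include <vector>

struct OnlineLog {                 // stand-in for row_log_t
	std::vector<std::string> ops;  // appended DML operations
	bool append(const std::string& op) {
		ops.push_back(op);
		return true;               // a real log could fail, e.g. on I/O error
	}
};

struct Index {                     // stand-in for dict_index_t
	std::shared_mutex latch;       // stand-in for index->lock
	std::unique_ptr<OnlineLog> online_log;
	bool corrupted = false;
};

// Append one operation to every index that is still being built online.
// On failure, detach the logs of all indexes and mark them corrupted,
// mirroring the abort path taken by the DML thread.
void log_op(std::vector<Index>& indexes, const std::string& op) {
	bool ok = true;
	for (Index& index : indexes) {
		{
			std::shared_lock<std::shared_mutex> s(index.latch);  // S-latch
			if (index.online_log && !index.corrupted)
				ok = index.online_log->append(op);
		}
		if (!ok) {                 // abort every online index
			for (Index& i : indexes) {
				std::unique_lock<std::shared_mutex> x(i.latch);  // X-latch
				i.online_log.reset();
				i.corrupted = true;
			}
			return;
		}
	}
}

int main() {
	std::vector<Index> indexes(2);
	indexes[0].online_log = std::make_unique<OnlineLog>();
	log_op(indexes, "INSERT");
}

In the real code above, the shared latch is index->lock.s_lock(SRW_LOCK_CALL), the append is row_log_online_op(), and the abort path is row_log_mark_other_online_index_abort().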
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index 80d51754d5c..70b51fbb812 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2005, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2022, MariaDB Corporation.
+Copyright (c) 2014, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -51,6 +51,7 @@ Completed by Sunny Bains and Marko Makela
#endif /* BTR_CUR_ADAPT */
#include "ut0stage.h"
#include "fil0crypt.h"
+#include "srv0mon.h"
/* Ignore posix_fadvise() on those platforms where it does not exist */
#if defined _WIN32
@@ -129,7 +130,10 @@ public:
if (log_sys.check_flush_or_checkpoint()) {
if (mtr_started) {
- btr_pcur_move_to_prev_on_page(pcur);
+ if (!btr_pcur_move_to_prev_on_page(pcur)) {
+ error = DB_CORRUPTION;
+ break;
+ }
btr_pcur_store_position(pcur, scan_mtr);
scan_mtr->commit();
mtr_started = false;
@@ -141,20 +145,17 @@ public:
mtr.start();
index->set_modified(mtr);
- ins_cur.index = index;
+ ins_cur.page_cur.index = index;
rtr_init_rtr_info(&rtr_info, false, &ins_cur, index,
false);
rtr_info_update_btr(&ins_cur, &rtr_info);
- btr_cur_search_to_nth_level(index, 0, dtuple,
- PAGE_CUR_RTREE_INSERT,
- BTR_MODIFY_LEAF, &ins_cur,
- __FILE__, __LINE__,
- &mtr);
+ error = rtr_insert_leaf(&ins_cur, dtuple,
+ BTR_MODIFY_LEAF, &mtr);
			/* It needs to update the MBR in the parent entry,
			so change the search mode to BTR_MODIFY_TREE */
- if (rtr_info.mbr_adj) {
+ if (error == DB_SUCCESS && rtr_info.mbr_adj) {
mtr.commit();
rtr_clean_rtr_info(&rtr_info, true);
rtr_init_rtr_info(&rtr_info, false, &ins_cur,
@@ -162,19 +163,20 @@ public:
rtr_info_update_btr(&ins_cur, &rtr_info);
mtr.start();
index->set_modified(mtr);
- btr_cur_search_to_nth_level(
- index, 0, dtuple,
- PAGE_CUR_RTREE_INSERT,
- BTR_MODIFY_TREE, &ins_cur,
- __FILE__, __LINE__, &mtr);
+ error = rtr_insert_leaf(&ins_cur, dtuple,
+ BTR_MODIFY_TREE, &mtr);
+ }
+
+ if (error == DB_SUCCESS) {
+ error = btr_cur_optimistic_insert(
+ flag, &ins_cur, &ins_offsets,
+ &heap, dtuple, &rec, &big_rec,
+ 0, NULL, &mtr);
}
- error = btr_cur_optimistic_insert(
- flag, &ins_cur, &ins_offsets, &heap,
- dtuple, &rec, &big_rec, 0, NULL, &mtr);
+ ut_ad(!big_rec);
if (error == DB_FAIL) {
- ut_ad(!big_rec);
mtr.commit();
mtr.start();
index->set_modified(mtr);
@@ -184,18 +186,19 @@ public:
&ins_cur, index, false);
rtr_info_update_btr(&ins_cur, &rtr_info);
- btr_cur_search_to_nth_level(
- index, 0, dtuple,
- PAGE_CUR_RTREE_INSERT,
- BTR_MODIFY_TREE,
- &ins_cur, __FILE__, __LINE__, &mtr);
+ error = rtr_insert_leaf(&ins_cur, dtuple,
+ BTR_MODIFY_TREE, &mtr);
- error = btr_cur_pessimistic_insert(
+ if (error == DB_SUCCESS) {
+ error = btr_cur_pessimistic_insert(
flag, &ins_cur, &ins_offsets,
&heap, dtuple, &rec,
&big_rec, 0, NULL, &mtr);
+ }
}
+ ut_ad(!big_rec);
+
DBUG_EXECUTE_IF(
"row_merge_ins_spatial_fail",
error = DB_FAIL;
@@ -471,6 +474,7 @@ row_merge_buf_redundant_convert(
@param[in,out] v_heap heap memory to process data for virtual column
@param[in,out] my_table mysql table object
@param[in] trx transaction object
+@param[in] col_collate columns whose collations changed, or nullptr
@return number of rows added, 0 if out of space */
static
ulint
@@ -488,7 +492,8 @@ row_merge_buf_add(
dberr_t* err,
mem_heap_t** v_heap,
TABLE* my_table,
- trx_t* trx)
+ trx_t* trx,
+ const col_collations* col_collate)
{
ulint i;
const dict_index_t* index;
@@ -502,6 +507,7 @@ row_merge_buf_add(
doc_id_t write_doc_id;
ulint n_row_added = 0;
VCOL_STORAGE vcol_storage;
+
DBUG_ENTER("row_merge_buf_add");
if (buf->n_tuples >= buf->max_tuples) {
@@ -593,8 +599,17 @@ error:
row_field = dtuple_get_nth_field(row,
col->ind);
dfield_copy(field, row_field);
- }
+ /* Copy the column collation to the
+ tuple field */
+ if (col_collate) {
+ auto it = col_collate->find(col->ind);
+ if (it != col_collate->end()) {
+ field->type
+ .assign(*it->second);
+ }
+ }
+ }
/* Tokenize and process data for FTS */
if (!history_fts && (index->type & DICT_FTS)) {
@@ -648,7 +663,7 @@ error:
*doc_id % fts_sort_pll_degree);
/* Add doc item to fts_doc_list */
- mutex_enter(&psort_info[bucket].mutex);
+ mysql_mutex_lock(&psort_info[bucket].mutex);
if (psort_info[bucket].error == DB_SUCCESS) {
UT_LIST_ADD_LAST(
@@ -660,13 +675,14 @@ error:
ut_free(doc_item);
}
- mutex_exit(&psort_info[bucket].mutex);
+ mysql_mutex_unlock(&psort_info[bucket].mutex);
/* Sleep when memory used exceeds limit*/
while (psort_info[bucket].memory_used
> FTS_PENDING_DOC_MEMORY_LIMIT
&& trial_count++ < max_trial_count) {
- os_thread_sleep(1000);
+ std::this_thread::sleep_for(
+ std::chrono::milliseconds(1));
}
n_row_added = 1;
@@ -842,7 +858,7 @@ row_merge_dup_report(
row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */
const dfield_t* entry) /*!< in: duplicate index entry */
{
- if (!dup->n_dup++) {
+ if (!dup->n_dup++ && dup->table) {
/* Only report the first duplicate record,
but count all duplicate records. */
innobase_fields_to_mysql(dup->table, dup->index, entry);
@@ -1068,11 +1084,11 @@ row_merge_read(
DBUG_LOG("ib_merge_sort", "fd=" << fd << " ofs=" << ofs);
DBUG_EXECUTE_IF("row_merge_read_failure", DBUG_RETURN(FALSE););
- const bool success = DB_SUCCESS == os_file_read_no_error_handling(
- IORequestRead, fd, buf, ofs, srv_sort_buf_size, 0);
+ const dberr_t err = os_file_read(
+ IORequestRead, fd, buf, ofs, srv_sort_buf_size, nullptr);
/* If encryption is enabled decrypt buffer */
- if (success && log_tmp_is_encrypted()) {
+ if (err == DB_SUCCESS && srv_encrypt_log) {
if (!log_tmp_block_decrypt(buf, srv_sort_buf_size,
crypt_buf, ofs)) {
DBUG_RETURN(false);
@@ -1087,11 +1103,7 @@ row_merge_read(
posix_fadvise(fd, ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED);
#endif /* POSIX_FADV_DONTNEED */
- if (!success) {
- ib::error() << "Failed to read merge block at " << ofs;
- }
-
- DBUG_RETURN(success);
+ DBUG_RETURN(err == DB_SUCCESS);
}
/********************************************************************//**
@@ -1099,10 +1111,8 @@ Write a merge block to the file system.
@return whether the request was completed successfully
@retval false on error
@retval true on success */
-UNIV_INTERN
bool
row_merge_write(
-/*============*/
const pfs_os_file_t& fd, /*!< in: file descriptor */
ulint offset, /*!< in: offset where to write,
in number of row_merge_block_t elements */
@@ -1638,6 +1648,7 @@ stage->inc() will be called for each page read.
@param[in] eval_table mysql table used to evaluate virtual column
value, see innobase_get_computed_value().
@param[in] allow_not_null allow null to not-null conversion
+@param[in] col_collate columns whose collations changed, or nullptr
@return DB_SUCCESS or error */
static MY_ATTRIBUTE((warn_unused_result))
dberr_t
@@ -1665,7 +1676,8 @@ row_merge_read_clustered_index(
double pct_cost,
row_merge_block_t* crypt_block,
struct TABLE* eval_table,
- bool allow_not_null)
+ bool allow_not_null,
+ const col_collations* col_collate)
{
dict_index_t* clust_index; /* Clustered index */
mem_heap_t* row_heap = NULL;/* Heap memory to create
@@ -1685,10 +1697,8 @@ row_merge_read_clustered_index(
doc_id_t doc_id = 0;
doc_id_t max_doc_id = 0;
ibool add_doc_id = FALSE;
- os_event_t fts_parallel_sort_event = NULL;
- ibool fts_pll_sort = FALSE;
- int64_t sig_count = 0;
- spatial_index_info** sp_tuples = NULL;
+ pthread_cond_t* fts_parallel_sort_cond = nullptr;
+ spatial_index_info** sp_tuples = nullptr;
ulint num_spatial = 0;
BtrBulk* clust_btr_bulk = NULL;
bool clust_temp_file = false;
@@ -1728,7 +1738,7 @@ row_merge_read_clustered_index(
ut_malloc_nokey(n_index * sizeof *merge_buf));
row_merge_dup_t clust_dup = {index[0], table, col_map, 0};
- dfield_t* prev_fields;
+ dfield_t* prev_fields = nullptr;
const ulint n_uniq = dict_index_get_n_unique(index[0]);
ut_ad(trx->mysql_thd != NULL);
@@ -1762,10 +1772,9 @@ row_merge_read_clustered_index(
ut_ad(doc_id > 0);
}
- fts_pll_sort = TRUE;
row_fts_start_psort(psort_info);
- fts_parallel_sort_event =
- psort_info[0].psort_common->sort_event;
+ fts_parallel_sort_cond =
+ &psort_info[0].psort_common->sort_cond;
} else {
if (dict_index_is_spatial(index[i])) {
num_spatial++;
@@ -1811,17 +1820,60 @@ row_merge_read_clustered_index(
== (DATA_ROLL_PTR | DATA_NOT_NULL));
const ulint new_trx_id_col = col_map
? col_map[old_trx_id_col] : old_trx_id_col;
+ uint64_t n_rows = 0;
- btr_pcur_open_at_index_side(
- true, clust_index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
- mtr_started = true;
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- if (rec_is_metadata(btr_pcur_get_rec(&pcur), *clust_index)) {
- ut_ad(btr_pcur_is_on_user_rec(&pcur));
- /* Skip the metadata pseudo-record. */
+ err = pcur.open_leaf(true, clust_index, BTR_SEARCH_LEAF, &mtr);
+ if (err != DB_SUCCESS) {
+err_exit:
+ trx->error_key_num = 0;
+ goto func_exit;
} else {
- ut_ad(!clust_index->is_instant());
- btr_pcur_move_to_prev_on_page(&pcur);
+ rec_t* rec = page_rec_get_next(btr_pcur_get_rec(&pcur));
+ if (!rec) {
+corrupted_metadata:
+ err = DB_CORRUPTION;
+ goto err_exit;
+ }
+ if (rec_get_info_bits(rec, page_rec_is_comp(rec))
+ & REC_INFO_MIN_REC_FLAG) {
+ if (!clust_index->is_instant()) {
+ goto corrupted_metadata;
+ }
+ if (page_rec_is_comp(rec)
+ && rec_get_status(rec) != REC_STATUS_INSTANT) {
+ goto corrupted_metadata;
+ }
+ /* Skip the metadata pseudo-record. */
+ btr_pcur_get_page_cur(&pcur)->rec = rec;
+ } else if (clust_index->is_instant()) {
+ goto corrupted_metadata;
+ }
+ }
+
+ /* Check if the table is supposed to be empty for our read view.
+
+ If we read bulk_trx_id as an older transaction ID, it is not
+ incorrect to check here whether that transaction should be
+ visible to us. If bulk_trx_id is not visible to us, the table
+ must have been empty at an earlier point of time, also in our
+ read view.
+
+ An INSERT would only update bulk_trx_id in
+ row_ins_clust_index_entry_low() if the table really was empty
+ (everything had been purged), when holding a leaf page latch
+ in the clustered index (actually, the root page is the only
+ leaf page in that case).
+
+ We are holding a clustered index leaf page latch here.
+ That will obviously prevent any concurrent INSERT from
+ updating bulk_trx_id while we read it. */
+ if (!online) {
+ } else if (trx_id_t bulk_trx_id = old_table->bulk_trx_id) {
+ ut_ad(trx->read_view.is_open());
+ ut_ad(bulk_trx_id != trx->id);
+ if (!trx->read_view.changes_visible(bulk_trx_id)) {
+ goto func_exit;
+ }
}
if (old_table != new_table) {
@@ -1870,21 +1922,17 @@ row_merge_read_clustered_index(
prev_fields = static_cast<dfield_t*>(
ut_malloc_nokey(n_uniq * sizeof *prev_fields));
mtuple_heap = mem_heap_create(sizeof(mrec_buf_t));
- } else {
- prev_fields = NULL;
}
mach_write_to_8(new_sys_trx_start, trx->id);
mach_write_to_8(new_sys_trx_end, TRX_ID_MAX);
- uint64_t n_rows = 0;
/* Scan the clustered index. */
for (;;) {
/* Do not continue if table pages are still encrypted */
if (!old_table->is_readable() || !new_table->is_readable()) {
err = DB_DECRYPTION_FAILED;
- trx->error_key_num = 0;
- goto func_exit;
+ goto err_exit;
}
const rec_t* rec;
@@ -1895,25 +1943,27 @@ row_merge_read_clustered_index(
page_cur_t* cur = btr_pcur_get_page_cur(&pcur);
bool history_row, history_fts = false;
- page_cur_move_to_next(cur);
-
stage->n_pk_recs_inc();
+ if (!page_cur_move_to_next(cur)) {
+corrupted_rec:
+ err = DB_CORRUPTION;
+ goto err_exit;
+ }
+
if (page_cur_is_after_last(cur)) {
stage->inc();
if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
err = DB_INTERRUPTED;
- trx->error_key_num = 0;
- goto func_exit;
+ goto err_exit;
}
if (online && old_table != new_table) {
err = row_log_table_get_error(clust_index);
if (err != DB_SUCCESS) {
- trx->error_key_num = 0;
- goto func_exit;
+ goto err_exit;
}
}
@@ -1932,7 +1982,7 @@ row_merge_read_clustered_index(
goto scan_next;
}
- if (clust_index->lock.waiters) {
+ if (clust_index->lock.is_waiting()) {
/* There are waiters on the clustered
index tree lock, likely the purge
thread. Store and restore the cursor
@@ -1942,20 +1992,23 @@ row_merge_read_clustered_index(
/* Store the cursor position on the last user
record on the page. */
- btr_pcur_move_to_prev_on_page(&pcur);
+ if (!btr_pcur_move_to_prev_on_page(&pcur)) {
+ goto corrupted_index;
+ }
/* Leaf pages must never be empty, unless
this is the only page in the index tree. */
- ut_ad(btr_pcur_is_on_user_rec(&pcur)
- || btr_pcur_get_block(
- &pcur)->page.id().page_no()
- == clust_index->page);
+ if (!btr_pcur_is_on_user_rec(&pcur)
+ && btr_pcur_get_block(&pcur)->page.id()
+ .page_no() != clust_index->page) {
+ goto corrupted_index;
+ }
btr_pcur_store_position(&pcur, &mtr);
mtr.commit();
mtr_started = false;
/* Give the waiters a chance to proceed. */
- os_thread_yield();
+ std::this_thread::yield();
scan_next:
ut_ad(!mtr_started);
ut_ad(!mtr.is_active());
@@ -1964,8 +2017,13 @@ scan_next:
/* Restore position on the record, or its
predecessor if the record was purged
meanwhile. */
- btr_pcur_restore_position(
- BTR_SEARCH_LEAF, &pcur, &mtr);
+ if (pcur.restore_position(BTR_SEARCH_LEAF,
+ &mtr)
+ == btr_pcur_t::CORRUPTED) {
+corrupted_index:
+ err = DB_CORRUPTION;
+ goto func_exit;
+ }
/* Move to the successor of the
original record. */
if (!btr_pcur_move_to_next_user_rec(
@@ -1988,16 +2046,24 @@ end_of_index:
goto end_of_index;
}
- buf_block_t* block = btr_block_get(
- *clust_index, next_page_no,
- RW_S_LATCH, false, &mtr);
+ buf_block_t* block = buf_page_get_gen(
+ page_id_t(old_table->space->id,
+ next_page_no),
+ old_table->space->zip_size(),
+ RW_S_LATCH, nullptr, BUF_GET, &mtr,
+ &err, false);
+ if (!block) {
+ goto err_exit;
+ }
- btr_leaf_page_release(page_cur_get_block(cur),
- BTR_SEARCH_LEAF, &mtr);
page_cur_set_before_first(block, cur);
- page_cur_move_to_next(cur);
+ if (!page_cur_move_to_next(cur)
+ || page_cur_is_after_last(cur)) {
+ goto corrupted_rec;
+ }
- ut_ad(!page_cur_is_after_last(cur));
+ const auto s = mtr.get_savepoint();
+ mtr.rollback_to_savepoint(s - 2, s - 1);
}
} else {
mem_heap_empty(row_heap);
@@ -2034,8 +2100,14 @@ end_of_index:
ut_ad(trx->read_view.is_open());
ut_ad(rec_trx_id != trx->id);
- if (!trx->read_view.changes_visible(
- rec_trx_id, old_table->name)) {
+ if (!trx->read_view.changes_visible(rec_trx_id)) {
+ if (rec_trx_id
+ >= trx->read_view.low_limit_id()
+ && rec_trx_id
+ >= trx_sys.get_max_trx_id()) {
+ goto corrupted_rec;
+ }
+
rec_t* old_vers;
row_vers_build_for_consistent_read(
@@ -2151,8 +2223,7 @@ end_of_index:
if (!allow_not_null) {
err = DB_INVALID_NULL;
- trx->error_key_num = 0;
- goto func_exit;
+ goto err_exit;
}
const dfield_t& default_field
@@ -2226,13 +2297,10 @@ end_of_index:
byte* b = static_cast<byte*>(dfield_get_data(dfield));
if (sequence.eof()) {
- err = DB_ERROR;
- trx->error_key_num = 0;
-
ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
ER_AUTOINC_READ_FAILED, "[NULL]");
-
- goto func_exit;
+ err = DB_ERROR;
+ goto err_exit;
}
ulonglong value = sequence++;
@@ -2324,7 +2392,8 @@ write_buffers:
buf, fts_index, old_table, new_table,
psort_info, row, ext, history_fts,
&doc_id, conv_heap, &err,
- &v_heap, eval_table, trx)))) {
+ &v_heap, eval_table, trx,
+ col_collate)))) {
/* If we are creating FTS index,
a single row can generate more
@@ -2445,8 +2514,10 @@ write_buffers:
we must reread it on the next
loop iteration. */
if (mtr_started) {
- btr_pcur_move_to_prev_on_page(
- &pcur);
+ if (!btr_pcur_move_to_prev_on_page(&pcur)) {
+ err = DB_CORRUPTION;
+ goto func_exit;
+ }
btr_pcur_store_position(
&pcur, &mtr);
@@ -2508,9 +2579,11 @@ write_buffers:
overflow). */
mtr.start();
mtr_started = true;
- btr_pcur_restore_position(
- BTR_SEARCH_LEAF, &pcur,
- &mtr);
+ if (pcur.restore_position(
+ BTR_SEARCH_LEAF, &mtr)
+ == btr_pcur_t::CORRUPTED) {
+ goto corrupted_index;
+ }
buf = row_merge_buf_empty(buf);
merge_buf[i] = buf;
/* Restart the outer loop on the
@@ -2541,22 +2614,21 @@ write_buffers:
from accessing this index, to ensure
read consistency. */
- trx_id_t max_trx_id;
-
ut_a(row == NULL);
- rw_lock_x_lock(
- dict_index_get_lock(buf->index));
- ut_a(dict_index_get_online_status(buf->index)
+
+ dict_index_t* index = buf->index;
+ index->lock.x_lock(SRW_LOCK_CALL);
+ ut_a(dict_index_get_online_status(index)
== ONLINE_INDEX_CREATION);
- max_trx_id = row_log_get_max_trx(buf->index);
+ trx_id_t max_trx_id = row_log_get_max_trx(
+ index);
- if (max_trx_id > buf->index->trx_id) {
- buf->index->trx_id = max_trx_id;
+ if (max_trx_id > index->trx_id) {
+ index->trx_id = max_trx_id;
}
- rw_lock_x_unlock(
- dict_index_get_lock(buf->index));
+ index->lock.x_unlock();
}
/* Secondary index and clustered index which is
@@ -2649,7 +2721,7 @@ write_buffers:
new_table, psort_info,
row, ext, history_fts, &doc_id,
conv_heap, &err, &v_heap,
- eval_table, trx)))) {
+ eval_table, trx, col_collate)))) {
/* An empty buffer should have enough
room for at least one record. */
ut_ad(err == DB_COMPUTE_VALUE_FAILED
@@ -2713,7 +2785,7 @@ all_done:
UT_DELETE(clust_btr_bulk);
}
- if (prev_fields != NULL) {
+ if (prev_fields) {
ut_free(prev_fields);
mem_heap_free(mtuple_heap);
}
@@ -2729,7 +2801,7 @@ all_done:
#ifdef FTS_INTERNAL_DIAG_PRINT
DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Scan Table\n");
#endif
- if (fts_pll_sort) {
+ if (UNIV_LIKELY_NULL(fts_parallel_sort_cond)) {
wait_again:
/* Check if error occurs in child thread */
for (ulint j = 0; j < fts_sort_pll_degree; j++) {
@@ -2750,14 +2822,15 @@ wait_again:
}
/* Now wait all children to report back to be completed */
- os_event_wait_time_low(fts_parallel_sort_event,
- 1000000, sig_count);
+ timespec abstime;
+ set_timespec(abstime, 1);
+ mysql_mutex_lock(&psort_info[0].mutex);
+ my_cond_timedwait(fts_parallel_sort_cond,
+ &psort_info[0].mutex.m_mutex, &abstime);
+ mysql_mutex_unlock(&psort_info[0].mutex);
for (ulint i = 0; i < fts_sort_pll_degree; i++) {
- if (psort_info[i].child_status != FTS_CHILD_COMPLETE
- && psort_info[i].child_status != FTS_CHILD_EXITING) {
- sig_count = os_event_reset(
- fts_parallel_sort_event);
+ if (!psort_info[i].child_status) {
goto wait_again;
}
}
@@ -2778,8 +2851,7 @@ wait_again:
row_fts_free_pll_merge_buf(psort_info);
ut_free(merge_buf);
-
- btr_pcur_close(&pcur);
+ ut_free(pcur.old_rec_buf);
if (sp_tuples != NULL) {
for (ulint i = 0; i < num_spatial; i++) {
@@ -2815,12 +2887,8 @@ wait_again:
}
if (vers_update_trt) {
- trx_mod_table_time_t& time =
- trx->mod_tables
- .insert(trx_mod_tables_t::value_type(
- const_cast<dict_table_t*>(new_table), 0))
- .first->second;
- time.set_versioned(0);
+ trx->mod_tables.emplace(new_table, 0)
+ .first->second.set_versioned(0);
}
trx->op_info = "";
@@ -3594,26 +3662,13 @@ row_merge_insert_index_tuples(
Any modifications after the
row_merge_read_clustered_index() scan
- will go through row_log_table_apply().
- Any modifications to off-page columns
- will be tracked by
- row_log_table_blob_alloc() and
- row_log_table_blob_free(). */
+ will go through row_log_table_apply(). */
row_merge_copy_blobs(
mrec, offsets, old_table->space->zip_size(),
dtuple, tuple_heap);
}
-#ifdef UNIV_DEBUG
- static const latch_level_t latches[] = {
- SYNC_INDEX_TREE, /* index->lock */
- SYNC_LEVEL_VARYING /* btr_bulk->m_page_bulks */
- };
-#endif /* UNIV_DEBUG */
-
ut_ad(dtuple_validate(dtuple));
- ut_ad(!sync_check_iterate(sync_allowed_latches(latches,
- latches + 2)));
error = btr_bulk->insert(dtuple);
if (error != DB_SUCCESS) {
@@ -3645,25 +3700,6 @@ err_exit:
}
/*********************************************************************//**
-Sets an exclusive lock on a table, for the duration of creating indexes.
-@return error code or DB_SUCCESS */
-dberr_t
-row_merge_lock_table(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- dict_table_t* table, /*!< in: table to lock */
- enum lock_mode mode) /*!< in: LOCK_X or LOCK_S */
-{
- ut_ad(!srv_read_only_mode);
- ut_ad(mode == LOCK_X || mode == LOCK_S);
-
- trx->op_info = "setting table lock for creating or dropping index";
- trx->ddl = true;
-
- return(lock_table_for_trx(table, trx, mode));
-}
-
-/*********************************************************************//**
Drop an index that was created before an error occurred.
The data dictionary must have been locked exclusively by the caller,
because the transaction will not be committed. */
@@ -3684,14 +3720,14 @@ row_merge_drop_index_dict(
pars_info_t* info;
ut_ad(!srv_read_only_mode);
- ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
- ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
- ut_d(dict_sys.assert_locked());
+ ut_ad(trx->dict_operation_lock_mode);
+ ut_ad(trx->dict_operation);
+ ut_ad(dict_sys.locked());
info = pars_info_create();
pars_info_add_ull_literal(info, "indexid", index_id);
trx->op_info = "dropping index from dictionary";
- error = que_eval_sql(info, sql, FALSE, trx);
+ error = que_eval_sql(info, sql, trx);
if (error != DB_SUCCESS) {
/* Even though we ensure that DDL transactions are WAIT
@@ -3710,6 +3746,7 @@ row_merge_drop_index_dict(
Drop indexes that were created before an error occurred.
The data dictionary must have been locked exclusively by the caller,
because the transaction will not be committed. */
+static
void
row_merge_drop_indexes_dict(
/*========================*/
@@ -3746,9 +3783,9 @@ row_merge_drop_indexes_dict(
pars_info_t* info;
ut_ad(!srv_read_only_mode);
- ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
- ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
- ut_d(dict_sys.assert_locked());
+ ut_ad(trx->dict_operation_lock_mode);
+ ut_ad(trx->dict_operation);
+ ut_ad(dict_sys.locked());
/* It is possible that table->n_ref_count > 1 when
locked=TRUE. In this case, all code that should have an open
@@ -3760,7 +3797,7 @@ row_merge_drop_indexes_dict(
info = pars_info_create();
pars_info_add_ull_literal(info, "tableid", table_id);
trx->op_info = "dropping indexes";
- error = que_eval_sql(info, sql, FALSE, trx);
+ error = que_eval_sql(info, sql, trx);
switch (error) {
case DB_SUCCESS:
@@ -3779,6 +3816,28 @@ row_merge_drop_indexes_dict(
trx->op_info = "";
}
+/** Drop the common internal FTS tables if all fulltext indexes have been dropped
+@param trx transaction
+@param table user table */
+static void row_merge_drop_fulltext_indexes(trx_t *trx, dict_table_t *table)
+{
+ if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) ||
+ !table->fts ||
+ !ib_vector_is_empty(table->fts->indexes))
+ return;
+
+ for (const dict_index_t *index= dict_table_get_first_index(table);
+ index; index= dict_table_get_next_index(index))
+ if (index->type & DICT_FTS)
+ return;
+
+ fts_optimize_remove_table(table);
+ fts_drop_tables(trx, *table);
+ table->fts->~fts_t();
+ table->fts= nullptr;
+ DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
+}
+
/** Drop indexes that were created before an error occurred.
The data dictionary must have been locked exclusively by the caller,
because the transaction will not be committed.
@@ -3798,9 +3857,9 @@ row_merge_drop_indexes(
dict_index_t* next_index;
ut_ad(!srv_read_only_mode);
- ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
- ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
- ut_d(dict_sys.assert_locked());
+ ut_ad(trx->dict_operation_lock_mode);
+ ut_ad(trx->dict_operation);
+ ut_ad(dict_sys.locked());
index = dict_table_get_first_index(table);
ut_ad(dict_index_is_clust(index));
@@ -3814,18 +3873,10 @@ row_merge_drop_indexes(
handle to the table be waiting for the next statement to execute,
or waiting for a meta-data lock.
- A concurrent purge will be prevented by dict_sys.latch. */
+ A concurrent purge will be prevented by MDL. */
if (!locked && (table->get_ref_count() > 1
|| table->has_lock_other_than(alter_trx))) {
- /* We will have to drop the indexes later, when the
- table is guaranteed to be no longer in use. Mark the
- indexes as incomplete and corrupted, so that other
- threads will stop using them. Let dict_table_close()
- or crash recovery or the next invocation of
- prepare_inplace_alter_table() take care of dropping
- the indexes. */
-
while ((index = dict_table_get_next_index(index)) != NULL) {
ut_ad(!dict_index_is_clust(index));
@@ -3869,8 +3920,7 @@ row_merge_drop_indexes(
table, index);
index = prev;
} else {
- rw_lock_x_lock(
- dict_index_get_lock(index));
+ index->lock.x_lock(SRW_LOCK_CALL);
dict_index_set_online_status(
index, ONLINE_INDEX_ABORTED);
index->type |= DICT_CORRUPT;
@@ -3879,14 +3929,14 @@ row_merge_drop_indexes(
}
continue;
case ONLINE_INDEX_CREATION:
- rw_lock_x_lock(dict_index_get_lock(index));
+ index->lock.x_lock(SRW_LOCK_CALL);
ut_ad(!index->is_committed());
row_log_abort_sec(index);
drop_aborted:
- rw_lock_x_unlock(dict_index_get_lock(index));
+ index->lock.x_unlock();
DEBUG_SYNC_C("merge_drop_index_after_abort");
- /* covered by dict_sys.mutex */
+ /* covered by dict_sys.latch */
MONITOR_INC(MONITOR_BACKGROUND_DROP_INDEX);
/* fall through */
case ONLINE_INDEX_ABORTED:
@@ -3895,17 +3945,17 @@ row_merge_drop_indexes(
the tablespace, but keep the object
in the data dictionary cache. */
row_merge_drop_index_dict(trx, index->id);
- rw_lock_x_lock(dict_index_get_lock(index));
+ index->lock.x_lock(SRW_LOCK_CALL);
dict_index_set_online_status(
index, ONLINE_INDEX_ABORTED_DROPPED);
- rw_lock_x_unlock(dict_index_get_lock(index));
+ index->lock.x_unlock();
table->drop_aborted = TRUE;
continue;
}
ut_error;
}
- fts_clear_all(table, trx);
+ row_merge_drop_fulltext_indexes(trx, table);
return;
}
@@ -3914,8 +3964,11 @@ row_merge_drop_indexes(
/* Invalidate all row_prebuilt_t::ins_graph that are referring
to this table. That is, force row_get_prebuilt_insert_row() to
rebuild prebuilt->ins_node->entry_list). */
- ut_ad(table->def_trx_id <= trx->id);
- table->def_trx_id = trx->id;
+ if (table->def_trx_id < trx->id) {
+ table->def_trx_id = trx->id;
+ } else {
+ ut_ad(table->def_trx_id == trx->id || table->name.part());
+ }
next_index = dict_table_get_next_index(index);
@@ -3950,7 +4003,7 @@ row_merge_drop_indexes(
break;
case ONLINE_INDEX_ABORTED:
case ONLINE_INDEX_ABORTED_DROPPED:
- /* covered by dict_sys.mutex */
+ /* covered by dict_sys.latch */
MONITOR_DEC(MONITOR_BACKGROUND_DROP_INDEX);
}
@@ -3958,22 +4011,92 @@ row_merge_drop_indexes(
}
}
- fts_clear_all(table, trx);
+ row_merge_drop_fulltext_indexes(trx, table);
table->drop_aborted = FALSE;
ut_d(dict_table_check_for_dup_indexes(table, CHECK_ALL_COMPLETE));
}
-/*********************************************************************//**
-Drop all partially created indexes during crash recovery. */
-void
-row_merge_drop_temp_indexes(void)
-/*=============================*/
+/** Callback that drops the auxiliary index tables of a fulltext index */
+static ibool row_merge_drop_fts(void *node, void *trx)
{
+ auto s= static_cast<sel_node_t*>(node);
+
+ const dfield_t *table_id= que_node_get_val(s->select_list);
+ ut_ad(table_id->type.mtype == DATA_BINARY);
+ node= que_node_get_next(s->select_list);
+ ut_ad(!que_node_get_next(node));
+ const dfield_t *index_id= que_node_get_val(node);
+ ut_ad(index_id->type.mtype == DATA_BINARY);
+
+ static const char sql[]=
+ "PROCEDURE DROP_TABLES_PROC () IS\n"
+ "tid CHAR;\n"
+ "iid CHAR;\n"
+
+ "DECLARE CURSOR cur_tab IS\n"
+ "SELECT ID FROM SYS_TABLES\n"
+ "WHERE INSTR(NAME,:name)+45=LENGTH(NAME)"
+ " AND INSTR('123456',SUBSTR(NAME,LENGTH(NAME)-1,1))>0"
+ " FOR UPDATE;\n"
+
+ "DECLARE CURSOR cur_idx IS\n"
+ "SELECT ID FROM SYS_INDEXES\n"
+ "WHERE TABLE_ID = tid FOR UPDATE;\n"
+
+ "BEGIN\n"
+ "OPEN cur_tab;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH cur_tab INTO tid;\n"
+ " IF (SQL % NOTFOUND) THEN EXIT; END IF;\n"
+ " OPEN cur_idx;\n"
+ " WHILE 1 = 1 LOOP\n"
+ " FETCH cur_idx INTO iid;\n"
+ " IF (SQL % NOTFOUND) THEN EXIT; END IF;\n"
+ " DELETE FROM SYS_FIELDS WHERE INDEX_ID=iid;\n"
+ " DELETE FROM SYS_INDEXES WHERE CURRENT OF cur_idx;\n"
+ " END LOOP;\n"
+ " CLOSE cur_idx;\n"
+ " DELETE FROM SYS_COLUMNS WHERE TABLE_ID=tid;\n"
+ " DELETE FROM SYS_TABLES WHERE CURRENT OF cur_tab;\n"
+ "END LOOP;\n"
+ "CLOSE cur_tab;\n"
+ "END;\n";
+
+ if (table_id->len == 8 && index_id->len == 8)
+ {
+ char buf[sizeof "/FTS_0000000000000000_0000000000000000_INDEX_"];
+ snprintf(buf, sizeof buf, "/FTS_%016llx_%016llx_INDEX_",
+ static_cast<ulonglong>
+ (mach_read_from_8(static_cast<const byte*>(table_id->data))),
+ static_cast<ulonglong>
+ (mach_read_from_8(static_cast<const byte*>(index_id->data))));
+ auto pinfo= pars_info_create();
+ pars_info_add_str_literal(pinfo, "name", buf);
+ que_eval_sql(pinfo, sql, static_cast<trx_t*>(trx));
+ }
+
+ return true;
+}
+
+/** During recovery, drop recovered index stubs that were created in
+prepare_inplace_alter_table_dict(). */
+void row_merge_drop_temp_indexes()
+{
+ static_assert(DICT_FTS == 32, "compatibility");
+
static const char sql[] =
"PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n"
"ixid CHAR;\n"
"found INT;\n"
+ "DECLARE FUNCTION drop_fts;\n"
+
+ "DECLARE CURSOR fts_cur IS\n"
+ " SELECT TABLE_ID,ID FROM SYS_INDEXES\n"
+ " WHERE TYPE=32"
+ " AND SUBSTR(NAME,0,1)='" TEMP_INDEX_PREFIX_STR "'\n"
+ " FOR UPDATE;\n"
+
"DECLARE CURSOR index_cur IS\n"
" SELECT ID FROM SYS_INDEXES\n"
" WHERE SUBSTR(NAME,0,1)='" TEMP_INDEX_PREFIX_STR "'\n"
@@ -3981,6 +4104,15 @@ row_merge_drop_temp_indexes(void)
"BEGIN\n"
"found := 1;\n"
+ "OPEN fts_cur;\n"
+ "WHILE found = 1 LOOP\n"
+ " FETCH fts_cur INTO drop_fts();\n"
+ " IF (SQL % NOTFOUND) THEN\n"
+ " found := 0;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE fts_cur;\n"
+
"OPEN index_cur;\n"
"WHILE found = 1 LOOP\n"
" FETCH index_cur INTO ixid;\n"
@@ -3993,31 +4125,36 @@ row_merge_drop_temp_indexes(void)
"END LOOP;\n"
"CLOSE index_cur;\n"
"END;\n";
- trx_t* trx;
- dberr_t error;
/* Load the table definitions that contain partially defined
indexes, so that the data dictionary information can be checked
when accessing the tablename.ibd files. */
- trx = trx_create();
+ trx_t* trx = trx_create();
+ trx_start_for_ddl(trx);
trx->op_info = "dropping partially created indexes";
+ dberr_t error = lock_sys_tables(trx);
+
row_mysql_lock_data_dictionary(trx);
/* Ensure that this transaction will be rolled back and locks
will be released, if the server gets killed before the commit
gets written to the redo log. */
- trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
+ trx->dict_operation = true;
trx->op_info = "dropping indexes";
- error = que_eval_sql(NULL, sql, FALSE, trx);
- if (error != DB_SUCCESS) {
+ pars_info_t* pinfo = pars_info_create();
+ pars_info_bind_function(pinfo, "drop_fts", row_merge_drop_fts, trx);
+ if (error == DB_SUCCESS) {
+ error = que_eval_sql(pinfo, sql, trx);
+ }
+
+ if (error) {
/* Even though we ensure that DDL transactions are WAIT
and DEADLOCK free, we could encounter other errors e.g.,
DB_TOO_MANY_CONCURRENT_TRXS. */
trx->error_state = DB_SUCCESS;
- ib::error() << "row_merge_drop_temp_indexes failed with error"
- << error;
+ ib::error() << "row_merge_drop_temp_indexes(): " << error;
}
trx_commit_for_mysql(trx);
@@ -4150,15 +4287,15 @@ row_merge_rename_index_to_add(
"WHERE TABLE_ID = :tableid AND ID = :indexid;\n"
"END;\n";
- ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
- ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
+ ut_ad(trx->dict_operation_lock_mode);
+ ut_ad(trx->dict_operation);
trx->op_info = "renaming index to add";
pars_info_add_ull_literal(info, "tableid", table_id);
pars_info_add_ull_literal(info, "indexid", index_id);
- err = que_eval_sql(info, rename_index, FALSE, trx);
+ err = que_eval_sql(info, rename_index, trx);
if (err != DB_SUCCESS) {
/* Even though we ensure that DDL transactions are WAIT
@@ -4175,59 +4312,6 @@ row_merge_rename_index_to_add(
return(err);
}
-/*********************************************************************//**
-Rename an index in the dictionary that is to be dropped. The data
-dictionary must have been locked exclusively by the caller, because
-the transaction will not be committed.
-@return DB_SUCCESS if all OK */
-dberr_t
-row_merge_rename_index_to_drop(
-/*===========================*/
- trx_t* trx, /*!< in/out: transaction */
- table_id_t table_id, /*!< in: table identifier */
- index_id_t index_id) /*!< in: index identifier */
-{
- dberr_t err;
- pars_info_t* info = pars_info_create();
-
- ut_ad(!srv_read_only_mode);
-
- /* We use the private SQL parser of Innobase to generate the
- query graphs needed in renaming indexes. */
-
- static const char rename_index[] =
- "PROCEDURE RENAME_INDEX_PROC () IS\n"
- "BEGIN\n"
- "UPDATE SYS_INDEXES SET NAME=CONCAT('"
- TEMP_INDEX_PREFIX_STR "',NAME)\n"
- "WHERE TABLE_ID = :tableid AND ID = :indexid;\n"
- "END;\n";
-
- ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
- ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
-
- trx->op_info = "renaming index to drop";
-
- pars_info_add_ull_literal(info, "tableid", table_id);
- pars_info_add_ull_literal(info, "indexid", index_id);
-
- err = que_eval_sql(info, rename_index, FALSE, trx);
-
- if (err != DB_SUCCESS) {
- /* Even though we ensure that DDL transactions are WAIT
- and DEADLOCK free, we could encounter other errors e.g.,
- DB_TOO_MANY_CONCURRENT_TRXS. */
- trx->error_state = DB_SUCCESS;
-
- ib::error() << "row_merge_rename_index_to_drop failed with"
- " error " << err;
- }
-
- trx->op_info = "";
-
- return(err);
-}
-
/** Create the index and load it into the dictionary.
@param[in,out] table the index is on this table
@param[in] index_def the index definition
@@ -4305,30 +4389,7 @@ row_merge_is_index_usable(
&& (index->table->is_temporary() || index->table->no_rollback()
|| index->trx_id == 0
|| !trx->read_view.is_open()
- || trx->read_view.changes_visible(
- index->trx_id,
- index->table->name)));
-}
-
-/*********************************************************************//**
-Drop a table. The caller must have ensured that the background stats
-thread is not processing the table. This can be done by calling
-dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and
-before calling this function.
-@return DB_SUCCESS or error code */
-dberr_t
-row_merge_drop_table(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* table) /*!< in: table to drop */
-{
- ut_ad(!srv_read_only_mode);
-
- /* There must be no open transactions on the table. */
- ut_a(table->get_ref_count() == 0);
-
- return(row_drop_table_for_mysql(table->name.m_name,
- trx, SQLCOM_DROP_TABLE, false, false));
+ || trx->read_view.changes_visible(index->trx_id)));
}
/** Build indexes on a table by reading a clustered index, creating a temporary
@@ -4359,6 +4420,7 @@ this function and it will be passed to other functions for further accounting.
@param[in] eval_table mysql table used to evaluate virtual column
value, see innobase_get_computed_value().
@param[in] allow_not_null allow the conversion from null to not-null
+@param[in] col_collate columns whose collations changed, or nullptr
@return DB_SUCCESS or error code */
dberr_t
row_merge_build_indexes(
@@ -4378,7 +4440,8 @@ row_merge_build_indexes(
ut_stage_alter_t* stage,
const dict_add_v_col_t* add_v,
struct TABLE* eval_table,
- bool allow_not_null)
+ bool allow_not_null,
+ const col_collations* col_collate)
{
merge_file_t* merge_files;
row_merge_block_t* block;
@@ -4528,7 +4591,8 @@ row_merge_build_indexes(
fts_sort_idx, psort_info, merge_files, key_numbers,
n_indexes, defaults, add_v, col_map, add_autoinc,
sequence, block, skip_pk_sort, &tmpfd, stage,
- pct_cost, crypt_block, eval_table, allow_not_null);
+ pct_cost, crypt_block, eval_table, allow_not_null,
+ col_collate);
stage->end_phase_read_pk();
@@ -4768,12 +4832,10 @@ func_exit:
case ONLINE_INDEX_COMPLETE:
break;
case ONLINE_INDEX_CREATION:
- rw_lock_x_lock(
- dict_index_get_lock(indexes[i]));
+ indexes[i]->lock.x_lock(SRW_LOCK_CALL);
row_log_abort_sec(indexes[i]);
indexes[i]->type |= DICT_CORRUPT;
- rw_lock_x_unlock(
- dict_index_get_lock(indexes[i]));
+ indexes[i]->lock.x_unlock();
new_table->drop_aborted = TRUE;
/* fall through */
case ONLINE_INDEX_ABORTED_DROPPED:
@@ -4782,6 +4844,13 @@ func_exit:
MONITOR_BACKGROUND_DROP_INDEX);
}
}
+
+ dict_index_t *clust_index= new_table->indexes.start;
+ clust_index->lock.x_lock(SRW_LOCK_CALL);
+ ut_ad(!clust_index->online_log ||
+ clust_index->online_log_is_dummy());
+ clust_index->online_log= nullptr;
+ clust_index->lock.x_unlock();
}
DBUG_EXECUTE_IF("ib_index_crash_after_bulk_load", DBUG_SUICIDE(););
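The hunks above replace the os_event that signalled completion of the parallel FTS sort threads with a pthread_cond_t guarded by psort_info[0].mutex; the parent waits in one-second slices via my_cond_timedwait and then re-checks every child_status before deciding whether to wait again. Below is a minimal standalone sketch of that bounded-wait-and-recheck pattern using plain pthreads rather than the mysql_mutex/my_cond wrappers; WORKERS, done[] and the sleep durations are illustrative values, not InnoDB definitions.

#include <pthread.h>
#include <time.h>
#include <unistd.h>

enum { WORKERS = 4 };

static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int done[WORKERS];          /* like psort_info[i].child_status */

static void* worker(void* arg) {
	long i = (long) arg;
	usleep(10000 * (i + 1));       /* pretend to sort */
	pthread_mutex_lock(&mtx);
	done[i] = 1;                   /* report completion ... */
	pthread_cond_broadcast(&cond); /* ... and wake the coordinator */
	pthread_mutex_unlock(&mtx);
	return NULL;
}

int main() {
	pthread_t t[WORKERS];
	for (long i = 0; i < WORKERS; i++)
		pthread_create(&t[i], NULL, worker, (void*) i);

	/* Coordinator: wait in bounded (1 s) slices and re-check every
	child, the same shape as the wait_again loop after the clustered
	index scan. */
	pthread_mutex_lock(&mtx);
	for (;;) {
		int all_done = 1;
		for (int i = 0; i < WORKERS; i++)
			if (!done[i]) { all_done = 0; break; }
		if (all_done)
			break;
		struct timespec abstime;
		clock_gettime(CLOCK_REALTIME, &abstime);
		abstime.tv_sec += 1;       /* wake up at least once per second */
		pthread_cond_timedwait(&cond, &mtx, &abstime);
	}
	pthread_mutex_unlock(&mtx);

	for (int i = 0; i < WORKERS; i++)
		pthread_join(t[i], NULL);
	return 0;
}

The point of the timed wait is that a lost or spurious wakeup is harmless: the parent simply re-evaluates the completion flags under the mutex and goes back to sleep if work remains.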
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index 514d4b3ecd9..67167f19c70 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -36,11 +36,8 @@ Created 9/17/2000 Heikki Tuuri
#include "dict0crea.h"
#include "dict0dict.h"
#include "dict0load.h"
-#include "dict0priv.h"
#include "dict0stats.h"
#include "dict0stats_bg.h"
-#include "dict0defrag_bg.h"
-#include "btr0defragment.h"
#include "fil0fil.h"
#include "fil0crypt.h"
#include "fsp0file.h"
@@ -61,53 +58,15 @@ Created 9/17/2000 Heikki Tuuri
#include "trx0rec.h"
#include "trx0roll.h"
#include "trx0undo.h"
+#include "srv0mon.h"
#include "srv0start.h"
-#include "row0ext.h"
-#include "srv0start.h"
+#include "log.h"
#include <algorithm>
-#include <deque>
#include <vector>
+#include <thread>
-/** Provide optional 4.x backwards compatibility for 5.0 and above */
-ibool row_rollback_on_timeout = FALSE;
-
-/** Chain node of the list of tables to drop in the background. */
-struct row_mysql_drop_t{
- table_id_t table_id; /*!< table id */
- UT_LIST_NODE_T(row_mysql_drop_t)row_mysql_drop_list;
- /*!< list chain node */
-};
-
-/** @brief List of tables we should drop in background.
-
-ALTER TABLE in MySQL requires that the table handler can drop the
-table in background when there are no queries to it any
-more. Protected by row_drop_list_mutex. */
-static UT_LIST_BASE_NODE_T(row_mysql_drop_t) row_mysql_drop_list;
-
-/** Mutex protecting the background table drop list. */
-static ib_mutex_t row_drop_list_mutex;
-
-/** Flag: has row_mysql_drop_list been initialized? */
-static bool row_mysql_drop_list_inited;
-
-#ifdef UNIV_DEBUG
-/** Wait for the background drop list to become empty. */
-void
-row_wait_for_background_drop_list_empty()
-{
- bool empty = false;
- while (!empty) {
- mutex_enter(&row_drop_list_mutex);
- empty = (UT_LIST_GET_LEN(row_mysql_drop_list) == 0);
- mutex_exit(&row_drop_list_mutex);
- os_thread_sleep(100000);
- }
-}
-#endif /* UNIV_DEBUG */
-
/*******************************************************************//**
Delays an INSERT, DELETE or UPDATE operation if the purge is lagging. */
static
@@ -116,7 +75,8 @@ row_mysql_delay_if_needed(void)
/*===========================*/
{
if (srv_dml_needed_delay) {
- os_thread_sleep(srv_dml_needed_delay);
+ std::this_thread::sleep_for(
+ std::chrono::microseconds(srv_dml_needed_delay));
}
}
@@ -665,19 +625,20 @@ row_mysql_handle_errors(
DBUG_ENTER("row_mysql_handle_errors");
DEBUG_SYNC_C("row_mysql_handle_errors");
-handle_new_error:
err = trx->error_state;
+handle_new_error:
ut_a(err != DB_SUCCESS);
trx->error_state = DB_SUCCESS;
- DBUG_LOG("trx", "handle error: " << ut_strerr(err)
+ DBUG_LOG("trx", "handle error: " << err
<< ";id=" << ib::hex(trx->id) << ", " << trx);
switch (err) {
case DB_LOCK_WAIT_TIMEOUT:
- if (row_rollback_on_timeout) {
+ extern my_bool innobase_rollback_on_timeout;
+ if (innobase_rollback_on_timeout) {
goto rollback;
}
/* fall through */
@@ -707,14 +668,18 @@ handle_new_error:
trx->rollback(savept);
}
- /* MySQL will roll back the latest SQL statement */
+ if (!trx->bulk_insert) {
+ /* MariaDB will roll back the latest SQL statement */
+ break;
+ }
+ /* MariaDB will roll back the entire transaction. */
+ trx->bulk_insert = false;
+ trx->last_sql_stat_start.least_undo_no = 0;
+ trx->savepoints_discard();
break;
case DB_LOCK_WAIT:
- lock_wait_suspend_thread(thr);
-
- if (trx->error_state != DB_SUCCESS) {
- que_thr_stop_for_mysql(thr);
-
+ err = lock_wait(thr);
+ if (err != DB_SUCCESS) {
goto handle_new_error;
}
@@ -731,12 +696,8 @@ handle_new_error:
trx->rollback();
break;
- case DB_MUST_GET_MORE_FILE_SPACE:
- ib::fatal() << "The database cannot continue operation because"
- " of lack of space. You must add a new data file"
- " to my.cnf and restart the database.";
- break;
-
+ case DB_IO_ERROR:
+ case DB_TABLE_CORRUPT:
case DB_CORRUPTION:
case DB_PAGE_CORRUPTED:
ib::error() << "We detected index corruption in an InnoDB type"
@@ -763,14 +724,13 @@ handle_new_error:
ib::fatal() << "Unknown error " << err;
}
- if (trx->error_state != DB_SUCCESS) {
- *new_err = trx->error_state;
+ if (dberr_t n_err = trx->error_state) {
+ trx->error_state = DB_SUCCESS;
+ *new_err = n_err;
} else {
*new_err = err;
}
- trx->error_state = DB_SUCCESS;
-
DBUG_RETURN(false);
}
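
The rewritten tail of row_mysql_handle_errors() declares the error code directly in the if-condition so that the pending trx->error_state is read once, cleared, and reported. A self-contained sketch of that consume-and-reset pattern; the stand-in dberr_t values and the trx struct below are illustrative only:

    #include <cstdio>

    enum dberr_t { DB_SUCCESS = 0, DB_DEADLOCK = 41 };   // illustrative codes

    struct trx_t { dberr_t error_state = DB_SUCCESS; };

    // Report the pending error (if any) and reset it, mirroring the
    // "if (dberr_t n_err = trx->error_state)" shape used in the patch.
    static dberr_t consume_error(trx_t* trx, dberr_t fallback)
    {
      if (dberr_t n_err = trx->error_state) {   // non-zero means an error is pending
        trx->error_state = DB_SUCCESS;          // clear it exactly once
        return n_err;
      }
      return fallback;
    }

    int main()
    {
      trx_t trx;
      trx.error_state = DB_DEADLOCK;
      dberr_t reported = consume_error(&trx, DB_SUCCESS);
      std::printf("reported: %d, state after: %d\n", reported, trx.error_state);
      return 0;
    }
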
@@ -858,6 +818,10 @@ row_create_prebuilt(
DBUG_EXECUTE_IF("innodb_srch_key_buffer_max_value",
ut_a(temp_index->n_user_defined_cols
== MAX_REF_PARTS););
+ if (temp_index->is_corrupted()) {
+ continue;
+ }
+
uint temp_len = 0;
for (uint i = 0; i < temp_index->n_uniq; i++) {
ulint type = temp_index->fields[i].col->mtype;
@@ -942,13 +906,8 @@ row_create_prebuilt(
DBUG_RETURN(prebuilt);
}
-/********************************************************************//**
-Free a prebuilt struct for a MySQL table handle. */
-void
-row_prebuilt_free(
-/*==============*/
- row_prebuilt_t* prebuilt, /*!< in, own: prebuilt struct */
- ibool dict_locked) /*!< in: TRUE=data dictionary locked */
+/** Free a prebuilt struct for a TABLE handle. */
+void row_prebuilt_free(row_prebuilt_t *prebuilt)
{
DBUG_ENTER("row_prebuilt_free");
@@ -1008,7 +967,7 @@ row_prebuilt_free(
rtr_clean_rtr_info(prebuilt->rtr_info, true);
}
if (prebuilt->table) {
- dict_table_close(prebuilt->table, dict_locked, FALSE);
+ dict_table_close(prebuilt->table);
}
mem_heap_free(prebuilt->heap);
@@ -1069,7 +1028,6 @@ row_get_prebuilt_insert_row(
if (prebuilt->trx_id == table->def_trx_id
&& prebuilt->ins_node->entry_list.size()
== UT_LIST_GET_LEN(table->indexes)) {
-
return(prebuilt->ins_node->row);
}
@@ -1107,12 +1065,12 @@ row_get_prebuilt_insert_row(
dict_table_copy_types(row, table);
ins_node_set_new_row(node, row);
+ que_thr_t* fork = pars_complete_graph_for_exec(
+ node, prebuilt->trx, prebuilt->heap, prebuilt);
+ fork->state = QUE_THR_RUNNING;
prebuilt->ins_graph = static_cast<que_fork_t*>(
- que_node_get_parent(
- pars_complete_graph_for_exec(
- node,
- prebuilt->trx, prebuilt->heap, prebuilt)));
+ que_node_get_parent(fork));
prebuilt->ins_graph->state = QUE_FORK_ACTIVE;
@@ -1139,11 +1097,10 @@ row_lock_table_autoinc_for_mysql(
const dict_table_t* table = prebuilt->table;
que_thr_t* thr;
dberr_t err;
- ibool was_lock_wait;
/* If we already hold an AUTOINC lock on the table then do nothing.
Note: We peek at the value of the current owner without acquiring
- the lock mutex. */
+ lock_sys.latch. */
if (trx == table->autoinc_trx) {
return(DB_SUCCESS);
@@ -1159,36 +1116,20 @@ row_lock_table_autoinc_for_mysql(
thr = que_fork_get_first_thr(prebuilt->ins_graph);
- thr->start_running();
-
-run_again:
- thr->run_node = node;
- thr->prev_node = node;
-
- /* It may be that the current session has not yet started
- its transaction, or it has been committed: */
-
- trx_start_if_not_started_xa(trx, true);
-
- err = lock_table(0, prebuilt->table, LOCK_AUTO_INC, thr);
-
- trx->error_state = err;
-
- if (err != DB_SUCCESS) {
- que_thr_stop_for_mysql(thr);
+ do {
+ thr->run_node = node;
+ thr->prev_node = node;
- was_lock_wait = row_mysql_handle_errors(&err, trx, thr, NULL);
+ /* It may be that the current session has not yet started
+ its transaction, or it has been committed: */
- if (was_lock_wait) {
- goto run_again;
- }
+ trx_start_if_not_started_xa(trx, true);
- trx->op_info = "";
+ err = lock_table(prebuilt->table, NULL, LOCK_AUTO_INC, thr);
- return(err);
- }
-
- thr->stop_no_error();
+ trx->error_state = err;
+ } while (err != DB_SUCCESS
+ && row_mysql_handle_errors(&err, trx, thr, NULL));
trx->op_info = "";
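
Several call sites in this file (this one and row_lock_table() below) replace the old start_running()/run_again:/goto control flow with a do/while loop that retries while the error handler reports a recoverable condition such as a lock wait. A standalone sketch of that loop shape; try_acquire() and handle_error() are made-up stand-ins for lock_table() and row_mysql_handle_errors():

    #include <cstdio>

    enum dberr_t { DB_SUCCESS = 0, DB_LOCK_WAIT = 6 };   // illustrative codes

    // Stand-in for lock_table(): fails with a lock wait twice, then succeeds.
    static dberr_t try_acquire()
    {
      static int calls = 0;
      return ++calls < 3 ? DB_LOCK_WAIT : DB_SUCCESS;
    }

    // Stand-in for row_mysql_handle_errors(): true means the condition is
    // recoverable and the caller should simply retry the operation.
    static bool handle_error(dberr_t* err)
    {
      return *err == DB_LOCK_WAIT;
    }

    int main()
    {
      dberr_t err;
      do {
        err = try_acquire();            // one attempt per iteration
      } while (err != DB_SUCCESS && handle_error(&err));
      std::printf("final status: %d\n", err);
      return 0;
    }
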
@@ -1204,7 +1145,6 @@ row_lock_table(row_prebuilt_t* prebuilt)
trx_t* trx = prebuilt->trx;
que_thr_t* thr;
dberr_t err;
- ibool was_lock_wait;
trx->op_info = "setting table lock";
@@ -1218,39 +1158,20 @@ row_lock_table(row_prebuilt_t* prebuilt)
thr = que_fork_get_first_thr(prebuilt->sel_graph);
- thr->start_running();
-
-run_again:
- thr->run_node = thr;
- thr->prev_node = thr->common.parent;
-
- /* It may be that the current session has not yet started
- its transaction, or it has been committed: */
-
- trx_start_if_not_started_xa(trx, false);
+ do {
+ thr->run_node = thr;
+ thr->prev_node = thr->common.parent;
- err = lock_table(0, prebuilt->table,
- static_cast<enum lock_mode>(
- prebuilt->select_lock_type),
- thr);
+ /* It may be that the current session has not yet started
+ its transaction, or it has been committed: */
- trx->error_state = err;
+ trx_start_if_not_started_xa(trx, false);
- if (err != DB_SUCCESS) {
- que_thr_stop_for_mysql(thr);
-
- was_lock_wait = row_mysql_handle_errors(&err, trx, thr, NULL);
-
- if (was_lock_wait) {
- goto run_again;
- }
-
- trx->op_info = "";
-
- return(err);
- }
-
- thr->stop_no_error();
+ err = lock_table(prebuilt->table, NULL, static_cast<lock_mode>(
+ prebuilt->select_lock_type), thr);
+ trx->error_state = err;
+ } while (err != DB_SUCCESS
+ && row_mysql_handle_errors(&err, trx, thr, NULL));
trx->op_info = "";
@@ -1279,10 +1200,10 @@ row_mysql_get_table_status(
// to decrypt
if (push_warning) {
ib_push_warning(trx, DB_DECRYPTION_FAILED,
- "Table %s in tablespace %lu encrypted."
+ "Table %s is encrypted."
"However key management plugin or used key_id is not found or"
" used encryption algorithm or method does not match.",
- table->name.m_name, table->space);
+ table->name.m_name);
}
err = DB_DECRYPTION_FAILED;
@@ -1330,30 +1251,19 @@ row_insert_for_mysql(
ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED);
- if (!prebuilt->table->space) {
-
- ib::error() << "The table " << prebuilt->table->name
+ if (!table->space) {
+ ib::error() << "The table " << table->name
<< " doesn't have a corresponding tablespace, it was"
" discarded.";
return(DB_TABLESPACE_DELETED);
-
- } else if (!prebuilt->table->is_readable()) {
- return(row_mysql_get_table_status(prebuilt->table, trx, true));
+ } else if (!table->is_readable()) {
+ return row_mysql_get_table_status(table, trx, true);
} else if (high_level_read_only) {
return(DB_READ_ONLY);
- }
-
- DBUG_EXECUTE_IF("mark_table_corrupted", {
- /* Mark the table corrupted for the clustered index */
- dict_index_t* index = dict_table_get_first_index(table);
- ut_ad(dict_index_is_clust(index));
- dict_set_corrupted(index, trx, "INSERT TABLE"); });
-
- if (dict_table_is_corrupted(table)) {
-
- ib::error() << "Table " << table->name << " is corrupt.";
- return(DB_TABLE_CORRUPT);
+ } else if (UNIV_UNLIKELY(table->corrupted)
+ || dict_table_get_first_index(table)->is_corrupted()) {
+ return DB_TABLE_CORRUPT;
}
trx->op_info = "inserting";
@@ -1374,7 +1284,12 @@ row_insert_for_mysql(
node->vers_update_end(prebuilt, ins_mode == ROW_INS_HISTORICAL);
}
- savept = trx_savept_take(trx);
+ /* Because we now allow multiple INSERT into the same
+ initially empty table in bulk insert mode, on error we must
+ roll back to the start of the transaction. For correctness, it
+ would suffice to roll back to the start of the first insert
+ into this empty table, but we will keep it simple and efficient. */
+ savept.least_undo_no = trx->bulk_insert ? 0 : trx->undo_no;
thr = que_fork_get_first_thr(prebuilt->ins_graph);
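
The added comment above explains why, while a bulk insert into an initially empty table is in progress, the statement savepoint degenerates to the very start of the transaction. A small sketch of that decision; the undo-number type and the field names (bulk_insert, undo_no, least_undo_no) are modelled on the patch, but the code itself is illustrative:

    #include <cstdint>
    #include <cstdio>

    typedef uint64_t undo_no_t;

    struct trx_t    { bool bulk_insert; undo_no_t undo_no; };
    struct savept_t { undo_no_t least_undo_no; };

    // Pick the rollback target for the current statement: normally the
    // current undo record count, but 0 (start of transaction) while the
    // table is still being populated in bulk-insert mode.
    static savept_t take_statement_savepoint(const trx_t& trx)
    {
      return { trx.bulk_insert ? 0 : trx.undo_no };
    }

    int main()
    {
      trx_t trx = { true, 42 };
      std::printf("rollback target: %llu\n",
                  (unsigned long long) take_statement_savepoint(trx).least_undo_no);
      return 0;
    }
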
@@ -1383,10 +1298,9 @@ row_insert_for_mysql(
prebuilt->sql_stat_start = FALSE;
} else {
node->state = INS_NODE_ALLOC_ROW_ID;
+ node->trx_id = trx->id;
}
- thr->start_running();
-
run_again:
thr->run_node = node;
thr->prev_node = node;
@@ -1399,8 +1313,6 @@ run_again:
if (err != DB_SUCCESS) {
error_exit:
- que_thr_stop_for_mysql(thr);
-
/* FIXME: What's this ? */
thr->lock_state = QUE_THR_LOCK_ROW;
@@ -1411,7 +1323,8 @@ error_exit:
if (was_lock_wait) {
ut_ad(node->state == INS_NODE_INSERT_ENTRIES
- || node->state == INS_NODE_ALLOC_ROW_ID);
+ || node->state == INS_NODE_ALLOC_ROW_ID
+ || node->state == INS_NODE_SET_IX_LOCK);
goto run_again;
}
@@ -1473,15 +1386,14 @@ error_exit:
}
}
- thr->stop_no_error();
-
if (table->is_system_db) {
srv_stats.n_system_rows_inserted.inc(size_t(trx->id));
} else {
srv_stats.n_rows_inserted.inc(size_t(trx->id));
}
- /* Not protected by dict_sys.mutex for performance
+ /* Not protected by dict_sys.latch or table->stats_mutex_lock()
+ for performance
reasons, we would rather get garbage in stat_n_rows (which is
just an estimate anyway) than protecting the following code
with a latch. */
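
The comment above (and its siblings in the UPDATE/DELETE paths) records a deliberate choice: stat_n_rows is only an estimate, so it is bumped without dict_sys.latch or the per-table stats mutex. A hedged standalone sketch of that trade-off; the struct and field names are illustrative, not InnoDB's:

    #include <cstdint>
    #include <cstdio>

    struct table_stats {
      // Estimated row count. Concurrent writers may race on this field;
      // a slightly stale value is accepted in exchange for not taking a
      // latch on every INSERT or DELETE.
      uint64_t stat_n_rows = 0;
      uint64_t stat_modified_counter = 0;
    };

    static void note_row_inserted(table_stats& s)
    {
      s.stat_n_rows++;             // unprotected on purpose: estimate only
      s.stat_modified_counter++;   // drives when statistics are recalculated
    }

    int main()
    {
      table_stats s;
      note_row_inserted(s);
      std::printf("estimated rows: %llu\n", (unsigned long long) s.stat_n_rows);
      return 0;
    }
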
@@ -1518,12 +1430,12 @@ row_prebuild_sel_graph(
node = sel_node_create(prebuilt->heap);
+ que_thr_t* fork = pars_complete_graph_for_exec(
+ node, prebuilt->trx, prebuilt->heap, prebuilt);
+ fork->state = QUE_THR_RUNNING;
+
prebuilt->sel_graph = static_cast<que_fork_t*>(
- que_node_get_parent(
- pars_complete_graph_for_exec(
- static_cast<sel_node_t*>(node),
- prebuilt->trx, prebuilt->heap,
- prebuilt)));
+ que_node_get_parent(fork));
prebuilt->sel_graph->state = QUE_FORK_ACTIVE;
}
@@ -1547,11 +1459,8 @@ row_create_update_node_for_mysql(
node->in_mysql_interface = true;
node->is_delete = NO_DELETE;
- node->searched_update = FALSE;
- node->select = NULL;
- node->pcur = btr_pcur_create_for_mysql();
-
- DBUG_PRINT("info", ("node: %p, pcur: %p", node, node->pcur));
+ node->pcur = new (mem_heap_alloc(heap, sizeof(btr_pcur_t)))
+ btr_pcur_t();
node->table = table;
@@ -1563,10 +1472,6 @@ row_create_update_node_for_mysql(
UT_LIST_INIT(node->columns, &sym_node_t::col_var_list);
node->has_clust_rec_x_lock = TRUE;
- node->cmpl_info = 0;
-
- node->table_sym = NULL;
- node->col_assign_list = NULL;
DBUG_RETURN(node);
}
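
row_create_update_node_for_mysql() now constructs the persistent cursor in place inside the node's memory heap instead of going through a separate btr_pcur_create_for_mysql() helper. A standalone sketch of the placement-new-into-an-arena idiom; the tiny bump allocator and cursor type below are stand-ins, not InnoDB's mem_heap_t or btr_pcur_t:

    #include <cstddef>
    #include <cstdio>
    #include <new>

    // Minimal bump allocator standing in for mem_heap_alloc().
    struct arena {
      alignas(std::max_align_t) unsigned char buf[1024];
      std::size_t used = 0;
      void* alloc(std::size_t n)
      {
        void* p = buf + used;
        used += (n + alignof(std::max_align_t) - 1)
                & ~(alignof(std::max_align_t) - 1);
        return p;
      }
    };

    struct cursor {          // stand-in for btr_pcur_t
      int pos = -1;          // set by the default constructor
    };

    int main()
    {
      arena heap;
      // Construct the object directly in arena-owned storage, as the patch
      // does with: new (mem_heap_alloc(heap, sizeof(btr_pcur_t))) btr_pcur_t();
      cursor* pcur = new (heap.alloc(sizeof(cursor))) cursor();
      std::printf("cursor position: %d\n", pcur->pos);
      pcur->~cursor();       // the arena's storage is released wholesale later
      return 0;
    }
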
@@ -1667,33 +1572,24 @@ init_fts_doc_id_for_ref(
dict_table_t* table, /*!< in: table */
	ulint*		depth)	/*!< in: recursive call depth */
{
- dict_foreign_t* foreign;
-
table->fk_max_recusive_level = 0;
- (*depth)++;
-
/* Limit on tables involved in cascading delete/update */
- if (*depth > FK_MAX_CASCADE_DEL) {
+ if (++*depth > FK_MAX_CASCADE_DEL) {
return;
}
/* Loop through this table's referenced list and also
recursively traverse each table's foreign table list */
- for (dict_foreign_set::iterator it = table->referenced_set.begin();
- it != table->referenced_set.end();
- ++it) {
-
- foreign = *it;
-
- ut_ad(foreign->foreign_table != NULL);
+ for (dict_foreign_t* foreign : table->referenced_set) {
+ ut_ad(foreign->foreign_table);
- if (foreign->foreign_table->fts != NULL) {
+ if (foreign->foreign_table->fts) {
fts_init_doc_id(foreign->foreign_table);
}
- if (!foreign->foreign_table->referenced_set.empty()
- && foreign->foreign_table != table) {
+ if (foreign->foreign_table != table
+ && !foreign->foreign_table->referenced_set.empty()) {
init_fts_doc_id_for_ref(
foreign->foreign_table, depth);
}
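
The refactored init_fts_doc_id_for_ref() keeps the old behaviour, walking the tables that reference this one through foreign keys and stopping once the cascade chain exceeds FK_MAX_CASCADE_DEL, but now uses a range-for and a pre-incremented depth counter. A self-contained sketch of depth-limited traversal over a table graph; the limit value and structure names here are illustrative:

    #include <cstdio>
    #include <vector>

    const unsigned MAX_DEPTH = 15;   // illustrative stand-in for FK_MAX_CASCADE_DEL

    struct table {
      const char* name;
      std::vector<table*> referenced_by;   // tables whose FKs point at this one
    };

    // Visit every table reachable through foreign keys, but give up once
    // the cascade chain grows deeper than MAX_DEPTH, as the patched
    // function does with ++*depth > FK_MAX_CASCADE_DEL.
    static void visit(table* t, unsigned* depth)
    {
      if (++*depth > MAX_DEPTH) {
        return;
      }
      std::printf("init FTS doc id for %s\n", t->name);
      for (table* child : t->referenced_by) {
        if (child != t) {
          visit(child, depth);
        }
      }
    }

    int main()
    {
      table child  = { "test/child",  {} };
      table parent = { "test/parent", { &child } };
      unsigned depth = 0;
      visit(&parent, &depth);
      return 0;
    }
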
@@ -1714,7 +1610,6 @@ row_update_for_mysql(row_prebuilt_t* prebuilt)
dict_table_t* table = prebuilt->table;
trx_t* trx = prebuilt->trx;
ulint fk_depth = 0;
- bool got_s_lock = false;
DBUG_ENTER("row_update_for_mysql");
@@ -1744,18 +1639,6 @@ row_update_for_mysql(row_prebuilt_t* prebuilt)
trx_start_if_not_started_xa(trx, true);
}
- if (dict_table_is_referenced_by_foreign_key(table)) {
- /* Share lock the data dictionary to prevent any
- table dictionary (for foreign constraint) change.
- This is similar to row_ins_check_foreign_constraint
- check protect by the dictionary lock as well.
- In the future, this can be removed once the Foreign
- key MDL is implemented */
- row_mysql_freeze_data_dictionary(trx);
- init_fts_doc_id_for_ref(table, &fk_depth);
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
node = prebuilt->upd_node;
const bool is_delete = node->is_delete == PLAIN_DELETE;
ut_ad(node->table == table);
@@ -1763,8 +1646,7 @@ row_update_for_mysql(row_prebuilt_t* prebuilt)
clust_index = dict_table_get_first_index(table);
btr_pcur_copy_stored_position(node->pcur,
- prebuilt->pcur->btr_cur.index
- == clust_index
+ prebuilt->pcur->index() == clust_index
? prebuilt->pcur
: prebuilt->clust_pcur);
@@ -1777,7 +1659,7 @@ row_update_for_mysql(row_prebuilt_t* prebuilt)
generated for the table: MySQL does not know anything about
the row id used as the clustered index key */
- savept = trx_savept_take(trx);
+ savept.least_undo_no = trx->undo_no;
thr = que_fork_get_first_thr(prebuilt->upd_graph);
@@ -1785,8 +1667,6 @@ row_update_for_mysql(row_prebuilt_t* prebuilt)
ut_ad(!prebuilt->sql_stat_start);
- thr->start_running();
-
ut_ad(!prebuilt->versioned_write || node->table->versioned());
if (prebuilt->versioned_write) {
@@ -1810,8 +1690,6 @@ row_update_for_mysql(row_prebuilt_t* prebuilt)
break;
}
- que_thr_stop_for_mysql(thr);
-
if (err == DB_RECORD_NOT_FOUND) {
trx->error_state = DB_SUCCESS;
goto error;
@@ -1830,8 +1708,6 @@ row_update_for_mysql(row_prebuilt_t* prebuilt)
}
}
- thr->stop_no_error();
-
if (dict_table_has_fts_index(table)
&& trx->fts_next_doc_id != UINT64_UNDEFINED) {
err = row_fts_update_or_delete(prebuilt);
@@ -1842,15 +1718,12 @@ row_update_for_mysql(row_prebuilt_t* prebuilt)
}
/* Completed cascading operations (if any) */
- if (got_s_lock) {
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
bool update_statistics;
ut_ad(is_delete == (node->is_delete == PLAIN_DELETE));
if (is_delete) {
- /* Not protected by dict_sys.mutex for performance
+ /* Not protected by dict_sys.latch
+ or prebuilt->table->stats_mutex_lock() for performance
reasons, we would rather get garbage in stat_n_rows (which is
just an estimate anyway) than protecting the following code
with a latch. */
@@ -1881,22 +1754,14 @@ row_update_for_mysql(row_prebuilt_t* prebuilt)
prebuilt->table->stat_modified_counter++;
}
- trx->op_info = "";
-
- DBUG_RETURN(err);
-
error:
trx->op_info = "";
- if (got_s_lock) {
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
DBUG_RETURN(err);
}
/** This can only be used when the current transaction is at
READ COMMITTED or READ UNCOMMITTED isolation level.
-Before calling this function row_search_for_mysql() must have
+Before calling this function row_search_mvcc() must have
initialized prebuilt->new_rec_locks to store the information which new
record locks really were set. This function removes a newly set
clustered index record lock under prebuilt->pcur or
@@ -1912,56 +1777,29 @@ row_unlock_for_mysql(
row_prebuilt_t* prebuilt,
ibool has_latches_on_recs)
{
- btr_pcur_t* pcur = prebuilt->pcur;
- btr_pcur_t* clust_pcur = prebuilt->clust_pcur;
- trx_t* trx = prebuilt->trx;
-
- ut_ad(prebuilt != NULL);
- ut_ad(trx != NULL);
- ut_ad(trx->isolation_level <= TRX_ISO_READ_COMMITTED);
-
- if (dict_index_is_spatial(prebuilt->index)) {
- return;
- }
-
- trx->op_info = "unlock_row";
-
- if (prebuilt->new_rec_locks >= 1) {
+ if (prebuilt->new_rec_locks == 1 && prebuilt->index->is_clust()) {
+ trx_t* trx = prebuilt->trx;
+ ut_ad(trx->isolation_level <= TRX_ISO_READ_COMMITTED);
+ trx->op_info = "unlock_row";
const rec_t* rec;
dict_index_t* index;
trx_id_t rec_trx_id;
mtr_t mtr;
+ btr_pcur_t* pcur = prebuilt->pcur;
mtr_start(&mtr);
/* Restore the cursor position and find the record */
- if (!has_latches_on_recs) {
- btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, &mtr);
+ if (!has_latches_on_recs
+ && pcur->restore_position(BTR_SEARCH_LEAF, &mtr)
+ != btr_pcur_t::SAME_ALL) {
+ goto no_unlock;
}
rec = btr_pcur_get_rec(pcur);
- index = btr_pcur_get_btr_cur(pcur)->index;
-
- if (prebuilt->new_rec_locks >= 2) {
- /* Restore the cursor position and find the record
- in the clustered index. */
-
- if (!has_latches_on_recs) {
- btr_pcur_restore_position(BTR_SEARCH_LEAF,
- clust_pcur, &mtr);
- }
-
- rec = btr_pcur_get_rec(clust_pcur);
- index = btr_pcur_get_btr_cur(clust_pcur)->index;
- }
-
- if (!dict_index_is_clust(index)) {
- /* This is not a clustered index record. We
- do not know how to unlock the record. */
- goto no_unlock;
- }
+ index = pcur->index();
/* If the record has been modified by this
transaction, do not unlock it. */
@@ -1993,60 +1831,15 @@ row_unlock_for_mysql(
lock_rec_unlock(
trx,
- btr_pcur_get_block(pcur),
+ btr_pcur_get_block(pcur)->page.id(),
rec,
static_cast<enum lock_mode>(
prebuilt->select_lock_type));
-
- if (prebuilt->new_rec_locks >= 2) {
- rec = btr_pcur_get_rec(clust_pcur);
-
- lock_rec_unlock(
- trx,
- btr_pcur_get_block(clust_pcur),
- rec,
- static_cast<enum lock_mode>(
- prebuilt->select_lock_type));
- }
}
no_unlock:
mtr_commit(&mtr);
+ trx->op_info = "";
}
-
- trx->op_info = "";
-}
-
-/*********************************************************************//**
-Locks the data dictionary in shared mode from modifications, for performing
-foreign key check, rollback, or other operation invisible to MySQL. */
-void
-row_mysql_freeze_data_dictionary_func(
-/*==================================*/
- trx_t* trx, /*!< in/out: transaction */
- const char* file, /*!< in: file name */
- unsigned line) /*!< in: line number */
-{
- ut_a(trx->dict_operation_lock_mode == 0);
-
- rw_lock_s_lock_inline(&dict_sys.latch, 0, file, line);
-
- trx->dict_operation_lock_mode = RW_S_LATCH;
-}
-
-/*********************************************************************//**
-Unlocks the data dictionary shared lock. */
-void
-row_mysql_unfreeze_data_dictionary(
-/*===============================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- ut_ad(lock_trx_has_sys_table_locks(trx) == NULL);
-
- ut_a(trx->dict_operation_lock_mode == RW_S_LATCH);
-
- rw_lock_s_unlock(&dict_sys.latch);
-
- trx->dict_operation_lock_mode = 0;
}
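
The rewritten row_unlock_for_mysql() only attempts the unlock when exactly one new clustered-index record lock was noted, the cursor restores to the same record, and the record was not modified by this transaction (rec_trx_id is compared against trx->id before lock_rec_unlock() is called). A hedged standalone sketch of that last guard; every type below is a stand-in:

    #include <cstdint>
    #include <cstdio>

    typedef uint64_t trx_id_t;

    struct record { trx_id_t modifier_trx_id; };   // DB_TRX_ID of the record
    struct trx    { trx_id_t id; };

    // Decide whether a freshly taken record lock can be given back: the
    // patch keeps the lock whenever the row was modified by the same
    // transaction that now wants to release it.
    static bool may_release_lock(const trx& t, const record& rec)
    {
      return rec.modifier_trx_id != t.id;
    }

    int main()
    {
      trx t = { 100 };
      record untouched = { 55 }, own_change = { 100 };
      std::printf("untouched row: %s\n",
                  may_release_lock(t, untouched) ? "unlock" : "keep");
      std::printf("own change:    %s\n",
                  may_release_lock(t, own_change) ? "unlock" : "keep");
      return 0;
    }
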
/** Write query start time as SQL field data to a buffer. Needed by InnoDB.
@@ -2151,10 +1944,7 @@ static dberr_t row_update_vers_insert(que_thr_t* thr, upd_node_t* node)
switch (trx->error_state) {
case DB_LOCK_WAIT:
- que_thr_stop_for_mysql(thr);
- lock_wait_suspend_thread(thr);
-
- if (trx->error_state == DB_SUCCESS) {
+ if (lock_wait(thr) == DB_SUCCESS) {
continue;
}
@@ -2225,10 +2015,7 @@ row_update_cascade_for_mysql(
switch (trx->error_state) {
case DB_LOCK_WAIT:
- que_thr_stop_for_mysql(thr);
- lock_wait_suspend_thread(thr);
-
- if (trx->error_state == DB_SUCCESS) {
+ if (lock_wait(thr) == DB_SUCCESS) {
continue;
}
@@ -2243,7 +2030,8 @@ row_update_cascade_for_mysql(
bool stats;
if (node->is_delete == PLAIN_DELETE) {
- /* Not protected by dict_sys.mutex for
+ /* Not protected by dict_sys.latch
+ or node->table->stats_mutex_lock() for
performance reasons, we would rather
get garbage in stat_n_rows (which is
just an estimate anyway) than
@@ -2273,35 +2061,6 @@ row_update_cascade_for_mysql(
}
/*********************************************************************//**
-Locks the data dictionary exclusively for performing a table create or other
-data dictionary modification operation. */
-void
-row_mysql_lock_data_dictionary_func(
-/*================================*/
- trx_t* trx, /*!< in/out: transaction */
- const char* file, /*!< in: file name */
- unsigned line) /*!< in: line number */
-{
- ut_a(trx->dict_operation_lock_mode == 0
- || trx->dict_operation_lock_mode == RW_X_LATCH);
- dict_sys.lock(file, line);
- trx->dict_operation_lock_mode = RW_X_LATCH;
-}
-
-/*********************************************************************//**
-Unlocks the data dictionary exclusive lock. */
-void
-row_mysql_unlock_data_dictionary(
-/*=============================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- ut_ad(lock_trx_has_sys_table_locks(trx) == NULL);
- ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
- trx->dict_operation_lock_mode = 0;
- dict_sys.unlock();
-}
-
-/*********************************************************************//**
Creates a table for MySQL. On failure the transaction will be rolled back
and the 'table' object will be freed.
@return error code or DB_SUCCESS */
@@ -2311,44 +2070,31 @@ row_create_table_for_mysql(
dict_table_t* table, /*!< in, own: table definition
(will be freed, or on DB_SUCCESS
added to the data dictionary cache) */
- trx_t* trx, /*!< in/out: transaction */
- fil_encryption_t mode, /*!< in: encryption mode */
- uint32_t key_id) /*!< in: encryption key_id */
+ trx_t* trx) /*!< in/out: transaction */
{
tab_node_t* node;
mem_heap_t* heap;
que_thr_t* thr;
- dberr_t err;
- ut_d(dict_sys.assert_locked());
- ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(trx->state == TRX_STATE_ACTIVE);
+ ut_ad(dict_sys.sys_tables_exist());
+ ut_ad(dict_sys.locked());
+ ut_ad(trx->dict_operation_lock_mode);
+
+ DEBUG_SYNC_C("create_table");
DBUG_EXECUTE_IF(
"ib_create_table_fail_at_start_of_row_create_table_for_mysql",
- dict_mem_table_free(table);
- trx->op_info = "";
- return DB_ERROR;
+ dict_mem_table_free(table); return DB_ERROR;
);
trx->op_info = "creating table";
- trx_start_if_not_started_xa(trx, true);
-
heap = mem_heap_create(512);
- switch (trx_get_dict_operation(trx)) {
- case TRX_DICT_OP_NONE:
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- case TRX_DICT_OP_TABLE:
- break;
- case TRX_DICT_OP_INDEX:
- /* If the transaction was previously flagged as
- TRX_DICT_OP_INDEX, we should be creating auxiliary
- tables for full-text indexes. */
- ut_ad(strstr(table->name.m_name, "/FTS_") != NULL);
- }
+ trx->dict_operation = true;
- node = tab_create_graph_create(table, heap, mode, key_id);
+ node = tab_create_graph_create(table, heap);
thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
@@ -2357,62 +2103,12 @@ row_create_table_for_mysql(
que_run_threads(thr);
- err = trx->error_state;
-
- /* Update SYS_TABLESPACES and SYS_DATAFILES if a new file-per-table
- tablespace was created. */
- if (err == DB_SUCCESS && dict_table_is_file_per_table(table)) {
- err = dict_replace_tablespace_in_dictionary(
- table->space_id, table->name.m_name,
- table->space->flags,
- table->space->chain.start->name, trx);
-
- if (err != DB_SUCCESS) {
-
- /* We must delete the link file. */
- RemoteDatafile::delete_link_file(table->name.m_name);
- }
- }
-
- switch (err) {
- case DB_SUCCESS:
- break;
- case DB_OUT_OF_FILE_SPACE:
- trx->error_state = DB_SUCCESS;
- trx->rollback();
-
- ib::warn() << "Cannot create table "
- << table->name
- << " because tablespace full";
-
- if (dict_table_open_on_name(table->name.m_name, TRUE, FALSE,
- DICT_ERR_IGNORE_NONE)) {
-
- dict_table_close_and_drop(trx, table);
- } else {
- dict_mem_table_free(table);
- }
-
- break;
-
- case DB_UNSUPPORTED:
- case DB_TOO_MANY_CONCURRENT_TRXS:
- /* We already have .ibd file here. it should be deleted. */
-
- if (dict_table_is_file_per_table(table)
- && fil_delete_tablespace(table->space_id) != DB_SUCCESS) {
- ib::error() << "Cannot delete the file of table "
- << table->name;
- }
- /* fall through */
+ dberr_t err = trx->error_state;
- case DB_DUPLICATE_KEY:
- case DB_TABLESPACE_EXISTS:
- default:
+ if (err != DB_SUCCESS) {
trx->error_state = DB_SUCCESS;
trx->rollback();
dict_mem_table_free(table);
- break;
}
que_graph_free((que_t*) que_node_get_parent(thr));
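
The error handling at the end of row_create_table_for_mysql() collapses from a per-error switch (tablespace cleanup, link-file removal, warnings) into one uniform path: any failure rolls the transaction back and frees the in-memory table definition, with file-level cleanup handled elsewhere in the rewritten DDL code. A sketch of that simplified shape with placeholder helpers; none of the names below are the real InnoDB entry points:

    #include <cstdio>

    enum dberr_t { DB_SUCCESS = 0, DB_DUPLICATE_KEY = 5 };   // illustrative

    struct trx_t   { dberr_t error_state = DB_SUCCESS; };
    struct table_t { const char* name; };

    static void rollback(trx_t* trx)       { trx->error_state = DB_SUCCESS; }
    static void free_table_def(table_t* t) { std::printf("freed %s\n", t->name); }

    // Run the DDL work (stubbed via 'outcome'), then apply one uniform
    // failure path instead of a per-error switch.
    static dberr_t create_table(trx_t* trx, table_t* table, dberr_t outcome)
    {
      trx->error_state = outcome;          // pretend que_run_threads() ran
      dberr_t err = trx->error_state;
      if (err != DB_SUCCESS) {
        rollback(trx);                     // undo any dictionary records written
        free_table_def(table);             // the caller no longer owns 'table'
      }
      return err;
    }

    int main()
    {
      trx_t trx;
      table_t t = { "test/t1" };
      std::printf("status: %d\n", create_table(&trx, &t, DB_DUPLICATE_KEY));
      return 0;
    }
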
@@ -2432,12 +2128,14 @@ row_create_index_for_mysql(
dict_index_t* index, /*!< in, own: index definition
(will be freed) */
trx_t* trx, /*!< in: transaction handle */
- const ulint* field_lengths) /*!< in: if not NULL, must contain
+ const ulint* field_lengths, /*!< in: if not NULL, must contain
dict_index_get_n_fields(index)
actual field lengths for the
index columns, which are
then checked for not being too
large. */
+ fil_encryption_t mode, /*!< in: encryption mode */
+ uint32_t key_id) /*!< in: encryption key_id */
{
ind_node_t* node;
mem_heap_t* heap;
@@ -2447,7 +2145,7 @@ row_create_index_for_mysql(
ulint len;
dict_table_t* table = index->table;
- ut_d(dict_sys.assert_locked());
+ ut_ad(dict_sys.locked());
for (i = 0; i < index->n_def; i++) {
/* Check that prefix_len and actual length
@@ -2471,21 +2169,21 @@ row_create_index_for_mysql(
}
}
- trx->op_info = "creating index";
-
	/* For temp-tables we avoid insertion into SYSTEM TABLES to
	maintain performance and so we have a separate path that directly
	just updates the dictionary cache. */
if (!table->is_temporary()) {
- trx_start_if_not_started_xa(trx, true);
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+ ut_ad(trx->state == TRX_STATE_ACTIVE);
+ ut_ad(trx->dict_operation);
+ trx->op_info = "creating index";
+
/* Note that the space id where we store the index is
inherited from the table in dict_build_index_def_step()
in dict0crea.cc. */
heap = mem_heap_create(512);
node = ind_create_graph_create(index, table->name.m_name,
- heap);
+ heap, mode, key_id);
thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
@@ -2506,6 +2204,8 @@ row_create_index_for_mysql(
if (index && (index->type & DICT_FTS)) {
err = fts_create_index_tables(trx, index, table->id);
}
+
+ trx->op_info = "";
} else {
dict_build_index_def(table, index, trx);
@@ -2527,270 +2227,9 @@ row_create_index_for_mysql(
}
}
- trx->op_info = "";
-
return(err);
}
-/*********************************************************************//**
-Drops a table for MySQL as a background operation. MySQL relies on Unix
-in ALTER TABLE to the fact that the table handler does not remove the
-table before all handles to it has been removed. Furhermore, the MySQL's
-call to drop table must be non-blocking. Therefore we do the drop table
-as a background operation, which is taken care of by the master thread
-in srv0srv.cc.
-@return error code or DB_SUCCESS */
-static
-dberr_t
-row_drop_table_for_mysql_in_background(
-/*===================================*/
- const char* name) /*!< in: table name */
-{
- dberr_t error;
- trx_t* trx;
-
- trx = trx_create();
-
- /* If the original transaction was dropping a table referenced by
- foreign keys, we must set the following to be able to drop the
- table: */
-
- trx->check_foreigns = false;
-
- /* Try to drop the table in InnoDB */
-
- error = row_drop_table_for_mysql(name, trx, SQLCOM_TRUNCATE);
-
- trx_commit_for_mysql(trx);
-
- trx->free();
-
- return(error);
-}
-
-/*********************************************************************//**
-The master thread in srv0srv.cc calls this regularly to drop tables which
-we must drop in background after queries to them have ended. Such lazy
-dropping of tables is needed in ALTER TABLE on Unix.
-@return how many tables dropped + remaining tables in list */
-ulint
-row_drop_tables_for_mysql_in_background(void)
-/*=========================================*/
-{
- row_mysql_drop_t* drop;
- dict_table_t* table;
- ulint n_tables;
- ulint n_tables_dropped = 0;
-loop:
- mutex_enter(&row_drop_list_mutex);
-
- ut_a(row_mysql_drop_list_inited);
-next:
- drop = UT_LIST_GET_FIRST(row_mysql_drop_list);
-
- n_tables = UT_LIST_GET_LEN(row_mysql_drop_list);
-
- mutex_exit(&row_drop_list_mutex);
-
- if (drop == NULL) {
- /* All tables dropped */
-
- return(n_tables + n_tables_dropped);
- }
-
- /* On fast shutdown, just empty the list without dropping tables. */
- table = srv_shutdown_state == SRV_SHUTDOWN_NONE || !srv_fast_shutdown
- ? dict_table_open_on_id(drop->table_id, FALSE,
- DICT_TABLE_OP_OPEN_ONLY_IF_CACHED)
- : NULL;
-
- if (!table) {
- n_tables_dropped++;
- mutex_enter(&row_drop_list_mutex);
- UT_LIST_REMOVE(row_mysql_drop_list, drop);
- MONITOR_DEC(MONITOR_BACKGROUND_DROP_TABLE);
- ut_free(drop);
- goto next;
- }
-
- ut_a(!table->can_be_evicted);
-
- bool skip = false;
-
- if (!table->to_be_dropped) {
-skip:
- dict_table_close(table, FALSE, FALSE);
-
- mutex_enter(&row_drop_list_mutex);
- UT_LIST_REMOVE(row_mysql_drop_list, drop);
- if (!skip) {
- UT_LIST_ADD_LAST(row_mysql_drop_list, drop);
- } else {
- ut_free(drop);
- }
- goto next;
- }
-
- if (!srv_fast_shutdown && !trx_sys.any_active_transactions()) {
- lock_mutex_enter();
- skip = UT_LIST_GET_LEN(table->locks) != 0;
- lock_mutex_exit();
- if (skip) {
- /* We cannot drop tables that are locked by XA
- PREPARE transactions. */
- goto skip;
- }
- }
-
- char* name = mem_strdup(table->name.m_name);
-
- dict_table_close(table, FALSE, FALSE);
-
- dberr_t err = row_drop_table_for_mysql_in_background(name);
-
- ut_free(name);
-
- if (err != DB_SUCCESS) {
- /* If the DROP fails for some table, we return, and let the
- main thread retry later */
- return(n_tables + n_tables_dropped);
- }
-
- goto loop;
-}
-
-/*********************************************************************//**
-Get the background drop list length. NOTE: the caller must own the
-drop list mutex!
-@return how many tables in list */
-ulint
-row_get_background_drop_list_len_low(void)
-/*======================================*/
-{
- ulint len;
-
- mutex_enter(&row_drop_list_mutex);
-
- ut_a(row_mysql_drop_list_inited);
-
- len = UT_LIST_GET_LEN(row_mysql_drop_list);
-
- mutex_exit(&row_drop_list_mutex);
-
- return(len);
-}
-
-/** Drop garbage tables during recovery. */
-void
-row_mysql_drop_garbage_tables()
-{
- mem_heap_t* heap = mem_heap_create(FN_REFLEN);
- btr_pcur_t pcur;
- mtr_t mtr;
- trx_t* trx = trx_create();
- trx->op_info = "dropping garbage tables";
- row_mysql_lock_data_dictionary(trx);
-
- mtr.start();
- btr_pcur_open_at_index_side(
- true, dict_table_get_first_index(dict_sys.sys_tables),
- BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
-
- for (;;) {
- const rec_t* rec;
- const byte* field;
- ulint len;
- const char* table_name;
-
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- break;
- }
-
- rec = btr_pcur_get_rec(&pcur);
- if (rec_get_deleted_flag(rec, 0)) {
- continue;
- }
-
- field = rec_get_nth_field_old(rec, 0/*NAME*/, &len);
- if (len == UNIV_SQL_NULL || len == 0) {
- /* Corrupted SYS_TABLES.NAME */
- continue;
- }
-
- table_name = mem_heap_strdupl(
- heap,
- reinterpret_cast<const char*>(field), len);
- if (strstr(table_name, "/" TEMP_FILE_PREFIX "-") &&
- !strstr(table_name, "/" TEMP_FILE_PREFIX "-backup-") &&
- !strstr(table_name, "/" TEMP_FILE_PREFIX "-exchange-"))
- {
- btr_pcur_store_position(&pcur, &mtr);
- btr_pcur_commit_specify_mtr(&pcur, &mtr);
-
- if (dict_load_table(table_name,
- DICT_ERR_IGNORE_DROP)) {
- row_drop_table_for_mysql(table_name, trx,
- SQLCOM_DROP_TABLE);
- trx_commit_for_mysql(trx);
- }
-
- mtr.start();
- btr_pcur_restore_position(BTR_SEARCH_LEAF,
- &pcur, &mtr);
- }
-
- mem_heap_empty(heap);
- }
-
- btr_pcur_close(&pcur);
- mtr.commit();
- row_mysql_unlock_data_dictionary(trx);
- trx->free();
- mem_heap_free(heap);
-}
-
-/*********************************************************************//**
-If a table is not yet in the drop list, adds the table to the list of tables
-which the master thread drops in background. We need this on Unix because in
-ALTER TABLE MySQL may call drop table even if the table has running queries on
-it. Also, if there are running foreign key checks on the table, we drop the
-table lazily.
-@return whether background DROP TABLE was scheduled for the first time */
-static
-bool
-row_add_table_to_background_drop_list(table_id_t table_id)
-{
- row_mysql_drop_t* drop;
- bool added = true;
-
- mutex_enter(&row_drop_list_mutex);
-
- ut_a(row_mysql_drop_list_inited);
-
- /* Look if the table already is in the drop list */
- for (drop = UT_LIST_GET_FIRST(row_mysql_drop_list);
- drop != NULL;
- drop = UT_LIST_GET_NEXT(row_mysql_drop_list, drop)) {
-
- if (drop->table_id == table_id) {
- added = false;
- goto func_exit;
- }
- }
-
- drop = static_cast<row_mysql_drop_t*>(ut_malloc_nokey(sizeof *drop));
- drop->table_id = table_id;
-
- UT_LIST_ADD_LAST(row_mysql_drop_list, drop);
-
- MONITOR_INC(MONITOR_BACKGROUND_DROP_TABLE);
-func_exit:
- mutex_exit(&row_drop_list_mutex);
- return added;
-}
-
/** Reassigns the table identifier of a table.
@param[in,out] table table
@param[in,out] trx transaction
@@ -2803,6 +2242,13 @@ row_mysql_table_id_reassign(
trx_t* trx,
table_id_t* new_id)
{
+ if (!dict_sys.sys_tables || dict_sys.sys_tables->corrupted ||
+ !dict_sys.sys_columns || dict_sys.sys_columns->corrupted ||
+ !dict_sys.sys_indexes || dict_sys.sys_indexes->corrupted ||
+ !dict_sys.sys_virtual || dict_sys.sys_virtual->corrupted) {
+ return DB_CORRUPTION;
+ }
+
dberr_t err;
pars_info_t* info = pars_info_create();
@@ -2827,48 +2273,12 @@ row_mysql_table_id_reassign(
" WHERE TABLE_ID = :old_id;\n"
"UPDATE SYS_VIRTUAL SET TABLE_ID = :new_id\n"
" WHERE TABLE_ID = :old_id;\n"
- "END;\n", FALSE, trx);
+ "END;\n", trx);
return(err);
}
/*********************************************************************//**
-Setup the pre-requisites for DISCARD TABLESPACE. It will start the transaction,
-acquire the data dictionary lock in X mode and open the table.
-@return table instance or 0 if not found. */
-static
-dict_table_t*
-row_discard_tablespace_begin(
-/*=========================*/
- const char* name, /*!< in: table name */
- trx_t* trx) /*!< in: transaction handle */
-{
- trx->op_info = "discarding tablespace";
-
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
-
- trx_start_if_not_started_xa(trx, true);
-
- /* Serialize data dictionary operations with dictionary mutex:
- this is to avoid deadlocks during data dictionary operations */
-
- row_mysql_lock_data_dictionary(trx);
-
- dict_table_t* table;
-
- table = dict_table_open_on_name(
- name, TRUE, FALSE, DICT_ERR_IGNORE_FK_NOKEY);
-
- if (table) {
- dict_stats_wait_bg_to_stop_using_table(table, trx);
- ut_a(!is_system_tablespace(table->space_id));
- ut_ad(!table->n_foreign_key_checks_running);
- }
-
- return(table);
-}
-
-/*********************************************************************//**
Do the foreign key constraint checks.
@return DB_SUCCESS or error code. */
static
@@ -2903,7 +2313,7 @@ row_discard_tablespace_foreign_key_checks(
/* We only allow discarding a referenced table if
FOREIGN_KEY_CHECKS is set to 0 */
- mutex_enter(&dict_foreign_err_mutex);
+ mysql_mutex_lock(&dict_foreign_err_mutex);
rewind(ef);
@@ -2916,44 +2326,12 @@ row_discard_tablespace_foreign_key_checks(
ut_print_name(ef, trx, foreign->foreign_table_name);
putc('\n', ef);
- mutex_exit(&dict_foreign_err_mutex);
+ mysql_mutex_unlock(&dict_foreign_err_mutex);
return(DB_CANNOT_DROP_CONSTRAINT);
}
/*********************************************************************//**
-Cleanup after the DISCARD TABLESPACE operation.
-@return error code. */
-static
-dberr_t
-row_discard_tablespace_end(
-/*=======================*/
- trx_t* trx, /*!< in/out: transaction handle */
- dict_table_t* table, /*!< in/out: table to be discarded */
- dberr_t err) /*!< in: error code */
-{
- if (table != 0) {
- dict_table_close(table, TRUE, FALSE);
- }
-
- DBUG_EXECUTE_IF("ib_discard_before_commit_crash",
- log_buffer_flush_to_disk();
- DBUG_SUICIDE(););
-
- trx_commit_for_mysql(trx);
-
- DBUG_EXECUTE_IF("ib_discard_after_commit_crash",
- log_buffer_flush_to_disk();
- DBUG_SUICIDE(););
-
- row_mysql_unlock_data_dictionary(trx);
-
- trx->op_info = "";
-
- return(err);
-}
-
-/*********************************************************************//**
Do the DISCARD TABLESPACE operation.
@return DB_SUCCESS or error code. */
static
@@ -2963,17 +2341,17 @@ row_discard_tablespace(
trx_t* trx, /*!< in/out: transaction handle */
dict_table_t* table) /*!< in/out: table to be discarded */
{
- dberr_t err;
+ dberr_t err;
/* How do we prevent crashes caused by ongoing operations on
the table? Old operations could try to access non-existent
- pages. MySQL will block all DML on the table using MDL and a
+ pages. The SQL layer will block all DML on the table using MDL and a
DISCARD will not start unless all existing operations on the
table to be discarded are completed.
- 1) Acquire the data dictionary latch in X mode. To prevent any
- internal operations that MySQL is not aware off and also for
- the internal SQL parser.
+ 1) Acquire the data dictionary latch in X mode. This will
+ prevent any internal operations that are not covered by
+ MDL or InnoDB table locks.
2) Purge and rollback: we assign a new table id for the
table. Since purge and rollback look for the table based on
@@ -3006,7 +2384,7 @@ row_discard_tablespace(
if (dict_table_has_fts_index(table)
|| DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
- fts_drop_tables(trx, table);
+ fts_drop_tables(trx, *table);
}
/* Assign a new space ID to the table definition so that purge
@@ -3018,29 +2396,9 @@ row_discard_tablespace(
return(err);
}
- /* Discard the physical file that is used for the tablespace. */
- err = fil_delete_tablespace(table->space_id);
- switch (err) {
- case DB_IO_ERROR:
- ib::warn() << "ALTER TABLE " << table->name
- << " DISCARD TABLESPACE failed to delete file";
- break;
- case DB_TABLESPACE_NOT_FOUND:
- ib::warn() << "ALTER TABLE " << table->name
- << " DISCARD TABLESPACE failed to find tablespace";
- break;
- case DB_SUCCESS:
- break;
- default:
- ut_error;
- }
-
/* All persistent operations successful, update the
data dictionary memory cache. */
- table->file_unreadable = true;
- table->space = NULL;
- table->flags2 |= DICT_TF2_DISCARDED;
dict_table_change_id_in_cache(table, new_id);
dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
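
The comment near the top of row_discard_tablespace() explains the trick behind step 2: giving the table a new table id means that undo-log references consulted by purge and rollback, which look the table up by its old id, simply stop resolving to it. A standalone sketch of the idea using an ordinary map keyed by table id; the toy registry and ids are illustrative only:

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <string>

    typedef uint64_t table_id_t;

    // Toy dictionary cache keyed by table id, loosely modelled on the
    // id-based lookup that purge and rollback perform.
    static std::map<table_id_t, std::string> cache;

    static void change_id_in_cache(table_id_t old_id, table_id_t new_id)
    {
      auto it = cache.find(old_id);
      if (it != cache.end()) {
        cache[new_id] = it->second;   // re-register under the new identity
        cache.erase(it);              // stale references to old_id now miss
      }
    }

    int main()
    {
      cache[7] = "test/t1";
      change_id_in_cache(7, 8001);
      // A background lookup using the pre-DISCARD id no longer finds the table.
      std::printf("lookup(7):    %s\n", cache.count(7) ? "found" : "not found");
      std::printf("lookup(8001): %s\n", cache.count(8001) ? "found" : "not found");
      return 0;
    }
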
@@ -3062,992 +2420,81 @@ Discards the tablespace of a table which stored in an .ibd file. Discarding
means that this function renames the .ibd file and assigns a new table id for
the table. Also the file_unreadable flag is set.
@return error code or DB_SUCCESS */
-dberr_t
-row_discard_tablespace_for_mysql(
-/*=============================*/
- const char* name, /*!< in: table name */
- trx_t* trx) /*!< in: transaction handle */
-{
- dberr_t err;
- dict_table_t* table;
-
- /* Open the table and start the transaction if not started. */
-
- table = row_discard_tablespace_begin(name, trx);
-
- if (table == 0) {
- err = DB_TABLE_NOT_FOUND;
- } else if (table->is_temporary()) {
-
- ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- ER_CANNOT_DISCARD_TEMPORARY_TABLE);
-
- err = DB_ERROR;
-
- } else if (table->space_id == TRX_SYS_SPACE) {
- char table_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- table_name, sizeof(table_name),
- table->name.m_name);
-
- ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- ER_TABLE_IN_SYSTEM_TABLESPACE, table_name);
-
- err = DB_ERROR;
-
- } else {
- ut_ad(!table->n_foreign_key_checks_running);
-
- bool fts_exist = (dict_table_has_fts_index(table)
- || DICT_TF2_FLAG_IS_SET(
- table, DICT_TF2_FTS_HAS_DOC_ID));
-
- if (fts_exist) {
- row_mysql_unlock_data_dictionary(trx);
- fts_optimize_remove_table(table);
- row_mysql_lock_data_dictionary(trx);
- }
-
- /* Do foreign key constraint checks. */
-
- err = row_discard_tablespace_foreign_key_checks(trx, table);
-
- if (err == DB_SUCCESS) {
- /* Note: This cannot be rolled back.
- Rollback would see the UPDATE SYS_INDEXES
- as two operations: DELETE and INSERT.
- It would invoke btr_free_if_exists()
- when rolling back the INSERT, effectively
- dropping all indexes of the table. */
- err = row_discard_tablespace(trx, table);
- }
-
- if (fts_exist && err != DB_SUCCESS) {
- fts_optimize_add_table(table);
- }
- }
-
- return(row_discard_tablespace_end(trx, table, err));
-}
-
-/*********************************************************************//**
-Sets an exclusive lock on a table.
-@return error code or DB_SUCCESS */
-dberr_t
-row_mysql_lock_table(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- dict_table_t* table, /*!< in: table to lock */
- enum lock_mode mode, /*!< in: LOCK_X or LOCK_S */
- const char* op_info) /*!< in: string for trx->op_info */
+dberr_t row_discard_tablespace_for_mysql(dict_table_t *table, trx_t *trx)
{
- mem_heap_t* heap;
- que_thr_t* thr;
- dberr_t err;
- sel_node_t* node;
-
- ut_ad(mode == LOCK_X || mode == LOCK_S);
+ ut_ad(!is_system_tablespace(table->space_id));
+ ut_ad(!table->is_temporary());
- heap = mem_heap_create(512);
-
- trx->op_info = op_info;
-
- node = sel_node_create(heap);
- thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
- thr->graph->state = QUE_FORK_ACTIVE;
-
- /* We use the select query graph as the dummy graph needed
- in the lock module call */
+ const auto fts_exist = table->flags2 &
+ (DICT_TF2_FTS_HAS_DOC_ID | DICT_TF2_FTS);
- thr = que_fork_get_first_thr(
- static_cast<que_fork_t*>(que_node_get_parent(thr)));
+ dberr_t err;
- thr->start_running();
-
-run_again:
- thr->run_node = thr;
- thr->prev_node = thr->common.parent;
-
- err = lock_table(0, table, mode, thr);
-
- trx->error_state = err;
-
- if (err == DB_SUCCESS) {
- thr->stop_no_error();
- } else {
- que_thr_stop_for_mysql(thr);
-
- if (row_mysql_handle_errors(&err, trx, thr, NULL)) {
- goto run_again;
- }
- }
-
- que_graph_free(thr->graph);
- trx->op_info = "";
-
- return(err);
-}
-
-/** Drop ancillary FTS tables as part of dropping a table.
-@param[in,out] table Table cache entry
-@param[in,out] trx Transaction handle
-@return error code or DB_SUCCESS */
-UNIV_INLINE
-dberr_t
-row_drop_ancillary_fts_tables(
- dict_table_t* table,
- trx_t* trx)
-{
- /* Drop ancillary FTS tables */
- if (dict_table_has_fts_index(table)
- || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
-
- ut_ad(table->get_ref_count() == 0);
- ut_ad(trx_is_started(trx));
-
- dberr_t err = fts_drop_tables(trx, table);
-
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- ib::error() << " Unable to remove ancillary FTS"
- " tables for table "
- << table->name << " : " << err;
-
- return(err);
- }
- }
-
- /* The table->fts flag can be set on the table for which
- the cluster index is being rebuilt. Such table might not have
- DICT_TF2_FTS flag set. So keep this out of above
- dict_table_has_fts_index condition */
- if (table->fts != NULL) {
- /* fts_que_graph_free_check_lock would try to acquire
- dict mutex lock */
- table->fts->dict_locked = true;
- table->fts->~fts_t();
- table->fts = nullptr;
- }
-
- return(DB_SUCCESS);
-}
-
-/** Drop a table from the memory cache as part of dropping a table.
-@param[in] tablename A copy of table->name. Used when table == null
-@param[in,out] table Table cache entry
-@param[in,out] trx Transaction handle
-@return error code or DB_SUCCESS */
-UNIV_INLINE
-dberr_t
-row_drop_table_from_cache(
- const char* tablename,
- dict_table_t* table,
- trx_t* trx)
-{
- dberr_t err = DB_SUCCESS;
- ut_ad(!table->is_temporary());
-
- /* Remove the pointer to this table object from the list
- of modified tables by the transaction because the object
- is going to be destroyed below. */
- trx->mod_tables.erase(table);
-
- dict_sys.remove(table);
-
- if (dict_load_table(tablename, DICT_ERR_IGNORE_FK_NOKEY)) {
- ib::error() << "Not able to remove table "
- << ut_get_name(trx, tablename)
- << " from the dictionary cache!";
- err = DB_ERROR;
- }
-
- return(err);
-}
-
-/** Drop a table for MySQL.
-If the data dictionary was not already locked by the transaction,
-the transaction will be committed. Otherwise, the data dictionary
-will remain locked.
-@param[in] name Table name
-@param[in,out] trx Transaction handle
-@param[in] sqlcom type of SQL operation
-@param[in] create_failed true=create table failed
- because e.g. foreign key column
-@param[in] nonatomic Whether it is permitted to release
- and reacquire dict_sys.latch
-@return error code or DB_SUCCESS */
-dberr_t
-row_drop_table_for_mysql(
- const char* name,
- trx_t* trx,
- enum_sql_command sqlcom,
- bool create_failed,
- bool nonatomic)
-{
- dberr_t err;
- dict_foreign_t* foreign;
- dict_table_t* table;
- char* tablename = NULL;
- bool locked_dictionary = false;
- pars_info_t* info = NULL;
- mem_heap_t* heap = NULL;
-
-
- DBUG_ENTER("row_drop_table_for_mysql");
- DBUG_PRINT("row_drop_table_for_mysql", ("table: '%s'", name));
-
- ut_a(name != NULL);
-
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- trx->op_info = "dropping table";
-
- if (trx->dict_operation_lock_mode != RW_X_LATCH) {
- /* Prevent foreign key checks etc. while we are
- dropping the table */
-
- row_mysql_lock_data_dictionary(trx);
-
- locked_dictionary = true;
- nonatomic = true;
- }
-
- ut_d(dict_sys.assert_locked());
-
- table = dict_table_open_on_name(
- name, TRUE, FALSE,
- static_cast<dict_err_ignore_t>(
- DICT_ERR_IGNORE_INDEX_ROOT
- | DICT_ERR_IGNORE_CORRUPT));
-
- if (!table) {
- if (locked_dictionary) {
- row_mysql_unlock_data_dictionary(trx);
- }
- trx->op_info = "";
- DBUG_RETURN(DB_TABLE_NOT_FOUND);
- }
-
- std::vector<pfs_os_file_t> detached_handles;
-
- const bool is_temp_name = strstr(table->name.m_name,
- "/" TEMP_FILE_PREFIX);
-
- if (table->is_temporary()) {
- ut_ad(table->space == fil_system.temp_space);
- for (dict_index_t* index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
- btr_free(page_id_t(SRV_TMP_SPACE_ID, index->page));
- }
- /* Remove the pointer to this table object from the list
- of modified tables by the transaction because the object
- is going to be destroyed below. */
- trx->mod_tables.erase(table);
- table->release();
- dict_sys.remove(table);
- err = DB_SUCCESS;
- goto funct_exit_all_freed;
- }
-
- /* This function is called recursively via fts_drop_tables(). */
- if (!trx_is_started(trx)) {
- trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
- }
-
- /* Turn on this drop bit before we could release the dictionary
- latch */
- table->to_be_dropped = true;
-
- if (nonatomic) {
- /* This trx did not acquire any locks on dictionary
- table records yet. Thus it is safe to release and
- reacquire the data dictionary latches. */
- if (table->fts) {
- row_mysql_unlock_data_dictionary(trx);
- fts_optimize_remove_table(table);
- row_mysql_lock_data_dictionary(trx);
- }
-
- dict_stats_wait_bg_to_stop_using_table(table, trx);
- }
-
- /* make sure background stats thread is not running on the table */
- ut_ad(!(table->stats_bg_flag & BG_STAT_IN_PROGRESS));
- if (!table->no_rollback()) {
- if (table->space != fil_system.sys_space) {
- /* Delete the link file if used. */
- if (DICT_TF_HAS_DATA_DIR(table->flags)) {
- RemoteDatafile::delete_link_file(name);
- }
- }
-
- dict_stats_recalc_pool_del(table);
- dict_stats_defrag_pool_del(table, NULL);
- if (btr_defragment_active) {
- /* During fts_drop_orphaned_tables() the
- btr_defragment_mutex has not yet been
- initialized by btr_defragment_init(). */
- btr_defragment_remove_table(table);
- }
-
- if (UNIV_LIKELY(!strstr(name, "/" TEMP_FILE_PREFIX_INNODB))) {
- /* Remove any persistent statistics for this table,
- in a separate transaction. */
- char errstr[1024];
- err = dict_stats_drop_table(name, errstr,
- sizeof errstr);
- if (err != DB_SUCCESS) {
- ib::warn() << errstr;
- }
- }
- }
-
- dict_table_prevent_eviction(table);
- dict_table_close(table, TRUE, FALSE);
-
- /* Check if the table is referenced by foreign key constraints from
- some other table (not the table itself) */
-
- if (!srv_read_only_mode && trx->check_foreigns) {
-
- for (dict_foreign_set::iterator it
- = table->referenced_set.begin();
- it != table->referenced_set.end();
- ++it) {
-
- foreign = *it;
-
- const bool ref_ok = sqlcom == SQLCOM_DROP_DB
- && dict_tables_have_same_db(
- name,
- foreign->foreign_table_name_lookup);
-
- /* We should allow dropping a referenced table if creating
- that referenced table has failed for some reason. For example
- if referenced table is created but it column types that are
- referenced do not match. */
- if (foreign->foreign_table != table &&
- !create_failed && !ref_ok) {
-
- FILE* ef = dict_foreign_err_file;
-
- /* We only allow dropping a referenced table
- if FOREIGN_KEY_CHECKS is set to 0 */
-
- err = DB_CANNOT_DROP_CONSTRAINT;
-
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
-
- fputs(" Cannot drop table ", ef);
- ut_print_name(ef, trx, name);
- fputs("\n"
- "because it is referenced by ", ef);
- ut_print_name(ef, trx,
- foreign->foreign_table_name);
- putc('\n', ef);
- mutex_exit(&dict_foreign_err_mutex);
-
- goto funct_exit;
- }
- }
- }
-
- DBUG_EXECUTE_IF("row_drop_table_add_to_background", goto defer;);
-
- /* TODO: could we replace the counter n_foreign_key_checks_running
- with lock checks on the table? Acquire here an exclusive lock on the
- table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that
- they can cope with the table having been dropped here? Foreign key
- checks take an IS or IX lock on the table. */
-
- if (table->n_foreign_key_checks_running > 0) {
-defer:
- /* Rename #sql-backup to #sql-ib if table has open ref count
- while dropping the table. This scenario can happen
- when purge thread is waiting for dict_sys.mutex so
- that it could close the table. But drop table acquires
- dict_sys.mutex.
- In the future this should use 'tmp_file_prefix'!
- */
- if (!is_temp_name
- || strstr(table->name.m_name, "/#sql-backup-")) {
- heap = mem_heap_create(FN_REFLEN);
- const char* tmp_name
- = dict_mem_create_temporary_tablename(
- heap, table->name.m_name, table->id);
- ib::info() << "Deferring DROP TABLE " << table->name
- << "; renaming to " << tmp_name;
- err = row_rename_table_for_mysql(
- table->name.m_name, tmp_name, trx,
- false, false);
- } else {
- err = DB_SUCCESS;
- }
- if (err == DB_SUCCESS) {
- row_add_table_to_background_drop_list(table->id);
- }
- goto funct_exit;
- }
-
- /* Remove all locks that are on the table or its records, if there
- are no references to the table but it has record locks, we release
- the record locks unconditionally. One use case is:
-
- CREATE TABLE t2 (PRIMARY KEY (a)) SELECT * FROM t1;
-
- If after the user transaction has done the SELECT and there is a
- problem in completing the CREATE TABLE operation, MySQL will drop
- the table. InnoDB will create a new background transaction to do the
- actual drop, the trx instance that is passed to this function. To
- preserve existing behaviour we remove the locks but ideally we
- shouldn't have to. There should never be record locks on a table
- that is going to be dropped. */
-
- if (table->get_ref_count() > 0 || table->n_rec_locks > 0
- || lock_table_has_locks(table)) {
- goto defer;
- }
-
- /* The "to_be_dropped" marks table that is to be dropped, but
- has not been dropped, instead, was put in the background drop
- list due to being used by concurrent DML operations. Clear it
- here since there are no longer any concurrent activities on it,
- and it is free to be dropped */
- table->to_be_dropped = false;
-
- switch (trx_get_dict_operation(trx)) {
- case TRX_DICT_OP_NONE:
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- trx->table_id = table->id;
- case TRX_DICT_OP_TABLE:
- break;
- case TRX_DICT_OP_INDEX:
- /* If the transaction was previously flagged as
- TRX_DICT_OP_INDEX, we should be dropping auxiliary
- tables for full-text indexes. */
- ut_ad(strstr(table->name.m_name, "/FTS_"));
- }
-
- /* Mark all indexes unavailable in the data dictionary cache
- before starting to drop the table. */
-
- unsigned* page_no;
- unsigned* page_nos;
- heap = mem_heap_create(
- 200 + UT_LIST_GET_LEN(table->indexes) * sizeof *page_nos);
- tablename = mem_heap_strdup(heap, name);
-
- page_no = page_nos = static_cast<unsigned*>(
- mem_heap_alloc(
- heap,
- UT_LIST_GET_LEN(table->indexes) * sizeof *page_no));
-
- for (dict_index_t* index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
- rw_lock_x_lock(dict_index_get_lock(index));
- /* Save the page numbers so that we can restore them
- if the operation fails. */
- *page_no++ = index->page;
- /* Mark the index unusable. */
- index->page = FIL_NULL;
- rw_lock_x_unlock(dict_index_get_lock(index));
- }
-
- /* Deleting a row from SYS_INDEXES table will invoke
- dict_drop_index_tree(). */
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "name", name);
-
- if (sqlcom != SQLCOM_TRUNCATE
- && strchr(name, '/')
- && dict_table_get_low("SYS_FOREIGN")
- && dict_table_get_low("SYS_FOREIGN_COLS")) {
- err = que_eval_sql(
- info,
- "PROCEDURE DROP_FOREIGN_PROC () IS\n"
- "fid CHAR;\n"
-
- "DECLARE CURSOR fk IS\n"
- "SELECT ID FROM SYS_FOREIGN\n"
- "WHERE FOR_NAME = :name\n"
- "AND TO_BINARY(FOR_NAME) = TO_BINARY(:name)\n"
- "FOR UPDATE;\n"
-
- "BEGIN\n"
- "OPEN fk;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH fk INTO fid;\n"
- " IF (SQL % NOTFOUND) THEN RETURN; END IF;\n"
- " DELETE FROM SYS_FOREIGN_COLS WHERE ID=fid;\n"
- " DELETE FROM SYS_FOREIGN WHERE ID=fid;\n"
- "END LOOP;\n"
- "CLOSE fk;\n"
- "END;\n", FALSE, trx);
- if (err == DB_SUCCESS) {
- info = pars_info_create();
- pars_info_add_str_literal(info, "name", name);
- goto do_drop;
- }
- } else {
-do_drop:
- if (dict_table_get_low("SYS_VIRTUAL")) {
- err = que_eval_sql(
- info,
- "PROCEDURE DROP_VIRTUAL_PROC () IS\n"
- "tid CHAR;\n"
-
- "BEGIN\n"
- "SELECT ID INTO tid FROM SYS_TABLES\n"
- "WHERE NAME = :name FOR UPDATE;\n"
- "IF (SQL % NOTFOUND) THEN RETURN;"
- " END IF;\n"
- "DELETE FROM SYS_VIRTUAL"
- " WHERE TABLE_ID = tid;\n"
- "END;\n", FALSE, trx);
- if (err == DB_SUCCESS) {
- info = pars_info_create();
- pars_info_add_str_literal(
- info, "name", name);
- }
- } else {
- err = DB_SUCCESS;
- }
-
- err = err == DB_SUCCESS ? que_eval_sql(
- info,
- "PROCEDURE DROP_TABLE_PROC () IS\n"
- "tid CHAR;\n"
- "iid CHAR;\n"
-
- "DECLARE CURSOR cur_idx IS\n"
- "SELECT ID FROM SYS_INDEXES\n"
- "WHERE TABLE_ID = tid FOR UPDATE;\n"
-
- "BEGIN\n"
- "SELECT ID INTO tid FROM SYS_TABLES\n"
- "WHERE NAME = :name FOR UPDATE;\n"
- "IF (SQL % NOTFOUND) THEN RETURN; END IF;\n"
-
- "OPEN cur_idx;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH cur_idx INTO iid;\n"
- " IF (SQL % NOTFOUND) THEN EXIT; END IF;\n"
- " DELETE FROM SYS_FIELDS\n"
- " WHERE INDEX_ID = iid;\n"
- " DELETE FROM SYS_INDEXES\n"
- " WHERE ID = iid AND TABLE_ID = tid;\n"
- "END LOOP;\n"
- "CLOSE cur_idx;\n"
-
- "DELETE FROM SYS_COLUMNS WHERE TABLE_ID=tid;\n"
- "DELETE FROM SYS_TABLES WHERE NAME=:name;\n"
-
- "END;\n", FALSE, trx) : err;
-
- if (err == DB_SUCCESS && table->space
- && dict_table_get_low("SYS_TABLESPACES")
- && dict_table_get_low("SYS_DATAFILES")) {
- info = pars_info_create();
- pars_info_add_int4_literal(info, "id",
- lint(table->space_id));
- err = que_eval_sql(
- info,
- "PROCEDURE DROP_SPACE_PROC () IS\n"
- "BEGIN\n"
- "DELETE FROM SYS_TABLESPACES\n"
- "WHERE SPACE = :id;\n"
- "DELETE FROM SYS_DATAFILES\n"
- "WHERE SPACE = :id;\n"
- "END;\n", FALSE, trx);
- }
- }
-
- switch (err) {
- fil_space_t* space;
- char* filepath;
- case DB_SUCCESS:
- if (!table->no_rollback()) {
- err = row_drop_ancillary_fts_tables(table, trx);
- if (err != DB_SUCCESS) {
- break;
- }
- }
-
- space = table->space;
- ut_ad(!space || space->id == table->space_id);
- /* Determine the tablespace filename before we drop
- dict_table_t. */
- if (DICT_TF_HAS_DATA_DIR(table->flags)) {
- dict_get_and_save_data_dir_path(table, true);
- ut_ad(table->data_dir_path || !space);
- filepath = space ? NULL : fil_make_filepath(
- table->data_dir_path,
- table->name.m_name, IBD,
- table->data_dir_path != NULL);
- } else {
- filepath = space ? NULL : fil_make_filepath(
- NULL, table->name.m_name, IBD, false);
- }
-
- /* Free the dict_table_t object. */
- err = row_drop_table_from_cache(tablename, table, trx);
- if (err != DB_SUCCESS) {
- ut_free(filepath);
- break;
- }
-
- /* Do not attempt to drop known-to-be-missing tablespaces,
- nor the system tablespace. */
- if (!space) {
- fil_delete_file(filepath);
- ut_free(filepath);
- break;
- }
-
- ut_ad(!filepath);
-
- if (space->id != TRX_SYS_SPACE) {
- err = fil_delete_tablespace(space->id, false,
- &detached_handles);
- }
- break;
-
- case DB_OUT_OF_FILE_SPACE:
- err = DB_MUST_GET_MORE_FILE_SPACE;
- trx->error_state = err;
- row_mysql_handle_errors(&err, trx, NULL, NULL);
-
- /* raise error */
- ut_error;
- break;
-
- case DB_TOO_MANY_CONCURRENT_TRXS:
- /* Cannot even find a free slot for the
- the undo log. We can directly exit here
- and return the DB_TOO_MANY_CONCURRENT_TRXS
- error. */
-
- default:
- /* This is some error we do not expect. Print
- the error number and rollback the transaction */
- ib::error() << "Unknown error code " << err << " while"
- " dropping table: "
- << ut_get_name(trx, tablename) << ".";
-
- trx->error_state = DB_SUCCESS;
- trx->rollback();
- trx->error_state = DB_SUCCESS;
-
- /* Mark all indexes available in the data dictionary
- cache again. */
-
- page_no = page_nos;
-
- for (dict_index_t* index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
- rw_lock_x_lock(dict_index_get_lock(index));
- ut_a(index->page == FIL_NULL);
- index->page = *page_no++;
- rw_lock_x_unlock(dict_index_get_lock(index));
- }
- }
-
- if (err != DB_SUCCESS && table != NULL) {
- /* Drop table has failed with error but as drop table is not
- transaction safe we should mark the table as corrupted to avoid
- unwarranted follow-up action on this table that can result
- in more serious issues. */
-
- table->corrupted = true;
- for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
- index != NULL;
- index = UT_LIST_GET_NEXT(indexes, index)) {
- dict_set_corrupted(index, trx, "DROP TABLE");
- }
- }
-
-funct_exit:
- if (heap) {
- mem_heap_free(heap);
- }
-
-funct_exit_all_freed:
- if (locked_dictionary) {
-
- if (trx_is_started(trx)) {
-
- trx_commit_for_mysql(trx);
- }
-
- /* Add the table to fts queue if drop table fails */
- if (err != DB_SUCCESS && table->fts) {
- fts_optimize_add_table(table);
- }
-
- row_mysql_unlock_data_dictionary(trx);
- }
-
- for (const auto& handle : detached_handles) {
- ut_ad(handle != OS_FILE_CLOSED);
- os_file_close(handle);
- }
-
- trx->op_info = "";
-
- DBUG_RETURN(err);
-}
-
-/** Drop a table after failed CREATE TABLE. */
-dberr_t row_drop_table_after_create_fail(const char* name, trx_t* trx)
-{
- ib::warn() << "Dropping incompletely created " << name << " table.";
- return row_drop_table_for_mysql(name, trx, SQLCOM_DROP_DB, true);
-}
-
-/*******************************************************************//**
-Drop all foreign keys in a database, see Bug#18942.
-Called at the end of row_drop_database_for_mysql().
-@return error code or DB_SUCCESS */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-drop_all_foreign_keys_in_db(
-/*========================*/
-	const char*	name,	/*!< in: database name which ends in '/' */
- trx_t* trx) /*!< in: transaction handle */
-{
- pars_info_t* pinfo;
- dberr_t err;
-
- ut_a(name[strlen(name) - 1] == '/');
-
- pinfo = pars_info_create();
-
- pars_info_add_str_literal(pinfo, "dbname", name);
-
-/** true if for_name is not prefixed with dbname */
-#define TABLE_NOT_IN_THIS_DB \
-"SUBSTR(for_name, 0, LENGTH(:dbname)) <> :dbname"
-
- err = que_eval_sql(pinfo,
- "PROCEDURE DROP_ALL_FOREIGN_KEYS_PROC () IS\n"
- "foreign_id CHAR;\n"
- "for_name CHAR;\n"
- "found INT;\n"
- "DECLARE CURSOR cur IS\n"
- "SELECT ID, FOR_NAME FROM SYS_FOREIGN\n"
- "WHERE FOR_NAME >= :dbname\n"
- "LOCK IN SHARE MODE\n"
- "ORDER BY FOR_NAME;\n"
- "BEGIN\n"
- "found := 1;\n"
- "OPEN cur;\n"
- "WHILE found = 1 LOOP\n"
- " FETCH cur INTO foreign_id, for_name;\n"
- " IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- " ELSIF (" TABLE_NOT_IN_THIS_DB ") THEN\n"
- " found := 0;\n"
- " ELSIF (1=1) THEN\n"
- " DELETE FROM SYS_FOREIGN_COLS\n"
- " WHERE ID = foreign_id;\n"
- " DELETE FROM SYS_FOREIGN\n"
- " WHERE ID = foreign_id;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE cur;\n"
- "COMMIT WORK;\n"
- "END;\n",
- FALSE, /* do not reserve dict mutex,
- we are already holding it */
- trx);
-
- return(err);
-}
-
-/** Drop a database for MySQL.
-@param[in] name database name which ends at '/'
-@param[in] trx transaction handle
-@param[out] found number of dropped tables/partitions
-@return error code or DB_SUCCESS */
-dberr_t
-row_drop_database_for_mysql(
- const char* name,
- trx_t* trx,
- ulint* found)
-{
- dict_table_t* table;
- char* table_name;
- dberr_t err = DB_SUCCESS;
- ulint namelen = strlen(name);
- bool is_partition = false;
-
- ut_ad(found != NULL);
-
- DBUG_ENTER("row_drop_database_for_mysql");
-
- DBUG_PRINT("row_drop_database_for_mysql", ("db: '%s'", name));
-
- ut_a(name != NULL);
- /* Assert DB name or partition name. */
- if (name[namelen - 1] == '#') {
- ut_ad(name[namelen - 2] != '/');
- is_partition = true;
- trx->op_info = "dropping partitions";
- } else {
- ut_a(name[namelen - 1] == '/');
- trx->op_info = "dropping database";
- }
-
- *found = 0;
-
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
-
- trx_start_if_not_started_xa(trx, true);
-
-loop:
- row_mysql_lock_data_dictionary(trx);
-
- while ((table_name = dict_get_first_table_name_in_db(name))) {
- /* Drop parent table if it is a fts aux table, to
- avoid accessing dropped fts aux tables in information
-		schema when parent table still exists.
- Note: Drop parent table will drop fts aux tables. */
- char* parent_table_name = NULL;
- table_id_t table_id;
- index_id_t index_id;
-
- if (fts_check_aux_table(
- table_name, &table_id, &index_id)) {
- dict_table_t* parent_table = dict_table_open_on_id(
- table_id, TRUE, DICT_TABLE_OP_NORMAL);
- if (parent_table != NULL) {
- parent_table_name = mem_strdupl(
- parent_table->name.m_name,
- strlen(parent_table->name.m_name));
- dict_table_close(parent_table, TRUE, FALSE);
- }
- }
-
- if (parent_table_name != NULL) {
- ut_free(table_name);
- table_name = parent_table_name;
- }
-
- ut_a(memcmp(table_name, name, namelen) == 0);
-
- table = dict_table_open_on_name(
- table_name, TRUE, FALSE, static_cast<dict_err_ignore_t>(
- DICT_ERR_IGNORE_INDEX_ROOT
- | DICT_ERR_IGNORE_CORRUPT));
-
- if (!table) {
- ib::error() << "Cannot load table " << table_name
- << " from InnoDB internal data dictionary"
- " during drop database";
- ut_free(table_name);
- err = DB_TABLE_NOT_FOUND;
- break;
-
- }
-
- if (!table->name.is_temporary()) {
- /* There could be orphan temp tables left from
- interrupted alter table. Leave them, and handle
- the rest.*/
- if (table->can_be_evicted
- && (name[namelen - 1] != '#')) {
- ib::warn() << "Orphan table encountered during"
- " DROP DATABASE. This is possible if '"
- << table->name << ".frm' was lost.";
- }
-
- if (!table->is_readable() && !table->space) {
- ib::warn() << "Missing .ibd file for table "
- << table->name << ".";
- }
- }
-
- dict_table_close(table, TRUE, FALSE);
-
- /* The dict_table_t object must not be accessed before
- dict_table_open() or after dict_table_close(). But this is OK
-		if we are holding the dict_sys.mutex. */
- ut_ad(mutex_own(&dict_sys.mutex));
-
- /* Disable statistics on the found table. */
- if (!dict_stats_stop_bg(table)) {
- row_mysql_unlock_data_dictionary(trx);
-
- os_thread_sleep(250000);
-
- ut_free(table_name);
-
- goto loop;
- }
-
- /* Wait until MySQL does not have any queries running on
- the table */
-
- if (table->get_ref_count() > 0) {
- row_mysql_unlock_data_dictionary(trx);
-
- ib::warn() << "MySQL is trying to drop database "
- << ut_get_name(trx, name) << " though"
- " there are still open handles to table "
- << table->name << ".";
-
- os_thread_sleep(1000000);
-
- ut_free(table_name);
-
- goto loop;
- }
-
- err = row_drop_table_for_mysql(
- table_name, trx, SQLCOM_DROP_DB);
- trx_commit_for_mysql(trx);
-
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- ib::error() << "DROP DATABASE "
- << ut_get_name(trx, name) << " failed"
- " with error (" << err << ") for"
- " table " << ut_get_name(trx, table_name);
- ut_free(table_name);
- break;
- }
-
- ut_free(table_name);
- (*found)++;
- }
-
- /* Partitioning does not yet support foreign keys. */
- if (err == DB_SUCCESS && !is_partition) {
- /* after dropping all tables try to drop all leftover
- foreign keys in case orphaned ones exist */
- err = drop_all_foreign_keys_in_db(name, trx);
-
- if (err != DB_SUCCESS) {
- const std::string& db = ut_get_name(trx, name);
- ib::error() << "DROP DATABASE " << db << " failed with"
- " error " << err << " while dropping all"
- " foreign keys";
- }
- }
-
- trx_commit_for_mysql(trx);
-
- row_mysql_unlock_data_dictionary(trx);
-
- trx->op_info = "";
+ if (fts_exist)
+ {
+ fts_optimize_remove_table(table);
+ purge_sys.stop_FTS(*table);
+ err= fts_lock_tables(trx, *table);
+ if (err != DB_SUCCESS)
+ {
+rollback:
+ if (fts_exist)
+ {
+ purge_sys.resume_FTS();
+ fts_optimize_add_table(table);
+ }
+ trx->rollback();
+ if (trx->dict_operation_lock_mode)
+ row_mysql_unlock_data_dictionary(trx);
+ return err;
+ }
+ }
- DBUG_RETURN(err);
+ row_mysql_lock_data_dictionary(trx);
+ trx->op_info = "discarding tablespace";
+ trx->dict_operation= true;
+
+ /* We serialize data dictionary operations with dict_sys.latch:
+ this is to avoid deadlocks during data dictionary operations */
+
+ err= row_discard_tablespace_foreign_key_checks(trx, table);
+ if (err != DB_SUCCESS)
+ goto rollback;
+
+ /* Note: The following cannot be rolled back. Rollback would see the
+ UPDATE of SYS_INDEXES.TABLE_ID as two operations: DELETE and INSERT.
+ It would invoke btr_free_if_exists() when rolling back the INSERT,
+ effectively dropping all indexes of the table. Furthermore, calls like
+ ibuf_delete_for_discarded_space() are already discarding data
+ before the transaction is committed.
+
+ It would be better to remove the integrity-breaking
+ ALTER TABLE...DISCARD TABLESPACE operation altogether. */
+ table->file_unreadable= true;
+ table->space= nullptr;
+ table->flags2|= DICT_TF2_DISCARDED;
+ err= row_discard_tablespace(trx, table);
+ DBUG_EXECUTE_IF("ib_discard_before_commit_crash",
+ log_buffer_flush_to_disk(); DBUG_SUICIDE(););
+ /* FTS_ tables may be deleted */
+ std::vector<pfs_os_file_t> deleted;
+ trx->commit(deleted);
+ const auto space_id= table->space_id;
+ pfs_os_file_t d= fil_delete_tablespace(space_id);
+ DBUG_EXECUTE_IF("ib_discard_after_commit_crash", DBUG_SUICIDE(););
+ row_mysql_unlock_data_dictionary(trx);
+
+ if (d != OS_FILE_CLOSED)
+ os_file_close(d);
+ for (pfs_os_file_t d : deleted)
+ os_file_close(d);
+
+ if (fts_exist)
+ purge_sys.resume_FTS();
+
+ buf_flush_remove_pages(space_id);
+ trx->op_info= "";
+ return err;
}
/****************************************************************//**
@@ -4069,8 +2516,7 @@ row_delete_constraint_low(
"BEGIN\n"
"DELETE FROM SYS_FOREIGN_COLS WHERE ID = :id;\n"
"DELETE FROM SYS_FOREIGN WHERE ID = :id;\n"
- "END;\n"
- , FALSE, trx));
+ "END;\n", trx));
}
/****************************************************************//**
@@ -4115,7 +2561,6 @@ row_rename_table_for_mysql(
const char* old_name, /*!< in: old table name */
const char* new_name, /*!< in: new table name */
trx_t* trx, /*!< in/out: transaction */
- bool commit, /*!< in: whether to commit trx */
bool use_fk) /*!< in: whether to parse and enforce
FOREIGN KEY constraints */
{
@@ -4126,15 +2571,11 @@ row_rename_table_for_mysql(
ulint n_constraints_to_drop = 0;
ibool old_is_tmp, new_is_tmp;
pars_info_t* info = NULL;
- int retry;
- bool aux_fts_rename = false;
- char* is_part = NULL;
ut_a(old_name != NULL);
ut_a(new_name != NULL);
ut_ad(trx->state == TRX_STATE_ACTIVE);
- const bool dict_locked = trx->dict_operation_lock_mode == RW_X_LATCH;
- ut_ad(!commit || dict_locked);
+ ut_ad(trx->dict_operation_lock_mode);
if (high_level_read_only) {
return(DB_READ_ONLY);
@@ -4145,21 +2586,12 @@ row_rename_table_for_mysql(
old_is_tmp = dict_table_t::is_temporary_name(old_name);
new_is_tmp = dict_table_t::is_temporary_name(new_name);
- table = dict_table_open_on_name(old_name, dict_locked, FALSE,
+ table = dict_table_open_on_name(old_name, true,
DICT_ERR_IGNORE_FK_NOKEY);
- /* We look for pattern #P# to see if the table is partitioned
- MySQL table. */
-#ifdef __WIN__
- is_part = strstr((char *)old_name, (char *)"#p#");
-#else
- is_part = strstr((char *)old_name, (char *)"#P#");
-#endif /* __WIN__ */
-
- /* MySQL partition engine hard codes the file name
- separator as "#P#". The text case is fixed even if
- lower_case_table_names is set to 1 or 2. This is true
- for sub-partition names as well. InnoDB always
+ /* MariaDB partition engine hard codes the file name
+ separator as "#P#" and "#SP#". The text case is fixed even if
+ lower_case_table_names is set to 1 or 2. InnoDB always
normalises file names to lower case on Windows, this
can potentially cause problems when copying/moving
tables between platforms.
@@ -4173,11 +2605,10 @@ row_rename_table_for_mysql(
sensitive platform in Windows, we might need to
check the existence of table name without lowering
case them in the system table. */
- if (!table &&
- is_part &&
- innobase_get_lower_case_table_names() == 1) {
+ if (!table && lower_case_table_names == 1
+ && strstr(old_name, table_name_t::part_suffix)) {
char par_case_name[MAX_FULL_NAME_LEN + 1];
-#ifndef __WIN__
+#ifndef _WIN32
/* Check for the table using lower
case name, including the partition
separator "P" */
@@ -4193,16 +2624,19 @@ row_rename_table_for_mysql(
normalize_table_name_c_low(
par_case_name, old_name, FALSE);
#endif
- table = dict_table_open_on_name(par_case_name, dict_locked, FALSE,
+ table = dict_table_open_on_name(par_case_name, true,
DICT_ERR_IGNORE_FK_NOKEY);
}
if (!table) {
err = DB_TABLE_NOT_FOUND;
goto funct_exit;
+ }
+
+ ut_ad(!table->is_temporary());
- } else if (!table->is_readable() && !table->space
- && !(table->flags2 & DICT_TF2_DISCARDED)) {
+ if (!table->is_readable() && !table->space
+ && !(table->flags2 & DICT_TF2_DISCARDED)) {
err = DB_TABLE_NOT_FOUND;
@@ -4230,35 +2664,12 @@ row_rename_table_for_mysql(
}
}
- /* Is a foreign key check running on this table? */
- for (retry = 0; retry < 100
- && table->n_foreign_key_checks_running > 0; ++retry) {
- row_mysql_unlock_data_dictionary(trx);
- os_thread_yield();
- row_mysql_lock_data_dictionary(trx);
- }
+ err = trx_undo_report_rename(trx, table);
- if (table->n_foreign_key_checks_running > 0) {
- ib::error() << "In ALTER TABLE "
- << ut_get_name(trx, old_name)
- << " a FOREIGN KEY check is running. Cannot rename"
- " table.";
- err = DB_TABLE_IN_FK_CHECK;
+ if (err != DB_SUCCESS) {
goto funct_exit;
}
- if (!table->is_temporary()) {
- if (commit) {
- dict_stats_wait_bg_to_stop_using_table(table, trx);
- }
-
- err = trx_undo_report_rename(trx, table);
-
- if (err != DB_SUCCESS) {
- goto funct_exit;
- }
- }
-
/* We use the private SQL parser of Innobase to generate the query
graphs needed in updating the dictionary data from system tables. */
@@ -4273,46 +2684,12 @@ row_rename_table_for_mysql(
"UPDATE SYS_TABLES"
" SET NAME = :new_table_name\n"
" WHERE NAME = :old_table_name;\n"
- "END;\n"
- , FALSE, trx);
-
- /* Assume the caller guarantees destination name doesn't exist. */
- ut_ad(err != DB_DUPLICATE_KEY);
-
- /* SYS_TABLESPACES and SYS_DATAFILES need to be updated if
- the table is in a single-table tablespace. */
- if (err != DB_SUCCESS || !dict_table_is_file_per_table(table)) {
- } else if (table->space) {
- /* If old path and new path are the same means tablename
- has not changed and only the database name holding the table
- has changed so we need to make the complete filepath again. */
- char* new_path = dict_tables_have_same_db(old_name, new_name)
- ? os_file_make_new_pathname(
- table->space->chain.start->name, new_name)
- : fil_make_filepath(NULL, new_name, IBD, false);
+ "END;\n", trx);
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "new_table_name", new_name);
- pars_info_add_str_literal(info, "new_path_name", new_path);
- pars_info_add_int4_literal(info, "space_id", table->space_id);
-
- err = que_eval_sql(info,
- "PROCEDURE RENAME_SPACE () IS\n"
- "BEGIN\n"
- "UPDATE SYS_TABLESPACES"
- " SET NAME = :new_table_name\n"
- " WHERE SPACE = :space_id;\n"
- "UPDATE SYS_DATAFILES"
- " SET PATH = :new_path_name\n"
- " WHERE SPACE = :space_id;\n"
- "END;\n"
- , FALSE, trx);
-
- ut_free(new_path);
- }
if (err != DB_SUCCESS) {
- goto err_exit;
+ // Assume the caller guarantees destination name doesn't exist.
+ ut_ad(err != DB_DUPLICATE_KEY);
+ goto rollback_and_exit;
}
if (!new_is_tmp) {
@@ -4425,8 +2802,7 @@ row_rename_table_for_mysql(
"WHERE REF_NAME = :old_table_name\n"
" AND TO_BINARY(REF_NAME)\n"
" = TO_BINARY(:old_table_name);\n"
- "END;\n"
- , FALSE, trx);
+ "END;\n", trx);
} else if (n_constraints_to_drop > 0) {
/* Drop some constraints of tmp tables. */
@@ -4451,54 +2827,29 @@ row_rename_table_for_mysql(
|| DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID))
&& !dict_tables_have_same_db(old_name, new_name)) {
err = fts_rename_aux_tables(table, new_name, trx);
- if (err != DB_TABLE_NOT_FOUND) {
- aux_fts_rename = true;
- }
}
- if (err != DB_SUCCESS) {
-err_exit:
- if (err == DB_DUPLICATE_KEY) {
- ib::error() << "Possible reasons:";
- ib::error() << "(1) Table rename would cause two"
- " FOREIGN KEY constraints to have the same"
- " internal name in case-insensitive"
- " comparison.";
- ib::error() << "(2) Table "
- << ut_get_name(trx, new_name)
- << " exists in the InnoDB internal data"
- " dictionary though MySQL is trying to rename"
- " table " << ut_get_name(trx, old_name)
- << " to it. Have you deleted the .frm file and"
- " not used DROP TABLE?";
- ib::info() << TROUBLESHOOTING_MSG;
- ib::error() << "If table "
- << ut_get_name(trx, new_name)
- << " is a temporary table #sql..., then"
- " it can be that there are still queries"
- " running on the table, and it will be dropped"
- " automatically when the queries end. You can"
- " drop the orphaned table inside InnoDB by"
- " creating an InnoDB table with the same name"
- " in another database and copying the .frm file"
- " to the current database. Then MySQL thinks"
- " the table exists, and DROP TABLE will"
- " succeed.";
- }
+ switch (err) {
+ case DB_DUPLICATE_KEY:
+ ib::error() << "Table rename might cause two"
+ " FOREIGN KEY constraints to have the same"
+ " internal name in case-insensitive comparison.";
+ ib::info() << TROUBLESHOOTING_MSG;
+ /* fall through */
+ rollback_and_exit:
+ default:
trx->error_state = DB_SUCCESS;
trx->rollback();
trx->error_state = DB_SUCCESS;
- } else {
- /* The following call will also rename the .ibd data file if
- the table is stored in a single-table tablespace */
-
+ break;
+ case DB_SUCCESS:
+ DEBUG_SYNC_C("innodb_rename_in_cache");
+ /* The following call will also rename the .ibd file */
err = dict_table_rename_in_cache(
- table, new_name, !new_is_tmp);
+ table, span<const char>{new_name,strlen(new_name)},
+ false);
if (err != DB_SUCCESS) {
- trx->error_state = DB_SUCCESS;
- trx->rollback();
- trx->error_state = DB_SUCCESS;
- goto funct_exit;
+ goto rollback_and_exit;
}
/* In case of copy alter, template db_name and
@@ -4513,7 +2864,7 @@ err_exit:
dict_names_t fk_tables;
err = dict_load_foreigns(
- new_name, NULL, false,
+ new_name, nullptr, trx->id,
!old_is_tmp || trx->check_foreigns,
use_fk
? DICT_ERR_IGNORE_NONE
@@ -4521,7 +2872,6 @@ err_exit:
fk_tables);
if (err != DB_SUCCESS) {
-
if (old_is_tmp) {
/* In case of copy alter, ignore the
loading of foreign key constraint
@@ -4535,7 +2885,7 @@ err_exit:
" definition.";
if (!trx->check_foreigns) {
err = DB_SUCCESS;
- goto funct_exit;
+ break;
}
} else {
ib::error() << "In RENAME TABLE table "
@@ -4545,22 +2895,14 @@ err_exit:
" with the new table definition.";
}
- trx->error_state = DB_SUCCESS;
- trx->rollback();
- trx->error_state = DB_SUCCESS;
+ goto rollback_and_exit;
}
/* Check whether virtual column or stored column affects
the foreign key constraint of the table. */
- if (dict_foreigns_has_s_base_col(
- table->foreign_set, table)) {
+ if (dict_foreigns_has_s_base_col(table->foreign_set, table)) {
err = DB_NO_FK_ON_S_BASE_COL;
- ut_a(DB_SUCCESS == dict_table_rename_in_cache(
- table, old_name, FALSE));
- trx->error_state = DB_SUCCESS;
- trx->rollback();
- trx->error_state = DB_SUCCESS;
- goto funct_exit;
+ goto rollback_and_exit;
}
/* Fill the virtual column set in foreign when
@@ -4569,8 +2911,8 @@ err_exit:
dict_mem_table_fill_foreign_vcol_set(table);
while (!fk_tables.empty()) {
- dict_load_table(fk_tables.front(),
- DICT_ERR_IGNORE_NONE);
+ const char *f = fk_tables.front();
+ dict_sys.load_table({f, strlen(f)});
fk_tables.pop_front();
}
@@ -4578,47 +2920,8 @@ err_exit:
}
funct_exit:
- if (aux_fts_rename && err != DB_SUCCESS
- && table != NULL && (table->space != 0)) {
-
- char* orig_name = table->name.m_name;
- trx_t* trx_bg = trx_create();
-
- /* If the first fts_rename fails, the trx would
- be rolled back and committed, we can't use it any more,
- so we have to start a new background trx here. */
- ut_a(trx_state_eq(trx_bg, TRX_STATE_NOT_STARTED));
- trx_bg->op_info = "Revert the failing rename "
- "for fts aux tables";
- trx_bg->dict_operation_lock_mode = RW_X_LATCH;
- trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE);
-
- /* If rename fails and table has its own tablespace,
- we need to call fts_rename_aux_tables again to
- revert the ibd file rename, which is not under the
- control of trx. Also notice the parent table name
- in cache is not changed yet. If the reverting fails,
- the ibd data may be left in the new database, which
- can be fixed only manually. */
- table->name.m_name = const_cast<char*>(new_name);
- fts_rename_aux_tables(table, old_name, trx_bg);
- table->name.m_name = orig_name;
-
- trx_bg->dict_operation_lock_mode = 0;
- trx_commit_for_mysql(trx_bg);
- trx_bg->free();
- }
-
- if (table != NULL) {
- if (commit && !table->is_temporary()) {
- table->stats_bg_flag &= byte(~BG_STAT_SHOULD_QUIT);
- }
- dict_table_close(table, dict_locked, FALSE);
- }
-
- if (commit) {
- DEBUG_SYNC(trx->mysql_thd, "before_rename_table_commit");
- trx_commit_for_mysql(trx);
+ if (table) {
+ table->release();
}
if (UNIV_LIKELY_NULL(heap)) {
@@ -4629,214 +2932,3 @@ funct_exit:
return(err);
}
-
-/*********************************************************************//**
-Scans an index for either COUNT(*) or CHECK TABLE.
-If CHECK TABLE; Checks that the index contains entries in an ascending order,
-unique constraint is not broken, and calculates the number of index entries
-in the read view of the current transaction.
-@return DB_SUCCESS or other error */
-dberr_t
-row_scan_index_for_mysql(
-/*=====================*/
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct
- in MySQL handle */
- const dict_index_t* index, /*!< in: index */
- ulint* n_rows) /*!< out: number of entries
- seen in the consistent read */
-{
- dtuple_t* prev_entry = NULL;
- ulint matched_fields;
- byte* buf;
- dberr_t ret;
- rec_t* rec;
- int cmp;
- ibool contains_null;
- ulint i;
- ulint cnt;
- mem_heap_t* heap = NULL;
- rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs* offsets;
- rec_offs_init(offsets_);
-
- *n_rows = 0;
-
- /* Don't support RTree Leaf level scan */
- ut_ad(!dict_index_is_spatial(index));
-
- if (dict_index_is_clust(index)) {
- /* The clustered index of a table is always available.
- During online ALTER TABLE that rebuilds the table, the
- clustered index in the old table will have
- index->online_log pointing to the new table. All
- indexes of the old table will remain valid and the new
-		table will be inaccessible to MySQL until the
- completion of the ALTER TABLE. */
- } else if (dict_index_is_online_ddl(index)
- || (index->type & DICT_FTS)) {
- /* Full Text index are implemented by auxiliary tables,
- not the B-tree. We also skip secondary indexes that are
- being created online. */
- return(DB_SUCCESS);
- }
-
- ulint bufsize = std::max<ulint>(srv_page_size,
- prebuilt->mysql_row_len);
- buf = static_cast<byte*>(ut_malloc_nokey(bufsize));
- heap = mem_heap_create(100);
-
- cnt = 1000;
-
- ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, 0);
-loop:
- /* Check thd->killed every 1,000 scanned rows */
- if (--cnt == 0) {
- if (trx_is_interrupted(prebuilt->trx)) {
- ret = DB_INTERRUPTED;
- goto func_exit;
- }
- cnt = 1000;
- }
-
- switch (ret) {
- case DB_SUCCESS:
- break;
- case DB_DEADLOCK:
- case DB_LOCK_TABLE_FULL:
- case DB_LOCK_WAIT_TIMEOUT:
- case DB_INTERRUPTED:
- goto func_exit;
- default:
- ib::warn() << "CHECK TABLE on index " << index->name << " of"
- " table " << index->table->name << " returned " << ret;
- /* (this error is ignored by CHECK TABLE) */
- /* fall through */
- case DB_END_OF_INDEX:
- ret = DB_SUCCESS;
-func_exit:
- ut_free(buf);
- mem_heap_free(heap);
-
- return(ret);
- }
-
- *n_rows = *n_rows + 1;
-
- /* else this code is doing handler::check() for CHECK TABLE */
-
- /* row_search... returns the index record in buf, record origin offset
- within buf stored in the first 4 bytes, because we have built a dummy
- template */
-
- rec = buf + mach_read_from_4(buf);
-
- offsets = rec_get_offsets(rec, index, offsets_, index->n_core_fields,
- ULINT_UNDEFINED, &heap);
-
- if (prev_entry != NULL) {
- matched_fields = 0;
-
- cmp = cmp_dtuple_rec_with_match(prev_entry, rec, offsets,
- &matched_fields);
- contains_null = FALSE;
-
- /* In a unique secondary index we allow equal key values if
- they contain SQL NULLs */
-
- for (i = 0;
- i < dict_index_get_n_ordering_defined_by_user(index);
- i++) {
- if (UNIV_SQL_NULL == dfield_get_len(
- dtuple_get_nth_field(prev_entry, i))) {
-
- contains_null = TRUE;
- break;
- }
- }
-
- const char* msg;
-
- if (cmp > 0) {
- ret = DB_INDEX_CORRUPT;
- msg = "index records in a wrong order in ";
-not_ok:
- ib::error()
- << msg << index->name
- << " of table " << index->table->name
- << ": " << *prev_entry << ", "
- << rec_offsets_print(rec, offsets);
- /* Continue reading */
- } else if (dict_index_is_unique(index)
- && !contains_null
- && matched_fields
- >= dict_index_get_n_ordering_defined_by_user(
- index)) {
- ret = DB_DUPLICATE_KEY;
- msg = "duplicate key in ";
- goto not_ok;
- }
- }
-
- {
- mem_heap_t* tmp_heap = NULL;
-
- /* Empty the heap on each round. But preserve offsets[]
- for the row_rec_to_index_entry() call, by copying them
- into a separate memory heap when needed. */
- if (UNIV_UNLIKELY(offsets != offsets_)) {
- ulint size = rec_offs_get_n_alloc(offsets)
- * sizeof *offsets;
-
- tmp_heap = mem_heap_create(size);
-
- offsets = static_cast<rec_offs*>(
- mem_heap_dup(tmp_heap, offsets, size));
- }
-
- mem_heap_empty(heap);
-
- prev_entry = row_rec_to_index_entry(
- rec, index, offsets, heap);
-
- if (UNIV_LIKELY_NULL(tmp_heap)) {
- mem_heap_free(tmp_heap);
- }
- }
-
- ret = row_search_for_mysql(
- buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT);
-
- goto loop;
-}
-
-/*********************************************************************//**
-Initialize this module */
-void
-row_mysql_init(void)
-/*================*/
-{
- mutex_create(LATCH_ID_ROW_DROP_LIST, &row_drop_list_mutex);
-
- UT_LIST_INIT(
- row_mysql_drop_list,
- &row_mysql_drop_t::row_mysql_drop_list);
-
- row_mysql_drop_list_inited = true;
-}
-
-void row_mysql_close()
-{
- ut_ad(!UT_LIST_GET_LEN(row_mysql_drop_list) ||
- srv_force_recovery >= SRV_FORCE_NO_BACKGROUND);
- if (row_mysql_drop_list_inited)
- {
- row_mysql_drop_list_inited= false;
- mutex_free(&row_drop_list_mutex);
-
- while (row_mysql_drop_t *drop= UT_LIST_GET_FIRST(row_mysql_drop_list))
- {
- UT_LIST_REMOVE(row_mysql_drop_list, drop);
- ut_free(drop);
- }
- }
-}
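
Note: the hunks in this file and in the files that follow replace the os_thread_sleep()/os_thread_yield() wrappers with the standard <thread> and <chrono> facilities. A minimal standalone sketch of that idiom, assuming nothing beyond the C++ standard library (an illustration, not code from this patch):

#include <chrono>
#include <thread>

static void backoff_example()
{
	/* give up the rest of this time slice, as os_thread_yield() did */
	std::this_thread::yield();

	/* os_thread_sleep() took microseconds; std::chrono durations are
	explicit about their unit, so 250 ms cannot be misread as 250 s */
	std::this_thread::sleep_for(std::chrono::milliseconds(250));

	/* a one-second retry pause, as used by the purge retry loops below */
	std::this_thread::sleep_for(std::chrono::seconds(1));
}
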
diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc
index 74bbc61df52..753b42332fc 100644
--- a/storage/innobase/row/row0purge.cc
+++ b/storage/innobase/row/row0purge.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,8 +25,10 @@ Created 3/14/1997 Heikki Tuuri
*******************************************************/
#include "row0purge.h"
+#include "btr0cur.h"
#include "fsp0fsp.h"
#include "mach0data.h"
+#include "dict0crea.h"
#include "dict0stats.h"
#include "trx0rseg.h"
#include "trx0trx.h"
@@ -39,13 +41,13 @@ Created 3/14/1997 Heikki Tuuri
#include "row0upd.h"
#include "row0vers.h"
#include "row0mysql.h"
-#include "row0log.h"
#include "log0log.h"
#include "srv0mon.h"
#include "srv0start.h"
#include "handler.h"
#include "ha_innodb.h"
#include "fil0fil.h"
+#include <mysql/service_thd_mdl.h>
/*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
@@ -65,7 +67,7 @@ static
ibool
row_purge_reposition_pcur(
/*======================*/
- ulint mode, /*!< in: latching mode */
+ btr_latch_mode mode, /*!< in: latching mode */
purge_node_t* node, /*!< in: row purge node */
mtr_t* mtr) /*!< in: mtr */
{
@@ -73,7 +75,7 @@ row_purge_reposition_pcur(
ut_ad(node->validate_pcur());
node->found_clust =
- btr_pcur_restore_position(mode, &node->pcur, mtr) ==
+ node->pcur.restore_position(mode, mtr) ==
btr_pcur_t::SAME_ALL;
} else {
@@ -102,20 +104,93 @@ bool
row_purge_remove_clust_if_poss_low(
/*===============================*/
purge_node_t* node, /*!< in/out: row purge node */
- ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
+ btr_latch_mode mode) /*!< in: BTR_MODIFY_LEAF or BTR_PURGE_TREE */
{
dict_index_t* index = dict_table_get_first_index(node->table);
+ table_id_t table_id = 0;
+ index_id_t index_id = 0;
+ dict_table_t *table = nullptr;
+ pfs_os_file_t f = OS_FILE_CLOSED;
- log_free_check();
-
+ if (table_id) {
+retry:
+ purge_sys.check_stop_FTS();
+ dict_sys.lock(SRW_LOCK_CALL);
+ table = dict_sys.find_table(table_id);
+ if (!table) {
+ dict_sys.unlock();
+ } else if (table->n_rec_locks) {
+ for (dict_index_t* ind = UT_LIST_GET_FIRST(
+ table->indexes); ind;
+ ind = UT_LIST_GET_NEXT(indexes, ind)) {
+ if (ind->id == index_id) {
+ lock_discard_for_index(*ind);
+ }
+ }
+ }
+ }
mtr_t mtr;
mtr.start();
index->set_modified(mtr);
+ log_free_check();
+ bool success = true;
if (!row_purge_reposition_pcur(mode, node, &mtr)) {
/* The record was already removed. */
+removed:
mtr.commit();
- return true;
+close_and_exit:
+ if (table) {
+ dict_sys.unlock();
+ }
+ return success;
+ }
+
+ if (node->table->id == DICT_INDEXES_ID) {
+ /* If this is a record of the SYS_INDEXES table, then
+ we have to free the file segments of the index tree
+ associated with the index */
+ if (!table_id) {
+ const rec_t* rec = btr_pcur_get_rec(&node->pcur);
+
+ table_id = mach_read_from_8(rec);
+ index_id = mach_read_from_8(rec + 8);
+ if (table_id) {
+ mtr.commit();
+ goto retry;
+ }
+ ut_ad("corrupted SYS_INDEXES record" == 0);
+ }
+
+ if (const uint32_t space_id = dict_drop_index_tree(
+ &node->pcur, nullptr, &mtr)) {
+ if (table) {
+ if (table->get_ref_count() == 0) {
+ dict_sys.remove(table);
+ } else if (table->space_id == space_id) {
+ table->space = nullptr;
+ table->file_unreadable = true;
+ }
+ dict_sys.unlock();
+ table = nullptr;
+ }
+ f = fil_delete_tablespace(space_id);
+ }
+
+ mtr.commit();
+
+ if (table) {
+ dict_sys.unlock();
+ table = nullptr;
+ }
+
+ purge_sys.check_stop_SYS();
+ mtr.start();
+ index->set_modified(mtr);
+
+ if (!row_purge_reposition_pcur(mode, node, &mtr)) {
+ goto removed;
+ }
}
rec_t* rec = btr_pcur_get_rec(&node->pcur);
@@ -125,7 +200,6 @@ row_purge_remove_clust_if_poss_low(
rec_offs* offsets = rec_get_offsets(rec, index, offsets_,
index->n_core_fields,
ULINT_UNDEFINED, &heap);
- bool success = true;
if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) {
/* Someone else has modified the record later: do not remove */
@@ -138,24 +212,15 @@ row_purge_remove_clust_if_poss_low(
ut_ad(row_get_rec_trx_id(rec, index, offsets));
if (mode == BTR_MODIFY_LEAF) {
- success = btr_cur_optimistic_delete(
+ success = DB_FAIL != btr_cur_optimistic_delete(
btr_pcur_get_btr_cur(&node->pcur), 0, &mtr);
} else {
dberr_t err;
- ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
+ ut_ad(mode == BTR_PURGE_TREE);
btr_cur_pessimistic_delete(
&err, FALSE, btr_pcur_get_btr_cur(&node->pcur), 0,
false, &mtr);
-
- switch (err) {
- case DB_SUCCESS:
- break;
- case DB_OUT_OF_FILE_SPACE:
- success = false;
- break;
- default:
- ut_error;
- }
+ success = err == DB_SUCCESS;
}
func_exit:
@@ -170,7 +235,7 @@ func_exit:
mtr_commit(&mtr);
}
- return(success);
+ goto close_and_exit;
}
/***********************************************************//**
@@ -192,12 +257,11 @@ row_purge_remove_clust_if_poss(
for (ulint n_tries = 0;
n_tries < BTR_CUR_RETRY_DELETE_N_TIMES;
n_tries++) {
- if (row_purge_remove_clust_if_poss_low(
- node, BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE)) {
+ if (row_purge_remove_clust_if_poss_low(node, BTR_PURGE_TREE)) {
return(true);
}
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
+ std::this_thread::sleep_for(BTR_CUR_RETRY_SLEEP_TIME);
}
return(false);
@@ -278,39 +342,20 @@ row_purge_remove_sec_if_poss_tree(
ibool success = TRUE;
dberr_t err;
mtr_t mtr;
- enum row_search_result search_result;
log_free_check();
mtr.start();
index->set_modified(mtr);
+ pcur.btr_cur.page_cur.index = index;
- if (!index->is_committed()) {
- /* The index->online_status may change if the index is
- or was being created online, but not committed yet. It
- is protected by index->lock. */
- mtr_sx_lock_index(index, &mtr);
-
- if (dict_index_is_online_ddl(index)) {
- /* Online secondary index creation will not
- copy any delete-marked records. Therefore
- there is nothing to be purged. We must also
- skip the purge when a completed index is
- dropped by rollback_inplace_alter_table(). */
- goto func_exit_no_pcur;
+ if (index->is_spatial()) {
+ if (!rtr_search(entry, BTR_PURGE_TREE, &pcur, &mtr)) {
+ goto found;
}
- } else {
- /* For secondary indexes,
- index->online_status==ONLINE_INDEX_COMPLETE if
- index->is_committed(). */
- ut_ad(!dict_index_is_online_ddl(index));
+ goto func_exit;
}
- search_result = row_search_index_entry(
- index, entry,
- BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
- &pcur, &mtr);
-
- switch (search_result) {
+ switch (row_search_index_entry(entry, BTR_PURGE_TREE, &pcur, &mtr)) {
case ROW_NOT_FOUND:
/* Not found. This is a legitimate condition. In a
rollback, InnoDB will remove secondary recs that would
@@ -339,6 +384,7 @@ row_purge_remove_sec_if_poss_tree(
which cannot be purged yet, requires its existence. If some requires,
we should do nothing. */
+found:
if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, true)) {
/* Remove the index record, which should have been
@@ -377,7 +423,6 @@ row_purge_remove_sec_if_poss_tree(
func_exit:
btr_pcur_close(&pcur); // FIXME: need this?
-func_exit_no_pcur:
mtr.commit();
return(success);
@@ -398,8 +443,6 @@ row_purge_remove_sec_if_poss_leaf(
{
mtr_t mtr;
btr_pcur_t pcur;
- enum btr_latch_mode mode;
- enum row_search_result search_result;
bool success = true;
log_free_check();
@@ -408,62 +451,27 @@ row_purge_remove_sec_if_poss_leaf(
mtr.start();
index->set_modified(mtr);
- if (!index->is_committed()) {
- /* For uncommitted spatial index, we also skip the purge. */
- if (dict_index_is_spatial(index)) {
- goto func_exit_no_pcur;
- }
-
-		/* The index->online_status may change if the
- index is or was being created online, but not
- committed yet. It is protected by index->lock. */
- mtr_s_lock_index(index, &mtr);
-
- if (dict_index_is_online_ddl(index)) {
- /* Online secondary index creation will not
- copy any delete-marked records. Therefore
- there is nothing to be purged. We must also
- skip the purge when a completed index is
- dropped by rollback_inplace_alter_table(). */
- goto func_exit_no_pcur;
- }
-
- mode = BTR_PURGE_LEAF_ALREADY_S_LATCHED;
- } else {
- /* For secondary indexes,
- index->online_status==ONLINE_INDEX_COMPLETE if
- index->is_committed(). */
- ut_ad(!dict_index_is_online_ddl(index));
-
- /* Change buffering is disabled for spatial index and
- virtual index. */
- mode = (dict_index_is_spatial(index)
- || dict_index_has_virtual(index))
- ? BTR_MODIFY_LEAF
- : BTR_PURGE_LEAF;
- }
+ pcur.btr_cur.page_cur.index = index;
/* Set the purge node for the call to row_purge_poss_sec(). */
pcur.btr_cur.purge_node = node;
- if (dict_index_is_spatial(index)) {
- rw_lock_sx_lock(dict_index_get_lock(index));
+ if (index->is_spatial()) {
pcur.btr_cur.thr = NULL;
- } else {
- /* Set the query thread, so that ibuf_insert_low() will be
- able to invoke thd_get_trx(). */
- pcur.btr_cur.thr = static_cast<que_thr_t*>(
- que_node_get_parent(node));
+ if (!rtr_search(entry, BTR_MODIFY_LEAF, &pcur, &mtr)) {
+ goto found;
+ }
+ goto func_exit;
}
- search_result = row_search_index_entry(
- index, entry, mode, &pcur, &mtr);
-
- if (dict_index_is_spatial(index)) {
- rw_lock_sx_unlock(dict_index_get_lock(index));
- }
+ /* Set the query thread, so that ibuf_insert_low() will be
+ able to invoke thd_get_trx(). */
+ pcur.btr_cur.thr = static_cast<que_thr_t*>(que_node_get_parent(node));
- switch (search_result) {
+ switch (row_search_index_entry(entry, index->has_virtual()
+ ? BTR_MODIFY_LEAF : BTR_PURGE_LEAF,
+ &pcur, &mtr)) {
case ROW_FOUND:
+found:
/* Before attempting to purge a record, check
if it is safe to do so. */
if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, false)) {
@@ -483,11 +491,9 @@ row_purge_remove_sec_if_poss_leaf(
<< rec_index_print(
btr_cur_get_rec(btr_cur),
index);
- ut_ad(0);
-
- btr_pcur_close(&pcur);
-
- goto func_exit_no_pcur;
+ mtr.commit();
+ dict_set_corrupted(index, "purge");
+ goto cleanup;
}
if (index->is_spatial()) {
@@ -496,7 +502,7 @@ row_purge_remove_sec_if_poss_leaf(
if (block->page.id().page_no()
!= index->page
- && page_get_n_recs(block->frame) < 2
+ && page_get_n_recs(block->page.frame) < 2
&& !lock_test_prdt_page_lock(
btr_cur->rtr_info
&& btr_cur->rtr_info->thr
@@ -512,18 +518,12 @@ row_purge_remove_sec_if_poss_leaf(
"skip purging last"
" record on page "
<< block->page.id());
-
- btr_pcur_close(&pcur);
- mtr.commit();
- return(success);
+ goto func_exit;
}
}
- if (!btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
-
- /* The index entry could not be deleted. */
- success = false;
- }
+ success = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
+ != DB_FAIL;
}
/* (The index entry is still needed,
@@ -535,9 +535,10 @@ row_purge_remove_sec_if_poss_leaf(
/* The deletion was buffered. */
case ROW_NOT_FOUND:
/* The index entry does not exist, nothing to do. */
- btr_pcur_close(&pcur); // FIXME: do we need these? when is btr_cur->rtr_info set?
-func_exit_no_pcur:
+func_exit:
mtr.commit();
+cleanup:
+ btr_pcur_close(&pcur); // FIXME: do we need these? when is btr_cur->rtr_info set?
return(success);
}
@@ -581,7 +582,7 @@ retry:
n_tries++;
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
+ std::this_thread::sleep_for(BTR_CUR_RETRY_SLEEP_TIME);
goto retry;
}
@@ -589,25 +590,6 @@ retry:
ut_a(success);
}
-/** Skip uncommitted virtual indexes on newly added virtual column.
-@param[in,out] index dict index object */
-static
-inline
-void
-row_purge_skip_uncommitted_virtual_index(
- dict_index_t*& index)
-{
- /* We need to skip virtual indexes which is not
- committed yet. It's safe because these indexes are
- newly created by alter table, and because we do
- not support LOCK=NONE when adding an index on newly
- added virtual column.*/
- while (index != NULL && dict_index_has_virtual(index)
- && !index->is_committed() && index->has_new_v_col()) {
- index = dict_table_get_next_index(index);
- }
-}
-
/***********************************************************//**
Purges a delete marking of a record.
@retval true if the row was not found, or it was successfully removed
@@ -619,34 +601,42 @@ row_purge_del_mark(
/*===============*/
purge_node_t* node) /*!< in/out: row purge node */
{
- mem_heap_t* heap;
-
- heap = mem_heap_create(1024);
-
- while (node->index != NULL) {
- /* skip corrupted secondary index */
- dict_table_skip_corrupt_index(node->index);
-
- row_purge_skip_uncommitted_virtual_index(node->index);
+ if (node->index)
+ {
+ mem_heap_t *heap= mem_heap_create(1024);
- if (!node->index) {
- break;
- }
+ do
+ {
+ const auto type= node->index->type;
+ if (type & (DICT_FTS | DICT_CORRUPT))
+ continue;
+ if (UNIV_UNLIKELY(DICT_VIRTUAL & type) && !node->index->is_committed() &&
+ node->index->has_new_v_col())
+ continue;
+ dtuple_t* entry= row_build_index_entry_low(node->row, nullptr,
+ node->index, heap,
+ ROW_BUILD_FOR_PURGE);
+ row_purge_remove_sec_if_poss(node, node->index, entry);
+ mem_heap_empty(heap);
+ }
+ while ((node->index= dict_table_get_next_index(node->index)));
- if (node->index->type != DICT_FTS) {
- dtuple_t* entry = row_build_index_entry_low(
- node->row, NULL, node->index,
- heap, ROW_BUILD_FOR_PURGE);
- row_purge_remove_sec_if_poss(node, node->index, entry);
- mem_heap_empty(heap);
- }
+ mem_heap_free(heap);
+ }
- node->index = dict_table_get_next_index(node->index);
- }
+ return row_purge_remove_clust_if_poss(node);
+}
- mem_heap_free(heap);
+void purge_sys_t::wait_SYS()
+{
+ while (must_wait_SYS())
+ std::this_thread::sleep_for(std::chrono::seconds(1));
+}
- return(row_purge_remove_clust_if_poss(node));
+void purge_sys_t::wait_FTS()
+{
+ while (must_wait_FTS())
+ std::this_thread::sleep_for(std::chrono::seconds(1));
}
/** Reset DB_TRX_ID, DB_ROLL_PTR of a clustered index record
@@ -655,6 +645,7 @@ whose old history can no longer be observed.
@param[in,out] mtr mini-transaction (will be started and committed) */
static void row_purge_reset_trx_id(purge_node_t* node, mtr_t* mtr)
{
+retry:
/* Reset DB_TRX_ID, DB_ROLL_PTR for old records. */
mtr->start();
@@ -690,6 +681,17 @@ static void row_purge_reset_trx_id(purge_node_t* node, mtr_t* mtr)
ut_ad(!rec_get_deleted_flag(
rec, rec_offs_comp(offsets))
|| rec_is_alter_metadata(rec, *index));
+ switch (node->table->id) {
+ case DICT_TABLES_ID:
+ case DICT_COLUMNS_ID:
+ case DICT_INDEXES_ID:
+ if (purge_sys.must_wait_SYS()) {
+ mtr->commit();
+ purge_sys.check_stop_SYS();
+ goto retry;
+ }
+ }
+
DBUG_LOG("purge", "reset DB_TRX_ID="
<< ib::hex(row_get_rec_trx_id(
rec, index, offsets)));
@@ -709,9 +711,9 @@ static void row_purge_reset_trx_id(purge_node_t* node, mtr_t* mtr)
size_t offs = page_offset(ptr);
mtr->memset(block, offs, DATA_TRX_ID_LEN, 0);
offs += DATA_TRX_ID_LEN;
- mtr->write<1,mtr_t::MAYBE_NOP>(*block,
- block->frame
- + offs, 0x80U);
+ mtr->write<1,mtr_t::MAYBE_NOP>(
+ *block, block->page.frame + offs,
+ 0x80U);
mtr->memset(block, offs + 1,
DATA_ROLL_PTR_LEN - 1, 0);
}
@@ -739,20 +741,25 @@ row_purge_upd_exist_or_extern_func(
ut_ad(!node->table->skip_alter_undo);
if (node->rec_type == TRX_UNDO_UPD_DEL_REC
- || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
+ || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)
+ || !node->index) {
goto skip_secondaries;
}
heap = mem_heap_create(1024);
- while (node->index != NULL) {
- dict_table_skip_corrupt_index(node->index);
+ do {
+ const auto type = node->index->type;
- row_purge_skip_uncommitted_virtual_index(node->index);
+ if (type & (DICT_FTS | DICT_CORRUPT)) {
+ continue;
+ }
- if (!node->index) {
- break;
+ if (UNIV_UNLIKELY(DICT_VIRTUAL & type)
+ && !node->index->is_committed()
+ && node->index->has_new_v_col()) {
+ continue;
}
if (row_upd_changes_ord_field_binary(node->index, node->update,
@@ -767,9 +774,7 @@ row_purge_upd_exist_or_extern_func(
mem_heap_empty(heap);
}
-
- node->index = dict_table_get_next_index(node->index);
- }
+ } while ((node->index = dict_table_get_next_index(node->index)));
mem_heap_free(heap);
@@ -783,9 +788,6 @@ skip_secondaries:
= upd_get_nth_field(node->update, i);
if (dfield_is_ext(&ufield->new_val)) {
- trx_rseg_t* rseg;
- buf_block_t* block;
- byte* data_field;
bool is_insert;
ulint rseg_id;
uint32_t page_no;
@@ -808,11 +810,8 @@ skip_secondaries:
&is_insert, &rseg_id,
&page_no, &offset);
- rseg = trx_sys.rseg_array[rseg_id];
-
- ut_a(rseg != NULL);
- ut_ad(rseg->id == rseg_id);
- ut_ad(rseg->is_persistent());
+ const trx_rseg_t &rseg = trx_sys.rseg_array[rseg_id];
+ ut_ad(rseg.is_persistent());
mtr.start();
@@ -823,7 +822,7 @@ skip_secondaries:
index->set_modified(mtr);
- /* NOTE: we must also acquire an X-latch to the
+ /* NOTE: we must also acquire a U latch to the
root page of the tree. We will need it when we
free pages from the tree. If the tree is of height 1,
the tree X-latch does NOT protect the root page,
@@ -832,24 +831,26 @@ skip_secondaries:
latching order if we would only later latch the
root page of such a tree! */
- btr_root_get(index, &mtr);
-
- block = buf_page_get(
- page_id_t(rseg->space->id, page_no),
- 0, RW_X_LATCH, &mtr);
-
- buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
-
- data_field = buf_block_get_frame(block)
- + offset + internal_offset;
+ dberr_t err;
+ if (!btr_root_block_get(index, RW_SX_LATCH, &mtr,
+ &err)) {
+ } else if (buf_block_t* block =
+ buf_page_get(page_id_t(rseg.space->id,
+ page_no),
+ 0, RW_X_LATCH, &mtr)) {
+ byte* data_field = block->page.frame
+ + offset + internal_offset;
+
+ ut_a(dfield_get_len(&ufield->new_val)
+ >= BTR_EXTERN_FIELD_REF_SIZE);
+ btr_free_externally_stored_field(
+ index,
+ data_field
+ + dfield_get_len(&ufield->new_val)
+ - BTR_EXTERN_FIELD_REF_SIZE,
+ NULL, NULL, block, 0, false, &mtr);
+ }
- ut_a(dfield_get_len(&ufield->new_val)
- >= BTR_EXTERN_FIELD_REF_SIZE);
- btr_free_externally_stored_field(
- index,
- data_field + dfield_get_len(&ufield->new_val)
- - BTR_EXTERN_FIELD_REF_SIZE,
- NULL, NULL, block, 0, false, &mtr);
mtr.commit();
}
}
@@ -997,6 +998,7 @@ static byte *row_purge_get_partial(const byte *ptr, const dict_index_t &index,
return const_cast<byte*>(ptr);
}
+MY_ATTRIBUTE((nonnull,warn_unused_result))
/** Parses the row reference and other info in a modify undo log record.
@param[in] node row undo node
@param[in] undo_rec record to purge
@@ -1013,17 +1015,13 @@ row_purge_parse_undo_rec(
bool* updated_extern)
{
dict_index_t* clust_index;
- byte* ptr;
undo_no_t undo_no;
table_id_t table_id;
roll_ptr_t roll_ptr;
byte info_bits;
ulint type;
- ut_ad(node != NULL);
- ut_ad(thr != NULL);
-
- ptr = trx_undo_rec_get_pars(
+ const byte* ptr = trx_undo_rec_get_pars(
undo_rec, &type, &node->cmpl_info,
updated_extern, &undo_no, &table_id);
@@ -1032,6 +1030,7 @@ row_purge_parse_undo_rec(
switch (type) {
case TRX_UNDO_RENAME_TABLE:
return false;
+ case TRX_UNDO_EMPTY:
case TRX_UNDO_INSERT_METADATA:
case TRX_UNDO_INSERT_REC:
/* These records do not store any transaction identifier.
@@ -1065,10 +1064,17 @@ row_purge_parse_undo_rec(
}
try_again:
+ purge_sys.check_stop_FTS();
+
node->table = dict_table_open_on_id<true>(
table_id, false, DICT_TABLE_OP_NORMAL, node->purge_thd,
&node->mdl_ticket);
+ if (node->table == reinterpret_cast<dict_table_t*>(-1)) {
+ /* purge stop signal */
+ goto try_again;
+ }
+
if (!node->table) {
/* The table has been dropped: no need to do purge and
release mdl happened as a part of open process itself */
@@ -1094,7 +1100,7 @@ already_locked:
if (srv_shutdown_state > SRV_SHUTDOWN_NONE) {
return(false);
}
- os_thread_sleep(1000000);
+ std::this_thread::sleep_for(std::chrono::seconds(1));
goto try_again;
}
}
@@ -1122,6 +1128,9 @@ err_exit:
if (type == TRX_UNDO_INSERT_METADATA) {
node->ref = &trx_undo_metadata;
return(true);
+ } else if (type == TRX_UNDO_EMPTY) {
+ node->ref = nullptr;
+ return true;
}
ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
@@ -1164,18 +1173,18 @@ row_purge_record_func(
#endif /* UNIV_DEBUG || WITH_WSREP */
bool updated_extern)
{
- dict_index_t* clust_index;
- bool purged = true;
-
ut_ad(!node->found_clust);
ut_ad(!node->table->skip_alter_undo);
+ ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr));
- clust_index = dict_table_get_first_index(node->table);
+ node->index = dict_table_get_next_index(
+ dict_table_get_first_index(node->table));
- node->index = dict_table_get_next_index(clust_index);
- ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr));
+ bool purged = true;
switch (node->rec_type) {
+ case TRX_UNDO_EMPTY:
+ break;
case TRX_UNDO_DEL_MARK_REC:
purged = row_purge_del_mark(node);
if (purged) {
@@ -1205,8 +1214,8 @@ row_purge_record_func(
}
if (node->found_clust) {
+ node->found_clust = false;
btr_pcur_close(&node->pcur);
- node->found_clust = FALSE;
}
return(purged);
@@ -1232,7 +1241,7 @@ row_purge(
trx_undo_rec_t* undo_rec, /*!< in: record to purge */
que_thr_t* thr) /*!< in: query thread */
{
- if (undo_rec != &trx_purge_dummy_rec) {
+ if (undo_rec != reinterpret_cast<trx_undo_rec_t*>(-1)) {
bool updated_extern;
while (row_purge_parse_undo_rec(
@@ -1247,30 +1256,44 @@ row_purge(
}
/* Retry the purge in a second. */
- os_thread_sleep(1000000);
+ std::this_thread::sleep_for(std::chrono::seconds(1));
}
}
}
-/***********************************************************//**
-Reset the purge query thread. */
-UNIV_INLINE
-void
-row_purge_end(
-/*==========*/
- que_thr_t* thr) /*!< in: query thread */
+inline void purge_node_t::start()
{
- ut_ad(thr);
-
- thr->run_node = static_cast<purge_node_t*>(thr->run_node)->end();
+ ut_ad(in_progress);
+ DBUG_ASSERT(common.type == QUE_NODE_PURGE);
+
+ row= nullptr;
+ ref= nullptr;
+ index= nullptr;
+ update= nullptr;
+ found_clust= FALSE;
+ rec_type= ULINT_UNDEFINED;
+ cmpl_info= ULINT_UNDEFINED;
+ if (!purge_thd)
+ purge_thd= current_thd;
+}
- ut_a(thr->run_node != NULL);
+/** Reset the state at end
+@return the query graph parent */
+inline que_node_t *purge_node_t::end()
+{
+ DBUG_ASSERT(common.type == QUE_NODE_PURGE);
+ close_table();
+ ut_ad(undo_recs.empty());
+ ut_d(in_progress= false);
+ purge_thd= nullptr;
+ mem_heap_empty(heap);
+ return common.parent;
}
+
/***********************************************************//**
-Does the purge operation for a single undo log record. This is a high-level
-function used in an SQL execution graph.
-@return query thread to run next or NULL */
+Does the purge operation.
+@return query thread to run next */
que_thr_t*
row_purge_step(
/*===========*/
@@ -1282,22 +1305,15 @@ row_purge_step(
node->start();
- if (!node->undo_recs.empty()) {
+ while (!node->undo_recs.empty()) {
trx_purge_rec_t purge_rec = node->undo_recs.front();
node->undo_recs.pop();
node->roll_ptr = purge_rec.roll_ptr;
row_purge(node, purge_rec.undo_rec, thr);
-
- if (node->undo_recs.empty()) {
- row_purge_end(thr);
- } else {
- thr->run_node = node;
- }
- } else {
- row_purge_end(thr);
}
+ thr->run_node = node->end();
return(thr);
}
@@ -1324,11 +1340,11 @@ purge_node_t::validate_pcur()
return(true);
}
- if (!pcur.old_stored) {
+ if (!pcur.old_rec) {
return(true);
}
- dict_index_t* clust_index = pcur.btr_cur.index;
+ dict_index_t* clust_index = pcur.index();
rec_offs* offsets = rec_get_offsets(
pcur.old_rec, clust_index, NULL, pcur.old_n_core_fields,
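
Note: the new purge_sys_t::wait_SYS() and wait_FTS() helpers above simply poll a predicate once per second until it clears. A standalone sketch of that pattern, with a generic predicate standing in for the purge_sys checks (an illustration, not code from this patch):

#include <chrono>
#include <thread>

/* Block the caller until must_wait() returns false, checking once a second.
The predicate is a placeholder for purge_sys.must_wait_SYS()/must_wait_FTS(). */
template <typename Pred>
void wait_until_clear(Pred must_wait)
{
	while (must_wait())
		std::this_thread::sleep_for(std::chrono::seconds(1));
}

/* usage: wait_until_clear([]{ return ddl_in_progress.load(); });
where ddl_in_progress is a hypothetical std::atomic<bool>. */
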
diff --git a/storage/innobase/row/row0quiesce.cc b/storage/innobase/row/row0quiesce.cc
index 063fed764e8..a4d634f2d14 100644
--- a/storage/innobase/row/row0quiesce.cc
+++ b/storage/innobase/row/row0quiesce.cc
@@ -499,8 +499,6 @@ row_quiesce_table_has_fts_index(
{
bool exists = false;
- dict_mutex_enter_for_mysql();
-
for (const dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
index != 0;
index = UT_LIST_GET_NEXT(indexes, index)) {
@@ -511,8 +509,6 @@ row_quiesce_table_has_fts_index(
}
}
- dict_mutex_exit_for_mysql();
-
return(exists);
}
@@ -600,8 +596,7 @@ row_quiesce_table_complete(
<< " to complete";
}
- /* Sleep for a second. */
- os_thread_sleep(1000000);
+ std::this_thread::sleep_for(std::chrono::seconds(1));
++count;
}
@@ -685,15 +680,13 @@ row_quiesce_set_state(
dict_index_t* clust_index = dict_table_get_first_index(table);
- row_mysql_lock_data_dictionary(trx);
-
for (dict_index_t* index = dict_table_get_next_index(clust_index);
index != NULL;
index = dict_table_get_next_index(index)) {
- rw_lock_x_lock(&index->lock);
+ index->lock.x_lock(SRW_LOCK_CALL);
}
- rw_lock_x_lock(&clust_index->lock);
+ clust_index->lock.x_lock(SRW_LOCK_CALL);
switch (state) {
case QUIESCE_START:
@@ -713,11 +706,9 @@ row_quiesce_set_state(
for (dict_index_t* index = dict_table_get_first_index(table);
index != NULL;
index = dict_table_get_next_index(index)) {
- rw_lock_x_unlock(&index->lock);
+ index->lock.x_unlock();
}
- row_mysql_unlock_data_dictionary(trx);
-
return(DB_SUCCESS);
}
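
Note: the row0quiesce.cc hunk above no longer takes the data dictionary lock and instead latches each index of the table directly, taking the secondary index latches first and the clustered index latch last. A small sketch of taking several exclusive locks together, using plain std::mutex as a stand-in for the index latches (the toy_index type is made up for illustration; it is not an InnoDB structure):

#include <mutex>

struct toy_index { std::mutex latch; };

/* Latch two secondary indexes and the clustered index for exclusive access.
std::scoped_lock acquires the whole set with a deadlock-avoidance algorithm
and releases it in reverse order when it goes out of scope. */
static void quiesce_indexes(toy_index &sec1, toy_index &sec2, toy_index &clust)
{
	std::scoped_lock all(sec1.latch, sec2.latch, clust.latch);
	/* ... write the export metadata while no index can change ... */
}	/* all three latches are released here */
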
diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc
index 30622d031ea..4a00b2a430e 100644
--- a/storage/innobase/row/row0row.cc
+++ b/storage/innobase/row/row0row.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, 2021, MariaDB Corporation.
+Copyright (c) 2018, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -531,7 +531,11 @@ row_build_low(
continue;
}
- ut_ad(ind_field < &index->fields[index->n_fields]);
+ if (UNIV_UNLIKELY(ind_field
+ >= &index->fields[index->n_fields])) {
+ ut_ad(rec_is_metadata(rec, *index));
+ continue;
+ }
const dict_col_t* col = dict_field_get_col(ind_field);
@@ -745,11 +749,15 @@ row_rec_to_index_entry_impl(
if (mblob == 2) {
ut_ad(info_bits == REC_INFO_METADATA_ALTER
|| info_bits == REC_INFO_METADATA_ADD);
- ut_ad(rec_len <= ulint(index->n_fields + got));
if (pad) {
+ ut_ad(rec_len <= ulint(index->n_fields + got));
rec_len = ulint(index->n_fields)
+ (info_bits == REC_INFO_METADATA_ALTER);
- } else if (!got && info_bits == REC_INFO_METADATA_ALTER) {
+ } else if (got) {
+ rec_len = std::min(rec_len,
+ ulint(index->n_fields + got));
+ } else if (info_bits == REC_INFO_METADATA_ALTER) {
+ ut_ad(rec_len <= index->n_fields);
rec_len++;
}
} else {
@@ -1175,32 +1183,28 @@ row_build_row_ref_in_tuple(
/***************************************************************//**
Searches the clustered index record for a row, if we have the row reference.
@return TRUE if found */
-ibool
+bool
row_search_on_row_ref(
/*==================*/
btr_pcur_t* pcur, /*!< out: persistent cursor, which must
be closed by the caller */
- ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
+ btr_latch_mode mode, /*!< in: BTR_MODIFY_LEAF, ... */
const dict_table_t* table, /*!< in: table */
const dtuple_t* ref, /*!< in: row reference */
mtr_t* mtr) /*!< in/out: mtr */
{
- ulint low_match;
- rec_t* rec;
- dict_index_t* index;
-
ut_ad(dtuple_check_typed(ref));
- index = dict_table_get_first_index(table);
+ dict_index_t *index = dict_table_get_first_index(table);
+ btr_pcur_init(pcur);
+ pcur->btr_cur.page_cur.index = index;
if (UNIV_UNLIKELY(ref->info_bits != 0)) {
ut_ad(ref->is_metadata());
ut_ad(ref->n_fields <= index->n_uniq);
- if (btr_pcur_open_at_index_side(
- true, index, mode, pcur, true, 0, mtr)
- != DB_SUCCESS
+ if (pcur->open_leaf(true, index, mode, mtr) != DB_SUCCESS
|| !btr_pcur_move_to_next_user_rec(pcur, mtr)) {
- return FALSE;
+ return false;
}
/* We do not necessarily have index->is_instant() here,
because we could be executing a rollback of an
@@ -1212,27 +1216,14 @@ row_search_on_row_ref(
& REC_INFO_MIN_REC_FLAG;
} else {
ut_a(ref->n_fields == index->n_uniq);
- if (btr_pcur_open(index, ref, PAGE_CUR_LE, mode, pcur, mtr)
+ if (btr_pcur_open(ref, PAGE_CUR_LE, mode, pcur, mtr)
!= DB_SUCCESS) {
- return FALSE;
+ return false;
}
}
- low_match = btr_pcur_get_low_match(pcur);
-
- rec = btr_pcur_get_rec(pcur);
-
- if (page_rec_is_infimum(rec)) {
-
- return(FALSE);
- }
-
- if (low_match != dtuple_get_n_fields(ref)) {
-
- return(FALSE);
- }
-
- return(TRUE);
+ return !page_rec_is_infimum(btr_pcur_get_rec(pcur))
+ && btr_pcur_get_low_match(pcur) == dtuple_get_n_fields(ref);
}
/*********************************************************************//**
@@ -1242,7 +1233,7 @@ on the secondary index record are preserved.
rec_t*
row_get_clust_rec(
/*==============*/
- ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
+ btr_latch_mode mode, /*!< in: BTR_MODIFY_LEAF, ... */
const rec_t* rec, /*!< in: record in a secondary index */
dict_index_t* index, /*!< in: secondary index */
dict_index_t** clust_index,/*!< out: clustered index */
@@ -1252,8 +1243,6 @@ row_get_clust_rec(
dtuple_t* ref;
dict_table_t* table;
btr_pcur_t pcur;
- ibool found;
- rec_t* clust_rec;
ut_ad(!dict_index_is_clust(index));
@@ -1263,17 +1252,12 @@ row_get_clust_rec(
ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec, heap);
- found = row_search_on_row_ref(&pcur, mode, table, ref, mtr);
-
- clust_rec = found ? btr_pcur_get_rec(&pcur) : NULL;
+ auto found = row_search_on_row_ref(&pcur, mode, table, ref, mtr);
mem_heap_free(heap);
- btr_pcur_close(&pcur);
-
*clust_index = dict_table_get_first_index(table);
-
- return(clust_rec);
+ return found ? btr_pcur_get_rec(&pcur) : nullptr;
}
/***************************************************************//**
@@ -1282,9 +1266,8 @@ Searches an index record.
enum row_search_result
row_search_index_entry(
/*===================*/
- dict_index_t* index, /*!< in: index */
const dtuple_t* entry, /*!< in: index entry */
- ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
+ btr_latch_mode mode, /*!< in: BTR_MODIFY_LEAF, ... */
btr_pcur_t* pcur, /*!< in/out: persistent cursor, which must
be closed by the caller */
mtr_t* mtr) /*!< in: mtr */
@@ -1295,17 +1278,13 @@ row_search_index_entry(
ut_ad(dtuple_check_typed(entry));
- if (dict_index_is_spatial(index)) {
- ut_ad(mode & BTR_MODIFY_LEAF || mode & BTR_MODIFY_TREE);
- rtr_pcur_open(index, entry, PAGE_CUR_RTREE_LOCATE,
- mode, pcur, mtr);
- } else {
- btr_pcur_open(index, entry, PAGE_CUR_LE, mode, pcur, mtr);
+ if (btr_pcur_open(entry, PAGE_CUR_LE, mode, pcur, mtr) != DB_SUCCESS) {
+ return ROW_NOT_FOUND;
}
switch (btr_pcur_get_btr_cur(pcur)->flag) {
case BTR_CUR_DELETE_REF:
- ut_a(mode & BTR_DELETE && !dict_index_is_spatial(index));
+ ut_ad(!(~mode & BTR_DELETE));
return(ROW_NOT_DELETED_REF);
case BTR_CUR_DEL_MARK_IBUF:
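
Note: several functions in the row0row.cc hunks above (row_search_on_row_ref(), row_get_clust_rec(), row_search_index_entry()) now take a btr_latch_mode value instead of a plain ulint. A small sketch of what the stronger parameter type buys, with made-up enumerator and function names rather than InnoDB's actual ones (illustration only):

#include <cstdint>

/* illustrative only; the real btr_latch_mode has different enumerators */
enum class latch_mode : std::uint8_t { search_leaf, modify_leaf, purge_tree };

/* old style: any integer is accepted, so a page number or a stray flag
passed by mistake still compiles */
static void open_cursor_old(std::uint32_t /*mode*/) {}

/* new style: only a latch_mode value is accepted */
static void open_cursor_new(latch_mode /*mode*/) {}

int main()
{
	open_cursor_old(42);                        /* compiles, silently wrong */
	open_cursor_new(latch_mode::modify_leaf);   /* intent is explicit */
	/* open_cursor_new(42); would not compile */
	return 0;
}
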
diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc
index 646526311da..d1d264a7e8a 100644
--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -2,7 +2,7 @@
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
-Copyright (c) 2015, 2021, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -36,6 +36,8 @@ Created 12/19/1997 Heikki Tuuri
#include "dict0boot.h"
#include "trx0undo.h"
#include "trx0trx.h"
+#include "trx0purge.h"
+#include "trx0rec.h"
#include "btr0btr.h"
#include "btr0cur.h"
#include "btr0sea.h"
@@ -54,6 +56,7 @@ Created 12/19/1997 Heikki Tuuri
#include "buf0lru.h"
#include "srv0srv.h"
#include "srv0mon.h"
+#include "sql_error.h"
#ifdef WITH_WSREP
#include "mysql/service_wsrep.h" /* For wsrep_thd_skip_locking */
#endif
@@ -282,7 +285,6 @@ row_sel_sec_rec_is_for_clust_rec(
rec_offs_init(clust_offsets_);
rec_offs_init(sec_offsets_);
-
ib_vcol_row vc(heap);
clust_offs = rec_get_offsets(clust_rec, clust_index, clust_offs,
@@ -947,6 +949,36 @@ row_sel_test_other_conds(
return(TRUE);
}
+/** Check that a clustered index record is visible in a consistent read view.
+@param rec clustered index record (in leaf page, or in memory)
+@param index clustered index
+@param offsets rec_get_offsets(rec, index)
+@param view consistent read view
+@retval DB_SUCCESS if rec is visible in view
+@retval DB_SUCCESS_LOCKED_REC if rec is not visible in view
+@retval DB_CORRUPTION if the DB_TRX_ID is corrupted */
+static dberr_t row_sel_clust_sees(const rec_t *rec, const dict_index_t &index,
+ const rec_offs *offsets,
+ const ReadView &view)
+{
+ ut_ad(index.is_primary());
+ ut_ad(page_rec_is_user_rec(rec));
+ ut_ad(rec_offs_validate(rec, &index, offsets));
+ ut_ad(!rec_is_metadata(rec, index));
+ ut_ad(!index.table->is_temporary());
+
+ const trx_id_t id= row_get_rec_trx_id(rec, &index, offsets);
+
+ if (view.changes_visible(id))
+ return DB_SUCCESS;
+ if (UNIV_LIKELY(id < view.low_limit_id() || id < trx_sys.get_max_trx_id()))
+ return DB_SUCCESS_LOCKED_REC;
+
+ ib::warn() << "A transaction id in a record of table " << index.table->name
+ << " is newer than the system-wide maximum.";
+ return DB_CORRUPTION;
+}
+
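// Illustrative sketch only, not part of the patch: a self-contained model of the
// three-way decision that row_sel_clust_sees() makes above. ToyView, toy_result
// and max_trx_id are hypothetical stand-ins for ReadView, dberr_t and
// trx_sys.get_max_trx_id(); the real visibility test is more involved.
#include <cstdint>

enum class toy_result { VISIBLE, NEED_PREV_VERSION, CORRUPTED };

struct ToyView {
  // Simplification: ids below low_limit were committed before the view opened.
  uint64_t low_limit;
  bool changes_visible(uint64_t id) const { return id < low_limit; }
};

inline toy_result toy_clust_sees(uint64_t rec_trx_id, const ToyView& view,
                                 uint64_t max_trx_id)
{
  if (view.changes_visible(rec_trx_id))
    return toy_result::VISIBLE;            // DB_SUCCESS: read the record as is
  if (rec_trx_id < view.low_limit || rec_trx_id < max_trx_id)
    return toy_result::NEED_PREV_VERSION;  // DB_SUCCESS_LOCKED_REC: build an older version
  return toy_result::CORRUPTED;            // DB_CORRUPTION: impossible DB_TRX_ID
}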
/*********************************************************************//**
Retrieves the clustered index record corresponding to a record in a
non-clustered index. Does the necessary locking.
@@ -970,7 +1002,6 @@ row_sel_get_clust_rec(
dict_index_t* index;
rec_t* clust_rec;
rec_t* old_vers;
- dberr_t err = DB_SUCCESS;
mem_heap_t* heap = NULL;
rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs* offsets = offsets_;
@@ -978,18 +1009,21 @@ row_sel_get_clust_rec(
*out_rec = NULL;
- offsets = rec_get_offsets(rec,
- btr_pcur_get_btr_cur(&plan->pcur)->index,
- offsets,
- btr_pcur_get_btr_cur(&plan->pcur)->index
- ->n_core_fields, ULINT_UNDEFINED, &heap);
+ offsets = rec_get_offsets(rec, plan->pcur.index(), offsets,
+ plan->pcur.index()->n_core_fields,
+ ULINT_UNDEFINED, &heap);
row_build_row_ref_fast(plan->clust_ref, plan->clust_map, rec, offsets);
index = dict_table_get_first_index(plan->table);
-
- btr_pcur_open_with_no_init(index, plan->clust_ref, PAGE_CUR_LE,
- BTR_SEARCH_LEAF, &plan->clust_pcur, mtr);
+ plan->clust_pcur.old_rec = nullptr;
+ plan->clust_pcur.btr_cur.page_cur.index = index;
+ dberr_t err = btr_pcur_open_with_no_init(plan->clust_ref,
+ PAGE_CUR_LE, BTR_SEARCH_LEAF,
+ &plan->clust_pcur, mtr);
+ if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+ goto err_exit;
+ }
clust_rec = btr_pcur_get_rec(&(plan->clust_pcur));
@@ -1000,9 +1034,10 @@ row_sel_get_clust_rec(
|| btr_pcur_get_low_match(&(plan->clust_pcur))
< dict_index_get_n_unique(index)) {
- ut_a(rec_get_deleted_flag(rec,
- dict_table_is_comp(plan->table)));
- ut_a(node->read_view);
+ if (!node->read_view ||
+ !rec_get_deleted_flag(rec, plan->table->not_redundant())) {
+ err = DB_CORRUPTION;
+ }
/* In a rare case it is possible that no clust rec is found
for a delete-marked secondary index record: if in row0umod.cc
@@ -1051,9 +1086,15 @@ row_sel_get_clust_rec(
old_vers = NULL;
- if (!lock_clust_rec_cons_read_sees(clust_rec, index, offsets,
- node->read_view)) {
+ err = row_sel_clust_sees(clust_rec, *index, offsets,
+ *node->read_view);
+ switch (err) {
+ default:
+ goto err_exit;
+ case DB_SUCCESS:
+ break;
+ case DB_SUCCESS_LOCKED_REC:
err = row_sel_build_prev_vers(
node->read_view, index, clust_rec,
&offsets, &heap, &plan->old_vers_heap,
@@ -1148,15 +1189,15 @@ sel_set_rtr_rec_lock(
return(DB_SUCCESS_LOCKED_REC);
}
- ut_ad(page_align(first_rec) == cur_block->frame);
+ ut_ad(page_align(first_rec) == cur_block->page.frame);
ut_ad(match->valid);
- rw_lock_x_lock(&(match->block.lock));
+ match->block.page.lock.x_lock();
retry:
cur_block = btr_pcur_get_block(pcur);
- ut_ad(rw_lock_own_flagged(&match->block.lock,
- RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
- ut_ad(page_is_leaf(buf_block_get_frame(cur_block)));
+ ut_ad(match->block.page.lock.have_x()
+ || match->block.page.lock.have_s());
+ ut_ad(page_is_leaf(cur_block->page.frame));
err = lock_sec_rec_read_check_and_lock(
0, cur_block, rec, index, my_offsets,
@@ -1166,31 +1207,29 @@ retry:
re_scan:
mtr->commit();
trx->error_state = err;
- que_thr_stop_for_mysql(thr);
thr->lock_state = QUE_THR_LOCK_ROW;
if (row_mysql_handle_errors(
&err, trx, thr, NULL)) {
thr->lock_state = QUE_THR_LOCK_NOLOCK;
mtr->start();
- mutex_enter(&match->rtr_match_mutex);
+ mysql_mutex_lock(&match->rtr_match_mutex);
if (!match->valid && match->matched_recs->empty()) {
- mutex_exit(&match->rtr_match_mutex);
+ mysql_mutex_unlock(&match->rtr_match_mutex);
err = DB_RECORD_NOT_FOUND;
goto func_end;
}
- mutex_exit(&match->rtr_match_mutex);
+ mysql_mutex_unlock(&match->rtr_match_mutex);
/* MDEV-14059 FIXME: why re-latch the block?
pcur is already positioned on it! */
- uint32_t page_no = page_get_page_no(
- btr_pcur_get_page(pcur));
-
cur_block = buf_page_get_gen(
- page_id_t(index->table->space_id, page_no),
- index->table->space->zip_size(),
- RW_X_LATCH, NULL, BUF_GET,
- __FILE__, __LINE__, mtr, &err);
+ btr_pcur_get_block(pcur)->page.id(),
+ btr_pcur_get_block(pcur)->zip_size(),
+ RW_X_LATCH, NULL, BUF_GET, mtr, &err);
+ if (!cur_block) {
+ goto func_end;
+ }
} else {
mtr->start();
goto func_end;
@@ -1207,6 +1246,7 @@ re_scan:
}
match->matched_recs->clear();
+ // FIXME: check for !cur_block
rtr_cur_search_with_match(
cur_block, index,
@@ -1271,7 +1311,7 @@ re_scan:
match->locked = true;
func_end:
- rw_lock_x_unlock(&(match->block.lock));
+ match->block.page.lock.x_unlock();
if (heap != NULL) {
mem_heap_free(heap);
}
@@ -1338,8 +1378,9 @@ sel_set_rec_lock(
/*********************************************************************//**
Opens a pcur to a table index. */
+MY_ATTRIBUTE((warn_unused_result, nonnull))
static
-void
+dberr_t
row_sel_open_pcur(
/*==============*/
plan_t* plan, /*!< in: table plan */
@@ -1351,6 +1392,10 @@ row_sel_open_pcur(
ulint n_fields;
ulint i;
+ ut_ad(!plan->n_rows_prefetched);
+ ut_ad(!plan->n_rows_fetched);
+ ut_ad(!plan->cursor_at_end);
+
index = plan->index;
/* Calculate the value of the search tuple: the exact match columns
@@ -1365,6 +1410,11 @@ row_sel_open_pcur(
cond = UT_LIST_GET_NEXT(cond_list, cond);
}
+ plan->pcur.old_rec = nullptr;
+ plan->pcur.btr_cur.page_cur.index = index;
+
+ dberr_t err;
+
if (plan->tuple) {
n_fields = dtuple_get_n_fields(plan->tuple);
@@ -1382,23 +1432,16 @@ row_sel_open_pcur(
que_node_get_val(exp));
}
- /* Open pcur to the index */
-
- btr_pcur_open_with_no_init(index, plan->tuple, plan->mode,
- BTR_SEARCH_LEAF, &plan->pcur, mtr);
+ err = btr_pcur_open_with_no_init(plan->tuple,
+ plan->mode, BTR_SEARCH_LEAF,
+ &plan->pcur, mtr);
} else {
- /* Open the cursor to the start or the end of the index
- (FALSE: no init) */
-
- btr_pcur_open_at_index_side(plan->asc, index, BTR_SEARCH_LEAF,
- &(plan->pcur), false, 0, mtr);
+ err = plan->pcur.open_leaf(plan->asc, index, BTR_SEARCH_LEAF,
+ mtr);
}
- ut_ad(plan->n_rows_prefetched == 0);
- ut_ad(plan->n_rows_fetched == 0);
- ut_ad(plan->cursor_at_end == FALSE);
-
- plan->pcur_is_open = TRUE;
+ plan->pcur_is_open = err == DB_SUCCESS;
+ return err;
}
/*********************************************************************//**
@@ -1422,7 +1465,7 @@ row_sel_restore_pcur_pos(
relative_position = btr_pcur_get_rel_pos(&(plan->pcur));
equal_position =
- btr_pcur_restore_position(BTR_SEARCH_LEAF, &plan->pcur, mtr) ==
+ plan->pcur.restore_position(BTR_SEARCH_LEAF, mtr) ==
btr_pcur_t::SAME_ALL;
/* If the cursor is traveling upwards, and relative_position is
@@ -1527,17 +1570,20 @@ row_sel_try_search_shortcut(
{
dict_index_t* index = plan->index;
+ ut_ad(!index->table->is_temporary());
ut_ad(node->read_view);
+ ut_ad(node->read_view->is_open());
ut_ad(plan->unique_search);
ut_ad(!plan->must_get_clust);
- row_sel_open_pcur(plan, mtr);
+ if (row_sel_open_pcur(plan, mtr) != DB_SUCCESS) {
+ return SEL_RETRY;
+ }
const rec_t* rec = btr_pcur_get_rec(&(plan->pcur));
if (!page_rec_is_user_rec(rec) || rec_is_metadata(rec, *index)) {
-retry:
- return(SEL_RETRY);
+ return SEL_RETRY;
}
ut_ad(plan->mode == PAGE_CUR_GE);
@@ -1547,8 +1593,14 @@ retry:
fields in the user record matched to the search tuple */
if (btr_pcur_get_up_match(&(plan->pcur)) < plan->n_exact_match) {
-exhausted:
- return(SEL_EXHAUSTED);
+ return SEL_EXHAUSTED;
+ }
+
+ if (trx_id_t bulk_trx_id = index->table->bulk_trx_id) {
+ /* See row_search_mvcc() for a comment on bulk_trx_id */
+ if (!node->read_view->changes_visible(bulk_trx_id)) {
+ return SEL_EXHAUSTED;
+ }
}
/* This is a non-locking consistent read: if necessary, fetch
@@ -1562,18 +1614,20 @@ exhausted:
ULINT_UNDEFINED, &heap);
if (dict_index_is_clust(index)) {
- if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
- node->read_view)) {
- goto retry;
+ if (row_sel_clust_sees(rec, *index, offsets, *node->read_view)
+ != DB_SUCCESS) {
+ return SEL_RETRY;
+ }
+ } else if (!srv_read_only_mode) {
+ trx_id_t trx_id = page_get_max_trx_id(page_align(rec));
+ ut_ad(trx_id);
+ if (!node->read_view->sees(trx_id)) {
+ return SEL_RETRY;
}
- } else if (!srv_read_only_mode
- && !lock_sec_rec_cons_read_sees(
- rec, index, node->read_view)) {
- goto retry;
}
if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))) {
- goto exhausted;
+ return SEL_EXHAUSTED;
}
/* Fetch the columns needed in test conditions. The index
@@ -1587,7 +1641,7 @@ exhausted:
/* Test the rest of search conditions */
if (!row_sel_test_other_conds(plan)) {
- goto exhausted;
+ return SEL_EXHAUSTED;
}
ut_ad(plan->pcur.latch_mode == BTR_SEARCH_LEAF);
@@ -1597,7 +1651,7 @@ exhausted:
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
- return(SEL_FOUND);
+ return SEL_FOUND;
}
#endif /* BTR_CUR_HASH_ADAPT */
@@ -1618,7 +1672,6 @@ row_sel(
rec_t* rec;
rec_t* old_vers;
rec_t* clust_rec;
- ibool consistent_read;
/* The following flag becomes TRUE when we are doing a
consistent read from a non-clustered index and we must look
@@ -1641,21 +1694,11 @@ row_sel(
rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs* offsets = offsets_;
rec_offs_init(offsets_);
+ const trx_t* trx = thr_get_trx(thr);
ut_ad(thr->run_node == node);
-
- if (node->read_view) {
- /* In consistent reads, we try to do with the hash index and
- not to use the buffer page get. This is to reduce memory bus
- load resulting from semaphore operations. The search latch
- will be s-locked when we access an index with a unique search
- condition, but not locked when we access an index with a
- less selective search condition. */
-
- consistent_read = TRUE;
- } else {
- consistent_read = FALSE;
- }
+ ut_ad(!node->read_view || node->read_view == &trx->read_view);
+ ut_ad(!node->read_view || node->read_view->is_open());
table_loop:
/* TABLE LOOP
@@ -1690,7 +1733,7 @@ table_loop:
mtr.start();
#ifdef BTR_CUR_HASH_ADAPT
- if (consistent_read && plan->unique_search && !plan->pcur_is_open
+ if (node->read_view && plan->unique_search && !plan->pcur_is_open
&& !plan->must_get_clust) {
switch (row_sel_try_search_shortcut(node, plan, &mtr)) {
case SEL_FOUND:
@@ -1714,7 +1757,11 @@ table_loop:
if (!plan->pcur_is_open) {
/* Evaluate the expressions to build the search tuple and
open the cursor */
- row_sel_open_pcur(plan, &mtr);
+ err = row_sel_open_pcur(plan, &mtr);
+
+ if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+ goto mtr_commit_exit;
+ }
cursor_just_opened = TRUE;
@@ -1735,6 +1782,15 @@ table_loop:
}
}
+ if (!node->read_view
+ || trx->isolation_level == TRX_ISO_READ_UNCOMMITTED) {
+ } else if (trx_id_t bulk_trx_id = index->table->bulk_trx_id) {
+ /* See row_search_mvcc() for a comment on bulk_trx_id */
+ if (!trx->read_view.changes_visible(bulk_trx_id)) {
+ goto table_exhausted;
+ }
+ }
+
rec_loop:
/* RECORD LOOP
-----------
@@ -1766,12 +1822,13 @@ rec_loop:
and it might be that these new records should appear in the
search result set, resulting in the phantom problem. */
- if (!consistent_read) {
- rec_t* next_rec = page_rec_get_next(rec);
+ if (!node->read_view) {
+ const rec_t* next_rec = page_rec_get_next_const(rec);
+ if (UNIV_UNLIKELY(!next_rec)) {
+ err = DB_CORRUPTION;
+ goto lock_wait_or_error;
+ }
unsigned lock_type;
- trx_t* trx;
-
- trx = thr_get_trx(thr);
offsets = rec_get_offsets(next_rec, index, offsets,
index->n_core_fields,
@@ -1829,17 +1886,14 @@ skip_lock:
goto next_rec;
}
- if (!consistent_read) {
+ if (!node->read_view) {
/* Try to place a lock on the index record */
unsigned lock_type;
- trx_t* trx;
offsets = rec_get_offsets(rec, index, offsets,
index->n_core_fields,
ULINT_UNDEFINED, &heap);
- trx = thr_get_trx(thr);
-
/* At READ UNCOMMITTED or READ COMMITTED isolation level,
we lock only the record, i.e., next-key locking is
not used. */
@@ -1923,14 +1977,20 @@ skip_lock:
offsets = rec_get_offsets(rec, index, offsets, index->n_core_fields,
ULINT_UNDEFINED, &heap);
- if (consistent_read) {
+ if (node->read_view) {
/* This is a non-locking consistent read: if necessary, fetch
a previous version of the record */
if (dict_index_is_clust(index)) {
+ const trx_id_t id = row_get_rec_trx_id(
+ rec, index, offsets);
- if (!lock_clust_rec_cons_read_sees(
- rec, index, offsets, node->read_view)) {
+ if (!node->read_view->changes_visible(id)) {
+ if (id >= node->read_view->low_limit_id()
+ && id >= trx_sys.get_max_trx_id()) {
+ err = DB_CORRUPTION;
+ goto lock_wait_or_error;
+ }
err = row_sel_build_prev_vers(
node->read_view, index, rec,
@@ -1979,11 +2039,12 @@ skip_lock:
rec = old_vers;
}
- } else if (!srv_read_only_mode
- && !lock_sec_rec_cons_read_sees(
- rec, index, node->read_view)) {
-
- cons_read_requires_clust_rec = TRUE;
+ } else if (!srv_read_only_mode) {
+ trx_id_t trx_id = page_get_max_trx_id(page_align(rec));
+ ut_ad(trx_id);
+ if (!node->read_view->sees(trx_id)) {
+ cons_read_requires_clust_rec = TRUE;
+ }
}
}
@@ -2049,7 +2110,7 @@ skip_lock:
if (clust_rec == NULL) {
/* The record did not exist in the read view */
- ut_ad(consistent_read);
+ ut_ad(node->read_view);
goto next_rec;
}
@@ -2242,11 +2303,8 @@ stop_for_a_while:
plan->stored_cursor_rec_processed = FALSE;
btr_pcur_store_position(&(plan->pcur), &mtr);
- mtr.commit();
- ut_ad(!sync_check_iterate(sync_check()));
-
err = DB_SUCCESS;
- goto func_exit;
+ goto mtr_commit_exit;
commit_mtr_for_a_while:
/* Stores the cursor position and commits &mtr; this is used if
@@ -2260,7 +2318,6 @@ commit_mtr_for_a_while:
mtr.commit();
mtr_has_extra_clust_latch = FALSE;
- ut_ad(!sync_check_iterate(dict_sync_check()));
goto table_loop;
@@ -2271,12 +2328,10 @@ lock_wait_or_error:
plan->stored_cursor_rec_processed = FALSE;
btr_pcur_store_position(&(plan->pcur), &mtr);
-
+mtr_commit_exit:
mtr.commit();
func_exit:
- ut_ad(!sync_check_iterate(dict_sync_check()));
-
if (heap != NULL) {
mem_heap_free(heap);
}
@@ -2340,8 +2395,8 @@ row_sel_step(
que_node_get_next(table_node))) {
dberr_t err = lock_table(
- 0, table_node->table, i_lock_mode,
- thr);
+ table_node->table, nullptr,
+ i_lock_mode, thr);
if (err != DB_SUCCESS) {
trx_t* trx;
@@ -2702,7 +2757,7 @@ row_sel_convert_mysql_key_to_innobase(
<< ". Last data field length "
<< data_field_len << " bytes, key ptr now"
" exceeds key end by " << (key_ptr - key_end)
- << " bytes. Key value in the MySQL format:";
+ << " bytes. Key value in the MariaDB format:";
ut_print_buf(stderr, original_key_ptr, key_len);
putc('\n', stderr);
@@ -3202,6 +3257,14 @@ static bool row_sel_store_mysql_rec(
DBUG_RETURN(true);
}
+static void row_sel_reset_old_vers_heap(row_prebuilt_t *prebuilt)
+{
+ if (prebuilt->old_vers_heap)
+ mem_heap_empty(prebuilt->old_vers_heap);
+ else
+ prebuilt->old_vers_heap= mem_heap_create(200);
+}
+
/*********************************************************************//**
Builds a previous version of a clustered index record for a consistent read
@return DB_SUCCESS or error code */
@@ -3209,9 +3272,8 @@ static MY_ATTRIBUTE((warn_unused_result))
dberr_t
row_sel_build_prev_vers_for_mysql(
/*==============================*/
- ReadView* read_view, /*!< in: read view */
+ row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct */
dict_index_t* clust_index, /*!< in: clustered index */
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
const rec_t* rec, /*!< in: record in a clustered index */
rec_offs** offsets, /*!< in/out: offsets returned by
rec_get_offsets(rec, clust_index) */
@@ -3225,18 +3287,12 @@ row_sel_build_prev_vers_for_mysql(
column data */
mtr_t* mtr) /*!< in: mtr */
{
- dberr_t err;
+ row_sel_reset_old_vers_heap(prebuilt);
- if (prebuilt->old_vers_heap) {
- mem_heap_empty(prebuilt->old_vers_heap);
- } else {
- prebuilt->old_vers_heap = mem_heap_create(200);
- }
-
- err = row_vers_build_for_consistent_read(
- rec, mtr, clust_index, offsets, read_view, offset_heap,
+ return row_vers_build_for_consistent_read(
+ rec, mtr, clust_index, offsets,
+ &prebuilt->trx->read_view, offset_heap,
prebuilt->old_vers_heap, old_vers, vrow);
- return(err);
}
/** Helper class to cache clust_rec and old_vers */
@@ -3313,11 +3369,10 @@ Row_sel_get_clust_rec_for_mysql::operator()(
access the clustered index */
{
dict_index_t* clust_index;
- const rec_t* clust_rec;
rec_t* old_vers;
- dberr_t err;
trx_t* trx;
+ prebuilt->clust_pcur->old_rec = nullptr;
*out_rec = NULL;
trx = thr_get_trx(thr);
@@ -3328,12 +3383,16 @@ Row_sel_get_clust_rec_for_mysql::operator()(
sec_index, *offsets);
clust_index = dict_table_get_first_index(sec_index->table);
+ prebuilt->clust_pcur->btr_cur.page_cur.index = clust_index;
- btr_pcur_open_with_no_init(clust_index, prebuilt->clust_ref,
- PAGE_CUR_LE, BTR_SEARCH_LEAF,
- prebuilt->clust_pcur, mtr);
+ dberr_t err = btr_pcur_open_with_no_init(prebuilt->clust_ref,
+ PAGE_CUR_LE, BTR_SEARCH_LEAF,
+ prebuilt->clust_pcur, mtr);
+ if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+ return err;
+ }
- clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur);
+ const rec_t* clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur);
prebuilt->clust_pcur->trx_if_known = trx;
@@ -3352,35 +3411,29 @@ Row_sel_get_clust_rec_for_mysql::operator()(
if (dict_index_is_spatial(sec_index)
&& btr_cur->rtr_info->matches
&& (page_align(rec)
- == btr_cur->rtr_info->matches->block.frame
+ == btr_cur->rtr_info->matches->block.page.frame
|| rec != btr_pcur_get_rec(prebuilt->pcur))) {
#ifdef UNIV_DEBUG
rtr_info_t* rtr_info = btr_cur->rtr_info;
- mutex_enter(&rtr_info->matches->rtr_match_mutex);
+ mysql_mutex_lock(&rtr_info->matches->rtr_match_mutex);
/* The page could be deallocated (by rollback etc.) */
if (!rtr_info->matches->valid) {
- mutex_exit(&rtr_info->matches->rtr_match_mutex);
+ mysql_mutex_unlock(&rtr_info->matches->rtr_match_mutex);
clust_rec = NULL;
-
- err = DB_SUCCESS;
goto func_exit;
}
- mutex_exit(&rtr_info->matches->rtr_match_mutex);
+ mysql_mutex_unlock(&rtr_info->matches->rtr_match_mutex);
if (rec_get_deleted_flag(rec,
dict_table_is_comp(sec_index->table))
&& prebuilt->select_lock_type == LOCK_NONE) {
clust_rec = NULL;
-
- err = DB_SUCCESS;
goto func_exit;
}
if (rec != btr_pcur_get_rec(prebuilt->pcur)) {
clust_rec = NULL;
-
- err = DB_SUCCESS;
goto func_exit;
}
@@ -3390,32 +3443,31 @@ Row_sel_get_clust_rec_for_mysql::operator()(
buf_block_t* block = buf_page_get_gen(
btr_pcur_get_block(prebuilt->pcur)->page.id(),
btr_pcur_get_block(prebuilt->pcur)->zip_size(),
- RW_NO_LATCH, NULL, BUF_GET,
- __FILE__, __LINE__, mtr, &err);
+ RW_NO_LATCH, NULL, BUF_GET, mtr, &err);
+ ut_ad(block); // FIXME: avoid crash
mem_heap_t* heap = mem_heap_create(256);
dtuple_t* tuple = dict_index_build_data_tuple(
rec, sec_index, true,
sec_index->n_fields, heap);
page_cur_t page_cursor;
-
- ulint low_match = page_cur_search(
- block, sec_index, tuple,
- PAGE_CUR_LE, &page_cursor);
-
+ page_cursor.block = block;
+ page_cursor.index = sec_index;
+ ulint up_match = 0, low_match = 0;
+ ut_ad(!page_cur_search_with_match(tuple, PAGE_CUR_LE,
+ &up_match,
+ &low_match,
+ &page_cursor,
+ nullptr));
ut_ad(low_match < dtuple_get_n_fields_cmp(tuple));
mem_heap_free(heap);
- clust_rec = NULL;
-
err = DB_SUCCESS;
- goto func_exit;
#endif /* UNIV_DEBUG */
} else if (!rec_get_deleted_flag(rec,
dict_table_is_comp(sec_index->table))
- || prebuilt->select_lock_type != LOCK_NONE) {
+ || prebuilt->select_lock_type != LOCK_NONE) {
/* In a rare case it is possible that no clust
rec is found for a delete-marked secondary index
- record: if in row0umod.cc in
- row_undo_mod_remove_clust_low() we have already removed
+ record: if row_undo_mod_clust() has already removed
the clust rec, while purge is still cleaning and
removing secondary index records associated with
earlier versions of the clustered index record.
@@ -3430,17 +3482,10 @@ Row_sel_get_clust_rec_for_mysql::operator()(
fputs("\n"
"InnoDB: clust index record ", stderr);
rec_print(stderr, clust_rec, clust_index);
- putc('\n', stderr);
- trx_print(stderr, trx, 600);
- fputs("\n"
- "InnoDB: Submit a detailed bug report"
- " to https://jira.mariadb.org/\n", stderr);
- ut_ad(0);
+ err = DB_CORRUPTION;
}
clust_rec = NULL;
-
- err = DB_SUCCESS;
goto func_exit;
}
@@ -3465,7 +3510,7 @@ Row_sel_get_clust_rec_for_mysql::operator()(
case DB_SUCCESS_LOCKED_REC:
break;
default:
- goto err_exit;
+ return err;
}
} else {
/* This is a non-locking consistent read: if necessary, fetch
@@ -3473,13 +3518,22 @@ Row_sel_get_clust_rec_for_mysql::operator()(
old_vers = NULL;
- /* If the isolation level allows reading of uncommitted data,
- then we never look for an earlier version */
+ if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED
+ || clust_index->table->is_temporary()) {
+ } else {
+ /* If the isolation level allows reading of
+ uncommitted data, then we never look for an
+ earlier version */
+ err = row_sel_clust_sees(clust_rec, *clust_index,
+ *offsets, trx->read_view);
+ }
- if (trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
- && !lock_clust_rec_cons_read_sees(
- clust_rec, clust_index, *offsets,
- &trx->read_view)) {
+ switch (err) {
+ default:
+ return err;
+ case DB_SUCCESS:
+ break;
+ case DB_SUCCESS_LOCKED_REC:
const buf_page_t& bpage = btr_pcur_get_block(
prebuilt->clust_pcur)->page;
@@ -3492,13 +3546,12 @@ Row_sel_get_clust_rec_for_mysql::operator()(
/* The following call returns 'offsets' associated with
'old_vers' */
err = row_sel_build_prev_vers_for_mysql(
- &trx->read_view, clust_index, prebuilt,
+ prebuilt, clust_index,
clust_rec, offsets, offset_heap, &old_vers,
vrow, mtr);
- if (err != DB_SUCCESS) {
-
- goto err_exit;
+ if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+ return err;
}
cached_lsn = lsn;
cached_page_id = bpage.id();
@@ -3522,7 +3575,7 @@ Row_sel_get_clust_rec_for_mysql::operator()(
}
if (old_vers == NULL) {
- goto err_exit;
+ return err;
}
clust_rec = old_vers;
@@ -3558,7 +3611,7 @@ Row_sel_get_clust_rec_for_mysql::operator()(
case DB_SUCCESS_LOCKED_REC:
break;
default:
- goto err_exit;
+ return err;
}
}
@@ -3575,8 +3628,7 @@ func_exit:
btr_pcur_store_position(prebuilt->clust_pcur, mtr);
}
-err_exit:
- return(err);
+ return err;
}
/** Restores cursor position after it has been stored. We have to take into
@@ -3591,10 +3643,11 @@ record with the same ordering prefix in the B-tree index
@return true if we may need to process the record the cursor is now
positioned on (i.e. we should not go to the next record yet) */
static bool sel_restore_position_for_mysql(bool *same_user_rec,
- ulint latch_mode, btr_pcur_t *pcur,
+ btr_latch_mode latch_mode,
+ btr_pcur_t *pcur,
bool moves_up, mtr_t *mtr)
{
- auto status = btr_pcur_restore_position(latch_mode, pcur, mtr);
+ auto status = pcur->restore_position(latch_mode, mtr);
*same_user_rec = status == btr_pcur_t::SAME_ALL;
@@ -3620,7 +3673,7 @@ static bool sel_restore_position_for_mysql(bool *same_user_rec,
next:
if (btr_pcur_move_to_next(pcur, mtr)
&& rec_is_metadata(btr_pcur_get_rec(pcur),
- *pcur->btr_cur.index)) {
+ *pcur->index())) {
btr_pcur_move_to_next(pcur, mtr);
}
@@ -3636,8 +3689,10 @@ next:
prev:
if (btr_pcur_is_on_user_rec(pcur) && !moves_up
&& !rec_is_metadata(btr_pcur_get_rec(pcur),
- *pcur->btr_cur.index)) {
- btr_pcur_move_to_prev(pcur, mtr);
+ *pcur->index())) {
+ if (!btr_pcur_move_to_prev(pcur, mtr)) {
+ return true;
+ }
}
return true;
case BTR_PCUR_BEFORE:
@@ -3910,16 +3965,22 @@ row_sel_try_search_shortcut_for_mysql(
trx_t* trx = prebuilt->trx;
const rec_t* rec;
- ut_ad(dict_index_is_clust(index));
+ ut_ad(index->is_primary());
+ ut_ad(!index->table->is_temporary());
ut_ad(!prebuilt->templ_contains_blob);
+ ut_ad(trx->read_view.is_open());
+ pcur->old_rec = nullptr;
+
+ if (btr_pcur_open_with_no_init(search_tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, pcur, mtr)
+ != DB_SUCCESS) {
+ return SEL_RETRY;
+ }
- btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, pcur, mtr);
rec = btr_pcur_get_rec(pcur);
if (!page_rec_is_user_rec(rec) || rec_is_metadata(rec, *index)) {
-retry:
- return(SEL_RETRY);
+ return SEL_RETRY;
}
/* As the cursor is now placed on a user record after a search with
@@ -3927,8 +3988,15 @@ retry:
fields in the user record matched to the search tuple */
if (btr_pcur_get_up_match(pcur) < dtuple_get_n_fields(search_tuple)) {
-exhausted:
- return(SEL_EXHAUSTED);
+ return SEL_EXHAUSTED;
+ }
+
+ if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED) {
+ } else if (trx_id_t bulk_trx_id = index->table->bulk_trx_id) {
+ /* See row_search_mvcc() for a comment on bulk_trx_id */
+ if (!trx->read_view.changes_visible(bulk_trx_id)) {
+ return SEL_EXHAUSTED;
+ }
}
/* This is a non-locking consistent read: if necessary, fetch
@@ -3937,21 +4005,21 @@ exhausted:
*offsets = rec_get_offsets(rec, index, *offsets, index->n_core_fields,
ULINT_UNDEFINED, heap);
- if (!lock_clust_rec_cons_read_sees(rec, index, *offsets,
- &trx->read_view)) {
- goto retry;
+ if (row_sel_clust_sees(rec, *index, *offsets, trx->read_view)
+ != DB_SUCCESS) {
+ return SEL_RETRY;
}
if (rec_get_deleted_flag(rec, dict_table_is_comp(index->table))) {
/* In delete-marked records, DB_TRX_ID must
always refer to an existing undo log record. */
ut_ad(row_get_rec_trx_id(rec, index, *offsets));
- goto exhausted;
+ return SEL_EXHAUSTED;
}
*out_rec = rec;
- return(SEL_FOUND);
+ return SEL_FOUND;
}
#endif /* BTR_CUR_HASH_ADAPT */
@@ -4317,25 +4385,29 @@ row_search_mvcc(
DBUG_RETURN(DB_END_OF_INDEX);
}
- ut_ad(!sync_check_iterate(sync_check()));
-
if (!prebuilt->table->space) {
DBUG_RETURN(DB_TABLESPACE_DELETED);
} else if (!prebuilt->table->is_readable()) {
- DBUG_RETURN(prebuilt->table->space
- ? DB_DECRYPTION_FAILED
- : DB_TABLESPACE_NOT_FOUND);
+ if (fil_space_crypt_t* crypt_data =
+ prebuilt->table->space->crypt_data) {
+ if (crypt_data->should_encrypt()) {
+ DBUG_RETURN(DB_DECRYPTION_FAILED);
+ }
+ }
+ DBUG_RETURN(DB_CORRUPTION);
} else if (!prebuilt->index_usable) {
DBUG_RETURN(DB_MISSING_HISTORY);
} else if (prebuilt->index->is_corrupted()) {
DBUG_RETURN(DB_CORRUPTION);
}
+ pcur->btr_cur.page_cur.index = index;
+
/* We need to get the virtual column values stored in secondary
index key, if this is covered index scan or virtual key read is
requested. */
- bool need_vrow = dict_index_has_virtual(prebuilt->index)
- && prebuilt->read_just_key;
+ bool need_vrow = prebuilt->read_just_key
+ && prebuilt->index->has_virtual();
/* Reset the new record lock info if READ UNCOMMITTED or
READ COMMITED isolation level is used. Then
@@ -4472,6 +4544,7 @@ early_not_found:
&& unique_search
&& btr_search_enabled
&& dict_index_is_clust(index)
+ && !index->table->is_temporary()
&& !prebuilt->templ_contains_blob
&& !prebuilt->used_in_HANDLER
&& (prebuilt->mysql_row_len < srv_page_size / 8)) {
@@ -4549,7 +4622,6 @@ aborted:
/* NOTE that we do NOT store the cursor
position */
trx->op_info = "";
- ut_ad(!sync_check_iterate(sync_check()));
ut_ad(!did_semi_consistent_read);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
@@ -4575,16 +4647,22 @@ aborted:
spatial_search = dict_index_is_spatial(index)
&& mode >= PAGE_CUR_CONTAIN;
+#ifdef UNIV_DEBUG
/* The state of a running trx can only be changed by the
thread that is currently serving the transaction. Because we
are that thread, we can read trx->state without holding any
mutex. */
- ut_ad(prebuilt->sql_stat_start
- || trx->state == TRX_STATE_ACTIVE
- || (prebuilt->table->no_rollback()
- && trx->state == TRX_STATE_NOT_STARTED));
-
- ut_ad(!trx_is_started(trx) || trx->state == TRX_STATE_ACTIVE);
+ switch (trx->state) {
+ case TRX_STATE_ACTIVE:
+ break;
+ case TRX_STATE_NOT_STARTED:
+ ut_ad(prebuilt->sql_stat_start
+ || prebuilt->table->no_rollback());
+ break;
+ default:
+ ut_ad("invalid trx->state" == 0);
+ }
+#endif
ut_ad(prebuilt->sql_stat_start
|| prebuilt->select_lock_type != LOCK_NONE
@@ -4617,8 +4695,6 @@ aborted:
thr = que_fork_get_first_thr(prebuilt->sel_graph);
- thr->start_running();
-
clust_index = dict_table_get_first_index(prebuilt->table);
dberr_t err = DB_SUCCESS;
@@ -4641,7 +4717,7 @@ aborted:
trx->read_view.open(trx);
} else {
wait_table_again:
- err = lock_table(0, prebuilt->table,
+ err = lock_table(prebuilt->table, nullptr,
prebuilt->select_lock_type == LOCK_S
? LOCK_IS : LOCK_IX, thr);
@@ -4667,6 +4743,15 @@ wait_table_again:
pcur, moves_up, &mtr);
if (UNIV_UNLIKELY(need_to_process)) {
+ if (UNIV_UNLIKELY(!btr_pcur_get_rec(pcur))) {
+ mtr.commit();
+ trx->op_info = "";
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ return DB_CORRUPTION;
+ }
+
if (UNIV_UNLIKELY(prebuilt->row_read_type
== ROW_READ_DID_SEMI_CONSISTENT)) {
/* We did a semi-consistent read,
@@ -4684,13 +4769,14 @@ wait_table_again:
pessimistic locking read, the record
cannot be skipped. */
- goto next_rec;
+ goto next_rec_after_check;
}
} else if (dtuple_get_n_fields(search_tuple) > 0) {
pcur->btr_cur.thr = thr;
+ pcur->old_rec = nullptr;
- if (dict_index_is_spatial(index)) {
+ if (index->is_spatial()) {
if (!prebuilt->rtr_info) {
prebuilt->rtr_info = rtr_create_rtr_info(
set_also_gap_locks, true,
@@ -4706,12 +4792,16 @@ wait_table_again:
prebuilt->rtr_info->search_tuple = search_tuple;
prebuilt->rtr_info->search_mode = mode;
}
- }
- err = btr_pcur_open_with_no_init(index, search_tuple, mode,
- BTR_SEARCH_LEAF, pcur, &mtr);
+ err = rtr_search_leaf(pcur, search_tuple, mode, &mtr);
+ } else {
+ err = btr_pcur_open_with_no_init(search_tuple, mode,
+ BTR_SEARCH_LEAF,
+ pcur, &mtr);
+ }
if (err != DB_SUCCESS) {
+page_corrupted:
rec = NULL;
goto page_read_error;
}
@@ -4729,6 +4819,10 @@ wait_table_again:
/* Try to place a gap lock on the next index record
to prevent phantoms in ORDER BY ... DESC queries */
const rec_t* next_rec = page_rec_get_next_const(rec);
+ if (UNIV_UNLIKELY(!next_rec)) {
+ err = DB_CORRUPTION;
+ goto page_corrupted;
+ }
offsets = rec_get_offsets(next_rec, index, offsets,
index->n_core_fields,
@@ -4749,25 +4843,66 @@ wait_table_again:
}
}
} else if (mode == PAGE_CUR_G || mode == PAGE_CUR_L) {
- err = btr_pcur_open_at_index_side(
- mode == PAGE_CUR_G, index, BTR_SEARCH_LEAF,
- pcur, false, 0, &mtr);
+ err = pcur->open_leaf(mode == PAGE_CUR_G, index,
+ BTR_SEARCH_LEAF, &mtr);
if (err != DB_SUCCESS) {
if (err == DB_DECRYPTION_FAILED) {
- ib_push_warning(trx->mysql_thd,
- DB_DECRYPTION_FAILED,
- "Table %s is encrypted but encryption service or"
- " used key_id is not available. "
- " Can't continue reading table.",
- prebuilt->table->name.m_name);
- index->table->file_unreadable = true;
+ btr_decryption_failed(*index);
}
rec = NULL;
goto page_read_error;
}
}
+ /* Check if the table is supposed to be empty for our read view.
+
+ If we read bulk_trx_id as an older transaction ID, it is not
+ incorrect to check here whether that transaction should be
+ visible to us. If bulk_trx_id is not visible to us, the table
+ must have been empty at an earlier point of time, also in our
+ read view.
+
+ An INSERT would only update bulk_trx_id in
+ row_ins_clust_index_entry_low() if the table really was empty
+ (everything had been purged), when holding a leaf page latch
+ in the clustered index (actually, the root page is the only
+ leaf page in that case).
+
+ We are already holding a leaf page latch here, either
+ in a secondary index or in a clustered index.
+
+ If we are holding a clustered index page latch, there clearly
+ is no potential for race condition with a concurrent INSERT:
+ such INSERT would be blocked by us.
+
+ If we are holding a secondary index page latch, then we are
+ not directly blocking a concurrent INSERT that might update
+ bulk_trx_id to something that does not exist in our read view.
+ But, in that case, the entire table (all indexes) must have
+ been empty. So, even if our read below missed the update of
+ index->table->bulk_trx_id, we can safely proceed to reading
+ the empty secondary index page. Our latch will prevent the
+ INSERT from proceeding to that page. It will first modify
+ the clustered index. Also, we may only look up something in
+ the clustered index if the secondary index page is not empty
+ to begin with. So, only if the table is corrupted
+ (the clustered index is empty but the secondary index is not)
+	could we return corrupted results. */
+ if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED
+ || !trx->read_view.is_open()) {
+ } else if (trx_id_t bulk_trx_id = index->table->bulk_trx_id) {
+ /* InnoDB should allow the transaction to read all
+ the rows when InnoDB intends to do any locking
+ on the record */
+ if (prebuilt->select_lock_type == LOCK_NONE
+ && !trx->read_view.changes_visible(bulk_trx_id)) {
+ trx->op_info = "";
+ err = DB_END_OF_INDEX;
+ goto normal_return;
+ }
+ }
+
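/* Illustrative sketch only, not part of the patch: a tiny worked example of the
bulk_trx_id check above, using a hypothetical ToyView in place of ReadView. A
bulk INSERT that is not visible in the read view implies that the table was
empty at the time the view was created, so the scan can stop immediately. */
#include <cassert>
#include <cstdint>

struct ToyView {
	uint64_t low_limit;	/* simplification: ids below this are visible */
	bool changes_visible(uint64_t id) const { return id < low_limit; }
};

int main()
{
	ToyView view{100};	/* our read view */
	/* A later transaction (id 120) bulk-inserted all rows: invisible to us,
	so for this view the table is empty (DB_END_OF_INDEX above). */
	assert(!view.changes_visible(120));
	/* An old, committed bulk INSERT (id 42) is visible: scan normally. */
	assert(view.changes_visible(42));
	return 0;
}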
rec_loop:
DEBUG_SYNC_C("row_search_rec_loop");
if (trx_is_interrupted(trx)) {
@@ -4783,11 +4918,6 @@ rec_loop:
rec = btr_pcur_get_rec(pcur);
- if (!index->table->is_readable()) {
- err = DB_DECRYPTION_FAILED;
- goto page_read_error;
- }
-
ut_ad(!!page_rec_is_comp(rec) == comp);
ut_ad(page_rec_is_leaf(rec));
@@ -4904,7 +5034,7 @@ wrong_offs:
page_cur_set_after_last(btr_pcur_get_block(pcur),
btr_pcur_get_page_cur(pcur));
- pcur->old_stored = false;
+ pcur->old_rec = nullptr;
goto next_rec;
}
}
@@ -5135,17 +5265,18 @@ no_gap_lock:
!= ROW_READ_TRY_SEMI_CONSISTENT)
|| unique_search
|| index != clust_index) {
-
- goto lock_wait_or_error;
+ if (!prebuilt->skip_locked) {
+ goto lock_wait_or_error;
+ }
+ } else {
+ /* The following call returns 'offsets'
+ associated with 'old_vers' */
+ row_sel_build_committed_vers_for_mysql(
+ clust_index, prebuilt, rec,
+ &offsets, &heap, &old_vers,
+ need_vrow ? &vrow : NULL, &mtr);
}
- /* The following call returns 'offsets'
- associated with 'old_vers' */
- row_sel_build_committed_vers_for_mysql(
- clust_index, prebuilt, rec,
- &offsets, &heap, &old_vers, need_vrow ? &vrow : NULL,
- &mtr);
-
/* Check whether it was a deadlock or not, if not
a deadlock and the transaction had to wait then
release the lock it is waiting on. */
@@ -5154,6 +5285,8 @@ no_gap_lock:
switch (err) {
case DB_SUCCESS:
+ ut_ad(
+ !trx->lock.was_chosen_as_deadlock_victim);
/* The lock was granted while we were
searching for the last committed version.
Do a normal locking read. */
@@ -5168,7 +5301,16 @@ no_gap_lock:
case DB_LOCK_WAIT:
ut_ad(!dict_index_is_spatial(index));
err = DB_SUCCESS;
+ if (prebuilt->skip_locked) {
+ goto next_rec;
+ }
break;
+ case DB_LOCK_WAIT_TIMEOUT:
+ if (prebuilt->skip_locked) {
+ err = DB_SUCCESS;
+ goto next_rec;
+ }
+ /* fall through */
default:
ut_error;
}
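/* Illustrative sketch only, not part of the patch: what the prebuilt->skip_locked
branches above amount to, reduced to hypothetical plain types. A row whose lock
cannot be granted is simply not returned, instead of making the statement wait
or fail with a timeout. */
#include <vector>

enum class ToyLock { GRANTED, WOULD_WAIT, TIMED_OUT };

struct ToyRow { int id; ToyLock lock; };

inline std::vector<int> toy_skip_locked_scan(const std::vector<ToyRow>& rows)
{
	std::vector<int> out;
	for (const ToyRow& r : rows) {
		if (r.lock != ToyLock::GRANTED) {
			continue;	/* corresponds to err = DB_SUCCESS; goto next_rec; */
		}
		out.push_back(r.id);	/* only uncontended rows are delivered */
	}
	return out;
}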
@@ -5188,7 +5330,13 @@ no_gap_lock:
} else {
goto lock_wait_or_error;
}
-
+ break;
+ case DB_LOCK_WAIT_TIMEOUT:
+ if (prebuilt->skip_locked) {
+ err = DB_SUCCESS;
+ goto next_rec;
+ }
+ /* fall through */
default:
goto lock_wait_or_error;
@@ -5198,6 +5346,7 @@ no_gap_lock:
a previous version of the record */
if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED
+ || prebuilt->table->is_temporary()
|| prebuilt->table->no_rollback()) {
/* Do nothing: we let a non-locking SELECT read the
@@ -5210,18 +5359,24 @@ no_gap_lock:
high force recovery level set, we try to avoid crashes
by skipping this lookup */
- if (!lock_clust_rec_cons_read_sees(
- rec, index, offsets, &trx->read_view)) {
+ err = row_sel_clust_sees(rec, *index, offsets,
+ trx->read_view);
+
+ switch (err) {
+ default:
+ goto lock_wait_or_error;
+ case DB_SUCCESS:
+ break;
+ case DB_SUCCESS_LOCKED_REC:
ut_ad(srv_force_recovery
< SRV_FORCE_NO_UNDO_LOG_SCAN);
rec_t* old_vers;
/* The following call returns 'offsets'
associated with 'old_vers' */
err = row_sel_build_prev_vers_for_mysql(
- &trx->read_view, clust_index,
- prebuilt, rec, &offsets, &heap,
- &old_vers, need_vrow ? &vrow : NULL,
- &mtr);
+ prebuilt, clust_index,
+ rec, &offsets, &heap, &old_vers,
+ need_vrow ? &vrow : nullptr, &mtr);
if (err != DB_SUCCESS) {
@@ -5246,9 +5401,13 @@ no_gap_lock:
ut_ad(!dict_index_is_clust(index));
- if (!srv_read_only_mode
- && !lock_sec_rec_cons_read_sees(
- rec, index, &trx->read_view)) {
+ if (!srv_read_only_mode) {
+ trx_id_t trx_id = page_get_max_trx_id(
+ page_align(rec));
+ ut_ad(trx_id);
+ if (trx->read_view.sees(trx_id)) {
+ goto locks_ok;
+ }
/* We should look at the clustered index.
However, as this is a non-locking read,
we can skip the clustered index lookup if
@@ -5358,13 +5517,15 @@ requires_clust_rec:
&offsets, &heap,
need_vrow ? &vrow : NULL,
&mtr);
+ if (err == DB_LOCK_WAIT && prebuilt->skip_locked) {
+ err = lock_trx_handle_wait(trx);
+ }
switch (err) {
case DB_SUCCESS:
if (clust_rec == NULL) {
/* The record did not exist in the read view */
ut_ad(prebuilt->select_lock_type == LOCK_NONE
|| dict_index_is_spatial(index));
-
goto next_rec;
}
break;
@@ -5377,6 +5538,13 @@ requires_clust_rec:
}
err = DB_SUCCESS;
break;
+ case DB_LOCK_WAIT_TIMEOUT:
+ case DB_LOCK_WAIT:
+ if (prebuilt->skip_locked) {
+ err = DB_SUCCESS;
+ goto next_rec;
+ }
+ /* fall through */
default:
vrow = NULL;
goto lock_wait_or_error;
@@ -5452,9 +5620,7 @@ use_covering_index:
&& !prebuilt->templ_contains_blob
&& !prebuilt->clust_index_was_generated
&& !prebuilt->used_in_HANDLER
- && prebuilt->template_type != ROW_MYSQL_DUMMY_TEMPLATE
&& !prebuilt->in_fts_query) {
-
/* Inside an update, for example, we do not cache rows,
since we may use the cursor position to do the actual
update, that is why we require ...lock_type == LOCK_NONE.
@@ -5519,29 +5685,8 @@ use_covering_index:
if (prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE) {
goto next_rec;
}
-
} else {
- if (UNIV_UNLIKELY
- (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE)) {
- /* CHECK TABLE: fetch the row */
-
- if (result_rec != rec
- && !prebuilt->need_to_access_clustered) {
- /* We used 'offsets' for the clust
- rec, recalculate them for 'rec' */
- offsets = rec_get_offsets(rec, index, offsets,
- index->n_core_fields,
- ULINT_UNDEFINED,
- &heap);
- result_rec = rec;
- }
-
- memcpy(buf + 4, result_rec
- - rec_offs_extra_size(offsets),
- rec_offs_size(offsets));
- mach_write_to_4(buf,
- rec_offs_extra_size(offsets) + 4);
- } else if (!prebuilt->pk_filter && !prebuilt->idx_cond) {
+ if (!prebuilt->pk_filter && !prebuilt->idx_cond) {
/* The record was not yet converted to MySQL format. */
if (!row_sel_store_mysql_rec(
buf, prebuilt, result_rec, vrow,
@@ -5609,6 +5754,7 @@ next_rec:
== ROW_READ_DID_SEMI_CONSISTENT)) {
prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
}
+next_rec_after_check:
did_semi_consistent_read = false;
prebuilt->new_rec_locks = 0;
vrow = NULL;
@@ -5632,9 +5778,7 @@ next_rec:
if (spatial_search) {
/* No need to do store restore for R-tree */
- mtr.commit();
- mtr.start();
- mtr_extra_clust_savepoint = 0;
+ mtr.rollback_to_savepoint(0);
} else if (mtr_extra_clust_savepoint) {
/* We must release any clustered index latches
if we are moving to the next non-clustered
@@ -5642,9 +5786,10 @@ next_rec:
order if we would access a different clustered
index page right away without releasing the previous. */
mtr.rollback_to_savepoint(mtr_extra_clust_savepoint);
- mtr_extra_clust_savepoint = 0;
}
+ mtr_extra_clust_savepoint = 0;
+
if (moves_up) {
if (UNIV_UNLIKELY(spatial_search)) {
if (rtr_pcur_move_to_next(
@@ -5652,24 +5797,20 @@ next_rec:
goto rec_loop;
}
} else {
- const buf_block_t* block = btr_pcur_get_block(pcur);
- /* This is based on btr_pcur_move_to_next(),
- but avoids infinite read loop of a corrupted page. */
+ /* This is based on btr_pcur_move_to_next() */
ut_ad(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(pcur->latch_mode != BTR_NO_LATCHES);
- pcur->old_stored = false;
+ pcur->old_rec = nullptr;
if (btr_pcur_is_after_last_on_page(pcur)) {
if (btr_pcur_is_after_last_in_tree(pcur)) {
goto not_moved;
}
- btr_pcur_move_to_next_page(pcur, &mtr);
- if (UNIV_UNLIKELY(btr_pcur_get_block(pcur)
- == block)) {
- err = DB_CORRUPTION;
+ err = btr_pcur_move_to_next_page(pcur, &mtr);
+ if (err != DB_SUCCESS) {
goto lock_wait_or_error;
}
- } else {
- btr_pcur_move_to_next_on_page(pcur);
+ } else if (!btr_pcur_move_to_next_on_page(pcur)) {
+ goto corrupted;
}
goto rec_loop;
@@ -5678,6 +5819,11 @@ next_rec:
if (btr_pcur_move_to_prev(pcur, &mtr)) {
goto rec_loop;
}
+ if (UNIV_UNLIKELY(!btr_pcur_get_rec(pcur))) {
+corrupted:
+ err = DB_CORRUPTION;
+ goto normal_return;
+ }
}
not_moved:
@@ -5705,13 +5851,6 @@ lock_table_wait:
mtr_extra_clust_savepoint = 0;
trx->error_state = err;
-
- /* The following is a patch for MySQL */
-
- if (thr->is_active) {
- que_thr_stop_for_mysql(thr);
- }
-
thr->lock_state = QUE_THR_LOCK_ROW;
if (row_mysql_handle_errors(&err, trx, thr, NULL)) {
@@ -5766,16 +5905,6 @@ lock_table_wait:
goto func_exit;
normal_return:
- /*-------------------------------------------------------------*/
- {
- /* handler_index_cond_check() may pull TR_table search
-		which initiates another row_search_mvcc(). */
- ut_d(ulint n_active_thrs= trx->lock.n_active_thrs);
- ut_d(trx->lock.n_active_thrs= 1);
- thr->stop_no_error();
- ut_d(trx->lock.n_active_thrs= n_active_thrs - 1);
- }
-
mtr.commit();
DEBUG_SYNC_C("row_search_for_mysql_before_return");
@@ -5830,8 +5959,6 @@ func_exit:
}
}
- ut_ad(!sync_check_iterate(sync_check()));
-
DEBUG_SYNC_C("innodb_row_search_for_mysql_exit");
DBUG_RETURN(err);
@@ -5914,18 +6041,11 @@ row_count_rtree_recs(
prebuilt->mysql_row_len);
buf = static_cast<byte*>(ut_malloc_nokey(bufsize));
- ulint cnt = 1000;
+ ulint direction = 0;
- ret = row_search_for_mysql(buf, PAGE_CUR_WITHIN, prebuilt, 0, 0);
loop:
- /* Check thd->killed every 1,000 scanned rows */
- if (--cnt == 0) {
- if (trx_is_interrupted(prebuilt->trx)) {
- ret = DB_INTERRUPTED;
- goto func_exit;
- }
- cnt = 1000;
- }
+ ret = row_search_mvcc(buf, PAGE_CUR_WITHIN, prebuilt, 0, direction);
+ direction = ROW_SEL_NEXT;
switch (ret) {
case DB_SUCCESS:
@@ -5947,12 +6067,774 @@ func_exit:
return(ret);
}
- *n_rows = *n_rows + 1;
+ ++*n_rows;
+ goto loop;
+}
- ret = row_search_for_mysql(
- buf, PAGE_CUR_WITHIN, prebuilt, 0, ROW_SEL_NEXT);
+/** Check if a version of a clustered index record and a secondary
+index record match.
+
+@param prebuilt index and transaction
+@param clust_rec a version of a clustered index record
+@param clust_index clustered index
+@param clust_offsets rec_get_offsets(clust_rec, clust_index)
+@param rec secondary index leaf page record
+@param index secondary index
+@param offsets rec_get_offsets(rec, index)
+@return an error code
+@retval DB_SUCCESS if rec matches clust_rec
+@retval DB_SUCCESS_LOCKED_REC if rec does not match clust_rec
+*/
+static dberr_t row_check_index_match(row_prebuilt_t *prebuilt,
+ const rec_t *clust_rec,
+ const dict_index_t *clust_index,
+ const rec_offs *clust_offsets,
+ const rec_t *rec,
+ const dict_index_t *index,
+ const rec_offs *offsets)
+{
+ ut_ad(index == prebuilt->index);
- goto loop;
+ ib_vcol_row vc(index->has_virtual() ? mem_heap_create(256) : nullptr);
+
+ const uint16_t n= index->n_user_defined_cols;
+
+ for (uint16_t i= 0; i < n; i++)
+ {
+ ulint pos= 0;
+ ulint len, sec_len;
+
+ const dict_field_t &ifield= index->fields[i];
+ const byte *sec_field= rec_get_nth_field(rec, offsets, i, &sec_len);
+ const byte *field;
+
+ if (ifield.col->is_virtual())
+ {
+ /* Virtual column values must be reconstructed from the base columns. */
+ row_ext_t *ext;
+ byte *record= vc.record(prebuilt->trx->mysql_thd, clust_index,
+ &prebuilt->m_mysql_table);
+ const dict_v_col_t *v_col= reinterpret_cast<const dict_v_col_t*>
+ (ifield.col);
+ dtuple_t *row= row_build(ROW_COPY_POINTERS,
+ clust_index, clust_rec, clust_offsets,
+ nullptr, nullptr, nullptr, &ext, vc.heap);
+ if (dfield_t *vfield=
+ innobase_get_computed_value(row, v_col, clust_index, &vc.heap,
+ nullptr, nullptr,
+ prebuilt->trx->mysql_thd,
+ prebuilt->m_mysql_table,
+ record, nullptr, nullptr))
+ {
+ len= vfield->len;
+ field= static_cast<byte*>(vfield->data);
+ }
+ else
+ {
+ innobase_report_computed_value_failed(row);
+ return DB_COMPUTE_VALUE_FAILED;
+ }
+ }
+ else
+ {
+ pos= dict_col_get_clust_pos(ifield.col, clust_index);
+ field= rec_get_nth_cfield(clust_rec, clust_index, clust_offsets, pos,
+ &len);
+ if (len == UNIV_SQL_NULL)
+ {
+ if (sec_len == UNIV_SQL_NULL)
+ continue;
+ return DB_SUCCESS_LOCKED_REC;
+ }
+ if (sec_len == UNIV_SQL_NULL)
+ return DB_SUCCESS_LOCKED_REC;
+
+ if (rec_offs_nth_extern(clust_offsets, pos))
+ {
+ if (len == BTR_EXTERN_FIELD_REF_SIZE)
+ goto compare_blobs;
+ len-= BTR_EXTERN_FIELD_REF_SIZE;
+ }
+
+ if (ifield.prefix_len)
+ {
+ len=
+ dtype_get_at_most_n_mbchars(ifield.col->prtype, ifield.col->mbminlen,
+ ifield.col->mbmaxlen,
+ ifield.prefix_len, len,
+ reinterpret_cast<const char*>(field));
+ if (len < sec_len)
+ goto check_for_blob;
+ }
+ else
+ {
+check_for_blob:
+ if (rec_offs_nth_extern(clust_offsets, pos))
+ {
+compare_blobs:
+ if (!row_sel_sec_rec_is_for_blob(ifield.col->mtype,
+ ifield.col->prtype,
+ ifield.col->mbminlen,
+ ifield.col->mbmaxlen,
+ field, len, sec_field, sec_len,
+ ifield.prefix_len,
+ clust_index->table))
+ return DB_SUCCESS_LOCKED_REC;
+ continue;
+ }
+ }
+ }
+
+ if (cmp_data_data(ifield.col->mtype, ifield.col->prtype,
+ field, len, sec_field, sec_len))
+ return DB_SUCCESS_LOCKED_REC;
+ }
+
+ return DB_SUCCESS;
+}
+
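/* Illustrative sketch only, not part of the patch: the core of
row_check_index_match() with hypothetical plain types. Virtual columns and
externally stored BLOBs are omitted; NULL is modelled with std::optional, and a
mismatch plays the role of DB_SUCCESS_LOCKED_REC. */
#include <cstddef>
#include <optional>
#include <string>
#include <vector>

using ToyField = std::optional<std::string>;

inline bool toy_index_match(const std::vector<ToyField>& clust_cols,
                            const std::vector<ToyField>& sec_cols,
                            const std::vector<std::size_t>& prefix_len /* 0 = full column */)
{
  for (std::size_t i= 0; i < sec_cols.size(); i++)
  {
    const ToyField& c= clust_cols[i];
    const ToyField& s= sec_cols[i];
    if (!c || !s)
    {
      if (c.has_value() != s.has_value())
        return false;            /* exactly one side is NULL: the records differ */
      continue;                  /* both NULL: this column matches */
    }
    std::string v= *c;
    if (prefix_len[i] && v.size() > prefix_len[i])
      v.resize(prefix_len[i]);   /* a prefix index stores only a column prefix */
    if (v != *s)
      return false;
  }
  return true;
}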
+/**
+Check the index records in CHECK TABLE.
+The index must contain entries in an ascending order,
+unique constraint must not be violated by duplicated keys,
+and the number of index entries is counted in according to the
+current read view.
+
+@param prebuilt index and transaction
+@param n_rows number of records counted
+
+@return error code
+@retval DB_SUCCESS if no error was found */
+dberr_t row_check_index(row_prebuilt_t *prebuilt, ulint *n_rows)
+{
+ rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs_init(offsets_);
+
+ *n_rows= 0;
+ dict_index_t *const index= prebuilt->index;
+
+ if (!index->is_btree())
+ return DB_CORRUPTION;
+
+ mem_heap_t *heap= mem_heap_create(100);
+
+ dtuple_t *prev_entry= nullptr;
+ mtr_t mtr;
+ mtr.start();
+
+ dict_index_t *clust_index= dict_table_get_first_index(prebuilt->table);
+ prebuilt->clust_pcur->btr_cur.page_cur.index = clust_index;
+ dberr_t err= prebuilt->pcur->open_leaf(true, index, BTR_SEARCH_LEAF, &mtr);
+ if (UNIV_UNLIKELY(err != DB_SUCCESS))
+ {
+func_exit:
+ mtr.commit();
+ mem_heap_free(heap);
+ return err;
+ }
+
+ if (const trx_id_t bulk_trx_id= index->table->bulk_trx_id)
+ if (!prebuilt->trx->read_view.changes_visible(bulk_trx_id))
+ goto func_exit;
+
+ ReadView check_table_extended_view;
+ ReadView &view=
+ prebuilt->need_to_access_clustered &&
+ !prebuilt->table->is_temporary() &&
+ prebuilt->trx->isolation_level != TRX_ISO_READ_UNCOMMITTED
+ ? check_table_extended_view : prebuilt->trx->read_view;
+ if (&view == &check_table_extended_view)
+ check_table_extended_view.set_creator_trx_id(prebuilt->trx->id);
+
+page_loop:
+ if (&view == &check_table_extended_view)
+ /* In CHECK TABLE...EXTENDED, we make a copy of purge_sys.end_view
+ while holding a shared latch on the index leaf page.
+ Should a currently active purge batch desire to remove any further
+ records from this page, it would be blocked by our page latch.
+
+ We will consult check_table_extended_view to determine if a
+ clustered index record corresponding to a secondary index record
+ is visible to the current purge batch. Right after we have made our
+ copy, purge_sys.end_view is free to be changed again.
+
+ If we have an orphan secondary index record, we may attempt to
+ request a clustered index record version that cannot be retrieved
+ any more because the undo log records may have been freed
+ (according to the purge_sys.end_view). In such a case,
+ trx_undo_get_undo_rec() would cause
+  trx_undo_prev_version_build()
+ to return DB_MISSING_HISTORY. */
+ static_cast<ReadViewBase&>(check_table_extended_view)=
+ purge_sys_t::end_view_guard{}.view();
+
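/* Illustrative sketch only, not part of the patch: the "copy the view while a
temporary guard holds the latch" idiom used above, with hypothetical stand-ins
for purge_sys_t::end_view_guard and ReadViewBase. */
#include <cstdint>
#include <mutex>

struct ToyViewBase { uint64_t low_limit= 0; };

class ToyEndViewGuard            /* models purge_sys_t::end_view_guard */
{
public:
  static inline std::mutex latch;
  static inline ToyViewBase end_view;
  ToyEndViewGuard() { latch.lock(); }
  ~ToyEndViewGuard() { latch.unlock(); }
  const ToyViewBase& view() const { return end_view; }
};

inline ToyViewBase snapshot_end_view()
{
  /* The temporary guard holds the latch only for the duration of the copy,
  so we obtain a consistent snapshot without blocking purge for longer. */
  return ToyEndViewGuard{}.view();
}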
+rec_loop:
+ ut_ad(err == DB_SUCCESS);
+
+ if (!btr_pcur_move_to_next_on_page(prebuilt->pcur))
+ {
+ err= DB_CORRUPTION;
+ goto func_exit;
+ }
+
+ const rec_t *rec= btr_pcur_get_rec(prebuilt->pcur);
+ rec_offs *offsets= offsets_;
+
+ if (page_rec_is_supremum(rec))
+ {
+ next_page:
+ if (btr_pcur_is_after_last_in_tree(prebuilt->pcur))
+ goto func_exit;
+ err= btr_pcur_move_to_next_page(prebuilt->pcur, &mtr);
+ if (err == DB_SUCCESS && trx_is_interrupted(prebuilt->trx))
+ err= DB_INTERRUPTED;
+ if (UNIV_UNLIKELY(err != DB_SUCCESS))
+ goto func_exit;
+ goto page_loop;
+ }
+
+ offsets= rec_get_offsets(rec, index, offsets, index->n_core_fields,
+ ULINT_UNDEFINED, &heap);
+
+ const auto info_bits=
+ rec_get_info_bits(rec, prebuilt->table->not_redundant());
+ const bool rec_deleted= info_bits & REC_INFO_DELETED_FLAG;
+
+ if (UNIV_UNLIKELY(info_bits & REC_INFO_MIN_REC_FLAG))
+ {
+ if (*n_rows || !index->is_instant())
+ {
+ push_warning_printf(prebuilt->trx->mysql_thd,
+ Sql_condition::WARN_LEVEL_WARN, ER_NOT_KEYFILE,
+ "InnoDB: invalid record encountered");
+ prebuilt->autoinc_error= DB_INDEX_CORRUPT;
+ }
+ goto next_rec;
+ }
+
+ if (prebuilt->table->is_temporary())
+ {
+ count_or_not:
+ if (rec_deleted)
+ goto next_rec;
+ }
+ else if (index->is_clust())
+ {
+ if (prebuilt->trx->isolation_level == TRX_ISO_READ_UNCOMMITTED)
+ goto count_or_not;
+
+ trx_id_t rec_trx_id= row_get_rec_trx_id(rec, index, offsets);
+
+ if (rec_trx_id >= prebuilt->trx->read_view.low_limit_id() &&
+ UNIV_UNLIKELY(rec_trx_id >= trx_sys.get_max_trx_id()))
+ {
+ invalid_trx_id:
+ if (prebuilt->autoinc_error == DB_SUCCESS)
+ push_warning_printf(prebuilt->trx->mysql_thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ ER_NOT_KEYFILE,
+ "InnoDB: DB_TRX_ID=" TRX_ID_FMT
+ " exceeds the system-wide maximum",
+ rec_trx_id);
+ prebuilt->autoinc_error= DB_CORRUPTION;
+ goto next_rec;
+ }
+
+ if (!prebuilt->trx->read_view.changes_visible(rec_trx_id))
+ {
+ ut_ad(srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN);
+ rec_t *old_vers;
+ /* The following call returns 'offsets' associated with 'old_vers' */
+ err= row_sel_build_prev_vers_for_mysql(prebuilt, index, rec, &offsets,
+ &heap, &old_vers, nullptr, &mtr);
+
+ if (err != DB_SUCCESS)
+ goto func_exit;
+
+ if (old_vers)
+ {
+ rec= old_vers;
+ rec_trx_id= row_get_rec_trx_id(rec, index, offsets);
+
+ if (rec_trx_id >= prebuilt->trx->read_view.low_limit_id() &&
+ UNIV_UNLIKELY(rec_trx_id >= trx_sys.get_max_trx_id()))
+ goto invalid_trx_id;
+
+ if (!rec_get_deleted_flag(rec, prebuilt->table->not_redundant()))
+ goto count_row;
+ }
+ else
+ offsets= rec_get_offsets(rec, index, offsets, index->n_core_fields,
+ ULINT_UNDEFINED, &heap);
+ goto next_rec;
+ }
+ else if (!rec_deleted && !rec_trx_id);
+ else if (!check_table_extended_view.changes_visible(rec_trx_id));
+ else if (prebuilt->autoinc_error == DB_SUCCESS)
+ {
+ const char *msg= rec_deleted
+ ? "Unpurged clustered index record"
+ : "Clustered index record with stale history";
+
+ ib::warn w;
+ w << msg << " in table " << index->table->name << ": "
+ << rec_offsets_print(rec, offsets);
+ prebuilt->autoinc_error= DB_MISSING_HISTORY;
+ push_warning_printf(prebuilt->trx->mysql_thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ ER_NOT_KEYFILE, "InnoDB: %s", w.m_oss.str().c_str());
+ }
+
+ goto count_or_not;
+ }
+ else if (const trx_id_t page_trx_id= page_get_max_trx_id(page_align(rec)))
+ {
+ if (page_trx_id >= trx_sys.get_max_trx_id())
+ goto invalid_PAGE_MAX_TRX_ID;
+ if (prebuilt->trx->isolation_level == TRX_ISO_READ_UNCOMMITTED);
+ else if (&view == &check_table_extended_view || rec_deleted ||
+ !view.sees(page_trx_id))
+ {
+ bool got_extended_match= &view == &check_table_extended_view;
+ const auto savepoint= mtr.get_savepoint();
+
+ row_build_row_ref_in_tuple(prebuilt->clust_ref, rec, index, offsets);
+ err= btr_pcur_open_with_no_init(prebuilt->clust_ref,
+ PAGE_CUR_LE, BTR_SEARCH_LEAF,
+ prebuilt->clust_pcur, &mtr);
+ if (err != DB_SUCCESS)
+ goto func_exit;
+
+ const rec_t *clust_rec= btr_pcur_get_rec(prebuilt->clust_pcur);
+
+ /* Note: only if the search ends up on a non-infimum record is the
+ low_match value the real match to the search tuple */
+
+ if (!page_rec_is_user_rec(clust_rec) ||
+ btr_pcur_get_low_match(prebuilt->clust_pcur) < clust_index->n_uniq)
+ {
+ if (!rec_deleted)
+ {
+ not_found:
+ /* MDEV-29823 FIXME: There is a race condition between
+ rollback, purge, and possibly other SQL connections that
+ are creating and releasing read views. At the time
+ row_undo_mod_del_mark_or_remove_sec_low() is executing
+ rollback on a secondary index record, purge_sys.view
+ may not allow it to delete the record, and it will be
+ delete-marked. Eventually purge_sys.view would advance,
+ but the delete-marked record could never be removed,
+ because no undo log record was ever added to
+ the purge queue by trx_purge_add_undo_to_history().
+
+ For now, we will not flag an error about orphan secondary index
+ records that are delete-marked; we will only warn about them. */
+
+ if (!rec_deleted || prebuilt->autoinc_error == DB_SUCCESS)
+ {
+ ib::error_or_warn w(!rec_deleted);
+ w << "Clustered index record not found for index "
+ << index->name << " of table " << index->table->name
+ << ": " << rec_offsets_print(rec, offsets);
+ push_warning_printf(prebuilt->trx->mysql_thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ ER_NOT_KEYFILE, "InnoDB: %s",
+ w.m_oss.str().c_str());
+ }
+
+ if (prebuilt->autoinc_error == DB_SUCCESS)
+ prebuilt->autoinc_error= rec_deleted
+ ? DB_MISSING_HISTORY
+ : DB_CORRUPTION;
+ }
+ else if (&view == &check_table_extended_view)
+ extended_not_found:
+ if (view.changes_visible(page_trx_id))
+ goto not_found;
+ did_not_find:
+ mtr.rollback_to_savepoint(savepoint);
+ goto next_rec;
+ }
+
+ rec_offs *clust_offsets;
+ trx_id_t rec_trx_id;
+ rec_t *old_vers= nullptr;
+
+ bool found_in_view= false;
+ trx_id_t visible_trx_id= ~0ULL;
+
+ if (ulint trx_id_offset= clust_index->trx_id_offset)
+ {
+ clust_offsets= nullptr;
+ read_trx_id:
+ rec_trx_id= trx_read_trx_id(clust_rec + trx_id_offset);
+
+ if (clust_rec[trx_id_offset + DATA_TRX_ID_LEN] & 0x80)
+ {
+ if (UNIV_UNLIKELY
+ (rec_get_deleted_flag(clust_rec,
+ prebuilt->table->not_redundant())))
+ {
+ err= DB_CORRUPTION;
+ goto func_exit;
+ }
+
+ /* This is the oldest available record version (fresh insert). */
+ if (!view.changes_visible(rec_trx_id))
+ {
+ if (rec_trx_id >= view.low_limit_id() &&
+ UNIV_UNLIKELY(rec_trx_id >= trx_sys.get_max_trx_id()))
+ goto invalid_rec_trx_id;
+ if (got_extended_match)
+ goto check_latest_version;
+ goto did_not_find;
+ }
+ }
+ }
+ else
+ {
+ clust_offsets= rec_get_offsets(clust_rec, clust_index, nullptr,
+ clust_index->n_core_fields,
+ ULINT_UNDEFINED, &heap);
+ ulint trx_id_pos= clust_index->n_uniq ? clust_index->n_uniq : 1;
+ ulint len;
+ trx_id_offset= rec_get_nth_field_offs(clust_offsets, trx_id_pos, &len);
+ ut_ad(len == DATA_TRX_ID_LEN);
+ goto read_trx_id;
+ }
+
+ if (got_extended_match)
+ {
+ check_latest_version:
+ /* In CHECK TABLE...EXTENDED, always check if the secondary
+ index record matches the latest clustered index record
+ version, no matter if it is visible in our own read view.
+
+ If the latest clustered index version is delete-marked and
+ purgeable, it is not safe to fetch any BLOBs for column prefix
+ indexes because they may already have been freed. */
+ if (rec_trx_id &&
+ rec_get_deleted_flag(clust_rec,
+ prebuilt->table->not_redundant()) &&
+ purge_sys.is_purgeable(rec_trx_id))
+ goto did_not_find;
+
+ if (!clust_offsets)
+ clust_offsets= rec_get_offsets(clust_rec, clust_index, nullptr,
+ clust_index->n_core_fields,
+ ULINT_UNDEFINED, &heap);
+ err= row_check_index_match(prebuilt,
+ clust_rec, clust_index, clust_offsets,
+ rec, index, offsets);
+
+ switch (err) {
+ default:
+ goto func_exit;
+ case DB_SUCCESS_LOCKED_REC:
+ case DB_SUCCESS:
+ break;
+ }
+
+ got_extended_match= err == DB_SUCCESS;
+ err= DB_SUCCESS;
+
+ if (!prebuilt->trx->read_view.changes_visible(rec_trx_id))
+ /* While CHECK TABLE ... EXTENDED checks for a matching
+ clustered index record version for each secondary index
+ record, it must count only those records that belong to its
+ own read view.
+
+ If the latest version of clust_rec matches rec but is not
+ in our read view, there may still be an older version of
+ clust_rec that not only matches rec but is in our view.
+ We must evaluate old versions before deciding whether rec
+ should be counted. */
+ goto check_old_vers;
+
+ /* Remember that this is the visible clust_rec for rec,
+ and whether it matches rec. */
+ visible_trx_id= rec_trx_id;
+ found_in_view= got_extended_match &&
+ !rec_get_deleted_flag(clust_rec,
+ prebuilt->table->not_redundant());
+
+ if (!got_extended_match)
+ goto check_old_vers;
+
+ if (!found_in_view)
+ goto did_not_find;
+
+ found_match:
+ mtr.rollback_to_savepoint(savepoint);
+ goto count_row;
+ }
+ else if (!view.changes_visible(rec_trx_id))
+ {
+ check_old_vers:
+ if (rec_trx_id >= view.low_limit_id() &&
+ UNIV_UNLIKELY(rec_trx_id >= trx_sys.get_max_trx_id()))
+ {
+ invalid_rec_trx_id:
+ if (prebuilt->autoinc_error == DB_SUCCESS)
+ push_warning_printf(prebuilt->trx->mysql_thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ ER_NOT_KEYFILE,
+ "InnoDB: DB_TRX_ID=" TRX_ID_FMT
+ " exceeds the system-wide maximum",
+ rec_trx_id);
+ goto not_found;
+ }
+
+ if (!clust_offsets)
+ clust_offsets= rec_get_offsets(clust_rec, clust_index, nullptr,
+ clust_index->n_core_fields,
+ ULINT_UNDEFINED, &heap);
+
+ row_sel_reset_old_vers_heap(prebuilt);
+ /* The following is adapted from row_vers_build_for_consistent_read()
+ because when using check_table_extended_view, we must
+ consider every available version of the clustered index record. */
+ mem_heap_t *vers_heap= nullptr;
+
+ for (;;)
+ {
+ mem_heap_t *prev_heap= vers_heap;
+ vers_heap= mem_heap_create(1024);
+ err= trx_undo_prev_version_build(clust_rec,
+ clust_index, clust_offsets,
+ vers_heap, &old_vers,
+ nullptr, nullptr, 0);
+ if (prev_heap)
+ mem_heap_free(prev_heap);
+ if (err != DB_SUCCESS)
+ {
+ old_vers_err:
+ mem_heap_free(vers_heap);
+ if (err == DB_MISSING_HISTORY)
+ {
+ err= DB_SUCCESS;
+ if (got_extended_match)
+ goto did_not_find;
+ goto not_found;
+ }
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(!old_vers))
+ {
+ mem_heap_free(vers_heap);
+ /* We did not find a matching clustered index record version
+ for the secondary index record. Normal CHECK TABLE will simply
+ not count the secondary index record; CHECK TABLE ... EXTENDED
+ will flag such orphan records if appropriate.
+
+        A secondary index record may be a "temporary orphan"
+        if purge is in progress. We will only flag such records if
+        everything up to PAGE_MAX_TRX_ID has been fully purged.
+
+ "Temporary orphans" may be produced when
+ row_undo_mod_clust() resets the DB_TRX_ID of the latest
+ clust_rec version or when trx_undo_prev_version_build()
+ encounters a BLOB that may have been freed according to
+ purge_sys.view (not purge_sys.end_view). */
+ if (&view == &check_table_extended_view && !got_extended_match)
+ goto extended_not_found;
+ goto did_not_find;
+ }
+
+ clust_rec= old_vers;
+ clust_offsets= rec_get_offsets(clust_rec, clust_index, clust_offsets,
+ clust_index->n_core_fields,
+ ULINT_UNDEFINED, &heap);
+
+ rec_trx_id= row_get_rec_trx_id(clust_rec, clust_index,
+ clust_offsets);
+
+ if (UNIV_UNLIKELY(rec_trx_id >=
+ prebuilt->trx->read_view.low_limit_id() &&
+ rec_trx_id >= trx_sys.get_max_trx_id()))
+ {
+ mem_heap_free(vers_heap);
+ goto invalid_rec_trx_id;
+ }
+
+ const bool rec_visible=
+ prebuilt->trx->read_view.changes_visible(rec_trx_id);
+ const bool clust_rec_deleted=
+ rec_get_deleted_flag(clust_rec, prebuilt->table->not_redundant());
+
+ if (&view != &prebuilt->trx->read_view)
+ {
+ /* It is not safe to fetch BLOBs of committed delete-marked
+ records that may have been freed in purge. */
+ err= clust_rec_deleted && rec_trx_id &&
+ purge_sys.is_purgeable(rec_trx_id)
+ ? DB_SUCCESS_LOCKED_REC
+ : row_check_index_match(prebuilt,
+ clust_rec, clust_index, clust_offsets,
+ rec, index, offsets);
+
+ switch (err) {
+ default:
+ goto old_vers_err;
+ case DB_SUCCESS_LOCKED_REC:
+ if (rec_visible && !~visible_trx_id)
+ visible_trx_id= rec_trx_id;
+ continue;
+ case DB_SUCCESS:
+ got_extended_match= true;
+ if (!rec_visible)
+ continue;
+ if (!~visible_trx_id)
+ {
+ visible_trx_id= rec_trx_id;
+ found_in_view= !clust_rec_deleted;
+ }
+ mem_heap_free(vers_heap);
+ if (!found_in_view)
+ goto did_not_find;
+ goto found_match;
+ }
+ }
+ else if (rec_visible)
+ {
+ if (!clust_rec_deleted)
+ {
+ clust_rec= rec_copy(mem_heap_alloc(heap,
+ rec_offs_size(clust_offsets)),
+ clust_rec, clust_offsets);
+ rec_offs_make_valid(clust_rec, clust_index, true, clust_offsets);
+ }
+ mem_heap_free(vers_heap);
+ if (clust_rec_deleted)
+ goto did_not_find;
+ goto check_match;
+ }
+ }
+ }
+ else if (rec_get_deleted_flag(clust_rec,
+ prebuilt->table->not_redundant()))
+ goto did_not_find;
+
+ ut_ad(clust_rec);
+ ut_ad(&view != &check_table_extended_view);
+
+  /* If we had to go to an earlier version of the row, or the
+  secondary index record is delete-marked, then the secondary
+  index record corresponding to clust_rec (or old_vers) may not
+  be rec; in that case we must ignore such a row, because in our
+  snapshot rec would not have existed. Remember that from rec we
+  cannot see directly which transaction id corresponds to it: we
+  have to go to the clustered index record. A query that fetches
+  all rows where the secondary index value is in some interval
+  would return a wrong result if we did not drop rows that we
+  visit through secondary index records that do not really exist
+  in our snapshot. */
+
+ if (rec_deleted)
+ {
+ if (!clust_offsets)
+ clust_offsets= rec_get_offsets(clust_rec, clust_index, nullptr,
+ clust_index->n_core_fields,
+ ULINT_UNDEFINED, &heap);
+ check_match:
+ /* This clustered index record version exists in
+ prebuilt->trx->read_view and is not delete-marked.
+ By design, any BLOBs in it are not allowed to be
+ freed in the purge of committed transaction history. */
+ err= row_check_index_match(prebuilt, clust_rec, clust_index,
+ clust_offsets, rec, index, offsets);
+ switch (err) {
+ case DB_SUCCESS:
+ break;
+ case DB_SUCCESS_LOCKED_REC:
+ err= DB_SUCCESS;
+ goto did_not_find;
+ default:
+ goto func_exit;
+ }
+ }
+
+ mtr.rollback_to_savepoint(savepoint);
+ }
+ }
+ else
+ {
+ invalid_PAGE_MAX_TRX_ID:
+ if (UNIV_LIKELY(srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN))
+ {
+ push_warning_printf(prebuilt->trx->mysql_thd,
+ Sql_condition::WARN_LEVEL_WARN, ER_NOT_KEYFILE,
+ "InnoDB: Invalid PAGE_MAX_TRX_ID=%llu"
+ " in index '%-.200s'",
+ page_trx_id, index->name());
+ prebuilt->autoinc_error= DB_INDEX_CORRUPT;
+ }
+ goto next_rec;
+ }
+
+count_row:
+ ++*n_rows;
+
+ if (prev_entry)
+ {
+ ulint matched_fields= 0;
+ int cmp= cmp_dtuple_rec_with_match(prev_entry, rec, offsets,
+ &matched_fields);
+ const char* msg;
+
+ if (UNIV_LIKELY(cmp < 0));
+ else if (cmp > 0)
+ {
+ prebuilt->autoinc_error= DB_INDEX_CORRUPT;
+ msg= "index records in a wrong order in ";
+not_ok:
+ ib::error() << msg << index->name << " of table " << index->table->name
+ << ": " << *prev_entry << ", "
+ << rec_offsets_print(rec, offsets);
+ }
+ else if (index->is_unique() && matched_fields >=
+ dict_index_get_n_ordering_defined_by_user(index))
+ {
+ /* NULL values in unique indexes are considered not to be duplicates */
+ for (ulint i= 0; i < dict_index_get_n_ordering_defined_by_user(index);
+ i++)
+ if (dfield_is_null(dtuple_get_nth_field(prev_entry, i)))
+ goto next_rec;
+
+ if (prebuilt->autoinc_error == DB_SUCCESS)
+ prebuilt->autoinc_error= DB_DUPLICATE_KEY;
+ msg= "duplicate key in ";
+ goto not_ok;
+ }
+ }
+
+next_rec:
+ ut_ad(err == DB_SUCCESS);
+
+ {
+ mem_heap_t *tmp_heap= nullptr;
+
+ /* Empty the heap on each round. But preserve offsets[]
+ for the row_rec_to_index_entry() call, by copying them
+ into a separate memory heap when needed. */
+ if (UNIV_UNLIKELY(offsets != offsets_))
+ {
+ ulint size= rec_offs_get_n_alloc(offsets) * sizeof *offsets;
+ tmp_heap= mem_heap_create(size);
+ offsets= static_cast<rec_offs*>(mem_heap_dup(tmp_heap, offsets, size));
+ }
+
+ mem_heap_empty(heap);
+ prev_entry= row_rec_to_index_entry(rec, index, offsets, heap);
+
+ if (UNIV_LIKELY_NULL(tmp_heap))
+ mem_heap_free(tmp_heap);
+ }
+
+ if (btr_pcur_is_after_last_on_page(prebuilt->pcur))
+ goto next_page;
+
+ goto rec_loop;
}
/*******************************************************************//**
@@ -6014,8 +6896,9 @@ row_search_get_max_rec(
btr_pcur_t pcur;
const rec_t* rec;
/* Open at the high/right end (false), and init cursor */
- btr_pcur_open_at_index_side(
- false, index, BTR_SEARCH_LEAF, &pcur, true, 0, mtr);
+ if (pcur.open_leaf(false, index, BTR_SEARCH_LEAF, mtr) != DB_SUCCESS) {
+ return nullptr;
+ }
do {
const page_t* page;
@@ -6031,8 +6914,6 @@ row_search_get_max_rec(
btr_pcur_move_before_first_on_page(&pcur);
} while (btr_pcur_move_to_prev(&pcur, mtr));
- btr_pcur_close(&pcur);
-
ut_ad(!rec
|| !(rec_get_info_bits(rec, dict_table_is_comp(index->table))
& (REC_INFO_MIN_REC_FLAG | REC_INFO_DELETED_FLAG)));
diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc
index 82c880a5920..50196e78092 100644
--- a/storage/innobase/row/row0uins.cc
+++ b/storage/innobase/row/row0uins.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2021, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -35,7 +35,6 @@ Created 2/25/1997 Heikki Tuuri
#include "mach0data.h"
#include "row0undo.h"
#include "row0vers.h"
-#include "row0log.h"
#include "trx0trx.h"
#include "trx0rec.h"
#include "row0row.h"
@@ -44,6 +43,7 @@ Created 2/25/1997 Heikki Tuuri
#include "ibuf0ibuf.h"
#include "log0log.h"
#include "fil0fil.h"
+#include <mysql/service_thd_mdl.h>
/*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
@@ -68,8 +68,18 @@ row_undo_ins_remove_clust_rec(
dberr_t err;
ulint n_tries = 0;
mtr_t mtr;
- dict_index_t* index = node->pcur.btr_cur.index;
- bool online;
+ dict_index_t* index = node->pcur.index();
+ table_id_t table_id = 0;
+ const bool dict_locked = node->trx->dict_operation_lock_mode;
+restart:
+ MDL_ticket* mdl_ticket = nullptr;
+ ut_ad(!table_id || dict_locked
+ || !node->trx->dict_operation_lock_mode);
+ dict_table_t *table = table_id
+ ? dict_table_open_on_id(table_id, dict_locked,
+ DICT_TABLE_OP_OPEN_ONLY_IF_CACHED,
+ node->trx->mysql_thd, &mdl_ticket)
+ : nullptr;
ut_ad(index->is_primary());
ut_ad(node->trx->in_rollback);
@@ -78,21 +88,10 @@ row_undo_ins_remove_clust_rec(
if (index->table->is_temporary()) {
ut_ad(node->rec_type == TRX_UNDO_INSERT_REC);
mtr.set_log_mode(MTR_LOG_NO_REDO);
- ut_ad(!dict_index_is_online_ddl(index));
ut_ad(index->table->id >= DICT_HDR_FIRST_ID);
- online = false;
} else {
index->set_modified(mtr);
ut_ad(lock_table_has_locks(index->table));
- online = dict_index_is_online_ddl(index);
- if (online) {
- ut_ad(node->rec_type == TRX_UNDO_INSERT_REC);
- ut_ad(node->trx->dict_operation_lock_mode
- != RW_X_LATCH);
- ut_ad(node->table->id != DICT_INDEXES_ID);
- ut_ad(node->table->id != DICT_COLUMNS_ID);
- mtr_s_lock_index(index, &mtr);
- }
}
/* This is similar to row_undo_mod_clust(). The DDL thread may
@@ -100,13 +99,11 @@ row_undo_ins_remove_clust_rec(
We must log the removal, so that the row will be correctly
purged. However, we can log the removal out of sync with the
B-tree modification. */
- ut_a(btr_pcur_restore_position(
- online ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
- : (node->rec_type == TRX_UNDO_INSERT_METADATA)
- ? BTR_MODIFY_TREE
- : BTR_MODIFY_LEAF,
- &node->pcur, &mtr) == btr_pcur_t::SAME_ALL);
-
+ ut_a(node->pcur.restore_position(
+ (node->rec_type == TRX_UNDO_INSERT_METADATA)
+ ? BTR_MODIFY_TREE
+ : BTR_MODIFY_LEAF,
+ &mtr) == btr_pcur_t::SAME_ALL);
rec_t* rec = btr_pcur_get_rec(&node->pcur);
ut_ad(rec_get_trx_id(rec, index) == node->trx->id
@@ -116,55 +113,88 @@ row_undo_ins_remove_clust_rec(
ut_ad(rec_is_metadata(rec, index->table->not_redundant())
== (node->rec_type == TRX_UNDO_INSERT_METADATA));
- if (online && dict_index_is_online_ddl(index)) {
- mem_heap_t* heap = NULL;
- const rec_offs* offsets = rec_get_offsets(
- rec, index, NULL, index->n_core_fields,
- ULINT_UNDEFINED, &heap);
- row_log_table_delete(rec, index, offsets, NULL);
- mem_heap_free(heap);
- } else {
- switch (node->table->id) {
- case DICT_INDEXES_ID:
- ut_ad(!online);
- ut_ad(node->trx->dict_operation_lock_mode
- == RW_X_LATCH);
- ut_ad(node->rec_type == TRX_UNDO_INSERT_REC);
-
- dict_drop_index_tree(&node->pcur, node->trx, &mtr);
- mtr.commit();
-
- mtr.start();
- ut_a(btr_pcur_restore_position(BTR_MODIFY_LEAF,
- &node->pcur, &mtr)== btr_pcur_t::SAME_ALL);
+ switch (node->table->id) {
+ case DICT_COLUMNS_ID:
+ /* This is rolling back an INSERT into SYS_COLUMNS.
+ If it was part of an instant ALTER TABLE operation, we
+ must evict the table definition, so that it can be
+ reloaded after the dictionary operation has been
+ completed. At this point, any corresponding operation
+ to the metadata record will have been rolled back. */
+ ut_ad(node->trx->dict_operation_lock_mode);
+ ut_ad(node->rec_type == TRX_UNDO_INSERT_REC);
+ if (rec_get_n_fields_old(rec)
+ != DICT_NUM_FIELDS__SYS_COLUMNS
+ || (rec_get_1byte_offs_flag(rec)
+ ? rec_1_get_field_end_info(rec, 0) != 8
+ : rec_2_get_field_end_info(rec, 0) != 8)) {
break;
- case DICT_COLUMNS_ID:
- /* This is rolling back an INSERT into SYS_COLUMNS.
- If it was part of an instant ALTER TABLE operation, we
- must evict the table definition, so that it can be
- reloaded after the dictionary operation has been
- completed. At this point, any corresponding operation
- to the metadata record will have been rolled back. */
- ut_ad(!online);
- ut_ad(node->trx->dict_operation_lock_mode
- == RW_X_LATCH);
- ut_ad(node->rec_type == TRX_UNDO_INSERT_REC);
- if (rec_get_n_fields_old(rec)
- != DICT_NUM_FIELDS__SYS_COLUMNS) {
- break;
+ }
+ static_assert(!DICT_FLD__SYS_COLUMNS__TABLE_ID, "");
+ node->trx->evict_table(mach_read_from_8(rec));
+ break;
+ case DICT_INDEXES_ID:
+ ut_ad(node->trx->dict_operation_lock_mode);
+ ut_ad(node->rec_type == TRX_UNDO_INSERT_REC);
+ if (!table_id) {
+ table_id = mach_read_from_8(rec);
+ if (table_id) {
+ mtr.commit();
+ goto restart;
}
- ulint len;
- const byte* data = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_COLUMNS__TABLE_ID, &len);
- if (len != 8) {
- break;
+ ut_ad("corrupted SYS_INDEXES record" == 0);
+ }
+
+ pfs_os_file_t d = OS_FILE_CLOSED;
+
+ if (const uint32_t space_id = dict_drop_index_tree(
+ &node->pcur, node->trx, &mtr)) {
+ if (table) {
+ lock_release_on_rollback(node->trx,
+ table);
+ if (!dict_locked) {
+ dict_sys.lock(SRW_LOCK_CALL);
+ }
+ if (table->release()) {
+ dict_sys.remove(table);
+ } else if (table->space_id
+ == space_id) {
+ table->space = nullptr;
+ table->file_unreadable = true;
+ }
+ if (!dict_locked) {
+ dict_sys.unlock();
+ }
+ table = nullptr;
+ if (!mdl_ticket);
+ else if (MDL_context* mdl_context =
+ static_cast<MDL_context*>(
+ thd_mdl_context(
+ node->trx->
+ mysql_thd))) {
+ mdl_context->release_lock(
+ mdl_ticket);
+ mdl_ticket = nullptr;
+ }
}
- node->trx->evict_table(mach_read_from_8(data));
+
+ d = fil_delete_tablespace(space_id);
+ }
+
+ mtr.commit();
+
+ if (d != OS_FILE_CLOSED) {
+ os_file_close(d);
}
+
+ mtr.start();
+ ut_a(node->pcur.restore_position(
+ BTR_MODIFY_LEAF, &mtr) == btr_pcur_t::SAME_ALL);
}
- if (btr_cur_optimistic_delete(&node->pcur.btr_cur, 0, &mtr)) {
- err = DB_SUCCESS;
+ err = btr_cur_optimistic_delete(&node->pcur.btr_cur, 0, &mtr);
+
+ if (err != DB_FAIL) {
goto func_exit;
}
@@ -177,8 +207,8 @@ retry:
} else {
index->set_modified(mtr);
}
- ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
- &node->pcur, &mtr) == btr_pcur_t::SAME_ALL);
+ ut_a(node->pcur.restore_position(BTR_PURGE_TREE, &mtr)
+ == btr_pcur_t::SAME_ALL);
btr_cur_pessimistic_delete(&err, FALSE, &node->pcur.btr_cur, 0, true,
&mtr);
@@ -194,7 +224,7 @@ retry:
n_tries++;
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
+ std::this_thread::sleep_for(BTR_CUR_RETRY_SLEEP_TIME);
goto retry;
}
@@ -207,6 +237,12 @@ func_exit:
}
btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
+
+ if (UNIV_LIKELY_NULL(table)) {
+ dict_table_close(table, dict_locked,
+ node->trx->mysql_thd, mdl_ticket);
+ }
+
return(err);
}
@@ -217,7 +253,7 @@ static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_ins_remove_sec_low(
/*========================*/
- ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ btr_latch_mode mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
depending on whether we wish optimistic or
pessimistic descent down the index tree */
dict_index_t* index, /*!< in: index */
@@ -229,29 +265,38 @@ row_undo_ins_remove_sec_low(
mtr_t mtr;
const bool modify_leaf = mode == BTR_MODIFY_LEAF;
+ pcur.btr_cur.page_cur.index = index;
row_mtr_start(&mtr, index, !modify_leaf);
- if (modify_leaf) {
- mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
- mtr_s_lock_index(index, &mtr);
- } else {
- ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
- mtr_sx_lock_index(index, &mtr);
- }
-
- if (row_log_online_op_try(index, entry, 0)) {
- goto func_exit_no_pcur;
- }
+ if (index->is_spatial()) {
+ mode = modify_leaf
+ ? btr_latch_mode(BTR_MODIFY_LEAF
+ | BTR_RTREE_DELETE_MARK
+ | BTR_RTREE_UNDO_INS)
+ : btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS);
+ btr_pcur_get_btr_cur(&pcur)->thr = thr;
+ if (rtr_search(entry, mode, &pcur, &mtr)) {
+ goto func_exit;
+ }
- if (dict_index_is_spatial(index)) {
- if (modify_leaf) {
- mode |= BTR_RTREE_DELETE_MARK;
+ if (rec_get_deleted_flag(
+ btr_pcur_get_rec(&pcur),
+ dict_table_is_comp(index->table))) {
+ ib::error() << "Record found in index " << index->name
+ << " is deleted marked on insert rollback.";
+ ut_ad(0);
}
- btr_pcur_get_btr_cur(&pcur)->thr = thr;
- mode |= BTR_RTREE_UNDO_INS;
+ goto found;
+ } else if (modify_leaf) {
+ mode = BTR_MODIFY_LEAF_ALREADY_LATCHED;
+ mtr_s_lock_index(index, &mtr);
+ } else {
+ ut_ad(mode == BTR_PURGE_TREE);
+ mode = BTR_PURGE_TREE_ALREADY_LATCHED;
+ mtr_x_lock_index(index, &mtr);
}
- switch (row_search_index_entry(index, entry, mode, &pcur, &mtr)) {
+ switch (row_search_index_entry(entry, mode, &pcur, &mtr)) {
case ROW_BUFFERED:
case ROW_NOT_DELETED_REF:
/* These are invalid outcomes, because the mode passed
@@ -261,20 +306,11 @@ row_undo_ins_remove_sec_low(
case ROW_NOT_FOUND:
break;
case ROW_FOUND:
- if (dict_index_is_spatial(index)
- && rec_get_deleted_flag(
- btr_pcur_get_rec(&pcur),
- dict_table_is_comp(index->table))) {
- ib::error() << "Record found in index " << index->name
- << " is deleted marked on insert rollback.";
- ut_ad(0);
- }
-
+ found:
btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
if (modify_leaf) {
- err = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
- ? DB_SUCCESS : DB_FAIL;
+ err = btr_cur_optimistic_delete(btr_cur, 0, &mtr);
} else {
/* Passing rollback=false here, because we are
deleting a secondary index record: the distinction
@@ -285,8 +321,8 @@ row_undo_ins_remove_sec_low(
}
}
+func_exit:
btr_pcur_close(&pcur);
-func_exit_no_pcur:
mtr_commit(&mtr);
return(err);
@@ -318,9 +354,7 @@ row_undo_ins_remove_sec(
/* Try then pessimistic descent to the B-tree */
retry:
- err = row_undo_ins_remove_sec_low(
- BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
- index, entry, thr);
+ err = row_undo_ins_remove_sec_low(BTR_PURGE_TREE, index, entry, thr);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
@@ -330,7 +364,7 @@ retry:
n_tries++;
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
+ std::this_thread::sleep_for(BTR_CUR_RETRY_SLEEP_TIME);
goto retry;
}
@@ -344,7 +378,7 @@ retry:
static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked)
{
dict_index_t* clust_index;
- byte* ptr;
+ const byte* ptr;
undo_no_t undo_no;
table_id_t table_id;
ulint dummy;
@@ -363,11 +397,11 @@ static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked)
node->table = dict_table_open_on_id(table_id, dict_locked,
DICT_TABLE_OP_NORMAL);
} else if (!dict_locked) {
- mutex_enter(&dict_sys.mutex);
- node->table = dict_sys.get_temporary_table(table_id);
- mutex_exit(&dict_sys.mutex);
+ dict_sys.freeze(SRW_LOCK_CALL);
+ node->table = dict_sys.acquire_temporary_table(table_id);
+ dict_sys.unfreeze();
} else {
- node->table = dict_sys.get_temporary_table(table_id);
+ node->table = dict_sys.acquire_temporary_table(table_id);
}
if (!node->table) {
@@ -380,19 +414,26 @@ static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked)
goto close_table;
case TRX_UNDO_INSERT_METADATA:
case TRX_UNDO_INSERT_REC:
+ case TRX_UNDO_EMPTY:
break;
case TRX_UNDO_RENAME_TABLE:
dict_table_t* table = node->table;
ut_ad(!table->is_temporary());
- ut_ad(dict_table_is_file_per_table(table)
+ ut_ad(table->file_unreadable
+ || dict_table_is_file_per_table(table)
== !is_system_tablespace(table->space_id));
size_t len = mach_read_from_2(node->undo_rec)
+ size_t(node->undo_rec - ptr) - 2;
- ptr[len] = 0;
- const char* name = reinterpret_cast<char*>(ptr);
- if (strcmp(table->name.m_name, name)) {
- dict_table_rename_in_cache(table, name, false,
- table_id != 0);
+ const span<const char> name(reinterpret_cast<const char*>(ptr),
+ len);
+ if (strlen(table->name.m_name) != len
+ || memcmp(table->name.m_name, ptr, len)) {
+ dict_table_rename_in_cache(table, name, true);
+ } else if (table->space && table->space->id) {
+ const auto s = table->space->name();
+ if (len != s.size() || memcmp(ptr, s.data(), len)) {
+ table->rename_tablespace(name, true);
+ }
}
goto close_table;
}
@@ -408,7 +449,7 @@ close_table:
would probably be better to just drop all temporary
tables (and temporary undo log records) of the current
connection, instead of doing this rollback. */
- dict_table_close(node->table, dict_locked, FALSE);
+ dict_table_close(node->table, dict_locked);
node->table = NULL;
return false;
} else {
@@ -416,11 +457,16 @@ close_table:
clust_index = dict_table_get_first_index(node->table);
if (clust_index != NULL) {
- if (node->rec_type == TRX_UNDO_INSERT_REC) {
+ switch (node->rec_type) {
+ case TRX_UNDO_INSERT_REC:
ptr = trx_undo_rec_get_row_ref(
ptr, clust_index, &node->ref,
node->heap);
- } else {
+ break;
+ case TRX_UNDO_EMPTY:
+ node->ref = nullptr;
+ return true;
+ default:
node->ref = &trx_undo_metadata;
if (!row_undo_search_clust_to_pcur(node)) {
/* An error probably occurred during
@@ -464,16 +510,15 @@ row_undo_ins_remove_sec_rec(
que_thr_t* thr) /*!< in: query thread */
{
dberr_t err = DB_SUCCESS;
- dict_index_t* index = node->index;
+ dict_index_t* index;
mem_heap_t* heap;
heap = mem_heap_create(1024);
- while (index != NULL) {
- dtuple_t* entry;
-
- if (index->type & DICT_FTS) {
- dict_table_next_uncorrupted_index(index);
+ for (index = node->index; index;
+ index = dict_table_get_next_index(index)) {
+ if (index->type & (DICT_FTS | DICT_CORRUPT)
+ || !index->is_committed()) {
continue;
}
@@ -481,7 +526,7 @@ row_undo_ins_remove_sec_rec(
always contain all fields of the index. It does not
matter if any indexes were created afterwards; all
index entries can be reconstructed from the row. */
- entry = row_build_index_entry(
+ dtuple_t* entry = row_build_index_entry(
node->row, node->ext, index, heap);
if (UNIV_UNLIKELY(!entry)) {
/* The database must have crashed after
@@ -504,7 +549,6 @@ row_undo_ins_remove_sec_rec(
}
mem_heap_empty(heap);
- dict_table_next_uncorrupted_index(index);
}
func_exit:
@@ -527,12 +571,15 @@ row_undo_ins(
que_thr_t* thr) /*!< in: query thread */
{
dberr_t err;
- bool dict_locked = node->trx->dict_operation_lock_mode == RW_X_LATCH;
+ const bool dict_locked = node->trx->dict_operation_lock_mode;
if (!row_undo_ins_parse_undo_rec(node, dict_locked)) {
return DB_SUCCESS;
}
+ ut_ad(node->table->is_temporary()
+ || lock_table_has_locks(node->table));
+
/* Iterate over all the indexes and undo the insert.*/
node->index = dict_table_get_first_index(node->table);
@@ -546,8 +593,6 @@ row_undo_ins(
/* Skip the clustered index (the first index) */
node->index = dict_table_get_next_index(node->index);
- dict_table_skip_corrupt_index(node->index);
-
err = row_undo_ins_remove_sec_rec(node, thr);
if (err != DB_SUCCESS) {
@@ -556,21 +601,19 @@ row_undo_ins(
log_free_check();
- if (node->table->id == DICT_INDEXES_ID) {
- ut_ad(!node->table->is_temporary());
- if (!dict_locked) {
- mutex_enter(&dict_sys.mutex);
- }
+ if (!dict_locked && node->table->id == DICT_INDEXES_ID) {
+ dict_sys.lock(SRW_LOCK_CALL);
err = row_undo_ins_remove_clust_rec(node);
- if (!dict_locked) {
- mutex_exit(&dict_sys.mutex);
- }
+ dict_sys.unlock();
} else {
+ ut_ad(node->table->id != DICT_INDEXES_ID
+ || !node->table->is_temporary());
err = row_undo_ins_remove_clust_rec(node);
}
if (err == DB_SUCCESS && node->table->stat_initialized) {
- /* Not protected by dict_sys.mutex for
+ /* Not protected by dict_sys.latch
+ or table->stats_mutex_lock() for
performance reasons, we would rather get garbage
in stat_n_rows (which is just an estimate anyway)
than protecting the following code with a latch. */
@@ -579,7 +622,7 @@ row_undo_ins(
/* Do not attempt to update statistics when
executing ROLLBACK in the InnoDB SQL
interpreter, because in that case we would
- already be holding dict_sys.mutex, which
+ already be holding dict_sys.latch, which
would be acquired when updating statistics. */
if (!dict_locked) {
dict_stats_update_if_needed(node->table,
@@ -592,9 +635,13 @@ row_undo_ins(
log_free_check();
ut_ad(!node->table->is_temporary());
err = row_undo_ins_remove_clust_rec(node);
+ break;
+ case TRX_UNDO_EMPTY:
+ err = node->table->clear(thr);
+ break;
}
- dict_table_close(node->table, dict_locked, FALSE);
+ dict_table_close(node->table, dict_locked);
node->table = NULL;
diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc
index bea2baa3cd6..50e15e03cc9 100644
--- a/storage/innobase/row/row0umod.cc
+++ b/storage/innobase/row/row0umod.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2021, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -36,7 +36,6 @@ Created 2/27/1997 Heikki Tuuri
#include "ibuf0ibuf.h"
#include "row0undo.h"
#include "row0vers.h"
-#include "row0log.h"
#include "trx0trx.h"
#include "trx0rec.h"
#include "row0row.h"
@@ -80,17 +79,12 @@ row_undo_mod_clust_low(
mem_heap_t** offsets_heap,
/*!< in/out: memory heap that can be emptied */
mem_heap_t* heap, /*!< in/out: memory heap */
- const dtuple_t**rebuilt_old_pk,
- /*!< out: row_log_table_get_pk()
- before the update, or NULL if
- the table is not being rebuilt online or
- the PRIMARY KEY definition does not change */
byte* sys, /*!< out: DB_TRX_ID, DB_ROLL_PTR
for row_log_table_delete() */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr, /*!< in: mtr; must be committed before
latching any further pages */
- ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
+ btr_latch_mode mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
@@ -99,10 +93,10 @@ row_undo_mod_clust_low(
pcur = &node->pcur;
btr_cur = btr_pcur_get_btr_cur(pcur);
- ut_d(auto pcur_restore_result =)
- btr_pcur_restore_position(mode, pcur, mtr);
+ if (pcur->restore_position(mode, mtr) != btr_pcur_t::SAME_ALL) {
+ return DB_CORRUPTION;
+ }
- ut_ad(pcur_restore_result == btr_pcur_t::SAME_ALL);
ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur),
btr_cur_get_index(btr_cur))
== thr_get_trx(thr)->id
@@ -111,18 +105,9 @@ row_undo_mod_clust_low(
|| node->update->info_bits == REC_INFO_METADATA_ADD
|| node->update->info_bits == REC_INFO_METADATA_ALTER);
- if (mode != BTR_MODIFY_LEAF
- && dict_index_is_online_ddl(btr_cur_get_index(btr_cur))) {
- *rebuilt_old_pk = row_log_table_get_pk(
- btr_cur_get_rec(btr_cur),
- btr_cur_get_index(btr_cur), NULL, sys, &heap);
- } else {
- *rebuilt_old_pk = NULL;
- }
-
if (mode != BTR_MODIFY_TREE) {
- ut_ad((mode & ulint(~BTR_ALREADY_S_LATCHED))
- == BTR_MODIFY_LEAF);
+ ut_ad(mode == BTR_MODIFY_LEAF
+ || mode == BTR_MODIFY_LEAF_ALREADY_LATCHED);
err = btr_cur_optimistic_update(
BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG
@@ -148,26 +133,57 @@ row_undo_mod_clust_low(
&& node->ref == &trx_undo_metadata
&& btr_cur_get_index(btr_cur)->table->instant
&& node->update->info_bits == REC_INFO_METADATA_ADD) {
- btr_reset_instant(*btr_cur_get_index(btr_cur), false,
- mtr);
+ btr_reset_instant(*btr_cur->index(), false, mtr);
}
}
- if (err == DB_SUCCESS
- && btr_cur_get_index(btr_cur)->table->id == DICT_COLUMNS_ID) {
+ if (err != DB_SUCCESS) {
+ return err;
+ }
+
+ switch (const auto id = btr_cur_get_index(btr_cur)->table->id) {
+ unsigned c;
+ case DICT_TABLES_ID:
+ if (node->trx != trx_roll_crash_recv_trx) {
+ break;
+ }
+ c = DICT_COL__SYS_TABLES__ID;
+ goto evict;
+ case DICT_INDEXES_ID:
+ if (node->trx != trx_roll_crash_recv_trx) {
+ break;
+ } else if (node->rec_type == TRX_UNDO_DEL_MARK_REC
+ && btr_cur_get_rec(btr_cur)
+ [8 + 8 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN]
+ == static_cast<byte>(*TEMP_INDEX_PREFIX_STR)) {
+ /* We are rolling back the DELETE of metadata
+ for a failed ADD INDEX operation. This does
+ not affect any cached table definition,
+ because we are filtering out such indexes in
+ dict_load_indexes(). */
+ break;
+ }
+ /* fall through */
+ case DICT_COLUMNS_ID:
+ static_assert(!DICT_COL__SYS_INDEXES__TABLE_ID, "");
+ static_assert(!DICT_COL__SYS_COLUMNS__TABLE_ID, "");
+ c = DICT_COL__SYS_COLUMNS__TABLE_ID;
/* This is rolling back an UPDATE or DELETE on SYS_COLUMNS.
If it was part of an instant ALTER TABLE operation, we
must evict the table definition, so that it can be
reloaded after the dictionary operation has been
completed. At this point, any corresponding operation
to the metadata record will have been rolled back. */
- const dfield_t& table_id = *dtuple_get_nth_field(node->row, 0);
+ evict:
+ const dfield_t& table_id = *dtuple_get_nth_field(node->row, c);
ut_ad(dfield_get_len(&table_id) == 8);
- node->trx->evict_table(mach_read_from_8(static_cast<byte*>(
- table_id.data)));
+ node->trx->evict_table(mach_read_from_8(
+ static_cast<byte*>(
+ table_id.data)),
+ id == DICT_COLUMNS_ID);
}
- return(err);
+ return DB_SUCCESS;
}
/** Get the byte offset of the DB_TRX_ID column
@@ -199,28 +215,23 @@ static ulint row_trx_id_offset(const rec_t* rec, const dict_index_t* index)
}
/** Determine if rollback must execute a purge-like operation.
-@param[in,out] node row undo
-@param[in,out] mtr mini-transaction
+@param node row undo
@return whether the record should be purged */
-static bool row_undo_mod_must_purge(undo_node_t* node, mtr_t* mtr)
+static bool row_undo_mod_must_purge(const undo_node_t &node)
{
- ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
- ut_ad(!node->table->is_temporary());
-
- btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&node->pcur);
- ut_ad(btr_cur->index->is_primary());
- DEBUG_SYNC_C("rollback_purge_clust");
+ ut_ad(node.rec_type == TRX_UNDO_UPD_DEL_REC);
+ ut_ad(!node.table->is_temporary());
- mtr->s_lock(&purge_sys.latch, __FILE__, __LINE__);
-
- if (!purge_sys.changes_visible(node->new_trx_id, node->table->name)) {
- return false;
- }
+ const btr_cur_t &btr_cur= node.pcur.btr_cur;
+ ut_ad(btr_cur.index()->is_primary());
+ DEBUG_SYNC_C("rollback_purge_clust");
- const rec_t* rec = btr_cur_get_rec(btr_cur);
+ if (!purge_sys.is_purgeable(node.new_trx_id))
+ return false;
- return trx_read_trx_id(rec + row_trx_id_offset(rec, btr_cur->index))
- == node->new_trx_id;
+ const rec_t *rec= btr_cur_get_rec(&btr_cur);
+ return trx_read_trx_id(rec + row_trx_id_offset(rec, btr_cur.index())) ==
+ node.new_trx_id;
}
/***********************************************************//**
@@ -238,13 +249,9 @@ row_undo_mod_clust(
mtr_t mtr;
dberr_t err;
dict_index_t* index;
- bool online;
ut_ad(thr_get_trx(thr) == node->trx);
- ut_ad(node->trx->dict_operation_lock_mode);
ut_ad(node->trx->in_rollback);
- ut_ad(rw_lock_own_flagged(&dict_sys.latch,
- RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
log_free_check();
pcur = &node->pcur;
@@ -259,26 +266,16 @@ row_undo_mod_clust(
ut_ad(lock_table_has_locks(index->table));
}
- online = dict_index_is_online_ddl(index);
- if (online) {
- ut_ad(node->trx->dict_operation_lock_mode != RW_X_LATCH);
- mtr_s_lock_index(index, &mtr);
- }
-
mem_heap_t* heap = mem_heap_create(1024);
mem_heap_t* offsets_heap = NULL;
rec_offs* offsets = NULL;
- const dtuple_t* rebuilt_old_pk;
byte sys[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
/* Try optimistic processing of the record, keeping changes within
the index page */
err = row_undo_mod_clust_low(node, &offsets, &offsets_heap,
- heap, &rebuilt_old_pk, sys,
- thr, &mtr, online
- ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
- : BTR_MODIFY_LEAF);
+ heap, sys, thr, &mtr, BTR_MODIFY_LEAF);
if (err != DB_SUCCESS) {
btr_pcur_commit_specify_mtr(pcur, &mtr);
@@ -293,44 +290,12 @@ row_undo_mod_clust(
index->set_modified(mtr);
}
- err = row_undo_mod_clust_low(
- node, &offsets, &offsets_heap,
- heap, &rebuilt_old_pk, sys,
- thr, &mtr, BTR_MODIFY_TREE);
+ err = row_undo_mod_clust_low(node, &offsets, &offsets_heap,
+ heap, sys, thr, &mtr,
+ BTR_MODIFY_TREE);
ut_ad(err == DB_SUCCESS || err == DB_OUT_OF_FILE_SPACE);
}
- /* Online rebuild cannot be initiated while we are holding
- dict_sys.latch and index->lock. (It can be aborted.) */
- ut_ad(online || !dict_index_is_online_ddl(index));
-
- if (err == DB_SUCCESS && online) {
-
- ut_ad(rw_lock_own_flagged(
- &index->lock,
- RW_LOCK_FLAG_S | RW_LOCK_FLAG_X
- | RW_LOCK_FLAG_SX));
-
- switch (node->rec_type) {
- case TRX_UNDO_DEL_MARK_REC:
- row_log_table_insert(
- btr_pcur_get_rec(pcur), index, offsets);
- break;
- case TRX_UNDO_UPD_EXIST_REC:
- row_log_table_update(
- btr_pcur_get_rec(pcur), index, offsets,
- rebuilt_old_pk);
- break;
- case TRX_UNDO_UPD_DEL_REC:
- row_log_table_delete(
- btr_pcur_get_rec(pcur), index, offsets, sys);
- break;
- default:
- ut_ad(0);
- break;
- }
- }
-
/**
* when scrubbing, and records gets cleared,
* the transaction id is not present afterwards.
@@ -358,47 +323,55 @@ row_undo_mod_clust(
ut_ad(node->new_trx_id);
mtr.start();
- if (btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr) !=
+ if (pcur->restore_position(BTR_MODIFY_LEAF, &mtr) !=
btr_pcur_t::SAME_ALL) {
goto mtr_commit_exit;
}
+ ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur),
+ dict_table_is_comp(node->table)));
+
if (index->table->is_temporary()) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
- } else {
- if (!row_undo_mod_must_purge(node, &mtr)) {
+ err = btr_cur_optimistic_delete(&pcur->btr_cur, 0,
+ &mtr);
+ if (err != DB_FAIL) {
goto mtr_commit_exit;
}
+ err = DB_SUCCESS;
+ btr_pcur_commit_specify_mtr(pcur, &mtr);
+ } else {
index->set_modified(mtr);
+ if (!row_undo_mod_must_purge(*node)) {
+ goto mtr_commit_exit;
+ }
+ err = btr_cur_optimistic_delete(&pcur->btr_cur, 0,
+ &mtr);
+ if (err != DB_FAIL) {
+ goto mtr_commit_exit;
+ }
+ err = DB_SUCCESS;
+ btr_pcur_commit_specify_mtr(pcur, &mtr);
}
- ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur),
- dict_table_is_comp(node->table)));
- if (btr_cur_optimistic_delete(&pcur->btr_cur, 0, &mtr)) {
- goto mtr_commit_exit;
- }
-
- btr_pcur_commit_specify_mtr(pcur, &mtr);
-
mtr.start();
- if (btr_pcur_restore_position(
- BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
- pcur, &mtr) != btr_pcur_t::SAME_ALL) {
+ if (pcur->restore_position(BTR_PURGE_TREE, &mtr) !=
+ btr_pcur_t::SAME_ALL) {
goto mtr_commit_exit;
}
+ ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur),
+ dict_table_is_comp(node->table)));
+
if (index->table->is_temporary()) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
} else {
- if (!row_undo_mod_must_purge(node, &mtr)) {
+ if (!row_undo_mod_must_purge(*node)) {
goto mtr_commit_exit;
}
index->set_modified(mtr);
}
- ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur),
- dict_table_is_comp(node->table)));
-
/* This operation is analogous to purge, we can free
also inherited externally stored fields. We can also
assume that the record was complete (including BLOBs),
@@ -407,25 +380,20 @@ row_undo_mod_clust(
rollback=false, just like purge does. */
btr_cur_pessimistic_delete(&err, FALSE, &pcur->btr_cur, 0,
false, &mtr);
- ut_ad(err == DB_SUCCESS
- || err == DB_OUT_OF_FILE_SPACE);
+ ut_ad(err == DB_SUCCESS || err == DB_OUT_OF_FILE_SPACE);
} else if (!index->table->is_temporary() && node->new_trx_id) {
/* We rolled back a record so that it still exists.
We must reset the DB_TRX_ID if the history is no
longer accessible by any active read view. */
mtr.start();
- if (btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr)
- != btr_pcur_t::SAME_ALL) {
- goto mtr_commit_exit;
- }
- rec_t* rec = btr_pcur_get_rec(pcur);
- mtr.s_lock(&purge_sys.latch, __FILE__, __LINE__);
- if (!purge_sys.changes_visible(node->new_trx_id,
- node->table->name)) {
+ if (pcur->restore_position(BTR_MODIFY_LEAF, &mtr)
+ != btr_pcur_t::SAME_ALL
+ || !purge_sys.is_purgeable(node->new_trx_id)) {
goto mtr_commit_exit;
}
+ rec_t* rec = btr_pcur_get_rec(pcur);
ulint trx_id_offset = index->trx_id_offset;
ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1;
/* Reserve enough offsets for the PRIMARY KEY and
@@ -481,7 +449,7 @@ row_undo_mod_clust(
mtr.memset(block, offs, DATA_TRX_ID_LEN, 0);
offs += DATA_TRX_ID_LEN;
mtr.write<1,mtr_t::MAYBE_NOP>(*block,
- block->frame
+ block->page.frame
+ offs, 0x80U);
mtr.memset(block, offs + 1,
DATA_ROLL_PTR_LEN - 1, 0);
@@ -513,7 +481,7 @@ row_undo_mod_del_mark_or_remove_sec_low(
que_thr_t* thr, /*!< in: query thread */
dict_index_t* index, /*!< in: index */
dtuple_t* entry, /*!< in: index entry */
- ulint mode) /*!< in: latch mode BTR_MODIFY_LEAF or
+ btr_latch_mode mode) /*!< in: latch mode BTR_MODIFY_LEAF or
BTR_MODIFY_TREE */
{
btr_pcur_t pcur;
@@ -521,25 +489,36 @@ row_undo_mod_del_mark_or_remove_sec_low(
dberr_t err = DB_SUCCESS;
mtr_t mtr;
mtr_t mtr_vers;
- row_search_result search_result;
const bool modify_leaf = mode == BTR_MODIFY_LEAF;
row_mtr_start(&mtr, index, !modify_leaf);
- if (!index->is_committed()) {
+ pcur.btr_cur.page_cur.index = index;
+ btr_cur = btr_pcur_get_btr_cur(&pcur);
+
+ if (index->is_spatial()) {
+ mode = modify_leaf
+ ? btr_latch_mode(BTR_MODIFY_LEAF
+ | BTR_RTREE_DELETE_MARK
+ | BTR_RTREE_UNDO_INS)
+ : btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS);
+ btr_cur->thr = thr;
+ if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, &mtr))) {
+ goto found;
+ } else {
+ goto func_exit;
+ }
+ } else if (!index->is_committed()) {
/* The index->online_status may change if the index is
or was being created online, but not committed yet. It
is protected by index->lock. */
if (modify_leaf) {
- mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
+ mode = BTR_MODIFY_LEAF_ALREADY_LATCHED;
mtr_s_lock_index(index, &mtr);
} else {
- ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
- mtr_sx_lock_index(index, &mtr);
- }
-
- if (row_log_online_op_try(index, entry, 0)) {
- goto func_exit_no_pcur;
+ ut_ad(mode == BTR_PURGE_TREE);
+ mode = BTR_PURGE_TREE_ALREADY_LATCHED;
+ mtr_x_lock_index(index, &mtr);
}
} else {
/* For secondary indexes,
@@ -548,20 +527,8 @@ row_undo_mod_del_mark_or_remove_sec_low(
ut_ad(!dict_index_is_online_ddl(index));
}
- btr_cur = btr_pcur_get_btr_cur(&pcur);
-
- if (dict_index_is_spatial(index)) {
- if (modify_leaf) {
- btr_cur->thr = thr;
- mode |= BTR_RTREE_DELETE_MARK;
- }
- mode |= BTR_RTREE_UNDO_INS;
- }
-
- search_result = row_search_index_entry(index, entry, mode,
- &pcur, &mtr);
-
- switch (UNIV_EXPECT(search_result, ROW_FOUND)) {
+ switch (UNIV_EXPECT(row_search_index_entry(entry, mode, &pcur, &mtr),
+ ROW_FOUND)) {
case ROW_NOT_FOUND:
/* In crash recovery, the secondary index record may
be missing if the UPDATE did not have time to insert
@@ -583,14 +550,15 @@ row_undo_mod_del_mark_or_remove_sec_low(
ut_error;
}
+found:
/* We should remove the index record if no prior version of the row,
which cannot be purged yet, requires its existence. If some requires,
we should delete mark the record. */
mtr_vers.start();
- ut_a(btr_pcur_restore_position(BTR_SEARCH_LEAF, &node->pcur, &mtr_vers)
- == btr_pcur_t::SAME_ALL);
+ ut_a(node->pcur.restore_position(BTR_SEARCH_LEAF, &mtr_vers) ==
+ btr_pcur_t::SAME_ALL);
/* For temporary table, we can skip to check older version of
clustered index entry, because there is no MVCC or purge. */
@@ -615,8 +583,7 @@ row_undo_mod_del_mark_or_remove_sec_low(
}
if (modify_leaf) {
- err = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
- ? DB_SUCCESS : DB_FAIL;
+ err = btr_cur_optimistic_delete(btr_cur, 0, &mtr);
} else {
/* Passing rollback=false,
because we are deleting a secondary index record:
@@ -636,7 +603,6 @@ row_undo_mod_del_mark_or_remove_sec_low(
func_exit:
btr_pcur_close(&pcur);
-func_exit_no_pcur:
mtr_commit(&mtr);
return(err);
@@ -670,7 +636,7 @@ row_undo_mod_del_mark_or_remove_sec(
}
err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
- entry, BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE);
+ entry, BTR_PURGE_TREE);
return(err);
}
@@ -688,7 +654,7 @@ static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_mod_del_unmark_sec_and_undo_update(
/*========================================*/
- ulint mode, /*!< in: search mode: BTR_MODIFY_LEAF or
+ btr_latch_mode mode, /*!< in: search mode: BTR_MODIFY_LEAF or
BTR_MODIFY_TREE */
que_thr_t* thr, /*!< in: query thread */
dict_index_t* index, /*!< in: index */
@@ -703,51 +669,42 @@ row_undo_mod_del_unmark_sec_and_undo_update(
trx_t* trx = thr_get_trx(thr);
const ulint flags
= BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG;
- row_search_result search_result;
- ulint orig_mode = mode;
+ const auto orig_mode = mode;
+ pcur.btr_cur.page_cur.index = index;
ut_ad(trx->id != 0);
- if (dict_index_is_spatial(index)) {
+ if (index->is_spatial()) {
		/* FIXME: Currently we do a 2-pass search for the undo,
		to avoid undelete-marking a wrong record when rolling
		back a partial update. Later, we could log some info in
		secondary index updates to avoid this. */
- ut_ad(mode & BTR_MODIFY_LEAF);
- mode |= BTR_RTREE_DELETE_MARK;
+ static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), "");
+ ut_ad(!(mode & 8));
+ mode = btr_latch_mode(mode | BTR_RTREE_DELETE_MARK);
}
try_again:
- row_mtr_start(&mtr, index, !(mode & BTR_MODIFY_LEAF));
+ row_mtr_start(&mtr, index, mode & 8);
- if (!index->is_committed()) {
- /* The index->online_status may change if the index is
- or was being created online, but not committed yet. It
- is protected by index->lock. */
- if (mode == BTR_MODIFY_LEAF) {
- mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
- mtr_s_lock_index(index, &mtr);
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
- mtr_sx_lock_index(index, &mtr);
- }
+ btr_cur->thr = thr;
- if (row_log_online_op_try(index, entry, trx->id)) {
- goto func_exit_no_pcur;
+ if (index->is_spatial()) {
+ if (!rtr_search(entry, mode, &pcur, &mtr)) {
+ goto found;
}
- } else {
- /* For secondary indexes,
- index->online_status==ONLINE_INDEX_COMPLETE if
- index->is_committed(). */
- ut_ad(!dict_index_is_online_ddl(index));
- }
- btr_cur->thr = thr;
+ if (mode != orig_mode && btr_cur->rtr_info->fd_del) {
+ mode = orig_mode;
+ btr_pcur_close(&pcur);
+ mtr.commit();
+ goto try_again;
+ }
- search_result = row_search_index_entry(index, entry, mode,
- &pcur, &mtr);
+ goto not_found;
+ }
- switch (search_result) {
+ switch (row_search_index_entry(entry, mode, &pcur, &mtr)) {
mem_heap_t* heap;
mem_heap_t* offsets_heap;
rec_offs* offsets;
@@ -758,45 +715,27 @@ try_again:
flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
ut_error;
case ROW_NOT_FOUND:
- /* For spatial index, if first search didn't find an
- undel-marked rec, try to find a del-marked rec. */
- if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) {
- if (mode != orig_mode) {
- mode = orig_mode;
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- goto try_again;
- }
- }
-
- if (index->is_committed()) {
- /* During online secondary index creation, it
- is possible that MySQL is waiting for a
- meta-data lock upgrade before invoking
- ha_innobase::commit_inplace_alter_table()
- while this ROLLBACK is executing. InnoDB has
- finished building the index, but it does not
- yet exist in MySQL. In this case, we suppress
- the printout to the error log. */
+not_found:
+ if (btr_cur->up_match >= dict_index_get_n_unique(index)
+ || btr_cur->low_match >= dict_index_get_n_unique(index)) {
ib::warn() << "Record in index " << index->name
<< " of table " << index->table->name
- << " was not found on rollback, trying to"
- " insert: " << *entry
+ << " was not found on rollback, and"
+ " a duplicate exists: "
+ << *entry
<< " at: " << rec_index_print(
btr_cur_get_rec(btr_cur), index);
- }
-
- if (btr_cur->up_match >= dict_index_get_n_unique(index)
- || btr_cur->low_match >= dict_index_get_n_unique(index)) {
- if (index->is_committed()) {
- ib::warn() << "Record in index " << index->name
- << " was not found on rollback, and"
- " a duplicate exists";
- }
err = DB_DUPLICATE_KEY;
break;
}
+ ib::warn() << "Record in index " << index->name
+ << " of table " << index->table->name
+ << " was not found on rollback, trying to insert: "
+ << *entry
+ << " at: " << rec_index_print(
+ btr_cur_get_rec(btr_cur), index);
+
/* Insert the missing record that we were trying to
delete-unmark. */
big_rec_t* big_rec;
@@ -834,6 +773,7 @@ try_again:
break;
case ROW_FOUND:
+found:
btr_rec_set_deleted<false>(btr_cur_get_block(btr_cur),
btr_cur_get_rec(btr_cur), &mtr);
heap = mem_heap_create(
@@ -879,44 +819,12 @@ try_again:
}
btr_pcur_close(&pcur);
-func_exit_no_pcur:
mtr_commit(&mtr);
return(err);
}
/***********************************************************//**
-Flags a secondary index corrupted. */
-static MY_ATTRIBUTE((nonnull))
-void
-row_undo_mod_sec_flag_corrupted(
-/*============================*/
- trx_t* trx, /*!< in/out: transaction */
- dict_index_t* index) /*!< in: secondary index */
-{
- ut_ad(!dict_index_is_clust(index));
-
- switch (trx->dict_operation_lock_mode) {
- case RW_S_LATCH:
- /* Because row_undo() is holding an S-latch
- on the data dictionary during normal rollback,
- we can only mark the index corrupted in the
- data dictionary cache. TODO: fix this somehow.*/
- mutex_enter(&dict_sys.mutex);
- dict_set_corrupted_index_cache_only(index);
- mutex_exit(&dict_sys.mutex);
- break;
- default:
- ut_ad(0);
- /* fall through */
- case RW_X_LATCH:
- /* This should be the rollback of a data dictionary
- transaction. */
- dict_set_corrupted(index, trx, "rollback");
- }
-}
-
-/***********************************************************//**
Undoes a modify in secondary indexes when undo record type is UPD_DEL.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
@@ -934,12 +842,11 @@ row_undo_mod_upd_del_sec(
heap = mem_heap_create(1024);
- while (node->index != NULL) {
- dict_index_t* index = node->index;
- dtuple_t* entry;
+ do {
+ dict_index_t* index = node->index;
- if (index->type & DICT_FTS) {
- dict_table_next_uncorrupted_index(node->index);
+ if (index->type & (DICT_FTS | DICT_CORRUPT)
+ || !index->is_committed()) {
continue;
}
@@ -950,7 +857,7 @@ row_undo_mod_upd_del_sec(
time when the undo log record was written. When we get
to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
it should always cover all affected indexes. */
- entry = row_build_index_entry(
+ dtuple_t* entry = row_build_index_entry(
node->row, node->ext, index, heap);
if (UNIV_UNLIKELY(!entry)) {
@@ -975,8 +882,7 @@ row_undo_mod_upd_del_sec(
}
mem_heap_empty(heap);
- dict_table_next_uncorrupted_index(node->index);
- }
+ } while ((node->index = dict_table_get_next_index(node->index)));
mem_heap_free(heap);
@@ -1000,12 +906,11 @@ row_undo_mod_del_mark_sec(
heap = mem_heap_create(1024);
- while (node->index != NULL) {
- dict_index_t* index = node->index;
- dtuple_t* entry;
+ do {
+ dict_index_t* index = node->index;
- if (index->type == DICT_FTS) {
- dict_table_next_uncorrupted_index(node->index);
+ if (index->type & (DICT_FTS | DICT_CORRUPT)
+ || !index->is_committed()) {
continue;
}
@@ -1016,7 +921,7 @@ row_undo_mod_del_mark_sec(
time when the undo log record was written. When we get
to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
it should always cover all affected indexes. */
- entry = row_build_index_entry(
+ dtuple_t* entry = row_build_index_entry(
node->row, node->ext, index, heap);
ut_a(entry);
@@ -1029,8 +934,7 @@ row_undo_mod_del_mark_sec(
}
if (err == DB_DUPLICATE_KEY) {
- row_undo_mod_sec_flag_corrupted(
- thr_get_trx(thr), index);
+ index->type |= DICT_CORRUPT;
err = DB_SUCCESS;
/* Do not return any error to the caller. The
duplicate will be reported by ALTER TABLE or
@@ -1043,8 +947,7 @@ row_undo_mod_del_mark_sec(
}
mem_heap_empty(heap);
- dict_table_next_uncorrupted_index(node->index);
- }
+ } while ((node->index = dict_table_get_next_index(node->index)));
mem_heap_free(heap);
@@ -1061,48 +964,33 @@ row_undo_mod_upd_exist_sec(
undo_node_t* node, /*!< in: row undo node */
que_thr_t* thr) /*!< in: query thread */
{
- mem_heap_t* heap;
- dberr_t err = DB_SUCCESS;
-
- if (node->index == NULL
- || ((node->cmpl_info & UPD_NODE_NO_ORD_CHANGE))) {
- /* No change in secondary indexes */
-
- return(err);
+ if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
+ return DB_SUCCESS;
}
- heap = mem_heap_create(1024);
+ mem_heap_t* heap = mem_heap_create(1024);
+ dberr_t err = DB_SUCCESS;
+ do {
+ dict_index_t* index = node->index;
- while (node->index != NULL) {
- dict_index_t* index = node->index;
- dtuple_t* entry;
+ if (index->type & (DICT_FTS | DICT_CORRUPT)
+ || !index->is_committed()) {
+ continue;
+ }
- if (dict_index_is_spatial(index)) {
- if (!row_upd_changes_ord_field_binary_func(
- index, node->update,
+ if (!row_upd_changes_ord_field_binary_func(
+ index, node->update,
#ifdef UNIV_DEBUG
- thr,
+ thr,
#endif /* UNIV_DEBUG */
- node->row,
- node->ext, ROW_BUILD_FOR_UNDO)) {
- dict_table_next_uncorrupted_index(node->index);
- continue;
- }
- } else {
- if (index->type == DICT_FTS
- || !row_upd_changes_ord_field_binary(index,
- node->update,
- thr, node->row,
- node->ext)) {
- dict_table_next_uncorrupted_index(node->index);
- continue;
- }
+ node->row, node->ext, ROW_BUILD_FOR_UNDO)) {
+ continue;
}
/* Build the newest version of the index entry */
- entry = row_build_index_entry(node->row, node->ext,
- index, heap);
+ dtuple_t* entry = row_build_index_entry(
+ node->row, node->ext, index, heap);
if (UNIV_UNLIKELY(!entry)) {
/* The server must have crashed in
row_upd_clust_rec_by_insert() before
@@ -1154,17 +1042,10 @@ row_undo_mod_upd_exist_sec(
the secondary index record if we updated its fields
but alphabetically they stayed the same, e.g.,
'abc' -> 'aBc'. */
- if (dict_index_is_spatial(index)) {
- entry = row_build_index_entry_low(node->undo_row,
- node->undo_ext,
- index, heap,
- ROW_BUILD_FOR_UNDO);
- } else {
- entry = row_build_index_entry(node->undo_row,
- node->undo_ext,
- index, heap);
- }
-
+ entry = row_build_index_entry_low(node->undo_row,
+ node->undo_ext,
+ index, heap,
+ ROW_BUILD_FOR_UNDO);
ut_a(entry);
err = row_undo_mod_del_unmark_sec_and_undo_update(
@@ -1175,16 +1056,14 @@ row_undo_mod_upd_exist_sec(
}
if (err == DB_DUPLICATE_KEY) {
- row_undo_mod_sec_flag_corrupted(
- thr_get_trx(thr), index);
+ index->type |= DICT_CORRUPT;
err = DB_SUCCESS;
} else if (err != DB_SUCCESS) {
break;
}
mem_heap_empty(heap);
- dict_table_next_uncorrupted_index(node->index);
- }
+ } while ((node->index = dict_table_get_next_index(node->index)));
mem_heap_free(heap);
@@ -1197,7 +1076,6 @@ row_undo_mod_upd_exist_sec(
static bool row_undo_mod_parse_undo_rec(undo_node_t* node, bool dict_locked)
{
dict_index_t* clust_index;
- byte* ptr;
undo_no_t undo_no;
table_id_t table_id;
trx_id_t trx_id;
@@ -1212,19 +1090,20 @@ static bool row_undo_mod_parse_undo_rec(undo_node_t* node, bool dict_locked)
ut_ad(node->trx->in_rollback);
ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr));
- ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
- &dummy_extern, &undo_no, &table_id);
+ const byte *ptr = trx_undo_rec_get_pars(
+ node->undo_rec, &type, &cmpl_info,
+ &dummy_extern, &undo_no, &table_id);
node->rec_type = type;
if (node->state == UNDO_UPDATE_PERSISTENT) {
node->table = dict_table_open_on_id(table_id, dict_locked,
DICT_TABLE_OP_NORMAL);
} else if (!dict_locked) {
- mutex_enter(&dict_sys.mutex);
- node->table = dict_sys.get_temporary_table(table_id);
- mutex_exit(&dict_sys.mutex);
+ dict_sys.freeze(SRW_LOCK_CALL);
+ node->table = dict_sys.acquire_temporary_table(table_id);
+ dict_sys.unfreeze();
} else {
- node->table = dict_sys.get_temporary_table(table_id);
+ node->table = dict_sys.acquire_temporary_table(table_id);
}
if (!node->table) {
@@ -1244,7 +1123,7 @@ close_table:
would probably be better to just drop all temporary
tables (and temporary undo log records) of the current
connection, instead of doing this rollback. */
- dict_table_close(node->table, dict_locked, FALSE);
+ dict_table_close(node->table, dict_locked);
node->table = NULL;
return false;
}
@@ -1330,15 +1209,16 @@ row_undo_mod(
undo_node_t* node, /*!< in: row undo node */
que_thr_t* thr) /*!< in: query thread */
{
- dberr_t err;
+ dberr_t err = DB_SUCCESS;
ut_ad(thr_get_trx(thr) == node->trx);
- const bool dict_locked = node->trx->dict_operation_lock_mode
- == RW_X_LATCH;
+ const bool dict_locked = node->trx->dict_operation_lock_mode;
if (!row_undo_mod_parse_undo_rec(node, dict_locked)) {
return DB_SUCCESS;
}
+ ut_ad(node->table->is_temporary()
+ || lock_table_has_locks(node->table));
node->index = dict_table_get_first_index(node->table);
ut_ad(dict_index_is_clust(node->index));
@@ -1349,23 +1229,20 @@ row_undo_mod(
/* Skip the clustered index (the first index) */
node->index = dict_table_get_next_index(node->index);
-
- /* Skip all corrupted secondary index */
- dict_table_skip_corrupt_index(node->index);
-
- switch (node->rec_type) {
- case TRX_UNDO_UPD_EXIST_REC:
- err = row_undo_mod_upd_exist_sec(node, thr);
- break;
- case TRX_UNDO_DEL_MARK_REC:
- err = row_undo_mod_del_mark_sec(node, thr);
- break;
- case TRX_UNDO_UPD_DEL_REC:
- err = row_undo_mod_upd_del_sec(node, thr);
- break;
- default:
- ut_error;
- err = DB_ERROR;
+ if (node->index) {
+ switch (node->rec_type) {
+ case TRX_UNDO_UPD_EXIST_REC:
+ err = row_undo_mod_upd_exist_sec(node, thr);
+ break;
+ case TRX_UNDO_DEL_MARK_REC:
+ err = row_undo_mod_del_mark_sec(node, thr);
+ break;
+ case TRX_UNDO_UPD_DEL_REC:
+ err = row_undo_mod_upd_del_sec(node, thr);
+ break;
+ default:
+ MY_ASSERT_UNREACHABLE();
+ }
}
if (err == DB_SUCCESS) {
@@ -1394,7 +1271,7 @@ rollback_clust:
/* Do not attempt to update statistics when
executing ROLLBACK in the InnoDB SQL
interpreter, because in that case we would
- already be holding dict_sys.mutex, which
+ already be holding dict_sys.latch, which
would be acquired when updating statistics. */
if (update_statistics && !dict_locked) {
dict_stats_update_if_needed(node->table,
@@ -1405,7 +1282,7 @@ rollback_clust:
}
}
- dict_table_close(node->table, dict_locked, FALSE);
+ dict_table_close(node->table, dict_locked);
node->table = NULL;
diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc
index 3ac8e434f35..4d6d779eee6 100644
--- a/storage/innobase/row/row0undo.cc
+++ b/storage/innobase/row/row0undo.cc
@@ -256,21 +256,6 @@ func_exit:
return(found);
}
-/** Try to truncate the undo logs.
-@param[in,out] trx transaction */
-static void row_undo_try_truncate(trx_t* trx)
-{
- if (trx_undo_t* undo = trx->rsegs.m_redo.undo) {
- ut_ad(undo->rseg == trx->rsegs.m_redo.rseg);
- trx_undo_truncate_end(*undo, trx->undo_no, false);
- }
-
- if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) {
- ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg);
- trx_undo_truncate_end(*undo, trx->undo_no, true);
- }
-}
-
/** Get the latest undo log record for rollback.
@param[in,out] node rollback context
@return whether an undo log record was fetched */
@@ -280,13 +265,14 @@ static bool row_undo_rec_get(undo_node_t* node)
if (trx->pages_undone) {
trx->pages_undone = 0;
- row_undo_try_truncate(trx);
+ trx_undo_try_truncate(*trx);
}
trx_undo_t* undo = NULL;
trx_undo_t* update = trx->rsegs.m_redo.undo;
trx_undo_t* temp = trx->rsegs.m_noredo.undo;
const undo_no_t limit = trx->roll_limit;
+ bool is_temp = false;
ut_ad(!update || !temp || update->empty() || temp->empty()
|| update->top_undo_no != temp->top_undo_no);
@@ -300,15 +286,14 @@ static bool row_undo_rec_get(undo_node_t* node)
}
if (temp && !temp->empty() && temp->top_undo_no >= limit) {
- if (!undo) {
- undo = temp;
- } else if (undo->top_undo_no < temp->top_undo_no) {
+ if (!undo || undo->top_undo_no < temp->top_undo_no) {
undo = temp;
+ is_temp = true;
}
}
if (undo == NULL) {
- row_undo_try_truncate(trx);
+ trx_undo_try_truncate(*trx);
/* Mark any ROLLBACK TO SAVEPOINT completed, so that
if the transaction object is committed and reused
later, we will default to a full ROLLBACK. */
@@ -321,13 +306,18 @@ static bool row_undo_rec_get(undo_node_t* node)
ut_ad(limit <= undo->top_undo_no);
node->roll_ptr = trx_undo_build_roll_ptr(
- false, undo->rseg->id, undo->top_page_no, undo->top_offset);
+ false, trx_sys.rseg_id(undo->rseg, !is_temp),
+ undo->top_page_no, undo->top_offset);
mtr_t mtr;
mtr.start();
- buf_block_t* undo_page = trx_undo_page_get_s_latched(
- page_id_t(undo->rseg->space->id, undo->top_page_no), &mtr);
+ buf_block_t* undo_page = buf_page_get(
+ page_id_t(undo->rseg->space->id, undo->top_page_no),
+ 0, RW_S_LATCH, &mtr);
+ if (!undo_page) {
+ return false;
+ }
uint16_t offset = undo->top_offset;
@@ -348,11 +338,15 @@ static bool row_undo_rec_get(undo_node_t* node)
ut_ad(undo->empty());
}
- node->undo_rec = trx_undo_rec_copy(undo_page->frame + offset,
+ node->undo_rec = trx_undo_rec_copy(undo_page->page.frame + offset,
node->heap);
mtr.commit();
- switch (trx_undo_rec_get_type(node->undo_rec)) {
+ if (UNIV_UNLIKELY(!node->undo_rec)) {
+ return false;
+ }
+
+ switch (node->undo_rec[2] & (TRX_UNDO_CMPL_INFO_MULT - 1)) {
case TRX_UNDO_INSERT_METADATA:
/* This record type was introduced in MDEV-11369
instant ADD COLUMN, which was implemented after
@@ -364,14 +358,14 @@ static bool row_undo_rec_get(undo_node_t* node)
ut_ad(undo == update);
/* fall through */
case TRX_UNDO_INSERT_REC:
+ case TRX_UNDO_EMPTY:
node->roll_ptr |= 1ULL << ROLL_PTR_INSERT_FLAG_POS;
- node->state = undo == temp
+ node->state = is_temp
? UNDO_INSERT_TEMPORARY : UNDO_INSERT_PERSISTENT;
break;
default:
- node->state = undo == temp
+ node->state = is_temp
? UNDO_UPDATE_TEMPORARY : UNDO_UPDATE_PERSISTENT;
- break;
}
trx->undo_no = node->undo_no = trx_undo_rec_get_undo_no(
@@ -399,19 +393,6 @@ row_undo(
return DB_SUCCESS;
}
- /* Prevent prepare_inplace_alter_table_dict() from adding
- dict_table_t::indexes while we are processing the record.
- Recovered transactions are not protected by MDL, and the
- secondary index creation is not protected by table locks
- for online operation. (A table lock would only be acquired
- when committing the ALTER TABLE operation.) */
- trx_t* trx = node->trx;
- const bool locked_data_dict = !trx->dict_operation_lock_mode;
-
- if (UNIV_UNLIKELY(locked_data_dict)) {
- row_mysql_freeze_data_dictionary(trx);
- }
-
dberr_t err;
switch (node->state) {
@@ -428,11 +409,6 @@ row_undo(
err = DB_CORRUPTION;
}
- if (locked_data_dict) {
-
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
node->state = UNDO_NODE_FETCH_NEXT;
btr_pcur_close(&(node->pcur));
@@ -460,7 +436,7 @@ row_undo_step(
ut_ad(que_node_get_type(node) == QUE_NODE_UNDO);
- if (UNIV_UNLIKELY(trx_get_dict_operation(trx) == TRX_DICT_OP_NONE
+ if (UNIV_UNLIKELY(!trx->dict_operation
&& !srv_undo_sources
&& srv_shutdown_state != SRV_SHUTDOWN_NONE)
&& (srv_fast_shutdown == 3 || trx == trx_roll_crash_recv_trx)) {
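
Sketch (not part of the patch): the row0undo.cc hunk replaces the call to trx_undo_rec_get_type() with an inline mask, node->undo_rec[2] & (TRX_UNDO_CMPL_INFO_MULT - 1). In an undo log record, bytes 0-1 hold the next-record offset and byte 2 packs the record type in its low bits together with the compiler info (and flag bits) above it, so masking byte 2 recovers the type. The constants below mirror the InnoDB values as I understand them; treat the exact numbers as assumptions.

#include <cassert>
#include <cstdint>

static const unsigned TRX_UNDO_CMPL_INFO_MULT = 16; /* type lives below this */
static const unsigned TRX_UNDO_INSERT_REC     = 11; /* example type value */

static unsigned undo_rec_type(const uint8_t* rec)
{
  /* rec[0..1]: next-record offset; rec[2]: packed type + cmpl_info. */
  return rec[2] & (TRX_UNDO_CMPL_INFO_MULT - 1);
}

int main()
{
  /* Pack a type together with cmpl_info = 2 into byte 2, as a writer would. */
  uint8_t rec[3] = {0, 0,
                    uint8_t(TRX_UNDO_INSERT_REC + 2 * TRX_UNDO_CMPL_INFO_MULT)};
  assert(undo_rec_type(rec) == TRX_UNDO_INSERT_REC);
  return 0;
}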
diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc
index 066e3d43d27..fe88fce58a2 100644
--- a/storage/innobase/row/row0upd.cc
+++ b/storage/innobase/row/row0upd.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2021, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -127,10 +127,6 @@ row_upd_changes_first_fields_binary(
Checks if index currently is mentioned as a referenced index in a foreign
key constraint.
-NOTE that since we do not hold dict_sys.latch when leaving the
-function, it may be that the referencing table has been dropped when
-we leave this function: this function is only for heuristic use!
-
@return true if referenced */
static
bool
@@ -139,64 +135,44 @@ row_upd_index_is_referenced(
dict_index_t* index, /*!< in: index */
trx_t* trx) /*!< in: transaction */
{
- dict_table_t* table = index->table;
-
- if (table->referenced_set.empty()) {
- return false;
- }
-
- const bool froze_data_dict = !trx->dict_operation_lock_mode;
- if (froze_data_dict) {
- row_mysql_freeze_data_dictionary(trx);
- }
-
- dict_foreign_set::iterator it
- = std::find_if(table->referenced_set.begin(),
- table->referenced_set.end(),
- dict_foreign_with_index(index));
-
- const bool is_referenced = (it != table->referenced_set.end());
-
- if (froze_data_dict) {
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
- return is_referenced;
+ dict_table_t *table= index->table;
+ /* The pointers in table->referenced_set are safe to dereference
+ thanks to the SQL layer having acquired MDL on all (grand)parent tables. */
+ dict_foreign_set::iterator end= table->referenced_set.end();
+ return end != std::find_if(table->referenced_set.begin(), end,
+ dict_foreign_with_index(index));
}
#ifdef WITH_WSREP
static
-ibool
+bool
wsrep_row_upd_index_is_foreign(
/*========================*/
dict_index_t* index, /*!< in: index */
trx_t* trx) /*!< in: transaction */
{
- dict_table_t* table = index->table;
- ibool froze_data_dict = FALSE;
- ibool is_referenced = FALSE;
+ if (!trx->is_wsrep())
+ return false;
- if (table->foreign_set.empty()) {
- return(FALSE);
- }
-
- if (trx->dict_operation_lock_mode == 0) {
- row_mysql_freeze_data_dictionary(trx);
- froze_data_dict = TRUE;
- }
+ dict_table_t *table= index->table;
- dict_foreign_set::iterator it
- = std::find_if(table->foreign_set.begin(),
- table->foreign_set.end(),
- dict_foreign_with_foreign_index(index));
+ if (table->foreign_set.empty())
+ return false;
- is_referenced = (it != table->foreign_set.end());
+ /* No MDL protects dereferencing the members of table->foreign_set. */
+ const bool no_lock= !trx->dict_operation_lock_mode;
+ if (no_lock)
+ dict_sys.freeze(SRW_LOCK_CALL);
- if (froze_data_dict) {
- row_mysql_unfreeze_data_dictionary(trx);
- }
+ auto end= table->foreign_set.end();
+ const bool is_referenced= end !=
+ std::find_if(table->foreign_set.begin(), end,
+ [index](const dict_foreign_t* f)
+ {return f->foreign_index == index;});
+ if (no_lock)
+ dict_sys.unfreeze();
- return(is_referenced);
+ return is_referenced;
}
#endif /* WITH_WSREP */
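
Sketch (not part of the patch): the two functions rewritten above share the same shape. row_upd_index_is_referenced() is now a plain std::find_if over table->referenced_set, relying on the SQL layer's MDL to keep the pointers valid, while the wsrep variant still brackets its scan of table->foreign_set with dict_sys.freeze()/unfreeze() when the caller holds no dictionary latch. The model below uses simplified stand-in types, not the real dict_foreign_t or dict_sys API.

#include <algorithm>
#include <vector>

struct index_t {};
struct foreign_t { const index_t* referenced_index; const index_t* foreign_index; };

struct dict_latch_t
{
  void freeze()   {}   /* stand-in for dict_sys.freeze(SRW_LOCK_CALL) */
  void unfreeze() {}   /* stand-in for dict_sys.unfreeze() */
} dict_sys_model;

/* MDL on the (grand)parent tables is assumed: no latch needed. */
bool index_is_referenced(const std::vector<foreign_t>& referenced_set,
                         const index_t* index)
{
  auto end = referenced_set.end();
  return end != std::find_if(referenced_set.begin(), end,
                             [index](const foreign_t& f)
                             { return f.referenced_index == index; });
}

/* No MDL covers foreign_set here, so take the shared latch if needed. */
bool index_is_foreign(const std::vector<foreign_t>& foreign_set,
                      const index_t* index, bool holds_dict_latch)
{
  if (foreign_set.empty())
    return false;
  if (!holds_dict_latch)
    dict_sys_model.freeze();
  auto end = foreign_set.end();
  bool found = end != std::find_if(foreign_set.begin(), end,
                                   [index](const foreign_t& f)
                                   { return f.foreign_index == index; });
  if (!holds_dict_latch)
    dict_sys_model.unfreeze();
  return found;
}

int main()
{
  index_t i;
  std::vector<foreign_t> fks{{&i, nullptr}};
  return index_is_referenced(fks, &i) && !index_is_foreign(fks, &i, true) ? 0 : 1;
}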
@@ -224,10 +200,8 @@ row_upd_check_references_constraints(
dict_foreign_t* foreign;
mem_heap_t* heap;
dtuple_t* entry;
- trx_t* trx;
const rec_t* rec;
dberr_t err;
- ibool got_s_lock = FALSE;
DBUG_ENTER("row_upd_check_references_constraints");
@@ -235,8 +209,6 @@ row_upd_check_references_constraints(
DBUG_RETURN(DB_SUCCESS);
}
- trx = thr_get_trx(thr);
-
rec = btr_pcur_get_rec(pcur);
ut_ad(rec_offs_validate(rec, index, offsets));
@@ -250,12 +222,6 @@ row_upd_check_references_constraints(
mtr->start();
- if (trx->dict_operation_lock_mode == 0) {
- got_s_lock = TRUE;
-
- row_mysql_freeze_data_dictionary(trx);
- }
-
DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
"foreign_constraint_check_for_insert");
@@ -275,34 +241,19 @@ row_upd_check_references_constraints(
|| row_upd_changes_first_fields_binary(
entry, index, node->update,
foreign->n_fields))) {
- dict_table_t* foreign_table = foreign->foreign_table;
-
- dict_table_t* ref_table = NULL;
-
- if (foreign_table == NULL) {
+ dict_table_t* ref_table = nullptr;
+ if (!foreign->foreign_table) {
ref_table = dict_table_open_on_name(
foreign->foreign_table_name_lookup,
- FALSE, FALSE, DICT_ERR_IGNORE_NONE);
+ false, DICT_ERR_IGNORE_NONE);
}
- if (foreign_table) {
- foreign_table->inc_fk_checks();
- }
-
- /* NOTE that if the thread ends up waiting for a lock
- we will release dict_sys.latch temporarily!
- But the inc_fk_checks() protects foreign_table from
- being dropped while the check is running. */
-
err = row_ins_check_foreign_constraint(
FALSE, foreign, table, entry, thr);
- if (foreign_table) {
- foreign_table->dec_fk_checks();
- }
- if (ref_table != NULL) {
- dict_table_close(ref_table, FALSE, FALSE);
+ if (ref_table) {
+ dict_table_close(ref_table);
}
if (err != DB_SUCCESS) {
@@ -314,10 +265,6 @@ row_upd_check_references_constraints(
err = DB_SUCCESS;
func_exit:
- if (got_s_lock) {
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
mem_heap_free(heap);
DEBUG_SYNC_C("foreign_constraint_check_for_update_done");
@@ -341,18 +288,13 @@ wsrep_row_upd_check_foreign_constraints(
dict_foreign_t* foreign;
mem_heap_t* heap;
dtuple_t* entry;
- trx_t* trx;
const rec_t* rec;
dberr_t err;
- ibool got_s_lock = FALSE;
- ibool opened = FALSE;
if (table->foreign_set.empty()) {
return(DB_SUCCESS);
}
- trx = thr_get_trx(thr);
-
/* TODO: make native slave thread bail out here */
rec = btr_pcur_get_rec(pcur);
@@ -366,12 +308,6 @@ wsrep_row_upd_check_foreign_constraints(
mtr_start(mtr);
- if (trx->dict_operation_lock_mode == 0) {
- got_s_lock = TRUE;
-
- row_mysql_freeze_data_dictionary(trx);
- }
-
for (dict_foreign_set::iterator it = table->foreign_set.begin();
it != table->foreign_set.end();
++it) {
@@ -388,27 +324,21 @@ wsrep_row_upd_check_foreign_constraints(
entry, index, node->update,
foreign->n_fields))) {
- if (foreign->referenced_table == NULL) {
+ dict_table_t *opened = nullptr;
+
+ if (!foreign->referenced_table) {
foreign->referenced_table =
dict_table_open_on_name(
foreign->referenced_table_name_lookup,
- FALSE, FALSE, DICT_ERR_IGNORE_NONE);
- opened = (foreign->referenced_table) ? TRUE : FALSE;
+ false, DICT_ERR_IGNORE_NONE);
+ opened = foreign->referenced_table;
}
- /* NOTE that if the thread ends up waiting for a lock
- we will release dict_sys.latch temporarily!
- But the counter on the table protects 'foreign' from
- being dropped while the check is running. */
-
err = row_ins_check_foreign_constraint(
TRUE, foreign, table, entry, thr);
- if (foreign->referenced_table) {
- if (opened == TRUE) {
- dict_table_close(foreign->referenced_table, FALSE, FALSE);
- opened = FALSE;
- }
+ if (opened) {
+ dict_table_close(opened);
}
if (err != DB_SUCCESS) {
@@ -419,10 +349,6 @@ wsrep_row_upd_check_foreign_constraints(
err = DB_SUCCESS;
func_exit:
- if (got_s_lock) {
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
mem_heap_free(heap);
return(err);
@@ -543,46 +469,6 @@ row_upd_changes_field_size_or_external(
return(FALSE);
}
-/***********************************************************//**
-Returns true if row update contains disowned external fields.
-@return true if the update contains disowned external fields. */
-bool
-row_upd_changes_disowned_external(
-/*==============================*/
- const upd_t* update) /*!< in: update vector */
-{
- const upd_field_t* upd_field;
- const dfield_t* new_val;
- ulint new_len;
- ulint n_fields;
- ulint i;
-
- n_fields = upd_get_n_fields(update);
-
- for (i = 0; i < n_fields; i++) {
- const byte* field_ref;
-
- upd_field = upd_get_nth_field(update, i);
- new_val = &(upd_field->new_val);
- new_len = dfield_get_len(new_val);
-
- if (!dfield_is_ext(new_val)) {
- continue;
- }
-
- ut_ad(new_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
- field_ref = static_cast<const byte*>(dfield_get_data(new_val))
- + new_len - BTR_EXTERN_FIELD_REF_SIZE;
-
- if (field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG) {
- return(true);
- }
- }
-
- return(false);
-}
-
/***************************************************************//**
Builds an update vector from those fields which in a secondary index entry
differ from a record that has the equal ordering fields. NOTE: we compare
@@ -1146,16 +1032,7 @@ row_upd_replace_vcol(
/* If there is no index on the column, do not bother for
value update */
if (!col->m_col.ord_part) {
- dict_index_t* clust_index
- = dict_table_get_first_index(table);
-
- /* Skip the column if there is no online alter
- table in progress or it is not being indexed
- in new table */
- if (!dict_index_is_online_ddl(clust_index)
- || !row_log_col_is_indexed(clust_index, col_no)) {
- continue;
- }
+ continue;
}
dfield = dtuple_get_nth_v_field(row, col_no);
@@ -1345,9 +1222,6 @@ row_upd_changes_ord_field_binary_func(
ulint i;
const dict_index_t* clust_index;
- ut_ad(thr);
- ut_ad(thr->graph);
- ut_ad(thr->graph->trx);
ut_ad(!index->table->skip_alter_undo);
n_unique = dict_index_get_n_unique(index);
@@ -1547,9 +1421,11 @@ row_upd_changes_ord_field_binary_func(
trx_rollback_recovered()
when the server had crashed before
storing the field. */
- ut_ad(thr->graph->trx->is_recovered);
- ut_ad(thr->graph->trx
- == trx_roll_crash_recv_trx);
+ ut_ad(!thr
+ || thr->graph->trx->is_recovered);
+ ut_ad(!thr
+ || thr->graph->trx
+ == trx_roll_crash_recv_trx);
return(TRUE);
}
@@ -1956,25 +1832,28 @@ row_upd_sec_index_entry(
que_thr_t* thr) /*!< in: query thread */
{
mtr_t mtr;
- const rec_t* rec;
btr_pcur_t pcur;
mem_heap_t* heap;
dtuple_t* entry;
dict_index_t* index;
- btr_cur_t* btr_cur;
dberr_t err = DB_SUCCESS;
trx_t* trx = thr_get_trx(thr);
- ulint mode;
+ btr_latch_mode mode;
ulint flags;
enum row_search_result search_result;
ut_ad(trx->id != 0);
index = node->index;
+ ut_ad(index->is_committed());
+
+ /* For secondary indexes, index->online_status==ONLINE_INDEX_COMPLETE
+ if index->is_committed(). */
+ ut_ad(!dict_index_is_online_ddl(index));
const bool referenced = row_upd_index_is_referenced(index, trx);
#ifdef WITH_WSREP
- bool foreign = wsrep_row_upd_index_is_foreign(index, trx);
+ const bool foreign = wsrep_row_upd_index_is_foreign(index, trx);
#endif /* WITH_WSREP */
heap = mem_heap_create(1024);
@@ -1989,6 +1868,7 @@ row_upd_sec_index_entry(
"before_row_upd_sec_index_entry");
mtr.start();
+ mode = BTR_MODIFY_LEAF;
switch (index->table->space_id) {
case SRV_TMP_SPACE_ID:
@@ -2000,83 +1880,37 @@ row_upd_sec_index_entry(
/* fall through */
case IBUF_SPACE_ID:
flags = index->table->no_rollback() ? BTR_NO_ROLLBACK : 0;
+ /* We can only buffer delete-mark operations if there
+ are no foreign key constraints referring to the index. */
+ if (!referenced) {
+ mode = BTR_DELETE_MARK_LEAF;
+ }
break;
}
- bool uncommitted = !index->is_committed();
-
- if (uncommitted) {
- /* The index->online_status may change if the index is
- or was being created online, but not committed yet. It
- is protected by index->lock. */
-
- mtr_s_lock_index(index, &mtr);
+ /* Set the query thread, so that ibuf_insert_low() will be
+ able to invoke thd_get_trx(). */
+ pcur.btr_cur.thr = thr;
+ pcur.btr_cur.page_cur.index = index;
- switch (dict_index_get_online_status(index)) {
- case ONLINE_INDEX_COMPLETE:
- /* This is a normal index. Do not log anything.
- Perform the update on the index tree directly. */
- break;
- case ONLINE_INDEX_CREATION:
- /* Log a DELETE and optionally INSERT. */
- row_log_online_op(index, entry, 0);
-
- if (!node->is_delete) {
- mem_heap_empty(heap);
- entry = row_build_index_entry(
- node->upd_row, node->upd_ext,
- index, heap);
- ut_a(entry);
- row_log_online_op(index, entry, trx->id);
- }
- /* fall through */
- case ONLINE_INDEX_ABORTED:
- case ONLINE_INDEX_ABORTED_DROPPED:
- mtr_commit(&mtr);
- goto func_exit;
+ if (index->is_spatial()) {
+ mode = btr_latch_mode(BTR_MODIFY_LEAF | BTR_RTREE_DELETE_MARK);
+ if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, &mtr))) {
+ goto found;
}
- /* We can only buffer delete-mark operations if there
- are no foreign key constraints referring to the index.
- Change buffering is disabled for temporary tables and
- spatial index. */
- mode = (referenced || index->table->is_temporary()
- || dict_index_is_spatial(index))
- ? BTR_MODIFY_LEAF_ALREADY_S_LATCHED
- : BTR_DELETE_MARK_LEAF_ALREADY_S_LATCHED;
- } else {
- /* For secondary indexes,
- index->online_status==ONLINE_INDEX_COMPLETE if
- index->is_committed(). */
- ut_ad(!dict_index_is_online_ddl(index));
-
- /* We can only buffer delete-mark operations if there
- are no foreign key constraints referring to the index.
- Change buffering is disabled for temporary tables and
- spatial index. */
- mode = (referenced || index->table->is_temporary()
- || dict_index_is_spatial(index))
- ? BTR_MODIFY_LEAF
- : BTR_DELETE_MARK_LEAF;
- }
+ if (pcur.btr_cur.rtr_info->fd_del) {
+ /* We found the record, but it is delete-marked */
+ goto close;
+ }
- if (dict_index_is_spatial(index)) {
- ut_ad(mode & BTR_MODIFY_LEAF);
- mode |= BTR_RTREE_DELETE_MARK;
+ goto not_found;
}
- /* Set the query thread, so that ibuf_insert_low() will be
- able to invoke thd_get_trx(). */
- btr_pcur_get_btr_cur(&pcur)->thr = thr;
-
- search_result = row_search_index_entry(index, entry, mode,
- &pcur, &mtr);
-
- btr_cur = btr_pcur_get_btr_cur(&pcur);
-
- rec = btr_cur_get_rec(btr_cur);
+ search_result = row_search_index_entry(entry, mode, &pcur, &mtr);
switch (search_result) {
+ const rec_t* rec;
case ROW_NOT_DELETED_REF: /* should only occur for BTR_DELETE */
ut_error;
break;
@@ -2085,24 +1919,8 @@ row_upd_sec_index_entry(
break;
case ROW_NOT_FOUND:
- if (!index->is_committed()) {
- /* When online CREATE INDEX copied the update
- that we already made to the clustered index,
- and completed the secondary index creation
- before we got here, the old secondary index
- record would not exist. The CREATE INDEX
- should be waiting for a MySQL meta-data lock
- upgrade at least until this UPDATE returns.
- After that point, set_committed(true) would be
- invoked by commit_inplace_alter_table(). */
- break;
- }
-
- if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) {
- /* We found the record, but a delete marked */
- break;
- }
-
+not_found:
+ rec = btr_pcur_get_rec(&pcur);
ib::error()
<< "Record in index " << index->name
<< " of table " << index->table->name
@@ -2116,7 +1934,9 @@ row_upd_sec_index_entry(
#endif /* UNIV_DEBUG */
break;
case ROW_FOUND:
+found:
ut_ad(err == DB_SUCCESS);
+ rec = btr_pcur_get_rec(&pcur);
/* Delete mark the old index record; it can already be
delete marked if we return after a lock wait in
@@ -2125,14 +1945,14 @@ row_upd_sec_index_entry(
rec, dict_table_is_comp(index->table))) {
err = lock_sec_rec_modify_check_and_lock(
flags,
- btr_cur_get_block(btr_cur),
- btr_cur_get_rec(btr_cur), index, thr, &mtr);
+ btr_pcur_get_block(&pcur),
+ btr_pcur_get_rec(&pcur), index, thr, &mtr);
if (err != DB_SUCCESS) {
break;
}
- btr_rec_set_deleted<true>(btr_cur_get_block(btr_cur),
- btr_cur_get_rec(btr_cur),
+ btr_rec_set_deleted<true>(btr_pcur_get_block(&pcur),
+ btr_pcur_get_rec(&pcur),
&mtr);
#ifdef WITH_WSREP
if (!referenced && foreign
@@ -2191,6 +2011,7 @@ row_upd_sec_index_entry(
}
}
+close:
btr_pcur_close(&pcur);
mtr_commit(&mtr);
@@ -2204,35 +2025,11 @@ row_upd_sec_index_entry(
DEBUG_SYNC_C_IF_THD(trx->mysql_thd,
"before_row_upd_sec_new_index_entry");
- uncommitted = !index->is_committed();
- if (uncommitted) {
- mtr.start();
- /* The index->online_status may change if the index is
- being rollbacked. It is protected by index->lock. */
-
- mtr_s_lock_index(index, &mtr);
-
- switch (dict_index_get_online_status(index)) {
- case ONLINE_INDEX_COMPLETE:
- case ONLINE_INDEX_CREATION:
- break;
- case ONLINE_INDEX_ABORTED:
- case ONLINE_INDEX_ABORTED_DROPPED:
- mtr_commit(&mtr);
- goto func_exit;
- }
-
- }
-
/* Build a new index entry */
entry = row_build_index_entry(node->upd_row, node->upd_ext,
index, heap);
ut_a(entry);
- if (uncommitted) {
- mtr_commit(&mtr);
- }
-
/* Insert new index entry */
err = row_ins_sec_index_entry(index, entry, thr, !node->is_delete);
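
Sketch (not part of the patch): with the online-DDL branches gone, row_upd_sec_index_entry() keeps its classic two-step shape — delete-mark the old secondary index entry, then build and insert the new one. The toy model below represents the index as an ordered map from key to a delete-mark flag; all names are invented for the example and do not correspond to InnoDB functions.

#include <map>
#include <string>
#include <cassert>

struct sec_index_model
{
  std::map<std::string, bool> entries;   /* key -> delete-marked? */

  void update(const std::string& old_key, const std::string& new_key)
  {
    auto it = entries.find(old_key);
    if (it != entries.end())
      it->second = true;                 /* delete-mark the old entry */
    /* A fresh insert may revive an identical delete-marked entry. */
    entries[new_key] = false;
  }
};

int main()
{
  sec_index_model idx;
  idx.entries["alice"] = false;
  idx.update("alice", "bob");            /* UPDATE changes the indexed column */
  assert(idx.entries.at("alice"));       /* old entry only delete-marked */
  assert(!idx.entries.at("bob"));        /* new entry inserted live */
}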
@@ -2553,7 +2350,6 @@ row_upd_clust_rec(
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
dberr_t err;
- const dtuple_t* rebuilt_old_pk = NULL;
ut_ad(dict_index_is_clust(index));
ut_ad(!thr_get_trx(thr)->in_rollback);
@@ -2567,11 +2363,6 @@ row_upd_clust_rec(
dict_table_is_comp(index->table)));
ut_ad(rec_offs_validate(btr_cur_get_rec(btr_cur), index, offsets));
- if (dict_index_is_online_ddl(index)) {
- rebuilt_old_pk = row_log_table_get_pk(
- btr_cur_get_rec(btr_cur), index, offsets, NULL, &heap);
- }
-
/* Try optimistic updating of the record, keeping changes within
the page; we do not check locks because we assume the x-lock on the
record to update */
@@ -2589,7 +2380,7 @@ row_upd_clust_rec(
}
if (err == DB_SUCCESS) {
- goto success;
+ goto func_exit;
}
if (buf_pool.running_out()) {
@@ -2618,7 +2409,7 @@ row_upd_clust_rec(
the same transaction do not modify the record in the meantime.
Therefore we can assert that the restoration of the cursor succeeds. */
- ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr) ==
+ ut_a(pcur->restore_position(BTR_MODIFY_TREE, mtr) ==
btr_pcur_t::SAME_ALL);
ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur),
@@ -2642,15 +2433,6 @@ row_upd_clust_rec(
DEBUG_SYNC_C("after_row_upd_extern");
}
- if (err == DB_SUCCESS) {
-success:
- if (dict_index_is_online_ddl(index)) {
- row_log_table_update(
- btr_cur_get_rec(btr_cur),
- index, offsets, rebuilt_old_pk);
- }
- }
-
func_exit:
if (heap) {
mem_heap_free(heap);
@@ -2776,6 +2558,10 @@ row_upd_clust_step(
index = dict_table_get_first_index(node->table);
+ if (index->is_corrupted()) {
+ return DB_TABLE_CORRUPT;
+ }
+
const bool referenced = row_upd_index_is_referenced(index, trx);
#ifdef WITH_WSREP
const bool foreign = wsrep_row_upd_index_is_foreign(index, trx);
@@ -2810,57 +2596,30 @@ row_upd_clust_step(
ut_a(pcur->rel_pos == BTR_PCUR_ON);
- ulint mode;
+ btr_latch_mode mode;
DEBUG_SYNC_C_IF_THD(trx->mysql_thd, "innodb_row_upd_clust_step_enter");
if (dict_index_is_online_ddl(index)) {
ut_ad(node->table->id != DICT_INDEXES_ID);
- mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
+ mode = BTR_MODIFY_LEAF_ALREADY_LATCHED;
mtr_s_lock_index(index, &mtr);
} else {
mode = BTR_MODIFY_LEAF;
}
- if (btr_pcur_restore_position(mode, pcur, &mtr) !=
- btr_pcur_t::SAME_ALL) {
+ if (pcur->restore_position(mode, &mtr) != btr_pcur_t::SAME_ALL) {
err = DB_RECORD_NOT_FOUND;
goto exit_func;
}
- /* If this is a row in SYS_INDEXES table of the data dictionary,
- then we have to free the file segments of the index tree associated
- with the index */
-
- if (node->is_delete == PLAIN_DELETE
- && node->table->id == DICT_INDEXES_ID) {
-
- ut_ad(!dict_index_is_online_ddl(index));
-
- dict_drop_index_tree(pcur, trx, &mtr);
-
- mtr.commit();
-
- mtr.start();
- index->set_modified(mtr);
-
- if (btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr) !=
- btr_pcur_t::SAME_ALL) {
- err = DB_ERROR;
-
- mtr.commit();
-
- return(err);
- }
- }
-
rec = btr_pcur_get_rec(pcur);
offsets = rec_get_offsets(rec, index, offsets_, index->n_core_fields,
ULINT_UNDEFINED, &heap);
if (!flags && !node->has_clust_rec_x_lock) {
err = lock_clust_rec_modify_check_and_lock(
- 0, btr_pcur_get_block(pcur),
+ btr_pcur_get_block(pcur),
rec, index, offsets, thr);
if (err != DB_SUCCESS) {
goto exit_func;
@@ -2869,8 +2628,8 @@ row_upd_clust_step(
ut_ad(index->table->no_rollback() || index->table->is_temporary()
|| row_get_rec_trx_id(rec, index, offsets) == trx->id
- || lock_trx_has_expl_x_lock(trx, index->table,
- btr_pcur_get_block(pcur),
+ || lock_trx_has_expl_x_lock(*trx, *index->table,
+ btr_pcur_get_block(pcur)->page.id(),
page_rec_get_heap_no(rec)));
if (node->is_delete == PLAIN_DELETE) {
@@ -3017,14 +2776,12 @@ row_upd(
DBUG_EXECUTE_IF("row_upd_skip_sec", node->index = NULL;);
do {
- /* Skip corrupted index */
- dict_table_skip_corrupt_index(node->index);
-
if (!node->index) {
break;
}
- if (node->index->type != DICT_FTS) {
+ if (!(node->index->type & (DICT_FTS | DICT_CORRUPT))
+ && node->index->is_committed()) {
err = row_upd_sec_step(node, thr);
if (err != DB_SUCCESS) {
@@ -3091,7 +2848,7 @@ row_upd_step(
/* It may be that the current session has not yet
started its transaction, or it has been committed: */
- err = lock_table(0, node->table, LOCK_IX, thr);
+ err = lock_table(node->table, nullptr, LOCK_IX, thr);
if (err != DB_SUCCESS) {
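
Sketch (not part of the patch): the row_upd() hunk above drops the separate dict_table_skip_corrupt_index() pass and instead filters each secondary index inline — fulltext and corrupted indexes are skipped via type-flag bits, and indexes that are not yet committed (in-progress ALTER TABLE) are skipped as well. The flag values below are placeholders, not the real DICT_* constants.

#include <cstdio>
#include <vector>

static const unsigned IDX_FTS     = 1u << 0;  /* stand-in for DICT_FTS */
static const unsigned IDX_CORRUPT = 1u << 1;  /* stand-in for DICT_CORRUPT */

struct index_model { unsigned type; bool committed; };

static bool should_update(const index_model& idx)
{
  return !(idx.type & (IDX_FTS | IDX_CORRUPT)) && idx.committed;
}

int main()
{
  std::vector<index_model> indexes{{0, true}, {IDX_FTS, true},
                                   {IDX_CORRUPT, true}, {0, false}};
  for (const index_model& idx : indexes)
    std::printf("%d", should_update(idx));   /* prints "1000" */
}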
diff --git a/storage/innobase/row/row0vers.cc b/storage/innobase/row/row0vers.cc
index 4774bef49ea..a4fc32cc5a8 100644
--- a/storage/innobase/row/row0vers.cc
+++ b/storage/innobase/row/row0vers.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2021, MariaDB Corporation.
+Copyright (c) 2017, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -77,7 +77,7 @@ index record.
@param[in] offsets rec_get_offsets(rec, index)
@param[in,out] mtr mini-transaction
@return the active transaction; state must be rechecked after
-trx_mutex_enter(), and trx->release_reference() must be invoked
+acquiring trx->mutex, and trx->release_reference() must be invoked
@retval NULL if the record was committed */
UNIV_INLINE
trx_t*
@@ -104,6 +104,9 @@ row_vers_impl_x_locked_low(
DBUG_ENTER("row_vers_impl_x_locked_low");
ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(mtr->memo_contains_page_flagged(clust_rec,
+ MTR_MEMO_PAGE_S_FIX
+ | MTR_MEMO_PAGE_X_FIX));
if (ulint trx_id_offset = clust_index->trx_id_offset) {
trx_id = mach_read_from_6(clust_rec + trx_id_offset);
@@ -190,14 +193,14 @@ row_vers_impl_x_locked_low(
heap = mem_heap_create(1024);
trx_undo_prev_version_build(
- clust_rec, mtr, version, clust_index, clust_offsets,
+ version, clust_index, clust_offsets,
heap, &prev_version, NULL,
dict_index_has_virtual(index) ? &vrow : NULL, 0);
- trx_mutex_enter(trx);
+ ut_d(trx->mutex_lock());
const bool committed = trx_state_eq(
trx, TRX_STATE_COMMITTED_IN_MEMORY);
- trx_mutex_exit(trx);
+ ut_d(trx->mutex_unlock());
/* The oldest visible clustered index version must not be
delete-marked, because we never start a transaction by
@@ -383,7 +386,7 @@ index record.
@param[in] index secondary index
@param[in] offsets rec_get_offsets(rec, index)
@return the active transaction; state must be rechecked after
-trx_mutex_enter(), and trx->release_reference() must be invoked
+acquiring trx->mutex, and trx->release_reference() must be invoked
@retval NULL if the record was committed */
trx_t*
row_vers_impl_x_locked(
@@ -397,7 +400,7 @@ row_vers_impl_x_locked(
const rec_t* clust_rec;
dict_index_t* clust_index;
- ut_ad(!lock_mutex_own());
+ lock_sys.assert_unlocked();
mtr_start(&mtr);
@@ -527,6 +530,10 @@ row_vers_build_cur_vrow_low(
= DATA_MISSING;
}
+ ut_ad(mtr->memo_contains_page_flagged(rec,
+ MTR_MEMO_PAGE_S_FIX
+ | MTR_MEMO_PAGE_X_FIX));
+
version = rec;
/* If this is called by purge thread, set TRX_UNDO_PREV_IN_PURGE
@@ -543,7 +550,7 @@ row_vers_build_cur_vrow_low(
version, clust_index, clust_offsets);
trx_undo_prev_version_build(
- rec, mtr, version, clust_index, clust_offsets,
+ version, clust_index, clust_offsets,
heap, &prev_version, NULL, vrow, status);
if (heap2) {
@@ -643,6 +650,10 @@ row_vers_vc_matches_cluster(
/* First compare non-virtual columns (primary keys) */
ut_ad(index->n_fields == n_fields);
ut_ad(n_fields == dtuple_get_n_fields(icentry));
+ ut_ad(mtr->memo_contains_page_flagged(rec,
+ MTR_MEMO_PAGE_S_FIX
+ | MTR_MEMO_PAGE_X_FIX));
+
{
const dfield_t* a = ientry->fields;
const dfield_t* b = icentry->fields;
@@ -684,7 +695,7 @@ row_vers_vc_matches_cluster(
ut_ad(roll_ptr != 0);
trx_undo_prev_version_build(
- rec, mtr, version, clust_index, clust_offsets,
+ version, clust_index, clust_offsets,
heap, &prev_version, NULL, vrow,
TRX_UNDO_PREV_IN_PURGE | TRX_UNDO_GET_OLD_V_VALUE);
@@ -858,7 +869,7 @@ static bool dtuple_vcol_data_missing(const dtuple_t &tuple,
}
/** Finds out if a version of the record, where the version >= the current
-purge view, should have ientry as its secondary index entry. We check
+purge_sys.view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
id >= purge view, and the secondary index entry == ientry; exactly in
this case we return TRUE.
@@ -1040,11 +1051,12 @@ unsafe_to_purge:
heap = mem_heap_create(1024);
vrow = NULL;
- trx_undo_prev_version_build(rec, mtr, version,
+ trx_undo_prev_version_build(version,
clust_index, clust_offsets,
- heap, &prev_version, NULL,
+ heap, &prev_version, nullptr,
dict_index_has_virtual(index)
- ? &vrow : NULL, 0);
+ ? &vrow : nullptr,
+ TRX_UNDO_CHECK_PURGEABILITY);
mem_heap_free(heap2); /* free version and clust_offsets */
if (!prev_version) {
@@ -1127,7 +1139,9 @@ nochange_index:
Constructs the version of a clustered index record which a consistent
read should see. We assume that the trx id stored in rec is such that
the consistent read should not see rec in its present version.
-@return DB_SUCCESS or DB_MISSING_HISTORY */
+@return error code
+@retval DB_SUCCESS if a previous version was fetched
+@retval DB_MISSING_HISTORY if the history is missing (a sign of corruption) */
dberr_t
row_vers_build_for_consistent_read(
/*===============================*/
@@ -1162,13 +1176,12 @@ row_vers_build_for_consistent_read(
ut_ad(index->is_primary());
ut_ad(mtr->memo_contains_page_flagged(rec, MTR_MEMO_PAGE_X_FIX
| MTR_MEMO_PAGE_S_FIX));
- ut_ad(!rw_lock_own(&(purge_sys.latch), RW_LOCK_S));
ut_ad(rec_offs_validate(rec, index, *offsets));
trx_id = row_get_rec_trx_id(rec, index, *offsets);
- ut_ad(!view->changes_visible(trx_id, index->table->name));
+ ut_ad(!view->changes_visible(trx_id));
ut_ad(!vrow || !(*vrow));
@@ -1186,12 +1199,10 @@ row_vers_build_for_consistent_read(
/* If purge can't see the record then we can't rely on
the UNDO log record. */
- bool purge_sees = trx_undo_prev_version_build(
- rec, mtr, version, index, *offsets, heap,
+ err = trx_undo_prev_version_build(
+ version, index, *offsets, heap,
&prev_version, NULL, vrow, 0);
- err = (purge_sees) ? DB_SUCCESS : DB_MISSING_HISTORY;
-
if (prev_heap != NULL) {
mem_heap_free(prev_heap);
}
@@ -1213,7 +1224,7 @@ row_vers_build_for_consistent_read(
trx_id = row_get_rec_trx_id(prev_version, index, *offsets);
- if (view->changes_visible(trx_id, index->table->name)) {
+ if (view->changes_visible(trx_id)) {
/* The view already sees this version: we can copy
it to in_heap and return */
@@ -1230,8 +1241,11 @@ row_vers_build_for_consistent_read(
dtuple_dup_v_fld(*vrow, in_heap);
}
break;
+ } else if (trx_id >= view->low_limit_id()
+ && trx_id >= trx_sys.get_max_trx_id()) {
+ err = DB_CORRUPTION;
+ break;
}
-
version = prev_version;
}
@@ -1240,6 +1254,10 @@ row_vers_build_for_consistent_read(
return(err);
}
+#if defined __aarch64__&&defined __GNUC__&&__GNUC__==4&&!defined __clang__
+/* Avoid GCC 4.8.5 internal compiler error "could not split insn". */
+# pragma GCC optimize ("O0")
+#endif
/*****************************************************************//**
Constructs the last committed version of a clustered index record,
which should be seen by a semi-consistent read. */
@@ -1275,7 +1293,6 @@ row_vers_build_for_semi_consistent_read(
ut_ad(index->is_primary());
ut_ad(mtr->memo_contains_page_flagged(rec, MTR_MEMO_PAGE_X_FIX
| MTR_MEMO_PAGE_S_FIX));
- ut_ad(!rw_lock_own(&(purge_sys.latch), RW_LOCK_S));
ut_ad(rec_offs_validate(rec, index, *offsets));
@@ -1345,10 +1362,9 @@ committed_version_trx:
heap2 = heap;
heap = mem_heap_create(1024);
- if (!trx_undo_prev_version_build(rec, mtr, version, index,
- *offsets, heap,
- &prev_version,
- in_heap, vrow, 0)) {
+ if (trx_undo_prev_version_build(version, index, *offsets, heap,
+ &prev_version, in_heap, vrow,
+ 0) != DB_SUCCESS) {
mem_heap_free(heap);
heap = heap2;
heap2 = NULL;
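
Sketch (not part of the patch): the row0vers.cc hunks change trx_undo_prev_version_build() to return a dberr_t directly and add a corruption check in the consistent-read version walk — a transaction id that is both at or above the read view's low limit and at or above the maximum assigned transaction id cannot belong to any real transaction. The standalone model below walks a list of version trx ids until one is visible; it is a simplified illustration, not the real purge/undo machinery.

#include <cstdint>
#include <vector>

enum db_err { DB_OK, DB_MISSING_HISTORY, DB_CORRUPTION };

struct read_view_model
{
  uint64_t low_limit;                        /* ids >= this are invisible */
  bool changes_visible(uint64_t id) const { return id < low_limit; }
};

/* Stand-in for trx_undo_prev_version_build(): older versions of the row,
   newest first; running off the end means the history was purged. */
static db_err prev_version(const std::vector<uint64_t>& history, size_t& pos,
                           uint64_t& trx_id)
{
  if (pos + 1 >= history.size())
    return DB_MISSING_HISTORY;
  trx_id = history[++pos];
  return DB_OK;
}

static db_err build_for_consistent_read(const std::vector<uint64_t>& history,
                                        const read_view_model& view,
                                        uint64_t max_trx_id, uint64_t& out_id)
{
  size_t pos = 0;
  uint64_t trx_id = history[0];
  while (!view.changes_visible(trx_id))
  {
    if (trx_id >= view.low_limit && trx_id >= max_trx_id)
      return DB_CORRUPTION;                  /* impossible transaction id */
    db_err err = prev_version(history, pos, trx_id);
    if (err != DB_OK)
      return err;                            /* history already purged */
  }
  out_id = trx_id;                           /* this version is visible */
  return DB_OK;
}

int main()
{
  read_view_model view{100};
  uint64_t id = 0;
  /* Versions written by trx 120 and trx 90: the one by trx 90 is visible. */
  return build_for_consistent_read({120, 90}, view, 200, id) == DB_OK
         && id == 90 ? 0 : 1;
}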