summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThirunarayanan Balathandayuthapani <thiru@mariadb.com>2020-06-04 14:37:36 +0530
committerThirunarayanan Balathandayuthapani <thiru@mariadb.com>2020-06-04 19:42:13 +0530
commit6d4c121d98d9d221a2eb22436de8613287c38ac4 (patch)
treeaf67b33766ca5c38d16dd9c30134b9a2ca862632
parent2fcff310d024cc2201586c568391ba8b039f0bf3 (diff)
downloadmariadb-git-10.5-MDEV-515.tar.gz
MDEV-515 innodb bulk insert10.5-MDEV-515
- Introduced assign_stat_n_rows() in dict_table_t. It calculates the number of rows in the table and assigns stat_n_rows during ha_innobase::open(). - Introduced empty_table() in dict_table_t. It empties all the indexes associated with the table (the FTS index is not covered). This is the undo operation of a bulk operation. - Introduced a new variable bulk_trx_id in dict_table_t. It stores the transaction id of the bulk insert and is protected by the exclusive lock on the table. - If the table is empty, then INSERT and INSERT..SELECT take an exclusive lock on the table. - Introduced a new undo log record type "TRX_UNDO_UNEMPTY". It should be the first undo log record of a bulk operation. During rollback, if InnoDB encounters this undo record, it should empty the table. Limitations: =========== - InnoDB should write the undo log for consecutive inserts during a bulk operation - Parallel reads should see an empty table, depending on bulk_trx_id. - Fix all test case failures in the innodb suite - The FTS index should be handled during rollback of a bulk operation
-rw-r--r--storage/innobase/btr/btr0btr.cc70
-rw-r--r--storage/innobase/dict/dict0mem.cc86
-rw-r--r--storage/innobase/fsp/fsp0fsp.cc4
-rw-r--r--storage/innobase/handler/ha_innodb.cc20
-rw-r--r--storage/innobase/include/btr0btr.h19
-rw-r--r--storage/innobase/include/dict0mem.h9
-rw-r--r--storage/innobase/include/trx0rec.h1
-rw-r--r--storage/innobase/lock/lock0lock.cc3
-rw-r--r--storage/innobase/page/page0page.cc1
-rw-r--r--storage/innobase/row/row0purge.cc4
-rw-r--r--storage/innobase/row/row0uins.cc10
-rw-r--r--storage/innobase/row/row0undo.cc1
-rw-r--r--storage/innobase/trx/trx0rec.cc11
13 files changed, 204 insertions, 35 deletions
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 10a2612c09f..e83b86fd55b 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -1030,6 +1030,44 @@ btr_free_root_check(
return(block);
}
+/** Format a block as the empty root page of a B-tree.
+The page is created at level 0 and stamped with the index id. When the
+block has a compressed page (block->page.zip.data is set), the id is
+stored in the frame first and the page is built by page_create_zip();
+otherwise the page is built by page_create() and the level and the id
+are written through the mini-transaction.
+@param[in,out]	block		root block
+@param[in]	index_id	index id
+@param[in]	index		index of root page; tested for NULL on the
+				uncompressed path
+@param[in,out]	mtr		mini-transaction */
+void
+btr_root_page_init(buf_block_t *block, index_id_t index_id,
+		   dict_index_t *index, mtr_t *mtr)
+{
+  byte *const frame= block->frame;
+  byte *const id_ptr=
+    my_assume_aligned<2>(frame + (PAGE_HEADER + PAGE_INDEX_ID));
+
+  if (UNIV_LIKELY_NULL(block->page.zip.data))
+  {
+    /* Compressed page: store the id directly, then create the page. */
+    mach_write_to_8(id_ptr, index_id);
+    ut_ad(!page_has_siblings(block->page.zip.data));
+    page_create_zip(block, index, 0, 0, mtr);
+    return;
+  }
+
+  /* Uncompressed page: create a new index page on the block. */
+  page_create(block, mtr, index && index->table->not_redundant());
+
+  if (index && index->is_spatial())
+  {
+    /* Only the low-order byte of the page type differs between
+    FIL_PAGE_INDEX and FIL_PAGE_RTREE, so a 1-byte write suffices. */
+    static_assert(((FIL_PAGE_INDEX & 0xff00) | byte(FIL_PAGE_RTREE))
+		  == FIL_PAGE_RTREE, "compatibility");
+    mtr->write<1>(*block, FIL_PAGE_TYPE + 1 + frame, byte(FIL_PAGE_RTREE));
+
+    /* Clear the split sequence number only if it is not already zero. */
+    if (mach_read_from_8(frame + FIL_RTREE_SPLIT_SEQ_NUM) != 0)
+      mtr->memset(block, FIL_RTREE_SPLIT_SEQ_NUM, 8, 0);
+  }
+
+  /* Set the level of the new index page, then its index id. */
+  mtr->write<2,mtr_t::MAYBE_NOP>(*block, PAGE_HEADER + PAGE_LEVEL + frame,
+				 0U);
+  mtr->write<8,mtr_t::MAYBE_NOP>(*block, id_ptr, index_id);
+}
+
/** Create the root node for a new index tree.
@param[in] type type of the index
@param[in] index_id index id
@@ -1115,36 +1153,7 @@ btr_create(
ut_ad(!page_has_siblings(block->frame));
- constexpr uint16_t field = PAGE_HEADER + PAGE_INDEX_ID;
-
- byte* page_index_id = my_assume_aligned<2>(field + block->frame);
-
- /* Create a new index page on the allocated segment page */
- if (UNIV_LIKELY_NULL(block->page.zip.data)) {
- mach_write_to_8(page_index_id, index_id);
- ut_ad(!page_has_siblings(block->page.zip.data));
- page_create_zip(block, index, 0, 0, mtr);
- } else {
- page_create(block, mtr,
- index && index->table->not_redundant());
- if (index && index->is_spatial()) {
- static_assert(((FIL_PAGE_INDEX & 0xff00)
- | byte(FIL_PAGE_RTREE))
- == FIL_PAGE_RTREE, "compatibility");
- mtr->write<1>(*block, FIL_PAGE_TYPE + 1 + block->frame,
- byte(FIL_PAGE_RTREE));
- if (mach_read_from_8(block->frame
- + FIL_RTREE_SPLIT_SEQ_NUM)) {
- mtr->memset(block, FIL_RTREE_SPLIT_SEQ_NUM,
- 8, 0);
- }
- }
- /* Set the level of the new index page */
- mtr->write<2,mtr_t::MAYBE_NOP>(*block, PAGE_HEADER + PAGE_LEVEL
- + block->frame, 0U);
- mtr->write<8,mtr_t::MAYBE_NOP>(*block, page_index_id,
- index_id);
- }
+ btr_root_page_init(block, index_id, index, mtr);
/* We reset the free bits for the page in a separate
mini-transaction to allow creation of several trees in the
@@ -1172,7 +1181,6 @@ btr_create(
this by calling btr_free_root.
@param[in,out] block root page
@param[in] log_mode mtr logging mode */
-static
void
btr_free_but_not_root(
buf_block_t* block,
diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc
index 22a77a7a220..3800ef3012a 100644
--- a/storage/innobase/dict/dict0mem.cc
+++ b/storage/innobase/dict/dict0mem.cc
@@ -39,6 +39,7 @@ Created 1/8/1996 Heikki Tuuri
#include "row0row.h"
#include "sql_string.h"
#include <iostream>
+#include "btr0pcur.h"
#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when
creating a table or index object */
@@ -1383,3 +1384,88 @@ dict_index_t::vers_history_row(
}
return(error);
}
+
+/** Empty the table: reset every index in the index list of the table
+to a tree that consists of a freshly initialized root page only.
+Called from row_undo_ins() when a TRX_UNDO_UNEMPTY undo record is
+rolled back. Each index is processed in its own mini-transaction.
+An index whose root page cannot be fetched is skipped. */
+void dict_table_t::empty_table()
+{
+  mtr_t mtr;
+  for (dict_index_t *index= UT_LIST_GET_FIRST(indexes); index != nullptr;
+       index= UT_LIST_GET_NEXT(indexes, index))
+  {
+    mtr.start();
+    buf_block_t *root_block= buf_page_get(page_id_t(space->id, index->page),
+					  space->zip_size(), RW_X_LATCH,
+					  &mtr);
+    if (!root_block)
+    {
+      /* Without the root page nothing can be freed or re-initialized.
+      This case used to fall through and dereference the null block. */
+      mtr.commit();
+      continue;
+    }
+
+    /* Free the whole tree except the root page. */
+    btr_free_but_not_root(root_block, mtr.get_log_mode());
+
+    mtr.set_named_space_id(space->id);
+    /* Turn the surviving root page back into an empty root. */
+    btr_root_page_init(root_block, index->id, index, &mtr);
+    /* Create the segment whose header is at PAGE_BTR_SEG_LEAF again;
+    a failure is reported by the ut_ad() assertion only. */
+    if (!fseg_create(space, root_block->page.id.page_no(),
+		     PAGE_HEADER + PAGE_BTR_SEG_LEAF, &mtr))
+    {
+      ut_ad(0);
+    }
+    mtr.commit();
+  }
+}
+
+/** Count the records of the first (clustered) index that are not
+delete-marked and store the count in stat_n_rows.
+The leaf pages are walked from the left end of the index, following the
+next-page links, inside a single mini-transaction.
+Nothing is done when the table has no tablespace. If a next leaf page
+cannot be fetched, the scan is abandoned and stat_n_rows keeps its
+previous value. */
+void dict_table_t::assign_stat_n_rows()
+{
+  if (!space)
+    return;
+
+  dict_index_t *clust_index= dict_table_get_first_index(this);
+  mtr_t mtr;
+  btr_pcur_t pcur;
+
+  mtr.start();
+  btr_pcur_open_at_index_side(true, clust_index, BTR_SEARCH_LEAF,
+			      &pcur, true, 0, &mtr);
+  btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+  /* The loop below advances the cursor before it inspects a record.
+  Step back unless the cursor is on a metadata record: that way a
+  metadata record is skipped, while any other record is inspected. */
+  if (!rec_is_metadata(btr_pcur_get_rec(&pcur), *clust_index))
+    btr_pcur_move_to_prev_on_page(&pcur);
+
+  page_cur_t *const cur= btr_pcur_get_page_cur(&pcur);
+  ulint n_rows= 0;
+
+  for (;;)
+  {
+    page_cur_move_to_next(cur);
+
+    /* At the page supremum, move on to the first record of the next
+    leaf page; repeat in case that page holds no records either. The
+    supremum is tested first so that the delete-mark flag is only read
+    from records that can be counted. */
+    while (page_rec_is_supremum(page_cur_get_rec(cur)))
+    {
+      const uint32_t next_page_no= btr_page_get_next(page_cur_get_page(cur));
+      if (next_page_no == FIL_NULL)
+      {
+	/* No next page: the scan is complete. */
+	mtr.commit();
+	stat_n_rows= n_rows;
+	return;
+      }
+
+      /* Fetch the next page before releasing the current one. */
+      buf_block_t *block= btr_block_get(*clust_index, next_page_no,
+					BTR_SEARCH_LEAF, false, &mtr);
+      btr_leaf_page_release(page_cur_get_block(cur), BTR_SEARCH_LEAF, &mtr);
+      if (block == nullptr)
+      {
+	mtr.commit();
+	return;
+      }
+      page_cur_set_before_first(block, cur);
+      page_cur_move_to_next(cur);
+    }
+
+    if (!rec_get_deleted_flag(page_cur_get_rec(cur),
+			      dict_table_is_comp(this)))
+      n_rows++;
+  }
+}
diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc
index 9bab0fe355a..aa605973f06 100644
--- a/storage/innobase/fsp/fsp0fsp.cc
+++ b/storage/innobase/fsp/fsp0fsp.cc
@@ -1787,10 +1787,10 @@ fseg_create(
FIL_PAGE_TYPE_SYS);
}
- mtr->write<2>(*block, byte_offset + FSEG_HDR_OFFSET
+ mtr->write<2,mtr_t::MAYBE_NOP>(*block, byte_offset + FSEG_HDR_OFFSET
+ block->frame, page_offset(inode));
- mtr->write<4>(*block, byte_offset + FSEG_HDR_PAGE_NO
+ mtr->write<4,mtr_t::MAYBE_NOP>(*block, byte_offset + FSEG_HDR_PAGE_NO
+ block->frame, iblock->page.id.page_no());
mtr->write<4,mtr_t::MAYBE_NOP>(*block, byte_offset + FSEG_HDR_SPACE
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index bea63919532..b7b56799075 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -5882,6 +5882,8 @@ ha_innobase::open(const char* name, int, uint)
}
}
+ ib_table->assign_stat_n_rows();
+
m_prebuilt = row_create_prebuilt(ib_table, table->s->reclength);
m_prebuilt->default_rec = table->s->default_values;
@@ -7665,6 +7667,11 @@ ha_innobase::write_row(
trx_t* trx = thd_to_trx(m_user_thd);
+ if (!dict_table_get_n_rows(m_prebuilt->table)) {
+ // bulk index code
+ m_prebuilt->table->bulk_trx_id = trx->id;
+ }
+
/* Validation checks before we commence write_row operation. */
if (high_level_read_only) {
ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
@@ -15948,6 +15955,19 @@ ha_innobase::external_lock(
if (m_prebuilt->select_lock_type != LOCK_NONE) {
+ if (!dict_table_get_n_rows(m_prebuilt->table)
+ && (thd_sql_command(thd) == SQLCOM_INSERT
+ || thd_sql_command(thd)
+ == SQLCOM_INSERT_SELECT)) {
+ dberr_t error = row_lock_table(m_prebuilt);
+
+ if (error != DB_SUCCESS) {
+ DBUG_RETURN(
+ convert_error_code_to_mysql(
+ error, 0, thd));
+ }
+ }
+
if (thd_sql_command(thd) == SQLCOM_LOCK_TABLES
&& THDVAR(thd, table_locks)
&& thd_test_options(thd, OPTION_NOT_AUTOCOMMIT)
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index b02c65f3a31..23fd8077412 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -330,6 +330,16 @@ btr_node_ptr_get_child_page_no(
const rec_offs* offsets)/*!< in: array returned by rec_get_offsets() */
MY_ATTRIBUTE((warn_unused_result));
+
+/** Initialize the root page of the b-tree as an empty page at level 0.
+The compressed page format is used when block->page.zip.data is set.
+@param[in,out] block root block
+@param[in] index_id index id to be stored in the page header
+@param[in] index index of root page
+@param[in,out] mtr mini-transaction */
+void
+btr_root_page_init(buf_block_t *block, index_id_t index_id,
+ dict_index_t *index, mtr_t *mtr);
+
/** Create the root node for a new index tree.
@param[in] type type of the index
@param[in,out] space tablespace where created
@@ -346,6 +356,15 @@ btr_create(
dict_index_t* index,
mtr_t* mtr);
+/** Free a B-tree except the root page. Afterwards the root page MUST be
+either freed by calling btr_free_root, or re-initialized by calling
+btr_root_page_init() as dict_table_t::empty_table() does.
+@param[in,out] block root page
+@param[in] log_mode mtr logging mode */
+void
+btr_free_but_not_root(
+ buf_block_t* block,
+ mtr_log_t log_mode);
+
/** Free a persistent index tree if it exists.
@param[in] page_id root page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index 731ff545685..1aed96e9c9c 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -1936,6 +1936,11 @@ struct dict_table_t {
char (&tbl_name)[NAME_LEN + 1],
size_t *db_name_len, size_t *tbl_name_len) const;
+ /** Assign n_stat_rows in dict_table_t */
+ void assign_stat_n_rows();
+
+ /** Empty the table */
+ void empty_table();
private:
/** Initialize instant->field_map.
@param[in] table table definition to copy from */
@@ -2314,6 +2319,10 @@ public:
/** mysql_row_templ_t for base columns used for compute the virtual
columns */
dict_vcol_templ_t* vc_templ;
+
+ /** Trx id of bulk operation. This is under the protection of
+ exclusive lock of table object */
+ trx_id_t bulk_trx_id;
};
inline void dict_index_t::set_modified(mtr_t& mtr) const
diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h
index 9aeff6312f6..4af7a991a77 100644
--- a/storage/innobase/include/trx0rec.h
+++ b/storage/innobase/include/trx0rec.h
@@ -296,6 +296,7 @@ record */
fields of the record can change */
#define TRX_UNDO_DEL_MARK_REC 14 /* delete marking of a record; fields
do not change */
+#define TRX_UNDO_UNEMPTY 15 /* Empty the table */
#define TRX_UNDO_CMPL_INFO_MULT 16U /* compilation info is multiplied by
this and ORed to the type above */
#define TRX_UNDO_UPD_EXTERN 128U /* This bit can be ORed to type_cmpl
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index c24d1f12623..7bd645299a7 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -4283,6 +4283,9 @@ void lock_release(trx_t* trx)
all currently active transactions. */
table->query_cache_inv_trx_id = max_trx_id;
+ } else if (lock_get_mode(lock) == LOCK_X
+ && table->bulk_trx_id == trx->id) {
+ table->bulk_trx_id = 0;
}
lock_table_dequeue(lock);
diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc
index fb6fc5858e3..6d09ae293f4 100644
--- a/storage/innobase/page/page0page.cc
+++ b/storage/innobase/page/page0page.cc
@@ -410,7 +410,6 @@ page_create_empty(
&& !index->table->is_temporary()
&& page_is_leaf(block->frame)) {
max_trx_id = page_get_max_trx_id(block->frame);
- ut_ad(max_trx_id);
} else if (block->page.id.page_no() == index->page) {
/* Preserve PAGE_ROOT_AUTO_INC. */
max_trx_id = page_get_max_trx_id(block->frame);
diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc
index 9934ede605b..309f7abc18b 100644
--- a/storage/innobase/row/row0purge.cc
+++ b/storage/innobase/row/row0purge.cc
@@ -897,6 +897,7 @@ row_purge_parse_undo_rec(
switch (type) {
case TRX_UNDO_RENAME_TABLE:
return false;
+ case TRX_UNDO_UNEMPTY:
case TRX_UNDO_INSERT_METADATA:
case TRX_UNDO_INSERT_REC:
/* These records do not store any transaction identifier.
@@ -987,6 +988,9 @@ err_exit:
if (type == TRX_UNDO_INSERT_METADATA) {
node->ref = &trx_undo_metadata;
return(true);
+ } else if (type == TRX_UNDO_UNEMPTY) {
+ node->ref = nullptr;
+ return true;
}
ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc
index 63edbd9b86d..6329825defd 100644
--- a/storage/innobase/row/row0uins.cc
+++ b/storage/innobase/row/row0uins.cc
@@ -382,6 +382,7 @@ static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked)
ut_ad("wrong undo record type" == 0);
goto close_table;
case TRX_UNDO_INSERT_METADATA:
+ case TRX_UNDO_UNEMPTY:
case TRX_UNDO_INSERT_REC:
break;
case TRX_UNDO_RENAME_TABLE:
@@ -425,8 +426,12 @@ close_table:
node->heap);
} else {
node->ref = &trx_undo_metadata;
+ if (node->rec_type == TRX_UNDO_UNEMPTY) {
+ return true;
+ }
}
+
if (!row_undo_search_clust_to_pcur(node)) {
/* An error probably occurred during
an insert into the clustered index,
@@ -588,6 +593,11 @@ row_undo_ins(
log_free_check();
ut_ad(!node->table->is_temporary());
err = row_undo_ins_remove_clust_rec(node);
+ break;
+ case TRX_UNDO_UNEMPTY:
+ node->table->empty_table();
+ err = DB_SUCCESS;
+ break;
}
dict_table_close(node->table, dict_locked, FALSE);
diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc
index 8fca99a44b8..5ebca29681d 100644
--- a/storage/innobase/row/row0undo.cc
+++ b/storage/innobase/row/row0undo.cc
@@ -363,6 +363,7 @@ static bool row_undo_rec_get(undo_node_t* node)
switch (trx_undo_rec_get_type(node->undo_rec)) {
case TRX_UNDO_INSERT_METADATA:
+ case TRX_UNDO_UNEMPTY:
/* This record type was introduced in MDEV-11369
instant ADD COLUMN, which was implemented after
MDEV-12288 removed the insert_undo log. There is no
diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc
index cda1bd6f22c..bcc64ee367d 100644
--- a/storage/innobase/trx/trx0rec.cc
+++ b/storage/innobase/trx/trx0rec.cc
@@ -393,6 +393,15 @@ trx_undo_page_report_insert(
*ptr++ = TRX_UNDO_INSERT_REC;
ptr += mach_u64_write_much_compressed(ptr, trx->undo_no);
ptr += mach_u64_write_much_compressed(ptr, index->table->id);
+
+ /* Table is in bulk operation */
+ if (index->table->bulk_trx_id == trx->id
+ && !index->table->is_temporary()) {
+ undo_block->frame[first_free + 2] = TRX_UNDO_UNEMPTY;
+ index->table->bulk_trx_id = trx->id;
+ goto done;
+ }
+
/*----------------------------------------*/
/* Store then the fields required to uniquely determine the record
to be inserted in the clustered index */
@@ -470,7 +479,7 @@ trx_undo_rec_get_pars(
type_cmpl &= ~TRX_UNDO_UPD_EXTERN;
*type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
ut_ad(*type >= TRX_UNDO_RENAME_TABLE);
- ut_ad(*type <= TRX_UNDO_DEL_MARK_REC);
+ ut_ad(*type <= TRX_UNDO_UNEMPTY);
*cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
*undo_no = mach_read_next_much_compressed(&ptr);