From 6d4c121d98d9d221a2eb22436de8613287c38ac4 Mon Sep 17 00:00:00 2001
From: Thirunarayanan Balathandayuthapani
Date: Thu, 4 Jun 2020 14:37:36 +0530
Subject: MDEV-515 innodb bulk insert

- Introduced assign_stat_n_rows() in dict_table_t. It calculates the number
  of rows in the table and assigns stat_n_rows during ha_innobase::open().
- Introduced empty_table() in dict_table_t. It empties all the indexes
  associated with the table (the FTS index is not covered). This is the undo
  operation of a bulk operation.
- Introduced new variable bulk_trx_id in dict_table_t. It stores the
  transaction id of the bulk insert. It is protected by the exclusive lock
  on the table.
- If the table is empty, then INSERT and INSERT..SELECT take an exclusive
  lock on the table.
- Introduced new undo log record "TRX_UNDO_UNEMPTY". It should be the first
  undo log record written during a bulk operation. During rollback, if InnoDB
  encounters this undo record, it should empty the table.

Limitations:
===========
- InnoDB should write the undo log for consecutive inserts during a bulk
  operation
- Parallel reads should see an empty table, depending on bulk_trx_id.
- Fix all test case failure in innodb suite
- FTS index should be handled while rollback of bulk operation
---
 storage/innobase/btr/btr0btr.cc       | 70 +++++++++++++++-------------
 storage/innobase/dict/dict0mem.cc     | 86 +++++++++++++++++++++++++++++++++++
 storage/innobase/fsp/fsp0fsp.cc       | 4 +-
 storage/innobase/handler/ha_innodb.cc | 20 ++++++++
 storage/innobase/include/btr0btr.h    | 19 ++++++++
 storage/innobase/include/dict0mem.h   | 9 ++++
 storage/innobase/include/trx0rec.h    | 1 +
 storage/innobase/lock/lock0lock.cc    | 3 ++
 storage/innobase/page/page0page.cc    | 1 -
 storage/innobase/row/row0purge.cc     | 4 ++
 storage/innobase/row/row0uins.cc      | 10 ++++
 storage/innobase/row/row0undo.cc      | 1 +
 storage/innobase/trx/trx0rec.cc       | 11 ++++-
 13 files changed, 204 insertions(+), 35 deletions(-)

diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 10a2612c09f..e83b86fd55b 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -1030,6 +1030,44 @@ btr_free_root_check(
 	return(block);
 }
 
+/** Initialize the root page of the b-tree as an empty index page that carries index_id in its PAGE_INDEX_ID header field
+@param[in,out] block root block; block->page.zip.data != NULL selects the page_create_zip() path, otherwise page_create() is used
+@param[in] index_id index id to store in the page header
+@param[in] index index of root page; may be NULL (only the page_create() path tests it before dereferencing)
+@param[in,out] mtr mini-transaction passed to page_create()/page_create_zip() and used for the header writes */
+void
+btr_root_page_init(buf_block_t *block, index_id_t index_id,
+                   dict_index_t *index, mtr_t *mtr)
+{
+  constexpr uint16_t field = PAGE_HEADER + PAGE_INDEX_ID;
+  byte* page_index_id = my_assume_aligned<2>(field + block->frame);
+
+  /* Create a new index page on the allocated segment page */
+  if (UNIV_LIKELY_NULL(block->page.zip.data))
+  {
+    mach_write_to_8(page_index_id, index_id); /* plain write to the frame, not through mtr; NOTE(review): presumably covered by what page_create_zip() logs - confirm */
+    ut_ad(!page_has_siblings(block->page.zip.data));
+    page_create_zip(block, index, 0, 0, mtr);
+  }
+  else
+  {
+    page_create(block, mtr, index && index->table->not_redundant());
+    if (index && index->is_spatial())
+    {
+      static_assert(((FIL_PAGE_INDEX & 0xff00) | byte(FIL_PAGE_RTREE))
+                    == FIL_PAGE_RTREE, "compatibility");
+      mtr->write<1>(*block, FIL_PAGE_TYPE + 1 + block->frame, byte(FIL_PAGE_RTREE)); /* rewrite only the second byte of FIL_PAGE_TYPE; the static_assert above guarantees FIL_PAGE_INDEX and FIL_PAGE_RTREE share the same high byte */
+      if (mach_read_from_8(block->frame + FIL_RTREE_SPLIT_SEQ_NUM)) /* clear a non-zero split sequence number */
+        mtr->memset(block, FIL_RTREE_SPLIT_SEQ_NUM, 8, 0);
+    }
+    /* Set the level of the new index page to 0 and store the index id */
+    mtr->write<2,mtr_t::MAYBE_NOP>(
+      *block, PAGE_HEADER + PAGE_LEVEL + block->frame, 0U);
+    mtr->write<8,mtr_t::MAYBE_NOP>(*block, page_index_id, index_id);
+  }
+}
+
 /** Create the root node for a new index tree.
 @param[in] type type of the index
 @param[in] index_id index id
@@ -1115,36 +1153,7 @@ btr_create(
 
 	ut_ad(!page_has_siblings(block->frame));
 
-	constexpr uint16_t field = PAGE_HEADER + PAGE_INDEX_ID;
-
-	byte* page_index_id = my_assume_aligned<2>(field + block->frame);
-
-	/* Create a new index page on the allocated segment page */
-	if (UNIV_LIKELY_NULL(block->page.zip.data)) {
-		mach_write_to_8(page_index_id, index_id);
-		ut_ad(!page_has_siblings(block->page.zip.data));
-		page_create_zip(block, index, 0, 0, mtr);
-	} else {
-		page_create(block, mtr,
-			    index && index->table->not_redundant());
-		if (index && index->is_spatial()) {
-			static_assert(((FIL_PAGE_INDEX & 0xff00)
-				       | byte(FIL_PAGE_RTREE))
-				      == FIL_PAGE_RTREE, "compatibility");
-			mtr->write<1>(*block, FIL_PAGE_TYPE + 1 + block->frame,
-				      byte(FIL_PAGE_RTREE));
-			if (mach_read_from_8(block->frame
-					     + FIL_RTREE_SPLIT_SEQ_NUM)) {
-				mtr->memset(block, FIL_RTREE_SPLIT_SEQ_NUM,
-					    8, 0);
-			}
-		}
-		/* Set the level of the new index page */
-		mtr->write<2,mtr_t::MAYBE_NOP>(*block, PAGE_HEADER + PAGE_LEVEL
-					       + block->frame, 0U);
-		mtr->write<8,mtr_t::MAYBE_NOP>(*block, page_index_id,
-					       index_id);
-	}
+	btr_root_page_init(block, index_id, index, mtr);
 
 	/* We reset the free bits for the page in a separate
 	mini-transaction to allow creation of several trees in the
@@ -1172,7 +1181,6 @@ btr_create(
 this by calling btr_free_root.
@param[in,out] block root page
 @param[in] log_mode mtr logging mode */
-static
 void
 btr_free_but_not_root(
 	buf_block_t* block,
diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc
index 22a77a7a220..3800ef3012a 100644
--- a/storage/innobase/dict/dict0mem.cc
+++ b/storage/innobase/dict/dict0mem.cc
@@ -39,6 +39,7 @@ Created 1/8/1996 Heikki Tuuri
 #include "row0row.h"
 #include "sql_string.h"
 #include
+#include "btr0pcur.h"
 
 #define DICT_HEAP_SIZE 100 /*!< initial memory
 heap size when creating a table or
@@ -1383,3 +1384,114 @@ dict_index_t::vers_history_row(
 	}
 	return(error);
 }
+
+/** Empty all indexes of the table: free every page of each index tree
+except the root page, then re-initialize the root as an empty page.
+This is the undo step of a bulk insert; row_undo_ins() calls it for a
+TRX_UNDO_UNEMPTY record. FTS indexes get no special handling here.
+Does nothing if the table has no tablespace. */
+void dict_table_t::empty_table()
+{
+  if (!space)
+    return;
+
+  mtr_t mtr;
+  for (dict_index_t* index= UT_LIST_GET_FIRST(indexes);
+       index != NULL; index= UT_LIST_GET_NEXT(indexes, index))
+  {
+    mtr.start();
+    buf_block_t* root_block= buf_page_get(page_id_t(space->id, index->page),
+                                          space->zip_size(), RW_X_LATCH,
+                                          &mtr);
+    if (!root_block)
+    {
+      /* Nothing can be freed or re-initialized without the root page;
+      skip this index rather than dereference a null block below. */
+      mtr.commit();
+      continue;
+    }
+
+    /* Free the index tree, keeping only the root page */
+    btr_free_but_not_root(root_block, mtr.get_log_mode());
+
+    mtr.set_named_space_id(space->id);
+    btr_root_page_init(root_block, index->id, index, &mtr);
+    /* Create a file segment whose header is stored at PAGE_BTR_SEG_LEAF
+    in the root page. NOTE(review): presumably this replaces the leaf
+    segment released by btr_free_but_not_root() - confirm. */
+    if (!fseg_create(space, root_block->page.id.page_no(),
+                     PAGE_HEADER + PAGE_BTR_SEG_LEAF, &mtr))
+    {
+      ut_ad(0);
+    }
+    mtr.commit();
+  }
+}
+
+/** Count the rows of the table by scanning all leaf pages of the
+clustered index, and store the result in stat_n_rows. Delete-marked
+records and a leading metadata record are not counted. Does nothing if
+the table has no tablespace or no index; stat_n_rows is also left
+unchanged if a leaf page cannot be fetched in the middle of the scan.
+NOTE(review): this reads the whole clustered index, and this patch
+calls it from ha_innobase::open(). */
+void dict_table_t::assign_stat_n_rows()
+{
+  if (!space)
+    return;
+
+  dict_index_t* clust_index= dict_table_get_first_index(this);
+  if (!clust_index)
+    return;
+
+  mtr_t mtr;
+  btr_pcur_t pcur;
+
+  mtr.start();
+  btr_pcur_open_at_index_side(true, clust_index, BTR_SEARCH_LEAF,
+                              &pcur, true, 0, &mtr);
+  /* Stay on the first user record only if it is the metadata record, so
+  that the scan below steps over it; otherwise step back so that the
+  scan starts with the first user record. */
+  btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+  if (!rec_is_metadata(btr_pcur_get_rec(&pcur), *clust_index))
+    btr_pcur_move_to_prev_on_page(&pcur);
+
+  page_cur_t *cur= btr_pcur_get_page_cur(&pcur);
+  ulint n_rows= 0;
+
+  for (;;)
+  {
+    page_cur_move_to_next(cur);
+    const rec_t *rec= page_cur_get_rec(cur);
+
+    /* Test for the page supremum before looking at the delete-mark
+    flag: the supremum only ends the page and is never counted. */
+    if (!page_rec_is_supremum(rec))
+    {
+      if (!rec_get_deleted_flag(rec, dict_table_is_comp(this)))
+        n_rows++;
+      continue;
+    }
+
+    /* End of this leaf page: continue on the right sibling, if any */
+    const uint32_t next_page_no= btr_page_get_next(page_cur_get_page(cur));
+    if (next_page_no == FIL_NULL)
+      break;
+
+    /* Fetch the next page before releasing the current one */
+    buf_block_t *prev_block= page_cur_get_block(cur);
+    buf_block_t *next_block= btr_block_get(*clust_index, next_page_no,
+                                           BTR_SEARCH_LEAF, false, &mtr);
+    btr_leaf_page_release(prev_block, BTR_SEARCH_LEAF, &mtr);
+    if (next_block == nullptr)
+    {
+      mtr.commit();
+      return;
+    }
+    page_cur_set_before_first(next_block, cur);
+  }
+
+  mtr.commit();
+  stat_n_rows= n_rows;
+}
diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc
index 9bab0fe355a..aa605973f06 100644
--- a/storage/innobase/fsp/fsp0fsp.cc
+++ b/storage/innobase/fsp/fsp0fsp.cc
@@ -1787,10 +1787,10 @@ fseg_create(
 			FIL_PAGE_TYPE_SYS);
 	}
 
-	mtr->write<2>(*block, byte_offset + FSEG_HDR_OFFSET
+	mtr->write<2,mtr_t::MAYBE_NOP>(*block, byte_offset + FSEG_HDR_OFFSET
 		      + block->frame, page_offset(inode));
 
-	mtr->write<4>(*block, byte_offset + FSEG_HDR_PAGE_NO
+	mtr->write<4,mtr_t::MAYBE_NOP>(*block, byte_offset + FSEG_HDR_PAGE_NO
 		      + block->frame, iblock->page.id.page_no());
 
 	mtr->write<4,mtr_t::MAYBE_NOP>(*block, byte_offset + FSEG_HDR_SPACE
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index bea63919532..b7b56799075 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -5882,6 +5882,8 @@ ha_innobase::open(const char* name, int, uint)
 		}
 	}
 
+	ib_table->assign_stat_n_rows();
+
 	m_prebuilt = row_create_prebuilt(ib_table, table->s->reclength);
 
 	m_prebuilt->default_rec = table->s->default_values;
@@ -7665,6 +7667,11 @@ ha_innobase::write_row(
 
 	trx_t* trx = thd_to_trx(m_user_thd);
 
+	if (!dict_table_get_n_rows(m_prebuilt->table)) {
+		// bulk index code
+		m_prebuilt->table->bulk_trx_id = trx->id;
+	}
+
 	/* Validation checks before we commence write_row operation.
*/ if (high_level_read_only) { ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); @@ -15948,6 +15955,19 @@ ha_innobase::external_lock( if (m_prebuilt->select_lock_type != LOCK_NONE) { + if (!dict_table_get_n_rows(m_prebuilt->table) + && (thd_sql_command(thd) == SQLCOM_INSERT + || thd_sql_command(thd) + == SQLCOM_INSERT_SELECT)) { + dberr_t error = row_lock_table(m_prebuilt); + + if (error != DB_SUCCESS) { + DBUG_RETURN( + convert_error_code_to_mysql( + error, 0, thd)); + } + } + if (thd_sql_command(thd) == SQLCOM_LOCK_TABLES && THDVAR(thd, table_locks) && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT) diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index b02c65f3a31..23fd8077412 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -330,6 +330,16 @@ btr_node_ptr_get_child_page_no( const rec_offs* offsets)/*!< in: array returned by rec_get_offsets() */ MY_ATTRIBUTE((warn_unused_result)); + +/** Initialize the root page of the b-tree +@param[in,out] block root block +@param[in] index_id index id +@param[in] index index of root page +@param[in,out] mtr mini-transaction */ +void +btr_root_page_init(buf_block_t *block, index_id_t index_id, + dict_index_t *index, mtr_t *mtr); + /** Create the root node for a new index tree. @param[in] type type of the index @param[in,out] space tablespace where created @@ -346,6 +356,15 @@ btr_create( dict_index_t* index, mtr_t* mtr); +/** Free a B-tree except the root page. The root page MUST be freed after +this by calling btr_free_root. +@param[in,out] block root page +@param[in] log_mode mtr logging mode */ +void +btr_free_but_not_root( + buf_block_t* block, + mtr_log_t log_mode); + /** Free a persistent index tree if it exists. 
@param[in] page_id root page id @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 731ff545685..1aed96e9c9c 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -1936,6 +1936,11 @@ struct dict_table_t { char (&tbl_name)[NAME_LEN + 1], size_t *db_name_len, size_t *tbl_name_len) const; + /** Assign n_stat_rows in dict_table_t */ + void assign_stat_n_rows(); + + /** Empty the table */ + void empty_table(); private: /** Initialize instant->field_map. @param[in] table table definition to copy from */ @@ -2314,6 +2319,10 @@ public: /** mysql_row_templ_t for base columns used for compute the virtual columns */ dict_vcol_templ_t* vc_templ; + + /** Trx id of bulk operation. This is under the protection of + exclusive lock of table object */ + trx_id_t bulk_trx_id; }; inline void dict_index_t::set_modified(mtr_t& mtr) const diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h index 9aeff6312f6..4af7a991a77 100644 --- a/storage/innobase/include/trx0rec.h +++ b/storage/innobase/include/trx0rec.h @@ -296,6 +296,7 @@ record */ fields of the record can change */ #define TRX_UNDO_DEL_MARK_REC 14 /* delete marking of a record; fields do not change */ +#define TRX_UNDO_UNEMPTY 15 /* Empty the table */ #define TRX_UNDO_CMPL_INFO_MULT 16U /* compilation info is multiplied by this and ORed to the type above */ #define TRX_UNDO_UPD_EXTERN 128U /* This bit can be ORed to type_cmpl diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index c24d1f12623..7bd645299a7 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -4283,6 +4283,9 @@ void lock_release(trx_t* trx) all currently active transactions. 
*/ table->query_cache_inv_trx_id = max_trx_id; + } else if (lock_get_mode(lock) == LOCK_X + && table->bulk_trx_id == trx->id) { + table->bulk_trx_id = 0; } lock_table_dequeue(lock); diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc index fb6fc5858e3..6d09ae293f4 100644 --- a/storage/innobase/page/page0page.cc +++ b/storage/innobase/page/page0page.cc @@ -410,7 +410,6 @@ page_create_empty( && !index->table->is_temporary() && page_is_leaf(block->frame)) { max_trx_id = page_get_max_trx_id(block->frame); - ut_ad(max_trx_id); } else if (block->page.id.page_no() == index->page) { /* Preserve PAGE_ROOT_AUTO_INC. */ max_trx_id = page_get_max_trx_id(block->frame); diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index 9934ede605b..309f7abc18b 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -897,6 +897,7 @@ row_purge_parse_undo_rec( switch (type) { case TRX_UNDO_RENAME_TABLE: return false; + case TRX_UNDO_UNEMPTY: case TRX_UNDO_INSERT_METADATA: case TRX_UNDO_INSERT_REC: /* These records do not store any transaction identifier. 
@@ -987,6 +988,9 @@ err_exit: if (type == TRX_UNDO_INSERT_METADATA) { node->ref = &trx_undo_metadata; return(true); + } else if (type == TRX_UNDO_UNEMPTY) { + node->ref = nullptr; + return true; } ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref), diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc index 63edbd9b86d..6329825defd 100644 --- a/storage/innobase/row/row0uins.cc +++ b/storage/innobase/row/row0uins.cc @@ -382,6 +382,7 @@ static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked) ut_ad("wrong undo record type" == 0); goto close_table; case TRX_UNDO_INSERT_METADATA: + case TRX_UNDO_UNEMPTY: case TRX_UNDO_INSERT_REC: break; case TRX_UNDO_RENAME_TABLE: @@ -425,8 +426,12 @@ close_table: node->heap); } else { node->ref = &trx_undo_metadata; + if (node->rec_type == TRX_UNDO_UNEMPTY) { + return true; + } } + if (!row_undo_search_clust_to_pcur(node)) { /* An error probably occurred during an insert into the clustered index, @@ -588,6 +593,11 @@ row_undo_ins( log_free_check(); ut_ad(!node->table->is_temporary()); err = row_undo_ins_remove_clust_rec(node); + break; + case TRX_UNDO_UNEMPTY: + node->table->empty_table(); + err = DB_SUCCESS; + break; } dict_table_close(node->table, dict_locked, FALSE); diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc index 8fca99a44b8..5ebca29681d 100644 --- a/storage/innobase/row/row0undo.cc +++ b/storage/innobase/row/row0undo.cc @@ -363,6 +363,7 @@ static bool row_undo_rec_get(undo_node_t* node) switch (trx_undo_rec_get_type(node->undo_rec)) { case TRX_UNDO_INSERT_METADATA: + case TRX_UNDO_UNEMPTY: /* This record type was introduced in MDEV-11369 instant ADD COLUMN, which was implemented after MDEV-12288 removed the insert_undo log. 
There is no diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc index cda1bd6f22c..bcc64ee367d 100644 --- a/storage/innobase/trx/trx0rec.cc +++ b/storage/innobase/trx/trx0rec.cc @@ -393,6 +393,15 @@ trx_undo_page_report_insert( *ptr++ = TRX_UNDO_INSERT_REC; ptr += mach_u64_write_much_compressed(ptr, trx->undo_no); ptr += mach_u64_write_much_compressed(ptr, index->table->id); + + /* Table is in bulk operation */ + if (index->table->bulk_trx_id == trx->id + && !index->table->is_temporary()) { + undo_block->frame[first_free + 2] = TRX_UNDO_UNEMPTY; + index->table->bulk_trx_id = trx->id; + goto done; + } + /*----------------------------------------*/ /* Store then the fields required to uniquely determine the record to be inserted in the clustered index */ @@ -470,7 +479,7 @@ trx_undo_rec_get_pars( type_cmpl &= ~TRX_UNDO_UPD_EXTERN; *type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1); ut_ad(*type >= TRX_UNDO_RENAME_TABLE); - ut_ad(*type <= TRX_UNDO_DEL_MARK_REC); + ut_ad(*type <= TRX_UNDO_UNEMPTY); *cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT; *undo_no = mach_read_next_much_compressed(&ptr); -- cgit v1.2.1