diff options
Diffstat (limited to 'storage/innobase/row')
-rw-r--r-- | storage/innobase/row/row0ftsort.cc | 70 | ||||
-rw-r--r-- | storage/innobase/row/row0import.cc | 230 | ||||
-rw-r--r-- | storage/innobase/row/row0merge.cc | 106 | ||||
-rw-r--r-- | storage/innobase/row/row0mysql.cc | 38 | ||||
-rw-r--r-- | storage/innobase/row/row0purge.cc | 333 | ||||
-rw-r--r-- | storage/innobase/row/row0sel.cc | 218 | ||||
-rw-r--r-- | storage/innobase/row/row0uins.cc | 31 | ||||
-rw-r--r-- | storage/innobase/row/row0umod.cc | 49 | ||||
-rw-r--r-- | storage/innobase/row/row0undo.cc | 35 | ||||
-rw-r--r-- | storage/innobase/row/row0upd.cc | 5 | ||||
-rw-r--r-- | storage/innobase/row/row0vers.cc | 54 |
11 files changed, 392 insertions, 777 deletions
diff --git a/storage/innobase/row/row0ftsort.cc b/storage/innobase/row/row0ftsort.cc index 7e3b70bfadb..ef4db392866 100644 --- a/storage/innobase/row/row0ftsort.cc +++ b/storage/innobase/row/row0ftsort.cc @@ -216,7 +216,6 @@ row_fts_psort_info_init( common_info->trx = trx; common_info->all_info = psort_info; common_info->sort_event = os_event_create(0); - common_info->merge_event = os_event_create(0); common_info->opt_doc_id_size = opt_doc_id_size; if (log_tmp_is_encrypted()) { @@ -253,14 +252,9 @@ row_fts_psort_info_init( } /* Need to align memory for O_DIRECT write */ - psort_info[j].block_alloc[i] = - static_cast<row_merge_block_t*>(ut_malloc_nokey( - block_size + 1024)); - psort_info[j].merge_block[i] = static_cast<row_merge_block_t*>( - ut_align( - psort_info[j].block_alloc[i], 1024)); + aligned_malloc(block_size, 1024)); if (!psort_info[j].merge_block[i]) { ret = FALSE; @@ -270,23 +264,17 @@ row_fts_psort_info_init( /* If tablespace is encrypted, allocate additional buffer for encryption/decryption. */ if (encrypted) { - /* Need to align memory for O_DIRECT write */ - psort_info[j].crypt_alloc[i] = - static_cast<row_merge_block_t*>(ut_malloc_nokey( - block_size + 1024)); - psort_info[j].crypt_block[i] = static_cast<row_merge_block_t*>( - ut_align( - psort_info[j].crypt_alloc[i], 1024)); + aligned_malloc(block_size, + 1024)); if (!psort_info[j].crypt_block[i]) { ret = FALSE; goto func_exit; } } else { - psort_info[j].crypt_alloc[i] = NULL; psort_info[j].crypt_block[i] = NULL; } } @@ -338,19 +326,15 @@ row_fts_psort_info_destroy( psort_info[j].merge_file[i]); } - ut_free(psort_info[j].block_alloc[i]); + aligned_free(psort_info[j].merge_block[i]); ut_free(psort_info[j].merge_file[i]); - - if (psort_info[j].crypt_alloc[i]) { - ut_free(psort_info[j].crypt_alloc[i]); - } + aligned_free(psort_info[j].crypt_block[i]); } mutex_free(&psort_info[j].mutex); } os_event_destroy(merge_info[0].psort_common->sort_event); - os_event_destroy(merge_info[0].psort_common->merge_event); ut_free(merge_info[0].psort_common->dup); ut_free(merge_info[0].psort_common); ut_free(psort_info); @@ -754,10 +738,9 @@ row_merge_fts_get_next_doc_item( /*********************************************************************//** Function performs parallel tokenization of the incoming doc strings. It also performs the initial in memory sort of the parsed records. -@return OS_THREAD_DUMMY_RETURN */ +*/ static -os_thread_ret_t -DECLARE_THREAD(fts_parallel_tokenization)( +void fts_parallel_tokenization( /*======================*/ void* arg) /*!< in: psort_info for the thread */ { @@ -1032,12 +1015,12 @@ exit: crypt_block[i], table->space_id); if (error != DB_SUCCESS) { - os_file_close(tmpfd[i]); + row_merge_file_destroy_low(tmpfd[i]); goto func_exit; } total_rec += merge_file[i]->n_rec; - os_file_close(tmpfd[i]); + row_merge_file_destroy_low(tmpfd[i]); } func_exit: @@ -1065,10 +1048,6 @@ func_exit: psort_info->child_status = FTS_CHILD_COMPLETE; os_event_set(psort_info->psort_common->sort_event); psort_info->child_status = FTS_CHILD_EXITING; - - os_thread_exit(); - - OS_THREAD_DUMMY_RETURN; } /*********************************************************************//** @@ -1079,23 +1058,20 @@ row_fts_start_psort( fts_psort_t* psort_info) /*!< parallel sort structure */ { ulint i = 0; - os_thread_id_t thd_id; for (i = 0; i < fts_sort_pll_degree; i++) { psort_info[i].psort_id = i; - psort_info[i].thread_hdl = - os_thread_create(fts_parallel_tokenization, - (void*) &psort_info[i], - &thd_id); + psort_info[i].task = + new tpool::waitable_task(fts_parallel_tokenization,&psort_info[i]); + srv_thread_pool->submit_task(psort_info[i].task); } } /*********************************************************************//** -Function performs the merge and insertion of the sorted records. -@return OS_THREAD_DUMMY_RETURN */ +Function performs the merge and insertion of the sorted records. */ static -os_thread_ret_t -DECLARE_THREAD(fts_parallel_merge)( +void +fts_parallel_merge( /*===============*/ void* arg) /*!< in: parallel merge info */ { @@ -1109,14 +1085,6 @@ DECLARE_THREAD(fts_parallel_merge)( row_fts_merge_insert(psort_info->psort_common->dup->index, psort_info->psort_common->new_table, psort_info->psort_common->all_info, id); - - psort_info->child_status = FTS_CHILD_COMPLETE; - os_event_set(psort_info->psort_common->merge_event); - psort_info->child_status = FTS_CHILD_EXITING; - - os_thread_exit(false); - - OS_THREAD_DUMMY_RETURN; } /*********************************************************************//** @@ -1128,15 +1096,15 @@ row_fts_start_parallel_merge( { ulint i = 0; - /* Kick off merge/insert threads */ + /* Kick off merge/insert tasks */ for (i = 0; i < FTS_NUM_AUX_INDEX; i++) { merge_info[i].psort_id = i; merge_info[i].child_status = 0; - merge_info[i].thread_hdl = os_thread_create( + merge_info[i].task = new tpool::waitable_task( fts_parallel_merge, - (void*) &merge_info[i], - &merge_info[i].thread_hdl); + (void*) &merge_info[i]); + srv_thread_pool->submit_task(merge_info[i].task); } } diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index d161a8134aa..a31c7288121 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -227,6 +227,9 @@ public: RecIterator() UNIV_NOTHROW { memset(&m_cur, 0x0, sizeof(m_cur)); + /* Make page_cur_delete_rec() happy. */ + m_mtr.start(); + m_mtr.set_log_mode(MTR_LOG_NONE); } /** Position the cursor on the first user record. */ @@ -253,6 +256,8 @@ public: return(page_cur_get_rec(&m_cur)); } + buf_block_t* current_block() const { return m_cur.block; } + /** @return true if cursor is at the end */ bool end() UNIV_NOTHROW @@ -264,19 +269,44 @@ public: @return true on success */ bool remove( const dict_index_t* index, - page_zip_des_t* page_zip, offset_t* offsets) UNIV_NOTHROW { + ut_ad(page_is_leaf(m_cur.block->frame)); /* We can't end up with an empty page unless it is root. */ if (page_get_n_recs(m_cur.block->frame) <= 1) { return(false); } - return(page_delete_rec(index, &m_cur, page_zip, offsets)); + if (!rec_offs_any_extern(offsets) + && m_cur.block->page.id.page_no() != index->page + && ((page_get_data_size(m_cur.block->frame) + - rec_offs_size(offsets) + < BTR_CUR_PAGE_COMPRESS_LIMIT(index)) + || !page_has_siblings(m_cur.block->frame) + || (page_get_n_recs(m_cur.block->frame) < 2))) { + return false; + } + +#ifdef UNIV_ZIP_DEBUG + page_zip_des_t* page_zip = buf_block_get_page_zip(m_cur.block); + ut_a(!page_zip || page_zip_validate( + page_zip, m_cur.block->frame, index)); +#endif /* UNIV_ZIP_DEBUG */ + + page_cur_delete_rec(&m_cur, index, offsets, &m_mtr); + +#ifdef UNIV_ZIP_DEBUG + ut_a(!page_zip || page_zip_validate( + page_zip, m_cur.block->frame, index)); +#endif /* UNIV_ZIP_DEBUG */ + + return true; } private: page_cur_t m_cur; +public: + mtr_t m_mtr; }; /** Class that purges delete marked reocords from indexes, both secondary @@ -502,7 +532,7 @@ protected: const xdes_t* xdesc = xdes(page_no, m_xdes); ulint pos = page_no % FSP_EXTENT_SIZE; - return(xdes_get_bit(xdesc, XDES_FREE_BIT, pos)); + return xdes_is_free(xdesc, pos); } /* If the current xdes was free, the page must be free. */ @@ -796,14 +826,11 @@ public: AbstractCallback(trx, space_id), m_cfg(cfg), m_index(cfg->m_indexes), - m_current_lsn(log_get_lsn()), - m_page_zip_ptr(0), m_rec_iter(), m_offsets_(), m_offsets(m_offsets_), m_heap(0), m_cluster_index(dict_table_get_first_index(cfg->m_table)) { - ut_ad(m_current_lsn); rec_offs_init(m_offsets_); } @@ -824,9 +851,8 @@ private: @param block block read from file @param page_type type of the page @retval DB_SUCCESS or error code */ - dberr_t update_page( - buf_block_t* block, - ulint& page_type) UNIV_NOTHROW; + dberr_t update_page(buf_block_t* block, uint16_t& page_type) + UNIV_NOTHROW; /** Update the space, index id, trx id. @param block block to convert @@ -907,12 +933,6 @@ private: /** Current index whose pages are being imported */ row_index_t* m_index; - /** Current system LSN */ - lsn_t m_current_lsn; - - /** Alias for m_page_zip, only set for compressed pages. */ - page_zip_des_t* m_page_zip_ptr; - /** Iterator over records in a block */ RecIterator m_rec_iter; @@ -1564,8 +1584,7 @@ IndexPurge::next() UNIV_NOTHROW dict_index_t* index = m_pcur.btr_cur.index; buf_block_t* next_block = btr_block_get( - page_id_t(block->page.id.space(), next_page), - block->zip_size(), BTR_MODIFY_LEAF, index, + *index, next_page, BTR_MODIFY_LEAF, false, &m_mtr); if (UNIV_UNLIKELY(!next_block @@ -1674,9 +1693,10 @@ PageConverter::adjust_cluster_index_blob_column( mach_write_to_4(field, get_space_id()); - if (m_page_zip_ptr) { + if (UNIV_LIKELY_NULL(m_rec_iter.current_block()->page.zip.data)) { page_zip_write_blob_ptr( - m_page_zip_ptr, rec, m_cluster_index, offsets, i, 0); + m_rec_iter.current_block(), rec, m_cluster_index, + offsets, i, &m_rec_iter.m_mtr); } return(DB_SUCCESS); @@ -1747,7 +1767,7 @@ inline bool PageConverter::purge() UNIV_NOTHROW const dict_index_t* index = m_index->m_srv_index; /* We can't have a page that is empty and not root. */ - if (m_rec_iter.remove(index, m_page_zip_ptr, m_offsets)) { + if (m_rec_iter.remove(index, m_offsets)) { ++m_index->m_stats.m_n_purged; @@ -1778,11 +1798,13 @@ PageConverter::adjust_cluster_record( record. */ ulint trx_id_pos = m_cluster_index->n_uniq ? m_cluster_index->n_uniq : 1; - if (m_page_zip_ptr) { + if (UNIV_LIKELY_NULL(m_rec_iter.current_block() + ->page.zip.data)) { page_zip_write_trx_id_and_roll_ptr( - m_page_zip_ptr, rec, m_offsets, trx_id_pos, + &m_rec_iter.current_block()->page.zip, + rec, m_offsets, trx_id_pos, 0, roll_ptr_t(1) << ROLL_PTR_INSERT_FLAG_POS, - NULL); + &m_rec_iter.m_mtr); } else { ulint len; byte* ptr = rec_get_nth_field( @@ -1893,19 +1915,23 @@ PageConverter::update_index_page( } #ifdef UNIV_ZIP_DEBUG - ut_a(!is_compressed_table() - || page_zip_validate(m_page_zip_ptr, page, m_index->m_srv_index)); + ut_a(!block->page.zip.data || page_zip_validate(&block->page.zip, page, + m_index->m_srv_index)); #endif /* UNIV_ZIP_DEBUG */ /* This has to be written to uncompressed index header. Set it to the current index id. */ - btr_page_set_index_id( - page, m_page_zip_ptr, m_index->m_srv_index->id, 0); + mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), + m_index->m_srv_index->id); + if (UNIV_LIKELY_NULL(block->page.zip.data)) { + memcpy(&block->page.zip.data[PAGE_HEADER + PAGE_INDEX_ID], + &block->frame[PAGE_HEADER + PAGE_INDEX_ID], 8); + } - if (dict_index_is_clust(m_index->m_srv_index)) { - dict_index_t* index = const_cast<dict_index_t*>( - m_index->m_srv_index); - if (block->page.id.page_no() == index->page) { + if (m_index->m_srv_index->is_clust()) { + if (block->page.id.page_no() == m_index->m_srv_index->page) { + dict_index_t* index = const_cast<dict_index_t*>( + m_index->m_srv_index); /* Preserve the PAGE_ROOT_AUTO_INC. */ if (index->table->supports_instant()) { if (btr_cur_instant_root_init(index, page)) { @@ -1939,18 +1965,32 @@ PageConverter::update_index_page( } } } else { - /* Clear PAGE_MAX_TRX_ID so that it can be - used for other purposes in the future. IMPORT - in MySQL 5.6, 5.7 and MariaDB 10.0 and 10.1 - would set the field to the transaction ID even - on clustered index pages. */ - page_set_max_trx_id(block, m_page_zip_ptr, 0, NULL); + goto clear_page_max_trx_id; + } + } else if (page_is_leaf(page)) { + /* Set PAGE_MAX_TRX_ID on secondary index leaf pages. */ + mach_write_to_8(&block->frame[PAGE_HEADER + PAGE_MAX_TRX_ID], + m_trx->id); + if (UNIV_LIKELY_NULL(block->page.zip.data)) { + memcpy_aligned<8>(&block->page.zip.data + [PAGE_HEADER + PAGE_MAX_TRX_ID], + &block->frame + [PAGE_HEADER + PAGE_MAX_TRX_ID], 8); } } else { - /* Set PAGE_MAX_TRX_ID on secondary index leaf pages, - and clear it on non-leaf pages. */ - page_set_max_trx_id(block, m_page_zip_ptr, - page_is_leaf(page) ? m_trx->id : 0, NULL); +clear_page_max_trx_id: + /* Clear PAGE_MAX_TRX_ID so that it can be + used for other purposes in the future. IMPORT + in MySQL 5.6, 5.7 and MariaDB 10.0 and 10.1 + would set the field to the transaction ID even + on clustered index pages. */ + memset_aligned<8>(&block->frame[PAGE_HEADER + PAGE_MAX_TRX_ID], + 0, 8); + if (UNIV_LIKELY_NULL(block->page.zip.data)) { + memset_aligned<8>(&block->page.zip.data + [PAGE_HEADER + PAGE_MAX_TRX_ID], + 0, 8); + } } if (page_is_empty(page)) { @@ -1972,38 +2012,25 @@ PageConverter::update_index_page( /** Validate the space flags and update tablespace header page. @param block block read from file, not from the buffer pool. @retval DB_SUCCESS or error code */ -inline -dberr_t -PageConverter::update_header( - buf_block_t* block) UNIV_NOTHROW +inline dberr_t PageConverter::update_header(buf_block_t* block) UNIV_NOTHROW { - /* Check for valid header */ - switch (fsp_header_get_space_id(get_frame(block))) { - case 0: - return(DB_CORRUPTION); - case ULINT_UNDEFINED: - ib::warn() << "Space id check in the header failed: ignored"; - } - - mach_write_to_8( - get_frame(block) + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, - m_current_lsn); - - /* Write back the adjusted flags. */ - mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS - + get_frame(block), m_space_flags); - - /* Write space_id to the tablespace header, page 0. */ - mach_write_to_4( - get_frame(block) + FSP_HEADER_OFFSET + FSP_SPACE_ID, - get_space_id()); - - /* This is on every page in the tablespace. */ - mach_write_to_4( - get_frame(block) + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, - get_space_id()); - - return(DB_SUCCESS); + byte *frame= get_frame(block); + if (memcmp_aligned<4>(FIL_PAGE_SPACE_ID + frame, + FSP_HEADER_OFFSET + FSP_SPACE_ID + frame, 4)) + ib::warn() << "Space id check in the header failed: ignored"; + else if (!mach_read_from_4(FIL_PAGE_SPACE_ID + frame)) + return DB_CORRUPTION; + + memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); + + /* Write space_id to the tablespace header, page 0. */ + mach_write_to_4(FIL_PAGE_SPACE_ID + frame, get_space_id()); + memcpy_aligned<4>(FSP_HEADER_OFFSET + FSP_SPACE_ID + frame, + FIL_PAGE_SPACE_ID + frame, 4); + /* Write back the adjusted flags. */ + mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + frame, m_space_flags); + + return DB_SUCCESS; } /** Update the page, set the space id, max trx id and index id. @@ -2011,20 +2038,13 @@ PageConverter::update_header( @retval DB_SUCCESS or error code */ inline dberr_t -PageConverter::update_page( - buf_block_t* block, - ulint& page_type) UNIV_NOTHROW +PageConverter::update_page(buf_block_t* block, uint16_t& page_type) + UNIV_NOTHROW { dberr_t err = DB_SUCCESS; ut_ad(!block->page.zip.data == !is_compressed_table()); - if (block->page.zip.data) { - m_page_zip_ptr = &block->page.zip; - } else { - ut_ad(!m_page_zip_ptr); - } - switch (page_type = fil_page_get_type(get_frame(block))) { case FIL_PAGE_TYPE_FSP_HDR: ut_a(block->page.id.page_no() == 0); @@ -2094,27 +2114,29 @@ dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW RW_NO_LATCH, NULL, BUF_EVICT_IF_IN_POOL, __FILE__, __LINE__, NULL, NULL); - ulint page_type; + uint16_t page_type; if (dberr_t err = update_page(block, page_type)) { return err; } const bool full_crc32 = fil_space_t::full_crc32(get_space_flags()); + byte* frame = get_frame(block); + compile_time_assert(FIL_PAGE_LSN % 8 == 0); + *reinterpret_cast<uint64_t*>(frame + FIL_PAGE_LSN)= 0; if (!block->page.zip.data) { buf_flush_init_for_writing( - NULL, block->frame, NULL, m_current_lsn, full_crc32); + NULL, block->frame, NULL, full_crc32); } else if (fil_page_type_is_index(page_type)) { buf_flush_init_for_writing( NULL, block->page.zip.data, &block->page.zip, - m_current_lsn, full_crc32); + full_crc32); } else { /* Calculate and update the checksum of non-index pages for ROW_FORMAT=COMPRESSED tables. */ buf_flush_update_zip_checksum( - block->page.zip.data, block->zip_size(), - m_current_lsn); + block->page.zip.data, block->zip_size()); } return DB_SUCCESS; @@ -3619,7 +3641,6 @@ not_encrypted: iter.crypt_data, block->page.id.space(), block->page.id.page_no(), - mach_read_from_8(src + FIL_PAGE_LSN), src, block->zip_size(), dest, full_crc32); @@ -3738,11 +3759,10 @@ fil_tablespace_iterate( /* Allocate a page to read in the tablespace header, so that we can determine the page size and zip_size (if it is compressed). - We allocate an extra page in case it is a compressed table. One - page is to ensure alignement. */ + We allocate an extra page in case it is a compressed table. */ - void* page_ptr = ut_malloc_nokey(3U << srv_page_size_shift); - byte* page = static_cast<byte*>(ut_align(page_ptr, srv_page_size)); + byte* page = static_cast<byte*>(aligned_malloc(2 * srv_page_size, + srv_page_size)); buf_block_t* block = reinterpret_cast<buf_block_t*> (ut_zalloc_nokey(sizeof *block)); @@ -3794,20 +3814,16 @@ fil_tablespace_iterate( iter.n_io_buffers = n_io_buffers; /* Add an extra page for compressed page scratch area. */ - void* io_buffer = ut_malloc_nokey( - (2 + iter.n_io_buffers) << srv_page_size_shift); - iter.io_buffer = static_cast<byte*>( - ut_align(io_buffer, srv_page_size)); + aligned_malloc((1 + iter.n_io_buffers) + << srv_page_size_shift, srv_page_size)); - void* crypt_io_buffer = NULL; - if (iter.crypt_data) { - crypt_io_buffer = ut_malloc_nokey( - (2 + iter.n_io_buffers) - << srv_page_size_shift); - iter.crypt_io_buffer = static_cast<byte*>( - ut_align(crypt_io_buffer, srv_page_size)); - } + iter.crypt_io_buffer = iter.crypt_data + ? static_cast<byte*>( + aligned_malloc((1 + iter.n_io_buffers) + << srv_page_size_shift, + srv_page_size)) + : NULL; if (block->page.zip.ssize) { ut_ad(iter.n_io_buffers == 1); @@ -3821,8 +3837,8 @@ fil_tablespace_iterate( fil_space_destroy_crypt_data(&iter.crypt_data); } - ut_free(crypt_io_buffer); - ut_free(io_buffer); + aligned_free(iter.crypt_io_buffer); + aligned_free(iter.io_buffer); } if (err == DB_SUCCESS) { @@ -3838,7 +3854,7 @@ fil_tablespace_iterate( os_file_close(file); - ut_free(page_ptr); + aligned_free(page); ut_free(filepath); ut_free(block); @@ -3867,7 +3883,7 @@ row_import_for_mysql( ut_ad(!table->is_temporary()); ut_ad(table->space_id); - ut_ad(table->space_id < SRV_LOG_SPACE_FIRST_ID); + ut_ad(table->space_id < SRV_SPACE_ID_UPPER_BOUND); ut_ad(prebuilt->trx); ut_ad(!table->is_readable()); diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index afa692fa139..7d3da9636f4 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -2025,11 +2025,8 @@ end_of_index: block = page_cur_get_block(cur); block = btr_block_get( - page_id_t(block->page.id.space(), - next_page_no), - block->zip_size(), - BTR_SEARCH_LEAF, - clust_index, &mtr); + *clust_index, next_page_no, + RW_S_LATCH, false, &mtr); btr_leaf_page_release(page_cur_get_block(cur), BTR_SEARCH_LEAF, &mtr); @@ -2771,10 +2768,6 @@ all_done: DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Scan Table\n"); #endif if (fts_pll_sort) { - bool all_exit = false; - ulint trial_count = 0; - const ulint max_trial_count = 10000; - wait_again: /* Check if error occurs in child thread */ for (ulint j = 0; j < fts_sort_pll_degree; j++) { @@ -2807,27 +2800,9 @@ wait_again: } } - /* Now all children should complete, wait a bit until - they all finish setting the event, before we free everything. - This has a 10 second timeout */ - do { - all_exit = true; - - for (ulint j = 0; j < fts_sort_pll_degree; j++) { - if (psort_info[j].child_status - != FTS_CHILD_EXITING) { - all_exit = false; - os_thread_sleep(1000); - break; - } - } - trial_count++; - } while (!all_exit && trial_count < max_trial_count); - - if (!all_exit) { - ib::fatal() << "Not all child sort threads exited" - " when creating FTS index '" - << fts_sort_idx->name << "'"; + for (ulint j = 0; j < fts_sort_pll_degree; j++) { + psort_info[j].task->wait(); + delete psort_info[j].task; } } @@ -4075,6 +4050,9 @@ pfs_os_file_t row_merge_file_create_low( const char* path) { +#ifdef WITH_INNODB_DISALLOW_WRITES + os_event_wait(srv_allow_writes_event); +#endif /* WITH_INNODB_DISALLOW_WRITES */ #ifdef UNIV_PFS_IO /* This temp file open does not go through normal file APIs, add instrumentation to register with @@ -4095,7 +4073,13 @@ row_merge_file_create_low( PSI_FILE_CREATE, path ? name : label, __FILE__, __LINE__); #endif - pfs_os_file_t fd = innobase_mysql_tmpfile(path); + DBUG_ASSERT(strlen(path) + 2 <= FN_REFLEN); + char filename[FN_REFLEN]; + File f = create_temp_file(filename, path, "ib", + O_BINARY | O_SEQUENTIAL, + MYF(MY_WME | MY_TEMPORARY)); + pfs_os_file_t fd = IF_WIN((os_file_t)my_get_osfhandle(f), f); + #ifdef UNIV_PFS_IO register_pfs_file_open_end(locker, fd, (fd == OS_FILE_CLOSED)?NULL:&fd); @@ -4140,7 +4124,9 @@ row_merge_file_destroy_low( const pfs_os_file_t& fd) /*!< in: merge file descriptor */ { if (fd != OS_FILE_CLOSED) { - os_file_close(fd); + int res = mysql_file_close(IF_WIN(my_win_handle2File((os_file_t)fd), fd), + MYF(MY_WME)); + ut_a(res != -1); } } /*********************************************************************//** @@ -4572,7 +4558,6 @@ row_merge_build_indexes( dict_index_t* fts_sort_idx = NULL; fts_psort_t* psort_info = NULL; fts_psort_t* merge_info = NULL; - int64_t sig_count = 0; bool fts_psort_initiated = false; double total_static_cost = 0; @@ -4739,65 +4724,14 @@ row_merge_build_indexes( } if (indexes[i]->type & DICT_FTS) { - os_event_t fts_parallel_merge_event; sort_idx = fts_sort_idx; - fts_parallel_merge_event - = merge_info[0].psort_common->merge_event; - if (FTS_PLL_MERGE) { - ulint trial_count = 0; - bool all_exit = false; - - os_event_reset(fts_parallel_merge_event); row_fts_start_parallel_merge(merge_info); -wait_again: - os_event_wait_time_low( - fts_parallel_merge_event, 1000000, - sig_count); - for (j = 0; j < FTS_NUM_AUX_INDEX; j++) { - if (merge_info[j].child_status - != FTS_CHILD_COMPLETE - && merge_info[j].child_status - != FTS_CHILD_EXITING) { - sig_count = os_event_reset( - fts_parallel_merge_event); - - goto wait_again; - } - } - - /* Now all children should complete, wait - a bit until they all finish using event */ - while (!all_exit && trial_count < 10000) { - all_exit = true; - - for (j = 0; j < FTS_NUM_AUX_INDEX; - j++) { - if (merge_info[j].child_status - != FTS_CHILD_EXITING) { - all_exit = false; - os_thread_sleep(1000); - break; - } - } - trial_count++; - } - - if (!all_exit) { - ib::error() << "Not all child merge" - " threads exited when creating" - " FTS index '" - << indexes[i]->name << "'"; - } else { - for (j = 0; j < FTS_NUM_AUX_INDEX; - j++) { - - os_thread_join(merge_info[j] - .thread_hdl); - } + merge_info[j].task->wait(); + delete merge_info[j].task; } } else { /* This cannot report duplicates; an diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index bcc877d3d1f..b20dd44f21b 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -1978,8 +1978,8 @@ error: DBUG_RETURN(err); } -/** This can only be used when srv_locks_unsafe_for_binlog is TRUE or this -session is using a READ COMMITTED or READ UNCOMMITTED isolation level. +/** This can only be used when the current transaction is at +READ COMMITTED or READ UNCOMMITTED isolation level. Before calling this function row_search_for_mysql() must have initialized prebuilt->new_rec_locks to store the information which new record locks really were set. This function removes a newly set @@ -2002,17 +2002,8 @@ row_unlock_for_mysql( ut_ad(prebuilt != NULL); ut_ad(trx != NULL); + ut_ad(trx->isolation_level <= TRX_ISO_READ_COMMITTED); - if (UNIV_UNLIKELY - (!srv_locks_unsafe_for_binlog - && trx->isolation_level > TRX_ISO_READ_COMMITTED)) { - - ib::error() << "Calling row_unlock_for_mysql though" - " innodb_locks_unsafe_for_binlog is FALSE and this" - " session is not using READ COMMITTED isolation" - " level."; - return; - } if (dict_index_is_spatial(prebuilt->index)) { return; } @@ -3382,19 +3373,6 @@ row_drop_table_for_mysql( table records yet. Thus it is safe to release and reacquire the data dictionary latches. */ if (table->fts) { - ut_ad(!table->fts->add_wq); - ut_ad(lock_trx_has_sys_table_locks(trx) == 0); - - for (;;) { - bool retry = false; - if (dict_fts_index_syncing(table)) { - retry = true; - } - if (!retry) { - break; - } - DICT_BG_YIELD(trx); - } row_mysql_unlock_data_dictionary(trx); fts_optimize_remove_table(table); row_mysql_lock_data_dictionary(trx); @@ -3444,7 +3422,7 @@ row_drop_table_for_mysql( dict_stats_recalc_pool_del(table); dict_stats_defrag_pool_del(table, NULL); - if (btr_defragment_thread_active) { + if (btr_defragment_active) { /* During fts_drop_orphaned_tables() in recv_recovery_rollback_active() the btr_defragment_mutex has not yet been @@ -3526,7 +3504,13 @@ row_drop_table_for_mysql( if (table->n_foreign_key_checks_running > 0) { defer: - if (!is_temp_name) { + /* Rename #sql2 to #sql-ib if table has open ref count + while dropping the table. This scenario can happen + when purge thread is waiting for dict_sys.mutex so + that it could close the table. But drop table acquires + dict_sys.mutex. */ + if (!is_temp_name + || strstr(table->name.m_name, "/#sql2")) { heap = mem_heap_create(FN_REFLEN); const char* tmp_name = dict_mem_create_temporary_tablename( diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index 61920532c29..36bbc6dbc0c 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -103,15 +103,13 @@ row_purge_remove_clust_if_poss_low( purge_node_t* node, /*!< in/out: row purge node */ ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ { - ut_ad(rw_lock_own(&dict_sys.latch, RW_LOCK_S) - || node->vcol_info.is_used()); - dict_index_t* index = dict_table_get_first_index(node->table); log_free_check(); mtr_t mtr; mtr.start(); + index->set_modified(mtr); if (!row_purge_reposition_pcur(mode, node, &mtr)) { /* The record was already removed. */ @@ -119,9 +117,6 @@ row_purge_remove_clust_if_poss_low( return true; } - ut_d(const bool was_instant = !!index->table->instant); - index->set_modified(mtr); - rec_t* rec = btr_pcur_get_rec(&node->pcur); offset_t offsets_[REC_OFFS_NORMAL_SIZE]; rec_offs_init(offsets_); @@ -161,10 +156,6 @@ row_purge_remove_clust_if_poss_low( } } - /* Prove that dict_index_t::clear_instant_alter() was - not called with index->table->instant != NULL. */ - ut_ad(!was_instant || index->table->instant); - func_exit: if (heap) { mem_heap_free(heap); @@ -210,54 +201,6 @@ row_purge_remove_clust_if_poss( return(false); } -/** Tries to store secondary index cursor before openin mysql table for -virtual index condition computation. -@param[in,out] node row purge node -@param[in] index secondary index -@param[in,out] sec_pcur secondary index cursor -@param[in,out] sec_mtr mini-transaction which holds - secondary index entry */ -static void row_purge_store_vsec_cur( - purge_node_t* node, - dict_index_t* index, - btr_pcur_t* sec_pcur, - mtr_t* sec_mtr) -{ - row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, sec_mtr); - - if (!node->found_clust) { - return; - } - - node->vcol_info.set_requested(); - - btr_pcur_store_position(sec_pcur, sec_mtr); - - btr_pcurs_commit_specify_mtr(&node->pcur, sec_pcur, sec_mtr); -} - -/** Tries to restore secondary index cursor after opening the mysql table -@param[in,out] node row purge node -@param[in] index secondary index -@param[in,out] sec_mtr mini-transaction which holds secondary index entry -@param[in] is_tree true=pessimistic purge, - false=optimistic (leaf-page only) -@return false in case of restore failure. */ -static bool row_purge_restore_vsec_cur( - purge_node_t* node, - dict_index_t* index, - btr_pcur_t* sec_pcur, - mtr_t* sec_mtr, - bool is_tree) -{ - sec_mtr->start(); - index->set_modified(*sec_mtr); - - return btr_pcur_restore_position( - is_tree ? BTR_PURGE_TREE : BTR_PURGE_LEAF, - sec_pcur, sec_mtr); -} - /** Determines if it is possible to remove a secondary index entry. Removal is possible if the secondary index entry does not refer to any not delete marked version of a clustered index record where DB_TRX_ID @@ -297,53 +240,13 @@ row_purge_poss_sec( ut_ad(!dict_index_is_clust(index)); - const bool store_cur = sec_mtr && !node->vcol_info.is_used() - && dict_index_has_virtual(index); - - if (store_cur) { - row_purge_store_vsec_cur(node, index, sec_pcur, sec_mtr); - ut_ad(sec_mtr->has_committed() - == node->vcol_info.is_requested()); - - /* The PRIMARY KEY value was not found in the clustered - index. The secondary index record found. We can purge - the secondary index record. */ - if (!node->vcol_info.is_requested()) { - ut_ad(!node->found_clust); - return true; - } - } - -retry_purge_sec: mtr_start(&mtr); can_delete = !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr) || !row_vers_old_has_index_entry(true, btr_pcur_get_rec(&node->pcur), &mtr, index, entry, - node->roll_ptr, node->trx_id, - &node->vcol_info); - - if (node->vcol_info.is_first_fetch()) { - ut_ad(store_cur); - - const TABLE* t= node->vcol_info.table(); - DBUG_LOG("purge", "retry " << t - << (is_tree ? " tree" : " leaf") - << index->name << "," << index->table->name - << ": " << rec_printer(entry).str()); - - ut_ad(mtr.has_committed()); - - if (t) { - node->vcol_info.set_used(); - goto retry_purge_sec; - } - - node->table = NULL; - sec_pcur = NULL; - return false; - } + node->roll_ptr, node->trx_id); /* Persistent cursor is closed if reposition fails. */ if (node->found_clust) { @@ -354,18 +257,6 @@ retry_purge_sec: ut_ad(mtr.has_committed()); - /* If the virtual column info is not used then reset the virtual column - info. */ - if (node->vcol_info.is_requested() - && !node->vcol_info.is_used()) { - node->vcol_info.reset(); - } - - if (store_cur && !row_purge_restore_vsec_cur( - node, index, sec_pcur, sec_mtr, is_tree)) { - return false; - } - return can_delete; } @@ -482,13 +373,6 @@ row_purge_remove_sec_if_poss_tree( } } - if (node->vcol_op_failed()) { - ut_ad(mtr.has_committed()); - ut_ad(!pcur.old_rec_buf); - ut_ad(pcur.pos_state == BTR_PCUR_NOT_POSITIONED); - return false; - } - func_exit: btr_pcur_close(&pcur); // FIXME: need this? func_exit_no_pcur: @@ -604,26 +488,21 @@ row_purge_remove_sec_if_poss_leaf( goto func_exit_no_pcur; } - if (dict_index_is_spatial(index)) { - const page_t* page; - const trx_t* trx = NULL; - - if (btr_cur->rtr_info != NULL - && btr_cur->rtr_info->thr != NULL) { - trx = thr_get_trx( - btr_cur->rtr_info->thr); - } - - page = btr_cur_get_page(btr_cur); - - if (!lock_test_prdt_page_lock( - trx, - page_get_space_id(page), - page_get_page_no(page)) - && page_get_n_recs(page) < 2 - && btr_cur_get_block(btr_cur) - ->page.id.page_no() != - dict_index_get_page(index)) { + if (index->is_spatial()) { + const buf_block_t* block = btr_cur_get_block( + btr_cur); + + if (block->page.id.page_no() + != index->page + && page_get_n_recs(block->frame) < 2 + && !lock_test_prdt_page_lock( + btr_cur->rtr_info + && btr_cur->rtr_info->thr + ? thr_get_trx( + btr_cur->rtr_info->thr) + : NULL, + block->page.id.space(), + block->page.id.page_no())) { /* this is the last record on page, and it has a "page" lock on it, which mean search is still depending @@ -631,8 +510,7 @@ row_purge_remove_sec_if_poss_leaf( DBUG_LOG("purge", "skip purging last" " record on page " - << btr_cur_get_block(btr_cur) - ->page.id); + << block->page.id); btr_pcur_close(&pcur); mtr.commit(); @@ -647,11 +525,6 @@ row_purge_remove_sec_if_poss_leaf( } } - if (node->vcol_op_failed()) { - btr_pcur_close(&pcur); - return false; - } - /* (The index entry is still needed, or the deletion succeeded) */ /* fall through */ @@ -698,10 +571,6 @@ row_purge_remove_sec_if_poss( return; } retry: - if (node->vcol_op_failed()) { - return; - } - success = row_purge_remove_sec_if_poss_tree(node, index, entry); /* The delete operation may fail if we have little file space left: TODO: easiest to crash the database @@ -768,12 +637,6 @@ row_purge_del_mark( node->row, NULL, node->index, heap, ROW_BUILD_FOR_PURGE); row_purge_remove_sec_if_poss(node, node->index, entry); - - if (node->vcol_op_failed()) { - mem_heap_free(heap); - return false; - } - mem_heap_empty(heap); } @@ -791,8 +654,6 @@ whose old history can no longer be observed. @param[in,out] mtr mini-transaction (will be started and committed) */ static void row_purge_reset_trx_id(purge_node_t* node, mtr_t* mtr) { - ut_ad(rw_lock_own(&dict_sys.latch, RW_LOCK_S) - || node->vcol_info.is_used()); /* Reset DB_TRX_ID, DB_ROLL_PTR for old records. */ mtr->start(); @@ -844,8 +705,15 @@ static void row_purge_reset_trx_id(purge_node_t* node, mtr_t* mtr) byte* ptr = rec_get_nth_field( rec, offsets, trx_id_pos, &len); ut_ad(len == DATA_TRX_ID_LEN); - mlog_write_string(ptr, reset_trx_id, - sizeof reset_trx_id, mtr); + buf_block_t* block = btr_pcur_get_block( + &node->pcur); + uint16_t offs = page_offset(ptr); + mtr->memset(block, offs, DATA_TRX_ID_LEN, 0); + offs += DATA_TRX_ID_LEN; + mtr->write<1,mtr_t::OPT>(*block, block->frame + + offs, 0x80U); + mtr->memset(block, offs + 1, + DATA_ROLL_PTR_LEN - 1, 0); } } } @@ -868,8 +736,6 @@ row_purge_upd_exist_or_extern_func( { mem_heap_t* heap; - ut_ad(rw_lock_own(&dict_sys.latch, RW_LOCK_S) - || node->vcol_info.is_used()); ut_ad(!node->table->skip_alter_undo); if (node->rec_type == TRX_UNDO_UPD_DEL_REC @@ -897,11 +763,6 @@ row_purge_upd_exist_or_extern_func( heap, ROW_BUILD_FOR_PURGE); row_purge_remove_sec_if_poss(node, node->index, entry); - if (node->vcol_op_failed()) { - ut_ad(!node->table); - mem_heap_free(heap); - return; - } ut_ad(node->table); mem_heap_empty(heap); @@ -924,12 +785,11 @@ skip_secondaries: if (dfield_is_ext(&ufield->new_val)) { trx_rseg_t* rseg; buf_block_t* block; - ulint internal_offset; byte* data_field; - ibool is_insert; + bool is_insert; ulint rseg_id; - ulint page_no; - ulint offset; + uint32_t page_no; + uint16_t offset; /* We use the fact that new_val points to undo_rec and get thus the offset of @@ -937,7 +797,7 @@ skip_secondaries: can calculate from node->roll_ptr the file address of the new_val data */ - internal_offset = ulint( + const uint16_t internal_offset = uint16_t( static_cast<const byte*> (dfield_get_data(&ufield->new_val)) - undo_rec); @@ -989,7 +849,7 @@ skip_secondaries: index, data_field + dfield_get_len(&ufield->new_val) - BTR_EXTERN_FIELD_REF_SIZE, - NULL, NULL, NULL, 0, false, &mtr); + NULL, NULL, block, 0, false, &mtr); mtr.commit(); } } @@ -1005,18 +865,20 @@ skip_secondaries: row_purge_upd_exist_or_extern_func(node,undo_rec) #endif /* UNIV_DEBUG */ -/***********************************************************//** -Parses the row reference and other info in a modify undo log record. +/** Parses the row reference and other info in a modify undo log record. +@param[in] node row undo node +@param[in] undo_rec record to purge +@param[in] thr query thread +@param[out] updated_extern true if an externally stored field was + updated @return true if purge operation required */ static bool row_purge_parse_undo_rec( -/*=====================*/ - purge_node_t* node, /*!< in: row undo node */ - trx_undo_rec_t* undo_rec, /*!< in: record to purge */ - bool* updated_extern, /*!< out: true if an externally - stored field was updated */ - que_thr_t* thr) /*!< in: query thread */ + purge_node_t* node, + trx_undo_rec_t* undo_rec, + que_thr_t* thr, + bool* updated_extern) { dict_index_t* clust_index; byte* ptr; @@ -1063,28 +925,27 @@ row_purge_parse_undo_rec( return false; } - /* Prevent DROP TABLE etc. from running when we are doing the purge - for this row */ + trx_id_t trx_id = TRX_ID_MAX; -try_again: - rw_lock_s_lock_inline(&dict_sys.latch, 0, __FILE__, __LINE__); + if (node->retain_mdl(table_id)) { + ut_ad(node->table != NULL); + goto already_locked; + } +try_again: node->table = dict_table_open_on_id( - table_id, FALSE, DICT_TABLE_OP_NORMAL); + table_id, false, DICT_TABLE_OP_NORMAL, node->purge_thd, + &node->mdl_ticket); - trx_id_t trx_id = TRX_ID_MAX; - - if (node->table == NULL) { - /* The table has been dropped: no need to do purge */ + if (node->table == NULL || node->table->name.is_temporary()) { + /* The table has been dropped: no need to do purge and + release mdl happened as a part of open process itself */ goto err_exit; } +already_locked: ut_ad(!node->table->is_temporary()); - if (!fil_table_accessible(node->table)) { - goto inaccessible; - } - switch (type) { case TRX_UNDO_INSERT_METADATA: case TRX_UNDO_INSERT_REC: @@ -1097,19 +958,13 @@ try_again: /* Need server fully up for virtual column computation */ if (!mysqld_server_started) { - dict_table_close(node->table, FALSE, FALSE); - rw_lock_s_unlock(&dict_sys.latch); + node->close_table(); if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { return(false); } os_thread_sleep(1000000); goto try_again; } - - node->vcol_info.set_requested(); - node->vcol_info.set_used(); - node->vcol_info.set_table(innobase_init_vc_templ(node->table)); - node->vcol_info.set_used(); } clust_index = dict_table_get_first_index(node->table); @@ -1118,21 +973,20 @@ try_again: /* The table was corrupt in the data dictionary. dict_set_corrupted() works on an index, and we do not have an index to call it with. */ -inaccessible: DBUG_ASSERT(table_id == node->table->id); trx_id = node->table->def_trx_id; if (!trx_id) { trx_id = TRX_ID_MAX; } - dict_table_close(node->table, FALSE, FALSE); - node->table = NULL; err_exit: - rw_lock_s_unlock(&dict_sys.latch); + node->close_table(); node->skip(table_id, trx_id); return(false); } + node->last_table_id = table_id; + if (type == TRX_UNDO_INSERT_METADATA) { node->ref = &trx_undo_metadata; return(true); @@ -1165,20 +1019,21 @@ err_exit: return(true); } -/***********************************************************//** -Purges the parsed record. +/** Purges the parsed record. +@param[in] node row purge node +@param[in] undo_rec record to purge +@param[in] thr query thread +@param[in] updated_extern whether external columns were updated @return true if purged, false if skipped */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) bool row_purge_record_func( -/*==================*/ - purge_node_t* node, /*!< in: row purge node */ - trx_undo_rec_t* undo_rec, /*!< in: record to purge */ + purge_node_t* node, + trx_undo_rec_t* undo_rec, #if defined UNIV_DEBUG || defined WITH_WSREP - const que_thr_t*thr, /*!< in: query thread */ + const que_thr_t*thr, #endif /* UNIV_DEBUG || WITH_WSREP */ - bool updated_extern) /*!< in: whether external columns - were updated */ + bool updated_extern) { dict_index_t* clust_index; bool purged = true; @@ -1226,11 +1081,6 @@ row_purge_record_func( node->found_clust = FALSE; } - if (node->table != NULL) { - dict_table_close(node->table, FALSE, FALSE); - node->table = NULL; - } - return(purged); } @@ -1258,20 +1108,13 @@ row_purge( bool updated_extern; while (row_purge_parse_undo_rec( - node, undo_rec, &updated_extern, thr)) { + node, undo_rec, thr, &updated_extern)) { bool purged = row_purge_record( node, undo_rec, thr, updated_extern); - if (!node->vcol_info.is_used()) { - rw_lock_s_unlock(&dict_sys.latch); - } - - ut_ad(!rw_lock_own(&dict_sys.latch, RW_LOCK_S)); - if (purged - || srv_shutdown_state != SRV_SHUTDOWN_NONE - || node->vcol_op_failed()) { + || srv_shutdown_state != SRV_SHUTDOWN_NONE) { return; } @@ -1311,48 +1154,22 @@ row_purge_step( node->start(); -#ifdef UNIV_DEBUG - srv_slot_t *slot = thr->thread_slot; - ut_ad(slot); - - rw_lock_x_lock(&slot->debug_sync_lock); - while (UT_LIST_GET_LEN(slot->debug_sync)) { - srv_slot_t::debug_sync_t *sync = - UT_LIST_GET_FIRST(slot->debug_sync); - const char* sync_str = reinterpret_cast<char*>(&sync[1]); - bool result = debug_sync_set_action(current_thd, - sync_str, - strlen(sync_str)); - ut_a(!result); - - UT_LIST_REMOVE(slot->debug_sync, sync); - ut_free(sync); - } - rw_lock_x_unlock(&slot->debug_sync_lock); -#endif - - if (!(node->undo_recs == NULL || ib_vector_is_empty(node->undo_recs))) { - trx_purge_rec_t*purge_rec; - - purge_rec = static_cast<trx_purge_rec_t*>( - ib_vector_pop(node->undo_recs)); + if (!node->undo_recs.empty()) { + trx_purge_rec_t purge_rec = node->undo_recs.front(); + node->undo_recs.pop(); + node->roll_ptr = purge_rec.roll_ptr; - node->roll_ptr = purge_rec->roll_ptr; + row_purge(node, purge_rec.undo_rec, thr); - row_purge(node, purge_rec->undo_rec, thr); - - if (ib_vector_is_empty(node->undo_recs)) { + if (node->undo_recs.empty()) { row_purge_end(thr); } else { thr->run_node = node; - node->vcol_info.reset(); } } else { row_purge_end(thr); } - innobase_reset_background_thd(thr_get_trx(thr)->mysql_thd); - return(thr); } diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index 7b6df752043..11471b5f703 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -963,12 +963,10 @@ row_sel_get_clust_rec( trx = thr_get_trx(thr); - /* If innodb_locks_unsafe_for_binlog option is used - or this session is using READ COMMITTED or lower isolation level + /* At READ UNCOMMITTED or READ COMMITTED isolation level we lock only the record, i.e., next-key locking is not used. */ - if (srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) { + if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) { lock_type = LOCK_REC_NOT_GAP; } else { lock_type = LOCK_ORDINARY; @@ -1738,16 +1736,11 @@ rec_loop: true, ULINT_UNDEFINED, &heap); - /* If innodb_locks_unsafe_for_binlog option is used - or this session is using READ COMMITTED or lower isolation - level, we lock only the record, i.e., next-key - locking is not used. */ - if (srv_locks_unsafe_for_binlog - || trx->isolation_level - <= TRX_ISO_READ_COMMITTED) { - + /* At READ UNCOMMITTED or READ COMMITTED + isolation level, we lock only the record, + i.e., next-key locking is not used. */ + if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) { if (page_rec_is_supremum(next_rec)) { - goto skip_lock; } @@ -1805,12 +1798,10 @@ skip_lock: trx = thr_get_trx(thr); - /* If innodb_locks_unsafe_for_binlog option is used - or this session is using READ COMMITTED or lower isolation level, + /* At READ UNCOMMITTED or READ COMMITTED isolation level, we lock only the record, i.e., next-key locking is not used. */ - if (srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED + if (trx->isolation_level <= TRX_ISO_READ_COMMITTED || dict_index_is_spatial(index)) { if (page_rec_is_supremum(rec)) { @@ -2742,7 +2733,7 @@ row_sel_field_store_in_mysql_format_func( } /* Copy the actual data */ - ut_memcpy(dest, data, len); + memcpy(dest, data, len); /* Pad with trailing spaces. */ @@ -3627,7 +3618,7 @@ row_sel_copy_cached_field_for_mysql( len = templ->mysql_col_len; } - ut_memcpy(buf, cache, len); + memcpy(buf, cache, len); } /** Copy used fields from cached row. @@ -3694,7 +3685,7 @@ row_sel_dequeue_cached_row_for_mysql( UNIV_MEM_INVALID(buf, prebuilt->mysql_prefix_len); /* First copy the NULL bits. */ - ut_memcpy(buf, cached_rec, prebuilt->null_bitmap_len); + memcpy(buf, cached_rec, prebuilt->null_bitmap_len); /* Then copy the requested fields. */ for (i = 0; i < prebuilt->n_template; i++) { @@ -3711,7 +3702,7 @@ row_sel_dequeue_cached_row_for_mysql( buf, cached_rec, templ); } } else { - ut_memcpy(buf, cached_rec, prebuilt->mysql_prefix_len); + memcpy(buf, cached_rec, prebuilt->mysql_prefix_len); } prebuilt->n_fetch_cached--; @@ -3794,9 +3785,8 @@ row_sel_enqueue_cache_row_for_mysql( next fetch cache slot. */ if (prebuilt->pk_filter || prebuilt->idx_cond) { - byte* dest = row_sel_fetch_last_buf(prebuilt); - - ut_memcpy(dest, mysql_rec, prebuilt->mysql_row_len); + memcpy(row_sel_fetch_last_buf(prebuilt), mysql_rec, + prebuilt->mysql_row_len); } ++prebuilt->n_fetch_cached; @@ -4197,29 +4187,18 @@ row_search_mvcc( const rec_t* result_rec = NULL; const rec_t* clust_rec; Row_sel_get_clust_rec_for_mysql row_sel_get_clust_rec_for_mysql; - dberr_t err = DB_SUCCESS; ibool unique_search = FALSE; ibool mtr_has_extra_clust_latch = FALSE; ibool moves_up = FALSE; - ibool set_also_gap_locks = TRUE; - /* if the query is a plain locking SELECT, and the isolation level - is <= TRX_ISO_READ_COMMITTED, then this is set to FALSE */ - ibool did_semi_consistent_read = FALSE; /* if the returned record was locked and we did a semi-consistent read (fetch the newest committed version), then this is set to TRUE */ ulint next_offs; ibool same_user_rec; - mtr_t mtr; - mem_heap_t* heap = NULL; - offset_t offsets_[REC_OFFS_NORMAL_SIZE]; - offset_t* offsets = offsets_; ibool table_lock_waited = FALSE; byte* next_buf = 0; bool spatial_search = false; - rec_offs_init(offsets_); - ut_ad(index && pcur && search_tuple); ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED); ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED); @@ -4253,8 +4232,8 @@ row_search_mvcc( && (prebuilt->read_just_key || prebuilt->m_read_virtual_key); - /* Reset the new record lock info if srv_locks_unsafe_for_binlog - is set or session is using a READ COMMITTED isolation level. Then + /* Reset the new record lock info if READ UNCOMMITTED or + READ COMMITED isolation level is used. Then we are able to remove the record locks set here on an individual row. */ prebuilt->new_rec_locks = 0; @@ -4297,20 +4276,18 @@ row_search_mvcc( row_sel_dequeue_cached_row_for_mysql(buf, prebuilt); prebuilt->n_rows_fetched++; - - err = DB_SUCCESS; - goto func_exit; + trx->op_info = ""; + DBUG_RETURN(DB_SUCCESS); } if (prebuilt->fetch_cache_first > 0 && prebuilt->fetch_cache_first < MYSQL_FETCH_CACHE_SIZE) { - +early_not_found: /* The previous returned row was popped from the fetch cache, but the cache was not full at the time of the popping: no more rows can exist in the result set */ - - err = DB_RECORD_NOT_FOUND; - goto func_exit; + trx->op_info = ""; + DBUG_RETURN(DB_RECORD_NOT_FOUND); } prebuilt->n_rows_fetched++; @@ -4354,22 +4331,28 @@ row_search_mvcc( if (UNIV_UNLIKELY(direction != 0 && !prebuilt->used_in_HANDLER)) { - - err = DB_RECORD_NOT_FOUND; - goto func_exit; + goto early_not_found; } } /* We don't support sequencial scan for Rtree index, because it is no meaning to do so. */ - if (dict_index_is_spatial(index) - && !RTREE_SEARCH_MODE(mode)) { - err = DB_END_OF_INDEX; - goto func_exit; + if (dict_index_is_spatial(index) && !RTREE_SEARCH_MODE(mode)) { + trx->op_info = ""; + DBUG_RETURN(DB_END_OF_INDEX); } + /* if the query is a plain locking SELECT, and the isolation level + is <= TRX_ISO_READ_COMMITTED, then this is set to FALSE */ + bool did_semi_consistent_read = false; + mtr_t mtr; mtr.start(); + mem_heap_t* heap = NULL; + offset_t offsets_[REC_OFFS_NORMAL_SIZE]; + offset_t* offsets = offsets_; + rec_offs_init(offsets_); + #ifdef BTR_CUR_HASH_ADAPT /*-------------------------------------------------------------*/ /* PHASE 2: Try fast adaptive hash index search if possible */ @@ -4399,6 +4382,7 @@ row_search_mvcc( let us try a search shortcut through the hash index. */ + dberr_t err = DB_SUCCESS; switch (row_sel_try_search_shortcut_for_mysql( &rec, prebuilt, &offsets, &heap, &mtr)) { @@ -4418,9 +4402,10 @@ row_search_mvcc( case ICP_OUT_OF_RANGE: case ICP_ABORTED_BY_USER: case ICP_ERROR: - goto shortcut_mismatch; + err = DB_RECORD_NOT_FOUND; + goto shortcut_done; case ICP_MATCH: - goto shortcut_match; + goto shortcut_done; } } @@ -4443,21 +4428,19 @@ row_search_mvcc( break; } - shortcut_match: - mtr.commit(); - - /* NOTE that we do NOT store the cursor - position */ - err = DB_SUCCESS; - goto func_exit; + goto shortcut_done; case SEL_EXHAUSTED: - shortcut_mismatch: + err = DB_RECORD_NOT_FOUND; + shortcut_done: mtr.commit(); + /* NOTE that we do NOT store the cursor position */ - err = DB_RECORD_NOT_FOUND; - goto func_exit; + trx->op_info = ""; + ut_ad(!sync_check_iterate(sync_check())); + ut_ad(!did_semi_consistent_read); + DBUG_RETURN(err); case SEL_RETRY: break; @@ -4495,22 +4478,16 @@ row_search_mvcc( || prebuilt->table->no_rollback() || srv_read_only_mode); - if (trx->isolation_level <= TRX_ISO_READ_COMMITTED - && prebuilt->select_lock_type != LOCK_NONE - && trx->mysql_thd != NULL - && thd_is_select(trx->mysql_thd)) { - /* It is a plain locking SELECT and the isolation - level is low: do not lock gaps */ - - set_also_gap_locks = FALSE; - } + /* Do not lock gaps for plain SELECT + at READ UNCOMMITTED or READ COMMITTED isolation level */ + const bool set_also_gap_locks = + prebuilt->select_lock_type != LOCK_NONE + && (trx->isolation_level > TRX_ISO_READ_COMMITTED + || !thd_is_select(trx->mysql_thd)) #ifdef WITH_WSREP - else if (wsrep_thd_skip_locking(trx->mysql_thd)) { - ut_ad(!strcmp(wsrep_get_sr_table_name(), - prebuilt->table->name.m_name)); - set_also_gap_locks = FALSE; - } + && !wsrep_thd_skip_locking(trx->mysql_thd) #endif /* WITH_WSREP */ + ; /* Note that if the search mode was GE or G, then the cursor naturally moves upward (in fetch next) in alphabetical order, @@ -4531,6 +4508,8 @@ row_search_mvcc( clust_index = dict_table_get_first_index(prebuilt->table); + dberr_t err = DB_SUCCESS; + /* Do some start-of-statement preparations */ if (prebuilt->table->no_rollback()) { @@ -4599,18 +4578,9 @@ wait_table_again: pcur->btr_cur.thr = thr; if (dict_index_is_spatial(index)) { - bool need_pred_lock; - - need_pred_lock = (set_also_gap_locks - && !(srv_locks_unsafe_for_binlog - || trx->isolation_level - <= TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type - != LOCK_NONE); - if (!prebuilt->rtr_info) { prebuilt->rtr_info = rtr_create_rtr_info( - need_pred_lock, true, + set_also_gap_locks, true, btr_pcur_get_btr_cur(pcur), index); prebuilt->rtr_info->search_tuple = search_tuple; prebuilt->rtr_info->search_mode = mode; @@ -4619,7 +4589,7 @@ wait_table_again: } else { rtr_info_reinit_in_cursor( btr_pcur_get_btr_cur(pcur), - index, need_pred_lock); + index, set_also_gap_locks); prebuilt->rtr_info->search_tuple = search_tuple; prebuilt->rtr_info->search_mode = mode; } @@ -4640,11 +4610,8 @@ wait_table_again: ut_ad(page_rec_is_leaf(rec)); if (!moves_up - && !page_rec_is_supremum(rec) && set_also_gap_locks - && !(srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE + && !page_rec_is_supremum(rec) && !dict_index_is_spatial(index)) { /* Try to place a gap lock on the next index record @@ -4724,16 +4691,14 @@ rec_loop: if (page_rec_is_supremum(rec)) { if (set_also_gap_locks - && !(srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE + && trx->isolation_level > TRX_ISO_READ_COMMITTED && !dict_index_is_spatial(index)) { /* Try to place a lock on the index record */ - /* If innodb_locks_unsafe_for_binlog option is used - or this session is using a READ COMMITTED or lower isolation - level we do not lock gaps. Supremum record is really + /* If the transaction isolation level is + READ UNCOMMITTED or READ COMMITTED, + we do not lock gaps. Supremum record is really a gap and therefore we do not set locks there. */ offsets = rec_get_offsets(rec, index, offsets, true, @@ -4797,14 +4762,13 @@ wrong_offs: ib::error() << "Rec address " << static_cast<const void*>(rec) << ", buf block fix count " - << btr_cur_get_block( - btr_pcur_get_btr_cur(pcur))->page + << btr_pcur_get_block(pcur)->page .buf_fix_count; ib::error() << "Index corruption: rec offs " << page_offset(rec) << " next offs " << next_offs << ", page no " - << page_get_page_no(page_align(rec)) + << btr_pcur_get_block(pcur)->page.id.page_no() << ", index " << index->name << " of table " << index->table->name << ". Run CHECK TABLE. You may need to" @@ -4821,7 +4785,7 @@ wrong_offs: ib::info() << "Index corruption: rec offs " << page_offset(rec) << " next offs " << next_offs << ", page no " - << page_get_page_no(page_align(rec)) + << btr_pcur_get_block(pcur)->page.id.page_no() << ", index " << index->name << " of table " << index->table->name << ". We try to skip the rest of the page."; @@ -4849,7 +4813,7 @@ wrong_offs: ib::error() << "Index corruption: rec offs " << page_offset(rec) << " next offs " << next_offs << ", page no " - << page_get_page_no(page_align(rec)) + << btr_pcur_get_block(pcur)->page.id.page_no() << ", index " << index->name << " of table " << index->table->name << ". We try to skip the record."; @@ -4872,17 +4836,7 @@ wrong_offs: if (0 != cmp_dtuple_rec(search_tuple, rec, offsets)) { if (set_also_gap_locks - && !(srv_locks_unsafe_for_binlog - || trx->isolation_level - <= TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE && !dict_index_is_spatial(index)) { - - /* Try to place a gap lock on the index - record only if innodb_locks_unsafe_for_binlog - option is not set or this session is not - using a READ COMMITTED or lower isolation level. */ - err = sel_set_rec_lock( pcur, rec, index, offsets, @@ -4917,17 +4871,7 @@ wrong_offs: if (!cmp_dtuple_is_prefix_of_rec(search_tuple, rec, offsets)) { if (set_also_gap_locks - && !(srv_locks_unsafe_for_binlog - || trx->isolation_level - <= TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE && !dict_index_is_spatial(index)) { - - /* Try to place a gap lock on the index - record only if innodb_locks_unsafe_for_binlog - option is not set or this session is not - using a READ COMMITTED or lower isolation level. */ - err = sel_set_rec_lock( pcur, rec, index, offsets, @@ -4967,15 +4911,9 @@ wrong_offs: is a non-delete marked record, then it is enough to lock its existence with LOCK_REC_NOT_GAP. */ - /* If innodb_locks_unsafe_for_binlog option is used - or this session is using a READ COMMITTED isolation - level we lock only the record, i.e., next-key locking is - not used. */ - ulint lock_type; - if (srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) { + if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) { /* At READ COMMITTED or READ UNCOMMITTED isolation levels, do not lock committed delete-marked records. */ @@ -5056,9 +4994,7 @@ no_gap_lock: switch (err) { const rec_t* old_vers; case DB_SUCCESS_LOCKED_REC: - if (srv_locks_unsafe_for_binlog - || trx->isolation_level - <= TRX_ISO_READ_COMMITTED) { + if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) { /* Note that a record of prebuilt->index was locked. */ prebuilt->new_rec_locks = 1; @@ -5121,7 +5057,7 @@ no_gap_lock: goto next_rec; } - did_semi_consistent_read = TRUE; + did_semi_consistent_read = true; rec = old_vers; break; case DB_RECORD_NOT_FOUND: @@ -5308,9 +5244,7 @@ requires_clust_rec: break; case DB_SUCCESS_LOCKED_REC: ut_a(clust_rec != NULL); - if (srv_locks_unsafe_for_binlog - || trx->isolation_level - <= TRX_ISO_READ_COMMITTED) { + if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) { /* Note that the clustered index record was locked. */ prebuilt->new_rec_locks = 2; @@ -5326,8 +5260,7 @@ requires_clust_rec: /* The record is delete marked: we can skip it */ - if ((srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) + if (trx->isolation_level <= TRX_ISO_READ_COMMITTED && prebuilt->select_lock_type != LOCK_NONE) { /* No need to keep a lock on a delete-marked @@ -5551,7 +5484,7 @@ next_rec: == ROW_READ_DID_SEMI_CONSISTENT)) { prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; } - did_semi_consistent_read = FALSE; + did_semi_consistent_read = false; prebuilt->new_rec_locks = 0; vrow = NULL; @@ -5650,7 +5583,7 @@ page_read_error: == ROW_READ_DID_SEMI_CONSISTENT)) { prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; } - did_semi_consistent_read = FALSE; + did_semi_consistent_read = false; lock_table_wait: mtr.commit(); @@ -5686,8 +5619,7 @@ lock_table_wait: moves_up, &mtr); } - if ((srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) + if (trx->isolation_level <= TRX_ISO_READ_COMMITTED && !same_user_rec) { /* Since we were not able to restore the cursor diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc index 5ba9de18ae9..74c0b51fbbf 100644 --- a/storage/innobase/row/row0uins.cc +++ b/storage/innobase/row/row0uins.cc @@ -130,8 +130,7 @@ row_undo_ins_remove_clust_rec( == RW_X_LATCH); ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); - dict_drop_index_tree(rec, &node->pcur, node->trx, - &mtr); + dict_drop_index_tree(&node->pcur, node->trx, &mtr); mtr.commit(); mtr.start(); @@ -208,25 +207,29 @@ func_exit: /* When rolling back the very first instant ADD COLUMN operation, reset the root page to the basic state. */ ut_ad(!index->table->is_temporary()); - if (page_t* root = btr_root_get(index, &mtr)) { - byte* page_type = root + FIL_PAGE_TYPE; + if (buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, + &mtr)) { + byte* page_type = root->frame + FIL_PAGE_TYPE; ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT || mach_read_from_2(page_type) == FIL_PAGE_INDEX); - mlog_write_ulint(page_type, FIL_PAGE_INDEX, - MLOG_2BYTES, &mtr); - byte* instant = PAGE_INSTANT + PAGE_HEADER + root; - mlog_write_ulint(instant, - page_ptr_get_direction(instant + 1), - MLOG_2BYTES, &mtr); - rec_t* infimum = page_get_infimum_rec(root); - rec_t* supremum = page_get_supremum_rec(root); + mtr.write<2,mtr_t::OPT>(*root, page_type, + FIL_PAGE_INDEX); + byte* instant = PAGE_INSTANT + PAGE_HEADER + + root->frame; + mtr.write<2,mtr_t::OPT>( + *root, instant, + page_ptr_get_direction(instant + 1)); + rec_t* infimum = page_get_infimum_rec(root->frame); + rec_t* supremum = page_get_supremum_rec(root->frame); static const byte str[8 + 8] = "supremuminfimum"; if (memcmp(infimum, str + 8, 8) || memcmp(supremum, str, 8)) { - mlog_write_string(infimum, str + 8, 8, &mtr); - mlog_write_string(supremum, str, 8, &mtr); + mtr.memcpy(root, page_offset(infimum), + str + 8, 8); + mtr.memcpy(root, page_offset(supremum), + str, 8); } } } diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc index e028e0ccb56..cbf3a9d1726 100644 --- a/storage/innobase/row/row0umod.cc +++ b/storage/innobase/row/row0umod.cc @@ -156,26 +156,27 @@ row_undo_mod_clust_low( && node->ref == &trx_undo_metadata && btr_cur_get_index(btr_cur)->table->instant && node->update->info_bits == REC_INFO_METADATA_ADD) { - if (page_t* root = btr_root_get( - btr_cur_get_index(btr_cur), mtr)) { - byte* infimum; - byte *supremum; - if (page_is_comp(root)) { - infimum = PAGE_NEW_INFIMUM + root; - supremum = PAGE_NEW_SUPREMUM + root; + if (buf_block_t* root = btr_root_block_get( + btr_cur_get_index(btr_cur), RW_SX_LATCH, + mtr)) { + uint16_t infimum, supremum; + if (page_is_comp(root->frame)) { + infimum = PAGE_NEW_INFIMUM; + supremum = PAGE_NEW_SUPREMUM; } else { - infimum = PAGE_OLD_INFIMUM + root; - supremum = PAGE_OLD_SUPREMUM + root; + infimum = PAGE_OLD_INFIMUM; + supremum = PAGE_OLD_SUPREMUM; } - ut_ad(!memcmp(infimum, INFIMUM, 8) - == !memcmp(supremum, SUPREMUM, 8)); + ut_ad(!memcmp(root->frame + infimum, + INFIMUM, 8) + == !memcmp(root->frame + supremum, + SUPREMUM, 8)); - if (memcmp(infimum, INFIMUM, 8)) { - mlog_write_string(infimum, INFIMUM, - 8, mtr); - mlog_write_string(supremum, SUPREMUM, - 8, mtr); + if (memcmp(root->frame + infimum, INFIMUM, 8)) { + mtr->memcpy(root, infimum, INFIMUM, 8); + mtr->memcpy(root, supremum, SUPREMUM, + 8); } } } @@ -459,7 +460,7 @@ row_undo_mod_clust( if (trx_id_offset) { } else if (rec_is_metadata(rec, *index)) { ut_ad(!buf_block_get_page_zip(btr_pcur_get_block( - &node->pcur))); + pcur))); for (unsigned i = index->first_user_field(); i--; ) { trx_id_offset += index->fields[i].fixed_len; } @@ -481,15 +482,21 @@ row_undo_mod_clust( || rec_is_alter_metadata(rec, *index)); index->set_modified(mtr); if (page_zip_des_t* page_zip = buf_block_get_page_zip( - btr_pcur_get_block(&node->pcur))) { + btr_pcur_get_block(pcur))) { page_zip_write_trx_id_and_roll_ptr( page_zip, rec, offsets, trx_id_pos, 0, 1ULL << ROLL_PTR_INSERT_FLAG_POS, &mtr); } else { - mlog_write_string(rec + trx_id_offset, - reset_trx_id, - sizeof reset_trx_id, &mtr); + buf_block_t* block = btr_pcur_get_block(pcur); + uint16_t offs = page_offset(rec + + trx_id_offset); + mtr.memset(block, offs, DATA_TRX_ID_LEN, 0); + offs += DATA_TRX_ID_LEN; + mtr.write<1,mtr_t::OPT>(*block, block->frame + + offs, 0x80U); + mtr.memset(block, offs + 1, + DATA_ROLL_PTR_LEN - 1, 0); } } } else { diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc index 1cabeb3542b..9bf90f0c3c5 100644 --- a/storage/innobase/row/row0undo.cc +++ b/storage/innobase/row/row0undo.cc @@ -335,37 +335,30 @@ static bool row_undo_rec_get(undo_node_t* node) mtr_t mtr; mtr.start(); - page_t* undo_page = trx_undo_page_get_s_latched( + buf_block_t* undo_page = trx_undo_page_get_s_latched( page_id_t(undo->rseg->space->id, undo->top_page_no), &mtr); - ulint offset = undo->top_offset; - - trx_undo_rec_t* prev_rec = trx_undo_get_prev_rec( - undo_page + offset, undo->hdr_page_no, undo->hdr_offset, - true, &mtr); - - if (prev_rec == NULL) { - undo->top_undo_no = IB_ID_MAX; - ut_ad(undo->empty()); - } else { - page_t* prev_rec_page = page_align(prev_rec); - - if (prev_rec_page != undo_page) { + uint16_t offset = undo->top_offset; + buf_block_t* prev_page = undo_page; + if (trx_undo_rec_t* prev_rec = trx_undo_get_prev_rec( + prev_page, offset, undo->hdr_page_no, undo->hdr_offset, + true, &mtr)) { + if (prev_page != undo_page) { trx->pages_undone++; } - undo->top_page_no = page_get_page_no(prev_rec_page); - undo->top_offset = ulint(prev_rec - prev_rec_page); + undo->top_page_no = prev_page->page.id.page_no(); + undo->top_offset = page_offset(prev_rec); undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec); ut_ad(!undo->empty()); + } else { + undo->top_undo_no = IB_ID_MAX; + ut_ad(undo->empty()); } - { - const trx_undo_rec_t* undo_rec = undo_page + offset; - node->undo_rec = trx_undo_rec_copy(undo_rec, node->heap); - } - + node->undo_rec = trx_undo_rec_copy(undo_page->frame + offset, + node->heap); mtr.commit(); switch (trx_undo_rec_get_type(node->undo_rec)) { diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index 6ddd4046f09..1d776ab35fb 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -2755,7 +2755,7 @@ err_exit: insert fails, then this disown will be undone when the operation is rolled back. */ btr_cur_disown_inherited_fields( - btr_cur_get_page_zip(btr_cur), + btr_cur_get_block(btr_cur), rec, index, offsets, node->update, mtr); } @@ -3124,8 +3124,7 @@ row_upd_clust_step( ut_ad(!dict_index_is_online_ddl(index)); - dict_drop_index_tree( - btr_pcur_get_rec(pcur), pcur, trx, &mtr); + dict_drop_index_tree(pcur, trx, &mtr); mtr.commit(); diff --git a/storage/innobase/row/row0vers.cc b/storage/innobase/row/row0vers.cc index 5935b0a11ca..1d38a6753be 100644 --- a/storage/innobase/row/row0vers.cc +++ b/storage/innobase/row/row0vers.cc @@ -441,16 +441,14 @@ row_vers_impl_x_locked( @param[in,out] row the cluster index row in dtuple form @param[in] clust_index clustered index @param[in] index the secondary index -@param[in] heap heap used to build virtual dtuple -@param[in,out] vcol_info virtual column information. */ +@param[in] heap heap used to build virtual dtuple. */ static void row_vers_build_clust_v_col( dtuple_t* row, dict_index_t* clust_index, dict_index_t* index, - mem_heap_t* heap, - purge_vcol_info_t* vcol_info) + mem_heap_t* heap) { mem_heap_t* local_heap = NULL; VCOL_STORAGE *vcol_storage= NULL; @@ -461,10 +459,6 @@ row_vers_build_clust_v_col( ut_ad(dict_index_has_virtual(index)); ut_ad(index->table == clust_index->table); - if (vcol_info != NULL) { - vcol_info->set_used(); - maria_table = vcol_info->table(); - } DEBUG_SYNC(current_thd, "ib_clust_v_col_before_row_allocated"); innobase_allocate_row_for_vcol(thd, index, @@ -473,10 +467,7 @@ row_vers_build_clust_v_col( &record, &vcol_storage); - if (vcol_info && !vcol_info->table()) { - vcol_info->set_table(maria_table); - goto func_exit; - } + ut_ad(maria_table); for (ulint i = 0; i < dict_index_get_n_fields(index); i++) { const dict_field_t* ind_field = dict_index_get_nth_field( @@ -495,7 +486,6 @@ row_vers_build_clust_v_col( } } -func_exit: if (local_heap) { if (vcol_storage) innobase_free_row_for_vcol(vcol_storage); @@ -803,7 +793,6 @@ func_exit: @param[in,out] heap heap memory @param[in,out] v_heap heap memory to keep virtual colum dtuple @param[in] mtr mtr holding the latch on rec -@param[in,out] vcol_info virtual column information for purge thread @return dtuple contains virtual column data */ static dtuple_t* @@ -817,8 +806,7 @@ row_vers_build_cur_vrow( trx_id_t trx_id, mem_heap_t* heap, mem_heap_t* v_heap, - mtr_t* mtr, - purge_vcol_info_t* vcol_info) + mtr_t* mtr) { dtuple_t* cur_vrow = NULL; @@ -838,16 +826,8 @@ row_vers_build_cur_vrow( rec, *clust_offsets, NULL, NULL, NULL, NULL, heap); - if (vcol_info && !vcol_info->is_used()) { - mtr->commit(); - } - row_vers_build_clust_v_col( - row, clust_index, index, heap, vcol_info); - - if (vcol_info != NULL && vcol_info->is_first_fetch()) { - return NULL; - } + row, clust_index, index, heap); cur_vrow = dtuple_copy(row, v_heap); dtuple_dup_v_fld(cur_vrow, v_heap); @@ -879,7 +859,6 @@ this case we return TRUE. @param[in] ientry secondary index entry @param[in] roll_ptr roll_ptr for the purge record @param[in] trx_id transaction ID on the purging record -@param[in,out] vcol_info virtual column information for purge thread. @return TRUE if earlier version should have */ bool row_vers_old_has_index_entry( @@ -889,8 +868,7 @@ row_vers_old_has_index_entry( dict_index_t* index, const dtuple_t* ientry, roll_ptr_t roll_ptr, - trx_id_t trx_id, - purge_vcol_info_t* vcol_info) + trx_id_t trx_id) { const rec_t* version; rec_t* prev_version; @@ -907,9 +885,6 @@ row_vers_old_has_index_entry( ut_ad(mtr_memo_contains_page_flagged(mtr, rec, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_S_FIX)); - ut_ad(!rw_lock_own(&purge_sys.latch, RW_LOCK_S)); - ut_ad(also_curr || !vcol_info); - clust_index = dict_table_get_first_index(index->table); comp = page_rec_is_comp(rec); @@ -960,17 +935,8 @@ row_vers_old_has_index_entry( if (trx_undo_roll_ptr_is_insert(t_roll_ptr) || dbug_v_purge) { - if (vcol_info && !vcol_info->is_used()) { - mtr->commit(); - } - row_vers_build_clust_v_col( - row, clust_index, index, heap, - vcol_info); - - if (vcol_info && vcol_info->is_first_fetch()) { - goto unsafe_to_purge; - } + row, clust_index, index, heap); entry = row_build_index_entry( row, ext, index, heap); @@ -1046,11 +1012,7 @@ unsafe_to_purge: cur_vrow = row_vers_build_cur_vrow( also_curr, rec, clust_index, &clust_offsets, - index, roll_ptr, trx_id, heap, v_heap, mtr, vcol_info); - - if (vcol_info && vcol_info->is_first_fetch()) { - goto unsafe_to_purge; - } + index, roll_ptr, trx_id, heap, v_heap, mtr); } version = rec; |