Diffstat (limited to 'storage/innobase/btr/btr0btr.cc')
-rw-r--r-- | storage/innobase/btr/btr0btr.cc | 591
1 file changed, 454 insertions, 137 deletions
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 8b7a19777ab..e3e127c3ace 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -1,6 +1,7 @@
 /*****************************************************************************
 
 Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -697,14 +698,16 @@ btr_root_fseg_validate(
 #endif /* UNIV_BTR_DEBUG */
 
 /**************************************************************//**
-Gets the root node of a tree and x-latches it.
-@return root page, x-latched */
+Gets the root node of a tree and x- or s-latches it.
+@return root page, x- or s-latched */
 static
 buf_block_t*
 btr_root_block_get(
 /*===============*/
-        dict_index_t*   index,  /*!< in: index tree */
-        mtr_t*          mtr)    /*!< in: mtr */
+        const dict_index_t*     index,  /*!< in: index tree */
+        ulint                   mode,   /*!< in: either RW_S_LATCH
+                                        or RW_X_LATCH */
+        mtr_t*                  mtr)    /*!< in: mtr */
 {
         ulint           space;
         ulint           zip_size;
@@ -715,8 +718,7 @@ btr_root_block_get(
         zip_size = dict_table_zip_size(index->table);
         root_page_no = dict_index_get_page(index);
 
-        block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH,
-                              index, mtr);
+        block = btr_block_get(space, zip_size, root_page_no, mode, index, mtr);
         btr_assert_not_corrupted(block, index);
 #ifdef UNIV_BTR_DEBUG
         if (!dict_index_is_ibuf(index)) {
@@ -739,10 +741,162 @@ UNIV_INTERN
 page_t*
 btr_root_get(
 /*=========*/
+        const dict_index_t*     index,  /*!< in: index tree */
+        mtr_t*                  mtr)    /*!< in: mtr */
+{
+        return(buf_block_get_frame(btr_root_block_get(index, RW_X_LATCH,
+                                                      mtr)));
+}
+
+/**************************************************************//**
+Gets the height of the B-tree (the level of the root, when the leaf
+level is assumed to be 0). The caller must hold an S or X latch on
+the index.
+@return tree height (level of the root) */
+UNIV_INTERN
+ulint
+btr_height_get(
+/*===========*/
         dict_index_t*   index,  /*!< in: index tree */
-        mtr_t*          mtr)    /*!< in: mtr */
+        mtr_t*          mtr)    /*!< in/out: mini-transaction */
+{
+        ulint           height;
+        buf_block_t*    root_block;
+
+        ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+                                MTR_MEMO_S_LOCK)
+              || mtr_memo_contains(mtr, dict_index_get_lock(index),
+                                   MTR_MEMO_X_LOCK));
+
+        /* S latches the page */
+        root_block = btr_root_block_get(index, RW_S_LATCH, mtr);
+
+        height = btr_page_get_level(buf_block_get_frame(root_block), mtr);
+
+        /* Release the S latch on the root page. */
+        mtr_memo_release(mtr, root_block, MTR_MEMO_PAGE_S_FIX);
+#ifdef UNIV_SYNC_DEBUG
+        sync_thread_reset_level(&root_block->lock);
+#endif /* UNIV_SYNC_DEBUG */
+
+        return(height);
+}
+
+/**************************************************************//**
+Checks a file segment header within a B-tree root page and updates
+the segment header space id.
+@return TRUE if valid */
+static
+bool
+btr_root_fseg_adjust_on_import(
+/*===========================*/
+        fseg_header_t*  seg_header,     /*!< in/out: segment header */
+        page_zip_des_t* page_zip,       /*!< in/out: compressed page,
+                                        or NULL */
+        ulint           space,          /*!< in: tablespace identifier */
+        mtr_t*          mtr)            /*!< in/out: mini-transaction */
 {
-        return(buf_block_get_frame(btr_root_block_get(index, mtr)));
+        ulint   offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET);
+
+        if (offset < FIL_PAGE_DATA
+            || offset > UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) {
+
+                return(FALSE);
+
+        } else if (page_zip) {
+                mach_write_to_4(seg_header + FSEG_HDR_SPACE, space);
+                page_zip_write_header(page_zip, seg_header + FSEG_HDR_SPACE,
+                                      4, mtr);
+        } else {
+                mlog_write_ulint(seg_header + FSEG_HDR_SPACE,
+                                 space, MLOG_4BYTES, mtr);
+        }
+
+        return(TRUE);
+}
+
+/**************************************************************//**
+Checks and adjusts the root node of a tree during IMPORT TABLESPACE.
+@return error code, or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+btr_root_adjust_on_import(
+/*======================*/
+        const dict_index_t*     index)  /*!< in: index tree */
+{
+        dberr_t         err;
+        mtr_t           mtr;
+        page_t*         page;
+        buf_block_t*    block;
+        page_zip_des_t* page_zip;
+        dict_table_t*   table = index->table;
+        ulint           space_id = dict_index_get_space(index);
+        ulint           zip_size = dict_table_zip_size(table);
+        ulint           root_page_no = dict_index_get_page(index);
+
+        mtr_start(&mtr);
+
+        mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+
+        DBUG_EXECUTE_IF("ib_import_trigger_corruption_3",
+                        return(DB_CORRUPTION););
+
+        block = btr_block_get(
+                space_id, zip_size, root_page_no, RW_X_LATCH, index, &mtr);
+
+        page = buf_block_get_frame(block);
+        page_zip = buf_block_get_page_zip(block);
+
+        /* Check that this is a B-tree page and both the PREV and NEXT
+        pointers are FIL_NULL, because the root page does not have any
+        siblings. */
+        if (fil_page_get_type(page) != FIL_PAGE_INDEX
+            || fil_page_get_prev(page) != FIL_NULL
+            || fil_page_get_next(page) != FIL_NULL) {
+
+                err = DB_CORRUPTION;
+
+        } else if (dict_index_is_clust(index)) {
+                bool    page_is_compact_format;
+
+                page_is_compact_format = page_is_comp(page) > 0;
+
+                /* Check if the page format and table format agree. */
+                if (page_is_compact_format != dict_table_is_comp(table)) {
+                        err = DB_CORRUPTION;
+                } else {
+
+                        /* Check that the table flags and the tablespace
+                        flags match. */
+                        ulint   flags = fil_space_get_flags(table->space);
+
+                        if (flags
+                            && flags != dict_tf_to_fsp_flags(table->flags)) {
+
+                                err = DB_CORRUPTION;
+                        } else {
+                                err = DB_SUCCESS;
+                        }
+                }
+        } else {
+                err = DB_SUCCESS;
+        }
+
+        /* Check and adjust the file segment headers, if all OK so far. */
+        if (err == DB_SUCCESS
+            && (!btr_root_fseg_adjust_on_import(
+                        FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+                        + page, page_zip, space_id, &mtr)
+                || !btr_root_fseg_adjust_on_import(
+                        FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+                        + page, page_zip, space_id, &mtr))) {
+
+                err = DB_CORRUPTION;
+        }
+
+        mtr_commit(&mtr);
+
+        return(err);
 }
 
 /*************************************************************//**
@@ -1033,8 +1187,7 @@ btr_get_size(
         ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
                                 MTR_MEMO_S_LOCK));
 
-        if (index->page == FIL_NULL
-            || index->to_be_dropped
+        if (index->page == FIL_NULL || dict_index_is_online_ddl(index)
             || *index->name == TEMP_INDEX_PREFIX) {
                 return(ULINT_UNDEFINED);
         }
@@ -1584,6 +1737,8 @@ btr_page_reorganize_low(
                                         there cannot exist locks on the
                                         page, and a hash index should not be
                                         dropped: it cannot exist */
+        ulint           compression_level,/*!< in: compression level to be used
+                                        if dealing with compressed page */
         buf_block_t*    block,  /*!< in: page to be reorganized */
         dict_index_t*   index,  /*!< in: record descriptor */
         mtr_t*          mtr)    /*!< in: mtr */
@@ -1601,6 +1756,8 @@ btr_page_reorganize_low(
         ulint           max_ins_size1;
         ulint           max_ins_size2;
         ibool           success         = FALSE;
+        byte            type;
+        byte*           log_ptr;
 
         ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
         btr_assert_not_corrupted(block, index);
@@ -1612,9 +1769,23 @@ btr_page_reorganize_low(
 
 #ifndef UNIV_HOTBACKUP
         /* Write the log record */
-        mlog_open_and_write_index(mtr, page, index, page_is_comp(page)
-                                  ? MLOG_COMP_PAGE_REORGANIZE
-                                  : MLOG_PAGE_REORGANIZE, 0);
+        if (page_zip) {
+                type = MLOG_ZIP_PAGE_REORGANIZE;
+        } else if (page_is_comp(page)) {
+                type = MLOG_COMP_PAGE_REORGANIZE;
+        } else {
+                type = MLOG_PAGE_REORGANIZE;
+        }
+
+        log_ptr = mlog_open_and_write_index(
+                mtr, page, index, type, page_zip ? 1 : 0);
+
+        /* For compressed pages write the compression level. */
+        if (log_ptr && page_zip) {
+                mach_write_to_1(log_ptr, compression_level);
+                mlog_close(mtr, log_ptr + 1);
+        }
+
 #endif /* !UNIV_HOTBACKUP */
 
         /* Turn logging off */
@@ -1662,7 +1833,9 @@ btr_page_reorganize_low(
                 ut_ad(max_trx_id != 0 || recovery);
         }
 
-        if (page_zip && !page_zip_compress(page_zip, page, index, NULL)) {
+        if (page_zip
+            && !page_zip_compress(page_zip, page, index,
+                                  compression_level, NULL)) {
 
                 /* Restore the old page and exit. */
                 btr_blob_dbg_restore(page, temp_page, index,
@@ -1750,7 +1923,8 @@ btr_page_reorganize(
         dict_index_t*   index,  /*!< in: record descriptor */
         mtr_t*          mtr)    /*!< in: mtr */
 {
-        return(btr_page_reorganize_low(FALSE, block, index, mtr));
+        return(btr_page_reorganize_low(FALSE, page_compression_level,
+                                       block, index, mtr));
 }
 #endif /* !UNIV_HOTBACKUP */
 
@@ -1762,18 +1936,32 @@ byte*
 btr_parse_page_reorganize(
 /*======================*/
         byte*           ptr,    /*!< in: buffer */
-        byte*           end_ptr __attribute__((unused)),
-                                /*!< in: buffer end */
+        byte*           end_ptr,/*!< in: buffer end */
         dict_index_t*   index,  /*!< in: record descriptor */
+        bool            compressed,/*!< in: true if compressed page */
         buf_block_t*    block,  /*!< in: page to be reorganized, or NULL */
         mtr_t*          mtr)    /*!< in: mtr or NULL */
 {
+        ulint   level = page_compression_level;
+
         ut_ad(ptr && end_ptr);
 
-        /* The record is empty, except for the record initial part */
+        /* If dealing with a compressed page the record has the
+        compression level used during original compression written in
+        one byte. Otherwise record is empty. */
+        if (compressed) {
+                if (ptr == end_ptr) {
+                        return(NULL);
+                }
+
+                level = (ulint)mach_read_from_1(ptr);
+
+                ut_a(level <= 9);
+                ++ptr;
+        }
 
         if (block != NULL) {
-                btr_page_reorganize_low(TRUE, block, index, mtr);
+                btr_page_reorganize_low(TRUE, level, block, index, mtr);
         }
 
         return(ptr);
@@ -1827,10 +2015,13 @@ UNIV_INTERN
 rec_t*
 btr_root_raise_and_insert(
 /*======================*/
+        ulint           flags,  /*!< in: undo logging and locking flags */
         btr_cur_t*      cursor, /*!< in: cursor at which to insert: must be
                                 on the root page; when the function returns,
                                 the cursor is positioned on the predecessor
                                 of the inserted record */
+        ulint**         offsets,/*!< out: offsets on inserted record */
+        mem_heap_t**    heap,   /*!< in/out: pointer to memory heap, or NULL */
         const dtuple_t* tuple,  /*!< in: tuple to insert */
         ulint           n_ext,  /*!< in: number of externally stored columns */
         mtr_t*          mtr)    /*!< in: mtr */
@@ -1840,7 +2031,6 @@ btr_root_raise_and_insert(
         page_t*         new_page;
         ulint           new_page_no;
         rec_t*          rec;
-        mem_heap_t*     heap;
         dtuple_t*       node_ptr;
         ulint           level;
         rec_t*          node_ptr_rec;
@@ -1926,7 +2116,9 @@ btr_root_raise_and_insert(
         lock_update_root_raise(new_block, root_block);
 
         /* Create a memory heap where the node pointer is stored */
-        heap = mem_heap_create(100);
+        if (!*heap) {
+                *heap = mem_heap_create(1000);
+        }
 
         rec = page_rec_get_next(page_get_infimum_rec(new_page));
         new_page_no = buf_block_get_page_no(new_block);
@@ -1934,8 +2126,8 @@ btr_root_raise_and_insert(
         /* Build the node pointer (= node key and page address) for the
         child */
 
-        node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
-                                             level);
+        node_ptr = dict_index_build_node_ptr(
                index, rec, new_page_no, *heap, level);
         /* The node pointer must be marked as the predefined minimum
         record, as there is no lower alphabetical limit to records in
         the leftmost node of a level: */
@@ -1961,15 +2153,12 @@ btr_root_raise_and_insert(
         page_cur_set_before_first(root_block, page_cursor);
 
         node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
-                                             index, 0, mtr);
+                                             index, offsets, heap, 0, mtr);
 
         /* The root page should only contain the node pointer
         to new_page at this point.  Thus, the data should fit. */
         ut_a(node_ptr_rec);
 
-        /* Free the memory heap */
-        mem_heap_free(heap);
-
         /* We play safe and reset the free bits for the new page */
 
 #if 0
@@ -1985,7 +2174,8 @@
                         PAGE_CUR_LE, page_cursor);
 
         /* Split the child and insert tuple */
-        return(btr_page_split_and_insert(cursor, tuple, n_ext, mtr));
+        return(btr_page_split_and_insert(flags, cursor, offsets, heap,
+                                         tuple, n_ext, mtr));
 }
 
 /*************************************************************//**
@@ -2213,9 +2403,9 @@ func_exit:
 /*************************************************************//**
 Returns TRUE if the insert fits on the appropriate half-page with the
 chosen split_rec.
-@return TRUE if fits */
-static
-ibool
+@return true if fits */
+static __attribute__((nonnull(1,3,4,6), warn_unused_result))
+bool
 btr_page_insert_fits(
 /*=================*/
         btr_cur_t*      cursor, /*!< in: cursor at which insert
@@ -2223,11 +2413,11 @@ btr_page_insert_fits(
         const rec_t*    split_rec,/*!< in: suggestion for first record
                                 on upper half-page, or NULL if
                                 tuple to be inserted should be first */
-        const ulint*    offsets,/*!< in: rec_get_offsets(
-                                split_rec, cursor->index) */
+        ulint**         offsets,/*!< in: rec_get_offsets(
+                                split_rec, cursor->index); out: garbage */
         const dtuple_t* tuple,  /*!< in: tuple to insert */
         ulint           n_ext,  /*!< in: number of externally stored columns */
-        mem_heap_t*     heap)   /*!< in: temporary memory heap */
+        mem_heap_t**    heap)   /*!< in: temporary memory heap */
 {
         page_t* page;
         ulint   insert_size;
@@ -2236,15 +2426,13 @@ btr_page_insert_fits(
         ulint   total_n_recs;
         const rec_t*    rec;
         const rec_t*    end_rec;
-        ulint*  offs;
 
         page = btr_cur_get_page(cursor);
 
-        ut_ad(!split_rec == !offsets);
-        ut_ad(!offsets
-              || !page_is_comp(page) == !rec_offs_comp(offsets));
-        ut_ad(!offsets
-              || rec_offs_validate(split_rec, cursor->index, offsets));
+        ut_ad(!split_rec
+              || !page_is_comp(page) == !rec_offs_comp(*offsets));
+        ut_ad(!split_rec
+              || rec_offs_validate(split_rec, cursor->index, *offsets));
 
         insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
         free_space = page_get_free_space_of_empty(page_is_comp(page));
@@ -2262,7 +2450,7 @@ btr_page_insert_fits(
                 rec = page_rec_get_next(page_get_infimum_rec(page));
                 end_rec = page_rec_get_next(btr_cur_get_rec(cursor));
 
-        } else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) {
+        } else if (cmp_dtuple_rec(tuple, split_rec, *offsets) >= 0) {
 
                 rec = page_rec_get_next(page_get_infimum_rec(page));
                 end_rec = split_rec;
@@ -2277,19 +2465,17 @@ btr_page_insert_fits(
                 /* Ok, there will be enough available space on the
                 half page where the tuple is inserted */
 
-                return(TRUE);
+                return(true);
         }
 
-        offs = NULL;
-
         while (rec != end_rec) {
                 /* In this loop we calculate the amount of reserved
                 space after rec is removed from page. */
 
-                offs = rec_get_offsets(rec, cursor->index, offs,
-                                       ULINT_UNDEFINED, &heap);
+                *offsets = rec_get_offsets(rec, cursor->index, *offsets,
+                                           ULINT_UNDEFINED, heap);
 
-                total_data -= rec_offs_size(offs);
+                total_data -= rec_offs_size(*offsets);
                 total_n_recs--;
 
                 if (total_data + page_dir_calc_reserved_space(total_n_recs)
@@ -2298,13 +2484,13 @@ btr_page_insert_fits(
                         /* Ok, there will be enough available space on the
                         half page where the tuple is inserted */
 
-                        return(TRUE);
+                        return(true);
                 }
 
                 rec = page_rec_get_next_const(rec);
         }
 
-        return(FALSE);
+        return(false);
 }
 
 /*******************************************************//**
@@ -2314,6 +2500,7 @@ UNIV_INTERN
 void
 btr_insert_on_non_leaf_level_func(
 /*==============================*/
+        ulint           flags,  /*!< in: undo logging and locking flags */
         dict_index_t*   index,  /*!< in: index */
         ulint           level,  /*!< in: level, must be > 0 */
         dtuple_t*       tuple,  /*!< in: the record to be inserted */
@@ -2323,8 +2510,10 @@ btr_insert_on_non_leaf_level_func(
 {
         big_rec_t*      dummy_big_rec;
         btr_cur_t       cursor;
-        ulint           err;
+        dberr_t         err;
         rec_t*          rec;
+        ulint*          offsets = NULL;
+        mem_heap_t*     heap = NULL;
 
         ut_ad(level > 0);
 
@@ -2335,26 +2524,35 @@ btr_insert_on_non_leaf_level_func(
         ut_ad(cursor.flag == BTR_CUR_BINARY);
 
         err = btr_cur_optimistic_insert(
-                BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG
-                | BTR_NO_UNDO_LOG_FLAG, &cursor, tuple, &rec,
-                &dummy_big_rec, 0, NULL, mtr);
+                flags
+                | BTR_NO_LOCKING_FLAG
+                | BTR_KEEP_SYS_FLAG
+                | BTR_NO_UNDO_LOG_FLAG,
+                &cursor, &offsets, &heap,
+                tuple, &rec, &dummy_big_rec, 0, NULL, mtr);
 
         if (err == DB_FAIL) {
-                err = btr_cur_pessimistic_insert(
-                        BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG
-                        | BTR_NO_UNDO_LOG_FLAG,
-                        &cursor, tuple, &rec, &dummy_big_rec, 0, NULL, mtr);
+                err = btr_cur_pessimistic_insert(flags
+                                                 | BTR_NO_LOCKING_FLAG
+                                                 | BTR_KEEP_SYS_FLAG
+                                                 | BTR_NO_UNDO_LOG_FLAG,
+                                                 &cursor, &offsets, &heap,
+                                                 tuple, &rec,
+                                                 &dummy_big_rec, 0, NULL, mtr);
                 ut_a(err == DB_SUCCESS);
         }
+        mem_heap_free(heap);
 }
 
 /**************************************************************//**
 Attaches the halves of an index page on the appropriate level in an
 index tree. */
-static
+static __attribute__((nonnull))
 void
 btr_attach_half_pages(
 /*==================*/
+        ulint           flags,          /*!< in: undo logging and
+                                        locking flags */
         dict_index_t*   index,          /*!< in: the index tree */
         buf_block_t*    block,          /*!< in/out: page to be split */
         const rec_t*    split_rec,      /*!< in: first record on upper
@@ -2432,7 +2630,8 @@ btr_attach_half_pages(
         /* Insert it next to the pointer to the lower half. Note that this
         may generate recursion leading to a split on the higher level. */
 
-        btr_insert_on_non_leaf_level(index, level + 1, node_ptr_upper, mtr);
+        btr_insert_on_non_leaf_level(flags, index, level + 1,
                                     node_ptr_upper, mtr);
 
         /* Free the memory heap */
         mem_heap_free(heap);
@@ -2484,13 +2683,13 @@ btr_attach_half_pages(
 /*************************************************************//**
 Determine if a tuple is smaller than any record on the page.
 @return TRUE if smaller */
-static
-ibool
+static __attribute__((nonnull, warn_unused_result))
+bool
 btr_page_tuple_smaller(
 /*===================*/
         btr_cur_t*      cursor, /*!< in: b-tree cursor */
         const dtuple_t* tuple,  /*!< in: tuple to consider */
-        ulint*          offsets,/*!< in/out: temporary storage */
+        ulint**         offsets,/*!< in/out: temporary storage */
         ulint           n_uniq, /*!< in: number of unique fields in
                                 the index page records */
         mem_heap_t**    heap)   /*!< in/out: heap for offsets */
@@ -2505,11 +2704,11 @@ btr_page_tuple_smaller(
         page_cur_move_to_next(&pcur);
         first_rec = page_cur_get_rec(&pcur);
 
-        offsets = rec_get_offsets(
-                first_rec, cursor->index, offsets,
+        *offsets = rec_get_offsets(
+                first_rec, cursor->index, *offsets,
                 n_uniq, heap);
 
-        return(cmp_dtuple_rec(tuple, first_rec, offsets) < 0);
+        return(cmp_dtuple_rec(tuple, first_rec, *offsets) < 0);
 }
 
 /*************************************************************//**
@@ -2525,9 +2724,12 @@ UNIV_INTERN
 rec_t*
 btr_page_split_and_insert(
 /*======================*/
+        ulint           flags,  /*!< in: undo logging and locking flags */
         btr_cur_t*      cursor, /*!< in: cursor at which to insert; when the
                                 function returns, the cursor is positioned
                                 on the predecessor of the inserted record */
+        ulint**         offsets,/*!< out: offsets on inserted record */
+        mem_heap_t**    heap,   /*!< in/out: pointer to memory heap, or NULL */
         const dtuple_t* tuple,  /*!< in: tuple to insert */
         ulint           n_ext,  /*!< in: number of externally stored columns */
         mtr_t*          mtr)    /*!< in: mtr */
@@ -2553,18 +2755,21 @@ btr_page_split_and_insert(
         ibool           insert_left;
         ulint           n_iterations = 0;
         rec_t*          rec;
-        mem_heap_t*     heap;
         ulint           n_uniq;
-        ulint*          offsets;
 
-        heap = mem_heap_create(1024);
+        if (!*heap) {
+                *heap = mem_heap_create(1024);
+        }
         n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
 func_start:
-        mem_heap_empty(heap);
-        offsets = NULL;
+        mem_heap_empty(*heap);
+        *offsets = NULL;
 
         ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
                                 MTR_MEMO_X_LOCK));
+        ut_ad(!dict_index_is_online_ddl(cursor->index)
+              || (flags & BTR_CREATE_FLAG)
+              || dict_index_is_clust(cursor->index));
 #ifdef UNIV_SYNC_DEBUG
         ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
 #endif /* UNIV_SYNC_DEBUG */
@@ -2590,7 +2795,7 @@ func_start:
                 if (split_rec == NULL) {
                         insert_left = btr_page_tuple_smaller(
-                                cursor, tuple, offsets, n_uniq, &heap);
+                                cursor, tuple, offsets, n_uniq, heap);
                 }
         } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
                 direction = FSP_UP;
@@ -2612,7 +2817,7 @@ func_start:
                 if (page_get_n_recs(page) > 1) {
                         split_rec = page_get_middle_rec(page);
                 } else if (btr_page_tuple_smaller(cursor, tuple,
-                                                  offsets, n_uniq, &heap)) {
+                                                  offsets, n_uniq, heap)) {
                         split_rec = page_rec_get_next(
                                 page_get_infimum_rec(page));
                 } else {
@@ -2635,10 +2840,10 @@ func_start:
         if (split_rec) {
                 first_rec = move_limit = split_rec;
 
-                offsets = rec_get_offsets(split_rec, cursor->index, offsets,
-                                          n_uniq, &heap);
+                *offsets = rec_get_offsets(split_rec, cursor->index, *offsets,
                                           n_uniq, heap);
 
-                insert_left = cmp_dtuple_rec(tuple, split_rec, offsets) < 0;
+                insert_left = cmp_dtuple_rec(tuple, split_rec, *offsets) < 0;
 
                 if (!insert_left && new_page_zip && n_iterations > 0) {
                         /* If a compressed page has already been split,
@@ -2665,7 +2870,7 @@ insert_empty:
 
         /* 4. Do first the modifications in the tree structure */
 
-        btr_attach_half_pages(cursor->index, block,
+        btr_attach_half_pages(flags, cursor->index, block,
                               first_rec, new_block, direction, mtr);
 
         /* If the split is made on the leaf level and the insert will fit
@@ -2685,10 +2890,11 @@ insert_empty:
                 insert_will_fit = !new_page_zip
                         && btr_page_insert_fits(cursor, NULL,
-                                                NULL, tuple, n_ext, heap);
+                                                offsets, tuple, n_ext, heap);
         }
 
-        if (insert_will_fit && page_is_leaf(page)) {
+        if (insert_will_fit && page_is_leaf(page)
+            && !dict_index_is_online_ddl(cursor->index)) {
 
                 mtr_memo_release(mtr, dict_index_get_lock(cursor->index),
                                  MTR_MEMO_X_LOCK);
 
@@ -2805,8 +3011,8 @@ insert_empty:
         page_cur_search(insert_block, cursor->index, tuple,
                         PAGE_CUR_LE, page_cursor);
 
-        rec = page_cur_tuple_insert(page_cursor, tuple,
-                                    cursor->index, n_ext, mtr);
+        rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
                                    offsets, heap, n_ext, mtr);
 
 #ifdef UNIV_ZIP_DEBUG
         {
@@ -2837,7 +3043,7 @@ insert_empty:
         page_cur_search(insert_block, cursor->index, tuple,
                         PAGE_CUR_LE, page_cursor);
         rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
-                                    n_ext, mtr);
+                                    offsets, heap, n_ext, mtr);
 
         if (rec == NULL) {
                 /* The insert did not fit on the page: loop back to the
@@ -2878,7 +3084,7 @@ func_exit:
         ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
         ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
 
-        mem_heap_free(heap);
+        ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets));
         return(rec);
 }
 
@@ -3058,15 +3264,15 @@ btr_node_ptr_delete(
 {
         btr_cur_t       cursor;
         ibool           compressed;
-        ulint           err;
+        dberr_t         err;
 
         ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 
         /* Delete node pointer on father page */
         btr_page_get_father(index, block, mtr, &cursor);
 
-        compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, RB_NONE,
-                                                mtr);
+        compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor,
                                                BTR_CREATE_FLAG, RB_NONE, mtr);
         ut_a(err == DB_SUCCESS);
 
         if (!compressed) {
@@ -3098,6 +3304,8 @@ btr_lift_page_up(
         buf_block_t*    blocks[BTR_MAX_LEVELS];
         ulint           n_blocks;       /*!< last used index in blocks[] */
         ulint           i;
+        bool            lift_father_up;
+        buf_block_t*    block_orig = block;
 
         ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
         ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
@@ -3108,11 +3316,13 @@ btr_lift_page_up(
         {
                 btr_cur_t       cursor;
-                mem_heap_t*     heap    = mem_heap_create(100);
-                ulint*          offsets;
+                ulint*          offsets = NULL;
+                mem_heap_t*     heap    = mem_heap_create(
+                        sizeof(*offsets)
+                        * (REC_OFFS_HEADER_SIZE + 1 + 1 + index->n_fields));
                 buf_block_t*    b;
 
-                offsets = btr_page_get_father_block(NULL, heap, index,
+                offsets = btr_page_get_father_block(offsets, heap, index,
                                                     block, mtr, &cursor);
                 father_block = btr_cur_get_block(&cursor);
                 father_page_zip = buf_block_get_page_zip(father_block);
@@ -3136,6 +3346,29 @@ btr_lift_page_up(
                         blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
                 }
 
+                lift_father_up = (n_blocks && page_level == 0);
+                if (lift_father_up) {
+                        /* The father page also should be the only on its level (not
+                        root). We should lift up the father page at first.
+                        Because the leaf page should be lifted up only for root page.
+                        The freeing page is based on page_level (==0 or !=0)
+                        to choose segment. If the page_level is changed ==0 from !=0,
+                        later freeing of the page doesn't find the page allocation
+                        to be freed.*/
+
+                        block = father_block;
+                        page = buf_block_get_frame(block);
+                        page_level = btr_page_get_level(page, mtr);
+
+                        ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
+                        ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
+                        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+
+                        father_block = blocks[0];
+                        father_page_zip = buf_block_get_page_zip(father_block);
+                        father_page = buf_block_get_frame(father_block);
+                }
+
                 mem_heap_free(heap);
         }
 
@@ -3143,6 +3376,7 @@ btr_lift_page_up(
 
         /* Make the father empty */
         btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
+        page_level++;
 
         /* Copy the records to the father page one by one. */
         if (0
@@ -3174,7 +3408,7 @@ btr_lift_page_up(
         lock_update_copy_and_discard(father_block, block);
 
         /* Go upward to root page, decrementing levels by one. */
-        for (i = 0; i < n_blocks; i++, page_level++) {
+        for (i = lift_father_up ? 1 : 0; i < n_blocks; i++, page_level++) {
                 page_t*         page    = buf_block_get_frame(blocks[i]);
                 page_zip_des_t* page_zip= buf_block_get_page_zip(blocks[i]);
 
@@ -3196,7 +3430,7 @@ btr_lift_page_up(
         ut_ad(page_validate(father_page, index));
         ut_ad(btr_check_node_ptr(index, father_block, mtr));
 
-        return(father_block);
+        return(lift_father_up ? block_orig : father_block);
 }
 
 /*************************************************************//**
@@ -3267,6 +3501,7 @@ btr_compress(
 
         if (adjust) {
                 nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor));
+                ut_ad(nth_rec > 0);
         }
 
         /* Decide the page to which we try to merge and which will inherit
@@ -3323,6 +3558,16 @@ err_exit:
                 return(FALSE);
         }
 
+        /* If compression padding tells us that merging will result in
+        too packed up page i.e.: which is likely to cause compression
+        failure then don't merge the pages. */
+        if (zip_size && page_is_leaf(merge_page)
+            && (page_get_data_size(merge_page) + data_size
+                >= dict_index_zip_pad_optimal_page_size(index))) {
+
+                goto err_exit;
+        }
+
         ut_ad(page_validate(merge_page, index));
 
         max_ins_size = page_get_max_insert_size(merge_page, n_recs);
@@ -3502,6 +3747,7 @@ func_exit:
         mem_heap_free(heap);
 
         if (adjust) {
+                ut_ad(nth_rec > 0);
                 btr_cur_position(
                         index,
                         page_rec_get_nth(merge_block->frame, nth_rec),
@@ -3818,7 +4064,7 @@ btr_print_index(
 
         mtr_start(&mtr);
 
-        root = btr_root_block_get(index, &mtr);
+        root = btr_root_block_get(index, RW_X_LATCH, &mtr);
 
         btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
         if (heap) {
@@ -3827,7 +4073,7 @@ btr_print_index(
 
         mtr_commit(&mtr);
 
-        btr_validate_index(index, NULL);
+        btr_validate_index(index, 0);
 }
 #endif /* UNIV_BTR_PRINT */
 
@@ -4013,8 +4259,22 @@ btr_index_page_validate(
 {
         page_cur_t      cur;
         ibool           ret     = TRUE;
+#ifndef DBUG_OFF
+        ulint           nth     = 1;
+#endif /* !DBUG_OFF */
 
         page_cur_set_before_first(block, &cur);
+
+        /* Directory slot 0 should only contain the infimum record. */
+        DBUG_EXECUTE_IF("check_table_rec_next",
+                        ut_a(page_rec_get_nth_const(
+                                     page_cur_get_page(&cur), 0)
+                             == cur.rec);
+                        ut_a(page_dir_slot_get_n_owned(
+                                     page_dir_get_nth_slot(
                                             page_cur_get_page(&cur), 0))
+                             == 1););
+
         page_cur_move_to_next(&cur);
 
         for (;;) {
@@ -4028,6 +4288,16 @@ btr_index_page_validate(
                         return(FALSE);
                 }
 
+                /* Verify that page_rec_get_nth_const() is correctly
+                retrieving each record. */
+                DBUG_EXECUTE_IF("check_table_rec_next",
+                                ut_a(cur.rec == page_rec_get_nth_const(
+                                             page_cur_get_page(&cur),
+                                             page_rec_get_n_recs_before(
                                                     cur.rec)));
+                                ut_a(nth++ == page_rec_get_n_recs_before(
+                                             cur.rec)););
+
                 page_cur_move_to_next(&cur);
         }
 
@@ -4078,14 +4348,15 @@
 Validates index tree level.
 @return TRUE if ok */
 static
-ibool
+bool
 btr_validate_level(
 /*===============*/
         dict_index_t*   index,  /*!< in: index tree */
-        trx_t*          trx,    /*!< in: transaction or NULL */
+        const trx_t*    trx,    /*!< in: transaction or NULL */
         ulint           level)  /*!< in: level number */
 {
         ulint           space;
+        ulint           space_flags;
         ulint           zip_size;
         buf_block_t*    block;
         page_t*         page;
@@ -4099,9 +4370,10 @@ btr_validate_level(
         ulint           left_page_no;
         page_cur_t      cursor;
         dtuple_t*       node_ptr_tuple;
-        ibool           ret     = TRUE;
+        bool            ret     = true;
         mtr_t           mtr;
         mem_heap_t*     heap    = mem_heap_create(256);
+        fseg_header_t*  seg;
         ulint*          offsets = NULL;
         ulint*          offsets2= NULL;
 #ifdef UNIV_ZIP_DEBUG
@@ -4112,15 +4384,39 @@ btr_validate_level(
 
         mtr_x_lock(dict_index_get_lock(index), &mtr);
 
-        block = btr_root_block_get(index, &mtr);
+        block = btr_root_block_get(index, RW_X_LATCH, &mtr);
         page = buf_block_get_frame(block);
+        seg = page + PAGE_HEADER + PAGE_BTR_SEG_TOP;
 
         space = dict_index_get_space(index);
         zip_size = dict_table_zip_size(index->table);
 
+        fil_space_get_latch(space, &space_flags);
+
+        if (zip_size != dict_tf_get_zip_size(space_flags)) {
+
+                ib_logf(IB_LOG_LEVEL_WARN,
+                        "Flags mismatch: table=%lu, tablespace=%lu",
+                        (ulint) index->table->flags, (ulint) space_flags);
+
+                mtr_commit(&mtr);
+
+                return(false);
+        }
+
         while (level != btr_page_get_level(page, &mtr)) {
                 const rec_t*    node_ptr;
 
+                if (fseg_page_is_free(seg,
+                                      block->page.space, block->page.offset)) {
+
+                        btr_validate_report1(index, level, block);
+
+                        ib_logf(IB_LOG_LEVEL_WARN, "page is free");
+
+                        ret = false;
+                }
+
                 ut_a(space == buf_block_get_space(block));
                 ut_a(space == page_get_space_id(page));
 #ifdef UNIV_ZIP_DEBUG
                 page_zip = buf_block_get_page_zip(block);
@@ -4141,12 +4437,13 @@ btr_validate_level(
 
         /* Now we are on the desired level. Loop through the pages on that
         level. */
-loop:
-        if (trx_is_interrupted(trx)) {
-                mtr_commit(&mtr);
-                mem_heap_free(heap);
-                return(ret);
+
+        if (level == 0) {
+                /* Leaf pages are managed in their own file segment. */
+                seg -= PAGE_BTR_SEG_TOP - PAGE_BTR_SEG_LEAF;
         }
+
+loop:
         mem_heap_empty(heap);
         offsets = offsets2 = NULL;
         mtr_x_lock(dict_index_get_lock(index), &mtr);
@@ -4156,20 +4453,35 @@ loop:
         ut_a(!page_zip || page_zip_validate(page_zip, page, index));
 #endif /* UNIV_ZIP_DEBUG */
 
-        /* Check ordering etc. of records */
+        ut_a(block->page.space == space);
+
+        if (fseg_page_is_free(seg, block->page.space, block->page.offset)) {
+
+                btr_validate_report1(index, level, block);
+
+                ib_logf(IB_LOG_LEVEL_WARN, "Page is marked as free");
+                ret = false;
+
+        } else if (btr_page_get_index_id(page) != index->id) {
+
+                ib_logf(IB_LOG_LEVEL_ERROR,
+                        "Page index id " IB_ID_FMT " != data dictionary "
+                        "index id " IB_ID_FMT,
+                        btr_page_get_index_id(page), index->id);
+
+                ret = false;
+
+        } else if (!page_validate(page, index)) {
 
-        if (!page_validate(page, index)) {
                 btr_validate_report1(index, level, block);
+                ret = false;
+
+        } else if (level == 0 && !btr_index_page_validate(block, index)) {
 
-                ret = FALSE;
-        } else if (level == 0) {
                 /* We are on level 0. Check that the records have the right
                 number of fields, and field lengths are right. */
 
-                if (!btr_index_page_validate(block, index)) {
-
-                        ret = FALSE;
-                }
+                ret = false;
         }
 
         ut_a(btr_page_get_level(page, &mtr) == level);
@@ -4195,7 +4507,7 @@ loop:
 
                         buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
                         buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
-                        ret = FALSE;
+                        ret = false;
                 }
 
                 if (page_is_comp(right_page) != page_is_comp(page)) {
@@ -4204,7 +4516,7 @@ loop:
                        btr_validate_report2(index, level, block, right_block);
                        buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
                        buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
-                        ret = FALSE;
+                        ret = false;
 
                         goto node_ptr_fails;
                 }
@@ -4237,7 +4549,7 @@ loop:
                         rec_print(stderr, rec, index);
                         putc('\n', stderr);
 
-                        ret = FALSE;
+                        ret = false;
                 }
         }
 
@@ -4288,7 +4600,7 @@ loop:
                         fputs("InnoDB: record on page ", stderr);
                         rec_print_new(stderr, rec, offsets);
                         putc('\n', stderr);
-                        ret = FALSE;
+                        ret = false;
 
                         goto node_ptr_fails;
                 }
@@ -4318,7 +4630,7 @@ loop:
                                 fputs("InnoDB: first rec ", stderr);
                                 rec_print(stderr, first_rec, index);
                                 putc('\n', stderr);
-                                ret = FALSE;
+                                ret = false;
 
                                 goto node_ptr_fails;
                         }
@@ -4346,7 +4658,7 @@ loop:
 
                         if (btr_cur_get_rec(&right_node_cur)
                             != right_node_ptr) {
-                                ret = FALSE;
+                                ret = false;
                                 fputs("InnoDB: node pointer to"
                                       " the right page is wrong\n", stderr);
 
@@ -4372,7 +4684,7 @@ loop:
                             != page_rec_get_next(
                                     page_get_infimum_rec(
                                             right_father_page))) {
-                                ret = FALSE;
+                                ret = false;
                                 fputs("InnoDB: node pointer 2 to"
                                       " the right page is wrong\n", stderr);
 
@@ -4397,7 +4709,7 @@ loop:
                         if (page_get_page_no(right_father_page)
                             != btr_page_get_next(father_page, &mtr)) {
 
-                                ret = FALSE;
+                                ret = false;
                                 fputs("InnoDB: node pointer 3 to"
                                       " the right page is wrong\n", stderr);
 
@@ -4428,17 +4740,23 @@ node_ptr_fails:
        /* Commit the mini-transaction to release the latch on 'page'.
        Re-acquire the latch on right_page, which will become 'page'
        on the next loop.  The page has already been checked. */
        mtr_commit(&mtr);
 
-        if (right_page_no != FIL_NULL) {
+        if (trx_is_interrupted(trx)) {
+                /* On interrupt, return the current status. */
+        } else if (right_page_no != FIL_NULL) {
+
                 mtr_start(&mtr);
-                block = btr_block_get(space, zip_size, right_page_no,
-                                      RW_X_LATCH, index, &mtr);
+
+                block = btr_block_get(
+                        space, zip_size, right_page_no,
+                        RW_X_LATCH, index, &mtr);
+
                 page = buf_block_get_frame(block);
 
                 goto loop;
         }
 
         mem_heap_free(heap);
+
         return(ret);
 }
 
@@ -4446,40 +4764,39 @@ node_ptr_fails:
 Checks the consistency of an index tree.
 @return TRUE if ok */
 UNIV_INTERN
-ibool
+bool
 btr_validate_index(
 /*===============*/
         dict_index_t*   index,  /*!< in: index */
-        trx_t*          trx)    /*!< in: transaction or NULL */
+        const trx_t*    trx)    /*!< in: transaction or NULL */
 {
-        mtr_t   mtr;
-        page_t* root;
-        ulint   i;
-        ulint   n;
-
         /* Full Text index are implemented by auxiliary tables,
         not the B-tree */
-        if (index->type & DICT_FTS) {
-                return(TRUE);
+        if (dict_index_is_online_ddl(index) || (index->type & DICT_FTS)) {
+                return(true);
         }
 
+        mtr_t   mtr;
+
         mtr_start(&mtr);
-        mtr_x_lock(dict_index_get_lock(index), &mtr);
 
-        root = btr_root_get(index, &mtr);
-        n = btr_page_get_level(root, &mtr);
+        mtr_x_lock(dict_index_get_lock(index), &mtr);
 
-        for (i = 0; i <= n && !trx_is_interrupted(trx); i++) {
-                if (!btr_validate_level(index, trx, n - i)) {
+        bool    ok = true;
+        page_t* root = btr_root_get(index, &mtr);
+        ulint   n = btr_page_get_level(root, &mtr);
 
-                        mtr_commit(&mtr);
+        for (ulint i = 0; i <= n; ++i) {
 
-                        return(FALSE);
+                if (!btr_validate_level(index, trx, n - i)) {
+                        ok = false;
+                        break;
                 }
         }
 
         mtr_commit(&mtr);
 
-        return(TRUE);
+        return(ok);
 }
+
 #endif /* !UNIV_HOTBACKUP */