Diffstat (limited to 'storage/innobase/btr/btr0btr.cc')
 storage/innobase/btr/btr0btr.cc | 591
 1 file changed, 454 insertions(+), 137 deletions(-)
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 8b7a19777ab..e3e127c3ace 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -697,14 +698,16 @@ btr_root_fseg_validate(
#endif /* UNIV_BTR_DEBUG */
/**************************************************************//**
-Gets the root node of a tree and x-latches it.
-@return root page, x-latched */
+Gets the root node of a tree and x- or s-latches it.
+@return root page, x- or s-latched */
static
buf_block_t*
btr_root_block_get(
/*===============*/
- dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr) /*!< in: mtr */
+ const dict_index_t* index, /*!< in: index tree */
+ ulint mode, /*!< in: either RW_S_LATCH
+ or RW_X_LATCH */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint space;
ulint zip_size;
@@ -715,8 +718,7 @@ btr_root_block_get(
zip_size = dict_table_zip_size(index->table);
root_page_no = dict_index_get_page(index);
- block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH,
- index, mtr);
+ block = btr_block_get(space, zip_size, root_page_no, mode, index, mtr);
btr_assert_not_corrupted(block, index);
#ifdef UNIV_BTR_DEBUG
if (!dict_index_is_ibuf(index)) {
@@ -739,10 +741,162 @@ UNIV_INTERN
page_t*
btr_root_get(
/*=========*/
+ const dict_index_t* index, /*!< in: index tree */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ return(buf_block_get_frame(btr_root_block_get(index, RW_X_LATCH,
+ mtr)));
+}
+
+/**************************************************************//**
+Gets the height of the B-tree (the level of the root, when the leaf
+level is assumed to be 0). The caller must hold an S or X latch on
+the index.
+@return tree height (level of the root) */
+UNIV_INTERN
+ulint
+btr_height_get(
+/*===========*/
dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ ulint height;
+ buf_block_t* root_block;
+
+ ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_S_LOCK)
+ || mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK));
+
+ /* S latches the page */
+ root_block = btr_root_block_get(index, RW_S_LATCH, mtr);
+
+ height = btr_page_get_level(buf_block_get_frame(root_block), mtr);
+
+ /* Release the S latch on the root page. */
+ mtr_memo_release(mtr, root_block, MTR_MEMO_PAGE_S_FIX);
+#ifdef UNIV_SYNC_DEBUG
+ sync_thread_reset_level(&root_block->lock);
+#endif /* UNIV_SYNC_DEBUG */
+
+ return(height);
+}
+
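A minimal caller sketch for the new btr_height_get() (hypothetical, not part of the patch): the function asserts that the caller already holds the index lock, and it S-latches and releases the root page itself.

	mtr_t	mtr;

	mtr_start(&mtr);
	mtr_s_lock(dict_index_get_lock(index), &mtr);	/* satisfies the ut_ad() */

	ulint	height = btr_height_get(index, &mtr);

	mtr_commit(&mtr);
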
+/**************************************************************//**
+Checks a file segment header within a B-tree root page and updates
+the segment header space id.
+@return TRUE if valid */
+static
+bool
+btr_root_fseg_adjust_on_import(
+/*===========================*/
+ fseg_header_t* seg_header, /*!< in/out: segment header */
+ page_zip_des_t* page_zip, /*!< in/out: compressed page,
+ or NULL */
+ ulint space, /*!< in: tablespace identifier */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
- return(buf_block_get_frame(btr_root_block_get(index, mtr)));
+ ulint offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET);
+
+ if (offset < FIL_PAGE_DATA
+ || offset > UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) {
+
+ return(FALSE);
+
+ } else if (page_zip) {
+ mach_write_to_4(seg_header + FSEG_HDR_SPACE, space);
+ page_zip_write_header(page_zip, seg_header + FSEG_HDR_SPACE,
+ 4, mtr);
+ } else {
+ mlog_write_ulint(seg_header + FSEG_HDR_SPACE,
+ space, MLOG_4BYTES, mtr);
+ }
+
+ return(TRUE);
+}
+
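A worked example of the offset check above, assuming the default 16 KB uncompressed page (UNIV_PAGE_SIZE = 16384, FIL_PAGE_DATA = 38, FIL_PAGE_DATA_END = 8):

	/* Valid segment header offsets must lie inside the page body:
	   38 <= offset <= 16384 - 8 = 16376.
	   Anything outside that range cannot be a file segment header,
	   so btr_root_fseg_adjust_on_import() returns FALSE and the
	   import is rejected as corrupt. */
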
+/**************************************************************//**
+Checks and adjusts the root node of a tree during IMPORT TABLESPACE.
+@return error code, or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+btr_root_adjust_on_import(
+/*======================*/
+ const dict_index_t* index) /*!< in: index tree */
+{
+ dberr_t err;
+ mtr_t mtr;
+ page_t* page;
+ buf_block_t* block;
+ page_zip_des_t* page_zip;
+ dict_table_t* table = index->table;
+ ulint space_id = dict_index_get_space(index);
+ ulint zip_size = dict_table_zip_size(table);
+ ulint root_page_no = dict_index_get_page(index);
+
+ mtr_start(&mtr);
+
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+
+ DBUG_EXECUTE_IF("ib_import_trigger_corruption_3",
+ return(DB_CORRUPTION););
+
+ block = btr_block_get(
+ space_id, zip_size, root_page_no, RW_X_LATCH, index, &mtr);
+
+ page = buf_block_get_frame(block);
+ page_zip = buf_block_get_page_zip(block);
+
+ /* Check that this is a B-tree page and both the PREV and NEXT
+ pointers are FIL_NULL, because the root page does not have any
+ siblings. */
+ if (fil_page_get_type(page) != FIL_PAGE_INDEX
+ || fil_page_get_prev(page) != FIL_NULL
+ || fil_page_get_next(page) != FIL_NULL) {
+
+ err = DB_CORRUPTION;
+
+ } else if (dict_index_is_clust(index)) {
+ bool page_is_compact_format;
+
+ page_is_compact_format = page_is_comp(page) > 0;
+
+ /* Check if the page format and table format agree. */
+ if (page_is_compact_format != dict_table_is_comp(table)) {
+ err = DB_CORRUPTION;
+ } else {
+
+ /* Check that the table flags and the tablespace
+ flags match. */
+ ulint flags = fil_space_get_flags(table->space);
+
+ if (flags
+ && flags != dict_tf_to_fsp_flags(table->flags)) {
+
+ err = DB_CORRUPTION;
+ } else {
+ err = DB_SUCCESS;
+ }
+ }
+ } else {
+ err = DB_SUCCESS;
+ }
+
+ /* Check and adjust the file segment headers, if all OK so far. */
+ if (err == DB_SUCCESS
+ && (!btr_root_fseg_adjust_on_import(
+ FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+ + page, page_zip, space_id, &mtr)
+ || !btr_root_fseg_adjust_on_import(
+ FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+ + page, page_zip, space_id, &mtr))) {
+
+ err = DB_CORRUPTION;
+ }
+
+ mtr_commit(&mtr);
+
+ return(err);
}
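
A hypothetical import-path sketch (names from the dict_ API; the loop itself is illustrative, not part of this patch): after the tablespace is attached, every index root is validated and its segment headers adjusted.

	for (dict_index_t* index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		dberr_t	err = btr_root_adjust_on_import(index);

		if (err != DB_SUCCESS) {
			return(err);	/* typically DB_CORRUPTION */
		}
	}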
/*************************************************************//**
@@ -1033,8 +1187,7 @@ btr_get_size(
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
MTR_MEMO_S_LOCK));
- if (index->page == FIL_NULL
- || index->to_be_dropped
+ if (index->page == FIL_NULL || dict_index_is_online_ddl(index)
|| *index->name == TEMP_INDEX_PREFIX) {
return(ULINT_UNDEFINED);
}
@@ -1584,6 +1737,8 @@ btr_page_reorganize_low(
there cannot exist locks on the
page, and a hash index should not be
dropped: it cannot exist */
+ ulint compression_level,/*!< in: compression level to be used
if dealing with a compressed page */
buf_block_t* block, /*!< in: page to be reorganized */
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr) /*!< in: mtr */
@@ -1601,6 +1756,8 @@ btr_page_reorganize_low(
ulint max_ins_size1;
ulint max_ins_size2;
ibool success = FALSE;
+ byte type;
+ byte* log_ptr;
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
btr_assert_not_corrupted(block, index);
@@ -1612,9 +1769,23 @@ btr_page_reorganize_low(
#ifndef UNIV_HOTBACKUP
/* Write the log record */
- mlog_open_and_write_index(mtr, page, index, page_is_comp(page)
- ? MLOG_COMP_PAGE_REORGANIZE
- : MLOG_PAGE_REORGANIZE, 0);
+ if (page_zip) {
+ type = MLOG_ZIP_PAGE_REORGANIZE;
+ } else if (page_is_comp(page)) {
+ type = MLOG_COMP_PAGE_REORGANIZE;
+ } else {
+ type = MLOG_PAGE_REORGANIZE;
+ }
+
+ log_ptr = mlog_open_and_write_index(
+ mtr, page, index, type, page_zip ? 1 : 0);
+
+ /* For compressed pages write the compression level. */
+ if (log_ptr && page_zip) {
+ mach_write_to_1(log_ptr, compression_level);
+ mlog_close(mtr, log_ptr + 1);
+ }
+
#endif /* !UNIV_HOTBACKUP */
/* Turn logging off */
@@ -1662,7 +1833,9 @@ btr_page_reorganize_low(
ut_ad(max_trx_id != 0 || recovery);
}
- if (page_zip && !page_zip_compress(page_zip, page, index, NULL)) {
+ if (page_zip
+ && !page_zip_compress(page_zip, page, index,
+ compression_level, NULL)) {
/* Restore the old page and exit. */
btr_blob_dbg_restore(page, temp_page, index,
@@ -1750,7 +1923,8 @@ btr_page_reorganize(
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr) /*!< in: mtr */
{
- return(btr_page_reorganize_low(FALSE, block, index, mtr));
+ return(btr_page_reorganize_low(FALSE, page_compression_level,
+ block, index, mtr));
}
#endif /* !UNIV_HOTBACKUP */
@@ -1762,18 +1936,32 @@ byte*
btr_parse_page_reorganize(
/*======================*/
byte* ptr, /*!< in: buffer */
- byte* end_ptr __attribute__((unused)),
- /*!< in: buffer end */
+ byte* end_ptr,/*!< in: buffer end */
dict_index_t* index, /*!< in: record descriptor */
+ bool compressed,/*!< in: true if compressed page */
buf_block_t* block, /*!< in: page to be reorganized, or NULL */
mtr_t* mtr) /*!< in: mtr or NULL */
{
+ ulint level = page_compression_level;
+
ut_ad(ptr && end_ptr);
- /* The record is empty, except for the record initial part */
+	/* For a compressed page, the record carries one byte: the
+	compression level that was used when the page was originally
+	compressed. Otherwise the record is empty. */
+ if (compressed) {
+ if (ptr == end_ptr) {
+ return(NULL);
+ }
+
+ level = (ulint)mach_read_from_1(ptr);
+
+ ut_a(level <= 9);
+ ++ptr;
+ }
if (block != NULL) {
- btr_page_reorganize_low(TRUE, block, index, mtr);
+ btr_page_reorganize_low(TRUE, level, block, index, mtr);
}
return(ptr);
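
On the recovery side, the one byte written by btr_page_reorganize_low() is consumed by this parser. A sketch of the dispatch, assuming the usual switch in recv_parse_or_apply_log_rec_body():

	case MLOG_PAGE_REORGANIZE:
	case MLOG_COMP_PAGE_REORGANIZE:
	case MLOG_ZIP_PAGE_REORGANIZE:
		if (NULL != (ptr = mlog_parse_index(
				     ptr, end_ptr,
				     type != MLOG_PAGE_REORGANIZE,
				     &index))) {
			ptr = btr_parse_page_reorganize(
				ptr, end_ptr, index,
				type == MLOG_ZIP_PAGE_REORGANIZE,
				block, mtr);
		}
		break;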
@@ -1827,10 +2015,13 @@ UNIV_INTERN
rec_t*
btr_root_raise_and_insert(
/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
on the root page; when the function returns,
the cursor is positioned on the predecessor
of the inserted record */
+ ulint** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr) /*!< in: mtr */
@@ -1840,7 +2031,6 @@ btr_root_raise_and_insert(
page_t* new_page;
ulint new_page_no;
rec_t* rec;
- mem_heap_t* heap;
dtuple_t* node_ptr;
ulint level;
rec_t* node_ptr_rec;
@@ -1926,7 +2116,9 @@ btr_root_raise_and_insert(
lock_update_root_raise(new_block, root_block);
/* Create a memory heap where the node pointer is stored */
- heap = mem_heap_create(100);
+ if (!*heap) {
+ *heap = mem_heap_create(1000);
+ }
rec = page_rec_get_next(page_get_infimum_rec(new_page));
new_page_no = buf_block_get_page_no(new_block);
@@ -1934,8 +2126,8 @@ btr_root_raise_and_insert(
/* Build the node pointer (= node key and page address) for the
child */
- node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
- level);
+ node_ptr = dict_index_build_node_ptr(
+ index, rec, new_page_no, *heap, level);
/* The node pointer must be marked as the predefined minimum record,
as there is no lower alphabetical limit to records in the leftmost
node of a level: */
@@ -1961,15 +2153,12 @@ btr_root_raise_and_insert(
page_cur_set_before_first(root_block, page_cursor);
node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
- index, 0, mtr);
+ index, offsets, heap, 0, mtr);
/* The root page should only contain the node pointer
to new_page at this point. Thus, the data should fit. */
ut_a(node_ptr_rec);
- /* Free the memory heap */
- mem_heap_free(heap);
-
/* We play safe and reset the free bits for the new page */
#if 0
@@ -1985,7 +2174,8 @@ btr_root_raise_and_insert(
PAGE_CUR_LE, page_cursor);
/* Split the child and insert tuple */
- return(btr_page_split_and_insert(cursor, tuple, n_ext, mtr));
+ return(btr_page_split_and_insert(flags, cursor, offsets, heap,
+ tuple, n_ext, mtr));
}
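
The signature change follows a pattern applied throughout this patch: the offsets/heap pair is owned by the caller and reused across the whole split, instead of each callee creating and freeing a private heap. A hypothetical caller sketch:

	mem_heap_t*	heap = NULL;
	ulint*		offsets = NULL;

	rec = btr_root_raise_and_insert(flags, cursor, &offsets, &heap,
					tuple, n_ext, mtr);

	if (heap != NULL) {
		mem_heap_free(heap);
	}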
/*************************************************************//**
@@ -2213,9 +2403,9 @@ func_exit:
/*************************************************************//**
Returns TRUE if the insert fits on the appropriate half-page with the
chosen split_rec.
-@return TRUE if fits */
-static
-ibool
+@return true if fits */
+static __attribute__((nonnull(1,3,4,6), warn_unused_result))
+bool
btr_page_insert_fits(
/*=================*/
btr_cur_t* cursor, /*!< in: cursor at which insert
@@ -2223,11 +2413,11 @@ btr_page_insert_fits(
const rec_t* split_rec,/*!< in: suggestion for first record
on upper half-page, or NULL if
tuple to be inserted should be first */
- const ulint* offsets,/*!< in: rec_get_offsets(
- split_rec, cursor->index) */
+ ulint** offsets,/*!< in: rec_get_offsets(
+ split_rec, cursor->index); out: garbage */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
- mem_heap_t* heap) /*!< in: temporary memory heap */
+ mem_heap_t** heap) /*!< in: temporary memory heap */
{
page_t* page;
ulint insert_size;
@@ -2236,15 +2426,13 @@ btr_page_insert_fits(
ulint total_n_recs;
const rec_t* rec;
const rec_t* end_rec;
- ulint* offs;
page = btr_cur_get_page(cursor);
- ut_ad(!split_rec == !offsets);
- ut_ad(!offsets
- || !page_is_comp(page) == !rec_offs_comp(offsets));
- ut_ad(!offsets
- || rec_offs_validate(split_rec, cursor->index, offsets));
+ ut_ad(!split_rec
+ || !page_is_comp(page) == !rec_offs_comp(*offsets));
+ ut_ad(!split_rec
+ || rec_offs_validate(split_rec, cursor->index, *offsets));
insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
free_space = page_get_free_space_of_empty(page_is_comp(page));
@@ -2262,7 +2450,7 @@ btr_page_insert_fits(
rec = page_rec_get_next(page_get_infimum_rec(page));
end_rec = page_rec_get_next(btr_cur_get_rec(cursor));
- } else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) {
+ } else if (cmp_dtuple_rec(tuple, split_rec, *offsets) >= 0) {
rec = page_rec_get_next(page_get_infimum_rec(page));
end_rec = split_rec;
@@ -2277,19 +2465,17 @@ btr_page_insert_fits(
/* Ok, there will be enough available space on the
half page where the tuple is inserted */
- return(TRUE);
+ return(true);
}
- offs = NULL;
-
while (rec != end_rec) {
/* In this loop we calculate the amount of reserved
space after rec is removed from page. */
- offs = rec_get_offsets(rec, cursor->index, offs,
- ULINT_UNDEFINED, &heap);
+ *offsets = rec_get_offsets(rec, cursor->index, *offsets,
+ ULINT_UNDEFINED, heap);
- total_data -= rec_offs_size(offs);
+ total_data -= rec_offs_size(*offsets);
total_n_recs--;
if (total_data + page_dir_calc_reserved_space(total_n_recs)
@@ -2298,13 +2484,13 @@ btr_page_insert_fits(
/* Ok, there will be enough available space on the
half page where the tuple is inserted */
- return(TRUE);
+ return(true);
}
rec = page_rec_get_next_const(rec);
}
- return(FALSE);
+ return(false);
}
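
The fit test in outline, with hypothetical numbers:

	/* total_data starts at page_get_data_size(page) + insert_size,
	   say 9000 + 400. Each record that would move to the other
	   half-page is subtracted from it; as soon as
	       total_data + page_dir_calc_reserved_space(total_n_recs)
	           <= free_space
	   holds, the insert is known to fit on this half-page. */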
/*******************************************************//**
@@ -2314,6 +2500,7 @@ UNIV_INTERN
void
btr_insert_on_non_leaf_level_func(
/*==============================*/
+ ulint flags, /*!< in: undo logging and locking flags */
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: level, must be > 0 */
dtuple_t* tuple, /*!< in: the record to be inserted */
@@ -2323,8 +2510,10 @@ btr_insert_on_non_leaf_level_func(
{
big_rec_t* dummy_big_rec;
btr_cur_t cursor;
- ulint err;
+ dberr_t err;
rec_t* rec;
+ ulint* offsets = NULL;
+ mem_heap_t* heap = NULL;
ut_ad(level > 0);
@@ -2335,26 +2524,35 @@ btr_insert_on_non_leaf_level_func(
ut_ad(cursor.flag == BTR_CUR_BINARY);
err = btr_cur_optimistic_insert(
- BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG
- | BTR_NO_UNDO_LOG_FLAG, &cursor, tuple, &rec,
- &dummy_big_rec, 0, NULL, mtr);
+ flags
+ | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG
+ | BTR_NO_UNDO_LOG_FLAG,
+ &cursor, &offsets, &heap,
+ tuple, &rec, &dummy_big_rec, 0, NULL, mtr);
if (err == DB_FAIL) {
- err = btr_cur_pessimistic_insert(
- BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG
- | BTR_NO_UNDO_LOG_FLAG,
- &cursor, tuple, &rec, &dummy_big_rec, 0, NULL, mtr);
+ err = btr_cur_pessimistic_insert(flags
+ | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG
+ | BTR_NO_UNDO_LOG_FLAG,
+ &cursor, &offsets, &heap,
+ tuple, &rec,
+ &dummy_big_rec, 0, NULL, mtr);
ut_a(err == DB_SUCCESS);
}
+ mem_heap_free(heap);
}
/**************************************************************//**
Attaches the halves of an index page on the appropriate level in an
index tree. */
-static
+static __attribute__((nonnull))
void
btr_attach_half_pages(
/*==================*/
+ ulint flags, /*!< in: undo logging and
+ locking flags */
dict_index_t* index, /*!< in: the index tree */
buf_block_t* block, /*!< in/out: page to be split */
const rec_t* split_rec, /*!< in: first record on upper
@@ -2432,7 +2630,8 @@ btr_attach_half_pages(
/* Insert it next to the pointer to the lower half. Note that this
may generate recursion leading to a split on the higher level. */
- btr_insert_on_non_leaf_level(index, level + 1, node_ptr_upper, mtr);
+ btr_insert_on_non_leaf_level(flags, index, level + 1,
+ node_ptr_upper, mtr);
/* Free the memory heap */
mem_heap_free(heap);
@@ -2484,13 +2683,13 @@ btr_attach_half_pages(
/*************************************************************//**
Determine if a tuple is smaller than any record on the page.
@return TRUE if smaller */
-static
-ibool
+static __attribute__((nonnull, warn_unused_result))
+bool
btr_page_tuple_smaller(
/*===================*/
btr_cur_t* cursor, /*!< in: b-tree cursor */
const dtuple_t* tuple, /*!< in: tuple to consider */
- ulint* offsets,/*!< in/out: temporary storage */
+ ulint** offsets,/*!< in/out: temporary storage */
ulint n_uniq, /*!< in: number of unique fields
in the index page records */
mem_heap_t** heap) /*!< in/out: heap for offsets */
@@ -2505,11 +2704,11 @@ btr_page_tuple_smaller(
page_cur_move_to_next(&pcur);
first_rec = page_cur_get_rec(&pcur);
- offsets = rec_get_offsets(
- first_rec, cursor->index, offsets,
+ *offsets = rec_get_offsets(
+ first_rec, cursor->index, *offsets,
n_uniq, heap);
- return(cmp_dtuple_rec(tuple, first_rec, offsets) < 0);
+ return(cmp_dtuple_rec(tuple, first_rec, *offsets) < 0);
}
/*************************************************************//**
@@ -2525,9 +2724,12 @@ UNIV_INTERN
rec_t*
btr_page_split_and_insert(
/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
function returns, the cursor is positioned
on the predecessor of the inserted record */
+ ulint** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr) /*!< in: mtr */
@@ -2553,18 +2755,21 @@ btr_page_split_and_insert(
ibool insert_left;
ulint n_iterations = 0;
rec_t* rec;
- mem_heap_t* heap;
ulint n_uniq;
- ulint* offsets;
- heap = mem_heap_create(1024);
+ if (!*heap) {
+ *heap = mem_heap_create(1024);
+ }
n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
func_start:
- mem_heap_empty(heap);
- offsets = NULL;
+ mem_heap_empty(*heap);
+ *offsets = NULL;
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
MTR_MEMO_X_LOCK));
+ ut_ad(!dict_index_is_online_ddl(cursor->index)
+ || (flags & BTR_CREATE_FLAG)
+ || dict_index_is_clust(cursor->index));
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
@@ -2590,7 +2795,7 @@ func_start:
if (split_rec == NULL) {
insert_left = btr_page_tuple_smaller(
- cursor, tuple, offsets, n_uniq, &heap);
+ cursor, tuple, offsets, n_uniq, heap);
}
} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
direction = FSP_UP;
@@ -2612,7 +2817,7 @@ func_start:
if (page_get_n_recs(page) > 1) {
split_rec = page_get_middle_rec(page);
} else if (btr_page_tuple_smaller(cursor, tuple,
- offsets, n_uniq, &heap)) {
+ offsets, n_uniq, heap)) {
split_rec = page_rec_get_next(
page_get_infimum_rec(page));
} else {
@@ -2635,10 +2840,10 @@ func_start:
if (split_rec) {
first_rec = move_limit = split_rec;
- offsets = rec_get_offsets(split_rec, cursor->index, offsets,
- n_uniq, &heap);
+ *offsets = rec_get_offsets(split_rec, cursor->index, *offsets,
+ n_uniq, heap);
- insert_left = cmp_dtuple_rec(tuple, split_rec, offsets) < 0;
+ insert_left = cmp_dtuple_rec(tuple, split_rec, *offsets) < 0;
if (!insert_left && new_page_zip && n_iterations > 0) {
/* If a compressed page has already been split,
@@ -2665,7 +2870,7 @@ insert_empty:
/* 4. Do first the modifications in the tree structure */
- btr_attach_half_pages(cursor->index, block,
+ btr_attach_half_pages(flags, cursor->index, block,
first_rec, new_block, direction, mtr);
/* If the split is made on the leaf level and the insert will fit
@@ -2685,10 +2890,11 @@ insert_empty:
insert_will_fit = !new_page_zip
&& btr_page_insert_fits(cursor, NULL,
- NULL, tuple, n_ext, heap);
+ offsets, tuple, n_ext, heap);
}
- if (insert_will_fit && page_is_leaf(page)) {
+ if (insert_will_fit && page_is_leaf(page)
+ && !dict_index_is_online_ddl(cursor->index)) {
mtr_memo_release(mtr, dict_index_get_lock(cursor->index),
MTR_MEMO_X_LOCK);
@@ -2805,8 +3011,8 @@ insert_empty:
page_cur_search(insert_block, cursor->index, tuple,
PAGE_CUR_LE, page_cursor);
- rec = page_cur_tuple_insert(page_cursor, tuple,
- cursor->index, n_ext, mtr);
+ rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
+ offsets, heap, n_ext, mtr);
#ifdef UNIV_ZIP_DEBUG
{
@@ -2837,7 +3043,7 @@ insert_empty:
page_cur_search(insert_block, cursor->index, tuple,
PAGE_CUR_LE, page_cursor);
rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
- n_ext, mtr);
+ offsets, heap, n_ext, mtr);
if (rec == NULL) {
/* The insert did not fit on the page: loop back to the
@@ -2878,7 +3084,7 @@ func_exit:
ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
- mem_heap_free(heap);
+ ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets));
return(rec);
}
@@ -3058,15 +3264,15 @@ btr_node_ptr_delete(
{
btr_cur_t cursor;
ibool compressed;
- ulint err;
+ dberr_t err;
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
/* Delete node pointer on father page */
btr_page_get_father(index, block, mtr, &cursor);
- compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, RB_NONE,
- mtr);
+ compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor,
+ BTR_CREATE_FLAG, RB_NONE, mtr);
ut_a(err == DB_SUCCESS);
if (!compressed) {
@@ -3098,6 +3304,8 @@ btr_lift_page_up(
buf_block_t* blocks[BTR_MAX_LEVELS];
ulint n_blocks; /*!< last used index in blocks[] */
ulint i;
+ bool lift_father_up;
+ buf_block_t* block_orig = block;
ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
@@ -3108,11 +3316,13 @@ btr_lift_page_up(
{
btr_cur_t cursor;
- mem_heap_t* heap = mem_heap_create(100);
- ulint* offsets;
+ ulint* offsets = NULL;
+ mem_heap_t* heap = mem_heap_create(
+ sizeof(*offsets)
+ * (REC_OFFS_HEADER_SIZE + 1 + 1 + index->n_fields));
buf_block_t* b;
- offsets = btr_page_get_father_block(NULL, heap, index,
+ offsets = btr_page_get_father_block(offsets, heap, index,
block, mtr, &cursor);
father_block = btr_cur_get_block(&cursor);
father_page_zip = buf_block_get_page_zip(father_block);
@@ -3136,6 +3346,29 @@ btr_lift_page_up(
blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
}
+ lift_father_up = (n_blocks && page_level == 0);
+ if (lift_father_up) {
+		/* The father page is also the only page on its level
+		(and it is not the root): lift the father page up
+		first, because a leaf page may only be lifted directly
+		into the root. Freeing a page chooses the file segment
+		based on page_level (== 0 or != 0); if a page's level
+		changed from != 0 to == 0, a later free of that page
+		would not find its allocation. */
+
+ block = father_block;
+ page = buf_block_get_frame(block);
+ page_level = btr_page_get_level(page, mtr);
+
+ ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
+ ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+
+ father_block = blocks[0];
+ father_page_zip = buf_block_get_page_zip(father_block);
+ father_page = buf_block_get_frame(father_block);
+ }
+
mem_heap_free(heap);
}
@@ -3143,6 +3376,7 @@ btr_lift_page_up(
/* Make the father empty */
btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
+ page_level++;
/* Copy the records to the father page one by one. */
if (0
@@ -3174,7 +3408,7 @@ btr_lift_page_up(
lock_update_copy_and_discard(father_block, block);
/* Go upward to root page, decrementing levels by one. */
- for (i = 0; i < n_blocks; i++, page_level++) {
+ for (i = lift_father_up ? 1 : 0; i < n_blocks; i++, page_level++) {
page_t* page = buf_block_get_frame(blocks[i]);
page_zip_des_t* page_zip= buf_block_get_page_zip(blocks[i]);
@@ -3196,7 +3430,7 @@ btr_lift_page_up(
ut_ad(page_validate(father_page, index));
ut_ad(btr_check_node_ptr(index, father_block, mtr));
- return(father_block);
+ return(lift_father_up ? block_orig : father_block);
}
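
A sketch of the shape that the lift_father_up path handles (illustrative only):

	/*       root                        root
	          |                            |
	       father  (alone on level)      leaf   (father lifted away)
	          |                 ==>
	        leaf   (alone on level)

	   The father page is lifted into the root first; the leaf is
	   then lifted directly into the root by a later pass, keeping
	   page_level consistent with the file segment the page will
	   eventually be freed from. */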
/*************************************************************//**
@@ -3267,6 +3501,7 @@ btr_compress(
if (adjust) {
nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor));
+ ut_ad(nth_rec > 0);
}
/* Decide the page to which we try to merge and which will inherit
@@ -3323,6 +3558,16 @@ err_exit:
return(FALSE);
}
+	/* If the compression padding heuristic indicates that the
+	merged page would be packed too tightly, and is therefore
+	likely to fail compression, do not merge the pages. */
+ if (zip_size && page_is_leaf(merge_page)
+ && (page_get_data_size(merge_page) + data_size
+ >= dict_index_zip_pad_optimal_page_size(index))) {
+
+ goto err_exit;
+ }
+
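A worked example of this guard with hypothetical numbers:

	/* Say zip_size is 8192 and dict_index_zip_pad_optimal_page_size()
	   currently returns 7168 for this index. If merge_page already
	   holds 5000 bytes of data and the page being merged in holds
	   2500, then 5000 + 2500 >= 7168, so the merge is skipped
	   rather than risking a compression failure followed by an
	   immediate re-split. */
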
ut_ad(page_validate(merge_page, index));
max_ins_size = page_get_max_insert_size(merge_page, n_recs);
@@ -3502,6 +3747,7 @@ func_exit:
mem_heap_free(heap);
if (adjust) {
+ ut_ad(nth_rec > 0);
btr_cur_position(
index,
page_rec_get_nth(merge_block->frame, nth_rec),
@@ -3818,7 +4064,7 @@ btr_print_index(
mtr_start(&mtr);
- root = btr_root_block_get(index, &mtr);
+ root = btr_root_block_get(index, RW_X_LATCH, &mtr);
btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
if (heap) {
@@ -3827,7 +4073,7 @@ btr_print_index(
mtr_commit(&mtr);
- btr_validate_index(index, NULL);
+ btr_validate_index(index, 0);
}
#endif /* UNIV_BTR_PRINT */
@@ -4013,8 +4259,22 @@ btr_index_page_validate(
{
page_cur_t cur;
ibool ret = TRUE;
+#ifndef DBUG_OFF
+ ulint nth = 1;
+#endif /* !DBUG_OFF */
page_cur_set_before_first(block, &cur);
+
+ /* Directory slot 0 should only contain the infimum record. */
+ DBUG_EXECUTE_IF("check_table_rec_next",
+ ut_a(page_rec_get_nth_const(
+ page_cur_get_page(&cur), 0)
+ == cur.rec);
+ ut_a(page_dir_slot_get_n_owned(
+ page_dir_get_nth_slot(
+ page_cur_get_page(&cur), 0))
+ == 1););
+
page_cur_move_to_next(&cur);
for (;;) {
@@ -4028,6 +4288,16 @@ btr_index_page_validate(
return(FALSE);
}
+ /* Verify that page_rec_get_nth_const() is correctly
+ retrieving each record. */
+ DBUG_EXECUTE_IF("check_table_rec_next",
+ ut_a(cur.rec == page_rec_get_nth_const(
+ page_cur_get_page(&cur),
+ page_rec_get_n_recs_before(
+ cur.rec)));
+ ut_a(nth++ == page_rec_get_n_recs_before(
+ cur.rec)););
+
page_cur_move_to_next(&cur);
}
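
Both check_table_rec_next blocks compile to nothing in release builds; in a debug build they fire only when the named debug point is enabled, e.g. (hypothetical mysqltest usage):

	/* SET SESSION debug = '+d,check_table_rec_next';
	   CHECK TABLE t1;
	   SET SESSION debug = '-d,check_table_rec_next'; */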
@@ -4078,14 +4348,15 @@ btr_validate_report2(
Validates index tree level.
@return TRUE if ok */
static
-ibool
+bool
btr_validate_level(
/*===============*/
dict_index_t* index, /*!< in: index tree */
- trx_t* trx, /*!< in: transaction or NULL */
+ const trx_t* trx, /*!< in: transaction or NULL */
ulint level) /*!< in: level number */
{
ulint space;
+ ulint space_flags;
ulint zip_size;
buf_block_t* block;
page_t* page;
@@ -4099,9 +4370,10 @@ btr_validate_level(
ulint left_page_no;
page_cur_t cursor;
dtuple_t* node_ptr_tuple;
- ibool ret = TRUE;
+ bool ret = true;
mtr_t mtr;
mem_heap_t* heap = mem_heap_create(256);
+ fseg_header_t* seg;
ulint* offsets = NULL;
ulint* offsets2= NULL;
#ifdef UNIV_ZIP_DEBUG
@@ -4112,15 +4384,39 @@ btr_validate_level(
mtr_x_lock(dict_index_get_lock(index), &mtr);
- block = btr_root_block_get(index, &mtr);
+ block = btr_root_block_get(index, RW_X_LATCH, &mtr);
page = buf_block_get_frame(block);
+ seg = page + PAGE_HEADER + PAGE_BTR_SEG_TOP;
space = dict_index_get_space(index);
zip_size = dict_table_zip_size(index->table);
+ fil_space_get_latch(space, &space_flags);
+
+ if (zip_size != dict_tf_get_zip_size(space_flags)) {
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Flags mismatch: table=%lu, tablespace=%lu",
+ (ulint) index->table->flags, (ulint) space_flags);
+
+ mtr_commit(&mtr);
+
+ return(false);
+ }
+
while (level != btr_page_get_level(page, &mtr)) {
const rec_t* node_ptr;
+ if (fseg_page_is_free(seg,
+ block->page.space, block->page.offset)) {
+
+ btr_validate_report1(index, level, block);
+
+ ib_logf(IB_LOG_LEVEL_WARN, "page is free");
+
+ ret = false;
+ }
+
ut_a(space == buf_block_get_space(block));
ut_a(space == page_get_space_id(page));
#ifdef UNIV_ZIP_DEBUG
@@ -4141,12 +4437,13 @@ btr_validate_level(
/* Now we are on the desired level. Loop through the pages on that
level. */
-loop:
- if (trx_is_interrupted(trx)) {
- mtr_commit(&mtr);
- mem_heap_free(heap);
- return(ret);
+
+ if (level == 0) {
+ /* Leaf pages are managed in their own file segment. */
+ seg -= PAGE_BTR_SEG_TOP - PAGE_BTR_SEG_LEAF;
}
+
+loop:
mem_heap_empty(heap);
offsets = offsets2 = NULL;
mtr_x_lock(dict_index_get_lock(index), &mtr);
@@ -4156,20 +4453,35 @@ loop:
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
- /* Check ordering etc. of records */
+ ut_a(block->page.space == space);
+
+ if (fseg_page_is_free(seg, block->page.space, block->page.offset)) {
+
+ btr_validate_report1(index, level, block);
+
+ ib_logf(IB_LOG_LEVEL_WARN, "Page is marked as free");
+ ret = false;
+
+ } else if (btr_page_get_index_id(page) != index->id) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Page index id " IB_ID_FMT " != data dictionary "
+ "index id " IB_ID_FMT,
+ btr_page_get_index_id(page), index->id);
+
+ ret = false;
+
+ } else if (!page_validate(page, index)) {
- if (!page_validate(page, index)) {
btr_validate_report1(index, level, block);
+ ret = false;
+
+ } else if (level == 0 && !btr_index_page_validate(block, index)) {
- ret = FALSE;
- } else if (level == 0) {
/* We are on level 0. Check that the records have the right
number of fields, and field lengths are right. */
- if (!btr_index_page_validate(block, index)) {
-
- ret = FALSE;
- }
+ ret = false;
}
ut_a(btr_page_get_level(page, &mtr) == level);
@@ -4195,7 +4507,7 @@ loop:
buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
- ret = FALSE;
+ ret = false;
}
if (page_is_comp(right_page) != page_is_comp(page)) {
@@ -4204,7 +4516,7 @@ loop:
buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
- ret = FALSE;
+ ret = false;
goto node_ptr_fails;
}
@@ -4237,7 +4549,7 @@ loop:
rec_print(stderr, rec, index);
putc('\n', stderr);
- ret = FALSE;
+ ret = false;
}
}
@@ -4288,7 +4600,7 @@ loop:
fputs("InnoDB: record on page ", stderr);
rec_print_new(stderr, rec, offsets);
putc('\n', stderr);
- ret = FALSE;
+ ret = false;
goto node_ptr_fails;
}
@@ -4318,7 +4630,7 @@ loop:
fputs("InnoDB: first rec ", stderr);
rec_print(stderr, first_rec, index);
putc('\n', stderr);
- ret = FALSE;
+ ret = false;
goto node_ptr_fails;
}
@@ -4346,7 +4658,7 @@ loop:
if (btr_cur_get_rec(&right_node_cur)
!= right_node_ptr) {
- ret = FALSE;
+ ret = false;
fputs("InnoDB: node pointer to"
" the right page is wrong\n",
stderr);
@@ -4372,7 +4684,7 @@ loop:
!= page_rec_get_next(
page_get_infimum_rec(
right_father_page))) {
- ret = FALSE;
+ ret = false;
fputs("InnoDB: node pointer 2 to"
" the right page is wrong\n",
stderr);
@@ -4397,7 +4709,7 @@ loop:
if (page_get_page_no(right_father_page)
!= btr_page_get_next(father_page, &mtr)) {
- ret = FALSE;
+ ret = false;
fputs("InnoDB: node pointer 3 to"
" the right page is wrong\n",
stderr);
@@ -4428,17 +4740,23 @@ node_ptr_fails:
on the next loop. The page has already been checked. */
mtr_commit(&mtr);
- if (right_page_no != FIL_NULL) {
+ if (trx_is_interrupted(trx)) {
+ /* On interrupt, return the current status. */
+ } else if (right_page_no != FIL_NULL) {
+
mtr_start(&mtr);
- block = btr_block_get(space, zip_size, right_page_no,
- RW_X_LATCH, index, &mtr);
+ block = btr_block_get(
+ space, zip_size, right_page_no,
+ RW_X_LATCH, index, &mtr);
+
page = buf_block_get_frame(block);
goto loop;
}
mem_heap_free(heap);
+
return(ret);
}
@@ -4446,40 +4764,39 @@ node_ptr_fails:
Checks the consistency of an index tree.
@return TRUE if ok */
UNIV_INTERN
-ibool
+bool
btr_validate_index(
/*===============*/
dict_index_t* index, /*!< in: index */
- trx_t* trx) /*!< in: transaction or NULL */
+ const trx_t* trx) /*!< in: transaction or NULL */
{
- mtr_t mtr;
- page_t* root;
- ulint i;
- ulint n;
-
/* Full-text indexes are implemented by auxiliary tables,
not by the B-tree. */
- if (index->type & DICT_FTS) {
- return(TRUE);
+ if (dict_index_is_online_ddl(index) || (index->type & DICT_FTS)) {
+ return(true);
}
+ mtr_t mtr;
+
mtr_start(&mtr);
- mtr_x_lock(dict_index_get_lock(index), &mtr);
- root = btr_root_get(index, &mtr);
- n = btr_page_get_level(root, &mtr);
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
- for (i = 0; i <= n && !trx_is_interrupted(trx); i++) {
- if (!btr_validate_level(index, trx, n - i)) {
+ bool ok = true;
+ page_t* root = btr_root_get(index, &mtr);
+ ulint n = btr_page_get_level(root, &mtr);
- mtr_commit(&mtr);
+ for (ulint i = 0; i <= n; ++i) {
- return(FALSE);
+ if (!btr_validate_level(index, trx, n - i)) {
+ ok = false;
+ break;
}
}
mtr_commit(&mtr);
- return(TRUE);
+ return(ok);
}
+
#endif /* !UNIV_HOTBACKUP */
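
A hypothetical caller sketch (the CHECK TABLE path in the handler), showing how the bool return is consumed:

	for (index = dict_table_get_first_index(prebuilt->table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		if (!btr_validate_index(index, prebuilt->trx)) {
			is_corrupt = true;
			break;
		}
	}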