Diffstat (limited to 'storage/innobase/btr/btr0cur.cc')
-rw-r--r--	storage/innobase/btr/btr0cur.cc	679
1 file changed, 456 insertions(+), 223 deletions(-)
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 99794144a04..c5a50f24b83 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -210,6 +210,7 @@ btr_rec_free_externally_stored_fields(
/** Latches the leaf page or pages requested.
@param[in] block leaf page where the search converged
@param[in] page_id page id of the leaf
+@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] latch_mode BTR_SEARCH_LEAF, ...
@param[in] cursor cursor
@param[in] mtr mini-transaction
@@ -218,7 +219,7 @@ btr_latch_leaves_t
btr_cur_latch_leaves(
buf_block_t* block,
const page_id_t page_id,
- const page_size_t& page_size,
+ ulint zip_size,
ulint latch_mode,
btr_cur_t* cursor,
mtr_t* mtr)
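This hunk, and the many similar ones below, replace the page_size_t object with a plain ulint zip_size: 0 means an uncompressed tablespace, and a nonzero value is the physical ROW_FORMAT=COMPRESSED page size. A minimal sketch of how such a value can be derived from the tablespace flags, assuming the usual FSP_FLAGS "ssize" encoding (illustrative, not the exact fil_space_t::zip_size() source):

    /* Sketch: the FSP flags carry the compressed page size as a
       power-of-two code; ssize == 0 means "not compressed". */
    ulint space_zip_size(ulint fsp_flags)
    {
        ulint ssize = FSP_FLAGS_GET_ZIP_SSIZE(fsp_flags);
        /* ssize = 1 -> 1KiB, 2 -> 2KiB, ..., 5 -> 16KiB */
        return ssize ? ((UNIV_ZIP_SIZE_MIN >> 1) << ssize) : 0;
    }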
@@ -249,7 +250,7 @@ btr_cur_latch_leaves(
mode = latch_mode == BTR_MODIFY_LEAF ? RW_X_LATCH : RW_S_LATCH;
latch_leaves.savepoints[1] = mtr_set_savepoint(mtr);
- get_block = btr_block_get(page_id, page_size, mode,
+ get_block = btr_block_get(page_id, zip_size, mode,
cursor->index, mtr);
latch_leaves.blocks[1] = get_block;
#ifdef UNIV_BTR_DEBUG
@@ -282,7 +283,7 @@ btr_cur_latch_leaves(
latch_leaves.savepoints[0] = mtr_set_savepoint(mtr);
get_block = btr_block_get(
page_id_t(page_id.space(), left_page_no),
- page_size, RW_X_LATCH, cursor->index, mtr);
+ zip_size, RW_X_LATCH, cursor->index, mtr);
latch_leaves.blocks[0] = get_block;
if (spatial) {
@@ -298,7 +299,7 @@ btr_cur_latch_leaves(
latch_leaves.savepoints[1] = mtr_set_savepoint(mtr);
get_block = btr_block_get(
- page_id, page_size, RW_X_LATCH, cursor->index, mtr);
+ page_id, zip_size, RW_X_LATCH, cursor->index, mtr);
latch_leaves.blocks[1] = get_block;
#ifdef UNIV_BTR_DEBUG
@@ -329,7 +330,7 @@ btr_cur_latch_leaves(
latch_leaves.savepoints[2] = mtr_set_savepoint(mtr);
get_block = btr_block_get(
page_id_t(page_id.space(), right_page_no),
- page_size, RW_X_LATCH, cursor->index, mtr);
+ zip_size, RW_X_LATCH, cursor->index, mtr);
latch_leaves.blocks[2] = get_block;
#ifdef UNIV_BTR_DEBUG
ut_a(page_is_comp(get_block->frame)
@@ -357,7 +358,7 @@ btr_cur_latch_leaves(
latch_leaves.savepoints[0] = mtr_set_savepoint(mtr);
get_block = btr_block_get(
page_id_t(page_id.space(), left_page_no),
- page_size, mode, cursor->index, mtr);
+ zip_size, mode, cursor->index, mtr);
latch_leaves.blocks[0] = get_block;
cursor->left_block = get_block;
#ifdef UNIV_BTR_DEBUG
@@ -369,7 +370,7 @@ btr_cur_latch_leaves(
}
latch_leaves.savepoints[1] = mtr_set_savepoint(mtr);
- get_block = btr_block_get(page_id, page_size, mode,
+ get_block = btr_block_get(page_id, zip_size, mode,
cursor->index, mtr);
latch_leaves.blocks[1] = get_block;
#ifdef UNIV_BTR_DEBUG
@@ -422,8 +423,12 @@ unreadable:
}
btr_cur_t cur;
+ /* Relax the assertion in rec_init_offsets(). */
+ ut_ad(!index->in_instant_init);
+ ut_d(index->in_instant_init = true);
dberr_t err = btr_cur_open_at_index_side(true, index, BTR_SEARCH_LEAF,
&cur, 0, mtr);
+ ut_d(index->in_instant_init = false);
if (err != DB_SUCCESS) {
index->table->corrupted = true;
return err;
@@ -457,8 +462,8 @@ unreadable:
return DB_CORRUPTION;
}
- if (info_bits != REC_INFO_MIN_REC_FLAG
- || (comp && rec_get_status(rec) != REC_STATUS_COLUMNS_ADDED)) {
+ if ((info_bits & ~REC_INFO_DELETED_FLAG) != REC_INFO_MIN_REC_FLAG
+ || (comp && rec_get_status(rec) != REC_STATUS_INSTANT)) {
incompatible:
ib::error() << "Table " << index->table->name
<< " contains unrecognizable instant ALTER metadata";
@@ -476,6 +481,72 @@ incompatible:
concurrent operations on the table, including table eviction
from the cache. */
+ if (info_bits & REC_INFO_DELETED_FLAG) {
+ /* This metadata record includes a BLOB that identifies
+ any dropped or reordered columns. */
+ ulint trx_id_offset = index->trx_id_offset;
+ if (!trx_id_offset) {
+ /* The PRIMARY KEY contains variable-length columns.
+ For the metadata record, variable-length columns are
+ always written with zero length. The DB_TRX_ID will
+ start right after any fixed-length columns. */
+ for (uint i = index->n_uniq; i--; ) {
+ trx_id_offset += index->fields[i].fixed_len;
+ }
+ }
+
+ const byte* ptr = rec + trx_id_offset
+ + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+ if (mach_read_from_4(ptr + BTR_EXTERN_LEN)) {
+ goto incompatible;
+ }
+
+ uint len = mach_read_from_4(ptr + BTR_EXTERN_LEN + 4);
+ if (!len
+ || mach_read_from_4(ptr + BTR_EXTERN_OFFSET)
+ != FIL_PAGE_DATA
+ || mach_read_from_4(ptr + BTR_EXTERN_SPACE_ID)
+ != space->id) {
+ goto incompatible;
+ }
+
+ buf_block_t* block = buf_page_get(
+ page_id_t(space->id,
+ mach_read_from_4(ptr + BTR_EXTERN_PAGE_NO)),
+ 0, RW_S_LATCH, mtr);
+ buf_block_dbg_add_level(block, SYNC_EXTERN_STORAGE);
+ if (fil_page_get_type(block->frame) != FIL_PAGE_TYPE_BLOB
+ || mach_read_from_4(&block->frame[FIL_PAGE_DATA
+ + BTR_BLOB_HDR_NEXT_PAGE_NO])
+ != FIL_NULL
+ || mach_read_from_4(&block->frame[FIL_PAGE_DATA
+ + BTR_BLOB_HDR_PART_LEN])
+ != len) {
+ goto incompatible;
+ }
+
+ /* The unused part of the BLOB page should be zero-filled. */
+ for (const byte* b = block->frame
+ + (FIL_PAGE_DATA + BTR_BLOB_HDR_SIZE) + len,
+ * const end = block->frame + srv_page_size
+ - BTR_EXTERN_LEN;
+ b < end; ) {
+ if (*b++) {
+ goto incompatible;
+ }
+ }
+
+ if (index->table->deserialise_columns(
+ &block->frame[FIL_PAGE_DATA + BTR_BLOB_HDR_SIZE],
+ len)) {
+ goto incompatible;
+ }
+
+ /* Proceed to initialize the default values of
+ any instantly added columns. */
+ }
+
mem_heap_t* heap = NULL;
ulint* offsets = rec_get_offsets(rec, index, NULL, true,
ULINT_UNDEFINED, &heap);
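The checks in the hunk above decode the BLOB pointer that a generic instant ALTER TABLE metadata record carries right after DB_TRX_ID,DB_ROLL_PTR. For reference, a hedged sketch of the 20-byte external field reference being read here (offsets as conventionally defined for BTR_EXTERN_*; treat the exact constants as assumptions):

    /* 20-byte externally stored field reference:
       [0..3]   BTR_EXTERN_SPACE_ID  tablespace id
       [4..7]   BTR_EXTERN_PAGE_NO   first BLOB page number
       [8..11]  BTR_EXTERN_OFFSET    byte offset on that page (FIL_PAGE_DATA)
       [12..19] BTR_EXTERN_LEN       64-bit length; the high 32 bits
                                     must be 0 for this metadata BLOB */
    struct ext_ref {
        uint32_t space_id, page_no, offset, len_hi, len_lo;
    };
    inline ext_ref decode_ext_ref(const byte* ptr)
    {
        return { uint32_t(mach_read_from_4(ptr)),
                 uint32_t(mach_read_from_4(ptr + 4)),
                 uint32_t(mach_read_from_4(ptr + 8)),
                 uint32_t(mach_read_from_4(ptr + 12)),
                 uint32_t(mach_read_from_4(ptr + 16)) };
    }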
@@ -489,7 +560,8 @@ inconsistent:
record, it is also OK to perform READ UNCOMMITTED and
then ignore any extra fields, provided that
trx_sys.is_registered(DB_TRX_ID). */
- if (rec_offs_n_fields(offsets) > index->n_fields
+ if (rec_offs_n_fields(offsets)
+ > ulint(index->n_fields) + !!index->table->instant
&& !trx_sys.is_registered(current_trx(),
row_get_rec_trx_id(rec, index,
offsets))) {
@@ -497,10 +569,11 @@ inconsistent:
}
for (unsigned i = index->n_core_fields; i < index->n_fields; i++) {
- ulint len;
- const byte* data = rec_get_nth_field(rec, offsets, i, &len);
dict_col_t* col = index->fields[i].col;
- ut_ad(!col->is_instant());
+ const unsigned o = i + !!index->table->instant;
+ ulint len;
+ const byte* data = rec_get_nth_field(rec, offsets, o, &len);
+ ut_ad(!col->is_added());
ut_ad(!col->def_val.data);
col->def_val.len = len;
switch (len) {
@@ -511,7 +584,7 @@ inconsistent:
continue;
}
ut_ad(len != UNIV_SQL_DEFAULT);
- if (!rec_offs_nth_extern(offsets, i)) {
+ if (!rec_offs_nth_extern(offsets, o)) {
col->def_val.data = mem_heap_dup(
index->table->heap, data, len);
} else if (len < BTR_EXTERN_FIELD_REF_SIZE
@@ -523,7 +596,7 @@ inconsistent:
} else {
col->def_val.data = btr_copy_externally_stored_field(
&col->def_val.len, data,
- dict_table_page_size(index->table),
+ cur.page_cur.block->zip_size(),
len, index->table->heap);
}
}
@@ -592,30 +665,49 @@ bool btr_cur_instant_root_init(dict_index_t* index, const page_t* page)
const uint16_t n = page_get_instant(page);
- if (n < index->n_uniq + DATA_ROLL_PTR || n > index->n_fields) {
+ if (n < index->n_uniq + DATA_ROLL_PTR) {
/* The PRIMARY KEY (or hidden DB_ROW_ID) and
DB_TRX_ID,DB_ROLL_PTR columns must always be present
- as 'core' fields. All fields, including those for
- instantly added columns, must be present in the data
- dictionary. */
+ as 'core' fields. */
return true;
}
- if (memcmp(page_get_infimum_rec(page), "infimum", 8)
- || memcmp(page_get_supremum_rec(page), "supremum", 8)) {
- /* In a later format, these fields in a FIL_PAGE_TYPE_INSTANT
- root page could be repurposed for something else. */
+ if (n > REC_MAX_N_FIELDS) {
return true;
}
index->n_core_fields = n;
- ut_ad(!index->is_dummy);
- ut_d(index->is_dummy = true);
- index->n_core_null_bytes = n == index->n_fields
- ? UT_BITS_IN_BYTES(unsigned(index->n_nullable))
- : UT_BITS_IN_BYTES(index->get_n_nullable(n));
- ut_d(index->is_dummy = false);
- return false;
+
+ const rec_t* infimum = page_get_infimum_rec(page);
+ const rec_t* supremum = page_get_supremum_rec(page);
+
+ if (!memcmp(infimum, "infimum", 8)
+ && !memcmp(supremum, "supremum", 8)) {
+ if (n > index->n_fields) {
+ /* All fields, including those for instantly
+ added columns, must be present in the
+ data dictionary. */
+ return true;
+ }
+
+ ut_ad(!index->is_dummy);
+ ut_d(index->is_dummy = true);
+ index->n_core_null_bytes = UT_BITS_IN_BYTES(
+ index->get_n_nullable(n));
+ ut_d(index->is_dummy = false);
+ return false;
+ }
+
+ if (memcmp(infimum, field_ref_zero, 8)
+ || memcmp(supremum, field_ref_zero, 7)) {
+ /* The infimum and supremum records must either contain
+ the original strings, or they must be filled with zero
+ bytes, except for the bytes that we have repurposed. */
+ return true;
+ }
+
+ index->n_core_null_bytes = supremum[7];
+ return index->n_core_null_bytes > 128;
}
/** Optimistically latches the leaf page or pages requested.
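The rewritten btr_cur_instant_root_init() now accepts two on-disk formats: if the root page still contains the literal "infimum"/"supremum" strings, the core null-flag byte count is derived from the number of nullable core fields; if those bytes are zero-filled, the count has been precomputed and stored in supremum[7]. A worked sketch of the derivation in the first case, assuming the usual UT_BITS_IN_BYTES(b) = (b + 7) / 8:

    /* One null-flag bit per nullable core field, rounded up to bytes;
       e.g. 9 nullable fields among the n core fields need 2 bytes. */
    inline unsigned core_null_bytes(unsigned n_nullable_core)
    {
        return (n_nullable_core + 7) / 8;
    }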
@@ -673,8 +765,7 @@ btr_cur_optimistic_latch_leaves(
cursor->left_block = btr_block_get(
page_id_t(cursor->index->table->space_id,
left_page_no),
- page_size_t(cursor->index->table->space
- ->flags),
+ cursor->index->table->space->zip_size(),
mode, cursor->index, mtr);
} else {
cursor->left_block = NULL;
@@ -775,7 +866,7 @@ btr_cur_latch_for_root_leaf(
@param[in] lock_intention lock intention for the tree operation
@param[in] rec record (current node_ptr)
@param[in] rec_size size of the record or max size of node_ptr
-@param[in] page_size page size
+@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] mtr mtr
@return true if tree modification is needed */
static
@@ -786,7 +877,7 @@ btr_cur_will_modify_tree(
btr_intention_t lock_intention,
const rec_t* rec,
ulint rec_size,
- const page_size_t& page_size,
+ ulint zip_size,
mtr_t* mtr)
{
ut_ad(!page_is_leaf(page));
@@ -874,9 +965,8 @@ btr_cur_will_modify_tree(
This is based on the worst case, and we could invoke
page_zip_available() on the block->page.zip. */
/* needs 2 records' space also for worst compress rate. */
- if (page_size.is_compressed()
- && page_zip_empty_size(index->n_fields,
- page_size.physical())
+ if (zip_size
+ && page_zip_empty_size(index->n_fields, zip_size)
<= rec_size * 2 + page_get_data_size(page)
+ page_dir_calc_reserved_space(n_recs + 2)) {
return(true);
@@ -1315,7 +1405,7 @@ btr_cur_search_to_nth_level_func(
Free blocks and read IO bandwidth should take priority
for them when the history list is growing huge. */
if (lock_intention == BTR_INTENTION_DELETE
- && trx_sys.history_size() > BTR_CUR_FINE_HISTORY_LENGTH
+ && trx_sys.rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
&& buf_get_n_pending_read_ios()) {
mtr_x_lock(dict_index_get_lock(index), mtr);
} else if (dict_index_is_spatial(index)
@@ -1372,7 +1462,7 @@ btr_cur_search_to_nth_level_func(
page_cursor = btr_cur_get_page_cur(cursor);
- const page_size_t page_size(index->table->space->flags);
+ const ulint zip_size = index->table->space->zip_size();
/* Start with the root page. */
page_id_t page_id(index->table->space_id, index->page);
@@ -1455,7 +1545,7 @@ search_loop:
retry_page_get:
ut_ad(n_blocks < BTR_MAX_LEVELS);
tree_savepoints[n_blocks] = mtr_set_savepoint(mtr);
- block = buf_page_get_gen(page_id, page_size, rw_latch, guess,
+ block = buf_page_get_gen(page_id, zip_size, rw_latch, guess,
buf_mode, file, line, mtr, &err);
tree_blocks[n_blocks] = block;
@@ -1491,7 +1581,7 @@ retry_page_get:
ut_ad(!dict_index_is_spatial(index));
if (ibuf_insert(IBUF_OP_INSERT, tuple, index,
- page_id, page_size, cursor->thr)) {
+ page_id, zip_size, cursor->thr)) {
cursor->flag = BTR_CUR_INSERT_TO_IBUF;
@@ -1504,7 +1594,7 @@ retry_page_get:
ut_ad(!dict_index_is_spatial(index));
if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple,
- index, page_id, page_size,
+ index, page_id, zip_size,
cursor->thr)) {
cursor->flag = BTR_CUR_DEL_MARK_IBUF;
@@ -1524,7 +1614,7 @@ retry_page_get:
/* The record cannot be purged yet. */
cursor->flag = BTR_CUR_DELETE_REF;
} else if (ibuf_insert(IBUF_OP_DELETE, tuple,
- index, page_id, page_size,
+ index, page_id, zip_size,
cursor->thr)) {
/* The purge was buffered. */
@@ -1571,7 +1661,7 @@ retry_page_get:
= mtr_set_savepoint(mtr);
get_block = buf_page_get_gen(
page_id_t(page_id.space(), left_page_no),
- page_size, rw_latch, NULL, buf_mode,
+ zip_size, rw_latch, NULL, buf_mode,
file, line, mtr, &err);
prev_tree_blocks[prev_n_blocks] = get_block;
prev_n_blocks++;
@@ -1601,7 +1691,7 @@ retry_page_get:
tree_blocks[n_blocks]);
tree_savepoints[n_blocks] = mtr_set_savepoint(mtr);
- block = buf_page_get_gen(page_id, page_size, rw_latch, NULL,
+ block = buf_page_get_gen(page_id, zip_size, rw_latch, NULL,
buf_mode, file, line, mtr, &err);
tree_blocks[n_blocks] = block;
@@ -1699,7 +1789,7 @@ retry_page_get:
if (rw_latch == RW_NO_LATCH) {
latch_leaves = btr_cur_latch_leaves(
- block, page_id, page_size, latch_mode,
+ block, page_id, zip_size, latch_mode,
cursor, mtr);
}
@@ -2063,7 +2153,7 @@ need_opposite_intention:
&& latch_mode == BTR_MODIFY_TREE
&& !btr_cur_will_modify_tree(
index, page, lock_intention, node_ptr,
- node_ptr_max_size, page_size, mtr)
+ node_ptr_max_size, zip_size, mtr)
&& !rtree_parent_modified) {
ut_ad(upper_rw_latch == RW_X_LATCH);
ut_ad(n_releases <= n_blocks);
@@ -2261,12 +2351,12 @@ need_opposite_intention:
if (latch_mode == BTR_CONT_MODIFY_TREE) {
child_block = btr_block_get(
- page_id, page_size, RW_X_LATCH,
+ page_id, zip_size, RW_X_LATCH,
index, mtr);
} else {
ut_ad(latch_mode == BTR_CONT_SEARCH_TREE);
child_block = btr_block_get(
- page_id, page_size, RW_SX_LATCH,
+ page_id, zip_size, RW_SX_LATCH,
index, mtr);
}
@@ -2321,9 +2411,10 @@ need_opposite_intention:
ut_ad(index->is_instant());
/* This may be a search tuple for
btr_pcur_restore_position(). */
- ut_ad(tuple->info_bits == REC_INFO_METADATA
- || tuple->info_bits == REC_INFO_MIN_REC_FLAG);
- } else if (rec_is_metadata(btr_cur_get_rec(cursor), index)) {
+ ut_ad(tuple->is_metadata()
+ || (tuple->is_metadata(tuple->info_bits
+ ^ REC_STATUS_INSTANT)));
+ } else if (rec_is_metadata(btr_cur_get_rec(cursor), *index)) {
/* Only user records belong in the adaptive
hash index. */
} else {
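The relaxed assertion admits both flavours of metadata tuple. Assuming the conventional constants (REC_INFO_MIN_REC_FLAG = 0x10, REC_INFO_DELETED_FLAG = 0x20, REC_STATUS_INSTANT = 4; treat the exact values as assumptions), the flag arithmetic is:

    /* instant ADD COLUMN metadata:    0x10 | 4        = 0x14
       generic instant ALTER metadata: 0x10 | 0x20 | 4 = 0x34
       A search tuple built for btr_pcur_restore_position() may carry the
       same bits with REC_STATUS_INSTANT XORed out, hence the second test. */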
@@ -2450,7 +2541,7 @@ btr_cur_open_at_index_side_func(
Free blocks and read IO bandwidth should take priority
for them when the history list is growing huge. */
if (lock_intention == BTR_INTENTION_DELETE
- && trx_sys.history_size() > BTR_CUR_FINE_HISTORY_LENGTH
+ && trx_sys.rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
&& buf_get_n_pending_read_ios()) {
mtr_x_lock(dict_index_get_lock(index), mtr);
} else {
@@ -2483,7 +2574,7 @@ btr_cur_open_at_index_side_func(
cursor->index = index;
page_id_t page_id(index->table->space_id, index->page);
- const page_size_t page_size(index->table->space->flags);
+ const ulint zip_size = index->table->space->zip_size();
if (root_leaf_rw_latch == RW_X_LATCH) {
node_ptr_max_size = btr_node_ptr_max_size(index);
@@ -2506,7 +2597,7 @@ btr_cur_open_at_index_side_func(
}
tree_savepoints[n_blocks] = mtr_set_savepoint(mtr);
- block = buf_page_get_gen(page_id, page_size, rw_latch, NULL,
+ block = buf_page_get_gen(page_id, zip_size, rw_latch, NULL,
BUF_GET, file, line, mtr, &err);
ut_ad((block != NULL) == (err == DB_SUCCESS));
tree_blocks[n_blocks] = block;
@@ -2562,12 +2653,12 @@ btr_cur_open_at_index_side_func(
if (height == level) {
if (srv_read_only_mode) {
btr_cur_latch_leaves(
- block, page_id, page_size,
+ block, page_id, zip_size,
latch_mode, cursor, mtr);
} else if (height == 0) {
if (rw_latch == RW_NO_LATCH) {
btr_cur_latch_leaves(
- block, page_id, page_size,
+ block, page_id, zip_size,
latch_mode, cursor, mtr);
}
/* In versions <= 3.23.52 we had
@@ -2698,7 +2789,7 @@ btr_cur_open_at_index_side_func(
if (latch_mode == BTR_MODIFY_TREE
&& !btr_cur_will_modify_tree(
cursor->index, page, lock_intention, node_ptr,
- node_ptr_max_size, page_size, mtr)) {
+ node_ptr_max_size, zip_size, mtr)) {
ut_ad(upper_rw_latch == RW_X_LATCH);
ut_ad(n_releases <= n_blocks);
@@ -2795,7 +2886,7 @@ btr_cur_open_at_rnd_pos_func(
Free blocks and read IO bandwidth should take priority
for them when the history list is growing huge. */
if (lock_intention == BTR_INTENTION_DELETE
- && trx_sys.history_size() > BTR_CUR_FINE_HISTORY_LENGTH
+ && trx_sys.rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
&& buf_get_n_pending_read_ios()) {
mtr_x_lock(dict_index_get_lock(index), mtr);
} else {
@@ -2840,7 +2931,7 @@ btr_cur_open_at_rnd_pos_func(
cursor->index = index;
page_id_t page_id(index->table->space_id, index->page);
- const page_size_t page_size(index->table->space->flags);
+ const ulint zip_size = index->table->space->zip_size();
dberr_t err = DB_SUCCESS;
if (root_leaf_rw_latch == RW_X_LATCH) {
@@ -2864,7 +2955,7 @@ btr_cur_open_at_rnd_pos_func(
}
tree_savepoints[n_blocks] = mtr_set_savepoint(mtr);
- block = buf_page_get_gen(page_id, page_size, rw_latch, NULL,
+ block = buf_page_get_gen(page_id, zip_size, rw_latch, NULL,
BUF_GET, file, line, mtr, &err);
tree_blocks[n_blocks] = block;
@@ -2917,7 +3008,7 @@ btr_cur_open_at_rnd_pos_func(
if (rw_latch == RW_NO_LATCH
|| srv_read_only_mode) {
btr_cur_latch_leaves(
- block, page_id, page_size,
+ block, page_id, zip_size,
latch_mode, cursor, mtr);
}
@@ -2993,7 +3084,7 @@ btr_cur_open_at_rnd_pos_func(
if (latch_mode == BTR_MODIFY_TREE
&& !btr_cur_will_modify_tree(
cursor->index, page, lock_intention, node_ptr,
- node_ptr_max_size, page_size, mtr)) {
+ node_ptr_max_size, zip_size, mtr)) {
ut_ad(upper_rw_latch == RW_X_LATCH);
ut_ad(n_releases <= n_blocks);
@@ -3170,8 +3261,11 @@ btr_cur_ins_lock_and_undo(
roll_ptr = roll_ptr_t(1) << ROLL_PTR_INSERT_FLAG_POS;
if (!(flags & BTR_KEEP_SYS_FLAG)) {
upd_sys:
- row_upd_index_entry_sys_field(entry, index,
- DATA_ROLL_PTR, roll_ptr);
+ dfield_t* r = dtuple_get_nth_field(
+ entry, index->db_roll_ptr());
+ ut_ad(r->len == DATA_ROLL_PTR_LEN);
+ trx_write_roll_ptr(static_cast<byte*>(r->data),
+ roll_ptr);
}
} else {
err = trx_undo_report_row_operation(thr, index, entry,
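For a fresh insert no undo log record is written, so a dummy DB_ROLL_PTR carrying only the insert flag is stored directly into the entry. A sketch of that layout, assuming ROLL_PTR_INSERT_FLAG_POS = 55 (DB_ROLL_PTR is 7 bytes, i.e. 56 bits, stored big-endian):

    /* The insert flag is the most significant of the 56 roll-pointer bits. */
    const roll_ptr_t dummy_roll_ptr = roll_ptr_t(1) << 55;
    byte buf[7];                              /* DATA_ROLL_PTR_LEN */
    trx_write_roll_ptr(buf, dummy_roll_ptr);  /* buf[0] == 0x80, rest 0 */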
@@ -3203,12 +3297,12 @@ btr_cur_prefetch_siblings(
if (left_page_no != FIL_NULL) {
buf_read_page_background(
page_id_t(block->page.id.space(), left_page_no),
- block->page.size, false);
+ block->zip_size(), false);
}
if (right_page_no != FIL_NULL) {
buf_read_page_background(
page_id_t(block->page.id.space(), right_page_no),
- block->page.size, false);
+ block->zip_size(), false);
}
if (left_page_no != FIL_NULL
|| right_page_no != FIL_NULL) {
@@ -3275,23 +3369,28 @@ btr_cur_optimistic_insert(
|| (flags & BTR_CREATE_FLAG));
ut_ad(dtuple_check_typed(entry));
- const page_size_t& page_size = block->page.size;
-
#ifdef UNIV_DEBUG_VALGRIND
- if (page_size.is_compressed()) {
- UNIV_MEM_ASSERT_RW(page, page_size.logical());
- UNIV_MEM_ASSERT_RW(block->page.zip.data, page_size.physical());
+ if (block->page.zip.data) {
+ UNIV_MEM_ASSERT_RW(page, srv_page_size);
+ UNIV_MEM_ASSERT_RW(block->page.zip.data,
+ block->zip_size());
}
#endif /* UNIV_DEBUG_VALGRIND */
leaf = page_is_leaf(page);
+ if (UNIV_UNLIKELY(entry->is_alter_metadata())) {
+ ut_ad(leaf);
+ goto convert_big_rec;
+ }
+
/* Calculate the record size when entry is converted to a record */
rec_size = rec_get_converted_size(index, entry, n_ext);
if (page_zip_rec_needs_ext(rec_size, page_is_comp(page),
- dtuple_get_n_fields(entry), page_size)) {
-
+ dtuple_get_n_fields(entry),
+ block->zip_size())) {
+convert_big_rec:
/* The record is so big that we have to store some fields
externally on separate database pages */
big_rec_vec = dtuple_convert_big_rec(index, 0, entry, &n_ext);
@@ -3304,7 +3403,7 @@ btr_cur_optimistic_insert(
rec_size = rec_get_converted_size(index, entry, n_ext);
}
- if (page_size.is_compressed() && page_zip_is_too_big(index, entry)) {
+ if (block->page.zip.data && page_zip_is_too_big(index, entry)) {
if (big_rec_vec != NULL) {
dtuple_convert_back_big_rec(index, entry, big_rec_vec);
}
@@ -3315,7 +3414,7 @@ btr_cur_optimistic_insert(
LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page),
goto fail);
- if (leaf && page_size.is_compressed()
+ if (block->page.zip.data && leaf
&& (page_get_data_size(page) + rec_size
>= dict_index_zip_pad_optimal_page_size(index))) {
/* If compression padding tells us that insertion will
@@ -3358,7 +3457,7 @@ fail_err:
we have to split the page to reserve enough free space for
future updates of records. */
- if (leaf && !page_size.is_compressed() && dict_index_is_clust(index)
+ if (leaf && !block->page.zip.data && dict_index_is_clust(index)
&& page_get_n_recs(page) >= 2
&& dict_index_get_space_reserve() + rec_size > max_size
&& (btr_page_get_split_rec_to_right(cursor, &dummy)
@@ -3421,7 +3520,7 @@ fail_err:
}
if (*rec) {
- } else if (page_size.is_compressed()) {
+ } else if (block->page.zip.data) {
ut_ad(!index->table->is_temporary());
/* Reset the IBUF_BITMAP_FREE bits, because
page_cur_tuple_insert() will have attempted page
@@ -3462,7 +3561,7 @@ fail_err:
} else if (index->disable_ahi) {
# endif
} else if (entry->info_bits & REC_INFO_MIN_REC_FLAG) {
- ut_ad(entry->info_bits == REC_INFO_METADATA);
+ ut_ad(entry->is_metadata());
ut_ad(index->is_instant());
ut_ad(flags == BTR_NO_LOCKING_FLAG);
} else {
@@ -3497,7 +3596,7 @@ fail_err:
committed mini-transaction, because in crash recovery,
the free bits could momentarily be set too high. */
- if (page_size.is_compressed()) {
+ if (block->page.zip.data) {
/* Update the bits in the same mini-transaction. */
ibuf_update_free_bits_zip(block, mtr);
} else {
@@ -3595,9 +3694,14 @@ btr_cur_pessimistic_insert(
}
if (page_zip_rec_needs_ext(rec_get_converted_size(index, entry, n_ext),
- dict_table_is_comp(index->table),
+ index->table->not_redundant(),
dtuple_get_n_fields(entry),
- dict_table_page_size(index->table))) {
+ btr_cur_get_block(cursor)->zip_size())
+ || UNIV_UNLIKELY(entry->is_alter_metadata()
+ && !dfield_is_ext(
+ dtuple_get_nth_field(
+ entry,
+ index->first_user_field())))) {
/* The record is so big that we have to store some fields
externally on separate database pages */
@@ -3670,10 +3774,10 @@ btr_cur_pessimistic_insert(
if (index->disable_ahi); else
# endif
if (entry->info_bits & REC_INFO_MIN_REC_FLAG) {
- ut_ad(entry->info_bits == REC_INFO_METADATA);
+ ut_ad(entry->is_metadata());
ut_ad(index->is_instant());
- ut_ad((flags & ulint(~BTR_KEEP_IBUF_BITMAP))
- == BTR_NO_LOCKING_FLAG);
+ ut_ad(flags & BTR_NO_LOCKING_FLAG);
+ ut_ad(!(flags & BTR_CREATE_FLAG));
} else {
btr_search_update_hash_on_insert(
cursor, btr_get_search_latch(index));
@@ -3755,6 +3859,50 @@ btr_cur_upd_lock_and_undo(
cmpl_info, rec, offsets, roll_ptr));
}
+/** Copy DB_TRX_ID,DB_ROLL_PTR to the redo log.
+@param[in] index clustered index
+@param[in] trx_id DB_TRX_ID
+@param[in] roll_ptr DB_ROLL_PTR
+@param[in,out] log_ptr redo log buffer
+@return current end of the redo log buffer */
+static byte*
+btr_cur_log_sys(
+ const dict_index_t* index,
+ trx_id_t trx_id,
+ roll_ptr_t roll_ptr,
+ byte* log_ptr)
+{
+ log_ptr += mach_write_compressed(log_ptr, index->db_trx_id());
+ /* Yes, we are writing DB_ROLL_PTR,DB_TRX_ID in reverse order,
+ after emitting the position of DB_TRX_ID in the index.
+ This is how row_upd_write_sys_vals_to_log()
+ originally worked, and it is part of the redo log format. */
+ trx_write_roll_ptr(log_ptr, roll_ptr);
+ log_ptr += DATA_ROLL_PTR_LEN;
+ log_ptr += mach_u64_write_compressed(log_ptr, trx_id);
+
+ return log_ptr;
+}
+
+/** Write DB_TRX_ID,DB_ROLL_PTR to a clustered index entry.
+@param[in,out] entry clustered index entry
+@param[in] index clustered index
+@param[in] trx_id DB_TRX_ID
+@param[in] roll_ptr DB_ROLL_PTR */
+static void btr_cur_write_sys(
+ dtuple_t* entry,
+ const dict_index_t* index,
+ trx_id_t trx_id,
+ roll_ptr_t roll_ptr)
+{
+ dfield_t* t = dtuple_get_nth_field(entry, index->db_trx_id());
+ ut_ad(t->len == DATA_TRX_ID_LEN);
+ trx_write_trx_id(static_cast<byte*>(t->data), trx_id);
+ dfield_t* r = dtuple_get_nth_field(entry, index->db_roll_ptr());
+ ut_ad(r->len == DATA_ROLL_PTR_LEN);
+ trx_write_roll_ptr(static_cast<byte*>(r->data), roll_ptr);
+}
+
/***********************************************************//**
Writes a redo log record of updating a record in-place. */
void
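Both new helpers above depend only on the fixed widths of the system columns: DB_TRX_ID occupies 6 bytes and DB_ROLL_PTR 7 bytes, stored big-endian. A minimal sketch of the writers they call, assuming the mach0data big-endian primitives:

    /* Sketch; the real trx_write_trx_id()/trx_write_roll_ptr() are
       thin wrappers along these lines. */
    inline void write_trx_id(byte* p, trx_id_t id)     /* DATA_TRX_ID_LEN = 6 */
    { mach_write_to_6(p, id); }
    inline void write_roll_ptr(byte* p, roll_ptr_t r)  /* DATA_ROLL_PTR_LEN = 7 */
    { mach_write_to_7(p, r); }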
@@ -3794,8 +3942,7 @@ btr_cur_update_in_place_log(
log_ptr++;
if (dict_index_is_clust(index)) {
- log_ptr = row_upd_write_sys_vals_to_log(
- index, trx_id, roll_ptr, log_ptr, mtr);
+ log_ptr = btr_cur_log_sys(index, trx_id, roll_ptr, log_ptr);
} else {
/* Dummy system fields for a secondary index */
/* TRX_ID Position */
@@ -4148,6 +4295,72 @@ func_exit:
return(err);
}
+/** Trim a metadata record during the rollback of instant ALTER TABLE.
+@param[in] entry metadata tuple
+@param[in] index primary key
+@param[in] update update vector for the rollback */
+ATTRIBUTE_COLD
+static void btr_cur_trim_alter_metadata(dtuple_t* entry,
+ const dict_index_t* index,
+ const upd_t* update)
+{
+ ut_ad(index->is_instant());
+ ut_ad(update->is_alter_metadata());
+ ut_ad(entry->is_alter_metadata());
+
+ ut_ad(update->fields[0].field_no == index->first_user_field());
+ ut_ad(update->fields[0].new_val.ext);
+ ut_ad(update->fields[0].new_val.len == FIELD_REF_SIZE);
+ ut_ad(entry->n_fields - 1 == index->n_fields);
+
+ const byte* ptr = static_cast<const byte*>(
+ update->fields[0].new_val.data);
+ ut_ad(!mach_read_from_4(ptr + BTR_EXTERN_LEN));
+ ut_ad(mach_read_from_4(ptr + BTR_EXTERN_LEN + 4) > 4);
+ ut_ad(mach_read_from_4(ptr + BTR_EXTERN_OFFSET) == FIL_PAGE_DATA);
+ ut_ad(mach_read_from_4(ptr + BTR_EXTERN_SPACE_ID)
+ == index->table->space->id);
+
+ ulint n_fields = update->fields[1].field_no;
+ ut_ad(n_fields <= index->n_fields);
+ if (n_fields != index->n_uniq) {
+ ut_ad(n_fields >= index->n_core_fields);
+ entry->n_fields = n_fields;
+ return;
+ }
+
+ /* This is based on dict_table_t::deserialise_columns()
+ and btr_cur_instant_init_low(). */
+ mtr_t mtr;
+ mtr.start();
+ buf_block_t* block = buf_page_get(
+ page_id_t(index->table->space->id,
+ mach_read_from_4(ptr + BTR_EXTERN_PAGE_NO)),
+ 0, RW_S_LATCH, &mtr);
+ buf_block_dbg_add_level(block, SYNC_EXTERN_STORAGE);
+ ut_ad(fil_page_get_type(block->frame) == FIL_PAGE_TYPE_BLOB);
+ ut_ad(mach_read_from_4(&block->frame[FIL_PAGE_DATA
+ + BTR_BLOB_HDR_NEXT_PAGE_NO])
+ == FIL_NULL);
+ ut_ad(mach_read_from_4(&block->frame[FIL_PAGE_DATA
+ + BTR_BLOB_HDR_PART_LEN])
+ == mach_read_from_4(ptr + BTR_EXTERN_LEN + 4));
+ n_fields = mach_read_from_4(
+ &block->frame[FIL_PAGE_DATA + BTR_BLOB_HDR_SIZE])
+ + index->first_user_field();
+ /* Rollback should not increase the number of fields. */
+ ut_ad(n_fields <= index->n_fields);
+ ut_ad(n_fields + 1 <= entry->n_fields);
+ /* dict_index_t::clear_instant_alter() cannot be invoked while
+ rollback of an instant ALTER TABLE transaction is in progress
+ for an is_alter_metadata() record. */
+ ut_ad(n_fields >= index->n_core_fields);
+
+ mtr.commit();
+ entry->n_fields = n_fields + 1;
+}
+
/** Trim an update tuple due to instant ADD COLUMN, if needed.
For normal records, the trailing instantly added fields that match
the initial default values are omitted.
@@ -4169,13 +4382,12 @@ btr_cur_trim(
const que_thr_t* thr)
{
if (!index->is_instant()) {
- } else if (UNIV_UNLIKELY(update->info_bits == REC_INFO_METADATA)) {
+ } else if (UNIV_UNLIKELY(update->is_metadata())) {
/* We are either updating a metadata record
- (instantly adding columns to a table where instant ADD was
+ (instant ALTER TABLE on a table where instant ALTER was
already executed) or rolling back such an operation. */
ut_ad(!upd_get_nth_field(update, 0)->orig_len);
- ut_ad(upd_get_nth_field(update, 0)->field_no
- > index->n_core_fields);
+ ut_ad(entry->is_metadata());
if (thr->graph->trx->in_rollback) {
/* This rollback can occur either as part of
@@ -4192,6 +4404,13 @@ btr_cur_trim(
first instantly added column logged by
innobase_add_instant_try(). */
ut_ad(update->n_fields > 2);
+ if (update->is_alter_metadata()) {
+ btr_cur_trim_alter_metadata(
+ entry, index, update);
+ return;
+ }
+ ut_ad(!entry->is_alter_metadata());
+
ulint n_fields = upd_get_nth_field(update, 0)
->field_no;
ut_ad(n_fields + 1 >= entry->n_fields);
@@ -4277,9 +4496,7 @@ btr_cur_optimistic_update(
|| trx_is_recv(thr_get_trx(thr)));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
- const bool is_metadata = update->info_bits == REC_INFO_METADATA;
-
- if (UNIV_LIKELY(!is_metadata)
+ if (UNIV_LIKELY(!update->is_metadata())
&& !row_upd_changes_field_size_or_external(index, *offsets,
update)) {
@@ -4305,6 +4522,10 @@ any_extern:
return(DB_OVERFLOW);
}
+ if (rec_is_metadata(rec, *index) && index->table->instant) {
+ goto any_extern;
+ }
+
for (i = 0; i < upd_get_n_fields(update); i++) {
if (dfield_is_ext(&upd_get_nth_field(update, i)->new_val)) {
@@ -4349,7 +4570,7 @@ any_extern:
if (page_zip_rec_needs_ext(new_rec_size, page_is_comp(page),
dict_index_get_n_fields(index),
- dict_table_page_size(index->table))) {
+ block->zip_size())) {
goto any_extern;
}
@@ -4363,10 +4584,10 @@ any_extern:
}
/* We limit max record size to 16k even for 64k page size. */
- if (new_rec_size >= COMPRESSED_REC_MAX_DATA_SIZE ||
- (!dict_table_is_comp(index->table)
- && new_rec_size >= REDUNDANT_REC_MAX_DATA_SIZE)) {
- err = DB_OVERFLOW;
+ if (new_rec_size >= COMPRESSED_REC_MAX_DATA_SIZE ||
+ (!dict_table_is_comp(index->table)
+ && new_rec_size >= REDUNDANT_REC_MAX_DATA_SIZE)) {
+ err = DB_OVERFLOW;
goto func_exit;
}
@@ -4439,8 +4660,8 @@ any_extern:
lock_rec_store_on_page_infimum(block, rec);
}
- if (UNIV_UNLIKELY(is_metadata)) {
- ut_ad(new_entry->info_bits == REC_INFO_METADATA);
+ if (UNIV_UNLIKELY(update->is_metadata())) {
+ ut_ad(new_entry->is_metadata());
ut_ad(index->is_instant());
/* This can be innobase_add_instant_try() performing a
subsequent instant ADD COLUMN, or its rollback by
@@ -4455,10 +4676,7 @@ any_extern:
page_cur_move_to_prev(page_cursor);
if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
- roll_ptr);
- row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
- trx_id);
+ btr_cur_write_sys(new_entry, index, trx_id, roll_ptr);
}
/* There are no externally stored columns in new_entry */
@@ -4466,7 +4684,7 @@ any_extern:
cursor, new_entry, offsets, heap, 0/*n_ext*/, mtr);
ut_a(rec); /* <- We calculated above the insert would fit */
- if (UNIV_UNLIKELY(is_metadata)) {
+ if (UNIV_UNLIKELY(update->is_metadata())) {
/* We must empty the PAGE_FREE list, because if this
was a rollback, the shortened metadata record
would have too many fields, and we would be unable to
@@ -4531,7 +4749,8 @@ btr_cur_pess_upd_restore_supremum(
const page_id_t page_id(block->page.id.space(), prev_page_no);
ut_ad(prev_page_no != FIL_NULL);
- prev_block = buf_page_get_with_no_latch(page_id, block->page.size, mtr);
+ prev_block = buf_page_get_with_no_latch(page_id, block->zip_size(),
+ mtr);
#ifdef UNIV_BTR_DEBUG
ut_a(btr_page_get_next(prev_block->frame, mtr)
== page_get_page_no(page));
@@ -4657,8 +4876,25 @@ btr_cur_pessimistic_update(
rec = btr_cur_get_rec(cursor);
ut_ad(rec_offs_validate(rec, index, *offsets));
- dtuple_t* new_entry = row_rec_to_index_entry(
- rec, index, *offsets, &n_ext, entry_heap);
+ dtuple_t* new_entry;
+
+ const bool is_metadata = rec_is_metadata(rec, *index);
+
+ if (UNIV_UNLIKELY(is_metadata)) {
+ ut_ad(update->is_metadata());
+ ut_ad(flags & BTR_NO_LOCKING_FLAG);
+ ut_ad(index->is_instant());
+ new_entry = row_metadata_to_tuple(
+ rec, index, *offsets,
+ &n_ext, entry_heap,
+ update->info_bits, !thr_get_trx(thr)->in_rollback);
+ ut_ad(new_entry->n_fields
+ == ulint(index->n_fields)
+ + update->is_alter_metadata());
+ } else {
+ new_entry = row_rec_to_index_entry(rec, index, *offsets,
+ &n_ext, entry_heap);
+ }
/* The page containing the clustered index record
corresponding to new_entry is latched in mtr. If the
@@ -4670,9 +4906,6 @@ btr_cur_pessimistic_update(
entry_heap);
btr_cur_trim(new_entry, index, update, thr);
- const bool is_metadata = new_entry->info_bits
- & REC_INFO_MIN_REC_FLAG;
-
/* We have to set appropriate extern storage bits in the new
record to be inserted: we have to remember which fields were such */
@@ -4700,11 +4933,14 @@ btr_cur_pessimistic_update(
}
if (page_zip_rec_needs_ext(
- rec_get_converted_size(index, new_entry, n_ext),
- page_is_comp(page),
- dict_index_get_n_fields(index),
- block->page.size)) {
-
+ rec_get_converted_size(index, new_entry, n_ext),
+ page_is_comp(page),
+ dict_index_get_n_fields(index),
+ block->zip_size())
+ || (UNIV_UNLIKELY(update->is_alter_metadata())
+ && !dfield_is_ext(dtuple_get_nth_field(
+ new_entry,
+ index->first_user_field())))) {
big_rec_vec = dtuple_convert_big_rec(index, update, new_entry, &n_ext);
if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
@@ -4753,10 +4989,7 @@ btr_cur_pessimistic_update(
}
if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
- roll_ptr);
- row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
- trx_id);
+ btr_cur_write_sys(new_entry, index, trx_id, roll_ptr);
}
if (!page_zip) {
@@ -4765,10 +4998,10 @@ btr_cur_pessimistic_update(
}
if (UNIV_UNLIKELY(is_metadata)) {
- ut_ad(new_entry->info_bits == REC_INFO_METADATA);
+ ut_ad(new_entry->is_metadata());
ut_ad(index->is_instant());
/* This can be innobase_add_instant_try() performing a
- subsequent instant ADD COLUMN, or its rollback by
+ subsequent instant ALTER TABLE, or its rollback by
row_undo_mod_clust_low(). */
ut_ad(flags & BTR_NO_LOCKING_FLAG);
} else {
@@ -4817,7 +5050,8 @@ btr_cur_pessimistic_update(
btr_cur_get_block(cursor), rec, block);
}
- if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) {
+ if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))
+ || rec_is_alter_metadata(rec, *index)) {
/* The new inserted record owns its possible externally
stored fields */
btr_cur_unmark_extern_fields(
@@ -5027,8 +5261,7 @@ btr_cur_del_mark_set_clust_rec_log(
*log_ptr++ = 0;
*log_ptr++ = 1;
- log_ptr = row_upd_write_sys_vals_to_log(
- index, trx_id, roll_ptr, log_ptr, mtr);
+ log_ptr = btr_cur_log_sys(index, trx_id, roll_ptr, log_ptr);
mach_write_to_2(log_ptr, page_offset(rec));
log_ptr += 2;
@@ -5460,42 +5693,41 @@ btr_cur_optimistic_delete_func(
if (UNIV_UNLIKELY(block->page.id.page_no() == cursor->index->page
&& page_get_n_recs(block->frame) == 1
+ (cursor->index->is_instant()
- && !rec_is_metadata(rec, cursor->index)))) {
+ && !rec_is_metadata(rec, *cursor->index)))) {
/* The whole index (and table) becomes logically empty.
Empty the whole page. That is, if we are deleting the
only user record, also delete the metadata record
- if one exists (it exists if and only if is_instant()).
+ if one exists for instant ADD COLUMN (not generic ALTER TABLE).
If we are deleting the metadata record and the
table becomes empty, clean up the whole page. */
dict_index_t* index = cursor->index;
+ const rec_t* first_rec = page_rec_get_next_const(
+ page_get_infimum_rec(block->frame));
ut_ad(!index->is_instant()
- || rec_is_metadata(
- page_rec_get_next_const(
- page_get_infimum_rec(block->frame)),
- index));
- if (UNIV_UNLIKELY(rec_get_info_bits(rec, page_rec_is_comp(rec))
- & REC_INFO_MIN_REC_FLAG)) {
- /* This should be rolling back instant ADD COLUMN.
- If this is a recovered transaction, then
- index->is_instant() will hold until the
- insert into SYS_COLUMNS is rolled back. */
- ut_ad(index->table->supports_instant());
- ut_ad(index->is_primary());
- } else {
- lock_update_delete(block, rec);
- }
- btr_page_empty(block, buf_block_get_page_zip(block),
- index, 0, mtr);
- page_cur_set_after_last(block, btr_cur_get_page_cur(cursor));
-
- if (index->is_primary()) {
- /* Concurrent access is prevented by
- root_block->lock X-latch, so this should be
- safe. */
- index->remove_instant();
+ || rec_is_metadata(first_rec, *index));
+ const bool is_metadata = rec_is_metadata(rec, *index);
+ /* We can remove the metadata when rolling back an
+ instant ALTER TABLE operation, or when deleting the
+ last user record on the page such that only metadata for
+ instant ADD COLUMN (not generic ALTER TABLE) remains. */
+ const bool empty_table = is_metadata
+ || !index->is_instant()
+ || (first_rec != rec
+ && rec_is_add_metadata(first_rec, *index));
+ if (UNIV_LIKELY(empty_table)) {
+ if (UNIV_LIKELY(!is_metadata)) {
+ lock_update_delete(block, rec);
+ }
+ btr_page_empty(block, buf_block_get_page_zip(block),
+ index, 0, mtr);
+ if (index->is_instant()) {
+ /* MDEV-17383: free metadata BLOBs! */
+ index->clear_instant_alter();
+ }
+ page_cur_set_after_last(block,
+ btr_cur_get_page_cur(cursor));
+ return true;
}
-
- return true;
}
offsets = rec_get_offsets(rec, cursor->index, offsets, true,
@@ -5677,10 +5909,10 @@ btr_cur_pessimistic_delete(
}
if (page_is_leaf(page)) {
- const bool is_metadata = rec_get_info_bits(
- rec, page_rec_is_comp(rec)) & REC_INFO_MIN_REC_FLAG;
+ const bool is_metadata = rec_is_metadata(
+ rec, page_rec_is_comp(rec));
if (UNIV_UNLIKELY(is_metadata)) {
- /* This should be rolling back instant ADD COLUMN.
+ /* This should be rolling back instant ALTER TABLE.
If this is a recovered transaction, then
index->is_instant() will hold until the
insert into SYS_COLUMNS is rolled back. */
@@ -5696,30 +5928,34 @@ btr_cur_pessimistic_delete(
goto discard_page;
}
} else if (page_get_n_recs(page) == 1
- + (index->is_instant()
- && !rec_is_metadata(rec, index))) {
+ + (index->is_instant() && !is_metadata)) {
/* The whole index (and table) becomes logically empty.
Empty the whole page. That is, if we are deleting the
only user record, also delete the metadata record
- if one exists (it exists if and only if is_instant()).
- If we are deleting the metadata record and the
+ if one exists for instant ADD COLUMN
+ (not generic ALTER TABLE).
+ If we are deleting the metadata record
+ (in the rollback of instant ALTER TABLE) and the
table becomes empty, clean up the whole page. */
+
+ const rec_t* first_rec = page_rec_get_next_const(
+ page_get_infimum_rec(page));
ut_ad(!index->is_instant()
- || rec_is_metadata(
- page_rec_get_next_const(
- page_get_infimum_rec(page)),
- index));
- btr_page_empty(block, page_zip, index, 0, mtr);
- page_cur_set_after_last(block,
- btr_cur_get_page_cur(cursor));
- if (index->is_primary()) {
- /* Concurrent access is prevented by
- index->lock and root_block->lock
- X-latch, so this should be safe. */
- index->remove_instant();
+ || rec_is_metadata(first_rec, *index));
+ if (is_metadata || !index->is_instant()
+ || (first_rec != rec
+ && rec_is_add_metadata(first_rec, *index))) {
+ btr_page_empty(block, page_zip, index, 0, mtr);
+ if (index->is_instant()) {
+ /* MDEV-17383: free metadata BLOBs! */
+ index->clear_instant_alter();
+ }
+ page_cur_set_after_last(
+ block,
+ btr_cur_get_page_cur(cursor));
+ ret = TRUE;
+ goto return_after_reservations;
}
- ret = TRUE;
- goto return_after_reservations;
}
if (UNIV_LIKELY(!is_metadata)) {
@@ -5827,7 +6063,7 @@ discard_page:
|| btr_cur_will_modify_tree(
index, page, BTR_INTENTION_DELETE, rec,
btr_node_ptr_max_size(index),
- block->page.size, mtr);
+ block->zip_size(), mtr);
page_cur_delete_rec(btr_cur_get_page_cur(cursor), index,
offsets, mtr);
#ifdef UNIV_ZIP_DEBUG
@@ -5994,7 +6230,7 @@ btr_estimate_n_rows_in_range_on_level(
const fil_space_t* space = index->table->space;
page_id_t page_id(space->id, slot1->page_no);
- const page_size_t page_size(space->flags);
+ const ulint zip_size = space->zip_size();
level = slot1->page_level;
@@ -6011,7 +6247,7 @@ btr_estimate_n_rows_in_range_on_level(
attempting to read a page that is no longer part of
the B-tree. We pass BUF_GET_POSSIBLY_FREED in order to
silence a debug assertion about this. */
- block = buf_page_get_gen(page_id, page_size, RW_S_LATCH,
+ block = buf_page_get_gen(page_id, zip_size, RW_S_LATCH,
NULL, BUF_GET_POSSIBLY_FREED,
__FILE__, __LINE__, &mtr, &err);
@@ -7232,7 +7468,7 @@ struct btr_blob_log_check_t {
mtr_x_lock(dict_index_get_lock(index), m_mtr);
m_pcur->btr_cur.page_cur.block = btr_block_get(
page_id_t(index->table->space_id, page_no),
- page_size_t(index->table->space->flags),
+ index->table->space->zip_size(),
RW_X_LATCH, index, m_mtr);
m_pcur->btr_cur.page_cur.rec
= m_pcur->btr_cur.page_cur.block->frame
@@ -7320,9 +7556,6 @@ btr_store_big_rec_extern_fields(
ut_ad(buf_block_get_frame(rec_block) == page_align(rec));
ut_a(dict_index_is_clust(index));
- ut_a(dict_table_page_size(index->table)
- .equals_to(rec_block->page.size));
-
btr_blob_log_check_t redo_log(pcur, btr_mtr, offsets, &rec_block,
&rec, op);
page_zip = buf_block_get_page_zip(rec_block);
@@ -7366,15 +7599,13 @@ btr_store_big_rec_extern_fields(
}
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
- const page_size_t page_size(dict_table_page_size(index->table));
-
/* Space available in compressed page to carry blob data */
- const ulint payload_size_zip = page_size.physical()
+ const ulint payload_size_zip = rec_block->physical_size()
- FIL_PAGE_DATA;
/* Space available in uncompressed page to carry blob data */
- const ulint payload_size = page_size.physical()
- - FIL_PAGE_DATA - BTR_BLOB_HDR_SIZE - FIL_PAGE_DATA_END;
+ const ulint payload_size = payload_size_zip
+ - (BTR_BLOB_HDR_SIZE + FIL_PAGE_DATA_END);
/* We have to create a file segment to the tablespace
for each field and put the pointer to the field in rec */
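With the usual constants (FIL_PAGE_DATA = 38, BTR_BLOB_HDR_SIZE = 8, FIL_PAGE_DATA_END = 8; treat the exact values as assumptions), the two payload figures work out as follows on a 16KiB page:

    payload_size_zip = 16384 - 38       = 16346  /* compressed BLOB bytes */
    payload_size     = 16346 - (8 + 8)  = 16330  /* uncompressed BLOB bytes */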
@@ -7430,7 +7661,7 @@ btr_store_big_rec_extern_fields(
mtr.set_flush_observer(btr_mtr->get_flush_observer());
buf_page_get(rec_block->page.id,
- rec_block->page.size, RW_X_LATCH, &mtr);
+ rec_block->zip_size(), RW_X_LATCH, &mtr);
if (prev_page_no == FIL_NULL) {
hint_page_no = 1 + rec_page_no;
@@ -7478,7 +7709,7 @@ btr_store_big_rec_extern_fields(
prev_block = buf_page_get(
page_id_t(space_id, prev_page_no),
- rec_block->page.size,
+ rec_block->zip_size(),
RW_X_LATCH, &mtr);
buf_block_dbg_add_level(prev_block,
@@ -7567,16 +7798,20 @@ btr_store_big_rec_extern_fields(
+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4,
rec_page_no,
MLOG_4BYTES, &mtr);
-
- /* Zero out the unused part of the page. */
- memset(page + page_zip_get_size(page_zip)
- - c_stream.avail_out,
- 0, c_stream.avail_out);
mlog_log_string(page
+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
page_zip_get_size(page_zip)
- - FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
+ - FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
+ - c_stream.avail_out,
&mtr);
+ /* Zero out the unused part of the page. */
+ if (c_stream.avail_out) {
+ mlog_memset(block,
+ page_zip_get_size(page_zip)
+ - c_stream.avail_out,
+ c_stream.avail_out,
+ 0, &mtr);
+ }
/* Copy the page to compressed storage,
because it will be flushed to disk
from there. */
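The reordering above is a redo-log optimisation: only the bytes the compressor actually produced are logged via mlog_log_string(), and the zero tail becomes one compact memset record instead of being logged byte for byte. A worked example, assuming FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION = 26 and a 16KiB compressed page left with c_stream.avail_out = 6000 unused bytes:

    before: mlog_log_string of 16384 - 26 = 16358 bytes (6000 of them zero)
    after:  mlog_log_string of 16358 - 6000 = 10358 bytes
            plus one MLOG_MEMSET record covering the 6000-byte zero tail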
@@ -7837,10 +8072,9 @@ btr_free_externally_stored_field(
ut_ad(space_id == index->table->space->id);
ut_ad(space_id == index->table->space_id);
- const page_size_t ext_page_size(dict_table_page_size(index->table));
- const page_size_t& rec_page_size(rec == NULL
- ? univ_page_size
- : ext_page_size);
+ const ulint ext_zip_size = index->table->space->zip_size();
+ const ulint rec_zip_size = rec ? ext_zip_size : 0;
+
if (rec == NULL) {
/* This is a call from row_purge_upd_exist_or_extern(). */
ut_ad(!page_zip);
@@ -7867,7 +8101,7 @@ btr_free_externally_stored_field(
#ifdef UNIV_DEBUG
rec_block =
#endif /* UNIV_DEBUG */
- buf_page_get(page_id, rec_page_size, RW_X_LATCH, &mtr);
+ buf_page_get(page_id, rec_zip_size, RW_X_LATCH, &mtr);
buf_block_dbg_add_level(rec_block, SYNC_NO_ORDER_CHECK);
page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);
@@ -7893,13 +8127,13 @@ btr_free_externally_stored_field(
}
ext_block = buf_page_get(
- page_id_t(space_id, page_no), ext_page_size,
+ page_id_t(space_id, page_no), ext_zip_size,
RW_X_LATCH, &mtr);
buf_block_dbg_add_level(ext_block, SYNC_EXTERN_STORAGE);
page = buf_block_get_frame(ext_block);
- if (ext_page_size.is_compressed()) {
+ if (ext_zip_size) {
/* Note that page_zip will be NULL
in row_purge_upd_exist_or_extern(). */
switch (fil_page_get_type(page)) {
@@ -8068,7 +8302,7 @@ btr_copy_blob_prefix(
mtr_start(&mtr);
block = buf_page_get(page_id_t(space_id, page_no),
- univ_page_size, RW_S_LATCH, &mtr);
+ 0, RW_S_LATCH, &mtr);
buf_block_dbg_add_level(block, SYNC_EXTERN_STORAGE);
page = buf_block_get_frame(block);
@@ -8106,7 +8340,7 @@ by a lock or a page latch.
@param[out] buf the externally stored part of the field,
or a prefix of it
@param[in] len length of buf, in bytes
-@param[in] page_size compressed BLOB page size
+@param[in] zip_size ROW_FORMAT=COMPRESSED page size
@param[in] space_id space id of the BLOB pages
@param[in] offset offset on the first BLOB page
@return number of bytes written to buf */
@@ -8115,7 +8349,7 @@ ulint
btr_copy_zblob_prefix(
byte* buf,
ulint len,
- const page_size_t& page_size,
+ ulint zip_size,
ulint space_id,
ulint page_no,
ulint offset)
@@ -8135,7 +8369,8 @@ btr_copy_zblob_prefix(
heap = mem_heap_create(40000);
page_zip_set_alloc(&d_stream, heap);
- ut_ad(page_size.is_compressed());
+ ut_ad(zip_size);
+ ut_ad(ut_is_2pow(zip_size));
ut_ad(space_id);
err = inflateInit(&d_stream);
@@ -8150,7 +8385,7 @@ btr_copy_zblob_prefix(
is being held on the clustered index record, or,
in row_merge_copy_blobs(), by an exclusive table lock. */
bpage = buf_page_get_zip(page_id_t(space_id, page_no),
- page_size);
+ zip_size);
if (UNIV_UNLIKELY(!bpage)) {
ib::error() << "Cannot load compressed BLOB "
@@ -8182,8 +8417,7 @@ btr_copy_zblob_prefix(
}
d_stream.next_in = bpage->zip.data + offset;
- d_stream.avail_in = static_cast<uInt>(page_size.physical()
- - offset);
+ d_stream.avail_in = uInt(zip_size - offset);
err = inflate(&d_stream, Z_NO_FLUSH);
switch (err) {
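The surrounding loop is ordinary zlib streaming: one z_stream survives across BLOB pages, and each page contributes zip_size - offset input bytes. A self-contained sketch of the same pattern in plain zlib (no InnoDB types; inflate_chunks is a hypothetical helper):

    #include <string.h>
    #include <zlib.h>

    /* Decompress a stream that arrives in n fixed-size chunks, the way
       each ROW_FORMAT=COMPRESSED BLOB page feeds one inflate() stream. */
    int inflate_chunks(const unsigned char* const* chunk, const unsigned* len,
                       unsigned n, unsigned char* out, unsigned out_len)
    {
        z_stream s;
        memset(&s, 0, sizeof s);           /* zalloc/zfree/opaque = Z_NULL */
        if (inflateInit(&s) != Z_OK) return -1;
        s.next_out = out;
        s.avail_out = out_len;
        int err = Z_OK;
        for (unsigned i = 0; i < n && err == Z_OK && s.avail_out; i++) {
            s.next_in = (Bytef*) chunk[i];
            s.avail_in = len[i];
            err = inflate(&s, Z_NO_FLUSH); /* Z_STREAM_END ends the stream */
        }
        inflateEnd(&s);
        return (err == Z_OK || err == Z_STREAM_END)
            ? int(out_len - s.avail_out) : -1;
    }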
@@ -8253,7 +8487,7 @@ by a lock or a page latch.
@param[out] buf the externally stored part of the
field, or a prefix of it
@param[in] len length of buf, in bytes
-@param[in] page_size BLOB page size
+@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] space_id space id of the first BLOB page
@param[in] page_no page number of the first BLOB page
@param[in] offset offset on the first BLOB page
@@ -8263,7 +8497,7 @@ ulint
btr_copy_externally_stored_field_prefix_low(
byte* buf,
ulint len,
- const page_size_t& page_size,
+ ulint zip_size,
ulint space_id,
ulint page_no,
ulint offset)
@@ -8272,11 +8506,10 @@ btr_copy_externally_stored_field_prefix_low(
return(0);
}
- if (page_size.is_compressed()) {
- return(btr_copy_zblob_prefix(buf, len, page_size,
+ if (zip_size) {
+ return(btr_copy_zblob_prefix(buf, len, zip_size,
space_id, page_no, offset));
} else {
- ut_ad(page_size.equals_to(univ_page_size));
return(btr_copy_blob_prefix(buf, len, space_id,
page_no, offset));
}
@@ -8286,7 +8519,7 @@ btr_copy_externally_stored_field_prefix_low(
The clustered index record must be protected by a lock or a page latch.
@param[out] buf the field, or a prefix of it
@param[in] len length of buf, in bytes
-@param[in] page_size BLOB page size
+@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] data 'internally' stored part of the field
containing also the reference to the external part; must be protected by
a lock or a page latch
@@ -8297,7 +8530,7 @@ ulint
btr_copy_externally_stored_field_prefix(
byte* buf,
ulint len,
- const page_size_t& page_size,
+ ulint zip_size,
const byte* data,
ulint local_len)
{
@@ -8336,7 +8569,7 @@ btr_copy_externally_stored_field_prefix(
return(local_len
+ btr_copy_externally_stored_field_prefix_low(buf + local_len,
len - local_len,
- page_size,
+ zip_size,
space_id, page_no,
offset));
}
@@ -8347,7 +8580,7 @@ The clustered index record must be protected by a lock or a page latch.
@param[in] data 'internally' stored part of the field
containing also the reference to the external part; must be protected by
a lock or a page latch
-@param[in] page_size BLOB page size
+@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] local_len length of data
@param[in,out] heap mem heap
@return the whole field copied to heap */
@@ -8355,7 +8588,7 @@ byte*
btr_copy_externally_stored_field(
ulint* len,
const byte* data,
- const page_size_t& page_size,
+ ulint zip_size,
ulint local_len,
mem_heap_t* heap)
{
@@ -8386,7 +8619,7 @@ btr_copy_externally_stored_field(
*len = local_len
+ btr_copy_externally_stored_field_prefix_low(buf + local_len,
extern_len,
- page_size,
+ zip_size,
space_id,
page_no, offset);
@@ -8397,7 +8630,7 @@ btr_copy_externally_stored_field(
@param[in] rec record in a clustered index; must be
protected by a lock or a page latch
@param[in] offset array returned by rec_get_offsets()
-@param[in] page_size BLOB page size
+@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] no field number
@param[out] len length of the field
@param[in,out] heap mem heap
@@ -8406,7 +8639,7 @@ byte*
btr_rec_copy_externally_stored_field(
const rec_t* rec,
const ulint* offsets,
- const page_size_t& page_size,
+ ulint zip_size,
ulint no,
ulint* len,
mem_heap_t* heap)
@@ -8440,5 +8673,5 @@ btr_rec_copy_externally_stored_field(
}
return(btr_copy_externally_stored_field(len, data,
- page_size, local_len, heap));
+ zip_size, local_len, heap));
}