diff options
Diffstat (limited to 'storage/innobase/btr')
-rw-r--r-- | storage/innobase/btr/btr0cur.cc | 202 |
1 files changed, 66 insertions, 136 deletions
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 47c0465f583..3a52011cf03 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2018, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2012, Facebook Inc. Copyright (c) 2015, 2020, MariaDB Corporation. @@ -931,46 +931,66 @@ btr_cur_will_modify_tree( const ulint n_recs = page_get_n_recs(page); if (lock_intention <= BTR_INTENTION_BOTH) { - ulint margin; + compile_time_assert(BTR_INTENTION_DELETE < BTR_INTENTION_BOTH); + compile_time_assert(BTR_INTENTION_BOTH < BTR_INTENTION_INSERT); - /* check delete will cause. (BTR_INTENTION_BOTH - or BTR_INTENTION_DELETE) */ - /* first, 2nd, 2nd-last and last records are 4 records */ - if (n_recs < 5) { - return(true); + if (!page_has_siblings(page)) { + return true; } - /* is first, 2nd or last record */ - if (page_rec_is_first(rec, page) - || (page_has_next(page) - && (page_rec_is_last(rec, page) - || page_rec_is_second_last(rec, page))) - || (page_has_prev(page) - && page_rec_is_second(rec, page))) { - return(true); - } + ulint margin = rec_size; if (lock_intention == BTR_INTENTION_BOTH) { + ulint level = btr_page_get_level(page); + + /* This value is the worst expectation for the node_ptr + records to be deleted from this page. It is used to + expect whether the cursor position can be the left_most + record in this page or not. */ + ulint max_nodes_deleted = 0; + + /* By modifying tree operations from the under of this + level, logically (2 ^ (level - 1)) opportunities to + deleting records in maximum even unreally rare case. */ + if (level > 7) { + /* TODO: adjust this practical limit. */ + max_nodes_deleted = 64; + } else if (level > 0) { + max_nodes_deleted = (ulint)1 << (level - 1); + } + /* check delete will cause. (BTR_INTENTION_BOTH + or BTR_INTENTION_DELETE) */ + if (n_recs <= max_nodes_deleted * 2 + || page_rec_is_first(rec, page)) { + /* The cursor record can be the left most record + in this page. */ + return true; + } + + if (page_has_prev(page) + && page_rec_distance_is_at_most( + page_get_infimum_rec(page), rec, + max_nodes_deleted)) { + return true; + } + + if (page_has_next(page) + && page_rec_distance_is_at_most( + rec, page_get_supremum_rec(page), + max_nodes_deleted)) { + return true; + } + /* Delete at leftmost record in a page causes delete & insert at its parent page. After that, the delete might cause btr_compress() and delete record at its - parent page. Thus we should consider max 2 deletes. */ - - margin = rec_size * 2; - } else { - ut_ad(lock_intention == BTR_INTENTION_DELETE); - - margin = rec_size; + parent page. Thus we should consider max deletes. */ + margin *= max_nodes_deleted; } - /* NOTE: call mach_read_from_4() directly to avoid assertion - failure. It is safe because we already have SX latch of the - index tree */ + + /* Safe because we already have SX latch of the index tree */ if (page_get_data_size(page) - < margin + BTR_CUR_PAGE_COMPRESS_LIMIT(index) - || (mach_read_from_4(page + FIL_PAGE_NEXT) - == FIL_NULL - && mach_read_from_4(page + FIL_PAGE_PREV) - == FIL_NULL)) { + < margin + BTR_CUR_PAGE_COMPRESS_LIMIT(index)) { return(true); } } @@ -2152,9 +2172,9 @@ need_opposite_intention: offsets2 = rec_get_offsets( first_rec, index, offsets2, false, ULINT_UNDEFINED, &heap); - cmp_rec_rec_with_match(node_ptr, first_rec, - offsets, offsets2, index, FALSE, - &matched_fields); + cmp_rec_rec(node_ptr, first_rec, + offsets, offsets2, index, false, + &matched_fields); if (matched_fields >= rec_offs_n_fields(offsets) - 1) { @@ -2170,10 +2190,10 @@ need_opposite_intention: offsets2 = rec_get_offsets( last_rec, index, offsets2, false, ULINT_UNDEFINED, &heap); - cmp_rec_rec_with_match( + cmp_rec_rec( node_ptr, last_rec, offsets, offsets2, index, - FALSE, &matched_fields); + false, &matched_fields); if (matched_fields >= rec_offs_n_fields(offsets) - 1) { detected_same_key_root = true; @@ -4463,7 +4483,6 @@ btr_cur_optimistic_update( dtuple_t* new_entry; roll_ptr_t roll_ptr; ulint i; - ulint n_ext; block = btr_cur_get_block(cursor); page = buf_block_get_frame(block); @@ -4542,10 +4561,8 @@ any_extern: + DTUPLE_EST_ALLOC(rec_offs_n_fields(*offsets))); } - new_entry = row_rec_to_index_entry(rec, index, *offsets, - &n_ext, *heap); - /* We checked above that there are no externally stored fields. */ - ut_a(!n_ext); + new_entry = row_rec_to_index_entry(rec, index, *offsets, *heap); + ut_ad(!dtuple_get_n_ext(new_entry)); /* The page containing the clustered index record corresponding to new_entry is latched in mtr. @@ -4806,7 +4823,6 @@ btr_cur_pessimistic_update( roll_ptr_t roll_ptr; ibool was_first; ulint n_reserved = 0; - ulint n_ext; *offsets = NULL; *big_rec = NULL; @@ -4877,15 +4893,14 @@ btr_cur_pessimistic_update( ut_ad(flags & BTR_NO_LOCKING_FLAG); ut_ad(index->is_instant()); new_entry = row_metadata_to_tuple( - rec, index, *offsets, - &n_ext, entry_heap, + rec, index, *offsets, entry_heap, update->info_bits, !thr_get_trx(thr)->in_rollback); ut_ad(new_entry->n_fields == ulint(index->n_fields) + update->is_alter_metadata()); } else { new_entry = row_rec_to_index_entry(rec, index, *offsets, - &n_ext, entry_heap); + entry_heap); } /* The page containing the clustered index record @@ -4896,7 +4911,6 @@ btr_cur_pessimistic_update( itself. Thus the following call is safe. */ row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update, entry_heap); - const ulint n = new_entry->n_fields; btr_cur_trim(new_entry, index, update, thr); /* We have to set appropriate extern storage bits in the new @@ -4904,10 +4918,6 @@ btr_cur_pessimistic_update( ut_ad(!page_is_comp(block->frame) || !rec_get_node_ptr_flag(rec)); ut_ad(rec_offs_validate(rec, index, *offsets)); - if (index->is_primary()) { - n_ext += btr_push_update_extern_fields( - new_entry, n, update, entry_heap); - } if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(*offsets)) { @@ -4928,6 +4938,8 @@ btr_cur_pessimistic_update( index, rec, block, *offsets, update, true, mtr); } + ulint n_ext = index->is_primary() ? dtuple_get_n_ext(new_entry) : 0; + if (page_zip_rec_needs_ext( rec_get_converted_size(index, new_entry, n_ext), page_is_comp(block->frame), @@ -6947,10 +6959,10 @@ btr_estimate_number_of_different_key_vals( ULINT_UNDEFINED, &heap); - cmp_rec_rec_with_match(rec, next_rec, - offsets_rec, offsets_next_rec, - index, stats_null_not_equal, - &matched_fields); + cmp_rec_rec(rec, next_rec, + offsets_rec, offsets_next_rec, + index, stats_null_not_equal, + &matched_fields); for (j = matched_fields; j < n_cols; j++) { /* We add one if this index record has @@ -7220,88 +7232,6 @@ btr_cur_unmark_extern_fields( } } -/** Flag the data tuple fields that are marked as extern storage in the -update vector. We use this function to remember which fields we must -mark as extern storage in a record inserted for an update. -@param[in,out] tuple clustered index record -@param[in] n number of fields in tuple, before any btr_cur_trim() -@param[in] update update vector -@param[in,out] heap memory heap -@return number of flagged external columns */ -ulint -btr_push_update_extern_fields(dtuple_t* tuple, ulint n, const upd_t* update, - mem_heap_t* heap) -{ - ulint n_pushed = 0; - const upd_field_t* uf = update->fields; - - ut_ad(n >= tuple->n_fields); - /* The clustered index record must always contain a - PRIMARY KEY and the system columns DB_TRX_ID,DB_ROLL_PTR. */ - ut_ad(tuple->n_fields > DATA_ROLL_PTR); - compile_time_assert(DATA_ROLL_PTR == 2); - - for (ulint un = upd_get_n_fields(update); un--; uf++) { - ut_ad(uf->field_no < n); - - if (dfield_is_ext(&uf->new_val) - && uf->field_no < tuple->n_fields) { - dfield_t* field = &tuple->fields[uf->field_no]; - - if (!dfield_is_ext(field)) { - dfield_set_ext(field); - n_pushed++; - } - - switch (uf->orig_len) { - byte* data; - ulint len; - byte* buf; - case 0: - break; - case BTR_EXTERN_FIELD_REF_SIZE: - /* Restore the original locally stored - part of the column. In the undo log, - InnoDB writes a longer prefix of externally - stored columns, so that column prefixes - in secondary indexes can be reconstructed. */ - dfield_set_data(field, - (byte*) dfield_get_data(field) - + dfield_get_len(field) - - BTR_EXTERN_FIELD_REF_SIZE, - BTR_EXTERN_FIELD_REF_SIZE); - dfield_set_ext(field); - break; - default: - /* Reconstruct the original locally - stored part of the column. The data - will have to be copied. */ - ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE); - - data = (byte*) dfield_get_data(field); - len = dfield_get_len(field); - - buf = (byte*) mem_heap_alloc(heap, - uf->orig_len); - /* Copy the locally stored prefix. */ - memcpy(buf, data, - unsigned(uf->orig_len) - - BTR_EXTERN_FIELD_REF_SIZE); - /* Copy the BLOB pointer. */ - memcpy(buf + unsigned(uf->orig_len) - - BTR_EXTERN_FIELD_REF_SIZE, - data + len - BTR_EXTERN_FIELD_REF_SIZE, - BTR_EXTERN_FIELD_REF_SIZE); - - dfield_set_data(field, buf, uf->orig_len); - dfield_set_ext(field); - } - } - } - - return(n_pushed); -} - /*******************************************************************//** Returns the length of a BLOB part stored on the header page. @return part length */ |