diff options
author | Michael Widenius <monty@askmonty.org> | 2013-03-26 00:03:13 +0200 |
---|---|---|
committer | Michael Widenius <monty@askmonty.org> | 2013-03-26 00:03:13 +0200 |
commit | 068c61978e3a81836d52b8caf11e044290159ad1 (patch) | |
tree | 2cbca861ab2cebe3bd99379ca9668bb483ca0d2a /storage/innobase/btr/btr0cur.cc | |
parent | 35bc8f9f4353b64da215e52ff6f1612a8ce66f43 (diff) | |
download | mariadb-git-068c61978e3a81836d52b8caf11e044290159ad1.tar.gz |
Temporary commit of 10.0-merge
Diffstat (limited to 'storage/innobase/btr/btr0cur.cc')
-rw-r--r-- | storage/innobase/btr/btr0cur.cc | 728 |
1 files changed, 426 insertions, 302 deletions
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index aeb16200f80..913b2088f24 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -2,6 +2,7 @@ Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. +Copyright (c) 2012, Facebook Inc. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -57,6 +58,7 @@ Created 10/16/1994 Heikki Tuuri #include "buf0lru.h" #include "btr0btr.h" #include "btr0sea.h" +#include "row0log.h" #include "row0purge.h" #include "row0upd.h" #include "trx0rec.h" @@ -69,13 +71,13 @@ Created 10/16/1994 Heikki Tuuri #include "zlib.h" /** Buffered B-tree operation types, introduced as part of delete buffering. */ -typedef enum btr_op_enum { +enum btr_op_t { BTR_NO_OP = 0, /*!< Not buffered */ BTR_INSERT_OP, /*!< Insert, do not ignore UNIQUE */ BTR_INSERT_IGNORE_UNIQUE_OP, /*!< Insert, ignoring UNIQUE */ BTR_DELETE_OP, /*!< Purge a delete-marked record */ BTR_DELMARK_OP /*!< Mark a record for deletion */ -} btr_op_t; +}; #ifdef UNIV_DEBUG /** If the following is set to TRUE, this module prints a lot of @@ -97,6 +99,11 @@ srv_refresh_innodb_monitor_stats(). Referenced by srv_printf_innodb_monitor(). */ UNIV_INTERN ulint btr_cur_n_sea_old = 0; +#ifdef UNIV_DEBUG +/* Flag to limit optimistic insert records */ +UNIV_INTERN uint btr_cur_limit_optimistic_insert_debug = 0; +#endif /* UNIV_DEBUG */ + /** In the optimistic insert, if the insert does not fit, but this much space can be released by page reorganize, then it is reorganized */ #define BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32) @@ -425,6 +432,14 @@ btr_cur_search_to_nth_level( cursor->low_match = ULINT_UNDEFINED; #endif + ibool s_latch_by_caller; + + s_latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED; + + ut_ad(!s_latch_by_caller + || mtr_memo_contains(mtr, dict_index_get_lock(index), + MTR_MEMO_S_LOCK)); + /* These flags are mutually exclusive, they are lumped together with the latch mode for historical reasons. It's possible for none of the flags to be set. */ @@ -460,11 +475,11 @@ btr_cur_search_to_nth_level( estimate = latch_mode & BTR_ESTIMATE; /* Turn the flags unrelated to the latch mode off. */ - latch_mode &= ~(BTR_INSERT - | BTR_DELETE_MARK - | BTR_DELETE - | BTR_ESTIMATE - | BTR_IGNORE_SEC_UNIQUE); + latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); + + ut_ad(!s_latch_by_caller + || latch_mode == BTR_SEARCH_LEAF + || latch_mode == BTR_MODIFY_LEAF); cursor->flag = BTR_CUR_BINARY; cursor->index = index; @@ -478,16 +493,16 @@ btr_cur_search_to_nth_level( #ifdef BTR_CUR_HASH_ADAPT -#ifdef UNIV_SEARCH_PERF_STAT +# ifdef UNIV_SEARCH_PERF_STAT info->n_searches++; -#endif +# endif if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ && !estimate -#ifdef PAGE_CUR_LE_OR_EXTENDS +# ifdef PAGE_CUR_LE_OR_EXTENDS && mode != PAGE_CUR_LE_OR_EXTENDS -#endif /* PAGE_CUR_LE_OR_EXTENDS */ +# endif /* PAGE_CUR_LE_OR_EXTENDS */ /* If !has_search_latch, we do a dirty read of btr_search_enabled below, and btr_search_guess_on_hash() will have to check it again. */ @@ -508,7 +523,7 @@ btr_cur_search_to_nth_level( return; } -#endif /* BTR_CUR_HASH_ADAPT */ +# endif /* BTR_CUR_HASH_ADAPT */ #endif /* BTR_CUR_ADAPT */ btr_cur_n_non_sea++; @@ -525,15 +540,19 @@ btr_cur_search_to_nth_level( savepoint = mtr_set_savepoint(mtr); - if (latch_mode == BTR_MODIFY_TREE) { + switch (latch_mode) { + case BTR_MODIFY_TREE: mtr_x_lock(dict_index_get_lock(index), mtr); - - } else if (latch_mode == BTR_CONT_MODIFY_TREE) { + break; + case BTR_CONT_MODIFY_TREE: /* Do nothing */ ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), MTR_MEMO_X_LOCK)); - } else { - mtr_s_lock(dict_index_get_lock(index), mtr); + break; + default: + if (!s_latch_by_caller) { + mtr_s_lock(dict_index_get_lock(index), mtr); + } } page_cursor = btr_cur_get_page_cur(cursor); @@ -687,6 +706,7 @@ retry_page_get: ? SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE); } + ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); ut_ad(index->id == btr_page_get_index_id(page)); if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) { @@ -711,13 +731,17 @@ retry_page_get: cursor, mtr); } - if (latch_mode != BTR_MODIFY_TREE - && latch_mode != BTR_CONT_MODIFY_TREE) { - - /* Release the tree s-latch */ - - mtr_release_s_latch_at_savepoint( - mtr, savepoint, dict_index_get_lock(index)); + switch (latch_mode) { + case BTR_MODIFY_TREE: + case BTR_CONT_MODIFY_TREE: + break; + default: + if (!s_latch_by_caller) { + /* Release the tree s-latch */ + mtr_release_s_latch_at_savepoint( + mtr, savepoint, + dict_index_get_lock(index)); + } } page_mode = mode; @@ -784,8 +808,7 @@ retry_page_get: will properly check btr_search_enabled again in btr_search_build_page_hash_index() before building a page hash index, while holding btr_search_latch. */ - if (UNIV_LIKELY(btr_search_enabled)) { - + if (btr_search_enabled) { btr_search_info_update(index, cursor); } #endif @@ -815,14 +838,16 @@ UNIV_INTERN void btr_cur_open_at_index_side_func( /*============================*/ - ibool from_left, /*!< in: TRUE if open to the low end, - FALSE if to the high end */ + bool from_left, /*!< in: true if open to the low end, + false if to the high end */ dict_index_t* index, /*!< in: index */ ulint latch_mode, /*!< in: latch mode */ - btr_cur_t* cursor, /*!< in: cursor */ + btr_cur_t* cursor, /*!< in/out: cursor */ + ulint level, /*!< in: level to search for + (0=leaf). */ const char* file, /*!< in: file name */ ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { page_cur_t* page_cursor; ulint page_no; @@ -839,16 +864,27 @@ btr_cur_open_at_index_side_func( rec_offs_init(offsets_); estimate = latch_mode & BTR_ESTIMATE; - latch_mode = latch_mode & ~BTR_ESTIMATE; + latch_mode &= ~BTR_ESTIMATE; + + ut_ad(level != ULINT_UNDEFINED); /* Store the position of the tree latch we push to mtr so that we know how to release it when we have latched the leaf node */ savepoint = mtr_set_savepoint(mtr); - if (latch_mode == BTR_MODIFY_TREE) { + switch (latch_mode) { + case BTR_CONT_MODIFY_TREE: + break; + case BTR_MODIFY_TREE: mtr_x_lock(dict_index_get_lock(index), mtr); - } else { + break; + case BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED: + case BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED: + ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), + MTR_MEMO_S_LOCK)); + break; + default: mtr_s_lock(dict_index_get_lock(index), mtr); } @@ -868,6 +904,7 @@ btr_cur_open_at_index_side_func( RW_NO_LATCH, NULL, BUF_GET, file, line, mtr); page = buf_block_get_frame(block); + ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); ut_ad(index->id == btr_page_get_index_id(page)); block->check_index_page_at_flush = TRUE; @@ -877,26 +914,40 @@ btr_cur_open_at_index_side_func( height = btr_page_get_level(page, mtr); root_height = height; + ut_a(height >= level); + } else { + /* TODO: flag the index corrupted if this fails */ + ut_ad(height == btr_page_get_level(page, mtr)); } - if (height == 0) { - btr_cur_latch_leaves(page, space, zip_size, page_no, - latch_mode, cursor, mtr); - - /* In versions <= 3.23.52 we had forgotten to - release the tree latch here. If in an index scan - we had to scan far to find a record visible to the - current transaction, that could starve others - waiting for the tree latch. */ - - if ((latch_mode != BTR_MODIFY_TREE) - && (latch_mode != BTR_CONT_MODIFY_TREE)) { + if (height == level) { + btr_cur_latch_leaves( + page, space, zip_size, page_no, + latch_mode & ~BTR_ALREADY_S_LATCHED, + cursor, mtr); - /* Release the tree s-latch */ + if (height == 0) { + /* In versions <= 3.23.52 we had + forgotten to release the tree latch + here. If in an index scan we had to + scan far to find a record visible to + the current transaction, that could + starve others waiting for the tree + latch. */ + + switch (latch_mode) { + case BTR_MODIFY_TREE: + case BTR_CONT_MODIFY_TREE: + case BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED: + case BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED: + break; + default: + /* Release the tree s-latch */ - mtr_release_s_latch_at_savepoint( - mtr, savepoint, - dict_index_get_lock(index)); + mtr_release_s_latch_at_savepoint( + mtr, savepoint, + dict_index_get_lock(index)); + } } } @@ -906,7 +957,7 @@ btr_cur_open_at_index_side_func( page_cur_set_after_last(block, page_cursor); } - if (height == 0) { + if (height == level) { if (estimate) { btr_cur_add_path_info(cursor, height, root_height); @@ -965,9 +1016,12 @@ btr_cur_open_at_rnd_pos_func( ulint* offsets = offsets_; rec_offs_init(offsets_); - if (latch_mode == BTR_MODIFY_TREE) { + switch (latch_mode) { + case BTR_MODIFY_TREE: mtr_x_lock(dict_index_get_lock(index), mtr); - } else { + break; + default: + ut_ad(latch_mode != BTR_CONT_MODIFY_TREE); mtr_s_lock(dict_index_get_lock(index), mtr); } @@ -988,6 +1042,7 @@ btr_cur_open_at_rnd_pos_func( RW_NO_LATCH, NULL, BUF_GET, file, line, mtr); page = buf_block_get_frame(block); + ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); ut_ad(index->id == btr_page_get_index_id(page)); if (height == ULINT_UNDEFINED) { @@ -1032,7 +1087,7 @@ be freed by reorganizing. Differs from btr_cur_optimistic_insert because no heuristics is applied to whether it pays to use CPU time for reorganizing the page or not. @return pointer to inserted record if succeed, else NULL */ -static +static __attribute__((nonnull, warn_unused_result)) rec_t* btr_cur_insert_if_possible( /*=======================*/ @@ -1040,6 +1095,8 @@ btr_cur_insert_if_possible( cursor stays valid */ const dtuple_t* tuple, /*!< in: tuple to insert; the size info need not have been stored to tuple */ + ulint** offsets,/*!< out: offsets on *rec */ + mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ ulint n_ext, /*!< in: number of externally stored columns */ mtr_t* mtr) /*!< in: mtr */ { @@ -1055,8 +1112,8 @@ btr_cur_insert_if_possible( page_cursor = btr_cur_get_page_cur(cursor); /* Now, try the insert */ - rec = page_cur_tuple_insert(page_cursor, tuple, - cursor->index, n_ext, mtr); + rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, + offsets, heap, n_ext, mtr); if (UNIV_UNLIKELY(!rec)) { /* If record did not fit, reorganize */ @@ -1066,19 +1123,21 @@ btr_cur_insert_if_possible( page_cur_search(block, cursor->index, tuple, PAGE_CUR_LE, page_cursor); - rec = page_cur_tuple_insert(page_cursor, tuple, - cursor->index, n_ext, mtr); + rec = page_cur_tuple_insert( + page_cursor, tuple, cursor->index, + offsets, heap, n_ext, mtr); } } + ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets)); return(rec); } /*************************************************************//** For an insert, checks the locks and does the undo logging if desired. @return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */ -UNIV_INLINE -ulint +UNIV_INLINE __attribute__((warn_unused_result, nonnull(2,3,5,6))) +dberr_t btr_cur_ins_lock_and_undo( /*======================*/ ulint flags, /*!< in: undo logging and locking flags: if @@ -1093,7 +1152,7 @@ btr_cur_ins_lock_and_undo( successor record */ { dict_index_t* index; - ulint err; + dberr_t err; rec_t* rec; roll_ptr_t roll_ptr; @@ -1103,6 +1162,10 @@ btr_cur_ins_lock_and_undo( rec = btr_cur_get_rec(cursor); index = cursor->index; + ut_ad(!dict_index_is_online_ddl(index) + || dict_index_is_clust(index) + || (flags & BTR_CREATE_FLAG)); + err = lock_rec_insert_check_and_lock(flags, rec, btr_cur_get_block(cursor), index, thr, mtr, inherit); @@ -1115,7 +1178,7 @@ btr_cur_ins_lock_and_undo( err = trx_undo_report_row_operation(flags, TRX_UNDO_INSERT_OP, thr, index, entry, - NULL, 0, NULL, + NULL, 0, NULL, NULL, &roll_ptr); if (err != DB_SUCCESS) { @@ -1140,13 +1203,13 @@ static void btr_cur_trx_report( /*===============*/ - trx_t* trx, /*!< in: transaction */ + trx_id_t trx_id, /*!< in: transaction id */ const dict_index_t* index, /*!< in: index */ const char* op) /*!< in: operation */ { - fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ", trx->id); + fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ", trx_id); fputs(op, stderr); - dict_index_name_print(stderr, trx, index); + dict_index_name_print(stderr, NULL, index); putc('\n', stderr); } #endif /* UNIV_DEBUG */ @@ -1159,7 +1222,7 @@ one record on the page, the insert will always succeed; this is to prevent trying to split a page with just one record. @return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */ UNIV_INTERN -ulint +dberr_t btr_cur_optimistic_insert( /*======================*/ ulint flags, /*!< in: undo logging and locking flags: if not @@ -1167,6 +1230,8 @@ btr_cur_optimistic_insert( specified */ btr_cur_t* cursor, /*!< in: cursor on page after which to insert; cursor stays valid */ + ulint** offsets,/*!< out: offsets on *rec */ + mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ dtuple_t* entry, /*!< in/out: entry to insert */ rec_t** rec, /*!< out: pointer to inserted record if succeed */ @@ -1193,13 +1258,16 @@ btr_cur_optimistic_insert( ibool inherit; ulint zip_size; ulint rec_size; - ulint err; + dberr_t err; *big_rec = NULL; block = btr_cur_get_block(cursor); page = buf_block_get_frame(block); index = cursor->index; + ut_ad(!dict_index_is_online_ddl(index) + || dict_index_is_clust(index) + || (flags & BTR_CREATE_FLAG)); zip_size = buf_block_get_zip_size(block); #ifdef UNIV_DEBUG_VALGRIND if (zip_size) { @@ -1214,7 +1282,7 @@ btr_cur_optimistic_insert( } #ifdef UNIV_DEBUG if (btr_cur_print_record_ops && thr) { - btr_cur_trx_report(thr_get_trx(thr), index, "insert into "); + btr_cur_trx_report(thr_get_trx(thr)->id, index, "insert "); dtuple_print(stderr, entry); } #endif /* UNIV_DEBUG */ @@ -1276,6 +1344,9 @@ btr_cur_optimistic_insert( } } + LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page), + goto fail); + /* If there have been many consecutive inserts, and we are on the leaf level, check if we have to split the page to reserve enough free space for future updates of records. */ @@ -1305,6 +1376,15 @@ fail_err: goto fail; } + /* If compression padding tells us that insertion will result in + too packed up page i.e.: which is likely to cause compression + failure then don't do an optimistic insertion. */ + if (zip_size && leaf + && (page_get_data_size(page) + rec_size + >= dict_index_zip_pad_optimal_page_size(index))) { + + goto fail; + } /* Check locks and write to the undo log, if specified */ err = btr_cur_ins_lock_and_undo(flags, cursor, entry, thr, mtr, &inherit); @@ -1321,7 +1401,7 @@ fail_err: { const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor); *rec = page_cur_tuple_insert(page_cursor, entry, index, - n_ext, mtr); + offsets, heap, n_ext, mtr); reorg = page_cursor_rec != page_cur_get_rec(page_cursor); if (UNIV_UNLIKELY(reorg)) { @@ -1351,7 +1431,7 @@ fail_err: page_cur_search(block, index, entry, PAGE_CUR_LE, page_cursor); *rec = page_cur_tuple_insert(page_cursor, entry, index, - n_ext, mtr); + offsets, heap, n_ext, mtr); if (UNIV_UNLIKELY(!*rec)) { if (zip_size != 0) { @@ -1426,7 +1506,7 @@ made on the leaf level, to avoid deadlocks, mtr must also own x-latches to brothers of page, if those brothers exist. @return DB_SUCCESS or error number */ UNIV_INTERN -ulint +dberr_t btr_cur_pessimistic_insert( /*=======================*/ ulint flags, /*!< in: undo logging and locking flags: if not @@ -1437,6 +1517,9 @@ btr_cur_pessimistic_insert( insertion will certainly succeed */ btr_cur_t* cursor, /*!< in: cursor after which to insert; cursor stays valid */ + ulint** offsets,/*!< out: offsets on *rec */ + mem_heap_t** heap, /*!< in/out: pointer to memory heap + that can be emptied, or NULL */ dtuple_t* entry, /*!< in/out: entry to insert */ rec_t** rec, /*!< out: pointer to inserted record if succeed */ @@ -1450,8 +1533,7 @@ btr_cur_pessimistic_insert( dict_index_t* index = cursor->index; ulint zip_size = dict_table_zip_size(index->table); big_rec_t* big_rec_vec = NULL; - mem_heap_t* heap = NULL; - ulint err; + dberr_t err; ibool dummy_inh; ibool success; ulint n_extents = 0; @@ -1466,6 +1548,9 @@ btr_cur_pessimistic_insert( MTR_MEMO_X_LOCK)); ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor), MTR_MEMO_PAGE_X_FIX)); + ut_ad(!dict_index_is_online_ddl(index) + || dict_index_is_clust(index) + || (flags & BTR_CREATE_FLAG)); cursor->flag = BTR_CUR_BINARY; @@ -1523,13 +1608,11 @@ btr_cur_pessimistic_insert( == buf_block_get_page_no(btr_cur_get_block(cursor))) { /* The page is the root page */ - *rec = btr_root_raise_and_insert(cursor, entry, n_ext, mtr); + *rec = btr_root_raise_and_insert( + flags, cursor, offsets, heap, entry, n_ext, mtr); } else { - *rec = btr_page_split_and_insert(cursor, entry, n_ext, mtr); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); + *rec = btr_page_split_and_insert( + flags, cursor, offsets, heap, entry, n_ext, mtr); } ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec); @@ -1556,29 +1639,36 @@ btr_cur_pessimistic_insert( /*************************************************************//** For an update, checks the locks and does the undo logging. @return DB_SUCCESS, DB_WAIT_LOCK, or error number */ -UNIV_INLINE -ulint +UNIV_INLINE __attribute__((warn_unused_result, nonnull(2,3,6,7))) +dberr_t btr_cur_upd_lock_and_undo( /*======================*/ ulint flags, /*!< in: undo logging and locking flags */ btr_cur_t* cursor, /*!< in: cursor on record to update */ + const ulint* offsets,/*!< in: rec_get_offsets() on cursor */ const upd_t* update, /*!< in: update vector */ ulint cmpl_info,/*!< in: compiler info on secondary index updates */ - que_thr_t* thr, /*!< in: query thread */ + que_thr_t* thr, /*!< in: query thread + (can be NULL if BTR_NO_LOCKING_FLAG) */ mtr_t* mtr, /*!< in/out: mini-transaction */ roll_ptr_t* roll_ptr)/*!< out: roll pointer */ { dict_index_t* index; - rec_t* rec; - ulint err; + const rec_t* rec; + dberr_t err; - ut_ad(cursor && update && thr && roll_ptr); + ut_ad(thr || (flags & BTR_NO_LOCKING_FLAG)); rec = btr_cur_get_rec(cursor); index = cursor->index; + ut_ad(rec_offs_validate(rec, index, offsets)); + if (!dict_index_is_clust(index)) { + ut_ad(dict_index_is_online_ddl(index) + == !!(flags & BTR_CREATE_FLAG)); + /* We do undo logging only when we update a clustered index record */ return(lock_sec_rec_modify_check_and_lock( @@ -1589,50 +1679,39 @@ btr_cur_upd_lock_and_undo( /* Check if we have to wait for a lock: enqueue an explicit lock request if yes */ - err = DB_SUCCESS; - if (!(flags & BTR_NO_LOCKING_FLAG)) { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs_init(offsets_); - err = lock_clust_rec_modify_check_and_lock( flags, btr_cur_get_block(cursor), rec, index, - rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap), thr); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } + offsets, thr); if (err != DB_SUCCESS) { - return(err); } } /* Append the info about the update in the undo log */ - err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr, - index, NULL, update, - cmpl_info, rec, roll_ptr); - return(err); + return(trx_undo_report_row_operation( + flags, TRX_UNDO_MODIFY_OP, thr, + index, NULL, update, + cmpl_info, rec, offsets, roll_ptr)); } /***********************************************************//** Writes a redo log record of updating a record in-place. */ -UNIV_INLINE +UNIV_INLINE __attribute__((nonnull)) void btr_cur_update_in_place_log( /*========================*/ ulint flags, /*!< in: flags */ - rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in: index where cursor positioned */ + const rec_t* rec, /*!< in: record */ + dict_index_t* index, /*!< in: index of the record */ const upd_t* update, /*!< in: update vector */ - trx_t* trx, /*!< in: transaction */ + trx_id_t trx_id, /*!< in: transaction id */ roll_ptr_t roll_ptr, /*!< in: roll ptr */ mtr_t* mtr) /*!< in: mtr */ { - byte* log_ptr; - page_t* page = page_align(rec); + byte* log_ptr; + const page_t* page = page_align(rec); ut_ad(flags < 256); ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); @@ -1657,8 +1736,8 @@ btr_cur_update_in_place_log( mach_write_to_1(log_ptr, flags); log_ptr++; - log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr, - mtr); + log_ptr = row_upd_write_sys_vals_to_log( + index, trx_id, roll_ptr, log_ptr, mtr); mach_write_to_2(log_ptr, page_offset(rec)); log_ptr += 2; @@ -1761,6 +1840,13 @@ btr_cur_update_alloc_zip( FALSE=update-in-place */ mtr_t* mtr) /*!< in: mini-transaction */ { + + /* Have a local copy of the variables as these can change + dynamically. */ + bool log_compressed = page_log_compressed_pages; + ulint compression_level = page_compression_level; + page_t* page = buf_block_get_frame(block); + ut_a(page_zip == buf_block_get_page_zip(block)); ut_ad(page_zip); ut_ad(!dict_index_is_ibuf(index)); @@ -1776,12 +1862,27 @@ btr_cur_update_alloc_zip( return(FALSE); } - if (!page_zip_compress(page_zip, buf_block_get_frame(block), - index, mtr)) { + page = buf_block_get_frame(block); + + if (create && page_is_leaf(page) + && (length + page_get_data_size(page) + >= dict_index_zip_pad_optimal_page_size(index))) { + + return(FALSE); + } + + if (!page_zip_compress( + page_zip, page, index, compression_level, + log_compressed ? mtr : NULL)) { /* Unable to compress the page */ return(FALSE); } + if (mtr && !log_compressed) { + page_zip_compress_write_log_no_data( + compression_level, page, index, mtr); + } + /* After recompressing a page, we must make sure that the free bits in the insert buffer bitmap will not exceed the free space on the page. Because this function will not attempt @@ -1795,8 +1896,7 @@ btr_cur_update_alloc_zip( if (!page_zip_available(page_zip, dict_index_is_clust(index), length, create)) { /* Out of space: reset the free bits. */ - if (!dict_index_is_clust(index) - && page_is_leaf(buf_block_get_frame(block))) { + if (!dict_index_is_clust(index) && page_is_leaf(page)) { ibuf_reset_free_bits(block); } return(FALSE); @@ -1810,45 +1910,50 @@ Updates a record when the update causes no size changes in its fields. We assume here that the ordering fields of the record do not change. @return DB_SUCCESS or error number */ UNIV_INTERN -ulint +dberr_t btr_cur_update_in_place( /*====================*/ ulint flags, /*!< in: undo logging and locking flags */ btr_cur_t* cursor, /*!< in: cursor on the record to update; cursor stays valid and positioned on the same record */ + const ulint* offsets,/*!< in: offsets on cursor->page_cur.rec */ const upd_t* update, /*!< in: update vector */ ulint cmpl_info,/*!< in: compiler info on secondary index updates */ - que_thr_t* thr, /*!< in: query thread */ + que_thr_t* thr, /*!< in: query thread, or NULL if + appropriate flags are set */ + trx_id_t trx_id, /*!< in: transaction id */ mtr_t* mtr) /*!< in: mtr; must be committed before latching any further pages */ { dict_index_t* index; buf_block_t* block; page_zip_des_t* page_zip; - ulint err; + dberr_t err; rec_t* rec; roll_ptr_t roll_ptr = 0; - trx_t* trx; ulint was_delete_marked; ibool is_hashed; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); rec = btr_cur_get_rec(cursor); index = cursor->index; + ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); /* The insert buffer tree should never be updated in place. */ ut_ad(!dict_index_is_ibuf(index)); + ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG) + || dict_index_is_clust(index)); + ut_ad(!thr || thr_get_trx(thr)->id == trx_id); + ut_ad(thr || (flags & ~BTR_KEEP_POS_FLAG) + == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG + | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG)); + ut_ad(fil_page_get_type(btr_cur_get_page(cursor)) == FIL_PAGE_INDEX); + ut_ad(btr_page_get_index_id(btr_cur_get_page(cursor)) == index->id); - trx = thr_get_trx(thr); - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); #ifdef UNIV_DEBUG - if (btr_cur_print_record_ops && thr) { - btr_cur_trx_report(trx, index, "update "); + if (btr_cur_print_record_ops) { + btr_cur_trx_report(trx_id, index, "update "); rec_print_new(stderr, rec, offsets); } #endif /* UNIV_DEBUG */ @@ -1864,19 +1969,17 @@ btr_cur_update_in_place( } /* Do lock checking and undo logging */ - err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info, + err = btr_cur_upd_lock_and_undo(flags, cursor, offsets, + update, cmpl_info, thr, mtr, &roll_ptr); if (UNIV_UNLIKELY(err != DB_SUCCESS)) { - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } return(err); } if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_rec_sys_fields(rec, NULL, - index, offsets, trx, roll_ptr); + row_upd_rec_sys_fields(rec, NULL, index, offsets, + thr_get_trx(thr), roll_ptr); } was_delete_marked = rec_get_deleted_flag( @@ -1917,7 +2020,7 @@ btr_cur_update_in_place( } btr_cur_update_in_place_log(flags, rec, index, update, - trx, roll_ptr, mtr); + trx_id, roll_ptr, mtr); if (was_delete_marked && !rec_get_deleted_flag( @@ -1929,9 +2032,6 @@ btr_cur_update_in_place( rec, index, offsets, mtr); } - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } return(DB_SUCCESS); } @@ -1945,24 +2045,28 @@ fields of the record do not change. DB_UNDERFLOW if the page would become too empty, or DB_ZIP_OVERFLOW if there is not enough space left on the compressed page */ UNIV_INTERN -ulint +dberr_t btr_cur_optimistic_update( /*======================*/ ulint flags, /*!< in: undo logging and locking flags */ btr_cur_t* cursor, /*!< in: cursor on the record to update; cursor stays valid and positioned on the same record */ + ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */ + mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ const upd_t* update, /*!< in: update vector; this must also contain trx id and roll ptr fields */ ulint cmpl_info,/*!< in: compiler info on secondary index updates */ - que_thr_t* thr, /*!< in: query thread */ + que_thr_t* thr, /*!< in: query thread, or NULL if + appropriate flags are set */ + trx_id_t trx_id, /*!< in: transaction id */ mtr_t* mtr) /*!< in: mtr; must be committed before latching any further pages */ { dict_index_t* index; page_cur_t* page_cursor; - ulint err; + dberr_t err; buf_block_t* block; page_t* page; page_zip_des_t* page_zip; @@ -1972,10 +2076,8 @@ btr_cur_optimistic_update( ulint old_rec_size; dtuple_t* new_entry; roll_ptr_t roll_ptr; - mem_heap_t* heap; ulint i; ulint n_ext; - ulint* offsets; block = btr_cur_get_block(cursor); page = buf_block_get_frame(block); @@ -1985,39 +2087,46 @@ btr_cur_optimistic_update( ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); /* The insert buffer tree should never be updated in place. */ ut_ad(!dict_index_is_ibuf(index)); - - heap = mem_heap_create(1024); - offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap); + ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG) + || dict_index_is_clust(index)); + ut_ad(!thr || thr_get_trx(thr)->id == trx_id); + ut_ad(thr || (flags & ~BTR_KEEP_POS_FLAG) + == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG + | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG)); + ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); + ut_ad(btr_page_get_index_id(page) == index->id); + + *offsets = rec_get_offsets(rec, index, *offsets, + ULINT_UNDEFINED, heap); #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG - ut_a(!rec_offs_any_null_extern(rec, offsets) + ut_a(!rec_offs_any_null_extern(rec, *offsets) || trx_is_recv(thr_get_trx(thr))); #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ #ifdef UNIV_DEBUG - if (btr_cur_print_record_ops && thr) { - btr_cur_trx_report(thr_get_trx(thr), index, "update "); - rec_print_new(stderr, rec, offsets); + if (btr_cur_print_record_ops) { + btr_cur_trx_report(trx_id, index, "update "); + rec_print_new(stderr, rec, *offsets); } #endif /* UNIV_DEBUG */ - if (!row_upd_changes_field_size_or_external(index, offsets, update)) { + if (!row_upd_changes_field_size_or_external(index, *offsets, update)) { /* The simplest and the most common case: the update does not change the size of any field and none of the updated fields is externally stored in rec or update, and there is enough space on the compressed page to log the update. */ - mem_heap_free(heap); - return(btr_cur_update_in_place(flags, cursor, update, - cmpl_info, thr, mtr)); + return(btr_cur_update_in_place( + flags, cursor, *offsets, update, + cmpl_info, thr, trx_id, mtr)); } - if (rec_offs_any_extern(offsets)) { + if (rec_offs_any_extern(*offsets)) { any_extern: /* Externally stored fields are treated in pessimistic update */ - mem_heap_free(heap); return(DB_OVERFLOW); } @@ -2030,8 +2139,14 @@ any_extern: page_cursor = btr_cur_get_page_cur(cursor); - new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets, - &n_ext, heap); + if (!*heap) { + *heap = mem_heap_create( + rec_offs_size(*offsets) + + DTUPLE_EST_ALLOC(rec_offs_n_fields(*offsets))); + } + + new_entry = row_rec_to_index_entry(rec, index, *offsets, + &n_ext, *heap); /* We checked above that there are no externally stored fields. */ ut_a(!n_ext); @@ -2039,8 +2154,8 @@ any_extern: corresponding to new_entry is latched in mtr. Thus the following call is safe. */ row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update, - FALSE, heap); - old_rec_size = rec_offs_size(offsets); + FALSE, *heap); + old_rec_size = rec_offs_size(*offsets); new_rec_size = rec_get_converted_size(index, new_entry, 0); page_zip = buf_block_get_page_zip(block); @@ -2051,16 +2166,14 @@ any_extern: if (page_zip && !btr_cur_update_alloc_zip(page_zip, block, index, new_rec_size, TRUE, mtr)) { - err = DB_ZIP_OVERFLOW; - goto err_exit; + return(DB_ZIP_OVERFLOW); } if (UNIV_UNLIKELY(new_rec_size >= (page_get_free_space_of_empty(page_is_comp(page)) / 2))) { - err = DB_OVERFLOW; - goto err_exit; + return(DB_OVERFLOW); } if (UNIV_UNLIKELY(page_get_data_size(page) @@ -2069,8 +2182,7 @@ any_extern: /* The page would become too empty */ - err = DB_UNDERFLOW; - goto err_exit; + return(DB_UNDERFLOW); } /* We do not attempt to reorganize if the page is compressed. @@ -2088,16 +2200,16 @@ any_extern: reorganize: for simplicity, we decide what to do assuming a reorganization is needed, though it might not be necessary */ - err = DB_OVERFLOW; - goto err_exit; + return(DB_OVERFLOW); } /* Do lock checking and undo logging */ - err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info, + err = btr_cur_upd_lock_and_undo(flags, cursor, *offsets, + update, cmpl_info, thr, mtr, &roll_ptr); if (err != DB_SUCCESS) { - goto err_exit; + return(err); } /* Ok, we may do the replacement. Store on the page infimum the @@ -2108,13 +2220,7 @@ any_extern: btr_search_update_hash_on_delete(cursor); - /* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above - invokes rec_offs_make_valid() to point to the copied record that - the fields of new_entry point to. We have to undo it here. */ - ut_ad(rec_offs_validate(NULL, index, offsets)); - rec_offs_make_valid(page_cur_get_rec(page_cursor), index, offsets); - - page_cur_delete_rec(page_cursor, index, offsets, mtr); + page_cur_delete_rec(page_cursor, index, *offsets, mtr); page_cur_move_to_prev(page_cursor); @@ -2122,11 +2228,12 @@ any_extern: row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR, roll_ptr); row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID, - thr_get_trx(thr)->id); + trx_id); } /* There are no externally stored columns in new_entry */ - rec = btr_cur_insert_if_possible(cursor, new_entry, 0/*n_ext*/, mtr); + rec = btr_cur_insert_if_possible( + cursor, new_entry, offsets, heap, 0/*n_ext*/, mtr); ut_a(rec); /* <- We calculated above the insert would fit */ if (page_zip && !dict_index_is_clust(index) @@ -2141,10 +2248,7 @@ any_extern: page_cur_move_to_next(page_cursor); - err = DB_SUCCESS; -err_exit: - mem_heap_free(heap); - return(err); + return(DB_SUCCESS); } /*************************************************************//** @@ -2203,7 +2307,7 @@ own x-latches to brothers of page, if those brothers exist. We assume here that the ordering fields of the record do not change. @return DB_SUCCESS or error code */ UNIV_INTERN -ulint +dberr_t btr_cur_pessimistic_update( /*=======================*/ ulint flags, /*!< in: undo logging, locking, and rollback @@ -2211,7 +2315,13 @@ btr_cur_pessimistic_update( btr_cur_t* cursor, /*!< in/out: cursor on the record to update; cursor may become invalid if *big_rec == NULL || !(flags & BTR_KEEP_POS_FLAG) */ - mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ + ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */ + mem_heap_t** offsets_heap, + /*!< in/out: pointer to memory heap + that can be emptied, or NULL */ + mem_heap_t* entry_heap, + /*!< in/out: memory heap for allocating + big_rec and the index tuple */ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to be stored externally by the caller, or NULL */ const upd_t* update, /*!< in: update vector; this is allowed also @@ -2219,7 +2329,9 @@ btr_cur_pessimistic_update( the values in update vector have no effect */ ulint cmpl_info,/*!< in: compiler info on secondary index updates */ - que_thr_t* thr, /*!< in: query thread */ + que_thr_t* thr, /*!< in: query thread, or NULL if + appropriate flags are set */ + trx_id_t trx_id, /*!< in: transaction id */ mtr_t* mtr) /*!< in: mtr; must be committed before latching any further pages */ { @@ -2231,17 +2343,15 @@ btr_cur_pessimistic_update( page_zip_des_t* page_zip; rec_t* rec; page_cur_t* page_cursor; - dtuple_t* new_entry; - ulint err; - ulint optim_err; + dberr_t err; + dberr_t optim_err; roll_ptr_t roll_ptr; - trx_t* trx; ibool was_first; ulint n_extents = 0; ulint n_reserved; ulint n_ext; - ulint* offsets = NULL; + *offsets = NULL; *big_rec = NULL; block = btr_cur_get_block(cursor); @@ -2258,9 +2368,16 @@ btr_cur_pessimistic_update( #endif /* UNIV_ZIP_DEBUG */ /* The insert buffer tree should never be updated in place. */ ut_ad(!dict_index_is_ibuf(index)); + ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG) + || dict_index_is_clust(index)); + ut_ad(!thr || thr_get_trx(thr)->id == trx_id); + ut_ad(thr || (flags & ~BTR_KEEP_POS_FLAG) + == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG + | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG)); - optim_err = btr_cur_optimistic_update(flags, cursor, update, - cmpl_info, thr, mtr); + optim_err = btr_cur_optimistic_update( + flags, cursor, offsets, offsets_heap, update, + cmpl_info, thr, trx_id, mtr); switch (optim_err) { case DB_UNDERFLOW: @@ -2272,7 +2389,8 @@ btr_cur_pessimistic_update( } /* Do lock checking and undo logging */ - err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info, + err = btr_cur_upd_lock_and_undo(flags, cursor, *offsets, + update, cmpl_info, thr, mtr, &roll_ptr); if (err != DB_SUCCESS) { @@ -2300,20 +2418,11 @@ btr_cur_pessimistic_update( } } - if (!*heap) { - *heap = mem_heap_create(1024); - } - offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, heap); - - trx = thr_get_trx(thr); + *offsets = rec_get_offsets( + rec, index, *offsets, ULINT_UNDEFINED, offsets_heap); - new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets, - &n_ext, *heap); - /* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above - invokes rec_offs_make_valid() to point to the copied record that - the fields of new_entry point to. We have to undo it here. */ - ut_ad(rec_offs_validate(NULL, index, offsets)); - rec_offs_make_valid(rec, index, offsets); + dtuple_t* new_entry = row_rec_to_index_entry( + rec, index, *offsets, &n_ext, entry_heap); /* The page containing the clustered index record corresponding to new_entry is latched in mtr. If the @@ -2322,15 +2431,15 @@ btr_cur_pessimistic_update( purge would also have removed the clustered index record itself. Thus the following call is safe. */ row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update, - FALSE, *heap); + FALSE, entry_heap); if (!(flags & BTR_KEEP_SYS_FLAG)) { row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR, roll_ptr); row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID, - trx->id); + trx_id); } - if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(offsets)) { + if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(*offsets)) { /* We are in a transaction rollback undoing a row update: we must free possible externally stored fields which got new values in the update, if they are not @@ -2341,16 +2450,17 @@ btr_cur_pessimistic_update( ut_ad(big_rec_vec == NULL); btr_rec_free_updated_extern_fields( - index, rec, page_zip, offsets, update, - trx_is_recv(trx) ? RB_RECOVERY : RB_NORMAL, mtr); + index, rec, page_zip, *offsets, update, + trx_is_recv(thr_get_trx(thr)) + ? RB_RECOVERY : RB_NORMAL, mtr); } /* We have to set appropriate extern storage bits in the new record to be inserted: we have to remember which fields were such */ ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec)); - ut_ad(rec_offs_validate(rec, index, offsets)); - n_ext += btr_push_update_extern_fields(new_entry, update, *heap); + ut_ad(rec_offs_validate(rec, index, *offsets)); + n_ext += btr_push_update_extern_fields(new_entry, update, entry_heap); if (page_zip) { ut_ad(page_is_comp(page)); @@ -2396,11 +2506,12 @@ make_external: #endif /* UNIV_ZIP_DEBUG */ page_cursor = btr_cur_get_page_cur(cursor); - page_cur_delete_rec(page_cursor, index, offsets, mtr); + page_cur_delete_rec(page_cursor, index, *offsets, mtr); page_cur_move_to_prev(page_cursor); - rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr); + rec = btr_cur_insert_if_possible(cursor, new_entry, + offsets, offsets_heap, n_ext, mtr); if (rec) { page_cursor->rec = rec; @@ -2408,20 +2519,19 @@ make_external: lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor), rec, block); - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, heap); - - if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) { + if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) { /* The new inserted record owns its possible externally stored fields */ - btr_cur_unmark_extern_fields(page_zip, - rec, index, offsets, mtr); + btr_cur_unmark_extern_fields( + page_zip, rec, index, *offsets, mtr); } - btr_cur_compress_if_useful( - cursor, - big_rec_vec != NULL && (flags & BTR_KEEP_POS_FLAG), - mtr); + bool adjust = big_rec_vec && (flags & BTR_KEEP_POS_FLAG); + + if (btr_cur_compress_if_useful(cursor, adjust, mtr) + && adjust) { + rec_offs_make_valid(page_cursor->rec, index, *offsets); + } if (page_zip && !dict_index_is_clust(index) && page_is_leaf(page)) { @@ -2440,8 +2550,7 @@ make_external: ut_a(page_zip || optim_err != DB_UNDERFLOW); /* Out of space: reset the free bits. */ - if (!dict_index_is_clust(index) - && page_is_leaf(page)) { + if (!dict_index_is_clust(index) && page_is_leaf(page)) { ibuf_reset_free_bits(block); } } @@ -2473,11 +2582,13 @@ make_external: err = btr_cur_pessimistic_insert(BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG, - cursor, new_entry, &rec, + cursor, offsets, offsets_heap, + new_entry, &rec, &dummy_big_rec, n_ext, NULL, mtr); ut_a(rec); ut_a(err == DB_SUCCESS); ut_a(dummy_big_rec == NULL); + ut_ad(rec_offs_validate(rec, cursor->index, *offsets)); page_cursor->rec = rec; if (dict_index_is_sec_or_ibuf(index)) { @@ -2490,10 +2601,10 @@ make_external: page_update_max_trx_id(rec_block, buf_block_get_page_zip(rec_block), - trx->id, mtr); + trx_id, mtr); } - if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) { + if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) { /* The new inserted record owns its possible externally stored fields */ buf_block_t* rec_block = btr_cur_get_block(cursor); @@ -2504,10 +2615,8 @@ make_external: #endif /* UNIV_ZIP_DEBUG */ page_zip = buf_block_get_page_zip(rec_block); - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, heap); btr_cur_unmark_extern_fields(page_zip, - rec, index, offsets, mtr); + rec, index, *offsets, mtr); } lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor), @@ -2546,17 +2655,13 @@ UNIV_INLINE void btr_cur_del_mark_set_clust_rec_log( /*===============================*/ - ulint flags, /*!< in: flags */ rec_t* rec, /*!< in: record */ dict_index_t* index, /*!< in: index of the record */ - ibool val, /*!< in: value to set */ - trx_t* trx, /*!< in: deleting transaction */ + trx_id_t trx_id, /*!< in: transaction id */ roll_ptr_t roll_ptr,/*!< in: roll ptr to the undo log record */ mtr_t* mtr) /*!< in: mtr */ { byte* log_ptr; - ut_ad(flags < 256); - ut_ad(val <= 1); ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); @@ -2572,13 +2677,11 @@ btr_cur_del_mark_set_clust_rec_log( return; } - mach_write_to_1(log_ptr, flags); - log_ptr++; - mach_write_to_1(log_ptr, val); - log_ptr++; + *log_ptr++ = 0; + *log_ptr++ = 1; - log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr, - mtr); + log_ptr = row_upd_write_sys_vals_to_log( + index, trx_id, roll_ptr, log_ptr, mtr); mach_write_to_2(log_ptr, page_offset(rec)); log_ptr += 2; @@ -2675,20 +2778,18 @@ of the deleting transaction, and in the roll ptr field pointer to the undo log record created. @return DB_SUCCESS, DB_LOCK_WAIT, or error number */ UNIV_INTERN -ulint +dberr_t btr_cur_del_mark_set_clust_rec( /*===========================*/ - ulint flags, /*!< in: undo logging and locking flags */ buf_block_t* block, /*!< in/out: buffer block of the record */ rec_t* rec, /*!< in/out: record */ dict_index_t* index, /*!< in: clustered index of the record */ const ulint* offsets,/*!< in: rec_get_offsets(rec) */ - ibool val, /*!< in: value to set */ que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr) /*!< in: mtr */ { roll_ptr_t roll_ptr; - ulint err; + dberr_t err; page_zip_des_t* page_zip; trx_t* trx; @@ -2700,7 +2801,7 @@ btr_cur_del_mark_set_clust_rec( #ifdef UNIV_DEBUG if (btr_cur_print_record_ops && thr) { - btr_cur_trx_report(thr_get_trx(thr), index, "del mark "); + btr_cur_trx_report(thr_get_trx(thr)->id, index, "del mark "); rec_print_new(stderr, rec, offsets); } #endif /* UNIV_DEBUG */ @@ -2708,7 +2809,7 @@ btr_cur_del_mark_set_clust_rec( ut_ad(dict_index_is_clust(index)); ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); - err = lock_clust_rec_modify_check_and_lock(flags, block, + err = lock_clust_rec_modify_check_and_lock(BTR_NO_LOCKING_FLAG, block, rec, index, offsets, thr); if (err != DB_SUCCESS) { @@ -2716,8 +2817,8 @@ btr_cur_del_mark_set_clust_rec( return(err); } - err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr, - index, NULL, NULL, 0, rec, + err = trx_undo_report_row_operation(0, TRX_UNDO_MODIFY_OP, thr, + index, NULL, NULL, 0, rec, offsets, &roll_ptr); if (err != DB_SUCCESS) { @@ -2730,17 +2831,21 @@ btr_cur_del_mark_set_clust_rec( page_zip = buf_block_get_page_zip(block); - btr_blob_dbg_set_deleted_flag(rec, index, offsets, val); - btr_rec_set_deleted_flag(rec, page_zip, val); + btr_blob_dbg_set_deleted_flag(rec, index, offsets, TRUE); + btr_rec_set_deleted_flag(rec, page_zip, TRUE); trx = thr_get_trx(thr); - if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_rec_sys_fields(rec, page_zip, - index, offsets, trx, roll_ptr); + if (dict_index_is_online_ddl(index)) { + row_log_table_delete( + rec, index, offsets, + trx_read_trx_id(row_get_trx_id_offset(index, offsets) + + rec)); } - btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx, + row_upd_rec_sys_fields(rec, page_zip, index, offsets, trx, roll_ptr); + + btr_cur_del_mark_set_clust_rec_log(rec, index, trx->id, roll_ptr, mtr); return(err); @@ -2829,7 +2934,7 @@ btr_cur_parse_del_mark_set_sec_rec( Sets a secondary index record delete mark to TRUE or FALSE. @return DB_SUCCESS, DB_LOCK_WAIT, or error number */ UNIV_INTERN -ulint +dberr_t btr_cur_del_mark_set_sec_rec( /*=========================*/ ulint flags, /*!< in: locking flag */ @@ -2840,14 +2945,14 @@ btr_cur_del_mark_set_sec_rec( { buf_block_t* block; rec_t* rec; - ulint err; + dberr_t err; block = btr_cur_get_block(cursor); rec = btr_cur_get_rec(cursor); #ifdef UNIV_DEBUG if (btr_cur_print_record_ops && thr) { - btr_cur_trx_report(thr_get_trx(thr), cursor->index, + btr_cur_trx_report(thr_get_trx(thr)->id, cursor->index, "del mark "); rec_print(stderr, rec, cursor->index); } @@ -2937,12 +3042,15 @@ positioned, but no latch on the whole tree. @return TRUE if success, i.e., the page did not become too empty */ UNIV_INTERN ibool -btr_cur_optimistic_delete( -/*======================*/ +btr_cur_optimistic_delete_func( +/*===========================*/ btr_cur_t* cursor, /*!< in: cursor on leaf page, on the record to delete; cursor stays valid: if deletion succeeds, on function exit it points to the successor of the deleted record */ +#ifdef UNIV_DEBUG + ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */ +#endif /* UNIV_DEBUG */ mtr_t* mtr) /*!< in: mtr; if this function returns TRUE on a leaf page of a secondary index, the mtr must be committed @@ -2956,6 +3064,7 @@ btr_cur_optimistic_delete( ibool no_compress_needed; rec_offs_init(offsets_); + ut_ad(flags == 0 || flags == BTR_CREATE_FLAG); ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor), MTR_MEMO_PAGE_X_FIX)); /* This is intended only for leaf page deletions */ @@ -2963,6 +3072,9 @@ btr_cur_optimistic_delete( block = btr_cur_get_block(cursor); ut_ad(page_is_leaf(buf_block_get_frame(block))); + ut_ad(!dict_index_is_online_ddl(cursor->index) + || dict_index_is_clust(cursor->index) + || (flags & BTR_CREATE_FLAG)); rec = btr_cur_get_rec(cursor); offsets = rec_get_offsets(rec, cursor->index, offsets, @@ -3030,7 +3142,7 @@ UNIV_INTERN ibool btr_cur_pessimistic_delete( /*=======================*/ - ulint* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE; + dberr_t* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE; the latter may occur because we may have to update node pointers on upper levels, and in the case of variable length keys @@ -3043,6 +3155,7 @@ btr_cur_pessimistic_delete( if compression does not occur, the cursor stays valid: it points to successor of deleted record on function exit */ + ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */ enum trx_rb_ctx rb_ctx, /*!< in: rollback context */ mtr_t* mtr) /*!< in: mtr */ { @@ -3051,7 +3164,6 @@ btr_cur_pessimistic_delete( page_zip_des_t* page_zip; dict_index_t* index; rec_t* rec; - dtuple_t* node_ptr; ulint n_extents = 0; ulint n_reserved; ibool success; @@ -3064,6 +3176,10 @@ btr_cur_pessimistic_delete( page = buf_block_get_frame(block); index = btr_cur_get_index(cursor); + ut_ad(flags == 0 || flags == BTR_CREATE_FLAG); + ut_ad(!dict_index_is_online_ddl(index) + || dict_index_is_clust(index) + || (flags & BTR_CREATE_FLAG)); ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), MTR_MEMO_X_LOCK)); ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); @@ -3112,13 +3228,15 @@ btr_cur_pessimistic_delete( btr_discard_page(cursor, mtr); - *err = DB_SUCCESS; ret = TRUE; goto return_after_reservations; } - lock_update_delete(block, rec); + if (flags == 0) { + lock_update_delete(block, rec); + } + level = btr_page_get_level(page, mtr); if (level > 0 @@ -3147,12 +3265,12 @@ btr_cur_pessimistic_delete( btr_node_ptr_delete(index, block, mtr); - node_ptr = dict_index_build_node_ptr( + dtuple_t* node_ptr = dict_index_build_node_ptr( index, next_rec, buf_block_get_page_no(block), heap, level); - btr_insert_on_non_leaf_level(index, - level + 1, node_ptr, mtr); + btr_insert_on_non_leaf_level( + flags, index, level + 1, node_ptr, mtr); } } @@ -3165,9 +3283,9 @@ btr_cur_pessimistic_delete( ut_ad(btr_check_node_ptr(index, block, mtr)); +return_after_reservations: *err = DB_SUCCESS; -return_after_reservations: mem_heap_free(heap); if (ret == FALSE) { @@ -3194,8 +3312,8 @@ btr_cur_add_path_info( ulint root_height) /*!< in: root node height in tree */ { btr_path_t* slot; - rec_t* rec; - page_t* page; + const rec_t* rec; + const page_t* page; ut_a(cursor->path_arr); @@ -3407,6 +3525,9 @@ btr_estimate_n_rows_in_range( ibool is_n_rows_exact; ulint i; mtr_t mtr; + ib_int64_t table_n_rows; + + table_n_rows = dict_table_get_n_rows(index->table); mtr_start(&mtr); @@ -3419,9 +3540,9 @@ btr_estimate_n_rows_in_range( &cursor, 0, __FILE__, __LINE__, &mtr); } else { - btr_cur_open_at_index_side(TRUE, index, + btr_cur_open_at_index_side(true, index, BTR_SEARCH_LEAF | BTR_ESTIMATE, - &cursor, &mtr); + &cursor, 0, &mtr); } mtr_commit(&mtr); @@ -3437,9 +3558,9 @@ btr_estimate_n_rows_in_range( &cursor, 0, __FILE__, __LINE__, &mtr); } else { - btr_cur_open_at_index_side(FALSE, index, + btr_cur_open_at_index_side(false, index, BTR_SEARCH_LEAF | BTR_ESTIMATE, - &cursor, &mtr); + &cursor, 0, &mtr); } mtr_commit(&mtr); @@ -3471,20 +3592,21 @@ btr_estimate_n_rows_in_range( n_rows = n_rows * 2; } + DBUG_EXECUTE_IF("bug14007649", return(n_rows);); + /* Do not estimate the number of rows in the range to over 1 / 2 of the estimated rows in the whole table */ - if (n_rows > index->table->stat_n_rows / 2 - && !is_n_rows_exact) { + if (n_rows > table_n_rows / 2 && !is_n_rows_exact) { - n_rows = index->table->stat_n_rows / 2; + n_rows = table_n_rows / 2; /* If there are just 0 or 1 rows in the table, then we estimate all rows are in the range */ if (n_rows == 0) { - n_rows = index->table->stat_n_rows; + n_rows = table_n_rows; } } @@ -3544,9 +3666,9 @@ btr_estimate_n_rows_in_range( /*******************************************************************//** Record the number of non_null key values in a given index for -each n-column prefix of the index where n < dict_index_get_n_unique(index). +each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index). The estimates are eventually stored in the array: -index->stat_n_non_null_key_vals. */ +index->stat_n_non_null_key_vals[], which is indexed from 0 to n-1. */ static void btr_record_not_null_field_in_rec( @@ -3557,7 +3679,7 @@ btr_record_not_null_field_in_rec( const ulint* offsets, /*!< in: rec_get_offsets(rec, index), its size could be for all fields or that of "n_unique" */ - ib_int64_t* n_not_null) /*!< in/out: array to record number of + ib_uint64_t* n_not_null) /*!< in/out: array to record number of not null rows for n-column prefix */ { ulint i; @@ -3579,11 +3701,12 @@ btr_record_not_null_field_in_rec( /*******************************************************************//** Estimates the number of different key values in a given index, for -each n-column prefix of the index where n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals[] and -the number of pages that were sampled is saved in index->stat_n_sample_sizes[]. -If innodb_stats_method is "nulls_ignored", we also record the number of -non-null values for each prefix and store the estimates in +each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index). +The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed +0..n_uniq-1) and the number of pages that were sampled is saved in +index->stat_n_sample_sizes[]. +If innodb_stats_method is nulls_ignored, we also record the number of +non-null values for each prefix and stored the estimates in array index->stat_n_non_null_key_vals. */ UNIV_INTERN void @@ -3597,8 +3720,8 @@ btr_estimate_number_of_different_key_vals( ulint n_cols; ulint matched_fields; ulint matched_bytes; - ib_int64_t* n_diff; - ib_int64_t* n_not_null; + ib_uint64_t* n_diff; + ib_uint64_t* n_not_null; ibool stats_null_not_equal; ullint n_sample_pages; /* number of pages to sample */ ulint not_empty_flag = 0; @@ -3614,13 +3737,13 @@ btr_estimate_number_of_different_key_vals( n_cols = dict_index_get_n_unique(index); heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null) - * (n_cols + 1) + * n_cols + dict_index_get_n_fields(index) * (sizeof *offsets_rec + sizeof *offsets_next_rec)); - n_diff = (ib_int64_t*) mem_heap_zalloc(heap, (n_cols + 1) - * sizeof(ib_int64_t)); + n_diff = (ib_uint64_t*) mem_heap_zalloc( + heap, n_cols * sizeof(ib_int64_t)); n_not_null = NULL; @@ -3629,8 +3752,8 @@ btr_estimate_number_of_different_key_vals( considered equal (by setting stats_null_not_equal value) */ switch (srv_innodb_stats_method) { case SRV_STATS_NULLS_IGNORED: - n_not_null = (ib_int64_t*) mem_heap_zalloc(heap, (n_cols + 1) - * sizeof *n_not_null); + n_not_null = (ib_uint64_t*) mem_heap_zalloc( + heap, n_cols * sizeof *n_not_null); /* fall through */ case SRV_STATS_NULLS_UNEQUAL: @@ -3681,7 +3804,7 @@ btr_estimate_number_of_different_key_vals( offsets_rec = rec_get_offsets(rec, index, offsets_rec, ULINT_UNDEFINED, &heap); - if (n_not_null) { + if (n_not_null != NULL) { btr_record_not_null_field_in_rec( n_cols, offsets_rec, n_not_null); } @@ -3709,14 +3832,14 @@ btr_estimate_number_of_different_key_vals( &matched_fields, &matched_bytes); - for (j = matched_fields + 1; j <= n_cols; j++) { + for (j = matched_fields; j < n_cols; j++) { /* We add one if this index record has a different prefix from the previous */ n_diff[j]++; } - if (n_not_null) { + if (n_not_null != NULL) { btr_record_not_null_field_in_rec( n_cols, offsets_next_rec, n_not_null); } @@ -3751,7 +3874,7 @@ btr_estimate_number_of_different_key_vals( if (btr_page_get_prev(page, &mtr) != FIL_NULL || btr_page_get_next(page, &mtr) != FIL_NULL) { - n_diff[n_cols]++; + n_diff[n_cols - 1]++; } } @@ -3766,7 +3889,7 @@ btr_estimate_number_of_different_key_vals( also the pages used for external storage of fields (those pages are included in index->stat_n_leaf_pages) */ - for (j = 0; j <= n_cols; j++) { + for (j = 0; j < n_cols; j++) { index->stat_n_diff_key_vals[j] = BTR_TABLE_STATS_FROM_SAMPLE( n_diff[j], index, n_sample_pages, @@ -3796,7 +3919,7 @@ btr_estimate_number_of_different_key_vals( sampled result. stat_n_non_null_key_vals[] is created and initialized to zero in dict_index_add_to_cache(), along with stat_n_diff_key_vals[] array */ - if (n_not_null != NULL && (j < n_cols)) { + if (n_not_null != NULL) { index->stat_n_non_null_key_vals[j] = BTR_TABLE_STATS_FROM_SAMPLE( n_not_null[j], index, n_sample_pages, @@ -4146,7 +4269,7 @@ The fields are stored on pages allocated from leaf node file segment of the index tree. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ UNIV_INTERN -enum db_err +dberr_t btr_store_big_rec_extern_fields( /*============================*/ dict_index_t* index, /*!< in: index of rec; the index tree @@ -4180,7 +4303,7 @@ btr_store_big_rec_extern_fields( z_stream c_stream; buf_block_t** freed_pages = NULL; ulint n_freed_pages = 0; - enum db_err error = DB_SUCCESS; + dberr_t error = DB_SUCCESS; ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(rec_offs_any_extern(offsets)); @@ -4211,7 +4334,7 @@ btr_store_big_rec_extern_fields( heap = mem_heap_create(250000); page_zip_set_alloc(&c_stream, heap); - err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION, + err = deflateInit2(&c_stream, page_compression_level, Z_DEFLATED, 15, 7, Z_DEFAULT_STRATEGY); ut_a(err == Z_OK); } @@ -5083,6 +5206,7 @@ btr_copy_zblob_prefix( " page %lu space %lu\n", (ulong) fil_page_get_type(bpage->zip.data), (ulong) page_no, (ulong) space_id); + ut_ad(0); goto end_of_blob; } |