diff options
author | Karen Langford <karen.langford@oracle.com> | 2011-11-17 00:26:16 +0100 |
---|---|---|
committer | Karen Langford <karen.langford@oracle.com> | 2011-11-17 00:26:16 +0100 |
commit | e1df69f75ac478b0456bc768b61dfd412fa510e9 (patch) | |
tree | 59efaebcf87a955329cd085b04e55e05558d41ee /storage | |
parent | 8c886b3bc0002ce5c14f9ffc9c0df4d2664b032d (diff) | |
parent | aac03193362f8e68ccbc46743a52e830ce94f44f (diff) | |
download | mariadb-git-e1df69f75ac478b0456bc768b61dfd412fa510e9.tar.gz |
Merge from mysql-5.1.60-release
Diffstat (limited to 'storage')
41 files changed, 512 insertions, 1446 deletions
diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c index ad99913cf3b..5079757272a 100644 --- a/storage/innobase/btr/btr0btr.c +++ b/storage/innobase/btr/btr0btr.c @@ -300,30 +300,29 @@ btr_page_alloc_for_ibuf( /****************************************************************** Allocates a new file page to be used in an index tree. NOTE: we assume that the caller has made the reservation for free extents! */ -static -ulint -btr_page_alloc_low( -/*===============*/ - /* out: allocated page number, - FIL_NULL if out of space */ + +page_t* +btr_page_alloc( +/*===========*/ + /* out: new allocated page, x-latched; + NULL if out of space */ dict_index_t* index, /* in: index */ ulint hint_page_no, /* in: hint of a good page */ byte file_direction, /* in: direction where a possible page split is made */ ulint level, /* in: level where the page is placed in the tree */ - mtr_t* mtr, /* in/out: mini-transaction - for the allocation */ - mtr_t* init_mtr) /* in/out: mini-transaction - in which the page should be - initialized (may be the same - as mtr), or NULL if it should - not be initialized (the page - at hint was previously freed - in mtr) */ + mtr_t* mtr) /* in: mtr */ { fseg_header_t* seg_header; page_t* root; + page_t* new_page; + ulint new_page_no; + + if (index->type & DICT_IBUF) { + + return(btr_page_alloc_for_ibuf(index, mtr)); + } root = btr_root_get(index, mtr); @@ -337,61 +336,19 @@ btr_page_alloc_low( reservation for free extents, and thus we know that a page can be allocated: */ - return(fseg_alloc_free_page_general(seg_header, hint_page_no, - file_direction, TRUE, - mtr, init_mtr)); -} - -/**************************************************************//** -Allocates a new file page to be used in an index tree. NOTE: we assume -that the caller has made the reservation for free extents! */ - -page_t* -btr_page_alloc( -/*===========*/ - /* out: new allocated block, x-latched; - NULL if out of space */ - dict_index_t* index, /* in: index */ - ulint hint_page_no, /* in: hint of a good page */ - byte file_direction, /* in: direction where a possible - page split is made */ - ulint level, /* in: level where the page is placed - in the tree */ - mtr_t* mtr, /* in/out: mini-transaction - for the allocation */ - mtr_t* init_mtr) /* in/out: mini-transaction - for x-latching and initializing - the page */ -{ - page_t* new_page; - ulint new_page_no; - - if (index->type & DICT_IBUF) { - - return(btr_page_alloc_for_ibuf(index, mtr)); - } - - new_page_no = btr_page_alloc_low( - index, hint_page_no, file_direction, level, mtr, init_mtr); - + new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no, + file_direction, TRUE, mtr); if (new_page_no == FIL_NULL) { return(NULL); } new_page = buf_page_get(dict_index_get_space(index), new_page_no, - RW_X_LATCH, init_mtr); + RW_X_LATCH, mtr); #ifdef UNIV_SYNC_DEBUG buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW); #endif /* UNIV_SYNC_DEBUG */ - if (mtr->freed_clust_leaf) { - mtr_memo_release(mtr, new_page, MTR_MEMO_FREE_CLUST_LEAF); - ut_ad(!mtr_memo_contains(mtr, buf_block_align(new_page), - MTR_MEMO_FREE_CLUST_LEAF)); - } - - ut_ad(btr_freed_leaves_validate(mtr)); return(new_page); } @@ -538,138 +495,8 @@ btr_page_free( level = btr_page_get_level(page, mtr); btr_page_free_low(index, page, level, mtr); - - /* The handling of MTR_MEMO_FREE_CLUST_LEAF assumes this. */ - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - - if (level == 0 && (index->type & DICT_CLUSTERED)) { - /* We may have to call btr_mark_freed_leaves() to - temporarily mark the block nonfree for invoking - btr_store_big_rec_extern_fields() after an - update. Remember that the block was freed. */ - mtr->freed_clust_leaf = TRUE; - mtr_memo_push(mtr, buf_block_align(page), - MTR_MEMO_FREE_CLUST_LEAF); - } - - ut_ad(btr_freed_leaves_validate(mtr)); } -/**************************************************************//** -Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free. -For invoking btr_store_big_rec_extern_fields() after an update, -we must temporarily mark freed clustered index pages allocated, so -that off-page columns will not be allocated from them. Between the -btr_store_big_rec_extern_fields() and mtr_commit() we have to -mark the pages free again, so that no pages will be leaked. */ - -void -btr_mark_freed_leaves( -/*==================*/ - dict_index_t* index, /* in/out: clustered index */ - mtr_t* mtr, /* in/out: mini-transaction */ - ibool nonfree)/* in: TRUE=mark nonfree, FALSE=mark freed */ -{ - /* This is loosely based on mtr_memo_release(). */ - - ulint offset; - - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - ut_ad(mtr->state == MTR_ACTIVE); - - if (!mtr->freed_clust_leaf) { - return; - } - - offset = dyn_array_get_data_size(&mtr->memo); - - while (offset > 0) { - mtr_memo_slot_t* slot; - buf_block_t* block; - - offset -= sizeof *slot; - - slot = dyn_array_get_element(&mtr->memo, offset); - - if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) { - continue; - } - - /* Because btr_page_alloc() does invoke - mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all - blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the - memo must still be clustered index leaf tree pages. */ - block = slot->object; - ut_a(buf_block_get_space(block) - == dict_index_get_space(index)); - ut_a(fil_page_get_type(buf_block_get_frame(block)) - == FIL_PAGE_INDEX); - ut_a(btr_page_get_level(buf_block_get_frame(block), mtr) == 0); - - if (nonfree) { - /* Allocate the same page again. */ - ulint page_no; - page_no = btr_page_alloc_low( - index, buf_block_get_page_no(block), - FSP_NO_DIR, 0, mtr, NULL); - ut_a(page_no == buf_block_get_page_no(block)); - } else { - /* Assert that the page is allocated and free it. */ - btr_page_free_low(index, buf_block_get_frame(block), - 0, mtr); - } - } - - ut_ad(btr_freed_leaves_validate(mtr)); -} - -#ifdef UNIV_DEBUG -/**************************************************************//** -Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF. -See btr_mark_freed_leaves(). */ - -ibool -btr_freed_leaves_validate( -/*======================*/ - /* out: TRUE if valid */ - mtr_t* mtr) /* in: mini-transaction */ -{ - ulint offset; - - ut_ad(mtr->magic_n == MTR_MAGIC_N); - ut_ad(mtr->state == MTR_ACTIVE); - - offset = dyn_array_get_data_size(&mtr->memo); - - while (offset > 0) { - mtr_memo_slot_t* slot; - buf_block_t* block; - - offset -= sizeof *slot; - - slot = dyn_array_get_element(&mtr->memo, offset); - - if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) { - continue; - } - - ut_a(mtr->freed_clust_leaf); - /* Because btr_page_alloc() does invoke - mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all - blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the - memo must still be clustered index leaf tree pages. */ - block = slot->object; - ut_a(fil_page_get_type(buf_block_get_frame(block)) - == FIL_PAGE_INDEX); - ut_a(btr_page_get_level(buf_block_get_frame(block), mtr) == 0); - } - - return(TRUE); -} -#endif /* UNIV_DEBUG */ - /****************************************************************** Sets the child node file address in a node pointer. */ UNIV_INLINE @@ -1199,7 +1026,7 @@ btr_root_raise_and_insert( a node pointer to the new page, and then splitting the new page. */ new_page = btr_page_alloc(index, 0, FSP_NO_DIR, - btr_page_get_level(root, mtr), mtr, mtr); + btr_page_get_level(root, mtr), mtr); btr_page_create(new_page, index, mtr); @@ -1820,7 +1647,7 @@ func_start: /* 2. Allocate a new page to the index */ new_page = btr_page_alloc(cursor->index, hint_page_no, direction, - btr_page_get_level(page, mtr), mtr, mtr); + btr_page_get_level(page, mtr), mtr); btr_page_create(new_page, cursor->index, mtr); /* 3. Calculate the first record on the upper half-page, and the @@ -2121,7 +1948,7 @@ btr_node_ptr_delete( ut_a(err == DB_SUCCESS); if (!compressed) { - btr_cur_compress_if_useful(&cursor, FALSE, mtr); + btr_cur_compress_if_useful(&cursor, mtr); } } @@ -2129,10 +1956,9 @@ btr_node_ptr_delete( If page is the only on its level, this function moves its records to the father page, thus reducing the tree height. */ static -page_t* +void btr_lift_page_up( /*=============*/ - /* out: father page */ dict_index_t* index, /* in: index tree */ page_t* page, /* in: page which is the only on its level; must not be empty: use @@ -2208,8 +2034,6 @@ btr_lift_page_up( ibuf_reset_free_bits(index, father_page); ut_ad(page_validate(father_page, index)); ut_ad(btr_check_node_ptr(index, father_page, mtr)); - - return(father_page); } /***************************************************************** @@ -2226,13 +2050,11 @@ enough free extents so that the compression will always succeed if done! */ void btr_compress( /*=========*/ - btr_cur_t* cursor, /* in/out: cursor on the page to merge - or lift; the page must not be empty: - when deleting records, use btr_discard_page() - if the page would become empty */ - ibool adjust, /* in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr) /* in/out: mini-transaction */ + btr_cur_t* cursor, /* in: cursor on the page to merge or lift; + the page must not be empty: in record delete + use btr_discard_page if the page would become + empty */ + mtr_t* mtr) /* in: mtr */ { dict_index_t* index; ulint space; @@ -2247,7 +2069,6 @@ btr_compress( rec_t* node_ptr; ulint data_size; ulint n_recs; - ulint nth_rec = 0; /* remove bogus warning */ ulint max_ins_size; ulint max_ins_size_reorg; ulint comp; @@ -2255,7 +2076,6 @@ btr_compress( page = btr_cur_get_page(cursor); index = btr_cur_get_index(cursor); comp = page_is_comp(page); - ut_a((ibool)!!comp == dict_table_is_comp(index->table)); ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), @@ -2277,10 +2097,6 @@ btr_compress( father_page = buf_frame_align(node_ptr); ut_a(comp == page_is_comp(father_page)); - if (adjust) { - nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor)); - } - /* Decide the page to which we try to merge and which will inherit the locks */ @@ -2305,8 +2121,9 @@ btr_compress( } else { /* The page is the only one on the level, lift the records to the father */ - merge_page = btr_lift_page_up(index, page, mtr); - goto func_exit; + btr_lift_page_up(index, page, mtr); + + return; } n_recs = page_get_n_recs(page); @@ -2382,10 +2199,6 @@ btr_compress( index, mtr); lock_update_merge_left(merge_page, orig_pred, page); - - if (adjust) { - nth_rec += page_rec_get_n_recs_before(orig_pred); - } } else { orig_succ = page_rec_get_next( page_get_infimum_rec(merge_page)); @@ -2406,12 +2219,6 @@ btr_compress( btr_page_free(index, page, mtr); ut_ad(btr_check_node_ptr(index, merge_page, mtr)); - -func_exit: - if (adjust) { - btr_cur_position(index, page_rec_get_nth(merge_page, nth_rec), - cursor); - } } /***************************************************************** diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index 37bb3188785..3c12e28feb6 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -1790,9 +1790,7 @@ btr_cur_pessimistic_update( /* out: DB_SUCCESS or error code */ ulint flags, /* in: undo logging, locking, and rollback flags */ - btr_cur_t* cursor, /* in/out: cursor on the record to update; - cursor may become invalid if *big_rec == NULL - || !(flags & BTR_KEEP_POS_FLAG) */ + btr_cur_t* cursor, /* in: cursor on the record to update */ big_rec_t** big_rec,/* out: big rec vector whose fields have to be stored externally by the caller, or NULL */ upd_t* update, /* in: update vector; this is allowed also @@ -1927,10 +1925,6 @@ btr_cur_pessimistic_update( err = DB_TOO_BIG_RECORD; goto return_after_reservations; } - - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(btr_page_get_level(page, mtr) == 0); - ut_ad(flags & BTR_KEEP_POS_FLAG); } page_cursor = btr_cur_get_page_cur(cursor); @@ -1957,8 +1951,6 @@ btr_cur_pessimistic_update( ut_a(rec || optim_err != DB_UNDERFLOW); if (rec) { - page_cursor->rec = rec; - lock_rec_restore_from_page_infimum(rec, page); rec_set_field_extern_bits(rec, index, ext_vect, n_ext_vect, mtr); @@ -1972,30 +1964,12 @@ btr_cur_pessimistic_update( btr_cur_unmark_extern_fields(rec, mtr, offsets); } - btr_cur_compress_if_useful( - cursor, - big_rec_vec != NULL && (flags & BTR_KEEP_POS_FLAG), - mtr); + btr_cur_compress_if_useful(cursor, mtr); err = DB_SUCCESS; goto return_after_reservations; } - if (big_rec_vec) { - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(btr_page_get_level(page, mtr) == 0); - ut_ad(flags & BTR_KEEP_POS_FLAG); - - /* btr_page_split_and_insert() in - btr_cur_pessimistic_insert() invokes - mtr_memo_release(mtr, index->lock, MTR_MEMO_X_LOCK). - We must keep the index->lock when we created a - big_rec, so that row_upd_clust_rec() can store the - big_rec in the same mini-transaction. */ - - mtr_x_lock(dict_index_get_lock(index), mtr); - } - if (page_cur_is_before_first(page_cursor)) { /* The record to be updated was positioned as the first user record on its page */ @@ -2016,7 +1990,6 @@ btr_cur_pessimistic_update( ut_a(rec); ut_a(err == DB_SUCCESS); ut_a(dummy_big_rec == NULL); - page_cursor->rec = rec; rec_set_field_extern_bits(rec, index, ext_vect, n_ext_vect, mtr); offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); @@ -2419,6 +2392,30 @@ btr_cur_del_unmark_for_ibuf( /*==================== B-TREE RECORD REMOVE =========================*/ /***************************************************************** +Tries to compress a page of the tree on the leaf level. It is assumed +that mtr holds an x-latch on the tree and on the cursor page. To avoid +deadlocks, mtr must also own x-latches to brothers of page, if those +brothers exist. NOTE: it is assumed that the caller has reserved enough +free extents so that the compression will always succeed if done! */ + +void +btr_cur_compress( +/*=============*/ + btr_cur_t* cursor, /* in: cursor on the page to compress; + cursor does not stay valid */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad(mtr_memo_contains(mtr, + dict_index_get_lock(btr_cur_get_index(cursor)), + MTR_MEMO_X_LOCK)); + ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)), + MTR_MEMO_PAGE_X_FIX)); + ut_ad(btr_page_get_level(btr_cur_get_page(cursor), mtr) == 0); + + btr_compress(cursor, mtr); +} + +/***************************************************************** Tries to compress a page of the tree if it seems useful. It is assumed that mtr holds an x-latch on the tree and on the cursor page. To avoid deadlocks, mtr must also own x-latches to brothers of page, if those @@ -2429,12 +2426,10 @@ ibool btr_cur_compress_if_useful( /*=======================*/ /* out: TRUE if compression occurred */ - btr_cur_t* cursor, /* in/out: cursor on the page to compress; - cursor does not stay valid if !adjust and - compression occurs */ - ibool adjust, /* in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr) /* in/out: mini-transaction */ + btr_cur_t* cursor, /* in: cursor on the page to compress; + cursor does not stay valid if compression + occurs */ + mtr_t* mtr) /* in: mtr */ { ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(btr_cur_get_index(cursor)), @@ -2444,7 +2439,7 @@ btr_cur_compress_if_useful( if (btr_cur_compress_recommendation(cursor, mtr)) { - btr_compress(cursor, adjust, mtr); + btr_compress(cursor, mtr); return(TRUE); } @@ -2657,7 +2652,7 @@ return_after_reservations: mem_heap_free(heap); if (ret == FALSE) { - ret = btr_cur_compress_if_useful(cursor, FALSE, mtr); + ret = btr_cur_compress_if_useful(cursor, mtr); } if (n_extents > 0) { @@ -3449,11 +3444,6 @@ btr_store_big_rec_extern_fields( this function returns */ big_rec_t* big_rec_vec, /* in: vector containing fields to be stored externally */ - mtr_t* alloc_mtr, /* in/out: in an insert, NULL; - in an update, local_mtr for - allocating BLOB pages and - updating BLOB pointers; alloc_mtr - must not have freed any leaf pages */ mtr_t* local_mtr __attribute__((unused))) /* in: mtr containing the latch to rec and to the tree */ @@ -3474,8 +3464,6 @@ btr_store_big_rec_extern_fields( ulint i; mtr_t mtr; - ut_ad(local_mtr); - ut_ad(!alloc_mtr || alloc_mtr == local_mtr); ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index), MTR_MEMO_X_LOCK)); @@ -3485,25 +3473,6 @@ btr_store_big_rec_extern_fields( space_id = buf_frame_get_space_id(rec); - if (alloc_mtr) { - /* Because alloc_mtr will be committed after - mtr, it is possible that the tablespace has been - extended when the B-tree record was updated or - inserted, or it will be extended while allocating - pages for big_rec. - - TODO: In mtr (not alloc_mtr), write a redo log record - about extending the tablespace to its current size, - and remember the current size. Whenever the tablespace - grows as pages are allocated, write further redo log - records to mtr. (Currently tablespace extension is not - covered by the redo log. If it were, the record would - only be written to alloc_mtr, which is committed after - mtr.) */ - } else { - alloc_mtr = &mtr; - } - /* We have to create a file segment to the tablespace for each field and put the pointer to the field in rec */ @@ -3530,7 +3499,7 @@ btr_store_big_rec_extern_fields( } page = btr_page_alloc(index, hint_page_no, - FSP_NO_DIR, 0, alloc_mtr, &mtr); + FSP_NO_DIR, 0, &mtr); if (page == NULL) { mtr_commit(&mtr); @@ -3584,42 +3553,37 @@ btr_store_big_rec_extern_fields( extern_len -= store_len; - if (alloc_mtr == &mtr) { #ifdef UNIV_SYNC_DEBUG - rec_page = + rec_page = #endif /* UNIV_SYNC_DEBUG */ - buf_page_get( - space_id, - buf_frame_get_page_no(data), - RW_X_LATCH, &mtr); + buf_page_get(space_id, + buf_frame_get_page_no(data), + RW_X_LATCH, &mtr); #ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level( - rec_page, SYNC_NO_ORDER_CHECK); + buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK); #endif /* UNIV_SYNC_DEBUG */ - } - mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, 0, - MLOG_4BYTES, alloc_mtr); + MLOG_4BYTES, &mtr); mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4, big_rec_vec->fields[i].len - extern_len, - MLOG_4BYTES, alloc_mtr); + MLOG_4BYTES, &mtr); if (prev_page_no == FIL_NULL) { mlog_write_ulint(data + local_len + BTR_EXTERN_SPACE_ID, space_id, - MLOG_4BYTES, alloc_mtr); + MLOG_4BYTES, &mtr); mlog_write_ulint(data + local_len + BTR_EXTERN_PAGE_NO, page_no, - MLOG_4BYTES, alloc_mtr); + MLOG_4BYTES, &mtr); mlog_write_ulint(data + local_len + BTR_EXTERN_OFFSET, FIL_PAGE_DATA, - MLOG_4BYTES, alloc_mtr); + MLOG_4BYTES, &mtr); /* Set the bit denoting that this field in rec is stored externally */ @@ -3627,7 +3591,7 @@ btr_store_big_rec_extern_fields( rec_set_nth_field_extern_bit( rec, index, big_rec_vec->fields[i].field_no, - TRUE, alloc_mtr); + TRUE, &mtr); } prev_page_no = page_no; diff --git a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c index d5be8fca38f..90e6ad34a9a 100644 --- a/storage/innobase/fsp/fsp0fsp.c +++ b/storage/innobase/fsp/fsp0fsp.c @@ -300,12 +300,8 @@ fseg_alloc_free_page_low( inserted there in order, into which direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR */ - mtr_t* mtr, /* in/out: mini-transaction */ - mtr_t* init_mtr);/* in/out: mini-transaction in which the - page should be initialized - (may be the same as mtr), or NULL if it - should not be initialized (the page at hint - was previously freed in mtr) */ + mtr_t* mtr); /* in/out: mini-transaction */ + /************************************************************************** Reads the file space size stored in the header page. */ @@ -1375,43 +1371,6 @@ fsp_alloc_free_extent( return(descr); } -/**********************************************************************//** -Allocates a single free page from a space. */ -static __attribute__((nonnull)) -void -fsp_alloc_from_free_frag( -/*=====================*/ - fsp_header_t* header, /* in/out: tablespace header */ - xdes_t* descr, /* in/out: extent descriptor */ - ulint bit, /* in: slot to allocate in the extent */ - mtr_t* mtr) /* in/out: mini-transaction */ -{ - ulint frag_n_used; - - ut_ad(xdes_get_state(descr, mtr) == XDES_FREE_FRAG); - ut_a(xdes_get_bit(descr, XDES_FREE_BIT, bit, mtr)); - xdes_set_bit(descr, XDES_FREE_BIT, bit, FALSE, mtr); - - /* Update the FRAG_N_USED field */ - frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, - mtr); - frag_n_used++; - mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES, - mtr); - if (xdes_is_full(descr, mtr)) { - /* The fragment is full: move it to another list */ - flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, - mtr); - xdes_set_state(descr, XDES_FULL_FRAG, mtr); - - flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE, - mtr); - mlog_write_ulint(header + FSP_FRAG_N_USED, - frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES, - mtr); - } -} - /************************************************************************** Allocates a single free page from a space. The page is marked as used. */ static @@ -1422,22 +1381,19 @@ fsp_alloc_free_page( be allocated */ ulint space, /* in: space id */ ulint hint, /* in: hint of which page would be desirable */ - mtr_t* mtr, /* in/out: mini-transaction */ - mtr_t* init_mtr)/* in/out: mini-transaction in which the - page should be initialized - (may be the same as mtr) */ + mtr_t* mtr) /* in/out: mini-transaction */ { fsp_header_t* header; fil_addr_t first; xdes_t* descr; page_t* page; ulint free; + ulint frag_n_used; ulint page_no; ulint space_size; ibool success; ut_ad(mtr); - ut_ad(init_mtr); header = fsp_get_space_header(space, mtr); @@ -1517,21 +1473,40 @@ fsp_alloc_free_page( } } - fsp_alloc_from_free_frag(header, descr, free, mtr); + xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr); + + /* Update the FRAG_N_USED field */ + frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, + mtr); + frag_n_used++; + mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES, + mtr); + if (xdes_is_full(descr, mtr)) { + /* The fragment is full: move it to another list */ + flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, + mtr); + xdes_set_state(descr, XDES_FULL_FRAG, mtr); + + flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE, + mtr); + mlog_write_ulint(header + FSP_FRAG_N_USED, + frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES, + mtr); + } /* Initialize the allocated page to the buffer pool, so that it can be obtained immediately with buf_page_get without need for a disk read. */ - buf_page_create(space, page_no, init_mtr); + buf_page_create(space, page_no, mtr); - page = buf_page_get(space, page_no, RW_X_LATCH, init_mtr); + page = buf_page_get(space, page_no, RW_X_LATCH, mtr); #ifdef UNIV_SYNC_DEBUG buf_page_dbg_add_level(page, SYNC_FSP_PAGE); #endif /* UNIV_SYNC_DEBUG */ /* Prior contents of the page should be ignored */ - fsp_init_file_page(page, init_mtr); + fsp_init_file_page(page, mtr); return(page_no); } @@ -1750,7 +1725,7 @@ fsp_alloc_seg_inode_page( space = buf_frame_get_space_id(space_header); - page_no = fsp_alloc_free_page(space, 0, mtr, mtr); + page_no = fsp_alloc_free_page(space, 0, mtr); if (page_no == FIL_NULL) { @@ -2120,8 +2095,7 @@ fseg_create_general( } if (page == 0) { - page = fseg_alloc_free_page_low(space, - inode, 0, FSP_UP, mtr, mtr); + page = fseg_alloc_free_page_low(space, inode, 0, FSP_UP, mtr); if (page == FIL_NULL) { @@ -2365,12 +2339,7 @@ fseg_alloc_free_page_low( inserted there in order, into which direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR */ - mtr_t* mtr, /* in/out: mini-transaction */ - mtr_t* init_mtr)/* in/out: mini-transaction in which the - page should be initialized - (may be the same as mtr), or NULL if it - should not be initialized (the page at hint - was previously freed in mtr) */ + mtr_t* mtr) /* in/out: mini-transaction */ { fsp_header_t* space_header; ulint space_size; @@ -2382,6 +2351,7 @@ fseg_alloc_free_page_low( if could not be allocated */ xdes_t* ret_descr; /* the extent of the allocated page */ page_t* page; + ibool frag_page_allocated = FALSE; ibool success; ulint n; @@ -2402,8 +2372,6 @@ fseg_alloc_free_page_low( if (descr == NULL) { /* Hint outside space or too high above free limit: reset hint */ - ut_a(init_mtr); - /* The file space header page is always allocated. */ hint = 0; descr = xdes_get_descriptor(space, hint, mtr); } @@ -2415,20 +2383,15 @@ fseg_alloc_free_page_low( mtr), seg_id)) && (xdes_get_bit(descr, XDES_FREE_BIT, hint % FSP_EXTENT_SIZE, mtr) == TRUE)) { -take_hinted_page: + /* 1. We can take the hinted page =================================*/ ret_descr = descr; ret_page = hint; - /* Skip the check for extending the tablespace. If the - page hint were not within the size of the tablespace, - we would have got (descr == NULL) above and reset the hint. */ - goto got_hinted_page; /*-----------------------------------------------------------*/ - } else if (xdes_get_state(descr, mtr) == XDES_FREE - && (!init_mtr - || ((reserved - used < reserved / FSEG_FILLFACTOR) - && used >= FSEG_FRAG_LIMIT))) { + } else if ((xdes_get_state(descr, mtr) == XDES_FREE) + && ((reserved - used) < reserved / FSEG_FILLFACTOR) + && (used >= FSEG_FRAG_LIMIT)) { /* 2. We allocate the free extent from space and can take ========================================================= @@ -2446,20 +2409,8 @@ take_hinted_page: /* Try to fill the segment free list */ fseg_fill_free_list(seg_inode, space, hint + FSP_EXTENT_SIZE, mtr); - goto take_hinted_page; - /*-----------------------------------------------------------*/ - } else if (!init_mtr) { - ut_a(xdes_get_state(descr, mtr) == XDES_FREE_FRAG); - fsp_alloc_from_free_frag(space_header, descr, - hint % FSP_EXTENT_SIZE, mtr); ret_page = hint; - ret_descr = NULL; - - /* Put the page in the fragment page array of the segment */ - n = fseg_find_free_frag_page_slot(seg_inode, mtr); - ut_a(n != FIL_NULL); - fseg_set_nth_frag_page_no(seg_inode, n, ret_page, mtr); - goto got_hinted_page; + /*-----------------------------------------------------------*/ } else if ((direction != FSP_NO_DIR) && ((reserved - used) < reserved / FSEG_FILLFACTOR) && (used >= FSEG_FRAG_LIMIT) @@ -2517,9 +2468,11 @@ take_hinted_page: } else if (used < FSEG_FRAG_LIMIT) { /* 6. We allocate an individual page from the space ===================================================*/ - ret_page = fsp_alloc_free_page(space, hint, mtr, init_mtr); + ret_page = fsp_alloc_free_page(space, hint, mtr); ret_descr = NULL; + frag_page_allocated = TRUE; + if (ret_page != FIL_NULL) { /* Put the page in the fragment page array of the segment */ @@ -2529,10 +2482,6 @@ take_hinted_page: fseg_set_nth_frag_page_no(seg_inode, n, ret_page, mtr); } - - /* fsp_alloc_free_page() invoked fsp_init_file_page() - already. */ - return(ret_page); /*-----------------------------------------------------------*/ } else { /* 7. We allocate a new extent and take its first page @@ -2579,31 +2528,22 @@ take_hinted_page: } } -got_hinted_page: - { + if (!frag_page_allocated) { /* Initialize the allocated page to buffer pool, so that it can be obtained immediately with buf_page_get without need for a disk read */ - mtr_t* block_mtr = init_mtr ? init_mtr : mtr; - page = buf_page_create(space, ret_page, block_mtr); + page = buf_page_create(space, ret_page, mtr); - ut_a(page == buf_page_get(space, ret_page, RW_X_LATCH, - block_mtr)); + ut_a(page == buf_page_get(space, ret_page, RW_X_LATCH, mtr)); #ifdef UNIV_SYNC_DEBUG buf_page_dbg_add_level(page, SYNC_FSP_PAGE); #endif /* UNIV_SYNC_DEBUG */ - if (init_mtr) { - /* The prior contents of the page should be ignored */ - fsp_init_file_page(page, init_mtr); - } - } + /* The prior contents of the page should be ignored */ + fsp_init_file_page(page, mtr); - /* ret_descr == NULL if the block was allocated from free_frag - (XDES_FREE_FRAG) */ - if (ret_descr != NULL) { /* At this point we know the extent and the page offset. The extent is still in the appropriate list (FSEG_NOT_FULL or FSEG_FREE), and the page is not yet marked as used. */ @@ -2640,11 +2580,7 @@ fseg_alloc_free_page_general( with fsp_reserve_free_extents, then there is no need to do the check for this individual page */ - mtr_t* mtr, /* in/out: mini-transaction handle */ - mtr_t* init_mtr)/* in/out: mtr or another mini-transaction - in which the page should be initialized, - or NULL if this is a "fake allocation" of - a page that was previously freed in mtr */ + mtr_t* mtr) /* in/out: mini-transaction */ { fseg_inode_t* inode; ulint space; @@ -2682,8 +2618,7 @@ fseg_alloc_free_page_general( } page_no = fseg_alloc_free_page_low(buf_frame_get_space_id(inode), - inode, hint, direction, - mtr, init_mtr); + inode, hint, direction, mtr); if (!has_done_reservation) { fil_space_release_free_extents(space, n_reserved); } @@ -2711,7 +2646,7 @@ fseg_alloc_free_page( mtr_t* mtr) /* in: mtr handle */ { return(fseg_alloc_free_page_general(seg_header, hint, direction, - FALSE, mtr, mtr)); + FALSE, mtr)); } /************************************************************************** diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 7f82959dffd..6709f790994 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -4263,14 +4263,16 @@ calc_row_difference( /* The field has changed */ ufield = uvect->fields + n_changed; + UNIV_MEM_INVALID(ufield, sizeof *ufield); /* Let us use a dummy dfield to make the conversion from the MySQL column format to the InnoDB format */ - dict_col_copy_type_noninline(prebuilt->table->cols + i, - &dfield.type); - if (n_len != UNIV_SQL_NULL) { + dict_col_copy_type_noninline( + prebuilt->table->cols + i, + &dfield.type); + buf = row_mysql_store_col_in_innobase_format( &dfield, (byte*)buf, @@ -4281,11 +4283,13 @@ calc_row_difference( prebuilt->table)); ufield->new_val.data = dfield.data; ufield->new_val.len = dfield.len; + ufield->new_val.type = dfield.type; } else { ufield->new_val.data = NULL; ufield->new_val.len = UNIV_SQL_NULL; } + ufield->extern_storage = FALSE; ufield->exp = NULL; ufield->field_no = dict_col_get_clust_pos_noninline( &prebuilt->table->cols[i], clust_index); diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index 3988019589d..1573de7e818 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -312,13 +312,11 @@ enough free extents so that the compression will always succeed if done! */ void btr_compress( /*=========*/ - btr_cur_t* cursor, /* in/out: cursor on the page to merge - or lift; the page must not be empty: - when deleting records, use btr_discard_page() - if the page would become empty */ - ibool adjust, /* in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr); /* in/out: mini-transaction */ + btr_cur_t* cursor, /* in: cursor on the page to merge or lift; + the page must not be empty: in record delete + use btr_discard_page if the page would become + empty */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Discards a page from a B-tree. This is used to remove the last record from a B-tree page: the whole page must be removed at the same time. This cannot @@ -379,11 +377,7 @@ btr_page_alloc( page split is made */ ulint level, /* in: level where the page is placed in the tree */ - mtr_t* mtr, /* in/out: mini-transaction - for the allocation */ - mtr_t* init_mtr); /* in/out: mini-transaction - for x-latching and initializing - the page */ + mtr_t* mtr); /* in: mtr */ /****************************************************************** Frees a file page used in an index tree. NOTE: cannot free field external storage pages because the page must contain info on its level. */ @@ -406,31 +400,6 @@ btr_page_free_low( page_t* page, /* in: page to be freed, x-latched */ ulint level, /* in: page level */ mtr_t* mtr); /* in: mtr */ -/**************************************************************//** -Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free. -For invoking btr_store_big_rec_extern_fields() after an update, -we must temporarily mark freed clustered index pages allocated, so -that off-page columns will not be allocated from them. Between the -btr_store_big_rec_extern_fields() and mtr_commit() we have to -mark the pages free again, so that no pages will be leaked. */ - -void -btr_mark_freed_leaves( -/*==================*/ - dict_index_t* index, /* in/out: clustered index */ - mtr_t* mtr, /* in/out: mini-transaction */ - ibool nonfree);/* in: TRUE=mark nonfree, FALSE=mark freed */ -#ifdef UNIV_DEBUG -/**************************************************************//** -Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF. -See btr_mark_freed_leaves(). */ - -ibool -btr_freed_leaves_validate( -/*======================*/ - /* out: TRUE if valid */ - mtr_t* mtr); /* in: mini-transaction */ -#endif /* UNIV_DEBUG */ #ifdef UNIV_BTR_PRINT /***************************************************************** Prints size info of a B-tree. */ diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index c2bf84ef9cb..20235c55f22 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -23,9 +23,6 @@ Created 10/16/1994 Heikki Tuuri #define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */ #define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the update vector or inserted entry */ -#define BTR_KEEP_POS_FLAG 8 /* btr_cur_pessimistic_update() - must keep cursor position when - moving columns to big_rec */ #define BTR_CUR_ADAPT #define BTR_CUR_HASH_ADAPT @@ -240,9 +237,7 @@ btr_cur_pessimistic_update( /* out: DB_SUCCESS or error code */ ulint flags, /* in: undo logging, locking, and rollback flags */ - btr_cur_t* cursor, /* in/out: cursor on the record to update; - cursor may become invalid if *big_rec == NULL - || !(flags & BTR_KEEP_POS_FLAG) */ + btr_cur_t* cursor, /* in: cursor on the record to update */ big_rec_t** big_rec,/* out: big rec vector whose fields have to be stored externally by the caller, or NULL */ upd_t* update, /* in: update vector; this is allowed also @@ -291,6 +286,19 @@ btr_cur_del_unmark_for_ibuf( rec_t* rec, /* in: record to delete unmark */ mtr_t* mtr); /* in: mtr */ /***************************************************************** +Tries to compress a page of the tree on the leaf level. It is assumed +that mtr holds an x-latch on the tree and on the cursor page. To avoid +deadlocks, mtr must also own x-latches to brothers of page, if those +brothers exist. NOTE: it is assumed that the caller has reserved enough +free extents so that the compression will always succeed if done! */ + +void +btr_cur_compress( +/*=============*/ + btr_cur_t* cursor, /* in: cursor on the page to compress; + cursor does not stay valid */ + mtr_t* mtr); /* in: mtr */ +/***************************************************************** Tries to compress a page of the tree if it seems useful. It is assumed that mtr holds an x-latch on the tree and on the cursor page. To avoid deadlocks, mtr must also own x-latches to brothers of page, if those @@ -301,12 +309,10 @@ ibool btr_cur_compress_if_useful( /*=======================*/ /* out: TRUE if compression occurred */ - btr_cur_t* cursor, /* in/out: cursor on the page to compress; - cursor does not stay valid if !adjust and - compression occurs */ - ibool adjust, /* in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr); /* in/out: mini-transaction */ + btr_cur_t* cursor, /* in: cursor on the page to compress; + cursor does not stay valid if compression + occurs */ + mtr_t* mtr); /* in: mtr */ /*********************************************************** Removes the record on which the tree cursor is positioned. It is assumed that the mtr has an x-latch on the page where the cursor is positioned, @@ -462,11 +468,6 @@ btr_store_big_rec_extern_fields( this function returns */ big_rec_t* big_rec_vec, /* in: vector containing fields to be stored externally */ - mtr_t* alloc_mtr, /* in/out: in an insert, NULL; - in an update, local_mtr for - allocating BLOB pages and - updating BLOB pointers; alloc_mtr - must not have freed any leaf pages */ mtr_t* local_mtr); /* in: mtr containing the latch to rec and to the tree */ /*********************************************************************** diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 50e0aa9376c..0f7553a7043 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -654,25 +654,6 @@ buf_page_address_fold( /* out: the folded value */ ulint space, /* in: space id */ ulint offset);/* in: offset of the page within space */ -#ifdef UNIV_SYNC_DEBUG -/*********************************************************************** -Increments the bufferfix count. */ -UNIV_INLINE -void -buf_block_buf_fix_inc_debug( -/*========================*/ - buf_block_t* block, /* in: block to bufferfix */ - const char* file __attribute__ ((unused)), /* in: file name */ - ulint line __attribute__ ((unused))); /* in: line */ -#else /* UNIV_SYNC_DEBUG */ -/*********************************************************************** -Increments the bufferfix count. */ -UNIV_INLINE -void -buf_block_buf_fix_inc( -/*==================*/ - buf_block_t* block); /* in: block to bufferfix */ -#endif /* UNIV_SYNC_DEBUG */ /********************************************************************** Returns the control block of a file page, NULL if not found. */ UNIV_INLINE diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h index 4c58d6075e6..b7322944189 100644 --- a/storage/innobase/include/fsp0fsp.h +++ b/storage/innobase/include/fsp0fsp.h @@ -179,11 +179,7 @@ fseg_alloc_free_page_general( with fsp_reserve_free_extents, then there is no need to do the check for this individual page */ - mtr_t* mtr, /* in/out: mini-transaction */ - mtr_t* init_mtr);/* in/out: mtr or another mini-transaction - in which the page should be initialized, - or NULL if this is a "fake allocation" of - a page that was previously freed in mtr */ + mtr_t* mtr); /* in/out: mini-transaction */ /************************************************************************** Reserves free pages from a tablespace. All mini-transactions which may use several pages from the tablespace should call this function beforehand diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index 58983063361..2b41fa0059a 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -36,8 +36,6 @@ first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ #define MTR_MEMO_MODIFY 54 #define MTR_MEMO_S_LOCK 55 #define MTR_MEMO_X_LOCK 56 -/* The mini-transaction freed a clustered index leaf page. */ -#define MTR_MEMO_FREE_CLUST_LEAF 57 /* Log item types: we have made them to be of the type 'byte' for the compiler to warn if val and type parameters are switched @@ -317,12 +315,9 @@ struct mtr_struct{ ulint state; /* MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */ dyn_array_t memo; /* memo stack for locks etc. */ dyn_array_t log; /* mini-transaction log */ - unsigned modifications:1; + ibool modifications; /* TRUE if the mtr made modifications to buffer pool pages */ - unsigned freed_clust_leaf:1; - /* TRUE if MTR_MEMO_FREE_CLUST_LEAF - was logged in the mini-transaction */ ulint n_log_recs; /* count of how many page initial log records have been written to the mtr log */ diff --git a/storage/innobase/include/mtr0mtr.ic b/storage/innobase/include/mtr0mtr.ic index 6b4cacf0766..81eec3bfc92 100644 --- a/storage/innobase/include/mtr0mtr.ic +++ b/storage/innobase/include/mtr0mtr.ic @@ -26,7 +26,6 @@ mtr_start( mtr->log_mode = MTR_LOG_ALL; mtr->modifications = FALSE; - mtr->freed_clust_leaf = FALSE; mtr->n_log_recs = 0; #ifdef UNIV_DEBUG @@ -51,8 +50,7 @@ mtr_memo_push( ut_ad(object); ut_ad(type >= MTR_MEMO_PAGE_S_FIX); - ut_ad(type <= MTR_MEMO_FREE_CLUST_LEAF); - ut_ad(type != MTR_MEMO_FREE_CLUST_LEAF || mtr->freed_clust_leaf); + ut_ad(type <= MTR_MEMO_X_LOCK); ut_ad(mtr); ut_ad(mtr->magic_n == MTR_MAGIC_N); diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h index 24698557e77..273007c2778 100644 --- a/storage/innobase/include/page0page.h +++ b/storage/innobase/include/page0page.h @@ -234,22 +234,11 @@ page_get_supremum_rec( /*==================*/ /* out: the last record in record list */ page_t* page); /* in: page which must have record(s) */ -/************************************************************//** -Returns the nth record of the record list. -This is the inverse function of page_rec_get_n_recs_before(). */ +/**************************************************************** +Returns the middle record of record list. If there are an even number +of records in the list, returns the first record of upper half-list. */ rec_t* -page_rec_get_nth( -/*=============*/ - /* out: nth record */ - page_t* page, /* in: page */ - ulint nth); /* in: nth record */ -/***************************************************************** -Returns the middle record of the records on the page. If there is an -even number of records in the list, returns the first record of the -upper half-list. */ -UNIV_INLINE -rec_t* page_get_middle_rec( /*================*/ /* out: middle record */ @@ -291,8 +280,7 @@ page_get_n_recs( page_t* page); /* in: index page */ /******************************************************************* Returns the number of records before the given record in chain. -The number includes infimum and supremum records. -This is the inverse function of page_rec_get_nth(). */ +The number includes infimum and supremum records. */ ulint page_rec_get_n_recs_before( diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic index a019aa28515..d9e67f3eeeb 100644 --- a/storage/innobase/include/page0page.ic +++ b/storage/innobase/include/page0page.ic @@ -341,22 +341,6 @@ page_rec_is_infimum( } /***************************************************************** -Returns the middle record of the records on the page. If there is an -even number of records in the list, returns the first record of the -upper half-list. */ -UNIV_INLINE -rec_t* -page_get_middle_rec( -/*================*/ - /* out: middle record */ - page_t* page) /* in: page */ -{ - ulint middle = (page_get_n_recs(page) + 2) / 2; - - return(page_rec_get_nth(page, middle)); -} - -/***************************************************************** Compares a data tuple to a physical record. Differs from the function cmp_dtuple_rec_with_match in the way that the record must reside on an index page, and also page infimum and supremum records can be given in diff --git a/storage/innobase/mtr/mtr0mtr.c b/storage/innobase/mtr/mtr0mtr.c index 33b71f0766c..728c37ce564 100644 --- a/storage/innobase/mtr/mtr0mtr.c +++ b/storage/innobase/mtr/mtr0mtr.c @@ -53,13 +53,17 @@ mtr_memo_slot_release( buf_page_release((buf_block_t*)object, type, mtr); } else if (type == MTR_MEMO_S_LOCK) { rw_lock_s_unlock((rw_lock_t*)object); - } else if (type != MTR_MEMO_X_LOCK) { - ut_ad(type == MTR_MEMO_MODIFY - || type == MTR_MEMO_FREE_CLUST_LEAF); +#ifdef UNIV_DEBUG + } else if (type == MTR_MEMO_X_LOCK) { + rw_lock_x_unlock((rw_lock_t*)object); + } else { + ut_ad(type == MTR_MEMO_MODIFY); ut_ad(mtr_memo_contains(mtr, object, MTR_MEMO_PAGE_X_FIX)); +#else } else { rw_lock_x_unlock((rw_lock_t*)object); +#endif } } diff --git a/storage/innobase/page/page0page.c b/storage/innobase/page/page0page.c index 6a89df7de22..543cf9e34eb 100644 --- a/storage/innobase/page/page0page.c +++ b/storage/innobase/page/page0page.c @@ -1194,42 +1194,49 @@ page_dir_balance_slot( } /**************************************************************** -Returns the nth record of the record list. */ +Returns the middle record of the record list. If there are an even number +of records in the list, returns the first record of the upper half-list. */ rec_t* -page_rec_get_nth( -/*=============*/ - /* out: nth record */ - page_t* page, /* in: page */ - ulint nth) /* in: nth record */ +page_get_middle_rec( +/*================*/ + /* out: middle record */ + page_t* page) /* in: page */ { page_dir_slot_t* slot; + ulint middle; ulint i; ulint n_owned; + ulint count; rec_t* rec; - ut_ad(nth < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1)); + /* This many records we must leave behind */ + middle = (page_get_n_recs(page) + 2) / 2; + + count = 0; for (i = 0;; i++) { slot = page_dir_get_nth_slot(page, i); n_owned = page_dir_slot_get_n_owned(slot); - if (n_owned > nth) { + if (count + n_owned > middle) { break; } else { - nth -= n_owned; + count += n_owned; } } ut_ad(i > 0); slot = page_dir_get_nth_slot(page, i - 1); rec = page_dir_slot_get_rec(slot); + rec = page_rec_get_next(rec); + + /* There are now count records behind rec */ - do { + for (i = 0; i < middle - count; i++) { rec = page_rec_get_next(rec); - ut_ad(rec); - } while (nth--); + } return(rec); } diff --git a/storage/innobase/row/row0ins.c b/storage/innobase/row/row0ins.c index c18b6995066..db134ca7a41 100644 --- a/storage/innobase/row/row0ins.c +++ b/storage/innobase/row/row0ins.c @@ -259,7 +259,6 @@ row_ins_sec_index_entry_by_modify( err = btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG, cursor, &dummy_big_rec, update, 0, thr, mtr); - ut_a(!dummy_big_rec); } func_exit: mem_heap_free(heap); @@ -330,9 +329,8 @@ row_ins_clust_index_entry_by_modify( goto func_exit; } - err = btr_cur_pessimistic_update( - BTR_KEEP_POS_FLAG, cursor, big_rec, update, - 0, thr, mtr); + err = btr_cur_pessimistic_update(0, cursor, big_rec, update, + 0, thr, mtr); } func_exit: mem_heap_free(heap); @@ -425,11 +423,9 @@ row_ins_cascade_calc_update_vec( dict_table_t* table = foreign->foreign_table; dict_index_t* index = foreign->foreign_index; upd_t* update; - upd_field_t* ufield; dict_table_t* parent_table; dict_index_t* parent_index; upd_t* parent_update; - upd_field_t* parent_ufield; ulint n_fields_updated; ulint parent_field_no; ulint i; @@ -465,12 +461,14 @@ row_ins_cascade_calc_update_vec( dict_index_get_nth_col_no(parent_index, i)); for (j = 0; j < parent_update->n_fields; j++) { - parent_ufield = parent_update->fields + j; + const upd_field_t* parent_ufield + = &parent_update->fields[j]; if (parent_ufield->field_no == parent_field_no) { ulint min_size; const dict_col_t* col; + upd_field_t* ufield; col = dict_index_get_nth_col(index, i); @@ -975,10 +973,9 @@ row_ins_foreign_check_on_constraint( goto nonstandard_exit_func; } - if ((node->is_delete - && (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL)) - || (!node->is_delete - && (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL))) { + if (node->is_delete + ? (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) + : (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL)) { /* Build the appropriate update vector which sets foreign->n_fields first fields in rec to SQL NULL */ @@ -987,6 +984,8 @@ row_ins_foreign_check_on_constraint( update->info_bits = 0; update->n_fields = foreign->n_fields; + UNIV_MEM_INVALID(update->fields, + update->n_fields * sizeof *update->fields); for (i = 0; i < foreign->n_fields; i++) { (update->fields + i)->field_no @@ -2085,50 +2084,6 @@ row_ins_index_entry_low( err = row_ins_clust_index_entry_by_modify( mode, &cursor, &big_rec, entry, ext_vec, n_ext_vec, thr, &mtr); - - if (big_rec) { - ut_a(err == DB_SUCCESS); - /* Write out the externally stored - columns, but allocate the pages and - write the pointers using the - mini-transaction of the record update. - If any pages were freed in the update, - temporarily mark them allocated so - that off-page columns will not - overwrite them. We must do this, - because we will write the redo log for - the BLOB writes before writing the - redo log for the record update. Thus, - redo log application at crash recovery - will see BLOBs being written to free pages. */ - - btr_mark_freed_leaves(index, &mtr, TRUE); - - rec = btr_cur_get_rec(&cursor); - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, - &heap); - - err = btr_store_big_rec_extern_fields( - index, rec, offsets, big_rec, - &mtr, &mtr); - /* If writing big_rec fails (for - example, because of DB_OUT_OF_FILE_SPACE), - the record will be corrupted. Even if - we did not update any externally - stored columns, our update could cause - the record to grow so that a - non-updated column was selected for - external storage. This non-update - would not have been written to the - undo log, and thus the record cannot - be rolled back. */ - ut_a(err == DB_SUCCESS); - /* Free the pages again - in order to avoid a leak. */ - btr_mark_freed_leaves(index, &mtr, FALSE); - goto stored_big_rec; - } } else { err = row_ins_sec_index_entry_by_modify( mode, &cursor, entry, thr, &mtr); @@ -2165,6 +2120,7 @@ function_exit: mtr_commit(&mtr); if (big_rec) { + rec_t* rec; mtr_start(&mtr); btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, @@ -2174,9 +2130,8 @@ function_exit: ULINT_UNDEFINED, &heap); err = btr_store_big_rec_extern_fields(index, rec, - offsets, big_rec, - NULL, &mtr); -stored_big_rec: + offsets, big_rec, &mtr); + if (modify) { dtuple_big_rec_free(big_rec); } else { diff --git a/storage/innobase/row/row0row.c b/storage/innobase/row/row0row.c index ccb3c1f7781..171039e34ac 100644 --- a/storage/innobase/row/row0row.c +++ b/storage/innobase/row/row0row.c @@ -212,27 +212,23 @@ row_build( } #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG - if (rec_offs_any_null_extern(rec, offsets)) { - /* This condition can occur during crash recovery - before trx_rollback_or_clean_all_without_sess() has - completed execution. - - This condition is possible if the server crashed - during an insert or update before - btr_store_big_rec_extern_fields() did mtr_commit() all - BLOB pointers to the clustered index record. - - If the record contains a null BLOB pointer, look up the - transaction that holds the implicit lock on this record, and - assert that it is active. (In this version of InnoDB, we - cannot assert that it was recovered, because there is no - trx->is_recovered field.) */ - - ut_a(trx_assert_active( - row_get_rec_trx_id(rec, index, offsets))); - ut_a(trx_undo_roll_ptr_is_insert( - row_get_rec_roll_ptr(rec, index, offsets))); - } + /* This condition can occur during crash recovery before + trx_rollback_or_clean_all_without_sess() has completed + execution. + + This condition is possible if the server crashed + during an insert or update before + btr_store_big_rec_extern_fields() did mtr_commit() all + BLOB pointers to the clustered index record. + + If the record contains a null BLOB pointer, look up the + transaction that holds the implicit lock on this record, and + assert that it is active. (In this version of InnoDB, we + cannot assert that it was recovered, because there is no + trx->is_recovered field.) */ + + ut_a(!rec_offs_any_null_extern(rec, offsets) + || trx_assert_active(row_get_rec_trx_id(rec, index, offsets))); #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ if (type != ROW_COPY_POINTERS) { diff --git a/storage/innobase/row/row0umod.c b/storage/innobase/row/row0umod.c index 0b00aa2411a..a3333fcc536 100644 --- a/storage/innobase/row/row0umod.c +++ b/storage/innobase/row/row0umod.c @@ -119,7 +119,6 @@ row_undo_mod_clust_low( | BTR_KEEP_SYS_FLAG, btr_cur, &dummy_big_rec, node->update, node->cmpl_info, thr, mtr); - ut_ad(!dummy_big_rec); } return(err); @@ -472,7 +471,6 @@ row_undo_mod_del_unmark_sec_and_undo_update( BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG, btr_cur, &dummy_big_rec, update, 0, thr, &mtr); - ut_ad(!dummy_big_rec); } mem_heap_free(heap); diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c index 58739edfd98..0790cfe02e2 100644 --- a/storage/innobase/row/row0upd.c +++ b/storage/innobase/row/row0upd.c @@ -1580,51 +1580,32 @@ row_upd_clust_rec( ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur), dict_table_is_comp(index->table))); - err = btr_cur_pessimistic_update( - BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG, btr_cur, - &big_rec, node->update, node->cmpl_info, thr, mtr); + err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur, + &big_rec, node->update, + node->cmpl_info, thr, mtr); + mtr_commit(mtr); - if (big_rec) { + if (err == DB_SUCCESS && big_rec) { mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; rec_t* rec; *offsets_ = (sizeof offsets_) / sizeof *offsets_; - ut_a(err == DB_SUCCESS); - /* Write out the externally stored columns, but - allocate the pages and write the pointers using the - mini-transaction of the record update. If any pages - were freed in the update, temporarily mark them - allocated so that off-page columns will not overwrite - them. We must do this, because we write the redo log - for the BLOB writes before writing the redo log for - the record update. */ - - btr_mark_freed_leaves(index, mtr, TRUE); + mtr_start(mtr); + + ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr)); rec = btr_cur_get_rec(btr_cur); err = btr_store_big_rec_extern_fields( index, rec, rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap), - big_rec, mtr, mtr); + big_rec, mtr); if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } - /* If writing big_rec fails (for example, because of - DB_OUT_OF_FILE_SPACE), the record will be corrupted. - Even if we did not update any externally stored - columns, our update could cause the record to grow so - that a non-updated column was selected for external - storage. This non-update would not have been written - to the undo log, and thus the record cannot be rolled - back. */ - ut_a(err == DB_SUCCESS); - /* Free the pages again in order to avoid a leak. */ - btr_mark_freed_leaves(index, mtr, FALSE); + mtr_commit(mtr); } - mtr_commit(mtr); - if (big_rec) { dtuple_big_rec_free(big_rec); } diff --git a/storage/innobase/trx/trx0undo.c b/storage/innobase/trx/trx0undo.c index ce09862f317..329565943c8 100644 --- a/storage/innobase/trx/trx0undo.c +++ b/storage/innobase/trx/trx0undo.c @@ -864,7 +864,7 @@ trx_undo_add_page( page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER, undo->top_page_no + 1, FSP_UP, - TRUE, mtr, mtr); + TRUE, mtr); fil_space_release_free_extents(undo->space, n_reserved); diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index a7c9c6e3212..951a1bd9c3b 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -17,7 +17,7 @@ 2011-10-20 The InnoDB Team - * btr/brt0cur.c: + * btr/btr0cur.c: Fix Bug#13116045 Compilation failure using GCC 4.6.1 in btr/btr0cur.c 2011-10-12 The InnoDB Team diff --git a/storage/innodb_plugin/btr/btr0btr.c b/storage/innodb_plugin/btr/btr0btr.c index 71e1599d19e..23729c12c1a 100644 --- a/storage/innodb_plugin/btr/btr0btr.c +++ b/storage/innodb_plugin/btr/btr0btr.c @@ -906,29 +906,28 @@ btr_page_alloc_for_ibuf( /**************************************************************//** Allocates a new file page to be used in an index tree. NOTE: we assume that the caller has made the reservation for free extents! -@return allocated page number, FIL_NULL if out of space */ -static __attribute__((nonnull(1,5), warn_unused_result)) -ulint -btr_page_alloc_low( -/*===============*/ +@return new allocated block, x-latched; NULL if out of space */ +UNIV_INTERN +buf_block_t* +btr_page_alloc( +/*===========*/ dict_index_t* index, /*!< in: index */ ulint hint_page_no, /*!< in: hint of a good page */ byte file_direction, /*!< in: direction where a possible page split is made */ ulint level, /*!< in: level where the page is placed in the tree */ - mtr_t* mtr, /*!< in/out: mini-transaction - for the allocation */ - mtr_t* init_mtr) /*!< in/out: mini-transaction - in which the page should be - initialized (may be the same - as mtr), or NULL if it should - not be initialized (the page - at hint was previously freed - in mtr) */ + mtr_t* mtr) /*!< in: mtr */ { fseg_header_t* seg_header; page_t* root; + buf_block_t* new_block; + ulint new_page_no; + + if (dict_index_is_ibuf(index)) { + + return(btr_page_alloc_for_ibuf(index, mtr)); + } root = btr_root_get(index, mtr); @@ -942,42 +941,8 @@ btr_page_alloc_low( reservation for free extents, and thus we know that a page can be allocated: */ - return(fseg_alloc_free_page_general( - seg_header, hint_page_no, file_direction, - TRUE, mtr, init_mtr)); -} - -/**************************************************************//** -Allocates a new file page to be used in an index tree. NOTE: we assume -that the caller has made the reservation for free extents! -@return new allocated block, x-latched; NULL if out of space */ -UNIV_INTERN -buf_block_t* -btr_page_alloc( -/*===========*/ - dict_index_t* index, /*!< in: index */ - ulint hint_page_no, /*!< in: hint of a good page */ - byte file_direction, /*!< in: direction where a possible - page split is made */ - ulint level, /*!< in: level where the page is placed - in the tree */ - mtr_t* mtr, /*!< in/out: mini-transaction - for the allocation */ - mtr_t* init_mtr) /*!< in/out: mini-transaction - for x-latching and initializing - the page */ -{ - buf_block_t* new_block; - ulint new_page_no; - - if (dict_index_is_ibuf(index)) { - - return(btr_page_alloc_for_ibuf(index, mtr)); - } - - new_page_no = btr_page_alloc_low( - index, hint_page_no, file_direction, level, mtr, init_mtr); - + new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no, + file_direction, TRUE, mtr); if (new_page_no == FIL_NULL) { return(NULL); @@ -985,16 +950,9 @@ btr_page_alloc( new_block = buf_page_get(dict_index_get_space(index), dict_table_zip_size(index->table), - new_page_no, RW_X_LATCH, init_mtr); + new_page_no, RW_X_LATCH, mtr); buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW); - if (mtr->freed_clust_leaf) { - mtr_memo_release(mtr, new_block, MTR_MEMO_FREE_CLUST_LEAF); - ut_ad(!mtr_memo_contains(mtr, new_block, - MTR_MEMO_FREE_CLUST_LEAF)); - } - - ut_ad(btr_freed_leaves_validate(mtr)); return(new_block); } @@ -1129,139 +1087,12 @@ btr_page_free( buf_block_t* block, /*!< in: block to be freed, x-latched */ mtr_t* mtr) /*!< in: mtr */ { - const page_t* page = buf_block_get_frame(block); - ulint level = btr_page_get_level(page, mtr); - - ut_ad(fil_page_get_type(block->frame) == FIL_PAGE_INDEX); - btr_page_free_low(index, block, level, mtr); - - /* The handling of MTR_MEMO_FREE_CLUST_LEAF assumes this. */ - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - - if (level == 0 && dict_index_is_clust(index)) { - /* We may have to call btr_mark_freed_leaves() to - temporarily mark the block nonfree for invoking - btr_store_big_rec_extern_fields_func() after an - update. Remember that the block was freed. */ - mtr->freed_clust_leaf = TRUE; - mtr_memo_push(mtr, block, MTR_MEMO_FREE_CLUST_LEAF); - } - - ut_ad(btr_freed_leaves_validate(mtr)); -} - -/**************************************************************//** -Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free. -For invoking btr_store_big_rec_extern_fields() after an update, -we must temporarily mark freed clustered index pages allocated, so -that off-page columns will not be allocated from them. Between the -btr_store_big_rec_extern_fields() and mtr_commit() we have to -mark the pages free again, so that no pages will be leaked. */ -UNIV_INTERN -void -btr_mark_freed_leaves( -/*==================*/ - dict_index_t* index, /*!< in/out: clustered index */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - ibool nonfree)/*!< in: TRUE=mark nonfree, FALSE=mark freed */ -{ - /* This is loosely based on mtr_memo_release(). */ - - ulint offset; - - ut_ad(dict_index_is_clust(index)); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - ut_ad(mtr->state == MTR_ACTIVE); - - if (!mtr->freed_clust_leaf) { - return; - } - - offset = dyn_array_get_data_size(&mtr->memo); - - while (offset > 0) { - mtr_memo_slot_t* slot; - buf_block_t* block; - - offset -= sizeof *slot; - - slot = dyn_array_get_element(&mtr->memo, offset); - - if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) { - continue; - } - - /* Because btr_page_alloc() does invoke - mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all - blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the - memo must still be clustered index leaf tree pages. */ - block = slot->object; - ut_a(buf_block_get_space(block) - == dict_index_get_space(index)); - ut_a(fil_page_get_type(buf_block_get_frame(block)) - == FIL_PAGE_INDEX); - ut_a(page_is_leaf(buf_block_get_frame(block))); - - if (nonfree) { - /* Allocate the same page again. */ - ulint page_no; - page_no = btr_page_alloc_low( - index, buf_block_get_page_no(block), - FSP_NO_DIR, 0, mtr, NULL); - ut_a(page_no == buf_block_get_page_no(block)); - } else { - /* Assert that the page is allocated and free it. */ - btr_page_free_low(index, block, 0, mtr); - } - } - - ut_ad(btr_freed_leaves_validate(mtr)); -} - -#ifdef UNIV_DEBUG -/**************************************************************//** -Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF. -@see btr_mark_freed_leaves() -@return TRUE */ -UNIV_INTERN -ibool -btr_freed_leaves_validate( -/*======================*/ - mtr_t* mtr) /*!< in: mini-transaction */ -{ - ulint offset; - - ut_ad(mtr->magic_n == MTR_MAGIC_N); - ut_ad(mtr->state == MTR_ACTIVE); - - offset = dyn_array_get_data_size(&mtr->memo); - - while (offset > 0) { - const mtr_memo_slot_t* slot; - const buf_block_t* block; - - offset -= sizeof *slot; - - slot = dyn_array_get_element(&mtr->memo, offset); - - if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) { - continue; - } + ulint level; - ut_a(mtr->freed_clust_leaf); - /* Because btr_page_alloc() does invoke - mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all - blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the - memo must still be clustered index leaf tree pages. */ - block = slot->object; - ut_a(fil_page_get_type(buf_block_get_frame(block)) - == FIL_PAGE_INDEX); - ut_a(page_is_leaf(buf_block_get_frame(block))); - } + level = btr_page_get_level(buf_block_get_frame(block), mtr); - return(TRUE); + btr_page_free_low(index, block, level, mtr); } -#endif /* UNIV_DEBUG */ /**************************************************************//** Sets the child node file address in a node pointer. */ @@ -1984,7 +1815,7 @@ btr_root_raise_and_insert( level = btr_page_get_level(root, mtr); - new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr, mtr); + new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr); new_page = buf_block_get_frame(new_block); new_page_zip = buf_block_get_page_zip(new_block); ut_a(!new_page_zip == !root_page_zip); @@ -2720,7 +2551,7 @@ func_start: /* 2. Allocate a new page to the index */ new_block = btr_page_alloc(cursor->index, hint_page_no, direction, - btr_page_get_level(page, mtr), mtr, mtr); + btr_page_get_level(page, mtr), mtr); new_page = buf_block_get_frame(new_block); new_page_zip = buf_block_get_page_zip(new_block); btr_page_create(new_block, new_page_zip, cursor->index, @@ -3170,16 +3001,15 @@ btr_node_ptr_delete( ut_a(err == DB_SUCCESS); if (!compressed) { - btr_cur_compress_if_useful(&cursor, FALSE, mtr); + btr_cur_compress_if_useful(&cursor, mtr); } } /*************************************************************//** If page is the only on its level, this function moves its records to the -father page, thus reducing the tree height. -@return father block */ +father page, thus reducing the tree height. */ static -buf_block_t* +void btr_lift_page_up( /*=============*/ dict_index_t* index, /*!< in: index tree */ @@ -3296,8 +3126,6 @@ btr_lift_page_up( } ut_ad(page_validate(father_page, index)); ut_ad(btr_check_node_ptr(index, father_block, mtr)); - - return(father_block); } /*************************************************************//** @@ -3314,13 +3142,11 @@ UNIV_INTERN ibool btr_compress( /*=========*/ - btr_cur_t* cursor, /*!< in/out: cursor on the page to merge - or lift; the page must not be empty: - when deleting records, use btr_discard_page() - if the page would become empty */ - ibool adjust, /*!< in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + btr_cur_t* cursor, /*!< in: cursor on the page to merge or lift; + the page must not be empty: in record delete + use btr_discard_page if the page would become + empty */ + mtr_t* mtr) /*!< in: mtr */ { dict_index_t* index; ulint space; @@ -3338,14 +3164,12 @@ btr_compress( ulint* offsets; ulint data_size; ulint n_recs; - ulint nth_rec = 0; /* remove bogus warning */ ulint max_ins_size; ulint max_ins_size_reorg; block = btr_cur_get_block(cursor); page = btr_cur_get_page(cursor); index = btr_cur_get_index(cursor); - ut_a((ibool) !!page_is_comp(page) == dict_table_is_comp(index->table)); ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), @@ -3366,10 +3190,6 @@ btr_compress( offsets = btr_page_get_father_block(NULL, heap, index, block, mtr, &father_cursor); - if (adjust) { - nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor)); - } - /* Decide the page to which we try to merge and which will inherit the locks */ @@ -3396,9 +3216,9 @@ btr_compress( } else { /* The page is the only one on the level, lift the records to the father */ - - merge_block = btr_lift_page_up(index, block, mtr); - goto func_exit; + btr_lift_page_up(index, block, mtr); + mem_heap_free(heap); + return(TRUE); } n_recs = page_get_n_recs(page); @@ -3480,10 +3300,6 @@ err_exit: btr_node_ptr_delete(index, block, mtr); lock_update_merge_left(merge_block, orig_pred, block); - - if (adjust) { - nth_rec += page_rec_get_n_recs_before(orig_pred); - } } else { rec_t* orig_succ; #ifdef UNIV_BTR_DEBUG @@ -3548,6 +3364,7 @@ err_exit: } btr_blob_dbg_remove(page, index, "btr_compress"); + mem_heap_free(heap); if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) { /* Update the free bits of the B-tree page in the @@ -3599,16 +3416,6 @@ err_exit: btr_page_free(index, block, mtr); ut_ad(btr_check_node_ptr(index, merge_block, mtr)); -func_exit: - mem_heap_free(heap); - - if (adjust) { - btr_cur_position( - index, - page_rec_get_nth(merge_block->frame, nth_rec), - merge_block, cursor); - } - return(TRUE); } diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c index 1e603a6fc81..9f4be053a43 100644 --- a/storage/innodb_plugin/btr/btr0cur.c +++ b/storage/innodb_plugin/btr/btr0cur.c @@ -2099,9 +2099,7 @@ btr_cur_pessimistic_update( /*=======================*/ ulint flags, /*!< in: undo logging, locking, and rollback flags */ - btr_cur_t* cursor, /*!< in/out: cursor on the record to update; - cursor may become invalid if *big_rec == NULL - || !(flags & BTR_KEEP_POS_FLAG) */ + btr_cur_t* cursor, /*!< in: cursor on the record to update */ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to be stored externally by the caller, or NULL */ @@ -2240,7 +2238,7 @@ btr_cur_pessimistic_update( record to be inserted: we have to remember which fields were such */ ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec)); - ut_ad(rec_offs_validate(rec, index, offsets)); + offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, heap); n_ext += btr_push_update_extern_fields(new_entry, update, *heap); if (UNIV_LIKELY_NULL(page_zip)) { @@ -2263,10 +2261,6 @@ make_external: err = DB_TOO_BIG_RECORD; goto return_after_reservations; } - - ut_ad(page_is_leaf(page)); - ut_ad(dict_index_is_clust(index)); - ut_ad(flags & BTR_KEEP_POS_FLAG); } /* Store state of explicit locks on rec on the page infimum record, @@ -2294,8 +2288,6 @@ make_external: rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr); if (rec) { - page_cursor->rec = rec; - lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor), rec, block); @@ -2309,10 +2301,7 @@ make_external: rec, index, offsets, mtr); } - btr_cur_compress_if_useful( - cursor, - big_rec_vec != NULL && (flags & BTR_KEEP_POS_FLAG), - mtr); + btr_cur_compress_if_useful(cursor, mtr); if (page_zip && !dict_index_is_clust(index) && page_is_leaf(page)) { @@ -2332,21 +2321,6 @@ make_external: } } - if (big_rec_vec) { - ut_ad(page_is_leaf(page)); - ut_ad(dict_index_is_clust(index)); - ut_ad(flags & BTR_KEEP_POS_FLAG); - - /* btr_page_split_and_insert() in - btr_cur_pessimistic_insert() invokes - mtr_memo_release(mtr, index->lock, MTR_MEMO_X_LOCK). - We must keep the index->lock when we created a - big_rec, so that row_upd_clust_rec() can store the - big_rec in the same mini-transaction. */ - - mtr_x_lock(dict_index_get_lock(index), mtr); - } - /* Was the record to be updated positioned as the first user record on its page? */ was_first = page_cur_is_before_first(page_cursor); @@ -2362,7 +2336,6 @@ make_external: ut_a(rec); ut_a(err == DB_SUCCESS); ut_a(dummy_big_rec == NULL); - page_cursor->rec = rec; if (dict_index_is_sec_or_ibuf(index)) { /* Update PAGE_MAX_TRX_ID in the index page header. @@ -2796,12 +2769,10 @@ UNIV_INTERN ibool btr_cur_compress_if_useful( /*=======================*/ - btr_cur_t* cursor, /*!< in/out: cursor on the page to compress; - cursor does not stay valid if !adjust and - compression occurs */ - ibool adjust, /*!< in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + btr_cur_t* cursor, /*!< in: cursor on the page to compress; + cursor does not stay valid if compression + occurs */ + mtr_t* mtr) /*!< in: mtr */ { ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(btr_cur_get_index(cursor)), @@ -2810,7 +2781,7 @@ btr_cur_compress_if_useful( MTR_MEMO_PAGE_X_FIX)); return(btr_cur_compress_recommendation(cursor, mtr) - && btr_compress(cursor, adjust, mtr)); + && btr_compress(cursor, mtr)); } /*******************************************************//** @@ -3052,7 +3023,7 @@ return_after_reservations: mem_heap_free(heap); if (ret == FALSE) { - ret = btr_cur_compress_if_useful(cursor, FALSE, mtr); + ret = btr_cur_compress_if_useful(cursor, mtr); } if (n_extents > 0) { @@ -3863,9 +3834,6 @@ btr_store_big_rec_extern_fields_func( the "external storage" flags in offsets will not correspond to rec when this function returns */ - const big_rec_t*big_rec_vec, /*!< in: vector containing fields - to be stored externally */ - #ifdef UNIV_DEBUG mtr_t* local_mtr, /*!< in: mtr containing the latch to rec and to the tree */ @@ -3874,11 +3842,9 @@ btr_store_big_rec_extern_fields_func( ibool update_in_place,/*! in: TRUE if the record is updated in place (not delete+insert) */ #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - mtr_t* alloc_mtr) /*!< in/out: in an insert, NULL; - in an update, local_mtr for - allocating BLOB pages and - updating BLOB pointers; alloc_mtr - must not have freed any leaf pages */ + const big_rec_t*big_rec_vec) /*!< in: vector containing fields + to be stored externally */ + { ulint rec_page_no; byte* field_ref; @@ -3897,9 +3863,6 @@ btr_store_big_rec_extern_fields_func( ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(rec_offs_any_extern(offsets)); - ut_ad(local_mtr); - ut_ad(!alloc_mtr || alloc_mtr == local_mtr); - ut_ad(!update_in_place || alloc_mtr); ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index), MTR_MEMO_X_LOCK)); ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX)); @@ -3915,25 +3878,6 @@ btr_store_big_rec_extern_fields_func( rec_page_no = buf_block_get_page_no(rec_block); ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX); - if (alloc_mtr) { - /* Because alloc_mtr will be committed after - mtr, it is possible that the tablespace has been - extended when the B-tree record was updated or - inserted, or it will be extended while allocating - pages for big_rec. - - TODO: In mtr (not alloc_mtr), write a redo log record - about extending the tablespace to its current size, - and remember the current size. Whenever the tablespace - grows as pages are allocated, write further redo log - records to mtr. (Currently tablespace extension is not - covered by the redo log. If it were, the record would - only be written to alloc_mtr, which is committed after - mtr.) */ - } else { - alloc_mtr = &mtr; - } - if (UNIV_LIKELY_NULL(page_zip)) { int err; @@ -4010,7 +3954,7 @@ btr_store_big_rec_extern_fields_func( } block = btr_page_alloc(index, hint_page_no, - FSP_NO_DIR, 0, alloc_mtr, &mtr); + FSP_NO_DIR, 0, &mtr); if (UNIV_UNLIKELY(block == NULL)) { mtr_commit(&mtr); @@ -4137,15 +4081,11 @@ btr_store_big_rec_extern_fields_func( goto next_zip_page; } - if (alloc_mtr == &mtr) { - rec_block = buf_page_get( - space_id, zip_size, - rec_page_no, - RW_X_LATCH, &mtr); - buf_block_dbg_add_level( - rec_block, - SYNC_NO_ORDER_CHECK); - } + rec_block = buf_page_get(space_id, zip_size, + rec_page_no, + RW_X_LATCH, &mtr); + buf_block_dbg_add_level(rec_block, + SYNC_NO_ORDER_CHECK); if (err == Z_STREAM_END) { mach_write_to_4(field_ref @@ -4179,8 +4119,7 @@ btr_store_big_rec_extern_fields_func( page_zip_write_blob_ptr( page_zip, rec, index, offsets, - big_rec_vec->fields[i].field_no, - alloc_mtr); + big_rec_vec->fields[i].field_no, &mtr); next_zip_page: prev_page_no = page_no; @@ -4225,23 +4164,19 @@ next_zip_page: extern_len -= store_len; - if (alloc_mtr == &mtr) { - rec_block = buf_page_get( - space_id, zip_size, - rec_page_no, - RW_X_LATCH, &mtr); - buf_block_dbg_add_level( - rec_block, - SYNC_NO_ORDER_CHECK); - } + rec_block = buf_page_get(space_id, zip_size, + rec_page_no, + RW_X_LATCH, &mtr); + buf_block_dbg_add_level(rec_block, + SYNC_NO_ORDER_CHECK); mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0, - MLOG_4BYTES, alloc_mtr); + MLOG_4BYTES, &mtr); mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4, big_rec_vec->fields[i].len - extern_len, - MLOG_4BYTES, alloc_mtr); + MLOG_4BYTES, &mtr); if (prev_page_no == FIL_NULL) { btr_blob_dbg_add_blob( @@ -4251,19 +4186,18 @@ next_zip_page: mlog_write_ulint(field_ref + BTR_EXTERN_SPACE_ID, - space_id, MLOG_4BYTES, - alloc_mtr); + space_id, + MLOG_4BYTES, &mtr); mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO, - page_no, MLOG_4BYTES, - alloc_mtr); + page_no, + MLOG_4BYTES, &mtr); mlog_write_ulint(field_ref + BTR_EXTERN_OFFSET, FIL_PAGE_DATA, - MLOG_4BYTES, - alloc_mtr); + MLOG_4BYTES, &mtr); } prev_page_no = page_no; diff --git a/storage/innodb_plugin/fsp/fsp0fsp.c b/storage/innodb_plugin/fsp/fsp0fsp.c index 19846b63d5b..fee7fde2e5c 100644 --- a/storage/innodb_plugin/fsp/fsp0fsp.c +++ b/storage/innodb_plugin/fsp/fsp0fsp.c @@ -312,9 +312,8 @@ fsp_fill_free_list( descriptor page and ibuf bitmap page; then we do not allocate more extents */ ulint space, /*!< in: space */ - fsp_header_t* header, /*!< in/out: space header */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - __attribute__((nonnull)); + fsp_header_t* header, /*!< in: space header */ + mtr_t* mtr); /*!< in: mtr */ /**********************************************************************//** Allocates a single free page from a segment. This function implements the intelligent allocation strategy which tries to minimize file space @@ -334,13 +333,7 @@ fseg_alloc_free_page_low( inserted there in order, into which direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - mtr_t* init_mtr)/*!< in/out: mini-transaction in which the - page should be initialized - (may be the same as mtr), or NULL if it - should not be initialized (the page at hint - was previously freed in mtr) */ - __attribute__((warn_unused_result, nonnull(3,6))); + mtr_t* mtr); /*!< in/out: mini-transaction */ #endif /* !UNIV_HOTBACKUP */ /**********************************************************************//** @@ -708,18 +701,17 @@ list, if not free limit == space size. This adding is necessary to make the descriptor defined, as they are uninitialized above the free limit. @return pointer to the extent descriptor, NULL if the page does not exist in the space or if the offset exceeds the free limit */ -UNIV_INLINE __attribute__((nonnull, warn_unused_result)) +UNIV_INLINE xdes_t* xdes_get_descriptor_with_space_hdr( /*===============================*/ - fsp_header_t* sp_header, /*!< in/out: space header, x-latched - in mtr */ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page offset; if equal - to the free limit, we try to - add new extents to the space - free list */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + fsp_header_t* sp_header,/*!< in/out: space header, x-latched */ + ulint space, /*!< in: space id */ + ulint offset, /*!< in: page offset; + if equal to the free limit, + we try to add new extents to + the space free list */ + mtr_t* mtr) /*!< in: mtr handle */ { ulint limit; ulint size; @@ -727,9 +719,11 @@ xdes_get_descriptor_with_space_hdr( ulint descr_page_no; page_t* descr_page; + ut_ad(mtr); ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL), MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_S_FIX) + || mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX)); ut_ad(page_offset(sp_header) == FSP_HEADER_OFFSET); /* Read free limit and space size */ limit = mach_read_from_4(sp_header + FSP_FREE_LIMIT); @@ -779,7 +773,7 @@ is necessary to make the descriptor defined, as they are uninitialized above the free limit. @return pointer to the extent descriptor, NULL if the page does not exist in the space or if the offset exceeds the free limit */ -static __attribute__((nonnull, warn_unused_result)) +static xdes_t* xdes_get_descriptor( /*================*/ @@ -788,7 +782,7 @@ xdes_get_descriptor( or 0 for uncompressed pages */ ulint offset, /*!< in: page offset; if equal to the free limit, we try to add new extents to the space free list */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + mtr_t* mtr) /*!< in: mtr handle */ { buf_block_t* block; fsp_header_t* sp_header; @@ -1166,14 +1160,14 @@ fsp_header_get_tablespace_size(void) Tries to extend a single-table tablespace so that a page would fit in the data file. @return TRUE if success */ -static __attribute__((nonnull, warn_unused_result)) +static ibool fsp_try_extend_data_file_with_pages( /*================================*/ ulint space, /*!< in: space */ ulint page_no, /*!< in: page number */ - fsp_header_t* header, /*!< in/out: space header */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + fsp_header_t* header, /*!< in: space header */ + mtr_t* mtr) /*!< in: mtr */ { ibool success; ulint actual_size; @@ -1198,7 +1192,7 @@ fsp_try_extend_data_file_with_pages( /***********************************************************************//** Tries to extend the last data file of a tablespace if it is auto-extending. @return FALSE if not auto-extending */ -static __attribute__((nonnull)) +static ibool fsp_try_extend_data_file( /*=====================*/ @@ -1208,8 +1202,8 @@ fsp_try_extend_data_file( the actual file size rounded down to megabyte */ ulint space, /*!< in: space */ - fsp_header_t* header, /*!< in/out: space header */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + fsp_header_t* header, /*!< in: space header */ + mtr_t* mtr) /*!< in: mtr */ { ulint size; ulint zip_size; @@ -1345,7 +1339,7 @@ fsp_fill_free_list( then we do not allocate more extents */ ulint space, /*!< in: space */ fsp_header_t* header, /*!< in/out: space header */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + mtr_t* mtr) /*!< in: mtr */ { ulint limit; ulint size; @@ -1544,46 +1538,9 @@ fsp_alloc_free_extent( } /**********************************************************************//** -Allocates a single free page from a space. */ -static __attribute__((nonnull)) -void -fsp_alloc_from_free_frag( -/*=====================*/ - fsp_header_t* header, /*!< in/out: tablespace header */ - xdes_t* descr, /*!< in/out: extent descriptor */ - ulint bit, /*!< in: slot to allocate in the extent */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint frag_n_used; - - ut_ad(xdes_get_state(descr, mtr) == XDES_FREE_FRAG); - ut_a(xdes_get_bit(descr, XDES_FREE_BIT, bit, mtr)); - xdes_set_bit(descr, XDES_FREE_BIT, bit, FALSE, mtr); - - /* Update the FRAG_N_USED field */ - frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, - mtr); - frag_n_used++; - mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES, - mtr); - if (xdes_is_full(descr, mtr)) { - /* The fragment is full: move it to another list */ - flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, - mtr); - xdes_set_state(descr, XDES_FULL_FRAG, mtr); - - flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE, - mtr); - mlog_write_ulint(header + FSP_FRAG_N_USED, - frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES, - mtr); - } -} - -/**********************************************************************//** Allocates a single free page from a space. The page is marked as used. @return the page offset, FIL_NULL if no page could be allocated */ -static __attribute__((nonnull, warn_unused_result)) +static ulint fsp_alloc_free_page( /*================*/ @@ -1591,22 +1548,19 @@ fsp_alloc_free_page( ulint zip_size,/*!< in: compressed page size in bytes or 0 for uncompressed pages */ ulint hint, /*!< in: hint of which page would be desirable */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - mtr_t* init_mtr)/*!< in/out: mini-transaction in which the - page should be initialized - (may be the same as mtr) */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { fsp_header_t* header; fil_addr_t first; xdes_t* descr; buf_block_t* block; ulint free; + ulint frag_n_used; ulint page_no; ulint space_size; ibool success; ut_ad(mtr); - ut_ad(init_mtr); header = fsp_get_space_header(space, zip_size, mtr); @@ -1688,19 +1642,38 @@ fsp_alloc_free_page( } } - fsp_alloc_from_free_frag(header, descr, free, mtr); + xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr); + + /* Update the FRAG_N_USED field */ + frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, + mtr); + frag_n_used++; + mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES, + mtr); + if (xdes_is_full(descr, mtr)) { + /* The fragment is full: move it to another list */ + flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, + mtr); + xdes_set_state(descr, XDES_FULL_FRAG, mtr); + + flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE, + mtr); + mlog_write_ulint(header + FSP_FRAG_N_USED, + frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES, + mtr); + } /* Initialize the allocated page to the buffer pool, so that it can be obtained immediately with buf_page_get without need for a disk read. */ - buf_page_create(space, page_no, zip_size, init_mtr); + buf_page_create(space, page_no, zip_size, mtr); - block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, init_mtr); + block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr); buf_block_dbg_add_level(block, SYNC_FSP_PAGE); /* Prior contents of the page should be ignored */ - fsp_init_file_page(block, init_mtr); + fsp_init_file_page(block, mtr); return(page_no); } @@ -1936,7 +1909,7 @@ fsp_alloc_seg_inode_page( zip_size = dict_table_flags_to_zip_size( mach_read_from_4(FSP_SPACE_FLAGS + space_header)); - page_no = fsp_alloc_free_page(space, zip_size, 0, mtr, mtr); + page_no = fsp_alloc_free_page(space, zip_size, 0, mtr); if (page_no == FIL_NULL) { @@ -2350,7 +2323,7 @@ fseg_create_general( if (page == 0) { page = fseg_alloc_free_page_low(space, zip_size, - inode, 0, FSP_UP, mtr, mtr); + inode, 0, FSP_UP, mtr); if (page == FIL_NULL) { @@ -2606,12 +2579,7 @@ fseg_alloc_free_page_low( inserted there in order, into which direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - mtr_t* init_mtr)/*!< in/out: mini-transaction in which the - page should be initialized - (may be the same as mtr), or NULL if it - should not be initialized (the page at hint - was previously freed in mtr) */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { fsp_header_t* space_header; ulint space_size; @@ -2622,6 +2590,7 @@ fseg_alloc_free_page_low( ulint ret_page; /*!< the allocated page offset, FIL_NULL if could not be allocated */ xdes_t* ret_descr; /*!< the extent of the allocated page */ + ibool frag_page_allocated = FALSE; ibool success; ulint n; @@ -2643,8 +2612,6 @@ fseg_alloc_free_page_low( if (descr == NULL) { /* Hint outside space or too high above free limit: reset hint */ - ut_a(init_mtr); - /* The file space header page is always allocated. */ hint = 0; descr = xdes_get_descriptor(space, zip_size, hint, mtr); } @@ -2656,20 +2623,15 @@ fseg_alloc_free_page_low( mtr), seg_id)) && (xdes_get_bit(descr, XDES_FREE_BIT, hint % FSP_EXTENT_SIZE, mtr) == TRUE)) { -take_hinted_page: + /* 1. We can take the hinted page =================================*/ ret_descr = descr; ret_page = hint; - /* Skip the check for extending the tablespace. If the - page hint were not within the size of the tablespace, - we would have got (descr == NULL) above and reset the hint. */ - goto got_hinted_page; /*-----------------------------------------------------------*/ - } else if (xdes_get_state(descr, mtr) == XDES_FREE - && (!init_mtr - || ((reserved - used < reserved / FSEG_FILLFACTOR) - && used >= FSEG_FRAG_LIMIT))) { + } else if ((xdes_get_state(descr, mtr) == XDES_FREE) + && ((reserved - used) < reserved / FSEG_FILLFACTOR) + && (used >= FSEG_FRAG_LIMIT)) { /* 2. We allocate the free extent from space and can take ========================================================= @@ -2687,20 +2649,8 @@ take_hinted_page: /* Try to fill the segment free list */ fseg_fill_free_list(seg_inode, space, zip_size, hint + FSP_EXTENT_SIZE, mtr); - goto take_hinted_page; - /*-----------------------------------------------------------*/ - } else if (!init_mtr) { - ut_a(xdes_get_state(descr, mtr) == XDES_FREE_FRAG); - fsp_alloc_from_free_frag(space_header, descr, - hint % FSP_EXTENT_SIZE, mtr); ret_page = hint; - ret_descr = NULL; - - /* Put the page in the fragment page array of the segment */ - n = fseg_find_free_frag_page_slot(seg_inode, mtr); - ut_a(n != FIL_NULL); - fseg_set_nth_frag_page_no(seg_inode, n, ret_page, mtr); - goto got_hinted_page; + /*-----------------------------------------------------------*/ } else if ((direction != FSP_NO_DIR) && ((reserved - used) < reserved / FSEG_FILLFACTOR) && (used >= FSEG_FRAG_LIMIT) @@ -2760,10 +2710,11 @@ take_hinted_page: } else if (used < FSEG_FRAG_LIMIT) { /* 6. We allocate an individual page from the space ===================================================*/ - ret_page = fsp_alloc_free_page(space, zip_size, hint, - mtr, init_mtr); + ret_page = fsp_alloc_free_page(space, zip_size, hint, mtr); ret_descr = NULL; + frag_page_allocated = TRUE; + if (ret_page != FIL_NULL) { /* Put the page in the fragment page array of the segment */ @@ -2773,10 +2724,6 @@ take_hinted_page: fseg_set_nth_frag_page_no(seg_inode, n, ret_page, mtr); } - - /* fsp_alloc_free_page() invoked fsp_init_file_page() - already. */ - return(ret_page); /*-----------------------------------------------------------*/ } else { /* 7. We allocate a new extent and take its first page @@ -2824,34 +2771,26 @@ take_hinted_page: } } -got_hinted_page: - { + if (!frag_page_allocated) { /* Initialize the allocated page to buffer pool, so that it can be obtained immediately with buf_page_get without need for a disk read */ buf_block_t* block; ulint zip_size = dict_table_flags_to_zip_size( mach_read_from_4(FSP_SPACE_FLAGS + space_header)); - mtr_t* block_mtr = init_mtr ? init_mtr : mtr; - block = buf_page_create(space, ret_page, zip_size, block_mtr); + block = buf_page_create(space, ret_page, zip_size, mtr); buf_block_dbg_add_level(block, SYNC_FSP_PAGE); if (UNIV_UNLIKELY(block != buf_page_get(space, zip_size, ret_page, RW_X_LATCH, - block_mtr))) { + mtr))) { ut_error; } - if (init_mtr) { - /* The prior contents of the page should be ignored */ - fsp_init_file_page(block, init_mtr); - } - } + /* The prior contents of the page should be ignored */ + fsp_init_file_page(block, mtr); - /* ret_descr == NULL if the block was allocated from free_frag - (XDES_FREE_FRAG) */ - if (ret_descr != NULL) { /* At this point we know the extent and the page offset. The extent is still in the appropriate list (FSEG_NOT_FULL or FSEG_FREE), and the page is not yet marked as used. */ @@ -2888,11 +2827,7 @@ fseg_alloc_free_page_general( with fsp_reserve_free_extents, then there is no need to do the check for this individual page */ - mtr_t* mtr, /*!< in/out: mini-transaction handle */ - mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction - in which the page should be initialized, - or NULL if this is a "fake allocation" of - a page that was previously freed in mtr */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { fseg_inode_t* inode; ulint space; @@ -2934,8 +2869,7 @@ fseg_alloc_free_page_general( } page_no = fseg_alloc_free_page_low(space, zip_size, - inode, hint, direction, - mtr, init_mtr); + inode, hint, direction, mtr); if (!has_done_reservation) { fil_space_release_free_extents(space, n_reserved); } @@ -2944,6 +2878,28 @@ fseg_alloc_free_page_general( } /**********************************************************************//** +Allocates a single free page from a segment. This function implements +the intelligent allocation strategy which tries to minimize file space +fragmentation. +@return allocated page offset, FIL_NULL if no page could be allocated */ +UNIV_INTERN +ulint +fseg_alloc_free_page( +/*=================*/ + fseg_header_t* seg_header,/*!< in: segment header */ + ulint hint, /*!< in: hint of which page would be desirable */ + byte direction,/*!< in: if the new page is needed because + of an index page split, and records are + inserted there in order, into which + direction they go alphabetically: FSP_DOWN, + FSP_UP, FSP_NO_DIR */ + mtr_t* mtr) /*!< in: mtr handle */ +{ + return(fseg_alloc_free_page_general(seg_header, hint, direction, + FALSE, mtr)); +} + +/**********************************************************************//** Checks that we have at least 2 frag pages free in the first extent of a single-table tablespace, and they are also physically initialized to the data file. That is we have already extended the data file so that those pages are diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc index 5ed4abe3d08..d84bd38ba7e 100644 --- a/storage/innodb_plugin/handler/ha_innodb.cc +++ b/storage/innodb_plugin/handler/ha_innodb.cc @@ -4970,14 +4970,15 @@ calc_row_difference( /* The field has changed */ ufield = uvect->fields + n_changed; + UNIV_MEM_INVALID(ufield, sizeof *ufield); /* Let us use a dummy dfield to make the conversion from the MySQL column format to the InnoDB format */ - dict_col_copy_type(prebuilt->table->cols + i, - dfield_get_type(&dfield)); - if (n_len != UNIV_SQL_NULL) { + dict_col_copy_type(prebuilt->table->cols + i, + dfield_get_type(&dfield)); + buf = row_mysql_store_col_in_innobase_format( &dfield, (byte*)buf, @@ -4985,7 +4986,7 @@ calc_row_difference( new_mysql_row_col, col_pack_len, dict_table_is_comp(prebuilt->table)); - dfield_copy_data(&ufield->new_val, &dfield); + dfield_copy(&ufield->new_val, &dfield); } else { dfield_set_null(&ufield->new_val); } diff --git a/storage/innodb_plugin/include/btr0btr.h b/storage/innodb_plugin/include/btr0btr.h index 476ad29adac..e32da9e4c86 100644 --- a/storage/innodb_plugin/include/btr0btr.h +++ b/storage/innodb_plugin/include/btr0btr.h @@ -488,14 +488,11 @@ UNIV_INTERN ibool btr_compress( /*=========*/ - btr_cur_t* cursor, /*!< in/out: cursor on the page to merge - or lift; the page must not be empty: - when deleting records, use btr_discard_page() - if the page would become empty */ - ibool adjust, /*!< in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - __attribute__((nonnull)); + btr_cur_t* cursor, /*!< in: cursor on the page to merge or lift; + the page must not be empty: in record delete + use btr_discard_page if the page would become + empty */ + mtr_t* mtr); /*!< in: mtr */ /*************************************************************//** Discards a page from a B-tree. This is used to remove the last record from a B-tree page: the whole page must be removed at the same time. This cannot @@ -557,12 +554,7 @@ btr_page_alloc( page split is made */ ulint level, /*!< in: level where the page is placed in the tree */ - mtr_t* mtr, /*!< in/out: mini-transaction - for the allocation */ - mtr_t* init_mtr) /*!< in/out: mini-transaction - for x-latching and initializing - the page */ - __attribute__((nonnull, warn_unused_result)); + mtr_t* mtr); /*!< in: mtr */ /**************************************************************//** Frees a file page used in an index tree. NOTE: cannot free field external storage pages because the page must contain info on its level. */ @@ -585,33 +577,6 @@ btr_page_free_low( buf_block_t* block, /*!< in: block to be freed, x-latched */ ulint level, /*!< in: page level */ mtr_t* mtr); /*!< in: mtr */ -/**************************************************************//** -Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free. -For invoking btr_store_big_rec_extern_fields() after an update, -we must temporarily mark freed clustered index pages allocated, so -that off-page columns will not be allocated from them. Between the -btr_store_big_rec_extern_fields() and mtr_commit() we have to -mark the pages free again, so that no pages will be leaked. */ -UNIV_INTERN -void -btr_mark_freed_leaves( -/*==================*/ - dict_index_t* index, /*!< in/out: clustered index */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - ibool nonfree)/*!< in: TRUE=mark nonfree, FALSE=mark freed */ - __attribute__((nonnull)); -#ifdef UNIV_DEBUG -/**************************************************************//** -Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF. -@see btr_mark_freed_leaves() -@return TRUE */ -UNIV_INTERN -ibool -btr_freed_leaves_validate( -/*======================*/ - mtr_t* mtr) /*!< in: mini-transaction */ - __attribute__((nonnull, warn_unused_result)); -#endif /* UNIV_DEBUG */ #ifdef UNIV_BTR_PRINT /*************************************************************//** Prints size info of a B-tree. */ diff --git a/storage/innodb_plugin/include/btr0cur.h b/storage/innodb_plugin/include/btr0cur.h index 1d97c5b9452..3669ce28f02 100644 --- a/storage/innodb_plugin/include/btr0cur.h +++ b/storage/innodb_plugin/include/btr0cur.h @@ -36,9 +36,6 @@ Created 10/16/1994 Heikki Tuuri #define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */ #define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the update vector or inserted entry */ -#define BTR_KEEP_POS_FLAG 8 /* btr_cur_pessimistic_update() - must keep cursor position when - moving columns to big_rec */ #ifndef UNIV_HOTBACKUP #include "que0types.h" @@ -312,9 +309,7 @@ btr_cur_pessimistic_update( /*=======================*/ ulint flags, /*!< in: undo logging, locking, and rollback flags */ - btr_cur_t* cursor, /*!< in/out: cursor on the record to update; - cursor may become invalid if *big_rec == NULL - || !(flags & BTR_KEEP_POS_FLAG) */ + btr_cur_t* cursor, /*!< in: cursor on the record to update */ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to be stored externally by the caller, or NULL */ @@ -381,13 +376,10 @@ UNIV_INTERN ibool btr_cur_compress_if_useful( /*=======================*/ - btr_cur_t* cursor, /*!< in/out: cursor on the page to compress; + btr_cur_t* cursor, /*!< in: cursor on the page to compress; cursor does not stay valid if compression occurs */ - ibool adjust, /*!< in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - __attribute__((nonnull)); + mtr_t* mtr); /*!< in: mtr */ /*******************************************************//** Removes the record on which the tree cursor is positioned. It is assumed that the mtr has an x-latch on the page where the cursor is positioned, @@ -530,8 +522,6 @@ btr_store_big_rec_extern_fields_func( the "external storage" flags in offsets will not correspond to rec when this function returns */ - const big_rec_t*big_rec_vec, /*!< in: vector containing fields - to be stored externally */ #ifdef UNIV_DEBUG mtr_t* local_mtr, /*!< in: mtr containing the latch to rec and to the tree */ @@ -540,12 +530,9 @@ btr_store_big_rec_extern_fields_func( ibool update_in_place,/*! in: TRUE if the record is updated in place (not delete+insert) */ #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - mtr_t* alloc_mtr) /*!< in/out: in an insert, NULL; - in an update, local_mtr for - allocating BLOB pages and - updating BLOB pointers; alloc_mtr - must not have freed any leaf pages */ - __attribute__((nonnull(1,2,3,4,5), warn_unused_result)); + const big_rec_t*big_rec_vec) /*!< in: vector containing fields + to be stored externally */ + __attribute__((nonnull)); /** Stores the fields in big_rec_vec to the tablespace and puts pointers to them in rec. The extern flags in rec will have to be set beforehand. @@ -554,22 +541,21 @@ file segment of the index tree. @param index in: clustered index; MUST be X-latched by mtr @param b in/out: block containing rec; MUST be X-latched by mtr @param rec in/out: clustered index record -@param offs in: rec_get_offsets(rec, index); +@param offsets in: rec_get_offsets(rec, index); the "external storage" flags in offsets will not be adjusted -@param big in: vector containing fields to be stored externally @param mtr in: mini-transaction that holds x-latch on index and b @param upd in: TRUE if the record is updated in place (not delete+insert) -@param rmtr in/out: in updates, the mini-transaction that holds rec +@param big in: vector containing fields to be stored externally @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ #ifdef UNIV_DEBUG -# define btr_store_big_rec_extern_fields(index,b,rec,offs,big,mtr,upd,rmtr) \ - btr_store_big_rec_extern_fields_func(index,b,rec,offs,big,mtr,upd,rmtr) +# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \ + btr_store_big_rec_extern_fields_func(index,b,rec,offsets,mtr,upd,big) #elif defined UNIV_BLOB_LIGHT_DEBUG -# define btr_store_big_rec_extern_fields(index,b,rec,offs,big,mtr,upd,rmtr) \ - btr_store_big_rec_extern_fields_func(index,b,rec,offs,big,upd,rmtr) +# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \ + btr_store_big_rec_extern_fields_func(index,b,rec,offsets,upd,big) #else -# define btr_store_big_rec_extern_fields(index,b,rec,offs,big,mtr,upd,rmtr) \ - btr_store_big_rec_extern_fields_func(index,b,rec,offs,big,rmtr) +# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \ + btr_store_big_rec_extern_fields_func(index,b,rec,offsets,big) #endif /*******************************************************************//** diff --git a/storage/innodb_plugin/include/btr0cur.ic b/storage/innodb_plugin/include/btr0cur.ic index c833b3e8572..cd3a5d895bb 100644 --- a/storage/innodb_plugin/include/btr0cur.ic +++ b/storage/innodb_plugin/include/btr0cur.ic @@ -139,7 +139,7 @@ btr_cur_compress_recommendation( btr_cur_t* cursor, /*!< in: btr cursor */ mtr_t* mtr) /*!< in: mtr */ { - const page_t* page; + page_t* page; ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor), MTR_MEMO_PAGE_X_FIX)); diff --git a/storage/innodb_plugin/include/buf0buf.h b/storage/innodb_plugin/include/buf0buf.h index e7fe3901ad4..489d1bec5b6 100644 --- a/storage/innodb_plugin/include/buf0buf.h +++ b/storage/innodb_plugin/include/buf0buf.h @@ -468,31 +468,6 @@ buf_block_get_modify_clock( #else /* !UNIV_HOTBACKUP */ # define buf_block_modify_clock_inc(block) ((void) 0) #endif /* !UNIV_HOTBACKUP */ -/*******************************************************************//** -Increments the bufferfix count. */ -UNIV_INLINE -void -buf_block_buf_fix_inc_func( -/*=======================*/ -#ifdef UNIV_SYNC_DEBUG - const char* file, /*!< in: file name */ - ulint line, /*!< in: line */ -#endif /* UNIV_SYNC_DEBUG */ - buf_block_t* block) /*!< in/out: block to bufferfix */ - __attribute__((nonnull)); -#ifdef UNIV_SYNC_DEBUG -/** Increments the bufferfix count. -@param b in/out: block to bufferfix -@param f in: file name where requested -@param l in: line number where requested */ -# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b) -#else /* UNIV_SYNC_DEBUG */ -/** Increments the bufferfix count. -@param b in/out: block to bufferfix -@param f in: file name where requested -@param l in: line number where requested */ -# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b) -#endif /* UNIV_SYNC_DEBUG */ /********************************************************************//** Calculates a page checksum which is stored to the page when it is written to a file. Note that we must be careful to calculate the same value diff --git a/storage/innodb_plugin/include/buf0buf.ic b/storage/innodb_plugin/include/buf0buf.ic index 2f3d65e80bd..0fe1dbc2da5 100644 --- a/storage/innodb_plugin/include/buf0buf.ic +++ b/storage/innodb_plugin/include/buf0buf.ic @@ -916,6 +916,19 @@ buf_block_buf_fix_inc_func( block->page.buf_fix_count++; } +#ifdef UNIV_SYNC_DEBUG +/** Increments the bufferfix count. +@param b in/out: block to bufferfix +@param f in: file name where requested +@param l in: line number where requested */ +# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b) +#else /* UNIV_SYNC_DEBUG */ +/** Increments the bufferfix count. +@param b in/out: block to bufferfix +@param f in: file name where requested +@param l in: line number where requested */ +# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b) +#endif /* UNIV_SYNC_DEBUG */ /*******************************************************************//** Decrements the bufferfix count. */ diff --git a/storage/innodb_plugin/include/fsp0fsp.h b/storage/innodb_plugin/include/fsp0fsp.h index 2221380c9a2..403e1d404a8 100644 --- a/storage/innodb_plugin/include/fsp0fsp.h +++ b/storage/innodb_plugin/include/fsp0fsp.h @@ -176,18 +176,19 @@ fseg_n_reserved_pages( Allocates a single free page from a segment. This function implements the intelligent allocation strategy which tries to minimize file space fragmentation. -@param[in/out] seg_header segment header -@param[in] hint hint of which page would be desirable -@param[in] direction if the new page is needed because +@return the allocated page offset FIL_NULL if no page could be allocated */ +UNIV_INTERN +ulint +fseg_alloc_free_page( +/*=================*/ + fseg_header_t* seg_header, /*!< in: segment header */ + ulint hint, /*!< in: hint of which page would be desirable */ + byte direction, /*!< in: if the new page is needed because of an index page split, and records are inserted there in order, into which direction they go alphabetically: FSP_DOWN, - FSP_UP, FSP_NO_DIR -@param[in/out] mtr mini-transaction -@return the allocated page offset FIL_NULL if no page could be allocated */ -#define fseg_alloc_free_page(seg_header, hint, direction, mtr) \ - fseg_alloc_free_page_general(seg_header, hint, direction, \ - FALSE, mtr, mtr) + FSP_UP, FSP_NO_DIR */ + mtr_t* mtr); /*!< in: mtr handle */ /**********************************************************************//** Allocates a single free page from a segment. This function implements the intelligent allocation strategy which tries to minimize file space @@ -209,11 +210,7 @@ fseg_alloc_free_page_general( with fsp_reserve_free_extents, then there is no need to do the check for this individual page */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction - in which the page should be initialized, - or NULL if this is a "fake allocation" of - a page that was previously freed in mtr */ + mtr_t* mtr) /*!< in/out: mini-transaction */ __attribute__((warn_unused_result, nonnull(1,5))); /**********************************************************************//** Reserves free pages from a tablespace. All mini-transactions which may diff --git a/storage/innodb_plugin/include/mtr0mtr.h b/storage/innodb_plugin/include/mtr0mtr.h index 3529519e7f4..8a9ec8ea7f0 100644 --- a/storage/innodb_plugin/include/mtr0mtr.h +++ b/storage/innodb_plugin/include/mtr0mtr.h @@ -53,8 +53,6 @@ first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ #define MTR_MEMO_MODIFY 54 #define MTR_MEMO_S_LOCK 55 #define MTR_MEMO_X_LOCK 56 -/** The mini-transaction freed a clustered index leaf page. */ -#define MTR_MEMO_FREE_CLUST_LEAF 57 /** @name Log item types The log items are declared 'byte' so that the compiler can warn if val @@ -379,12 +377,9 @@ struct mtr_struct{ #endif dyn_array_t memo; /*!< memo stack for locks etc. */ dyn_array_t log; /*!< mini-transaction log */ - unsigned modifications:1; - /*!< TRUE if the mini-transaction - modified buffer pool pages */ - unsigned freed_clust_leaf:1; - /*!< TRUE if MTR_MEMO_FREE_CLUST_LEAF - was logged in the mini-transaction */ + ibool modifications; + /* TRUE if the mtr made modifications to + buffer pool pages */ ulint n_log_recs; /* count of how many page initial log records have been written to the mtr log */ diff --git a/storage/innodb_plugin/include/mtr0mtr.ic b/storage/innodb_plugin/include/mtr0mtr.ic index 9c0ddff9132..9f92d2b06a1 100644 --- a/storage/innodb_plugin/include/mtr0mtr.ic +++ b/storage/innodb_plugin/include/mtr0mtr.ic @@ -44,7 +44,6 @@ mtr_start( mtr->log_mode = MTR_LOG_ALL; mtr->modifications = FALSE; - mtr->freed_clust_leaf = FALSE; mtr->n_log_recs = 0; ut_d(mtr->state = MTR_ACTIVE); @@ -68,8 +67,7 @@ mtr_memo_push( ut_ad(object); ut_ad(type >= MTR_MEMO_PAGE_S_FIX); - ut_ad(type <= MTR_MEMO_FREE_CLUST_LEAF); - ut_ad(type != MTR_MEMO_FREE_CLUST_LEAF || mtr->freed_clust_leaf); + ut_ad(type <= MTR_MEMO_X_LOCK); ut_ad(mtr); ut_ad(mtr->magic_n == MTR_MAGIC_N); ut_ad(mtr->state == MTR_ACTIVE); diff --git a/storage/innodb_plugin/include/page0page.h b/storage/innodb_plugin/include/page0page.h index caf4cee2c57..12c4fed75d2 100644 --- a/storage/innodb_plugin/include/page0page.h +++ b/storage/innodb_plugin/include/page0page.h @@ -281,42 +281,16 @@ page_get_supremum_offset( const page_t* page); /*!< in: page which must have record(s) */ #define page_get_infimum_rec(page) ((page) + page_get_infimum_offset(page)) #define page_get_supremum_rec(page) ((page) + page_get_supremum_offset(page)) - -/************************************************************//** -Returns the nth record of the record list. -This is the inverse function of page_rec_get_n_recs_before(). -@return nth record */ -UNIV_INTERN -const rec_t* -page_rec_get_nth_const( -/*===================*/ - const page_t* page, /*!< in: page */ - ulint nth) /*!< in: nth record */ - __attribute__((nonnull, warn_unused_result)); /************************************************************//** -Returns the nth record of the record list. -This is the inverse function of page_rec_get_n_recs_before(). -@return nth record */ -UNIV_INLINE -rec_t* -page_rec_get_nth( -/*=============*/ - page_t* page, /*< in: page */ - ulint nth) /*!< in: nth record */ - __attribute__((nonnull, warn_unused_result)); - -#ifndef UNIV_HOTBACKUP -/************************************************************//** -Returns the middle record of the records on the page. If there is an -even number of records in the list, returns the first record of the -upper half-list. +Returns the middle record of record list. If there are an even number +of records in the list, returns the first record of upper half-list. @return middle record */ -UNIV_INLINE +UNIV_INTERN rec_t* page_get_middle_rec( /*================*/ - page_t* page) /*!< in: page */ - __attribute__((nonnull, warn_unused_result)); + page_t* page); /*!< in: page */ +#ifndef UNIV_HOTBACKUP /*************************************************************//** Compares a data tuple to a physical record. Differs from the function cmp_dtuple_rec_with_match in the way that the record must reside on an @@ -371,7 +345,6 @@ page_get_n_recs( /***************************************************************//** Returns the number of records before the given record in chain. The number includes infimum and supremum records. -This is the inverse function of page_rec_get_nth(). @return number of records */ UNIV_INTERN ulint diff --git a/storage/innodb_plugin/include/page0page.ic b/storage/innodb_plugin/include/page0page.ic index 1450b0892b3..b096a5ba321 100644 --- a/storage/innodb_plugin/include/page0page.ic +++ b/storage/innodb_plugin/include/page0page.ic @@ -420,37 +420,7 @@ page_rec_is_infimum( return(page_rec_is_infimum_low(page_offset(rec))); } -/************************************************************//** -Returns the nth record of the record list. -This is the inverse function of page_rec_get_n_recs_before(). -@return nth record */ -UNIV_INLINE -rec_t* -page_rec_get_nth( -/*=============*/ - page_t* page, /*!< in: page */ - ulint nth) /*!< in: nth record */ -{ - return((rec_t*) page_rec_get_nth_const(page, nth)); -} - #ifndef UNIV_HOTBACKUP -/************************************************************//** -Returns the middle record of the records on the page. If there is an -even number of records in the list, returns the first record of the -upper half-list. -@return middle record */ -UNIV_INLINE -rec_t* -page_get_middle_rec( -/*================*/ - page_t* page) /*!< in: page */ -{ - ulint middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2; - - return(page_rec_get_nth(page, middle)); -} - /*************************************************************//** Compares a data tuple to a physical record. Differs from the function cmp_dtuple_rec_with_match in the way that the record must reside on an diff --git a/storage/innodb_plugin/mtr/mtr0mtr.c b/storage/innodb_plugin/mtr/mtr0mtr.c index e3fefbedec7..5fad61b2922 100644 --- a/storage/innodb_plugin/mtr/mtr0mtr.c +++ b/storage/innodb_plugin/mtr/mtr0mtr.c @@ -58,11 +58,12 @@ mtr_memo_slot_release( buf_page_release((buf_block_t*)object, type, mtr); } else if (type == MTR_MEMO_S_LOCK) { rw_lock_s_unlock((rw_lock_t*)object); +#ifdef UNIV_DEBUG } else if (type != MTR_MEMO_X_LOCK) { - ut_ad(type == MTR_MEMO_MODIFY - || type == MTR_MEMO_FREE_CLUST_LEAF); + ut_ad(type == MTR_MEMO_MODIFY); ut_ad(mtr_memo_contains(mtr, object, MTR_MEMO_PAGE_X_FIX)); +#endif /* UNIV_DEBUG */ } else { rw_lock_x_unlock((rw_lock_t*)object); } diff --git a/storage/innodb_plugin/page/page0cur.c b/storage/innodb_plugin/page/page0cur.c index b8c492328e8..ab5aa257338 100644 --- a/storage/innodb_plugin/page/page0cur.c +++ b/storage/innodb_plugin/page/page0cur.c @@ -1180,15 +1180,14 @@ page_cur_insert_rec_zip_reorg( /* Before trying to reorganize the page, store the number of preceding records on the page. */ pos = page_rec_get_n_recs_before(rec); - ut_ad(pos > 0); if (page_zip_reorganize(block, index, mtr)) { /* The page was reorganized: Find rec by seeking to pos, and update *current_rec. */ - if (pos > 1) { - rec = page_rec_get_nth(page, pos - 1); - } else { - rec = page + PAGE_NEW_INFIMUM; + rec = page + PAGE_NEW_INFIMUM; + + while (--pos) { + rec = page + rec_get_next_offs(rec, TRUE); } *current_rec = rec; @@ -1284,12 +1283,6 @@ page_cur_insert_rec_zip( insert_rec = page_cur_insert_rec_zip_reorg( current_rec, block, index, insert_rec, page, page_zip, mtr); -#ifdef UNIV_DEBUG - if (insert_rec) { - rec_offs_make_valid( - insert_rec, index, offsets); - } -#endif /* UNIV_DEBUG */ } return(insert_rec); diff --git a/storage/innodb_plugin/page/page0page.c b/storage/innodb_plugin/page/page0page.c index 1b9470acbbc..93869e997b5 100644 --- a/storage/innodb_plugin/page/page0page.c +++ b/storage/innodb_plugin/page/page0page.c @@ -1475,54 +1475,55 @@ page_dir_balance_slot( } } +#ifndef UNIV_HOTBACKUP /************************************************************//** -Returns the nth record of the record list. -This is the inverse function of page_rec_get_n_recs_before(). -@return nth record */ +Returns the middle record of the record list. If there are an even number +of records in the list, returns the first record of the upper half-list. +@return middle record */ UNIV_INTERN -const rec_t* -page_rec_get_nth_const( -/*===================*/ - const page_t* page, /*!< in: page */ - ulint nth) /*!< in: nth record */ +rec_t* +page_get_middle_rec( +/*================*/ + page_t* page) /*!< in: page */ { - const page_dir_slot_t* slot; + page_dir_slot_t* slot; + ulint middle; ulint i; ulint n_owned; - const rec_t* rec; + ulint count; + rec_t* rec; - ut_ad(nth < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1)); + /* This many records we must leave behind */ + middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2; + + count = 0; for (i = 0;; i++) { slot = page_dir_get_nth_slot(page, i); n_owned = page_dir_slot_get_n_owned(slot); - if (n_owned > nth) { + if (count + n_owned > middle) { break; } else { - nth -= n_owned; + count += n_owned; } } ut_ad(i > 0); slot = page_dir_get_nth_slot(page, i - 1); - rec = page_dir_slot_get_rec(slot); + rec = (rec_t*) page_dir_slot_get_rec(slot); + rec = page_rec_get_next(rec); - if (page_is_comp(page)) { - do { - rec = page_rec_get_next_low(rec, TRUE); - ut_ad(rec); - } while (nth--); - } else { - do { - rec = page_rec_get_next_low(rec, FALSE); - ut_ad(rec); - } while (nth--); + /* There are now count records behind rec */ + + for (i = 0; i < middle - count; i++) { + rec = page_rec_get_next(rec); } return(rec); } +#endif /* !UNIV_HOTBACKUP */ /***************************************************************//** Returns the number of records before the given record in chain. @@ -1584,7 +1585,6 @@ page_rec_get_n_recs_before( n--; ut_ad(n >= 0); - ut_ad(n < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1)); return((ulint) n); } diff --git a/storage/innodb_plugin/row/row0ins.c b/storage/innodb_plugin/row/row0ins.c index b14ae3de2bd..2cbe1e13edc 100644 --- a/storage/innodb_plugin/row/row0ins.c +++ b/storage/innodb_plugin/row/row0ins.c @@ -345,9 +345,9 @@ row_ins_clust_index_entry_by_modify( return(DB_LOCK_TABLE_FULL); } - err = btr_cur_pessimistic_update( - BTR_KEEP_POS_FLAG, cursor, heap, big_rec, update, - 0, thr, mtr); + err = btr_cur_pessimistic_update(0, cursor, + heap, big_rec, update, + 0, thr, mtr); } return(err); @@ -434,11 +434,9 @@ row_ins_cascade_calc_update_vec( dict_table_t* table = foreign->foreign_table; dict_index_t* index = foreign->foreign_index; upd_t* update; - upd_field_t* ufield; dict_table_t* parent_table; dict_index_t* parent_index; upd_t* parent_update; - upd_field_t* parent_ufield; ulint n_fields_updated; ulint parent_field_no; ulint i; @@ -474,13 +472,15 @@ row_ins_cascade_calc_update_vec( dict_index_get_nth_col_no(parent_index, i)); for (j = 0; j < parent_update->n_fields; j++) { - parent_ufield = parent_update->fields + j; + const upd_field_t* parent_ufield + = &parent_update->fields[j]; if (parent_ufield->field_no == parent_field_no) { ulint min_size; const dict_col_t* col; ulint ufield_len; + upd_field_t* ufield; col = dict_index_get_nth_col(index, i); @@ -493,6 +493,8 @@ row_ins_cascade_calc_update_vec( ufield->field_no = dict_table_get_nth_col_pos( table, dict_col_get_no(col)); + + ufield->orig_len = 0; ufield->exp = NULL; ufield->new_val = parent_ufield->new_val; @@ -993,10 +995,9 @@ row_ins_foreign_check_on_constraint( goto nonstandard_exit_func; } - if ((node->is_delete - && (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL)) - || (!node->is_delete - && (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL))) { + if (node->is_delete + ? (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) + : (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL)) { /* Build the appropriate update vector which sets foreign->n_fields first fields in rec to SQL NULL */ @@ -1005,6 +1006,8 @@ row_ins_foreign_check_on_constraint( update->info_bits = 0; update->n_fields = foreign->n_fields; + UNIV_MEM_INVALID(update->fields, + update->n_fields * sizeof *update->fields); for (i = 0; i < foreign->n_fields; i++) { upd_field_t* ufield = &update->fields[i]; @@ -1986,7 +1989,6 @@ row_ins_index_entry_low( ulint modify = 0; /* remove warning */ rec_t* insert_rec; rec_t* rec; - ulint* offsets; ulint err; ulint n_unique; big_rec_t* big_rec = NULL; @@ -2090,51 +2092,6 @@ row_ins_index_entry_low( err = row_ins_clust_index_entry_by_modify( mode, &cursor, &heap, &big_rec, entry, thr, &mtr); - - if (big_rec) { - ut_a(err == DB_SUCCESS); - /* Write out the externally stored - columns, but allocate the pages and - write the pointers using the - mini-transaction of the record update. - If any pages were freed in the update, - temporarily mark them allocated so - that off-page columns will not - overwrite them. We must do this, - because we will write the redo log for - the BLOB writes before writing the - redo log for the record update. Thus, - redo log application at crash recovery - will see BLOBs being written to free pages. */ - - btr_mark_freed_leaves(index, &mtr, TRUE); - - rec = btr_cur_get_rec(&cursor); - offsets = rec_get_offsets( - rec, index, NULL, - ULINT_UNDEFINED, &heap); - - err = btr_store_big_rec_extern_fields( - index, btr_cur_get_block(&cursor), - rec, offsets, big_rec, &mtr, - FALSE, &mtr); - /* If writing big_rec fails (for - example, because of DB_OUT_OF_FILE_SPACE), - the record will be corrupted. Even if - we did not update any externally - stored columns, our update could cause - the record to grow so that a - non-updated column was selected for - external storage. This non-update - would not have been written to the - undo log, and thus the record cannot - be rolled back. */ - ut_a(err == DB_SUCCESS); - /* Free the pages again - in order to avoid a leak. */ - btr_mark_freed_leaves(index, &mtr, FALSE); - goto stored_big_rec; - } } else { ut_ad(!n_ext); err = row_ins_sec_index_entry_by_modify( @@ -2163,6 +2120,8 @@ function_exit: mtr_commit(&mtr); if (UNIV_LIKELY_NULL(big_rec)) { + rec_t* rec; + ulint* offsets; mtr_start(&mtr); btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, @@ -2174,9 +2133,8 @@ function_exit: err = btr_store_big_rec_extern_fields( index, btr_cur_get_block(&cursor), - rec, offsets, big_rec, &mtr, FALSE, NULL); + rec, offsets, &mtr, FALSE, big_rec); -stored_big_rec: if (modify) { dtuple_big_rec_free(big_rec); } else { diff --git a/storage/innodb_plugin/row/row0row.c b/storage/innodb_plugin/row/row0row.c index e476ffae84e..9cdbbe76e04 100644 --- a/storage/innodb_plugin/row/row0row.c +++ b/storage/innodb_plugin/row/row0row.c @@ -243,20 +243,19 @@ row_build( } #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG - if (rec_offs_any_null_extern(rec, offsets)) { - /* This condition can occur during crash recovery - before trx_rollback_active() has completed execution. - - This condition is possible if the server crashed - during an insert or update-by-delete-and-insert before - btr_store_big_rec_extern_fields() did mtr_commit() all - BLOB pointers to the freshly inserted clustered index - record. */ - ut_a(trx_assert_recovered( - row_get_rec_trx_id(rec, index, offsets))); - ut_a(trx_undo_roll_ptr_is_insert( - row_get_rec_roll_ptr(rec, index, offsets))); - } + /* This condition can occur during crash recovery before + trx_rollback_active() has completed execution. + + This condition is possible if the server crashed + during an insert or update before + btr_store_big_rec_extern_fields() did mtr_commit() all + BLOB pointers to the clustered index record. + + If the record contains a null BLOB pointer, look up the + transaction that holds the implicit lock on this record, and + assert that it was recovered (and will soon be rolled back). */ + ut_a(!rec_offs_any_null_extern(rec, offsets) + || trx_assert_recovered(row_get_rec_trx_id(rec, index, offsets))); #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ if (type != ROW_COPY_POINTERS) { diff --git a/storage/innodb_plugin/row/row0upd.c b/storage/innodb_plugin/row/row0upd.c index 05856687015..072ca1d7b54 100644 --- a/storage/innodb_plugin/row/row0upd.c +++ b/storage/innodb_plugin/row/row0upd.c @@ -1969,46 +1969,28 @@ row_upd_clust_rec( ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur), dict_table_is_comp(index->table))); - err = btr_cur_pessimistic_update( - BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG, btr_cur, - &heap, &big_rec, node->update, node->cmpl_info, thr, mtr); - if (big_rec) { + err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur, + &heap, &big_rec, node->update, + node->cmpl_info, thr, mtr); + mtr_commit(mtr); + + if (err == DB_SUCCESS && big_rec) { ulint offsets_[REC_OFFS_NORMAL_SIZE]; rec_t* rec; rec_offs_init(offsets_); - ut_a(err == DB_SUCCESS); - /* Write out the externally stored columns, but - allocate the pages and write the pointers using the - mini-transaction of the record update. If any pages - were freed in the update, temporarily mark them - allocated so that off-page columns will not overwrite - them. We must do this, because we write the redo log - for the BLOB writes before writing the redo log for - the record update. */ - - btr_mark_freed_leaves(index, mtr, TRUE); + mtr_start(mtr); + + ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr)); rec = btr_cur_get_rec(btr_cur); err = btr_store_big_rec_extern_fields( index, btr_cur_get_block(btr_cur), rec, rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap), - big_rec, mtr, TRUE, mtr); - /* If writing big_rec fails (for example, because of - DB_OUT_OF_FILE_SPACE), the record will be corrupted. - Even if we did not update any externally stored - columns, our update could cause the record to grow so - that a non-updated column was selected for external - storage. This non-update would not have been written - to the undo log, and thus the record cannot be rolled - back. */ - ut_a(err == DB_SUCCESS); - /* Free the pages again in order to avoid a leak. */ - btr_mark_freed_leaves(index, mtr, FALSE); + mtr, TRUE, big_rec); + mtr_commit(mtr); } - mtr_commit(mtr); - if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } diff --git a/storage/innodb_plugin/trx/trx0undo.c b/storage/innodb_plugin/trx/trx0undo.c index c36f55fbd9c..746f0808643 100644 --- a/storage/innodb_plugin/trx/trx0undo.c +++ b/storage/innodb_plugin/trx/trx0undo.c @@ -912,7 +912,7 @@ trx_undo_add_page( page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER, undo->top_page_no + 1, FSP_UP, - TRUE, mtr, mtr); + TRUE, mtr); fil_space_release_free_extents(undo->space, n_reserved); |