diff options
Diffstat (limited to 'storage/xtradb/row')
-rw-r--r-- | storage/xtradb/row/row0ins.c | 73 | ||||
-rw-r--r-- | storage/xtradb/row/row0mysql.c | 45 | ||||
-rw-r--r-- | storage/xtradb/row/row0row.c | 109 | ||||
-rw-r--r-- | storage/xtradb/row/row0sel.c | 99 | ||||
-rw-r--r-- | storage/xtradb/row/row0upd.c | 86 | ||||
-rw-r--r-- | storage/xtradb/row/row0vers.c | 18 |
6 files changed, 341 insertions, 89 deletions
diff --git a/storage/xtradb/row/row0ins.c b/storage/xtradb/row/row0ins.c index efcea62f212..0e62185c02e 100644 --- a/storage/xtradb/row/row0ins.c +++ b/storage/xtradb/row/row0ins.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -345,9 +345,9 @@ row_ins_clust_index_entry_by_modify( return(DB_LOCK_TABLE_FULL); } - err = btr_cur_pessimistic_update(0, cursor, - heap, big_rec, update, - 0, thr, mtr); + err = btr_cur_pessimistic_update( + BTR_KEEP_POS_FLAG, cursor, heap, big_rec, update, + 0, thr, mtr); } return(err); @@ -1512,6 +1512,11 @@ exit_func: if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } + + if (trx->fake_changes) { + err = DB_SUCCESS; + } + return(err); } @@ -1992,6 +1997,7 @@ row_ins_index_entry_low( ulint modify = 0; /* remove warning */ rec_t* insert_rec; rec_t* rec; + ulint* offsets; ulint err; ulint n_unique; big_rec_t* big_rec = NULL; @@ -2013,7 +2019,7 @@ row_ins_index_entry_low( } btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, - mode | BTR_INSERT | ignore_sec_unique, + thr_get_trx(thr)->fake_changes ? BTR_SEARCH_LEAF : (mode | BTR_INSERT | ignore_sec_unique), &cursor, 0, __FILE__, __LINE__, &mtr); if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) { @@ -2073,7 +2079,7 @@ row_ins_index_entry_low( btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, - mode | BTR_INSERT, + thr_get_trx(thr)->fake_changes ? BTR_SEARCH_LEAF : (mode | BTR_INSERT), &cursor, 0, __FILE__, __LINE__, &mtr); } @@ -2097,6 +2103,42 @@ row_ins_index_entry_low( err = row_ins_clust_index_entry_by_modify( mode, &cursor, &heap, &big_rec, entry, thr, &mtr); + + if (big_rec) { + ut_a(err == DB_SUCCESS); + /* Write out the externally stored + columns while still x-latching + index->lock and block->lock. We have + to mtr_commit(mtr) first, so that the + redo log will be written in the + correct order. Otherwise, we would run + into trouble on crash recovery if mtr + freed B-tree pages on which some of + the big_rec fields will be written. */ + btr_cur_mtr_commit_and_start(&cursor, &mtr); + + rec = btr_cur_get_rec(&cursor); + offsets = rec_get_offsets( + rec, index, NULL, + ULINT_UNDEFINED, &heap); + + err = btr_store_big_rec_extern_fields( + index, btr_cur_get_block(&cursor), + rec, offsets, &mtr, FALSE, big_rec); + /* If writing big_rec fails (for + example, because of DB_OUT_OF_FILE_SPACE), + the record will be corrupted. Even if + we did not update any externally + stored columns, our update could cause + the record to grow so that a + non-updated column was selected for + external storage. This non-update + would not have been written to the + undo log, and thus the record cannot + be rolled back. */ + ut_a(err == DB_SUCCESS); + goto stored_big_rec; + } } else { ut_ad(!n_ext); err = row_ins_sec_index_entry_by_modify( @@ -2125,8 +2167,22 @@ function_exit: mtr_commit(&mtr); if (UNIV_LIKELY_NULL(big_rec)) { - rec_t* rec; - ulint* offsets; + + if (thr_get_trx(thr)->fake_changes) { + /* skip store extern */ + if (modify) { + dtuple_big_rec_free(big_rec); + } else { + dtuple_convert_back_big_rec(index, entry, big_rec); + } + + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + + return(err); + } + mtr_start(&mtr); btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, @@ -2140,6 +2196,7 @@ function_exit: index, btr_cur_get_block(&cursor), rec, offsets, &mtr, FALSE, big_rec); +stored_big_rec: if (modify) { dtuple_big_rec_free(big_rec); } else { diff --git a/storage/xtradb/row/row0mysql.c b/storage/xtradb/row/row0mysql.c index 1b97cbb0009..cb003ad62e0 100644 --- a/storage/xtradb/row/row0mysql.c +++ b/storage/xtradb/row/row0mysql.c @@ -1190,6 +1190,7 @@ run_again: prebuilt->table->stat_n_rows--; } + if (!(trx->fake_changes)) row_update_statistics_if_needed(prebuilt->table); trx->op_info = ""; @@ -1450,6 +1451,7 @@ run_again: that changes indexed columns, UPDATEs that change only non-indexed columns would not affect statistics. */ if (node->is_delete || !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { + if (!(trx->fake_changes)) row_update_statistics_if_needed(prebuilt->table); } @@ -1668,6 +1670,7 @@ run_again: srv_n_rows_updated++; } + if (!(trx->fake_changes)) row_update_statistics_if_needed(table); return(err); @@ -2556,10 +2559,29 @@ row_discard_tablespace_for_mysql( err = DB_ERROR; } else { + dict_index_t* index; + /* Set the flag which tells that now it is legal to IMPORT a tablespace for this table */ table->tablespace_discarded = TRUE; table->ibd_file_missing = TRUE; + + /* check adaptive hash entries */ + index = dict_table_get_first_index(table); + while (index) { + ulint ref_count = btr_search_info_get_ref_count(index->search_info); + if (ref_count) { + fprintf(stderr, "InnoDB: Warning:" + " hash index ref_count (%lu) is not zero" + " after fil_discard_tablespace().\n" + "index: \"%s\"" + " table: \"%s\"\n", + ref_count, + index->name, + table->name); + } + index = dict_table_get_next_index(index); + } } } @@ -2597,6 +2619,11 @@ row_import_tablespace_for_mysql( current_lsn = log_get_lsn(); + /* Enlarge the fatal lock wait timeout during import. */ + mutex_enter(&kernel_mutex); + srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */ + mutex_exit(&kernel_mutex); + /* It is possible, though very improbable, that the lsn's in the tablespace to be imported have risen above the current system lsn, if a lengthy purge, ibuf merge, or rollback was performed on a backup @@ -2708,6 +2735,11 @@ funct_exit: trx->op_info = ""; + /* Restore the fatal semaphore wait timeout */ + mutex_enter(&kernel_mutex); + srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */ + mutex_exit(&kernel_mutex); + return((int) err); } @@ -2901,6 +2933,19 @@ row_truncate_table_for_mysql( table->space = space; index = dict_table_get_first_index(table); do { + ulint ref_count = btr_search_info_get_ref_count(index->search_info); + /* check adaptive hash entries */ + if (ref_count) { + fprintf(stderr, "InnoDB: Warning:" + " hash index ref_count (%lu) is not zero" + " after fil_discard_tablespace().\n" + "index: \"%s\"" + " table: \"%s\"\n", + ref_count, + index->name, + table->name); + } + index->space = space; index = dict_table_get_next_index(index); } while (index); diff --git a/storage/xtradb/row/row0row.c b/storage/xtradb/row/row0row.c index 0783d482f76..cea70e98dee 100644 --- a/storage/xtradb/row/row0row.c +++ b/storage/xtradb/row/row0row.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -47,35 +47,6 @@ Created 4/20/1996 Heikki Tuuri #include "read0read.h" #include "ut0mem.h" -/*********************************************************************//** -Gets the offset of trx id field, in bytes relative to the origin of -a clustered index record. -@return offset of DATA_TRX_ID */ -UNIV_INTERN -ulint -row_get_trx_id_offset( -/*==================*/ - const rec_t* rec __attribute__((unused)), - /*!< in: record */ - dict_index_t* index, /*!< in: clustered index */ - const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ -{ - ulint pos; - ulint offset; - ulint len; - - ut_ad(dict_index_is_clust(index)); - ut_ad(rec_offs_validate(rec, index, offsets)); - - pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID); - - offset = rec_get_nth_field_offs(offsets, pos, &len); - - ut_ad(len == DATA_TRX_ID_LEN); - - return(offset); -} - /*****************************************************************//** When an insert or purge to a table is performed, this function builds the entry to be inserted into or purged from an index on the table. @@ -130,12 +101,27 @@ row_build_index_entry( dfield_copy(dfield, dfield2); - if (dfield_is_null(dfield) || ind_field->prefix_len == 0) { + if (dfield_is_null(dfield)) { + continue; + } + + if (ind_field->prefix_len == 0 + && (!dfield_is_ext(dfield) + || dict_index_is_clust(index))) { + /* The dfield_copy() above suffices for + columns that are stored in-page, or for + clustered index record columns that are not + part of a column prefix in the PRIMARY KEY. */ continue; } - /* If a column prefix index, take only the prefix. - Prefix-indexed columns may be externally stored. */ + /* If the column is stored externally (off-page) in + the clustered index, it must be an ordering field in + the secondary index. In the Antelope format, only + prefix-indexed columns may be stored off-page in the + clustered index record. In the Barracuda format, also + fully indexed long CHAR or VARCHAR columns may be + stored off-page. */ ut_ad(col->ord_part); if (UNIV_LIKELY_NULL(ext)) { @@ -148,17 +134,41 @@ row_build_index_entry( } dfield_set_data(dfield, buf, len); } + + if (ind_field->prefix_len == 0) { + /* In the Barracuda format + (ROW_FORMAT=DYNAMIC or + ROW_FORMAT=COMPRESSED), we can have a + secondary index on an entire column + that is stored off-page in the + clustered index. As this is not a + prefix index (prefix_len == 0), + include the entire off-page column in + the secondary index record. */ + continue; + } } else if (dfield_is_ext(dfield)) { + /* This table is either in Antelope format + (ROW_FORMAT=REDUNDANT or ROW_FORMAT=COMPACT) + or a purge record where the ordered part of + the field is not external. + In Antelope, the maximum column prefix + index length is 767 bytes, and the clustered + index record contains a 768-byte prefix of + each off-page column. */ ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); len -= BTR_EXTERN_FIELD_REF_SIZE; - ut_a(ind_field->prefix_len <= len - || dict_index_is_clust(index)); + dfield_set_len(dfield, len); } - len = dtype_get_at_most_n_mbchars( - col->prtype, col->mbminlen, col->mbmaxlen, - ind_field->prefix_len, len, dfield_get_data(dfield)); - dfield_set_len(dfield, len); + /* If a column prefix index, take only the prefix. */ + if (ind_field->prefix_len) { + len = dtype_get_at_most_n_mbchars( + col->prtype, col->mbminlen, col->mbmaxlen, + ind_field->prefix_len, len, + dfield_get_data(dfield)); + dfield_set_len(dfield, len); + } } ut_ad(dtuple_check_typed(entry)); @@ -223,6 +233,7 @@ row_build( ut_ad(index && rec && heap); ut_ad(dict_index_is_clust(index)); + ut_ad(!mutex_own(&kernel_mutex)); if (!offsets) { offsets = rec_get_offsets(rec, index, offsets_, @@ -231,6 +242,22 @@ row_build( ut_ad(rec_offs_validate(rec, index, offsets)); } +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + /* This condition can occur during crash recovery before + trx_rollback_active() has completed execution. + + This condition is possible if the server crashed + during an insert or update before + btr_store_big_rec_extern_fields() did mtr_commit() all + BLOB pointers to the clustered index record. + + If the record contains a null BLOB pointer, look up the + transaction that holds the implicit lock on this record, and + assert that it was recovered (and will soon be rolled back). */ + ut_a(!rec_offs_any_null_extern(rec, offsets) + || trx_assert_recovered(row_get_rec_trx_id(rec, index, offsets))); +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ + if (type != ROW_COPY_POINTERS) { /* Take a copy of rec to heap */ buf = mem_heap_alloc(heap, rec_offs_size(offsets)); @@ -431,6 +458,10 @@ row_rec_to_index_entry( rec = rec_copy(buf, rec, offsets); /* Avoid a debug assertion in rec_offs_validate(). */ rec_offs_make_valid(rec, index, offsets); +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + } else { + ut_a(!rec_offs_any_null_extern(rec, offsets)); +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ } entry = row_rec_to_index_entry_low(rec, index, offsets, n_ext, heap); diff --git a/storage/xtradb/row/row0sel.c b/storage/xtradb/row/row0sel.c index 1ced125b7bd..e8e9465d838 100644 --- a/storage/xtradb/row/row0sel.c +++ b/storage/xtradb/row/row0sel.c @@ -2544,6 +2544,8 @@ row_sel_field_store_in_mysql_format( ut_ad(len != UNIV_SQL_NULL); UNIV_MEM_ASSERT_RW(data, len); + UNIV_MEM_ASSERT_W(dest, templ->mysql_col_len); + UNIV_MEM_INVALID(dest, templ->mysql_col_len); switch (templ->type) { case DATA_INT: @@ -2580,14 +2582,16 @@ row_sel_field_store_in_mysql_format( dest = row_mysql_store_true_var_len( dest, len, templ->mysql_length_bytes); + /* Copy the actual data. Leave the rest of the + buffer uninitialized. */ + memcpy(dest, data, len); + break; } /* Copy the actual data */ ut_memcpy(dest, data, len); - /* Pad with trailing spaces. We pad with spaces also the - unused end of a >= 5.0.3 true VARCHAR column, just in case - MySQL expects its contents to be deterministic. */ + /* Pad with trailing spaces. */ pad_ptr = dest + len; @@ -3120,6 +3124,39 @@ sel_restore_position_for_mysql( } /********************************************************************//** +Copies a cached field for MySQL from the fetch cache. */ +static +void +row_sel_copy_cached_field_for_mysql( +/*================================*/ + byte* buf, /*!< in/out: row buffer */ + const byte* cache, /*!< in: cached row */ + const mysql_row_templ_t*templ) /*!< in: column template */ +{ + ulint len; + + buf += templ->mysql_col_offset; + cache += templ->mysql_col_offset; + + UNIV_MEM_ASSERT_W(buf, templ->mysql_col_len); + + if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR + && templ->type != DATA_INT) { + /* Check for != DATA_INT to make sure we do + not treat MySQL ENUM or SET as a true VARCHAR! + Find the actual length of the true VARCHAR field. */ + row_mysql_read_true_varchar( + &len, cache, templ->mysql_length_bytes); + len += templ->mysql_length_bytes; + UNIV_MEM_INVALID(buf, templ->mysql_col_len); + } else { + len = templ->mysql_col_len; + } + + ut_memcpy(buf, cache, len); +} + +/********************************************************************//** Pops a cached row for MySQL from the fetch cache. */ UNIV_INLINE void @@ -3131,26 +3168,22 @@ row_sel_pop_cached_row_for_mysql( { ulint i; const mysql_row_templ_t*templ; - byte* cached_rec; + const byte* cached_rec; ut_ad(prebuilt->n_fetch_cached > 0); ut_ad(prebuilt->mysql_prefix_len <= prebuilt->mysql_row_len); + UNIV_MEM_ASSERT_W(buf, prebuilt->mysql_row_len); + + cached_rec = prebuilt->fetch_cache[prebuilt->fetch_cache_first]; + if (UNIV_UNLIKELY(prebuilt->keep_other_fields_on_keyread)) { /* Copy cache record field by field, don't touch fields that are not covered by current key */ - cached_rec = prebuilt->fetch_cache[ - prebuilt->fetch_cache_first]; for (i = 0; i < prebuilt->n_template; i++) { templ = prebuilt->mysql_template + i; -#if 0 /* Some of the cached_rec may legitimately be uninitialized. */ - UNIV_MEM_ASSERT_RW(cached_rec - + templ->mysql_col_offset, - templ->mysql_col_len); -#endif - ut_memcpy(buf + templ->mysql_col_offset, - cached_rec + templ->mysql_col_offset, - templ->mysql_col_len); + row_sel_copy_cached_field_for_mysql( + buf, cached_rec, templ); /* Copy NULL bit of the current field from cached_rec to buf */ if (templ->mysql_null_bit_mask) { @@ -3160,17 +3193,24 @@ row_sel_pop_cached_row_for_mysql( & (byte)templ->mysql_null_bit_mask; } } + } else if (prebuilt->mysql_prefix_len > 63) { + /* The record is long. Copy it field by field, in case + there are some long VARCHAR column of which only a + small length is being used. */ + UNIV_MEM_INVALID(buf, prebuilt->mysql_prefix_len); + + /* First copy the NULL bits. */ + ut_memcpy(buf, cached_rec, prebuilt->null_bitmap_len); + /* Then copy the requested fields. */ + + for (i = 0; i < prebuilt->n_template; i++) { + row_sel_copy_cached_field_for_mysql( + buf, cached_rec, prebuilt->mysql_template + i); + } + } else { + ut_memcpy(buf, cached_rec, prebuilt->mysql_prefix_len); } - else { -#if 0 /* Some of the cached_rec may legitimately be uninitialized. */ - UNIV_MEM_ASSERT_RW(prebuilt->fetch_cache - [prebuilt->fetch_cache_first], - prebuilt->mysql_prefix_len); -#endif - ut_memcpy(buf, - prebuilt->fetch_cache[prebuilt->fetch_cache_first], - prebuilt->mysql_prefix_len); - } + prebuilt->n_fetch_cached--; prebuilt->fetch_cache_first++; @@ -3964,7 +4004,13 @@ rec_loop: if (UNIV_UNLIKELY(next_offs >= UNIV_PAGE_SIZE - PAGE_DIR)) { wrong_offs: - if (srv_force_recovery == 0 || moves_up == FALSE) { + if (srv_pass_corrupt_table && !trx_sys_sys_space(index->table->space)) { + index->table->is_corrupt = TRUE; + fil_space_set_corrupt(index->table->space); + } + + if ((srv_force_recovery == 0 || moves_up == FALSE) + && srv_pass_corrupt_table <= 1) { ut_print_timestamp(stderr); buf_page_print(page_align(rec), 0); fprintf(stderr, @@ -4015,7 +4061,8 @@ wrong_offs: offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - if (UNIV_UNLIKELY(srv_force_recovery > 0)) { + if (UNIV_UNLIKELY(srv_force_recovery > 0) + || (srv_pass_corrupt_table == 2 && index->table->is_corrupt)) { if (!rec_validate(rec, offsets) || !btr_index_rec_validate(rec, index, FALSE)) { fprintf(stderr, diff --git a/storage/xtradb/row/row0upd.c b/storage/xtradb/row/row0upd.c index a6fb266c4ed..5765d3b76d2 100644 --- a/storage/xtradb/row/row0upd.c +++ b/storage/xtradb/row/row0upd.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -504,14 +504,49 @@ row_upd_rec_in_place( n_fields = upd_get_n_fields(update); for (i = 0; i < n_fields; i++) { +#ifdef UNIV_BLOB_DEBUG + btr_blob_dbg_t b; + const byte* field_ref = NULL; +#endif /* UNIV_BLOB_DEBUG */ + upd_field = upd_get_nth_field(update, i); new_val = &(upd_field->new_val); ut_ad(!dfield_is_ext(new_val) == !rec_offs_nth_extern(offsets, upd_field->field_no)); +#ifdef UNIV_BLOB_DEBUG + if (dfield_is_ext(new_val)) { + ulint len; + field_ref = rec_get_nth_field(rec, offsets, i, &len); + ut_a(len != UNIV_SQL_NULL); + ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); + field_ref += len - BTR_EXTERN_FIELD_REF_SIZE; + + b.ref_page_no = page_get_page_no(page_align(rec)); + b.ref_heap_no = page_rec_get_heap_no(rec); + b.ref_field_no = i; + b.blob_page_no = mach_read_from_4( + field_ref + BTR_EXTERN_PAGE_NO); + ut_a(b.ref_field_no >= index->n_uniq); + btr_blob_dbg_rbt_delete(index, &b, "upd_in_place"); + } +#endif /* UNIV_BLOB_DEBUG */ rec_set_nth_field(rec, offsets, upd_field->field_no, dfield_get_data(new_val), dfield_get_len(new_val)); + +#ifdef UNIV_BLOB_DEBUG + if (dfield_is_ext(new_val)) { + b.blob_page_no = mach_read_from_4( + field_ref + BTR_EXTERN_PAGE_NO); + b.always_owner = b.owner = !(field_ref[BTR_EXTERN_LEN] + & BTR_EXTERN_OWNER_FLAG); + b.del = rec_get_deleted_flag( + rec, rec_offs_comp(offsets)); + + btr_blob_dbg_rbt_insert(index, &b, "upd_in_place"); + } +#endif /* UNIV_BLOB_DEBUG */ } if (UNIV_LIKELY_NULL(page_zip)) { @@ -1556,8 +1591,9 @@ row_upd_sec_index_entry( mtr_start(&mtr); - found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur, - &mtr); + found = row_search_index_entry(index, entry, + trx->fake_changes ? BTR_SEARCH_LEAF : BTR_MODIFY_LEAF, + &pcur, &mtr); btr_cur = btr_pcur_get_btr_cur(&pcur); rec = btr_cur_get_rec(btr_cur); @@ -1787,9 +1823,11 @@ row_upd_clust_rec_by_insert( the previous invocation of this function. Mark the off-page columns in the entry inherited. */ + if (!(trx->fake_changes)) { change_ownership = row_upd_clust_rec_by_insert_inherit( NULL, NULL, entry, node->update); ut_a(change_ownership); + } /* fall through */ case UPD_NODE_INSERT_CLUSTERED: /* A lock wait occurred in row_ins_index_entry() in @@ -1819,7 +1857,7 @@ err_exit: delete-marked old record, mark them disowned by the old record and owned by the new entry. */ - if (rec_offs_any_extern(offsets)) { + if (rec_offs_any_extern(offsets) && !(trx->fake_changes)) { change_ownership = row_upd_clust_rec_by_insert_inherit( rec, offsets, entry, node->update); @@ -1947,33 +1985,50 @@ row_upd_clust_rec( the same transaction do not modify the record in the meantime. Therefore we can assert that the restoration of the cursor succeeds. */ - ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr)); + ut_a(btr_pcur_restore_position(thr_get_trx(thr)->fake_changes ? BTR_SEARCH_LEAF : BTR_MODIFY_TREE, + pcur, mtr)); ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur), dict_table_is_comp(index->table))); - err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur, - &heap, &big_rec, node->update, - node->cmpl_info, thr, mtr); - mtr_commit(mtr); - - if (err == DB_SUCCESS && big_rec) { + err = btr_cur_pessimistic_update( + BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG, btr_cur, + &heap, &big_rec, node->update, node->cmpl_info, thr, mtr); + /* skip store extern for fake_changes */ + if (big_rec && !(thr_get_trx(thr)->fake_changes)) { ulint offsets_[REC_OFFS_NORMAL_SIZE]; rec_t* rec; rec_offs_init(offsets_); - mtr_start(mtr); + ut_a(err == DB_SUCCESS); + /* Write out the externally stored columns while still + x-latching index->lock and block->lock. We have to + mtr_commit(mtr) first, so that the redo log will be + written in the correct order. Otherwise, we would run + into trouble on crash recovery if mtr freed B-tree + pages on which some of the big_rec fields will be + written. */ + btr_cur_mtr_commit_and_start(btr_cur, mtr); - ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr)); rec = btr_cur_get_rec(btr_cur); err = btr_store_big_rec_extern_fields( index, btr_cur_get_block(btr_cur), rec, rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap), mtr, TRUE, big_rec); - mtr_commit(mtr); + /* If writing big_rec fails (for example, because of + DB_OUT_OF_FILE_SPACE), the record will be corrupted. + Even if we did not update any externally stored + columns, our update could cause the record to grow so + that a non-updated column was selected for external + storage. This non-update would not have been written + to the undo log, and thus the record cannot be rolled + back. */ + ut_a(err == DB_SUCCESS); } + mtr_commit(mtr); + if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } @@ -2082,7 +2137,8 @@ row_upd_clust_step( ut_a(pcur->rel_pos == BTR_PCUR_ON); - success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr); + success = btr_pcur_restore_position(thr_get_trx(thr)->fake_changes ? BTR_SEARCH_LEAF : BTR_MODIFY_LEAF, + pcur, mtr); if (!success) { err = DB_RECORD_NOT_FOUND; diff --git a/storage/xtradb/row/row0vers.c b/storage/xtradb/row/row0vers.c index d4fde0b939b..8a7bb842293 100644 --- a/storage/xtradb/row/row0vers.c +++ b/storage/xtradb/row/row0vers.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -550,6 +550,11 @@ row_vers_build_for_consistent_read( /* The view already sees this version: we can copy it to in_heap and return */ +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + ut_a(!rec_offs_any_null_extern( + version, *offsets)); +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ + buf = mem_heap_alloc(in_heap, rec_offs_size(*offsets)); *old_vers = rec_copy(buf, version, *offsets); @@ -583,6 +588,10 @@ row_vers_build_for_consistent_read( *offsets = rec_get_offsets(prev_version, index, *offsets, ULINT_UNDEFINED, offset_heap); +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + ut_a(!rec_offs_any_null_extern(prev_version, *offsets)); +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ + trx_id = row_get_rec_trx_id(prev_version, index, *offsets); if (read_view_sees_trx_id(view, trx_id)) { @@ -682,6 +691,10 @@ row_vers_build_for_semi_consistent_read( /* We found a version that belongs to a committed transaction: return it. */ +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + ut_a(!rec_offs_any_null_extern(version, *offsets)); +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ + if (rec == version) { *old_vers = rec; err = DB_SUCCESS; @@ -739,6 +752,9 @@ row_vers_build_for_semi_consistent_read( version = prev_version; *offsets = rec_get_offsets(version, index, *offsets, ULINT_UNDEFINED, offset_heap); +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + ut_a(!rec_offs_any_null_extern(version, *offsets)); +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ }/* for (;;) */ if (heap) { |