diff options
author | tomas@poseidon.ndb.mysql.com <> | 2005-07-12 20:01:22 +0200 |
---|---|---|
committer | tomas@poseidon.ndb.mysql.com <> | 2005-07-12 20:01:22 +0200 |
commit | 674b6bb15140673107bacb1574e8e26f51ff002e (patch) | |
tree | 8fd7b30e9e4cce3fdfc60e700fdb2d668f2f6449 /storage/innobase | |
parent | 9743190cd90c7a51dfb23d5cbdca4b04fb7e6ae0 (diff) | |
parent | e06e06ffeb8dbcd2737720be9f78c5fe75b9c0f5 (diff) | |
download | mariadb-git-674b6bb15140673107bacb1574e8e26f51ff002e.tar.gz |
Merge
Diffstat (limited to 'storage/innobase')
23 files changed, 528 insertions, 243 deletions
diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c index 2d84586216a..c27fb73ff8d 100644 --- a/storage/innobase/btr/btr0btr.c +++ b/storage/innobase/btr/btr0btr.c @@ -143,7 +143,7 @@ btr_root_get( root_page_no = dict_tree_get_page(tree); root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr); - ut_a(!!page_is_comp(root) == + ut_a((ibool)!!page_is_comp(root) == UT_LIST_GET_FIRST(tree->tree_indexes)->table->comp); return(root); @@ -2014,7 +2014,7 @@ btr_compress( page = btr_cur_get_page(cursor); tree = btr_cur_get_tree(cursor); comp = page_is_comp(page); - ut_a(!!comp == cursor->index->table->comp); + ut_a((ibool)!!comp == cursor->index->table->comp); ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree), MTR_MEMO_X_LOCK)); @@ -2508,7 +2508,7 @@ btr_index_rec_validate( return(TRUE); } - if (UNIV_UNLIKELY(!!page_is_comp(page) != index->table->comp)) { + if (UNIV_UNLIKELY((ibool)!!page_is_comp(page) != index->table->comp)) { btr_index_rec_validate_report(page, rec, index); fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n", (ulong) !!page_is_comp(page), diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index 4ae27f007d6..f81cce5b8e9 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -316,7 +316,9 @@ btr_cur_search_to_nth_level( if (btr_search_latch.writer == RW_LOCK_NOT_LOCKED && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ && !estimate +#ifdef PAGE_CUR_LE_OR_EXTENDS && mode != PAGE_CUR_LE_OR_EXTENDS +#endif /* PAGE_CUR_LE_OR_EXTENDS */ && srv_use_adaptive_hash_indexes && btr_search_guess_on_hash(index, info, tuple, mode, latch_mode, cursor, @@ -390,9 +392,12 @@ btr_cur_search_to_nth_level( page_mode = PAGE_CUR_LE; break; default: - ut_ad(mode == PAGE_CUR_L - || mode == PAGE_CUR_LE +#ifdef PAGE_CUR_LE_OR_EXTENDS + ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE || mode == PAGE_CUR_LE_OR_EXTENDS); +#else /* PAGE_CUR_LE_OR_EXTENDS */ + ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE); +#endif /* PAGE_CUR_LE_OR_EXTENDS */ page_mode = mode; break; } @@ -507,7 +512,7 @@ retry_page_get: /* x-latch the page */ page = btr_page_get(space, page_no, RW_X_LATCH, mtr); - ut_a(!!page_is_comp(page) + ut_a((ibool)!!page_is_comp(page) == index->table->comp); } @@ -1385,7 +1390,7 @@ btr_cur_parse_update_in_place( goto func_exit; } - ut_a(!!page_is_comp(page) == index->table->comp); + ut_a((ibool)!!page_is_comp(page) == index->table->comp); rec = page + rec_offset; /* We do not need to reserve btr_search_latch, as the page is only diff --git a/storage/innobase/configure.in b/storage/innobase/configure.in index baf11272ab9..c56bd8274c4 100644 --- a/storage/innobase/configure.in +++ b/storage/innobase/configure.in @@ -117,6 +117,13 @@ case "$target" in CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";; esac +# must go in pair with AR as set by MYSQL_CHECK_AR +if test -z "$ARFLAGS" +then + ARFLAGS="cru" +fi +AC_SUBST(ARFLAGS) + AC_OUTPUT(Makefile os/Makefile ut/Makefile btr/Makefile dnl buf/Makefile data/Makefile dnl dict/Makefile dyn/Makefile dnl diff --git a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c index 299b55f3d2b..20f522c1a60 100644 --- a/storage/innobase/fil/fil0fil.c +++ b/storage/innobase/fil/fil0fil.c @@ -3410,9 +3410,9 @@ fil_extend_space_to_desired_size( fil_space_t* space; byte* buf2; byte* buf; + ulint buf_size; ulint start_page_no; ulint file_start_page_no; - ulint n_pages; ulint offset_high; ulint offset_low; ibool success = TRUE; @@ -3437,22 +3437,20 @@ fil_extend_space_to_desired_size( fil_node_prepare_for_io(node, system, space); - /* Extend 1 MB at a time */ - - buf2 = mem_alloc(1024 * 1024 + UNIV_PAGE_SIZE); - buf = ut_align(buf2, UNIV_PAGE_SIZE); - - memset(buf, '\0', 1024 * 1024); - start_page_no = space->size; file_start_page_no = space->size - node->size; - while (start_page_no < size_after_extend) { - n_pages = size_after_extend - start_page_no; + /* Extend at most 64 pages at a time */ + buf_size = ut_min(64, size_after_extend - start_page_no) + * UNIV_PAGE_SIZE; + buf2 = mem_alloc(buf_size + UNIV_PAGE_SIZE); + buf = ut_align(buf2, UNIV_PAGE_SIZE); - if (n_pages > (1024 * 1024) / UNIV_PAGE_SIZE) { - n_pages = (1024 * 1024) / UNIV_PAGE_SIZE; - } + memset(buf, 0, buf_size); + + while (start_page_no < size_after_extend) { + ulint n_pages = ut_min(buf_size / UNIV_PAGE_SIZE, + size_after_extend - start_page_no); offset_high = (start_page_no - file_start_page_no) / (4096 * ((1024 * 1024) / UNIV_PAGE_SIZE)); @@ -4034,7 +4032,7 @@ fil_aio_wait( if (os_aio_use_native_aio) { srv_set_io_thread_op_info(segment, "native aio handle"); #ifdef WIN_ASYNC_IO - ret = os_aio_windows_handle(segment, 0, (void**) &fil_node, + ret = os_aio_windows_handle(segment, 0, &fil_node, &message, &type); #elif defined(POSIX_ASYNC_IO) ret = os_aio_posix_handle(segment, &fil_node, &message); diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c index 712d43f916c..d7fa48b6e66 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.c +++ b/storage/innobase/ibuf/ibuf0ibuf.c @@ -2810,7 +2810,7 @@ ibuf_insert_to_index_page( ut_ad(ibuf_inside()); ut_ad(dtuple_check_typed(entry)); - if (UNIV_UNLIKELY(index->table->comp != !!page_is_comp(page))) { + if (UNIV_UNLIKELY(index->table->comp != (ibool)!!page_is_comp(page))) { fputs( "InnoDB: Trying to insert a record from the insert buffer to an index page\n" "InnoDB: but the 'compact' flag does not match!\n", stderr); diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index e75281dd93c..362e3552411 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -373,7 +373,7 @@ os_file_get_size_as_iblonglong( /* out: size in bytes, -1 if error */ os_file_t file); /* in: handle to a file */ /*************************************************************************** -Sets a file size. This function can be used to extend or truncate a file. */ +Write the specified number of zeros to a newly created file. */ ibool os_file_set_size( diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h index e89e740e775..b03302b0e77 100644 --- a/storage/innobase/include/page0cur.h +++ b/storage/innobase/include/page0cur.h @@ -26,11 +26,13 @@ Created 10/4/1994 Heikki Tuuri #define PAGE_CUR_GE 2 #define PAGE_CUR_L 3 #define PAGE_CUR_LE 4 -#define PAGE_CUR_LE_OR_EXTENDS 5 /* This is a search mode used in +/*#define PAGE_CUR_LE_OR_EXTENDS 5*/ /* This is a search mode used in "column LIKE 'abc%' ORDER BY column DESC"; we have to find strings which are <= 'abc' or which extend it */ -#define PAGE_CUR_DBG 6 +#ifdef UNIV_SEARCH_DEBUG +# define PAGE_CUR_DBG 6 /* As PAGE_CUR_LE, but skips search shortcut */ +#endif /* UNIV_SEARCH_DEBUG */ #ifdef PAGE_CUR_ADAPT # ifdef UNIV_SEARCH_PERF_STAT diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h index 4bb9fa63cd1..4e6ff73b0f8 100644 --- a/storage/innobase/include/row0mysql.h +++ b/storage/innobase/include/row0mysql.h @@ -243,17 +243,27 @@ row_update_for_mysql( the MySQL format */ row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL handle */ - /************************************************************************* -Does an unlock of a row for MySQL. */ +This can only be used when srv_locks_unsafe_for_binlog is TRUE. Before +calling this function we must use trx_reset_new_rec_lock_info() and +trx_register_new_rec_lock() to store the information which new record locks +really were set. This function removes a newly set lock under prebuilt->pcur, +and also under prebuilt->clust_pcur. Currently, this is only used and tested +in the case of an UPDATE or a DELETE statement, where the row lock is of the +LOCK_X type. +Thus, this implements a 'mini-rollback' that releases the latest record +locks we set. */ int row_unlock_for_mysql( /*=================*/ /* out: error code or DB_SUCCESS */ - row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL + row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL handle */ - + ibool has_latches_on_recs);/* TRUE if called so that we have + the latches on the records under pcur + and clust_pcur, and we do not need to + reposition the cursors. */ /************************************************************************* Creates an query graph node of 'update' type to be used in the MySQL interface. */ diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 6e4241965c1..116ae7b6438 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -182,6 +182,7 @@ extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs, #define kernel_mutex (*kernel_mutex_temp) #define SRV_MAX_N_IO_THREADS 100 +#define SRV_CONCURRENCY_THRESHOLD 20 /* Array of English strings describing the current state of an i/o handler thread */ diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 8df50d6703d..146730d46f8 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -16,10 +16,39 @@ Created 3/26/1996 Heikki Tuuri #include "que0types.h" #include "mem0mem.h" #include "read0types.h" +#include "dict0types.h" #include "trx0xa.h" extern ulint trx_n_mysql_transactions; +/***************************************************************** +Resets the new record lock info in a transaction struct. */ +UNIV_INLINE +void +trx_reset_new_rec_lock_info( +/*========================*/ + trx_t* trx); /* in: transaction struct */ +/***************************************************************** +Registers that we have set a new record lock on an index. We only have space +to store 2 indexes! If this is called to store more than 2 indexes after +trx_reset_new_rec_lock_info(), then this function does nothing. */ +UNIV_INLINE +void +trx_register_new_rec_lock( +/*======================*/ + trx_t* trx, /* in: transaction struct */ + dict_index_t* index); /* in: trx sets a new record lock on this + index */ +/***************************************************************** +Checks if trx has set a new record lock on an index. */ +UNIV_INLINE +ibool +trx_new_rec_locks_contain( +/*======================*/ + /* out: TRUE if trx has set a new record lock + on index */ + trx_t* trx, /* in: transaction struct */ + dict_index_t* index); /* in: index */ /************************************************************************ Releases the search latch if trx has reserved it. */ @@ -495,8 +524,18 @@ struct trx_struct{ lock_t* auto_inc_lock; /* possible auto-inc lock reserved by the transaction; note that it is also in the lock list trx_locks */ - ibool trx_create_lock;/* this is TRUE if we have created a - new lock for a record accessed */ + dict_index_t* new_rec_locks[2];/* these are normally NULL; if + srv_locks_unsafe_for_binlog is TRUE, + in a cursor search, if we set a new + record lock on an index, this is set + to point to the index; this is + used in releasing the locks under the + cursors if we are performing an UPDATE + and we determine after retrieving + the row that it does not need to be + locked; thus, these can be used to + implement a 'mini-rollback' that + releases the latest record locks */ UT_LIST_NODE_T(trx_t) trx_list; /* list of transactions */ UT_LIST_NODE_T(trx_t) diff --git a/storage/innobase/include/trx0trx.ic b/storage/innobase/include/trx0trx.ic index 78e5acda148..54cf2ff331f 100644 --- a/storage/innobase/include/trx0trx.ic +++ b/storage/innobase/include/trx0trx.ic @@ -39,4 +39,60 @@ trx_start_if_not_started_low( } } +/***************************************************************** +Resets the new record lock info in a transaction struct. */ +UNIV_INLINE +void +trx_reset_new_rec_lock_info( +/*========================*/ + trx_t* trx) /* in: transaction struct */ +{ + trx->new_rec_locks[0] = NULL; + trx->new_rec_locks[1] = NULL; +} + +/***************************************************************** +Registers that we have set a new record lock on an index. We only have space +to store 2 indexes! If this is called to store more than 2 indexes after +trx_reset_new_rec_lock_info(), then this function does nothing. */ +UNIV_INLINE +void +trx_register_new_rec_lock( +/*======================*/ + trx_t* trx, /* in: transaction struct */ + dict_index_t* index) /* in: trx sets a new record lock on this + index */ +{ + if (trx->new_rec_locks[0] == NULL) { + trx->new_rec_locks[0] = index; + + return; + } + + if (trx->new_rec_locks[0] == index) { + return; + } + + if (trx->new_rec_locks[1] != NULL) { + + return; + } + + trx->new_rec_locks[1] = index; +} + +/***************************************************************** +Checks if trx has set a new record lock on an index. */ +UNIV_INLINE +ibool +trx_new_rec_locks_contain( +/*======================*/ + /* out: TRUE if trx has set a new record lock + on index */ + trx_t* trx, /* in: transaction struct */ + dict_index_t* index) /* in: index */ +{ + return(trx->new_rec_locks[0] == index + || trx->new_rec_locks[1] == index); +} diff --git a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c index 48db7ced0cb..280c4871ee9 100644 --- a/storage/innobase/lock/lock0lock.c +++ b/storage/innobase/lock/lock0lock.c @@ -956,7 +956,7 @@ lock_rec_has_to_wait( cause waits */ if ((lock_is_on_supremum || (type_mode & LOCK_GAP)) - && !(type_mode & LOCK_INSERT_INTENTION)) { + && !(type_mode & LOCK_INSERT_INTENTION)) { /* Gap type locks without LOCK_INSERT_INTENTION flag do not need to wait for anything. This is because @@ -1765,10 +1765,7 @@ lock_rec_create( lock_rec_set_nth_bit(lock, heap_no); HASH_INSERT(lock_t, hash, lock_sys->rec_hash, - lock_rec_fold(space, page_no), lock); - /* Note that we have create a new lock */ - trx->trx_create_lock = TRUE; - + lock_rec_fold(space, page_no), lock); if (type_mode & LOCK_WAIT) { lock_set_lock_and_trx_wait(lock, trx); @@ -1945,15 +1942,6 @@ lock_rec_add_to_queue( if (similar_lock && !somebody_waits && !(type_mode & LOCK_WAIT)) { - /* If the nth bit of a record lock is already set then we - do not set a new lock bit, otherwice we set */ - - if (lock_rec_get_nth_bit(similar_lock, heap_no)) { - trx->trx_create_lock = FALSE; - } else { - trx->trx_create_lock = TRUE; - } - lock_rec_set_nth_bit(similar_lock, heap_no); return(similar_lock); @@ -2005,11 +1993,14 @@ lock_rec_lock_fast( lock = lock_rec_get_first_on_page(rec); trx = thr_get_trx(thr); - trx->trx_create_lock = FALSE; if (lock == NULL) { if (!impl) { lock_rec_create(mode, rec, index, trx); + + if (srv_locks_unsafe_for_binlog) { + trx_register_new_rec_lock(trx, index); + } } return(TRUE); @@ -2021,23 +2012,22 @@ lock_rec_lock_fast( } if (lock->trx != trx - || lock->type_mode != (mode | LOCK_REC) - || lock_rec_get_n_bits(lock) <= heap_no) { + || lock->type_mode != (mode | LOCK_REC) + || lock_rec_get_n_bits(lock) <= heap_no) { + return(FALSE); } if (!impl) { + /* If the nth bit of the record lock is already set then we + do not set a new lock bit, otherwise we do set */ - /* If the nth bit of a record lock is already set then we - do not set a new lock bit, otherwice we set */ - - if (lock_rec_get_nth_bit(lock, heap_no)) { - trx->trx_create_lock = FALSE; - } else { - trx->trx_create_lock = TRUE; + if (!lock_rec_get_nth_bit(lock, heap_no)) { + lock_rec_set_nth_bit(lock, heap_no); + if (srv_locks_unsafe_for_binlog) { + trx_register_new_rec_lock(trx, index); + } } - - lock_rec_set_nth_bit(lock, heap_no); } return(TRUE); @@ -2093,12 +2083,19 @@ lock_rec_lock_slow( enough already granted on the record, we have to wait. */ err = lock_rec_enqueue_waiting(mode, rec, index, thr); + + if (srv_locks_unsafe_for_binlog) { + trx_register_new_rec_lock(trx, index); + } } else { if (!impl) { /* Set the requested lock on the record */ lock_rec_add_to_queue(LOCK_REC | mode, rec, index, trx); + if (srv_locks_unsafe_for_binlog) { + trx_register_new_rec_lock(trx, index); + } } err = DB_SUCCESS; @@ -2436,8 +2433,15 @@ lock_rec_inherit_to_gap( lock = lock_rec_get_first(rec); + /* If srv_locks_unsafe_for_binlog is TRUE, we do not want locks set + by an UPDATE or a DELETE to be inherited as gap type locks. But we + DO want S-locks set by a consistency constraint to be inherited also + then. */ + while (lock != NULL) { - if (!lock_rec_get_insert_intention(lock)) { + if (!lock_rec_get_insert_intention(lock) + && !(srv_locks_unsafe_for_binlog + && lock_get_mode(lock) == LOCK_X)) { lock_rec_add_to_queue(LOCK_REC | lock_get_mode(lock) | LOCK_GAP, @@ -3069,7 +3073,7 @@ lock_update_insert( lock_rec_inherit_to_gap_if_gap_lock(rec, page_rec_get_next(rec)); lock_mutex_exit_kernel(); -} +} /***************************************************************** Updates the lock table when a record is removed. */ diff --git a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c index 8d9780bfbda..42e854398ba 100644 --- a/storage/innobase/log/log0recv.c +++ b/storage/innobase/log/log0recv.c @@ -768,7 +768,8 @@ recv_parse_or_apply_log_rec_body( case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT: if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, type == MLOG_COMP_REC_INSERT, &index))) { - ut_a(!page||!!page_is_comp(page)==index->table->comp); + ut_a(!page + || (ibool)!!page_is_comp(page)==index->table->comp); ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr, index, page, mtr); } @@ -776,7 +777,8 @@ recv_parse_or_apply_log_rec_body( case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK: if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, type == MLOG_COMP_REC_CLUST_DELETE_MARK, &index))) { - ut_a(!page||!!page_is_comp(page)==index->table->comp); + ut_a(!page + || (ibool)!!page_is_comp(page)==index->table->comp); ptr = btr_cur_parse_del_mark_set_clust_rec(ptr, end_ptr, index, page); } @@ -796,7 +798,8 @@ recv_parse_or_apply_log_rec_body( case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE: if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, type == MLOG_COMP_REC_UPDATE_IN_PLACE, &index))) { - ut_a(!page||!!page_is_comp(page)==index->table->comp); + ut_a(!page + || (ibool)!!page_is_comp(page)==index->table->comp); ptr = btr_cur_parse_update_in_place(ptr, end_ptr, page, index); } @@ -806,7 +809,8 @@ recv_parse_or_apply_log_rec_body( if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, type == MLOG_COMP_LIST_END_DELETE || type == MLOG_COMP_LIST_START_DELETE, &index))) { - ut_a(!page||!!page_is_comp(page)==index->table->comp); + ut_a(!page + || (ibool)!!page_is_comp(page)==index->table->comp); ptr = page_parse_delete_rec_list(type, ptr, end_ptr, index, page, mtr); } @@ -814,7 +818,8 @@ recv_parse_or_apply_log_rec_body( case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED: if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, type == MLOG_COMP_LIST_END_COPY_CREATED, &index))) { - ut_a(!page||!!page_is_comp(page)==index->table->comp); + ut_a(!page + || (ibool)!!page_is_comp(page)==index->table->comp); ptr = page_parse_copy_rec_list_to_created_page(ptr, end_ptr, index, page, mtr); } @@ -822,7 +827,8 @@ recv_parse_or_apply_log_rec_body( case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE: if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, type == MLOG_COMP_PAGE_REORGANIZE, &index))) { - ut_a(!page||!!page_is_comp(page)==index->table->comp); + ut_a(!page + || (ibool)!!page_is_comp(page)==index->table->comp); ptr = btr_parse_page_reorganize(ptr, end_ptr, index, page, mtr); } @@ -855,7 +861,8 @@ recv_parse_or_apply_log_rec_body( case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE: if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, type == MLOG_COMP_REC_DELETE, &index))) { - ut_a(!page||!!page_is_comp(page)==index->table->comp); + ut_a(!page + || (ibool)!!page_is_comp(page)==index->table->comp); ptr = page_cur_parse_delete_rec(ptr, end_ptr, index, page, mtr); } diff --git a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c index c68f5738798..48dc808e36c 100644 --- a/storage/innobase/os/os0file.c +++ b/storage/innobase/os/os0file.c @@ -1653,7 +1653,7 @@ os_file_get_size_as_iblonglong( } /*************************************************************************** -Sets a file size. This function can be used to extend or truncate a file. */ +Write the specified number of zeros to a newly created file. */ ibool os_file_set_size( @@ -1666,44 +1666,46 @@ os_file_set_size( size */ ulint size_high)/* in: most significant 32 bits of size */ { - ib_longlong offset; - ib_longlong low; - ulint n_bytes; + ib_longlong current_size; + ib_longlong desired_size; ibool ret; byte* buf; byte* buf2; + ulint buf_size; ut_a(size == (size & 0xFFFFFFFF)); - /* We use a very big 8 MB buffer in writing because Linux may be - extremely slow in fsync on 1 MB writes */ + current_size = 0; + desired_size = (ib_longlong)size + (((ib_longlong)size_high) << 32); - buf2 = ut_malloc(UNIV_PAGE_SIZE * 513); + /* Write up to 1 megabyte at a time. */ + buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE)) + * UNIV_PAGE_SIZE; + buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE); /* Align the buffer for possible raw i/o */ buf = ut_align(buf2, UNIV_PAGE_SIZE); /* Write buffer full of zeros */ - memset(buf, 0, UNIV_PAGE_SIZE * 512); + memset(buf, 0, buf_size); - offset = 0; - low = (ib_longlong)size + (((ib_longlong)size_high) << 32); - - if (low >= (ib_longlong)(100 * 1024 * 1024)) { + if (desired_size >= (ib_longlong)(100 * 1024 * 1024)) { fprintf(stderr, "InnoDB: Progress in MB:"); } - while (offset < low) { - if (low - offset < UNIV_PAGE_SIZE * 512) { - n_bytes = (ulint)(low - offset); - } else { - n_bytes = UNIV_PAGE_SIZE * 512; - } - + while (current_size < desired_size) { + ulint n_bytes; + + if (desired_size - current_size < (ib_longlong) buf_size) { + n_bytes = (ulint) (desired_size - current_size); + } else { + n_bytes = buf_size; + } + ret = os_file_write(name, file, buf, - (ulint)(offset & 0xFFFFFFFF), - (ulint)(offset >> 32), + (ulint)(current_size & 0xFFFFFFFF), + (ulint)(current_size >> 32), n_bytes); if (!ret) { ut_free(buf2); @@ -1711,18 +1713,18 @@ os_file_set_size( } /* Print about progress for each 100 MB written */ - if ((ib_longlong) (offset + n_bytes) / (ib_longlong)(100 * 1024 * 1024) - != offset / (ib_longlong)(100 * 1024 * 1024)) { + if ((current_size + n_bytes) / (ib_longlong)(100 * 1024 * 1024) + != current_size / (ib_longlong)(100 * 1024 * 1024)) { fprintf(stderr, " %lu00", - (ulong) ((offset + n_bytes) + (ulong) ((current_size + n_bytes) / (ib_longlong)(100 * 1024 * 1024))); } - offset += n_bytes; + current_size += n_bytes; } - if (low >= (ib_longlong)(100 * 1024 * 1024)) { + if (desired_size >= (ib_longlong)(100 * 1024 * 1024)) { fprintf(stderr, "\n"); } @@ -3296,7 +3298,7 @@ os_aio( ibool retval; BOOL ret = TRUE; DWORD len = (DWORD) n; - void* dummy_mess1; + struct fil_node_struct * dummy_mess1; void* dummy_mess2; ulint dummy_type; #endif diff --git a/storage/innobase/page/page0cur.c b/storage/innobase/page/page0cur.c index df6d898d4ac..d0b89e81787 100644 --- a/storage/innobase/page/page0cur.c +++ b/storage/innobase/page/page0cur.c @@ -47,7 +47,6 @@ page_cur_try_search_shortcut( not yet completely matched */ page_cur_t* cursor) /* out: page cursor */ { - int cmp; rec_t* rec; rec_t* next_rec; ulint low_match; @@ -79,9 +78,8 @@ page_cur_try_search_shortcut( up_match = low_match; up_bytes = low_bytes; - cmp = page_cmp_dtuple_rec_with_match(tuple, rec, offsets, &low_match, - &low_bytes); - if (cmp == -1) { + if (page_cmp_dtuple_rec_with_match(tuple, rec, offsets, + &low_match, &low_bytes) < 0) { goto exit_func; } @@ -89,9 +87,8 @@ page_cur_try_search_shortcut( offsets = rec_get_offsets(next_rec, index, offsets, dtuple_get_n_fields(tuple), &heap); - cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets, - &up_match, &up_bytes); - if (cmp != -1) { + if (page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets, + &up_match, &up_bytes) >= 0) { goto exit_func; } @@ -115,7 +112,7 @@ page_cur_try_search_shortcut( ut_a(*ilow_matched_fields == low_match); ut_a(*ilow_matched_bytes == low_bytes); #endif - if (next_rec != page_get_supremum_rec(page)) { + if (!page_rec_is_supremum(next_rec)) { *iup_matched_fields = up_match; *iup_matched_bytes = up_bytes; @@ -137,6 +134,7 @@ exit_func: #endif +#ifdef PAGE_CUR_LE_OR_EXTENDS /******************************************************************** Checks if the nth field in a record is a character type field which extends the nth field in tuple, i.e., the field is longer or equal in length and has @@ -185,6 +183,7 @@ page_cur_rec_field_extends( return(FALSE); } +#endif /* PAGE_CUR_LE_OR_EXTENDS */ /******************************************************************** Searches the right position for a page cursor. */ @@ -239,10 +238,17 @@ page_cur_search_with_match( && ilow_matched_fields && ilow_matched_bytes && cursor); ut_ad(dtuple_validate(tuple)); ut_ad(dtuple_check_typed(tuple)); +#ifdef UNIV_DEBUG +# ifdef PAGE_CUR_DBG + if (mode != PAGE_CUR_DBG) +# endif /* PAGE_CUR_DBG */ +# ifdef PAGE_CUR_LE_OR_EXTENDS + if (mode != PAGE_CUR_LE_OR_EXTENDS) +# endif /* PAGE_CUR_LE_OR_EXTENDS */ ut_ad((mode == PAGE_CUR_L) || (mode == PAGE_CUR_LE) - || (mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE) - || (mode == PAGE_CUR_LE_OR_EXTENDS) || (mode == PAGE_CUR_DBG)); - + || (mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)); +#endif /* UNIV_DEBUG */ + page_check_dir(page); #ifdef PAGE_CUR_ADAPT @@ -261,16 +267,18 @@ page_cur_search_with_match( return; } } -/*#ifdef UNIV_SEARCH_DEBUG */ +# ifdef PAGE_CUR_DBG if (mode == PAGE_CUR_DBG) { mode = PAGE_CUR_LE; } -/*#endif */ +# endif #endif /* The following flag does not work for non-latin1 char sets because cmp_full_field does not tell how many bytes matched */ +#ifdef PAGE_CUR_LE_OR_EXTENDS ut_a(mode != PAGE_CUR_LE_OR_EXTENDS); +#endif /* PAGE_CUR_LE_OR_EXTENDS */ /* If mode PAGE_CUR_G is specified, we are trying to position the cursor to answer a query of the form "tuple < X", where tuple is @@ -308,33 +316,36 @@ page_cur_search_with_match( cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets, &cur_matched_fields, &cur_matched_bytes); - if (cmp == 1) { + if (UNIV_LIKELY(cmp > 0)) { +low_slot_match: low = mid; low_matched_fields = cur_matched_fields; low_matched_bytes = cur_matched_bytes; - } else if (cmp == -1) { + } else if (UNIV_LIKELY(cmp /* == -1 */)) { +#ifdef PAGE_CUR_LE_OR_EXTENDS if (mode == PAGE_CUR_LE_OR_EXTENDS && page_cur_rec_field_extends(tuple, mid_rec, offsets, cur_matched_fields)) { - low = mid; - low_matched_fields = cur_matched_fields; - low_matched_bytes = cur_matched_bytes; - } else { - up = mid; - up_matched_fields = cur_matched_fields; - up_matched_bytes = cur_matched_bytes; - } - } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE - || mode == PAGE_CUR_LE_OR_EXTENDS) { - low = mid; - low_matched_fields = cur_matched_fields; - low_matched_bytes = cur_matched_bytes; - } else { + goto low_slot_match; + } +#endif /* PAGE_CUR_LE_OR_EXTENDS */ +up_slot_match: up = mid; up_matched_fields = cur_matched_fields; up_matched_bytes = cur_matched_bytes; + + } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE +#ifdef PAGE_CUR_LE_OR_EXTENDS + || mode == PAGE_CUR_LE_OR_EXTENDS +#endif /* PAGE_CUR_LE_OR_EXTENDS */ + ) { + + goto low_slot_match; + } else { + + goto up_slot_match; } } @@ -360,32 +371,35 @@ page_cur_search_with_match( cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets, &cur_matched_fields, &cur_matched_bytes); - if (cmp == 1) { + if (UNIV_LIKELY(cmp > 0)) { +low_rec_match: low_rec = mid_rec; low_matched_fields = cur_matched_fields; low_matched_bytes = cur_matched_bytes; - } else if (cmp == -1) { + } else if (UNIV_LIKELY(cmp /* == -1 */)) { +#ifdef PAGE_CUR_LE_OR_EXTENDS if (mode == PAGE_CUR_LE_OR_EXTENDS && page_cur_rec_field_extends(tuple, mid_rec, offsets, cur_matched_fields)) { - low_rec = mid_rec; - low_matched_fields = cur_matched_fields; - low_matched_bytes = cur_matched_bytes; - } else { - up_rec = mid_rec; - up_matched_fields = cur_matched_fields; - up_matched_bytes = cur_matched_bytes; + + goto low_rec_match; } - } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE - || mode == PAGE_CUR_LE_OR_EXTENDS) { - low_rec = mid_rec; - low_matched_fields = cur_matched_fields; - low_matched_bytes = cur_matched_bytes; - } else { +#endif /* PAGE_CUR_LE_OR_EXTENDS */ +up_rec_match: up_rec = mid_rec; up_matched_fields = cur_matched_fields; up_matched_bytes = cur_matched_bytes; + } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE +#ifdef PAGE_CUR_LE_OR_EXTENDS + || mode == PAGE_CUR_LE_OR_EXTENDS +#endif /* PAGE_CUR_LE_OR_EXTENDS */ + ) { + + goto low_rec_match; + } else { + + goto up_rec_match; } } diff --git a/storage/innobase/page/page0page.c b/storage/innobase/page/page0page.c index 1fe7f1d9356..7e09cdf073e 100644 --- a/storage/innobase/page/page0page.c +++ b/storage/innobase/page/page0page.c @@ -483,7 +483,7 @@ page_copy_rec_list_end_no_locks( page_cur_move_to_next(&cur1); } - ut_a(!!page_is_comp(new_page) == index->table->comp); + ut_a((ibool)!!page_is_comp(new_page) == index->table->comp); ut_a(page_is_comp(new_page) == page_is_comp(page)); ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint) (page_is_comp(new_page) @@ -1347,7 +1347,7 @@ page_print_list( ulint* offsets = offsets_; *offsets_ = (sizeof offsets_) / sizeof *offsets_; - ut_a(!!page_is_comp(page) == index->table->comp); + ut_a((ibool)!!page_is_comp(page) == index->table->comp); fprintf(stderr, "--------------------------------\n" @@ -1741,7 +1741,7 @@ page_validate( ulint* offsets = NULL; ulint* old_offsets = NULL; - if (!!comp != index->table->comp) { + if ((ibool)!!comp != index->table->comp) { fputs("InnoDB: 'compact format' flag mismatch\n", stderr); goto func_exit2; } diff --git a/storage/innobase/rem/rem0rec.c b/storage/innobase/rem/rem0rec.c index 580a7bfe509..fbc33aea669 100644 --- a/storage/innobase/rem/rem0rec.c +++ b/storage/innobase/rem/rem0rec.c @@ -601,30 +601,38 @@ rec_set_nth_field_extern_bit_new( /* read the lengths of fields 0..n */ for (i = 0; i < n_fields; i++) { - ibool is_null; - ulint len; field = dict_index_get_nth_field(index, i); type = dict_col_get_type(dict_field_get_col(field)); - is_null = !(dtype_get_prtype(type) & DATA_NOT_NULL); - if (is_null) { - /* nullable field => read the null flag */ - is_null = !!(*nulls & null_mask); + if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { + if (UNIV_UNLIKELY(!(byte) null_mask)) { + nulls--; + null_mask = 1; + } + + if (*nulls & null_mask) { + null_mask <<= 1; + /* NULL fields cannot be external. */ + ut_ad(i != ith); + continue; + } + null_mask <<= 1; - if (null_mask == 0x100) - nulls--, null_mask = 1; } - if (is_null || field->fixed_len) { - /* No length (or extern bit) is stored for - fields that are NULL or fixed-length. */ + if (field->fixed_len) { + /* fixed-length fields cannot be external + (Fixed-length fields longer than + DICT_MAX_COL_PREFIX_LEN will be treated as + variable-length ones in dict_index_add_col().) */ ut_ad(i != ith); continue; } - len = *lens--; + lens--; if (dtype_get_len(type) > 255 || dtype_get_mtype(type) == DATA_BLOB) { + ulint len = lens[1]; if (len & 0x80) { /* 1exxxxxx: 2-byte length */ if (i == ith) { - if (!val == !(len & 0x20)) { + if (!val == !(len & 0x40)) { return; /* no change */ } /* toggle the extern bit */ @@ -823,6 +831,7 @@ rec_convert_dtuple_to_rec_new( byte* lens; ulint len; ulint i; + ulint n_node_ptr_field; ulint fixed_len; ulint null_mask = 1; const ulint n_fields = dtuple_get_n_fields(dtuple); @@ -831,16 +840,26 @@ rec_convert_dtuple_to_rec_new( ut_ad(index->table->comp); ut_ad(n_fields > 0); - switch (status) { + + /* Try to ensure that the memset() between the for() loops + completes fast. The address is not exact, but UNIV_PREFETCH + should never generate a memory fault. */ + UNIV_PREFETCH_RW(rec - REC_N_NEW_EXTRA_BYTES - n_fields); + UNIV_PREFETCH_RW(rec); + + switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) { case REC_STATUS_ORDINARY: ut_ad(n_fields <= dict_index_get_n_fields(index)); + n_node_ptr_field = ULINT_UNDEFINED; break; case REC_STATUS_NODE_PTR: ut_ad(n_fields == dict_index_get_n_unique_in_tree(index) + 1); + n_node_ptr_field = n_fields - 1; break; case REC_STATUS_INFIMUM: case REC_STATUS_SUPREMUM: ut_ad(n_fields == 1); + n_node_ptr_field = ULINT_UNDEFINED; goto init; default: ut_a(0); @@ -852,15 +871,18 @@ rec_convert_dtuple_to_rec_new( rec += (index->n_nullable + 7) / 8; for (i = 0; i < n_fields; i++) { + if (UNIV_UNLIKELY(i == n_node_ptr_field)) { +#ifdef UNIV_DEBUG + field = dtuple_get_nth_field(dtuple, i); + type = dfield_get_type(field); + ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL); + ut_ad(dfield_get_len(field) == 4); +#endif /* UNIV_DEBUG */ + goto init; + } field = dtuple_get_nth_field(dtuple, i); type = dfield_get_type(field); len = dfield_get_len(field); - if (status == REC_STATUS_NODE_PTR && i == n_fields - 1) { - fixed_len = 4; - ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL); - ut_ad(len == 4); - continue; - } fixed_len = dict_index_get_nth_field(index, i)->fixed_len; if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { @@ -902,27 +924,33 @@ init: type = dfield_get_type(field); len = dfield_get_len(field); - if (status == REC_STATUS_NODE_PTR && i == n_fields - 1) { - fixed_len = 4; + if (UNIV_UNLIKELY(i == n_node_ptr_field)) { ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL); ut_ad(len == 4); - goto copy; + memcpy(end, dfield_get_data(field), len); + break; } fixed_len = dict_index_get_nth_field(index, i)->fixed_len; if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { /* nullable field */ ut_ad(index->n_nullable > 0); + + if (UNIV_UNLIKELY(!(byte) null_mask)) { + nulls--; + null_mask = 1; + } + ut_ad(*nulls < null_mask); + /* set the null flag if necessary */ if (len == UNIV_SQL_NULL) { *nulls |= null_mask; + null_mask <<= 1; + continue; } + null_mask <<= 1; - if (null_mask == 0x100) - nulls--, null_mask = 1; - if (len == UNIV_SQL_NULL) - continue; } /* only nullable fields can be null */ ut_ad(len != UNIV_SQL_NULL); @@ -942,7 +970,7 @@ init: *lens-- = (byte) len; } } - copy: + memcpy(end, dfield_get_data(field), len); end += len; } @@ -1105,7 +1133,6 @@ rec_copy_prefix_to_buf( dtype_t* type; ulint i; ulint prefix_len; - ibool is_null; ulint null_mask; ulint status; @@ -1146,20 +1173,22 @@ rec_copy_prefix_to_buf( for (i = 0; i < n_fields; i++) { field = dict_index_get_nth_field(index, i); type = dict_col_get_type(dict_field_get_col(field)); - is_null = !(dtype_get_prtype(type) & DATA_NOT_NULL); - if (is_null) { + if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { /* nullable field => read the null flag */ - is_null = !!(*nulls & null_mask); - null_mask <<= 1; - if (null_mask == 0x100) { - --nulls; - UNIV_PREFETCH_R(nulls); + if (UNIV_UNLIKELY(!(byte) null_mask)) { + nulls--; null_mask = 1; } + + if (*nulls & null_mask) { + null_mask <<= 1; + continue; + } + + null_mask <<= 1; } - if (is_null) { - } else if (field->fixed_len) { + if (field->fixed_len) { prefix_len += field->fixed_len; } else { ulint len = *lens--; diff --git a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c index eb50b83a4d5..2ac0824b331 100644 --- a/storage/innobase/row/row0mysql.c +++ b/storage/innobase/row/row0mysql.c @@ -1429,51 +1429,106 @@ run_again: } /************************************************************************* -Does an unlock of a row for MySQL. */ +This can only be used when srv_locks_unsafe_for_binlog is TRUE. Before +calling this function we must use trx_reset_new_rec_lock_info() and +trx_register_new_rec_lock() to store the information which new record locks +really were set. This function removes a newly set lock under prebuilt->pcur, +and also under prebuilt->clust_pcur. Currently, this is only used and tested +in the case of an UPDATE or a DELETE statement, where the row lock is of the +LOCK_X type. +Thus, this implements a 'mini-rollback' that releases the latest record +locks we set. */ int row_unlock_for_mysql( /*=================*/ /* out: error code or DB_SUCCESS */ - row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL + row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL handle */ + ibool has_latches_on_recs)/* TRUE if called so that we have + the latches on the records under pcur + and clust_pcur, and we do not need to + reposition the cursors. */ { - rec_t* rec; - btr_pcur_t* cur = prebuilt->pcur; + dict_index_t* index; + btr_pcur_t* pcur = prebuilt->pcur; + btr_pcur_t* clust_pcur = prebuilt->clust_pcur; trx_t* trx = prebuilt->trx; + rec_t* rec; mtr_t mtr; ut_ad(prebuilt && trx); ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); - + + if (!srv_locks_unsafe_for_binlog) { + + fprintf(stderr, +"InnoDB: Error: calling row_unlock_for_mysql though\n" +"InnoDB: srv_locks_unsafe_for_binlog is FALSE.\n"); + + return(DB_SUCCESS); + } + trx->op_info = "unlock_row"; - - if (srv_locks_unsafe_for_binlog) { - if (trx->trx_create_lock == TRUE) { - mtr_start(&mtr); + index = btr_pcur_get_btr_cur(pcur)->index; + + if (index != NULL && trx_new_rec_locks_contain(trx, index)) { + + mtr_start(&mtr); - /* Restore a cursor position and find a record */ - btr_pcur_restore_position(BTR_SEARCH_LEAF, cur, &mtr); - rec = btr_pcur_get_rec(cur); + /* Restore the cursor position and find the record */ + + if (!has_latches_on_recs) { + btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, &mtr); + } - if (rec) { + rec = btr_pcur_get_rec(pcur); - lock_rec_reset_and_release_wait(rec); - } else { - fputs("InnoDB: Error: " - "Record for the lock not found\n", - stderr); - mem_analyze_corruption((byte*) trx); - ut_error; - } + mutex_enter(&kernel_mutex); - trx->trx_create_lock = FALSE; - mtr_commit(&mtr); + lock_rec_reset_and_release_wait(rec); + + mutex_exit(&kernel_mutex); + + mtr_commit(&mtr); + + /* If the search was done through the clustered index, then + we have not used clust_pcur at all, and we must NOT try to + reset locks on clust_pcur. The values in clust_pcur may be + garbage! */ + + if (index->type & DICT_CLUSTERED) { + + goto func_exit; } - + } + + index = btr_pcur_get_btr_cur(clust_pcur)->index; + + if (index != NULL && trx_new_rec_locks_contain(trx, index)) { + + mtr_start(&mtr); + + /* Restore the cursor position and find the record */ + + if (!has_latches_on_recs) { + btr_pcur_restore_position(BTR_SEARCH_LEAF, clust_pcur, + &mtr); + } + + rec = btr_pcur_get_rec(clust_pcur); + + mutex_enter(&kernel_mutex); + + lock_rec_reset_and_release_wait(rec); + + mutex_exit(&kernel_mutex); + + mtr_commit(&mtr); } +func_exit: trx->op_info = ""; return(DB_SUCCESS); diff --git a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c index c7a548fe448..15439bed7e7 100644 --- a/storage/innobase/row/row0sel.c +++ b/storage/innobase/row/row0sel.c @@ -2784,6 +2784,10 @@ sel_restore_position_for_mysql( process the record the cursor is now positioned on (i.e. we should not go to the next record yet) */ + ibool* same_user_rec, /* out: TRUE if we were able to restore + the cursor on a user record with the + same ordering prefix in in the + B-tree index */ ulint latch_mode, /* in: latch mode wished in restoration */ btr_pcur_t* pcur, /* in: cursor whose position @@ -2800,6 +2804,8 @@ sel_restore_position_for_mysql( success = btr_pcur_restore_position(latch_mode, pcur, mtr); + *same_user_rec = success; + if (relative_position == BTR_PCUR_ON) { if (success) { return(FALSE); @@ -3064,10 +3070,12 @@ row_search_for_mysql( ulint cnt = 0; #endif /* UNIV_SEARCH_DEBUG */ ulint next_offs; + ibool same_user_rec; mtr_t mtr; mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; + *offsets_ = (sizeof offsets_) / sizeof *offsets_; ut_ad(index && pcur && search_tuple); @@ -3138,6 +3146,16 @@ row_search_for_mysql( trx->search_latch_timeout = BTR_SEA_TIMEOUT; } + /* Reset the new record lock info if we srv_locks_unsafe_for_binlog + is set. Then we are able to remove the record locks set here on an + individual row. */ + + if (srv_locks_unsafe_for_binlog + && prebuilt->select_lock_type != LOCK_NONE) { + + trx_reset_new_rec_lock_info(trx); + } + /*-------------------------------------------------------------*/ /* PHASE 1: Try to pop the row from the prefetch cache */ @@ -3396,8 +3414,9 @@ shortcut_fails_too_big_rec: clust_index = dict_table_get_first_index(index->table); if (UNIV_LIKELY(direction != 0)) { - if (!sel_restore_position_for_mysql(BTR_SEARCH_LEAF, pcur, - moves_up, &mtr)) { + if (!sel_restore_position_for_mysql(&same_user_rec, + BTR_SEARCH_LEAF, + pcur, moves_up, &mtr)) { goto next_rec; } @@ -3659,7 +3678,7 @@ rec_loop: goto normal_return; } } - + /* We are ready to look at a possible new index entry in the result set: the cursor is now placed on a user record */ @@ -3679,6 +3698,7 @@ rec_loop: || srv_locks_unsafe_for_binlog || (unique_search && !UNIV_UNLIKELY(rec_get_deleted_flag( rec, page_rec_is_comp(rec))))) { + goto no_gap_lock; } else { lock_type = LOCK_ORDINARY; @@ -3701,7 +3721,7 @@ rec_loop: && dtuple_get_n_fields_cmp(search_tuple) == dict_index_get_n_unique(index) && 0 == cmp_dtuple_rec(search_tuple, rec, offsets)) { - no_gap_lock: +no_gap_lock: lock_type = LOCK_REC_NOT_GAP; } @@ -3764,6 +3784,7 @@ rec_loop: /* Get the clustered index record if needed */ index_rec = rec; ut_ad(index != clust_index); + goto requires_clust_rec; } } @@ -3773,6 +3794,17 @@ rec_loop: /* The record is delete-marked: we can skip it if this is not a consistent read which might see an earlier version of a non-clustered index record */ + + if (srv_locks_unsafe_for_binlog + && prebuilt->select_lock_type != LOCK_NONE) { + + /* No need to keep a lock on a delete-marked record + if we do not want to use next-key locking. */ + + row_unlock_for_mysql(prebuilt, TRUE); + + trx_reset_new_rec_lock_info(trx); + } goto next_rec; } @@ -3783,7 +3815,8 @@ rec_loop: index_rec = rec; if (index != clust_index && prebuilt->need_to_access_clustered) { - requires_clust_rec: + +requires_clust_rec: /* Before and after this "if" block, "offsets" will be related to "rec", which may be in a secondary index "index" or the clustered index ("clust_index"). However, after this @@ -3816,6 +3849,18 @@ rec_loop: /* The record is delete marked: we can skip it */ + if (srv_locks_unsafe_for_binlog + && prebuilt->select_lock_type != LOCK_NONE) { + + /* No need to keep a lock on a delete-marked + record if we do not want to use next-key + locking. */ + + row_unlock_for_mysql(prebuilt, TRUE); + + trx_reset_new_rec_lock_info(trx); + } + goto next_rec; } @@ -3908,7 +3953,7 @@ got_row: next_rec: /*-------------------------------------------------------------*/ /* PHASE 5: Move the cursor to the next index record */ - + if (UNIV_UNLIKELY(mtr_has_extra_clust_latch)) { /* We must commit mtr if we are moving to the next non-clustered index record, because we could break the @@ -3921,8 +3966,9 @@ next_rec: mtr_has_extra_clust_latch = FALSE; mtr_start(&mtr); - if (sel_restore_position_for_mysql(BTR_SEARCH_LEAF, pcur, - moves_up, &mtr)) { + if (sel_restore_position_for_mysql(&same_user_rec, + BTR_SEARCH_LEAF, + pcur, moves_up, &mtr)) { #ifdef UNIV_SEARCH_DEBUG cnt++; #endif /* UNIV_SEARCH_DEBUG */ @@ -3973,11 +4019,34 @@ lock_wait_or_error: thr->lock_state = QUE_THR_LOCK_ROW; if (row_mysql_handle_errors(&err, trx, thr, NULL)) { + /* It was a lock wait, and it ended */ + thr->lock_state = QUE_THR_LOCK_NOLOCK; mtr_start(&mtr); - sel_restore_position_for_mysql(BTR_SEARCH_LEAF, pcur, - moves_up, &mtr); + sel_restore_position_for_mysql(&same_user_rec, + BTR_SEARCH_LEAF, pcur, + moves_up, &mtr); + if (srv_locks_unsafe_for_binlog && !same_user_rec) { + /* Since we were not able to restore the cursor + on the same user record, we cannot use + row_unlock_for_mysql() to unlock any records, and + we must thus reset the new rec lock info. Since + in lock0lock.c we have blocked the inheriting of gap + X-locks, we actually do not have any new record locks + set in this case. + + Note that if we were able to restore on the 'same' + user record, it is still possible that we were actually + waiting on a delete-marked record, and meanwhile + it was removed by purge and inserted again by some + other user. But that is no problem, because in + rec_loop we will again try to set a lock, and + new_rec_lock_info in trx will be right at the end. */ + + trx_reset_new_rec_lock_info(trx); + } + mode = pcur->search_mode; goto rec_loop; diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c index cf2b8db5d32..514fb6bd577 100644 --- a/storage/innobase/row/row0upd.c +++ b/storage/innobase/row/row0upd.c @@ -818,7 +818,7 @@ row_upd_build_difference_binary( extern_bit = upd_ext_vec_contains(ext_vec, n_ext_vec, i); if (UNIV_UNLIKELY(extern_bit == - !rec_offs_nth_extern(offsets, i)) + (ibool)!rec_offs_nth_extern(offsets, i)) || !dfield_data_is_binary_equal(dfield, len, data)) { upd_field = upd_get_nth_field(update, n_diff); diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index f901425a5f9..837c5be2bb6 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -260,7 +260,7 @@ semaphore contention and convoy problems can occur withput this restriction. Value 10 should be good if there are less than 4 processors + 4 disks in the computer. Bigger computers need bigger values. */ -ulong srv_thread_concurrency = 8; +ulong srv_thread_concurrency = SRV_CONCURRENCY_THRESHOLD; os_fast_mutex_t srv_conc_mutex; /* this mutex protects srv_conc data structures */ @@ -983,12 +983,6 @@ srv_conc_enter_innodb( srv_conc_slot_t* slot = NULL; ulint i; - if (srv_thread_concurrency >= 500) { - /* Disable the concurrency check */ - - return; - } - /* If trx has 'free tickets' to enter the engine left, then use one such ticket */ @@ -1134,7 +1128,7 @@ srv_conc_force_enter_innodb( trx_t* trx) /* in: transaction object associated with the thread */ { - if (srv_thread_concurrency >= 500) { + if (srv_thread_concurrency >= SRV_CONCURRENCY_THRESHOLD) { return; } @@ -1160,7 +1154,7 @@ srv_conc_force_exit_innodb( { srv_conc_slot_t* slot = NULL; - if (srv_thread_concurrency >= 500) { + if (srv_thread_concurrency >= SRV_CONCURRENCY_THRESHOLD) { return; } @@ -1212,11 +1206,6 @@ srv_conc_exit_innodb( trx_t* trx) /* in: transaction object associated with the thread */ { - if (srv_thread_concurrency >= 500) { - - return; - } - if (trx->n_tickets_to_enter_innodb > 0) { /* We will pretend the thread is still inside InnoDB though it now leaves the InnoDB engine. In this way we save diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c index 9e155ee1de0..10fbf3468c0 100644 --- a/storage/innobase/trx/trx0trx.c +++ b/storage/innobase/trx/trx0trx.c @@ -166,6 +166,8 @@ trx_create( memset(&trx->xid, 0, sizeof(trx->xid)); trx->xid.formatID = -1; + trx_reset_new_rec_lock_info(trx); + return(trx); } diff --git a/storage/innobase/trx/trx0undo.c b/storage/innobase/trx/trx0undo.c index c14e4a1f3ab..7441dd3f152 100644 --- a/storage/innobase/trx/trx0undo.c +++ b/storage/innobase/trx/trx0undo.c @@ -559,14 +559,14 @@ trx_undo_write_xid( const XID* xid, /* in: X/Open XA Transaction Identification */ mtr_t* mtr) /* in: mtr */ { - mlog_write_ulint(log_hdr + TRX_UNDO_XA_FORMAT, xid->formatID, - MLOG_4BYTES, mtr); + mlog_write_ulint(log_hdr + TRX_UNDO_XA_FORMAT, + (ulint)xid->formatID, MLOG_4BYTES, mtr); - mlog_write_ulint(log_hdr + TRX_UNDO_XA_TRID_LEN, xid->gtrid_length, - MLOG_4BYTES, mtr); + mlog_write_ulint(log_hdr + TRX_UNDO_XA_TRID_LEN, + (ulint)xid->gtrid_length, MLOG_4BYTES, mtr); - mlog_write_ulint(log_hdr + TRX_UNDO_XA_BQUAL_LEN, xid->bqual_length, - MLOG_4BYTES, mtr); + mlog_write_ulint(log_hdr + TRX_UNDO_XA_BQUAL_LEN, + (ulint)xid->bqual_length, MLOG_4BYTES, mtr); mlog_write_string(log_hdr + TRX_UNDO_XA_XID, (const byte*) xid->data, XIDDATASIZE, mtr); @@ -581,18 +581,14 @@ trx_undo_read_xid( trx_ulogf_t* log_hdr,/* in: undo log header */ XID* xid) /* out: X/Open XA Transaction Identification */ { - ulint i; - - xid->formatID = mach_read_from_4(log_hdr + TRX_UNDO_XA_FORMAT); + xid->formatID = (long)mach_read_from_4(log_hdr + TRX_UNDO_XA_FORMAT); - xid->gtrid_length = mach_read_from_4(log_hdr + TRX_UNDO_XA_TRID_LEN); - - xid->bqual_length = mach_read_from_4(log_hdr + TRX_UNDO_XA_BQUAL_LEN); + xid->gtrid_length = + (long)mach_read_from_4(log_hdr + TRX_UNDO_XA_TRID_LEN); + xid->bqual_length = + (long)mach_read_from_4(log_hdr + TRX_UNDO_XA_BQUAL_LEN); - for (i = 0; i < XIDDATASIZE; i++) { - xid->data[i] = (char)mach_read_from_1(log_hdr + - TRX_UNDO_XA_XID + i); - } + memcpy(xid->data, log_hdr + TRX_UNDO_XA_XID, XIDDATASIZE); } /******************************************************************* |