summaryrefslogtreecommitdiff
path: root/storage/xtradb
diff options
context:
space:
mode:
Diffstat (limited to 'storage/xtradb')
-rw-r--r--storage/xtradb/buf/buf0buf.c2
-rw-r--r--storage/xtradb/handler/ha_innodb.cc547
-rw-r--r--storage/xtradb/handler/ha_innodb.h37
-rw-r--r--storage/xtradb/include/db0err.h3
-rw-r--r--storage/xtradb/include/log0log.h17
-rw-r--r--storage/xtradb/include/row0mysql.h19
-rw-r--r--storage/xtradb/include/srv0srv.h5
-rw-r--r--storage/xtradb/include/trx0sys.ic9
-rw-r--r--storage/xtradb/include/trx0trx.h11
-rw-r--r--storage/xtradb/log/log0log.c97
-rw-r--r--storage/xtradb/row/row0sel.c121
-rw-r--r--storage/xtradb/srv/srv0srv.c9
-rw-r--r--storage/xtradb/trx/trx0trx.c2
13 files changed, 695 insertions, 184 deletions
diff --git a/storage/xtradb/buf/buf0buf.c b/storage/xtradb/buf/buf0buf.c
index 020896b5739..5ea8056bed4 100644
--- a/storage/xtradb/buf/buf0buf.c
+++ b/storage/xtradb/buf/buf0buf.c
@@ -2661,7 +2661,7 @@ buf_page_get_gen(
ulint fix_type;
ibool must_read;
ulint retries = 0;
- mutex_t* block_mutex= 0;
+ mutex_t* block_mutex= NULL;
trx_t* trx = NULL;
ulint sec;
ulint ms;
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index dfe98f09e4f..a6526bd1092 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -32,7 +32,8 @@ Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/* TODO list for the InnoDB handler in 5.0:
- - Remove the flag trx->active_trans and look at trx->conc_state
+ - Remove the flag trx->active_flag & TRX_ACTIVE_IN_MYSQL and look
+ at trx->conc_state
- fix savepoint functions to use savepoint storage area
- Find out what kind of problems the OS X case-insensitivity causes to
table and database names; should we 'normalize' the names like we do
@@ -117,14 +118,18 @@ bool check_global_access(THD *thd, ulong want_access);
/** to protect innobase_open_files */
static pthread_mutex_t innobase_share_mutex;
-/** to force correct commit order in binlog */
-static pthread_mutex_t prepare_commit_mutex;
static ulong commit_threads = 0;
static pthread_mutex_t commit_threads_m;
static pthread_cond_t commit_cond;
static pthread_mutex_t commit_cond_m;
static bool innodb_inited = 0;
+C_MODE_START
+static xtradb_icp_result_t index_cond_func_innodb(void *arg);
+C_MODE_END
+
+
+
#define INSIDE_HA_INNOBASE_CC
/* In the Windows plugin, the return value of current_thd is
@@ -243,6 +248,8 @@ static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
static int innobase_close_connection(handlerton *hton, THD* thd);
+static void innobase_prepare_ordered(handlerton *hton, THD* thd, bool all);
+static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
static int innobase_commit(handlerton *hton, THD* thd, bool all);
static int innobase_rollback(handlerton *hton, THD* thd, bool all);
static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
@@ -508,6 +515,17 @@ bool innobase_show_status(handlerton *hton, THD* thd,
stat_print_fn* stat_print,
enum ha_stat_type stat_type);
+/* Enable / disable checkpoints */
+static int innobase_checkpoint_state(handlerton *hton, bool disable)
+{
+ if (disable)
+ (void) log_disable_checkpoint();
+ else
+ log_enable_checkpoint();
+ return 0;
+}
+
+
/*****************************************************************//**
Commits a transaction in an InnoDB database. */
static
@@ -868,6 +886,9 @@ convert_error_code_to_mysql(
case DB_RECORD_NOT_FOUND:
return(HA_ERR_NO_ACTIVE_RECORD);
+ case DB_SEARCH_ABORTED_BY_USER:
+ return(HA_ERR_ABORTED_BY_USER);
+
case DB_DEADLOCK:
/* Since we rolled back the whole transaction, we must
tell it also to MySQL so that MySQL knows to empty the
@@ -1386,7 +1407,6 @@ innobase_trx_init(
trx_t* trx) /*!< in/out: InnoDB transaction handle */
{
DBUG_ENTER("innobase_trx_init");
- DBUG_ASSERT(EQ_CURRENT_THD(thd));
DBUG_ASSERT(thd == trx->mysql_thd);
trx->check_foreigns = !thd_test_options(
@@ -1445,8 +1465,6 @@ check_trx_exists(
{
trx_t*& trx = thd_to_trx(thd);
- ut_ad(EQ_CURRENT_THD(thd));
-
if (trx == NULL) {
trx = innobase_trx_allocate(thd);
} else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
@@ -1738,10 +1756,10 @@ innobase_query_caching_of_table_permitted(
/* The call of row_search_.. will start a new transaction if it is
not yet started */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(innodb_hton_ptr, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
if (row_search_check_if_query_cache_permitted(trx, norm_name)) {
@@ -2011,11 +2029,11 @@ ha_innobase::init_table_handle_for_HANDLER(void)
/* Set the MySQL flag to mark that there is an active transaction */
- if (prebuilt->trx->active_trans == 0) {
+ if ((prebuilt->trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, user_thd);
- prebuilt->trx->active_trans = 1;
+ prebuilt->trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
/* We did the necessary inits in this function, no need to repeat them
@@ -2066,12 +2084,18 @@ innobase_init(
innobase_hton->savepoint_set=innobase_savepoint;
innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
innobase_hton->savepoint_release=innobase_release_savepoint;
+ if (innobase_release_locks_early)
+ innobase_hton->prepare_ordered=innobase_prepare_ordered;
+ else
+ innobase_hton->prepare_ordered=NULL;
+ innobase_hton->commit_ordered=innobase_commit_ordered;
innobase_hton->commit=innobase_commit;
innobase_hton->rollback=innobase_rollback;
innobase_hton->prepare=innobase_xa_prepare;
innobase_hton->recover=innobase_xa_recover;
innobase_hton->commit_by_xid=innobase_commit_by_xid;
innobase_hton->rollback_by_xid=innobase_rollback_by_xid;
+ innobase_hton->checkpoint_state= innobase_checkpoint_state;
innobase_hton->create_cursor_read_view=innobase_create_cursor_view;
innobase_hton->set_cursor_read_view=innobase_set_cursor_view;
innobase_hton->close_cursor_read_view=innobase_close_cursor_view;
@@ -2595,7 +2619,6 @@ skip_overwrite:
innobase_open_tables = hash_create(200);
pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
- pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST);
pthread_cond_init(&commit_cond, NULL);
@@ -2650,7 +2673,6 @@ innobase_end(
my_free(internal_innobase_data_file_path,
MYF(MY_ALLOW_ZERO_PTR));
pthread_mutex_destroy(&innobase_share_mutex);
- pthread_mutex_destroy(&prepare_commit_mutex);
pthread_mutex_destroy(&commit_threads_m);
pthread_mutex_destroy(&commit_cond_m);
pthread_cond_destroy(&commit_cond);
@@ -2771,15 +2793,167 @@ innobase_start_trx_and_assign_read_view(
/* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(hton, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
DBUG_RETURN(0);
}
/*****************************************************************//**
+Release row locks early during prepare phase.
+
+Only enabled if --innodb-release-locks-early=1. In this case, a prepared
+transaction is treated as committed to memory (but not to disk), and we
+release row locks at the end of the prepare phase.
+
+The consistent commit ordering guarantees of prepare_ordered() calls means
+that transactions will not be binlogged in different order than locks are
+released (which would cause trouble for statement-based replication).
+
+This optimisation is not 100% safe, so is not enabled by default. But some
+applications may decide to enable it to reduce contention on hotspot rows.
+
+The consequences of enabling this are:
+
+ - It is not possible to rollback after successful prepare(). If there is
+ a need to rollback (ie. failure to binlog the transaction), we crash the
+ server (!)
+
+ - If we crash during commit, it is possible that an application/user can have
+ seen another transaction committed that is not recovered by XA crash
+ recovery. Thus durability is partially lost. However, consistency is still
+ guaranteed, we never recover a transaction and not recover another
+ transaction that committed before. */
+static
+void
+innobase_prepare_ordered(
+/*============*/
+ handlerton *hton, /*!< in: Innodb handlerton */
+ THD* thd, /*!< in: MySQL thread handle of the user for whom
+ the transaction should be committed */
+ bool all) /*!< in: TRUE - commit transaction
+ FALSE - the current SQL statement ended */
+{
+ trx_t* trx;
+ DBUG_ENTER("innobase_prepare_ordered");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ trx = check_trx_exists(thd);
+
+ mutex_enter(&kernel_mutex);
+ lock_release_off_kernel(trx);
+ mutex_exit(&kernel_mutex);
+
+ DBUG_VOID_RETURN;
+}
+
+static
+void
+innobase_commit_ordered_2(
+/*============*/
+ trx_t* trx, /*!< in: Innodb transaction */
+ THD* thd) /*!< in: MySQL thread handle */
+{
+ ulonglong tmp_pos;
+ DBUG_ENTER("innobase_commit_ordered");
+
+ /* We need current binlog position for ibbackup to work.
+ Note, the position is current because commit_ordered is guaranteed
+ to be called in same sequenece as writing to binlog. */
+
+retry:
+ if (innobase_commit_concurrency > 0) {
+ pthread_mutex_lock(&commit_cond_m);
+ commit_threads++;
+
+ if (commit_threads > innobase_commit_concurrency) {
+ commit_threads--;
+ pthread_cond_wait(&commit_cond,
+ &commit_cond_m);
+ pthread_mutex_unlock(&commit_cond_m);
+ goto retry;
+ }
+ else {
+ pthread_mutex_unlock(&commit_cond_m);
+ }
+ }
+
+ mysql_bin_log_commit_pos(thd, &tmp_pos, &(trx->mysql_log_file_name));
+ trx->mysql_log_offset = (ib_int64_t) tmp_pos;
+
+ /* Don't do write + flush right now. For group commit
+ to work we want to do the flush in the innobase_commit()
+ method, which runs without holding any locks. */
+ trx->flush_log_later = TRUE;
+ innobase_commit_low(trx);
+ trx->flush_log_later = FALSE;
+
+ if (innobase_commit_concurrency > 0) {
+ pthread_mutex_lock(&commit_cond_m);
+ commit_threads--;
+ pthread_cond_signal(&commit_cond);
+ pthread_mutex_unlock(&commit_cond_m);
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+/*****************************************************************//**
+Perform the first, fast part of InnoDB commit.
+
+Doing it in this call ensures that we get the same commit order here
+as in binlog and any other participating transactional storage engines.
+
+Note that we want to do as little as really needed here, as we run
+under a global mutex. The expensive fsync() is done later, in
+innobase_commit(), without a lock so group commit can take place.
+
+Note also that this method can be called from a different thread than
+the one handling the rest of the transaction. */
+static
+void
+innobase_commit_ordered(
+/*============*/
+ handlerton *hton, /*!< in: Innodb handlerton */
+ THD* thd, /*!< in: MySQL thread handle of the user for whom
+ the transaction should be committed */
+ bool all) /*!< in: TRUE - commit transaction
+ FALSE - the current SQL statement ended */
+{
+ trx_t* trx;
+ DBUG_ENTER("innobase_commit_ordered");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ trx = check_trx_exists(thd);
+
+ /* Since we will reserve the kernel mutex, we must not be holding the
+ search system latch, or we will disobey the latching order. But we
+ already released it in innobase_xa_prepare() (if not before), so just
+ have an assert here.*/
+ ut_ad(!trx->has_search_latch);
+
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0
+ && trx->conc_state != TRX_NOT_STARTED) {
+ /* We cannot throw error here; instead we will catch this error
+ again in innobase_commit() and report it from there. */
+ DBUG_VOID_RETURN;
+ }
+
+ /* commit_ordered is only called when committing the whole transaction
+ (or an SQL statement when autocommit is on). */
+ DBUG_ASSERT(all ||
+ (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
+
+ innobase_commit_ordered_2(trx, thd);
+
+ trx->active_flag |= TRX_ACTIVE_COMMIT_ORDERED;
+
+ DBUG_VOID_RETURN;
+}
+
+/*****************************************************************//**
Commits a transaction in an InnoDB database or marks an SQL statement
ended.
@return 0 */
@@ -2804,11 +2978,12 @@ innobase_commit(
/* Since we will reserve the kernel mutex, we have to release
the search system latch first to obey the latching order. */
- if (trx->has_search_latch) {
+ if (trx->has_search_latch &&
+ (trx->active_flag & TRX_ACTIVE_COMMIT_ORDERED) == 0) {
trx_search_latch_release_if_reserved(trx);
}
- /* The flag trx->active_trans is set to 1 in
+ /* The flag TRX_ACTIVE_IN_MYSQL in trx->active_flag is set in
1. ::external_lock(),
2. ::start_stmt(),
@@ -2818,81 +2993,33 @@ innobase_commit(
6. innobase_start_trx_and_assign_read_view(),
7. ::transactional_table_lock()
- and it is only set to 0 in a commit or a rollback. If it is 0 we know
+ and it is only cleared in a commit or a rollback. If it is unset we know
there cannot be resources to be freed and we could return immediately.
For the time being, we play safe and do the cleanup though there should
be nothing to clean up. */
- if (trx->active_trans == 0
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0
&& trx->conc_state != TRX_NOT_STARTED) {
- sql_print_error("trx->active_trans == 0, but"
- " trx->conc_state != TRX_NOT_STARTED");
+ sql_print_error("trx->active_flag & TRX_ACTIVE_IN_MYSQL== 0,"
+ " but trx->conc_state != TRX_NOT_STARTED");
}
+
if (all
|| (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
- /* We were instructed to commit the whole transaction, or
- this is an SQL statement end and autocommit is on */
-
- /* We need current binlog position for ibbackup to work.
- Note, the position is current because of
- prepare_commit_mutex */
-retry:
- if (innobase_commit_concurrency > 0) {
- pthread_mutex_lock(&commit_cond_m);
- commit_threads++;
-
- if (commit_threads > innobase_commit_concurrency) {
- commit_threads--;
- pthread_cond_wait(&commit_cond,
- &commit_cond_m);
- pthread_mutex_unlock(&commit_cond_m);
- goto retry;
- }
- else {
- pthread_mutex_unlock(&commit_cond_m);
- }
- }
-
- /* The following calls to read the MySQL binary log
- file name and the position return consistent results:
- 1) Other InnoDB transactions cannot intervene between
- these calls as we are holding prepare_commit_mutex.
- 2) Binary logging of other engines is not relevant
- to InnoDB as all InnoDB requires is that committing
- InnoDB transactions appear in the same order in the
- MySQL binary log as they appear in InnoDB logs.
- 3) A MySQL log file rotation cannot happen because
- MySQL protects against this by having a counter of
- transactions in prepared state and it only allows
- a rotation when the counter drops to zero. See
- LOCK_prep_xids and COND_prep_xids in log.cc. */
- trx->mysql_log_file_name = mysql_bin_log_file_name();
- trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
-
- /* Don't do write + flush right now. For group commit
- to work we want to do the flush after releasing the
- prepare_commit_mutex. */
- trx->flush_log_later = TRUE;
- innobase_commit_low(trx);
- trx->flush_log_later = FALSE;
-
- if (innobase_commit_concurrency > 0) {
- pthread_mutex_lock(&commit_cond_m);
- commit_threads--;
- pthread_cond_signal(&commit_cond);
- pthread_mutex_unlock(&commit_cond_m);
+ /* Run the fast part of commit if we did not already. */
+ if ((trx->active_flag & TRX_ACTIVE_COMMIT_ORDERED) == 0) {
+ innobase_commit_ordered_2(trx, thd);
}
- if (trx->active_trans == 2) {
-
- pthread_mutex_unlock(&prepare_commit_mutex);
- }
+ /* We were instructed to commit the whole transaction, or
+ this is an SQL statement end and autocommit is on */
- /* Now do a write + flush of logs. */
+ /* We did the first part already in innobase_commit_ordered(),
+ Now finish by doing a write + flush of logs. */
trx_commit_complete_for_mysql(trx);
- trx->active_trans = 0;
+ trx->active_flag = 0;
} else {
/* We just mark the SQL statement ended and do not do a
@@ -2961,11 +3088,21 @@ innobase_rollback(
row_unlock_table_autoinc_for_mysql(trx);
+ /* if transaction has already released locks, it is too late to
+ rollback */
+ if (innobase_release_locks_early && trx->conc_state == TRX_PREPARED
+ && UT_LIST_GET_LEN(trx->trx_locks) == 0) {
+ sql_print_error("Rollback after releasing locks! "
+ "errno=%d, dberr="ULINTPF,
+ errno, trx->error_state);
+ ut_error;
+ }
+
if (all
|| !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
error = trx_rollback_for_mysql(trx);
- trx->active_trans = 0;
+ trx->active_flag = 0;
} else {
error = trx_rollback_last_sql_stat_for_mysql(trx);
}
@@ -3109,7 +3246,7 @@ innobase_savepoint(
innobase_release_stat_resources(trx);
/* cannot happen outside of transaction */
- DBUG_ASSERT(trx->active_trans);
+ DBUG_ASSERT(trx->active_flag & TRX_ACTIVE_IN_MYSQL);
/* TODO: use provided savepoint data area to store savepoint data */
char name[64];
@@ -3139,11 +3276,11 @@ innobase_close_connection(
ut_a(trx);
- if (trx->active_trans == 0
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0
&& trx->conc_state != TRX_NOT_STARTED) {
- sql_print_error("trx->active_trans == 0, but"
- " trx->conc_state != TRX_NOT_STARTED");
+ sql_print_error("trx->active_flag & TRX_ACTIVE_IN_MYSQL == 0,"
+ " but trx->conc_state != TRX_NOT_STARTED");
}
@@ -3277,7 +3414,7 @@ ha_innobase::index_flags(
const
{
return(HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
- | HA_READ_RANGE | HA_KEYREAD_ONLY);
+ | HA_READ_RANGE | HA_KEYREAD_ONLY | HA_DO_INDEX_COND_PUSHDOWN);
}
/****************************************************************//**
@@ -4668,8 +4805,9 @@ build_template(
THD* thd, /*!< in: current user thread, used
only if templ_type is
ROW_MYSQL_REC_FIELDS */
- TABLE* table, /*!< in: MySQL table */
- uint templ_type) /*!< in: ROW_MYSQL_WHOLE_ROW or
+ TABLE* table, /* in: MySQL table */
+ ha_innobase* file, /* in: ha_innobase handler */
+ uint templ_type) /* in: ROW_MYSQL_WHOLE_ROW or
ROW_MYSQL_REC_FIELDS */
{
dict_index_t* index;
@@ -4683,7 +4821,9 @@ build_template(
ulint sql_idx, innodb_idx=0;
/* byte offset of the end of last requested column */
ulint mysql_prefix_len = 0;
-
+ ibool do_idx_cond_push= FALSE;
+ ibool need_second_pass= FALSE;
+
if (prebuilt->select_lock_type == LOCK_X) {
/* We always retrieve the whole clustered index record if we
use exclusive row level locks, for example, if the read is
@@ -4753,6 +4893,16 @@ build_template(
prebuilt->templ_contains_blob = FALSE;
+
+ /*
+ Setup index condition pushdown (note: we don't need to check if
+ this is a scan on primary key as that is checked in idx_cond_push)
+ */
+ if (file->active_index == file->pushed_idx_cond_keyno &&
+ file->active_index != MAX_KEY &&
+ templ_type == ROW_MYSQL_REC_FIELDS)
+ do_idx_cond_push= need_second_pass= TRUE;
+
/* Note that in InnoDB, i is the column number. MySQL calls columns
'fields'. */
for (sql_idx = 0; sql_idx < n_fields; sql_idx++) {
@@ -4766,6 +4916,8 @@ build_template(
and which we can skip. */
register const ibool index_contains_field =
dict_index_contains_col_or_prefix(index, innodb_idx);
+ register const ibool index_covers_field =
+ field->part_of_key.is_set(file->active_index);
if (!index_contains_field && prebuilt->read_just_key) {
/* If this is a 'key read', we do not need
@@ -4798,8 +4950,12 @@ build_template(
/* This field is not needed in the query, skip it */
goto skip_field;
- }
include_field:
+ if (do_idx_cond_push &&
+ ((need_second_pass && !index_covers_field) ||
+ (!need_second_pass && index_covers_field)))
+ goto skip_field;
+ }
n_requested_fields++;
templ->col_no = innodb_idx;
@@ -4854,6 +5010,13 @@ include_field:
prebuilt->templ_contains_blob = TRUE;
}
skip_field:
+ if (need_second_pass && (sql_idx+1 == n_fields))
+ {
+ prebuilt->n_index_fields= n_requested_fields;
+ need_second_pass= FALSE;
+ sql_idx= (~(ulint)0); /* to start from 0 */
+ innodb_idx= (~(ulint)0); /* to start from 0 */ ///psergey-merge-merge-last-change
+ }
if (field->stored_in_db) {
innodb_idx++;
}
@@ -4862,12 +5025,23 @@ skip_field:
prebuilt->n_template = n_requested_fields;
prebuilt->mysql_prefix_len = mysql_prefix_len;
+ if (do_idx_cond_push)
+ {
+ prebuilt->idx_cond_func= index_cond_func_innodb;
+ prebuilt->idx_cond_func_arg= file;
+ }
+ else
+ {
+ prebuilt->idx_cond_func= NULL;
+ prebuilt->n_index_fields= n_requested_fields;
+ }
+
if (index != clust_index && prebuilt->need_to_access_clustered) {
/* Change rec_field_no's to correspond to the clustered index
record */
- for (ulint i = 0; i < n_requested_fields; i++) {
+ for (ulint i = do_idx_cond_push? prebuilt->n_index_fields : 0;
+ i < n_requested_fields; i++) {
templ = prebuilt->mysql_template + i;
-
templ->rec_field_no = templ->clust_rec_field_no;
}
}
@@ -5068,7 +5242,7 @@ no_commit:
/* Altering to InnoDB format */
innobase_commit(ht, user_thd, 1);
/* Note that this transaction is still active. */
- prebuilt->trx->active_trans = 1;
+ prebuilt->trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
/* We will need an IX lock on the destination table. */
prebuilt->sql_stat_start = TRUE;
} else {
@@ -5084,7 +5258,7 @@ no_commit:
locks, so they have to be acquired again. */
innobase_commit(ht, user_thd, 1);
/* Note that this transaction is still active. */
- prebuilt->trx->active_trans = 1;
+ prebuilt->trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
/* Re-acquire the table lock on the source table. */
row_lock_table_for_mysql(prebuilt, src_table, mode);
/* We will need an IX lock on the destination table. */
@@ -5130,7 +5304,7 @@ no_commit:
/* Build the template used in converting quickly between
the two database formats */
- build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
+ build_template(prebuilt, NULL, table, this, ROW_MYSQL_WHOLE_ROW);
}
innodb_srv_conc_enter_innodb(prebuilt->trx);
@@ -5669,6 +5843,8 @@ ha_innobase::index_end(void)
int error = 0;
DBUG_ENTER("index_end");
active_index=MAX_KEY;
+ in_range_check_pushed_down= FALSE;
+ ds_mrr.dsmrr_close();
DBUG_RETURN(error);
}
@@ -5829,7 +6005,8 @@ ha_innobase::index_read(
necessarily prebuilt->index, but can also be the clustered index */
if (prebuilt->sql_stat_start) {
- build_template(prebuilt, user_thd, table, ROW_MYSQL_REC_FIELDS);
+ build_template(prebuilt, user_thd, table, this,
+ ROW_MYSQL_REC_FIELDS);
}
if (key_ptr) {
@@ -6044,7 +6221,7 @@ ha_innobase::change_active_index(
the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary
copying. Starting from MySQL-4.1 we use a more efficient flag here. */
- build_template(prebuilt, user_thd, table, ROW_MYSQL_REC_FIELDS);
+ build_template(prebuilt, user_thd, table, this, ROW_MYSQL_REC_FIELDS);
DBUG_RETURN(0);
}
@@ -8240,6 +8417,7 @@ ha_innobase::info_low(
}
stats.check_time = 0;
+ stats.mrr_length_per_rec= ref_length + 8; // 8 = max(sizeof(void *));
if (stats.records == 0) {
stats.mean_rec_length = 0;
@@ -8438,7 +8616,7 @@ ha_innobase::check(
/* Build the template; we will use a dummy template
in index scans done in checking */
- build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
+ build_template(prebuilt, NULL, table, this, ROW_MYSQL_WHOLE_ROW);
}
if (prebuilt->table->ibd_file_missing) {
@@ -8931,6 +9109,11 @@ ha_innobase::extra(
break;
case HA_EXTRA_RESET_STATE:
reset_template(prebuilt);
+ /* Reset index condition pushdown state */
+ pushed_idx_cond= FALSE;
+ pushed_idx_cond_keyno= MAX_KEY;
+ prebuilt->idx_cond_func= NULL;
+ in_range_check_pushed_down= FALSE;
break;
case HA_EXTRA_NO_KEYREAD:
prebuilt->read_just_key = 0;
@@ -8978,6 +9161,13 @@ ha_innobase::reset()
reset_template(prebuilt);
+ /* Reset index condition pushdown state */
+ pushed_idx_cond_keyno= MAX_KEY;
+ pushed_idx_cond= NULL;
+ in_range_check_pushed_down= FALSE;
+ ds_mrr.dsmrr_close();
+ prebuilt->idx_cond_func= NULL;
+
/* TODO: This should really be reset in reset_template() but for now
it's safer to do it explicitly here. */
@@ -9061,10 +9251,10 @@ ha_innobase::start_stmt(
trx->detailed_error[0] = '\0';
/* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
} else {
innobase_register_stmt(ht, thd);
}
@@ -9162,10 +9352,10 @@ ha_innobase::external_lock(
/* Set the MySQL flag to mark that there is an active
transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
} else if (trx->n_mysql_tables_in_use == 0) {
innobase_register_stmt(ht, thd);
}
@@ -9263,7 +9453,7 @@ ha_innobase::external_lock(
prebuilt->used_in_HANDLER = FALSE;
if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
- if (trx->active_trans != 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) != 0) {
innobase_commit(ht, thd, TRUE);
}
} else {
@@ -9348,10 +9538,10 @@ ha_innobase::transactional_table_lock(
/* MySQL is setting a new transactional table lock */
/* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
if (THDVAR(thd, table_locks) && thd_in_lock_tables(thd)) {
@@ -10405,10 +10595,11 @@ innobase_xa_prepare(
innobase_release_stat_resources(trx);
- if (trx->active_trans == 0 && trx->conc_state != TRX_NOT_STARTED) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0 &&
+ trx->conc_state != TRX_NOT_STARTED) {
- sql_print_error("trx->active_trans == 0, but trx->conc_state != "
- "TRX_NOT_STARTED");
+ sql_print_error("trx->active_flag & TRX_ACTIVE_IN_MYSQL == 0, but"
+ " trx->conc_state != TRX_NOT_STARTED");
}
if (all
@@ -10417,7 +10608,7 @@ innobase_xa_prepare(
/* We were instructed to prepare the whole transaction, or
this is an SQL statement end and autocommit is on */
- ut_ad(trx->active_trans);
+ ut_ad(trx->active_flag & TRX_ACTIVE_IN_MYSQL);
error = (int) trx_prepare_for_mysql(trx);
} else {
@@ -10441,36 +10632,6 @@ innobase_xa_prepare(
srv_active_wake_master_thread();
- if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
- (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
- {
- if (srv_enable_unsafe_group_commit && !THDVAR(thd, support_xa)) {
- /* choose group commit rather than binlog order */
- return(error);
- }
-
- /* For ibbackup to work the order of transactions in binlog
- and InnoDB must be the same. Consider the situation
-
- thread1> prepare; write to binlog; ...
- <context switch>
- thread2> prepare; write to binlog; commit
- thread1> ... commit
-
- To ensure this will not happen we're taking the mutex on
- prepare, and releasing it on commit.
-
- Note: only do it for normal commits, done via ha_commit_trans.
- If 2pc protocol is executed by external transaction
- coordinator, it will be just a regular MySQL client
- executing XA PREPARE and XA COMMIT commands.
- In this case we cannot know how many minutes or hours
- will be between XA PREPARE and XA COMMIT, and we don't want
- to block for undefined period of time. */
- pthread_mutex_lock(&prepare_commit_mutex);
- trx->active_trans = 2;
- }
-
return(error);
}
@@ -11760,9 +11921,10 @@ static MYSQL_SYSVAR_ENUM(adaptive_checkpoint, srv_adaptive_checkpoint,
"Enable/Disable flushing along modified age. (none, reflex, [estimate], keep_average)",
NULL, innodb_adaptive_checkpoint_update, 2, &adaptive_checkpoint_typelib);
-static MYSQL_SYSVAR_ULONG(enable_unsafe_group_commit, srv_enable_unsafe_group_commit,
+static MYSQL_SYSVAR_ULONG(enable_unsafe_group_commit, srv_deprecated_enable_unsafe_group_commit,
PLUGIN_VAR_RQCMDARG,
- "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine.",
+ "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine. "
+ "(Deprecated, and does nothing, group commit is always enabled in a safe way)",
NULL, NULL, 0, 0, 1, 0);
static MYSQL_SYSVAR_ULONG(expand_import, srv_expand_import,
@@ -11794,6 +11956,11 @@ static MYSQL_SYSVAR_ULINT(pass_corrupt_table, srv_pass_corrupt_table,
"except for the deletion.",
NULL, NULL, 0, 0, 1, 0);
+static MYSQL_SYSVAR_BOOL(release_locks_early, innobase_release_locks_early,
+ PLUGIN_VAR_READONLY,
+ "Release row locks in the prepare stage instead of in the commit stage",
+ NULL, NULL, FALSE);
+
static MYSQL_SYSVAR_ULINT(lazy_drop_table, srv_lazy_drop_table,
PLUGIN_VAR_RQCMDARG,
"At deleting tablespace, only miminum needed processes at the time are done. "
@@ -11891,6 +12058,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(auto_lru_dump),
MYSQL_SYSVAR(use_purge_thread),
MYSQL_SYSVAR(pass_corrupt_table),
+ MYSQL_SYSVAR(release_locks_early),
MYSQL_SYSVAR(lazy_drop_table),
NULL
};
@@ -12105,3 +12273,106 @@ test_innobase_convert_name()
}
#endif /* UNIV_COMPILE_TEST_FUNCS */
+
+
+/****************************************************************************
+ * DS-MRR implementation
+ ***************************************************************************/
+
+/**
+ * Multi Range Read interface, DS-MRR calls
+ */
+
+int ha_innobase::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+ uint n_ranges, uint mode,
+ HANDLER_BUFFER *buf)
+{
+ return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
+}
+
+int ha_innobase::multi_range_read_next(range_id_t *range_info)
+{
+ return ds_mrr.dsmrr_next(range_info);
+}
+
+ha_rows ha_innobase::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param,
+ uint n_ranges, uint *bufsz,
+ uint *flags,
+ COST_VECT *cost)
+{
+ /* See comments in ha_myisam::multi_range_read_info_const */
+ ds_mrr.init(this, table);
+
+ if (prebuilt->select_lock_type != LOCK_NONE)
+ *flags |= HA_MRR_USE_DEFAULT_IMPL;
+
+ ha_rows res= ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges,
+ bufsz, flags, cost);
+ return res;
+}
+
+ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost)
+{
+ ds_mrr.init(this, table);
+ ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz,
+ flags, cost);
+ return res;
+}
+
+int ha_innobase::multi_range_read_explain_info(uint mrr_mode, char *str, size_t size)
+{
+ return ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
+}
+
+/*
+ A helper function used only in index_cond_func_innodb
+*/
+
+bool ha_innobase::is_thd_killed()
+{
+ return thd_killed(user_thd);
+}
+
+/**
+ * Index Condition Pushdown interface implementation
+ */
+
+C_MODE_START
+
+/*
+ Index condition check function to be called from within Innobase.
+ See note on ICP_RESULT for return values description.
+*/
+
+static xtradb_icp_result_t index_cond_func_innodb(void *arg)
+{
+ ha_innobase *h= (ha_innobase*)arg;
+ if (h->is_thd_killed())
+ return XTRADB_ICP_ABORTED_BY_USER;
+
+ if (h->end_range)
+ {
+ if (h->compare_key2(h->end_range) > 0)
+ return XTRADB_ICP_OUT_OF_RANGE; /* caller should return HA_ERR_END_OF_FILE already */
+ }
+ return h->pushed_idx_cond->val_int()? XTRADB_ICP_MATCH : XTRADB_ICP_NO_MATCH;
+}
+
+C_MODE_END
+
+
+Item *ha_innobase::idx_cond_push(uint keyno_arg, Item* idx_cond_arg)
+{
+ if (keyno_arg != primary_key && prebuilt->select_lock_type != LOCK_X)
+ {
+ pushed_idx_cond_keyno= keyno_arg;
+ pushed_idx_cond= idx_cond_arg;
+ in_range_check_pushed_down= TRUE;
+ return NULL; /* Table handler will check the entire condition */
+ }
+ return idx_cond_arg; /* Table handler will not make any checks */
+}
+
diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h
index c60a5eae19e..599b48287e3 100644
--- a/storage/xtradb/handler/ha_innodb.h
+++ b/storage/xtradb/handler/ha_innodb.h
@@ -222,6 +222,28 @@ class ha_innobase: public handler
/** @} */
bool check_if_incompatible_data(HA_CREATE_INFO *info,
uint table_changes);
+ bool check_if_supported_virtual_columns(void) { return TRUE; }
+public:
+ /**
+ * Multi Range Read interface
+ */
+ int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+ uint n_ranges, uint mode, HANDLER_BUFFER *buf);
+ int multi_range_read_next(range_id_t *range_info);
+ ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param,
+ uint n_ranges, uint *bufsz,
+ uint *flags, COST_VECT *cost);
+ ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost);
+ int multi_range_read_explain_info(uint mrr_mode, char *str, size_t size);
+ DsMrr_impl ds_mrr;
+
+ Item *idx_cond_push(uint keyno, Item* idx_cond);
+
+ /* An helper function for index_cond_func_innodb: */
+ bool is_thd_killed();
};
/* Some accessor functions which the InnoDB plugin needs, but which
@@ -240,16 +262,6 @@ LEX_STRING *thd_query_string(MYSQL_THD thd);
char **thd_query(MYSQL_THD thd);
#endif
-/** Get the file name of the MySQL binlog.
- * @return the name of the binlog file
- */
-const char* mysql_bin_log_file_name(void);
-
-/** Get the current position of the MySQL binlog.
- * @return byte offset from the beginning of the binlog
- */
-ulonglong mysql_bin_log_file_pos(void);
-
/**
Check if a user thread is a replication slave thread
@param thd user thread
@@ -290,6 +302,11 @@ bool thd_binlog_filter_ok(const MYSQL_THD thd);
#endif /* MYSQL_VERSION_ID > 50140 */
}
+/** Get the file name and position of the MySQL binlog corresponding to the
+ * current commit.
+ */
+extern void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file);
+
typedef struct trx_struct trx_t;
/********************************************************************//**
@file handler/ha_innodb.h
diff --git a/storage/xtradb/include/db0err.h b/storage/xtradb/include/db0err.h
index c7fa6d2a444..220878629fb 100644
--- a/storage/xtradb/include/db0err.h
+++ b/storage/xtradb/include/db0err.h
@@ -105,7 +105,8 @@ enum db_err {
DB_STRONG_FAIL,
DB_ZIP_OVERFLOW,
DB_RECORD_NOT_FOUND = 1500,
- DB_END_OF_INDEX
+ DB_END_OF_INDEX,
+ DB_SEARCH_ABORTED_BY_USER= 1533
};
#endif
diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h
index 2b4b34f2600..1bca9029648 100644
--- a/storage/xtradb/include/log0log.h
+++ b/storage/xtradb/include/log0log.h
@@ -249,12 +249,15 @@ log_checkpoint(
/*===========*/
ibool sync, /*!< in: TRUE if synchronous operation is
desired */
- ibool write_always); /*!< in: the function normally checks if the
+ ibool write_always, /*!< in: the function normally checks if the
the new checkpoint would have a greater
lsn than the previous one: if not, then no
physical write is done; by setting this
parameter TRUE, a physical write will always be
made to log files */
+ ibool safe_to_ignore);/*!< in: TRUE if checkpoint can be ignored in
+ the case checkpoint's are disabled */
+
/****************************************************************//**
Makes a checkpoint at a given lsn or later. */
UNIV_INTERN
@@ -272,6 +275,18 @@ log_make_checkpoint_at(
physical write will always be made to
log files */
/****************************************************************//**
+Disable checkpoints. This is used when doing a volume snapshot
+to ensure that we don't get checkpoint between snapshoting two
+different volumes */
+UNIV_INTERN
+ibool log_disable_checkpoint();
+
+/****************************************************************//**
+Enable checkpoints that was disabled with log_disable_checkpoint() */
+UNIV_INTERN
+void log_enable_checkpoint();
+
+/****************************************************************//**
Makes a checkpoint at the latest lsn and writes it to first page of each
data file in the database, so that we know that the file spaces contain
all modifications up to that lsn. This can only be called at database
diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h
index 4acfd2e793b..cd9dec2f089 100644
--- a/storage/xtradb/include/row0mysql.h
+++ b/storage/xtradb/include/row0mysql.h
@@ -585,7 +585,18 @@ struct mysql_row_templ_struct {
#define ROW_PREBUILT_ALLOCATED 78540783
#define ROW_PREBUILT_FREED 26423527
+
+typedef enum xtradb_icp_result {
+ XTRADB_ICP_ERROR=-1,
+ XTRADB_ICP_NO_MATCH=0,
+ XTRADB_ICP_MATCH=1,
+ XTRADB_ICP_OUT_OF_RANGE=2,
+ XTRADB_ICP_ABORTED_BY_USER=3,
+} xtradb_icp_result_t;
+
+typedef xtradb_icp_result_t (*index_cond_func_t)(void *param);
/** A struct for (sometimes lazily) prebuilt structures in an Innobase table
+
handle used within MySQL; these are used to save CPU time. */
struct row_prebuilt_struct {
@@ -783,6 +794,14 @@ struct row_prebuilt_struct {
/*----------------------*/
ulint magic_n2; /*!< this should be the same as
magic_n */
+ /*----------------------*/
+ index_cond_func_t idx_cond_func;/* Index Condition Pushdown function,
+ or NULL if there is none set */
+ void* idx_cond_func_arg;/* ICP function argument */
+ ulint n_index_fields; /* Number of fields at the start of
+ mysql_template. Valid only when using
+ ICP. */
+ /*----------------------*/
};
#define ROW_PREBUILT_FETCH_MAGIC_N 465765687
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index d4329d16a62..8bcf215c7ac 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -234,7 +234,7 @@ extern ulong srv_ibuf_active_contract;
extern ulong srv_ibuf_accel_rate;
extern ulint srv_checkpoint_age_target;
extern ulong srv_flush_neighbor_pages;
-extern ulong srv_enable_unsafe_group_commit;
+extern ulong srv_deprecated_enable_unsafe_group_commit;
extern ulong srv_read_ahead;
extern ulong srv_adaptive_checkpoint;
@@ -351,6 +351,9 @@ extern ulint srv_buf_pool_reads;
/** Time in seconds between automatic buffer pool dumps */
extern uint srv_auto_lru_dump;
+/** Release row locks already in the prepare phase */
+extern my_bool innobase_release_locks_early;
+
/** Status variables to be passed to MySQL */
typedef struct export_var_struct export_struc;
diff --git a/storage/xtradb/include/trx0sys.ic b/storage/xtradb/include/trx0sys.ic
index 5e0f07c8b9d..f39ba3055be 100644
--- a/storage/xtradb/include/trx0sys.ic
+++ b/storage/xtradb/include/trx0sys.ic
@@ -338,6 +338,12 @@ trx_list_get_min_trx_id(void)
/****************************************************************//**
Checks if a transaction with the given id is active.
+IMPORTANT ASSUMPTION:
+ It is assumed that this function is only used for the purpose of
+ determining if locks need to be created for the input transaction.
+ So if innobase_release_locks_early global option is set, and if
+ the transaction is already in prepared state, this returns FALSE,
+ as locks are no longer needed for the transaction.
@return TRUE if active */
UNIV_INLINE
ibool
@@ -366,7 +372,8 @@ trx_is_active(
trx = trx_get_on_id(trx_id);
if (trx && (trx->conc_state == TRX_ACTIVE
- || trx->conc_state == TRX_PREPARED)) {
+ || (trx->conc_state == TRX_PREPARED &&
+ !innobase_release_locks_early))) {
return(TRUE);
}
diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h
index 173c63918d3..8858fe2fafa 100644
--- a/storage/xtradb/include/trx0trx.h
+++ b/storage/xtradb/include/trx0trx.h
@@ -511,9 +511,10 @@ struct trx_struct{
in that case we must flush the log
in trx_commit_complete_for_mysql() */
ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
- ulint active_trans; /*!< 1 - if a transaction in MySQL
- is active. 2 - if prepare_commit_mutex
- was taken */
+ ulint active_flag; /*!< TRX_ACTIVE_IN_MYSQL - set if a
+ transaction in MySQL is active.
+ TRX_ACTIVE_COMMIT_ORDERED - set if
+ innobase_commit_ordered has run */
ulint has_search_latch;
/* TRUE if this trx has latched the
search system latch in S-mode */
@@ -824,6 +825,10 @@ Multiple flags can be combined with bitwise OR. */
#define TRX_SIG_OTHER_SESS 1 /* sent by another session (which
must hold rights to this) */
+/* Flag bits for trx_struct.active_flag */
+#define TRX_ACTIVE_IN_MYSQL (1<<0)
+#define TRX_ACTIVE_COMMIT_ORDERED (1<<1)
+
/** Commit node states */
enum commit_node_state {
COMMIT_NODE_SEND = 1, /*!< about to send a commit signal to
diff --git a/storage/xtradb/log/log0log.c b/storage/xtradb/log/log0log.c
index c39f60bd4b9..82c6c3da23e 100644
--- a/storage/xtradb/log/log0log.c
+++ b/storage/xtradb/log/log0log.c
@@ -97,6 +97,8 @@ archive */
UNIV_INTERN byte log_archive_io;
#endif /* UNIV_LOG_ARCHIVE */
+UNIV_INTERN ulint log_disable_checkpoint_active= 0;
+
/* A margin for free space in the log buffer before a log entry is catenated */
#define LOG_BUF_WRITE_MARGIN (4 * OS_FILE_LOG_BLOCK_SIZE)
@@ -174,7 +176,7 @@ log_fsp_current_free_limit_set_and_checkpoint(
success = FALSE;
while (!success) {
- success = log_checkpoint(TRUE, TRUE);
+ success = log_checkpoint(TRUE, TRUE, FALSE);
}
}
@@ -1988,12 +1990,14 @@ log_checkpoint(
/*===========*/
ibool sync, /*!< in: TRUE if synchronous operation is
desired */
- ibool write_always) /*!< in: the function normally checks if the
+ ibool write_always, /*!< in: the function normally checks if the
the new checkpoint would have a greater
lsn than the previous one: if not, then no
physical write is done; by setting this
parameter TRUE, a physical write will always be
made to log files */
+ ibool safe_to_ignore) /*!< in: TRUE if checkpoint can be ignored in
+ the case checkpoint's are disabled */
{
ib_uint64_t oldest_lsn;
@@ -2024,14 +2028,27 @@ log_checkpoint(
mutex_enter(&(log_sys->mutex));
+ /* Return if this is not a forced checkpoint and either there is no
+ need for a checkpoint or if checkpoints are disabled */
if (!write_always
- && log_sys->last_checkpoint_lsn >= oldest_lsn) {
+ && (log_sys->last_checkpoint_lsn >= oldest_lsn ||
+ (safe_to_ignore && log_disable_checkpoint_active)))
+ {
mutex_exit(&(log_sys->mutex));
return(TRUE);
}
+ if (log_disable_checkpoint_active)
+ {
+ /* Wait until we are allowed to do a checkpoint */
+ mutex_exit(&(log_sys->mutex));
+ rw_lock_s_lock(&(log_sys->checkpoint_lock));
+ rw_lock_s_unlock(&(log_sys->checkpoint_lock));
+ mutex_enter(&(log_sys->mutex));
+ }
+
ut_ad(log_sys->flushed_to_disk_lsn >= oldest_lsn);
if (log_sys->n_pending_checkpoint_writes > 0) {
@@ -2092,7 +2109,73 @@ log_make_checkpoint_at(
while (!log_preflush_pool_modified_pages(lsn, TRUE));
- while (!log_checkpoint(TRUE, write_always));
+ while (!log_checkpoint(TRUE, write_always, FALSE));
+}
+
+/****************************************************************//**
+Disable checkpoints. This is used when doing a volumne snapshot
+to ensure that we don't get checkpoint between snapshoting two
+different volumes */
+
+UNIV_INTERN
+ibool log_disable_checkpoint()
+{
+ mutex_enter(&(log_sys->mutex));
+
+ /*
+ Wait if a checkpoint write is running.
+ This is the same code that is used in log_checkpoint() to ensure
+ that two checkpoints are not happening at the same time.
+ */
+ while (log_sys->n_pending_checkpoint_writes > 0)
+ {
+ mutex_exit(&(log_sys->mutex));
+ rw_lock_s_lock(&(log_sys->checkpoint_lock));
+ rw_lock_s_unlock(&(log_sys->checkpoint_lock));
+ mutex_enter(&(log_sys->mutex));
+ }
+ /*
+ The following should never be true; It's is here just in case of
+ wrong usage of this function. (Better safe than sorry).
+ */
+
+ if (log_disable_checkpoint_active)
+ {
+ mutex_exit(&(log_sys->mutex));
+ return 1; /* Already disabled */
+ }
+ /*
+ Take the checkpoint lock to ensure we will not get any checkpoints
+ running
+ */
+ rw_lock_x_lock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
+ log_disable_checkpoint_active= 1;
+ mutex_exit(&(log_sys->mutex));
+ return 0;
+}
+
+
+/****************************************************************//**
+Enable checkpoints that was disabled with log_disable_checkpoint()
+This lock is called by MariaDB and only when we have done call earlier
+to log_disable_checkpoint().
+
+Note: We can't take a log->mutex lock here running log_checkpoint()
+which is waiting (log_sys->checkpoint_lock may already have it.
+This is however safe to do without a mutex as log_disable_checkpoint
+is protected by log_sys->checkpoint_lock.
+*/
+
+UNIV_INTERN
+void log_enable_checkpoint()
+{
+ ut_ad(log_disable_checkpoint_active);
+ /* Test variable, mostly to protect against wrong usage */
+ if (log_disable_checkpoint_active)
+ {
+ log_disable_checkpoint_active= 0;
+ rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
+ }
}
/****************************************************************//**
@@ -2189,7 +2272,7 @@ loop:
}
if (do_checkpoint) {
- log_checkpoint(checkpoint_sync, FALSE);
+ log_checkpoint(checkpoint_sync, FALSE, FALSE);
if (checkpoint_sync) {
@@ -3099,6 +3182,10 @@ logs_empty_and_mark_files_at_shutdown(void)
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: Starting shutdown...\n");
}
+
+ /* Enable checkpoints if someone had turned them off */
+ log_enable_checkpoint();
+
/* Wait until the master thread and all other operations are idle: our
algorithm only works if the server is idle at shutdown */
diff --git a/storage/xtradb/row/row0sel.c b/storage/xtradb/row/row0sel.c
index 1ced125b7bd..13dccdffb96 100644
--- a/storage/xtradb/row/row0sel.c
+++ b/storage/xtradb/row/row0sel.c
@@ -2692,8 +2692,10 @@ row_sel_store_mysql_rec(
ibool rec_clust, /*!< in: TRUE if rec is in the
clustered index instead of
prebuilt->index */
- const ulint* offsets) /*!< in: array returned by
- rec_get_offsets(rec) */
+ const ulint* offsets, /* in: array returned by
+ rec_get_offsets() */
+ ulint start_field_no, /* in: start from this field */
+ ulint end_field_no) /* in: end at this field */
{
mem_heap_t* extern_field_heap = NULL;
mem_heap_t* heap;
@@ -2709,7 +2711,7 @@ row_sel_store_mysql_rec(
prebuilt->blob_heap = NULL;
}
- for (i = 0; i < prebuilt->n_template; i++) {
+ for (i = start_field_no; i < end_field_no /* prebuilt->n_template */ ; i++) {
const mysql_row_templ_t*templ = prebuilt->mysql_template + i;
const byte* data;
@@ -3154,10 +3156,14 @@ row_sel_pop_cached_row_for_mysql(
/* Copy NULL bit of the current field from cached_rec
to buf */
if (templ->mysql_null_bit_mask) {
- buf[templ->mysql_null_byte_offset]
+ /*buf[templ->mysql_null_byte_offset]
^= (buf[templ->mysql_null_byte_offset]
^ cached_rec[templ->mysql_null_byte_offset])
- & (byte)templ->mysql_null_bit_mask;
+ & (byte)templ->mysql_null_bit_mask;*/
+ byte *null_byte= buf + templ->mysql_null_byte_offset;
+ (*null_byte)&= ~templ->mysql_null_bit_mask;
+ (*null_byte)|= cached_rec[templ->mysql_null_byte_offset] &
+ templ->mysql_null_bit_mask;
}
}
}
@@ -3194,7 +3200,10 @@ row_sel_push_cache_row_for_mysql(
ibool rec_clust, /*!< in: TRUE if rec is in the
clustered index instead of
prebuilt->index */
- const ulint* offsets) /*!< in: rec_get_offsets(rec) */
+ const ulint* offsets, /* in: rec_get_offsets() */
+ ulint start_field_no, /* in: start from this field */
+ byte* remainder_buf) /* in: if start_field_no !=0,
+ where to take prev fields */
{
byte* buf;
ulint i;
@@ -3228,12 +3237,44 @@ row_sel_push_cache_row_for_mysql(
prebuilt->mysql_row_len);
if (UNIV_UNLIKELY(!row_sel_store_mysql_rec(
- prebuilt->fetch_cache[
+ prebuilt->fetch_cache[
prebuilt->n_fetch_cached],
- prebuilt, rec, rec_clust, offsets))) {
+ prebuilt,
+ rec,
+ rec_clust,
+ offsets,
+ start_field_no,
+ prebuilt->n_template))) {
return(FALSE);
}
+ if (start_field_no) {
+
+ for (i=0; i < start_field_no; i++) {
+ register ulint offs;
+ mysql_row_templ_t* templ;
+ register byte * null_byte;
+
+ templ = prebuilt->mysql_template + i;
+
+ if (templ->mysql_null_bit_mask) {
+ offs = templ->mysql_null_byte_offset;
+
+ null_byte= prebuilt->fetch_cache[
+ prebuilt->n_fetch_cached]+offs;
+ (*null_byte)&= ~templ->mysql_null_bit_mask;
+ (*null_byte)|= (*(remainder_buf + offs) &
+ templ->mysql_null_bit_mask);
+ }
+
+ offs = templ->mysql_col_offset;
+ memcpy(prebuilt->fetch_cache[prebuilt->n_fetch_cached]
+ + offs,
+ remainder_buf + offs,
+ templ->mysql_col_len);
+ }
+ }
+
prebuilt->n_fetch_cached++;
return(TRUE);
}
@@ -3319,7 +3360,8 @@ and fetch prev. NOTE that if we do a search with a full key value
from a unique index (ROW_SEL_EXACT), then we will not store the cursor
position and fetch next or fetch prev must not be tried to the cursor!
@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
-DB_LOCK_TABLE_FULL, DB_CORRUPTION, or DB_TOO_BIG_RECORD */
+DB_LOCK_TABLE_FULL, DB_CORRUPTION, DB_SEARCH_ABORTED_BY_USER or
+DB_TOO_BIG_RECORD */
UNIV_INTERN
ulint
row_search_for_mysql(
@@ -3373,8 +3415,10 @@ row_search_for_mysql(
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
+ ibool some_fields_in_buffer;
ibool table_lock_waited = FALSE;
ibool problematic_use = FALSE;
+ ibool get_clust_rec = 0;
rec_offs_init(offsets_);
@@ -3638,8 +3682,9 @@ row_search_for_mysql(
ut_ad(!rec_get_deleted_flag(rec, comp));
if (!row_sel_store_mysql_rec(buf, prebuilt,
- rec, FALSE,
- offsets)) {
+ rec, FALSE,
+ offsets, 0,
+ prebuilt->n_template)) {
/* Only fresh inserts may contain
incomplete externally stored
columns. Pretend that such
@@ -4301,7 +4346,8 @@ no_gap_lock:
if (!lock_sec_rec_cons_read_sees(
rec, trx->read_view)) {
- goto requires_clust_rec;
+ get_clust_rec = TRUE;
+ goto idx_cond_check;
}
}
}
@@ -4346,12 +4392,39 @@ no_gap_lock:
goto next_rec;
}
+
+idx_cond_check:
+ if (prebuilt->idx_cond_func) {
+ int res;
+ ibool ib_res;
+ ut_ad(prebuilt->template_type != ROW_MYSQL_DUMMY_TEMPLATE);
+ offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
+ ib_res= row_sel_store_mysql_rec(buf, prebuilt, rec, FALSE,
+ offsets, 0, prebuilt->n_index_fields);
+ /*
+ The above call will fail and return FALSE when requested to
+ store an "externally stored column" (afaiu, a blob). Index
+ Condition Pushdown is not supported for indexes with blob
+ columns, so we should never get this error.
+ */
+ ut_ad(ib_res);
+ res= prebuilt->idx_cond_func(prebuilt->idx_cond_func_arg);
+ if (res == XTRADB_ICP_NO_MATCH)
+ goto next_rec;
+ else if (res != XTRADB_ICP_MATCH) {
+ err= (res == XTRADB_ICP_ABORTED_BY_USER ?
+ DB_SEARCH_ABORTED_BY_USER :
+ DB_RECORD_NOT_FOUND);
+ goto idx_cond_failed;
+ }
+ /* res == XTRADB_ICP_MATCH */
+ }
+
/* Get the clustered index record if needed, if we did not do the
search using the clustered index. */
+ if (get_clust_rec || (index != clust_index
+ && prebuilt->need_to_access_clustered)) {
- if (index != clust_index && prebuilt->need_to_access_clustered) {
-
-requires_clust_rec:
/* We use a 'goto' to the preceding label if a consistent
read of a secondary index record requires us to look up old
versions of the associated clustered index record. */
@@ -4447,10 +4520,15 @@ requires_clust_rec:
are BLOBs in the fields to be fetched. In HANDLER we do
not cache rows because there the cursor is a scrollable
cursor. */
+ some_fields_in_buffer = (index != clust_index
+ && prebuilt->idx_cond_func);
if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
result_rec != rec,
- offsets)) {
+ offsets,
+ some_fields_in_buffer?
+ prebuilt->n_index_fields : 0,
+ buf)) {
/* Only fresh inserts may contain incomplete
externally stored columns. Pretend that such
records do not exist. Such records may only be
@@ -4491,7 +4569,10 @@ requires_clust_rec:
if (!row_sel_store_mysql_rec(buf, prebuilt, result_rec,
result_rec != rec,
- offsets)) {
+ offsets,
+ prebuilt->idx_cond_func?
+ prebuilt->n_index_fields: 0,
+ prebuilt->n_template)) {
/* Only fresh inserts may contain
incomplete externally stored
columns. Pretend that such records do
@@ -4527,6 +4608,9 @@ got_row:
HANDLER command where the user can move the cursor with PREV or NEXT
even after a unique search. */
+ err = DB_SUCCESS;
+
+idx_cond_failed:
if (!unique_search_from_clust_index
|| prebuilt->select_lock_type != LOCK_NONE
|| prebuilt->used_in_HANDLER) {
@@ -4536,12 +4620,11 @@ got_row:
btr_pcur_store_position(pcur, &mtr);
}
- err = DB_SUCCESS;
-
goto normal_return;
next_rec:
/* Reset the old and new "did semi-consistent read" flags. */
+ get_clust_rec = FALSE;
if (UNIV_UNLIKELY(prebuilt->row_read_type
== ROW_READ_DID_SEMI_CONSISTENT)) {
prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c
index f39d1b8a758..fb13ec395c2 100644
--- a/storage/xtradb/srv/srv0srv.c
+++ b/storage/xtradb/srv/srv0srv.c
@@ -414,7 +414,7 @@ UNIV_INTERN ulong srv_ibuf_accel_rate = 100;
UNIV_INTERN ulint srv_checkpoint_age_target = 0;
UNIV_INTERN ulong srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */
-UNIV_INTERN ulong srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
+UNIV_INTERN ulong srv_deprecated_enable_unsafe_group_commit = 0;
UNIV_INTERN ulong srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
UNIV_INTERN ulong srv_adaptive_checkpoint = 0; /* 0: none 1: reflex 2: estimate */
@@ -496,6 +496,9 @@ UNIV_INTERN FILE* srv_misc_tmpfile;
UNIV_INTERN ulint srv_main_thread_process_no = 0;
UNIV_INTERN ulint srv_main_thread_id = 0;
+/* Release row locks already in the prepare phase */
+UNIV_INTERN my_bool innobase_release_locks_early = FALSE;
+
/* The following count work done by srv_master_thread. */
/* Iterations by the 'once per second' loop. */
@@ -3239,7 +3242,7 @@ retry_flush_batch:
/* Make a new checkpoint about once in 10 seconds */
- log_checkpoint(TRUE, FALSE);
+ log_checkpoint(TRUE, FALSE, TRUE);
srv_main_thread_op_info = "reserving kernel mutex";
@@ -3360,7 +3363,7 @@ flush_loop:
srv_main_thread_op_info = "making checkpoint";
- log_checkpoint(TRUE, FALSE);
+ log_checkpoint(TRUE, FALSE, TRUE);
if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) {
diff --git a/storage/xtradb/trx/trx0trx.c b/storage/xtradb/trx/trx0trx.c
index 98bd9e4ac58..7ea3e09036f 100644
--- a/storage/xtradb/trx/trx0trx.c
+++ b/storage/xtradb/trx/trx0trx.c
@@ -121,7 +121,7 @@ trx_create(
trx->table_id = ut_dulint_zero;
trx->mysql_thd = NULL;
- trx->active_trans = 0;
+ trx->active_flag = 0;
trx->duplicates = 0;
trx->n_mysql_tables_in_use = 0;