summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergei Golubchik <sergii@pisem.net>2011-07-17 11:28:48 +0200
committerSergei Golubchik <sergii@pisem.net>2011-07-17 11:28:48 +0200
commitd4d7a8fa62c406be73f6c0f6d75e795293db548b (patch)
tree43220eabdf2bfaf56a17a303008ee07c60a79f13
parent06eeb51101c2abe784cd77f8151970b32f1c678e (diff)
downloadmariadb-git-d4d7a8fa62c406be73f6c0f6d75e795293db548b.tar.gz
applied percona patches to InnoDB as of 5.5.15
-rw-r--r--btr/btr0cur.c23
-rw-r--r--buf/buf0buf.c10
-rw-r--r--buf/buf0flu.c2
-rw-r--r--data/data0data.c8
-rw-r--r--dict/dict0crea.c15
-rw-r--r--dict/dict0dict.c60
-rw-r--r--dict/dict0load.c86
-rw-r--r--dict/dict0mem.c1
-rw-r--r--fil/fil0fil.c20
-rw-r--r--handler/ha_innodb.cc26
-rw-r--r--handler/ha_innodb.h4
-rw-r--r--handler/handler0alter.cc314
-rw-r--r--include/buf0buf.h3
-rw-r--r--include/db0err.h2
-rw-r--r--include/dict0dict.h13
-rw-r--r--include/dict0dict.ic26
-rw-r--r--include/dict0load.h7
-rw-r--r--include/dict0mem.h42
-rw-r--r--include/fil0fil.h3
-rw-r--r--include/lock0lock.h7
-rw-r--r--include/lock0lock.ic6
-rw-r--r--include/mtr0mtr.ic2
-rw-r--r--include/os0file.h13
-rw-r--r--include/os0file.ic3
-rw-r--r--include/page0page.h19
-rw-r--r--include/page0page.ic23
-rw-r--r--include/rem0rec.h14
-rw-r--r--include/rem0rec.ic41
-rw-r--r--include/rem0types.h20
-rw-r--r--include/row0ext.h13
-rw-r--r--include/row0ext.ic9
-rw-r--r--include/row0row.h30
-rw-r--r--include/row0row.ic36
-rw-r--r--include/univ.i4
-rw-r--r--lock/lock0lock.c151
-rw-r--r--log/log0log.c8
-rw-r--r--log/log0recv.c4
-rw-r--r--mtr/mtr0mtr.c13
-rw-r--r--os/os0file.c44
-rw-r--r--page/page0page.c22
-rw-r--r--page/page0zip.c2
-rw-r--r--percona-suite/percona_mysqldump_innodb_optimize_keys.result109
-rw-r--r--percona-suite/percona_mysqldump_innodb_optimize_keys.test62
-rw-r--r--percona-suite/percona_query_cache_with_comments.inc.backup88
-rw-r--r--percona-suite/percona_query_response_time-replication.result66
-rw-r--r--percona-suite/percona_query_response_time-replication.test61
-rw-r--r--percona-suite/percona_query_response_time-stored.result306
-rw-r--r--percona-suite/percona_query_response_time-stored.test85
-rw-r--r--percona-suite/percona_query_response_time.result377
-rw-r--r--percona-suite/percona_query_response_time.test71
-rw-r--r--percona-suite/percona_query_response_time_flush.inc1
-rw-r--r--percona-suite/percona_query_response_time_show.inc8
-rw-r--r--percona-suite/percona_query_response_time_sleep.inc19
-rw-r--r--rem/rem0rec.c2
-rw-r--r--row/row0ext.c19
-rw-r--r--row/row0mysql.c13
-rw-r--r--row/row0row.c48
-rw-r--r--row/row0sel.c11
-rw-r--r--row/row0upd.c4
-rw-r--r--row/row0vers.c18
-rw-r--r--srv/srv0srv.c4
-rw-r--r--trx/trx0rec.c63
-rw-r--r--trx/trx0undo.c3
-rw-r--r--ut/ut0ut.c2
64 files changed, 2015 insertions, 574 deletions
diff --git a/btr/btr0cur.c b/btr/btr0cur.c
index 1d8c7904dc1..f6ef44e5b5c 100644
--- a/btr/btr0cur.c
+++ b/btr/btr0cur.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -2067,6 +2067,9 @@ btr_cur_optimistic_update(
heap = mem_heap_create(1024);
offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
+#ifdef UNIV_BLOB_NULL_DEBUG
+ ut_a(!rec_offs_any_null_extern(rec, offsets));
+#endif /* UNIV_BLOB_NULL_DEBUG */
#ifdef UNIV_DEBUG
if (btr_cur_print_record_ops && thr) {
@@ -3347,9 +3350,14 @@ btr_estimate_n_rows_in_range_on_level(
mtr_start(&mtr);
- /* fetch the page */
- block = buf_page_get(space, zip_size, page_no, RW_S_LATCH,
- &mtr);
+ /* Fetch the page. Because we are not holding the
+ index->lock, the tree may have changed and we may be
+ attempting to read a page that is no longer part of
+ the B-tree. We pass BUF_GET_POSSIBLY_FREED in order to
+ silence a debug assertion about this. */
+ block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH,
+ NULL, BUF_GET_POSSIBLY_FREED,
+ __FILE__, __LINE__, &mtr);
page = buf_block_get_frame(block);
@@ -3368,6 +3376,13 @@ btr_estimate_n_rows_in_range_on_level(
goto inexact;
}
+ /* It is possible but highly unlikely that the page was
+ originally written by an old version of InnoDB that did
+ not initialize FIL_PAGE_TYPE on other than B-tree pages.
+ For example, this could be an almost-empty BLOB page
+ that happens to contain the magic values in the fields
+ that we checked above. */
+
n_pages_read++;
if (page_no != slot1->page_no) {
diff --git a/buf/buf0buf.c b/buf/buf0buf.c
index 76b8f04e978..d588d2b90f9 100644
--- a/buf/buf0buf.c
+++ b/buf/buf0buf.c
@@ -2984,6 +2984,7 @@ buf_page_get_gen(
case BUF_GET_IF_IN_POOL:
case BUF_PEEK_IF_IN_POOL:
case BUF_GET_IF_IN_POOL_OR_WATCH:
+ case BUF_GET_POSSIBLY_FREED:
break;
default:
ut_error;
@@ -3359,7 +3360,10 @@ wait_until_unfixed:
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
buf_block_buf_fix_inc(block, file, line);
-
+#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
+ ut_a(mode == BUF_GET_POSSIBLY_FREED
+ || !block->page.file_page_was_freed);
+#endif
//mutex_exit(&block->mutex);
/* Check if this is the first access to the page */
@@ -3373,10 +3377,6 @@ wait_until_unfixed:
buf_page_set_accessed_make_young(&block->page, access_time);
}
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- ut_a(!block->page.file_page_was_freed);
-#endif
-
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(block->page.buf_fix_count > 0);
diff --git a/buf/buf0flu.c b/buf/buf0flu.c
index e642b532a8f..09d11dd21db 100644
--- a/buf/buf0flu.c
+++ b/buf/buf0flu.c
@@ -867,7 +867,7 @@ corrupted_page:
flush:
/* Now flush the doublewrite buffer data to disk */
- fil_flush(srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE);
+ fil_flush(srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE, FALSE);
/* We know that the writes have been flushed to disk now
and in recovery we will find them in the doublewrite buffer
diff --git a/data/data0data.c b/data/data0data.c
index 0ef0cfa554a..6d07fc249fa 100644
--- a/data/data0data.c
+++ b/data/data0data.c
@@ -585,7 +585,8 @@ dtuple_convert_big_rec(
if (dict_table_get_format(index->table) < DICT_TF_FORMAT_ZIP) {
/* up to MySQL 5.1: store a 768-byte prefix locally */
- local_len = BTR_EXTERN_FIELD_REF_SIZE + DICT_MAX_INDEX_COL_LEN;
+ local_len = BTR_EXTERN_FIELD_REF_SIZE
+ + DICT_ANTELOPE_MAX_INDEX_COL_LEN;
} else {
/* new-format table: do not store any BLOB prefix locally */
local_len = BTR_EXTERN_FIELD_REF_SIZE;
@@ -757,7 +758,10 @@ dtuple_convert_back_big_rec(
local_len -= BTR_EXTERN_FIELD_REF_SIZE;
- ut_ad(local_len <= DICT_MAX_INDEX_COL_LEN);
+ /* Only in REDUNDANT and COMPACT format, we store
+ up to DICT_ANTELOPE_MAX_INDEX_COL_LEN (768) bytes
+ locally */
+ ut_ad(local_len <= DICT_ANTELOPE_MAX_INDEX_COL_LEN);
dfield_set_data(dfield,
(char*) b->data - local_len,
diff --git a/dict/dict0crea.c b/dict/dict0crea.c
index 01025bac423..49c16b1daab 100644
--- a/dict/dict0crea.c
+++ b/dict/dict0crea.c
@@ -730,9 +730,9 @@ dict_create_index_tree_step(
/* printf("Created a new index tree in space %lu root page %lu\n",
index->space, index->page_no); */
- page_rec_write_index_page_no(btr_pcur_get_rec(&pcur),
- DICT_SYS_INDEXES_PAGE_NO_FIELD,
- node->page_no, &mtr);
+ page_rec_write_field(btr_pcur_get_rec(&pcur),
+ DICT_SYS_INDEXES_PAGE_NO_FIELD,
+ node->page_no, &mtr);
btr_pcur_close(&pcur);
mtr_commit(&mtr);
@@ -802,9 +802,8 @@ dict_drop_index_tree(
root_page_no); */
btr_free_root(space, zip_size, root_page_no, mtr);
- page_rec_write_index_page_no(rec,
- DICT_SYS_INDEXES_PAGE_NO_FIELD,
- FIL_NULL, mtr);
+ page_rec_write_field(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
+ FIL_NULL, mtr);
}
/*******************************************************************//**
@@ -907,8 +906,8 @@ create:
in SYS_INDEXES, so that the database will not get into an
inconsistent state in case it crashes between the mtr_commit()
below and the following mtr_commit() call. */
- page_rec_write_index_page_no(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
- FIL_NULL, mtr);
+ page_rec_write_field(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
+ FIL_NULL, mtr);
/* We will need to commit the mini-transaction in order to avoid
deadlocks in the btr_create() call, because otherwise we would
diff --git a/dict/dict0dict.c b/dict/dict0dict.c
index 43e60bc3263..1ad540f47ab 100644
--- a/dict/dict0dict.c
+++ b/dict/dict0dict.c
@@ -1415,36 +1415,63 @@ dict_index_too_big_for_undo(
ulint fixed_size
= dict_col_get_fixed_size(col,
dict_table_is_comp(table));
+ ulint max_prefix
+ = col->max_prefix;
if (fixed_size) {
/* Fixed-size columns are stored locally. */
max_size = fixed_size;
} else if (max_size <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
/* Short columns are stored locally. */
- } else if (!col->ord_part) {
+ } else if (!col->ord_part
+ || (col->max_prefix
+ < (ulint) DICT_MAX_FIELD_LEN_BY_FORMAT(table))) {
/* See if col->ord_part would be set
- because of new_index. */
+ because of new_index. Also check if the new
+ index could have longer prefix on columns
+ that already had ord_part set */
ulint j;
for (j = 0; j < new_index->n_uniq; j++) {
if (dict_index_get_nth_col(
new_index, j) == col) {
+ const dict_field_t* field
+ = dict_index_get_nth_field(
+ new_index, j);
+
+ if (field->prefix_len
+ > col->max_prefix) {
+ max_prefix =
+ field->prefix_len;
+ }
goto is_ord_part;
}
}
+ if (col->ord_part) {
+ goto is_ord_part;
+ }
+
/* This is not an ordering column in any index.
Thus, it can be stored completely externally. */
max_size = BTR_EXTERN_FIELD_REF_SIZE;
} else {
+ ulint max_field_len;
is_ord_part:
+ max_field_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table);
+
/* This is an ordering column in some index.
A long enough prefix must be written to the
undo log. See trx_undo_page_fetch_ext(). */
+ max_size = ut_min(max_size, max_field_len);
+
+ /* We only store the needed prefix length in undo log */
+ if (max_prefix) {
+ ut_ad(dict_table_get_format(table)
+ >= DICT_TF_FORMAT_ZIP);
- if (max_size > REC_MAX_INDEX_COL_LEN) {
- max_size = REC_MAX_INDEX_COL_LEN;
+ max_size = ut_min(max_prefix, max_size);
}
max_size += BTR_EXTERN_FIELD_REF_SIZE;
@@ -1698,15 +1725,16 @@ too_big:
/* In dtuple_convert_big_rec(), variable-length columns
that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2
may be chosen for external storage. If the column appears
- in an ordering column of an index, a longer prefix of
- REC_MAX_INDEX_COL_LEN will be copied to the undo log
- by trx_undo_page_report_modify() and
+ in an ordering column of an index, a longer prefix determined
+ by dict_max_field_len_store_undo() will be copied to the undo
+ log by trx_undo_page_report_modify() and
trx_undo_page_fetch_ext(). It suffices to check the
capacity of the undo log whenever new_index includes
a column prefix on a column that may be stored externally. */
if (field->prefix_len /* prefix index */
- && !col->ord_part /* not yet ordering column */
+ && (!col->ord_part /* not yet ordering column */
+ || field->prefix_len > col->max_prefix)
&& !dict_col_get_fixed_size(col, TRUE) /* variable-length */
&& dict_col_get_max_size(col)
> BTR_EXTERN_FIELD_REF_SIZE * 2 /* long enough */) {
@@ -1723,11 +1751,17 @@ too_big:
}
undo_size_ok:
- /* Flag the ordering columns */
+ /* Flag the ordering columns and also set column max_prefix */
for (i = 0; i < n_ord; i++) {
+ const dict_field_t* field
+ = dict_index_get_nth_field(new_index, i);
- dict_index_get_nth_field(new_index, i)->col->ord_part = 1;
+ field->col->ord_part = 1;
+
+ if (field->prefix_len > field->col->max_prefix) {
+ field->col->max_prefix = field->prefix_len;
+ }
}
/* Add the new index as the last index for the table */
@@ -1935,14 +1969,14 @@ dict_index_add_col(
variable-length fields, so that the extern flag can be embedded in
the length word. */
- if (field->fixed_len > DICT_MAX_INDEX_COL_LEN) {
+ if (field->fixed_len > DICT_MAX_FIXED_COL_LEN) {
field->fixed_len = 0;
}
-#if DICT_MAX_INDEX_COL_LEN != 768
+#if DICT_MAX_FIXED_COL_LEN != 768
/* The comparison limit above must be constant. If it were
changed, the disk format of some fixed-length columns would
change, which would be a disaster. */
-# error "DICT_MAX_INDEX_COL_LEN != 768"
+# error "DICT_MAX_FIXED_COL_LEN != 768"
#endif
if (!(col->prtype & DATA_NOT_NULL)) {
diff --git a/dict/dict0load.c b/dict/dict0load.c
index 83284fe6870..c5bd84f84ad 100644
--- a/dict/dict0load.c
+++ b/dict/dict0load.c
@@ -432,7 +432,7 @@ dict_process_sys_fields_rec(
mach_write_to_8(last_index_id, last_id);
err_msg = dict_load_field_low(buf, NULL, sys_field,
- pos, last_index_id, heap, rec);
+ pos, last_index_id, heap, rec, NULL, 0);
*index_id = mach_read_from_8(buf);
@@ -1066,6 +1066,9 @@ dict_load_columns(
/** Error message for a delete-marked record in dict_load_field_low() */
static const char* dict_load_field_del = "delete-marked record in SYS_FIELDS";
+static const char* dict_load_field_too_big = "column prefix exceeds maximum"
+ " limit";
+
/********************************************************************//**
Loads an index field definition from a SYS_FIELDS record to
dict_index_t.
@@ -1087,7 +1090,12 @@ dict_load_field_low(
byte* last_index_id, /*!< in: last index id */
mem_heap_t* heap, /*!< in/out: memory heap
for temporary storage */
- const rec_t* rec) /*!< in: SYS_FIELDS record */
+ const rec_t* rec, /*!< in: SYS_FIELDS record */
+ char* addition_err_str,/*!< out: additional error message
+ that requires information to be
+ filled, or NULL */
+ ulint err_str_len) /*!< in: length of addition_err_str
+ in bytes */
{
const byte* field;
ulint len;
@@ -1167,6 +1175,19 @@ err_len:
goto err_len;
}
+ if (prefix_len > REC_VERSION_56_MAX_INDEX_COL_LEN) {
+ if (addition_err_str) {
+ ut_snprintf(addition_err_str, err_str_len,
+ "index field '%s' has a prefix length"
+ " of %lu bytes",
+ mem_heap_strdupl(
+ heap, (const char*) field, len),
+ (ulong) prefix_len);
+ }
+
+ return(dict_load_field_too_big);
+ }
+
if (index) {
dict_mem_index_add_field(
index, mem_heap_strdupl(heap, (const char*) field, len),
@@ -1226,14 +1247,16 @@ dict_load_fields(
btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
BTR_SEARCH_LEAF, &pcur, &mtr);
for (i = 0; i < index->n_fields; i++) {
- const char* err_msg;
+ const char* err_msg;
+ char addition_err_str[1024];
rec = btr_pcur_get_rec(&pcur);
ut_a(btr_pcur_is_on_user_rec(&pcur));
err_msg = dict_load_field_low(buf, index, NULL, NULL, NULL,
- heap, rec);
+ heap, rec, addition_err_str,
+ sizeof(addition_err_str));
if (err_msg == dict_load_field_del) {
/* There could be delete marked records in
@@ -1242,7 +1265,24 @@ dict_load_fields(
goto next_rec;
} else if (err_msg) {
- fprintf(stderr, "InnoDB: %s\n", err_msg);
+ if (err_msg == dict_load_field_too_big) {
+ fprintf(stderr, "InnoDB: Error: load index"
+ " '%s' failed.\n"
+ "InnoDB: %s,\n"
+ "InnoDB: which exceeds the"
+ " maximum limit of %lu bytes.\n"
+ "InnoDB: Please use server that"
+ " supports long index prefix\n"
+ "InnoDB: or turn on"
+ " innodb_force_recovery to load"
+ " the table\n",
+ index->name, addition_err_str,
+ (ulong) (REC_VERSION_56_MAX_INDEX_COL_LEN));
+
+ } else {
+ fprintf(stderr, "InnoDB: %s\n", err_msg);
+ }
+
error = DB_CORRUPTION;
goto func_exit;
}
@@ -1518,7 +1558,26 @@ corrupted:
of the database server */
dict_mem_index_free(index);
} else {
- dict_load_fields(index, heap);
+ error = dict_load_fields(index, heap);
+
+ if (error != DB_SUCCESS) {
+
+ fprintf(stderr, "InnoDB: Error: load index '%s'"
+ " for table '%s' failed\n",
+ index->name, table->name);
+
+ /* If the force recovery flag is set, and
+ if the failed index is not the primary index, we
+ will continue and open other indexes */
+ if (srv_force_recovery
+ && !dict_index_is_clust(index)) {
+ error = DB_SUCCESS;
+ goto next_rec;
+ } else {
+ goto func_exit;
+ }
+ }
+
error = dict_index_add_to_cache(table, index,
index->page, FALSE);
/* The data dictionary tables should never contain
@@ -1843,9 +1902,18 @@ err_exit:
} else {
table->fk_max_recusive_level = 0;
}
- } else if (!srv_force_recovery) {
- dict_table_remove_from_cache(table);
- table = NULL;
+ } else {
+ dict_index_t* index;
+
+ /* Make sure that at least the clustered index was loaded.
+ Otherwise refuse to load the table */
+ index = dict_table_get_first_index(table);
+
+ if (!srv_force_recovery || !index
+ || !dict_index_is_clust(index)) {
+ dict_table_remove_from_cache(table);
+ table = NULL;
+ }
}
#if 0
if (err != DB_SUCCESS && table != NULL) {
diff --git a/dict/dict0mem.c b/dict/dict0mem.c
index 5d5937390b3..617c68925cb 100644
--- a/dict/dict0mem.c
+++ b/dict/dict0mem.c
@@ -234,6 +234,7 @@ dict_mem_fill_column_struct(
column->ind = (unsigned int) col_pos;
column->ord_part = 0;
+ column->max_prefix = 0;
column->mtype = (unsigned int) mtype;
column->prtype = (unsigned int) prtype;
column->len = (unsigned int) col_len;
diff --git a/fil/fil0fil.c b/fil/fil0fil.c
index 22606b67897..0e42214aa31 100644
--- a/fil/fil0fil.c
+++ b/fil/fil0fil.c
@@ -866,7 +866,8 @@ fil_node_close_file(
ut_a(node->open);
ut_a(node->n_pending == 0 || node->space->is_being_deleted);
ut_a(node->n_pending_flushes == 0);
- ut_a(node->modification_counter == node->flush_counter);
+ ut_a(node->modification_counter == node->flush_counter
+ || srv_fast_shutdown == 2);
ret = os_file_close(node->handle);
ut_a(ret);
@@ -2628,7 +2629,7 @@ retry:
os_thread_sleep(20000);
- fil_flush(id);
+ fil_flush(id, TRUE);
goto retry;
@@ -2842,7 +2843,7 @@ error_exit2:
goto error_exit;
}
- ret = os_file_flush(file);
+ ret = os_file_flush(file, TRUE);
if (!ret) {
fputs("InnoDB: Error: file flush of tablespace ", stderr);
@@ -3028,7 +3029,7 @@ fil_reset_too_high_lsns(
}
}
- success = os_file_flush(file);
+ success = os_file_flush(file, TRUE);
if (!success) {
goto func_exit;
@@ -3050,7 +3051,7 @@ fil_reset_too_high_lsns(
goto func_exit;
}
- success = os_file_flush(file);
+ success = os_file_flush(file, TRUE);
func_exit:
os_file_close(file);
ut_free(buf2);
@@ -4838,7 +4839,7 @@ fil_extend_space_to_desired_size(
mutex_exit(&fil_system->mutex);
mutex_exit(&fil_system->file_extend_mutex);
- fil_flush(space_id);
+ fil_flush(space_id, TRUE);
return(success);
}
@@ -5550,8 +5551,9 @@ UNIV_INTERN
void
fil_flush(
/*======*/
- ulint space_id) /*!< in: file space id (this can be a group of
+ ulint space_id, /*!< in: file space id (this can be a group of
log files or a tablespace of the database) */
+ ibool metadata)
{
fil_space_t* space;
fil_node_t* node;
@@ -5622,7 +5624,7 @@ retry:
/* fprintf(stderr, "Flushing to file %s\n",
node->name); */
- os_file_flush(file);
+ os_file_flush(file, metadata);
mutex_enter(&fil_system->mutex);
@@ -5705,7 +5707,7 @@ fil_flush_file_spaces(
a non-existing space id. */
for (i = 0; i < n_space_ids; i++) {
- fil_flush(space_ids[i]);
+ fil_flush(space_ids[i], TRUE);
}
mem_free(space_ids);
diff --git a/handler/ha_innodb.cc b/handler/ha_innodb.cc
index ed6719b8f8b..7a2d8e8fdbe 100644
--- a/handler/ha_innodb.cc
+++ b/handler/ha_innodb.cc
@@ -196,6 +196,7 @@ static my_bool innobase_stats_on_metadata = TRUE;
static my_bool innobase_use_sys_stats_table = FALSE;
static my_bool innobase_buffer_pool_shm_checksum = TRUE;
static uint innobase_buffer_pool_shm_key = 0;
+static my_bool innobase_large_prefix = FALSE;
static char* internal_innobase_data_file_path = NULL;
@@ -1056,7 +1057,7 @@ int
convert_error_code_to_mysql(
/*========================*/
int error, /*!< in: InnoDB error code */
- ulint flags, /*!< in: InnoDB table flags, or 0 */
+ ulint flags, /*!< in: InnoDB table flags, or 0 */
THD* thd) /*!< in: user thread handle or NULL */
{
switch (error) {
@@ -1160,6 +1161,11 @@ convert_error_code_to_mysql(
& DICT_TF_COMPACT) / 2);
return(HA_ERR_TO_BIG_ROW);
+ case DB_TOO_BIG_INDEX_COL:
+ my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
+ DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags));
+ return(HA_ERR_INDEX_COL_TOO_LONG);
+
case DB_NO_SAVEPOINT:
return(HA_ERR_NO_SAVEPOINT);
@@ -3040,8 +3046,10 @@ innobase_alter_table_flags(
uint flags)
{
return(HA_INPLACE_ADD_INDEX_NO_READ_WRITE
+ | HA_INPLACE_ADD_INDEX_NO_WRITE
| HA_INPLACE_DROP_INDEX_NO_READ_WRITE
| HA_INPLACE_ADD_UNIQUE_INDEX_NO_READ_WRITE
+ | HA_INPLACE_ADD_UNIQUE_INDEX_NO_WRITE
| HA_INPLACE_DROP_UNIQUE_INDEX_NO_READ_WRITE
| HA_INPLACE_ADD_PK_INDEX_NO_READ_WRITE);
}
@@ -4403,7 +4411,11 @@ UNIV_INTERN
uint
ha_innobase::max_supported_key_part_length() const
{
- return(DICT_MAX_INDEX_COL_LEN - 1);
+ /* A table format specific index column length check will be performed
+ at ha_innobase::add_index() and row_create_index_for_mysql() */
+ return(innobase_large_prefix
+ ? REC_VERSION_56_MAX_INDEX_COL_LEN
+ : REC_ANTELOPE_MAX_INDEX_COL_LEN - 1);
}
/******************************************************************//**
@@ -7533,8 +7545,8 @@ ha_innobase::create(
if (i != (uint) primary_key_no) {
- if ((error = create_index(trx, form, flags, norm_name,
- i))) {
+ if ((error = create_index(trx, form, flags,
+ norm_name, i))) {
goto cleanup;
}
}
@@ -11714,6 +11726,11 @@ static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"With which method to flush data.", NULL, NULL, NULL);
+static MYSQL_SYSVAR_BOOL(large_prefix, innobase_large_prefix,
+ PLUGIN_VAR_NOCMDARG,
+ "Support large index prefix length of REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes.",
+ NULL, NULL, FALSE);
+
static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
"Force InnoDB to not use next-key locking, to use only row-level locking.",
@@ -12162,6 +12179,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(use_global_flush_log_at_trx_commit),
MYSQL_SYSVAR(flush_method),
MYSQL_SYSVAR(force_recovery),
+ MYSQL_SYSVAR(large_prefix),
MYSQL_SYSVAR(locks_unsafe_for_binlog),
MYSQL_SYSVAR(lock_wait_timeout),
#ifdef UNIV_LOG_ARCHIVE
diff --git a/handler/ha_innodb.h b/handler/ha_innodb.h
index 9ad3f0e0a83..dbd360e0ec9 100644
--- a/handler/ha_innodb.h
+++ b/handler/ha_innodb.h
@@ -217,7 +217,9 @@ class ha_innobase: public handler
bool primary_key_is_clustered();
int cmp_ref(const uchar *ref1, const uchar *ref2);
/** Fast index creation (smart ALTER TABLE) @see handler0alter.cc @{ */
- int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys);
+ int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys,
+ handler_add_index **add);
+ int final_add_index(handler_add_index *add, bool commit);
int prepare_drop_index(TABLE *table_arg, uint *key_num,
uint num_of_keys);
int final_drop_index(TABLE *table_arg);
diff --git a/handler/handler0alter.cc b/handler/handler0alter.cc
index cc7e48ebd44..6d5b7b4668f 100644
--- a/handler/handler0alter.cc
+++ b/handler/handler0alter.cc
@@ -539,7 +539,7 @@ innobase_create_key_def(
if (!new_primary && (key_info->flags & HA_NOSAME)
&& (!(key_info->flags & HA_KEY_HAS_PART_KEY_SEG))
&& row_table_got_default_clust_index(table)) {
- uint key_part = key_info->key_parts;
+ uint key_part = key_info->key_parts;
new_primary = TRUE;
@@ -595,6 +595,27 @@ innobase_create_key_def(
}
/*******************************************************************//**
+Check that no key part of the index exceeds DICT_MAX_FIELD_LEN_BY_FORMAT(table)
+@return 0 if all fit, else HA_ERR_INDEX_COL_TOO_LONG (error already reported) */
+static
+int
+innobase_check_column_length(
+/*=========================*/
+ const dict_table_t*table, /*!< in: table that determines the limit */
+ const KEY* key_info) /*!< in: index whose key parts are checked */
+{
+ ulint max_col_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table);
+
+ for (ulint key_part = 0; key_part < key_info->key_parts; key_part++) {
+ if (key_info->key_part[key_part].length > max_col_len) {
+ my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0), max_col_len);
+ return(HA_ERR_INDEX_COL_TOO_LONG);
+ }
+ }
+ return(0);
+}
+
+/*******************************************************************//**
Create a temporary tablename using query id, thread id, and id
@return temporary tablename */
static
@@ -619,6 +640,18 @@ innobase_create_temporary_tablename(
return(name);
}
+class ha_innobase_add_index : public handler_add_index
+{
+public:
+ /** table where the indexes are being created (may differ from prebuilt->table) */
+ dict_table_t* indexed_table;
+ ha_innobase_add_index(TABLE* table, KEY* key_info, uint num_of_keys,
+ dict_table_t* indexed_table_arg) :
+ handler_add_index(table, key_info, num_of_keys),
+ indexed_table (indexed_table_arg) {}
+ ~ha_innobase_add_index() {}
+};
+
/*******************************************************************//**
Create indexes.
@return 0 or error number */
@@ -626,12 +659,15 @@ UNIV_INTERN
int
ha_innobase::add_index(
/*===================*/
- TABLE* table, /*!< in: Table where indexes are created */
- KEY* key_info, /*!< in: Indexes to be created */
- uint num_of_keys) /*!< in: Number of indexes to be created */
+ TABLE* table, /*!< in: Table where indexes
+ are created */
+ KEY* key_info, /*!< in: Indexes
+ to be created */
+ uint num_of_keys, /*!< in: Number of indexes
+ to be created */
+ handler_add_index** add) /*!< out: context */
{
dict_index_t** index; /*!< Index to be created */
- dict_table_t* innodb_table; /*!< InnoDB table in dictionary */
dict_table_t* indexed_table; /*!< Table where indexes are created */
merge_index_def_t* index_defs; /*!< Index definitions */
mem_heap_t* heap; /*!< Heap for index definitions */
@@ -647,6 +683,8 @@ ha_innobase::add_index(
ut_a(key_info);
ut_a(num_of_keys);
+ *add = NULL;
+
if (srv_created_new_raw || srv_force_recovery) {
DBUG_RETURN(HA_ERR_WRONG_COMMAND);
}
@@ -662,20 +700,32 @@ ha_innobase::add_index(
DBUG_RETURN(-1);
}
- innodb_table = indexed_table
- = dict_table_get(prebuilt->table->name, FALSE);
+ indexed_table = dict_table_get(prebuilt->table->name, FALSE);
- if (UNIV_UNLIKELY(!innodb_table)) {
+ if (UNIV_UNLIKELY(!indexed_table)) {
DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
}
+ ut_a(indexed_table == prebuilt->table);
+
/* Check that index keys are sensible */
- error = innobase_check_index_keys(key_info, num_of_keys, innodb_table);
+ error = innobase_check_index_keys(key_info, num_of_keys, prebuilt->table);
if (UNIV_UNLIKELY(error)) {
DBUG_RETURN(error);
}
+ /* Check each index's column length to make sure they do not
+ exceed limit */
+ for (ulint i = 0; i < num_of_keys; i++) {
+ error = innobase_check_column_length(prebuilt->table,
+ &key_info[i]);
+
+ if (error) {
+ DBUG_RETURN(error);
+ }
+ }
+
heap = mem_heap_create(1024);
trx_start_if_not_started(prebuilt->trx);
@@ -691,7 +741,7 @@ ha_innobase::add_index(
num_of_idx = num_of_keys;
index_defs = innobase_create_key_def(
- trx, innodb_table, heap, key_info, num_of_idx);
+ trx, prebuilt->table, heap, key_info, num_of_idx);
new_primary = DICT_CLUSTERED & index_defs[0].ind_type;
@@ -705,7 +755,7 @@ ha_innobase::add_index(
trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
/* Acquire a lock on the table before creating any indexes. */
- error = row_merge_lock_table(prebuilt->trx, innodb_table,
+ error = row_merge_lock_table(prebuilt->trx, prebuilt->table,
new_primary ? LOCK_X : LOCK_S);
if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
@@ -719,7 +769,7 @@ ha_innobase::add_index(
row_mysql_lock_data_dictionary(trx);
dict_locked = TRUE;
- ut_d(dict_table_check_for_dup_indexes(innodb_table, FALSE));
+ ut_d(dict_table_check_for_dup_indexes(prebuilt->table, FALSE));
/* If a new primary key is defined for the table we need
to drop the original table and rebuild all indexes. */
@@ -727,15 +777,15 @@ ha_innobase::add_index(
if (UNIV_UNLIKELY(new_primary)) {
/* This transaction should be the only one
operating on the table. */
- ut_a(innodb_table->n_mysql_handles_opened == 1);
+ ut_a(prebuilt->table->n_mysql_handles_opened == 1);
char* new_table_name = innobase_create_temporary_tablename(
- heap, '1', innodb_table->name);
+ heap, '1', prebuilt->table->name);
/* Clone the table. */
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
indexed_table = row_merge_create_temporary_table(
- new_table_name, index_defs, innodb_table, trx);
+ new_table_name, index_defs, prebuilt->table, trx);
if (!indexed_table) {
@@ -749,11 +799,12 @@ ha_innobase::add_index(
break;
default:
error = convert_error_code_to_mysql(
- trx->error_state, innodb_table->flags,
+ trx->error_state,
+ prebuilt->table->flags,
user_thd);
}
- ut_d(dict_table_check_for_dup_indexes(innodb_table,
+ ut_d(dict_table_check_for_dup_indexes(prebuilt->table,
FALSE));
mem_heap_free(heap);
trx_general_rollback_for_mysql(trx, NULL);
@@ -768,17 +819,15 @@ ha_innobase::add_index(
/* Create the indexes in SYS_INDEXES and load into dictionary. */
- for (ulint i = 0; i < num_of_idx; i++) {
+ for (num_created = 0; num_created < num_of_idx; num_created++) {
- index[i] = row_merge_create_index(trx, indexed_table,
- &index_defs[i]);
+ index[num_created] = row_merge_create_index(
+ trx, indexed_table, &index_defs[num_created]);
- if (!index[i]) {
+ if (!index[num_created]) {
error = trx->error_state;
goto error_handling;
}
-
- num_created++;
}
ut_ad(error == DB_SUCCESS);
@@ -800,7 +849,7 @@ ha_innobase::add_index(
if (UNIV_UNLIKELY(new_primary)) {
/* A primary key is to be built. Acquire an exclusive
table lock also on the table that is being created. */
- ut_ad(indexed_table != innodb_table);
+ ut_ad(indexed_table != prebuilt->table);
error = row_merge_lock_table(prebuilt->trx, indexed_table,
LOCK_X);
@@ -814,7 +863,7 @@ ha_innobase::add_index(
/* Read the clustered index of the table and build indexes
based on this information using temporary files and merge sort. */
error = row_merge_build_indexes(prebuilt->trx,
- innodb_table, indexed_table,
+ prebuilt->table, indexed_table,
index, num_of_idx, table);
error_handling:
@@ -822,63 +871,15 @@ error_handling:
dictionary which were defined. */
switch (error) {
- const char* old_name;
- char* tmp_name;
case DB_SUCCESS:
ut_a(!dict_locked);
- row_mysql_lock_data_dictionary(trx);
- dict_locked = TRUE;
+ ut_d(mutex_enter(&dict_sys->mutex));
ut_d(dict_table_check_for_dup_indexes(prebuilt->table, TRUE));
-
- if (!new_primary) {
- error = row_merge_rename_indexes(trx, indexed_table);
-
- if (error != DB_SUCCESS) {
- row_merge_drop_indexes(trx, indexed_table,
- index, num_created);
- }
-
- goto convert_error;
- }
-
- /* If a new primary key was defined for the table and
- there was no error at this point, we can now rename
- the old table as a temporary table, rename the new
- temporary table as the old table and drop the old table. */
- old_name = innodb_table->name;
- tmp_name = innobase_create_temporary_tablename(heap, '2',
- old_name);
-
- error = row_merge_rename_tables(innodb_table, indexed_table,
- tmp_name, trx);
-
- if (error != DB_SUCCESS) {
-
- row_merge_drop_table(trx, indexed_table);
-
- switch (error) {
- case DB_TABLESPACE_ALREADY_EXISTS:
- case DB_DUPLICATE_KEY:
- innobase_convert_tablename(tmp_name);
- my_error(HA_ERR_TABLE_EXIST, MYF(0), tmp_name);
- error = HA_ERR_TABLE_EXIST;
- break;
- default:
- goto convert_error;
- }
- break;
- }
-
- trx_commit_for_mysql(prebuilt->trx);
- row_prebuilt_free(prebuilt, TRUE);
- prebuilt = row_create_prebuilt(indexed_table);
-
- indexed_table->n_mysql_handles_opened++;
-
- error = row_merge_drop_table(trx, innodb_table);
- innodb_table = indexed_table;
- goto convert_error;
+ ut_d(mutex_exit(&dict_sys->mutex));
+ *add = new ha_innobase_add_index(table, key_info, num_of_keys,
+ indexed_table);
+ break;
case DB_TOO_BIG_RECORD:
my_error(HA_ERR_TO_BIG_ROW, MYF(0));
@@ -894,7 +895,7 @@ error:
trx->error_state = DB_SUCCESS;
if (new_primary) {
- if (indexed_table != innodb_table) {
+ if (indexed_table != prebuilt->table) {
row_merge_drop_table(trx, indexed_table);
}
} else {
@@ -906,38 +907,161 @@ error:
row_merge_drop_indexes(trx, indexed_table,
index, num_created);
}
-
-convert_error:
- if (error == DB_SUCCESS) {
- /* Build index is successful. We will need to
- rebuild index translation table. Reset the
- index entry count in the translation table
- to zero, so that translation table will be rebuilt */
- share->idx_trans_tbl.index_count = 0;
- }
-
- error = convert_error_code_to_mysql(error,
- innodb_table->flags,
- user_thd);
}
- mem_heap_free(heap);
trx_commit_for_mysql(trx);
if (prebuilt->trx) {
trx_commit_for_mysql(prebuilt->trx);
}
if (dict_locked) {
- ut_d(dict_table_check_for_dup_indexes(innodb_table, FALSE));
row_mysql_unlock_data_dictionary(trx);
}
trx_free_for_mysql(trx);
+ mem_heap_free(heap);
/* There might be work for utility threads.*/
srv_active_wake_master_thread();
- DBUG_RETURN(error);
+ DBUG_RETURN(convert_error_code_to_mysql(error, prebuilt->table->flags,
+ user_thd));
+}
+
+/*******************************************************************//**
+Finalize or undo add_index().
+The work is done in a separate dictionary transaction while the data
+dictionary is latched exclusively. If add_index() built a copy of the
+table (add->indexed_table != prebuilt->table), the copy either replaces
+the old table (commit) or is dropped (rollback, or the rename failed).
+Otherwise the temporary indexes of prebuilt->table are renamed with
+row_merge_rename_indexes() (commit) or dropped (rollback, or the rename
+failed). The context object passed in add_arg is deleted before returning.
+@return 0 or error number */
+UNIV_INTERN
+int
+ha_innobase::final_add_index(
+/*=========================*/
+	handler_add_index*	add_arg,/*!< in: context from add_index() */
+	bool			commit)	/*!< in: true=commit, false=rollback */
+{
+	ha_innobase_add_index*	add;
+	trx_t*			trx;
+	int			err	= 0;
+
+	DBUG_ENTER("ha_innobase::final_add_index");
+
+	ut_ad(add_arg);
+	add = static_cast<class ha_innobase_add_index*>(add_arg);
+
+	/* Create a background transaction for the operations on
+	the data dictionary tables. */
+	trx = innobase_trx_allocate(user_thd);
+	trx_start_if_not_started(trx);
+
+	/* Flag this transaction as a dictionary operation, so that
+	the data dictionary will be locked in crash recovery. */
+	trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
+
+	/* Latch the InnoDB data dictionary exclusively so that no deadlocks
+	or lock waits can happen in it during an index create operation. */
+	row_mysql_lock_data_dictionary(trx);
+
+	if (add->indexed_table != prebuilt->table) {
+		ulint	error;
+
+		/* We copied the table (new_primary). */
+		if (commit) {
+			mem_heap_t*	heap;
+			char*		tmp_name;
+
+			heap = mem_heap_create(1024);
+
+			/* A new primary key was defined for the table
+			and there was no error at this point. We can
+			now rename the old table as a temporary table,
+			rename the new temporary table as the old
+			table and drop the old table. */
+			tmp_name = innobase_create_temporary_tablename(
+				heap, '2', prebuilt->table->name);
+
+			error = row_merge_rename_tables(
+				prebuilt->table, add->indexed_table,
+				tmp_name, trx);
+
+			switch (error) {
+			case DB_TABLESPACE_ALREADY_EXISTS:
+			case DB_DUPLICATE_KEY:
+				innobase_convert_tablename(tmp_name);
+				my_error(HA_ERR_TABLE_EXIST, MYF(0), tmp_name);
+				err = HA_ERR_TABLE_EXIST;
+				break;
+			default:
+				err = convert_error_code_to_mysql(
+					error, prebuilt->table->flags,
+					user_thd);
+				break;
+			}
+
+			mem_heap_free(heap);
+		}
+
+		if (!commit || err) {
+			/* Rollback, or the rename failed: discard the
+			copy of the table. */
+			error = row_merge_drop_table(trx, add->indexed_table);
+			trx_commit_for_mysql(prebuilt->trx);
+		} else {
+			/* The rename succeeded: drop the old table and
+			switch prebuilt over to the new table. */
+			dict_table_t*	old_table = prebuilt->table;
+			trx_commit_for_mysql(prebuilt->trx);
+			row_prebuilt_free(prebuilt, TRUE);
+			error = row_merge_drop_table(trx, old_table);
+			add->indexed_table->n_mysql_handles_opened++;
+			prebuilt = row_create_prebuilt(add->indexed_table);
+		}
+
+		/* Do not let the outcome of the cleanup mask a failed
+		rename: report the result of dropping the table only if
+		nothing has failed so far. Otherwise a failed commit
+		would be returned as success and the index translation
+		table would be reset below. */
+		if (!err) {
+			err = convert_error_code_to_mysql(
+				error, prebuilt->table->flags, user_thd);
+		}
+	} else {
+		/* We created secondary indexes (!new_primary). */
+
+		if (commit) {
+			err = convert_error_code_to_mysql(
+				row_merge_rename_indexes(trx, prebuilt->table),
+				prebuilt->table->flags, user_thd);
+		}
+
+		if (!commit || err) {
+			dict_index_t*	index;
+			dict_index_t*	next_index;
+
+			/* Fetch the successor before a possible drop of
+			the current index, then drop every index whose
+			name still carries TEMP_INDEX_PREFIX. */
+			for (index = dict_table_get_first_index(
+				     prebuilt->table);
+			     index; index = next_index) {
+
+				next_index = dict_table_get_next_index(index);
+
+				if (*index->name == TEMP_INDEX_PREFIX) {
+					row_merge_drop_index(
+						index, prebuilt->table, trx);
+				}
+			}
+		}
+	}
+
+	/* If index is successfully built, we will need to rebuild index
+	translation table. Set valid index entry count in the translation
+	table to zero. */
+	if (err == 0 && commit) {
+		share->idx_trans_tbl.index_count = 0;
+	}
+
+	trx_commit_for_mysql(trx);
+	if (prebuilt->trx) {
+		trx_commit_for_mysql(prebuilt->trx);
+	}
+
+	ut_d(dict_table_check_for_dup_indexes(prebuilt->table, FALSE));
+	row_mysql_unlock_data_dictionary(trx);
+
+	trx_free_for_mysql(trx);
+
+	/* There might be work for utility threads.*/
+	srv_active_wake_master_thread();
+
+	delete add;
+	DBUG_RETURN(err);
}
/*******************************************************************//**
diff --git a/include/buf0buf.h b/include/buf0buf.h
index 80cc9803ddf..ef6a26f9459 100644
--- a/include/buf0buf.h
+++ b/include/buf0buf.h
@@ -53,6 +53,9 @@ Created 11/5/1995 Heikki Tuuri
/*!< Get the page only if it's in the
buffer pool, if not then set a watch
on the page. */
+#define BUF_GET_POSSIBLY_FREED 16
+ /*!< Like BUF_GET, but do not mind
+ if the file page has been freed. */
/* @} */
/** @name Modes for buf_page_get_known_nowait */
/* @{ */
diff --git a/include/db0err.h b/include/db0err.h
index 8a71fa6511a..74a2354bce3 100644
--- a/include/db0err.h
+++ b/include/db0err.h
@@ -110,6 +110,8 @@ enum db_err {
DB_PARENT_NO_INDEX, /* the parent table does not
have an index that contains the
foreign keys as its prefix columns */
+ DB_TOO_BIG_INDEX_COL, /* index column size exceeds maximum
+ limit */
/* The following are partial failure codes */
DB_FAIL = 1000,
diff --git a/include/dict0dict.h b/include/dict0dict.h
index ddee226bb7e..d99177e0330 100644
--- a/include/dict0dict.h
+++ b/include/dict0dict.h
@@ -136,6 +136,19 @@ dict_col_copy_type(
/*===============*/
const dict_col_t* col, /*!< in: column */
dtype_t* type); /*!< out: data type */
+/**********************************************************************//**
+Determine bytes of column prefix to be stored in the undo log. Please
+note if the table format is UNIV_FORMAT_A (< DICT_TF_FORMAT_ZIP), no prefix
+needs to be stored in the undo log.
+@return bytes of column prefix to be stored in the undo log */
+UNIV_INLINE
+ulint
+dict_max_field_len_store_undo(
+/*==========================*/
+ dict_table_t* table, /*!< in: table */
+ const dict_col_t* col); /*!< in: column which index prefix
+ is based on */
+
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
/*********************************************************************//**
diff --git a/include/dict0dict.ic b/include/dict0dict.ic
index e395a440a18..b03f5117295 100644
--- a/include/dict0dict.ic
+++ b/include/dict0dict.ic
@@ -924,4 +924,30 @@ dict_table_get_on_id_low(
return(table);
}
+
+/**********************************************************************//**
+Determine how many bytes of a column prefix are to be stored in the undo
+log. For a table whose format is older than DICT_TF_FORMAT_ZIP nothing is
+stored (0 is returned). Otherwise the result is col->max_prefix if it is
+nonzero, or else DICT_MAX_FIELD_LEN_BY_FORMAT(table).
+@return bytes of column prefix to be stored in the undo log */
+UNIV_INLINE
+ulint
+dict_max_field_len_store_undo(
+/*==========================*/
+	dict_table_t*		table,	/*!< in: table */
+	const dict_col_t*	col)	/*!< in: column which index prefix
+					is based on */
+{
+	if (dict_table_get_format(table) < DICT_TF_FORMAT_ZIP) {
+		/* Format older than DICT_TF_FORMAT_ZIP: no prefix. */
+		return(0);
+	}
+
+	if (col->max_prefix) {
+		/* An explicit maximum prefix is recorded on the column. */
+		return(col->max_prefix);
+	}
+
+	return(DICT_MAX_FIELD_LEN_BY_FORMAT(table));
+}
+
#endif /* !UNIV_HOTBACKUP */
diff --git a/include/dict0load.h b/include/dict0load.h
index 22d492a6609..215beee1ba6 100644
--- a/include/dict0load.h
+++ b/include/dict0load.h
@@ -156,7 +156,12 @@ dict_load_field_low(
byte* last_index_id, /*!< in: last index id */
mem_heap_t* heap, /*!< in/out: memory heap
for temporary storage */
- const rec_t* rec); /*!< in: SYS_FIELDS record */
+ const rec_t* rec, /*!< in: SYS_FIELDS record */
+ char* addition_err_str,/*!< out: additional error message
+ that requires information to be
+ filled, or NULL */
+ ulint err_str_len); /*!< in: length of addition_err_str
+ in bytes */
/********************************************************************//**
Loads a table definition and also all its index definitions, and also
the cluster definition if the table is a member in a cluster. Also loads
diff --git a/include/dict0mem.h b/include/dict0mem.h
index f7fbb99906b..71af9ce2f0c 100644
--- a/include/dict0mem.h
+++ b/include/dict0mem.h
@@ -302,32 +302,58 @@ struct dict_col_struct{
unsigned ord_part:1; /*!< nonzero if this column
appears in the ordering fields
of an index */
+ unsigned max_prefix:12; /*!< maximum index prefix length on
+ this column. Our current max limit is
+ 3072 for Barracuda table */
};
-/** @brief DICT_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
-indexed column length (or indexed prefix length).
+/** @brief DICT_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and
+is the maximum indexed column length (or indexed prefix length) in
+ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT. Also, in any format,
+any fixed-length field that is longer than this will be encoded as
+a variable-length field.
It is set to 3*256, so that one can create a column prefix index on
256 characters of a TEXT or VARCHAR column also in the UTF-8
charset. In that charset, a character may take at most 3 bytes. This
constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
files would be at risk! */
-#define DICT_MAX_INDEX_COL_LEN REC_MAX_INDEX_COL_LEN
+#define DICT_ANTELOPE_MAX_INDEX_COL_LEN REC_ANTELOPE_MAX_INDEX_COL_LEN
+
+/** Find out maximum indexed column length by its table format.
+For ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT, the maximum
+field length is REC_ANTELOPE_MAX_INDEX_COL_LEN - 1 (767). For new
+barracuda format, the length could be REC_VERSION_56_MAX_INDEX_COL_LEN
+(3072) bytes */
+#define DICT_MAX_FIELD_LEN_BY_FORMAT(table) \
+ ((dict_table_get_format(table) < DICT_TF_FORMAT_ZIP) \
+ ? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1) \
+ : REC_VERSION_56_MAX_INDEX_COL_LEN)
+
+/** Find out the maximum indexed column length from table flags. The
+table format is extracted from the flags with DICT_TF_FORMAT_MASK and
+DICT_TF_FORMAT_SHIFT. The argument is parenthesized so that an arbitrary
+expression (for example a bitwise OR of flags) is evaluated as a whole
+before the mask is applied. */
+#define DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags)			\
+		(((((flags) & DICT_TF_FORMAT_MASK) >> DICT_TF_FORMAT_SHIFT)\
+		< DICT_TF_FORMAT_ZIP)					\
+		 ? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1)			\
+		 : REC_VERSION_56_MAX_INDEX_COL_LEN)
+
+/** Defines the maximum fixed length column size */
+#define DICT_MAX_FIXED_COL_LEN DICT_ANTELOPE_MAX_INDEX_COL_LEN
/** Data structure for a field in an index */
struct dict_field_struct{
dict_col_t* col; /*!< pointer to the table column */
const char* name; /*!< name of the column */
- unsigned prefix_len:10; /*!< 0 or the length of the column
+ unsigned prefix_len:12; /*!< 0 or the length of the column
prefix in bytes in a MySQL index of
type, e.g., INDEX (textcol(25));
must be smaller than
- DICT_MAX_INDEX_COL_LEN; NOTE that
- in the UTF-8 charset, MySQL sets this
- to 3 * the prefix len in UTF-8 chars */
+ DICT_MAX_FIELD_LEN_BY_FORMAT;
+ NOTE that in the UTF-8 charset, MySQL
+ sets this to (mbmaxlen * the prefix len)
+ in UTF-8 chars */
unsigned fixed_len:10; /*!< 0 or the fixed length of the
column if smaller than
- DICT_MAX_INDEX_COL_LEN */
+ DICT_ANTELOPE_MAX_INDEX_COL_LEN */
};
/** Data structure for an index. Most fields will be
diff --git a/include/fil0fil.h b/include/fil0fil.h
index f0cd0ef2567..840a9fbb13a 100644
--- a/include/fil0fil.h
+++ b/include/fil0fil.h
@@ -671,8 +671,9 @@ UNIV_INTERN
void
fil_flush(
/*======*/
- ulint space_id); /*!< in: file space id (this can be a group of
+ ulint space_id, /*!< in: file space id (this can be a group of
log files or a tablespace of the database) */
+ ibool metadata);
/**********************************************************************//**
Flushes to disk writes in file spaces of the given type possibly cached by
the OS. */
diff --git a/include/lock0lock.h b/include/lock0lock.h
index 7e583fdbd5f..ea636f985b4 100644
--- a/include/lock0lock.h
+++ b/include/lock0lock.h
@@ -73,9 +73,10 @@ UNIV_INLINE
trx_t*
lock_clust_rec_some_has_impl(
/*=========================*/
- const rec_t* rec, /*!< in: user record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
+ const rec_t* rec, /*!< in: user record */
+ const dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the heap_no of the smallest user record on a page.
@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
diff --git a/include/lock0lock.ic b/include/lock0lock.ic
index 014722f51c4..1d740a5fa43 100644
--- a/include/lock0lock.ic
+++ b/include/lock0lock.ic
@@ -75,9 +75,9 @@ UNIV_INLINE
trx_t*
lock_clust_rec_some_has_impl(
/*=========================*/
- const rec_t* rec, /*!< in: user record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ const rec_t* rec, /*!< in: user record */
+ const dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
trx_id_t trx_id;
diff --git a/include/mtr0mtr.ic b/include/mtr0mtr.ic
index 3d87eea2710..1db4a4bd735 100644
--- a/include/mtr0mtr.ic
+++ b/include/mtr0mtr.ic
@@ -37,6 +37,8 @@ mtr_start(
/*======*/
mtr_t* mtr) /*!< out: mini-transaction */
{
+ UNIV_MEM_INVALID(mtr, sizeof *mtr);
+
dyn_array_create(&(mtr->memo));
dyn_array_create(&(mtr->log));
diff --git a/include/os0file.h b/include/os0file.h
index 2104b0fe284..b778adaa809 100644
--- a/include/os0file.h
+++ b/include/os0file.h
@@ -290,8 +290,7 @@ The wrapper functions have the prefix of "innodb_". */
__FILE__, __LINE__)
# define os_file_read_trx(file, buf, offset, offset_high, n, trx) \
- pfs_os_file_read_func(file, buf, offset, offset_high, n, trx, \
- __FILE__, __LINE__)
+ os_file_read_func(file, buf, offset, offset_high, n, trx)
# define os_file_read_no_error_handling(file, buf, offset, \
offset_high, n) \
@@ -303,8 +302,8 @@ The wrapper functions have the prefix of "innodb_". */
pfs_os_file_write_func(name, file, buf, offset, offset_high, \
n, __FILE__, __LINE__)
-# define os_file_flush(file) \
- pfs_os_file_flush_func(file, __FILE__, __LINE__)
+# define os_file_flush(file, metadata) \
+ pfs_os_file_flush_func(file, metadata, __FILE__, __LINE__)
# define os_file_rename(key, oldpath, newpath) \
pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
@@ -343,7 +342,7 @@ to original un-instrumented file I/O APIs */
# define os_file_write(name, file, buf, offset, offset_high, n) \
os_file_write_func(name, file, buf, offset, offset_high, n)
-# define os_file_flush(file) os_file_flush_func(file)
+# define os_file_flush(file, metadata) os_file_flush_func(file, metadata)
# define os_file_rename(key, oldpath, newpath) \
os_file_rename_func(oldpath, newpath)
@@ -794,6 +793,7 @@ ibool
pfs_os_file_flush_func(
/*===================*/
os_file_t file, /*!< in, own: handle to a file */
+ ibool metadata,
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
@@ -873,7 +873,8 @@ UNIV_INTERN
ibool
os_file_flush_func(
/*===============*/
- os_file_t file); /*!< in, own: handle to a file */
+ os_file_t file, /*!< in, own: handle to a file */
+ ibool metadata);
/***********************************************************************//**
Retrieves the last error number if an error occurs in a file io function.
The number should be retrieved before any other OS calls (because they may
diff --git a/include/os0file.ic b/include/os0file.ic
index 83b689668c6..2d2145d72f0 100644
--- a/include/os0file.ic
+++ b/include/os0file.ic
@@ -372,6 +372,7 @@ ibool
pfs_os_file_flush_func(
/*===================*/
os_file_t file, /*!< in, own: handle to a file */
+ ibool metadata,
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
@@ -381,7 +382,7 @@ pfs_os_file_flush_func(
register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC,
src_file, src_line);
- result = os_file_flush_func(file);
+ result = os_file_flush_func(file, metadata);
register_pfs_file_io_end(locker, 0);
diff --git a/include/page0page.h b/include/page0page.h
index 37a8df66b20..d4b90078029 100644
--- a/include/page0page.h
+++ b/include/page0page.h
@@ -618,18 +618,19 @@ rec_t*
page_rec_find_owner_rec(
/*====================*/
rec_t* rec); /*!< in: the physical record */
+#ifndef UNIV_HOTBACKUP
/***********************************************************************//**
-This is a low-level operation which is used in a database index creation
-to update the page number of a created B-tree to a data dictionary
-record. */
-UNIV_INTERN
+Write a 32-bit field in a data dictionary record. */
+UNIV_INLINE
void
-page_rec_write_index_page_no(
-/*=========================*/
- rec_t* rec, /*!< in: record to update */
+page_rec_write_field(
+/*=================*/
+ rec_t* rec, /*!< in/out: record to update */
ulint i, /*!< in: index of the field to update */
- ulint page_no,/*!< in: value to write */
- mtr_t* mtr); /*!< in: mtr */
+ ulint val, /*!< in: value to write */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
+#endif /* !UNIV_HOTBACKUP */
/************************************************************//**
Returns the maximum combined size of records which can be inserted on top
of record heap.
diff --git a/include/page0page.ic b/include/page0page.ic
index 0e8669cf014..1bac2fb53fb 100644
--- a/include/page0page.ic
+++ b/include/page0page.ic
@@ -962,6 +962,29 @@ page_get_free_space_of_empty(
- 2 * PAGE_DIR_SLOT_SIZE));
}
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Write a 32-bit field in a data dictionary record. The field is located
+with rec_get_nth_field_old() and is asserted (in debug builds) to be
+exactly 4 bytes long; the new value is written with mlog_write_ulint()
+using MLOG_4BYTES. */
+UNIV_INLINE
+void
+page_rec_write_field(
+/*=================*/
+	rec_t*	rec,	/*!< in/out: record to update */
+	ulint	i,	/*!< in: index of the field to update */
+	ulint	val,	/*!< in: value to write */
+	mtr_t*	mtr)	/*!< in/out: mini-transaction */
+{
+	ulint	field_len;
+	byte*	field = rec_get_nth_field_old(rec, i, &field_len);
+
+	ut_ad(field_len == 4);
+
+	mlog_write_ulint(field, val, MLOG_4BYTES, mtr);
+}
+#endif /* !UNIV_HOTBACKUP */
+
/************************************************************//**
Each user record on a page, and also the deleted user records in the heap
takes its size plus the fraction of the dir cell size /
diff --git a/include/rem0rec.h b/include/rem0rec.h
index 3d157f1da95..10b74d18c13 100644
--- a/include/rem0rec.h
+++ b/include/rem0rec.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -480,6 +480,18 @@ ulint
rec_offs_any_extern(
/*================*/
const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+#ifdef UNIV_BLOB_NULL_DEBUG
+/******************************************************//**
+Determine if the offsets are for a record containing null BLOB pointers.
+@return first field containing a null BLOB pointer, or NULL if none found */
+UNIV_INLINE
+const byte*
+rec_offs_any_null_extern(
+/*=====================*/
+ const rec_t* rec, /*!< in: record */
+ const ulint* offsets) /*!< in: rec_get_offsets(rec) */
+ __attribute__((nonnull, warn_unused_result));
+#endif /* UNIV_BLOB_NULL_DEBUG */
/******************************************************//**
Returns nonzero if the extern bit is set in nth field of rec.
@return nonzero if externally stored */
diff --git a/include/rem0rec.ic b/include/rem0rec.ic
index 3d386710d7d..dc8ed515c30 100644
--- a/include/rem0rec.ic
+++ b/include/rem0rec.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,6 +26,7 @@ Created 5/30/1994 Heikki Tuuri
#include "mach0data.h"
#include "ut0byte.h"
#include "dict0dict.h"
+#include "btr0types.h"
/* Compact flag ORed to the extra size returned by rec_get_offsets() */
#define REC_OFFS_COMPACT ((ulint) 1 << 31)
@@ -1087,6 +1088,44 @@ rec_offs_any_extern(
return(UNIV_UNLIKELY(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL));
}
+#ifdef UNIV_BLOB_NULL_DEBUG
+/******************************************************//**
+Determine if the offsets are for a record containing null BLOB pointers.
+Only fields flagged as externally stored are inspected: the trailing
+BTR_EXTERN_FIELD_REF_SIZE bytes of each such field are compared against
+field_ref_zero.
+@return first field containing a null BLOB pointer, or NULL if none found */
+UNIV_INLINE
+const byte*
+rec_offs_any_null_extern(
+/*=====================*/
+	const rec_t*	rec,		/*!< in: record */
+	const ulint*	offsets)	/*!< in: rec_get_offsets(rec) */
+{
+	ulint	pos;
+
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+
+	if (!rec_offs_any_extern(offsets)) {
+		/* No externally stored field at all. */
+		return(NULL);
+	}
+
+	for (pos = 0; pos < rec_offs_n_fields(offsets); pos++) {
+		const byte*	data;
+		const byte*	ref;
+		ulint		data_len;
+
+		if (!rec_offs_nth_extern(offsets, pos)) {
+			continue;
+		}
+
+		data = rec_get_nth_field(rec, offsets, pos, &data_len);
+
+		ut_a(data_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+		/* The reference occupies the last bytes of the field. */
+		ref = data + data_len - BTR_EXTERN_FIELD_REF_SIZE;
+
+		if (memcmp(ref, field_ref_zero,
+			   BTR_EXTERN_FIELD_REF_SIZE) == 0) {
+			return(data);
+		}
+	}
+
+	return(NULL);
+}
+#endif /* UNIV_BLOB_NULL_DEBUG */
+
/******************************************************//**
Returns nonzero if the extern bit is set in nth field of rec.
@return nonzero if externally stored */
diff --git a/include/rem0types.h b/include/rem0types.h
index 8b84d4af233..7afd595be90 100644
--- a/include/rem0types.h
+++ b/include/rem0types.h
@@ -34,13 +34,21 @@ typedef byte rec_t;
#define REC_MAX_HEAP_NO (2 * 8192 - 1)
#define REC_MAX_N_OWNED (16 - 1)
-/* REC_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
-indexed column length (or indexed prefix length). It is set to 3*256,
-so that one can create a column prefix index on 256 characters of a
-TEXT or VARCHAR column also in the UTF-8 charset. In that charset,
-a character may take at most 3 bytes.
+/* REC_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
+indexed field length (or indexed prefix length) for indexes on tables of
+ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT format.
+Before UTF-8 encodings with mbmaxlen = 4 were supported, a UTF-8 character
+could take at most 3 bytes. So the limit was set to 3*256, so that one
+can create a column prefix index on 256 characters of a TEXT or VARCHAR
+column also in the UTF-8 charset.
This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
files would be at risk! */
-#define REC_MAX_INDEX_COL_LEN 768
+#define REC_ANTELOPE_MAX_INDEX_COL_LEN 768
+
+/** Maximum indexed field length for table format DICT_TF_FORMAT_ZIP and
+beyond.
+This (3072) is the maximum index row length allowed, so we cannot create an
+index prefix column longer than that. */
+#define REC_VERSION_56_MAX_INDEX_COL_LEN 3072
#endif
diff --git a/include/row0ext.h b/include/row0ext.h
index 43d82d644e6..557da2c4a82 100644
--- a/include/row0ext.h
+++ b/include/row0ext.h
@@ -30,6 +30,7 @@ Created September 2006 Marko Makela
#include "row0types.h"
#include "data0types.h"
#include "mem0mem.h"
+#include "dict0types.h"
/********************************************************************//**
Creates a cache of column prefixes of externally stored columns.
@@ -43,13 +44,13 @@ row_ext_create(
in the InnoDB table object, as reported by
dict_col_get_no(); NOT relative to the records
in the clustered index */
+ ulint flags, /*!< in: table->flags */
const dtuple_t* tuple, /*!< in: data tuple containing the field
references of the externally stored
columns; must be indexed by col_no;
the clustered index record must be
covered by a lock or a page latch
to prevent deletion (rollback or purge). */
- ulint zip_size,/*!< compressed page size in bytes, or 0 */
mem_heap_t* heap); /*!< in: heap where created */
/********************************************************************//**
@@ -63,7 +64,8 @@ row_ext_lookup_ith(
const row_ext_t* ext, /*!< in/out: column prefix cache */
ulint i, /*!< in: index of ext->ext[] */
ulint* len); /*!< out: length of prefix, in bytes,
- at most REC_MAX_INDEX_COL_LEN */
+ at most the length determined by
+ DICT_MAX_FIELD_LEN_BY_FORMAT() */
/********************************************************************//**
Looks up a column prefix of an externally stored column.
@return column prefix, or NULL if the column is not stored externally,
@@ -78,13 +80,18 @@ row_ext_lookup(
dict_col_get_no(); NOT relative to the
records in the clustered index */
ulint* len); /*!< out: length of prefix, in bytes,
- at most REC_MAX_INDEX_COL_LEN */
+ at most the length determined by
+ DICT_MAX_FIELD_LEN_BY_FORMAT() */
/** Prefixes of externally stored columns */
struct row_ext_struct{
ulint n_ext; /*!< number of externally stored columns */
const ulint* ext; /*!< col_no's of externally stored columns */
byte* buf; /*!< backing store of the column prefix cache */
+ ulint max_len;/*!< maximum prefix length, it could be
+ REC_ANTELOPE_MAX_INDEX_COL_LEN or
+ REC_VERSION_56_MAX_INDEX_COL_LEN depending
+ on row format */
ulint len[1]; /*!< prefix lengths; 0 if not cached */
};
diff --git a/include/row0ext.ic b/include/row0ext.ic
index 82771a9312a..466046b2821 100644
--- a/include/row0ext.ic
+++ b/include/row0ext.ic
@@ -37,7 +37,7 @@ row_ext_lookup_ith(
const row_ext_t* ext, /*!< in/out: column prefix cache */
ulint i, /*!< in: index of ext->ext[] */
ulint* len) /*!< out: length of prefix, in bytes,
- at most REC_MAX_INDEX_COL_LEN */
+ at most ext->max_len */
{
ut_ad(ext);
ut_ad(len);
@@ -45,11 +45,14 @@ row_ext_lookup_ith(
*len = ext->len[i];
+ ut_ad(*len <= ext->max_len);
+ ut_ad(ext->max_len > 0);
+
if (UNIV_UNLIKELY(*len == 0)) {
/* The BLOB could not be fetched to the cache. */
return(field_ref_zero);
} else {
- return(ext->buf + i * REC_MAX_INDEX_COL_LEN);
+ return(ext->buf + i * ext->max_len);
}
}
@@ -67,7 +70,7 @@ row_ext_lookup(
dict_col_get_no(); NOT relative to the
records in the clustered index */
ulint* len) /*!< out: length of prefix, in bytes,
- at most REC_MAX_INDEX_COL_LEN */
+ at most ext->max_len */
{
ulint i;
diff --git a/include/row0row.h b/include/row0row.h
index 110525ecfed..75e15d67246 100644
--- a/include/row0row.h
+++ b/include/row0row.h
@@ -41,13 +41,24 @@ Created 4/20/1996 Heikki Tuuri
Gets the offset of the trx id field, in bytes relative to the origin of
a clustered index record.
@return offset of DATA_TRX_ID */
-UNIV_INTERN
+UNIV_INLINE
ulint
-row_get_trx_id_offset(
-/*==================*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
+row_get_trx_id_offset_func(
+/*=======================*/
+#ifdef UNIV_DEBUG
+ const rec_t* rec, /*!< in: record */
+#endif /* UNIV_DEBUG */
+ const dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ __attribute__((nonnull, warn_unused_result));
+#ifdef UNIV_DEBUG
+# define row_get_trx_id_offset(rec, index, offsets) \
+ row_get_trx_id_offset_func(rec, index, offsets)
+#else /* UNIV_DEBUG */
+# define row_get_trx_id_offset(rec, index, offsets) \
+ row_get_trx_id_offset_func(index, offsets)
+#endif /* UNIV_DEBUG */
+
/*********************************************************************//**
Reads the trx id field from a clustered index record.
@return value of the field */
@@ -55,9 +66,10 @@ UNIV_INLINE
trx_id_t
row_get_rec_trx_id(
/*===============*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
+ const rec_t* rec, /*!< in: record */
+ const dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Reads the roll pointer field from a clustered index record.
@return value of the field */
diff --git a/include/row0row.ic b/include/row0row.ic
index 05c007641af..9d19e430e16 100644
--- a/include/row0row.ic
+++ b/include/row0row.ic
@@ -28,15 +28,45 @@ Created 4/20/1996 Heikki Tuuri
#include "trx0undo.h"
/*********************************************************************//**
+Gets the offset of the trx id field, in bytes relative to the origin of
+a clustered index record. The position of DATA_TRX_ID is looked up in
+the index and resolved through the offsets array; the record itself is
+a parameter only in UNIV_DEBUG builds, where it is used for validation.
+@return offset of DATA_TRX_ID */
+UNIV_INLINE
+ulint
+row_get_trx_id_offset_func(
+/*=======================*/
+#ifdef UNIV_DEBUG
+	const rec_t*		rec,	/*!< in: record */
+#endif /* UNIV_DEBUG */
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
+{
+	ulint	trx_id_pos;
+	ulint	trx_id_len;
+	ulint	trx_id_offs;
+
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(rec_offs_validate(rec, index, offsets));
+
+	trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
+	trx_id_offs = rec_get_nth_field_offs(offsets, trx_id_pos,
+					     &trx_id_len);
+
+	ut_ad(trx_id_len == DATA_TRX_ID_LEN);
+
+	return(trx_id_offs);
+}
+
+/*********************************************************************//**
Reads the trx id field from a clustered index record.
@return value of the field */
UNIV_INLINE
trx_id_t
row_get_rec_trx_id(
/*===============*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ const rec_t* rec, /*!< in: record */
+ const dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
ulint offset;
diff --git a/include/univ.i b/include/univ.i
index eb6f5157d3c..195c09c8163 100644
--- a/include/univ.i
+++ b/include/univ.i
@@ -51,7 +51,7 @@ Created 1/20/1994 Heikki Tuuri
#define INNODB_VERSION_MAJOR 1
#define INNODB_VERSION_MINOR 1
-#define INNODB_VERSION_BUGFIX 7
+#define INNODB_VERSION_BUGFIX 8
#ifndef PERCONA_INNODB_VERSION
#define PERCONA_INNODB_VERSION 20.1
@@ -192,6 +192,8 @@ command. Not tested on Windows. */
debugging without UNIV_DEBUG */
#define UNIV_BLOB_LIGHT_DEBUG /* Enable off-page column
debugging without UNIV_DEBUG */
+#define UNIV_BLOB_NULL_DEBUG /* Enable deep off-page
+ column debugging */
#define UNIV_DEBUG /* Enable ut_ad() assertions
and disable UNIV_INLINE */
#define UNIV_DEBUG_LOCK_VALIDATE /* Enable
diff --git a/lock/lock0lock.c b/lock/lock0lock.c
index db008aa972f..0fcbee32454 100644
--- a/lock/lock0lock.c
+++ b/lock/lock0lock.c
@@ -359,10 +359,8 @@ static
ibool
lock_rec_validate_page(
/*===================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no);/*!< in: page number */
+ const buf_block_t* block) /*!< in: buffer block */
+ __attribute__((nonnull, warn_unused_result));
#endif /* UNIV_DEBUG */
/* The lock system */
@@ -1101,10 +1099,10 @@ lock_rec_reset_nth_bit(
Gets the first or next record lock on a page.
@return next lock, NULL if none exists */
UNIV_INLINE
-lock_t*
-lock_rec_get_next_on_page(
-/*======================*/
- lock_t* lock) /*!< in: a record lock */
+const lock_t*
+lock_rec_get_next_on_page_const(
+/*============================*/
+ const lock_t* lock) /*!< in: a record lock */
{
ulint space;
ulint page_no;
@@ -1134,6 +1132,18 @@ lock_rec_get_next_on_page(
}
/*********************************************************************//**
+Gets the first or next record lock on a page. This is the non-const
+variant: it delegates to lock_rec_get_next_on_page_const() and casts
+away the constness of the result.
+@return next lock, NULL if none exists */
+UNIV_INLINE
+lock_t*
+lock_rec_get_next_on_page(
+/*======================*/
+	lock_t*	lock)	/*!< in: a record lock */
+{
+	const lock_t*	next = lock_rec_get_next_on_page_const(lock);
+
+	return((lock_t*) next);
+}
+
+/*********************************************************************//**
Gets the first record lock on a page, where the page is identified by its
file address.
@return first lock, NULL if none exists */
@@ -2655,9 +2665,7 @@ lock_move_reorganize_page(
mem_heap_free(heap);
#ifdef UNIV_DEBUG_LOCK_VALIDATE
- ut_ad(lock_rec_validate_page(buf_block_get_space(block),
- buf_block_get_zip_size(block),
- buf_block_get_page_no(block)));
+ ut_ad(lock_rec_validate_page(block));
#endif
}
@@ -2745,12 +2753,8 @@ lock_move_rec_list_end(
lock_mutex_exit_kernel();
#ifdef UNIV_DEBUG_LOCK_VALIDATE
- ut_ad(lock_rec_validate_page(buf_block_get_space(block),
- buf_block_get_zip_size(block),
- buf_block_get_page_no(block)));
- ut_ad(lock_rec_validate_page(buf_block_get_space(new_block),
- buf_block_get_zip_size(block),
- buf_block_get_page_no(new_block)));
+ ut_ad(lock_rec_validate_page(block));
+ ut_ad(lock_rec_validate_page(new_block));
#endif
}
@@ -2858,9 +2862,7 @@ lock_move_rec_list_start(
lock_mutex_exit_kernel();
#ifdef UNIV_DEBUG_LOCK_VALIDATE
- ut_ad(lock_rec_validate_page(buf_block_get_space(block),
- buf_block_get_zip_size(block),
- buf_block_get_page_no(block)));
+ ut_ad(lock_rec_validate_page(block));
#endif
}
@@ -3850,17 +3852,18 @@ Checks if other transactions have an incompatible mode lock request in
the lock queue.
@return lock or NULL */
UNIV_INLINE
-lock_t*
+const lock_t*
lock_table_other_has_incompatible(
/*==============================*/
- trx_t* trx, /*!< in: transaction, or NULL if all
- transactions should be included */
- ulint wait, /*!< in: LOCK_WAIT if also waiting locks are
- taken into account, or 0 if not */
- dict_table_t* table, /*!< in: table */
- enum lock_mode mode) /*!< in: lock mode */
+ const trx_t* trx, /*!< in: transaction, or NULL if all
+ transactions should be included */
+ ulint wait, /*!< in: LOCK_WAIT if also
+ waiting locks are taken into
+ account, or 0 if not */
+ const dict_table_t* table, /*!< in: table */
+ enum lock_mode mode) /*!< in: lock mode */
{
- lock_t* lock;
+ const lock_t* lock;
ut_ad(mutex_own(&kernel_mutex));
@@ -3951,10 +3954,10 @@ static
ibool
lock_table_has_to_wait_in_queue(
/*============================*/
- lock_t* wait_lock) /*!< in: waiting table lock */
+ const lock_t* wait_lock) /*!< in: waiting table lock */
{
- dict_table_t* table;
- lock_t* lock;
+ const dict_table_t* table;
+ const lock_t* lock;
ut_ad(mutex_own(&kernel_mutex));
ut_ad(lock_get_wait(wait_lock));
@@ -4696,9 +4699,9 @@ static
ibool
lock_table_queue_validate(
/*======================*/
- dict_table_t* table) /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
{
- lock_t* lock;
+ const lock_t* lock;
ut_ad(mutex_own(&kernel_mutex));
@@ -4734,7 +4737,7 @@ lock_rec_queue_validate(
/*====================*/
const buf_block_t* block, /*!< in: buffer block containing rec */
const rec_t* rec, /*!< in: record to look at */
- dict_index_t* index, /*!< in: index, or NULL if not known */
+ const dict_index_t* index, /*!< in: index, or NULL if not known */
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
trx_t* impl_trx;
@@ -4883,46 +4886,37 @@ static
ibool
lock_rec_validate_page(
/*===================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no)/*!< in: page number */
+ const buf_block_t* block) /*!< in: buffer block */
{
- dict_index_t* index;
- buf_block_t* block;
- const page_t* page;
- lock_t* lock;
+ const lock_t* lock;
const rec_t* rec;
ulint nth_lock = 0;
ulint nth_bit = 0;
ulint i;
- mtr_t mtr;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
rec_offs_init(offsets_);
ut_ad(!mutex_own(&kernel_mutex));
-
- mtr_start(&mtr);
-
- ut_ad(zip_size != ULINT_UNDEFINED);
- block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-
- page = block->frame;
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
lock_mutex_enter_kernel();
loop:
- lock = lock_rec_get_first_on_page_addr(space, page_no);
+ lock = lock_rec_get_first_on_page_addr(buf_block_get_space(block),
+ buf_block_get_page_no(block));
if (!lock) {
goto function_exit;
}
+#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
+ ut_a(!block->page.file_page_was_freed);
+#endif
+
for (i = 0; i < nth_lock; i++) {
- lock = lock_rec_get_next_on_page(lock);
+ lock = lock_rec_get_next_on_page_const(lock);
if (!lock) {
goto function_exit;
@@ -4945,15 +4939,14 @@ loop:
if (i == 1 || lock_rec_get_nth_bit(lock, i)) {
- index = lock->index;
- rec = page_find_rec_with_heap_no(page, i);
+ rec = page_find_rec_with_heap_no(block->frame, i);
ut_a(rec);
- offsets = rec_get_offsets(rec, index, offsets,
+ offsets = rec_get_offsets(rec, lock->index, offsets,
ULINT_UNDEFINED, &heap);
#if 0
fprintf(stderr,
- "Validating %lu %lu\n",
- (ulong) space, (ulong) page_no);
+ "Validating %u %u\n",
+ block->page.space, block->page.offset);
#endif
lock_mutex_exit_kernel();
@@ -4962,7 +4955,8 @@ loop:
check WILL break the latching order and may
cause a deadlock of threads. */
- lock_rec_queue_validate(block, rec, index, offsets);
+ lock_rec_queue_validate(block, rec, lock->index,
+ offsets);
lock_mutex_enter_kernel();
@@ -4980,8 +4974,6 @@ loop:
function_exit:
lock_mutex_exit_kernel();
- mtr_commit(&mtr);
-
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
@@ -4996,11 +4988,8 @@ ibool
lock_validate(void)
/*===============*/
{
- lock_t* lock;
- trx_t* trx;
- ib_uint64_t limit;
- ulint space;
- ulint page_no;
+ const lock_t* lock;
+ const trx_t* trx;
ulint i;
lock_mutex_enter_kernel();
@@ -5025,9 +5014,14 @@ lock_validate(void)
for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
- limit = 0;
+ ulint space;
+ ulint page_no;
+ ib_uint64_t limit = 0;
for (;;) {
+ mtr_t mtr;
+ buf_block_t* block;
+
lock = HASH_GET_FIRST(lock_sys->rec_hash, i);
while (lock) {
@@ -5053,13 +5047,26 @@ lock_validate(void)
lock_mutex_exit_kernel();
- lock_rec_validate_page(space,
- fil_space_get_zip_size(space),
- page_no);
+ /* The lock and the block that it is referring
+ to may be freed at this point. We pass
+ BUF_GET_POSSIBLY_FREED to skip a debug check.
+ If the lock exists in lock_rec_validate_page()
+ we assert !block->page.file_page_was_freed. */
- lock_mutex_enter_kernel();
+ mtr_start(&mtr);
+ block = buf_page_get_gen(
+ space, fil_space_get_zip_size(space),
+ page_no, RW_X_LATCH, NULL,
+ BUF_GET_POSSIBLY_FREED,
+ __FILE__, __LINE__, &mtr);
+ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+
+ ut_ad(lock_rec_validate_page(block));
+ mtr_commit(&mtr);
+
+ limit++;
- limit = ut_ull_create(space, page_no + 1);
+ lock_mutex_enter_kernel();
}
}
diff --git a/log/log0log.c b/log/log0log.c
index bb3b7f3feb6..ae5f071261d 100644
--- a/log/log0log.c
+++ b/log/log0log.c
@@ -1133,7 +1133,7 @@ log_io_complete(
&& srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
- fil_flush(group->space_id);
+ fil_flush(group->space_id, FALSE);
}
#ifdef UNIV_DEBUG
@@ -1156,7 +1156,7 @@ log_io_complete(
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC
&& thd_flush_log_at_trx_commit(NULL) != 2) {
- fil_flush(group->space_id);
+ fil_flush(group->space_id, FALSE);
}
mutex_enter(&(log_sys->mutex));
@@ -1547,7 +1547,7 @@ loop:
group = UT_LIST_GET_FIRST(log_sys->log_groups);
- fil_flush(group->space_id);
+ fil_flush(group->space_id, FALSE);
log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
}
@@ -2644,7 +2644,7 @@ log_io_complete_archive(void)
mutex_exit(&(log_sys->mutex));
- fil_flush(group->archive_space_id);
+ fil_flush(group->archive_space_id, TRUE);
mutex_enter(&(log_sys->mutex));
diff --git a/log/log0recv.c b/log/log0recv.c
index 256b29cb777..e0952a1ed0b 100644
--- a/log/log0recv.c
+++ b/log/log0recv.c
@@ -3643,7 +3643,7 @@ recv_reset_log_files_for_backup(
exit(1);
}
- os_file_flush(log_file);
+ os_file_flush(log_file, TRUE);
os_file_close(log_file);
}
@@ -3667,7 +3667,7 @@ recv_reset_log_files_for_backup(
os_file_write(name, log_file, buf, 0, 0,
LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
- os_file_flush(log_file);
+ os_file_flush(log_file, TRUE);
os_file_close(log_file);
ut_free(buf);
diff --git a/mtr/mtr0mtr.c b/mtr/mtr0mtr.c
index 88e698ed818..439b429db43 100644
--- a/mtr/mtr0mtr.c
+++ b/mtr/mtr0mtr.c
@@ -265,9 +265,20 @@ mtr_commit(
mtr_memo_pop_all(mtr);
#endif /* !UNIV_HOTBACKUP */
- ut_d(mtr->state = MTR_COMMITTED);
dyn_array_free(&(mtr->memo));
dyn_array_free(&(mtr->log));
+#ifdef UNIV_DEBUG_VALGRIND
+ /* Declare everything uninitialized except
+ mtr->start_lsn, mtr->end_lsn and mtr->state. */
+ {
+ ib_uint64_t start_lsn = mtr->start_lsn;
+ ib_uint64_t end_lsn = mtr->end_lsn;
+ UNIV_MEM_INVALID(mtr, sizeof *mtr);
+ mtr->start_lsn = start_lsn;
+ mtr->end_lsn = end_lsn;
+ }
+#endif /* UNIV_DEBUG_VALGRIND */
+ ut_d(mtr->state = MTR_COMMITTED);
}
#ifndef UNIV_HOTBACKUP
diff --git a/os/os0file.c b/os/os0file.c
index 6ca8a953182..835210140f8 100644
--- a/os/os0file.c
+++ b/os/os0file.c
@@ -2017,7 +2017,7 @@ os_file_set_size(
ut_free(buf2);
- ret = os_file_flush(file);
+ ret = os_file_flush(file, TRUE);
if (ret) {
return(TRUE);
@@ -2055,7 +2055,8 @@ static
int
os_file_fsync(
/*==========*/
- os_file_t file) /*!< in: handle to a file */
+ os_file_t file, /*!< in: handle to a file */
+ ibool metadata) /*!< in: if FALSE, fdatasync() may be used */
{
int ret;
int failures;
@@ -2064,7 +2065,16 @@ os_file_fsync(
failures = 0;
do {
+#if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC
+ if (metadata) {
+ ret = fsync(file);
+ } else {
+ ret = fdatasync(file);
+ }
+#else
+ (void) metadata;
ret = fsync(file);
+#endif
os_n_fsyncs++;
@@ -2104,7 +2114,8 @@ UNIV_INTERN
ibool
os_file_flush_func(
/*===============*/
- os_file_t file) /*!< in, own: handle to a file */
+ os_file_t file, /*!< in, own: handle to a file */
+ ibool metadata) /*!< in: passed on to os_file_fsync() */
{
#ifdef __WIN__
BOOL ret;
@@ -2154,18 +2165,18 @@ os_file_flush_func(
/* If we are not on an operating system that supports this,
then fall back to a plain fsync. */
- ret = os_file_fsync(file);
+ ret = os_file_fsync(file, metadata);
} else {
ret = fcntl(file, F_FULLFSYNC, NULL);
if (ret) {
/* If we are not on a file system that supports this,
then fall back to a plain fsync. */
- ret = os_file_fsync(file);
+ ret = os_file_fsync(file, metadata);
}
}
#else
- ret = os_file_fsync(file);
+ ret = os_file_fsync(file, metadata);
#endif
if (ret == 0) {
@@ -2411,7 +2422,7 @@ os_file_pwrite(
the OS crashes, a database page is only partially
physically written to disk. */
- ut_a(TRUE == os_file_flush(file));
+ ut_a(TRUE == os_file_flush(file, TRUE));
}
# endif /* UNIV_DO_FLUSH */
@@ -2463,7 +2474,7 @@ os_file_pwrite(
the OS crashes, a database page is only partially
physically written to disk. */
- ut_a(TRUE == os_file_flush(file));
+ ut_a(TRUE == os_file_flush(file, TRUE));
}
# endif /* UNIV_DO_FLUSH */
@@ -2836,7 +2847,7 @@ retry:
# ifdef UNIV_DO_FLUSH
if (!os_do_not_call_flush_at_each_write) {
- ut_a(TRUE == os_file_flush(file));
+ ut_a(TRUE == os_file_flush(file, TRUE));
}
# endif /* UNIV_DO_FLUSH */
@@ -4141,7 +4152,13 @@ os_aio_func(
Windows async i/o, Windows does not allow us to use
ordinary synchronous os_file_read etc. on the same file,
therefore we have built a special mechanism for synchronous
- wait in the Windows case. */
+ wait in the Windows case.
+ Also note that the Performance Schema instrumentation has
+ been performed by current os_aio_func()'s wrapper function
+ pfs_os_aio_func(). So we would no longer need to call
+ Performance Schema instrumented os_file_read() and
+ os_file_write(). Instead, we should use os_file_read_func()
+ and os_file_write_func() */
if (type == OS_FILE_READ) {
return(os_file_read_trx(file, buf, offset,
@@ -4150,7 +4167,8 @@ os_aio_func(
ut_a(type == OS_FILE_WRITE);
- return(os_file_write(name, file, buf, offset, offset_high, n));
+ return(os_file_write_func(name, file, buf, offset,
+ offset_high, n));
}
try_again:
@@ -4398,7 +4416,7 @@ os_aio_windows_handle(
#ifdef UNIV_DO_FLUSH
if (slot->type == OS_FILE_WRITE
&& !os_do_not_call_flush_at_each_write) {
- if (!os_file_flush(slot->file)) {
+ if (!os_file_flush(slot->file, TRUE)) {
ut_error;
}
}
@@ -4701,7 +4719,7 @@ found:
#ifdef UNIV_DO_FLUSH
if (slot->type == OS_FILE_WRITE
- && !os_do_not_call_flush_at_each_write)
- && !os_file_flush(slot->file) {
+ && !os_do_not_call_flush_at_each_write
+ && !os_file_flush(slot->file, TRUE)) { /* metadata=TRUE */
ut_error;
}
#endif /* UNIV_DO_FLUSH */
diff --git a/page/page0page.c b/page/page0page.c
index 7d33d3e2c50..17f6c6bbc36 100644
--- a/page/page0page.c
+++ b/page/page0page.c
@@ -1253,28 +1253,6 @@ page_move_rec_list_start(
return(TRUE);
}
-
-/***********************************************************************//**
-This is a low-level operation which is used in a database index creation
-to update the page number of a created B-tree to a data dictionary record. */
-UNIV_INTERN
-void
-page_rec_write_index_page_no(
-/*=========================*/
- rec_t* rec, /*!< in: record to update */
- ulint i, /*!< in: index of the field to update */
- ulint page_no,/*!< in: value to write */
- mtr_t* mtr) /*!< in: mtr */
-{
- byte* data;
- ulint len;
-
- data = rec_get_nth_field_old(rec, i, &len);
-
- ut_ad(len == 4);
-
- mlog_write_ulint(data, page_no, MLOG_4BYTES, mtr);
-}
#endif /* !UNIV_HOTBACKUP */
/**************************************************************//**
diff --git a/page/page0zip.c b/page/page0zip.c
index d5a290f9bba..b81cba4826a 100644
--- a/page/page0zip.c
+++ b/page/page0zip.c
@@ -464,7 +464,7 @@ page_zip_fields_encode(
if (fixed_sum && UNIV_UNLIKELY
(fixed_sum + field->fixed_len
- > DICT_MAX_INDEX_COL_LEN)) {
+ > DICT_MAX_FIXED_COL_LEN)) {
/* Write out the length of the
preceding non-nullable fields,
to avoid exceeding the maximum
diff --git a/percona-suite/percona_mysqldump_innodb_optimize_keys.result b/percona-suite/percona_mysqldump_innodb_optimize_keys.result
deleted file mode 100644
index e657e3de22a..00000000000
--- a/percona-suite/percona_mysqldump_innodb_optimize_keys.result
+++ /dev/null
@@ -1,109 +0,0 @@
-#
-# Test the --innodb-optimize-keys option.
-#
-CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b INT, KEY(b)) ENGINE=MyISAM;
-######################################
-
-/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
-/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
-/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
-/*!40101 SET NAMES utf8 */;
-/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
-/*!40103 SET TIME_ZONE='+00:00' */;
-/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
-/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
-/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
-/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
-DROP TABLE IF EXISTS `t1`;
-/*!40101 SET @saved_cs_client = @@character_set_client */;
-/*!40101 SET character_set_client = utf8 */;
-CREATE TABLE `t1` (
- `a` int(11) NOT NULL,
- `b` int(11) DEFAULT NULL,
- PRIMARY KEY (`a`),
- KEY `b` (`b`)
-) ENGINE=MyISAM DEFAULT CHARSET=latin1;
-/*!40101 SET character_set_client = @saved_cs_client */;
-
-LOCK TABLES `t1` WRITE;
-/*!40000 ALTER TABLE `t1` DISABLE KEYS */;
-/*!40000 ALTER TABLE `t1` ENABLE KEYS */;
-UNLOCK TABLES;
-/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
-
-/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
-/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
-/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
-/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
-/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
-/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
-/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
-
-######################################
-DROP TABLE t1;
-CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB;
-INSERT INTO t2 VALUES (0), (1), (2);
-CREATE TABLE t1 (
-id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
-a INT, b VARCHAR(255), c DECIMAL(10,3),
-KEY (b),
-UNIQUE KEY uniq(c,a),
-FOREIGN KEY (a) REFERENCES t2(a) ON DELETE CASCADE
-) ENGINE=InnoDB;
-INSERT INTO t1(a,b,c) VALUES (0, "0", 0.0), (1, "1", 1.1), (2, "2", 2.2);
-######################################
-
-/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
-/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
-/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
-/*!40101 SET NAMES utf8 */;
-/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
-/*!40103 SET TIME_ZONE='+00:00' */;
-/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
-/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
-/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
-/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
-DROP TABLE IF EXISTS `t1`;
-/*!40101 SET @saved_cs_client = @@character_set_client */;
-/*!40101 SET character_set_client = utf8 */;
-CREATE TABLE `t1` (
- `id` int(11) NOT NULL AUTO_INCREMENT,
- `a` int(11) DEFAULT NULL,
- `b` varchar(255) DEFAULT NULL,
- `c` decimal(10,3) DEFAULT NULL,
- PRIMARY KEY (`id`)
-) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1;
-/*!40101 SET character_set_client = @saved_cs_client */;
-
-LOCK TABLES `t1` WRITE;
-/*!40000 ALTER TABLE `t1` DISABLE KEYS */;
-INSERT INTO `t1` VALUES (1,0,'0',0.000),(2,1,'1',1.100),(3,2,'2',2.200);
-ALTER TABLE `t1` ADD UNIQUE KEY `uniq` (`c`,`a`), ADD KEY `b` (`b`), ADD KEY `a` (`a`), ADD CONSTRAINT `t1_ibfk_1` FOREIGN KEY (`a`) REFERENCES `t2` (`a`) ON DELETE CASCADE;
-/*!40000 ALTER TABLE `t1` ENABLE KEYS */;
-UNLOCK TABLES;
-DROP TABLE IF EXISTS `t2`;
-/*!40101 SET @saved_cs_client = @@character_set_client */;
-/*!40101 SET character_set_client = utf8 */;
-CREATE TABLE `t2` (
- `a` int(11) NOT NULL,
- PRIMARY KEY (`a`)
-) ENGINE=InnoDB DEFAULT CHARSET=latin1;
-/*!40101 SET character_set_client = @saved_cs_client */;
-
-LOCK TABLES `t2` WRITE;
-/*!40000 ALTER TABLE `t2` DISABLE KEYS */;
-INSERT INTO `t2` VALUES (0),(1),(2);
-/*!40000 ALTER TABLE `t2` ENABLE KEYS */;
-UNLOCK TABLES;
-/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
-
-/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
-/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
-/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
-/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
-/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
-/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
-/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
-
-######################################
-DROP TABLE t1, t2;
diff --git a/percona-suite/percona_mysqldump_innodb_optimize_keys.test b/percona-suite/percona_mysqldump_innodb_optimize_keys.test
deleted file mode 100644
index 5678830a169..00000000000
--- a/percona-suite/percona_mysqldump_innodb_optimize_keys.test
+++ /dev/null
@@ -1,62 +0,0 @@
-# Embedded server doesn't support external clients
---source include/not_embedded.inc
-
-# Fast index creation is only available in InnoDB plugin
---source include/have_innodb.inc
-
-# Save the initial number of concurrent sessions
---source include/count_sessions.inc
-
---echo #
---echo # Test the --innodb-optimize-keys option.
---echo #
-
---let $file=$MYSQLTEST_VARDIR/tmp/t1.sql
-
-# First test that the option has no effect on non-InnoDB tables
-
-CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b INT, KEY(b)) ENGINE=MyISAM;
-
---exec $MYSQL_DUMP --skip-comments --innodb-optimize-keys test t1 >$file
-
---echo ######################################
---cat_file $file
---echo ######################################
-
---remove_file $file
-
-DROP TABLE t1;
-
-
-# Check that for InnoDB tables secondary and foreign keys are created
-# after the data is dumped
-
-CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB;
-INSERT INTO t2 VALUES (0), (1), (2);
-
-CREATE TABLE t1 (
- id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
- a INT, b VARCHAR(255), c DECIMAL(10,3),
- KEY (b),
- UNIQUE KEY uniq(c,a),
- FOREIGN KEY (a) REFERENCES t2(a) ON DELETE CASCADE
-) ENGINE=InnoDB;
-
-INSERT INTO t1(a,b,c) VALUES (0, "0", 0.0), (1, "1", 1.1), (2, "2", 2.2);
-
---exec $MYSQL_DUMP --skip-comments --innodb-optimize-keys test t1 t2 >$file
-
---echo ######################################
---cat_file $file
---echo ######################################
-
-# Check that the resulting dump can be imported back
-
---exec $MYSQL test < $file
-
---remove_file $file
-
-DROP TABLE t1, t2;
-
-# Wait till we reached the initial number of concurrent sessions
---source include/wait_until_count_sessions.inc
diff --git a/percona-suite/percona_query_cache_with_comments.inc.backup b/percona-suite/percona_query_cache_with_comments.inc.backup
new file mode 100644
index 00000000000..4b5b31e9239
--- /dev/null
+++ b/percona-suite/percona_query_cache_with_comments.inc.backup
@@ -0,0 +1,88 @@
+--source include/percona_query_cache_with_comments_clear.inc
+let $query=/* with comment first */select * from t1;
+eval $query;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $query=# with comment first
+select * from t1;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $query=-- with comment first
+select * from t1;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $query=/* with comment first and "quote" */select * from t1;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $query=# with comment first and "quote"
+select * from t1;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $query=-- with comment first and "quote"
+select * from t1;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $query=
+ /* with comment and whitespaces first */select * from t1;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $query=
+ # with comment and whitespaces first
+select * from t1;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $query=
+ -- with comment and whitespaces first
+select * from t1;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $internal=* internal comment *;
+
+let $query=select * /$internal/ from t1;
+--source include/percona_query_cache_with_comments_eval.inc
+let $query=select */$internal/ from t1;
+--source include/percona_query_cache_with_comments_eval.inc
+let $query=select */$internal/from t1;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $internal=* internal comment with "quote" *;
+
+let $query=select * /$internal/ from t1;
+--source include/percona_query_cache_with_comments_eval.inc
+let $query=select */$internal/ from t1;
+--source include/percona_query_cache_with_comments_eval.inc
+let $query=select */$internal/from t1;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $query=select * from t1
+;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $query=select * from t1 ;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $query=select * from t1 ;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $query=select * from t1
+/* comment in the end */;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $query=select * from t1
+/* comment in the end */
+;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $query=select * from t1 #comment in the end;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $query=select * from t1 #comment in the end
+;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $query=select * from t1 -- comment in the end;
+--source include/percona_query_cache_with_comments_eval.inc
+
+let $query=select * from t1 -- comment in the end
+;
+--source include/percona_query_cache_with_comments_eval.inc
diff --git a/percona-suite/percona_query_response_time-replication.result b/percona-suite/percona_query_response_time-replication.result
new file mode 100644
index 00000000000..fd06d07b4d8
--- /dev/null
+++ b/percona-suite/percona_query_response_time-replication.result
@@ -0,0 +1,66 @@
+include/master-slave.inc
+[connection master]
+DROP TABLE IF EXISTS t;
+CREATE TABLE t(id INT);
+SELECT * from t;
+id
+SELECT * from t;
+id
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1;
+Warnings:
+Warning 1292 Truncated incorrect query_response_time_range_base value: '1'
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 2
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 10
+FLUSH QUERY_RESPONSE_TIME;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=ON;
+INSERT INTO t SELECT SLEEP(0.4);
+Warnings:
+Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave.
+SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count)
+0
+INSERT INTO t SELECT SLEEP(0.4);
+Warnings:
+Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave.
+SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count)
+0
+SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count)
+2
+SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count)
+3
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 2;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 2
+FLUSH QUERY_RESPONSE_TIME;
+INSERT INTO t SELECT SLEEP(0.4);
+Warnings:
+Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave.
+SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count)
+0
+INSERT INTO t SELECT SLEEP(0.4);
+Warnings:
+Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave.
+SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count)
+0
+SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count)
+2
+SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count)
+3
+DROP TABLE IF EXISTS t;
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=OFF;
+STOP SLAVE;
+include/wait_for_slave_to_stop.inc
diff --git a/percona-suite/percona_query_response_time-replication.test b/percona-suite/percona_query_response_time-replication.test
new file mode 100644
index 00000000000..199ecce0977
--- /dev/null
+++ b/percona-suite/percona_query_response_time-replication.test
@@ -0,0 +1,61 @@
+--source include/have_response_time_distribution.inc
+--source include/master-slave.inc
+--source include/have_binlog_format_statement.inc
+--source include/have_debug.inc
+--disable_query_log
+call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave. Statement:");
+--enable_query_log
+
+connection master;
+-- disable_warnings
+DROP TABLE IF EXISTS t;
+-- enable_warnings
+CREATE TABLE t(id INT);
+SELECT * from t;
+
+sync_slave_with_master;
+
+connection slave;
+SELECT * from t;
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+source include/percona_query_response_time_flush.inc;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=ON;
+
+connection master;
+INSERT INTO t SELECT SLEEP(0.4);
+SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+INSERT INTO t SELECT SLEEP(0.4);
+SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+sync_slave_with_master;
+
+connection slave;
+SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 2;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+source include/percona_query_response_time_flush.inc;
+
+connection master;
+INSERT INTO t SELECT SLEEP(0.4);
+SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+INSERT INTO t SELECT SLEEP(0.4);
+SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+sync_slave_with_master;
+
+connection slave;
+SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+
+connection master;
+DROP TABLE IF EXISTS t;
+sync_slave_with_master;
+connection slave;
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=OFF;
+
+STOP SLAVE;
+-- source include/wait_for_slave_to_stop.inc
diff --git a/percona-suite/percona_query_response_time-stored.result b/percona-suite/percona_query_response_time-stored.result
new file mode 100644
index 00000000000..0168c4630be
--- /dev/null
+++ b/percona-suite/percona_query_response_time-stored.result
@@ -0,0 +1,306 @@
+SET GLOBAL debug="d,query_exec_time_debug";
+CREATE FUNCTION test_f()
+RETURNS CHAR(30) DETERMINISTIC
+BEGIN
+SET SESSION debug="+d,query_exec_time_1.1";
+RETURN 'Hello, world!';
+END/
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1;
+Warnings:
+Warning 1292 Truncated incorrect query_response_time_range_base value: '1'
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 2
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 2;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 2
+FLUSH QUERY_RESPONSE_TIME;
+SELECT d.count,
+(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count query_count query_total not_zero_region_count region_count
+SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+region_count
+44
+SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+time
+ 0.000001
+ 0.000003
+ 0.000007
+ 0.000015
+ 0.000030
+ 0.000061
+ 0.000122
+ 0.000244
+ 0.000488
+ 0.000976
+ 0.001953
+ 0.003906
+ 0.007812
+ 0.015625
+ 0.031250
+ 0.062500
+ 0.125000
+ 0.250000
+ 0.500000
+ 1.000000
+ 2.000000
+ 4.000000
+ 8.000000
+ 16.000000
+ 32.000000
+ 64.000000
+ 128.000000
+ 256.000000
+ 512.000000
+ 1024.000000
+ 2048.000000
+ 4096.000000
+ 8192.000000
+ 16384.000000
+ 32768.000000
+ 65536.000000
+ 131072.000000
+ 262144.000000
+ 524288.000000
+ 1048576.00000
+ 2097152.00000
+ 4194304.00000
+ 8388608.00000
+TOO LONG
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+SELECT test_f();
+test_f()
+Hello, world!
+SELECT test_f();
+test_f()
+Hello, world!
+SELECT test_f();
+test_f()
+Hello, world!
+SELECT test_f();
+test_f()
+Hello, world!
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+SELECT d.count,
+(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count query_count query_total not_zero_region_count region_count
+4 4 4 1 44
+SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+region_count
+44
+SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+time
+ 0.000001
+ 0.000003
+ 0.000007
+ 0.000015
+ 0.000030
+ 0.000061
+ 0.000122
+ 0.000244
+ 0.000488
+ 0.000976
+ 0.001953
+ 0.003906
+ 0.007812
+ 0.015625
+ 0.031250
+ 0.062500
+ 0.125000
+ 0.250000
+ 0.500000
+ 1.000000
+ 2.000000
+ 4.000000
+ 8.000000
+ 16.000000
+ 32.000000
+ 64.000000
+ 128.000000
+ 256.000000
+ 512.000000
+ 1024.000000
+ 2048.000000
+ 4096.000000
+ 8192.000000
+ 16384.000000
+ 32768.000000
+ 65536.000000
+ 131072.000000
+ 262144.000000
+ 524288.000000
+ 1048576.00000
+ 2097152.00000
+ 4194304.00000
+ 8388608.00000
+TOO LONG
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 2
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 10
+FLUSH QUERY_RESPONSE_TIME;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+SELECT test_f();
+test_f()
+Hello, world!
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+SELECT d.count,
+(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count query_count query_total not_zero_region_count region_count
+1 1 1 1 14
+SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+region_count
+14
+SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+time
+ 0.000001
+ 0.000010
+ 0.000100
+ 0.001000
+ 0.010000
+ 0.100000
+ 1.000000
+ 10.000000
+ 100.000000
+ 1000.000000
+ 10000.000000
+ 100000.000000
+ 1000000.00000
+TOO LONG
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 10
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 7;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 7
+FLUSH QUERY_RESPONSE_TIME;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+SELECT test_f();
+test_f()
+Hello, world!
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+SELECT d.count,
+(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count query_count query_total not_zero_region_count region_count
+1 1 1 1 17
+SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+region_count
+17
+SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+time
+ 0.000001
+ 0.000008
+ 0.000059
+ 0.000416
+ 0.002915
+ 0.020408
+ 0.142857
+ 1.000000
+ 7.000000
+ 49.000000
+ 343.000000
+ 2401.000000
+ 16807.000000
+ 117649.000000
+ 823543.000000
+ 5764801.00000
+TOO LONG
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 7
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 156;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 156
+FLUSH QUERY_RESPONSE_TIME;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+SELECT test_f();
+test_f()
+Hello, world!
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+SELECT d.count,
+(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count query_count query_total not_zero_region_count region_count
+1 1 1 1 7
+SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+region_count
+7
+SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+time
+ 0.000041
+ 0.006410
+ 1.000000
+ 156.000000
+ 24336.000000
+ 3796416.00000
+TOO LONG
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 156
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1000;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 1000
+FLUSH QUERY_RESPONSE_TIME;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+SELECT test_f();
+test_f()
+Hello, world!
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+SELECT d.count,
+(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count query_count query_total not_zero_region_count region_count
+1 1 1 1 6
+SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+region_count
+6
+SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+time
+ 0.000001
+ 0.001000
+ 1.000000
+ 1000.000000
+ 1000000.00000
+TOO LONG
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 1000
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1001;
+Warnings:
+Warning 1292 Truncated incorrect query_response_time_range_base value: '1001'
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 1000
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE =10;
+DROP FUNCTION test_f;
+SET GLOBAL debug=default;
diff --git a/percona-suite/percona_query_response_time-stored.test b/percona-suite/percona_query_response_time-stored.test
new file mode 100644
index 00000000000..e80fc3dd3c0
--- /dev/null
+++ b/percona-suite/percona_query_response_time-stored.test
@@ -0,0 +1,85 @@
+--source include/have_response_time_distribution.inc
+--source include/have_debug.inc
+
+SET GLOBAL debug="d,query_exec_time_debug";
+
+delimiter /;
+CREATE FUNCTION test_f()
+RETURNS CHAR(30) DETERMINISTIC
+BEGIN
+ SET SESSION debug="+d,query_exec_time_1.1";
+ RETURN 'Hello, world!';
+END/
+delimiter ;/
+
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 2;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+
+source include/percona_query_response_time_flush.inc;
+source include/percona_query_response_time_show.inc;
+
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+SELECT test_f();
+SELECT test_f();
+SELECT test_f();
+SELECT test_f();
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+
+source include/percona_query_response_time_show.inc;
+
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+
+source include/percona_query_response_time_flush.inc;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+SELECT test_f();
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+
+source include/percona_query_response_time_show.inc;
+
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 7;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+
+source include/percona_query_response_time_flush.inc;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+SELECT test_f();
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+
+source include/percona_query_response_time_show.inc;
+
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 156;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+
+source include/percona_query_response_time_flush.inc;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+SELECT test_f();
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+
+source include/percona_query_response_time_show.inc;
+
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1000;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+
+source include/percona_query_response_time_flush.inc;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+SELECT test_f();
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+
+source include/percona_query_response_time_show.inc;
+
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1001;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE =10;
+
+DROP FUNCTION test_f;
+
+SET GLOBAL debug=default;
diff --git a/percona-suite/percona_query_response_time.result b/percona-suite/percona_query_response_time.result
new file mode 100644
index 00000000000..2243c155e39
--- /dev/null
+++ b/percona-suite/percona_query_response_time.result
@@ -0,0 +1,377 @@
+SET GLOBAL debug="d,query_exec_time_debug";
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1;
+Warnings:
+Warning 1292 Truncated incorrect query_response_time_range_base value: '1'
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 2
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 2;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 2
+FLUSH QUERY_RESPONSE_TIME;
+SELECT d.count,
+(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count query_count query_total not_zero_region_count region_count
+SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+region_count
+44
+SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+time
+ 0.000001
+ 0.000003
+ 0.000007
+ 0.000015
+ 0.000030
+ 0.000061
+ 0.000122
+ 0.000244
+ 0.000488
+ 0.000976
+ 0.001953
+ 0.003906
+ 0.007812
+ 0.015625
+ 0.031250
+ 0.062500
+ 0.125000
+ 0.250000
+ 0.500000
+ 1.000000
+ 2.000000
+ 4.000000
+ 8.000000
+ 16.000000
+ 32.000000
+ 64.000000
+ 128.000000
+ 256.000000
+ 512.000000
+ 1024.000000
+ 2048.000000
+ 4096.000000
+ 8192.000000
+ 16384.000000
+ 32768.000000
+ 65536.000000
+ 131072.000000
+ 262144.000000
+ 524288.000000
+ 1048576.00000
+ 2097152.00000
+ 4194304.00000
+ 8388608.00000
+TOO LONG
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+SET SESSION debug="+d,query_exec_time_0.31";
+SET SESSION debug="+d,query_exec_time_0.32";
+SET SESSION debug="+d,query_exec_time_0.33";
+SET SESSION debug="+d,query_exec_time_0.34";
+SET SESSION debug="+d,query_exec_time_0.35";
+SET SESSION debug="+d,query_exec_time_0.36";
+SET SESSION debug="+d,query_exec_time_0.37";
+SET SESSION debug="+d,query_exec_time_0.38";
+SET SESSION debug="+d,query_exec_time_0.39";
+SET SESSION debug="+d,query_exec_time_0.4";
+SET SESSION debug="+d,query_exec_time_1.1";
+SET SESSION debug="+d,query_exec_time_1.2";
+SET SESSION debug="+d,query_exec_time_1.3";
+SET SESSION debug="+d,query_exec_time_1.5";
+SET SESSION debug="+d,query_exec_time_1.4";
+SET SESSION debug="+d,query_exec_time_0.5";
+SET SESSION debug="+d,query_exec_time_2.1";
+SET SESSION debug="+d,query_exec_time_2.3";
+SET SESSION debug="+d,query_exec_time_2.5";
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+SELECT d.count,
+(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count query_count query_total not_zero_region_count region_count
+10 19 15 4 44
+1 19 15 4 44
+5 19 15 4 44
+3 19 15 4 44
+SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+region_count
+44
+SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+time
+ 0.000001
+ 0.000003
+ 0.000007
+ 0.000015
+ 0.000030
+ 0.000061
+ 0.000122
+ 0.000244
+ 0.000488
+ 0.000976
+ 0.001953
+ 0.003906
+ 0.007812
+ 0.015625
+ 0.031250
+ 0.062500
+ 0.125000
+ 0.250000
+ 0.500000
+ 1.000000
+ 2.000000
+ 4.000000
+ 8.000000
+ 16.000000
+ 32.000000
+ 64.000000
+ 128.000000
+ 256.000000
+ 512.000000
+ 1024.000000
+ 2048.000000
+ 4096.000000
+ 8192.000000
+ 16384.000000
+ 32768.000000
+ 65536.000000
+ 131072.000000
+ 262144.000000
+ 524288.000000
+ 1048576.00000
+ 2097152.00000
+ 4194304.00000
+ 8388608.00000
+TOO LONG
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 2
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 10
+FLUSH QUERY_RESPONSE_TIME;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+SET SESSION debug="+d,query_exec_time_0.31";
+SET SESSION debug="+d,query_exec_time_0.32";
+SET SESSION debug="+d,query_exec_time_0.33";
+SET SESSION debug="+d,query_exec_time_0.34";
+SET SESSION debug="+d,query_exec_time_0.35";
+SET SESSION debug="+d,query_exec_time_0.36";
+SET SESSION debug="+d,query_exec_time_0.37";
+SET SESSION debug="+d,query_exec_time_0.38";
+SET SESSION debug="+d,query_exec_time_0.39";
+SET SESSION debug="+d,query_exec_time_0.4";
+SET SESSION debug="+d,query_exec_time_1.1";
+SET SESSION debug="+d,query_exec_time_1.2";
+SET SESSION debug="+d,query_exec_time_1.3";
+SET SESSION debug="+d,query_exec_time_1.5";
+SET SESSION debug="+d,query_exec_time_1.4";
+SET SESSION debug="+d,query_exec_time_0.5";
+SET SESSION debug="+d,query_exec_time_2.1";
+SET SESSION debug="+d,query_exec_time_2.3";
+SET SESSION debug="+d,query_exec_time_2.5";
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+SELECT d.count,
+(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count query_count query_total not_zero_region_count region_count
+11 19 17 2 14
+8 19 17 2 14
+SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+region_count
+14
+SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+time
+ 0.000001
+ 0.000010
+ 0.000100
+ 0.001000
+ 0.010000
+ 0.100000
+ 1.000000
+ 10.000000
+ 100.000000
+ 1000.000000
+ 10000.000000
+ 100000.000000
+ 1000000.00000
+TOO LONG
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 10
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 7;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 7
+FLUSH QUERY_RESPONSE_TIME;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+SET SESSION debug="+d,query_exec_time_0.31";
+SET SESSION debug="+d,query_exec_time_0.32";
+SET SESSION debug="+d,query_exec_time_0.33";
+SET SESSION debug="+d,query_exec_time_0.34";
+SET SESSION debug="+d,query_exec_time_0.35";
+SET SESSION debug="+d,query_exec_time_0.36";
+SET SESSION debug="+d,query_exec_time_0.37";
+SET SESSION debug="+d,query_exec_time_0.38";
+SET SESSION debug="+d,query_exec_time_0.39";
+SET SESSION debug="+d,query_exec_time_0.4";
+SET SESSION debug="+d,query_exec_time_1.1";
+SET SESSION debug="+d,query_exec_time_1.2";
+SET SESSION debug="+d,query_exec_time_1.3";
+SET SESSION debug="+d,query_exec_time_1.5";
+SET SESSION debug="+d,query_exec_time_1.4";
+SET SESSION debug="+d,query_exec_time_0.5";
+SET SESSION debug="+d,query_exec_time_2.1";
+SET SESSION debug="+d,query_exec_time_2.3";
+SET SESSION debug="+d,query_exec_time_2.5";
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+SELECT d.count,
+(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count query_count query_total not_zero_region_count region_count
+11 19 17 2 17
+8 19 17 2 17
+SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+region_count
+17
+SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+time
+ 0.000001
+ 0.000008
+ 0.000059
+ 0.000416
+ 0.002915
+ 0.020408
+ 0.142857
+ 1.000000
+ 7.000000
+ 49.000000
+ 343.000000
+ 2401.000000
+ 16807.000000
+ 117649.000000
+ 823543.000000
+ 5764801.00000
+TOO LONG
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 7
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 156;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 156
+FLUSH QUERY_RESPONSE_TIME;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+SET SESSION debug="+d,query_exec_time_0.31";
+SET SESSION debug="+d,query_exec_time_0.32";
+SET SESSION debug="+d,query_exec_time_0.33";
+SET SESSION debug="+d,query_exec_time_0.34";
+SET SESSION debug="+d,query_exec_time_0.35";
+SET SESSION debug="+d,query_exec_time_0.36";
+SET SESSION debug="+d,query_exec_time_0.37";
+SET SESSION debug="+d,query_exec_time_0.38";
+SET SESSION debug="+d,query_exec_time_0.39";
+SET SESSION debug="+d,query_exec_time_0.4";
+SET SESSION debug="+d,query_exec_time_1.1";
+SET SESSION debug="+d,query_exec_time_1.2";
+SET SESSION debug="+d,query_exec_time_1.3";
+SET SESSION debug="+d,query_exec_time_1.5";
+SET SESSION debug="+d,query_exec_time_1.4";
+SET SESSION debug="+d,query_exec_time_0.5";
+SET SESSION debug="+d,query_exec_time_2.1";
+SET SESSION debug="+d,query_exec_time_2.3";
+SET SESSION debug="+d,query_exec_time_2.5";
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+SELECT d.count,
+(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count query_count query_total not_zero_region_count region_count
+11 19 17 2 7
+8 19 17 2 7
+SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+region_count
+7
+SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+time
+ 0.000041
+ 0.006410
+ 1.000000
+ 156.000000
+ 24336.000000
+ 3796416.00000
+TOO LONG
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 156
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1000;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 1000
+FLUSH QUERY_RESPONSE_TIME;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+SET SESSION debug="+d,query_exec_time_0.31";
+SET SESSION debug="+d,query_exec_time_0.32";
+SET SESSION debug="+d,query_exec_time_0.33";
+SET SESSION debug="+d,query_exec_time_0.34";
+SET SESSION debug="+d,query_exec_time_0.35";
+SET SESSION debug="+d,query_exec_time_0.36";
+SET SESSION debug="+d,query_exec_time_0.37";
+SET SESSION debug="+d,query_exec_time_0.38";
+SET SESSION debug="+d,query_exec_time_0.39";
+SET SESSION debug="+d,query_exec_time_0.4";
+SET SESSION debug="+d,query_exec_time_1.1";
+SET SESSION debug="+d,query_exec_time_1.2";
+SET SESSION debug="+d,query_exec_time_1.3";
+SET SESSION debug="+d,query_exec_time_1.5";
+SET SESSION debug="+d,query_exec_time_1.4";
+SET SESSION debug="+d,query_exec_time_0.5";
+SET SESSION debug="+d,query_exec_time_2.1";
+SET SESSION debug="+d,query_exec_time_2.3";
+SET SESSION debug="+d,query_exec_time_2.5";
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+SELECT d.count,
+(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+count query_count query_total not_zero_region_count region_count
+11 19 17 2 6
+8 19 17 2 6
+SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+region_count
+6
+SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+time
+ 0.000001
+ 0.001000
+ 1.000000
+ 1000.000000
+ 1000000.00000
+TOO LONG
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 1000
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1001;
+Warnings:
+Warning 1292 Truncated incorrect query_response_time_range_base value: '1001'
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+Variable_name Value
+query_response_time_range_base 1000
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE =10;
+SET GLOBAL debug=default;
diff --git a/percona-suite/percona_query_response_time.test b/percona-suite/percona_query_response_time.test
new file mode 100644
index 00000000000..f2df156b13d
--- /dev/null
+++ b/percona-suite/percona_query_response_time.test
@@ -0,0 +1,71 @@
+--source include/have_response_time_distribution.inc
+--source include/have_debug.inc
+
+SET GLOBAL debug="d,query_exec_time_debug";
+
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 2;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+
+source include/percona_query_response_time_flush.inc;
+source include/percona_query_response_time_show.inc;
+
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+source include/percona_query_response_time_sleep.inc;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+
+source include/percona_query_response_time_show.inc;
+
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+
+source include/percona_query_response_time_flush.inc;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+source include/percona_query_response_time_sleep.inc;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+
+source include/percona_query_response_time_show.inc;
+
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 7;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+
+source include/percona_query_response_time_flush.inc;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+source include/percona_query_response_time_sleep.inc;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+
+source include/percona_query_response_time_show.inc;
+
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 156;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+
+source include/percona_query_response_time_flush.inc;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+source include/percona_query_response_time_sleep.inc;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+
+source include/percona_query_response_time_show.inc;
+
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1000;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+
+source include/percona_query_response_time_flush.inc;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
+source include/percona_query_response_time_sleep.inc;
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+
+source include/percona_query_response_time_show.inc;
+
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1001;
+SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
+
+SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
+SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE =10;
+
+SET GLOBAL debug=default;
diff --git a/percona-suite/percona_query_response_time_flush.inc b/percona-suite/percona_query_response_time_flush.inc
new file mode 100644
index 00000000000..44bb320fe13
--- /dev/null
+++ b/percona-suite/percona_query_response_time_flush.inc
@@ -0,0 +1 @@
+FLUSH QUERY_RESPONSE_TIME;
diff --git a/percona-suite/percona_query_response_time_show.inc b/percona-suite/percona_query_response_time_show.inc
new file mode 100644
index 00000000000..709abf9872e
--- /dev/null
+++ b/percona-suite/percona_query_response_time_show.inc
@@ -0,0 +1,8 @@
+SELECT d.count,
+(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
+(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
+(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
+FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
+SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
+SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
diff --git a/percona-suite/percona_query_response_time_sleep.inc b/percona-suite/percona_query_response_time_sleep.inc
new file mode 100644
index 00000000000..d889fd9f98d
--- /dev/null
+++ b/percona-suite/percona_query_response_time_sleep.inc
@@ -0,0 +1,19 @@
+SET SESSION debug="+d,query_exec_time_0.31";
+SET SESSION debug="+d,query_exec_time_0.32";
+SET SESSION debug="+d,query_exec_time_0.33";
+SET SESSION debug="+d,query_exec_time_0.34";
+SET SESSION debug="+d,query_exec_time_0.35";
+SET SESSION debug="+d,query_exec_time_0.36";
+SET SESSION debug="+d,query_exec_time_0.37";
+SET SESSION debug="+d,query_exec_time_0.38";
+SET SESSION debug="+d,query_exec_time_0.39";
+SET SESSION debug="+d,query_exec_time_0.4";
+SET SESSION debug="+d,query_exec_time_1.1";
+SET SESSION debug="+d,query_exec_time_1.2";
+SET SESSION debug="+d,query_exec_time_1.3";
+SET SESSION debug="+d,query_exec_time_1.5";
+SET SESSION debug="+d,query_exec_time_1.4";
+SET SESSION debug="+d,query_exec_time_0.5";
+SET SESSION debug="+d,query_exec_time_2.1";
+SET SESSION debug="+d,query_exec_time_2.3";
+SET SESSION debug="+d,query_exec_time_2.5";
diff --git a/rem/rem0rec.c b/rem/rem0rec.c
index a54b5013155..5a96e608ab5 100644
--- a/rem/rem0rec.c
+++ b/rem/rem0rec.c
@@ -1174,7 +1174,7 @@ rec_convert_dtuple_to_rec_comp(
} else if (dfield_is_ext(field)) {
ut_ad(ifield->col->len >= 256
|| ifield->col->mtype == DATA_BLOB);
- ut_ad(len <= REC_MAX_INDEX_COL_LEN
+ ut_ad(len <= REC_ANTELOPE_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE);
*lens-- = (byte) (len >> 8) | 0xc0;
*lens-- = (byte) len;
diff --git a/row/row0ext.c b/row/row0ext.c
index 7320f5b1dca..07e970cf485 100644
--- a/row/row0ext.c
+++ b/row/row0ext.c
@@ -44,8 +44,9 @@ row_ext_cache_fill(
{
const byte* field = dfield_get_data(dfield);
ulint f_len = dfield_get_len(dfield);
- byte* buf = ext->buf + i * REC_MAX_INDEX_COL_LEN;
+ byte* buf = ext->buf + i * ext->max_len;
+ ut_ad(ext->max_len > 0);
ut_ad(i < ext->n_ext);
ut_ad(dfield_is_ext(dfield));
ut_a(f_len >= BTR_EXTERN_FIELD_REF_SIZE);
@@ -56,14 +57,14 @@ row_ext_cache_fill(
/* The BLOB pointer is not set: we cannot fetch it */
ext->len[i] = 0;
} else {
- /* Fetch at most REC_MAX_INDEX_COL_LEN of the column.
+ /* Fetch at most ext->max_len of the column.
The column should be non-empty. However,
trx_rollback_or_clean_all_recovered() may try to
access a half-deleted BLOB if the server previously
crashed during the execution of
btr_free_externally_stored_field(). */
ext->len[i] = btr_copy_externally_stored_field_prefix(
- buf, REC_MAX_INDEX_COL_LEN, zip_size, field, f_len);
+ buf, ext->max_len, zip_size, field, f_len);
}
}
@@ -79,16 +80,18 @@ row_ext_create(
in the InnoDB table object, as reported by
dict_col_get_no(); NOT relative to the records
in the clustered index */
+ ulint flags, /*!< in: table->flags */
const dtuple_t* tuple, /*!< in: data tuple containing the field
references of the externally stored
columns; must be indexed by col_no;
the clustered index record must be
covered by a lock or a page latch
to prevent deletion (rollback or purge). */
- ulint zip_size,/*!< compressed page size in bytes, or 0 */
mem_heap_t* heap) /*!< in: heap where created */
{
ulint i;
+ ulint zip_size = dict_table_flags_to_zip_size(flags);
+
row_ext_t* ret = mem_heap_alloc(heap, (sizeof *ret)
+ (n_ext - 1) * sizeof ret->len);
@@ -97,10 +100,12 @@ row_ext_create(
ret->n_ext = n_ext;
ret->ext = ext;
- ret->buf = mem_heap_alloc(heap, n_ext * REC_MAX_INDEX_COL_LEN);
+ ret->max_len = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags);
+
+ ret->buf = mem_heap_alloc(heap, n_ext * ret->max_len);
#ifdef UNIV_DEBUG
- memset(ret->buf, 0xaa, n_ext * REC_MAX_INDEX_COL_LEN);
- UNIV_MEM_ALLOC(ret->buf, n_ext * REC_MAX_INDEX_COL_LEN);
+ memset(ret->buf, 0xaa, n_ext * ret->max_len);
+ UNIV_MEM_ALLOC(ret->buf, n_ext * ret->max_len);
#endif
/* Fetch the BLOB prefixes */
diff --git a/row/row0mysql.c b/row/row0mysql.c
index a86aae696b0..d9107d942f6 100644
--- a/row/row0mysql.c
+++ b/row/row0mysql.c
@@ -2008,6 +2008,7 @@ row_create_index_for_mysql(
ulint i;
ulint len;
char* table_name;
+ dict_table_t* table;
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
@@ -2021,6 +2022,8 @@ row_create_index_for_mysql(
que_run_threads()) and thus index->table_name is not available. */
table_name = mem_strdup(index->table_name);
+ table = dict_table_get_low(table_name);
+
trx_start_if_not_started(trx);
/* Check that the same column does not appear twice in the index.
@@ -2053,7 +2056,7 @@ row_create_index_for_mysql(
}
/* Check also that prefix_len and actual length
- < DICT_MAX_INDEX_COL_LEN */
+ do not exceed the limit from DICT_MAX_FIELD_LEN_BY_FORMAT() */
len = dict_index_get_nth_field(index, i)->prefix_len;
@@ -2061,8 +2064,9 @@ row_create_index_for_mysql(
len = ut_max(len, field_lengths[i]);
}
- if (len >= DICT_MAX_INDEX_COL_LEN) {
- err = DB_TOO_BIG_RECORD;
+ /* Column or prefix length exceeds maximum column length */
+ if (len > (ulint) DICT_MAX_FIELD_LEN_BY_FORMAT(table)) {
+ err = DB_TOO_BIG_INDEX_COL;
goto error_handling;
}
@@ -2087,6 +2091,7 @@ row_create_index_for_mysql(
que_graph_free((que_t*) que_node_get_parent(thr));
error_handling:
+
if (err != DB_SUCCESS) {
/* We have special error handling here */
@@ -3027,7 +3032,7 @@ row_truncate_table_for_mysql(
rec = btr_pcur_get_rec(&pcur);
if (root_page_no != FIL_NULL) {
- page_rec_write_index_page_no(
+ page_rec_write_field(
rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
root_page_no, &mtr);
/* We will need to commit and restart the
diff --git a/row/row0row.c b/row/row0row.c
index b995ebf7b03..2882af00f30 100644
--- a/row/row0row.c
+++ b/row/row0row.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -47,35 +47,6 @@ Created 4/20/1996 Heikki Tuuri
#include "read0read.h"
#include "ut0mem.h"
-/*********************************************************************//**
-Gets the offset of trx id field, in bytes relative to the origin of
-a clustered index record.
-@return offset of DATA_TRX_ID */
-UNIV_INTERN
-ulint
-row_get_trx_id_offset(
-/*==================*/
- const rec_t* rec __attribute__((unused)),
- /*!< in: record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
-{
- ulint pos;
- ulint offset;
- ulint len;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
-
- offset = rec_get_nth_field_offs(offsets, pos, &len);
-
- ut_ad(len == DATA_TRX_ID_LEN);
-
- return(offset);
-}
-
/*****************************************************************//**
When an insert or purge to a table is performed, this function builds
the entry to be inserted into or purged from an index on the table.
@@ -151,8 +122,6 @@ row_build_index_entry(
} else if (dfield_is_ext(dfield)) {
ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
len -= BTR_EXTERN_FIELD_REF_SIZE;
- ut_a(ind_field->prefix_len <= len
- || dict_index_is_clust(index));
}
len = dtype_get_at_most_n_mbchars(
@@ -231,6 +200,14 @@ row_build(
ut_ad(rec_offs_validate(rec, index, offsets));
}
+#if 0 && defined UNIV_BLOB_NULL_DEBUG
+ /* This one can fail in trx_rollback_active() if
+ the server crashed during an insert before the
+ btr_store_big_rec_extern_fields() did mtr_commit()
+ all BLOB pointers to the clustered index record. */
+ ut_a(!rec_offs_any_null_extern(rec, offsets));
+#endif /* 0 && UNIV_BLOB_NULL_DEBUG */
+
if (type != ROW_COPY_POINTERS) {
/* Take a copy of rec to heap */
buf = mem_heap_alloc(heap, rec_offs_size(offsets));
@@ -301,8 +278,7 @@ row_build(
ut_ad(dict_table_get_format(index->table)
< DICT_TF_FORMAT_ZIP);
} else if (j) {
- *ext = row_ext_create(j, ext_cols, row,
- dict_table_zip_size(index->table),
+ *ext = row_ext_create(j, ext_cols, index->table->flags, row,
heap);
} else {
*ext = NULL;
@@ -431,6 +407,10 @@ row_rec_to_index_entry(
rec = rec_copy(buf, rec, offsets);
/* Avoid a debug assertion in rec_offs_validate(). */
rec_offs_make_valid(rec, index, offsets);
+#ifdef UNIV_BLOB_NULL_DEBUG
+ } else {
+ ut_a(!rec_offs_any_null_extern(rec, offsets));
+#endif /* UNIV_BLOB_NULL_DEBUG */
}
entry = row_rec_to_index_entry_low(rec, index, offsets, n_ext, heap);
diff --git a/row/row0sel.c b/row/row0sel.c
index 4a0e14c969b..4720bdb0f1e 100644
--- a/row/row0sel.c
+++ b/row/row0sel.c
@@ -99,10 +99,12 @@ row_sel_sec_rec_is_for_blob(
ulint clust_len, /*!< in: length of clust_field */
const byte* sec_field, /*!< in: column in secondary index */
ulint sec_len, /*!< in: length of sec_field */
- ulint zip_size) /*!< in: compressed page size, or 0 */
+ dict_table_t* table) /*!< in: table */
{
ulint len;
- byte buf[DICT_MAX_INDEX_COL_LEN];
+ byte buf[REC_VERSION_56_MAX_INDEX_COL_LEN];
+ ulint zip_size = dict_table_flags_to_zip_size(table->flags);
+ ulint max_prefix_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table);
ut_a(clust_len >= BTR_EXTERN_FIELD_REF_SIZE);
@@ -116,7 +118,7 @@ row_sel_sec_rec_is_for_blob(
return(FALSE);
}
- len = btr_copy_externally_stored_field_prefix(buf, sizeof buf,
+ len = btr_copy_externally_stored_field_prefix(buf, max_prefix_len,
zip_size,
clust_field, clust_len);
@@ -222,8 +224,7 @@ row_sel_sec_rec_is_for_clust_rec(
col->mbminmaxlen,
clust_field, clust_len,
sec_field, sec_len,
- dict_table_zip_size(
- clust_index->table))) {
+ clust_index->table)) {
goto inequal;
}
diff --git a/row/row0upd.c b/row/row0upd.c
index 29fc7ca3085..01fb44f42de 100644
--- a/row/row0upd.c
+++ b/row/row0upd.c
@@ -1229,8 +1229,8 @@ row_upd_replace(
}
if (n_ext_cols) {
- *ext = row_ext_create(n_ext_cols, ext_cols, row,
- dict_table_zip_size(table), heap);
+ *ext = row_ext_create(n_ext_cols, ext_cols, table->flags, row,
+ heap);
} else {
*ext = NULL;
}
diff --git a/row/row0vers.c b/row/row0vers.c
index 91c230db135..5fd7d082194 100644
--- a/row/row0vers.c
+++ b/row/row0vers.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -550,6 +550,11 @@ row_vers_build_for_consistent_read(
/* The view already sees this version: we can
copy it to in_heap and return */
+#ifdef UNIV_BLOB_NULL_DEBUG
+ ut_a(!rec_offs_any_null_extern(
+ version, *offsets));
+#endif /* UNIV_BLOB_NULL_DEBUG */
+
buf = mem_heap_alloc(in_heap,
rec_offs_size(*offsets));
*old_vers = rec_copy(buf, version, *offsets);
@@ -583,6 +588,10 @@ row_vers_build_for_consistent_read(
*offsets = rec_get_offsets(prev_version, index, *offsets,
ULINT_UNDEFINED, offset_heap);
+#ifdef UNIV_BLOB_NULL_DEBUG
+ ut_a(!rec_offs_any_null_extern(prev_version, *offsets));
+#endif /* UNIV_BLOB_NULL_DEBUG */
+
trx_id = row_get_rec_trx_id(prev_version, index, *offsets);
if (read_view_sees_trx_id(view, trx_id)) {
@@ -682,6 +691,10 @@ row_vers_build_for_semi_consistent_read(
/* We found a version that belongs to a
committed transaction: return it. */
+#ifdef UNIV_BLOB_NULL_DEBUG
+ ut_a(!rec_offs_any_null_extern(version, *offsets));
+#endif /* UNIV_BLOB_NULL_DEBUG */
+
if (rec == version) {
*old_vers = rec;
err = DB_SUCCESS;
@@ -739,6 +752,9 @@ row_vers_build_for_semi_consistent_read(
version = prev_version;
*offsets = rec_get_offsets(version, index, *offsets,
ULINT_UNDEFINED, offset_heap);
+#ifdef UNIV_BLOB_NULL_DEBUG
+ ut_a(!rec_offs_any_null_extern(version, *offsets));
+#endif /* UNIV_BLOB_NULL_DEBUG */
}/* for (;;) */
if (heap) {
diff --git a/srv/srv0srv.c b/srv/srv0srv.c
index a1765cae3fe..e2d092540ff 100644
--- a/srv/srv0srv.c
+++ b/srv/srv0srv.c
@@ -1406,7 +1406,7 @@ retry:
trx->op_info = "waiting in InnoDB queue";
- thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_TABLE_LOCK);
+ thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK);
os_event_wait(slot->event);
thd_wait_end(trx->mysql_thd);
@@ -1815,7 +1815,7 @@ srv_suspend_mysql_thread(
/* Suspend this thread and wait for the event. */
- thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_TABLE_LOCK);
+ thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_LOCK);
os_event_wait(event);
thd_wait_end(trx->mysql_thd);
diff --git a/trx/trx0rec.c b/trx/trx0rec.c
index cb590d6519a..84687c7195b 100644
--- a/trx/trx0rec.c
+++ b/trx/trx0rec.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -351,10 +351,10 @@ trx_undo_rec_get_col_val(
ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE);
ut_ad(*len > *orig_len);
/* @see dtuple_convert_big_rec() */
- ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE * 2);
+ ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE);
/* we do not have access to index->table here
ut_ad(dict_table_get_format(index->table) >= DICT_TF_FORMAT_ZIP
- || *len >= REC_MAX_INDEX_COL_LEN
+ || *len >= col->max_prefix
+ BTR_EXTERN_FIELD_REF_SIZE);
*/
@@ -456,9 +456,10 @@ static
byte*
trx_undo_page_fetch_ext(
/*====================*/
- byte* ext_buf, /*!< in: a buffer of
- REC_MAX_INDEX_COL_LEN
- + BTR_EXTERN_FIELD_REF_SIZE */
+ byte* ext_buf, /*!< in: buffer to hold the prefix
+ data and BLOB pointer */
+ ulint prefix_len, /*!< in: prefix size to store
+ in the undo log */
ulint zip_size, /*!< compressed page size in bytes,
or 0 for uncompressed BLOB */
const byte* field, /*!< in: an externally stored column */
@@ -467,7 +468,7 @@ trx_undo_page_fetch_ext(
{
/* Fetch the BLOB. */
ulint ext_len = btr_copy_externally_stored_field_prefix(
- ext_buf, REC_MAX_INDEX_COL_LEN, zip_size, field, *len);
+ ext_buf, prefix_len, zip_size, field, *len);
/* BLOBs should always be nonempty. */
ut_a(ext_len);
/* Append the BLOB pointer to the prefix. */
@@ -488,10 +489,11 @@ trx_undo_page_report_modify_ext(
byte* ptr, /*!< in: undo log position,
at least 15 bytes must be available */
byte* ext_buf, /*!< in: a buffer of
- REC_MAX_INDEX_COL_LEN
- + BTR_EXTERN_FIELD_REF_SIZE,
+ at least prefix_len + BTR_EXTERN_FIELD_REF_SIZE bytes,
or NULL when should not fetch
a longer prefix */
+ ulint prefix_len, /*!< in: prefix size to store in the
+ undo log */
ulint zip_size, /*!< compressed page size in bytes,
or 0 for uncompressed BLOB */
const byte** field, /*!< in/out: the locally stored part of
@@ -499,6 +501,8 @@ trx_undo_page_report_modify_ext(
ulint* len) /*!< in/out: length of field, in bytes */
{
if (ext_buf) {
+ ut_a(prefix_len > 0);
+
/* If an ordering column is externally stored, we will
have to store a longer prefix of the field. In this
case, write to the log a marker followed by the
@@ -507,7 +511,7 @@ trx_undo_page_report_modify_ext(
ptr += mach_write_compressed(ptr, *len);
- *field = trx_undo_page_fetch_ext(ext_buf, zip_size,
+ *field = trx_undo_page_fetch_ext(ext_buf, prefix_len, zip_size,
*field, len);
ptr += mach_write_compressed(ptr, *len);
@@ -553,7 +557,7 @@ trx_undo_page_report_modify(
ulint i;
trx_id_t trx_id;
ibool ignore_prefix = FALSE;
- byte ext_buf[REC_MAX_INDEX_COL_LEN
+ byte ext_buf[REC_VERSION_56_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE];
ut_a(dict_index_is_clust(index));
@@ -706,13 +710,21 @@ trx_undo_page_report_modify(
}
if (rec_offs_nth_extern(offsets, pos)) {
+ const dict_col_t* col
+ = dict_index_get_nth_col(index, pos);
+ ulint prefix_len
+ = dict_max_field_len_store_undo(
+ table, col);
+
+ ut_ad(prefix_len + BTR_EXTERN_FIELD_REF_SIZE
+ <= sizeof ext_buf);
+
ptr = trx_undo_page_report_modify_ext(
ptr,
- dict_index_get_nth_col(index, pos)
- ->ord_part
+ col->ord_part
&& !ignore_prefix
- && flen < REC_MAX_INDEX_COL_LEN
- ? ext_buf : NULL,
+ && flen < REC_ANTELOPE_MAX_INDEX_COL_LEN
+ ? ext_buf : NULL, prefix_len,
dict_table_zip_size(table),
&field, &flen);
@@ -791,11 +803,20 @@ trx_undo_page_report_modify(
&flen);
if (rec_offs_nth_extern(offsets, pos)) {
+ const dict_col_t* col =
+ dict_index_get_nth_col(
+ index, pos);
+ ulint prefix_len =
+ dict_max_field_len_store_undo(
+ table, col);
+
+ ut_a(prefix_len < sizeof ext_buf);
+
ptr = trx_undo_page_report_modify_ext(
ptr,
- flen < REC_MAX_INDEX_COL_LEN
+ flen < REC_ANTELOPE_MAX_INDEX_COL_LEN
&& !ignore_prefix
- ? ext_buf : NULL,
+ ? ext_buf : NULL, prefix_len,
dict_table_zip_size(table),
&field, &flen);
} else {
@@ -1095,11 +1116,11 @@ trx_undo_rec_get_partial_row(
undo log record. */
if (!ignore_prefix && col->ord_part) {
ut_a(dfield_get_len(dfield)
- >= 2 * BTR_EXTERN_FIELD_REF_SIZE);
+ >= BTR_EXTERN_FIELD_REF_SIZE);
ut_a(dict_table_get_format(index->table)
>= DICT_TF_FORMAT_ZIP
|| dfield_get_len(dfield)
- >= REC_MAX_INDEX_COL_LEN
+ >= REC_ANTELOPE_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE);
}
}
@@ -1584,6 +1605,10 @@ trx_undo_prev_version_build(
return(DB_ERROR);
}
+# ifdef UNIV_BLOB_NULL_DEBUG
+ ut_a(!rec_offs_any_null_extern(rec, offsets));
+# endif /* UNIV_BLOB_NULL_DEBUG */
+
if (row_upd_changes_field_size_or_external(index, offsets, update)) {
ulint n_ext;
diff --git a/trx/trx0undo.c b/trx/trx0undo.c
index 070d6332a4f..4cb4b7b79c5 100644
--- a/trx/trx0undo.c
+++ b/trx/trx0undo.c
@@ -1985,8 +1985,6 @@ trx_undo_free_prepared(
/*===================*/
trx_t* trx) /*!< in/out: PREPARED transaction */
{
- mutex_enter(&trx->rseg->mutex);
-
ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
if (trx->update_undo) {
@@ -2001,6 +1999,5 @@ trx_undo_free_prepared(
trx->insert_undo);
trx_undo_mem_free(trx->insert_undo);
}
- mutex_exit(&trx->rseg->mutex);
}
#endif /* !UNIV_HOTBACKUP */
diff --git a/ut/ut0ut.c b/ut/ut0ut.c
index cd0894b132a..a9c0d381e16 100644
--- a/ut/ut0ut.c
+++ b/ut/ut0ut.c
@@ -662,6 +662,8 @@ ut_strerr(
return("Table is being used");
case DB_TOO_BIG_RECORD:
return("Record too big");
+ case DB_TOO_BIG_INDEX_COL:
+ return("Index columns size too big");
case DB_LOCK_WAIT_TIMEOUT:
return("Lock wait timeout");
case DB_NO_REFERENCED_ROW: