summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
authorunknown <knielsen@knielsen-hq.org>2010-04-28 14:52:24 +0200
committerunknown <knielsen@knielsen-hq.org>2010-04-28 14:52:24 +0200
commitb1e00b6be81c80b09d11085d77d86978e26df988 (patch)
treebb1fdd7363fbf2580572ac9a56dbd4c933cc4c0d /storage
parent1f683a7270e63abfadce20c6f51370621ff065e1 (diff)
parentc9cfd2df5f2f58c2cdf716999ebea252c307333f (diff)
downloadmariadb-git-b1e00b6be81c80b09d11085d77d86978e26df988.tar.gz
Merge MySQL 5.1.46 into MariaDB.
Still two test failures to be solved: main.myisam and main.subselect.
Diffstat (limited to 'storage')
-rw-r--r--storage/archive/ha_archive.cc35
-rw-r--r--storage/csv/ha_tina.cc16
-rw-r--r--storage/example/ha_example.h6
-rw-r--r--storage/federated/ha_federated.cc208
-rw-r--r--storage/federated/ha_federated.h9
-rw-r--r--storage/innobase/buf/buf0buf.c28
-rw-r--r--storage/innobase/buf/buf0rea.c15
-rw-r--r--storage/innobase/handler/ha_innodb.cc114
-rw-r--r--storage/innobase/include/buf0rea.h6
-rw-r--r--storage/innobase/lock/lock0lock.c92
-rw-r--r--storage/innobase/os/os0file.c10
-rw-r--r--storage/innobase/plug.in.disabled2
-rw-r--r--storage/innobase/row/row0sel.c4
-rw-r--r--storage/innobase/trx/trx0sys.c4
-rw-r--r--storage/innodb_plugin/CMakeLists.txt7
-rw-r--r--storage/innodb_plugin/ChangeLog197
-rw-r--r--storage/innodb_plugin/Makefile.am2
-rw-r--r--storage/innodb_plugin/btr/btr0btr.c22
-rw-r--r--storage/innodb_plugin/btr/btr0cur.c32
-rw-r--r--storage/innodb_plugin/btr/btr0pcur.c27
-rw-r--r--storage/innodb_plugin/buf/buf0buddy.c4
-rw-r--r--storage/innodb_plugin/buf/buf0buf.c72
-rw-r--r--storage/innodb_plugin/buf/buf0flu.c256
-rw-r--r--storage/innodb_plugin/buf/buf0lru.c114
-rw-r--r--storage/innodb_plugin/buf/buf0rea.c8
-rw-r--r--storage/innodb_plugin/dict/dict0boot.c10
-rw-r--r--storage/innodb_plugin/dict/dict0crea.c121
-rw-r--r--storage/innodb_plugin/dict/dict0dict.c9
-rw-r--r--storage/innodb_plugin/dict/dict0load.c117
-rw-r--r--storage/innodb_plugin/dict/dict0mem.c2
-rw-r--r--storage/innodb_plugin/fil/fil0fil.c57
-rw-r--r--storage/innodb_plugin/fsp/fsp0fsp.c38
-rw-r--r--storage/innodb_plugin/ha/ha0ha.c16
-rw-r--r--storage/innodb_plugin/ha/hash0hash.c6
-rw-r--r--storage/innodb_plugin/handler/ha_innodb.cc1085
-rw-r--r--storage/innodb_plugin/handler/ha_innodb.h35
-rw-r--r--storage/innodb_plugin/handler/handler0alter.cc90
-rw-r--r--storage/innodb_plugin/ibuf/ibuf0ibuf.c37
-rw-r--r--storage/innodb_plugin/include/btr0btr.h14
-rw-r--r--storage/innodb_plugin/include/btr0btr.ic6
-rw-r--r--storage/innodb_plugin/include/btr0cur.h20
-rw-r--r--storage/innodb_plugin/include/btr0pcur.h52
-rw-r--r--storage/innodb_plugin/include/btr0pcur.ic49
-rw-r--r--storage/innodb_plugin/include/buf0buf.h38
-rw-r--r--storage/innodb_plugin/include/buf0buf.ic10
-rw-r--r--storage/innodb_plugin/include/buf0flu.h32
-rw-r--r--storage/innodb_plugin/include/data0type.ic6
-rw-r--r--storage/innodb_plugin/include/dict0boot.h3
-rw-r--r--storage/innodb_plugin/include/dict0mem.h24
-rw-r--r--storage/innodb_plugin/include/fil0fil.h9
-rw-r--r--storage/innodb_plugin/include/hash0hash.h5
-rw-r--r--storage/innodb_plugin/include/hash0hash.ic20
-rw-r--r--storage/innodb_plugin/include/lock0lock.h11
-rw-r--r--storage/innodb_plugin/include/log0log.h31
-rw-r--r--storage/innodb_plugin/include/log0log.ic11
-rw-r--r--storage/innodb_plugin/include/log0recv.h10
-rw-r--r--storage/innodb_plugin/include/mem0dbg.h9
-rw-r--r--storage/innodb_plugin/include/mem0dbg.ic5
-rw-r--r--storage/innodb_plugin/include/mem0mem.h5
-rw-r--r--storage/innodb_plugin/include/mem0mem.ic10
-rw-r--r--storage/innodb_plugin/include/mtr0mtr.ic5
-rw-r--r--storage/innodb_plugin/include/os0file.h19
-rw-r--r--storage/innodb_plugin/include/que0que.h13
-rw-r--r--storage/innodb_plugin/include/que0que.ic16
-rw-r--r--storage/innodb_plugin/include/row0mysql.h23
-rw-r--r--storage/innodb_plugin/include/row0sel.h13
-rw-r--r--storage/innodb_plugin/include/srv0srv.h60
-rw-r--r--storage/innodb_plugin/include/sync0rw.h5
-rw-r--r--storage/innodb_plugin/include/sync0sync.h26
-rw-r--r--storage/innodb_plugin/include/trx0rseg.h13
-rw-r--r--storage/innodb_plugin/include/trx0sys.h4
-rw-r--r--storage/innodb_plugin/include/trx0trx.h69
-rw-r--r--storage/innodb_plugin/include/trx0types.h9
-rw-r--r--storage/innodb_plugin/include/univ.i9
-rw-r--r--storage/innodb_plugin/include/ut0rbt.h309
-rw-r--r--storage/innodb_plugin/include/ut0rnd.ic1
-rw-r--r--storage/innodb_plugin/lock/lock0lock.c237
-rw-r--r--storage/innodb_plugin/log/log0log.c23
-rw-r--r--storage/innodb_plugin/log/log0recv.c70
-rw-r--r--storage/innodb_plugin/mem/mem0dbg.c6
-rw-r--r--storage/innodb_plugin/mem/mem0mem.c20
-rw-r--r--storage/innodb_plugin/os/os0file.c32
-rw-r--r--storage/innodb_plugin/page/page0page.c50
-rw-r--r--storage/innodb_plugin/plug.in.disabled2
-rw-r--r--storage/innodb_plugin/rem/rem0rec.c18
-rw-r--r--storage/innodb_plugin/row/row0ins.c10
-rw-r--r--storage/innodb_plugin/row/row0merge.c157
-rw-r--r--storage/innodb_plugin/row/row0mysql.c281
-rw-r--r--storage/innodb_plugin/row/row0row.c17
-rw-r--r--storage/innodb_plugin/row/row0sel.c75
-rw-r--r--storage/innodb_plugin/row/row0umod.c33
-rw-r--r--storage/innodb_plugin/row/row0upd.c3
-rw-r--r--storage/innodb_plugin/srv/srv0srv.c226
-rw-r--r--storage/innodb_plugin/srv/srv0start.c92
-rw-r--r--storage/innodb_plugin/sync/sync0sync.c87
-rw-r--r--storage/innodb_plugin/trx/trx0i_s.c11
-rw-r--r--storage/innodb_plugin/trx/trx0rec.c3
-rw-r--r--storage/innodb_plugin/trx/trx0rseg.c38
-rw-r--r--storage/innodb_plugin/trx/trx0sys.c8
-rw-r--r--storage/innodb_plugin/trx/trx0trx.c3
-rw-r--r--storage/innodb_plugin/ut/ut0rbt.c1249
-rw-r--r--storage/myisam/ft_boolean_search.c12
-rw-r--r--storage/myisam/ha_myisam.cc14
-rw-r--r--storage/myisam/mi_check.c9
-rw-r--r--storage/myisam/mi_delete_all.c2
-rw-r--r--storage/myisam/mi_delete_table.c39
-rw-r--r--storage/myisam/mi_dynrec.c39
-rw-r--r--storage/myisam/mi_extra.c5
-rw-r--r--storage/myisam/mi_locking.c2
-rw-r--r--storage/myisam/mi_open.c3
-rw-r--r--storage/myisam/mi_page.c7
-rw-r--r--storage/myisam/mi_rnext.c17
-rw-r--r--storage/myisam/mi_write.c2
-rw-r--r--storage/myisam/myisamdef.h1
-rw-r--r--storage/myisam/rt_index.c32
-rw-r--r--storage/myisam/rt_split.c11
-rw-r--r--storage/myisam/sort.c40
-rw-r--r--storage/myisammrg/ha_myisammrg.cc140
-rw-r--r--storage/myisammrg/myrg_open.c8
-rw-r--r--storage/xtradb/handler/ha_innodb.cc16
120 files changed, 5257 insertions, 1780 deletions
diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc
index ad54782996c..fd0993c9914 100644
--- a/storage/archive/ha_archive.cc
+++ b/storage/archive/ha_archive.cc
@@ -355,6 +355,9 @@ ARCHIVE_SHARE *ha_archive::get_share(const char *table_name, int *rc)
*/
if (!(azopen(&archive_tmp, share->data_file_name, O_RDONLY|O_BINARY)))
{
+ *rc= my_errno ? my_errno : -1;
+ pthread_mutex_unlock(&archive_mutex);
+ my_free(share, MYF(0));
DBUG_RETURN(NULL);
}
stats.auto_increment_value= archive_tmp.auto_increment + 1;
@@ -504,16 +507,18 @@ int ha_archive::open(const char *name, int mode, uint open_options)
For now we have to refuse to open such table to avoid
potential data loss.
*/
- if ((rc == HA_ERR_CRASHED_ON_USAGE && !(open_options & HA_OPEN_FOR_REPAIR))
- || rc == HA_ERR_TABLE_NEEDS_UPGRADE)
+ switch (rc)
{
- /* purecov: begin inspected */
+ case 0:
+ break;
+ case HA_ERR_CRASHED_ON_USAGE:
+ if (open_options & HA_OPEN_FOR_REPAIR)
+ break;
+ /* fall through */
+ case HA_ERR_TABLE_NEEDS_UPGRADE:
free_share();
- DBUG_RETURN(rc);
- /* purecov: end */
- }
- else if (rc == HA_ERR_OUT_OF_MEM)
- {
+ /* fall through */
+ default:
DBUG_RETURN(rc);
}
@@ -1264,13 +1269,12 @@ int ha_archive::rnd_pos(uchar * buf, uchar *pos)
/*
This method repairs the meta file. It does this by walking the datafile and
- rewriting the meta file. Currently it does this by calling optimize with
- the extended flag.
+ rewriting the meta file. If EXTENDED repair is requested, we attempt to
+ recover as much data as possible.
*/
int ha_archive::repair(THD* thd, HA_CHECK_OPT* check_opt)
{
DBUG_ENTER("ha_archive::repair");
- check_opt->flags= T_EXTEND;
int rc= optimize(thd, check_opt);
if (rc)
@@ -1364,7 +1368,14 @@ int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt)
DBUG_PRINT("ha_archive", ("recovered %llu archive rows",
(unsigned long long)share->rows_recorded));
- if (rc && rc != HA_ERR_END_OF_FILE)
+ /*
+ If REPAIR ... EXTENDED is requested, try to recover as much data
+ from data file as possible. In this case if we failed to read a
+ record, we assume EOF. This allows massive data loss, but we can
+ hardly do more with broken zlib stream. And this is the only way
+ to restore at least what is still recoverable.
+ */
+ if (rc && rc != HA_ERR_END_OF_FILE && !(check_opt->flags & T_EXTEND))
goto error;
}
diff --git a/storage/csv/ha_tina.cc b/storage/csv/ha_tina.cc
index 3fda2239b34..8a30467f16c 100644
--- a/storage/csv/ha_tina.cc
+++ b/storage/csv/ha_tina.cc
@@ -679,9 +679,21 @@ int ha_tina::find_current_row(uchar *buf)
if (read_all || bitmap_is_set(table->read_set, (*field)->field_index))
{
+ bool is_enum= ((*field)->real_type() == MYSQL_TYPE_ENUM);
+ /*
+ Here CHECK_FIELD_WARN checks that all values in the csv file are valid
+ which is normally the case, if they were written by
+ INSERT -> ha_tina::write_row. '0' values on ENUM fields are considered
+ invalid by Field_enum::store() but it can store them on INSERT anyway.
+ Thus, for enums we silence the warning, as it doesn't really mean
+ an invalid value.
+ */
if ((*field)->store(buffer.ptr(), buffer.length(), buffer.charset(),
- CHECK_FIELD_WARN))
- goto err;
+ is_enum ? CHECK_FIELD_IGNORE : CHECK_FIELD_WARN))
+ {
+ if (!is_enum)
+ goto err;
+ }
if ((*field)->flags & BLOB_FLAG)
{
Field_blob *blob= *(Field_blob**) field;
diff --git a/storage/example/ha_example.h b/storage/example/ha_example.h
index f67235eefd1..fad44f34615 100644
--- a/storage/example/ha_example.h
+++ b/storage/example/ha_example.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003 MySQL AB
+/* Copyright (c) 2003, 2010 Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -155,8 +155,8 @@ public:
/** @brief
This method will never be called if you do not implement indexes.
*/
- virtual double read_time(uint index, uint ranges, ha_rows rows)
- { return (double) rows / 20.0+1; }
+ virtual double read_time(uint, uint, ha_rows rows)
+ { return (double) rows / 20.0+1; }
/*
Everything below are methods that we implement in ha_example.cc.
diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc
index d9b1f1ffeeb..4330d9bdd30 100644
--- a/storage/federated/ha_federated.cc
+++ b/storage/federated/ha_federated.cc
@@ -1,4 +1,4 @@
-/* Copyright (C) 2004 MySQL AB
+/* Copyright (c) 2004, 2010 Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -1253,7 +1253,7 @@ bool ha_federated::create_where_from_key(String *to,
const key_range *start_key,
const key_range *end_key,
bool from_records_in_range,
- bool eq_range)
+ bool eq_range_arg)
{
bool both_not_null=
(start_key != NULL && end_key != NULL) ? TRUE : FALSE;
@@ -1360,7 +1360,7 @@ bool ha_federated::create_where_from_key(String *to,
}
break;
case HA_READ_AFTER_KEY:
- if (eq_range)
+ if (eq_range_arg)
{
if (tmp.append("1=1")) // Dummy
goto err;
@@ -1621,11 +1621,10 @@ int ha_federated::open(const char *name, int mode, uint test_if_locked)
DBUG_ASSERT(mysql == NULL);
- ref_length= (table->s->primary_key != MAX_KEY ?
- table->key_info[table->s->primary_key].key_length :
- table->s->reclength);
+ ref_length= sizeof(MYSQL_RES *) + sizeof(MYSQL_ROW_OFFSET);
DBUG_PRINT("info", ("ref_length: %u", ref_length));
+ my_init_dynamic_array(&results, sizeof(MYSQL_RES *), 4, 4);
reset();
DBUG_RETURN(0);
@@ -1645,21 +1644,17 @@ int ha_federated::open(const char *name, int mode, uint test_if_locked)
int ha_federated::close(void)
{
- int retval;
DBUG_ENTER("ha_federated::close");
- /* free the result set */
- if (stored_result)
- {
- mysql_free_result(stored_result);
- stored_result= 0;
- }
+ free_result();
+
+ delete_dynamic(&results);
+
/* Disconnect from mysql */
mysql_close(mysql);
mysql= NULL;
- retval= free_share(share);
- DBUG_RETURN(retval);
+ DBUG_RETURN(free_share(share));
}
/*
@@ -2326,8 +2321,7 @@ int ha_federated::index_read(uchar *buf, const uchar *key,
{
DBUG_ENTER("ha_federated::index_read");
- if (stored_result)
- mysql_free_result(stored_result);
+ free_result();
DBUG_RETURN(index_read_idx_with_result_set(buf, active_index, key,
key_len, find_flag,
&stored_result));
@@ -2359,7 +2353,8 @@ int ha_federated::index_read_idx(uchar *buf, uint index, const uchar *key,
&mysql_result)))
DBUG_RETURN(retval);
mysql_free_result(mysql_result);
- DBUG_RETURN(retval);
+ results.elements--;
+ DBUG_RETURN(0);
}
@@ -2415,18 +2410,20 @@ int ha_federated::index_read_idx_with_result_set(uchar *buf, uint index,
retval= ER_QUERY_ON_FOREIGN_DATA_SOURCE;
goto error;
}
- if (!(*result= mysql_store_result(mysql)))
+ if (!(*result= store_result(mysql)))
{
retval= HA_ERR_END_OF_FILE;
goto error;
}
- if (!(retval= read_next(buf, *result)))
+ if ((retval= read_next(buf, *result)))
+ {
+ mysql_free_result(*result);
+ results.elements--;
+ *result= 0;
+ table->status= STATUS_NOT_FOUND;
DBUG_RETURN(retval);
-
- mysql_free_result(*result);
- *result= 0;
- table->status= STATUS_NOT_FOUND;
- DBUG_RETURN(retval);
+ }
+ DBUG_RETURN(0);
error:
table->status= STATUS_NOT_FOUND;
@@ -2486,12 +2483,6 @@ int ha_federated::read_range_first(const key_range *start_key,
create_where_from_key(&sql_query,
&table->key_info[active_index],
start_key, end_key, 0, eq_range_arg);
-
- if (stored_result)
- {
- mysql_free_result(stored_result);
- stored_result= 0;
- }
if (real_query(sql_query.ptr(), sql_query.length()))
{
retval= ER_QUERY_ON_FOREIGN_DATA_SOURCE;
@@ -2499,14 +2490,13 @@ int ha_federated::read_range_first(const key_range *start_key,
}
sql_query.length(0);
- if (!(stored_result= mysql_store_result(mysql)))
+ if (!(stored_result= store_result(mysql)))
{
retval= HA_ERR_END_OF_FILE;
goto error;
}
-
- retval= read_next(table->record[0], stored_result);
- DBUG_RETURN(retval);
+
+ DBUG_RETURN(read_next(table->record[0], stored_result));
error:
table->status= STATUS_NOT_FOUND;
@@ -2516,10 +2506,8 @@ error:
int ha_federated::read_range_next()
{
- int retval;
DBUG_ENTER("ha_federated::read_range_next");
- retval= rnd_next(table->record[0]);
- DBUG_RETURN(retval);
+ DBUG_RETURN(rnd_next(table->record[0]));
}
@@ -2585,23 +2573,11 @@ int ha_federated::rnd_init(bool scan)
if (scan)
{
- if (stored_result)
- {
- mysql_free_result(stored_result);
- stored_result= 0;
- }
-
- if (real_query(share->select_query, strlen(share->select_query)))
- goto error;
-
- stored_result= mysql_store_result(mysql);
- if (!stored_result)
- goto error;
+ if (real_query(share->select_query, strlen(share->select_query)) ||
+ !(stored_result= store_result(mysql)))
+ DBUG_RETURN(stash_remote_error());
}
DBUG_RETURN(0);
-
-error:
- DBUG_RETURN(stash_remote_error());
}
@@ -2615,11 +2591,7 @@ int ha_federated::rnd_end()
int ha_federated::index_end(void)
{
DBUG_ENTER("ha_federated::index_end");
- if (stored_result)
- {
- mysql_free_result(stored_result);
- stored_result= 0;
- }
+ free_result();
active_index= MAX_KEY;
DBUG_RETURN(0);
}
@@ -2679,6 +2651,9 @@ int ha_federated::read_next(uchar *buf, MYSQL_RES *result)
DBUG_ENTER("ha_federated::read_next");
table->status= STATUS_NOT_FOUND; // For easier return
+
+ /* Save current data cursor position. */
+ current_position= result->data_cursor;
/* Fetch a row, insert it back in a row format. */
if (!(row= mysql_fetch_row(result)))
@@ -2691,24 +2666,38 @@ int ha_federated::read_next(uchar *buf, MYSQL_RES *result)
}
-/*
- store reference to current row so that we can later find it for
- a re-read, update or delete.
-
- In case of federated, a reference is either a primary key or
- the whole record.
-
- Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc.
+/**
+ @brief Store a reference to current row.
+
+ @details During a query execution we may have different result sets (RS),
+ e.g. for different ranges. All the RS's used are stored in
+ memory and placed in @c results dynamic array. At the end of
+ execution all stored RS's are freed at once in the
+ @c ha_federated::reset().
+ So, in case of federated, a reference to current row is a
+ stored result address and current data cursor position.
+ As we keep all RS in memory during a query execution,
+ we can get any record using the reference any time until
+ @c ha_federated::reset() is called.
+ TODO: we don't have to store all RS's rows but only those
+ we call @c ha_federated::position() for, so we can free memory
+ where we store other rows in the @c ha_federated::index_end().
+
+ @param[in] record record data (unused)
*/
-void ha_federated::position(const uchar *record)
+void ha_federated::position(const uchar *record __attribute__ ((unused)))
{
DBUG_ENTER("ha_federated::position");
- if (table->s->primary_key != MAX_KEY)
- key_copy(ref, (uchar *)record, table->key_info + table->s->primary_key,
- ref_length);
- else
- memcpy(ref, record, ref_length);
+
+ DBUG_ASSERT(stored_result);
+
+ position_called= TRUE;
+ /* Store result set address. */
+ memcpy_fixed(ref, &stored_result, sizeof(MYSQL_RES *));
+ /* Store data cursor position. */
+ memcpy_fixed(ref + sizeof(MYSQL_RES *), &current_position,
+ sizeof(MYSQL_ROW_OFFSET));
DBUG_VOID_RETURN;
}
@@ -2724,23 +2713,19 @@ void ha_federated::position(const uchar *record)
int ha_federated::rnd_pos(uchar *buf, uchar *pos)
{
- int result;
+ MYSQL_RES *result;
DBUG_ENTER("ha_federated::rnd_pos");
+
ha_statistic_increment(&SSV::ha_read_rnd_count);
- if (table->s->primary_key != MAX_KEY)
- {
- /* We have a primary key, so use index_read_idx to find row */
- result= index_read_idx(buf, table->s->primary_key, pos,
- ref_length, HA_READ_KEY_EXACT);
- }
- else
- {
- /* otherwise, get the old record ref as obtained in ::position */
- memcpy(buf, pos, ref_length);
- result= 0;
- }
- table->status= result ? STATUS_NOT_FOUND : 0;
- DBUG_RETURN(result);
+
+ /* Get stored result set. */
+ memcpy_fixed(&result, pos, sizeof(MYSQL_RES *));
+ DBUG_ASSERT(result);
+ /* Set data cursor position. */
+ memcpy_fixed(&result->data_cursor, pos + sizeof(MYSQL_RES *),
+ sizeof(MYSQL_ROW_OFFSET));
+ /* Read a row. */
+ DBUG_RETURN(read_next(buf, result));
}
@@ -2943,6 +2928,16 @@ int ha_federated::reset(void)
insert_dup_update= FALSE;
ignore_duplicates= FALSE;
replace_duplicates= FALSE;
+
+ /* Free stored result sets. */
+ for (uint i= 0; i < results.elements; i++)
+ {
+ MYSQL_RES *result;
+ get_dynamic(&results, (uchar *) &result, i);
+ mysql_free_result(result);
+ }
+ reset_dynamic(&results);
+
return 0;
}
@@ -3206,6 +3201,45 @@ bool ha_federated::get_error_message(int error, String* buf)
DBUG_RETURN(FALSE);
}
+
+/**
+ @brief Store a result set.
+
+ @details Call @c mysql_store_result() to save a result set then
+ append it to the stored results array.
+
+ @param[in] mysql_arg MySQL connection structure.
+
+ @return Stored result set (MYSQL_RES object).
+*/
+
+MYSQL_RES *ha_federated::store_result(MYSQL *mysql_arg)
+{
+ MYSQL_RES *result= mysql_store_result(mysql_arg);
+ DBUG_ENTER("ha_federated::store_result");
+ if (result)
+ {
+ (void) insert_dynamic(&results, (uchar*) &result);
+ }
+ position_called= FALSE;
+ DBUG_RETURN(result);
+}
+
+
+void ha_federated::free_result()
+{
+ DBUG_ENTER("ha_federated::free_result");
+ if (stored_result && !position_called)
+ {
+ mysql_free_result(stored_result);
+ stored_result= 0;
+ if (results.elements > 0)
+ results.elements--;
+ }
+ DBUG_VOID_RETURN;
+}
+
+
int ha_federated::external_lock(THD *thd, int lock_type)
{
int error= 0;
diff --git a/storage/federated/ha_federated.h b/storage/federated/ha_federated.h
index c0e8ff3bc31..ab31e3c1680 100644
--- a/storage/federated/ha_federated.h
+++ b/storage/federated/ha_federated.h
@@ -84,6 +84,11 @@ class ha_federated: public handler
FEDERATED_SHARE *share; /* Shared lock info */
MYSQL *mysql; /* MySQL connection */
MYSQL_RES *stored_result;
+ /**
+ Array of all stored results we get during a query execution.
+ */
+ DYNAMIC_ARRAY results;
+ bool position_called;
uint fetch_num; // stores the fetch num
MYSQL_ROW_OFFSET current_position; // Current position used by ::position()
int remote_error_number;
@@ -251,6 +256,10 @@ public:
THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
enum thr_lock_type lock_type); //required
bool get_error_message(int error, String *buf);
+
+ MYSQL_RES *store_result(MYSQL *mysql);
+ void free_result();
+
int external_lock(THD *thd, int lock_type);
int connection_commit();
int connection_rollback();
diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
index 16ed9c9da3c..7d43532ea3d 100644
--- a/storage/innobase/buf/buf0buf.c
+++ b/storage/innobase/buf/buf0buf.c
@@ -224,6 +224,9 @@ in the free list to the frames.
/* Value in microseconds */
static const int WAIT_FOR_READ = 20000;
+/* Number of attempts made to read in a page in the buffer pool */
+static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
+
buf_pool_t* buf_pool = NULL; /* The buffer buf_pool of the database */
#ifdef UNIV_DEBUG
@@ -1160,6 +1163,7 @@ buf_page_get_gen(
ulint fix_type;
ibool success;
ibool must_read;
+ ulint retries = 0;
ut_ad(mtr);
ut_ad((rw_latch == RW_S_LATCH)
@@ -1200,7 +1204,29 @@ loop:
return(NULL);
}
- buf_read_page(space, offset);
+ if (buf_read_page(space, offset)) {
+ retries = 0;
+ } else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
+ ++retries;
+ } else {
+ fprintf(stderr, "InnoDB: Error: Unable"
+ " to read tablespace %lu page no"
+ " %lu into the buffer pool after"
+ " %lu attempts\n"
+ "InnoDB: The most probable cause"
+ " of this error may be that the"
+ " table has been corrupted.\n"
+ "InnoDB: You can try to fix this"
+ " problem by using"
+ " innodb_force_recovery.\n"
+ "InnoDB: Please see reference manual"
+ " for more details.\n"
+ "InnoDB: Aborting...\n",
+ space, offset,
+ BUF_PAGE_READ_MAX_RETRIES);
+
+ ut_error;
+ }
#ifdef UNIV_DEBUG
buf_dbg_counter++;
diff --git a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
index fdec0206990..451436ef75b 100644
--- a/storage/innobase/buf/buf0rea.c
+++ b/storage/innobase/buf/buf0rea.c
@@ -299,30 +299,27 @@ buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread. Does a random read-ahead if it seems
sensible. */
-
-ulint
+ibool
buf_read_page(
/*==========*/
- /* out: number of page read requests issued: this can
- be > 1 if read-ahead occurred */
+ /* out: TRUE if success, FALSE otherwise */
ulint space, /* in: space id */
ulint offset) /* in: page number */
{
ib_longlong tablespace_version;
ulint count;
- ulint count2;
ulint err;
tablespace_version = fil_space_get_version(space);
- count = buf_read_ahead_random(space, offset);
+ buf_read_ahead_random(space, offset);
/* We do the i/o in the synchronous aio mode to save thread
switches: hence TRUE */
- count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+ count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
tablespace_version, offset);
- srv_buf_pool_reads+= count2;
+ srv_buf_pool_reads+= count;
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
fprintf(stderr,
@@ -336,7 +333,7 @@ buf_read_page(
/* Flush pages from the end of the LRU list if necessary */
buf_flush_free_margin();
- return(count + count2);
+ return(count > 0);
}
/************************************************************************
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 9a31d064d1b..7927d003546 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -2662,9 +2662,9 @@ ha_innobase::innobase_initialize_autoinc()
auto_inc = innobase_get_int_col_max_value(field);
} else {
/* We have no idea what's been passed in to us as the
- autoinc column. We set it to the MAX_INT of our table
- autoinc type. */
- auto_inc = 0xFFFFFFFFFFFFFFFFULL;
+ autoinc column. We set it to 0, effectively disabling
+ updates to the table. */
+ auto_inc = 0;
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: Unable to determine the AUTOINC "
@@ -2673,7 +2673,7 @@ ha_innobase::innobase_initialize_autoinc()
if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
/* If the recovery level is set so high that writes
- are disabled we force the AUTOINC counter to the MAX
+ are disabled we force the AUTOINC counter to 0
value effectively disabling writes to the table.
Secondly, we avoid reading the table in case the read
results in failure due to a corrupted table/index.
@@ -2682,7 +2682,10 @@ ha_innobase::innobase_initialize_autoinc()
tables can be dumped with minimal hassle. If an error
were returned in this case, the first attempt to read
the table would fail and subsequent SELECTs would succeed. */
+ auto_inc = 0;
} else if (field == NULL) {
+ /* This is a far more serious error, best to avoid
+ opening the table and return failure. */
my_error(ER_AUTOINC_READ_FAILED, MYF(0));
} else {
dict_index_t* index;
@@ -2711,7 +2714,7 @@ ha_innobase::innobase_initialize_autoinc()
"InnoDB: Unable to find the AUTOINC column "
"%s in the InnoDB table %s.\n"
"InnoDB: We set the next AUTOINC column "
- "value to the maximum possible value,\n"
+ "value to 0,\n"
"InnoDB: in effect disabling the AUTOINC "
"next value generation.\n"
"InnoDB: You can either set the next "
@@ -2720,7 +2723,13 @@ ha_innobase::innobase_initialize_autoinc()
"recreating the table.\n",
col_name, index->table->name);
- my_error(ER_AUTOINC_READ_FAILED, MYF(0));
+ /* This will disable the AUTOINC generation. */
+ auto_inc = 0;
+
+ /* We want the open to succeed, so that the user can
+ take corrective action. ie. reads should succeed but
+ updates should fail. */
+ err = DB_SUCCESS;
break;
default:
/* row_search_max_autoinc() should only return
@@ -3978,11 +3987,17 @@ no_commit:
prebuilt->autoinc_error = DB_SUCCESS;
if ((error = update_auto_increment())) {
-
/* We don't want to mask autoinc overflow errors. */
- if (prebuilt->autoinc_error != DB_SUCCESS) {
- error = (int) prebuilt->autoinc_error;
+ /* Handle the case where the AUTOINC sub-system
+ failed during initialization. */
+ if (prebuilt->autoinc_error == DB_UNSUPPORTED) {
+ error_result = ER_AUTOINC_READ_FAILED;
+ /* Set the error message to report too. */
+ my_error(ER_AUTOINC_READ_FAILED, MYF(0));
+ goto func_exit;
+ } else if (prebuilt->autoinc_error != DB_SUCCESS) {
+ error = (int) prebuilt->autoinc_error;
goto report_error;
}
@@ -7893,7 +7908,10 @@ ha_innobase::innobase_get_autoinc(
*value = dict_table_autoinc_read(prebuilt->table);
/* It should have been initialized during open. */
- ut_a(*value != 0);
+ if (*value == 0) {
+ prebuilt->autoinc_error = DB_UNSUPPORTED;
+ dict_table_autoinc_unlock(prebuilt->table);
+ }
}
return(ulong(prebuilt->autoinc_error));
@@ -7973,6 +7991,11 @@ ha_innobase::get_auto_increment(
invoking this method. So we are not sure if it's guaranteed to
be 0 or not. */
+ /* We need the upper limit of the col type to check for
+ whether we update the table autoinc counter or not. */
+ ulonglong col_max_value = innobase_get_int_col_max_value(
+ table->next_number_field);
+
/* Called for the first time ? */
if (trx->n_autoinc_rows == 0) {
@@ -7989,6 +8012,11 @@ ha_innobase::get_auto_increment(
/* Not in the middle of a mult-row INSERT. */
} else if (prebuilt->autoinc_last_value == 0) {
set_if_bigger(*first_value, autoinc);
+ /* Check for -ve values. */
+ } else if (*first_value > col_max_value && trx->n_autoinc_rows > 0) {
+ /* Set to next logical value. */
+ ut_a(autoinc > trx->n_autoinc_rows);
+ *first_value = (autoinc - trx->n_autoinc_rows) - 1;
}
*nb_reserved_values = trx->n_autoinc_rows;
@@ -7999,12 +8027,6 @@ ha_innobase::get_auto_increment(
ulonglong need;
ulonglong current;
ulonglong next_value;
- ulonglong col_max_value;
-
- /* We need the upper limit of the col type to check for
- whether we update the table autoinc counter or not. */
- col_max_value = innobase_get_int_col_max_value(
- table->next_number_field);
current = *first_value > col_max_value ? autoinc : *first_value;
need = *nb_reserved_values * increment;
@@ -8477,6 +8499,44 @@ innobase_set_cursor_view(
(cursor_view_t*) curview);
}
+/***********************************************************************
+If col_name is not NULL, check whether the named column is being
+renamed in the table. If col_name is not provided, check
+whether any one of columns in the table is being renamed. */
+static
+bool
+check_column_being_renamed(
+/*=======================*/
+ /* out: true if find the column
+ being renamed */
+ const TABLE* table, /* in: MySQL table */
+ const char* col_name) /* in: name of the column */
+{
+ uint k;
+ Field* field;
+
+ for (k = 0; k < table->s->fields; k++) {
+ field = table->field[k];
+
+ if (field->flags & FIELD_IS_RENAMED) {
+
+ /* If col_name is not provided, return
+ if the field is marked as being renamed. */
+ if (!col_name) {
+ return(true);
+ }
+
+ /* If col_name is provided, return only
+ if names match */
+ if (innobase_strcasecmp(field->field_name,
+ col_name) == 0) {
+ return(true);
+ }
+ }
+ }
+
+ return(false);
+}
/***********************************************************************
Check whether any of the given columns is being renamed in the table. */
@@ -8491,19 +8551,10 @@ column_is_being_renamed(
const char** col_names) /* in: names of the columns */
{
uint j;
- uint k;
- Field* field;
- const char* col_name;
for (j = 0; j < n_cols; j++) {
- col_name = col_names[j];
- for (k = 0; k < table->s->fields; k++) {
- field = table->field[k];
- if ((field->flags & FIELD_IS_RENAMED)
- && innobase_strcasecmp(field->field_name,
- col_name) == 0) {
- return(true);
- }
+ if (check_column_being_renamed(table, col_names[j])) {
+ return(true);
}
}
@@ -8585,6 +8636,15 @@ bool ha_innobase::check_if_incompatible_data(
return COMPATIBLE_DATA_NO;
}
+ /* For column rename operation, MySQL does not supply enough
+ information (new column name etc.) for InnoDB to make appropriate
+ system metadata change. To avoid system metadata inconsistency,
+ currently we can just request a table rebuild/copy by returning
+ COMPATIBLE_DATA_NO */
+ if (check_column_being_renamed(table, NULL)) {
+ return COMPATIBLE_DATA_NO;
+ }
+
/* Check if a column participating in a foreign key is being renamed.
There is no mechanism for updating InnoDB foreign key definitions. */
if (foreign_key_column_is_being_renamed(prebuilt, table)) {
diff --git a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
index e4620172860..015f30fe6c2 100644
--- a/storage/innobase/include/buf0rea.h
+++ b/storage/innobase/include/buf0rea.h
@@ -18,12 +18,10 @@ buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread. Does a random read-ahead if it seems
sensible. */
-
-ulint
+ibool
buf_read_page(
/*==========*/
- /* out: number of page read requests issued: this can
- be > 1 if read-ahead occurred */
+ /* out: TRUE if success, FALSE otherwise */
ulint space, /* in: space id */
ulint offset);/* in: page number */
/************************************************************************
diff --git a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c
index 4cc10931060..8f4b64cda96 100644
--- a/storage/innobase/lock/lock0lock.c
+++ b/storage/innobase/lock/lock0lock.c
@@ -306,6 +306,7 @@ FILE* lock_latest_err_file;
/* Flags for recursive deadlock search */
#define LOCK_VICTIM_IS_START 1
#define LOCK_VICTIM_IS_OTHER 2
+#define LOCK_EXCEED_MAX_DEPTH 3
/************************************************************************
Checks if a lock request results in a deadlock. */
@@ -332,16 +333,18 @@ lock_deadlock_recursive(
was found and we chose some other trx as a
victim: we must do the search again in this
last case because there may be another
- deadlock! */
+ deadlock!
+ LOCK_EXCEED_MAX_DEPTH if the lock search
+ exceeds max steps and/or max depth. */
trx_t* start, /* in: recursion starting point */
trx_t* trx, /* in: a transaction waiting for a lock */
lock_t* wait_lock, /* in: the lock trx is waiting to be granted */
ulint* cost, /* in/out: number of calculation steps thus
far: if this exceeds LOCK_MAX_N_STEPS_...
- we return LOCK_VICTIM_IS_START */
+ we return LOCK_EXCEED_MAX_DEPTH */
ulint depth); /* in: recursion depth: if this exceeds
LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
- return LOCK_VICTIM_IS_START */
+ return LOCK_EXCEED_MAX_DEPTH */
/*************************************************************************
Gets the nth bit of a record lock. */
@@ -3084,8 +3087,6 @@ lock_deadlock_occurs(
lock_t* lock, /* in: lock the transaction is requesting */
trx_t* trx) /* in: transaction */
{
- dict_table_t* table;
- dict_index_t* index;
trx_t* mark_trx;
ulint ret;
ulint cost = 0;
@@ -3107,31 +3108,50 @@ retry:
ret = lock_deadlock_recursive(trx, trx, lock, &cost, 0);
- if (ret == LOCK_VICTIM_IS_OTHER) {
+ switch (ret) {
+ case LOCK_VICTIM_IS_OTHER:
/* We chose some other trx as a victim: retry if there still
is a deadlock */
-
goto retry;
- }
- if (ret == LOCK_VICTIM_IS_START) {
- if (lock_get_type(lock) & LOCK_TABLE) {
- table = lock->un_member.tab_lock.table;
- index = NULL;
+ case LOCK_EXCEED_MAX_DEPTH:
+ /* If the lock search exceeds the max step
+ or the max depth, the current trx will be
+ the victim. Print its information. */
+ rewind(lock_latest_err_file);
+ ut_print_timestamp(lock_latest_err_file);
+
+ fputs("TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
+ " WAITS-FOR GRAPH, WE WILL ROLL BACK"
+ " FOLLOWING TRANSACTION \n",
+ lock_latest_err_file);
+
+ fputs("\n*** TRANSACTION:\n", lock_latest_err_file);
+ trx_print(lock_latest_err_file, trx, 3000);
+
+ fputs("*** WAITING FOR THIS LOCK TO BE GRANTED:\n",
+ lock_latest_err_file);
+
+ if (lock_get_type(lock) == LOCK_REC) {
+ lock_rec_print(lock_latest_err_file, lock);
} else {
- index = lock->index;
- table = index->table;
+ lock_table_print(lock_latest_err_file, lock);
}
+ break;
- lock_deadlock_found = TRUE;
-
+ case LOCK_VICTIM_IS_START:
fputs("*** WE ROLL BACK TRANSACTION (2)\n",
lock_latest_err_file);
+ break;
- return(TRUE);
+ default:
+ /* No deadlock detected*/
+ return(FALSE);
}
- return(FALSE);
+ lock_deadlock_found = TRUE;
+
+ return(TRUE);
}
/************************************************************************
@@ -3147,16 +3167,18 @@ lock_deadlock_recursive(
was found and we chose some other trx as a
victim: we must do the search again in this
last case because there may be another
- deadlock! */
+ deadlock!
+ LOCK_EXCEED_MAX_DEPTH if the lock search
+ exceeds max steps and/or max depth. */
trx_t* start, /* in: recursion starting point */
trx_t* trx, /* in: a transaction waiting for a lock */
lock_t* wait_lock, /* in: the lock trx is waiting to be granted */
ulint* cost, /* in/out: number of calculation steps thus
far: if this exceeds LOCK_MAX_N_STEPS_...
- we return LOCK_VICTIM_IS_START */
+ we return LOCK_EXCEED_MAX_DEPTH */
ulint depth) /* in: recursion depth: if this exceeds
LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
- return LOCK_VICTIM_IS_START */
+ return LOCK_EXCEED_MAX_DEPTH */
{
lock_t* lock;
ulint bit_no = ULINT_UNDEFINED;
@@ -3215,7 +3237,7 @@ lock_deadlock_recursive(
lock_trx = lock->trx;
- if (lock_trx == start || too_far) {
+ if (lock_trx == start) {
/* We came back to the recursion starting
point: a deadlock detected; or we have
@@ -3262,19 +3284,10 @@ lock_deadlock_recursive(
}
#ifdef UNIV_DEBUG
if (lock_print_waits) {
- fputs("Deadlock detected"
- " or too long search\n",
+ fputs("Deadlock detected\n",
stderr);
}
#endif /* UNIV_DEBUG */
- if (too_far) {
-
- fputs("TOO DEEP OR LONG SEARCH"
- " IN THE LOCK TABLE"
- " WAITS-FOR GRAPH\n", ef);
-
- return(LOCK_VICTIM_IS_START);
- }
if (trx_weight_cmp(wait_lock->trx,
start) >= 0) {
@@ -3310,6 +3323,21 @@ lock_deadlock_recursive(
return(LOCK_VICTIM_IS_OTHER);
}
+ if (too_far) {
+
+#ifdef UNIV_DEBUG
+ if (lock_print_waits) {
+ fputs("Deadlock search exceeds"
+ " max steps or depth.\n",
+ stderr);
+ }
+#endif /* UNIV_DEBUG */
+ /* The information about transaction/lock
+ to be rolled back is available in the top
+ level. Do not print anything here. */
+ return(LOCK_EXCEED_MAX_DEPTH);
+ }
+
if (lock_trx->que_state == TRX_QUE_LOCK_WAIT) {
/* Another trx ahead has requested lock in an
diff --git a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
index 085f62daacc..7373a97cfb0 100644
--- a/storage/innobase/os/os0file.c
+++ b/storage/innobase/os/os0file.c
@@ -759,7 +759,15 @@ next_file:
#ifdef HAVE_READDIR_R
ret = readdir_r(dir, (struct dirent*)dirent_buf, &ent);
- if (ret != 0) {
+ if (ret != 0
+#ifdef UNIV_AIX
+ /* On AIX, only if we got non-NULL 'ent' (result) value and
+ a non-zero 'ret' (return) value, it indicates a failed
+ readdir_r() call. A NULL 'ent' with a non-zero 'ret'
+ would indicate the "end of the directory" is reached. */
+ && ent != NULL
+#endif
+ ) {
fprintf(stderr,
"InnoDB: cannot read directory %s, error %lu\n",
dirname, (ulong)ret);
diff --git a/storage/innobase/plug.in.disabled b/storage/innobase/plug.in.disabled
index f7d2abed751..de7302dd6cc 100644
--- a/storage/innobase/plug.in.disabled
+++ b/storage/innobase/plug.in.disabled
@@ -1,4 +1,4 @@
-MYSQL_STORAGE_ENGINE(innobase, innodb, [InnoDB Storage Engine],
+MYSQL_STORAGE_ENGINE(innobase, innodb, [InnoDB Storage Engine],
[Transactional Tables using InnoDB], [max,max-no-ndb])
MYSQL_PLUGIN_DIRECTORY(innobase, [storage/innobase])
MYSQL_PLUGIN_STATIC(innobase, [libinnobase.a])
diff --git a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c
index 38902bca905..eee74f280af 100644
--- a/storage/innobase/row/row0sel.c
+++ b/storage/innobase/row/row0sel.c
@@ -4545,12 +4545,12 @@ row_search_autoinc_read_column(
case DATA_FLOAT:
ut_a(len == sizeof(float));
- value = mach_float_read(data);
+ value = (ib_ulonglong) mach_float_read(data);
break;
case DATA_DOUBLE:
ut_a(len == sizeof(double));
- value = mach_double_read(data);
+ value = (ib_ulonglong) mach_double_read(data);
break;
default:
diff --git a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c
index 19c5159e15f..f732aca93f5 100644
--- a/storage/innobase/trx/trx0sys.c
+++ b/storage/innobase/trx/trx0sys.c
@@ -507,8 +507,8 @@ trx_sys_doublewrite_init_or_restore_pages(
" recover the database"
" with the my.cnf\n"
"InnoDB: option:\n"
- "InnoDB: set-variable="
- "innodb_force_recovery=6\n");
+ "InnoDB:"
+ " innodb_force_recovery=6\n");
exit(1);
}
diff --git a/storage/innodb_plugin/CMakeLists.txt b/storage/innodb_plugin/CMakeLists.txt
index 36e157ae736..16d86d0b378 100644
--- a/storage/innodb_plugin/CMakeLists.txt
+++ b/storage/innodb_plugin/CMakeLists.txt
@@ -78,9 +78,12 @@ SET(INNODB_PLUGIN_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea
trx/trx0i_s.c trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c
trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c
usr/usr0sess.c
- ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
+ ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
ut/ut0list.c ut/ut0wqueue.c)
-ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DIB_HAVE_PAUSE_INSTRUCTION)
+# Windows atomics do not perform well. Disable Windows atomics by default.
+# See bug#52102 for details.
+#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION)
+ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION)
#Disable storage engine, as we are using XtraDB
#MYSQL_STORAGE_ENGINE(INNODB_PLUGIN)
diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog
index 1a6e07fd147..58e476716d8 100644
--- a/storage/innodb_plugin/ChangeLog
+++ b/storage/innodb_plugin/ChangeLog
@@ -1,3 +1,188 @@
+2010-03-18 The InnoDB Team
+
+ * CMakeLists.txt:
+ Fix Bug#52102 InnoDB Plugin shows performance drop compared to
+ InnoDB (Windows)
+
+2010-03-18 The InnoDB Team
+
+ * buf0buf.ic:
+ When comparing the time of the first access to a block against
+ innodb_old_blocks_time, use 32-bit arithmetic. The comparison was
+ incorrect on 64-bit systems.
+
+2010-03-11 The InnoDB Team
+
+ * buf0buf.h, buf0buf.ic:
+ Fix and clarify the latching of some buf_block_t members.
+ Note that check_index_page_at_flush is not protected by any mutex.
+ Note and assert that lock_hash_val is protected by the rw-latch.
+
+2010-03-10 The InnoDB Team
+
+ * trx/trx0sys.c:
+ Fix Bug#51653 outdated reference to set-variable
+
+2010-03-10 The InnoDB Team
+
+ * handler/ha_innodb.cc, mysql-test/innodb_bug21704.result,
+ mysql-test/innodb_bug47621.result, mysql-test/innodb_bug47621.test:
+ Fix Bug#47621 MySQL and InnoDB data dictionaries will become out of
+ sync when renaming columns
+
+2010-03-10 The InnoDB Team
+
+ * handler/ha_innodb.cc:
+ Fix Bug#51356 Many Valgrind errors in error messages
+ with concurrent DDL
+
+2010-03-10 The InnoDB Team
+
+ * handler/ha_innodb.cc, handler/handler0alter.cc,
+ mysql-test/innodb_bug51378.result, mysql-test/innodb_bug51378.test:
+ Fix Bug#51378 Init 'ref_length' to correct value, in case an out
+ of bound MySQL primary_key
+
+2010-03-10 The InnoDB Team
+
+ * log/log0recv.c:
+ Remove a bogus assertion about page numbers exceeding 0x90000000
+ in the redo log. Abort when encountering a corrupted redo log
+ record, unless innodb_force_recovery is set.
+
+2010-03-09 The InnoDB Team
+
+ * handler/ha_innodb.cc:
+ Make SHOW ENGINE INNODB MUTEX STATUS display SUM(os_waits)
+ for the buffer pool block mutexes and locks.
+
+2010-03-08 The InnoDB Team
+
+ * fil/fil0fil.c:
+ Fix ALTER TABLE ... IMPORT TABLESPACE of compressed tables.
+
+2010-03-03 The InnoDB Team
+
+ * handler/handler0alter.cc, innodb-index.result, innodb-index.test,
+ innodb.result, innodb.test:
+ Disallow a duplicate index name when creating an index.
+
+2010-02-11 The InnoDB Team
+
+ * include/mem0mem.h, include/mem0mem.ic, mem/mem0mem.c:
+ Fix Bug#49535 Available memory check slows down crash
+ recovery tens of times
+
+2010-02-09 The InnoDB Team
+
+ * buf/buf0buf.c:
+ Fix Bug#38901 InnoDB logs error repeatedly when trying to load
+ page into buffer pool
+
+2010-02-09 The InnoDB Team
+
+ * srv/srv0srv.c:
+ Let the master thread sleep if the amount of work to be done is
+ calibrated as taking less than a second.
+
+2010-02-04 The InnoDB Team
+
+ * btr/btr0btr.c, btr/btr0cur.c, btr/btr0pcur.c, buf/buf0buf.c,
+ include/btr0btr.h, include/btr0cur.h, include/btr0pcur.h,
+ include/btr0pcur.ic, include/buf0buf.h, row/row0ins.c, row/row0sel.c:
+ Pass the file name and line number of the caller of the
+ b-tree cursor functions to the buffer pool requests, in order
+ to make the latch diagnostics more accurate.
+
+2010-02-03 The InnoDB Team
+
+ * lock/lock0lock.c:
+ Fix Bug#49001 SHOW INNODB STATUS deadlock info incorrect
+ when deadlock detection aborts
+
+2010-02-03 The InnoDB Team
+
+ * buf/buf0lru.c:
+ Fix Bug#35077 Very slow DROP TABLE (ALTER TABLE, OPTIMIZE TABLE)
+ on compressed tables
+
+2010-02-03 The InnoDB Team
+
+ * handler/ha_innodb.cc, include/row0mysql.h, row/row0mysql.c:
+ Clean up CHECK TABLE error handling.
+
+2010-02-01 The InnoDB Team
+
+ * handler/ha_innodb.cc, mysql-test/innodb-autoinc.test,
+ mysql-test/innodb-autoinc.result,
+ mysql-test/innodb-autoinc-44030.test,
+ mysql-test/innodb-autoinc-44030.result:
+ Fix Bug#49497 Error 1467 (ER_AUTOINC_READ_FAILED) on inserting
+ a negative value
+
+2010-01-27 The InnoDB Team
+
+ * include/row0mysql.h, log/log0recv.c, row/row0mysql.c:
+ Drop temporary tables at startup.
+ This addresses the third aspect of
+ Bug#41609 Crash recovery does not work for InnoDB temporary tables.
+
+2010-01-21 The InnoDB Team
+
+ * buf/buf0buf.c:
+ Do not merge buffered inserts to compressed pages before
+ the redo log has been applied in crash recovery.
+
+2010-01-13 The InnoDB Team
+
+ * row/row0sel.c:
+ On the READ UNCOMMITTED isolation level, do not attempt to access
+ a clustered index record that has been marked for deletion. The
+ built-in InnoDB in MySQL 5.1 and earlier would attempt to retrieve
+ a previous version of the record in this case.
+
+2010-01-13 The InnoDB Team
+
+ * buf/buf0buf.c:
+ When disabling the adaptive hash index, check the block state
+ before checking block->is_hashed, because the latter may be
+ uninitialized right after server startup.
+
+2010-01-12 The InnoDB Team
+
+ * handler/ha_innodb.cc, handler/ha_innodb.h:
+ Fix Bug#46193 crash when accessing tables after enabling
+ innodb_force_recovery option
+
+2010-01-12 The InnoDB Team
+
+ * row/row0mysql.c:
+ Fix Bug#49238 Creating/Dropping a temporary table while at 1023
+ transactions will cause assert.
+
+2009-12-02 The InnoDB Team
+
+ * srv/srv0start.c:
+ Display the zlib version number at startup.
+ InnoDB compressed tables use zlib, and the implementation depends
+ on the zlib function compressBound(), whose definition was slightly
+ changed in zlib version 1.2.3.1 in 2006. MySQL bundles zlib 1.2.3
+ from 2005, but some installations use a more recent zlib.
+
+2009-11-30 The InnoDB Team
+
+ * dict/dict0crea.c, dict/dict0mem.c, dict/dict0load.c,
+ dict/dict0boot.c, fil/fil0fil.c, handler/ha_innodb.cc,
+ include/dict0mem.h, row/row0mysql.c:
+ Fix the bogus warning messages for non-existing temporary
+ tables that were reported in
+ Bug#41609 Crash recovery does not work for InnoDB temporary tables.
+ The actual crash recovery bug was corrected on 2009-04-29.
+
+2009-11-27 The InnoDB Team
+
+ InnoDB Plugin 1.0.6 released
+
2009-11-20 The InnoDB Team
* handler/ha_innodb.cc:
@@ -79,8 +264,8 @@
sync/sync0arr.c, sync/sync0sync.c, thr/thr0loc.c, trx/trx0i_s.c,
trx/trx0purge.c, trx/trx0rseg.c, trx/trx0sys.c, trx/trx0undo.c,
usr/usr0sess.c, ut/ut0mem.c:
- Fix Bug #45992 innodb memory not freed after shutdown
- Fix Bug #46656 InnoDB plugin: memory leaks (Valgrind)
+ Fix Bug#45992 innodb memory not freed after shutdown
+ Fix Bug#46656 InnoDB plugin: memory leaks (Valgrind)
2009-10-29 The InnoDB Team
@@ -422,7 +607,7 @@
* dict/dict0dict.c:
When an index column cannot be found in the table during index
creation, display additional diagnostic before an assertion failure.
- This does NOT fix Bug #44571 InnoDB Plugin crashes on ADD INDEX,
+ This does NOT fix Bug#44571 InnoDB Plugin crashes on ADD INDEX,
but it helps understand the reason of the crash.
2009-06-17 The InnoDB Team
@@ -535,6 +720,12 @@
Fix Bug#44320 InnoDB: missing DB_ROLL_PTR in Table Monitor COLUMNS
output
+2009-04-29 The InnoDB Team
+
+ * fil/fil0fil.c, include/fil0fil.h, include/mtr0mtr.h,
+ log/log0recv.c:
+ Fix Bug#41609 Crash recovery does not work for InnoDB temporary tables
+
2009-04-23 The InnoDB Team
* row/row0mysql.c:
diff --git a/storage/innodb_plugin/Makefile.am b/storage/innodb_plugin/Makefile.am
index 5c71fe18d14..1d0dd936895 100644
--- a/storage/innodb_plugin/Makefile.am
+++ b/storage/innodb_plugin/Makefile.am
@@ -217,6 +217,7 @@ noinst_HEADERS= \
include/ut0lst.h \
include/ut0mem.h \
include/ut0mem.ic \
+ include/ut0rbt.h \
include/ut0rnd.h \
include/ut0rnd.ic \
include/ut0sort.h \
@@ -318,6 +319,7 @@ libinnobase_a_SOURCES= \
ut/ut0dbg.c \
ut/ut0list.c \
ut/ut0mem.c \
+ ut/ut0rbt.c \
ut/ut0rnd.c \
ut/ut0ut.c \
ut/ut0vec.c \
diff --git a/storage/innodb_plugin/btr/btr0btr.c b/storage/innodb_plugin/btr/btr0btr.c
index 086b3a0a599..fa4fc05d96b 100644
--- a/storage/innodb_plugin/btr/btr0btr.c
+++ b/storage/innodb_plugin/btr/btr0btr.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -592,13 +592,15 @@ an x-latch on the tree.
@return rec_get_offsets() of the node pointer record */
static
ulint*
-btr_page_get_father_node_ptr(
-/*=========================*/
+btr_page_get_father_node_ptr_func(
+/*==============================*/
ulint* offsets,/*!< in: work area for the return value */
mem_heap_t* heap, /*!< in: memory heap to use */
btr_cur_t* cursor, /*!< in: cursor pointing to user record,
out: cursor on node pointer record,
its page x-latched */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
dtuple_t* tuple;
@@ -622,7 +624,8 @@ btr_page_get_father_node_ptr(
tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level);
btr_cur_search_to_nth_level(index, level + 1, tuple, PAGE_CUR_LE,
- BTR_CONT_MODIFY_TREE, cursor, 0, mtr);
+ BTR_CONT_MODIFY_TREE, cursor, 0,
+ file, line, mtr);
node_ptr = btr_cur_get_rec(cursor);
ut_ad(!page_rec_is_comp(node_ptr)
@@ -670,6 +673,9 @@ btr_page_get_father_node_ptr(
return(offsets);
}
+#define btr_page_get_father_node_ptr(of,heap,cur,mtr) \
+ btr_page_get_father_node_ptr_func(of,heap,cur,__FILE__,__LINE__,mtr)
+
/************************************************************//**
Returns the upper level node pointer to a page. It is assumed that mtr holds
an x-latch on the tree.
@@ -1662,11 +1668,13 @@ Inserts a data tuple to a tree on a non-leaf level. It is assumed
that mtr holds an x-latch on the tree. */
UNIV_INTERN
void
-btr_insert_on_non_leaf_level(
-/*=========================*/
+btr_insert_on_non_leaf_level_func(
+/*==============================*/
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: level, must be > 0 */
dtuple_t* tuple, /*!< in: the record to be inserted */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
big_rec_t* dummy_big_rec;
@@ -1678,7 +1686,7 @@ btr_insert_on_non_leaf_level(
btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE,
BTR_CONT_MODIFY_TREE,
- &cursor, 0, mtr);
+ &cursor, 0, file, line, mtr);
err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
| BTR_KEEP_SYS_FLAG
diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c
index 46dfb5d1a46..504bd162f32 100644
--- a/storage/innodb_plugin/btr/btr0cur.c
+++ b/storage/innodb_plugin/btr/btr0cur.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -342,6 +342,8 @@ btr_cur_search_to_nth_level(
ulint has_search_latch,/*!< in: info on the latch mode the
caller currently has on btr_search_latch:
RW_S_LATCH, or 0 */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
page_cur_t* page_cursor;
@@ -520,7 +522,7 @@ btr_cur_search_to_nth_level(
retry_page_get:
block = buf_page_get_gen(space, zip_size, page_no,
rw_latch, guess, buf_mode,
- __FILE__, __LINE__, mtr);
+ file, line, mtr);
if (block == NULL) {
/* This must be a search to perform an insert;
try insert to the insert buffer */
@@ -677,13 +679,15 @@ func_exit:
Opens a cursor at either end of an index. */
UNIV_INTERN
void
-btr_cur_open_at_index_side(
-/*=======================*/
+btr_cur_open_at_index_side_func(
+/*============================*/
ibool from_left, /*!< in: TRUE if open to the low end,
FALSE if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
btr_cur_t* cursor, /*!< in: cursor */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
page_cur_t* page_cursor;
@@ -728,7 +732,7 @@ btr_cur_open_at_index_side(
page_t* page;
block = buf_page_get_gen(space, zip_size, page_no,
RW_NO_LATCH, NULL, BUF_GET,
- __FILE__, __LINE__, mtr);
+ file, line, mtr);
page = buf_block_get_frame(block);
ut_ad(0 == ut_dulint_cmp(index->id,
btr_page_get_index_id(page)));
@@ -808,11 +812,13 @@ btr_cur_open_at_index_side(
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INTERN
void
-btr_cur_open_at_rnd_pos(
-/*====================*/
+btr_cur_open_at_rnd_pos_func(
+/*=========================*/
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_cur_t* cursor, /*!< in/out: B-tree cursor */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
page_cur_t* page_cursor;
@@ -847,7 +853,7 @@ btr_cur_open_at_rnd_pos(
block = buf_page_get_gen(space, zip_size, page_no,
RW_NO_LATCH, NULL, BUF_GET,
- __FILE__, __LINE__, mtr);
+ file, line, mtr);
page = buf_block_get_frame(block);
ut_ad(0 == ut_dulint_cmp(index->id,
btr_page_get_index_id(page)));
@@ -3100,7 +3106,8 @@ btr_estimate_n_rows_in_range(
btr_cur_search_to_nth_level(index, 0, tuple1, mode1,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, 0, &mtr);
+ &cursor, 0,
+ __FILE__, __LINE__, &mtr);
} else {
btr_cur_open_at_index_side(TRUE, index,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
@@ -3117,7 +3124,8 @@ btr_estimate_n_rows_in_range(
btr_cur_search_to_nth_level(index, 0, tuple2, mode2,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, 0, &mtr);
+ &cursor, 0,
+ __FILE__, __LINE__, &mtr);
} else {
btr_cur_open_at_index_side(FALSE, index,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
@@ -4252,7 +4260,7 @@ btr_free_externally_stored_field(
/* In the rollback of uncommitted transactions, we may
encounter a clustered index record whose BLOBs have
not been written. There is nothing to free then. */
- ut_a(rb_ctx == RB_RECOVERY);
+ ut_a(rb_ctx == RB_RECOVERY || rb_ctx == RB_RECOVERY_PURGE_REC);
return;
}
@@ -4298,7 +4306,7 @@ btr_free_externally_stored_field(
|| (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
& BTR_EXTERN_OWNER_FLAG)
/* Rollback and inherited field */
- || (rb_ctx != RB_NONE
+ || ((rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY)
&& (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
& BTR_EXTERN_INHERITED_FLAG))) {
diff --git a/storage/innodb_plugin/btr/btr0pcur.c b/storage/innodb_plugin/btr/btr0pcur.c
index ec98692c35b..658901208ef 100644
--- a/storage/innodb_plugin/btr/btr0pcur.c
+++ b/storage/innodb_plugin/btr/btr0pcur.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -205,10 +205,12 @@ record and it can be restored on a user record whose ordering fields
are identical to the ones of the original user record */
UNIV_INTERN
ibool
-btr_pcur_restore_position(
-/*======================*/
+btr_pcur_restore_position_func(
+/*===========================*/
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in: detached persistent cursor */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
dict_index_t* index;
@@ -217,6 +219,9 @@ btr_pcur_restore_position(
ulint old_mode;
mem_heap_t* heap;
+ ut_ad(mtr);
+ ut_ad(mtr->state == MTR_ACTIVE);
+
index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
if (UNIV_UNLIKELY(cursor->old_stored != BTR_PCUR_OLD_STORED)
@@ -257,7 +262,8 @@ btr_pcur_restore_position(
if (UNIV_LIKELY(buf_page_optimistic_get(
latch_mode,
cursor->block_when_stored,
- cursor->modify_clock, mtr))) {
+ cursor->modify_clock,
+ file, line, mtr))) {
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
buf_block_dbg_add_level(btr_pcur_get_block(cursor),
@@ -312,8 +318,8 @@ btr_pcur_restore_position(
mode = PAGE_CUR_L;
}
- btr_pcur_open_with_no_init(index, tuple, mode, latch_mode,
- cursor, 0, mtr);
+ btr_pcur_open_with_no_init_func(index, tuple, mode, latch_mode,
+ cursor, 0, file, line, mtr);
/* Restore the old search mode */
cursor->search_mode = old_mode;
@@ -553,8 +559,8 @@ before first in tree. The latching mode must be BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF. */
UNIV_INTERN
void
-btr_pcur_open_on_user_rec(
-/*======================*/
+btr_pcur_open_on_user_rec_func(
+/*===========================*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
ulint mode, /*!< in: PAGE_CUR_L, ... */
@@ -562,9 +568,12 @@ btr_pcur_open_on_user_rec(
BTR_MODIFY_LEAF */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent
cursor */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
- btr_pcur_open(index, tuple, mode, latch_mode, cursor, mtr);
+ btr_pcur_open_func(index, tuple, mode, latch_mode, cursor,
+ file, line, mtr);
if ((mode == PAGE_CUR_GE) || (mode == PAGE_CUR_G)) {
diff --git a/storage/innodb_plugin/buf/buf0buddy.c b/storage/innodb_plugin/buf/buf0buddy.c
index f0e1395c307..66d802f8a36 100644
--- a/storage/innodb_plugin/buf/buf0buddy.c
+++ b/storage/innodb_plugin/buf/buf0buddy.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2006, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -391,6 +391,8 @@ buf_buddy_relocate_block(
UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
}
+ UNIV_MEM_INVALID(bpage, sizeof *bpage);
+
mutex_exit(&buf_pool_zip_mutex);
return(TRUE);
}
diff --git a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c
index 111d396fbc5..d4a88565570 100644
--- a/storage/innodb_plugin/buf/buf0buf.c
+++ b/storage/innodb_plugin/buf/buf0buf.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -242,6 +242,8 @@ the read requests for the whole area.
#ifndef UNIV_HOTBACKUP
/** Value in microseconds */
static const int WAIT_FOR_READ = 5000;
+/** Number of attempts made to read in a page in the buffer pool */
+static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
/** The buffer buf_pool of the database */
UNIV_INTERN buf_pool_t* buf_pool = NULL;
@@ -1058,7 +1060,9 @@ buf_pool_drop_hash_index(void)
when we have an x-latch on btr_search_latch;
see the comment in buf0buf.h */
- if (!block->is_hashed) {
+ if (buf_block_get_state(block)
+ != BUF_BLOCK_FILE_PAGE
+ || !block->is_hashed) {
continue;
}
@@ -1187,8 +1191,6 @@ buf_relocate(
HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
-
- UNIV_MEM_INVALID(bpage, sizeof *bpage);
}
/********************************************************************//**
@@ -2034,8 +2036,10 @@ buf_page_get_gen(
unsigned access_time;
ulint fix_type;
ibool must_read;
+ ulint retries = 0;
ut_ad(mtr);
+ ut_ad(mtr->state == MTR_ACTIVE);
ut_ad((rw_latch == RW_S_LATCH)
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_NO_LATCH));
@@ -2088,7 +2092,29 @@ loop2:
return(NULL);
}
- buf_read_page(space, zip_size, offset);
+ if (buf_read_page(space, zip_size, offset)) {
+ retries = 0;
+ } else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
+ ++retries;
+ } else {
+ fprintf(stderr, "InnoDB: Error: Unable"
+ " to read tablespace %lu page no"
+ " %lu into the buffer pool after"
+ " %lu attempts\n"
+ "InnoDB: The most probable cause"
+ " of this error may be that the"
+ " table has been corrupted.\n"
+ "InnoDB: You can try to fix this"
+ " problem by using"
+ " innodb_force_recovery.\n"
+ "InnoDB: Please see reference manual"
+ " for more details.\n"
+ "InnoDB: Aborting...\n",
+ space, offset,
+ BUF_PAGE_READ_MAX_RETRIES);
+
+ ut_error;
+ }
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 37 || buf_validate());
@@ -2196,22 +2222,8 @@ wait_until_unfixed:
ut_ad(!block->page.in_flush_list);
} else {
/* Relocate buf_pool->flush_list. */
- buf_page_t* b;
-
- b = UT_LIST_GET_PREV(list, &block->page);
- ut_ad(block->page.in_flush_list);
- UT_LIST_REMOVE(list, buf_pool->flush_list,
- &block->page);
-
- if (b) {
- UT_LIST_INSERT_AFTER(
- list, buf_pool->flush_list, b,
- &block->page);
- } else {
- UT_LIST_ADD_FIRST(
- list, buf_pool->flush_list,
- &block->page);
- }
+ buf_flush_relocate_on_flush_list(bpage,
+ &block->page);
}
/* Buffer-fix, I/O-fix, and X-latch the block
@@ -2225,6 +2237,9 @@ wait_until_unfixed:
block->page.buf_fix_count = 1;
buf_block_set_io_fix(block, BUF_IO_READ);
rw_lock_x_lock(&block->lock);
+
+ UNIV_MEM_INVALID(bpage, sizeof *bpage);
+
mutex_exit(&block->mutex);
mutex_exit(&buf_pool_zip_mutex);
buf_pool->n_pend_unzip++;
@@ -2237,7 +2252,7 @@ wait_until_unfixed:
while not holding buf_pool_mutex or block->mutex. */
success = buf_zip_decompress(block, srv_use_checksums);
- if (UNIV_LIKELY(success)) {
+ if (UNIV_LIKELY(success && !recv_no_ibuf_operations)) {
ibuf_merge_or_delete_for_page(block, space, offset,
zip_size, TRUE);
}
@@ -2356,8 +2371,8 @@ page.
@return TRUE if success */
UNIV_INTERN
ibool
-buf_page_optimistic_get_func(
-/*=========================*/
+buf_page_optimistic_get(
+/*====================*/
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
buf_block_t* block, /*!< in: guessed buffer block */
ib_uint64_t modify_clock,/*!< in: modify clock value if mode is
@@ -2370,7 +2385,9 @@ buf_page_optimistic_get_func(
ibool success;
ulint fix_type;
- ut_ad(mtr && block);
+ ut_ad(block);
+ ut_ad(mtr);
+ ut_ad(mtr->state == MTR_ACTIVE);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
mutex_enter(&block->mutex);
@@ -2482,6 +2499,7 @@ buf_page_get_known_nowait(
ulint fix_type;
ut_ad(mtr);
+ ut_ad(mtr->state == MTR_ACTIVE);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
mutex_enter(&block->mutex);
@@ -2581,6 +2599,9 @@ buf_page_try_get_func(
ibool success;
ulint fix_type;
+ ut_ad(mtr);
+ ut_ad(mtr->state == MTR_ACTIVE);
+
buf_pool_mutex_enter();
block = buf_block_hash_get(space_id, page_no);
@@ -2954,6 +2975,7 @@ buf_page_create(
ulint time_ms = ut_time_ms();
ut_ad(mtr);
+ ut_ad(mtr->state == MTR_ACTIVE);
ut_ad(space || !zip_size);
free_block = buf_LRU_get_free_block(0);
diff --git a/storage/innodb_plugin/buf/buf0flu.c b/storage/innodb_plugin/buf/buf0flu.c
index 8b614ce90e5..ac62a7e9537 100644
--- a/storage/innodb_plugin/buf/buf0flu.c
+++ b/storage/innodb_plugin/buf/buf0flu.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -88,6 +88,138 @@ buf_flush_validate_low(void);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/********************************************************************//**
+Inserts a block into the flush_rbt and returns a pointer to its
+predecessor, or NULL if there is no predecessor. The ordering is
+maintained on the basis of the <oldest_modification, space, offset> key.
+@return pointer to the predecessor, or NULL if there is no predecessor */
+static
+buf_page_t*
+buf_flush_insert_in_flush_rbt(
+/*==========================*/
+	buf_page_t*	bpage)	/*!< in: bpage to be inserted. */
+{
+	const ib_rbt_node_t*	inserted;
+	const ib_rbt_node_t*	before;
+	buf_page_t*		pred;
+
+	ut_ad(buf_pool_mutex_own());
+
+	/* The address of the local pointer is handed over as both the
+	key and the value argument of the insertion. */
+	inserted = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
+	ut_a(inserted != NULL);
+
+	/* Look up the node that sorts immediately ahead of the new one. */
+	before = rbt_prev(buf_pool->flush_rbt, inserted);
+
+	if (before == NULL) {
+		/* The new node is the first one in tree order. */
+		return(NULL);
+	}
+
+	pred = *rbt_value(buf_page_t*, before);
+	ut_a(pred != NULL);
+
+	return(pred);
+}
+
+/********************************************************************//**
+Removes a bpage from the flush_rbt. The caller is expected to hold the
+buffer pool mutex (asserted in debug builds). */
+static
+void
+buf_flush_delete_from_flush_rbt(
+/*============================*/
+	buf_page_t*	bpage)	/*!< in: bpage to be removed. */
+{
+	ibool	deleted;
+
+	ut_ad(buf_pool_mutex_own());
+
+	/* The address of the local pointer serves as the lookup key. */
+	deleted = rbt_delete(buf_pool->flush_rbt, &bpage);
+
+	/* The page must have been present in the tree. */
+	ut_ad(deleted);
+}
+
+/********************************************************************//**
+Compare two modified blocks in the buffer pool. The key for comparison
+is:
+key = <oldest_modification, space, offset>
+This comparison is used to maintain ordering of blocks in the
+buf_pool->flush_rbt.
+Note that for the purpose of flush_rbt, we only need to order blocks
+on the oldest_modification. The other two fields are used to uniquely
+identify the blocks.
+Every key component is compared explicitly rather than subtracted, so
+that the sign of the result stays correct even when two values differ
+by more than INT_MAX.
+@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
+static
+int
+buf_flush_block_cmp(
+/*================*/
+	const void*	p1,		/*!< in: block1 */
+	const void*	p2)		/*!< in: block2 */
+{
+	const buf_page_t*	b1;
+	const buf_page_t*	b2;
+
+	ut_ad(p1 != NULL);
+	ut_ad(p2 != NULL);
+
+	/* p1 and p2 point at stored buf_page_t pointers, hence the
+	extra dereference. */
+	b1 = *(const buf_page_t* const*) p1;
+	b2 = *(const buf_page_t* const*) p2;
+
+	ut_ad(b1 != NULL);
+	ut_ad(b2 != NULL);
+
+	ut_ad(b1->in_flush_list);
+	ut_ad(b2->in_flush_list);
+
+	/* Primary key: oldest_modification. */
+	if (b2->oldest_modification != b1->oldest_modification) {
+		return(b2->oldest_modification > b1->oldest_modification
+		       ? 1 : -1);
+	}
+
+	/* If oldest_modification is same then decide on the space. */
+	if (b2->space != b1->space) {
+		return(b2->space > b1->space ? 1 : -1);
+	}
+
+	/* Or else decide ordering on the offset field. */
+	if (b2->offset != b1->offset) {
+		return(b2->offset > b1->offset ? 1 : -1);
+	}
+
+	return(0);
+}
+
+/********************************************************************//**
+Initialize the red-black tree to speed up insertions into the flush_list
+during recovery process. Should be called at the start of recovery
+process before any page has been read/written.
+The tree holds buf_page_t pointers ordered by buf_flush_block_cmp().
+While buf_pool->flush_rbt is non-NULL, buf_flush_insert_into_flush_list()
+hands insertions over to buf_flush_insert_sorted_into_flush_list(). */
+UNIV_INTERN
+void
+buf_flush_init_flush_rbt(void)
+/*==========================*/
+{
+ /* The tree pointer is published while holding the buffer pool mutex. */
+ buf_pool_mutex_enter();
+
+ /* Create red black tree for speedy insertions in flush list. */
+ buf_pool->flush_rbt = rbt_create(sizeof(buf_page_t*),
+ buf_flush_block_cmp);
+ buf_pool_mutex_exit();
+}
+
+/********************************************************************//**
+Frees up the red-black tree.
+buf_pool->flush_rbt is reset to NULL afterwards, which is the condition
+the flush list code tests to decide whether the tree must be maintained. */
+UNIV_INTERN
+void
+buf_flush_free_flush_rbt(void)
+/*==========================*/
+{
+ buf_pool_mutex_enter();
+
+ /* In debug builds, check the flush list (and the tree, while it
+ still exists) before tearing the tree down. */
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(buf_flush_validate_low());
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+ rbt_free(buf_pool->flush_rbt);
+ /* A NULL tree pointer makes later insertions take the plain
+ flush_list path. */
+ buf_pool->flush_rbt = NULL;
+
+ buf_pool_mutex_exit();
+}
+
+/********************************************************************//**
Inserts a modified block into the flush list. */
UNIV_INTERN
void
@@ -100,6 +232,13 @@ buf_flush_insert_into_flush_list(
|| (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
<= block->page.oldest_modification));
+ /* If we are in recovery, then we need to update the flush
+ red-black tree as well. */
+ if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+ buf_flush_insert_sorted_into_flush_list(block);
+ return;
+ }
+
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.in_LRU_list);
ut_ad(block->page.in_page_hash);
@@ -136,12 +275,27 @@ buf_flush_insert_sorted_into_flush_list(
ut_d(block->page.in_flush_list = TRUE);
prev_b = NULL;
- b = UT_LIST_GET_FIRST(buf_pool->flush_list);
- while (b && b->oldest_modification > block->page.oldest_modification) {
- ut_ad(b->in_flush_list);
- prev_b = b;
- b = UT_LIST_GET_NEXT(list, b);
+ /* For the most part when this function is called the flush_rbt
+ should not be NULL. In a very rare boundary case it is possible
+ that the flush_rbt has already been freed by the recovery thread
+ before the last page was hooked up in the flush_list by the
+ io-handler thread. In that case we'll just do a simple
+ linear search in the else block. */
+ if (buf_pool->flush_rbt) {
+
+ prev_b = buf_flush_insert_in_flush_rbt(&block->page);
+
+ } else {
+
+ b = UT_LIST_GET_FIRST(buf_pool->flush_list);
+
+ while (b && b->oldest_modification
+ > block->page.oldest_modification) {
+ ut_ad(b->in_flush_list);
+ prev_b = b;
+ b = UT_LIST_GET_NEXT(list, b);
+ }
}
if (prev_b == NULL) {
@@ -237,7 +391,6 @@ buf_flush_remove(
ut_ad(buf_pool_mutex_own());
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(bpage->in_flush_list);
- ut_d(bpage->in_flush_list = FALSE);
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_ZIP_PAGE:
@@ -259,6 +412,15 @@ buf_flush_remove(
break;
}
+ /* If the flush_rbt is active then delete from it as well. */
+ if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+ buf_flush_delete_from_flush_rbt(bpage);
+ }
+
+ /* Must be done after we have removed it from the flush_rbt
+ because we assert on in_flush_list in comparison function. */
+ ut_d(bpage->in_flush_list = FALSE);
+
bpage->oldest_modification = 0;
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
@@ -266,6 +428,63 @@ buf_flush_remove(
}
/********************************************************************//**
+Relocates a buffer control block on the flush_list.
+Note that it is assumed that the contents of bpage have already been
+copied to dpage. */
+UNIV_INTERN
+void
+buf_flush_relocate_on_flush_list(
+/*=============================*/
+ buf_page_t* bpage, /*!< in/out: control block being moved */
+ buf_page_t* dpage) /*!< in/out: destination block */
+{
+ /* prev: predecessor of bpage in the flush_list.
+ prev_b: predecessor of dpage in the flush_rbt; stays NULL when
+ the tree is not active. */
+ buf_page_t* prev;
+ buf_page_t* prev_b = NULL;
+
+ ut_ad(buf_pool_mutex_own());
+
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+
+ /* Both descriptors are expected to carry the flag on entry;
+ presumably dpage got it when bpage was copied into it by the
+ caller -- confirm against the call site. */
+ ut_ad(bpage->in_flush_list);
+ ut_ad(dpage->in_flush_list);
+
+ /* If recovery is active we must swap the control blocks in
+ the flush_rbt as well. */
+ if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+ buf_flush_delete_from_flush_rbt(bpage);
+ prev_b = buf_flush_insert_in_flush_rbt(dpage);
+ }
+
+ /* Must be done after we have removed it from the flush_rbt
+ because we assert on in_flush_list in comparison function. */
+ ut_d(bpage->in_flush_list = FALSE);
+
+ /* Remember the list predecessor of bpage before unlinking it,
+ so that dpage can be linked into the very same position. */
+ prev = UT_LIST_GET_PREV(list, bpage);
+ UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
+
+ if (prev) {
+ ut_ad(prev->in_flush_list);
+ UT_LIST_INSERT_AFTER(
+ list,
+ buf_pool->flush_list,
+ prev, dpage);
+ } else {
+ /* bpage was the first element, so dpage becomes the first. */
+ UT_LIST_ADD_FIRST(
+ list,
+ buf_pool->flush_list,
+ dpage);
+ }
+
+ /* Just an extra check. Previous in flush_list
+ should be the same control block as in flush_rbt. */
+ ut_a(!buf_pool->flush_rbt || prev_b == prev);
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(buf_flush_validate_low());
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+}
+
+/********************************************************************//**
Updates the flush system data structures when a write is completed. */
UNIV_INTERN
void
@@ -1367,24 +1586,45 @@ ibool
buf_flush_validate_low(void)
/*========================*/
{
- buf_page_t* bpage;
+ buf_page_t* bpage;
+ const ib_rbt_node_t* rnode = NULL;
UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
ut_ad(ut_list_node_313->in_flush_list));
bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
+ /* If we are in recovery mode, i.e. flush_rbt != NULL,
+ then each block in the flush_list must also be present
+ in the flush_rbt. */
+ if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+ rnode = rbt_first(buf_pool->flush_rbt);
+ }
+
while (bpage != NULL) {
const ib_uint64_t om = bpage->oldest_modification;
ut_ad(bpage->in_flush_list);
ut_a(buf_page_in_file(bpage));
ut_a(om > 0);
+ if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+ /* Declared at the top of the block: the file keeps
+ declarations ahead of statements. */
+ buf_page_t* rpage;
+
+ /* The list and the tree are walked in lockstep, so the
+ current tree node must hold the current list element. */
+ ut_a(rnode);
+ rpage = *rbt_value(buf_page_t*,
+ rnode);
+ ut_a(rpage);
+ ut_a(rpage == bpage);
+ rnode = rbt_next(buf_pool->flush_rbt, rnode);
+ }
+
bpage = UT_LIST_GET_NEXT(list, bpage);
ut_a(!bpage || om >= bpage->oldest_modification);
}
+ /* By this time we must have exhausted the traversal of
+ flush_rbt (if active) as well. */
+ ut_a(rnode == NULL);
+
return(TRUE);
}
diff --git a/storage/innodb_plugin/buf/buf0lru.c b/storage/innodb_plugin/buf/buf0lru.c
index 4f19fd13fa5..9cfa02ba3ac 100644
--- a/storage/innodb_plugin/buf/buf0lru.c
+++ b/storage/innodb_plugin/buf/buf0lru.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -350,17 +350,31 @@ scan_again:
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
while (bpage != NULL) {
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
buf_page_t* prev_bpage;
+ ibool prev_bpage_buf_fix = FALSE;
ut_a(buf_page_in_file(bpage));
- mutex_enter(block_mutex);
prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
- if (buf_page_get_space(bpage) == id) {
- if (bpage->buf_fix_count > 0
- || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+ /* bpage->space and bpage->io_fix are protected by
+ buf_pool_mutex and block_mutex. It is safe to check
+ them while holding buf_pool_mutex only. */
+
+ if (buf_page_get_space(bpage) != id) {
+ /* Skip this block, as it does not belong to
+ the space that is being invalidated. */
+ } else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+ /* We cannot remove this page during this scan
+ yet; maybe the system is currently reading it
+ in, or flushing the modifications to the file */
+
+ all_freed = FALSE;
+ } else {
+ mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ mutex_enter(block_mutex);
+
+ if (bpage->buf_fix_count > 0) {
/* We cannot remove this page during
this scan yet; maybe the system is
@@ -380,8 +394,40 @@ scan_again:
(ulong) buf_page_get_page_no(bpage));
}
#endif
- if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE
- && ((buf_block_t*) bpage)->is_hashed) {
+ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
+ /* This is a compressed-only block
+ descriptor. Ensure that prev_bpage
+ cannot be relocated when bpage is freed. */
+ if (UNIV_LIKELY(prev_bpage != NULL)) {
+ switch (buf_page_get_state(
+ prev_bpage)) {
+ case BUF_BLOCK_FILE_PAGE:
+ /* Descriptors of uncompressed
+ blocks will not be relocated,
+ because we are holding the
+ buf_pool_mutex. */
+ break;
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+ /* Descriptors of compressed-
+ only blocks can be relocated,
+ unless they are buffer-fixed.
+ Because both bpage and
+ prev_bpage are protected by
+ buf_pool_zip_mutex, it is
+ not necessary to acquire
+ further mutexes. */
+ ut_ad(&buf_pool_zip_mutex
+ == block_mutex);
+ ut_ad(mutex_own(block_mutex));
+ prev_bpage_buf_fix = TRUE;
+ prev_bpage->buf_fix_count++;
+ break;
+ default:
+ ut_error;
+ }
+ }
+ } else if (((buf_block_t*) bpage)->is_hashed) {
ulint page_no;
ulint zip_size;
@@ -405,7 +451,8 @@ scan_again:
buf_flush_remove(bpage);
}
- /* Remove from the LRU list */
+ /* Remove from the LRU list. */
+
if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
!= BUF_BLOCK_ZIP_FREE) {
buf_LRU_block_free_hashed_page((buf_block_t*)
@@ -417,18 +464,27 @@ scan_again:
ut_ad(block_mutex == &buf_pool_zip_mutex);
ut_ad(!mutex_own(block_mutex));
- /* The compressed block descriptor
- (bpage) has been deallocated and
- block_mutex released. Also,
- buf_buddy_free() may have relocated
- prev_bpage. Rescan the LRU list. */
+ if (prev_bpage_buf_fix) {
+ /* We temporarily buffer-fixed
+ prev_bpage, so that
+ buf_buddy_free() could not
+ relocate it, in case it was a
+ compressed-only block
+ descriptor. */
+
+ mutex_enter(block_mutex);
+ ut_ad(prev_bpage->buf_fix_count > 0);
+ prev_bpage->buf_fix_count--;
+ mutex_exit(block_mutex);
+ }
- bpage = UT_LIST_GET_LAST(buf_pool->LRU);
- continue;
+ goto next_page_no_mutex;
}
- }
next_page:
- mutex_exit(block_mutex);
+ mutex_exit(block_mutex);
+ }
+
+next_page_no_mutex:
bpage = prev_bpage;
}
@@ -1474,26 +1530,8 @@ alloc:
if (b->state == BUF_BLOCK_ZIP_PAGE) {
buf_LRU_insert_zip_clean(b);
} else {
- buf_page_t* prev;
-
- ut_ad(b->in_flush_list);
- ut_d(bpage->in_flush_list = FALSE);
-
- prev = UT_LIST_GET_PREV(list, b);
- UT_LIST_REMOVE(list, buf_pool->flush_list, b);
-
- if (prev) {
- ut_ad(prev->in_flush_list);
- UT_LIST_INSERT_AFTER(
- list,
- buf_pool->flush_list,
- prev, b);
- } else {
- UT_LIST_ADD_FIRST(
- list,
- buf_pool->flush_list,
- b);
- }
+ /* Relocate on buf_pool->flush_list. */
+ buf_flush_relocate_on_flush_list(bpage, b);
}
bpage->zip.data = NULL;
diff --git a/storage/innodb_plugin/buf/buf0rea.c b/storage/innodb_plugin/buf/buf0rea.c
index dd98ea17eb5..81f788baac2 100644
--- a/storage/innodb_plugin/buf/buf0rea.c
+++ b/storage/innodb_plugin/buf/buf0rea.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -608,14 +608,14 @@ buf_read_recv_pages(
while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
os_aio_simulated_wake_handler_threads();
- os_thread_sleep(500000);
+ os_thread_sleep(10000);
count++;
- if (count > 100) {
+ if (count > 1000) {
fprintf(stderr,
"InnoDB: Error: InnoDB has waited for"
- " 50 seconds for pending\n"
+ " 10 seconds for pending\n"
"InnoDB: reads to the buffer pool to"
" be finished.\n"
"InnoDB: Number of pending reads %lu,"
diff --git a/storage/innodb_plugin/dict/dict0boot.c b/storage/innodb_plugin/dict/dict0boot.c
index e55de30481b..70b5bfa99f7 100644
--- a/storage/innodb_plugin/dict/dict0boot.c
+++ b/storage/innodb_plugin/dict/dict0boot.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -274,6 +274,9 @@ dict_boot(void)
and (TYPE & DICT_TF_FORMAT_MASK) are nonzero and TYPE = table->flags */
dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0);
+ /* MIX_LEN may contain additional table flags when
+ ROW_FORMAT!=REDUNDANT. Currently, these flags include
+ DICT_TF2_TEMPORARY. */
dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "CLUSTER_NAME", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
@@ -355,7 +358,7 @@ dict_boot(void)
dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4);
- /* The '+ 2' below comes from the 2 system fields */
+ /* The '+ 2' below comes from the fields DB_TRX_ID, DB_ROLL_PTR */
#if DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2
#error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2"
#endif
@@ -365,6 +368,9 @@ dict_boot(void)
#if DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2
#error "DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2"
#endif
+#if DICT_SYS_INDEXES_NAME_FIELD != 1 + 2
+#error "DICT_SYS_INDEXES_NAME_FIELD != 1 + 2"
+#endif
table->id = DICT_INDEXES_ID;
dict_table_add_to_cache(table, heap);
diff --git a/storage/innodb_plugin/dict/dict0crea.c b/storage/innodb_plugin/dict/dict0crea.c
index 96a9bd8152e..4ba7cd8a48c 100644
--- a/storage/innodb_plugin/dict/dict0crea.c
+++ b/storage/innodb_plugin/dict/dict0crea.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -51,16 +51,18 @@ static
dtuple_t*
dict_create_sys_tables_tuple(
/*=========================*/
- dict_table_t* table, /*!< in: table */
- mem_heap_t* heap) /*!< in: memory heap from which the memory for
- the built tuple is allocated */
+ const dict_table_t* table, /*!< in: table */
+ mem_heap_t* heap) /*!< in: memory heap from
+ which the memory for the built
+ tuple is allocated */
{
dict_table_t* sys_tables;
dtuple_t* entry;
dfield_t* dfield;
byte* ptr;
- ut_ad(table && heap);
+ ut_ad(table);
+ ut_ad(heap);
sys_tables = dict_sys->sys_tables;
@@ -69,18 +71,18 @@ dict_create_sys_tables_tuple(
dict_table_copy_types(entry, sys_tables);
/* 0: NAME -----------------------------*/
- dfield = dtuple_get_nth_field(entry, 0);
+ dfield = dtuple_get_nth_field(entry, 0/*NAME*/);
dfield_set_data(dfield, table->name, ut_strlen(table->name));
/* 3: ID -------------------------------*/
- dfield = dtuple_get_nth_field(entry, 1);
+ dfield = dtuple_get_nth_field(entry, 1/*ID*/);
ptr = mem_heap_alloc(heap, 8);
mach_write_to_8(ptr, table->id);
dfield_set_data(dfield, ptr, 8);
/* 4: N_COLS ---------------------------*/
- dfield = dtuple_get_nth_field(entry, 2);
+ dfield = dtuple_get_nth_field(entry, 2/*N_COLS*/);
#if DICT_TF_COMPACT != 1
#error
@@ -91,40 +93,41 @@ dict_create_sys_tables_tuple(
| ((table->flags & DICT_TF_COMPACT) << 31));
dfield_set_data(dfield, ptr, 4);
/* 5: TYPE -----------------------------*/
- dfield = dtuple_get_nth_field(entry, 3);
+ dfield = dtuple_get_nth_field(entry, 3/*TYPE*/);
ptr = mem_heap_alloc(heap, 4);
- if (table->flags & ~DICT_TF_COMPACT) {
+ if (table->flags & (~DICT_TF_COMPACT & ~(~0 << DICT_TF_BITS))) {
ut_a(table->flags & DICT_TF_COMPACT);
ut_a(dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP);
ut_a((table->flags & DICT_TF_ZSSIZE_MASK)
<= (DICT_TF_ZSSIZE_MAX << DICT_TF_ZSSIZE_SHIFT));
- ut_a(!(table->flags & (~0 << DICT_TF_BITS)));
- mach_write_to_4(ptr, table->flags);
+ ut_a(!(table->flags & (~0 << DICT_TF2_BITS)));
+ mach_write_to_4(ptr, table->flags & ~(~0 << DICT_TF_BITS));
} else {
mach_write_to_4(ptr, DICT_TABLE_ORDINARY);
}
dfield_set_data(dfield, ptr, 4);
/* 6: MIX_ID (obsolete) ---------------------------*/
- dfield = dtuple_get_nth_field(entry, 4);
+ dfield = dtuple_get_nth_field(entry, 4/*MIX_ID*/);
ptr = mem_heap_zalloc(heap, 8);
dfield_set_data(dfield, ptr, 8);
- /* 7: MIX_LEN (obsolete) --------------------------*/
+ /* 7: MIX_LEN (additional flags) --------------------------*/
- dfield = dtuple_get_nth_field(entry, 5);
+ dfield = dtuple_get_nth_field(entry, 5/*MIX_LEN*/);
- ptr = mem_heap_zalloc(heap, 4);
+ ptr = mem_heap_alloc(heap, 4);
+ mach_write_to_4(ptr, table->flags >> DICT_TF2_SHIFT);
dfield_set_data(dfield, ptr, 4);
/* 8: CLUSTER_NAME ---------------------*/
- dfield = dtuple_get_nth_field(entry, 6);
+ dfield = dtuple_get_nth_field(entry, 6/*CLUSTER_NAME*/);
dfield_set_null(dfield); /* not supported */
/* 9: SPACE ----------------------------*/
- dfield = dtuple_get_nth_field(entry, 7);
+ dfield = dtuple_get_nth_field(entry, 7/*SPACE*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, table->space);
@@ -143,19 +146,21 @@ static
dtuple_t*
dict_create_sys_columns_tuple(
/*==========================*/
- dict_table_t* table, /*!< in: table */
- ulint i, /*!< in: column number */
- mem_heap_t* heap) /*!< in: memory heap from which the memory for
- the built tuple is allocated */
+ const dict_table_t* table, /*!< in: table */
+ ulint i, /*!< in: column number */
+ mem_heap_t* heap) /*!< in: memory heap from
+ which the memory for the built
+ tuple is allocated */
{
dict_table_t* sys_columns;
dtuple_t* entry;
const dict_col_t* column;
dfield_t* dfield;
byte* ptr;
- const char* col_name;
+ const char* col_name;
- ut_ad(table && heap);
+ ut_ad(table);
+ ut_ad(heap);
column = dict_table_get_nth_col(table, i);
@@ -166,47 +171,47 @@ dict_create_sys_columns_tuple(
dict_table_copy_types(entry, sys_columns);
/* 0: TABLE_ID -----------------------*/
- dfield = dtuple_get_nth_field(entry, 0);
+ dfield = dtuple_get_nth_field(entry, 0/*TABLE_ID*/);
ptr = mem_heap_alloc(heap, 8);
mach_write_to_8(ptr, table->id);
dfield_set_data(dfield, ptr, 8);
/* 1: POS ----------------------------*/
- dfield = dtuple_get_nth_field(entry, 1);
+ dfield = dtuple_get_nth_field(entry, 1/*POS*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, i);
dfield_set_data(dfield, ptr, 4);
/* 4: NAME ---------------------------*/
- dfield = dtuple_get_nth_field(entry, 2);
+ dfield = dtuple_get_nth_field(entry, 2/*NAME*/);
col_name = dict_table_get_col_name(table, i);
dfield_set_data(dfield, col_name, ut_strlen(col_name));
/* 5: MTYPE --------------------------*/
- dfield = dtuple_get_nth_field(entry, 3);
+ dfield = dtuple_get_nth_field(entry, 3/*MTYPE*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, column->mtype);
dfield_set_data(dfield, ptr, 4);
/* 6: PRTYPE -------------------------*/
- dfield = dtuple_get_nth_field(entry, 4);
+ dfield = dtuple_get_nth_field(entry, 4/*PRTYPE*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, column->prtype);
dfield_set_data(dfield, ptr, 4);
/* 7: LEN ----------------------------*/
- dfield = dtuple_get_nth_field(entry, 5);
+ dfield = dtuple_get_nth_field(entry, 5/*LEN*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, column->len);
dfield_set_data(dfield, ptr, 4);
/* 8: PREC ---------------------------*/
- dfield = dtuple_get_nth_field(entry, 6);
+ dfield = dtuple_get_nth_field(entry, 6/*PREC*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, 0/* unused */);
@@ -230,6 +235,7 @@ dict_build_table_def_step(
dict_table_t* table;
dtuple_t* row;
ulint error;
+ ulint flags;
const char* path_or_name;
ibool is_path;
mtr_t mtr;
@@ -268,9 +274,10 @@ dict_build_table_def_step(
ut_ad(!dict_table_zip_size(table)
|| dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP);
+ flags = table->flags & ~(~0 << DICT_TF_BITS);
error = fil_create_new_single_table_tablespace(
&space, path_or_name, is_path,
- table->flags == DICT_TF_COMPACT ? 0 : table->flags,
+ flags == DICT_TF_COMPACT ? 0 : flags,
FIL_IBD_FILE_INITIAL_SIZE);
table->space = (unsigned int) space;
@@ -286,7 +293,7 @@ dict_build_table_def_step(
mtr_commit(&mtr);
} else {
/* Create in the system tablespace: disallow new features */
- table->flags &= DICT_TF_COMPACT;
+ table->flags &= (~0 << DICT_TF_BITS) | DICT_TF_COMPACT;
}
row = dict_create_sys_tables_tuple(table, node->heap);
@@ -322,9 +329,10 @@ static
dtuple_t*
dict_create_sys_indexes_tuple(
/*==========================*/
- dict_index_t* index, /*!< in: index */
- mem_heap_t* heap) /*!< in: memory heap from which the memory for
- the built tuple is allocated */
+ const dict_index_t* index, /*!< in: index */
+ mem_heap_t* heap) /*!< in: memory heap from
+ which the memory for the built
+ tuple is allocated */
{
dict_table_t* sys_indexes;
dict_table_t* table;
@@ -333,7 +341,8 @@ dict_create_sys_indexes_tuple(
byte* ptr;
ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(index && heap);
+ ut_ad(index);
+ ut_ad(heap);
sys_indexes = dict_sys->sys_indexes;
@@ -344,32 +353,32 @@ dict_create_sys_indexes_tuple(
dict_table_copy_types(entry, sys_indexes);
/* 0: TABLE_ID -----------------------*/
- dfield = dtuple_get_nth_field(entry, 0);
+ dfield = dtuple_get_nth_field(entry, 0/*TABLE_ID*/);
ptr = mem_heap_alloc(heap, 8);
mach_write_to_8(ptr, table->id);
dfield_set_data(dfield, ptr, 8);
/* 1: ID ----------------------------*/
- dfield = dtuple_get_nth_field(entry, 1);
+ dfield = dtuple_get_nth_field(entry, 1/*ID*/);
ptr = mem_heap_alloc(heap, 8);
mach_write_to_8(ptr, index->id);
dfield_set_data(dfield, ptr, 8);
/* 4: NAME --------------------------*/
- dfield = dtuple_get_nth_field(entry, 2);
+ dfield = dtuple_get_nth_field(entry, 2/*NAME*/);
dfield_set_data(dfield, index->name, ut_strlen(index->name));
/* 5: N_FIELDS ----------------------*/
- dfield = dtuple_get_nth_field(entry, 3);
+ dfield = dtuple_get_nth_field(entry, 3/*N_FIELDS*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, index->n_fields);
dfield_set_data(dfield, ptr, 4);
/* 6: TYPE --------------------------*/
- dfield = dtuple_get_nth_field(entry, 4);
+ dfield = dtuple_get_nth_field(entry, 4/*TYPE*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, index->type);
@@ -381,7 +390,7 @@ dict_create_sys_indexes_tuple(
#error "DICT_SYS_INDEXES_SPACE_NO_FIELD != 7"
#endif
- dfield = dtuple_get_nth_field(entry, 5);
+ dfield = dtuple_get_nth_field(entry, 5/*SPACE*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, index->space);
@@ -393,7 +402,7 @@ dict_create_sys_indexes_tuple(
#error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 8"
#endif
- dfield = dtuple_get_nth_field(entry, 6);
+ dfield = dtuple_get_nth_field(entry, 6/*PAGE_NO*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, FIL_NULL);
@@ -412,10 +421,11 @@ static
dtuple_t*
dict_create_sys_fields_tuple(
/*=========================*/
- dict_index_t* index, /*!< in: index */
- ulint i, /*!< in: field number */
- mem_heap_t* heap) /*!< in: memory heap from which the memory for
- the built tuple is allocated */
+ const dict_index_t* index, /*!< in: index */
+ ulint i, /*!< in: field number */
+ mem_heap_t* heap) /*!< in: memory heap from
+ which the memory for the built
+ tuple is allocated */
{
dict_table_t* sys_fields;
dtuple_t* entry;
@@ -425,7 +435,8 @@ dict_create_sys_fields_tuple(
ibool index_contains_column_prefix_field = FALSE;
ulint j;
- ut_ad(index && heap);
+ ut_ad(index);
+ ut_ad(heap);
for (j = 0; j < index->n_fields; j++) {
if (dict_index_get_nth_field(index, j)->prefix_len > 0) {
@@ -443,7 +454,7 @@ dict_create_sys_fields_tuple(
dict_table_copy_types(entry, sys_fields);
/* 0: INDEX_ID -----------------------*/
- dfield = dtuple_get_nth_field(entry, 0);
+ dfield = dtuple_get_nth_field(entry, 0/*INDEX_ID*/);
ptr = mem_heap_alloc(heap, 8);
mach_write_to_8(ptr, index->id);
@@ -451,7 +462,7 @@ dict_create_sys_fields_tuple(
dfield_set_data(dfield, ptr, 8);
/* 1: POS + PREFIX LENGTH ----------------------------*/
- dfield = dtuple_get_nth_field(entry, 1);
+ dfield = dtuple_get_nth_field(entry, 1/*POS*/);
ptr = mem_heap_alloc(heap, 4);
@@ -471,7 +482,7 @@ dict_create_sys_fields_tuple(
dfield_set_data(dfield, ptr, 4);
/* 4: COL_NAME -------------------------*/
- dfield = dtuple_get_nth_field(entry, 2);
+ dfield = dtuple_get_nth_field(entry, 2/*COL_NAME*/);
dfield_set_data(dfield, field->name,
ut_strlen(field->name));
@@ -602,6 +613,7 @@ dict_create_index_tree_step(
dict_table_t* sys_indexes;
dict_table_t* table;
dtuple_t* search_tuple;
+ ulint zip_size;
btr_pcur_t pcur;
mtr_t mtr;
@@ -626,8 +638,9 @@ dict_create_index_tree_step(
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- node->page_no = btr_create(index->type, index->space,
- dict_table_zip_size(index->table),
+ zip_size = dict_table_zip_size(index->table);
+
+ node->page_no = btr_create(index->type, index->space, zip_size,
index->id, index, &mtr);
/* printf("Created a new index tree in space %lu root page %lu\n",
index->space, index->page_no); */
diff --git a/storage/innodb_plugin/dict/dict0dict.c b/storage/innodb_plugin/dict/dict0dict.c
index 2e524a5a2e3..8a03151d062 100644
--- a/storage/innodb_plugin/dict/dict0dict.c
+++ b/storage/innodb_plugin/dict/dict0dict.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -140,7 +140,7 @@ static
void
dict_field_print_low(
/*=================*/
- dict_field_t* field); /*!< in: field */
+ const dict_field_t* field); /*!< in: field */
/*********************************************************************//**
Frees a foreign key struct. */
static
@@ -1460,6 +1460,7 @@ dict_index_add_to_cache(
if (!dict_index_find_cols(table, index)) {
+ dict_mem_index_free(index);
return(DB_CORRUPTION);
}
@@ -4402,7 +4403,7 @@ static
void
dict_field_print_low(
/*=================*/
- dict_field_t* field) /*!< in: field */
+ const dict_field_t* field) /*!< in: field */
{
ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -4775,6 +4776,8 @@ dict_table_check_for_dup_indexes(
const dict_index_t* index1;
const dict_index_t* index2;
+ ut_ad(mutex_own(&dict_sys->mutex));
+
/* The primary index _must_ exist */
ut_a(UT_LIST_GET_LEN(table->indexes) > 0);
diff --git a/storage/innodb_plugin/dict/dict0load.c b/storage/innodb_plugin/dict/dict0load.c
index 842a129c1a6..377818308c5 100644
--- a/storage/innodb_plugin/dict/dict0load.c
+++ b/storage/innodb_plugin/dict/dict0load.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -260,7 +260,7 @@ dict_sys_tables_get_flags(
return(0);
}
- field = rec_get_nth_field_old(rec, 4, &len);
+ field = rec_get_nth_field_old(rec, 4/*N_COLS*/, &len);
n_cols = mach_read_from_4(field);
if (UNIV_UNLIKELY(!(n_cols & 0x80000000UL))) {
@@ -390,15 +390,35 @@ loop:
mtr_commit(&mtr);
- if (space_id != 0 && in_crash_recovery) {
+ if (space_id == 0) {
+ /* The system tablespace always exists. */
+ } else if (in_crash_recovery) {
/* Check that the tablespace (the .ibd file) really
- exists; print a warning to the .err log if not */
-
- fil_space_for_table_exists_in_mem(space_id, name,
- FALSE, TRUE, TRUE);
- }
+ exists; print a warning to the .err log if not.
+ Do not print warnings for temporary tables. */
+ ibool is_temp;
+
+ field = rec_get_nth_field_old(rec, 4, &len);
+ if (0x80000000UL & mach_read_from_4(field)) {
+ /* ROW_FORMAT=COMPACT: read the is_temp
+ flag from SYS_TABLES.MIX_LEN. */
+ field = rec_get_nth_field_old(rec, 7, &len);
+ is_temp = mach_read_from_4(field)
+ & DICT_TF2_TEMPORARY;
+ } else {
+ /* For tables created with old versions
+ of InnoDB, SYS_TABLES.MIX_LEN may contain
+ garbage. Such tables would always be
+ in ROW_FORMAT=REDUNDANT. Pretend that
+ all such tables are non-temporary. That is,
+ do not suppress error printouts about
+ temporary tables not being found. */
+ is_temp = FALSE;
+ }
- if (space_id != 0 && !in_crash_recovery) {
+ fil_space_for_table_exists_in_mem(
+ space_id, name, is_temp, TRUE, !is_temp);
+ } else {
/* It is a normal database startup: create the space
object and check that the .ibd file exists. */
@@ -894,43 +914,72 @@ err_exit:
(ulong) flags);
goto err_exit;
}
+ } else {
+ flags = 0;
+ }
- if (fil_space_for_table_exists_in_mem(space, name, FALSE,
- FALSE, FALSE)) {
- /* Ok; (if we did a crash recovery then the tablespace
- can already be in the memory cache) */
- } else {
- /* In >= 4.1.9, InnoDB scans the data dictionary also
- at a normal mysqld startup. It is an error if the
- space object does not exist in memory. */
+ ut_a(name_of_col_is(sys_tables, sys_index, 4, "N_COLS"));
+ field = rec_get_nth_field_old(rec, 4, &len);
+ n_cols = mach_read_from_4(field);
+
+ /* The high-order bit of N_COLS is the "compact format" flag.
+ For tables in that format, MIX_LEN may hold additional flags. */
+ if (n_cols & 0x80000000UL) {
+ ulint flags2;
+
+ flags |= DICT_TF_COMPACT;
+
+ ut_a(name_of_col_is(sys_tables, sys_index, 7, "MIX_LEN"));
+ field = rec_get_nth_field_old(rec, 7, &len);
+
+ flags2 = mach_read_from_4(field);
+
+ if (flags2 & (~0 << (DICT_TF2_BITS - DICT_TF2_SHIFT))) {
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: Warning: table ", stderr);
+ ut_print_filename(stderr, name);
+ fprintf(stderr, "\n"
+ "InnoDB: in InnoDB data dictionary"
+ " has unknown flags %lx.\n",
+ (ulong) flags2);
+
+ flags2 &= ~(~0 << (DICT_TF2_BITS - DICT_TF2_SHIFT));
+ }
+
+ flags |= flags2 << DICT_TF2_SHIFT;
+ }
+
+ /* See if the tablespace is available. */
+ if (space == 0) {
+ /* The system tablespace is always available. */
+ } else if (!fil_space_for_table_exists_in_mem(
+ space, name,
+ (flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY,
+ FALSE, FALSE)) {
+
+ if ((flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY) {
+ /* Do not bother to retry opening temporary tables. */
+ ibd_file_missing = TRUE;
+ } else {
ut_print_timestamp(stderr);
fprintf(stderr,
- " InnoDB: error: space object of table %s,\n"
+ " InnoDB: error: space object of table");
+ ut_print_filename(stderr, name);
+ fprintf(stderr, ",\n"
"InnoDB: space id %lu did not exist in memory."
" Retrying an open.\n",
- name, (ulong)space);
+ (ulong) space);
/* Try to open the tablespace */
if (!fil_open_single_table_tablespace(
- TRUE, space, flags, name)) {
- /* We failed to find a sensible tablespace
- file */
+ TRUE, space,
+ flags & ~(~0 << DICT_TF_BITS), name)) {
+ /* We failed to find a sensible
+ tablespace file */
ibd_file_missing = TRUE;
}
}
- } else {
- flags = 0;
- }
-
- ut_a(name_of_col_is(sys_tables, sys_index, 4, "N_COLS"));
-
- field = rec_get_nth_field_old(rec, 4, &len);
- n_cols = mach_read_from_4(field);
-
- /* The high-order bit of N_COLS is the "compact format" flag. */
- if (n_cols & 0x80000000UL) {
- flags |= DICT_TF_COMPACT;
}
table = dict_mem_table_create(name, space, n_cols & ~0x80000000UL,
diff --git a/storage/innodb_plugin/dict/dict0mem.c b/storage/innodb_plugin/dict/dict0mem.c
index 6458cbab92d..66b4b43f296 100644
--- a/storage/innodb_plugin/dict/dict0mem.c
+++ b/storage/innodb_plugin/dict/dict0mem.c
@@ -59,7 +59,7 @@ dict_mem_table_create(
mem_heap_t* heap;
ut_ad(name);
- ut_a(!(flags & (~0 << DICT_TF_BITS)));
+ ut_a(!(flags & (~0 << DICT_TF2_BITS)));
heap = mem_heap_create(DICT_HEAP_SIZE);
diff --git a/storage/innodb_plugin/fil/fil0fil.c b/storage/innodb_plugin/fil/fil0fil.c
index 112a0e27d50..963e306c00c 100644
--- a/storage/innodb_plugin/fil/fil0fil.c
+++ b/storage/innodb_plugin/fil/fil0fil.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -38,6 +38,7 @@ Created 10/25/1995 Heikki Tuuri
#include "mtr0mtr.h"
#include "mtr0log.h"
#include "dict0dict.h"
+#include "page0page.h"
#include "page0zip.h"
#ifndef UNIV_HOTBACKUP
# include "buf0lru.h"
@@ -1097,10 +1098,13 @@ fil_space_create(
fil_space_t* space;
/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
- ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
+ ROW_FORMAT=COMPACT
+ ((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and
ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
- format, the tablespace flags should equal table->flags. */
+ format, the tablespace flags should equal
+ (table->flags & ~(~0 << DICT_TF_BITS)). */
ut_a(flags != DICT_TF_COMPACT);
+ ut_a(!(flags & (~0UL << DICT_TF_BITS)));
try_again:
/*printf(
@@ -2582,10 +2586,13 @@ fil_create_new_single_table_tablespace(
ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
- ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
+ ROW_FORMAT=COMPACT
+ ((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and
ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
- format, the tablespace flags should equal table->flags. */
+ format, the tablespace flags should equal
+ (table->flags & ~(~0 << DICT_TF_BITS)). */
ut_a(flags != DICT_TF_COMPACT);
+ ut_a(!(flags & (~0UL << DICT_TF_BITS)));
path = fil_make_ibd_name(tablename, is_temp);
@@ -2786,6 +2793,7 @@ fil_reset_too_high_lsns(
ib_int64_t offset;
ulint zip_size;
ibool success;
+ page_zip_des_t page_zip;
filepath = fil_make_ibd_name(name, FALSE);
@@ -2833,6 +2841,12 @@ fil_reset_too_high_lsns(
space_id = fsp_header_get_space_id(page);
zip_size = fsp_header_get_zip_size(page);
+ page_zip_des_init(&page_zip);
+ page_zip_set_size(&page_zip, zip_size);
+ if (zip_size) {
+ page_zip.data = page + UNIV_PAGE_SIZE;
+ }
+
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Flush lsn in the tablespace file %lu"
@@ -2867,20 +2881,23 @@ fil_reset_too_high_lsns(
/* We have to reset the lsn */
if (zip_size) {
- memcpy(page + UNIV_PAGE_SIZE, page, zip_size);
+ memcpy(page_zip.data, page, zip_size);
buf_flush_init_for_writing(
- page, page + UNIV_PAGE_SIZE,
- current_lsn);
+ page, &page_zip, current_lsn);
+ success = os_file_write(
+ filepath, file, page_zip.data,
+ (ulint) offset & 0xFFFFFFFFUL,
+ (ulint) (offset >> 32), zip_size);
} else {
buf_flush_init_for_writing(
page, NULL, current_lsn);
+ success = os_file_write(
+ filepath, file, page,
+ (ulint)(offset & 0xFFFFFFFFUL),
+ (ulint)(offset >> 32),
+ UNIV_PAGE_SIZE);
}
- success = os_file_write(filepath, file, page,
- (ulint)(offset & 0xFFFFFFFFUL),
- (ulint)(offset >> 32),
- zip_size
- ? zip_size
- : UNIV_PAGE_SIZE);
+
if (!success) {
goto func_exit;
@@ -2956,10 +2973,13 @@ fil_open_single_table_tablespace(
filepath = fil_make_ibd_name(name, FALSE);
/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
- ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
+ ROW_FORMAT=COMPACT
+ ((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and
ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
- format, the tablespace flags should equal table->flags. */
+ format, the tablespace flags should equal
+ (table->flags & ~(~0 << DICT_TF_BITS)). */
ut_a(flags != DICT_TF_COMPACT);
+ ut_a(!(flags & (~0UL << DICT_TF_BITS)));
file = os_file_create_simple_no_error_handling(
filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
@@ -3011,7 +3031,8 @@ fil_open_single_table_tablespace(
ut_free(buf2);
- if (UNIV_UNLIKELY(space_id != id || space_flags != flags)) {
+ if (UNIV_UNLIKELY(space_id != id
+ || space_flags != (flags & ~(~0 << DICT_TF_BITS)))) {
ut_print_timestamp(stderr);
fputs(" InnoDB: Error: tablespace id and flags in file ",
@@ -4781,8 +4802,10 @@ void
fil_close(void)
/*===========*/
{
+#ifndef UNIV_HOTBACKUP
/* The mutex should already have been freed. */
ut_ad(fil_system->mutex.magic_n == 0);
+#endif /* !UNIV_HOTBACKUP */
hash_table_free(fil_system->spaces);
diff --git a/storage/innodb_plugin/fsp/fsp0fsp.c b/storage/innodb_plugin/fsp/fsp0fsp.c
index 3cc4318fc06..c7f1a299d8a 100644
--- a/storage/innodb_plugin/fsp/fsp0fsp.c
+++ b/storage/innodb_plugin/fsp/fsp0fsp.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -386,11 +386,11 @@ UNIV_INLINE
ibool
xdes_get_bit(
/*=========*/
- xdes_t* descr, /*!< in: descriptor */
- ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
- ulint offset, /*!< in: page offset within extent:
- 0 ... FSP_EXTENT_SIZE - 1 */
- mtr_t* mtr) /*!< in: mtr */
+ const xdes_t* descr, /*!< in: descriptor */
+ ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+ ulint offset, /*!< in: page offset within extent:
+ 0 ... FSP_EXTENT_SIZE - 1 */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint index;
ulint byte_index;
@@ -527,8 +527,8 @@ UNIV_INLINE
ulint
xdes_get_n_used(
/*============*/
- xdes_t* descr, /*!< in: descriptor */
- mtr_t* mtr) /*!< in: mtr */
+ const xdes_t* descr, /*!< in: descriptor */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint i;
ulint count = 0;
@@ -551,8 +551,8 @@ UNIV_INLINE
ibool
xdes_is_free(
/*=========*/
- xdes_t* descr, /*!< in: descriptor */
- mtr_t* mtr) /*!< in: mtr */
+ const xdes_t* descr, /*!< in: descriptor */
+ mtr_t* mtr) /*!< in: mtr */
{
if (0 == xdes_get_n_used(descr, mtr)) {
@@ -569,8 +569,8 @@ UNIV_INLINE
ibool
xdes_is_full(
/*=========*/
- xdes_t* descr, /*!< in: descriptor */
- mtr_t* mtr) /*!< in: mtr */
+ const xdes_t* descr, /*!< in: descriptor */
+ mtr_t* mtr) /*!< in: mtr */
{
if (FSP_EXTENT_SIZE == xdes_get_n_used(descr, mtr)) {
@@ -586,7 +586,7 @@ UNIV_INLINE
void
xdes_set_state(
/*===========*/
- xdes_t* descr, /*!< in: descriptor */
+ xdes_t* descr, /*!< in/out: descriptor */
ulint state, /*!< in: state to set */
mtr_t* mtr) /*!< in: mtr handle */
{
@@ -605,8 +605,8 @@ UNIV_INLINE
ulint
xdes_get_state(
/*===========*/
- xdes_t* descr, /*!< in: descriptor */
- mtr_t* mtr) /*!< in: mtr handle */
+ const xdes_t* descr, /*!< in: descriptor */
+ mtr_t* mtr) /*!< in: mtr handle */
{
ulint state;
@@ -705,7 +705,7 @@ UNIV_INLINE
xdes_t*
xdes_get_descriptor_with_space_hdr(
/*===============================*/
- fsp_header_t* sp_header,/*!< in: space header, x-latched */
+ fsp_header_t* sp_header,/*!< in/out: space header, x-latched */
ulint space, /*!< in: space id */
ulint offset, /*!< in: page offset;
if equal to the free limit,
@@ -869,9 +869,7 @@ fsp_init_file_page_low(
return;
}
-#ifdef UNIV_BASIC_LOG_DEBUG
- memset(page, 0xff, UNIV_PAGE_SIZE);
-#endif
+ UNIV_MEM_INVALID(page, UNIV_PAGE_SIZE);
mach_write_to_4(page + FIL_PAGE_OFFSET, buf_block_get_page_no(block));
memset(page + FIL_PAGE_LSN, 0, 8);
mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
@@ -1342,7 +1340,7 @@ fsp_fill_free_list(
descriptor page and ibuf bitmap page;
then we do not allocate more extents */
ulint space, /*!< in: space */
- fsp_header_t* header, /*!< in: space header */
+ fsp_header_t* header, /*!< in/out: space header */
mtr_t* mtr) /*!< in: mtr */
{
ulint limit;
diff --git a/storage/innodb_plugin/ha/ha0ha.c b/storage/innodb_plugin/ha/ha0ha.c
index cb5e541b55d..db85288298d 100644
--- a/storage/innodb_plugin/ha/ha0ha.c
+++ b/storage/innodb_plugin/ha/ha0ha.c
@@ -101,6 +101,8 @@ ha_clear(
ulint i;
ulint n;
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
#endif /* UNIV_SYNC_DEBUG */
@@ -146,7 +148,9 @@ ha_insert_for_fold_func(
ha_node_t* prev_node;
ulint hash;
- ut_ad(table && data);
+ ut_ad(data);
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
ut_a(block->frame == page_align(data));
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
@@ -237,6 +241,8 @@ ha_delete_hash_node(
hash_table_t* table, /*!< in: hash table */
ha_node_t* del_node) /*!< in: node to be deleted */
{
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
# ifndef UNIV_HOTBACKUP
if (table->adaptive) {
@@ -267,6 +273,8 @@ ha_search_and_update_if_found_func(
{
ha_node_t* node;
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
ASSERT_HASH_MUTEX_OWN(table, fold);
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
ut_a(new_block->frame == page_align(new_data));
@@ -304,6 +312,8 @@ ha_remove_all_nodes_to_page(
{
ha_node_t* node;
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
ASSERT_HASH_MUTEX_OWN(table, fold);
node = ha_chain_get_first(table, fold);
@@ -353,6 +363,8 @@ ha_validate(
ibool ok = TRUE;
ulint i;
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
ut_a(start_index <= end_index);
ut_a(start_index < hash_get_n_cells(table));
ut_a(end_index < hash_get_n_cells(table));
@@ -391,6 +403,8 @@ ha_print_info(
FILE* file, /*!< in: file where to print */
hash_table_t* table) /*!< in: hash table */
{
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
#ifdef UNIV_DEBUG
/* Some of the code here is disabled for performance reasons in production
builds, see http://bugs.mysql.com/36941 */
diff --git a/storage/innodb_plugin/ha/hash0hash.c b/storage/innodb_plugin/ha/hash0hash.c
index 2800d7793f8..30c304dafcd 100644
--- a/storage/innodb_plugin/ha/hash0hash.c
+++ b/storage/innodb_plugin/ha/hash0hash.c
@@ -119,7 +119,7 @@ hash_create(
table->heaps = NULL;
#endif /* !UNIV_HOTBACKUP */
table->heap = NULL;
- table->magic_n = HASH_TABLE_MAGIC_N;
+ ut_d(table->magic_n = HASH_TABLE_MAGIC_N);
/* Initialize the cell array */
hash_table_clear(table);
@@ -135,6 +135,8 @@ hash_table_free(
/*============*/
hash_table_t* table) /*!< in, own: hash table */
{
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
#ifndef UNIV_HOTBACKUP
ut_a(table->mutexes == NULL);
#endif /* !UNIV_HOTBACKUP */
@@ -160,6 +162,8 @@ hash_create_mutexes_func(
{
ulint i;
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
ut_a(n_mutexes > 0);
ut_a(ut_is_2pow(n_mutexes));
diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc
index 47b8203091c..fbbebbce8fd 100644
--- a/storage/innodb_plugin/handler/ha_innodb.cc
+++ b/storage/innodb_plugin/handler/ha_innodb.cc
@@ -1,7 +1,8 @@
/*****************************************************************************
-Copyright (c) 2000, 2009, MySQL AB & Innobase Oy. All Rights Reserved.
+Copyright (c) 2000, 2010, MySQL AB & Innobase Oy. All Rights Reserved.
Copyright (c) 2008, 2009 Google Inc.
+Copyright (c) 2009, Percona Inc.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -9,6 +10,13 @@ briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
@@ -22,32 +30,6 @@ this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
-/***********************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Percona Inc.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-***********************************************************************/
/* TODO list for the InnoDB handler in 5.0:
- Remove the flag trx->active_trans and look at trx->conc_state
@@ -3006,59 +2988,370 @@ normalize_table_name(
}
/********************************************************************//**
+Get the upper limit of the MySQL integral and floating-point type.
+@return maximum allowed value for the field */
+static
+ulonglong
+innobase_get_int_col_max_value(
+/*===========================*/
+	const Field*	field)	/*!< in: MySQL field */
+{
+	/* Map the key type of the field directly to the largest value
+	the column is allowed to hold. */
+	switch (field->key_type()) {
+	/* TINY */
+	case HA_KEYTYPE_BINARY:
+		return(0xFFULL);
+	case HA_KEYTYPE_INT8:
+		return(0x7FULL);
+
+	/* SHORT */
+	case HA_KEYTYPE_USHORT_INT:
+		return(0xFFFFULL);
+	case HA_KEYTYPE_SHORT_INT:
+		return(0x7FFFULL);
+
+	/* MEDIUM */
+	case HA_KEYTYPE_UINT24:
+		return(0xFFFFFFULL);
+	case HA_KEYTYPE_INT24:
+		return(0x7FFFFFULL);
+
+	/* LONG */
+	case HA_KEYTYPE_ULONG_INT:
+		return(0xFFFFFFFFULL);
+	case HA_KEYTYPE_LONG_INT:
+		return(0x7FFFFFFFULL);
+
+	/* BIG */
+	case HA_KEYTYPE_ULONGLONG:
+		return(0xFFFFFFFFFFFFFFFFULL);
+	case HA_KEYTYPE_LONGLONG:
+		return(0x7FFFFFFFFFFFFFFFULL);
+
+	/* Floating point: we use the maximum as per the
+	IEEE754-2008 standard, 2^24 for FLOAT and 2^53 for DOUBLE */
+	case HA_KEYTYPE_FLOAT:
+		return(0x1000000ULL);
+	case HA_KEYTYPE_DOUBLE:
+		return(0x20000000000000ULL);
+
+	default:
+		break;
+	}
+
+	/* Any other key type is not handled by this function. */
+	ut_error;
+
+	return(0);
+}
+
+/*******************************************************************//**
+This function checks whether the index column information
+is consistent between KEY info from mysql and that from innodb index.
+InnoDB system columns (DATA_SYS) encountered in the InnoDB index are
+skipped before each comparison.
+@return TRUE if all column types match. */
+static
+ibool
+innobase_match_index_columns(
+/*=========================*/
+	const KEY*		key_info,	/*!< in: Index info
+						from mysql */
+	const dict_index_t*	index_info)	/*!< in: Index info
+						from Innodb */
+{
+	const KEY_PART_INFO*	key_part;
+	const KEY_PART_INFO*	key_end;
+	const dict_field_t*	innodb_idx_fld;
+	const dict_field_t*	innodb_idx_fld_end;
+
+	DBUG_ENTER("innobase_match_index_columns");
+
+	/* Check whether user defined index column count matches */
+	if (key_info->key_parts != index_info->n_user_defined_cols) {
+		DBUG_RETURN(FALSE);
+	}
+
+	key_part = key_info->key_part;
+	key_end = key_part + key_info->key_parts;
+	innodb_idx_fld = index_info->fields;
+	innodb_idx_fld_end = index_info->fields + index_info->n_fields;
+
+	/* Check each index column's datatype. We do not check
+	column name because there exists case that index
+	column name got modified in mysql but such change does not
+	propagate to InnoDB.
+	One hidden assumption here is that the index column sequences
+	are matched up between those in mysql and Innodb. */
+	for (; key_part != key_end; ++key_part) {
+		ulint	col_type;
+		ibool	is_unsigned;
+		ulint	mtype;
+
+		/* Skipping system columns in an earlier iteration may
+		have consumed the remaining InnoDB fields; never read
+		past the end of the field array. */
+		if (innodb_idx_fld >= innodb_idx_fld_end) {
+			DBUG_RETURN(FALSE);
+		}
+
+		mtype = innodb_idx_fld->col->mtype;
+
+		/* Need to translate to InnoDB column type before
+		comparison. */
+		col_type = get_innobase_type_from_mysql_type(&is_unsigned,
+							     key_part->field);
+
+		/* Ignore Innodb specific system columns. */
+		while (mtype == DATA_SYS) {
+			innodb_idx_fld++;
+
+			if (innodb_idx_fld >= innodb_idx_fld_end) {
+				DBUG_RETURN(FALSE);
+			}
+
+			/* Re-read the type of the field we advanced to.
+			Without this the loop condition would never
+			change and the loop would always run off the
+			end of the array and report a mismatch. */
+			mtype = innodb_idx_fld->col->mtype;
+		}
+
+		if (col_type != mtype) {
+			/* Column Type mismatches */
+			DBUG_RETURN(FALSE);
+		}
+
+		innodb_idx_fld++;
+	}
+
+	DBUG_RETURN(TRUE);
+}
+
+/*******************************************************************//**
+This function builds a translation table in INNOBASE_SHARE
+structure for fast index location with mysql array number from its
+table->key_info structure. This also provides the necessary translation
+between the key order in mysql key_info and Innodb ib_table->indexes if
+they are not fully matched with each other.
+Note we do not have any mutex protecting the translation table
+building based on the assumption that there is no concurrent
+index creation/drop and DMLs that requires index lookup. All table
+handle will be closed before the index creation/drop.
+On any failure the translation table held by the share is freed and
+its index_count and array_size are reset to 0.
+@return TRUE if index translation table built successfully */
+static
+ibool
+innobase_build_index_translation(
+/*=============================*/
+	const TABLE*		table,	/*!< in: table in MySQL data
+					dictionary */
+	dict_table_t*		ib_table,/*!< in: table in Innodb data
+					dictionary */
+	INNOBASE_SHARE*		share)	/*!< in/out: share structure
+					where index translation table
+					will be constructed in. */
+{
+	ulint		mysql_num_index;
+	ulint		ib_num_index;
+	dict_index_t**	index_mapping;
+	ibool		ret = TRUE;
+
+	DBUG_ENTER("innobase_build_index_translation");
+
+	mysql_num_index = table->s->keys;
+	ib_num_index = UT_LIST_GET_LEN(ib_table->indexes);
+
+	index_mapping = share->idx_trans_tbl.index_mapping;
+
+	/* If there exists inconsistency between MySQL and InnoDB dictionary
+	(metadata) information, the number of index defined in MySQL
+	could exceed that in InnoDB, do not build index translation
+	table in such case */
+	if (UNIV_UNLIKELY(ib_num_index < mysql_num_index)) {
+		ret = FALSE;
+		goto func_exit;
+	}
+
+	/* If index entry count is non-zero, nothing has
+	changed since last update, directly return TRUE */
+	if (share->idx_trans_tbl.index_count) {
+		/* Index entry count should still match mysql_num_index */
+		ut_a(share->idx_trans_tbl.index_count == mysql_num_index);
+		goto func_exit;
+	}
+
+	/* The number of index increased, rebuild the mapping table */
+	if (mysql_num_index > share->idx_trans_tbl.array_size) {
+		dict_index_t**	new_mapping;
+
+		new_mapping = (dict_index_t**) my_realloc(
+			index_mapping,
+			mysql_num_index * sizeof(*index_mapping),
+			MYF(MY_ALLOW_ZERO_PTR));
+
+		if (!new_mapping) {
+			/* Keep index_mapping pointing at the old array:
+			my_realloc() is called without MY_FREE_ON_ERROR,
+			so the old array is still allocated and must be
+			released by the cleanup code at func_exit
+			instead of being leaked. */
+			ret = FALSE;
+			goto func_exit;
+		}
+
+		index_mapping = new_mapping;
+		share->idx_trans_tbl.array_size = mysql_num_index;
+	}
+
+	/* For each index in the mysql key_info array, fetch its
+	corresponding InnoDB index pointer into index_mapping
+	array. */
+	for (ulint count = 0; count < mysql_num_index; count++) {
+
+		/* Fetch index pointers into index_mapping according to mysql
+		index sequence */
+		index_mapping[count] = dict_table_get_index_on_name(
+			ib_table, table->key_info[count].name);
+
+		if (!index_mapping[count]) {
+			sql_print_error("Cannot find index %s in InnoDB "
+					"index dictionary.",
+					table->key_info[count].name);
+			ret = FALSE;
+			goto func_exit;
+		}
+
+		/* Double check fetched index has the same
+		column info as those in mysql key_info. */
+		if (!innobase_match_index_columns(&table->key_info[count],
+						  index_mapping[count])) {
+			sql_print_error("Found index %s whose column info "
+					"does not match that of MySQL.",
+					table->key_info[count].name);
+			ret = FALSE;
+			goto func_exit;
+		}
+	}
+
+	/* Successfully built the translation table */
+	share->idx_trans_tbl.index_count = mysql_num_index;
+
+func_exit:
+	if (!ret) {
+		/* Build translation table failed. */
+		my_free(index_mapping, MYF(MY_ALLOW_ZERO_PTR));
+
+		share->idx_trans_tbl.array_size = 0;
+		share->idx_trans_tbl.index_count = 0;
+		index_mapping = NULL;
+	}
+
+	/* Publish the (possibly reallocated or freed) array pointer. */
+	share->idx_trans_tbl.index_mapping = index_mapping;
+
+	DBUG_RETURN(ret);
+}
+
+/*******************************************************************//**
+This function uses the index translation table to quickly locate the
+requested index structure.
+Note we do not have mutex protection for the index translation table
+access; it is based on the assumption that there is no concurrent
+translation table rebuild (after create/drop index) and DMLs that
+require index lookup.
+@return dict_index_t structure for requested index. NULL if
+fail to locate the index structure. */
+static
+dict_index_t*
+innobase_index_lookup(
+/*==================*/
+	INNOBASE_SHARE*	share,	/*!< in: share structure for index
+				translation table. */
+	uint		keynr)	/*!< in: index number for the requested
+				index */
+{
+	dict_index_t**	mapping = share->idx_trans_tbl.index_mapping;
+
+	/* Only a built table with an entry for keynr can answer. */
+	if (mapping != NULL && keynr < share->idx_trans_tbl.index_count) {
+		return(mapping[keynr]);
+	}
+
+	return(NULL);
+}
+
+/************************************************************************
Set the autoinc column max value. This should only be called once from
-ha_innobase::open(). Therefore there's no need for a covering lock.
-@return DB_SUCCESS or error code */
+ha_innobase::open(). Therefore there's no need for a covering lock. */
UNIV_INTERN
-ulint
+void
ha_innobase::innobase_initialize_autoinc()
/*======================================*/
{
- dict_index_t* index;
ulonglong auto_inc;
- const char* col_name;
- ulint error;
+ const Field* field = table->found_next_number_field;
- col_name = table->found_next_number_field->field_name;
- index = innobase_get_index(table->s->next_number_index);
+ if (field != NULL) {
+ auto_inc = innobase_get_int_col_max_value(field);
+ } else {
+ /* We have no idea what's been passed in to us as the
+ autoinc column. We set it to the 0, effectively disabling
+ updates to the table. */
+ auto_inc = 0;
- /* Execute SELECT MAX(col_name) FROM TABLE; */
- error = row_search_max_autoinc(index, col_name, &auto_inc);
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: Unable to determine the AUTOINC "
+ "column name\n");
+ }
- switch (error) {
- case DB_SUCCESS:
+ if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
+ /* If the recovery level is set so high that writes
+ are disabled we force the AUTOINC counter to 0
+ value effectively disabling writes to the table.
+ Secondly, we avoid reading the table in case the read
+ results in failure due to a corrupted table/index.
+
+ We will not return an error to the client, so that the
+ tables can be dumped with minimal hassle. If an error
+ were returned in this case, the first attempt to read
+ the table would fail and subsequent SELECTs would succeed. */
+ auto_inc = 0;
+ } else if (field == NULL) {
+ /* This is a far more serious error, best to avoid
+ opening the table and return failure. */
+ my_error(ER_AUTOINC_READ_FAILED, MYF(0));
+ } else {
+ dict_index_t* index;
+ const char* col_name;
+ ulonglong read_auto_inc;
+ ulint err;
- /* At the this stage we don't know the increment
- or the offset, so use default inrement of 1. */
- ++auto_inc;
- break;
+ update_thd(ha_thd());
- case DB_RECORD_NOT_FOUND:
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: MySQL and InnoDB data "
- "dictionaries are out of sync.\n"
- "InnoDB: Unable to find the AUTOINC column %s in the "
- "InnoDB table %s.\n"
- "InnoDB: We set the next AUTOINC column value to the "
- "maximum possible value,\n"
- "InnoDB: in effect disabling the AUTOINC next value "
- "generation.\n"
- "InnoDB: You can either set the next AUTOINC value "
- "explicitly using ALTER TABLE\n"
- "InnoDB: or fix the data dictionary by recreating "
- "the table.\n",
- col_name, index->table->name);
-
- auto_inc = 0xFFFFFFFFFFFFFFFFULL;
- break;
+ ut_a(prebuilt->trx == thd_to_trx(user_thd));
- default:
- return(error);
+ col_name = field->field_name;
+ index = innobase_get_index(table->s->next_number_index);
+
+ /* Execute SELECT MAX(col_name) FROM TABLE; */
+ err = row_search_max_autoinc(index, col_name, &read_auto_inc);
+
+ switch (err) {
+ case DB_SUCCESS:
+ /* At this stage we do not know the increment
+ or the offset, so use a default increment of 1. */
+ auto_inc = read_auto_inc + 1;
+ break;
+
+ case DB_RECORD_NOT_FOUND:
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: MySQL and InnoDB data "
+ "dictionaries are out of sync.\n"
+ "InnoDB: Unable to find the AUTOINC column "
+ "%s in the InnoDB table %s.\n"
+ "InnoDB: We set the next AUTOINC column "
+ "value to 0,\n"
+ "InnoDB: in effect disabling the AUTOINC "
+ "next value generation.\n"
+ "InnoDB: You can either set the next "
+ "AUTOINC value explicitly using ALTER TABLE\n"
+ "InnoDB: or fix the data dictionary by "
+ "recreating the table.\n",
+ col_name, index->table->name);
+
+ /* This will disable the AUTOINC generation. */
+ auto_inc = 0;
+
+ /* We want the open to succeed, so that the user can
+ take corrective action. ie. reads should succeed but
+ updates should fail. */
+ err = DB_SUCCESS;
+ break;
+ default:
+ /* row_search_max_autoinc() should only return
+ one of DB_SUCCESS or DB_RECORD_NOT_FOUND. */
+ ut_error;
+ }
}
dict_table_autoinc_initialize(prebuilt->table, auto_inc);
-
- return(DB_SUCCESS);
}
/*****************************************************************//**
@@ -3192,6 +3485,11 @@ retry:
primary_key = table->s->primary_key;
key_used_on_scan = primary_key;
+ if (!innobase_build_index_translation(table, ib_table, share)) {
+ sql_print_error("Build InnoDB index translation table for"
+ " Table %s failed", name);
+ }
+
/* Allocate a buffer for a 'row reference'. A row reference is
a string of bytes of length ref_length which uniquely specifies
a row in our table. Note that MySQL may also compare two row
@@ -3199,31 +3497,86 @@ retry:
of length ref_length! */
if (!row_table_got_default_clust_index(ib_table)) {
- if (primary_key >= MAX_KEY) {
- sql_print_error("Table %s has a primary key in InnoDB data "
- "dictionary, but not in MySQL!", name);
- }
prebuilt->clust_index_was_generated = FALSE;
- /* MySQL allocates the buffer for ref. key_info->key_length
- includes space for all key columns + one byte for each column
- that may be NULL. ref_length must be as exact as possible to
- save space, because all row reference buffers are allocated
- based on ref_length. */
+ if (UNIV_UNLIKELY(primary_key >= MAX_KEY)) {
+ sql_print_error("Table %s has a primary key in "
+ "InnoDB data dictionary, but not "
+ "in MySQL!", name);
- ref_length = table->key_info[primary_key].key_length;
+ /* This mismatch could cause further problems
+ if not attended, bring this to the user's attention
+ by printing a warning in addition to log a message
+ in the errorlog */
+ push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_NO_SUCH_INDEX,
+ "InnoDB: Table %s has a "
+ "primary key in InnoDB data "
+ "dictionary, but not in "
+ "MySQL!", name);
+
+ /* If primary_key >= MAX_KEY, its (primary_key)
+ value could be out of bound if continue to index
+ into key_info[] array. Find InnoDB primary index,
+ and assign its key_length to ref_length.
+ In addition, since MySQL indexes are sorted starting
+ with primary index, unique index etc., initialize
+ ref_length to the first index key length in
+ case we fail to find InnoDB cluster index.
+
+ Please note, this will not resolve the primary
+ index mismatch problem, other side effects are
+ possible if users continue to use the table.
+ However, we allow this table to be opened so
+ that user can adopt necessary measures for the
+ mismatch while still being accessible to the table
+ data. */
+ ref_length = table->key_info[0].key_length;
+
+ /* Find corresponding cluster index
+ key length in MySQL's key_info[] array */
+ for (ulint i = 0; i < table->s->keys; i++) {
+ dict_index_t* index;
+ index = innobase_get_index(i);
+ if (dict_index_is_clust(index)) {
+ ref_length =
+ table->key_info[i].key_length;
+ }
+ }
+ } else {
+ /* MySQL allocates the buffer for ref.
+ key_info->key_length includes space for all key
+ columns + one byte for each column that may be
+ NULL. ref_length must be as exact as possible to
+ save space, because all row reference buffers are
+ allocated based on ref_length. */
+
+ ref_length = table->key_info[primary_key].key_length;
+ }
} else {
if (primary_key != MAX_KEY) {
- sql_print_error("Table %s has no primary key in InnoDB data "
- "dictionary, but has one in MySQL! If you "
- "created the table with a MySQL version < "
- "3.23.54 and did not define a primary key, "
- "but defined a unique key with all non-NULL "
- "columns, then MySQL internally treats that "
- "key as the primary key. You can fix this "
- "error by dump + DROP + CREATE + reimport "
- "of the table.", name);
+ sql_print_error(
+ "Table %s has no primary key in InnoDB data "
+ "dictionary, but has one in MySQL! If you "
+ "created the table with a MySQL version < "
+ "3.23.54 and did not define a primary key, "
+ "but defined a unique key with all non-NULL "
+ "columns, then MySQL internally treats that "
+ "key as the primary key. You can fix this "
+ "error by dump + DROP + CREATE + reimport "
+ "of the table.", name);
+
+ /* This mismatch could cause further problems
+ if not attended, bring this to the user attention
+ by printing a warning in addition to log a message
+ in the errorlog */
+ push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_NO_SUCH_INDEX,
+ "InnoDB: Table %s has no "
+ "primary key in InnoDB data "
+ "dictionary, but has one in "
+ "MySQL!", name);
}
prebuilt->clust_index_was_generated = TRUE;
@@ -3265,8 +3618,6 @@ retry:
/* Only if the table has an AUTOINC column. */
if (prebuilt->table != NULL && table->found_next_number_field != NULL) {
- ulint error;
-
dict_table_autoinc_lock(prebuilt->table);
/* Since a table can already be "open" in InnoDB's internal
@@ -3275,8 +3626,7 @@ retry:
autoinc value from a previous MySQL open. */
if (dict_table_autoinc_read(prebuilt->table) == 0) {
- error = innobase_initialize_autoinc();
- ut_a(error == DB_SUCCESS);
+ innobase_initialize_autoinc();
}
dict_table_autoinc_unlock(prebuilt->table);
@@ -4093,67 +4443,6 @@ skip_field:
}
/********************************************************************//**
-Get the upper limit of the MySQL integral and floating-point type. */
-UNIV_INTERN
-ulonglong
-ha_innobase::innobase_get_int_col_max_value(
-/*========================================*/
- const Field* field)
-{
- ulonglong max_value = 0;
-
- switch(field->key_type()) {
- /* TINY */
- case HA_KEYTYPE_BINARY:
- max_value = 0xFFULL;
- break;
- case HA_KEYTYPE_INT8:
- max_value = 0x7FULL;
- break;
- /* SHORT */
- case HA_KEYTYPE_USHORT_INT:
- max_value = 0xFFFFULL;
- break;
- case HA_KEYTYPE_SHORT_INT:
- max_value = 0x7FFFULL;
- break;
- /* MEDIUM */
- case HA_KEYTYPE_UINT24:
- max_value = 0xFFFFFFULL;
- break;
- case HA_KEYTYPE_INT24:
- max_value = 0x7FFFFFULL;
- break;
- /* LONG */
- case HA_KEYTYPE_ULONG_INT:
- max_value = 0xFFFFFFFFULL;
- break;
- case HA_KEYTYPE_LONG_INT:
- max_value = 0x7FFFFFFFULL;
- break;
- /* BIG */
- case HA_KEYTYPE_ULONGLONG:
- max_value = 0xFFFFFFFFFFFFFFFFULL;
- break;
- case HA_KEYTYPE_LONGLONG:
- max_value = 0x7FFFFFFFFFFFFFFFULL;
- break;
- case HA_KEYTYPE_FLOAT:
- /* We use the maximum as per IEEE754-2008 standard, 2^24 */
- max_value = 0x1000000ULL;
- break;
- case HA_KEYTYPE_DOUBLE:
- /* We use the maximum as per IEEE754-2008 standard, 2^53 */
- max_value = 0x20000000000000ULL;
- break;
- default:
- ut_error;
- }
-
- return(max_value);
-}
-
-/********************************************************************//**
This special handling is really to overcome the limitations of MySQL's
binlogging. We need to eliminate the non-determinism that will arise in
INSERT ... SELECT type of statements, since MySQL binlog only stores the
@@ -4378,11 +4667,17 @@ no_commit:
prebuilt->autoinc_error = DB_SUCCESS;
if ((error = update_auto_increment())) {
-
/* We don't want to mask autoinc overflow errors. */
- if (prebuilt->autoinc_error != DB_SUCCESS) {
- error = (int) prebuilt->autoinc_error;
+ /* Handle the case where the AUTOINC sub-system
+ failed during initialization. */
+ if (prebuilt->autoinc_error == DB_UNSUPPORTED) {
+ error_result = ER_AUTOINC_READ_FAILED;
+ /* Set the error message to report too. */
+ my_error(ER_AUTOINC_READ_FAILED, MYF(0));
+ goto func_exit;
+ } else if (prebuilt->autoinc_error != DB_SUCCESS) {
+ error = (int) prebuilt->autoinc_error;
goto report_error;
}
@@ -4463,24 +4758,29 @@ no_commit:
update the table upper limit. Note: last_value
will be 0 if get_auto_increment() was not called.*/
- if (auto_inc <= col_max_value
- && auto_inc >= prebuilt->autoinc_last_value) {
+ if (auto_inc >= prebuilt->autoinc_last_value) {
set_max_autoinc:
- ut_a(prebuilt->autoinc_increment > 0);
+ /* This should filter out the negative
+ values set explicitly by the user. */
+ if (auto_inc <= col_max_value) {
+ ut_a(prebuilt->autoinc_increment > 0);
- ulonglong need;
- ulonglong offset;
+ ulonglong need;
+ ulonglong offset;
- offset = prebuilt->autoinc_offset;
- need = prebuilt->autoinc_increment;
+ offset = prebuilt->autoinc_offset;
+ need = prebuilt->autoinc_increment;
- auto_inc = innobase_next_autoinc(
- auto_inc, need, offset, col_max_value);
+ auto_inc = innobase_next_autoinc(
+ auto_inc,
+ need, offset, col_max_value);
- err = innobase_set_max_autoinc(auto_inc);
+ err = innobase_set_max_autoinc(
+ auto_inc);
- if (err != DB_SUCCESS) {
- error = err;
+ if (err != DB_SUCCESS) {
+ error = err;
+ }
}
}
break;
@@ -5160,14 +5460,30 @@ ha_innobase::innobase_get_index(
DBUG_ENTER("innobase_get_index");
ha_statistic_increment(&SSV::ha_read_key_count);
- ut_ad(user_thd == ha_thd());
- ut_a(prebuilt->trx == thd_to_trx(user_thd));
-
if (keynr != MAX_KEY && table->s->keys > 0) {
key = table->key_info + keynr;
- index = dict_table_get_index_on_name(prebuilt->table,
- key->name);
+ index = innobase_index_lookup(share, keynr);
+
+ if (index) {
+ ut_a(ut_strcmp(index->name, key->name) == 0);
+ } else {
+ /* Can't find index with keynr in the translation
+ table. Only print message if the index translation
+ table exists */
+ if (share->idx_trans_tbl.index_mapping) {
+ sql_print_error("InnoDB could not find "
+ "index %s key no %u for "
+ "table %s through its "
+ "index translation table",
+ key ? key->name : "NULL",
+ keynr,
+ prebuilt->table->name);
+ }
+
+ index = dict_table_get_index_on_name(prebuilt->table,
+ key->name);
+ }
} else {
index = dict_table_get_first_index(prebuilt->table);
}
@@ -5228,7 +5544,7 @@ ha_innobase::change_active_index(
dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields);
dict_index_copy_types(prebuilt->search_tuple, prebuilt->index,
- prebuilt->index->n_fields);
+ prebuilt->index->n_fields);
/* MySQL changes the active index for a handle also during some
queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX()
@@ -5729,9 +6045,11 @@ create_table_def(
if (error == DB_DUPLICATE_KEY) {
char buf[100];
- innobase_convert_identifier(buf, sizeof buf,
- table_name, strlen(table_name),
- trx->mysql_thd, TRUE);
+ char* buf_end = innobase_convert_identifier(
+ buf, sizeof buf - 1, table_name, strlen(table_name),
+ trx->mysql_thd, TRUE);
+
+ *buf_end = '\0';
my_error(ER_TABLE_EXISTS_ERROR, MYF(0), buf);
}
@@ -6333,6 +6651,10 @@ ha_innobase::create(
goto cleanup;
}
+ if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
+ flags |= DICT_TF2_TEMPORARY << DICT_TF2_SHIFT;
+ }
+
error = create_table_def(trx, form, norm_name,
create_info->options & HA_LEX_CREATE_TMP_TABLE ? name2 : NULL,
flags);
@@ -6854,10 +7176,15 @@ ha_innobase::records_in_range(
key = table->key_info + active_index;
- index = dict_table_get_index_on_name(prebuilt->table, key->name);
+ index = innobase_get_index(keynr);
- /* MySQL knows about this index and so we must be able to find it.*/
- ut_a(index);
+ /* The requested index might not be found if the MySQL and InnoDB
+ dictionary information are inconsistent. The necessary message
+ should already have been printed in innobase_get_index() */
+ if (UNIV_UNLIKELY(!index)) {
+ n_rows = HA_POS_ERROR;
+ goto func_exit;
+ }
heap = mem_heap_create(2 * (key->key_parts * sizeof(dfield_t)
+ sizeof(dtuple_t)));
@@ -6902,6 +7229,7 @@ ha_innobase::records_in_range(
mem_heap_free(heap);
+func_exit:
my_free(key_val_buff2, MYF(0));
prebuilt->trx->op_info = (char*)"";
@@ -7043,6 +7371,7 @@ ha_innobase::info(
char path[FN_REFLEN];
os_file_stat_t stat_info;
+
DBUG_ENTER("info");
/* If we are forcing recovery at a high level, we will suppress
@@ -7203,13 +7532,29 @@ ha_innobase::info(
}
if (flag & HA_STATUS_CONST) {
- index = dict_table_get_first_index(ib_table);
-
- if (prebuilt->clust_index_was_generated) {
- index = dict_table_get_next_index(index);
+ /* Verify that the number of indexes in InnoDB and MySQL
+ matches up. If prebuilt->clust_index_was_generated
+ holds, InnoDB defines GEN_CLUST_INDEX internally */
+ ulint num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes)
+ - prebuilt->clust_index_was_generated;
+
+ if (table->s->keys != num_innodb_index) {
+ sql_print_error("Table %s contains %lu "
+ "indexes inside InnoDB, which "
+ "is different from the number of "
+ "indexes %u defined in the MySQL ",
+ ib_table->name, num_innodb_index,
+ table->s->keys);
}
for (i = 0; i < table->s->keys; i++) {
+ /* We could get index quickly through internal
+ index mapping with the index translation table.
+ The identity of index (match up index name with
+ that of table->key_info[i]) is already verified in
+ innobase_get_index(). */
+ index = innobase_get_index(i);
+
if (index == NULL) {
sql_print_error("Table %s contains fewer "
"indexes inside InnoDB than "
@@ -7261,8 +7606,6 @@ ha_innobase::info(
rec_per_key >= ~(ulong) 0 ? ~(ulong) 0 :
(ulong) rec_per_key;
}
-
- index = dict_table_get_next_index(index);
}
}
@@ -7340,8 +7683,13 @@ ha_innobase::check(
HA_CHECK_OPT* check_opt) /*!< in: check options, currently
ignored */
{
- ulint ret;
+ dict_index_t* index;
+ ulint n_rows;
+ ulint n_rows_in_table = ULINT_UNDEFINED;
+ ibool is_ok = TRUE;
+ ulint old_isolation_level;
+ DBUG_ENTER("ha_innobase::check");
DBUG_ASSERT(thd == ha_thd());
ut_a(prebuilt->trx);
ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
@@ -7354,17 +7702,140 @@ ha_innobase::check(
build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
}
- ret = row_check_table_for_mysql(prebuilt);
+ if (prebuilt->table->ibd_file_missing) {
+ sql_print_error("InnoDB: Error:\n"
+ "InnoDB: MySQL is trying to use a table handle"
+ " but the .ibd file for\n"
+ "InnoDB: table %s does not exist.\n"
+ "InnoDB: Have you deleted the .ibd file"
+ " from the database directory under\n"
+ "InnoDB: the MySQL datadir, or have you"
+ " used DISCARD TABLESPACE?\n"
+ "InnoDB: Please refer to\n"
+ "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
+ "InnoDB: how you can resolve the problem.\n",
+ prebuilt->table->name);
+ DBUG_RETURN(HA_ADMIN_CORRUPT);
+ }
+
+ prebuilt->trx->op_info = "checking table";
- switch (ret) {
- case DB_SUCCESS:
- return(HA_ADMIN_OK);
- case DB_INTERRUPTED:
+ old_isolation_level = prebuilt->trx->isolation_level;
+
+ /* We must run the index record counts at an isolation level
+ >= READ COMMITTED, because a dirty read can see a wrong number
+ of records in some index; to play safe, we use always
+ REPEATABLE READ here */
+
+ prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ;
+
+ /* Enlarge the fatal lock wait timeout during CHECK TABLE. */
+ mutex_enter(&kernel_mutex);
+ srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */
+ mutex_exit(&kernel_mutex);
+
+ for (index = dict_table_get_first_index(prebuilt->table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+#if 0
+ fputs("Validating index ", stderr);
+ ut_print_name(stderr, trx, FALSE, index->name);
+ putc('\n', stderr);
+#endif
+
+ if (!btr_validate_index(index, prebuilt->trx)) {
+ is_ok = FALSE;
+ push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_NOT_KEYFILE,
+ "InnoDB: The B-tree of"
+ " index '%-.200s' is corrupted.",
+ index->name);
+ continue;
+ }
+
+ /* Instead of invoking change_active_index(), set up
+ a dummy template for non-locking reads, disabling
+ access to the clustered index. */
+ prebuilt->index = index;
+
+ prebuilt->index_usable = row_merge_is_index_usable(
+ prebuilt->trx, prebuilt->index);
+
+ if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
+ push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ HA_ERR_TABLE_DEF_CHANGED,
+ "InnoDB: Insufficient history for"
+ " index '%-.200s'",
+ index->name);
+ continue;
+ }
+
+ prebuilt->sql_stat_start = TRUE;
+ prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE;
+ prebuilt->n_template = 0;
+ prebuilt->need_to_access_clustered = FALSE;
+
+ dtuple_set_n_fields(prebuilt->search_tuple, 0);
+
+ prebuilt->select_lock_type = LOCK_NONE;
+
+ if (!row_check_index_for_mysql(prebuilt, index, &n_rows)) {
+ push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_NOT_KEYFILE,
+ "InnoDB: The B-tree of"
+ " index '%-.200s' is corrupted.",
+ index->name);
+ is_ok = FALSE;
+ }
+
+ if (thd_killed(user_thd)) {
+ break;
+ }
+
+#if 0
+ fprintf(stderr, "%lu entries in index %s\n", n_rows,
+ index->name);
+#endif
+
+ if (index == dict_table_get_first_index(prebuilt->table)) {
+ n_rows_in_table = n_rows;
+ } else if (n_rows != n_rows_in_table) {
+ push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_NOT_KEYFILE,
+ "InnoDB: Index '%-.200s'"
+ " contains %lu entries,"
+ " should be %lu.",
+ index->name,
+ (ulong) n_rows,
+ (ulong) n_rows_in_table);
+ is_ok = FALSE;
+ }
+ }
+
+ /* Restore the original isolation level */
+ prebuilt->trx->isolation_level = old_isolation_level;
+
+ /* We validate also the whole adaptive hash index for all tables
+ at every CHECK TABLE */
+
+ if (!btr_search_validate()) {
+ push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_NOT_KEYFILE,
+ "InnoDB: The adaptive hash index is corrupted.");
+ is_ok = FALSE;
+ }
+
+ /* Restore the fatal lock wait timeout after CHECK TABLE. */
+ mutex_enter(&kernel_mutex);
+ srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */
+ mutex_exit(&kernel_mutex);
+
+ prebuilt->trx->op_info = "";
+ if (thd_killed(user_thd)) {
my_error(ER_QUERY_INTERRUPTED, MYF(0));
- return(-1);
- default:
- return(HA_ADMIN_CORRUPT);
}
+
+ DBUG_RETURN(is_ok ? HA_ADMIN_OK : HA_ADMIN_CORRUPT);
}
/*************************************************************//**
@@ -8187,8 +8658,8 @@ innodb_show_status(
mutex_enter(&srv_monitor_file_mutex);
rewind(srv_monitor_file);
- srv_printf_innodb_monitor(srv_monitor_file,
- &trx_list_start, &trx_list_end);
+ srv_printf_innodb_monitor(srv_monitor_file, FALSE,
+ &trx_list_start, &trx_list_end);
flen = ftell(srv_monitor_file);
os_file_set_eof(srv_monitor_file);
@@ -8245,19 +8716,25 @@ innodb_show_status(
}
/************************************************************************//**
-Implements the SHOW MUTEX STATUS command. . */
+Implements the SHOW MUTEX STATUS command.
+@return TRUE on failure, FALSE on success. */
static
bool
innodb_mutex_show_status(
/*=====================*/
- handlerton* hton, /*!< in: the innodb handlerton */
+ handlerton* hton, /*!< in: the innodb handlerton */
THD* thd, /*!< in: the MySQL query thread of the
caller */
- stat_print_fn* stat_print)
+ stat_print_fn* stat_print) /*!< in: function for printing
+ statistics */
{
char buf1[IO_SIZE], buf2[IO_SIZE];
mutex_t* mutex;
rw_lock_t* lock;
+ ulint block_mutex_oswait_count = 0;
+ ulint block_lock_oswait_count = 0;
+ mutex_t* block_mutex = NULL;
+ rw_lock_t* block_lock = NULL;
#ifdef UNIV_DEBUG
ulint rw_lock_count= 0;
ulint rw_lock_count_spin_loop= 0;
@@ -8272,12 +8749,16 @@ innodb_mutex_show_status(
mutex_enter(&mutex_list_mutex);
- mutex = UT_LIST_GET_FIRST(mutex_list);
+ for (mutex = UT_LIST_GET_FIRST(mutex_list); mutex != NULL;
+ mutex = UT_LIST_GET_NEXT(list, mutex)) {
+ if (mutex->count_os_wait == 0) {
+ continue;
+ }
- while (mutex != NULL) {
- if (mutex->count_os_wait == 0
- || buf_pool_is_block_mutex(mutex)) {
- goto next_mutex;
+ if (buf_pool_is_block_mutex(mutex)) {
+ block_mutex = mutex;
+ block_mutex_oswait_count += mutex->count_os_wait;
+ continue;
}
#ifdef UNIV_DEBUG
if (mutex->mutex_type != 1) {
@@ -8304,8 +8785,7 @@ innodb_mutex_show_status(
DBUG_RETURN(1);
}
}
- }
- else {
+ } else {
rw_lock_count += mutex->count_using;
rw_lock_count_spin_loop += mutex->count_spin_loop;
rw_lock_count_spin_rounds += mutex->count_spin_rounds;
@@ -8317,7 +8797,7 @@ innodb_mutex_show_status(
buf1len= (uint) my_snprintf(buf1, sizeof(buf1), "%s:%lu",
mutex->cfile_name, (ulong) mutex->cline);
buf2len= (uint) my_snprintf(buf2, sizeof(buf2), "os_waits=%lu",
- mutex->count_os_wait);
+ (ulong) mutex->count_os_wait);
if (stat_print(thd, innobase_hton_name,
hton_name_len, buf1, buf1len,
@@ -8326,45 +8806,83 @@ innodb_mutex_show_status(
DBUG_RETURN(1);
}
#endif /* UNIV_DEBUG */
+ }
+
+ if (block_mutex) {
+ buf1len = (uint) my_snprintf(buf1, sizeof buf1,
+ "combined %s:%lu",
+ block_mutex->cfile_name,
+ (ulong) block_mutex->cline);
+ buf2len = (uint) my_snprintf(buf2, sizeof buf2,
+ "os_waits=%lu",
+ (ulong) block_mutex_oswait_count);
-next_mutex:
- mutex = UT_LIST_GET_NEXT(list, mutex);
+ if (stat_print(thd, innobase_hton_name,
+ hton_name_len, buf1, buf1len,
+ buf2, buf2len)) {
+ mutex_exit(&mutex_list_mutex);
+ DBUG_RETURN(1);
+ }
}
mutex_exit(&mutex_list_mutex);
mutex_enter(&rw_lock_list_mutex);
- lock = UT_LIST_GET_FIRST(rw_lock_list);
-
- while (lock != NULL) {
- if (lock->count_os_wait
- && !buf_pool_is_block_lock(lock)) {
- buf1len= my_snprintf(buf1, sizeof(buf1), "%s:%lu",
- lock->cfile_name, (ulong) lock->cline);
- buf2len= my_snprintf(buf2, sizeof(buf2),
- "os_waits=%lu", lock->count_os_wait);
-
- if (stat_print(thd, innobase_hton_name,
- hton_name_len, buf1, buf1len,
- buf2, buf2len)) {
- mutex_exit(&rw_lock_list_mutex);
- DBUG_RETURN(1);
- }
+ for (lock = UT_LIST_GET_FIRST(rw_lock_list); lock != NULL;
+ lock = UT_LIST_GET_NEXT(list, lock)) {
+ if (lock->count_os_wait == 0) {
+ continue;
+ }
+
+ if (buf_pool_is_block_lock(lock)) {
+ block_lock = lock;
+ block_lock_oswait_count += lock->count_os_wait;
+ continue;
+ }
+
+ buf1len = my_snprintf(buf1, sizeof buf1, "%s:%lu",
+ lock->cfile_name, (ulong) lock->cline);
+ buf2len = my_snprintf(buf2, sizeof buf2, "os_waits=%lu",
+ (ulong) lock->count_os_wait);
+
+ if (stat_print(thd, innobase_hton_name,
+ hton_name_len, buf1, buf1len,
+ buf2, buf2len)) {
+ mutex_exit(&rw_lock_list_mutex);
+ DBUG_RETURN(1);
+ }
+ }
+
+ if (block_lock) {
+ buf1len = (uint) my_snprintf(buf1, sizeof buf1,
+ "combined %s:%lu",
+ block_lock->cfile_name,
+ (ulong) block_lock->cline);
+ buf2len = (uint) my_snprintf(buf2, sizeof buf2,
+ "os_waits=%lu",
+ (ulong) block_lock_oswait_count);
+
+ if (stat_print(thd, innobase_hton_name,
+ hton_name_len, buf1, buf1len,
+ buf2, buf2len)) {
+ mutex_exit(&rw_lock_list_mutex);
+ DBUG_RETURN(1);
}
- lock = UT_LIST_GET_NEXT(list, lock);
}
mutex_exit(&rw_lock_list_mutex);
#ifdef UNIV_DEBUG
- buf2len= my_snprintf(buf2, sizeof(buf2),
- "count=%lu, spin_waits=%lu, spin_rounds=%lu, "
- "os_waits=%lu, os_yields=%lu, os_wait_times=%lu",
- rw_lock_count, rw_lock_count_spin_loop,
- rw_lock_count_spin_rounds,
- rw_lock_count_os_wait, rw_lock_count_os_yield,
- (ulong) (rw_lock_wait_time/1000));
+ buf2len = my_snprintf(buf2, sizeof buf2,
+ "count=%lu, spin_waits=%lu, spin_rounds=%lu, "
+ "os_waits=%lu, os_yields=%lu, os_wait_times=%lu",
+ (ulong) rw_lock_count,
+ (ulong) rw_lock_count_spin_loop,
+ (ulong) rw_lock_count_spin_rounds,
+ (ulong) rw_lock_count_os_wait,
+ (ulong) rw_lock_count_os_yield,
+ (ulong) (rw_lock_wait_time / 1000));
if (stat_print(thd, innobase_hton_name, hton_name_len,
STRING_WITH_LEN("rw_lock_mutexes"), buf2, buf2len)) {
@@ -8426,6 +8944,11 @@ static INNOBASE_SHARE* get_share(const char* table_name)
innobase_open_tables, fold, share);
thr_lock_init(&share->lock);
+
+ /* Index translation table initialization */
+ share->idx_trans_tbl.index_mapping = NULL;
+ share->idx_trans_tbl.index_count = 0;
+ share->idx_trans_tbl.array_size = 0;
}
share->use_count++;
@@ -8456,6 +8979,11 @@ static void free_share(INNOBASE_SHARE* share)
HASH_DELETE(INNOBASE_SHARE, table_name_hash,
innobase_open_tables, fold, share);
thr_lock_delete(&share->lock);
+
+ /* Free any memory from index translation table */
+ my_free(share->idx_trans_tbl.index_mapping,
+ MYF(MY_ALLOW_ZERO_PTR));
+
my_free(share, MYF(0));
/* TODO: invoke HASH_MIGRATE if innobase_open_tables
@@ -8690,7 +9218,10 @@ ha_innobase::innobase_get_autoinc(
*value = dict_table_autoinc_read(prebuilt->table);
/* It should have been initialized during open. */
- ut_a(*value != 0);
+ if (*value == 0) {
+ prebuilt->autoinc_error = DB_UNSUPPORTED;
+ dict_table_autoinc_unlock(prebuilt->table);
+ }
}
return(prebuilt->autoinc_error);
@@ -8770,6 +9301,11 @@ ha_innobase::get_auto_increment(
invoking this method. So we are not sure if it's guaranteed to
be 0 or not. */
+ /* We need the upper limit of the col type to check for
+ whether we update the table autoinc counter or not. */
+ ulonglong col_max_value = innobase_get_int_col_max_value(
+ table->next_number_field);
+
/* Called for the first time ? */
if (trx->n_autoinc_rows == 0) {
@@ -8786,6 +9322,11 @@ ha_innobase::get_auto_increment(
/* Not in the middle of a mult-row INSERT. */
} else if (prebuilt->autoinc_last_value == 0) {
set_if_bigger(*first_value, autoinc);
+ /* Check for -ve values. */
+ } else if (*first_value > col_max_value && trx->n_autoinc_rows > 0) {
+ /* Set to next logical value. */
+ ut_a(autoinc > trx->n_autoinc_rows);
+ *first_value = (autoinc - trx->n_autoinc_rows) - 1;
}
*nb_reserved_values = trx->n_autoinc_rows;
@@ -8796,12 +9337,6 @@ ha_innobase::get_auto_increment(
ulonglong need;
ulonglong current;
ulonglong next_value;
- ulonglong col_max_value;
-
- /* We need the upper limit of the col type to check for
- whether we update the table autoinc counter or not. */
- col_max_value = innobase_get_int_col_max_value(
- table->next_number_field);
current = *first_value > col_max_value ? autoinc : *first_value;
need = *nb_reserved_values * increment;
@@ -9298,33 +9833,60 @@ innobase_set_cursor_view(
(cursor_view_t*) curview);
}
+/*******************************************************************//**
+If col_name is not NULL, check whether the named column is being
+renamed in the table. If col_name is NULL, check whether any
+of the columns in the table is being renamed.
+@return true if the named column (or, when col_name is NULL, any
+column) is being renamed */
+static
+bool
+check_column_being_renamed(
+/*=======================*/
+ const TABLE* table, /*!< in: MySQL table */
+ const char* col_name) /*!< in: name of the column */
+{
+ uint k;
+ Field* field;
+
+ for (k = 0; k < table->s->fields; k++) {
+ field = table->field[k];
-/***********************************************************************
-Check whether any of the given columns is being renamed in the table. */
+ if (field->flags & FIELD_IS_RENAMED) {
+
+ /* If col_name is not provided, return
+ if the field is marked as being renamed. */
+ if (!col_name) {
+ return(true);
+ }
+
+ /* If col_name is provided, return only
+ if names match */
+ if (innobase_strcasecmp(field->field_name,
+ col_name) == 0) {
+ return(true);
+ }
+ }
+ }
+
+ return(false);
+}
+
+/*******************************************************************//**
+Check whether any of the given columns is being renamed in the table.
+@return true if any of col_names is being renamed in table */
static
bool
column_is_being_renamed(
/*====================*/
- /* out: true if any of col_names is
- being renamed in table */
- TABLE* table, /* in: MySQL table */
- uint n_cols, /* in: number of columns */
- const char** col_names) /* in: names of the columns */
+ TABLE* table, /*!< in: MySQL table */
+ uint n_cols, /*!< in: number of columns */
+ const char** col_names) /*!< in: names of the columns */
{
uint j;
- uint k;
- Field* field;
- const char* col_name;
for (j = 0; j < n_cols; j++) {
- col_name = col_names[j];
- for (k = 0; k < table->s->fields; k++) {
- field = table->field[k];
- if ((field->flags & FIELD_IS_RENAMED)
- && innobase_strcasecmp(field->field_name,
- col_name) == 0) {
- return(true);
- }
+ if (check_column_being_renamed(table, col_names[j])) {
+ return(true);
}
}
@@ -9408,6 +9970,15 @@ ha_innobase::check_if_incompatible_data(
return(COMPATIBLE_DATA_NO);
}
+ /* For column rename operation, MySQL does not supply enough
+ information (new column name etc.) for InnoDB to make appropriate
+ system metadata change. To avoid system metadata inconsistency,
+ currently we can just request a table rebuild/copy by returning
+ COMPATIBLE_DATA_NO */
+ if (check_column_being_renamed(table, NULL)) {
+ return COMPATIBLE_DATA_NO;
+ }
+
/* Check if a column participating in a foreign key is being renamed.
There is no mechanism for updating InnoDB foreign key definitions. */
if (foreign_key_column_is_being_renamed(prebuilt, table)) {
@@ -10153,13 +10724,13 @@ static MYSQL_SYSVAR_BOOL(use_sys_malloc, srv_use_sys_malloc,
static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering,
PLUGIN_VAR_RQCMDARG,
"Buffer changes to reduce random access: "
- "OFF, ON, inserting, deleting, changing, or purging.",
+ "OFF, ON, none, inserts.",
innodb_change_buffering_validate,
innodb_change_buffering_update, NULL);
static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold,
PLUGIN_VAR_RQCMDARG,
- "Number of pages that must be accessed sequentially for InnoDB to"
+ "Number of pages that must be accessed sequentially for InnoDB to "
"trigger a readahead.",
NULL, NULL, 56, 0, 64, 0);
diff --git a/storage/innodb_plugin/handler/ha_innodb.h b/storage/innodb_plugin/handler/ha_innodb.h
index 31e88ed8530..8a3e1ccff82 100644
--- a/storage/innodb_plugin/handler/ha_innodb.h
+++ b/storage/innodb_plugin/handler/ha_innodb.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2000, 2009, MySQL AB & Innobase Oy. All Rights Reserved.
+Copyright (c) 2000, 2010, MySQL AB & Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,15 +27,31 @@ Place, Suite 330, Boston, MA 02111-1307 USA
#pragma interface /* gcc class implementation */
#endif
+/* Translation table between MySQL index numbers and InnoDB index
+structures. It is embedded in INNOBASE_SHARE as idx_trans_tbl:
+get_share() initializes it empty (NULL mapping, zero counts) and
+free_share() frees index_mapping. */
+typedef struct innodb_idx_translate_struct {
+ ulint index_count; /*!< number of valid index entries
+ in the index_mapping array; reset to
+ zero after indexes are added or
+ dropped, because the translation table
+ then needs to be rebuilt */
+ ulint array_size; /*!< array size of index_mapping */
+ dict_index_t** index_mapping; /*!< array of pointers to InnoDB
+ indexes, addressed by the MySQL
+ index number (position in the MySQL
+ key array) */
+} innodb_idx_translate_t;
+
+
/** InnoDB table share */
typedef struct st_innobase_share {
- THR_LOCK lock; /*!< MySQL lock protecting
- this structure */
- const char* table_name; /*!< InnoDB table name */
- uint use_count; /*!< reference count,
- incremented in get_share()
- and decremented in free_share() */
- void* table_name_hash;/*!< hash table chain node */
+ THR_LOCK lock; /*!< MySQL lock protecting
+ this structure */
+ const char* table_name; /*!< InnoDB table name */
+ uint use_count; /*!< reference count,
+ incremented in get_share()
+ and decremented in
+ free_share() */
+ void* table_name_hash;/*!< hash table chain node */
+ innodb_idx_translate_t idx_trans_tbl; /*!< index translation
+ table between MySQL and
+ Innodb */
} INNOBASE_SHARE;
@@ -91,9 +107,8 @@ class ha_innobase: public handler
ulint innobase_reset_autoinc(ulonglong auto_inc);
ulint innobase_get_autoinc(ulonglong* value);
ulint innobase_update_autoinc(ulonglong auto_inc);
- ulint innobase_initialize_autoinc();
+ void innobase_initialize_autoinc();
dict_index_t* innobase_get_index(uint keynr);
- ulonglong innobase_get_int_col_max_value(const Field* field);
/* Init values for the class: */
public:
diff --git a/storage/innodb_plugin/handler/handler0alter.cc b/storage/innodb_plugin/handler/handler0alter.cc
index a5008991400..071253d2dae 100644
--- a/storage/innodb_plugin/handler/handler0alter.cc
+++ b/storage/innodb_plugin/handler/handler0alter.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -229,9 +229,11 @@ static
int
innobase_check_index_keys(
/*======================*/
- const KEY* key_info, /*!< in: Indexes to be created */
- ulint num_of_keys) /*!< in: Number of indexes to
- be created */
+ const KEY* key_info, /*!< in: Indexes to be
+ created */
+ ulint num_of_keys, /*!< in: Number of
+ indexes to be created */
+ const dict_table_t* table) /*!< in: Existing indexes */
{
ulint key_num;
@@ -248,9 +250,22 @@ innobase_check_index_keys(
const KEY& key2 = key_info[i];
if (0 == strcmp(key.name, key2.name)) {
- sql_print_error("InnoDB: key name `%s` appears"
- " twice in CREATE INDEX\n",
- key.name);
+ my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
+ key.name);
+
+ return(ER_WRONG_NAME_FOR_INDEX);
+ }
+ }
+
+ /* Check that the same index name does not already exist. */
+
+ for (const dict_index_t* index
+ = dict_table_get_first_index(table);
+ index; index = dict_table_get_next_index(index)) {
+
+ if (0 == strcmp(key.name, index->name)) {
+ my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
+ key.name);
return(ER_WRONG_NAME_FOR_INDEX);
}
@@ -258,7 +273,7 @@ innobase_check_index_keys(
/* Check that MySQL does not try to create a column
prefix index field on an inappropriate data type and
- that the same colum does not appear twice in the index. */
+ that the same column does not appear twice in the index. */
for (ulint i = 0; i < key.key_parts; i++) {
const KEY_PART_INFO& key_part1
@@ -289,14 +304,8 @@ innobase_check_index_keys(
}
}
- sql_print_error("InnoDB: MySQL is trying to"
- " create a column prefix"
- " index field on an"
- " inappropriate data type."
- " column `%s`,"
- " index `%s`.\n",
- field->field_name,
- key.name);
+ my_error(ER_WRONG_KEY_COLUMN, MYF(0),
+ field->field_name);
return(ER_WRONG_KEY_COLUMN);
}
@@ -309,11 +318,8 @@ innobase_check_index_keys(
continue;
}
- sql_print_error("InnoDB: column `%s`"
- " is not allowed to occur"
- " twice in index `%s`.\n",
- key_part1.field->field_name,
- key.name);
+ my_error(ER_WRONG_KEY_COLUMN, MYF(0),
+ key_part1.field->field_name);
return(ER_WRONG_KEY_COLUMN);
}
}
@@ -522,12 +528,14 @@ innobase_create_key_def(
key_info->name, "PRIMARY");
/* If there is a UNIQUE INDEX consisting entirely of NOT NULL
- columns, MySQL will treat it as a PRIMARY KEY unless the
- table already has one. */
+ columns and if the index does not contain column prefix(es)
+ (only prefix/part of the column is indexed), MySQL will treat the
+ index as a PRIMARY KEY unless the table already has one. */
if (!new_primary && (key_info->flags & HA_NOSAME)
+ && (!(key_info->flags & HA_KEY_HAS_PART_KEY_SEG))
&& row_table_got_default_clust_index(table)) {
- uint key_part = key_info->key_parts;
+ uint key_part = key_info->key_parts;
new_primary = TRUE;
@@ -656,12 +664,18 @@ ha_innobase::add_index(
innodb_table = indexed_table
= dict_table_get(prebuilt->table->name, FALSE);
+ if (UNIV_UNLIKELY(!innodb_table)) {
+ error = HA_ERR_NO_SUCH_TABLE;
+ goto err_exit;
+ }
+
/* Check if the index name is reserved. */
if (innobase_index_name_is_reserved(trx, key_info, num_of_keys)) {
error = -1;
} else {
/* Check that index keys are sensible */
- error = innobase_check_index_keys(key_info, num_of_keys);
+ error = innobase_check_index_keys(key_info, num_of_keys,
+ innodb_table);
}
if (UNIV_UNLIKELY(error)) {
@@ -764,6 +778,10 @@ err_exit:
ut_ad(error == DB_SUCCESS);
+ /* We will need to rebuild index translation table. Set
+ valid index entry count in the translation table to zero */
+ share->idx_trans_tbl.index_count = 0;
+
/* Commit the data dictionary transaction in order to release
the table locks on the system tables. This means that if
MySQL crashes while creating a new primary key inside
@@ -799,18 +817,6 @@ err_exit:
index, num_of_idx, table);
error_handling:
-#ifdef UNIV_DEBUG
- /* TODO: At the moment we can't handle the following statement
- in our debugging code below:
-
- alter table t drop index b, add index (b);
-
- The fix will have to parse the SQL and note that the index
- being added has the same name as the one being dropped and
- ignore that in the dup index check.*/
- //dict_table_check_for_dup_indexes(prebuilt->table);
-#endif
-
/* After an error, remove all those index definitions from the
dictionary which were defined. */
@@ -822,6 +828,8 @@ error_handling:
row_mysql_lock_data_dictionary(trx);
dict_locked = TRUE;
+ ut_d(dict_table_check_for_dup_indexes(prebuilt->table));
+
if (!new_primary) {
error = row_merge_rename_indexes(trx, indexed_table);
@@ -1198,9 +1206,11 @@ ha_innobase::final_drop_index(
ut_a(!index->to_be_dropped);
}
-#ifdef UNIV_DEBUG
- dict_table_check_for_dup_indexes(prebuilt->table);
-#endif
+ /* We will need to rebuild index translation table. Set
+ valid index entry count in the translation table to zero */
+ share->idx_trans_tbl.index_count = 0;
+
+ ut_d(dict_table_check_for_dup_indexes(prebuilt->table));
func_exit:
trx_commit_for_mysql(trx);
diff --git a/storage/innodb_plugin/ibuf/ibuf0ibuf.c b/storage/innodb_plugin/ibuf/ibuf0ibuf.c
index 08986fac0ef..5e9b4b27611 100644
--- a/storage/innodb_plugin/ibuf/ibuf0ibuf.c
+++ b/storage/innodb_plugin/ibuf/ibuf0ibuf.c
@@ -730,24 +730,41 @@ page containing the descriptor bits for the file page; the bitmap page
is x-latched */
static
page_t*
-ibuf_bitmap_get_map_page(
-/*=====================*/
- ulint space, /*!< in: space id of the file page */
- ulint page_no,/*!< in: page number of the file page */
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- mtr_t* mtr) /*!< in: mtr */
+ibuf_bitmap_get_map_page_func(
+/*==========================*/
+ ulint space, /*!< in: space id of the file page */
+ ulint page_no,/*!< in: page number of the file page */
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
+ mtr_t* mtr) /*!< in: mtr */
{
buf_block_t* block;
- block = buf_page_get(space, zip_size,
- ibuf_bitmap_page_no_calc(zip_size, page_no),
- RW_X_LATCH, mtr);
+ block = buf_page_get_gen(space, zip_size,
+ ibuf_bitmap_page_no_calc(zip_size, page_no),
+ RW_X_LATCH, NULL, BUF_GET,
+ file, line, mtr);
buf_block_dbg_add_level(block, SYNC_IBUF_BITMAP);
return(buf_block_get_frame(block));
}
+/********************************************************************//**
+Gets the ibuf bitmap page where the bits describing a given file page are
+stored.
+@return bitmap page where the file page is mapped, that is, the bitmap
+page containing the descriptor bits for the file page; the bitmap page
+is x-latched
+@param space in: space id of the file page
+@param page_no in: page number of the file page
+@param zip_size in: compressed page size in bytes; 0 for uncompressed pages
+@param mtr in: mini-transaction */
+#define ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr) \
+ ibuf_bitmap_get_map_page_func(space, page_no, zip_size, \
+ __FILE__, __LINE__, mtr)
+
/************************************************************************//**
Sets the free bits of the page in the ibuf bitmap. This is done in a separate
mini-transaction, hence this operation does not restrict further work to only
diff --git a/storage/innodb_plugin/include/btr0btr.h b/storage/innodb_plugin/include/btr0btr.h
index d5c8258513c..5e6a76c7d21 100644
--- a/storage/innodb_plugin/include/btr0btr.h
+++ b/storage/innodb_plugin/include/btr0btr.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -193,6 +193,10 @@ btr_leaf_page_release(
mtr_t* mtr); /*!< in: mtr */
/**************************************************************//**
Gets the child node file address in a node pointer.
+NOTE: the offsets array must contain all offsets for the record since
+we read the last field according to offsets and assume that it contains
+the child page number. In other words offsets must have been retrieved
+with rec_get_offsets(n_fields=ULINT_UNDEFINED).
@return child node address */
UNIV_INLINE
ulint
@@ -317,12 +321,16 @@ Inserts a data tuple to a tree on a non-leaf level. It is assumed
that mtr holds an x-latch on the tree. */
UNIV_INTERN
void
-btr_insert_on_non_leaf_level(
-/*=========================*/
+btr_insert_on_non_leaf_level_func(
+/*==============================*/
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: level, must be > 0 */
dtuple_t* tuple, /*!< in: the record to be inserted */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
+# define btr_insert_on_non_leaf_level(i,l,t,m) \
+ btr_insert_on_non_leaf_level_func(i,l,t,__FILE__,__LINE__,m)
#endif /* !UNIV_HOTBACKUP */
/****************************************************************//**
Sets a record as the predefined minimum record. */
diff --git a/storage/innodb_plugin/include/btr0btr.ic b/storage/innodb_plugin/include/btr0btr.ic
index 2259d22c9a6..97944cc2e26 100644
--- a/storage/innodb_plugin/include/btr0btr.ic
+++ b/storage/innodb_plugin/include/btr0btr.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -255,6 +255,10 @@ btr_page_set_prev(
/**************************************************************//**
Gets the child node file address in a node pointer.
+NOTE: the offsets array must contain all offsets for the record since
+we read the last field according to offsets and assume that it contains
+the child page number. In other words offsets must have been retrieved
+with rec_get_offsets(n_fields=ULINT_UNDEFINED).
@return child node address */
UNIV_INLINE
ulint
diff --git a/storage/innodb_plugin/include/btr0cur.h b/storage/innodb_plugin/include/btr0cur.h
index 480a3877e54..716f15c4267 100644
--- a/storage/innodb_plugin/include/btr0cur.h
+++ b/storage/innodb_plugin/include/btr0cur.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -152,29 +152,39 @@ btr_cur_search_to_nth_level(
ulint has_search_latch,/*!< in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, or 0 */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
/*****************************************************************//**
Opens a cursor at either end of an index. */
UNIV_INTERN
void
-btr_cur_open_at_index_side(
-/*=======================*/
+btr_cur_open_at_index_side_func(
+/*============================*/
ibool from_left, /*!< in: TRUE if open to the low end,
FALSE if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
btr_cur_t* cursor, /*!< in: cursor */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
+#define btr_cur_open_at_index_side(f,i,l,c,m) \
+ btr_cur_open_at_index_side_func(f,i,l,c,__FILE__,__LINE__,m)
/**********************************************************************//**
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INTERN
void
-btr_cur_open_at_rnd_pos(
-/*====================*/
+btr_cur_open_at_rnd_pos_func(
+/*=========================*/
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_cur_t* cursor, /*!< in/out: B-tree cursor */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
+#define btr_cur_open_at_rnd_pos(i,l,c,m) \
+ btr_cur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
/*************************************************************//**
Tries to perform an insert to a page in an index tree, next to cursor.
It is assumed that mtr holds an x-latch on the page. The operation does
diff --git a/storage/innodb_plugin/include/btr0pcur.h b/storage/innodb_plugin/include/btr0pcur.h
index 12b1375d8b7..2334a266280 100644
--- a/storage/innodb_plugin/include/btr0pcur.h
+++ b/storage/innodb_plugin/include/btr0pcur.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -82,8 +82,8 @@ Initializes and opens a persistent cursor to an index tree. It should be
closed with btr_pcur_close. */
UNIV_INLINE
void
-btr_pcur_open(
-/*==========*/
+btr_pcur_open_func(
+/*===============*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
ulint mode, /*!< in: PAGE_CUR_L, ...;
@@ -94,14 +94,18 @@ btr_pcur_open(
record! */
ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
+#define btr_pcur_open(i,t,md,l,c,m) \
+ btr_pcur_open_func(i,t,md,l,c,__FILE__,__LINE__,m)
/**************************************************************//**
Opens an persistent cursor to an index tree without initializing the
cursor. */
UNIV_INLINE
void
-btr_pcur_open_with_no_init(
-/*=======================*/
+btr_pcur_open_with_no_init_func(
+/*============================*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
ulint mode, /*!< in: PAGE_CUR_L, ...;
@@ -119,7 +123,12 @@ btr_pcur_open_with_no_init(
ulint has_search_latch,/*!< in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, or 0 */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
+#define btr_pcur_open_with_no_init(ix,t,md,l,cur,has,m) \
+ btr_pcur_open_with_no_init_func(ix,t,md,l,cur,has,__FILE__,__LINE__,m)
+
/*****************************************************************//**
Opens a persistent cursor at either end of an index. */
UNIV_INLINE
@@ -160,8 +169,8 @@ before first in tree. The latching mode must be BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF. */
UNIV_INTERN
void
-btr_pcur_open_on_user_rec(
-/*======================*/
+btr_pcur_open_on_user_rec_func(
+/*===========================*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
ulint mode, /*!< in: PAGE_CUR_L, ... */
@@ -169,17 +178,25 @@ btr_pcur_open_on_user_rec(
BTR_MODIFY_LEAF */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent
cursor */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
+#define btr_pcur_open_on_user_rec(i,t,md,l,c,m) \
+ btr_pcur_open_on_user_rec_func(i,t,md,l,c,__FILE__,__LINE__,m)
/**********************************************************************//**
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INLINE
void
-btr_pcur_open_at_rnd_pos(
-/*=====================*/
+btr_pcur_open_at_rnd_pos_func(
+/*==========================*/
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in/out: B-tree pcur */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
+#define btr_pcur_open_at_rnd_pos(i,l,c,m) \
+ btr_pcur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
/**************************************************************//**
Frees the possible old_rec_buf buffer of a persistent cursor and sets the
latch mode of the persistent cursor to BTR_NO_LATCHES. */
@@ -218,11 +235,15 @@ record and it can be restored on a user record whose ordering fields
are identical to the ones of the original user record */
UNIV_INTERN
ibool
-btr_pcur_restore_position(
-/*======================*/
+btr_pcur_restore_position_func(
+/*===========================*/
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in: detached persistent cursor */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
+#define btr_pcur_restore_position(l,cur,mtr) \
+ btr_pcur_restore_position_func(l,cur,__FILE__,__LINE__,mtr)
/**************************************************************//**
If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY,
releases the page latch and bufferfix reserved by the cursor.
@@ -260,20 +281,13 @@ btr_pcur_get_mtr(
/*=============*/
btr_pcur_t* cursor); /*!< in: persistent cursor */
/**************************************************************//**
-Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
+Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES,
that is, the cursor becomes detached. If there have been modifications
to the page where pcur is positioned, this can be used instead of
btr_pcur_release_leaf. Function btr_pcur_store_position should be used
before calling this, if restoration of cursor is wanted later. */
UNIV_INLINE
void
-btr_pcur_commit(
-/*============*/
- btr_pcur_t* pcur); /*!< in: persistent cursor */
-/**************************************************************//**
-Differs from btr_pcur_commit in that we can specify the mtr to commit. */
-UNIV_INLINE
-void
btr_pcur_commit_specify_mtr(
/*========================*/
btr_pcur_t* pcur, /*!< in: persistent cursor */
diff --git a/storage/innodb_plugin/include/btr0pcur.ic b/storage/innodb_plugin/include/btr0pcur.ic
index 0ca7223f861..0c38797e6c5 100644
--- a/storage/innodb_plugin/include/btr0pcur.ic
+++ b/storage/innodb_plugin/include/btr0pcur.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -395,30 +395,13 @@ btr_pcur_move_to_next(
}
/**************************************************************//**
-Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
+Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES,
that is, the cursor becomes detached. If there have been modifications
to the page where pcur is positioned, this can be used instead of
btr_pcur_release_leaf. Function btr_pcur_store_position should be used
before calling this, if restoration of cursor is wanted later. */
UNIV_INLINE
void
-btr_pcur_commit(
-/*============*/
- btr_pcur_t* pcur) /*!< in: persistent cursor */
-{
- ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
-
- pcur->latch_mode = BTR_NO_LATCHES;
-
- mtr_commit(pcur->mtr);
-
- pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
-}
-
-/**************************************************************//**
-Differs from btr_pcur_commit in that we can specify the mtr to commit. */
-UNIV_INLINE
-void
btr_pcur_commit_specify_mtr(
/*========================*/
btr_pcur_t* pcur, /*!< in: persistent cursor */
@@ -483,8 +466,8 @@ Initializes and opens a persistent cursor to an index tree. It should be
closed with btr_pcur_close. */
UNIV_INLINE
void
-btr_pcur_open(
-/*==========*/
+btr_pcur_open_func(
+/*===============*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
ulint mode, /*!< in: PAGE_CUR_L, ...;
@@ -495,6 +478,8 @@ btr_pcur_open(
record! */
ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
btr_cur_t* btr_cursor;
@@ -511,7 +496,7 @@ btr_pcur_open(
btr_cursor = btr_pcur_get_btr_cur(cursor);
btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
- btr_cursor, 0, mtr);
+ btr_cursor, 0, file, line, mtr);
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
cursor->trx_if_known = NULL;
@@ -522,8 +507,8 @@ Opens an persistent cursor to an index tree without initializing the
cursor. */
UNIV_INLINE
void
-btr_pcur_open_with_no_init(
-/*=======================*/
+btr_pcur_open_with_no_init_func(
+/*============================*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
ulint mode, /*!< in: PAGE_CUR_L, ...;
@@ -541,6 +526,8 @@ btr_pcur_open_with_no_init(
ulint has_search_latch,/*!< in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, or 0 */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
btr_cur_t* btr_cursor;
@@ -553,7 +540,8 @@ btr_pcur_open_with_no_init(
btr_cursor = btr_pcur_get_btr_cur(cursor);
btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
- btr_cursor, has_search_latch, mtr);
+ btr_cursor, has_search_latch,
+ file, line, mtr);
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
@@ -600,11 +588,13 @@ btr_pcur_open_at_index_side(
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INLINE
void
-btr_pcur_open_at_rnd_pos(
-/*=====================*/
+btr_pcur_open_at_rnd_pos_func(
+/*==========================*/
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in/out: B-tree pcur */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
/* Initialize the cursor */
@@ -614,8 +604,9 @@ btr_pcur_open_at_rnd_pos(
btr_pcur_init(cursor);
- btr_cur_open_at_rnd_pos(index, latch_mode,
- btr_pcur_get_btr_cur(cursor), mtr);
+ btr_cur_open_at_rnd_pos_func(index, latch_mode,
+ btr_pcur_get_btr_cur(cursor),
+ file, line, mtr);
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
diff --git a/storage/innodb_plugin/include/buf0buf.h b/storage/innodb_plugin/include/buf0buf.h
index 927ff893e39..cd4ee5906f0 100644
--- a/storage/innodb_plugin/include/buf0buf.h
+++ b/storage/innodb_plugin/include/buf0buf.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,6 +33,7 @@ Created 11/5/1995 Heikki Tuuri
#include "hash0hash.h"
#include "ut0byte.h"
#include "page0types.h"
+#include "ut0rbt.h"
#ifndef UNIV_HOTBACKUP
#include "os0proc.h"
@@ -202,20 +203,14 @@ with care. */
#define buf_page_get_with_no_latch(SP, ZS, OF, MTR) buf_page_get_gen(\
SP, ZS, OF, RW_NO_LATCH, NULL,\
BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR)
-/**************************************************************//**
-NOTE! The following macros should be used instead of
-buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
-RW_X_LATCH are allowed as LA! */
-#define buf_page_optimistic_get(LA, BL, MC, MTR) \
- buf_page_optimistic_get_func(LA, BL, MC, __FILE__, __LINE__, MTR)
/********************************************************************//**
This is the general function used to get optimistic access to a database
page.
@return TRUE if success */
UNIV_INTERN
ibool
-buf_page_optimistic_get_func(
-/*=========================*/
+buf_page_optimistic_get(
+/*====================*/
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
buf_block_t* block, /*!< in: guessed block */
ib_uint64_t modify_clock,/*!< in: modify clock value if mode is
@@ -1185,15 +1180,21 @@ struct buf_block_struct{
rw_lock_t lock; /*!< read-write lock of the buffer
frame */
unsigned lock_hash_val:32;/*!< hashed value of the page address
- in the record lock hash table */
- unsigned check_index_page_at_flush:1;
+ in the record lock hash table;
+ protected by buf_block_t::lock
+ (or buf_block_t::mutex, buf_pool_mutex
+ in buf_page_get_gen(),
+ buf_page_init_for_read()
+ and buf_page_create()) */
+ ibool check_index_page_at_flush;
/*!< TRUE if we know that this is
an index page, and want the database
to check its consistency before flush;
note that there may be pages in the
buffer pool which are index pages,
but this flag is not set because
- we do not keep track of all pages */
+ we do not keep track of all pages;
+ NOT protected by any mutex */
/* @} */
/** @name Optimistic search field */
/* @{ */
@@ -1359,6 +1360,19 @@ struct buf_pool_struct{
/*!< this is in the set state
when there is no flush batch
of the given type running */
+ ib_rbt_t* flush_rbt; /*!< a red-black tree is used
+ exclusively during recovery to
+ speed up insertions in the
+ flush_list. This tree contains
+ blocks in order of
+ oldest_modification LSN and is
+ kept in sync with the
+ flush_list.
+ Each member of the tree MUST
+ also be on the flush_list.
+ This tree is relevant only in
+ recovery and is set to NULL
+ once the recovery is over. */
ulint freed_page_clock;/*!< a sequence number used
to count the number of buffer
blocks removed from the end of
diff --git a/storage/innodb_plugin/include/buf0buf.ic b/storage/innodb_plugin/include/buf0buf.ic
index 0f92a59a1c7..378c3590181 100644
--- a/storage/innodb_plugin/include/buf0buf.ic
+++ b/storage/innodb_plugin/include/buf0buf.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -81,7 +81,7 @@ buf_page_peek_if_too_old(
unsigned access_time = buf_page_is_accessed(bpage);
if (access_time > 0
- && (ut_time_ms() - access_time)
+ && ((ib_uint32_t) (ut_time_ms() - access_time))
>= buf_LRU_old_threshold_ms) {
return(TRUE);
}
@@ -705,6 +705,12 @@ buf_block_get_lock_hash_val(
/*========================*/
const buf_block_t* block) /*!< in: block */
{
+ ut_ad(block);
+ ut_ad(buf_page_in_file(&block->page));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_EXCLUSIVE)
+ || rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
return(block->lock_hash_val);
}
diff --git a/storage/innodb_plugin/include/buf0flu.h b/storage/innodb_plugin/include/buf0flu.h
index 6c751852f54..c996f6eaab4 100644
--- a/storage/innodb_plugin/include/buf0flu.h
+++ b/storage/innodb_plugin/include/buf0flu.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -40,6 +40,16 @@ buf_flush_remove(
/*=============*/
buf_page_t* bpage); /*!< in: pointer to the block in question */
/********************************************************************//**
+Relocates a buffer control block on the flush_list.
+Note that it is assumed that the contents of bpage have already been
+copied to dpage. */
+UNIV_INTERN
+void
+buf_flush_relocate_on_flush_list(
+/*=============================*/
+ buf_page_t* bpage, /*!< in/out: control block being moved */
+ buf_page_t* dpage); /*!< in/out: destination block */
+/********************************************************************//**
Updates the flush system data structures when a write is completed. */
UNIV_INTERN
void
@@ -139,8 +149,8 @@ how much redo the workload is generating and at what rate. */
struct buf_flush_stat_struct
{
- ib_uint64_t redo; /**< amount of redo generated. */
- ulint n_flushed; /**< number of pages flushed. */
+ ib_uint64_t redo; /*!< amount of redo generated. */
+ ulint n_flushed; /*!< number of pages flushed. */
};
/** Statistics for selecting flush rate of dirty pages. */
@@ -175,6 +185,22 @@ buf_flush_validate(void);
/*====================*/
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+/******************************************************************//**
+Initialize the red-black tree to speed up insertions into the flush_list
+during recovery process. Should be called at the start of recovery
+process before any page has been read/written. */
+UNIV_INTERN
+void
+buf_flush_init_flush_rbt(void);
+/*==========================*/
+
+/******************************************************************//**
+Frees up the red-black tree. */
+UNIV_INTERN
+void
+buf_flush_free_flush_rbt(void);
+/*==========================*/
+
/** When buf_flush_free_margin is called, it tries to make this many blocks
available to replacement in the free list and at the end of the LRU list (to
make sure that a read-ahead batch can be read efficiently in a single
diff --git a/storage/innodb_plugin/include/data0type.ic b/storage/innodb_plugin/include/data0type.ic
index 240b4288f39..2bf67a941bd 100644
--- a/storage/innodb_plugin/include/data0type.ic
+++ b/storage/innodb_plugin/include/data0type.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -285,6 +285,10 @@ dtype_new_store_for_order_and_null_size(
#endif
ulint len;
+ ut_ad(type);
+ ut_ad(type->mtype >= DATA_VARCHAR);
+ ut_ad(type->mtype <= DATA_MYSQL);
+
buf[0] = (byte)(type->mtype & 0xFFUL);
if (type->prtype & DATA_BINARY_TYPE) {
diff --git a/storage/innodb_plugin/include/dict0boot.h b/storage/innodb_plugin/include/dict0boot.h
index 51d37ee98d1..e01fafe652d 100644
--- a/storage/innodb_plugin/include/dict0boot.h
+++ b/storage/innodb_plugin/include/dict0boot.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -137,6 +137,7 @@ clustered index */
#define DICT_SYS_INDEXES_PAGE_NO_FIELD 8
#define DICT_SYS_INDEXES_SPACE_NO_FIELD 7
#define DICT_SYS_INDEXES_TYPE_FIELD 6
+#define DICT_SYS_INDEXES_NAME_FIELD 3
/* When a row id which is zero modulo this number (which must be a power of
two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
diff --git a/storage/innodb_plugin/include/dict0mem.h b/storage/innodb_plugin/include/dict0mem.h
index 2d001111938..9996fb59a75 100644
--- a/storage/innodb_plugin/include/dict0mem.h
+++ b/storage/innodb_plugin/include/dict0mem.h
@@ -80,21 +80,39 @@ combination of types */
/** File format */
/* @{ */
#define DICT_TF_FORMAT_SHIFT 5 /* file format */
-#define DICT_TF_FORMAT_MASK (127 << DICT_TF_FORMAT_SHIFT)
+#define DICT_TF_FORMAT_MASK \
+((~(~0 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT))) << DICT_TF_FORMAT_SHIFT)
#define DICT_TF_FORMAT_51 0 /*!< InnoDB/MySQL up to 5.1 */
#define DICT_TF_FORMAT_ZIP 1 /*!< InnoDB plugin for 5.1:
compressed tables,
new BLOB treatment */
/** Maximum supported file format */
#define DICT_TF_FORMAT_MAX DICT_TF_FORMAT_ZIP
-
+/* @} */
#define DICT_TF_BITS 6 /*!< number of flag bits */
#if (1 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT)) <= DICT_TF_FORMAT_MAX
# error "DICT_TF_BITS is insufficient for DICT_TF_FORMAT_MAX"
#endif
/* @} */
+
+/** @brief Additional table flags.
+
+These flags will be stored in SYS_TABLES.MIX_LEN. All unused flags
+will be written as 0. The column may contain garbage for tables
+created with old versions of InnoDB that only implemented
+ROW_FORMAT=REDUNDANT. */
+/* @{ */
+#define DICT_TF2_SHIFT DICT_TF_BITS
+ /*!< Shift value for
+ table->flags. */
+#define DICT_TF2_TEMPORARY 1 /*!< TRUE for tables from
+ CREATE TEMPORARY TABLE. */
+#define DICT_TF2_BITS (DICT_TF2_SHIFT + 1)
+ /*!< Total number of bits
+ in table->flags. */
/* @} */
+
/**********************************************************************//**
Creates a table memory object.
@return own: table object */
@@ -374,7 +392,7 @@ struct dict_table_struct{
unsigned space:32;
/*!< space where the clustered index of the
table is placed */
- unsigned flags:DICT_TF_BITS;/*!< DICT_TF_COMPACT, ... */
+ unsigned flags:DICT_TF2_BITS;/*!< DICT_TF_COMPACT, ... */
unsigned ibd_file_missing:1;
/*!< TRUE if this is in a single-table
tablespace and the .ibd file is missing; then
diff --git a/storage/innodb_plugin/include/fil0fil.h b/storage/innodb_plugin/include/fil0fil.h
index 74d0fbcdacd..de8ef9e9687 100644
--- a/storage/innodb_plugin/include/fil0fil.h
+++ b/storage/innodb_plugin/include/fil0fil.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -110,9 +110,10 @@ extern fil_addr_t fil_addr_null;
contents of this field is valid
for all uncompressed pages. */
#define FIL_PAGE_FILE_FLUSH_LSN 26 /*!< this is only defined for the
- first page in a data file: the file
- has been flushed to disk at least up
- to this lsn */
+ first page in a system tablespace
+ data file (ibdata*, not *.ibd):
+ the file has been flushed to disk
+ at least up to this lsn */
#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this
contains the space id of the page */
#define FIL_PAGE_DATA 38 /*!< start of the data on the page */
diff --git a/storage/innodb_plugin/include/hash0hash.h b/storage/innodb_plugin/include/hash0hash.h
index 977cb829f35..b17c21a45ef 100644
--- a/storage/innodb_plugin/include/hash0hash.h
+++ b/storage/innodb_plugin/include/hash0hash.h
@@ -434,11 +434,12 @@ struct hash_table_struct {
these heaps */
#endif /* !UNIV_HOTBACKUP */
mem_heap_t* heap;
+#ifdef UNIV_DEBUG
ulint magic_n;
+# define HASH_TABLE_MAGIC_N 76561114
+#endif /* UNIV_DEBUG */
};
-#define HASH_TABLE_MAGIC_N 76561114
-
#ifndef UNIV_NONINL
#include "hash0hash.ic"
#endif
diff --git a/storage/innodb_plugin/include/hash0hash.ic b/storage/innodb_plugin/include/hash0hash.ic
index 19da2d50701..0b437894e2e 100644
--- a/storage/innodb_plugin/include/hash0hash.ic
+++ b/storage/innodb_plugin/include/hash0hash.ic
@@ -35,6 +35,8 @@ hash_get_nth_cell(
hash_table_t* table, /*!< in: hash table */
ulint n) /*!< in: cell index */
{
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
ut_ad(n < table->n_cells);
return(table->array + n);
@@ -48,6 +50,8 @@ hash_table_clear(
/*=============*/
hash_table_t* table) /*!< in/out: hash table */
{
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
memset(table->array, 0x0,
table->n_cells * sizeof(*table->array));
}
@@ -61,6 +65,8 @@ hash_get_n_cells(
/*=============*/
hash_table_t* table) /*!< in: table */
{
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
return(table->n_cells);
}
@@ -74,6 +80,8 @@ hash_calc_hash(
ulint fold, /*!< in: folded value */
hash_table_t* table) /*!< in: hash table */
{
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
return(ut_hash_ulint(fold, table->n_cells));
}
@@ -88,6 +96,8 @@ hash_get_mutex_no(
hash_table_t* table, /*!< in: hash table */
ulint fold) /*!< in: fold */
{
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
ut_ad(ut_is_2pow(table->n_mutexes));
return(ut_2pow_remainder(hash_calc_hash(fold, table),
table->n_mutexes));
@@ -103,6 +113,8 @@ hash_get_nth_heap(
hash_table_t* table, /*!< in: hash table */
ulint i) /*!< in: index of the heap */
{
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
ut_ad(i < table->n_mutexes);
return(table->heaps[i]);
@@ -120,6 +132,9 @@ hash_get_heap(
{
ulint i;
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+
if (table->heap) {
return(table->heap);
}
@@ -139,6 +154,8 @@ hash_get_nth_mutex(
hash_table_t* table, /*!< in: hash table */
ulint i) /*!< in: index of the mutex */
{
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
ut_ad(i < table->n_mutexes);
return(table->mutexes + i);
@@ -156,6 +173,9 @@ hash_get_mutex(
{
ulint i;
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+
i = hash_get_mutex_no(table, fold);
return(hash_get_nth_mutex(table, i));
diff --git a/storage/innodb_plugin/include/lock0lock.h b/storage/innodb_plugin/include/lock0lock.h
index 82e4c9bd976..7d76cbe3c75 100644
--- a/storage/innodb_plugin/include/lock0lock.h
+++ b/storage/innodb_plugin/include/lock0lock.h
@@ -613,13 +613,16 @@ lock_rec_print(
FILE* file, /*!< in: file where to print */
const lock_t* lock); /*!< in: record type lock */
/*********************************************************************//**
-Prints info of locks for all transactions. */
+Prints info of locks for all transactions.
+@return FALSE if the kernel mutex could not be obtained, in which case
+the function exits without printing info */
UNIV_INTERN
-void
+ibool
lock_print_info_summary(
/*====================*/
- FILE* file); /*!< in: file where to print */
-/*********************************************************************//**
+ FILE* file, /*!< in: file where to print */
+ ibool nowait);/*!< in: TRUE = do not wait for the kernel mutex */
+/*************************************************************************
Prints info of locks for each transaction. */
UNIV_INTERN
void
diff --git a/storage/innodb_plugin/include/log0log.h b/storage/innodb_plugin/include/log0log.h
index 135aeb69e2d..8fce4ef96bc 100644
--- a/storage/innodb_plugin/include/log0log.h
+++ b/storage/innodb_plugin/include/log0log.h
@@ -1,23 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2009, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -825,7 +808,17 @@ struct log_struct{
written to some log group; for this to
be advanced, it is enough that the
write i/o has been completed for all
- log groups */
+ log groups.
+ Note that since InnoDB currently
+ has only one log group, this
+ value is redundant. Also, it is
+ possible that this value
+ falls behind the
+ flushed_to_disk_lsn transiently.
+ It is appropriate to use either
+ flushed_to_disk_lsn or
+ write_lsn, both of which are always
+ up-to-date and accurate. */
ib_uint64_t write_lsn; /*!< end lsn for the current running
write */
ulint write_end_offset;/*!< the data in buffer has
diff --git a/storage/innodb_plugin/include/log0log.ic b/storage/innodb_plugin/include/log0log.ic
index 36d151a3064..139f4041a36 100644
--- a/storage/innodb_plugin/include/log0log.ic
+++ b/storage/innodb_plugin/include/log0log.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -314,12 +314,15 @@ log_reserve_and_write_fast(
ulint data_len;
#ifdef UNIV_LOG_LSN_DEBUG
/* length of the LSN pseudo-record */
- ulint lsn_len = 1
- + mach_get_compressed_size(log_sys->lsn >> 32)
- + mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL);
+ ulint lsn_len;
#endif /* UNIV_LOG_LSN_DEBUG */
mutex_enter(&log_sys->mutex);
+#ifdef UNIV_LOG_LSN_DEBUG
+ lsn_len = 1
+ + mach_get_compressed_size(log_sys->lsn >> 32)
+ + mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL);
+#endif /* UNIV_LOG_LSN_DEBUG */
data_len = len
#ifdef UNIV_LOG_LSN_DEBUG
diff --git a/storage/innodb_plugin/include/log0recv.h b/storage/innodb_plugin/include/log0recv.h
index a3d2bd050f5..3209799e140 100644
--- a/storage/innodb_plugin/include/log0recv.h
+++ b/storage/innodb_plugin/include/log0recv.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -176,6 +176,12 @@ UNIV_INTERN
void
recv_recovery_from_checkpoint_finish(void);
/*======================================*/
+/********************************************************//**
+Initiates the rollback of active transactions. */
+UNIV_INTERN
+void
+recv_recovery_rollback_active(void);
+/*===============================*/
/*******************************************************//**
Scans log from a buffer and stores new log data to the parsing buffer.
Parses and hashes the log records if new data found. Unless
@@ -258,12 +264,14 @@ void
recv_sys_init(
/*==========*/
ulint available_memory); /*!< in: available memory in bytes */
+#ifndef UNIV_HOTBACKUP
/********************************************************//**
Reset the state of the recovery system variables. */
UNIV_INTERN
void
recv_sys_var_init(void);
/*===================*/
+#endif /* !UNIV_HOTBACKUP */
/*******************************************************************//**
Empties the hash table of stored log records, applying them to appropriate
pages. */
diff --git a/storage/innodb_plugin/include/mem0dbg.h b/storage/innodb_plugin/include/mem0dbg.h
index a064af5c678..d81e1418b2b 100644
--- a/storage/innodb_plugin/include/mem0dbg.h
+++ b/storage/innodb_plugin/include/mem0dbg.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,6 +28,13 @@ Created 6/9/1994 Heikki Tuuri
check fields whose sizes are given below */
#ifdef UNIV_MEM_DEBUG
+# ifndef UNIV_HOTBACKUP
+/* In the debug version, this mutex protects the hash table
+containing the list of live memory heaps, and also the global
+variables in mem0dbg.c. */
+extern mutex_t mem_hash_mutex;
+# endif /* !UNIV_HOTBACKUP */
+
#define MEM_FIELD_HEADER_SIZE ut_calc_align(2 * sizeof(ulint),\
UNIV_MEM_ALIGNMENT)
#define MEM_FIELD_TRAILER_SIZE sizeof(ulint)
diff --git a/storage/innodb_plugin/include/mem0dbg.ic b/storage/innodb_plugin/include/mem0dbg.ic
index cb9245411dc..b0c8178a623 100644
--- a/storage/innodb_plugin/include/mem0dbg.ic
+++ b/storage/innodb_plugin/include/mem0dbg.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,9 +25,6 @@ Created 6/8/1994 Heikki Tuuri
*************************************************************************/
#ifdef UNIV_MEM_DEBUG
-# ifndef UNIV_HOTBACKUP
-extern mutex_t mem_hash_mutex;
-# endif /* !UNIV_HOTBACKUP */
extern ulint mem_current_allocated_memory;
/******************************************************************//**
diff --git a/storage/innodb_plugin/include/mem0mem.h b/storage/innodb_plugin/include/mem0mem.h
index 98f8748e529..ee28cf7b225 100644
--- a/storage/innodb_plugin/include/mem0mem.h
+++ b/storage/innodb_plugin/include/mem0mem.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -359,6 +359,9 @@ struct mem_block_info_struct {
to the heap is also the first block in this list,
though it also contains the base node of the list. */
ulint len; /*!< physical length of this block in bytes */
+ ulint total_size; /*!< physical length in bytes of all blocks
+ in the heap. This is defined only in the base
+ node and is set to ULINT_UNDEFINED in others. */
ulint type; /*!< type of heap: MEM_HEAP_DYNAMIC, or
MEM_HEAP_BUF possibly ORed to MEM_HEAP_BTR_SEARCH */
ulint free; /*!< offset in bytes of the first free position for
diff --git a/storage/innodb_plugin/include/mem0mem.ic b/storage/innodb_plugin/include/mem0mem.ic
index e7080d8c508..cbce2edc661 100644
--- a/storage/innodb_plugin/include/mem0mem.ic
+++ b/storage/innodb_plugin/include/mem0mem.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -579,18 +579,12 @@ mem_heap_get_size(
/*==============*/
mem_heap_t* heap) /*!< in: heap */
{
- mem_block_t* block;
ulint size = 0;
ut_ad(mem_heap_check(heap));
- block = heap;
-
- while (block != NULL) {
+ size = heap->total_size;
- size += mem_block_get_len(block);
- block = UT_LIST_GET_NEXT(list, block);
- }
#ifndef UNIV_HOTBACKUP
if (heap->free_block) {
size += UNIV_PAGE_SIZE;
diff --git a/storage/innodb_plugin/include/mtr0mtr.ic b/storage/innodb_plugin/include/mtr0mtr.ic
index 310c7c4117f..18f8e87b3cf 100644
--- a/storage/innodb_plugin/include/mtr0mtr.ic
+++ b/storage/innodb_plugin/include/mtr0mtr.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -70,6 +70,7 @@ mtr_memo_push(
ut_ad(type <= MTR_MEMO_X_LOCK);
ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
+ ut_ad(mtr->state == MTR_ACTIVE);
memo = &(mtr->memo);
@@ -92,6 +93,7 @@ mtr_set_savepoint(
ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
+ ut_ad(mtr->state == MTR_ACTIVE);
memo = &(mtr->memo);
@@ -149,6 +151,7 @@ mtr_memo_contains(
ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
+ ut_ad(mtr->state == MTR_ACTIVE || mtr->state == MTR_COMMITTING);
memo = &(mtr->memo);
diff --git a/storage/innodb_plugin/include/os0file.h b/storage/innodb_plugin/include/os0file.h
index 16568579f31..d645cae38bb 100644
--- a/storage/innodb_plugin/include/os0file.h
+++ b/storage/innodb_plugin/include/os0file.h
@@ -1,23 +1,6 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
/***********************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Portions of this file contain modifications contributed and copyrighted
diff --git a/storage/innodb_plugin/include/que0que.h b/storage/innodb_plugin/include/que0que.h
index 420f34550e2..39f8d07af89 100644
--- a/storage/innodb_plugin/include/que0que.h
+++ b/storage/innodb_plugin/include/que0que.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,6 +30,7 @@ Created 5/27/1996 Heikki Tuuri
#include "data0data.h"
#include "dict0types.h"
#include "trx0trx.h"
+#include "trx0roll.h"
#include "srv0srv.h"
#include "usr0types.h"
#include "que0types.h"
@@ -215,6 +216,16 @@ trx_t*
thr_get_trx(
/*========*/
que_thr_t* thr); /*!< in: query thread */
+/*******************************************************************//**
+Determines if this thread is rolling back an incomplete transaction
+in crash recovery.
+@return TRUE if thr is rolling back an incomplete transaction in crash
+recovery */
+UNIV_INLINE
+ibool
+thr_is_recv(
+/*========*/
+ const que_thr_t* thr); /*!< in: query thread */
/***********************************************************************//**
Gets the type of a graph node. */
UNIV_INLINE
diff --git a/storage/innodb_plugin/include/que0que.ic b/storage/innodb_plugin/include/que0que.ic
index a1c0dc1e77a..bd936670e1e 100644
--- a/storage/innodb_plugin/include/que0que.ic
+++ b/storage/innodb_plugin/include/que0que.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -38,6 +38,20 @@ thr_get_trx(
return(thr->graph->trx);
}
+/*******************************************************************//**
+Determines if this thread is rolling back an incomplete transaction
+in crash recovery.
+@return TRUE if thr is rolling back an incomplete transaction in crash
+recovery */
+UNIV_INLINE
+ibool
+thr_is_recv(
+/*========*/
+ const que_thr_t* thr) /*!< in: query thread */
+{
+ return(trx_is_recv(thr->graph->trx));
+}
+
/***********************************************************************//**
Gets the first thr in a fork. */
UNIV_INLINE
diff --git a/storage/innodb_plugin/include/row0mysql.h b/storage/innodb_plugin/include/row0mysql.h
index b05241f00f8..d2a8734c61f 100644
--- a/storage/innodb_plugin/include/row0mysql.h
+++ b/storage/innodb_plugin/include/row0mysql.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2000, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -451,6 +451,12 @@ row_drop_table_for_mysql(
const char* name, /*!< in: table name */
trx_t* trx, /*!< in: transaction handle */
ibool drop_db);/*!< in: TRUE=dropping whole database */
+/*********************************************************************//**
+Drop all temporary tables during crash recovery. */
+UNIV_INTERN
+void
+row_mysql_drop_temp_tables(void);
+/*============================*/
/*********************************************************************//**
Discards the tablespace of a table which is stored in an .ibd file. Discarding
@@ -494,14 +500,19 @@ row_rename_table_for_mysql(
trx_t* trx, /*!< in: transaction handle */
ibool commit); /*!< in: if TRUE then commit trx */
/*********************************************************************//**
-Checks a table for corruption.
-@return DB_ERROR or DB_SUCCESS */
+Checks that the index contains entries in an ascending order, unique
+constraint is not broken, and calculates the number of index entries
+in the read view of the current transaction.
+@return DB_SUCCESS if ok */
UNIV_INTERN
ulint
-row_check_table_for_mysql(
+row_check_index_for_mysql(
/*======================*/
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
- handle */
+ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct
+ in MySQL handle */
+ const dict_index_t* index, /*!< in: index */
+ ulint* n_rows); /*!< out: number of entries
+ seen in the consistent read */
/*********************************************************************//**
Determines if a table is a magic monitor table.
diff --git a/storage/innodb_plugin/include/row0sel.h b/storage/innodb_plugin/include/row0sel.h
index 01a5afaa23e..8544b9d08ba 100644
--- a/storage/innodb_plugin/include/row0sel.h
+++ b/storage/innodb_plugin/include/row0sel.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -105,17 +105,6 @@ row_fetch_print(
/*============*/
void* row, /*!< in: sel_node_t* */
void* user_arg); /*!< in: not used */
-/****************************************************************//**
-Callback function for fetch that stores an unsigned 4 byte integer to the
-location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length
-= 4.
-@return always returns NULL */
-UNIV_INTERN
-void*
-row_fetch_store_uint4(
-/*==================*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg); /*!< in: data pointer */
/***********************************************************//**
Prints a row in a select result.
@return query thread to run next or NULL */
diff --git a/storage/innodb_plugin/include/srv0srv.h b/storage/innodb_plugin/include/srv0srv.h
index 228c9f6600a..7aa2ce74720 100644
--- a/storage/innodb_plugin/include/srv0srv.h
+++ b/storage/innodb_plugin/include/srv0srv.h
@@ -1,7 +1,8 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, 2009, Google Inc.
+Copyright (c) 2009, Percona Inc.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -9,6 +10,13 @@ briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
@@ -22,32 +30,6 @@ this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
-/***********************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Percona Inc.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-***********************************************************************/
/**************************************************//**
@file include/srv0srv.h
@@ -227,7 +209,8 @@ extern ibool srv_print_innodb_tablespace_monitor;
extern ibool srv_print_verbose_log;
extern ibool srv_print_innodb_table_monitor;
-extern ibool srv_lock_timeout_and_monitor_active;
+extern ibool srv_lock_timeout_active;
+extern ibool srv_monitor_active;
extern ibool srv_error_monitor_active;
extern ulong srv_n_spin_wait_rounds;
@@ -540,15 +523,23 @@ srv_release_mysql_thread_if_suspended(
MySQL OS thread */
/*********************************************************************//**
A thread which wakes up threads whose lock wait may have lasted too long.
-This also prints the info output by various InnoDB monitors.
@return a dummy parameter */
UNIV_INTERN
os_thread_ret_t
-srv_lock_timeout_and_monitor_thread(
-/*================================*/
+srv_lock_timeout_thread(
+/*====================*/
void* arg); /*!< in: a dummy parameter required by
os_thread_create */
/*********************************************************************//**
+A thread which prints the info output by various InnoDB monitors.
+@return a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+srv_monitor_thread(
+/*===============*/
+ void* arg); /*!< in: a dummy parameter required by
+ os_thread_create */
+/*************************************************************************
A thread which prints warnings about semaphore waits which have lasted
too long. These can be used to track bugs which cause hangs.
@return a dummy parameter */
@@ -559,12 +550,15 @@ srv_error_monitor_thread(
void* arg); /*!< in: a dummy parameter required by
os_thread_create */
/******************************************************************//**
-Outputs to a file the output of the InnoDB Monitor. */
+Outputs to a file the output of the InnoDB Monitor.
+@return FALSE if not all information printed
+due to failure to obtain necessary mutex */
UNIV_INTERN
-void
+ibool
srv_printf_innodb_monitor(
/*======================*/
FILE* file, /*!< in: output stream */
+ ibool nowait, /*!< in: TRUE = do not wait for the kernel mutex */
ulint* trx_start, /*!< out: file position of the start of
the list of active transactions */
ulint* trx_end); /*!< out: file position of the end of
diff --git a/storage/innodb_plugin/include/sync0rw.h b/storage/innodb_plugin/include/sync0rw.h
index aedfd5f3f86..6f7e13220c1 100644
--- a/storage/innodb_plugin/include/sync0rw.h
+++ b/storage/innodb_plugin/include/sync0rw.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -429,8 +429,9 @@ ibool
rw_lock_own(
/*========*/
rw_lock_t* lock, /*!< in: rw-lock */
- ulint lock_type); /*!< in: lock type: RW_LOCK_SHARED,
+ ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED,
RW_LOCK_EX */
+ __attribute__((warn_unused_result));
#endif /* UNIV_SYNC_DEBUG */
/******************************************************************//**
Checks if somebody has locked the rw-lock in the specified mode. */
diff --git a/storage/innodb_plugin/include/sync0sync.h b/storage/innodb_plugin/include/sync0sync.h
index df990823cc4..d470b823fc3 100644
--- a/storage/innodb_plugin/include/sync0sync.h
+++ b/storage/innodb_plugin/include/sync0sync.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -206,7 +206,8 @@ UNIV_INTERN
ibool
mutex_own(
/*======*/
- const mutex_t* mutex); /*!< in: mutex */
+ const mutex_t* mutex) /*!< in: mutex */
+ __attribute__((warn_unused_result));
#endif /* UNIV_DEBUG */
#ifdef UNIV_SYNC_DEBUG
/******************************************************************//**
@@ -238,16 +239,27 @@ ibool
sync_thread_levels_empty(void);
/*==========================*/
/******************************************************************//**
-Checks that the level array for the current thread is empty.
-@return TRUE if empty except the exceptions specified below */
+Checks if the level array for the current thread contains a
+mutex or rw-latch at the specified level.
+@return a matching latch, or NULL if not found */
UNIV_INTERN
-ibool
-sync_thread_levels_empty_gen(
-/*=========================*/
+void*
+sync_thread_levels_contains(
+/*========================*/
+ ulint level); /*!< in: latching order level
+ (SYNC_DICT, ...)*/
+/******************************************************************//**
+Checks if the level array for the current thread is empty.
+@return a latch, or NULL if empty except the exceptions specified below */
+UNIV_INTERN
+void*
+sync_thread_levels_nonempty_gen(
+/*============================*/
ibool dict_mutex_allowed); /*!< in: TRUE if dictionary mutex is
allowed to be owned by the thread,
also purge_is_running mutex is
allowed */
+#define sync_thread_levels_empty_gen(d) (!sync_thread_levels_nonempty_gen(d))
/******************************************************************//**
Gets the debug information for a reserved mutex. */
UNIV_INTERN
diff --git a/storage/innodb_plugin/include/trx0rseg.h b/storage/innodb_plugin/include/trx0rseg.h
index ba1fc88b6c4..a25d84f1e84 100644
--- a/storage/innodb_plugin/include/trx0rseg.h
+++ b/storage/innodb_plugin/include/trx0rseg.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -114,17 +114,6 @@ trx_rseg_list_and_array_init(
/*=========================*/
trx_sysf_t* sys_header, /*!< in: trx system header */
mtr_t* mtr); /*!< in: mtr */
-/****************************************************************//**
-Creates a new rollback segment to the database.
-@return the created segment object, NULL if fail */
-UNIV_INTERN
-trx_rseg_t*
-trx_rseg_create(
-/*============*/
- ulint space, /*!< in: space id */
- ulint max_size, /*!< in: max size in pages */
- ulint* id, /*!< out: rseg id */
- mtr_t* mtr); /*!< in: mtr */
/***************************************************************************
Frees an instance of the rollback segment in memory. */
UNIV_INTERN
diff --git a/storage/innodb_plugin/include/trx0sys.h b/storage/innodb_plugin/include/trx0sys.h
index a53296a06d9..cbb89689748 100644
--- a/storage/innodb_plugin/include/trx0sys.h
+++ b/storage/innodb_plugin/include/trx0sys.h
@@ -333,12 +333,14 @@ UNIV_INTERN
void
trx_sys_file_format_tag_init(void);
/*==============================*/
+#ifndef UNIV_HOTBACKUP
/*****************************************************************//**
Shutdown/Close the transaction system. */
UNIV_INTERN
void
trx_sys_close(void);
/*===============*/
+#endif /* !UNIV_HOTBACKUP */
/*****************************************************************//**
Get the name representation of the file format from its id.
@return pointer to the name */
@@ -495,7 +497,6 @@ this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
within that file */
#define TRX_SYS_MYSQL_LOG_NAME 12 /*!< MySQL log file name */
-#ifndef UNIV_HOTBACKUP
/** Doublewrite buffer */
/* @{ */
/** The offset of the doublewrite buffer header on the trx system header page */
@@ -547,6 +548,7 @@ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_NO. */
#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE
/* @} */
+#ifndef UNIV_HOTBACKUP
/** File format tag */
/* @{ */
/** The offset of the file format tag on the trx system header page
diff --git a/storage/innodb_plugin/include/trx0trx.h b/storage/innodb_plugin/include/trx0trx.h
index 5f2c1246f37..480f265a138 100644
--- a/storage/innodb_plugin/include/trx0trx.h
+++ b/storage/innodb_plugin/include/trx0trx.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -349,7 +349,7 @@ trx_print(
use the default max length */
/** Type of data dictionary operation */
-enum trx_dict_op {
+typedef enum trx_dict_op {
/** The transaction is not modifying the data dictionary. */
TRX_DICT_OP_NONE = 0,
/** The transaction is creating a table or an index, or
@@ -361,7 +361,7 @@ enum trx_dict_op {
existing table. In crash recovery, the data dictionary
must be locked, but the table must not be dropped. */
TRX_DICT_OP_INDEX = 2
-};
+} trx_dict_op_t;
/**********************************************************************//**
Determine if a transaction is a dictionary operation.
@@ -463,69 +463,79 @@ rolling back after a database recovery */
struct trx_struct{
ulint magic_n;
- /* All the next fields are protected by the kernel mutex, except the
- undo logs which are protected by undo_mutex */
+
+ /* These fields are not protected by any mutex. */
const char* op_info; /*!< English text describing the
current operation, or an empty
string */
- unsigned is_purge:1; /*!< 0=user transaction, 1=purge */
- unsigned is_recovered:1; /*!< 0=normal transaction,
- 1=recovered, must be rolled back */
- unsigned conc_state:2; /*!< state of the trx from the point
+ ulint conc_state; /*!< state of the trx from the point
of view of concurrency control:
TRX_ACTIVE, TRX_COMMITTED_IN_MEMORY,
... */
- unsigned que_state:2; /*!< valid when conc_state == TRX_ACTIVE:
- TRX_QUE_RUNNING, TRX_QUE_LOCK_WAIT,
- ... */
- unsigned isolation_level:2;/* TRX_ISO_REPEATABLE_READ, ... */
- unsigned check_foreigns:1;/* normally TRUE, but if the user
+ ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */
+ ulint check_foreigns; /* normally TRUE, but if the user
wants to suppress foreign key checks,
(in table imports, for example) we
set this FALSE */
- unsigned check_unique_secondary:1;
+ ulint check_unique_secondary;
/* normally TRUE, but if the user
wants to speed up inserts by
suppressing unique key checks
for secondary indexes when we decide
if we can use the insert buffer for
them, we set this FALSE */
- unsigned support_xa:1; /*!< normally we do the XA two-phase
+ ulint support_xa; /*!< normally we do the XA two-phase
commit steps, but by setting this to
FALSE, one can save CPU time and about
150 bytes in the undo log size as then
we skip XA steps */
- unsigned flush_log_later:1;/* In 2PC, we hold the
+ ulint flush_log_later;/* In 2PC, we hold the
prepare_commit mutex across
both phases. In that case, we
defer flush of the logs to disk
until after we release the
mutex. */
- unsigned must_flush_log_later:1;/* this flag is set to TRUE in
+ ulint must_flush_log_later;/* this flag is set to TRUE in
trx_commit_off_kernel() if
flush_log_later was TRUE, and there
were modifications by the transaction;
in that case we must flush the log
in trx_commit_complete_for_mysql() */
- unsigned dict_operation:2;/**< @see enum trx_dict_op */
- unsigned duplicates:2; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
- unsigned active_trans:2; /*!< 1 - if a transaction in MySQL
+ ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
+ ulint active_trans; /*!< 1 - if a transaction in MySQL
is active. 2 - if prepare_commit_mutex
was taken */
- unsigned has_search_latch:1;
+ ulint has_search_latch;
/* TRUE if this trx has latched the
search system latch in S-mode */
- unsigned declared_to_be_inside_innodb:1;
+ ulint deadlock_mark; /*!< a mark field used in deadlock
+ checking algorithm. */
+ trx_dict_op_t dict_operation; /**< @see enum trx_dict_op */
+
+ /* Fields protected by the srv_conc_mutex. */
+ ulint declared_to_be_inside_innodb;
/* this is TRUE if we have declared
this transaction in
srv_conc_enter_innodb to be inside the
InnoDB engine */
- unsigned handling_signals:1;/* this is TRUE as long as the trx
- is handling signals */
- unsigned dict_operation_lock_mode:2;
- /* 0, RW_S_LATCH, or RW_X_LATCH:
+
+ /* Fields protected by dict_operation_lock. The very latch
+ it is used to track. */
+ ulint dict_operation_lock_mode;
+ /*!< 0, RW_S_LATCH, or RW_X_LATCH:
the latch mode trx currently holds
on dict_operation_lock */
+
+ /* All the next fields are protected by the kernel mutex, except the
+ undo logs which are protected by undo_mutex */
+ ulint is_purge; /*!< 0=user transaction, 1=purge */
+ ulint is_recovered; /*!< 0=normal transaction,
+ 1=recovered, must be rolled back */
+ ulint que_state; /*!< valid when conc_state
+ == TRX_ACTIVE: TRX_QUE_RUNNING,
+ TRX_QUE_LOCK_WAIT, ... */
+ ulint handling_signals;/* this is TRUE as long as the trx
+ is handling signals */
time_t start_time; /*!< time the trx object was created
or the state last time became
TRX_ACTIVE */
@@ -640,11 +650,6 @@ struct trx_struct{
wait_thrs; /*!< query threads belonging to this
trx that are in the QUE_THR_LOCK_WAIT
state */
- ulint deadlock_mark; /*!< a mark field used in deadlock
- checking algorithm. This must be
- in its own machine word, because
- it can be changed by other
- threads while holding kernel_mutex. */
/*------------------------------*/
mem_heap_t* lock_heap; /*!< memory heap for the locks of the
transaction */
diff --git a/storage/innodb_plugin/include/trx0types.h b/storage/innodb_plugin/include/trx0types.h
index 24cf57d53d5..40a7256cbfd 100644
--- a/storage/innodb_plugin/include/trx0types.h
+++ b/storage/innodb_plugin/include/trx0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -70,6 +70,13 @@ typedef struct trx_named_savept_struct trx_named_savept_t;
enum trx_rb_ctx {
RB_NONE = 0, /*!< no rollback */
RB_NORMAL, /*!< normal rollback */
+ RB_RECOVERY_PURGE_REC,
+ /*!< rolling back an incomplete transaction,
+ in crash recovery, rolling back an
+ INSERT that was performed by updating a
+ delete-marked record; if the delete-marked record
+ no longer exists in an active read view, it will
+ be purged */
RB_RECOVERY /*!< rolling back an incomplete transaction,
in crash recovery */
};
diff --git a/storage/innodb_plugin/include/univ.i b/storage/innodb_plugin/include/univ.i
index 2081e136590..0df05999845 100644
--- a/storage/innodb_plugin/include/univ.i
+++ b/storage/innodb_plugin/include/univ.i
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2009, Sun Microsystems, Inc.
@@ -46,7 +46,7 @@ Created 1/20/1994 Heikki Tuuri
#define INNODB_VERSION_MAJOR 1
#define INNODB_VERSION_MINOR 0
-#define INNODB_VERSION_BUGFIX 6
+#define INNODB_VERSION_BUGFIX 7
/* The following is the InnoDB version as shown in
SELECT plugin_version FROM information_schema.plugins;
@@ -229,11 +229,6 @@ by one. */
/* the above option prevents forcing of log to disk
at a buffer page write: it should be tested with this
option off; also some ibuf tests are suppressed */
-/*
-#define UNIV_BASIC_LOG_DEBUG
-*/
- /* the above option enables basic recovery debugging:
- new allocated file pages are reset */
/* Linkage specifier for non-static InnoDB symbols (variables and functions)
that are only referenced from within InnoDB, not from MySQL */
diff --git a/storage/innodb_plugin/include/ut0rbt.h b/storage/innodb_plugin/include/ut0rbt.h
new file mode 100644
index 00000000000..6fd050acfe7
--- /dev/null
+++ b/storage/innodb_plugin/include/ut0rbt.h
@@ -0,0 +1,309 @@
+/*****************************************************************************
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0rbt.h
+Red-Black tree implementation.
+
+Created 2007-03-20 Sunny Bains
+************************************************************************/
+
+#ifndef INNOBASE_UT0RBT_H
+#define INNOBASE_UT0RBT_H
+
+#if !defined(IB_RBT_TESTING)
+#include "univ.i"
+#include "ut0mem.h"
+#else
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#define ut_malloc malloc
+#define ut_free free
+#define ulint unsigned long
+#define ut_a(c) assert(c)
+#define ut_error assert(0)
+#define ibool unsigned int
+#define TRUE 1
+#define FALSE 0
+#endif
+
+/* Red black tree typedefs */
+typedef struct ib_rbt_struct ib_rbt_t;
+typedef struct ib_rbt_node_struct ib_rbt_node_t;
+/* FIXME: Iterator is a better name than _bound_ */
+typedef struct ib_rbt_bound_struct ib_rbt_bound_t;
+typedef void (*ib_rbt_print_node)(const ib_rbt_node_t* node);
+typedef int (*ib_rbt_compare)(const void* p1, const void* p2);
+
+/* Red black tree color types */
+enum ib_rbt_color_enum {
+ IB_RBT_RED,
+ IB_RBT_BLACK
+};
+
+typedef enum ib_rbt_color_enum ib_rbt_color_t;
+
+/* Red black tree node */
+struct ib_rbt_node_struct {
+ ib_rbt_color_t color; /* color of this node (IB_RBT_RED or IB_RBT_BLACK) */
+
+ ib_rbt_node_t* left; /* points to the left child */
+ ib_rbt_node_t* right; /* points to the right child */
+ ib_rbt_node_t* parent; /* points to the parent node */
+
+ char value[1]; /* Data value */
+};
+
+/* Red black tree instance.*/
+struct ib_rbt_struct {
+ ib_rbt_node_t* nil; /* Black colored node that is
+ used as a sentinel. This is
+ pre-allocated too.*/
+
+ ib_rbt_node_t* root; /* Root of the tree, this is
+ pre-allocated and the first
+ data node is the left child.*/
+
+ ulint n_nodes; /* Total number of data nodes */
+
+ ib_rbt_compare compare; /* Fn. to use for comparison */
+ ulint sizeof_value; /* Sizeof the item in bytes */
+};
+
+/* The result of searching for a key in the tree, this is useful for
+a speedy lookup and insert if key doesn't exist.*/
+struct ib_rbt_bound_struct {
+ const ib_rbt_node_t*
+ last; /* Last node visited */
+
+ int result; /* Result of comparing with
+ the last non-nil node that
+ was visited */
+};
+
+/* Size in elements (t is a pointer to an rb tree instance; t is expanded unparenthesized) */
+#define rbt_size(t) (t->n_nodes)
+
+/* Check whether the rb tree is empty (t is a pointer to an rb tree instance) */
+#define rbt_empty(t) (rbt_size(t) == 0)
+
+/* Get a pointer to the data value, cast to t* (t is the data type, n is a pointer to an rb tree node; n is expanded unparenthesized) */
+#define rbt_value(t, n) ((t*) &n->value[0])
+
+/* Compare a key with the node value using the tree's compare function (t is tree pointer, k is key, n is node pointer) */
+#define rbt_compare(t, k, n) (t->compare(k, n->value))
+
+/****************************************************************//**
+Free an instance of a red black tree */
+UNIV_INTERN
+void
+rbt_free(
+/*=====*/
+ ib_rbt_t* tree); /*!< in: rb tree to free */
+/****************************************************************//**
+Create an instance of a red black tree
+@return rb tree instance */
+UNIV_INTERN
+ib_rbt_t*
+rbt_create(
+/*=======*/
+ size_t sizeof_value, /*!< in: size in bytes */
+ ib_rbt_compare compare); /*!< in: comparator */
+/****************************************************************//**
+Delete a node from the red black tree, identified by key.
+@return TRUE if success FALSE if not found */
+UNIV_INTERN
+ibool
+rbt_delete(
+/*=======*/
+ ib_rbt_t* tree, /*!< in: rb tree */
+ const void* key); /*!< in: key to delete */
+/****************************************************************//**
+Remove a node from the rb tree, the node is not free'd, that is the
+caller's responsibility.
+@return the removed node, with the const qualifier dropped. */
+UNIV_INTERN
+ib_rbt_node_t*
+rbt_remove_node(
+/*============*/
+ ib_rbt_t* tree, /*!< in: rb tree */
+ const ib_rbt_node_t*
+ node); /*!< in: node to delete, this
+ is a fudge and declared const
+ because the caller has access
+ only to const nodes.*/
+/****************************************************************//**
+Find a matching node in the rb tree.
+@return node if found else return NULL */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_lookup(
+/*=======*/
+ const ib_rbt_t* tree, /*!< in: rb tree to search */
+ const void* key); /*!< in: key to lookup */
+/****************************************************************//**
+Generic insert of a value in the rb tree.
+@return inserted node */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_insert(
+/*=======*/
+ ib_rbt_t* tree, /*!< in: rb tree */
+ const void* key, /*!< in: key for ordering */
+ const void* value); /*!< in: data that will be
+ copied to the node.*/
+/****************************************************************//**
+Add a new node to the tree, useful for data that is pre-sorted.
+@return appended node */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_add_node(
+/*=========*/
+ ib_rbt_t* tree, /*!< in: rb tree */
+ ib_rbt_bound_t* parent, /*!< in: parent */
+ const void* value); /*!< in: this value is copied
+ to the node */
+/****************************************************************//**
+Return the left most data node in the tree
+@return left most node */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_first(
+/*======*/
+ const ib_rbt_t* tree); /*!< in: rb tree */
+/****************************************************************//**
+Return the right most data node in the tree
+@return right most node */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_last(
+/*=====*/
+ const ib_rbt_t* tree); /*!< in: rb tree */
+/****************************************************************//**
+Return the next node from current.
+@return successor node to current that is passed in. */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_next(
+/*=====*/
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ const ib_rbt_node_t* /*!< in: current node */
+ current);
+/****************************************************************//**
+Return the prev node from current.
+@return predecessor node to current that is passed in */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_prev(
+/*=====*/
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ const ib_rbt_node_t* /*!< in: current node */
+ current);
+/****************************************************************//**
+Find the node that has the lowest key that is >= key.
+@return node that satisfies the lower bound constraint or NULL */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_lower_bound(
+/*============*/
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ const void* key); /*!< in: key to search */
+/****************************************************************//**
+Find the node that has the greatest key that is <= key.
+@return node that satisfies the upper bound constraint or NULL */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_upper_bound(
+/*============*/
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ const void* key); /*!< in: key to search */
+/****************************************************************//**
+Search for the key, a node will be returned in parent.last, whether it
+was found or not. If not found then parent.last will contain the
+parent node for the possibly new key otherwise the matching node.
+@return result of last comparison */
+UNIV_INTERN
+int
+rbt_search(
+/*=======*/
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ ib_rbt_bound_t* parent, /*!< out: search bounds */
+ const void* key); /*!< in: key to search */
+/****************************************************************//**
+Search for the key, a node will be returned in parent.last, whether it
+was found or not. If not found then parent.last will contain the
+parent node for the possibly new key otherwise the matching node.
+@return result of last comparison */
+UNIV_INTERN
+int
+rbt_search_cmp(
+/*===========*/
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ ib_rbt_bound_t* parent, /*!< out: search bounds */
+ const void* key, /*!< in: key to search */
+ ib_rbt_compare compare); /*!< in: comparator */
+/****************************************************************//**
+Clear the tree, deletes (and free's) all the nodes. */
+UNIV_INTERN
+void
+rbt_clear(
+/*======*/
+ ib_rbt_t* tree); /*!< in: rb tree */
+/****************************************************************//**
+Merge the nodes from src into dst. Return the number of nodes merged.
+@return no. of recs merged */
+UNIV_INTERN
+ulint
+rbt_merge_uniq(
+/*===========*/
+ ib_rbt_t* dst, /*!< in: dst rb tree */
+ const ib_rbt_t* src); /*!< in: src rb tree */
+/****************************************************************//**
+Merge the nodes from src into dst. Return the number of nodes merged.
+Delete the nodes from src after copying node to dst. As a side effect
+the duplicates will be left untouched in the src, since we don't support
+duplicates (yet). NOTE: src and dst must be similar, the function doesn't
+check for this condition (yet).
+@return no. of recs merged */
+UNIV_INTERN
+ulint
+rbt_merge_uniq_destructive(
+/*=======================*/
+ ib_rbt_t* dst, /*!< in: dst rb tree */
+ ib_rbt_t* src); /*!< in: src rb tree */
+/****************************************************************//**
+Verify the integrity of the RB tree. For debugging. NOTE(review): the former
+"0 failure else height of tree (in count of black nodes)" wording conflicts with the ibool return type declared below.
+@return TRUE if OK FALSE if tree invalid. */
+UNIV_INTERN
+ibool
+rbt_validate(
+/*=========*/
+ const ib_rbt_t* tree); /*!< in: tree to validate */
+/****************************************************************//**
+Iterate over the tree in depth first order. */
+UNIV_INTERN
+void
+rbt_print(
+/*======*/
+ const ib_rbt_t* tree, /*!< in: tree to traverse */
+ ib_rbt_print_node print); /*!< in: print function */
+
+#endif /* INNOBASE_UT0RBT_H */
diff --git a/storage/innodb_plugin/include/ut0rnd.ic b/storage/innodb_plugin/include/ut0rnd.ic
index 763469142ec..c3dbd86923c 100644
--- a/storage/innodb_plugin/include/ut0rnd.ic
+++ b/storage/innodb_plugin/include/ut0rnd.ic
@@ -152,6 +152,7 @@ ut_hash_ulint(
ulint key, /*!< in: value to be hashed */
ulint table_size) /*!< in: hash table size */
{
+ ut_ad(table_size);
key = key ^ UT_HASH_RANDOM_MASK2;
return(key % table_size);
diff --git a/storage/innodb_plugin/lock/lock0lock.c b/storage/innodb_plugin/lock/lock0lock.c
index 1fce8002bdf..d5fff572aee 100644
--- a/storage/innodb_plugin/lock/lock0lock.c
+++ b/storage/innodb_plugin/lock/lock0lock.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -376,6 +376,7 @@ UNIV_INTERN FILE* lock_latest_err_file;
/* Flags for recursive deadlock search */
#define LOCK_VICTIM_IS_START 1
#define LOCK_VICTIM_IS_OTHER 2
+#define LOCK_EXCEED_MAX_DEPTH 3
/********************************************************************//**
Checks if a lock request results in a deadlock.
@@ -394,24 +395,25 @@ Looks recursively for a deadlock.
deadlock and we chose 'start' as the victim, LOCK_VICTIM_IS_OTHER if a
deadlock was found and we chose some other trx as a victim: we must do
the search again in this last case because there may be another
-deadlock! */
+deadlock!
+LOCK_EXCEED_MAX_DEPTH if the lock search exceeds max steps or max depth. */
static
ulint
lock_deadlock_recursive(
/*====================*/
trx_t* start, /*!< in: recursion starting point */
trx_t* trx, /*!< in: a transaction waiting for a lock */
- lock_t* wait_lock, /*!< in: the lock trx is waiting to be granted */
+ lock_t* wait_lock, /*!< in: lock that is waiting to be granted */
ulint* cost, /*!< in/out: number of calculation steps thus
far: if this exceeds LOCK_MAX_N_STEPS_...
- we return LOCK_VICTIM_IS_START */
+ we return LOCK_EXCEED_MAX_DEPTH */
ulint depth); /*!< in: recursion depth: if this exceeds
LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
- return LOCK_VICTIM_IS_START */
+ return LOCK_EXCEED_MAX_DEPTH */
/*********************************************************************//**
Gets the nth bit of a record lock.
-@return TRUE if bit set */
+@return TRUE if bit set; FALSE if the bit is not set or if i == ULINT_UNDEFINED */
UNIV_INLINE
ibool
lock_rec_get_nth_bit(
@@ -1222,7 +1224,7 @@ lock_rec_get_first_on_page(
/*********************************************************************//**
Gets the next explicit lock request on a record.
-@return next lock, NULL if none exists */
+@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
UNIV_INLINE
lock_t*
lock_rec_get_next(
@@ -3261,8 +3263,6 @@ lock_deadlock_occurs(
lock_t* lock, /*!< in: lock the transaction is requesting */
trx_t* trx) /*!< in: transaction */
{
- dict_table_t* table;
- dict_index_t* index;
trx_t* mark_trx;
ulint ret;
ulint cost = 0;
@@ -3284,31 +3284,50 @@ retry:
ret = lock_deadlock_recursive(trx, trx, lock, &cost, 0);
- if (ret == LOCK_VICTIM_IS_OTHER) {
+ switch (ret) {
+ case LOCK_VICTIM_IS_OTHER:
/* We chose some other trx as a victim: retry if there still
is a deadlock */
-
goto retry;
- }
- if (UNIV_UNLIKELY(ret == LOCK_VICTIM_IS_START)) {
- if (lock_get_type_low(lock) & LOCK_TABLE) {
- table = lock->un_member.tab_lock.table;
- index = NULL;
+ case LOCK_EXCEED_MAX_DEPTH:
+ /* If the lock search exceeds the max step
+ or the max depth, the current trx will be
+ the victim. Print its information. */
+ rewind(lock_latest_err_file);
+ ut_print_timestamp(lock_latest_err_file);
+
+ fputs("TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
+ " WAITS-FOR GRAPH, WE WILL ROLL BACK"
+ " FOLLOWING TRANSACTION \n",
+ lock_latest_err_file);
+
+ fputs("\n*** TRANSACTION:\n", lock_latest_err_file);
+ trx_print(lock_latest_err_file, trx, 3000);
+
+ fputs("*** WAITING FOR THIS LOCK TO BE GRANTED:\n",
+ lock_latest_err_file);
+
+ if (lock_get_type(lock) == LOCK_REC) {
+ lock_rec_print(lock_latest_err_file, lock);
} else {
- index = lock->index;
- table = index->table;
+ lock_table_print(lock_latest_err_file, lock);
}
+ break;
- lock_deadlock_found = TRUE;
-
+ case LOCK_VICTIM_IS_START:
fputs("*** WE ROLL BACK TRANSACTION (2)\n",
lock_latest_err_file);
+ break;
- return(TRUE);
+ default:
+ /* No deadlock detected*/
+ return(FALSE);
}
- return(FALSE);
+ lock_deadlock_found = TRUE;
+
+ return(TRUE);
}
/********************************************************************//**
@@ -3317,25 +3336,26 @@ Looks recursively for a deadlock.
deadlock and we chose 'start' as the victim, LOCK_VICTIM_IS_OTHER if a
deadlock was found and we chose some other trx as a victim: we must do
the search again in this last case because there may be another
-deadlock! */
+deadlock!
+LOCK_EXCEED_MAX_DEPTH if the lock search exceeds max steps or max depth. */
static
ulint
lock_deadlock_recursive(
/*====================*/
trx_t* start, /*!< in: recursion starting point */
trx_t* trx, /*!< in: a transaction waiting for a lock */
- lock_t* wait_lock, /*!< in: the lock trx is waiting to be granted */
+ lock_t* wait_lock, /*!< in: lock that is waiting to be granted */
ulint* cost, /*!< in/out: number of calculation steps thus
far: if this exceeds LOCK_MAX_N_STEPS_...
- we return LOCK_VICTIM_IS_START */
+ we return LOCK_EXCEED_MAX_DEPTH */
ulint depth) /*!< in: recursion depth: if this exceeds
LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
- return LOCK_VICTIM_IS_START */
+ return LOCK_EXCEED_MAX_DEPTH */
{
+ ulint ret;
lock_t* lock;
- ulint bit_no = ULINT_UNDEFINED;
trx_t* lock_trx;
- ulint ret;
+ ulint heap_no = ULINT_UNDEFINED;
ut_a(trx);
ut_a(start);
@@ -3351,27 +3371,44 @@ lock_deadlock_recursive(
*cost = *cost + 1;
- lock = wait_lock;
-
if (lock_get_type_low(wait_lock) == LOCK_REC) {
+ ulint space;
+ ulint page_no;
+
+ heap_no = lock_rec_find_set_bit(wait_lock);
+ ut_a(heap_no != ULINT_UNDEFINED);
- bit_no = lock_rec_find_set_bit(wait_lock);
+ space = wait_lock->un_member.rec_lock.space;
+ page_no = wait_lock->un_member.rec_lock.page_no;
- ut_a(bit_no != ULINT_UNDEFINED);
+ lock = lock_rec_get_first_on_page_addr(space, page_no);
+
+ /* Position the iterator on the first matching record lock. */
+ while (lock != NULL
+ && lock != wait_lock
+ && !lock_rec_get_nth_bit(lock, heap_no)) {
+
+ lock = lock_rec_get_next_on_page(lock);
+ }
+
+ if (lock == wait_lock) {
+ lock = NULL;
+ }
+
+ ut_ad(lock == NULL || lock_rec_get_nth_bit(lock, heap_no));
+
+ } else {
+ lock = wait_lock;
}
/* Look at the locks ahead of wait_lock in the lock queue */
for (;;) {
- if (lock_get_type_low(lock) & LOCK_TABLE) {
-
- lock = UT_LIST_GET_PREV(un_member.tab_lock.locks,
- lock);
- } else {
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
- ut_a(bit_no != ULINT_UNDEFINED);
+ /* Get previous table lock. */
+ if (heap_no == ULINT_UNDEFINED) {
- lock = (lock_t*) lock_rec_get_prev(lock, bit_no);
+ lock = UT_LIST_GET_PREV(
+ un_member.tab_lock.locks, lock);
}
if (lock == NULL) {
@@ -3389,7 +3426,7 @@ lock_deadlock_recursive(
lock_trx = lock->trx;
- if (lock_trx == start || too_far) {
+ if (lock_trx == start) {
/* We came back to the recursion starting
point: a deadlock detected; or we have
@@ -3436,19 +3473,10 @@ lock_deadlock_recursive(
}
#ifdef UNIV_DEBUG
if (lock_print_waits) {
- fputs("Deadlock detected"
- " or too long search\n",
+ fputs("Deadlock detected\n",
stderr);
}
#endif /* UNIV_DEBUG */
- if (too_far) {
-
- fputs("TOO DEEP OR LONG SEARCH"
- " IN THE LOCK TABLE"
- " WAITS-FOR GRAPH\n", ef);
-
- return(LOCK_VICTIM_IS_START);
- }
if (trx_weight_cmp(wait_lock->trx,
start) >= 0) {
@@ -3484,6 +3512,21 @@ lock_deadlock_recursive(
return(LOCK_VICTIM_IS_OTHER);
}
+ if (too_far) {
+
+#ifdef UNIV_DEBUG
+ if (lock_print_waits) {
+ fputs("Deadlock search exceeds"
+ " max steps or depth.\n",
+ stderr);
+ }
+#endif /* UNIV_DEBUG */
+ /* The information about transaction/lock
+ to be rolled back is available in the top
+ level. Do not print anything here. */
+ return(LOCK_EXCEED_MAX_DEPTH);
+ }
+
if (lock_trx->que_state == TRX_QUE_LOCK_WAIT) {
/* Another trx ahead has requested lock in an
@@ -3493,12 +3536,28 @@ lock_deadlock_recursive(
ret = lock_deadlock_recursive(
start, lock_trx,
lock_trx->wait_lock, cost, depth + 1);
+
if (ret != 0) {
return(ret);
}
}
}
+ /* Get the next record lock to check. */
+ if (heap_no != ULINT_UNDEFINED) {
+
+ ut_a(lock != NULL);
+
+ do {
+ lock = lock_rec_get_next_on_page(lock);
+ } while (lock != NULL
+ && lock != wait_lock
+ && !lock_rec_get_nth_bit(lock, heap_no));
+
+ if (lock == wait_lock) {
+ lock = NULL;
+ }
+ }
}/* end of the 'for (;;)'-loop */
}
@@ -3694,9 +3753,10 @@ lock_table_enqueue_waiting(
/*********************************************************************//**
Checks if other transactions have an incompatible mode lock request in
-the lock queue. */
+the lock queue.
+@return lock or NULL */
UNIV_INLINE
-ibool
+lock_t*
lock_table_other_has_incompatible(
/*==============================*/
trx_t* trx, /*!< in: transaction, or NULL if all
@@ -3718,13 +3778,13 @@ lock_table_other_has_incompatible(
&& (!lock_mode_compatible(lock_get_mode(lock), mode))
&& (wait || !(lock_get_wait(lock)))) {
- return(TRUE);
+ return(lock);
}
lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
}
- return(FALSE);
+ return(NULL);
}
/*********************************************************************//**
@@ -4249,28 +4309,29 @@ lock_rec_print(
block = buf_page_try_get(space, page_no, &mtr);
- if (block) {
- for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
+ for (i = 0; i < lock_rec_get_n_bits(lock); ++i) {
+
+ if (!lock_rec_get_nth_bit(lock, i)) {
+ continue;
+ }
+
+ fprintf(file, "Record lock, heap no %lu", (ulong) i);
- if (lock_rec_get_nth_bit(lock, i)) {
+ if (block) {
+ const rec_t* rec;
- const rec_t* rec
- = page_find_rec_with_heap_no(
- buf_block_get_frame(block), i);
- offsets = rec_get_offsets(
- rec, lock->index, offsets,
- ULINT_UNDEFINED, &heap);
+ rec = page_find_rec_with_heap_no(
+ buf_block_get_frame(block), i);
- fprintf(file, "Record lock, heap no %lu ",
- (ulong) i);
- rec_print_new(file, rec, offsets);
- putc('\n', file);
- }
- }
- } else {
- for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
- fprintf(file, "Record lock, heap no %lu\n", (ulong) i);
+ offsets = rec_get_offsets(
+ rec, lock->index, offsets,
+ ULINT_UNDEFINED, &heap);
+
+ putc(' ', file);
+ rec_print_new(file, rec, offsets);
}
+
+ putc('\n', file);
}
mtr_commit(&mtr);
@@ -4317,14 +4378,26 @@ lock_get_n_rec_locks(void)
#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
/*********************************************************************//**
-Prints info of locks for all transactions. */
+Prints info of locks for all transactions.
+@return FALSE if not able to obtain kernel mutex
+and exits without printing info */
UNIV_INTERN
-void
+ibool
lock_print_info_summary(
/*====================*/
- FILE* file) /*!< in: file where to print */
+ FILE* file, /*!< in: file where to print */
+ ibool nowait) /*!< in: TRUE = do not wait for the kernel mutex, return FALSE if it cannot be acquired immediately; FALSE = wait for it */
{
- lock_mutex_enter_kernel();
+ /* if nowait is FALSE, wait on the kernel mutex,
+ otherwise return immediately if fail to obtain the
+ mutex. */
+ if (!nowait) {
+ lock_mutex_enter_kernel();
+ } else if (mutex_enter_nowait(&kernel_mutex)) {
+ fputs("FAIL TO OBTAIN KERNEL MUTEX, "
+ "SKIP LOCK INFO PRINTING\n", file);
+ return(FALSE);
+ }
if (lock_deadlock_found) {
fputs("------------------------\n"
@@ -4356,6 +4429,7 @@ lock_print_info_summary(
"Total number of lock structs in row lock hash table %lu\n",
(ulong) lock_get_n_rec_locks());
#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
+ return(TRUE);
}
/*********************************************************************//**
@@ -4753,6 +4827,13 @@ loop:
|| lock->trx->conc_state == TRX_PREPARED
|| lock->trx->conc_state == TRX_COMMITTED_IN_MEMORY);
+# ifdef UNIV_SYNC_DEBUG
+ /* Only validate the record queues when this thread is not
+ holding a space->latch. Deadlocks are possible due to
+ latching order violation when UNIV_DEBUG is defined while
+ UNIV_SYNC_DEBUG is not. */
+ if (!sync_thread_levels_contains(SYNC_FSP))
+# endif /* UNIV_SYNC_DEBUG */
for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
if (i == 1 || lock_rec_get_nth_bit(lock, i)) {
@@ -4918,7 +4999,7 @@ lock_rec_insert_check_and_lock(
}
trx = thr_get_trx(thr);
- next_rec = page_rec_get_next((rec_t*) rec);
+ next_rec = page_rec_get_next_const(rec);
next_rec_heap_no = page_rec_get_heap_no(next_rec);
lock_mutex_enter_kernel();
diff --git a/storage/innodb_plugin/log/log0log.c b/storage/innodb_plugin/log/log0log.c
index d5b696074b3..183c24d2147 100644
--- a/storage/innodb_plugin/log/log0log.c
+++ b/storage/innodb_plugin/log/log0log.c
@@ -1,23 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2009, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -2013,7 +1996,7 @@ log_checkpoint(
return(TRUE);
}
- ut_ad(log_sys->written_to_all_lsn >= oldest_lsn);
+ ut_ad(log_sys->flushed_to_disk_lsn >= oldest_lsn);
if (log_sys->n_pending_checkpoint_writes > 0) {
/* A checkpoint write is running */
@@ -3095,7 +3078,7 @@ loop:
if (srv_fast_shutdown < 2
&& (srv_error_monitor_active
- || srv_lock_timeout_and_monitor_active)) {
+ || srv_lock_timeout_active || srv_monitor_active)) {
mutex_exit(&kernel_mutex);
diff --git a/storage/innodb_plugin/log/log0recv.c b/storage/innodb_plugin/log/log0recv.c
index ddbc71d4b71..7f5b2df7882 100644
--- a/storage/innodb_plugin/log/log0recv.c
+++ b/storage/innodb_plugin/log/log0recv.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -138,7 +138,9 @@ UNIV_INTERN ulint recv_max_parsed_page_no;
/** This many frames must be left free in the buffer pool when we scan
the log and store the scanned log records in the buffer pool: we will
use these free frames to read in pages when we start applying the
-log records to the database. */
+log records to the database.
+This is the default value. If the actual size of the buffer pool is
+at least 10 MB we'll set this value to 512. */
UNIV_INTERN ulint recv_n_pool_free_frames;
/** The maximum lsn we see for a page during the recovery process. If this
@@ -239,6 +241,7 @@ recv_sys_mem_free(void)
}
}
+#ifndef UNIV_HOTBACKUP
/************************************************************
Reset the state of the recovery system variables. */
UNIV_INTERN
@@ -278,6 +281,7 @@ recv_sys_var_init(void)
recv_max_page_lsn = 0;
}
+#endif /* !UNIV_HOTBACKUP */
/************************************************************
Inits the recovery system for a recovery operation. */
@@ -292,6 +296,12 @@ recv_sys_init(
return;
}
+ /* Initialize red-black tree for fast insertions into the
+ flush_list during recovery process.
+ As this initialization is done while holding the buffer pool
+ mutex we perform it before acquiring recv_sys->mutex. */
+ buf_flush_init_flush_rbt();
+
mutex_enter(&(recv_sys->mutex));
#ifndef UNIV_HOTBACKUP
@@ -301,6 +311,12 @@ recv_sys_init(
recv_is_from_backup = TRUE;
#endif /* !UNIV_HOTBACKUP */
+ /* Set appropriate value of recv_n_pool_free_frames. */
+ if (buf_pool_get_curr_size() >= (10 * 1024 * 1024)) {
+ /* Buffer pool of size at least 10 MB. */
+ recv_n_pool_free_frames = 512;
+ }
+
recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
recv_sys->len = 0;
recv_sys->recovered_offset = 0;
@@ -370,6 +386,9 @@ recv_sys_debug_free(void)
recv_sys->last_block_buf_start = NULL;
mutex_exit(&(recv_sys->mutex));
+
+ /* Free up the flush_rbt. */
+ buf_flush_free_flush_rbt();
}
# endif /* UNIV_LOG_DEBUG */
@@ -2050,15 +2069,6 @@ recv_parse_log_rec(
}
#endif /* UNIV_LOG_LSN_DEBUG */
- /* Check that page_no is sensible */
-
- if (UNIV_UNLIKELY(*page_no > 0x8FFFFFFFUL)) {
-
- recv_sys->found_corrupt_log = TRUE;
-
- return(0);
- }
-
new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
NULL, NULL);
if (UNIV_UNLIKELY(new_ptr == NULL)) {
@@ -2167,6 +2177,14 @@ recv_report_corrupt_log(
putc('\n', stderr);
}
+#ifndef UNIV_HOTBACKUP
+ if (!srv_force_recovery) {
+ fputs("InnoDB: Set innodb_force_recovery"
+ " to ignore this error.\n", stderr);
+ ut_error;
+ }
+#endif /* !UNIV_HOTBACKUP */
+
fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
"InnoDB: is possible that the log scan did not proceed\n"
"InnoDB: far enough in recovery! Please run CHECK TABLE\n"
@@ -2556,7 +2574,7 @@ recv_scan_log_recs(
ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad(len > 0);
+ ut_ad(len >= OS_FILE_LOG_BLOCK_SIZE);
ut_a(store_to_hash <= TRUE);
finished = FALSE;
@@ -2681,6 +2699,16 @@ recv_scan_log_recs(
recv_sys->found_corrupt_log = TRUE;
+#ifndef UNIV_HOTBACKUP
+ if (!srv_force_recovery) {
+ fputs("InnoDB: Set"
+ " innodb_force_recovery"
+ " to ignore this error.\n",
+ stderr);
+ ut_error;
+ }
+#endif /* !UNIV_HOTBACKUP */
+
} else if (!recv_sys->found_corrupt_log) {
more_data = recv_sys_add_to_parsing_buf(
log_block, scanned_lsn);
@@ -3210,8 +3238,6 @@ void
recv_recovery_from_checkpoint_finish(void)
/*======================================*/
{
- int i;
-
/* Apply the hashed log records to the respective file pages */
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
@@ -3259,9 +3285,16 @@ recv_recovery_from_checkpoint_finish(void)
The data dictionary latch should guarantee that there is at
most one data dictionary transaction active at a time. */
trx_rollback_or_clean_recovered(FALSE);
+}
- /* Drop partially created indexes. */
- row_merge_drop_temp_indexes();
+/********************************************************//**
+Initiates the rollback of active transactions. */
+UNIV_INTERN
+void
+recv_recovery_rollback_active(void)
+/*===============================*/
+{
+ int i;
#ifdef UNIV_SYNC_DEBUG
/* Wait for a while so that created threads have time to suspend
@@ -3271,6 +3304,11 @@ recv_recovery_from_checkpoint_finish(void)
/* Switch latching order checks on in sync0sync.c */
sync_order_checks_on = TRUE;
#endif
+ /* Drop partially created indexes. */
+ row_merge_drop_temp_indexes();
+ /* Drop temporary tables. */
+ row_mysql_drop_temp_tables();
+
if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
/* Rollback the uncommitted transactions which have no user
session */
diff --git a/storage/innodb_plugin/mem/mem0dbg.c b/storage/innodb_plugin/mem/mem0dbg.c
index 01eda20ec45..1cd2ff15bab 100644
--- a/storage/innodb_plugin/mem/mem0dbg.c
+++ b/storage/innodb_plugin/mem/mem0dbg.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -180,6 +180,10 @@ mem_close(void)
{
mem_pool_free(mem_comm_pool);
mem_comm_pool = NULL;
+#ifdef UNIV_MEM_DEBUG
+ mutex_free(&mem_hash_mutex);
+ mem_hash_initialized = FALSE;
+#endif /* UNIV_MEM_DEBUG */
}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/mem/mem0mem.c b/storage/innodb_plugin/mem/mem0mem.c
index ccb2fd8a7b4..c0ce8a3e1ac 100644
--- a/storage/innodb_plugin/mem/mem0mem.c
+++ b/storage/innodb_plugin/mem/mem0mem.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -383,6 +383,20 @@ mem_heap_create_block(
mem_block_set_free(block, MEM_BLOCK_HEADER_SIZE);
mem_block_set_start(block, MEM_BLOCK_HEADER_SIZE);
+ if (UNIV_UNLIKELY(heap == NULL)) {
+ /* This is the first block of the heap. The field
+ total_size should be initialized here */
+ block->total_size = len;
+ } else {
+ /* Not the first allocation for the heap. This block's
+ total_size field should be set to undefined. */
+ ut_d(block->total_size = ULINT_UNDEFINED);
+ UNIV_MEM_INVALID(&block->total_size,
+ sizeof block->total_size);
+
+ heap->total_size += len;
+ }
+
ut_ad((ulint)MEM_BLOCK_HEADER_SIZE < len);
return(block);
@@ -471,6 +485,10 @@ mem_heap_block_free(
mem_pool_mutex_exit();
#endif
+
+ ut_ad(heap->total_size >= block->len);
+ heap->total_size -= block->len;
+
type = heap->type;
len = block->len;
block->magic_n = MEM_FREED_BLOCK_MAGIC_N;
diff --git a/storage/innodb_plugin/os/os0file.c b/storage/innodb_plugin/os/os0file.c
index 37edad442db..b244e3974b3 100644
--- a/storage/innodb_plugin/os/os0file.c
+++ b/storage/innodb_plugin/os/os0file.c
@@ -1,23 +1,6 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
/***********************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Portions of this file contain modifications contributed and copyrighted
@@ -806,7 +789,15 @@ next_file:
#ifdef HAVE_READDIR_R
ret = readdir_r(dir, (struct dirent*)dirent_buf, &ent);
- if (ret != 0) {
+ if (ret != 0
+#ifdef UNIV_AIX
+ /* On AIX, only if we got non-NULL 'ent' (result) value and
+ a non-zero 'ret' (return) value, it indicates a failed
+ readdir_r() call. A NULL 'ent' with a non-zero 'ret'
+ would indicate the "end of the directory" is reached. */
+ && ent != NULL
+#endif
+ ) {
fprintf(stderr,
"InnoDB: cannot read directory %s, error %lu\n",
dirname, (ulong)ret);
@@ -3923,6 +3914,9 @@ os_aio_simulated_handle(
ulint n;
ulint i;
+ /* Fix compiler warning */
+ *consecutive_ios = NULL;
+
segment = os_aio_get_array_and_local_segment(&array, global_segment);
restart:
diff --git a/storage/innodb_plugin/page/page0page.c b/storage/innodb_plugin/page/page0page.c
index ab2ba60570e..10008f9ac25 100644
--- a/storage/innodb_plugin/page/page0page.c
+++ b/storage/innodb_plugin/page/page0page.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -658,6 +658,14 @@ page_copy_rec_list_end(
index, mtr);
}
+ /* Update PAGE_MAX_TRX_ID on the uncompressed page.
+ Modifications will be redo logged and copied to the compressed
+ page in page_zip_compress() or page_zip_reorganize() below. */
+ if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
+ page_update_max_trx_id(new_block, NULL,
+ page_get_max_trx_id(page), mtr);
+ }
+
if (UNIV_LIKELY_NULL(new_page_zip)) {
mtr_set_log_mode(mtr, log_mode);
@@ -696,15 +704,10 @@ page_copy_rec_list_end(
}
}
- /* Update the lock table, MAX_TRX_ID, and possible hash index */
+ /* Update the lock table and possible hash index */
lock_move_rec_list_end(new_block, block, rec);
- if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
- page_update_max_trx_id(new_block, new_page_zip,
- page_get_max_trx_id(page), mtr);
- }
-
btr_search_move_or_delete_hash_entries(new_block, block, index);
return(ret);
@@ -772,6 +775,16 @@ page_copy_rec_list_start(
mem_heap_free(heap);
}
+ /* Update PAGE_MAX_TRX_ID on the uncompressed page.
+ Modifications will be redo logged and copied to the compressed
+ page in page_zip_compress() or page_zip_reorganize() below. */
+ if (dict_index_is_sec_or_ibuf(index)
+ && page_is_leaf(page_align(rec))) {
+ page_update_max_trx_id(new_block, NULL,
+ page_get_max_trx_id(page_align(rec)),
+ mtr);
+ }
+
if (UNIV_LIKELY_NULL(new_page_zip)) {
mtr_set_log_mode(mtr, log_mode);
@@ -809,14 +822,7 @@ page_copy_rec_list_start(
}
}
- /* Update MAX_TRX_ID, the lock table, and possible hash index */
-
- if (dict_index_is_sec_or_ibuf(index)
- && page_is_leaf(page_align(rec))) {
- page_update_max_trx_id(new_block, new_page_zip,
- page_get_max_trx_id(page_align(rec)),
- mtr);
- }
+ /* Update the lock table and possible hash index */
lock_move_rec_list_start(new_block, block, rec, ret);
@@ -2408,8 +2414,13 @@ page_validate(
}
offs = page_offset(rec_get_start(rec, offsets));
+ i = rec_offs_size(offsets);
+ if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) {
+ fputs("InnoDB: record offset out of bounds\n", stderr);
+ goto func_exit;
+ }
- for (i = rec_offs_size(offsets); i--; ) {
+ while (i--) {
if (UNIV_UNLIKELY(buf[offs + i])) {
/* No other record may overlap this */
@@ -2517,8 +2528,13 @@ n_owned_zero:
count++;
offs = page_offset(rec_get_start(rec, offsets));
+ i = rec_offs_size(offsets);
+ if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) {
+ fputs("InnoDB: record offset out of bounds\n", stderr);
+ goto func_exit;
+ }
- for (i = rec_offs_size(offsets); i--; ) {
+ while (i--) {
if (UNIV_UNLIKELY(buf[offs + i])) {
fputs("InnoDB: Record overlaps another"
diff --git a/storage/innodb_plugin/plug.in.disabled b/storage/innodb_plugin/plug.in.disabled
index 4ebde50cb35..9aed029cfc2 100644
--- a/storage/innodb_plugin/plug.in.disabled
+++ b/storage/innodb_plugin/plug.in.disabled
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+# Copyright (c) 2006, 2010, Innobase Oy. All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/storage/innodb_plugin/rem/rem0rec.c b/storage/innodb_plugin/rem/rem0rec.c
index 1c8b3fd8c1e..27c11dacc8c 100644
--- a/storage/innodb_plugin/rem/rem0rec.c
+++ b/storage/innodb_plugin/rem/rem0rec.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -695,19 +695,9 @@ rec_get_nth_field_offs_old(
ulint os;
ulint next_os;
- ut_ad(rec && len);
- ut_ad(n < rec_get_n_fields_old(rec));
-
- if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {
- fprintf(stderr, "Error: trying to access field %lu in rec\n",
- (ulong) n);
- ut_error;
- }
-
- if (UNIV_UNLIKELY(rec == NULL)) {
- fputs("Error: rec is NULL pointer\n", stderr);
- ut_error;
- }
+ ut_ad(len);
+ ut_a(rec);
+ ut_a(n < rec_get_n_fields_old(rec));
if (rec_get_1byte_offs_flag(rec)) {
os = rec_1_get_field_start_offs(rec, n);
diff --git a/storage/innodb_plugin/row/row0ins.c b/storage/innodb_plugin/row/row0ins.c
index fe51fce82c4..230dc45dadc 100644
--- a/storage/innodb_plugin/row/row0ins.c
+++ b/storage/innodb_plugin/row/row0ins.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1991,7 +1991,7 @@ row_ins_index_entry_low(
btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
mode | BTR_INSERT | ignore_sec_unique,
- &cursor, 0, &mtr);
+ &cursor, 0, __FILE__, __LINE__, &mtr);
if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) {
/* The insertion was made to the insert buffer already during
@@ -2049,7 +2049,8 @@ row_ins_index_entry_low(
btr_cur_search_to_nth_level(index, 0, entry,
PAGE_CUR_LE,
mode | BTR_INSERT,
- &cursor, 0, &mtr);
+ &cursor, 0,
+ __FILE__, __LINE__, &mtr);
}
}
@@ -2104,7 +2105,8 @@ function_exit:
mtr_start(&mtr);
btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &cursor, 0, &mtr);
+ BTR_MODIFY_TREE, &cursor, 0,
+ __FILE__, __LINE__, &mtr);
rec = btr_cur_get_rec(&cursor);
offsets = rec_get_offsets(rec, index, NULL,
ULINT_UNDEFINED, &heap);
diff --git a/storage/innodb_plugin/row/row0merge.c b/storage/innodb_plugin/row/row0merge.c
index 25f041c0885..88a2770032d 100644
--- a/storage/innodb_plugin/row/row0merge.c
+++ b/storage/innodb_plugin/row/row0merge.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -424,14 +424,13 @@ row_merge_dup_report(
row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */
const dfield_t* entry) /*!< in: duplicate index entry */
{
- mrec_buf_t buf;
+ mrec_buf_t* buf;
const dtuple_t* tuple;
dtuple_t tuple_store;
const rec_t* rec;
const dict_index_t* index = dup->index;
ulint n_fields= dict_index_get_n_fields(index);
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ mem_heap_t* heap;
ulint* offsets;
ulint n_ext;
@@ -441,22 +440,22 @@ row_merge_dup_report(
return;
}
- rec_offs_init(offsets_);
-
/* Convert the tuple to a record and then to MySQL format. */
+ heap = mem_heap_create((1 + REC_OFFS_HEADER_SIZE + n_fields)
+ * sizeof *offsets
+ + sizeof *buf);
+
+ buf = mem_heap_alloc(heap, sizeof *buf);
tuple = dtuple_from_fields(&tuple_store, entry, n_fields);
n_ext = dict_index_is_clust(index) ? dtuple_get_n_ext(tuple) : 0;
- rec = rec_convert_dtuple_to_rec(buf, index, tuple, n_ext);
- offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED,
- &heap);
+ rec = rec_convert_dtuple_to_rec(*buf, index, tuple, n_ext);
+ offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
innobase_rec_to_mysql(dup->table, rec, index, offsets);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
+ mem_heap_free(heap);
}
/*************************************************************//**
@@ -627,22 +626,26 @@ row_merge_buf_write(
}
/******************************************************//**
-Create a memory heap and allocate space for row_merge_rec_offsets().
+Create a memory heap and allocate space for row_merge_rec_offsets()
+and mrec_buf_t[3].
@return memory heap */
static
mem_heap_t*
row_merge_heap_create(
/*==================*/
const dict_index_t* index, /*!< in: record descriptor */
+ mrec_buf_t** buf, /*!< out: 3 buffers */
ulint** offsets1, /*!< out: offsets */
ulint** offsets2) /*!< out: offsets */
{
ulint i = 1 + REC_OFFS_HEADER_SIZE
+ dict_index_get_n_fields(index);
- mem_heap_t* heap = mem_heap_create(2 * i * sizeof *offsets1);
+ mem_heap_t* heap = mem_heap_create(2 * i * sizeof **offsets1
+ + 3 * sizeof **buf);
- *offsets1 = mem_heap_alloc(heap, i * sizeof *offsets1);
- *offsets2 = mem_heap_alloc(heap, i * sizeof *offsets2);
+ *buf = mem_heap_alloc(heap, 3 * sizeof **buf);
+ *offsets1 = mem_heap_alloc(heap, i * sizeof **offsets1);
+ *offsets2 = mem_heap_alloc(heap, i * sizeof **offsets2);
(*offsets1)[0] = (*offsets2)[0] = i;
(*offsets1)[1] = (*offsets2)[1] = dict_index_get_n_fields(index);
@@ -1394,7 +1397,8 @@ row_merge_blocks(
{
mem_heap_t* heap; /*!< memory heap for offsets0, offsets1 */
- mrec_buf_t buf[3]; /*!< buffer for handling split mrec in block[] */
+ mrec_buf_t* buf; /*!< buffer for handling
+ split mrec in block[] */
const byte* b0; /*!< pointer to block[0] */
const byte* b1; /*!< pointer to block[1] */
byte* b2; /*!< pointer to block[2] */
@@ -1414,7 +1418,7 @@ row_merge_blocks(
}
#endif /* UNIV_DEBUG */
- heap = row_merge_heap_create(index, &offsets0, &offsets1);
+ heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1);
/* Write a record and read the next record. Split the output
file in two halves, which can be merged on the following pass. */
@@ -1500,7 +1504,7 @@ row_merge_blocks_copy(
{
mem_heap_t* heap; /*!< memory heap for offsets0, offsets1 */
- mrec_buf_t buf[3]; /*!< buffer for handling
+ mrec_buf_t* buf; /*!< buffer for handling
split mrec in block[] */
const byte* b0; /*!< pointer to block[0] */
byte* b2; /*!< pointer to block[2] */
@@ -1518,7 +1522,7 @@ row_merge_blocks_copy(
}
#endif /* UNIV_DEBUG */
- heap = row_merge_heap_create(index, &offsets0, &offsets1);
+ heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1);
/* Write a record and read the next record. Split the output
file in two halves, which can be merged on the following pass. */
@@ -1760,7 +1764,6 @@ row_merge_insert_index_tuples(
int fd, /*!< in: file descriptor */
row_merge_block_t* block) /*!< in/out: file buffer */
{
- mrec_buf_t buf;
const byte* b;
que_thr_t* thr;
ins_node_t* node;
@@ -1779,7 +1782,7 @@ row_merge_insert_index_tuples(
trx->op_info = "inserting index entries";
- graph_heap = mem_heap_create(500);
+ graph_heap = mem_heap_create(500 + sizeof(mrec_buf_t));
node = ins_node_create(INS_DIRECT, table, graph_heap);
thr = pars_complete_graph_for_exec(node, trx, graph_heap);
@@ -1801,12 +1804,14 @@ row_merge_insert_index_tuples(
if (!row_merge_read(fd, foffs, block)) {
error = DB_CORRUPTION;
} else {
+ mrec_buf_t* buf = mem_heap_alloc(graph_heap, sizeof *buf);
+
for (;;) {
const mrec_t* mrec;
dtuple_t* dtuple;
ulint n_ext;
- b = row_merge_read_rec(block, &buf, b, index,
+ b = row_merge_read_rec(block, buf, b, index,
fd, &foffs, &mrec, offsets);
if (UNIV_UNLIKELY(!b)) {
/* End of list, or I/O error */
@@ -1977,14 +1982,12 @@ row_merge_drop_index(
/* Drop the field definitions of the index. */
"DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n"
/* Drop the index definition and the B-tree. */
- "DELETE FROM SYS_INDEXES WHERE ID = :indexid\n"
- " AND TABLE_ID = :tableid;\n"
+ "DELETE FROM SYS_INDEXES WHERE ID = :indexid;\n"
"END;\n";
ut_ad(index && table && trx);
pars_info_add_dulint_literal(info, "indexid", index->id);
- pars_info_add_dulint_literal(info, "tableid", table->id);
trx_start_if_not_started(trx);
trx->op_info = "dropping index";
@@ -2033,47 +2036,79 @@ row_merge_drop_temp_indexes(void)
/*=============================*/
{
trx_t* trx;
- ulint err;
-
- /* We use the private SQL parser of Innobase to generate the
- query graphs needed in deleting the dictionary data from system
- tables in Innobase. Deleting a row from SYS_INDEXES table also
- frees the file segments of the B-tree associated with the index. */
- static const char drop_temp_indexes[] =
- "PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n"
- "indexid CHAR;\n"
- "DECLARE CURSOR c IS SELECT ID FROM SYS_INDEXES\n"
- "WHERE SUBSTR(NAME,0,1)='" TEMP_INDEX_PREFIX_STR "';\n"
- "BEGIN\n"
- "\tOPEN c;\n"
- "\tWHILE 1=1 LOOP\n"
- "\t\tFETCH c INTO indexid;\n"
- "\t\tIF (SQL % NOTFOUND) THEN\n"
- "\t\t\tEXIT;\n"
- "\t\tEND IF;\n"
- "\t\tDELETE FROM SYS_FIELDS WHERE INDEX_ID = indexid;\n"
- "\t\tDELETE FROM SYS_INDEXES WHERE ID = indexid;\n"
- "\tEND LOOP;\n"
- "\tCLOSE c;\n"
- "\tCOMMIT WORK;\n"
- "END;\n";
+ btr_pcur_t pcur;
+ mtr_t mtr;
+ /* Load the table definitions that contain partially defined
+ indexes, so that the data dictionary information can be checked
+ when accessing the tablename.ibd files. */
trx = trx_allocate_for_background();
trx->op_info = "dropping partially created indexes";
row_mysql_lock_data_dictionary(trx);
- /* Incomplete transactions may be holding some locks on the
- data dictionary tables. However, they should never have been
- able to lock the records corresponding to the partially
- created indexes that we are attempting to delete, because the
- table was locked when the indexes were being created. We will
- drop the partially created indexes before the rollback of
- incomplete transactions is initiated. Thus, this should not
- interfere with the incomplete transactions. */
- trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
- err = que_eval_sql(NULL, drop_temp_indexes, FALSE, trx);
- ut_a(err == DB_SUCCESS);
+ mtr_start(&mtr);
+
+ btr_pcur_open_at_index_side(
+ TRUE,
+ dict_table_get_first_index(dict_sys->sys_indexes),
+ BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
+
+ for (;;) {
+ const rec_t* rec;
+ const byte* field;
+ ulint len;
+ dulint table_id;
+ dict_table_t* table;
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
+ break;
+ }
+
+ rec = btr_pcur_get_rec(&pcur);
+ field = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_NAME_FIELD,
+ &len);
+ if (len == UNIV_SQL_NULL || len == 0
+ || mach_read_from_1(field) != (ulint) TEMP_INDEX_PREFIX) {
+ continue;
+ }
+
+ /* This is a temporary index. */
+
+ field = rec_get_nth_field_old(rec, 0/*TABLE_ID*/, &len);
+ if (len != 8) {
+ /* Corrupted TABLE_ID */
+ continue;
+ }
+
+ table_id = mach_read_from_8(field);
+
+ btr_pcur_store_position(&pcur, &mtr);
+ btr_pcur_commit_specify_mtr(&pcur, &mtr);
+
+ table = dict_load_table_on_id(table_id);
+
+ if (table) {
+ dict_index_t* index;
+
+ for (index = dict_table_get_first_index(table);
+ index; index = dict_table_get_next_index(index)) {
+
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ row_merge_drop_index(index, table, trx);
+ trx_commit_for_mysql(trx);
+ }
+ }
+ }
+
+ mtr_start(&mtr);
+ btr_pcur_restore_position(BTR_SEARCH_LEAF,
+ &pcur, &mtr);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
row_mysql_unlock_data_dictionary(trx);
trx_free_for_background(trx);
}
diff --git a/storage/innodb_plugin/row/row0mysql.c b/storage/innodb_plugin/row/row0mysql.c
index 181c39de881..0d8d298453c 100644
--- a/storage/innodb_plugin/row/row0mysql.c
+++ b/storage/innodb_plugin/row/row0mysql.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2000, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -485,7 +485,7 @@ next_column:
/****************************************************************//**
Handles user errors and lock waits detected by the database engine.
@return TRUE if it was a lock wait and we should continue running the
-query thread */
+query thread and in that case the thr is ALREADY in the running state. */
UNIV_INTERN
ibool
row_mysql_handle_errors(
@@ -3255,19 +3255,13 @@ check_next_foreign:
"END;\n"
, FALSE, trx);
- if (err != DB_SUCCESS) {
- ut_a(err == DB_OUT_OF_FILE_SPACE);
-
- err = DB_MUST_GET_MORE_FILE_SPACE;
-
- row_mysql_handle_errors(&err, trx, NULL, NULL);
-
- ut_error;
- } else {
- ibool is_path;
+ switch (err) {
+ ibool is_temp;
const char* name_or_path;
mem_heap_t* heap;
+ case DB_SUCCESS:
+
heap = mem_heap_create(200);
/* Clone the name, in case it has been allocated
@@ -3277,12 +3271,13 @@ check_next_foreign:
space_id = table->space;
if (table->dir_path_of_temp_table != NULL) {
- is_path = TRUE;
name_or_path = mem_heap_strdup(
heap, table->dir_path_of_temp_table);
+ is_temp = TRUE;
} else {
- is_path = FALSE;
name_or_path = name;
+ is_temp = (table->flags >> DICT_TF2_SHIFT)
+ & DICT_TF2_TEMPORARY;
}
dict_table_remove_from_cache(table);
@@ -3302,8 +3297,8 @@ check_next_foreign:
if (err == DB_SUCCESS && space_id > 0) {
if (!fil_space_for_table_exists_in_mem(space_id,
name_or_path,
- is_path,
- FALSE, TRUE)) {
+ is_temp, FALSE,
+ !is_temp)) {
err = DB_SUCCESS;
fprintf(stderr,
@@ -3332,7 +3327,27 @@ check_next_foreign:
}
mem_heap_free(heap);
+ break;
+
+ case DB_TOO_MANY_CONCURRENT_TRXS:
+ /* Cannot even find a free slot for the
+ undo log. We can directly exit here
+ and return the DB_TOO_MANY_CONCURRENT_TRXS
+ error. */
+ break;
+
+ case DB_OUT_OF_FILE_SPACE:
+ err = DB_MUST_GET_MORE_FILE_SPACE;
+
+ row_mysql_handle_errors(&err, trx, NULL, NULL);
+
+ /* Fall through to raise error */
+
+ default:
+ /* No other possible error returns */
+ ut_error;
}
+
funct_exit:
if (locked_dictionary) {
@@ -3348,6 +3363,90 @@ funct_exit:
return((int) err);
}
+/*********************************************************************//**
+Drop all temporary tables during crash recovery. */
+UNIV_INTERN
+void
+row_mysql_drop_temp_tables(void)
+/*============================*/
+{
+ trx_t* trx;
+ btr_pcur_t pcur;
+ mtr_t mtr;
+ mem_heap_t* heap;
+
+ trx = trx_allocate_for_background();
+ trx->op_info = "dropping temporary tables";
+ row_mysql_lock_data_dictionary(trx);
+
+ heap = mem_heap_create(200);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open_at_index_side(
+ TRUE,
+ dict_table_get_first_index(dict_sys->sys_tables),
+ BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
+
+ for (;;) {
+ const rec_t* rec;
+ const byte* field;
+ ulint len;
+ const char* table_name;
+ dict_table_t* table;
+
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
+ break;
+ }
+
+ rec = btr_pcur_get_rec(&pcur);
+ field = rec_get_nth_field_old(rec, 4/*N_COLS*/, &len);
+ if (len != 4 || !(mach_read_from_4(field) & 0x80000000UL)) {
+ continue;
+ }
+
+ /* Because this is not a ROW_FORMAT=REDUNDANT table,
+ the is_temp flag is valid. Examine it. */
+
+ field = rec_get_nth_field_old(rec, 7/*MIX_LEN*/, &len);
+ if (len != 4
+ || !(mach_read_from_4(field) & DICT_TF2_TEMPORARY)) {
+ continue;
+ }
+
+ /* This is a temporary table. */
+ field = rec_get_nth_field_old(rec, 0/*NAME*/, &len);
+ if (len == UNIV_SQL_NULL || len == 0) {
+ /* Corrupted SYS_TABLES.NAME */
+ continue;
+ }
+
+ table_name = mem_heap_strdupl(heap, (const char*) field, len);
+
+ btr_pcur_store_position(&pcur, &mtr);
+ btr_pcur_commit_specify_mtr(&pcur, &mtr);
+
+ table = dict_load_table(table_name);
+
+ if (table) {
+ row_drop_table_for_mysql(table_name, trx, FALSE);
+ trx_commit_for_mysql(trx);
+ }
+
+ mtr_start(&mtr);
+ btr_pcur_restore_position(BTR_SEARCH_LEAF,
+ &pcur, &mtr);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+ row_mysql_unlock_data_dictionary(trx);
+ trx_free_for_background(trx);
+}
+
/*******************************************************************//**
Drop all foreign keys in a database, see Bug#18942.
Called at the end of row_drop_database_for_mysql().
@@ -3899,14 +3998,15 @@ Checks that the index contains entries in an ascending order, unique
constraint is not broken, and calculates the number of index entries
in the read view of the current transaction.
@return TRUE if ok */
-static
+UNIV_INTERN
ibool
-row_scan_and_check_index(
-/*=====================*/
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in MySQL */
- dict_index_t* index, /*!< in: index */
- ulint* n_rows) /*!< out: number of entries seen in the
- current consistent read */
+row_check_index_for_mysql(
+/*======================*/
+ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct
+ in MySQL handle */
+ const dict_index_t* index, /*!< in: index */
+ ulint* n_rows) /*!< out: number of entries
+ seen in the consistent read */
{
dtuple_t* prev_entry = NULL;
ulint matched_fields;
@@ -3927,31 +4027,9 @@ row_scan_and_check_index(
*n_rows = 0;
- if (!row_merge_is_index_usable(prebuilt->trx, index)) {
- /* A newly created index may lack some delete-marked
- records that may exist in the read view of
- prebuilt->trx. Thus, such indexes must not be
- accessed by consistent read. */
- return(is_ok);
- }
-
buf = mem_alloc(UNIV_PAGE_SIZE);
heap = mem_heap_create(100);
- /* Make a dummy template in prebuilt, which we will use
- in scanning the index entries */
-
- prebuilt->index = index;
- /* row_merge_is_index_usable() was already checked above. */
- prebuilt->index_usable = TRUE;
- prebuilt->sql_stat_start = TRUE;
- prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE;
- prebuilt->n_template = 0;
- prebuilt->need_to_access_clustered = FALSE;
-
- dtuple_set_n_fields(prebuilt->search_tuple, 0);
-
- prebuilt->select_lock_type = LOCK_NONE;
cnt = 1000;
ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, 0);
@@ -4070,119 +4148,6 @@ not_ok:
}
/*********************************************************************//**
-Checks a table for corruption.
-@return DB_ERROR or DB_SUCCESS */
-UNIV_INTERN
-ulint
-row_check_table_for_mysql(
-/*======================*/
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
- handle */
-{
- dict_table_t* table = prebuilt->table;
- dict_index_t* index;
- ulint n_rows;
- ulint n_rows_in_table = ULINT_UNDEFINED;
- ulint ret = DB_SUCCESS;
- ulint old_isolation_level;
-
- if (table->ibd_file_missing) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error:\n"
- "InnoDB: MySQL is trying to use a table handle"
- " but the .ibd file for\n"
- "InnoDB: table %s does not exist.\n"
- "InnoDB: Have you deleted the .ibd file"
- " from the database directory under\n"
- "InnoDB: the MySQL datadir, or have you"
- " used DISCARD TABLESPACE?\n"
- "InnoDB: Look from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
- "InnoDB: how you can resolve the problem.\n",
- table->name);
- return(DB_ERROR);
- }
-
- prebuilt->trx->op_info = "checking table";
-
- old_isolation_level = prebuilt->trx->isolation_level;
-
- /* We must run the index record counts at an isolation level
- >= READ COMMITTED, because a dirty read can see a wrong number
- of records in some index; to play safe, we use always
- REPEATABLE READ here */
-
- prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ;
-
- /* Enlarge the fatal lock wait timeout during CHECK TABLE. */
- mutex_enter(&kernel_mutex);
- srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */
- mutex_exit(&kernel_mutex);
-
- index = dict_table_get_first_index(table);
-
- while (index != NULL) {
- /* fputs("Validating index ", stderr);
- ut_print_name(stderr, trx, FALSE, index->name);
- putc('\n', stderr); */
-
- if (!btr_validate_index(index, prebuilt->trx)) {
- ret = DB_ERROR;
- } else {
- if (!row_scan_and_check_index(prebuilt,index, &n_rows)){
- ret = DB_ERROR;
- }
-
- if (trx_is_interrupted(prebuilt->trx)) {
- ret = DB_INTERRUPTED;
- break;
- }
-
- /* fprintf(stderr, "%lu entries in index %s\n", n_rows,
- index->name); */
-
- if (index == dict_table_get_first_index(table)) {
- n_rows_in_table = n_rows;
- } else if (n_rows != n_rows_in_table) {
-
- ret = DB_ERROR;
-
- fputs("Error: ", stderr);
- dict_index_name_print(stderr,
- prebuilt->trx, index);
- fprintf(stderr,
- " contains %lu entries,"
- " should be %lu\n",
- (ulong) n_rows,
- (ulong) n_rows_in_table);
- }
- }
-
- index = dict_table_get_next_index(index);
- }
-
- /* Restore the original isolation level */
- prebuilt->trx->isolation_level = old_isolation_level;
-
- /* We validate also the whole adaptive hash index for all tables
- at every CHECK TABLE */
-
- if (!btr_search_validate()) {
-
- ret = DB_ERROR;
- }
-
- /* Restore the fatal lock wait timeout after CHECK TABLE. */
- mutex_enter(&kernel_mutex);
- srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */
- mutex_exit(&kernel_mutex);
-
- prebuilt->trx->op_info = "";
-
- return(ret);
-}
-
-/*********************************************************************//**
Determines if a table is a magic monitor table.
@return TRUE if monitor table */
UNIV_INTERN
diff --git a/storage/innodb_plugin/row/row0row.c b/storage/innodb_plugin/row/row0row.c
index 128ac3ba3e8..cb7dfa2b7c9 100644
--- a/storage/innodb_plugin/row/row0row.c
+++ b/storage/innodb_plugin/row/row0row.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -915,6 +915,10 @@ row_raw_format(
ret = row_raw_format_int(data, data_len, prtype,
buf, buf_size, &format_in_hex);
+ if (format_in_hex) {
+
+ goto format_in_hex;
+ }
break;
case DATA_CHAR:
case DATA_VARCHAR:
@@ -923,14 +927,15 @@ row_raw_format(
ret = row_raw_format_str(data, data_len, prtype,
buf, buf_size, &format_in_hex);
+ if (format_in_hex) {
+
+ goto format_in_hex;
+ }
+
break;
/* XXX support more data types */
default:
-
- format_in_hex = TRUE;
- }
-
- if (format_in_hex) {
+ format_in_hex:
if (UNIV_LIKELY(buf_size > 2)) {
diff --git a/storage/innodb_plugin/row/row0sel.c b/storage/innodb_plugin/row/row0sel.c
index 3ef9726588e..78318bf6461 100644
--- a/storage/innodb_plugin/row/row0sel.c
+++ b/storage/innodb_plugin/row/row0sel.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -132,7 +132,8 @@ index record.
NOTE: the comparison is NOT done as a binary comparison, but character
fields are compared with collation!
@return TRUE if the secondary record is equal to the corresponding
-fields in the clustered record, when compared with collation */
+fields in the clustered record, when compared with collation;
+FALSE if not equal or if the clustered record has been marked for deletion */
static
ibool
row_sel_sec_rec_is_for_clust_rec(
@@ -431,10 +432,6 @@ row_sel_fetch_columns(
data = rec_get_nth_field(rec, offsets,
field_no, &len);
- if (len == UNIV_SQL_NULL) {
- len = UNIV_SQL_NULL;
- }
-
needs_copy = column->copy_val;
}
@@ -2170,36 +2167,6 @@ row_fetch_print(
return((void*)42);
}
-/****************************************************************//**
-Callback function for fetch that stores an unsigned 4 byte integer to the
-location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length
-= 4.
-@return always returns NULL */
-UNIV_INTERN
-void*
-row_fetch_store_uint4(
-/*==================*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg) /*!< in: data pointer */
-{
- sel_node_t* node = row;
- ib_uint32_t* val = user_arg;
- ulint tmp;
-
- dfield_t* dfield = que_node_get_val(node->select_list);
- const dtype_t* type = dfield_get_type(dfield);
- ulint len = dfield_get_len(dfield);
-
- ut_a(dtype_get_mtype(type) == DATA_INT);
- ut_a(dtype_get_prtype(type) & DATA_UNSIGNED);
- ut_a(len == 4);
-
- tmp = mach_read_from_4(dfield_get_data(dfield));
- *val = (ib_uint32_t) tmp;
-
- return(NULL);
-}
-
/***********************************************************//**
Prints a row in a select result.
@return query thread to run next or NULL */
@@ -2981,6 +2948,7 @@ row_sel_get_clust_rec_for_mysql(
if (clust_rec
&& (old_vers
+ || trx->isolation_level <= TRX_ISO_READ_UNCOMMITTED
|| rec_get_deleted_flag(rec, dict_table_is_comp(
sec_index->table)))
&& !row_sel_sec_rec_is_for_clust_rec(
@@ -3202,14 +3170,17 @@ row_sel_try_search_shortcut_for_mysql(
ut_ad(dict_index_is_clust(index));
ut_ad(!prebuilt->templ_contains_blob);
+#ifndef UNIV_SEARCH_DEBUG
btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
BTR_SEARCH_LEAF, pcur,
-#ifndef UNIV_SEARCH_DEBUG
RW_S_LATCH,
-#else
+ mtr);
+#else /* UNIV_SEARCH_DEBUG */
+ btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, pcur,
0,
-#endif
mtr);
+#endif /* UNIV_SEARCH_DEBUG */
rec = btr_pcur_get_rec(pcur);
if (!page_rec_is_user_rec(rec)) {
@@ -4616,6 +4587,7 @@ row_search_autoinc_read_column(
dict_index_t* index, /*!< in: index to read from */
const rec_t* rec, /*!< in: current rec */
ulint col_no, /*!< in: column number */
+ ulint mtype, /*!< in: column main type */
ibool unsigned_type) /*!< in: signed or unsigned flag */
{
ulint len;
@@ -4632,10 +4604,26 @@ row_search_autoinc_read_column(
data = rec_get_nth_field(rec, offsets, col_no, &len);
ut_a(len != UNIV_SQL_NULL);
- ut_a(len <= sizeof value);
- /* we assume AUTOINC value cannot be negative */
- value = mach_read_int_type(data, len, unsigned_type);
+ switch (mtype) {
+ case DATA_INT:
+ ut_a(len <= sizeof value);
+ value = mach_read_int_type(data, len, unsigned_type);
+ break;
+
+ case DATA_FLOAT:
+ ut_a(len == sizeof(float));
+ value = mach_float_read(data);
+ break;
+
+ case DATA_DOUBLE:
+ ut_a(len == sizeof(double));
+ value = mach_double_read(data);
+ break;
+
+ default:
+ ut_error;
+ }
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
@@ -4721,7 +4709,8 @@ row_search_max_autoinc(
dfield->col->prtype & DATA_UNSIGNED);
*value = row_search_autoinc_read_column(
- index, rec, i, unsigned_type);
+ index, rec, i,
+ dfield->col->mtype, unsigned_type);
}
}
diff --git a/storage/innodb_plugin/row/row0umod.c b/storage/innodb_plugin/row/row0umod.c
index 6be475d8c78..e7245dbee41 100644
--- a/storage/innodb_plugin/row/row0umod.c
+++ b/storage/innodb_plugin/row/row0umod.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -144,13 +144,17 @@ row_undo_mod_clust_low(
/***********************************************************//**
Removes a clustered index record after undo if possible.
+This is attempted when the record was inserted by updating a
+delete-marked record and there no longer exist transactions
+that would see the delete-marked record. In other words, we
+roll back the insert by purging the record.
@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
static
ulint
row_undo_mod_remove_clust_low(
/*==========================*/
undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr __attribute__((unused)), /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr, /*!< in: mtr */
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
@@ -159,6 +163,7 @@ row_undo_mod_remove_clust_low(
ulint err;
ibool success;
+ ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
pcur = &(node->pcur);
btr_cur = btr_pcur_get_btr_cur(pcur);
@@ -190,11 +195,13 @@ row_undo_mod_remove_clust_low(
} else {
ut_ad(mode == BTR_MODIFY_TREE);
- /* Note that since this operation is analogous to purge,
- we can free also inherited externally stored fields:
- hence the RB_NONE in the call below */
+ /* This operation is analogous to purge: we can also free
+ inherited externally stored fields */
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, RB_NONE, mtr);
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
+ thr_is_recv(thr)
+ ? RB_RECOVERY_PURGE_REC
+ : RB_NONE, mtr);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
@@ -370,10 +377,11 @@ row_undo_mod_del_mark_or_remove_sec_low(
} else {
ut_ad(mode == BTR_MODIFY_TREE);
- /* No need to distinguish RB_RECOVERY here, because we
- are deleting a secondary index record: the distinction
- between RB_NORMAL and RB_RECOVERY only matters when
- deleting a record that contains externally stored
+ /* No need to distinguish RB_RECOVERY_PURGE here,
+ because we are deleting a secondary index record:
+ the distinction between RB_NORMAL and
+ RB_RECOVERY_PURGE only matters when deleting a
+ record that contains externally stored
columns. */
ut_ad(!dict_index_is_clust(index));
btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
@@ -438,7 +446,7 @@ row_undo_mod_del_unmark_sec_and_undo_update(
BTR_MODIFY_TREE */
que_thr_t* thr, /*!< in: query thread */
dict_index_t* index, /*!< in: index */
- dtuple_t* entry) /*!< in: index entry */
+ const dtuple_t* entry) /*!< in: index entry */
{
mem_heap_t* heap;
btr_pcur_t pcur;
@@ -533,6 +541,7 @@ row_undo_mod_upd_del_sec(
dict_index_t* index;
ulint err = DB_SUCCESS;
+ ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
heap = mem_heap_create(1024);
while (node->index != NULL) {
@@ -550,7 +559,7 @@ row_undo_mod_upd_del_sec(
does not exist. However, this situation may
only occur during the rollback of incomplete
transactions. */
- ut_a(trx_is_recv(thr_get_trx(thr)));
+ ut_a(thr_is_recv(thr));
} else {
err = row_undo_mod_del_mark_or_remove_sec(
node, thr, index, entry);
diff --git a/storage/innodb_plugin/row/row0upd.c b/storage/innodb_plugin/row/row0upd.c
index 58dfd43ead9..95d1d00aeef 100644
--- a/storage/innodb_plugin/row/row0upd.c
+++ b/storage/innodb_plugin/row/row0upd.c
@@ -1344,9 +1344,6 @@ row_upd_copy_columns(
data = rec_get_nth_field(rec, offsets,
column->field_nos[SYM_CLUST_FIELD_NO],
&len);
- if (len == UNIV_SQL_NULL) {
- len = UNIV_SQL_NULL;
- }
eval_node_copy_and_alloc_val(column, data, len);
column = UT_LIST_GET_NEXT(col_var_list, column);
diff --git a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c
index 639da1ed2f3..78f11cfd2f3 100644
--- a/storage/innodb_plugin/srv/srv0srv.c
+++ b/storage/innodb_plugin/srv/srv0srv.c
@@ -1,7 +1,8 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, 2009 Google Inc.
+Copyright (c) 2009, Percona Inc.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -9,6 +10,13 @@ briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
@@ -22,32 +30,6 @@ this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
-/***********************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Percona Inc.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-***********************************************************************/
/**************************************************//**
@file srv/srv0srv.c
@@ -119,7 +101,8 @@ UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;
in microseconds, in order to reduce the lagging of the purge thread. */
UNIV_INTERN ulint srv_dml_needed_delay = 0;
-UNIV_INTERN ibool srv_lock_timeout_and_monitor_active = FALSE;
+UNIV_INTERN ibool srv_lock_timeout_active = FALSE;
+UNIV_INTERN ibool srv_monitor_active = FALSE;
UNIV_INTERN ibool srv_error_monitor_active = FALSE;
UNIV_INTERN const char* srv_main_thread_op_info = "";
@@ -188,7 +171,17 @@ UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
the checkpoints. */
UNIV_INTERN char srv_adaptive_flushing = TRUE;
-/* The sort order table of the MySQL latin1_swedish_ci character set
+/** Maximum number of times allowed to conditionally acquire
+mutex before switching to blocking wait on the mutex */
+#define MAX_MUTEX_NOWAIT 20
+
+/** Check whether the number of failed nonblocking mutex
+acquisition attempts exceeds maximum allowed value. If so,
+srv_printf_innodb_monitor() will request mutex acquisition
+with mutex_enter(), which will wait until it gets the mutex. */
+#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
+
+/** The sort order table of the MySQL latin1_swedish_ci character set
collation */
UNIV_INTERN const byte* srv_latin1_ordering;
@@ -1683,12 +1676,15 @@ srv_refresh_innodb_monitor_stats(void)
}
/******************************************************************//**
-Outputs to a file the output of the InnoDB Monitor. */
+Outputs to a file the output of the InnoDB Monitor.
+@return FALSE if not all information printed
+due to failure to obtain necessary mutex */
UNIV_INTERN
-void
+ibool
srv_printf_innodb_monitor(
/*======================*/
FILE* file, /*!< in: output stream */
+ ibool nowait, /*!< in: TRUE if we must not wait for the kernel mutex */
ulint* trx_start, /*!< out: file position of the start of
the list of active transactions */
ulint* trx_end) /*!< out: file position of the end of
@@ -1697,6 +1693,7 @@ srv_printf_innodb_monitor(
double time_elapsed;
time_t current_time;
ulint n_reserved;
+ ibool ret;
mutex_enter(&srv_innodb_monitor_mutex);
@@ -1720,9 +1717,9 @@ srv_printf_innodb_monitor(
"Per second averages calculated from the last %lu seconds\n",
(ulong)time_elapsed);
- fputs("----------\n"
- "BACKGROUND THREAD\n"
- "----------\n", file);
+ fputs("-----------------\n"
+ "BACKGROUND THREAD\n"
+ "-----------------\n", file);
srv_print_master_thread_info(file);
fputs("----------\n"
@@ -1746,24 +1743,31 @@ srv_printf_innodb_monitor(
mutex_exit(&dict_foreign_err_mutex);
- lock_print_info_summary(file);
- if (trx_start) {
- long t = ftell(file);
- if (t < 0) {
- *trx_start = ULINT_UNDEFINED;
- } else {
- *trx_start = (ulint) t;
+ /* Call lock_print_info_all_transactions() to print all the
+ lock information only if lock_print_info_summary() has
+ succeeded. */
+ ret = lock_print_info_summary(file, nowait);
+
+ if (ret) {
+ if (trx_start) {
+ long t = ftell(file);
+ if (t < 0) {
+ *trx_start = ULINT_UNDEFINED;
+ } else {
+ *trx_start = (ulint) t;
+ }
}
- }
- lock_print_info_all_transactions(file);
- if (trx_end) {
- long t = ftell(file);
- if (t < 0) {
- *trx_end = ULINT_UNDEFINED;
- } else {
- *trx_end = (ulint) t;
+ lock_print_info_all_transactions(file);
+ if (trx_end) {
+ long t = ftell(file);
+ if (t < 0) {
+ *trx_end = ULINT_UNDEFINED;
+ } else {
+ *trx_end = (ulint) t;
+ }
}
}
+
fputs("--------\n"
"FILE I/O\n"
"--------\n", file);
@@ -1861,6 +1865,8 @@ srv_printf_innodb_monitor(
"============================\n", file);
mutex_exit(&srv_innodb_monitor_mutex);
fflush(file);
+
+ return(ret);
}
/******************************************************************//**
@@ -1948,26 +1954,23 @@ srv_export_innodb_status(void)
}
/*********************************************************************//**
-A thread which wakes up threads whose lock wait may have lasted too long.
-This also prints the info output by various InnoDB monitors.
+A thread which prints the info output by various InnoDB monitors.
@return a dummy parameter */
UNIV_INTERN
os_thread_ret_t
-srv_lock_timeout_and_monitor_thread(
-/*================================*/
+srv_monitor_thread(
+/*===============*/
void* arg __attribute__((unused)))
/*!< in: a dummy parameter required by
os_thread_create */
{
- srv_slot_t* slot;
double time_elapsed;
time_t current_time;
time_t last_table_monitor_time;
time_t last_tablespace_monitor_time;
time_t last_monitor_time;
- ibool some_waits;
- double wait_time;
- ulint i;
+ ulint mutex_skipped;
+ ibool last_srv_print_monitor;
#ifdef UNIV_DEBUG_THREAD_CREATION
fprintf(stderr, "Lock timeout thread starts, id %lu\n",
@@ -1978,13 +1981,15 @@ srv_lock_timeout_and_monitor_thread(
last_table_monitor_time = time(NULL);
last_tablespace_monitor_time = time(NULL);
last_monitor_time = time(NULL);
+ mutex_skipped = 0;
+ last_srv_print_monitor = srv_print_innodb_monitor;
loop:
- srv_lock_timeout_and_monitor_active = TRUE;
+ srv_monitor_active = TRUE;
- /* When someone is waiting for a lock, we wake up every second
- and check if a timeout has passed for a lock wait */
+ /* Wake up every 5 seconds to see if we need to print
+ monitor information. */
- os_thread_sleep(1000000);
+ os_thread_sleep(5000000);
current_time = time(NULL);
@@ -1994,14 +1999,40 @@ loop:
last_monitor_time = time(NULL);
if (srv_print_innodb_monitor) {
- srv_printf_innodb_monitor(stderr, NULL, NULL);
+ /* Reset mutex_skipped counter every time
+ srv_print_innodb_monitor changes. This is to
+ ensure we will not be blocked by kernel_mutex
+ for short duration information printing,
+ such as requested by sync_array_print_long_waits() */
+ if (!last_srv_print_monitor) {
+ mutex_skipped = 0;
+ last_srv_print_monitor = TRUE;
+ }
+
+ if (!srv_printf_innodb_monitor(stderr,
+ MUTEX_NOWAIT(mutex_skipped),
+ NULL, NULL)) {
+ mutex_skipped++;
+ } else {
+ /* Reset the counter */
+ mutex_skipped = 0;
+ }
+ } else {
+ last_srv_print_monitor = FALSE;
}
+
if (srv_innodb_status) {
mutex_enter(&srv_monitor_file_mutex);
rewind(srv_monitor_file);
- srv_printf_innodb_monitor(srv_monitor_file, NULL,
- NULL);
+ if (!srv_printf_innodb_monitor(srv_monitor_file,
+ MUTEX_NOWAIT(mutex_skipped),
+ NULL, NULL)) {
+ mutex_skipped++;
+ } else {
+ mutex_skipped = 0;
+ }
+
os_file_set_eof(srv_monitor_file);
mutex_exit(&srv_monitor_file_mutex);
}
@@ -2054,6 +2085,56 @@ loop:
}
}
+ if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
+ goto exit_func;
+ }
+
+ if (srv_print_innodb_monitor
+ || srv_print_innodb_lock_monitor
+ || srv_print_innodb_tablespace_monitor
+ || srv_print_innodb_table_monitor) {
+ goto loop;
+ }
+
+ srv_monitor_active = FALSE;
+
+ goto loop;
+
+exit_func:
+ srv_monitor_active = FALSE;
+
+ /* We count the number of threads in os_thread_exit(). A created
+ thread should always use that to exit and not use return() to exit. */
+
+ os_thread_exit(NULL);
+
+ OS_THREAD_DUMMY_RETURN;
+}
+
+/*********************************************************************//**
+A thread which wakes up threads whose lock wait may have lasted too long.
+@return a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+srv_lock_timeout_thread(
+/*====================*/
+ void* arg __attribute__((unused)))
+ /*!< in: a dummy parameter required by
+ os_thread_create */
+{
+ srv_slot_t* slot;
+ ibool some_waits;
+ double wait_time;
+ ulint i;
+
+loop:
+ /* When someone is waiting for a lock, we wake up every second
+ and check if a timeout has passed for a lock wait */
+
+ os_thread_sleep(1000000);
+
+ srv_lock_timeout_active = TRUE;
+
mutex_enter(&kernel_mutex);
some_waits = FALSE;
@@ -2104,17 +2185,11 @@ loop:
goto exit_func;
}
- if (some_waits || srv_print_innodb_monitor
- || srv_print_innodb_lock_monitor
- || srv_print_innodb_tablespace_monitor
- || srv_print_innodb_table_monitor) {
+ if (some_waits) {
goto loop;
}
- /* No one was waiting for a lock and no monitor was active:
- suspend this thread */
-
- srv_lock_timeout_and_monitor_active = FALSE;
+ srv_lock_timeout_active = FALSE;
#if 0
/* The following synchronisation is disabled, since
@@ -2124,7 +2199,7 @@ loop:
goto loop;
exit_func:
- srv_lock_timeout_and_monitor_active = FALSE;
+ srv_lock_timeout_active = FALSE;
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */
@@ -2449,7 +2524,10 @@ loop:
BUF_FLUSH_LIST,
n_flush,
IB_ULONGLONG_MAX);
- skip_sleep = TRUE;
+
+ if (n_flush == PCT_IO(100)) {
+ skip_sleep = TRUE;
+ }
}
}
diff --git a/storage/innodb_plugin/srv/srv0start.c b/storage/innodb_plugin/srv/srv0start.c
index d5f6120ca31..e517b9a86b0 100644
--- a/storage/innodb_plugin/srv/srv0start.c
+++ b/storage/innodb_plugin/srv/srv0start.c
@@ -1,7 +1,8 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
+Copyright (c) 2009, Percona Inc.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -9,6 +10,13 @@ briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
@@ -22,32 +30,6 @@ this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
-/***********************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Percona Inc.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-***********************************************************************/
/********************************************************************//**
@file srv/srv0start.c
@@ -105,6 +87,7 @@ Created 2/16/1996 Heikki Tuuri
# include "btr0pcur.h"
# include "thr0loc.h"
# include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */
+# include "zlib.h" /* for ZLIB_VERSION */
/** Log sequence number immediately after startup */
UNIV_INTERN ib_uint64_t srv_start_lsn;
@@ -143,9 +126,9 @@ static mutex_t ios_mutex;
static ulint ios;
/** io_handler_thread parameters for thread identification */
-static ulint n[SRV_MAX_N_IO_THREADS + 5];
+static ulint n[SRV_MAX_N_IO_THREADS + 6];
/** io_handler_thread identifiers */
-static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 5];
+static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6];
/** We use this mutex to test the return value of pthread_mutex_trylock
on successful locking. HP-UX does NOT return 0, though Linux et al do. */
@@ -1074,7 +1057,11 @@ innobase_start_or_create_for_mysql(void)
#ifdef UNIV_IBUF_DEBUG
fprintf(stderr,
"InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!\n"
- "InnoDB: Crash recovery will fail with UNIV_IBUF_DEBUG\n");
+# ifdef UNIV_IBUF_COUNT_DEBUG
+ "InnoDB: !!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on !!!!!!!!!\n"
+ "InnoDB: Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG\n"
+# endif
+ );
#endif
#ifdef UNIV_SYNC_DEBUG
@@ -1101,7 +1088,15 @@ innobase_start_or_create_for_mysql(void)
"InnoDB: The InnoDB memory heap is disabled\n");
}
- fprintf(stderr, "InnoDB: %s\n", IB_ATOMICS_STARTUP_MSG);
+ fputs("InnoDB: " IB_ATOMICS_STARTUP_MSG
+ "\nInnoDB: Compressed tables use zlib " ZLIB_VERSION
+#ifdef UNIV_ZIP_DEBUG
+ " with validation"
+#endif /* UNIV_ZIP_DEBUG */
+#ifdef UNIV_ZIP_COPY
+ " and extra copying"
+#endif /* UNIV_ZIP_COPY */
+ "\n" , stderr);
/* Since InnoDB does not currently clean up all its internal data
structures in MySQL Embedded Server Library server_end(), we
@@ -1575,6 +1570,14 @@ innobase_start_or_create_for_mysql(void)
dict_boot();
trx_sys_init_at_db_start();
+ /* Initialize the fsp free limit global variable in the log
+ system */
+ fsp_header_get_free_limit();
+
+ /* recv_recovery_from_checkpoint_finish needs trx lists which
+ are initialized in trx_sys_init_at_db_start(). */
+
+ recv_recovery_from_checkpoint_finish();
if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
/* The following call is necessary for the insert
buffer to work with multiple tablespaces. We must
@@ -1590,26 +1593,14 @@ innobase_start_or_create_for_mysql(void)
every table in the InnoDB data dictionary that has
an .ibd file.
- We also determine the maximum tablespace id used.
-
- TODO: We may have incomplete transactions in the
- data dictionary tables. Does that harm the scanning of
- the data dictionary below? */
+ We also determine the maximum tablespace id used. */
dict_check_tablespaces_and_store_max_id(
recv_needed_recovery);
}
srv_startup_is_before_trx_rollback_phase = FALSE;
-
- /* Initialize the fsp free limit global variable in the log
- system */
- fsp_header_get_free_limit();
-
- /* recv_recovery_from_checkpoint_finish needs trx lists which
- are initialized in trx_sys_init_at_db_start(). */
-
- recv_recovery_from_checkpoint_finish();
+ recv_recovery_rollback_active();
/* It is possible that file_format tag has never
been set. In this case we initialize it to minimum
@@ -1658,15 +1649,18 @@ innobase_start_or_create_for_mysql(void)
/* fprintf(stderr, "Max allowed record size %lu\n",
page_get_free_space_of_empty() / 2); */
- /* Create the thread which watches the timeouts for lock waits
- and prints InnoDB monitor info */
-
- os_thread_create(&srv_lock_timeout_and_monitor_thread, NULL,
+ /* Create the thread which watches the timeouts for lock waits */
+ os_thread_create(&srv_lock_timeout_thread, NULL,
thread_ids + 2 + SRV_MAX_N_IO_THREADS);
/* Create the thread which warns of long semaphore waits */
os_thread_create(&srv_error_monitor_thread, NULL,
thread_ids + 3 + SRV_MAX_N_IO_THREADS);
+
+ /* Create the thread which prints InnoDB monitor info */
+ os_thread_create(&srv_monitor_thread, NULL,
+ thread_ids + 4 + SRV_MAX_N_IO_THREADS);
+
srv_is_being_started = FALSE;
if (trx_doublewrite == NULL) {
diff --git a/storage/innodb_plugin/sync/sync0sync.c b/storage/innodb_plugin/sync/sync0sync.c
index 569fc6328c4..2be9d667705 100644
--- a/storage/innodb_plugin/sync/sync0sync.c
+++ b/storage/innodb_plugin/sync/sync0sync.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -315,6 +315,15 @@ mutex_free(
ut_a(mutex_get_lock_word(mutex) == 0);
ut_a(mutex_get_waiters(mutex) == 0);
+#ifdef UNIV_MEM_DEBUG
+ if (mutex == &mem_hash_mutex) {
+ ut_ad(UT_LIST_GET_LEN(mutex_list) == 1);
+ ut_ad(UT_LIST_GET_FIRST(mutex_list) == &mem_hash_mutex);
+ UT_LIST_REMOVE(list, mutex_list, mutex);
+ goto func_exit;
+ }
+#endif /* UNIV_MEM_DEBUG */
+
if (mutex != &mutex_list_mutex
#ifdef UNIV_SYNC_DEBUG
&& mutex != &sync_thread_mutex
@@ -336,7 +345,9 @@ mutex_free(
}
os_event_free(mutex->event);
-
+#ifdef UNIV_MEM_DEBUG
+func_exit:
+#endif /* UNIV_MEM_DEBUG */
#if !defined(HAVE_ATOMIC_BUILTINS)
os_fast_mutex_free(&(mutex->os_fast_mutex));
#endif
@@ -947,12 +958,62 @@ sync_thread_levels_contain(
}
/******************************************************************//**
+Checks if the level array for the current thread contains a
+mutex or rw-latch at the specified level. The array is scanned while
+holding sync_thread_mutex and the first matching slot wins.
+@return a matching latch, or NULL if not found; NULL is also returned
+when sync_order_checks_on is not set or the thread has no level slot */
+UNIV_INTERN
+void*
+sync_thread_levels_contains(
+/*========================*/
+ ulint level) /*!< in: latching order level
+ (SYNC_DICT, ...)*/
+{
+ sync_level_t* arr;
+ sync_thread_t* thread_slot;
+ sync_level_t* slot;
+ ulint i;
+
+ if (!sync_order_checks_on) {
+
+ return(NULL);
+ }
+
+ mutex_enter(&sync_thread_mutex);
+
+ thread_slot = sync_thread_level_arrays_find_slot();
+
+ if (thread_slot == NULL) {
+
+ mutex_exit(&sync_thread_mutex);
+
+ return(NULL);
+ }
+
+ arr = thread_slot->levels;
+
+ for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
+
+ slot = sync_thread_levels_get_nth(arr, i);
+
+ if (slot->latch != NULL && slot->level == level) {
+
+ mutex_exit(&sync_thread_mutex);
+ return(slot->latch);
+ }
+ }
+
+ mutex_exit(&sync_thread_mutex);
+
+ return(NULL);
+}
+
+/******************************************************************//**
Checks that the level array for the current thread is empty.
-@return TRUE if empty except the exceptions specified below */
+@return a latch, or NULL if empty except the exceptions specified below */
UNIV_INTERN
-ibool
-sync_thread_levels_empty_gen(
-/*=========================*/
+void*
+sync_thread_levels_nonempty_gen(
+/*============================*/
ibool dict_mutex_allowed) /*!< in: TRUE if dictionary mutex is
allowed to be owned by the thread,
also purge_is_running mutex is
@@ -965,7 +1026,7 @@ sync_thread_levels_empty_gen(
if (!sync_order_checks_on) {
- return(TRUE);
+ return(NULL);
}
mutex_enter(&sync_thread_mutex);
@@ -976,7 +1037,7 @@ sync_thread_levels_empty_gen(
mutex_exit(&sync_thread_mutex);
- return(TRUE);
+ return(NULL);
}
arr = thread_slot->levels;
@@ -993,13 +1054,13 @@ sync_thread_levels_empty_gen(
mutex_exit(&sync_thread_mutex);
ut_error;
- return(FALSE);
+ return(slot->latch);
}
}
mutex_exit(&sync_thread_mutex);
- return(TRUE);
+ return(NULL);
}
/******************************************************************//**
@@ -1370,6 +1431,12 @@ sync_close(void)
mutex = UT_LIST_GET_FIRST(mutex_list);
while (mutex) {
+#ifdef UNIV_MEM_DEBUG
+ if (mutex == &mem_hash_mutex) {
+ mutex = UT_LIST_GET_NEXT(list, mutex);
+ continue;
+ }
+#endif /* UNIV_MEM_DEBUG */
mutex_free(mutex);
mutex = UT_LIST_GET_FIRST(mutex_list);
}
diff --git a/storage/innodb_plugin/trx/trx0i_s.c b/storage/innodb_plugin/trx/trx0i_s.c
index 1b20eaabf42..c160eb2942a 100644
--- a/storage/innodb_plugin/trx/trx0i_s.c
+++ b/storage/innodb_plugin/trx/trx0i_s.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2007, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,11 +28,18 @@ table cache" for later retrieval.
Created July 17, 2007 Vasil Dimov
*******************************************************/
+/* Found during the build of 5.5.3 on Linux 2.4 and early 2.6 kernels:
+ The includes "univ.i" -> "my_global.h" cause a different path
+ to be taken further down with pthread functions and types,
+ so they must come first.
+ From the symptoms, this is related to bug#46587 in the MySQL bug DB.
+*/
+#include "univ.i"
+
#include <mysql/plugin.h>
#include "mysql_addons.h"
-#include "univ.i"
#include "buf0buf.h"
#include "dict0dict.h"
#include "ha0storage.h"
diff --git a/storage/innodb_plugin/trx/trx0rec.c b/storage/innodb_plugin/trx/trx0rec.c
index 5097cf18dcd..bcc1f81381e 100644
--- a/storage/innodb_plugin/trx/trx0rec.c
+++ b/storage/innodb_plugin/trx/trx0rec.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -977,6 +977,7 @@ trx_undo_update_rec_get_update(
fprintf(stderr, "\n"
"InnoDB: n_fields = %lu, i = %lu, ptr %p\n",
(ulong) n_fields, (ulong) i, ptr);
+ *upd = NULL;
return(NULL);
}
diff --git a/storage/innodb_plugin/trx/trx0rseg.c b/storage/innodb_plugin/trx/trx0rseg.c
index 8d754788e2a..36dea9b2a95 100644
--- a/storage/innodb_plugin/trx/trx0rseg.c
+++ b/storage/innodb_plugin/trx/trx0rseg.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -286,39 +286,3 @@ trx_rseg_list_and_array_init(
}
}
}
-
-/****************************************************************//**
-Creates a new rollback segment to the database.
-@return the created segment object, NULL if fail */
-UNIV_INTERN
-trx_rseg_t*
-trx_rseg_create(
-/*============*/
- ulint space, /*!< in: space id */
- ulint max_size, /*!< in: max size in pages */
- ulint* id, /*!< out: rseg id */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint flags;
- ulint zip_size;
- ulint page_no;
- trx_rseg_t* rseg;
-
- mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
- zip_size = dict_table_flags_to_zip_size(flags);
- mutex_enter(&kernel_mutex);
-
- page_no = trx_rseg_header_create(space, zip_size, max_size, id, mtr);
-
- if (page_no == FIL_NULL) {
-
- mutex_exit(&kernel_mutex);
- return(NULL);
- }
-
- rseg = trx_rseg_mem_create(*id, space, zip_size, page_no, mtr);
-
- mutex_exit(&kernel_mutex);
-
- return(rseg);
-}
diff --git a/storage/innodb_plugin/trx/trx0sys.c b/storage/innodb_plugin/trx/trx0sys.c
index 79e5af1c677..410c55f132d 100644
--- a/storage/innodb_plugin/trx/trx0sys.c
+++ b/storage/innodb_plugin/trx/trx0sys.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -584,8 +584,8 @@ trx_sys_doublewrite_init_or_restore_pages(
" recover the database"
" with the my.cnf\n"
"InnoDB: option:\n"
- "InnoDB: set-variable="
- "innodb_force_recovery=6\n");
+ "InnoDB:"
+ " innodb_force_recovery=6\n");
exit(1);
}
@@ -1535,6 +1535,7 @@ trx_sys_file_format_id_to_name(
#endif /* !UNIV_HOTBACKUP */
+#ifndef UNIV_HOTBACKUP
/*********************************************************************
Shutdown/Close the transaction system. */
UNIV_INTERN
@@ -1611,3 +1612,4 @@ trx_sys_close(void)
trx_sys = NULL;
mutex_exit(&kernel_mutex);
}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innodb_plugin/trx/trx0trx.c b/storage/innodb_plugin/trx/trx0trx.c
index 0951b98b79f..e8c98e22918 100644
--- a/storage/innodb_plugin/trx/trx0trx.c
+++ b/storage/innodb_plugin/trx/trx0trx.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -425,6 +425,7 @@ trx_lists_init_at_db_start(void)
trx_undo_t* undo;
trx_t* trx;
+ ut_ad(mutex_own(&kernel_mutex));
UT_LIST_INIT(trx_sys->trx_list);
/* Look from the rollback segments if there exist undo logs for
diff --git a/storage/innodb_plugin/ut/ut0rbt.c b/storage/innodb_plugin/ut/ut0rbt.c
new file mode 100644
index 00000000000..3d7bc91e714
--- /dev/null
+++ b/storage/innodb_plugin/ut/ut0rbt.c
@@ -0,0 +1,1249 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file ut/ut0rbt.c
+Red-Black tree implementation
+
+Created 2007-03-20 Sunny Bains
+***********************************************************************/
+
+#include "ut0rbt.h"
+
+/************************************************************************
+Definition of a red-black tree
+==============================
+
+A red-black tree is a binary search tree which has the following
+red-black properties:
+
+ 1. Every node is either red or black.
+ 2. Every leaf (NULL - in our case tree->nil) is black.
+ 3. If a node is red, then both its children are black.
+ 4. Every simple path from a node to a descendant leaf contains the
+ same number of black nodes.
+
+ from (3) above, the implication is that on any path from the root
+ to a leaf, red nodes must not be adjacent.
+
+ However, any number of black nodes may appear in a sequence. */
+
+#if defined(IB_RBT_TESTING)
+#warning "Testing enabled!"
+#endif
+
+#define ROOT(t) (t->root->left) /* the real root hangs off the left link of the "fake" root t->root */
+#define SIZEOF_NODE(t) ((sizeof(ib_rbt_node_t) + t->sizeof_value) - 1) /* bytes allocated per node; presumably the -1 offsets a 1-byte value placeholder in ib_rbt_node_t - TODO confirm in ut0rbt.h */
+
+/****************************************************************//**
+Print out the sub-tree rooted at node recursively, in pre-order
+(the node itself first, then its left and its right sub-tree). */
+static
+void
+rbt_print_subtree(
+/*==============*/
+ const ib_rbt_t* tree, /*!< in: tree to traverse */
+ const ib_rbt_node_t* node, /*!< in: root of the sub-tree to print */
+ ib_rbt_print_node print) /*!< in: callback invoked on each node */
+{
+ /* The nil sentinel ends the recursion. NOTE(review): a FIXME here used
+ to claim this does nothing, but print() is called for every node. */
+ if (node != tree->nil) {
+ print(node);
+ rbt_print_subtree(tree, node->left, print);
+ rbt_print_subtree(tree, node->right, print);
+ }
+}
+
+/****************************************************************//**
+Verify that the keys are in strictly increasing order (two adjacent
+nodes that compare equal are reported as a failure).
+@return TRUE if OK, FALSE if not ordered */
+static
+ibool
+rbt_check_ordering(
+/*===============*/
+ const ib_rbt_t* tree) /*!< in: tree to verify */
+{
+ const ib_rbt_node_t* node;
+ const ib_rbt_node_t* prev = NULL;
+
+ /* Iterate over all the nodes, comparing each node with the prev.
+ prev equals node when the loop advances, so rbt_next(tree, prev)
+ steps to the node after the one just examined. */
+ for (node = rbt_first(tree); node; node = rbt_next(tree, prev)) {
+
+ if (prev && tree->compare(prev->value, node->value) >= 0) {
+ return(FALSE);
+ }
+
+ prev = node;
+ }
+
+ return(TRUE);
+}
+
+/****************************************************************//**
+Check that every path from the root to the leaves has the same count.
+Count is expressed in the number of black nodes; the nil sentinel
+counts as 1. Also rejects a red node with a non-black child and a node
+whose color is neither red nor black.
+NOTE(review): declared ibool although a ulint height is returned.
+@return 0 on failure else black height of the subtree */
+static
+ibool
+rbt_count_black_nodes(
+/*==================*/
+ const ib_rbt_t* tree, /*!< in: tree to verify */
+ const ib_rbt_node_t* node) /*!< in: start of sub-tree */
+{
+ ulint result;
+
+ if (node != tree->nil) {
+ ulint left_height = rbt_count_black_nodes(tree, node->left);
+
+ ulint right_height = rbt_count_black_nodes(tree, node->right);
+
+ if (left_height == 0
+ || right_height == 0
+ || left_height != right_height) {
+
+ result = 0;
+ } else if (node->color == IB_RBT_RED) {
+
+ /* Property 3: a red node must have two black children. */
+ if (node->left->color != IB_RBT_BLACK
+ || node->right->color != IB_RBT_BLACK) {
+
+ result = 0;
+ } else {
+ result = left_height;
+ }
+ /* Check if it's anything other than RED or BLACK. */
+ } else if (node->color != IB_RBT_BLACK) {
+
+ result = 0;
+ } else {
+
+ result = right_height + 1;
+ }
+ } else {
+ result = 1;
+ }
+
+ return(result);
+}
+
+/****************************************************************//**
+Turn the node's right child's left sub-tree into node's right sub-tree.
+This will also make node's right child its parent. */
+static
+void
+rbt_rotate_left(
+/*============*/
+ const ib_rbt_node_t* nil, /*!< in: nil node of the tree */
+ ib_rbt_node_t* node) /*!< in: node to rotate */
+{
+ ib_rbt_node_t* right = node->right;
+
+ node->right = right->left;
+
+ if (right->left != nil) {
+ right->left->parent = node;
+ }
+
+ /* Right's new parent was node's parent. */
+ right->parent = node->parent;
+
+ /* The real root's parent is the fake root (tree->root), whose left
+ link points back to the real root, so no root special case is needed. */
+ if (node == node->parent->left) {
+ /* Node was on the left of its parent. */
+ node->parent->left = right;
+ } else {
+ /* Node must have been on the right. */
+ node->parent->right = right;
+ }
+
+ /* Finally, put node on right's left. */
+ right->left = node;
+ node->parent = right;
+}
+
+/****************************************************************//**
+Turn the node's left child's right sub-tree into node's left sub-tree.
+This also makes node's left child its parent. */
+static
+void
+rbt_rotate_right(
+/*=============*/
+ const ib_rbt_node_t* nil, /*!< in: nil node of tree */
+ ib_rbt_node_t* node) /*!< in: node to rotate */
+{
+ ib_rbt_node_t* left = node->left;
+
+ node->left = left->right;
+
+ if (left->right != nil) {
+ left->right->parent = node;
+ }
+
+ /* Left's new parent was node's parent. */
+ left->parent = node->parent;
+
+ /* The real root's parent is the fake root (tree->root), whose left
+ link points back to the real root, so no root special case is needed. */
+ if (node == node->parent->right) {
+ /* Node was on the right of its parent. */
+ node->parent->right = left;
+ } else {
+ /* Node must have been on the left. */
+ node->parent->left = left;
+ }
+
+ /* Finally, put node on left's right. */
+ left->right = node;
+ node->parent = left;
+}
+
+/****************************************************************//**
+Link a node into the tree as a child of parent->last. The node becomes
+the left child when parent->last is the fake root or parent->result < 0,
+otherwise the right child. No rebalancing is done here.
+@return inserted node */
+static
+ib_rbt_node_t*
+rbt_tree_add_child(
+/*===============*/
+ const ib_rbt_t* tree, /*!< in: rbt tree */
+ ib_rbt_bound_t* parent, /*!< in: node's parent and the result
+ of comparing the key with it */
+ ib_rbt_node_t* node) /*!< in: node to add */
+{
+ /* Cast away the const. */
+ ib_rbt_node_t* last = (ib_rbt_node_t*) parent->last;
+
+ if (last == tree->root || parent->result < 0) {
+ last->left = node;
+ } else {
+ /* FIXME: We don't handle duplicates (yet)! An equal key
+ (result == 0) trips this assertion. */
+ ut_a(parent->result != 0);
+
+ last->right = node;
+ }
+
+ node->parent = last;
+
+ return(node);
+}
+
+/****************************************************************//**
+Generic binary tree insert: descend from the real root to a nil link
+using tree->compare(), then attach node there. Colors are not touched.
+@return inserted node */
+static
+ib_rbt_node_t*
+rbt_tree_insert(
+/*============*/
+ ib_rbt_t* tree, /*!< in: rb tree */
+ const void* key, /*!< in: key for ordering */
+ ib_rbt_node_t* node) /*!< in: node hold the insert value */
+{
+ ib_rbt_bound_t parent;
+ ib_rbt_node_t* current = ROOT(tree);
+
+ /* For an empty tree the loop is skipped and the node is attached
+ to the fake root. */
+ parent.result = 0;
+ parent.last = tree->root;
+
+ /* Regular binary search. A key that compares equal descends to the
+ right; if it is equal to the last node visited, the assertion in
+ rbt_tree_add_child() fires. */
+ while (current != tree->nil) {
+
+ parent.last = current;
+ parent.result = tree->compare(key, current->value);
+
+ if (parent.result < 0) {
+ current = current->left;
+ } else {
+ current = current->right;
+ }
+ }
+
+ ut_a(current == tree->nil);
+
+ rbt_tree_add_child(tree, &parent, node);
+
+ return(node);
+}
+
+/****************************************************************//**
+Balance a tree after inserting a node. The new node is colored red and
+red-red violations are repaired by recoloring and rotations while
+walking up towards the root; the root is colored black at the end. */
+static
+void
+rbt_balance_tree(
+/*=============*/
+ const ib_rbt_t* tree, /*!< in: tree to balance */
+ ib_rbt_node_t* node) /*!< in: node that was inserted */
+{
+ const ib_rbt_node_t* nil = tree->nil;
+ ib_rbt_node_t* parent = node->parent;
+
+ /* Restore the red-black property. */
+ node->color = IB_RBT_RED;
+
+ while (node != ROOT(tree) && parent->color == IB_RBT_RED) {
+ ib_rbt_node_t* grand_parent = parent->parent;
+
+ if (parent == grand_parent->left) {
+ ib_rbt_node_t* uncle = grand_parent->right;
+
+ if (uncle->color == IB_RBT_RED) {
+
+ /* Case 1 - change the colors. */
+ uncle->color = IB_RBT_BLACK;
+ parent->color = IB_RBT_BLACK;
+ grand_parent->color = IB_RBT_RED;
+
+ /* Move node up the tree. */
+ node = grand_parent;
+
+ } else {
+
+ if (node == parent->right) {
+ /* Uncle is black and node is a
+ right child, case 2 - move node
+ up and rotate. */
+ node = parent;
+ rbt_rotate_left(nil, node);
+ }
+
+ grand_parent = node->parent->parent;
+
+ /* Case 3. */
+ node->parent->color = IB_RBT_BLACK;
+ grand_parent->color = IB_RBT_RED;
+
+ rbt_rotate_right(nil, grand_parent);
+ }
+
+ } else {
+ ib_rbt_node_t* uncle = grand_parent->left;
+
+ if (uncle->color == IB_RBT_RED) {
+
+ /* Case 1 - change the colors. */
+ uncle->color = IB_RBT_BLACK;
+ parent->color = IB_RBT_BLACK;
+ grand_parent->color = IB_RBT_RED;
+
+ /* Move node up the tree. */
+ node = grand_parent;
+
+ } else {
+
+ if (node == parent->left) {
+ /* Uncle is black and node is a
+ left child, case 2 - move node up and
+ rotate. */
+ node = parent;
+ rbt_rotate_right(nil, node);
+ }
+
+ grand_parent = node->parent->parent;
+
+ /* Case 3. */
+ node->parent->color = IB_RBT_BLACK;
+ grand_parent->color = IB_RBT_RED;
+
+ rbt_rotate_left(nil, grand_parent);
+ }
+ }
+
+ parent = node->parent;
+ }
+
+ /* Color the root black. */
+ ROOT(tree)->color = IB_RBT_BLACK;
+}
+
+/****************************************************************//**
+Find the given node's in-order successor.
+@return successor node or NULL if no successor */
+static
+ib_rbt_node_t*
+rbt_find_successor(
+/*===============*/
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ const ib_rbt_node_t* current)/*!< in: this is declared const
+ because it can be called via
+ rbt_next() */
+{
+ const ib_rbt_node_t* nil = tree->nil;
+ ib_rbt_node_t* next = current->right;
+
+ /* Is there a sub-tree to the right that we can follow. */
+ if (next != nil) {
+
+ /* Follow the left most links of the current right child. */
+ while (next->left != nil) {
+ next = next->left;
+ }
+
+ } else { /* We will have to go up the tree to find the successor. */
+ ib_rbt_node_t* parent = current->parent;
+
+ /* Cast away the const. */
+ next = (ib_rbt_node_t*) current;
+
+ /* Climb while we arrive from a right link; stop at the
+ fake root. */
+ while (parent != tree->root && next == parent->right) {
+ next = parent;
+ parent = next->parent;
+ }
+
+ /* Reaching the fake root means current was the last node. */
+ next = (parent == tree->root) ? NULL : parent;
+ }
+
+ return(next);
+}
+
+/****************************************************************//**
+Find the given node's in-order predecessor.
+@return predecessor node or NULL if no predecessor */
+static
+ib_rbt_node_t*
+rbt_find_predecessor(
+/*=================*/
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ const ib_rbt_node_t* current) /*!< in: this is declared const
+ because it can be called via
+ rbt_prev() */
+{
+ const ib_rbt_node_t* nil = tree->nil;
+ ib_rbt_node_t* prev = current->left;
+
+ /* Is there a sub-tree to the left that we can follow. */
+ if (prev != nil) {
+
+ /* Follow the right most links of the current left child. */
+ while (prev->right != nil) {
+ prev = prev->right;
+ }
+
+ } else { /* We will have to go up the tree to find the predecessor. */
+ ib_rbt_node_t* parent = current->parent;
+
+ /* Cast away the const. */
+ prev = (ib_rbt_node_t*)current;
+
+ /* Climb while we arrive from a left link; stop at the
+ fake root. */
+ while (parent != tree->root && prev == parent->left) {
+ prev = parent;
+ parent = prev->parent;
+ }
+
+ /* Reaching the fake root means there is no predecessor. */
+ prev = (parent == tree->root) ? NULL : parent;
+ }
+
+ return(prev);
+}
+
+/****************************************************************//**
+Replace eject with node in eject's parent. After applying the
+transformation eject becomes an orphan. Asserts if eject is not a
+child of its own parent. */
+static
+void
+rbt_eject_node(
+/*===========*/
+ ib_rbt_node_t* eject, /*!< in: node to eject */
+ ib_rbt_node_t* node) /*!< in: node to replace with */
+{
+ /* Update the to be ejected node's parent's child pointers. */
+ if (eject->parent->left == eject) {
+ eject->parent->left = node;
+ } else if (eject->parent->right == eject) {
+ eject->parent->right = node;
+ } else {
+ ut_a(0);
+ }
+ /* eject is now an orphan but otherwise its pointers
+ and color are left intact. */
+
+ /* The parent link is written unconditionally, whatever node is. */
+ node->parent = eject->parent;
+}
+
+/****************************************************************//**
+Replace a node with another node: node takes over replace's children,
+position under replace's parent and color; replace gets node's old
+color. */
+static
+void
+rbt_replace_node(
+/*=============*/
+ ib_rbt_node_t* replace, /*!< in: node to replace */
+ ib_rbt_node_t* node) /*!< in: node to replace with */
+{
+ /* Remember node's color before it is overwritten below. */
+ ib_rbt_color_t color = node->color;
+
+ /* Update the node pointers. */
+ node->left = replace->left;
+ node->right = replace->right;
+
+ /* Update the child node pointers. */
+ node->left->parent = node;
+ node->right->parent = node;
+
+ /* Make the parent of replace point to node. */
+ rbt_eject_node(replace, node);
+
+ /* Swap the colors. */
+ node->color = replace->color;
+ replace->color = color;
+}
+
+/****************************************************************//**
+Detach node from the tree replacing it with one of its children, or,
+when it has two children, with its successor (whose right child then
+takes the successor's old place). The detached node's links are reset
+to nil; its color field is not reset here.
+@return the child node that now occupies the position of the detached
+node, or of the moved successor in the two-children case */
+static
+ib_rbt_node_t*
+rbt_detach_node(
+/*============*/
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ ib_rbt_node_t* node) /*!< in: node to detach */
+{
+ ib_rbt_node_t* child;
+ const ib_rbt_node_t* nil = tree->nil;
+
+ if (node->left != nil && node->right != nil) {
+ /* Case where the node to be deleted has two children. */
+ ib_rbt_node_t* successor = rbt_find_successor(tree, node);
+
+ ut_a(successor != nil);
+ ut_a(successor->parent != nil);
+ ut_a(successor->left == nil);
+
+ child = successor->right;
+
+ /* Remove the successor node and replace with its child. */
+ rbt_eject_node(successor, child);
+
+ /* Replace the node to delete with its successor node. */
+ rbt_replace_node(node, successor);
+ } else {
+ ut_a(node->left == nil || node->right == nil);
+
+ child = (node->left != nil) ? node->left : node->right;
+
+ /* Replace the node to delete with one of its children. */
+ rbt_eject_node(node, child);
+ }
+
+ /* Reset the node links. */
+ node->parent = node->right = node->left = tree->nil;
+
+ return(child);
+}
+
+/****************************************************************//**
+Rebalance after a deletion by working on parent's right child (the
+sibling); rbt_remove_node_and_rebalance() calls this when the affected
+child is parent's left child.
+@return node to rebalance if more rebalancing required else NULL */
+static
+ib_rbt_node_t*
+rbt_balance_right(
+/*==============*/
+ const ib_rbt_node_t* nil, /*!< in: rb tree nil node */
+ ib_rbt_node_t* parent, /*!< in: parent node */
+ ib_rbt_node_t* sibling)/*!< in: sibling node */
+{
+ ib_rbt_node_t* node = NULL;
+
+ ut_a(sibling != nil);
+
+ /* Red sibling: recolor and rotate so that the sibling is black. */
+ if (sibling->color == IB_RBT_RED) {
+
+ parent->color = IB_RBT_RED;
+ sibling->color = IB_RBT_BLACK;
+
+ rbt_rotate_left(nil, parent);
+
+ sibling = parent->right;
+
+ ut_a(sibling != nil);
+ }
+
+ /* Since this will violate case 3 because of the change above. */
+ if (sibling->left->color == IB_RBT_BLACK
+ && sibling->right->color == IB_RBT_BLACK) {
+
+ node = parent; /* Parent needs to be rebalanced too. */
+ sibling->color = IB_RBT_RED;
+
+ } else {
+ if (sibling->right->color == IB_RBT_BLACK) {
+
+ ut_a(sibling->left->color == IB_RBT_RED);
+
+ sibling->color = IB_RBT_RED;
+ sibling->left->color = IB_RBT_BLACK;
+
+ rbt_rotate_right(nil, sibling);
+
+ sibling = parent->right;
+ ut_a(sibling != nil);
+ }
+
+ /* The sibling's right child is red here: final recolor and
+ rotation, after which NULL is returned. */
+ sibling->color = parent->color;
+ sibling->right->color = IB_RBT_BLACK;
+
+ parent->color = IB_RBT_BLACK;
+
+ rbt_rotate_left(nil, parent);
+ }
+
+ return(node);
+}
+
+/****************************************************************//**
+Rebalance after a deletion by working on parent's left child (the
+sibling); rbt_remove_node_and_rebalance() calls this when the affected
+child is parent's right child. Mirror image of rbt_balance_right().
+@return node to rebalance if more rebalancing required else NULL */
+static
+ib_rbt_node_t*
+rbt_balance_left(
+/*=============*/
+ const ib_rbt_node_t* nil, /*!< in: rb tree nil node */
+ ib_rbt_node_t* parent, /*!< in: parent node */
+ ib_rbt_node_t* sibling)/*!< in: sibling node */
+{
+ ib_rbt_node_t* node = NULL;
+
+ ut_a(sibling != nil);
+
+ /* Red sibling: recolor and rotate so that the sibling is black. */
+ if (sibling->color == IB_RBT_RED) {
+
+ parent->color = IB_RBT_RED;
+ sibling->color = IB_RBT_BLACK;
+
+ rbt_rotate_right(nil, parent);
+ sibling = parent->left;
+
+ ut_a(sibling != nil);
+ }
+
+ /* Since this will violate case 3 because of the change above. */
+ if (sibling->right->color == IB_RBT_BLACK
+ && sibling->left->color == IB_RBT_BLACK) {
+
+ node = parent; /* Parent needs to be rebalanced too. */
+ sibling->color = IB_RBT_RED;
+
+ } else {
+ if (sibling->left->color == IB_RBT_BLACK) {
+
+ ut_a(sibling->right->color == IB_RBT_RED);
+
+ sibling->color = IB_RBT_RED;
+ sibling->right->color = IB_RBT_BLACK;
+
+ rbt_rotate_left(nil, sibling);
+
+ sibling = parent->left;
+
+ ut_a(sibling != nil);
+ }
+
+ /* The sibling's left child is red here: final recolor and
+ rotation, after which NULL is returned. */
+ sibling->color = parent->color;
+ sibling->left->color = IB_RBT_BLACK;
+
+ parent->color = IB_RBT_BLACK;
+
+ rbt_rotate_right(nil, parent);
+ }
+
+ return(node);
+}
+
+/****************************************************************//**
+Detach the node from the tree, rebalance the tree if necessary and
+decrement tree->n_nodes. The node itself is not freed here. */
+static
+void
+rbt_remove_node_and_rebalance(
+/*==========================*/
+ ib_rbt_t* tree, /*!< in: rb tree */
+ ib_rbt_node_t* node) /*!< in: node to remove */
+{
+ /* Detach node and get the node that will be used
+ as rebalance start. */
+ ib_rbt_node_t* child = rbt_detach_node(tree, node);
+
+ if (node->color == IB_RBT_BLACK) {
+ ib_rbt_node_t* last = child;
+
+ /* Temporarily color the root red so that the loop below
+ stops there; it is colored black again afterwards. */
+ ROOT(tree)->color = IB_RBT_RED;
+
+ while (child && child->color == IB_RBT_BLACK) {
+ ib_rbt_node_t* parent = child->parent;
+
+ /* Did the deletion cause an imbalance in the
+ parents left sub-tree. */
+ if (parent->left == child) {
+
+ child = rbt_balance_right(
+ tree->nil, parent, parent->right);
+
+ } else if (parent->right == child) {
+
+ child = rbt_balance_left(
+ tree->nil, parent, parent->left);
+
+ } else {
+ ut_error;
+ }
+
+ /* The balance functions return NULL when done;
+ remember the last node that was visited. */
+ if (child) {
+ last = child;
+ }
+ }
+
+ ut_a(last);
+
+ last->color = IB_RBT_BLACK;
+ ROOT(tree)->color = IB_RBT_BLACK;
+ }
+
+ /* Note that we have removed a node from the tree. */
+ --tree->n_nodes;
+}
+
+/****************************************************************//**
+Recursively free the nodes of a sub-tree in post-order (both children
+before the node itself). The nil sentinel is never freed here. */
+static
+void
+rbt_free_node(
+/*==========*/
+ ib_rbt_node_t* node, /*!< in: root of the sub-tree to free */
+ ib_rbt_node_t* nil) /*!< in: rb tree nil node */
+{
+ if (node != nil) {
+ rbt_free_node(node->left, nil);
+ rbt_free_node(node->right, nil);
+
+ ut_free(node);
+ }
+}
+
+/****************************************************************//**
+Free all the nodes (starting from the fake root, so it is freed too),
+then the nil sentinel and finally the tree object itself. */
+UNIV_INTERN
+void
+rbt_free(
+/*=====*/
+ ib_rbt_t* tree) /*!< in: rb tree to free */
+{
+ rbt_free_node(tree->root, tree->nil);
+ ut_free(tree->nil);
+ ut_free(tree);
+}
+
+/****************************************************************//**
+Create an instance of a red black tree. Allocates the tree object, a
+black nil sentinel whose links all point to itself, and a black "fake"
+root whose links point to nil.
+NOTE(review): the ut_malloc() results are used unchecked - presumably
+ut_malloc() does not return NULL; confirm.
+@return an empty rb tree */
+UNIV_INTERN
+ib_rbt_t*
+rbt_create(
+/*=======*/
+ size_t sizeof_value, /*!< in: sizeof data item */
+ ib_rbt_compare compare) /*!< in: fn to compare items */
+{
+ ib_rbt_t* tree;
+ ib_rbt_node_t* node;
+
+ tree = (ib_rbt_t*) ut_malloc(sizeof(*tree));
+ memset(tree, 0, sizeof(*tree));
+
+ tree->sizeof_value = sizeof_value;
+
+ /* Create the sentinel (NIL) node. */
+ node = tree->nil = (ib_rbt_node_t*) ut_malloc(sizeof(*node));
+ memset(node, 0, sizeof(*node));
+
+ node->color = IB_RBT_BLACK;
+ node->parent = node->left = node->right = node;
+
+ /* Create the "fake" root, the real root node will be the
+ left child of this node. */
+ node = tree->root = (ib_rbt_node_t*) ut_malloc(sizeof(*node));
+ memset(node, 0, sizeof(*node));
+
+ node->color = IB_RBT_BLACK;
+ node->parent = node->left = node->right = tree->nil;
+
+ tree->compare = compare;
+
+ return(tree);
+}
+
+/****************************************************************//**
+Generic insert of a value in the rb tree: allocate a node, copy
+tree->sizeof_value bytes of value into it, link it in by key and
+rebalance. tree->n_nodes is incremented.
+@return inserted node */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_insert(
+/*=======*/
+ ib_rbt_t* tree, /*!< in: rb tree */
+ const void* key, /*!< in: key for ordering */
+ const void* value) /*!< in: value of key, this value
+ is copied to the node */
+{
+ ib_rbt_node_t* node;
+
+ /* Create the node that will hold the value data. */
+ node = (ib_rbt_node_t*) ut_malloc(SIZEOF_NODE(tree));
+
+ memcpy(node->value, value, tree->sizeof_value);
+ node->parent = node->left = node->right = tree->nil;
+
+ /* Insert in the tree in the usual way; the node's color is set
+ by rbt_balance_tree(). */
+ rbt_tree_insert(tree, key, node);
+ rbt_balance_tree(tree, node);
+
+ ++tree->n_nodes;
+
+ return(node);
+}
+
+/****************************************************************//**
+Add a new node to the tree, useful for data that is pre-sorted. No
+search is done: the node is attached directly under parent->last on
+the side selected by parent->result, then the tree is rebalanced.
+@return appended node */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_add_node(
+/*=========*/
+ ib_rbt_t* tree, /*!< in: rb tree */
+ ib_rbt_bound_t* parent, /*!< in/out: bounds; a NULL
+ parent->last is replaced with
+ the fake root */
+ const void* value) /*!< in: this value is copied
+ to the node */
+{
+ ib_rbt_node_t* node;
+
+ /* Create the node that will hold the value data */
+ node = (ib_rbt_node_t*) ut_malloc(SIZEOF_NODE(tree));
+
+ memcpy(node->value, value, tree->sizeof_value);
+ node->parent = node->left = node->right = tree->nil;
+
+ /* If tree is empty */
+ if (parent->last == NULL) {
+ parent->last = tree->root;
+ }
+
+ /* Append the node, the hope here is that the caller knows
+ what s/he is doing. */
+ rbt_tree_add_child(tree, parent, node);
+ rbt_balance_tree(tree, node);
+
+ ++tree->n_nodes;
+
+#if defined(IB_RBT_TESTING)
+ ut_a(rbt_validate(tree));
+#endif
+ return(node);
+}
+
+/****************************************************************//**
+Find a matching node in the rb tree, i.e. a node for which
+tree->compare(key, node->value) returns 0.
+@return NULL if not found else the node where key was found */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_lookup(
+/*=======*/
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ const void* key) /*!< in: key to use for search */
+{
+ const ib_rbt_node_t* current = ROOT(tree);
+
+ /* Regular binary search. */
+ while (current != tree->nil) {
+ int result = tree->compare(key, current->value);
+
+ if (result < 0) {
+ current = current->left;
+ } else if (result > 0) {
+ current = current->right;
+ } else {
+ break;
+ }
+ }
+
+ /* Falling off the tree leaves current at the nil sentinel. */
+ return(current != tree->nil ? current : NULL);
+}
+
+/****************************************************************//**
+Delete a node from the red black tree, identified by key. The node
+found by rbt_lookup() is unlinked, the tree is rebalanced and the node
+is freed.
+@return TRUE if success FALSE if not found */
+UNIV_INTERN
+ibool
+rbt_delete(
+/*=======*/
+ ib_rbt_t* tree, /*!< in: rb tree */
+ const void* key) /*!< in: key to delete */
+{
+ ibool deleted = FALSE;
+ ib_rbt_node_t* node = (ib_rbt_node_t*) rbt_lookup(tree, key);
+
+ if (node) {
+ rbt_remove_node_and_rebalance(tree, node);
+
+ ut_free(node);
+ deleted = TRUE;
+ }
+
+ return(deleted);
+}
+
+/****************************************************************//**
+Remove a node from the rb tree, the node is not freed, that is the
+caller's responsibility.
+@return deleted node but without the const */
+UNIV_INTERN
+ib_rbt_node_t*
+rbt_remove_node(
+/*============*/
+ ib_rbt_t* tree, /*!< in: rb tree */
+ const ib_rbt_node_t* const_node) /*!< in: node to delete, this
+ is a fudge and declared const
+ because the caller can access
+ only const nodes */
+{
+ /* Cast away the const. */
+ rbt_remove_node_and_rebalance(tree, (ib_rbt_node_t*) const_node);
+
+ /* This is to make it easier to do something like this:
+ ut_free(rbt_remove_node(tree, node));
+ */
+
+ return((ib_rbt_node_t*) const_node);
+}
+
+/****************************************************************//**
+Find the node that has the lowest key that is >= key.
+@return node satisfying the lower bound constraint or NULL if every
+node compares less than key (or the tree is empty) */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_lower_bound(
+/*============*/
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ const void* key) /*!< in: key to search */
+{
+ ib_rbt_node_t* lb_node = NULL;
+ ib_rbt_node_t* current = ROOT(tree);
+
+ while (current != tree->nil) {
+ int result = tree->compare(key, current->value);
+
+ if (result > 0) {
+
+ current = current->right;
+
+ } else if (result < 0) {
+
+ /* current is a candidate; look for a smaller one. */
+ lb_node = current;
+ current = current->left;
+
+ } else {
+ lb_node = current;
+ break;
+ }
+ }
+
+ return(lb_node);
+}
+
+/****************************************************************//**
+Find the node that has the greatest key that is <= key.
+@return node satisfying the upper bound constraint or NULL if there is none */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_upper_bound(
+/*============*/
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ const void* key) /*!< in: key to search */
+{
+ ib_rbt_node_t* ub_node = NULL; /* best candidate seen so far */
+ ib_rbt_node_t* current = ROOT(tree);
+
+ while (current != tree->nil) {
+ int result = tree->compare(key, current->value);
+
+ if (result > 0) {
+
+ ub_node = current; /* current < key: remember it ... */
+ current = current->right; /* ... and look for a greater one */
+
+ } else if (result < 0) {
+
+ current = current->left; /* current > key: not a candidate */
+
+ } else {
+ ub_node = current; /* exact match is the upper bound */
+ break;
+ }
+ }
+
+ return(ub_node);
+}
+
+/****************************************************************//**
+Search for key, recording the last node visited and last compare in parent.
+@return value of result (0 if key was found; 1 if the tree is empty) */
+UNIV_INTERN
+int
+rbt_search(
+/*=======*/
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ ib_rbt_bound_t* parent, /*!< out: last node visited and last compare result */
+ const void* key) /*!< in: key to search */
+{
+ ib_rbt_node_t* current = ROOT(tree);
+
+ /* Everything is greater than the NULL root: defaults for an empty tree. */
+ parent->result = 1;
+ parent->last = NULL;
+
+ while (current != tree->nil) {
+
+ parent->last = current;
+ parent->result = tree->compare(key, current->value);
+
+ if (parent->result > 0) {
+ current = current->right;
+ } else if (parent->result < 0) {
+ current = current->left;
+ } else {
+ break; /* exact match: parent->last is the matching node */
+ }
+ }
+
+ return(parent->result);
+}
+
+/****************************************************************//**
+Search for key, recording the last node visited and last compare in parent,
+but use the supplied comparison function instead of tree->compare.
+@return value of result (0 if key was found; 1 if the tree is empty) */
+UNIV_INTERN
+int
+rbt_search_cmp(
+/*===========*/
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ ib_rbt_bound_t* parent, /*!< out: last node visited and last compare result */
+ const void* key, /*!< in: key to search */
+ ib_rbt_compare compare) /*!< in: fn to compare items */
+{
+ ib_rbt_node_t* current = ROOT(tree);
+
+ /* Everything is greater than the NULL root: defaults for an empty tree. */
+ parent->result = 1;
+ parent->last = NULL;
+
+ while (current != tree->nil) {
+
+ parent->last = current;
+ parent->result = compare(key, current->value);
+
+ if (parent->result > 0) {
+ current = current->right;
+ } else if (parent->result < 0) {
+ current = current->left;
+ } else {
+ break; /* exact match: parent->last is the matching node */
+ }
+ }
+
+ return(parent->result);
+}
+
+/****************************************************************//**
+Return the left most node in the tree.
+@return the leftmost node or NULL if the tree is empty */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_first(
+/*======*/
+ const ib_rbt_t* tree) /*!< in: rb tree */
+{
+ ib_rbt_node_t* first = NULL;
+ ib_rbt_node_t* current = ROOT(tree);
+
+ while (current != tree->nil) { /* follow left links down to nil */
+ first = current;
+ current = current->left;
+ }
+
+ return(first);
+}
+
+/****************************************************************//**
+Return the right most node in the tree.
+@return the rightmost node or NULL if the tree is empty */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_last(
+/*=====*/
+ const ib_rbt_t* tree) /*!< in: rb tree */
+{
+ ib_rbt_node_t* last = NULL;
+ ib_rbt_node_t* current = ROOT(tree);
+
+ while (current != tree->nil) { /* follow right links down to nil */
+ last = current;
+ current = current->right;
+ }
+
+ return(last);
+}
+
+/****************************************************************//**
+Return the next node (delegates to rbt_find_successor()).
+@return node next from current; NULL if current is NULL */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_next(
+/*=====*/
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ const ib_rbt_node_t* current)/*!< in: current node, may be NULL */
+{
+ return(current ? rbt_find_successor(tree, current) : NULL);
+}
+
+/****************************************************************//**
+Return the previous node (delegates to rbt_find_predecessor()).
+@return node prev from current; NULL if current is NULL */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_prev(
+/*=====*/
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ const ib_rbt_node_t* current)/*!< in: current node, may be NULL */
+{
+ return(current ? rbt_find_predecessor(tree, current) : NULL);
+}
+
+/****************************************************************//**
+Reset the tree. Delete all the nodes; the tree object itself is kept. */
+UNIV_INTERN
+void
+rbt_clear(
+/*======*/
+ ib_rbt_t* tree) /*!< in/out: rb tree */
+{
+ rbt_free_node(ROOT(tree), tree->nil); /* presumably frees every node below the root -- see rbt_free_node() */
+
+ tree->n_nodes = 0;
+ tree->root->left = tree->root->right = tree->nil; /* both child links of the root point at nil again */
+}
+
+/****************************************************************//**
+Merge the nodes from src into dst, skipping values already present in dst.
+@return no. of recs merged */
+UNIV_INTERN
+ulint
+rbt_merge_uniq(
+/*===========*/
+ ib_rbt_t* dst, /*!< in/out: dst rb tree */
+ const ib_rbt_t* src) /*!< in: src rb tree */
+{
+ ib_rbt_bound_t parent;
+ ulint n_merged = 0;
+ const ib_rbt_node_t* src_node = rbt_first(src);
+
+ if (rbt_empty(src) || dst == src) { /* nothing to merge, or merging a tree into itself */
+ return(0);
+ }
+
+ for (/* No op */; src_node; src_node = rbt_next(src, src_node)) {
+
+ if (rbt_search(dst, &parent, src_node->value) != 0) { /* non-zero: no equal value in dst */
+ rbt_add_node(dst, &parent, src_node->value); /* parent is the position found by the search */
+ ++n_merged;
+ }
+ }
+
+ return(n_merged);
+}
+
+/****************************************************************//**
+Merge the nodes from src into dst. Return the number of nodes merged.
+Merged nodes are unlinked from src and relinked into dst. As a side effect
+the duplicates will be left untouched in the src.
+@return no. of recs merged */
+UNIV_INTERN
+ulint
+rbt_merge_uniq_destructive(
+/*=======================*/
+ ib_rbt_t* dst, /*!< in/out: dst rb tree */
+ ib_rbt_t* src) /*!< in/out: src rb tree */
+{
+ ib_rbt_bound_t parent;
+ ib_rbt_node_t* src_node;
+ ulint old_size = rbt_size(dst);
+
+ if (rbt_empty(src) || dst == src) { /* nothing to merge, or merging a tree into itself */
+ return(0);
+ }
+
+ for (src_node = (ib_rbt_node_t*) rbt_first(src); src_node; /* */) {
+ ib_rbt_node_t* prev = src_node;
+
+ src_node = (ib_rbt_node_t*)rbt_next(src, prev); /* advance before prev may be unlinked from src */
+
+ /* Skip duplicates. */
+ if (rbt_search(dst, &parent, prev->value) != 0) {
+
+ /* Remove and reset the node but preserve
+ the node (data) value. */
+ rbt_remove_node_and_rebalance(src, prev);
+
+ /* The nil should be taken from the dst tree. */
+ prev->parent = prev->left = prev->right = dst->nil;
+ rbt_tree_add_child(dst, &parent, prev);
+ rbt_balance_tree(dst, prev);
+
+ ++dst->n_nodes; /* account for the relinked node */
+ }
+ }
+
+#if defined(IB_RBT_TESTING)
+ ut_a(rbt_validate(dst));
+ ut_a(rbt_validate(src));
+#endif
+ return(rbt_size(dst) - old_size); /* growth of dst == number of nodes moved */
+}
+
+/****************************************************************//**
+Check that every path from the root to the leaves has the same count and
+the tree nodes are in order.
+@return TRUE if OK FALSE otherwise */
+UNIV_INTERN
+ibool
+rbt_validate(
+/*=========*/
+ const ib_rbt_t* tree) /*!< in: RB tree to validate */
+{
+ if (rbt_count_black_nodes(tree, ROOT(tree)) > 0) { /* presumably 0 signals a count mismatch -- confirm in rbt_count_black_nodes() */
+ return(rbt_check_ordering(tree));
+ }
+
+ return(FALSE); /* count check did not pass; ordering is not checked */
+}
+
+/****************************************************************//**
+Iterate over the tree in depth first order (delegates to rbt_print_subtree()). */
+UNIV_INTERN
+void
+rbt_print(
+/*======*/
+ const ib_rbt_t* tree, /*!< in: tree to traverse */
+ ib_rbt_print_node print) /*!< in: print function */
+{
+ rbt_print_subtree(tree, ROOT(tree), print); /* start at the node ROOT(tree) yields */
+}
diff --git a/storage/myisam/ft_boolean_search.c b/storage/myisam/ft_boolean_search.c
index 3cc47576827..8846a5c3c1a 100644
--- a/storage/myisam/ft_boolean_search.c
+++ b/storage/myisam/ft_boolean_search.c
@@ -437,8 +437,18 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
return 0;
}
- /* going up to the first-level tree to continue search there */
+ /*
+ Going up to the first-level tree to continue search there.
+ Only done when performing prefix search.
+
+ Key buffer data pointer as well as docid[0] may be smaller
+ than values we got while searching first-level tree. Thus
+ they must be restored to original values to avoid dead-loop,
+ when subsequent search for a bigger value eventually ends up
+ in this same second-level tree.
+ */
_mi_dpointer(info, (uchar*) (lastkey_buf+HA_FT_WLEN), ftbw->key_root);
+ ftbw->docid[0]= ftbw->key_root;
ftbw->key_root=info->s->state.key_root[ftb->keynr];
ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
ftbw->off=0;
diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
index f5145c3ee79..9597f848616 100644
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@ -1473,9 +1473,17 @@ int ha_myisam::enable_indexes(uint mode)
{
sql_print_warning("Warning: Enabling keys got errno %d on %s.%s, retrying",
my_errno, param.db_name, param.table_name);
- /* Repairing by sort failed. Now try standard repair method. */
- param.testflag&= ~(T_REP_BY_SORT | T_QUICK);
- error= (repair(thd,param,0) != HA_ADMIN_OK);
+ /*
+ Repairing by sort failed. Now try standard repair method.
+ Still we want to fix only index file. If data file corruption
+ was detected (T_RETRY_WITHOUT_QUICK), we shouldn't do much here.
+ Let implicit repair do this job.
+ */
+ if (!(param.testflag & T_RETRY_WITHOUT_QUICK))
+ {
+ param.testflag&= ~T_REP_BY_SORT;
+ error= (repair(thd,param,0) != HA_ADMIN_OK);
+ }
/*
If the standard repair succeeded, clear all error messages which
might have been set by the first repair. They can still be seen
diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c
index 9fe954273fe..7096c03cf7d 100644
--- a/storage/myisam/mi_check.c
+++ b/storage/myisam/mi_check.c
@@ -2393,10 +2393,8 @@ int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info,
/*
fulltext indexes may have much more entries than the
number of rows in the table. We estimate the number here.
-
- Note, built-in parser is always nr. 0 - see ftparser_call_initializer()
*/
- if (sort_param.keyinfo->ftkey_nr == 0)
+ if (sort_param.keyinfo->parser == &ft_default_parser)
{
/*
for built-in parser the number of generated index entries
@@ -2413,8 +2411,9 @@ int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info,
so, we'll use all the sort memory and start from ~10 buffpeks.
(see _create_index_by_sort)
*/
- sort_info.max_records=
- 10*param->sort_buffer_length/sort_param.key_length;
+ sort_info.max_records= 10 *
+ max(param->sort_buffer_length, MIN_SORT_BUFFER) /
+ sort_param.key_length;
}
sort_param.key_read=sort_ft_key_read;
diff --git a/storage/myisam/mi_delete_all.c b/storage/myisam/mi_delete_all.c
index e2bbb04ab3c..7c3ed178c4c 100644
--- a/storage/myisam/mi_delete_all.c
+++ b/storage/myisam/mi_delete_all.c
@@ -55,7 +55,7 @@ int mi_delete_all_rows(MI_INFO *info)
flush_key_blocks(share->key_cache, share->kfile, FLUSH_IGNORE_CHANGED);
#ifdef HAVE_MMAP
if (share->file_map)
- _mi_unmap_file(info);
+ mi_munmap_file(info);
#endif
if (my_chsize(info->dfile, 0, 0, MYF(MY_WME)) ||
my_chsize(share->kfile, share->base.keystart, 0, MYF(MY_WME)) )
diff --git a/storage/myisam/mi_delete_table.c b/storage/myisam/mi_delete_table.c
index a3c0dede581..c4797187bec 100644
--- a/storage/myisam/mi_delete_table.c
+++ b/storage/myisam/mi_delete_table.c
@@ -19,6 +19,41 @@
#include "fulltext.h"
+
+/**
+ Remove MyISAM data/index file safely
+
+ @details
+ If name is a symlink and the file it is pointing to is not in the
+ data directory, the file is also removed; otherwise only name is removed.
+
+ @param name file (or symlink) to remove
+
+ @returns
+ 0 on success or my_errno on failure
+*/
+
+static int _mi_safe_delete_file(const char *name)
+{
+ DBUG_ENTER("_mi_safe_delete_file");
+ if (my_is_symlink(name) && (*myisam_test_invalid_symlink)(name))
+ {
+ /*
+ Symlink is pointing to file in data directory.
+ Remove symlink, keep file.
+ */
+ if (my_delete(name, MYF(MY_WME)))
+ DBUG_RETURN(my_errno);
+ }
+ else /* not a symlink, or the symlink test hook returned 0 */
+ {
+ if (my_delete_with_symlink(name, MYF(MY_WME))) /* presumably removes the link and its target */
+ DBUG_RETURN(my_errno);
+ }
+ DBUG_RETURN(0);
+}
+
+
int mi_delete_table(const char *name)
{
char from[FN_REFLEN];
@@ -58,12 +93,12 @@ int mi_delete_table(const char *name)
#endif /* USE_RAID */
fn_format(from,name,"",MI_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT);
- if (my_delete_with_symlink(from, MYF(MY_WME)))
+ if (_mi_safe_delete_file(from))
DBUG_RETURN(my_errno);
fn_format(from,name,"",MI_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT);
#ifdef USE_RAID
if (raid_type)
DBUG_RETURN(my_raid_delete(from, raid_chunks, MYF(MY_WME)) ? my_errno : 0);
#endif
- DBUG_RETURN(my_delete_with_symlink(from, MYF(MY_WME)) ? my_errno : 0);
+ DBUG_RETURN(_mi_safe_delete_file(from));
}
diff --git a/storage/myisam/mi_dynrec.c b/storage/myisam/mi_dynrec.c
index 374fcfd4ddf..adae7076858 100644
--- a/storage/myisam/mi_dynrec.c
+++ b/storage/myisam/mi_dynrec.c
@@ -94,6 +94,34 @@ my_bool mi_dynmap_file(MI_INFO *info, my_off_t size)
madvise((char*) info->s->file_map, size, MADV_RANDOM);
#endif
info->s->mmaped_length= size;
+ info->s->file_read= mi_mmap_pread;
+ info->s->file_write= mi_mmap_pwrite;
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Destroy mmaped area for MyISAM handler and switch back to non-mmap I/O
+
+ SYNOPSIS
+ mi_munmap_file()
+ info MyISAM handler
+
+ RETURN
+ 0 ok
+ !0 error (value returned by my_munmap(); share state is left unchanged).
+*/
+
+int mi_munmap_file(MI_INFO *info)
+{
+ int ret;
+ DBUG_ENTER("mi_munmap_file"); /* tag must match the function name for DBUG traces/filters */
+ if ((ret= my_munmap(info->s->file_map, info->s->mmaped_length)))
+ DBUG_RETURN(ret);
+ info->s->file_read= mi_nommap_pread; /* counterpart of the handlers set in mi_dynmap_file() */
+ info->s->file_write= mi_nommap_pwrite;
+ info->s->file_map= 0; /* mark the share as no longer mapped */
+ info->s->mmaped_length= 0;
DBUG_RETURN(0);
}
@@ -112,8 +140,7 @@ void mi_remap_file(MI_INFO *info, my_off_t size)
{
if (info->s->file_map)
{
- VOID(my_munmap((char*) info->s->file_map,
- (size_t) info->s->mmaped_length));
+ mi_munmap_file(info);
mi_dynmap_file(info, size);
}
}
@@ -933,8 +960,16 @@ static int update_dynamic_record(MI_INFO *info, my_off_t filepos, uchar *record,
}
if (block_info.next_filepos != HA_OFFSET_ERROR)
+ {
+ /*
+ delete_dynamic_record() may change data file position.
+ IO cache must be notified as it may still have cached
+ data, which has to be flushed later.
+ */
+ info->rec_cache.seek_not_done= 1;
if (delete_dynamic_record(info,block_info.next_filepos,1))
goto err;
+ }
DBUG_RETURN(0);
err:
DBUG_RETURN(1);
diff --git a/storage/myisam/mi_extra.c b/storage/myisam/mi_extra.c
index 64e7a55231d..3b14e5eb98e 100644
--- a/storage/myisam/mi_extra.c
+++ b/storage/myisam/mi_extra.c
@@ -381,11 +381,6 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg)
DBUG_PRINT("warning",("mmap failed: errno: %d",errno));
error= my_errno= errno;
}
- else
- {
- share->file_read= mi_mmap_pread;
- share->file_write= mi_mmap_pwrite;
- }
}
pthread_mutex_unlock(&share->intern_lock);
#endif
diff --git a/storage/myisam/mi_locking.c b/storage/myisam/mi_locking.c
index b935d517c81..baa87fc05ee 100644
--- a/storage/myisam/mi_locking.c
+++ b/storage/myisam/mi_locking.c
@@ -328,7 +328,6 @@ void mi_update_status(void* param)
(long) info->s->state.state.data_file_length));
#endif
info->s->state.state= *info->state;
- info->state= &info->s->state.state;
#ifdef HAVE_QUERY_CACHE
DBUG_PRINT("info", ("invalidator... '%s' (status update)",
info->filename));
@@ -336,6 +335,7 @@ void mi_update_status(void* param)
(*info->s->chst_invalidator)((const char *)info->filename);
#endif
}
+ info->state= &info->s->state.state;
info->append_insert_at_end= 0;
/*
diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c
index 2ded6a8e8cd..7436548c7e1 100644
--- a/storage/myisam/mi_open.c
+++ b/storage/myisam/mi_open.c
@@ -663,6 +663,9 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
myisam_open_list=list_add(myisam_open_list,&m_info->open_list);
pthread_mutex_unlock(&THR_LOCK_myisam);
+
+ bzero(info.buff, share->base.max_key_block_length * 2);
+
if (myisam_log_file >= 0)
{
intern_filename(name_buff,share->index_file_name);
diff --git a/storage/myisam/mi_page.c b/storage/myisam/mi_page.c
index a05a96e514b..dd00d15b77d 100644
--- a/storage/myisam/mi_page.c
+++ b/storage/myisam/mi_page.c
@@ -86,13 +86,6 @@ int _mi_write_keypage(register MI_INFO *info, register MI_KEYDEF *keyinfo,
if ((length=keyinfo->block_length) > IO_SIZE*2 &&
info->state->key_file_length != page+length)
length= ((mi_getint(buff)+IO_SIZE-1) & (uint) ~(IO_SIZE-1));
-#ifdef HAVE_valgrind
- {
- length=mi_getint(buff);
- bzero((uchar*) buff+length,keyinfo->block_length-length);
- length=keyinfo->block_length;
- }
-#endif
DBUG_RETURN((key_cache_write(info->s->key_cache,
info->s->kfile,page, level, (uchar*) buff,length,
(uint) keyinfo->block_length,
diff --git a/storage/myisam/mi_rnext.c b/storage/myisam/mi_rnext.c
index 7ce66d41e0f..b9bbda3cacb 100644
--- a/storage/myisam/mi_rnext.c
+++ b/storage/myisam/mi_rnext.c
@@ -28,6 +28,7 @@ int mi_rnext(MI_INFO *info, uchar *buf, int inx)
{
int error,changed;
uint flag;
+ uint update_mask= HA_STATE_NEXT_FOUND;
DBUG_ENTER("mi_rnext");
if ((inx = _mi_check_index(info,inx)) < 0)
@@ -55,6 +56,20 @@ int mi_rnext(MI_INFO *info, uchar *buf, int inx)
info->s->state.key_root[inx]);
break;
}
+ /*
+ "search first" failed. This means we have no pivot for
+ "search next", or in other words MI_INFO::lastkey is
+ likely uninitialized.
+
+ Normally SQL layer would never request "search next" if
+ "search first" failed. But HANDLER may do anything.
+
+ As mi_rnext() without a preceding mi_rkey()/mi_rfirst()
+ is equivalent to mi_rfirst(), we must restore the original state
+ as if the failing mi_rfirst() had not been called.
+ */
+ if (error)
+ update_mask|= HA_STATE_PREV_FOUND;
}
else
{
@@ -100,7 +115,7 @@ int mi_rnext(MI_INFO *info, uchar *buf, int inx)
}
/* Don't clear if database-changed */
info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
- info->update|= HA_STATE_NEXT_FOUND;
+ info->update|= update_mask;
if (error)
{
diff --git a/storage/myisam/mi_write.c b/storage/myisam/mi_write.c
index 6ad23154b45..86c4df77817 100644
--- a/storage/myisam/mi_write.c
+++ b/storage/myisam/mi_write.c
@@ -830,7 +830,7 @@ static int _mi_balance_page(register MI_INFO *info, MI_KEYDEF *keyinfo,
(size_t) (length=new_left_length - left_length - k_length));
pos=buff+2+length;
memcpy((uchar*) father_key_pos,(uchar*) pos,(size_t) k_length);
- bmove((uchar*) buff+2,(uchar*) pos+k_length,new_right_length);
+ bmove((uchar*) buff + 2, (uchar*) pos + k_length, new_right_length - 2);
}
else
{ /* Move keys -> buff */
diff --git a/storage/myisam/myisamdef.h b/storage/myisam/myisamdef.h
index 7eb7089f38c..3262808803c 100644
--- a/storage/myisam/myisamdef.h
+++ b/storage/myisam/myisamdef.h
@@ -722,6 +722,7 @@ int mi_open_datafile(MI_INFO *info, MYISAM_SHARE *share, const char *orn_name,
int mi_open_keyfile(MYISAM_SHARE *share);
void mi_setup_functions(register MYISAM_SHARE *share);
my_bool mi_dynmap_file(MI_INFO *info, my_off_t size);
+int mi_munmap_file(MI_INFO *info);
void mi_remap_file(MI_INFO *info, my_off_t size);
/* Functions needed by mi_check */
diff --git a/storage/myisam/rt_index.c b/storage/myisam/rt_index.c
index 608f91ddf30..575cc32eb56 100644
--- a/storage/myisam/rt_index.c
+++ b/storage/myisam/rt_index.c
@@ -404,10 +404,16 @@ int rtree_get_first(MI_INFO *info, uint keynr, uint key_length)
int rtree_get_next(MI_INFO *info, uint keynr, uint key_length)
{
- my_off_t root;
+ my_off_t root= info->s->state.key_root[keynr];
MI_KEYDEF *keyinfo = info->s->keyinfo + keynr;
- if (!info->buff_used)
+ if (root == HA_OFFSET_ERROR)
+ {
+ my_errno= HA_ERR_END_OF_FILE;
+ return -1;
+ }
+
+ if (!info->buff_used && !info->page_changed)
{
uint k_len = keyinfo->keylength - info->s->base.rec_reflength;
/* rt_PAGE_NEXT_KEY(info->int_keypos) */
@@ -428,16 +434,8 @@ int rtree_get_next(MI_INFO *info, uint keynr, uint key_length)
return 0;
}
- else
- {
- if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR)
- {
- my_errno= HA_ERR_END_OF_FILE;
- return -1;
- }
-
- return rtree_get_req(info, keyinfo, key_length, root, 0);
- }
+
+ return rtree_get_req(info, keyinfo, key_length, root, 0);
}
@@ -643,18 +641,12 @@ static int rtree_insert_level(MI_INFO *info, uint keynr, uchar *key,
}
case 1: /* root was split, grow a new root */
{
- uchar *new_root_buf;
+ uchar *new_root_buf= info->buff + info->s->base.max_key_block_length;
my_off_t new_root;
uchar *new_key;
uint nod_flag = info->s->base.key_reflength;
DBUG_PRINT("rtree", ("root was split, grow a new root"));
- if (!(new_root_buf = (uchar*)my_alloca((uint)keyinfo->block_length +
- HA_MAX_KEY_BUFF)))
- {
- my_errno = HA_ERR_OUT_OF_MEM;
- DBUG_RETURN(-1); /* purecov: inspected */
- }
mi_putint(new_root_buf, 2, nod_flag);
if ((new_root = _mi_new(info, keyinfo, DFLT_INIT_HITS)) ==
@@ -682,10 +674,8 @@ static int rtree_insert_level(MI_INFO *info, uint keynr, uchar *key,
DBUG_PRINT("rtree", ("new root page: %lu level: %d nod_flag: %u",
(ulong) new_root, 0, mi_test_if_nod(new_root_buf)));
- my_afree((uchar*)new_root_buf);
break;
err1:
- my_afree((uchar*)new_root_buf);
DBUG_RETURN(-1); /* purecov: inspected */
}
default:
diff --git a/storage/myisam/rt_split.c b/storage/myisam/rt_split.c
index ef988dbd048..88cf643faf9 100644
--- a/storage/myisam/rt_split.c
+++ b/storage/myisam/rt_split.c
@@ -258,7 +258,7 @@ int rtree_split_page(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, uchar *key,
double *old_coord;
int n_dim;
uchar *source_cur, *cur1, *cur2;
- uchar *new_page;
+ uchar *new_page= info->buff;
int err_code= 0;
uint nod_flag= mi_test_if_nod(page);
uint full_length= key_length + (nod_flag ? nod_flag :
@@ -304,12 +304,7 @@ int rtree_split_page(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, uchar *key,
goto split_err;
}
- if (!(new_page = (uchar*)my_alloca((uint)keyinfo->block_length)))
- {
- err_code= -1;
- goto split_err;
- }
-
+ info->buff_used= 1;
stop = task + (max_keys + 1);
cur1 = rt_PAGE_FIRST_KEY(page, nod_flag);
cur2 = rt_PAGE_FIRST_KEY(new_page, nod_flag);
@@ -345,8 +340,6 @@ int rtree_split_page(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, uchar *key,
DFLT_INIT_HITS, new_page);
DBUG_PRINT("rtree", ("split new block: %lu", (ulong) *new_page_offs));
- my_afree((uchar*)new_page);
-
split_err:
my_afree((uchar*) coord_buf);
DBUG_RETURN(err_code);
diff --git a/storage/myisam/sort.c b/storage/myisam/sort.c
index 86e4c8351c4..fd0bd971e10 100644
--- a/storage/myisam/sort.c
+++ b/storage/myisam/sort.c
@@ -28,13 +28,11 @@
/* static variables */
-#undef MIN_SORT_MEMORY
#undef MYF_RW
#undef DISK_BUFFER_SIZE
#define MERGEBUFF 15
#define MERGEBUFF2 31
-#define MIN_SORT_MEMORY (4096-MALLOC_OVERHEAD)
#define MYF_RW MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL)
#define DISK_BUFFER_SIZE (IO_SIZE*16)
@@ -131,12 +129,12 @@ int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages,
sort_keys= (uchar **) NULL; error= 1;
maxbuffer=1;
- memavl=max(sortbuff_size,MIN_SORT_MEMORY);
+ memavl= max(sortbuff_size, MIN_SORT_BUFFER);
records= info->sort_info->max_records;
sort_length= info->key_length;
LINT_INIT(keys);
- while (memavl >= MIN_SORT_MEMORY)
+ while (memavl >= MIN_SORT_BUFFER)
{
if ((records < UINT_MAX32) &&
((my_off_t) (records + 1) *
@@ -171,10 +169,10 @@ int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages,
break;
}
old_memavl=memavl;
- if ((memavl=memavl/4*3) < MIN_SORT_MEMORY && old_memavl > MIN_SORT_MEMORY)
- memavl=MIN_SORT_MEMORY;
+ if ((memavl= memavl/4*3) < MIN_SORT_BUFFER && old_memavl > MIN_SORT_BUFFER)
+ memavl= MIN_SORT_BUFFER;
}
- if (memavl < MIN_SORT_MEMORY)
+ if (memavl < MIN_SORT_BUFFER)
{
mi_check_print_error(info->sort_info->param,"MyISAM sort buffer too small"); /* purecov: tested */
goto err; /* purecov: tested */
@@ -348,12 +346,12 @@ pthread_handler_t thr_find_all_keys(void *arg)
bzero((char*) &sort_param->unique, sizeof(sort_param->unique));
sort_keys= (uchar **) NULL;
- memavl= max(sort_param->sortbuff_size, MIN_SORT_MEMORY);
+ memavl= max(sort_param->sortbuff_size, MIN_SORT_BUFFER);
idx= (uint)sort_param->sort_info->max_records;
sort_length= sort_param->key_length;
maxbuffer= 1;
- while (memavl >= MIN_SORT_MEMORY)
+ while (memavl >= MIN_SORT_BUFFER)
{
if ((my_off_t) (idx+1)*(sort_length+sizeof(char*)) <=
(my_off_t) memavl)
@@ -391,11 +389,11 @@ pthread_handler_t thr_find_all_keys(void *arg)
break;
}
old_memavl= memavl;
- if ((memavl= memavl/4*3) < MIN_SORT_MEMORY &&
- old_memavl > MIN_SORT_MEMORY)
- memavl= MIN_SORT_MEMORY;
+ if ((memavl= memavl / 4 * 3) < MIN_SORT_BUFFER &&
+ old_memavl > MIN_SORT_BUFFER)
+ memavl= MIN_SORT_BUFFER;
}
- if (memavl < MIN_SORT_MEMORY)
+ if (memavl < MIN_SORT_BUFFER)
{
mi_check_print_error(sort_param->sort_info->param,
"MyISAM sort buffer too small");
@@ -506,7 +504,7 @@ int thr_write_keys(MI_SORT_PARAM *sort_param)
for (i= 0, sinfo= sort_param ;
i < sort_info->total_keys ;
- i++, rec_per_key_part+=sinfo->keyinfo->keysegs, sinfo++)
+ i++, sinfo++)
{
if (!sinfo->sort_keys)
{
@@ -529,11 +527,6 @@ int thr_write_keys(MI_SORT_PARAM *sort_param)
flush_ft_buf(sinfo) || flush_pending_blocks(sinfo))
got_error=1;
}
- if (!got_error && param->testflag & T_STATISTICS)
- update_key_parts(sinfo->keyinfo, rec_per_key_part, sinfo->unique,
- param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
- sinfo->notnull: NULL,
- (ulonglong) info->state->records);
}
my_free((uchar*) sinfo->sort_keys,MYF(0));
my_free(mi_get_rec_buff_ptr(info, sinfo->rec_buff),
@@ -547,7 +540,7 @@ int thr_write_keys(MI_SORT_PARAM *sort_param)
delete_dynamic(&sinfo->buffpek),
close_cached_file(&sinfo->tempfile),
close_cached_file(&sinfo->tempfile_for_exceptions),
- sinfo++)
+ rec_per_key_part+= sinfo->keyinfo->keysegs, sinfo++)
{
if (got_error)
continue;
@@ -569,7 +562,7 @@ int thr_write_keys(MI_SORT_PARAM *sort_param)
if (!mergebuf)
{
length=param->sort_buffer_length;
- while (length >= MIN_SORT_MEMORY)
+ while (length >= MIN_SORT_BUFFER)
{
if ((mergebuf= my_malloc(length, MYF(0))))
break;
@@ -639,6 +632,11 @@ int thr_write_keys(MI_SORT_PARAM *sort_param)
got_error=1;
}
}
+ if (!got_error && param->testflag & T_STATISTICS)
+ update_key_parts(sinfo->keyinfo, rec_per_key_part, sinfo->unique,
+ param->stats_method == MI_STATS_METHOD_IGNORE_NULLS ?
+ sinfo->notnull : NULL,
+ (ulonglong) info->state->records);
}
my_free((uchar*) mergebuf,MYF(MY_ALLOW_ZERO_PTR));
DBUG_RETURN(got_error);
diff --git a/storage/myisammrg/ha_myisammrg.cc b/storage/myisammrg/ha_myisammrg.cc
index 9dac355fd0a..bd9e7db600a 100644
--- a/storage/myisammrg/ha_myisammrg.cc
+++ b/storage/myisammrg/ha_myisammrg.cc
@@ -1,4 +1,4 @@
-/* Copyright (C) 2000-2006 MySQL AB
+/* Copyright (c) 2000, 2010 Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -216,36 +216,14 @@ const char *ha_myisammrg::index_type(uint key_number)
static int myisammrg_parent_open_callback(void *callback_param,
const char *filename)
{
- ha_myisammrg *ha_myrg;
- TABLE *parent;
+ ha_myisammrg *ha_myrg= (ha_myisammrg*) callback_param;
+ TABLE *parent= ha_myrg->table_ptr();
TABLE_LIST *child_l;
- const char *db;
- const char *table_name;
size_t dirlen;
char dir_path[FN_REFLEN];
+ char name_buf[NAME_LEN];
DBUG_ENTER("myisammrg_parent_open_callback");
- /* Extract child table name and database name from filename. */
- dirlen= dirname_length(filename);
- if (dirlen >= FN_REFLEN)
- {
- /* purecov: begin inspected */
- DBUG_PRINT("error", ("name too long: '%.64s'", filename));
- my_errno= ENAMETOOLONG;
- DBUG_RETURN(1);
- /* purecov: end */
- }
- table_name= filename + dirlen;
- dirlen--; /* Strip off trailing '/'. */
- memcpy(dir_path, filename, dirlen);
- dir_path[dirlen]= '\0';
- db= base_name(dir_path);
- dirlen-= db - dir_path; /* This is now the length of 'db'. */
- DBUG_PRINT("myrg", ("open: '%s'.'%s'", db, table_name));
-
- ha_myrg= (ha_myisammrg*) callback_param;
- parent= ha_myrg->table_ptr();
-
/* Get a TABLE_LIST object. */
if (!(child_l= (TABLE_LIST*) alloc_root(&parent->mem_root,
sizeof(TABLE_LIST))))
@@ -257,13 +235,69 @@ static int myisammrg_parent_open_callback(void *callback_param,
}
bzero((char*) child_l, sizeof(TABLE_LIST));
- /* Set database (schema) name. */
- child_l->db_length= dirlen;
- child_l->db= strmake_root(&parent->mem_root, db, dirlen);
- /* Set table name. */
- child_l->table_name_length= strlen(table_name);
- child_l->table_name= strmake_root(&parent->mem_root, table_name,
- child_l->table_name_length);
+ /*
+ Depending on MySQL version, filename may be encoded by table name to
+ file name encoding or not. Always encoded if parent table is created
+ by 5.1.46+. Encoded if parent is created by 5.1.6+ and child table is
+ in different database.
+ */
+ if (!has_path(filename))
+ {
+ /* Child is in the same database as parent. */
+ child_l->db_length= parent->s->db.length;
+ child_l->db= strmake_root(&parent->mem_root, parent->s->db.str,
+ child_l->db_length);
+ /* Child table name is encoded in parent dot-MRG starting with 5.1.46. */
+ if (parent->s->mysql_version >= 50146)
+ {
+ child_l->table_name_length= filename_to_tablename(filename, name_buf,
+ sizeof(name_buf));
+ child_l->table_name= strmake_root(&parent->mem_root, name_buf,
+ child_l->table_name_length);
+ }
+ else
+ {
+ child_l->table_name_length= strlen(filename);
+ child_l->table_name= strmake_root(&parent->mem_root, filename,
+ child_l->table_name_length);
+ }
+ }
+ else
+ {
+ DBUG_ASSERT(strlen(filename) < sizeof(dir_path));
+ fn_format(dir_path, filename, "", "", 0);
+ /* Extract child table name and database name from filename. */
+ dirlen= dirname_length(dir_path);
+ /* Child db/table name is encoded in parent dot-MRG starting with 5.1.6. */
+ if (parent->s->mysql_version >= 50106)
+ {
+ child_l->table_name_length= filename_to_tablename(dir_path + dirlen,
+ name_buf,
+ sizeof(name_buf));
+ child_l->table_name= strmake_root(&parent->mem_root, name_buf,
+ child_l->table_name_length);
+ dir_path[dirlen - 1]= 0;
+ dirlen= dirname_length(dir_path);
+ child_l->db_length= filename_to_tablename(dir_path + dirlen, name_buf,
+ sizeof(name_buf));
+ child_l->db= strmake_root(&parent->mem_root, name_buf, child_l->db_length);
+ }
+ else
+ {
+ child_l->table_name_length= strlen(dir_path + dirlen);
+ child_l->table_name= strmake_root(&parent->mem_root, dir_path + dirlen,
+ child_l->table_name_length);
+ dir_path[dirlen - 1]= 0;
+ dirlen= dirname_length(dir_path);
+ child_l->db_length= strlen(dir_path + dirlen);
+ child_l->db= strmake_root(&parent->mem_root, dir_path + dirlen,
+ child_l->db_length);
+ }
+ }
+
+ DBUG_PRINT("myrg", ("open: '%.*s'.'%.*s'", child_l->db_length, child_l->db,
+ child_l->table_name_length, child_l->table_name));
+
/* Convert to lowercase if required. */
if (lower_case_table_names && child_l->table_name_length)
child_l->table_name_length= my_casedn_str(files_charset_info,
@@ -395,13 +429,13 @@ static MI_INFO *myisammrg_attach_children_callback(void *callback_param)
@detail This function initializes the MERGE storage engine structures
and adds a child list of TABLE_LIST to the parent TABLE.
- @param[in] name MERGE table path name
- @param[in] mode read/write mode, unused
- @param[in] test_if_locked open flags
+ @param[in] name MERGE table path name
+ @param[in] mode read/write mode, unused
+ @param[in] test_if_locked_arg open flags
@return status
- @retval 0 OK
- @retval -1 Error, my_errno gives reason
+ @retval 0 OK
+ @retval -1 Error, my_errno gives reason
*/
int ha_myisammrg::open(const char *name, int mode __attribute__((unused)),
@@ -409,7 +443,7 @@ int ha_myisammrg::open(const char *name, int mode __attribute__((unused)),
{
DBUG_ENTER("ha_myisammrg::open");
DBUG_PRINT("myrg", ("name: '%s' table: 0x%lx", name, (long) table));
- DBUG_PRINT("myrg", ("test_if_locked: %u", test_if_locked_arg));
+ DBUG_PRINT("myrg", ("test_if_locked_arg: %u", test_if_locked_arg));
/* Save for later use. */
test_if_locked= test_if_locked_arg;
@@ -1135,7 +1169,7 @@ int ha_myisammrg::create(const char *name, register TABLE *form,
/* Create child path names. */
for (pos= table_names; tables; tables= tables->next_local)
{
- const char *table_name;
+ const char *table_name= buff;
/*
Construct the path to the MyISAM table. Try to meet two conditions:
@@ -1161,10 +1195,12 @@ int ha_myisammrg::create(const char *name, register TABLE *form,
as the MyISAM tables are from the same database as the MERGE table.
*/
if ((dirname_length(buff) == dirlgt) && ! memcmp(buff, name, dirlgt))
- table_name= tables->table_name;
- else
- if (! (table_name= thd->strmake(buff, length)))
- DBUG_RETURN(HA_ERR_OUT_OF_MEM); /* purecov: inspected */
+ {
+ table_name+= dirlgt;
+ length-= dirlgt;
+ }
+ if (!(table_name= thd->strmake(table_name, length)))
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM); /* purecov: inspected */
*pos++= table_name;
}
@@ -1185,7 +1221,7 @@ void ha_myisammrg::append_create_info(String *packet)
const char *current_db;
size_t db_length;
THD *thd= current_thd;
- MYRG_TABLE *open_table, *first;
+ TABLE_LIST *open_table, *first;
if (file->merge_insert_method != MERGE_INSERT_DISABLED)
{
@@ -1203,14 +1239,11 @@ void ha_myisammrg::append_create_info(String *packet)
current_db= table->s->db.str;
db_length= table->s->db.length;
- for (first=open_table=file->open_tables ;
- open_table != file->end_table ;
- open_table++)
+ for (first= open_table= table->child_l;;
+ open_table= open_table->next_global)
{
- LEX_STRING db, name;
- LINT_INIT(db.str);
+ LEX_STRING db= { open_table->db, open_table->db_length };
- split_file_name(open_table->table->filename, &db, &name);
if (open_table != first)
packet->append(',');
/* Report database for mapped table if it isn't in current database */
@@ -1221,7 +1254,10 @@ void ha_myisammrg::append_create_info(String *packet)
append_identifier(thd, packet, db.str, db.length);
packet->append('.');
}
- append_identifier(thd, packet, name.str, name.length);
+ append_identifier(thd, packet, open_table->table_name,
+ open_table->table_name_length);
+ if (&open_table->next_global == table->child_last_l)
+ break;
}
packet->append(')');
}
diff --git a/storage/myisammrg/myrg_open.c b/storage/myisammrg/myrg_open.c
index 32ac719ec4d..ea306c5ba9c 100644
--- a/storage/myisammrg/myrg_open.c
+++ b/storage/myisammrg/myrg_open.c
@@ -312,14 +312,6 @@ MYRG_INFO *myrg_parent_open(const char *parent_name,
if (!child_name_buff[0] || (child_name_buff[0] == '#'))
continue;
- if (!has_path(child_name_buff))
- {
- VOID(strmake(parent_name_buff + dir_length, child_name_buff,
- sizeof(parent_name_buff) - 1 - dir_length));
- VOID(cleanup_dirname(child_name_buff, parent_name_buff));
- }
- else
- fn_format(child_name_buff, child_name_buff, "", "", 0);
DBUG_PRINT("info", ("child: '%s'", child_name_buff));
/* Callback registers child with handler table. */
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index c28e0c57d2b..8bb84d5026c 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -9076,6 +9076,11 @@ ha_innobase::get_auto_increment(
invoking this method. So we are not sure if it's guaranteed to
be 0 or not. */
+ /* We need the upper limit of the col type to check for
+ whether we update the table autoinc counter or not. */
+ ulonglong col_max_value = innobase_get_int_col_max_value(
+ table->next_number_field);
+
/* Called for the first time ? */
if (trx->n_autoinc_rows == 0) {
@@ -9092,6 +9097,11 @@ ha_innobase::get_auto_increment(
/* Not in the middle of a mult-row INSERT. */
} else if (prebuilt->autoinc_last_value == 0) {
set_if_bigger(*first_value, autoinc);
+ /* Check for -ve values. */
+ } else if (*first_value > col_max_value && trx->n_autoinc_rows > 0) {
+ /* Set to next logical value. */
+ ut_a(autoinc > trx->n_autoinc_rows);
+ *first_value = (autoinc - trx->n_autoinc_rows) - 1;
}
*nb_reserved_values = trx->n_autoinc_rows;
@@ -9102,12 +9112,6 @@ ha_innobase::get_auto_increment(
ulonglong need;
ulonglong current;
ulonglong next_value;
- ulonglong col_max_value;
-
- /* We need the upper limit of the col type to check for
- whether we update the table autoinc counter or not. */
- col_max_value = innobase_get_int_col_max_value(
- table->next_number_field);
current = *first_value > col_max_value ? autoinc : *first_value;
need = *nb_reserved_values * increment;