summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/mysql/plugin.h1
-rw-r--r--include/mysql/plugin.h.pp1
-rw-r--r--mysql-test/r/endspace.result18
-rw-r--r--mysql-test/r/rowid_order_innodb.result2
-rw-r--r--mysql-test/r/type_bit_innodb.result2
-rw-r--r--mysql-test/suite/innodb/r/innodb_gis.result2
-rw-r--r--mysql-test/suite/innodb/r/innodb_mysql.result38
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_strict_mode_basic.result16
-rw-r--r--mysql-test/t/endspace.test6
-rw-r--r--sql/sql_class.cc5
-rw-r--r--sql/sql_class.h2
-rw-r--r--storage/innobase/btr/btr0cur.c176
-rw-r--r--storage/innobase/btr/btr0sea.c2
-rw-r--r--storage/innobase/buf/buf0flu.c33
-rw-r--r--storage/innobase/dict/dict0load.c26
-rw-r--r--storage/innobase/ha/ha0ha.c2
-rw-r--r--storage/innobase/handler/ha_innodb.cc16
-rw-r--r--storage/innobase/ibuf/ibuf0ibuf.c335
-rw-r--r--storage/innobase/include/btr0cur.h5
-rw-r--r--storage/innobase/include/btr0sea.h4
-rw-r--r--storage/innobase/include/dict0load.h16
-rw-r--r--storage/innobase/include/ha0ha.h2
-rw-r--r--storage/innobase/include/ha_prototypes.h8
-rw-r--r--storage/innobase/include/ibuf0ibuf.ic11
-rw-r--r--storage/innobase/include/univ.i6
-rw-r--r--storage/innobase/srv/srv0srv.c3
26 files changed, 498 insertions, 240 deletions
diff --git a/include/mysql/plugin.h b/include/mysql/plugin.h
index 19cf0ed050d..15f7d785ead 100644
--- a/include/mysql/plugin.h
+++ b/include/mysql/plugin.h
@@ -528,6 +528,7 @@ long long thd_test_options(const MYSQL_THD thd, long long test_options);
int thd_sql_command(const MYSQL_THD thd);
const char *thd_proc_info(MYSQL_THD thd, const char *info);
void **thd_ha_data(const MYSQL_THD thd, const struct handlerton *hton);
+void thd_storage_lock_wait(MYSQL_THD thd, long long value);
int thd_tx_isolation(const MYSQL_THD thd);
char *thd_security_context(MYSQL_THD thd, char *buffer, unsigned int length,
unsigned int max_query_len);
diff --git a/include/mysql/plugin.h.pp b/include/mysql/plugin.h.pp
index ce9902ee418..55d62b8daab 100644
--- a/include/mysql/plugin.h.pp
+++ b/include/mysql/plugin.h.pp
@@ -151,6 +151,7 @@ long long thd_test_options(const void* thd, long long test_options);
int thd_sql_command(const void* thd);
const char *thd_proc_info(void* thd, const char *info);
void **thd_ha_data(const void* thd, const struct handlerton *hton);
+void thd_storage_lock_wait(void* thd, long long value);
int thd_tx_isolation(const void* thd);
char *thd_security_context(void* thd, char *buffer, unsigned int length,
unsigned int max_query_len);
diff --git a/mysql-test/r/endspace.result b/mysql-test/r/endspace.result
index 9c8d12362c4..4eca88774b4 100644
--- a/mysql-test/r/endspace.result
+++ b/mysql-test/r/endspace.result
@@ -54,8 +54,8 @@ text1 like 'teststring_%' ORDER BY text1;
text1
teststring
teststring
-select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%';
-concat('|', text1, '|')
+select concat('|', text1, '|') as c from t1 where text1='teststring' or text1 like 'teststring_%' order by c;
+c
|teststring |
|teststring|
select concat('|', text1, '|') from t1 where text1='teststring' or text1 > 'teststring\t';
@@ -105,11 +105,11 @@ select concat('|', text1, '|') from t1 where text1 like 'teststring_%';
concat('|', text1, '|')
|teststring |
|teststring |
-select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%';
-concat('|', text1, '|')
+select concat('|', text1, '|') as c from t1 where text1='teststring' or text1 like 'teststring_%' order by c;
+c
|teststring |
-|teststring|
|teststring |
+|teststring|
select concat('|', text1, '|') from t1 where text1='teststring' or text1 > 'teststring\t';
concat('|', text1, '|')
|teststring|
@@ -123,8 +123,8 @@ concat('|', text1, '|')
drop table t1;
create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)) pack_keys=0;
insert into t1 values ('teststring'), ('nothing'), ('teststring\t');
-select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%';
-concat('|', text1, '|')
+select concat('|', text1, '|') as c from t1 where text1='teststring' or text1 like 'teststring_%' order by c;
+c
|teststring |
|teststring|
select concat('|', text1, '|') from t1 where text1='teststring' or text1 >= 'teststring\t';
@@ -203,13 +203,13 @@ teststring
teststring
select text1, length(text1) from t1 where text1='teststring' or text1 like 'teststring_%';
text1 length(text1)
-teststring 11
teststring 10
+teststring 11
teststring 11
select text1, length(text1) from t1 where text1='teststring' or text1 >= 'teststring\t';
text1 length(text1)
-teststring 11
teststring 10
+teststring 11
teststring 11
select concat('|', text1, '|') from t1 order by text1;
concat('|', text1, '|')
diff --git a/mysql-test/r/rowid_order_innodb.result b/mysql-test/r/rowid_order_innodb.result
index e0796cd7ab5..dc339304041 100644
--- a/mysql-test/r/rowid_order_innodb.result
+++ b/mysql-test/r/rowid_order_innodb.result
@@ -15,7 +15,7 @@ insert into t1 values (-5, 1, 1),
(10, 1, 1);
explain select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 index_merge key1,key2 key1,key2 5,5 NULL 4 Using sort_union(key1,key2); Using where
+1 SIMPLE t1 index_merge key1,key2 key1,key2 5,5 NULL 5 Using sort_union(key1,key2); Using where
select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3;
pk1 key1 key2
-100 1 1
diff --git a/mysql-test/r/type_bit_innodb.result b/mysql-test/r/type_bit_innodb.result
index a9c3cae1770..909db576b27 100644
--- a/mysql-test/r/type_bit_innodb.result
+++ b/mysql-test/r/type_bit_innodb.result
@@ -233,7 +233,7 @@ a+0 b+0
127 403
explain select a+0, b+0 from t1 where a > 40 and b > 200 order by 1;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 range a a 2 NULL 19 Using where; Using index; Using filesort
+1 SIMPLE t1 range a a 2 NULL 27 Using where; Using index; Using filesort
select a+0, b+0 from t1 where a > 40 and b > 200 order by 1;
a+0 b+0
44 307
diff --git a/mysql-test/suite/innodb/r/innodb_gis.result b/mysql-test/suite/innodb/r/innodb_gis.result
index 0ce1ebe56ad..5712d08c9fa 100644
--- a/mysql-test/suite/innodb/r/innodb_gis.result
+++ b/mysql-test/suite/innodb/r/innodb_gis.result
@@ -572,7 +572,7 @@ COUNT(*)
EXPLAIN
SELECT COUNT(*) FROM t2 WHERE p=POINTFROMTEXT('POINT(1 2)');
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t2 ref p p 28 const 1 Using where
+1 SIMPLE t2 ref p p 28 const 2 Using where
SELECT COUNT(*) FROM t2 WHERE p=POINTFROMTEXT('POINT(1 2)');
COUNT(*)
2
diff --git a/mysql-test/suite/innodb/r/innodb_mysql.result b/mysql-test/suite/innodb/r/innodb_mysql.result
index 9a677f83080..51beed66f0b 100644
--- a/mysql-test/suite/innodb/r/innodb_mysql.result
+++ b/mysql-test/suite/innodb/r/innodb_mysql.result
@@ -889,13 +889,13 @@ EXPLAIN SELECT * FROM t1 WHERE b BETWEEN 1 AND 2 ORDER BY a;
id 1
select_type SIMPLE
table t1
-type range
+type index
possible_keys bkey
-key bkey
-key_len 5
+key PRIMARY
+key_len 4
ref NULL
-rows 16
-Extra Using where; Using index; Using filesort
+rows 32
+Extra Using where
SELECT * FROM t1 WHERE b BETWEEN 1 AND 2 ORDER BY a;
a b
1 2
@@ -934,12 +934,12 @@ EXPLAIN SELECT * FROM t1 WHERE b BETWEEN 1 AND 2 ORDER BY b,a;
id 1
select_type SIMPLE
table t1
-type range
+type index
possible_keys bkey
key bkey
key_len 5
ref NULL
-rows 16
+rows 32
Extra Using where; Using index
SELECT * FROM t1 WHERE b BETWEEN 1 AND 2 ORDER BY b,a;
a b
@@ -989,7 +989,7 @@ possible_keys bkey
key bkey
key_len 5
ref const
-rows 8
+rows 16
Extra Using where; Using index; Using filesort
SELECT * FROM t2 WHERE b=1 ORDER BY a;
a b c
@@ -1018,7 +1018,7 @@ possible_keys bkey
key bkey
key_len 10
ref const,const
-rows 8
+rows 16
Extra Using where; Using index
SELECT * FROM t2 WHERE b=1 AND c=1 ORDER BY a;
a b c
@@ -1047,7 +1047,7 @@ possible_keys bkey
key bkey
key_len 10
ref const,const
-rows 8
+rows 16
Extra Using where; Using index
SELECT * FROM t2 WHERE b=1 AND c=1 ORDER BY b,c,a;
a b c
@@ -1076,7 +1076,7 @@ possible_keys bkey
key bkey
key_len 10
ref const,const
-rows 8
+rows 16
Extra Using where; Using index
SELECT * FROM t2 WHERE b=1 AND c=1 ORDER BY c,a;
a b c
@@ -1213,7 +1213,7 @@ possible_keys b
key b
key_len 5
ref const
-rows 1
+rows 2
Extra Using where; Using index
SELECT * FROM t1 WHERE b=2 ORDER BY a ASC;
a b
@@ -1228,7 +1228,7 @@ possible_keys b
key b
key_len 5
ref const
-rows 1
+rows 2
Extra Using where; Using index
SELECT * FROM t1 WHERE b=2 ORDER BY a DESC;
a b
@@ -1372,7 +1372,7 @@ INSERT INTO t1 (a,b,c) VALUES (1,1,1), (2,1,1), (3,1,1), (4,1,1);
INSERT INTO t1 (a,b,c) SELECT a+4,b,c FROM t1;
EXPLAIN SELECT a, b, c FROM t1 WHERE b = 1 ORDER BY a DESC LIMIT 5;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 index t1_b PRIMARY 4 NULL 8 Using where
+1 SIMPLE t1 range t1_b t1_b 5 NULL 8 Using where
SELECT a, b, c FROM t1 WHERE b = 1 ORDER BY a DESC LIMIT 5;
a b c
8 1 1
@@ -1735,7 +1735,7 @@ SELECT 1 FROM (SELECT COUNT(DISTINCT c1)
FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <derived2> system NULL NULL NULL NULL 1
-2 DERIVED t1 index c3,c2 c2 10 NULL 5
+2 DERIVED t1 ALL c3,c2 c3 5 5 Using filesort
DROP TABLE t1;
CREATE TABLE t1 (c1 REAL, c2 REAL, c3 REAL, KEY (c3), KEY (c2, c3))
ENGINE=InnoDB;
@@ -1749,7 +1749,7 @@ SELECT 1 FROM (SELECT COUNT(DISTINCT c1)
FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <derived2> system NULL NULL NULL NULL 1
-2 DERIVED t1 index c3,c2 c2 18 NULL 5
+2 DERIVED t1 ALL c3,c2 c3 9 5 Using filesort
DROP TABLE t1;
CREATE TABLE t1 (c1 DECIMAL(12,2), c2 DECIMAL(12,2), c3 DECIMAL(12,2),
KEY (c3), KEY (c2, c3))
@@ -1764,7 +1764,7 @@ SELECT 1 FROM (SELECT COUNT(DISTINCT c1)
FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <derived2> system NULL NULL NULL NULL 1
-2 DERIVED t1 index c3,c2 c2 14 NULL 5
+2 DERIVED t1 ALL c3,c2 c3 7 5 Using filesort
DROP TABLE t1;
End of 5.1 tests
#
@@ -1871,7 +1871,7 @@ possible_keys b
key b
key_len 5
ref NULL
-rows 3
+rows 5
Extra Using where; Using index
EXPLAIN SELECT c FROM bar WHERE c>2;;
id 1
@@ -2536,7 +2536,7 @@ f1 f2 f3 f4
EXPLAIN SELECT * FROM t1 WHERE f2 = 1 AND f4 = TRUE
ORDER BY f1 DESC LIMIT 5;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 range f2,f4 f4 1 NULL 11 Using where
+1 SIMPLE t1 range f2,f4 f4 1 NULL 22 Using where
DROP TABLE t1;
#
# Bug#54117 crash in thr_multi_unlock, temporary table
diff --git a/mysql-test/suite/sys_vars/r/innodb_strict_mode_basic.result b/mysql-test/suite/sys_vars/r/innodb_strict_mode_basic.result
index 5e55faa99c9..200f9166215 100644
--- a/mysql-test/suite/sys_vars/r/innodb_strict_mode_basic.result
+++ b/mysql-test/suite/sys_vars/r/innodb_strict_mode_basic.result
@@ -1,32 +1,32 @@
SET @start_global_value = @@global.innodb_strict_mode;
SELECT @start_global_value;
@start_global_value
-1
+0
Valid values are 'ON' and 'OFF'
select @@global.innodb_strict_mode in (0, 1);
@@global.innodb_strict_mode in (0, 1)
1
select @@global.innodb_strict_mode;
@@global.innodb_strict_mode
-1
+0
select @@session.innodb_strict_mode in (0, 1);
@@session.innodb_strict_mode in (0, 1)
1
select @@session.innodb_strict_mode;
@@session.innodb_strict_mode
-1
+0
show global variables like 'innodb_strict_mode';
Variable_name Value
-innodb_strict_mode ON
+innodb_strict_mode OFF
show session variables like 'innodb_strict_mode';
Variable_name Value
-innodb_strict_mode ON
+innodb_strict_mode OFF
select * from information_schema.global_variables where variable_name='innodb_strict_mode';
VARIABLE_NAME VARIABLE_VALUE
-INNODB_STRICT_MODE ON
+INNODB_STRICT_MODE OFF
select * from information_schema.session_variables where variable_name='innodb_strict_mode';
VARIABLE_NAME VARIABLE_VALUE
-INNODB_STRICT_MODE ON
+INNODB_STRICT_MODE OFF
set global innodb_strict_mode='OFF';
set session innodb_strict_mode='OFF';
select @@global.innodb_strict_mode;
@@ -117,4 +117,4 @@ INNODB_STRICT_MODE ON
SET @@global.innodb_strict_mode = @start_global_value;
SELECT @@global.innodb_strict_mode;
@@global.innodb_strict_mode
-1
+0
diff --git a/mysql-test/t/endspace.test b/mysql-test/t/endspace.test
index b223c683cde..7c71b05f687 100644
--- a/mysql-test/t/endspace.test
+++ b/mysql-test/t/endspace.test
@@ -27,7 +27,7 @@ alter table t1 modify text1 char(32) binary not null;
check table t1;
select * from t1 ignore key (key1) where text1='teststring' or
text1 like 'teststring_%' ORDER BY text1;
-select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%';
+select concat('|', text1, '|') as c from t1 where text1='teststring' or text1 like 'teststring_%' order by c;
select concat('|', text1, '|') from t1 where text1='teststring' or text1 > 'teststring\t';
select text1, length(text1) from t1 order by text1;
select text1, length(text1) from t1 order by binary text1;
@@ -44,14 +44,14 @@ select concat('|', text1, '|') from t1 where text1='teststring';
select concat('|', text1, '|') from t1 where text1='teststring ';
explain select concat('|', text1, '|') from t1 where text1='teststring ';
select concat('|', text1, '|') from t1 where text1 like 'teststring_%';
-select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%';
+select concat('|', text1, '|') as c from t1 where text1='teststring' or text1 like 'teststring_%' order by c;
select concat('|', text1, '|') from t1 where text1='teststring' or text1 > 'teststring\t';
select concat('|', text1, '|') from t1 order by text1;
drop table t1;
create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)) pack_keys=0;
insert into t1 values ('teststring'), ('nothing'), ('teststring\t');
-select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%';
+select concat('|', text1, '|') as c from t1 where text1='teststring' or text1 like 'teststring_%' order by c;
select concat('|', text1, '|') from t1 where text1='teststring' or text1 >= 'teststring\t';
drop table t1;
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 2fdf270ae66..08d06b07578 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -306,6 +306,11 @@ void **thd_ha_data(const THD *thd, const struct handlerton *hton)
return (void **) &thd->ha_data[hton->slot].ha_ptr;
}
+extern "C"
+void thd_storage_lock_wait(THD *thd, long long value)
+{
+ thd->utime_after_lock+= value;
+}
/**
Provide a handler data getter to simplify coding
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 78eeda5843d..052537e521f 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -1501,7 +1501,7 @@ public:
// track down slow pthread_create
ulonglong prior_thr_create_utime, thr_create_utime;
ulonglong start_utime, utime_after_lock;
-
+
thr_lock_type update_lock_default;
Delayed_insert *di;
diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
index 537d5f51184..2549589b0c7 100644
--- a/storage/innobase/btr/btr0cur.c
+++ b/storage/innobase/btr/btr0cur.c
@@ -3153,6 +3153,7 @@ btr_cur_add_path_info(
{
btr_path_t* slot;
rec_t* rec;
+ page_t* page;
ut_a(cursor->path_arr);
@@ -3175,8 +3176,155 @@ btr_cur_add_path_info(
slot = cursor->path_arr + (root_height - height);
+ page = page_align(rec);
+
slot->nth_rec = page_rec_get_n_recs_before(rec);
- slot->n_recs = page_get_n_recs(page_align(rec));
+ slot->n_recs = page_get_n_recs(page);
+ slot->page_no = page_get_page_no(page);
+ slot->page_level = btr_page_get_level_low(page);
+}
+
+/*******************************************************************//**
+Estimate the number of rows between slot1 and slot2 for any level on a
+B-tree. This function starts from slot1->page and reads a few pages to
+the right, counting their records. If we reach slot2->page quickly then
+we know exactly how many records there are between slot1 and slot2 and
+we set is_n_rows_exact to TRUE. If we cannot reach slot2->page quickly
+then we calculate the average number of records in the pages scanned
+so far and assume that all pages that we did not scan up to slot2->page
+contain the same number of records, then we multiply that average by
+the number of pages between slot1->page and slot2->page (which is
+n_rows_on_prev_level). In this case we set is_n_rows_exact to FALSE.
+@return number of rows (exact or estimated) */
+static
+ib_int64_t
+btr_estimate_n_rows_in_range_on_level(
+/*==================================*/
+ dict_index_t* index, /*!< in: index */
+ btr_path_t* slot1, /*!< in: left border */
+ btr_path_t* slot2, /*!< in: right border */
+ ib_int64_t n_rows_on_prev_level, /*!< in: number of rows
+ on the previous level for the
+ same descend paths; used to
+ determine the number of pages
+ on this level */
+ ibool* is_n_rows_exact) /*!< out: TRUE if the returned
+ value is exact i.e. not an
+ estimation */
+{
+ ulint space;
+ ib_int64_t n_rows;
+ ulint n_pages_read;
+ ulint page_no;
+ ulint zip_size;
+ ulint level;
+
+ space = dict_index_get_space(index);
+
+ n_rows = 0;
+ n_pages_read = 0;
+
+ /* Assume by default that we will scan all pages between
+ slot1->page_no and slot2->page_no */
+ *is_n_rows_exact = TRUE;
+
+ /* add records from slot1->page_no which are to the right of
+ the record which serves as a left border of the range, if any */
+ if (slot1->nth_rec < slot1->n_recs) {
+ n_rows += slot1->n_recs - slot1->nth_rec;
+ }
+
+ /* add records from slot2->page_no which are to the left of
+ the record which serves as a right border of the range, if any */
+ if (slot2->nth_rec > 1) {
+ n_rows += slot2->nth_rec - 1;
+ }
+
+ /* count the records in the pages between slot1->page_no and
+ slot2->page_no (non inclusive), if any */
+
+ zip_size = fil_space_get_zip_size(space);
+
+ /* Do not read more than this number of pages in order not to hurt
+ performance with this code which is just an estimation. If we read
+ this many pages before reaching slot2->page_no then we estimate the
+ average from the pages scanned so far */
+ #define N_PAGES_READ_LIMIT 10
+
+ page_no = slot1->page_no;
+ level = slot1->page_level;
+
+ do {
+ mtr_t mtr;
+ page_t* page;
+ buf_block_t* block;
+
+ mtr_start(&mtr);
+
+ /* fetch the page */
+ block = buf_page_get(space, zip_size, page_no, RW_S_LATCH,
+ &mtr);
+
+ page = buf_block_get_frame(block);
+
+ /* It is possible that the tree has been reorganized in the
+ meantime and this is a different page. If this happens the
+ calculated estimate will be bogus, which is not fatal as
+ this is only an estimate. We are sure that a page with
+ page_no exists because InnoDB never frees pages, only
+ reuses them. */
+ if (fil_page_get_type(page) != FIL_PAGE_INDEX
+ || btr_page_get_index_id(page) != index->id
+ || btr_page_get_level_low(page) != level) {
+
+ /* The page got reused for something else */
+ goto inexact;
+ }
+
+ n_pages_read++;
+
+ if (page_no != slot1->page_no) {
+ /* Do not count the records on slot1->page_no,
+ we already counted them before this loop. */
+ n_rows += page_get_n_recs(page);
+ }
+
+ page_no = btr_page_get_next(page, &mtr);
+
+ mtr_commit(&mtr);
+
+ if (n_pages_read == N_PAGES_READ_LIMIT
+ || page_no == FIL_NULL) {
+ /* Either we read too many pages or
+ we reached the end of the level without passing
+ through slot2->page_no, the tree must have changed
+ in the meantime */
+ goto inexact;
+ }
+
+ } while (page_no != slot2->page_no);
+
+ return(n_rows);
+
+inexact:
+
+ *is_n_rows_exact = FALSE;
+
+ /* We did interrupt before reaching slot2->page */
+
+ if (n_pages_read > 0) {
+ /* The number of pages on this level is
+ n_rows_on_prev_level, multiply it by the
+ average number of recs per page so far */
+ n_rows = n_rows_on_prev_level
+ * n_rows / n_pages_read;
+ } else {
+ /* The tree changed before we could even
+ start with slot1->page_no */
+ n_rows = 10;
+ }
+
+ return(n_rows);
}
/*******************************************************************//**
@@ -3201,6 +3349,7 @@ btr_estimate_n_rows_in_range(
ibool diverged_lot;
ulint divergence_level;
ib_int64_t n_rows;
+ ibool is_n_rows_exact;
ulint i;
mtr_t mtr;
@@ -3243,6 +3392,7 @@ btr_estimate_n_rows_in_range(
/* We have the path information for the range in path1 and path2 */
n_rows = 1;
+ is_n_rows_exact = TRUE;
diverged = FALSE; /* This becomes true when the path is not
the same any more */
diverged_lot = FALSE; /* This becomes true when the paths are
@@ -3258,7 +3408,7 @@ btr_estimate_n_rows_in_range(
if (slot1->nth_rec == ULINT_UNDEFINED
|| slot2->nth_rec == ULINT_UNDEFINED) {
- if (i > divergence_level + 1) {
+ if (i > divergence_level + 1 && !is_n_rows_exact) {
/* In trees whose height is > 1 our algorithm
tends to underestimate: multiply the estimate
by 2: */
@@ -3270,7 +3420,9 @@ btr_estimate_n_rows_in_range(
to over 1 / 2 of the estimated rows in the whole
table */
- if (n_rows > index->table->stat_n_rows / 2) {
+ if (n_rows > index->table->stat_n_rows / 2
+ && !is_n_rows_exact) {
+
n_rows = index->table->stat_n_rows / 2;
/* If there are just 0 or 1 rows in the table,
@@ -3296,10 +3448,15 @@ btr_estimate_n_rows_in_range(
divergence_level = i;
}
} else {
- /* Maybe the tree has changed between
- searches */
-
- return(10);
+ /* It is possible that
+ slot1->nth_rec >= slot2->nth_rec
+ if, for example, we have a single page
+ tree which contains (inf, 5, 6, supr)
+ and we select where x > 20 and x < 30;
+ in this case slot1->nth_rec will point
+ to the supr record and slot2->nth_rec
+ will point to 6 */
+ n_rows = 0;
}
} else if (diverged && !diverged_lot) {
@@ -3323,8 +3480,9 @@ btr_estimate_n_rows_in_range(
}
} else if (diverged_lot) {
- n_rows = (n_rows * (slot1->n_recs + slot2->n_recs))
- / 2;
+ n_rows = btr_estimate_n_rows_in_range_on_level(
+ index, slot1, slot2, n_rows,
+ &is_n_rows_exact);
}
}
}
diff --git a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
index 06cc48c7c60..fb667bcae82 100644
--- a/storage/innobase/btr/btr0sea.c
+++ b/storage/innobase/btr/btr0sea.c
@@ -1746,6 +1746,7 @@ function_exit:
}
}
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
/********************************************************************//**
Validates the search system.
@return TRUE if ok */
@@ -1913,3 +1914,4 @@ btr_search_validate(void)
return(ok);
}
+#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
diff --git a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
index 3737627301f..4131d863e6a 100644
--- a/storage/innobase/buf/buf0flu.c
+++ b/storage/innobase/buf/buf0flu.c
@@ -1248,8 +1248,12 @@ buf_flush_try_neighbors(
/*====================*/
ulint space, /*!< in: space id */
ulint offset, /*!< in: page offset */
- enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU or
+ enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or
BUF_FLUSH_LIST */
+ ulint n_flushed, /*!< in: number of pages
+ flushed so far in this batch */
+ ulint n_to_flush) /*!< in: maximum number of pages
+ we are allowed to flush */
{
ulint i;
ulint low;
@@ -1290,6 +1294,21 @@ buf_flush_try_neighbors(
buf_page_t* bpage;
+ if ((count + n_flushed) >= n_to_flush) {
+
+ /* We have already flushed enough pages and
+ should call it a day. There is, however, one
+ exception. If the page whose neighbors we
+ are flushing has not been flushed yet then
+ we'll try to flush the victim that we
+ selected originally. */
+ if (i <= offset) {
+ i = offset;
+ } else {
+ break;
+ }
+ }
+
buf_pool = buf_pool_get(space, i);
buf_pool_mutex_enter(buf_pool);
@@ -1357,6 +1376,8 @@ buf_flush_page_and_try_neighbors(
buf_page_in_file(bpage) */
enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
+ ulint n_to_flush, /*!< in: number of pages to
+ flush */
ulint* count) /*!< in/out: number of pages
flushed */
{
@@ -1390,7 +1411,11 @@ buf_flush_page_and_try_neighbors(
mutex_exit(block_mutex);
/* Try to flush also all the neighbors */
- *count += buf_flush_try_neighbors(space, offset, flush_type);
+ *count += buf_flush_try_neighbors(space,
+ offset,
+ flush_type,
+ *count,
+ n_to_flush);
buf_pool_mutex_enter(buf_pool);
flushed = TRUE;
@@ -1430,7 +1455,7 @@ buf_flush_LRU_list_batch(
a page that isn't ready for flushing. */
while (bpage != NULL
&& !buf_flush_page_and_try_neighbors(
- bpage, BUF_FLUSH_LRU, &count)) {
+ bpage, BUF_FLUSH_LRU, max, &count)) {
bpage = UT_LIST_GET_PREV(LRU, bpage);
}
@@ -1511,7 +1536,7 @@ buf_flush_flush_list_batch(
while (bpage != NULL
&& len > 0
&& !buf_flush_page_and_try_neighbors(
- bpage, BUF_FLUSH_LIST, &count)) {
+ bpage, BUF_FLUSH_LIST, min_n, &count)) {
buf_flush_list_mutex_enter(buf_pool);
diff --git a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c
index 20a18c72a39..6bd15f0556a 100644
--- a/storage/innobase/dict/dict0load.c
+++ b/storage/innobase/dict/dict0load.c
@@ -1175,23 +1175,23 @@ static const char* dict_load_index_id_err = "SYS_INDEXES.TABLE_ID mismatch";
/********************************************************************//**
Loads an index definition from a SYS_INDEXES record to dict_index_t.
-If "cached" is set to "TRUE", we will create a dict_index_t structure
-and fill it accordingly. Otherwise, the dict_index_t will
-be supplied by the caller and filled with information read from
-the record.
-@return error message, or NULL on success */
+If allocate=TRUE, we will create a dict_index_t structure and fill it
+accordingly. If allocate=FALSE, the dict_index_t will be supplied by
+the caller and filled with information read from the record.
+@return error message, or NULL on success */
UNIV_INTERN
const char*
dict_load_index_low(
/*================*/
byte* table_id, /*!< in/out: table id (8 bytes),
- an "in" value if cached=TRUE
- and "out" when cached=FALSE */
+ an "in" value if allocate=TRUE
+ and "out" when allocate=FALSE */
const char* table_name, /*!< in: table name */
mem_heap_t* heap, /*!< in/out: temporary memory heap */
const rec_t* rec, /*!< in: SYS_INDEXES record */
- ibool cached, /*!< in: TRUE = add to cache,
- FALSE = do not */
+ ibool allocate, /*!< in: TRUE=allocate *index,
+ FALSE=fill in a pre-allocated
+ *index */
dict_index_t** index) /*!< out,own: index, or NULL */
{
const byte* field;
@@ -1203,8 +1203,8 @@ dict_load_index_low(
ulint type;
ulint space;
- if (cached) {
- /* If "cached" is set to TRUE, no dict_index_t will
+ if (allocate) {
+ /* If allocate=TRUE, no dict_index_t will
be supplied. Initialize "*index" to NULL */
*index = NULL;
}
@@ -1223,7 +1223,7 @@ err_len:
return("incorrect column length in SYS_INDEXES");
}
- if (!cached) {
+ if (!allocate) {
/* We are reading a SYS_INDEXES record. Copy the table_id */
memcpy(table_id, (const char*)field, 8);
} else if (memcmp(field, table_id, 8)) {
@@ -1279,7 +1279,7 @@ err_len:
goto err_len;
}
- if (cached) {
+ if (allocate) {
*index = dict_mem_index_create(table_name, name_buf,
space, type, n_fields);
} else {
diff --git a/storage/innobase/ha/ha0ha.c b/storage/innobase/ha/ha0ha.c
index f9e798012f8..7f11917de0a 100644
--- a/storage/innobase/ha/ha0ha.c
+++ b/storage/innobase/ha/ha0ha.c
@@ -354,6 +354,7 @@ ha_remove_all_nodes_to_page(
#endif
}
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
/*************************************************************//**
Validates a given range of the cells in hash table.
@return TRUE if ok */
@@ -400,6 +401,7 @@ ha_validate(
return(ok);
}
+#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
/*************************************************************//**
Prints info of a hash table. */
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index ab9df9a0272..a004cba9603 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -425,7 +425,7 @@ static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG,
static MYSQL_THDVAR_BOOL(strict_mode, PLUGIN_VAR_OPCMDARG,
"Use strict mode when evaluating create options.",
- NULL, NULL, TRUE);
+ NULL, NULL, FALSE);
static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
"Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
@@ -807,6 +807,20 @@ thd_lock_wait_timeout(
return(THDVAR((THD*) thd, lock_wait_timeout));
}
+/******************************************************************//**
+Set the time waited for the lock for the current query. */
+extern "C" UNIV_INTERN
+void
+thd_set_lock_wait_time(
+/*===================*/
+ void* thd, /*!< in: thread handle (THD*) */
+ ulint value) /*!< in: time waited for the lock */
+{
+ if (thd) {
+ thd_storage_lock_wait((THD*)thd, value);
+ }
+}
+
/********************************************************************//**
Obtain the InnoDB transaction of a MySQL thread.
@return reference to transaction pointer */
diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
index dc8e61e5070..a048de0e884 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.c
+++ b/storage/innobase/ibuf/ibuf0ibuf.c
@@ -55,6 +55,7 @@ Created 7/19/1997 Heikki Tuuri
#include "lock0lock.h"
#include "log0recv.h"
#include "que0que.h"
+#include "srv0start.h" /* srv_shutdown_state */
/* STRUCTURE OF AN INSERT BUFFER RECORD
@@ -395,8 +396,10 @@ ibuf_tree_root_get(
mtr_t* mtr) /*!< in: mtr */
{
buf_block_t* block;
+ page_t* root;
ut_ad(ibuf_inside());
+ ut_ad(mutex_own(&ibuf_mutex));
mtr_x_lock(dict_index_get_lock(ibuf->index), mtr);
@@ -405,7 +408,13 @@ ibuf_tree_root_get(
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
- return(buf_block_get_frame(block));
+ root = buf_block_get_frame(block);
+
+ ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
+ ut_ad(page_get_page_no(root) == FSP_IBUF_TREE_ROOT_PAGE_NO);
+ ut_ad(ibuf->empty == (page_get_n_recs(root) == 0));
+
+ return(root);
}
#ifdef UNIV_IBUF_COUNT_DEBUG
@@ -482,8 +491,6 @@ ibuf_size_update(
/* the '1 +' is the ibuf header page */
ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len);
-
- ibuf->empty = page_get_n_recs(root) == 0;
}
/******************************************************************//**
@@ -554,6 +561,7 @@ ibuf_init_at_db_start(void)
ibuf_size_update(root, &mtr);
mutex_exit(&ibuf_mutex);
+ ibuf->empty = (page_get_n_recs(root) == 0);
mtr_commit(&mtr);
ibuf_exit();
@@ -1350,10 +1358,18 @@ ibuf_add_ops(
const ulint* ops) /*!< in: operation counts */
{
+#ifndef HAVE_ATOMIC_BUILTINS
+ ut_ad(mutex_own(&ibuf_mutex));
+#endif /* !HAVE_ATOMIC_BUILTINS */
+
ulint i;
for (i = 0; i < IBUF_OP_COUNT; i++) {
+#ifdef HAVE_ATOMIC_BUILTINS
+ os_atomic_increment_ulint(&arr[i], ops[i]);
+#else /* HAVE_ATOMIC_BUILTINS */
arr[i] += ops[i];
+#endif /* HAVE_ATOMIC_BUILTINS */
}
}
@@ -2017,9 +2033,9 @@ ibuf_data_too_much_free(void)
/*********************************************************************//**
Allocates a new page from the ibuf file segment and adds it to the free
list.
-@return DB_SUCCESS, or DB_STRONG_FAIL if no space left */
+@return TRUE on success, FALSE if no space left */
static
-ulint
+ibool
ibuf_add_free_page(void)
/*====================*/
{
@@ -2055,10 +2071,10 @@ ibuf_add_free_page(void)
header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
&mtr);
- if (page_no == FIL_NULL) {
+ if (UNIV_UNLIKELY(page_no == FIL_NULL)) {
mtr_commit(&mtr);
- return(DB_STRONG_FAIL);
+ return(FALSE);
}
{
@@ -2096,16 +2112,16 @@ ibuf_add_free_page(void)
bitmap_page = ibuf_bitmap_get_map_page(
IBUF_SPACE_ID, page_no, zip_size, &mtr);
+ mutex_exit(&ibuf_mutex);
+
ibuf_bitmap_page_set_bits(
bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, TRUE, &mtr);
mtr_commit(&mtr);
- mutex_exit(&ibuf_mutex);
-
ibuf_exit();
- return(DB_SUCCESS);
+ return(TRUE);
}
/*********************************************************************//**
@@ -2135,20 +2151,17 @@ ibuf_remove_free_page(void)
header_page = ibuf_header_page_get(&mtr);
/* Prevent pessimistic inserts to insert buffer trees for a while */
- mutex_enter(&ibuf_pessimistic_insert_mutex);
-
ibuf_enter();
-
+ mutex_enter(&ibuf_pessimistic_insert_mutex);
mutex_enter(&ibuf_mutex);
if (!ibuf_data_too_much_free()) {
mutex_exit(&ibuf_mutex);
+ mutex_exit(&ibuf_pessimistic_insert_mutex);
ibuf_exit();
- mutex_exit(&ibuf_pessimistic_insert_mutex);
-
mtr_commit(&mtr);
return;
@@ -2158,6 +2171,8 @@ ibuf_remove_free_page(void)
root = ibuf_tree_root_get(&mtr2);
+ mutex_exit(&ibuf_mutex);
+
page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
&mtr2).page;
@@ -2166,7 +2181,6 @@ ibuf_remove_free_page(void)
is a level 2 page. */
mtr_commit(&mtr2);
- mutex_exit(&ibuf_mutex);
ibuf_exit();
@@ -2209,17 +2223,19 @@ ibuf_remove_free_page(void)
flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
+ mutex_exit(&ibuf_pessimistic_insert_mutex);
+
ibuf->seg_size--;
ibuf->free_list_len--;
- mutex_exit(&ibuf_pessimistic_insert_mutex);
-
/* Set the bit indicating that this page is no more an ibuf tree page
(level 2 page) */
bitmap_page = ibuf_bitmap_get_map_page(
IBUF_SPACE_ID, page_no, zip_size, &mtr);
+ mutex_exit(&ibuf_mutex);
+
ibuf_bitmap_page_set_bits(
bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr);
@@ -2228,8 +2244,6 @@ ibuf_remove_free_page(void)
#endif
mtr_commit(&mtr);
- mutex_exit(&ibuf_mutex);
-
ibuf_exit();
}
@@ -2270,17 +2284,16 @@ ibuf_free_excess_pages(void)
for (i = 0; i < 4; i++) {
- mutex_enter(&ibuf_mutex);
-
- if (!ibuf_data_too_much_free()) {
+ ibool too_much_free;
- mutex_exit(&ibuf_mutex);
+ mutex_enter(&ibuf_mutex);
+ too_much_free = ibuf_data_too_much_free();
+ mutex_exit(&ibuf_mutex);
+ if (!too_much_free) {
return;
}
- mutex_exit(&ibuf_mutex);
-
ibuf_remove_free_page();
}
}
@@ -2476,18 +2489,20 @@ ibuf_contract_ext(
ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
- ulint n_stored;
ulint sum_sizes;
mtr_t mtr;
*n_pages = 0;
ut_ad(!ibuf_inside());
- mutex_enter(&ibuf_mutex);
+ /* We perform a dirty read of ibuf->empty, without latching
+ the insert buffer root page. We trust this dirty read except
+ when a slow shutdown is being executed. During a slow
+ shutdown, the insert buffer merge must be completed. */
- if (ibuf->empty) {
+ if (UNIV_UNLIKELY(ibuf->empty)
+ && UNIV_LIKELY(!srv_shutdown_state)) {
ibuf_is_empty:
- mutex_exit(&ibuf_mutex);
#if 0 /* TODO */
if (srv_shutdown_state) {
@@ -2519,13 +2534,14 @@ ibuf_is_empty:
ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
if (page_get_n_recs(btr_pcur_get_page(&pcur)) == 0) {
- /* When the ibuf tree is emptied completely, the last record
- is removed using an optimistic delete and ibuf_size_update
- is not called, causing ibuf->empty to remain FALSE. If we do
- not reset it to TRUE here then database shutdown will hang
- in the loop in ibuf_contract_for_n_pages. */
-
- ibuf->empty = TRUE;
+ /* If a B-tree page is empty, it must be the root page
+ and the whole B-tree must be empty. InnoDB does not
+ allow empty B-tree pages other than the root. */
+ ut_ad(ibuf->empty);
+ ut_ad(page_get_space_id(btr_pcur_get_page(&pcur))
+ == IBUF_SPACE_ID);
+ ut_ad(page_get_page_no(btr_pcur_get_page(&pcur))
+ == FSP_IBUF_TREE_ROOT_PAGE_NO);
ibuf_exit();
@@ -2535,14 +2551,12 @@ ibuf_is_empty:
goto ibuf_is_empty;
}
- mutex_exit(&ibuf_mutex);
-
sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur),
space_ids, space_versions,
- page_nos, &n_stored);
+ page_nos, n_pages);
#if 0 /* defined UNIV_IBUF_DEBUG */
fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n",
- sync, n_stored, sum_sizes);
+ sync, *n_pages, sum_sizes);
#endif
ibuf_exit();
@@ -2550,8 +2564,7 @@ ibuf_is_empty:
btr_pcur_close(&pcur);
buf_read_ibuf_merge_pages(sync, space_ids, space_versions, page_nos,
- n_stored);
- *n_pages = n_stored;
+ *n_pages);
return(sum_sizes + 1);
}
@@ -2621,33 +2634,33 @@ ibuf_contract_after_insert(
ibool sync;
ulint sum_sizes;
ulint size;
-
- mutex_enter(&ibuf_mutex);
-
- if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
- mutex_exit(&ibuf_mutex);
-
+ ulint max_size;
+
+ /* Perform dirty reads of ibuf->size and ibuf->max_size, to
+ reduce ibuf_mutex contention. ibuf->max_size remains constant
+ after ibuf_init_at_db_start(), but ibuf->size should be
+ protected by ibuf_mutex. Given that ibuf->size fits in a
+ machine word, this should be OK; at worst we are doing some
+ excessive ibuf_contract() or occasionally skipping an
+ ibuf_contract(). */
+ size = ibuf->size;
+ max_size = ibuf->max_size;
+
+ if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
return;
}
- sync = FALSE;
-
- if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_ON_INSERT_SYNC) {
-
- sync = TRUE;
- }
-
- mutex_exit(&ibuf_mutex);
+ sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
/* Contract at least entry_size many bytes */
sum_sizes = 0;
size = 1;
- while ((size > 0) && (sum_sizes < entry_size)) {
+ do {
size = ibuf_contract(sync);
sum_sizes += size;
- }
+ } while (size > 0 && sum_sizes < entry_size);
}
/*********************************************************************//**
@@ -3265,7 +3278,7 @@ ibuf_set_entry_counter(
/*********************************************************************//**
Buffer an operation in the insert/delete buffer, instead of doing it
directly to the disk page, if this is possible.
-@return DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
+@return DB_SUCCESS, DB_STRONG_FAIL or other error */
static
ulint
ibuf_insert_low(
@@ -3295,6 +3308,7 @@ ibuf_insert_low(
rec_t* ins_rec;
ibool old_bit_value;
page_t* bitmap_page;
+ buf_block_t* block;
page_t* root;
ulint err;
ibool do_merge;
@@ -3315,13 +3329,17 @@ ibuf_insert_low(
do_merge = FALSE;
- mutex_enter(&ibuf_mutex);
-
+ /* Perform dirty reads of ibuf->size and ibuf->max_size, to
+ reduce ibuf_mutex contention. ibuf->max_size remains constant
+ after ibuf_init_at_db_start(), but ibuf->size should be
+ protected by ibuf_mutex. Given that ibuf->size fits in a
+ machine word, this should be OK; at worst we are doing some
+ excessive ibuf_contract() or occasionally skipping an
+ ibuf_contract(). */
if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
/* Insert buffer is now too big, contract it but do not try
to insert */
- mutex_exit(&ibuf_mutex);
#ifdef UNIV_IBUF_DEBUG
fputs("Ibuf too big\n", stderr);
@@ -3332,40 +3350,6 @@ ibuf_insert_low(
return(DB_STRONG_FAIL);
}
- mutex_exit(&ibuf_mutex);
-
- if (mode == BTR_MODIFY_TREE) {
- mutex_enter(&ibuf_pessimistic_insert_mutex);
-
- ibuf_enter();
-
- mutex_enter(&ibuf_mutex);
-
- while (!ibuf_data_enough_free_for_insert()) {
-
- mutex_exit(&ibuf_mutex);
-
- ibuf_exit();
-
- mutex_exit(&ibuf_pessimistic_insert_mutex);
-
- err = ibuf_add_free_page();
-
- if (err == DB_STRONG_FAIL) {
-
- return(err);
- }
-
- mutex_enter(&ibuf_pessimistic_insert_mutex);
-
- ibuf_enter();
-
- mutex_enter(&ibuf_mutex);
- }
- } else {
- ibuf_enter();
- }
-
heap = mem_heap_create(512);
/* Build the entry which contains the space id and the page number
@@ -3384,6 +3368,31 @@ ibuf_insert_low(
the new entry to it without exceeding the free space limit for the
page. */
+ if (mode == BTR_MODIFY_TREE) {
+ for (;;) {
+ ibuf_enter();
+ mutex_enter(&ibuf_pessimistic_insert_mutex);
+ mutex_enter(&ibuf_mutex);
+
+ if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) {
+
+ break;
+ }
+
+ mutex_exit(&ibuf_mutex);
+ mutex_exit(&ibuf_pessimistic_insert_mutex);
+ ibuf_exit();
+
+ if (UNIV_UNLIKELY(!ibuf_add_free_page())) {
+
+ mem_heap_free(heap);
+ return(DB_STRONG_FAIL);
+ }
+ }
+ } else {
+ ibuf_enter();
+ }
+
mtr_start(&mtr);
btr_pcur_open(ibuf->index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
@@ -3417,9 +3426,14 @@ ibuf_insert_low(
before mtr_commit(&mtr). We must not mtr_commit(&mtr)
until after the IBUF_OP_DELETE has been buffered. */
- err = DB_STRONG_FAIL;
+fail_exit:
+ if (mode == BTR_MODIFY_TREE) {
+ mutex_exit(&ibuf_mutex);
+ mutex_exit(&ibuf_pessimistic_insert_mutex);
+ }
- goto function_exit;
+ err = DB_STRONG_FAIL;
+ goto func_exit;
}
/* After this point, the page could still be loaded to the
@@ -3465,9 +3479,7 @@ ibuf_insert_low(
space_ids, space_versions,
page_nos, &n_stored);
- err = DB_STRONG_FAIL;
-
- goto function_exit;
+ goto fail_exit;
}
}
@@ -3478,11 +3490,9 @@ ibuf_insert_low(
&& !ibuf_set_entry_counter(ibuf_entry, space, page_no, &pcur,
mode == BTR_MODIFY_PREV, &mtr)) {
bitmap_fail:
- err = DB_STRONG_FAIL;
-
mtr_commit(&bitmap_mtr);
- goto function_exit;
+ goto fail_exit;
}
/* Set the bitmap bit denoting that the insert buffer contains
@@ -3506,10 +3516,19 @@ bitmap_fail:
err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
ibuf_entry, &ins_rec,
&dummy_big_rec, 0, thr, &mtr);
- if (err == DB_SUCCESS && op != IBUF_OP_DELETE) {
- /* Update the page max trx id field */
- page_update_max_trx_id(btr_cur_get_block(cursor), NULL,
- thr_get_trx(thr)->id, &mtr);
+ block = btr_cur_get_block(cursor);
+ ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
+
+ /* If this is the root page, update ibuf->empty. */
+ if (UNIV_UNLIKELY(buf_block_get_page_no(block)
+ == FSP_IBUF_TREE_ROOT_PAGE_NO)) {
+ const page_t* root = buf_block_get_frame(block);
+
+ ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
+ ut_ad(page_get_page_no(root)
+ == FSP_IBUF_TREE_ROOT_PAGE_NO);
+
+ ibuf->empty = (page_get_n_recs(root) == 0);
}
} else {
ut_ad(mode == BTR_MODIFY_TREE);
@@ -3526,16 +3545,22 @@ bitmap_fail:
cursor,
ibuf_entry, &ins_rec,
&dummy_big_rec, 0, thr, &mtr);
- if (err == DB_SUCCESS && op != IBUF_OP_DELETE) {
- /* Update the page max trx id field */
- page_update_max_trx_id(btr_cur_get_block(cursor), NULL,
- thr_get_trx(thr)->id, &mtr);
- }
-
+ mutex_exit(&ibuf_pessimistic_insert_mutex);
ibuf_size_update(root, &mtr);
+ mutex_exit(&ibuf_mutex);
+ ibuf->empty = (page_get_n_recs(root) == 0);
+
+ block = btr_cur_get_block(cursor);
+ ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
}
-function_exit:
+ if (err == DB_SUCCESS && op != IBUF_OP_DELETE) {
+ /* Update the page max trx id field */
+ page_update_max_trx_id(block, NULL,
+ thr_get_trx(thr)->id, &mtr);
+ }
+
+func_exit:
#ifdef UNIV_IBUF_COUNT_DEBUG
if (err == DB_SUCCESS) {
fprintf(stderr,
@@ -3547,11 +3572,6 @@ function_exit:
ibuf_count_get(space, page_no) + 1);
}
#endif
- if (mode == BTR_MODIFY_TREE) {
-
- mutex_exit(&ibuf_mutex);
- mutex_exit(&ibuf_pessimistic_insert_mutex);
- }
mtr_commit(&mtr);
btr_pcur_close(&pcur);
@@ -3559,16 +3579,8 @@ function_exit:
mem_heap_free(heap);
- if (err == DB_SUCCESS) {
- mutex_enter(&ibuf_mutex);
-
- ibuf->empty = FALSE;
-
- mutex_exit(&ibuf_mutex);
-
- if (mode == BTR_MODIFY_TREE) {
- ibuf_contract_after_insert(entry_size);
- }
+ if (err == DB_SUCCESS && mode == BTR_MODIFY_TREE) {
+ ibuf_contract_after_insert(entry_size);
}
if (do_merge) {
@@ -4075,6 +4087,22 @@ ibuf_delete_rec(
success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
if (success) {
+ if (UNIV_UNLIKELY(!page_get_n_recs(btr_pcur_get_page(pcur)))) {
+ /* If a B-tree page is empty, it must be the root page
+ and the whole B-tree must be empty. InnoDB does not
+ allow empty B-tree pages other than the root. */
+ root = btr_pcur_get_page(pcur);
+
+ ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
+ ut_ad(page_get_page_no(root)
+ == FSP_IBUF_TREE_ROOT_PAGE_NO);
+
+ /* ibuf->empty is protected by the root page latch.
+ Before the deletion, it had to be FALSE. */
+ ut_ad(!ibuf->empty);
+ ibuf->empty = TRUE;
+ }
+
#ifdef UNIV_IBUF_COUNT_DEBUG
fprintf(stderr,
"Decrementing ibuf count of space %lu page %lu\n"
@@ -4102,6 +4130,7 @@ ibuf_delete_rec(
if (!ibuf_restore_pos(space, page_no, search_tuple,
BTR_MODIFY_TREE, pcur, mtr)) {
+ mutex_exit(&ibuf_mutex);
goto func_exit;
}
@@ -4115,13 +4144,14 @@ ibuf_delete_rec(
ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1);
#endif
ibuf_size_update(root, mtr);
+ mutex_exit(&ibuf_mutex);
+
+ ibuf->empty = (page_get_n_recs(root) == 0);
btr_pcur_commit_specify_mtr(pcur, mtr);
func_exit:
btr_pcur_close(pcur);
- mutex_exit(&ibuf_mutex);
-
return(TRUE);
}
@@ -4495,6 +4525,11 @@ reset_bit:
btr_pcur_close(&pcur);
mem_heap_free(heap);
+#ifdef HAVE_ATOMIC_BUILTINS
+ os_atomic_increment_ulint(&ibuf->n_merges, 1);
+ ibuf_add_ops(ibuf->n_merged_ops, mops);
+ ibuf_add_ops(ibuf->n_discarded_ops, dops);
+#else /* HAVE_ATOMIC_BUILTINS */
/* Protect our statistics keeping from race conditions */
mutex_enter(&ibuf_mutex);
@@ -4503,6 +4538,7 @@ reset_bit:
ibuf_add_ops(ibuf->n_discarded_ops, dops);
mutex_exit(&ibuf_mutex);
+#endif /* HAVE_ATOMIC_BUILTINS */
if (update_ibuf_bitmap && !tablespace_being_deleted) {
@@ -4604,10 +4640,14 @@ leave_loop:
mtr_commit(&mtr);
btr_pcur_close(&pcur);
+#ifdef HAVE_ATOMIC_BUILTINS
+ ibuf_add_ops(ibuf->n_discarded_ops, dops);
+#else /* HAVE_ATOMIC_BUILTINS */
/* Protect our statistics keeping from race conditions */
mutex_enter(&ibuf_mutex);
ibuf_add_ops(ibuf->n_discarded_ops, dops);
mutex_exit(&ibuf_mutex);
+#endif /* HAVE_ATOMIC_BUILTINS */
ibuf_exit();
@@ -4627,37 +4667,18 @@ ibuf_is_empty(void)
mtr_t mtr;
ibuf_enter();
-
- mutex_enter(&ibuf_mutex);
-
mtr_start(&mtr);
+ mutex_enter(&ibuf_mutex);
root = ibuf_tree_root_get(&mtr);
-
- if (page_get_n_recs(root) == 0) {
-
- is_empty = TRUE;
-
- if (ibuf->empty == FALSE) {
- fprintf(stderr,
- "InnoDB: Warning: insert buffer tree is empty"
- " but the data struct does not\n"
- "InnoDB: know it. This condition is legal"
- " if the master thread has not yet\n"
- "InnoDB: run to completion.\n");
- }
- } else {
- ut_a(ibuf->empty == FALSE);
-
- is_empty = FALSE;
- }
-
- mtr_commit(&mtr);
-
mutex_exit(&ibuf_mutex);
+ is_empty = (page_get_n_recs(root) == 0);
+ mtr_commit(&mtr);
ibuf_exit();
+ ut_a(is_empty == ibuf->empty);
+
return(is_empty);
}
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index 757477838ee..7cafa6e0df5 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -615,6 +615,11 @@ struct btr_path_struct{
order); value ULINT_UNDEFINED
denotes array end */
ulint n_recs; /*!< number of records on the page */
+ ulint page_no; /*!< no of the page containing the record */
+ ulint page_level; /*!< level of the page, if later we fetch
+ the page under page_no and it is on a different
+ level then we know that the tree has been
+ reorganized */
};
#define BTR_PATH_ARRAY_N_SLOTS 250 /*!< size of path array (in slots) */
diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h
index 20a2be7f877..6493689a969 100644
--- a/storage/innobase/include/btr0sea.h
+++ b/storage/innobase/include/btr0sea.h
@@ -180,6 +180,7 @@ btr_search_update_hash_on_delete(
btr_cur_t* cursor);/*!< in: cursor which was positioned on the
record to delete using btr_cur_search_...,
the record is not yet deleted */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
/********************************************************************//**
Validates the search system.
@return TRUE if ok */
@@ -187,6 +188,9 @@ UNIV_INTERN
ibool
btr_search_validate(void);
/*======================*/
+#else
+# define btr_search_validate() TRUE
+#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
/** Flag: has the search system been enabled?
Protected by btr_search_latch and btr_search_enabled_mutex. */
diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h
index 6a718a464ab..05d3532d59a 100644
--- a/storage/innobase/include/dict0load.h
+++ b/storage/innobase/include/dict0load.h
@@ -116,19 +116,23 @@ dict_load_column_low(
const rec_t* rec); /*!< in: SYS_COLUMNS record */
/********************************************************************//**
Loads an index definition from a SYS_INDEXES record to dict_index_t.
-@return error message, or NULL on success */
+If allocate=TRUE, we will create a dict_index_t structure and fill it
+accordingly. If allocate=FALSE, the dict_index_t will be supplied by
+the caller and filled with information read from the record.
+@return error message, or NULL on success */
UNIV_INTERN
const char*
dict_load_index_low(
/*================*/
- byte* table_id, /*!< in/out: table id (8 bytes_,
- an "in" value if cached=TRUE
- and "out" when cached=FALSE */
+ byte* table_id, /*!< in/out: table id (8 bytes),
+ an "in" value if allocate=TRUE
+ and "out" when allocate=FALSE */
const char* table_name, /*!< in: table name */
mem_heap_t* heap, /*!< in/out: temporary memory heap */
const rec_t* rec, /*!< in: SYS_INDEXES record */
- ibool cached, /*!< in: TRUE = add to cache
- FALSE = do not */
+ ibool allocate, /*!< in: TRUE=allocate *index,
+ FALSE=fill in a pre-allocated
+ *index */
dict_index_t** index); /*!< out,own: index, or NULL */
/********************************************************************//**
Loads an index field definition from a SYS_FIELDS record to
diff --git a/storage/innobase/include/ha0ha.h b/storage/innobase/include/ha0ha.h
index 1ffbd3440aa..3299000bf3c 100644
--- a/storage/innobase/include/ha0ha.h
+++ b/storage/innobase/include/ha0ha.h
@@ -186,6 +186,7 @@ ha_remove_all_nodes_to_page(
hash_table_t* table, /*!< in: hash table */
ulint fold, /*!< in: fold value */
const page_t* page); /*!< in: buffer page */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
/*************************************************************//**
Validates a given range of the cells in hash table.
@return TRUE if ok */
@@ -196,6 +197,7 @@ ha_validate(
hash_table_t* table, /*!< in: hash table */
ulint start_index, /*!< in: start index */
ulint end_index); /*!< in: end index */
+#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
/*************************************************************//**
Prints info of a hash table. */
UNIV_INTERN
diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
index a9ee1d66b99..b75002944bd 100644
--- a/storage/innobase/include/ha_prototypes.h
+++ b/storage/innobase/include/ha_prototypes.h
@@ -267,5 +267,13 @@ thd_lock_wait_timeout(
/*==================*/
void* thd); /*!< in: thread handle (THD*), or NULL to query
the global innodb_lock_wait_timeout */
+/******************************************************************//**
+Add up the time waited for the lock for the current query. */
+UNIV_INTERN
+void
+thd_set_lock_wait_time(
+/*===================*/
+ void* thd, /*!< in: thread handle (THD*) */
+ ulint value); /*!< in: time waited for the lock */
#endif
diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic
index aee27cf9739..e3fa6e3e929 100644
--- a/storage/innobase/include/ibuf0ibuf.ic
+++ b/storage/innobase/include/ibuf0ibuf.ic
@@ -46,11 +46,12 @@ struct ibuf_struct{
ulint seg_size; /*!< allocated pages of the file
segment containing ibuf header and
tree */
- ibool empty; /*!< after an insert to the ibuf tree
- is performed, this is set to FALSE,
- and if a contract operation finds
- the tree empty, this is set to
- TRUE */
+ ibool empty; /*!< Protected by the page
+ latch of the root page of the
+ insert buffer tree
+ (FSP_IBUF_TREE_ROOT_PAGE_NO). TRUE
+ if and only if the insert
+ buffer tree is empty. */
ulint free_list_len; /*!< length of the free list */
ulint height; /*!< tree height */
dict_index_t* index; /*!< insert buffer index */
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index ac87942f255..5a5af76e175 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -254,8 +254,10 @@ by one. */
option off; also some ibuf tests are suppressed */
/* Linkage specifier for non-static InnoDB symbols (variables and functions)
-that are only referenced from within InnoDB, not from MySQL */
-#if defined(__GNUC__) && (__GNUC__ >= 4) || defined(__INTEL_COMPILER)
+that are only referenced from within InnoDB, not from MySQL. We disable the
+GCC visibility directive on all Sun operating systems because there is no
+easy way to get it to work. See http://bugs.mysql.com/bug.php?id=52263. */
+#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(sun) || defined(__INTEL_COMPILER)
# define UNIV_INTERN __attribute__((visibility ("hidden")))
#else
# define UNIV_INTERN
diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
index 97d699dde99..bea8d7f8fdc 100644
--- a/storage/innobase/srv/srv0srv.c
+++ b/storage/innobase/srv/srv0srv.c
@@ -1643,6 +1643,9 @@ srv_suspend_mysql_thread(
start_time != -1 && finish_time != -1) {
srv_n_lock_max_wait_time = diff_time;
}
+
+ /* Record the lock wait time for this thread */
+ thd_set_lock_wait_time(trx->mysql_thd, diff_time);
}
if (trx->was_chosen_as_deadlock_victim) {