diff options
26 files changed, 498 insertions, 240 deletions
diff --git a/include/mysql/plugin.h b/include/mysql/plugin.h index 19cf0ed050d..15f7d785ead 100644 --- a/include/mysql/plugin.h +++ b/include/mysql/plugin.h @@ -528,6 +528,7 @@ long long thd_test_options(const MYSQL_THD thd, long long test_options); int thd_sql_command(const MYSQL_THD thd); const char *thd_proc_info(MYSQL_THD thd, const char *info); void **thd_ha_data(const MYSQL_THD thd, const struct handlerton *hton); +void thd_storage_lock_wait(MYSQL_THD thd, long long value); int thd_tx_isolation(const MYSQL_THD thd); char *thd_security_context(MYSQL_THD thd, char *buffer, unsigned int length, unsigned int max_query_len); diff --git a/include/mysql/plugin.h.pp b/include/mysql/plugin.h.pp index ce9902ee418..55d62b8daab 100644 --- a/include/mysql/plugin.h.pp +++ b/include/mysql/plugin.h.pp @@ -151,6 +151,7 @@ long long thd_test_options(const void* thd, long long test_options); int thd_sql_command(const void* thd); const char *thd_proc_info(void* thd, const char *info); void **thd_ha_data(const void* thd, const struct handlerton *hton); +void thd_storage_lock_wait(void* thd, long long value); int thd_tx_isolation(const void* thd); char *thd_security_context(void* thd, char *buffer, unsigned int length, unsigned int max_query_len); diff --git a/mysql-test/r/endspace.result b/mysql-test/r/endspace.result index 9c8d12362c4..4eca88774b4 100644 --- a/mysql-test/r/endspace.result +++ b/mysql-test/r/endspace.result @@ -54,8 +54,8 @@ text1 like 'teststring_%' ORDER BY text1; text1 teststring teststring -select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%'; -concat('|', text1, '|') +select concat('|', text1, '|') as c from t1 where text1='teststring' or text1 like 'teststring_%' order by c; +c |teststring | |teststring| select concat('|', text1, '|') from t1 where text1='teststring' or text1 > 'teststring\t'; @@ -105,11 +105,11 @@ select concat('|', text1, '|') from t1 where text1 like 'teststring_%'; concat('|', text1, '|') 
|teststring | |teststring | -select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%'; -concat('|', text1, '|') +select concat('|', text1, '|') as c from t1 where text1='teststring' or text1 like 'teststring_%' order by c; +c |teststring | -|teststring| |teststring | +|teststring| select concat('|', text1, '|') from t1 where text1='teststring' or text1 > 'teststring\t'; concat('|', text1, '|') |teststring| @@ -123,8 +123,8 @@ concat('|', text1, '|') drop table t1; create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)) pack_keys=0; insert into t1 values ('teststring'), ('nothing'), ('teststring\t'); -select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%'; -concat('|', text1, '|') +select concat('|', text1, '|') as c from t1 where text1='teststring' or text1 like 'teststring_%' order by c; +c |teststring | |teststring| select concat('|', text1, '|') from t1 where text1='teststring' or text1 >= 'teststring\t'; @@ -203,13 +203,13 @@ teststring teststring select text1, length(text1) from t1 where text1='teststring' or text1 like 'teststring_%'; text1 length(text1) -teststring 11 teststring 10 +teststring 11 teststring 11 select text1, length(text1) from t1 where text1='teststring' or text1 >= 'teststring\t'; text1 length(text1) -teststring 11 teststring 10 +teststring 11 teststring 11 select concat('|', text1, '|') from t1 order by text1; concat('|', text1, '|') diff --git a/mysql-test/r/rowid_order_innodb.result b/mysql-test/r/rowid_order_innodb.result index e0796cd7ab5..dc339304041 100644 --- a/mysql-test/r/rowid_order_innodb.result +++ b/mysql-test/r/rowid_order_innodb.result @@ -15,7 +15,7 @@ insert into t1 values (-5, 1, 1), (10, 1, 1); explain select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index_merge key1,key2 key1,key2 5,5 NULL 4 Using sort_union(key1,key2); Using where +1 SIMPLE 
t1 index_merge key1,key2 key1,key2 5,5 NULL 5 Using sort_union(key1,key2); Using where select * from t1 force index(key1, key2) where key1 < 3 or key2 < 3; pk1 key1 key2 -100 1 1 diff --git a/mysql-test/r/type_bit_innodb.result b/mysql-test/r/type_bit_innodb.result index a9c3cae1770..909db576b27 100644 --- a/mysql-test/r/type_bit_innodb.result +++ b/mysql-test/r/type_bit_innodb.result @@ -233,7 +233,7 @@ a+0 b+0 127 403 explain select a+0, b+0 from t1 where a > 40 and b > 200 order by 1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range a a 2 NULL 19 Using where; Using index; Using filesort +1 SIMPLE t1 range a a 2 NULL 27 Using where; Using index; Using filesort select a+0, b+0 from t1 where a > 40 and b > 200 order by 1; a+0 b+0 44 307 diff --git a/mysql-test/suite/innodb/r/innodb_gis.result b/mysql-test/suite/innodb/r/innodb_gis.result index 0ce1ebe56ad..5712d08c9fa 100644 --- a/mysql-test/suite/innodb/r/innodb_gis.result +++ b/mysql-test/suite/innodb/r/innodb_gis.result @@ -572,7 +572,7 @@ COUNT(*) EXPLAIN SELECT COUNT(*) FROM t2 WHERE p=POINTFROMTEXT('POINT(1 2)'); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 ref p p 28 const 1 Using where +1 SIMPLE t2 ref p p 28 const 2 Using where SELECT COUNT(*) FROM t2 WHERE p=POINTFROMTEXT('POINT(1 2)'); COUNT(*) 2 diff --git a/mysql-test/suite/innodb/r/innodb_mysql.result b/mysql-test/suite/innodb/r/innodb_mysql.result index 9a677f83080..51beed66f0b 100644 --- a/mysql-test/suite/innodb/r/innodb_mysql.result +++ b/mysql-test/suite/innodb/r/innodb_mysql.result @@ -889,13 +889,13 @@ EXPLAIN SELECT * FROM t1 WHERE b BETWEEN 1 AND 2 ORDER BY a; id 1 select_type SIMPLE table t1 -type range +type index possible_keys bkey -key bkey -key_len 5 +key PRIMARY +key_len 4 ref NULL -rows 16 -Extra Using where; Using index; Using filesort +rows 32 +Extra Using where SELECT * FROM t1 WHERE b BETWEEN 1 AND 2 ORDER BY a; a b 1 2 @@ -934,12 +934,12 @@ EXPLAIN SELECT * 
FROM t1 WHERE b BETWEEN 1 AND 2 ORDER BY b,a; id 1 select_type SIMPLE table t1 -type range +type index possible_keys bkey key bkey key_len 5 ref NULL -rows 16 +rows 32 Extra Using where; Using index SELECT * FROM t1 WHERE b BETWEEN 1 AND 2 ORDER BY b,a; a b @@ -989,7 +989,7 @@ possible_keys bkey key bkey key_len 5 ref const -rows 8 +rows 16 Extra Using where; Using index; Using filesort SELECT * FROM t2 WHERE b=1 ORDER BY a; a b c @@ -1018,7 +1018,7 @@ possible_keys bkey key bkey key_len 10 ref const,const -rows 8 +rows 16 Extra Using where; Using index SELECT * FROM t2 WHERE b=1 AND c=1 ORDER BY a; a b c @@ -1047,7 +1047,7 @@ possible_keys bkey key bkey key_len 10 ref const,const -rows 8 +rows 16 Extra Using where; Using index SELECT * FROM t2 WHERE b=1 AND c=1 ORDER BY b,c,a; a b c @@ -1076,7 +1076,7 @@ possible_keys bkey key bkey key_len 10 ref const,const -rows 8 +rows 16 Extra Using where; Using index SELECT * FROM t2 WHERE b=1 AND c=1 ORDER BY c,a; a b c @@ -1213,7 +1213,7 @@ possible_keys b key b key_len 5 ref const -rows 1 +rows 2 Extra Using where; Using index SELECT * FROM t1 WHERE b=2 ORDER BY a ASC; a b @@ -1228,7 +1228,7 @@ possible_keys b key b key_len 5 ref const -rows 1 +rows 2 Extra Using where; Using index SELECT * FROM t1 WHERE b=2 ORDER BY a DESC; a b @@ -1372,7 +1372,7 @@ INSERT INTO t1 (a,b,c) VALUES (1,1,1), (2,1,1), (3,1,1), (4,1,1); INSERT INTO t1 (a,b,c) SELECT a+4,b,c FROM t1; EXPLAIN SELECT a, b, c FROM t1 WHERE b = 1 ORDER BY a DESC LIMIT 5; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index t1_b PRIMARY 4 NULL 8 Using where +1 SIMPLE t1 range t1_b t1_b 5 NULL 8 Using where SELECT a, b, c FROM t1 WHERE b = 1 ORDER BY a DESC LIMIT 5; a b c 8 1 1 @@ -1735,7 +1735,7 @@ SELECT 1 FROM (SELECT COUNT(DISTINCT c1) FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY <derived2> system NULL NULL NULL NULL 1 -2 DERIVED t1 index c3,c2 c2 
10 NULL 5 +2 DERIVED t1 ALL c3,c2 c3 5 5 Using filesort DROP TABLE t1; CREATE TABLE t1 (c1 REAL, c2 REAL, c3 REAL, KEY (c3), KEY (c2, c3)) ENGINE=InnoDB; @@ -1749,7 +1749,7 @@ SELECT 1 FROM (SELECT COUNT(DISTINCT c1) FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY <derived2> system NULL NULL NULL NULL 1 -2 DERIVED t1 index c3,c2 c2 18 NULL 5 +2 DERIVED t1 ALL c3,c2 c3 9 5 Using filesort DROP TABLE t1; CREATE TABLE t1 (c1 DECIMAL(12,2), c2 DECIMAL(12,2), c3 DECIMAL(12,2), KEY (c3), KEY (c2, c3)) @@ -1764,7 +1764,7 @@ SELECT 1 FROM (SELECT COUNT(DISTINCT c1) FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY <derived2> system NULL NULL NULL NULL 1 -2 DERIVED t1 index c3,c2 c2 14 NULL 5 +2 DERIVED t1 ALL c3,c2 c3 7 5 Using filesort DROP TABLE t1; End of 5.1 tests # @@ -1871,7 +1871,7 @@ possible_keys b key b key_len 5 ref NULL -rows 3 +rows 5 Extra Using where; Using index EXPLAIN SELECT c FROM bar WHERE c>2;; id 1 @@ -2536,7 +2536,7 @@ f1 f2 f3 f4 EXPLAIN SELECT * FROM t1 WHERE f2 = 1 AND f4 = TRUE ORDER BY f1 DESC LIMIT 5; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range f2,f4 f4 1 NULL 11 Using where +1 SIMPLE t1 range f2,f4 f4 1 NULL 22 Using where DROP TABLE t1; # # Bug#54117 crash in thr_multi_unlock, temporary table diff --git a/mysql-test/suite/sys_vars/r/innodb_strict_mode_basic.result b/mysql-test/suite/sys_vars/r/innodb_strict_mode_basic.result index 5e55faa99c9..200f9166215 100644 --- a/mysql-test/suite/sys_vars/r/innodb_strict_mode_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_strict_mode_basic.result @@ -1,32 +1,32 @@ SET @start_global_value = @@global.innodb_strict_mode; SELECT @start_global_value; @start_global_value -1 +0 Valid values are 'ON' and 'OFF' select @@global.innodb_strict_mode in (0, 1); @@global.innodb_strict_mode in (0, 1) 1 select 
@@global.innodb_strict_mode; @@global.innodb_strict_mode -1 +0 select @@session.innodb_strict_mode in (0, 1); @@session.innodb_strict_mode in (0, 1) 1 select @@session.innodb_strict_mode; @@session.innodb_strict_mode -1 +0 show global variables like 'innodb_strict_mode'; Variable_name Value -innodb_strict_mode ON +innodb_strict_mode OFF show session variables like 'innodb_strict_mode'; Variable_name Value -innodb_strict_mode ON +innodb_strict_mode OFF select * from information_schema.global_variables where variable_name='innodb_strict_mode'; VARIABLE_NAME VARIABLE_VALUE -INNODB_STRICT_MODE ON +INNODB_STRICT_MODE OFF select * from information_schema.session_variables where variable_name='innodb_strict_mode'; VARIABLE_NAME VARIABLE_VALUE -INNODB_STRICT_MODE ON +INNODB_STRICT_MODE OFF set global innodb_strict_mode='OFF'; set session innodb_strict_mode='OFF'; select @@global.innodb_strict_mode; @@ -117,4 +117,4 @@ INNODB_STRICT_MODE ON SET @@global.innodb_strict_mode = @start_global_value; SELECT @@global.innodb_strict_mode; @@global.innodb_strict_mode -1 +0 diff --git a/mysql-test/t/endspace.test b/mysql-test/t/endspace.test index b223c683cde..7c71b05f687 100644 --- a/mysql-test/t/endspace.test +++ b/mysql-test/t/endspace.test @@ -27,7 +27,7 @@ alter table t1 modify text1 char(32) binary not null; check table t1; select * from t1 ignore key (key1) where text1='teststring' or text1 like 'teststring_%' ORDER BY text1; -select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%'; +select concat('|', text1, '|') as c from t1 where text1='teststring' or text1 like 'teststring_%' order by c; select concat('|', text1, '|') from t1 where text1='teststring' or text1 > 'teststring\t'; select text1, length(text1) from t1 order by text1; select text1, length(text1) from t1 order by binary text1; @@ -44,14 +44,14 @@ select concat('|', text1, '|') from t1 where text1='teststring'; select concat('|', text1, '|') from t1 where text1='teststring '; 
explain select concat('|', text1, '|') from t1 where text1='teststring '; select concat('|', text1, '|') from t1 where text1 like 'teststring_%'; -select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%'; +select concat('|', text1, '|') as c from t1 where text1='teststring' or text1 like 'teststring_%' order by c; select concat('|', text1, '|') from t1 where text1='teststring' or text1 > 'teststring\t'; select concat('|', text1, '|') from t1 order by text1; drop table t1; create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)) pack_keys=0; insert into t1 values ('teststring'), ('nothing'), ('teststring\t'); -select concat('|', text1, '|') from t1 where text1='teststring' or text1 like 'teststring_%'; +select concat('|', text1, '|') as c from t1 where text1='teststring' or text1 like 'teststring_%' order by c; select concat('|', text1, '|') from t1 where text1='teststring' or text1 >= 'teststring\t'; drop table t1; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 2fdf270ae66..08d06b07578 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -306,6 +306,11 @@ void **thd_ha_data(const THD *thd, const struct handlerton *hton) return (void **) &thd->ha_data[hton->slot].ha_ptr; } +extern "C" +void thd_storage_lock_wait(THD *thd, long long value) +{ + thd->utime_after_lock+= value; +} /** Provide a handler data getter to simplify coding diff --git a/sql/sql_class.h b/sql/sql_class.h index 78eeda5843d..052537e521f 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -1501,7 +1501,7 @@ public: // track down slow pthread_create ulonglong prior_thr_create_utime, thr_create_utime; ulonglong start_utime, utime_after_lock; - + thr_lock_type update_lock_default; Delayed_insert *di; diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index 537d5f51184..2549589b0c7 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -3153,6 +3153,7 @@ btr_cur_add_path_info( { btr_path_t* 
slot; rec_t* rec; + page_t* page; ut_a(cursor->path_arr); @@ -3175,8 +3176,155 @@ btr_cur_add_path_info( slot = cursor->path_arr + (root_height - height); + page = page_align(rec); + slot->nth_rec = page_rec_get_n_recs_before(rec); - slot->n_recs = page_get_n_recs(page_align(rec)); + slot->n_recs = page_get_n_recs(page); + slot->page_no = page_get_page_no(page); + slot->page_level = btr_page_get_level_low(page); +} + +/*******************************************************************//** +Estimate the number of rows between slot1 and slot2 for any level on a +B-tree. This function starts from slot1->page and reads a few pages to +the right, counting their records. If we reach slot2->page quickly then +we know exactly how many records there are between slot1 and slot2 and +we set is_n_rows_exact to TRUE. If we cannot reach slot2->page quickly +then we calculate the average number of records in the pages scanned +so far and assume that all pages that we did not scan up to slot2->page +contain the same number of records, then we multiply that average by +the number of pages between slot1->page and slot2->page (which is +n_rows_on_prev_level). In this case we set is_n_rows_exact to FALSE. +@return number of rows (exact or estimated) */ +static +ib_int64_t +btr_estimate_n_rows_in_range_on_level( +/*==================================*/ + dict_index_t* index, /*!< in: index */ + btr_path_t* slot1, /*!< in: left border */ + btr_path_t* slot2, /*!< in: right border */ + ib_int64_t n_rows_on_prev_level, /*!< in: number of rows + on the previous level for the + same descend paths; used to + determine the number of pages + on this level */ + ibool* is_n_rows_exact) /*!< out: TRUE if the returned + value is exact i.e.
not an + estimation */ +{ + ulint space; + ib_int64_t n_rows; + ulint n_pages_read; + ulint page_no; + ulint zip_size; + ulint level; + + space = dict_index_get_space(index); + + n_rows = 0; + n_pages_read = 0; + + /* Assume by default that we will scan all pages between + slot1->page_no and slot2->page_no */ + *is_n_rows_exact = TRUE; + + /* add records from slot1->page_no which are to the right of + the record which serves as a left border of the range, if any */ + if (slot1->nth_rec < slot1->n_recs) { + n_rows += slot1->n_recs - slot1->nth_rec; + } + + /* add records from slot2->page_no which are to the left of + the record which serves as a right border of the range, if any */ + if (slot2->nth_rec > 1) { + n_rows += slot2->nth_rec - 1; + } + + /* count the records in the pages between slot1->page_no and + slot2->page_no (non inclusive), if any */ + + zip_size = fil_space_get_zip_size(space); + + /* Do not read more than this number of pages in order not to hurt + performance with this code which is just an estimation. If we read + this many pages before reaching slot2->page_no then we estimate the + average from the pages scanned so far */ + #define N_PAGES_READ_LIMIT 10 + + page_no = slot1->page_no; + level = slot1->page_level; + + do { + mtr_t mtr; + page_t* page; + buf_block_t* block; + + mtr_start(&mtr); + + /* fetch the page */ + block = buf_page_get(space, zip_size, page_no, RW_S_LATCH, + &mtr); + + page = buf_block_get_frame(block); + + /* It is possible that the tree has been reorganized in the + meantime and this is a different page. If this happens the + calculated estimate will be bogus, which is not fatal as + this is only an estimate. We are sure that a page with + page_no exists because InnoDB never frees pages, only + reuses them.
*/ + if (fil_page_get_type(page) != FIL_PAGE_INDEX + || btr_page_get_index_id(page) != index->id + || btr_page_get_level_low(page) != level) { + + /* The page got reused for something else; commit the + mini-transaction first so that the page latch taken by + buf_page_get() above is released before we bail out */ + mtr_commit(&mtr); + goto inexact; + } + + n_pages_read++; + + if (page_no != slot1->page_no) { + /* Do not count the records on slot1->page_no, + we already counted them before this loop. */ + n_rows += page_get_n_recs(page); + } + + page_no = btr_page_get_next(page, &mtr); + + mtr_commit(&mtr); + + if (n_pages_read == N_PAGES_READ_LIMIT + || page_no == FIL_NULL) { + /* Either we read too many pages or + we reached the end of the level without passing + through slot2->page_no, the tree must have changed + in the meantime */ + goto inexact; + } + + } while (page_no != slot2->page_no); + + return(n_rows); + +inexact: + + *is_n_rows_exact = FALSE; + + /* We did interrupt before reaching slot2->page */ + + if (n_pages_read > 0) { + /* The number of pages on this level is + n_rows_on_prev_level, multiply it by the + average number of recs per page so far */ + n_rows = n_rows_on_prev_level + * n_rows / n_pages_read; + } else { + /* The tree changed before we could even + start with slot1->page_no */ + n_rows = 10; + } + + return(n_rows); } /*******************************************************************//** @@ -3201,6 +3349,7 @@ btr_estimate_n_rows_in_range( ibool diverged_lot; ulint divergence_level; ib_int64_t n_rows; + ibool is_n_rows_exact; ulint i; mtr_t mtr; @@ -3243,6 +3392,7 @@ btr_estimate_n_rows_in_range( /* We have the path information for the range in path1 and path2 */ n_rows = 1; + is_n_rows_exact = TRUE; diverged = FALSE; /* This becomes true when the path is not the same any more */ diverged_lot = FALSE; /* This becomes true when the paths are @@ -3258,7 +3408,7 @@ btr_estimate_n_rows_in_range( if (slot1->nth_rec == ULINT_UNDEFINED || slot2->nth_rec == ULINT_UNDEFINED) { - if (i > divergence_level + 1) { + if (i > divergence_level + 1 && !is_n_rows_exact) { /* In trees whose
height is > 1 our algorithm tends to underestimate: multiply the estimate by 2: */ @@ -3270,7 +3420,9 @@ btr_estimate_n_rows_in_range( to over 1 / 2 of the estimated rows in the whole table */ - if (n_rows > index->table->stat_n_rows / 2) { + if (n_rows > index->table->stat_n_rows / 2 + && !is_n_rows_exact) { + n_rows = index->table->stat_n_rows / 2; /* If there are just 0 or 1 rows in the table, @@ -3296,10 +3448,15 @@ btr_estimate_n_rows_in_range( divergence_level = i; } } else { - /* Maybe the tree has changed between - searches */ - - return(10); + /* It is possible that + slot1->nth_rec >= slot2->nth_rec + if, for example, we have a single page + tree which contains (inf, 5, 6, supr) + and we select where x > 20 and x < 30; + in this case slot1->nth_rec will point + to the supr record and slot2->nth_rec + will point to 6 */ + n_rows = 0; } } else if (diverged && !diverged_lot) { @@ -3323,8 +3480,9 @@ btr_estimate_n_rows_in_range( } } else if (diverged_lot) { - n_rows = (n_rows * (slot1->n_recs + slot2->n_recs)) - / 2; + n_rows = btr_estimate_n_rows_in_range_on_level( + index, slot1, slot2, n_rows, + &is_n_rows_exact); } } } diff --git a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c index 06cc48c7c60..fb667bcae82 100644 --- a/storage/innobase/btr/btr0sea.c +++ b/storage/innobase/btr/btr0sea.c @@ -1746,6 +1746,7 @@ function_exit: } } +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG /********************************************************************//** Validates the search system. 
@return TRUE if ok */ @@ -1913,3 +1914,4 @@ btr_search_validate(void) return(ok); } +#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ diff --git a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c index 3737627301f..4131d863e6a 100644 --- a/storage/innobase/buf/buf0flu.c +++ b/storage/innobase/buf/buf0flu.c @@ -1248,8 +1248,12 @@ buf_flush_try_neighbors( /*====================*/ ulint space, /*!< in: space id */ ulint offset, /*!< in: page offset */ - enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU or + enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ + ulint n_flushed, /*!< in: number of pages + flushed so far in this batch */ + ulint n_to_flush) /*!< in: maximum number of pages + we are allowed to flush */ { ulint i; ulint low; @@ -1290,6 +1294,21 @@ buf_flush_try_neighbors( buf_page_t* bpage; + if ((count + n_flushed) >= n_to_flush) { + + /* We have already flushed enough pages and + should call it a day. There is, however, one + exception. If the page whose neighbors we + are flushing has not been flushed yet then + we'll try to flush the victim that we + selected originally. */ + if (i <= offset) { + i = offset; + } else { + break; + } + } + buf_pool = buf_pool_get(space, i); buf_pool_mutex_enter(buf_pool); @@ -1357,6 +1376,8 @@ buf_flush_page_and_try_neighbors( buf_page_in_file(bpage) */ enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ + ulint n_to_flush, /*!< in: number of pages to + flush */ ulint* count) /*!< in/out: number of pages flushed */ { @@ -1390,7 +1411,11 @@ buf_flush_page_and_try_neighbors( mutex_exit(block_mutex); /* Try to flush also all the neighbors */ - *count += buf_flush_try_neighbors(space, offset, flush_type); + *count += buf_flush_try_neighbors(space, + offset, + flush_type, + *count, + n_to_flush); buf_pool_mutex_enter(buf_pool); flushed = TRUE; @@ -1430,7 +1455,7 @@ buf_flush_LRU_list_batch( a page that isn't ready for flushing. 
*/ while (bpage != NULL && !buf_flush_page_and_try_neighbors( - bpage, BUF_FLUSH_LRU, &count)) { + bpage, BUF_FLUSH_LRU, max, &count)) { bpage = UT_LIST_GET_PREV(LRU, bpage); } @@ -1511,7 +1536,7 @@ buf_flush_flush_list_batch( while (bpage != NULL && len > 0 && !buf_flush_page_and_try_neighbors( - bpage, BUF_FLUSH_LIST, &count)) { + bpage, BUF_FLUSH_LIST, min_n, &count)) { buf_flush_list_mutex_enter(buf_pool); diff --git a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c index 20a18c72a39..6bd15f0556a 100644 --- a/storage/innobase/dict/dict0load.c +++ b/storage/innobase/dict/dict0load.c @@ -1175,23 +1175,23 @@ static const char* dict_load_index_id_err = "SYS_INDEXES.TABLE_ID mismatch"; /********************************************************************//** Loads an index definition from a SYS_INDEXES record to dict_index_t. -If "cached" is set to "TRUE", we will create a dict_index_t structure -and fill it accordingly. Otherwise, the dict_index_t will -be supplied by the caller and filled with information read from -the record. -@return error message, or NULL on success */ +If allocate=TRUE, we will create a dict_index_t structure and fill it +accordingly. If allocate=FALSE, the dict_index_t will be supplied by +the caller and filled with information read from the record.
@return +error message, or NULL on success */ UNIV_INTERN const char* dict_load_index_low( /*================*/ byte* table_id, /*!< in/out: table id (8 bytes), - an "in" value if cached=TRUE - and "out" when cached=FALSE */ + an "in" value if allocate=TRUE + and "out" when allocate=FALSE */ const char* table_name, /*!< in: table name */ mem_heap_t* heap, /*!< in/out: temporary memory heap */ const rec_t* rec, /*!< in: SYS_INDEXES record */ - ibool cached, /*!< in: TRUE = add to cache, - FALSE = do not */ + ibool allocate, /*!< in: TRUE=allocate *index, + FALSE=fill in a pre-allocated + *index */ dict_index_t** index) /*!< out,own: index, or NULL */ { const byte* field; @@ -1203,8 +1203,8 @@ dict_load_index_low( ulint type; ulint space; - if (cached) { - /* If "cached" is set to TRUE, no dict_index_t will + if (allocate) { + /* If allocate=TRUE, no dict_index_t will be supplied. Initialize "*index" to NULL */ *index = NULL; } @@ -1223,7 +1223,7 @@ err_len: return("incorrect column length in SYS_INDEXES"); } - if (!cached) { + if (!allocate) { /* We are reading a SYS_INDEXES record. Copy the table_id */ memcpy(table_id, (const char*)field, 8); } else if (memcmp(field, table_id, 8)) { @@ -1279,7 +1279,7 @@ err_len: goto err_len; } - if (cached) { + if (allocate) { *index = dict_mem_index_create(table_name, name_buf, space, type, n_fields); } else { diff --git a/storage/innobase/ha/ha0ha.c b/storage/innobase/ha/ha0ha.c index f9e798012f8..7f11917de0a 100644 --- a/storage/innobase/ha/ha0ha.c +++ b/storage/innobase/ha/ha0ha.c @@ -354,6 +354,7 @@ ha_remove_all_nodes_to_page( #endif } +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG /*************************************************************//** Validates a given range of the cells in hash table. @return TRUE if ok */ @@ -400,6 +401,7 @@ ha_validate( return(ok); } +#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ /*************************************************************//** Prints info of a hash table. 
*/ diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index ab9df9a0272..a004cba9603 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -425,7 +425,7 @@ static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG, static MYSQL_THDVAR_BOOL(strict_mode, PLUGIN_VAR_OPCMDARG, "Use strict mode when evaluating create options.", - NULL, NULL, TRUE); + NULL, NULL, FALSE); static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG, "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.", @@ -807,6 +807,20 @@ thd_lock_wait_timeout( return(THDVAR((THD*) thd, lock_wait_timeout)); } +/******************************************************************//** +Set the time waited for the lock for the current query. */ +extern "C" UNIV_INTERN +void +thd_set_lock_wait_time( +/*===================*/ + void* thd, /*!< in: thread handle (THD*) */ + ulint value) /*!< in: time waited for the lock */ +{ + if (thd) { + thd_storage_lock_wait((THD*)thd, value); + } +} + /********************************************************************//** Obtain the InnoDB transaction of a MySQL thread. 
@return reference to transaction pointer */ diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c index dc8e61e5070..a048de0e884 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.c +++ b/storage/innobase/ibuf/ibuf0ibuf.c @@ -55,6 +55,7 @@ Created 7/19/1997 Heikki Tuuri #include "lock0lock.h" #include "log0recv.h" #include "que0que.h" +#include "srv0start.h" /* srv_shutdown_state */ /* STRUCTURE OF AN INSERT BUFFER RECORD @@ -395,8 +396,10 @@ ibuf_tree_root_get( mtr_t* mtr) /*!< in: mtr */ { buf_block_t* block; + page_t* root; ut_ad(ibuf_inside()); + ut_ad(mutex_own(&ibuf_mutex)); mtr_x_lock(dict_index_get_lock(ibuf->index), mtr); @@ -405,7 +408,13 @@ ibuf_tree_root_get( buf_block_dbg_add_level(block, SYNC_TREE_NODE); - return(buf_block_get_frame(block)); + root = buf_block_get_frame(block); + + ut_ad(page_get_space_id(root) == IBUF_SPACE_ID); + ut_ad(page_get_page_no(root) == FSP_IBUF_TREE_ROOT_PAGE_NO); + ut_ad(ibuf->empty == (page_get_n_recs(root) == 0)); + + return(root); } #ifdef UNIV_IBUF_COUNT_DEBUG @@ -482,8 +491,6 @@ ibuf_size_update( /* the '1 +' is the ibuf header page */ ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len); - - ibuf->empty = page_get_n_recs(root) == 0; } /******************************************************************//** @@ -554,6 +561,7 @@ ibuf_init_at_db_start(void) ibuf_size_update(root, &mtr); mutex_exit(&ibuf_mutex); + ibuf->empty = (page_get_n_recs(root) == 0); mtr_commit(&mtr); ibuf_exit(); @@ -1350,10 +1358,18 @@ ibuf_add_ops( const ulint* ops) /*!< in: operation counts */ { +#ifndef HAVE_ATOMIC_BUILTINS + ut_ad(mutex_own(&ibuf_mutex)); +#endif /* !HAVE_ATOMIC_BUILTINS */ + ulint i; for (i = 0; i < IBUF_OP_COUNT; i++) { +#ifdef HAVE_ATOMIC_BUILTINS + os_atomic_increment_ulint(&arr[i], ops[i]); +#else /* HAVE_ATOMIC_BUILTINS */ arr[i] += ops[i]; +#endif /* HAVE_ATOMIC_BUILTINS */ } } @@ -2017,9 +2033,9 @@ ibuf_data_too_much_free(void) 
/*********************************************************************//** Allocates a new page from the ibuf file segment and adds it to the free list. -@return DB_SUCCESS, or DB_STRONG_FAIL if no space left */ +@return TRUE on success, FALSE if no space left */ static -ulint +ibool ibuf_add_free_page(void) /*====================*/ { @@ -2055,10 +2071,10 @@ ibuf_add_free_page(void) header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP, &mtr); - if (page_no == FIL_NULL) { + if (UNIV_UNLIKELY(page_no == FIL_NULL)) { mtr_commit(&mtr); - return(DB_STRONG_FAIL); + return(FALSE); } { @@ -2096,16 +2112,16 @@ ibuf_add_free_page(void) bitmap_page = ibuf_bitmap_get_map_page( IBUF_SPACE_ID, page_no, zip_size, &mtr); + mutex_exit(&ibuf_mutex); + ibuf_bitmap_page_set_bits( bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, TRUE, &mtr); mtr_commit(&mtr); - mutex_exit(&ibuf_mutex); - ibuf_exit(); - return(DB_SUCCESS); + return(TRUE); } /*********************************************************************//** @@ -2135,20 +2151,17 @@ ibuf_remove_free_page(void) header_page = ibuf_header_page_get(&mtr); /* Prevent pessimistic inserts to insert buffer trees for a while */ - mutex_enter(&ibuf_pessimistic_insert_mutex); - ibuf_enter(); - + mutex_enter(&ibuf_pessimistic_insert_mutex); mutex_enter(&ibuf_mutex); if (!ibuf_data_too_much_free()) { mutex_exit(&ibuf_mutex); + mutex_exit(&ibuf_pessimistic_insert_mutex); ibuf_exit(); - mutex_exit(&ibuf_pessimistic_insert_mutex); - mtr_commit(&mtr); return; @@ -2158,6 +2171,8 @@ ibuf_remove_free_page(void) root = ibuf_tree_root_get(&mtr2); + mutex_exit(&ibuf_mutex); + page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, &mtr2).page; @@ -2166,7 +2181,6 @@ ibuf_remove_free_page(void) is a level 2 page. 
*/ mtr_commit(&mtr2); - mutex_exit(&ibuf_mutex); ibuf_exit(); @@ -2209,17 +2223,19 @@ ibuf_remove_free_page(void) flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr); + mutex_exit(&ibuf_pessimistic_insert_mutex); + ibuf->seg_size--; ibuf->free_list_len--; - mutex_exit(&ibuf_pessimistic_insert_mutex); - /* Set the bit indicating that this page is no more an ibuf tree page (level 2 page) */ bitmap_page = ibuf_bitmap_get_map_page( IBUF_SPACE_ID, page_no, zip_size, &mtr); + mutex_exit(&ibuf_mutex); + ibuf_bitmap_page_set_bits( bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr); @@ -2228,8 +2244,6 @@ ibuf_remove_free_page(void) #endif mtr_commit(&mtr); - mutex_exit(&ibuf_mutex); - ibuf_exit(); } @@ -2270,17 +2284,16 @@ ibuf_free_excess_pages(void) for (i = 0; i < 4; i++) { - mutex_enter(&ibuf_mutex); - - if (!ibuf_data_too_much_free()) { + ibool too_much_free; - mutex_exit(&ibuf_mutex); + mutex_enter(&ibuf_mutex); + too_much_free = ibuf_data_too_much_free(); + mutex_exit(&ibuf_mutex); + if (!too_much_free) { return; } - mutex_exit(&ibuf_mutex); - ibuf_remove_free_page(); } } @@ -2476,18 +2489,20 @@ ibuf_contract_ext( ulint page_nos[IBUF_MAX_N_PAGES_MERGED]; ulint space_ids[IBUF_MAX_N_PAGES_MERGED]; ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED]; - ulint n_stored; ulint sum_sizes; mtr_t mtr; *n_pages = 0; ut_ad(!ibuf_inside()); - mutex_enter(&ibuf_mutex); + /* We perform a dirty read of ibuf->empty, without latching + the insert buffer root page. We trust this dirty read except + when a slow shutdown is being executed. During a slow + shutdown, the insert buffer merge must be completed. 
*/ - if (ibuf->empty) { + if (UNIV_UNLIKELY(ibuf->empty) + && UNIV_LIKELY(!srv_shutdown_state)) { ibuf_is_empty: - mutex_exit(&ibuf_mutex); #if 0 /* TODO */ if (srv_shutdown_state) { @@ -2519,13 +2534,14 @@ ibuf_is_empty: ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index)); if (page_get_n_recs(btr_pcur_get_page(&pcur)) == 0) { - /* When the ibuf tree is emptied completely, the last record - is removed using an optimistic delete and ibuf_size_update - is not called, causing ibuf->empty to remain FALSE. If we do - not reset it to TRUE here then database shutdown will hang - in the loop in ibuf_contract_for_n_pages. */ - - ibuf->empty = TRUE; + /* If a B-tree page is empty, it must be the root page + and the whole B-tree must be empty. InnoDB does not + allow empty B-tree pages other than the root. */ + ut_ad(ibuf->empty); + ut_ad(page_get_space_id(btr_pcur_get_page(&pcur)) + == IBUF_SPACE_ID); + ut_ad(page_get_page_no(btr_pcur_get_page(&pcur)) + == FSP_IBUF_TREE_ROOT_PAGE_NO); ibuf_exit(); @@ -2535,14 +2551,12 @@ ibuf_is_empty: goto ibuf_is_empty; } - mutex_exit(&ibuf_mutex); - sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur), space_ids, space_versions, - page_nos, &n_stored); + page_nos, n_pages); #if 0 /* defined UNIV_IBUF_DEBUG */ fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n", - sync, n_stored, sum_sizes); + sync, *n_pages, sum_sizes); #endif ibuf_exit(); @@ -2550,8 +2564,7 @@ ibuf_is_empty: btr_pcur_close(&pcur); buf_read_ibuf_merge_pages(sync, space_ids, space_versions, page_nos, - n_stored); - *n_pages = n_stored; + *n_pages); return(sum_sizes + 1); } @@ -2621,33 +2634,33 @@ ibuf_contract_after_insert( ibool sync; ulint sum_sizes; ulint size; - - mutex_enter(&ibuf_mutex); - - if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) { - mutex_exit(&ibuf_mutex); - + ulint max_size; + + /* Perform dirty reads of ibuf->size and ibuf->max_size, to + reduce ibuf_mutex contention. 
ibuf->max_size remains constant + after ibuf_init_at_db_start(), but ibuf->size should be + protected by ibuf_mutex. Given that ibuf->size fits in a + machine word, this should be OK; at worst we are doing some + excessive ibuf_contract() or occasionally skipping a + ibuf_contract(). */ + size = ibuf->size; + max_size = ibuf->max_size; + + if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) { return; } - sync = FALSE; - - if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_ON_INSERT_SYNC) { - - sync = TRUE; - } - - mutex_exit(&ibuf_mutex); + sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC); /* Contract at least entry_size many bytes */ sum_sizes = 0; size = 1; - while ((size > 0) && (sum_sizes < entry_size)) { + do { size = ibuf_contract(sync); sum_sizes += size; - } + } while (size > 0 && sum_sizes < entry_size); } /*********************************************************************//** @@ -3265,7 +3278,7 @@ ibuf_set_entry_counter( /*********************************************************************//** Buffer an operation in the insert/delete buffer, instead of doing it directly to the disk page, if this is possible. -@return DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */ +@return DB_SUCCESS, DB_STRONG_FAIL or other error */ static ulint ibuf_insert_low( @@ -3295,6 +3308,7 @@ ibuf_insert_low( rec_t* ins_rec; ibool old_bit_value; page_t* bitmap_page; + buf_block_t* block; page_t* root; ulint err; ibool do_merge; @@ -3315,13 +3329,17 @@ ibuf_insert_low( do_merge = FALSE; - mutex_enter(&ibuf_mutex); - + /* Perform dirty reads of ibuf->size and ibuf->max_size, to + reduce ibuf_mutex contention. ibuf->max_size remains constant + after ibuf_init_at_db_start(), but ibuf->size should be + protected by ibuf_mutex. Given that ibuf->size fits in a + machine word, this should be OK; at worst we are doing some + excessive ibuf_contract() or occasionally skipping a + ibuf_contract(). 
*/ if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) { /* Insert buffer is now too big, contract it but do not try to insert */ - mutex_exit(&ibuf_mutex); #ifdef UNIV_IBUF_DEBUG fputs("Ibuf too big\n", stderr); @@ -3332,40 +3350,6 @@ ibuf_insert_low( return(DB_STRONG_FAIL); } - mutex_exit(&ibuf_mutex); - - if (mode == BTR_MODIFY_TREE) { - mutex_enter(&ibuf_pessimistic_insert_mutex); - - ibuf_enter(); - - mutex_enter(&ibuf_mutex); - - while (!ibuf_data_enough_free_for_insert()) { - - mutex_exit(&ibuf_mutex); - - ibuf_exit(); - - mutex_exit(&ibuf_pessimistic_insert_mutex); - - err = ibuf_add_free_page(); - - if (err == DB_STRONG_FAIL) { - - return(err); - } - - mutex_enter(&ibuf_pessimistic_insert_mutex); - - ibuf_enter(); - - mutex_enter(&ibuf_mutex); - } - } else { - ibuf_enter(); - } - heap = mem_heap_create(512); /* Build the entry which contains the space id and the page number @@ -3384,6 +3368,31 @@ ibuf_insert_low( the new entry to it without exceeding the free space limit for the page. */ + if (mode == BTR_MODIFY_TREE) { + for (;;) { + ibuf_enter(); + mutex_enter(&ibuf_pessimistic_insert_mutex); + mutex_enter(&ibuf_mutex); + + if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) { + + break; + } + + mutex_exit(&ibuf_mutex); + mutex_exit(&ibuf_pessimistic_insert_mutex); + ibuf_exit(); + + if (UNIV_UNLIKELY(!ibuf_add_free_page())) { + + mem_heap_free(heap); + return(DB_STRONG_FAIL); + } + } + } else { + ibuf_enter(); + } + mtr_start(&mtr); btr_pcur_open(ibuf->index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr); @@ -3417,9 +3426,14 @@ ibuf_insert_low( before mtr_commit(&mtr). We must not mtr_commit(&mtr) until after the IBUF_OP_DELETE has been buffered. 
*/ - err = DB_STRONG_FAIL; +fail_exit: + if (mode == BTR_MODIFY_TREE) { + mutex_exit(&ibuf_mutex); + mutex_exit(&ibuf_pessimistic_insert_mutex); + } - goto function_exit; + err = DB_STRONG_FAIL; + goto func_exit; } /* After this point, the page could still be loaded to the @@ -3465,9 +3479,7 @@ ibuf_insert_low( space_ids, space_versions, page_nos, &n_stored); - err = DB_STRONG_FAIL; - - goto function_exit; + goto fail_exit; } } @@ -3478,11 +3490,9 @@ ibuf_insert_low( && !ibuf_set_entry_counter(ibuf_entry, space, page_no, &pcur, mode == BTR_MODIFY_PREV, &mtr)) { bitmap_fail: - err = DB_STRONG_FAIL; - mtr_commit(&bitmap_mtr); - goto function_exit; + goto fail_exit; } /* Set the bitmap bit denoting that the insert buffer contains @@ -3506,10 +3516,19 @@ bitmap_fail: err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor, ibuf_entry, &ins_rec, &dummy_big_rec, 0, thr, &mtr); - if (err == DB_SUCCESS && op != IBUF_OP_DELETE) { - /* Update the page max trx id field */ - page_update_max_trx_id(btr_cur_get_block(cursor), NULL, - thr_get_trx(thr)->id, &mtr); + block = btr_cur_get_block(cursor); + ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID); + + /* If this is the root page, update ibuf->empty. 
*/ + if (UNIV_UNLIKELY(buf_block_get_page_no(block) + == FSP_IBUF_TREE_ROOT_PAGE_NO)) { + const page_t* root = buf_block_get_frame(block); + + ut_ad(page_get_space_id(root) == IBUF_SPACE_ID); + ut_ad(page_get_page_no(root) + == FSP_IBUF_TREE_ROOT_PAGE_NO); + + ibuf->empty = (page_get_n_recs(root) == 0); } } else { ut_ad(mode == BTR_MODIFY_TREE); @@ -3526,16 +3545,22 @@ bitmap_fail: cursor, ibuf_entry, &ins_rec, &dummy_big_rec, 0, thr, &mtr); - if (err == DB_SUCCESS && op != IBUF_OP_DELETE) { - /* Update the page max trx id field */ - page_update_max_trx_id(btr_cur_get_block(cursor), NULL, - thr_get_trx(thr)->id, &mtr); - } - + mutex_exit(&ibuf_pessimistic_insert_mutex); ibuf_size_update(root, &mtr); + mutex_exit(&ibuf_mutex); + ibuf->empty = (page_get_n_recs(root) == 0); + + block = btr_cur_get_block(cursor); + ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID); } -function_exit: + if (err == DB_SUCCESS && op != IBUF_OP_DELETE) { + /* Update the page max trx id field */ + page_update_max_trx_id(block, NULL, + thr_get_trx(thr)->id, &mtr); + } + +func_exit: #ifdef UNIV_IBUF_COUNT_DEBUG if (err == DB_SUCCESS) { fprintf(stderr, @@ -3547,11 +3572,6 @@ function_exit: ibuf_count_get(space, page_no) + 1); } #endif - if (mode == BTR_MODIFY_TREE) { - - mutex_exit(&ibuf_mutex); - mutex_exit(&ibuf_pessimistic_insert_mutex); - } mtr_commit(&mtr); btr_pcur_close(&pcur); @@ -3559,16 +3579,8 @@ function_exit: mem_heap_free(heap); - if (err == DB_SUCCESS) { - mutex_enter(&ibuf_mutex); - - ibuf->empty = FALSE; - - mutex_exit(&ibuf_mutex); - - if (mode == BTR_MODIFY_TREE) { - ibuf_contract_after_insert(entry_size); - } + if (err == DB_SUCCESS && mode == BTR_MODIFY_TREE) { + ibuf_contract_after_insert(entry_size); } if (do_merge) { @@ -4075,6 +4087,22 @@ ibuf_delete_rec( success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr); if (success) { + if (UNIV_UNLIKELY(!page_get_n_recs(btr_pcur_get_page(pcur)))) { + /* If a B-tree page is empty, it must be the root page + and the 
whole B-tree must be empty. InnoDB does not + allow empty B-tree pages other than the root. */ + root = btr_pcur_get_page(pcur); + + ut_ad(page_get_space_id(root) == IBUF_SPACE_ID); + ut_ad(page_get_page_no(root) + == FSP_IBUF_TREE_ROOT_PAGE_NO); + + /* ibuf->empty is protected by the root page latch. + Before the deletion, it had to be FALSE. */ + ut_ad(!ibuf->empty); + ibuf->empty = TRUE; + } + #ifdef UNIV_IBUF_COUNT_DEBUG fprintf(stderr, "Decrementing ibuf count of space %lu page %lu\n" @@ -4102,6 +4130,7 @@ ibuf_delete_rec( if (!ibuf_restore_pos(space, page_no, search_tuple, BTR_MODIFY_TREE, pcur, mtr)) { + mutex_exit(&ibuf_mutex); goto func_exit; } @@ -4115,13 +4144,14 @@ ibuf_delete_rec( ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1); #endif ibuf_size_update(root, mtr); + mutex_exit(&ibuf_mutex); + + ibuf->empty = (page_get_n_recs(root) == 0); btr_pcur_commit_specify_mtr(pcur, mtr); func_exit: btr_pcur_close(pcur); - mutex_exit(&ibuf_mutex); - return(TRUE); } @@ -4495,6 +4525,11 @@ reset_bit: btr_pcur_close(&pcur); mem_heap_free(heap); +#ifdef HAVE_ATOMIC_BUILTINS + os_atomic_increment_ulint(&ibuf->n_merges, 1); + ibuf_add_ops(ibuf->n_merged_ops, mops); + ibuf_add_ops(ibuf->n_discarded_ops, dops); +#else /* HAVE_ATOMIC_BUILTINS */ /* Protect our statistics keeping from race conditions */ mutex_enter(&ibuf_mutex); @@ -4503,6 +4538,7 @@ reset_bit: ibuf_add_ops(ibuf->n_discarded_ops, dops); mutex_exit(&ibuf_mutex); +#endif /* HAVE_ATOMIC_BUILTINS */ if (update_ibuf_bitmap && !tablespace_being_deleted) { @@ -4604,10 +4640,14 @@ leave_loop: mtr_commit(&mtr); btr_pcur_close(&pcur); +#ifdef HAVE_ATOMIC_BUILTINS + ibuf_add_ops(ibuf->n_discarded_ops, dops); +#else /* HAVE_ATOMIC_BUILTINS */ /* Protect our statistics keeping from race conditions */ mutex_enter(&ibuf_mutex); ibuf_add_ops(ibuf->n_discarded_ops, dops); mutex_exit(&ibuf_mutex); +#endif /* HAVE_ATOMIC_BUILTINS */ ibuf_exit(); @@ -4627,37 +4667,18 @@ ibuf_is_empty(void) mtr_t mtr; 
ibuf_enter(); - - mutex_enter(&ibuf_mutex); - mtr_start(&mtr); + mutex_enter(&ibuf_mutex); root = ibuf_tree_root_get(&mtr); - - if (page_get_n_recs(root) == 0) { - - is_empty = TRUE; - - if (ibuf->empty == FALSE) { - fprintf(stderr, - "InnoDB: Warning: insert buffer tree is empty" - " but the data struct does not\n" - "InnoDB: know it. This condition is legal" - " if the master thread has not yet\n" - "InnoDB: run to completion.\n"); - } - } else { - ut_a(ibuf->empty == FALSE); - - is_empty = FALSE; - } - - mtr_commit(&mtr); - mutex_exit(&ibuf_mutex); + is_empty = (page_get_n_recs(root) == 0); + mtr_commit(&mtr); ibuf_exit(); + ut_a(is_empty == ibuf->empty); + return(is_empty); } diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index 757477838ee..7cafa6e0df5 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -615,6 +615,11 @@ struct btr_path_struct{ order); value ULINT_UNDEFINED denotes array end */ ulint n_recs; /*!< number of records on the page */ + ulint page_no; /*!< no of the page containing the record */ + ulint page_level; /*!< level of the page, if later we fetch + the page under page_no and it is on a different + level then we know that the tree has been + reorganized */ }; #define BTR_PATH_ARRAY_N_SLOTS 250 /*!< size of path array (in slots) */ diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h index 20a2be7f877..6493689a969 100644 --- a/storage/innobase/include/btr0sea.h +++ b/storage/innobase/include/btr0sea.h @@ -180,6 +180,7 @@ btr_search_update_hash_on_delete( btr_cur_t* cursor);/*!< in: cursor which was positioned on the record to delete using btr_cur_search_..., the record is not yet deleted */ +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG /********************************************************************//** Validates the search system. 
@return TRUE if ok */ @@ -187,6 +188,9 @@ UNIV_INTERN ibool btr_search_validate(void); /*======================*/ +#else +# define btr_search_validate() TRUE +#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ /** Flag: has the search system been enabled? Protected by btr_search_latch and btr_search_enabled_mutex. */ diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h index 6a718a464ab..05d3532d59a 100644 --- a/storage/innobase/include/dict0load.h +++ b/storage/innobase/include/dict0load.h @@ -116,19 +116,23 @@ dict_load_column_low( const rec_t* rec); /*!< in: SYS_COLUMNS record */ /********************************************************************//** Loads an index definition from a SYS_INDEXES record to dict_index_t. -@return error message, or NULL on success */ +If allocate=TRUE, we will create a dict_index_t structure and fill it +accordingly. If allocate=FALSE, the dict_index_t will be supplied by +the caller and filled with information read from the record. 
@return +error message, or NULL on success */ UNIV_INTERN const char* dict_load_index_low( /*================*/ - byte* table_id, /*!< in/out: table id (8 bytes_, - an "in" value if cached=TRUE - and "out" when cached=FALSE */ + byte* table_id, /*!< in/out: table id (8 bytes), + an "in" value if allocate=TRUE + and "out" when allocate=FALSE */ const char* table_name, /*!< in: table name */ mem_heap_t* heap, /*!< in/out: temporary memory heap */ const rec_t* rec, /*!< in: SYS_INDEXES record */ - ibool cached, /*!< in: TRUE = add to cache - FALSE = do not */ + ibool allocate, /*!< in: TRUE=allocate *index, + FALSE=fill in a pre-allocated + *index */ dict_index_t** index); /*!< out,own: index, or NULL */ /********************************************************************//** Loads an index field definition from a SYS_FIELDS record to diff --git a/storage/innobase/include/ha0ha.h b/storage/innobase/include/ha0ha.h index 1ffbd3440aa..3299000bf3c 100644 --- a/storage/innobase/include/ha0ha.h +++ b/storage/innobase/include/ha0ha.h @@ -186,6 +186,7 @@ ha_remove_all_nodes_to_page( hash_table_t* table, /*!< in: hash table */ ulint fold, /*!< in: fold value */ const page_t* page); /*!< in: buffer page */ +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG /*************************************************************//** Validates a given range of the cells in hash table. @return TRUE if ok */ @@ -196,6 +197,7 @@ ha_validate( hash_table_t* table, /*!< in: hash table */ ulint start_index, /*!< in: start index */ ulint end_index); /*!< in: end index */ +#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ /*************************************************************//** Prints info of a hash table. 
*/ UNIV_INTERN diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index a9ee1d66b99..b75002944bd 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -267,5 +267,13 @@ thd_lock_wait_timeout( /*==================*/ void* thd); /*!< in: thread handle (THD*), or NULL to query the global innodb_lock_wait_timeout */ +/******************************************************************//** +Add up the time waited for the lock for the current query. */ +UNIV_INTERN +void +thd_set_lock_wait_time( +/*===================*/ + void* thd, /*!< in: thread handle (THD*) */ + ulint value); /*!< in: time waited for the lock */ #endif diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic index aee27cf9739..e3fa6e3e929 100644 --- a/storage/innobase/include/ibuf0ibuf.ic +++ b/storage/innobase/include/ibuf0ibuf.ic @@ -46,11 +46,12 @@ struct ibuf_struct{ ulint seg_size; /*!< allocated pages of the file segment containing ibuf header and tree */ - ibool empty; /*!< after an insert to the ibuf tree - is performed, this is set to FALSE, - and if a contract operation finds - the tree empty, this is set to - TRUE */ + ibool empty; /*!< Protected by the page + latch of the root page of the + insert buffer tree + (FSP_IBUF_TREE_ROOT_PAGE_NO). TRUE + if and only if the insert + buffer tree is empty. */ ulint free_list_len; /*!< length of the free list */ ulint height; /*!< tree height */ dict_index_t* index; /*!< insert buffer index */ diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index ac87942f255..5a5af76e175 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -254,8 +254,10 @@ by one. 
*/ option off; also some ibuf tests are suppressed */ /* Linkage specifier for non-static InnoDB symbols (variables and functions) -that are only referenced from within InnoDB, not from MySQL */ -#if defined(__GNUC__) && (__GNUC__ >= 4) || defined(__INTEL_COMPILER) +that are only referenced from within InnoDB, not from MySQL. We disable the +GCC visibility directive on all Sun operating systems because there is no +easy way to get it to work. See http://bugs.mysql.com/bug.php?id=52263. */ +#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(sun) || defined(__INTEL_COMPILER) # define UNIV_INTERN __attribute__((visibility ("hidden"))) #else # define UNIV_INTERN diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index 97d699dde99..bea8d7f8fdc 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -1643,6 +1643,9 @@ srv_suspend_mysql_thread( start_time != -1 && finish_time != -1) { srv_n_lock_max_wait_time = diff_time; } + + /* Record the lock wait time for this thread */ + thd_set_lock_wait_time(trx->mysql_thd, diff_time); } if (trx->was_chosen_as_deadlock_victim) { |