author	Marko Mäkelä <marko.makela@mariadb.com>	2018-05-29 13:52:43 +0300
committer	Marko Mäkelä <marko.makela@mariadb.com>	2018-05-29 14:00:20 +0300
commit	6aa50bad3947a0eab24fb029cd58f5945439e365 (patch)
tree	801221782a6dc0860e03ccdd519010dc91eeac75
parent	b7985a45a67f5e3537b1d7b5a6830a0ad4267554 (diff)
MDEV-16283 ALTER TABLE...DISCARD TABLESPACE still takes long on a large buffer pool
Also fixes MDEV-14727 and MDEV-14491
("InnoDB: Error: Waited for 5 secs for hash index ref_count (1) to drop to 0")
by replacing the flawed wait logic in dict_index_remove_from_cache_low().
On DISCARD TABLESPACE, there is no need to drop the adaptive hash index.
We must drop it on IMPORT TABLESPACE, and eventually on DROP TABLE or
DROP INDEX. As long as the dict_index_t object remains in the cache
and the table remains inaccessible, adaptive hash index entries that
point to orphaned pages do no harm. They will be dropped when the
buffer pool pages are reused for something else.
btr_search_drop_page_hash_when_freed(), buf_LRU_drop_page_hash_batch():
Remove the parameter zip_size, and pass 0 to buf_page_get_gen().
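For reference, the simplified declaration as it appears in include/btr0sea.h
in the patch; the zip_size parameter is gone, and callers pass only the
space id and page number:

/** Drop possible adaptive hash index entries when a page is evicted
from the buffer pool or freed in a file, or the index is being dropped. */
UNIV_INTERN
void
btr_search_drop_page_hash_when_freed(ulint space, ulint page_no);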
buf_page_get_gen(): Ignore zip_size if mode==BUF_PEEK_IF_IN_POOL.
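Concretely, the debug-build consistency check on the compressed page size in
buf_page_get_gen() is relaxed for that mode (excerpt from the patch):

	ut_ad(page_zip_get_size(&block->page.zip) == zip_size
	      || mode == BUF_PEEK_IF_IN_POOL);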
buf_LRU_drop_page_hash_for_tablespace(): Drop the adaptive hash index
even if the tablespace is inaccessible.
buf_LRU_drop_page_hash_for_tablespace(dict_table_t*): New global function
that drops the adaptive hash index entries for every index of a table.
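The new overload, as added to buf/buf0lru.cc in the InnoDB half of the patch
(the XtraDB copy differs only in that btr_search_info_get_ref_count() also
takes the index). It checks whether any index of the table still has adaptive
hash index references and, if so, drops the hash entries in every buffer pool
instance:

/** Drop the adaptive hash index for a tablespace.
@param[in,out]	table	table */
UNIV_INTERN void buf_LRU_drop_page_hash_for_tablespace(dict_table_t* table)
{
	for (dict_index_t* index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {
		if (btr_search_info_get_ref_count(
			    btr_search_get_info(index))) {
			goto drop_ahi;
		}
	}

	return;
drop_ahi:
	ulint id = table->space;
	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
		buf_LRU_drop_page_hash_for_tablespace(buf_pool_from_array(i),
						      id);
	}
}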
buf_LRU_flush_or_remove_pages(), fil_delete_tablespace():
Remove the parameter drop_ahi.
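The trimmed declaration in include/buf0lru.h (from the patch);
fil_delete_tablespace() loses its bool parameter in the same way, and its
callers now pass only the tablespace id:

/** Empty the flush list for all pages belonging to a tablespace.
@param[in]	id	tablespace identifier
@param[in]	trx	transaction, for checking for user interrupt;
			or NULL if nothing is to be written */
UNIV_INTERN void buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx);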
dict_index_remove_from_cache_low(): Actively drop the adaptive hash index
if entries exist. This should prevent InnoDB hangs on DROP TABLE or
DROP INDEX.
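The loop that replaces the 600-second timed wait, condensed from the InnoDB
version of dict_index_remove_from_cache_low() in the patch (the XtraDB
version also passes the index to btr_search_info_get_ref_count()). Instead of
sleeping and eventually calling ut_error, it actively drops the entries and
asserts a bounded number of retries:

	do {
		if (!btr_search_info_get_ref_count(info)) {
			break;
		}

		buf_LRU_drop_page_hash_for_tablespace(table);

		ut_a(++retries < 10000);
	} while (srv_shutdown_state == SRV_SHUTDOWN_NONE || !lru_evict);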
row_import_for_mysql(): Drop any adaptive hash index entries for the table.
row_drop_table_for_mysql(): Drop any adaptive hash index for the table,
except if the table resides in the system tablespace. (DISCARD TABLESPACE
does not apply to the system tablespace, and we do not want to drop the
adaptive hash index for tables other than the one being dropped.)
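The guard added in row_drop_table_for_mysql(), condensed here from the patch
(the in-source comment explains the hang scenario in more detail):

	if (table->space != TRX_SYS_SPACE) {
		/* Drop any adaptive hash index entries upfront, so that
		dict_index_remove_from_cache() cannot hang when the
		tablespace is missing. */
		buf_LRU_drop_page_hash_for_tablespace(table);
	}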
row_truncate_table_for_mysql(): Drop any adaptive hash index entries for
the table, except if the table resides in the system tablespace.
-rw-r--r--	storage/innobase/btr/btr0sea.cc    | 15
-rw-r--r--	storage/innobase/buf/buf0buf.cc    |  8
-rw-r--r--	storage/innobase/buf/buf0lru.cc    | 54
-rw-r--r--	storage/innobase/dict/dict0dict.cc | 32
-rw-r--r--	storage/innobase/fil/fil0fil.cc    |  4
-rw-r--r--	storage/innobase/fsp/fsp0fsp.cc    |  6
-rw-r--r--	storage/innobase/include/btr0sea.h | 13
-rw-r--r--	storage/innobase/include/buf0lru.h | 14
-rw-r--r--	storage/innobase/row/row0import.cc | 12
-rw-r--r--	storage/innobase/row/row0mysql.cc  | 17
-rw-r--r--	storage/xtradb/btr/btr0sea.cc      | 15
-rw-r--r--	storage/xtradb/buf/buf0buf.cc      |  8
-rw-r--r--	storage/xtradb/buf/buf0lru.cc      | 54
-rw-r--r--	storage/xtradb/dict/dict0dict.cc   | 34
-rw-r--r--	storage/xtradb/fil/fil0fil.cc      |  4
-rw-r--r--	storage/xtradb/fsp/fsp0fsp.cc      |  6
-rw-r--r--	storage/xtradb/include/btr0sea.h   | 13
-rw-r--r--	storage/xtradb/include/buf0lru.h   | 14
-rw-r--r--	storage/xtradb/row/row0import.cc   | 12
-rw-r--r--	storage/xtradb/row/row0mysql.cc    | 17
20 files changed, 178 insertions, 174 deletions
diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index e36e6d6194c..bd5cd02aa75 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -2,6 +2,7 @@ Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. +Copyright (c) 2018, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -1250,17 +1251,11 @@ cleanup: mem_free(folds); } -/********************************************************************//** -Drops a possible page hash index when a page is evicted from the buffer pool -or freed in a file segment. */ +/** Drop possible adaptive hash index entries when a page is evicted +from the buffer pool or freed in a file, or the index is being dropped. */ UNIV_INTERN void -btr_search_drop_page_hash_when_freed( -/*=================================*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no) /*!< in: page number */ +btr_search_drop_page_hash_when_freed(ulint space, ulint page_no) { buf_block_t* block; mtr_t mtr; @@ -1273,7 +1268,7 @@ btr_search_drop_page_hash_when_freed( are possibly holding, we cannot s-latch the page, but must (recursively) x-latch it, even though we are only reading. */ - block = buf_page_get_gen(space, zip_size, page_no, RW_X_LATCH, NULL, + block = buf_page_get_gen(space, 0, page_no, RW_X_LATCH, NULL, BUF_PEEK_IF_IN_POOL, __FILE__, __LINE__, &mtr); diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 3b160209cf0..50c052fc690 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -3075,17 +3075,18 @@ buf_page_get_gen( #ifdef UNIV_DEBUG switch (mode) { case BUF_EVICT_IF_IN_POOL: + case BUF_PEEK_IF_IN_POOL: /* After DISCARD TABLESPACE, the tablespace would not exist, but in IMPORT TABLESPACE, PageConverter::operator() must replace any old pages, which were not evicted during DISCARD. - Skip the assertion on zip_size. */ + Similarly, btr_search_drop_page_hash_when_freed() must + remove any old pages. Skip the assertion on zip_size. 
*/ break; case BUF_GET_NO_LATCH: ut_ad(rw_latch == RW_NO_LATCH); /* fall through */ case BUF_GET: case BUF_GET_IF_IN_POOL: - case BUF_PEEK_IF_IN_POOL: case BUF_GET_IF_IN_POOL_OR_WATCH: case BUF_GET_POSSIBLY_FREED: ut_ad(zip_size == fil_space_get_zip_size(space)); @@ -3257,7 +3258,8 @@ got_block: fix_mutex = buf_page_get_mutex(&fix_block->page); - ut_ad(page_zip_get_size(&block->page.zip) == zip_size); + ut_ad(page_zip_get_size(&block->page.zip) == zip_size + || mode == BUF_PEEK_IF_IN_POOL); switch (mode) { case BUF_GET_IF_IN_POOL: diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index 9e89a291c80..7039ecdf4a6 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -241,8 +241,6 @@ void buf_LRU_drop_page_hash_batch( /*=========================*/ ulint space_id, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ const ulint* arr, /*!< in: array of page_no */ ulint count) /*!< in: number of entries in array */ { @@ -252,8 +250,7 @@ buf_LRU_drop_page_hash_batch( ut_ad(count <= BUF_LRU_DROP_SEARCH_SIZE); for (i = 0; i < count; ++i) { - btr_search_drop_page_hash_when_freed(space_id, zip_size, - arr[i]); + btr_search_drop_page_hash_when_freed(space_id, arr[i]); } } @@ -272,15 +269,6 @@ buf_LRU_drop_page_hash_for_tablespace( buf_page_t* bpage; ulint* page_arr; ulint num_entries; - ulint zip_size; - - zip_size = fil_space_get_zip_size(id); - - if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { - /* Somehow, the tablespace does not exist. Nothing to drop. */ - ut_ad(0); - return; - } page_arr = static_cast<ulint*>(ut_malloc( sizeof(ulint) * BUF_LRU_DROP_SEARCH_SIZE)); @@ -333,8 +321,7 @@ next_page: the latching order. */ buf_pool_mutex_exit(buf_pool); - buf_LRU_drop_page_hash_batch( - id, zip_size, page_arr, num_entries); + buf_LRU_drop_page_hash_batch(id, page_arr, num_entries); num_entries = 0; @@ -365,10 +352,32 @@ next_page: buf_pool_mutex_exit(buf_pool); /* Drop any remaining batch of search hashed pages. */ - buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries); + buf_LRU_drop_page_hash_batch(id, page_arr, num_entries); ut_free(page_arr); } +/** Drop the adaptive hash index for a tablespace. +@param[in,out] table table */ +UNIV_INTERN void buf_LRU_drop_page_hash_for_tablespace(dict_table_t* table) +{ + for (dict_index_t* index = dict_table_get_first_index(table); + index != NULL; + index = dict_table_get_next_index(index)) { + if (btr_search_info_get_ref_count( + btr_search_get_info(index))) { + goto drop_ahi; + } + } + + return; +drop_ahi: + ulint id = table->space; + for (ulint i = 0; i < srv_buf_pool_instances; i++) { + buf_LRU_drop_page_hash_for_tablespace(buf_pool_from_array(i), + id); + } +} + /******************************************************************//** While flushing (or removing dirty) pages from a tablespace we don't want to hog the CPU and resources. Release the buffer pool and block @@ -675,18 +684,11 @@ buf_flush_dirty_pages(buf_pool_t* buf_pool, ulint id, const trx_t* trx) /** Empty the flush list for all pages belonging to a tablespace. 
@param[in] id tablespace identifier @param[in] trx transaction, for checking for user interrupt; - or NULL if nothing is to be written -@param[in] drop_ahi whether to drop the adaptive hash index */ -UNIV_INTERN -void -buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx, bool drop_ahi) + or NULL if nothing is to be written */ +UNIV_INTERN void buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx) { for (ulint i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool = buf_pool_from_array(i); - if (drop_ahi) { - buf_LRU_drop_page_hash_for_tablespace(buf_pool, id); - } - buf_flush_dirty_pages(buf_pool, id, trx); + buf_flush_dirty_pages(buf_pool_from_array(i), id, trx); } if (trx && !trx_is_interrupted(trx)) { diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 623657ef9fe..9609ef96343 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -2,7 +2,7 @@ Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. +Copyright (c) 2013, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1674,7 +1674,7 @@ dict_table_rename_in_cache( filepath = fil_make_ibd_name(table->name, false); } - fil_delete_tablespace(table->space, true); + fil_delete_tablespace(table->space); /* Delete any temp file hanging around. */ if (os_file_status(filepath, &exists, &ftype) @@ -2719,35 +2719,13 @@ dict_index_remove_from_cache_low( zero. See also: dict_table_can_be_evicted() */ do { - ulint ref_count = btr_search_info_get_ref_count(info); - - if (ref_count == 0) { + if (!btr_search_info_get_ref_count(info)) { break; } - /* Sleep for 10ms before trying again. */ - os_thread_sleep(10000); - ++retries; - - if (retries % 500 == 0) { - /* No luck after 5 seconds of wait. */ - fprintf(stderr, "InnoDB: Error: Waited for" - " %lu secs for hash index" - " ref_count (%lu) to drop" - " to 0.\n" - "index: \"%s\"" - " table: \"%s\"\n", - retries/100, - ref_count, - index->name, - table->name); - } + buf_LRU_drop_page_hash_for_tablespace(table); - /* To avoid a hang here we commit suicide if the - ref_count doesn't drop to zero in 600 seconds. */ - if (retries >= 60000) { - ut_error; - } + ut_a(++retries < 10000); } while (srv_shutdown_state == SRV_SHUTDOWN_NONE || !lru_evict); rw_lock_free(&index->lock); diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 738afe4ab86..2ba1d90d347 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -2891,7 +2891,7 @@ fil_delete_tablespace(ulint id, bool drop_ahi) To deal with potential read requests by checking the ::stop_new_ops flag in fil_io() */ - buf_LRU_flush_or_remove_pages(id, NULL, drop_ahi); + buf_LRU_flush_or_remove_pages(id, NULL); #endif /* !UNIV_HOTBACKUP */ @@ -3002,7 +3002,7 @@ fil_discard_tablespace( { dberr_t err; - switch (err = fil_delete_tablespace(id, true)) { + switch (err = fil_delete_tablespace(id)) { case DB_SUCCESS: break; diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index 1cf37f366d7..b20c59c4d8c 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. 
-Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -3027,7 +3027,7 @@ fseg_free_page_low( /* Drop search system page hash index if the page is found in the pool and is hashed */ - btr_search_drop_page_hash_when_freed(space, zip_size, page); + btr_search_drop_page_hash_when_freed(space, page); descr = xdes_get_descriptor(space, zip_size, page, mtr); @@ -3247,7 +3247,7 @@ fseg_free_extent( found in the pool and is hashed */ btr_search_drop_page_hash_when_freed( - space, zip_size, first_page_in_extent + i); + space, first_page_in_extent + i); } } diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h index c95ca28057a..4e1df7066d1 100644 --- a/storage/innobase/include/btr0sea.h +++ b/storage/innobase/include/btr0sea.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -141,17 +142,11 @@ btr_search_drop_page_hash_index( s- or x-latched, or an index page for which we know that block->buf_fix_count == 0 */ -/********************************************************************//** -Drops a possible page hash index when a page is evicted from the buffer pool -or freed in a file segment. */ +/** Drop possible adaptive hash index entries when a page is evicted +from the buffer pool or freed in a file, or the index is being dropped. */ UNIV_INTERN void -btr_search_drop_page_hash_when_freed( -/*=================================*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no); /*!< in: page number */ +btr_search_drop_page_hash_when_freed(ulint space, ulint page_no); /********************************************************************//** Updates the page hash index when a single record is inserted on a page. */ UNIV_INTERN diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h index 308bda20c7b..623883433c2 100644 --- a/storage/innobase/include/buf0lru.h +++ b/storage/innobase/include/buf0lru.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -34,6 +34,7 @@ Created 11/5/1995 Heikki Tuuri // Forward declaration struct trx_t; +struct dict_table_t; /******************************************************************//** Returns TRUE if less than 25 % of the buffer pool is available. This can be @@ -52,14 +53,15 @@ These are low-level functions /** Minimum LRU list length for which the LRU_old pointer is defined */ #define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */ +/** Drop the adaptive hash index for a tablespace. 
+@param[in,out] table table */ +UNIV_INTERN void buf_LRU_drop_page_hash_for_tablespace(dict_table_t* table); + /** Empty the flush list for all pages belonging to a tablespace. @param[in] id tablespace identifier @param[in] trx transaction, for checking for user interrupt; - or NULL if nothing is to be written -@param[in] drop_ahi whether to drop the adaptive hash index */ -UNIV_INTERN -void -buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx, bool drop_ahi=false); + or NULL if nothing is to be written */ +UNIV_INTERN void buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /********************************************************************//** diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index a9c24a0f8cc..dfd6b4bfbea 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -31,6 +31,7 @@ Created 2012-02-08 by Sunny Bains. #endif #include "btr0pcur.h" +#include "btr0sea.h" #include "que0que.h" #include "dict0boot.h" #include "ibuf0ibuf.h" @@ -3983,6 +3984,17 @@ row_import_for_mysql( return(row_import_cleanup(prebuilt, trx, err)); } + /* On DISCARD TABLESPACE, we did not drop any adaptive hash + index entries. If we replaced the discarded tablespace with a + smaller one here, there could still be some adaptive hash + index entries that point to cached garbage pages in the buffer + pool, because PageConverter::operator() only evicted those + pages that were replaced by the imported pages. We must + discard all remaining adaptive hash index entries, because the + adaptive hash index must be a subset of the table contents; + false positives are not tolerated. */ + buf_LRU_drop_page_hash_for_tablespace(table); + row_mysql_lock_data_dictionary(trx); /* If the table is stored in a remote tablespace, we need to diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index e34e4fa94ff..be24ae885a2 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -3516,6 +3516,8 @@ row_truncate_table_for_mysql( fil_space_release(space); } + buf_LRU_drop_page_hash_for_tablespace(table); + if (flags != ULINT_UNDEFINED && fil_discard_tablespace(space_id) == DB_SUCCESS) { @@ -4209,6 +4211,21 @@ row_drop_table_for_mysql( rw_lock_x_unlock(dict_index_get_lock(index)); } + if (table->space != TRX_SYS_SPACE) { + /* On DISCARD TABLESPACE, we would not drop the + adaptive hash index entries. If the tablespace is + missing here, delete-marking the record in SYS_INDEXES + would not free any pages in the buffer pool. Thus, + dict_index_remove_from_cache() would hang due to + adaptive hash index entries existing in the buffer + pool. To prevent this hang, and also to guarantee + that btr_search_drop_page_hash_when_freed() will avoid + calling btr_search_drop_page_hash_index() while we + hold the InnoDB dictionary lock, we will drop any + adaptive hash index entries upfront. */ + buf_LRU_drop_page_hash_for_tablespace(table); + } + /* We use the private SQL parser of Innobase to generate the query graphs needed in deleting the dictionary data from system tables in Innobase. Deleting a row from SYS_INDEXES table also diff --git a/storage/xtradb/btr/btr0sea.cc b/storage/xtradb/btr/btr0sea.cc index 12c99246f16..713a584ee7e 100644 --- a/storage/xtradb/btr/btr0sea.cc +++ b/storage/xtradb/btr/btr0sea.cc @@ -2,6 +2,7 @@ Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. 
+Copyright (c) 2018, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -1299,17 +1300,11 @@ cleanup: mem_free(folds); } -/********************************************************************//** -Drops a possible page hash index when a page is evicted from the buffer pool -or freed in a file segment. */ +/** Drop possible adaptive hash index entries when a page is evicted +from the buffer pool or freed in a file, or the index is being dropped. */ UNIV_INTERN void -btr_search_drop_page_hash_when_freed( -/*=================================*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no) /*!< in: page number */ +btr_search_drop_page_hash_when_freed(ulint space, ulint page_no) { buf_block_t* block; mtr_t mtr; @@ -1322,7 +1317,7 @@ btr_search_drop_page_hash_when_freed( are possibly holding, we cannot s-latch the page, but must (recursively) x-latch it, even though we are only reading. */ - block = buf_page_get_gen(space, zip_size, page_no, RW_X_LATCH, NULL, + block = buf_page_get_gen(space, 0, page_no, RW_X_LATCH, NULL, BUF_PEEK_IF_IN_POOL, __FILE__, __LINE__, &mtr); diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc index 12c5c6efb29..3814d08f19a 100644 --- a/storage/xtradb/buf/buf0buf.cc +++ b/storage/xtradb/buf/buf0buf.cc @@ -2969,17 +2969,18 @@ buf_page_get_gen( #ifdef UNIV_DEBUG switch (mode) { case BUF_EVICT_IF_IN_POOL: + case BUF_PEEK_IF_IN_POOL: /* After DISCARD TABLESPACE, the tablespace would not exist, but in IMPORT TABLESPACE, PageConverter::operator() must replace any old pages, which were not evicted during DISCARD. - Skip the assertion on zip_size. */ + Similarly, btr_search_drop_page_hash_when_freed() must + remove any old pages. Skip the assertion on zip_size. */ break; case BUF_GET_NO_LATCH: ut_ad(rw_latch == RW_NO_LATCH); /* fall through */ case BUF_GET: case BUF_GET_IF_IN_POOL: - case BUF_PEEK_IF_IN_POOL: case BUF_GET_IF_IN_POOL_OR_WATCH: case BUF_GET_POSSIBLY_FREED: ut_ad(zip_size == fil_space_get_zip_size(space)); @@ -3156,7 +3157,8 @@ got_block: fix_mutex = buf_page_get_mutex(&fix_block->page); - ut_ad(page_zip_get_size(&block->page.zip) == zip_size); + ut_ad(page_zip_get_size(&block->page.zip) == zip_size + || mode == BUF_PEEK_IF_IN_POOL); switch (mode) { case BUF_GET_IF_IN_POOL: diff --git a/storage/xtradb/buf/buf0lru.cc b/storage/xtradb/buf/buf0lru.cc index 7bf423ed740..2c4a4049de6 100644 --- a/storage/xtradb/buf/buf0lru.cc +++ b/storage/xtradb/buf/buf0lru.cc @@ -238,8 +238,6 @@ void buf_LRU_drop_page_hash_batch( /*=========================*/ ulint space_id, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ const ulint* arr, /*!< in: array of page_no */ ulint count) /*!< in: number of entries in array */ { @@ -249,8 +247,7 @@ buf_LRU_drop_page_hash_batch( ut_ad(count <= BUF_LRU_DROP_SEARCH_SIZE); for (i = 0; i < count; ++i) { - btr_search_drop_page_hash_when_freed(space_id, zip_size, - arr[i]); + btr_search_drop_page_hash_when_freed(space_id, arr[i]); } } @@ -269,15 +266,6 @@ buf_LRU_drop_page_hash_for_tablespace( buf_page_t* bpage; ulint* page_arr; ulint num_entries; - ulint zip_size; - - zip_size = fil_space_get_zip_size(id); - - if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { - /* Somehow, the tablespace does not exist. Nothing to drop. 
*/ - ut_ad(0); - return; - } page_arr = static_cast<ulint*>(ut_malloc( sizeof(ulint) * BUF_LRU_DROP_SEARCH_SIZE)); @@ -331,8 +319,7 @@ next_page: the latching order. */ mutex_exit(&buf_pool->LRU_list_mutex); - buf_LRU_drop_page_hash_batch( - id, zip_size, page_arr, num_entries); + buf_LRU_drop_page_hash_batch(id, page_arr, num_entries); num_entries = 0; @@ -363,10 +350,32 @@ next_page: mutex_exit(&buf_pool->LRU_list_mutex); /* Drop any remaining batch of search hashed pages. */ - buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries); + buf_LRU_drop_page_hash_batch(id, page_arr, num_entries); ut_free(page_arr); } +/** Drop the adaptive hash index for a tablespace. +@param[in,out] table table */ +UNIV_INTERN void buf_LRU_drop_page_hash_for_tablespace(dict_table_t* table) +{ + for (dict_index_t* index = dict_table_get_first_index(table); + index != NULL; + index = dict_table_get_next_index(index)) { + if (btr_search_info_get_ref_count(btr_search_get_info(index), + index)) { + goto drop_ahi; + } + } + + return; +drop_ahi: + ulint id = table->space; + for (ulint i = 0; i < srv_buf_pool_instances; i++) { + buf_LRU_drop_page_hash_for_tablespace(buf_pool_from_array(i), + id); + } +} + /******************************************************************//** While flushing (or removing dirty) pages from a tablespace we don't want to hog the CPU and resources. Release the buffer pool and block @@ -733,18 +742,11 @@ buf_flush_dirty_pages(buf_pool_t* buf_pool, ulint id, const trx_t* trx) /** Empty the flush list for all pages belonging to a tablespace. @param[in] id tablespace identifier @param[in] trx transaction, for checking for user interrupt; - or NULL if nothing is to be written -@param[in] drop_ahi whether to drop the adaptive hash index */ -UNIV_INTERN -void -buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx, bool drop_ahi) + or NULL if nothing is to be written */ +UNIV_INTERN void buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx) { for (ulint i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool = buf_pool_from_array(i); - if (drop_ahi) { - buf_LRU_drop_page_hash_for_tablespace(buf_pool, id); - } - buf_flush_dirty_pages(buf_pool, id, trx); + buf_flush_dirty_pages(buf_pool_from_array(i), id, trx); } if (trx && !trx_is_interrupted(trx)) { diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc index e361b73ab77..7ade6d79adf 100644 --- a/storage/xtradb/dict/dict0dict.cc +++ b/storage/xtradb/dict/dict0dict.cc @@ -2,7 +2,7 @@ Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. +Copyright (c) 2013, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1680,7 +1680,7 @@ dict_table_rename_in_cache( filepath = fil_make_ibd_name(table->name, false); } - fil_delete_tablespace(table->space, true); + fil_delete_tablespace(table->space); /* Delete any temp file hanging around. */ if (os_file_status(filepath, &exists, &ftype) @@ -2729,36 +2729,12 @@ dict_index_remove_from_cache_low( zero. See also: dict_table_can_be_evicted() */ do { - ulint ref_count = btr_search_info_get_ref_count(info, - index); - - if (ref_count == 0) { + if (!btr_search_info_get_ref_count(info, index)) { break; } - /* Sleep for 10ms before trying again. 
*/ - os_thread_sleep(10000); - ++retries; - - if (retries % 500 == 0) { - /* No luck after 5 seconds of wait. */ - fprintf(stderr, "InnoDB: Error: Waited for" - " %lu secs for hash index" - " ref_count (%lu) to drop" - " to 0.\n" - "index: \"%s\"" - " table: \"%s\"\n", - retries/100, - ref_count, - index->name, - table->name); - } - - /* To avoid a hang here we commit suicide if the - ref_count doesn't drop to zero in 600 seconds. */ - if (retries >= 60000) { - ut_error; - } + buf_LRU_drop_page_hash_for_tablespace(table); + ut_a(++retries < 10000); } while (srv_shutdown_state == SRV_SHUTDOWN_NONE || !lru_evict); rw_lock_free(&index->lock); diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index a24b319fda6..9bd26fcf35b 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -2936,7 +2936,7 @@ fil_delete_tablespace(ulint id, bool drop_ahi) To deal with potential read requests by checking the ::stop_new_ops flag in fil_io() */ - buf_LRU_flush_or_remove_pages(id, NULL, drop_ahi); + buf_LRU_flush_or_remove_pages(id, NULL); #endif /* !UNIV_HOTBACKUP */ @@ -3047,7 +3047,7 @@ fil_discard_tablespace( { dberr_t err; - switch (err = fil_delete_tablespace(id, true)) { + switch (err = fil_delete_tablespace(id)) { case DB_SUCCESS: break; diff --git a/storage/xtradb/fsp/fsp0fsp.cc b/storage/xtradb/fsp/fsp0fsp.cc index ffed8a6edd3..f97e0c1331b 100644 --- a/storage/xtradb/fsp/fsp0fsp.cc +++ b/storage/xtradb/fsp/fsp0fsp.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -3035,7 +3035,7 @@ fseg_free_page_low( /* Drop search system page hash index if the page is found in the pool and is hashed */ - btr_search_drop_page_hash_when_freed(space, zip_size, page); + btr_search_drop_page_hash_when_freed(space, page); descr = xdes_get_descriptor(space, zip_size, page, mtr); @@ -3261,7 +3261,7 @@ fseg_free_extent( found in the pool and is hashed */ btr_search_drop_page_hash_when_freed( - space, zip_size, first_page_in_extent + i); + space, first_page_in_extent + i); } } diff --git a/storage/xtradb/include/btr0sea.h b/storage/xtradb/include/btr0sea.h index bfe2c43defb..55366d3c0d3 100644 --- a/storage/xtradb/include/btr0sea.h +++ b/storage/xtradb/include/btr0sea.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -142,17 +143,11 @@ btr_search_drop_page_hash_index( s- or x-latched, or an index page for which we know that block->buf_fix_count == 0 */ -/********************************************************************//** -Drops a possible page hash index when a page is evicted from the buffer pool -or freed in a file segment. */ +/** Drop possible adaptive hash index entries when a page is evicted +from the buffer pool or freed in a file, or the index is being dropped. 
*/ UNIV_INTERN void -btr_search_drop_page_hash_when_freed( -/*=================================*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no); /*!< in: page number */ +btr_search_drop_page_hash_when_freed(ulint space, ulint page_no); /********************************************************************//** Updates the page hash index when a single record is inserted on a page. */ UNIV_INTERN diff --git a/storage/xtradb/include/buf0lru.h b/storage/xtradb/include/buf0lru.h index 1bc11937fa1..f0ba1bb227d 100644 --- a/storage/xtradb/include/buf0lru.h +++ b/storage/xtradb/include/buf0lru.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -36,6 +36,7 @@ Created 11/5/1995 Heikki Tuuri // Forward declaration struct trx_t; +struct dict_table_t; /******************************************************************//** Returns TRUE if less than 25 % of the buffer pool is available. This can be @@ -54,14 +55,15 @@ These are low-level functions /** Minimum LRU list length for which the LRU_old pointer is defined */ #define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */ +/** Drop the adaptive hash index for a tablespace. +@param[in,out] table table */ +UNIV_INTERN void buf_LRU_drop_page_hash_for_tablespace(dict_table_t* table); + /** Empty the flush list for all pages belonging to a tablespace. @param[in] id tablespace identifier @param[in] trx transaction, for checking for user interrupt; - or NULL if nothing is to be written -@param[in] drop_ahi whether to drop the adaptive hash index */ -UNIV_INTERN -void -buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx, bool drop_ahi=false); + or NULL if nothing is to be written */ +UNIV_INTERN void buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /********************************************************************//** diff --git a/storage/xtradb/row/row0import.cc b/storage/xtradb/row/row0import.cc index 36ffba68291..a3478dc2fc4 100644 --- a/storage/xtradb/row/row0import.cc +++ b/storage/xtradb/row/row0import.cc @@ -31,6 +31,7 @@ Created 2012-02-08 by Sunny Bains. #endif #include "btr0pcur.h" +#include "btr0sea.h" #include "que0que.h" #include "dict0boot.h" #include "ibuf0ibuf.h" @@ -3982,6 +3983,17 @@ row_import_for_mysql( return(row_import_cleanup(prebuilt, trx, err)); } + /* On DISCARD TABLESPACE, we did not drop any adaptive hash + index entries. If we replaced the discarded tablespace with a + smaller one here, there could still be some adaptive hash + index entries that point to cached garbage pages in the buffer + pool, because PageConverter::operator() only evicted those + pages that were replaced by the imported pages. We must + discard all remaining adaptive hash index entries, because the + adaptive hash index must be a subset of the table contents; + false positives are not tolerated. 
*/ + buf_LRU_drop_page_hash_for_tablespace(table); + row_mysql_lock_data_dictionary(trx); /* If the table is stored in a remote tablespace, we need to diff --git a/storage/xtradb/row/row0mysql.cc b/storage/xtradb/row/row0mysql.cc index 1acdfe53e0c..2b6f38ba2af 100644 --- a/storage/xtradb/row/row0mysql.cc +++ b/storage/xtradb/row/row0mysql.cc @@ -3540,6 +3540,8 @@ row_truncate_table_for_mysql( fil_space_release(space); } + buf_LRU_drop_page_hash_for_tablespace(table); + if (flags != ULINT_UNDEFINED && fil_discard_tablespace(space_id) == DB_SUCCESS) { @@ -4239,6 +4241,21 @@ row_drop_table_for_mysql( rw_lock_x_unlock(dict_index_get_lock(index)); } + if (table->space != TRX_SYS_SPACE) { + /* On DISCARD TABLESPACE, we would not drop the + adaptive hash index entries. If the tablespace is + missing here, delete-marking the record in SYS_INDEXES + would not free any pages in the buffer pool. Thus, + dict_index_remove_from_cache() would hang due to + adaptive hash index entries existing in the buffer + pool. To prevent this hang, and also to guarantee + that btr_search_drop_page_hash_when_freed() will avoid + calling btr_search_drop_page_hash_index() while we + hold the InnoDB dictionary lock, we will drop any + adaptive hash index entries upfront. */ + buf_LRU_drop_page_hash_for_tablespace(table); + } + /* We use the private SQL parser of Innobase to generate the query graphs needed in deleting the dictionary data from system tables in Innobase. Deleting a row from SYS_INDEXES table also |