diff options
24 files changed, 515 insertions, 66 deletions
diff --git a/storage/xtradb/btr/btr0pcur.cc b/storage/xtradb/btr/btr0pcur.cc index dd6ef484fc3..748bcc0b5b0 100644 --- a/storage/xtradb/btr/btr0pcur.cc +++ b/storage/xtradb/btr/btr0pcur.cc @@ -33,6 +33,40 @@ Created 2/23/1996 Heikki Tuuri #include "rem0cmp.h" #include "trx0trx.h" #include "srv0srv.h" + +/** Updates fragmentation statistics for a single page transition. +@param[in] page the current page being processed +@param[in] page_no page number to move to (next_page_no + if forward_direction is true, + prev_page_no otherwise. +@param[in] forward_direction move direction: true means moving + forward, false - backward. */ +static +void +btr_update_scan_stats(const page_t* page, ulint page_no, bool forward_direction) +{ + fragmentation_stats_t stats; + memset(&stats, 0, sizeof(stats)); + ulint extracted_page_no = page_get_page_no(page); + ulint delta = forward_direction ? + page_no - extracted_page_no : + extracted_page_no - page_no; + + if (delta == 1) { + ++stats.scan_pages_contiguous; + } else { + ++stats.scan_pages_disjointed; + } + stats.scan_pages_total_seek_distance += + extracted_page_no > page_no ? + extracted_page_no - page_no : + page_no - extracted_page_no; + + stats.scan_data_size += page_get_data_size(page); + stats.scan_deleted_recs_size += + page_header_get_field(page, PAGE_GARBAGE); + thd_add_fragmentation_stats(current_thd, &stats); +} /**************************************************************//** Allocates memory for a persistent cursor object and initializes the cursor. @return own: persistent cursor */ @@ -426,6 +460,8 @@ btr_pcur_move_to_next_page( ut_ad(next_page_no != FIL_NULL); + btr_update_scan_stats(page, next_page_no, true /* forward */); + next_block = btr_block_get(space, zip_size, next_page_no, cursor->latch_mode, btr_pcur_get_btr_cur(cursor)->index, mtr); @@ -509,6 +545,10 @@ btr_pcur_move_backward_from_page( prev_page_no = btr_page_get_prev(page, mtr); + if (prev_page_no != FIL_NULL) { + btr_update_scan_stats(page, prev_page_no, false /* backward */); + } + if (prev_page_no == FIL_NULL) { } else if (btr_pcur_is_before_first_on_page(cursor)) { diff --git a/storage/xtradb/buf/buf0rea.cc b/storage/xtradb/buf/buf0rea.cc index 9053f38924e..921fa9214a8 100644 --- a/storage/xtradb/buf/buf0rea.cc +++ b/storage/xtradb/buf/buf0rea.cc @@ -123,7 +123,12 @@ buf_read_page_low( use to stop dangling page reads from a tablespace which we have DISCARDed + IMPORTed back */ ulint offset, /*!< in: page number */ - trx_t* trx) + trx_t* trx, + bool should_buffer) /*!< in: whether to buffer an aio request. + AIO read ahead uses this. If you plan to + use this parameter, make sure you remember + to call os_aio_dispatch_read_array_submit() + when you're ready to commit all your requests.*/ { buf_page_t* bpage; ulint wake_later; @@ -229,14 +234,15 @@ not_to_recover: *err = _fil_io(OS_FILE_READ | wake_later | ignore_nonexistent_pages, sync, space, zip_size, offset, 0, zip_size, - bpage->zip.data, bpage, trx); + bpage->zip.data, bpage, trx, should_buffer); } else { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); *err = _fil_io(OS_FILE_READ | wake_later | ignore_nonexistent_pages, sync, space, 0, offset, 0, UNIV_PAGE_SIZE, - ((buf_block_t*) bpage)->frame, bpage, trx); + ((buf_block_t*) bpage)->frame, bpage, trx, + should_buffer); } if (sync) { @@ -395,7 +401,7 @@ read_ahead: &err, false, ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER, space, zip_size, FALSE, - tablespace_version, i, trx); + tablespace_version, i, trx, false); if (err == DB_TABLESPACE_DELETED) { ut_print_timestamp(stderr); fprintf(stderr, @@ -459,7 +465,7 @@ buf_read_page( count = buf_read_page_low(&err, true, BUF_READ_ANY_PAGE, space, zip_size, FALSE, - tablespace_version, offset, trx); + tablespace_version, offset, trx, false); srv_stats.buf_pool_reads.add(count); if (err == DB_TABLESPACE_DELETED) { ut_print_timestamp(stderr); @@ -507,7 +513,7 @@ buf_read_page_async( | OS_AIO_SIMULATED_WAKE_LATER | BUF_READ_IGNORE_NONEXISTENT_PAGES, space, zip_size, FALSE, - tablespace_version, offset, NULL); + tablespace_version, offset, NULL, false); srv_stats.buf_pool_reads.add(count); /* We do not increment number of I/O operations used for LRU policy @@ -775,7 +781,8 @@ buf_read_ahead_linear( count += buf_read_page_low( &err, false, ibuf_mode, - space, zip_size, FALSE, tablespace_version, i, trx); + space, zip_size, FALSE, tablespace_version, + i, trx, true); if (err == DB_TABLESPACE_DELETED) { ut_print_timestamp(stderr); fprintf(stderr, @@ -788,6 +795,7 @@ buf_read_ahead_linear( } } } + os_aio_dispatch_read_array_submit(); /* In simulated aio we wake the aio handler threads only after queuing all aio requests, in native aio the following call does @@ -865,7 +873,7 @@ buf_read_ibuf_merge_pages( buf_read_page_low(&err, sync && (i + 1 == n_stored), BUF_READ_ANY_PAGE, space_ids[i], zip_size, TRUE, space_versions[i], - page_nos[i], NULL); + page_nos[i], NULL, false); if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) { tablespace_deleted: @@ -1001,12 +1009,13 @@ not_to_recover: if ((i + 1 == n_stored) && sync) { buf_read_page_low(&err, true, BUF_READ_ANY_PAGE, space, zip_size, TRUE, tablespace_version, - page_nos[i], NULL); + page_nos[i], NULL, false); } else { buf_read_page_low(&err, false, BUF_READ_ANY_PAGE | OS_AIO_SIMULATED_WAKE_LATER, space, zip_size, TRUE, - tablespace_version, page_nos[i], NULL); + tablespace_version, page_nos[i], + NULL, false); } } diff --git a/storage/xtradb/dict/dict0stats_bg.cc b/storage/xtradb/dict/dict0stats_bg.cc index 4d646ed3081..c2bac776975 100644 --- a/storage/xtradb/dict/dict0stats_bg.cc +++ b/storage/xtradb/dict/dict0stats_bg.cc @@ -203,7 +203,7 @@ dict_stats_wait_bg_to_stop_using_table( unlocking/locking the data dict */ { while (!dict_stats_stop_bg(table)) { - DICT_STATS_BG_YIELD(trx); + DICT_BG_YIELD(trx); } } diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index d459a5f7e9b..3807e6b3395 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -5111,7 +5111,7 @@ retry: success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC, node->name, node->handle, buf, offset, page_size * n_pages, - NULL, NULL, space_id, NULL); + NULL, NULL, space_id, NULL, false); #endif /* UNIV_HOTBACKUP */ DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", @@ -5485,7 +5485,12 @@ _fil_io( appropriately aligned */ void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ - trx_t* trx) + trx_t* trx, + bool should_buffer) /*!< in: whether to buffer an aio request. + AIO read ahead uses this. If you plan to + use this parameter, make sure you remember + to call os_aio_dispatch_read_array_submit() + when you're ready to commit all your requests.*/ { ulint mode; fil_space_t* space; @@ -5705,7 +5710,7 @@ _fil_io( const char* name = node->name == NULL ? space->name : node->name; ret = os_aio(type, mode | wake_later, name, node->handle, buf, - offset, len, node, message, space_id, trx); + offset, len, node, message, space_id, trx, should_buffer); #else /* In mysqlbackup do normal i/o, not aio */ diff --git a/storage/xtradb/fts/fts0fts.cc b/storage/xtradb/fts/fts0fts.cc index 19dc087369d..bad62ba974d 100644 --- a/storage/xtradb/fts/fts0fts.cc +++ b/storage/xtradb/fts/fts0fts.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2016, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -26,6 +26,7 @@ Full Text Search interface #include "row0mysql.h" #include "row0upd.h" #include "dict0types.h" +#include "dict0stats_bg.h" #include "row0sel.h" #include "fts0fts.h" @@ -868,18 +869,37 @@ fts_drop_index( err = fts_drop_index_tables(trx, index); - fts_free(table); - + for(;;) { + bool retry = false; + if (index->index_fts_syncing) { + retry = true; + } + if (!retry){ + fts_free(table); + break; + } + DICT_BG_YIELD(trx); + } return(err); } - current_doc_id = table->fts->cache->next_doc_id; - first_doc_id = table->fts->cache->first_doc_id; - fts_cache_clear(table->fts->cache); - fts_cache_destroy(table->fts->cache); - table->fts->cache = fts_cache_create(table); - table->fts->cache->next_doc_id = current_doc_id; - table->fts->cache->first_doc_id = first_doc_id; + for(;;) { + bool retry = false; + if (index->index_fts_syncing) { + retry = true; + } + if (!retry){ + current_doc_id = table->fts->cache->next_doc_id; + first_doc_id = table->fts->cache->first_doc_id; + fts_cache_clear(table->fts->cache); + fts_cache_destroy(table->fts->cache); + table->fts->cache = fts_cache_create(table); + table->fts->cache->next_doc_id = current_doc_id; + table->fts->cache->first_doc_id = first_doc_id; + break; + } + DICT_BG_YIELD(trx); + } } else { fts_cache_t* cache = table->fts->cache; fts_index_cache_t* index_cache; @@ -889,9 +909,17 @@ fts_drop_index( index_cache = fts_find_index_cache(cache, index); if (index_cache != NULL) { - if (index_cache->words) { - fts_words_free(index_cache->words); - rbt_free(index_cache->words); + for(;;) { + bool retry = false; + if (index->index_fts_syncing) { + retry = true; + } + if (!retry && index_cache->words) { + fts_words_free(index_cache->words); + rbt_free(index_cache->words); + break; + } + DICT_BG_YIELD(trx); } ib_vector_remove(cache->indexes, *(void**) index_cache); @@ -4568,10 +4596,16 @@ begin_sync: index_cache = static_cast<fts_index_cache_t*>( ib_vector_get(cache->indexes, i)); - if (index_cache->index->to_be_dropped) { + if (index_cache->index->to_be_dropped + || index_cache->index->table->to_be_dropped) { continue; } + index_cache->index->index_fts_syncing = true; + DBUG_EXECUTE_IF("fts_instrument_sync_sleep_drop_waits", + os_thread_sleep(10000000); + ); + error = fts_sync_index(sync, index_cache); if (error != DB_SUCCESS && !sync->interrupted) { @@ -4604,11 +4638,33 @@ begin_sync: end_sync: if (error == DB_SUCCESS && !sync->interrupted) { error = fts_sync_commit(sync); + if (error == DB_SUCCESS) { + for (i = 0; i < ib_vector_size(cache->indexes); ++i) { + fts_index_cache_t* index_cache; + index_cache = static_cast<fts_index_cache_t*>( + ib_vector_get(cache->indexes, i)); + if (index_cache->index->index_fts_syncing) { + index_cache->index->index_fts_syncing + = false; + } + } + } } else { fts_sync_rollback(sync); } rw_lock_x_lock(&cache->lock); + /* Clear fts syncing flags of any indexes incase sync is + interrupeted */ + for (i = 0; i < ib_vector_size(cache->indexes); ++i) { + fts_index_cache_t* index_cache; + index_cache = static_cast<fts_index_cache_t*>( + ib_vector_get(cache->indexes, i)); + if (index_cache->index->index_fts_syncing == true) { + index_cache->index->index_fts_syncing = false; + } + } + sync->interrupted = false; sync->in_progress = false; os_event_set(sync->event); diff --git a/storage/xtradb/fts/fts0opt.cc b/storage/xtradb/fts/fts0opt.cc index cb30122adcb..d9f96948000 100644 --- a/storage/xtradb/fts/fts0opt.cc +++ b/storage/xtradb/fts/fts0opt.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2016, MariaDB Corporation. All Rights reserved. This program is free software; you can redistribute it and/or modify it under @@ -2971,13 +2971,6 @@ fts_optimize_sync_table( { dict_table_t* table = NULL; - /* Prevent DROP INDEX etc. from running when we are syncing - cache in background. */ - if (!rw_lock_s_lock_nowait(&dict_operation_lock, __FILE__, __LINE__)) { - /* Exit when fail to get dict operation lock. */ - return; - } - table = dict_table_open_on_id(table_id, FALSE, DICT_TABLE_OP_NORMAL); if (table) { @@ -2987,8 +2980,6 @@ fts_optimize_sync_table( dict_table_close(table, FALSE, FALSE); } - - rw_lock_s_unlock(&dict_operation_lock); } /**********************************************************************//** diff --git a/storage/xtradb/fts/fts0que.cc b/storage/xtradb/fts/fts0que.cc index 358d979fff6..d63c1f4d054 100644 --- a/storage/xtradb/fts/fts0que.cc +++ b/storage/xtradb/fts/fts0que.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -3656,6 +3656,7 @@ fts_query_free( if (query->intersection) { fts_query_free_doc_ids(query, query->intersection); + query->intersection = NULL; } if (query->doc_ids) { diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 88b59db9a46..c481db021c9 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -1088,6 +1088,24 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_sec_rec_cluster_reads, SHOW_LONG}, {"secondary_index_triggered_cluster_reads_avoided", (char*) &export_vars.innodb_sec_rec_cluster_reads_avoided, SHOW_LONG}, + {"buffered_aio_submitted", + (char*) &export_vars.innodb_buffered_aio_submitted, SHOW_LONG}, + + {"scan_pages_contiguous", + (char*) &export_vars.innodb_fragmentation_stats.scan_pages_contiguous, + SHOW_LONG}, + {"scan_pages_disjointed", + (char*) &export_vars.innodb_fragmentation_stats.scan_pages_disjointed, + SHOW_LONG}, + {"scan_pages_total_seek_distance", + (char*) &export_vars.innodb_fragmentation_stats.scan_pages_total_seek_distance, + SHOW_LONG}, + {"scan_data_size", + (char*) &export_vars.innodb_fragmentation_stats.scan_data_size, + SHOW_LONG}, + {"scan_deleted_recs_size", + (char*) &export_vars.innodb_fragmentation_stats.scan_deleted_recs_size, + SHOW_LONG}, {NullS, NullS, SHOW_LONG} }; @@ -2922,7 +2940,7 @@ ha_innobase::ha_innobase( HA_BINLOG_ROW_CAPABLE | HA_CAN_GEOMETRY | HA_PARTIAL_COLUMN_READ | HA_TABLE_SCAN_ON_INDEX | HA_CAN_FULLTEXT | - HA_CAN_FULLTEXT_EXT | HA_CAN_EXPORT), + HA_CAN_FULLTEXT_EXT | HA_CAN_EXPORT | HA_ONLINE_ANALYZE), start_of_scan(0), num_write_row(0) {} @@ -10874,6 +10892,7 @@ index_bad: case ROW_TYPE_DEFAULT: /* If we fell through, set row format to Compact. */ row_format = ROW_TYPE_COMPACT; + /* fall through */ case ROW_TYPE_COMPACT: break; } @@ -18777,6 +18796,13 @@ static MYSQL_SYSVAR_BOOL(print_all_deadlocks, srv_print_all_deadlocks, "Print all deadlocks to MySQL error log (off by default)", NULL, NULL, FALSE); +static MYSQL_SYSVAR_BOOL( + print_lock_wait_timeout_info, + srv_print_lock_wait_timeout_info, + PLUGIN_VAR_OPCMDARG, + "Print lock wait timeout info to MySQL error log (off by default)", + NULL, NULL, FALSE); + static MYSQL_SYSVAR_ULONG(compression_failure_threshold_pct, zip_failure_threshold_pct, PLUGIN_VAR_OPCMDARG, "If the compression failure rate of a table is greater than this number" @@ -19047,6 +19073,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(foreground_preflush), MYSQL_SYSVAR(empty_free_list_algorithm), MYSQL_SYSVAR(print_all_deadlocks), + MYSQL_SYSVAR(print_lock_wait_timeout_info), MYSQL_SYSVAR(cmp_per_index_enabled), MYSQL_SYSVAR(undo_logs), MYSQL_SYSVAR(rollback_segments), diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index 2184da589dc..e5be3aea44a 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2005, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -5784,7 +5784,47 @@ ha_innobase::commit_inplace_alter_table( break; } - DICT_STATS_BG_YIELD(trx); + DICT_BG_YIELD(trx); + } + + /* Make a concurrent Drop fts Index to wait until sync of that + fts index is happening in the background */ + for (;;) { + bool retry = false; + + for (inplace_alter_handler_ctx** pctx = ctx_array; + *pctx; pctx++) { + int count =0; + ha_innobase_inplace_ctx* ctx + = static_cast<ha_innobase_inplace_ctx*>(*pctx); + DBUG_ASSERT(new_clustered == ctx->need_rebuild()); + + if (dict_fts_index_syncing(ctx->old_table)) { + count++; + if (count == 100) { + fprintf(stderr, + "Drop index waiting for background sync" + "to finish\n"); + } + retry = true; + } + + if (new_clustered && dict_fts_index_syncing(ctx->new_table)) { + count++; + if (count == 100) { + fprintf(stderr, + "Drop index waiting for background sync" + "to finish\n"); + } + retry = true; + } + } + + if (!retry) { + break; + } + + DICT_BG_YIELD(trx); } /* Apply the changes to the data dictionary tables, for all @@ -6100,8 +6140,13 @@ foreign_fail: ut_d(dict_table_check_for_dup_indexes( ctx->new_table, CHECK_ABORTED_OK)); - ut_a(fts_check_cached_index(ctx->new_table)); +#ifdef UNIV_DEBUG + if (!(ctx->new_table->fts != NULL + && ctx->new_table->fts->cache->sync->in_progress)) { + ut_a(fts_check_cached_index(ctx->new_table)); + } +#endif if (new_clustered) { /* Since the table has been rebuilt, we remove all persistent statistics corresponding to the diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic index be02bbd3614..024208f31b2 100644 --- a/storage/xtradb/include/dict0dict.ic +++ b/storage/xtradb/include/dict0dict.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -889,6 +889,27 @@ dict_table_x_lock_indexes( } /*********************************************************************//** +Returns true if the particular FTS index in the table is still syncing +in the background, false otherwise. +@param [in] table Table containing FTS index +@return True if sync of fts index is still going in the background */ +UNIV_INLINE +bool +dict_fts_index_syncing( + dict_table_t* table) +{ + dict_index_t* index; + + for (index = dict_table_get_first_index(table); + index != NULL; + index = dict_table_get_next_index(index)) { + if (index->index_fts_syncing) { + return(true); + } + } + return(false); +} +/*********************************************************************//** Release the exclusive locks on all index tree. */ UNIV_INLINE void diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h index 9652d5ad116..a4f810652e0 100644 --- a/storage/xtradb/include/dict0mem.h +++ b/storage/xtradb/include/dict0mem.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. Copyright (c) 2015, 2018, MariaDB Corporation. @@ -626,6 +626,8 @@ struct dict_index_t{ dict_sys->mutex. Other changes are protected by index->lock. */ dict_field_t* fields; /*!< array of field descriptions */ + bool index_fts_syncing;/*!< Whether the fts index is + still syncing in the background */ #ifndef UNIV_HOTBACKUP UT_LIST_NODE_T(dict_index_t) indexes;/*!< list of indexes of the table */ diff --git a/storage/xtradb/include/dict0stats_bg.h b/storage/xtradb/include/dict0stats_bg.h index 2bd441d5f5e..69d61ea88f1 100644 --- a/storage/xtradb/include/dict0stats_bg.h +++ b/storage/xtradb/include/dict0stats_bg.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2012, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -61,7 +61,7 @@ dict_stats_recalc_pool_del( /** Yield the data dictionary latch when waiting for the background thread to stop accessing a table. @param trx transaction holding the data dictionary locks */ -#define DICT_STATS_BG_YIELD(trx) do { \ +#define DICT_BG_YIELD(trx) do { \ row_mysql_unlock_data_dictionary(trx); \ os_thread_sleep(250000); \ row_mysql_lock_data_dictionary(trx); \ diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index db1578aee69..6230bd7a4d4 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -931,8 +931,10 @@ fil_space_get_n_reserved_extents( Reads or writes data. This operation is asynchronous (aio). @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do i/o on a tablespace which does not exist */ -#define fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message) \ - _fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, NULL) +#define fil_io(type, sync, space_id, zip_size, block_offset, \ + byte_offset, len, buf, message) \ + _fil_io(type, sync, space_id, zip_size, block_offset, \ + byte_offset, len, buf, message, NULL, false) UNIV_INTERN dberr_t @@ -963,7 +965,9 @@ _fil_io( appropriately aligned */ void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ - trx_t* trx) + trx_t* trx, + bool should_buffer) /*!< in: whether to buffer an aio request. + Only used by aio read ahead*/ MY_ATTRIBUTE((nonnull(8))); /**********************************************************************//** Waits for an aio operation to complete. This function is used to write the diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index 5f11ba86275..f393e59558f 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -363,9 +363,9 @@ The wrapper functions have the prefix of "innodb_". */ pfs_os_file_close_no_error_handling_func(file, __FILE__, __LINE__) # define os_aio(type, mode, name, file, buf, offset, \ - n, message1, message2, space_id, trx) \ + n, message1, message2, space_id, trx, should_buffer) \ pfs_os_aio_func(type, mode, name, file, buf, offset, \ - n, message1, message2, space_id, trx, \ + n, message1, message2, space_id, trx, should_buffer, \ __FILE__, __LINE__) # define os_file_read_pfs(file, buf, offset, n) \ @@ -435,9 +435,9 @@ to original un-instrumented file I/O APIs */ os_file_close_no_error_handling_func(file) # define os_aio(type, mode, name, file, buf, offset, n, message1, \ - message2, space_id, trx) \ + message2, space_id, trx, should_buffer) \ os_aio_func(type, mode, name, file, buf, offset, n, \ - message1, message2, space_id, trx) + message1, message2, space_id, trx, should_buffer) # define os_file_read_pfs(file, buf, offset, n) \ os_file_read_func(file, buf, offset, n, NULL) @@ -967,6 +967,12 @@ pfs_os_aio_func( OS_AIO_SYNC */ ulint space_id, trx_t* trx, + bool should_buffer, + /*!< in: Whether to buffer an aio request. + AIO read ahead uses this. If you plan to + use this parameter, make sure you remember + to call os_aio_dispatch_read_array_submit() + when you're ready to commit all your requests.*/ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ /*******************************************************************//** @@ -1381,7 +1387,13 @@ os_aio_func( aio operation); ignored if mode is OS_AIO_SYNC */ ulint space_id, - trx_t* trx); + trx_t* trx, + bool should_buffer); + /*!< in: Whether to buffer an aio request. + AIO read ahead uses this. If you plan to + use this parameter, make sure you remember + to call os_aio_dispatch_read_array_submit() + when you're ready to commit all your requests.*/ /************************************************************************//** Wakes up all async i/o threads so that they know to exit themselves in shutdown. */ @@ -1564,6 +1576,10 @@ os_file_handle_error_no_exit( ibool on_error_silent);/*!< in: if TRUE then don't print any message to the log. */ +/** Submit buffered AIO requests on the given segment to the kernel. */ +UNIV_INTERN +void +os_aio_dispatch_read_array_submit(); #ifndef UNIV_NONINL #include "os0file.ic" diff --git a/storage/xtradb/include/os0file.ic b/storage/xtradb/include/os0file.ic index c82a2559fed..98474a33c7e 100644 --- a/storage/xtradb/include/os0file.ic +++ b/storage/xtradb/include/os0file.ic @@ -246,6 +246,9 @@ pfs_os_aio_func( OS_AIO_SYNC */ ulint space_id, trx_t* trx, + bool should_buffer, + /*!< in: whether to buffer an aio request. + Only used by aio read ahead*/ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -261,7 +264,8 @@ pfs_os_aio_func( src_file, src_line); result = os_aio_func(type, mode, name, file, buf, offset, - n, message1, message2, space_id, trx); + n, message1, message2, space_id, trx, + should_buffer); register_pfs_file_io_end(locker, n); diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index f27ed9857ec..457ffc4fff0 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -53,6 +53,8 @@ Created 10/10/1995 Heikki Tuuri #include "buf0checksum.h" #include "ut0counter.h" +#include <sql_class.h> + /* Global counters used inside InnoDB. */ struct srv_stats_t { typedef ib_counter_t<ulint, 64> ulint_ctr_64_t; @@ -145,6 +147,9 @@ struct srv_stats_t { /** Number of lock waits that have been up to max time (i.e.) lock wait timeout */ ulint_ctr_1_t n_lock_max_wait_time; + + /** Number of buffered aio requests submitted */ + ulint_ctr_64_t n_aio_submitted; }; extern const char* srv_main_thread_op_info; @@ -608,6 +613,9 @@ extern ulong srv_sync_array_size; /* print all user-level transactions deadlocks to mysqld stderr */ extern my_bool srv_print_all_deadlocks; +/* print lock wait timeout info to mysqld stderr */ +extern my_bool srv_print_lock_wait_timeout_info; + extern my_bool srv_cmp_per_index_enabled; /** Number of times secondary index lookup triggered cluster lookup */ @@ -1121,6 +1129,11 @@ struct export_var_t{ ulint innodb_sec_rec_cluster_reads; /*!< srv_sec_rec_cluster_reads */ ulint innodb_sec_rec_cluster_reads_avoided; /*!< srv_sec_rec_cluster_reads_avoided */ + + ulint innodb_buffered_aio_submitted; + + fragmentation_stats_t innodb_fragmentation_stats;/*!< Fragmentation + statistics */ }; /** Thread slot in the thread table. */ diff --git a/storage/xtradb/include/trx0rec.h b/storage/xtradb/include/trx0rec.h index ec15250ec7b..fef12548003 100644 --- a/storage/xtradb/include/trx0rec.h +++ b/storage/xtradb/include/trx0rec.h @@ -229,7 +229,7 @@ trx_undo_report_row_operation( const ulint* offsets, /*!< in: rec_get_offsets(rec) */ roll_ptr_t* roll_ptr) /*!< out: DB_ROLL_PTR to the undo log record */ - MY_ATTRIBUTE((nonnull(1,2,8), warn_unused_result)); + MY_ATTRIBUTE((nonnull(2,8), warn_unused_result)); /******************************************************************//** Copies an undo record to heap. This function can be called if we know that the undo log record exists. diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 7c11df5e795..96e67f88c8a 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -48,7 +48,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_BUGFIX 36 #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 82.2 +#define PERCONA_INNODB_VERSION 83.0 #endif /* Enable UNIV_LOG_ARCHIVE in XtraDB */ diff --git a/storage/xtradb/lock/lock0wait.cc b/storage/xtradb/lock/lock0wait.cc index 5b67fbe72b7..3ccd23c0496 100644 --- a/storage/xtradb/lock/lock0wait.cc +++ b/storage/xtradb/lock/lock0wait.cc @@ -33,6 +33,20 @@ Created 25/5/2010 Sunny Bains #include "srv0start.h" #include "ha_prototypes.h" #include "lock0priv.h" +#include "lock0iter.h" + +#include <sstream> + +extern "C" +LEX_STRING* thd_query_string(MYSQL_THD thd); + +struct blocking_trx_info { + uint64_t trx_id; + uint32_t thread_id; + int64_t query_id; +}; + +static const size_t MAX_BLOCKING_TRX_IN_REPORT = 10; /*********************************************************************//** Print the contents of the lock_sys_t::waiting_threads array. */ @@ -185,6 +199,46 @@ lock_wait_table_reserve_slot( return(NULL); } +/** Print lock wait timeout info to stderr. It's supposed this function +is executed in trx's THD thread as it calls some non-thread-safe +functions to get some info from THD. +@param[in] trx requested trx +@param[in] blocking blocking info array +@param[in] blocking_count blocking info array size */ +void +print_lock_wait_timeout( + const trx_t &trx, + blocking_trx_info *blocking, + size_t blocking_count) +{ + std::ostringstream outs; + + outs << "Lock wait timeout info:\n"; + outs << "Requested thread id: " << + thd_get_thread_id(trx.mysql_thd) << + "\n"; + outs << "Requested trx id: " << trx.id << "\n"; + outs << "Requested query: " << + thd_query_string(trx.mysql_thd)->str << "\n"; + + outs << "Total blocking transactions count: " << + blocking_count << + "\n"; + + for (size_t i = 0; i < blocking_count; ++i) { + outs << "Blocking transaction number: " << (i + 1) << "\n"; + outs << "Blocking thread id: " << + blocking[i].thread_id << + "\n"; + outs << "Blocking query id: " << + blocking[i].query_id << + "\n"; + outs << "Blocking trx id: " << blocking[i].trx_id << "\n"; + } + ut_print_timestamp(stderr); + fprintf(stderr, " %s", outs.str().c_str()); +} + /***************************************************************//** Puts a user OS thread to wait for a lock to be released. If an error occurs during the wait trx->error_state associated with thr is @@ -208,6 +262,8 @@ lock_wait_suspend_thread( ulint sec; ulint ms; ulong lock_wait_timeout; + blocking_trx_info blocking[MAX_BLOCKING_TRX_IN_REPORT]; + size_t blocking_count = 0; trx = thr_get_trx(thr); @@ -379,6 +435,8 @@ lock_wait_suspend_thread( && wait_time > (double) lock_wait_timeout) { trx->error_state = DB_LOCK_WAIT_TIMEOUT; + if (srv_print_lock_wait_timeout_info) + print_lock_wait_timeout(*trx, blocking, blocking_count); MONITOR_INC(MONITOR_TIMEOUT); } diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc index 1e373c8345f..1a30501f266 100644 --- a/storage/xtradb/log/log0online.cc +++ b/storage/xtradb/log/log0online.cc @@ -487,6 +487,9 @@ log_online_make_bitmap_name( } /*********************************************************************//** +} + +/*********************************************************************//** Check if an old file that has the name of a new bitmap file we are about to create should be overwritten. */ static diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 9be9c64b759..5ed7cbbc9b3 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -248,6 +248,16 @@ struct os_aio_array_t{ There is one such event for each possible pending IO. The size of the array is equal to n_slots. */ + struct iocb** pending; + /* Array to buffer the not-submitted aio + requests. The array length is n_slots. + It is divided into n_segments segments. + pending requests on each segment are buffered + separately.*/ + ulint* count; + /* Array of length n_segments. Each element + counts the number of not-submitted aio + request on that segment.*/ #endif /* LINUX_NATIV_AIO */ }; @@ -4014,6 +4024,13 @@ os_aio_array_create( memset(io_event, 0x0, sizeof(*io_event) * n); array->aio_events = io_event; + array->pending = static_cast<struct iocb**>( + ut_malloc(n * sizeof(struct iocb*))); + memset(array->pending, 0x0, sizeof(struct iocb*) * n); + array->count = static_cast<ulint*>( + ut_malloc(n_segments * sizeof(ulint))); + memset(array->count, 0x0, sizeof(ulint) * n_segments); + skip_native_aio: #endif /* LINUX_NATIVE_AIO */ for (ulint i = 0; i < n; i++) { @@ -4048,6 +4065,16 @@ os_aio_array_free( if (srv_use_native_aio) { ut_free(array->aio_events); ut_free(array->aio_ctx); + +#ifdef UNIV_DEBUG + for (size_t idx = 0; idx < array->n_slots; ++idx) + ut_ad(array->pending[idx] == NULL); + for (size_t idx = 0; idx < array->n_segments; ++idx) + ut_ad(array->count[idx] == 0); +#endif + + ut_free(array->pending); + ut_free(array->count); } #endif /* LINUX_NATIVE_AIO */ @@ -4680,6 +4707,83 @@ readahead requests. */ } #endif /* _WIN32 */ +/** Submit buffered AIO requests on the given segment to the kernel +(low level function). +@param acquire_mutex specifies whether to lock array mutex +*/ +static +void +os_aio_dispatch_read_array_submit_low(bool acquire_mutex MY_ATTRIBUTE((unused))) +{ + if (!srv_use_native_aio) { + return; + } +#if defined(LINUX_NATIVE_AIO) + os_aio_array_t* array = os_aio_read_array; + ulint total_submitted = 0; + if (acquire_mutex) + os_mutex_enter(array->mutex); + /* Submit aio requests buffered on all segments. */ + for (ulint i = 0; i < array->n_segments; i++) { + const int count = array->count[i]; + int offset = 0; + while (offset != count) { + struct iocb** const iocb_array = array->pending + + i * array->n_slots / array->n_segments + + offset; + const int partial_count = count - offset; + /* io_submit() returns number of successfully queued + requests or (-errno). + It returns 0 only if the number of iocb blocks passed + is also 0. */ + const int submitted = io_submit(array->aio_ctx[i], + partial_count, iocb_array); + + /* This assertion prevents infinite loop in both + debug and release modes. */ + ut_a(submitted != 0); + + if (submitted < 0) { + /* Terminating with fatal error */ + const char* errmsg = + strerror(-submitted); + ib_logf(IB_LOG_LEVEL_FATAL, + "Trying to sumbit %d aio requests, " + "io_submit() set errno to %d: %s", + partial_count, -submitted, + errmsg ? errmsg : "<unknown>"); + } + ut_ad(submitted <= partial_count); + if (submitted < partial_count) + { + ib_logf(IB_LOG_LEVEL_WARN, + "Trying to sumbit %d aio requests, " + "io_submit() submitted only %d", + partial_count, submitted); + } + offset += submitted; + } + total_submitted += count; + } + /* Reset the aio request buffer. */ + memset(array->pending, 0x0, sizeof(struct iocb*) * array->n_slots); + memset(array->count, 0x0, sizeof(ulint) * array->n_segments); + + if (acquire_mutex) + os_mutex_exit(array->mutex); + + srv_stats.n_aio_submitted.add(total_submitted); +#endif +} + +/** Submit buffered AIO requests on the given segment to the kernel. */ +UNIV_INTERN +void +os_aio_dispatch_read_array_submit() +{ + os_aio_dispatch_read_array_submit_low(true); +} + #if defined(LINUX_NATIVE_AIO) /*******************************************************************//** Dispatch an AIO request to the kernel. @@ -4689,10 +4793,11 @@ ibool os_aio_linux_dispatch( /*==================*/ os_aio_array_t* array, /*!< in: io request array. */ - os_aio_slot_t* slot) /*!< in: an already reserved slot. */ + os_aio_slot_t* slot, /*!< in: an already reserved slot. */ + bool should_buffer) /*!< in: should buffer the request + rather than submit. */ { int ret; - ulint io_ctx_index; struct iocb* iocb; ut_ad(slot != NULL); @@ -4704,9 +4809,31 @@ os_aio_linux_dispatch( The iocb struct is directly in the slot. The io_context is one per segment. */ + ulint slots_per_segment = array->n_slots / array->n_segments; iocb = &slot->control; - io_ctx_index = (slot->pos * array->n_segments) / array->n_slots; + ulint io_ctx_index = slot->pos / slots_per_segment; + if (should_buffer) { + ut_ad(array == os_aio_read_array); + os_mutex_enter(array->mutex); + /* There are array->n_slots elements in array->pending, + which is divided into array->n_segments area of equal size. + The iocb of each segment are buffered in its corresponding area + in the pending array consecutively as they come. + array->count[i] records the number of buffered aio requests + in the ith segment.*/ + ulint& count = array->count[io_ctx_index]; + ut_ad(count != slots_per_segment); + ulint n = io_ctx_index * slots_per_segment + count; + array->pending[n] = iocb; + ++count; + if (count == slots_per_segment) { + os_aio_dispatch_read_array_submit_low(false); + } + os_mutex_exit(array->mutex); + return(TRUE); + } + /* Submit the given request. */ ret = io_submit(array->aio_ctx[io_ctx_index], 1, &iocb); #if defined(UNIV_AIO_DEBUG) @@ -4766,7 +4893,13 @@ os_aio_func( aio operation); ignored if mode is OS_AIO_SYNC */ ulint space_id, - trx_t* trx) + trx_t* trx, + bool should_buffer) + /*!< in: Whether to buffer an aio request. + AIO read ahead uses this. If you plan to + use this parameter, make sure you remember + to call os_aio_dispatch_read_array_submit() + when you're ready to commit all your requests.*/ { os_aio_array_t* array; os_aio_slot_t* slot; @@ -4878,7 +5011,8 @@ try_again: goto err_exit; #elif defined(LINUX_NATIVE_AIO) - if (!os_aio_linux_dispatch(array, slot)) { + if (!os_aio_linux_dispatch(array, slot, + should_buffer)) { goto err_exit; } #endif /* WIN_ASYNC_IO */ @@ -4900,7 +5034,7 @@ try_again: if(!ret && GetLastError() != ERROR_IO_PENDING) goto err_exit; #elif defined(LINUX_NATIVE_AIO) - if (!os_aio_linux_dispatch(array, slot)) { + if (!os_aio_linux_dispatch(array, slot, false)) { goto err_exit; } #endif /* WIN_ASYNC_IO */ diff --git a/storage/xtradb/page/page0page.cc b/storage/xtradb/page/page0page.cc index ab488c1bf77..7c525c78909 100644 --- a/storage/xtradb/page/page0page.cc +++ b/storage/xtradb/page/page0page.cc @@ -2831,7 +2831,7 @@ page_warn_strict_checksum( ulint space_id, ulint page_no) { - srv_checksum_algorithm_t curr_algo_nonstrict; + srv_checksum_algorithm_t curr_algo_nonstrict = srv_checksum_algorithm_t(); switch (curr_algo) { case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: curr_algo_nonstrict = SRV_CHECKSUM_ALGORITHM_CRC32; diff --git a/storage/xtradb/row/row0mysql.cc b/storage/xtradb/row/row0mysql.cc index ea1c34efce7..56eeb1a83f3 100644 --- a/storage/xtradb/row/row0mysql.cc +++ b/storage/xtradb/row/row0mysql.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2000, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2000, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2017, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -57,6 +57,7 @@ Created 9/17/2000 Heikki Tuuri #include "log0log.h" #include "btr0sea.h" #include "fil0fil.h" +#include "srv0srv.h" #include "ibuf0ibuf.h" #include "fts0fts.h" #include "fts0types.h" @@ -3912,6 +3913,16 @@ row_drop_table_for_mysql( ut_ad(!table->fts->add_wq); ut_ad(lock_trx_has_sys_table_locks(trx) == 0); + for (;;) { + bool retry = false; + if (dict_fts_index_syncing(table)) { + retry = true; + } + if (!retry) { + break; + } + DICT_BG_YIELD(trx); + } row_mysql_unlock_data_dictionary(trx); fts_optimize_remove_table(table); row_mysql_lock_data_dictionary(trx); diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index d79c29ffe97..44c35824ba6 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -429,6 +429,9 @@ UNIV_INTERN my_bool srv_print_all_deadlocks = FALSE; /* Produce a stacktrace on long semaphore wait */ UNIV_INTERN my_bool srv_use_stacktrace = FALSE; +/** Print lock wait timeout info to mysqld stderr */ +my_bool srv_print_lock_wait_timeout_info = FALSE; + /** Enable INFORMATION_SCHEMA.innodb_cmp_per_index */ UNIV_INTERN my_bool srv_cmp_per_index_enabled = FALSE; @@ -1990,6 +1993,12 @@ srv_export_innodb_status(void) export_vars.innodb_sec_rec_cluster_reads_avoided = srv_sec_rec_cluster_reads_avoided; + export_vars.innodb_buffered_aio_submitted = + srv_stats.n_aio_submitted; + + thd_get_fragmentation_stats(current_thd, + &export_vars.innodb_fragmentation_stats); + mutex_exit(&srv_innodb_monitor_mutex); } |