diff options
author | Jan Lindström <jan.lindstrom@mariadb.com> | 2017-03-14 12:56:01 +0200 |
---|---|---|
committer | Jan Lindström <jan.lindstrom@mariadb.com> | 2017-03-14 16:23:10 +0200 |
commit | 50eb40a2a8aa3af6cc271f6028f4d6d74301d030 (patch) | |
tree | dd194385ba0e8fc0302d17c7a682d3ae3d65b19f /storage/xtradb/fil/fil0fil.cc | |
parent | a2f34809e55c492af9a23d43840133f01528df7b (diff) | |
download | mariadb-git-50eb40a2a8aa3af6cc271f6028f4d6d74301d030.tar.gz |
MDEV-11738: Mariadb uses 100% of several of my 8 cpus doing nothing
MDEV-11581: Mariadb starts InnoDB encryption threads
when key has not changed or data scrubbing turned off
Background: Key rotation is based on background threads
(innodb-encryption-threads) periodically going through
all tablespaces in fil_system. For each tablespace, the
currently used key version is compared to the max key age
(innodb-encryption-rotate-key-age). This process
naturally takes CPU. At the same time, the need for
scrubbing is also checked. Currently, key rotation
is fully supported only by the Amazon AWS key management
plugin, but InnoDB does not know which key
management plugin is in use.
This patch re-purposes innodb-encryption-rotate-key-age=0
to disable key rotation and background data scrubbing.
All new tables are added to a special list for key rotation,
and key rotation is triggered by sending an event to the
background encryption threads instead of using periodic
checking (i.e. a timeout).
fil0fil.cc: Added function fil_space_acquire_low()
to acquire a tablespace when it could be dropped concurrently.
This function is used from fil_space_acquire() and
fil_space_acquire_silent(); the latter will not print
any messages if we try to acquire a space that does not exist.
Added fil_space_release() to release an acquired tablespace.
Added fil_space_next() to iterate tablespaces in fil_system
using fil_space_acquire() and fil_space_release().
Similarly, added fil_space_keyrotate_next() to iterate the new
list fil_system->rotation_list, where new tables
are added if key rotation is disabled.
Removed unnecessary functions fil_get_first_space_safe()
fil_get_next_space_safe()
fil_node_open_file(): After page 0 is read, also read
crypt_data if it has not been read yet.
btr_scrub_lock_dict_func()
buf_page_check_corrupt()
buf_page_encrypt_before_write()
buf_merge_or_delete_for_page()
lock_print_info_all_transactions()
row_fts_psort_info_init()
row_truncate_table_for_mysql()
row_drop_table_for_mysql()
Use fil_space_acquire()/release() to access fil_space_t.
buf_page_decrypt_after_read():
Use fil_space_get_crypt_data() because at this point
we might not yet have read page 0.
fil0crypt.cc/fil0fil.h: Many changes. Pass fil_space_t* directly
to functions needing it and store fil_space_t* to rotation state.
Use fil_space_acquire()/release() when iterating tablespaces
and removed unnecessary is_closing from fil_crypt_t. Use
fil_space_t::is_stopping() to detect when access to
tablespace should be stopped. Removed unnecessary
fil_space_get_crypt_data().
fil_space_create(): Inform key rotation that there could
be something to do if key rotation is disabled and a new
table with encryption enabled is created.
Remove unnecessary functions fil_get_first_space_safe()
and fil_get_next_space_safe(). fil_space_acquire()
and fil_space_release() are used instead. Moved
fil_space_get_crypt_data() and fil_space_set_crypt_data()
to fil0crypt.cc.
fsp_header_init(): Acquire fil_space_t*, write crypt_data
and release space.
check_table_options()
Renamed FIL_SPACE_ENCRYPTION_* to FIL_ENCRYPTION_*
i_s.cc: Added ROTATING_OR_FLUSHING field to
information_schema.innodb_tablespace_encryption
to show current status of key rotation.
Diffstat (limited to 'storage/xtradb/fil/fil0fil.cc')
-rw-r--r-- | storage/xtradb/fil/fil0fil.cc | 437 |
1 files changed, 250 insertions, 187 deletions
diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index ac1a7d3bbcd..36900839160 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -157,7 +157,11 @@ UNIV_INTERN mysql_pfs_key_t fil_space_latch_key; /** The tablespace memory cache. This variable is NULL before the module is initialized. */ -fil_system_t* fil_system = NULL; +UNIV_INTERN fil_system_t* fil_system = NULL; + +/** At this age or older a space/page will be rotated */ +UNIV_INTERN extern uint srv_fil_crypt_rotate_key_age; +UNIV_INTERN extern ib_mutex_t fil_crypt_threads_mutex; /** Determine if (i) is a user tablespace id or not. */ # define fil_is_user_tablespace_id(i) ((i) > srv_undo_tablespaces_open) @@ -169,7 +173,7 @@ fil_system_t* fil_system = NULL; && srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)\ || ((s)->purpose == FIL_LOG \ && srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT)) - + #else /* __WIN__ */ # define fil_buffering_disabled(s) (0) #endif /* __WIN__ */ @@ -618,6 +622,7 @@ fil_node_open_file( node->handle = os_file_create_simple_no_error_handling( innodb_file_data_key, node->name, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success, 0); + if (!success) { /* The following call prints an error message */ os_file_get_last_error(true); @@ -669,6 +674,16 @@ fil_node_open_file( const ulint space_id = fsp_header_get_space_id(page); ulint flags = fsp_header_get_flags(page); + /* Try to read crypt_data from page 0 if it is not yet + read. 
*/ + if (!node->space->page_0_crypt_read) { + ulint offset = fsp_header_get_crypt_offset( + fsp_flags_get_zip_size(flags)); + ut_ad(node->space->crypt_data == NULL); + node->space->crypt_data = fil_space_read_crypt_data(space_id, page, offset); + node->space->page_0_crypt_read = true; + } + ut_free(buf2); os_file_close(node->handle); @@ -1491,17 +1506,24 @@ fil_space_contains_node( /*******************************************************************//** Creates a space memory object and puts it to the 'fil system' hash table. If there is an error, prints an error message to the .err log. +@param[in] name Space name +@param[in] id Space id +@param[in] flags Tablespace flags +@param[in] purpose FIL_TABLESPACE or FIL_LOG if log +@param[in] crypt_data Encryption information +@param[in] create_table True if this is create table +@param[in] mode Encryption mode @return TRUE if success */ UNIV_INTERN -ibool +bool fil_space_create( -/*=============*/ - const char* name, /*!< in: space name */ - ulint id, /*!< in: space id */ - ulint flags, /*!< in: tablespace flags */ - ulint purpose,/*!< in: FIL_TABLESPACE, or FIL_LOG if log */ - fil_space_crypt_t* crypt_data, /*!< in: crypt data */ - bool create_table) /*!< in: true if create table */ + const char* name, + ulint id, + ulint flags, + ulint purpose, + fil_space_crypt_t* crypt_data, + bool create_table, + fil_encryption_t mode) { fil_space_t* space; @@ -1525,7 +1547,7 @@ fil_space_create( mutex_exit(&fil_system->mutex); - return(FALSE); + return(false); } ib_logf(IB_LOG_LEVEL_WARN, @@ -1552,7 +1574,7 @@ fil_space_create( mutex_exit(&fil_system->mutex); - return(FALSE); + return(false); } space = static_cast<fil_space_t*>(mem_zalloc(sizeof(*space))); @@ -1583,17 +1605,6 @@ fil_space_create( space->flags = flags; space->magic_n = FIL_SPACE_MAGIC_N; - space->printed_compression_failure = false; - - rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP); - - HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space); - 
- HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash, - ut_fold_string(name), space); - space->is_in_unflushed_spaces = false; - - space->is_corrupt = FALSE; space->crypt_data = crypt_data; /* In create table we write page 0 so we have already @@ -1612,11 +1623,33 @@ fil_space_create( space->crypt_data ? space->crypt_data->encryption : 0); #endif + rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP); + + HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space); + + HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash, + ut_fold_string(name), space); + UT_LIST_ADD_LAST(space_list, fil_system->space_list, space); - mutex_exit(&fil_system->mutex); + /* Inform key rotation that there could be something + to do */ + if (purpose == FIL_TABLESPACE && !srv_fil_crypt_rotate_key_age && fil_crypt_threads_event && + (mode == FIL_ENCRYPTION_ON || mode == FIL_ENCRYPTION_OFF || + srv_encrypt_tables)) { + /* Key rotation is not enabled, need to inform background + encryption threads. 
*/ + UT_LIST_ADD_LAST(rotation_list, fil_system->rotation_list, space); + mutex_exit(&fil_system->mutex); + space->is_in_rotation_list = true; + mutex_enter(&fil_crypt_threads_mutex); + os_event_set(fil_crypt_threads_event); + mutex_exit(&fil_crypt_threads_mutex); + } else { + mutex_exit(&fil_system->mutex); + } - return(TRUE); + return(true); } /*******************************************************************//** @@ -1728,6 +1761,11 @@ fil_space_free( space); } + if (space->is_in_rotation_list) { + space->is_in_rotation_list = false; + UT_LIST_REMOVE(rotation_list, fil_system->rotation_list, space); + } + UT_LIST_REMOVE(space_list, fil_system->space_list, space); ut_a(space->magic_n == FIL_SPACE_MAGIC_N); @@ -2390,7 +2428,6 @@ fil_read_first_page( const char* check_msg = NULL; fil_space_crypt_t* cdata; - buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); /* Align the memory for a possible read from a raw device */ @@ -2430,7 +2467,7 @@ fil_read_first_page( ulint space = fsp_header_get_space_id(page); ulint offset = fsp_header_get_crypt_offset( - fsp_flags_get_zip_size(*flags), NULL); + fsp_flags_get_zip_size(*flags)); cdata = fil_space_read_crypt_data(space, page, offset); @@ -2809,7 +2846,7 @@ fil_op_log_parse_or_replay( space_id, name, path, flags, DICT_TF2_USE_TABLESPACE, FIL_IBD_FILE_INITIAL_SIZE, - FIL_SPACE_ENCRYPTION_DEFAULT, + FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY) != DB_SUCCESS) { ut_error; } @@ -2933,16 +2970,27 @@ fil_check_pending_operations( *space = 0; - /* Wait for crypt threads to stop accessing space */ - fil_space_crypt_close_tablespace(id); - mutex_enter(&fil_system->mutex); fil_space_t* sp = fil_space_get_by_id(id); + if (sp) { sp->stop_new_ops = TRUE; + /* space could be freed by other threads as soon + as n_pending_ops reaches 0, thus increment pending + ops here. 
*/ + sp->n_pending_ops++; } + mutex_exit(&fil_system->mutex); + /* Wait for crypt threads to stop accessing space */ + if (sp) { + fil_space_crypt_close_tablespace(sp); + /* We have "acquired" this space and must + free it now as below we compare n_pending_ops. */ + fil_space_release(sp); + } + /* Check for pending change buffer merges. */ do { @@ -3985,13 +4033,13 @@ fil_create_new_single_table_tablespace( /* Create crypt data if the tablespace is either encrypted or user has requested it to remain unencrypted. */ - if (mode == FIL_SPACE_ENCRYPTION_ON || mode == FIL_SPACE_ENCRYPTION_OFF || + if (mode == FIL_ENCRYPTION_ON || mode == FIL_ENCRYPTION_OFF || srv_encrypt_tables) { crypt_data = fil_space_create_crypt_data(mode, key_id); } success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE, - crypt_data, true); + crypt_data, true, mode); if (!success || !fil_node_create(path, size, space_id, FALSE)) { err = DB_ERROR; @@ -6699,7 +6747,8 @@ fil_iterate( page_type == FIL_PAGE_PAGE_COMPRESSED); /* If tablespace is encrypted, we need to decrypt - the page. */ + the page. Note that tablespaces are not in + fil_system during import. */ if (encrypted) { decrypted = fil_space_decrypt( iter.crypt_data, @@ -6952,8 +7001,11 @@ fil_tablespace_iterate( iter.n_io_buffers = n_io_buffers; iter.page_size = callback.get_page_size(); + /* In MariaDB/MySQL 5.6 tablespace does not exist + during import, therefore we can't use space directly + here. 
*/ ulint crypt_data_offset = fsp_header_get_crypt_offset( - callback.get_zip_size(), 0); + callback.get_zip_size()); /* read (optional) crypt data */ iter.crypt_data = fil_space_read_crypt_data( @@ -6995,7 +7047,7 @@ fil_tablespace_iterate( mem_free(io_buffer); - if (iter.crypt_data != NULL) { + if (crypt_io_buffer != NULL) { mem_free(crypt_io_buffer); iter.crypt_io_buffer = NULL; fil_space_destroy_crypt_data(&iter.crypt_data); @@ -7254,7 +7306,7 @@ fil_space_set_corrupt( space = fil_space_get_by_id(space_id); if (space) { - space->is_corrupt = TRUE; + space->is_corrupt = true; } mutex_exit(&fil_system->mutex); @@ -7290,36 +7342,6 @@ fil_get_first_space() } /****************************************************************** -Get id of first tablespace that has node or ULINT_UNDEFINED if none */ -UNIV_INTERN -ulint -fil_get_first_space_safe() -/*======================*/ -{ - ulint out_id = ULINT_UNDEFINED; - fil_space_t* space; - - mutex_enter(&fil_system->mutex); - - space = UT_LIST_GET_FIRST(fil_system->space_list); - if (space != NULL) { - do - { - if (!space->stop_new_ops && UT_LIST_GET_LEN(space->chain) > 0) { - out_id = space->id; - break; - } - - space = UT_LIST_GET_NEXT(space_list, space); - } while (space != NULL); - } - - mutex_exit(&fil_system->mutex); - - return out_id; -} - -/****************************************************************** Get id of next tablespace or ULINT_UNDEFINED if none */ UNIV_INTERN ulint @@ -7360,165 +7382,206 @@ fil_get_next_space( return out_id; } -/****************************************************************** -Get id of next tablespace that has node or ULINT_UNDEFINED if none */ -UNIV_INTERN -ulint -fil_get_next_space_safe( -/*====================*/ - ulint id) /*!< in: previous space id */ +/** Acquire a tablespace when it could be dropped concurrently. +Used by background threads that do not necessarily hold proper locks +for concurrency control. 
+@param[in] id tablespace ID +@param[in] silent whether to silently ignore missing tablespaces +@return the tablespace, or NULL if missing or being deleted */ +inline +fil_space_t* +fil_space_acquire_low( + ulint id, + bool silent) { - bool found; - fil_space_t* space; - ulint out_id = ULINT_UNDEFINED; + fil_space_t* space; mutex_enter(&fil_system->mutex); space = fil_space_get_by_id(id); - if (space == NULL) { - /* we didn't find it...search for space with space->id > id */ - found = false; - space = UT_LIST_GET_FIRST(fil_system->space_list); - } else { - /* we found it, take next available space */ - found = true; - } - - while ((space = UT_LIST_GET_NEXT(space_list, space)) != NULL) { - - if (!found && space->id <= id) - continue; - if (!space->stop_new_ops) { - /* inc reference to prevent drop */ - out_id = space->id; - break; + if (space == NULL) { + if (!silent) { + ib_logf(IB_LOG_LEVEL_WARN, "Trying to access missing" + " tablespace " ULINTPF ".", id); + ut_error; } + } else if (space->stop_new_ops) { + space = NULL; + } else { + space->n_pending_ops++; } mutex_exit(&fil_system->mutex); - return out_id; + return(space); } -/****************************************************************** -Get crypt data for a tablespace */ -UNIV_INTERN -fil_space_crypt_t* -fil_space_get_crypt_data( -/*=====================*/ - ulint id) /*!< in: space id */ +/** Acquire a tablespace when it could be dropped concurrently. +Used by background threads that do not necessarily hold proper locks +for concurrency control. +@param[in] id tablespace ID +@return the tablespace, or NULL if missing or being deleted */ +fil_space_t* +fil_space_acquire( + ulint id) { - fil_space_t* space; - fil_space_crypt_t* crypt_data = NULL; + return(fil_space_acquire_low(id, false)); +} - ut_ad(fil_system); +/** Acquire a tablespace that may not exist. +Used by background threads that do not necessarily hold proper locks +for concurrency control. 
+@param[in] id tablespace ID +@return the tablespace, or NULL if missing or being deleted */ +fil_space_t* +fil_space_acquire_silent( + ulint id) +{ + return(fil_space_acquire_low(id, true)); +} +/** Release a tablespace acquired with fil_space_acquire(). +@param[in,out] space tablespace to release */ +void +fil_space_release( + fil_space_t* space) +{ mutex_enter(&fil_system->mutex); + ut_ad(space->magic_n == FIL_SPACE_MAGIC_N); + ut_ad(space->n_pending_ops > 0); + space->n_pending_ops--; + mutex_exit(&fil_system->mutex); +} - space = fil_space_get_by_id(id); +/** Return the next fil_space_t. +Once started, the caller must keep calling this until it returns NULL. +fil_space_acquire() and fil_space_release() are invoked here which +blocks a concurrent operation from dropping the tablespace. +@param[in] prev_space Pointer to the previous fil_space_t. +If NULL, use the first fil_space_t on fil_system->space_list. +@return pointer to the next fil_space_t. +@retval NULL if this was the last*/ +fil_space_t* +fil_space_next( + fil_space_t* prev_space) +{ + fil_space_t* space=prev_space; - mutex_exit(&fil_system->mutex); + mutex_enter(&fil_system->mutex); - if (space != NULL) { - /* If we have not yet read the page0 - of this tablespace we will do it now. 
*/ - if (!space->crypt_data && !space->page_0_crypt_read) { - ulint space_id = space->id; - fil_node_t* node; - - ut_a(space->crypt_data == NULL); - node = UT_LIST_GET_FIRST(space->chain); - - byte *buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); - byte *page = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE)); - fil_read(true, space_id, 0, 0, 0, UNIV_PAGE_SIZE, page, - NULL, NULL); - ulint offset = fsp_header_get_crypt_offset( - fsp_header_get_zip_size(page), NULL); - space->crypt_data = fil_space_read_crypt_data(space_id, page, offset); - ut_free(buf); + if (prev_space == NULL) { + space = UT_LIST_GET_FIRST(fil_system->space_list); -#ifdef UNIV_DEBUG - ib_logf(IB_LOG_LEVEL_INFO, - "Read page 0 from tablespace for space %lu name %s key_id %u encryption %d handle %d.", - space_id, - space->name, - space->crypt_data ? space->crypt_data->key_id : 0, - space->crypt_data ? space->crypt_data->encryption : 0, - node->handle); -#endif + /* We can trust that space is not NULL because at least the + system tablespace is always present and loaded first. */ + space->n_pending_ops++; + } else { + ut_ad(space->n_pending_ops > 0); - ut_a(space->id == space_id); + /* Move on to the next fil_space_t */ + space->n_pending_ops--; + space = UT_LIST_GET_NEXT(space_list, space); - space->page_0_crypt_read = true; + /* Skip spaces that are being created by + fil_ibd_create(), or dropped, or !tablespace. */ + while (space != NULL + && (UT_LIST_GET_LEN(space->chain) == 0 + || space->stop_new_ops + || space->purpose != FIL_TABLESPACE)) { + space = UT_LIST_GET_NEXT(space_list, space); } - crypt_data = space->crypt_data; - - if (!space->page_0_crypt_read) { - ib_logf(IB_LOG_LEVEL_WARN, - "Space %lu name %s contains encryption %d information for key_id %u but page0 is not read.", - space->id, - space->name, - space->crypt_data ? space->crypt_data->encryption : 0, - space->crypt_data ? 
space->crypt_data->key_id : 0); + if (space != NULL) { + space->n_pending_ops++; } } - return(crypt_data); + mutex_exit(&fil_system->mutex); + + return(space); } -/****************************************************************** -Get crypt data for a tablespace */ -UNIV_INTERN -fil_space_crypt_t* -fil_space_set_crypt_data( -/*=====================*/ - ulint id, /*!< in: space id */ - fil_space_crypt_t* crypt_data) /*!< in: crypt data */ +/** +Remove space from key rotation list if there are no more +pending operations. +@param[in] space Tablespace */ +static +void +fil_space_remove_from_keyrotation( + fil_space_t* space) { - fil_space_t* space; - fil_space_crypt_t* free_crypt_data = NULL; - fil_space_crypt_t* ret_crypt_data = NULL; + ut_ad(mutex_own(&fil_system->mutex)); + ut_ad(space); - ut_ad(fil_system); + if (space->n_pending_ops == 0) { + space->is_in_rotation_list = false; + UT_LIST_REMOVE(rotation_list, fil_system->rotation_list, space); + } +} - mutex_enter(&fil_system->mutex); - space = fil_space_get_by_id(id); +/** Return the next fil_space_t from key rotation list. +Once started, the caller must keep calling this until it returns NULL. +fil_space_acquire() and fil_space_release() are invoked here which +blocks a concurrent operation from dropping the tablespace. +@param[in] prev_space Pointer to the previous fil_space_t. +If NULL, use the first fil_space_t on fil_system->space_list. +@return pointer to the next fil_space_t. +@retval NULL if this was the last*/ +fil_space_t* +fil_space_keyrotate_next( + fil_space_t* prev_space) +{ + fil_space_t* space = prev_space; + fil_space_t* old = NULL; - if (space != NULL) { - if (space->crypt_data != NULL) { - /* Here we need to release fil_system mutex to - avoid mutex deadlock assertion. 
Here we would - take mutexes in order fil_system, crypt_data and - in fil_crypt_start_encrypting_space we would - take them in order crypt_data, fil_system - at fil_space_get_flags -> fil_space_get_space */ - mutex_exit(&fil_system->mutex); - fil_space_merge_crypt_data(space->crypt_data, - crypt_data); - ret_crypt_data = space->crypt_data; - free_crypt_data = crypt_data; - } else { - space->crypt_data = crypt_data; - ret_crypt_data = space->crypt_data; - mutex_exit(&fil_system->mutex); + mutex_enter(&fil_system->mutex); + + if (UT_LIST_GET_LEN(fil_system->rotation_list) == 0) { + if (space) { + ut_ad(space->n_pending_ops > 0); + space->n_pending_ops--; + fil_space_remove_from_keyrotation(space); } - } else { - /* there is a small risk that tablespace has been deleted */ - free_crypt_data = crypt_data; mutex_exit(&fil_system->mutex); + return(NULL); + } + + if (prev_space == NULL) { + space = UT_LIST_GET_FIRST(fil_system->rotation_list); + + /* We can trust that space is not NULL because we + checked list length above */ + } else { + ut_ad(space->n_pending_ops > 0); + + /* Move on to the next fil_space_t */ + space->n_pending_ops--; + + old = space; + space = UT_LIST_GET_NEXT(rotation_list, space); + + fil_space_remove_from_keyrotation(old); } - if (free_crypt_data != NULL) { - /* there was already crypt data present and the new crypt - * data provided as argument to this function has been merged - * into that => free new crypt data - */ - fil_space_destroy_crypt_data(&free_crypt_data); + /* Skip spaces that are being created by fil_ibd_create(), + or dropped. Note that rotation_list contains only + space->purpose == FIL_TABLESPACE. 
*/ + while (space != NULL + && (UT_LIST_GET_LEN(space->chain) == 0 + || space->stop_new_ops)) { + + old = space; + space = UT_LIST_GET_NEXT(rotation_list, space); + fil_space_remove_from_keyrotation(old); } - return ret_crypt_data; + if (space != NULL) { + space->n_pending_ops++; + } + + mutex_exit(&fil_system->mutex); + + return(space); } |