diff options
author | Sergei Golubchik <serg@mariadb.org> | 2017-03-30 12:48:42 +0200 |
---|---|---|
committer | Sergei Golubchik <serg@mariadb.org> | 2017-03-30 12:48:42 +0200 |
commit | da4d71d10d23c1ac2d10b72baee14991ccb7a146 (patch) | |
tree | 7cdf3a8c8e72ca7c1c8105427c04123f025bd870 /storage/innobase | |
parent | 9ec85009985d644ce7ae797bc3572d0ad0f69bb0 (diff) | |
parent | a00517ac9707ffd51c092f5af5d198c5ee789bb4 (diff) | |
download | mariadb-git-da4d71d10d23c1ac2d10b72baee14991ccb7a146.tar.gz |
Merge branch '10.1' into 10.2
Diffstat (limited to 'storage/innobase')
50 files changed, 2006 insertions, 2180 deletions
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 7d93c495aa0..c15784a97a4 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -3519,8 +3519,6 @@ btr_cur_update_alloc_zip_func( const page_t* page = page_cur_get_page(cursor); ut_ad(page_zip == page_cur_get_page_zip(cursor)); - - ut_ad(page_zip); ut_ad(!dict_index_is_ibuf(index)); ut_ad(rec_offs_validate(page_cur_get_rec(cursor), index, offsets)); @@ -6267,7 +6265,6 @@ btr_cur_disown_inherited_fields( ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); ut_ad(rec_offs_any_extern(offsets)); - ut_ad(mtr); for (i = 0; i < rec_offs_n_fields(offsets); i++) { if (rec_offs_nth_extern(offsets, i) @@ -6329,9 +6326,6 @@ btr_push_update_extern_fields( ulint n; const upd_field_t* uf; - ut_ad(tuple); - ut_ad(update); - uf = update->fields; n = upd_get_n_fields(update); @@ -6625,7 +6619,6 @@ btr_store_big_rec_extern_fields( ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(rec_offs_any_extern(offsets)); - ut_ad(btr_mtr); ut_ad(mtr_memo_contains_flagged(btr_mtr, dict_index_get_lock(index), MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); ut_ad(mtr_is_block_fix( diff --git a/storage/innobase/btr/btr0scrub.cc b/storage/innobase/btr/btr0scrub.cc index c30227ef085..4cb46dd415f 100644 --- a/storage/innobase/btr/btr0scrub.cc +++ b/storage/innobase/btr/btr0scrub.cc @@ -118,21 +118,33 @@ log_scrub_failure( Lock dict mutexes */ static bool -btr_scrub_lock_dict_func(ulint space, bool lock_to_close_table, +btr_scrub_lock_dict_func(ulint space_id, bool lock_to_close_table, const char * file, uint line) { time_t start = time(0); time_t last = start; + /* FIXME: this is not the proper way of doing things. The + dict_sys->mutex should not be held by any thread for longer + than a few microseconds. It must not be held during I/O, + for example. So, what is the purpose for this busy-waiting? + This function should be rewritten as part of MDEV-8139: + Fix scrubbing tests. */ + while (mutex_enter_nowait(&(dict_sys->mutex))) { /* if we lock to close a table, we wait forever * if we don't lock to close a table, we check if space * is closing, and then instead give up */ if (lock_to_close_table == false) { - if (fil_crypt_is_closing(space)) { + fil_space_t* space = fil_space_acquire(space_id); + if (!space || space->stop_new_ops) { + if (space) { + fil_space_release(space); + } return false; } + fil_space_release(space); } os_thread_sleep(250000); @@ -141,9 +153,10 @@ btr_scrub_lock_dict_func(ulint space, bool lock_to_close_table, if (now >= last + 30) { fprintf(stderr, "WARNING: %s:%u waited %lu seconds for" - " dict_sys lock, space: %lu" + " dict_sys lock, space: " ULINTPF " lock_to_close_table: %u\n", - file, line, (unsigned long)(now - start), space, + file, line, (unsigned long)(now - start), + space_id, lock_to_close_table); last = now; @@ -189,16 +202,24 @@ void btr_scrub_table_close_for_thread( btr_scrub_t *scrub_data) { - if (scrub_data->current_table == NULL) + if (scrub_data->current_table == NULL) { return; + } - bool lock_for_close = true; - btr_scrub_lock_dict(scrub_data->space, lock_for_close); + fil_space_t* space = fil_space_acquire(scrub_data->space); - /* perform the actual closing */ - btr_scrub_table_close(scrub_data->current_table); + /* If tablespace is not marked as stopping perform + the actual close. */ + if (space && !space->is_stopping()) { + mutex_enter(&dict_sys->mutex); + /* perform the actual closing */ + btr_scrub_table_close(scrub_data->current_table); + mutex_exit(&dict_sys->mutex); + } - btr_scrub_unlock_dict(); + if (space) { + fil_space_release(space); + } scrub_data->current_table = NULL; scrub_data->current_index = NULL; diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 93e0bebfe4e..11fe77d75de 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -403,22 +403,17 @@ buf_pool_register_chunk( chunk->blocks->frame, chunk)); } +/** Decrypt a page. +@param[in,out] bpage Page control block +@return whether the operation was successful */ +static +bool +buf_page_decrypt_after_read(buf_page_t* bpage); + /* prototypes for new functions added to ha_innodb.cc */ trx_t* innobase_get_trx(); /********************************************************************//** -Check if page is maybe compressed, encrypted or both when we encounter -corrupted page. Note that we can't be 100% sure if page is corrupted -or decrypt/decompress just failed. -*/ -static -ibool -buf_page_check_corrupt( -/*===================*/ - buf_page_t* bpage); /*!< in/out: buffer page read from - disk */ - -/********************************************************************//** Gets the smallest oldest_modification lsn for any page in the pool. Returns zero if all modified pages have been flushed to disk. @return oldest modification in pool, zero if none */ @@ -611,7 +606,6 @@ buf_page_is_zeroes( @param[in] curr_algo current checksum algorithm @param[in] use_legacy_big_endian use legacy big endian algorithm @return true if the page is in crc32 checksum format. */ -UNIV_INLINE bool buf_page_is_checksum_valid_crc32( const byte* read_buf, @@ -670,7 +664,6 @@ invalid: @param[in] log_file file pointer to log_file @param[in] curr_algo current checksum algorithm @return true if the page is in innodb checksum format. */ -UNIV_INLINE bool buf_page_is_checksum_valid_innodb( const byte* read_buf, @@ -767,7 +760,6 @@ buf_page_is_checksum_valid_innodb( @param[in] log_file file pointer to log_file @param[in] curr_algo current checksum algorithm @return true if the page is in none checksum format. */ -UNIV_INLINE bool buf_page_is_checksum_valid_none( const byte* read_buf, @@ -792,7 +784,7 @@ buf_page_is_checksum_valid_none( << " lsn " << mach_read_from_4(read_buf + FIL_PAGE_LSN)); } -#endif +#endif /* DBUG_OFF */ #ifdef UNIV_INNOCHECKSUM if (is_log_enabled @@ -806,7 +798,6 @@ buf_page_is_checksum_valid_none( } #endif /* UNIV_INNOCHECKSUM */ - return(checksum_field1 == checksum_field2 && checksum_field1 == BUF_NO_CHECKSUM_MAGIC); } @@ -816,18 +807,18 @@ buf_page_is_checksum_valid_none( the LSN @param[in] read_buf database page @param[in] page_size page size -@param[in] skip_checksum if true, skip checksum +@param[in] space tablespace @param[in] page_no page number of given read_buf @param[in] strict_check true if strict-check option is enabled @param[in] is_log_enabled true if log option is enabled @param[in] log_file file pointer to log_file @return TRUE if corrupted */ -ibool +bool buf_page_is_corrupted( bool check_lsn, const byte* read_buf, const page_size_t& page_size, - bool skip_checksum + const fil_space_t* space #ifdef UNIV_INNOCHECKSUM ,uintmax_t page_no, bool strict_check, @@ -838,40 +829,35 @@ buf_page_is_corrupted( { ulint checksum_field1; ulint checksum_field2; - bool no_checksum = false; #ifndef UNIV_INNOCHECKSUM - ulint space_id = mach_read_from_4( - read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - ulint page_type = mach_read_from_4( - read_buf + FIL_PAGE_TYPE); - no_checksum = (page_type == FIL_PAGE_PAGE_COMPRESSED || - page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); - fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id); + DBUG_EXECUTE_IF("buf_page_import_corrupt_failure", return(true); ); - /* Page is encrypted if encryption information is found from - tablespace and page contains used key_version. This is true - also for pages first compressed and then encrypted. */ - if (crypt_data && - crypt_data->type != CRYPT_SCHEME_UNENCRYPTED && - fil_page_is_encrypted(read_buf)) { - no_checksum = true; - } + ulint page_type = mach_read_from_2( + read_buf + FIL_PAGE_TYPE); - /* Return early if there is no checksum or END_LSN */ - if (no_checksum) { - return (FALSE); + /* We can trust page type if page compression is set on tablespace + flags because page compression flag means file must have been + created with 10.1 (later than 5.5 code base). In 10.1 page + compressed tables do not contain post compression checksum and + FIL_PAGE_END_LSN_OLD_CHKSUM field stored. Note that space can + be null if we are in fil_check_first_page() and first page + is not compressed or encrypted. Page checksum is verified + after decompression (i.e. normally pages are already + decompressed at this stage). */ + if ((page_type == FIL_PAGE_PAGE_COMPRESSED || + page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) + && space && FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags)) { + return(false); } #else if (mach_read_from_4(read_buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) != 0 || mach_read_from_2(read_buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { - no_checksum= true; + return(false); } - #endif - DBUG_EXECUTE_IF("buf_page_import_corrupt_failure", return(TRUE); ); - if (!no_checksum && !page_size.is_compressed() + if (!page_size.is_compressed() && memcmp(read_buf + FIL_PAGE_LSN + 4, read_buf + page_size.logical() - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) { @@ -886,7 +872,7 @@ buf_page_is_corrupted( << mach_read_from_4(read_buf + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4) << " do not match"; #endif - return(TRUE); + return(true); } #ifndef UNIV_INNOCHECKSUM @@ -923,9 +909,8 @@ buf_page_is_corrupted( /* Check whether the checksum fields have correct values */ - if (srv_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_NONE - || skip_checksum) { - return(FALSE); + if (srv_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_NONE) { + return(false); } if (page_size.is_compressed()) { @@ -984,7 +969,7 @@ buf_page_is_corrupted( " is empty and uncorrupted\n", page_no); } - return(FALSE); + return(false); } #else return(i < page_size.logical()); @@ -1013,7 +998,7 @@ buf_page_is_corrupted( page_no, is_log_enabled, log_file, curr_algo, #endif /* UNIV_INNOCHECKSUM */ false)) { - return(FALSE); + return(false); } if (buf_page_is_checksum_valid_none(read_buf, @@ -1048,7 +1033,7 @@ buf_page_is_corrupted( checksum_field1); } #endif /* UNIV_INNOCHECKSUM */ - return(FALSE); + return(false); } /* We need to check whether the stored checksum matches legacy @@ -1064,7 +1049,7 @@ buf_page_is_corrupted( #endif /* UNIV_INNOCHECKSUM */ true)) { - return(FALSE); + return(false); } legacy_checksum_checked = true; } @@ -1083,7 +1068,7 @@ buf_page_is_corrupted( page_id); } #endif /* UNIV_INNOCHECKSUM */ - return(FALSE); + return(false); } /* If legacy checksum is not checked, do it now. */ @@ -1095,7 +1080,7 @@ buf_page_is_corrupted( true)) { legacy_big_endian_checksum = true; - return(FALSE); + return(false); } #ifdef UNIV_INNOCHECKSUM @@ -1105,7 +1090,7 @@ buf_page_is_corrupted( page_no); } #endif /* UNIV_INNOCHECKSUM */ - return(TRUE); + return(true); case SRV_CHECKSUM_ALGORITHM_INNODB: case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: @@ -1116,7 +1101,7 @@ buf_page_is_corrupted( , page_no, is_log_enabled, log_file, curr_algo #endif /* UNIV_INNOCHECKSUM */ )) { - return(FALSE); + return(false); } if (buf_page_is_checksum_valid_none(read_buf, @@ -1150,7 +1135,7 @@ buf_page_is_corrupted( checksum_field1); } #endif /* UNIV_INNOCHECKSUM */ - return(FALSE); + return(false); } #ifdef UNIV_INNOCHECKSUM @@ -1175,7 +1160,7 @@ buf_page_is_corrupted( } #endif /* UNIV_INNOCHECKSUM */ - return(FALSE); + return(false); } #ifdef UNIV_INNOCHECKSUM @@ -1185,7 +1170,7 @@ buf_page_is_corrupted( page_no); } #endif /* UNIV_INNOCHECKSUM */ - return(TRUE); + return(true); case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: @@ -1216,7 +1201,7 @@ buf_page_is_corrupted( SRV_CHECKSUM_ALGORITHM_CRC32, page_id); #endif /* UNIV_INNOCHECKSUM */ - return(FALSE); + return(false); } if (buf_page_is_checksum_valid_innodb(read_buf, @@ -1230,7 +1215,7 @@ buf_page_is_corrupted( SRV_CHECKSUM_ALGORITHM_INNODB, page_id); #endif /* UNIV_INNOCHECKSUM */ - return(FALSE); + return(false); } #ifdef UNIV_INNOCHECKSUM @@ -1240,17 +1225,17 @@ buf_page_is_corrupted( page_no); } #endif /* UNIV_INNOCHECKSUM */ - return(TRUE); + return(true); case SRV_CHECKSUM_ALGORITHM_NONE: - /* should have returned FALSE earlier */ + /* should have returned false earlier */ break; /* no default so the compiler will emit a warning if new enum is added and not handled here */ } ut_error; - return(FALSE); + return(false); } #ifndef UNIV_INNOCHECKSUM @@ -1388,8 +1373,7 @@ buf_page_print( case FIL_PAGE_INDEX: case FIL_PAGE_RTREE: index_id = btr_page_get_index_id(read_buf); - ib::error() << - "InnoDB: Page may be an index page where" + ib::info() << "Page may be an index page where" " index id is " << index_id; index = dict_index_find_on_id_low(index_id); @@ -1521,11 +1505,7 @@ buf_block_init( block->page.io_fix = BUF_IO_NONE; block->page.flush_observer = NULL; block->page.key_version = 0; - block->page.page_encrypted = false; - block->page.page_compressed = false; block->page.encrypted = false; - block->page.stored_checksum = BUF_NO_CHECKSUM_MAGIC; - block->page.calculated_checksum = BUF_NO_CHECKSUM_MAGIC; block->page.real_size = 0; block->page.write_size = 0; block->modify_clock = 0; @@ -4365,14 +4345,14 @@ loop: } else if (retries < BUF_PAGE_READ_MAX_RETRIES) { ++retries; - bool corrupted = true; + bool corrupted = false; if (bpage) { corrupted = buf_page_check_corrupt(bpage); } /* Do not try again for encrypted pages */ - if (!corrupted) { + if (corrupted && bpage->encrypted) { BPageMutex* pmutex = buf_page_get_mutex(bpage); buf_pool = buf_pool_from_bpage(bpage); @@ -4400,13 +4380,13 @@ loop: retries = BUF_PAGE_READ_MAX_RETRIES; ); } else { - bool corrupted = true; + bool corrupted = false; if (bpage) { corrupted = buf_page_check_corrupt(bpage); } - if (corrupted) { + if (corrupted && !bpage->encrypted) { ib::fatal() << "Unable to read page " << page_id << " into the buffer pool after " << BUF_PAGE_READ_MAX_RETRIES << " attempts." @@ -5208,10 +5188,6 @@ buf_page_init_low( bpage->oldest_modification = 0; bpage->write_size = 0; bpage->key_version = 0; - bpage->stored_checksum = BUF_NO_CHECKSUM_MAGIC; - bpage->calculated_checksum = BUF_NO_CHECKSUM_MAGIC; - bpage->page_encrypted = false; - bpage->page_compressed = false; bpage->encrypted = false; bpage->real_size = 0; bpage->slot = NULL; @@ -5861,81 +5837,76 @@ buf_mark_space_corrupt( Check if page is maybe compressed, encrypted or both when we encounter corrupted page. Note that we can't be 100% sure if page is corrupted or decrypt/decompress just failed. -*/ -static -ibool +@param[in,out] bpage Page +@return true if page corrupted, false if not */ +UNIV_INTERN +bool buf_page_check_corrupt( -/*===================*/ - buf_page_t* bpage) /*!< in/out: buffer page read from disk */ + buf_page_t* bpage) { byte* dst_frame = (bpage->zip.data) ? bpage->zip.data : ((buf_block_t*) bpage)->frame; - bool page_compressed = bpage->page_encrypted; - ulint stored_checksum = bpage->stored_checksum; - ulint calculated_checksum = bpage->calculated_checksum; - bool page_compressed_encrypted = bpage->page_compressed; - ulint space_id = mach_read_from_4( - dst_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id); - fil_space_t* space = fil_space_found_by_id(space_id); - bool corrupted = true; - ulint key_version = bpage->key_version; - - if (key_version != 0 || page_compressed_encrypted) { + fil_space_t* space = fil_space_acquire_silent(bpage->id.space()); + bool still_encrypted = false; + bool corrupted = false; + fil_space_crypt_t* crypt_data = space ? space->crypt_data : NULL; + + /* In buf_decrypt_after_read we have either decrypted the page if + page post encryption checksum matches and used key_id is found + from the encryption plugin. If checksum did not match page was + not decrypted and it could be either encrypted and corrupted + or corrupted or good page. If we decrypted, there page could + still be corrupted if used key does not match. */ + still_encrypted = crypt_data + && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED + && !bpage->encrypted + && fil_space_verify_crypt_checksum(dst_frame, bpage->size, + bpage->id.space(), + bpage->id.page_no()); + + if (!still_encrypted) { + /* If traditional checksums match, we assume that page is + not anymore encrypted. */ + corrupted = buf_page_is_corrupted( + true, dst_frame, bpage->size, space); + + if (!corrupted) { + bpage->encrypted = false; + } + } + + /* Pages that we think are unencrypted but do not match the checksum + checks could be corrupted or encrypted or both. */ + if (corrupted && !bpage->encrypted) { + /* An error will be reported by + buf_page_io_complete(). */ + } else if (still_encrypted || (bpage->encrypted && corrupted)) { bpage->encrypted = true; - } + corrupted = true; - if (key_version != 0 || - (crypt_data && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED) || - page_compressed || page_compressed_encrypted) { + ib::error() + << "The page " << bpage->id << " in file " + << (space && space->name ? space->name : "NULL") + << " cannot be decrypted."; - /* Page is really corrupted if post encryption stored - checksum does not match calculated checksum after page was - read. For pages compressed and then encrypted, there is no - checksum. */ - corrupted = (!page_compressed_encrypted && stored_checksum != calculated_checksum); + ib::info() + << "However key management plugin or used key_version " + << bpage->key_version << " is not found or" + " used encryption algorithm or method does not match."; - if (corrupted) { - ib::error() << (page_compressed_encrypted ? "Maybe corruption" : "Corruption") - << ": Block in space_id " << space_id - << " in file " << (space ? space->name : "NULL") - << " corrupted."; - - ib::error() << "Page based on contents " - << ((key_version == 0 && page_compressed_encrypted == false) ? "not" : "maybe") - << " encrypted."; - - if (stored_checksum != BUF_NO_CHECKSUM_MAGIC || - calculated_checksum != BUF_NO_CHECKSUM_MAGIC) { - ib::error() << "Page stored checksum " << stored_checksum - << " but calculated checksum " - << calculated_checksum << " ."; - } - - ib::error() << "Reason could be that key_version " << key_version - << " in page or in crypt_data " << crypt_data - << " could not be found."; - ib::error() << "Reason could be also that key management plugin is not found or" - " used encryption algorithm or method does not match."; - ib::error() << "Based on page page compressed" - << page_compressed - << ", compressed and encrypted " - << page_compressed_encrypted << " ."; - } else { - ib::error() << "Block in space_id " - << space_id - << " in file " - << (space ? space->name : "NULL") - << " encrypted."; - ib::error() << "However key management plugin or used key_id " - << key_version - << " is not found or" - << " used encryption algorithm or method does not match."; - ib::error() << "Marking tablespace as missing. You may drop this table or" - << " install correct key management plugin and key file."; + if (bpage->id.space() != TRX_SYS_SPACE) { + ib::info() + << "Marking tablespace as missing." + " You may drop this table or" + " install correct key management plugin" + " and key file."; } } + if (space) { + fil_space_release(space); + } + return corrupted; } @@ -5955,6 +5926,9 @@ buf_page_io_complete( buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); const ibool uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); + byte* frame = NULL; + bool corrupted = false; + ut_a(buf_page_in_file(bpage)); /* We do not need protect io_fix here by mutex to read @@ -5969,24 +5943,15 @@ buf_page_io_complete( if (io_type == BUF_IO_READ) { ulint read_page_no; ulint read_space_id; - byte* frame = NULL; ut_ad(bpage->zip.data != NULL || ((buf_block_t*)bpage)->frame != NULL); - if (!buf_page_decrypt_after_read(bpage)) { - /* encryption error! */ - if (bpage->size.is_compressed()) { - frame = bpage->zip.data; - } else { - frame = ((buf_block_t*) bpage)->frame; - } - - ib::info() << "Page " - << bpage->id - << " encryption error key_version " - << bpage->key_version; + buf_page_decrypt_after_read(bpage); - goto database_corrupted; + if (bpage->size.is_compressed()) { + frame = bpage->zip.data; + } else { + frame = ((buf_block_t*) bpage)->frame; } if (bpage->size.is_compressed()) { @@ -6040,12 +6005,11 @@ buf_page_io_complete( << ", should be " << bpage->id; } - /* From version 3.23.38 up we store the page checksum - to the 4 first bytes of the page end lsn field */ - if (buf_page_is_corrupted( - true, frame, bpage->size, - fsp_is_checksum_disabled(bpage->id.space()))) { + corrupted = buf_page_check_corrupt(bpage); + +database_corrupted: + if (corrupted) { /* Not a real corruption if it was triggered by error injection */ DBUG_EXECUTE_IF( @@ -6058,21 +6022,25 @@ buf_page_io_complete( "corruption"; return(true); } - goto page_not_corrupt;); -database_corrupted: - bool corrupted = buf_page_check_corrupt(bpage); + goto page_not_corrupt; + ); - /* Compressed and encrypted pages are basically gibberish avoid - printing the contents. */ - if (corrupted) { + if (!bpage->encrypted) { + fil_system_enter(); + fil_space_t* space = fil_space_get_by_id(bpage->id.space()); + fil_system_exit(); ib::error() << "Database page corruption on disk" - " or a failed file read of page " - << bpage->id + " or a failed file read of tablespace " + << (space->name ? space->name : "NULL") + << " page " << bpage->id << ". You may have to recover from " << "a backup."; + buf_page_print(frame, bpage->size, + BUF_PAGE_PRINT_NO_CRASH); + ib::info() << "It is also possible that your" " operating system has corrupted" @@ -6095,13 +6063,9 @@ database_corrupted: if (bpage->id.space() > TRX_SYS_SPACE && buf_mark_space_corrupt(bpage)) { - return(false); } else { - corrupted = buf_page_check_corrupt(bpage); - ulint key_version = bpage->key_version; - - if (corrupted) { + if (!bpage->encrypted) { ib::fatal() << "Aborting because of a" " corrupt database page in" @@ -6111,16 +6075,20 @@ database_corrupted: " as corrupt."; } - ib_push_warning((void *)NULL, DB_DECRYPTION_FAILED, - "Table in tablespace %u encrypted." - "However key management plugin or used key_id %u is not found or" + ib_push_warning(innobase_get_trx(), DB_DECRYPTION_FAILED, + "Table in tablespace %lu encrypted." + "However key management plugin or used key_id %lu is not found or" " used encryption algorithm or method does not match." " Can't continue opening the table.", - bpage->id.space(), key_version); + bpage->id.space(), bpage->key_version); - buf_page_print(frame, bpage->size, BUF_PAGE_PRINT_NO_CRASH); + if (bpage->encrypted && bpage->id.space() > TRX_SYS_SPACE) { + buf_mark_space_corrupt(bpage); + } else { + ut_error; + } - return (false); + return(false); } } } @@ -6145,18 +6113,23 @@ database_corrupted: && fil_page_get_type(frame) == FIL_PAGE_INDEX && page_is_leaf(frame)) { - if (bpage && bpage->encrypted) { - fprintf(stderr, - "InnoDB: Warning: Table in tablespace %u encrypted." - "However key management plugin or used key_id %u is not found or" + if (bpage && bpage->encrypted) { + ib::warn() + << "Table in tablespace " + << bpage->id.space() + << " encrypted. However key " + "management plugin or used " + << "key_version " << bpage->key_version + << "is not found or" " used encryption algorithm or method does not match." - " Can't continue opening the table.\n", - bpage->id.space(), bpage->key_version); + " Can't continue opening the table."; } else { + ibuf_merge_or_delete_for_page( (buf_block_t*) bpage, bpage->id, &bpage->size, TRUE); } + } } else { /* io_type == BUF_IO_WRITE */ @@ -6273,11 +6246,9 @@ buf_all_freed_instance( const buf_block_t* block = buf_chunk_not_freed(chunk); - if (UNIV_LIKELY_NULL(block)) { - if (block->page.key_version == 0) { - ib::fatal() << "Page " << block->page.id - << " still fixed or dirty"; - } + if (UNIV_LIKELY_NULL(block) && block->page.key_version == 0) { + ib::fatal() << "Page " << block->page.id + << " still fixed or dirty"; } } @@ -7403,20 +7374,18 @@ buf_pool_reserve_tmp_slot( /********************************************************************//** Encrypts a buffer page right before it's flushed to disk +@param[in,out] bpage Page control block +@param[in,out] src_frame Source page +@param[in] space_id Tablespace id +@return either unencrypted source page or decrypted page. */ +UNIV_INTERN byte* buf_page_encrypt_before_write( -/*==========================*/ - buf_page_t* bpage, /*!< in/out: buffer page to be flushed */ - byte* src_frame, /*!< in: src frame */ - ulint space_id) /*!< in: space id */ + buf_page_t* bpage, + byte* src_frame, + ulint space_id) { - fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id); - const page_size_t& page_size = bpage->size; - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - bool page_compressed = fil_space_is_page_compressed(space_id); - bool encrypted = true; - bpage->real_size = UNIV_PAGE_SIZE; fil_page_type_validate(src_frame); @@ -7433,7 +7402,21 @@ buf_page_encrypt_before_write( return src_frame; } - if (crypt_data != NULL && crypt_data->not_encrypted()) { + fil_space_t* space = fil_space_acquire_silent(space_id); + + /* Tablespace must exist during write operation */ + if (!space) { + /* This could be true on discard if we have injected a error + case e.g. in innodb.innodb-wl5522-debug-zip so that space + is already marked as stop_new_ops = true. */ + return src_frame; + } + + const page_size_t page_size(space->flags); + fil_space_crypt_t* crypt_data = space->crypt_data; + bool encrypted = true; + + if (space->crypt_data != NULL && space->crypt_data->not_encrypted()) { /* Encryption is disabled */ encrypted = false; } @@ -7450,11 +7433,15 @@ buf_page_encrypt_before_write( encrypted = false; } + bool page_compressed = fil_space_is_page_compressed(bpage->id.space()); + if (!encrypted && !page_compressed) { /* No need to encrypt or page compress the page */ + fil_space_release(space); return src_frame; } + buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); /* Find free slot from temporary memory array */ buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed); slot->out_buf = NULL; @@ -7464,11 +7451,10 @@ buf_page_encrypt_before_write( if (!page_compressed) { /* Encrypt page content */ - byte* tmp = fil_space_encrypt(space_id, + byte* tmp = fil_space_encrypt(space, bpage->id.page_no(), bpage->newest_modification, src_frame, - page_size, dst_frame); uint32_t key_version = mach_read_from_4( @@ -7507,32 +7493,29 @@ buf_page_encrypt_before_write( if(encrypted) { /* And then we encrypt the page content */ - tmp = fil_space_encrypt(space_id, + tmp = fil_space_encrypt(space, bpage->id.page_no(), bpage->newest_modification, tmp, - page_size, dst_frame); } slot->out_buf = dst_frame = tmp; } -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif + ut_d(fil_page_type_validate(dst_frame)); + fil_space_release(space); // return dst_frame which will be written return dst_frame; } -/********************************************************************//** -Decrypt page after it has been read from disk -*/ -ibool -buf_page_decrypt_after_read( -/*========================*/ - buf_page_t* bpage) /*!< in/out: buffer page read from disk */ +/** Decrypt a page. +@param[in,out] bpage Page control block +@return whether the operation was successful */ +static +bool +buf_page_decrypt_after_read(buf_page_t* bpage) { bool compressed = bpage->size.is_compressed(); const page_size_t& size = bpage->size; @@ -7544,61 +7527,29 @@ buf_page_decrypt_after_read( bool page_compressed_encrypted = fil_page_is_compressed_encrypted(dst_frame); buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); bool success = true; - ulint space_id = mach_read_from_4( - dst_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id); - - /* Page is encrypted if encryption information is found from - tablespace and page contains used key_version. This is true - also for pages first compressed and then encrypted. */ - if (!crypt_data || - (crypt_data && - crypt_data->type == CRYPT_SCHEME_UNENCRYPTED && - key_version != 0)) { - byte* frame = NULL; - - if (bpage->size.is_compressed()) { - frame = bpage->zip.data; - } else { - frame = ((buf_block_t*) bpage)->frame; - } - - /* If page is not corrupted at this point, page can't be - encrypted, thus set key_version to 0. If page is corrupted, - we assume at this point that it is encrypted as page - contained key_version != 0. Note that page could still be - really corrupted. This we will find out after decrypt by - checking page checksums. */ - if (!buf_page_is_corrupted(false, frame, bpage->size, false)) { - key_version = 0; - } - } - - /* If page is encrypted read post-encryption checksum */ - if (!page_compressed_encrypted && key_version != 0) { - bpage->stored_checksum = mach_read_from_4(dst_frame + + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4); - } - ut_ad(bpage->key_version == 0); + bpage->key_version = key_version; if (bpage->id.page_no() == 0) { /* File header pages are not encrypted/compressed */ - return (TRUE); + return (true); } - /* Store these for corruption check */ - bpage->key_version = key_version; - bpage->page_encrypted = page_compressed_encrypted; - bpage->page_compressed = page_compressed; + FilSpace space(bpage->id.space()); + + /* Page is encrypted if encryption information is found from + tablespace and page contains used key_version. This is true + also for pages first compressed and then encrypted. */ + if (!space()->crypt_data) { + key_version = 0; + } if (page_compressed) { /* the page we read is unencrypted */ /* Find free slot from temporary memory array */ buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed); -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif + ut_d(fil_page_type_validate(dst_frame)); /* decompress using comp_buf to dst_frame */ fil_decompress_page(slot->comp_buf, @@ -7610,40 +7561,35 @@ buf_page_decrypt_after_read( slot->reserved = false; key_version = 0; -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif + ut_d(fil_page_type_validate(dst_frame)); } else { buf_tmp_buffer_t* slot = NULL; if (key_version) { + /* Verify encryption checksum before we even try to + decrypt. */ + if (!fil_space_verify_crypt_checksum( + dst_frame, size, + bpage->id.space(), bpage->id.page_no())) { + return (false); + } + /* Find free slot from temporary memory array */ slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed); -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif - - /* Calculate checksum before decrypt, this will be - used later to find out if incorrect key was used. */ - if (!page_compressed_encrypted) { - bpage->calculated_checksum = fil_crypt_calculate_checksum(size, dst_frame); - } + ut_d(fil_page_type_validate(dst_frame)); /* decrypt using crypt_buf to dst_frame */ - byte* res = fil_space_decrypt(bpage->id.space(), + byte* res = fil_space_decrypt(space, slot->crypt_buf, - size, - dst_frame); - + dst_frame, + &bpage->encrypted); if (!res) { - bpage->encrypted = true; success = false; } -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif + + ut_d(fil_page_type_validate(dst_frame)); } if (page_compressed_encrypted && success) { @@ -7651,18 +7597,13 @@ buf_page_decrypt_after_read( slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed); } -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif + ut_d(fil_page_type_validate(dst_frame)); /* decompress using comp_buf to dst_frame */ fil_decompress_page(slot->comp_buf, dst_frame, size.logical(), &bpage->write_size); - -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif + ut_d(fil_page_type_validate(dst_frame)); } /* Mark this slot as free */ @@ -7671,8 +7612,6 @@ buf_page_decrypt_after_read( } } - bpage->key_version = key_version; - return (success); } diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index c4f2280a1f2..c0cf26b869c 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -392,13 +392,7 @@ buf_dblwr_init_or_load_pages( doublewrite = read_buf + TRX_SYS_DOUBLEWRITE; - if (mach_read_from_4(read_buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) != 0) { - byte* tmp = fil_space_decrypt((ulint)TRX_SYS_SPACE, - read_buf + UNIV_PAGE_SIZE, - univ_page_size, /* page size */ - read_buf); - doublewrite = tmp + TRX_SYS_DOUBLEWRITE; - } + /* TRX_SYS_PAGE_NO is not encrypted see fil_crypt_rotate_page() */ if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC) == TRX_SYS_DOUBLEWRITE_MAGIC_N) { @@ -614,9 +608,9 @@ buf_dblwr_process(void) } if (fil_space_verify_crypt_checksum( - read_buf, page_size) + read_buf, page_size, space_id, page_no) || !buf_page_is_corrupted( - true, read_buf, page_size, false)) { + true, read_buf, page_size, space)) { /* The page is good; there is no need to consult the doublewrite buffer. */ continue; @@ -638,8 +632,9 @@ buf_dblwr_process(void) NULL, page, UNIV_PAGE_SIZE, NULL, true); } - if (!fil_space_verify_crypt_checksum(page, page_size) - && buf_page_is_corrupted(true, page, page_size, false)) { + if (!fil_space_verify_crypt_checksum(page, page_size, + space_id, page_no) + && buf_page_is_corrupted(true, page, page_size, space)) { if (!is_all_zero) { ib::warn() << "A doublewrite copy of page " << page_id << " is corrupted."; diff --git a/storage/innobase/buf/buf0dump.cc b/storage/innobase/buf/buf0dump.cc index 873f4ea438a..f7883ded070 100644 --- a/storage/innobase/buf/buf0dump.cc +++ b/storage/innobase/buf/buf0dump.cc @@ -703,7 +703,7 @@ buf_load() if tablespace is encrypted we cant use it. */ if (space == NULL || (space && space->crypt_data && - space->crypt_data->encryption != FIL_SPACE_ENCRYPTION_OFF && + space->crypt_data->encryption != FIL_ENCRYPTION_OFF && space->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED)) { continue; } diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index ae33334ca17..a0ed243d74b 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved. +Copyright (c) 2013, 2017, MariaDB Corporation. Copyright (c) 2013, 2014, Fusion-io This program is free software; you can redistribute it and/or modify it under @@ -893,6 +893,9 @@ buf_flush_init_for_writing( newest_lsn); if (skip_checksum) { + ut_ad(block == NULL + || block->page.id.space() == SRV_TMP_SPACE_ID); + ut_ad(page_get_space_id(page) == SRV_TMP_SPACE_ID); mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum); } else { if (block != NULL && UNIV_PAGE_SIZE == 16384) { @@ -1005,7 +1008,8 @@ buf_flush_write_block_low( { page_t* frame = NULL; ulint space_id = bpage->id.space(); - bool atomic_writes = fil_space_get_atomic_writes(space_id); + const bool is_temp = fsp_is_system_temporary(space_id); + bool atomic_writes = is_temp || fil_space_get_atomic_writes(space_id); #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); @@ -1068,8 +1072,7 @@ buf_flush_write_block_low( reinterpret_cast<const buf_block_t*>(bpage), reinterpret_cast<const buf_block_t*>(bpage)->frame, bpage->zip.data ? &bpage->zip : NULL, - bpage->newest_modification, - fsp_is_checksum_disabled(bpage->id.space())); + bpage->newest_modification, is_temp); break; } @@ -1082,7 +1085,6 @@ buf_flush_write_block_low( if (!srv_use_doublewrite_buf || buf_dblwr == NULL || srv_read_only_mode - || fsp_is_system_temporary(bpage->id.space()) || atomic_writes) { ut_ad(!srv_read_only_mode diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc index 90dba680f25..c1bd5c2d368 100644 --- a/storage/innobase/dict/dict0crea.cc +++ b/storage/innobase/dict/dict0crea.cc @@ -447,7 +447,7 @@ dict_build_tablespace_for_table( err = fil_ibd_create( space, table->name.m_name, filepath, fsp_flags, FIL_IBD_FILE_INITIAL_SIZE, - node ? node->mode : FIL_SPACE_ENCRYPTION_DEFAULT, + node ? node->mode : FIL_ENCRYPTION_DEFAULT, node ? node->key_id : FIL_DEFAULT_ENCRYPTION_KEY); ut_free(filepath); diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index dc64163bee3..d7fcbdf3906 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -6096,7 +6096,6 @@ dict_set_corrupted( row_mysql_lock_data_dictionary(trx); } - ut_ad(index); ut_ad(mutex_own(&dict_sys->mutex)); ut_ad(!dict_table_is_comp(dict_sys->sys_tables)); ut_ad(!dict_table_is_comp(dict_sys->sys_indexes)); diff --git a/storage/innobase/dict/dict0stats_bg.cc b/storage/innobase/dict/dict0stats_bg.cc index 1c998eb6ff4..4ffee160c9f 100644 --- a/storage/innobase/dict/dict0stats_bg.cc +++ b/storage/innobase/dict/dict0stats_bg.cc @@ -46,10 +46,10 @@ os_event_t dict_stats_event; /** Variable to initiate shutdown the dict stats thread. Note we don't use 'srv_shutdown_state' because we want to shutdown dict stats thread before purge thread. */ -bool dict_stats_start_shutdown = false; +bool dict_stats_start_shutdown; /** Event to wait for shutdown of the dict stats thread */ -os_event_t dict_stats_shutdown_event = NULL; +os_event_t dict_stats_shutdown_event; #ifdef UNIV_DEBUG /** Used by SET GLOBAL innodb_dict_stats_disabled_debug = 1; */ diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index ccec6191ed4..a61d7439e2c 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -39,7 +39,6 @@ Modified Jan Lindström jan.lindstrom@mariadb.com #include "fsp0fsp.h" #include "fil0pagecompress.h" #include "ha_prototypes.h" // IB_LOG_ - #include <my_crypt.h> /** Mutex for keys */ @@ -57,7 +56,7 @@ UNIV_INTERN uint srv_n_fil_crypt_threads = 0; UNIV_INTERN uint srv_n_fil_crypt_threads_started = 0; /** At this age or older a space/page will be rotated */ -UNIV_INTERN uint srv_fil_crypt_rotate_key_age = 1; +UNIV_INTERN uint srv_fil_crypt_rotate_key_age; /** Event to signal FROM the key rotation threads. */ static os_event_t fil_crypt_event; @@ -65,11 +64,11 @@ static os_event_t fil_crypt_event; /** Event to signal TO the key rotation threads. */ UNIV_INTERN os_event_t fil_crypt_threads_event; -/** Event for waking up threads throttle */ +/** Event for waking up threads throttle. */ static os_event_t fil_crypt_throttle_sleep_event; -/** Mutex for key rotation threads */ -static ib_mutex_t fil_crypt_threads_mutex; +/** Mutex for key rotation threads. */ +UNIV_INTERN ib_mutex_t fil_crypt_threads_mutex; /** Variable ensuring only 1 thread at time does initial conversion */ static bool fil_crypt_start_converting = false; @@ -89,9 +88,12 @@ extern uint srv_background_scrub_data_check_interval; static fil_crypt_stat_t crypt_stat; static ib_mutex_t crypt_stat_mutex; +/** Is background scrubbing enabled, defined on btr0scrub.cc */ +extern my_bool srv_background_scrub_data_uncompressed; +extern my_bool srv_background_scrub_data_compressed; + static bool fil_crypt_needs_rotation( -/*=====================*/ fil_encryption_t encrypt_mode, /*!< in: Encryption mode */ uint key_version, /*!< in: Key version */ @@ -103,7 +105,6 @@ Init space crypt */ UNIV_INTERN void fil_space_crypt_init() -/*==================*/ { mutex_create(LATCH_ID_FIL_CRYPT_MUTEX, &fil_crypt_key_mutex); @@ -118,7 +119,6 @@ Cleanup space crypt */ UNIV_INTERN void fil_space_crypt_cleanup() -/*=====================*/ { os_event_destroy(fil_crypt_throttle_sleep_event); mutex_free(&fil_crypt_key_mutex); @@ -129,7 +129,7 @@ fil_space_crypt_cleanup() Get latest key version from encryption plugin. @return key version or ENCRYPTION_KEY_VERSION_INVALID */ uint -fil_space_crypt_struct::key_get_latest_version(void) +fil_space_crypt_t::key_get_latest_version(void) { uint key_version = key_found; @@ -143,12 +143,12 @@ fil_space_crypt_struct::key_get_latest_version(void) } /****************************************************************** -Get the latest(key-version), waking the encrypt thread, if needed */ +Get the latest(key-version), waking the encrypt thread, if needed +@param[in,out] crypt_data Crypt data */ static inline uint fil_crypt_get_latest_key_version( -/*=============================*/ - fil_space_crypt_t* crypt_data) /*!< in: crypt data */ + fil_space_crypt_t* crypt_data) { ut_ad(crypt_data != NULL); @@ -187,28 +187,29 @@ crypt_data_scheme_locker( /****************************************************************** Create a fil_space_crypt_t object +@param[in] type CRYPT_SCHEME_UNENCRYPTE or + CRYPT_SCHEME_1 +@param[in] encrypt_mode FIL_ENCRYPTION_DEFAULT or + FIL_ENCRYPTION_ON or + FIL_ENCRYPTION_OFF +@param[in] min_key_version key_version or 0 +@param[in] key_id Used key id @return crypt object */ static fil_space_crypt_t* fil_space_create_crypt_data( -/*========================*/ uint type, fil_encryption_t encrypt_mode, uint min_key_version, - uint key_id, - ulint offset) + uint key_id) { - const uint sz = sizeof(fil_space_crypt_t); - void* buf = ut_zalloc_nokey(sz); fil_space_crypt_t* crypt_data = NULL; - - if (buf) { + if (void* buf = ut_zalloc_nokey(sizeof(fil_space_crypt_t))) { crypt_data = new(buf) - fil_space_crypt_struct( + fil_space_crypt_t( type, min_key_version, key_id, - offset, encrypt_mode); } @@ -217,25 +218,30 @@ fil_space_create_crypt_data( /****************************************************************** Create a fil_space_crypt_t object +@param[in] encrypt_mode FIL_ENCRYPTION_DEFAULT or + FIL_ENCRYPTION_ON or + FIL_ENCRYPTION_OFF + +@param[in] key_id Encryption key id @return crypt object */ UNIV_INTERN fil_space_crypt_t* fil_space_create_crypt_data( -/*========================*/ - fil_encryption_t encrypt_mode, /*!< in: encryption mode */ - uint key_id) /*!< in: encryption key id */ + fil_encryption_t encrypt_mode, + uint key_id) { - return (fil_space_create_crypt_data(0, encrypt_mode, 0, key_id, 0)); + return (fil_space_create_crypt_data(0, encrypt_mode, 0, key_id)); } /****************************************************************** -Merge fil_space_crypt_t object */ +Merge fil_space_crypt_t object +@param[in,out] dst Destination cryp data +@param[in] src Source crypt data */ UNIV_INTERN void fil_space_merge_crypt_data( -/*=======================*/ - fil_space_crypt_t* dst,/*!< out: Crypt data */ - const fil_space_crypt_t* src)/*!< in: Crypt data */ + fil_space_crypt_t* dst, + const fil_space_crypt_t* src) { mutex_enter(&dst->mutex); @@ -250,62 +256,44 @@ fil_space_merge_crypt_data( dst->type = src->type; dst->min_key_version = src->min_key_version; dst->keyserver_requests += src->keyserver_requests; - dst->closing = src->closing; mutex_exit(&dst->mutex); } -/****************************************************************** -Read crypt data from a page (0) -@return crypt data from page 0. */ +/** Initialize encryption parameters from a tablespace header page. +@param[in] page_size page size of the tablespace +@param[in] page first page of the tablespace +@return crypt data from page 0 +@retval NULL if not present or not valid */ UNIV_INTERN fil_space_crypt_t* -fil_space_read_crypt_data( -/*======================*/ - ulint space, /*!< in: file space id*/ - const byte* page, /*!< in: page 0 */ - ulint offset) /*!< in: offset */ +fil_space_read_crypt_data(const page_size_t& page_size, const byte* page) { + const ulint offset = FSP_HEADER_OFFSET + + fsp_header_get_encryption_offset(page_size); + if (memcmp(page + offset, CRYPT_MAGIC, MAGIC_SZ) != 0) { /* Crypt data is not stored. */ return NULL; } ulint type = mach_read_from_1(page + offset + MAGIC_SZ + 0); + ulint iv_length = mach_read_from_1(page + offset + MAGIC_SZ + 1); + fil_space_crypt_t* crypt_data; - if (! (type == CRYPT_SCHEME_UNENCRYPTED || - type == CRYPT_SCHEME_1)) { + if (!(type == CRYPT_SCHEME_UNENCRYPTED || + type == CRYPT_SCHEME_1) + || iv_length != sizeof crypt_data->iv) { ib::error() << "Found non sensible crypt scheme: " - << type << " for space: " - << space << " offset: " + << type << "," << iv_length << " for space: " + << page_get_space_id(page) << " offset: " << offset << " bytes: [" - << page[offset + 0 + MAGIC_SZ] - << page[offset + 1 + MAGIC_SZ] - << page[offset + 2 + MAGIC_SZ] - << page[offset + 3 + MAGIC_SZ] - << page[offset + 4 + MAGIC_SZ] - << page[offset + 5 + MAGIC_SZ] - << "]."; - ut_error; - } - - fil_space_crypt_t* crypt_data; - ulint iv_length = mach_read_from_1(page + offset + MAGIC_SZ + 1); - - if (! (iv_length == sizeof(crypt_data->iv))) { - ib::error() << "Found non sensible iv length: " - << iv_length << " for space: " - << space << " offset: " - << offset << " type: " - << type << " bytes: [" - << page[offset + 0 + MAGIC_SZ] - << page[offset + 1 + MAGIC_SZ] << page[offset + 2 + MAGIC_SZ] << page[offset + 3 + MAGIC_SZ] << page[offset + 4 + MAGIC_SZ] << page[offset + 5 + MAGIC_SZ] << "]."; - ut_error; + return NULL; } uint min_key_version = mach_read_from_4 @@ -329,45 +317,42 @@ fil_space_read_crypt_data( } /****************************************************************** -Free a crypt data object */ +Free a crypt data object +@param[in,out] crypt_data crypt data to be freed */ UNIV_INTERN void fil_space_destroy_crypt_data( -/*=========================*/ - fil_space_crypt_t **crypt_data) /*!< out: crypt data */ + fil_space_crypt_t **crypt_data) { if (crypt_data != NULL && (*crypt_data) != NULL) { mutex_enter(&fil_crypt_threads_mutex); fil_space_crypt_t* c = *crypt_data; - c->~fil_space_crypt_struct(); - ut_free(c); *crypt_data = NULL; mutex_exit(&fil_crypt_threads_mutex); + if (c) { + c->~fil_space_crypt_t(); + ut_free(c); + } } } /****************************************************************** -Write crypt data to a page (0) */ -static +Write crypt data to a page (0) +@param[in] space tablespace +@param[in,out] page0 first page of the tablespace +@param[in,out] mtr mini-transaction */ +UNIV_INTERN void -fil_space_write_crypt_data_low( -/*===========================*/ - fil_space_crypt_t* crypt_data, /*<! out: crypt data */ - ulint type, /*<! in: crypt scheme */ - byte* page, /*<! in: page 0 */ - ulint offset, /*<! in: offset */ - ulint maxsize, /*<! in: size of crypt data */ - mtr_t* mtr) /*<! in: minitransaction */ +fil_space_crypt_t::write_page0( + const fil_space_t* space, + byte* page, + mtr_t* mtr) { - ut_a(offset > 0 && offset < UNIV_PAGE_SIZE); - ulint space_id = mach_read_from_4( - page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - const uint len = sizeof(crypt_data->iv); - const uint min_key_version = crypt_data->min_key_version; - const uint key_id = crypt_data->key_id; - const fil_encryption_t encryption = crypt_data->encryption; - crypt_data->page0_offset = offset; - ut_a(2 + len + 4 + 1 + 4 + MAGIC_SZ < maxsize); + ut_ad(this == space->crypt_data); + const uint len = sizeof(iv); + const ulint offset = FSP_HEADER_OFFSET + + fsp_header_get_encryption_offset(page_size_t(space->flags)); + page0_offset = offset; /* redo log this as bytewise updates to page 0 @@ -377,7 +362,7 @@ fil_space_write_crypt_data_low( mlog_write_string(page + offset, CRYPT_MAGIC, MAGIC_SZ, mtr); mlog_write_ulint(page + offset + MAGIC_SZ + 0, type, MLOG_1BYTE, mtr); mlog_write_ulint(page + offset + MAGIC_SZ + 1, len, MLOG_1BYTE, mtr); - mlog_write_string(page + offset + MAGIC_SZ + 2, crypt_data->iv, len, + mlog_write_string(page + offset + MAGIC_SZ + 2, iv, len, mtr); mlog_write_ulint(page + offset + MAGIC_SZ + 2 + len, min_key_version, MLOG_4BYTES, mtr); @@ -393,7 +378,7 @@ fil_space_write_crypt_data_low( page, MLOG_FILE_WRITE_CRYPT_DATA, log_ptr, mtr); - mach_write_to_4(log_ptr, space_id); + mach_write_to_4(log_ptr, space->id); log_ptr += 4; mach_write_to_2(log_ptr, offset); log_ptr += 2; @@ -409,44 +394,61 @@ fil_space_write_crypt_data_low( log_ptr += 1; mlog_close(mtr, log_ptr); - mlog_catenate_string(mtr, crypt_data->iv, len); + mlog_catenate_string(mtr, iv, len); } } /****************************************************************** -Write crypt data to a page (0) */ -UNIV_INTERN -void -fil_space_write_crypt_data( -/*=======================*/ - ulint space, /*<! in: file space */ - byte* page, /*<! in: page 0 */ - ulint offset, /*<! in: offset */ - ulint maxsize, /*<! in: size of crypt data */ - mtr_t* mtr) /*<! in: minitransaction */ +Set crypt data for a tablespace +@param[in,out] space Tablespace +@param[in,out] crypt_data Crypt data to be set +@return crypt_data in tablespace */ +static +fil_space_crypt_t* +fil_space_set_crypt_data( + fil_space_t* space, + fil_space_crypt_t* crypt_data) { - fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space); + fil_space_crypt_t* free_crypt_data = NULL; + fil_space_crypt_t* ret_crypt_data = NULL; + + /* Provided space is protected using fil_space_acquire() + from concurrent operations. */ + if (space->crypt_data != NULL) { + /* There is already crypt data present, + merge new crypt_data */ + fil_space_merge_crypt_data(space->crypt_data, + crypt_data); + ret_crypt_data = space->crypt_data; + free_crypt_data = crypt_data; + } else { + space->crypt_data = crypt_data; + ret_crypt_data = space->crypt_data; + } - /* If no crypt data is stored on memory cache for this space, - then do not continue writing crypt data to page 0. */ - if (crypt_data == NULL) { - return; + if (free_crypt_data != NULL) { + /* there was already crypt data present and the new crypt + * data provided as argument to this function has been merged + * into that => free new crypt data + */ + fil_space_destroy_crypt_data(&free_crypt_data); } - fil_space_write_crypt_data_low(crypt_data, crypt_data->type, - page, offset, maxsize, mtr); + return ret_crypt_data; } /****************************************************************** Parse a MLOG_FILE_WRITE_CRYPT_DATA log entry +@param[in] ptr Log entry start +@param[in] end_ptr Log entry end +@param[in] block buffer block @return position on log buffer */ UNIV_INTERN -byte* +const byte* fil_parse_write_crypt_data( -/*=======================*/ - byte* ptr, /*!< in: Log entry start */ - byte* end_ptr,/*!< in: Log entry end */ - buf_block_t* block) /*!< in: buffer block */ + const byte* ptr, + const byte* end_ptr, + const buf_block_t* block) { /* check that redo log entry is complete */ uint entry_size = @@ -458,7 +460,7 @@ fil_parse_write_crypt_data( 4 + // size of key_id 1; // fil_encryption_t - if ((uint) (end_ptr - ptr) < entry_size){ + if (ptr + entry_size > end_ptr) { return NULL; } @@ -484,7 +486,7 @@ fil_parse_write_crypt_data( fil_encryption_t encryption = (fil_encryption_t)mach_read_from_1(ptr); ptr +=1; - if ((uint) (end_ptr - ptr) < len) { + if (ptr + len > end_ptr) { return NULL; } @@ -497,56 +499,40 @@ fil_parse_write_crypt_data( ptr += len; /* update fil_space memory cache with crypt_data */ - fil_space_set_crypt_data(space_id, crypt_data); + fil_space_t* space = fil_space_acquire_silent(space_id); - return ptr; -} + if (space) { + crypt_data = fil_space_set_crypt_data(space, crypt_data); + fil_space_release(space); + } -/****************************************************************** -Clear crypt data from a page (0) */ -UNIV_INTERN -void -fil_space_clear_crypt_data( -/*=======================*/ - byte* page, /*!< in/out: Page 0 */ - ulint offset) /*!< in: Offset */ -{ - //TODO(jonaso): pass crypt-data and read len from there - ulint len = CRYPT_SCHEME_1_IV_LEN; - ulint size = - sizeof(CRYPT_MAGIC) + - 1 + // type - 1 + // len - len + // iv - 4 + // min key version - 4 + // key id - 1; // fil_encryption_t - memset(page + offset, 0, size); + return ptr; } -/****************************************************************** -Encrypt a buffer */ +/** Encrypt a buffer. +@param[in,out] crypt_data Crypt data +@param[in] space space_id +@param[in] offset Page offset +@param[in] lsn Log sequence number +@param[in] src_frame Page to encrypt +@param[in] page_size Page size +@param[in,out] dst_frame Output buffer +@return encrypted buffer or NULL */ UNIV_INTERN byte* fil_encrypt_buf( -/*============*/ - fil_space_crypt_t* crypt_data, /*!< in: crypt data */ - ulint space, /*!< in: Space id */ - ulint offset, /*!< in: Page offset */ - lsn_t lsn, /*!< in: lsn */ - byte* src_frame, /*!< in: Source page to be encrypted */ - const page_size_t& page_size, /*!< in: page size */ - byte* dst_frame) /*!< in: outbut buffer */ + fil_space_crypt_t* crypt_data, + ulint space, + ulint offset, + lsn_t lsn, + const byte* src_frame, + const page_size_t& page_size, + byte* dst_frame) { ulint size = page_size.physical(); uint key_version = fil_crypt_get_latest_key_version(crypt_data); - if (key_version == ENCRYPTION_KEY_VERSION_INVALID) { - ib::error() << "Unknown key id: " - << crypt_data->key_id - << " Can't continue!"; - ut_error; - } + ut_a(key_version != ENCRYPTION_KEY_VERSION_INVALID); ulint orig_page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE); ibool page_compressed = (orig_page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); @@ -576,14 +562,8 @@ fil_encrypt_buf( int rc = encryption_scheme_encrypt(src, srclen, dst, &dstlen, crypt_data, key_version, space, offset, lsn); - - if (! ((rc == MY_AES_OK) && ((ulint) dstlen == srclen))) { - ib::error() << "Unable to encrypt data-block " - << " src: " << src << " srclen: " << srclen - << " buf: " << dst << " buflen: " << dstlen - << " return-code: "<< rc << " Can't continue!"; - ut_error; - } + ut_a(rc == MY_AES_OK); + ut_a(dstlen == srclen); /* For compressed tables we do not store the FIL header because the whole page is not stored to the disk. In compressed tables only @@ -596,7 +576,8 @@ fil_encrypt_buf( FIL_PAGE_DATA_END); } else { /* Clean up rest of buffer */ - memset(dst_frame+header_len+srclen, 0, page_size.physical() - (header_len+srclen)); + memset(dst_frame+header_len+srclen, 0, + page_size.physical() - (header_len + srclen)); } /* handle post encryption checksum */ @@ -607,46 +588,50 @@ fil_encrypt_buf( // store the post-encryption checksum after the key-version mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4, checksum); + ut_ad(fil_space_verify_crypt_checksum(dst_frame, page_size, + space, offset)); + srv_stats.pages_encrypted.inc(); return dst_frame; } /****************************************************************** -Encrypt a page */ +Encrypt a page + +@param[in] space Tablespace +@param[in] offset Page offset +@param[in] lsn Log sequence number +@param[in] src_frame Page to encrypt +@param[in,out] dst_frame Output buffer +@return encrypted buffer or NULL */ UNIV_INTERN byte* fil_space_encrypt( -/*==============*/ - ulint space, /*!< in: Space id */ - ulint offset, /*!< in: Page offset */ - lsn_t lsn, /*!< in: lsn */ - byte* src_frame, /*!< in: Source page to be encrypted */ - const page_size_t& page_size, /*!< in: page size */ - byte* dst_frame) /*!< in: outbut buffer */ + const fil_space_t* space, + ulint offset, + lsn_t lsn, + byte* src_frame, + byte* dst_frame) { - fil_space_crypt_t* crypt_data = NULL; - - ulint orig_page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE); - - if (orig_page_type == FIL_PAGE_TYPE_FSP_HDR || - orig_page_type == FIL_PAGE_TYPE_XDES || - orig_page_type == FIL_PAGE_RTREE) { + switch (mach_read_from_2(src_frame+FIL_PAGE_TYPE)) { + case FIL_PAGE_TYPE_FSP_HDR: + case FIL_PAGE_TYPE_XDES: + case FIL_PAGE_RTREE: /* File space header, extent descriptor or spatial index are not encrypted. */ return src_frame; } - /* Get crypt data from file space */ - crypt_data = fil_space_get_crypt_data(space); - - if (crypt_data == NULL) { - return src_frame; + if (!space->crypt_data || !space->crypt_data->is_encrypted()) { + return (src_frame); } - ut_a(crypt_data != NULL && crypt_data->is_encrypted()); - - byte* tmp = fil_encrypt_buf(crypt_data, space, offset, lsn, src_frame, page_size, dst_frame); + fil_space_crypt_t* crypt_data = space->crypt_data; + const page_size_t page_size(space->flags); + ut_ad(space->n_pending_ops); + byte* tmp = fil_encrypt_buf(crypt_data, space->id, offset, lsn, + src_frame, page_size, dst_frame); #ifdef UNIV_DEBUG if (tmp) { @@ -666,7 +651,7 @@ fil_space_encrypt( src = uncomp_mem; } - bool corrupted1 = buf_page_is_corrupted(true, src, page_size, fsp_is_checksum_disabled(space)); + bool corrupted1 = buf_page_is_corrupted(true, src, page_size, space); bool ok = fil_space_decrypt(crypt_data, tmp_mem, page_size, tmp, &err); /* Need to decompress the page if it was also compressed */ @@ -675,18 +660,17 @@ fil_space_encrypt( fil_decompress_page(tmp_mem, comp_mem, page_size.physical(), NULL); } - bool corrupted = buf_page_is_corrupted(true, tmp_mem, page_size, fsp_is_checksum_disabled(space)); + bool corrupted = buf_page_is_corrupted(true, tmp_mem, page_size, space); bool different = memcmp(src, tmp_mem, page_size.physical()); if (!ok || corrupted || corrupted1 || err != DB_SUCCESS || different) { - fprintf(stderr, "JAN: ok %d corrupted %d corrupted1 %d err %d different %d\n", ok , corrupted, corrupted1, err, different); - fprintf(stderr, "JAN1: src_frame\n"); + fprintf(stderr, "ok %d corrupted %d corrupted1 %d err %d different %d\n", ok , corrupted, corrupted1, err, different); + fprintf(stderr, "src_frame\n"); buf_page_print(src_frame, page_size, BUF_PAGE_PRINT_NO_CRASH); - fprintf(stderr, "JAN2: encrypted_frame\n"); + fprintf(stderr, "encrypted_frame\n"); buf_page_print(tmp, page_size, BUF_PAGE_PRINT_NO_CRASH); - fprintf(stderr, "JAN1: decrypted_frame\n"); - buf_page_print(tmp_mem, page_size, BUF_PAGE_PRINT_NO_CRASH); - ut_error; + fprintf(stderr, "decrypted_frame\n"); + buf_page_print(tmp_mem, page_size, 0); } free(tmp_mem); @@ -704,45 +688,21 @@ fil_space_encrypt( return tmp; } -/********************************************************************* -Check if extra buffer shall be allocated for decrypting after read -@return true if fil space has encryption data. */ -UNIV_INTERN -bool -fil_space_check_encryption_read( -/*=============================*/ - ulint space) /*!< in: tablespace id */ -{ - fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space); - - if (crypt_data == NULL) { - return false; - } - - if (crypt_data->type == CRYPT_SCHEME_UNENCRYPTED) { - return false; - } - - if (crypt_data->not_encrypted()) { - return false; - } - - return true; -} - -/****************************************************************** -Decrypt a page +/** Decrypt a page. +@param[in] crypt_data crypt_data +@param[in] tmp_frame Temporary buffer +@param[in] page_size Page size +@param[in,out] src_frame Page to decrypt +@param[out] err DB_SUCCESS or DB_DECRYPTION_FAILED @return true if page decrypted, false if not.*/ UNIV_INTERN bool fil_space_decrypt( -/*==============*/ - fil_space_crypt_t* crypt_data, /*!< in: crypt data */ - byte* tmp_frame, /*!< in: temporary buffer */ - const page_size_t& page_size, /*!< in: page size */ - byte* src_frame, /*!< in: out: page buffer */ - dberr_t* err) /*!< in: out: DB_SUCCESS or - error code */ + fil_space_crypt_t* crypt_data, + byte* tmp_frame, + const page_size_t& page_size, + byte* src_frame, + dberr_t* err) { ulint page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE); uint key_version = mach_read_from_4(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); @@ -750,6 +710,7 @@ fil_space_decrypt( ulint offset = mach_read_from_4(src_frame + FIL_PAGE_OFFSET); ulint space = mach_read_from_4(src_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); ib_uint64_t lsn = mach_read_from_8(src_frame + FIL_PAGE_LSN); + *err = DB_SUCCESS; if (key_version == ENCRYPTION_KEY_NOT_ENCRYPTED) { @@ -764,12 +725,12 @@ fil_space_decrypt( first page in a system tablespace data file (ibdata*, not *.ibd), if not clear it. */ -#ifdef UNIV_DEBUG - ib::warn() - << "Page on space "<< space << " offset " << offset - << " has key_version " << key_version - << " when it shoud be undefined."; -#endif + + DBUG_LOG("crypt", + "Page " << page_id_t(space, offset) + << " carries key_version " << key_version + << " (should be undefined)"); + mach_write_to_4(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0); } @@ -809,12 +770,11 @@ fil_space_decrypt( return false; } - ib::error() << "Unable to decrypt data-block " + ib::fatal() << "Unable to decrypt data-block " << " src: " << src << "srclen: " << srclen << " buf: " << dst << "buflen: " << dstlen << " return-code: " << rc << " Can't continue!"; - ut_error; } /* For compressed tables we do not store the FIL header because @@ -836,31 +796,36 @@ fil_space_decrypt( return true; /* page was decrypted */ } -/****************************************************************** -Decrypt a page -@return encrypted page, or original not encrypted page if encryption is -not needed. */ +/** +Decrypt a page. +@param[in] space Tablespace +@param[in] tmp_frame Temporary buffer used for decrypting +@param[in,out] src_frame Page to decrypt +@param[out] decrypted true if page was decrypted +@return decrypted page, or original not encrypted page if decryption is +not needed.*/ UNIV_INTERN byte* fil_space_decrypt( -/*==============*/ - ulint space, /*!< in: Fil space id */ - byte* tmp_frame, /*!< in: temporary buffer */ - const page_size_t& page_size, /*!< in: page size */ - byte* src_frame) /*!< in/out: page buffer */ + const fil_space_t* space, + byte* tmp_frame, + byte* src_frame, + bool* decrypted) { dberr_t err = DB_SUCCESS; byte* res = NULL; + const page_size_t page_size(space->flags); + *decrypted = false; + + ut_ad(space->crypt_data != NULL && space->crypt_data->is_encrypted()); + ut_ad(space->n_pending_ops > 0); - bool encrypted = fil_space_decrypt( - fil_space_get_crypt_data(space), - tmp_frame, - page_size, - src_frame, - &err); + bool encrypted = fil_space_decrypt(space->crypt_data, tmp_frame, + page_size, src_frame, &err); if (err == DB_SUCCESS) { if (encrypted) { + *decrypted = true; /* Copy the decrypted page back to page buffer, not really any other options. */ memcpy(src_frame, tmp_frame, page_size.physical()); @@ -874,14 +839,15 @@ fil_space_decrypt( /****************************************************************** Calculate post encryption checksum +@param[in] page_size page size +@param[in] dst_frame Block where checksum is calculated @return page checksum or BUF_NO_CHECKSUM_MAGIC not needed. */ UNIV_INTERN ulint fil_crypt_calculate_checksum( -/*=========================*/ - const page_size_t& page_size, /*!< in: page size */ - byte* dst_frame) /*!< in: page where to calculate */ + const page_size_t& page_size, + const byte* dst_frame) { ib_uint32_t checksum = 0; srv_checksum_algorithm_t algorithm = @@ -929,12 +895,13 @@ struct key_state_t { }; /*********************************************************************** -Copy global key state */ +Copy global key state +@param[in,out] new_state key state +@param[in] crypt_data crypt data */ static void fil_crypt_get_key_state( -/*====================*/ - key_state_t* new_state, /*!< out: key state */ - fil_space_crypt_t* crypt_data) /*!< in, out: crypt_data */ + key_state_t* new_state, + fil_space_crypt_t* crypt_data) { if (srv_encrypt_tables) { new_state->key_version = crypt_data->key_get_latest_version(); @@ -949,15 +916,17 @@ fil_crypt_get_key_state( /*********************************************************************** Check if a key needs rotation given a key_state +@param[in] encrypt_mode Encryption mode +@param[in] key_version Current key version +@param[in] latest_key_version Latest key version +@param[in] rotate_key_age when to rotate @return true if key needs rotation, false if not */ static bool fil_crypt_needs_rotation( -/*=====================*/ - fil_encryption_t encrypt_mode, /*!< in: Encryption - mode */ - uint key_version, /*!< in: Key version */ - uint latest_key_version, /*!< in: Latest key version */ - uint rotate_key_age) /*!< in: When to rotate */ + fil_encryption_t encrypt_mode, + uint key_version, + uint latest_key_version, + uint rotate_key_age) { if (key_version == ENCRYPTION_KEY_VERSION_INVALID) { return false; @@ -970,7 +939,7 @@ fil_crypt_needs_rotation( } if (latest_key_version == 0 && key_version != 0) { - if (encrypt_mode == FIL_SPACE_ENCRYPTION_DEFAULT) { + if (encrypt_mode == FIL_ENCRYPTION_DEFAULT) { /* this is rotation encrypted => unencrypted */ return true; } @@ -987,59 +956,34 @@ fil_crypt_needs_rotation( } /*********************************************************************** -Check if a space is closing (i.e just before drop) -@return true if space is closing, false if not. */ -UNIV_INTERN -bool -fil_crypt_is_closing( -/*=================*/ - ulint space) /*!< in: FIL space id */ -{ - bool closing=true; - fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); - - if (crypt_data) { - closing = crypt_data->is_closing(false); - } - - return closing; -} - -/*********************************************************************** Start encrypting a space -@return true if a pending op (fil_inc_pending_ops/fil_decr_pending_ops) is held -*/ +@param[in,out] space Tablespace +@return true if a recheck is needed */ static bool fil_crypt_start_encrypting_space( -/*=============================*/ - ulint space, /*!< in: FIL space id */ - bool* recheck)/*!< out: true if recheck needed */ + fil_space_t* space) { - - /* we have a pending op when entering function */ - bool pending_op = true; - + bool recheck = false; mutex_enter(&fil_crypt_threads_mutex); - fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); - ibool page_encrypted = (crypt_data != NULL); + fil_space_crypt_t *crypt_data = space->crypt_data; - /*If spage is not encrypted and encryption is not enabled, then + /* If space is not encrypted and encryption is not enabled, then do not continue encrypting the space. */ - if (!page_encrypted && !srv_encrypt_tables) { + if (!crypt_data && !srv_encrypt_tables) { mutex_exit(&fil_crypt_threads_mutex); - return pending_op; + return false; } if (crypt_data != NULL || fil_crypt_start_converting) { /* someone beat us to it */ if (fil_crypt_start_converting) { - *recheck = true; + recheck = true; } mutex_exit(&fil_crypt_threads_mutex); - return pending_op; + return recheck; } /* NOTE: we need to write and flush page 0 before publishing @@ -1048,10 +992,11 @@ fil_crypt_start_encrypting_space( * crypt data in page 0 */ /* 1 - create crypt data */ - crypt_data = fil_space_create_crypt_data(FIL_SPACE_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY); + crypt_data = fil_space_create_crypt_data(FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY); + if (crypt_data == NULL) { mutex_exit(&fil_crypt_threads_mutex); - return pending_op; + return false; } crypt_data->type = CRYPT_SCHEME_UNENCRYPTED; @@ -1069,92 +1014,42 @@ fil_crypt_start_encrypting_space( do { - if (fil_crypt_is_closing(space) || - fil_space_found_by_id(space) == NULL) { - break; - } - mtr_t mtr; mtr_start(&mtr); /* 2 - get page 0 */ - ulint offset = 0; - const page_id_t page_id(space, offset); - bool tsfound; - const page_size_t page_size = fil_space_get_page_size(space, &tsfound); dberr_t err = DB_SUCCESS; - buf_block_t* block = buf_page_get_gen(page_id, page_size, - RW_X_LATCH, - NULL, - BUF_GET, - __FILE__, __LINE__, - &mtr, &err); - - if (fil_crypt_is_closing(space) || - fil_space_found_by_id(space) == NULL || - err != DB_SUCCESS) { - mtr_commit(&mtr); - break; - } + buf_block_t* block = buf_page_get_gen( + page_id_t(space->id, 0), page_size_t(space->flags), + RW_X_LATCH, NULL, BUF_GET, + __FILE__, __LINE__, + &mtr, &err); + - /* 3 - compute location to store crypt data */ + /* 3 - write crypt data to page 0 */ byte* frame = buf_block_get_frame(block); - ut_ad(crypt_data); - crypt_data->page0_offset = FSP_HEADER_OFFSET - + fsp_header_get_encryption_offset(page_size); - const ulint maxsize = page_size.logical() - - crypt_data->page0_offset - FIL_PAGE_DATA_END; - - /* 4 - write crypt data to page 0 */ - fil_space_write_crypt_data_low(crypt_data, - CRYPT_SCHEME_1, - frame, - crypt_data->page0_offset, - maxsize, &mtr); + crypt_data->type = CRYPT_SCHEME_1; + crypt_data->write_page0(space, frame, &mtr); mtr_commit(&mtr); - if (fil_crypt_is_closing(space) || - fil_space_found_by_id(space) == NULL) { - break; - } - /* record lsn of update */ lsn_t end_lsn = mtr.commit_lsn(); /* 4 - sync tablespace before publishing crypt data */ - /* release "lock" while syncing */ - fil_decr_pending_ops(space); - pending_op = false; - bool success = false; - ulint n_pages = 0; ulint sum_pages = 0; + do { + ulint n_pages = 0; success = buf_flush_lists(ULINT_MAX, end_lsn, &n_pages); buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); sum_pages += n_pages; - } while (!success && - !fil_crypt_is_closing(space) && - !fil_space_found_by_id(space)); - - /* try to reacquire pending op */ - if (fil_inc_pending_ops(space, true)) { - break; - } - - /* pending op reacquired! */ - pending_op = true; - - if (fil_crypt_is_closing(space) || - fil_space_found_by_id(space) == NULL) { - break; - } + } while (!success); /* 5 - publish crypt data */ mutex_enter(&fil_crypt_threads_mutex); - ut_ad(crypt_data); mutex_enter(&crypt_data->mutex); crypt_data->type = CRYPT_SCHEME_1; ut_a(crypt_data->rotate_state.active_threads == 1); @@ -1165,10 +1060,9 @@ fil_crypt_start_encrypting_space( mutex_exit(&crypt_data->mutex); mutex_exit(&fil_crypt_threads_mutex); - return pending_op; + return recheck; } while (0); - ut_ad(crypt_data); mutex_enter(&crypt_data->mutex); ut_a(crypt_data->rotate_state.active_threads == 1); crypt_data->rotate_state.active_threads = 0; @@ -1178,7 +1072,7 @@ fil_crypt_start_encrypting_space( fil_crypt_start_converting = false; mutex_exit(&fil_crypt_threads_mutex); - return pending_op; + return recheck; } /** State of a rotation thread */ @@ -1192,7 +1086,7 @@ struct rotate_thread_t { uint thread_no; bool first; /*!< is position before first space */ - ulint space; /*!< current space */ + fil_space_t* space; /*!< current space or NULL */ ulint offset; /*!< current offset */ ulint batch; /*!< #pages to rotate */ uint min_key_version_found;/*!< min key version found but not rotated */ @@ -1228,54 +1122,41 @@ struct rotate_thread_t { /*********************************************************************** Check if space needs rotation given a key_state +@param[in,out] state Key rotation state +@param[in,out] key_state Key state +@param[in,out] recheck needs recheck ? @return true if space needs key rotation */ static bool fil_crypt_space_needs_rotation( -/*===========================*/ - rotate_thread_t* state, /*!< in: Key rotation state */ - key_state_t* key_state, /*!< in: Key state */ - bool* recheck) /*!< out: needs recheck ? */ + rotate_thread_t* state, + key_state_t* key_state, + bool* recheck) { - ulint space = state->space; + fil_space_t* space = state->space; - /* Make sure that tablespace is found and it is normal tablespace */ - if (fil_space_found_by_id(space) == NULL || - fil_space_get_type(space) != FIL_TYPE_TABLESPACE) { + /* Make sure that tablespace is normal tablespace */ + if (space->purpose != FIL_TYPE_TABLESPACE) { return false; } - if (fil_inc_pending_ops(space, true)) { - /* tablespace being dropped */ - return false; - } + ut_ad(space->n_pending_ops > 0); - /* keep track of if we have pending op */ - bool pending_op = true; - - fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + fil_space_crypt_t *crypt_data = space->crypt_data; if (crypt_data == NULL) { /** * space has no crypt data * start encrypting it... */ - pending_op = fil_crypt_start_encrypting_space(space, recheck); - - crypt_data = fil_space_get_crypt_data(space); + *recheck = fil_crypt_start_encrypting_space(space); + crypt_data = space->crypt_data; if (crypt_data == NULL) { - if (pending_op) { - fil_decr_pending_ops(space); - } return false; } crypt_data->key_get_latest_version(); - - if (!crypt_data->is_key_found()) { - return false; - } } /* If used key_id is not found from encryption plugin we can't @@ -1295,7 +1176,7 @@ fil_crypt_space_needs_rotation( } /* prevent threads from starting to rotate space */ - if (crypt_data->is_closing(true)) { + if (space->is_stopping()) { break; } @@ -1319,39 +1200,39 @@ fil_crypt_space_needs_rotation( key_state->key_version, key_state->rotate_key_age); crypt_data->rotate_state.scrubbing.is_active = - btr_scrub_start_space(space, &state->scrub_data); + btr_scrub_start_space(space->id, &state->scrub_data); time_t diff = time(0) - crypt_data->rotate_state.scrubbing. last_scrub_completed; bool need_scrubbing = + (srv_background_scrub_data_uncompressed || + srv_background_scrub_data_compressed) && crypt_data->rotate_state.scrubbing.is_active - && diff >= (time_t) srv_background_scrub_data_interval; + && diff >= 0 + && ulint(diff) >= srv_background_scrub_data_interval; if (need_key_rotation == false && need_scrubbing == false) { break; } mutex_exit(&crypt_data->mutex); - /* NOTE! fil_decr_pending_ops is performed outside */ + return true; } while (0); mutex_exit(&crypt_data->mutex); - if (pending_op) { - fil_decr_pending_ops(space); - } return false; } /*********************************************************************** -Update global statistics with thread statistics */ +Update global statistics with thread statistics +@param[in,out] state key rotation statistics */ static void fil_crypt_update_total_stat( -/*========================*/ - rotate_thread_t *state) /*!< in: Key rotation status */ + rotate_thread_t *state) { mutex_enter(&crypt_stat_mutex); crypt_stat.pages_read_from_cache += @@ -1375,15 +1256,19 @@ fil_crypt_update_total_stat( /*********************************************************************** Allocate iops to thread from global setting, used before starting to rotate a space. +@param[in,out] state Rotation state @return true if allocation succeeded, false if failed */ static bool fil_crypt_alloc_iops( -/*=================*/ - rotate_thread_t *state) /*!< in: Key rotation status */ + rotate_thread_t *state) { ut_ad(state->allocated_iops == 0); + /* We have not yet selected the space to rotate, thus + state might not contain space and we can't check + its status yet. */ + uint max_iops = state->estimated_max_iops; mutex_enter(&fil_crypt_threads_mutex); @@ -1409,12 +1294,12 @@ fil_crypt_alloc_iops( /*********************************************************************** Reallocate iops to thread, -used when inside a space */ +used when inside a space +@param[in,out] state Rotation state */ static void fil_crypt_realloc_iops( -/*===================*/ - rotate_thread_t *state) /*!< in: Key rotation status */ + rotate_thread_t *state) { ut_a(state->allocated_iops > 0); @@ -1423,13 +1308,12 @@ fil_crypt_realloc_iops( uint avg_wait_time_us = state->sum_waited_us / state->cnt_waited; -#if DEBUG_KEYROTATION_THROTTLING - ib_logf(IB_LOG_LEVEL_INFO, - "thr_no: %u - update estimated_max_iops from %u to %u.", + DBUG_PRINT("ib_crypt", + ("thr_no: %u - update estimated_max_iops from %u to %u.", state->thread_no, state->estimated_max_iops, - 1000000 / avg_wait_time_us); -#endif + 1000000 / avg_wait_time_us)); + if (avg_wait_time_us == 0) { avg_wait_time_us = 1; // prevent division by zero } @@ -1438,12 +1322,11 @@ fil_crypt_realloc_iops( state->cnt_waited = 0; state->sum_waited_us = 0; } else { -#if DEBUG_KEYROTATION_THROTTLING - ib_logf(IB_LOG_LEVEL_INFO, - "thr_no: %u only waited %lu%% skip re-estimate.", + + DBUG_PRINT("ib_crypt", + ("thr_no: %u only waited %lu%% skip re-estimate.", state->thread_no, - (100 * state->cnt_waited) / state->batch); -#endif + (100 * state->cnt_waited) / state->batch)); } if (state->estimated_max_iops <= state->allocated_iops) { @@ -1469,8 +1352,9 @@ fil_crypt_realloc_iops( state->allocated_iops ++; n_fil_crypt_iops_allocated ++; } - mutex_exit(&fil_crypt_threads_mutex); + os_event_set(fil_crypt_threads_event); + mutex_exit(&fil_crypt_threads_mutex); } } else { /* see if there are more to get */ @@ -1487,13 +1371,13 @@ fil_crypt_realloc_iops( } n_fil_crypt_iops_allocated += extra; state->allocated_iops += extra; -#if DEBUG_KEYROTATION_THROTTLING - ib_logf(IB_LOG_LEVEL_INFO, - "thr_no: %u increased iops from %u to %u.", + + DBUG_PRINT("ib_crypt", + ("thr_no: %u increased iops from %u to %u.", state->thread_no, state->allocated_iops - extra, - state->allocated_iops); -#endif + state->allocated_iops)); + } mutex_exit(&fil_crypt_threads_mutex); } @@ -1502,12 +1386,12 @@ fil_crypt_realloc_iops( } /*********************************************************************** -Return allocated iops to global */ +Return allocated iops to global +@param[in,out] state Rotation state */ static void fil_crypt_return_iops( -/*==================*/ - rotate_thread_t *state) /*!< in: Key rotation status */ + rotate_thread_t *state) { if (state->allocated_iops > 0) { uint iops = state->allocated_iops; @@ -1520,25 +1404,27 @@ fil_crypt_return_iops( ut_ad(0); iops = 0; } + n_fil_crypt_iops_allocated -= iops; - mutex_exit(&fil_crypt_threads_mutex); state->allocated_iops = 0; os_event_set(fil_crypt_threads_event); + mutex_exit(&fil_crypt_threads_mutex); } fil_crypt_update_total_stat(state); } /*********************************************************************** -Search for a space needing rotation */ -UNIV_INTERN +Search for a space needing rotation +@param[in,out] key_state Key state +@param[in,out] state Rotation state +@param[in,out] recheck recheck ? */ +static bool fil_crypt_find_space_to_rotate( -/*===========================*/ - key_state_t* key_state, /*!< in: Key state */ - rotate_thread_t* state, /*!< in: Key rotation state */ - bool* recheck) /*!< out: true if recheck - needed */ + key_state_t* key_state, + rotate_thread_t* state, + bool* recheck) { /* we need iops to start rotating */ while (!state->should_shutdown() && !fil_crypt_alloc_iops(state)) { @@ -1547,30 +1433,44 @@ fil_crypt_find_space_to_rotate( } if (state->should_shutdown()) { + if (state->space) { + fil_space_release(state->space); + state->space = NULL; + } return false; } if (state->first) { state->first = false; - state->space = fil_get_first_space_safe(); - } else { - state->space = fil_get_next_space_safe(state->space); + if (state->space) { + fil_space_release(state->space); + } + state->space = NULL; } - while (!state->should_shutdown() && state->space != ULINT_UNDEFINED) { - fil_space_t* space = fil_space_found_by_id(state->space); + /* If key rotation is enabled (default) we iterate all tablespaces. + If key rotation is not enabled we iterate only the tablespaces + added to keyrotation list. */ + if (srv_fil_crypt_rotate_key_age) { + state->space = fil_space_next(state->space); + } else { + state->space = fil_space_keyrotate_next(state->space); + } - if (space) { - if (fil_crypt_space_needs_rotation(state, key_state, recheck)) { - ut_ad(key_state->key_id); - /* init state->min_key_version_found before - * starting on a space */ - state->min_key_version_found = key_state->key_version; - return true; - } + while (!state->should_shutdown() && state->space) { + if (fil_crypt_space_needs_rotation(state, key_state, recheck)) { + ut_ad(key_state->key_id); + /* init state->min_key_version_found before + * starting on a space */ + state->min_key_version_found = key_state->key_version; + return true; } - state->space = fil_get_next_space_safe(state->space); + if (srv_fil_crypt_rotate_key_age) { + state->space = fil_space_next(state->space); + } else { + state->space = fil_space_keyrotate_next(state->space); + } } /* if we didn't find any space return iops */ @@ -1581,16 +1481,16 @@ fil_crypt_find_space_to_rotate( } /*********************************************************************** -Start rotating a space */ +Start rotating a space +@param[in] key_state Key state +@param[in,out] state Rotation state */ static void fil_crypt_start_rotate_space( -/*=========================*/ - const key_state_t* key_state, /*!< in: Key state */ - rotate_thread_t* state) /*!< in: Key rotation state */ + const key_state_t* key_state, + rotate_thread_t* state) { - ulint space = state->space; - fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + fil_space_crypt_t *crypt_data = state->space->crypt_data; ut_ad(crypt_data); mutex_enter(&crypt_data->mutex); @@ -1601,8 +1501,9 @@ fil_crypt_start_rotate_space( crypt_data->rotate_state.next_offset = 1; // skip page 0 /* no need to rotate beyond current max * if space extends, it will be encrypted with newer version */ - crypt_data->rotate_state.max_offset = fil_space_get_size(space); - + /* FIXME: max_offset could be removed and instead + space->size consulted.*/ + crypt_data->rotate_state.max_offset = state->space->size; crypt_data->rotate_state.end_lsn = 0; crypt_data->rotate_state.min_key_version_found = key_state->key_version; @@ -1630,26 +1531,34 @@ fil_crypt_start_rotate_space( /*********************************************************************** Search for batch of pages needing rotation +@param[in] key_state Key state +@param[in,out] state Rotation state @return true if page needing key rotation found, false if not found */ static bool fil_crypt_find_page_to_rotate( -/*==========================*/ - const key_state_t* key_state, /*!< in: Key state */ - rotate_thread_t* state) /*!< in: Key rotation state */ + const key_state_t* key_state, + rotate_thread_t* state) { ulint batch = srv_alloc_time * state->allocated_iops; - ulint space = state->space; - fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + fil_space_t* space = state->space; + + ut_ad(!space || space->n_pending_ops > 0); + + /* If space is marked to be dropped stop rotation. */ + if (!space || space->is_stopping()) { + return false; + } + + fil_space_crypt_t *crypt_data = space->crypt_data; /* Space might already be dropped */ if (crypt_data) { mutex_enter(&crypt_data->mutex); ut_ad(key_state->key_id == crypt_data->key_id); - if (!crypt_data->is_closing(true) && - crypt_data->rotate_state.next_offset < - crypt_data->rotate_state.max_offset) { + if (crypt_data->rotate_state.next_offset < + crypt_data->rotate_state.max_offset) { state->offset = crypt_data->rotate_state.next_offset; ulint remaining = crypt_data->rotate_state.max_offset - @@ -1674,79 +1583,64 @@ fil_crypt_find_page_to_rotate( /*********************************************************************** Check if a page is uninitialized (doesn't need to be rotated) -@return true if page is uninitialized, false if not.*/ -static +@param[in] frame Page to check +@param[in] page_size Page size +@return true if page is uninitialized, false if not. */ +static inline bool fil_crypt_is_page_uninitialized( -/*============================*/ - const byte* frame, /*!< in: Page */ - const page_size_t& page_size) /*!< in: page size */ + const byte *frame, + const page_size_t& page_size) { - if (fil_page_get_type(frame) == FIL_PAGE_TYPE_ALLOCATED) { - /* empty pages aren't encrypted */ - return true; - } - - if (page_size.is_compressed()) { - ulint stored_checksum = mach_read_from_4( - frame + FIL_PAGE_SPACE_OR_CHKSUM); - /* empty pages aren't encrypted */ - if (stored_checksum == 0) { - return true; - } - } else { - ulint size = page_size.logical(); - ulint checksum_field1 = mach_read_from_4( - frame + FIL_PAGE_SPACE_OR_CHKSUM); - ulint checksum_field2 = mach_read_from_4( - frame + size - FIL_PAGE_END_LSN_OLD_CHKSUM); - /* empty pages are not encrypted */ - if (checksum_field1 == 0 && checksum_field2 == 0 - && mach_read_from_4(frame + FIL_PAGE_LSN) == 0) { - return true; - } - } - return false; + return (buf_page_is_zeroes(frame, page_size)); } -#define fil_crypt_get_page_throttle(state,space,page_size,offset,mtr,sleeptime_ms) \ - fil_crypt_get_page_throttle_func(state, space, page_size, offset, mtr, \ +#define fil_crypt_get_page_throttle(state,offset,mtr,sleeptime_ms) \ + fil_crypt_get_page_throttle_func(state, offset, mtr, \ sleeptime_ms, __FILE__, __LINE__) /*********************************************************************** Get a page and compute sleep time -@return page */ +@param[in,out] state Rotation state +@param[in] offset Page offset +@param[in,out] mtr Minitransaction +@param[out] sleeptime_ms Sleep time +@param[in] file File where called +@param[in] line Line where called +@return page or NULL*/ static buf_block_t* fil_crypt_get_page_throttle_func( -/*=============================*/ - rotate_thread_t* state, /*!< in/out: Key rotation state */ - ulint space, /*!< in: FIL space id */ - const page_size_t& page_size, /*!< in: page size */ - ulint offset, /*!< in: page offsett */ - mtr_t* mtr, /*!< in/out: minitransaction */ - ulint* sleeptime_ms, /*!< out: sleep time */ - const char* file, /*!< in: file name */ - unsigned line) /*!< in: file line */ + rotate_thread_t* state, + ulint offset, + mtr_t* mtr, + ulint* sleeptime_ms, + const char* file, + ulint line) { - const page_id_t& page_id = page_id_t(space, offset); - dberr_t err = DB_SUCCESS; - buf_block_t* block = NULL; + fil_space_t* space = state->space; + const page_size_t page_size = page_size_t(space->flags); + const page_id_t page_id(space->id, offset); + ut_ad(space->n_pending_ops > 0); - // JAN: TODO: - // buf_block_t* block = buf_page_try_get_func(page_id, file, line, mtr); + /* Before reading from tablespace we need to make sure that + the tablespace is not about to be dropped or truncated. */ + if (space->is_stopping()) { + return NULL; + } + dberr_t err = DB_SUCCESS; + buf_block_t* block = buf_page_get_gen(page_id, page_size, RW_X_LATCH, + NULL, + BUF_PEEK_IF_IN_POOL, file, line, + mtr, &err); if (block != NULL) { /* page was in buffer pool */ state->crypt_stat.pages_read_from_cache++; return block; } - /* Before reading from tablespace we need to make sure that - tablespace exists and is not is just being dropped. */ - - if (fil_crypt_is_closing(space) || - fil_space_found_by_id(space) == NULL) { + if (space->is_stopping()) { return NULL; } @@ -1756,7 +1650,7 @@ fil_crypt_get_page_throttle_func( block = buf_page_get_gen(page_id, page_size, RW_X_LATCH, NULL, BUF_GET_POSSIBLY_FREED, - file, line, mtr, &err); + file, line, mtr, &err); uintmax_t end = ut_time_us(NULL); if (end < start) { @@ -1779,6 +1673,7 @@ fil_crypt_get_page_throttle_func( } *sleeptime_ms += add_sleeptime_ms; + return block; } @@ -1788,26 +1683,32 @@ Get block and allocation status note: innodb locks fil_space_latch and then block when allocating page but locks block and then fil_space_latch when freeing page. -@return block + +@param[in,out] state Rotation state +@param[in] offset Page offset +@param[in,out] mtr Minitransaction +@param[out] allocation_status Allocation status +@param[out] sleeptime_ms Sleep time +@return block or NULL */ static buf_block_t* btr_scrub_get_block_and_allocation_status( -/*======================================*/ - rotate_thread_t* state, /*!< in/out: Key rotation state */ - ulint space, /*!< in: FIL space id */ - const page_size_t& page_size, /*!< in: page size */ - ulint offset, /*!< in: page offsett */ - mtr_t* mtr, /*!< in/out: minitransaction - */ + rotate_thread_t* state, + ulint offset, + mtr_t* mtr, btr_scrub_page_allocation_status_t *allocation_status, - /*!< in/out: allocation status */ - ulint* sleeptime_ms) /*!< out: sleep time */ + ulint* sleeptime_ms) { mtr_t local_mtr; buf_block_t *block = NULL; + fil_space_t* space = state->space; + + ut_ad(space->n_pending_ops > 0); + mtr_start(&local_mtr); - *allocation_status = fsp_page_is_free(space, offset, &local_mtr) ? + + *allocation_status = fsp_page_is_free(space->id, offset, &local_mtr) ? BTR_SCRUB_PAGE_FREE : BTR_SCRUB_PAGE_ALLOCATED; @@ -1815,7 +1716,6 @@ btr_scrub_get_block_and_allocation_status( /* this is easy case, we lock fil_space_latch first and then block */ block = fil_crypt_get_page_throttle(state, - space, page_size, offset, mtr, sleeptime_ms); mtr_commit(&local_mtr); @@ -1832,7 +1732,6 @@ btr_scrub_get_block_and_allocation_status( */ block = fil_crypt_get_page_throttle(state, - space, page_size, offset, mtr, sleeptime_ms); } @@ -1842,22 +1741,29 @@ btr_scrub_get_block_and_allocation_status( /*********************************************************************** -Rotate one page */ +Rotate one page +@param[in,out] key_state Key state +@param[in,out] state Rotation state */ static void fil_crypt_rotate_page( -/*==================*/ - const key_state_t* key_state, /*!< in: Key state */ - rotate_thread_t* state) /*!< in: Key rotation state */ + const key_state_t* key_state, + rotate_thread_t* state) { - ulint space = state->space; + fil_space_t*space = state->space; + ulint space_id = space->id; ulint offset = state->offset; - bool tsfound; - const page_size_t page_size = fil_space_get_page_size(space, &tsfound); ulint sleeptime_ms = 0; + fil_space_crypt_t *crypt_data = space->crypt_data; + const page_size_t page_size = page_size_t(space->flags); - /* check if tablespace is closing before reading page */ - if (fil_crypt_is_closing(space) || fil_space_found_by_id(space) == NULL) { + ut_ad(space->n_pending_ops > 0); + + /* In fil_crypt_thread where key rotation is done we have + acquired space and checked that this space is not yet + marked to be dropped. Similarly, in fil_crypt_find_page_to_rotate(). + Check here also to give DROP TABLE or similar a change. */ + if (space->is_stopping()) { return; } @@ -1869,7 +1775,6 @@ fil_crypt_rotate_page( mtr_t mtr; mtr_start(&mtr); buf_block_t* block = fil_crypt_get_page_throttle(state, - space, page_size, offset, &mtr, &sleeptime_ms); @@ -1881,9 +1786,8 @@ fil_crypt_rotate_page( uint kv = block->page.key_version; /* check if tablespace is closing after reading page */ - if (!fil_crypt_is_closing(space)) { + if (space->is_stopping()) { byte* frame = buf_block_get_frame(block); - fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); if (kv == 0 && fil_crypt_is_page_uninitialized(frame, page_size)) { @@ -1903,7 +1807,7 @@ fil_crypt_rotate_page( /* force rotation by dummy updating page */ mlog_write_ulint(frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, - space, MLOG_4BYTES, &mtr); + space_id, MLOG_4BYTES, &mtr); /* update block */ block->page.key_version = key_state->key_version; @@ -1937,7 +1841,7 @@ fil_crypt_rotate_page( btr_scrub_page_allocation_status_t allocated; block = btr_scrub_get_block_and_allocation_status( - state, space, page_size, offset, &mtr, + state, offset, &mtr, &allocated, &sleeptime_ms); @@ -1951,7 +1855,7 @@ fil_crypt_rotate_page( /* we need to refetch it once more now that we have * index locked */ block = btr_scrub_get_block_and_allocation_status( - state, space, page_size, offset, &mtr, + state, offset, &mtr, &allocated, &sleeptime_ms); @@ -1982,7 +1886,6 @@ fil_crypt_rotate_page( if (needs_scrubbing == BTR_SCRUB_TURNED_OFF) { /* if we just detected that scrubbing was turned off * update global state to reflect this */ - fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); ut_ad(crypt_data); mutex_enter(&crypt_data->mutex); crypt_data->rotate_state.scrubbing.is_active = false; @@ -2010,17 +1913,20 @@ fil_crypt_rotate_page( } /*********************************************************************** -Rotate a batch of pages */ +Rotate a batch of pages +@param[in,out] key_state Key state +@param[in,out] state Rotation state */ static void fil_crypt_rotate_pages( -/*===================*/ - const key_state_t* key_state, /*!< in: Key state */ - rotate_thread_t* state) /*!< in: Key rotation state */ + const key_state_t* key_state, + rotate_thread_t* state) { - ulint space = state->space; + ulint space = state->space->id; ulint end = state->offset + state->batch; + ut_ad(state->space->n_pending_ops > 0); + for (; state->offset < end; state->offset++) { /* we can't rotate pages in dblwr buffer as @@ -2041,20 +1947,23 @@ fil_crypt_rotate_pages( } /*********************************************************************** -Flush rotated pages and then update page 0 */ +Flush rotated pages and then update page 0 + +@param[in,out] state rotation state */ static void fil_crypt_flush_space( -/*==================*/ - rotate_thread_t* state, /*!< in: Key rotation state */ - ulint space) /*!< in: FIL space id */ + rotate_thread_t* state) { - fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + fil_space_t* space = state->space; + fil_space_crypt_t *crypt_data = space->crypt_data; + + ut_ad(space->n_pending_ops > 0); /* flush tablespace pages so that there are no pages left with old key */ lsn_t end_lsn = crypt_data->rotate_state.end_lsn; - if (end_lsn > 0 && !fil_crypt_is_closing(space)) { + if (end_lsn > 0 && !space->is_stopping()) { bool success = false; ulint n_pages = 0; ulint sum_pages = 0; @@ -2064,7 +1973,7 @@ fil_crypt_flush_space( success = buf_flush_lists(ULINT_MAX, end_lsn, &n_pages); buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); sum_pages += n_pages; - } while (!success && !fil_crypt_is_closing(space)); + } while (!success && !space->is_stopping()); uintmax_t end = ut_time_us(NULL); @@ -2082,48 +1991,38 @@ fil_crypt_flush_space( } /* update page 0 */ - if (!fil_crypt_is_closing(space)) { - mtr_t mtr; - mtr_start(&mtr); - ulint offset = 0; // page 0 - const page_id_t page_id(space, offset); - bool tsfound; - const page_size_t page_size = fil_space_get_page_size(space, &tsfound); - dberr_t err = DB_SUCCESS; - - buf_block_t* block = buf_page_get_gen(page_id, page_size, - RW_X_LATCH, NULL, BUF_GET, - __FILE__, __LINE__, &mtr, &err); - - if (block && err == DB_SUCCESS) { - byte* frame = buf_block_get_frame(block); - - crypt_data->page0_offset = FSP_HEADER_OFFSET - + fsp_header_get_encryption_offset(page_size); + mtr_t mtr; + mtr.start(); - fil_space_write_crypt_data(space, frame, - crypt_data->page0_offset, - ULINT_MAX, &mtr); - } + dberr_t err; - mtr_commit(&mtr); + if (buf_block_t* block = buf_page_get_gen( + page_id_t(space->id, 0), page_size_t(space->flags), + RW_X_LATCH, NULL, BUF_GET, + __FILE__, __LINE__, &mtr, &err)) { + crypt_data->write_page0(space, block->frame, &mtr); } + + mtr.commit(); } /*********************************************************************** -Complete rotating a space */ +Complete rotating a space +@param[in,out] key_state Key state +@param[in,out] state Rotation state */ static void fil_crypt_complete_rotate_space( -/*============================*/ - const key_state_t* key_state, /*!< in: Key state */ - rotate_thread_t* state) /*!< in: Key rotation state */ + const key_state_t* key_state, + rotate_thread_t* state) { - ulint space = state->space; - fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + fil_space_crypt_t *crypt_data = state->space->crypt_data; + + ut_ad(crypt_data); + ut_ad(state->space->n_pending_ops > 0); /* Space might already be dropped */ - if (crypt_data != NULL && !crypt_data->is_closing(false)) { + if (!state->space->is_stopping()) { mutex_enter(&crypt_data->mutex); /** @@ -2181,9 +2080,8 @@ fil_crypt_complete_rotate_space( } if (should_flush) { - fil_crypt_flush_space(state, space); + fil_crypt_flush_space(state); - ut_ad(crypt_data); mutex_enter(&crypt_data->mutex); crypt_data->rotate_state.flushing = false; mutex_exit(&crypt_data->mutex); @@ -2206,8 +2104,8 @@ DECLARE_THREAD(fil_crypt_thread)( mutex_enter(&fil_crypt_threads_mutex); uint thread_no = srv_n_fil_crypt_threads_started; srv_n_fil_crypt_threads_started++; - mutex_exit(&fil_crypt_threads_mutex); os_event_set(fil_crypt_event); /* signal that we started */ + mutex_exit(&fil_crypt_threads_mutex); /* state of this thread */ rotate_thread_t thr(thread_no); @@ -2227,6 +2125,7 @@ DECLARE_THREAD(fil_crypt_thread)( * i.e either new key version of change or * new rotate_key_age */ os_event_reset(fil_crypt_threads_event); + if (os_event_wait_time(fil_crypt_threads_event, 1000000) == 0) { break; } @@ -2240,7 +2139,12 @@ DECLARE_THREAD(fil_crypt_thread)( time_t waited = time(0) - wait_start; - if (waited >= (time_t) srv_background_scrub_data_check_interval) { + /* Break if we have waited the background scrub + internal and background scrubbing is enabled */ + if (waited >= 0 + && ulint(waited) >= srv_background_scrub_data_check_interval + && (srv_background_scrub_data_uncompressed + || srv_background_scrub_data_compressed)) { break; } } @@ -2255,29 +2159,32 @@ DECLARE_THREAD(fil_crypt_thread)( /* we found a space to rotate */ fil_crypt_start_rotate_space(&new_state, &thr); - /* decrement pending ops that was incremented in - * fil_crypt_space_needs_rotation - * (called from fil_crypt_find_space_to_rotate), - * this makes sure that tablespace won't be dropped - * just after we decided to start processing it. */ - fil_decr_pending_ops(thr.space); - /* iterate all pages (cooperativly with other threads) */ - while (!thr.should_shutdown() && + while (!thr.should_shutdown() && thr.space && fil_crypt_find_page_to_rotate(&new_state, &thr)) { /* rotate a (set) of pages */ fil_crypt_rotate_pages(&new_state, &thr); + /* If space is marked as stopping, release + space and stop rotation. */ + if (thr.space->is_stopping()) { + fil_space_release(thr.space); + thr.space = NULL; + break; + } + /* realloc iops */ fil_crypt_realloc_iops(&thr); } /* complete rotation */ - fil_crypt_complete_rotate_space(&new_state, &thr); + if (thr.space) { + fil_crypt_complete_rotate_space(&new_state, &thr); + } /* force key state refresh */ - new_state.key_id= 0; + new_state.key_id = 0; /* return iops */ fil_crypt_return_iops(&thr); @@ -2287,10 +2194,16 @@ DECLARE_THREAD(fil_crypt_thread)( /* return iops if shutting down */ fil_crypt_return_iops(&thr); + /* release current space if shutting down */ + if (thr.space) { + fil_space_release(thr.space); + thr.space = NULL; + } + mutex_enter(&fil_crypt_threads_mutex); srv_n_fil_crypt_threads_started--; - mutex_exit(&fil_crypt_threads_mutex); os_event_set(fil_crypt_event); /* signal that we stopped */ + mutex_exit(&fil_crypt_threads_mutex); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ @@ -2301,17 +2214,19 @@ DECLARE_THREAD(fil_crypt_thread)( } /********************************************************************* -Adjust thread count for key rotation */ +Adjust thread count for key rotation +@param[in] enw_cnt Number of threads to be used */ UNIV_INTERN void fil_crypt_set_thread_cnt( -/*=====================*/ - uint new_cnt) /*!< in: New key rotation thread count */ + const uint new_cnt) { if (!fil_crypt_threads_inited) { fil_crypt_threads_init(); } + mutex_enter(&fil_crypt_threads_mutex); + if (new_cnt > srv_n_fil_crypt_threads) { uint add = new_cnt - srv_n_fil_crypt_threads; srv_n_fil_crypt_threads = new_cnt; @@ -2328,6 +2243,8 @@ fil_crypt_set_thread_cnt( os_event_set(fil_crypt_threads_event); } + mutex_exit(&fil_crypt_threads_mutex); + while(srv_n_fil_crypt_threads_started != srv_n_fil_crypt_threads) { os_event_reset(fil_crypt_event); os_event_wait_time(fil_crypt_event, 1000000); @@ -2335,39 +2252,39 @@ fil_crypt_set_thread_cnt( } /********************************************************************* -Adjust max key age */ +Adjust max key age +@param[in] val New max key age */ UNIV_INTERN void fil_crypt_set_rotate_key_age( -/*=========================*/ - uint val) /*!< in: New max key age */ + uint val) { srv_fil_crypt_rotate_key_age = val; os_event_set(fil_crypt_threads_event); } /********************************************************************* -Adjust rotation iops */ +Adjust rotation iops +@param[in] val New max roation iops */ UNIV_INTERN void fil_crypt_set_rotation_iops( -/*========================*/ - uint val) /*!< in: New iops setting */ + uint val) { srv_n_fil_crypt_iops = val; os_event_set(fil_crypt_threads_event); } /********************************************************************* -Adjust encrypt tables */ +Adjust encrypt tables +@param[in] val New setting for innodb-encrypt-tables */ UNIV_INTERN void fil_crypt_set_encrypt_tables( -/*=========================*/ - uint val) /*!< in: New srv_encrypt_tables setting */ + uint val) { - srv_encrypt_tables = val; - os_event_set(fil_crypt_threads_event); + srv_encrypt_tables = val; + os_event_set(fil_crypt_threads_event); } /********************************************************************* @@ -2375,7 +2292,6 @@ Init threads for key rotation */ UNIV_INTERN void fil_crypt_threads_init() -/*====================*/ { if (!fil_crypt_threads_inited) { fil_crypt_event = os_event_create(0); @@ -2395,7 +2311,6 @@ Clean up key rotation threads resources */ UNIV_INTERN void fil_crypt_threads_cleanup() -/*=======================*/ { if (!fil_crypt_threads_inited) { return; @@ -2408,62 +2323,26 @@ fil_crypt_threads_cleanup() } /********************************************************************* -Mark a space as closing */ +Wait for crypt threads to stop accessing space +@param[in] space Tablespace */ UNIV_INTERN void -fil_space_crypt_mark_space_closing( -/*===============================*/ - ulint space, /*!< in: tablespace id */ - fil_space_crypt_t* crypt_data) /*!< in: crypt_data or NULL */ +fil_space_crypt_close_tablespace( + const fil_space_t* space) { - if (!fil_crypt_threads_inited) { - return; - } - - mutex_enter(&fil_crypt_threads_mutex); + fil_space_crypt_t* crypt_data = space->crypt_data; if (!crypt_data) { - crypt_data = fil_space_get_crypt_data(space); - } - - if (crypt_data == NULL) { - mutex_exit(&fil_crypt_threads_mutex); - return; - } - - mutex_enter(&crypt_data->mutex); - mutex_exit(&fil_crypt_threads_mutex); - crypt_data->closing = true; - mutex_exit(&crypt_data->mutex); -} - -/********************************************************************* -Wait for crypt threads to stop accessing space */ -UNIV_INTERN -void -fil_space_crypt_close_tablespace( -/*=============================*/ - ulint space) /*!< in: Space id */ -{ - if (!srv_encrypt_tables) { return; } mutex_enter(&fil_crypt_threads_mutex); - fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space); - - if (crypt_data == NULL || crypt_data->is_closing(false)) { - mutex_exit(&fil_crypt_threads_mutex); - return; - } - time_t start = time(0); time_t last = start; mutex_enter(&crypt_data->mutex); mutex_exit(&fil_crypt_threads_mutex); - crypt_data->closing = true; uint cnt = crypt_data->rotate_state.active_threads; bool flushing = crypt_data->rotate_state.flushing; @@ -2473,8 +2352,10 @@ fil_space_crypt_close_tablespace( /* release dict mutex so that scrub threads can release their * table references */ dict_mutex_exit_for_mysql(); + /* wakeup throttle (all) sleepers */ os_event_set(fil_crypt_throttle_sleep_event); + os_thread_sleep(20000); dict_mutex_enter_for_mysql(); mutex_enter(&crypt_data->mutex); @@ -2487,7 +2368,7 @@ fil_space_crypt_close_tablespace( ib::warn() << "Waited " << now - start << " seconds to drop space: " - << space << "."; + << space->name << "."; last = now; } } @@ -2497,22 +2378,23 @@ fil_space_crypt_close_tablespace( /********************************************************************* Get crypt status for a space (used by information_schema) -return 0 if crypt data present */ +@param[in] space Tablespace +@param[out] status Crypt status */ UNIV_INTERN -int +void fil_space_crypt_get_status( -/*=======================*/ - ulint id, /*!< in: space id */ - struct fil_space_crypt_status_t* status) /*!< out: status */ + const fil_space_t* space, + struct fil_space_crypt_status_t* status) { - fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(id); - memset(status, 0, sizeof(*status)); + ut_ad(space->n_pending_ops > 0); + fil_space_crypt_t* crypt_data = space->crypt_data; + status->space = space->id; + if (crypt_data != NULL) { - status->space = id; - status->scheme = crypt_data->type; mutex_enter(&crypt_data->mutex); + status->scheme = crypt_data->type; status->keyserver_requests = crypt_data->keyserver_requests; status->min_key_version = crypt_data->min_key_version; status->key_id = crypt_data->key_id; @@ -2526,8 +2408,6 @@ fil_space_crypt_get_status( crypt_data->rotate_state.next_offset; status->rotate_max_page_number = crypt_data->rotate_state.max_offset; - } else { - status->rotating = false; } mutex_exit(&crypt_data->mutex); @@ -2535,25 +2415,17 @@ fil_space_crypt_get_status( if (srv_encrypt_tables || crypt_data->min_key_version) { status->current_key_version = fil_crypt_get_latest_key_version(crypt_data); - } else { - status->current_key_version = 0; - } - } else { - if (srv_encrypt_tables) { - os_event_set(fil_crypt_threads_event); } } - - return crypt_data == NULL ? 1 : 0; } /********************************************************************* -Return crypt statistics */ +Return crypt statistics +@param[out] stat Crypt statistics */ UNIV_INTERN void fil_crypt_total_stat( -/*=================*/ - fil_crypt_stat_t *stat) /*!< out: Crypt statistics */ + fil_crypt_stat_t *stat) { mutex_enter(&crypt_stat_mutex); *stat = crypt_stat; @@ -2562,23 +2434,24 @@ fil_crypt_total_stat( /********************************************************************* Get scrub status for a space (used by information_schema) -return 0 if data found */ + +@param[in] space Tablespace +@param[out] status Scrub status */ UNIV_INTERN -int +void fil_space_get_scrub_status( -/*=======================*/ - ulint id, /*!< in: space id */ - struct fil_space_scrub_status_t* status) /*!< out: status */ + const fil_space_t* space, + struct fil_space_scrub_status_t* status) { - fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(id); - memset(status, 0, sizeof(*status)); + ut_ad(space->n_pending_ops > 0); + fil_space_crypt_t* crypt_data = space->crypt_data; + + status->space = space->id; + if (crypt_data != NULL) { - bool tsfound; - const page_size_t page_size = fil_space_get_page_size(id, &tsfound); - status->space = id; - status->compressed = page_size.is_compressed(); + status->compressed = FSP_FLAGS_GET_ZIP_SSIZE(space->flags) > 0; mutex_enter(&crypt_data->mutex); status->last_scrub_completed = crypt_data->rotate_state.scrubbing.last_scrub_completed; @@ -2593,102 +2466,168 @@ fil_space_get_scrub_status( crypt_data->rotate_state.next_offset; status->current_scrub_max_page_number = crypt_data->rotate_state.max_offset; - } else { - status->scrubbing = false; } mutex_exit(&crypt_data->mutex); } - - return crypt_data == NULL ? 1 : 0; } #endif /* UNIV_INNOCHECKSUM */ -/********************************************************************* -Verify checksum for a page (iff it's encrypted) -NOTE: currently this function can only be run in single threaded mode -as it modifies srv_checksum_algorithm (temporarily) -@param[in] src_fame page to verify -@param[in] page_size page_size -@param[in] page_no page number of given read_buf -@param[in] strict_check true if strict-check option is enabled +/** +Verify that post encryption checksum match calculated checksum. +This function should be called only if tablespace contains crypt_data +metadata (this is strong indication that tablespace is encrypted). +Function also verifies that traditional checksum does not match +calculated checksum as if it does page could be valid unencrypted, +encrypted, or corrupted. + +@param[in,out] page page frame (checksum is temporarily modified) +@param[in] page_size page size +@param[in] space tablespace identifier +@param[in] offset page number @return true if page is encrypted AND OK, false otherwise */ UNIV_INTERN bool fil_space_verify_crypt_checksum( -/*============================*/ - const byte* src_frame, /*!< in: page the verify */ - const page_size_t& page_size /*!< in: page size */ + byte* page, + const page_size_t& page_size, #ifdef UNIV_INNOCHECKSUM - ,uintmax_t page_no, - bool strict_check + bool strict_check, /*!< --strict-check */ + FILE* log_file, /*!< --log */ #endif /* UNIV_INNOCHECKSUM */ -) + ulint space, + ulint offset) { - // key version - uint key_version = mach_read_from_4( - src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + uint key_version = mach_read_from_4(page+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + /* If page is not encrypted, return false */ if (key_version == 0) { - return false; // unencrypted page + return false; } - /* "trick" the normal checksum routines by storing the post-encryption - * checksum into the normal checksum field allowing for reuse of - * the normal routines */ + srv_checksum_algorithm_t algorithm = + static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm); + /* If no checksum is used, can't continue checking. */ + if (algorithm == SRV_CHECKSUM_ALGORITHM_NONE) { + return(true); + } - // post encryption checksum - ib_uint32_t stored_post_encryption = mach_read_from_4( - src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4); + /* Read stored post encryption checksum. */ + ib_uint32_t checksum = mach_read_from_4( + page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4); - // save pre encryption checksum for restore in end of this function - ib_uint32_t stored_pre_encryption = mach_read_from_4( - src_frame + FIL_PAGE_SPACE_OR_CHKSUM); + /* Declare empty pages non-corrupted */ + if (checksum == 0 + && *reinterpret_cast<const ib_uint64_t*>(page + FIL_PAGE_LSN) == 0 + && buf_page_is_zeroes(page, page_size)) { + return(true); + } - ib_uint32_t checksum_field2 = mach_read_from_4( - src_frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM); + /* Compressed and encrypted pages do not have checksum. Assume not + corrupted. Page verification happens after decompression in + buf_page_io_complete() using buf_page_is_corrupted(). */ + if (mach_read_from_2(page+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { + return (true); + } - /** prepare frame for usage of normal checksum routines */ - mach_write_to_4(const_cast<byte*>(src_frame) + FIL_PAGE_SPACE_OR_CHKSUM, - stored_post_encryption); + /* Compressed pages use different checksum method. We first store + the post encryption checksum on checksum location and after function + restore the original. */ + if (page_size.is_compressed()) { + ib_uint32_t old = static_cast<ib_uint32_t>(mach_read_from_4( + page + FIL_PAGE_SPACE_OR_CHKSUM)); - /* NOTE: this function is (currently) only run when restoring - * dblwr-buffer, server is single threaded so it's safe to modify - * srv_checksum_algorithm */ - srv_checksum_algorithm_t save_checksum_algorithm = - (srv_checksum_algorithm_t)srv_checksum_algorithm; + mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum); - if (!page_size.is_compressed() && - (save_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB || - save_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_INNODB)) { - /* handle ALGORITHM_INNODB specially, - * "downgrade" to ALGORITHM_INNODB and store BUF_NO_CHECKSUM_MAGIC - * checksum_field2 is sort of pointless anyway... - */ - srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_INNODB; - mach_write_to_4(const_cast<byte*>(src_frame) + - UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, - BUF_NO_CHECKSUM_MAGIC); + bool valid = page_zip_verify_checksum(page, + page_size.physical() +#ifdef UNIV_INNOCHECKSUM + , offset, + strict_check, + log_file != NULL, + log_file +#endif + ); + + mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, old); + + return (valid); } - /* verify checksums */ - bool corrupted = buf_page_is_corrupted(false, src_frame, - page_size, false -#ifdef UNIV_INNOCHECKSUM - ,page_no, strict_check, false, NULL -#endif /* UNIV_INNOCHECKSUM */ - ); + /* If stored checksum matches one of the calculated checksums + page is not corrupted. */ + + ib_uint32_t cchecksum1 = buf_calc_page_crc32(page); + ib_uint32_t cchecksum2 = (ib_uint32_t) buf_calc_page_new_checksum( + page); + bool encrypted = (checksum == cchecksum1 || checksum == cchecksum2 + || checksum == BUF_NO_CHECKSUM_MAGIC); + + /* MySQL 5.6 and MariaDB 10.0 and 10.1 will write an LSN to the + first page of each system tablespace file at + FIL_PAGE_FILE_FLUSH_LSN offset. On other pages and in other files, + the field might have been uninitialized until MySQL 5.5. In MySQL 5.7 + (and MariaDB Server 10.2.2) WL#7990 stopped writing the field for other + than page 0 of the system tablespace. - /** restore frame & algorithm */ - srv_checksum_algorithm = save_checksum_algorithm; + Starting from MariaDB 10.1 the field has been repurposed for + encryption key_version. - mach_write_to_4(const_cast<byte*>(src_frame) + - FIL_PAGE_SPACE_OR_CHKSUM, - stored_pre_encryption); + Starting with MySQL 5.7 (and MariaDB Server 10.2), the + field has been repurposed for SPATIAL INDEX pages for + FIL_RTREE_SPLIT_SEQ_NUM. - mach_write_to_4(const_cast<byte*>(src_frame) + - UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, - checksum_field2); + Note that FIL_PAGE_FILE_FLUSH_LSN is not included in the InnoDB page + checksum. + + Thus, FIL_PAGE_FILE_FLUSH_LSN could contain any value. While the + field would usually be 0 for pages that are not encrypted, we cannot + assume that a nonzero value means that the page is encrypted. + Therefore we must validate the page both as encrypted and unencrypted + when FIL_PAGE_FILE_FLUSH_LSN does not contain 0. + */ + + ulint checksum1 = mach_read_from_4( + page + FIL_PAGE_SPACE_OR_CHKSUM); + + ulint checksum2 = mach_read_from_4( + page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM); + +#ifdef UNIV_INNOCHECKSUM +# define CKARGS page, checksum1, checksum2, \ + offset, log_file != NULL, log_file, algorithm +#else +# define CKARGS page, checksum1, checksum2 +#endif + + bool valid = buf_page_is_checksum_valid_crc32( + CKARGS, false + /* FIXME: also try the original crc32 that was + buggy on big-endian architectures? */) + || buf_page_is_checksum_valid_innodb(CKARGS); +#undef CKARGS + + if (encrypted && valid) { + /* If page is encrypted and traditional checksums match, + page could be still encrypted, or not encrypted and valid or + corrupted. */ +#ifdef UNIV_INNOCHECKSUM + fprintf(log_file ? log_file : stderr, + "Page " ULINTPF ":" ULINTPF " may be corrupted." + " Post encryption checksum %u" + " stored [" ULINTPF ":" ULINTPF "] key_version %u\n", + space, offset, checksum, checksum1, checksum2, + key_version); +#else /* UNIV_INNOCHECKSUM */ + ib::error() + << " Page " << space << ":" << offset + << " may be corrupted." + " Post encryption checksum " << checksum + << " stored [" << checksum1 << ":" << checksum2 + << "] key_version " << key_version; +#endif + encrypted = false; + } - return (!corrupted); + return(encrypted); } diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 9413a2b3cba..b38899e6de4 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. -Copyright (c) 2013, 2017, MariaDB Corporation. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2014, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -153,7 +153,11 @@ fil_addr_t fil_addr_null = {FIL_NULL, 0}; /** The tablespace memory cache. This variable is NULL before the module is initialized. */ -fil_system_t* fil_system = NULL; +UNIV_INTERN fil_system_t* fil_system = NULL; + +/** At this age or older a space/page will be rotated */ +UNIV_INTERN extern uint srv_fil_crypt_rotate_key_age; +UNIV_INTERN extern ib_mutex_t fil_crypt_threads_mutex; /** Determine if user has explicitly disabled fsync(). */ # define fil_buffering_disabled(s) \ @@ -241,18 +245,12 @@ fil_node_prepare_for_io( fil_system_t* system, /*!< in: tablespace memory cache */ fil_space_t* space); /*!< in: space */ -/** -Updates the data structures when an i/o operation finishes. Updates the -pending i/o's field in the node appropriately. +/** Update the data structures when an i/o operation finishes. @param[in,out] node file node -@param[in,out] system tablespace instance @param[in] type IO context */ static void -fil_node_complete_io( - fil_node_t* node, - fil_system_t* system, - const IORequest& type); +fil_node_complete_io(fil_node_t* node, const IORequest& type); /** Reads data from a space to a buffer. Remember that the possible incomplete blocks at the end of file are ignored: they are not taken into account when @@ -654,6 +652,18 @@ retry: page, FSP_FREE_LIMIT); const ulint free_len = flst_get_len( FSP_HEADER_OFFSET + FSP_FREE + page); + + /* Try to read crypt_data from page 0 if it is not yet + read. FIXME: Remove page_0_crypt_read, and simply ensure in + fil_space_t object creation that node->size==0 if and only + if the crypt_data is not known and must be read. */ + if (!space->page_0_crypt_read) { + space->page_0_crypt_read = true; + ut_ad(space->crypt_data == NULL); + space->crypt_data = fil_space_read_crypt_data( + page_size_t(space->flags), page); + } + ut_free(buf2); os_file_close(node->handle); @@ -993,61 +1003,6 @@ skip_flush: space->n_pending_flushes--; } -/** -Fill the pages with NULs -@param[in] node File node -@param[in] page_size physical page size -@param[in] start Offset from the start of the file in bytes -@param[in] len Length in bytes -@param[in] read_only_mode - if true, then read only mode checks are enforced. -@return DB_SUCCESS or error code */ -static -dberr_t -fil_write_zeros( - const fil_node_t* node, - ulint page_size, - os_offset_t start, - ulint len, - bool read_only_mode) -{ - ut_a(len > 0); - - /* Extend at most 1M at a time */ - ulint n_bytes = ut_min(static_cast<ulint>(1024 * 1024), len); - byte* ptr = reinterpret_cast<byte*>(ut_zalloc_nokey(n_bytes - + page_size)); - byte* buf = reinterpret_cast<byte*>(ut_align(ptr, page_size)); - - os_offset_t offset = start; - dberr_t err = DB_SUCCESS; - const os_offset_t end = start + len; - IORequest request(IORequest::WRITE); - - while (offset < end) { - err = os_aio( - request, OS_AIO_SYNC, node->name, - node->handle, buf, offset, n_bytes, read_only_mode, - NULL, NULL); - - if (err != DB_SUCCESS) { - break; - } - - offset += n_bytes; - - n_bytes = ut_min(n_bytes, static_cast<ulint>(end - offset)); - - DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension", - DBUG_SUICIDE();); - } - - ut_free(ptr); - - return(err); -} - - /** Try to extend a tablespace. @param[in,out] space tablespace to be extended @param[in,out] node last file of the tablespace @@ -1098,79 +1053,177 @@ fil_space_extend_must_retry( ut_ad(size > space->size); - ulint pages_added = size - space->size; + ulint last_page_no = space->size; + const ulint file_start_page_no = last_page_no - node->size; + + /* Determine correct file block size */ + if (node->block_size == 0) { + node->block_size = os_file_get_block_size( + node->handle, node->name); + } + const page_size_t pageSize(space->flags); const ulint page_size = pageSize.physical(); - os_offset_t start = os_file_get_size(node->handle); - ut_a(start != (os_offset_t) -1); - start &= ~(page_size - 1); - const os_offset_t end - = (node->size + pages_added) * page_size; +#ifdef _WIN32 + /* Logically or physically extend the file with zero bytes, + depending on whether it is sparse. */ - *success = end <= start; + /* FIXME: Call DeviceIoControl(node->handle, FSCTL_SET_SPARSE, ...) + when opening a file when FSP_FLAGS_HAS_PAGE_COMPRESSION(). */ + { + FILE_END_OF_FILE_INFO feof; + /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes. + fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes. + Do not shrink short ROW_FORMAT=COMPRESSED files. */ + feof.EndOfFile.QuadPart = std::max( + os_offset_t(size - file_start_page_no) * page_size, + os_offset_t(FIL_IBD_FILE_INITIAL_SIZE + * UNIV_PAGE_SIZE)); + *success = SetFileInformationByHandle(node->handle, + FileEndOfFileInfo, + &feof, sizeof feof); + if (!*success) { + ib::error() << "extending file '" << node->name + << "' from " + << os_offset_t(node->size) * page_size + << " to " << feof.EndOfFile.QuadPart + << " bytes failed with " << GetLastError(); + } else { + last_page_no = size; + } + } +#else + /* We will logically extend the file with ftruncate() if + page_compression is enabled, because the file is expected to + be sparse in that case. Make sure that ftruncate() can deal + with large files. */ + const bool is_sparse = sizeof(off_t) >= 8 + && FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags); + + if (is_sparse) { + /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes. + fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes. + Do not shrink short ROW_FORMAT=COMPRESSED files. */ + off_t s = std::max(off_t(size - file_start_page_no) + * off_t(page_size), + off_t(FIL_IBD_FILE_INITIAL_SIZE + * UNIV_PAGE_SIZE)); + *success = !ftruncate(node->handle, s); + if (!*success) { + ib::error() << "ftruncate of file '" << node->name + << "' from " + << os_offset_t(last_page_no + - file_start_page_no) + * page_size << " to " << os_offset_t(s) + << " bytes failed with " << errno; + } else { + last_page_no = size; + } + } else { + const os_offset_t start_offset + = os_offset_t(last_page_no - file_start_page_no) + * page_size; + const ulint n_pages = size - last_page_no; + const os_offset_t len = os_offset_t(n_pages) * page_size; +# ifdef HAVE_POSIX_FALLOCATE + int err; + do { + err = posix_fallocate(node->handle, start_offset, len); + } while (err == EINTR + && srv_shutdown_state == SRV_SHUTDOWN_NONE); + + if (err != EINVAL) { + + *success = !err; + if (!*success) { + ib::error() << "extending file '" << node->name + << "' from " + << start_offset + << " to " << len + start_offset + << " bytes failed with: " << err; + } + } else +# endif /* HAVE_POSIX_FALLOCATE */ + { + /* Extend at most 1 megabyte pages at a time */ + ulint n_bytes = std::min(ulint(1) << 20, n_pages) + * page_size; + byte* buf2 = static_cast<byte*>( + calloc(1, n_bytes + page_size)); + *success = buf2 != NULL; + if (!buf2) { + ib::error() << "Cannot allocate " + << n_bytes + page_size + << " bytes to extend file"; + } + byte* const buf = static_cast<byte*>( + ut_align(buf2, page_size)); + IORequest request(IORequest::WRITE); - if (!*success) { - DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension", - DBUG_SUICIDE();); -#ifdef HAVE_POSIX_FALLOCATE - /* On Linux, FusionIO atomic writes cannot extend - files, so we must use posix_fallocate(). */ - int ret = posix_fallocate(node->handle, start, - end - start); - - /* EINVAL means that fallocate() is not supported. - One known case is Linux ext3 file system with O_DIRECT. */ - if (ret == 0) { - } else if (ret != EINVAL) { - ib::error() - << "posix_fallocate(): Failed to preallocate" - " data for file " - << node->name << ", desired size " - << end << " bytes." - " Operating system error number " - << ret << ". Check" - " that the disk is not full or a disk quota" - " exceeded. Some operating system error" - " numbers are described at " REFMAN - "operating-system-error-codes.html"; - } else -#endif - if (DB_SUCCESS != fil_write_zeros( - node, page_size, start, - static_cast<ulint>(end - start), - space->purpose == FIL_TYPE_TEMPORARY - && srv_read_only_mode)) { - ib::warn() - << "Error while writing " << end - start - << " zeroes to " << node->name - << " starting at offset " << start; - } + os_offset_t offset = start_offset; + const os_offset_t end = start_offset + len; + const bool read_only_mode = space->purpose + == FIL_TYPE_TEMPORARY && srv_read_only_mode; - /* Check how many pages actually added */ - os_offset_t actual_end = os_file_get_size(node->handle); - ut_a(actual_end != static_cast<os_offset_t>(-1)); - ut_a(actual_end >= start); + while (*success && offset < end) { + dberr_t err = os_aio( + request, OS_AIO_SYNC, node->name, + node->handle, buf, offset, n_bytes, + read_only_mode, NULL, NULL); - *success = end >= actual_end; - pages_added = static_cast<ulint>( - (std::min(actual_end, end) - start) / page_size); - } + if (err != DB_SUCCESS) { + *success = false; + ib::error() << "writing zeroes to file '" + << node->name << "' from " + << offset << " to " << offset + n_bytes + << " bytes failed with: " + << ut_strerr(err); + break; + } - os_has_said_disk_full = !*success; + offset += n_bytes; - mutex_enter(&fil_system->mutex); + n_bytes = std::min(n_bytes, + static_cast<ulint>(end - offset)); + } - space->size += pages_added; + free(buf2); + } + + os_has_said_disk_full = *success; + if (*success) { + last_page_no = size; + } else { + /* Let us measure the size of the file + to determine how much we were able to + extend it */ + os_offset_t fsize = os_file_get_size(node->handle); + ut_a(fsize != os_offset_t(-1)); + + last_page_no = ulint(fsize / page_size) + + file_start_page_no; + } + } +#endif + mutex_enter(&fil_system->mutex); ut_a(node->being_extended); node->being_extended = false; - node->size += pages_added; + ut_a(last_page_no - file_start_page_no >= node->size); + + ulint file_size = last_page_no - file_start_page_no; + space->size += file_size - node->size; + node->size = file_size; const ulint pages_in_MiB = node->size & ~((1 << (20 - UNIV_PAGE_SIZE_SHIFT)) - 1); - fil_node_complete_io(node, fil_system, IORequestWrite); + fil_node_complete_io(node, +#ifndef _WIN32 + !is_sparse ? IORequestWrite : +#endif /* _WIN32 */ + IORequestRead); /* Keep the last data file size info up to date, rounded to full megabytes */ @@ -1423,6 +1476,12 @@ fil_space_detach( UT_LIST_REMOVE(fil_system->unflushed_spaces, space); } + if (space->is_in_rotation_list) { + space->is_in_rotation_list = false; + + UT_LIST_REMOVE(fil_system->rotation_list, space); + } + UT_LIST_REMOVE(fil_system->space_list, space); ut_a(space->magic_n == FIL_SPACE_MAGIC_N); @@ -1518,22 +1577,25 @@ fil_space_free( } /** Create a space memory object and put it to the fil_system hash table. -The tablespace name is independent from the tablespace file-name. Error messages are issued to the server log. -@param[in] name Tablespace name -@param[in] id Tablespace identifier -@param[in] flags Tablespace flags -@param[in] purpose Tablespace purpose +@param[in] name tablespace name +@param[in] id tablespace identifier +@param[in] flags tablespace flags +@param[in] purpose tablespace purpose +@param[in,out] crypt_data encryption information +@param[in] create_table whether this is CREATE TABLE +@param[in] mode encryption mode @return pointer to created tablespace, to be filled in with fil_node_create() @retval NULL on failure (such as when the same tablespace exists) */ fil_space_t* fil_space_create( - const char* name, - ulint id, - ulint flags, - fil_type_t purpose, - fil_space_crypt_t* crypt_data, /*!< in: crypt data */ - bool create_table) /*!< in: true if create table */ + const char* name, + ulint id, + ulint flags, + fil_type_t purpose, + fil_space_crypt_t* crypt_data, + bool create_table, + fil_encryption_t mode) { fil_space_t* space; @@ -1595,7 +1657,6 @@ fil_space_create( space->flags = flags; space->magic_n = FIL_SPACE_MAGIC_N; - space->crypt_data = crypt_data; /* In create table we write page 0 so we have already @@ -1635,7 +1696,23 @@ fil_space_create( fil_system->max_assigned_id = id; } - mutex_exit(&fil_system->mutex); + /* Inform key rotation that there could be something + to do */ + if (purpose == FIL_TYPE_TABLESPACE + && !srv_fil_crypt_rotate_key_age && fil_crypt_threads_event && + (mode == FIL_ENCRYPTION_ON || mode == FIL_ENCRYPTION_OFF || + srv_encrypt_tables)) { + /* Key rotation is not enabled, need to inform background + encryption threads. */ + UT_LIST_ADD_LAST(fil_system->rotation_list, space); + space->is_in_rotation_list = true; + mutex_exit(&fil_system->mutex); + mutex_enter(&fil_crypt_threads_mutex); + os_event_set(fil_crypt_threads_event); + mutex_exit(&fil_crypt_threads_mutex); + } else { + mutex_exit(&fil_system->mutex); + } return(space); } @@ -1750,7 +1827,7 @@ fil_space_get_space( return(NULL); } - fil_node_complete_io(node, fil_system, IORequestRead); + fil_node_complete_io(node, IORequestRead); } return(space); @@ -1972,6 +2049,7 @@ fil_init( UT_LIST_INIT(fil_system->LRU, &fil_node_t::LRU); UT_LIST_INIT(fil_system->space_list, &fil_space_t::space_list); + UT_LIST_INIT(fil_system->rotation_list, &fil_space_t::rotation_list); UT_LIST_INIT(fil_system->unflushed_spaces, &fil_space_t::unflushed_spaces); UT_LIST_INIT(fil_system->named_spaces, &fil_space_t::named_spaces); @@ -2546,9 +2624,7 @@ fil_recreate_tablespace( page_zip.m_start = #endif /* UNIV_DEBUG */ page_zip.m_end = page_zip.m_nonempty = page_zip.n_blobs = 0; - buf_flush_init_for_writing( - NULL, page, &page_zip, 0, - fsp_is_checksum_disabled(space_id)); + buf_flush_init_for_writing(NULL, page, &page_zip, 0); err = fil_write(page_id_t(space_id, 0), page_size, 0, page_size.physical(), page_zip.data); @@ -2611,7 +2687,7 @@ fil_recreate_tablespace( ut_ad(!page_size.is_compressed()); buf_flush_init_for_writing( - block, page, NULL, recv_lsn, false); + block, page, NULL, recv_lsn); err = fil_write(cur_page_id, page_size, 0, page_size.physical(), page); @@ -2625,8 +2701,7 @@ fil_recreate_tablespace( buf_block_get_page_zip(block); buf_flush_init_for_writing( - block, page, page_zip, recv_lsn, - fsp_is_checksum_disabled(space_id)); + block, page, page_zip, recv_lsn); err = fil_write(cur_page_id, page_size, 0, page_size.physical(), @@ -2854,16 +2929,22 @@ fil_check_pending_operations( mutex_enter(&fil_system->mutex); fil_space_t* sp = fil_space_get_by_id(id); + if (sp) { sp->stop_new_ops = true; + if (sp->crypt_data) { + sp->n_pending_ops++; + mutex_exit(&fil_system->mutex); + fil_space_crypt_close_tablespace(sp); + mutex_enter(&fil_system->mutex); + ut_ad(sp->n_pending_ops > 0); + sp->n_pending_ops--; + } } - mutex_exit(&fil_system->mutex); /* Check for pending operations. */ do { - mutex_enter(&fil_system->mutex); - sp = fil_space_get_by_id(id); count = fil_check_pending_ops(sp, count); @@ -2874,15 +2955,14 @@ fil_check_pending_operations( os_thread_sleep(20000); } + mutex_enter(&fil_system->mutex); } while (count > 0); /* Check for pending IO. */ *path = 0; - do { - mutex_enter(&fil_system->mutex); - + for (;;) { sp = fil_space_get_by_id(id); if (sp == NULL) { @@ -2900,11 +2980,13 @@ fil_check_pending_operations( mutex_exit(&fil_system->mutex); - if (count > 0) { - os_thread_sleep(20000); + if (count == 0) { + break; } - } while (count > 0); + os_thread_sleep(20000); + mutex_enter(&fil_system->mutex); + } ut_ad(sp); @@ -3801,9 +3883,7 @@ fil_ibd_create( if (!page_size.is_compressed()) { - buf_flush_init_for_writing( - NULL, page, NULL, 0, - fsp_is_checksum_disabled(space_id)); + buf_flush_init_for_writing(NULL, page, NULL, 0); err = os_file_write( request, path, file, page, 0, page_size.physical()); @@ -3817,9 +3897,7 @@ fil_ibd_create( page_zip.m_end = page_zip.m_nonempty = page_zip.n_blobs = 0; - buf_flush_init_for_writing( - NULL, page, &page_zip, 0, - fsp_is_checksum_disabled(space_id)); + buf_flush_init_for_writing(NULL, page, &page_zip, 0); err = os_file_write( request, path, file, page_zip.data, 0, @@ -3863,13 +3941,13 @@ fil_ibd_create( /* Create crypt data if the tablespace is either encrypted or user has requested it to remain unencrypted. */ - if (mode == FIL_SPACE_ENCRYPTION_ON || mode == FIL_SPACE_ENCRYPTION_OFF || + if (mode == FIL_ENCRYPTION_ON || mode == FIL_ENCRYPTION_OFF || srv_encrypt_tables) { crypt_data = fil_space_create_crypt_data(mode, key_id); } space = fil_space_create(name, space_id, flags, FIL_TYPE_TABLESPACE, - crypt_data, true); + crypt_data, true, mode); fil_node_t* node = NULL; @@ -4957,19 +5035,14 @@ fil_node_prepare_for_io( return(true); } -/********************************************************************//** -Updates the data structures when an i/o operation finishes. Updates the -pending i/o's field in the node appropriately. */ +/** Update the data structures when an i/o operation finishes. +@param[in,out] node file node +@param[in] type IO context */ static void -fil_node_complete_io( -/*=================*/ - fil_node_t* node, /*!< in: file node */ - fil_system_t* system, /*!< in: tablespace memory cache */ - const IORequest&type) /*!< in: IO_TYPE_*, marks the node as - modified if TYPE_IS_WRITE() */ +fil_node_complete_io(fil_node_t* node, const IORequest& type) { - ut_ad(mutex_own(&system->mutex)); + ut_ad(mutex_own(&fil_system->mutex)); ut_a(node->n_pending > 0); --node->n_pending; @@ -4981,9 +5054,9 @@ fil_node_complete_io( ut_ad(!srv_read_only_mode || fsp_is_system_temporary(node->space->id)); - ++system->modification_counter; + ++fil_system->modification_counter; - node->modification_counter = system->modification_counter; + node->modification_counter = fil_system->modification_counter; if (fil_buffering_disabled(node->space)) { @@ -4998,14 +5071,14 @@ fil_node_complete_io( node->space->is_in_unflushed_spaces = true; UT_LIST_ADD_FIRST( - system->unflushed_spaces, node->space); + fil_system->unflushed_spaces, node->space); } } if (node->n_pending == 0 && fil_space_belongs_in_lru(node->space)) { /* The node must be put back to the LRU list */ - UT_LIST_ADD_FIRST(system->LRU, node); + UT_LIST_ADD_FIRST(fil_system->LRU, node); } } @@ -5247,7 +5320,7 @@ fil_io( /* If we can tolerate the non-existent pages, we should return with DB_ERROR and let caller decide what to do. */ - fil_node_complete_io(node, fil_system, req_type); + fil_node_complete_io(node, req_type); mutex_exit(&fil_system->mutex); return(DB_ERROR); } @@ -5320,7 +5393,7 @@ fil_io( mutex_enter(&fil_system->mutex); - fil_node_complete_io(node, fil_system, req_type); + fil_node_complete_io(node, req_type); mutex_exit(&fil_system->mutex); @@ -5360,7 +5433,7 @@ fil_aio_wait( mutex_enter(&fil_system->mutex); - fil_node_complete_io(node, fil_system, type); + fil_node_complete_io(node, type); mutex_exit(&fil_system->mutex); @@ -5793,7 +5866,8 @@ fil_iterate( || page_type == FIL_PAGE_PAGE_COMPRESSED); /* If tablespace is encrypted, we need to decrypt - the page. */ + the page. Note that tablespaces are not in + fil_system during import. */ if (encrypted) { decrypted = fil_space_decrypt( iter.crypt_data, @@ -6070,9 +6144,7 @@ fil_tablespace_iterate( /* read (optional) crypt data */ iter.crypt_data = fil_space_read_crypt_data( - 0, page, FSP_HEADER_OFFSET - + fsp_header_get_encryption_offset( - callback.get_page_size())); + callback.get_page_size(), page); if (err == DB_SUCCESS) { @@ -6107,10 +6179,12 @@ fil_tablespace_iterate( err = fil_iterate(iter, block, callback); + if (iter.crypt_data) { + fil_space_destroy_crypt_data(&iter.crypt_data); + } + ut_free(io_buffer); ut_free(crypt_io_buffer); - - fil_space_destroy_crypt_data(&iter.crypt_data); } } @@ -6618,269 +6692,138 @@ fil_space_t::release_free_extents(ulint n_reserved) n_reserved_extents -= n_reserved; } -/****************************************************************** -Get crypt data for a tablespace */ -UNIV_INTERN -fil_space_crypt_t* -fil_space_get_crypt_data( -/*=====================*/ - ulint id) /*!< in: space id */ +/** Return the next fil_space_t. +Once started, the caller must keep calling this until it returns NULL. +fil_space_acquire() and fil_space_release() are invoked here which +blocks a concurrent operation from dropping the tablespace. +@param[in] prev_space Pointer to the previous fil_space_t. +If NULL, use the first fil_space_t on fil_system->space_list. +@return pointer to the next fil_space_t. +@retval NULL if this was the last*/ +fil_space_t* +fil_space_next( + fil_space_t* prev_space) { - fil_space_t* space; - fil_space_crypt_t* crypt_data = NULL; - - ut_ad(fil_system); + fil_space_t* space=prev_space; mutex_enter(&fil_system->mutex); - space = fil_space_get_by_id(id); + if (prev_space == NULL) { + space = UT_LIST_GET_FIRST(fil_system->space_list); - mutex_exit(&fil_system->mutex); + /* We can trust that space is not NULL because at least the + system tablespace is always present and loaded first. */ + space->n_pending_ops++; + } else { + ut_ad(space->n_pending_ops > 0); - if (space != NULL) { - /* If we have not yet read the page0 - of this tablespace we will do it now. */ - if (!space->crypt_data && !space->page_0_crypt_read) { - ulint space_id = space->id; - fil_node_t* node; - - ut_a(space->crypt_data == NULL); - node = UT_LIST_GET_FIRST(space->chain); - - byte *buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE, PSI_INSTRUMENT_ME)); - byte *page = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE)); - fil_read(page_id_t(space_id, 0), univ_page_size, 0, univ_page_size.physical(), - page); - ulint offset = FSP_HEADER_OFFSET - + fsp_header_get_encryption_offset( - page_size_t(space->flags)); - space->crypt_data = fil_space_read_crypt_data(space_id, page, offset); - ut_free(buf); - - DBUG_LOG("crypt", - "Read page 0 from" - << " tablespace " << space_id - << " name " << space->name - << " key_id " << (space->crypt_data - ? space->crypt_data->key_id - : 0) - << " encryption " - << (space->crypt_data - ? space->crypt_data->encryption : 0) - << " handle " << node->handle); - - ut_a(space->id == space_id); + /* Move on to the next fil_space_t */ + space->n_pending_ops--; + space = UT_LIST_GET_NEXT(space_list, space); - space->page_0_crypt_read = true; + /* Skip spaces that are being created by + fil_ibd_create(), or dropped, or !tablespace. */ + while (space != NULL + && (UT_LIST_GET_LEN(space->chain) == 0 + || space->stop_new_ops + || space->purpose != FIL_TYPE_TABLESPACE)) { + space = UT_LIST_GET_NEXT(space_list, space); } - crypt_data = space->crypt_data; - - if (!space->page_0_crypt_read) { - ib::warn() << "Space " << space->id << " name " - << space->name << " contains encryption " - << (space->crypt_data ? space->crypt_data->encryption : 0) - << " information for key_id " - << (space->crypt_data ? space->crypt_data->key_id : 0) - << " but page0 is not read."; + if (space != NULL) { + space->n_pending_ops++; } } - return(crypt_data); -} - -/*******************************************************************//** -Increments the count of pending operation, if space is not being deleted. -@return TRUE if being deleted, and operation should be skipped */ -UNIV_INTERN -ibool -fil_inc_pending_ops( -/*================*/ - ulint id, /*!< in: space id */ - ibool print_err) /*!< in: need to print error or not */ -{ - fil_space_t* space; - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - if (space == NULL) { - if (print_err) { - fprintf(stderr, - "InnoDB: Error: trying to do an operation on a" - " dropped tablespace %lu\n", - (ulong) id); - } - } - - if (space == NULL || space->stop_new_ops) { - mutex_exit(&fil_system->mutex); - - return(TRUE); - } - - space->n_pending_ops++; - mutex_exit(&fil_system->mutex); - return(FALSE); + return(space); } -/*******************************************************************//** -Decrements the count of pending operations. */ -UNIV_INTERN +/** +Remove space from key rotation list if there are no more +pending operations. +@param[in,out] space Tablespace */ +static void -fil_decr_pending_ops( -/*=================*/ - ulint id) /*!< in: space id */ +fil_space_remove_from_keyrotation(fil_space_t* space) { - fil_space_t* space; - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - if (space == NULL) { - fprintf(stderr, - "InnoDB: Error: decrementing pending operation" - " of a dropped tablespace %lu\n", - (ulong) id); - } + ut_ad(mutex_own(&fil_system->mutex)); + ut_ad(space); - if (space != NULL) { - space->n_pending_ops--; + if (space->n_pending_ops == 0 && space->is_in_rotation_list) { + space->is_in_rotation_list = false; + ut_a(UT_LIST_GET_LEN(fil_system->rotation_list) > 0); + UT_LIST_REMOVE(fil_system->rotation_list, space); } - - mutex_exit(&fil_system->mutex); } -/****************************************************************** -Set crypt data for a tablespace */ -UNIV_INTERN -fil_space_crypt_t* -fil_space_set_crypt_data( -/*=====================*/ - ulint id, /*!< in: space id */ - fil_space_crypt_t* crypt_data) /*!< in: crypt data */ -{ - fil_space_t* space; - fil_space_crypt_t* free_crypt_data = NULL; - fil_space_crypt_t* ret_crypt_data = NULL; - ut_ad(fil_system); +/** Return the next fil_space_t from key rotation list. +Once started, the caller must keep calling this until it returns NULL. +fil_space_acquire() and fil_space_release() are invoked here which +blocks a concurrent operation from dropping the tablespace. +@param[in] prev_space Pointer to the previous fil_space_t. +If NULL, use the first fil_space_t on fil_system->space_list. +@return pointer to the next fil_space_t. +@retval NULL if this was the last*/ +fil_space_t* +fil_space_keyrotate_next( + fil_space_t* prev_space) +{ + fil_space_t* space = prev_space; + fil_space_t* old = NULL; mutex_enter(&fil_system->mutex); - space = fil_space_get_by_id(id); - - if (space != NULL) { - if (space->crypt_data != NULL) { - /* Here we need to release fil_system mutex to - avoid mutex deadlock assertion. Here we would - taje mutexes in order fil_system, crypt_data and - in fil_crypt_start_encrypting_space we would - take them in order crypt_data, fil_system - at fil_space_get_flags -> fil_space_get_space */ - mutex_exit(&fil_system->mutex); - fil_space_merge_crypt_data(space->crypt_data, - crypt_data); - ret_crypt_data = space->crypt_data; - free_crypt_data = crypt_data; - } else { - space->crypt_data = crypt_data; - ret_crypt_data = space->crypt_data; - mutex_exit(&fil_system->mutex); + if (UT_LIST_GET_LEN(fil_system->rotation_list) == 0) { + if (space) { + ut_ad(space->n_pending_ops > 0); + space->n_pending_ops--; + fil_space_remove_from_keyrotation(space); } - } else { - /* there is a small risk that tablespace has been deleted */ - free_crypt_data = crypt_data; mutex_exit(&fil_system->mutex); + return(NULL); } - if (free_crypt_data != NULL) { - /* there was already crypt data present and the new crypt - * data provided as argument to this function has been merged - * into that => free new crypt data - */ - fil_space_destroy_crypt_data(&free_crypt_data); - } - - return ret_crypt_data; -} + if (prev_space == NULL) { + space = UT_LIST_GET_FIRST(fil_system->rotation_list); -/****************************************************************** -Get id of first tablespace that has node or ULINT_UNDEFINED if none */ -UNIV_INTERN -ulint -fil_get_first_space_safe() -/*======================*/ -{ - ulint out_id = ULINT_UNDEFINED; - fil_space_t* space; + /* We can trust that space is not NULL because we + checked list length above */ + } else { + ut_ad(space->n_pending_ops > 0); - mutex_enter(&fil_system->mutex); + /* Move on to the next fil_space_t */ + space->n_pending_ops--; - space = UT_LIST_GET_FIRST(fil_system->space_list); - if (space != NULL) { - do - { - if (!space->stop_new_ops && UT_LIST_GET_LEN(space->chain) > 0) { - out_id = space->id; - break; - } + old = space; + space = UT_LIST_GET_NEXT(rotation_list, space); - space = UT_LIST_GET_NEXT(space_list, space); - } while (space != NULL); + fil_space_remove_from_keyrotation(old); } - mutex_exit(&fil_system->mutex); - - return out_id; -} - -/****************************************************************** -Get id of next tablespace that has node or ULINT_UNDEFINED if none */ -UNIV_INTERN -ulint -fil_get_next_space_safe( -/*====================*/ - ulint id) /*!< in: previous space id */ -{ - bool found; - fil_space_t* space; - ulint out_id = ULINT_UNDEFINED; - - mutex_enter(&fil_system->mutex); + /* Skip spaces that are being created by fil_ibd_create(), + or dropped or truncated. Note that rotation_list contains only + space->purpose == FIL_TYPE_TABLESPACE. */ + while (space != NULL + && (UT_LIST_GET_LEN(space->chain) == 0 + || space->is_stopping())) { - space = fil_space_get_by_id(id); - if (space == NULL) { - /* we didn't find it...search for space with space->id > id */ - found = false; - space = UT_LIST_GET_FIRST(fil_system->space_list); - } else { - /* we found it, take next available space */ - found = true; + old = space; + space = UT_LIST_GET_NEXT(rotation_list, space); + fil_space_remove_from_keyrotation(old); } - while ((space = UT_LIST_GET_NEXT(space_list, space)) != NULL) { - - if (!found && space->id <= id) - continue; - - if (!space->stop_new_ops) { - /* inc reference to prevent drop */ - out_id = space->id; - break; - } + if (space != NULL) { + space->n_pending_ops++; } mutex_exit(&fil_system->mutex); - return out_id; + return(space); } - /********************************************************************//** Find correct node from file space @return node */ @@ -6981,26 +6924,6 @@ fil_space_found_by_id( return space; } -/****************************************************************//** -Acquire fil_system mutex */ -void -fil_system_enter(void) -/*==================*/ -{ - ut_ad(!mutex_own(&fil_system->mutex)); - mutex_enter(&fil_system->mutex); -} - -/****************************************************************//** -Release fil_system mutex */ -void -fil_system_exit(void) -/*=================*/ -{ - ut_ad(mutex_own(&fil_system->mutex)); - mutex_exit(&fil_system->mutex); -} - /** Get should we punch hole to tablespace. @param[in] node File node diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc index 1eb9ec37f5d..39a02aa40df 100644 --- a/storage/innobase/fil/fil0pagecompress.cc +++ b/storage/innobase/fil/fil0pagecompress.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (C) 2013, 2017, MariaDB Corporation. All Rights Reserved. +Copyright (C) 2013, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -309,9 +309,8 @@ fil_compress_page( fil_decompress_page(uncomp_page, comp_page, len, NULL); - if(buf_page_is_corrupted(false, uncomp_page, page_size, false)) { - buf_page_print(uncomp_page, page_size, BUF_PAGE_PRINT_NO_CRASH); - ut_error; + if (buf_page_is_corrupted(false, uncomp_page, page_size, space)) { + buf_page_print(uncomp_page, page_size, 0); } ut_free(comp_page); @@ -657,7 +656,5 @@ err_exit: << " compression method: " << fil_get_compression_alg_name(compression_alg) << "."; - buf_page_print(buf, page_size, BUF_PAGE_PRINT_NO_CRASH); - - ut_error; + buf_page_print(buf, page_size, 0); } diff --git a/storage/innobase/fsp/fsp0file.cc b/storage/innobase/fsp/fsp0file.cc index 74c153a65d5..b8ad49a254f 100644 --- a/storage/innobase/fsp/fsp0file.cc +++ b/storage/innobase/fsp/fsp0file.cc @@ -372,9 +372,7 @@ Datafile::read_first_page(bool read_only_mode) return(DB_CORRUPTION); } - m_crypt_info = fil_space_read_crypt_data( - m_space_id, m_first_page, - FSP_HEADER_OFFSET + fsp_header_get_encryption_offset(ps)); + m_crypt_info = fil_space_read_crypt_data(ps, m_first_page); return(err); } @@ -574,9 +572,7 @@ Datafile::validate_first_page(lsn_t* flush_lsn) /* The space_id can be most anything, except -1. */ error_txt = "A bad Space ID was found"; - } else if (buf_page_is_corrupted( - false, m_first_page, page_size, - fsp_is_checksum_disabled(m_space_id))) { + } else if (buf_page_is_corrupted(false, m_first_page, page_size)) { /* Look for checksum and other corruptions. */ error_txt = "Checksum mismatch"; @@ -701,7 +697,7 @@ Datafile::find_space_id() equal to univ_page_size.physical(). */ if (page_size == univ_page_size.physical()) { noncompressed_ok = !buf_page_is_corrupted( - false, page, univ_page_size, false); + false, page, univ_page_size, NULL); } bool compressed_ok = false; @@ -721,7 +717,7 @@ Datafile::find_space_id() true); compressed_ok = !buf_page_is_corrupted( - false, page, compr_page_size, false); + false, page, compr_page_size, NULL); } if (noncompressed_ok || compressed_ok) { diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index d37e3348820..57b6c8de825 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -176,16 +176,6 @@ fsp_get_space_header( return(header); } -/** Check if checksum is disabled for the given space. -@param[in] space_id tablespace ID -@return true if checksum is disabled for given space. */ -bool -fsp_is_checksum_disabled( - ulint space_id) -{ - return(fsp_is_system_temporary(space_id)); -} - #ifdef UNIV_DEBUG /** Skip some of the sanity checks that are time consuming even in debug mode and can affect frequent verification runs that are done to ensure stability of @@ -770,11 +760,9 @@ fsp_header_init( } } - ulint offset = FSP_HEADER_OFFSET - + fsp_header_get_encryption_offset(page_size); - fil_space_write_crypt_data(space_id, page, offset, - page_size.logical() - - offset - FIL_PAGE_DATA_END, mtr); + if (space->crypt_data) { + space->crypt_data->write_page0(space, page, mtr); + } return(true); } @@ -1065,8 +1053,6 @@ fsp_fill_free_list( ulint frag_n_used; ulint i; - ut_ad(header != NULL); - ut_ad(mtr != NULL); ut_ad(page_offset(header) == FSP_HEADER_OFFSET); ut_d(fsp_space_modify_check(space, mtr)); @@ -1379,7 +1365,7 @@ initialized (may be the same as mtr) @retval block rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded (init_mtr == mtr, or the page was not previously freed in mtr) @retval block (not allocated or initialized) otherwise */ -static MY_ATTRIBUTE((warn_unused_result)) +static MY_ATTRIBUTE((warn_unused_result, nonnull)) buf_block_t* fsp_alloc_free_page( fil_space_t* space, @@ -1395,9 +1381,6 @@ fsp_alloc_free_page( ulint free; const ulint space_id = space->id; - ut_ad(mtr); - ut_ad(init_mtr); - ut_d(fsp_space_modify_check(space, mtr)); header = fsp_get_space_header(space, page_size, mtr); @@ -2434,7 +2417,6 @@ fseg_alloc_free_page_low( ulint n; const ulint space_id = space->id; - ut_ad(mtr); ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR)); ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); @@ -2816,7 +2798,7 @@ fsp_reserve_free_extents( ulint size; ulint n_free; ulint n_free_up; - ulint reserve= 0; + ulint reserve; size_t total_reserved = 0; ulint rounds = 0; ulint n_pages_added = 0; @@ -2890,6 +2872,7 @@ try_again: break; case FSP_CLEANING: case FSP_BLOB: + reserve = 0; break; default: ut_error; diff --git a/storage/innobase/fsp/fsp0sysspace.cc b/storage/innobase/fsp/fsp0sysspace.cc index 6f7d09b6faa..974140fe565 100644 --- a/storage/innobase/fsp/fsp0sysspace.cc +++ b/storage/innobase/fsp/fsp0sysspace.cc @@ -935,7 +935,7 @@ SysTablespace::open_or_create( /* Create default crypt info for system tablespace if it does not yet exists. */ m_crypt_info = fil_space_create_crypt_data( - FIL_SPACE_ENCRYPTION_DEFAULT, + FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY); } diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index 58924724ef1..a7d09b5dd47 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -1798,7 +1798,7 @@ fts_create_one_common_table( } error = row_create_table_for_mysql(new_table, trx, false, - FIL_SPACE_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY); + FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY); if (error == DB_SUCCESS) { @@ -2015,7 +2015,7 @@ fts_create_one_index_table( FTS_INDEX_ILIST_LEN); error = row_create_table_for_mysql(new_table, trx, false, - FIL_SPACE_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY); + FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY); if (error == DB_SUCCESS) { dict_index_t* index = dict_mem_index_create( diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 79056f949ba..869864e303c 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1184,6 +1184,9 @@ static SHOW_VAR innodb_status_variables[]= { {"encryption_rotation_estimated_iops", (char*) &export_vars.innodb_encryption_rotation_estimated_iops, SHOW_LONG}, + {"encryption_key_rotation_list_length", + (char*)&export_vars.innodb_key_rotation_list_length, + SHOW_LONGLONG}, /* scrubing */ {"scrub_background_page_reorganizations", @@ -1936,6 +1939,15 @@ thd_has_edited_nontrans_tables( return((ibool) thd_non_transactional_update(thd)); } +/* Return high resolution timestamp for the start of the current query */ +UNIV_INTERN +unsigned long long +thd_query_start_micro( + const THD* thd) /*!< in: thread handle */ +{ + return thd_start_utime(thd); +} + /******************************************************************//** Returns true if the thread is executing a SELECT statement. @return true if thd is executing SELECT */ @@ -12402,7 +12414,7 @@ create_table_info_t::check_table_options() enum row_type row_format = m_form->s->row_type; ha_table_option_struct *options= m_form->s->option_struct; fil_encryption_t encrypt = (fil_encryption_t)options->encryption; - bool should_encrypt = (encrypt == FIL_SPACE_ENCRYPTION_ON); + bool should_encrypt = (encrypt == FIL_ENCRYPTION_ON); /* Currently we do not support encryption for spatial indexes thus do not allow creating table with forced @@ -12418,7 +12430,7 @@ create_table_info_t::check_table_options() } } - if (encrypt != FIL_SPACE_ENCRYPTION_DEFAULT && !m_allow_file_per_table) { + if (encrypt != FIL_ENCRYPTION_DEFAULT && !m_allow_file_per_table) { push_warning( m_thd, Sql_condition::WARN_LEVEL_WARN, HA_WRONG_CREATE_OPTION, @@ -12426,7 +12438,7 @@ create_table_info_t::check_table_options() return "ENCRYPTED"; } - if (encrypt == FIL_SPACE_ENCRYPTION_OFF && srv_encrypt_tables == 2) { + if (encrypt == FIL_ENCRYPTION_OFF && srv_encrypt_tables == 2) { push_warning( m_thd, Sql_condition::WARN_LEVEL_WARN, HA_WRONG_CREATE_OPTION, @@ -12507,8 +12519,8 @@ create_table_info_t::check_table_options() } /* If encryption is set up make sure that used key_id is found */ - if (encrypt == FIL_SPACE_ENCRYPTION_ON || - (encrypt == FIL_SPACE_ENCRYPTION_DEFAULT && srv_encrypt_tables)) { + if (encrypt == FIL_ENCRYPTION_ON || + (encrypt == FIL_ENCRYPTION_DEFAULT && srv_encrypt_tables)) { if (!encryption_key_id_exists((unsigned int)options->encryption_key_id)) { push_warning_printf( m_thd, Sql_condition::WARN_LEVEL_WARN, @@ -12521,7 +12533,7 @@ create_table_info_t::check_table_options() } /* Ignore nondefault key_id if encryption is set off */ - if (encrypt == FIL_SPACE_ENCRYPTION_OFF && + if (encrypt == FIL_ENCRYPTION_OFF && options->encryption_key_id != THDVAR(m_thd, default_encryption_key_id)) { push_warning_printf( m_thd, Sql_condition::WARN_LEVEL_WARN, @@ -12534,7 +12546,7 @@ create_table_info_t::check_table_options() /* If default encryption is used make sure that used kay is found from key file. */ - if (encrypt == FIL_SPACE_ENCRYPTION_DEFAULT && + if (encrypt == FIL_ENCRYPTION_DEFAULT && !srv_encrypt_tables && options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) { if (!encryption_key_id_exists((unsigned int)options->encryption_key_id)) { @@ -20290,22 +20302,24 @@ wsrep_innobase_kill_one_trx( if (!thd) { DBUG_PRINT("wsrep", ("no thd for conflicting lock")); - WSREP_WARN("no THD for trx: %lu", (ulong) victim_trx->id); + WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id); DBUG_RETURN(1); } if (!bf_thd) { DBUG_PRINT("wsrep", ("no BF thd for conflicting lock")); - WSREP_WARN("no BF THD for trx: %lu", (bf_trx) ? (ulong) bf_trx->id : (ulong) 0); + WSREP_WARN("no BF THD for trx: " TRX_ID_FMT, + bf_trx ? bf_trx->id : 0); DBUG_RETURN(1); } WSREP_LOG_CONFLICT(bf_thd, thd, TRUE); - WSREP_DEBUG("BF kill (%lu, seqno: %lld), victim: (%lu) trx: %llu", + WSREP_DEBUG("BF kill (%lu, seqno: %lld), victim: (%lu) trx: " + TRX_ID_FMT, signal, (long long)bf_seqno, thd_get_thread_id(thd), - (ulonglong) victim_trx->id); + victim_trx->id); WSREP_DEBUG("Aborting query: %s", (thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void"); @@ -20322,15 +20336,15 @@ wsrep_innobase_kill_one_trx( if (wsrep_thd_query_state(thd) == QUERY_EXITING) { - WSREP_DEBUG("kill trx EXITING for %llu", - (ulonglong) victim_trx->id); + WSREP_DEBUG("kill trx EXITING for " TRX_ID_FMT, + victim_trx->id); wsrep_thd_UNLOCK(thd); DBUG_RETURN(0); } if (wsrep_thd_exec_mode(thd) != LOCAL_STATE) { - WSREP_DEBUG("withdraw for BF trx: %llu, state: %d", - (longlong) victim_trx->id, + WSREP_DEBUG("withdraw for BF trx: " TRX_ID_FMT ", state: %d", + victim_trx->id, wsrep_thd_get_conflict_state(thd)); } @@ -20339,8 +20353,8 @@ wsrep_innobase_kill_one_trx( wsrep_thd_set_conflict_state(thd, MUST_ABORT); break; case MUST_ABORT: - WSREP_DEBUG("victim %llu in MUST ABORT state", - (longlong) victim_trx->id); + WSREP_DEBUG("victim " TRX_ID_FMT " in MUST ABORT state", + victim_trx->id); wsrep_thd_UNLOCK(thd); wsrep_thd_awake(thd, signal); DBUG_RETURN(0); @@ -20348,9 +20362,8 @@ wsrep_innobase_kill_one_trx( case ABORTED: case ABORTING: // fall through default: - WSREP_DEBUG("victim %llu in state %d", - (longlong) victim_trx->id, - wsrep_thd_get_conflict_state(thd)); + WSREP_DEBUG("victim " TRX_ID_FMT " in state %d", + victim_trx->id, wsrep_thd_get_conflict_state(thd)); wsrep_thd_UNLOCK(thd); DBUG_RETURN(0); break; @@ -20362,8 +20375,8 @@ wsrep_innobase_kill_one_trx( WSREP_DEBUG("kill query for: %ld", thd_get_thread_id(thd)); - WSREP_DEBUG("kill trx QUERY_COMMITTING for %llu", - (longlong) victim_trx->id); + WSREP_DEBUG("kill trx QUERY_COMMITTING for " TRX_ID_FMT, + victim_trx->id); if (wsrep_thd_exec_mode(thd) == REPL_RECV) { wsrep_abort_slave_trx(bf_seqno, @@ -20377,8 +20390,9 @@ wsrep_innobase_kill_one_trx( switch (rcode) { case WSREP_WARNING: - WSREP_DEBUG("cancel commit warning: %llu", - (ulonglong) victim_trx->id); + WSREP_DEBUG("cancel commit warning: " + TRX_ID_FMT, + victim_trx->id); wsrep_thd_UNLOCK(thd); wsrep_thd_awake(thd, signal); DBUG_RETURN(1); @@ -20387,9 +20401,9 @@ wsrep_innobase_kill_one_trx( break; default: WSREP_ERROR( - "cancel commit bad exit: %d %llu", - rcode, - (ulonglong) victim_trx->id); + "cancel commit bad exit: %d " + TRX_ID_FMT, + rcode, victim_trx->id); /* unable to interrupt, must abort */ /* note: kill_mysql() will block, if we cannot. * kill the lock holder first. @@ -20405,8 +20419,8 @@ wsrep_innobase_kill_one_trx( /* it is possible that victim trx is itself waiting for some * other lock. We need to cancel this waiting */ - WSREP_DEBUG("kill trx QUERY_EXEC for %llu", - (ulonglong) victim_trx->id); + WSREP_DEBUG("kill trx QUERY_EXEC for " TRX_ID_FMT, + victim_trx->id); victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; @@ -20443,7 +20457,7 @@ wsrep_innobase_kill_one_trx( break; case QUERY_IDLE: { - WSREP_DEBUG("kill IDLE for %llu", (ulonglong) victim_trx->id); + WSREP_DEBUG("kill IDLE for " TRX_ID_FMT, victim_trx->id); if (wsrep_thd_exec_mode(thd) == REPL_RECV) { WSREP_DEBUG("kill BF IDLE, seqno: %lld", @@ -21749,10 +21763,11 @@ static MYSQL_SYSVAR_UINT(encryption_rotate_key_age, PLUGIN_VAR_RQCMDARG, "Key rotation - re-encrypt in background " "all pages that were encrypted with a key that " - "many (or more) versions behind", + "many (or more) versions behind. Value 0 indicates " + "that key rotation is disabled.", NULL, innodb_encryption_rotate_key_age_update, - srv_fil_crypt_rotate_key_age, 0, UINT_MAX32, 0); + 1, 0, UINT_MAX32, 0); static MYSQL_SYSVAR_UINT(encryption_rotation_iops, srv_n_fil_crypt_iops, PLUGIN_VAR_RQCMDARG, @@ -22992,8 +23007,9 @@ innodb_encrypt_tables_validate( for update function */ struct st_mysql_value* value) /*!< in: incoming string */ { - if (check_sysvar_enum(thd, var, save, value)) + if (check_sysvar_enum(thd, var, save, value)) { return 1; + } ulong encrypt_tables = *(ulong*)save; @@ -23005,6 +23021,17 @@ innodb_encrypt_tables_validate( "encryption plugin is not available"); return 1; } + + if (!srv_fil_crypt_rotate_key_age) { + const char *msg = (encrypt_tables ? "enable" : "disable"); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_UNSUPPORTED, + "InnoDB: cannot %s encryption, " + "innodb_encryption_rotate_key_age=0" + " i.e. key rotation disabled", msg); + return 1; + } + return 0; } diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h index 49eb150036b..463717ee6b2 100644 --- a/storage/innobase/handler/ha_innodb.h +++ b/storage/innobase/handler/ha_innodb.h @@ -549,6 +549,14 @@ int thd_slave_thread(const MYSQL_THD thd); @retval 1 the user thread is running a non-transactional update */ int thd_non_transactional_update(const MYSQL_THD thd); +/** Get high resolution timestamp for the current query start time. +The timestamp is not anchored to any specific point in time, +but can be used for comparison. +@param thd user thread +@retval timestamp in microseconds precision +*/ +unsigned long long thd_start_utime(const MYSQL_THD thd); + /** Get the user thread's binary logging format @param thd user thread @return Value to be used as index into the binlog_format_names array */ @@ -1022,4 +1030,3 @@ ib_push_frm_error( TABLE* table, /*!< in: MySQL table */ ulint n_keys, /*!< in: InnoDB #keys */ bool push_warning); /*!< in: print warning ? */ - diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 82fa4b724f4..fd61db9725d 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -4532,9 +4532,11 @@ prepare_inplace_alter_table_dict( ulint space_id = 0; ulint z = 0; ulint key_id = FIL_DEFAULT_ENCRYPTION_KEY; - fil_encryption_t mode = FIL_SPACE_ENCRYPTION_DEFAULT; + fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT; - crypt_data = fil_space_get_crypt_data(ctx->prebuilt->table->space); + fil_space_t* space = fil_space_acquire(ctx->prebuilt->table->space); + crypt_data = space->crypt_data; + fil_space_release(space); if (crypt_data) { key_id = crypt_data->key_id; diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index dd43a52ae66..6bf16573efd 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -8560,22 +8560,31 @@ static ST_FIELD_INFO innodb_tablespaces_encryption_fields_info[] = STRUCT_FLD(old_name, ""), STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, +#define TABLESPACES_ENCRYPTION_ROTATING_OR_FLUSHING 9 + {STRUCT_FLD(field_name, "ROTATING_OR_FLUSHING"), + STRUCT_FLD(field_length, 1), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + END_OF_ST_FIELD_INFO }; /**********************************************************************//** Function to fill INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION -with information collected by scanning SYS_TABLESPACES table and then use -fil_space() +with information collected by scanning SYS_TABLESPACES table. +@param[in] thd thread handle +@param[in] space Tablespace +@param[in] table_to_fill I_S table to fill @return 0 on success */ static int i_s_dict_fill_tablespaces_encryption( -/*==========================*/ - THD* thd, /*!< in: thread */ - ulint space, /*!< in: space ID */ - const char* name, /*!< in: tablespace name */ - TABLE* table_to_fill) /*!< in/out: fill this table */ + THD* thd, + fil_space_t* space, + TABLE* table_to_fill) { Field** fields; struct fil_space_crypt_status_t status; @@ -8585,10 +8594,11 @@ i_s_dict_fill_tablespaces_encryption( fields = table_to_fill->field; fil_space_crypt_get_status(space, &status); - OK(fields[TABLESPACES_ENCRYPTION_SPACE]->store(space)); + + OK(fields[TABLESPACES_ENCRYPTION_SPACE]->store(space->id)); OK(field_store_string(fields[TABLESPACES_ENCRYPTION_NAME], - name)); + space->name)); OK(fields[TABLESPACES_ENCRYPTION_ENCRYPTION_SCHEME]->store( status.scheme)); @@ -8600,6 +8610,9 @@ i_s_dict_fill_tablespaces_encryption( status.current_key_version)); OK(fields[TABLESPACES_ENCRYPTION_CURRENT_KEY_ID]->store( status.key_id)); + OK(fields[TABLESPACES_ENCRYPTION_ROTATING_OR_FLUSHING]->store( + (status.rotating || status.flushing) ? 1 : 0)); + if (status.rotating) { fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER]->set_notnull(); OK(fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER]->store( @@ -8613,6 +8626,7 @@ i_s_dict_fill_tablespaces_encryption( fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER] ->set_null(); } + OK(schema_table_store_record(thd, table_to_fill)); DBUG_RETURN(0); @@ -8652,30 +8666,36 @@ i_s_tablespaces_encryption_fill_table( while (rec) { const char* err_msg; - ulint space; + ulint space_id; const char* name; ulint flags; /* Extract necessary information from a SYS_TABLESPACES row */ err_msg = dict_process_sys_tablespaces( - heap, rec, &space, &name, &flags); + heap, rec, &space_id, &name, &flags); mtr_commit(&mtr); mutex_exit(&dict_sys->mutex); - if (space == 0) { + if (space_id == 0) { found_space_0 = true; } - if (!err_msg) { + fil_space_t* space = fil_space_acquire_silent(space_id); + + if (!err_msg && space) { i_s_dict_fill_tablespaces_encryption( - thd, space, name, tables->table); + thd, space, tables->table); } else { push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_CANT_FIND_SYSTEM_REC, "%s", err_msg); } + if (space) { + fil_space_release(space); + } + mem_heap_empty(heap); /* Get the next record */ @@ -8691,10 +8711,13 @@ i_s_tablespaces_encryption_fill_table( if (found_space_0 == false) { /* space 0 does for what ever unknown reason not show up * in iteration above, add it manually */ - ulint space = 0; - const char* name = NULL; + + fil_space_t* space = fil_space_acquire_silent(0); + i_s_dict_fill_tablespaces_encryption( - thd, space, name, tables->table); + thd, space, tables->table); + + fil_space_release(space); } DBUG_RETURN(0); @@ -8845,22 +8868,32 @@ static ST_FIELD_INFO innodb_tablespaces_scrubbing_fields_info[] = STRUCT_FLD(old_name, ""), STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, +#define TABLESPACES_ENCRYPTION_ROTATING_OR_FLUSHING 9 + {STRUCT_FLD(field_name, "ROTATING_OR_FLUSHING"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + END_OF_ST_FIELD_INFO }; /**********************************************************************//** Function to fill INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING -with information collected by scanning SYS_TABLESPACES table and then use -fil_space() +with information collected by scanning SYS_TABLESPACES table and +fil_space. +@param[in] thd Thread handle +@param[in] space Tablespace +@param[in] table_to_fill I_S table @return 0 on success */ static int i_s_dict_fill_tablespaces_scrubbing( -/*==========================*/ - THD* thd, /*!< in: thread */ - ulint space, /*!< in: space ID */ - const char* name, /*!< in: tablespace name */ - TABLE* table_to_fill) /*!< in/out: fill this table */ + THD* thd, + fil_space_t* space, + TABLE* table_to_fill) { Field** fields; struct fil_space_scrub_status_t status; @@ -8870,10 +8903,11 @@ i_s_dict_fill_tablespaces_scrubbing( fields = table_to_fill->field; fil_space_get_scrub_status(space, &status); - OK(fields[TABLESPACES_SCRUBBING_SPACE]->store(space)); + + OK(fields[TABLESPACES_SCRUBBING_SPACE]->store(space->id)); OK(field_store_string(fields[TABLESPACES_SCRUBBING_NAME], - name)); + space->name)); OK(fields[TABLESPACES_SCRUBBING_COMPRESSED]->store( status.compressed ? 1 : 0)); @@ -8893,6 +8927,7 @@ i_s_dict_fill_tablespaces_scrubbing( TABLESPACES_SCRUBBING_CURRENT_SCRUB_ACTIVE_THREADS, TABLESPACES_SCRUBBING_CURRENT_SCRUB_PAGE_NUMBER, TABLESPACES_SCRUBBING_CURRENT_SCRUB_MAX_PAGE_NUMBER }; + if (status.scrubbing) { for (uint i = 0; i < array_elements(field_numbers); i++) { fields[field_numbers[i]]->set_notnull(); @@ -8912,6 +8947,7 @@ i_s_dict_fill_tablespaces_scrubbing( fields[field_numbers[i]]->set_null(); } } + OK(schema_table_store_record(thd, table_to_fill)); DBUG_RETURN(0); @@ -8951,30 +8987,36 @@ i_s_tablespaces_scrubbing_fill_table( while (rec) { const char* err_msg; - ulint space; + ulint space_id; const char* name; ulint flags; /* Extract necessary information from a SYS_TABLESPACES row */ err_msg = dict_process_sys_tablespaces( - heap, rec, &space, &name, &flags); + heap, rec, &space_id, &name, &flags); mtr_commit(&mtr); mutex_exit(&dict_sys->mutex); - if (space == 0) { + if (space_id == 0) { found_space_0 = true; } - if (!err_msg) { + fil_space_t* space = fil_space_acquire_silent(space_id); + + if (!err_msg && space) { i_s_dict_fill_tablespaces_scrubbing( - thd, space, name, tables->table); + thd, space, tables->table); } else { push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_CANT_FIND_SYSTEM_REC, "%s", err_msg); } + if (space) { + fil_space_release(space); + } + mem_heap_empty(heap); /* Get the next record */ @@ -8990,10 +9032,12 @@ i_s_tablespaces_scrubbing_fill_table( if (found_space_0 == false) { /* space 0 does for what ever unknown reason not show up * in iteration above, add it manually */ - ulint space = 0; - const char* name = NULL; + fil_space_t* space = fil_space_acquire_silent(0); + i_s_dict_fill_tablespaces_scrubbing( - thd, space, name, tables->table); + thd, space, tables->table); + + fil_space_release(space); } DBUG_RETURN(0); diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 7dee46dc4a9..2fde0cb23b9 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -4458,7 +4458,6 @@ ibuf_merge_or_delete_for_page( ulint volume = 0; #endif /* UNIV_IBUF_DEBUG */ page_zip_des_t* page_zip = NULL; - fil_space_t* space = NULL; bool corruption_noticed = false; mtr_t mtr; @@ -4489,6 +4488,8 @@ ibuf_merge_or_delete_for_page( return; } + fil_space_t* space; + if (update_ibuf_bitmap) { ut_ad(page_size != NULL); @@ -4500,10 +4501,9 @@ ibuf_merge_or_delete_for_page( space = fil_space_acquire(page_id.space()); - if (space == NULL) { - /* Do not try to read the bitmap page from space; - just delete the ibuf records for the page */ - + if (UNIV_UNLIKELY(!space)) { + /* Do not try to read the bitmap page from the + non-existent tablespace, delete the ibuf records */ block = NULL; update_ibuf_bitmap = FALSE; } else { @@ -4536,6 +4536,8 @@ ibuf_merge_or_delete_for_page( || fsp_descr_page(page_id, *page_size))) { return; + } else { + space = NULL; } heap = mem_heap_create(512); @@ -4566,9 +4568,6 @@ ibuf_merge_or_delete_for_page( " insert buffer merge for this page. Please" " run CHECK TABLE on your tables to determine" " if they are corrupt after this."; - - ib::error() << "Please submit a detailed bug" - " report to http://bugs.mysql.com"; ut_ad(0); } } @@ -4788,15 +4787,17 @@ reset_bit: } ibuf_mtr_commit(&mtr); + + if (space) { + fil_space_release(space); + } + btr_pcur_close(&pcur); mem_heap_free(heap); my_atomic_addlint(&ibuf->n_merges, 1); ibuf_add_ops(ibuf->n_merged_ops, mops); ibuf_add_ops(ibuf->n_discarded_ops, dops); - if (space != NULL) { - fil_space_release(space); - } #ifdef UNIV_IBUF_COUNT_DEBUG ut_a(ibuf_count_get(page_id) == 0); diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index d9243c6627c..3aab242606d 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -30,7 +30,6 @@ Created 11/5/1995 Heikki Tuuri /** Magic value to use instead of checksums when they are disabled */ #define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL -#include "univ.i" #include "fil0fil.h" #include "mtr0types.h" #include "buf0types.h" @@ -766,6 +765,87 @@ buf_block_unfix( # endif /* UNIV_DEBUG */ #endif /* !UNIV_INNOCHECKSUM */ +/** Checks if the page is in crc32 checksum format. +@param[in] read_buf database page +@param[in] checksum_field1 new checksum field +@param[in] checksum_field2 old checksum field +@param[in] page_no page number of given read_buf +@param[in] is_log_enabled true if log option is enabled +@param[in] log_file file pointer to log_file +@param[in] curr_algo current checksum algorithm +@param[in] use_legacy_big_endian use legacy big endian algorithm +@return true if the page is in crc32 checksum format. */ +bool +buf_page_is_checksum_valid_crc32( + const byte* read_buf, + ulint checksum_field1, + ulint checksum_field2, +#ifdef UNIV_INNOCHECKSUM + uintmax_t page_no, + bool is_log_enabled, + FILE* log_file, + const srv_checksum_algorithm_t curr_algo, +#endif /* UNIV_INNOCHECKSUM */ + bool use_legacy_big_endian) + MY_ATTRIBUTE((nonnull(1), warn_unused_result)); + +/** Checks if the page is in innodb checksum format. +@param[in] read_buf database page +@param[in] checksum_field1 new checksum field +@param[in] checksum_field2 old checksum field +@param[in] page_no page number of given read_buf +@param[in] is_log_enabled true if log option is enabled +@param[in] log_file file pointer to log_file +@param[in] curr_algo current checksum algorithm +@return true if the page is in innodb checksum format. */ +bool +buf_page_is_checksum_valid_innodb( + const byte* read_buf, + ulint checksum_field1, + ulint checksum_field2 +#ifdef UNIV_INNOCHECKSUM + ,uintmax_t page_no, + bool is_log_enabled, + FILE* log_file, + const srv_checksum_algorithm_t curr_algo +#endif /* UNIV_INNOCHECKSUM */ + ) + MY_ATTRIBUTE((nonnull(1), warn_unused_result)); + +/** Checks if the page is in none checksum format. +@param[in] read_buf database page +@param[in] checksum_field1 new checksum field +@param[in] checksum_field2 old checksum field +@param[in] page_no page number of given read_buf +@param[in] is_log_enabled true if log option is enabled +@param[in] log_file file pointer to log_file +@param[in] curr_algo current checksum algorithm +@return true if the page is in none checksum format. */ +bool +buf_page_is_checksum_valid_none( + const byte* read_buf, + ulint checksum_field1, + ulint checksum_field2 +#ifdef UNIV_INNOCHECKSUM + ,uintmax_t page_no, + bool is_log_enabled, + FILE* log_file, + const srv_checksum_algorithm_t curr_algo +#endif /* UNIV_INNOCHECKSUM */ + ) + MY_ATTRIBUTE((nonnull(1), warn_unused_result)); + +/********************************************************************//** +Check if page is maybe compressed, encrypted or both when we encounter +corrupted page. Note that we can't be 100% sure if page is corrupted +or decrypt/decompress just failed. +@param[in] bpage Page +@return true if page corrupted, false if not */ +bool +buf_page_check_corrupt( + buf_page_t* bpage) /*!< in/out: buffer page read from disk */ + MY_ATTRIBUTE((nonnull, warn_unused_result)); + /** Checks if a page contains only zeroes. @param[in] read_buf database page @param[in] page_size page size @@ -780,23 +860,23 @@ buf_page_is_zeroes( the LSN @param[in] read_buf database page @param[in] page_size page size -@param[in] skip_checksum if true, skip checksum +@param[in] space tablespace @param[in] page_no page number of given read_buf @param[in] strict_check true if strict-check option is enabled @param[in] is_log_enabled true if log option is enabled @param[in] log_file file pointer to log_file -@return TRUE if corrupted */ -ibool +@return whether the page is corrupted */ +bool buf_page_is_corrupted( bool check_lsn, const byte* read_buf, const page_size_t& page_size, - bool skip_checksum + const fil_space_t* space = NULL #ifdef UNIV_INNOCHECKSUM - ,uintmax_t page_no, - bool strict_check, - bool is_log_enabled, - FILE* log_file + ,uintmax_t page_no = 0, + bool strict_check = false, + bool is_log_enabled = false, + FILE* log_file = NULL #endif /* UNIV_INNOCHECKSUM */ ) MY_ATTRIBUTE((warn_unused_result)); #ifndef UNIV_INNOCHECKSUM @@ -1476,37 +1556,6 @@ buf_page_encrypt_before_write( byte* frame, /*!< in: src frame */ ulint space_id); /*!< in: space id */ -/********************************************************************** -The hook that is called after page is written to disk. -The function releases any resources needed for encryption that was allocated -in buf_page_encrypt_before_write */ -UNIV_INTERN -ibool -buf_page_encrypt_after_write( -/*=========================*/ - buf_page_t* page); /*!< in/out: buffer page that was flushed */ - -/********************************************************************//** -The hook that is called just before a page is read from disk. -The function allocates memory that is used to temporarily store disk content -before getting decrypted */ -UNIV_INTERN -byte* -buf_page_decrypt_before_read( -/*=========================*/ - buf_page_t* page, /*!< in/out: buffer page read from disk */ - ulint zip_size); /*!< in: compressed page size, or 0 */ - -/********************************************************************//** -The hook that is called just after a page is read from disk. -The function decrypt disk content into buf_page_t and releases the -temporary buffer that was allocated in buf_page_decrypt_before_read */ -UNIV_INTERN -ibool -buf_page_decrypt_after_read( -/*========================*/ - buf_page_t* page); /*!< in/out: buffer page read from disk */ - /** @brief The temporary memory structure. NOTE! The definition appears here only for other modules of this @@ -1589,14 +1638,8 @@ public: operation needed. */ unsigned key_version; /*!< key version for this block */ - bool page_encrypted; /*!< page is page encrypted */ - bool page_compressed;/*!< page is page compressed */ - ulint stored_checksum;/*!< stored page checksum if page - encrypted */ - bool encrypted; /*!< page is still encrypted */ - ulint calculated_checksum; - /*!< calculated checksum if page - encrypted */ + bool encrypted; /*!< page is still encrypted */ + ulint real_size; /*!< Real size of the page Normal pages == UNIV_PAGE_SIZE page compressed pages, payload diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index 337a5417c12..d3a83b62a46 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -88,7 +88,7 @@ buf_flush_init_for_writing( byte* page, void* page_zip_, lsn_t newest_lsn, - bool skip_checksum); + bool skip_checksum = false); # if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG /********************************************************************//** diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index 061153589c8..580118f2fdd 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -295,7 +295,6 @@ dict_index_is_clust( /*================*/ const dict_index_t* index) /*!< in: index */ { - ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); return(index->type & DICT_CLUSTERED); @@ -322,7 +321,6 @@ dict_index_is_unique( /*=================*/ const dict_index_t* index) /*!< in: index */ { - ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); return(index->type & DICT_UNIQUE); @@ -337,7 +335,6 @@ dict_index_is_univ( /*===============*/ const dict_index_t* index) /*!< in: index */ { - ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); return(index->type & DICT_UNIVERSAL); @@ -398,7 +395,6 @@ dict_index_is_sec_or_ibuf( { ulint type; - ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); type = index->type; @@ -417,7 +413,6 @@ dict_table_get_n_user_cols( /*=======================*/ const dict_table_t* table) /*!< in: table */ { - ut_ad(table); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); return(table->n_cols - dict_table_get_n_sys_cols(table)); @@ -449,7 +444,6 @@ dict_table_get_n_cols( /*==================*/ const dict_table_t* table) /*!< in: table */ { - ut_ad(table); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); return(table->n_cols); @@ -1686,7 +1680,6 @@ dict_index_is_corrupted( /*====================*/ const dict_index_t* index) /*!< in: index */ { - ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); return((index->type & DICT_CORRUPT) diff --git a/storage/innobase/include/fil0crypt.h b/storage/innobase/include/fil0crypt.h index d6a6ecb1538..831d61445d8 100644 --- a/storage/innobase/include/fil0crypt.h +++ b/storage/innobase/include/fil0crypt.h @@ -26,6 +26,8 @@ Created 04/01/2015 Jan Lindström #ifndef fil0crypt_h #define fil0crypt_h +#ifndef UNIV_INNOCHECKSUM + #include "os0event.h" #include "my_crypt.h" @@ -40,14 +42,6 @@ static const unsigned char CRYPT_MAGIC[MAGIC_SZ] = { /* This key will be used if nothing else is given */ #define FIL_DEFAULT_ENCRYPTION_KEY ENCRYPTION_KEY_SYSTEM_DATA -/** Enum values for encryption table option */ -typedef enum { - FIL_SPACE_ENCRYPTION_DEFAULT = 0, /* Tablespace encrypted if - srv_encrypt_tables = ON */ - FIL_SPACE_ENCRYPTION_ON = 1, /* Tablespace is encrypted always */ - FIL_SPACE_ENCRYPTION_OFF = 2 /* Tablespace is not encrypted */ -} fil_encryption_t; - extern os_event_t fil_crypt_threads_event; /** @@ -107,23 +101,21 @@ struct fil_space_rotate_state_t } scrubbing; }; -struct fil_space_crypt_struct : st_encryption_scheme +struct fil_space_crypt_t : st_encryption_scheme { public: /** Constructor. Does not initialize the members! The object is expected to be placed in a buffer that has been zero-initialized. */ - fil_space_crypt_struct( + fil_space_crypt_t( uint new_type, uint new_min_key_version, uint new_key_id, - ulint offset, fil_encryption_t new_encryption) : st_encryption_scheme(), min_key_version(new_min_key_version), - page0_offset(offset), + page0_offset(0), encryption(new_encryption), - closing(false), key_found(), rotate_state() { @@ -134,9 +126,9 @@ struct fil_space_crypt_struct : st_encryption_scheme locker = crypt_data_scheme_locker; type = new_type; - if (new_encryption == FIL_SPACE_ENCRYPTION_OFF || + if (new_encryption == FIL_ENCRYPTION_OFF || (!srv_encrypt_tables && - new_encryption == FIL_SPACE_ENCRYPTION_DEFAULT)) { + new_encryption == FIL_ENCRYPTION_DEFAULT)) { type = CRYPT_SCHEME_UNENCRYPTED; } else { type = CRYPT_SCHEME_1; @@ -145,9 +137,8 @@ struct fil_space_crypt_struct : st_encryption_scheme } /** Destructor */ - ~fil_space_crypt_struct() + ~fil_space_crypt_t() { - closing = true; mutex_free(&mutex); } @@ -165,45 +156,37 @@ struct fil_space_crypt_struct : st_encryption_scheme /** Returns true if tablespace should be encrypted */ bool should_encrypt() const { - return ((encryption == FIL_SPACE_ENCRYPTION_ON) || + return ((encryption == FIL_ENCRYPTION_ON) || (srv_encrypt_tables && - encryption == FIL_SPACE_ENCRYPTION_DEFAULT)); + encryption == FIL_ENCRYPTION_DEFAULT)); } /** Return true if tablespace is encrypted. */ bool is_encrypted() const { - return (encryption != FIL_SPACE_ENCRYPTION_OFF); + return (encryption != FIL_ENCRYPTION_OFF); } /** Return true if default tablespace encryption is used, */ bool is_default_encryption() const { - return (encryption == FIL_SPACE_ENCRYPTION_DEFAULT); + return (encryption == FIL_ENCRYPTION_DEFAULT); } /** Return true if tablespace is not encrypted. */ bool not_encrypted() const { - return (encryption == FIL_SPACE_ENCRYPTION_OFF); + return (encryption == FIL_ENCRYPTION_OFF); } - /** Is this tablespace closing. */ - bool is_closing(bool is_fixed) { - bool closed; - if (!is_fixed) { - mutex_enter(&mutex); - } - closed = closing; - if (!is_fixed) { - mutex_exit(&mutex); - } - return closed; - } + /** Write crypt data to a page (0) + @param[in] space tablespace + @param[in,out] page0 first page of the tablespace + @param[in,out] mtr mini-transaction */ + void write_page0(const fil_space_t* space, byte* page0, mtr_t* mtr); uint min_key_version; // min key version for this space ulint page0_offset; // byte offset on page 0 for crypt data fil_encryption_t encryption; // Encryption setup ib_mutex_t mutex; // mutex protecting following variables - bool closing; // is tablespace being closed /** Return code from encryption_key_get_latest_version. If ENCRYPTION_KEY_VERSION_INVALID encryption plugin @@ -215,324 +198,306 @@ struct fil_space_crypt_struct : st_encryption_scheme fil_space_rotate_state_t rotate_state; }; -/* structure containing encryption specification */ -typedef struct fil_space_crypt_struct fil_space_crypt_t; +/** Status info about encryption */ +struct fil_space_crypt_status_t { + ulint space; /*!< tablespace id */ + ulint scheme; /*!< encryption scheme */ + uint min_key_version; /*!< min key version */ + uint current_key_version;/*!< current key version */ + uint keyserver_requests;/*!< no of key requests to key server */ + ulint key_id; /*!< current key_id */ + bool rotating; /*!< is key rotation ongoing */ + bool flushing; /*!< is flush at end of rotation ongoing */ + ulint rotate_next_page_number; /*!< next page if key rotating */ + ulint rotate_max_page_number; /*!< max page if key rotating */ +}; + +/** Statistics about encryption key rotation */ +struct fil_crypt_stat_t { + ulint pages_read_from_cache; + ulint pages_read_from_disk; + ulint pages_modified; + ulint pages_flushed; + ulint estimated_iops; +}; + +/** Status info about scrubbing */ +struct fil_space_scrub_status_t { + ulint space; /*!< tablespace id */ + bool compressed; /*!< is space compressed */ + time_t last_scrub_completed; /*!< when was last scrub completed */ + bool scrubbing; /*!< is scrubbing ongoing */ + time_t current_scrub_started; /*!< when started current scrubbing */ + ulint current_scrub_active_threads; /*!< current scrub active threads */ + ulint current_scrub_page_number; /*!< current scrub page no */ + ulint current_scrub_max_page_number; /*!< current scrub max page no */ +}; /********************************************************************* -Init global resources needed for tablespace encryption/decryption */ +Init space crypt */ UNIV_INTERN void fil_space_crypt_init(); /********************************************************************* -Cleanup global resources needed for tablespace encryption/decryption */ +Cleanup space crypt */ UNIV_INTERN void fil_space_crypt_cleanup(); -/********************************************************************* -Create crypt data, i.e data that is used for a single tablespace */ -UNIV_INTERN -fil_space_crypt_t * -fil_space_create_crypt_data( -/*========================*/ - fil_encryption_t encrypt_mode, /*!< in: encryption mode */ - uint key_id) /*!< in: encryption key id */ - __attribute__((warn_unused_result)); - -/********************************************************************* -Destroy crypt data */ -UNIV_INTERN -void -fil_space_destroy_crypt_data( -/*=========================*/ - fil_space_crypt_t **crypt_data); /*!< in/out: crypt data */ - -/********************************************************************* -Get crypt data for a space*/ -UNIV_INTERN -fil_space_crypt_t * -fil_space_get_crypt_data( -/*=====================*/ - ulint space); /*!< in: tablespace id */ +/** +Create a fil_space_crypt_t object +@param[in] encrypt_mode FIL_ENCRYPTION_DEFAULT or + FIL_ENCRYPTION_ON or + FIL_ENCRYPTION_OFF -/********************************************************************* -Set crypt data for a space*/ +@param[in] key_id Encryption key id +@return crypt object */ UNIV_INTERN fil_space_crypt_t* -fil_space_set_crypt_data( -/*=====================*/ - ulint space, /*!< in: tablespace id */ - fil_space_crypt_t* crypt_data); /*!< in: crypt data to set */ +fil_space_create_crypt_data( + fil_encryption_t encrypt_mode, + uint key_id) + MY_ATTRIBUTE((warn_unused_result)); -/********************************************************************* -Merge crypt data */ +/****************************************************************** +Merge fil_space_crypt_t object +@param[in,out] dst Destination cryp data +@param[in] src Source crypt data */ UNIV_INTERN void fil_space_merge_crypt_data( -/*=======================*/ - fil_space_crypt_t* dst_crypt_data, /*!< in: crypt_data */ - const fil_space_crypt_t* src_crypt_data); /*!< in: crypt data */ - -/********************************************************************* -Read crypt data from buffer page */ + fil_space_crypt_t* dst, + const fil_space_crypt_t* src); + +/** Initialize encryption parameters from a tablespace header page. +@param[in] page_size page size of the tablespace +@param[in] page first page of the tablespace +@return crypt data from page 0 +@retval NULL if not present or not valid */ UNIV_INTERN -fil_space_crypt_t * -fil_space_read_crypt_data( -/*======================*/ - ulint space, /*!< in: tablespace id */ - const byte* page, /*!< in: buffer page */ - ulint offset); /*!< in: offset where crypt data is stored */ +fil_space_crypt_t* +fil_space_read_crypt_data(const page_size_t& page_size, const byte* page) + MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************* -Write crypt data to buffer page */ +/** +Free a crypt data object +@param[in,out] crypt_data crypt data to be freed */ UNIV_INTERN void -fil_space_write_crypt_data( -/*=======================*/ - ulint space, /*!< in: tablespace id */ - byte* page, /*!< in: buffer page */ - ulint offset, /*!< in: offset where to store data */ - ulint maxsize, /*!< in: max space available to store crypt data in */ - mtr_t * mtr); /*!< in: mini-transaction */ +fil_space_destroy_crypt_data( + fil_space_crypt_t **crypt_data); -/********************************************************************* -Clear crypt data from page 0 (used for import tablespace) */ +/****************************************************************** +Parse a MLOG_FILE_WRITE_CRYPT_DATA log entry +@param[in] ptr Log entry start +@param[in] end_ptr Log entry end +@param[in] block buffer block +@return position on log buffer */ UNIV_INTERN -void -fil_space_clear_crypt_data( -/*=======================*/ - byte* page, /*!< in: buffer page */ - ulint offset); /*!< in: offset where crypt data is stored */ +const byte* +fil_parse_write_crypt_data( + const byte* ptr, + const byte* end_ptr, + const buf_block_t* block) + MY_ATTRIBUTE((warn_unused_result)); + +/** Encrypt a buffer. +@param[in,out] crypt_data Crypt data +@param[in] space space_id +@param[in] offset Page offset +@param[in] lsn Log sequence number +@param[in] src_frame Page to encrypt +@param[in] page_size Page size +@param[in,out] dst_frame Output buffer +@return encrypted buffer or NULL */ +byte* +fil_encrypt_buf( + fil_space_crypt_t* crypt_data, + ulint space, + ulint offset, + lsn_t lsn, + const byte* src_frame, + const page_size_t& page_size, + byte* dst_frame) + MY_ATTRIBUTE((warn_unused_result)); -/********************************************************************* -Parse crypt data log record */ +/** +Encrypt a page. + +@param[in] space Tablespace +@param[in] offset Page offset +@param[in] lsn Log sequence number +@param[in] src_frame Page to encrypt +@param[in,out] dst_frame Output buffer +@return encrypted buffer or NULL */ UNIV_INTERN byte* -fil_parse_write_crypt_data( -/*=======================*/ - byte* ptr, /*!< in: start of log record */ - byte* end_ptr, /*!< in: end of log record */ - buf_block_t*); /*!< in: buffer page to apply record to */ +fil_space_encrypt( + const fil_space_t* space, + ulint offset, + lsn_t lsn, + byte* src_frame, + byte* dst_frame) + MY_ATTRIBUTE((warn_unused_result)); -/********************************************************************* -Check if extra buffer shall be allocated for decrypting after read */ +/** +Decrypt a page. +@param[in,out] crypt_data crypt_data +@param[in] tmp_frame Temporary buffer +@param[in] page_size Page size +@param[in,out] src_frame Page to decrypt +@param[out] err DB_SUCCESS or error +@return true if page decrypted, false if not.*/ UNIV_INTERN bool -fil_space_check_encryption_read( -/*============================*/ - ulint space) /*!< in: tablespace id */ - __attribute__((warn_unused_result)); +fil_space_decrypt( + fil_space_crypt_t* crypt_data, + byte* tmp_frame, + const page_size_t& page_size, + byte* src_frame, + dberr_t* err); /****************************************************************** Decrypt a page -@return true if page is decrypted, false if not. */ -UNIV_INTERN -bool -fil_space_decrypt( -/*==============*/ - fil_space_crypt_t* crypt_data, /*!< in: crypt data */ - byte* tmp_frame, /*!< in: temporary buffer */ - const page_size_t& page_size, /*!< in: page size */ - byte* src_frame, /*!< in:out: page buffer */ - dberr_t* err) /*!< in: out: DB_SUCCESS or - error code */ - __attribute__((warn_unused_result)); - -/********************************************************************* -Encrypt buffer page -@return encrypted page, or original not encrypted page if encrypt -is not needed. */ -UNIV_INTERN -byte* -fil_space_encrypt( -/*==============*/ - ulint space, /*!< in: tablespace id */ - ulint offset, /*!< in: page no */ - lsn_t lsn, /*!< in: page lsn */ - byte* src_frame, /*!< in: page frame */ - const page_size_t& page_size, /*!< in: page size */ - byte* dst_frame) /*!< in: where to encrypt to */ - __attribute__((warn_unused_result)); - -/********************************************************************* -Decrypt buffer page -@return decrypted page, or original not encrypted page if decrypt is +@param[in] space Tablespace +@param[in] tmp_frame Temporary buffer used for decrypting +@param[in,out] src_frame Page to decrypt +@param[out] decrypted true if page was decrypted +@return decrypted page, or original not encrypted page if decryption is not needed.*/ UNIV_INTERN byte* fil_space_decrypt( -/*==============*/ - ulint space, /*!< in: tablespace id */ - byte* src_frame, /*!< in: page frame */ - const page_size_t& page_size, /*!< in: page size */ - byte* dst_frame) /*!< in: where to decrypt to */ - __attribute__((warn_unused_result)); + const fil_space_t* space, + byte* tmp_frame, + byte* src_frame, + bool* decrypted) + MY_ATTRIBUTE((warn_unused_result)); -/********************************************************************* -fil_space_verify_crypt_checksum -NOTE: currently this function can only be run in single threaded mode -as it modifies srv_checksum_algorithm (temporarily) -@return true if page is encrypted AND OK, false otherwise */ +/****************************************************************** +Calculate post encryption checksum +@param[in] page_size page size +@param[in] dst_frame Block where checksum is calculated +@return page checksum or BUF_NO_CHECKSUM_MAGIC +not needed. */ UNIV_INTERN -bool -fil_space_verify_crypt_checksum( -/*============================*/ - const byte* src_frame,/*!< in: page frame */ - const page_size_t& page_size) /*!< in: page size */ - __attribute__((warn_unused_result)); +ulint +fil_crypt_calculate_checksum( + const page_size_t& page_size, + const byte* dst_frame) + MY_ATTRIBUTE((warn_unused_result)); /********************************************************************* -Init threads for key rotation */ +Adjust thread count for key rotation +@param[in] enw_cnt Number of threads to be used */ UNIV_INTERN void -fil_crypt_threads_init(); +fil_crypt_set_thread_cnt( + uint new_cnt); /********************************************************************* -Set thread count (e.g start or stops threads) used for key rotation */ +Adjust max key age +@param[in] val New max key age */ UNIV_INTERN void -fil_crypt_set_thread_cnt( -/*=====================*/ - uint new_cnt); /*!< in: requested #threads */ +fil_crypt_set_rotate_key_age( + uint val); /********************************************************************* -Cleanup resources for threads for key rotation */ +Adjust rotation iops +@param[in] val New max roation iops */ UNIV_INTERN void -fil_crypt_threads_cleanup(); +fil_crypt_set_rotation_iops( + uint val); /********************************************************************* -Set rotate key age */ +Adjust encrypt tables +@param[in] val New setting for innodb-encrypt-tables */ UNIV_INTERN void -fil_crypt_set_rotate_key_age( -/*=========================*/ - uint rotate_age); /*!< in: requested rotate age */ +fil_crypt_set_encrypt_tables( + uint val); /********************************************************************* -Set rotation threads iops */ +Init threads for key rotation */ UNIV_INTERN void -fil_crypt_set_rotation_iops( -/*========================*/ - uint iops); /*!< in: requested iops */ +fil_crypt_threads_init(); /********************************************************************* -Mark a space as closing */ +Clean up key rotation threads resources */ UNIV_INTERN void -fil_space_crypt_mark_space_closing( -/*===============================*/ - ulint space, /*!< in: tablespace id */ - fil_space_crypt_t* crypt_data); /*!< in: crypt_data or NULL */ +fil_crypt_threads_cleanup(); /********************************************************************* -Wait for crypt threads to stop accessing space */ +Wait for crypt threads to stop accessing space +@param[in] space Tablespace */ UNIV_INTERN void fil_space_crypt_close_tablespace( -/*=============================*/ - ulint space); /*!< in: tablespace id */ - -/** Struct for retreiving info about encryption */ -struct fil_space_crypt_status_t { - ulint space; /*!< tablespace id */ - ulint scheme; /*!< encryption scheme */ - uint min_key_version; /*!< min key version */ - uint current_key_version;/*!< current key version */ - uint keyserver_requests;/*!< no of key requests to key server */ - ulint key_id; /*!< current key_id */ - bool rotating; /*!< is key rotation ongoing */ - bool flushing; /*!< is flush at end of rotation ongoing */ - ulint rotate_next_page_number; /*!< next page if key rotating */ - ulint rotate_max_page_number; /*!< max page if key rotating */ -}; + const fil_space_t* space); /********************************************************************* -Get crypt status for a space -@return 0 if crypt data found */ +Get crypt status for a space (used by information_schema) +@param[in] space Tablespace +@param[out] status Crypt status +return 0 if crypt data present */ UNIV_INTERN -int +void fil_space_crypt_get_status( -/*=======================*/ - ulint id, /*!< in: space id */ - struct fil_space_crypt_status_t * status); /*!< out: status */ - -/** Struct for retreiving statistics about encryption key rotation */ -struct fil_crypt_stat_t { - ulint pages_read_from_cache; - ulint pages_read_from_disk; - ulint pages_modified; - ulint pages_flushed; - ulint estimated_iops; -}; + const fil_space_t* space, + struct fil_space_crypt_status_t* status); /********************************************************************* -Get crypt rotation statistics */ +Return crypt statistics +@param[out] stat Crypt statistics */ UNIV_INTERN void fil_crypt_total_stat( -/*==================*/ - fil_crypt_stat_t* stat); /*!< out: crypt stat */ - -/** Struct for retreiving info about scrubbing */ -struct fil_space_scrub_status_t { - ulint space; /*!< tablespace id */ - bool compressed; /*!< is space compressed */ - time_t last_scrub_completed; /*!< when was last scrub completed */ - bool scrubbing; /*!< is scrubbing ongoing */ - time_t current_scrub_started; /*!< when started current scrubbing */ - ulint current_scrub_active_threads; /*!< current scrub active threads */ - ulint current_scrub_page_number; /*!< current scrub page no */ - ulint current_scrub_max_page_number; /*!< current scrub max page no */ -}; + fil_crypt_stat_t *stat); -/********************************************************************* -Get scrub status for a space -@return 0 if no scrub info found */ -UNIV_INTERN -int -fil_space_get_scrub_status( -/*=======================*/ - ulint id, /*!< in: space id */ - struct fil_space_scrub_status_t * status); /*!< out: status */ +/** +Get scrub status for a space (used by information_schema) -/********************************************************************* -Adjust encrypt tables */ +@param[in] space Tablespace +@param[out] status Scrub status +return 0 if data found */ UNIV_INTERN void -fil_crypt_set_encrypt_tables( -/*=========================*/ - uint val); /*!< in: New srv_encrypt_tables setting */ +fil_space_get_scrub_status( + const fil_space_t* space, + fil_space_scrub_status_t* status); -/****************************************************************** -Encrypt a buffer */ -UNIV_INTERN -byte* -fil_encrypt_buf( -/*============*/ - fil_space_crypt_t* crypt_data, /*!< in: crypt data */ - ulint space, /*!< in: Space id */ - ulint offset, /*!< in: Page offset */ - lsn_t lsn, /*!< in: lsn */ - byte* src_frame, /*!< in: Source page to be encrypted */ - const page_size_t& page_size, /*!< in: page size */ - byte* dst_frame) /*!< in: outbut buffer */ - __attribute__((warn_unused_result)); +#include "fil0crypt.ic" +#endif /* !UNIV_INNOCHECKSUM */ -/****************************************************************** -Calculate post encryption checksum -@return page checksum or BUF_NO_CHECKSUM_MAGIC -not needed. */ +/** +Verify that post encryption checksum match calculated checksum. +This function should be called only if tablespace contains crypt_data +metadata (this is strong indication that tablespace is encrypted). +Function also verifies that traditional checksum does not match +calculated checksum as if it does page could be valid unencrypted, +encrypted, or corrupted. + +@param[in,out] page page frame (checksum is temporarily modified) +@param[in] page_size page size +@param[in] space tablespace identifier +@param[in] offset page number +@return true if page is encrypted AND OK, false otherwise */ UNIV_INTERN -ulint -fil_crypt_calculate_checksum( -/*=========================*/ - const page_size_t& page_size, /*!< in: page size */ - byte* dst_frame) /*!< in: page where to calculate */ - __attribute__((warn_unused_result)); - -#include "fil0crypt.ic" +bool +fil_space_verify_crypt_checksum( + byte* page, + const page_size_t& page_size, +#ifdef UNIV_INNOCHECKSUM + bool strict_check, /*!< --strict-check */ + FILE* log_file, /*!< --log */ +#endif /* UNIV_INNOCHECKSUM */ + ulint space, + ulint offset) + MY_ATTRIBUTE((warn_unused_result)); #endif /* fil0crypt_h */ diff --git a/storage/innobase/include/fil0crypt.ic b/storage/innobase/include/fil0crypt.ic index fe3a21f0643..6b83521cdde 100644 --- a/storage/innobase/include/fil0crypt.ic +++ b/storage/innobase/include/fil0crypt.ic @@ -36,38 +36,6 @@ fil_page_is_encrypted( } /*******************************************************************//** -Find out whether the page can be decrypted. -The function for decrypting the page should already be executed before this. -@return 1 if key provider not available or key is not available - 0 if decryption should be possible -*/ -UNIV_INLINE -bool -fil_page_encryption_status( -/*===================*/ - const byte *buf, /*!< in: page */ - ulint space_id) /*!< in: space_id */ -{ - fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space_id); - ulint page_type = mach_read_from_2(buf+FIL_PAGE_TYPE); - - if (page_type == FIL_PAGE_TYPE_FSP_HDR) { - if (crypt_data != NULL) { - if (!encryption_key_id_exists(crypt_data->key_id)) { - /* accessing table would surely fail, because no key or no key provider available */ - return 1; - } - } - } else { - unsigned key = mach_read_from_4(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - if (!encryption_key_version_exists(crypt_data->key_id, key)) { - return 1; - } - } - return 0; -} - -/*******************************************************************//** Get current encryption mode from crypt_data. @return string representation */ UNIV_INLINE @@ -76,22 +44,16 @@ fil_crypt_get_mode( /*===============*/ const fil_space_crypt_t* crypt_data) { - ut_ad(crypt_data != NULL); - - switch(crypt_data->encryption) { - case FIL_SPACE_ENCRYPTION_DEFAULT: + switch (crypt_data->encryption) { + case FIL_ENCRYPTION_DEFAULT: return("Default tablespace encryption mode"); - break; - case FIL_SPACE_ENCRYPTION_ON: + case FIL_ENCRYPTION_ON: return("Tablespace encrypted"); - break; - case FIL_SPACE_ENCRYPTION_OFF: + case FIL_ENCRYPTION_OFF: return("Tablespace not encrypted"); - break; - default: - ut_error; } + ut_error; return ("NULL"); } diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 66d79bd24b5..abd9ff9a9ed 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2013, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -28,6 +28,8 @@ Created 10/25/1995 Heikki Tuuri #define fil0fil_h #include "univ.i" +struct fil_space_t; + #ifndef UNIV_INNOCHECKSUM #include "log0recv.h" @@ -43,11 +45,11 @@ struct trx_t; class page_id_t; class truncate_t; -/* structure containing encryption specification */ -typedef struct fil_space_crypt_struct fil_space_crypt_t; - typedef std::list<char*, ut_allocator<char*> > space_name_list_t; +/** Structure containing encryption specification */ +struct fil_space_crypt_t; + /** File types */ enum fil_type_t { /** temporary tablespace (temporary undo log or tables) */ @@ -163,9 +165,13 @@ struct fil_space_t { unflushed_spaces */ UT_LIST_NODE_T(fil_space_t) space_list; /*!< list of all spaces */ + /** other tablespaces needing key rotation */ + UT_LIST_NODE_T(fil_space_t) rotation_list; + /** whether this tablespace needs key rotation */ + bool is_in_rotation_list; /** MariaDB encryption data */ - fil_space_crypt_t* crypt_data; + fil_space_crypt_t* crypt_data; /** tablespace crypt data has been read */ bool page_0_crypt_read; @@ -173,11 +179,8 @@ struct fil_space_t { /** True if we have already printed compression failure */ bool printed_compression_failure; - /** True if page 0 of tablespace is read */ - bool read_page0; - - /** True if the device this filespace is on supports atomic writes */ - bool atomic_write_supported; + /** True if the device this filespace is on supports atomic writes */ + bool atomic_write_supported; /** Release the reserved free extents. @param[in] n_reserved number of reserved extents */ @@ -188,6 +191,13 @@ struct fil_space_t { bool punch_hole; ulint magic_n;/*!< FIL_SPACE_MAGIC_N */ + + /** @return whether the tablespace is about to be dropped or + truncated */ + bool is_stopping() const + { + return stop_new_ops || is_being_truncated; + } }; /** Value of fil_space_t::magic_n */ @@ -419,6 +429,16 @@ index */ #ifndef UNIV_INNOCHECKSUM +/** Enum values for encryption table option */ +enum fil_encryption_t { + /** Encrypted if innodb_encrypt_tables=ON (srv_encrypt_tables) */ + FIL_ENCRYPTION_DEFAULT, + /** Encrypted */ + FIL_ENCRYPTION_ON, + /** Not encrypted */ + FIL_ENCRYPTION_OFF +}; + /** The number of fsyncs done to the log */ extern ulint fil_n_log_flushes; @@ -490,6 +510,10 @@ struct fil_system_t { record has been written since the latest redo log checkpoint. Protected only by log_sys->mutex. */ + UT_LIST_BASE_NODE_T(fil_space_t) rotation_list; + /*!< list of all file spaces needing + key rotation.*/ + ibool space_id_reuse_warned; /* !< TRUE if fil_space_create() has issued a warning about @@ -558,22 +582,25 @@ fil_node_create( MY_ATTRIBUTE((warn_unused_result)); /** Create a space memory object and put it to the fil_system hash table. -The tablespace name is independent from the tablespace file-name. Error messages are issued to the server log. -@param[in] name tablespace name -@param[in] id tablespace identifier -@param[in] flags tablespace flags -@param[in] purpose tablespace purpose +@param[in] name tablespace name +@param[in] id tablespace identifier +@param[in] flags tablespace flags +@param[in] purpose tablespace purpose +@param[in,out] crypt_data encryption information +@param[in] create_table whether this is CREATE TABLE +@param[in] mode encryption mode @return pointer to created tablespace, to be filled in with fil_node_create() @retval NULL on failure (such as when the same tablespace exists) */ fil_space_t* fil_space_create( - const char* name, - ulint id, - ulint flags, - fil_type_t purpose, /*!< in: FIL_TABLESPACE, or FIL_LOG if log */ - fil_space_crypt_t* crypt_data, /*!< in: crypt data */ - bool create_table) /*!< in: true if create table */ + const char* name, + ulint id, + ulint flags, + fil_type_t purpose, + fil_space_crypt_t* crypt_data, + bool create_table, + fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT) MY_ATTRIBUTE((warn_unused_result)); /*******************************************************************//** @@ -730,6 +757,32 @@ void fil_space_release( fil_space_t* space); +/** Return the next fil_space_t. +Once started, the caller must keep calling this until it returns NULL. +fil_space_acquire() and fil_space_release() are invoked here which +blocks a concurrent operation from dropping the tablespace. +@param[in,out] prev_space Pointer to the previous fil_space_t. +If NULL, use the first fil_space_t on fil_system->space_list. +@return pointer to the next fil_space_t. +@retval NULL if this was the last */ +fil_space_t* +fil_space_next( + fil_space_t* prev_space) + MY_ATTRIBUTE((warn_unused_result)); + +/** Return the next fil_space_t from key rotation list. +Once started, the caller must keep calling this until it returns NULL. +fil_space_acquire() and fil_space_release() are invoked here which +blocks a concurrent operation from dropping the tablespace. +@param[in,out] prev_space Pointer to the previous fil_space_t. +If NULL, use the first fil_space_t on fil_system->space_list. +@return pointer to the next fil_space_t. +@retval NULL if this was the last*/ +fil_space_t* +fil_space_keyrotate_next( + fil_space_t* prev_space) + MY_ATTRIBUTE((warn_unused_result)); + /** Wrapper with reference-counting for a fil_space_t. */ class FilSpace { @@ -1420,16 +1473,10 @@ fil_mtr_rename_log( mtr_t* mtr) MY_ATTRIBUTE((warn_unused_result)); -/****************************************************************//** -Acquire fil_system mutex */ -void -fil_system_enter(void); -/*==================*/ -/****************************************************************//** -Release fil_system mutex */ -void -fil_system_exit(void); -/*==================*/ +/** Acquire the fil_system mutex. */ +#define fil_system_enter() mutex_enter(&fil_system->mutex) +/** Release the fil_system mutex. */ +#define fil_system_exit() mutex_exit(&fil_system->mutex) /*******************************************************************//** Returns the table space by a given id, NULL if not found. */ @@ -1445,36 +1492,6 @@ fil_space_get_by_id( /*================*/ ulint id); /*!< in: space id */ -/****************************************************************** -Get id of first tablespace or ULINT_UNDEFINED if none */ -UNIV_INTERN -ulint -fil_get_first_space(); -/*=================*/ - -/****************************************************************** -Get id of next tablespace or ULINT_UNDEFINED if none */ -UNIV_INTERN -ulint -fil_get_next_space( - ulint id); /*!< in: space id */ - -/****************************************************************** -Get id of first tablespace that has node or ULINT_UNDEFINED if none */ -UNIV_INTERN -ulint -fil_get_first_space_safe(); -/*======================*/ - -/****************************************************************** -Get id of next tablespace that has node or ULINT_UNDEFINED if none */ -UNIV_INTERN -ulint -fil_get_next_space_safe( -/*====================*/ - ulint id); /*!< in: previous space id */ - - /*******************************************************************//** by redo log. @param[in,out] space tablespace */ diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h index ec33f2e4d10..cd2a07af4f0 100644 --- a/storage/innobase/include/fsp0types.h +++ b/storage/innobase/include/fsp0types.h @@ -197,13 +197,6 @@ fsp_is_system_temporary(ulint space_id) return(space_id == SRV_TMP_SPACE_ID); } -/** Check if checksum is disabled for the given space. -@param[in] space_id verify is checksum is enabled for given space. -@return true if checksum is disabled for given space. */ -bool -fsp_is_checksum_disabled( - ulint space_id); - #ifdef UNIV_DEBUG /** Skip some of the sanity checks that are time consuming even in debug mode and can affect frequent verification runs that are done to ensure stability of diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index f4446ca32d2..6992f4a6689 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -137,6 +137,13 @@ thd_has_edited_nontrans_tables( /*===========================*/ THD* thd); /*!< in: thread handle */ +/** +Get high resolution timestamp for the current query start time. + +@retval timestamp in microseconds precision +*/ +unsigned long long thd_query_start_micro(const MYSQL_THD thd); + /*************************************************************//** Prints info of a THD object (== user session thread) to the given file. */ void diff --git a/storage/innobase/include/mach0data.ic b/storage/innobase/include/mach0data.ic index 6c879b38354..34d375aa1e8 100644 --- a/storage/innobase/include/mach0data.ic +++ b/storage/innobase/include/mach0data.ic @@ -38,7 +38,6 @@ mach_write_to_1( byte* b, /*!< in: pointer to byte where to store */ ulint n) /*!< in: ulint integer to be stored, >= 0, < 256 */ { - ut_ad(b); ut_ad((n & ~0xFFUL) == 0); b[0] = (byte) n; @@ -56,7 +55,6 @@ mach_write_to_2( byte* b, /*!< in: pointer to two bytes where to store */ ulint n) /*!< in: ulint integer to be stored */ { - ut_ad(b); ut_ad((n & ~0xFFFFUL) == 0); b[0] = (byte)(n >> 8); @@ -71,7 +69,6 @@ uint8_t mach_read_from_1( const byte* b) { - ut_ad(b); return(uint8_t(*b)); } @@ -130,7 +127,6 @@ mach_write_to_3( byte* b, /*!< in: pointer to 3 bytes where to store */ ulint n) /*!< in: ulint integer to be stored */ { - ut_ad(b); ut_ad((n & ~0xFFFFFFUL) == 0); b[0] = (byte)(n >> 16); @@ -147,7 +143,6 @@ uint32_t mach_read_from_3( const byte* b) { - ut_ad(b); return( (static_cast<uint32_t>(b[0]) << 16) | (static_cast<uint32_t>(b[1]) << 8) | static_cast<uint32_t>(b[2]) @@ -165,8 +160,6 @@ mach_write_to_4( byte* b, /*!< in: pointer to four bytes where to store */ ulint n) /*!< in: ulint integer to be stored */ { - ut_ad(b); - b[0] = (byte)(n >> 24); b[1] = (byte)(n >> 16); b[2] = (byte)(n >> 8); @@ -182,7 +175,6 @@ uint32_t mach_read_from_4( const byte* b) { - ut_ad(b); return( (static_cast<uint32_t>(b[0]) << 24) | (static_cast<uint32_t>(b[1]) << 16) | (static_cast<uint32_t>(b[2]) << 8) @@ -207,8 +199,6 @@ mach_write_compressed( byte* b, /*!< in: pointer to memory where to store */ ulint n) /*!< in: ulint integer (< 2^32) to be stored */ { - ut_ad(b); - if (n < 0x80) { /* 0nnnnnnn (7 bits) */ mach_write_to_1(b, n); @@ -271,8 +261,6 @@ mach_read_compressed( { ulint val; - ut_ad(b); - val = mach_read_from_1(b); if (val < 0x80) { @@ -349,8 +337,6 @@ mach_write_to_8( void* b, /*!< in: pointer to 8 bytes where to store */ ib_uint64_t n) /*!< in: 64-bit integer to be stored */ { - ut_ad(b); - mach_write_to_4(static_cast<byte*>(b), (ulint) (n >> 32)); mach_write_to_4(static_cast<byte*>(b) + 4, (ulint) n); } @@ -388,8 +374,6 @@ mach_write_to_7( byte* b, /*!< in: pointer to 7 bytes where to store */ ib_uint64_t n) /*!< in: 56-bit integer */ { - ut_ad(b); - mach_write_to_3(b, (ulint) (n >> 32)); mach_write_to_4(b + 3, (ulint) n); } @@ -404,8 +388,6 @@ mach_read_from_7( /*=============*/ const byte* b) /*!< in: pointer to 7 bytes */ { - ut_ad(b); - return(ut_ull_create(mach_read_from_3(b), mach_read_from_4(b + 3))); } @@ -419,8 +401,6 @@ mach_write_to_6( byte* b, /*!< in: pointer to 6 bytes where to store */ ib_uint64_t n) /*!< in: 48-bit integer */ { - ut_ad(b); - mach_write_to_2(b, (ulint) (n >> 32)); mach_write_to_4(b + 2, (ulint) n); } @@ -435,8 +415,6 @@ mach_read_from_6( /*=============*/ const byte* b) /*!< in: pointer to 6 bytes */ { - ut_ad(b); - return(ut_ull_create(mach_read_from_2(b), mach_read_from_4(b + 2))); } @@ -450,11 +428,7 @@ mach_u64_write_compressed( byte* b, /*!< in: pointer to memory where to store */ ib_uint64_t n) /*!< in: 64-bit integer to be stored */ { - ulint size; - - ut_ad(b); - - size = mach_write_compressed(b, (ulint) (n >> 32)); + ulint size = mach_write_compressed(b, (ulint) (n >> 32)); mach_write_to_4(b + size, (ulint) n); return(size + 4); @@ -490,8 +464,6 @@ mach_u64_write_much_compressed( { ulint size; - ut_ad(b); - if (!(n >> 32)) { return(mach_write_compressed(b, (ulint) n)); } diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index f37e735de4d..6a82c06e3ed 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -2,7 +2,7 @@ Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved. +Copyright (c) 2013, 2017, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Percona Inc.. Those modifications are diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic index 3940931125e..7250a96bb5d 100644 --- a/storage/innobase/include/page0page.ic +++ b/storage/innobase/include/page0page.ic @@ -218,7 +218,6 @@ page_header_get_offs( { ulint offs; - ut_ad(page); ut_ad((field == PAGE_FREE) || (field == PAGE_LAST_INSERT) || (field == PAGE_HEAP_TOP)); diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index dd77738584b..e469cd90737 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -3,7 +3,7 @@ Copyright (c) 1995, 2016, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, 2009, Google Inc. Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved. +Copyright (c) 2013, 2017, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -181,6 +181,9 @@ struct srv_stats_t { /** Number of log scrub operations */ ulint_ctr_64_t n_log_scrubs; + + /** Number of spaces in keyrotation list */ + ulint_ctr_64_t key_rotation_list_length; }; extern const char* srv_main_thread_op_info; @@ -1050,6 +1053,7 @@ struct export_var_t{ ulint innodb_encryption_rotation_pages_flushed; ulint innodb_encryption_rotation_estimated_iops; int64_t innodb_encryption_key_requests; + int64_t innodb_key_rotation_list_length; ulint innodb_scrub_page_reorganizations; ulint innodb_scrub_page_splits; diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 7d9db25ddce..2b42d15731d 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -1104,8 +1104,8 @@ struct trx_t { time_t start_time; /*!< time the state last time became TRX_STATE_ACTIVE */ - clock_t start_time_micro; /*!< start time of the transaction - in microseconds. */ + ib_uint64_t start_time_micro; /*!< start time of transaction in + microseconds */ lsn_t commit_lsn; /*!< lsn at the time of the commit */ table_id_t table_id; /*!< Table to drop iff dict_operation == TRX_DICT_OP_TABLE, or 0. */ diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h index bf127517051..3027004dbaf 100644 --- a/storage/innobase/include/ut0ut.h +++ b/storage/innobase/include/ut0ut.h @@ -72,9 +72,7 @@ typedef time_t ib_time_t; # define UT_RELAX_CPU() YieldProcessor() #elif defined(__powerpc__) && defined __GLIBC__ # include <sys/platform/ppc.h> -# define UT_RELAX_CPU() do { \ - volatile lint volatile_var = __ppc_get_timebase(); \ - } while (0) +# define UT_RELAX_CPU() __ppc_get_timebase() #else # define UT_RELAX_CPU() do { \ volatile int32 volatile_var; \ diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index a37ad19e81d..134853dd0d7 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -800,7 +800,7 @@ lock_reset_lock_and_trx_wait( stmt2 = innobase_get_stmt_unsafe(lock->trx->lock.wait_lock->trx->mysql_thd, &stmt_len); } - ib::info() << + ib::error() << "Trx id " << lock->trx->id << " is waiting a lock in statement " << (stmt ? stmt : "NULL") @@ -808,7 +808,7 @@ lock_reset_lock_and_trx_wait( << " and statement " << (stmt2 ? stmt2 : "NULL") << "wait_lock " << lock->trx->lock.wait_lock; - ut_ad(lock->trx->lock.wait_lock != lock); + ut_ad(0); } lock->trx->lock.wait_lock = NULL; diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index e6d6f4b0af2..adf2d3aca0a 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1495,7 +1495,7 @@ parse_log: } break; case MLOG_FILE_WRITE_CRYPT_DATA: - ptr = fil_parse_write_crypt_data(ptr, end_ptr, block); + ptr = const_cast<byte*>(fil_parse_write_crypt_data(ptr, end_ptr, block)); break; default: ptr = NULL; @@ -1911,7 +1911,7 @@ recv_recover_page(bool just_read_in, buf_block_t* block) ib_time_t time = ut_time(); - mutex_enter(&(recv_sys->mutex)); + mutex_enter(&recv_sys->mutex); if (recv_max_page_lsn < page_lsn) { recv_max_page_lsn = page_lsn; @@ -2013,9 +2013,8 @@ recv_apply_hashed_log_recs(bool last_batch) recv_sys->apply_batch_on = TRUE; for (ulint i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) { - for (recv_addr_t* recv_addr = static_cast<recv_addr_t*>( - HASH_GET_FIRST(recv_sys->addr_hash, i)); + HASH_GET_FIRST(recv_sys->addr_hash, i)); recv_addr; recv_addr = static_cast<recv_addr_t*>( HASH_GET_NEXT(addr_hash, recv_addr))) { @@ -2064,7 +2063,7 @@ recv_apply_hashed_log_recs(bool last_batch) recv_read_in_area(page_id); } - mutex_enter(&(recv_sys->mutex)); + mutex_enter(&recv_sys->mutex); } } } diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 7c118bde46d..43f04186f0e 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -2,7 +2,7 @@ Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved. +Copyright (c) 2012, 2017, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Percona Inc.. Those modifications are @@ -678,22 +678,22 @@ static ulint os_aio_n_segments = ULINT_UNDEFINED; /** If the following is true, read i/o handler threads try to wait until a batch of new read requests have been posted */ -static bool os_aio_recommend_sleep_for_read_threads = false; - -ulint os_n_file_reads = 0; -static ulint os_bytes_read_since_printout = 0; -ulint os_n_file_writes = 0; -ulint os_n_fsyncs = 0; -static ulint os_n_file_reads_old = 0; -static ulint os_n_file_writes_old = 0; -static ulint os_n_fsyncs_old = 0; +static bool os_aio_recommend_sleep_for_read_threads; + +ulint os_n_file_reads; +static ulint os_bytes_read_since_printout; +ulint os_n_file_writes; +ulint os_n_fsyncs; +static ulint os_n_file_reads_old; +static ulint os_n_file_writes_old; +static ulint os_n_fsyncs_old; /** Number of pending write operations */ -ulint os_n_pending_writes = 0; +ulint os_n_pending_writes; /** Number of pending read operations */ -ulint os_n_pending_reads = 0; +ulint os_n_pending_reads; static time_t os_last_printout; -bool os_has_said_disk_full = false; +bool os_has_said_disk_full; /** Default Zip compression level */ extern uint page_zip_level; @@ -6085,9 +6085,7 @@ void os_aio_wake_all_threads_at_shutdown() { #ifdef WIN_ASYNC_IO - AIO::wake_at_shutdown(); - #elif defined(LINUX_NATIVE_AIO) /* When using native AIO interface the io helper threads wait on io_getevents with a timeout value of 500ms. At diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc index 437b6c01a41..3765f8112a1 100644 --- a/storage/innobase/page/page0page.cc +++ b/storage/innobase/page/page0page.cc @@ -1493,7 +1493,6 @@ page_dir_split_slot( ulint i; ulint n_owned; - ut_ad(page); ut_ad(!page_zip || page_is_comp(page)); ut_ad(slot_no > 0); @@ -1554,7 +1553,6 @@ page_dir_balance_slot( rec_t* old_rec; rec_t* new_rec; - ut_ad(page); ut_ad(!page_zip || page_is_comp(page)); ut_ad(slot_no > 0); diff --git a/storage/innobase/pars/pars0pars.cc b/storage/innobase/pars/pars0pars.cc index 39fd84a2b96..21325cac12a 100644 --- a/storage/innobase/pars/pars0pars.cc +++ b/storage/innobase/pars/pars0pars.cc @@ -1931,7 +1931,7 @@ pars_create_table( } node = tab_create_graph_create(table, pars_sym_tab_global->heap, - FIL_SPACE_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY); + FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY); table_sym->resolved = TRUE; table_sym->token_type = SYM_TABLE; diff --git a/storage/innobase/row/row0ftsort.cc b/storage/innobase/row/row0ftsort.cc index 0bf38503a2c..19e1ba926d0 100644 --- a/storage/innobase/row/row0ftsort.cc +++ b/storage/innobase/row/row0ftsort.cc @@ -225,7 +225,14 @@ row_fts_psort_info_init( common_info->sort_event = os_event_create(0); common_info->merge_event = os_event_create(0); common_info->opt_doc_id_size = opt_doc_id_size; - crypt_data = fil_space_get_crypt_data(new_table->space); + + /* Theoretically the tablespace can be dropped straight away. + In practice, the DDL completion will wait for this thread to + finish. */ + if (fil_space_t* space = fil_space_acquire(new_table->space)) { + crypt_data = space->crypt_data; + fil_space_release(space); + } if (crypt_data && crypt_data->should_encrypt()) { common_info->crypt_data = crypt_data; diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index 9bd5cedc8bb..b70a1952f97 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -1958,14 +1958,16 @@ PageConverter::validate( buf_block_t* block) UNIV_NOTHROW { buf_frame_t* page = get_frame(block); + ulint space_id = mach_read_from_4( + page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + fil_space_t* space = fil_space_found_by_id(space_id); /* Check that the page number corresponds to the offset in the file. Flag as corrupt if it doesn't. Disable the check for LSN in buf_page_is_corrupted() */ if (buf_page_is_corrupted( - false, page, get_page_size(), - fsp_is_checksum_disabled(block->page.id.space())) + false, page, get_page_size(), space) || (page_get_page_no(page) != offset / m_page_size.physical() && page_get_page_no(page) != 0)) { @@ -2028,9 +2030,7 @@ PageConverter::operator() ( !is_compressed_table() ? block->frame : block->page.zip.data, !is_compressed_table() ? 0 : m_page_zip_ptr, - m_current_lsn, - fsp_is_checksum_disabled( - block->page.id.space())); + m_current_lsn); } else { /* Calculate and update the checksum of non-btree pages for compressed tables explicitly here. */ diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index ef832c38b95..85f0ce2c9e6 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -298,13 +298,13 @@ row_merge_encrypt_buf( space, ofs, 0); if (! ((rc == MY_AES_OK) && ((ulint)dstlen == srv_sort_buf_size-ROW_MERGE_RESERVE_SIZE))) { - ib::error() + ib::fatal() << "Unable to encrypt data-block " - " src: %p srclen: %lu buf: %p buflen: %u." - << srv_sort_buf_size << " buf: " << crypted_buf + " src: " << static_cast<const void*>(input_buf) + << " srclen: " << srv_sort_buf_size + << " buf: " << static_cast<const void*>(crypted_buf) << " buflen: " << dstlen - << " return-code: " << rc << " Can't continue!"; - ut_error; + << ". return-code: " << rc << ". Can't continue!"; } } @@ -340,13 +340,13 @@ row_merge_decrypt_buf( space, ofs, 0); if (! ((rc == MY_AES_OK) && ((ulint)dstlen == srv_sort_buf_size-ROW_MERGE_RESERVE_SIZE))) { - ib::error() + ib::fatal() << "Unable to decrypt data-block " - << " src: " << input_buf << " srclen: " - << srv_sort_buf_size << " buf: " << crypted_buf + << " src: " << static_cast<const void*>(input_buf) + << " srclen: " << srv_sort_buf_size + << " buf: " << static_cast<const void*>(crypted_buf) << " buflen: " << dstlen - << " return-code: " << rc << " Can't continue!"; - ut_error; + << ". return-code: " << rc << ". Can't continue!"; } return true; @@ -1268,14 +1268,8 @@ row_merge_read_rec( ulint data_size; ulint avail_size; - ut_ad(block); - ut_ad(buf); ut_ad(b >= &block[0]); ut_ad(b < &block[srv_sort_buf_size]); - ut_ad(index); - ut_ad(foffs); - ut_ad(mrec); - ut_ad(offsets); ut_ad(*offsets == 1 + REC_OFFS_HEADER_SIZE + dict_index_get_n_fields(index)); @@ -4627,7 +4621,7 @@ row_merge_build_indexes( row_merge_block_t* block; ut_new_pfx_t block_pfx; ut_new_pfx_t crypt_pfx; - row_merge_block_t* crypt_block; + row_merge_block_t* crypt_block = NULL; ulint i; ulint j; dberr_t error; @@ -4668,9 +4662,15 @@ row_merge_build_indexes( DBUG_RETURN(DB_OUT_OF_MEMORY); } - /* Get crypt data from tablespace if present. */ - crypt_data = fil_space_get_crypt_data(new_table->space); - crypt_block = NULL; + /* Get crypt data from tablespace if present. We should be protected + from concurrent DDL (e.g. drop table) by MDL-locks. */ + FilSpace space(new_table->space); + + if (const fil_space_t* fs = space()) { + crypt_data = fs->crypt_data; + } else { + DBUG_RETURN(DB_TABLESPACE_NOT_FOUND); + } /* If tablespace is encrypted, allocate additional buffer for encryption/decryption. */ diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index d3fb6db880c..8b7c64868b8 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -3434,9 +3434,6 @@ fil_wait_crypt_bg_threads( { time_t start = time(0); time_t last = start; - if (table->space != 0) { - fil_space_crypt_mark_space_closing(table->space, table->crypt_data); - } while (table->get_ref_count()> 0) { dict_mutex_exit_for_mysql(); @@ -3448,14 +3445,14 @@ fil_wait_crypt_bg_threads( ib::warn() << "Waited " << now - start << " seconds for ref-count on table: " - << table->name.m_name << " space: " << table->space; + << table->name << " space: " << table->space; last = now; } if (now >= start + 300) { ib::warn() << "After " << now - start << " seconds, gave up waiting " - << "for ref-count on table: " << table->name.m_name + << "for ref-count on table: " << table->name << " space: " << table->space; break; } @@ -3905,7 +3902,14 @@ row_drop_table_for_mysql( /* If table has not yet have crypt_data, try to read it to make freeing the table easier. */ if (!table->crypt_data) { - table->crypt_data = fil_space_get_crypt_data(table->space); + if (fil_space_t* space = fil_space_acquire_silent( + table->space)) { + /* We use crypt data in dict_table_t + in ha_innodb.cc to push warnings to + user thread. */ + table->crypt_data = space->crypt_data; + fil_space_release(space); + } } /* We use the private SQL parser of Innobase to generate the diff --git a/storage/innobase/row/row0trunc.cc b/storage/innobase/row/row0trunc.cc index 152970d9413..46cff288059 100644 --- a/storage/innobase/row/row0trunc.cc +++ b/storage/innobase/row/row0trunc.cc @@ -2274,7 +2274,7 @@ truncate_t::truncate_t( m_log_lsn(), m_log_file_name(), /* JAN: TODO: Encryption */ - m_encryption(FIL_SPACE_ENCRYPTION_DEFAULT), + m_encryption(FIL_ENCRYPTION_DEFAULT), m_key_id(FIL_DEFAULT_ENCRYPTION_KEY) { if (dir_path != NULL) { @@ -2301,7 +2301,7 @@ truncate_t::truncate_t( m_log_lsn(), m_log_file_name(), /* JAN: TODO: Encryption */ - m_encryption(FIL_SPACE_ENCRYPTION_DEFAULT), + m_encryption(FIL_ENCRYPTION_DEFAULT), m_key_id(FIL_DEFAULT_ENCRYPTION_KEY) { diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index 9c159c6bd15..18ea3cf3cf8 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -1285,8 +1285,6 @@ row_upd_index_replace_new_col_vals_index_pos( ulint n_fields; const page_size_t& page_size = dict_table_page_size(index->table); - ut_ad(index); - dtuple_set_info_bits(entry, update->info_bits); if (order_only) { @@ -1637,8 +1635,6 @@ row_upd_changes_ord_field_binary_func( ulint i; const dict_index_t* clust_index; - ut_ad(index); - ut_ad(update); ut_ad(thr); ut_ad(thr->graph); ut_ad(thr->graph->trx); diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 728a5415be9..b689cd012c6 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -954,14 +954,12 @@ srv_release_threads(enum srv_thread_type type, ulint n) ut_ad(n > 0); do { - srv_sys_mutex_enter(); - running = 0; - for (ulint i = 0; i < srv_sys->n_sys_threads; i++) { - srv_slot_t* slot; + srv_sys_mutex_enter(); - slot = &srv_sys->sys_threads[i]; + for (ulint i = 0; i < srv_sys->n_sys_threads; i++) { + srv_slot_t* slot = &srv_sys->sys_threads[i]; if (!slot->in_use || srv_slot_get_type(slot) != type) { continue; @@ -1692,6 +1690,8 @@ srv_export_innodb_status(void) crypt_stat.estimated_iops; export_vars.innodb_encryption_key_requests = srv_stats.n_key_requests; + export_vars.innodb_key_rotation_list_length = + srv_stats.key_rotation_list_length; export_vars.innodb_scrub_page_reorganizations = scrub_stat.page_reorganizations; diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 077b93e9327..7c16ce1e58f 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -3,7 +3,7 @@ Copyright (c) 1996, 2016, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, 2017, MariaDB Corporation +Copyright (c) 2013, 2017, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -450,6 +450,7 @@ create_log_files( "innodb_redo_log", SRV_LOG_SPACE_FIRST_ID, 0, FIL_TYPE_LOG, NULL, /* innodb_encrypt_log works at a different level */ true /* this is create */); + ut_a(fil_validate()); ut_a(log_space != NULL); @@ -541,9 +542,10 @@ create_log_files_rename( DBUG_EXECUTE_IF("innodb_log_abort_10", err = DB_ERROR;); - fil_open_log_and_system_tablespace_files(); - - ib::info() << "New log files created, LSN=" << lsn; + if (err == DB_SUCCESS) { + fil_open_log_and_system_tablespace_files(); + ib::info() << "New log files created, LSN=" << lsn; + } return(err); } diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index f7a488d7507..4374041982c 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -1464,7 +1464,8 @@ trx_start_low( ut_a(trx->error_state == DB_SUCCESS); - trx->start_time_micro = clock(); + trx->start_time_micro = + trx->mysql_thd ? thd_query_start_micro(trx->mysql_thd) : 0; MONITOR_INC(MONITOR_TRX_ACTIVE); } diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc index d92f6641910..92e18c1c372 100644 --- a/storage/innobase/trx/trx0undo.cc +++ b/storage/innobase/trx/trx0undo.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2014, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software |