diff options
Diffstat (limited to 'storage/xtradb/buf/buf0buf.cc')
-rw-r--r-- | storage/xtradb/buf/buf0buf.cc | 769 |
1 files changed, 529 insertions, 240 deletions
diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc index fbd7aeb581a..85023be9402 100644 --- a/storage/xtradb/buf/buf0buf.cc +++ b/storage/xtradb/buf/buf0buf.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2013, 2015, MariaDB Corporation. All Rights Reserved. @@ -59,11 +59,24 @@ Created 11/5/1995 Heikki Tuuri #include "srv0start.h" #include "ut0byte.h" #include "fil0pagecompress.h" +#include "ha_prototypes.h" /* prototypes for new functions added to ha_innodb.cc */ trx_t* innobase_get_trx(); +/********************************************************************//** +Check if page is maybe compressed, encrypted or both when we encounter +corrupted page. Note that we can't be 100% sure if page is corrupted +or decrypt/decompress just failed. +*/ +static +ibool +buf_page_check_corrupt( +/*===================*/ + buf_page_t* bpage); /*!< in/out: buffer page read from + disk */ + static inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx) @@ -99,10 +112,6 @@ _increment_page_get_statistics(buf_block_t* block, trx_t* trx) #include "lzo/lzo1x.h" #endif -/* Number of temporary slots used for encryption/compression -memory allocation before/after I/O operations */ -#define BUF_MAX_TMP_SLOTS 200 - /* IMPLEMENTATION OF THE BUFFER POOL ================================= @@ -568,6 +577,79 @@ buf_page_is_zeroes( return(true); } +/** Checks if the page is in crc32 checksum format. +@param[in] read_buf database page +@param[in] checksum_field1 new checksum field +@param[in] checksum_field2 old checksum field +@return true if the page is in crc32 checksum format */ +UNIV_INLINE +bool +buf_page_is_checksum_valid_crc32( + const byte* read_buf, + ulint checksum_field1, + ulint checksum_field2) +{ + ib_uint32_t crc32 = buf_calc_page_crc32(read_buf); + + return(checksum_field1 == crc32 && checksum_field2 == crc32); +} + +/** Checks if the page is in innodb checksum format. +@param[in] read_buf database page +@param[in] checksum_field1 new checksum field +@param[in] checksum_field2 old checksum field +@return true if the page is in innodb checksum format */ +UNIV_INLINE +bool +buf_page_is_checksum_valid_innodb( + const byte* read_buf, + ulint checksum_field1, + ulint checksum_field2) +{ + /* There are 2 valid formulas for + checksum_field2 (old checksum field) which algo=innodb could have + written to the page: + + 1. Very old versions of InnoDB only stored 8 byte lsn to the + start and the end of the page. + + 2. Newer InnoDB versions store the old formula checksum + (buf_calc_page_old_checksum()). */ + + if (checksum_field2 != mach_read_from_4(read_buf + FIL_PAGE_LSN) + && checksum_field2 != buf_calc_page_old_checksum(read_buf)) { + return(false); + } + + /* old field is fine, check the new field */ + + /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id + (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */ + + if (checksum_field1 != 0 + && checksum_field1 != buf_calc_page_new_checksum(read_buf)) { + return(false); + } + + return(true); +} + +/** Checks if the page is in none checksum format. +@param[in] read_buf database page +@param[in] checksum_field1 new checksum field +@param[in] checksum_field2 old checksum field +@return true if the page is in none checksum format */ +UNIV_INLINE +bool +buf_page_is_checksum_valid_none( + const byte* read_buf, + ulint checksum_field1, + ulint checksum_field2) +{ + return(checksum_field1 == checksum_field2 + && checksum_field1 == BUF_NO_CHECKSUM_MAGIC); +} + /********************************************************************//** Checks if a page is corrupt. @return TRUE if corrupted */ @@ -584,8 +666,6 @@ buf_page_is_corrupted( ulint page_encrypted = fil_page_is_encrypted(read_buf); ulint checksum_field1; ulint checksum_field2; - ibool crc32_inited = FALSE; - ib_uint32_t crc32 = ULINT32_UNDEFINED; if (!page_encrypted && !zip_size && memcmp(read_buf + FIL_PAGE_LSN + 4, @@ -668,148 +748,121 @@ buf_page_is_corrupted( return(FALSE); } - switch ((srv_checksum_algorithm_t) srv_checksum_algorithm) { - case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: - - crc32 = buf_calc_page_crc32(read_buf); - - return(checksum_field1 != crc32 || checksum_field2 != crc32); - - case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: - - return(checksum_field1 - != buf_calc_page_new_checksum(read_buf) - || checksum_field2 - != buf_calc_page_old_checksum(read_buf)); - - case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: + DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", return(TRUE); ); - return(checksum_field1 != BUF_NO_CHECKSUM_MAGIC - || checksum_field2 != BUF_NO_CHECKSUM_MAGIC); + ulint page_no = mach_read_from_4(read_buf + FIL_PAGE_OFFSET); + ulint space_id = mach_read_from_4(read_buf + FIL_PAGE_SPACE_ID); + const srv_checksum_algorithm_t curr_algo = + static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm); + switch (curr_algo) { case SRV_CHECKSUM_ALGORITHM_CRC32: - case SRV_CHECKSUM_ALGORITHM_INNODB: - /* There are 3 valid formulas for - checksum_field2 (old checksum field): - - 1. Very old versions of InnoDB only stored 8 byte lsn to the - start and the end of the page. - - 2. InnoDB versions before MySQL 5.6.3 store the old formula - checksum (buf_calc_page_old_checksum()). - - 3. InnoDB versions 5.6.3 and newer with - innodb_checksum_algorithm=strict_crc32|crc32 store CRC32. */ - - /* since innodb_checksum_algorithm is not strict_* allow - any of the algos to match for the old field */ - - if (checksum_field2 - != mach_read_from_4(read_buf + FIL_PAGE_LSN) - && checksum_field2 != BUF_NO_CHECKSUM_MAGIC) { - - /* The checksum does not match any of the - fast to check. First check the selected algorithm - for writing checksums because we assume that the - chance of it matching is higher. */ - - if (srv_checksum_algorithm - == SRV_CHECKSUM_ALGORITHM_CRC32) { - - crc32 = buf_calc_page_crc32(read_buf); - crc32_inited = TRUE; - - if (checksum_field2 != crc32 - && checksum_field2 - != buf_calc_page_old_checksum(read_buf)) { + case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: - return(TRUE); - } - } else { - ut_ad(srv_checksum_algorithm - == SRV_CHECKSUM_ALGORITHM_INNODB); + if (buf_page_is_checksum_valid_crc32(read_buf, + checksum_field1, checksum_field2)) { + return(FALSE); + } - if (checksum_field2 - != buf_calc_page_old_checksum(read_buf)) { + if (buf_page_is_checksum_valid_none(read_buf, + checksum_field1, checksum_field2)) { + if (curr_algo + == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) { + page_warn_strict_checksum( + curr_algo, + SRV_CHECKSUM_ALGORITHM_NONE, + space_id, page_no); + } - crc32 = buf_calc_page_crc32(read_buf); - crc32_inited = TRUE; + return(FALSE); + } - if (checksum_field2 != crc32) { - return(TRUE); - } - } + if (buf_page_is_checksum_valid_innodb(read_buf, + checksum_field1, checksum_field2)) { + if (curr_algo + == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) { + page_warn_strict_checksum( + curr_algo, + SRV_CHECKSUM_ALGORITHM_INNODB, + space_id, page_no); } - } - /* old field is fine, check the new field */ + return(FALSE); + } - /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id - (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */ + return(TRUE); - if (checksum_field1 != 0 - && checksum_field1 != BUF_NO_CHECKSUM_MAGIC) { + case SRV_CHECKSUM_ALGORITHM_INNODB: + case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: - /* The checksum does not match any of the - fast to check. First check the selected algorithm - for writing checksums because we assume that the - chance of it matching is higher. */ + if (buf_page_is_checksum_valid_innodb(read_buf, + checksum_field1, checksum_field2)) { + return(FALSE); + } - if (srv_checksum_algorithm - == SRV_CHECKSUM_ALGORITHM_CRC32) { + if (buf_page_is_checksum_valid_none(read_buf, + checksum_field1, checksum_field2)) { + if (curr_algo + == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) { + page_warn_strict_checksum( + curr_algo, + SRV_CHECKSUM_ALGORITHM_NONE, + space_id, page_no); + } - if (!crc32_inited) { - crc32 = buf_calc_page_crc32(read_buf); - crc32_inited = TRUE; - } + return(FALSE); + } - if (checksum_field1 != crc32 - && checksum_field1 - != buf_calc_page_new_checksum(read_buf)) { + if (buf_page_is_checksum_valid_crc32(read_buf, + checksum_field1, checksum_field2)) { + if (curr_algo + == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) { + page_warn_strict_checksum( + curr_algo, + SRV_CHECKSUM_ALGORITHM_CRC32, + space_id, page_no); + } - return(TRUE); - } - } else { - ut_ad(srv_checksum_algorithm - == SRV_CHECKSUM_ALGORITHM_INNODB); + return(FALSE); + } - if (checksum_field1 - != buf_calc_page_new_checksum(read_buf)) { + return(TRUE); - if (!crc32_inited) { - crc32 = buf_calc_page_crc32( - read_buf); - crc32_inited = TRUE; - } + case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: - if (checksum_field1 != crc32) { - return(TRUE); - } - } - } + if (buf_page_is_checksum_valid_none(read_buf, + checksum_field1, checksum_field2)) { + return(FALSE); } - /* If CRC32 is stored in at least one of the fields, then the - other field must also be CRC32 */ - if (crc32_inited - && ((checksum_field1 == crc32 - && checksum_field2 != crc32) - || (checksum_field1 != crc32 - && checksum_field2 == crc32))) { + if (buf_page_is_checksum_valid_crc32(read_buf, + checksum_field1, checksum_field2)) { + page_warn_strict_checksum( + curr_algo, + SRV_CHECKSUM_ALGORITHM_CRC32, + space_id, page_no); + return(FALSE); + } - return(TRUE); + if (buf_page_is_checksum_valid_innodb(read_buf, + checksum_field1, checksum_field2)) { + page_warn_strict_checksum( + curr_algo, + SRV_CHECKSUM_ALGORITHM_INNODB, + space_id, page_no); + return(FALSE); } - break; + return(TRUE); + case SRV_CHECKSUM_ALGORITHM_NONE: /* should have returned FALSE earlier */ - ut_error; + break; /* no default so the compiler will emit a warning if new enum is added and not handled here */ } - DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", return(TRUE); ); - + ut_error; return(FALSE); } @@ -1086,6 +1139,11 @@ buf_block_init( block->page.buf_fix_count = 0; block->page.io_fix = BUF_IO_NONE; block->page.key_version = 0; + block->page.page_encrypted = false; + block->page.page_compressed = false; + block->page.encrypted = false; + block->page.stored_checksum = BUF_NO_CHECKSUM_MAGIC; + block->page.calculated_checksum = BUF_NO_CHECKSUM_MAGIC; block->page.real_size = 0; block->page.write_size = 0; block->modify_clock = 0; @@ -1444,8 +1502,9 @@ buf_pool_init_instance( /* Initialize the temporal memory array and slots */ buf_pool->tmp_arr = (buf_tmp_array_t *)mem_zalloc(sizeof(buf_tmp_array_t)); - buf_pool->tmp_arr->n_slots = BUF_MAX_TMP_SLOTS; - buf_pool->tmp_arr->slots = (buf_tmp_buffer_t*)mem_zalloc(sizeof(buf_tmp_buffer_t) * BUF_MAX_TMP_SLOTS); + ulint n_slots = srv_n_read_io_threads * srv_n_write_io_threads * (8 * OS_AIO_N_PENDING_IOS_PER_THREAD); + buf_pool->tmp_arr->n_slots = n_slots; + buf_pool->tmp_arr->slots = (buf_tmp_buffer_t*)mem_zalloc(sizeof(buf_tmp_buffer_t) * n_slots); buf_pool->try_LRU_scan = TRUE; @@ -1499,8 +1558,31 @@ buf_pool_free_instance( hash_table_free(buf_pool->page_hash); hash_table_free(buf_pool->zip_hash); + /* Free all used temporary slots */ + if (buf_pool->tmp_arr) { + for(ulint i = 0; i < buf_pool->tmp_arr->n_slots; i++) { + buf_tmp_buffer_t* slot = &(buf_pool->tmp_arr->slots[i]); +#ifdef HAVE_LZO + if (slot && slot->lzo_mem) { + ut_free(slot->lzo_mem); + slot->lzo_mem = NULL; + } +#endif + if (slot && slot->crypt_buf_free) { + ut_free(slot->crypt_buf_free); + slot->crypt_buf_free = NULL; + } + + if (slot && slot->comp_buf_free) { + ut_free(slot->comp_buf_free); + slot->comp_buf_free = NULL; + } + } + } + mem_free(buf_pool->tmp_arr->slots); mem_free(buf_pool->tmp_arr); + buf_pool->tmp_arr = NULL; } /********************************************************************//** @@ -1795,6 +1877,9 @@ page_found: goto page_found; } + /* The maximum number of purge threads should never exceed + BUF_POOL_WATCH_SIZE. So there is no way for purge thread + instance to hold a watch when setting another watch. */ for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) { bpage = &buf_pool->watch[i]; @@ -2165,7 +2250,7 @@ lookup: /* Page not in buf_pool: needs to be read from file */ ut_ad(!hash_lock); - buf_read_page(space, zip_size, offset, trx); + buf_read_page(space, zip_size, offset, trx, NULL); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(++buf_dbg_counter % 5771 || buf_validate()); @@ -2684,7 +2769,8 @@ buf_page_get_gen( BUF_GET_IF_IN_POOL_OR_WATCH */ const char* file, /*!< in: file name */ ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mini-transaction */ + mtr_t* mtr, /*!< in: mini-transaction */ + dberr_t* err) /*!< out: error code */ { buf_block_t* block; ulint fold; @@ -2702,6 +2788,11 @@ buf_page_get_gen( ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH) || (rw_latch == RW_NO_LATCH)); + + if (err) { + *err = DB_SUCCESS; + } + #ifdef UNIV_DEBUG switch (mode) { case BUF_GET_NO_LATCH: @@ -2765,6 +2856,8 @@ loop: } if (block == NULL) { + buf_page_t* bpage=NULL; + /* Page not in buf_pool: needs to be read from file */ if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) { @@ -2799,35 +2892,83 @@ loop: return(NULL); } - if (buf_read_page(space, zip_size, offset, trx)) { + if (buf_read_page(space, zip_size, offset, trx, &bpage)) { buf_read_ahead_random(space, zip_size, offset, ibuf_inside(mtr), trx); retries = 0; } else if (retries < BUF_PAGE_READ_MAX_RETRIES) { ++retries; + + bool corrupted = true; + + if (bpage) { + corrupted = buf_page_check_corrupt(bpage); + } + + /* Do not try again for encrypted pages */ + if (!corrupted) { + ib_mutex_t* pmutex = buf_page_get_mutex(bpage); + mutex_enter(&buf_pool->LRU_list_mutex); + mutex_enter(pmutex); + buf_block_t* block = buf_page_get_block(bpage); + buf_page_set_io_fix(bpage, BUF_IO_NONE); + buf_block_set_state(block, BUF_BLOCK_NOT_USED); + buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE); + mutex_exit(&buf_pool->LRU_list_mutex); + mutex_exit(pmutex); + + if (err) { + *err = DB_DECRYPTION_FAILED; + } + return (NULL); + } + DBUG_EXECUTE_IF( "innodb_page_corruption_retries", retries = BUF_PAGE_READ_MAX_RETRIES; ); } else { - fprintf(stderr, "InnoDB: Error: Unable" - " to read tablespace %lu page no" - " %lu into the buffer pool after" - " %lu attempts\n" - "InnoDB: The most probable cause" - " of this error may be that the" - " table has been corrupted.\n" - "InnoDB: You can try to fix this" - " problem by using" - " innodb_force_recovery.\n" - "InnoDB: Please see reference manual" - " for more details.\n" - "InnoDB: Aborting...\n", - space, offset, - BUF_PAGE_READ_MAX_RETRIES); + bool corrupted = true; - ut_error; + if (bpage) { + corrupted = buf_page_check_corrupt(bpage); + } + + if (corrupted) { + fprintf(stderr, "InnoDB: Error: Unable" + " to read tablespace %lu page no" + " %lu into the buffer pool after" + " %lu attempts\n" + "InnoDB: The most probable cause" + " of this error may be that the" + " table has been corrupted.\n" + "InnoDB: You can try to fix this" + " problem by using" + " innodb_force_recovery.\n" + "InnoDB: Please see reference manual" + " for more details.\n" + "InnoDB: Aborting...\n", + space, offset, + BUF_PAGE_READ_MAX_RETRIES); + + ut_error; + } else { + ib_mutex_t* pmutex = buf_page_get_mutex(bpage); + mutex_enter(&buf_pool->LRU_list_mutex); + mutex_enter(pmutex); + buf_block_t* block = buf_page_get_block(bpage); + buf_page_set_io_fix(bpage, BUF_IO_NONE); + buf_block_set_state(block, BUF_BLOCK_NOT_USED); + buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE); + mutex_exit(&buf_pool->LRU_list_mutex); + mutex_exit(pmutex); + + if (err) { + *err = DB_DECRYPTION_FAILED; + } + return (NULL); + } } #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG @@ -3600,6 +3741,11 @@ buf_page_init_low( bpage->oldest_modification = 0; bpage->write_size = 0; bpage->key_version = 0; + bpage->stored_checksum = BUF_NO_CHECKSUM_MAGIC; + bpage->calculated_checksum = BUF_NO_CHECKSUM_MAGIC; + bpage->page_encrypted = false; + bpage->page_compressed = false; + bpage->encrypted = false; bpage->real_size = 0; HASH_INVALIDATE(bpage, hash); @@ -4269,38 +4415,125 @@ buf_mark_space_corrupt( /* First unfix and release lock on the bpage */ ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); - mutex_enter(&buf_pool->LRU_list_mutex); - rw_lock_x_lock(hash_lock); - mutex_enter(buf_page_get_mutex(bpage)); - ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ); - ut_ad(bpage->buf_fix_count == 0); - /* Set BUF_IO_NONE before we remove the block from LRU list */ - buf_page_set_io_fix(bpage, BUF_IO_NONE); + if (!bpage->encrypted) { + mutex_enter(&buf_pool->LRU_list_mutex); + rw_lock_x_lock(hash_lock); + mutex_enter(buf_page_get_mutex(bpage)); + ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ); + ut_ad(bpage->buf_fix_count == 0); - if (uncompressed) { - rw_lock_x_unlock_gen( - &((buf_block_t*) bpage)->lock, - BUF_IO_READ); + /* Set BUF_IO_NONE before we remove the block from LRU list */ + buf_page_set_io_fix(bpage, BUF_IO_NONE); + + if (uncompressed) { + rw_lock_x_unlock_gen( + &((buf_block_t*) bpage)->lock, + BUF_IO_READ); + } } /* Find the table with specified space id, and mark it corrupted */ if (dict_set_corrupted_by_space(space)) { - buf_LRU_free_one_page(bpage); + if (!bpage->encrypted) { + buf_LRU_free_one_page(bpage); + } } else { - mutex_exit(buf_page_get_mutex(bpage)); + if (!bpage->encrypted) { + mutex_exit(buf_page_get_mutex(bpage)); + } ret = FALSE; } - mutex_exit(&buf_pool->LRU_list_mutex); - - ut_ad(buf_pool->n_pend_reads > 0); - os_atomic_decrement_ulint(&buf_pool->n_pend_reads, 1); + if(!bpage->encrypted) { + mutex_exit(&buf_pool->LRU_list_mutex); + ut_ad(buf_pool->n_pend_reads > 0); + os_atomic_decrement_ulint(&buf_pool->n_pend_reads, 1); + } return(ret); } /********************************************************************//** +Check if page is maybe compressed, encrypted or both when we encounter +corrupted page. Note that we can't be 100% sure if page is corrupted +or decrypt/decompress just failed. +*/ +static +ibool +buf_page_check_corrupt( +/*===================*/ + buf_page_t* bpage) /*!< in/out: buffer page read from disk */ +{ + ulint zip_size = buf_page_get_zip_size(bpage); + byte* dst_frame = (zip_size) ? bpage->zip.data : + ((buf_block_t*) bpage)->frame; + unsigned key_version = bpage->key_version; + bool page_compressed = bpage->page_encrypted; + ulint stored_checksum = bpage->stored_checksum; + ulint calculated_checksum = bpage->stored_checksum; + bool page_compressed_encrypted = bpage->page_compressed; + ulint space_id = mach_read_from_4( + dst_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id); + fil_space_t* space = fil_space_found_by_id(space_id); + bool corrupted = true; + + if (key_version != 0 || page_compressed_encrypted) { + bpage->encrypted = true; + } + + if (key_version != 0 || + (crypt_data && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED) || + page_compressed || page_compressed_encrypted) { + + /* Page is really corrupted if post encryption stored + checksum does not match calculated checksum after page was + read. For pages compressed and then encrypted, there is no + checksum. */ + corrupted = (!page_compressed_encrypted && stored_checksum != calculated_checksum); + + if (corrupted) { + ib_logf(IB_LOG_LEVEL_ERROR, + "%s: Block in space_id %lu in file %s corrupted.", + page_compressed_encrypted ? "Maybe corruption" : "Corruption", + space_id, space ? space->name : "NULL"); + ib_logf(IB_LOG_LEVEL_ERROR, + "Page based on contents %s encrypted.", + (key_version == 0 && page_compressed_encrypted == false) ? "not" : "maybe"); + if (stored_checksum != BUF_NO_CHECKSUM_MAGIC || calculated_checksum != BUF_NO_CHECKSUM_MAGIC) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Page stored checksum %lu but calculated checksum %lu.", + stored_checksum, calculated_checksum); + } + ib_logf(IB_LOG_LEVEL_ERROR, + "Reason could be that key_version %u in page " + "or in crypt_data %p could not be found.", + key_version, crypt_data); + ib_logf(IB_LOG_LEVEL_ERROR, + "Reason could be also that key management plugin is not found or" + " used encryption algorithm or method does not match."); + ib_logf(IB_LOG_LEVEL_ERROR, + "Based on page page compressed %d, compressed and encrypted %d.", + page_compressed, page_compressed_encrypted); + } else { + ib_logf(IB_LOG_LEVEL_ERROR, + "Block in space_id %lu in file %s encrypted.", + space_id, space ? space->name : "NULL"); + ib_logf(IB_LOG_LEVEL_ERROR, + "However key management plugin or used key_id %u is not found or" + " used encryption algorithm or method does not match.", + key_version); + ib_logf(IB_LOG_LEVEL_ERROR, + "Marking tablespace as missing. You may drop this table or" + " install correct key management plugin and key file."); + } + } + + return corrupted; +} + +/********************************************************************//** Completes an asynchronous read or write request of a file page to or from the buffer pool. @return true if successful */ @@ -4418,47 +4651,46 @@ buf_page_io_complete( ;); corrupt: - fil_system_enter(); - space = fil_space_get_by_id(bpage->space); - fil_system_exit(); - fprintf(stderr, - "InnoDB: Database page corruption on disk" - " or a failed\n" - "InnoDB: space %lu file %s read of page %lu.\n" - "InnoDB: You may have to recover" - " from a backup.\n", - (ulint)bpage->space, - space ? space->name : "NULL", - (ulong) bpage->offset); - - buf_page_print(frame, buf_page_get_zip_size(bpage), - BUF_PAGE_PRINT_NO_CRASH); - fprintf(stderr, - "InnoDB: Database page corruption on disk" - " or a failed\n" - "InnoDB: file read of page %lu.\n" - "InnoDB: You may have to recover" - " from a backup.\n", - (ulong) bpage->offset); - fputs("InnoDB: It is also possible that" - " your operating\n" - "InnoDB: system has corrupted its" - " own file cache\n" - "InnoDB: and rebooting your computer" - " removes the\n" - "InnoDB: error.\n" - "InnoDB: If the corrupt page is an index page\n" - "InnoDB: you can also try to" - " fix the corruption\n" - "InnoDB: by dumping, dropping," - " and reimporting\n" - "InnoDB: the corrupt table." - " You can use CHECK\n" - "InnoDB: TABLE to scan your" - " table for corruption.\n" - "InnoDB: See also " - REFMAN "forcing-innodb-recovery.html\n" - "InnoDB: about forcing recovery.\n", stderr); + bool corrupted = buf_page_check_corrupt(bpage); + + if (corrupted) { + fil_system_enter(); + space = fil_space_get_by_id(bpage->space); + fil_system_exit(); + ib_logf(IB_LOG_LEVEL_ERROR, + "Database page corruption on disk" + " or a failed"); + ib_logf(IB_LOG_LEVEL_ERROR, + "Space %lu file %s read of page %lu.", + (ulint)bpage->space, + space ? space->name : "NULL", + (ulong) bpage->offset); + ib_logf(IB_LOG_LEVEL_ERROR, + "You may have to recover" + " from a backup."); + + + buf_page_print(frame, buf_page_get_zip_size(bpage), + BUF_PAGE_PRINT_NO_CRASH); + + ib_logf(IB_LOG_LEVEL_ERROR, + "It is also possible that your operating" + "system has corrupted its own file cache."); + ib_logf(IB_LOG_LEVEL_ERROR, + "and rebooting your computer removes the error."); + ib_logf(IB_LOG_LEVEL_ERROR, + "If the corrupt page is an index page you can also try to"); + ib_logf(IB_LOG_LEVEL_ERROR, + "fix the corruption by dumping, dropping, and reimporting"); + ib_logf(IB_LOG_LEVEL_ERROR, + "the corrupt table. You can use CHECK"); + ib_logf(IB_LOG_LEVEL_ERROR, + "TABLE to scan your table for corruption."); + ib_logf(IB_LOG_LEVEL_ERROR, + "See also " + REFMAN "forcing-innodb-recovery.html" + " about forcing recovery."); + } if (srv_pass_corrupt_table && bpage->space != 0 && bpage->space < SRV_LOG_SPACE_FIRST_ID) { @@ -4476,7 +4708,8 @@ corrupt: dict_table_set_corrupt_by_space(bpage->space, TRUE); } bpage->is_corrupt = TRUE; - } else + } + if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) { /* If page space id is larger than TRX_SYS_SPACE (0), we will attempt to mark the corresponding @@ -4485,12 +4718,30 @@ corrupt: && buf_mark_space_corrupt(bpage)) { return(false); } else { - fputs("InnoDB: Ending processing" - " because of" - " a corrupt database page.\n", - stderr); + corrupted = buf_page_check_corrupt(bpage); - ut_error; + if (corrupted) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Ending processing because of a corrupt database page."); + + ut_error; + } + + ib_push_warning(innobase_get_trx(), DB_DECRYPTION_FAILED, + "Table in tablespace %lu encrypted." + "However key management plugin or used key_id %lu is not found or" + " used encryption algorithm or method does not match." + " Can't continue opening the table.", + bpage->key_version); + + if (bpage->space > TRX_SYS_SPACE) { + if (corrupted) { + buf_mark_space_corrupt(bpage); + } + } else { + ut_error; + } + return(false); } } } @@ -4668,11 +4919,13 @@ buf_all_freed_instance( mutex_exit(&buf_pool->LRU_list_mutex); if (UNIV_LIKELY_NULL(block)) { - fprintf(stderr, - "Page %lu %lu still fixed or dirty\n", - (ulong) block->page.space, - (ulong) block->page.offset); - ut_error; + if (block->page.key_version == 0) { + fprintf(stderr, + "Page %lu %lu still fixed or dirty\n", + (ulong) block->page.space, + (ulong) block->page.offset); + ut_error; + } } } @@ -5873,30 +6126,31 @@ Encrypts a buffer page right before it's flushed to disk byte* buf_page_encrypt_before_write( /*==========================*/ - buf_page_t* bpage, /*!< in/out: buffer page to be flushed */ - const byte* src_frame, /*!< in: src frame */ - ulint space_id) /*!< in: space id */ + buf_page_t* bpage, /*!< in/out: buffer page to be flushed */ + byte* src_frame, /*!< in: src frame */ + ulint space_id) /*!< in: space id */ { fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id); ulint zip_size = buf_page_get_zip_size(bpage); ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); bool page_compressed = fil_space_is_page_compressed(bpage->space); - bpage->real_size = UNIV_PAGE_SIZE; bool encrypted = true; + bpage->real_size = UNIV_PAGE_SIZE; + fil_page_type_validate(src_frame); if (bpage->offset == 0) { /* Page 0 of a tablespace is not encrypted/compressed */ ut_ad(bpage->key_version == 0); - return const_cast<byte*>(src_frame); + return src_frame; } if (bpage->space == TRX_SYS_SPACE && bpage->offset == TRX_SYS_PAGE_NO) { /* don't encrypt/compress page as it contains address to dblwr buffer */ bpage->key_version = 0; - return const_cast<byte*>(src_frame); + return src_frame; } if (crypt_data != NULL && crypt_data->encryption == FIL_SPACE_ENCRYPTION_OFF) { @@ -5918,31 +6172,35 @@ buf_page_encrypt_before_write( if (!encrypted && !page_compressed) { /* No need to encrypt or page compress the page */ - return const_cast<byte*>(src_frame); + return src_frame; } /* Find free slot from temporary memory array */ buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed); + slot->out_buf = NULL; bpage->slot = slot; - byte *dst_frame = bpage->slot->out_buf = slot->crypt_buf; + byte *dst_frame = slot->crypt_buf; if (!page_compressed) { /* Encrypt page content */ - fil_space_encrypt(bpage->space, - bpage->offset, - bpage->newest_modification, - src_frame, - zip_size, - dst_frame); + byte* tmp = fil_space_encrypt(bpage->space, + bpage->offset, + bpage->newest_modification, + src_frame, + zip_size, + dst_frame); unsigned key_version = mach_read_from_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); ut_ad(key_version == 0 || key_version >= bpage->key_version); bpage->key_version = key_version; bpage->real_size = page_size; + slot->out_buf = dst_frame = tmp; - fil_page_type_validate(dst_frame); +#ifdef UNIV_DEBUG + fil_page_type_validate(tmp); +#endif } else { /* First we compress the page content */ @@ -5962,22 +6220,27 @@ buf_page_encrypt_before_write( bpage->real_size = out_len; +#ifdef UNIV_DEBUG fil_page_type_validate(tmp); +#endif + if(encrypted) { /* And then we encrypt the page content */ - fil_space_encrypt(bpage->space, - bpage->offset, - bpage->newest_modification, - tmp, - zip_size, - dst_frame); - } else { - bpage->slot->out_buf = dst_frame = tmp; + tmp = fil_space_encrypt(bpage->space, + bpage->offset, + bpage->newest_modification, + tmp, + zip_size, + dst_frame); } + + slot->out_buf = dst_frame = tmp; } +#ifdef UNIV_DEBUG fil_page_type_validate(dst_frame); +#endif // return dst_frame which will be written return dst_frame; @@ -5989,7 +6252,7 @@ Decrypt page after it has been read from disk ibool buf_page_decrypt_after_read( /*========================*/ - buf_page_t* bpage) /*!< in/out: buffer page read from disk */ + buf_page_t* bpage) /*!< in/out: buffer page read from disk */ { ulint zip_size = buf_page_get_zip_size(bpage); ulint size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; @@ -6002,6 +6265,11 @@ buf_page_decrypt_after_read( bool page_compressed_encrypted = fil_page_is_compressed_encrypted(dst_frame); buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + /* If page is encrypted read post-encryption checksum */ + if (!page_compressed_encrypted && key_version != 0) { + bpage->stored_checksum = mach_read_from_4(dst_frame + + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4); + } + ut_ad(bpage->key_version == 0); if (bpage->offset == 0) { @@ -6009,13 +6277,21 @@ buf_page_decrypt_after_read( return (TRUE); } + /* Store these for corruption check */ + bpage->key_version = key_version; + bpage->page_encrypted = page_compressed_encrypted; + bpage->page_compressed = page_compressed; + if (page_compressed) { /* the page we read is unencrypted */ /* Find free slot from temporary memory array */ buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed); +#ifdef UNIV_DEBUG fil_page_type_validate(dst_frame); +#endif + /* decompress using comp_buf to dst_frame */ fil_decompress_page(slot->comp_buf, dst_frame, size, @@ -6025,24 +6301,34 @@ buf_page_decrypt_after_read( slot->reserved = false; key_version = 0; +#ifdef UNIV_DEBUG fil_page_type_validate(dst_frame); +#endif } else { buf_tmp_buffer_t* slot = NULL; if (key_version) { /* Find free slot from temporary memory array */ slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed); - memcpy(slot->crypt_buf, dst_frame, size); +#ifdef UNIV_DEBUG fil_page_type_validate(dst_frame); - fil_page_type_validate(slot->crypt_buf); - /* decrypt from crypt_buf to dst_frame */ +#endif + + /* Calculate checksum before decrypt, this will be + used later to find out if incorrect key was used. */ + if (!page_compressed_encrypted) { + bpage->calculated_checksum = fil_crypt_calculate_checksum(zip_size, dst_frame); + } + + /* decrypt using crypt_buf to dst_frame */ fil_space_decrypt(bpage->space, - slot->crypt_buf, - size, - dst_frame); + slot->crypt_buf, + size, + dst_frame); +#ifdef UNIV_DEBUG fil_page_type_validate(dst_frame); - fil_page_type_validate(slot->crypt_buf); +#endif } if (page_compressed_encrypted) { @@ -6053,13 +6339,16 @@ buf_page_decrypt_after_read( #ifdef UNIV_DEBUG fil_page_type_validate(dst_frame); #endif + /* decompress using comp_buf to dst_frame */ fil_decompress_page(slot->comp_buf, dst_frame, size, &bpage->write_size); } +#ifdef UNIV_DEBUG fil_page_type_validate(dst_frame); +#endif /* Mark this slot as free */ if (slot) { |