summaryrefslogtreecommitdiff
path: root/storage/xtradb/buf/buf0buf.cc
diff options
context:
space:
mode:
Diffstat (limited to 'storage/xtradb/buf/buf0buf.cc')
-rw-r--r--storage/xtradb/buf/buf0buf.cc769
1 files changed, 529 insertions, 240 deletions
diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc
index fbd7aeb581a..85023be9402 100644
--- a/storage/xtradb/buf/buf0buf.cc
+++ b/storage/xtradb/buf/buf0buf.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2013, 2015, MariaDB Corporation. All Rights Reserved.
@@ -59,11 +59,24 @@ Created 11/5/1995 Heikki Tuuri
#include "srv0start.h"
#include "ut0byte.h"
#include "fil0pagecompress.h"
+#include "ha_prototypes.h"
/* prototypes for new functions added to ha_innodb.cc */
trx_t* innobase_get_trx();
+/********************************************************************//**
+Check if page is maybe compressed, encrypted or both when we encounter
+corrupted page. Note that we can't be 100% sure if page is corrupted
+or decrypt/decompress just failed.
+Forward declaration; the definition appears later in this file.
+@return TRUE if the page is considered corrupted, FALSE if the page
+shows signs of encryption/compression and is not judged corrupted
+*/
+static
+ibool
+buf_page_check_corrupt(
+/*===================*/
+ buf_page_t* bpage); /*!< in/out: buffer page read from
+ disk */
+
static inline
void
_increment_page_get_statistics(buf_block_t* block, trx_t* trx)
@@ -99,10 +112,6 @@ _increment_page_get_statistics(buf_block_t* block, trx_t* trx)
#include "lzo/lzo1x.h"
#endif
-/* Number of temporary slots used for encryption/compression
-memory allocation before/after I/O operations */
-#define BUF_MAX_TMP_SLOTS 200
-
/*
IMPLEMENTATION OF THE BUFFER POOL
=================================
@@ -568,6 +577,79 @@ buf_page_is_zeroes(
return(true);
}
+/** Tells whether both checksum fields of a page carry its CRC32 checksum.
+@param[in]	read_buf	database page
+@param[in]	checksum_field1	new checksum field
+@param[in]	checksum_field2	old checksum field
+@return true if both fields equal the CRC32 computed over the page */
+UNIV_INLINE
+bool
+buf_page_is_checksum_valid_crc32(
+	const byte*	read_buf,
+	ulint		checksum_field1,
+	ulint		checksum_field2)
+{
+	/* In crc32 format the same value is stored in both fields. */
+	const ib_uint32_t	expected = buf_calc_page_crc32(read_buf);
+
+	if (checksum_field1 != expected) {
+		return(false);
+	}
+
+	return(checksum_field2 == expected);
+}
+
+/** Tells whether the checksum fields of a page match what the innodb
+checksum algorithm could have written.
+@param[in]	read_buf	database page
+@param[in]	checksum_field1	new checksum field
+@param[in]	checksum_field2	old checksum field
+@return true if the page is in innodb checksum format */
+UNIV_INLINE
+bool
+buf_page_is_checksum_valid_innodb(
+	const byte*	read_buf,
+	ulint		checksum_field1,
+	ulint		checksum_field2)
+{
+	/* The old checksum field is accepted in two cases:
+
+	1. It equals the 4 bytes read at FIL_PAGE_LSN: very old versions
+	of InnoDB only stored the 8 byte lsn to the start and the end of
+	the page.
+
+	2. It equals the old formula checksum
+	(buf_calc_page_old_checksum()), as written by newer versions.
+
+	The cheap lsn comparison is tried first. */
+	const bool	old_field_ok
+		= checksum_field2 == mach_read_from_4(read_buf + FIL_PAGE_LSN)
+		|| checksum_field2 == buf_calc_page_old_checksum(read_buf);
+
+	if (!old_field_ok) {
+		return(false);
+	}
+
+	/* The new checksum field may be 0, because InnoDB versions
+	< 4.0.14 and < 4.1.1 stored the space id (always equal to 0) to
+	FIL_PAGE_SPACE_OR_CHKSUM; otherwise it must match the new
+	formula checksum. */
+	return(checksum_field1 == 0
+	       || checksum_field1 == buf_calc_page_new_checksum(read_buf));
+}
+
+/** Tells whether both checksum fields of a page hold the "no checksum"
+magic value.
+@param[in]	read_buf	database page (not examined by this check)
+@param[in]	checksum_field1	new checksum field
+@param[in]	checksum_field2	old checksum field
+@return true if the page is in none checksum format */
+UNIV_INLINE
+bool
+buf_page_is_checksum_valid_none(
+	const byte*	read_buf,
+	ulint		checksum_field1,
+	ulint		checksum_field2)
+{
+	if (checksum_field1 != BUF_NO_CHECKSUM_MAGIC) {
+		return(false);
+	}
+
+	/* checksum_field1 is the magic here, so both must be the magic. */
+	return(checksum_field2 == checksum_field1);
+}
+
/********************************************************************//**
Checks if a page is corrupt.
@return TRUE if corrupted */
@@ -584,8 +666,6 @@ buf_page_is_corrupted(
ulint page_encrypted = fil_page_is_encrypted(read_buf);
ulint checksum_field1;
ulint checksum_field2;
- ibool crc32_inited = FALSE;
- ib_uint32_t crc32 = ULINT32_UNDEFINED;
if (!page_encrypted && !zip_size
&& memcmp(read_buf + FIL_PAGE_LSN + 4,
@@ -668,148 +748,121 @@ buf_page_is_corrupted(
return(FALSE);
}
- switch ((srv_checksum_algorithm_t) srv_checksum_algorithm) {
- case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
-
- crc32 = buf_calc_page_crc32(read_buf);
-
- return(checksum_field1 != crc32 || checksum_field2 != crc32);
-
- case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
-
- return(checksum_field1
- != buf_calc_page_new_checksum(read_buf)
- || checksum_field2
- != buf_calc_page_old_checksum(read_buf));
-
- case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
+ DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", return(TRUE); );
- return(checksum_field1 != BUF_NO_CHECKSUM_MAGIC
- || checksum_field2 != BUF_NO_CHECKSUM_MAGIC);
+ ulint page_no = mach_read_from_4(read_buf + FIL_PAGE_OFFSET);
+ ulint space_id = mach_read_from_4(read_buf + FIL_PAGE_SPACE_ID);
+ const srv_checksum_algorithm_t curr_algo =
+ static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm);
+ switch (curr_algo) {
case SRV_CHECKSUM_ALGORITHM_CRC32:
- case SRV_CHECKSUM_ALGORITHM_INNODB:
- /* There are 3 valid formulas for
- checksum_field2 (old checksum field):
-
- 1. Very old versions of InnoDB only stored 8 byte lsn to the
- start and the end of the page.
-
- 2. InnoDB versions before MySQL 5.6.3 store the old formula
- checksum (buf_calc_page_old_checksum()).
-
- 3. InnoDB versions 5.6.3 and newer with
- innodb_checksum_algorithm=strict_crc32|crc32 store CRC32. */
-
- /* since innodb_checksum_algorithm is not strict_* allow
- any of the algos to match for the old field */
-
- if (checksum_field2
- != mach_read_from_4(read_buf + FIL_PAGE_LSN)
- && checksum_field2 != BUF_NO_CHECKSUM_MAGIC) {
-
- /* The checksum does not match any of the
- fast to check. First check the selected algorithm
- for writing checksums because we assume that the
- chance of it matching is higher. */
-
- if (srv_checksum_algorithm
- == SRV_CHECKSUM_ALGORITHM_CRC32) {
-
- crc32 = buf_calc_page_crc32(read_buf);
- crc32_inited = TRUE;
-
- if (checksum_field2 != crc32
- && checksum_field2
- != buf_calc_page_old_checksum(read_buf)) {
+ case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
- return(TRUE);
- }
- } else {
- ut_ad(srv_checksum_algorithm
- == SRV_CHECKSUM_ALGORITHM_INNODB);
+ if (buf_page_is_checksum_valid_crc32(read_buf,
+ checksum_field1, checksum_field2)) {
+ return(FALSE);
+ }
- if (checksum_field2
- != buf_calc_page_old_checksum(read_buf)) {
+ if (buf_page_is_checksum_valid_none(read_buf,
+ checksum_field1, checksum_field2)) {
+ if (curr_algo
+ == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) {
+ page_warn_strict_checksum(
+ curr_algo,
+ SRV_CHECKSUM_ALGORITHM_NONE,
+ space_id, page_no);
+ }
- crc32 = buf_calc_page_crc32(read_buf);
- crc32_inited = TRUE;
+ return(FALSE);
+ }
- if (checksum_field2 != crc32) {
- return(TRUE);
- }
- }
+ if (buf_page_is_checksum_valid_innodb(read_buf,
+ checksum_field1, checksum_field2)) {
+ if (curr_algo
+ == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) {
+ page_warn_strict_checksum(
+ curr_algo,
+ SRV_CHECKSUM_ALGORITHM_INNODB,
+ space_id, page_no);
}
- }
- /* old field is fine, check the new field */
+ return(FALSE);
+ }
- /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
- (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */
+ return(TRUE);
- if (checksum_field1 != 0
- && checksum_field1 != BUF_NO_CHECKSUM_MAGIC) {
+ case SRV_CHECKSUM_ALGORITHM_INNODB:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
- /* The checksum does not match any of the
- fast to check. First check the selected algorithm
- for writing checksums because we assume that the
- chance of it matching is higher. */
+ if (buf_page_is_checksum_valid_innodb(read_buf,
+ checksum_field1, checksum_field2)) {
+ return(FALSE);
+ }
- if (srv_checksum_algorithm
- == SRV_CHECKSUM_ALGORITHM_CRC32) {
+ if (buf_page_is_checksum_valid_none(read_buf,
+ checksum_field1, checksum_field2)) {
+ if (curr_algo
+ == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) {
+ page_warn_strict_checksum(
+ curr_algo,
+ SRV_CHECKSUM_ALGORITHM_NONE,
+ space_id, page_no);
+ }
- if (!crc32_inited) {
- crc32 = buf_calc_page_crc32(read_buf);
- crc32_inited = TRUE;
- }
+ return(FALSE);
+ }
- if (checksum_field1 != crc32
- && checksum_field1
- != buf_calc_page_new_checksum(read_buf)) {
+ if (buf_page_is_checksum_valid_crc32(read_buf,
+ checksum_field1, checksum_field2)) {
+ if (curr_algo
+ == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) {
+ page_warn_strict_checksum(
+ curr_algo,
+ SRV_CHECKSUM_ALGORITHM_CRC32,
+ space_id, page_no);
+ }
- return(TRUE);
- }
- } else {
- ut_ad(srv_checksum_algorithm
- == SRV_CHECKSUM_ALGORITHM_INNODB);
+ return(FALSE);
+ }
- if (checksum_field1
- != buf_calc_page_new_checksum(read_buf)) {
+ return(TRUE);
- if (!crc32_inited) {
- crc32 = buf_calc_page_crc32(
- read_buf);
- crc32_inited = TRUE;
- }
+ case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
- if (checksum_field1 != crc32) {
- return(TRUE);
- }
- }
- }
+ if (buf_page_is_checksum_valid_none(read_buf,
+ checksum_field1, checksum_field2)) {
+ return(FALSE);
}
- /* If CRC32 is stored in at least one of the fields, then the
- other field must also be CRC32 */
- if (crc32_inited
- && ((checksum_field1 == crc32
- && checksum_field2 != crc32)
- || (checksum_field1 != crc32
- && checksum_field2 == crc32))) {
+ if (buf_page_is_checksum_valid_crc32(read_buf,
+ checksum_field1, checksum_field2)) {
+ page_warn_strict_checksum(
+ curr_algo,
+ SRV_CHECKSUM_ALGORITHM_CRC32,
+ space_id, page_no);
+ return(FALSE);
+ }
- return(TRUE);
+ if (buf_page_is_checksum_valid_innodb(read_buf,
+ checksum_field1, checksum_field2)) {
+ page_warn_strict_checksum(
+ curr_algo,
+ SRV_CHECKSUM_ALGORITHM_INNODB,
+ space_id, page_no);
+ return(FALSE);
}
- break;
+ return(TRUE);
+
case SRV_CHECKSUM_ALGORITHM_NONE:
/* should have returned FALSE earlier */
- ut_error;
+ break;
/* no default so the compiler will emit a warning if new enum
is added and not handled here */
}
- DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", return(TRUE); );
-
+ ut_error;
return(FALSE);
}
@@ -1086,6 +1139,11 @@ buf_block_init(
block->page.buf_fix_count = 0;
block->page.io_fix = BUF_IO_NONE;
block->page.key_version = 0;
+ block->page.page_encrypted = false;
+ block->page.page_compressed = false;
+ block->page.encrypted = false;
+ block->page.stored_checksum = BUF_NO_CHECKSUM_MAGIC;
+ block->page.calculated_checksum = BUF_NO_CHECKSUM_MAGIC;
block->page.real_size = 0;
block->page.write_size = 0;
block->modify_clock = 0;
@@ -1444,8 +1502,9 @@ buf_pool_init_instance(
/* Initialize the temporal memory array and slots */
buf_pool->tmp_arr = (buf_tmp_array_t *)mem_zalloc(sizeof(buf_tmp_array_t));
- buf_pool->tmp_arr->n_slots = BUF_MAX_TMP_SLOTS;
- buf_pool->tmp_arr->slots = (buf_tmp_buffer_t*)mem_zalloc(sizeof(buf_tmp_buffer_t) * BUF_MAX_TMP_SLOTS);
+ ulint n_slots = srv_n_read_io_threads * srv_n_write_io_threads * (8 * OS_AIO_N_PENDING_IOS_PER_THREAD);
+ buf_pool->tmp_arr->n_slots = n_slots;
+ buf_pool->tmp_arr->slots = (buf_tmp_buffer_t*)mem_zalloc(sizeof(buf_tmp_buffer_t) * n_slots);
buf_pool->try_LRU_scan = TRUE;
@@ -1499,8 +1558,31 @@ buf_pool_free_instance(
hash_table_free(buf_pool->page_hash);
hash_table_free(buf_pool->zip_hash);
+ /* Free all used temporary slots */
+ if (buf_pool->tmp_arr) {
+ for(ulint i = 0; i < buf_pool->tmp_arr->n_slots; i++) {
+ buf_tmp_buffer_t* slot = &(buf_pool->tmp_arr->slots[i]);
+#ifdef HAVE_LZO
+ if (slot && slot->lzo_mem) {
+ ut_free(slot->lzo_mem);
+ slot->lzo_mem = NULL;
+ }
+#endif
+ if (slot && slot->crypt_buf_free) {
+ ut_free(slot->crypt_buf_free);
+ slot->crypt_buf_free = NULL;
+ }
+
+ if (slot && slot->comp_buf_free) {
+ ut_free(slot->comp_buf_free);
+ slot->comp_buf_free = NULL;
+ }
+ }
+ }
+
mem_free(buf_pool->tmp_arr->slots);
mem_free(buf_pool->tmp_arr);
+ buf_pool->tmp_arr = NULL;
}
/********************************************************************//**
@@ -1795,6 +1877,9 @@ page_found:
goto page_found;
}
+ /* The maximum number of purge threads should never exceed
+ BUF_POOL_WATCH_SIZE. So there is no way for purge thread
+ instance to hold a watch when setting another watch. */
for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
bpage = &buf_pool->watch[i];
@@ -2165,7 +2250,7 @@ lookup:
/* Page not in buf_pool: needs to be read from file */
ut_ad(!hash_lock);
- buf_read_page(space, zip_size, offset, trx);
+ buf_read_page(space, zip_size, offset, trx, NULL);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
@@ -2684,7 +2769,8 @@ buf_page_get_gen(
BUF_GET_IF_IN_POOL_OR_WATCH */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mini-transaction */
+ mtr_t* mtr, /*!< in: mini-transaction */
+ dberr_t* err) /*!< out: error code */
{
buf_block_t* block;
ulint fold;
@@ -2702,6 +2788,11 @@ buf_page_get_gen(
ut_ad((rw_latch == RW_S_LATCH)
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_NO_LATCH));
+
+ if (err) {
+ *err = DB_SUCCESS;
+ }
+
#ifdef UNIV_DEBUG
switch (mode) {
case BUF_GET_NO_LATCH:
@@ -2765,6 +2856,8 @@ loop:
}
if (block == NULL) {
+ buf_page_t* bpage=NULL;
+
/* Page not in buf_pool: needs to be read from file */
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
@@ -2799,35 +2892,83 @@ loop:
return(NULL);
}
- if (buf_read_page(space, zip_size, offset, trx)) {
+ if (buf_read_page(space, zip_size, offset, trx, &bpage)) {
buf_read_ahead_random(space, zip_size, offset,
ibuf_inside(mtr), trx);
retries = 0;
} else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
++retries;
+
+ bool corrupted = true;
+
+ if (bpage) {
+ corrupted = buf_page_check_corrupt(bpage);
+ }
+
+ /* Do not try again for encrypted pages */
+ if (!corrupted) {
+ ib_mutex_t* pmutex = buf_page_get_mutex(bpage);
+ mutex_enter(&buf_pool->LRU_list_mutex);
+ mutex_enter(pmutex);
+ buf_block_t* block = buf_page_get_block(bpage);
+ buf_page_set_io_fix(bpage, BUF_IO_NONE);
+ buf_block_set_state(block, BUF_BLOCK_NOT_USED);
+ buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
+ mutex_exit(&buf_pool->LRU_list_mutex);
+ mutex_exit(pmutex);
+
+ if (err) {
+ *err = DB_DECRYPTION_FAILED;
+ }
+ return (NULL);
+ }
+
DBUG_EXECUTE_IF(
"innodb_page_corruption_retries",
retries = BUF_PAGE_READ_MAX_RETRIES;
);
} else {
- fprintf(stderr, "InnoDB: Error: Unable"
- " to read tablespace %lu page no"
- " %lu into the buffer pool after"
- " %lu attempts\n"
- "InnoDB: The most probable cause"
- " of this error may be that the"
- " table has been corrupted.\n"
- "InnoDB: You can try to fix this"
- " problem by using"
- " innodb_force_recovery.\n"
- "InnoDB: Please see reference manual"
- " for more details.\n"
- "InnoDB: Aborting...\n",
- space, offset,
- BUF_PAGE_READ_MAX_RETRIES);
+ bool corrupted = true;
- ut_error;
+ if (bpage) {
+ corrupted = buf_page_check_corrupt(bpage);
+ }
+
+ if (corrupted) {
+ fprintf(stderr, "InnoDB: Error: Unable"
+ " to read tablespace %lu page no"
+ " %lu into the buffer pool after"
+ " %lu attempts\n"
+ "InnoDB: The most probable cause"
+ " of this error may be that the"
+ " table has been corrupted.\n"
+ "InnoDB: You can try to fix this"
+ " problem by using"
+ " innodb_force_recovery.\n"
+ "InnoDB: Please see reference manual"
+ " for more details.\n"
+ "InnoDB: Aborting...\n",
+ space, offset,
+ BUF_PAGE_READ_MAX_RETRIES);
+
+ ut_error;
+ } else {
+ ib_mutex_t* pmutex = buf_page_get_mutex(bpage);
+ mutex_enter(&buf_pool->LRU_list_mutex);
+ mutex_enter(pmutex);
+ buf_block_t* block = buf_page_get_block(bpage);
+ buf_page_set_io_fix(bpage, BUF_IO_NONE);
+ buf_block_set_state(block, BUF_BLOCK_NOT_USED);
+ buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
+ mutex_exit(&buf_pool->LRU_list_mutex);
+ mutex_exit(pmutex);
+
+ if (err) {
+ *err = DB_DECRYPTION_FAILED;
+ }
+ return (NULL);
+ }
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
@@ -3600,6 +3741,11 @@ buf_page_init_low(
bpage->oldest_modification = 0;
bpage->write_size = 0;
bpage->key_version = 0;
+ bpage->stored_checksum = BUF_NO_CHECKSUM_MAGIC;
+ bpage->calculated_checksum = BUF_NO_CHECKSUM_MAGIC;
+ bpage->page_encrypted = false;
+ bpage->page_compressed = false;
+ bpage->encrypted = false;
bpage->real_size = 0;
HASH_INVALIDATE(bpage, hash);
@@ -4269,38 +4415,125 @@ buf_mark_space_corrupt(
/* First unfix and release lock on the bpage */
ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
- mutex_enter(&buf_pool->LRU_list_mutex);
- rw_lock_x_lock(hash_lock);
- mutex_enter(buf_page_get_mutex(bpage));
- ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
- ut_ad(bpage->buf_fix_count == 0);
- /* Set BUF_IO_NONE before we remove the block from LRU list */
- buf_page_set_io_fix(bpage, BUF_IO_NONE);
+ if (!bpage->encrypted) {
+ mutex_enter(&buf_pool->LRU_list_mutex);
+ rw_lock_x_lock(hash_lock);
+ mutex_enter(buf_page_get_mutex(bpage));
+ ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
+ ut_ad(bpage->buf_fix_count == 0);
- if (uncompressed) {
- rw_lock_x_unlock_gen(
- &((buf_block_t*) bpage)->lock,
- BUF_IO_READ);
+ /* Set BUF_IO_NONE before we remove the block from LRU list */
+ buf_page_set_io_fix(bpage, BUF_IO_NONE);
+
+ if (uncompressed) {
+ rw_lock_x_unlock_gen(
+ &((buf_block_t*) bpage)->lock,
+ BUF_IO_READ);
+ }
}
/* Find the table with specified space id, and mark it corrupted */
if (dict_set_corrupted_by_space(space)) {
- buf_LRU_free_one_page(bpage);
+ if (!bpage->encrypted) {
+ buf_LRU_free_one_page(bpage);
+ }
} else {
- mutex_exit(buf_page_get_mutex(bpage));
+ if (!bpage->encrypted) {
+ mutex_exit(buf_page_get_mutex(bpage));
+ }
ret = FALSE;
}
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- ut_ad(buf_pool->n_pend_reads > 0);
- os_atomic_decrement_ulint(&buf_pool->n_pend_reads, 1);
+ if(!bpage->encrypted) {
+ mutex_exit(&buf_pool->LRU_list_mutex);
+ ut_ad(buf_pool->n_pend_reads > 0);
+ os_atomic_decrement_ulint(&buf_pool->n_pend_reads, 1);
+ }
return(ret);
}
/********************************************************************//**
+Decide whether a page that failed its read checks is really corrupted or
+whether the failure is better explained by a decryption/decompression
+problem (for example a missing key). We can't be 100% sure: an encrypted
+page whose post-encryption checksum still matches is treated as intact on
+disk and merely undecryptable. Sets bpage->encrypted when the page
+contents indicate encryption, and writes a diagnosis to the error log.
+@return TRUE if the page is considered corrupted, FALSE if the page
+shows signs of encryption/compression and is not judged corrupted */
+static
+ibool
+buf_page_check_corrupt(
+/*===================*/
+	buf_page_t*	bpage)	/*!< in/out: buffer page read from disk */
+{
+	ulint		zip_size = buf_page_get_zip_size(bpage);
+	byte*		dst_frame = (zip_size) ? bpage->zip.data :
+		((buf_block_t*) bpage)->frame;
+	unsigned	key_version = bpage->key_version;
+	/* buf_page_decrypt_after_read() stores the "compressed and
+	encrypted" flag in bpage->page_encrypted and the plain "page
+	compressed" flag in bpage->page_compressed; read them back
+	the same way round. */
+	bool		page_compressed = bpage->page_compressed;
+	bool		page_compressed_encrypted = bpage->page_encrypted;
+	ulint		stored_checksum = bpage->stored_checksum;
+	/* This must be the checksum calculated before decryption.
+	Comparing the stored checksum against itself would never
+	detect a real corruption. */
+	ulint		calculated_checksum = bpage->calculated_checksum;
+	ulint		space_id = mach_read_from_4(
+		dst_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+	fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id);
+	fil_space_t*	space = fil_space_found_by_id(space_id);
+	bool		corrupted = true;
+
+	if (key_version != 0 || page_compressed_encrypted) {
+		bpage->encrypted = true;
+	}
+
+	if (key_version != 0 ||
+	    (crypt_data && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED) ||
+	    page_compressed || page_compressed_encrypted) {
+
+		/* Page is really corrupted if post encryption stored
+		checksum does not match calculated checksum after page was
+		read. For pages compressed and then encrypted, there is no
+		checksum. */
+		corrupted = (!page_compressed_encrypted && stored_checksum != calculated_checksum);
+
+		if (corrupted) {
+			ib_logf(IB_LOG_LEVEL_ERROR,
+				"%s: Block in space_id %lu in file %s corrupted.",
+				page_compressed_encrypted ? "Maybe corruption" : "Corruption",
+				space_id, space ? space->name : "NULL");
+			ib_logf(IB_LOG_LEVEL_ERROR,
+				"Page based on contents %s encrypted.",
+				(key_version == 0 && page_compressed_encrypted == false) ? "not" : "maybe");
+			if (stored_checksum != BUF_NO_CHECKSUM_MAGIC || calculated_checksum != BUF_NO_CHECKSUM_MAGIC) {
+				ib_logf(IB_LOG_LEVEL_ERROR,
+					"Page stored checksum %lu but calculated checksum %lu.",
+					stored_checksum, calculated_checksum);
+			}
+			ib_logf(IB_LOG_LEVEL_ERROR,
+				"Reason could be that key_version %u in page "
+				"or in crypt_data %p could not be found.",
+				key_version, crypt_data);
+			ib_logf(IB_LOG_LEVEL_ERROR,
+				"Reason could be also that key management plugin is not found or"
+				" used encryption algorithm or method does not match.");
+			ib_logf(IB_LOG_LEVEL_ERROR,
+				"Based on page page compressed %d, compressed and encrypted %d.",
+				page_compressed, page_compressed_encrypted);
+		} else {
+			/* Checksums agree (or cannot be compared): treat
+			the page as encrypted but not decryptable. */
+			ib_logf(IB_LOG_LEVEL_ERROR,
+				"Block in space_id %lu in file %s encrypted.",
+				space_id, space ? space->name : "NULL");
+			ib_logf(IB_LOG_LEVEL_ERROR,
+				"However key management plugin or used key_id %u is not found or"
+				" used encryption algorithm or method does not match.",
+				key_version);
+			ib_logf(IB_LOG_LEVEL_ERROR,
+				"Marking tablespace as missing. You may drop this table or"
+				" install correct key management plugin and key file.");
+		}
+	}
+
+	return corrupted;
+}
+
+/********************************************************************//**
Completes an asynchronous read or write request of a file page to or from
the buffer pool.
@return true if successful */
@@ -4418,47 +4651,46 @@ buf_page_io_complete(
;);
corrupt:
- fil_system_enter();
- space = fil_space_get_by_id(bpage->space);
- fil_system_exit();
- fprintf(stderr,
- "InnoDB: Database page corruption on disk"
- " or a failed\n"
- "InnoDB: space %lu file %s read of page %lu.\n"
- "InnoDB: You may have to recover"
- " from a backup.\n",
- (ulint)bpage->space,
- space ? space->name : "NULL",
- (ulong) bpage->offset);
-
- buf_page_print(frame, buf_page_get_zip_size(bpage),
- BUF_PAGE_PRINT_NO_CRASH);
- fprintf(stderr,
- "InnoDB: Database page corruption on disk"
- " or a failed\n"
- "InnoDB: file read of page %lu.\n"
- "InnoDB: You may have to recover"
- " from a backup.\n",
- (ulong) bpage->offset);
- fputs("InnoDB: It is also possible that"
- " your operating\n"
- "InnoDB: system has corrupted its"
- " own file cache\n"
- "InnoDB: and rebooting your computer"
- " removes the\n"
- "InnoDB: error.\n"
- "InnoDB: If the corrupt page is an index page\n"
- "InnoDB: you can also try to"
- " fix the corruption\n"
- "InnoDB: by dumping, dropping,"
- " and reimporting\n"
- "InnoDB: the corrupt table."
- " You can use CHECK\n"
- "InnoDB: TABLE to scan your"
- " table for corruption.\n"
- "InnoDB: See also "
- REFMAN "forcing-innodb-recovery.html\n"
- "InnoDB: about forcing recovery.\n", stderr);
+ bool corrupted = buf_page_check_corrupt(bpage);
+
+ if (corrupted) {
+ fil_system_enter();
+ space = fil_space_get_by_id(bpage->space);
+ fil_system_exit();
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Database page corruption on disk"
+ " or a failed");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Space %lu file %s read of page %lu.",
+ (ulint)bpage->space,
+ space ? space->name : "NULL",
+ (ulong) bpage->offset);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "You may have to recover"
+ " from a backup.");
+
+
+ buf_page_print(frame, buf_page_get_zip_size(bpage),
+ BUF_PAGE_PRINT_NO_CRASH);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "It is also possible that your operating"
+ "system has corrupted its own file cache.");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "and rebooting your computer removes the error.");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "If the corrupt page is an index page you can also try to");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "fix the corruption by dumping, dropping, and reimporting");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "the corrupt table. You can use CHECK");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "TABLE to scan your table for corruption.");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "See also "
+ REFMAN "forcing-innodb-recovery.html"
+ " about forcing recovery.");
+ }
if (srv_pass_corrupt_table && bpage->space != 0
&& bpage->space < SRV_LOG_SPACE_FIRST_ID) {
@@ -4476,7 +4708,8 @@ corrupt:
dict_table_set_corrupt_by_space(bpage->space, TRUE);
}
bpage->is_corrupt = TRUE;
- } else
+ }
+
if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
/* If page space id is larger than TRX_SYS_SPACE
(0), we will attempt to mark the corresponding
@@ -4485,12 +4718,30 @@ corrupt:
&& buf_mark_space_corrupt(bpage)) {
return(false);
} else {
- fputs("InnoDB: Ending processing"
- " because of"
- " a corrupt database page.\n",
- stderr);
+ corrupted = buf_page_check_corrupt(bpage);
- ut_error;
+ if (corrupted) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Ending processing because of a corrupt database page.");
+
+ ut_error;
+ }
+
+ ib_push_warning(innobase_get_trx(), DB_DECRYPTION_FAILED,
+ "Table in tablespace %lu encrypted."
+ "However key management plugin or used key_id %lu is not found or"
+ " used encryption algorithm or method does not match."
+ " Can't continue opening the table.",
+ bpage->key_version);
+
+ if (bpage->space > TRX_SYS_SPACE) {
+ if (corrupted) {
+ buf_mark_space_corrupt(bpage);
+ }
+ } else {
+ ut_error;
+ }
+ return(false);
}
}
}
@@ -4668,11 +4919,13 @@ buf_all_freed_instance(
mutex_exit(&buf_pool->LRU_list_mutex);
if (UNIV_LIKELY_NULL(block)) {
- fprintf(stderr,
- "Page %lu %lu still fixed or dirty\n",
- (ulong) block->page.space,
- (ulong) block->page.offset);
- ut_error;
+ if (block->page.key_version == 0) {
+ fprintf(stderr,
+ "Page %lu %lu still fixed or dirty\n",
+ (ulong) block->page.space,
+ (ulong) block->page.offset);
+ ut_error;
+ }
}
}
@@ -5873,30 +6126,31 @@ Encrypts a buffer page right before it's flushed to disk
byte*
buf_page_encrypt_before_write(
/*==========================*/
- buf_page_t* bpage, /*!< in/out: buffer page to be flushed */
- const byte* src_frame, /*!< in: src frame */
- ulint space_id) /*!< in: space id */
+ buf_page_t* bpage, /*!< in/out: buffer page to be flushed */
+ byte* src_frame, /*!< in: src frame */
+ ulint space_id) /*!< in: space id */
{
fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id);
ulint zip_size = buf_page_get_zip_size(bpage);
ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
bool page_compressed = fil_space_is_page_compressed(bpage->space);
- bpage->real_size = UNIV_PAGE_SIZE;
bool encrypted = true;
+ bpage->real_size = UNIV_PAGE_SIZE;
+
fil_page_type_validate(src_frame);
if (bpage->offset == 0) {
/* Page 0 of a tablespace is not encrypted/compressed */
ut_ad(bpage->key_version == 0);
- return const_cast<byte*>(src_frame);
+ return src_frame;
}
if (bpage->space == TRX_SYS_SPACE && bpage->offset == TRX_SYS_PAGE_NO) {
/* don't encrypt/compress page as it contains address to dblwr buffer */
bpage->key_version = 0;
- return const_cast<byte*>(src_frame);
+ return src_frame;
}
if (crypt_data != NULL && crypt_data->encryption == FIL_SPACE_ENCRYPTION_OFF) {
@@ -5918,31 +6172,35 @@ buf_page_encrypt_before_write(
if (!encrypted && !page_compressed) {
/* No need to encrypt or page compress the page */
- return const_cast<byte*>(src_frame);
+ return src_frame;
}
/* Find free slot from temporary memory array */
buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed);
+ slot->out_buf = NULL;
bpage->slot = slot;
- byte *dst_frame = bpage->slot->out_buf = slot->crypt_buf;
+ byte *dst_frame = slot->crypt_buf;
if (!page_compressed) {
/* Encrypt page content */
- fil_space_encrypt(bpage->space,
- bpage->offset,
- bpage->newest_modification,
- src_frame,
- zip_size,
- dst_frame);
+ byte* tmp = fil_space_encrypt(bpage->space,
+ bpage->offset,
+ bpage->newest_modification,
+ src_frame,
+ zip_size,
+ dst_frame);
unsigned key_version =
mach_read_from_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
ut_ad(key_version == 0 || key_version >= bpage->key_version);
bpage->key_version = key_version;
bpage->real_size = page_size;
+ slot->out_buf = dst_frame = tmp;
- fil_page_type_validate(dst_frame);
+#ifdef UNIV_DEBUG
+ fil_page_type_validate(tmp);
+#endif
} else {
/* First we compress the page content */
@@ -5962,22 +6220,27 @@ buf_page_encrypt_before_write(
bpage->real_size = out_len;
+#ifdef UNIV_DEBUG
fil_page_type_validate(tmp);
+#endif
+
if(encrypted) {
/* And then we encrypt the page content */
- fil_space_encrypt(bpage->space,
- bpage->offset,
- bpage->newest_modification,
- tmp,
- zip_size,
- dst_frame);
- } else {
- bpage->slot->out_buf = dst_frame = tmp;
+ tmp = fil_space_encrypt(bpage->space,
+ bpage->offset,
+ bpage->newest_modification,
+ tmp,
+ zip_size,
+ dst_frame);
}
+
+ slot->out_buf = dst_frame = tmp;
}
+#ifdef UNIV_DEBUG
fil_page_type_validate(dst_frame);
+#endif
// return dst_frame which will be written
return dst_frame;
@@ -5989,7 +6252,7 @@ Decrypt page after it has been read from disk
ibool
buf_page_decrypt_after_read(
/*========================*/
- buf_page_t* bpage) /*!< in/out: buffer page read from disk */
+ buf_page_t* bpage) /*!< in/out: buffer page read from disk */
{
ulint zip_size = buf_page_get_zip_size(bpage);
ulint size = (zip_size) ? zip_size : UNIV_PAGE_SIZE;
@@ -6002,6 +6265,11 @@ buf_page_decrypt_after_read(
bool page_compressed_encrypted = fil_page_is_compressed_encrypted(dst_frame);
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ /* If page is encrypted read post-encryption checksum */
+ if (!page_compressed_encrypted && key_version != 0) {
+ bpage->stored_checksum = mach_read_from_4(dst_frame + + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4);
+ }
+
ut_ad(bpage->key_version == 0);
if (bpage->offset == 0) {
@@ -6009,13 +6277,21 @@ buf_page_decrypt_after_read(
return (TRUE);
}
+ /* Store these for corruption check */
+ bpage->key_version = key_version;
+ bpage->page_encrypted = page_compressed_encrypted;
+ bpage->page_compressed = page_compressed;
+
if (page_compressed) {
/* the page we read is unencrypted */
/* Find free slot from temporary memory array */
buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed);
+#ifdef UNIV_DEBUG
fil_page_type_validate(dst_frame);
+#endif
+ /* decompress using comp_buf to dst_frame */
fil_decompress_page(slot->comp_buf,
dst_frame,
size,
@@ -6025,24 +6301,34 @@ buf_page_decrypt_after_read(
slot->reserved = false;
key_version = 0;
+#ifdef UNIV_DEBUG
fil_page_type_validate(dst_frame);
+#endif
} else {
buf_tmp_buffer_t* slot = NULL;
if (key_version) {
/* Find free slot from temporary memory array */
slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed);
- memcpy(slot->crypt_buf, dst_frame, size);
+#ifdef UNIV_DEBUG
fil_page_type_validate(dst_frame);
- fil_page_type_validate(slot->crypt_buf);
- /* decrypt from crypt_buf to dst_frame */
+#endif
+
+ /* Calculate checksum before decrypt, this will be
+ used later to find out if incorrect key was used. */
+ if (!page_compressed_encrypted) {
+ bpage->calculated_checksum = fil_crypt_calculate_checksum(zip_size, dst_frame);
+ }
+
+ /* decrypt using crypt_buf to dst_frame */
fil_space_decrypt(bpage->space,
- slot->crypt_buf,
- size,
- dst_frame);
+ slot->crypt_buf,
+ size,
+ dst_frame);
+#ifdef UNIV_DEBUG
fil_page_type_validate(dst_frame);
- fil_page_type_validate(slot->crypt_buf);
+#endif
}
if (page_compressed_encrypted) {
@@ -6053,13 +6339,16 @@ buf_page_decrypt_after_read(
#ifdef UNIV_DEBUG
fil_page_type_validate(dst_frame);
#endif
+ /* decompress using comp_buf to dst_frame */
fil_decompress_page(slot->comp_buf,
dst_frame,
size,
&bpage->write_size);
}
+#ifdef UNIV_DEBUG
fil_page_type_validate(dst_frame);
+#endif
/* Mark this slot as free */
if (slot) {