diff options
author | Sergei Golubchik <serg@mariadb.org> | 2018-06-21 23:47:39 +0200 |
---|---|---|
committer | Sergei Golubchik <serg@mariadb.org> | 2018-06-21 23:47:39 +0200 |
commit | b942aa34c10ddfa3fe4977ae60afed5cbdc51083 (patch) | |
tree | 0a1a934ca7b728e3ffb81cd2f7c6a858524726d4 /storage | |
parent | fe3f9fa9183ea3d10397b6f7f4d422ae9bba00a4 (diff) | |
parent | c09a8b5b36edb494e2bcc93074c06e26cd9f2b92 (diff) | |
download | mariadb-git-b942aa34c10ddfa3fe4977ae60afed5cbdc51083.tar.gz |
Merge branch '10.1' into 10.2
Diffstat (limited to 'storage')
56 files changed, 987 insertions, 1381 deletions
diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc index 2b6079d4a29..edbb5d358fa 100644 --- a/storage/archive/ha_archive.cc +++ b/storage/archive/ha_archive.cc @@ -1908,7 +1908,7 @@ maria_declare_plugin(archive) &archive_storage_engine, "ARCHIVE", "Brian Aker, MySQL AB", - "Archive storage engine", + "gzip-compresses tables for a low storage footprint", PLUGIN_LICENSE_GPL, archive_db_init, /* Plugin Init */ NULL, /* Plugin Deinit */ diff --git a/storage/csv/ha_tina.cc b/storage/csv/ha_tina.cc index 7e0c61ff634..6659e529e97 100644 --- a/storage/csv/ha_tina.cc +++ b/storage/csv/ha_tina.cc @@ -1788,7 +1788,7 @@ maria_declare_plugin(csv) &csv_storage_engine, "CSV", "Brian Aker, MySQL AB", - "CSV storage engine", + "Stores tables as CSV files", PLUGIN_LICENSE_GPL, tina_init_func, /* Plugin Init */ tina_done_func, /* Plugin Deinit */ diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc index ec05127c475..35a68a80f27 100644 --- a/storage/federated/ha_federated.cc +++ b/storage/federated/ha_federated.cc @@ -3410,7 +3410,7 @@ maria_declare_plugin(federated) &federated_storage_engine, "FEDERATED", "Patrick Galbraith and Brian Aker, MySQL AB", - "Federated MySQL storage engine", + "Allows to access tables on other MariaDB servers", PLUGIN_LICENSE_GPL, federated_db_init, /* Plugin Init */ federated_done, /* Plugin Deinit */ diff --git a/storage/federatedx/ha_federatedx.cc b/storage/federatedx/ha_federatedx.cc index 8f9b499c611..840a5a68885 100644 --- a/storage/federatedx/ha_federatedx.cc +++ b/storage/federatedx/ha_federatedx.cc @@ -3645,7 +3645,7 @@ maria_declare_plugin(federatedx) &federatedx_storage_engine, "FEDERATED", "Patrick Galbraith", - "FederatedX pluggable storage engine", + "Allows to access tables on other MariaDB servers, supports transactions and more", PLUGIN_LICENSE_GPL, federatedx_db_init, /* Plugin Init */ federatedx_done, /* Plugin Deinit */ diff --git a/storage/heap/hp_hash.c b/storage/heap/hp_hash.c index eaf284c2015..54f5d93d0c6 100644 --- a/storage/heap/hp_hash.c +++ b/storage/heap/hp_hash.c @@ -1006,7 +1006,7 @@ void heap_update_auto_increment(HP_INFO *info, const uchar *record) switch (info->s->auto_key_type) { case HA_KEYTYPE_INT8: - s_value= (longlong) *(char*)key; + s_value= (longlong) *(const signed char*) key; break; case HA_KEYTYPE_BINARY: value=(ulonglong) *(uchar*) key; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 7ac7495e221..198255a30ef 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -2541,8 +2541,12 @@ innobase_mysql_tmpfile( } } #else +#ifdef F_DUPFD_CLOEXEC + fd2 = fcntl(fd, F_DUPFD_CLOEXEC, 0); +#else fd2 = dup(fd); #endif +#endif if (fd2 < 0) { char errbuf[MYSYS_STRERROR_SIZE]; DBUG_PRINT("error",("Got error %d on dup",fd2)); @@ -8484,13 +8488,12 @@ report_error: error, m_prebuilt->table->flags, m_user_thd); #ifdef WITH_WSREP - if (!error_result && - wsrep_thd_exec_mode(m_user_thd) == LOCAL_STATE && - wsrep_on(m_user_thd) && - !wsrep_consistency_check(m_user_thd) && - !wsrep_thd_ignore_table(m_user_thd)) { - if (wsrep_append_keys(m_user_thd, false, record, NULL)) - { + if (!error_result + && wsrep_on(m_user_thd) + && wsrep_thd_exec_mode(m_user_thd) == LOCAL_STATE + && !wsrep_consistency_check(m_user_thd) + && !wsrep_thd_ignore_table(m_user_thd)) { + if (wsrep_append_keys(m_user_thd, false, record, NULL)) { DBUG_PRINT("wsrep", ("row key failed")); error_result = HA_ERR_INTERNAL_ERROR; goto wsrep_error; @@ -19671,8 +19674,10 @@ wsrep_innobase_kill_one_trx( thd_get_thread_id(thd), victim_trx->id); - WSREP_DEBUG("Aborting query: %s", - (thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void"); + WSREP_DEBUG("Aborting query: %s conf %d trx: %lu", + (thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void", + wsrep_thd_conflict_state(thd, FALSE), + wsrep_thd_ws_handle(thd)->trx_id); wsrep_thd_LOCK(thd); DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock", @@ -19735,7 +19740,7 @@ wsrep_innobase_kill_one_trx( wsrep_t *wsrep= get_wsrep(); rcode = wsrep->abort_pre_commit( wsrep, bf_seqno, - (wsrep_trx_id_t)victim_trx->id + (wsrep_trx_id_t)wsrep_thd_ws_handle(thd)->trx_id ); switch (rcode) { @@ -19860,12 +19865,14 @@ wsrep_abort_transaction( my_bool signal) { DBUG_ENTER("wsrep_innobase_abort_thd"); - trx_t* victim_trx = thd_to_trx(victim_thd); - trx_t* bf_trx = (bf_thd) ? thd_to_trx(bf_thd) : NULL; + + trx_t* victim_trx = thd_to_trx(victim_thd); + trx_t* bf_trx = (bf_thd) ? thd_to_trx(bf_thd) : NULL; - WSREP_DEBUG("abort transaction: BF: %s victim: %s", - wsrep_thd_query(bf_thd), - wsrep_thd_query(victim_thd)); + WSREP_DEBUG("abort transaction: BF: %s victim: %s victim conf: %d", + wsrep_thd_query(bf_thd), + wsrep_thd_query(victim_thd), + wsrep_thd_conflict_state(victim_thd, FALSE)); if (victim_trx) { lock_mutex_enter(); @@ -19917,8 +19924,8 @@ innobase_wsrep_get_checkpoint( XID* xid) { DBUG_ASSERT(hton == innodb_hton_ptr); - trx_sys_read_wsrep_checkpoint(xid); - return 0; + trx_sys_read_wsrep_checkpoint(xid); + return 0; } static diff --git a/storage/innobase/include/fil0fil.ic b/storage/innobase/include/fil0fil.ic index 9505cc0bd69..023a48a5066 100644 --- a/storage/innobase/include/fil0fil.ic +++ b/storage/innobase/include/fil0fil.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2015, 2017, MariaDB Corporation. +Copyright (c) 2015, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h index 6697c1f37ed..368f0daa201 100644 --- a/storage/innobase/include/fsp0fsp.h +++ b/storage/innobase/include/fsp0fsp.h @@ -743,7 +743,7 @@ fsp_flags_convert_from_101(ulint flags) /* Bits 13..16 are the wrong position for PAGE_SSIZE, and they should contain one of the values 3,4,6,7, that is, be of the form - 0b0011 or 0b01xx (except 0b0110). + 0b0011 or 0b01xx (except 0b0101). In correct versions, these bits should be 0bc0se where c is the MariaDB COMPRESSED flag and e is the MySQL 5.7 ENCRYPTION flag diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 613a3cad0e9..568239b79ed 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -2700,7 +2700,7 @@ os_file_create_simple_func( bool retry; do { - file = open(name, create_flag, os_innodb_umask); + file = open(name, create_flag | O_CLOEXEC, os_innodb_umask); if (file == -1) { *success = false; @@ -3002,7 +3002,7 @@ os_file_create_func( bool retry; do { - file = open(name, create_flag, os_innodb_umask); + file = open(name, create_flag | O_CLOEXEC, os_innodb_umask); if (file == -1) { const char* operation; @@ -3136,7 +3136,7 @@ os_file_create_simple_no_error_handling_func( return(OS_FILE_CLOSED); } - file = open(name, create_flag, os_innodb_umask); + file = open(name, create_flag | O_CLOEXEC, os_innodb_umask); *success = (file != -1); diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c index 1ccb67d5698..99c934c479d 100644 --- a/storage/maria/ma_control_file.c +++ b/storage/maria/ma_control_file.c @@ -277,7 +277,7 @@ CONTROL_FILE_ERROR ma_control_file_open(my_bool create_if_missing, " file is probably in use by another process"; uint new_cf_create_time_size, new_cf_changeable_size, new_block_size; my_off_t file_size; - int open_flags= O_BINARY | /*O_DIRECT |*/ O_RDWR; + int open_flags= O_BINARY | /*O_DIRECT |*/ O_RDWR | O_CLOEXEC; int error= CONTROL_FILE_UNKNOWN_ERROR; DBUG_ENTER("ma_control_file_open"); diff --git a/storage/maria/ma_key.c b/storage/maria/ma_key.c index 6f3e17ed80d..8010d9aa147 100644 --- a/storage/maria/ma_key.c +++ b/storage/maria/ma_key.c @@ -728,7 +728,7 @@ ulonglong ma_retrieve_auto_increment(const uchar *key, uint8 key_type) switch (key_type) { case HA_KEYTYPE_INT8: - s_value= (longlong) *(const char*)key; + s_value= (longlong) *(const signed char*) key; break; case HA_KEYTYPE_BINARY: value=(ulonglong) *key; diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index e30ddb756a6..9410e5332e3 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -78,6 +78,32 @@ typedef union uchar buffer[TRANSLOG_PAGE_SIZE]; } TRANSLOG_PAGE_SIZE_BUFF; +#define MAX_TRUNSLOG_USED_BUFFERS 3 + +typedef struct +{ + struct st_translog_buffer *buff[MAX_TRUNSLOG_USED_BUFFERS]; + uint8 wrt_ptr; + uint8 unlck_ptr; +} TRUNSLOG_USED_BUFFERS; + +static void +used_buffs_init(TRUNSLOG_USED_BUFFERS *buffs) +{ + buffs->unlck_ptr= buffs->wrt_ptr= 0; +} + +static void +used_buffs_add(TRUNSLOG_USED_BUFFERS *buffs, + struct st_translog_buffer *buff); + +static void +used_buffs_register_unlock(TRUNSLOG_USED_BUFFERS *buffs, + struct st_translog_buffer *buff); + +static void +used_buffs_urgent_unlock(TRUNSLOG_USED_BUFFERS *buffs); + /* min chunk length */ #define TRANSLOG_MIN_CHUNK 3 /* @@ -156,7 +182,28 @@ struct st_translog_buffer TRANSLOG_FILE *file; /* Threads which are waiting for buffer filling/freeing */ mysql_cond_t waiting_filling_buffer; - /* Number of records which are in copy progress */ + /* + Number of records which are in copy progress. + + Controlled via translog_buffer_increase_writers() and + translog_buffer_decrease_writers(). + + 1 Simple case: translog_force_current_buffer_to_finish both called in + the same procedure. + + 2 Simple case: translog_write_variable_record_1group: + translog_advance_pointer() increase writer of the buffer and + translog_buffer_decrease_writers() decrease it. + + Usual case: + 1) translog_advance_pointer (i.e. reserve place for future writing) + increase writers for all buffers where place reserved. + Simpliest case: just all space reserved in one buffer + complex case: end of the first buffer, all second buffer, beginning + of the third buffer. + 2) When we finish with writing translog_chaser_page_next() will be + called and unlock the buffer by decreasing number of writers. + */ uint copy_to_buffer_in_progress; /* list of waiting buffer ready threads */ struct st_my_thread_var *waiting_flush; @@ -214,6 +261,7 @@ struct st_translog_buffer struct st_buffer_cursor { + TRUNSLOG_USED_BUFFERS buffs; /* pointer into the buffer */ uchar *ptr; /* current buffer */ @@ -935,7 +983,7 @@ static File create_logfile_by_number_no_cache(uint32 file_no) /* TODO: add O_DIRECT to open flags (when buffer is aligned) */ if ((file= mysql_file_create(key_file_translog, translog_filename_by_fileno(file_no, path), - 0, O_BINARY | O_RDWR, MYF(MY_WME))) < 0) + 0, O_BINARY | O_RDWR | O_CLOEXEC, MYF(MY_WME))) < 0) { DBUG_PRINT("error", ("Error %d during creating file '%s'", errno, path)); translog_stop_writing(); @@ -973,7 +1021,7 @@ static File open_logfile_by_number_no_cache(uint32 file_no) /* TODO: use mysql_file_create() */ if ((file= mysql_file_open(key_file_translog, translog_filename_by_fileno(file_no, path), - log_descriptor.open_flags, + log_descriptor.open_flags | O_CLOEXEC, MYF(MY_WME))) < 0) { DBUG_PRINT("error", ("Error %d during opening file '%s'", errno, path)); @@ -1648,15 +1696,12 @@ static my_bool translog_create_new_file() DBUG_PRINT("info", ("file_no: %lu", (ulong)file_no)); if (translog_write_file_header()) - DBUG_RETURN(1); + goto error; if (ma_control_file_write_and_force(last_checkpoint_lsn, file_no, max_trid_in_control_file, recovery_failures)) - { - translog_stop_writing(); - DBUG_RETURN(1); - } + goto error; DBUG_RETURN(0); @@ -1697,10 +1742,6 @@ static void translog_buffer_lock(struct st_translog_buffer *buffer) SYNOPSIS translog_buffer_unlock() buffer This buffer which should be unlocked - - RETURN - 0 OK - 1 Error */ static void translog_buffer_unlock(struct st_translog_buffer *buffer) @@ -1894,7 +1935,10 @@ static void translog_finish_page(TRANSLOG_ADDRESS *horizon, (uint) cursor->buffer->size, (uint) (cursor->ptr -cursor->buffer->buffer), (uint) cursor->current_page_fill, (uint) left)); - DBUG_ASSERT(LSN_FILE_NO(*horizon) == LSN_FILE_NO(cursor->buffer->offset)); + DBUG_ASSERT(LSN_FILE_NO(*horizon) == LSN_FILE_NO(cursor->buffer->offset) + || translog_status == TRANSLOG_UNINITED); + if ((LSN_FILE_NO(*horizon) != LSN_FILE_NO(cursor->buffer->offset))) + DBUG_VOID_RETURN; // everything wrong do not write to awoid more problems translog_check_cursor(cursor); if (cursor->protected) { @@ -3243,7 +3287,7 @@ static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr, File fd; if ((fd= mysql_file_open(key_file_translog, translog_filename_by_fileno(file_no, path), - O_RDONLY, (no_errors ? MYF(0) : MYF(MY_WME)))) < 0) + O_RDONLY | O_CLOEXEC, (no_errors ? MYF(0) : MYF(MY_WME)))) < 0) { my_errno= errno; DBUG_PRINT("error", ("Error %d during opening file #%d", @@ -4588,6 +4632,7 @@ static my_bool translog_chaser_page_next(TRANSLOG_ADDRESS *horizon, { translog_buffer_lock(buffer_to_flush); translog_buffer_decrease_writers(buffer_to_flush); + used_buffs_register_unlock(&cursor->buffs, buffer_to_flush); if (!rc) rc= translog_buffer_flush(buffer_to_flush); translog_buffer_unlock(buffer_to_flush); @@ -4692,7 +4737,8 @@ translog_write_variable_record_chunk3_page(struct st_translog_parts *parts, 1 Error */ -static my_bool translog_advance_pointer(int pages, uint16 last_page_data) +static my_bool translog_advance_pointer(int pages, uint16 last_page_data, + TRUNSLOG_USED_BUFFERS *buffs) { translog_size_t last_page_offset= (log_descriptor.page_overhead + last_page_data); @@ -4709,6 +4755,8 @@ static my_bool translog_advance_pointer(int pages, uint16 last_page_data) (uint) last_page_data)); translog_lock_assert_owner(); + used_buffs_init(buffs); + if (pages == -1) { /* @@ -4786,8 +4834,10 @@ static my_bool translog_advance_pointer(int pages, uint16 last_page_data) translog_wait_for_buffer_free(new_buffer); #ifndef DBUG_OFF /* We keep the handler locked so nobody can start this new buffer */ - DBUG_ASSERT(offset == new_buffer->offset && new_buffer->file == NULL && - (file == NULL ? ver : (uint8)(ver + 1)) == new_buffer->ver); + DBUG_ASSERT((offset == new_buffer->offset && new_buffer->file == NULL && + (file == NULL ? ver : (uint8)(ver + 1)) == + new_buffer->ver) || + translog_status == TRANSLOG_READONLY); } #endif @@ -4808,6 +4858,8 @@ static my_bool translog_advance_pointer(int pages, uint16 last_page_data) DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no == log_descriptor.bc.buffer_no); translog_buffer_increase_writers(log_descriptor.bc.buffer); + // register for case of error + used_buffs_add(buffs, log_descriptor.bc.buffer); if (file_end_offset <= buffer_end_offset) { @@ -4818,6 +4870,10 @@ static my_bool translog_advance_pointer(int pages, uint16 last_page_data) (ulong) LSN_FILE_NO(log_descriptor.horizon))); if (translog_create_new_file()) { + struct st_translog_buffer *ob= log_descriptor.bc.buffer; + translog_buffer_unlock(ob); + used_buffs_urgent_unlock(buffs); + translog_buffer_lock(ob); DBUG_RETURN(1); } } @@ -4839,6 +4895,7 @@ end: log_descriptor.bc.ptr+= offset; log_descriptor.bc.buffer->size+= offset; translog_buffer_increase_writers(log_descriptor.bc.buffer); + used_buffs_add(buffs, log_descriptor.bc.buffer); log_descriptor.horizon+= offset; /* offset increasing */ log_descriptor.bc.current_page_fill= last_page_offset; DBUG_PRINT("info", ("NewP buffer #%u: %p chaser: %d Size: %lu (%lu) " @@ -4859,6 +4916,56 @@ end: DBUG_RETURN(0); } +static void +used_buffs_add(TRUNSLOG_USED_BUFFERS *buffs, + struct st_translog_buffer *buff) +{ + DBUG_ENTER("used_buffs_add"); + DBUG_PRINT("enter", ("ADD buffs: %p unlk %u (%p) wrt_ptr: %u (%p)" + " buff %p (%u)", + buffs, + buffs->wrt_ptr, buffs->buff[buffs->wrt_ptr], + buffs->unlck_ptr, buffs->buff[buffs->unlck_ptr], + buff, buff->buffer_no)); + DBUG_ASSERT(buffs->wrt_ptr < MAX_TRUNSLOG_USED_BUFFERS); + buffs->buff[buffs->wrt_ptr++]= buff; + DBUG_VOID_RETURN; +} + +static void +used_buffs_register_unlock(TRUNSLOG_USED_BUFFERS *buffs, + struct st_translog_buffer *buff + __attribute__((unused)) ) +{ + DBUG_ENTER("used_buffs_register_unlock"); + DBUG_PRINT("enter", ("SUB buffs: %p unlk %u (%p) wrt_ptr: %u (%p)" + " buff %p (%u)", + buffs, + buffs->wrt_ptr, buffs->buff[buffs->wrt_ptr], + buffs->unlck_ptr, buffs->buff[buffs->unlck_ptr], + buff, buff->buffer_no)); + DBUG_ASSERT(buffs->buff[buffs->unlck_ptr] == buff); + buffs->unlck_ptr++; + DBUG_VOID_RETURN; +} +static void used_buffs_urgent_unlock(TRUNSLOG_USED_BUFFERS *buffs) +{ + uint i; + DBUG_ENTER("used_buffs_urgent_unlock"); + translog_lock(); + translog_stop_writing(); + translog_unlock(); + for (i= buffs->unlck_ptr; i < buffs->wrt_ptr; i++) + { + struct st_translog_buffer *buf= buffs->buff[i]; + translog_buffer_lock(buf); + translog_buffer_decrease_writers(buf); + translog_buffer_unlock(buf); + buffs->buff[i]= NULL; + } + used_buffs_init(buffs); + DBUG_VOID_RETURN; +} /* Get page rest @@ -4997,6 +5104,11 @@ translog_write_variable_record_1group(LSN *lsn, lsn, hook_arg))) { translog_unlock(); + if (buffer_to_flush != NULL) + { + translog_buffer_flush(buffer_to_flush); + translog_buffer_unlock(buffer_to_flush); + } DBUG_RETURN(1); } cursor= log_descriptor.bc; @@ -5027,8 +5139,9 @@ translog_write_variable_record_1group(LSN *lsn, (log_descriptor.page_capacity_chunk_2 - 1), record_rest, parts->record_length)); /* record_rest + 3 is chunk type 3 overhead + record_rest */ - rc|= translog_advance_pointer((int)(full_pages + additional_chunk3_page), - (record_rest ? record_rest + 3 : 0)); + rc= translog_advance_pointer((int)(full_pages + additional_chunk3_page), + (record_rest ? record_rest + 3 : 0), + &cursor.buffs); log_descriptor.bc.buffer->last_lsn= *lsn; DBUG_PRINT("info", ("last_lsn set to " LSN_FMT " buffer: %p", LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn), @@ -5047,7 +5160,11 @@ translog_write_variable_record_1group(LSN *lsn, translog_buffer_unlock(buffer_to_flush); } if (rc) + { + //translog_advance_pointer decreased writers so it is OK + DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr); DBUG_RETURN(1); + } translog_write_variable_record_1group_header(parts, type, short_trid, header_length, chunk0_header); @@ -5062,7 +5179,7 @@ translog_write_variable_record_1group(LSN *lsn, for (i= 0; i < full_pages; i++) { if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor)) - DBUG_RETURN(1); + goto error; DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT, LSN_IN_PARTS(log_descriptor.horizon), @@ -5075,7 +5192,7 @@ translog_write_variable_record_1group(LSN *lsn, log_descriptor. page_capacity_chunk_2 - 2, &horizon, &cursor)) - DBUG_RETURN(1); + goto error; DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT, LSN_IN_PARTS(log_descriptor.horizon), LSN_IN_PARTS(horizon))); @@ -5085,17 +5202,22 @@ translog_write_variable_record_1group(LSN *lsn, if (translog_write_variable_record_chunk3_page(parts, record_rest, &horizon, &cursor)) - DBUG_RETURN(1); - DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT, - (uint) LSN_FILE_NO(log_descriptor.horizon), - (uint) LSN_OFFSET(log_descriptor.horizon), - (uint) LSN_FILE_NO(horizon), - (uint) LSN_OFFSET(horizon))); + goto error; + DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT, + (uint) LSN_FILE_NO(log_descriptor.horizon), + (uint) LSN_OFFSET(log_descriptor.horizon), + (uint) LSN_FILE_NO(horizon), + (uint) LSN_OFFSET(horizon))); translog_buffer_lock(cursor.buffer); translog_buffer_decrease_writers(cursor.buffer); + used_buffs_register_unlock(&cursor.buffs, cursor.buffer); translog_buffer_unlock(cursor.buffer); - DBUG_RETURN(rc); + DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr); + DBUG_RETURN(0); +error: + used_buffs_urgent_unlock(&cursor.buffs); + DBUG_RETURN(1); } @@ -5149,7 +5271,8 @@ translog_write_variable_record_1chunk(LSN *lsn, lsn, hook_arg))) { translog_unlock(); - DBUG_RETURN(1); + rc= 1; + goto err; } rc= translog_write_parts_on_page(&log_descriptor.horizon, @@ -5165,6 +5288,7 @@ translog_write_variable_record_1chunk(LSN *lsn, check if we switched buffer and need process it (current buffer is unlocked already => we will not delay other threads */ +err: if (buffer_to_flush != NULL) { if (!rc) @@ -5504,9 +5628,11 @@ translog_write_variable_record_mgroup(LSN *lsn, uint file_of_the_first_group; int pages_to_skip; struct st_translog_buffer *buffer_of_last_lsn; + my_bool external_buffer_to_flush= TRUE; DBUG_ENTER("translog_write_variable_record_mgroup"); translog_lock_assert_owner(); + used_buffs_init(&cursor.buffs); chunk2_header[0]= TRANSLOG_CHUNK_NOHDR; if (my_init_dynamic_array(&groups, @@ -5514,6 +5640,11 @@ translog_write_variable_record_mgroup(LSN *lsn, 10, 10, MYF(0))) { translog_unlock(); + if (buffer_to_flush != NULL) + { + translog_buffer_flush(buffer_to_flush); + translog_buffer_unlock(buffer_to_flush); + } DBUG_PRINT("error", ("init array failed")); DBUG_RETURN(1); } @@ -5540,6 +5671,7 @@ translog_write_variable_record_mgroup(LSN *lsn, translog_mark_file_unfinished(file_of_the_first_group); do { + DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr); group.addr= horizon= log_descriptor.horizon; cursor= log_descriptor.bc; cursor.chaser= 1; @@ -5572,21 +5704,26 @@ translog_write_variable_record_mgroup(LSN *lsn, (ulong)(parts->record_length - (first_page - 1 + buffer_rest) - done))); - rc|= translog_advance_pointer((int)full_pages, 0); + rc= translog_advance_pointer((int)full_pages, 0, &cursor.buffs); translog_unlock(); if (buffer_to_flush != NULL) { - translog_buffer_decrease_writers(buffer_to_flush); + if (!external_buffer_to_flush) + translog_buffer_decrease_writers(buffer_to_flush); if (!rc) rc= translog_buffer_flush(buffer_to_flush); translog_buffer_unlock(buffer_to_flush); buffer_to_flush= NULL; } + external_buffer_to_flush= FALSE; + if (rc) { DBUG_PRINT("error", ("flush of unlock buffer failed")); + //translog_advance_pointer decreased writers so it is OK + DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr); goto err; } @@ -5623,6 +5760,7 @@ translog_write_variable_record_mgroup(LSN *lsn, } translog_buffer_lock(cursor.buffer); translog_buffer_decrease_writers(cursor.buffer); + used_buffs_register_unlock(&cursor.buffs, cursor.buffer); translog_buffer_unlock(cursor.buffer); translog_lock(); @@ -5637,6 +5775,11 @@ translog_write_variable_record_mgroup(LSN *lsn, first_page= translog_get_current_page_rest(); } buffer_rest= translog_get_current_group_size(); + + if (buffer_to_flush) + used_buffs_register_unlock(&cursor.buffs, + buffer_to_flush); // will be unlocked + } while ((translog_size_t)(first_page + buffer_rest) < (translog_size_t)(parts->record_length - done)); @@ -5732,17 +5875,21 @@ translog_write_variable_record_mgroup(LSN *lsn, (ulong) full_pages * log_descriptor.page_capacity_chunk_2, chunk3_pages, (uint) chunk3_size, (uint) record_rest)); + + DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr); rc= translog_advance_pointer(pages_to_skip + (int)(chunk0_pages - 1), record_rest + header_fixed_part + (groups.elements - ((page_capacity - header_fixed_part) / (7 + 1)) * - (chunk0_pages - 1)) * (7 + 1)); + (chunk0_pages - 1)) * (7 + 1), + &cursor.buffs); buffer_of_last_lsn= log_descriptor.bc.buffer; translog_unlock(); if (buffer_to_flush != NULL) { + DBUG_ASSERT(!external_buffer_to_flush); translog_buffer_decrease_writers(buffer_to_flush); if (!rc) rc= translog_buffer_flush(buffer_to_flush); @@ -5906,8 +6053,10 @@ translog_write_variable_record_mgroup(LSN *lsn, } while (chunk0_pages != 0); translog_buffer_lock(cursor.buffer); translog_buffer_decrease_writers(cursor.buffer); + used_buffs_register_unlock(&cursor.buffs, cursor.buffer); translog_buffer_unlock(cursor.buffer); rc= 0; + DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr); if (translog_set_lsn_for_files(file_of_the_first_group, LSN_FILE_NO(*lsn), *lsn, FALSE)) @@ -5916,17 +6065,22 @@ translog_write_variable_record_mgroup(LSN *lsn, translog_mark_file_finished(file_of_the_first_group); delete_dynamic(&groups); - DBUG_RETURN(rc); + DBUG_RETURN(0); err_unlock: translog_unlock(); err: + + if (cursor.buffs.unlck_ptr != cursor.buffs.wrt_ptr) + used_buffs_urgent_unlock(&cursor.buffs); + if (buffer_to_flush != NULL) { /* This is to prevent locking buffer forever in case of error */ - translog_buffer_decrease_writers(buffer_to_flush); + if (!external_buffer_to_flush) + translog_buffer_decrease_writers(buffer_to_flush); if (!rc) rc= translog_buffer_flush(buffer_to_flush); translog_buffer_unlock(buffer_to_flush); @@ -7480,7 +7634,8 @@ static void translog_force_current_buffer_to_finish() DBUG_ASSERT(log_descriptor.bc.ptr !=NULL); DBUG_ASSERT(LSN_FILE_NO(log_descriptor.horizon) == - LSN_FILE_NO(old_buffer->offset)); + LSN_FILE_NO(old_buffer->offset) || + translog_status == TRANSLOG_READONLY ); translog_check_cursor(&log_descriptor.bc); DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE); if (left) diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index d05e2313891..a781b7e00d3 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -334,13 +334,13 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) }); DEBUG_SYNC_C("mi_open_kfile"); if ((kfile=mysql_file_open(key_file_kfile, name_buff, - (open_mode=O_RDWR) | O_SHARE | O_NOFOLLOW, + (open_mode=O_RDWR) | O_SHARE | O_NOFOLLOW | O_CLOEXEC, MYF(MY_NOSYMLINKS))) < 0) { if ((errno != EROFS && errno != EACCES) || mode != O_RDONLY || (kfile=mysql_file_open(key_file_kfile, name_buff, - (open_mode=O_RDONLY) | O_SHARE | O_NOFOLLOW, + (open_mode=O_RDONLY) | O_SHARE | O_NOFOLLOW | O_CLOEXEC, MYF(MY_NOSYMLINKS))) < 0) goto err; } @@ -1949,7 +1949,7 @@ int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share) DEBUG_SYNC_C("mi_open_datafile"); info->dfile.file= share->bitmap.file.file= mysql_file_open(key_file_dfile, share->data_file_name.str, - share->mode | O_SHARE, MYF(flags)); + share->mode | O_SHARE | O_CLOEXEC, MYF(flags)); return info->dfile.file >= 0 ? 0 : 1; } @@ -1963,7 +1963,7 @@ int _ma_open_keyfile(MARIA_SHARE *share) mysql_mutex_lock(&share->intern_lock); share->kfile.file= mysql_file_open(key_file_kfile, share->unique_file_name.str, - share->mode | O_SHARE | O_NOFOLLOW, + share->mode | O_SHARE | O_NOFOLLOW | O_CLOEXEC, MYF(MY_WME | MY_NOSYMLINKS)); mysql_mutex_unlock(&share->intern_lock); return (share->kfile.file < 0); diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc index a70f464ef4a..622e5c66d4f 100644 --- a/storage/myisam/ha_myisam.cc +++ b/storage/myisam/ha_myisam.cc @@ -2568,7 +2568,7 @@ maria_declare_plugin(myisam) &myisam_storage_engine, "MyISAM", "MySQL AB", - "MyISAM storage engine", + "Non-transactional engine with good performance and small data footprint", PLUGIN_LICENSE_GPL, myisam_init, /* Plugin Init */ NULL, /* Plugin Deinit */ diff --git a/storage/myisam/mi_key.c b/storage/myisam/mi_key.c index 9a2526ad2cf..18ecc9e8ba3 100644 --- a/storage/myisam/mi_key.c +++ b/storage/myisam/mi_key.c @@ -553,7 +553,7 @@ ulonglong retrieve_auto_increment(MI_INFO *info,const uchar *record) switch (keyseg->type) { case HA_KEYTYPE_INT8: - s_value= (longlong) *(char*)key; + s_value= (longlong) *(const signed char*) key; break; case HA_KEYTYPE_BINARY: value=(ulonglong) *(uchar*) key; diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c index 41b0e18da02..85fb270869b 100644 --- a/storage/myisam/mi_open.c +++ b/storage/myisam/mi_open.c @@ -139,13 +139,13 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) DEBUG_SYNC_C("mi_open_kfile"); if ((kfile= mysql_file_open(mi_key_file_kfile, name_buff, - (open_mode= O_RDWR) | O_SHARE | O_NOFOLLOW, + (open_mode= O_RDWR) | O_SHARE | O_NOFOLLOW | O_CLOEXEC, MYF(MY_NOSYMLINKS))) < 0) { if ((errno != EROFS && errno != EACCES) || mode != O_RDONLY || (kfile= mysql_file_open(mi_key_file_kfile, name_buff, - (open_mode= O_RDONLY) | O_SHARE| O_NOFOLLOW, + (open_mode= O_RDONLY) | O_SHARE| O_NOFOLLOW | O_CLOEXEC, MYF(MY_NOSYMLINKS))) < 0) goto err; } @@ -1270,7 +1270,7 @@ int mi_open_datafile(MI_INFO *info, MYISAM_SHARE *share) myf flags= MY_WME | (share->mode & O_NOFOLLOW ? MY_NOSYMLINKS: 0); DEBUG_SYNC_C("mi_open_datafile"); info->dfile= mysql_file_open(mi_key_file_dfile, share->data_file_name, - share->mode | O_SHARE, MYF(flags)); + share->mode | O_SHARE | O_CLOEXEC, MYF(flags)); return info->dfile >= 0 ? 0 : 1; } @@ -1279,7 +1279,7 @@ int mi_open_keyfile(MYISAM_SHARE *share) { if ((share->kfile= mysql_file_open(mi_key_file_kfile, share->unique_file_name, - share->mode | O_SHARE | O_NOFOLLOW, + share->mode | O_SHARE | O_NOFOLLOW | O_CLOEXEC, MYF(MY_NOSYMLINKS | MY_WME))) < 0) return 1; return 0; diff --git a/storage/tokudb/CMakeLists.txt b/storage/tokudb/CMakeLists.txt index 22c2cd0d490..8facf6c3f10 100644 --- a/storage/tokudb/CMakeLists.txt +++ b/storage/tokudb/CMakeLists.txt @@ -75,6 +75,7 @@ IF(INSTALL_SYSCONFDIR) COMPONENT tokudb-engine) ENDIF() +MY_CHECK_AND_SET_COMPILER_FLAG("-Wno-shadow") MY_CHECK_AND_SET_COMPILER_FLAG("-Wno-vla" DEBUG) MY_CHECK_AND_SET_COMPILER_FLAG("-Wno-implicit-fallthrough") diff --git a/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake b/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake index 385723aebc7..50d35ee4906 100644 --- a/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake +++ b/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake @@ -149,7 +149,7 @@ set_cflags_if_supported( -Wmissing-prototypes -Wmissing-declarations -Wpointer-arith - -Wshadow + #-Wshadow will fail with GCC-8 ${OPTIONAL_CFLAGS} ## other flags to try: #-Wunsafe-loop-optimizations diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.cc b/storage/tokudb/PerconaFT/ft/ft-ops.cc index a98768158dd..aefd6f0ec22 100644 --- a/storage/tokudb/PerconaFT/ft/ft-ops.cc +++ b/storage/tokudb/PerconaFT/ft/ft-ops.cc @@ -822,22 +822,22 @@ int toku_ftnode_fetch_callback(CACHEFILE UU(cachefile), fprintf( stderr, "%s:%d:toku_ftnode_fetch_callback - " - "file[%s], blocknum[%ld], toku_deserialize_ftnode_from " + "file[%s], blocknum[%lld], toku_deserialize_ftnode_from " "failed with a checksum error.\n", __FILE__, __LINE__, toku_cachefile_fname_in_env(cachefile), - blocknum.b); + (longlong)blocknum.b); } else { fprintf( stderr, "%s:%d:toku_ftnode_fetch_callback - " - "file[%s], blocknum[%ld], toku_deserialize_ftnode_from " + "file[%s], blocknum[%lld], toku_deserialize_ftnode_from " "failed with %d.\n", __FILE__, __LINE__, toku_cachefile_fname_in_env(cachefile), - blocknum.b, + (longlong)blocknum.b, r); } // make absolutely sure we crash before doing anything else. diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc index b24d72a5dff..0d6573972d7 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc @@ -656,20 +656,20 @@ exit: fprintf(stderr, \ "%s:%d toku_deserialize_ft_from: " \ "filename[%s] " \ - "r[%d] max_acceptable_lsn[%lu]" \ - "r0[%d] checkpoint_lsn_0[%lu] checkpoint_count_0[%lu] " \ - "r1[%d] checkpoint_lsn_1[%lu] checkpoint_count_1[%lu]\n", \ + "r[%d] max_acceptable_lsn[%llu]" \ + "r0[%d] checkpoint_lsn_0[%llu] checkpoint_count_0[%llu] " \ + "r1[%d] checkpoint_lsn_1[%llu] checkpoint_count_1[%llu]\n", \ __FILE__, \ __LINE__, \ fn, \ r, \ - max_acceptable_lsn.lsn, \ + (ulonglong)max_acceptable_lsn.lsn, \ r0, \ - checkpoint_lsn_0.lsn, \ - checkpoint_count_0, \ + (ulonglong)checkpoint_lsn_0.lsn, \ + (ulonglong)checkpoint_count_0, \ r1, \ - checkpoint_lsn_1.lsn, \ - checkpoint_count_1); + (ulonglong)checkpoint_lsn_1.lsn, \ + (ulonglong)checkpoint_count_1); int toku_deserialize_ft_from(int fd, const char *fn, diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc index 22a562ae24c..f3b31eb31be 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc @@ -1170,11 +1170,11 @@ int verify_ftnode_sub_block(struct sub_block *sb, fprintf( stderr, "%s:%d:verify_ftnode_sub_block - " - "file[%s], blocknum[%ld], stored_xsum[%u] != actual_xsum[%u]\n", + "file[%s], blocknum[%lld], stored_xsum[%u] != actual_xsum[%u]\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, stored_xsum, actual_xsum); dump_bad_block((Bytef *) sb->uncompressed_ptr, sb->uncompressed_size); @@ -1197,11 +1197,11 @@ static int deserialize_ftnode_info(struct sub_block *sb, FTNODE node) { fprintf( stderr, "%s:%d:deserialize_ftnode_info - " - "file[%s], blocknum[%ld], verify_ftnode_sub_block failed with %d\n", + "file[%s], blocknum[%lld], verify_ftnode_sub_block failed with %d\n", __FILE__, __LINE__, fname ? fname : "unknown", - node->blocknum.b, + (longlong)node->blocknum.b, r); dump_bad_block(static_cast<unsigned char *>(sb->uncompressed_ptr), sb->uncompressed_size); @@ -1253,11 +1253,11 @@ static int deserialize_ftnode_info(struct sub_block *sb, FTNODE node) { fprintf( stderr, "%s:%d:deserialize_ftnode_info - " - "file[%s], blocknum[%ld], data_size[%d] != rb.ndone[%d]\n", + "file[%s], blocknum[%lld], data_size[%d] != rb.ndone[%d]\n", __FILE__, __LINE__, fname ? fname : "unknown", - node->blocknum.b, + (longlong)node->blocknum.b, data_size, rb.ndone); dump_bad_block(rb.buf, rb.size); @@ -1388,12 +1388,12 @@ static int deserialize_ftnode_partition( if (r != 0) { fprintf(stderr, "%s:%d:deserialize_ftnode_partition - " - "file[%s], blocknum[%ld], " + "file[%s], blocknum[%lld], " "verify_ftnode_sub_block failed with %d\n", __FILE__, __LINE__, fname ? fname : "unknown", - node->blocknum.b, + (longlong)node->blocknum.b, r); goto exit; } @@ -1410,12 +1410,12 @@ static int deserialize_ftnode_partition( if (ch != FTNODE_PARTITION_MSG_BUFFER) { fprintf(stderr, "%s:%d:deserialize_ftnode_partition - " - "file[%s], blocknum[%ld], ch[%d] != " + "file[%s], blocknum[%lld], ch[%d] != " "FTNODE_PARTITION_MSG_BUFFER[%d]\n", __FILE__, __LINE__, fname ? fname : "unknown", - node->blocknum.b, + (longlong)node->blocknum.b, ch, FTNODE_PARTITION_MSG_BUFFER); dump_bad_block(rb.buf, rb.size); @@ -1433,12 +1433,12 @@ static int deserialize_ftnode_partition( if (ch != FTNODE_PARTITION_DMT_LEAVES) { fprintf(stderr, "%s:%d:deserialize_ftnode_partition - " - "file[%s], blocknum[%ld], ch[%d] != " + "file[%s], blocknum[%lld], ch[%d] != " "FTNODE_PARTITION_DMT_LEAVES[%d]\n", __FILE__, __LINE__, fname ? fname : "unknown", - node->blocknum.b, + (longlong)node->blocknum.b, ch, FTNODE_PARTITION_DMT_LEAVES); dump_bad_block(rb.buf, rb.size); @@ -1457,11 +1457,11 @@ static int deserialize_ftnode_partition( if (rb.ndone != rb.size) { fprintf(stderr, "%s:%d:deserialize_ftnode_partition - " - "file[%s], blocknum[%ld], rb.ndone[%d] != rb.size[%d]\n", + "file[%s], blocknum[%lld], rb.ndone[%d] != rb.size[%d]\n", __FILE__, __LINE__, fname ? fname : "unknown", - node->blocknum.b, + (longlong)node->blocknum.b, rb.ndone, rb.size); dump_bad_block(rb.buf, rb.size); @@ -1485,12 +1485,12 @@ static int decompress_and_deserialize_worker(struct rbuf curr_rbuf, const char *fname = toku_ftnode_get_cachefile_fname_in_env(node); fprintf(stderr, "%s:%d:decompress_and_deserialize_worker - " - "file[%s], blocknum[%ld], read_and_decompress_sub_block failed " + "file[%s], blocknum[%lld], read_and_decompress_sub_block failed " "with %d\n", __FILE__, __LINE__, fname ? fname : "unknown", - node->blocknum.b, + (longlong)node->blocknum.b, r); dump_bad_block(curr_rbuf.buf, curr_rbuf.size); goto exit; @@ -1502,12 +1502,12 @@ static int decompress_and_deserialize_worker(struct rbuf curr_rbuf, const char *fname = toku_ftnode_get_cachefile_fname_in_env(node); fprintf(stderr, "%s:%d:decompress_and_deserialize_worker - " - "file[%s], blocknum[%ld], deserialize_ftnode_partition failed " + "file[%s], blocknum[%lld], deserialize_ftnode_partition failed " "with %d\n", __FILE__, __LINE__, fname ? fname : "unknown", - node->blocknum.b, + (longlong)node->blocknum.b, r); dump_bad_block(curr_rbuf.buf, curr_rbuf.size); goto exit; @@ -1582,11 +1582,11 @@ static int deserialize_ftnode_header_from_rbuf_if_small_enough( fprintf( stderr, "%s:%d:deserialize_ftnode_header_from_rbuf_if_small_enough - " - "file[%s], blocknum[%ld], rb->size[%u] < 24\n", + "file[%s], blocknum[%lld], rb->size[%u] < 24\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, rb->size); dump_bad_block(rb->buf, rb->size); // TODO: What error do we return here? @@ -1602,12 +1602,12 @@ static int deserialize_ftnode_header_from_rbuf_if_small_enough( fprintf( stderr, "%s:%d:deserialize_ftnode_header_from_rbuf_if_small_enough - " - "file[%s], blocknum[%ld], unrecognized magic number " + "file[%s], blocknum[%lld], unrecognized magic number " "%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, static_cast<const uint8_t*>(magic)[0], static_cast<const uint8_t*>(magic)[1], static_cast<const uint8_t*>(magic)[2], @@ -1627,12 +1627,12 @@ static int deserialize_ftnode_header_from_rbuf_if_small_enough( fprintf( stderr, "%s:%d:deserialize_ftnode_header_from_rbuf_if_small_enough - " - "file[%s], blocknum[%ld], node->layout_version_read_from_disk[%d] " + "file[%s], blocknum[%lld], node->layout_version_read_from_disk[%d] " "< FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES[%d]\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, node->layout_version_read_from_disk, FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES); dump_bad_block(rb->buf, rb->size); @@ -1667,11 +1667,11 @@ static int deserialize_ftnode_header_from_rbuf_if_small_enough( fprintf( stderr, "%s:%d:deserialize_ftnode_header_from_rbuf_if_small_enough - " - "file[%s], blocknum[%ld], needed_size[%d] > rb->size[%d]\n", + "file[%s], blocknum[%lld], needed_size[%d] > rb->size[%d]\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, needed_size, rb->size); dump_bad_block(rb->buf, rb->size); @@ -1695,11 +1695,11 @@ static int deserialize_ftnode_header_from_rbuf_if_small_enough( fprintf( stderr, "%s:%d:deserialize_ftnode_header_from_rbuf_if_small_enough - " - "file[%s], blocknum[%ld], stored_checksum[%d] != checksum[%d]\n", + "file[%s], blocknum[%lld], stored_checksum[%d] != checksum[%d]\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, stored_checksum, checksum); dump_bad_block(rb->buf, rb->size); @@ -1717,12 +1717,12 @@ static int deserialize_ftnode_header_from_rbuf_if_small_enough( fprintf( stderr, "%s:%d:deserialize_ftnode_header_from_rbuf_if_small_enough - " - "file[%s], blocknum[%ld], rb->size[%d] - rb->ndone[%d] < " + "file[%s], blocknum[%lld], rb->size[%d] - rb->ndone[%d] < " "sb_node_info.compressed_size[%d] + 8\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, rb->size, rb->ndone, sb_node_info.compressed_size); @@ -1744,11 +1744,11 @@ static int deserialize_ftnode_header_from_rbuf_if_small_enough( fprintf( stderr, "%s:%d:deserialize_ftnode_header_from_rbuf_if_small_enough - " - "file[%s], blocknum[%ld], sb_node_info.xsum[%d] != actual_xsum[%d]\n", + "file[%s], blocknum[%lld], sb_node_info.xsum[%d] != actual_xsum[%d]\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, sb_node_info.xsum, actual_xsum); dump_bad_block(rb->buf, rb->size); @@ -1774,12 +1774,12 @@ static int deserialize_ftnode_header_from_rbuf_if_small_enough( fprintf( stderr, "%s:%d:deserialize_ftnode_header_from_rbuf_if_small_enough - " - "file[%s], blocknum[%ld], deserialize_ftnode_info failed with " + "file[%s], blocknum[%lld], deserialize_ftnode_info failed with " "%d\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, r); dump_bad_block( static_cast<unsigned char *>(sb_node_info.uncompressed_ptr), @@ -1812,12 +1812,12 @@ static int deserialize_ftnode_header_from_rbuf_if_small_enough( fprintf( stderr, "%s:%d:deserialize_ftnode_header_from_rbuf_if_small_enough - " - "file[%s], blocknum[%ld], toku_ftnode_pf_callback failed with " + "file[%s], blocknum[%lld], toku_ftnode_pf_callback failed with " "%d\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, r); dump_bad_block(rb->buf, rb->size); goto cleanup; @@ -2164,12 +2164,12 @@ static int deserialize_and_upgrade_ftnode(FTNODE node, const char* fname = toku_cachefile_fname_in_env(bfe->ft->cf); fprintf(stderr, "%s:%d:deserialize_and_upgrade_ftnode - " - "file[%s], blocknum[%ld], " + "file[%s], blocknum[%lld], " "read_and_decompress_block_from_fd_into_rbuf failed with %d\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, r); goto exit; } @@ -2190,12 +2190,12 @@ static int deserialize_and_upgrade_ftnode(FTNODE node, const char* fname = toku_cachefile_fname_in_env(bfe->ft->cf); fprintf(stderr, "%s:%d:deserialize_and_upgrade_ftnode - " - "file[%s], blocknum[%ld], version[%d] > " + "file[%s], blocknum[%lld], version[%d] > " "FT_LAYOUT_VERSION_14[%d]\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, version, FT_LAYOUT_VERSION_14); dump_bad_block(rb.buf, rb.size); @@ -2278,12 +2278,12 @@ static int deserialize_ftnode_from_rbuf(FTNODE *ftnode, memcmp(magic, "tokunode", 8) != 0) { fprintf(stderr, "%s:%d:deserialize_ftnode_from_rbuf - " - "file[%s], blocknum[%ld], unrecognized magic number " + "file[%s], blocknum[%lld], unrecognized magic number " "%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, static_cast<const uint8_t *>(magic)[0], static_cast<const uint8_t *>(magic)[1], static_cast<const uint8_t *>(magic)[2], @@ -2309,12 +2309,12 @@ static int deserialize_ftnode_from_rbuf(FTNODE *ftnode, if (r != 0) { fprintf(stderr, "%s:%d:deserialize_ftnode_from_rbuf - " - "file[%s], blocknum[%ld], deserialize_and_upgrade_ftnode " + "file[%s], blocknum[%lld], deserialize_and_upgrade_ftnode " "failed with %d\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, r); dump_bad_block(rb->buf, rb->size); goto cleanup; @@ -2355,11 +2355,11 @@ static int deserialize_ftnode_from_rbuf(FTNODE *ftnode, fprintf( stderr, "%s:%d:deserialize_ftnode_from_rbuf - " - "file[%s], blocknum[%ld], stored_checksum[%d] != checksum[%d]\n", + "file[%s], blocknum[%lld], stored_checksum[%d] != checksum[%d]\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, stored_checksum, checksum); dump_bad_block(rb->buf, rb->size); @@ -2377,12 +2377,12 @@ static int deserialize_ftnode_from_rbuf(FTNODE *ftnode, fprintf( stderr, "%s:%d:deserialize_ftnode_from_rbuf - " - "file[%s], blocknum[%ld], read_and_decompress_sub_block failed " + "file[%s], blocknum[%lld], read_and_decompress_sub_block failed " "with %d\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, r); dump_bad_block( static_cast<unsigned char *>(sb_node_info.uncompressed_ptr), @@ -2398,12 +2398,12 @@ static int deserialize_ftnode_from_rbuf(FTNODE *ftnode, fprintf( stderr, "%s:%d:deserialize_ftnode_from_rbuf - " - "file[%s], blocknum[%ld], deserialize_ftnode_info failed with " + "file[%s], blocknum[%lld], deserialize_ftnode_info failed with " "%d\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, r); dump_bad_block(rb->buf, rb->size); goto cleanup; @@ -2470,12 +2470,12 @@ static int deserialize_ftnode_from_rbuf(FTNODE *ftnode, fprintf( stderr, "%s:%d:deserialize_ftnode_from_rbuf - " - "file[%s], blocknum[%ld], childnum[%d], " + "file[%s], blocknum[%lld], childnum[%d], " "decompress_and_deserialize_worker failed with %d\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, i, r); dump_bad_block(rb->buf, rb->size); @@ -2490,13 +2490,13 @@ static int deserialize_ftnode_from_rbuf(FTNODE *ftnode, fprintf( stderr, "%s:%d:deserialize_ftnode_from_rbuf - " - "file[%s], blocknum[%ld], childnum[%d], " + "file[%s], blocknum[%lld], childnum[%d], " "check_and_copy_compressed_sub_block_worker failed with " "%d\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, i, r); dump_bad_block(rb->buf, rb->size); @@ -2641,12 +2641,12 @@ int toku_deserialize_bp_from_compressed(FTNODE node, const char* fname = toku_cachefile_fname_in_env(bfe->ft->cf); fprintf(stderr, "%s:%d:toku_deserialize_bp_from_compressed - " - "file[%s], blocknum[%ld], " + "file[%s], blocknum[%lld], " "deserialize_ftnode_partition failed with %d\n", __FILE__, __LINE__, fname ? fname : "unknown", - node->blocknum.b, + (longlong)node->blocknum.b, r); dump_bad_block(static_cast<unsigned char *>(curr_sb->compressed_ptr), curr_sb->compressed_size); @@ -2689,12 +2689,12 @@ static int deserialize_ftnode_from_fd(int fd, fprintf( stderr, "%s:%d:deserialize_ftnode_from_fd - " - "file[%s], blocknum[%ld], deserialize_ftnode_from_rbuf failed with " + "file[%s], blocknum[%lld], deserialize_ftnode_from_rbuf failed with " "%d\n", __FILE__, __LINE__, fname ? fname : "unknown", - blocknum.b, + (longlong)blocknum.b, r); dump_bad_block(rb.buf, rb.size); } diff --git a/storage/tokudb/PerconaFT/portability/memory.h b/storage/tokudb/PerconaFT/portability/memory.h index 5ae652d39fc..851e4d69e03 100644 --- a/storage/tokudb/PerconaFT/portability/memory.h +++ b/storage/tokudb/PerconaFT/portability/memory.h @@ -107,7 +107,7 @@ size_t toku_malloc_usable_size(void *p) __attribute__((__visibility__("default") #define XMALLOC(v) CAST_FROM_VOIDP(v, toku_xmalloc(sizeof(*v))) #define XMALLOC_N(n,v) CAST_FROM_VOIDP(v, toku_xmalloc((n)*sizeof(*v))) #define XCALLOC_N(n,v) CAST_FROM_VOIDP(v, toku_xcalloc((n), (sizeof(*v)))) -#define XCALLOC(v) XCALLOC_N(1,(v)) +#define XCALLOC(v) XCALLOC_N(1,v) #define XREALLOC(v,s) CAST_FROM_VOIDP(v, toku_xrealloc(v, s)) #define XREALLOC_N(n,v) CAST_FROM_VOIDP(v, toku_xrealloc(v, (n)*sizeof(*v))) diff --git a/storage/tokudb/PerconaFT/portability/toku_debug_sync.h b/storage/tokudb/PerconaFT/portability/toku_debug_sync.h index b5394e58d68..493075c36c3 100644 --- a/storage/tokudb/PerconaFT/portability/toku_debug_sync.h +++ b/storage/tokudb/PerconaFT/portability/toku_debug_sync.h @@ -62,9 +62,6 @@ inline void toku_debug_sync(struct tokutxn *txn, const char *sync_point_name) { void *client_extra; THD *thd; - if (likely(!opt_debug_sync_timeout)) - return; - toku_txn_get_client_id(txn, &client_id, &client_extra); thd = reinterpret_cast<THD *>(client_extra); DEBUG_SYNC(thd, sync_point_name); diff --git a/storage/tokudb/PerconaFT/portability/toku_pthread.h b/storage/tokudb/PerconaFT/portability/toku_pthread.h index e3bd3bce598..a0dfcc246a7 100644 --- a/storage/tokudb/PerconaFT/portability/toku_pthread.h +++ b/storage/tokudb/PerconaFT/portability/toku_pthread.h @@ -162,10 +162,20 @@ typedef struct toku_mutex_aligned { #define ZERO_COND_INITIALIZER \ { 0 } #elif defined(__APPLE__) +#if TOKU_PTHREAD_DEBUG +#define ZERO_COND_INITIALIZER \ + { \ + { 0 , { 0 } }, \ + nullptr, \ + 0 \ + } +#else #define ZERO_COND_INITIALIZER \ { \ - { 0 } \ + { 0 , { 0 } }, \ + nullptr \ } +#endif #else // __linux__, at least #define ZERO_COND_INITIALIZER \ {} diff --git a/storage/tokudb/PerconaFT/src/CMakeLists.txt b/storage/tokudb/PerconaFT/src/CMakeLists.txt index 65bf4814cf8..bae37389004 100644 --- a/storage/tokudb/PerconaFT/src/CMakeLists.txt +++ b/storage/tokudb/PerconaFT/src/CMakeLists.txt @@ -18,7 +18,7 @@ set(tokudb_srcs ## make the shared library add_library(${LIBTOKUDB} SHARED ${tokudb_srcs}) add_dependencies(${LIBTOKUDB} install_tdb_h generate_log_code) -target_link_libraries(${LIBTOKUDB} LINK_PRIVATE locktree_static ft_static util_static lzma snappy ${LIBTOKUPORTABILITY}) +target_link_libraries(${LIBTOKUDB} LINK_PRIVATE locktree_static ft_static util_static lzma snappy dbug ${LIBTOKUPORTABILITY}) target_link_libraries(${LIBTOKUDB} LINK_PUBLIC ${ZLIB_LIBRARY} ) ## make the static library diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/autogen.sh b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/autogen.sh index f0195ecadd3..f0195ecadd3 100644..100755 --- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/autogen.sh +++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/autogen.sh diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/compile b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/compile index ec64c622026..ec64c622026 100644..100755 --- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/compile +++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/compile diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess index 7501b1bee01..7501b1bee01 100644..100755 --- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess +++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.rpath b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.rpath index c492a93b663..c492a93b663 100644..100755 --- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.rpath +++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.rpath diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.sub b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.sub index a39437d0158..a39437d0158 100644..100755 --- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.sub +++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.sub diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/depcomp b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/depcomp index df8eea7e4ce..df8eea7e4ce 100644..100755 --- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/depcomp +++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/depcomp diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/install-sh b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/install-sh index 6781b987bdb..6781b987bdb 100644..100755 --- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/install-sh +++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/install-sh diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/ltmain.sh b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/ltmain.sh index b36c4ad366c..b36c4ad366c 100644..100755 --- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/ltmain.sh +++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/ltmain.sh diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/missing b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/missing index 28055d2ae6f..28055d2ae6f 100644..100755 --- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/missing +++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/missing diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/extra/7z2lzma/7z2lzma.bash b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/extra/7z2lzma/7z2lzma.bash index 35ea4dae973..35ea4dae973 100644..100755 --- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/extra/7z2lzma/7z2lzma.bash +++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/extra/7z2lzma/7z2lzma.bash diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/tests/test_compress.sh b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/tests/test_compress.sh index ff0cb304df4..ff0cb304df4 100644..100755 --- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/tests/test_compress.sh +++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/tests/test_compress.sh diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/tests/test_files.sh b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/tests/test_files.sh index 7dd9a3901bf..7dd9a3901bf 100644..100755 --- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/tests/test_files.sh +++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/tests/test_files.sh diff --git a/storage/tokudb/PerconaFT/util/dmt.cc b/storage/tokudb/PerconaFT/util/dmt.cc index b5b94982487..642c9367d7e 100644 --- a/storage/tokudb/PerconaFT/util/dmt.cc +++ b/storage/tokudb/PerconaFT/util/dmt.cc @@ -80,8 +80,8 @@ void dmt<dmtdata_t, dmtdataout_t, dmtwriter_t>::create_from_sorted_memory_of_fix paranoid_invariant(numvalues > 0); void *ptr = toku_mempool_malloc(&this->mp, aligned_memsize); paranoid_invariant_notnull(ptr); - uint8_t * const CAST_FROM_VOIDP(dest, ptr); - const uint8_t * const CAST_FROM_VOIDP(src, mem); + uint8_t * CAST_FROM_VOIDP(dest, ptr); + const uint8_t * CAST_FROM_VOIDP(src, mem); if (pad_bytes == 0) { paranoid_invariant(aligned_memsize == mem_length); memcpy(dest, src, aligned_memsize); diff --git a/storage/tokudb/PerconaFT/util/omt.h b/storage/tokudb/PerconaFT/util/omt.h index c7ed2ca546f..36946401381 100644 --- a/storage/tokudb/PerconaFT/util/omt.h +++ b/storage/tokudb/PerconaFT/util/omt.h @@ -127,7 +127,7 @@ public: paranoid_invariant(index != NODE_NULL); m_index = index; } -} __attribute__((__packed__,aligned(4))); +} ; template<> class subtree_templated<true> { @@ -184,7 +184,7 @@ public: inline void disable_bit(void) { m_bitfield &= MASK_INDEX; } -} __attribute__((__packed__)) ; +} ; template<typename omtdata_t, bool subtree_supports_marks> class omt_node_templated { @@ -197,7 +197,7 @@ public: // this needs to be in both implementations because we don't have // a "static if" the caller can use inline void clear_stolen_bits(void) {} -} __attribute__((__packed__,aligned(4))); +} ; template<typename omtdata_t> class omt_node_templated<omtdata_t, true> { @@ -234,7 +234,7 @@ public: this->unset_marked_bit(); this->unset_marks_below_bit(); } -} __attribute__((__packed__,aligned(4))); +} ; } diff --git a/storage/tokudb/tokudb_thread.h b/storage/tokudb/tokudb_thread.h index dec58f3fd35..5df0159901f 100644 --- a/storage/tokudb/tokudb_thread.h +++ b/storage/tokudb/tokudb_thread.h @@ -111,7 +111,6 @@ public: // wait for the event to become signalled void wait(void); - int wait(ulonglong microseconds); // signal the event void signal(void); @@ -152,7 +151,6 @@ public: // wait for the semaphore to become signalled E_WAIT wait(void); - E_WAIT wait(ulonglong microseconds); // signal the semaphore to increase the count // return true if signalled, false if ignored due to count @@ -372,28 +370,6 @@ inline void event_t::wait(void) { assert_debug(r == 0); return; } -inline int event_t::wait(ulonglong microseconds) { - timespec waittime = time::offset_timespec(microseconds); - int r = pthread_mutex_timedlock(&_mutex, &waittime); - if (r == ETIMEDOUT) return ETIMEDOUT; - assert_debug(r == 0); - while (_signalled == false && _pulsed == false) { - r = pthread_cond_timedwait(&_cond, &_mutex, &waittime); - if (r == ETIMEDOUT) { - r = pthread_mutex_unlock(&_mutex); - assert_debug(r == 0); - return ETIMEDOUT; - } - assert_debug(r == 0); - } - if (_manual_reset == false) - _signalled = false; - if (_pulsed) - _pulsed = false; - r = pthread_mutex_unlock(&_mutex); - assert_debug(r == 0); - return 0; -} inline void event_t::signal(void) { int r MY_ATTRIBUTE((unused)) = pthread_mutex_lock(&_mutex); assert_debug(r == 0); @@ -479,31 +455,6 @@ inline semaphore_t::E_WAIT semaphore_t::wait(void) { assert_debug(r == 0); return ret; } -inline semaphore_t::E_WAIT semaphore_t::wait(ulonglong microseconds) { - E_WAIT ret; - timespec waittime = time::offset_timespec(microseconds); - int r = pthread_mutex_timedlock(&_mutex, &waittime); - if (r == ETIMEDOUT) return E_TIMEDOUT; - assert_debug(r == 0); - while (_signalled == 0 && _interrupted == false) { - r = pthread_cond_timedwait(&_cond, &_mutex, &waittime); - if (r == ETIMEDOUT) { - r = pthread_mutex_unlock(&_mutex); - assert_debug(r == 0); - return E_TIMEDOUT; - } - assert_debug(r == 0); - } - if (_interrupted) { - ret = E_INTERRUPTED; - } else { - _signalled--; - ret = E_SIGNALLED; - } - r = pthread_mutex_unlock(&_mutex); - assert_debug(r == 0); - return ret; -} inline bool semaphore_t::signal(void) { bool ret = false; int r MY_ATTRIBUTE((unused)) = pthread_mutex_lock(&_mutex); diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc index b49ee86f78f..ce4dd4715db 100644 --- a/storage/xtradb/buf/buf0buf.cc +++ b/storage/xtradb/buf/buf0buf.cc @@ -77,15 +77,6 @@ Created 11/5/1995 Heikki Tuuri #include "snappy-c.h" #endif -/** Decrypt a page. -@param[in,out] bpage Page control block -@param[in,out] space tablespace -@return whether the operation was successful */ -static -bool -buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space) - MY_ATTRIBUTE((nonnull)); - /********************************************************************//** Mark a table with the specified space pointed by bpage->space corrupted. Also remove the bpage from LRU list. @@ -390,6 +381,143 @@ on the io_type */ ? (counter##_READ) \ : (counter##_WRITTEN)) + +/** Reserve a buffer slot for encryption, decryption or page compression. +@param[in,out] buf_pool buffer pool +@return reserved buffer slot */ +static buf_tmp_buffer_t* buf_pool_reserve_tmp_slot(buf_pool_t* buf_pool) +{ + for (ulint i = 0; i < buf_pool->tmp_arr->n_slots; i++) { + buf_tmp_buffer_t* slot = &buf_pool->tmp_arr->slots[i]; + if (slot->acquire()) { + return slot; + } + } + + /* We assume that free slot is found */ + ut_error; + return NULL; +} + +/** Reserve a buffer for encryption, decryption or decompression. +@param[in,out] slot reserved slot */ +static void buf_tmp_reserve_crypt_buf(buf_tmp_buffer_t* slot) +{ + if (!slot->crypt_buf) { + slot->crypt_buf = static_cast<byte*>( + aligned_malloc(srv_page_size, srv_page_size)); + } +} + +/** Reserve a buffer for compression. +@param[in,out] slot reserved slot */ +static void buf_tmp_reserve_compression_buf(buf_tmp_buffer_t* slot) +{ + if (!slot->comp_buf) { + /* Both snappy and lzo compression methods require that + output buffer used for compression is bigger than input + buffer. Increase the allocated buffer size accordingly. */ + ulint size = srv_page_size; +#ifdef HAVE_LZO + size += LZO1X_1_15_MEM_COMPRESS; +#elif defined HAVE_SNAPPY + size = snappy_max_compressed_length(size); +#endif + slot->comp_buf = static_cast<byte*>( + aligned_malloc(size, srv_page_size)); + } +} + +/** Decrypt a page. +@param[in,out] bpage Page control block +@param[in,out] space tablespace +@return whether the operation was successful */ +static bool buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space) +{ + ut_ad(space->n_pending_ios > 0); + ut_ad(space->id == bpage->space); + + byte* dst_frame = bpage->zip.data ? bpage->zip.data : + ((buf_block_t*) bpage)->frame; + bool page_compressed = fil_page_is_compressed(dst_frame); + buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + + if (bpage->offset == 0) { + /* File header pages are not encrypted/compressed */ + return true; + } + + /* Page is encrypted if encryption information is found from + tablespace and page contains used key_version. This is true + also for pages first compressed and then encrypted. */ + + buf_tmp_buffer_t* slot; + + if (page_compressed) { + /* the page we read is unencrypted */ + /* Find free slot from temporary memory array */ +decompress: + slot = buf_pool_reserve_tmp_slot(buf_pool); + /* For decompression, use crypt_buf. */ + buf_tmp_reserve_crypt_buf(slot); +decompress_with_slot: + ut_d(fil_page_type_validate(dst_frame)); + + bpage->write_size = fil_page_decompress(slot->crypt_buf, + dst_frame); + slot->release(); + + ut_ad(!bpage->write_size || fil_page_type_validate(dst_frame)); + ut_ad(space->n_pending_ios > 0); + return bpage->write_size != 0; + } + + if (space->crypt_data + && mach_read_from_4(FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + + dst_frame)) { + /* Verify encryption checksum before we even try to + decrypt. */ + if (!fil_space_verify_crypt_checksum( + dst_frame, buf_page_get_zip_size(bpage), NULL, + bpage->offset)) { +decrypt_failed: + /* Mark page encrypted in case it should be. */ + if (space->crypt_data->type + != CRYPT_SCHEME_UNENCRYPTED) { + bpage->encrypted = true; + } + + return false; + } + + /* Find free slot from temporary memory array */ + slot = buf_pool_reserve_tmp_slot(buf_pool); + buf_tmp_reserve_crypt_buf(slot); + + ut_d(fil_page_type_validate(dst_frame)); + + /* decrypt using crypt_buf to dst_frame */ + if (!fil_space_decrypt(space, slot->crypt_buf, + dst_frame, &bpage->encrypted)) { + slot->release(); + goto decrypt_failed; + } + + ut_d(fil_page_type_validate(dst_frame)); + + if (fil_page_is_compressed_encrypted(dst_frame)) { + goto decompress_with_slot; + } + + slot->release(); + } else if (fil_page_is_compressed_encrypted(dst_frame)) { + goto decompress; + } + + ut_ad(space->n_pending_ios > 0); + return true; +} + /********************************************************************//** Gets the smallest oldest_modification lsn for any page in the pool. Returns zero if all modified pages have been flushed to disk. @@ -4696,9 +4824,9 @@ buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space) ib_logf(IB_LOG_LEVEL_ERROR, "The page [page id: space=%u" ", page number=%u]" - " in file %s cannot be decrypted.", + " in file '%s' cannot be decrypted.", bpage->space, bpage->offset, - space->name); + space->chain.start->name); ib_logf(IB_LOG_LEVEL_INFO, "However key management plugin or used key_version " ULINTPF @@ -4733,7 +4861,6 @@ buf_page_io_complete(buf_page_t* bpage) const ibool uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); bool have_LRU_mutex = false; - byte* frame = NULL; dberr_t err = DB_SUCCESS; ut_a(buf_page_in_file(bpage)); @@ -4751,19 +4878,18 @@ buf_page_io_complete(buf_page_t* bpage) ulint read_page_no = 0; ulint read_space_id = 0; uint key_version = 0; - - ut_ad(bpage->zip.data || ((buf_block_t*)bpage)->frame); + byte* frame = bpage->zip.data + ? bpage->zip.data + : reinterpret_cast<buf_block_t*>(bpage)->frame; + ut_ad(frame); fil_space_t* space = fil_space_acquire_for_io(bpage->space); if (!space) { return(DB_TABLESPACE_DELETED); } - buf_page_decrypt_after_read(bpage, space); - - if (buf_page_get_zip_size(bpage)) { - frame = bpage->zip.data; - } else { - frame = ((buf_block_t*) bpage)->frame; + if (!buf_page_decrypt_after_read(bpage, space)) { + err = DB_DECRYPTION_FAILED; + goto database_corrupted; } if (buf_page_get_zip_size(bpage)) { @@ -4944,7 +5070,7 @@ database_corrupted: /* io_type == BUF_IO_WRITE */ if (bpage->slot) { /* Mark slot free */ - bpage->slot->reserved = false; + bpage->slot->release(); bpage->slot = NULL; } } @@ -6243,66 +6369,6 @@ buf_pool_mutex_exit( mutex_exit(&buf_pool->LRU_list_mutex); } -/********************************************************************//** -Reserve unused slot from temporary memory array and allocate necessary -temporary memory if not yet allocated. -@return reserved slot */ -UNIV_INTERN -buf_tmp_buffer_t* -buf_pool_reserve_tmp_slot( -/*======================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool where to - reserve */ - bool compressed) /*!< in: is file space compressed */ -{ - buf_tmp_buffer_t *free_slot=NULL; - - /* Array is protected by buf_pool mutex */ - buf_pool_mutex_enter(buf_pool); - - for(ulint i = 0; i < buf_pool->tmp_arr->n_slots; i++) { - buf_tmp_buffer_t *slot = &buf_pool->tmp_arr->slots[i]; - - if(slot->reserved == false) { - free_slot = slot; - break; - } - } - - /* We assume that free slot is found */ - ut_a(free_slot != NULL); - free_slot->reserved = true; - /* Now that we have reserved this slot we can release - buf_pool mutex */ - buf_pool_mutex_exit(buf_pool); - - /* Allocate temporary memory for encryption/decryption */ - if (free_slot->crypt_buf == NULL) { - free_slot->crypt_buf = static_cast<byte*>(aligned_malloc(UNIV_PAGE_SIZE, UNIV_PAGE_SIZE)); - memset(free_slot->crypt_buf, 0, UNIV_PAGE_SIZE); - } - - /* For page compressed tables allocate temporary memory for - compression/decompression */ - if (compressed && free_slot->comp_buf == NULL) { - ulint size = UNIV_PAGE_SIZE; - - /* Both snappy and lzo compression methods require that - output buffer used for compression is bigger than input - buffer. Increase the allocated buffer size accordingly. */ -#if HAVE_SNAPPY - size = snappy_max_compressed_length(size); -#endif -#if HAVE_LZO - size += LZO1X_1_15_MEM_COMPRESS; -#endif - free_slot->comp_buf = static_cast<byte*>(aligned_malloc(size, UNIV_PAGE_SIZE)); - memset(free_slot->comp_buf, 0, size); - } - - return (free_slot); -} - /** Encryption and page_compression hook that is called just before a page is written to disk. @param[in,out] space tablespace @@ -6352,16 +6418,18 @@ buf_page_encrypt_before_write( } ulint zip_size = buf_page_get_zip_size(bpage); - ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; + ut_ad(!zip_size || !page_compressed); buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); /* Find free slot from temporary memory array */ - buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed); + buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool); slot->out_buf = NULL; bpage->slot = slot; + buf_tmp_reserve_crypt_buf(slot); byte *dst_frame = slot->crypt_buf; if (!page_compressed) { +not_compressed: /* Encrypt page content */ byte* tmp = fil_space_encrypt(space, bpage->offset, @@ -6369,32 +6437,28 @@ buf_page_encrypt_before_write( src_frame, dst_frame); - bpage->real_size = page_size; + bpage->real_size = UNIV_PAGE_SIZE; slot->out_buf = dst_frame = tmp; ut_d(fil_page_type_validate(tmp)); } else { /* First we compress the page content */ - ulint out_len = 0; - - byte *tmp = fil_compress_page( - space, - (byte *)src_frame, - slot->comp_buf, - page_size, + buf_tmp_reserve_compression_buf(slot); + byte* tmp = slot->comp_buf; + ulint out_len = fil_page_compress( + src_frame, tmp, fsp_flags_get_page_compression_level(space->flags), fil_space_get_block_size(space, bpage->offset), - encrypted, - &out_len); + encrypted); + if (!out_len) { + goto not_compressed; + } bpage->real_size = out_len; -#ifdef UNIV_DEBUG - fil_page_type_validate(tmp); -#endif - - if(encrypted) { + ut_d(fil_page_type_validate(tmp)); + if (encrypted) { /* And then we encrypt the page content */ tmp = fil_space_encrypt(space, bpage->offset, @@ -6406,130 +6470,8 @@ buf_page_encrypt_before_write( slot->out_buf = dst_frame = tmp; } -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif + ut_d(fil_page_type_validate(dst_frame)); // return dst_frame which will be written return dst_frame; } - -/** Decrypt a page. -@param[in,out] bpage Page control block -@param[in,out] space tablespace -@return whether the operation was successful */ -static -bool -buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space) -{ - ut_ad(space->n_pending_ios > 0); - ut_ad(space->id == bpage->space); - - ulint zip_size = buf_page_get_zip_size(bpage); - ulint size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; - - byte* dst_frame = (zip_size) ? bpage->zip.data : - ((buf_block_t*) bpage)->frame; - unsigned key_version = - mach_read_from_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - bool page_compressed = fil_page_is_compressed(dst_frame); - bool page_compressed_encrypted = fil_page_is_compressed_encrypted(dst_frame); - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - bool success = true; - - if (bpage->offset == 0) { - /* File header pages are not encrypted/compressed */ - return (true); - } - - /* Page is encrypted if encryption information is found from - tablespace and page contains used key_version. This is true - also for pages first compressed and then encrypted. */ - if (!space->crypt_data) { - key_version = 0; - } - - if (page_compressed) { - /* the page we read is unencrypted */ - /* Find free slot from temporary memory array */ - buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed); - -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif - - /* decompress using comp_buf to dst_frame */ - fil_decompress_page(slot->comp_buf, - dst_frame, - ulong(size), - &bpage->write_size); - - /* Mark this slot as free */ - slot->reserved = false; - key_version = 0; - -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif - } else { - buf_tmp_buffer_t* slot = NULL; - - if (key_version) { - /* Verify encryption checksum before we even try to - decrypt. */ - if (!fil_space_verify_crypt_checksum(dst_frame, - zip_size, NULL, bpage->offset)) { - - /* Mark page encrypted in case it should - be. */ - if (space->crypt_data->type - != CRYPT_SCHEME_UNENCRYPTED) { - bpage->encrypted = true; - } - - return (false); - } - - /* Find free slot from temporary memory array */ - slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed); - -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif - - /* decrypt using crypt_buf to dst_frame */ - if (!fil_space_decrypt(space, slot->crypt_buf, - dst_frame, &bpage->encrypted)) { - success = false; - } - -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif - } - - if (page_compressed_encrypted && success) { - if (!slot) { - slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed); - } - -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif - /* decompress using comp_buf to dst_frame */ - fil_decompress_page(slot->comp_buf, - dst_frame, - ulong(size), - &bpage->write_size); - ut_d(fil_page_type_validate(dst_frame)); - } - - /* Mark this slot as free */ - if (slot) { - slot->reserved = false; - } - } - - ut_ad(space->n_pending_ios > 0); - return (success); -} diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc index 4995df98165..f9cf710728f 100644 --- a/storage/xtradb/buf/buf0dblwr.cc +++ b/storage/xtradb/buf/buf0dblwr.cc @@ -510,10 +510,11 @@ buf_dblwr_process() "Restoring possible half-written data pages " "from the doublewrite buffer..."); - unaligned_read_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); + unaligned_read_buf = static_cast<byte*>(ut_malloc(3 * UNIV_PAGE_SIZE)); read_buf = static_cast<byte*>( ut_align(unaligned_read_buf, UNIV_PAGE_SIZE)); + byte* const buf = read_buf + UNIV_PAGE_SIZE; for (std::list<byte*>::iterator i = recv_dblwr.pages.begin(); i != recv_dblwr.pages.end(); ++i, ++page_no_dblwr ) { @@ -562,24 +563,26 @@ buf_dblwr_process() ignore this page (there should be redo log records to initialize it). */ } else { - if (fil_page_is_compressed_encrypted(read_buf) || - fil_page_is_compressed(read_buf)) { - /* Decompress the page before - validating the checksum. */ - fil_decompress_page( - NULL, read_buf, srv_page_size, - NULL, true); + /* Decompress the page before + validating the checksum. */ + ulint decomp = fil_page_decompress(buf, read_buf); + if (!decomp) { + goto bad; + } + if (!decomp || (decomp != srv_page_size && zip_size)) { + goto bad; } if (fil_space_verify_crypt_checksum( - read_buf, zip_size, NULL, page_no) - || !buf_page_is_corrupted( - true, read_buf, zip_size, space())) { + read_buf, zip_size, NULL, page_no) + || !buf_page_is_corrupted( + true, read_buf, zip_size, space())) { /* The page is good; there is no need to consult the doublewrite buffer. */ continue; } +bad: /* We intentionally skip this message for is_all_zero pages. */ ib_logf(IB_LOG_LEVEL_INFO, @@ -588,18 +591,15 @@ buf_dblwr_process() space_id, page_no); } - /* Next, validate the doublewrite page. */ - if (fil_page_is_compressed_encrypted(page) || - fil_page_is_compressed(page)) { - /* Decompress the page before - validating the checksum. */ - fil_decompress_page( - NULL, page, srv_page_size, NULL, true); + ulint decomp = fil_page_decompress(buf, page); + if (!decomp || (decomp != srv_page_size && zip_size)) { + goto bad_doublewrite; } - - if (!fil_space_verify_crypt_checksum(page, zip_size, NULL, page_no) + if (!fil_space_verify_crypt_checksum(page, zip_size, NULL, + page_no) && buf_page_is_corrupted(true, page, zip_size, space)) { if (!is_all_zero) { +bad_doublewrite: ib_logf(IB_LOG_LEVEL_WARN, "A doublewrite copy of page " ULINTPF ":" ULINTPF " is corrupted.", diff --git a/storage/xtradb/fil/fil0crypt.cc b/storage/xtradb/fil/fil0crypt.cc index 93778bf471f..bf7c24ecaae 100644 --- a/storage/xtradb/fil/fil0crypt.cc +++ b/storage/xtradb/fil/fil0crypt.cc @@ -708,60 +708,39 @@ fil_space_encrypt( #ifdef UNIV_DEBUG if (tmp) { /* Verify that encrypted buffer is not corrupted */ - byte* tmp_mem = (byte *)malloc(UNIV_PAGE_SIZE); dberr_t err = DB_SUCCESS; byte* src = src_frame; bool page_compressed_encrypted = (mach_read_from_2(tmp+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); - byte* comp_mem = NULL; - byte* uncomp_mem = NULL; + byte uncomp_mem[UNIV_PAGE_SIZE_MAX]; + byte tmp_mem[UNIV_PAGE_SIZE_MAX]; ulint size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; if (page_compressed_encrypted) { - comp_mem = (byte *)malloc(UNIV_PAGE_SIZE); - uncomp_mem = (byte *)malloc(UNIV_PAGE_SIZE); - memcpy(comp_mem, src_frame, UNIV_PAGE_SIZE); - fil_decompress_page(uncomp_mem, comp_mem, - srv_page_size, NULL); - src = uncomp_mem; + memcpy(uncomp_mem, src, srv_page_size); + ulint unzipped1 = fil_page_decompress( + tmp_mem, uncomp_mem); + ut_ad(unzipped1); + if (unzipped1 != srv_page_size) { + src = uncomp_mem; + } } - bool corrupted1 = buf_page_is_corrupted(true, src, zip_size, space); - bool ok = fil_space_decrypt(crypt_data, tmp_mem, size, tmp, &err); + ut_ad(!buf_page_is_corrupted(true, src, zip_size, space)); + ut_ad(fil_space_decrypt(crypt_data, tmp_mem, size, tmp, &err)); + ut_ad(err == DB_SUCCESS); /* Need to decompress the page if it was also compressed */ if (page_compressed_encrypted) { - memcpy(comp_mem, tmp_mem, UNIV_PAGE_SIZE); - fil_decompress_page(tmp_mem, comp_mem, - srv_page_size, NULL); + byte buf[UNIV_PAGE_SIZE_MAX]; + memcpy(buf, tmp_mem, srv_page_size); + ulint unzipped2 = fil_page_decompress(tmp_mem, buf); + ut_ad(unzipped2); } - bool corrupted = buf_page_is_corrupted(true, tmp_mem, zip_size, space); - memcpy(tmp_mem+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, src+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 8); - bool different = memcmp(src, tmp_mem, size); - - if (!ok || corrupted || corrupted1 || err != DB_SUCCESS || different) { - fprintf(stderr, "ok %d corrupted %d corrupted1 %d err %d different %d\n", - ok , corrupted, corrupted1, err, different); - fprintf(stderr, "src_frame\n"); - buf_page_print(src_frame, zip_size); - fprintf(stderr, "encrypted_frame\n"); - buf_page_print(tmp, zip_size); - fprintf(stderr, "decrypted_frame\n"); - buf_page_print(tmp_mem, zip_size); - ut_ad(0); - } - - free(tmp_mem); - - if (comp_mem) { - free(comp_mem); - } - - if (uncomp_mem) { - free(uncomp_mem); - } + memcpy(tmp_mem + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, + src + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 8); + ut_ad(!memcmp(src, tmp_mem, size)); } - #endif /* UNIV_DEBUG */ return tmp; diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index c656dc37071..4ad572697cd 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -351,19 +351,6 @@ fil_space_get_by_id( return(space); } -/****************************************************************//** -Get space id from fil node */ -ulint -fil_node_get_space_id( -/*==================*/ - fil_node_t* node) /*!< in: Compressed node*/ -{ - ut_ad(node); - ut_ad(node->space); - - return (node->space->id); -} - /*******************************************************************//** Returns the table space by a given name, NULL if not found. */ fil_space_t* diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc index edc932f36f5..25cd8e28a91 100644 --- a/storage/xtradb/fil/fil0pagecompress.cc +++ b/storage/xtradb/fil/fil0pagecompress.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (C) 2013, 2017, MariaDB Corporation. All Rights Reserved. +Copyright (C) 2013, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -80,73 +80,26 @@ static ulint srv_data_read, srv_data_written; #include "snappy-c.h" #endif -/* Used for debugging */ -//#define UNIV_PAGECOMPRESS_DEBUG 1 - -/****************************************************************//** -For page compressed pages compress the page before actual write -operation. -@return compressed page to be written*/ -UNIV_INTERN -byte* -fil_compress_page( -/*==============*/ - fil_space_t* space, /*!< in,out: tablespace (NULL during IMPORT) */ - byte* buf, /*!< in: buffer from which to write; in aio - this must be appropriately aligned */ - byte* out_buf, /*!< out: compressed buffer */ - ulint len, /*!< in: length of input buffer.*/ - ulint level, /* in: compression level */ - ulint block_size, /*!< in: block size */ - bool encrypted, /*!< in: is page also encrypted */ - ulint* out_len) /*!< out: actual length of compressed - page */ +/** Compress a page_compressed page before writing to a data file. +@param[in] buf page to be compressed +@param[out] out_buf compressed page +@param[in] level compression level +@param[in] block_size file system block size +@param[in] encrypted whether the page will be subsequently encrypted +@return actual length of compressed page +@retval 0 if the page was not compressed */ +UNIV_INTERN ulint fil_page_compress(const byte* buf, byte* out_buf, ulint level, + ulint block_size, bool encrypted) { - int err = Z_OK; - int comp_level = level; + int comp_level = int(level); ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE; - ulint write_size = 0; -#if HAVE_LZO - lzo_uint write_size_lzo = write_size; -#endif /* Cache to avoid change during function execution */ ulint comp_method = innodb_compression_algorithm; - bool allocated = false; - - /* page_compression does not apply to tables or tablespaces - that use ROW_FORMAT=COMPRESSED */ - ut_ad(!space || !FSP_FLAGS_GET_ZIP_SSIZE(space->flags)); if (encrypted) { header_len += FIL_PAGE_COMPRESSION_METHOD_SIZE; } - if (!out_buf) { - allocated = true; - ulint size = UNIV_PAGE_SIZE; - - /* Both snappy and lzo compression methods require that - output buffer used for compression is bigger than input - buffer. Increase the allocated buffer size accordingly. */ -#if HAVE_SNAPPY - if (comp_method == PAGE_SNAPPY_ALGORITHM) { - size = snappy_max_compressed_length(size); - } -#endif -#if HAVE_LZO - if (comp_method == PAGE_LZO_ALGORITHM) { - size += LZO1X_1_15_MEM_COMPRESS; - } -#endif - - out_buf = static_cast<byte *>(ut_malloc(size)); - } - - ut_ad(buf); - ut_ad(out_buf); - ut_ad(len); - ut_ad(out_len); - /* Let's not compress file space header or extent descriptor */ switch (fil_page_get_type(buf)) { @@ -154,8 +107,7 @@ fil_compress_page( case FIL_PAGE_TYPE_FSP_HDR: case FIL_PAGE_TYPE_XDES: case FIL_PAGE_PAGE_COMPRESSED: - *out_len = len; - goto err_exit; + return 0; } /* If no compression level was provided to this table, use system @@ -164,204 +116,113 @@ fil_compress_page( comp_level = page_zip_level; } - DBUG_PRINT("compress", - ("Preparing for space " ULINTPF " '%s' len " ULINTPF, - space ? space->id : 0, - space ? space->name : "(import)", - len)); - - write_size = UNIV_PAGE_SIZE - header_len; + ulint write_size = srv_page_size - header_len; - switch(comp_method) { + switch (comp_method) { + default: + ut_ad(!"unknown compression method"); + /* fall through */ + case PAGE_UNCOMPRESSED: + return 0; + case PAGE_ZLIB_ALGORITHM: + { + ulong len = uLong(write_size); + if (Z_OK == compress2( + out_buf + header_len, &len, + buf, uLong(srv_page_size), comp_level)) { + write_size = len; + goto success; + } + } + break; #ifdef HAVE_LZ4 case PAGE_LZ4_ALGORITHM: - -#ifdef HAVE_LZ4_COMPRESS_DEFAULT - err = LZ4_compress_default((const char *)buf, - (char *)out_buf+header_len, len, write_size); -#else - err = LZ4_compress_limitedOutput((const char *)buf, - (char *)out_buf+header_len, len, write_size); -#endif /* HAVE_LZ4_COMPRESS_DEFAULT */ - write_size = err; - - if (err == 0) { - /* If error we leave the actual page as it was */ - -#ifndef UNIV_PAGECOMPRESS_DEBUG - if (space && !space->printed_compression_failure) { - space->printed_compression_failure = true; -#endif - ib_logf(IB_LOG_LEVEL_WARN, - "Compression failed for space " ULINTPF - " name %s len " ULINTPF - " err %d write_size " ULINTPF ".", - space->id, space->name, len, - err, write_size); -#ifndef UNIV_PAGECOMPRESS_DEBUG - } -#endif - srv_stats.pages_page_compression_error.inc(); - *out_len = len; - goto err_exit; +# ifdef HAVE_LZ4_COMPRESS_DEFAULT + write_size = LZ4_compress_default( + reinterpret_cast<const char*>(buf), + reinterpret_cast<char*>(out_buf) + header_len, + int(srv_page_size), int(write_size)); +# else + write_size = LZ4_compress_limitedOutput( + reinterpret_cast<const char*>(buf), + reinterpret_cast<char*>(out_buf) + header_len, + int(srv_page_size), int(write_size)); +# endif + + if (write_size) { + goto success; } break; #endif /* HAVE_LZ4 */ #ifdef HAVE_LZO - case PAGE_LZO_ALGORITHM: - err = lzo1x_1_15_compress( - buf, len, out_buf+header_len, &write_size_lzo, out_buf+UNIV_PAGE_SIZE); - - write_size = write_size_lzo; - - if (err != LZO_E_OK || write_size > UNIV_PAGE_SIZE-header_len) { - if (space && !space->printed_compression_failure) { - space->printed_compression_failure = true; - ib_logf(IB_LOG_LEVEL_WARN, - "Compression failed for space " ULINTPF - " name %s len " ULINTPF - " err %d write_size " ULINTPF ".", - space->id, space->name, len, - err, write_size); - } - - srv_stats.pages_page_compression_error.inc(); - *out_len = len; - goto err_exit; + case PAGE_LZO_ALGORITHM: { + lzo_uint len = write_size; + + if (LZO_E_OK == lzo1x_1_15_compress( + buf, srv_page_size, + out_buf + header_len, &len, + out_buf + srv_page_size) + && len <= write_size) { + write_size = len; + goto success; } - break; + } #endif /* HAVE_LZO */ #ifdef HAVE_LZMA case PAGE_LZMA_ALGORITHM: { - size_t out_pos=0; - - err = lzma_easy_buffer_encode( - comp_level, - LZMA_CHECK_NONE, - NULL, /* No custom allocator, use malloc/free */ - reinterpret_cast<uint8_t*>(buf), - len, - reinterpret_cast<uint8_t*>(out_buf + header_len), - &out_pos, - (size_t)write_size); - - if (err != LZMA_OK || out_pos > UNIV_PAGE_SIZE-header_len) { - if (space && !space->printed_compression_failure) { - space->printed_compression_failure = true; - ib_logf(IB_LOG_LEVEL_WARN, - "Compression failed for space " ULINTPF - " name %s len " ULINTPF - " err %d write_size " ULINTPF ".", - space->id, space->name, len, - err, out_pos); - } - - srv_stats.pages_page_compression_error.inc(); - *out_len = len; - goto err_exit; + size_t out_pos = 0; + + if (LZMA_OK == lzma_easy_buffer_encode( + comp_level, LZMA_CHECK_NONE, NULL, + buf, srv_page_size, out_buf + header_len, + &out_pos, write_size) + && out_pos <= write_size) { + write_size = out_pos; + goto success; } - - write_size = out_pos; - break; } #endif /* HAVE_LZMA */ #ifdef HAVE_BZIP2 case PAGE_BZIP2_ALGORITHM: { - - err = BZ2_bzBuffToBuffCompress( - (char *)(out_buf + header_len), - (unsigned int *)&write_size, - (char *)buf, - len, - 1, - 0, - 0); - - if (err != BZ_OK || write_size > UNIV_PAGE_SIZE-header_len) { - if (space && !space->printed_compression_failure) { - space->printed_compression_failure = true; - ib_logf(IB_LOG_LEVEL_WARN, - "Compression failed for space " ULINTPF - " name %s len " ULINTPF - " err %d write_size " ULINTPF ".", - space->id, space->name, len, - err, write_size); - } - - srv_stats.pages_page_compression_error.inc(); - *out_len = len; - goto err_exit; + unsigned len = unsigned(write_size); + if (BZ_OK == BZ2_bzBuffToBuffCompress( + reinterpret_cast<char*>(out_buf + header_len), + &len, + const_cast<char*>( + reinterpret_cast<const char*>(buf)), + unsigned(srv_page_size), 1, 0, 0) + && len <= write_size) { + write_size = len; + goto success; } break; } #endif /* HAVE_BZIP2 */ #ifdef HAVE_SNAPPY - case PAGE_SNAPPY_ALGORITHM: - { - snappy_status cstatus; - write_size = snappy_max_compressed_length(UNIV_PAGE_SIZE); - - cstatus = snappy_compress( - (const char *)buf, - (size_t)len, - (char *)(out_buf+header_len), - (size_t*)&write_size); - - if (cstatus != SNAPPY_OK || write_size > UNIV_PAGE_SIZE-header_len) { - if (space && !space->printed_compression_failure) { - space->printed_compression_failure = true; - ib_logf(IB_LOG_LEVEL_WARN, - "Compression failed for space " ULINTPF - " name %s len " ULINTPF - " err %d write_size " ULINTPF ".", - space->id, space->name, len, - (int)cstatus, write_size); - } - - srv_stats.pages_page_compression_error.inc(); - *out_len = len; - goto err_exit; + case PAGE_SNAPPY_ALGORITHM: { + size_t len = snappy_max_compressed_length(srv_page_size); + + if (SNAPPY_OK == snappy_compress( + reinterpret_cast<const char*>(buf), + srv_page_size, + reinterpret_cast<char*>(out_buf) + header_len, + &len) + && len <= write_size) { + write_size = len; + goto success; } break; } #endif /* HAVE_SNAPPY */ - - case PAGE_ZLIB_ALGORITHM: - err = compress2(out_buf+header_len, (ulong*)&write_size, buf, - uLong(len), comp_level); - - if (err != Z_OK) { - /* If error we leave the actual page as it was */ - - if (space && !space->printed_compression_failure) { - space->printed_compression_failure = true; - ib_logf(IB_LOG_LEVEL_WARN, - "Compression failed for space " ULINTPF - " name %s len " ULINTPF - " rt %d write_size " ULINTPF ".", - space->id, space->name, len, - err, write_size); - } - - srv_stats.pages_page_compression_error.inc(); - *out_len = len; - goto err_exit; - } - break; - - case PAGE_UNCOMPRESSED: - *out_len = len; - return (buf); - break; - default: - ut_error; - break; } + srv_stats.pages_page_compression_error.inc(); + return 0; +success: /* Set up the page header */ memcpy(out_buf, buf, FIL_PAGE_DATA); /* Set up the checksum */ @@ -392,22 +253,11 @@ fil_compress_page( /* Verify that page can be decompressed */ { - byte *comp_page; - byte *uncomp_page; - - comp_page = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE)); - uncomp_page = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE)); - memcpy(comp_page, out_buf, UNIV_PAGE_SIZE); - - fil_decompress_page(uncomp_page, comp_page, ulong(len), NULL); - - if (buf_page_is_corrupted(false, uncomp_page, 0, space)) { - buf_page_print(uncomp_page, 0); - ut_ad(0); - } - - ut_free(comp_page); - ut_free(uncomp_page); + page_t tmp_buf[UNIV_PAGE_SIZE_MAX]; + page_t page[UNIV_PAGE_SIZE_MAX]; + memcpy(page, out_buf, srv_page_size); + ut_ad(fil_page_decompress(tmp_buf, page)); + ut_ad(!buf_page_is_corrupted(false, page, 0, NULL)); } #endif /* UNIV_DEBUG */ @@ -431,324 +281,144 @@ fil_compress_page( #endif } - DBUG_PRINT("compress", - ("Succeeded for space " ULINTPF - " '%s' len " ULINTPF " out_len " ULINTPF, - space ? space->id : 0, - space ? space->name : "(import)", - len, write_size)); - - srv_stats.page_compression_saved.add((len - write_size)); + srv_stats.page_compression_saved.add(srv_page_size - write_size); srv_stats.pages_page_compressed.inc(); /* If we do not persistently trim rest of page, we need to write it all */ if (!srv_use_trim) { - memset(out_buf+write_size,0,len-write_size); - write_size = len; - } - - *out_len = write_size; - - if (allocated) { - /* TODO: reduce number of memcpy's */ - memcpy(buf, out_buf, len); - } else { - return(out_buf); - } - -err_exit: - if (allocated) { - ut_free(out_buf); + memset(out_buf + write_size, 0, srv_page_size - write_size); } - return (buf); - + return write_size; } -/****************************************************************//** -For page compressed pages decompress the page after actual read -operation. */ -UNIV_INTERN -void -fil_decompress_page( -/*================*/ - byte* page_buf, /*!< in: preallocated buffer or NULL */ - byte* buf, /*!< out: buffer from which to read; in aio - this must be appropriately aligned */ - ulong len, /*!< in: length of output buffer.*/ - ulint* write_size, /*!< in/out: Actual payload size of - the compressed data. */ - bool return_error) /*!< in: true if only an error should - be produced when decompression fails. - By default this parameter is false. */ +/** Decompress a page that may be subject to page_compressed compression. +@param[in,out] tmp_buf temporary buffer (of innodb_page_size) +@param[in,out] buf possibly compressed page buffer +@return size of the compressed data +@retval 0 if decompression failed +@retval srv_page_size if the page was not compressed */ +UNIV_INTERN ulint fil_page_decompress(byte* tmp_buf, byte* buf) { - int err = 0; - ulint actual_size = 0; - ulint compression_alg = 0; - byte *in_buf; - ulint ptype; - ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE; - - ut_ad(buf); - ut_ad(len); - - ptype = mach_read_from_2(buf+FIL_PAGE_TYPE); - - if (ptype == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { - header_len += FIL_PAGE_COMPRESSION_METHOD_SIZE; - } - - /* Do not try to uncompressed pages that are not compressed */ - if (ptype != FIL_PAGE_PAGE_COMPRESSED && - ptype != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED && - ptype != FIL_PAGE_TYPE_COMPRESSED) { - return; - } - - // If no buffer was given, we need to allocate temporal buffer - if (page_buf == NULL) { - in_buf = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE)); - memset(in_buf, 0, UNIV_PAGE_SIZE); - } else { - in_buf = page_buf; + const unsigned ptype = mach_read_from_2(buf+FIL_PAGE_TYPE); + ulint header_len; + ib_uint64_t compression_alg; + switch (ptype) { + case FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED: + header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE + + FIL_PAGE_COMPRESSION_METHOD_SIZE; + compression_alg = mach_read_from_2( + FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE + buf); + break; + case FIL_PAGE_PAGE_COMPRESSED: + header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE; + compression_alg = mach_read_from_8( + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + buf); + break; + default: + return srv_page_size; } - /* Before actual decompress, make sure that page type is correct */ - - if (mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM) != BUF_NO_CHECKSUM_MAGIC || - (ptype != FIL_PAGE_PAGE_COMPRESSED && - ptype != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: We try to uncompress corrupted page" - " CRC " ULINTPF " type " ULINTPF " len " ULINTPF ".", - mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM), - mach_read_from_2(buf+FIL_PAGE_TYPE), len); - - fflush(stderr); - if (return_error) { - goto error_return; - } - ut_error; + if (mach_read_from_4(buf + FIL_PAGE_SPACE_OR_CHKSUM) + != BUF_NO_CHECKSUM_MAGIC) { + return 0; } - /* Get compression algorithm */ - if (ptype == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { - compression_alg = mach_read_from_2(buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE); - } else { - compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - } + ulint actual_size = mach_read_from_2(buf + FIL_PAGE_DATA); - /* Get the actual size of compressed page */ - actual_size = mach_read_from_2(buf+FIL_PAGE_DATA); /* Check if payload size is corrupted */ - if (actual_size == 0 || actual_size > UNIV_PAGE_SIZE) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: We try to uncompress corrupted page" - " actual size " ULINTPF " compression %s.", - actual_size, fil_get_compression_alg_name(compression_alg)); - fflush(stderr); - if (return_error) { - goto error_return; - } - ut_error; + if (actual_size == 0 || actual_size > srv_page_size - header_len) { + return 0; } - /* Store actual payload size of the compressed data. This pointer - points to buffer pool. */ - if (write_size) { - *write_size = actual_size; - } - - DBUG_PRINT("compress", - ("Preparing for decompress for len " ULINTPF ".", - actual_size)); - - switch(compression_alg) { + switch (compression_alg) { + default: + ib_logf(IB_LOG_LEVEL_ERROR, + "Unknown compression algorithm " UINT64PF, + compression_alg); + return 0; case PAGE_ZLIB_ALGORITHM: - err= uncompress(in_buf, &len, buf+header_len, (unsigned long)actual_size); - - /* If uncompress fails it means that page is corrupted */ - if (err != Z_OK) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but uncompress failed with error %d " - " size " ULINTPF " len " ULINTPF ".", - err, actual_size, len); - - fflush(stderr); - - if (return_error) { - goto error_return; + { + uLong len = srv_page_size; + if (Z_OK != uncompress(tmp_buf, &len, + buf + header_len, + uLong(actual_size)) + && len != srv_page_size) { + return 0; } - ut_error; } break; - #ifdef HAVE_LZ4 case PAGE_LZ4_ALGORITHM: - err = LZ4_decompress_fast((const char *)buf+header_len, (char *)in_buf, len); - - if (err != (int)actual_size) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but uncompress failed with error %d " - " size " ULINTPF " len " ULINTPF ".", - err, actual_size, len); - - fflush(stderr); - - if (return_error) { - goto error_return; - } - ut_error; + if (LZ4_decompress_safe(reinterpret_cast<const char*>(buf) + + header_len, + reinterpret_cast<char*>(tmp_buf), + actual_size, srv_page_size) + == int(srv_page_size)) { + break; } - break; + return 0; #endif /* HAVE_LZ4 */ #ifdef HAVE_LZO case PAGE_LZO_ALGORITHM: { - ulint olen = 0; - lzo_uint olen_lzo = olen; - err = lzo1x_decompress((const unsigned char *)buf+header_len, - actual_size,(unsigned char *)in_buf, &olen_lzo, NULL); - - olen = olen_lzo; - - if (err != LZO_E_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but uncompress failed with error %d " - " size " ULINTPF " len " ULINTPF ".", - err, actual_size, len); - - fflush(stderr); - - if (return_error) { - goto error_return; - } - ut_error; + lzo_uint len_lzo = srv_page_size; + if (LZO_E_OK == lzo1x_decompress_safe( + buf + header_len, + actual_size, tmp_buf, &len_lzo, NULL) + && len_lzo == srv_page_size) { + break; } - break; + return 0; } #endif /* HAVE_LZO */ #ifdef HAVE_LZMA case PAGE_LZMA_ALGORITHM: { - - lzma_ret ret; size_t src_pos = 0; size_t dst_pos = 0; uint64_t memlimit = UINT64_MAX; - ret = lzma_stream_buffer_decode( - &memlimit, - 0, - NULL, - buf+header_len, - &src_pos, - actual_size, - in_buf, - &dst_pos, - len); - - - if (ret != LZMA_OK || (dst_pos == 0 || dst_pos > UNIV_PAGE_SIZE)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but decompression read only %ld bytes" - " size " ULINTPF "len " ULINTPF ".", - dst_pos, actual_size, len); - fflush(stderr); - - if (return_error) { - goto error_return; - } - ut_error; + if (LZMA_OK == lzma_stream_buffer_decode( + &memlimit, 0, NULL, buf + header_len, + &src_pos, actual_size, tmp_buf, &dst_pos, + srv_page_size) + && dst_pos == srv_page_size) { + break; } - - break; + return 0; } #endif /* HAVE_LZMA */ #ifdef HAVE_BZIP2 case PAGE_BZIP2_ALGORITHM: { - unsigned int dst_pos = UNIV_PAGE_SIZE; - - err = BZ2_bzBuffToBuffDecompress( - (char *)in_buf, - &dst_pos, - (char *)(buf+header_len), - actual_size, - 1, - 0); - - if (err != BZ_OK || (dst_pos == 0 || dst_pos > UNIV_PAGE_SIZE)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but decompression read only %du bytes" - " size " ULINTPF " len " ULINTPF " err %d.", - dst_pos, actual_size, len, err); - fflush(stderr); - - if (return_error) { - goto error_return; - } - ut_error; + unsigned int dst_pos = srv_page_size; + if (BZ_OK == BZ2_bzBuffToBuffDecompress( + reinterpret_cast<char*>(tmp_buf), + &dst_pos, + reinterpret_cast<char*>(buf) + header_len, + actual_size, 1, 0) + && dst_pos == srv_page_size) { + break; } - break; + return 0; } #endif /* HAVE_BZIP2 */ #ifdef HAVE_SNAPPY - case PAGE_SNAPPY_ALGORITHM: - { - snappy_status cstatus; - ulint olen = UNIV_PAGE_SIZE; - - cstatus = snappy_uncompress( - (const char *)(buf+header_len), - (size_t)actual_size, - (char *)in_buf, - (size_t*)&olen); - - if (cstatus != SNAPPY_OK || olen != UNIV_PAGE_SIZE) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but decompression read only " ULINTPF " bytes" - " size " ULINTPF " len " ULINTPF " err %d.", - olen, actual_size, len, (int)cstatus); - fflush(stderr); - - if (return_error) { - goto error_return; - } - ut_error; + case PAGE_SNAPPY_ALGORITHM: { + size_t olen = srv_page_size; + + if (SNAPPY_OK == snappy_uncompress( + reinterpret_cast<const char*>(buf) + header_len, + actual_size, + reinterpret_cast<char*>(tmp_buf), &olen) + && olen == srv_page_size) { + break; } - - break; + return 0; } #endif /* HAVE_SNAPPY */ - default: - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but compression algorithm %s" - " is not known." - ,fil_get_compression_alg_name(compression_alg)); - - fflush(stderr); - if (return_error) { - goto error_return; - } - ut_error; - break; } srv_stats.pages_page_decompressed.inc(); - - /* Copy the uncompressed page to the buffer pool, not - really any other options. */ - memcpy(buf, in_buf, len); - -error_return: - if (page_buf != in_buf) { - ut_free(in_buf); - } + memcpy(buf, tmp_buf, srv_page_size); + return actual_size; } diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 97de34b9083..30fabbdb089 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -2710,8 +2710,12 @@ innobase_mysql_tmpfile( } } #else +#ifdef F_DUPFD_CLOEXEC + fd2 = fcntl(fd, F_DUPFD_CLOEXEC, 0); +#else fd2 = dup(fd); #endif +#endif if (fd2 < 0) { DBUG_PRINT("error",("Got error %d on dup",fd2)); my_errno=errno; @@ -8984,14 +8988,12 @@ report_error: user_thd); #ifdef WITH_WSREP - if (!error_result && - wsrep_thd_exec_mode(user_thd) == LOCAL_STATE && - wsrep_on(user_thd) && - !wsrep_consistency_check(user_thd) && - !wsrep_thd_ignore_table(user_thd)) - { - if (wsrep_append_keys(user_thd, false, record, NULL)) - { + if (!error_result + && wsrep_on(user_thd) + && wsrep_thd_exec_mode(user_thd) == LOCAL_STATE + && !wsrep_consistency_check(user_thd) + && !wsrep_thd_ignore_table(user_thd)) { + if (wsrep_append_keys(user_thd, false, record, NULL)) { DBUG_PRINT("wsrep", ("row key failed")); error_result = HA_ERR_INTERNAL_ERROR; goto wsrep_error; diff --git a/storage/xtradb/ibuf/ibuf0ibuf.cc b/storage/xtradb/ibuf/ibuf0ibuf.cc index b169916c34e..96a86eea4c1 100644 --- a/storage/xtradb/ibuf/ibuf0ibuf.cc +++ b/storage/xtradb/ibuf/ibuf0ibuf.cc @@ -5218,6 +5218,10 @@ ibuf_check_bitmap_on_import( bitmap_page = ibuf_bitmap_get_map_page( space_id, page_no, zip_size, &mtr); + if (!bitmap_page) { + mutex_exit(&ibuf_mutex); + return DB_CORRUPTION; + } for (i = FSP_IBUF_BITMAP_OFFSET + 1; i < page_size; i++) { const ulint offset = page_no + i; diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index 7661ba1785d..0944b5d4067 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. +Copyright (c) 2013, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -38,6 +38,7 @@ Created 11/5/1995 Heikki Tuuri #include "ut0rbt.h" #include "os0proc.h" #include "log0log.h" +#include "my_atomic.h" /** @name Modes for buf_page_get_gen */ /* @{ */ @@ -1528,45 +1529,16 @@ buf_page_encrypt_before_write( buf_page_t* bpage, byte* src_frame); -/********************************************************************** -The hook that is called after page is written to disk. -The function releases any resources needed for encryption that was allocated -in buf_page_encrypt_before_write */ -UNIV_INTERN -ibool -buf_page_encrypt_after_write( -/*=========================*/ - buf_page_t* page); /*!< in/out: buffer page that was flushed */ - -/********************************************************************//** -The hook that is called just before a page is read from disk. -The function allocates memory that is used to temporarily store disk content -before getting decrypted */ -UNIV_INTERN -byte* -buf_page_decrypt_before_read( -/*=========================*/ - buf_page_t* page, /*!< in/out: buffer page read from disk */ - ulint zip_size); /*!< in: compressed page size, or 0 */ - -/********************************************************************//** -The hook that is called just after a page is read from disk. -The function decrypt disk content into buf_page_t and releases the -temporary buffer that was allocated in buf_page_decrypt_before_read */ -UNIV_INTERN -bool -buf_page_decrypt_after_read( -/*========================*/ - buf_page_t* page); /*!< in/out: buffer page read from disk */ - /** @brief The temporary memory structure. NOTE! The definition appears here only for other modules of this directory (buf) to see it. Do not use from outside! */ typedef struct { - bool reserved; /*!< true if this slot is reserved +private: + int32 reserved; /*!< true if this slot is reserved */ +public: byte* crypt_buf; /*!< for encryption the data needs to be copied to a separate buffer before it's encrypted&written. this as a page can be @@ -1577,6 +1549,21 @@ typedef struct { byte* out_buf; /*!< resulting buffer after encryption/compression. This is a pointer and not allocated. */ + + /** Release the slot */ + void release() + { + my_atomic_store32_explicit(&reserved, false, + MY_MEMORY_ORDER_RELAXED); + } + + /** Acquire the slot + @return whether the slot was acquired */ + bool acquire() + { + return !my_atomic_fas32_explicit(&reserved, true, + MY_MEMORY_ORDER_RELAXED); + } } buf_tmp_buffer_t; /** The common buffer control block structure diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index a0db48f7037..7b8339fec7d 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -170,8 +170,7 @@ extern fil_addr_t fil_addr_null; #define FIL_PAGE_TYPE_BLOB 10 /*!< Uncompressed BLOB page */ #define FIL_PAGE_TYPE_ZBLOB 11 /*!< First compressed BLOB page */ #define FIL_PAGE_TYPE_ZBLOB2 12 /*!< Subsequent compressed BLOB page */ -#define FIL_PAGE_TYPE_COMPRESSED 13 /*!< Compressed page */ -#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_COMPRESSED +#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_ZBLOB2 /*!< Last page type */ /* @} */ @@ -341,9 +340,6 @@ struct fil_space_t { corrupted page. */ bool is_corrupt; /*!< true if tablespace corrupted */ - bool printed_compression_failure; - /*!< true if we have already printed - compression failure */ fil_space_crypt_t* crypt_data; /*!< tablespace crypt data or NULL */ ulint file_block_size; diff --git a/storage/xtradb/include/fil0fil.ic b/storage/xtradb/include/fil0fil.ic index 6c2504c9f8c..3f21c529308 100644 --- a/storage/xtradb/include/fil0fil.ic +++ b/storage/xtradb/include/fil0fil.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2015, 2017, MariaDB Corporation. +Copyright (c) 2015, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -65,12 +65,9 @@ fil_get_page_type_name( return "ZBLOB"; case FIL_PAGE_TYPE_ZBLOB2: return "ZBLOB2"; - case FIL_PAGE_TYPE_COMPRESSED: - return "ORACLE PAGE COMPRESSED"; } return "PAGE TYPE CORRUPTED"; - } /****************************************************************//** @@ -112,8 +109,7 @@ fil_page_type_validate( page_type == FIL_PAGE_TYPE_XDES || page_type == FIL_PAGE_TYPE_BLOB || page_type == FIL_PAGE_TYPE_ZBLOB || - page_type == FIL_PAGE_TYPE_ZBLOB2 || - page_type == FIL_PAGE_TYPE_COMPRESSED))) { + page_type == FIL_PAGE_TYPE_ZBLOB2))) { ulint key_version = mach_read_from_4(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); bool page_compressed = (page_type == FIL_PAGE_PAGE_COMPRESSED); diff --git a/storage/xtradb/include/fil0pagecompress.h b/storage/xtradb/include/fil0pagecompress.h index 03e16699ce3..934372c55b2 100644 --- a/storage/xtradb/include/fil0pagecompress.h +++ b/storage/xtradb/include/fil0pagecompress.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (C) 2013, 2017 MariaDB Corporation. All Rights Reserved. +Copyright (C) 2013, 2018 MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -30,70 +30,26 @@ atomic writes information to table space. Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com ***********************************************************************/ -/*******************************************************************//** -Find out wheather the page is index page or not -@return true if page type index page, false if not */ -UNIV_INLINE -ibool -fil_page_is_index_page( -/*===================*/ - byte *buf); /*!< in: page */ - -/****************************************************************//** -Get the name of the compression algorithm used for page -compression. -@return compression algorithm name or "UNKNOWN" if not known*/ -UNIV_INLINE -const char* -fil_get_compression_alg_name( -/*=========================*/ - ulint comp_alg); /*!<in: compression algorithm number */ - -/****************************************************************//** -For page compressed pages compress the page before actual write -operation. -@return compressed page to be written*/ -UNIV_INTERN -byte* -fil_compress_page( -/*==============*/ - fil_space_t* space, /*!< in,out: tablespace (NULL during IMPORT) */ - byte* buf, /*!< in: buffer from which to write; in aio - this must be appropriately aligned */ - byte* out_buf, /*!< out: compressed buffer */ - ulint len, /*!< in: length of input buffer.*/ - ulint level, /* in: compression level */ - ulint block_size, /*!< in: block size */ - bool encrypted, /*!< in: is page also encrypted */ - ulint* out_len); /*!< out: actual length of compressed - page */ - -/****************************************************************//** -For page compressed pages decompress the page after actual read -operation. */ -UNIV_INTERN -void -fil_decompress_page( -/*================*/ - byte* page_buf, /*!< in: preallocated buffer or NULL */ - byte* buf, /*!< out: buffer from which to read; in aio - this must be appropriately aligned */ - ulong len, /*!< in: length of output buffer.*/ - ulint* write_size, /*!< in/out: Actual payload size of - the compressed data. */ - bool return_error=false); - /*!< in: true if only an error should - be produced when decompression fails. - By default this parameter is false. */ - -/****************************************************************//** -Get space id from fil node -@return space id*/ -UNIV_INTERN -ulint -fil_node_get_space_id( -/*==================*/ - fil_node_t* node); /*!< in: Node where to get space id*/ +/** Compress a page_compressed page before writing to a data file. +@param[in] buf page to be compressed +@param[out] out_buf compressed page +@param[in] level compression level +@param[in] block_size file system block size +@param[in] encrypted whether the page will be subsequently encrypted +@return actual length of compressed page +@retval 0 if the page was not compressed */ +UNIV_INTERN ulint fil_page_compress(const byte* buf, byte* out_buf, ulint level, + ulint block_size, bool encrypted) + MY_ATTRIBUTE((nonnull, warn_unused_result)); + +/** Decompress a page that may be subject to page_compressed compression. +@param[in,out] tmp_buf temporary buffer (of innodb_page_size) +@param[in,out] buf compressed page buffer +@return size of the compressed data +@retval 0 if decompression failed +@retval srv_page_size if the page was not compressed */ +UNIV_INTERN ulint fil_page_decompress(byte* tmp_buf, byte* buf) + MY_ATTRIBUTE((nonnull, warn_unused_result)); /****************************************************************//** Get block size from fil node @@ -120,13 +76,4 @@ ibool fil_page_is_compressed_encrypted( /*=============================*/ byte* buf); /*!< in: page */ - -/*******************************************************************//** -Find out wheather the page is page compressed with lzo method -@return true if page is page compressed with lzo method*/ -UNIV_INLINE -ibool -fil_page_is_lzo_compressed( -/*=======================*/ - byte* buf); /*!< in: page */ #endif diff --git a/storage/xtradb/include/fsp0fsp.h b/storage/xtradb/include/fsp0fsp.h index 715572199ab..f9aca33a71e 100644 --- a/storage/xtradb/include/fsp0fsp.h +++ b/storage/xtradb/include/fsp0fsp.h @@ -915,7 +915,7 @@ fsp_flags_convert_from_101(ulint flags) /* Bits 13..16 are the wrong position for PAGE_SSIZE, and they should contain one of the values 3,4,6,7, that is, be of the form - 0b0011 or 0b01xx (except 0b0110). + 0b0011 or 0b01xx (except 0b0101). In correct versions, these bits should be 0bc0se where c is the MariaDB COMPRESSED flag and e is the MySQL 5.7 ENCRYPTION flag diff --git a/storage/xtradb/include/fsp0pagecompress.ic b/storage/xtradb/include/fsp0pagecompress.ic index 14f968e319e..99d0dfb3c7c 100644 --- a/storage/xtradb/include/fsp0pagecompress.ic +++ b/storage/xtradb/include/fsp0pagecompress.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (C) 2013, 2017, MariaDB Corporation. All Rights Reserved. +Copyright (C) 2013, 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -50,18 +50,6 @@ fsp_flags_get_atomic_writes( } /*******************************************************************//** -Find out wheather the page is index page or not -@return true if page type index page, false if not */ -UNIV_INLINE -ibool -fil_page_is_index_page( -/*===================*/ - byte* buf) /*!< in: page */ -{ - return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_INDEX); -} - -/*******************************************************************//** Find out wheather the page is page compressed @return true if page is page compressed, false if not */ UNIV_INLINE @@ -84,59 +72,3 @@ fil_page_is_compressed_encrypted( { return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); } - -/****************************************************************//** -Get the name of the compression algorithm used for page -compression. -@return compression algorithm name or "UNKNOWN" if not known*/ -UNIV_INLINE -const char* -fil_get_compression_alg_name( -/*=========================*/ - ulint comp_alg) /*!<in: compression algorithm number */ -{ - switch(comp_alg) { - case PAGE_UNCOMPRESSED: - return ("uncompressed"); - break; - case PAGE_ZLIB_ALGORITHM: - return ("ZLIB"); - break; - case PAGE_LZ4_ALGORITHM: - return ("LZ4"); - break; - case PAGE_LZO_ALGORITHM: - return ("LZO"); - break; - case PAGE_LZMA_ALGORITHM: - return ("LZMA"); - break; - case PAGE_BZIP2_ALGORITHM: - return ("BZIP2"); - break; - case PAGE_SNAPPY_ALGORITHM: - return ("SNAPPY"); - break; - /* No default to get compiler warning */ - } - - return ("NULL"); -} - -#ifndef UNIV_INNOCHECKSUM -/*******************************************************************//** -Find out wheather the page is page compressed with lzo method -@return true if page is page compressed with lzo method, false if not */ -UNIV_INLINE -ibool -fil_page_is_lzo_compressed( -/*=======================*/ - byte* buf) /*!< in: page */ -{ - return((mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED && - mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) == PAGE_LZO_ALGORITHM) || - (mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED && - mach_read_from_2(buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE) == PAGE_LZO_ALGORITHM)); -} - -#endif /* UNIV_INNOCHECKSUM */ diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index da8548f0fa8..2e172a7524c 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -1509,7 +1509,7 @@ os_file_create_simple_func( } do { - file = ::open(name, create_flag, os_innodb_umask); + file = ::open(name, create_flag | O_CLOEXEC, os_innodb_umask); if (file == -1) { *success = FALSE; @@ -1731,7 +1731,7 @@ os_file_create_simple_no_error_handling_func( return(file); } - file = open(name, create_flag, os_innodb_umask); + file = ::open(name, create_flag | O_CLOEXEC , os_innodb_umask); *success = file != -1; @@ -2146,7 +2146,7 @@ os_file_create_func( #endif /* O_SYNC */ do { - file = open(name, create_flag, os_innodb_umask); + file = ::open(name, create_flag | O_CLOEXEC, os_innodb_umask); if (file == -1) { const char* operation; @@ -2359,6 +2359,24 @@ loop: #endif } +/** Handle RENAME error. +@param name old name of the file +@param new_name new name of the file */ +static void os_file_handle_rename_error(const char* name, const char* new_name) +{ + if (os_file_get_last_error(true) != OS_FILE_DISK_FULL) { + ib_logf(IB_LOG_LEVEL_ERROR, "Cannot rename file '%s' to '%s'", + name, new_name); + } else if (!os_has_said_disk_full) { + os_has_said_disk_full = true; + /* Disk full error is reported irrespective of the + on_error_silent setting. */ + ib_logf(IB_LOG_LEVEL_ERROR, + "Full disk prevents renaming file '%s' to '%s'", + name, new_name); + } +} + /***********************************************************************//** NOTE! Use the corresponding macro os_file_rename(), not directly this function! Renames a file (can also move it to another directory). It is safest that the @@ -2394,7 +2412,7 @@ os_file_rename_func( return(TRUE); } - os_file_handle_error_no_exit(oldpath, "rename", FALSE, __FILE__, __LINE__); + os_file_handle_rename_error(oldpath, newpath); return(FALSE); #else @@ -2404,7 +2422,7 @@ os_file_rename_func( ret = rename(oldpath, newpath); if (ret != 0) { - os_file_handle_error_no_exit(oldpath, "rename", FALSE, __FILE__, __LINE__); + os_file_handle_rename_error(oldpath, newpath); return(FALSE); } @@ -3718,7 +3736,7 @@ os_file_get_status( access = !srv_read_only_mode ? O_RDWR : O_RDONLY; - fh = ::open(path, access, os_innodb_umask); + fh = ::open(path, access | O_CLOEXEC, os_innodb_umask); if (fh == -1) { stat_info->rw_perm = false; @@ -4147,7 +4165,7 @@ os_aio_native_aio_supported(void) strcpy(name + dirnamelen, "ib_logfile0"); - fd = ::open(name, O_RDONLY); + fd = ::open(name, O_RDONLY | O_CLOEXEC); if (fd == -1) { diff --git a/storage/xtradb/row/row0import.cc b/storage/xtradb/row/row0import.cc index 2315384649d..d4d2c4028fd 100644 --- a/storage/xtradb/row/row0import.cc +++ b/storage/xtradb/row/row0import.cc @@ -42,6 +42,12 @@ Created 2012-02-08 by Sunny Bains. #include "srv0start.h" #include "row0quiesce.h" #include "fil0pagecompress.h" +#ifdef HAVE_LZO +#include "lzo/lzo1x.h" +#endif +#ifdef HAVE_SNAPPY +#include "snappy-c.h" +#endif #include <vector> @@ -427,12 +433,9 @@ public: updated then its state must be set to BUF_PAGE_NOT_USED. For compressed tables the page descriptor memory will be at offset: block->frame + UNIV_PAGE_SIZE; - @param offset - physical offset within the file - @param block - block read from file, note it is not from the buffer pool + @param block block read from file, note it is not from the buffer pool @retval DB_SUCCESS or error code. */ - virtual dberr_t operator()( - os_offset_t offset, - buf_block_t* block) UNIV_NOTHROW = 0; + virtual dberr_t operator()(buf_block_t* block) UNIV_NOTHROW = 0; /** @return the space id of the tablespace */ @@ -689,12 +692,9 @@ struct FetchIndexRootPages : public AbstractCallback { /** Called for each block as it is read from the file. - @param offset - physical offset in the file - @param block - block to convert, it is not from the buffer pool. + @param block block to convert, it is not from the buffer pool. @retval DB_SUCCESS or error code. */ - virtual dberr_t operator() ( - os_offset_t offset, - buf_block_t* block) UNIV_NOTHROW; + dberr_t operator()(buf_block_t* block) UNIV_NOTHROW; /** Update the import configuration that will be used to import the tablespace. */ @@ -712,13 +712,9 @@ Called for each block as it is read from the file. Check index pages to determine the exact row format. We can't get that from the tablespace header flags alone. -@param offset - physical offset in the file -@param block - block to convert, it is not from the buffer pool. +@param block block to convert, it is not from the buffer pool. @retval DB_SUCCESS or error code. */ -dberr_t -FetchIndexRootPages::operator() ( - os_offset_t offset, - buf_block_t* block) UNIV_NOTHROW +dberr_t FetchIndexRootPages::operator()(buf_block_t* block) UNIV_NOTHROW { if (is_interrupted()) return DB_INTERRUPTED; @@ -726,15 +722,7 @@ FetchIndexRootPages::operator() ( ulint page_type = fil_page_get_type(page); - if (block->page.offset * m_page_size != offset) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Page offset doesn't match file offset: " - "page offset: %u, file offset: " ULINTPF, - block->page.offset, - (ulint) (offset / m_page_size)); - - return DB_CORRUPTION; - } else if (page_type == FIL_PAGE_TYPE_XDES) { + if (page_type == FIL_PAGE_TYPE_XDES) { return set_current_xdes(block->page.offset, page); } else if (page_type == FIL_PAGE_INDEX && !is_free(block->page.offset) @@ -877,12 +865,9 @@ public: /** Called for each block as it is read from the file. - @param offset - physical offset in the file - @param block - block to convert, it is not from the buffer pool. + @param block block to convert, it is not from the buffer pool. @retval DB_SUCCESS or error code. */ - virtual dberr_t operator() ( - os_offset_t offset, - buf_block_t* block) UNIV_NOTHROW; + dberr_t operator()(buf_block_t* block) UNIV_NOTHROW; private: /** Update the page, set the space id, max trx id and index id. @@ -2038,10 +2023,9 @@ PageConverter::update_page( /** Called for every page in the tablespace. If the page was not updated then its state must be set to BUF_PAGE_NOT_USED. -@param block - block read from file, note it is not from the buffer pool +@param block block read from file, note it is not from the buffer pool @retval DB_SUCCESS or error code. */ -dberr_t -PageConverter::operator() (os_offset_t, buf_block_t* block) UNIV_NOTHROW +dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW { /* If we already had an old page with matching number in the buffer pool, evict it now, because @@ -3386,15 +3370,30 @@ fil_iterate( os_offset_t offset; ulint n_bytes = iter.n_io_buffers * iter.page_size; + const ulint buf_size = srv_page_size +#ifdef HAVE_LZO + + LZO1X_1_15_MEM_COMPRESS +#elif defined HAVE_SNAPPY + + snappy_max_compressed_length(srv_page_size) +#endif + ; + byte* page_compress_buf = static_cast<byte*>( + ut_malloc_low(buf_size, false)); ut_ad(!srv_read_only_mode); + if (!page_compress_buf) { + return DB_OUT_OF_MEMORY; + } + /* TODO: For ROW_FORMAT=COMPRESSED tables we do a lot of useless copying for non-index pages. Unfortunately, it is required by buf_zip_decompress() */ + dberr_t err = DB_SUCCESS; for (offset = iter.start; offset < iter.end; offset += n_bytes) { if (callback.is_interrupted()) { - return DB_INTERRUPTED; + err = DB_INTERRUPTED; + goto func_exit; } byte* io_buffer = iter.io_buffer; @@ -3425,30 +3424,20 @@ fil_iterate( if (!os_file_read_no_error_handling(iter.file, readptr, offset, n_bytes)) { ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed"); - return DB_IO_ERROR; + err = DB_IO_ERROR; + goto func_exit; } bool updated = false; - os_offset_t page_off = offset; - ulint n_pages_read = (ulint) n_bytes / iter.page_size; const ulint size = iter.page_size; - block->page.offset = page_off / size; + ulint n_pages_read = ulint(n_bytes) / size; + block->page.offset = offset / size; for (ulint i = 0; i < n_pages_read; - ++i, page_off += size, block->frame += size, - block->page.offset++) { - bool decrypted = false; - dberr_t err = DB_SUCCESS; + ++i, block->frame += size, block->page.offset++) { byte* src = readptr + (i * size); - byte* dst = io_buffer + (i * size); - bool frame_changed = false; - ulint page_type = mach_read_from_2(src+FIL_PAGE_TYPE); - const bool page_compressed - = page_type - == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED - || page_type == FIL_PAGE_PAGE_COMPRESSED; const ulint page_no = page_get_page_no(src); - if (!page_no && page_off) { + if (!page_no && block->page.offset) { const ulint* b = reinterpret_cast<const ulint*> (src); const ulint* const e = b + size / sizeof *b; @@ -3463,55 +3452,85 @@ fil_iterate( continue; } - if (page_no != page_off / size) { + if (page_no != block->page.offset) { +page_corrupted: + ib_logf(IB_LOG_LEVEL_WARN, + "%s: Page %lu at offset " + UINT64PF " looks corrupted.", + callback.filename(), + ulong(offset / size), offset); + err = DB_CORRUPTION; + goto func_exit; + } + + bool decrypted = false; + byte* dst = io_buffer + (i * size); + bool frame_changed = false; + ulint page_type = mach_read_from_2(src+FIL_PAGE_TYPE); + const bool page_compressed + = page_type + == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED + || page_type == FIL_PAGE_PAGE_COMPRESSED; + + if (page_compressed && block->page.zip.data) { goto page_corrupted; } - if (encrypted) { + if (!encrypted) { + } else if (!mach_read_from_4( + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + + src)) { +not_encrypted: + if (!page_compressed + && !block->page.zip.data) { + block->frame = src; + frame_changed = true; + } else { + ut_ad(dst != src); + memcpy(dst, src, size); + } + } else { + if (!fil_space_verify_crypt_checksum( + src, callback.get_zip_size(), + NULL, block->page.offset)) { + goto page_corrupted; + } + decrypted = fil_space_decrypt( iter.crypt_data, dst, iter.page_size, src, &err); if (err != DB_SUCCESS) { - return err; + goto func_exit; } - if (decrypted) { - updated = true; - } else { - if (!page_compressed - && !block->page.zip.data) { - block->frame = src; - frame_changed = true; - } else { - ut_ad(dst != src); - memcpy(dst, src, size); - } + if (!decrypted) { + goto not_encrypted; } + + updated = true; } /* If the original page is page_compressed, we need to decompress it before adjusting further. */ if (page_compressed) { - fil_decompress_page(NULL, dst, ulong(size), - NULL); + ulint compress_length = fil_page_decompress( + page_compress_buf, dst); + ut_ad(compress_length != srv_page_size); + if (compress_length == 0) { + goto page_corrupted; + } updated = true; } else if (buf_page_is_corrupted( false, encrypted && !frame_changed ? dst : src, callback.get_zip_size(), NULL)) { -page_corrupted: - ib_logf(IB_LOG_LEVEL_WARN, - "%s: Page %lu at offset " - UINT64PF " looks corrupted.", - callback.filename(), - ulong(offset / size), offset); - return DB_CORRUPTION; + goto page_corrupted; } - if ((err = callback(page_off, block)) != DB_SUCCESS) { - return err; + if ((err = callback(block)) != DB_SUCCESS) { + goto func_exit; } else if (!updated) { updated = buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE; @@ -3561,19 +3580,17 @@ page_corrupted: src = io_buffer + (i * size); if (page_compressed) { - ulint len = 0; - - fil_compress_page( - NULL, - src, - NULL, - size, - 0,/* FIXME: compression level */ - 512,/* FIXME: use proper block size */ - encrypted, - &len); - updated = true; + if (fil_page_compress( + src, + page_compress_buf, + 0,/* FIXME: compression level */ + 512,/* FIXME: proper block size */ + encrypted)) { + /* FIXME: remove memcpy() */ + memcpy(src, page_compress_buf, + srv_page_size); + } } /* If tablespace is encrypted, encrypt page before we @@ -3583,19 +3600,15 @@ page_corrupted: buffer pool is not being used at all! */ if (decrypted && encrypted) { byte *dest = writeptr + (i * size); - ulint space = mach_read_from_4( - src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - ulint offset = mach_read_from_4(src + FIL_PAGE_OFFSET); - ib_uint64_t lsn = mach_read_from_8(src + FIL_PAGE_LSN); byte* tmp = fil_encrypt_buf( - iter.crypt_data, - space, - offset, - lsn, - src, - iter.page_size == UNIV_PAGE_SIZE ? 0 : iter.page_size, - dest); + iter.crypt_data, + callback.get_space_id(), + block->page.offset, + mach_read_from_8(src + FIL_PAGE_LSN), + src, + callback.get_zip_size(), + dest); if (tmp == src) { /* TODO: remove unnecessary memcpy's */ @@ -3614,11 +3627,14 @@ page_corrupted: offset, (ulint) n_bytes)) { ib_logf(IB_LOG_LEVEL_ERROR, "os_file_write() failed"); - return DB_IO_ERROR; + err = DB_IO_ERROR; + goto func_exit; } } - return DB_SUCCESS; +func_exit: + ut_free(page_compress_buf); + return err; } /********************************************************************//** diff --git a/storage/xtradb/row/row0upd.cc b/storage/xtradb/row/row0upd.cc index 1b9f87e233f..93ccc07c9af 100644 --- a/storage/xtradb/row/row0upd.cc +++ b/storage/xtradb/row/row0upd.cc @@ -1806,6 +1806,23 @@ row_upd_store_row( } } +#ifdef WITH_WSREP +/** Determine if a FOREIGN KEY constraint needs to be processed. +@param[in] node query node +@param[in] trx transaction +@return whether the node cannot be ignored */ + +inline bool wsrep_must_process_fk(const upd_node_t* node, const trx_t* trx) +{ + if (!wsrep_on_trx(trx)) { + return false; + } + return que_node_get_type(node->common.parent) != QUE_NODE_UPDATE + || static_cast<upd_node_t*>(node->common.parent)->cascade_node + != node; +} +#endif /* WITH_WSREP */ + /***********************************************************//** Updates a secondary index entry of a row. @return DB_SUCCESS if operation successfully completed, else error @@ -1836,7 +1853,7 @@ row_upd_sec_index_entry( referenced = row_upd_index_is_referenced(index, trx); #ifdef WITH_WSREP - ibool foreign = wsrep_row_upd_index_is_foreign(index, trx); + bool foreign = wsrep_row_upd_index_is_foreign(index, trx); #endif /* WITH_WSREP */ heap = mem_heap_create(1024); @@ -1968,61 +1985,61 @@ row_upd_sec_index_entry( row_ins_sec_index_entry() below */ if (!rec_get_deleted_flag( rec, dict_table_is_comp(index->table))) { -#ifdef WITH_WSREP - que_node_t *parent = que_node_get_parent(node); -#endif /* WITH_WSREP */ err = btr_cur_del_mark_set_sec_rec( 0, btr_cur, TRUE, thr, &mtr); - if (err == DB_SUCCESS && referenced) { - - ulint* offsets; - - offsets = rec_get_offsets( - rec, index, NULL, ULINT_UNDEFINED, - &heap); - - /* NOTE that the following call loses - the position of pcur ! */ - err = row_upd_check_references_constraints( - node, &pcur, index->table, - index, offsets, thr, &mtr); + if (err != DB_SUCCESS) { + break; } + #ifdef WITH_WSREP - if (err == DB_SUCCESS && !referenced && - wsrep_on_trx(trx) && - !wsrep_thd_is_BF(trx->mysql_thd, FALSE) && - !(parent && que_node_get_type(parent) == - QUE_NODE_UPDATE && - ((upd_node_t*)parent)->cascade_node == node) && - foreign - ) { - ulint* offsets = - rec_get_offsets( + if (!referenced && foreign + && wsrep_must_process_fk(node, trx) + && !wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { + ulint* offsets = rec_get_offsets( rec, index, NULL, ULINT_UNDEFINED, &heap); + err = wsrep_row_upd_check_foreign_constraints( node, &pcur, index->table, index, offsets, thr, &mtr); + switch (err) { case DB_SUCCESS: case DB_NO_REFERENCED_ROW: err = DB_SUCCESS; break; case DB_DEADLOCK: - if (wsrep_debug) fprintf (stderr, - "WSREP: sec index FK check fail for deadlock"); + if (wsrep_debug) { + ib_logf(IB_LOG_LEVEL_WARN, + "WSREP: sec index FK check fail for deadlock: " + " index %s table %s", index->name, index->table->name); + } break; default: - fprintf (stderr, - "WSREP: referenced FK check fail: %d", - (int)err); + ib_logf(IB_LOG_LEVEL_ERROR, + "WSREP: referenced FK check fail: %s index %s table %s", + ut_strerr(err), index->name, index->table->name); break; } } #endif /* WITH_WSREP */ } - break; + + if (referenced) { + + ulint* offsets; + + offsets = rec_get_offsets( + rec, index, NULL, ULINT_UNDEFINED, + &heap); + + /* NOTE that the following call loses + the position of pcur ! */ + err = row_upd_check_references_constraints( + node, &pcur, index->table, + index, offsets, thr, &mtr); + } } btr_pcur_close(&pcur); @@ -2191,9 +2208,6 @@ row_upd_clust_rec_by_insert( rec_t* rec; ulint* offsets = NULL; -#ifdef WITH_WSREP - que_node_t *parent = que_node_get_parent(node); -#endif /* WITH_WSREP */ ut_ad(node); ut_ad(dict_index_is_clust(index)); @@ -2278,35 +2292,31 @@ err_exit: if (err != DB_SUCCESS) { goto err_exit; } - } #ifdef WITH_WSREP - if (!referenced && wsrep_on_trx(trx) && - !(parent && que_node_get_type(parent) == QUE_NODE_UPDATE && - ((upd_node_t*)parent)->cascade_node == node) && - foreign - ) { + } else if ((foreign && wsrep_must_process_fk(node, trx))) { err = wsrep_row_upd_check_foreign_constraints( node, pcur, table, index, offsets, thr, mtr); + switch (err) { case DB_SUCCESS: case DB_NO_REFERENCED_ROW: err = DB_SUCCESS; break; case DB_DEADLOCK: - if (wsrep_debug) fprintf (stderr, - "WSREP: insert FK check fail for deadlock"); + if (wsrep_debug) { + ib_logf(IB_LOG_LEVEL_WARN, + "WSREP: sec index FK check fail for deadlock: " + " index %s table %s", index->name, index->table->name); + } break; default: - fprintf (stderr, - "WSREP: referenced FK check fail: %d", - (int)err); + ib_logf(IB_LOG_LEVEL_ERROR, + "WSREP: referenced FK check fail: %s index %s table %s", + ut_strerr(err), index->name, index->table->name); break; } - if (err != DB_SUCCESS) { - goto err_exit; - } - } #endif /* WITH_WSREP */ + } } mtr_commit(mtr); @@ -2512,7 +2522,7 @@ row_upd_del_mark_clust_rec( dberr_t err; #ifdef WITH_WSREP rec_t* rec; - que_node_t *parent = que_node_get_parent(node); + trx_t* trx = thr_get_trx(thr) ; #endif /* WITH_WSREP */ ut_ad(node); @@ -2541,38 +2551,37 @@ row_upd_del_mark_clust_rec( btr_cur_get_block(btr_cur), btr_cur_get_rec(btr_cur), #endif /* WITH_WSREP */ index, offsets, thr, mtr); - if (err == DB_SUCCESS && referenced) { + if (err != DB_SUCCESS) { + } else if (referenced) { /* NOTE that the following call loses the position of pcur ! */ err = row_upd_check_references_constraints( node, pcur, index->table, index, offsets, thr, mtr); - } #ifdef WITH_WSREP - trx_t* trx = thr_get_trx(thr) ; - if (err == DB_SUCCESS && !referenced && wsrep_on_trx(trx) && - !(parent && que_node_get_type(parent) == QUE_NODE_UPDATE && - ((upd_node_t*)parent)->cascade_node == node) && - foreign - ) { + } else if (foreign && wsrep_must_process_fk(node, trx)) { err = wsrep_row_upd_check_foreign_constraints( node, pcur, index->table, index, offsets, thr, mtr); + switch (err) { case DB_SUCCESS: case DB_NO_REFERENCED_ROW: err = DB_SUCCESS; break; case DB_DEADLOCK: - if (wsrep_debug) fprintf (stderr, - "WSREP: clust rec FK check fail for deadlock"); + if (wsrep_debug) { + ib_logf(IB_LOG_LEVEL_WARN, + "WSREP: sec index FK check fail for deadlock: " + " index %s table %s", index->name, index->table->name); + } break; default: - fprintf (stderr, - "WSREP: clust rec referenced FK check fail: %d", - (int)err); + ib_logf(IB_LOG_LEVEL_ERROR, + "WSREP: referenced FK check fail: %s index %s table %s", + ut_strerr(err), index->name, index->table->name); break; } - } #endif /* WITH_WSREP */ + } mtr_commit(mtr); |