diff options
author | Monty <monty@mariadb.org> | 2020-10-15 02:25:57 +0300 |
---|---|---|
committer | Sergei Golubchik <serg@mariadb.org> | 2021-05-19 22:54:12 +0200 |
commit | 47010ccffa8db1b88883314932e1a0f33ec32bc0 (patch) | |
tree | 4536c39f1bc71c82cf78ed2804caa3b2e348e222 /sql | |
parent | 55c771b4f3e1c8a42f018730f39a26278abcf817 (diff) | |
download | mariadb-git-47010ccffa8db1b88883314932e1a0f33ec32bc0.tar.gz |
MDEV-23842 Atomic RENAME TABLE
- Major rewrite of ddl_log.cc and ddl_log.h
- ddl_log.cc describes at the beginning of the file how the recovery works.
- ddl_log.log has a unique signature and is dynamic. It's easy to
add more information to the header and other ddl blocks while still
being able to execute old ddl entries.
- IO_SIZE for ddl blocks is now dynamic. Can be changed without affecting
recovery of old logs.
- Code is more modular and is now usable outside of partition handling.
- Renamed log file to ddl_recovery.log and added option --log-ddl-recovery
to allow one to specify the path & filename.
- Added ddl_log_entry_phases[], number of phases for each DDL action,
which allowed me to greatly simplify set_global_from_ddl_log_entry()
- Changed how strings are stored in log entries, which allows us to
store much more information in a log entry.
- ddl log is now always created at start and deleted on normal shutdown.
This simplifies things notably.
- Added probes debug_crash_here() and debug_simulate_error() to simplify
crash testing and allow crash after a given number of times a probe
is executed. See comments in debug_sync.cc and rename_table.test for
how this can be used.
- Reverting failed table and view renames is done through the ddl log.
This ensures that the ddl log is tested also outside of recovery.
- Added helper function 'handler::needs_lower_case_filenames()'
- Extend binary log with Q_XID events. ddl log handling is using this
to check if a ddl log entry was logged to the binary log (if yes,
it will be deleted from the log during ddl_log_close_binlogged_events())
- If a DDL entry fails 3 times, disable it. This is to ensure that if
we have a crash in ddl recovery code the server will not get stuck
in a forever crash-restart-crash loop.
mysqltest.cc changes:
- --die will now replace $variables with their values
- $error will contain the error of the last failed statement
storage engine changes:
- maria_rename() was changed to be more robust against crashes during
rename.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/ddl_log.cc | 1752 | ||||
-rw-r--r-- | sql/ddl_log.h | 156 | ||||
-rw-r--r-- | sql/debug_sync.cc | 71 | ||||
-rw-r--r-- | sql/debug_sync.h | 8 | ||||
-rw-r--r-- | sql/handler.cc | 2 | ||||
-rw-r--r-- | sql/handler.h | 11 | ||||
-rw-r--r-- | sql/log.cc | 66 | ||||
-rw-r--r-- | sql/log_event.cc | 10 | ||||
-rw-r--r-- | sql/log_event.h | 3 | ||||
-rw-r--r-- | sql/log_event_client.cc | 5 | ||||
-rw-r--r-- | sql/log_event_server.cc | 9 | ||||
-rw-r--r-- | sql/mysqld.cc | 15 | ||||
-rw-r--r-- | sql/mysqld.h | 2 | ||||
-rw-r--r-- | sql/parse_file.cc | 11 | ||||
-rw-r--r-- | sql/sql_class.cc | 1 | ||||
-rw-r--r-- | sql/sql_class.h | 5 | ||||
-rw-r--r-- | sql/sql_partition.cc | 54 | ||||
-rw-r--r-- | sql/sql_partition_admin.cc | 17 | ||||
-rw-r--r-- | sql/sql_rename.cc | 356 | ||||
-rw-r--r-- | sql/sql_table.cc | 77 | ||||
-rw-r--r-- | sql/sql_table.h | 4 | ||||
-rw-r--r-- | sql/sql_trigger.cc | 4 | ||||
-rw-r--r-- | sql/sql_view.cc | 28 | ||||
-rw-r--r-- | sql/sql_view.h | 6 |
24 files changed, 1888 insertions, 785 deletions
diff --git a/sql/ddl_log.cc b/sql/ddl_log.cc index 43e065e44ab..1da6b5e52d5 100644 --- a/sql/ddl_log.cc +++ b/sql/ddl_log.cc @@ -1,6 +1,6 @@ /* Copyright (c) 2000, 2019, Oracle and/or its affiliates. - Copyright (c) 2010, 2020, MariaDB + Copyright (c) 2010, 2021, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,178 +22,333 @@ #include "log.h" // sql_print_error() #include "ddl_log.h" #include "ha_partition.h" // PAR_EXT +#include "sql_table.h" // build_table_filename +#include "sql_statistics.h" // rename_table_in_stats_tables +#include "sql_view.h" // mysql_rename_view() +#include "strfunc.h" // strconvert +#include <mysys_err.h> // EE_LINK /*-------------------------------------------------------------------------- - MODULE: DDL log - ----------------- + MODULE: DDL log + ----------------- + + This module is used to ensure that we can recover from crashes that + occur in the middle of a meta-data operation in MySQL. E.g. DROP + TABLE t1, t2; We need to ensure that both t1 and t2 are dropped and + not only t1 and also that each table drop is entirely done and not + "half-baked". + + To support this we create log entries for each meta-data statement + in the ddl log while we are executing. These entries are dropped + when the operation is completed. + + At recovery those entries that were not completed will be executed. + + There is only one ddl log in the system and it is protected by a mutex + and there is a global struct that contains information about its current + state. + + DDL recovery after a crash works the following way: + + - ddl_log_initialize() initializes the global global_ddl_log variable + and opens the binary log if it exists. If it doesn't exists a new one + is created. + - ddl_log_close_binlogged_events() loops over all log events and checks if + their xid (stored in the EXECUTE_CODE event) is in the binary log. 
If xid + exists in the binary log the entry is marked as finished in the ddl log. + - After a new binary log is created and is open for new entries, + ddl_log_execute_recovery() is executed on remaining open events: + - Loop over all events + - For each entry with DDL_LOG_ENTRY_CODE execute the remaining phases + in ddl_log_execute_entry_no_lock() + + The ddl_log.log file is created at startup and deleted when server goes down. + After the final recovery phase is done, the file is truncated. + + History: + First version written in 2006 by Mikael Ronstrom + Second version in 2020 by Monty +--------------------------------------------------------------------------*/ - This module is used to ensure that we can recover from crashes that occur - in the middle of a meta-data operation in MySQL. E.g. DROP TABLE t1, t2; - We need to ensure that both t1 and t2 are dropped and not only t1 and - also that each table drop is entirely done and not "half-baked". +#define DDL_LOG_MAGIC_LENGTH 4 +/* How many times to try to execute a ddl log entry that causes crashes */ +#define DDL_LOG_MAX_RETRY 3 - To support this we create log entries for each meta-data statement in the - ddl log while we are executing. These entries are dropped when the - operation is completed. +uchar ddl_log_file_magic[]= +{ (uchar) 254, (uchar) 254, (uchar) 11, (uchar) 2 }; - At recovery those entries that were not completed will be executed. +/* Action names for ddl_log_action_code */ - There is only one ddl log in the system and it is protected by a mutex - and there is a global struct that contains information about its current - state. 
+const char *ddl_log_action_name[DDL_LOG_LAST_ACTION]= +{ + "Unknown", "partitioning delete", "partitioning rename", + "partitioning replace", "partitioning exchange", + "rename table", "rename view" +}; + +/* Number of phases per entry */ +const uchar ddl_log_entry_phases[DDL_LOG_LAST_ACTION]= +{ + 1, 1, 2, 3, 4, 1 +}; - History: - First version written in 2006 by Mikael Ronstrom - Second version written in 2020 by Monty ---------------------------------------------------------------------------*/ struct st_global_ddl_log { - /* - We need to adjust buffer size to be able to handle downgrades/upgrades - where IO_SIZE has changed. We'll set the buffer size such that we can - handle that the buffer size was upto 4 times bigger in the version - that wrote the DDL log. - */ - char file_entry_buf[4*IO_SIZE]; - char file_name_str[FN_REFLEN]; - char *file_name; + uchar *file_entry_buf; DDL_LOG_MEMORY_ENTRY *first_free; DDL_LOG_MEMORY_ENTRY *first_used; - uint num_entries; File file_id; - uint name_len; + uint num_entries; + uint name_pos; uint io_size; - bool inited; - bool do_release; - bool recovery_phase; - st_global_ddl_log() : inited(false), do_release(false) {} + bool initialized; + bool open; }; st_global_ddl_log global_ddl_log; mysql_mutex_t LOCK_gdl; +/* Positions to different data in a ddl log block */ #define DDL_LOG_ENTRY_TYPE_POS 0 +/* + Note that ACTION_TYPE and PHASE_POS must be after each other. 
+ See update_phase() +*/ #define DDL_LOG_ACTION_TYPE_POS 1 #define DDL_LOG_PHASE_POS 2 #define DDL_LOG_NEXT_ENTRY_POS 4 -#define DDL_LOG_NAME_POS 8 +/* Flags to remember something unique about the query, like if .frm was used */ +#define DDL_LOG_FLAG_POS 8 +/* Used to store XID entry that was written to binary log */ +#define DDL_LOG_XID_POS 10 +/* Used to store unique uuid from the .frm file */ +#define DDL_LOG_UUID_POS 18 +/* ID_POS can be used to store something unique, like file size (4 bytes) */ +#define DDL_LOG_ID_POS DDL_LOG_UUID_POS + MY_UUID_SIZE +#define DDL_LOG_END_POS DDL_LOG_ID_POS + 8 + +/* + Position to where names are stored in the ddl log blocks. The current + value is stored in the header and can thus be changed if we need more + space for constants in the header than what is between DDL_LOG_ID_POS and + DDL_LOG_TMP_NAME_POS. +*/ +#define DDL_LOG_TMP_NAME_POS 56 + +/* Definitions for the ddl log header, the first block in the file */ +/* IO_SIZE is stored in the header and can thus be changed */ +#define DDL_LOG_IO_SIZE IO_SIZE + +/* Header is stored in positions 0-3 */ +#define DDL_LOG_IO_SIZE_POS 4 +#define DDL_LOG_NAME_OFFSET_POS 6 +/* Sum of the above variables */ +#define DDL_LOG_HEADER_SIZE 4+2+2 + +/** + Sync the ddl log file. + + @return Operation status + @retval FALSE Success + @retval TRUE Error +*/ + +static bool ddl_log_sync_file() +{ + DBUG_ENTER("ddl_log_sync_file"); + DBUG_RETURN(mysql_file_sync(global_ddl_log.file_id, MYF(MY_WME))); +} + +/* Same as above, but ensure we have the LOCK_gdl locked */ + +static bool ddl_log_sync_no_lock() +{ + DBUG_ENTER("ddl_log_sync_no_lock"); + + mysql_mutex_assert_owner(&LOCK_gdl); + DBUG_RETURN(ddl_log_sync_file()); +} + + +/** + Create ddl log file name. + @param file_name Filename setup +*/ + +static inline void create_ddl_log_file_name(char *file_name) +{ + fn_format(file_name, opt_ddl_recovery_file, mysql_data_home, ".log", 0); +} + + +/** + Write ddl log header. 
+ + @return Operation status + @retval TRUE Error + @retval FALSE Success +*/ + +static bool write_ddl_log_header() +{ + uchar header[DDL_LOG_HEADER_SIZE]; + DBUG_ENTER("write_ddl_log_header"); + + memcpy(&header, ddl_log_file_magic, DDL_LOG_MAGIC_LENGTH); + int2store(&header[DDL_LOG_IO_SIZE_POS], global_ddl_log.io_size); + int2store(&header[DDL_LOG_NAME_OFFSET_POS], global_ddl_log.name_pos); + + if (mysql_file_pwrite(global_ddl_log.file_id, + header, sizeof(header), 0, + MYF(MY_WME | MY_NABP))) + DBUG_RETURN(TRUE); + DBUG_RETURN(ddl_log_sync_file()); +} -#define DDL_LOG_NUM_ENTRY_POS 0 -#define DDL_LOG_NAME_LEN_POS 4 -#define DDL_LOG_IO_SIZE_POS 8 /** Read one entry from ddl log file. - @param entry_no Entry number to read + @param entry_pos Entry number to read @return Operation status @retval true Error @retval false Success */ -static bool read_ddl_log_file_entry(uint entry_no) +static bool read_ddl_log_file_entry(uint entry_pos) { - bool error= FALSE; - File file_id= global_ddl_log.file_id; - uchar *file_entry_buf= (uchar*)global_ddl_log.file_entry_buf; + uchar *file_entry_buf= global_ddl_log.file_entry_buf; size_t io_size= global_ddl_log.io_size; DBUG_ENTER("read_ddl_log_file_entry"); mysql_mutex_assert_owner(&LOCK_gdl); - if (mysql_file_pread(file_id, file_entry_buf, io_size, io_size * entry_no, - MYF(MY_WME)) != io_size) - error= TRUE; - DBUG_RETURN(error); + DBUG_RETURN (mysql_file_pread(global_ddl_log.file_id, + file_entry_buf, io_size, + io_size * entry_pos, + MYF(MY_WME | MY_NABP))); } /** Write one entry to ddl log file. 
- @param entry_no Entry number to write + @param entry_pos Entry number to write - @return Operation status + @return @retval true Error @retval false Success */ -static bool write_ddl_log_file_entry(uint entry_no) +static bool write_ddl_log_file_entry(uint entry_pos) { bool error= FALSE; File file_id= global_ddl_log.file_id; - uchar *file_entry_buf= (uchar*)global_ddl_log.file_entry_buf; + uchar *file_entry_buf= global_ddl_log.file_entry_buf; DBUG_ENTER("write_ddl_log_file_entry"); - mysql_mutex_assert_owner(&LOCK_gdl); - if (mysql_file_pwrite(file_id, file_entry_buf, - IO_SIZE, IO_SIZE * entry_no, MYF(MY_WME)) != IO_SIZE) - error= TRUE; + mysql_mutex_assert_owner(&LOCK_gdl); // To be removed + DBUG_RETURN(mysql_file_pwrite(file_id, file_entry_buf, + global_ddl_log.io_size, + global_ddl_log.io_size * entry_pos, + MYF(MY_WME | MY_NABP))); DBUG_RETURN(error); } /** - Sync the ddl log file. + Update phase of ddl log entry - @return Operation status - @retval FALSE Success - @retval TRUE Error + @param entry_pos ddl_log entry to update + @param phase New phase + + @return + @retval 0 ok + Â @retval 1 Write error. Error given + + This is done without locks as it's guaranteed to be atomic */ +static bool update_phase(uint entry_pos, uchar phase) +{ + DBUG_ENTER("update_phase"); -static bool ddl_log_sync_file() + DBUG_RETURN(mysql_file_pwrite(global_ddl_log.file_id, &phase, 1, + global_ddl_log.io_size * entry_pos + + DDL_LOG_PHASE_POS, + MYF(MY_WME | MY_NABP)) || + ddl_log_sync_file()); +} + + +static bool update_xid(uint entry_pos, ulonglong xid) { - DBUG_ENTER("ddl_log_sync_file"); - DBUG_RETURN(mysql_file_sync(global_ddl_log.file_id, MYF(MY_WME))); + uchar buff[8]; + DBUG_ENTER("update_xid"); + + int8store(buff, xid); + DBUG_RETURN(mysql_file_pwrite(global_ddl_log.file_id, buff, 8, + global_ddl_log.io_size * entry_pos + + DDL_LOG_XID_POS, + MYF(MY_WME | MY_NABP)) || + ddl_log_sync_file()); } -/** - Write ddl log header. 
+static bool update_unique_id(uint entry_pos, ulonglong id) +{ + uchar buff[8]; + DBUG_ENTER("update_unique_xid"); + + int8store(buff, id); + DBUG_RETURN(mysql_file_pwrite(global_ddl_log.file_id, buff, sizeof(buff), + global_ddl_log.io_size * entry_pos + + DDL_LOG_ID_POS, + MYF(MY_WME | MY_NABP)) || + ddl_log_sync_file()); +} - @return Operation status - @retval TRUE Error - @retval FALSE Success + +/* + Disable an execute entry + + @param entry_pos ddl_log entry to update + + Notes: + We don't need sync here as this is mainly done during + recover phase to mark already done entries. We instead sync all entries + at the same time. */ -static bool write_ddl_log_header() +static bool disable_execute_entry(uint entry_pos) { - uint16 const_var; - DBUG_ENTER("write_ddl_log_header"); - - int4store(&global_ddl_log.file_entry_buf[DDL_LOG_NUM_ENTRY_POS], - global_ddl_log.num_entries); - const_var= FN_REFLEN; - int4store(&global_ddl_log.file_entry_buf[DDL_LOG_NAME_LEN_POS], - (ulong) const_var); - const_var= IO_SIZE; - int4store(&global_ddl_log.file_entry_buf[DDL_LOG_IO_SIZE_POS], - (ulong) const_var); - if (write_ddl_log_file_entry(0UL)) - { - sql_print_error("Error writing ddl log header"); - DBUG_RETURN(TRUE); - } - DBUG_RETURN(ddl_log_sync_file()); + uchar buff[1]; + DBUG_ENTER("disable_execute_entry"); + + buff[0]= DDL_IGNORE_LOG_ENTRY_CODE; + DBUG_RETURN(mysql_file_pwrite(global_ddl_log.file_id, buff, sizeof(buff), + global_ddl_log.io_size * entry_pos + + DDL_LOG_ENTRY_TYPE_POS, + MYF(MY_WME | MY_NABP))); } - -/** - Create ddl log file name. - @param file_name Filename setup +/* + Disable an execute entry */ -static inline void create_ddl_log_file_name(char *file_name) +bool ddl_log_disable_execute_entry(DDL_LOG_MEMORY_ENTRY **active_entry) { - strxmov(file_name, mysql_data_home, "/", "ddl_log.log", NullS); + bool res= disable_execute_entry((*active_entry)->entry_pos); + ddl_log_sync_no_lock(); + return res; } + /** Read header of ddl log file. 
@@ -201,50 +356,121 @@ static inline void create_ddl_log_file_name(char *file_name) of names in the ddl log and we also get information about the number of entries in the ddl log. - @return Last entry in ddl log (0 if no entries) + This is read only once at server startup, so no mutex is needed. + + @return Last entry in ddl log (0 if no entries). + @return -1 if log could not be opened or could not be read */ -static uint read_ddl_log_header() +static int read_ddl_log_header(const char *file_name) { - uchar *file_entry_buf= (uchar*)global_ddl_log.file_entry_buf; - char file_name[FN_REFLEN]; - uint entry_no; - bool successful_open= FALSE; + uchar header[DDL_LOG_HEADER_SIZE]; + int max_entry; + int file_id; + uint io_size; DBUG_ENTER("read_ddl_log_header"); - mysql_mutex_init(key_LOCK_gdl, &LOCK_gdl, MY_MUTEX_INIT_SLOW); - mysql_mutex_lock(&LOCK_gdl); - create_ddl_log_file_name(file_name); - if ((global_ddl_log.file_id= mysql_file_open(key_file_global_ddl_log, + if ((file_id= mysql_file_open(key_file_global_ddl_log, file_name, - O_RDWR | O_BINARY, MYF(0))) >= 0) + O_RDWR | O_BINARY, MYF(0))) < 0) + DBUG_RETURN(-1); + + if (mysql_file_read(file_id, + header, sizeof(header), MYF(MY_WME | MY_NABP))) { - if (read_ddl_log_file_entry(0UL)) - { - /* Write message into error log */ - sql_print_error("Failed to read ddl log file in recovery"); - } - else - successful_open= TRUE; + /* Write message into error log */ + sql_print_error("DDL_LOG: Failed to read ddl log file '%s' during " + "recovery", file_name); + goto err; } - if (successful_open) + + if (memcmp(header, ddl_log_file_magic, 4)) { - entry_no= uint4korr(&file_entry_buf[DDL_LOG_NUM_ENTRY_POS]); - global_ddl_log.name_len= uint4korr(&file_entry_buf[DDL_LOG_NAME_LEN_POS]); - global_ddl_log.io_size= uint4korr(&file_entry_buf[DDL_LOG_IO_SIZE_POS]); - DBUG_ASSERT(global_ddl_log.io_size <= - sizeof(global_ddl_log.file_entry_buf)); + /* Probably upgrade from MySQL 10.5 or earlier */ + sql_print_warning("DDL_LOG: Wrong 
header in %s. Assuming it is an old " + "recovery file from MariaDB 10.5 or earlier. " + "Skipping DDL recovery", file_name); + goto err; } - else + + io_size= uint2korr(&header[DDL_LOG_IO_SIZE_POS]); + global_ddl_log.name_pos= uint2korr(&header[DDL_LOG_NAME_OFFSET_POS]); + + max_entry= (uint) (mysql_file_seek(file_id, 0L, MY_SEEK_END, MYF(0)) / + io_size); + if (max_entry) + max_entry--; // Don't count first block + + if (!(global_ddl_log.file_entry_buf= (uchar*) + my_malloc(key_memory_DDL_LOG_MEMORY_ENTRY, io_size, + MYF(MY_WME | MY_ZEROFILL)))) + goto err; + + global_ddl_log.open= TRUE; + global_ddl_log.file_id= file_id; + global_ddl_log.num_entries= max_entry; + global_ddl_log.io_size= io_size; + DBUG_RETURN(max_entry); + +err: + if (file_id >= 0) + my_close(file_id, MYF(0)); + /* We return -1 to force the ddl log to be re-created */ + DBUG_RETURN(-1); +} + + +/* + Store and read strings in ddl log buffers + + Format is: + 2 byte: length (not counting end \0) + X byte: string value of length 'length' + 1 byte: \0 +*/ + +static uchar *store_string(uchar *pos, uchar *end, const LEX_CSTRING *str) +{ + uint32 length= (uint32) str->length; + if (unlikely(pos + 2 + length + 1 > end)) { - entry_no= 0; + DBUG_ASSERT(0); + return end; // Overflow } - global_ddl_log.first_free= NULL; - global_ddl_log.first_used= NULL; - global_ddl_log.num_entries= 0; - global_ddl_log.do_release= true; - mysql_mutex_unlock(&LOCK_gdl); - DBUG_RETURN(entry_no); + + int2store(pos, length); + if (likely(length)) + memcpy(pos+2, str->str, length); + pos[2+length]= 0; // Store end \0 + return pos + 2 + length +1; +} + + +static LEX_CSTRING get_string(uchar **pos, const uchar *end) +{ + LEX_CSTRING tmp; + uint32 length; + if (likely(*pos + 3 <= end)) + { + length= uint2korr(*pos); + if (likely(*pos + 2 + length + 1 <= end)) + { + char *str= (char*) *pos+2; + *pos= *pos + 2 + length + 1; + tmp.str= str; + tmp.length= length; + return tmp; + } + } + /* + Overflow on read, should never happen + Set 
*pos to end to ensure any future calls also returns empty string + */ + DBUG_ASSERT(0); + *pos= (uchar*) end; + tmp.str= ""; + tmp.length= 0; + return tmp; } @@ -256,38 +482,32 @@ static uint read_ddl_log_header() static void set_global_from_ddl_log_entry(const DDL_LOG_ENTRY *ddl_log_entry) { + uchar *file_entry_buf= global_ddl_log.file_entry_buf, *pos, *end; + mysql_mutex_assert_owner(&LOCK_gdl); - global_ddl_log.file_entry_buf[DDL_LOG_ENTRY_TYPE_POS]= - (char)DDL_LOG_ENTRY_CODE; - global_ddl_log.file_entry_buf[DDL_LOG_ACTION_TYPE_POS]= - (char)ddl_log_entry->action_type; - global_ddl_log.file_entry_buf[DDL_LOG_PHASE_POS]= 0; - int4store(&global_ddl_log.file_entry_buf[DDL_LOG_NEXT_ENTRY_POS], - ddl_log_entry->next_entry); - DBUG_ASSERT(strlen(ddl_log_entry->name) < FN_REFLEN); - strmake(&global_ddl_log.file_entry_buf[DDL_LOG_NAME_POS], - ddl_log_entry->name, FN_REFLEN - 1); - if (ddl_log_entry->action_type == DDL_LOG_RENAME_ACTION || - ddl_log_entry->action_type == DDL_LOG_REPLACE_ACTION || - ddl_log_entry->action_type == DDL_LOG_EXCHANGE_ACTION) - { - DBUG_ASSERT(strlen(ddl_log_entry->from_name) < FN_REFLEN); - strmake(&global_ddl_log.file_entry_buf[DDL_LOG_NAME_POS + FN_REFLEN], - ddl_log_entry->from_name, FN_REFLEN - 1); - } - else - global_ddl_log.file_entry_buf[DDL_LOG_NAME_POS + FN_REFLEN]= 0; - DBUG_ASSERT(strlen(ddl_log_entry->handler_name) < FN_REFLEN); - strmake(&global_ddl_log.file_entry_buf[DDL_LOG_NAME_POS + (2*FN_REFLEN)], - ddl_log_entry->handler_name, FN_REFLEN - 1); - if (ddl_log_entry->action_type == DDL_LOG_EXCHANGE_ACTION) - { - DBUG_ASSERT(strlen(ddl_log_entry->tmp_name) < FN_REFLEN); - strmake(&global_ddl_log.file_entry_buf[DDL_LOG_NAME_POS + (3*FN_REFLEN)], - ddl_log_entry->tmp_name, FN_REFLEN - 1); - } - else - global_ddl_log.file_entry_buf[DDL_LOG_NAME_POS + (3*FN_REFLEN)]= 0; + + file_entry_buf[DDL_LOG_ENTRY_TYPE_POS]= (uchar) ddl_log_entry->entry_type; + file_entry_buf[DDL_LOG_ACTION_TYPE_POS]= (uchar) ddl_log_entry->action_type; + 
file_entry_buf[DDL_LOG_PHASE_POS]= (uchar) ddl_log_entry->phase; + int4store(file_entry_buf+DDL_LOG_NEXT_ENTRY_POS, ddl_log_entry->next_entry); + int2store(file_entry_buf+DDL_LOG_FLAG_POS, ddl_log_entry->flags); + int8store(file_entry_buf+DDL_LOG_XID_POS, ddl_log_entry->xid); + memcpy(file_entry_buf+DDL_LOG_UUID_POS, ddl_log_entry->uuid, MY_UUID_SIZE); + int8store(file_entry_buf+DDL_LOG_ID_POS, ddl_log_entry->unique_id); + bzero(file_entry_buf+DDL_LOG_END_POS, + global_ddl_log.name_pos - DDL_LOG_END_POS); + + pos= file_entry_buf + global_ddl_log.name_pos; + end= file_entry_buf + global_ddl_log.io_size; + + pos= store_string(pos, end, &ddl_log_entry->handler_name); + pos= store_string(pos, end, &ddl_log_entry->db); + pos= store_string(pos, end, &ddl_log_entry->name); + pos= store_string(pos, end, &ddl_log_entry->from_handler_name); + pos= store_string(pos, end, &ddl_log_entry->from_db); + pos= store_string(pos, end, &ddl_log_entry->from_name); + pos= store_string(pos, end, &ddl_log_entry->tmp_name); + bzero(pos, global_ddl_log.io_size - (pos - file_entry_buf)); } @@ -303,30 +523,31 @@ static void set_global_from_ddl_log_entry(const DDL_LOG_ENTRY *ddl_log_entry) static void set_ddl_log_entry_from_global(DDL_LOG_ENTRY *ddl_log_entry, const uint read_entry) { - char *file_entry_buf= (char*) global_ddl_log.file_entry_buf; - uint inx; + uchar *file_entry_buf= global_ddl_log.file_entry_buf, *pos; + const uchar *end= file_entry_buf + global_ddl_log.io_size; uchar single_char; mysql_mutex_assert_owner(&LOCK_gdl); ddl_log_entry->entry_pos= read_entry; single_char= file_entry_buf[DDL_LOG_ENTRY_TYPE_POS]; - ddl_log_entry->entry_type= (enum ddl_log_entry_code)single_char; + ddl_log_entry->entry_type= (enum ddl_log_entry_code) single_char; single_char= file_entry_buf[DDL_LOG_ACTION_TYPE_POS]; - ddl_log_entry->action_type= (enum ddl_log_action_code)single_char; + ddl_log_entry->action_type= (enum ddl_log_action_code) single_char; ddl_log_entry->phase= 
file_entry_buf[DDL_LOG_PHASE_POS]; ddl_log_entry->next_entry= uint4korr(&file_entry_buf[DDL_LOG_NEXT_ENTRY_POS]); - ddl_log_entry->name= &file_entry_buf[DDL_LOG_NAME_POS]; - inx= DDL_LOG_NAME_POS + global_ddl_log.name_len; - ddl_log_entry->from_name= &file_entry_buf[inx]; - inx+= global_ddl_log.name_len; - ddl_log_entry->handler_name= &file_entry_buf[inx]; - if (ddl_log_entry->action_type == DDL_LOG_EXCHANGE_ACTION) - { - inx+= global_ddl_log.name_len; - ddl_log_entry->tmp_name= &file_entry_buf[inx]; - } - else - ddl_log_entry->tmp_name= NULL; + ddl_log_entry->flags= uint2korr(file_entry_buf + DDL_LOG_FLAG_POS); + ddl_log_entry->xid= uint8korr(file_entry_buf + DDL_LOG_XID_POS); + ddl_log_entry->unique_id= uint8korr(file_entry_buf + DDL_LOG_ID_POS); + memcpy(ddl_log_entry->uuid, file_entry_buf+ DDL_LOG_UUID_POS, MY_UUID_SIZE); + + pos= file_entry_buf + global_ddl_log.name_pos; + ddl_log_entry->handler_name= get_string(&pos, end); + ddl_log_entry->db= get_string(&pos, end); + ddl_log_entry->name= get_string(&pos, end); + ddl_log_entry->from_handler_name= get_string(&pos, end); + ddl_log_entry->from_db= get_string(&pos, end); + ddl_log_entry->from_name= get_string(&pos, end); + ddl_log_entry->tmp_name= get_string(&pos, end); } @@ -349,6 +570,7 @@ static bool read_ddl_log_entry(uint read_entry, DDL_LOG_ENTRY *ddl_log_entry) if (read_ddl_log_file_entry(read_entry)) { + sql_print_error("DDL_LOG: Failed to read entry %u", read_entry); DBUG_RETURN(TRUE); } set_ddl_log_entry_from_global(ddl_log_entry, read_entry); @@ -357,68 +579,93 @@ static bool read_ddl_log_entry(uint read_entry, DDL_LOG_ENTRY *ddl_log_entry) /** - Initialise ddl log. - - Write the header of the ddl log file and length of names. Also set - number of entries to zero. 
+ Create the ddl log file @return Operation status @retval TRUE Error @retval FALSE Success */ -static bool init_ddl_log() +static bool create_ddl_log() { char file_name[FN_REFLEN]; - DBUG_ENTER("init_ddl_log"); + DBUG_ENTER("create_ddl_log"); - if (global_ddl_log.inited) - goto end; + global_ddl_log.open= 0; + global_ddl_log.num_entries= 0; + global_ddl_log.name_pos= DDL_LOG_TMP_NAME_POS; - global_ddl_log.io_size= IO_SIZE; - global_ddl_log.name_len= FN_REFLEN; + /* + Fix file_entry_buf if the old log had a different io_size or if open of old + log didn't succeed. + */ + if (global_ddl_log.io_size != DDL_LOG_IO_SIZE) + { + uchar *ptr= (uchar*) + my_realloc(key_memory_DDL_LOG_MEMORY_ENTRY, + global_ddl_log.file_entry_buf, IO_SIZE, + MYF(MY_WME | MY_ALLOW_ZERO_PTR)); + if (ptr) // Resize succeded */ + { + global_ddl_log.file_entry_buf= ptr; + global_ddl_log.io_size= IO_SIZE; + } + if (!global_ddl_log.file_entry_buf) + DBUG_RETURN(TRUE); + } + DBUG_ASSERT(global_ddl_log.file_entry_buf); + bzero(global_ddl_log.file_entry_buf, global_ddl_log.io_size); create_ddl_log_file_name(file_name); - if ((global_ddl_log.file_id= mysql_file_create(key_file_global_ddl_log, - file_name, CREATE_MODE, - O_RDWR | O_TRUNC | O_BINARY, - MYF(MY_WME))) < 0) + if ((global_ddl_log.file_id= + mysql_file_create(key_file_global_ddl_log, + file_name, CREATE_MODE, + O_RDWR | O_TRUNC | O_BINARY, + MYF(MY_WME | ME_ERROR_LOG))) < 0) { /* Couldn't create ddl log file, this is serious error */ - sql_print_error("Failed to open ddl log file"); + sql_print_error("DDL_LOG: Failed to create ddl log file: %s", file_name); + my_free(global_ddl_log.file_entry_buf); + global_ddl_log.file_entry_buf= 0; DBUG_RETURN(TRUE); } - global_ddl_log.inited= TRUE; if (write_ddl_log_header()) { (void) mysql_file_close(global_ddl_log.file_id, MYF(MY_WME)); - global_ddl_log.inited= FALSE; + my_free(global_ddl_log.file_entry_buf); + global_ddl_log.file_entry_buf= 0; DBUG_RETURN(TRUE); } - -end: + global_ddl_log.open= TRUE; 
DBUG_RETURN(FALSE); } /** - Sync ddl log file. - - @return Operation status - @retval TRUE Error - @retval FALSE Success + Open ddl log and initialise ddl log variables */ -static bool ddl_log_sync_no_lock() +bool ddl_log_initialize() { - DBUG_ENTER("ddl_log_sync_no_lock"); + int num_entries; + char file_name[FN_REFLEN]; + DBUG_ENTER("ddl_log_initialize"); - mysql_mutex_assert_owner(&LOCK_gdl); - if ((!global_ddl_log.recovery_phase) && - init_ddl_log()) + bzero(&global_ddl_log, sizeof(global_ddl_log)); + global_ddl_log.file_id= (File) -1; + global_ddl_log.initialized= 1; + + mysql_mutex_init(key_LOCK_gdl, &LOCK_gdl, MY_MUTEX_INIT_SLOW); + + create_ddl_log_file_name(file_name); + if (likely((num_entries= read_ddl_log_header(file_name)) < 0)) { - DBUG_RETURN(TRUE); + /* Fatal error, log not opened. Recreate it */ + if (create_ddl_log()) + DBUG_RETURN(1); } - DBUG_RETURN(ddl_log_sync_file()); + else + global_ddl_log.num_entries= (uint) num_entries; + DBUG_RETURN(0); } @@ -442,66 +689,123 @@ static bool ddl_log_sync_no_lock() action. Thus the first phase will drop y and the second phase will rename x -> y. 
- @param entry_no Entry position of record to change + @param entry_pos Entry position of record to change @return Operation status @retval TRUE Error @retval FALSE Success */ -static bool ddl_log_increment_phase_no_lock(uint entry_no) +static bool ddl_log_increment_phase_no_lock(uint entry_pos) { - uchar *file_entry_buf= (uchar*)global_ddl_log.file_entry_buf; + uchar *file_entry_buf= global_ddl_log.file_entry_buf; DBUG_ENTER("ddl_log_increment_phase_no_lock"); mysql_mutex_assert_owner(&LOCK_gdl); - if (!read_ddl_log_file_entry(entry_no)) + if (!read_ddl_log_file_entry(entry_pos)) { - if (file_entry_buf[DDL_LOG_ENTRY_TYPE_POS] == DDL_LOG_ENTRY_CODE) + ddl_log_entry_code code= ((ddl_log_entry_code) + file_entry_buf[DDL_LOG_ENTRY_TYPE_POS]); + ddl_log_action_code action= ((ddl_log_action_code) + file_entry_buf[DDL_LOG_ACTION_TYPE_POS]); + + if (code == DDL_LOG_ENTRY_CODE && action < (uint) DDL_LOG_LAST_ACTION) { /* - Log entry, if complete mark it done (IGNORE). - Otherwise increase the phase by one. + Log entry: + Increase the phase by one. If complete mark it done (IGNORE). 
*/ - if (file_entry_buf[DDL_LOG_ACTION_TYPE_POS] == DDL_LOG_DELETE_ACTION || - file_entry_buf[DDL_LOG_ACTION_TYPE_POS] == DDL_LOG_RENAME_ACTION || - (file_entry_buf[DDL_LOG_ACTION_TYPE_POS] == DDL_LOG_REPLACE_ACTION && - file_entry_buf[DDL_LOG_PHASE_POS] == 1) || - (file_entry_buf[DDL_LOG_ACTION_TYPE_POS] == DDL_LOG_EXCHANGE_ACTION && - file_entry_buf[DDL_LOG_PHASE_POS] >= EXCH_PHASE_TEMP_TO_FROM)) - file_entry_buf[DDL_LOG_ENTRY_TYPE_POS]= DDL_IGNORE_LOG_ENTRY_CODE; - else if (file_entry_buf[DDL_LOG_ACTION_TYPE_POS] == DDL_LOG_REPLACE_ACTION) + char phase= file_entry_buf[DDL_LOG_PHASE_POS]+ 1; + if (ddl_log_entry_phases[action] <= phase) { - DBUG_ASSERT(file_entry_buf[DDL_LOG_PHASE_POS] == 0); - file_entry_buf[DDL_LOG_PHASE_POS]= 1; + DBUG_ASSERT(phase == ddl_log_entry_phases[action]); + /* Same effect as setting DDL_IGNORE_LOG_ENTRY_CODE */ + phase= DDL_LOG_FINAL_PHASE; } - else if (file_entry_buf[DDL_LOG_ACTION_TYPE_POS] == DDL_LOG_EXCHANGE_ACTION) - { - DBUG_ASSERT(file_entry_buf[DDL_LOG_PHASE_POS] <= - EXCH_PHASE_FROM_TO_NAME); - file_entry_buf[DDL_LOG_PHASE_POS]++; - } - else - { - DBUG_ASSERT(0); - } - if (write_ddl_log_file_entry(entry_no)) - { - sql_print_error("Error in deactivating log entry. Position = %u", - entry_no); + file_entry_buf[DDL_LOG_PHASE_POS]= phase; + if (update_phase(entry_pos, phase)) DBUG_RETURN(TRUE); - } + } + else + { + /* + Trying to deativate an execute entry or already deactive entry. + This should not happen + */ + DBUG_ASSERT(0); } } else { - sql_print_error("Failed in reading entry before deactivating it"); + sql_print_error("DDL_LOG: Failed in reading entry before updating it"); DBUG_RETURN(TRUE); } DBUG_RETURN(FALSE); } +/* + Ignore errors from the file system about: + - Non existing tables or file (from drop table or delete file) + - Error about tables files that already exists. 
+*/ + +class ddl_log_error_handler : public Internal_error_handler +{ +public: + int handled_errors; + int unhandled_errors; + + ddl_log_error_handler() : handled_errors(0), unhandled_errors(0) + {} + + bool handle_condition(THD *thd, + uint sql_errno, + const char* sqlstate, + Sql_condition::enum_warning_level *level, + const char* msg, + Sql_condition ** cond_hdl) + { + *cond_hdl= NULL; + if (non_existing_table_error(sql_errno) || sql_errno == EE_LINK) + { + handled_errors++; + return TRUE; + } + + if (*level == Sql_condition::WARN_LEVEL_ERROR) + unhandled_errors++; + return FALSE; + } + + bool safely_trapped_errors() + { + return (handled_errors > 0 && unhandled_errors == 0); + } +}; + + +/* + Build a filename for a table, trigger file or .frm + Delete also any temporary file suffixed with ~ +*/ + +static void build_filename_and_delete_tmp_file(char *path, size_t path_length, + const LEX_CSTRING *db, + const LEX_CSTRING *name, + const char *ext, + PSI_file_key psi_key) +{ + uint length= build_table_filename(path, path_length-1, + db->str, name->str, ext, 0); + + path[length]= '~'; + path[length+1]= 0; + (void) mysql_file_delete(psi_key, path, MYF(0)); + path[length]= 0; +} + + /** Execute one action in a ddl log entry @@ -512,180 +816,329 @@ static bool ddl_log_increment_phase_no_lock(uint entry_no) @retval FALSE Success */ -static int execute_ddl_log_action(THD *thd, DDL_LOG_ENTRY *ddl_log_entry) +static int ddl_log_execute_action(THD *thd, MEM_ROOT *mem_root, + DDL_LOG_ENTRY *ddl_log_entry) { - bool frm_action= FALSE; LEX_CSTRING handler_name; handler *file= NULL; - MEM_ROOT mem_root; - int error= 1; - char to_path[FN_REFLEN]; - char from_path[FN_REFLEN]; - handlerton *hton; - DBUG_ENTER("execute_ddl_log_action"); + char to_path[FN_REFLEN+1], from_path[FN_REFLEN+1]; + handlerton *hton= 0; + ddl_log_error_handler no_such_table_handler; + uint entry_pos= ddl_log_entry->entry_pos; + int error; + bool frm_action= FALSE; + DBUG_ENTER("ddl_log_execute_action"); 
mysql_mutex_assert_owner(&LOCK_gdl); - if (ddl_log_entry->entry_type == DDL_IGNORE_LOG_ENTRY_CODE) - { - DBUG_RETURN(FALSE); - } DBUG_PRINT("ddl_log", - ("execute type %c next %u name '%s' from_name '%s' handler '%s'" - " tmp_name '%s'", - ddl_log_entry->action_type, - ddl_log_entry->next_entry, - ddl_log_entry->name, - ddl_log_entry->from_name, - ddl_log_entry->handler_name, - ddl_log_entry->tmp_name)); - handler_name.str= (char*)ddl_log_entry->handler_name; - handler_name.length= strlen(ddl_log_entry->handler_name); - init_sql_alloc(key_memory_gdl, &mem_root, TABLE_ALLOC_BLOCK_SIZE, 0, - MYF(MY_THREAD_SPECIFIC)); - if (!strcmp(ddl_log_entry->handler_name, reg_ext)) + ("entry type: %u action type: %u phase: %u next: %u " + "handler: '%s' name: '%s' from_name: '%s' tmp_name: '%s'", + (uint) ddl_log_entry->entry_type, + (uint) ddl_log_entry->action_type, + (uint) ddl_log_entry->phase, + ddl_log_entry->next_entry, + ddl_log_entry->handler_name.str, + ddl_log_entry->name.str, + ddl_log_entry->from_name.str, + ddl_log_entry->tmp_name.str)); + + if (ddl_log_entry->entry_type == DDL_IGNORE_LOG_ENTRY_CODE || + ddl_log_entry->phase == DDL_LOG_FINAL_PHASE) + DBUG_RETURN(FALSE); + + handler_name= ddl_log_entry->handler_name; + thd->push_internal_handler(&no_such_table_handler); + + if (!strcmp(ddl_log_entry->handler_name.str, reg_ext)) frm_action= TRUE; - else + else if (ddl_log_entry->handler_name.length) { - plugin_ref plugin= ha_resolve_by_name(thd, &handler_name, false); + plugin_ref plugin= my_plugin_lock_by_name(thd, &handler_name, + MYSQL_STORAGE_ENGINE_PLUGIN); if (!plugin) { my_error(ER_UNKNOWN_STORAGE_ENGINE, MYF(0), ddl_log_entry->handler_name); - goto error; + goto end; } - hton= plugin_data(plugin, handlerton*); - file= get_new_handler((TABLE_SHARE*)0, &mem_root, hton); + hton= plugin_hton(plugin); + file= get_new_handler((TABLE_SHARE*)0, mem_root, hton); if (unlikely(!file)) - goto error; + goto end; } - switch (ddl_log_entry->action_type) + + switch 
(ddl_log_entry->action_type) { + case DDL_LOG_REPLACE_ACTION: + case DDL_LOG_DELETE_ACTION: { - case DDL_LOG_REPLACE_ACTION: - case DDL_LOG_DELETE_ACTION: + if (ddl_log_entry->phase == 0) { - if (ddl_log_entry->phase == 0) + if (frm_action) { - if (frm_action) - { - strxmov(to_path, ddl_log_entry->name, reg_ext, NullS); - if (unlikely((error= mysql_file_delete(key_file_frm, to_path, - MYF(MY_WME | - MY_IGNORE_ENOENT))))) - break; + strxmov(to_path, ddl_log_entry->name.str, reg_ext, NullS); + if (unlikely((error= mysql_file_delete(key_file_frm, to_path, + MYF(MY_WME | + MY_IGNORE_ENOENT))))) + break; #ifdef WITH_PARTITION_STORAGE_ENGINE - strxmov(to_path, ddl_log_entry->name, PAR_EXT, NullS); - (void) mysql_file_delete(key_file_partition_ddl_log, to_path, - MYF(0)); + strxmov(to_path, ddl_log_entry->name.str, PAR_EXT, NullS); + (void) mysql_file_delete(key_file_partition_ddl_log, to_path, + MYF(0)); #endif - } - else + } + else + { + if (unlikely((error= hton->drop_table(hton, ddl_log_entry->name.str)))) { - if (unlikely((error= hton->drop_table(hton, ddl_log_entry->name)))) - { - if (!non_existing_table_error(error)) - break; - } + if (!non_existing_table_error(error)) + break; } - if ((ddl_log_increment_phase_no_lock(ddl_log_entry->entry_pos))) - break; - (void) ddl_log_sync_no_lock(); - error= 0; - if (ddl_log_entry->action_type == DDL_LOG_DELETE_ACTION) - break; } - DBUG_ASSERT(ddl_log_entry->action_type == DDL_LOG_REPLACE_ACTION); - /* - Fall through and perform the rename action of the replace - action. We have already indicated the success of the delete - action in the log entry by stepping up the phase. 
- */ + if (ddl_log_increment_phase_no_lock(entry_pos)) + break; + (void) ddl_log_sync_no_lock(); + error= 0; + if (ddl_log_entry->action_type == DDL_LOG_DELETE_ACTION) + break; } - /* fall through */ - case DDL_LOG_RENAME_ACTION: + } + DBUG_ASSERT(ddl_log_entry->action_type == DDL_LOG_REPLACE_ACTION); + /* + Fall through and perform the rename action of the replace + action. We have already indicated the success of the delete + action in the log entry by stepping up the phase. + */ + /* fall through */ + case DDL_LOG_RENAME_ACTION: + { + error= TRUE; + if (frm_action) { - error= TRUE; - if (frm_action) - { - strxmov(to_path, ddl_log_entry->name, reg_ext, NullS); - strxmov(from_path, ddl_log_entry->from_name, reg_ext, NullS); - if (mysql_file_rename(key_file_frm, from_path, to_path, MYF(MY_WME))) - break; + strxmov(to_path, ddl_log_entry->name.str, reg_ext, NullS); + strxmov(from_path, ddl_log_entry->from_name.str, reg_ext, NullS); + (void) mysql_file_rename(key_file_frm, from_path, to_path, MYF(MY_WME)); #ifdef WITH_PARTITION_STORAGE_ENGINE - strxmov(to_path, ddl_log_entry->name, PAR_EXT, NullS); - strxmov(from_path, ddl_log_entry->from_name, PAR_EXT, NullS); - (void) mysql_file_rename(key_file_partition_ddl_log, from_path, to_path, MYF(MY_WME)); + strxmov(to_path, ddl_log_entry->name.str, PAR_EXT, NullS); + strxmov(from_path, ddl_log_entry->from_name.str, PAR_EXT, NullS); + (void) mysql_file_rename(key_file_partition_ddl_log, from_path, to_path, + MYF(MY_WME)); #endif + } + else + (void) file->ha_rename_table(ddl_log_entry->from_name.str, + ddl_log_entry->name.str); + if (ddl_log_increment_phase_no_lock(entry_pos)) + break; + (void) ddl_log_sync_no_lock(); + break; + } + case DDL_LOG_EXCHANGE_ACTION: + { + /* We hold LOCK_gdl, so we can alter global_ddl_log.file_entry_buf */ + uchar *file_entry_buf= global_ddl_log.file_entry_buf; + /* not yet implemented for frm */ + DBUG_ASSERT(!frm_action); + /* + Using a case-switch here to revert all currently done phases, + 
since it will fall through until the first phase is undone. + */ + switch (ddl_log_entry->phase) { + case EXCH_PHASE_TEMP_TO_FROM: + /* tmp_name -> from_name possibly done */ + (void) file->ha_rename_table(ddl_log_entry->from_name.str, + ddl_log_entry->tmp_name.str); + /* decrease the phase and sync */ + file_entry_buf[DDL_LOG_PHASE_POS]--; + if (write_ddl_log_file_entry(entry_pos)) + break; + (void) ddl_log_sync_no_lock(); + /* fall through */ + case EXCH_PHASE_FROM_TO_NAME: + /* from_name -> name possibly done */ + (void) file->ha_rename_table(ddl_log_entry->name.str, + ddl_log_entry->from_name.str); + /* decrease the phase and sync */ + file_entry_buf[DDL_LOG_PHASE_POS]--; + if (write_ddl_log_file_entry(entry_pos)) + break; + (void) ddl_log_sync_no_lock(); + /* fall through */ + case EXCH_PHASE_NAME_TO_TEMP: + /* name -> tmp_name possibly done */ + (void) file->ha_rename_table(ddl_log_entry->tmp_name.str, + ddl_log_entry->name.str); + /* disable the entry and sync */ + file_entry_buf[DDL_LOG_ENTRY_TYPE_POS]= DDL_IGNORE_LOG_ENTRY_CODE; + (void) write_ddl_log_file_entry(entry_pos); + (void) ddl_log_sync_no_lock(); + break; + } + break; + } + case DDL_LOG_RENAME_TABLE_ACTION: + { + /* + We should restore things by renaming from + 'entry->name' to 'entry->from_name' + + In the following code 'to_' stands for what the table was renamed to + that we have to rename back. 
+ */ + size_t fr_length, to_length; + LEX_CSTRING from_table, to_table, to_converted_name; + from_table= ddl_log_entry->from_name; + to_table= ddl_log_entry->name; + + /* Some functions wants to have the lower case table name as an argument */ + if (lower_case_table_names) + { + uint errors; + to_converted_name.str= to_path; + to_converted_name.length= + strconvert(system_charset_info, to_table.str, to_table.length, + files_charset_info, from_path, FN_REFLEN, &errors); + } + else + to_converted_name= to_table; + + switch (ddl_log_entry->phase) { + case DDL_RENAME_PHASE_TRIGGER: + { + MDL_request mdl_request; + + build_filename_and_delete_tmp_file(to_path, sizeof(to_path), + &ddl_log_entry->db, + &ddl_log_entry->name, + TRG_EXT, + key_file_trg); + build_filename_and_delete_tmp_file(from_path, sizeof(from_path), + &ddl_log_entry->from_db, + &ddl_log_entry->from_name, + TRG_EXT, key_file_trg); + + if (!access(from_path, F_OK)) + { + /* + The original file was never renamed or we crashed in recovery + just after renaming back the file. + In this case the current file is correct and we can remove any + left over copied files + */ + (void) mysql_file_delete(key_file_trg, to_path, MYF(0)); } - else + else if (!access(to_path, F_OK)) { - if (file->ha_rename_table(ddl_log_entry->from_name, - ddl_log_entry->name)) - break; + /* .TRG file was renamed. 
Rename it back */ + /* + We have to create a MDL lock as change_table_names() checks that we + have a mdl locks for the table + */ + MDL_REQUEST_INIT(&mdl_request, MDL_key::TABLE, + ddl_log_entry->db.str, + to_converted_name.str, + MDL_EXCLUSIVE, MDL_EXPLICIT); + error= thd->mdl_context.acquire_lock(&mdl_request, 1); + /* acquire_locks() should never fail during recovery */ + DBUG_ASSERT(error == 0); + + (void) Table_triggers_list::change_table_name(thd, + &ddl_log_entry->db, + &to_table, + &to_converted_name, + &ddl_log_entry->from_db, + &from_table); + + thd->mdl_context.release_lock(mdl_request.ticket); } - if ((ddl_log_increment_phase_no_lock(ddl_log_entry->entry_pos))) + if (ddl_log_increment_phase_no_lock(entry_pos)) break; (void) ddl_log_sync_no_lock(); - error= FALSE; - break; } - case DDL_LOG_EXCHANGE_ACTION: + /* fall through */ + case DDL_RENAME_PHASE_STAT: { - /* We hold LOCK_gdl, so we can alter global_ddl_log.file_entry_buf */ - char *file_entry_buf= (char*)&global_ddl_log.file_entry_buf; - /* not yet implemented for frm */ - DBUG_ASSERT(!frm_action); - /* - Using a case-switch here to revert all currently done phases, - since it will fall through until the first phase is undone. 
- */ - switch (ddl_log_entry->phase) { - case EXCH_PHASE_TEMP_TO_FROM: - /* tmp_name -> from_name possibly done */ - (void) file->ha_rename_table(ddl_log_entry->from_name, - ddl_log_entry->tmp_name); - /* decrease the phase and sync */ - file_entry_buf[DDL_LOG_PHASE_POS]--; - if (write_ddl_log_file_entry(ddl_log_entry->entry_pos)) - break; - if (ddl_log_sync_no_lock()) - break; - /* fall through */ - case EXCH_PHASE_FROM_TO_NAME: - /* from_name -> name possibly done */ - (void) file->ha_rename_table(ddl_log_entry->name, - ddl_log_entry->from_name); - /* decrease the phase and sync */ - file_entry_buf[DDL_LOG_PHASE_POS]--; - if (write_ddl_log_file_entry(ddl_log_entry->entry_pos)) - break; - if (ddl_log_sync_no_lock()) - break; - /* fall through */ - case EXCH_PHASE_NAME_TO_TEMP: - /* name -> tmp_name possibly done */ - (void) file->ha_rename_table(ddl_log_entry->tmp_name, - ddl_log_entry->name); - /* disable the entry and sync */ - file_entry_buf[DDL_LOG_ENTRY_TYPE_POS]= DDL_IGNORE_LOG_ENTRY_CODE; - if (write_ddl_log_file_entry(ddl_log_entry->entry_pos)) - break; - if (ddl_log_sync_no_lock()) - break; - error= FALSE; - break; - default: - DBUG_ASSERT(0); - break; + (void) rename_table_in_stat_tables(thd, + &ddl_log_entry->db, + &to_converted_name, + &ddl_log_entry->from_db, + &from_table); + if (ddl_log_increment_phase_no_lock(entry_pos)) + break; + (void) ddl_log_sync_no_lock(); + } + /* fall through */ + case DDL_RENAME_PHASE_TABLE: + /* Restore frm and table to original names */ + to_length= build_table_filename(to_path, sizeof(to_path) - 1, + ddl_log_entry->db.str, + ddl_log_entry->name.str, + reg_ext, 0); + fr_length= build_table_filename(from_path, sizeof(from_path) - 1, + ddl_log_entry->from_db.str, + ddl_log_entry->from_name.str, + reg_ext, 0); + (void) mysql_file_rename(key_file_frm, to_path, from_path, MYF(MY_WME)); + + if (file->needs_lower_case_filenames()) + { + build_lower_case_table_filename(to_path, sizeof(to_path) - 1, + &ddl_log_entry->db, + 
&to_table, 0); + build_lower_case_table_filename(from_path, sizeof(from_path) - 1, + &ddl_log_entry->from_db, + &from_table, 0); + } + else + { + /* remove extension from file name */ + DBUG_ASSERT(to_length != 0 && fr_length != 0); + to_path[to_length - reg_ext_length]= 0; + from_path[fr_length - reg_ext_length]= 0; } + file->ha_rename_table(to_path, from_path); + /* disable the entry and sync */ + (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); break; - } default: DBUG_ASSERT(0); break; + } + break; } + case DDL_LOG_RENAME_VIEW_ACTION: + { + LEX_CSTRING from_table, to_table; + from_table= ddl_log_entry->from_name; + to_table= ddl_log_entry->name; + + /* Delete any left over .frm~ files */ + build_filename_and_delete_tmp_file(to_path, sizeof(to_path) - 1, + &ddl_log_entry->db, + &ddl_log_entry->name, + reg_ext, + key_file_fileparser); + build_filename_and_delete_tmp_file(from_path, sizeof(from_path) - 1, + &ddl_log_entry->from_db, + &ddl_log_entry->from_name, + reg_ext, key_file_fileparser); + + /* Rename view back if the original rename did succeed */ + if (!access(to_path, F_OK)) + (void) mysql_rename_view(thd, + &ddl_log_entry->from_db, &from_table, + &ddl_log_entry->db, &to_table); + (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); + } + break; + default: + DBUG_ASSERT(0); + break; + } + +end: delete file; -error: - free_root(&mem_root, MYF(0)); + error= no_such_table_handler.unhandled_errors > 0; + thd->pop_internal_handler(); DBUG_RETURN(error); } @@ -694,36 +1147,37 @@ error: Get a free entry in the ddl log @param[out] active_entry A ddl log memory entry returned + @param[out] write_header Set to 1 if ddl log was enlarged @return Operation status @retval TRUE Error @retval FALSE Success */ -static bool get_free_ddl_log_entry(DDL_LOG_MEMORY_ENTRY **active_entry, - bool *write_header) +static bool ddl_log_get_free_entry(DDL_LOG_MEMORY_ENTRY **active_entry) { DDL_LOG_MEMORY_ENTRY *used_entry; DDL_LOG_MEMORY_ENTRY *first_used= 
global_ddl_log.first_used; - DBUG_ENTER("get_free_ddl_log_entry"); + DBUG_ENTER("ddl_log_get_free_entry"); if (global_ddl_log.first_free == NULL) { - if (!(used_entry= (DDL_LOG_MEMORY_ENTRY*)my_malloc(key_memory_DDL_LOG_MEMORY_ENTRY, - sizeof(DDL_LOG_MEMORY_ENTRY), MYF(MY_WME)))) + if (!(used_entry= ((DDL_LOG_MEMORY_ENTRY*) + my_malloc(key_memory_DDL_LOG_MEMORY_ENTRY, + sizeof(DDL_LOG_MEMORY_ENTRY), MYF(MY_WME))))) { - sql_print_error("Failed to allocate memory for ddl log free list"); + sql_print_error("DDL_LOG: Failed to allocate memory for ddl log free " + "list"); + *active_entry= 0; DBUG_RETURN(TRUE); } global_ddl_log.num_entries++; used_entry->entry_pos= global_ddl_log.num_entries; - *write_header= TRUE; } else { used_entry= global_ddl_log.first_free; global_ddl_log.first_free= used_entry->next_log_entry; - *write_header= FALSE; } /* Move from free list to used list @@ -741,6 +1195,31 @@ static bool get_free_ddl_log_entry(DDL_LOG_MEMORY_ENTRY **active_entry, /** + Release a log memory entry. + @param log_memory_entry Log memory entry to release +*/ + +void ddl_log_release_memory_entry(DDL_LOG_MEMORY_ENTRY *log_entry) +{ + DDL_LOG_MEMORY_ENTRY *next_log_entry= log_entry->next_log_entry; + DDL_LOG_MEMORY_ENTRY *prev_log_entry= log_entry->prev_log_entry; + DBUG_ENTER("ddl_log_release_memory_entry"); + + mysql_mutex_assert_owner(&LOCK_gdl); + log_entry->next_log_entry= global_ddl_log.first_free; + global_ddl_log.first_free= log_entry; + + if (prev_log_entry) + prev_log_entry->next_log_entry= next_log_entry; + else + global_ddl_log.first_used= next_log_entry; + if (next_log_entry) + next_log_entry->prev_log_entry= prev_log_entry; + DBUG_VOID_RETURN; +} + + +/** Execute one entry in the ddl log. Executing an entry means executing a linked list of actions. 
@@ -756,30 +1235,39 @@ static bool ddl_log_execute_entry_no_lock(THD *thd, uint first_entry) { DDL_LOG_ENTRY ddl_log_entry; uint read_entry= first_entry; + MEM_ROOT mem_root; DBUG_ENTER("ddl_log_execute_entry_no_lock"); mysql_mutex_assert_owner(&LOCK_gdl); + init_sql_alloc(key_memory_gdl, &mem_root, TABLE_ALLOC_BLOCK_SIZE, 0, + MYF(MY_THREAD_SPECIFIC)); do { if (read_ddl_log_entry(read_entry, &ddl_log_entry)) { - /* Write to error log and continue with next log entry */ - sql_print_error("Failed to read entry = %u from ddl log", - read_entry); + /* Error logged to error log. Continue with next log entry */ break; } DBUG_ASSERT(ddl_log_entry.entry_type == DDL_LOG_ENTRY_CODE || ddl_log_entry.entry_type == DDL_IGNORE_LOG_ENTRY_CODE); - if (execute_ddl_log_action(thd, &ddl_log_entry)) + if (ddl_log_execute_action(thd, &mem_root, &ddl_log_entry)) { + uint action_type= ddl_log_entry.action_type; + if (action_type >= DDL_LOG_LAST_ACTION) + action_type= 0; + /* Write to error log and continue with next log entry */ - sql_print_error("Failed to execute action for entry = %u from ddl log", - read_entry); + sql_print_error("DDL_LOG: Got error %d when trying to execute action " + "for entry %u of type '%s'", + (int) my_errno, read_entry, + ddl_log_action_name[action_type]); break; } read_entry= ddl_log_entry.next_entry; } while (read_entry); + + free_root(&mem_root, MYF(0)); DBUG_RETURN(FALSE); } @@ -806,154 +1294,130 @@ static bool ddl_log_execute_entry_no_lock(THD *thd, uint first_entry) bool ddl_log_write_entry(DDL_LOG_ENTRY *ddl_log_entry, DDL_LOG_MEMORY_ENTRY **active_entry) { - bool error, write_header; + bool error; + uchar *pos, *end; DBUG_ENTER("ddl_log_write_entry"); mysql_mutex_assert_owner(&LOCK_gdl); - if (init_ddl_log()) - { + if (!global_ddl_log.open) DBUG_RETURN(TRUE); - } + + ddl_log_entry->entry_type= DDL_LOG_ENTRY_CODE; set_global_from_ddl_log_entry(ddl_log_entry); - if (get_free_ddl_log_entry(active_entry, &write_header)) - { + if 
(ddl_log_get_free_entry(active_entry)) DBUG_RETURN(TRUE); - } + error= FALSE; + pos= global_ddl_log.file_entry_buf + global_ddl_log.name_pos; + end= global_ddl_log.file_entry_buf + global_ddl_log.io_size; DBUG_PRINT("ddl_log", - ("write type %c next %u name '%s' from_name '%s' handler '%s'" - " tmp_name '%s'", - (char) global_ddl_log.file_entry_buf[DDL_LOG_ACTION_TYPE_POS], - ddl_log_entry->next_entry, - (char*) &global_ddl_log.file_entry_buf[DDL_LOG_NAME_POS], - (char*) &global_ddl_log.file_entry_buf[DDL_LOG_NAME_POS - + FN_REFLEN], - (char*) &global_ddl_log.file_entry_buf[DDL_LOG_NAME_POS - + (2*FN_REFLEN)], - (char*) &global_ddl_log.file_entry_buf[DDL_LOG_NAME_POS - + (3*FN_REFLEN)])); + ("type: %c next: %u handler: %s " + "to_name: '%s.%s' from_name: '%s.%s' " + "tmp_name: '%s'", + (char) global_ddl_log.file_entry_buf[DDL_LOG_ACTION_TYPE_POS], + ddl_log_entry->next_entry, + get_string(&pos, end).str, // Handler + get_string(&pos, end).str, // to db.table + get_string(&pos, end).str, + get_string(&pos, end).str, // From db.table + get_string(&pos, end).str, + get_string(&pos, end).str)); // Tmp name + if (unlikely(write_ddl_log_file_entry((*active_entry)->entry_pos))) { + ddl_log_release_memory_entry(*active_entry); + *active_entry= 0; error= TRUE; - sql_print_error("Failed to write entry_no = %u", + sql_print_error("DDL_LOG: Failed to write entry %u", (*active_entry)->entry_pos); } - if (write_header && likely(!error)) - { - (void) ddl_log_sync_no_lock(); - if (write_ddl_log_header()) - error= TRUE; - } - if (unlikely(error)) - ddl_log_release_memory_entry(*active_entry); DBUG_RETURN(error); } /** - @brief Write final entry in the ddl log. + @brief Write or update execute entry in the ddl log. + + @details An execute entry points to the first entry that should + be excuted during recovery. In some cases it's only written once, + in other cases it's updated for each log entry to point to the new + header for the list. 
- @details This is the last write in the ddl log. The previous log entries - have already been written but not yet synched to disk. - We write a couple of log entries that describes action to perform. - This entries are set-up in a linked list, however only when a first - execute entry is put as the first entry these will be executed. - This routine writes this first. + When called, the previous log entries have already been written but not yet + synched to disk. We write a couple of log entries that describes + action to perform. This entries are set-up in a linked list, + however only when an execute entry is put as the first entry these will be + executed during recovery. @param first_entry First entry in linked list of entries - to execute, if 0 = NULL it means that - the entry is removed and the entries - are put into the free list. - @param complete Flag indicating we are simply writing - info about that entry has been completed + to execute. @param[in,out] active_entry Entry to execute, 0 = NULL if the entry is written first time and needs to be returned. In this case the entry written is returned in this parameter - @return Operation status @retval TRUE Error @retval FALSE Success */ bool ddl_log_write_execute_entry(uint first_entry, - bool complete, DDL_LOG_MEMORY_ENTRY **active_entry) { - bool write_header= FALSE; - char *file_entry_buf= (char*)global_ddl_log.file_entry_buf; + uchar *file_entry_buf= global_ddl_log.file_entry_buf; + bool got_free_entry= 0; DBUG_ENTER("ddl_log_write_execute_entry"); mysql_mutex_assert_owner(&LOCK_gdl); - if (init_ddl_log()) - { - DBUG_RETURN(TRUE); - } - if (!complete) - { - /* - We haven't synched the log entries yet, we synch them now before - writing the execute entry. If complete is true we haven't written - any log entries before, we are only here to write the execute - entry to indicate it is done. 
- */ - (void) ddl_log_sync_no_lock(); - file_entry_buf[DDL_LOG_ENTRY_TYPE_POS]= (char)DDL_LOG_EXECUTE_CODE; - } - else - file_entry_buf[DDL_LOG_ENTRY_TYPE_POS]= (char)DDL_IGNORE_LOG_ENTRY_CODE; - file_entry_buf[DDL_LOG_ACTION_TYPE_POS]= 0; /* Ignored for execute entries */ - file_entry_buf[DDL_LOG_PHASE_POS]= 0; - int4store(&file_entry_buf[DDL_LOG_NEXT_ENTRY_POS], first_entry); - file_entry_buf[DDL_LOG_NAME_POS]= 0; - file_entry_buf[DDL_LOG_NAME_POS + FN_REFLEN]= 0; - file_entry_buf[DDL_LOG_NAME_POS + 2*FN_REFLEN]= 0; + /* + We haven't synched the log entries yet, we sync them now before + writing the execute entry. + */ + (void) ddl_log_sync_no_lock(); + bzero(file_entry_buf, global_ddl_log.io_size); + + file_entry_buf[DDL_LOG_ENTRY_TYPE_POS]= (uchar)DDL_LOG_EXECUTE_CODE; + int4store(file_entry_buf + DDL_LOG_NEXT_ENTRY_POS, first_entry); + if (!(*active_entry)) { - if (get_free_ddl_log_entry(active_entry, &write_header)) - { + if (ddl_log_get_free_entry(active_entry)) DBUG_RETURN(TRUE); - } - write_header= TRUE; + got_free_entry= TRUE; } if (write_ddl_log_file_entry((*active_entry)->entry_pos)) { - sql_print_error("Error writing execute entry in ddl log"); - ddl_log_release_memory_entry(*active_entry); - DBUG_RETURN(TRUE); - } - (void) ddl_log_sync_no_lock(); - if (write_header) - { - if (write_ddl_log_header()) + if (got_free_entry) { ddl_log_release_memory_entry(*active_entry); - DBUG_RETURN(TRUE); + *active_entry= 0; } + sql_print_error("DDL_LOG: Error writing execute entry %u", + (*active_entry)->entry_pos); + DBUG_RETURN(TRUE); } + (void) ddl_log_sync_no_lock(); DBUG_RETURN(FALSE); } - /** - Deactivate an individual entry. + Increment phase for enty. Will deactivate entry after all phases are done @details see ddl_log_increment_phase_no_lock. 
- @param entry_no Entry position of record to change + @param entry_pos Entry position of record to change @return Operation status @retval TRUE Error @retval FALSE Success */ -bool ddl_log_increment_phase(uint entry_no) +bool ddl_log_increment_phase(uint entry_pos) { bool error; DBUG_ENTER("ddl_log_increment_phase"); mysql_mutex_lock(&LOCK_gdl); - error= ddl_log_increment_phase_no_lock(entry_no); + error= ddl_log_increment_phase_no_lock(entry_pos); mysql_mutex_unlock(&LOCK_gdl); DBUG_RETURN(error); } @@ -981,32 +1445,6 @@ bool ddl_log_sync() /** - Release a log memory entry. - @param log_memory_entry Log memory entry to release -*/ - -void ddl_log_release_memory_entry(DDL_LOG_MEMORY_ENTRY *log_entry) -{ - DDL_LOG_MEMORY_ENTRY *first_free= global_ddl_log.first_free; - DDL_LOG_MEMORY_ENTRY *next_log_entry= log_entry->next_log_entry; - DDL_LOG_MEMORY_ENTRY *prev_log_entry= log_entry->prev_log_entry; - DBUG_ENTER("ddl_log_release_memory_entry"); - - mysql_mutex_assert_owner(&LOCK_gdl); - global_ddl_log.first_free= log_entry; - log_entry->next_log_entry= first_free; - - if (prev_log_entry) - prev_log_entry->next_log_entry= next_log_entry; - else - global_ddl_log.first_used= next_log_entry; - if (next_log_entry) - next_log_entry->prev_log_entry= prev_log_entry; - DBUG_VOID_RETURN; -} - - -/** Execute one entry in the ddl log. Executing an entry means executing a linked list of actions. 
@@ -1042,89 +1480,161 @@ static void close_ddl_log() (void) mysql_file_close(global_ddl_log.file_id, MYF(MY_WME)); global_ddl_log.file_id= (File) -1; } + global_ddl_log.open= 0; DBUG_VOID_RETURN; } /** + Loop over ddl log execute entries and mark those that are already stored + in the binary log as completed + + @return + @retval 0 ok + @retval 1 fail (write error) + +*/ + +bool ddl_log_close_binlogged_events(HASH *xids) +{ + uint i; + DDL_LOG_ENTRY ddl_log_entry; + DBUG_ENTER("ddl_log_close_binlogged_events"); + + if (global_ddl_log.num_entries == 0 || xids->records == 0) + DBUG_RETURN(0); + + mysql_mutex_lock(&LOCK_gdl); + for (i= 1; i <= global_ddl_log.num_entries; i++) + { + if (read_ddl_log_entry(i, &ddl_log_entry)) + break; // Read error. Stop reading + if (ddl_log_entry.entry_type == DDL_LOG_EXECUTE_CODE && + ddl_log_entry.xid != 0 && + my_hash_search(xids, (uchar*) &ddl_log_entry.xid, + sizeof(ddl_log_entry.xid))) + { + if (disable_execute_entry(i)) + { + mysql_mutex_unlock(&LOCK_gdl); + DBUG_RETURN(1); // Write error. Fatal! + } + } + } + (void) ddl_log_sync_no_lock(); + mysql_mutex_unlock(&LOCK_gdl); + DBUG_RETURN(0); +} + + +/** Execute the ddl log at recovery of MySQL Server. + + @return + @retval 0 Ok. + @retval > 0 Fatal error. 
We have to abort (can't create ddl log) + @retval < 0 Recovery failed, but new log exists and is usable + */ -void ddl_log_execute_recovery() +int ddl_log_execute_recovery() { - uint num_entries, i; + uint i, count= 0; + int error= 0; THD *thd, *original_thd; DDL_LOG_ENTRY ddl_log_entry; - char file_name[FN_REFLEN]; static char recover_query_string[]= "INTERNAL DDL LOG RECOVER IN PROGRESS"; DBUG_ENTER("ddl_log_execute_recovery"); - /* - Initialise global_ddl_log struct - */ - bzero(global_ddl_log.file_entry_buf, sizeof(global_ddl_log.file_entry_buf)); - global_ddl_log.inited= FALSE; - global_ddl_log.recovery_phase= TRUE; - global_ddl_log.io_size= IO_SIZE; - global_ddl_log.file_id= (File) -1; + if (global_ddl_log.num_entries == 0) + DBUG_RETURN(0); /* To be able to run this from boot, we allocate a temporary THD */ if (!(thd=new THD(0))) - DBUG_VOID_RETURN; + { + DBUG_ASSERT(0); // Fatal error + DBUG_RETURN(1); + } original_thd= current_thd; // Probably NULL thd->thread_stack= (char*) &thd; thd->store_globals(); thd->init(); // Needed for error messages + thd->log_all_errors= (global_system_variables.log_warnings >= 3); + thd->set_query(recover_query_string, strlen(recover_query_string)); - /* this also initialize LOCK_gdl */ - num_entries= read_ddl_log_header(); mysql_mutex_lock(&LOCK_gdl); - for (i= 1; i < num_entries + 1; i++) + for (i= 1; i <= global_ddl_log.num_entries; i++) { if (read_ddl_log_entry(i, &ddl_log_entry)) { - sql_print_error("Failed to read entry no = %u from ddl log", - i); + error= -1; continue; } if (ddl_log_entry.entry_type == DDL_LOG_EXECUTE_CODE) { + /* purecov: begin tested */ + if (ddl_log_entry.unique_id > DDL_LOG_MAX_RETRY) + { + error= -1; + continue; + } + update_unique_id(i, ++ddl_log_entry.unique_id); + if (ddl_log_entry.unique_id > DDL_LOG_MAX_RETRY) + { + sql_print_error("DDL_LOG: Aborting executing entry %u after %llu " + "retries", i, ddl_log_entry.unique_id); + error= -1; + continue; + } + /* purecov: end tested */ if 
(ddl_log_execute_entry_no_lock(thd, ddl_log_entry.next_entry)) { - /* Real unpleasant scenario but we continue anyways. */ + /* Real unpleasant scenario but we have to continue anyway */ + error= -1; continue; } + count++; } } close_ddl_log(); - create_ddl_log_file_name(file_name); - (void) mysql_file_delete(key_file_global_ddl_log, file_name, MYF(0)); - global_ddl_log.recovery_phase= FALSE; mysql_mutex_unlock(&LOCK_gdl); thd->reset_query(); delete thd; + + /* + Create a new ddl_log to get rid of old stuff and ensure that header matches + the current source version + */ + if (create_ddl_log()) + error= 1; + if (count > 0) + sql_print_information("DDL_LOG: Crash recovery executed %u entries", + count); + set_current_thd(original_thd); - DBUG_VOID_RETURN; + DBUG_RETURN(error); } /** - Release all memory allocated to the ddl log. + Release all memory allocated to the ddl log and delete the ddl log */ void ddl_log_release() { + char file_name[FN_REFLEN]; DDL_LOG_MEMORY_ENTRY *free_list; DDL_LOG_MEMORY_ENTRY *used_list; DBUG_ENTER("ddl_log_release"); - if (!global_ddl_log.do_release) + if (!global_ddl_log.initialized) DBUG_VOID_RETURN; - mysql_mutex_lock(&LOCK_gdl); + global_ddl_log.initialized= 0; + free_list= global_ddl_log.first_free; used_list= global_ddl_log.first_used; while (used_list) @@ -1139,20 +1649,218 @@ void ddl_log_release() my_free(free_list); free_list= tmp; } + my_free(global_ddl_log.file_entry_buf); + global_ddl_log.file_entry_buf= 0; close_ddl_log(); - global_ddl_log.inited= 0; - mysql_mutex_unlock(&LOCK_gdl); + + create_ddl_log_file_name(file_name); + (void) mysql_file_delete(key_file_global_ddl_log, file_name, MYF(0)); mysql_mutex_destroy(&LOCK_gdl); - global_ddl_log.do_release= false; + DBUG_VOID_RETURN; +} + + +/** + Methods for DDL_LOG_STATE +*/ + +static void add_log_entry(DDL_LOG_STATE *state, + DDL_LOG_MEMORY_ENTRY *log_entry) +{ + log_entry->next_active_log_entry= state->list; + state->list= log_entry; +} + + +void 
ddl_log_release_entries(DDL_LOG_STATE *ddl_log_state) +{ + DDL_LOG_MEMORY_ENTRY *next; + for (DDL_LOG_MEMORY_ENTRY *log_entry= ddl_log_state->list; + log_entry; + log_entry= next) + { + next= log_entry->next_active_log_entry; + ddl_log_release_memory_entry(log_entry); + } + + if (ddl_log_state->execute_entry) + { + ddl_log_release_memory_entry(ddl_log_state->execute_entry); + ddl_log_state->execute_entry= 0; // Not needed but future safe + } +} + + +/**************************************************************************** + Implementations of common ddl entries +*****************************************************************************/ + +/** + Complete ddl logging. This is done when all statements has completed + successfully and we can disable the execute log entry. +*/ + +void ddl_log_complete(DDL_LOG_STATE *state) +{ + DBUG_ENTER("ddl_log_complete"); + + if (unlikely(!state->list)) + DBUG_VOID_RETURN; // ddl log not used + + mysql_mutex_lock(&LOCK_gdl); + if (likely(state->execute_entry)) + ddl_log_disable_execute_entry(&state->execute_entry); + ddl_log_release_entries(state); + mysql_mutex_unlock(&LOCK_gdl); + DBUG_VOID_RETURN; +}; + + +/** + Revert all entries in the ddl log +*/ + +void ddl_log_revert(THD *thd, DDL_LOG_STATE *state) +{ + DBUG_ENTER("ddl_log_revert"); + + if (unlikely(!state->list)) + DBUG_VOID_RETURN; // ddl log not used + + mysql_mutex_lock(&LOCK_gdl); + if (likely(state->execute_entry)) + { + ddl_log_execute_entry_no_lock(thd, state->list->entry_pos); + ddl_log_disable_execute_entry(&state->execute_entry); + } + ddl_log_release_entries(state); + mysql_mutex_unlock(&LOCK_gdl); DBUG_VOID_RETURN; } /* ---------------------------------------------------------------------------- + Update phase of last created ddl log entry +*/ + +bool ddl_log_update_phase(DDL_LOG_STATE *state, uchar phase) +{ + DBUG_ENTER("ddl_log_update_phase"); + DBUG_RETURN(update_phase(state->list->entry_pos, phase)); +} + + +/** + Disable last ddl entry +*/ + +bool 
ddl_log_disable_entry(DDL_LOG_STATE *state) +{ + DBUG_ENTER("ddl_log_disable_entry"); + /* The following may not be true in case of temporary tables */ + if (likely(state->list)) + DBUG_RETURN(update_phase(state->list->entry_pos, DDL_LOG_FINAL_PHASE)); + DBUG_RETURN(0); +} + + +/** + Update XID for execute event +*/ + +bool ddl_log_update_xid(DDL_LOG_STATE *state, ulonglong xid) +{ + DBUG_ENTER("ddl_log_update_xid"); + DBUG_PRINT("enter", ("xid: %llu", xid)); + /* The following may not be true in case of temporary tables */ + if (likely(state->execute_entry)) + DBUG_RETURN(update_xid(state->execute_entry->entry_pos, xid)); + DBUG_RETURN(0); +} - END MODULE DDL log - -------------------- ---------------------------------------------------------------------------- +/** + Logging of rename table */ + +bool ddl_log_rename_table(THD *thd, DDL_LOG_STATE *ddl_state, + handlerton *hton, + const LEX_CSTRING *org_db, + const LEX_CSTRING *org_alias, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_alias) +{ + DDL_LOG_ENTRY ddl_log_entry; + DDL_LOG_MEMORY_ENTRY *log_entry; + DBUG_ENTER("ddl_log_rename_file"); + + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); + + mysql_mutex_lock(&LOCK_gdl); + + ddl_log_entry.action_type= DDL_LOG_RENAME_TABLE_ACTION; + ddl_log_entry.next_entry= ddl_state->list ? 
ddl_state->list->entry_pos : 0; + lex_string_set(&ddl_log_entry.handler_name, + ha_resolve_storage_engine_name(hton)); + ddl_log_entry.db= *const_cast<LEX_CSTRING*>(new_db); + ddl_log_entry.name= *const_cast<LEX_CSTRING*>(new_alias); + ddl_log_entry.from_db= *const_cast<LEX_CSTRING*>(org_db); + ddl_log_entry.from_name= *const_cast<LEX_CSTRING*>(org_alias); + ddl_log_entry.phase= DDL_RENAME_PHASE_TABLE; + + if (ddl_log_write_entry(&ddl_log_entry, &log_entry)) + goto error; + + if (ddl_log_write_execute_entry(log_entry->entry_pos, + &ddl_state->execute_entry)) + goto error; + + add_log_entry(ddl_state, log_entry); + mysql_mutex_unlock(&LOCK_gdl); + DBUG_RETURN(0); + +error: + mysql_mutex_unlock(&LOCK_gdl); + DBUG_RETURN(1); +} + +/* + Logging of rename view +*/ + +bool ddl_log_rename_view(THD *thd, DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *org_db, + const LEX_CSTRING *org_alias, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_alias) +{ + DDL_LOG_ENTRY ddl_log_entry; + DDL_LOG_MEMORY_ENTRY *log_entry; + DBUG_ENTER("ddl_log_rename_file"); + + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); + + mysql_mutex_lock(&LOCK_gdl); + + ddl_log_entry.action_type= DDL_LOG_RENAME_VIEW_ACTION; + ddl_log_entry.next_entry= ddl_state->list ? 
ddl_state->list->entry_pos : 0; + ddl_log_entry.db= *const_cast<LEX_CSTRING*>(new_db); + ddl_log_entry.name= *const_cast<LEX_CSTRING*>(new_alias); + ddl_log_entry.from_db= *const_cast<LEX_CSTRING*>(org_db); + ddl_log_entry.from_name= *const_cast<LEX_CSTRING*>(org_alias); + + if (ddl_log_write_entry(&ddl_log_entry, &log_entry)) + goto error; + + if (ddl_log_write_execute_entry(log_entry->entry_pos, + &ddl_state->execute_entry)) + goto error; + + add_log_entry(ddl_state, log_entry); + mysql_mutex_unlock(&LOCK_gdl); + DBUG_RETURN(0); + +error: + mysql_mutex_unlock(&LOCK_gdl); + DBUG_RETURN(1); +} diff --git a/sql/ddl_log.h b/sql/ddl_log.h index 0747699cd85..ec91900609f 100644 --- a/sql/ddl_log.h +++ b/sql/ddl_log.h @@ -1,6 +1,6 @@ /* Copyright (c) 2000, 2019, Oracle and/or its affiliates. - Copyright (c) 2010, 2020, MariaDB + Copyright (c) 2010, 2021, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,6 +24,9 @@ enum ddl_log_entry_code { /* + DDL_LOG_UNKNOWN: + Here mainly to detect blocks that are all zero + DDL_LOG_EXECUTE_CODE: This is a code that indicates that this is a log entry to be executed, from this entry a linked list of log entries @@ -34,48 +37,97 @@ enum ddl_log_entry_code DDL_IGNORE_LOG_ENTRY_CODE: An entry that is to be ignored */ - DDL_LOG_EXECUTE_CODE = 'e', - DDL_LOG_ENTRY_CODE = 'l', - DDL_IGNORE_LOG_ENTRY_CODE = 'i' + DDL_LOG_UNKNOWN= 0, + DDL_LOG_EXECUTE_CODE= 1, + DDL_LOG_ENTRY_CODE= 2, + DDL_IGNORE_LOG_ENTRY_CODE= 3, + DDL_LOG_ENTRY_CODE_LAST= 4 }; + +/* + When adding things below, also add an entry to ddl_log_entry_phases in + ddl_log.cc +*/ + enum ddl_log_action_code { /* The type of action that a DDL_LOG_ENTRY_CODE entry is to perform. 
- DDL_LOG_DELETE_ACTION: - Delete an entity - DDL_LOG_RENAME_ACTION: - Rename an entity - DDL_LOG_REPLACE_ACTION: - Rename an entity after removing the previous entry with the - new name, that is replace this entry. - DDL_LOG_EXCHANGE_ACTION: - Exchange two entities by renaming them a -> tmp, b -> a, tmp -> b. */ - DDL_LOG_DELETE_ACTION = 'd', - DDL_LOG_RENAME_ACTION = 'r', - DDL_LOG_REPLACE_ACTION = 's', - DDL_LOG_EXCHANGE_ACTION = 'e' + DDL_LOG_UNKNOWN_ACTION= 0, + + /* Delete a .frm file or a table in the partition engine */ + DDL_LOG_DELETE_ACTION= 1, + + /* Rename a .frm file or a table in the partition engine */ + DDL_LOG_RENAME_ACTION= 2, + + /* + Rename an entity after removing the previous entry with the + new name, that is replace this entry. + */ + DDL_LOG_REPLACE_ACTION= 3, + + /* Exchange two entities by renaming them a -> tmp, b -> a, tmp -> b */ + DDL_LOG_EXCHANGE_ACTION= 4, + /* + log do_rename(): Rename of .frm file, table, stat_tables and triggers + */ + DDL_LOG_RENAME_TABLE_ACTION= 5, + DDL_LOG_RENAME_VIEW_ACTION= 6, + DDL_LOG_LAST_ACTION /* End marker */ }; + +/* Number of phases for each ddl_log_action_code */ +extern const uchar ddl_log_entry_phases[DDL_LOG_LAST_ACTION]; + + enum enum_ddl_log_exchange_phase { EXCH_PHASE_NAME_TO_TEMP= 0, EXCH_PHASE_FROM_TO_NAME= 1, EXCH_PHASE_TEMP_TO_FROM= 2 }; +enum enum_ddl_log_rename_table_phase { + DDL_RENAME_PHASE_TRIGGER= 0, + DDL_RENAME_PHASE_STAT, + DDL_RENAME_PHASE_TABLE, +}; + +/* + Setting ddl_log_entry.phase to this has the same effect as setting + the phase to the maximum phase (..PHASE_END) for an entry. 
+*/ + +#define DDL_LOG_FINAL_PHASE ((uchar) 0xff) typedef struct st_ddl_log_entry { - const char *name; - const char *from_name; - const char *handler_name; - const char *tmp_name; + LEX_CSTRING name; + LEX_CSTRING from_name; + LEX_CSTRING handler_name; + LEX_CSTRING tmp_name; + LEX_CSTRING db; + LEX_CSTRING from_db; + LEX_CSTRING from_handler_name; + uchar uuid[MY_UUID_SIZE]; // UUID for new frm file + + ulonglong xid; // Xid stored in the binary log + /* + unique_id can be used to store a unique number to check current state. + Currently it is used to store new size of frm file, link to another ddl log + entry or store a unique version for a storage engine in alter table. + For execute entries this is reused as an execute counter to ensure we + don't repeat an entry too many times if executing the entry fails. + */ + ulonglong unique_id; uint next_entry; - uint entry_pos; - enum ddl_log_entry_code entry_type; + uint entry_pos; // Set by write_dll_log_entry() + uint16 flags; // Flags unique for each command + enum ddl_log_entry_code entry_type; // Set automatically enum ddl_log_action_code action_type; /* Most actions have only one phase. REPLACE does however have two @@ -83,7 +135,7 @@ typedef struct st_ddl_log_entry there was one there before and the second phase renames the old name to the new name. */ - char phase; + uchar phase; // set automatically } DDL_LOG_ENTRY; typedef struct st_ddl_log_memory_entry @@ -95,17 +147,63 @@ typedef struct st_ddl_log_memory_entry } DDL_LOG_MEMORY_ENTRY; +/* + State of the ddl log during execution of a DDL. + + A ddl log state has one execute entry (main entry pointing to the first + action entry) and many 'action entries' linked in a list in the order + they should be executed. + On recovery the log is parsed and all execute entries will be executed. + + All entries are stored as separate blocks in the ddl recovery file. 
+*/ + +typedef struct st_ddl_log_state +{ + /* List of ddl log entries */ + DDL_LOG_MEMORY_ENTRY *list; + /* One execute entry per list */ + DDL_LOG_MEMORY_ENTRY *execute_entry; +} DDL_LOG_STATE; + + +/* These functions are for recovery */ +bool ddl_log_initialize(); +void ddl_log_release(); +bool ddl_log_close_binlogged_events(HASH *xids); +int ddl_log_execute_recovery(); + +/* functions for updating the ddl log */ bool ddl_log_write_entry(DDL_LOG_ENTRY *ddl_log_entry, DDL_LOG_MEMORY_ENTRY **active_entry); + bool ddl_log_write_execute_entry(uint first_entry, - bool complete, - DDL_LOG_MEMORY_ENTRY **active_entry); -bool ddl_log_increment_phase(uint entry_no); + DDL_LOG_MEMORY_ENTRY **active_entry); +bool ddl_log_disable_execute_entry(DDL_LOG_MEMORY_ENTRY **active_entry); + +void ddl_log_complete(DDL_LOG_STATE *ddl_log_state); +void ddl_log_revert(THD *thd, DDL_LOG_STATE *ddl_log_state); + +bool ddl_log_update_phase(DDL_LOG_STATE *entry, uchar phase); +bool ddl_log_update_xid(DDL_LOG_STATE *state, ulonglong xid); +bool ddl_log_disable_entry(DDL_LOG_STATE *state); +bool ddl_log_increment_phase(uint entry_pos); void ddl_log_release_memory_entry(DDL_LOG_MEMORY_ENTRY *log_entry); bool ddl_log_sync(); -void ddl_log_release(); -void ddl_log_execute_recovery(); bool ddl_log_execute_entry(THD *thd, uint first_entry); +void ddl_log_release_entries(DDL_LOG_STATE *ddl_log_state); +bool ddl_log_rename_table(THD *thd, DDL_LOG_STATE *ddl_state, + handlerton *hton, + const LEX_CSTRING *org_db, + const LEX_CSTRING *org_alias, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_alias); +bool ddl_log_rename_view(THD *thd, DDL_LOG_STATE *ddl_state, + const LEX_CSTRING *org_db, + const LEX_CSTRING *org_alias, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_alias); + extern mysql_mutex_t LOCK_gdl; #endif /* DDL_LOG_INCLUDED */ diff --git a/sql/debug_sync.cc b/sql/debug_sync.cc index 1fbb15592a4..f523e22f6ce 100644 --- a/sql/debug_sync.cc +++ b/sql/debug_sync.cc @@ -1628,3 
+1628,74 @@ bool debug_sync_set_action(THD *thd, const char *action_str, size_t len) /* prevent linker/lib warning about file without public symbols */ int debug_sync_dummy; #endif /* defined(ENABLED_DEBUG_SYNC) */ + + +/** + Debug utility to do crash after a set number of executions + + The user variable, either @debug_crash_counter or @debug_error_counter, + is decremented each time debug_crash() or debug_simulate_error is called + if the keyword is set with @@debug_push, like + @@debug_push="d+frm_data_type_info_emulate" + + If the variable is not set or is not an integer it will be ignored. +*/ + +#ifndef DBUG_OFF + +static const LEX_CSTRING debug_crash_counter= +{ STRING_WITH_LEN("debug_crash_counter") }; +static const LEX_CSTRING debug_error_counter= +{ STRING_WITH_LEN("debug_error_counter") }; + +static bool debug_decrement_counter(const LEX_CSTRING *name) +{ + THD *thd= current_thd; + user_var_entry *entry= (user_var_entry*) + my_hash_search(&thd->user_vars, (uchar*) name->str, name->length); + if (!entry || entry->type != INT_RESULT || ! entry->value) + return 0; + (*(ulonglong*) entry->value)= (*(ulonglong*) entry->value)-1; + return !*(ulonglong*) entry->value; +} + +void debug_crash_here(const char *keyword) +{ + DBUG_ENTER("debug_crash_here"); + DBUG_PRINT("enter", ("keyword: %s", keyword)); + + DBUG_EXECUTE_IF(keyword, + if (debug_decrement_counter(&debug_crash_counter)) + { + my_printf_error(ER_INTERNAL_ERROR, + "Crashing at %s", + MYF(ME_ERROR_LOG | ME_NOTE), keyword); + DBUG_SUICIDE(); + }); + DBUG_VOID_RETURN; +} + +/* + This can be used as debug_counter to simulate an error at a specific + position. 
+ + Typical usage would be + if (debug_simulate_error("keyword")) + error= 1; +*/ + +bool debug_simulate_error(const char *keyword, uint error) +{ + DBUG_ENTER("debug_crash_here"); + DBUG_PRINT("enter", ("keyword: %s", keyword)); + DBUG_EXECUTE_IF(keyword, + if (debug_decrement_counter(&debug_error_counter)) + { + my_printf_error(error, + "Simulating error for '%s'", + MYF(ME_ERROR_LOG), keyword); + DBUG_RETURN(1); + }); + DBUG_RETURN(0); +} +#endif /* DBUG_OFF */ diff --git a/sql/debug_sync.h b/sql/debug_sync.h index 3b8aa8815e1..4e3e10fcc51 100644 --- a/sql/debug_sync.h +++ b/sql/debug_sync.h @@ -53,4 +53,12 @@ static inline bool debug_sync_set_action(THD *, const char *, size_t) { return false; } #endif /* defined(ENABLED_DEBUG_SYNC) */ +#ifndef DBUG_OFF +void debug_crash_here(const char *keyword); +bool debug_simulate_error(const char *keyword, uint error); +#else +#define debug_crash_here(A) do { } while(0) +#define debug_simulate_error(A, B) 0 +#endif + #endif /* DEBUG_SYNC_INCLUDED */ diff --git a/sql/handler.cc b/sql/handler.cc index 610c7deb19b..44102081e94 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -2727,7 +2727,7 @@ const char *get_canonical_filename(handler *file, const char *path, char *tmp_path) { uint i; - if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED)) + if (!file->needs_lower_case_filenames()) return path; for (i= 0; i <= mysql_tmpdir_list.max; i++) diff --git a/sql/handler.h b/sql/handler.h index 0f322987cc3..3187c5d749f 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -5100,6 +5100,17 @@ public: const KEY_PART_INFO &old_part, const KEY_PART_INFO &new_part) const; + +/* + If lower_case_table_names == 2 (case-preserving but case-insensitive + file system) and the storage is not HA_FILE_BASED, we need to provide + a lowercase file name for the engine. 
+*/ + inline bool needs_lower_case_filenames() + { + return (lower_case_table_names == 2 && !(ha_table_flags() & HA_FILE_BASED)); + } + protected: Handler_share *get_ha_share_ptr(); void set_ha_share_ptr(Handler_share *arg_ha_share); diff --git a/sql/log.cc b/sql/log.cc index c5d1335882c..519dc3e63b3 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -39,6 +39,7 @@ #include "rpl_rli.h" #include "sql_audit.h" #include "mysqld.h" +#include "ddl_log.h" #include <my_dir.h> #include <m_ctype.h> // For test_if_number @@ -10454,12 +10455,23 @@ start_binlog_background_thread() } +/* + Execute recovery of the binary log + + @param do_xa + if true: Collect all Xid events and call ha_recover(). + if false: Collect only Xid events from Query events. This is + used to disable entries in the ddl recovery log that + are found in the binary log (and thus already executed and + logged and thus don't have to be redone). +*/ + int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, IO_CACHE *first_log, Format_description_log_event *fdle, bool do_xa) { Log_event *ev= NULL; - HASH xids; + HASH xids, ddl_log_ids; MEM_ROOT mem_root; char binlog_checkpoint_name[FN_REFLEN]; bool binlog_checkpoint_found; @@ -10472,16 +10484,19 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, bool last_gtid_standalone= false; bool last_gtid_valid= false; #endif + DBUG_ENTER("TC_LOG_BINLOG::recover"); if (! 
fdle->is_valid() || - (do_xa && my_hash_init(key_memory_binlog_recover_exec, &xids, - &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0, - sizeof(my_xid), 0, 0, MYF(0)))) + (my_hash_init(key_memory_binlog_recover_exec, &xids, + &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0, + sizeof(my_xid), 0, 0, MYF(0))) || + (my_hash_init(key_memory_binlog_recover_exec, &ddl_log_ids, + &my_charset_bin, 64, 0, + sizeof(my_xid), 0, 0, MYF(0)))) goto err1; - if (do_xa) - init_alloc_root(key_memory_binlog_recover_exec, &mem_root, - TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE, MYF(0)); + init_alloc_root(key_memory_binlog_recover_exec, &mem_root, + TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE, MYF(0)); fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error @@ -10516,6 +10531,20 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, } break; } + case QUERY_EVENT: + { + Query_log_event *query_ev= (Query_log_event*) ev; + if (query_ev->xid) + { + DBUG_ASSERT(sizeof(query_ev->xid) == sizeof(my_xid)); + uchar *x= (uchar *) memdup_root(&mem_root, + (uchar*) &query_ev->xid, + sizeof(query_ev->xid)); + if (!x || my_hash_insert(&ddl_log_ids, x)) + goto err2; + } + break; + } case BINLOG_CHECKPOINT_EVENT: if (first_round && do_xa) { @@ -10597,8 +10626,6 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, ev= NULL; } - if (!do_xa) - break; /* If the last binlog checkpoint event points to an older log, we have to scan all logs from there also, to get all possible XIDs to recover. 
@@ -10656,11 +10683,13 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, { if (ha_recover(&xids)) goto err2; - - free_root(&mem_root, MYF(0)); - my_hash_free(&xids); } - return 0; + if (ddl_log_close_binlogged_events(&ddl_log_ids)) + goto err2; + free_root(&mem_root, MYF(0)); + my_hash_free(&xids); + my_hash_free(&ddl_log_ids); + DBUG_RETURN(0); err2: delete ev; @@ -10669,17 +10698,16 @@ err2: end_io_cache(&log); mysql_file_close(file, MYF(MY_WME)); } - if (do_xa) - { - free_root(&mem_root, MYF(0)); - my_hash_free(&xids); - } + free_root(&mem_root, MYF(0)); + my_hash_free(&xids); + my_hash_free(&ddl_log_ids); + err1: sql_print_error("Crash recovery failed. Either correct the problem " "(if it's, for example, out of memory error) and restart, " "or delete (or rename) binary log and start mysqld with " "--tc-heuristic-recover={commit|rollback}"); - return 1; + DBUG_RETURN(1); } diff --git a/sql/log_event.cc b/sql/log_event.cc index 321980f0a16..9c7c56b1c34 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -1373,6 +1373,7 @@ code_name(int code) case Q_TABLE_MAP_FOR_UPDATE_CODE: return "Q_TABLE_MAP_FOR_UPDATE_CODE"; case Q_MASTER_DATA_WRITTEN_CODE: return "Q_MASTER_DATA_WRITTEN_CODE"; case Q_HRNOW: return "Q_HRNOW"; + case Q_XID: return "XID"; } sprintf(buf, "CODE#%d", code); return buf; @@ -1421,7 +1422,7 @@ Query_log_event::Query_log_event(const uchar *buf, uint event_len, flags2_inited(0), sql_mode_inited(0), charset_inited(0), flags2(0), auto_increment_increment(1), auto_increment_offset(1), time_zone_len(0), lc_time_names_number(0), charset_database_number(0), - table_map_for_update(0), master_data_written(0) + table_map_for_update(0), xid(0), master_data_written(0) { ulong data_len; uint32 tmp; @@ -1605,6 +1606,13 @@ Query_log_event::Query_log_event(const uchar *buf, uint event_len, pos+= 3; break; } + case Q_XID: + { + CHECK_SPACE(pos, end, 8); + xid= uint8korr(pos); + pos+= 8; + break; + } default: /* That's why you must write 
status vars in growing order of code */ DBUG_PRINT("info",("Query_log_event has unknown status vars (first has\ diff --git a/sql/log_event.h b/sql/log_event.h index 5b3fb60bbe1..990d95e1dc0 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -319,6 +319,7 @@ class String; #define Q_INVOKER 11 #define Q_HRNOW 128 +#define Q_XID 129 /* Intvar event post-header */ @@ -2125,6 +2126,8 @@ public: statement, for other query statements, this will be zero. */ ulonglong table_map_for_update; + /* Xid for the event, if such exists */ + ulonglong xid; /* Holds the original length of a Query_log_event that comes from a master of version < 5.0 (i.e., binlog_version < 4). When the IO diff --git a/sql/log_event_client.cc b/sql/log_event_client.cc index 6b5d71348e1..067bfb7c54b 100644 --- a/sql/log_event_client.cc +++ b/sql/log_event_client.cc @@ -1820,9 +1820,10 @@ bool Query_log_event::print_query_header(IO_CACHE* file, { if (print_header(file, print_event_info, FALSE) || my_b_printf(file, - "\t%s\tthread_id=%lu\texec_time=%lu\terror_code=%d\n", + "\t%s\tthread_id=%lu\texec_time=%lu\terror_code=%d" + "\txid=%lu\n", get_type_str(), (ulong) thread_id, (ulong) exec_time, - error_code)) + error_code, (ulong) xid)) goto err; } diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc index e9057d09c34..e216724ca7a 100644 --- a/sql/log_event_server.cc +++ b/sql/log_event_server.cc @@ -1294,6 +1294,15 @@ bool Query_log_event::write() int3store(start, when_sec_part); start+= 3; } + + /* xid's is used with ddl_log handling */ + if (thd && thd->binlog_xid) + { + *start++= Q_XID; + int8store(start, thd->query_id); + start+= 8; + } + /* NOTE: When adding new status vars, please don't forget to update the MAX_SIZE_LOG_EVENT_STATUS in log_event.h and update the function diff --git a/sql/mysqld.cc b/sql/mysqld.cc index f8b5889488e..9cc67419564 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -571,7 +571,7 @@ char log_error_file[FN_REFLEN], glob_hostname[FN_REFLEN], *opt_log_basename; 
char mysql_real_data_home[FN_REFLEN], lc_messages_dir[FN_REFLEN], reg_ext[FN_EXTLEN], mysql_charsets_dir[FN_REFLEN], - *opt_init_file, *opt_tc_log_file; + *opt_init_file, *opt_tc_log_file, *opt_ddl_recovery_file; char *lc_messages_dir_ptr= lc_messages_dir, *log_error_file_ptr; char mysql_unpacked_real_data_home[FN_REFLEN]; size_t mysql_unpacked_real_data_home_len; @@ -5269,6 +5269,9 @@ static int init_server_components() } #endif + if (ddl_log_initialize()) + unireg_abort(1); + tc_log= get_tc_log_implementation(); if (tc_log->open(opt_bin_log ? opt_bin_logname : opt_tc_log_file)) @@ -5278,9 +5281,7 @@ static int init_server_components() } if (ha_recover(0)) - { unireg_abort(1); - } if (opt_bin_log) { @@ -5680,7 +5681,8 @@ int mysqld_main(int argc, char **argv) initialize_information_schema_acl(); - ddl_log_execute_recovery(); + if (ddl_log_execute_recovery() > 0) + unireg_abort(1); /* Change EVENTS_ORIGINAL to EVENTS_OFF (the default value) as there is no @@ -6457,6 +6459,10 @@ struct my_option my_long_options[]= "relay logs", &opt_relaylog_index_name, &opt_relaylog_index_name, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"log-ddl-recovery", 0, + "Path to file used for recovery of DDL statements after a crash", + &opt_ddl_recovery_file, &opt_ddl_recovery_file, 0, GET_STR, + REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"log-isam", OPT_ISAM_LOG, "Log all MyISAM changes to file.", &myisam_log_filename, &myisam_log_filename, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, @@ -7584,6 +7590,7 @@ static int mysql_init_variables(void) opt_logname= opt_binlog_index_name= opt_slow_logname= 0; opt_log_basename= 0; opt_tc_log_file= (char *)"tc.log"; // no hostname in tc_log file name ! 
+ opt_ddl_recovery_file= (char *) "ddl_recovery.log"; opt_secure_auth= 0; opt_bootstrap= opt_myisam_log= 0; disable_log_notes= 0; diff --git a/sql/mysqld.h b/sql/mysqld.h index 6573b6b6a4b..b50b98e0dea 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -207,7 +207,7 @@ extern MYSQL_PLUGIN_IMPORT char glob_hostname[FN_REFLEN]; extern char mysql_home[FN_REFLEN]; extern char pidfile_name[FN_REFLEN], system_time_zone[30], *opt_init_file; extern char default_logfile_name[FN_REFLEN]; -extern char log_error_file[FN_REFLEN], *opt_tc_log_file; +extern char log_error_file[FN_REFLEN], *opt_tc_log_file, *opt_ddl_recovery_file; extern const double log_10[309]; extern ulonglong keybuff_size; extern ulonglong thd_startup_options; diff --git a/sql/parse_file.cc b/sql/parse_file.cc index 59b4027a352..aaf36970702 100644 --- a/sql/parse_file.cc +++ b/sql/parse_file.cc @@ -24,7 +24,9 @@ #include "sql_priv.h" #include "parse_file.h" #include "unireg.h" // CREATE_MODE -#include "sql_table.h" // build_table_filename +#include "sql_table.h" // build_table_filename +#include "debug_sync.h" +#include <mysys_err.h> // EE_WRITE #include <m_ctype.h> #include <my_dir.h> @@ -245,7 +247,6 @@ write_parameter(IO_CACHE *file, const uchar* base, File_option *parameter) TRUE error */ - my_bool sql_create_definition_file(const LEX_CSTRING *dir, const LEX_CSTRING *file_name, @@ -287,6 +288,8 @@ sql_create_definition_file(const LEX_CSTRING *dir, DBUG_RETURN(TRUE); } + debug_crash_here("definition_file_after_create"); + if (init_io_cache(&file, handler, 0, WRITE_CACHE, 0L, 0, MYF(MY_WME))) goto err_w_file; @@ -296,6 +299,9 @@ sql_create_definition_file(const LEX_CSTRING *dir, my_b_write(&file, (const uchar *)STRING_WITH_LEN("\n"))) goto err_w_cache; + if (debug_simulate_error("definition_file_simulate_write_error", EE_WRITE)) + goto err_w_cache; + // write parameters to temporary file for (param= parameters; param->name.str; param++) { @@ -337,6 +343,7 @@ err_w_cache: end_io_cache(&file); err_w_file: 
mysql_file_close(handler, MYF(MY_WME)); + mysql_file_delete(key_file_fileparser, path, MYF(MY_WME)); DBUG_RETURN(TRUE); } diff --git a/sql/sql_class.cc b/sql/sql_class.cc index ff927447c79..92d9e90adc1 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -788,6 +788,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) mysys_var=0; binlog_evt_union.do_union= FALSE; binlog_table_maps= FALSE; + binlog_xid= 0; enable_slow_log= 0; durability_property= HA_REGULAR_DURABILITY; diff --git a/sql/sql_class.h b/sql/sql_class.h index 5a299edbcb8..77861611794 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -2839,6 +2839,11 @@ public: #ifndef MYSQL_CLIENT binlog_cache_mngr * binlog_setup_trx_data(); + /* + If set, tell binlog to store the value as query 'xid' in the next + Query_log_event + */ + ulonglong binlog_xid; /* Public interface to write RBR events to the binlog diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc index 9afdd6f9dc0..22efecb9210 100644 --- a/sql/sql_partition.cc +++ b/sql/sql_partition.cc @@ -6203,15 +6203,17 @@ static bool write_log_replace_delete_frm(ALTER_PARTITION_PARAM_TYPE *lpt, DDL_LOG_MEMORY_ENTRY *log_entry; DBUG_ENTER("write_log_replace_delete_frm"); + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); if (replace_flag) ddl_log_entry.action_type= DDL_LOG_REPLACE_ACTION; else ddl_log_entry.action_type= DDL_LOG_DELETE_ACTION; ddl_log_entry.next_entry= next_entry; - ddl_log_entry.handler_name= reg_ext; - ddl_log_entry.name= to_path; + lex_string_set(&ddl_log_entry.handler_name, reg_ext); + lex_string_set(&ddl_log_entry.name, to_path); + if (replace_flag) - ddl_log_entry.from_name= from_path; + lex_string_set(&ddl_log_entry.from_name, from_path); if (ddl_log_write_entry(&ddl_log_entry, &log_entry)) { DBUG_RETURN(TRUE); @@ -6263,6 +6265,7 @@ static bool write_log_changed_partitions(ALTER_PARTITION_PARAM_TYPE *lpt, if (part_elem->part_state == PART_IS_CHANGED || (part_elem->part_state == PART_IS_ADDED && temp_partitions)) { + 
bzero(&ddl_log_entry, sizeof(ddl_log_entry)); if (part_info->is_sub_partitioned()) { List_iterator<partition_element> sub_it(part_elem->subpartitions); @@ -6272,8 +6275,9 @@ static bool write_log_changed_partitions(ALTER_PARTITION_PARAM_TYPE *lpt, { partition_element *sub_elem= sub_it++; ddl_log_entry.next_entry= *next_entry; - ddl_log_entry.handler_name= - ha_resolve_storage_engine_name(sub_elem->engine_type); + lex_string_set(&ddl_log_entry.handler_name, + ha_resolve_storage_engine_name(sub_elem-> + engine_type)); if (create_subpartition_name(tmp_path, sizeof(tmp_path), path, part_elem->partition_name, sub_elem->partition_name, @@ -6283,16 +6287,15 @@ static bool write_log_changed_partitions(ALTER_PARTITION_PARAM_TYPE *lpt, sub_elem->partition_name, NORMAL_PART_NAME)) DBUG_RETURN(TRUE); - ddl_log_entry.name= normal_path; - ddl_log_entry.from_name= tmp_path; + lex_string_set(&ddl_log_entry.name, normal_path); + lex_string_set(&ddl_log_entry.from_name, tmp_path); if (part_elem->part_state == PART_IS_CHANGED) ddl_log_entry.action_type= DDL_LOG_REPLACE_ACTION; else ddl_log_entry.action_type= DDL_LOG_RENAME_ACTION; if (ddl_log_write_entry(&ddl_log_entry, &log_entry)) - { DBUG_RETURN(TRUE); - } + *next_entry= log_entry->entry_pos; sub_elem->log_entry= log_entry; insert_part_info_log_entry_list(part_info, log_entry); @@ -6301,8 +6304,8 @@ static bool write_log_changed_partitions(ALTER_PARTITION_PARAM_TYPE *lpt, else { ddl_log_entry.next_entry= *next_entry; - ddl_log_entry.handler_name= - ha_resolve_storage_engine_name(part_elem->engine_type); + lex_string_set(&ddl_log_entry.handler_name, + ha_resolve_storage_engine_name(part_elem->engine_type)); if (create_partition_name(tmp_path, sizeof(tmp_path), path, part_elem->partition_name, TEMP_PART_NAME, TRUE) || @@ -6310,8 +6313,8 @@ static bool write_log_changed_partitions(ALTER_PARTITION_PARAM_TYPE *lpt, part_elem->partition_name, NORMAL_PART_NAME, TRUE)) DBUG_RETURN(TRUE); - ddl_log_entry.name= normal_path; - 
ddl_log_entry.from_name= tmp_path; + lex_string_set(&ddl_log_entry.name, normal_path); + lex_string_set(&ddl_log_entry.from_name, tmp_path); if (part_elem->part_state == PART_IS_CHANGED) ddl_log_entry.action_type= DDL_LOG_REPLACE_ACTION; else @@ -6355,6 +6358,7 @@ static bool write_log_dropped_partitions(ALTER_PARTITION_PARAM_TYPE *lpt, uint num_elements= part_info->partitions.elements; DBUG_ENTER("write_log_dropped_partitions"); + bzero(&ddl_log_entry, sizeof(ddl_log_entry)); ddl_log_entry.action_type= DDL_LOG_DELETE_ACTION; if (temp_list) num_elements= num_temp_partitions; @@ -6385,13 +6389,14 @@ static bool write_log_dropped_partitions(ALTER_PARTITION_PARAM_TYPE *lpt, { partition_element *sub_elem= sub_it++; ddl_log_entry.next_entry= *next_entry; - ddl_log_entry.handler_name= - ha_resolve_storage_engine_name(sub_elem->engine_type); + lex_string_set(&ddl_log_entry.handler_name, + ha_resolve_storage_engine_name(sub_elem-> + engine_type)); if (create_subpartition_name(tmp_path, sizeof(tmp_path), path, part_elem->partition_name, sub_elem->partition_name, name_variant)) DBUG_RETURN(TRUE); - ddl_log_entry.name= tmp_path; + lex_string_set(&ddl_log_entry.name, tmp_path); if (ddl_log_write_entry(&ddl_log_entry, &log_entry)) { DBUG_RETURN(TRUE); @@ -6404,13 +6409,13 @@ static bool write_log_dropped_partitions(ALTER_PARTITION_PARAM_TYPE *lpt, else { ddl_log_entry.next_entry= *next_entry; - ddl_log_entry.handler_name= - ha_resolve_storage_engine_name(part_elem->engine_type); + lex_string_set(&ddl_log_entry.handler_name, + ha_resolve_storage_engine_name(part_elem->engine_type)); if (create_partition_name(tmp_path, sizeof(tmp_path), path, part_elem->partition_name, name_variant, TRUE)) DBUG_RETURN(TRUE); - ddl_log_entry.name= tmp_path; + lex_string_set(&ddl_log_entry.name, tmp_path); if (ddl_log_write_entry(&ddl_log_entry, &log_entry)) { DBUG_RETURN(TRUE); @@ -6474,7 +6479,7 @@ static bool write_log_drop_shadow_frm(ALTER_PARTITION_PARAM_TYPE *lpt) goto error; log_entry= 
part_info->first_log_entry; if (ddl_log_write_execute_entry(log_entry->entry_pos, - FALSE, &exec_log_entry)) + &exec_log_entry)) goto error; mysql_mutex_unlock(&LOCK_gdl); set_part_info_exec_log_entry(part_info, exec_log_entry); @@ -6521,7 +6526,7 @@ static bool write_log_rename_frm(ALTER_PARTITION_PARAM_TYPE *lpt) log_entry= part_info->first_log_entry; part_info->frm_log_entry= log_entry; if (ddl_log_write_execute_entry(log_entry->entry_pos, - FALSE, &exec_log_entry)) + &exec_log_entry)) goto error; release_part_info_log_entries(old_first_log_entry); mysql_mutex_unlock(&LOCK_gdl); @@ -6576,7 +6581,7 @@ static bool write_log_drop_partition(ALTER_PARTITION_PARAM_TYPE *lpt) log_entry= part_info->first_log_entry; part_info->frm_log_entry= log_entry; if (ddl_log_write_execute_entry(log_entry->entry_pos, - FALSE, &exec_log_entry)) + &exec_log_entry)) goto error; release_part_info_log_entries(old_first_log_entry); mysql_mutex_unlock(&LOCK_gdl); @@ -6635,7 +6640,6 @@ static bool write_log_add_change_partition(ALTER_PARTITION_PARAM_TYPE *lpt) log_entry= part_info->first_log_entry; if (ddl_log_write_execute_entry(log_entry->entry_pos, - FALSE, /* Reuse the old execute ddl_log_entry */ &exec_log_entry)) goto error; @@ -6705,7 +6709,7 @@ static bool write_log_final_change_partition(ALTER_PARTITION_PARAM_TYPE *lpt) part_info->frm_log_entry= log_entry; /* Overwrite the revert execute log entry with this retry execute entry */ if (ddl_log_write_execute_entry(log_entry->entry_pos, - FALSE, &exec_log_entry)) + &exec_log_entry)) goto error; release_part_info_log_entries(old_first_log_entry); mysql_mutex_unlock(&LOCK_gdl); @@ -6741,7 +6745,7 @@ static void write_log_completed(ALTER_PARTITION_PARAM_TYPE *lpt, DBUG_ASSERT(log_entry); mysql_mutex_lock(&LOCK_gdl); - if (ddl_log_write_execute_entry(0UL, TRUE, &log_entry)) + if (ddl_log_disable_execute_entry(&log_entry)) { /* Failed to write, Bad... 
diff --git a/sql/sql_partition_admin.cc b/sql/sql_partition_admin.cc index 18c53a7e2fb..ca564af1bf6 100644 --- a/sql/sql_partition_admin.cc +++ b/sql/sql_partition_admin.cc @@ -355,13 +355,14 @@ static bool exchange_name_with_ddl_log(THD *thd, DBUG_RETURN(TRUE); /* prepare the action entry */ + bzero(&exchange_entry, sizeof(exchange_entry)); exchange_entry.entry_type= DDL_LOG_ENTRY_CODE; exchange_entry.action_type= DDL_LOG_EXCHANGE_ACTION; - exchange_entry.next_entry= 0; - exchange_entry.name= name; - exchange_entry.from_name= from_name; - exchange_entry.tmp_name= tmp_name; - exchange_entry.handler_name= ha_resolve_storage_engine_name(ht); + lex_string_set(&exchange_entry.name, name); + lex_string_set(&exchange_entry.from_name, from_name); + lex_string_set(&exchange_entry.tmp_name, tmp_name); + lex_string_set(&exchange_entry.handler_name, + ha_resolve_storage_engine_name(ht)); exchange_entry.phase= EXCH_PHASE_NAME_TO_TEMP; mysql_mutex_lock(&LOCK_gdl); @@ -377,8 +378,8 @@ static bool exchange_name_with_ddl_log(THD *thd, DBUG_EXECUTE_IF("exchange_partition_fail_2", goto err_no_execute_written;); DBUG_EXECUTE_IF("exchange_partition_abort_2", DBUG_SUICIDE();); - if (unlikely(ddl_log_write_execute_entry(log_entry->entry_pos, FALSE, - &exec_log_entry))) + if (unlikely(ddl_log_write_execute_entry(log_entry->entry_pos, + &exec_log_entry))) goto err_no_execute_written; /* ddl_log is written and synced */ @@ -457,7 +458,7 @@ err_rename: (void) ddl_log_execute_entry(current_thd, log_entry->entry_pos); mysql_mutex_lock(&LOCK_gdl); /* mark the execute log entry done */ - (void) ddl_log_write_execute_entry(0, TRUE, &exec_log_entry); + (void) ddl_log_disable_execute_entry(&exec_log_entry); /* release the execute log entry */ (void) ddl_log_release_memory_entry(exec_log_entry); err_no_execute_written: diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc index b7aed97a8a2..2b2f32cf126 100644 --- a/sql/sql_rename.cc +++ b/sql/sql_rename.cc @@ -30,17 +30,20 @@ #include "sql_base.h" // 
tdc_remove_table, lock_table_names, #include "sql_handler.h" // mysql_ha_rm_tables #include "sql_statistics.h" +#include "ddl_log.h" +#include "debug_sync.h" + +/* used to hold table entries for as part of list of renamed temporary tables */ +struct TABLE_PAIR +{ + TABLE_LIST *from, *to; +}; -static TABLE_LIST *rename_tables(THD *thd, TABLE_LIST *table_list, - bool skip_error, bool if_exits, - bool *force_if_exists); -static bool do_rename(THD *thd, TABLE_LIST *ren_table, - const LEX_CSTRING *new_db, - const LEX_CSTRING *new_table_name, - const LEX_CSTRING *new_table_alias, - bool skip_error, bool if_exists, bool *force_if_exists); -static TABLE_LIST *reverse_table_list(TABLE_LIST *table_list); +static bool rename_tables(THD *thd, TABLE_LIST *table_list, + DDL_LOG_STATE *ddl_log_state, + bool skip_error, bool if_exits, + bool *force_if_exists); /* Every two entries in the table_list form a pair of original name and @@ -55,6 +58,7 @@ bool mysql_rename_tables(THD *thd, TABLE_LIST *table_list, bool silent, TABLE_LIST *ren_table= 0; int to_table; const char *rename_log_table[2]= {NULL, NULL}; + DDL_LOG_STATE ddl_log_state; DBUG_ENTER("mysql_rename_tables"); /* @@ -151,32 +155,14 @@ bool mysql_rename_tables(THD *thd, TABLE_LIST *table_list, bool silent, goto err; error=0; + bzero(&ddl_log_state, sizeof(ddl_log_state)); + /* An exclusive lock on table names is satisfactory to ensure no other thread accesses this table. 
*/ - if ((ren_table= rename_tables(thd, table_list, 0, if_exists, - &force_if_exists))) - { - /* Rename didn't succeed; rename back the tables in reverse order */ - TABLE_LIST *table; - - /* Reverse the table list */ - table_list= reverse_table_list(table_list); - - /* Find the last renamed table */ - for (table= table_list; - table->next_local != ren_table ; - table= table->next_local->next_local) ; - table= table->next_local->next_local; // Skip error table - /* Revert to old names */ - rename_tables(thd, table, 1, if_exists, &force_if_exists); - - /* Revert the table list (for prepared statements) */ - table_list= reverse_table_list(table_list); - - error= 1; - } + error= rename_tables(thd, table_list, &ddl_log_state, + 0, if_exists, &force_if_exists); if (likely(!silent && !error)) { @@ -186,49 +172,43 @@ bool mysql_rename_tables(THD *thd, TABLE_LIST *table_list, bool silent, /* Add IF EXISTS to binary log */ thd->variables.option_bits|= OPTION_IF_EXISTS; } + + debug_crash_here("ddl_log_rename_before_binlog"); + /* + Store xid in ddl log and binary log so that we can check on ddl recovery + if the item is in the binary log (and thus the operation was complete + */ + thd->binlog_xid= thd->query_id; + ddl_log_update_xid(&ddl_log_state, thd->binlog_xid); binlog_error= write_bin_log(thd, TRUE, thd->query(), thd->query_length()); + if (binlog_error) + error= 1; + thd->binlog_xid= 0; thd->variables.option_bits= save_option_bits; + debug_crash_here("ddl_log_rename_after_binlog"); if (likely(!binlog_error)) my_ok(thd); } if (likely(!error)) + { query_cache_invalidate3(thd, table_list, 0); + ddl_log_complete(&ddl_log_state); + } + else + { + /* Revert the renames of normal tables with the help of the ddl log */ + ddl_log_revert(thd, &ddl_log_state); + } err: DBUG_RETURN(error || binlog_error); } -/* - reverse table list - - SYNOPSIS - reverse_table_list() - table_list pointer to table _list - - RETURN - pointer to new (reversed) list -*/ -static TABLE_LIST 
*reverse_table_list(TABLE_LIST *table_list) -{ - TABLE_LIST *prev= 0; - - while (table_list) - { - TABLE_LIST *next= table_list->next_local; - table_list->next_local= prev; - prev= table_list; - table_list= next; - } - return (prev); -} - - static bool -do_rename_temporary(THD *thd, TABLE_LIST *ren_table, TABLE_LIST *new_table, - bool skip_error) +do_rename_temporary(THD *thd, TABLE_LIST *ren_table, TABLE_LIST *new_table) { LEX_CSTRING *new_alias; DBUG_ENTER("do_rename_temporary"); @@ -242,84 +222,129 @@ do_rename_temporary(THD *thd, TABLE_LIST *ren_table, TABLE_LIST *new_table, DBUG_RETURN(1); // This can't be skipped } - DBUG_RETURN(thd->rename_temporary_table(ren_table->table, &new_table->db, new_alias)); } -/* - Rename a single table or a view +/** + Parameters for do_rename +*/ - SYNPOSIS - do_rename() - thd Thread handle - ren_table A table/view to be renamed - new_db The database to which the table to be moved to - new_table_name The new table/view name - new_table_alias The new table/view alias - skip_error Whether to skip error - if_exists Skip error, but only if the table didn't exists - force_if_exists Set to 1 if we have to log the query with 'IF EXISTS' - Otherwise don't touch the value +struct rename_param +{ + LEX_CSTRING old_alias, new_alias; + handlerton *from_table_hton; +}; - DESCRIPTION - Rename a single table or a view. - RETURN - false Ok - true rename failed +/** + check_rename() + + Check pre-conditions for rename + - From table should exists + - To table should not exists. + + SYNOPSIS + @param new_table_name The new table/view name + @param new_table_alias The new table/view alias + @param if_exists If not set, give an error if the table does not + exists. If set, just give a warning in this case. 
+ @return + @retval 0 ok + @retval >0 Error (from table doesn't exists or to table exists) + @retval <0 Can't do rename, but no error */ -static bool -do_rename(THD *thd, TABLE_LIST *ren_table, const LEX_CSTRING *new_db, - const LEX_CSTRING *new_table_name, - const LEX_CSTRING *new_table_alias, - bool skip_error, bool if_exists, bool *force_if_exists) +static int +check_rename(THD *thd, rename_param *param, + TABLE_LIST *ren_table, + const LEX_CSTRING *new_db, + const LEX_CSTRING *new_table_name, + const LEX_CSTRING *new_table_alias, + bool if_exists) { - int rc= 1; - handlerton *hton, *new_hton; - LEX_CSTRING old_alias, new_alias; - DBUG_ENTER("do_rename"); - DBUG_PRINT("enter", ("skip_error: %d if_exists: %d", (int) skip_error, - (int) if_exists)); + DBUG_ENTER("check_rename"); + DBUG_PRINT("enter", ("if_exists: %d", (int) if_exists)); + if (lower_case_table_names == 2) { - old_alias= ren_table->alias; - new_alias= *new_table_alias; + param->old_alias= ren_table->alias; + param->new_alias= *new_table_alias; } else { - old_alias= ren_table->table_name; - new_alias= *new_table_name; + param->old_alias= ren_table->table_name; + param->new_alias= *new_table_name; } - DBUG_ASSERT(new_alias.str); + DBUG_ASSERT(param->new_alias.str); - if (!ha_table_exists(thd, &ren_table->db, &old_alias, &hton) || !hton) + if (!ha_table_exists(thd, &ren_table->db, &param->old_alias, + &param->from_table_hton) || + !param->from_table_hton) { - my_error(ER_NO_SUCH_TABLE, MYF((skip_error | if_exists) ? ME_NOTE : 0), - ren_table->db.str, old_alias.str); - DBUG_RETURN(skip_error || if_exists ? 0 : 1); + my_error(ER_NO_SUCH_TABLE, MYF(if_exists ? ME_NOTE : 0), + ren_table->db.str, param->old_alias.str); + DBUG_RETURN(if_exists ? -1 : 1); } - if (hton != view_pseudo_hton && - ha_check_if_updates_are_ignored(thd, hton, "RENAME")) + if (param->from_table_hton != view_pseudo_hton && + ha_check_if_updates_are_ignored(thd, param->from_table_hton, "RENAME")) { /* Shared table. 
Just drop the old .frm as it's not correct anymore Discovery will find the old table when it's accessed */ tdc_remove_table(thd, ren_table->db.str, ren_table->table_name.str); - quick_rm_table(thd, 0, &ren_table->db, &old_alias, FRM_ONLY, 0); - DBUG_RETURN(0); + quick_rm_table(thd, 0, &ren_table->db, &param->old_alias, FRM_ONLY, 0); + DBUG_RETURN(-1); } - if (ha_table_exists(thd, new_db, &new_alias, &new_hton)) + if (ha_table_exists(thd, new_db, &param->new_alias, 0)) { - my_error(ER_TABLE_EXISTS_ERROR, MYF(0), new_alias.str); + my_error(ER_TABLE_EXISTS_ERROR, MYF(0), param->new_alias.str); DBUG_RETURN(1); // This can't be skipped } + DBUG_RETURN(0); +} + + +/* + Rename a single table or a view + + SYNPOSIS + do_rename() + thd Thread handle + ren_table A table/view to be renamed + new_db The database to which the table to be moved to + skip_error Skip error, but only if the table didn't exists + force_if_exists Set to 1 if we have to log the query with 'IF EXISTS' + Otherwise don't touch the value + + DESCRIPTION + Rename a single table or a view. 
+ In case of failure, all changes will be reverted + + RETURN + false Ok + true rename failed +*/ + +static bool +do_rename(THD *thd, rename_param *param, DDL_LOG_STATE *ddl_log_state, + TABLE_LIST *ren_table, const LEX_CSTRING *new_db, + bool skip_error, bool *force_if_exists) +{ + int rc= 1; + handlerton *hton; + LEX_CSTRING *old_alias, *new_alias; + DBUG_ENTER("do_rename"); + DBUG_PRINT("enter", ("skip_error: %d", (int) skip_error)); + + old_alias= &param->old_alias; + new_alias= &param->new_alias; + hton= param->from_table_hton; DBUG_ASSERT(!thd->locked_tables_mode); @@ -337,17 +362,36 @@ do_rename(THD *thd, TABLE_LIST *ren_table, const LEX_CSTRING *new_db, *force_if_exists= 1; thd->replication_flags= 0; - if (!(rc= mysql_rename_table(hton, &ren_table->db, &old_alias, - new_db, &new_alias, 0))) + + if (ddl_log_rename_table(thd, ddl_log_state, hton, + &ren_table->db, old_alias, new_db, new_alias)) + DBUG_RETURN(1); + + debug_crash_here("ddl_log_rename_before_rename_table"); + if (!(rc= mysql_rename_table(hton, &ren_table->db, old_alias, + new_db, new_alias, 0))) { - (void) rename_table_in_stat_tables(thd, &ren_table->db, - &ren_table->table_name, - new_db, &new_alias); - if ((rc= Table_triggers_list::change_table_name(thd, &ren_table->db, - &old_alias, - &ren_table->table_name, - new_db, - &new_alias))) + /* Table rename succeded. 
+ It's safe to start recovery at rename trigger phase + */ + debug_crash_here("ddl_log_rename_before_phase_trigger"); + ddl_log_update_phase(ddl_log_state, DDL_RENAME_PHASE_TRIGGER); + + debug_crash_here("ddl_log_rename_before_rename_trigger"); + + if (!(rc= Table_triggers_list::change_table_name(thd, &ren_table->db, + old_alias, + &ren_table->table_name, + new_db, + new_alias))) + { + debug_crash_here("ddl_log_rename_before_stat_tables"); + (void) rename_table_in_stat_tables(thd, &ren_table->db, + &ren_table->table_name, + new_db, new_alias); + debug_crash_here("ddl_log_rename_after_stat_tables"); + } + else { /* We've succeeded in renaming table's .frm and in updating @@ -355,8 +399,12 @@ do_rename(THD *thd, TABLE_LIST *ren_table, const LEX_CSTRING *new_db, triggers appropriately. So let us revert operations on .frm and handler's data and report about failure to rename table. */ - (void) mysql_rename_table(hton, new_db, &new_alias, - &ren_table->db, &old_alias, NO_FK_CHECKS); + debug_crash_here("ddl_log_rename_after_failed_rename_trigger"); + (void) mysql_rename_table(hton, new_db, new_alias, + &ren_table->db, old_alias, NO_FK_CHECKS); + debug_crash_here("ddl_log_rename_after_revert_rename_table"); + ddl_log_disable_entry(ddl_log_state); + debug_crash_here("ddl_log_rename_after_disable_entry"); } } if (thd->replication_flags & OPTION_IF_EXISTS) @@ -371,9 +419,25 @@ do_rename(THD *thd, TABLE_LIST *ren_table, const LEX_CSTRING *new_db, */ if (thd->lex->sql_command != SQLCOM_ALTER_DB_UPGRADE && cmp(&ren_table->db, new_db)) + { my_error(ER_FORBID_SCHEMA_CHANGE, MYF(0), ren_table->db.str, new_db->str); - else - rc= mysql_rename_view(thd, new_db, &new_alias, ren_table); + DBUG_RETURN(1); + } + + ddl_log_rename_view(thd, ddl_log_state, &ren_table->db, + &ren_table->table_name, new_db, new_alias); + debug_crash_here("ddl_log_rename_before_rename_view"); + rc= mysql_rename_view(thd, new_db, new_alias, &ren_table->db, + &ren_table->table_name); + 
debug_crash_here("ddl_log_rename_after_rename_view"); + if (rc) + { + /* + On error mysql_rename_view() will leave things as such. + */ + ddl_log_disable_entry(ddl_log_state); + debug_crash_here("ddl_log_rename_after_disable_entry"); + } } DBUG_RETURN(rc && !skip_error ? 1 : 0); } @@ -391,6 +455,7 @@ do_rename(THD *thd, TABLE_LIST *ren_table, const LEX_CSTRING *new_db, rename_tables() thd Thread handle table_list List of tables to rename + ddl_log_state ddl logging skip_error Whether to skip errors if_exists Don't give an error if table doesn't exists force_if_exists Set to 1 if we have to log the query with 'IF EXISTS' @@ -403,14 +468,16 @@ do_rename(THD *thd, TABLE_LIST *ren_table, const LEX_CSTRING *new_db, RETURN 0 Ok - table pointer to the table list element which rename failed + 1 error + All tables are reverted to their original names */ -static TABLE_LIST * -rename_tables(THD *thd, TABLE_LIST *table_list, bool skip_error, - bool if_exists, bool *force_if_exists) +static bool +rename_tables(THD *thd, TABLE_LIST *table_list, DDL_LOG_STATE *ddl_log_state, + bool skip_error, bool if_exists, bool *force_if_exists) { TABLE_LIST *ren_table, *new_table; + List<TABLE_PAIR> tmp_tables; DBUG_ENTER("rename_tables"); *force_if_exists= 0; @@ -419,11 +486,48 @@ rename_tables(THD *thd, TABLE_LIST *table_list, bool skip_error, { new_table= ren_table->next_local; - if (is_temporary_table(ren_table) ? - do_rename_temporary(thd, ren_table, new_table, skip_error) : - do_rename(thd, ren_table, &new_table->db, &new_table->table_name, - &new_table->alias, skip_error, if_exists, force_if_exists)) - DBUG_RETURN(ren_table); + if (is_temporary_table(ren_table)) + { + /* + Store renamed temporary tables into a list. + We don't store these in the ddl log to avoid writes and syncs + when only using temporary tables. We don't need the log as + all temporary tables will disappear anyway in a crash. + */ + TABLE_PAIR *pair= (TABLE_PAIR*) thd->alloc(sizeof(*pair)); + if (! 
pair || tmp_tables.push_front(pair, thd->mem_root)) + goto revert_rename; + pair->from= ren_table; + pair->to= new_table; + + if (do_rename_temporary(thd, ren_table, new_table)) + goto revert_rename; + } + else + { + int error; + rename_param param; + error= check_rename(thd, &param, ren_table, &new_table->db, + &new_table->table_name, + &new_table->alias, (skip_error || if_exists)); + if (error < 0) + continue; // Ignore rename (if exists) + if (error > 0) + goto revert_rename; + + if (do_rename(thd, &param, ddl_log_state, + ren_table, &new_table->db, + skip_error, force_if_exists)) + goto revert_rename; + } } DBUG_RETURN(0); + +revert_rename: + /* Revert temporary tables. Normal tables are reverted in the caller */ + List_iterator_fast<TABLE_PAIR> it(tmp_tables); + while (TABLE_PAIR *pair= it++) + do_rename_temporary(thd, pair->to, pair->from); + + DBUG_RETURN(1); } diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 9da8ea1e442..11cdfd92923 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -565,13 +565,16 @@ uint build_table_filename(char *buff, size_t bufflen, const char *db, (void) tablename_to_filename(table_name, tbbuff, sizeof(tbbuff)); char *end = buff + bufflen; - /* Don't add FN_ROOTDIR if mysql_data_home already includes it */ - char *pos = strnmov(buff, mysql_data_home, bufflen); - size_t rootdir_len= strlen(FN_ROOTDIR); - if (pos - rootdir_len >= buff && - memcmp(pos - rootdir_len, FN_ROOTDIR, rootdir_len) != 0) - pos= strnmov(pos, FN_ROOTDIR, end - pos); - pos= strxnmov(pos, end - pos, dbbuff, FN_ROOTDIR, NullS); + char *pos= strnmov(buff, mysql_data_home, bufflen-3); + /* + Add FN_LIBCHAR if mysql_data_home does not include it + In most cases mysql_data_home is just '.' 
+ */ + if (pos[-1] != FN_LIBCHAR) + *pos++= FN_LIBCHAR; + pos= strxnmov(pos, end - 2 - pos, dbbuff,NullS); + *pos++= FN_LIBCHAR; + *pos= 0; #ifdef USE_SYMDIR if (!(flags & SKIP_SYMDIR_ACCESS)) { @@ -621,6 +624,31 @@ uint build_tmptable_filename(THD* thd, char *buff, size_t bufflen) DBUG_RETURN((uint)length); } +/* + Create lower case paths for engines that requires them +*/ + +void build_lower_case_table_filename(char *buff, size_t bufflen, + const LEX_CSTRING *db, + const LEX_CSTRING *table, + uint flags) +{ + char table_name[SAFE_NAME_LEN+1], db_name[SAFE_NAME_LEN+1]; + + DBUG_ASSERT(db->length <= SAFE_NAME_LEN && table->length <= SAFE_NAME_LEN); + + memcpy(db_name, db->str, db->length); + db_name[db->length]= 0; + my_casedn_str(files_charset_info, db_name); + + memcpy(table_name, table->str, table->length); + table_name[table->length]= 0; + my_casedn_str(files_charset_info, table_name); + + build_table_filename(buff, bufflen, db_name, table_name, "", + flags & FN_IS_TMP); +} + /** @brief construct a temporary shadow file name. @@ -4703,10 +4731,8 @@ mysql_rename_table(handlerton *base, const LEX_CSTRING *old_db, const LEX_CSTRING *new_name, uint flags) { THD *thd= current_thd; - char from[FN_REFLEN + 1], to[FN_REFLEN + 1], - lc_from[FN_REFLEN + 1], lc_to[FN_REFLEN + 1]; + char from[FN_REFLEN], to[FN_REFLEN], lc_from[FN_REFLEN], lc_to[FN_REFLEN]; char *from_base= from, *to_base= to; - char tmp_name[SAFE_NAME_LEN+1], tmp_db_name[SAFE_NAME_LEN+1]; handler *file; int error=0; ulonglong save_bits= thd->variables.option_bits; @@ -4728,37 +4754,20 @@ mysql_rename_table(handlerton *base, const LEX_CSTRING *old_db, length= build_table_filename(to, sizeof(to) - 1, new_db->str, new_name->str, "", flags & FN_TO_IS_TMP); // Check if we hit FN_REFLEN bytes along with file extension. 
- if (length+reg_ext_length > FN_REFLEN) + if (length+reg_ext_length >= FN_REFLEN) { my_error(ER_IDENT_CAUSES_TOO_LONG_PATH, MYF(0), (int) sizeof(to)-1, to); DBUG_RETURN(TRUE); } - /* - If lower_case_table_names == 2 (case-preserving but case-insensitive - file system) and the storage is not HA_FILE_BASED, we need to provide - a lowercase file name, but we leave the .frm in mixed case. - */ - if (lower_case_table_names == 2 && file && - !(file->ha_table_flags() & HA_FILE_BASED)) + if (file && file->needs_lower_case_filenames()) /* the replaced condition also tested file for NULL; keep that guard */ { - strmov(tmp_name, old_name->str); - my_casedn_str(files_charset_info, tmp_name); - strmov(tmp_db_name, old_db->str); - my_casedn_str(files_charset_info, tmp_db_name); - - build_table_filename(lc_from, sizeof(lc_from) - 1, tmp_db_name, tmp_name, - "", flags & FN_FROM_IS_TMP); + build_lower_case_table_filename(lc_from, sizeof(lc_from) -1, + old_db, old_name, flags & FN_FROM_IS_TMP); + build_lower_case_table_filename(lc_to, sizeof(lc_to) -1, + new_db, new_name, flags & FN_TO_IS_TMP); from_base= lc_from; - - strmov(tmp_name, new_name->str); - my_casedn_str(files_charset_info, tmp_name); - strmov(tmp_db_name, new_db->str); - my_casedn_str(files_charset_info, tmp_db_name); - - build_table_filename(lc_to, sizeof(lc_to) - 1, tmp_db_name, tmp_name, "", - flags & FN_TO_IS_TMP); - to_base= lc_to; + to_base= lc_to; } if (flags & NO_HA_TABLE) diff --git a/sql/sql_table.h b/sql/sql_table.h index 0b247582c7e..720643d7851 100644 --- a/sql/sql_table.h +++ b/sql/sql_table.h @@ -77,6 +77,10 @@ uint build_table_filename(char *buff, size_t bufflen, const char *db, const char *table, const char *ext, uint flags); uint build_table_shadow_filename(char *buff, size_t bufflen, ALTER_PARTITION_PARAM_TYPE *lpt); +void build_lower_case_table_filename(char *buff, size_t bufflen, + const LEX_CSTRING *db, + const LEX_CSTRING *table, + uint flags); uint build_tmptable_filename(THD* thd, char *buff, size_t bufflen); bool mysql_create_table(THD *thd, TABLE_LIST *create_table, 
Table_specification_st *create_info, diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc index e78e4f1d90c..57d5b9a4c37 100644 --- a/sql/sql_trigger.cc +++ b/sql/sql_trigger.cc @@ -2073,7 +2073,9 @@ bool Trigger::change_on_table_name(void* param_arg) @param[in,out] thd Thread context @param[in] db Old database of subject table @param[in] old_alias Old alias of subject table - @param[in] old_table Old name of subject table + @param[in] old_table Old name of subject table. The difference between + old_table and old_alias is that in case of lower_case_table_names + old_table == lowercase(old_alias) @param[in] new_db New database for subject table @param[in] new_table New name of subject table diff --git a/sql/sql_view.cc b/sql/sql_view.cc index 5df5ecb76a9..e6d726b30d7 100644 --- a/sql/sql_view.cc +++ b/sql/sql_view.cc @@ -37,6 +37,7 @@ #include "sql_cte.h" // check_dependencies_in_with_clauses() #include "opt_trace.h" #include "wsrep_mysqld.h" +#include "debug_sync.h" // debug_crash_here #define MD5_BUFF_LENGTH 33 @@ -2175,7 +2176,8 @@ bool mysql_rename_view(THD *thd, const LEX_CSTRING *new_db, const LEX_CSTRING *new_name, - TABLE_LIST *view) + const LEX_CSTRING *old_db, + const LEX_CSTRING *old_name) { LEX_CSTRING pathstr; File_parser *parser; @@ -2185,7 +2187,7 @@ mysql_rename_view(THD *thd, pathstr.str= (char *) path_buff; pathstr.length= build_table_filename(path_buff, sizeof(path_buff) - 1, - view->db.str, view->table_name.str, + old_db->str, old_name->str, reg_ext, 0); if ((parser= sql_parse_prepare(&pathstr, thd->mem_root, 1)) && @@ -2212,9 +2214,10 @@ mysql_rename_view(THD *thd, goto err; /* rename view and its backups */ - if (rename_in_schema_file(thd, view->db.str, view->table_name.str, + if (rename_in_schema_file(thd, old_db->str, old_name->str, new_db->str, new_name->str)) goto err; + debug_crash_here("rename_view_after_rename_schema_file"); /* crash-injection probe used by crash testing */ dir.str= dir_buff; dir.length= build_table_filename(dir_buff, sizeof(dir_buff) - 1, @@ -2231,16 +2234,25 @@ 
mysql_rename_view(THD *thd, (uchar*)&view_def, view_parameters)) { /* restore renamed view in case of error */ - rename_in_schema_file(thd, new_db->str, new_name->str, view->db.str, - view->table_name.str); + rename_in_schema_file(thd, new_db->str, new_name->str, old_db->str, + old_name->str); goto err; } - } else + } + else DBUG_RETURN(1); /* remove cache entries */ - query_cache_invalidate3(thd, view, 0); - sp_cache_invalidate(); + { + char key[NAME_LEN*2+2], *ptr; /* cache key: db + '\0' + name + '\0' */ + memcpy(key, old_db->str, old_db->length); + ptr= key+ old_db->length; + *ptr++= 0; + memcpy(ptr, old_name->str, old_name->length); /* append the name after the db terminator */ + ptr+= old_name->length; + *ptr++= 0; + query_cache.invalidate(thd, key, (size_t) (ptr-key), 0); + } error= FALSE; err: diff --git a/sql/sql_view.h b/sql/sql_view.h index c1e5dc49da3..536b5f1b784 100644 --- a/sql/sql_view.h +++ b/sql/sql_view.h @@ -53,8 +53,10 @@ extern TYPELIB updatable_views_with_limit_typelib; bool check_duplicate_names(THD *thd, List<Item>& item_list, bool gen_unique_view_names); -bool mysql_rename_view(THD *thd, const LEX_CSTRING *new_db, const LEX_CSTRING *new_name, - TABLE_LIST *view); +bool mysql_rename_view(THD *thd, const LEX_CSTRING *new_db, + const LEX_CSTRING *new_name, + const LEX_CSTRING *old_db, + const LEX_CSTRING *old_name); void make_valid_column_names(THD *thd, List<Item> &item_list); |