From 111a6fd5be55bb6ab3cfca9b71035762fc058b4c Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 25 Jun 2007 10:07:46 +0300 Subject: Creation of new log when maria change version added. storage/maria/ma_loghandler.c: Structure and function to read loghandler file data added. Creation of new log when maria change version added. --- storage/maria/ma_loghandler.c | 60 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index 9ed1d4b9d93..44be624bed0 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -635,6 +635,56 @@ static my_bool translog_write_file_header() } +/* + Information from transaction log file header +*/ + +typedef struct st_loghandler_file_info +{ + ulonglong timestamp; /* Time stamp */ + ulong maria_version; /* Version of maria loghandler */ + ulong mysql_versiob; /* Version of mysql server */ + ulong server_id; /* Server ID */ + uint page_size; /* Loghandler page size */ + uint file_number; /* Number of the file (from the file header) */ +} LOGHANDLER_FILE_INFO; + +/* + @brief Read hander file information from last opened loghandler file + + @param desc header information descriptor to be filled with information + + @retval 0 OK + @retval 1 Error +*/ + +my_bool translog_read_file_header(LOGHANDLER_FILE_INFO *desc) +{ + byte page_buff[TRANSLOG_PAGE_SIZE], *ptr; + DBUG_ENTER("translog_read_file_header"); + + if (my_pread(log_descriptor.log_file_num[0], page_buff, + sizeof(page_buff), 0, MYF(MY_FNABP | MY_WME))) + { + DBUG_PRINT("info", ("log read fail error: %d", my_errno)); + DBUG_RETURN(1); + } + ptr= page_buff + sizeof(maria_trans_file_magic); + desc->timestamp= uint8korr(ptr); + ptr+= 8; + desc->maria_version= uint4korr(ptr); + ptr+= 4; + desc->mysql_versiob= uint4korr(ptr); + ptr+= 4; + desc->server_id= uint4korr(ptr); + ptr+= 2; + desc->page_size= uint2korr(ptr); + ptr+= 2; + desc->file_number= 
uint3korr(ptr); + DBUG_RETURN(0); +} + + /* Initialize transaction log file buffer @@ -1958,6 +2008,7 @@ my_bool translog_init(const char *directory, int old_log_was_recovered= 0, logs_found= 0; uint old_flags= flags; TRANSLOG_ADDRESS sure_page, last_page, last_valid_page; + my_bool version_changed= 0; DBUG_ENTER("translog_init"); loghandler_init(); /* Safe to do many times */ @@ -2201,6 +2252,13 @@ my_bool translog_init(const char *directory, buffer->buffer))); DBUG_EXECUTE("info", translog_check_cursor(&log_descriptor.bc);); } + if (!old_log_was_recovered && old_flags == flags) + { + LOGHANDLER_FILE_INFO info; + if (translog_read_file_header(&info)) + DBUG_RETURN(1); + version_changed= (info.maria_version != TRANSLOG_VERSION_ID); + } } DBUG_PRINT("info", ("Logs found: %d was recovered: %d", logs_found, old_log_was_recovered)); @@ -2221,7 +2279,7 @@ my_bool translog_init(const char *directory, translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0); translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc); } - else if (old_log_was_recovered || old_flags != flags) + else if (old_log_was_recovered || old_flags != flags || version_changed) { /* leave the damaged file untouched */ log_descriptor.horizon+= LSN_ONE_FILE; -- cgit v1.2.1 From 35536366edbfd8bd1799f031f61e8a9ccb5a50af Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 26 Jun 2007 16:49:23 +0200 Subject: WL#3072 Maria Recovery - new program maria_read_log to display and apply log records found in a Maria log (see file's revision comment) - minor, misc fixes storage/maria/Makefile.am: new program maria_read_log storage/maria/ha_maria.cc: create control file if missing storage/maria/ma_blockrec.c: 0 -> LSN_IMPOSSIBLE; comments storage/maria/ma_checkpoint.h: preparations for Checkpoint module storage/maria/ma_close.c: comment storage/maria/ma_control_file.c: renaming constants. 
Possibility to say "open control file but don't create it if it's missing" (used by maria_read_log which does not want to create anything) storage/maria/ma_control_file.h: renaming constants storage/maria/ma_create.c: I had duplicated "linkname" and "linkname_ptr", now I see it's not needed, reverting. Indeed those variables don't contain interesting information; fixing log record accordingly (the links are in ci->data/index_file_name). Storing keystart in log record is needed, to know at which size we must extend the file if we replay LOGREC_CREATE_TABLE. storage/maria/ma_loghandler.c: some structures need to be known to maria_read_log.c, taking them to ma_loghandler.h storage/maria/ma_loghandler.h: we have page_store, adding page_korr. translog_lock() made public, because Checkpoint will need it (to write to control file). Some structures moved from ma_loghandler.c because maria_read_log.c needs them (needs to know the execute-in-REDO-phase hooks of each record). storage/maria/ma_loghandler_lsn.h: constants defined in ma_control_file.h serve everywhere, and they relate to LSNs, so putting them in ma_loghandler_lsn.h. Stronger constraints in LSN_VALID(). storage/maria/ma_pagecache.c: renaming constants storage/maria/ma_recovery.h: copyright storage/maria/ma_test1.c: new prototype storage/maria/ma_test2.c: new prototype storage/maria/trnman_public.h: double-inclusion safe storage/maria/unittest/ma_control_file-t.c: constants renamed, new prototype storage/maria/unittest/ma_test_loghandler-t.c: constants renamed, new prototype storage/maria/unittest/ma_test_loghandler_multigroup-t.c: constants renamed, new prototype storage/maria/unittest/ma_test_loghandler_multithread-t.c: constants renamed, new prototype storage/maria/unittest/ma_test_loghandler_pagecache-t.c: constants renamed, new prototype storage/myisam/mi_close.c: comment storage/maria/maria_read_log.c: program to read and print log records from a Maria transaction log, and optionally apply them to tables. 
Very basic, early version. Should serve as a base for Recovery's code. Designed to be idempotent. Create a log by running maria.test, then cd to var/master-data and run "maria_read_log --only-display" to see info about records; run "maria_read_log --display-and-apply" to also apply the records to tables (it's more interesting if you first wipe out the tables in var/master-data/test, to see how they get re-created). Only a few records are handled by now: LONG_TRANSACTION_ID, COMMIT, FILE_ID, REDO_CREATE_TABLE; place is ready for REDO_INSERT_ROW_HEAD where I could use Monty's help (search for "Monty" in the file). Note: changes to the index pages, index's header and bitmap pages are not properly logged yet, so don't expect the program to work with that. --- storage/maria/Makefile.am | 8 +- storage/maria/ha_maria.cc | 2 +- storage/maria/ma_blockrec.c | 30 +- storage/maria/ma_checkpoint.h | 67 +- storage/maria/ma_close.c | 1 + storage/maria/ma_control_file.c | 40 +- storage/maria/ma_control_file.h | 23 +- storage/maria/ma_create.c | 48 +- storage/maria/ma_loghandler.c | 231 ++++--- storage/maria/ma_loghandler.h | 99 +++ storage/maria/ma_loghandler_lsn.h | 11 +- storage/maria/ma_pagecache.c | 25 +- storage/maria/ma_recovery.h | 2 +- storage/maria/ma_test1.c | 2 +- storage/maria/ma_test2.c | 2 +- storage/maria/maria_read_log.c | 696 +++++++++++++++++++++ storage/maria/trnman_public.h | 4 + storage/maria/unittest/ma_control_file-t.c | 23 +- storage/maria/unittest/ma_test_loghandler-t.c | 10 +- .../unittest/ma_test_loghandler_multigroup-t.c | 10 +- .../unittest/ma_test_loghandler_multithread-t.c | 4 +- .../unittest/ma_test_loghandler_pagecache-t.c | 2 +- storage/myisam/mi_close.c | 1 + 23 files changed, 1093 insertions(+), 248 deletions(-) create mode 100644 storage/maria/maria_read_log.c diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am index fbb25584910..2d11d2f470b 100644 --- a/storage/maria/Makefile.am +++ b/storage/maria/Makefile.am @@ -33,7 +33,7 @@ 
SUBDIRS = . unittest EXTRA_DIST = ma_test_all.sh ma_test_all.res ma_ft_stem.c CMakeLists.txt plug.in pkgdata_DATA = ma_test_all ma_test_all.res pkglib_LIBRARIES = libmaria.a -bin_PROGRAMS = maria_chk maria_pack maria_ftdump +bin_PROGRAMS = maria_chk maria_pack maria_ftdump maria_read_log maria_chk_DEPENDENCIES= $(LIBRARIES) # Only reason to link with libmyisam.a here is that it's where some fulltext # pieces are (but soon we'll remove fulltext dependencies from Maria). @@ -49,6 +49,12 @@ maria_pack_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +maria_read_log_DEPENDENCIES=$(LIBRARIES) +maria_read_log_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h \ ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h \ diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index e05f97a384d..24cc6dfb915 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -2241,7 +2241,7 @@ static int ha_maria_init(void *p) maria_hton->flags= HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES; bzero(maria_log_pagecache, sizeof(*maria_log_pagecache)); maria_data_root= mysql_real_data_home; - res= maria_init() || ma_control_file_create_or_open() || + res= maria_init() || ma_control_file_create_or_open(TRUE) || (init_pagecache(maria_log_pagecache, TRANSLOG_PAGECACHE_SIZE, 0, 0, TRANSLOG_PAGE_SIZE) == 0) || diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index d2512f1e025..17ca22390f4 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -557,7 +557,8 @@ static my_bool check_if_zero(byte *pos, uint length) SYNOPSIS 
_ma_unpin_all_pages() info Maria handler - undo_lsn LSN for undo pages. 0 if we shouldn't write undo (error) + undo_lsn LSN for undo pages. LSN_IMPOSSIBLE if we shouldn't write undo + (error) NOTE We unpin pages in the reverse order as they where pinned; This may not @@ -580,14 +581,15 @@ void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn) DBUG_PRINT("info", ("undo_lsn: %lu", (ulong) undo_lsn)); /* True if not disk error */ - DBUG_ASSERT(undo_lsn != 0 || !info->s->base.transactional); + DBUG_ASSERT((undo_lsn != LSN_IMPOSSIBLE) || !info->s->base.transactional); if (!info->s->base.transactional) { /* If this is a transactional table but with transactionality temporarily disabled (like in ALTER TABLE) we need to give a sensible LSN to pages - and not 0. If this is not a transactional table it will reduce to 0. + and not LSN_IMPOSSIBLE. If this is not a transactional table it will + reduce to LSN_IMPOSSIBLE. */ undo_lsn= info->s->state.create_rename_lsn; } @@ -1958,8 +1960,8 @@ static my_bool write_block_record(MARIA_HA *info, size_t data_length= (size_t) (data - row_pos->data); /* Log REDO changes of head page */ - page_store(log_data+ FILEID_STORE_SIZE, head_block->page); - dirpos_store(log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE, + page_store(log_data + FILEID_STORE_SIZE, head_block->page); + dirpos_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE, row_pos->rownr); log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); @@ -2183,12 +2185,22 @@ crashed: disk_err: /** @todo RECOVERY we are going to let dirty pages go to disk while we have - logged UNDO, this violates WAL. If we have not written any full pages, - all dirty pages are pinned so we could just delete them from the - pagecache. Moreover, we have written some REDOs without a closing UNDO, + logged UNDO, this violates WAL. We must mark the table corrupted! 
+ + @todo RECOVERY we have written some REDOs without a closing UNDO, it's possible that a next operation by this transaction succeeds and then Recovery would glue the "orphan REDOs" to the succeeded operation and - execute the failed REDOs. + execute the failed REDOs. We need some mark "abort this group" in the + log, or mark the table corrupted (then user will repair it and thus REDOs + will be skipped). + + @todo RECOVERY to not let write errors go unnoticed, pagecache_write() + should take a MARIA_HA* in argument, and if it + fails when flushing a page to disk it should call + (*the_maria_ha->write_error_func)(the_maria_ha) + and this hook will mark the table corrupted. + Maybe hook should be stored in the pagecache's block structure, or in a + hash "file->maria_ha*". */ /* Unpin all pinned pages to not cause problems for disk cache */ _ma_unpin_all_pages(info, 0); diff --git a/storage/maria/ma_checkpoint.h b/storage/maria/ma_checkpoint.h index 1ce2ccb7012..c011c8234b7 100644 --- a/storage/maria/ma_checkpoint.h +++ b/storage/maria/ma_checkpoint.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +/* Copyright (C) 2006,2007 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,14 +21,61 @@ /* This is the interface of this module. 
*/ -typedef enum enum_checkpoint_level { - NONE=-1, - INDIRECT, /* just write dirty_pages, transactions table and sync files */ - MEDIUM, /* also flush all dirty pages which were already dirty at prev checkpoint*/ - FULL /* also flush all dirty pages */ +typedef enum enum_ma_checkpoint_level { + CHECKPOINT_NONE= 0, + /* just write dirty_pages, transactions table and sync files */ + CHECKPOINT_INDIRECT, + /* also flush all dirty pages which were already dirty at prev checkpoint */ + CHECKPOINT_MEDIUM, + /* also flush all dirty pages */ + CHECKPOINT_FULL } CHECKPOINT_LEVEL; -void request_asynchronous_checkpoint(CHECKPOINT_LEVEL level); -my_bool execute_synchronous_checkpoint(CHECKPOINT_LEVEL level); -my_bool execute_asynchronous_checkpoint_if_any(); -/* that's all that's needed in the interface */ +C_MODE_START +int ma_checkpoint_init(); +void ma_checkpoint_end(); +int ma_checkpoint_execute(CHECKPOINT_LEVEL level, my_bool no_wait); +C_MODE_END + +/** + @brief reads some LSNs with special trickery + + If a 64-bit variable transitions between both halves being zero to both + halves being non-zero, and back, this function can be used to do a read of + it (without mutex, without atomic load) which always produces a correct + (though maybe slightly old) value (even on 32-bit CPUs). The value is at + least as new as the latest mutex unlock done by the calling thread. + The assumption is that the system sets both 4-byte halves either at the + same time, or one after the other (in any order), but NOT some bytes of the + first half then some bytes of the second half then the rest of bytes of the + first half. With this assumption, the function can detect when it is + seeing an inconsistent value. 
+ + @param x pointer to the LSN variable to read + + @return LSN part (most significant byte always 0) +*/ +#if ( SIZEOF_CHARP >= 8 ) +/* 64-bit CPU, 64-bit reads are atomic */ +#define lsn_read_non_atomic LSN_WITH_FLAGS_TO_LSN +#else +static inline LSN lsn_read_non_atomic_32(const volatile LSN *x) +{ + /* + 32-bit CPU, 64-bit reads may give a mix of old half and new half (old + low bits and new high bits, or the contrary). + */ + for (;;) /* loop until no atomicity problems */ + { + /* + Remove most significant byte in case this is a LSN_WITH_FLAGS object. + Those flags in TRN::first_undo_lsn break the condition on transitions so + they must be removed below. + */ + LSN y= LSN_WITH_FLAGS_TO_LSN(*x); + if (likely((y == LSN_IMPOSSIBLE) || LSN_VALID(y))) + return y; + } +} +#define lsn_read_non_atomic(x) lsn_read_non_atomic_32(&x) +#endif diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c index 34c1bfb4d6d..fdee50f6fde 100644 --- a/storage/maria/ma_close.c +++ b/storage/maria/ma_close.c @@ -85,6 +85,7 @@ int maria_close(register MARIA_HA *info) not change the crashed state. We can NOT write the state in other cases as other threads may be using the file at this point + IF using --external-locking, which does not apply to Maria. */ if (share->mode != O_RDONLY && maria_is_crashed(info)) _ma_state_info_write(share->kfile.file, &share->state, 1); diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c index db5440dc873..66f0c37f4a3 100644 --- a/storage/maria/ma_control_file.c +++ b/storage/maria/ma_control_file.c @@ -40,15 +40,9 @@ #define CONTROL_FILE_FILENO_SIZE 4 #define CONTROL_FILE_SIZE (CONTROL_FILE_FILENO_OFFSET + CONTROL_FILE_FILENO_SIZE) -/* - This module owns these two vars. - uint32 is always atomically updated, but LSN is 8 bytes, we will need - provisions to ensure that it's updated atomically in - ma_control_file_write_and_force(). Probably the log mutex could be - used. TODO. 
-*/ -LSN last_checkpoint_lsn; -uint32 last_logno; +/* This module owns these two vars. */ +LSN last_checkpoint_lsn= LSN_IMPOSSIBLE; +uint32 last_logno= FILENO_IMPOSSIBLE; /** @brief If log's lock should be asserted when writing to control file. @@ -65,16 +59,16 @@ my_bool maria_multi_threaded= FALSE; static int control_file_fd= -1; /* - Initialize control file subsystem - - SYNOPSIS - ma_control_file_create_or_open() + @brief Initialize control file subsystem - Looks for the control file. If absent, it's a fresh start, creates file. + Looks for the control file. If none and creation is requested, creates file. If present, reads it to find out last checkpoint's LSN and last log, updates the last_checkpoint_lsn and last_logno global variables. Called at engine's start. + @param create_if_missing + + @note The format of the control file is: 4 bytes: magic string 4 bytes: checksum of the following bytes @@ -82,11 +76,11 @@ static int control_file_fd= -1; 4 bytes: offset in log where last checkpoint is 4 bytes: number of last log - RETURN - 0 - OK - 1 - Error (in which case the file is left closed) + @return Operation status + @retval 0 OK + @retval 1 Error (in which case the file is left closed) */ -CONTROL_FILE_ERROR ma_control_file_create_or_open() +CONTROL_FILE_ERROR ma_control_file_create_or_open(my_bool create_if_missing) { char buffer[CONTROL_FILE_SIZE]; char name[FN_REFLEN]; @@ -115,6 +109,8 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open() if (create_file) { + if (!create_if_missing) + DBUG_RETURN(CONTROL_FILE_MISSING); if ((control_file_fd= my_create(name, 0, open_flags, MYF(MY_SYNC_DIR))) < 0) DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR); @@ -136,8 +132,8 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open() */ /* init the file with these "undefined" values */ - DBUG_RETURN(ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN, - CONTROL_FILE_IMPOSSIBLE_FILENO, + DBUG_RETURN(ma_control_file_write_and_force(LSN_IMPOSSIBLE, + FILENO_IMPOSSIBLE, 
CONTROL_FILE_UPDATE_ALL)); } @@ -315,8 +311,8 @@ int ma_control_file_end() As this module owns these variables, closing the module forbids access to them (just a safety): */ - last_checkpoint_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; - last_logno= CONTROL_FILE_IMPOSSIBLE_FILENO; + last_checkpoint_lsn= LSN_IMPOSSIBLE; + last_logno= FILENO_IMPOSSIBLE; DBUG_RETURN(close_error); } diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h index c974838684b..fa4ec442e41 100644 --- a/storage/maria/ma_control_file.h +++ b/storage/maria/ma_control_file.h @@ -19,27 +19,17 @@ */ #define CONTROL_FILE_BASE_NAME "maria_control" -/* - indicate absence of the log file number; first log is always number 1, 0 is - impossible. -*/ -#define CONTROL_FILE_IMPOSSIBLE_FILENO 0 -/* logs always have a header */ -#define CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET 0 -/* indicate absence of LSN. */ -#define CONTROL_FILE_IMPOSSIBLE_LSN ((LSN)0) /* Here is the interface of this module */ /* LSN of the last checkoint - (if last_checkpoint_lsn == CONTROL_FILE_IMPOSSIBLE_LSN - then there was never a checkpoint) + (if last_checkpoint_lsn == LSN_IMPOSSIBLE then there was never a checkpoint) */ extern LSN last_checkpoint_lsn; /* - Last log number (if last_logno == - CONTROL_FILE_IMPOSSIBLE_FILENO then there is no log file yet) + Last log number (if last_logno == FILENO_IMPOSSIBLE then there is no log + file yet) */ extern uint32 last_logno; @@ -51,6 +41,7 @@ typedef enum enum_control_file_error { CONTROL_FILE_TOO_BIG, CONTROL_FILE_BAD_MAGIC_STRING, CONTROL_FILE_BAD_CHECKSUM, + CONTROL_FILE_MISSING, CONTROL_FILE_UNKNOWN_ERROR /* any other error */ } CONTROL_FILE_ERROR; @@ -63,11 +54,11 @@ extern "C" { #endif /* - Looks for the control file. If absent, it's a fresh start, create file. - If present, read it to find out last checkpoint's LSN and last log. + Looks for the control file. If none and creation was requested, creates file. 
+ If present, reads it to find out last checkpoint's LSN and last log. Called at engine's start. */ -CONTROL_FILE_ERROR ma_control_file_create_or_open(); +CONTROL_FILE_ERROR ma_control_file_create_or_open(my_bool); /* Write information durably to the control file. Called when we have created a new log (after syncing this log's creation) diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index 53e15deb74b..22b490c907c 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -52,8 +52,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, unique_key_parts,fulltext_keys,offset, not_block_record_extra_length; uint max_field_lengths, extra_header_size; ulong reclength, real_reclength,min_pack_length; - char filename[FN_REFLEN], dlinkname[FN_REFLEN], *dlinkname_ptr= NULL, - klinkname[FN_REFLEN], *klinkname_ptr= NULL; + char filename[FN_REFLEN], linkname[FN_REFLEN], *linkname_ptr; ulong pack_reclength; ulonglong tot_length,max_rows, tmp; enum en_fieldtype type; @@ -628,7 +627,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, share.state.dellink = HA_OFFSET_ERROR; share.state.first_bitmap_with_space= 0; - share.state.create_rename_lsn= 0; + share.state.create_rename_lsn= LSN_IMPOSSIBLE; share.state.process= (ulong) getpid(); share.state.unique= (ulong) 0; share.state.update_count=(ulong) 0; @@ -721,9 +720,9 @@ int maria_create(const char *name, enum data_file_type datafile_type, MY_UNPACK_FILENAME | (have_iext ? MY_REPLACE_EXT : MY_APPEND_EXT)); } - fn_format(klinkname, name, "", MARIA_NAME_IEXT, + fn_format(linkname, name, "", MARIA_NAME_IEXT, MY_UNPACK_FILENAME|MY_APPEND_EXT); - klinkname_ptr= klinkname; + linkname_ptr= linkname; /* Don't create the table if the link or file exists to ensure that one doesn't accidently destroy another table. @@ -739,6 +738,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, (MY_UNPACK_FILENAME | (flags & HA_DONT_TOUCH_DATA) ? 
MY_RETURN_REAL_PATH : 0) | MY_APPEND_EXT); + linkname_ptr= NULL; /* Replace the current file. Don't sync dir now if the data file has the same path. @@ -761,7 +761,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, goto err; } - if ((file= my_create_with_symlink(klinkname_ptr, filename, 0, create_mode, + if ((file= my_create_with_symlink(linkname_ptr, filename, 0, create_mode, MYF(MY_WME|create_flag))) < 0) goto err; errpos=1; @@ -788,19 +788,20 @@ int maria_create(const char *name, enum data_file_type datafile_type, MY_UNPACK_FILENAME | (have_dext ? MY_REPLACE_EXT : MY_APPEND_EXT)); } - fn_format(dlinkname, name, "",MARIA_NAME_DEXT, + fn_format(linkname, name, "",MARIA_NAME_DEXT, MY_UNPACK_FILENAME | MY_APPEND_EXT); - dlinkname_ptr= dlinkname; + linkname_ptr= linkname; create_flag=0; } else { fn_format(filename,name,"", MARIA_NAME_DEXT, MY_UNPACK_FILENAME | MY_APPEND_EXT); + linkname_ptr= NULL; create_flag=MY_DELETE_OLD; } if ((dfile= - my_create_with_symlink(dlinkname_ptr, filename, 0, create_mode, + my_create_with_symlink(linkname_ptr, filename, 0, create_mode, MYF(MY_WME | create_flag | sync_dir))) < 0) goto err; errpos=3; @@ -948,15 +949,15 @@ int maria_create(const char *name, enum data_file_type datafile_type, not log 1 KB of mostly zeroes if this is a small table. 
*/ char empty_string[]= ""; - LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 3]; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 4]; uint total_rec_length= 0; uint i; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= 1 + 2 + + log_array[TRANSLOG_INTERNAL_PARTS + 1].length= 1 + 2 + 2 + kfile_size_before_extension; /* we are needing maybe 64 kB, so don't use the stack */ - log_data= my_malloc(log_array[TRANSLOG_INTERNAL_PARTS + 0].length, MYF(0)); + log_data= my_malloc(log_array[TRANSLOG_INTERNAL_PARTS + 1].length, MYF(0)); if ((log_data == NULL) || - my_pread(file, 1 + 2 + log_data, kfile_size_before_extension, + my_pread(file, 1 + 2 + 2 + log_data, kfile_size_before_extension, 0, MYF(MY_NABP))) goto err_no_lock; /* @@ -965,16 +966,21 @@ int maria_create(const char *name, enum data_file_type datafile_type, */ log_data[0]= test(flags & HA_DONT_TOUCH_DATA); int2store(log_data + 1, kfile_size_before_extension); - log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; + int2store(log_data + 1 + 2, share.base.keystart); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char *)name; + /* we store the end-zero, for Recovery to just pass it to my_create() */ + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= + strlen(log_array[TRANSLOG_INTERNAL_PARTS + 0].str) + 1; + log_array[TRANSLOG_INTERNAL_PARTS + 1].str= log_data; /* symlink description is also needed for re-creation by Recovery: */ - log_array[TRANSLOG_INTERNAL_PARTS + 1].str= - dlinkname_ptr ? dlinkname : empty_string; - log_array[TRANSLOG_INTERNAL_PARTS + 1].length= - strlen(log_array[TRANSLOG_INTERNAL_PARTS + 1].str); - log_array[TRANSLOG_INTERNAL_PARTS + 2].str= - klinkname_ptr ? klinkname : empty_string; + log_array[TRANSLOG_INTERNAL_PARTS + 2].str= (char *) + (ci->data_file_name ? 
ci->data_file_name : empty_string); log_array[TRANSLOG_INTERNAL_PARTS + 2].length= - strlen(log_array[TRANSLOG_INTERNAL_PARTS + 2].str); + strlen(log_array[TRANSLOG_INTERNAL_PARTS + 2].str) + 1; + log_array[TRANSLOG_INTERNAL_PARTS + 3].str= (char *) + (ci->index_file_name ? ci->index_file_name : empty_string); + log_array[TRANSLOG_INTERNAL_PARTS + 3].length= + strlen(log_array[TRANSLOG_INTERNAL_PARTS + 3].str) + 1; for (i= TRANSLOG_INTERNAL_PARTS; i < (sizeof(log_array)/sizeof(log_array[0])); i++) total_rec_length+= log_array[i].length; diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index 44be624bed0..6f238ef4055 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -61,21 +61,6 @@ #define COMPRESSED_LSN_MAX_STORE_SIZE (2 + LSN_STORE_SIZE) #define MAX_NUMBER_OF_LSNS_PER_RECORD 2 -/* record parts descriptor */ -struct st_translog_parts -{ - /* full record length */ - translog_size_t record_length; - /* full record length with chunk headers */ - translog_size_t total_record_length; - /* current part index */ - uint current; - /* total number of elements in parts */ - uint elements; - /* array of parts (LEX_STRING) */ - LEX_STRING *parts; -}; - /* log write buffer descriptor */ struct st_translog_buffer { @@ -176,15 +161,6 @@ static byte end_of_log= 0; my_bool translog_inited= 0; -/* record classes */ -enum record_class -{ - LOGRECTYPE_NOT_ALLOWED, - LOGRECTYPE_VARIABLE_LENGTH, - LOGRECTYPE_PSEUDOFIXEDLENGTH, - LOGRECTYPE_FIXEDLENGTH -}; - /* chunk types */ #define TRANSLOG_CHUNK_LSN 0x00 /* 0 chunk refer as LSN (head or tail */ #define TRANSLOG_CHUNK_FIXED (1 << 6) /* 1 (pseudo)fixed record (also LSN) */ @@ -196,46 +172,6 @@ enum record_class /* compressed (relative) LSN constants */ #define TRANSLOG_CLSN_LEN_BITS 0xC0 /* Mask to get compressed LSN length */ -typedef my_bool(*prewrite_rec_hook) (enum translog_record_type type, - TRN *trn, struct st_maria_share *share, - struct st_translog_parts *parts); - 
-typedef my_bool(*inwrite_rec_hook) (enum translog_record_type type, - TRN *trn, - LSN *lsn, - struct st_translog_parts *parts); - -typedef uint16(*read_rec_hook) (enum translog_record_type type, - uint16 read_length, uchar *read_buff, - byte *decoded_buff); - -/* - Descriptor of log record type - Note: Don't reorder because of constructs later... -*/ -struct st_log_record_type_descriptor -{ - /* internal class of the record */ - enum record_class class; - /* - length for fixed-size record, pseudo-fixed record - length with uncompressed LSNs - */ - uint16 fixed_length; - /* how much record body (belonged to headers too) read with headers */ - uint16 read_header_len; - /* HOOK for writing the record called before lock */ - prewrite_rec_hook prewrite_hook; - /* HOOK for writing the record called when LSN is known, inside lock */ - inwrite_rec_hook inwrite_hook; - /* HOOK for reading headers */ - read_rec_hook read_hook; - /* - For pseudo fixed records number of compressed LSNs followed by - system header - */ - int16 compressed_LSN; -}; #include @@ -257,27 +193,32 @@ static my_bool write_hook_for_undo(enum translog_record_type type, NOTE that after first public Maria release, these can NOT be changed */ -typedef struct st_log_record_type_descriptor LOG_DESC; -static LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES]; +LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES]; static LOG_DESC INIT_LOGREC_FIXED_RECORD_0LSN_EXAMPLE= -{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0}; +{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0, + "fixed0example", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0, +"variable0example", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_FIXED_RECORD_1LSN_EXAMPLE= -{LOGRECTYPE_PSEUDOFIXEDLENGTH, 7, 7, NULL, NULL, NULL, 1}; +{LOGRECTYPE_PSEUDOFIXEDLENGTH, 7, 7, NULL, NULL, NULL, 1, 
+"fixed1example", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 12, NULL, NULL, NULL, 1}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 12, NULL, NULL, NULL, 1, +"variable1example", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_FIXED_RECORD_2LSN_EXAMPLE= -{LOGRECTYPE_PSEUDOFIXEDLENGTH, 23, 23, NULL, NULL, NULL, 2}; +{LOGRECTYPE_PSEUDOFIXEDLENGTH, 23, 23, NULL, NULL, NULL, 2, +"fixed2example", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 19, NULL, NULL, NULL, 2}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 19, NULL, NULL, NULL, 2, +"variable2example", FALSE, NULL, NULL}; void example_loghandler_init() @@ -298,126 +239,158 @@ void example_loghandler_init() static LOG_DESC INIT_LOGREC_RESERVED_FOR_CHUNKS23= -{LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0 }; +{LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0, + "reserved", FALSE, NULL, NULL }; static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_HEAD= {LOGRECTYPE_VARIABLE_LENGTH, 0, FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL, - write_hook_for_redo, NULL, 0}; + write_hook_for_redo, NULL, 0, + "redo_insert_row_head", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_TAIL= {LOGRECTYPE_VARIABLE_LENGTH, 0, FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL, - write_hook_for_redo, NULL, 0}; + write_hook_for_redo, NULL, 0, + "redo_insert_row_tail", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_BLOB= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 8, NULL, write_hook_for_redo, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 8, NULL, write_hook_for_redo, NULL, 0, + "redo_insert_row_blob", FALSE, NULL, NULL}; /*QQQ:TODO:header???*/ static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_BLOBS= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, write_hook_for_redo, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, FILEID_STORE_SIZE, NULL, + write_hook_for_redo, NULL, 0, + "redo_insert_row_blobs", FALSE, NULL, 
NULL}; static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_HEAD= {LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, - NULL, write_hook_for_redo, NULL, 0}; + NULL, write_hook_for_redo, NULL, 0, + "redo_purge_row_head", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_TAIL= {LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, - NULL, write_hook_for_redo, NULL, 0}; + NULL, write_hook_for_redo, NULL, 0, + "redo_purge_row_tail", FALSE, NULL, NULL}; /* QQQ: TODO: variable and fixed size??? */ static LOG_DESC INIT_LOGREC_REDO_PURGE_BLOCKS= {LOGRECTYPE_VARIABLE_LENGTH, 0, - FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE + PAGE_STORE_SIZE + - PAGERANGE_STORE_SIZE, - NULL, write_hook_for_redo, NULL, 0}; + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE, + NULL, write_hook_for_redo, NULL, 0, + "redo_purge_blocks", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_DELETE_ROW= -{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0}; +{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0, + "redo_delete_row", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_UPDATE_ROW_HEAD= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0, + "redo_update_row_head", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_INDEX= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0, + "redo_index", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_UNDELETE_ROW= -{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0}; +{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0, + "redo_undelete_row", FALSE, NULL, NULL}; static LOG_DESC INIT_LOGREC_CLR_END= -{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, 
NULL, write_hook_for_redo, NULL, 1}; +{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, write_hook_for_redo, NULL, 1, + "clr_end", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_PURGE_END= -{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1}; +{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1, + "purge_end", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_UNDO_ROW_INSERT= {LOGRECTYPE_FIXEDLENGTH, LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, - NULL, write_hook_for_undo, NULL, 0}; + NULL, write_hook_for_undo, NULL, 0, + "undo_row_insert", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_UNDO_ROW_DELETE= {LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, - NULL, write_hook_for_undo, NULL, 0}; + NULL, write_hook_for_undo, NULL, 0, + "undo_row_delete", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_UNDO_ROW_UPDATE= {LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, - NULL, write_hook_for_undo, NULL, 1}; + NULL, write_hook_for_undo, NULL, 1, + "undo_row_update", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_UNDO_ROW_PURGE= {LOGRECTYPE_PSEUDOFIXEDLENGTH, LSN_STORE_SIZE, LSN_STORE_SIZE, - NULL, NULL, NULL, 1}; + NULL, NULL, NULL, 1, + "undo_row_purge", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 10, NULL, write_hook_for_undo, NULL, 1}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 10, NULL, write_hook_for_undo, NULL, 1, + "undo_key_insert", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 15, NULL, write_hook_for_undo, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 15, NULL, write_hook_for_undo, NULL, 0, + "undo_key_delete", TRUE, NULL, NULL}; // QQ: why not compressed? 
static LOG_DESC INIT_LOGREC_PREPARE= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0, + "prepare", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_PREPARE_WITH_UNDO_PURGE= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 5, NULL, NULL, NULL, 1}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 5, NULL, NULL, NULL, 1, + "prepare_with_undo_purge", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_COMMIT= -{LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL, NULL, NULL, 0}; +{LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL, NULL, NULL, 0, + "commit", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_COMMIT_WITH_UNDO_PURGE= -{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1}; +{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1, + "commit_with_undo_purge", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_CHECKPOINT= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0, + "checkpoint", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_CREATE_TABLE= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 1 + 2, NULL, NULL, NULL, 0, +"redo_create_table", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_RENAME_TABLE= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0, + "redo_rename_table", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_DROP_TABLE= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0, + "redo_drop_table", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_DELETE_ALL= {LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE, - NULL, NULL, NULL, 0}; + NULL, NULL, NULL, 0, + "redo_delete_all", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_REPAIR_TABLE= {LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + 4, FILEID_STORE_SIZE + 4, - NULL, NULL, NULL, 0}; + NULL, NULL, NULL, 0, + "redo_repair_table", TRUE, NULL, NULL}; 
static LOG_DESC INIT_LOGREC_FILE_ID= -{LOGRECTYPE_VARIABLE_LENGTH, 0, 4, NULL, NULL, NULL, 0}; +{LOGRECTYPE_VARIABLE_LENGTH, 0, 2, NULL, NULL, NULL, 0, + "file_id", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_LONG_TRANSACTION_ID= -{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0}; +{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0, + "long_transaction_id", TRUE, NULL, NULL}; const myf log_write_flags= MY_WME | MY_NABP | MY_WAIT_IF_FULL; @@ -701,7 +674,7 @@ static my_bool translog_buffer_init(struct st_translog_buffer *buffer) { DBUG_ENTER("translog_buffer_init"); /* This buffer offset */ - buffer->last_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + buffer->last_lsn= LSN_IMPOSSIBLE; /* This Buffer File */ buffer->file= -1; buffer->overlay= 0; @@ -779,7 +752,7 @@ static my_bool translog_create_new_file() translog_write_file_header()) DBUG_RETURN(1); - if (ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN, file_no, + if (ma_control_file_write_and_force(LSN_IMPOSSIBLE, file_no, CONTROL_FILE_UPDATE_ONLY_LOGNO)) DBUG_RETURN(1); @@ -1206,7 +1179,7 @@ static void translog_start_buffer(struct st_translog_buffer *buffer, (ulong) LSN_OFFSET(log_descriptor.horizon), (ulong) LSN_OFFSET(log_descriptor.horizon))); DBUG_ASSERT(buffer_no == buffer->buffer_no); - buffer->last_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + buffer->last_lsn= LSN_IMPOSSIBLE; buffer->offset= log_descriptor.horizon; buffer->file= log_descriptor.log_file_num[0]; buffer->overlay= 0; @@ -2088,7 +2061,7 @@ my_bool translog_init(const char *directory, i, (ulong) log_descriptor.buffers + i)); } - logs_found= (last_logno != CONTROL_FILE_IMPOSSIBLE_FILENO); + logs_found= (last_logno != FILENO_IMPOSSIBLE); if (logs_found) { @@ -2100,7 +2073,7 @@ my_bool translog_init(const char *directory, find the log end */ - if (LSN_FILE_NO(last_checkpoint_lsn) == CONTROL_FILE_IMPOSSIBLE_FILENO) + if (LSN_FILE_NO(last_checkpoint_lsn) == FILENO_IMPOSSIBLE) { DBUG_ASSERT(LSN_OFFSET(last_checkpoint_lsn) == 0); /* there was no 
checkpoints we will read from the beginning */ @@ -2138,7 +2111,7 @@ my_bool translog_init(const char *directory, /* TODO: check page size */ - last_valid_page= CONTROL_FILE_IMPOSSIBLE_LSN; + last_valid_page= LSN_IMPOSSIBLE; /* scan and validate pages */ do { @@ -2186,7 +2159,7 @@ my_bool translog_init(const char *directory, current_page= LSN_REPLACE_OFFSET(current_page, TRANSLOG_PAGE_SIZE); } while (LSN_FILE_NO(current_page) <= LSN_FILE_NO(last_page) && !old_log_was_recovered); - if (last_valid_page == CONTROL_FILE_IMPOSSIBLE_LSN) + if (last_valid_page == LSN_IMPOSSIBLE) { /* Panic!!! Even page which should be valid is invalid */ /* TODO: issue error */ @@ -2272,7 +2245,7 @@ my_bool translog_init(const char *directory, open_logfile_by_number_no_cache(1)) == -1 || translog_write_file_header()) DBUG_RETURN(1); - if (ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN, 1, + if (ma_control_file_write_and_force(LSN_IMPOSSIBLE, 1, CONTROL_FILE_UPDATE_ONLY_LOGNO)) DBUG_RETURN(1); /* assign buffer 0 */ @@ -2405,7 +2378,7 @@ void translog_destroy() 1 Error */ -static my_bool translog_lock() +my_bool translog_lock() { struct st_translog_buffer *current_buffer; DBUG_ENTER("translog_lock"); @@ -2438,7 +2411,7 @@ static my_bool translog_lock() 1 Error */ -static inline my_bool translog_unlock() +my_bool translog_unlock() { DBUG_ENTER("translog_unlock"); translog_buffer_unlock(log_descriptor.bc.buffer); @@ -4312,14 +4285,14 @@ my_bool translog_write_record(LSN *lsn, } if (unlikely(!(trn->first_undo_lsn & TRANSACTION_LOGGED_LONG_ID))) { - LSN lsn; + LSN dummy_lsn; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; uchar log_data[6]; int6store(log_data, trn->trid); log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); trn->first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID; /* no recursion */ - if (unlikely(translog_write_record(&lsn, LOGREC_LONG_TRANSACTION_ID, + if 
(unlikely(translog_write_record(&dummy_lsn, LOGREC_LONG_TRANSACTION_ID, trn, NULL, sizeof(log_data), sizeof(log_array)/sizeof(log_array[0]), log_array, NULL))) @@ -4404,6 +4377,8 @@ my_bool translog_write_record(LSN *lsn, } } + DBUG_PRINT("info", ("LSN: (%lu,0x%lx)", (ulong) LSN_FILE_NO(*lsn), + (ulong) LSN_OFFSET(*lsn))); DBUG_RETURN(rc); } @@ -5093,7 +5068,7 @@ translog_read_record_header_scan(TRANSLOG_SCANNER_DATA - it is like translog_read_record_header, but read next record, so see its NOTES. - in case of end of the log buff->lsn will be set to - (CONTROL_FILE_IMPOSSIBLE_LSN) + (LSN_IMPOSSIBLE) RETURN 0 error @@ -5138,7 +5113,7 @@ translog_size_t translog_read_next_record_header(TRANSLOG_SCANNER_DATA if (scanner->page[scanner->page_offset] == 0) { /* Last record was read */ - buff->lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + buff->lsn= LSN_IMPOSSIBLE; /* Return 'end of log' marker */ DBUG_RETURN(TRANSLOG_RECORD_HEADER_MAX_SIZE + 1); } @@ -5300,7 +5275,7 @@ translog_size_t translog_read_record(LSN lsn, if (data == NULL) { - DBUG_ASSERT(lsn != CONTROL_FILE_IMPOSSIBLE_LSN); + DBUG_ASSERT(lsn != LSN_IMPOSSIBLE); data= &internal_data; } if (lsn || @@ -5739,7 +5714,7 @@ int translog_assign_id_to_share(MARIA_SHARE *share, TRN *trn) strlen() */ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= - strlen(share->open_file_name); + strlen(share->open_file_name) + 1; if (unlikely(translog_write_record(&lsn, LOGREC_FILE_ID, trn, share, sizeof(log_data) + log_array[TRANSLOG_INTERNAL_PARTS + @@ -5773,3 +5748,15 @@ void translog_deassign_id_from_share(MARIA_SHARE *share) my_atomic_storeptr((void **)&id_to_share[share->id], 0); my_atomic_rwlock_rdunlock(&LOCK_id_to_share); } + + +/** + @brief returns the LSN of the first record starting in this log + + @note so far works only for the very first log created on this system +*/ + +LSN first_lsn_in_log() +{ + return MAKE_LSN(1, TRANSLOG_PAGE_SIZE + log_descriptor.page_overhead); +} diff --git a/storage/maria/ma_loghandler.h 
b/storage/maria/ma_loghandler.h index 0a160a9bc53..22b8cca3a08 100644 --- a/storage/maria/ma_loghandler.h +++ b/storage/maria/ma_loghandler.h @@ -1,3 +1,8 @@ +// TODO copyright + +#ifndef _ma_loghandler_h +#define _ma_loghandler_h + /* transaction log default cache size (TODO: make it global variable) */ #define TRANSLOG_PAGECACHE_SIZE 1024*1024*2 /* transaction log default file size (TODO: make it global variable) */ @@ -20,6 +25,7 @@ #define TRANSLOG_PAGE_SIZE (8*1024) #include "ma_loghandler_lsn.h" +#include "trnman_public.h" /* short transaction ID type */ typedef uint16 SHORT_TRANSACTION_ID; @@ -41,6 +47,10 @@ struct st_maria_share; #define page_store(T,A) int5store(T,A) #define dirpos_store(T,A) ((*(uchar*) (T)) = A) #define pagerange_store(T,A) int2store(T,A) +#define fileid_korr(P) uint2korr(P) +#define page_korr(P) uint5korr(P) +#define dirpos_korr(P) (P[0]) +#define pagerange_korr(P) uint2korr(P) /* Length of disk drive sector size (we assume that writing it @@ -228,10 +238,99 @@ extern translog_size_t translog_read_next_record_header(TRANSLOG_SCANNER_DATA *scanner, TRANSLOG_HEADER_BUFFER *buff); +extern my_bool translog_lock(); +extern my_bool translog_unlock(); extern void translog_lock_assert_owner(); extern TRANSLOG_ADDRESS translog_get_horizon(); extern int translog_assign_id_to_share(struct st_maria_share *share, struct st_transaction *trn); extern void translog_deassign_id_from_share(struct st_maria_share *share); extern my_bool translog_inited; + +/* + all the rest added because of recovery; should we make + ma_loghandler_for_recovery.h ? 
+*/ +extern LSN first_lsn_in_log(); + +/* record parts descriptor */ +struct st_translog_parts +{ + /* full record length */ + translog_size_t record_length; + /* full record length with chunk headers */ + translog_size_t total_record_length; + /* current part index */ + uint current; + /* total number of elements in parts */ + uint elements; + /* array of parts (LEX_STRING) */ + LEX_STRING *parts; +}; + +typedef my_bool(*prewrite_rec_hook) (enum translog_record_type type, + TRN *trn, struct st_maria_share *share, + struct st_translog_parts *parts); + +typedef my_bool(*inwrite_rec_hook) (enum translog_record_type type, + TRN *trn, + LSN *lsn, + struct st_translog_parts *parts); + +typedef uint16(*read_rec_hook) (enum translog_record_type type, + uint16 read_length, uchar *read_buff, + byte *decoded_buff); + + +/* record classes */ +enum record_class +{ + LOGRECTYPE_NOT_ALLOWED, + LOGRECTYPE_VARIABLE_LENGTH, + LOGRECTYPE_PSEUDOFIXEDLENGTH, + LOGRECTYPE_FIXEDLENGTH +}; + +/* C++ can't bear that a variable's name is "class" */ +#ifndef __cplusplus +/* + Descriptor of log record type + Note: Don't reorder because of constructs later... 
+*/ +typedef struct st_log_record_type_descriptor +{ + /* internal class of the record */ + enum record_class class; + /* + length for fixed-size record, pseudo-fixed record + length with uncompressed LSNs + */ + uint16 fixed_length; + /* how much record body (belonged to headers too) read with headers */ + uint16 read_header_len; + /* HOOK for writing the record called before lock */ + prewrite_rec_hook prewrite_hook; + /* HOOK for writing the record called when LSN is known, inside lock */ + inwrite_rec_hook inwrite_hook; + /* HOOK for reading headers */ + read_rec_hook read_hook; + /* + For pseudo fixed records number of compressed LSNs followed by + system header + */ + int16 compressed_LSN; + /* the rest is for maria_read_log & Recovery */ + /** @brief for debug error messages or "maria_read_log" command-line tool */ + const char *name; + my_bool record_ends_group; + /* a function to execute when we see the record during the REDO phase */ + int (*record_execute_in_redo_phase)(const TRANSLOG_HEADER_BUFFER *); + /* a function to execute when we see the record during the UNDO phase */ + int (*record_execute_in_undo_phase)(const TRANSLOG_HEADER_BUFFER *); +} LOG_DESC; + +extern LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES]; +#endif + C_MODE_END +#endif diff --git a/storage/maria/ma_loghandler_lsn.h b/storage/maria/ma_loghandler_lsn.h index c641337e8ba..af7594e3b00 100644 --- a/storage/maria/ma_loghandler_lsn.h +++ b/storage/maria/ma_loghandler_lsn.h @@ -24,7 +24,7 @@ typedef TRANSLOG_ADDRESS LSN; #define LSN_FILE_NO(L) ((L) >> 32) /* Gets raw file number part of a LSN/log address */ -#define LSN_FINE_NO_PART(L) ((L) & ((int64)0xFFFFFF00000000LL)) +#define LSN_FILE_NO_PART(L) ((L) & ((int64)0xFFFFFF00000000LL)) /* Gets record offset of a LSN/log address */ #define LSN_OFFSET(L) ((L) & 0xFFFFFFFFL) @@ -33,7 +33,9 @@ typedef TRANSLOG_ADDRESS LSN; #define MAKE_LSN(F,S) ((((uint64)(F)) << 32) | (S)) /* checks LSN */ -#define LSN_VALID(L) DBUG_ASSERT((L) 
>= 0 && (L) < (uint64)0xFFFFFFFFFFFFFFLL) +#define LSN_VALID(L) \ + ((LSN_FILE_NO_PART(L) != FILENO_IMPOSSIBLE) && \ + (LSN_OFFSET(L) != LOG_OFFSET_IMPOSSIBLE)) /* size of stored LSN on a disk, don't change it! */ #define LSN_STORE_SIZE 7 @@ -51,7 +53,7 @@ typedef TRANSLOG_ADDRESS LSN; /* what we need to add to LSN to increase it on one file */ #define LSN_ONE_FILE ((int64)0x100000000LL) -#define LSN_REPLACE_OFFSET(L, S) (LSN_FINE_NO_PART(L) | (S)) +#define LSN_REPLACE_OFFSET(L, S) (LSN_FILE_NO_PART(L) | (S)) /* an 8-byte type whose most significant byte is used for "flags"; 7 @@ -61,4 +63,7 @@ typedef LSN LSN_WITH_FLAGS; #define LSN_WITH_FLAGS_TO_LSN(x) (x & ULL(0x00FFFFFFFFFFFFFF)) #define LSN_WITH_FLAGS_TO_FLAGS(x) (x & ULL(0xFF00000000000000)) +#define FILENO_IMPOSSIBLE 0 /**< log file's numbering starts at 1 */ +#define LOG_OFFSET_IMPOSSIBLE 0 /**< log always has a header */ +#define LSN_IMPOSSIBLE 0 #endif diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index ae42f702b0a..b1ebfbbe7c6 100755 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -587,11 +587,7 @@ static uint pagecache_fwrite(PAGECACHE *pagecache, DBUG_PRINT("info", ("Log handler call")); /* TODO: integrate with page format */ lsn= lsn_korr(buffer + PAGE_LSN_OFFSET); - /* - check CONTROL_FILE_IMPOSSIBLE_FILENO & - CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET - */ - DBUG_ASSERT(lsn != 0); + DBUG_ASSERT(LSN_VALID(lsn)); translog_flush(lsn); } DBUG_RETURN(my_pwrite(filedesc->file, buffer, pagecache->block_size, @@ -2474,7 +2470,7 @@ static void check_and_set_lsn(LSN lsn, PAGECACHE_BLOCK_LINK *block) lock lock change pin pin page first_REDO_LSN_for_page do not set it if it is zero - lsn if it is not CONTROL_FILE_IMPOSSIBLE_LSN (0) and it + lsn if it is not LSN_IMPOSSIBLE (0) and it is bigger then LSN on the page it will be written on the page @@ -2566,7 +2562,7 @@ void pagecache_unlock(PAGECACHE *pagecache, pagecache pointer to a page cache data structure file 
handler for the file for the block of data to be read pageno number of the block of data in the file - lsn if it is not CONTROL_FILE_IMPOSSIBLE_LSN (0) and it + lsn if it is not LSN_IMPOSSIBLE (0) and it is bigger then LSN on the page it will be written on the page */ @@ -2635,10 +2631,9 @@ void pagecache_unpin(PAGECACHE *pagecache, link direct link to page (returned by read or write) lock lock change pin pin page - first_REDO_LSN_for_page do not set it if it is zero - lsn if it is not CONTROL_FILE_IMPOSSIBLE_LSN (0) and it - is bigger then LSN on the page it will be written on - the page + first_REDO_LSN_for_page do not set it if it is LSN_IMPOSSIBLE (0) + lsn if it is not LSN_IMPOSSIBLE and it is bigger then + LSN on the page it will be written on the page */ void pagecache_unlock_by_link(PAGECACHE *pagecache, @@ -2681,7 +2676,7 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache, DBUG_ASSERT(pagecache->can_be_used); inc_counter_for_resize_op(pagecache); - if (first_REDO_LSN_for_page) + if (first_REDO_LSN_for_page != LSN_IMPOSSIBLE) { /* LOCK_READ_UNLOCK is ok here as the page may have first locked @@ -2694,10 +2689,8 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache, if (block->rec_lsn == 0) block->rec_lsn= first_REDO_LSN_for_page; } - if (lsn != 0) - { + if (lsn != LSN_IMPOSSIBLE) check_and_set_lsn(lsn, block); - } if (make_lock_and_pin(pagecache, block, lock, pin)) DBUG_ASSERT(0); /* should not happend */ @@ -2726,7 +2719,7 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache, pagecache_unpin_by_link() pagecache pointer to a page cache data structure link direct link to page (returned by read or write) - lsn if it is not CONTROL_FILE_IMPOSSIBLE_LSN (0) and it + lsn if it is not LSN_IMPOSSIBLE (0) and it is bigger then LSN on the page it will be written on the page */ diff --git a/storage/maria/ma_recovery.h b/storage/maria/ma_recovery.h index d2901f5724c..42c5071babd 100644 --- a/storage/maria/ma_recovery.h +++ b/storage/maria/ma_recovery.h @@ -1,4 +1,4 
@@ -/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +/* Copyright (C) 2006,2007 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/storage/maria/ma_test1.c b/storage/maria/ma_test1.c index 028e02ab9d1..a87e3445082 100644 --- a/storage/maria/ma_test1.c +++ b/storage/maria/ma_test1.c @@ -60,7 +60,7 @@ int main(int argc,char *argv[]) if (maria_init() || (init_pagecache(maria_pagecache, IO_SIZE*16, 0, 0, maria_block_size) == 0) || - ma_control_file_create_or_open() || + ma_control_file_create_or_open(TRUE) || (init_pagecache(maria_log_pagecache, TRANSLOG_PAGECACHE_SIZE, 0, 0, TRANSLOG_PAGE_SIZE) == 0) || diff --git a/storage/maria/ma_test2.c b/storage/maria/ma_test2.c index bbbb4fca1bf..1839efd0813 100644 --- a/storage/maria/ma_test2.c +++ b/storage/maria/ma_test2.c @@ -224,7 +224,7 @@ int main(int argc, char *argv[]) /* Maria requires that we always have a page cache */ if ((init_pagecache(maria_pagecache, pagecache_size, 0, 0, maria_block_size) == 0) || - ma_control_file_create_or_open() || + ma_control_file_create_or_open(TRUE) || (init_pagecache(maria_log_pagecache, TRANSLOG_PAGECACHE_SIZE, 0, 0, TRANSLOG_PAGE_SIZE) == 0) || diff --git a/storage/maria/maria_read_log.c b/storage/maria/maria_read_log.c new file mode 100644 index 00000000000..7bb15e27f7a --- /dev/null +++ b/storage/maria/maria_read_log.c @@ -0,0 +1,696 @@ +/* Copyright (C) 2007 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" +#include + +#define PCACHE_SIZE (1024*1024*10) +#define LOG_FLAGS 0 +#define LOG_FILE_SIZE (1024L*1024L) + + +static PAGECACHE pagecache; + +static const char *load_default_groups[]= { "maria_read_log",0 }; +static void get_options(int *argc,char * * *argv); +#ifndef DBUG_OFF +static const char *default_dbug_option; +#endif +static my_bool opt_only_display, opt_display_and_apply; + +struct TRN_FOR_RECOVERY +{ + LSN group_start_lsn, undo_lsn; + TrID long_trid; +}; + +struct TRN_FOR_RECOVERY all_active_trans[SHORT_TRID_MAX + 1]; +MARIA_HA *all_tables[SHORT_TRID_MAX + 1]; + +static void end_of_redo_phase(); +static void display_record_position(const LOG_DESC *log_desc, + const TRANSLOG_HEADER_BUFFER *rec, + uint number); +static int display_and_apply_record(const LOG_DESC *log_desc, + const TRANSLOG_HEADER_BUFFER *rec); +#define prototype_exec_hook(R) \ +static int exec_LOGREC_ ## R(const TRANSLOG_HEADER_BUFFER *rec) +prototype_exec_hook(LONG_TRANSACTION_ID); +prototype_exec_hook(CHECKPOINT); +prototype_exec_hook(REDO_CREATE_TABLE); +prototype_exec_hook(FILE_ID); +prototype_exec_hook(REDO_INSERT_ROW_HEAD); +prototype_exec_hook(COMMIT); +/* + To implement REDO_DROP_TABLE and REDO_RENAME_TABLE, we would need to go + through the all_tables[] array, find all open instances of the + table-to-drop-or-rename, and remove them from the array. + We however know that in real Recovery, we don't have to handle those log + records at all, same for REDO_CREATE_TABLE. + So for now, we can use this program to replay/debug a sequence of CREATE + + DMLs, but not DROP/RENAME; it is probably enough for a start. 
+*/ + +int main(int argc, char **argv) +{ + LSN lsn; + char **default_argv; + MY_INIT(argv[0]); + + load_defaults("my", load_default_groups, &argc, &argv); + default_argv= argv; + get_options(&argc, &argv); + + maria_data_root= "."; + +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\maria_read_log.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/maria_read_log.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + if (maria_init()) + { + fprintf(stderr, "Can't init Maria engine (%d)\n", errno); + goto err; + } + /* we don't want to create a control file, it MUST exist */ + if (ma_control_file_create_or_open(FALSE)) + { + fprintf(stderr, "Can't open control file (%d)\n", errno); + goto err; + } + if (last_logno == FILENO_IMPOSSIBLE) + { + fprintf(stderr, "Can't find any log\n"); + goto err; + } + if (init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE) == 0) + { + fprintf(stderr, "Got error in init_pagecache() (errno: %d)\n", errno); + goto err; + } + /* + If log handler does not find the "last_logno" log it will return error, + which is good. + But if it finds a log and this log was crashed, it will create a new log, + which is useless. TODO: start log handler in read-only mode. 
+ */ + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, + TRANSLOG_DEFAULT_FLAGS)) + { + fprintf(stderr, "Can't init loghandler (%d)\n", errno); + goto err; + } + + /* install hooks for execution */ +#define install_exec_hook(R) \ + log_record_type_descriptor[LOGREC_ ## R].record_execute_in_redo_phase= \ + exec_LOGREC_ ## R; + install_exec_hook(LONG_TRANSACTION_ID); + install_exec_hook(CHECKPOINT); + install_exec_hook(REDO_CREATE_TABLE); + install_exec_hook(FILE_ID); + install_exec_hook(REDO_INSERT_ROW_HEAD); + install_exec_hook(COMMIT); + + if (opt_only_display) + printf("You are using --only-display, NOTHING will be written to disk\n"); + + lsn= first_lsn_in_log(); /*could also be last_checkpoint_lsn */ + + TRANSLOG_HEADER_BUFFER rec; + struct st_translog_scanner_data scanner; + uint i= 1; + + translog_size_t len= translog_read_record_header(lsn, &rec); + + if (len == (TRANSLOG_RECORD_HEADER_MAX_SIZE + 1)) + { + printf("EOF on the log\n"); + goto end; + } + + if (translog_init_scanner(lsn, 1, &scanner)) + { + fprintf(stderr, "Scanner init failed\n"); + goto err; + } + for (;;i++) + { + uint16 sid= rec.short_trid; + const LOG_DESC *log_desc= &log_record_type_descriptor[rec.type]; + display_record_position(log_desc, &rec, i); + + /* + A complete group is a set of log records with an "end mark" record + (e.g. a set of REDOs for an operation, terminated by an UNDO for this + operation); if there is no "end mark" record the group is incomplete + and won't be executed. + */ + if (log_desc->record_ends_group) + { + if (all_active_trans[sid].group_start_lsn != LSN_IMPOSSIBLE) + { + /* + There is a complete group for this transaction, containing more than + this event. 
+ */ + printf(" ends a group:\n"); + struct st_translog_scanner_data scanner2; + TRANSLOG_HEADER_BUFFER rec2; + len= + translog_read_record_header(all_active_trans[sid].group_start_lsn, &rec2); + if (len == (TRANSLOG_RECORD_HEADER_MAX_SIZE + 1)) + { + fprintf(stderr, "Cannot find record where it should be\n"); + goto err; + } + if (translog_init_scanner(rec2.lsn, 1, &scanner2)) + { + fprintf(stderr, "Scanner2 init failed\n"); + goto err; + } + do + { + if (rec2.short_trid == sid) /* it's in our group */ + { + const LOG_DESC *log_desc2= &log_record_type_descriptor[rec2.type]; + display_record_position(log_desc2, &rec2, 0); + if (display_and_apply_record(log_desc2, &rec2)) + goto err; + } + len= translog_read_next_record_header(&scanner2, &rec2); + if (len == (TRANSLOG_RECORD_HEADER_MAX_SIZE + 1)) + { + fprintf(stderr, "Cannot find record where it should be\n"); + goto err; + } + } + while (rec2.lsn < rec.lsn); + translog_free_record_header(&rec2); + /* group finished */ + all_active_trans[sid].group_start_lsn= LSN_IMPOSSIBLE; + } + if (display_and_apply_record(log_desc, &rec)) + goto err; + } + else /* record does not end group */ + { + /* just record the fact, can't know if can execute yet */ + if (all_active_trans[sid].group_start_lsn == LSN_IMPOSSIBLE) + { + /* group not yet started */ + all_active_trans[sid].group_start_lsn= rec.lsn; + } + } + len= translog_read_next_record_header(&scanner, &rec); + if (len == (TRANSLOG_RECORD_HEADER_MAX_SIZE + 1)) + { + printf("EOF on the log\n"); + break; /* leave the scan loop so the end-of-REDO code below is reached */ + } + } + translog_free_record_header(&rec); + + /* + So we have applied all REDOs. + We may now have unfinished transactions. + I don't think it's this program's job to roll them back: + to roll back and at the same time stay idempotent, it needs to write log + records (without CLRs, 2nd rollback would hit the effects of first + rollback and fail). But this standalone tool is not allowed to write to + the server's transaction log. So we do not roll back anything. 
+ In the real Recovery code, or the code to do "recover after online + backup", yes we will roll back. + */ + end_of_redo_phase(); + goto end; +err: + /* don't touch anything more, in case we hit a bug */ + exit(1); +end: + maria_end(); + free_defaults(default_argv); + my_end(0); + exit(0); + return 0; /* No compiler warning */ +} + + +static struct my_option my_long_options[] = +{ + {"only-display", 'o', "display brief info about records's header", + (gptr*) &opt_only_display, (gptr*) &opt_only_display, 0, GET_BOOL, NO_ARG, + 0, 0, 0, 0, 0, 0}, + {"display-and-apply", 'a', + "like --only-display but displays more info and modifies tables", + (gptr*) &opt_display_and_apply, (gptr*) &opt_display_and_apply, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifndef DBUG_OFF + {"debug", '#', "Output debug log. Often this is 'd:t:o,filename'.", + 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, +#endif + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + +#include + +static void print_version(void) +{ + VOID(printf("%s Ver 1.0 for %s on %s\n", + my_progname, SYSTEM_TYPE, MACHINE_TYPE)); + NETWARE_SET_SCREEN_MODE(1); +} + + +static void usage(void) +{ + print_version(); + puts("Copyright (C) 2007 MySQL AB"); + puts("This software comes with ABSOLUTELY NO WARRANTY. 
This is free software,"); + puts("and you are welcome to modify and redistribute it under the GPL license\n"); + + puts("Display and apply log records from a MARIA transaction log"); + puts("found in the current directory (for now)"); + VOID(printf("\nUsage: %s OPTIONS\n", my_progname)); + puts("You need to use one of -o or -a"); + my_print_help(my_long_options); + print_defaults("my", load_default_groups); + my_print_variables(my_long_options); +} + +#include + +static my_bool +get_one_option(int optid __attribute__((unused)), + const struct my_option *opt __attribute__((unused)), + char *argument __attribute__((unused))) +{ + /* for now there is nothing special with our options */ + return 0; +} + +static void get_options(int *argc,char ***argv) +{ + int ho_error; + + my_progname= argv[0][0]; + + if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option))) + exit(ho_error); + + if ((opt_only_display + opt_display_and_apply) != 1) + { + usage(); + exit(1); + } +} + + +/* very basic info about the record's header */ +static void display_record_position(const LOG_DESC *log_desc, + const TRANSLOG_HEADER_BUFFER *rec, + uint number) +{ + /* + if number==0, we're going over records which we had already seen and which + form a group, so we indent below the group's end record + */ + printf("%sRecord #%u LSN (%lu,0x%lx) short_trid %u %s(num_type:%u)\n", + number ? 
"" : " ", number, + (ulong) LSN_FILE_NO(rec->lsn), (ulong) LSN_OFFSET(rec->lsn), + rec->short_trid, log_desc->name, rec->type); +} + + +static int display_and_apply_record(const LOG_DESC *log_desc, + const TRANSLOG_HEADER_BUFFER *rec) +{ + int error; + if (opt_only_display) + return 0; + if (log_desc->record_execute_in_redo_phase == NULL) + { + DBUG_ASSERT("one more hook" == "to write"); /* die on all not-yet-handled records :) */ + return 1; /* if the assert is compiled out, fail instead of calling a NULL hook */ + } + if ((error= (*log_desc->record_execute_in_redo_phase)(rec))) + fprintf(stderr, "Got error when executing record\n"); + return error; +} + + +prototype_exec_hook(LONG_TRANSACTION_ID) +{ + uint16 sid= rec->short_trid; + TrID long_trid= all_active_trans[sid].long_trid; + /* abort group of this trn (must be of before a crash) */ + LSN gslsn= all_active_trans[sid].group_start_lsn; + if (gslsn != LSN_IMPOSSIBLE) + { + printf("Group at LSN (%lu,0x%lx) short_trid %u aborted\n", + (ulong) LSN_FILE_NO(gslsn), (ulong) LSN_OFFSET(gslsn), sid); + all_active_trans[sid].group_start_lsn= LSN_IMPOSSIBLE; + } + if (long_trid != 0) + { + LSN ulsn= all_active_trans[sid].undo_lsn; + if (ulsn != LSN_IMPOSSIBLE) + { + fprintf(stderr, "Found an old transaction long_trid %llu short_trid %u" + " with same short id as this new transaction, and has neither" + " committed nor rollback (undo_lsn: (%lu,0x%lx))\n", long_trid, + sid, (ulong) LSN_FILE_NO(ulsn), (ulong) LSN_OFFSET(ulsn)); + goto err; + } + } + long_trid= uint6korr(rec->header); + all_active_trans[sid].long_trid= long_trid; + printf("Transaction long_trid %llu short_trid %u starts\n", (ulonglong) long_trid, sid); + goto end; +err: + DBUG_ASSERT(0); + return 1; +end: + return 0; +} + +prototype_exec_hook(CHECKPOINT) +{ + /* the only checkpoint we care about was found via control file, ignore */ + return 0; +} + + +prototype_exec_hook(REDO_CREATE_TABLE) +{ + File dfile= -1, kfile= -1; + char *linkname_ptr, filename[FN_REFLEN]; + char 
*name, *ptr; + myf create_flag; + uint flags; + int error= 0, create_mode= O_RDWR | 
O_TRUNC; + MARIA_HA *info= NULL; + if (((name= my_malloc(rec->record_length, MYF(MY_WME))) == NULL) || + (translog_read_record(rec->lsn, 0, rec->record_length, name, NULL) != + rec->record_length)) + { + fprintf(stderr, "Failed to read record\n"); + goto err; + } + printf("Table '%s'", name); + /* we try hard to get create_rename_lsn, to avoid mistakes if possible */ + info= maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR); + if (info) + { + if (!info->s->base.transactional) + { + /* + could be that transactional table was later dropped, and a non-trans + one was renamed to its name, thus create_rename_lsn is 0 and should + not be trusted. + */ + printf(", is not transactional\n"); + DBUG_ASSERT(0); /* I want to know this */ + goto end; + } + if (cmp_translog_addr(info->s->state.create_rename_lsn, rec->lsn) >= 0) + { + printf(", has create_rename_lsn (%lu,0x%lx) is more recent than log" + " record\n", + (ulong) LSN_FILE_NO(info->s->state.create_rename_lsn), + (ulong) LSN_OFFSET(info->s->state.create_rename_lsn)); + goto end; + } + if (maria_is_crashed(info)) + { + printf(", is crashed, overwriting it"); + DBUG_ASSERT(0); /* I want to know this */ + } + maria_close(info); + info= NULL; + } + /* if does not exist, is older, or its header is corrupted, overwrite it */ + // TODO symlinks + ptr= name + strlen(name) + 1; + if ((flags= ptr[0] ? HA_DONT_TOUCH_DATA : 0)) + printf(", we will only touch index file"); + fn_format(filename, name, "", MARIA_NAME_IEXT, + (MY_UNPACK_FILENAME | + (flags & HA_DONT_TOUCH_DATA) ? 
MY_RETURN_REAL_PATH : 0) | + MY_APPEND_EXT); /* NOTE(review): '|' binds tighter than '?:', so MY_UNPACK_FILENAME is part of the condition, not of the resulting flags; confirm intent */ + linkname_ptr= NULL; + create_flag= MY_DELETE_OLD; + printf(", creating as '%s'", filename); + if ((kfile= my_create_with_symlink(linkname_ptr, filename, 0, create_mode, + MYF(MY_WME|create_flag))) < 0) + { + fprintf(stderr, "Failed to create index file\n"); + goto err; + } + ptr++; + uint kfile_size_before_extension= uint2korr(ptr); + ptr+= 2; + uint keystart= uint2korr(ptr); + ptr+= 2; + /* set create_rename_lsn (for maria_read_log to be idempotent) */ + lsn_store(ptr + sizeof(info->s->state.header) + 2, rec->lsn); + if (my_pwrite(kfile, ptr, + kfile_size_before_extension, 0, MYF(MY_NABP|MY_WME)) || + my_chsize(kfile, keystart, 0, MYF(MY_WME))) + { + fprintf(stderr, "Failed to write to index file\n"); + goto err; + } + if (!(flags & HA_DONT_TOUCH_DATA)) + { + fn_format(filename,name,"", MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT); + linkname_ptr= NULL; + create_flag=MY_DELETE_OLD; + if ((dfile= + my_create_with_symlink(linkname_ptr, filename, 0, create_mode, + MYF(MY_WME | create_flag))) < 0) + { + fprintf(stderr, "Failed to create data file\n"); + goto err; + } + /* + we now have an empty data file. To be able to + _ma_initialize_data_file() we need some pieces of the share to be + correctly filled. So we just open the table (fortunately, an empty + data file does not preclude this). 
+ */ + if (((info= maria_open(name, O_RDONLY, 0)) == NULL) || + _ma_initialize_data_file(dfile, info->s)) + { + fprintf(stderr, "Failed to open new table or write to data file\n"); + goto err; + } + } + error= 0; + goto end; +err: + DBUG_ASSERT(0); + error= 1; +end: + printf("\n"); + if (kfile >= 0) + error|= my_close(kfile, MYF(MY_WME)); + if (dfile >= 0) + error|= my_close(dfile, MYF(MY_WME)); + if (info != NULL) + error|= maria_close(info); + my_free(name, MYF(MY_ALLOW_ZERO_PTR)); + return error; /* non-zero if the err: path was taken or any close above failed */ +} + + +prototype_exec_hook(FILE_ID) +{ + uint16 sid; + int error= 0; + char *name, *buff; + MARIA_HA *info= NULL; + if (((buff= my_malloc(rec->record_length, MYF(MY_WME))) == NULL) || + (translog_read_record(rec->lsn, 0, rec->record_length, buff, NULL) != + rec->record_length)) + { + fprintf(stderr, "Failed to read record\n"); + goto err; + } + sid= fileid_korr(buff); + name= buff + FILEID_STORE_SIZE; + printf("Table '%s', id %u", name, sid); + info= all_tables[sid]; + if (info != NULL) + { + printf(", closing table '%s'", info->s->open_file_name); + all_tables[sid]= NULL; + info->s->base.transactional= TRUE; /* put back the truth */ + if (maria_close(info)) + { + fprintf(stderr, "Failed to close table\n"); + goto err; /* NOTE(review): info is not reset, so err: calls maria_close() on this handle again */ + } + } + info= maria_open(name, O_RDWR, HA_OPEN_FOR_REPAIR); + if (info == NULL) + { + printf(", is absent (must have been dropped later?)" + " or its header is so corrupted that we cannot open it;" + " we skip it\n"); + goto end; + } + if (maria_is_crashed(info)) + { + fprintf(stderr, "Table is crashed, can't apply log records to it\n"); + goto err; + } + if (!info->s->base.transactional) + { + printf(", is not transactional\n"); + DBUG_ASSERT(0); /* I want to know this */ + goto end; /* NOTE(review): info stays open here and is not stored in all_tables[] */ + } + all_tables[sid]= info; + /* + don't log any 
records for this work. TODO make sure this variable does not + go to disk before we restore it to its true value. 
+ */ + info->s->base.transactional= FALSE; + printf(", opened\n"); + error= 0; + goto end; +err: + DBUG_ASSERT(0); + error= 1; + if (info != NULL) + error|= maria_close(info); +end: + my_free(buff, MYF(MY_ALLOW_ZERO_PTR)); + return error; /* 0 when the table was opened or deliberately skipped, non-zero after err: */ +} + + +prototype_exec_hook(REDO_INSERT_ROW_HEAD) +{ + uint16 sid; + ulonglong page; + MARIA_HA *info; + sid= fileid_korr(rec->header); + page= page_korr(rec->header + FILEID_STORE_SIZE); + printf("For page %llu of table of short id %u", page, sid); + info= all_tables[sid]; + if (info == NULL) + { + printf(", table skipped, so skipping record\n"); + goto end; + } + printf(", '%s'", info->s->open_file_name); + if (cmp_translog_addr(info->s->state.create_rename_lsn, rec->lsn) >= 0) + { + printf(", has create_rename_lsn (%lu,0x%lx) is more recent than log" + " record\n", (ulong) LSN_FILE_NO(info->s->state.create_rename_lsn), + (ulong) LSN_OFFSET(info->s->state.create_rename_lsn)); + goto end; + } + /* + Soon we will also skip the page depending on the rec_lsn for this page in + the checkpoint record, but this is not absolutely needed for now (just + assume we have made no checkpoint). + */ + printf(", applying record\n"); + DBUG_ASSERT("Monty" == "this is the place"); +end: + /* as we don't have apply working: */ + return 1; +} + + +prototype_exec_hook(COMMIT) +{ + uint16 sid= rec->short_trid; + TrID long_trid= all_active_trans[sid].long_trid; + LSN gslsn= all_active_trans[sid].group_start_lsn; + + if (long_trid == 0) + { + printf("We don't know about transaction short_trid %u;" + "it probably committed long ago, forget it\n", sid); + return 0; + } + printf("Transaction long_trid %lu short_trid %u committed", long_trid, sid); + if (gslsn != LSN_IMPOSSIBLE) + { + /* + It's not an error, it may be that trn got a disk error when writing to a + table, so an unfinished group staid in the log.
+ */ + printf(", with group at LSN (%lu,0x%lx) short_trid %u aborted\n", + (ulong) LSN_FILE_NO(gslsn), (ulong) LSN_OFFSET(gslsn), sid); + all_active_trans[sid].group_start_lsn= LSN_IMPOSSIBLE; + } + else + printf("\n"); + all_active_trans[sid].long_trid= 0; +#ifdef MARIA_VERSIONING + /* + if real recovery: + transaction was committed, move it to some separate list for later + purging (but don't purge now! purging may have been started before, we + may find REDO_PURGE records soon). + */ +#endif + return 0; +} + + +/* Just to inform about any aborted groups or unfinished transactions */ +static void end_of_redo_phase() +{ + uint sid; + for (sid= 0; sid <= SHORT_TRID_MAX; sid++) + { + TrID long_trid= all_active_trans[sid].long_trid; + LSN gslsn= all_active_trans[sid].group_start_lsn; + if (long_trid == 0) + continue; + if (all_active_trans[sid].undo_lsn != LSN_IMPOSSIBLE) + printf("Transaction long_trid %lu short_trid %u unfinished\n", + long_trid, sid); + if (gslsn != LSN_IMPOSSIBLE) + { + printf("Group at LSN (%lu,0x%lx) short_trid %u aborted\n", + (ulong) LSN_FILE_NO(gslsn), (ulong) LSN_OFFSET(gslsn), sid); + } + /* If real recovery: roll back unfinished transaction */ +#ifdef MARIA_VERSIONING + /* + If real recovery: transaction was committed, move it to some separate + list for soon purging. + */ +#endif + } +} diff --git a/storage/maria/trnman_public.h b/storage/maria/trnman_public.h index 3e0a21c26a6..e1891466c4d 100644 --- a/storage/maria/trnman_public.h +++ b/storage/maria/trnman_public.h @@ -20,6 +20,9 @@ to include my_atomic.h in C++ code. 
*/ +#ifndef _trnman_public_h +#define _trnman_public_h + #include "ma_loghandler_lsn.h" C_MODE_START @@ -52,3 +55,4 @@ my_bool trnman_has_locked_tables(TRN *trn); void trnman_reset_locked_tables(TRN *trn); C_MODE_END +#endif diff --git a/storage/maria/unittest/ma_control_file-t.c b/storage/maria/unittest/ma_control_file-t.c index 71a1157f1ba..a7472361dad 100644 --- a/storage/maria/unittest/ma_control_file-t.c +++ b/storage/maria/unittest/ma_control_file-t.c @@ -121,8 +121,8 @@ static int delete_file(myf my_flags) The error will however be printed on stderr. */ my_delete(file_name, my_flags); - expect_checkpoint_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; - expect_logno= CONTROL_FILE_IMPOSSIBLE_FILENO; + expect_checkpoint_lsn= LSN_IMPOSSIBLE; + expect_logno= FILENO_IMPOSSIBLE; return 0; } @@ -146,9 +146,9 @@ static int verify_module_values_match_expected() */ static int verify_module_values_are_impossible() { - RET_ERR_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO); + RET_ERR_UNLESS(last_logno == FILENO_IMPOSSIBLE); RET_ERR_UNLESS(last_checkpoint_lsn == - CONTROL_FILE_IMPOSSIBLE_LSN); + LSN_IMPOSSIBLE); return 0; } @@ -164,7 +164,7 @@ static int close_file() static int create_or_open_file() { - RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_OK); + RET_ERR_UNLESS(ma_control_file_create_or_open(TRUE) == CONTROL_FILE_OK); /* Check that the module reports expected information */ RET_ERR_UNLESS(verify_module_values_match_expected() == 0); return 0; @@ -188,7 +188,7 @@ static int test_one_log() RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; expect_logno= 123; - RET_ERR_UNLESS(write_file(CONTROL_FILE_IMPOSSIBLE_LSN, + RET_ERR_UNLESS(write_file(LSN_IMPOSSIBLE, expect_logno, objs_to_write) == 0); RET_ERR_UNLESS(close_file() == 0); @@ -206,7 +206,7 @@ static int test_five_logs() for (i= 0; i<5; i++) { expect_logno*= 3; - RET_ERR_UNLESS(write_file(CONTROL_FILE_IMPOSSIBLE_LSN, expect_logno, + 
RET_ERR_UNLESS(write_file(LSN_IMPOSSIBLE, expect_logno, objs_to_write) == 0); } RET_ERR_UNLESS(close_file() == 0); @@ -320,7 +320,7 @@ static int test_bad_magic_string() RET_ERR_UNLESS(my_pwrite(fd, "papa", 4, 0, MYF(MY_FNABP | MY_WME)) == 0); /* Check that control file module sees the problem */ - RET_ERR_UNLESS(ma_control_file_create_or_open() == + RET_ERR_UNLESS(ma_control_file_create_or_open(TRUE) == CONTROL_FILE_BAD_MAGIC_STRING); /* Restore magic string */ RET_ERR_UNLESS(my_pwrite(fd, buffer, 4, 0, MYF(MY_FNABP | MY_WME)) == 0); @@ -346,7 +346,7 @@ static int test_bad_checksum() buffer[0]+= 3; /* mangle checksum */ RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 8, MYF(MY_FNABP | MY_WME)) == 0); /* Check that control file module sees the problem */ - RET_ERR_UNLESS(ma_control_file_create_or_open() == + RET_ERR_UNLESS(ma_control_file_create_or_open(TRUE) == CONTROL_FILE_BAD_CHECKSUM); /* Restore checksum */ buffer[0]-= 3; @@ -369,10 +369,11 @@ static int test_bad_size() MYF(MY_WME))) >= 0); RET_ERR_UNLESS(my_write(fd, buffer, 10, MYF(MY_FNABP | MY_WME)) == 0); /* Check that control file module sees the problem */ - RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_TOO_SMALL); + RET_ERR_UNLESS(ma_control_file_create_or_open(TRUE) == + CONTROL_FILE_TOO_SMALL); RET_ERR_UNLESS(my_write(fd, buffer, 30, MYF(MY_FNABP | MY_WME)) == 0); /* Check that control file module sees the problem */ - RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_TOO_BIG); + RET_ERR_UNLESS(ma_control_file_create_or_open(TRUE) == CONTROL_FILE_TOO_BIG); RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0); /* Leave a correct control file */ diff --git a/storage/maria/unittest/ma_test_loghandler-t.c b/storage/maria/unittest/ma_test_loghandler-t.c index e31136d52ec..19e6704dc5a 100644 --- a/storage/maria/unittest/ma_test_loghandler-t.c +++ b/storage/maria/unittest/ma_test_loghandler-t.c @@ -164,7 +164,7 @@ int main(int argc __attribute__((unused)), char *argv[]) } #endif - if 
(ma_control_file_create_or_open()) + if (ma_control_file_create_or_open(TRUE)) { fprintf(stderr, "Can't init control file (%d)\n", errno); exit(1); @@ -336,7 +336,7 @@ int main(int argc __attribute__((unused)), char *argv[]) ma_control_file_end(); - if (ma_control_file_create_or_open()) + if (ma_control_file_create_or_open(TRUE)) { fprintf(stderr, "pass2: Can't init control file (%d)\n", errno); exit(1); @@ -398,7 +398,7 @@ int main(int argc __attribute__((unused)), char *argv[]) i, errno); goto err; } - if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + if (rec.lsn == LSN_IMPOSSIBLE) { if (i != ITERATIONS) { @@ -477,7 +477,7 @@ int main(int argc __attribute__((unused)), char *argv[]) "failed (%d)\n", i, errno); goto err; } - if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + if (rec.lsn == LSN_IMPOSSIBLE) { fprintf(stderr, "EOL met at the middle of iteration (first var) %u " "instead of beginning of %u\n", i, ITERATIONS); @@ -572,7 +572,7 @@ int main(int argc __attribute__((unused)), char *argv[]) i, errno); goto err; } - if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + if (rec.lsn == LSN_IMPOSSIBLE) { fprintf(stderr, "EOL met at the middle of iteration %u " "instead of beginning of %u\n", i, ITERATIONS); diff --git a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c index 1281ee425d8..5fe24be597d 100644 --- a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c +++ b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c @@ -161,7 +161,7 @@ int main(int argc __attribute__((unused)), char *argv[]) } #endif - if (ma_control_file_create_or_open()) + if (ma_control_file_create_or_open(TRUE)) { fprintf(stderr, "Can't init control file (%d)\n", errno); exit(1); @@ -325,7 +325,7 @@ int main(int argc __attribute__((unused)), char *argv[]) end_pagecache(&pagecache, 1); ma_control_file_end(); - if (ma_control_file_create_or_open()) + if (ma_control_file_create_or_open(TRUE)) { fprintf(stderr, "pass2: Can't init 
control file (%d)\n", errno); exit(1); @@ -390,7 +390,7 @@ int main(int argc __attribute__((unused)), char *argv[]) translog_free_record_header(&rec); goto err; } - if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + if (rec.lsn == LSN_IMPOSSIBLE) { if (i != ITERATIONS) { @@ -470,7 +470,7 @@ int main(int argc __attribute__((unused)), char *argv[]) "failed (%d)\n", i, errno); goto err; } - if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + if (rec.lsn == LSN_IMPOSSIBLE) { fprintf(stderr, "EOL met at the middle of iteration (first var) %u " "instead of beginning of %u\n", i, ITERATIONS); @@ -568,7 +568,7 @@ int main(int argc __attribute__((unused)), char *argv[]) translog_free_record_header(&rec); goto err; } - if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + if (rec.lsn == LSN_IMPOSSIBLE) { fprintf(stderr, "EOL met at the middle of iteration %u " "instead of beginning of %u\n", i, ITERATIONS); diff --git a/storage/maria/unittest/ma_test_loghandler_multithread-t.c b/storage/maria/unittest/ma_test_loghandler_multithread-t.c index ff966160acc..ba5d217a45a 100644 --- a/storage/maria/unittest/ma_test_loghandler_multithread-t.c +++ b/storage/maria/unittest/ma_test_loghandler_multithread-t.c @@ -270,7 +270,7 @@ int main(int argc __attribute__((unused)), my_thread_global_init(); - if (ma_control_file_create_or_open()) + if (ma_control_file_create_or_open(TRUE)) { fprintf(stderr, "Can't init control file (%d)\n", errno); exit(1); @@ -384,7 +384,7 @@ int main(int argc __attribute__((unused)), translog_free_record_header(&rec); goto err; } - if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + if (rec.lsn == LSN_IMPOSSIBLE) { if (i != WRITERS * ITERATIONS * 2) { diff --git a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c index 35e05f9c997..4ac500ce8b2 100644 --- a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c +++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c @@ -56,7 +56,7 @@ int main(int argc 
__attribute__((unused)), char *argv[]) } #endif - if (ma_control_file_create_or_open()) + if (ma_control_file_create_or_open(TRUE)) { fprintf(stderr, "Can't init control file (%d)\n", errno); exit(1); diff --git a/storage/myisam/mi_close.c b/storage/myisam/mi_close.c index 47b7ba855c0..270a5dff056 100644 --- a/storage/myisam/mi_close.c +++ b/storage/myisam/mi_close.c @@ -75,6 +75,7 @@ int mi_close(register MI_INFO *info) not change the crashed state. We can NOT write the state in other cases as other threads may be using the file at this point + IF using --external-locking. */ if (share->mode != O_RDONLY && mi_is_crashed(info)) mi_state_info_write(share->kfile, &share->state, 1); -- cgit v1.2.1 From 61a3af38583e0cfc0631e233655fad00d4519269 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 26 Jun 2007 18:29:17 +0200 Subject: WL#3072 - Maria recovery: safety assertions. storage/maria/maria_read_log.c: assertions to protect against future bugs (especially, to ensure that replaying DROP TABLE, if implemented, wouldn't leave open tables behind it) --- storage/maria/maria_read_log.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/storage/maria/maria_read_log.c b/storage/maria/maria_read_log.c index 7bb15e27f7a..e654b8ea2ac 100644 --- a/storage/maria/maria_read_log.c +++ b/storage/maria/maria_read_log.c @@ -424,6 +424,7 @@ prototype_exec_hook(REDO_CREATE_TABLE) info= maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR); if (info) { + DBUG_ASSERT(info->s->reopen == 1); /* check that we're not using it */ if (!info->s->base.transactional) { /* @@ -437,8 +438,7 @@ prototype_exec_hook(REDO_CREATE_TABLE) } if (cmp_translog_addr(info->s->state.create_rename_lsn, rec->lsn) >= 0) { - printf(", has create_rename_lsn (%lu,0x%lx) is more recent than log" - " record\n", + printf(", has create_rename_lsn (%lu,0x%lx) is more recent than record", (ulong) LSN_FILE_NO(rec->lsn), (ulong) LSN_OFFSET(rec->lsn)); goto end; @@ -568,6 +568,7 @@ prototype_exec_hook(FILE_ID) 
fprintf(stderr, "Table is crashed, can't apply log records to it\n"); goto err; } + DBUG_ASSERT(info->s->reopen == 1); /* should always be only one instance */ if (!info->s->base.transactional) { printf(", is not transactional\n"); -- cgit v1.2.1 From f99bae82033beb1cce7afa1dff740b9e938fd59c Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 26 Jun 2007 22:30:09 +0200 Subject: WL#3072 - Maria recovery fixes for build failures; copyrights; small bugfixes and comments mysys/Makefile.am: missing .h breaks building from tarball storage/maria/ma_loghandler.c: applying Serg's bugfix of trnman_new_trid() to translog_assign_id_to_share() storage/maria/ma_loghandler.h: copyright storage/maria/ma_loghandler_lsn.h: copyright storage/maria/maria_read_log.c: fix for compiler warnings. Comments. Close tables when program ends. --- mysys/Makefile.am | 2 +- storage/maria/ma_loghandler.c | 35 +++++++++++---------- storage/maria/ma_loghandler.h | 18 ++++++++++- storage/maria/ma_loghandler_lsn.h | 15 +++++++++ storage/maria/maria_read_log.c | 66 ++++++++++++++++++++++++++++++++------- 5 files changed, 106 insertions(+), 30 deletions(-) diff --git a/mysys/Makefile.am b/mysys/Makefile.am index cd84e09a60e..60aa59fd3eb 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -20,7 +20,7 @@ INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \ -I$(top_srcdir)/include -I$(srcdir) pkglib_LIBRARIES = libmysys.a LDADD = libmysys.a $(top_builddir)/strings/libmystrings.a $(top_builddir)/dbug/libdbug.a -noinst_HEADERS = mysys_priv.h my_static.h +noinst_HEADERS = mysys_priv.h my_static.h my_safehash.h libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ mf_path.c mf_loadpath.c my_file.c \ my_open.c my_create.c my_dup.c my_seek.c my_read.c \ diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index 6f238ef4055..79bf44046b1 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -177,7 +177,6 @@ my_bool translog_inited= 0; #include /* 
an array that maps id of a MARIA_SHARE to this MARIA_SHARE */ static MARIA_SHARE **id_to_share= NULL; -#define SHARE_ID_MAX 65535 /* array's size */ /* lock for id_to_share */ static my_atomic_rwlock_t LOCK_id_to_share; @@ -2282,8 +2281,8 @@ my_bool translog_init(const char *directory, structures for generating 2-byte ids: */ my_atomic_rwlock_init(&LOCK_id_to_share); - id_to_share= (MARIA_SHARE **) my_malloc(SHARE_ID_MAX*sizeof(MARIA_SHARE*), - MYF(MY_WME|MY_ZEROFILL)); + id_to_share= (MARIA_SHARE **) my_malloc(SHARE_ID_MAX * sizeof(MARIA_SHARE*), + MYF(MY_WME | MY_ZEROFILL)); if (unlikely(!id_to_share)) DBUG_RETURN(1); id_to_share--; /* min id is 1 */ @@ -5682,21 +5681,23 @@ int translog_assign_id_to_share(MARIA_SHARE *share, TRN *trn) if (likely(share->id == 0)) { /* Inspired by set_short_trid() of trnman.c */ - int i= share->kfile.file % SHARE_ID_MAX + 1; - my_atomic_rwlock_wrlock(&LOCK_id_to_share); - /** - @todo RECOVERY BUG: if all slots are used, and we're using rwlocks - above, we will never exit the loop. To be discussed with Serg. 
- */ - for ( ; ; i= i % SHARE_ID_MAX + 1) /* the range is [1..SHARE_ID_MAX] */ + uint i= share->kfile.file % SHARE_ID_MAX + 1; + do { - void *tmp= NULL; - if (id_to_share[i] == NULL && - my_atomic_casptr((void **)&id_to_share[i], &tmp, share)) - break; - } - my_atomic_rwlock_wrunlock(&LOCK_id_to_share); - share->id= (uint16)i; + my_atomic_rwlock_wrlock(&LOCK_id_to_share); + for ( ; i <= SHARE_ID_MAX ; i++) /* the range is [1..SHARE_ID_MAX] */ + { + void *tmp= NULL; + if (id_to_share[i] == NULL && + my_atomic_casptr((void **)&id_to_share[i], &tmp, share)) + { + share->id= (uint16)i; + break; + } + } + my_atomic_rwlock_wrunlock(&LOCK_id_to_share); + i= 1; /* scan the whole array */ + } while (share->id == 0); DBUG_PRINT("info", ("id_to_share: 0x%lx -> %u", (ulong)share, i)); LSN lsn; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2]; diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h index 22b8cca3a08..f2bfd2c9d7e 100644 --- a/storage/maria/ma_loghandler.h +++ b/storage/maria/ma_loghandler.h @@ -1,4 +1,17 @@ -// TODO copyright +/* Copyright (C) 2007 MySQL AB & Sanja Belkin + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _ma_loghandler_h #define _ma_loghandler_h @@ -251,6 +264,9 @@ extern my_bool translog_inited; all the rest added because of recovery; should we make ma_loghandler_for_recovery.h ? 
*/ + +#define SHARE_ID_MAX 65535 /* array's size */ + extern LSN first_lsn_in_log(); /* record parts descriptor */ diff --git a/storage/maria/ma_loghandler_lsn.h b/storage/maria/ma_loghandler_lsn.h index af7594e3b00..34cb7616b74 100644 --- a/storage/maria/ma_loghandler_lsn.h +++ b/storage/maria/ma_loghandler_lsn.h @@ -1,3 +1,18 @@ +/* Copyright (C) 2007 MySQL AB & Sanja Belkin + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + #ifndef _ma_loghandler_lsn_h #define _ma_loghandler_lsn_h diff --git a/storage/maria/maria_read_log.c b/storage/maria/maria_read_log.c index e654b8ea2ac..c8263495fbc 100644 --- a/storage/maria/maria_read_log.c +++ b/storage/maria/maria_read_log.c @@ -48,7 +48,9 @@ static int display_and_apply_record(const LOG_DESC *log_desc, #define prototype_exec_hook(R) \ static int exec_LOGREC_ ## R(const TRANSLOG_HEADER_BUFFER *rec) prototype_exec_hook(LONG_TRANSACTION_ID); +#ifdef MARIA_CHECKPOINT prototype_exec_hook(CHECKPOINT); +#endif prototype_exec_hook(REDO_CREATE_TABLE); prototype_exec_hook(FILE_ID); prototype_exec_hook(REDO_INSERT_ROW_HEAD); @@ -128,7 +130,9 @@ int main(int argc, char **argv) log_record_type_descriptor[LOGREC_ ## R].record_execute_in_redo_phase= \ exec_LOGREC_ ## R; install_exec_hook(LONG_TRANSACTION_ID); +#ifdef MARIA_CHECKPOINT install_exec_hook(CHECKPOINT); +#endif install_exec_hook(REDO_CREATE_TABLE); install_exec_hook(FILE_ID); 
install_exec_hook(REDO_INSERT_ROW_HEAD); @@ -337,10 +341,11 @@ static void display_record_position(const LOG_DESC *log_desc, if number==0, we're going over records which we had already seen and which form a group, so we indent below the group's end record */ - printf("%sRecord #%u LSN (%lu,0x%lx) short_trid %u %s(num_type:%u)\n", + printf("%sRec#%u LSN (%lu,0x%lx) short_trid %u %s(num_type:%u) len %lu\n", number ? "" : " ", number, (ulong) LSN_FILE_NO(rec->lsn), (ulong) LSN_OFFSET(rec->lsn), - rec->short_trid, log_desc->name, rec->type); + rec->short_trid, log_desc->name, rec->type, + (ulong)rec->record_length); } @@ -367,6 +372,7 @@ prototype_exec_hook(LONG_TRANSACTION_ID) TrID long_trid= all_active_trans[sid].long_trid; /* abort group of this trn (must be of before a crash) */ LSN gslsn= all_active_trans[sid].group_start_lsn; + char llbuf[22]; if (gslsn != LSN_IMPOSSIBLE) { printf("Group at LSN (%lu,0x%lx) short_trid %u aborted\n", @@ -378,16 +384,18 @@ prototype_exec_hook(LONG_TRANSACTION_ID) LSN ulsn= all_active_trans[sid].undo_lsn; if (ulsn != LSN_IMPOSSIBLE) { - fprintf(stderr, "Found an old transaction long_trid %llu short_trid %u" + llstr(long_trid, llbuf); + fprintf(stderr, "Found an old transaction long_trid %s short_trid %u" " with same short id as this new transaction, and has neither" - " committed nor rollback (undo_lsn: (%lu,0x%lx))\n", long_trid, + " committed nor rollback (undo_lsn: (%lu,0x%lx))\n", llbuf, sid, (ulong) LSN_FILE_NO(ulsn), (ulong) LSN_OFFSET(ulsn)); goto err; } } long_trid= uint6korr(rec->header); all_active_trans[sid].long_trid= long_trid; - printf("Transaction long_trid %lu short_trid %u starts\n", long_trid, sid); + llstr(long_trid, llbuf); + printf("Transaction long_trid %s short_trid %u starts\n", llbuf, sid); goto end; err: DBUG_ASSERT(0); @@ -396,11 +404,14 @@ end: return 0; } + +#ifdef MARIA_CHECKPOINT prototype_exec_hook(CHECKPOINT) { /* the only checkpoint we care about was found via control file, ignore */ return 0; } 
+#endif prototype_exec_hook(REDO_CREATE_TABLE) @@ -600,9 +611,11 @@ prototype_exec_hook(REDO_INSERT_ROW_HEAD) uint16 sid; ulonglong page; MARIA_HA *info; + char llbuf[22]; sid= fileid_korr(rec->header); page= page_korr(rec->header + FILEID_STORE_SIZE); - printf("For page %llu of table of short id %u", page, sid); + llstr(page, llbuf); + printf("For page %s of table of short id %u", llbuf, sid); info= all_tables[sid]; if (info == NULL) { @@ -623,6 +636,16 @@ prototype_exec_hook(REDO_INSERT_ROW_HEAD) assume we have made no checkpoint). */ printf(", applying record\n"); + /* + If REDO's LSN is > page's LSN (read from disk), we are going to modify the + page and change its LSN. The normal runtime code stores the UNDO's LSN + into the page; but here storing the REDO's LSN (rec->lsn) is more + straightforward and should not cause any problem (we are not writing to + the log here, so don't have to "flush up to UNDO's LSN"). + If the UNDO's LSN is desired, it can be found, as we saw the UNDO record + before deciding to execute this REDO; UNDO's LSN could simply be stored in + all_trans[rec->short_trid].group_end_lsn for this. 
+ */ DBUG_ASSERT("Monty" == "this is the place"); end: /* as we don't have apply working: */ @@ -635,14 +658,15 @@ prototype_exec_hook(COMMIT) uint16 sid= rec->short_trid; TrID long_trid= all_active_trans[sid].long_trid; LSN gslsn= all_active_trans[sid].group_start_lsn; - + char llbuf[22]; if (long_trid == 0) { printf("We don't know about transaction short_trid %u;" "it probably committed long ago, forget it\n", sid); return 0; } - printf("Transaction long_trid %lu short_trid %u committed", long_trid, sid); + llstr(long_trid, llbuf); + printf("Transaction long_trid %s short_trid %u committed", llbuf, sid); if (gslsn != LSN_IMPOSSIBLE) { /* @@ -671,7 +695,7 @@ prototype_exec_hook(COMMIT) /* Just to inform about any aborted groups or unfinished transactions */ static void end_of_redo_phase() { - uint sid; + uint sid, unfinished= 0; for (sid= 0; sid <= SHORT_TRID_MAX; sid++) { TrID long_trid= all_active_trans[sid].long_trid; @@ -679,8 +703,12 @@ static void end_of_redo_phase() if (long_trid == 0) continue; if (all_active_trans[sid].undo_lsn != LSN_IMPOSSIBLE) - printf("Transaction long_trid %lu short_trid %u unfinished\n", - long_trid, sid); + { + char llbuf[22]; + llstr(long_trid, llbuf); + printf("Transaction long_trid %s short_trid %u unfinished\n", + llbuf, sid); + } if (gslsn != LSN_IMPOSSIBLE) { printf("Group at LSN (%lu,0x%lx) short_trid %u aborted\n", @@ -694,4 +722,20 @@ static void end_of_redo_phase() */ #endif } + /* + We don't close tables if there are some unfinished transactions, because + closing tables normally requires that all unfinished transactions on them + be rolled back. + For example, closing will soon write the state to disk and when doing that + it will think this is a committed state, but it may not be. 
+ */ + if (unfinished == 0) + { + for (sid= 0; sid <= SHORT_TRID_MAX; sid++) + { + MARIA_HA *info= all_tables[sid]; + if (info != NULL) + maria_close(info); + } + } } -- cgit v1.2.1 From fab8b0a8b6532383540c43e240642732556df597 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 26 Jun 2007 22:53:35 +0200 Subject: WL#3072 - Maria recovery comments; remember the UNDO's LSN for storing it in pages when executing REDO's (to imitate what the runtime code does) storage/maria/maria_read_log.c: comments; remember the UNDO's LSN for storing it in pages when executing REDO's (to imitate what the runtime code does) --- storage/maria/maria_read_log.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/storage/maria/maria_read_log.c b/storage/maria/maria_read_log.c index c8263495fbc..568814f6f8a 100644 --- a/storage/maria/maria_read_log.c +++ b/storage/maria/maria_read_log.c @@ -38,6 +38,7 @@ struct TRN_FOR_RECOVERY struct TRN_FOR_RECOVERY all_active_trans[SHORT_TRID_MAX + 1]; MARIA_HA *all_tables[SHORT_TRID_MAX + 1]; +LSN current_group_end_lsn= LSN_IMPOSSIBLE; static void end_of_redo_phase(); static void display_record_position(const LOG_DESC *log_desc, @@ -171,6 +172,10 @@ int main(int argc, char **argv) (e.g. a set of REDOs for an operation, terminated by an UNDO for this operation); if there is no "end mark" record the group is incomplete and won't be executed. + There are pitfalls: if a table write failed, the transaction may have + put an incomplete group in the log and then a COMMIT record, that will + make a complete group which is wrong. We say that we should mark the + table corrupted if such error happens (what if it cannot be marked?). 
*/ if (log_desc->record_ends_group) { @@ -195,6 +200,7 @@ int main(int argc, char **argv) fprintf(stderr, "Scanner2 init failed\n"); goto err; } + current_group_end_lsn= rec.lsn; do { if (rec2.short_trid == sid) /* it's in our group */ @@ -215,6 +221,7 @@ int main(int argc, char **argv) translog_free_record_header(&rec2); /* group finished */ all_active_trans[sid].group_start_lsn= LSN_IMPOSSIBLE; + current_group_end_lsn= LSN_IMPOSSIBLE; /* for debugging */ } if (display_and_apply_record(log_desc, &rec)) goto err; @@ -639,12 +646,12 @@ prototype_exec_hook(REDO_INSERT_ROW_HEAD) /* If REDO's LSN is > page's LSN (read from disk), we are going to modify the page and change its LSN. The normal runtime code stores the UNDO's LSN - into the page; but here storing the REDO's LSN (rec->lsn) is more - straightforward and should not cause any problem (we are not writing to - the log here, so don't have to "flush up to UNDO's LSN"). - If the UNDO's LSN is desired, it can be found, as we saw the UNDO record - before deciding to execute this REDO; UNDO's LSN could simply be stored in - all_trans[rec->short_trid].group_end_lsn for this. + into the page. Here storing the REDO's LSN (rec->lsn) would work + (we are not writing to the log here, so don't have to "flush up to UNDO's + LSN"). But in a test scenario where we do updates at runtime, then remove + tables, apply the log and check that this results in the same table as at + runtime, putting the same LSN as runtime had done will decrease + differences. So we use the UNDO's LSN which is current_group_end_lsn. */ DBUG_ASSERT("Monty" == "this is the place"); end: -- cgit v1.2.1 From e0c5a2645c22fed5e9a49ad60c5cd32ed13a7e4f Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 27 Jun 2007 12:58:08 +0200 Subject: WL#3072 - Maria recovery For this scenario: server crashes (could be because a table is corrupted) and Recovery repeatedly crashes on this table. 
User repairs it with maria_chk (as REPAIR TABLE is not possible), restarts the server, Recovery runs: for Recovery to not apply old REDOs to this repaired table (which would fail: rows have moved), maria_chk sets create_rename_lsn to the max value. Later when the server opens the table via ha_maria, it sets the LSN to the correct current value. storage/maria/ma_check.c: using helper function storage/maria/ma_create.c: A new helper function which stores the create_rename_lsn into the table's header on disk when we cannot wait for this to happen naturally at a later _ma_state_info_write(). storage/maria/ma_delete_all.c: using helper function; so log_data now can be FILEID_STORE_SIZE. storage/maria/ma_open.c: When opening a transactional table in the server, we discover if it has been repaired with maria_chk and if yes, give it a correct create_rename_lsn. storage/maria/ma_rename.c: using helper function storage/maria/maria_chk.c: By setting create_rename_lsn to the maximum possible LSN, maria_chk ensures that old REDOs are not applied to the new table it is going to produce. storage/maria/maria_def.h: new helper function --- storage/maria/ma_check.c | 7 ++----- storage/maria/ma_create.c | 34 +++++++++++++++++++++++++++++++--- storage/maria/ma_delete_all.c | 8 +++----- storage/maria/ma_open.c | 12 ++++++++++++ storage/maria/ma_rename.c | 9 +++------ storage/maria/maria_chk.c | 7 +++++++ storage/maria/maria_def.h | 5 +++-- 7 files changed, 61 insertions(+), 21 deletions(-) diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index 0fc2b77304d..72054ffe92a 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -5200,12 +5200,9 @@ int _ma_repair_write_log_record(const HA_CHECK *param, MARIA_HA *info) and to not apply old REDOs to the new table. The table's existence was made durable earlier (MY_SYNC_DIR passed to maria_change_to_newfile()). 
*/ - lsn_store(log_data, share->state.create_rename_lsn); DBUG_ASSERT(info->dfile.file >= 0); - DBUG_ASSERT(share->kfile.file >= 0); - return (my_pwrite(share->kfile.file, log_data, sizeof(log_data), - sizeof(share->state.header) + 2, MYF(MY_NABP)) || - _ma_sync_table_files(info)); + return _ma_update_create_rename_lsn_on_disk(share, FALSE) || + _ma_sync_table_files(info); } return 0; } diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index 22b490c907c..b439d7760e7 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -1009,9 +1009,8 @@ int maria_create(const char *name, enum data_file_type datafile_type, If such direct my_pwrite() to a fixed offset is too "hackish", I can call ma_state_info_write() again but it will be less efficient. */ - lsn_store(log_data, share.state.create_rename_lsn); - if (my_pwrite(file, log_data, LSN_STORE_SIZE, - sizeof(share.state.header) + 2, MYF(MY_NABP))) + share.kfile.file= file; + if (_ma_update_create_rename_lsn_on_disk(&share, FALSE)) goto err_no_lock; my_free(log_data, MYF(0)); } @@ -1163,3 +1162,32 @@ int _ma_initialize_data_file(File dfile, MARIA_SHARE *share) } return 0; } + + +/** + @brief Writes create_rename_lsn to disk, optionally forces + + This is for special cases where: + - we don't want to write the full state to disk (so, not call + _ma_state_info_write()) because some parts of the state may be + currently inconsistent, or because it would be overkill + - we must sync this LSN immediately for correctness. 
+ + @param share table's share + @param do_sync if the write should be forced to disk + + @return Operation status + @retval 0 ok + @retval 1 error (disk problem) +*/ + +int _ma_update_create_rename_lsn_on_disk(MARIA_SHARE *share, my_bool do_sync) +{ + char buf[LSN_STORE_SIZE]; + File file= share->kfile.file; + DBUG_ASSERT(file >= 0); + lsn_store(buf, share->state.create_rename_lsn); + return (my_pwrite(file, buf, sizeof(buf), + sizeof(share->state.header) + 2, MYF(MY_NABP)) || + (do_sync && my_sync(file, MYF(0)))); +} diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c index 7286f540aa1..a08e259d09b 100644 --- a/storage/maria/ma_delete_all.c +++ b/storage/maria/ma_delete_all.c @@ -89,9 +89,9 @@ int maria_delete_all_rows(MARIA_HA *info) { /* For now this record is only informative */ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; - uchar log_data[LSN_STORE_SIZE]; + uchar log_data[FILEID_STORE_SIZE]; log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= FILEID_STORE_SIZE; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); if (unlikely(translog_write_record(&share->state.create_rename_lsn, LOGREC_REDO_DELETE_ALL, info->trn, share, 0, @@ -106,9 +106,7 @@ int maria_delete_all_rows(MARIA_HA *info) Note that storing the LSN could not be done by _ma_writeinfo() above as the table is locked at this moment. So we need to do it by ourselves. 
*/ - lsn_store(log_data, share->state.create_rename_lsn); - if (my_pwrite(share->kfile.file, log_data, sizeof(log_data), - sizeof(share->state.header) + 2, MYF(MY_NABP)) || + if (_ma_update_create_rename_lsn_on_disk(share, FALSE) || _ma_sync_table_files(info)) goto err; /** diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index 4e72adf3b7e..5cd2bfbb838 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -587,7 +587,19 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->base.pack_bytes + test(share->options & HA_OPTION_CHECKSUM)); if (share->base.transactional) + { share->base_length+= TRANS_ROW_EXTRA_HEADER_SIZE; + if (unlikely((share->state.create_rename_lsn == (LSN)ULONGLONG_MAX) && + (open_flags & HA_OPEN_FROM_SQL_LAYER))) + { + /* + This table was repaired with maria_chk. Past log records should be + ignored, future log records should not: we define the present. + */ + share->state.create_rename_lsn= translog_get_horizon(); + _ma_update_create_rename_lsn_on_disk(share, TRUE); + } + } share->base.default_rec_buff_size= max(share->base.pack_reclength, share->base.max_key_length); share->page_type= (share->base.transactional ? 
PAGECACHE_LSN_PAGE : diff --git a/storage/maria/ma_rename.c b/storage/maria/ma_rename.c index 5224698c614..3f2a0a9002c 100644 --- a/storage/maria/ma_rename.c +++ b/storage/maria/ma_rename.c @@ -60,13 +60,13 @@ int maria_rename(const char *old_name, const char *new_name) MY_SYNC_DIR : 0; if (sync_dir) { - uchar log_data[LSN_STORE_SIZE]; + uchar log_data[2 + 2]; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 3]; uint old_name_len= strlen(old_name), new_name_len= strlen(new_name); int2store(log_data, old_name_len); int2store(log_data + 2, new_name_len); log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= 2 + 2; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char *)old_name; log_array[TRANSLOG_INTERNAL_PARTS + 1].length= old_name_len; log_array[TRANSLOG_INTERNAL_PARTS + 2].str= (char *)new_name; @@ -93,10 +93,7 @@ int maria_rename(const char *old_name, const char *new_name) store LSN into file, needed for Recovery to not be confused if a RENAME happened (applying REDOs to the wrong table). */ - lsn_store(log_data, share->state.create_rename_lsn); - if (my_pwrite(share->kfile.file, log_data, sizeof(log_data), - sizeof(share->state.header) + 2, MYF(MY_NABP)) || - my_sync(share->kfile.file, MYF(MY_WME))) + if (_ma_update_create_rename_lsn_on_disk(share, TRUE)) { maria_close(info); DBUG_RETURN(1); diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c index 0b82a71f736..9019cc33295 100644 --- a/storage/maria/maria_chk.c +++ b/storage/maria/maria_chk.c @@ -1026,6 +1026,13 @@ static int maria_chk(HA_CHECK *param, my_string filename) } if (!error) { + /* + Tell the server's Recovery to ignore old REDOs on this table; we don't + know what the log's end LSN is now, so we just let the server know + that it will have to find and store it. 
+ */ + if (share->base.transactional) + share->state.create_rename_lsn= (LSN)ULONGLONG_MAX; if ((param->testflag & (T_REP_BY_SORT | T_REP_PARALLEL)) && (maria_is_any_key_active(share->state.key_map) || (rep_quick && !param->keys_in_use && !recreate)) && diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index 740808c7bbe..39b8ba2292c 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -886,13 +886,13 @@ void _ma_remap_file(MARIA_HA *info, my_off_t size); MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info, const byte *record); my_bool _ma_write_abort_default(MARIA_HA *info); -/* Functions needed by _ma_check (are overrided in MySQL) */ C_MODE_START +int _ma_repair_write_log_record(const HA_CHECK *param, MARIA_HA *info); +/* Functions needed by _ma_check (are overrided in MySQL) */ volatile int *_ma_killed_ptr(HA_CHECK *param); void _ma_check_print_error _VARARGS((HA_CHECK *param, const char *fmt, ...)); void _ma_check_print_warning _VARARGS((HA_CHECK *param, const char *fmt, ...)); void _ma_check_print_info _VARARGS((HA_CHECK *param, const char *fmt, ...)); -int _ma_repair_write_log_record(const HA_CHECK *param, MARIA_HA *info); C_MODE_END int _ma_flush_pending_blocks(MARIA_SORT_PARAM *param); @@ -909,6 +909,7 @@ int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages, ulong); int _ma_sync_table_files(const MARIA_HA *info); int _ma_initialize_data_file(File dfile, MARIA_SHARE *share); +int _ma_update_create_rename_lsn_on_disk(MARIA_SHARE *share, my_bool do_sync); void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn); -- cgit v1.2.1 From 34d15a59d8b3ad13ca9c090be91353eb519b29c0 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 28 Jun 2007 14:01:57 +0200 Subject: WL#3239 "log CREATE TABLE in Maria": write the log record _before_ creating the data file, and sync this log, so that the table cannot be used if log record didn't reach disk. The same way, we force the log in DROP/RENAME TABLE. 
Also in REPAIR TABLE though logging in this case is not polished. Making DELETE FROM t atomic: we log the record before starting the operation, and will finish this op at Recovery if needed. storage/maria/ma_check.c: comment. Force the log record for the log to have a complete history. storage/maria/ma_create.c: better conformance to the text of WL#3239 "log CREATE TABLE in Maria": write the log record before creating the data file. This ensures that the log can be applied to an old backup in all circumstances. errpos=2 was wrong. storage/maria/ma_delete_all.c: making DELETE FROM t atomic: we log the record before starting the operation, and will finish the operation at Recovery if needed. Thus there is no need to force files to disk. storage/maria/ma_delete_table.c: forcing the log before dropping a table, so that the log has the entire history. storage/maria/ma_loghandler.c: LOGREC_REDO_DELETE_ALL needs to set trn's rec_lsn so that the log's low-water mark and Checkpoint retain this record until the delete operation has finished. storage/maria/ma_rename.c: force the log before renaming a table, so that the log has a complete history. --- storage/maria/ma_check.c | 21 +++++++- storage/maria/ma_create.c | 112 +++++++++++++++++++++------------------- storage/maria/ma_delete_all.c | 65 +++++++++++------------ storage/maria/ma_delete_table.c | 9 ++-- storage/maria/ma_loghandler.c | 2 +- storage/maria/ma_rename.c | 7 +-- 6 files changed, 118 insertions(+), 98 deletions(-) diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index 72054ffe92a..cd10e87325c 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -5176,7 +5176,23 @@ int _ma_repair_write_log_record(const HA_CHECK *param, MARIA_HA *info) /* Only called from ha_maria.cc, not maria_check, so translog is inited */ if (share->base.transactional && !share->temporary) { - /* For now this record is only informative */ + /* + For now this record is only informative. 
It could serve when applying + logs to a backup, but that needs more thought. Assume table became + corrupted. It is repaired, then some writes happen to it. + Later we restore an old backup, and want to apply this REDO_REPAIR_TABLE + record. For it to give the same result as originally, the table should + be corrupted the same way, so applying previous REDOs should produce the + same corruption; that's really not guaranteed (different execution paths + in execution of REDOs vs runtime code so not same bugs hit, temporary + hardware issues not repeatable etc). Corruption may not be repeatable. + A reasonable solution is to execute the REDO_REPAIR_TABLE record and + check if the checksum of the resulting table matches what it was at the + end of the original repair (should be stored in log record); or execute + the REDO_REPAIR_TABLE if the checksum of the table-before-repair matches + was it was at the start of the original repair (should be stored in log + record). + */ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; uchar log_data[LSN_STORE_SIZE]; compile_time_assert(LSN_STORE_SIZE >= (FILEID_STORE_SIZE + 4)); @@ -5193,7 +5209,8 @@ int _ma_repair_write_log_record(const HA_CHECK *param, MARIA_HA *info) log_array[TRANSLOG_INTERNAL_PARTS + 0].length, sizeof(log_array)/sizeof(log_array[0]), - log_array, log_data))) + log_array, log_data) || + translog_flush(share->state.create_rename_lsn))) return 1; /* But this piece is really needed, to have the new table's content durable diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index b439d7760e7..8ad8f0564d7 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -620,7 +620,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, mi_int2store(share.state.header.state_info_length,MARIA_STATE_INFO_SIZE); mi_int2store(share.state.header.base_info_length,MARIA_BASE_INFO_SIZE); mi_int2store(share.state.header.base_pos,base_pos); - share.state.header.data_file_type= 
datafile_type; + share.state.header.data_file_type= share.data_file_type= datafile_type; share.state.header.org_data_file_type= org_datafile_type; share.state.header.language= (ci->language ? ci->language : default_charset_info->number); @@ -766,50 +766,6 @@ int maria_create(const char *name, enum data_file_type datafile_type, goto err; errpos=1; - if (!(flags & HA_DONT_TOUCH_DATA)) - { - if (ci->data_file_name) - { - char *dext= strrchr(ci->data_file_name, '.'); - int have_dext= dext && !strcmp(dext, MARIA_NAME_DEXT); - - if (tmp_table) - { - char *path; - /* chop off the table name, tempory tables use generated name */ - if ((path= strrchr(ci->data_file_name, FN_LIBCHAR))) - *path= '\0'; - fn_format(filename, name, ci->data_file_name, MARIA_NAME_DEXT, - MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_APPEND_EXT); - } - else - { - fn_format(filename, ci->data_file_name, "", MARIA_NAME_DEXT, - MY_UNPACK_FILENAME | - (have_dext ? MY_REPLACE_EXT : MY_APPEND_EXT)); - } - fn_format(linkname, name, "",MARIA_NAME_DEXT, - MY_UNPACK_FILENAME | MY_APPEND_EXT); - linkname_ptr= linkname; - create_flag=0; - } - else - { - fn_format(filename,name,"", MARIA_NAME_DEXT, - MY_UNPACK_FILENAME | MY_APPEND_EXT); - linkname_ptr= NULL; - create_flag=MY_DELETE_OLD; - } - if ((dfile= - my_create_with_symlink(linkname_ptr, filename, 0, create_mode, - MYF(MY_WME | create_flag | sync_dir))) < 0) - goto err; - errpos=3; - - share.data_file_type= datafile_type; - if (_ma_initialize_data_file(dfile, &share)) - goto err; - } DBUG_PRINT("info", ("write state info and base info")); if (_ma_state_info_write(file, &share.state, 2) || _ma_base_info_write(file, &share.base)) @@ -959,7 +915,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, if ((log_data == NULL) || my_pread(file, 1 + 2 + 2 + log_data, kfile_size_before_extension, 0, MYF(MY_NABP))) - goto err_no_lock; + goto err; /* remember if the data file was created or not, to know if Recovery can do it or not, in the future @@ 
-989,8 +945,14 @@ int maria_create(const char *name, enum data_file_type datafile_type, MySQL layer to be crash-safe, which it is not now (that would require work using the ddl_log of sql/sql_table.cc); when it is, we should reconsider the moment of writing this log record (before or after op, - under THR_LOCK_maria or not...), how to use it in Recovery, and force - the log. For now this record is just informative. + under THR_LOCK_maria or not...), how to use it in Recovery. + For now this record can serve when we apply logs to a backup, + so we sync it. This happens before the data file is created. If the data + file was created before, and we crashed before writing the log record, + at restart the table may be used, so we would not have a trustable + history in the log (impossible to apply this log to a backup). The way + we do it, if we crash before writing the log record then there is no + data file and the table cannot be used. Note that in case of TRUNCATE TABLE we also come here. When in CREATE/TRUNCATE (or DROP or RENAME or REPAIR) we have not called external_lock(), so have no TRN. It does not matter, as all these @@ -1001,20 +963,63 @@ int maria_create(const char *name, enum data_file_type datafile_type, &dummy_transaction_object, NULL, total_rec_length, sizeof(log_array)/sizeof(log_array[0]), - log_array, NULL))) - goto err_no_lock; + log_array, NULL) || + translog_flush(share.state.create_rename_lsn))) + goto err; /* store LSN into file, needed for Recovery to not be confused if a DROP+CREATE happened (applying REDOs to the wrong table). - If such direct my_pwrite() to a fixed offset is too "hackish", I can - call ma_state_info_write() again but it will be less efficient. 
*/ share.kfile.file= file; if (_ma_update_create_rename_lsn_on_disk(&share, FALSE)) - goto err_no_lock; + goto err; my_free(log_data, MYF(0)); } + if (!(flags & HA_DONT_TOUCH_DATA)) + { + if (ci->data_file_name) + { + char *dext= strrchr(ci->data_file_name, '.'); + int have_dext= dext && !strcmp(dext, MARIA_NAME_DEXT); + + if (tmp_table) + { + char *path; + /* chop off the table name, tempory tables use generated name */ + if ((path= strrchr(ci->data_file_name, FN_LIBCHAR))) + *path= '\0'; + fn_format(filename, name, ci->data_file_name, MARIA_NAME_DEXT, + MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_APPEND_EXT); + } + else + { + fn_format(filename, ci->data_file_name, "", MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | + (have_dext ? MY_REPLACE_EXT : MY_APPEND_EXT)); + } + fn_format(linkname, name, "",MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT); + linkname_ptr= linkname; + create_flag=0; + } + else + { + fn_format(filename,name,"", MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT); + linkname_ptr= NULL; + create_flag=MY_DELETE_OLD; + } + if ((dfile= + my_create_with_symlink(linkname_ptr, filename, 0, create_mode, + MYF(MY_WME | create_flag | sync_dir))) < 0) + goto err; + errpos=3; + + if (_ma_initialize_data_file(dfile, &share)) + goto err; + } + /* Enlarge files */ DBUG_PRINT("info", ("enlarge to keystart: %lu", (ulong) share.base.keystart)); @@ -1030,7 +1035,6 @@ int maria_create(const char *name, enum data_file_type datafile_type, if (my_chsize(dfile,share.base.min_pack_length*ci->reloc_rows,0,MYF(0))) goto err; #endif - errpos=2; if ((sync_dir && my_sync(dfile, MYF(0))) || my_close(dfile,MYF(0))) goto err; } diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c index a08e259d09b..3e531b518f8 100644 --- a/storage/maria/ma_delete_all.c +++ b/storage/maria/ma_delete_all.c @@ -17,7 +17,7 @@ /* This clears the status information and truncates files */ #include "maria_def.h" -#include "trnman_public.h" +#include "trnman.h" /** @brief deletes 
all rows from a table @@ -52,6 +52,25 @@ int maria_delete_all_rows(MARIA_HA *info) if (_ma_mark_file_changed(info)) goto err; + if (log_record) + { + /* + This record will be used by Recovery to finish the deletion if it + crashed. We force it because it's a non-undoable operation. + */ + LSN lsn; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; + uchar log_data[FILEID_STORE_SIZE]; + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + if (unlikely(translog_write_record(&lsn, LOGREC_REDO_DELETE_ALL, + info->trn, share, 0, + sizeof(log_array)/sizeof(log_array[0]), + log_array, log_data) || + translog_flush(lsn))) + goto err; + } + info->state->records=info->state->del=state->split=0; state->changed= 0; /* File is optimized */ state->dellink = HA_OFFSET_ERROR; @@ -78,6 +97,12 @@ int maria_delete_all_rows(MARIA_HA *info) if (_ma_initialize_data_file(info->dfile.file, share)) goto err; + /* + The operations above on the index/data file will be forced to disk at + Checkpoint or maria_close() time. So we can reset: + */ + info->trn->rec_lsn= LSN_IMPOSSIBLE; + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); #ifdef HAVE_MMAP /* Resize mmaped area */ @@ -85,36 +110,6 @@ int maria_delete_all_rows(MARIA_HA *info) _ma_remap_file(info, (my_off_t)0); rw_unlock(&info->s->mmap_lock); #endif - if (log_record) - { - /* For now this record is only informative */ - LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; - uchar log_data[FILEID_STORE_SIZE]; - log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); - if (unlikely(translog_write_record(&share->state.create_rename_lsn, - LOGREC_REDO_DELETE_ALL, - info->trn, share, 0, - sizeof(log_array)/sizeof(log_array[0]), - log_array, log_data))) - goto err; - /* - store LSN into file. 
It is an optimization so that all old REDOs for - this table are ignored (scenario: checkpoint, INSERT1s, DELETE ALL; - INSERT2s, crash: then Recovery can skip INSERT1s). It also allows us to - ignore the present record at Recovery. - Note that storing the LSN could not be done by _ma_writeinfo() above as - the table is locked at this moment. So we need to do it by ourselves. - */ - if (_ma_update_create_rename_lsn_on_disk(share, FALSE) || - _ma_sync_table_files(info)) - goto err; - /** - @todo RECOVERY Until we take into account the log record above - for log-low-water-mark calculation and use it in Recovery, we need - to sync above. - */ - } allow_break(); /* Allow SIGHUP & SIGINT */ DBUG_RETURN(0); @@ -123,9 +118,11 @@ err: int save_errno=my_errno; VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); info->update|=HA_STATE_WRITTEN; /* Buffer changed */ - /** @todo RECOVERY until we use the log record above we have to sync */ - if (log_record &&_ma_sync_table_files(info) && !save_errno) - save_errno= my_errno; + /** + @todo RECOVERY if we come here, Recovery may later apply the REDO above, + which may be wrong. Not fixing it now, as anyway this way of deleting + rows will have to be re-examined when we have versioning. + */ allow_break(); /* Allow SIGHUP & SIGINT */ DBUG_RETURN(my_errno=save_errno); } diff --git a/storage/maria/ma_delete_table.c b/storage/maria/ma_delete_table.c index 990714043bf..39a286ad1f7 100644 --- a/storage/maria/ma_delete_table.c +++ b/storage/maria/ma_delete_table.c @@ -78,9 +78,9 @@ int maria_delete_table(const char *name) { /* For this log record to be of any use for Recovery, we need the upper - MySQL layer to be crash-safe in DDLs; when it is we should reconsider - the moment of writing this log record, how to use it in Recovery, and - force the log. For now this record is only informative. + MySQL layer to be crash-safe in DDLs. + For now this record can serve when we apply logs to a backup, so we sync + it. 
*/ LSN lsn; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; @@ -91,7 +91,8 @@ int maria_delete_table(const char *name) log_array[TRANSLOG_INTERNAL_PARTS + 0].length, sizeof(log_array)/sizeof(log_array[0]), - log_array, NULL))) + log_array, NULL) || + translog_flush(lsn))) DBUG_RETURN(1); } diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index 79bf44046b1..3a8e01da09a 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -375,7 +375,7 @@ static LOG_DESC INIT_LOGREC_REDO_DROP_TABLE= static LOG_DESC INIT_LOGREC_REDO_DELETE_ALL= {LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE, - NULL, NULL, NULL, 0, + NULL, write_hook_for_redo, NULL, 0, "redo_delete_all", TRUE, NULL, NULL}; static LOG_DESC INIT_LOGREC_REDO_REPAIR_TABLE= diff --git a/storage/maria/ma_rename.c b/storage/maria/ma_rename.c index 3f2a0a9002c..8f42a5b931a 100644 --- a/storage/maria/ma_rename.c +++ b/storage/maria/ma_rename.c @@ -76,15 +76,16 @@ int maria_rename(const char *old_name, const char *new_name) MySQL layer to be crash-safe, which it is not now (that would require work using the ddl_log of sql/sql_table.cc); when it is, we should reconsider the moment of writing this log record (before or after op, - under THR_LOCK_maria or not...), how to use it in Recovery, and force - the log. For now this record is just informative. + under THR_LOCK_maria or not...), how to use it in Recovery. + For now it can serve to apply logs to a backup so we sync it. 
*/ if (unlikely(translog_write_record(&share->state.create_rename_lsn, LOGREC_REDO_RENAME_TABLE, &dummy_transaction_object, NULL, 2 + 2 + old_name_len + new_name_len, sizeof(log_array)/sizeof(log_array[0]), - log_array, NULL))) + log_array, NULL) || + translog_flush(share->state.create_rename_lsn))) { maria_close(info); DBUG_RETURN(1); -- cgit v1.2.1 From 37a0005ec5fdcc9abd90350c9be4760e4e1d823f Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 1 Jul 2007 16:20:57 +0300 Subject: Fixed REPAIR/CHECK/ANALYZE TABLE for tables with new BLOCK-ROW format. Fixed maria_chk to repair BLOCK-ROW tables. Added CREATE options ROW_FORMAT=PAGE & TRANSACTIONAL= 0|1 More DBUG information in a lot of functions Some minor code cleanups Enable handler errors earlier for better clear text error messages at handler startup / standalone usage. Don't print NULL strings in my_create_with_symlink(); Fixes core dump when used with --debug include/maria.h: Added extra variables needed for REPAIR with BLOCK records include/my_base.h: Added argument for opening copy of maria table without a shared object include/my_handler.h: Prototypes for my_handler_error_register() & my_handler_error_unregister() include/pagecache.h: Added PAGECACHE_READ_UNKNOWN_PAGE mysql-test/include/ps_conv.inc: Enforce creation of table as MyISAM (to allow one to use --default-storage-engine) mysql-test/r/maria.result: Moved some things to maria-connect.test Updared results as REPAIR now works Added tests for creation option TRANSACTIONAL mysql-test/r/ps_2myisam.result: Enforce creation of table as MyISAM (to allow one to use --default-storage-engine) mysql-test/r/ps_3innodb.result: Enforce creation of table as MyISAM (to allow one to use --default-storage-engine) mysql-test/r/ps_4heap.result: Enforce creation of table as MyISAM (to allow one to use --default-storage-engine) mysql-test/r/ps_5merge.result: Enforce creation of table as MyISAM (to allow one to use --default-storage-engine) mysql-test/r/ps_7ndb.result: 
Enforce creation of table as MyISAM (to allow one to use --default-storage-engine) mysql-test/r/ps_maria.result: Enforce creation of table as MyISAM (to allow one to use --default-storage-engine) mysql-test/t/maria.test: Moved some things to maria-connect.test Updated results as REPAIR now works Added tests for creation option TRANSACTIONAL mysys/mf_iocache.c: More debugging mysys/mf_tempfile.c: Added missing close() mysys/my_error.c: init_glob_errs() is now done in my_init() mysys/my_handler.c: Added functions to initialize handler error messages mysys/my_init.c: Moved init_glob_errs() here. mysys/my_open.c: More comments More debugging Code cleanup (join multiple code paths) and indentation fixes. No change in logic. mysys/my_symlink2.c: Don't print NULL strings sql/handler.cc: Added printing of PAGE row type Moved out initializing of handler errors to allow handler to give better error messages at startup sql/handler.h: ROW_TYPE_PAGES -> ROW_TYPE_PAGE sql/lex.h: Added 'PAGE' and 'TRANSACTIONAL' sql/mysqld.cc: Initialize handler error messages early to get better error messages from handler startup sql/sql_show.cc: ROW_TYPE_PAGES -> ROW_TYPE_PAGE sql/sql_table.cc: Removed not needed initializer sql/sql_yacc.yy: Added CREATE options ROW_FORMAT=PAGE and TRANSACTIONAL=[0|1] sql/table.cc: Store transactional flag in .frm More comments sql-bench/example: Better example sql/table.h: Added transactional table option storage/maria/ha_maria.cc: More debug information Enable REPAIR Detect usage of TRANSACTIONAL table option storage/maria/ma_bitmap.c: More comments (from Guilhem) storage/maria/ma_blockrec.c: SANITY_CHECK -> SANITY_CHECKS (fixed typo) Write out pages on delete even if there are no rows. (Fixed problem with REPAIR) Converted some ASSERTS to runtime checks (for better REPAIR) Fixed bug when scanning rows More DBUG information storage/maria/ma_check.c: Partial rewrite to allow REPAIR of BLOCK/PAGE format.
Repair of BLOCK format rows is for now only done with 'maria_repair()' (= repair through key cache) The new logic to repair rows with BLOCK format is: - Create new, unrelated MARIA_HA of the table - Create new datafile and associate it with new handler - Reset all statistic information in new handler - Copy all data to new handler with normal write operations - Move state of new handler to old handler - Close new handler - Close data file in old handler - Rename old data file to new data file. - Reopen data file in old handler storage/maria/ma_close.c: Removed not needed block storage/maria/ma_create.c: Swap arguments to _ma_initialize_data_file() storage/maria/ma_delete_all.c: Split maria_delete_all_rows() into two functions to allow REPAIR to easily reset all status information. storage/maria/ma_dynrec.c: Added checksum argument to _ma_rec_check (multi-thread fix) storage/maria/ma_info.c: Indentation fix storage/maria/ma_init.c: Register error message to get better error message on init and when using as standalone module. storage/maria/ma_loghandler.c: Fixed typo that disabled some error detection by valgrind storage/maria/ma_open.c: Added 'calc_check_checksum()' Don't log things during repair Added option HA_OPEN_COPY to allow one to open a Maria table with an independent share (required by REPAIR) storage/maria/ma_pagecache.c: Fixed some compiler warnings Added support for PAGECACHE_READ_UNKNOWN_PAGE (used for scanning file without knowing page types) storage/maria/ma_test_all.sh: More tests of REPAIR storage/maria/ma_update.c: Optimized checksum code storage/maria/maria_chk.c: Use DBUG_SET_INITIAL() to get DBUG to work with --parallel-repair Ensure we always use maria_repair() for BLOCK format (for now) More DBUG information storage/maria/maria_def.h: For now, always run with more checks (SANITY_CHECKS) Added share->calc_check_checksum to be used with REPAIR / CHECK table.
Swaped arguments to _ma_initialize_data_file() storage/myisam/ft_stopwords.c: Added DBUG information mysql-test/r/maria-connect.result: New BitKeeper file ``mysql-test/r/maria-connect.result'' mysql-test/t/maria-connect.test: New BitKeeper file ``mysql-test/t/maria-connect.test'' --- include/maria.h | 5 +- include/my_base.h | 1 + include/my_handler.h | 3 +- include/pagecache.h | 4 +- mysql-test/include/ps_conv.inc | 2 +- mysql-test/r/maria-connect.result | 23 ++ mysql-test/r/maria.result | 73 +++-- mysql-test/r/ps_2myisam.result | 2 +- mysql-test/r/ps_3innodb.result | 2 +- mysql-test/r/ps_4heap.result | 2 +- mysql-test/r/ps_5merge.result | 4 +- mysql-test/r/ps_7ndb.result | 2 +- mysql-test/r/ps_maria.result | 2 +- mysql-test/t/maria-connect.test | 39 +++ mysql-test/t/maria.test | 42 +-- mysys/mf_iocache.c | 1 + mysys/mf_tempfile.c | 1 + mysys/my_error.c | 5 - mysys/my_handler.c | 66 +++++ mysys/my_init.c | 1 + mysys/my_open.c | 73 ++--- mysys/my_symlink2.c | 4 +- sql-bench/example | 9 +- sql/handler.cc | 8 +- sql/handler.h | 7 +- sql/lex.h | 2 + sql/mysqld.cc | 4 + sql/sql_show.cc | 9 +- sql/sql_table.cc | 3 +- sql/sql_yacc.yy | 11 + sql/table.cc | 8 +- sql/table.h | 1 + storage/maria/ha_maria.cc | 31 ++- storage/maria/ma_bitmap.c | 13 + storage/maria/ma_blockrec.c | 54 ++-- storage/maria/ma_check.c | 570 +++++++++++++++++++++++++++++--------- storage/maria/ma_close.c | 4 +- storage/maria/ma_create.c | 4 +- storage/maria/ma_delete_all.c | 54 ++-- storage/maria/ma_dynrec.c | 5 +- storage/maria/ma_info.c | 1 + storage/maria/ma_init.c | 1 + storage/maria/ma_loghandler.c | 4 +- storage/maria/ma_open.c | 8 +- storage/maria/ma_pagecache.c | 14 +- storage/maria/ma_test_all.sh | 21 ++ storage/maria/ma_update.c | 3 +- storage/maria/maria_chk.c | 42 +-- storage/maria/maria_def.h | 11 +- storage/myisam/ft_stopwords.c | 17 +- 50 files changed, 944 insertions(+), 332 deletions(-) create mode 100644 mysql-test/r/maria-connect.result create mode 100644 
mysql-test/t/maria-connect.test diff --git a/include/maria.h b/include/maria.h index fbf4bc68c29..c7c32ff0e78 100644 --- a/include/maria.h +++ b/include/maria.h @@ -327,17 +327,18 @@ typedef struct st_maria_sort_info pthread_mutex_t mutex; pthread_cond_t cond; #endif - MARIA_HA *info; + MARIA_HA *info, *new_info; HA_CHECK *param; char *buff; SORT_KEY_BLOCKS *key_block, *key_block_end; SORT_FT_BUF *ft_buf; my_off_t filelength, dupp, buff_length; + ulonglong page; ha_rows max_records; uint current_key, total_keys; uint got_error, threads_running; myf myf_rw; - enum data_file_type new_data_file_type; + enum data_file_type new_data_file_type, org_data_file_type; } MARIA_SORT_INFO; typedef struct st_maria_sort_param diff --git a/include/my_base.h b/include/my_base.h index 38376adfe85..952c325f911 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -47,6 +47,7 @@ #define HA_OPEN_FOR_REPAIR 32 /* open even if crashed */ #define HA_OPEN_FROM_SQL_LAYER 64 #define HA_OPEN_MMAP 128 /* open memory mapped */ +#define HA_OPEN_COPY 256 /* Open copy (for repair) */ /* The following is parameter to ha_rkey() how to use key */ diff --git a/include/my_handler.h b/include/my_handler.h index 13dcd01a332..1a1235d0588 100644 --- a/include/my_handler.h +++ b/include/my_handler.h @@ -110,7 +110,8 @@ extern int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, uint *diff_pos); extern HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a); - +extern void my_handler_error_register(void); +extern void my_handler_error_unregister(void); /* Inside an in-memory data record, memory pointers to pieces of the record (like BLOBs) are stored in their native byte order and in diff --git a/include/pagecache.h b/include/pagecache.h index 12906676959..f505dec0fe9 100644 --- a/include/pagecache.h +++ b/include/pagecache.h @@ -34,7 +34,9 @@ enum pagecache_page_type /* the page does not contain LSN */ PAGECACHE_PLAIN_PAGE, /* the page contain LSN (maria tablespace page) */ - PAGECACHE_LSN_PAGE + 
PAGECACHE_LSN_PAGE, + /* Page type used when scanning file and we don't care about the type */ + PAGECACHE_READ_UNKNOWN_PAGE }; /* diff --git a/mysql-test/include/ps_conv.inc b/mysql-test/include/ps_conv.inc index 0dd819f6e62..b0c0f9bd9e0 100644 --- a/mysql-test/include/ps_conv.inc +++ b/mysql-test/include/ps_conv.inc @@ -52,7 +52,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +eval create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/r/maria-connect.result b/mysql-test/r/maria-connect.result new file mode 100644 index 00000000000..e232f564d10 --- /dev/null +++ b/mysql-test/r/maria-connect.result @@ -0,0 +1,23 @@ +set global storage_engine=maria; +set session storage_engine=maria; +drop table if exists t1; +SET SQL_WARNINGS=1; +RESET MASTER; +set binlog_format=statement; +CREATE TABLE t1 (a int primary key); +insert t1 values (1),(2),(3); +insert t1 values (4),(2),(5); +ERROR 23000: Duplicate entry '2' for key 'PRIMARY' +select * from t1; +a +1 +2 +3 +4 +SHOW BINLOG EVENTS FROM 102; +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 102 Query 1 200 use `test`; CREATE TABLE t1 (a int primary key) +master-bin.000001 200 Query 1 291 use `test`; insert t1 values (1),(2),(3) +master-bin.000001 291 Query 1 382 use `test`; insert t1 values (4),(2),(5) +drop table t1; +set binlog_format=default; diff --git a/mysql-test/r/maria.result b/mysql-test/r/maria.result index 3ec9af0fffa..a1c49fdfe4f 100644 --- a/mysql-test/r/maria.result +++ b/mysql-test/r/maria.result @@ -2,25 +2,6 @@ set global storage_engine=maria; set session storage_engine=maria; drop table if exists t1,t2; SET SQL_WARNINGS=1; -RESET MASTER; -set binlog_format=statement; -CREATE TABLE t1 (a int primary key); -insert t1 values (1),(2),(3); -insert t1 values (4),(2),(5); -ERROR 23000: 
Duplicate entry '2' for key 'PRIMARY' -select * from t1; -a -1 -2 -3 -4 -SHOW BINLOG EVENTS FROM 102; -Log_name Pos Event_type Server_id End_log_pos Info -master-bin.000001 102 Query 1 200 use `test`; CREATE TABLE t1 (a int primary key) -master-bin.000001 200 Query 1 291 use `test`; insert t1 values (1),(2),(3) -master-bin.000001 291 Query 1 382 use `test`; insert t1 values (4),(2),(5) -drop table t1; -set binlog_format=default; CREATE TABLE t1 ( STRING_DATA char(255) default NULL, KEY string_data (STRING_DATA) @@ -618,7 +599,7 @@ t1 1 a 1 a A NULL NULL NULL YES BTREE disabled alter table t1 enable keys; show keys from t1; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment -t1 1 a 1 a A NULL NULL NULL YES BTREE disabled +t1 1 a 1 a A NULL NULL NULL YES BTREE alter table t1 engine=heap; alter table t1 disable keys; Warnings: @@ -853,19 +834,19 @@ _id DELETE FROM t1 WHERE _id < 8; SHOW TABLE STATUS LIKE 't1'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 MARIA 10 Paged 2 # # # # 0 # # # # # # +t1 MARIA 10 Page 2 # # # # 0 # # # # # # CHECK TABLE t1 EXTENDED; Table Op Msg_type Msg_text test.t1 check status OK OPTIMIZE TABLE t1; Table Op Msg_type Msg_text -test.t1 optimize status Table is already up to date +test.t1 optimize status OK CHECK TABLE t1 EXTENDED; Table Op Msg_type Msg_text test.t1 check status OK SHOW TABLE STATUS LIKE 't1'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 MARIA 10 Paged 2 # # # # 0 # # # # # # +t1 MARIA 10 Page 2 # # # # 0 # # # # # # SELECT _id FROM t1; _id 8 @@ -912,7 +893,7 @@ _id DELETE FROM t1 WHERE _id < 8; SHOW TABLE STATUS LIKE 't1'; Name Engine Version Row_format Rows 
Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 MARIA 10 Paged 2 # # # # 0 # # # # # # +t1 MARIA 10 Page 2 # # # # 0 # # # # # # CHECK TABLE t1 EXTENDED; Table Op Msg_type Msg_text test.t1 check status OK @@ -924,7 +905,7 @@ Table Op Msg_type Msg_text test.t1 check status OK SHOW TABLE STATUS LIKE 't1'; Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment -t1 MARIA 10 Paged 2 # # # # 0 # # # # # # +t1 MARIA 10 Page 2 # # # # 0 # # # # # # SELECT _id FROM t1; _id 8 @@ -1598,7 +1579,7 @@ alter table t1 disable keys; alter table t1 enable keys; show keys from t1; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment -t1 1 a 1 a A 8 NULL NULL YES BTREE disabled +t1 1 a 1 a A 8 NULL NULL YES BTREE drop table t1; show create table t1; show create table t1; @@ -1811,3 +1792,43 @@ CHECK TABLE t1; Table Op Msg_type Msg_text test.t1 check status OK DROP TABLE t1; +create table t1 (a int) transactional=0; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 TRANSACTIONAL=0 +drop table t1; +create table t1 (a int) row_format=dynamic transactional=0; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC TRANSACTIONAL=0 +drop table t1; +create table t1 (a int) row_format=dynamic transactional=1; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC TRANSACTIONAL=1 +alter table t1 row_format=PAGE; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) 
ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE TRANSACTIONAL=1 +alter table t1 transactional=0; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE TRANSACTIONAL=0 +drop table t1; +create table t1 (a int) row_format=PAGE; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE +drop table t1; diff --git a/mysql-test/r/ps_2myisam.result b/mysql-test/r/ps_2myisam.result index 2bfd6d31ac9..9330ac2853e 100644 --- a/mysql-test/r/ps_2myisam.result +++ b/mysql-test/r/ps_2myisam.result @@ -1756,7 +1756,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/r/ps_3innodb.result b/mysql-test/r/ps_3innodb.result index 607a0426bd7..4972942e6f3 100644 --- a/mysql-test/r/ps_3innodb.result +++ b/mysql-test/r/ps_3innodb.result @@ -1739,7 +1739,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/r/ps_4heap.result b/mysql-test/r/ps_4heap.result index f4eec0c610c..bb17d0d161c 100644 --- a/mysql-test/r/ps_4heap.result +++ b/mysql-test/r/ps_4heap.result @@ -1740,7 +1740,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/r/ps_5merge.result 
b/mysql-test/r/ps_5merge.result index 38e4626d59c..1ed2136151b 100644 --- a/mysql-test/r/ps_5merge.result +++ b/mysql-test/r/ps_5merge.result @@ -1676,7 +1676,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, @@ -4690,7 +4690,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/r/ps_7ndb.result b/mysql-test/r/ps_7ndb.result index 432a07df9d0..af693de986b 100644 --- a/mysql-test/r/ps_7ndb.result +++ b/mysql-test/r/ps_7ndb.result @@ -1739,7 +1739,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/r/ps_maria.result b/mysql-test/r/ps_maria.result index 9268c44eecd..b1ea905c406 100644 --- a/mysql-test/r/ps_maria.result +++ b/mysql-test/r/ps_maria.result @@ -1756,7 +1756,7 @@ set @arg14= 'abc'; set @arg14= NULL ; set @arg15= CAST('abc' as binary) ; set @arg15= NULL ; -create table t5 as select +create table t5 engine = MyISAM as select 8 as const01, @arg01 as param01, 8.0 as const02, @arg02 as param02, 80.00000000000e-1 as const03, @arg03 as param03, diff --git a/mysql-test/t/maria-connect.test b/mysql-test/t/maria-connect.test new file mode 100644 index 00000000000..aedfa92e278 --- /dev/null +++ b/mysql-test/t/maria-connect.test @@ -0,0 +1,39 @@ +# +# Test that can't be run with --extern +# + +-- source include/have_maria.inc + +let 
$default=`select @@global.storage_engine`; +set global storage_engine=maria; +set session storage_engine=maria; + +# Initialise +--disable_warnings +drop table if exists t1; +--enable_warnings +SET SQL_WARNINGS=1; + +# +# UNIQUE key test +# +# as long as maria cannot rollback, binlog should contain both inserts +# +RESET MASTER; +set binlog_format=statement; +CREATE TABLE t1 (a int primary key); +insert t1 values (1),(2),(3); +--error 1582 +insert t1 values (4),(2),(5); +select * from t1; +SHOW BINLOG EVENTS FROM 102; +drop table t1; +set binlog_format=default; + +# End of 5.2 tests + +--disable_result_log +--disable_query_log +eval set global storage_engine=$default; +--enable_result_log +--enable_query_log diff --git a/mysql-test/t/maria.test b/mysql-test/t/maria.test index 763abbd9d25..25000abd426 100644 --- a/mysql-test/t/maria.test +++ b/mysql-test/t/maria.test @@ -15,22 +15,6 @@ drop table if exists t1,t2; --enable_warnings SET SQL_WARNINGS=1; -# -# UNIQUE key test -# -# as long as maria cannot rollback, binlog should contain both inserts -# -RESET MASTER; -set binlog_format=statement; -CREATE TABLE t1 (a int primary key); -insert t1 values (1),(2),(3); ---error 1582 -insert t1 values (4),(2),(5); -select * from t1; -SHOW BINLOG EVENTS FROM 102; -drop table t1; -set binlog_format=default; - # # Test problem with CHECK TABLE; # @@ -597,10 +581,7 @@ insert t1 select * from t2; show keys from t1; alter table t1 enable keys; show keys from t1; -#TODO after we have repair: delete the following --disable-warnings ---disable_warnings alter table t1 engine=heap; ---enable_warnings alter table t1 disable keys; show keys from t1; drop table t1,t2; @@ -1072,10 +1053,10 @@ create table t1 (a int not null, key key_block_size=1024 (a)); --error 1064 create table t1 (a int not null, key `a` key_block_size=1024 (a)); - # # Test of changing MI_KEY_BLOCK_LENGTH # + CREATE TABLE t1 ( c1 INT, c2 VARCHAR(300), @@ -1116,6 +1097,27 @@ DELETE FROM t1 WHERE c1 >= 10; CHECK TABLE t1; 
DROP TABLE t1; +# +# Test that TRANSACTIONAL is preserved +# + +create table t1 (a int) transactional=0; +show create table t1; +drop table t1; +create table t1 (a int) row_format=dynamic transactional=0; +show create table t1; +drop table t1; +create table t1 (a int) row_format=dynamic transactional=1; +show create table t1; +alter table t1 row_format=PAGE; +show create table t1; +alter table t1 transactional=0; +show create table t1; +drop table t1; +create table t1 (a int) row_format=PAGE; +show create table t1; +drop table t1; + # End of 5.2 tests --disable_result_log diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c index e40490776f8..8b8ba540a4e 100644 --- a/mysys/mf_iocache.c +++ b/mysys/mf_iocache.c @@ -1696,6 +1696,7 @@ int my_b_flush_io_cache(IO_CACHE *info, int need_append_buffer_lock) my_bool append_cache; my_off_t pos_in_file; DBUG_ENTER("my_b_flush_io_cache"); + DBUG_PRINT("enter", ("cache: 0x%lx", (long) info)); if (!(append_cache = (info->type == SEQ_READ_APPEND))) need_append_buffer_lock=0; diff --git a/mysys/mf_tempfile.c b/mysys/mf_tempfile.c index 6c412157937..a820d09a2c6 100644 --- a/mysys/mf_tempfile.c +++ b/mysys/mf_tempfile.c @@ -107,6 +107,7 @@ File create_temp_file(char *to, const char *dir, const char *prefix, if (org_file >= 0 && file < 0) { int tmp=my_errno; + close(org_file); (void) my_delete(to, MYF(MY_WME | ME_NOINPUT)); my_errno=tmp; } diff --git a/mysys/my_error.c b/mysys/my_error.c index 48392fe84c3..00c78b64e0e 100644 --- a/mysys/my_error.c +++ b/mysys/my_error.c @@ -84,11 +84,6 @@ int my_error(int nr, myf MyFlags, ...) if (nr <= meh_p->meh_last) break; -#ifdef SHARED_LIBRARY - if ((meh_p == &my_errmsgs_globerrs) && ! globerrs[0]) - init_glob_errs(); -#endif - /* get the error message string. Default, if NULL or empty string (""). */ if (! (format= (meh_p && (nr >= meh_p->meh_first)) ? meh_p->meh_errmsgs[nr - meh_p->meh_first] : NULL) || ! 
*format) diff --git a/mysys/my_handler.c b/mysys/my_handler.c index 757cbe490f8..bf75d992f9d 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -19,6 +19,7 @@ #include #include #include +#include int ha_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length, uchar *b, uint b_length, my_bool part_key, @@ -563,3 +564,68 @@ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) return keyseg; } + +/* + Errors a handler can give you +*/ + +static const char *handler_error_messages[]= +{ + "Didn't find key on read or update", + "Duplicate key on write or update", + "Undefined handler error 122", + "Someone has changed the row since it was read (while the table was locked to prevent it)", + "Wrong index given to function", + "Undefined handler error 125", + "Index file is crashed", + "Record file is crashed", + "Out of memory in engine", + "Undefined handler error 129", + "Incorrect file format", + "Command not supported by database", + "Old database file", + "No record read before update", + "Record was already deleted (or record file crashed)", + "No more room in record file", + "No more room in index file", + "No more records (read after end of file)", + "Unsupported extension used for table", + "Too big row", + "Wrong create options", + "Duplicate unique key or constraint on write or update", + "Unknown character set used in table", + "Conflicting table definitions in sub-tables of MERGE table", + "Table is crashed and last repair failed", + "Table was marked as crashed and should be repaired", + "Lock timed out; Retry transaction", + "Lock table is full; Restart program with a larger locktable", + "Updates are not allowed under a read only transactions", + "Lock deadlock; Retry transaction", + "Foreign key constraint is incorrectly formed", + "Cannot add a child row", + "Cannot delete a parent row", + "Unknown handler error" +}; + + +/* + Register handler error messages for usage with my_error() + + NOTES + This is safe to call multiple times as 
my_error_register() + will ignore calls to register already registered error numbers. +*/ + + +void my_handler_error_register(void) +{ + my_error_register(handler_error_messages, HA_ERR_FIRST, + HA_ERR_FIRST+ array_elements(handler_error_messages)-1); +} + + +void my_handler_error_unregister(void) +{ + my_error_unregister(HA_ERR_FIRST, + HA_ERR_FIRST+ array_elements(handler_error_messages)-1); +} diff --git a/mysys/my_init.c b/mysys/my_init.c index e8a55fdc1e6..2023a7da223 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -77,6 +77,7 @@ my_bool my_init(void) mysys_usage_id++; my_umask= 0660; /* Default umask for new files */ my_umask_dir= 0700; /* Default umask for new directories */ + init_glob_errs(); #if defined(THREAD) && defined(SAFE_MUTEX) safe_mutex_global_init(); /* Must be called early */ #endif diff --git a/mysys/my_open.c b/mysys/my_open.c index 6fe7883b99b..b4bb7e25810 100644 --- a/mysys/my_open.c +++ b/mysys/my_open.c @@ -71,6 +71,7 @@ File my_open(const char *FileName, int Flags, myf MyFlags) #else fd = open((my_string) FileName, Flags); #endif + DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_OPEN, EE_FILENOTFOUND, MyFlags)); } /* my_open */ @@ -124,61 +125,65 @@ int my_close(File fd, myf MyFlags) SYNOPSIS my_register_filename() - fd - FileName - type_file_type + fd File number opened, -1 if error on open + FileName File name + type_file_type How file was created + error_message_number Error message number if caller got error (fd == -1) + MyFlags Flags for my_close() + + RETURN + -1 error + # Filenumber + */ File my_register_filename(File fd, const char *FileName, enum file_type type_of_file, uint error_message_number, myf MyFlags) { + DBUG_ENTER("my_register_filename"); if ((int) fd >= 0) { if ((uint) fd >= my_file_limit) { #if defined(THREAD) && !defined(HAVE_PREAD) - (void) my_close(fd,MyFlags); - my_errno=EMFILE; - if (MyFlags & (MY_FFNF | MY_FAE | MY_WME)) - my_error(EE_OUT_OF_FILERESOURCES, MYF(ME_BELL+ME_WAITTANG), - FileName, 
my_errno); - return(-1); -#endif + my_errno= EMFILE; +#else thread_safe_increment(my_file_opened,&THR_LOCK_open); - return(fd); /* safeguard */ + DBUG_RETURN(fd); /* safeguard */ +#endif } - pthread_mutex_lock(&THR_LOCK_open); - if ((my_file_info[fd].name = (char*) my_strdup(FileName,MyFlags))) + else { - my_file_opened++; - my_file_info[fd].type = type_of_file; + pthread_mutex_lock(&THR_LOCK_open); + if ((my_file_info[fd].name = (char*) my_strdup(FileName,MyFlags))) + { + my_file_opened++; + my_file_info[fd].type = type_of_file; #if defined(THREAD) && !defined(HAVE_PREAD) - pthread_mutex_init(&my_file_info[fd].mutex,MY_MUTEX_INIT_FAST); + pthread_mutex_init(&my_file_info[fd].mutex,MY_MUTEX_INIT_FAST); #endif + pthread_mutex_unlock(&THR_LOCK_open); + DBUG_PRINT("exit",("fd: %d",fd)); + DBUG_RETURN(fd); + } pthread_mutex_unlock(&THR_LOCK_open); - DBUG_PRINT("exit",("fd: %d",fd)); - return(fd); + my_errno= ENOMEM; } - pthread_mutex_unlock(&THR_LOCK_open); (void) my_close(fd, MyFlags); - fd= -1; - my_errno=ENOMEM; } else - my_errno=errno; - DBUG_PRINT("error",("Got error %d on open",my_errno)); - if (MyFlags & (MY_FFNF | MY_FAE | MY_WME)) { - if (my_errno == EMFILE) { - DBUG_PRINT("error",("print err: %d",EE_OUT_OF_FILERESOURCES)); - my_error(EE_OUT_OF_FILERESOURCES, MYF(ME_BELL+ME_WAITTANG), - FileName, my_errno); - } else { - DBUG_PRINT("error",("print err: %d",error_message_number)); - my_error(error_message_number, MYF(ME_BELL+ME_WAITTANG), - FileName, my_errno); - } + my_errno= errno; + + DBUG_PRINT("error",("Got error %d on open", my_errno)); + if (MyFlags & (MY_FFNF | MY_FAE | MY_WME)) + { + if (my_errno == EMFILE) + error_message_number= EE_OUT_OF_FILERESOURCES; + DBUG_PRINT("error",("print err: %d",error_message_number)); + my_error(error_message_number, MYF(ME_BELL+ME_WAITTANG), + FileName, my_errno); } - return(fd); + DBUG_RETURN(-1); } #ifdef __WIN__ diff --git a/mysys/my_symlink2.c b/mysys/my_symlink2.c index 279672be11c..932f2b6424f 100644 --- 
a/mysys/my_symlink2.c +++ b/mysys/my_symlink2.c @@ -33,7 +33,9 @@ File my_create_with_symlink(const char *linkname, const char *filename, int create_link; char abs_linkname[FN_REFLEN]; DBUG_ENTER("my_create_with_symlink"); - DBUG_PRINT("enter", ("linkname: %s filename: %s", linkname, filename)); + DBUG_PRINT("enter", + ("linkname: %s filename: %s", linkname ? linkname : "NULL", + filename)); if (my_disable_symlinks) { diff --git a/sql-bench/example b/sql-bench/example index df2a9b8be69..cb39fad819e 100644 --- a/sql-bench/example +++ b/sql-bench/example @@ -6,15 +6,14 @@ machine="Linux-x64" # InnoDB tests -./run-all-tests --suffix=-innodb --comments="Engine=InnoDB --innodb_log_file_size=100M" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --log - -./run-all-tests --suffix=_fast-innodb --comments="Engine=InnoDB --innodb_log_file_size=100M" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log +./run-all-tests --suffix=-innodb --comments="Engine=InnoDB --innodb_buffer_pool_size=256M --innodb_additional_mem_pool_size=20M --innodb_log_file_size=1000M --innodb_log_buffer_size=16M --innodb_lock_wait_timeout=50 --innodb_flush_log_at_trx_commit=1 --innodb_flush_method=O_DIRECT --innodb_log_files_in_group=2 --skip-innodb-doblewrite" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --log +./run-all-tests --suffix=_fast-innodb --comments="Engine=InnoDB --innodb_buffer_pool_size=256M --innodb_additional_mem_pool_size=20M --innodb_log_file_size=1000M --innodb_log_buffer_size=16M --innodb_lock_wait_timeout=50 --innodb_flush_log_at_trx_commit=1 --innodb_flush_method=O_DIRECT --innodb_log_files_in_group=2 --skip-innodb-doblewrite" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log # MyISAM tests -./run-all-tests --suffix=-myisam --comments="Engine=MyISAM key_buffer_size=16M" 
--create-options="ENGINE=myisam" --hw="$hw" --optimization="$optimization" --machine="$machine" --log +./run-all-tests --suffix=-myisam --comments="Engine=MyISAM key_buffer_size=256M" --create-options="ENGINE=myisam" --hw="$hw" --optimization="$optimization" --machine="$machine" --log -./run-all-tests --suffix=_fast-myisam --comments="Engine=MyISAM key_buffer_size=16M" --create-options="ENGINE=myisam" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log +./run-all-tests --suffix=_fast-myisam --comments="Engine=MyISAM key_buffer_size=256M" --create-options="ENGINE=myisam" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log compare-results --relative output/RUN-mysql-myisam-* output/RUN-mysql_fast-myisam* output/RUN-mysql* diff --git a/sql/handler.cc b/sql/handler.cc index b32098bfc78..f3c71887e9a 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -68,7 +68,7 @@ static const LEX_STRING sys_table_aliases[]= }; const char *ha_row_type[] = { - "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT", "?","?","?" + "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT", "PAGE","?","?","?" 
}; const char *tx_isolation_names[] = @@ -281,7 +281,8 @@ handler *get_ha_partition(partition_info *part_info) 0 OK != 0 Error */ -static int ha_init_errors(void) + +int ha_init_errors(void) { #define SETMSG(nr, msg) errmsgs[(nr) - HA_ERR_FIRST]= (msg) const char **errmsgs; @@ -495,9 +496,6 @@ int ha_init() int error= 0; DBUG_ENTER("ha_init"); - if (ha_init_errors()) - DBUG_RETURN(1); - DBUG_ASSERT(total_ha < MAX_HA); /* Check if there is a transaction-capable storage engine besides the diff --git a/sql/handler.h b/sql/handler.h index d488ca5f07e..b5679147d8a 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -268,7 +268,7 @@ enum legacy_db_type enum row_type { ROW_TYPE_NOT_USED=-1, ROW_TYPE_DEFAULT, ROW_TYPE_FIXED, ROW_TYPE_DYNAMIC, ROW_TYPE_COMPRESSED, - ROW_TYPE_REDUNDANT, ROW_TYPE_COMPACT, ROW_TYPE_PAGES }; + ROW_TYPE_REDUNDANT, ROW_TYPE_COMPACT, ROW_TYPE_PAGE }; enum enum_binlog_func { BFN_RESET_LOGS= 1, @@ -311,6 +311,7 @@ enum enum_binlog_command { #define HA_CREATE_USED_PASSWORD (1L << 17) #define HA_CREATE_USED_CONNECTION (1L << 18) #define HA_CREATE_USED_KEY_BLOCK_SIZE (1L << 19) +#define HA_CREATE_USED_TRANSACTIONAL (1L << 20) typedef ulonglong my_xid; // this line is the same as in log_event.h #define MYSQL_XID_PREFIX "MySQLXid" @@ -741,6 +742,7 @@ class partition_info; struct st_partition_iter; #define NOT_A_PARTITION_ID ((uint32)-1) +enum ha_choice { HA_CHOICE_UNDEF, HA_CHOICE_NO, HA_CHOICE_YES }; typedef struct st_ha_create_information { @@ -763,6 +765,8 @@ typedef struct st_ha_create_information uint options; /* OR of HA_CREATE_ options */ uint merge_insert_method; uint extra_size; /* length of extra data segment */ + /* 0 not used, 1 if not transactional, 2 if transactional */ + enum ha_choice transactional; bool table_existed; /* 1 in create if table existed */ bool frm_only; /* 1 if no ha_create_table() */ bool varchar; /* 1 if table has a VARCHAR */ @@ -1661,6 +1665,7 @@ static inline bool ha_storage_engine_is_enabled(const handlerton *db_type) } 
/* basic stuff */ +int ha_init_errors(void); int ha_init(void); int ha_end(void); int ha_initialize_handlerton(st_plugin_int *plugin); diff --git a/sql/lex.h b/sql/lex.h index 45155da7692..28271bf46d9 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -379,6 +379,7 @@ static SYMBOL symbols[] = { { "OWNER", SYM(OWNER_SYM)}, { "PACK_KEYS", SYM(PACK_KEYS_SYM)}, { "PARSER", SYM(PARSER_SYM)}, + { "PAGE", SYM(PAGE_SYM)}, { "PARTIAL", SYM(PARTIAL)}, { "PARTITION", SYM(PARTITION_SYM)}, { "PARTITIONING", SYM(PARTITIONING_SYM)}, @@ -528,6 +529,7 @@ static SYMBOL symbols[] = { { "TO", SYM(TO_SYM)}, { "TRAILING", SYM(TRAILING)}, { "TRANSACTION", SYM(TRANSACTION_SYM)}, + { "TRANSACTIONAL", SYM(TRANSACTIONAL_SYM)}, { "TRIGGER", SYM(TRIGGER_SYM)}, { "TRIGGERS", SYM(TRIGGERS_SYM)}, { "TRUE", SYM(TRUE_SYM)}, diff --git a/sql/mysqld.cc b/sql/mysqld.cc index a49e4005c67..fd77317509b 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -3350,6 +3350,10 @@ server."); using_update_log=1; } + /* Allow storage engine to give real error messages */ + if (ha_init_errors()) + DBUG_RETURN(1); + if (plugin_init(opt_bootstrap)) { sql_print_error("Failed to init plugins."); diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 250d9d917eb..9ae38d5dcec 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -1371,6 +1371,11 @@ int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet, packet->append(STRING_WITH_LEN(" ROW_FORMAT=")); packet->append(ha_row_type[(uint) share->row_type]); } + if (share->transactional != HA_CHOICE_UNDEF) + { + packet->append(STRING_WITH_LEN(" TRANSACTIONAL=")); + packet->append(share->transactional == HA_CHOICE_YES ? 
"1" : "0", 1); + } if (table->s->key_block_size) { char *end; @@ -2910,8 +2915,8 @@ static int get_schema_tables_record(THD *thd, struct st_table_list *tables, case ROW_TYPE_COMPACT: tmp_buff= "Compact"; break; - case ROW_TYPE_PAGES: - tmp_buff= "Paged"; + case ROW_TYPE_PAGE: + tmp_buff= "Page"; break; } table->field[6]->store(tmp_buff, strlen(tmp_buff), cs); diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 0697fdd79b4..a037fc6f727 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -5653,6 +5653,8 @@ view_err: create_info->default_table_charset= table->s->table_charset; if (!(used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE)) create_info->key_block_size= table->s->key_block_size; + if (!(used_fields & HA_CREATE_USED_TRANSACTIONAL)) + create_info->transactional= table->s->transactional; if (!create_info->tablespace && create_info->storage_media != HA_SM_MEMORY) { @@ -6916,7 +6918,6 @@ bool mysql_recreate_table(THD *thd, TABLE_LIST *table_list, lex->col_list.empty(); lex->alter_info.reset(); bzero((char*) &create_info,sizeof(create_info)); - create_info.db_type= 0; create_info.row_type=ROW_TYPE_NOT_USED; create_info.default_table_charset=default_charset_info; /* Force alter table to recreate table */ diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 9c062407921..0653863cc73 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -720,6 +720,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token OUT_SYM /* SQL-2003-R */ %token OWNER_SYM %token PACK_KEYS_SYM +%token PAGE_SYM %token PARAM_MARKER %token PARSER_SYM %token PARTIAL /* SQL-2003-N */ @@ -872,6 +873,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token TO_SYM /* SQL-2003-R */ %token TRAILING /* SQL-2003-R */ %token TRANSACTION_SYM +%token TRANSACTIONAL_SYM %token TRIGGERS_SYM %token TRIGGER_SYM /* SQL-2003-R */ %token TRIM /* SQL-2003-N */ @@ -4213,6 +4215,12 @@ create_table_option: Lex->create_info.used_fields|= HA_CREATE_USED_KEY_BLOCK_SIZE; 
Lex->create_info.key_block_size= $3; } + | TRANSACTIONAL_SYM opt_equal ulong_num + { + Lex->create_info.used_fields|= HA_CREATE_USED_TRANSACTIONAL; + Lex->create_info.transactional= ($3 != 0 ? HA_CHOICE_YES : + HA_CHOICE_NO); + } ; default_charset: @@ -4273,6 +4281,7 @@ row_types: | COMPRESSED_SYM { $$= ROW_TYPE_COMPRESSED; } | REDUNDANT_SYM { $$= ROW_TYPE_REDUNDANT; } | COMPACT_SYM { $$= ROW_TYPE_COMPACT; }; + | PAGE_SYM { $$= ROW_TYPE_PAGE; }; merge_insert_types: NO_SYM { $$= MERGE_INSERT_DISABLED; } @@ -9786,6 +9795,7 @@ keyword_sp: | ONE_SHOT_SYM {} | ONE_SYM {} | PACK_KEYS_SYM {} + | PAGE_SYM {} | PARTIAL {} | PARTITIONING_SYM {} | PARTITIONS_SYM {} @@ -9855,6 +9865,7 @@ keyword_sp: | TEXT_SYM {} | THAN_SYM {} | TRANSACTION_SYM {} + | TRANSACTIONAL_SYM {} | TRIGGERS_SYM {} | TIMESTAMP {} | TIMESTAMP_ADD {} diff --git a/sql/table.cc b/sql/table.cc index ed3cac85214..316d99a85b5 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -460,7 +460,8 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head, if (!head[32]) // New frm file in 3.23 { share->avg_row_length= uint4korr(head+34); - share-> row_type= (row_type) head[40]; + share->transactional= (ha_choice) head[39]; + share->row_type= (row_type) head[40]; share->table_charset= get_charset((uint) head[38],MYF(0)); share->null_field_first= 1; } @@ -2111,7 +2112,9 @@ File create_frm(THD *thd, const char *name, const char *db, int2store(fileinfo+16,reclength); int4store(fileinfo+18,create_info->max_rows); int4store(fileinfo+22,create_info->min_rows); + /* fileinfo[26] is set in mysql_create_frm() */ fileinfo[27]=2; // Use long pack-fields + /* fileinfo[28 & 29] is set to key_info_length in mysql_create_frm() */ create_info->table_options|=HA_OPTION_LONG_BLOB_PTR; // Use portable blob pointers int2store(fileinfo+30,create_info->table_options); fileinfo[32]=0; // No filename anymore @@ -2119,8 +2122,9 @@ File create_frm(THD *thd, const char *name, const char *db, 
int4store(fileinfo+34,create_info->avg_row_length); fileinfo[38]= (create_info->default_table_charset ? create_info->default_table_charset->number : 0); + fileinfo[39]= (uchar) create_info->transactional; fileinfo[40]= (uchar) create_info->row_type; - /* Next few bytes were for RAID support */ + /* Next few bytes were for RAID support */ fileinfo[41]= 0; fileinfo[42]= 0; fileinfo[43]= 0; diff --git a/sql/table.h b/sql/table.h index fc2f25f3aa8..fc9f1b7caa4 100644 --- a/sql/table.h +++ b/sql/table.h @@ -175,6 +175,7 @@ typedef struct st_table_share handlerton *db_type; /* table_type for handler */ enum row_type row_type; /* How rows are stored */ enum tmp_table_type tmp_table; + enum ha_choice transactional; uint ref_count; /* How many TABLE objects uses this */ uint open_count; /* Number of tables in open list */ diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index f7fd417836a..0c83d73c3ef 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -436,32 +436,38 @@ volatile int *_ma_killed_ptr(HA_CHECK *param) void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...) { + va_list args; + DBUG_ENTER("_ma_check_print_error"); param->error_printed |= 1; param->out_flag |= O_DATA_LOST; - va_list args; va_start(args, fmt); _ma_check_print_msg(param, "error", fmt, args); va_end(args); + DBUG_VOID_RETURN; } void _ma_check_print_info(HA_CHECK *param, const char *fmt, ...) { va_list args; + DBUG_ENTER("_ma_check_print_info"); va_start(args, fmt); _ma_check_print_msg(param, "info", fmt, args); va_end(args); + DBUG_VOID_RETURN; } void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...) 
{ + va_list args; + DBUG_ENTER("_ma_check_print_warning"); param->warning_printed= 1; param->out_flag |= O_DATA_LOST; - va_list args; va_start(args, fmt); _ma_check_print_msg(param, "warning", fmt, args); va_end(args); + DBUG_VOID_RETURN; } } @@ -1065,16 +1071,6 @@ int ha_maria::repair(THD *thd, HA_CHECK &param, bool do_optimize) param.out_flag= 0; strmov(fixed_name, file->s->open_file_name); -#ifndef TO_BE_FIXED - /* QQ: Until we have repair for block format, lie that it succeded */ - if (file->s->data_file_type == BLOCK_RECORD) - { - if (do_optimize) - DBUG_RETURN(analyze(thd, (HA_CHECK_OPT*) 0)); - DBUG_RETURN(HA_ADMIN_OK); - } -#endif - // Don't lock tables if we have used LOCK TABLE if (!thd->locked_tables && maria_lock_database(file, table->s->tmp_table ? F_EXTRA_LCK : F_WRLCK)) @@ -1099,7 +1095,9 @@ int ha_maria::repair(THD *thd, HA_CHECK &param, bool do_optimize) local_testflag |= T_STATISTICS; param.testflag |= T_STATISTICS; // We get this for free statistics_done= 1; - if (thd->variables.maria_repair_threads > 1) + /* TODO: Remove BLOCK_RECORD test when parallel works with blocks */ + if (thd->variables.maria_repair_threads > 1 && + file->s->data_file_type != BLOCK_RECORD) { char buf[40]; /* TODO: respect maria_repair_threads variable */ @@ -1954,7 +1952,7 @@ enum row_type ha_maria::get_row_type() const switch (file->s->data_file_type) { case STATIC_RECORD: return ROW_TYPE_FIXED; case DYNAMIC_RECORD: return ROW_TYPE_DYNAMIC; - case BLOCK_RECORD: return ROW_TYPE_PAGES; + case BLOCK_RECORD: return ROW_TYPE_PAGE; case COMPRESSED_RECORD: return ROW_TYPE_COMPRESSED; default: return ROW_TYPE_NOT_USED; } @@ -1963,6 +1961,8 @@ enum row_type ha_maria::get_row_type() const static enum data_file_type maria_row_type(HA_CREATE_INFO *info) { + if (info->transactional == HA_CHOICE_YES) + return BLOCK_RECORD; switch (info->row_type) { case ROW_TYPE_FIXED: return STATIC_RECORD; case ROW_TYPE_DYNAMIC: return DYNAMIC_RECORD; @@ -2007,7 +2007,8 @@ int ha_maria::create(const char 
*name, register TABLE *table_arg, share->avg_row_length); create_info.data_file_name= ha_create_info->data_file_name; create_info.index_file_name= ha_create_info->index_file_name; - create_info.transactional= row_type == BLOCK_RECORD; + create_info.transactional= (row_type == BLOCK_RECORD && + ha_create_info->transactional != HA_CHOICE_NO); if (ha_create_info->options & HA_LEX_CREATE_TMP_TABLE) create_flags|= HA_CREATE_TMP_TABLE; diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c index e1308bce487..f6a8172935f 100644 --- a/storage/maria/ma_bitmap.c +++ b/storage/maria/ma_bitmap.c @@ -106,6 +106,19 @@ put on disk even if they are not in the page cache). - When explicitely requested (for example on backup or after recvoery, to simplify things) + + The flow of writing a row is that: + - Lock the bitmap + - Decide which data pages we will write to + - Mark them full in the bitmap page so that other threads do not try to + use the same data pages as us + - We unlock the bitmap + - Write the data pages + - Lock the bitmap + - Correct the bitmap page with the true final occupation of the data + pages (that is, we marked pages full but when we are done we realize + we didn't fill them) + - Unlock the bitmap. 
*/ #include "maria_def.h" diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index c747aaeb6cb..0ed8c1f7232 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -1778,7 +1778,7 @@ static my_bool write_block_record(MARIA_HA *info, ulong length; ulong data_length= (tmp_data - info->rec_buff); -#ifdef SANITY_CHECK +#ifdef SANITY_CHECKS if (cur_block->sub_blocks == 1) goto crashed; /* no reserved full or tails */ #endif @@ -1814,8 +1814,8 @@ static my_bool write_block_record(MARIA_HA *info, FULL_PAGE_SIZE(block_size))) && cur_block->page_count) { -#ifdef SANITY_CHECK - if ((cur_block == end_block) || (cur_block->used & BLOCKUSED_BIT)) +#ifdef SANITY_CHECKS + if ((cur_block == end_block) || (cur_block->used & BLOCKUSED_USED)) goto crashed; #endif data_length-= length; @@ -1829,7 +1829,7 @@ static my_bool write_block_record(MARIA_HA *info, /* Skip empty filler block */ cur_block++; } -#ifdef SANITY_CHECK +#ifdef SANITY_CHECKS if ((cur_block >= end_block)) goto crashed; #endif @@ -2548,11 +2548,6 @@ static my_bool delete_head_or_tail(MARIA_HA *info, PAGECACHE_PIN_LEFT_PINNED, PAGECACHE_WRITE_DELAY, &page_link.link)) DBUG_RETURN(1); - - /* Change the lock used when we read the page */ - page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK; - set_dynamic(&info->pinned_pages, (void*) &page_link, - info->pinned_pages.elements-1); } else { @@ -2564,7 +2559,7 @@ static my_bool delete_head_or_tail(MARIA_HA *info, pagerange_store(log_data + FILEID_STORE_SIZE, 1); page_store(log_data+ FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE, page); pagerange_store(log_data + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE + - PAGERANGE_STORE_SIZE, 1); + PAGE_STORE_SIZE, 1); log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); if (translog_write_record(!info->trn->rec_lsn ? 
&info->trn->rec_lsn : &lsn, @@ -2573,8 +2568,24 @@ static my_bool delete_head_or_tail(MARIA_HA *info, sizeof(log_data), TRANSLOG_INTERNAL_PARTS + 1, log_array)) DBUG_RETURN(1); + + /* Write the empty page (needed only for REPAIR to work) */ + buff[PAGE_TYPE_OFFSET]= UNALLOCATED_PAGE; + if (pagecache_write(share->pagecache, + &info->dfile, page, 0, + buff, share->page_type, + PAGECACHE_LOCK_WRITE_TO_READ, + PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_WRITE_DELAY, &page_link.link)) + DBUG_RETURN(1); + DBUG_ASSERT(empty_space >= info->s->bitmap.sizes[0]); } + /* Change the lock used when we read the page */ + page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK; + set_dynamic(&info->pinned_pages, (void*) &page_link, + info->pinned_pages.elements-1); + DBUG_PRINT("info", ("empty_space: %u", empty_space)); DBUG_RETURN(_ma_bitmap_set(info, page, head, empty_space)); } @@ -2794,7 +2805,8 @@ static byte *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent, extent->extent+= ROW_EXTENT_SIZE; extent->page= uint5korr(extent->extent); page_count= uint2korr(extent->extent+ROW_EXTENT_PAGE_SIZE); - DBUG_ASSERT(page_count != 0); + if (!page_count) + goto crashed; extent->tail= page_count & TAIL_BIT; extent->page_count= (page_count & ~TAIL_BIT); extent->first_extent= 0; @@ -2817,7 +2829,8 @@ static byte *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent, if (!extent->tail) { /* Full data page */ - DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == BLOB_PAGE); + if ((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != BLOB_PAGE) + goto crashed; extent->page++; /* point to next page */ extent->page_count--; *end_of_data= buff + share->block_size; @@ -2826,7 +2839,8 @@ static byte *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent, } /* Found tail. 
page_count is in this case the position in the tail page */ - DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == TAIL_PAGE); + if ((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != TAIL_PAGE) + goto crashed; *(extent->tail_positions++)= ma_recordpos(extent->page, extent->page_count); info->cur_row.tail_count++; /* For maria_chk */ @@ -2948,7 +2962,6 @@ int _ma_read_block_record2(MARIA_HA *info, byte *record, MARIA_COLUMNDEF *column, *end_column; DBUG_ENTER("_ma_read_block_record2"); - LINT_INIT(field_lengths); LINT_INIT(field_length_data); LINT_INIT(blob_buffer); @@ -2994,6 +3007,7 @@ int _ma_read_block_record2(MARIA_HA *info, byte *record, } extent.first_extent= 1; + field_lengths= 0; if (share->base.max_field_lengths) { get_key_length(field_lengths, data); @@ -3028,7 +3042,7 @@ int _ma_read_block_record2(MARIA_HA *info, byte *record, Read row extents (note that first extent was already read into info->cur_row.extents above) */ - if (row_extents) + if (row_extents > 1) { if (read_long_data(info, info->cur_row.extents + ROW_EXTENT_SIZE, (row_extents - 1) * ROW_EXTENT_SIZE, @@ -3053,7 +3067,7 @@ int _ma_read_block_record2(MARIA_HA *info, byte *record, } /* Read array of field lengths. 
This may be stored in several extents */ - if (share->base.max_field_lengths) + if (field_lengths) { field_length_data= info->cur_row.field_lengths; if (read_long_data(info, field_length_data, field_lengths, &extent, @@ -3459,6 +3473,8 @@ restart_bitmap_scan: DBUG_PRINT("error", ("Wrong page header")); DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD)); } + DBUG_PRINT("info", ("Page %lu has %u rows", + (ulong) page, info->scan.number_of_rows)); info->scan.dir= (info->scan.page_buff + block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE); info->scan.dir_end= (info->scan.dir - @@ -3471,7 +3487,8 @@ restart_bitmap_scan: for (data+= 6; data < info->scan.bitmap_end; data+= 6) { bits= uint6korr(data); - if (bits && ((bits & LL(04444444444444444)) != LL(04444444444444444))) + /* Skip not allocated pages and blob / full tail pages */ + if (bits && bits != LL(07777777777777777)) break; } bit_pos= 0; @@ -3483,8 +3500,11 @@ restart_bitmap_scan: filepos= (my_off_t) info->scan.bitmap_page * block_size; if (unlikely(filepos >= info->state->data_file_length)) { + DBUG_PRINT("info", ("Found end of file")); DBUG_RETURN((my_errno= HA_ERR_END_OF_FILE)); } + DBUG_PRINT("info", ("Reading bitmap at %lu", + (ulong) info->scan.bitmap_page)); if (!(pagecache_read(share->pagecache, &info->dfile, info->scan.bitmap_page, 0, info->scan.bitmap_buff, PAGECACHE_PLAIN_PAGE, diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index 8f10c98d0ee..9d017bc6ad5 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -42,7 +42,6 @@ #include "ma_ftdefs.h" #include -#include #include #include #ifdef HAVE_SYS_VADVISE_H @@ -86,6 +85,12 @@ static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks, static ha_checksum maria_byte_checksum(const byte *buf, uint length); static void set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share); static void restore_data_file_type(MARIA_SHARE *share); +static void change_data_file_descriptor(MARIA_HA *info, File new_file); 
+static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info, + MARIA_HA *info, byte *record); +static void copy_data_file_state(MARIA_STATE_INFO *to, + MARIA_STATE_INFO *from); + void maria_chk_init(HA_CHECK *param) { @@ -837,7 +842,7 @@ static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, } } (*key_checksum)+= maria_byte_checksum((byte*) key, - key_length- info->s->rec_reflength); + key_length- info->s->rec_reflength); record= _ma_dpos(info,0,key+key_length); if (keyinfo->flag & HA_FULLTEXT) /* special handling for ft2 */ { @@ -1262,18 +1267,21 @@ static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend, } else { - info->cur_row.checksum= _ma_checksum(info,record); + ha_checksum checksum= 0; + if (info->s->calc_checksum) + checksum= (*info->s->calc_checksum)(info, record); + if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE)) { if (_ma_rec_check(info,record, info->rec_buff,block_info.rec_len, - test(info->s->calc_checksum))) + test(info->s->calc_checksum), checksum)) { _ma_check_print_error(param,"Found wrong packed record at %s", llstr(start_recpos,llbuff)); got_error= 1; } } - param->glob_crc+= info->cur_row.checksum; + param->glob_crc+= checksum; } if (! 
got_error) @@ -1506,8 +1514,11 @@ static my_bool check_head_page(HA_CHECK *param, MARIA_HA *info, byte *record, } if (info->s->calc_checksum) { - info->cur_row.checksum= _ma_checksum(info, record); - param->glob_crc+= info->cur_row.checksum; + ha_checksum checksum= (*info->s->calc_checksum)(info, record); + if (info->cur_row.checksum != (checksum & 255)) + _ma_check_print_error(param, "Page %9s: Row %3d has wrong checksum", + llstr(page_pos, llbuff), row); + param->glob_crc+= checksum; } if (info->cur_row.extents_count) { @@ -1571,6 +1582,8 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, my_bool full_dir; uint offset_page, offset; + LINT_INIT(full_dir); + if (_ma_scan_init_block_record(info)) { _ma_check_print_error(param, "got error %d when initializing scan", @@ -1648,13 +1661,12 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, llstr(pos, llbuff), page_type); if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) goto err; + continue; } switch ((enum en_page_type) page_type) { case UNALLOCATED_PAGE: case MAX_PAGE_TYPE: - DBUG_PRINT("warning", - ("Found page with wrong page type: %d", page_type)); - DBUG_ASSERT(0); + DBUG_ASSERT(0); /* Impossible */ break; case HEAD_PAGE: row_count= ((uchar*) page_buff)[DIR_COUNT_OFFSET]; @@ -1907,13 +1919,28 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend) } /* maria_chk_data_link */ - /* Recover old table by reading each record and writing all keys */ - /* Save new datafile-name in temp_filename */ +/* + Recover old table by reading each record and writing all keys + + NOTES + Save new datafile-name in temp_filename + + IMPLEMENTATION (for hard repair with block format) + - Create new, unrelated MARIA_HA of the table + - Create new datafile and associate it with new handler + - Reset all statistic information in new handler + - Copy all data to new handler with normal write operations + - Move state of new handler to old handler + - 
Close new handler + - Close data file in old handler + - Rename old data file to new data file. + - Reopen data file in old handler +*/ int maria_repair(HA_CHECK *param, register MARIA_HA *info, my_string name, int rep_quick) { - int error,got_error; + int error, got_error= 1; uint i; ha_rows start_records,new_header_length; my_off_t del; @@ -1922,6 +1949,8 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, char llbuff[22],llbuff2[22]; MARIA_SORT_INFO sort_info; MARIA_SORT_PARAM sort_param; + my_bool block_record, scan_inited= 0; + enum data_file_type org_data_file_type= info->s->data_file_type; DBUG_ENTER("maria_repair"); bzero((char *)&sort_info, sizeof(sort_info)); @@ -1929,9 +1958,11 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, start_records=info->state->records; new_header_length=(param->testflag & T_UNPACK) ? 0L : share->pack.header_length; - got_error=1; new_file= -1; sort_param.sort_info=&sort_info; + block_record= org_data_file_type == BLOCK_RECORD; + sort_info.info= sort_info.new_info= info; + bzero(&info->rec_cache,sizeof(info->rec_cache)); if (!(param->testflag & T_SILENT)) { @@ -1943,28 +1974,6 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) param->testflag|=T_CALC_CHECKSUM; - if (init_io_cache(&param->read_cache, info->dfile.file, - (uint) param->read_buffer_length, - READ_CACHE,share->pack.header_length,1,MYF(MY_WME))) - { - bzero(&info->rec_cache,sizeof(info->rec_cache)); - goto err; - } - if (!rep_quick) - if (init_io_cache(&info->rec_cache,-1,(uint) param->write_buffer_length, - WRITE_CACHE, new_header_length, 1, - MYF(MY_WME | MY_WAIT_IF_FULL))) - goto err; - info->opt_flag|=WRITE_CACHE_USED; - if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength, - MYF(0))) || - _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size, - info->s->base.default_rec_buff_size)) - { - _ma_check_print_error(param, "Not enough 
memory for extra record"); - goto err; - } - if (!rep_quick) { /* Get real path for data file */ @@ -1983,11 +1992,71 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, new_header_length, "datafile-header")) goto err; info->s->state.dellink= HA_OFFSET_ERROR; - info->rec_cache.file=new_file; - if (param->testflag & T_UNPACK) - restore_data_file_type(share); + info->rec_cache.file= new_file; + if (share->data_file_type == BLOCK_RECORD || + ((param->testflag & T_UNPACK) && + share->state.header.org_data_file_type == BLOCK_RECORD)) + { + MARIA_HA *new_info; + if (!(sort_info.new_info= maria_open(info->s->unique_file_name, O_RDWR, + HA_OPEN_COPY | HA_OPEN_FOR_REPAIR))) + goto err; + new_info= sort_info.new_info; + change_data_file_descriptor(new_info, new_file); + maria_lock_database(new_info, F_EXTRA_LCK); + if ((param->testflag & T_UNPACK) && + share->data_file_type == COMPRESSED_RECORD) + { + (*new_info->s->once_end)(new_info->s); + (*new_info->s->end)(new_info); + restore_data_file_type(new_info->s); + _ma_setup_functions(new_info->s); + if ((*new_info->s->once_init)(new_info->s, new_file) || + (*new_info->s->init)(new_info)) + goto err; + } + _ma_reset_status(sort_info.new_info); + if (_ma_initialize_data_file(sort_info.new_info->s, new_file)) + goto err; + block_record= 1; + } + } + + if (org_data_file_type != BLOCK_RECORD) + { + /* We need a read buffer to read rows in big blocks */ + if (init_io_cache(&param->read_cache, info->dfile.file, + (uint) param->read_buffer_length, + READ_CACHE, share->pack.header_length, 1, MYF(MY_WME))) + goto err; } - sort_info.info=info; + if (sort_info.new_info->s->data_file_type != BLOCK_RECORD) + { + /* When writing to not block records, we need a write buffer */ + if (!rep_quick) + if (init_io_cache(&info->rec_cache, new_file, + (uint) param->write_buffer_length, + WRITE_CACHE, new_header_length, 1, + MYF(MY_WME | MY_WAIT_IF_FULL))) + goto err; + info->opt_flag|=WRITE_CACHE_USED; + } + else + { + scan_inited= 1; + if 
(maria_scan_init(sort_info.info)) + goto err; + } + + if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength, + MYF(0))) || + _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size, + info->s->base.default_rec_buff_size)) + { + _ma_check_print_error(param, "Not enough memory for extra record"); + goto err; + } + sort_info.param = param; sort_param.read_cache=param->read_cache; sort_param.pos=sort_param.max_pos=share->pack.header_length; @@ -2030,9 +2099,14 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, maria_lock_memory(param); /* Everything is alloced */ + sort_info.org_data_file_type= info->s->data_file_type; + /* Re-create all keys, which are set in key_map. */ while (!(error=sort_get_next_record(&sort_param))) { + if (block_record && _ma_sort_write_record(&sort_param)) + goto err; + if (writekeys(&sort_param)) { if (my_errno != HA_ERR_FOUND_DUPP_KEY) @@ -2058,7 +2132,8 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, } continue; } - if (_ma_sort_write_record(&sort_param)) + + if (!block_record && _ma_sort_write_record(&sort_param)) goto err; } if (error > 0 || maria_write_data_suffix(&sort_info, (my_bool)!rep_quick) || @@ -2081,35 +2156,58 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, { _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records"); _ma_check_print_error(param,"Run recovery again without -q"); - got_error=1; param->retry_repair=1; param->testflag|=T_RETRY_WITHOUT_QUICK; goto err; } + if (param->testflag & T_SAFE_REPAIR) { /* Don't repair if we loosed more than one row */ - if (info->state->records+1 < start_records) + if (sort_info.new_info->state->records+1 < start_records) { info->state->records=start_records; - got_error=1; goto err; } } if (!rep_quick) { - my_close(info->dfile.file, MYF(0)); - info->dfile.file= new_file; - info->state->data_file_length=sort_param.filepos; + if (sort_info.new_info != sort_info.info) + { + 
MARIA_STATE_INFO save_state= sort_info.new_info->s->state; + if (maria_close(sort_info.new_info)) + { + _ma_check_print_error(param, "Got error %d on close", my_errno); + goto err; + } + copy_data_file_state(&info->s->state, &save_state); + new_file= -1; + } + else + info->state->data_file_length= sort_param.filepos; share->state.version=(ulong) time((time_t*) 0); /* Force reopen */ + + /* Replace the actual file with the temporary file */ + if (new_file >= 0) + my_close(new_file, MYF(MY_WME)); + my_close(info->dfile.file, MYF(MY_WME)); + info->dfile.file= new_file= -1; + if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT, + DATA_TMP_EXT, + (param->testflag & T_BACKUP_DATA ? + MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) || + _ma_open_datafile(info, share, -1)) + { + goto err; + } } else { - info->state->data_file_length=sort_param.max_pos; + info->state->data_file_length= sort_param.max_pos; } if (param->testflag & T_CALC_CHECKSUM) - info->state->checksum=param->glob_crc; + info->state->checksum= param->glob_crc; if (!(param->testflag & T_SILENT)) { @@ -2127,25 +2225,19 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, memcpy( &share->state.state, info->state, sizeof(*info->state)); err: - if (!got_error) - { - /* Replace the actual file with the temporary file */ - if (new_file >= 0) - { - my_close(new_file,MYF(0)); - info->dfile.file= new_file= -1; - if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT, - DATA_TMP_EXT, (param->testflag & T_BACKUP_DATA ? - MYF(MY_REDEL_MAKE_BACKUP): MYF(0))) || - _ma_open_datafile(info,share,-1)) - got_error=1; - } - } + if (scan_inited) + maria_scan_end(sort_info.info); + if (got_error) { if (! 
param->error_printed) _ma_check_print_error(param,"%d for record at pos %s",my_errno, llstr(sort_param.start_recpos,llbuff)); + if (sort_info.new_info && sort_info.new_info != sort_info.info) + { + sort_info.new_info->dfile.file= -1; + maria_close(sort_info.new_info); + } if (new_file >= 0) { VOID(my_close(new_file,MYF(0))); @@ -2595,7 +2687,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, uint i; ulong length; ha_rows start_records; - my_off_t new_header_length,del; + my_off_t new_header_length, org_header_length, del; File new_file; MARIA_SORT_PARAM sort_param; MARIA_SHARE *share=info->s; @@ -2606,11 +2698,15 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, ulonglong key_map=share->state.key_map; DBUG_ENTER("maria_repair_by_sort"); + bzero((char*)&sort_info,sizeof(sort_info)); + bzero((char *)&sort_param, sizeof(sort_param)); + start_records=info->state->records; got_error=1; new_file= -1; - new_header_length=(param->testflag & T_UNPACK) ? 0 : - share->pack.header_length; + org_header_length= share->pack.header_length; + new_header_length= (param->testflag & T_UNPACK) ? 0 : org_header_length; + if (!(param->testflag & T_SILENT)) { printf("- recovering (with sort) MARIA-table '%s'\n",name); @@ -2621,15 +2717,13 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) param->testflag|=T_CALC_CHECKSUM; - bzero((char*)&sort_info,sizeof(sort_info)); - bzero((char *)&sort_param, sizeof(sort_param)); if (!(sort_info.key_block= alloc_key_blocks(param, (uint) param->sort_key_blocks, - share->base.max_key_block_length)) - || init_io_cache(&param->read_cache, info->dfile.file, - (uint) param->read_buffer_length, - READ_CACHE,share->pack.header_length,1,MYF(MY_WME)) || + share->base.max_key_block_length)) || + init_io_cache(&param->read_cache, info->dfile.file, + (uint) param->read_buffer_length, + READ_CACHE, org_header_length, 1, MYF(MY_WME)) || (! 
rep_quick && init_io_cache(&info->rec_cache, info->dfile.file, (uint) param->write_buffer_length, @@ -2639,6 +2733,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks; info->opt_flag|=WRITE_CACHE_USED; info->rec_cache.file= info->dfile.file; /* for sort_delete_record */ + sort_info.org_data_file_type= info->s->data_file_type; if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength, MYF(0))) || @@ -2694,8 +2789,8 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, key_map= ~key_map; /* Create the missing keys */ } - sort_info.info=info; - sort_info.param = param; + sort_info.info= sort_info.new_info= info; + sort_info.param= param; set_data_file_type(&sort_info, share); sort_param.filepos=new_header_length; @@ -2707,9 +2802,9 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, sort_param.wordlist=NULL; init_alloc_root(&sort_param.wordroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0); - if (share->data_file_type == DYNAMIC_RECORD) + if (sort_info.org_data_file_type == DYNAMIC_RECORD) length=max(share->base.min_pack_length+1,share->base.min_block_length); - else if (share->data_file_type == COMPRESSED_RECORD) + else if (sort_info.org_data_file_type == COMPRESSED_RECORD) length=share->base.min_block_length; else length=share->base.pack_reclength; @@ -2747,7 +2842,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, if ((!(param->testflag & T_SILENT))) printf ("- Fixing index %d\n",sort_param.key+1); - sort_param.max_pos=sort_param.pos=share->pack.header_length; + sort_param.max_pos= sort_param.pos= org_header_length; keyseg=sort_param.seg; bzero((char*) sort_param.unique,sizeof(sort_param.unique)); sort_param.key_length=share->rec_reflength; @@ -2845,8 +2940,9 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, share->state.version=(ulong) time((time_t*) 0); my_close(info->dfile.file, MYF(0)); 
info->dfile.file= new_file; - share->data_file_type=sort_info.new_data_file_type; - share->pack.header_length=(ulong) new_header_length; + share->data_file_type= sort_info.new_data_file_type; + org_header_length= (ulong) new_header_length; + sort_info.org_data_file_type= info->s->data_file_type; sort_param.fix_datafile=0; } else @@ -2874,11 +2970,11 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, if (rep_quick & T_FORCE_UNIQUENESS) { - my_off_t skr=info->state->data_file_length+ - (share->options & HA_OPTION_COMPRESS_RECORD ? - MEMMAP_EXTRA_MARGIN : 0); + my_off_t skr= (info->state->data_file_length + + (sort_info.org_data_file_type == COMPRESSED_RECORD) ? + MEMMAP_EXTRA_MARGIN : 0); #ifdef USE_RELOC - if (share->data_file_type == STATIC_RECORD && + if (sort_info.org_data_file_type == STATIC_RECORD && skr < share->base.reloc*share->base.min_pack_length) skr=share->base.reloc*share->base.min_pack_length; #endif @@ -3073,6 +3169,8 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, pthread_mutex_init(&sort_info.mutex, MY_MUTEX_INIT_FAST); pthread_cond_init(&sort_info.cond, 0); + sort_info.org_data_file_type= info->s->data_file_type; + if (!(sort_info.key_block= alloc_key_blocks(param, (uint) param->sort_key_blocks, share->base.max_key_block_length)) || @@ -3140,8 +3238,8 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, key_map= ~key_map; /* Create the missing keys */ } - sort_info.info=info; - sort_info.param = param; + sort_info.info= sort_info.new_info= info; + sort_info.param= param; set_data_file_type(&sort_info, share); sort_info.dupp=0; @@ -3149,9 +3247,9 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, param->read_cache.end_of_file=sort_info.filelength= my_seek(param->read_cache.file,0L,MY_SEEK_END,MYF(0)); - if (share->data_file_type == DYNAMIC_RECORD) + if (sort_info.org_data_file_type == DYNAMIC_RECORD) 
rec_length=max(share->base.min_pack_length+1,share->base.min_block_length); - else if (share->data_file_type == COMPRESSED_RECORD) + else if (sort_info.org_data_file_type == COMPRESSED_RECORD) rec_length=share->base.min_block_length; else rec_length=share->base.pack_reclength; @@ -3367,8 +3465,6 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, */ my_close(info->dfile.file, MYF(0)); info->dfile.file= new_file; - - share->data_file_type=sort_info.new_data_file_type; share->pack.header_length=(ulong) new_header_length; } else @@ -3385,11 +3481,11 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, if (rep_quick & T_FORCE_UNIQUENESS) { - my_off_t skr=info->state->data_file_length+ - (share->options & HA_OPTION_COMPRESS_RECORD ? - MEMMAP_EXTRA_MARGIN : 0); + my_off_t skr= (info->state->data_file_length + + (sort_info.org_data_file_type == COMPRESSED_RECORD) ? + MEMMAP_EXTRA_MARGIN : 0); #ifdef USE_RELOC - if (share->data_file_type == STATIC_RECORD && + if (sort_info.org_data_file_type == STATIC_RECORD && skr < share->base.reloc*share->base.min_pack_length) skr=share->base.reloc*share->base.min_pack_length; #endif @@ -3574,27 +3670,28 @@ static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, byte *key) sort_get_next_record() sort_param Information about and for the sort process - NOTE - + NOTES Dynamic Records With Non-Quick Parallel Repair - For non-quick parallel repair we use a synchronized read/write - cache. This means that one thread is the master who fixes the data - file by reading each record from the old data file and writing it - to the new data file. By doing this the records in the new data - file are written contiguously. Whenever the write buffer is full, - it is copied to the read buffer. The slaves read from the read - buffer, which is not associated with a file. Thus read_cache.file - is -1. 
When using _mi_read_cache(), the slaves must always set - flag to READING_NEXT so that the function never tries to read from - file. This is safe because the records are contiguous. There is no - need to read outside the cache. This condition is evaluated in the - variable 'parallel_flag' for quick reference. read_cache.file must - be >= 0 in every other case. + For non-quick parallel repair we use a synchronized read/write + cache. This means that one thread is the master who fixes the data + file by reading each record from the old data file and writing it + to the new data file. By doing this the records in the new data + file are written contiguously. Whenever the write buffer is full, + it is copied to the read buffer. The slaves read from the read + buffer, which is not associated with a file. Thus read_cache.file + is -1. When using _mi_read_cache(), the slaves must always set + flag to READING_NEXT so that the function never tries to read from + file. This is safe because the records are contiguous. There is no + need to read outside the cache. This condition is evaluated in the + variable 'parallel_flag' for quick reference. read_cache.file must + be >= 0 in every other case. RETURN -1 end of file 0 ok + sort_param->filepos points to record position. 
+ sort_param->record contains record > 0 error */ @@ -3615,10 +3712,61 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) if (*_ma_killed_ptr(param)) DBUG_RETURN(1); - switch (share->data_file_type) { + switch (sort_info->org_data_file_type) { case BLOCK_RECORD: - DBUG_ASSERT(0); + { + for (;;) + { + int flag; + + if (info != sort_info->new_info) + { + /* Safe scanning */ + flag= _ma_safe_scan_block_record(sort_info, info, + sort_param->record); + } + else + { + /* Scan on clean table */ + flag= _ma_scan_block_record(info, sort_param->record, + info->cur_row.nextpos, 1); + } + if (!flag) + { + if (sort_param->calc_checksum) + { + ha_checksum checksum; + checksum= (*info->s->calc_check_checksum)(info, sort_param->record); + if (info->s->calc_checksum && + info->cur_row.checksum != (checksum & 255)) + { + if (param->testflag & T_VERBOSE) + { + char llbuff[22]; + record_pos_to_txt(info, sort_param->filepos, llbuff); + _ma_check_print_info(param, + "Found record with wrong checksum at %s", + llbuff); + } + continue; + } + info->cur_row.checksum= checksum; + param->glob_crc+= checksum; + } + sort_param->filepos= info->cur_row.lastpos; + DBUG_RETURN(0); + } + if (flag == HA_ERR_END_OF_FILE) + { + sort_param->max_pos= sort_info->filelength; + DBUG_RETURN(-1); + } + /* Retry only if wrong record, not if disk error */ + if (flag != HA_ERR_WRONG_IN_RECORD) + DBUG_RETURN(flag); + } break; + } case STATIC_RECORD: for (;;) { @@ -3656,6 +3804,8 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) { byte *to; LINT_INIT(to); + ha_checksum checksum= 0; + pos=sort_param->pos; searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND)); parallel_flag= (sort_param->read_cache.file < 0) ? 
READING_NEXT : 0; @@ -3925,14 +4075,14 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) if (sort_param->read_cache.error < 0) DBUG_RETURN(1); if (sort_param->calc_checksum) - info->cur_row.checksum= _ma_checksum(info, sort_param->record); + checksum= (info->s->calc_check_checksum)(info, sort_param->record); if ((param->testflag & (T_EXTEND | T_REP)) || searching) { if (_ma_rec_check(info, sort_param->record, sort_param->rec_buff, sort_param->find_length, (param->testflag & T_QUICK) && sort_param->calc_checksum && - test(info->s->calc_checksum))) + test(info->s->calc_checksum), checksum)) { _ma_check_print_info(param,"Found wrong packed record at %s", llstr(sort_param->start_recpos,llbuff)); @@ -3940,7 +4090,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) } } if (sort_param->calc_checksum) - param->glob_crc+= info->cur_row.checksum; + param->glob_crc+= checksum; DBUG_RETURN(0); } if (!searching) @@ -4014,8 +4164,9 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) if (sort_param->calc_checksum) { - info->cur_row.checksum= (*info->s->calc_checksum)(info, - sort_param->record); + info->cur_row.checksum= (*info->s->calc_check_checksum)(info, + sort_param-> + record); param->glob_crc+= info->cur_row.checksum; } DBUG_RETURN(0); @@ -4048,8 +4199,8 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) byte *from; byte block_buff[8]; MARIA_SORT_INFO *sort_info=sort_param->sort_info; - HA_CHECK *param=sort_info->param; - MARIA_HA *info=sort_info->info; + HA_CHECK *param= sort_info->param; + MARIA_HA *info= sort_info->new_info; MARIA_SHARE *share=info->s; DBUG_ENTER("_ma_sort_write_record"); @@ -4057,7 +4208,11 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) { switch (sort_info->new_data_file_type) { case BLOCK_RECORD: - DBUG_ASSERT(0); + if ((sort_param->filepos= (*share->write_record_init)(info, + sort_param-> + record)) == + HA_OFFSET_ERROR) + DBUG_RETURN(1); break; case STATIC_RECORD: if 
(my_b_write(&info->rec_cache,sort_param->record, @@ -4090,7 +4245,9 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) from=sort_info->buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER); } /* We can use info->checksum here as only one thread calls this */ - info->cur_row.checksum= _ma_checksum(info,sort_param->record); + info->cur_row.checksum= (*info->s->calc_check_checksum)(info, + sort_param-> + record); reclength= _ma_rec_pack(info,from,sort_param->record); flag=0; @@ -4147,7 +4304,7 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) } /* _ma_sort_write_record */ - /* Compare two keys from _ma_create_index_by_sort */ +/* Compare two keys from _ma_create_index_by_sort */ static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a, const void *b) @@ -4505,7 +4662,8 @@ static int sort_delete_record(MARIA_SORT_PARAM *sort_param) } } if (sort_param->calc_checksum) - param->glob_crc-=(*info->s->calc_checksum)(info, sort_param->record); + param->glob_crc-=(*info->s->calc_check_checksum)(info, + sort_param->record); } error= (flush_io_cache(&info->rec_cache) || (*info->s->delete_record)(info, sort_param->record)); @@ -4514,7 +4672,8 @@ static int sort_delete_record(MARIA_SORT_PARAM *sort_param) DBUG_RETURN(error); } /* sort_delete_record */ - /* Fix all pending blocks and flush everything to disk */ + +/* Fix all pending blocks and flush everything to disk */ int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param) { @@ -4786,9 +4945,9 @@ end: int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile) { - MARIA_HA *info=sort_info->info; + MARIA_HA *info=sort_info->new_info; - if (info->s->options & HA_OPTION_COMPRESS_RECORD && fix_datafile) + if (info->s->data_file_type == COMPRESSED_RECORD && fix_datafile) { char buff[MEMMAP_EXTRA_MARGIN]; bzero(buff,sizeof(buff)); @@ -5101,6 +5260,9 @@ my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows, */ if (! 
maria_is_any_key_active(key_map)) return FALSE; /* Can't use sort */ + /* QQ: Remove this when maria_repair_by_sort() works with block format */ + if (info->s->data_file_type == BLOCK_RECORD) + return FALSE; for (i=0 ; i < share->base.keys ; i++,key++) { if (!force && maria_too_big_key_for_sort(key,rows)) @@ -5119,7 +5281,8 @@ set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share) MARIA_SHARE tmp; sort_info->new_data_file_type= share->state.header.org_data_file_type; /* Set delete_function for sort_delete_record() */ - memcpy((char*) &tmp, share, sizeof(*share)); + tmp= *share; + tmp.state.header.data_file_type= tmp.state.header.org_data_file_type; tmp.options= ~HA_OPTION_COMPRESS_RECORD; _ma_setup_functions(&tmp); share->delete_record=tmp.delete_record; @@ -5132,6 +5295,161 @@ static void restore_data_file_type(MARIA_SHARE *share) mi_int2store(share->state.header.options,share->options); share->state.header.data_file_type= share->state.header.org_data_file_type; - share->data_file_type= share->state.header.data_file_type= + share->data_file_type= share->state.header.data_file_type; share->pack.header_length= 0; } + + +static void change_data_file_descriptor(MARIA_HA *info, File new_file) +{ + my_close(info->dfile.file, MYF(0)); + info->dfile.file= info->s->bitmap.file.file= new_file; +} + + +/* + Copy all states that has to do with the data file + + NOTES + This is done to copy the state from the data file generated from + repair to the original handler +*/ + +static void copy_data_file_state(MARIA_STATE_INFO *to, + MARIA_STATE_INFO *from) +{ + to->state.records= from->state.records; + to->state.del= from->state.del; + to->state.empty= from->state.empty; + to->state.data_file_length= from->state.data_file_length; + to->split= from->split; + to->dellink= from->dellink; + to->first_bitmap_with_space= from->first_bitmap_with_space; +} + + +/* + Read 'safely' next record while scanning table. 
+ + SYNOPSIS + _ma_safe_scan_block_record() + info Maria handler + record Store found here + + NOTES + - One must have called mi_scan() before this + + Differences compared to _ma_scan_block_records() are: + - We read all blocks, not only blocks marked by the bitmap to be safe + - In case of errors, next read will read next record. + - More sanity checks + + RETURN + 0 ok + HA_ERR_END_OF_FILE End of file + # error number +*/ + + +static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info, + MARIA_HA *info, byte *record) +{ + uint record_pos= info->cur_row.nextpos; + ulonglong page= sort_info->page; + DBUG_ENTER("_ma_safe_scan_block_record"); + + for (;;) + { + /* Find next row in current page */ + if (likely(record_pos < info->scan.number_of_rows)) + { + uint length, offset; + byte *data, *end_of_data; + char llbuff[22]; + + while (!(offset= uint2korr(info->scan.dir))) + { + info->scan.dir-= DIR_ENTRY_SIZE; + record_pos++; + if (info->scan.dir < info->scan.dir_end) + { + _ma_check_print_info(sort_info->param, + "Wrong directory on page: %s", + llstr(page, llbuff)); + goto read_next_page; + } + } + /* found row */ + info->cur_row.lastpos= info->scan.row_base_page + record_pos; + info->cur_row.nextpos= record_pos + 1; + data= info->scan.page_buff + offset; + length= uint2korr(info->scan.dir + 2); + end_of_data= data + length; + info->scan.dir-= DIR_ENTRY_SIZE; /* Point to previous row */ + + if (end_of_data > info->scan.dir_end || + offset < PAGE_HEADER_SIZE || length < info->s->base.min_block_length) + { + _ma_check_print_info(sort_info->param, + "Wrong directory entry %3u at page %s", + record_pos, llstr(page, llbuff)); + record_pos++; + continue; + } + else + { + DBUG_PRINT("info", ("rowid: %lu", (ulong) info->cur_row.lastpos)); + DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data)); + } + } + +read_next_page: + /* Read until we find next head page */ + for (;;) + { + uint page_type; + char llbuff[22]; + + sort_info->page++; /* In case of 
errors */ + page++; + if (!(page % info->s->bitmap.pages_covered)) + page++; /* Skip bitmap */ + if ((page + 1) * info->s->block_size > sort_info->filelength) + DBUG_RETURN(HA_ERR_END_OF_FILE); + if (!(pagecache_read(info->s->pagecache, + &info->dfile, + page, 0, info->scan.page_buff, + PAGECACHE_READ_UNKNOWN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0))) + DBUG_RETURN(my_errno); + + page_type= (info->scan.page_buff[PAGE_TYPE_OFFSET] & + PAGE_TYPE_MASK); + if (page_type == HEAD_PAGE) + { + if ((info->scan.number_of_rows= + (uint) (uchar) info->scan.page_buff[DIR_COUNT_OFFSET]) != 0) + break; + _ma_check_print_info(sort_info->param, + "Wrong head page at %s", + llstr(page * info->s->block_size, llbuff)); + } + else if (page_type >= MAX_PAGE_TYPE) + { + _ma_check_print_info(sort_info->param, + "Found wrong page type: %d at %s", + page_type, llstr(page * info->s->block_size, + llbuff)); + } + } + + /* New head page */ + info->scan.dir= (info->scan.page_buff + info->s->block_size - + PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE); + info->scan.dir_end= (info->scan.dir - + (info->scan.number_of_rows - 1) * + DIR_ENTRY_SIZE); + info->scan.row_base_page= ma_recordpos(page, 0); + record_pos= 0; + } +} diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c index dc60ce8aa83..e73629c3c87 100644 --- a/storage/maria/ma_close.c +++ b/storage/maria/ma_close.c @@ -124,8 +124,6 @@ int maria_close(register MARIA_HA *info) my_free((gptr) info,MYF(0)); if (error) - { - DBUG_RETURN(my_errno=error); - } + DBUG_RETURN(my_errno= error); DBUG_RETURN(0); } /* maria_close */ diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index d8660dd41cb..280321d40ec 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -798,7 +798,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, goto err; errpos=3; - if (_ma_initialize_data_file(dfile, &share)) + if (_ma_initialize_data_file(&share, dfile)) goto err; } DBUG_PRINT("info", ("write state info and 
base info")); @@ -1082,7 +1082,7 @@ static int compare_columns(MARIA_COLUMNDEF **a_ptr, MARIA_COLUMNDEF **b_ptr) /* Initialize data file */ -int _ma_initialize_data_file(File dfile, MARIA_SHARE *share) +int _ma_initialize_data_file(MARIA_SHARE *share, File dfile) { if (share->data_file_type == BLOCK_RECORD) { diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c index 2d85b347662..b18b1105391 100644 --- a/storage/maria/ma_delete_all.c +++ b/storage/maria/ma_delete_all.c @@ -20,9 +20,7 @@ int maria_delete_all_rows(MARIA_HA *info) { - uint i; MARIA_SHARE *share=info->s; - MARIA_STATE_INFO *state=&share->state; DBUG_ENTER("maria_delete_all_rows"); if (share->options & HA_OPTION_READ_ONLY_DATA) @@ -35,18 +33,7 @@ int maria_delete_all_rows(MARIA_HA *info) if (_ma_mark_file_changed(info)) goto err; - info->state->records=info->state->del=state->split=0; - state->changed= 0; /* File is optimized */ - state->dellink = HA_OFFSET_ERROR; - state->sortkey= (ushort) ~0; - info->state->key_file_length=share->base.keystart; - info->state->data_file_length=0; - info->state->empty=info->state->key_empty=0; - info->state->checksum=0; - - state->key_del= HA_OFFSET_ERROR; - for (i=0 ; i < share->base.keys ; i++) - state->key_root[i]= HA_OFFSET_ERROR; + _ma_reset_status(info); /* If we are using delayed keys or if the user has done changes to the tables @@ -67,7 +54,7 @@ int maria_delete_all_rows(MARIA_HA *info) my_chsize(share->kfile.file, share->base.keystart, 0, MYF(MY_WME)) ) goto err; - if (_ma_initialize_data_file(info->dfile.file, info->s)) + if (_ma_initialize_data_file(info->s, info->dfile.file)) goto err; /* @@ -104,4 +91,39 @@ err: allow_break(); /* Allow SIGHUP & SIGINT */ DBUG_RETURN(my_errno=save_errno); } -} /* maria_delete */ +} /* maria_delete_all_rows */ + + +/* + Reset status information + + SYNOPSIS + _ma_reset_status() + maria Maria handler + + DESCRIPTION + Resets data and index file information as if the file would be empty + Files are not 
touched. +*/ + +void _ma_reset_status(MARIA_HA *info) +{ + MARIA_SHARE *share= info->s; + MARIA_STATE_INFO *state= &share->state; + uint i; + + info->state->records= info->state->del= state->split= 0; + state->changed= 0; /* File is optimized */ + state->dellink= HA_OFFSET_ERROR; + state->sortkey= (ushort) ~0; + info->state->key_file_length= share->base.keystart; + info->state->data_file_length= 0; + info->state->empty= info->state->key_empty= 0; + info->state->checksum= 0; + + /* Drop the delete key chain. */ + state->key_del= HA_OFFSET_ERROR; + /* Clear all keys */ + for (i=0 ; i < share->base.keys ; i++) + state->key_root[i]= HA_OFFSET_ERROR; +} diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c index ebf84032106..9281378fd33 100644 --- a/storage/maria/ma_dynrec.c +++ b/storage/maria/ma_dynrec.c @@ -1018,7 +1018,8 @@ uint _ma_rec_pack(MARIA_HA *info, register byte *to, register const byte *from) */ my_bool _ma_rec_check(MARIA_HA *info,const char *record, byte *rec_buff, - ulong packed_length, my_bool with_checksum) + ulong packed_length, my_bool with_checksum, + ha_checksum checksum) { uint length,new_length,flag,bit,i; char *pos,*end,*packpos,*to; @@ -1124,7 +1125,7 @@ my_bool _ma_rec_check(MARIA_HA *info,const char *record, byte *rec_buff, if (packed_length != (uint) (to - rec_buff) + test(info->s->calc_checksum) || (bit != 1 && (flag & ~(bit - 1)))) goto err; - if (with_checksum && ((uchar) info->cur_row.checksum != (uchar) *to)) + if (with_checksum && ((uchar) checksum != (uchar) *to)) { DBUG_PRINT("error",("wrong checksum for row")); goto err; diff --git a/storage/maria/ma_info.c b/storage/maria/ma_info.c index a04fba4e0d8..cfb4580a72f 100644 --- a/storage/maria/ma_info.c +++ b/storage/maria/ma_info.c @@ -135,6 +135,7 @@ void _ma_report_error(int errcode, const char *file_name) file_name+= length - 64; } } + my_error(errcode, MYF(ME_NOREFRESH), file_name); DBUG_VOID_RETURN; } diff --git a/storage/maria/ma_init.c b/storage/maria/ma_init.c 
index 19b835a837f..ab62d1bfaa0 100644 --- a/storage/maria/ma_init.c +++ b/storage/maria/ma_init.c @@ -45,6 +45,7 @@ int maria_init(void) pthread_mutex_init(&THR_LOCK_maria,MY_MUTEX_INIT_SLOW); _ma_init_block_record_data(); loghandler_init(); + my_handler_error_register(); } return 0; } diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index f398ec90897..b029297d2d0 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -4143,12 +4143,12 @@ my_bool translog_write_record(LSN *lsn, { uint i; uint len= 0; -#ifdef HAVE_PURIFY +#ifdef HAVE_purify ha_checksum checksum= 0; #endif for (i= TRANSLOG_INTERNAL_PARTS; i < part_no; i++) { -#ifdef HAVE_PURIFY +#ifdef HAVE_purify /* Find unitialized bytes early */ checksum+= my_checksum(checksum, parts_data[i].str, parts_data[i].length); diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index b8ce6d123e7..79ff25e3c2f 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -260,7 +260,9 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) my_realpath(name_buff, fn_format(org_name,name,"",MARIA_NAME_IEXT, MY_UNPACK_FILENAME),MYF(0)); pthread_mutex_lock(&THR_LOCK_maria); - if (!(old_info=_ma_test_if_reopen(name_buff))) + old_info= 0; + if ((open_flags & HA_OPEN_COPY) || + !(old_info=_ma_test_if_reopen(name_buff))) { share= &share_buff; bzero((gptr) &share_buff,sizeof(share_buff)); @@ -586,6 +588,8 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->base.null_bytes + share->base.pack_bytes + test(share->options & HA_OPTION_CHECKSUM)); + if (open_flags & HA_OPEN_COPY) + share->base.transactional= 0; /* Repair: no logging */ if (share->base.transactional) share->base_length+= TRANS_ROW_EXTRA_HEADER_SIZE; share->base.default_rec_buff_size= max(share->base.pack_reclength, @@ -858,6 +862,8 @@ void _ma_setup_functions(register MARIA_SHARE *share) } share->file_read= _ma_nommap_pread; share->file_write= _ma_nommap_pwrite; + 
share->calc_check_checksum= share->calc_checksum; + if (!(share->options & HA_OPTION_CHECKSUM) && share->data_file_type != COMPRESSED_RECORD) share->calc_checksum= share->calc_write_checksum= 0; diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index 53a24e36861..ca47230cfbd 100755 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -315,7 +315,8 @@ struct st_pagecache_block_link #ifndef DBUG_OFF /* debug checks */ static my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, - enum pagecache_page_pin mode) + enum pagecache_page_pin mode + __attribute__((unused))) { struct st_my_thread_var *thread= my_thread_var; PAGECACHE_PIN_INFO *info= info_find(block->pin_list, thread); @@ -373,6 +374,7 @@ static my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, 1 - Error */ +#ifdef NOT_USED static my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, enum pagecache_page_lock lock, enum pagecache_page_pin pin) @@ -440,7 +442,8 @@ error: page_cache_page_pin_str[pin])); DBUG_RETURN(1); } -#endif +#endif /* NOT_USED */ +#endif /* !DBUG_OFF */ #define FLUSH_CACHE 2000 /* sort this many blocks at once */ @@ -2858,8 +2861,10 @@ restart: (pin == PAGECACHE_PIN)), &page_st); DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || - block->type == type); - block->type= type; + block->type == type || type == PAGECACHE_READ_UNKNOWN_PAGE); + if (type != PAGECACHE_READ_UNKNOWN_PAGE || + block->type == PAGECACHE_EMPTY_PAGE) + block->type= type; if (((block->status & PCBLOCK_ERROR) == 0) && (page_st != PAGE_READ)) { DBUG_PRINT("info", ("read block 0x%lx", (ulong)block)); @@ -3223,6 +3228,7 @@ restart: } DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || + block->type == PAGECACHE_READ_UNKNOWN_PAGE || block->type == type); block->type= type; diff --git a/storage/maria/ma_test_all.sh b/storage/maria/ma_test_all.sh index 8ee326a9c69..a7c8df827ff 100755 --- a/storage/maria/ma_test_all.sh +++ b/storage/maria/ma_test_all.sh @@ -8,6 +8,9 @@ silent="-s" suffix="" 
#set -x -v -e +# Delete temporary files +rm -f *.TMD + run_tests() { row_type=$1 @@ -120,6 +123,11 @@ run_repair_tests() ./maria_chk$suffix -se test1 ./maria_chk$suffix -rqos --correct-checksum test1 ./maria_chk$suffix -se test1 + ./ma_test2$suffix $silent -c -d1 $row_type + ./maria_chk$suffix -s --parallel-recover test2 + ./maria_chk$suffix -se test2 + ./maria_chk$suffix -s --parallel-recover --quick test2 + ./maria_chk$suffix -se test2 } run_pack_tests() @@ -147,6 +155,15 @@ run_pack_tests() ./maria_chk$suffix -es test1 ./maria_chk$suffix -rus test1 ./maria_chk$suffix -es test1 + + ./ma_test2$suffix $silent -c -d1 $row_type + ./maria_chk$suffix -s --parallel-recover test2 + ./maria_chk$suffix -se test2 + ./maria_chk$suffix -s --parallel-recover --unpack test2 + ./maria_chk$suffix -se test2 + ./maria_pack$suffix --force -s test1 + ./maria_chk$suffix -s --parallel-recover --unpack test2 + ./maria_chk$suffix -se test2 } echo "Running tests with dynamic row format" @@ -161,9 +178,13 @@ run_pack_tests -S echo "Running tests with block row format" run_tests -M +run_repair_tests -M +run_pack_tests -M echo "Running tests with block row format and transactions" run_tests "-M -T" +run_repair_tests "-M -T" +run_pack_tests "-M -T" # # Tests that gives warnings diff --git a/storage/maria/ma_update.c b/storage/maria/ma_update.c index 737c7c909b4..913959717fc 100644 --- a/storage/maria/ma_update.c +++ b/storage/maria/ma_update.c @@ -147,6 +147,7 @@ int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec) if (share->calc_checksum) { info->cur_row.checksum= (*share->calc_checksum)(info,newrec); + info->state->checksum+= (info->cur_row.checksum - old_checksum); /* Store new checksum in index file header */ key_changed|= HA_STATE_CHANGED; } @@ -173,8 +174,6 @@ int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec) if (auto_key_changed) set_if_bigger(info->s->state.auto_increment, ma_retrieve_auto_increment(info, newrec)); - if 
(share->calc_checksum) - info->state->checksum+= (info->cur_row.checksum - old_checksum); /* We can't yet have HA_STATE_AKTIV here, as block_record dosn't support diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c index 0b82a71f736..e7e0f5d40b5 100644 --- a/storage/maria/maria_chk.c +++ b/storage/maria/maria_chk.c @@ -676,14 +676,7 @@ get_one_option(int optid, check_param.testflag|= T_UPDATE_STATE; break; case '#': - if (argument == disabled_my_option) - { - DBUG_POP(); - } - else - { - DBUG_PUSH(argument ? argument : "d:t:o,/tmp/maria_chk.trace"); - } + DBUG_SET_INITIAL(argument ? argument : "d:t:o,/tmp/maria_chk.trace"); break; case 'V': print_version(); @@ -862,16 +855,25 @@ static int maria_chk(HA_CHECK *param, my_string filename) share->r_locks=0; maria_block_size= share->base.block_size; - if (share->data_file_type == BLOCK_RECORD && - (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_FAST | T_STATISTICS | - T_CHECK | T_CHECK_ONLY_CHANGED))) + if (share->data_file_type == BLOCK_RECORD || + ((param->testflag & T_UNPACK) && + share->state.header.org_data_file_type == BLOCK_RECORD)) { - _ma_check_print_error(param, - "Record format used by '%s' is is not yet supported with repair/check", - filename); - param->error_printed= 0; - error= 1; - goto end2; + if (param->testflag & T_SORT_RECORDS) + { + _ma_check_print_error(param, + "Record format used by '%s' is is not yet supported with repair/check", + filename); + param->error_printed= 0; + error= 1; + goto end2; + } + /* We can't do parallell repair with BLOCK_RECORD yet */ + if (param->testflag & (T_REP_BY_SORT | T_REP_PARALLEL)) + { + param->testflag&= ~(T_REP_BY_SORT | T_REP_PARALLEL); + param->testflag|= T_REP; + } } /* @@ -1757,11 +1759,14 @@ void _ma_check_print_info(HA_CHECK *param __attribute__((unused)), const char *fmt,...) 
{ va_list args; + DBUG_ENTER("_ma_check_print_info"); + DBUG_PRINT("enter", ("format: %s", fmt)); va_start(args,fmt); VOID(vfprintf(stdout, fmt, args)); VOID(fputc('\n',stdout)); va_end(args); + DBUG_VOID_RETURN; } /* VARARGS */ @@ -1770,6 +1775,7 @@ void _ma_check_print_warning(HA_CHECK *param, const char *fmt,...) { va_list args; DBUG_ENTER("_ma_check_print_warning"); + DBUG_PRINT("enter", ("format: %s", fmt)); fflush(stdout); if (!param->warning_printed && !param->error_printed) @@ -1795,7 +1801,7 @@ void _ma_check_print_error(HA_CHECK *param, const char *fmt,...) { va_list args; DBUG_ENTER("_ma_check_print_error"); - DBUG_PRINT("enter",("format: %s",fmt)); + DBUG_PRINT("enter", ("format: %s", fmt)); fflush(stdout); if (!param->warning_printed && !param->error_printed) diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index d9e31e800c4..bd48a5288d5 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -30,6 +30,7 @@ #define MAX_NONMAPPED_INSERTS 1000 #define MARIA_MAX_TREE_LEVELS 32 +#define SANITY_CHECKS struct st_transaction; @@ -261,7 +262,9 @@ typedef struct st_maria_share Calculate checksum for a row during write. 
May be 0 if we calculate the checksum in write_record_init() */ - ha_checksum(*calc_write_checksum) (struct st_maria_info *, const byte *); + ha_checksum(*calc_write_checksum)(struct st_maria_info *, const byte *); + /* calculate checksum for a row during check table */ + ha_checksum(*calc_check_checksum)(struct st_maria_info *, const byte *); /* Compare a row in memory with a row on disk */ my_bool (*compare_unique)(struct st_maria_info *, MARIA_UNIQUEDEF *, const byte *record, MARIA_RECORD_POS pos); @@ -746,7 +749,7 @@ extern ulong _ma_rec_unpack(MARIA_HA *info, byte *to, byte *from, ulong reclength); extern my_bool _ma_rec_check(MARIA_HA *info, const char *record, byte *packpos, ulong packed_length, - my_bool with_checkum); + my_bool with_checkum, ha_checksum checksum); extern int _ma_write_part_record(MARIA_HA *info, my_off_t filepos, ulong length, my_off_t next_filepos, byte ** record, ulong *reclength, @@ -871,6 +874,7 @@ void _ma_update_status(void *param); void _ma_restore_status(void *param); void _ma_copy_status(void *to, void *from); my_bool _ma_check_status(void *param); +void _ma_reset_status(MARIA_HA *maria); extern MARIA_HA *_ma_test_if_reopen(char *filename); my_bool _ma_check_table_is_closed(const char *name, const char *where); @@ -904,9 +908,8 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param); int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages, ulong); int _ma_sync_table_files(const MARIA_HA *info); -int _ma_initialize_data_file(File dfile, MARIA_SHARE *share); +int _ma_initialize_data_file(MARIA_SHARE *share, File dfile); void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn); extern PAGECACHE *maria_log_pagecache; - diff --git a/storage/myisam/ft_stopwords.c b/storage/myisam/ft_stopwords.c index 1b6cff5e903..b95e0f4d857 100644 --- a/storage/myisam/ft_stopwords.c +++ b/storage/myisam/ft_stopwords.c @@ -51,10 +51,11 @@ static int ft_add_stopword(const char *w) int ft_init_stopwords() { + 
DBUG_ENTER("ft_init_stopwords"); if (!stopwords3) { if (!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0)))) - return -1; + DBUG_RETURN(-1); init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp, 0, (ft_stopword_file ? (tree_element_free)&FT_STOPWORD_free : 0), @@ -70,10 +71,10 @@ int ft_init_stopwords() int error=-1; if (!*ft_stopword_file) - return 0; + DBUG_RETURN(0); if ((fd=my_open(ft_stopword_file, O_RDONLY, MYF(MY_WME))) == -1) - return -1; + DBUG_RETURN(-1); len=(uint)my_seek(fd, 0L, MY_SEEK_END, MYF(0)); my_seek(fd, 0L, MY_SEEK_SET, MYF(0)); if (!(start=buffer=my_malloc(len+1, MYF(MY_WME)))) @@ -90,7 +91,7 @@ err1: my_free(buffer, MYF(0)); err0: my_close(fd, MYF(MY_WME)); - return error; + DBUG_RETURN(error); } else { @@ -100,13 +101,14 @@ err0: for (;*sws;sws++) { if (ft_add_stopword(*sws)) - return -1; + DBUG_RETURN(-1); } ft_stopword_file="(built-in)"; /* for SHOW VARIABLES */ } - return 0; + DBUG_RETURN(0); } + int is_stopword(char *word, uint len) { FT_STOPWORD sw; @@ -118,6 +120,8 @@ int is_stopword(char *word, uint len) void ft_free_stopwords() { + DBUG_ENTER("ft_free_stopwords"); + if (stopwords3) { delete_tree(stopwords3); /* purecov: inspected */ @@ -125,4 +129,5 @@ void ft_free_stopwords() stopwords3=0; } ft_stopword_file= 0; + DBUG_VOID_RETURN; } -- cgit v1.2.1 From 85cdbee3b385f940c778549c41fe815ac3aa737c Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 1 Jul 2007 20:45:01 +0300 Subject: After merge fixes BitKeeper/etc/ignore: added storage/maria/maria_read_log support-files/compiler_warnings.supp: Ignore function used when debugging (can be called from gdb) --- .bzrignore | 1 + storage/maria/ma_delete_all.c | 1 + storage/maria/maria_read_log.c | 2 +- support-files/compiler_warnings.supp | 5 +++++ 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.bzrignore b/.bzrignore index e4b60deec56..cb4056778cf 100644 --- a/.bzrignore +++ b/.bzrignore @@ -3001,3 +3001,4 @@ 
storage/maria/unittest/ma_pagecache_single_64k-t-big storage/maria/maria_control storage/maria/maria_log.* storage/maria/unittest/ma_test_loghandler_long-t-big +storage/maria/maria_read_log diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c index 846006787fe..c3bdcdf365c 100644 --- a/storage/maria/ma_delete_all.c +++ b/storage/maria/ma_delete_all.c @@ -142,6 +142,7 @@ void _ma_reset_status(MARIA_HA *info) info->state->data_file_length= 0; info->state->empty= info->state->key_empty= 0; info->state->checksum= 0; + share->state.create_rename_lsn= LSN_IMPOSSIBLE; /* Drop the delete key chain. */ state->key_del= HA_OFFSET_ERROR; diff --git a/storage/maria/maria_read_log.c b/storage/maria/maria_read_log.c index 568814f6f8a..5b2d5b057c2 100644 --- a/storage/maria/maria_read_log.c +++ b/storage/maria/maria_read_log.c @@ -521,7 +521,7 @@ prototype_exec_hook(REDO_CREATE_TABLE) data file does not preclude this). */ if (((info= maria_open(name, O_RDONLY, 0)) == NULL) || - _ma_initialize_data_file(dfile, info->s)) + _ma_initialize_data_file(info->s, dfile)) { fprintf(stderr, "Failed to open new table or write to data file\n"); goto err; diff --git a/support-files/compiler_warnings.supp b/support-files/compiler_warnings.supp index babc482976d..1d73e7a55cc 100644 --- a/support-files/compiler_warnings.supp +++ b/support-files/compiler_warnings.supp @@ -54,6 +54,11 @@ db_vrfy.c : .*comparison is always false due to limited range of data type.* .*/ndb/.* : .*unused variable.* .*/ndb/.* : .*defined but not used.* +# +# Maria warning that is ok in debug builds +# +storage/maria/ma_pagecache.c: .*'info_check_pin' defined but not used + # # Unexplanable (?) stuff # -- cgit v1.2.1 From 1aa9027c6ad579cbb6d991b0ddb40d339457d5a1 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 3 Jul 2007 01:19:26 +0300 Subject: Mark the page dirty if we store LSN on it. New type of page in the page cache fixes. storage/maria/ma_pagecache.c: Mark the page dirty if we store LSN on it. 
Symbolic representation of new page type added (for debugging output). Asserts added (unknown type can't be used if we write the page). --- storage/maria/ma_pagecache.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index 05173eddf46..bf66a8fd088 100755 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -177,7 +177,8 @@ static const char *page_cache_page_type_str[]= /* used only for control page type changing during debugging */ "EMPTY", "PLAIN", - "LSN" + "LSN", + "READ_UNKNOWN" }; static const char *page_cache_page_write_mode_str[]= @@ -584,6 +585,7 @@ static uint pagecache_fwrite(PAGECACHE *pagecache, myf flags) { DBUG_ENTER("pagecache_fwrite"); + DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE); if (type == PAGECACHE_LSN_PAGE) { LSN lsn; @@ -2457,7 +2459,12 @@ static void check_and_set_lsn(LSN lsn, PAGECACHE_BLOCK_LINK *block) (ulong)LSN_FILE_NO(old), (ulong)LSN_OFFSET(old), (ulong)LSN_FILE_NO(lsn), (ulong)LSN_OFFSET(lsn))); if (cmp_translog_addr(lsn, old) > 0) + { + + DBUG_ASSERT(block->type != PAGECACHE_READ_UNKNOWN_PAGE); lsn_store(block->buffer + PAGE_LSN_OFFSET, lsn); + block->status|= PCBLOCK_CHANGED; + } DBUG_VOID_RETURN; } @@ -3179,6 +3186,7 @@ my_bool pagecache_write_part(PAGECACHE *pagecache, page_cache_page_pin_str[pin], page_cache_page_write_mode_str[write_mode], offset, size)); + DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE); DBUG_ASSERT(lock != PAGECACHE_LOCK_LEFT_READLOCKED); DBUG_ASSERT(lock != PAGECACHE_LOCK_READ_UNLOCK); DBUG_ASSERT(offset + size <= pagecache->block_size); -- cgit v1.2.1 From e4c2d748fa27fb67e4b7f4739961058b2d738b85 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 3 Jul 2007 15:20:41 +0200 Subject: Maria: * Don't modify share->base.born_transactional; now it is a value carved in stone at creation time. 
share->now_transactional is what can be modified: it starts at born_transactional, can become false during ALTER TABLE (when we want no logging), and be restored later. * Not resetting create_rename_lsn to 0 during delete_all or repair. * when we temporarily disable transactionality, we also change the page type to PAGECACHE_PLAIN_PAGE: it bypasses some work in the page cache (optimization), and avoids assertions related to LSNs. * Disable INSERT DELAYED for transactional tables, because durability could not be guaranteed (insertion may not even happen) mysys/mf_keycache.c: comment storage/maria/ha_maria.cc: * a transactional table cannot do INSERT DELAYED * ha_maria::save_transactional not needed anymore, as now instead we don't modify MARIA_SHARE::MARIA_BASE_INFO::born_transactional (born_transactional plays the role of save_transactional), and modify MARIA_SHARE::now_transactional. * REPAIR_TABLE log record is now logged by maria_repair() * comment why we rely on born_transactional to know if we should skip a transaction. * putting together two if()s which test for F_UNLCK storage/maria/ha_maria.h: ha_maria::save_transactional not needed anymore (moved to the C layer) storage/maria/ma_blockrec.c: * For the block record's code (writing/updating/deleting records), all that counts is now_transactional, not born_transactional. * As we now set the page type to PAGECACHE_PLAIN_PAGE for tables which have now_transactional==FALSE, pagecache will not expect a meaningful LSN for them in pagecache_unlock_by_link(), so we can pass it LSN_IMPOSSIBLE. storage/maria/ma_check.c: * writing LOGREC_REPAIR_TABLE moves from ha_maria::repair() to maria_repair(), sounds cleaner (fewer functions to export).
* when opening a table during REPAIR, don't use the realpath-ed name, as this may fail if the table has symlinked files (maria_open() would try to find the data and index file in the directory of unique_file_name, it would fail if data and index files are in different dirs); use the unresolved name, open_file_name, which is the argument which was passed to the maria_open() which created 'info'. storage/maria/ma_close.c: assert that when a statement is done with a table, it cleans up storage/maria/ma_create.c: new name storage/maria/ma_delete_all.c: * using now_transactional * no reason to reset create_rename_lsn during delete_all (a bug); also no reason to do it during repair: it was put there because a positive create_rename_lsn caused a call to check_and_set_lsn() which asserted in DBUG_ASSERT(block->type == PAGECACHE_LSN_PAGE); first solution was to use LSN_IMPOSSIBLE in _ma_unpin_all_pages() if not transactional; but then in the case of ALTER TABLE, with transactionality temporarily disabled, it asserted in DBUG_ASSERT(LSN_VALID(lsn)) in pagecache_fwrite() (PAGECACHE_LSN_PAGE page with zero LSN - bad). The additional solution is to use PAGECACHE_PLAIN_PAGE when we disable transactionality temporarily: this avoids checks on the LSN, and also bypasses (optimization) the "flush log up to LSN" call when the pagecache flushes our page (in other words, no WAL needed). storage/maria/ma_delete_table.c: use now_transactional storage/maria/ma_locking.c: assert that when a statement is done with a table, it cleans up. storage/maria/ma_loghandler.c: * now_transactional should be used to test if we want a log record. * Assertions to make sure dummy_transaction_object is not spoilt by its many users. storage/maria/ma_open.c: base.transactional -> base.born_transactional storage/maria/ma_pagecache.c: missing name for page's type. Comment for future. 
storage/maria/ma_rename.c: use now_transactional storage/maria/maria_chk.c: use born_transactional storage/maria/maria_def.h: MARIA_BASE_INFO::transactional renamed to born_transactional. MARIA_SHARE::now_transactional introduced. _ma_repair_write_log_record() is made local to ma_check.c. Macros to temporarily disable, and re-enable, transactionality for a table. storage/maria/maria_read_log.c: assertions and using the new macros. Adding a forgotten resetting when we finally close all tables. --- mysys/mf_keycache.c | 9 ++++- storage/maria/ha_maria.cc | 73 +++++++++++++++++++++++++---------------- storage/maria/ha_maria.h | 5 --- storage/maria/ma_blockrec.c | 26 +++++---------- storage/maria/ma_check.c | 28 ++++++++++------ storage/maria/ma_close.c | 3 +- storage/maria/ma_create.c | 2 +- storage/maria/ma_delete_all.c | 3 +- storage/maria/ma_delete_table.c | 2 +- storage/maria/ma_locking.c | 1 + storage/maria/ma_loghandler.c | 13 +++++++- storage/maria/ma_open.c | 23 +++++++++---- storage/maria/ma_pagecache.c | 11 ++++++- storage/maria/ma_rename.c | 8 ++++- storage/maria/maria_chk.c | 2 +- storage/maria/maria_def.h | 20 +++++++++-- storage/maria/maria_read_log.c | 29 +++++++++------- 17 files changed, 167 insertions(+), 91 deletions(-) diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 95a9f08a07a..065c10e3d73 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -2456,7 +2456,14 @@ restart: } else { - /* Link the block into a list of blocks 'in switch' */ + /* + Link the block into a list of blocks 'in switch'. + Note that if there could be two concurrent flush_key_blocks_int() + on this file (normally this does not happen, as MyISAM uses + intern_lock for flushing), then the first one may move the block + into its first_in_switch, and the second one would just not see + the block and wrongly consider its job done. 
+ */ unlink_changed(block); link_changed(block, &first_in_switch); } diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 938e99375f2..232dd7e695d 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -479,7 +479,7 @@ handler(hton, table_arg), file(0), int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER | HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY | HA_FILE_BASED | HA_CAN_GEOMETRY | MARIA_CANNOT_ROLLBACK | - HA_CAN_INSERT_DELAYED | HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS | + HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS | HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT), can_enable_indexes(1) {} @@ -697,9 +697,19 @@ int ha_maria::open(const char *name, int mode, uint test_if_locked) info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST); if (!(test_if_locked & HA_OPEN_WAIT_IF_LOCKED)) VOID(maria_extra(file, HA_EXTRA_WAIT_LOCK, 0)); - save_transactional= file->s->base.transactional; if ((data_file_type= file->s->data_file_type) != STATIC_RECORD) int_table_flags |= HA_REC_NOT_IN_SEQ; + if (!file->s->base.born_transactional) + { + /* + INSERT DELAYED cannot work with transactional tables (because it cannot + stand up to "when client gets ok the data is safe on disk": the record + may not even be inserted). In the future, we could enable it back (as a + client doing INSERT DELAYED knows the specificities; but we then should + make sure to regularly commit in the delayed_insert thread). 
+ */ + int_table_flags|= HA_CAN_INSERT_DELAYED; + } if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) int_table_flags |= HA_HAS_CHECKSUM; @@ -1178,8 +1188,6 @@ int ha_maria::repair(THD *thd, HA_CHECK ¶m, bool do_optimize) llstr(rows, llbuff), llstr(file->state->records, llbuff2)); } - if (!error) - error= _ma_repair_write_log_record(¶m, file); } else { @@ -1861,30 +1869,19 @@ int ha_maria::external_lock(THD *thd, int lock_type) { TRN *trn= THD_TRN; DBUG_ENTER("ha_maria::external_lock"); - if (!save_transactional) + /* + We don't test now_transactional because it may vary between lock/unlock + and thus confuse our reference counting. + It is critical to skip non-transactional tables: user-visible temporary + tables get an external_lock() when read/written for the first time, but no + corresponding unlock (they just stay locked and are later dropped while + locked); if a tmp table was transactional, "SELECT FROM non_tmp, tmp" + would never commit as its "locked_tables" count would stay 1. + */ + if (!file->s->base.born_transactional) goto skip_transaction; - if (!trn && lock_type != F_UNLCK) /* no transaction yet - open it now */ - { - trn= trnman_new_trn(& thd->mysys_var->mutex, - & thd->mysys_var->suspend, - thd->thread_stack + STACK_DIRECTION * - (my_thread_stack_size - STACK_MIN_SIZE)); - if (!trn) - DBUG_RETURN(HA_ERR_OUT_OF_MEM); - - DBUG_PRINT("info", ("THD_TRN set to 0x%lx", (ulong)trn)); - THD_TRN= trn; - if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) - trans_register_ha(thd, TRUE, maria_hton); - } if (lock_type != F_UNLCK) { - this->file->trn= trn; - if (!trnman_increment_locked_tables(trn)) - { - trans_register_ha(thd, FALSE, maria_hton); - trnman_new_statement(trn); - } if (!thd->transaction.on) { /* @@ -1896,11 +1893,32 @@ int ha_maria::external_lock(THD *thd, int lock_type) tons of archived logs to roll-forward, we could then not disable REDOs/UNDOs in this case. 
*/ - file->s->base.transactional= FALSE; + _ma_tmp_disable_logging_for_table(file->s); + } + if (!trn) /* no transaction yet - open it now */ + { + trn= trnman_new_trn(& thd->mysys_var->mutex, + & thd->mysys_var->suspend, + thd->thread_stack + STACK_DIRECTION * + (my_thread_stack_size - STACK_MIN_SIZE)); + if (unlikely(!trn)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + + DBUG_PRINT("info", ("THD_TRN set to 0x%lx", (ulong)trn)); + THD_TRN= trn; + if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) + trans_register_ha(thd, TRUE, maria_hton); + } + this->file->trn= trn; + if (!trnman_increment_locked_tables(trn)) + { + trans_register_ha(thd, FALSE, maria_hton); + trnman_new_statement(trn); } } else { + _ma_reenable_logging_for_table(file->s); this->file->trn= 0; /* TODO: remove it also in commit and rollback */ if (trn && trnman_has_locked_tables(trn)) { @@ -1921,7 +1939,6 @@ int ha_maria::external_lock(THD *thd, int lock_type) #endif } } - file->s->base.transactional= save_transactional; } skip_transaction: DBUG_RETURN(maria_lock_database(file, !table->s->tmp_table ? 
@@ -1932,7 +1949,7 @@ skip_transaction: int ha_maria::start_stmt(THD *thd, thr_lock_type lock_type) { TRN *trn= THD_TRN; - if (save_transactional) + if (file->s->base.born_transactional) { DBUG_ASSERT(trn); // this may be called only after external_lock() DBUG_ASSERT(trnman_has_locked_tables(trn)); diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h index a2f6b190657..dd0a9594ef3 100644 --- a/storage/maria/ha_maria.h +++ b/storage/maria/ha_maria.h @@ -39,11 +39,6 @@ class ha_maria :public handler char *data_file_name, *index_file_name; enum data_file_type data_file_type; bool can_enable_indexes; - /** - @brief for temporarily disabling table's transactionality - (if THD::transaction::on is false), remember the original value here - */ - bool save_transactional; int repair(THD * thd, HA_CHECK ¶m, bool optimize); public: diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index cfa9df02102..d8694f50a68 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -581,18 +581,10 @@ void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn) DBUG_PRINT("info", ("undo_lsn: %lu", (ulong) undo_lsn)); /* True if not disk error */ - DBUG_ASSERT((undo_lsn != LSN_IMPOSSIBLE) || !info->s->base.transactional); + DBUG_ASSERT((undo_lsn != LSN_IMPOSSIBLE) || !info->s->now_transactional); - if (!info->s->base.transactional) - { - /* - If this is a transactional table but with transactionality temporarily - disabled (like in ALTER TABLE) we need to give a sensible LSN to pages - and not LSN_IMPOSSIBLE. If this is not a transactional table it will - reduce to LSN_IMPOSSIBLE. 
- */ - undo_lsn= info->s->state.create_rename_lsn; - } + if (!info->s->now_transactional) + undo_lsn= LSN_IMPOSSIBLE; /* don't try to set a LSN on pages */ while (pinned_page-- != page_link) pagecache_unlock_by_link(info->s->pagecache, pinned_page->link, @@ -1446,7 +1438,7 @@ static my_bool free_full_page_range(MARIA_HA *info, ulonglong page, uint count) page, count, PAGECACHE_LOCK_WRITE, 0)) res= 1; - if (info->s->base.transactional) + if (info->s->now_transactional) { LSN lsn; DBUG_ASSERT(info->trn->rec_lsn); @@ -1953,7 +1945,7 @@ static my_bool write_block_record(MARIA_HA *info, head_block+1, bitmap_blocks->count - 1); } - if (share->base.transactional) + if (share->now_transactional) { uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE]; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2]; @@ -1998,7 +1990,7 @@ static my_bool write_block_record(MARIA_HA *info, else push_dynamic(&info->pinned_pages, (void*) &page_link); - if (share->base.transactional && (tmp_data_used || blob_full_pages_exists)) + if (share->now_transactional && (tmp_data_used || blob_full_pages_exists)) { /* Log REDO writes for all full pages (head part and all blobs) @@ -2095,7 +2087,7 @@ static my_bool write_block_record(MARIA_HA *info, } /* Write UNDO record */ - if (share->base.transactional) + if (share->now_transactional) { uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE]; @@ -2312,7 +2304,7 @@ my_bool _ma_write_abort_block_record(MARIA_HA *info) } } - if (info->s->base.transactional) + if (info->s->now_transactional) { LSN lsn; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; @@ -2671,7 +2663,7 @@ my_bool _ma_delete_block_record(MARIA_HA *info, const byte *record) if (info->cur_row.extents && free_full_pages(info, &info->cur_row)) goto err; - if (info->s->base.transactional) + if (info->s->now_transactional) { LSN lsn; uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + diff --git a/storage/maria/ma_check.c 
b/storage/maria/ma_check.c index bbe7e6a193a..ae23e64575b 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -91,6 +91,7 @@ static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info, MARIA_HA *info, byte *record); static void copy_data_file_state(MARIA_STATE_INFO *to, MARIA_STATE_INFO *from); +static int write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info); void maria_chk_init(HA_CHECK *param) @@ -1952,6 +1953,8 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, MARIA_SORT_PARAM sort_param; my_bool block_record, scan_inited= 0; enum data_file_type org_data_file_type= info->s->data_file_type; + myf sync_dir= ((share->now_transactional && !share->temporary) ? + MY_SYNC_DIR : 0); DBUG_ENTER("maria_repair"); bzero((char *)&sort_info, sizeof(sort_info)); @@ -1999,7 +2002,15 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, share->state.header.org_data_file_type == BLOCK_RECORD)) { MARIA_HA *new_info; - if (!(sort_info.new_info= maria_open(info->s->unique_file_name, O_RDWR, + /** + @todo RECOVERY it's a bit worrying to have two MARIA_SHARE on the + same index file: + - Checkpoint will see them as two tables + - are we sure that new_info never flushes an in-progress state + to the index file? And how to prevent Checkpoint from doing that? + - in the close future maria_close() will write the state... + */ + if (!(sort_info.new_info= maria_open(info->s->open_file_name, O_RDWR, HA_OPEN_COPY | HA_OPEN_FOR_REPAIR))) goto err; new_info= sort_info.new_info; @@ -2174,8 +2185,6 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, if (!rep_quick) { - myf sync_dir= ((share->base.transactional && !share->temporary) ? - MY_SYNC_DIR : 0); if (sort_info.new_info != sort_info.info) { MARIA_STATE_INFO save_state= sort_info.new_info->s->state; @@ -2223,7 +2232,7 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, llstr(sort_info.dupp,llbuff)); } - got_error=0; + got_error= sync_dir ? 
write_log_record_for_repair(param, info) : 0; /* If invoked by external program that uses thr_lock */ if (&share->state.state != info->state) memcpy( &share->state.state, info->state, sizeof(*info->state)); @@ -2424,7 +2433,7 @@ int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, my_string name) int old_lock; MARIA_SHARE *share=info->s; MARIA_STATE_INFO old_state; - myf sync_dir= (share->base.transactional && !share->temporary) ? + myf sync_dir= (share->now_transactional && !share->temporary) ? MY_SYNC_DIR : 0; DBUG_ENTER("maria_sort_index"); @@ -2702,7 +2711,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, char llbuff[22]; MARIA_SORT_INFO sort_info; ulonglong key_map=share->state.key_map; - myf sync_dir= ((share->base.transactional && !share->temporary) ? + myf sync_dir= ((share->now_transactional && !share->temporary) ? MY_SYNC_DIR : 0); DBUG_ENTER("maria_repair_by_sort"); @@ -3127,7 +3136,7 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, MARIA_SORT_INFO sort_info; ulonglong key_map=share->state.key_map; pthread_attr_t thr_attr; - myf sync_dir= (share->base.transactional && !share->temporary) ? + myf sync_dir= (share->now_transactional && !share->temporary) ? MY_SYNC_DIR : 0; DBUG_ENTER("maria_repair_parallel"); @@ -5487,11 +5496,10 @@ read_next_page: @retval 1 error (disk problem) */ -int _ma_repair_write_log_record(const HA_CHECK *param, MARIA_HA *info) +static int write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info) { MARIA_SHARE *share= info->s; - /* Only called from ha_maria.cc, not maria_check, so translog is inited */ - if (share->base.transactional && !share->temporary) + if (translog_inited) /* test it in case this is maria_chk */ { /* For now this record is only informative. 
It could serve when applying diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c index 19f94aa3b56..4fec7359d66 100644 --- a/storage/maria/ma_close.c +++ b/storage/maria/ma_close.c @@ -108,7 +108,8 @@ int maria_close(register MARIA_HA *info) } } #endif - my_free((gptr) info->s,MYF(0)); + DBUG_ASSERT(share->now_transactional == share->base.born_transactional); + my_free((gptr) share, MYF(0)); } pthread_mutex_unlock(&THR_LOCK_maria); if (info->ftparser_param) diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index 997ce13ca27..2098d7119eb 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -259,7 +259,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, } share.base.null_bytes= ci->null_bytes; share.base.original_null_bytes= ci->null_bytes; - share.base.transactional= ci->transactional; + share.base.born_transactional= ci->transactional; share.base.max_field_lengths= max_field_lengths; share.base.field_offsets= 0; /* for future */ diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c index c3bdcdf365c..42e7fb3c2f9 100644 --- a/storage/maria/ma_delete_all.c +++ b/storage/maria/ma_delete_all.c @@ -46,7 +46,7 @@ int maria_delete_all_rows(MARIA_HA *info) */ if (_ma_readinfo(info,F_WRLCK,1)) DBUG_RETURN(my_errno); - log_record= share->base.transactional && !share->temporary; + log_record= share->now_transactional && !share->temporary; if (_ma_mark_file_changed(info)) goto err; @@ -142,7 +142,6 @@ void _ma_reset_status(MARIA_HA *info) info->state->data_file_length= 0; info->state->empty= info->state->key_empty= 0; info->state->checksum= 0; - share->state.create_rename_lsn= LSN_IMPOSSIBLE; /* Drop the delete key chain. 
*/ state->key_del= HA_OFFSET_ERROR; diff --git a/storage/maria/ma_delete_table.c b/storage/maria/ma_delete_table.c index 39a286ad1f7..6d6b9d032fd 100644 --- a/storage/maria/ma_delete_table.c +++ b/storage/maria/ma_delete_table.c @@ -64,7 +64,7 @@ int maria_delete_table(const char *name) raid_type= info->s->base.raid_type; raid_chunks= info->s->base.raid_chunks; #endif - sync_dir= (info->s->base.transactional && !info->s->temporary) ? + sync_dir= (info->s->now_transactional && !info->s->temporary) ? MY_SYNC_DIR : 0; maria_close(info); } diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c index abb095d47c2..1825367c44c 100644 --- a/storage/maria/ma_locking.c +++ b/storage/maria/ma_locking.c @@ -129,6 +129,7 @@ int maria_lock_database(MARIA_HA *info, int lock_type) } info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); info->lock_type= F_UNLCK; + DBUG_ASSERT(share->now_transactional == share->base.born_transactional); break; case F_RDLCK: if (info->lock_type == F_WRLCK) diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index dc524d858e7..6195e552185 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -4263,7 +4263,7 @@ my_bool translog_write_record(LSN *lsn, if (share) { - if (!share->base.transactional) + if (!share->now_transactional) { DBUG_PRINT("info", ("It is not transactional table")); DBUG_RETURN(0); @@ -5614,6 +5614,16 @@ static my_bool write_hook_for_redo(enum translog_record_type type struct st_translog_parts *parts __attribute__ ((unused))) { + /* + Users of dummy_transaction_object must keep this TRN clean as it + is used by many threads (like those manipulating non-transactional + tables). It might be dangerous if one user sets rec_lsn or some other + member and it is picked up by another user (like putting this rec_lsn into + a page of a non-transactional table); it's safer if all members stay 0. 
So + non-transactional log records (REPAIR, CREATE, RENAME, DROP) should not + call this hook; we trust them but verify ;) + */ + DBUG_ASSERT(trn->trid != 0); /* If the hook stays so simple, it would be faster to pass !trn->rec_lsn ? trn->rec_lsn : some_dummy_lsn @@ -5640,6 +5650,7 @@ static my_bool write_hook_for_undo(enum translog_record_type type struct st_translog_parts *parts __attribute__ ((unused))) { + DBUG_ASSERT(trn->trid != 0); /* see write_hook_for_redo() */ trn->undo_lsn= *lsn; if (unlikely(LSN_WITH_FLAGS_TO_LSN(trn->first_undo_lsn) == 0)) trn->first_undo_lsn= diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index cb05ab5b5f0..eb0bba7503f 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -589,9 +589,17 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) share->base.pack_bytes + test(share->options & HA_OPTION_CHECKSUM)); if (open_flags & HA_OPEN_COPY) - share->base.transactional= 0; /* Repair: no logging */ - if (share->base.transactional) { + /* + this instance will be a temporary one used just to create a data + file for REPAIR. Don't do logging. This base information will not go + to disk. + */ + share->base.born_transactional= FALSE; + } + if (share->base.born_transactional) + { + share->page_type= PAGECACHE_LSN_PAGE; share->base_length+= TRANS_ROW_EXTRA_HEADER_SIZE; if (unlikely((share->state.create_rename_lsn == (LSN)ULONGLONG_MAX) && (open_flags & HA_OPEN_FROM_SQL_LAYER))) @@ -604,11 +612,12 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) _ma_update_create_rename_lsn_on_disk(share, TRUE); } } + else + share->page_type= PAGECACHE_PLAIN_PAGE; + share->now_transactional= share->base.born_transactional; + share->base.default_rec_buff_size= max(share->base.pack_reclength, share->base.max_key_length); - share->page_type= (share->base.transactional ? 
PAGECACHE_LSN_PAGE : - PAGECACHE_PLAIN_PAGE); - if (share->data_file_type == DYNAMIC_RECORD) { share->base.extra_rec_buff_size= @@ -1124,7 +1133,7 @@ uint _ma_base_info_write(File file, MARIA_BASE_INFO *base) *ptr++= base->key_reflength; *ptr++= base->keys; *ptr++= base->auto_key; - *ptr++= base->transactional; + *ptr++= base->born_transactional; *ptr++= 0; /* Reserved */ mi_int2store(ptr,base->pack_bytes); ptr+= 2; mi_int2store(ptr,base->blobs); ptr+= 2; @@ -1167,7 +1176,7 @@ static byte *_ma_base_info_read(byte *ptr, MARIA_BASE_INFO *base) base->key_reflength= *ptr++; base->keys= *ptr++; base->auto_key= *ptr++; - base->transactional= *ptr++; + base->born_transactional= *ptr++; ptr++; base->pack_bytes= mi_uint2korr(ptr); ptr+= 2; base->blobs= mi_uint2korr(ptr); ptr+= 2; diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index 05173eddf46..994da92e0e9 100755 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -177,7 +177,8 @@ static const char *page_cache_page_type_str[]= /* used only for control page type changing during debugging */ "EMPTY", "PLAIN", - "LSN" + "LSN", + "UNKNOWN" }; static const char *page_cache_page_write_mode_str[]= @@ -3649,6 +3650,14 @@ restart: ("changed_blocks") though it's still dirty (the flush by another thread has not yet happened). Checkpoint will miss the page and so must be blocked until that flush has happened. + Note that if there are two concurrent + flush_pagecache_blocks_int() on this file, then the first one may + move the block into its first_in_switch, and the second one would + just not see the block and wrongly consider its job done. + @todo RECOVERY Maria does protect such flushes with intern_lock, + but Checkpoint does not (Checkpoint makes sure that + changed_blocks_is_incomplete is 0 when it starts, but as + flush_cached_blocks() releases mutex, this may change... 
*/ /** @todo RECOVERY: check all places where we remove a page from the diff --git a/storage/maria/ma_rename.c b/storage/maria/ma_rename.c index 8f42a5b931a..9dd75705229 100644 --- a/storage/maria/ma_rename.c +++ b/storage/maria/ma_rename.c @@ -56,7 +56,13 @@ int maria_rename(const char *old_name, const char *new_name) raid_chunks = share->base.raid_chunks; #endif - sync_dir= (share->base.transactional && !share->temporary) ? + /* + the renaming of an internal table to the final table (like in ALTER TABLE) + is the moment when this table receives its correct create_rename_lsn and + this is important; make sure transactionality has been re-enabled. + */ + DBUG_ASSERT(share->now_transactional == share->base.born_transactional); + sync_dir= (share->now_transactional && !share->temporary) ? MY_SYNC_DIR : 0; if (sync_dir) { diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c index 3c93b39509f..37f6f1fb49b 100644 --- a/storage/maria/maria_chk.c +++ b/storage/maria/maria_chk.c @@ -1033,7 +1033,7 @@ static int maria_chk(HA_CHECK *param, my_string filename) know what the log's end LSN is now, so we just let the server know that it will have to find and store it. */ - if (share->base.transactional) + if (share->base.born_transactional) share->state.create_rename_lsn= (LSN)ULONGLONG_MAX; if ((param->testflag & (T_REP_BY_SORT | T_REP_PARALLEL)) && (maria_is_any_key_active(share->state.key_map) || diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index 5f508da213d..e46b120bf3f 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -171,8 +171,11 @@ typedef struct st_ma_base_info /* The following are from the header */ uint key_parts, all_key_parts; - /* If false, we disable logging, versioning, transaction etc */ - my_bool transactional; + /** + @brief If false, we disable logging, versioning, transaction etc. 
Observe + difference with MARIA_SHARE::now_transactional + */ + my_bool born_transactional; } MARIA_BASE_INFO; @@ -306,6 +309,13 @@ typedef struct st_maria_share not_flushed, concurrent_insert; my_bool delay_key_write; my_bool have_rtree; + /** + @brief if the table is transactional right now. It may have been created + transactional (base.born_transactional==TRUE) but with transactionality + (logging) temporarily disabled (now_transactional==FALSE). The opposite + (FALSE, TRUE) is impossible. + */ + my_bool now_transactional; #ifdef THREAD THR_LOCK lock; pthread_mutex_t intern_lock; /* Locking for use with _locking */ @@ -891,7 +901,6 @@ MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info, const byte *record); my_bool _ma_write_abort_default(MARIA_HA *info); C_MODE_START -int _ma_repair_write_log_record(const HA_CHECK *param, MARIA_HA *info); /* Functions needed by _ma_check (are overrided in MySQL) */ volatile int *_ma_killed_ptr(HA_CHECK *param); void _ma_check_print_error _VARARGS((HA_CHECK *param, const char *fmt, ...)); @@ -916,5 +925,10 @@ int _ma_initialize_data_file(MARIA_SHARE *share, File dfile); int _ma_update_create_rename_lsn_on_disk(MARIA_SHARE *share, my_bool do_sync); void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn); +#define _ma_tmp_disable_logging_for_table(S) \ + { (S)->now_transactional= FALSE; (S)->page_type= PAGECACHE_PLAIN_PAGE; } +#define _ma_reenable_logging_for_table(S) \ + { if (((S)->now_transactional= (S)->base.born_transactional)) \ + (S)->page_type= PAGECACHE_LSN_PAGE; } extern PAGECACHE *maria_log_pagecache; diff --git a/storage/maria/maria_read_log.c b/storage/maria/maria_read_log.c index 5b2d5b057c2..55f6c9f0cdf 100644 --- a/storage/maria/maria_read_log.c +++ b/storage/maria/maria_read_log.c @@ -442,8 +442,11 @@ prototype_exec_hook(REDO_CREATE_TABLE) info= maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR); if (info) { - DBUG_ASSERT(info->s->reopen == 1); /* check that we're not using it */ - if (!info->s->base.transactional) 
+ MARIA_SHARE *share= info->s; + /* check that we're not already using it */ + DBUG_ASSERT(share->reopen == 1); + DBUG_ASSERT(share->now_transactional == share->base.born_transactional); + if (!share->base.born_transactional) { /* could be that transactional table was later dropped, and a non-trans @@ -454,7 +457,7 @@ prototype_exec_hook(REDO_CREATE_TABLE) DBUG_ASSERT(0); /* I want to know this */ goto end; } - if (cmp_translog_addr(info->s->state.create_rename_lsn, rec->lsn) >= 0) + if (cmp_translog_addr(share->state.create_rename_lsn, rec->lsn) >= 0) { printf(", has create_rename_lsn (%lu,0x%lx) is more recent than record", (ulong) LSN_FILE_NO(rec->lsn), @@ -551,6 +554,7 @@ prototype_exec_hook(FILE_ID) int error; char *name, *buff; MARIA_HA *info= NULL; + MARIA_SHARE *share; if (((buff= my_malloc(rec->record_length, MYF(MY_WME))) == NULL) || (translog_read_record(rec->lsn, 0, rec->record_length, buff, NULL) != rec->record_length)) @@ -566,7 +570,7 @@ prototype_exec_hook(FILE_ID) { printf(", closing table '%s'", info->s->open_file_name); all_tables[sid]= NULL; - info->s->base.transactional= TRUE; /* put back the truth */ + _ma_reenable_logging_for_table(info->s); /* put back the truth */ if (maria_close(info)) { fprintf(stderr, "Failed to close table\n"); @@ -586,19 +590,19 @@ prototype_exec_hook(FILE_ID) fprintf(stderr, "Table is crashed, can't apply log records to it\n"); goto err; } - DBUG_ASSERT(info->s->reopen == 1); /* should always be only one instance */ - if (!info->s->base.transactional) + share= info->s; + /* check that we're not already using it */ + DBUG_ASSERT(share->reopen == 1); + DBUG_ASSERT(share->now_transactional == share->base.born_transactional); + if (!share->base.born_transactional) { printf(", is not transactional\n"); DBUG_ASSERT(0); /* I want to know this */ goto end; } all_tables[sid]= info; - /* - don't log any records for this work. TODO make sure this variable does not - go to disk before we restore it to its true value. 
- */ - info->s->base.transactional= FALSE; + /* don't log any records for this work */ + _ma_tmp_disable_logging_for_table(share); printf(", opened\n"); error= 0; goto end; @@ -742,7 +746,10 @@ static void end_of_redo_phase() { MARIA_HA *info= all_tables[sid]; if (info != NULL) + { + _ma_reenable_logging_for_table(info->s); /* put back the truth */ maria_close(info); + } } } } -- cgit v1.2.1 From e3bfb8974ea845cad184f96fb7393a4ce7e654f2 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 3 Jul 2007 16:00:05 +0200 Subject: Marking the block dirty requires linking it into the changed_blocks[] list (for flush_pagecache*() functions and Checkpoint to see it) --- storage/maria/ma_pagecache.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index 6add7231e6f..50dde101c0d 100755 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -2440,16 +2440,16 @@ static void read_block(PAGECACHE *pagecache, } -/* - Set LSN on the page to the given one if the given LSN is bigger +/** + @brief Set LSN on the page to the given one if the given LSN is bigger - SYNOPSIS - check_and_set_lsn() - lsn LSN to set - block block to check and set + @param pagecache pointer to a page cache data structure + @param lsn LSN to set + @param block block to check and set */ -static void check_and_set_lsn(LSN lsn, PAGECACHE_BLOCK_LINK *block) +static void check_and_set_lsn(PAGECACHE *pagecache, + LSN lsn, PAGECACHE_BLOCK_LINK *block) { LSN old; DBUG_ENTER("check_and_set_lsn"); @@ -2463,7 +2463,9 @@ static void check_and_set_lsn(LSN lsn, PAGECACHE_BLOCK_LINK *block) DBUG_ASSERT(block->type != PAGECACHE_READ_UNKNOWN_PAGE); lsn_store(block->buffer + PAGE_LSN_OFFSET, lsn); - block->status|= PCBLOCK_CHANGED; + /* we stored LSN in page so we dirtied it */ + if (!(block->status & PCBLOCK_CHANGED)) + link_to_changed_list(pagecache, block); } DBUG_VOID_RETURN; } @@ -2537,10 
+2539,8 @@ void pagecache_unlock(PAGECACHE *pagecache, if (block->rec_lsn == 0) block->rec_lsn= first_REDO_LSN_for_page; } - if (lsn != 0) - { - check_and_set_lsn(lsn, block); - } + if (lsn != LSN_IMPOSSIBLE) + check_and_set_lsn(pagecache, lsn, block); if (make_lock_and_pin(pagecache, block, lock, pin)) { @@ -2600,10 +2600,8 @@ void pagecache_unpin(PAGECACHE *pagecache, DBUG_ASSERT(block != 0); DBUG_ASSERT(page_st == PAGE_READ); - if (lsn != 0) - { - check_and_set_lsn(lsn, block); - } + if (lsn != LSN_IMPOSSIBLE) + check_and_set_lsn(pagecache, lsn, block); /* we can just unpin only with keeping read lock because: @@ -2700,7 +2698,7 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache, block->rec_lsn= first_REDO_LSN_for_page; } if (lsn != LSN_IMPOSSIBLE) - check_and_set_lsn(lsn, block); + check_and_set_lsn(pagecache, lsn, block); if (make_lock_and_pin(pagecache, block, lock, pin)) DBUG_ASSERT(0); /* should not happend */ @@ -2754,10 +2752,8 @@ void pagecache_unpin_by_link(PAGECACHE *pagecache, inc_counter_for_resize_op(pagecache); - if (lsn != 0) - { - check_and_set_lsn(lsn, block); - } + if (lsn != LSN_IMPOSSIBLE) + check_and_set_lsn(pagecache, lsn, block); /* We can just unpin only with keeping read lock because: @@ -3920,7 +3916,7 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, ptr+= 4; lsn_store(ptr, block->rec_lsn); ptr+= LSN_STORE_SIZE; - if (block->rec_lsn != 0) + if (block->rec_lsn != LSN_IMPOSSIBLE) { if (cmp_translog_addr(block->rec_lsn, minimum_rec_lsn) < 0) minimum_rec_lsn= block->rec_lsn; -- cgit v1.2.1 From 53218beea793726eb7bbf31074fd18a24a63b69a Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 3 Jul 2007 16:36:13 +0200 Subject: Verify that INSERT DELAYED is disabled only for transactional Maria tables mysql-test/r/maria.result: result update (we see DELAYED is refused only for transactional Maria tables) mysql-test/t/maria.test: verify that INSERT DELAYED is disabled only for transactional Maria tables --- 
mysql-test/r/maria.result | 24 ++++++++++++++++++++++++ mysql-test/t/maria.test | 18 ++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/mysql-test/r/maria.result b/mysql-test/r/maria.result index a1c49fdfe4f..6303f498e36 100644 --- a/mysql-test/r/maria.result +++ b/mysql-test/r/maria.result @@ -1832,3 +1832,27 @@ t1 CREATE TABLE `t1` ( `a` int(11) DEFAULT NULL ) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=PAGE drop table t1; +create table t1 (a int) row_format=page; +insert delayed into t1 values(1); +ERROR HY000: Table storage engine for 't1' doesn't have this option +drop table t1; +create table t1 (a int) row_format=page transactional=0; +insert delayed into t1 values(1); +flush table t1; +select * from t1; +a +1 +select count(*) from t1; +count(*) +1 +drop table t1; +create table t1 (a int) row_format=dynamic; +insert delayed into t1 values(1); +flush table t1; +select * from t1; +a +1 +select count(*) from t1; +count(*) +1 +drop table t1; diff --git a/mysql-test/t/maria.test b/mysql-test/t/maria.test index 25000abd426..f03d744f850 100644 --- a/mysql-test/t/maria.test +++ b/mysql-test/t/maria.test @@ -1118,6 +1118,24 @@ create table t1 (a int) row_format=PAGE; show create table t1; drop table t1; +# Verify that INSERT DELAYED is disabled only for transactional tables +create table t1 (a int) row_format=page; +--error ER_ILLEGAL_HA +insert delayed into t1 values(1); +drop table t1; +create table t1 (a int) row_format=page transactional=0; +insert delayed into t1 values(1); +flush table t1; +select * from t1; +select count(*) from t1; +drop table t1; +create table t1 (a int) row_format=dynamic; +insert delayed into t1 values(1); +flush table t1; +select * from t1; +select count(*) from t1; +drop table t1; + # End of 5.2 tests --disable_result_log -- cgit v1.2.1 From 8116cd23509e8dc77d0d9a52ae25164c09f8f39a Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 4 Jul 2007 00:50:17 +0300 Subject: Implement applying of REDO entries for - 
LOGREC_REDO_INSERT_ROW_HEAD - LOGREC_REDO_INSERT_ROW_TAIL - LOGREC_REDO_PURGE_ROW_HEAD - LOGREC_REDO_PURGE_ROW_TAIL sql/sql_yacc.yy: Fixed typo in previous push storage/maria/ma_bitmap.c: Ensure we flush the new bitmap on close storage/maria/ma_blockrec.c: Implement applying of REDO entries for - LOGREC_REDO_INSERT_ROW_HEAD - LOGREC_REDO_INSERT_ROW_TAIL - LOGREC_REDO_PURGE_ROW_HEAD - LOGREC_REDO_PURGE_ROW_TAIL Split some functions into subfunctions to be able to reuse code storage/maria/ma_blockrec.h: Added prototypes for REDO applying functions storage/maria/ma_loghandler.h: Safety fix storage/maria/ma_loghandler_lsn.h: Avoid compiler warnings storage/maria/maria_read_log.c: Added hooks for: - REDO_INSERT_ROW_HEAD - REDO_INSERT_ROW_TAIL - REDO_PURGE_ROW_HEAD - REDO_PURGE_ROW_TAIL Added dummy hooks for: - UNDO_ROW_INSERT - UNDO_ROW_DELETE Changed to use maria_pagecache instead of own pagecache (fixed problem with uninitialized share->pagecache) Use maria_panic() at end to ensure that all files are closed properly. 
Fixed option handling for --debug --- sql/sql_yacc.yy | 2 +- storage/maria/ma_bitmap.c | 2 +- storage/maria/ma_blockrec.c | 493 ++++++++++++++++++++++++++++++++------ storage/maria/ma_blockrec.h | 8 + storage/maria/ma_loghandler.h | 2 +- storage/maria/ma_loghandler_lsn.h | 2 +- storage/maria/maria_read_log.c | 272 +++++++++++++++++++-- 7 files changed, 677 insertions(+), 104 deletions(-) diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 0653863cc73..1bf198b5b4a 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -4280,7 +4280,7 @@ row_types: | DYNAMIC_SYM { $$= ROW_TYPE_DYNAMIC; } | COMPRESSED_SYM { $$= ROW_TYPE_COMPRESSED; } | REDUNDANT_SYM { $$= ROW_TYPE_REDUNDANT; } - | COMPACT_SYM { $$= ROW_TYPE_COMPACT; }; + | COMPACT_SYM { $$= ROW_TYPE_COMPACT; } | PAGE_SYM { $$= ROW_TYPE_PAGE; }; merge_insert_types: diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c index f6a8172935f..3376f4abf2c 100644 --- a/storage/maria/ma_bitmap.c +++ b/storage/maria/ma_bitmap.c @@ -296,7 +296,7 @@ void _ma_bitmap_delete_all(MARIA_SHARE *share) { bzero(bitmap->map, share->block_size); memcpy(bitmap->map + share->block_size - 2, maria_bitmap_marker, 2); - bitmap->changed= 0; + bitmap->changed= 1; bitmap->page= 0; bitmap->used_size= bitmap->total_size; } diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index cfa9df02102..453af37089d 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -868,7 +868,7 @@ static void calc_record_size(MARIA_HA *info, const byte *record, compact_page() buff Page to compact block_size Size of page - recnr Put empty data after this row + rownr Put empty data after this row extend_block If 1, extend the block at 'rownr' to cover the whole block. 
*/ @@ -980,6 +980,13 @@ static void compact_page(byte *buff, uint block_size, uint rownr, uint length= (uint) (dir - buff) - start_of_found_block; int2store(dir+2, length); } + else + { + /* + TODO: + Update (buff + EMPTY_SPACE_OFFSET) if we remove transid from rows + */ + } buff[PAGE_TYPE_OFFSET]&= ~(byte) PAGE_CAN_BE_COMPACTED; } DBUG_EXECUTE("directory", _ma_print_directory(buff, block_size);); @@ -987,6 +994,37 @@ static void compact_page(byte *buff, uint block_size, uint rownr, } +/* + Create an empty tail or head page + + SYNOPSIS + make_empty_page() + buff Page buffer + block_size Block size + page_type HEAD_PAGE or TAIL_PAGE + + NOTES + EMPTY_SPACE is not updated +*/ + +static void make_empty_page(byte *buff, uint block_size, uint page_type) +{ + + bzero(buff, PAGE_HEADER_SIZE); + /* + We zero the rest of the block to avoid getting old memory information + to disk and to allow the file to be compressed better if archived. + The rest of the code does not assume the block is zeroed above + PAGE_OVERHEAD_SIZE + */ + bzero(buff+ PAGE_HEADER_SIZE, block_size - PAGE_HEADER_SIZE); + buff[PAGE_TYPE_OFFSET]= (byte) page_type; + buff[DIR_COUNT_OFFSET]= 1; + /* Store position to the first row */ + int2store(buff + block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE, + PAGE_HEADER_SIZE); +} + /* Read or initialize new head or tail page @@ -1019,6 +1057,7 @@ struct st_row_pos_info uint empty_space; /* Space left on page */ }; + static my_bool get_head_or_tail_page(MARIA_HA *info, MARIA_BITMAP_BLOCK *block, byte *buff, uint length, uint page_type, @@ -1035,25 +1074,12 @@ static my_bool get_head_or_tail_page(MARIA_HA *info, if (block->org_bitmap_value == 0) /* Empty block */ { /* New page */ - bzero(buff, PAGE_HEADER_SIZE); - - /* - We zero the rest of the block to avoid getting old memory information - to disk and to allow the file to be compressed better if archived. 
- The rest of the code does not assume the block is zeroed above - PAGE_OVERHEAD_SIZE - */ - bzero(buff+ PAGE_HEADER_SIZE, block_size - PAGE_HEADER_SIZE); - - buff[PAGE_TYPE_OFFSET]= (byte) page_type; - buff[DIR_COUNT_OFFSET]= 1; + make_empty_page(buff, block_size, page_type); res->buff= buff; res->empty_space= res->length= (block_size - PAGE_OVERHEAD_SIZE); res->data= (buff + PAGE_HEADER_SIZE); res->dir= res->data + res->length; res->rownr= 0; - /* Store position to the first row */ - int2store(res->dir, PAGE_HEADER_SIZE); DBUG_ASSERT(length <= res->length); } else @@ -1710,8 +1736,12 @@ static my_bool write_block_record(MARIA_HA *info, uint length= (uint) (data - row_pos->data); DBUG_PRINT("info", ("head length: %u", length)); if (length < info->s->base.min_row_length) + { + uint diff_length= info->s->base.min_row_length - length; + bzero(data, diff_length); + data+= diff_length; length= info->s->base.min_row_length; - + } int2store(row_pos->dir + 2, length); /* update empty space at start of block */ row_pos->empty_space-= length; @@ -2471,6 +2501,76 @@ err: } +/* + Delete a directory entry + + SYNOPSIS + delete_dir_entry() + buff Page buffer + block_size Block size + record_number Record number to delete + empty_space Empty space on page after delete + + RETURN + -1 Error on page + 0 ok + 1 Page is now empty +*/ + +static int delete_dir_entry(byte *buff, uint block_size, uint record_number, + uint *empty_space_res) +{ + uint number_of_records= (uint) ((uchar *) buff)[DIR_COUNT_OFFSET]; + uint length, empty_space; + byte *dir; + DBUG_ENTER("delete_dir_entry"); + +#ifdef SANITY_CHECKS + if (record_number >= number_of_records || + record_number > ((block_size - LSN_SIZE - PAGE_TYPE_SIZE - 1 - + PAGE_SUFFIX_SIZE) / DIR_ENTRY_SIZE)) + { + DBUG_PRINT("error", ("record_number: %u number_of_records: %u", + record_number, number_of_records)); + + DBUG_RETURN(-1); + } +#endif + + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + dir= (buff + block_size - 
DIR_ENTRY_SIZE * record_number - + DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE); + dir[0]= dir[1]= 0; /* Delete entry */ + length= uint2korr(dir + 2); + + if (record_number == number_of_records - 1) + { + /* Delete this entry and all following empty directory entries */ + byte *end= buff + block_size - PAGE_SUFFIX_SIZE; + do + { + number_of_records--; + dir+= DIR_ENTRY_SIZE; + empty_space+= DIR_ENTRY_SIZE; + } while (dir < end && dir[0] == 0 && dir[1] == 0); + buff[DIR_COUNT_OFFSET]= (byte) (uchar) number_of_records; + } + empty_space+= length; + if (number_of_records != 0) + { + /* Update directory */ + int2store(buff + EMPTY_SPACE_OFFSET, empty_space); + buff[PAGE_TYPE_OFFSET]|= (byte) PAGE_CAN_BE_COMPACTED; + + *empty_space_res= empty_space; + DBUG_RETURN(0); + } + buff[PAGE_TYPE_OFFSET]= UNALLOCATED_PAGE; + *empty_space_res= block_size; + DBUG_RETURN(1); +} + + /* Delete a head a tail part @@ -2493,11 +2593,12 @@ static my_bool delete_head_or_tail(MARIA_HA *info, my_bool head) { MARIA_SHARE *share= info->s; - uint number_of_records, empty_space, length; + uint empty_space; uint block_size= share->block_size; - byte *buff, *dir; + byte *buff; LSN lsn; MARIA_PINNED_PAGE page_link; + int res; DBUG_ENTER("delete_head_or_tail"); info->keyread_buff_used= 1; @@ -2511,60 +2612,30 @@ static my_bool delete_head_or_tail(MARIA_HA *info, page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK; push_dynamic(&info->pinned_pages, (void*) &page_link); - number_of_records= (uint) ((uchar *) buff)[DIR_COUNT_OFFSET]; -#ifdef SANITY_CHECKS - if (record_number >= number_of_records || - record_number > ((block_size - LSN_SIZE - PAGE_TYPE_SIZE - 1 - - PAGE_SUFFIX_SIZE) / DIR_ENTRY_SIZE)) - { - DBUG_PRINT("error", ("record_number: %u number_of_records: %u", - record_number, number_of_records)); + res= delete_dir_entry(buff, block_size, record_number, &empty_space); + if (res < 0) DBUG_RETURN(1); - } -#endif - - dir= (buff + block_size - DIR_ENTRY_SIZE * record_number - - DIR_ENTRY_SIZE - 
PAGE_SUFFIX_SIZE); - dir[0]= dir[1]= 0; /* Delete entry */ - length= uint2korr(dir + 2); - empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); - - if (record_number == number_of_records - 1) - { - /* Delete this entry and all following empty directory entries */ - byte *end= buff + block_size - PAGE_SUFFIX_SIZE; - do - { - number_of_records--; - dir+= DIR_ENTRY_SIZE; - empty_space+= DIR_ENTRY_SIZE; - } while (dir < end && dir[0] == 0 && dir[1] == 0); - buff[DIR_COUNT_OFFSET]= (byte) (uchar) number_of_records; - } - empty_space+= length; - if (number_of_records != 0) + if (res == 0) { uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE]; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; - /* Update directory */ - int2store(buff + EMPTY_SPACE_OFFSET, empty_space); - buff[PAGE_TYPE_OFFSET]|= (byte) PAGE_CAN_BE_COMPACTED; - DBUG_ASSERT(share->pagecache->block_size == block_size); - - /* Log REDO data */ - page_store(log_data+ FILEID_STORE_SIZE, page); - dirpos_store(log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE, + if (info->s->base.transactional) + { + /* Log REDO data */ + page_store(log_data+ FILEID_STORE_SIZE, page); + dirpos_store(log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE, record_number); - log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); - if (translog_write_record(&lsn, (head ? LOGREC_REDO_PURGE_ROW_HEAD : - LOGREC_REDO_PURGE_ROW_TAIL), - info->trn, share, sizeof(log_data), - TRANSLOG_INTERNAL_PARTS + 1, log_array, - log_data)) - DBUG_RETURN(1); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + if (translog_write_record(&lsn, (head ? 
LOGREC_REDO_PURGE_ROW_HEAD : + LOGREC_REDO_PURGE_ROW_TAIL), + info->trn, share, sizeof(log_data), + TRANSLOG_INTERNAL_PARTS + 1, log_array, + log_data)) + DBUG_RETURN(1); + } if (pagecache_write(share->pagecache, &info->dfile, page, 0, buff, share->page_type, @@ -2579,20 +2650,21 @@ static my_bool delete_head_or_tail(MARIA_HA *info, PAGE_STORE_SIZE + PAGERANGE_STORE_SIZE]; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; - pagerange_store(log_data + FILEID_STORE_SIZE, 1); - page_store(log_data+ FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE, page); - pagerange_store(log_data + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE + - PAGE_STORE_SIZE, 1); - log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); - if (translog_write_record(&lsn, LOGREC_REDO_PURGE_BLOCKS, - info->trn, share, sizeof(log_data), - TRANSLOG_INTERNAL_PARTS + 1, log_array, - log_data)) - DBUG_RETURN(1); - + if (info->s->base.transactional) + { + pagerange_store(log_data + FILEID_STORE_SIZE, 1); + page_store(log_data+ FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE, page); + pagerange_store(log_data + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE + + PAGE_STORE_SIZE, 1); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + if (translog_write_record(&lsn, LOGREC_REDO_PURGE_BLOCKS, + info->trn, share, sizeof(log_data), + TRANSLOG_INTERNAL_PARTS + 1, log_array, + log_data)) + DBUG_RETURN(1); + } /* Write the empty page (needed only for REPAIR to work) */ - buff[PAGE_TYPE_OFFSET]= UNALLOCATED_PAGE; if (pagecache_write(share->pagecache, &info->dfile, page, 0, buff, share->page_type, @@ -4024,3 +4096,268 @@ static size_t fill_update_undo_parts(MARIA_HA *info, const byte *oldrec, row_length+= start_log_parts->length; DBUG_RETURN(row_length); } + +/*************************************************************************** + Applying of REDO log records 
+***************************************************************************/ + +/* + Apply LOGREC_REDO_INSERT_ROW_HEAD & LOGREC_REDO_INSERT_ROW_TAIL + + SYNOPSIS + _ma_apply_redo_insert_row_head_or_tail() + info Maria handler + lsn LSN to put on page + page_type HEAD_PAGE or TAIL_PAGE + header Header (without FILEID) + data Data to be put on page + data_length Length of data + + RETURN + 0 ok + # Error number +*/ + +uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, + uint page_type, + const byte *header, + const byte *data, + size_t data_length) +{ + MARIA_SHARE *share= info->s; + ulonglong page; + uint rownr, empty_space; + uint block_size= share->block_size; + uint rec_offset; + byte *buff= info->keyread_buff, *dir; + DBUG_ENTER("_ma_apply_redo_insert_row_head"); + + info->keyread_buff_used= 1; + page= page_korr(header); + rownr= dirpos_korr(header+PAGE_STORE_SIZE); + + if (page * info->s->block_size > info->state->data_file_length) + { + /* New page at end of file */ + DBUG_ASSERT(rownr == 0); + if (rownr != 0) + goto err; + make_empty_page(buff, block_size, page_type); + empty_space= (block_size - PAGE_OVERHEAD_SIZE); + rec_offset= PAGE_HEADER_SIZE; + dir= buff+ block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE; + + /* Update that file is extended */ + info->state->data_file_length= page * info->s->block_size; + } + else + { + uint max_entry; + if (!(buff= pagecache_read(share->pagecache, + &info->dfile, + page, 0, + buff, PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0))) + DBUG_RETURN(my_errno); + if (lsn_korr(buff) >= lsn) + { + /* Already applied */ + + /* Fix bitmap, just in case */ + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) + DBUG_RETURN(my_errno); + DBUG_RETURN(0); + } + + max_entry= (uint) ((uchar*) buff)[DIR_COUNT_OFFSET]; + if (((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != page_type)) + { + /* + This is a page that has been freed before and now 
should be + changed to new type. + */ + if ((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != BLOB_PAGE && + (buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != UNALLOCATED_PAGE) + goto err; + make_empty_page(buff, block_size, page_type); + empty_space= (block_size - PAGE_OVERHEAD_SIZE); + rec_offset= PAGE_HEADER_SIZE; + dir= buff+ block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE; + } + else + { + dir= (buff + block_size - DIR_ENTRY_SIZE * (rownr + 1) - + PAGE_SUFFIX_SIZE); + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + + if (max_entry >= rownr) + { + /* Add directory entry first in directory and data last on page */ + DBUG_ASSERT(max_entry == rownr); + if (max_entry != rownr) + goto err; + rec_offset= (uint2korr(dir + DIR_ENTRY_SIZE) + + uint2korr(dir + DIR_ENTRY_SIZE +2)); + if ((uint) (dir - buff) < rec_offset + data_length) + { + /* Create place for directory & data */ + compact_page(buff, block_size, max_entry - 1, 0); + rec_offset= (uint2korr(dir + DIR_ENTRY_SIZE) + + uint2korr(dir + DIR_ENTRY_SIZE +2)); + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + DBUG_ASSERT(!((uint) (dir - buff) < rec_offset + data_length)); + if ((uint) (dir - buff) < rec_offset + data_length) + goto err; + } + buff[DIR_COUNT_OFFSET]= (byte) (uchar) max_entry+1; + int2store(dir, rec_offset); + empty_space-= DIR_ENTRY_SIZE; + } + else + { + /* reuse old empty entry */ + byte *pos, *end, *end_data; + DBUG_ASSERT(uint2korr(dir) == 0); + if (uint2korr(dir)) + goto err; /* Should have been empty */ + + /* Find start of where we can put data */ + end= (buff + block_size - DIR_ENTRY_SIZE * max_entry - + PAGE_SUFFIX_SIZE); + for (pos= dir ; pos >= end ; pos-= DIR_ENTRY_SIZE) + { + if ((rec_offset= uint2korr(pos))) + { + rec_offset+= uint2korr(pos+2); + break; + } + } + DBUG_ASSERT(pos >= end); + if (pos < end) /* Wrong directory */ + goto err; + + /* find end data */ + end_data= end; /* Start of directory */ + end= (buff + block_size - PAGE_SUFFIX_SIZE); + for (pos= dir ; pos < end ; pos+= 
DIR_ENTRY_SIZE) + { + uint offset; + if ((offset= uint2korr(pos))) + { + end_data= buff + offset; + break; + } + } + if ((uint) (end_data - (buff + rec_offset)) < data_length) + { + uint length; + /* Not enough continues space, compact page to get more */ + int2store(dir, rec_offset); + compact_page(buff, block_size, rownr, 1); + rec_offset= uint2korr(dir); + length= uint2korr(dir+2); + DBUG_ASSERT(length >= data_length); + if (length < data_length) + goto err; + empty_space= length; + } + } + } + } + /* Copy data */ + int2store(dir+2, data_length); + memcpy(buff + rec_offset, data, data_length); + empty_space-= data_length; + int2store(buff + EMPTY_SPACE_OFFSET, empty_space); + + /* Write modified page */ + lsn_store(buff, lsn); + if (pagecache_write(share->pagecache, + &info->dfile, page, 0, + buff, PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, 0)) + DBUG_RETURN(my_errno); + + /* Fix bitmap */ + if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) + DBUG_RETURN(my_errno); + + DBUG_RETURN(0); + +err: + DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); +} + + +/* + Apply LOGREC_REDO_PURGE_ROW_HEAD & LOGREC_REDO_PURGE_ROW_TAIL + + SYNOPSIS + _ma_apply_redo_purge_row_head_or_tail() + info Maria handler + lsn LSN to put on page + page_type HEAD_PAGE or TAIL_PAGE + header Header (without FILEID) + data Data to be put on page + data_length Length of data + + NOTES + This function is very similar to delete_head_or_tail() + + RETURN + 0 ok + # Error number +*/ + +uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn, + uint page_type, + const byte *header) +{ + MARIA_SHARE *share= info->s; + ulonglong page; + uint record_number, empty_space; + uint block_size= share->block_size; + byte *buff= info->keyread_buff; + DBUG_ENTER("_ma_apply_redo_purge_row_head_or_tail"); + + info->keyread_buff_used= 1; + page= page_korr(header); + record_number= dirpos_korr(header+PAGE_STORE_SIZE); + + if (!(buff= 
pagecache_read(share->pagecache, + &info->dfile, + page, 0, + buff, PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0))) + DBUG_RETURN(my_errno); + DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == (byte) page_type); + + if (lsn_korr(buff) >= lsn) + { + /* Already applied */ + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) + DBUG_RETURN(my_errno); + DBUG_RETURN(0); + } + + if (delete_dir_entry(buff, block_size, record_number, &empty_space) < 0) + DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); + + if (pagecache_write(share->pagecache, + &info->dfile, page, 0, + buff, PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, 0)) + DBUG_RETURN(my_errno); + + /* This will work even if the page was marked as UNALLOCATED_PAGE */ + if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) + DBUG_RETURN(my_errno); + + DBUG_RETURN(0); +} diff --git a/storage/maria/ma_blockrec.h b/storage/maria/ma_blockrec.h index 819d1c2e4d2..0ed0898859c 100644 --- a/storage/maria/ma_blockrec.h +++ b/storage/maria/ma_blockrec.h @@ -178,3 +178,11 @@ my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info, ulonglong page, uint *bitmap_pattern); void _ma_bitmap_delete_all(MARIA_SHARE *share); +uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, + uint page_type, + const byte *header, + const byte *data, + size_t data_length); +uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn, + uint page_type, + const byte *header); diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h index f2bfd2c9d7e..230f999c19a 100644 --- a/storage/maria/ma_loghandler.h +++ b/storage/maria/ma_loghandler.h @@ -62,7 +62,7 @@ struct st_maria_share; #define pagerange_store(T,A) int2store(T,A) #define fileid_korr(P) uint2korr(P) #define page_korr(P) uint5korr(P) -#define dirpos_korr(P) (P[0]) +#define dirpos_korr(P) ((P)[0]) #define 
pagerange_korr(P) uint2korr(P) /* diff --git a/storage/maria/ma_loghandler_lsn.h b/storage/maria/ma_loghandler_lsn.h index 34cb7616b74..e034834aa20 100644 --- a/storage/maria/ma_loghandler_lsn.h +++ b/storage/maria/ma_loghandler_lsn.h @@ -45,7 +45,7 @@ typedef TRANSLOG_ADDRESS LSN; #define LSN_OFFSET(L) ((L) & 0xFFFFFFFFL) /* Makes lsn/log address from file number and record offset */ -#define MAKE_LSN(F,S) ((((uint64)(F)) << 32) | (S)) +#define MAKE_LSN(F,S) ((LSN) ((((uint64)(F)) << 32) | (S))) /* checks LSN */ #define LSN_VALID(L) \ diff --git a/storage/maria/maria_read_log.c b/storage/maria/maria_read_log.c index 5b2d5b057c2..e6911007230 100644 --- a/storage/maria/maria_read_log.c +++ b/storage/maria/maria_read_log.c @@ -14,20 +14,22 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "maria_def.h" +#include #include #define PCACHE_SIZE (1024*1024*10) #define LOG_FLAGS 0 #define LOG_FILE_SIZE (1024L*1024L) - -static PAGECACHE pagecache; - static const char *load_default_groups[]= { "maria_read_log",0 }; static void get_options(int *argc,char * * *argv); #ifndef DBUG_OFF -static const char *default_dbug_option; +#if defined(__WIN__) +const char *default_dbug_option= "d:t:i:O,\\maria_read_log.trace"; +#else +const char *default_dbug_option= "d:t:i:o,/tmp/maria_read_log.trace"; #endif +#endif /* DBUG_OFF */ static my_bool opt_only_display, opt_display_and_apply; struct TRN_FOR_RECOVERY @@ -55,7 +57,25 @@ prototype_exec_hook(CHECKPOINT); prototype_exec_hook(REDO_CREATE_TABLE); prototype_exec_hook(FILE_ID); prototype_exec_hook(REDO_INSERT_ROW_HEAD); +prototype_exec_hook(REDO_INSERT_ROW_TAIL); +prototype_exec_hook(REDO_PURGE_ROW_HEAD); +prototype_exec_hook(REDO_PURGE_ROW_TAIL); +prototype_exec_hook(UNDO_ROW_INSERT); +prototype_exec_hook(UNDO_ROW_DELETE); prototype_exec_hook(COMMIT); + + +/* + TODO: Avoid mallocs in exec. 
+ + Proposed fix: + Add either a context/buffer argument to all exec_hook functions + or add 'record_buffer' and 'record_buffer_length' to + TRANSLOG_HEADER_BUFFER. + With this we could use my_realloc() instead of my_malloc() to + allocate data and save some mallocs. +*/ + /* To implement REDO_DROP_TABLE and REDO_RENAME_TABLE, we would need to go through the all_tables[] array, find all open instances of the @@ -78,19 +98,6 @@ int main(int argc, char **argv) maria_data_root= "."; -#ifndef DBUG_OFF -#if defined(__WIN__) - default_dbug_option= "d:t:i:O,\\maria_read_log.trace"; -#else - default_dbug_option= "d:t:i:o,/tmp/maria_read_log.trace"; -#endif - if (argc > 1) - { - DBUG_SET(default_dbug_option); - DBUG_SET_INITIAL(default_dbug_option); - } -#endif - if (maria_init()) { fprintf(stderr, "Can't init Maria engine (%d)\n", errno); @@ -107,7 +114,7 @@ int main(int argc, char **argv) fprintf(stderr, "Can't find any log\n"); goto err; } - if (init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + if (init_pagecache(maria_pagecache, PCACHE_SIZE, 0, 0, TRANSLOG_PAGE_SIZE) == 0) { fprintf(stderr, "Got error in init_pagecache() (errno: %d)\n", errno); @@ -119,7 +126,7 @@ int main(int argc, char **argv) But if it finds a log and this log was crashed, it will create a new log, which is useless. TODO: start log handler in read-only mode. 
*/ - if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, maria_pagecache, TRANSLOG_DEFAULT_FLAGS)) { fprintf(stderr, "Can't init loghandler (%d)\n", errno); @@ -137,6 +144,11 @@ int main(int argc, char **argv) install_exec_hook(REDO_CREATE_TABLE); install_exec_hook(FILE_ID); install_exec_hook(REDO_INSERT_ROW_HEAD); + install_exec_hook(REDO_INSERT_ROW_TAIL); + install_exec_hook(REDO_PURGE_ROW_HEAD); + install_exec_hook(REDO_PURGE_ROW_TAIL); + install_exec_hook(UNDO_ROW_INSERT); + install_exec_hook(UNDO_ROW_DELETE); install_exec_hook(COMMIT); if (opt_only_display) @@ -261,7 +273,7 @@ err: /* don't touch anything more, in case we hit a bug */ exit(1); end: - maria_end(); + maria_panic(HA_PANIC_CLOSE); free_defaults(default_argv); my_end(0); exit(0); @@ -318,7 +330,13 @@ get_one_option(int optid __attribute__((unused)), const struct my_option *opt __attribute__((unused)), char *argument __attribute__((unused))) { - /* for now there is nothing special with our options */ + switch (optid) { +#ifndef DBUG_OFF + case '#': + DBUG_SET_INITIAL(argument ? 
argument : default_dbug_option); + break; + } +#endif return 0; } @@ -619,6 +637,140 @@ prototype_exec_hook(REDO_INSERT_ROW_HEAD) ulonglong page; MARIA_HA *info; char llbuf[22]; + byte *buff= 0; + + sid= fileid_korr(rec->header); + page= page_korr(rec->header + FILEID_STORE_SIZE); + llstr(page, llbuf); + printf("For page %s of table of short id %u", llbuf, sid); + info= all_tables[sid]; + if (info == NULL) + { + printf(", table skipped, so skipping record\n"); + goto end; + } + printf(", '%s'", info->s->open_file_name); + if (cmp_translog_addr(info->s->state.create_rename_lsn, rec->lsn) >= 0) + { + printf(", has create_rename_lsn (%lu,0x%lx) is more recent than log" + " record\n", + (ulong) LSN_FILE_NO(rec->lsn), (ulong) LSN_OFFSET(rec->lsn)); + goto end; + } + /* + Soon we will also skip the page depending on the rec_lsn for this page in + the checkpoint record, but this is not absolutely needed for now (just + assume we have made no checkpoint). + */ + printf(", applying record\n"); + /* + If REDO's LSN is > page's LSN (read from disk), we are going to modify the + page and change its LSN. The normal runtime code stores the UNDO's LSN + into the page. Here storing the REDO's LSN (rec->lsn) would work + (we are not writing to the log here, so don't have to "flush up to UNDO's + LSN"). But in a test scenario where we do updates at runtime, then remove + tables, apply the log and check that this results in the same table as at + runtime, putting the same LSN as runtime had done will decrease + differences. So we use the UNDO's LSN which is current_group_end_lsn. 
+ */ + + if ((!(buff= (byte*) my_malloc(rec->record_length, MYF(MY_WME)))) || + (translog_read_record(rec->lsn, 0, rec->record_length, buff, NULL) != + rec->record_length)) + { + fprintf(stderr, "Failed to read record\n"); + goto end; + } + if (_ma_apply_redo_insert_row_head_or_tail(info, rec->lsn, HEAD_PAGE, + rec->header + FILEID_STORE_SIZE, + buff + (rec->record_length - + rec->non_header_data_len), + rec->non_header_data_len)) + goto end; + my_free(buff, MYF(0)); + return 0; + +end: + /* as we don't have apply working: */ + my_free(buff, MYF(MY_ALLOW_ZERO_PTR)); + return 1; +} + + +prototype_exec_hook(REDO_INSERT_ROW_TAIL) +{ + uint16 sid; + ulonglong page; + MARIA_HA *info; + char llbuf[22]; + byte *buff= 0; + + sid= fileid_korr(rec->header); + page= page_korr(rec->header + FILEID_STORE_SIZE); + llstr(page, llbuf); + printf("For page %s of table of short id %u", llbuf, sid); + info= all_tables[sid]; + if (info == NULL) + { + printf(", table skipped, so skipping record\n"); + goto end; + } + printf(", '%s'", info->s->open_file_name); + if (cmp_translog_addr(info->s->state.create_rename_lsn, rec->lsn) >= 0) + { + printf(", has create_rename_lsn (%lu,0x%lx) is more recent than log" + " record\n", + (ulong) LSN_FILE_NO(rec->lsn), (ulong) LSN_OFFSET(rec->lsn)); + goto end; + } + /* + Soon we will also skip the page depending on the rec_lsn for this page in + the checkpoint record, but this is not absolutely needed for now (just + assume we have made no checkpoint). + */ + printf(", applying record\n"); + /* + If REDO's LSN is > page's LSN (read from disk), we are going to modify the + page and change its LSN. The normal runtime code stores the UNDO's LSN + into the page. Here storing the REDO's LSN (rec->lsn) would work + (we are not writing to the log here, so don't have to "flush up to UNDO's + LSN"). 
But in a test scenario where we do updates at runtime, then remove + tables, apply the log and check that this results in the same table as at + runtime, putting the same LSN as runtime had done will decrease + differences. So we use the UNDO's LSN which is current_group_end_lsn. + */ + + if ((!(buff= (byte*) my_malloc(rec->record_length, MYF(MY_WME)))) || + (translog_read_record(rec->lsn, 0, rec->record_length, buff, NULL) != + rec->record_length)) + { + fprintf(stderr, "Failed to read record\n"); + goto end; + } + if (_ma_apply_redo_insert_row_head_or_tail(info, rec->lsn, TAIL_PAGE, + rec->header + FILEID_STORE_SIZE, + buff + (rec->record_length - + rec->non_header_data_len), + rec->non_header_data_len)) + goto end; + + my_free(buff, MYF(0)); + return 0; + +end: + /* as we don't have apply working: */ + my_free(buff, MYF(MY_ALLOW_ZERO_PTR)); + return 1; +} + + +prototype_exec_hook(REDO_PURGE_ROW_HEAD) +{ + uint16 sid; + ulonglong page; + MARIA_HA *info; + char llbuf[22]; + sid= fileid_korr(rec->header); page= page_korr(rec->header + FILEID_STORE_SIZE); llstr(page, llbuf); @@ -653,13 +805,89 @@ prototype_exec_hook(REDO_INSERT_ROW_HEAD) runtime, putting the same LSN as runtime had done will decrease differences. So we use the UNDO's LSN which is current_group_end_lsn. 
*/ - DBUG_ASSERT("Monty" == "this is the place"); + + if (_ma_apply_redo_purge_row_head_or_tail(info, rec->lsn, HEAD_PAGE, + rec->header + FILEID_STORE_SIZE)) + goto end; + + return 0; + end: /* as we don't have apply working: */ return 1; } +prototype_exec_hook(REDO_PURGE_ROW_TAIL) +{ + uint16 sid; + ulonglong page; + MARIA_HA *info; + char llbuf[22]; + + sid= fileid_korr(rec->header); + page= page_korr(rec->header + FILEID_STORE_SIZE); + llstr(page, llbuf); + printf("For page %s of table of short id %u", llbuf, sid); + info= all_tables[sid]; + if (info == NULL) + { + printf(", table skipped, so skipping record\n"); + goto end; + } + printf(", '%s'", info->s->open_file_name); + if (cmp_translog_addr(info->s->state.create_rename_lsn, rec->lsn) >= 0) + { + printf(", has create_rename_lsn (%lu,0x%lx) is more recent than log" + " record\n", + (ulong) LSN_FILE_NO(rec->lsn), (ulong) LSN_OFFSET(rec->lsn)); + goto end; + } + /* + Soon we will also skip the page depending on the rec_lsn for this page in + the checkpoint record, but this is not absolutely needed for now (just + assume we have made no checkpoint). + */ + printf(", applying record\n"); + /* + If REDO's LSN is > page's LSN (read from disk), we are going to modify the + page and change its LSN. The normal runtime code stores the UNDO's LSN + into the page. Here storing the REDO's LSN (rec->lsn) would work + (we are not writing to the log here, so don't have to "flush up to UNDO's + LSN"). But in a test scenario where we do updates at runtime, then remove + tables, apply the log and check that this results in the same table as at + runtime, putting the same LSN as runtime had done will decrease + differences. So we use the UNDO's LSN which is current_group_end_lsn. 
+ */ + + if (_ma_apply_redo_purge_row_head_or_tail(info, rec->lsn, TAIL_PAGE, + rec->header + FILEID_STORE_SIZE)) + goto end; + + return 0; + +end: + /* as we don't have apply working: */ + return 1; +} + + +static int exec_LOGREC_UNDO_ROW_INSERT(const TRANSLOG_HEADER_BUFFER *rec + __attribute__((unused))) +{ + /* Ignore this during the redo phase */ + return 0; +} + +static int exec_LOGREC_UNDO_ROW_DELETE(const TRANSLOG_HEADER_BUFFER *rec + __attribute__((unused))) +{ + /* Ignore this during the redo phase */ + return 0; +} + + + prototype_exec_hook(COMMIT) { uint16 sid= rec->short_trid; -- cgit v1.2.1 From 55bb37319624ab72cb20caf1328dba216074b417 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 4 Jul 2007 01:04:21 +0300 Subject: After merge fixes Note that ma_test_all doesn't work for the moment. (ma_test1 -s -M -T fails because it uses the dummy_transaction_object) storage/maria/ma_blockrec.c: After merge fixes --- storage/maria/ma_blockrec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index 7b8bc9ea2f0..06c1df16663 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -2612,7 +2612,7 @@ static my_bool delete_head_or_tail(MARIA_HA *info, uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE]; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; - if (info->s->base.transactional) + if (info->s->now_transactional) { /* Log REDO data */ page_store(log_data+ FILEID_STORE_SIZE, page); @@ -2642,7 +2642,7 @@ static my_bool delete_head_or_tail(MARIA_HA *info, PAGE_STORE_SIZE + PAGERANGE_STORE_SIZE]; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; - if (info->s->base.transactional) + if (info->s->now_transactional) { pagerange_store(log_data + FILEID_STORE_SIZE, 1); page_store(log_data+ FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE, page); -- cgit v1.2.1