diff options
26 files changed, 427 insertions, 245 deletions
diff --git a/mysql-test/include/maria_empty_logs.inc b/mysql-test/include/maria_empty_logs.inc index a24541717b0..64ea9e7a47e 100644 --- a/mysql-test/include/maria_empty_logs.inc +++ b/mysql-test/include/maria_empty_logs.inc @@ -18,9 +18,45 @@ if (!$mel_keep_control_file) remove_file $MYSQLTEST_VARDIR/master-data/maria_log_control; } remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000001; --- error 0,1 # maybe there is just one log +-- error 0,1 remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000002; -# Hope there were not more than these logs. +-- error 0,1 +remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000003; +-- error 0,1 +remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000004; +-- error 0,1 +remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000005; +-- error 0,1 +remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000006; +-- error 0,1 +remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000007; +-- error 0,1 +remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000008; +-- error 0,1 +remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000009; +-- error 0,1 +remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000010; +-- error 0,1 +remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000011; +-- error 0,1 +remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000012; +-- error 0,1 +remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000013; +-- error 0,1 +remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000014; +-- error 0,1 +remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000015; +-- error 0,1 +remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000016; +-- error 0,1 +remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000017; +-- error 0,1 +remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000018; +-- error 0,1 +remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000019; +-- error 0,1 +remove_file $MYSQLTEST_VARDIR/master-data/maria_log.00000020; +# hope there are not more than these logs... -- error 0,1 remove_file $MYSQLTEST_VARDIR/master-data/maria_recovery.trace; diff --git a/mysql-test/r/maria-recovery-bitmap.result b/mysql-test/r/maria-recovery-bitmap.result index 4eb1d2f491b..9c8ee173041 100644 --- a/mysql-test/r/maria-recovery-bitmap.result +++ b/mysql-test/r/maria-recovery-bitmap.result @@ -25,5 +25,23 @@ mysqltest.t1 check status OK Checksum-check ok use mysqltest; +* TEST of bitmap flushed without REDO-UNDO in the log (WAL violation) +flush table t1; +* copied t1 for comparison +lock tables t1 write; +insert into t1 values (REPEAT('a', 6000)); +SET SESSION debug="+d,maria_flush_bitmap,maria_crash"; +* crashing mysqld intentionally +set global maria_checkpoint_interval=1; +ERROR HY000: Lost connection to MySQL server during query +* recovery happens +check table t1 extended; +Table Op Msg_type Msg_text +mysqltest.t1 check status OK +* testing that checksum after recovery is as expected +Checksum-check +ok +use mysqltest; +drop table t1; drop database mysqltest_for_comparison; drop database mysqltest; diff --git a/mysql-test/t/maria-recovery-bitmap.test b/mysql-test/t/maria-recovery-bitmap.test index 28d122ed6f7..ee5f6cbadd3 100644 --- a/mysql-test/t/maria-recovery-bitmap.test +++ b/mysql-test/t/maria-recovery-bitmap.test @@ -57,10 +57,6 @@ sleep 5; set global maria_checkpoint_interval=1; -- source include/maria_verify_recovery.inc -# disabled until pagecache callback framework is coded at which point -# we can add a get_lsn() callback for bitmaps, fixing the below bug. -if (0) -{ --echo * TEST of bitmap flushed without REDO-UNDO in the log (WAL violation) # before crashing we'll flush the bitmap page let $mvr_debug_option="+d,maria_flush_bitmap,maria_crash"; @@ -71,7 +67,6 @@ insert into t1 values (REPEAT('a', 6000)); # log is not flushed the bitmap is inconsistent with the data. -- source include/maria_verify_recovery.inc drop table t1; -} # clean up everything let $mms_purpose=comparison; diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 8e3e1b7dd8a..69969a582c6 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -3774,6 +3774,7 @@ void wait_while_table_is_used(THD *thd,TABLE *table, remove_table_from_cache(thd, table->s->db.str, table->s->table_name.str, RTFC_WAIT_OTHER_THREAD_FLAG); + /* extra() call must come only after all instances above are closed */ VOID(table->file->extra(function)); DBUG_VOID_RETURN; } diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 99b897d2c5c..4097f35e6cd 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -120,8 +120,8 @@ static MYSQL_SYSVAR_ULONG(block_size, maria_block_size, static MYSQL_SYSVAR_ULONG(checkpoint_interval, checkpoint_interval, PLUGIN_VAR_RQCMDARG, - "Interval between automatic checkpoints, in seconds;" - " 0 means 'no automatic checkpoints'.", + "Interval between automatic checkpoints, in seconds; 0 means" + " 'no automatic checkpoints' which makes sense only for testing.", NULL, update_checkpoint_interval, 30, 0, UINT_MAX, 1); static MYSQL_SYSVAR_BOOL(page_checksum, maria_page_checksums, 0, @@ -1249,6 +1249,7 @@ int ha_maria::repair(THD *thd, HA_CHECK ¶m, bool do_optimize) DBUG_RETURN(HA_ADMIN_FAILED); } + /** @todo BUG the if() below is always false for BLOCK_RECORD */ if (!do_optimize || ((file->state->del || ((file->s->data_file_type != BLOCK_RECORD) && @@ -1293,6 +1294,12 @@ int ha_maria::repair(THD *thd, HA_CHECK ¶m, bool do_optimize) { thd_proc_info(thd, "Repair with keycache"); param.testflag &= ~(T_REP_BY_SORT | T_REP_PARALLEL); + /** + @todo In REPAIR TABLE EXTENDED this will log + REDO_INDEX_NEW_PAGE and UNDO_KEY_INSERT though unneeded. + maria_chk -o does not have this problem as it disables + transactionality. + */ error= maria_repair(¶m, file, fixed_name, param.testflag & T_QUICK); /** @todo RECOVERY BUG we do things with the index file @@ -1366,15 +1373,7 @@ int ha_maria::repair(THD *thd, HA_CHECK ¶m, bool do_optimize) pthread_mutex_unlock(&share->intern_lock); thd_proc_info(thd, old_proc_info); if (!thd->locked_tables) - { - /** - @todo RECOVERY BUG find why this is needed. Monty says it's because a - new non-transactional table is created by maria_repair(): find how this - new table's state influences the old one's. - */ - _ma_reenable_logging_for_table(file->s); maria_lock_database(file, F_UNLCK); - } DBUG_RETURN(error ? HA_ADMIN_FAILED : !optimize_done ? HA_ADMIN_ALREADY_DONE : HA_ADMIN_OK); } @@ -1623,6 +1622,17 @@ int ha_maria::enable_indexes(uint mode) /* mode not implemented */ error= HA_ERR_WRONG_COMMAND; } + DBUG_EXECUTE_IF("maria_flush_whole_log", + { + DBUG_PRINT("maria_flush_whole_log", ("now")); + translog_flush(translog_get_horizon()); + }); + DBUG_EXECUTE_IF("maria_crash_enable_index", + { + DBUG_PRINT("maria_crash_enable_index", ("now")); + fflush(DBUG_FILE); + abort(); + }); return error; } @@ -1694,6 +1704,11 @@ void ha_maria::start_bulk_insert(ha_rows rows) { maria_init_bulk_insert(file, thd->variables.bulk_insert_buff_size, rows); } + /** + @todo If we have 0 records here, there is no need to log REDO/UNDO for + each data row, we can just log some special UNDO which will empty the + data file if need to rollback. + */ } DBUG_VOID_RETURN; } @@ -2093,8 +2108,8 @@ int ha_maria::external_lock(THD *thd, int lock_type) } else { - _ma_reenable_logging_for_table(file->s); - this->file->trn= 0; /* TODO: remove it also in commit and rollback */ + _ma_reenable_logging_for_table(file); + /** @todo zero file->trn also in commit and rollback */ if (trn && trnman_has_locked_tables(trn)) { if (!trnman_decrement_locked_tables(trn)) diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c index 59e0689ece1..31cd2586d01 100644 --- a/storage/maria/ma_bitmap.c +++ b/storage/maria/ma_bitmap.c @@ -133,7 +133,7 @@ static my_bool _ma_read_bitmap_page(MARIA_SHARE *share, MARIA_FILE_BITMAP *bitmap, ulonglong page); - +static TRANSLOG_ADDRESS _ma_bitmap_get_log_address(); /* Write bitmap page to key cache */ @@ -221,21 +221,8 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file) bitmap->block_size= share->block_size; bitmap->file.file= file; - bitmap->file.callback_data= (uchar*) share; - bitmap->file.write_fail= &maria_page_write_failure; - if (share->temporary) - { - bitmap->file.read_callback= &maria_page_crc_check_none; - bitmap->file.write_callback= &maria_page_filler_set_none; - } - else - { - bitmap->file.read_callback= &maria_page_crc_check_bitmap; - if (share->options & HA_OPTION_PAGE_CHECKSUM) - bitmap->file.write_callback= &maria_page_crc_set_normal; - else - bitmap->file.write_callback= &maria_page_filler_set_bitmap; - } + bitmap->file.write_fail= &maria_page_write_failure; aaaaa + _ma_bitmap_set_pagecache_callbacks(&bitmap->file, share); /* Size needs to be aligned on 6 */ aligned_bit_blocks= (share->block_size - PAGE_SUFFIX_SIZE) / 6; @@ -2586,3 +2573,49 @@ int _ma_bitmap_create_first(MARIA_SHARE *share) _ma_bitmap_delete_all(share); return 0; } + + +/** + @brief Pagecache callback to get the TRANSLOG_ADDRESS to flush up to, when a + bitmap page needs to be flushed. + + @param page Page's content + @param page_no Page's number (<offset>/<page length>) + @param data_ptr Callback data pointer (pointer to MARIA_SHARE) + + @retval TRANSLOG_ADDRESS to flush up to. +*/ + +TRANSLOG_ADDRESS +_ma_bitmap_get_log_address(uchar *page __attribute__((unused)), + pgcache_page_no_t page_no __attribute__((unused)), + uchar* data_ptr) +{ +#ifndef DBUG_OFF + const MARIA_SHARE *share= (MARIA_SHARE*)data_ptr; +#endif + DBUG_ENTER("_ma_bitmap_get_log_address"); + DBUG_ASSERT(share->page_type == PAGECACHE_LSN_PAGE && + share->now_transactional); + /* + WAL imposes that UNDOs reach disk before bitmap is flushed. We don't know + the LSN of the last UNDO about this bitmap page, so we flush whole log. + */ + DBUG_RETURN(translog_get_horizon()); +} + + +void _ma_bitmap_set_pagecache_callbacks(PAGECACHE_FILE *file, + MARIA_SHARE *share) +{ + if (share->temporary) + pagecache_file_init(*file, &maria_page_crc_check_none, + &maria_page_filler_set_none, NULL, share); + else + pagecache_file_init(*file, &maria_page_crc_check_bitmap, + ((share->options & HA_OPTION_PAGE_CHECKSUM) ? + &maria_page_crc_set_normal : + &maria_page_filler_set_bitmap), + share->now_transactional ? + &_ma_bitmap_get_log_address : NULL, share); +} diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index b1c99e83aa4..31579db04ac 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -6753,3 +6753,28 @@ err: my_free(current_record, MYF(0)); DBUG_RETURN(error); } + + +/** + @brief Pagecache callback to get the TRANSLOG_ADDRESS to flush up to, when a + data (non-bitmap) or index page needs to be flushed. Returns a real LSN. + + @param page Page's content + @param page_no Page's number (<offset>/<page length>) + @param data_ptr Callback data pointer (pointer to MARIA_SHARE) + + @retval LSN to flush up to +*/ + +TRANSLOG_ADDRESS +maria_page_get_lsn(uchar *page, + pgcache_page_no_t page_no __attribute__((unused)), + uchar* data_ptr __attribute__((unused))) +{ +#ifndef DBUG_OFF + const MARIA_SHARE *share= (MARIA_SHARE*)data_ptr; + DBUG_ASSERT(share->page_type == PAGECACHE_LSN_PAGE && + share->now_transactional); +#endif + return lsn_korr(page); +} diff --git a/storage/maria/ma_blockrec.h b/storage/maria/ma_blockrec.h index 96907ff0ee4..f4e45c85f71 100644 --- a/storage/maria/ma_blockrec.h +++ b/storage/maria/ma_blockrec.h @@ -174,6 +174,8 @@ my_bool _ma_compare_block_record(register MARIA_HA *info, register const uchar *record); void _ma_compact_block_page(uchar *buff, uint block_size, uint rownr, my_bool extend_block); +TRANSLOG_ADDRESS +maria_page_get_lsn(uchar *page, pgcache_page_no_t page_no, uchar* data_ptr); /* ma_bitmap.c */ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file); @@ -211,6 +213,8 @@ uint _ma_bitmap_get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, void _ma_bitmap_delete_all(MARIA_SHARE *share); int _ma_bitmap_create_first(MARIA_SHARE *share); void _ma_bitmap_flushable(MARIA_HA *info, int non_flushable_inc); +void _ma_bitmap_set_pagecache_callbacks(PAGECACHE_FILE *file, + MARIA_SHARE *share); #ifndef DBUG_OFF void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap, uchar *data, ulonglong page); diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index 3245452408c..35397c18243 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -95,7 +95,7 @@ static void copy_data_file_state(MARIA_STATE_INFO *to, MARIA_STATE_INFO *from); static int write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info); static void report_keypage_fault(HA_CHECK *param, my_off_t position); -my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file); +static my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file); void maria_chk_init(HA_CHECK *param) @@ -2372,6 +2372,11 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info, VOID(end_io_cache(&sort_info.new_info->rec_cache)); info->opt_flag&= ~WRITE_CACHE_USED; + /** + @todo RECOVERY BUG seems misplaced in some cases. We modify state after + writing it below. But if we move the call below too much down, flushing + of pages may happen too late, after files have been closed. + */ if (_ma_flush_table_files_after_repair(param, info)) goto err; @@ -2626,15 +2631,16 @@ void maria_lock_memory(HA_CHECK *param __attribute__((unused))) int _ma_flush_table_files_after_repair(HA_CHECK *param, MARIA_HA *info) { MARIA_SHARE *share= info->s; + DBUG_ENTER("_ma_flush_table_files_after_repair"); if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX, FLUSH_RELEASE, FLUSH_RELEASE) || _ma_state_info_write(share, 1|4) || (share->base.born_transactional && _ma_sync_table_files(info))) { _ma_check_print_error(param,"%d when trying to write bufferts",my_errno); - return 1; + DBUG_RETURN(1); } - return 0; + DBUG_RETURN(0); } /* _ma_flush_table_files_after_repair */ @@ -2732,6 +2738,17 @@ int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, char *name) share->state.key_del= HA_OFFSET_ERROR; share->state.changed&= ~STATE_NOT_SORTED_PAGES; + DBUG_EXECUTE_IF("maria_flush_whole_log", + { + DBUG_PRINT("maria_flush_whole_log", ("now")); + translog_flush(translog_get_horizon()); + }); + DBUG_EXECUTE_IF("maria_crash_sort_index", + { + DBUG_PRINT("maria_crash_sort_index", ("now")); + fflush(DBUG_FILE); + abort(); + }); DBUG_RETURN(0); err: @@ -3346,6 +3363,17 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, param->retry_repair=1; goto err; } + DBUG_EXECUTE_IF("maria_flush_whole_log", + { + DBUG_PRINT("maria_flush_whole_log", ("now")); + translog_flush(translog_get_horizon()); + }); + DBUG_EXECUTE_IF("maria_crash_create_index_by_sort", + { + DBUG_PRINT("maria_crash_create_index_by_sort", ("now")); + fflush(DBUG_FILE); + abort(); + }); if (scan_inited) { scan_inited= 0; @@ -3386,6 +3414,7 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, } } + /** @todo RECOVERY BUG seems misplaced in some cases */ if (_ma_flush_table_files_after_repair(param, info)) goto err; @@ -3524,6 +3553,17 @@ err: Now that we have flushed and forced everything, we can bump create_rename_lsn: */ + DBUG_EXECUTE_IF("maria_flush_whole_log", + { + DBUG_PRINT("maria_flush_whole_log", ("now")); + translog_flush(translog_get_horizon()); + }); + DBUG_EXECUTE_IF("maria_crash_repair", + { + DBUG_PRINT("maria_crash_repair", ("now")); + fflush(DBUG_FILE); + abort(); + }); write_log_record_for_repair(param, info); } share->state.changed|= STATE_NOT_SORTED_PAGES; @@ -4004,6 +4044,7 @@ err: */ if (!rep_quick) VOID(end_io_cache(&new_data_cache)); + /** @todo RECOVERY BUG seems misplaced in some cases */ got_error|= _ma_flush_table_files_after_repair(param, info); if (!got_error) { @@ -5811,7 +5852,7 @@ my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows, because the one we create here is not transactional */ -my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file) +static my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file) { MARIA_SORT_INFO *sort_info= param->sort_info; @@ -5828,12 +5869,12 @@ my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file) (new_info->s->options & HA_OPTION_PAGE_CHECKSUM ? &maria_page_crc_set_normal : &maria_page_filler_set_bitmap), - &maria_page_write_failure, new_info->s); + &maria_page_write_failure, NULL, new_info->s); pagecache_file_init(new_info->dfile, &maria_page_crc_check_data, (new_info->s->options & HA_OPTION_PAGE_CHECKSUM ? &maria_page_crc_set_normal : &maria_page_filler_set_normal), - &maria_page_write_failure, new_info->s); + &maria_page_write_failure, NULL, new_info->s); change_data_file_descriptor(new_info, new_file); maria_lock_database(new_info, F_EXTRA_LCK); if ((sort_info->param->testflag & T_UNPACK) && @@ -6138,11 +6179,6 @@ static int write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info) log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); share->now_transactional= 1; - /** - @todo RECOVERY maria_chk --transaction-log may come here; to be sure - that ha_maria is not using the log too, we should do a my_lock() on the - control file when Maria starts. - */ if (unlikely(translog_write_record(&lsn, LOGREC_REDO_REPAIR_TABLE, &dummy_transaction_object, info, sizeof(log_data), diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index becaf45b9a2..271f24a34de 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -157,6 +157,7 @@ static int really_execute_checkpoint(void) TRANSLOG_ADDRESS checkpoint_start_log_horizon; char checkpoint_start_log_horizon_char[LSN_STORE_SIZE]; DBUG_ENTER("really_execute_checkpoint"); + DBUG_PRINT("enter", ("level: %d", checkpoint_in_progress)); bzero(&record_pieces, sizeof(record_pieces)); /* @@ -389,6 +390,10 @@ static void flush_all_tables(int what_to_flush) void ma_checkpoint_end(void) { DBUG_ENTER("ma_checkpoint_end"); + /* + Some intentional crash methods, usually triggered by + SET MARIA_CHECKPOINT_INTERVAL=X + */ DBUG_EXECUTE_IF("maria_flush_bitmap", { DBUG_PRINT("maria_flush_bitmap", ("now")); @@ -708,11 +713,15 @@ pthread_handler_t ma_checkpoint_background(void *arg) } pthread_mutex_unlock(&LOCK_checkpoint); DBUG_PRINT("info",("Maria background checkpoint thread ends")); - /* - That's the final one, which guarantees that a clean shutdown always ends - with a checkpoint. - */ - ma_checkpoint_execute(CHECKPOINT_FULL, FALSE); + { + CHECKPOINT_LEVEL level= CHECKPOINT_FULL; + /* + That's the final one, which guarantees that a clean shutdown always ends + with a checkpoint. + */ + DBUG_EXECUTE_IF("maria_checkpoint_indirect", level= CHECKPOINT_INDIRECT;); + ma_checkpoint_execute(level, FALSE); + } pthread_mutex_lock(&LOCK_checkpoint); checkpoint_thread_die= 2; /* indicate that we are dead */ /* wake up ma_checkpoint_end() which may be waiting for our death */ @@ -824,8 +833,6 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon) str->length= 4 + /* number of tables */ (2 + /* short id */ - 4 + /* kfile */ - 4 + /* dfile */ LSN_STORE_SIZE + /* first_log_write_at_lsn */ 1 /* end-of-name 0 */ ) * nb + total_names_length; @@ -982,19 +989,6 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon) nb_stored++; int2store(ptr, share->id); ptr+= 2; - /* - We must store the OS file descriptors, because the pagecache, which - tells us the list of dirty pages, refers to these pages by OS file - descriptors. An alternative is to make the page cache aware of the - 2-byte id and of the location of a page ("is it a data file page or an - index file page?"). - If one descriptor is -1, normally there should be no dirty pages - collected for this file, it's ok to store -1, it will not be used. - */ - int4store(ptr, kfile.file); - ptr+= 4; - int4store(ptr, dfile.file); - ptr+= 4; lsn_store(ptr, share->lsn_of_file_id); ptr+= LSN_STORE_SIZE; /* diff --git a/storage/maria/ma_commit.c b/storage/maria/ma_commit.c index 763dfb88107..9dfbd1da24f 100644 --- a/storage/maria/ma_commit.c +++ b/storage/maria/ma_commit.c @@ -116,26 +116,3 @@ int maria_begin(MARIA_HA *info) } DBUG_RETURN(0); } - - -/* - @brief Disable logging for this table - - @note - Mainly used during repair table, where we don't want to log all - changes to index or rows -*/ - -void maria_disable_logging(MARIA_HA *info) -{ - info->s->now_transactional= 0; - info->trn= &dummy_transaction_object; - info->s->page_type= PAGECACHE_PLAIN_PAGE; -} - - -void maria_enable_logging(MARIA_HA *info) -{ - if ((info->s->now_transactional= info->s->base.born_transactional)) - info->s->page_type= PAGECACHE_LSN_PAGE; -} diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c index ee1439e752a..d7fa9377018 100644 --- a/storage/maria/ma_extra.c +++ b/storage/maria/ma_extra.c @@ -338,10 +338,8 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, if (_ma_state_info_write(share, 1 | 2) || my_sync(share->kfile.file, MYF(0))) error= my_errno; -#ifdef ASK_MONTY /* see same tag in HA_EXTRA_FORCE_REOPEN */ else share->changed= 0; -#endif } else { diff --git a/storage/maria/ma_key_recover.c b/storage/maria/ma_key_recover.c index 86a23df4d81..057b4ac7aef 100644 --- a/storage/maria/ma_key_recover.c +++ b/storage/maria/ma_key_recover.c @@ -32,6 +32,9 @@ @param undo_lsn LSN for undo pages. LSN_IMPOSSIBLE if we shouldn't write undo (like on duplicate key errors) + info->pinned_pages is the list of pages to unpin. Each member of the list + must have its 'changed' saying if the page was changed or not. + @note We unpin pages in the reverse order as they where pinned; This is not necessary now, but may simplify things in the future. diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c index 3d80eddeeb9..2044772284d 100644 --- a/storage/maria/ma_locking.c +++ b/storage/maria/ma_locking.c @@ -135,13 +135,6 @@ int maria_lock_database(MARIA_HA *info, int lock_type) } info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); info->lock_type= F_UNLCK; - /* - Verify that user of the table cleaned up after itself. Not in - recovery, as for example maria_extra(HA_EXTRA_PREPARE_FOR_RENAME) may - call us here, with transactionality temporarily disabled. - */ - DBUG_ASSERT(maria_in_recovery || - share->now_transactional == share->base.born_transactional); break; case F_RDLCK: if (info->lock_type == F_WRLCK) diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index 7ff0cce9eab..9906ae09858 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -1354,7 +1354,7 @@ static void translog_file_init(TRANSLOG_FILE *file, uint32 number, { pagecache_file_init(file->handler, &translog_page_validator, &translog_dummy_callback, - &translog_dummy_write_failure, file); + &translog_dummy_write_failure, NULL, file); file->number= number; file->was_recovered= 0; file->is_sync= is_sync; diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index aac634773a1..6cd0e15d3b1 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -36,11 +36,6 @@ static my_bool maria_once_init_dummy(MARIA_SHARE *, File); static my_bool maria_once_end_dummy(MARIA_SHARE *); static uchar *_ma_base_info_read(uchar *ptr, MARIA_BASE_INFO *base); static uchar *_ma_state_info_read(uchar *ptr, MARIA_STATE_INFO *state); -static void set_data_pagecache_callbacks(PAGECACHE_FILE *file, - MARIA_SHARE *share); -static void set_index_pagecache_callbacks(PAGECACHE_FILE *file, - MARIA_SHARE *share); - #define get_next_element(to,pos,size) { memcpy((char*) to,pos,(size_t) size); \ pos+=size;} @@ -1537,44 +1532,42 @@ uchar *_ma_column_nr_read(uchar *ptr, uint16 *offsets, uint columns) } -static void set_data_pagecache_callbacks(PAGECACHE_FILE *file, - MARIA_SHARE *share) +void set_data_pagecache_callbacks(PAGECACHE_FILE *file, MARIA_SHARE *share) { - file->callback_data= (uchar*) share; + /* + Note that non-BLOCK_RECORD formats don't use the pagecache for their data + files, so it does not matter that maria_page* calls are passed below for + them. On the other hand, index file can always have page CRCs, for all + data formats. + */ file->write_fail= &maria_page_write_failure; if (share->temporary) - { - file->read_callback= &maria_page_crc_check_none; - file->write_callback= &maria_page_filler_set_none; - } + pagecache_file_init(*file, &maria_page_crc_check_none, + &maria_page_filler_set_none, NULL, share); else - { - file->read_callback= &maria_page_crc_check_data; - if (share->options & HA_OPTION_PAGE_CHECKSUM) - file->write_callback= &maria_page_crc_set_normal; - else - file->write_callback= &maria_page_filler_set_normal; - } + pagecache_file_init(*file, &maria_page_crc_check_data, + ((share->options & HA_OPTION_PAGE_CHECKSUM) ? + &maria_page_crc_set_normal : + &maria_page_filler_set_normal), + share->now_transactional ? + &maria_page_get_lsn : NULL, share); } -static void set_index_pagecache_callbacks(PAGECACHE_FILE *file, - MARIA_SHARE *share) +void set_index_pagecache_callbacks(PAGECACHE_FILE *file, MARIA_SHARE *share) { - file->callback_data= (uchar*) share; + no write_fail set here? if (share->temporary) - { - file->read_callback= &maria_page_crc_check_none; - file->write_callback= &maria_page_filler_set_none; - } + pagecache_file_init(*file, &maria_page_crc_check_none, + &maria_page_filler_set_none, NULL, share); else - { - file->read_callback= &maria_page_crc_check_index; - if (share->options & HA_OPTION_PAGE_CHECKSUM) - file->write_callback= &maria_page_crc_set_index; - else - file->write_callback= &maria_page_filler_set_normal; - } + pagecache_file_init(*file, &maria_page_crc_check_index, + ((share->options & HA_OPTION_PAGE_CHECKSUM) ? + &maria_page_crc_set_index : + &maria_page_filler_set_normal), + share->now_transactional ? + &maria_page_get_lsn : NULL, + share); } diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index f49683553c1..1f559e1f66c 100755 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -42,6 +42,7 @@ #include "maria_def.h" #include <m_string.h> #include "ma_pagecache.h" +#include "ma_blockrec.h" #include <my_bit.h> #include <errno.h> @@ -124,9 +125,6 @@ my_bool my_disable_flush_pagecache_blocks= 0; #define COND_FOR_WRLOCK 2 /* queue of write lock */ #define COND_SIZE 3 /* number of COND_* queues */ -/* offset of LSN on the page */ -#define PAGE_LSN_OFFSET 0 - typedef pthread_cond_t KEYCACHE_CONDVAR; /* descriptor of the page in the page cache block buffer */ @@ -574,7 +572,7 @@ static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond); #define pagecache_pthread_cond_signal pthread_cond_signal #endif /* defined(PAGECACHE_DEBUG) */ -extern my_bool translog_flush(LSN lsn); +extern my_bool translog_flush(TRANSLOG_ADDRESS lsn); /* Write page to the disk @@ -599,20 +597,18 @@ static uint pagecache_fwrite(PAGECACHE *pagecache, enum pagecache_page_type type, myf flags) { + TRANSLOG_ADDRESS (*addr_callback) + (uchar *page, pgcache_page_no_t offset, uchar *data)= + filedesc->get_log_address_callback; DBUG_ENTER("pagecache_fwrite"); DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE); - /** - @todo RECOVERY BUG Here, we should call a callback get_lsn(): it will use - lsn_korr() for LSN pages, and translog_get_horizon() for bitmap pages. - */ - if (type == PAGECACHE_LSN_PAGE) + if (addr_callback != NULL) { - LSN lsn; + TRANSLOG_ADDRESS addr= + (*addr_callback)(buffer, pageno, filedesc->callback_data); DBUG_PRINT("info", ("Log handler call")); - /* TODO: integrate with page format */ - lsn= lsn_korr(buffer + PAGE_LSN_OFFSET); - DBUG_ASSERT(LSN_VALID(lsn)); - if (translog_flush(lsn)) + DBUG_ASSERT(LSN_VALID(addr)); + if (translog_flush(addr)) { (*filedesc->write_fail)(filedesc->callback_data); DBUG_RETURN(1); @@ -621,7 +617,7 @@ static uint pagecache_fwrite(PAGECACHE *pagecache, DBUG_PRINT("info", ("write_callback: 0x%lx data: 0x%lx", (ulong) filedesc->write_callback, (ulong) filedesc->callback_data)); - if ((filedesc->write_callback)(buffer, pageno, filedesc->callback_data)) + if ((*filedesc->write_callback)(buffer, pageno, filedesc->callback_data)) { DBUG_PRINT("error", ("write callback problem")); DBUG_RETURN(1); @@ -2535,14 +2531,14 @@ static void check_and_set_lsn(PAGECACHE *pagecache, to not log REDOs). */ DBUG_ASSERT((block->type == PAGECACHE_LSN_PAGE) || maria_in_recovery); - old= lsn_korr(block->buffer + PAGE_LSN_OFFSET); + old= lsn_korr(block->buffer); DBUG_PRINT("info", ("old lsn: (%lu, 0x%lx) new lsn: (%lu, 0x%lx)", LSN_IN_PARTS(old), LSN_IN_PARTS(lsn))); if (cmp_translog_addr(lsn, old) > 0) { DBUG_ASSERT(block->type != PAGECACHE_READ_UNKNOWN_PAGE); - lsn_store(block->buffer + PAGE_LSN_OFFSET, lsn); + lsn_store(block->buffer, lsn); /* we stored LSN in page so we dirtied it */ if (!(block->status & PCBLOCK_CHANGED)) link_to_changed_list(pagecache, block); @@ -2956,7 +2952,7 @@ uchar *pagecache_read(PAGECACHE *pagecache, int error= 0; enum pagecache_page_pin pin= lock_to_pin[test(buff==0)][lock]; PAGECACHE_BLOCK_LINK *fake_link; - DBUG_ENTER("pagecache_valid_read"); + DBUG_ENTER("pagecache_read"); DBUG_PRINT("enter", ("fd: %u page: %lu buffer: 0x%lx level: %u " "t:%s %s %s", (uint) file->file, (ulong) pageno, @@ -3684,8 +3680,8 @@ static int flush_cached_blocks(PAGECACHE *pagecache, block->pins)); DBUG_ASSERT(block->pins == 1); /** - @todo If page is contiguous with next page to flush, group flushes in - one single my_pwrite(). + @todo IO If page is contiguous with next page to flush, group flushes + in one single my_pwrite(). */ error= pagecache_fwrite(pagecache, &block->hash_link->file, block->buffer, @@ -4198,7 +4194,7 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, wqueue_add_to_queue(&other_flusher->flush_queue, thread); do { - KEYCACHE_DBUG_PRINT("pagecache_collect_çhanged_blocks_with_lsn: wait", + KEYCACHE_DBUG_PRINT("pagecache_collect_changed_blocks_with_lsn: wait", ("suspend thread %ld", thread->id)); pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); @@ -4222,6 +4218,7 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, */ DBUG_ASSERT(block->hash_link != NULL); DBUG_ASSERT(block->status & PCBLOCK_CHANGED); + /* Note that we don't store bitmap pages */ if (block->type != PAGECACHE_LSN_PAGE) continue; /* no need to store it */ stored_list_size++; @@ -4230,7 +4227,8 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, compile_time_assert(sizeof(pagecache->blocks) <= 8); str->length= 8 + /* number of dirty pages */ - (4 + /* file */ + (2 + /* table id */ + 1 + /* data or index file */ 4 + /* pageno */ LSN_STORE_SIZE /* rec_lsn */ ) * stored_list_size; @@ -4239,7 +4237,8 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, ptr= str->str; int8store(ptr, (ulonglong)stored_list_size); ptr+= 8; - if (!stored_list_size) + DBUG_PRINT("info", ("found %lu dirty pages", stored_list_size)); + if (stored_list_size == 0) goto end; for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++) { @@ -4248,16 +4247,17 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, block; block= block->next_changed) { + uint16 table_id; + MARIA_SHARE *share; if (block->type != PAGECACHE_LSN_PAGE) continue; /* no need to store it in the checkpoint record */ - compile_time_assert(sizeof(block->hash_link->file.file) <= 4); compile_time_assert(sizeof(block->hash_link->pageno) <= 4); - /** - @todo RECOVERY when we have a pointer to MARIA_SHARE, store share->id - instead of this file. - */ - int4store(ptr, block->hash_link->file.file); - ptr+= 4; + share= (MARIA_SHARE *)(block->hash_link->file.callback_data); + table_id= share->id; + int2store(ptr, table_id); + ptr+= 2; + ptr[0]= (share->kfile.file == block->hash_link->file.file); + ptr++; int4store(ptr, block->hash_link->pageno); ptr+= 4; lsn_store(ptr, block->rec_lsn); diff --git a/storage/maria/ma_pagecache.h b/storage/maria/ma_pagecache.h index 88130bffb73..78b7200bab9 100644 --- a/storage/maria/ma_pagecache.h +++ b/storage/maria/ma_pagecache.h @@ -81,11 +81,16 @@ typedef uint32 pgcache_page_no_t; typedef struct st_pagecache_file { File file; + /** Cannot be NULL */ my_bool (*read_callback)(uchar *page, pgcache_page_no_t offset, uchar *data); + /** Cannot be NULL */ my_bool (*write_callback)(uchar *page, pgcache_page_no_t offset, uchar *data); void (*write_fail)(uchar *data); + /** Can be NULL */ or use dummy + TRANSLOG_ADDRESS (*get_log_address_callback) + (uchar *page, pgcache_page_no_t offset, uchar *data); uchar *callback_data; } PAGECACHE_FILE; @@ -258,11 +263,11 @@ extern void pagecache_unpin_by_link(PAGECACHE *pagecache, /* PCFLUSH_ERROR and PCFLUSH_PINNED. */ #define PCFLUSH_PINNED_AND_ERROR (PCFLUSH_ERROR|PCFLUSH_PINNED) -#define pagecache_file_init(F,RC,WC,WF,D) \ +#define pagecache_file_init(F,RC,WC,WF,GLC,D) \ do{ \ (F).read_callback= (RC); (F).write_callback= (WC); \ (F).write_fail= (WF); \ - (F).callback_data= (uchar*)(D); \ + (F).get_log_address_callback= (GLC); (F).callback_data= (uchar*)(D); \ } while(0) #define flush_pagecache_blocks(A,B,C) \ diff --git a/storage/maria/ma_panic.c b/storage/maria/ma_panic.c index 867abfd1c62..a86563f31fb 100644 --- a/storage/maria/ma_panic.c +++ b/storage/maria/ma_panic.c @@ -98,20 +98,16 @@ int maria_panic(enum ha_panic_function flag) #ifdef CANT_OPEN_FILES_TWICE { /* Open closed files */ char name_buff[FN_REFLEN]; - if (info->s->kfile.file < 0) + MARIA_SHARE *share= info->s; + if (share->kfile.file < 0) { - if ((info->s->kfile.file= my_open(fn_format(name_buff, - info->filename, "", - N_NAME_IEXT,4), - info->mode, - MYF(MY_WME))) < 0) - error = my_errno; - pagecache_file_init(info->s->kfile, &maria_page_crc_check_index, - (info->s->options & HA_OPTION_PAGE_CHECKSUM ? - &maria_page_crc_set_index : - &maria_page_filler_set_normal), - &maria_page_write_failure, info->s); + if ((share->kfile.file= my_open(fn_format(name_buff, + info->filename, "", + N_NAME_IEXT,4), + info->mode, + MYF(MY_WME))) < 0) + error = my_errno; } if (info->dfile.file < 0) { @@ -120,13 +116,10 @@ int maria_panic(enum ha_panic_function flag) info->mode, MYF(MY_WME))) < 0) error = my_errno; - pagecache_file_init(info->dfile, &maria_page_crc_check_data, - (share->options & HA_OPTION_PAGE_CHECKSUM ? - &maria_page_crc_set_normal: - &maria_page_filler_set_normal), - &maria_page_write_failure, share); info->rec_cache.file= info->dfile.file; } + if (share->bitmap.file.file < 0) + share->bitmap.file.file= info->dfile.file; } #endif if (info->was_locked) diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index 4444f73b49f..2ac708246e2 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -40,15 +40,18 @@ struct st_dirty_page /* used only in the REDO phase */ struct st_table_for_recovery /* used in the REDO and UNDO phase */ { MARIA_HA *info; - File org_kfile, org_dfile; /**< OS descriptors when Checkpoint saw table */ }; /* Variables used by all functions of this module. Ok as single-threaded */ static struct st_trn_for_recovery *all_active_trans; static struct st_table_for_recovery *all_tables; static HASH all_dirty_pages; static struct st_dirty_page *dirty_pages_pool; -static LSN current_group_end_lsn, - checkpoint_start= LSN_IMPOSSIBLE; +static LSN current_group_end_lsn; +/* + LSN after which dirty pages list does not apply. Can be slightly before + when ma_checkpoint_execute() started. +*/ +static LSN checkpoint_start= LSN_IMPOSSIBLE; #ifndef DBUG_OFF /** Current group of REDOs is about this table and only this one */ static MARIA_HA *current_group_table; @@ -58,6 +61,7 @@ static FILE *tracef; /**< trace file for debugging */ static my_bool skip_DDLs; /**< if REDO phase should skip DDL records */ /** @brief to avoid writing a checkpoint if recovery did nothing. */ static my_bool checkpoint_useful; +/** @todo looks like duplicate of recovery_message_printed */ static my_bool procent_printed; static ulonglong now; /**< for tracking execution time of phases */ static int (*save_error_handler_hook)(uint, const char *,myf); @@ -124,10 +128,8 @@ static void prepare_table_for_close(MARIA_HA *info, TRANSLOG_ADDRESS horizon); static LSN parse_checkpoint_record(LSN lsn); static void new_transaction(uint16 sid, TrID long_id, LSN undo_lsn, LSN first_undo_lsn); -static int new_table(uint16 sid, const char *name, - File org_kfile, File org_dfile, - LSN lsn_of_file_id); -static int new_page(File fileid, pgcache_page_no_t pageid, LSN rec_lsn, +static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id); +static int new_page(uint32 fileid, pgcache_page_no_t pageid, LSN rec_lsn, struct st_dirty_page *dirty_page); static int close_all_tables(void); static my_bool close_one_table(const char *name, TRANSLOG_ADDRESS addr); @@ -136,6 +138,10 @@ static void print_redo_phase_progress(TRANSLOG_ADDRESS addr); /** @brief global [out] buffer for translog_read_record(); never shrinks */ static struct { + /* + uchar* is more adapted (less casts) than char*, thus we don't use + LEX_STRING. + */ uchar *str; size_t length; } log_record_buffer; @@ -1158,7 +1164,7 @@ prototype_redo_exec_hook(FILE_ID) all_tables[sid].info= NULL; } name= (char *)log_record_buffer.str + FILEID_STORE_SIZE; - if (new_table(sid, name, -1, -1, rec->lsn)) + if (new_table(sid, name, rec->lsn)) goto end; error= 0; end: @@ -1166,9 +1172,7 @@ end: } -static int new_table(uint16 sid, const char *name, - File org_kfile, File org_dfile, - LSN lsn_of_file_id) +static int new_table(uint16 sid, const char *name, LSN lsn_of_file_id) { /* -1 (skip table): close table and return 0; @@ -1201,12 +1205,6 @@ static int new_table(uint16 sid, const char *name, error= 0; goto end; } - if (maria_is_crashed(info)) - { - /** @todo what should we do? how to continue recovery? */ - tprint(tracef, "Table is crashed, can't apply log records to it\n"); - goto end; - } share= info->s; /* check that we're not already using it */ if (share->reopen != 1) @@ -1235,6 +1233,16 @@ static int new_table(uint16 sid, const char *name, LSN_IN_PARTS(lsn_of_file_id)); error= -1; goto end; + /* + Note that we tested that before testing corruption; a recent corrupted + table is not a blocker for the present log record. + */ + } + if (maria_is_crashed(info)) + { + /** @todo what should we do? how to continue recovery? */ + tprint(tracef, "Table is crashed, can't apply log records to it\n"); + goto end; } /* don't log any records for this work */ _ma_tmp_disable_logging_for_table(info, FALSE); @@ -1276,8 +1284,6 @@ static int new_table(uint16 sid, const char *name, */ info->s->lsn_of_file_id= lsn_of_file_id; all_tables[sid].info= info; - all_tables[sid].org_kfile= org_kfile; - all_tables[sid].org_dfile= org_dfile; /* We don't set info->s->id, it would be useless (no logging in REDO phase); if you change that, know that some records in REDO phase call @@ -1588,10 +1594,17 @@ prototype_redo_exec_hook(UNDO_ROW_INSERT) MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); MARIA_SHARE *share; + set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); if (info == NULL) + { + /* + Note that we set undo_lsn anyway. So that if the transaction is later + rolled back, this UNDO is tried for execution and we get an error (as it + would then be abnormal that info==NULL). + */ return 0; + } share= info->s; - set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0) { tprint(tracef, " state has LSN (%lu,0x%lx) older than record, updating" @@ -1625,10 +1638,10 @@ prototype_redo_exec_hook(UNDO_ROW_DELETE) MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); MARIA_SHARE *share; + set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); if (info == NULL) return 0; share= info->s; - set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0) { tprint(tracef, " state older than record\n"); @@ -1661,10 +1674,11 @@ prototype_redo_exec_hook(UNDO_ROW_UPDATE) { MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); MARIA_SHARE *share; + + set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); if (info == NULL) return 0; share= info->s; - set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0) { if (share->calc_checksum) @@ -1692,10 +1706,11 @@ prototype_redo_exec_hook(UNDO_KEY_INSERT) { MARIA_HA *info; MARIA_SHARE *share; + + set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); if (!(info= get_MARIA_HA_from_UNDO_record(rec))) return 0; share= info->s; - set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0) { const uchar *ptr= rec->header + LSN_STORE_SIZE + FILEID_STORE_SIZE; @@ -1746,9 +1761,10 @@ prototype_redo_exec_hook(UNDO_KEY_INSERT) prototype_redo_exec_hook(UNDO_KEY_DELETE) { MARIA_HA *info; + + set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); if (!(info= get_MARIA_HA_from_UNDO_record(rec))) return 0; - set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); _ma_unpin_all_pages(info, rec->lsn); return 0; } @@ -1758,10 +1774,11 @@ prototype_redo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT) { MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); MARIA_SHARE *share; + + set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); if (info == NULL) return 0; share= info->s; - set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0) { uint key_nr; @@ -1816,15 +1833,15 @@ prototype_redo_exec_hook(CLR_END) uchar *logpos; DBUG_ENTER("exec_REDO_LOGREC_CLR_END"); - if (info == NULL) - DBUG_RETURN(0); - share= info->s; previous_undo_lsn= lsn_korr(rec->header); undone_record_type= clr_type_korr(rec->header + LSN_STORE_SIZE + FILEID_STORE_SIZE); log_desc= &log_record_type_descriptor[undone_record_type]; set_undo_lsn_for_active_trans(rec->short_trid, previous_undo_lsn); + if (info == NULL) + DBUG_RETURN(0); + share= info->s; tprint(tracef, " CLR_END was about %s, undo_lsn now LSN (%lu,0x%lx)\n", log_desc->name, LSN_IN_PARTS(previous_undo_lsn)); @@ -2558,7 +2575,7 @@ static void prepare_table_for_close(MARIA_HA *info, TRANSLOG_ADDRESS horizon) share->state.is_of_horizon= horizon; _ma_state_info_write_sub(share->kfile.file, &share->state, 1); } - _ma_reenable_logging_for_table(share); + _ma_reenable_logging_for_table(info); info->trn= NULL; /* safety */ } @@ -2624,12 +2641,19 @@ static MARIA_HA *get_MARIA_HA_from_REDO_record(const DBUG_ASSERT(info->s->last_version != 0); if (cmp_translog_addr(rec->lsn, checkpoint_start) < 0) { + /* + 64-bit key is formed like this: + Most significant byte: 0 + Next byte: 0 if data page, 1 if index page + Next 2 bytes: table's short id + Next 4 bytes: page number + */ uint64 file_and_page_id= - (((uint64) (index_page_redo_entry ? all_tables[sid].org_kfile : - all_tables[sid].org_dfile)) << 32) | page; + (((uint64)((index_page_redo_entry << 16) | sid)) << 32) | page; struct st_dirty_page *dirty_page= (struct st_dirty_page *) hash_search(&all_dirty_pages, (uchar *)&file_and_page_id, sizeof(file_and_page_id)); + DBUG_PRINT("info", ("in dirty pages list: %d", dirty_page != NULL)); if ((dirty_page == NULL) || cmp_translog_addr(rec->lsn, dirty_page->rec_lsn) < 0) { @@ -2736,7 +2760,8 @@ static LSN parse_checkpoint_record(LSN lsn) /* how much brain juice and discussions there was to come to writing this - line + line. It may make start_address slightly decrease (only by the time it + takes to write one or a few rows, roughly). */ set_if_smaller(start_address, minimum_rec_lsn_of_active_transactions); @@ -2769,22 +2794,17 @@ static LSN parse_checkpoint_record(LSN lsn) for (i= 0; i< nb_tables; i++) { char name[FN_REFLEN]; - File kfile, dfile; LSN first_log_write_lsn; uint name_len; uint16 sid= uint2korr(ptr); ptr+= 2; DBUG_ASSERT(sid > 0); - kfile= uint4korr(ptr); - ptr+= 4; - dfile= uint4korr(ptr); - ptr+= 4; first_log_write_lsn= lsn_korr(ptr); ptr+= LSN_STORE_SIZE; name_len= strlen((char *)ptr) + 1; strmake(name, (char *)ptr, sizeof(name)-1); ptr+= name_len; - if (new_table(sid, name, kfile, dfile, first_log_write_lsn)) + if (new_table(sid, name, first_log_write_lsn)) return LSN_ERROR; } @@ -2807,15 +2827,18 @@ static LSN parse_checkpoint_record(LSN lsn) minimum_rec_lsn_of_dirty_pages= LSN_MAX; for (i= 0; i < nb_dirty_pages ; i++) { - pgcache_page_no_t pageid; + pgcache_page_no_t page_id; LSN rec_lsn; - File fileid= uint4korr(ptr); - ptr+= 4; - pageid= uint4korr(ptr); + uint16 table_id= uint2korr(ptr); + ptr+= 2; + uint32 is_index= ptr[0]; + ptr++; + page_id= uint4korr(ptr); ptr+= 4; rec_lsn= lsn_korr(ptr); ptr+= LSN_STORE_SIZE; - if (new_page(fileid, pageid, rec_lsn, next_dirty_page_in_pool++)) + if (new_page((is_index << 16) | table_id, + page_id, rec_lsn, next_dirty_page_in_pool++)) return LSN_ERROR; set_if_smaller(minimum_rec_lsn_of_dirty_pages, rec_lsn); } @@ -2829,11 +2852,11 @@ static LSN parse_checkpoint_record(LSN lsn) eprint(tracef, "checkpoint record corrupted\n"); return LSN_ERROR; } - set_if_smaller(start_address, minimum_rec_lsn_of_dirty_pages); /* + start_address is now from where the dirty pages list can be ignored. Find LSN higher or equal to this TRANSLOG_ADDRESS, suitable for - translog_read_record() functions + translog_read_record() functions. */ checkpoint_start= translog_next_LSN(start_address, LSN_IMPOSSIBLE); if (checkpoint_start == LSN_IMPOSSIBLE) @@ -2844,10 +2867,16 @@ static LSN parse_checkpoint_record(LSN lsn) */ return LSN_ERROR; } - return checkpoint_start; + /* now, where the REDO phase should start reading log: */ + set_if_smaller(start_address, minimum_rec_lsn_of_dirty_pages); + DBUG_PRINT("info", + ("checkpoint_start: (%lu,0x%lx) start_address: (%lu,0x%lx)", + LSN_IN_PARTS(checkpoint_start), LSN_IN_PARTS(start_address))); + return start_address; } -static int new_page(File fileid, pgcache_page_no_t pageid, LSN rec_lsn, + +static int new_page(uint32 fileid, pgcache_page_no_t pageid, LSN rec_lsn, struct st_dirty_page *dirty_page) { /* serves as hash key */ @@ -2953,6 +2982,7 @@ static my_bool close_one_table(const char *name, TRANSLOG_ADDRESS addr) @note for example in the REDO phase we disable logging but that does not make the log incomplete. */ + void _ma_tmp_disable_logging_for_table(MARIA_HA *info, my_bool log_incomplete) { @@ -2965,15 +2995,52 @@ void _ma_tmp_disable_logging_for_table(MARIA_HA *info, log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); translog_write_record(&lsn, LOGREC_INCOMPLETE_LOG, - info->trn, info, sizeof(log_data), + &dummy_transaction_object, info, sizeof(log_data), TRANSLOG_INTERNAL_PARTS + 1, log_array, log_data, NULL); } /* if we disabled before writing the record, record wouldn't reach log */ share->now_transactional= FALSE; + /* + Some code in ma_blockrec.c assumes a trn. + info->trn in some cases can be not NULL and not dummy_transaction_object + when arriving here, but overwriting it does not leak as it is still + remembered in THD_TRN. + */ + info->trn= &dummy_transaction_object; share->page_type= PAGECACHE_PLAIN_PAGE; + /* Functions below will pick up now_transactional and change callbacks */ + set_data_pagecache_callbacks(&info->dfile, share); + set_index_pagecache_callbacks(&share->kfile, share); + _ma_bitmap_set_pagecache_callbacks(&share->bitmap.file, share); +} + + +/** + Re-enables logging for a table which had it temporarily disabled. + + @param info table +*/ + +void _ma_reenable_logging_for_table(MARIA_HA *info) +{ + MARIA_SHARE *share= info->s; + if ((share->now_transactional= share->base.born_transactional)) + { + /* + The change below does NOT affect pages already in the page cache, so you + should have flushed them out already, or write a pagecache function to + change their type. + */ + share->page_type= PAGECACHE_LSN_PAGE; + info->trn= NULL; /* safety */ + } + set_data_pagecache_callbacks(&info->dfile, share); + set_index_pagecache_callbacks(&share->kfile, share); + _ma_bitmap_set_pagecache_callbacks(&share->bitmap.file, share); } + static void print_redo_phase_progress(TRANSLOG_ADDRESS addr) { static int end_logno= FILENO_IMPOSSIBLE, end_offset, percentage_printed= 0; diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c index c988f4ff253..90c8f7246fa 100644 --- a/storage/maria/ma_write.c +++ b/storage/maria/ma_write.c @@ -201,10 +201,6 @@ int maria_write(MARIA_HA *info, uchar *record) { if ((*share->write_record)(info,record)) goto err; - /** - @todo when we enable multiple writers, we will have to protect - 'records' and 'checksum' somehow. - */ if (!share->now_transactional) info->state->checksum+= info->cur_row.checksum; } diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c index 9275db3a466..e94bac1c91b 100644 --- a/storage/maria/maria_chk.c +++ b/storage/maria/maria_chk.c @@ -1049,7 +1049,7 @@ static int maria_chk(HA_CHECK *param, char *filename) T_ZEROFILL)) { /* Mark table as not transactional to avoid logging */ - maria_disable_logging(info); + _ma_tmp_disable_logging_for_table(info, FALSE); if (param->testflag & T_REP_ANY) { @@ -1231,7 +1231,7 @@ static int maria_chk(HA_CHECK *param, char *filename) ((param->testflag & T_SORT_RECORDS) ? UPDATE_SORT : 0))); info->update&= ~HA_STATE_CHANGED; - maria_enable_logging(info); + _ma_reenable_logging_for_table(info); maria_lock_database(info, F_UNLCK); end2: @@ -1695,7 +1695,7 @@ static int maria_sort_records(HA_CHECK *param, (share->options & HA_OPTION_PAGE_CHECKSUM ? &maria_page_crc_set_normal : &maria_page_filler_set_normal), - &maria_page_write_failure, share); + &maria_page_write_failure, NULL, share); info->state->del=0; info->state->empty=0; share->state.dellink= HA_OFFSET_ERROR; diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index 64b59c2fdcf..7ab41bcf986 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -1068,12 +1068,13 @@ int _ma_update_create_rename_lsn(MARIA_SHARE *share, LSN lsn, my_bool do_sync); int _ma_update_create_rename_lsn_sub(MARIA_SHARE *share, LSN lsn, my_bool do_sync); - +void set_data_pagecache_callbacks(PAGECACHE_FILE *file, + MARIA_SHARE *share); +void set_index_pagecache_callbacks(PAGECACHE_FILE *file, + MARIA_SHARE *share); void _ma_tmp_disable_logging_for_table(MARIA_HA *info, my_bool log_incomplete); -#define _ma_reenable_logging_for_table(S) \ - { if (((S)->now_transactional= (S)->base.born_transactional)) \ - (S)->page_type= PAGECACHE_LSN_PAGE; } +void _ma_reenable_logging_for_table(MARIA_HA *info); #define MARIA_NO_CRC_NORMAL_PAGE 0xffffffff #define MARIA_NO_CRC_BITMAP_PAGE 0xfffffffe diff --git a/storage/maria/unittest/ma_pagecache_consist.c b/storage/maria/unittest/ma_pagecache_consist.c index 4ca06147ff4..a0c299ce9d1 100644 --- a/storage/maria/unittest/ma_pagecache_consist.c +++ b/storage/maria/unittest/ma_pagecache_consist.c @@ -346,7 +346,7 @@ int main(int argc __attribute__((unused)), exit(1); } pagecache_file_init(file1, &dummy_callback, &dummy_callback, - &dummy_fail_callback, NULL); + &dummy_fail_callback, NULL, NULL); DBUG_PRINT("info", ("file1: %d", file1.file)); if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0) { diff --git a/storage/maria/unittest/ma_pagecache_single.c b/storage/maria/unittest/ma_pagecache_single.c index 3f76bbdb863..85728085559 100644 --- a/storage/maria/unittest/ma_pagecache_single.c +++ b/storage/maria/unittest/ma_pagecache_single.c @@ -532,7 +532,7 @@ int main(int argc __attribute__((unused)), exit(1); } pagecache_file_init(file1, &dummy_callback, &dummy_callback, - &dummy_fail_callback, NULL); + &dummy_fail_callback, NULL, NULL); my_close(tmp_file, MYF(0)); my_delete(file2_name, MYF(0)); diff --git a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c index fe3f37f8b2a..ff9503b2252 100644 --- a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c +++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c @@ -138,7 +138,7 @@ int main(int argc __attribute__((unused)), char *argv[]) exit(1); } pagecache_file_init(file1, &dummy_callback, &dummy_callback, - &dummy_fail_callback, NULL); + &dummy_fail_callback, NULL, NULL); if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0) { fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n", @@ -150,8 +150,7 @@ int main(int argc __attribute__((unused)), char *argv[]) uchar page[PCACHE_PAGE]; bzero(page, PCACHE_PAGE); -#define PAGE_LSN_OFFSET 0 - lsn_store(page + PAGE_LSN_OFFSET, lsn); + lsn_store(page, lsn); pagecache_write(&pagecache, &file1, 0, 3, (char*)page, PAGECACHE_LSN_PAGE, PAGECACHE_LOCK_LEFT_UNLOCKED, |