diff options
author | unknown <guilhem@gbichot3.local> | 2006-09-14 19:06:51 +0200 |
---|---|---|
committer | unknown <guilhem@gbichot3.local> | 2006-09-14 19:06:51 +0200 |
commit | cdf831cf94fe9aabde6ffb5b19557893416061d6 (patch) | |
tree | 9721c7b1eb3d18d02a307cd7827649221192e1bd | |
parent | 15b9ce2201453ce7ecc346b053910023e7d51b83 (diff) | |
download | mariadb-git-cdf831cf94fe9aabde6ffb5b19557893416061d6.tar.gz |
WL#3071 Maria checkpoint:
changing pseudocode to use the structures of the Maria pagecache
("pagecache->changed_blocks" etc) and other Maria structures
inherited from MyISAM (THR_LOCK_maria etc).
mysys/mf_pagecache.c:
comment
storage/maria/ma_checkpoint.c:
changing pseudocode to use the structures of the Maria pagecache
("pagecache->changed_blocks" etc) and other Maria structures
inherited from MyISAM (THR_LOCK_maria etc).
storage/maria/ma_checkpoint.h:
copyright
storage/maria/ma_control_file.c:
copyright
storage/maria/ma_control_file.h:
copyright
storage/maria/ma_least_recently_dirtied.c:
copyright
storage/maria/ma_least_recently_dirtied.h:
copyright
storage/maria/ma_recovery.c:
copyright
storage/maria/ma_recovery.h:
copyright
storage/maria/unittest/Makefile.am:
copyright
-rwxr-xr-x | mysys/mf_pagecache.c | 4 | ||||
-rw-r--r-- | storage/maria/ma_checkpoint.c | 124 | ||||
-rw-r--r-- | storage/maria/ma_checkpoint.h | 16 | ||||
-rw-r--r-- | storage/maria/ma_control_file.c | 21 | ||||
-rw-r--r-- | storage/maria/ma_control_file.h | 16 | ||||
-rw-r--r-- | storage/maria/ma_least_recently_dirtied.c | 16 | ||||
-rw-r--r-- | storage/maria/ma_least_recently_dirtied.h | 16 | ||||
-rw-r--r-- | storage/maria/ma_recovery.c | 16 | ||||
-rw-r--r-- | storage/maria/ma_recovery.h | 16 | ||||
-rw-r--r-- | storage/maria/unittest/Makefile.am | 2 |
10 files changed, 225 insertions, 22 deletions
diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 4693995f922..3a054077809 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -2937,6 +2937,10 @@ restart: } pagecache->blocks_changed--; pagecache->global_blocks_changed--; + /* + free_block() will change the status of the block so no need to change + it here. + */ } /* Cache is locked, so we can release page before freeing it */ diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index 22e7b93d2f4..83312ce37b8 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -1,3 +1,19 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + /* WL#3071 Maria checkpoint First version written by Guilhem Bichot on 2006-04-27. 
@@ -110,7 +126,7 @@ my_bool execute_checkpoint(CHECKPOINT_LEVEL level) LSN candidate_max_rec_lsn_at_last_checkpoint; /* to avoid { lock + no-op + unlock } in the common (==indirect) case */ my_bool need_log_mutex; - + DBUG_ENTER("execute_checkpoint"); safemutex_assert_owner(log_mutex); @@ -120,7 +136,7 @@ my_bool execute_checkpoint(CHECKPOINT_LEVEL level) { /* much I/O work to do, release log mutex */ unlock(log_mutex); - + switch (level) { case FULL: @@ -167,6 +183,13 @@ my_bool execute_checkpoint(CHECKPOINT_LEVEL level) } +/* + Does an indirect checkpoint (collects data from data structures, writes into + a checkpoint log record). + Returns the largest LSN of the LRD when the checkpoint happened (this is a + fuzzy definition), or LSN_IMPOSSIBLE on error. That LSN is used for the + "two-checkpoint rule" (MEDIUM checkpoints). +*/ LSN checkpoint_indirect(my_bool need_log_mutex) { DBUG_ENTER("checkpoint_indirect"); @@ -180,6 +203,7 @@ LSN checkpoint_indirect(my_bool need_log_mutex) LSN checkpoint_lsn; LSN candidate_max_rec_lsn_at_last_checkpoint= 0; list_element *el; /* to scan lists */ + ulong stored_LRD_size= 0; DBUG_ASSERT(sizeof(byte *) <= 8); @@ -192,27 +216,70 @@ LSN checkpoint_indirect(my_bool need_log_mutex) DBUG_PRINT("info",("checkpoint_start_lsn %lu", checkpoint_start_lsn)); - lock(global_LRD_mutex); - string1.length= 8+8+(8+8)*LRD->count; + /* STEP 1: fetch information about dirty pages */ + + /* + We lock the entire cache but will be quick, just reading/writing a few MBs + of memory at most. + */ + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + + /* + This is an over-estimation, as in theory blocks_changed may contain + non-PAGECACHE_LSN_PAGE pages, which we don't want to store into the + checkpoint record; the true number of page-LRD-info we'll store into the + record is stored_LRD_size. 
+ */ + string1.length= 8+8+(8+8)*pagecache->blocks_changed; if (NULL == (string1.str= my_malloc(string1.length))) goto err; ptr= string1.str; int8store(ptr, checkpoint_start_lsn); - ptr+= 8; - int8store(ptr, LRD->count); - ptr+= 8; - if (LRD->count) + ptr+= 8+8; /* don't store stored_LRD_size now, wait */ + if (pagecache->blocks_changed > 0) { - candidate_max_rec_lsn_at_last_checkpoint= LRD->last->rec_lsn; - for (el= LRD->first; el; el= el->next) + /* + There are different ways to scan the dirty blocks; + flush_all_key_blocks() uses a loop over pagecache->used_last->next_used, + and for each element of the loop, loops into + pagecache->changed_blocks[FILE_HASH(file of the element)]. + This has the drawback that used_last includes non-dirty blocks, and it's + two loops over many elements. Here we try something simpler. + If there are no blocks in changed_blocks[file_hash], we should hit + zeroes and skip them. + */ + uint file_hash; + for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++) { - int8store(ptr, el->page_id); - ptr+= 8; - int8store(ptr, el->rec_lsn); - ptr+= 8; + PAGECACHE_BLOCK_LINK *block; + for (block= pagecache->changed_blocks[file_hash] ; + block; + block= block->next_changed) + { + DBUG_ASSERT(block->hash_link != NULL); + DBUG_ASSERT(block->status & BLOCK_CHANGED); + if (block->type != PAGECACHE_LSN_PAGE) + { + /* no need to store it in the checkpoint record */ + continue; + } + /* Q: two "block"s cannot have the same "hash_link", right? 
*/ + int8store(ptr, block->hash_link->pageno); + ptr+= 8; + /* I assume rec_lsn will be member of "block", not of "hash_link" */ + int8store(ptr, block->rec_lsn); + ptr+= 8; + stored_LRD_size++; + set_if_bigger(candidate_max_rec_lsn_at_last_checkpoint, + block->rec_lsn); + } } - } - unlock(global_LRD_mutex); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_ASSERT(stored_LRD_size <= pagecache->blocks_changed); + int8store(string1.str+8, stored_LRD_size); + string1.length= 8+8+(8+8)*stored_LRD_size; + + /* STEP 2: fetch information about transactions */ /* If trx are in more than one list (e.g. three: @@ -253,19 +320,28 @@ LSN checkpoint_indirect(my_bool need_log_mutex) } unlock(global_transactions_list_mutex); + /* STEP 3: fetch information about table files */ + + /* This global mutex is in fact THR_LOCK_maria (see ma_open()) */ lock(global_share_list_mutex); string3.length= 8+(8+8)*share_list->count; if (NULL == (string3.str= my_malloc(string3.length))) goto err; ptr= string3.str; - /* possibly latch each MARIA_SHARE */ + /* possibly latch each MARIA_SHARE, one by one, like this: */ + pthread_mutex_lock(&share->intern_lock); + /* + We'll copy the file id (a bit like share->kfile), the file name + (like share->unique_file_name[_length]). + */ make_copy_of_global_share_list_to_array; + pthread_mutex_unlock(&share->intern_lock); unlock(global_share_list_mutex); /* work on copy */ int8store(ptr, elements_in_array); ptr+= 8; - for (scan_array) + for (el in array) { int8store(ptr, array[...].file_id); ptr+= 8; @@ -273,9 +349,11 @@ LSN checkpoint_indirect(my_bool need_log_mutex) ptr+= ...; /* these two are long ops (involving disk I/O) that's why we copied the - list: + list, to not keep the list locked for long: */ flush_bitmap_pages(el); + /* TODO: and also autoinc counter, logical file end, free page list */ + /* fsyncs the fd, that's the loooong operation (e.g. max 150 fsync per second, so if you have touched 1000 files it's 7 seconds). 
@@ -283,7 +361,8 @@ LSN checkpoint_indirect(my_bool need_log_mutex) force_file(el); } - /* now write the record */ + /* LAST STEP: now write the checkpoint log record */ + string_array[0]= string1; string_array[1]= string2; string_array[2]= string3; @@ -292,6 +371,11 @@ LSN checkpoint_indirect(my_bool need_log_mutex) checkpoint_lsn= log_write_record(LOGREC_CHECKPOINT, &system_trans, string_array); + /* + Do nothing between the log write and the control file write, for the + "repair control file" tool to be possible one day. + */ + if (LSN_IMPOSSIBLE == checkpoint_lsn) goto err; diff --git a/storage/maria/ma_checkpoint.h b/storage/maria/ma_checkpoint.h index a9de18c695f..1b8064fa755 100644 --- a/storage/maria/ma_checkpoint.h +++ b/storage/maria/ma_checkpoint.h @@ -1,3 +1,19 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + /* WL#3071 Maria checkpoint First version written by Guilhem Bichot on 2006-04-27. 
diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c index 5fbb0a084df..5b66577938f 100644 --- a/storage/maria/ma_control_file.c +++ b/storage/maria/ma_control_file.c @@ -1,3 +1,19 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + /* WL#3234 Maria control file First version written by Guilhem Bichot on 2006-04-27. @@ -137,7 +153,10 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open() We could have a tool which can rebuild the control file, by reading the directory of logs, finding the newest log, reading it to find last - checkpoint... Slow but can save your db. + checkpoint... Slow but can save your db. For this to be possible, we + must always write to the control file right after writing the checkpoint + log record, and do nothing in between (i.e. the checkpoint must be + usable as soon as it has been written to the log). 
*/ LSN imposs_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h index 5ac6f158183..9a99a721469 100644 --- a/storage/maria/ma_control_file.h +++ b/storage/maria/ma_control_file.h @@ -1,3 +1,19 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + /* WL#3234 Maria control file First version written by Guilhem Bichot on 2006-04-27. diff --git a/storage/maria/ma_least_recently_dirtied.c b/storage/maria/ma_least_recently_dirtied.c index c6285fe47cd..b0b7fb1ef10 100644 --- a/storage/maria/ma_least_recently_dirtied.c +++ b/storage/maria/ma_least_recently_dirtied.c @@ -1,3 +1,19 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + /* WL#3261 Maria - background flushing of the least-recently-dirtied pages First version written by Guilhem Bichot on 2006-04-27. diff --git a/storage/maria/ma_least_recently_dirtied.h b/storage/maria/ma_least_recently_dirtied.h index 6a30db4b5f0..f6d7420febc 100644 --- a/storage/maria/ma_least_recently_dirtied.h +++ b/storage/maria/ma_least_recently_dirtied.h @@ -1,3 +1,19 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + /* WL#3261 Maria - background flushing of the least-recently-dirtied pages First version written by Guilhem Bichot on 2006-04-27. 
diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index babf7507ef1..b6739b86874 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -1,3 +1,19 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + /* WL#3072 Maria recovery First version written by Guilhem Bichot on 2006-04-27. diff --git a/storage/maria/ma_recovery.h b/storage/maria/ma_recovery.h index b85ffdeef59..05026f4b52a 100644 --- a/storage/maria/ma_recovery.h +++ b/storage/maria/ma_recovery.h @@ -1,3 +1,19 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + /* WL#3072 Maria recovery First version written by Guilhem Bichot on 2006-04-27. diff --git a/storage/maria/unittest/Makefile.am b/storage/maria/unittest/Makefile.am index eae2990aea9..8a5ca3d669f 100644 --- a/storage/maria/unittest/Makefile.am +++ b/storage/maria/unittest/Makefile.am @@ -1,4 +1,4 @@ -# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +# Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by |