summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author unknown <guilhem@gbichot3.local> 2006-09-14 19:06:51 +0200
committer unknown <guilhem@gbichot3.local> 2006-09-14 19:06:51 +0200
commit cdf831cf94fe9aabde6ffb5b19557893416061d6 (patch)
tree 9721c7b1eb3d18d02a307cd7827649221192e1bd
parent 15b9ce2201453ce7ecc346b053910023e7d51b83 (diff)
download mariadb-git-cdf831cf94fe9aabde6ffb5b19557893416061d6.tar.gz
WL#3071 Maria checkpoint:
changing pseudocode to use the structures of the Maria pagecache ("pagecache->changed_blocks" etc) and other Maria structures inherited from MyISAM (THR_LOCK_maria etc). mysys/mf_pagecache.c: comment storage/maria/ma_checkpoint.c: changing pseudocode to use the structures of the Maria pagecache ("pagecache->changed_blocks" etc) and other Maria structures inherited from MyISAM (THR_LOCK_maria etc). storage/maria/ma_checkpoint.h: copyright storage/maria/ma_control_file.c: copyright storage/maria/ma_control_file.h: copyright storage/maria/ma_least_recently_dirtied.c: copyright storage/maria/ma_least_recently_dirtied.h: copyright storage/maria/ma_recovery.c: copyright storage/maria/ma_recovery.h: copyright storage/maria/unittest/Makefile.am: copyright
-rwxr-xr-x mysys/mf_pagecache.c 4
-rw-r--r-- storage/maria/ma_checkpoint.c 124
-rw-r--r-- storage/maria/ma_checkpoint.h 16
-rw-r--r-- storage/maria/ma_control_file.c 21
-rw-r--r-- storage/maria/ma_control_file.h 16
-rw-r--r-- storage/maria/ma_least_recently_dirtied.c 16
-rw-r--r-- storage/maria/ma_least_recently_dirtied.h 16
-rw-r--r-- storage/maria/ma_recovery.c 16
-rw-r--r-- storage/maria/ma_recovery.h 16
-rw-r--r-- storage/maria/unittest/Makefile.am 2
10 files changed, 225 insertions, 22 deletions
diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c
index 4693995f922..3a054077809 100755
--- a/mysys/mf_pagecache.c
+++ b/mysys/mf_pagecache.c
@@ -2937,6 +2937,10 @@ restart:
}
pagecache->blocks_changed--;
pagecache->global_blocks_changed--;
+ /*
+ free_block() will change the status of the block so no need to change
+ it here.
+ */
}
/* Cache is locked, so we can release page before freeing it */
diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c
index 22e7b93d2f4..83312ce37b8 100644
--- a/storage/maria/ma_checkpoint.c
+++ b/storage/maria/ma_checkpoint.c
@@ -1,3 +1,19 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
/*
WL#3071 Maria checkpoint
First version written by Guilhem Bichot on 2006-04-27.
@@ -110,7 +126,7 @@ my_bool execute_checkpoint(CHECKPOINT_LEVEL level)
LSN candidate_max_rec_lsn_at_last_checkpoint;
/* to avoid { lock + no-op + unlock } in the common (==indirect) case */
my_bool need_log_mutex;
-
+
DBUG_ENTER("execute_checkpoint");
safemutex_assert_owner(log_mutex);
@@ -120,7 +136,7 @@ my_bool execute_checkpoint(CHECKPOINT_LEVEL level)
{
/* much I/O work to do, release log mutex */
unlock(log_mutex);
-
+
switch (level)
{
case FULL:
@@ -167,6 +183,13 @@ my_bool execute_checkpoint(CHECKPOINT_LEVEL level)
}
+/*
+ Does an indirect checkpoint (collects data from data structures, writes into
+ a checkpoint log record).
+ Returns the largest LSN of the LRD when the checkpoint happened (this is a
+ fuzzy definition), or LSN_IMPOSSIBLE on error. That LSN is used for the
+ "two-checkpoint rule" (MEDIUM checkpoints).
+*/
LSN checkpoint_indirect(my_bool need_log_mutex)
{
DBUG_ENTER("checkpoint_indirect");
@@ -180,6 +203,7 @@ LSN checkpoint_indirect(my_bool need_log_mutex)
LSN checkpoint_lsn;
LSN candidate_max_rec_lsn_at_last_checkpoint= 0;
list_element *el; /* to scan lists */
+ ulong stored_LRD_size= 0;
DBUG_ASSERT(sizeof(byte *) <= 8);
@@ -192,27 +216,70 @@ LSN checkpoint_indirect(my_bool need_log_mutex)
DBUG_PRINT("info",("checkpoint_start_lsn %lu", checkpoint_start_lsn));
- lock(global_LRD_mutex);
- string1.length= 8+8+(8+8)*LRD->count;
+ /* STEP 1: fetch information about dirty pages */
+
+ /*
+ We lock the entire cache but will be quick, just reading/writing a few MBs
+ of memory at most.
+ */
+ pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+
+ /*
+ This is an over-estimation, as in theory blocks_changed may contain
+ non-PAGECACHE_LSN_PAGE pages, which we don't want to store into the
+ checkpoint record; the true number of page-LRD-info we'll store into the
+ record is stored_LRD_size.
+ */
+ string1.length= 8+8+(8+8)*pagecache->blocks_changed;
if (NULL == (string1.str= my_malloc(string1.length)))
goto err;
ptr= string1.str;
int8store(ptr, checkpoint_start_lsn);
- ptr+= 8;
- int8store(ptr, LRD->count);
- ptr+= 8;
- if (LRD->count)
+ ptr+= 8+8; /* don't store stored_LRD_size now, wait */
+ if (pagecache->blocks_changed > 0)
{
- candidate_max_rec_lsn_at_last_checkpoint= LRD->last->rec_lsn;
- for (el= LRD->first; el; el= el->next)
+ /*
+ There are different ways to scan the dirty blocks;
+ flush_all_key_blocks() uses a loop over pagecache->used_last->next_used,
+ and for each element of the loop, loops into
+ pagecache->changed_blocks[FILE_HASH(file of the element)].
+ This has the drawback that used_last includes non-dirty blocks, and it's
+ two loops over many elements. Here we try something simpler.
+ If there are no blocks in changed_blocks[file_hash], we should hit
+ zeroes and skip them.
+ */
+ uint file_hash;
+ for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++)
{
- int8store(ptr, el->page_id);
- ptr+= 8;
- int8store(ptr, el->rec_lsn);
- ptr+= 8;
+ PAGECACHE_BLOCK_LINK *block;
+ for (block= pagecache->changed_blocks[file_hash] ;
+ block;
+ block= block->next_changed)
+ {
+ DBUG_ASSERT(block->hash_link != NULL);
+ DBUG_ASSERT(block->status & BLOCK_CHANGED);
+ if (block->type != PAGECACHE_LSN_PAGE)
+ {
+ /* no need to store it in the checkpoint record */
+ continue;
+ }
+ /* Q: two "block"s cannot have the same "hash_link", right? */
+ int8store(ptr, block->hash_link->pageno);
+ ptr+= 8;
+ /* I assume rec_lsn will be a member of "block", not of "hash_link" */
+ int8store(ptr, block->rec_lsn);
+ ptr+= 8;
+ stored_LRD_size++;
+ set_if_bigger(candidate_max_rec_lsn_at_last_checkpoint,
+ block->rec_lsn);
+ }
}
- }
- unlock(global_LRD_mutex);
+ pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_ASSERT(stored_LRD_size <= pagecache->blocks_changed);
+ int8store(string1.str+8, stored_LRD_size);
+ string1.length= 8+8+(8+8)*stored_LRD_size;
+
+ /* STEP 2: fetch information about transactions */
/*
If trx are in more than one list (e.g. three:
@@ -253,19 +320,28 @@ LSN checkpoint_indirect(my_bool need_log_mutex)
}
unlock(global_transactions_list_mutex);
+ /* STEP 3: fetch information about table files */
+
+ /* This global mutex is in fact THR_LOCK_maria (see ma_open()) */
lock(global_share_list_mutex);
string3.length= 8+(8+8)*share_list->count;
if (NULL == (string3.str= my_malloc(string3.length)))
goto err;
ptr= string3.str;
- /* possibly latch each MARIA_SHARE */
+ /* possibly latch each MARIA_SHARE, one by one, like this: */
+ pthread_mutex_lock(&share->intern_lock);
+ /*
+ We'll copy the file id (a bit like share->kfile), the file name
+ (like share->unique_file_name[_length]).
+ */
make_copy_of_global_share_list_to_array;
+ pthread_mutex_unlock(&share->intern_lock);
unlock(global_share_list_mutex);
/* work on copy */
int8store(ptr, elements_in_array);
ptr+= 8;
- for (scan_array)
+ for (el in array)
{
int8store(ptr, array[...].file_id);
ptr+= 8;
@@ -273,9 +349,11 @@ LSN checkpoint_indirect(my_bool need_log_mutex)
ptr+= ...;
/*
these two are long ops (involving disk I/O) that's why we copied the
- list:
+ list, to not keep the list locked for long:
*/
flush_bitmap_pages(el);
+ /* TODO: and also autoinc counter, logical file end, free page list */
+
/*
fsyncs the fd, that's the loooong operation (e.g. max 150 fsync per
second, so if you have touched 1000 files it's 7 seconds).
@@ -283,7 +361,8 @@ LSN checkpoint_indirect(my_bool need_log_mutex)
force_file(el);
}
- /* now write the record */
+ /* LAST STEP: now write the checkpoint log record */
+
string_array[0]= string1;
string_array[1]= string2;
string_array[2]= string3;
@@ -292,6 +371,11 @@ LSN checkpoint_indirect(my_bool need_log_mutex)
checkpoint_lsn= log_write_record(LOGREC_CHECKPOINT,
&system_trans, string_array);
+ /*
+ Do nothing between the log write and the control file write, for the
+ "repair control file" tool to be possible one day.
+ */
+
if (LSN_IMPOSSIBLE == checkpoint_lsn)
goto err;
diff --git a/storage/maria/ma_checkpoint.h b/storage/maria/ma_checkpoint.h
index a9de18c695f..1b8064fa755 100644
--- a/storage/maria/ma_checkpoint.h
+++ b/storage/maria/ma_checkpoint.h
@@ -1,3 +1,19 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
/*
WL#3071 Maria checkpoint
First version written by Guilhem Bichot on 2006-04-27.
diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c
index 5fbb0a084df..5b66577938f 100644
--- a/storage/maria/ma_control_file.c
+++ b/storage/maria/ma_control_file.c
@@ -1,3 +1,19 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
/*
WL#3234 Maria control file
First version written by Guilhem Bichot on 2006-04-27.
@@ -137,7 +153,10 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open()
We could have a tool which can rebuild the control file, by reading the
directory of logs, finding the newest log, reading it to find last
- checkpoint... Slow but can save your db.
+ checkpoint... Slow but can save your db. For this to be possible, we
+ must always write to the control file right after writing the checkpoint
+ log record, and do nothing in between (i.e. the checkpoint must be
+ usable as soon as it has been written to the log).
*/
LSN imposs_lsn= CONTROL_FILE_IMPOSSIBLE_LSN;
diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h
index 5ac6f158183..9a99a721469 100644
--- a/storage/maria/ma_control_file.h
+++ b/storage/maria/ma_control_file.h
@@ -1,3 +1,19 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
/*
WL#3234 Maria control file
First version written by Guilhem Bichot on 2006-04-27.
diff --git a/storage/maria/ma_least_recently_dirtied.c b/storage/maria/ma_least_recently_dirtied.c
index c6285fe47cd..b0b7fb1ef10 100644
--- a/storage/maria/ma_least_recently_dirtied.c
+++ b/storage/maria/ma_least_recently_dirtied.c
@@ -1,3 +1,19 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
/*
WL#3261 Maria - background flushing of the least-recently-dirtied pages
First version written by Guilhem Bichot on 2006-04-27.
diff --git a/storage/maria/ma_least_recently_dirtied.h b/storage/maria/ma_least_recently_dirtied.h
index 6a30db4b5f0..f6d7420febc 100644
--- a/storage/maria/ma_least_recently_dirtied.h
+++ b/storage/maria/ma_least_recently_dirtied.h
@@ -1,3 +1,19 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
/*
WL#3261 Maria - background flushing of the least-recently-dirtied pages
First version written by Guilhem Bichot on 2006-04-27.
diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c
index babf7507ef1..b6739b86874 100644
--- a/storage/maria/ma_recovery.c
+++ b/storage/maria/ma_recovery.c
@@ -1,3 +1,19 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
/*
WL#3072 Maria recovery
First version written by Guilhem Bichot on 2006-04-27.
diff --git a/storage/maria/ma_recovery.h b/storage/maria/ma_recovery.h
index b85ffdeef59..05026f4b52a 100644
--- a/storage/maria/ma_recovery.h
+++ b/storage/maria/ma_recovery.h
@@ -1,3 +1,19 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
/*
WL#3072 Maria recovery
First version written by Guilhem Bichot on 2006-04-27.
diff --git a/storage/maria/unittest/Makefile.am b/storage/maria/unittest/Makefile.am
index eae2990aea9..8a5ca3d669f 100644
--- a/storage/maria/unittest/Makefile.am
+++ b/storage/maria/unittest/Makefile.am
@@ -1,4 +1,4 @@
-# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by