From d2b8d744d9f9ab32478416885d7929ad00d3b276 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 11 Apr 2006 16:45:10 +0300 Subject: Added storage/maria (based on MyISAM). WL#3245 Moved things into ft_global.h, my_handler.h and myisamchk.h to allow MyISAM and Maria to share code and defines Rename of not properly renamed functions in MyISAM and my_handler.c Renamed some MI_ defines to HA_ to allow MyISAM and Maria to share Added maria variables to mysqld.cc and set_var.cc Fixed compiler warnings BitKeeper/etc/ignore: added storage/maria/*.MAI BUILD/SETUP.sh: Compile maria by default BitKeeper/triggers/post-commit: No public maria emails. Mark changesets emails with 'maria' configure.in: Add maria include/ft_global.h: Move defines needed by maria and MyISAM here include/keycache.h: Add support for default key_cache if cache not found include/my_base.h: Add invalidator_by_filename include/my_handler.h: Remove duplicate header files Add defines that are typical for handlers (MyISAM and Maria) include/myisam.h: Move things to my_handler.h to allow Maria and MyISAM to share things (Some things needed to be shared to allow sharing of HA_CHECK structure) libmysqld/Makefile.am: Added ha_maria.cc mysys/mf_keycaches.c: Added default value for multi_key_cache_search mysys/my_handler.c: mi_compare_text -> ha_compare_text Removed compiler warnings sql/ha_myisam.cc: MI_CHECK -> HA_CHECK MI_MAX_KEY_LENGTH -> HA_MAX_KEY_LENGTH sql/ha_myisam.h: MI_CHECK -> HA_CHECK MI_MAX_KEY_LENGTH -> HA_MAX_KEY_LENGTH sql/ha_myisammrg.h: MI_CHECK -> HA_CHECK MI_MAX_KEY_LENGTH -> HA_MAX_KEY_LENGTH sql/handler.h: Added MARIA Added inclusion of my_handler.h sql/item_func.h: Remove duplicate include sql/mysql_priv.h: Added maria variables sql/mysqld.cc: Added maria sql/set_var.cc: Added maria status variables sql/set_var.h: Added maria sql/sql_class.h: Added maria status variables sql/sql_sort.h: Remove duplicate BUFFPEK struct storage/Makefile.am: Added maria storage/csv/ha_tina.cc: Removed compiler 
warning storage/myisam/Makefile.am: Added ft_myisam.c storage/myisam/ft_boolean_search.c: mi_compare_text -> ha_compare_text MI_MAX_KEY_BUFF -> HA_MAX_KEY_BUFF Remove compiler warnings storage/myisam/ft_nlq_search.c: mi_compare_text -> ha_compare_text storage/myisam/ft_parser.c: mi_compare_text -> ha_compare_text storage/myisam/ft_static.c: Move ft_init_search() to ft_myisam.c to make ft_static.c independent of MyISAM storage/myisam/ft_stopwords.c: mi_compare_text -> ha_compare_text storage/myisam/ft_update.c: mi_compare_text -> ha_compare_text storage/myisam/fulltext.h: Move things to ft_global.h to allow to share more things between MyISAM and Maria storage/myisam/mi_check.c: MI_CHECK -> HA_CHECK storage/myisam/mi_create.c: MI_MAX_POSSIBLE_KEY -> HA_MAX_POSSIBLE_KEY MI_MAX_KEY_BLOCK_SIZE -> HA_MAX_KEY_BLOCK_SIZE MI_MAX_KEY_SEG -> HA_MAX_KEY_SEG MI_MAX_KEY_BUFF -> HA_MAX_KEY_BUFF storage/myisam/mi_delete.c: MI_MAX_KEY_BUFF -> HA_MAX_KEY_BUFF storage/myisam/mi_delete_all.c: Remove not used variable storage/myisam/mi_dynrec.c: _my_calc_total_blob_length -> _mi_calc_total_blob_length storage/myisam/mi_key.c: _my_store_blob_length -> _mi_store_blob_length storage/myisam/mi_log.c: _my_calc_total_blob_length -> _mi_calc_total_blob_length storage/myisam/mi_open.c: MI_MAX_POSSIBLE_KEY -> HA_MAX_POSSIBLE_KEY MI_MAX_KEY_SEG -> HA_MAX_KEY_SEG MI_MAX_KEY_BUFF -> HA_MAX_KEY_BUFF my_n_base_info_read -> mi_n_base_info_read storage/myisam/mi_packrec.c: Made read_pack_length static _my_store_blob_length -> _mi_store_blob_length Remove not used variable storage/myisam/mi_range.c: MI_MAX_KEY_BUFF -> HA_MAX_KEY_BUFF storage/myisam/mi_search.c: MI_MAX_KEY_BUFF -> HA_MAX_KEY_BUFF storage/myisam/mi_test1.c: MI_MAX_KEY_LENGTH -> HA_MAX_KEY_LENGTH storage/myisam/mi_test2.c: Fixed compiler warning storage/myisam/mi_unique.c: Fixed compiler warning mi_compare_text -> ha_compare_text storage/myisam/mi_update.c: MI_MAX_KEY_BUFF -> HA_MAX_KEY_BUFF storage/myisam/mi_write.c: Rename of defines 
and functions storage/myisam/myisamchk.c: Rename of defines and functions storage/myisam/myisamdef.h: Remove tabs Indentation fixes (Large changes as I did run indent-ex on the file) Move some things to myisamchk.h Added missing functions that gave compiler warnings storage/myisam/myisamlog.c: Rename of defines and functions storage/myisam/myisampack.c: Remove compiler warning storage/myisam/rt_index.c: Rename of defines and functions storage/myisam/sort.c: Rename of defines, functions and structures config/ac-macros/ha_maria.m4: New BitKeeper file ``config/ac-macros/ha_maria.m4'' include/maria.h: New BitKeeper file ``include/maria.h'' include/myisamchk.h: New BitKeeper file ``include/myisamchk.h'' libmysqld/ha_maria.cc: New BitKeeper file ``libmysqld/ha_maria.cc'' mysql-test/include/have_maria.inc: New BitKeeper file ``mysql-test/include/have_maria.inc'' mysql-test/r/have_maria.require: New BitKeeper file ``mysql-test/r/have_maria.require'' mysql-test/r/maria.result: New BitKeeper file ``mysql-test/r/maria.result'' mysql-test/r/ps_maria.result: New BitKeeper file ``mysql-test/r/ps_maria.result'' mysql-test/t/maria.test: New BitKeeper file ``mysql-test/t/maria.test'' mysql-test/t/ps_maria.test: New BitKeeper file ``mysql-test/t/ps_maria.test'' sql/ha_maria.cc: New BitKeeper file ``sql/ha_maria.cc'' sql/ha_maria.h: New BitKeeper file ``sql/ha_maria.h'' storage/maria/Makefile.am: New BitKeeper file ``storage/maria/Makefile.am'' storage/maria/cmakelists.txt: New BitKeeper file ``storage/maria/cmakelists.txt'' storage/maria/ft_maria.c: New BitKeeper file ``storage/maria/ft_maria.c'' storage/maria/ma_cache.c: New BitKeeper file ``storage/maria/ma_cache.c'' storage/maria/ma_changed.c: New BitKeeper file ``storage/maria/ma_changed.c'' storage/maria/ma_check.c: New BitKeeper file ``storage/maria/ma_check.c'' storage/maria/ma_checksum.c: New BitKeeper file ``storage/maria/ma_checksum.c'' storage/maria/ma_close.c: New BitKeeper file ``storage/maria/ma_close.c'' 
storage/maria/ma_create.c: New BitKeeper file ``storage/maria/ma_create.c'' storage/maria/ma_dbug.c: New BitKeeper file ``storage/maria/ma_dbug.c'' storage/maria/ma_delete.c: New BitKeeper file ``storage/maria/ma_delete.c'' storage/maria/ma_delete_all.c: New BitKeeper file ``storage/maria/ma_delete_all.c'' storage/maria/ma_delete_table.c: New BitKeeper file ``storage/maria/ma_delete_table.c'' storage/maria/ma_dynrec.c: New BitKeeper file ``storage/maria/ma_dynrec.c'' storage/maria/ma_extra.c: New BitKeeper file ``storage/maria/ma_extra.c'' storage/maria/ma_ft_boolean_search.c: New BitKeeper file ``storage/maria/ma_ft_boolean_search.c'' storage/maria/ma_ft_eval.c: New BitKeeper file ``storage/maria/ma_ft_eval.c'' storage/maria/ma_ft_eval.h: New BitKeeper file ``storage/maria/ma_ft_eval.h'' storage/maria/ma_ft_nlq_search.c: New BitKeeper file ``storage/maria/ma_ft_nlq_search.c'' storage/maria/ma_ft_parser.c: New BitKeeper file ``storage/maria/ma_ft_parser.c'' storage/maria/ma_ft_stem.c: New BitKeeper file ``storage/maria/ma_ft_stem.c'' storage/maria/ma_ft_test1.c: New BitKeeper file ``storage/maria/ma_ft_test1.c'' storage/maria/ma_ft_test1.h: New BitKeeper file ``storage/maria/ma_ft_test1.h'' storage/maria/ma_ft_update.c: New BitKeeper file ``storage/maria/ma_ft_update.c'' storage/maria/ma_ftdefs.h: New BitKeeper file ``storage/maria/ma_ftdefs.h'' storage/maria/ma_fulltext.h: New BitKeeper file ``storage/maria/ma_fulltext.h'' storage/maria/ma_info.c: New BitKeeper file ``storage/maria/ma_info.c'' storage/maria/ma_init.c: New BitKeeper file ``storage/maria/ma_init.c'' storage/maria/ma_key.c: New BitKeeper file ``storage/maria/ma_key.c'' storage/maria/ma_keycache.c: New BitKeeper file ``storage/maria/ma_keycache.c'' storage/maria/ma_locking.c: New BitKeeper file ``storage/maria/ma_locking.c'' storage/maria/ma_log.c: New BitKeeper file ``storage/maria/ma_log.c'' storage/maria/ma_open.c: New BitKeeper file ``storage/maria/ma_open.c'' storage/maria/ma_packrec.c: New 
BitKeeper file ``storage/maria/ma_packrec.c'' storage/maria/ma_page.c: New BitKeeper file ``storage/maria/ma_page.c'' storage/maria/ma_panic.c: New BitKeeper file ``storage/maria/ma_panic.c'' storage/maria/ma_preload.c: New BitKeeper file ``storage/maria/ma_preload.c'' storage/maria/ma_range.c: New BitKeeper file ``storage/maria/ma_range.c'' storage/maria/ma_rename.c: New BitKeeper file ``storage/maria/ma_rename.c'' storage/maria/ma_rfirst.c: New BitKeeper file ``storage/maria/ma_rfirst.c'' storage/maria/ma_rkey.c: New BitKeeper file ``storage/maria/ma_rkey.c'' storage/maria/ma_rlast.c: New BitKeeper file ``storage/maria/ma_rlast.c'' storage/maria/ma_rnext.c: New BitKeeper file ``storage/maria/ma_rnext.c'' storage/maria/ma_rnext_same.c: New BitKeeper file ``storage/maria/ma_rnext_same.c'' storage/maria/ma_rprev.c: New BitKeeper file ``storage/maria/ma_rprev.c'' storage/maria/ma_rrnd.c: New BitKeeper file ``storage/maria/ma_rrnd.c'' storage/maria/ma_rsame.c: New BitKeeper file ``storage/maria/ma_rsame.c'' storage/maria/ma_rsamepos.c: New BitKeeper file ``storage/maria/ma_rsamepos.c'' storage/maria/ma_rt_index.c: New BitKeeper file ``storage/maria/ma_rt_index.c'' storage/maria/ma_rt_index.h: New BitKeeper file ``storage/maria/ma_rt_index.h'' storage/maria/ma_rt_key.c: New BitKeeper file ``storage/maria/ma_rt_key.c'' storage/maria/ma_rt_key.h: New BitKeeper file ``storage/maria/ma_rt_key.h'' storage/maria/ma_rt_mbr.c: New BitKeeper file ``storage/maria/ma_rt_mbr.c'' storage/maria/ma_rt_mbr.h: New BitKeeper file ``storage/maria/ma_rt_mbr.h'' storage/maria/ma_rt_split.c: New BitKeeper file ``storage/maria/ma_rt_split.c'' storage/maria/ma_rt_test.c: New BitKeeper file ``storage/maria/ma_rt_test.c'' storage/maria/ma_scan.c: New BitKeeper file ``storage/maria/ma_scan.c'' storage/maria/ma_search.c: New BitKeeper file ``storage/maria/ma_search.c'' storage/maria/ma_sort.c: New BitKeeper file ``storage/maria/ma_sort.c'' storage/maria/ma_sp_defs.h: New BitKeeper file 
``storage/maria/ma_sp_defs.h'' storage/maria/ma_sp_key.c: New BitKeeper file ``storage/maria/ma_sp_key.c'' storage/maria/ma_sp_test.c: New BitKeeper file ``storage/maria/ma_sp_test.c'' storage/maria/ma_static.c: New BitKeeper file ``storage/maria/ma_static.c'' storage/maria/ma_statrec.c: New BitKeeper file ``storage/maria/ma_statrec.c'' storage/maria/ma_test1.c: New BitKeeper file ``storage/maria/ma_test1.c'' storage/maria/ma_test2.c: New BitKeeper file ``storage/maria/ma_test2.c'' storage/maria/ma_test3.c: New BitKeeper file ``storage/maria/ma_test3.c'' storage/maria/ma_test_all.sh: New BitKeeper file ``storage/maria/ma_test_all.sh'' storage/maria/ma_unique.c: New BitKeeper file ``storage/maria/ma_unique.c'' storage/maria/ma_update.c: New BitKeeper file ``storage/maria/ma_update.c'' storage/maria/ma_write.c: New BitKeeper file ``storage/maria/ma_write.c'' storage/maria/maria_chk.c: New BitKeeper file ``storage/maria/maria_chk.c'' storage/maria/maria_def.h: New BitKeeper file ``storage/maria/maria_def.h'' storage/maria/maria_ftdump.c: New BitKeeper file ``storage/maria/maria_ftdump.c'' storage/maria/maria_log.c: New BitKeeper file ``storage/maria/maria_log.c'' storage/maria/maria_pack.c: New BitKeeper file ``storage/maria/maria_pack.c'' storage/maria/maria_rename.sh: New BitKeeper file ``storage/maria/maria_rename.sh'' storage/maria/test_pack: New BitKeeper file ``storage/maria/test_pack'' storage/myisam/ft_myisam.c: New BitKeeper file ``storage/myisam/ft_myisam.c'' --- mysys/mf_keycaches.c | 14 +++++++++----- mysys/my_handler.c | 30 +++++++++++++++++++++--------- 2 files changed, 30 insertions(+), 14 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_keycaches.c b/mysys/mf_keycaches.c index 38fef31fdd4..fdac198697d 100644 --- a/mysys/mf_keycaches.c +++ b/mysys/mf_keycaches.c @@ -148,7 +148,8 @@ static void safe_hash_free(SAFE_HASH *hash) Return the value stored for a key or default value if no key */ -static byte *safe_hash_search(SAFE_HASH *hash, const byte 
*key, uint length) +static byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length, + byte *def) { byte *result; DBUG_ENTER("safe_hash_search"); @@ -156,7 +157,7 @@ static byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length) result= hash_search(&hash->hash, key, length); rw_unlock(&hash->mutex); if (!result) - result= hash->default_value; + result= def; else result= ((SAFE_HASH_ENTRY*) result)->data; DBUG_PRINT("exit",("data: 0x%lx", result)); @@ -316,6 +317,7 @@ void multi_keycache_free(void) multi_key_cache_search() key key to find (usually table path) uint length Length of key. + def Default value if no key cache NOTES This function is coded in such a way that we will return the @@ -326,11 +328,13 @@ void multi_keycache_free(void) key cache to use */ -KEY_CACHE *multi_key_cache_search(byte *key, uint length) +KEY_CACHE *multi_key_cache_search(byte *key, uint length, + KEY_CACHE *def) { if (!key_cache_hash.hash.records) - return dflt_key_cache; - return (KEY_CACHE*) safe_hash_search(&key_cache_hash, key, length); + return def; + return (KEY_CACHE*) safe_hash_search(&key_cache_hash, key, length, + (void*) def); } diff --git a/mysys/my_handler.c b/mysys/my_handler.c index 56f2298a9f0..5f7bf5ff1a8 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -15,9 +15,12 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ -#include "my_handler.h" +#include +#include +#include +#include -int mi_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length, +int ha_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length, uchar *b, uint b_length, my_bool part_key, my_bool skip_end_space) { @@ -173,7 +176,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, next_key_length=key_length-b_length-pack_length; if (piks && - (flag=mi_compare_text(keyseg->charset,a,a_length,b,b_length, + (flag=ha_compare_text(keyseg->charset,a,a_length,b,b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && 
next_key_length <= 0), (my_bool)!(nextflag & SEARCH_PREFIX)))) @@ -186,7 +189,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, { uint length=(uint) (end-a), a_length=length, b_length=length; if (piks && - (flag= mi_compare_text(keyseg->charset, a, a_length, b, b_length, + (flag= ha_compare_text(keyseg->charset, a, a_length, b, b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0), (my_bool)!(nextflag & SEARCH_PREFIX)))) @@ -234,7 +237,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, next_key_length=key_length-b_length-pack_length; if (piks && - (flag= mi_compare_text(keyseg->charset,a,a_length,b,b_length, + (flag= ha_compare_text(keyseg->charset,a,a_length,b,b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0), (my_bool) ((nextflag & (SEARCH_FIND | @@ -481,12 +484,15 @@ end: DESCRIPTION Find the first NULL value in index-suffix values tuple. - TODO Consider optimizing this fuction or its use so we don't search for - NULL values in completely NOT NULL index suffixes. + + TODO + Consider optimizing this function or its use so we don't search for + NULL values in completely NOT NULL index suffixes. RETURN - First key part that has NULL as value in values tuple, or the last key part - (with keyseg->type==HA_TYPE_END) if values tuple doesn't contain NULLs. + First key part that has NULL as value in values tuple, or the last key + part (with keyseg->type==HA_TYPE_END) if values tuple doesn't contain + NULLs. 
*/ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) @@ -504,6 +510,7 @@ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) switch ((enum ha_base_keytype) keyseg->type) { case HA_KEYTYPE_TEXT: case HA_KEYTYPE_BINARY: + case HA_KEYTYPE_BIT: if (keyseg->flag & HA_SPACE_PACK) { int a_length; @@ -516,6 +523,8 @@ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) break; case HA_KEYTYPE_VARTEXT1: case HA_KEYTYPE_VARBINARY1: + case HA_KEYTYPE_VARTEXT2: + case HA_KEYTYPE_VARBINARY2: { int a_length; get_key_length(a_length, a); @@ -545,6 +554,9 @@ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) case HA_KEYTYPE_DOUBLE: a= end; break; + case HA_KEYTYPE_END: + DBUG_ASSERT(0); + break; } } return keyseg; -- cgit v1.2.1 From fbe22b6020a5d41e5cc0819192d437f3963fd298 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 6 Jul 2006 11:10:34 +0300 Subject: The pagae cache added. mysys/Makefile.am: the page cache and test proms for it added include/pagecache.h: New BitKeeper file ``include/pagecache.h'' mysys/mf_pagecache.c: New BitKeeper file ``mysys/mf_pagecache.c'' mysys/test_file.c: New BitKeeper file ``mysys/test_file.c'' mysys/test_file.h: New BitKeeper file ``mysys/test_file.h'' mysys/test_pagecache_consist.c: New BitKeeper file ``mysys/test_pagecache_consist.c'' mysys/test_pagecache_single.c: New BitKeeper file ``mysys/test_pagecache_single.c'' --- mysys/Makefile.am | 58 +- mysys/mf_pagecache.c | 3920 ++++++++++++++++++++++++++++++++++++++++ mysys/test_file.c | 70 + mysys/test_file.h | 14 + mysys/test_pagecache_consist.c | 447 +++++ mysys/test_pagecache_single.c | 589 ++++++ 6 files changed, 5097 insertions(+), 1 deletion(-) create mode 100755 mysys/mf_pagecache.c create mode 100644 mysys/test_file.c create mode 100644 mysys/test_file.h create mode 100755 mysys/test_pagecache_consist.c create mode 100644 mysys/test_pagecache_single.c (limited to 'mysys') diff --git a/mysys/Makefile.am b/mysys/Makefile.am index 1241e8cdded..485887a085e 100644 --- a/mysys/Makefile.am 
+++ b/mysys/Makefile.am @@ -54,7 +54,7 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ my_gethostbyname.c rijndael.c my_aes.c sha1.c \ my_handler.c my_netware.c my_largepage.c \ my_memmem.c \ - my_windac.c my_access.c base64.c + my_windac.c my_access.c base64.c mf_pagecache.c EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \ thr_mutex.c thr_rwlock.c \ CMakeLists.txt mf_soundex.c \ @@ -129,5 +129,61 @@ test_base64$(EXEEXT): base64.c $(LIBRARIES) $(LINK) $(FLAGS) -DMAIN ./test_base64.c $(LDADD) $(LIBS) $(RM) -f ./test_base64.c +test_mf_pagecache.o: mf_pagecache.c ../include/pagecache.h $(LIBRARIES) + $(CP) $(srcdir)/mf_pagecache.c test_mf_pagecache.c + $(COMPILE) $(FLAGS) -DPAGECACHE_DEBUG -DEXTRA_DEBUG -c test_mf_pagecache.c + +test_file.o: test_file.c test_file.h + $(COMPILE) $(FLAGS) -DPAGECACHE_DEBUG -DEXTRA_DEBUG -c test_file.c + +test_pagecache_single1k$(EXEEXT): test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LDADD) $(LIBS) + +test_pagecache_single8k$(EXEEXT): test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=8192 -DEXTRA_DEBUG $(srcdir)/test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LDADD) $(LIBS) + +test_pagecache_single64k$(EXEEXT): test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LDADD) $(LIBS) + +test_pagecache_single: test_pagecache_single1k$(EXEEXT) test_pagecache_single8k$(EXEEXT) test_pagecache_single64k$(EXEEXT) + ./test_pagecache_single64k$(EXEEXT) + 
./test_pagecache_single8k$(EXEEXT) + ./test_pagecache_single1k$(EXEEXT) + +test_pagecache_consist1k$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) + +test_pagecache_consist64k$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) + +test_pagecache_consist1kHC$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_HIGH_CONCURENCY -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) + +test_pagecache_consist64kHC$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_HIGH_CONCURENCY -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) + +test_pagecache_consist1kRD$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_READERS -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) + +test_pagecache_consist64kRD$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_READERS -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) + +test_pagecache_consist1kWR$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o 
../unittest/mytap/tap.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_WRITERS -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) + +test_pagecache_consist64kWR$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) + $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_WRITERS -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) + +test_pagecache_consist: test_pagecache_consist1k$(EXEEXT) test_pagecache_consist64k$(EXEEXT) test_pagecache_consist1kHC$(EXEEXT) test_pagecache_consist64kHC$(EXEEXT) test_pagecache_consist1kRD$(EXEEXT) test_pagecache_consist64kRD$(EXEEXT) test_pagecache_consist1kWR$(EXEEXT) test_pagecache_consist64kWR$(EXEEXT) + ./test_pagecache_consist1k$(EXEEXT) + ./test_pagecache_consist64k$(EXEEXT) + ./test_pagecache_consist1kHC$(EXEEXT) + ./test_pagecache_consist64kHC$(EXEEXT) + ./test_pagecache_consist1kRD$(EXEEXT) + ./test_pagecache_consist64kRD$(EXEEXT) + ./test_pagecache_consist1kWR$(EXEEXT) + ./test_pagecache_consist64kWR$(EXEEXT) + + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c new file mode 100755 index 00000000000..4693995f922 --- /dev/null +++ b/mysys/mf_pagecache.c @@ -0,0 +1,3920 @@ +/* Copyright (C) 2000 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + These functions handle page cacheing for Maria tables. + + One cache can handle many files. + It must contain buffers of the same blocksize. + init_pagecache() should be used to init cache handler. + + The free list (free_block_list) is a stack like structure. + When a block is freed by free_block(), it is pushed onto the stack. + When a new block is required it is first tried to pop one from the stack. + If the stack is empty, it is tried to get a never-used block from the pool. + If this is empty too, then a block is taken from the LRU ring, flushing it + to disk, if neccessary. This is handled in find_key_block(). + With the new free list, the blocks can have three temperatures: + hot, warm and cold (which is free). This is remembered in the block header + by the enum BLOCK_TEMPERATURE temperature variable. Remembering the + temperature is neccessary to correctly count the number of warm blocks, + which is required to decide when blocks are allowed to become hot. Whenever + a block is inserted to another (sub-)chain, we take the old and new + temperature into account to decide if we got one more or less warm block. + blocks_unused is the sum of never used blocks in the pool and of currently + free blocks. blocks_used is the number of blocks fetched from the pool and + as such gives the maximum number of in-use blocks at any time. 
+*/ + +#include "mysys_priv.h" +#include +#include "my_static.h" +#include +#include +#include + +/* + Some compilation flags have been added specifically for this module + to control the following: + - not to let a thread to yield the control when reading directly + from page cache, which might improve performance in many cases; + to enable this add: + #define SERIALIZED_READ_FROM_CACHE + - to set an upper bound for number of threads simultaneously + using the page cache; this setting helps to determine an optimal + size for hash table and improve performance when the number of + blocks in the page cache much less than the number of threads + accessing it; + to set this number equal to add + #define MAX_THREADS + - to substitute calls of pthread_cond_wait for calls of + pthread_cond_timedwait (wait with timeout set up); + this setting should be used only when you want to trap a deadlock + situation, which theoretically should not happen; + to set timeout equal to seconds add + #define PAGECACHE_TIMEOUT + - to enable the module traps and to send debug information from + page cache module to a special debug log add: + #define PAGECACHE_DEBUG + the name of this debug log file can be set through: + #define PAGECACHE_DEBUG_LOG + if the name is not defined, it's set by default; + if the PAGECACHE_DEBUG flag is not set up and we are in a debug + mode, i.e. when ! defined(DBUG_OFF), the debug information from the + module is sent to the regular debug log. 
+ + Example of the settings: + #define SERIALIZED_READ_FROM_CACHE + #define MAX_THREADS 100 + #define PAGECACHE_TIMEOUT 1 + #define PAGECACHE_DEBUG + #define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log" +*/ + +#define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log" + +/* + In key cache we have external raw locking here we use + SERIALIZED_READ_FROM_CACHE to avoid problem of reading + not consistent data from te page +*/ +#define SERIALIZED_READ_FROM_CACHE yes + +#define BLOCK_INFO(B) DBUG_PRINT("info", \ + ("block 0x%lx, file %lu, page %lu, s %0x", \ + (ulong)(B), \ + (ulong)((B)->hash_link ? \ + (B)->hash_link->file.file : \ + 0), \ + (ulong)((B)->hash_link ? \ + (B)->hash_link->pageno : \ + 0), \ + (B)->status)) + +/* TODO: put it to my_static.c */ +my_bool my_disable_flush_pagecache_blocks= 0; + +#if defined(MSDOS) && !defined(M_IC80386) +/* we nead much memory */ +#undef my_malloc_lock +#undef my_free_lock +#define my_malloc_lock(A,B) halloc((long) (A/IO_SIZE),IO_SIZE) +#define my_free_lock(A,B) hfree(A) +#endif /* defined(MSDOS) && !defined(M_IC80386) */ + +#define STRUCT_PTR(TYPE, MEMBER, a) \ + (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)) + +/* types of condition variables */ +#define COND_FOR_REQUESTED 0 +#define COND_FOR_SAVED 1 +#define COND_FOR_WRLOCK 2 +#define COND_FOR_COPY 3 +#define COND_SIZE 4 + +typedef pthread_cond_t KEYCACHE_CONDVAR; + +/* descriptor of the page in the page cache block buffer */ +struct st_pagecache_page +{ + PAGECACHE_FILE file; /* file to which the page belongs to */ + maria_page_no_t pageno; /* number of the page in the file */ +}; + +/* element in the chain of a hash table bucket */ +struct st_pagecache_hash_link +{ + struct st_pagecache_hash_link + *next, **prev; /* to connect links in the same bucket */ + struct st_pagecache_block_link + *block; /* reference to the block for the page: */ + PAGECACHE_FILE file; /* from such a file */ + maria_page_no_t pageno; /* this page */ + uint requests; /* number of requests for the 
page */ +}; + +/* simple states of a block */ +#define BLOCK_ERROR 1 /* an error occured when performing disk i/o */ +#define BLOCK_READ 2 /* the is page in the block buffer */ +#define BLOCK_IN_SWITCH 4 /* block is preparing to read new page */ +#define BLOCK_REASSIGNED 8 /* block does not accept requests for old page */ +#define BLOCK_IN_FLUSH 16 /* block is in flush operation */ +#define BLOCK_CHANGED 32 /* block buffer contains a dirty page */ +#define BLOCK_WRLOCK 64 /* write locked block */ +#define BLOCK_CPYWRT 128 /* block buffer is in copy&write (see also cpyrd) */ + +/* page status, returned by find_key_block */ +#define PAGE_READ 0 +#define PAGE_TO_BE_READ 1 +#define PAGE_WAIT_TO_BE_READ 2 + +/* block temperature determines in which (sub-)chain the block currently is */ +enum BLOCK_TEMPERATURE { BLOCK_COLD /*free*/ , BLOCK_WARM , BLOCK_HOT }; + +/* debug info */ +#ifndef DBUG_OFF +static char *page_cache_page_type_str[]= +{ + (char*)"PLAIN", + (char*)"LSN" +}; +static char *page_cache_page_write_mode_str[]= +{ + (char*)"DELAY", + (char*)"NOW", + (char*)"DONE" +}; +static char *page_cache_page_lock_str[]= +{ + (char*)"free -> free ", + (char*)"read -> read ", + (char*)"write -> write", + (char*)"free -> read ", + (char*)"free -> write", + (char*)"read -> free ", + (char*)"write -> free ", + (char*)"write -> read " +}; +static char *page_cache_page_pin_str[]= +{ + (char*)"pinned -> pinned ", + (char*)"unpinned -> unpinned", + (char*)"unpinned -> pinned ", + (char*)"pinned -> unpinned" +}; +#endif +#ifdef PAGECACHE_DEBUG +typedef struct st_pagecache_pin_info +{ + struct st_pagecache_pin_info *next, **prev; + struct st_my_thread_var *thread; +} PAGECACHE_PIN_INFO; +/* + st_pagecache_lock_info structure should be kept in next, prev, thread part + compatible with st_pagecache_pin_info to be compatible in functions. 
+*/ +typedef struct st_pagecache_lock_info +{ + struct st_pagecache_lock_info *next, **prev; + struct st_my_thread_var *thread; + my_bool write_lock; +} PAGECACHE_LOCK_INFO; +/* service functions */ +void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node) +{ + if ((node->next= *list)) + node->next->prev= &(node->next); + *list= node; + node->prev= list; +} +void info_unlink(PAGECACHE_PIN_INFO *node) +{ + if ((*node->prev= node->next)) + node->next->prev= node->prev; +} +PAGECACHE_PIN_INFO *info_find(PAGECACHE_PIN_INFO *list, + struct st_my_thread_var *thread) +{ + register PAGECACHE_PIN_INFO *i= list; + for(; i != 0; i= i->next) + if (i->thread == thread) + return i; + return 0; +} +#endif + +/* page cache block */ +struct st_pagecache_block_link +{ + struct st_pagecache_block_link + *next_used, **prev_used; /* to connect links in the LRU chain (ring) */ + struct st_pagecache_block_link + *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */ + struct st_pagecache_hash_link + *hash_link; /* backward ptr to referring hash_link */ + PAGECACHE_WQUEUE + wqueue[COND_SIZE]; /* queues on waiting requests for new/old pages */ + uint requests; /* number of requests for the block */ + byte *buffer; /* buffer for the block page */ + volatile uint status; /* state of the block */ + volatile uint pins; /* pin counter */ +#ifdef PAGECACHE_DEBUG + PAGECACHE_PIN_INFO *pin_list; + PAGECACHE_LOCK_INFO *lock_list; +#endif + enum BLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */ + enum pagecache_page_type type; /* type of the block */ + uint hits_left; /* number of hits left until promotion */ + ulonglong last_hit_time; /* timestamp of the last hit */ + KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */ +}; + +#ifdef PAGECACHE_DEBUG +/* debug checks */ +bool info_check_pin(PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_pin mode) +{ + struct st_my_thread_var *thread= my_thread_var; + 
DBUG_ENTER("info_check_pin"); + PAGECACHE_PIN_INFO *info= info_find(block->pin_list, thread); + if (info) + { + if (mode == PAGECACHE_PIN_LEFT_UNPINNED) + { + DBUG_PRINT("info", + ("info_check_pin: thread: 0x%lx block 0x%lx: LEFT_UNPINNED!!!", + (ulong)thread, (ulong)block)); + DBUG_RETURN(1); + } + else if (mode == PAGECACHE_PIN) + { + DBUG_PRINT("info", + ("info_check_pin: thread: 0x%lx block 0x%lx: PIN!!!", + (ulong)thread, (ulong)block)); + DBUG_RETURN(1); + } + } + else + { + if (mode == PAGECACHE_PIN_LEFT_PINNED) + { + DBUG_PRINT("info", + ("info_check_pin: thread: 0x%lx block 0x%lx: LEFT_PINNED!!!", + (ulong)thread, (ulong)block)); + DBUG_RETURN(1); + } + else if (mode == PAGECACHE_UNPIN) + { + DBUG_PRINT("info", + ("info_check_pin: thread: 0x%lx block 0x%lx: UNPIN!!!", + (ulong)thread, (ulong)block)); + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} +bool info_check_lock(PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin) +{ + struct st_my_thread_var *thread= my_thread_var; + DBUG_ENTER("info_check_lock"); + PAGECACHE_LOCK_INFO *info= + (PAGECACHE_LOCK_INFO *) info_find((PAGECACHE_PIN_INFO *) block->lock_list, + thread); + switch(lock) + { + case PAGECACHE_LOCK_LEFT_UNLOCKED: + DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED); + if (info) + { + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: %c : U->U", + (ulong)thread, (ulong)block, (info->write_lock?'W':'R'))); + DBUG_RETURN(1); + } + break; + case PAGECACHE_LOCK_LEFT_READLOCKED: + DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || + pin == PAGECACHE_PIN_LEFT_UNPINNED); + if (info == 0 || info->write_lock) + { + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: %c : R->R", + (ulong)thread, (ulong)block, (info?'W':'U'))); + DBUG_RETURN(1); + } + break; + case PAGECACHE_LOCK_LEFT_WRITELOCKED: + DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_PINNED); + if (info == 0 || !info->write_lock) + { + DBUG_PRINT("info", + ("info_check_lock: thread: 
0x%lx block 0x%lx: %c : W->W", + (ulong)thread, (ulong)block, (info?'R':'U'))); + DBUG_RETURN(1); + } + break; + case PAGECACHE_LOCK_READ: + DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || + pin == PAGECACHE_PIN); + if (info != 0) + { + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: %c : U->R", + (ulong)thread, (ulong)block, (info->write_lock?'W':'R'))); + DBUG_RETURN(1); + } + break; + case PAGECACHE_LOCK_WRITE: + DBUG_ASSERT(pin == PAGECACHE_PIN); + if (info != 0) + { + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: %c : U->W", + (ulong)thread, (ulong)block, (info->write_lock?'W':'R'))); + DBUG_RETURN(1); + } + break; + + case PAGECACHE_LOCK_READ_UNLOCK: + DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || + pin == PAGECACHE_UNPIN); + if (info == 0 || info->write_lock) + { + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: %c : R->U", + (ulong)thread, (ulong)block, (info?'W':'U'))); + DBUG_RETURN(1); + } + break; + case PAGECACHE_LOCK_WRITE_UNLOCK: + DBUG_ASSERT(pin == PAGECACHE_UNPIN); + if (info == 0 || !info->write_lock) + { + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: %c : W->U", + (ulong)thread, (ulong)block, (info?'R':'U'))); + DBUG_RETURN(1); + } + break; + case PAGECACHE_LOCK_WRITE_TO_READ: + DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_PINNED || + pin == PAGECACHE_UNPIN); + if (info == 0 || !info->write_lock) + { + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: %c : W->U", + (ulong)thread, (ulong)block, (info?'R':'U'))); + DBUG_RETURN(1); + } + break; + } + DBUG_RETURN(0); +} +#endif + +#define FLUSH_CACHE 2000 /* sort this many blocks at once */ + +static int flush_all_key_blocks(PAGECACHE *pagecache); +#ifdef THREAD +static void link_into_queue(PAGECACHE_WQUEUE *wqueue, + struct st_my_thread_var *thread); +static void unlink_from_queue(PAGECACHE_WQUEUE *wqueue, + struct st_my_thread_var *thread); +#endif +static void free_block(PAGECACHE *pagecache, 
PAGECACHE_BLOCK_LINK *block); +static void test_key_cache(PAGECACHE *pagecache, + const char *where, my_bool lock); + +#define PAGECACHE_HASH(p, f, pos) (((ulong) (pos) + \ + (ulong) (f).file) & (p->hash_entries-1)) +#define FILE_HASH(f) ((uint) (f).file & (PAGECACHE_CHANGED_BLOCKS_HASH - 1)) + +#define DEFAULT_PAGECACHE_DEBUG_LOG "pagecache_debug.log" + +#if defined(PAGECACHE_DEBUG) && ! defined(PAGECACHE_DEBUG_LOG) +#define PAGECACHE_DEBUG_LOG DEFAULT_PAGECACHE_DEBUG_LOG +#endif + +#if defined(PAGECACHE_DEBUG_LOG) +static FILE *pagecache_debug_log= NULL; +static void pagecache_debug_print _VARARGS((const char *fmt, ...)); +#define PAGECACHE_DEBUG_OPEN \ + if (!pagecache_debug_log) \ + { \ + pagecache_debug_log= fopen(PAGECACHE_DEBUG_LOG, "w"); \ + (void) setvbuf(pagecache_debug_log, NULL, _IOLBF, BUFSIZ); \ + } + +#define PAGECACHE_DEBUG_CLOSE \ + if (pagecache_debug_log) \ + { \ + fclose(pagecache_debug_log); \ + pagecache_debug_log= 0; \ + } +#else +#define PAGECACHE_DEBUG_OPEN +#define PAGECACHE_DEBUG_CLOSE +#endif /* defined(PAGECACHE_DEBUG_LOG) */ + +#if defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) +#define KEYCACHE_DBUG_PRINT(l, m) \ + { if (pagecache_debug_log) \ + fprintf(pagecache_debug_log, "%s: ", l); \ + pagecache_debug_print m; } + +#define KEYCACHE_DBUG_ASSERT(a) \ + { if (! 
(a) && pagecache_debug_log) \ + fclose(pagecache_debug_log); \ + assert(a); } +#else +#define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m) +#define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a) +#endif /* defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) */ + +#if defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF) +#ifdef THREAD +static long pagecache_thread_id; +#define KEYCACHE_THREAD_TRACE(l) \ + KEYCACHE_DBUG_PRINT(l,("|thread %ld",pagecache_thread_id)) + +#define KEYCACHE_THREAD_TRACE_BEGIN(l) \ + { struct st_my_thread_var *thread_var= my_thread_var; \ + pagecache_thread_id= thread_var->id; \ + KEYCACHE_DBUG_PRINT(l,("[thread %ld",pagecache_thread_id)) } + +#define KEYCACHE_THREAD_TRACE_END(l) \ + KEYCACHE_DBUG_PRINT(l,("]thread %ld",pagecache_thread_id)) +#else /* THREAD */ +#define KEYCACHE_THREAD_TRACE(l) KEYCACHE_DBUG_PRINT(l,("")) +#define KEYCACHE_THREAD_TRACE_BEGIN(l) KEYCACHE_DBUG_PRINT(l,("")) +#define KEYCACHE_THREAD_TRACE_END(l) KEYCACHE_DBUG_PRINT(l,("")) +#endif /* THREAD */ +#else +#define KEYCACHE_THREAD_TRACE_BEGIN(l) +#define KEYCACHE_THREAD_TRACE_END(l) +#define KEYCACHE_THREAD_TRACE(l) +#endif /* defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF) */ + +#define BLOCK_NUMBER(p, b) \ + ((uint) (((char*)(b)-(char *) p->block_root)/sizeof(PAGECACHE_BLOCK_LINK))) +#define PAGECACHE_HASH_LINK_NUMBER(p, h) \ + ((uint) (((char*)(h)-(char *) p->hash_link_root)/ \ + sizeof(PAGECACHE_HASH_LINK))) + +#if (defined(PAGECACHE_TIMEOUT) && !defined(__WIN__)) || defined(PAGECACHE_DEBUG) +static int pagecache_pthread_cond_wait(pthread_cond_t *cond, + pthread_mutex_t *mutex); +#else +#define pagecache_pthread_cond_wait pthread_cond_wait +#endif + +#if defined(PAGECACHE_DEBUG) +static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex); +static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex); +static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond); +#define pagecache_pthread_mutex_lock(M) \ +{ DBUG_PRINT("lock", ("mutex lock 0x%lx %u", 
(ulong)(M), __LINE__)); \ + ___pagecache_pthread_mutex_lock(M);} +#define pagecache_pthread_mutex_unlock(M) \ +{ DBUG_PRINT("lock", ("mutex unlock 0x%lx %u", (ulong)(M), __LINE__)); \ + ___pagecache_pthread_mutex_unlock(M);} +#define pagecache_pthread_cond_signal(M) \ +{ DBUG_PRINT("lock", ("signal 0x%lx %u", (ulong)(M), __LINE__)); \ + ___pagecache_pthread_cond_signal(M);} +#else +#define pagecache_pthread_mutex_lock pthread_mutex_lock +#define pagecache_pthread_mutex_unlock pthread_mutex_unlock +#define pagecache_pthread_cond_signal pthread_cond_signal +#endif /* defined(PAGECACHE_DEBUG) */ + + +/* + Read page from the disk + + SYNOPSIS + pagecache_fwrite() + pagecache - page cache pointer + filedesc - pagecache file descriptor structure + buffer - buffer in which we will read + type - page type (plain or with LSN) + flags - MYF() flags + + RETURN + 0 - OK + !=0 - Error +*/ +uint pagecache_fwrite(PAGECACHE *pagecache, + PAGECACHE_FILE *filedesc, + byte *buffer, + maria_page_no_t pageno, + enum pagecache_page_type type, + myf flags) +{ + DBUG_ENTER("pagecache_fwrite"); + if (type == PAGECACHE_LSN_PAGE) + { + DBUG_PRINT("info", ("Log handler call")); + /* TODO: put here loghandler call */ + } + DBUG_RETURN(my_pwrite(filedesc->file, buffer, pagecache->block_size, + (pageno)<<(pagecache->shift), flags)); +} + + +/* + Read page from the disk + + SYNOPSIS + pagecache_fread() + pagecache - page cache pointer + filedesc - pagecache file descriptor structure + buffer - buffer in which we will read + pageno - page number + flags - MYF() flags +*/ +#define pagecache_fread(pagecache, filedesc, buffer, pageno, flags) \ + my_pread((filedesc)->file, buffer, pagecache->block_size, \ + (pageno)<<(pagecache->shift), flags) + + +static uint next_power(uint value) +{ + uint old_value= 1; + while (value) + { + old_value= value; + value&= value-1; + } + return (old_value << 1); +} + + +/* + Initialize a page cache + + SYNOPSIS + init_pagecache() + pagecache pointer to a page cache 
data structure + key_cache_block_size size of blocks to keep cached data + use_mem total memory to use for the key cache + division_limit division limit (may be zero) + age_threshold age threshold (may be zero) + block_size size of block (should be power of 2) + loghandler logfandler pointer to call it in case of + pages with LSN + + RETURN VALUE + number of blocks in the key cache, if successful, + 0 - otherwise. + + NOTES. + if pagecache->inited != 0 we assume that the key cache + is already initialized. This is for now used by myisamchk, but shouldn't + be something that a program should rely on! + + It's assumed that no two threads call this function simultaneously + referring to the same key cache handle. + +*/ + +int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, + uint division_limit, uint age_threshold, + uint block_size, + LOG_HANDLER *loghandler) +{ + int blocks, hash_links, length; + int error; + DBUG_ENTER("init_key_cache"); + DBUG_ASSERT(block_size >= 512); + + PAGECACHE_DEBUG_OPEN; + if (pagecache->inited && pagecache->disk_blocks > 0) + { + DBUG_PRINT("warning",("key cache already in use")); + DBUG_RETURN(0); + } + + pagecache->loghandler= loghandler; + + pagecache->global_cache_w_requests= pagecache->global_cache_r_requests= 0; + pagecache->global_cache_read= pagecache->global_cache_write= 0; + pagecache->disk_blocks= -1; + if (! 
pagecache->inited) + { + pagecache->inited= 1; + pagecache->in_init= 0; + pthread_mutex_init(&pagecache->cache_lock, MY_MUTEX_INIT_FAST); + pagecache->resize_queue.last_thread= NULL; + } + + pagecache->mem_size= use_mem; + pagecache->block_size= block_size; + pagecache->shift= my_bit_log2(block_size); + DBUG_PRINT("info", ("block_size: %u", + block_size)); + + blocks= (int) (use_mem / (sizeof(PAGECACHE_BLOCK_LINK) + + 2 * sizeof(PAGECACHE_HASH_LINK) + + sizeof(PAGECACHE_HASH_LINK*) * + 5/4 + block_size)); + /* It doesn't make sense to have too few blocks (less than 8) */ + if (blocks >= 8 && pagecache->disk_blocks < 0) + { + for ( ; ; ) + { + /* Set my_hash_entries to the next bigger 2 power */ + if ((pagecache->hash_entries= next_power((uint)blocks)) < + ((uint)blocks) * 5/4) + pagecache->hash_entries<<= 1; + hash_links= 2 * blocks; +#if defined(MAX_THREADS) + if (hash_links < MAX_THREADS + blocks - 1) + hash_links= MAX_THREADS + blocks - 1; +#endif + while ((length= (ALIGN_SIZE(blocks * sizeof(PAGECACHE_BLOCK_LINK)) + + ALIGN_SIZE(hash_links * sizeof(PAGECACHE_HASH_LINK)) + + ALIGN_SIZE(sizeof(PAGECACHE_HASH_LINK*) * + pagecache->hash_entries))) + + ((ulong) blocks << pagecache->shift) > use_mem) + blocks--; + /* Allocate memory for cache page buffers */ + if ((pagecache->block_mem= + my_large_malloc((ulong) blocks * pagecache->block_size, + MYF(MY_WME)))) + { + /* + Allocate memory for blocks, hash_links and hash entries; + For each block 2 hash links are allocated + */ + if ((pagecache->block_root= + (PAGECACHE_BLOCK_LINK*) my_malloc((uint) length, + MYF(0)))) + break; + my_large_free(pagecache->block_mem, MYF(0)); + pagecache->block_mem= 0; + } + if (blocks < 8) + { + my_errno= ENOMEM; + goto err; + } + blocks= blocks / 4*3; + } + pagecache->blocks_unused= (ulong) blocks; + pagecache->disk_blocks= (int) blocks; + pagecache->hash_links= hash_links; + pagecache->hash_root= + (PAGECACHE_HASH_LINK**) ((char*) pagecache->block_root + + 
ALIGN_SIZE(blocks*sizeof(PAGECACHE_BLOCK_LINK))); + pagecache->hash_link_root= + (PAGECACHE_HASH_LINK*) ((char*) pagecache->hash_root + + ALIGN_SIZE((sizeof(PAGECACHE_HASH_LINK*) * + pagecache->hash_entries))); + bzero((byte*) pagecache->block_root, + pagecache->disk_blocks * sizeof(PAGECACHE_BLOCK_LINK)); + bzero((byte*) pagecache->hash_root, + pagecache->hash_entries * sizeof(PAGECACHE_HASH_LINK*)); + bzero((byte*) pagecache->hash_link_root, + pagecache->hash_links * sizeof(PAGECACHE_HASH_LINK)); + pagecache->hash_links_used= 0; + pagecache->free_hash_list= NULL; + pagecache->blocks_used= pagecache->blocks_changed= 0; + + pagecache->global_blocks_changed= 0; + pagecache->blocks_available=0; /* For debugging */ + + /* The LRU chain is empty after initialization */ + pagecache->used_last= NULL; + pagecache->used_ins= NULL; + pagecache->free_block_list= NULL; + pagecache->time= 0; + pagecache->warm_blocks= 0; + pagecache->min_warm_blocks= (division_limit ? + blocks * division_limit / 100 + 1 : + (ulong)blocks); + pagecache->age_threshold= (age_threshold ? + blocks * age_threshold / 100 : + (ulong)blocks); + + pagecache->cnt_for_resize_op= 0; + pagecache->resize_in_flush= 0; + pagecache->can_be_used= 1; + + pagecache->waiting_for_hash_link.last_thread= NULL; + pagecache->waiting_for_block.last_thread= NULL; + DBUG_PRINT("exit", + ("disk_blocks: %d block_root: 0x%lx hash_entries: %d\ + hash_root: 0x%lx hash_links: %d hash_link_root: 0x%lx", + pagecache->disk_blocks, pagecache->block_root, + pagecache->hash_entries, pagecache->hash_root, + pagecache->hash_links, pagecache->hash_link_root)); + bzero((gptr) pagecache->changed_blocks, + sizeof(pagecache->changed_blocks[0]) * + PAGECACHE_CHANGED_BLOCKS_HASH); + bzero((gptr) pagecache->file_blocks, + sizeof(pagecache->file_blocks[0]) * + PAGECACHE_CHANGED_BLOCKS_HASH); + } + + pagecache->blocks= pagecache->disk_blocks > 0 ? 
pagecache->disk_blocks : 0; + DBUG_RETURN((uint) pagecache->disk_blocks); + +err: + error= my_errno; + pagecache->disk_blocks= 0; + pagecache->blocks= 0; + if (pagecache->block_mem) + { + my_large_free((gptr) pagecache->block_mem, MYF(0)); + pagecache->block_mem= NULL; + } + if (pagecache->block_root) + { + my_free((gptr) pagecache->block_root, MYF(0)); + pagecache->block_root= NULL; + } + my_errno= error; + pagecache->can_be_used= 0; + DBUG_RETURN(0); +} + + +/* + Resize a key cache + + SYNOPSIS + resize_pagecache() + pagecache pointer to a page cache data structure + use_mem total memory to use for the new key cache + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + + RETURN VALUE + number of blocks in the key cache, if successful, + 0 - otherwise. + + NOTES. + The function first compares the memory size parameter + with the key cache value. + + If they differ the function free the the memory allocated for the + old key cache blocks by calling the end_pagecache function and + then rebuilds the key cache with new blocks by calling + init_key_cache. + + The function starts the operation only when all other threads + performing operations with the key cache let her to proceed + (when cnt_for_resize=0). 
+*/
+
+int resize_pagecache(PAGECACHE *pagecache,
+                     my_size_t use_mem, uint division_limit,
+                     uint age_threshold)
+{
+  int blocks;
+  struct st_my_thread_var *thread;
+  PAGECACHE_WQUEUE *wqueue;
+  DBUG_ENTER("resize_pagecache");
+
+  if (!pagecache->inited)
+    DBUG_RETURN(pagecache->disk_blocks);
+
+  if(use_mem == pagecache->mem_size)
+  {
+    change_pagecache_param(pagecache, division_limit, age_threshold);
+    DBUG_RETURN(pagecache->disk_blocks);
+  }
+
+  pagecache_pthread_mutex_lock(&pagecache->cache_lock);
+
+#ifdef THREAD
+  wqueue= &pagecache->resize_queue;
+  thread= my_thread_var;
+  link_into_queue(wqueue, thread);
+
+  /* Wait until we are at the head of the resize queue */
+  while (wqueue->last_thread->next != thread)
+  {
+    pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
+  }
+#endif
+
+  pagecache->resize_in_flush= 1;
+  if (flush_all_key_blocks(pagecache))
+  {
+    /* TODO: if this happens, we should write a warning in the log file ! */
+    pagecache->resize_in_flush= 0;
+    blocks= 0;
+    pagecache->can_be_used= 0;
+    goto finish;
+  }
+  pagecache->resize_in_flush= 0;
+  pagecache->can_be_used= 0;
+#ifdef THREAD
+  while (pagecache->cnt_for_resize_op)
+  {
+    KEYCACHE_DBUG_PRINT("resize_pagecache: wait",
+                        ("suspend thread %ld", thread->id));
+    pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
+  }
+#else
+  KEYCACHE_DBUG_ASSERT(pagecache->cnt_for_resize_op == 0);
+#endif
+
+  end_pagecache(pagecache, 0);    /* Don't free mutex */
+  /* The following will work even if use_mem is 0 */
+  /*
+    Argument order must match init_pagecache(pagecache, use_mem,
+    division_limit, age_threshold, block_size, loghandler).  The old call
+    passed block_size in the use_mem position, shifting every later
+    argument one place to the right.
+  */
+  blocks= init_pagecache(pagecache, use_mem, division_limit, age_threshold,
+                         pagecache->block_size, pagecache->loghandler);
+
+finish:
+#ifdef THREAD
+  unlink_from_queue(wqueue, thread);
+  /* Signal for the next resize request to proceed if any */
+  if (wqueue->last_thread)
+  {
+    KEYCACHE_DBUG_PRINT("resize_pagecache: signal",
+                        ("thread %ld", wqueue->last_thread->next->id));
+    pagecache_pthread_cond_signal(&wqueue->last_thread->next->suspend);
+  }
+#endif
+  pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+  
DBUG_RETURN(blocks); +} + + +/* + Increment counter blocking resize key cache operation +*/ +static inline void inc_counter_for_resize_op(PAGECACHE *pagecache) +{ + pagecache->cnt_for_resize_op++; +} + + +/* + Decrement counter blocking resize key cache operation; + Signal the operation to proceed when counter becomes equal zero +*/ +static inline void dec_counter_for_resize_op(PAGECACHE *pagecache) +{ +#ifdef THREAD + struct st_my_thread_var *last_thread; + if (!--pagecache->cnt_for_resize_op && + (last_thread= pagecache->resize_queue.last_thread)) + { + KEYCACHE_DBUG_PRINT("dec_counter_for_resize_op: signal", + ("thread %ld", last_thread->next->id)); + pagecache_pthread_cond_signal(&last_thread->next->suspend); + } +#else + pagecache->cnt_for_resize_op--; +#endif +} + +/* + Change the page cache parameters + + SYNOPSIS + change_pagecache_param() + pagecache pointer to a page cache data structure + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + + RETURN VALUE + none + + NOTES. + Presently the function resets the key cache parameters + concerning midpoint insertion strategy - division_limit and + age_threshold. 
+*/ + +void change_pagecache_param(PAGECACHE *pagecache, uint division_limit, + uint age_threshold) +{ + DBUG_ENTER("change_pagecache_param"); + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (division_limit) + pagecache->min_warm_blocks= (pagecache->disk_blocks * + division_limit / 100 + 1); + if (age_threshold) + pagecache->age_threshold= (pagecache->disk_blocks * + age_threshold / 100); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_VOID_RETURN; +} + + +/* + Remove page cache from memory + + SYNOPSIS + end_pagecache() + pagecache page cache handle + cleanup Complete free (Free also mutex for key cache) + + RETURN VALUE + none +*/ + +void end_pagecache(PAGECACHE *pagecache, my_bool cleanup) +{ + DBUG_ENTER("end_pagecache"); + DBUG_PRINT("enter", ("key_cache: 0x%lx", pagecache)); + + if (!pagecache->inited) + DBUG_VOID_RETURN; + + if (pagecache->disk_blocks > 0) + { + if (pagecache->block_mem) + { + my_large_free((gptr) pagecache->block_mem, MYF(0)); + pagecache->block_mem= NULL; + my_free((gptr) pagecache->block_root, MYF(0)); + pagecache->block_root= NULL; + } + pagecache->disk_blocks= -1; + /* Reset blocks_changed to be safe if flush_all_key_blocks is called */ + pagecache->blocks_changed= 0; + } + + DBUG_PRINT("status", ("used: %d changed: %d w_requests: %lu " + "writes: %lu r_requests: %lu reads: %lu", + pagecache->blocks_used, pagecache->global_blocks_changed, + (ulong) pagecache->global_cache_w_requests, + (ulong) pagecache->global_cache_write, + (ulong) pagecache->global_cache_r_requests, + (ulong) pagecache->global_cache_read)); + + if (cleanup) + { + pthread_mutex_destroy(&pagecache->cache_lock); + pagecache->inited= pagecache->can_be_used= 0; + PAGECACHE_DEBUG_CLOSE; + } + DBUG_VOID_RETURN; +} /* end_pagecache */ + + +#ifdef THREAD +/* + Link a thread into double-linked queue of waiting threads. 
+ + SYNOPSIS + link_into_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. + Queue is represented by a circular list of the thread structures + The list is double-linked of the type (**prev,*next), accessed by + a pointer to the last element. +*/ + +static void link_into_queue(PAGECACHE_WQUEUE *wqueue, + struct st_my_thread_var *thread) +{ + struct st_my_thread_var *last; + if (! (last= wqueue->last_thread)) + { + /* Queue is empty */ + thread->next= thread; + thread->prev= &thread->next; + } + else + { + thread->prev= last->next->prev; + last->next->prev= &thread->next; + thread->next= last->next; + last->next= thread; + } + wqueue->last_thread= thread; +} + +/* + Unlink a thread from double-linked queue of waiting threads + + SYNOPSIS + unlink_from_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be removed from the queue + + RETURN VALUE + none + + NOTES. + See NOTES for link_into_queue +*/ + +static void unlink_from_queue(PAGECACHE_WQUEUE *wqueue, + struct st_my_thread_var *thread) +{ + KEYCACHE_DBUG_PRINT("unlink_from_queue", ("thread %ld", thread->id)); + if (thread->next == thread) + /* The queue contains only one member */ + wqueue->last_thread= NULL; + else + { + thread->next->prev= thread->prev; + *thread->prev=thread->next; + if (wqueue->last_thread == thread) + wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next, + thread->prev); + } + thread->next= NULL; +} + + +/* + Add a thread to single-linked queue of waiting threads + + SYNOPSIS + add_to_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. + Queue is represented by a circular list of the thread structures + The list is single-linked of the type (*next), accessed by a pointer + to the last element. 
+*/ + +static inline void add_to_queue(PAGECACHE_WQUEUE *wqueue, + struct st_my_thread_var *thread) +{ + struct st_my_thread_var *last; + if (! (last= wqueue->last_thread)) + thread->next= thread; + else + { + thread->next= last->next; + last->next= thread; + } + wqueue->last_thread= thread; +} + + +/* + Remove all threads from queue signaling them to proceed + + SYNOPSIS + realease_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. + See notes for add_to_queue + When removed from the queue each thread is signaled via condition + variable thread->suspend. +*/ + +static void release_queue(PAGECACHE_WQUEUE *wqueue) +{ + struct st_my_thread_var *last= wqueue->last_thread; + struct st_my_thread_var *next= last->next; + struct st_my_thread_var *thread; + do + { + thread=next; + KEYCACHE_DBUG_PRINT("release_queue: signal", ("thread %ld", thread->id)); + pagecache_pthread_cond_signal(&thread->suspend); + next=thread->next; + thread->next= NULL; + } + while (thread != last); + wqueue->last_thread= NULL; +} +#endif + + +/* + Unlink a block from the chain of dirty/clean blocks +*/ + +static inline void unlink_changed(PAGECACHE_BLOCK_LINK *block) +{ + if (block->next_changed) + block->next_changed->prev_changed= block->prev_changed; + *block->prev_changed= block->next_changed; +} + + +/* + Link a block into the chain of dirty/clean blocks +*/ + +static inline void link_changed(PAGECACHE_BLOCK_LINK *block, + PAGECACHE_BLOCK_LINK **phead) +{ + block->prev_changed= phead; + if ((block->next_changed= *phead)) + (*phead)->prev_changed= &block->next_changed; + *phead= block; +} + + +/* + Unlink a block from the chain of dirty/clean blocks, if it's asked for, + and link it to the chain of clean blocks for the specified file +*/ + +static void link_to_file_list(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block, + PAGECACHE_FILE *file, my_bool unlink) +{ + if (unlink) + unlink_changed(block); + 
link_changed(block, &pagecache->file_blocks[FILE_HASH(*file)]); + if (block->status & BLOCK_CHANGED) + { + block->status&= ~BLOCK_CHANGED; + pagecache->blocks_changed--; + pagecache->global_blocks_changed--; + } +} + + +/* + Unlink a block from the chain of clean blocks for the specified + file and link it to the chain of dirty blocks for this file +*/ + +static inline void link_to_changed_list(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block) +{ + unlink_changed(block); + link_changed(block, + &pagecache->changed_blocks[FILE_HASH(block->hash_link->file)]); + block->status|=BLOCK_CHANGED; + pagecache->blocks_changed++; + pagecache->global_blocks_changed++; +} + + +/* + Link a block to the LRU chain at the beginning or at the end of + one of two parts. + + SYNOPSIS + link_block() + pagecache pointer to a page cache data structure + block pointer to the block to link to the LRU chain + hot <-> to link the block into the hot subchain + at_end <-> to link the block at the end of the subchain + + RETURN VALUE + none + + NOTES. + The LRU chain is represented by a curcular list of block structures. + The list is double-linked of the type (**prev,*next) type. + The LRU chain is divided into two parts - hot and warm. + There are two pointers to access the last blocks of these two + parts. The beginning of the warm part follows right after the + end of the hot part. + Only blocks of the warm part can be used for replacement. 
+ The first block from the beginning of this subchain is always + taken for eviction (pagecache->last_used->next) + + LRU chain: +------+ H O T +------+ + +----| end |----...<----| beg |----+ + | +------+last +------+ | + v<-link in latest hot (new end) | + | link in latest warm (new end)->^ + | +------+ W A R M +------+ | + +----| beg |---->...----| end |----+ + +------+ +------+ins + first for eviction +*/ + +static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, + my_bool hot, my_bool at_end) +{ + PAGECACHE_BLOCK_LINK *ins; + PAGECACHE_BLOCK_LINK **pins; + + KEYCACHE_DBUG_ASSERT(! (block->hash_link && block->hash_link->requests)); +#ifdef THREAD + if (!hot && pagecache->waiting_for_block.last_thread) + { + /* Signal that in the LRU warm sub-chain an available block has appeared */ + struct st_my_thread_var *last_thread= + pagecache->waiting_for_block.last_thread; + struct st_my_thread_var *first_thread= last_thread->next; + struct st_my_thread_var *next_thread= first_thread; + PAGECACHE_HASH_LINK *hash_link= + (PAGECACHE_HASH_LINK *) first_thread->opt_info; + struct st_my_thread_var *thread; + do + { + thread= next_thread; + next_thread= thread->next; + /* + We notify about the event all threads that ask + for the same page as the first thread in the queue + */ + if ((PAGECACHE_HASH_LINK *) thread->opt_info == hash_link) + { + KEYCACHE_DBUG_PRINT("link_block: signal", ("thread %ld", thread->id)); + pagecache_pthread_cond_signal(&thread->suspend); + unlink_from_queue(&pagecache->waiting_for_block, thread); + block->requests++; + } + } + while (thread != last_thread); + hash_link->block= block; + KEYCACHE_THREAD_TRACE("link_block: after signaling"); +#if defined(PAGECACHE_DEBUG) + KEYCACHE_DBUG_PRINT("link_block", + ("linked,unlinked block %u status=%x #requests=%u #available=%u", + BLOCK_NUMBER(pagecache, block), block->status, + block->requests, pagecache->blocks_available)); +#endif + return; + } +#else /* THREAD */ + KEYCACHE_DBUG_ASSERT(! 
(!hot && pagecache->waiting_for_block.last_thread)); + /* Condition not transformed using DeMorgan, to keep the text identical */ +#endif /* THREAD */ + pins= hot ? &pagecache->used_ins : &pagecache->used_last; + ins= *pins; + if (ins) + { + ins->next_used->prev_used= &block->next_used; + block->next_used= ins->next_used; + block->prev_used= &ins->next_used; + ins->next_used= block; + if (at_end) + *pins= block; + } + else + { + /* The LRU chain is empty */ + pagecache->used_last= pagecache->used_ins= block->next_used= block; + block->prev_used= &block->next_used; + } + KEYCACHE_THREAD_TRACE("link_block"); +#if defined(PAGECACHE_DEBUG) + pagecache->blocks_available++; + KEYCACHE_DBUG_PRINT("link_block", + ("linked block %u:%1u status=%x #requests=%u #available=%u", + BLOCK_NUMBER(pagecache, block), at_end, block->status, + block->requests, pagecache->blocks_available)); + KEYCACHE_DBUG_ASSERT((ulong) pagecache->blocks_available <= + pagecache->blocks_used); +#endif +} + + +/* + Unlink a block from the LRU chain + + SYNOPSIS + unlink_block() + pagecache pointer to a page cache data structure + block pointer to the block to unlink from the LRU chain + + RETURN VALUE + none + + NOTES. 
  See NOTES for link_block
*/

/*
  Unlink a block from the LRU chain.

  The block must currently be linked into the LRU chain on entry.
  Keeps the pagecache's used_last / used_ins markers consistent when the
  unlinked block happens to be one of them.  On return block->next_used
  is NULL, which is the "not in LRU chain" marker used elsewhere.
  Caller must hold pagecache->cache_lock (convention throughout this file).
*/
static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
{
  if (block->next_used == block)
    /* The list contains only one member */
    pagecache->used_last= pagecache->used_ins= NULL;
  else
  {
    block->next_used->prev_used= block->prev_used;
    *block->prev_used= block->next_used;
    if (pagecache->used_last == block)
      pagecache->used_last= STRUCT_PTR(PAGECACHE_BLOCK_LINK,
                                       next_used, block->prev_used);
    if (pagecache->used_ins == block)
      pagecache->used_ins= STRUCT_PTR(PAGECACHE_BLOCK_LINK,
                                      next_used, block->prev_used);
  }
  block->next_used= NULL;

  KEYCACHE_THREAD_TRACE("unlink_block");
#if defined(PAGECACHE_DEBUG)
  pagecache->blocks_available--;
  KEYCACHE_DBUG_PRINT("unlink_block",
    ("unlinked block 0x%lx (%u) status=%x #requests=%u #available=%u",
     (ulong)block, BLOCK_NUMBER(pagecache, block), block->status,
     block->requests, pagecache->blocks_available));
  BLOCK_INFO(block);
  KEYCACHE_DBUG_ASSERT(pagecache->blocks_available >= 0);
#endif
}


/*
  Register requests for a block.

  Adds count to block->requests.  A block with outstanding requests is
  kept out of the LRU chain (the first request unlinks it), so it cannot
  be evicted while in use.
*/
static void reg_requests(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
                         int count)
{
  DBUG_ENTER("reg_requests");
  DBUG_PRINT("enter", ("block 0x%lx (%u) status=%x, reqs: %u",
                       (ulong)block, BLOCK_NUMBER(pagecache, block),
                       block->status, block->requests));
  BLOCK_INFO(block);
  if (! block->requests)
    /* First request for the block unlinks it */
    unlink_block(pagecache, block);
  block->requests+=count;
  DBUG_VOID_RETURN;
}


/*
  Unregister request for a block
  linking it to the LRU chain if it's the last request

  SYNOPSIS
    unreg_request()
    pagecache           pointer to a page cache data structure
    block               pointer to the block to link to the LRU chain
    at_end              <-> to link the block at the end of the LRU chain

  RETURN VALUE
    none

  NOTES.
    Every linking to the LRU chain decrements by one a special block
    counter (if it's positive).
    If the at_end parameter is TRUE the block is
    added either at the end of the warm sub-chain or at the end of the hot
    sub-chain.  It is added to the hot sub-chain if its counter is zero and
    the number of blocks in the warm sub-chain is not less than some low
    limit (determined by the division_limit parameter). Otherwise the block
    is added to the warm sub-chain. If the at_end parameter is FALSE the
    block is always added at the beginning of the warm sub-chain.
    Thus a warm block can be promoted to the hot sub-chain when its counter
    becomes zero for the first time.
    At the same time the block at the very beginning of the hot sub-chain
    might be moved to the beginning of the warm sub-chain if it stays
    untouched for too long (this time is determined by the age_threshold
    parameter).
*/

static void unreg_request(PAGECACHE *pagecache,
                          PAGECACHE_BLOCK_LINK *block, int at_end)
{
  DBUG_ENTER("unreg_request");
  DBUG_PRINT("enter", ("block 0x%lx (%u) status=%x, reqs: %u",
                       (ulong)block, BLOCK_NUMBER(pagecache, block),
                       block->status, block->requests));
  BLOCK_INFO(block);
  if (! --block->requests)
  {
    my_bool hot;
    if (block->hits_left)
      block->hits_left--;
    /* Promote to hot only when the hit budget is used up (see NOTES above) */
    hot= !block->hits_left && at_end &&
      pagecache->warm_blocks > pagecache->min_warm_blocks;
    if (hot)
    {
      if (block->temperature == BLOCK_WARM)
        pagecache->warm_blocks--;
      block->temperature= BLOCK_HOT;
      KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks=%u",
                           pagecache->warm_blocks));
    }
    link_block(pagecache, block, hot, (my_bool)at_end);
    block->last_hit_time= pagecache->time;
    pagecache->time++;

    block= pagecache->used_ins;
    /*
      Check whether the oldest hot block has been untouched longer than
      age_threshold; if so demote it to the warm sub-chain.
    */
    if (block && pagecache->time - block->last_hit_time >
        pagecache->age_threshold)
    {
      unlink_block(pagecache, block);
      link_block(pagecache, block, 0, 0);
      if (block->temperature != BLOCK_WARM)
      {
        pagecache->warm_blocks++;
        block->temperature= BLOCK_WARM;
      }
      KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks=%u",
                           pagecache->warm_blocks));
    }
  }
  DBUG_VOID_RETURN;
}

/*
  Remove a reader of the page in block.

  Decrements the hash link's request count; when it hits zero, wakes the
  single thread (if any) parked in wait_for_readers() via block->condvar.
*/

static inline void remove_reader(PAGECACHE_BLOCK_LINK *block)
{
  if (! --block->hash_link->requests && block->condvar)
    pagecache_pthread_cond_signal(block->condvar);
}


/*
  Wait until the last reader of the page in block
  signals on its termination.

  Re-checks the predicate in a loop, as required for condition variables.
  Caller must hold pagecache->cache_lock (released while waiting).
*/

static inline void wait_for_readers(PAGECACHE *pagecache,
                                    PAGECACHE_BLOCK_LINK *block)
{
#ifdef THREAD
  struct st_my_thread_var *thread= my_thread_var;
  while (block->hash_link->requests)
  {
    KEYCACHE_DBUG_PRINT("wait_for_readers: wait",
                        ("suspend thread %ld block %u",
                         thread->id, BLOCK_NUMBER(pagecache, block)));
    block->condvar= &thread->suspend;
    pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock);
    block->condvar= NULL;
  }
#else
  KEYCACHE_DBUG_ASSERT(block->hash_link->requests == 0);
#endif
}


/*
  Add a hash link to the front of a bucket in the hash_table.
*/

static inline void link_hash(PAGECACHE_HASH_LINK **start,
                             PAGECACHE_HASH_LINK *hash_link)
{
  if (*start)
    (*start)->prev= &hash_link->next;
  hash_link->next= *start;
  hash_link->prev= start;
  *start= hash_link;
}


/*
  Remove a hash link from the hash table.

  If threads are waiting for a free hash link, this link is immediately
  reused for the first waiter's (file, pageno) and all waiters asking for
  that same page are woken; otherwise the link goes to the free list.
*/

static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link)
{
  KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u pos_ %lu #requests=%u",
      (uint) hash_link->file.file, (ulong) hash_link->pageno,
      hash_link->requests));
  KEYCACHE_DBUG_ASSERT(hash_link->requests == 0);
  if ((*hash_link->prev= hash_link->next))
    hash_link->next->prev= hash_link->prev;
  hash_link->block= NULL;
#ifdef THREAD
  if (pagecache->waiting_for_hash_link.last_thread)
  {
    /* Signal that a free hash link has appeared */
    struct st_my_thread_var *last_thread=
      pagecache->waiting_for_hash_link.last_thread;
    struct st_my_thread_var *first_thread= last_thread->next;
    struct st_my_thread_var *next_thread= first_thread;
    PAGECACHE_PAGE *first_page= (PAGECACHE_PAGE *) (first_thread->opt_info);
    struct st_my_thread_var *thread;

    hash_link->file= first_page->file;
    hash_link->pageno= first_page->pageno;
    do
    {
      PAGECACHE_PAGE *page;
      thread= next_thread;
      page= (PAGECACHE_PAGE *) thread->opt_info;
      next_thread= thread->next;
      /*
        We notify about the event all threads that ask
        for the same page as the first thread in the queue
      */
      if (page->file.file == hash_link->file.file &&
          page->pageno == hash_link->pageno)
      {
        KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id));
        pagecache_pthread_cond_signal(&thread->suspend);
        unlink_from_queue(&pagecache->waiting_for_hash_link, thread);
      }
    }
    while (thread != last_thread);
    link_hash(&pagecache->hash_root[PAGECACHE_HASH(pagecache,
                                                   hash_link->file,
                                                   hash_link->pageno)],
              hash_link);
    return;
  }
#else /* THREAD */
  KEYCACHE_DBUG_ASSERT(! (pagecache->waiting_for_hash_link.last_thread));
#endif /* THREAD */
  hash_link->next= pagecache->free_hash_list;
  pagecache->free_hash_list= hash_link;
}


/*
  Get the hash link for the page if it is in the cache.

  Returns the matching hash link or NULL; *start is set to the head of
  the bucket for (file, pageno) in either case.
*/

static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache,
                                                  PAGECACHE_FILE *file,
                                                  maria_page_no_t pageno,
                                                  PAGECACHE_HASH_LINK ***start)
{
  reg1 PAGECACHE_HASH_LINK *hash_link;
#if defined(PAGECACHE_DEBUG)
  int cnt;
#endif
  DBUG_ENTER("get_present_hash_link");

  KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu",
                      (uint) file->file, (ulong) pageno));

  /*
    Find the bucket in the hash table for the pair (file, pageno);
    start contains the head of the bucket list,
    hash_link points to the first member of the list
  */
  hash_link= *(*start= &pagecache->hash_root[PAGECACHE_HASH(pagecache,
                                                            *file, pageno)]);
#if defined(PAGECACHE_DEBUG)
  cnt= 0;
#endif
  /* Look for an element for the pair (file, pageno) in the bucket chain */
  while (hash_link &&
         (hash_link->pageno != pageno ||
          hash_link->file.file != file->file))
  {
    hash_link= hash_link->next;
#if defined(PAGECACHE_DEBUG)
    cnt++;
    /* A chain longer than hash_links_used means the list is corrupted */
    if (! (cnt <= pagecache->hash_links_used))
    {
      int i;
      for (i=0, hash_link= **start ;
           i < cnt ; i++, hash_link= hash_link->next)
      {
        KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu",
            (uint) hash_link->file.file, (ulong) hash_link->pageno));
      }
    }
    KEYCACHE_DBUG_ASSERT(cnt <= pagecache->hash_links_used);
#endif
  }
  DBUG_RETURN(hash_link);
}


/*
  Get the hash link for a page, allocating one if not present.

  May block (THREAD builds) waiting for a free hash link; always returns
  a valid link with its request count incremented.
*/

static PAGECACHE_HASH_LINK *get_hash_link(PAGECACHE *pagecache,
                                          PAGECACHE_FILE *file,
                                          maria_page_no_t pageno)
{
  reg1 PAGECACHE_HASH_LINK *hash_link;
  PAGECACHE_HASH_LINK **start;
  PAGECACHE_PAGE page;

  KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu",
                      (uint) file->file, (ulong) pageno));

restart:
  /* try to find the page in the cache */
  hash_link= get_present_hash_link(pagecache, file, pageno,
                                   &start);
  if (! hash_link)
  {
    /* There is no hash link in the hash table for the pair (file, pageno) */
    if (pagecache->free_hash_list)
    {
      hash_link= pagecache->free_hash_list;
      pagecache->free_hash_list= hash_link->next;
    }
    else if (pagecache->hash_links_used < pagecache->hash_links)
    {
      hash_link= &pagecache->hash_link_root[pagecache->hash_links_used++];
    }
    else
    {
#ifdef THREAD
      /* Wait for a free hash link */
      struct st_my_thread_var *thread= my_thread_var;
      KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting"));
      page.file= *file;
      page.pageno= pageno;
      thread->opt_info= (void *) &page;
      link_into_queue(&pagecache->waiting_for_hash_link, thread);
      KEYCACHE_DBUG_PRINT("get_hash_link: wait",
                          ("suspend thread %ld", thread->id));
      pagecache_pthread_cond_wait(&thread->suspend,
                                  &pagecache->cache_lock);
      thread->opt_info= NULL;
#else
      KEYCACHE_DBUG_ASSERT(0);
#endif
      /* Another thread may have created the link meanwhile: search again */
      goto restart;
    }
    hash_link->file= *file;
    hash_link->pageno= pageno;
    link_hash(start, hash_link);
  }
  /* Register the request for the page */
  hash_link->requests++;

  return hash_link;
}


/*
  Get a block for the file page requested by a pagecache read/write operation;
  If the page is not in the cache return a free block, if there is none
  return the lru block after saving its buffer if the page is dirty.

  SYNOPSIS

    find_key_block()
      pagecache       pointer to a page cache data structure
      file            handler for the file to read page from
      pageno          number of the page in the file
      init_hits_left  how to initialize the block counter for the page
      wrmode          <-> get for writing
      reg_req         Register request to the page
      page_st         out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}

  RETURN VALUE
    Pointer to the found block if successful, 0 - otherwise

  NOTES.
    For the page from file positioned at pageno the function checks whether
    the page is in the key cache specified by the first parameter.
    If this is the case it immediately returns the block.
    If not, the function first chooses a block for this page. If there is
    no not used blocks in the key cache yet, the function takes the block
    at the very beginning of the warm sub-chain. It saves the page in that
    block if it's dirty before returning the pointer to it.
    The function returns in the page_st parameter the following values:
      PAGE_READ         - if page already in the block,
      PAGE_TO_BE_READ   - if it is to be read yet by the current thread
      WAIT_TO_BE_READ   - if it is to be read by another thread
    If an error occurs the BLOCK_ERROR bit is set in the block status.
    It might happen that there are no blocks in LRU chain (in warm part) -
    all blocks are unlinked for some read/write operations. Then the function
    waits until the first of these operations links some block back.
+*/ + +static PAGECACHE_BLOCK_LINK *find_key_block(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno, + int init_hits_left, + my_bool wrmode, + my_bool reg_req, + int *page_st) +{ + PAGECACHE_HASH_LINK *hash_link; + PAGECACHE_BLOCK_LINK *block; + int error= 0; + int page_status; + + DBUG_ENTER("find_key_block"); + KEYCACHE_THREAD_TRACE("find_key_block:begin"); + DBUG_PRINT("enter", ("fd: %u pos %lu wrmode: %lu", + (uint) file->file, (ulong) pageno, (uint) wrmode)); + KEYCACHE_DBUG_PRINT("find_key_block", ("fd: %u pos: %lu wrmode: %lu", + (uint) file->file, (ulong) pageno, + (uint) wrmode)); +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, "start of find_key_block", 0);); +#endif + +restart: + /* Find the hash link for the requested page (file, pageno) */ + hash_link= get_hash_link(pagecache, file, pageno); + + page_status= -1; + if ((block= hash_link->block) && + block->hash_link == hash_link && (block->status & BLOCK_READ)) + page_status= PAGE_READ; + + if (wrmode && pagecache->resize_in_flush) + { + /* This is a write request during the flush phase of a resize operation */ + + if (page_status != PAGE_READ) + { + /* We don't need the page in the cache: we are going to write on disk */ + hash_link->requests--; + unlink_hash(pagecache, hash_link); + return 0; + } + if (!(block->status & BLOCK_IN_FLUSH)) + { + hash_link->requests--; + /* + Remove block to invalidate the page in the block buffer + as we are going to write directly on disk. + Although we have an exlusive lock for the updated key part + the control can be yieded by the current thread as we might + have unfinished readers of other key parts in the block + buffer. Still we are guaranteed not to have any readers + of the key part we are writing into until the block is + removed from the cache as we set the BLOCK_REASSIGNED + flag (see the code below that handles reading requests). 
+ */ + free_block(pagecache, block); + return 0; + } + /* Wait intil the page is flushed on disk */ + hash_link->requests--; + { +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + do + { + KEYCACHE_DBUG_PRINT("find_key_block: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while(thread->next); +#else + KEYCACHE_DBUG_ASSERT(0); + /* + Given the use of "resize_in_flush", it seems impossible + that this whole branch is ever entered in single-threaded case + because "(wrmode && pagecache->resize_in_flush)" cannot be true. + TODO: Check this, and then put the whole branch into the + "#ifdef THREAD" guard. + */ +#endif + } + /* Invalidate page in the block if it has not been done yet */ + if (block->status) + free_block(pagecache, block); + return 0; + } + + if (page_status == PAGE_READ && + (block->status & (BLOCK_IN_SWITCH | BLOCK_REASSIGNED))) + { + /* This is a request for a page to be removed from cache */ + + KEYCACHE_DBUG_PRINT("find_key_block", + ("request for old page in block %u " + "wrmode: %d block->status: %d", + BLOCK_NUMBER(pagecache, block), wrmode, + block->status)); + /* + Only reading requests can proceed until the old dirty page is flushed, + all others are to be suspended, then resubmitted + */ + if (!wrmode && !(block->status & BLOCK_REASSIGNED)) + { + if (reg_req) + reg_requests(pagecache, block, 1); + } + else + { + hash_link->requests--; + KEYCACHE_DBUG_PRINT("find_key_block", + ("request waiting for old page to be saved")); + { +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + /* Put the request into the queue of those waiting for the old page */ + add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + /* Wait until the request can be resubmitted */ + do + { + KEYCACHE_DBUG_PRINT("find_key_block: wait", + ("suspend thread %ld", thread->id)); + 
pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while(thread->next); +#else + KEYCACHE_DBUG_ASSERT(0); + /* No parallel requests in single-threaded case */ +#endif + } + KEYCACHE_DBUG_PRINT("find_key_block", + ("request for old page resubmitted")); + /* Resubmit the request */ + goto restart; + } + } + else + { + /* This is a request for a new page or for a page not to be removed */ + if (! block) + { + /* No block is assigned for the page yet */ + if (pagecache->blocks_unused) + { + if (pagecache->free_block_list) + { + /* There is a block in the free list. */ + block= pagecache->free_block_list; + pagecache->free_block_list= block->next_used; + block->next_used= NULL; + } + else + { + /* There are some never used blocks, take first of them */ + block= &pagecache->block_root[pagecache->blocks_used]; + block->buffer= ADD_TO_PTR(pagecache->block_mem, + ((ulong) pagecache->blocks_used* + pagecache->block_size), + byte*); + pagecache->blocks_used++; + } + pagecache->blocks_unused--; + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + block->status= 0; +#ifndef DBUG_OFF + block->type= PAGECACHE_EMPTY_PAGE; +#endif + block->requests= 1; + block->temperature= BLOCK_COLD; + block->hits_left= init_hits_left; + block->last_hit_time= 0; + link_to_file_list(pagecache, block, file, 0); + block->hash_link= hash_link; + hash_link->block= block; + page_status= PAGE_TO_BE_READ; + KEYCACHE_DBUG_PRINT("find_key_block", + ("got free or never used block %u", + BLOCK_NUMBER(pagecache, block))); + } + else + { + /* There are no never used blocks, use a block from the LRU chain */ + + /* + Wait until a new block is added to the LRU chain; + several threads might wait here for the same page, + all of them must get the same block + */ + +#ifdef THREAD + if (! 
pagecache->used_last) + { + struct st_my_thread_var *thread= my_thread_var; + thread->opt_info= (void *) hash_link; + link_into_queue(&pagecache->waiting_for_block, thread); + do + { + KEYCACHE_DBUG_PRINT("find_key_block: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while (thread->next); + thread->opt_info= NULL; + } +#else + KEYCACHE_DBUG_ASSERT(pagecache->used_last); +#endif + block= hash_link->block; + if (! block) + { + /* + Take the first block from the LRU chain + unlinking it from the chain + */ + block= pagecache->used_last->next_used; + block->hits_left= init_hits_left; + block->last_hit_time= 0; + if (reg_req) + reg_requests(pagecache, block,1); + hash_link->block= block; + } + + if (block->hash_link != hash_link && + ! (block->status & BLOCK_IN_SWITCH) ) + { + /* this is a primary request for a new page */ + block->status|= BLOCK_IN_SWITCH; + + KEYCACHE_DBUG_PRINT("find_key_block", + ("got block %u for new page", + BLOCK_NUMBER(pagecache, block))); + + if (block->status & BLOCK_CHANGED) + { + /* The block contains a dirty page - push it out of the cache */ + + KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty")); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + The call is thread safe because only the current + thread might change the block->hash_link value + */ + DBUG_ASSERT(block->pins == 0); + error= pagecache_fwrite(pagecache, + &block->hash_link->file, + block->buffer, + block->hash_link->pageno, + block->type, + MYF(MY_NABP | MY_WAIT_IF_FULL)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + pagecache->global_cache_write++; + } + + block->status|= BLOCK_REASSIGNED; + if (block->hash_link) + { + /* + Wait until all pending read requests + for this page are executed + (we could have avoided this waiting, if we had read + a page in the cache in a sweep, without yielding control) + */ + wait_for_readers(pagecache, block); + + /* Remove the 
hash link for this page from the hash table */ + unlink_hash(pagecache, block->hash_link); + /* All pending requests for this page must be resubmitted */ + if (block->wqueue[COND_FOR_SAVED].last_thread) + release_queue(&block->wqueue[COND_FOR_SAVED]); + } + link_to_file_list(pagecache, block, file, + (my_bool)(block->hash_link ? 1 : 0)); + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + block->status= error? BLOCK_ERROR : 0; +#ifndef DBUG_OFF + block->type= PAGECACHE_EMPTY_PAGE; +#endif + block->hash_link= hash_link; + page_status= PAGE_TO_BE_READ; + + KEYCACHE_DBUG_ASSERT(block->hash_link->block == block); + KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link); + } + else + { + /* This is for secondary requests for a new page only */ + KEYCACHE_DBUG_PRINT("find_key_block", + ("block->hash_link: %p hash_link: %p " + "block->status: %u", block->hash_link, + hash_link, block->status )); + page_status= (((block->hash_link == hash_link) && + (block->status & BLOCK_READ)) ? + PAGE_READ : PAGE_WAIT_TO_BE_READ); + } + } + pagecache->global_cache_read++; + } + else + { + if (reg_req) + reg_requests(pagecache, block, 1); + KEYCACHE_DBUG_PRINT("find_key_block", + ("block->hash_link: %p hash_link: %p " + "block->status: %u", block->hash_link, + hash_link, block->status )); + page_status= (((block->hash_link == hash_link) && + (block->status & BLOCK_READ)) ? 
+ PAGE_READ : PAGE_WAIT_TO_BE_READ); + } + } + + KEYCACHE_DBUG_ASSERT(page_status != -1); + *page_st=page_status; + DBUG_PRINT("info", + ("block: 0x%lx fd: %u pos %lu block->status %u page_status %lu", + (ulong) block, (uint) file->file, + (ulong) pageno, block->status, (uint) page_status)); + KEYCACHE_DBUG_PRINT("find_key_block", + ("block: 0x%lx fd: %u pos %lu block->status %u page_status %lu", + (ulong) block, + (uint) file->file, (ulong) pageno, block->status, + (uint) page_status)); + +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, "end of find_key_block",0);); +#endif + KEYCACHE_THREAD_TRACE("find_key_block:end"); + DBUG_RETURN(block); +} + + +void pagecache_add_pin(PAGECACHE_BLOCK_LINK *block) +{ + DBUG_ENTER("pagecache_add_pin"); + DBUG_PRINT("enter", ("block 0x%lx pins: %u", + (ulong) block, + block->pins)); + BLOCK_INFO(block); + block->pins++; +#ifdef PAGECACHE_DEBUG + { + PAGECACHE_PIN_INFO *info= + (PAGECACHE_PIN_INFO *)my_malloc(sizeof(PAGECACHE_PIN_INFO), MYF(0)); + info->thread= my_thread_var; + info_link(&block->pin_list, info); + } +#endif + DBUG_VOID_RETURN; +} + +void pagecache_remove_pin(PAGECACHE_BLOCK_LINK *block) +{ + DBUG_ENTER("pagecache_remove_pin"); + DBUG_PRINT("enter", ("block 0x%lx pins: %u", + (ulong) block, + block->pins)); + BLOCK_INFO(block); + DBUG_ASSERT(block->pins > 0); + block->pins--; +#ifdef PAGECACHE_DEBUG + { + PAGECACHE_PIN_INFO *info= info_find(block->pin_list, my_thread_var); + DBUG_ASSERT(info != 0); + info_unlink(info); + my_free((gptr) info, MYF(0)); + } +#endif + DBUG_VOID_RETURN; +} +#ifdef PAGECACHE_DEBUG +void pagecache_add_lock(PAGECACHE_BLOCK_LINK *block, bool wl) +{ + PAGECACHE_LOCK_INFO *info= + (PAGECACHE_LOCK_INFO *)my_malloc(sizeof(PAGECACHE_LOCK_INFO), MYF(0)); + info->thread= my_thread_var; + info->write_lock= wl; + info_link((PAGECACHE_PIN_INFO **)&block->lock_list, + (PAGECACHE_PIN_INFO *)info); +} +void 
pagecache_remove_lock(PAGECACHE_BLOCK_LINK *block)
{
  PAGECACHE_LOCK_INFO *info=
    (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list,
                                     my_thread_var);
  DBUG_ASSERT(info != 0);
  info_unlink((PAGECACHE_PIN_INFO *)info);
  my_free((gptr)info, MYF(0));
}
/* Debug-only: flip this thread's recorded lock mode on the block */
void pagecache_change_lock(PAGECACHE_BLOCK_LINK *block, bool wl)
{
  PAGECACHE_LOCK_INFO *info=
    (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list,
                                     my_thread_var);
  DBUG_ASSERT(info != 0 && info->write_lock != wl);
  info->write_lock= wl;
}
#else
/* Without PAGECACHE_DEBUG the lock bookkeeping compiles away entirely */
#define pagecache_add_lock(B,W)
#define pagecache_remove_lock(B)
#define pagecache_change_lock(B,W)
#endif

/*
  Put on the block "update" type lock

  SYNOPSIS
    pagecache_lock_block()
    pagecache           pointer to a page cache data structure
    block               the block to work with

  RETURN
    0 - OK
    1 - Try to lock the block failed (the caller must restart the operation;
        this thread waited for the holder before returning)
*/

my_bool pagecache_lock_block(PAGECACHE *pagecache,
                             PAGECACHE_BLOCK_LINK *block)
{
  DBUG_ENTER("pagecache_lock_block");
  BLOCK_INFO(block);
  if (block->status & BLOCK_WRLOCK)
  {
    DBUG_PRINT("info", ("fail to lock, waiting..."));
    /* Lock failed we will wait */
#ifdef THREAD
    struct st_my_thread_var *thread= my_thread_var;
    add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread);
    dec_counter_for_resize_op(pagecache);
    do
    {
      KEYCACHE_DBUG_PRINT("pagecache_lock_block: wait",
                          ("suspend thread %ld", thread->id));
      pagecache_pthread_cond_wait(&thread->suspend,
                                  &pagecache->cache_lock);
    }
    while(thread->next);
#else
    DBUG_ASSERT(0);
#endif
    BLOCK_INFO(block);
    DBUG_RETURN(1);
  }
  /* we are doing it under the global cache mutex protection, so it is OK */
  block->status|= BLOCK_WRLOCK;
  DBUG_RETURN(0);
}

/*
  Release the write lock on the block and wake all threads waiting for it.
*/
void pagecache_ulock_block(PAGECACHE_BLOCK_LINK *block)
{
  DBUG_ENTER("pagecache_ulock_block");
  BLOCK_INFO(block);
  DBUG_ASSERT(block->status & BLOCK_WRLOCK);
  block->status&= ~BLOCK_WRLOCK;
#ifdef THREAD
  /* release all threads waiting for write lock */
  if (block->wqueue[COND_FOR_WRLOCK].last_thread)
    release_queue(&block->wqueue[COND_FOR_WRLOCK]);
#endif
  BLOCK_INFO(block);
  DBUG_VOID_RETURN;
}

/*
  Try to lock/unlock and pin/unpin the block

  SYNOPSIS
    pagecache_make_lock_and_pin()
    pagecache           pointer to a page cache data structure
    block               the block to work with
    lock                lock change mode
    pin                 pin change mode

  RETURN
    0 - OK
    1 - Try to lock the block failed

  NOTE(review): pin and lock bookkeeping in several branches below is inside
  "#ifndef DBUG_OFF" / "#ifdef PAGECACHE_DEBUG" guards, so in release builds
  some pin transitions compile away — presumably intentional (pins appear to
  be used only in debug assertions); confirm before relying on block->pins
  in non-debug code.
*/

my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache,
                                    PAGECACHE_BLOCK_LINK *block,
                                    enum pagecache_page_lock lock,
                                    enum pagecache_page_pin pin)
{
  DBUG_ENTER("pagecache_make_lock_and_pin");
  DBUG_PRINT("enter", ("block: 0x%lx (%u), wrlock: %c pins: %u, lock %s, pin: %s",
                       (ulong)block, BLOCK_NUMBER(pagecache, block),
                       ((block->status & BLOCK_WRLOCK)?'Y':'N'),
                       block->pins,
                       page_cache_page_lock_str[lock],
                       page_cache_page_pin_str[pin]));
  BLOCK_INFO(block);
#ifdef PAGECACHE_DEBUG
  DBUG_ASSERT(info_check_pin(block, pin) == 0 &&
              info_check_lock(block, lock, pin) == 0);
#endif
  switch (lock)
  {
  case PAGECACHE_LOCK_WRITE:               /* free  -> write */
    /* Write-lock and pin the buffer */
    if (pagecache_lock_block(pagecache, block))
    {
      DBUG_PRINT("info", ("restart"));
      /* in case of fail pagecache_lock_block unlock cache */
      DBUG_RETURN(1);
    }
    /* The cache is locked so there is nothing to be afraid of */
    pagecache_add_pin(block);
    pagecache_add_lock(block, 1);
    break;
  case PAGECACHE_LOCK_WRITE_TO_READ:       /* write -> read */
  case PAGECACHE_LOCK_WRITE_UNLOCK:        /* write -> free */
    /*
      Removes the write lock and puts a read lock (which is nothing in our
      implementation)
    */
    pagecache_ulock_block(block);
    /* fall through: the unpin/debug bookkeeping below is shared */
  case PAGECACHE_LOCK_READ_UNLOCK:         /* read  -> free */
  case PAGECACHE_LOCK_LEFT_READLOCKED:     /* read  -> read */
#ifndef DBUG_OFF
    if (pin == PAGECACHE_UNPIN)
    {
      pagecache_remove_pin(block);
    }
#endif
#ifdef PAGECACHE_DEBUG
    if (lock == PAGECACHE_LOCK_WRITE_TO_READ)
    {
      pagecache_change_lock(block, 0);
    } else if (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
               lock == PAGECACHE_LOCK_READ_UNLOCK)
    {
      pagecache_remove_lock(block);
    }
#endif
    break;
  case PAGECACHE_LOCK_READ:                /* free  -> read */
#ifndef DBUG_OFF
    if (pin == PAGECACHE_PIN)
    {
      /* The cache is locked so there is nothing to be afraid of */
      pagecache_add_pin(block);
    }
    pagecache_add_lock(block, 0);
    break;
#endif
    /* NOTE(review): with DBUG_OFF this case falls through to the no-ops */
  case PAGECACHE_LOCK_LEFT_UNLOCKED:       /* free  -> free */
  case PAGECACHE_LOCK_LEFT_WRITELOCKED:    /* write -> write */
    break; /* do nothing */
  default:
    DBUG_ASSERT(0); /* Should never happen */
  }

  BLOCK_INFO(block);
  DBUG_RETURN(0);
}


/*
  Read into a key cache block buffer from disk.

  SYNOPSIS

    read_block()
      pagecache           pointer to a page cache data structure
      block               block to which buffer the data is to be read
      primary             <-> the current thread will read the data

  RETURN VALUE
    None

  NOTES.
    The function either reads a page data from file to the block buffer,
    or waits until another thread reads it. What page to read is determined
    by a block parameter - reference to a hash link for this page.
    If an error occurs the BLOCK_ERROR bit is set in the block status.
*/

static void read_block(PAGECACHE *pagecache,
                       PAGECACHE_BLOCK_LINK *block,
                       my_bool primary)
{
  uint got_length;

  /* On entry cache_lock is locked */

  KEYCACHE_THREAD_TRACE("read_block");
  if (primary)
  {
    /*
      This code is executed only by threads
      that submitted primary requests
    */

    KEYCACHE_DBUG_PRINT("read_block",
                        ("page to be read by primary request"));

    /* Page is not in buffer yet, is to be read from disk */
    pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    /*
      Here other threads may step in and register as secondary readers.
      They will register in block->wqueue[COND_FOR_REQUESTED].
    */
    got_length= pagecache_fread(pagecache, &block->hash_link->file,
                                block->buffer,
                                block->hash_link->pageno, MYF(0));
    pagecache_pthread_mutex_lock(&pagecache->cache_lock);
    if (got_length < pagecache->block_size)
      block->status|= BLOCK_ERROR;
    else
      /* keep the BLOCK_WRLOCK bit while marking the block as read */
      block->status= (BLOCK_READ | (block->status & BLOCK_WRLOCK));

    KEYCACHE_DBUG_PRINT("read_block",
                        ("primary request: new page in cache"));
    /* Signal that all pending requests for this page now can be processed */
    if (block->wqueue[COND_FOR_REQUESTED].last_thread)
      release_queue(&block->wqueue[COND_FOR_REQUESTED]);
  }
  else
  {
    /*
      This code is executed only by threads
      that submitted secondary requests
    */
    KEYCACHE_DBUG_PRINT("read_block",
                        ("secondary request waiting for new page to be read"));
    {
#ifdef THREAD
      struct st_my_thread_var *thread= my_thread_var;
      /* Put the request into a queue and wait until it can be processed */
      add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread);
      do
      {
        KEYCACHE_DBUG_PRINT("read_block: wait",
                            ("suspend thread %ld", thread->id));
        pagecache_pthread_cond_wait(&thread->suspend,
                                    &pagecache->cache_lock);
      }
      while (thread->next);
#else
      KEYCACHE_DBUG_ASSERT(0);
      /* No parallel requests in single-threaded case */
#endif
    }
    KEYCACHE_DBUG_PRINT("read_block",
                        ("secondary request: new page in cache"));
  }
}


/*
  Unlock/unpin page and put LSN stamp if needed

  SYNOPSIS
    pagecache_unlock_page()
    pagecache           pointer to a page cache data structure
    file                handler for the file for the block of data to be read
    pageno              number of the block of data in the file
    lock                lock change
    pin                 pin page
    stamp_this_page     put LSN stamp on the page
    first_REDO_LSN_for_page
*/

void pagecache_unlock_page(PAGECACHE *pagecache,
                           PAGECACHE_FILE *file,
                           maria_page_no_t pageno,
                           enum pagecache_page_lock lock,
                           enum pagecache_page_pin pin,
                           my_bool stamp_this_page,
                           LSN first_REDO_LSN_for_page)
{
  PAGECACHE_BLOCK_LINK *block;
  int page_st;
  DBUG_ENTER("pagecache_unlock_page");
  DBUG_PRINT("enter", ("fd: %u page: %lu l%s p%s",
                       (uint) file->file, (ulong) pageno,
                       page_cache_page_lock_str[lock],
                       page_cache_page_pin_str[pin]));
  /* we do not allow any lock/pin increasing here */
  DBUG_ASSERT(pin != PAGECACHE_PIN &&
              lock != PAGECACHE_LOCK_READ &&
              lock != PAGECACHE_LOCK_WRITE);
  if (pin == PAGECACHE_PIN_LEFT_UNPINNED &&
      lock == PAGECACHE_LOCK_READ_UNLOCK)
  {
    /*
      In debug builds the call is wrapped in "if (...) DBUG_ASSERT(0);"
      to catch unexpected failures; in release builds it is a bare call.
    */
#ifndef DBUG_OFF
    if (
#endif
      /* the block is not needed here so we do not provide it */
      pagecache_make_lock_and_pin(pagecache, 0, lock, pin)
#ifndef DBUG_OFF
      )
    {
      DBUG_ASSERT(0); /* should not happen */
    }
#else
      ;
#endif
    DBUG_VOID_RETURN;
  }

  pagecache_pthread_mutex_lock(&pagecache->cache_lock);
  /*
    While we hold the lock the cache can be used: we hold it because we
    want to unlock a page, so the cache must be usable.
  */
  DBUG_ASSERT(pagecache->can_be_used);

  inc_counter_for_resize_op(pagecache);
  block= find_key_block(pagecache, file, pageno, 0, 0, 0, &page_st);
  DBUG_ASSERT(block != 0 && page_st == PAGE_READ);
  if (stamp_this_page)
  {
    DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK &&
                pin == PAGECACHE_UNPIN);
    /* TODO: insert LSN writing code */
  }

#ifndef DBUG_OFF
  if (
#endif
    pagecache_make_lock_and_pin(pagecache, block, lock, pin)
#ifndef DBUG_OFF
    )
  {
    DBUG_ASSERT(0); /* should not happen */
  }
#else
    ;
#endif

  remove_reader(block);
  /*
    Link the block into the LRU chain if it's the last submitted request
    for the block and block will not be pinned
  */
  if (pin != PAGECACHE_PIN_LEFT_PINNED)
    unreg_request(pagecache, block, 1);

  dec_counter_for_resize_op(pagecache);

  pagecache_pthread_mutex_unlock(&pagecache->cache_lock);

  DBUG_VOID_RETURN;
}


/*
  Unpin page

  SYNOPSIS
    pagecache_unpin_page()
    pagecache           pointer to a page cache data structure
    file                handler for the file for the block of data to be read
    pageno              number of the block of data in the file
*/

void
pagecache_unpin_page(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno) +{ + PAGECACHE_BLOCK_LINK *block; + int page_st; + DBUG_ENTER("pagecache_unpin_page"); + DBUG_PRINT("enter", ("fd: %u page: %lu", + (uint) file->file, (ulong) pageno)); + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + /* + As soon as we keep lock cache can be used, and we have lock bacause want + aunlock. + */ + DBUG_ASSERT(pagecache->can_be_used); + + inc_counter_for_resize_op(pagecache); + block= find_key_block(pagecache, file, pageno, 0, 0, 0, &page_st); + DBUG_ASSERT(block != 0 && page_st == PAGE_READ); + +#ifndef DBUG_OFF + if ( +#endif + /* + we can just unpin only with keeping read lock because: + a) we can't pin without any lock + b) we can't unpin keeping write lock + */ + pagecache_make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_LEFT_READLOCKED, + PAGECACHE_UNPIN) +#ifndef DBUG_OFF + ) + { + DBUG_ASSERT(0); /* should not happend */ + } +#else + ; +#endif + + remove_reader(block); + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned + */ + unreg_request(pagecache, block, 1); + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + DBUG_VOID_RETURN; +} + + +/* + Unlock/unpin page and put LSN stamp if it need + (uses direct block/page pointer) + + SYNOPSIS + pagecache_unlock() + pagecache pointer to a page cache data structure + link direct link to page (returned by read or write) + lock lock change + pin pin page + stamp_this_page put LSN stamp on the page + first_REDO_LSN_for_page +*/ + +void pagecache_unlock(PAGECACHE *pagecache, + PAGECACHE_PAGE_LINK *link, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + my_bool stamp_this_page, + LSN first_REDO_LSN_for_page) +{ + PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link; + DBUG_ENTER("pagecache_unlock"); + DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu l%s 
p%s", + (ulong) block, + (uint) block->hash_link->file.file, + (ulong) block->hash_link->pageno, + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + /* we do not allow any lock/pin increasing here */ + DBUG_ASSERT(pin != PAGECACHE_PIN && + lock != PAGECACHE_LOCK_READ && + lock != PAGECACHE_LOCK_WRITE); + if (pin == PAGECACHE_PIN_LEFT_UNPINNED && + lock == PAGECACHE_LOCK_READ_UNLOCK) + { +#ifndef DBUG_OFF + if ( +#endif + /* block do not need here so we do not provide it */ + pagecache_make_lock_and_pin(pagecache, 0, lock, pin) +#ifndef DBUG_OFF + ) + { + DBUG_ASSERT(0); /* should not happend */ + } +#else + ; +#endif + DBUG_VOID_RETURN; + } + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + /* + As soon as we keep lock cache can be used, and we have lock bacause want + aunlock. + */ + DBUG_ASSERT(pagecache->can_be_used); + + inc_counter_for_resize_op(pagecache); + if (stamp_this_page) + { + DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && + pin == PAGECACHE_UNPIN); + /* TODO: insert LSN writing code */ + } + +#ifndef DBUG_OFF + if ( +#endif + pagecache_make_lock_and_pin(pagecache, block, lock, pin) +#ifndef DBUG_OFF + ) + { + DBUG_ASSERT(0); /* should not happend */ + } +#else + ; +#endif + + remove_reader(block); + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned + */ + if (pin != PAGECACHE_PIN_LEFT_PINNED) + unreg_request(pagecache, block, 1); + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + DBUG_VOID_RETURN; +} + + +/* + Unpin page + (uses direct block/page pointer) + + SYNOPSIS + pagecache_unpin_page() + pagecache pointer to a page cache data structure + link direct link to page (returned by read or write) +*/ + +void pagecache_unpin(PAGECACHE *pagecache, + PAGECACHE_PAGE_LINK *link) +{ + PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link; + DBUG_ENTER("pagecache_unpin"); + DBUG_PRINT("enter", 
("block: 0x%lx fd: %u page: %lu", + (ulong) block, + (uint) block->hash_link->file.file, + (ulong) block->hash_link->pageno)); + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + /* + As long as we hold the cache lock the cache can be used, and we hold the lock because we want + to unlock. + */ + DBUG_ASSERT(pagecache->can_be_used); + + inc_counter_for_resize_op(pagecache); + +#ifndef DBUG_OFF + if ( +#endif + /* + we can just unpin only with keeping read lock because: + a) we can't pin without any lock + b) we can't unpin keeping write lock + */ + pagecache_make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_LEFT_READLOCKED, + PAGECACHE_UNPIN) +#ifndef DBUG_OFF + ) + { + DBUG_ASSERT(0); /* should not happen */ + } +#else + ; +#endif + + remove_reader(block); + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned + */ + unreg_request(pagecache, block, 1); + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + DBUG_VOID_RETURN; +} + + +/* + Read a block of data from a cached file into a buffer; + + SYNOPSIS + pagecache_read() + pagecache pointer to a page cache data structure + file handler for the file for the block of data to be read + pageno number of the block of data in the file + level determines the weight of the data + buff buffer to where the data must be placed + type type of the page + lock lock change + link link to the page if we pin it + + RETURN VALUE + Returns address from where the data is placed if successful, 0 - otherwise. + + NOTES. + + The function ensures that a block of data of size length from file + positioned at pageno is in the buffers for some key cache blocks. + Then the function copies the data into the buffer buff. 
+ + Pin will be choosen according to lock parameter (see lock_to_pin) +*/ +static enum pagecache_page_pin lock_to_pin[]= +{ + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/, + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_READLOCKED*/, + PAGECACHE_PIN_LEFT_PINNED /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/, + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ*/, + PAGECACHE_PIN /*PAGECACHE_LOCK_WRITE*/, + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/, + PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_UNLOCK*/, + PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_TO_READ*/ +}; + +byte *pagecache_read(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno, + uint level, + byte *buff, + enum pagecache_page_type type, + enum pagecache_page_lock lock, + PAGECACHE_PAGE_LINK *link) +{ + int error= 0; + enum pagecache_page_pin pin= lock_to_pin[lock]; + PAGECACHE_PAGE_LINK fake_link; + DBUG_ENTER("page_cache_read"); + DBUG_PRINT("enter", ("fd: %u page: %lu level: %u t:%s l%s p%s", + (uint) file->file, (ulong) pageno, level, + page_cache_page_type_str[type], + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + + if (!link) + link= &fake_link; + else + *link= 0; + +restart: + + if (pagecache->can_be_used) + { + /* Key cache is used */ + reg1 PAGECACHE_BLOCK_LINK *block; + uint status; + int page_st; + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (!pagecache->can_be_used) + { + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + goto no_key_cache; + } + + inc_counter_for_resize_op(pagecache); + pagecache->global_cache_r_requests++; + block= find_key_block(pagecache, file, pageno, level, 0, + (((pin == PAGECACHE_PIN_LEFT_PINNED) || + (pin == PAGECACHE_UNPIN)) ? 
0 : 1), + &page_st); + DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || + block->type == type); + block->type= type; + if (pagecache_make_lock_and_pin(pagecache, block, lock, pin)) + { + /* + We failed to writelock the block, cache is unlocked, and last write + lock is released, we will try to get the block again. + */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + goto restart; + } + if (block->status != BLOCK_ERROR && page_st != PAGE_READ) + { + /* The requested page is to be read into the block buffer */ + read_block(pagecache, block, + (my_bool)(page_st == PAGE_TO_BE_READ)); + } + + if (! ((status= block->status) & BLOCK_ERROR)) + { +#if !defined(SERIALIZED_READ_FROM_CACHE) + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); +#endif + + DBUG_ASSERT((pagecache->block_size & 511) == 0); + /* Copy data from the cache buffer */ + bmove512(buff, block->buffer, pagecache->block_size); + +#if !defined(SERIALIZED_READ_FROM_CACHE) + pagecache_pthread_mutex_lock(&pagecache->cache_lock); +#endif + } + + remove_reader(block); + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned + */ + if (pin != PAGECACHE_PIN_LEFT_PINNED && pin != PAGECACHE_PIN) + unreg_request(pagecache, block, 1); + else + *link= (PAGECACHE_PAGE_LINK)block; + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + if (status & BLOCK_ERROR) + DBUG_RETURN((byte *) 0); + + DBUG_RETURN(buff); + } + +no_key_cache: /* Key cache is not used */ + + /* We can't use mutex here as the key cache may not be initialized */ + pagecache->global_cache_r_requests++; + pagecache->global_cache_read++; + if (pagecache_fread(pagecache, file, (byte*) buff, pageno, MYF(MY_NABP))) + error= 1; + DBUG_RETURN(error ? 
(byte*) 0 : buff); +} + + +/* + Delete page from the buffer + + SYNOPSIS + pagecache_delete_page() + pagecache pointer to a page cache data structure + file handler for the file for the block of data to be read + pageno number of the block of data in the file + lock lock change + flush flush page if it is dirty + + RETURN VALUE + 0 - deleted or was not present at all + 1 - error + + NOTES. + lock can be only PAGECACHE_LOCK_LEFT_WRITELOCKED (page was write locked + before) or PAGECACHE_LOCK_WRITE (delete will write lock page before delete) +*/ +my_bool pagecache_delete_page(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno, + enum pagecache_page_lock lock, + my_bool flush) +{ + int error= 0; + enum pagecache_page_pin pin= lock_to_pin[lock]; + DBUG_ENTER("pagecache_delete_page"); + DBUG_PRINT("enter", ("fd: %u page: %lu l%s p%s", + (uint) file->file, (ulong) pageno, + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE || + lock == PAGECACHE_LOCK_LEFT_WRITELOCKED); + +restart: + + if (pagecache->can_be_used) + { + /* Key cache is used */ + reg1 PAGECACHE_BLOCK_LINK *block; + PAGECACHE_HASH_LINK **unused_start, *link; + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (!pagecache->can_be_used) + goto end; + + inc_counter_for_resize_op(pagecache); + link= get_present_hash_link(pagecache, file, pageno, &unused_start); + if (!link) + { + DBUG_PRINT("info", ("There is no fuch page in the cache")); + DBUG_RETURN(0); + } + block= link->block; + DBUG_ASSERT(block != 0); + if (pagecache_make_lock_and_pin(pagecache, block, lock, pin)) + { + /* + We failed to writelock the block, cache is unlocked, and last write + lock is released, we will try to get the block again. 
+ */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + goto restart; + } + + if (block->status & BLOCK_CHANGED && flush) + { + if (flush) + { + /* The block contains a dirty page - push it out of the cache */ + + KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty")); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + The call is thread safe because only the current + thread might change the block->hash_link value + */ + DBUG_ASSERT(block->pins == 1); + error= pagecache_fwrite(pagecache, + &block->hash_link->file, + block->buffer, + block->hash_link->pageno, + block->type, + MYF(MY_NABP | MY_WAIT_IF_FULL)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + pagecache->global_cache_write++; + if (error) + { + block->status|= BLOCK_ERROR; + goto err; + } + } + pagecache->blocks_changed--; + pagecache->global_blocks_changed--; + + } + /* Cache is locked, so we can relese page before freeing it */ + pagecache_make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN); + if (pin == PAGECACHE_PIN_LEFT_PINNED) + unreg_request(pagecache, block, 1); + free_block(pagecache, block); + +err: + dec_counter_for_resize_op(pagecache); +end: + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + } + + DBUG_RETURN(error); +} + + +/* + Write a buffer into a cached file. + + SYNOPSIS + + pagecache_write() + pagecache pointer to a page cache data structure + file handler for the file to write data to + pageno number of the block of data in the file + level determines the weight of the data + buff buffer to where the data must be placed + type type of the page + lock lock change + pin pin page + write_mode how to write page + link link to the page if we pin it + + RETURN VALUE + 0 if a success, 1 - otherwise. 
+*/ + +struct write_lock_change +{ + int need_lock_change; + enum pagecache_page_lock new_lock; + enum pagecache_page_lock unlock_lock; +}; + +static struct write_lock_change write_lock_change_table[]= +{ + {1, + PAGECACHE_LOCK_WRITE, + PAGECACHE_LOCK_WRITE_UNLOCK} /*PAGECACHE_LOCK_LEFT_UNLOCKED*/, + {0, /*unsupported*/ + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_LEFT_READLOCKED*/, + {0, PAGECACHE_LOCK_LEFT_WRITELOCKED, 0} /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/, + {1, + PAGECACHE_LOCK_WRITE, + PAGECACHE_LOCK_WRITE_TO_READ} /*PAGECACHE_LOCK_READ*/, + {0, PAGECACHE_LOCK_WRITE, 0} /*PAGECACHE_LOCK_WRITE*/, + {0, /*unsupported*/ + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_READ_UNLOCK*/, + {1, + PAGECACHE_LOCK_LEFT_WRITELOCKED, + PAGECACHE_LOCK_WRITE_UNLOCK } /*PAGECACHE_LOCK_WRITE_UNLOCK*/, + {1, + PAGECACHE_LOCK_LEFT_WRITELOCKED, + PAGECACHE_LOCK_WRITE_TO_READ}/*PAGECACHE_LOCK_WRITE_TO_READ*/ +}; + +struct write_pin_change +{ + enum pagecache_page_pin new_pin; + enum pagecache_page_pin unlock_pin; +}; + +static struct write_pin_change write_pin_change_table[]= +{ + {PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN_LEFT_PINNED*/, + {PAGECACHE_PIN, + PAGECACHE_UNPIN} /*PAGECACHE_PIN_LEFT_UNPINNED*/, + {PAGECACHE_PIN, + PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN*/, + {PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_UNPIN} /*PAGECACHE_UNPIN*/ +}; + +my_bool pagecache_write(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + maria_page_no_t pageno, + uint level, + byte *buff, + enum pagecache_page_type type, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + enum pagecache_write_mode write_mode, + PAGECACHE_PAGE_LINK *link) +{ + reg1 PAGECACHE_BLOCK_LINK *block; + PAGECACHE_PAGE_LINK fake_link; + int error= 0; + int need_lock_change= write_lock_change_table[lock].need_lock_change; + DBUG_ENTER("pagecache_write"); + DBUG_PRINT("enter", ("fd: %u page: %lu level: %u t:%s l%s p%s 
m%s", + (uint) file->file, (ulong) pageno, level, + page_cache_page_type_str[type], + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin], + page_cache_page_write_mode_str[write_mode])); + DBUG_ASSERT(lock != PAGECACHE_LOCK_LEFT_READLOCKED && + lock != PAGECACHE_LOCK_READ_UNLOCK); + if (!link) + link= &fake_link; + else + *link= 0; + + if (write_mode == PAGECACHE_WRITE_NOW) + { + /* we allow direct write if wwe do not use long term lockings */ + DBUG_ASSERT(lock == PAGECACHE_LOCK_LEFT_UNLOCKED); + /* Force writing from buff into disk */ + pagecache->global_cache_write++; + if (pagecache_fwrite(pagecache, file, buff, pageno, type, + MYF(MY_NABP | MY_WAIT_IF_FULL))) + DBUG_RETURN(1); + } +restart: + +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, "start of key_cache_write", 1);); +#endif + + if (pagecache->can_be_used) + { + /* Key cache is used */ + int page_st; + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (!pagecache->can_be_used) + { + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + goto no_key_cache; + } + + inc_counter_for_resize_op(pagecache); + pagecache->global_cache_w_requests++; + block= find_key_block(pagecache, file, pageno, level, + (write_mode == PAGECACHE_WRITE_DONE ? 0 : 1), + (((pin == PAGECACHE_PIN_LEFT_PINNED) || + (pin == PAGECACHE_UNPIN)) ? 0 : 1), + &page_st); + if (!block) + { + DBUG_ASSERT(write_mode != PAGECACHE_WRITE_DONE); + /* It happens only for requests submitted during resize operation */ + dec_counter_for_resize_op(pagecache); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* Write to the disk key cache is in resize at the moment*/ + goto no_key_cache; + } + + DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || + block->type == type); + block->type= type; + + if (pagecache_make_lock_and_pin(pagecache, block, + write_lock_change_table[lock].new_lock, + (need_lock_change ? 
+ write_pin_change_table[pin].new_pin : + pin))) + { + /* + We failed to writelock the block, cache is unlocked, and last write + lock is released, we will try to get the block again. + */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + goto restart; + } + + + if (write_mode == PAGECACHE_WRITE_DONE) + { + if (block->status != BLOCK_ERROR && page_st != PAGE_READ) + { + /* Copy data from buff */ + bmove512(block->buffer, buff, pagecache->block_size); + block->status= (BLOCK_READ | (block->status & BLOCK_WRLOCK)); + KEYCACHE_DBUG_PRINT("key_cache_insert", + ("primary request: new page in cache")); + /* Signal that all pending requests for this now can be processed. */ + if (block->wqueue[COND_FOR_REQUESTED].last_thread) + release_queue(&block->wqueue[COND_FOR_REQUESTED]); + } + } + else + { + if (write_mode == PAGECACHE_WRITE_NOW) + { + /* buff has been written to disk at start */ + if (block->status & BLOCK_CHANGED) + link_to_file_list(pagecache, block, &block->hash_link->file, 1); + } + else + { + if (! (block->status & BLOCK_CHANGED)) + link_to_changed_list(pagecache, block); + } + if (! 
(block->status & BLOCK_ERROR)) + { + bmove512(block->buffer, buff, pagecache->block_size); + } + block->status|= BLOCK_READ; + } + + + if (need_lock_change) + { +#ifndef DBUG_OFF + int rc= +#endif + pagecache_make_lock_and_pin(pagecache, block, + write_lock_change_table[lock].unlock_lock, + write_pin_change_table[pin].unlock_pin); +#ifndef DBUG_OFF + DBUG_ASSERT(rc == 0); +#endif + } + + /* Unregister the request */ + block->hash_link->requests--; + if (pin != PAGECACHE_PIN_LEFT_PINNED && pin != PAGECACHE_PIN) + unreg_request(pagecache, block, 1); + else + *link= (PAGECACHE_PAGE_LINK)block; + + + if (block->status & BLOCK_ERROR) + error= 1; + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + goto end; + } + +no_key_cache: + /* Key cache is not used */ + if (write_mode == PAGECACHE_WRITE_DELAY) + { + pagecache->global_cache_w_requests++; + pagecache->global_cache_write++; + if (pagecache_fwrite(pagecache, file, (byte*) buff, pageno, type, + MYF(MY_NABP | MY_WAIT_IF_FULL))) + error=1; + } + +end: +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("exec", + test_key_cache(pagecache, "end of key_cache_write", 1);); +#endif + DBUG_RETURN(error); +} + + +/* + Free block: remove reference to it from hash table, + remove it from the chain file of dirty/clean blocks + and add it to the free list. +*/ + +static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) +{ + KEYCACHE_THREAD_TRACE("free block"); + KEYCACHE_DBUG_PRINT("free_block", + ("block %u to be freed, hash_link %p", + BLOCK_NUMBER(pagecache, block), block->hash_link)); + if (block->hash_link) + { + /* + While waiting for readers to finish, new readers might request the + block. But since we set block->status|= BLOCK_REASSIGNED, they + will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled + later. 
+ */ + block->status|= BLOCK_REASSIGNED; + wait_for_readers(pagecache, block); + unlink_hash(pagecache, block->hash_link); + } + + unlink_changed(block); + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + block->status= 0; +#ifndef DBUG_OFF + block->type= PAGECACHE_EMPTY_PAGE; +#endif + KEYCACHE_THREAD_TRACE("free block"); + KEYCACHE_DBUG_PRINT("free_block", + ("block is freed")); + unreg_request(pagecache, block, 0); + block->hash_link= NULL; + + /* Remove the free block from the LRU ring. */ + unlink_block(pagecache, block); + if (block->temperature == BLOCK_WARM) + pagecache->warm_blocks--; + block->temperature= BLOCK_COLD; + /* Insert the free block in the free list. */ + block->next_used= pagecache->free_block_list; + pagecache->free_block_list= block; + /* Keep track of the number of currently unused blocks. */ + pagecache->blocks_unused++; + + /* All pending requests for this page must be resubmitted. */ + if (block->wqueue[COND_FOR_SAVED].last_thread) + release_queue(&block->wqueue[COND_FOR_SAVED]); +} + + +static int cmp_sec_link(PAGECACHE_BLOCK_LINK **a, PAGECACHE_BLOCK_LINK **b) +{ + return (((*a)->hash_link->pageno < (*b)->hash_link->pageno) ? -1 : + ((*a)->hash_link->pageno > (*b)->hash_link->pageno) ? 
1 : 0); +} + + +/* + Flush a portion of changed blocks to disk, + free used blocks if requested +*/ + +static int flush_cached_blocks(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + PAGECACHE_BLOCK_LINK **cache, + PAGECACHE_BLOCK_LINK **end, + enum flush_type type) +{ + int error; + int last_errno= 0; + uint count= (uint) (end-cache); + DBUG_ENTER("flush_cached_blocks"); + + /* Don't lock the cache during the flush */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + As all blocks referred in 'cache' are marked by BLOCK_IN_FLUSH + we are guarunteed no thread will change them + */ + qsort((byte*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link); + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + for (; cache != end; cache++) + { + PAGECACHE_BLOCK_LINK *block= *cache; + + if (block->pins) + { + KEYCACHE_DBUG_PRINT("flush_cached_blocks", + ("block %u (0x%lx) pinned", + BLOCK_NUMBER(pagecache, block), (ulong)block)); + DBUG_PRINT("info", ("block %u (0x%lx) pinned", + BLOCK_NUMBER(pagecache, block), (ulong)block)); + BLOCK_INFO(block); + last_errno= -1; + unreg_request(pagecache, block, 1); + continue; + } + /* if the block is not pinned then it is not write locked */ + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); +#ifndef DBUG_OFF + { + int rc= +#endif + pagecache_make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE, PAGECACHE_PIN); +#ifndef DBUG_OFF + DBUG_ASSERT(rc == 0); + } +#endif + + KEYCACHE_DBUG_PRINT("flush_cached_blocks", + ("block %u (0x%lx) to be flushed", + BLOCK_NUMBER(pagecache, block), (ulong)block)); + DBUG_PRINT("info", ("block %u (0x%lx) to be flushed", + BLOCK_NUMBER(pagecache, block), (ulong)block)); + BLOCK_INFO(block); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_PRINT("info", ("block %u (0x%lx) pins: %u", + BLOCK_NUMBER(pagecache, block), (ulong)block, + block->pins)); + DBUG_ASSERT(block->pins == 1); + error= pagecache_fwrite(pagecache, file, + block->buffer, + 
block->hash_link->pageno, + block->type, + MYF(MY_NABP | MY_WAIT_IF_FULL)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + + pagecache_make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN); + + pagecache->global_cache_write++; + if (error) + { + block->status|= BLOCK_ERROR; + if (!last_errno) + last_errno= errno ? errno : -1; + } + /* + Let to proceed for possible waiting requests to write to the block page. + It might happen only during an operation to resize the key cache. + */ + if (block->wqueue[COND_FOR_SAVED].last_thread) + release_queue(&block->wqueue[COND_FOR_SAVED]); + /* type will never be FLUSH_IGNORE_CHANGED here */ + if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) + { + pagecache->blocks_changed--; + pagecache->global_blocks_changed--; + free_block(pagecache, block); + } + else + { + block->status&= ~BLOCK_IN_FLUSH; + link_to_file_list(pagecache, block, file, 1); + unreg_request(pagecache, block, 1); + } + } + DBUG_RETURN(last_errno); +} + + +/* + flush all key blocks for a file to disk, but don't do any mutex locks + + flush_pagecache_blocks_int() + pagecache pointer to a key cache data structure + file handler for the file to flush to + flush_type type of the flush + + NOTES + This function doesn't do any mutex locks because it needs to be called + both from flush_pagecache_blocks and flush_all_key_blocks (the later one + does the mutex lock in the resize_pagecache() function). 
+ + RETURN + 0 ok + 1 error +*/ + +static int flush_pagecache_blocks_int(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + enum flush_type type) +{ + PAGECACHE_BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache; + int last_errno= 0; + DBUG_ENTER("flush_pagecache_blocks_int"); + DBUG_PRINT("enter",("file: %d blocks_used: %d blocks_changed: %d", + file->file, pagecache->blocks_used, pagecache->blocks_changed)); + +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, + "start of flush_pagecache_blocks", 0);); +#endif + + cache= cache_buff; + if (pagecache->disk_blocks > 0 && + (!my_disable_flush_pagecache_blocks || type != FLUSH_KEEP)) + { + /* Key cache exists and flush is not disabled */ + int error= 0; + uint count= 0; + PAGECACHE_BLOCK_LINK **pos, **end; + PAGECACHE_BLOCK_LINK *first_in_switch= NULL; + PAGECACHE_BLOCK_LINK *block, *next; +#if defined(PAGECACHE_DEBUG) + uint cnt= 0; +#endif + + if (type != FLUSH_IGNORE_CHANGED) + { + /* + Count how many key blocks we have to cache to be able + to flush all dirty pages with minimum seek moves + */ + for (block= pagecache->changed_blocks[FILE_HASH(*file)] ; + block; + block= block->next_changed) + { + if (block->hash_link->file.file == file->file) + { + count++; + KEYCACHE_DBUG_ASSERT(count<= pagecache->blocks_used); + } + } + /* Allocate a new buffer only if its bigger than the one we have */ + if (count > FLUSH_CACHE && + !(cache= + (PAGECACHE_BLOCK_LINK**) + my_malloc(sizeof(PAGECACHE_BLOCK_LINK*)*count, MYF(0)))) + { + cache= cache_buff; + count= FLUSH_CACHE; + } + } + + /* Retrieve the blocks and write them to a buffer to be flushed */ +restart: + end= (pos= cache)+count; + for (block= pagecache->changed_blocks[FILE_HASH(*file)] ; + block; + block= next) + { +#if defined(PAGECACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); +#endif + next= block->next_changed; + if (block->hash_link->file.file == file->file) + { + /* + Mark the block with 
BLOCK_IN_FLUSH in order not to let + other threads to use it for new pages and interfere with + our sequence ot flushing dirty file pages + */ + block->status|= BLOCK_IN_FLUSH; + + if (! (block->status & BLOCK_IN_SWITCH)) + { + /* + We care only for the blocks for which flushing was not + initiated by other threads as a result of page swapping + */ + reg_requests(pagecache, block, 1); + if (type != FLUSH_IGNORE_CHANGED) + { + /* It's not a temporary file */ + if (pos == end) + { + /* + This happens only if there is not enough + memory for the big block + */ + if ((error= flush_cached_blocks(pagecache, file, cache, + end,type))) + last_errno=error; + /* + Restart the scan as some other thread might have changed + the changed blocks chain: the blocks that were in switch + state before the flush started have to be excluded + */ + goto restart; + } + *pos++= block; + } + else + { + /* It's a temporary file */ + pagecache->blocks_changed--; + pagecache->global_blocks_changed--; + free_block(pagecache, block); + } + } + else + { + /* Link the block into a list of blocks 'in switch' */ + unlink_changed(block); + link_changed(block, &first_in_switch); + } + } + } + if (pos != cache) + { + if ((error= flush_cached_blocks(pagecache, file, cache, pos, type))) + last_errno= error; + } + /* Wait until list of blocks in switch is empty */ + while (first_in_switch) + { +#if defined(PAGECACHE_DEBUG) + cnt= 0; +#endif + block= first_in_switch; + { +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + do + { + KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while (thread->next); +#else + KEYCACHE_DBUG_ASSERT(0); + /* No parallel requests in single-threaded case */ +#endif + } +#if defined(PAGECACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); +#endif + } + /* The 
following happens very seldom */ + if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) + { +#if defined(PAGECACHE_DEBUG) + cnt=0; +#endif + for (block= pagecache->file_blocks[FILE_HASH(*file)] ; + block; + block= next) + { +#if defined(PAGECACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); +#endif + next= block->next_changed; + if (block->hash_link->file.file == file->file && + (! (block->status & BLOCK_CHANGED) + || type == FLUSH_IGNORE_CHANGED)) + { + reg_requests(pagecache, block, 1); + free_block(pagecache, block); + } + } + } + } + +#ifndef DBUG_OFF + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, "end of flush_pagecache_blocks", 0);); +#endif + if (cache != cache_buff) + my_free((gptr) cache, MYF(0)); + if (last_errno) + errno=last_errno; /* Return first error */ + DBUG_RETURN(last_errno != 0); +} + + +/* + Flush all blocks for a file to disk + + SYNOPSIS + + flush_pagecache_blocks() + pagecache pointer to a page cache data structure + file handler for the file to flush to + flush_type type of the flush + + RETURN + 0 ok + 1 error +*/ + +int flush_pagecache_blocks(PAGECACHE *pagecache, + PAGECACHE_FILE *file, enum flush_type type) +{ + int res; + DBUG_ENTER("flush_pagecache_blocks"); + DBUG_PRINT("enter", ("pagecache: 0x%lx", pagecache)); + + if (pagecache->disk_blocks <= 0) + DBUG_RETURN(0); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + inc_counter_for_resize_op(pagecache); + res= flush_pagecache_blocks_int(pagecache, file, type); + dec_counter_for_resize_op(pagecache); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_RETURN(res); +} + + +/* + Flush all blocks in the key cache to disk +*/ + +static int flush_all_key_blocks(PAGECACHE *pagecache) +{ +#if defined(PAGECACHE_DEBUG) + uint cnt=0; +#endif + while (pagecache->blocks_changed > 0) + { + PAGECACHE_BLOCK_LINK *block; + for (block= pagecache->used_last->next_used ; ; block=block->next_used) + { + if (block->hash_link) + { +#if 
defined(PAGECACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); +#endif + if (flush_pagecache_blocks_int(pagecache, &block->hash_link->file, + FLUSH_RELEASE)) + return 1; + break; + } + if (block == pagecache->used_last) + break; + } + } + return 0; +} + + +/* + Reset the counters of a key cache. + + SYNOPSIS + reset_key_cache_counters() + name the name of a key cache + key_cache pointer to the key kache to be reset + + DESCRIPTION + This procedure is used by process_key_caches() to reset the counters of all + currently used key caches, both the default one and the named ones. + + RETURN + 0 on success (always because it can't fail) +*/ + +int reset_key_cache_counters(const char *name, PAGECACHE *key_cache) +{ + DBUG_ENTER("reset_key_cache_counters"); + if (!key_cache->inited) + { + DBUG_PRINT("info", ("Key cache %s not initialized.", name)); + DBUG_RETURN(0); + } + DBUG_PRINT("info", ("Resetting counters for key cache %s.", name)); + + key_cache->global_blocks_changed= 0; /* Key_blocks_not_flushed */ + key_cache->global_cache_r_requests= 0; /* Key_read_requests */ + key_cache->global_cache_read= 0; /* Key_reads */ + key_cache->global_cache_w_requests= 0; /* Key_write_requests */ + key_cache->global_cache_write= 0; /* Key_writes */ + DBUG_RETURN(0); +} + + +#ifndef DBUG_OFF +/* + Test if disk-cache is ok +*/ +static void test_key_cache(PAGECACHE *pagecache __attribute__((unused)), + const char *where __attribute__((unused)), + my_bool lock __attribute__((unused))) +{ + /* TODO */ +} +#endif + +#if defined(PAGECACHE_TIMEOUT) + +#define KEYCACHE_DUMP_FILE "pagecache_dump.txt" +#define MAX_QUEUE_LEN 100 + + +static void pagecache_dump(PAGECACHE *pagecache) +{ + FILE *pagecache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w"); + struct st_my_thread_var *last; + struct st_my_thread_var *thread; + PAGECACHE_BLOCK_LINK *block; + PAGECACHE_HASH_LINK *hash_link; + PAGECACHE_PAGE *page; + uint i; + + fprintf(pagecache_dump_file, "thread:%u\n", thread->id); + 
+ i=0; + thread=last=waiting_for_hash_link.last_thread; + fprintf(pagecache_dump_file, "queue of threads waiting for hash link\n"); + if (thread) + do + { + thread= thread->next; + page= (PAGECACHE_PAGE *) thread->opt_info; + fprintf(pagecache_dump_file, + "thread:%u, (file,pageno)=(%u,%lu)\n", + thread->id,(uint) page->file.file,(ulong) page->pageno); + if (++i == MAX_QUEUE_LEN) + break; + } + while (thread != last); + + i=0; + thread=last=waiting_for_block.last_thread; + fprintf(pagecache_dump_file, "queue of threads waiting for block\n"); + if (thread) + do + { + thread=thread->next; + hash_link= (PAGECACHE_HASH_LINK *) thread->opt_info; + fprintf(pagecache_dump_file, + "thread:%u hash_link:%u (file,pageno)=(%u,%lu)\n", + thread->id, (uint) PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link), + (uint) hash_link->file.file,(ulong) hash_link->pageno); + if (++i == MAX_QUEUE_LEN) + break; + } + while (thread != last); + + for (i=0 ; i < pagecache->blocks_used ; i++) + { + int j; + block= &pagecache->block_root[i]; + hash_link= block->hash_link; + fprintf(pagecache_dump_file, + "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n", + i, (int) (hash_link ? + PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link) : + -1), + block->status, block->requests, block->condvar ? 
1 : 0); + for (j=0 ; j < COND_SIZE; j++) + { + PAGECACHE_WQUEUE *wqueue=&block->wqueue[j]; + thread= last= wqueue->last_thread; + fprintf(pagecache_dump_file, "queue #%d\n", j); + if (thread) + { + do + { + thread=thread->next; + fprintf(pagecache_dump_file, + "thread:%u\n", thread->id); + if (++i == MAX_QUEUE_LEN) + break; + } + while (thread != last); + } + } + } + fprintf(pagecache_dump_file, "LRU chain:"); + block= pagecache= used_last; + if (block) + { + do + { + block= block->next_used; + fprintf(pagecache_dump_file, + "block:%u, ", BLOCK_NUMBER(pagecache, block)); + } + while (block != pagecache->used_last); + } + fprintf(pagecache_dump_file, "\n"); + + fclose(pagecache_dump_file); +} + +#endif /* defined(PAGECACHE_TIMEOUT) */ + +#if defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) + + +static int pagecache_pthread_cond_wait(pthread_cond_t *cond, + pthread_mutex_t *mutex) +{ + int rc; + struct timeval now; /* time when we started waiting */ + struct timespec timeout; /* timeout value for the wait function */ + struct timezone tz; +#if defined(PAGECACHE_DEBUG) + int cnt=0; +#endif + + /* Get current time */ + gettimeofday(&now, &tz); + /* Prepare timeout value */ + timeout.tv_sec= now.tv_sec + PAGECACHE_TIMEOUT; + /* + timeval uses microseconds. + timespec uses nanoseconds. 
+ 1 microsecond = 1000 nanoseconds + */ + timeout.tv_nsec= now.tv_usec * 1000; + KEYCACHE_THREAD_TRACE_END("started waiting"); +#if defined(PAGECACHE_DEBUG) + cnt++; + if (cnt % 100 == 0) + fprintf(pagecache_debug_log, "waiting...\n"); + fflush(pagecache_debug_log); +#endif + rc= pthread_cond_timedwait(cond, mutex, &timeout); + KEYCACHE_THREAD_TRACE_BEGIN("finished waiting"); + if (rc == ETIMEDOUT || rc == ETIME) + { +#if defined(PAGECACHE_DEBUG) + fprintf(pagecache_debug_log,"aborted by pagecache timeout\n"); + fclose(pagecache_debug_log); + abort(); +#endif + pagecache_dump(); + } + +#if defined(PAGECACHE_DEBUG) + KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT); +#else + assert(rc != ETIMEDOUT); +#endif + return rc; +} +#else +#if defined(PAGECACHE_DEBUG) +static int pagecache_pthread_cond_wait(pthread_cond_t *cond, + pthread_mutex_t *mutex) +{ + int rc; + KEYCACHE_THREAD_TRACE_END("started waiting"); + rc= pthread_cond_wait(cond, mutex); + KEYCACHE_THREAD_TRACE_BEGIN("finished waiting"); + return rc; +} +#endif +#endif /* defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) */ + +#if defined(PAGECACHE_DEBUG) +static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex) +{ + int rc; + rc= pthread_mutex_lock(mutex); + KEYCACHE_THREAD_TRACE_BEGIN(""); + return rc; +} + + +static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex) +{ + KEYCACHE_THREAD_TRACE_END(""); + pthread_mutex_unlock(mutex); + return; +} + + +static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond) +{ + int rc; + KEYCACHE_THREAD_TRACE("signal"); + rc= pthread_cond_signal(cond); + return rc; +} + + +#if defined(PAGECACHE_DEBUG_LOG) + + +static void pagecache_debug_print(const char * fmt, ...) 
+{ + va_list args; + va_start(args,fmt); + if (pagecache_debug_log) + { + VOID(vfprintf(pagecache_debug_log, fmt, args)); + VOID(fputc('\n',pagecache_debug_log)); + } + va_end(args); +} +#endif /* defined(PAGECACHE_DEBUG_LOG) */ + +#if defined(PAGECACHE_DEBUG_LOG) + + +void pagecache_debug_log_close(void) +{ + if (pagecache_debug_log) + fclose(pagecache_debug_log); +} +#endif /* defined(PAGECACHE_DEBUG_LOG) */ + +#endif /* defined(PAGECACHE_DEBUG) */ diff --git a/mysys/test_file.c b/mysys/test_file.c new file mode 100644 index 00000000000..7ffca48023d --- /dev/null +++ b/mysys/test_file.c @@ -0,0 +1,70 @@ +#include "mysys_priv.h" +#include "my_dir.h" +#include +#include +#include +#include "test_file.h" + + +/* + Check that file contance correspond to descriptor + + SYNOPSIS + test_file() + file File to test + file_name Path (and name) of file which is tested + size size of file + buff_size size of buffer which is enought to check the file + desc file descriptor to check with + + RETURN + 1 file if OK + 0 error +*/ + +int test_file(PAGECACHE_FILE file, char *file_name, + off_t size, size_t buff_size, struct file_desc *desc) +{ + MY_STAT stat_buff, *stat; + unsigned char *buffr= malloc(buff_size); + off_t pos= 0; + size_t byte; + int step= 0; + + if ((stat= my_stat(file_name, &stat_buff, MYF(0))) == NULL) + { + diag("Can't stat() %s (errno: %d)\n", file_name, errno); + return 0; + } + if (stat->st_size != size) + { + diag("file %s size is %lu (should be %lu)\n", + file_name, (ulong) stat->st_size, (ulong) size); + return 0; + } + /* check content */ + my_seek(file.file, 0, SEEK_SET, MYF(0)); + while (desc[step].length != 0) + { + if (my_read(file.file, (char*)buffr, desc[step].length, MYF(0)) != + desc[step].length) + { + diag("Can't read %u bytes from %s (errno: %d)\n", + (uint)desc[step].length, file_name, errno); + return 0; + } + for (byte= 0; byte < desc[step].length; byte++) + { + if (buffr[byte] != desc[step].content) + { + diag("content of %s mismatch 0x%x 
in position %lu instead of 0x%x\n", + file_name, (uint) buffr[byte], (ulong) (pos + byte), + desc[step].content); + return 0; + } + } + pos+= desc[step].length; + step++; + } + return 1; +} diff --git a/mysys/test_file.h b/mysys/test_file.h new file mode 100644 index 00000000000..ea787c123ed --- /dev/null +++ b/mysys/test_file.h @@ -0,0 +1,14 @@ + +#include + +/* + File content descriptor +*/ +struct file_desc +{ + unsigned int length; + unsigned char content; +}; + +int test_file(PAGECACHE_FILE file, char *file_name, + off_t size, size_t buff_size, struct file_desc *desc); diff --git a/mysys/test_pagecache_consist.c b/mysys/test_pagecache_consist.c new file mode 100755 index 00000000000..1cc54af2460 --- /dev/null +++ b/mysys/test_pagecache_consist.c @@ -0,0 +1,447 @@ +#include "mysys_priv.h" +#include "../include/my_pthread.h" +#include "../include/pagecache.h" +#include +#include "my_dir.h" +#include +#include +#include +#include +#include "../unittest/mytap/tap.h" +#include + +/*#define PAGE_SIZE 65536*/ +#define PCACHE_SIZE (PAGE_SIZE*1024*20) + +#ifndef DBUG_OFF +static const char* default_dbug_option; +#endif + +static char *file1_name= (char*)"page_cache_test_file_1"; +static PAGECACHE_FILE file1; +static pthread_cond_t COND_thread_count; +static pthread_mutex_t LOCK_thread_count; +static uint thread_count; +static PAGECACHE pagecache; + +#ifdef TEST_HIGH_CONCURENCY +static uint number_of_readers= 10; +static uint number_of_writers= 20; +static uint number_of_tests= 30000; +static uint record_length_limit= PAGE_SIZE/200; +static uint number_of_pages= 20; +static uint flush_divider= 1000; +#else /*TEST_HIGH_CONCURENCY*/ +#ifdef TEST_READERS +static uint number_of_readers= 10; +static uint number_of_writers= 1; +static uint number_of_tests= 30000; +static uint record_length_limit= PAGE_SIZE/200; +static uint number_of_pages= 20; +static uint flush_divider= 1000; +#else /*TEST_READERS*/ +#ifdef TEST_WRITERS +static uint number_of_readers= 0; +static uint 
number_of_writers= 10; +static uint number_of_tests= 30000; +static uint record_length_limit= PAGE_SIZE/200; +static uint number_of_pages= 20; +static uint flush_divider= 1000; +#else /*TEST_WRITERS*/ +static uint number_of_readers= 10; +static uint number_of_writers= 10; +static uint number_of_tests= 50000; +static uint record_length_limit= PAGE_SIZE/200; +static uint number_of_pages= 20000; +static uint flush_divider= 1000; +#endif /*TEST_WRITERS*/ +#endif /*TEST_READERS*/ +#endif /*TEST_HIGH_CONCURENCY*/ + + +/* check page consistemcy */ +uint check_page(uchar *buff, ulong offset, int page_locked, int page_no, + int tag) +{ + uint end= sizeof(uint); + uint num= *((uint *)buff); + uint i; + DBUG_ENTER("check_page"); + + for (i= 0; i < num; i++) + { + uint len= *((uint *)(buff + end)); + uint j; + end+= sizeof(uint)+ sizeof(uint); + if (len + end > PAGE_SIZE) + { + diag("incorrect field header #%u by offset %lu\n", i, offset + end + j); + goto err; + } + for(j= 0; j < len; j++) + { + if (buff[end + j] != (uchar)((i+1) % 256)) + { + diag("incorrect %lu byte\n", offset + end + j); + goto err; + } + } + end+= len; + } + for(i= end; i < PAGE_SIZE; i++) + { + if (buff[i] != 0) + { + int h; + DBUG_PRINT("err", + ("byte %lu (%lu + %u), page %u (%s, end: %u, recs: %u, tag: %d) should be 0\n", + offset + i, offset, i, page_no, + (page_locked ? "locked" : "unlocked"), + end, num, tag)); + diag("byte %lu (%lu + %u), page %u (%s, end: %u, recs: %u, tag: %d) should be 0\n", + offset + i, offset, i, page_no, + (page_locked ? 
"locked" : "unlocked"), + end, num, tag); + h= my_open("wrong_page", O_CREAT | O_TRUNC | O_RDWR, MYF(0)); + my_pwrite(h, buff, PAGE_SIZE, 0, MYF(0)); + my_close(h, MYF(0)); + goto err; + } + } + DBUG_RETURN(end); +err: + DBUG_PRINT("err", ("try to flush")); + if (page_locked) + { + pagecache_delete_page(&pagecache, &file1, page_no, + PAGECACHE_LOCK_LEFT_WRITELOCKED, 1); + } + else + { + flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE); + } + exit(1); +} + +void put_rec(uchar *buff, uint end, uint len, uint tag) +{ + uint i; + uint num= *((uint *)buff); + if (!len) + len= 1; + if (end + sizeof(uint)*2 + len > PAGE_SIZE) + return; + *((uint *)(buff + end))= len; + end+= sizeof(uint); + *((uint *)(buff + end))= tag; + end+= sizeof(uint); + num++; + *((uint *)buff)= num; + *((uint*)(buff + end))= len; + for (i= end; i < (len + end); i++) + { + buff[i]= (uchar) num % 256; + } +} + +/* + Recreate and reopen a file for test + + SYNOPSIS + reset_file() + file File to reset + file_name Path (and name) of file which should be reset +*/ + +void reset_file(PAGECACHE_FILE file, char *file_name) +{ + flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE); + if (my_close(file1.file, MYF(0)) != 0) + { + diag("Got error during %s closing from close() (errno: %d)\n", + file_name, errno); + exit(1); + } + my_delete(file_name, MYF(0)); + if ((file.file= my_open(file_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + diag("Got error during %s creation from open() (errno: %d)\n", + file_name, errno); + exit(1); + } +} + + +void reader(int num) +{ + unsigned char *buffr= malloc(PAGE_SIZE); + uint i; + + for (i= 0; i < number_of_tests; i++) + { + uint page= rand()/(RAND_MAX/number_of_pages); + pagecache_read(&pagecache, &file1, page, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + check_page(buffr, page * PAGE_SIZE, 0, page, -num); + if (i % 500 == 0) + printf("reader%d: %d\n", num, i); + + } + printf("reader%d: done\n", num); + 
free(buffr); +} + + +void writer(int num) +{ + unsigned char *buffr= malloc(PAGE_SIZE); + uint i; + + for (i= 0; i < number_of_tests; i++) + { + uint end; + uint page= rand()/(RAND_MAX/number_of_pages); + pagecache_read(&pagecache, &file1, page, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE, + 0); + end= check_page(buffr, page * PAGE_SIZE, 1, page, num); + put_rec(buffr, end, rand()/(RAND_MAX/record_length_limit), num); + pagecache_write(&pagecache, &file1, page, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN, + PAGECACHE_WRITE_DELAY, + 0); + + if (i % flush_divider == 0) + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + if (i % 500 == 0) + printf("writer%d: %d\n", num, i); + } + printf("writer%d: done\n", num); + free(buffr); +} + + +static void *test_thread_reader(void *arg) +{ + int param=*((int*) arg); + + my_thread_init(); + DBUG_ENTER("test_reader"); + DBUG_PRINT("enter", ("param: %d", param)); + + reader(param); + + DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name())); + pthread_mutex_lock(&LOCK_thread_count); + thread_count--; + VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */ + pthread_mutex_unlock(&LOCK_thread_count); + free((gptr) arg); + my_thread_end(); + DBUG_RETURN(0); +} + +static void *test_thread_writer(void *arg) +{ + int param=*((int*) arg); + + my_thread_init(); + DBUG_ENTER("test_writer"); + DBUG_PRINT("enter", ("param: %d", param)); + + writer(param); + + DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name())); + pthread_mutex_lock(&LOCK_thread_count); + thread_count--; + VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */ + pthread_mutex_unlock(&LOCK_thread_count); + free((gptr) arg); + my_thread_end(); + DBUG_RETURN(0); +} + +int main(int argc, char **argv __attribute__((unused))) +{ + pthread_t tid; + pthread_attr_t thr_attr; + int *param, error, pagen; + + MY_INIT(argv[0]); + +#ifndef DBUG_OFF +#if 
defined(__WIN__) + default_dbug_option= "d:t:i:O,\\test_pagecache_consist.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/test_pagecache_consist.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + + DBUG_ENTER("main"); + DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name())); + if ((file1.file= my_open(file1_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n", + errno); + exit(1); + } + DBUG_PRINT("info", ("file1: %d", file1.file)); + if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0) + { + fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n", + errno); + exit(1); + } + my_pwrite(file1.file, "test file", 9, 0, MYF(0)); + + if ((error= pthread_cond_init(&COND_thread_count, NULL))) + { + fprintf(stderr, "COND_thread_count: %d from pthread_cond_init (errno: %d)\n", + error, errno); + exit(1); + } + if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST))) + { + fprintf(stderr, "LOCK_thread_count: %d from pthread_cond_init (errno: %d)\n", + error, errno); + exit(1); + } + + if ((error= pthread_attr_init(&thr_attr))) + { + fprintf(stderr,"Got error: %d from pthread_attr_init (errno: %d)\n", + error,errno); + exit(1); + } + if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED))) + { + fprintf(stderr, + "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n", + error,errno); + exit(1); + } + +#ifndef pthread_attr_setstacksize /* void return value */ + if ((error= pthread_attr_setstacksize(&thr_attr, 65536L))) + { + fprintf(stderr,"Got error: %d from pthread_attr_setstacksize (errno: %d)\n", + error,errno); + exit(1); + } +#endif +#ifdef HAVE_THR_SETCONCURRENCY + VOID(thr_setconcurrency(2)); +#endif + + my_thread_global_init(); + + + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + PAGE_SIZE, 0)) == 0) + { + fprintf(stderr,"Got error: 
init_pagecache() (errno: %d)\n", + errno); + exit(1); + } + DBUG_PRINT("info", ("Page cache %d pages", pagen)); + { + unsigned char *buffr= malloc(PAGE_SIZE); + uint i; + memset(buffr, '\0', PAGE_SIZE); + for (i= 0; i < number_of_pages; i++) + { + pagecache_write(&pagecache, &file1, i, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + } + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + free(buffr); + } + if ((error= pthread_mutex_lock(&LOCK_thread_count))) + { + fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_lock (errno: %d)\n", + error,errno); + exit(1); + } + while (number_of_readers != 0 || number_of_writers != 0) + { + if (number_of_readers != 0) + { + param=(int*) malloc(sizeof(int)); + *param= number_of_readers; + if ((error= pthread_create(&tid, &thr_attr, test_thread_reader, + (void*) param))) + { + fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n", + error,errno); + exit(1); + } + thread_count++; + number_of_readers--; + } + if (number_of_writers != 0) + { + param=(int*) malloc(sizeof(int)); + *param= number_of_writers; + if ((error= pthread_create(&tid, &thr_attr, test_thread_writer, + (void*) param))) + { + fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n", + error,errno); + exit(1); + } + thread_count++; + number_of_writers--; + } + } + DBUG_PRINT("info", ("Thread started")); + pthread_mutex_unlock(&LOCK_thread_count); + + pthread_attr_destroy(&thr_attr); + + /* wait finishing */ + if ((error= pthread_mutex_lock(&LOCK_thread_count))) + fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_lock\n",error); + while (thread_count) + { + if ((error= pthread_cond_wait(&COND_thread_count,&LOCK_thread_count))) + fprintf(stderr,"COND_thread_count: %d from pthread_cond_wait\n",error); + } + if ((error= pthread_mutex_unlock(&LOCK_thread_count))) + fprintf(stderr,"LOCK_thread_count: %d from 
pthread_mutex_unlock\n",error); + DBUG_PRINT("info", ("thread ended")); + + end_pagecache(&pagecache, 1); + DBUG_PRINT("info", ("Page cache ended")); + + if (my_close(file1.file, MYF(0)) != 0) + { + fprintf(stderr, "Got error during file1 closing from close() (errno: %d)\n", + errno); + exit(1); + } + /*my_delete(file1_name, MYF(0));*/ + my_thread_global_end(); + + DBUG_PRINT("info", ("file1 (%d) closed", file1.file)); + + DBUG_PRINT("info", ("Program end")); + + DBUG_RETURN(0); +} diff --git a/mysys/test_pagecache_single.c b/mysys/test_pagecache_single.c new file mode 100644 index 00000000000..9df7844cfa5 --- /dev/null +++ b/mysys/test_pagecache_single.c @@ -0,0 +1,589 @@ +#include "mysys_priv.h" +#include "../include/my_pthread.h" +#include "../include/pagecache.h" +#include "my_dir.h" +#include +#include +#include +#include +#include +#include "../unittest/mytap/tap.h" +#include "test_file.h" + +/* #define PAGE_SIZE 1024 */ +#define PCACHE_SIZE (PAGE_SIZE*1024*10) + +#ifndef DBUG_OFF +static const char* default_dbug_option; +#endif + +static char *file1_name= (char*)"page_cache_test_file_1"; +static PAGECACHE_FILE file1; +static pthread_cond_t COND_thread_count; +static pthread_mutex_t LOCK_thread_count; +static uint thread_count; +static PAGECACHE pagecache; + +/* + File contance descriptors +*/ +static struct file_desc simple_read_write_test_file[]= +{ + {PAGE_SIZE, '\1'}, + { 0, 0} +}; +static struct file_desc simple_read_change_write_read_test_file[]= +{ + {PAGE_SIZE/2, '\65'}, + {PAGE_SIZE/2, '\1'}, + { 0, 0} +}; +static struct file_desc simple_pin_test_file1[]= +{ + {PAGE_SIZE*2, '\1'}, + { 0, 0} +}; +static struct file_desc simple_pin_test_file2[]= +{ + {PAGE_SIZE/2, '\1'}, + {PAGE_SIZE/2, (unsigned char)129}, + {PAGE_SIZE, '\1'}, + { 0, 0} +}; +static struct file_desc simple_delete_forget_test_file[]= +{ + {PAGE_SIZE, '\1'}, + { 0, 0} +}; +static struct file_desc simple_delete_flush_test_file[]= +{ + {PAGE_SIZE, '\2'}, + { 0, 0} +}; + + +/* + Recreate 
and reopen a file for test + + SYNOPSIS + reset_file() + file File to reset + file_name Path (and name) of file which should be reset +*/ + +void reset_file(PAGECACHE_FILE file, char *file_name) +{ + flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE); + if (my_close(file1.file, MYF(0)) != 0) + { + diag("Got error during %s closing from close() (errno: %d)\n", + file_name, errno); + exit(1); + } + my_delete(file_name, MYF(0)); + if ((file.file= my_open(file_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + diag("Got error during %s creation from open() (errno: %d)\n", + file_name, errno); + exit(1); + } +} + +/* + Write then read page, check file on disk +*/ + +int simple_read_write_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_read_write_test"); + memset(buffw, '\1', PAGE_SIZE); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + pagecache_read(&pagecache, &file1, 0, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + ok((res= test(memcmp(buffr, buffw, PAGE_SIZE) == 0)), + "Simple write-read page "); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + ok((res&= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, + simple_read_write_test_file))), + "Simple write-read page file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + + +/* + Prepare page, then read (and lock), change (write new value and unlock), + then check the page in the cache and on the disk +*/ +int simple_read_change_write_read_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_read_change_write_read_test"); + /* prepare the file */ + memset(buffw, '\1', PAGE_SIZE); + pagecache_write(&pagecache, &file1, 0, 
3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + /* test */ + pagecache_read(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE, + 0); + memset(buffw, '\65', PAGE_SIZE/2); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN, + PAGECACHE_WRITE_DELAY, + 0); + + pagecache_read(&pagecache, &file1, 0, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + ok((res= test(memcmp(buffr, buffw, PAGE_SIZE) == 0)), + "Simple read-change-write-read page "); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + ok((res&= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, + simple_read_change_write_read_test_file))), + "Simple read-change-write-read page file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + + +/* + Prepare page, read page 0 (and pin) then write page 1 and page 0. + Flush the file (shold flush only page 1 and return 1 (page 0 is + still pinned). + Check file on the disk. + Unpin and flush. + Check file on the disk. 
+*/ +int simple_pin_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_pin_test"); + /* prepare the file */ + memset(buffw, '\1', PAGE_SIZE); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + /* test */ + if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) + { + diag("error in flush_pagecache_blocks\n"); + exit(1); + } + pagecache_read(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE, + 0); + pagecache_write(&pagecache, &file1, 1, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + memset(buffw + PAGE_SIZE/2, ((unsigned char) 129), PAGE_SIZE/2); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE_TO_READ, + PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_WRITE_DELAY, + 0); + /* + We have to get error because one page of the file is pinned, + other page should be flushed + */ + if (!flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) + { + diag("Did not get error in flush_pagecache_blocks\n"); + res= 0; + goto err; + } + ok((res= test(test_file(file1, file1_name, PAGE_SIZE*2, PAGE_SIZE*2, + simple_pin_test_file1))), + "Simple pin page file with pin"); + pagecache_unlock_page(&pagecache, + &file1, + 0, + PAGECACHE_LOCK_READ_UNLOCK, + PAGECACHE_UNPIN, + 0, 0); + if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) + { + diag("Got error in flush_pagecache_blocks\n"); + res= 0; + goto err; + } + ok((res&= test(test_file(file1, file1_name, PAGE_SIZE*2, PAGE_SIZE, + simple_pin_test_file2))), + "Simple pin page result file"); + if (res) + reset_file(file1, file1_name); +err: + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + +/* + Prepare page, write new value, 
then delete page from cache without flush, + on the disk should be page with old content written during preparation +*/ + +int simple_delete_forget_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_delete_forget_test"); + /* prepare the file */ + memset(buffw, '\1', PAGE_SIZE); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + /* test */ + memset(buffw, '\2', PAGE_SIZE); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + pagecache_delete_page(&pagecache, &file1, 0, + PAGECACHE_LOCK_WRITE, 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + ok((res= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, + simple_delete_forget_test_file))), + "Simple delete-forget page file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + +/* + Prepare page with locking, write new content to the page, + delete page with flush and on existing lock, + check that page on disk contain new value. 
+*/ + +int simple_delete_flush_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_delete_flush_test"); + /* prepare the file */ + memset(buffw, '\1', PAGE_SIZE); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE, + PAGECACHE_PIN, + PAGECACHE_WRITE_DELAY, + 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + /* test */ + memset(buffw, '\2', PAGE_SIZE); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_WRITELOCKED, + PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_WRITE_DELAY, + 0); + pagecache_delete_page(&pagecache, &file1, 0, + PAGECACHE_LOCK_LEFT_WRITELOCKED, 1); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + ok((res= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, + simple_delete_flush_test_file))), + "Simple delete-forget page file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + + +/* + write then read file bigger then cache +*/ + +int simple_big_test() +{ + unsigned char *buffw= (unsigned char *)malloc(PAGE_SIZE); + unsigned char *buffr= (unsigned char *)malloc(PAGE_SIZE); + struct file_desc *desc= + (struct file_desc *)malloc((PCACHE_SIZE/(PAGE_SIZE/2)) * + sizeof(struct file_desc)); + int res, i; + DBUG_ENTER("simple_big_test"); + /* prepare the file twice larger then cache */ + for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE/2); i++) + { + memset(buffw, (unsigned char) (i & 0xff), PAGE_SIZE); + desc[i].length= PAGE_SIZE; + desc[i].content= (i & 0xff); + pagecache_write(&pagecache, &file1, i, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + } + ok(1, "Simple big file write"); + /* check written pages sequentally read */ + for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE/2); i++) + { + int j; + pagecache_read(&pagecache, 
&file1, i, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + for(j= 0; j < PAGE_SIZE; j++) + { + if (buffr[j] != (i & 0xff)) + { + diag("simple_big_test seq: page %u byte %u mismatch\n", i, j); + return 0; + } + } + } + ok(1, "simple big file sequentally read"); + /* chack random reads */ + for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE); i++) + { + int j, page; + page= rand() % (PCACHE_SIZE/(PAGE_SIZE/2)); + pagecache_read(&pagecache, &file1, page, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + for(j= 0; j < PAGE_SIZE; j++) + { + if (buffr[j] != (page & 0xff)) + { + diag("simple_big_test rnd: page %u byte %u mismatch\n", page, j); + return 0; + } + } + } + ok(1, "simple big file random read"); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + + ok((res= test(test_file(file1, file1_name, PCACHE_SIZE*2, PAGE_SIZE, + desc))), + "Simple big file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} +/* + Thread function +*/ + +static void *test_thread(void *arg) +{ + int param=*((int*) arg); + + my_thread_init(); + DBUG_ENTER("test_thread"); + + DBUG_PRINT("enter", ("param: %d", param)); + + if (!simple_read_write_test() || + !simple_read_change_write_read_test() || + !simple_pin_test() || + !simple_delete_forget_test() || + !simple_delete_flush_test() || + !simple_big_test()) + exit(1); + + DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name())); + pthread_mutex_lock(&LOCK_thread_count); + thread_count--; + VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */ + pthread_mutex_unlock(&LOCK_thread_count); + free((gptr) arg); + my_thread_end(); + DBUG_RETURN(0); +} + + +int main(int argc, char **argv __attribute__((unused))) +{ + pthread_t tid; + pthread_attr_t thr_attr; + int *param, error, pagen; + + MY_INIT(argv[0]); + +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\test_pagecache_single.trace"; 
+#else + default_dbug_option= "d:t:i:o,/tmp/test_pagecache_single.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + + DBUG_ENTER("main"); + DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name())); + if ((file1.file= my_open(file1_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n", + errno); + exit(1); + } + DBUG_PRINT("info", ("file1: %d", file1.file)); + if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0) + { + fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n", + errno); + exit(1); + } + my_pwrite(file1.file, "test file", 9, 0, MYF(0)); + + if ((error= pthread_cond_init(&COND_thread_count, NULL))) + { + fprintf(stderr, "Got error: %d from pthread_cond_init (errno: %d)\n", + error, errno); + exit(1); + } + if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST))) + { + fprintf(stderr, "Got error: %d from pthread_cond_init (errno: %d)\n", + error, errno); + exit(1); + } + + if ((error= pthread_attr_init(&thr_attr))) + { + fprintf(stderr,"Got error: %d from pthread_attr_init (errno: %d)\n", + error,errno); + exit(1); + } + if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED))) + { + fprintf(stderr, + "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n", + error,errno); + exit(1); + } + +#ifndef pthread_attr_setstacksize /* void return value */ + if ((error= pthread_attr_setstacksize(&thr_attr, 65536L))) + { + fprintf(stderr,"Got error: %d from pthread_attr_setstacksize (errno: %d)\n", + error,errno); + exit(1); + } +#endif +#ifdef HAVE_THR_SETCONCURRENCY + VOID(thr_setconcurrency(2)); +#endif + + my_thread_global_init(); + + + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + PAGE_SIZE, 0)) == 0) + { + fprintf(stderr,"Got error: init_pagecache() (errno: %d)\n", + errno); + exit(1); + } + DBUG_PRINT("info", ("Page cache %d pages", 
pagen)); + + if ((error=pthread_mutex_lock(&LOCK_thread_count))) + { + fprintf(stderr,"Got error: %d from pthread_mutex_lock (errno: %d)\n", + error,errno); + exit(1); + } + param=(int*) malloc(sizeof(int)); + *param= 1; + if ((error= pthread_create(&tid, &thr_attr, test_thread, (void*) param))) + { + fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n", + error,errno); + exit(1); + } + thread_count++; + DBUG_PRINT("info", ("Thread started")); + pthread_mutex_unlock(&LOCK_thread_count); + + pthread_attr_destroy(&thr_attr); + + if ((error= pthread_mutex_lock(&LOCK_thread_count))) + fprintf(stderr,"Got error: %d from pthread_mutex_lock\n",error); + while (thread_count) + { + if ((error= pthread_cond_wait(&COND_thread_count,&LOCK_thread_count))) + fprintf(stderr,"Got error: %d from pthread_cond_wait\n",error); + } + if ((error= pthread_mutex_unlock(&LOCK_thread_count))) + fprintf(stderr,"Got error: %d from pthread_mutex_unlock\n",error); + DBUG_PRINT("info", ("thread ended")); + + end_pagecache(&pagecache, 1); + DBUG_PRINT("info", ("Page cache ended")); + + if (my_close(file1.file, MYF(0)) != 0) + { + fprintf(stderr, "Got error during file1 closing from close() (errno: %d)\n", + errno); + exit(1); + } + /*my_delete(file1_name, MYF(0));*/ + my_thread_global_end(); + + DBUG_PRINT("info", ("file1 (%d) closed", file1.file)); + + DBUG_PRINT("info", ("Program end")); + + DBUG_RETURN(0); +} -- cgit v1.2.1 From 99c431db92f8904bf50f6944e1488a0172c4ebd8 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 10 Aug 2006 16:36:54 +0200 Subject: Completion of merge of mysql-5.1 into mysql-maria. 
Manually imported changes done to MyISAM (include/myisam.h, storage/myisam/*, sql/ha_myisam.*, mysql-test/t/myisam.test, mysql-test/t/ps_2myisam.test) the last months into Maria (tedious, should do it more frequently in the future), including those not done at the previous 5.1->Maria merge (please in the future don't forget to apply MyISAM changes to Maria when you merge 5.1 into Maria). Note: I didn't try to import anything which could be MyISAM-related in other tests of mysql-test (I didn't want to dig in all csets), but as QA is working to make most tests re-usable for other engines (Falcon), it is likely that we'll benefit from this and just have to set engine=Maria somewhere to run those tests on Maria. func_group and partition tests fail but they already do in main 5.1 on my machine. No Valgrind error in t/*maria*.test. Monty: please see the commit comment of maria.result and check. BitKeeper/deleted/.del-ha_maria.m4: Delete: config/ac-macros/ha_maria.m4 configure.in: fix for the new way of enabling engines include/maria.h: importing changes done to MyISAM the last months into Maria include/my_handler.h: importing changes done to MyISAM the last months into Maria include/myisam.h: importing changes done to MyISAM the last months into Maria mysql-test/r/maria.result: identical to myisam.result, except the engine name in some places AND in the line testing key_block_size=1000000000000000000: Maria gives a key block size of 8192 while MyISAM gives 4096; is it explainable by the difference between MARIA_KEY_BLOCK_LENGTH and the same constant in MyISAM? Monty? mysql-test/r/ps_maria.result: identical to ps_2myisam.result (except the engine name in some places) mysql-test/t/maria.test: instead of engine=maria everywhere, I use @@storage_engine (reduces the diff with myisam.test). 
importing changes done to MyISAM the last months into Maria mysys/my_handler.c: importing changes done to MyISAM the last months into Maria sql/ha_maria.cc: importing changes done to MyISAM the last months into Maria sql/ha_maria.h: importing changes done to MyISAM the last months into Maria sql/mysqld.cc: unneeded storage/maria/Makefile.am: importing changes done to MyISAM the last months into Maria storage/maria/ma_check.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_create.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_delete_table.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_dynrec.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_extra.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_ft_boolean_search.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_ft_eval.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_ft_nlq_search.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_ft_parser.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_ft_test1.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_ft_update.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_ftdefs.h: importing changes done to MyISAM the last months into Maria storage/maria/ma_key.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_open.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_page.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_rkey.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_rsamepos.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_rt_index.c: importing changes done to MyISAM the last months into Maria 
storage/maria/ma_rt_mbr.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_search.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_sort.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_test1.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_test2.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_test3.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_update.c: importing changes done to MyISAM the last months into Maria storage/maria/ma_write.c: importing changes done to MyISAM the last months into Maria storage/maria/maria_chk.c: importing changes done to MyISAM the last months into Maria storage/maria/maria_def.h: importing changes done to MyISAM the last months into Maria storage/maria/maria_ftdump.c: importing changes done to MyISAM the last months into Maria storage/maria/maria_pack.c: importing changes done to MyISAM the last months into Maria --- mysys/my_handler.c | 1 - 1 file changed, 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_handler.c b/mysys/my_handler.c index 256f67f6125..3230a669641 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -524,7 +524,6 @@ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) case HA_KEYTYPE_VARTEXT1: case HA_KEYTYPE_VARTEXT2: case HA_KEYTYPE_VARBINARY1: - case HA_KEYTYPE_VARTEXT2: case HA_KEYTYPE_VARBINARY2: { int a_length; -- cgit v1.2.1 From cd876fb11883f68f93027a70b5f3f99ad9234f27 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 10 Aug 2006 19:19:47 +0200 Subject: amd64 atomic ops lock-free alloc (WL#3229), lock-free hash (WL#3230) bit functions made inline include/Makefile.am: lf.h added mysys/Makefile.am: lf_hash.c lf_dynarray.c lf_alloc-pin.c include/atomic/nolock.h: amd64 atomic ops include/atomic/rwlock.h: s/rw_lock/mutex/g include/atomic/x86-gcc.h: amd64 atomic ops try PAUSE include/my_global.h: STATIC_INLINE 
mysys/mf_keycache.c: make bit functions inline mysys/my_atomic.c: STATIC_INLINE mysys/my_bitmap.c: make bit functions inline sql/ha_myisam.cc: make bit functions inline sql/item_func.cc: make bit functions inline include/my_atomic.h: STATIC_INLINE mysys/my_bit.c: make bit functions inline sql/sql_select.cc: make bit functions inline storage/myisam/mi_create.c: make bit functions inline storage/myisam/mi_test2.c: make bit functions inline storage/myisam/myisamchk.c: make bit functions inline mysys/my_init.c: thread_size moved to mysys sql/mysql_priv.h: thread_size moved to mysys sql/set_var.cc: thread_size moved to mysys include/my_sys.h: thread_size moved to mysys sql/mysqld.cc: thread_size moved to mysys sql/sql_parse.cc: thread_size moved to mysys sql/sql_test.cc: thread_size moved to mysys include/lf.h: dylf_dynarray refactored to remove 65536 elements limit mysys/lf_alloc-pin.c: dylf_dynarray refactored to remove 65536 elements limit mysys/lf_dynarray.c: dylf_dynarray refactored to remove 65536 elements limit mysys/lf_hash.c: dylf_dynarray refactored to remove 65536 elements limit unittest/mysys/my_atomic-t.c: fix to commit (remove debug code) --- mysys/Makefile.am | 3 +- mysys/lf_alloc-pin.c | 319 ++++++++++++++++++++++++++++++++++++++++++++ mysys/lf_dynarray.c | 186 ++++++++++++++++++++++++++ mysys/lf_hash.c | 370 +++++++++++++++++++++++++++++++++++++++++++++++++++ mysys/mf_keycache.c | 1 + mysys/my_atomic.c | 7 +- mysys/my_bit.c | 100 +++++--------- mysys/my_bitmap.c | 1 + mysys/my_init.c | 1 + 9 files changed, 915 insertions(+), 73 deletions(-) create mode 100644 mysys/lf_alloc-pin.c create mode 100644 mysys/lf_dynarray.c create mode 100644 mysys/lf_hash.c (limited to 'mysys') diff --git a/mysys/Makefile.am b/mysys/Makefile.am index b209d64e78f..d870437573a 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -31,7 +31,8 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ mf_tempdir.c my_lock.c mf_brkhant.c my_alarm.c \ my_malloc.c 
my_realloc.c my_once.c mulalloc.c \ my_alloc.c safemalloc.c my_new.cc \ - my_vle.c my_atomic.c \ + my_vle.c my_atomic.c lf_hash.c \ + lf_dynarray.c lf_alloc-pin.c \ my_fopen.c my_fstream.c my_getsystime.c \ my_error.c errors.c my_div.c my_messnc.c \ mf_format.c mf_same.c mf_dirname.c mf_fn_ext.c \ diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c new file mode 100644 index 00000000000..a9ea1802c03 --- /dev/null +++ b/mysys/lf_alloc-pin.c @@ -0,0 +1,319 @@ +/* Copyright (C) 2000 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + concurrent allocator based on pinning addresses + + strictly speaking it's not lock-free, as it can be blocked + if a thread's purgatory is full and all addresses from there + are pinned. + + But until the above happens, it's wait-free. + + It can be made strictly wait-free by increasing purgatory size. + If it's larger than pins_in_stack*LF_PINBOX_PINS, then apocalyptical + condition above will never happen. But than the memory requirements + will be O(pins_in_stack^2). + + Note, that for large purgatory sizes it makes sense to remove + purgatory array, and link objects in a list using embedded pointer. 
+ + TODO test with more than 256 threads + TODO test w/o alloca +*/ + +#include +#include +#include + +#define LF_PINBOX_MAX_PINS 65536 + +static void _lf_pinbox_real_free(LF_PINS *pins); + +void lf_pinbox_init(LF_PINBOX *pinbox, lf_pinbox_free_func *free_func, + void *free_func_arg) +{ + DBUG_ASSERT(sizeof(LF_PINS) == 128); + lf_dynarray_init(&pinbox->pinstack, sizeof(LF_PINS)); + pinbox->pinstack_top_ver=0; + pinbox->pins_in_stack=0; + pinbox->free_func=free_func; + pinbox->free_func_arg=free_func_arg; +} + +void lf_pinbox_end(LF_PINBOX *pinbox) +{ + lf_dynarray_end(&pinbox->pinstack); +} + +LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox) +{ + uint32 pins, next, top_ver; + LF_PINS *el; + + top_ver=pinbox->pinstack_top_ver; + do + { + if (!(pins=top_ver % LF_PINBOX_MAX_PINS)) + { + pins=my_atomic_add32(&pinbox->pins_in_stack, 1)+1; + el=(LF_PINS *)_lf_dynarray_lvalue(&pinbox->pinstack, pins); + break; + } + el=(LF_PINS *)_lf_dynarray_value(&pinbox->pinstack, pins); + next=el->link; + } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver, + top_ver-pins+next+LF_PINBOX_MAX_PINS)); + el->link=pins; + el->purgatory_count=0; + el->pinbox=pinbox; + return el; +} + +void _lf_pinbox_put_pins(LF_PINS *pins) +{ + LF_PINBOX *pinbox=pins->pinbox; + uint32 top_ver, nr; + nr=pins->link; +#ifdef MY_LF_EXTRA_DEBUG + { + int i; + for (i=0; i < LF_PINBOX_PINS; i++) + assert(pins->pin[i] == 0); + } +#endif + while (pins->purgatory_count) + { + _lf_pinbox_real_free(pins); + if (pins->purgatory_count && my_getncpus() == 1) + { + my_atomic_rwlock_wrunlock(&pins->pinbox->pinstack.lock); + pthread_yield(); + my_atomic_rwlock_wrlock(&pins->pinbox->pinstack.lock); + } + } + top_ver=pinbox->pinstack_top_ver; + if (nr == pinbox->pins_in_stack) + { + int32 tmp=nr; + if (my_atomic_cas32(&pinbox->pins_in_stack, &tmp, tmp-1)) + goto ret; + } + + do + { + pins->link=top_ver % LF_PINBOX_MAX_PINS; + } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver, + 
top_ver-pins->link+nr+LF_PINBOX_MAX_PINS)); +ret: + return; +} + +static int ptr_cmp(void **a, void **b) +{ + return *a < *b ? -1 : *a == *b ? 0 : 1; +} + +void _lf_pinbox_free(LF_PINS *pins, void *addr) +{ + while (pins->purgatory_count == LF_PURGATORY_SIZE) + { + _lf_pinbox_real_free(pins); + if (pins->purgatory_count == LF_PURGATORY_SIZE && my_getncpus() == 1) + { + my_atomic_rwlock_wrunlock(&pins->pinbox->pinstack.lock); + pthread_yield(); + my_atomic_rwlock_wrlock(&pins->pinbox->pinstack.lock); + } + } + pins->purgatory[pins->purgatory_count++]=addr; +} + +struct st_harvester { + void **granary; + int npins; +}; + +static int harvest_pins(LF_PINS *el, struct st_harvester *hv) +{ + int i; + LF_PINS *el_end= el+min(hv->npins, LF_DYNARRAY_LEVEL_LENGTH); + for (; el < el_end; el++) + { + for (i= 0; i < LF_PINBOX_PINS; i++) + { + void *p= el->pin[i]; + if (p) + *hv->granary++= p; + } + } + hv->npins-= LF_DYNARRAY_LEVEL_LENGTH; + return 0; +} + +static int match_pins(LF_PINS *el, void *addr) +{ + int i; + LF_PINS *el_end= el+LF_DYNARRAY_LEVEL_LENGTH; + for (; el < el_end; el++) + for (i= 0; i < LF_PINBOX_PINS; i++) + if (el->pin[i] == addr) + return 1; + return 0; +} + +static void _lf_pinbox_real_free(LF_PINS *pins) +{ + int npins; + void **addr=0; + void **start, **cur, **end=pins->purgatory+pins->purgatory_count; + LF_PINBOX *pinbox=pins->pinbox; + + npins=pinbox->pins_in_stack+1; + +#ifdef HAVE_ALLOCA + /* create a sorted list of pinned addresses, to speed up searches */ + if (sizeof(void *)*LF_PINBOX_PINS*npins < my_thread_stack_size) + { + struct st_harvester hv; + addr= (void **) alloca(sizeof(void *)*LF_PINBOX_PINS*npins); + hv.granary=addr; + hv.npins=npins; + _lf_dynarray_iterate(&pinbox->pinstack, + (lf_dynarray_func)harvest_pins, &hv); + + npins=hv.granary-addr; + if (npins) + qsort(addr, npins, sizeof(void *), (qsort_cmp)ptr_cmp); + } +#endif + + start= cur= pins->purgatory; + end= start+pins->purgatory_count; + for (; cur < end; cur++) + { + if (npins) 
+ { + if (addr) + { + void **a,**b,**c; + for (a=addr, b=addr+npins-1, c=a+(b-a)/2; b-a>1; c=a+(b-a)/2) + if (*cur == *c) + a=b=c; + else if (*cur > *c) + a=c; + else + b=c; + if (*cur == *a || *cur == *b) + goto found; + } + else + { + if (_lf_dynarray_iterate(&pinbox->pinstack, + (lf_dynarray_func)match_pins, *cur)) + goto found; + } + } + /* not pinned - freeing */ + pinbox->free_func(*cur, pinbox->free_func_arg); + continue; +found: + /* pinned - keeping */ + *start++=*cur; + } + pins->purgatory_count=start-pins->purgatory; +#ifdef MY_LF_EXTRA_DEBUG + while (start < pins->purgatory + LF_PURGATORY_SIZE) + *start++=0; +#endif +} + +static void alloc_free(void *node, LF_ALLOCATOR *allocator) +{ + void *tmp; + tmp=allocator->top; + do + { + (*(void **)node)=tmp; + } while (!my_atomic_casptr((void **)&allocator->top, (void **)&tmp, node) && + LF_BACKOFF); +} + +LF_REQUIRE_PINS(1); +void *_lf_alloc_new(LF_PINS *pins) +{ + LF_ALLOCATOR *allocator=(LF_ALLOCATOR *)(pins->pinbox->free_func_arg); + void *node; + for (;;) + { + do + { + node=allocator->top; + _lf_pin(pins, 0, node); + } while (node !=allocator->top && LF_BACKOFF); + if (!node) + { + if (!(node=my_malloc(allocator->element_size, MYF(MY_WME|MY_ZEROFILL)))) + goto ret; +#ifdef MY_LF_EXTRA_DEBUG + my_atomic_add32(&allocator->mallocs, 1); +#endif + goto ret; + } + if (my_atomic_casptr((void **)&allocator->top, + (void *)&node, *(void **)node)) + goto ret; + } +ret: + _lf_unpin(pins, 0); + return node; +} + +void lf_alloc_init(LF_ALLOCATOR *allocator, uint size) +{ + lf_pinbox_init(&allocator->pinbox, + (lf_pinbox_free_func *)alloc_free, allocator); + allocator->top=0; + allocator->mallocs=0; + allocator->element_size=size; + DBUG_ASSERT(size >= (int)sizeof(void *)); +} + +void lf_alloc_end(LF_ALLOCATOR *allocator) +{ + void *el=allocator->top; + while (el) + { + void *tmp=*(void **)el; + my_free(el, MYF(0)); + el=tmp; + } + lf_pinbox_end(&allocator->pinbox); + allocator->top=0; +} + +/* + NOTE + this is NOT 
thread-safe !!! +*/ +uint lf_alloc_in_pool(LF_ALLOCATOR *allocator) +{ + uint i; + void *node; + for (node=allocator->top, i=0; node; node=*(void **)node, i++) /* no op */; + return i; +} + diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c new file mode 100644 index 00000000000..dcf99163ca1 --- /dev/null +++ b/mysys/lf_dynarray.c @@ -0,0 +1,186 @@ +/* Copyright (C) 2000 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Analog of DYNAMIC_ARRAY that never reallocs + (so no pointer into the array may ever become invalid). + + Memory is allocated in non-contiguous chunks. + This data structure is not space efficient for sparce arrays. + + The number of elements is limited to 2^16 + + Every element is aligned to sizeof(element) boundary + (to avoid false sharing if element is big enough). 
+ + Actually, it's wait-free, not lock-free ;-) +*/ + +#undef DBUG_OFF +#include +#include +#include +#include + +void lf_dynarray_init(LF_DYNARRAY *array, uint element_size) +{ + bzero(array, sizeof(*array)); + array->size_of_element=element_size; + my_atomic_rwlock_init(&array->lock); +} + +static void recursive_free(void **alloc, int level) +{ + if (!alloc) return; + + if (level) + { + int i; + for (i=0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) + recursive_free(alloc[i], level-1); + my_free((void *)alloc, MYF(0)); + } + else + my_free(alloc[-1], MYF(0)); +} + +void lf_dynarray_end(LF_DYNARRAY *array) +{ + int i; + for (i=0; i < LF_DYNARRAY_LEVELS; i++) + recursive_free(array->level[i], i); + my_atomic_rwlock_destroy(&array->lock); + bzero(array, sizeof(*array)); +} + +static const int dynarray_idxes_in_level[LF_DYNARRAY_LEVELS]= +{ + 0, /* +1 here to to avoid -1's below */ + LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH * + LF_DYNARRAY_LEVEL_LENGTH +}; + +void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) +{ + void * ptr, * volatile * ptr_ptr=0; + int i; + + for (i=3; i > 0; i--) + { + if (ptr_ptr || idx >= dynarray_idxes_in_level[i]) + { + if (!ptr_ptr) + { + ptr_ptr=&array->level[i]; + idx-= dynarray_idxes_in_level[i]; + } + ptr=*ptr_ptr; + if (!ptr) + { + void *alloc=my_malloc(LF_DYNARRAY_LEVEL_LENGTH * sizeof(void *), + MYF(MY_WME|MY_ZEROFILL)); + if (!alloc) + return(NULL); + if (my_atomic_casptr(ptr_ptr, &ptr, alloc)) + ptr= alloc; + else + my_free(alloc, MYF(0)); + } + ptr_ptr=((void **)ptr) + idx / dynarray_idxes_in_level[i]; + idx%= dynarray_idxes_in_level[i]; + } + } + if (!ptr_ptr) + ptr_ptr=&array->level[0]; + ptr=*ptr_ptr; + if (!ptr) + { + void *alloc, *data; + alloc=my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element + + max(array->size_of_element, sizeof(void *)), + MYF(MY_WME|MY_ZEROFILL)); + if (!alloc) + return(NULL); + /* reserve the space for 
free() address */ + data= alloc + sizeof(void *); + { /* alignment */ + intptr mod= ((intptr)data) % array->size_of_element; + if (mod) + data+= array->size_of_element - mod; + } + ((void **)data)[-1]=alloc; /* free() will need the original pointer */ + if (my_atomic_casptr(ptr_ptr, &ptr, data)) + ptr= data; + else + my_free(alloc, MYF(0)); + } + return ptr + array->size_of_element * idx; +} + +void *_lf_dynarray_value(LF_DYNARRAY *array, uint idx) +{ + void * ptr, * volatile * ptr_ptr=0; + int i; + + for (i=3; i > 0; i--) + { + if (ptr_ptr || idx >= dynarray_idxes_in_level[i]) + { + if (!ptr_ptr) + { + ptr_ptr=&array->level[i]; + idx-= dynarray_idxes_in_level[i]; + } + ptr=*ptr_ptr; + if (!ptr) + return(NULL); + ptr_ptr=((void **)ptr) + idx / dynarray_idxes_in_level[i]; + idx %= dynarray_idxes_in_level[i]; + } + } + if (!ptr_ptr) + ptr_ptr=&array->level[0]; + ptr=*ptr_ptr; + if (!ptr) + return(NULL); + return ptr + array->size_of_element * idx; +} + +static int recursive_iterate(LF_DYNARRAY *array, void *ptr, int level, + lf_dynarray_func func, void *arg) +{ + int res, i; + if (!ptr) + return 0; + if (!level) + return func(ptr, arg); + for (i=0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) + if ((res=recursive_iterate(array, ((void **)ptr)[i], level-1, func, arg))) + return res; + return 0; +} + +int _lf_dynarray_iterate(LF_DYNARRAY *array, lf_dynarray_func func, void *arg) +{ + int i, res; + for (i=0; i < LF_DYNARRAY_LEVELS; i++) + if ((res=recursive_iterate(array, array->level[i], i, func, arg))) + return res; + return 0; +} + diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c new file mode 100644 index 00000000000..6d3d30ebc3f --- /dev/null +++ b/mysys/lf_hash.c @@ -0,0 +1,370 @@ +/* Copyright (C) 2000 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + extensible hash + + TODO + dummy nodes use element_size=0 + try to get rid of dummy nodes ? +*/ +#include +#include +#include +#include + +LF_REQUIRE_PINS(3); + +typedef struct { + intptr volatile link; + uint32 hashnr; + const uchar *key; + uint keylen; +} LF_SLIST; + +typedef struct { + intptr volatile *prev; + LF_SLIST *curr, *next; +} CURSOR; + +#define PTR(V) (LF_SLIST *)((V) & (~(intptr)1)) +#define DELETED(V) ((V) & 1) + +/* + RETURN + 0 - not found + 1 - found + + NOTE + cursor is positioned in either case + pins[0..2] are used, they are NOT removed on return +*/ +static int lfind(intptr volatile *head, uint32 hashnr, + const uchar *key, uint keylen, CURSOR *cursor, LF_PINS *pins) +{ + uint32 cur_hashnr; + const uchar *cur_key; + uint cur_keylen; + intptr link; + +retry: + cursor->prev=head; + do { + cursor->curr=PTR(*cursor->prev); + _lf_pin(pins,1,cursor->curr); + } while(*cursor->prev != (intptr)cursor->curr && LF_BACKOFF); + for (;;) + { + if (!cursor->curr) + return 0; + do { // XXX or goto retry ? 
+ link=cursor->curr->link; + cursor->next=PTR(link); + _lf_pin(pins, 0, cursor->next); + } while(link != cursor->curr->link && LF_BACKOFF); + cur_hashnr=cursor->curr->hashnr; + cur_key=cursor->curr->key; + cur_keylen=cursor->curr->keylen; + if (*cursor->prev != (intptr)cursor->curr) + { + LF_BACKOFF; + goto retry; + } + if (!DELETED(link)) + { + if (cur_hashnr >= hashnr) + { + int r=1; + if (cur_hashnr > hashnr || (r=memcmp(cur_key, key, keylen)) >= 0) + return !r; + } + cursor->prev=&(cursor->curr->link); + _lf_pin(pins, 2, cursor->curr); + } + else + { + if (my_atomic_casptr((void **)cursor->prev, + (void **)&cursor->curr, cursor->next)) + _lf_alloc_free(pins, cursor->curr); + else + { + LF_BACKOFF; + goto retry; + } + } + cursor->curr=cursor->next; + _lf_pin(pins, 1, cursor->curr); + } +} + +/* + RETURN + 0 - inserted + not 0 - a pointer to a conflict + + NOTE + it uses pins[0..2], on return all pins are removed. +*/ +static LF_SLIST *linsert(LF_SLIST * volatile *head, LF_SLIST *node, + LF_PINS *pins, uint flags) +{ + CURSOR cursor; + int res=-1; + + do + { + if (lfind((intptr*)head, node->hashnr, node->key, node->keylen, + &cursor, pins) && + (flags & LF_HASH_UNIQUE)) + res=0; + else + { + node->link=(intptr)cursor.curr; + assert(node->link != (intptr)node); + assert(cursor.prev != &node->link); + if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node)) + res=1; + } + } while (res == -1); + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + return res ? 0 : cursor.curr; +} + +/* + RETURN + 0 - ok + 1 - not found + NOTE + it uses pins[0..2], on return all pins are removed. 
+*/ +static int ldelete(LF_SLIST * volatile *head, uint32 hashnr, + const uchar *key, uint keylen, LF_PINS *pins) +{ + CURSOR cursor; + int res=-1; + + do + { + if (!lfind((intptr *)head, hashnr, key, keylen, &cursor, pins)) + res= 1; + else + if (my_atomic_casptr((void **)&(cursor.curr->link), + (void **)&cursor.next, 1+(char *)cursor.next)) + { + if (my_atomic_casptr((void **)cursor.prev, + (void **)&cursor.curr, cursor.next)) + _lf_alloc_free(pins, cursor.curr); + else + lfind((intptr *)head, hashnr, key, keylen, &cursor, pins); + res= 0; + } + } while (res == -1); + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + return res; +} + +/* + RETURN + 0 - not found + node - found + NOTE + it uses pins[0..2], on return the pin[2] keeps the node found + all other pins are removed. +*/ +static LF_SLIST *lsearch(LF_SLIST * volatile *head, uint32 hashnr, + const uchar *key, uint keylen, LF_PINS *pins) +{ + CURSOR cursor; + int res=lfind((intptr *)head, hashnr, key, keylen, &cursor, pins); + if (res) _lf_pin(pins, 2, cursor.curr); + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + return res ? 
cursor.curr : 0; +} + +static inline const uchar* hash_key(const LF_HASH *hash, + const uchar *record, uint *length) +{ + if (hash->get_key) + return (*hash->get_key)(record,length,0); + *length=hash->key_length; + return record + hash->key_offset; +} + +static inline uint calc_hash(LF_HASH *hash, const uchar *key, uint keylen) +{ + ulong nr1=1, nr2=4; + hash->charset->coll->hash_sort(hash->charset,key,keylen,&nr1,&nr2); + return nr1 & INT_MAX32; +} + +#define MAX_LOAD 1 +static void initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *); + +void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, + uint key_offset, uint key_length, hash_get_key get_key, + CHARSET_INFO *charset) +{ + lf_alloc_init(&hash->alloc,sizeof(LF_SLIST)+element_size); + lf_dynarray_init(&hash->array, sizeof(LF_SLIST **)); + hash->size=1; + hash->count=0; + hash->element_size=element_size; + hash->flags=flags; + hash->charset=charset; + hash->key_offset=key_offset; + hash->key_length=key_length; + hash->get_key=get_key; + DBUG_ASSERT(get_key ? 
!key_offset && !key_length : key_length); +} + +void lf_hash_end(LF_HASH *hash) +{ + LF_SLIST *el=*(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); + while (el) + { + intptr next=el->link; + lf_alloc_real_free(&hash->alloc, el); + el=(LF_SLIST *)next; + } + lf_alloc_end(&hash->alloc); + lf_dynarray_end(&hash->array); +} + +/* + NOTE + see linsert() for pin usage +*/ +int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) +{ + uint csize, bucket, hashnr, keylen; + LF_SLIST *node, * volatile *el; + const uchar *key; + + key= hash_key(hash, data, &keylen); + hashnr= calc_hash(hash, key, keylen); + bucket= hashnr % hash->size; + lf_lock_by_pins(pins); + node=(LF_SLIST *)_lf_alloc_new(pins); + memcpy(node+1, data, hash->element_size); + el=_lf_dynarray_lvalue(&hash->array, bucket); + if (*el == NULL) + initialize_bucket(hash, el, bucket, pins); + node->hashnr=my_reverse_bits(hashnr) | 1; + node->key=((char *)(node+1))+(key-(uchar *)data); + node->keylen=keylen; + if (linsert(el, node, pins, hash->flags)) + { + _lf_alloc_free(pins, node); + lf_unlock_by_pins(pins); + return 0; + } + csize= hash->size; + if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD) + my_atomic_cas32(&hash->size, &csize, csize*2); +#if 0 + node=*(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); + hashnr=0; + while (node) + { + assert (node->hashnr >= hashnr); + hashnr=node->hashnr; + node=(LF_SLIST *)node->link; + } +#endif + lf_unlock_by_pins(pins); + return 1; +} + +/* + NOTE + see ldelete() for pin usage +*/ +int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const uchar *key, uint keylen) +{ + LF_SLIST * volatile *el; + uint bucket, hashnr=calc_hash(hash, key, keylen); + + bucket= hashnr % hash->size; + lf_lock_by_pins(pins); + el=_lf_dynarray_lvalue(&hash->array, bucket); + if (*el == NULL) + initialize_bucket(hash, el, bucket, pins); + if (ldelete(el, my_reverse_bits(hashnr) | 1, key, keylen, pins)) + { + lf_unlock_by_pins(pins); + return 0; + } + 
my_atomic_add32(&hash->count, -1); +#if 0 + { + LF_SLIST *node=*(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); + hashnr=0; + while (node) + { + assert (node->hashnr >= hashnr); + hashnr=node->hashnr; + node=(LF_SLIST *)node->link; + } + } +#endif + lf_unlock_by_pins(pins); + return 1; +} + +/* + NOTE + see lsearch() for pin usage +*/ +int lf_hash_search(LF_HASH *hash, LF_PINS *pins, const uchar *key, uint keylen) +{ + int res; + LF_SLIST * volatile *el; + uint bucket, hashnr=calc_hash(hash, key, keylen); + + bucket= hashnr % hash->size; + lf_lock_by_pins(pins); + el=_lf_dynarray_lvalue(&hash->array, bucket); + if (*el == NULL) + initialize_bucket(hash, el, bucket, pins); + res=NULL != lsearch(el, my_reverse_bits(hashnr) | 1, key, keylen, pins); + lf_unlock_by_pins(pins); + return res; +} + +static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, + uint bucket, LF_PINS *pins) +{ + uint parent= my_clear_highest_bit(bucket); + LF_SLIST *dummy=_lf_alloc_new(pins), **tmp=0, *cur; + LF_SLIST * volatile *el=_lf_dynarray_lvalue(&hash->array, parent); + if (*el == NULL && bucket) + initialize_bucket(hash, el, parent, pins); + dummy->hashnr=my_reverse_bits(bucket); + LINT_INIT(dummy->key); + LINT_INIT(dummy->keylen); + if ((cur= linsert(el, dummy, pins, 0))) + { + _lf_alloc_free(pins, dummy); + dummy= cur; + } + my_atomic_casptr((void **)node, (void **)&tmp, dummy); +} + diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index e6f4348968f..9a99a278bc5 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -43,6 +43,7 @@ #include #include "my_static.h" #include +#include #include #include diff --git a/mysys/my_atomic.c b/mysys/my_atomic.c index 6f56d76e6b8..2908a44961a 100644 --- a/mysys/my_atomic.c +++ b/mysys/my_atomic.c @@ -18,11 +18,10 @@ #include #ifndef HAVE_INLINE -/* - the following will cause all inline functions to be instantiated -*/ +/* the following will cause all inline functions to be instantiated */ #define HAVE_INLINE -#define 
static extern +#undef STATIC_INLINE +#define STATIC_INLINE extern #endif #include diff --git a/mysys/my_bit.c b/mysys/my_bit.c index 6ef0e171695..11d98f5f6ae 100644 --- a/mysys/my_bit.c +++ b/mysys/my_bit.c @@ -14,23 +14,18 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/* Some useful bit functions */ +#include -#include "mysys_priv.h" - -/* - Find smallest X in 2^X >= value - This can be used to divide a number with value by doing a shift instead -*/ +#ifndef HAVE_INLINE +/* the following will cause all inline functions to be instantiated */ +#define HAVE_INLINE +#undef STATIC_INLINE +#define STATIC_INLINE extern +#endif -uint my_bit_log2(ulong value) -{ - uint bit; - for (bit=0 ; value > 1 ; value>>=1, bit++) ; - return bit; -} +#include -static char nbits[256] = { +const char _my_bits_nbits[256] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, @@ -49,60 +44,29 @@ static char nbits[256] = { 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, }; -uint my_count_bits(ulonglong v) -{ -#if SIZEOF_LONG_LONG > 4 - /* The following code is a bit faster on 16 bit machines than if we would - only shift v */ - ulong v2=(ulong) (v >> 32); - return (uint) (uchar) (nbits[(uchar) v] + - nbits[(uchar) (v >> 8)] + - nbits[(uchar) (v >> 16)] + - nbits[(uchar) (v >> 24)] + - nbits[(uchar) (v2)] + - nbits[(uchar) (v2 >> 8)] + - nbits[(uchar) (v2 >> 16)] + - nbits[(uchar) (v2 >> 24)]); -#else - return (uint) (uchar) (nbits[(uchar) v] + - nbits[(uchar) (v >> 8)] + - nbits[(uchar) (v >> 16)] + - nbits[(uchar) (v >> 24)]); -#endif -} - -uint my_count_bits_ushort(ushort v) -{ - return nbits[v]; -} - - /* - Next highest power of two - - SYNOPSIS - my_round_up_to_next_power() - v Value to check - - RETURN - Next or equal power of 2 - Note: 0 will return 0 - - NOTES - Algorithm by Sean Anderson, according 
to: - http://graphics.stanford.edu/~seander/bithacks.html - (Orignal code public domain) - - Comments shows how this works with 01100000000000000000000000001011 + perl -e 'print map{", 0x".unpack H2,pack B8,unpack b8,chr$_}(0..255)' */ +const uchar _my_bits_reverse_table[256]={ +0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, +0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, +0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, +0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, +0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, +0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, +0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, +0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, +0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, +0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, +0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, +0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, +0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, +0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, +0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43, +0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, +0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, +0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, +0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, +0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF +}; -uint32 my_round_up_to_next_power(uint32 v) -{ - v--; /* 01100000000000000000000000001010 */ - v|= v >> 1; /* 01110000000000000000000000001111 */ - v|= v >> 2; 
/* 01111100000000000000000000001111 */ - v|= v >> 4; /* 01111111110000000000000000001111 */ - v|= v >> 8; /* 01111111111111111100000000001111 */ - v|= v >> 16; /* 01111111111111111111111111111111 */ - return v+1; /* 10000000000000000000000000000000 */ -} diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c index 2c85ce0bf04..8cf8ac0e97c 100644 --- a/mysys/my_bitmap.c +++ b/mysys/my_bitmap.c @@ -39,6 +39,7 @@ #include "mysys_priv.h" #include #include +#include void create_last_word_mask(MY_BITMAP *map) { diff --git a/mysys/my_init.c b/mysys/my_init.c index dca68637161..a1dca635054 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -44,6 +44,7 @@ static void netware_init(); my_bool my_init_done= 0; uint mysys_usage_id= 0; /* Incremented for each my_init() */ +ulong my_thread_stack_size= 65536; static ulong atoi_octal(const char *str) { -- cgit v1.2.1 From 74d050d000ff9db79e36931988386fe7988f8dd2 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 17 Aug 2006 15:20:58 +0200 Subject: maria transaction manager with unit tests include/lf.h: few lf API changes mysys/lf_alloc-pin.c: few lf API changes mysys/lf_dynarray.c: few lf API changes mysys/lf_hash.c: few lf API changes storage/maria/Makefile.am: transaction manager unittest/Makefile.am: maria transaction manager unittest/mysys/my_atomic-t.c: ensure that values are positive storage/maria/trxman.h: New BitKeeper file ``storage/maria/trxman.h'' unittest/maria/Makefile.am: New BitKeeper file ``unittest/maria/Makefile.am'' unittest/maria/trxman-t.c: New BitKeeper file ``unittest/maria/trxman-t.c'' storage/maria/trxman.c: comment clarified --- mysys/lf_alloc-pin.c | 8 +++--- mysys/lf_dynarray.c | 2 +- mysys/lf_hash.c | 78 +++++++++++++++++++--------------------------------- 3 files changed, 34 insertions(+), 54 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index a9ea1802c03..cf1612b73d1 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -54,9 +54,9 @@ void 
lf_pinbox_init(LF_PINBOX *pinbox, lf_pinbox_free_func *free_func, pinbox->free_func_arg=free_func_arg; } -void lf_pinbox_end(LF_PINBOX *pinbox) +void lf_pinbox_destroy(LF_PINBOX *pinbox) { - lf_dynarray_end(&pinbox->pinstack); + lf_dynarray_destroy(&pinbox->pinstack); } LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox) @@ -292,7 +292,7 @@ void lf_alloc_init(LF_ALLOCATOR *allocator, uint size) DBUG_ASSERT(size >= (int)sizeof(void *)); } -void lf_alloc_end(LF_ALLOCATOR *allocator) +void lf_alloc_destroy(LF_ALLOCATOR *allocator) { void *el=allocator->top; while (el) @@ -301,7 +301,7 @@ void lf_alloc_end(LF_ALLOCATOR *allocator) my_free(el, MYF(0)); el=tmp; } - lf_pinbox_end(&allocator->pinbox); + lf_pinbox_destroy(&allocator->pinbox); allocator->top=0; } diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c index dcf99163ca1..0fa04ab095c 100644 --- a/mysys/lf_dynarray.c +++ b/mysys/lf_dynarray.c @@ -57,7 +57,7 @@ static void recursive_free(void **alloc, int level) my_free(alloc[-1], MYF(0)); } -void lf_dynarray_end(LF_DYNARRAY *array) +void lf_dynarray_destroy(LF_DYNARRAY *array) { int i; for (i=0; i < LF_DYNARRAY_LEVELS; i++) diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 6d3d30ebc3f..57936ea7b1f 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -228,14 +228,14 @@ void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, hash->count=0; hash->element_size=element_size; hash->flags=flags; - hash->charset=charset; + hash->charset=charset ? charset : &my_charset_bin; hash->key_offset=key_offset; hash->key_length=key_length; hash->get_key=get_key; DBUG_ASSERT(get_key ? 
!key_offset && !key_length : key_length); } -void lf_hash_end(LF_HASH *hash) +void lf_hash_destroy(LF_HASH *hash) { LF_SLIST *el=*(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); while (el) @@ -244,109 +244,89 @@ void lf_hash_end(LF_HASH *hash) lf_alloc_real_free(&hash->alloc, el); el=(LF_SLIST *)next; } - lf_alloc_end(&hash->alloc); - lf_dynarray_end(&hash->array); + lf_alloc_destroy(&hash->alloc); + lf_dynarray_destroy(&hash->array); } /* + RETURN + 0 - inserted + 1 - didn't (unique key conflict) NOTE - see linsert() for pin usage + see linsert() for pin usage notes */ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) { - uint csize, bucket, hashnr, keylen; + uint csize, bucket, hashnr; LF_SLIST *node, * volatile *el; - const uchar *key; - key= hash_key(hash, data, &keylen); - hashnr= calc_hash(hash, key, keylen); - bucket= hashnr % hash->size; lf_lock_by_pins(pins); node=(LF_SLIST *)_lf_alloc_new(pins); memcpy(node+1, data, hash->element_size); + node->key= hash_key(hash, node+1, &node->keylen); + hashnr= calc_hash(hash, node->key, node->keylen); + bucket= hashnr % hash->size; el=_lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); node->hashnr=my_reverse_bits(hashnr) | 1; - node->key=((char *)(node+1))+(key-(uchar *)data); - node->keylen=keylen; if (linsert(el, node, pins, hash->flags)) { _lf_alloc_free(pins, node); lf_unlock_by_pins(pins); - return 0; + return 1; } csize= hash->size; if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD) my_atomic_cas32(&hash->size, &csize, csize*2); -#if 0 - node=*(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); - hashnr=0; - while (node) - { - assert (node->hashnr >= hashnr); - hashnr=node->hashnr; - node=(LF_SLIST *)node->link; - } -#endif lf_unlock_by_pins(pins); - return 1; + return 0; } /* + RETURN + 0 - deleted + 1 - didn't (not found) NOTE - see ldelete() for pin usage + see ldelete() for pin usage notes */ -int lf_hash_delete(LF_HASH *hash, 
LF_PINS *pins, const uchar *key, uint keylen) +int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) { LF_SLIST * volatile *el; - uint bucket, hashnr=calc_hash(hash, key, keylen); + uint bucket, hashnr=calc_hash(hash, (uchar *)key, keylen); bucket= hashnr % hash->size; lf_lock_by_pins(pins); el=_lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); - if (ldelete(el, my_reverse_bits(hashnr) | 1, key, keylen, pins)) + if (ldelete(el, my_reverse_bits(hashnr) | 1, (uchar *)key, keylen, pins)) { lf_unlock_by_pins(pins); - return 0; + return 1; } my_atomic_add32(&hash->count, -1); -#if 0 - { - LF_SLIST *node=*(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); - hashnr=0; - while (node) - { - assert (node->hashnr >= hashnr); - hashnr=node->hashnr; - node=(LF_SLIST *)node->link; - } - } -#endif lf_unlock_by_pins(pins); - return 1; + return 0; } /* NOTE - see lsearch() for pin usage + see lsearch() for pin usage notes */ -int lf_hash_search(LF_HASH *hash, LF_PINS *pins, const uchar *key, uint keylen) +void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) { - int res; - LF_SLIST * volatile *el; - uint bucket, hashnr=calc_hash(hash, key, keylen); + LF_SLIST * volatile *el, *found; + uint bucket, hashnr=calc_hash(hash, (uchar *)key, keylen); bucket= hashnr % hash->size; lf_lock_by_pins(pins); el=_lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); - res=NULL != lsearch(el, my_reverse_bits(hashnr) | 1, key, keylen, pins); + found= lsearch(el, my_reverse_bits(hashnr) | 1, (uchar *)key, keylen, pins); lf_unlock_by_pins(pins); - return res; + return found+1; } static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, -- cgit v1.2.1 From cd40855e9d8ee3a70f53aa061a9e303eec38a52d Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 17 Aug 2006 15:22:54 +0200 Subject: lf_hash: only data nodes use lf_alloc now, dummy nodes are 
malloc'ed directly mysys/lf_hash.c: only data nodes use lf_alloc now, dummy nodes are malloc'ed directly --- mysys/lf_hash.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 57936ea7b1f..736c3ea4887 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -18,7 +18,6 @@ extensible hash TODO - dummy nodes use element_size=0 try to get rid of dummy nodes ? */ #include @@ -241,7 +240,10 @@ void lf_hash_destroy(LF_HASH *hash) while (el) { intptr next=el->link; - lf_alloc_real_free(&hash->alloc, el); + if (el->hashnr & 1) + lf_alloc_real_free(&hash->alloc, el); + else + my_free((void *)el, MYF(0)); el=(LF_SLIST *)next; } lf_alloc_destroy(&hash->alloc); @@ -263,7 +265,7 @@ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) lf_lock_by_pins(pins); node=(LF_SLIST *)_lf_alloc_new(pins); memcpy(node+1, data, hash->element_size); - node->key= hash_key(hash, node+1, &node->keylen); + node->key= hash_key(hash, (uchar *)(node+1), &node->keylen); hashnr= calc_hash(hash, node->key, node->keylen); bucket= hashnr % hash->size; el=_lf_dynarray_lvalue(&hash->array, bucket); @@ -329,17 +331,20 @@ void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) return found+1; } +static char *dummy_key=""; + static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, uint bucket, LF_PINS *pins) { uint parent= my_clear_highest_bit(bucket); - LF_SLIST *dummy=_lf_alloc_new(pins), **tmp=0, *cur; + LF_SLIST *dummy=(LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME)); + LF_SLIST **tmp=0, *cur; LF_SLIST * volatile *el=_lf_dynarray_lvalue(&hash->array, parent); if (*el == NULL && bucket) initialize_bucket(hash, el, parent, pins); dummy->hashnr=my_reverse_bits(bucket); - LINT_INIT(dummy->key); - LINT_INIT(dummy->keylen); + dummy->key=dummy_key; + dummy->keylen=0; if ((cur= linsert(el, dummy, pins, 0))) { _lf_alloc_free(pins, dummy); -- cgit v1.2.1 From 
52191ea4d8dda55173a7c721371faf15bf0ea39c Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 29 Aug 2006 22:10:06 +0200 Subject: importing Sanja's changes to the control file, with my changes on them. mysys/my_pread.c: print errno in case of error storage/maria/control_file.c: importing Sanja's changes, with my minor changes on them :) storage/maria/control_file.h: importing Sanja's changes, with my minor changes on them :) --- mysys/my_pread.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_pread.c b/mysys/my_pread.c index ac52895efe9..072c18ae7f6 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -155,7 +155,8 @@ uint my_pwrite(int Filedes, const byte *Buffer, uint Count, my_off_t offset, Count-=writenbytes; offset+=writenbytes; } - DBUG_PRINT("error",("Write only %d bytes",writenbytes)); + DBUG_PRINT("error",("Write only %d bytes, error: %d", + writenbytes, my_errno)); #ifndef NO_BACKGROUND #ifdef THREAD if (my_thread_var->abort) -- cgit v1.2.1 From cdf831cf94fe9aabde6ffb5b19557893416061d6 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 14 Sep 2006 19:06:51 +0200 Subject: WL#3071 Maria checkpoint: changing pseudocode to use the structures of the Maria pagecache ("pagecache->changed_blocks" etc) and other Maria structures inherited from MyISAM (THR_LOCK_maria etc). mysys/mf_pagecache.c: comment storage/maria/ma_checkpoint.c: changing pseudocode to use the structures of the Maria pagecache ("pagecache->changed_blocks" etc) and other Maria structures inherited from MyISAM (THR_LOCK_maria etc). 
storage/maria/ma_checkpoint.h: copyright storage/maria/ma_control_file.c: copyright storage/maria/ma_control_file.h: copyright storage/maria/ma_least_recently_dirtied.c: copyright storage/maria/ma_least_recently_dirtied.h: copyright storage/maria/ma_recovery.c: copyright storage/maria/ma_recovery.h: copyright storage/maria/unittest/Makefile.am: copyright --- mysys/mf_pagecache.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'mysys') diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 4693995f922..3a054077809 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -2937,6 +2937,10 @@ restart: } pagecache->blocks_changed--; pagecache->global_blocks_changed--; + /* + free_block() will change the status of the block so no need to change + it here. + */ } /* Cache is locked, so we can relese page before freeing it */ -- cgit v1.2.1 From b860539e37bb585794e144d5785906af13e0e1b9 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 20 Sep 2006 10:42:15 +0200 Subject: Maria, very minor changes. 
mysys/Makefile.am: test_pagecache* programs need to link with dbug (I used BUILD/compile-pentium64-valgrind-max) mysys/mf_pagecache.c: unused constant --- mysys/Makefile.am | 3 ++- mysys/mf_pagecache.c | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/Makefile.am b/mysys/Makefile.am index 6317d6c9285..edafd128eb0 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -20,7 +20,8 @@ MYSQLBASEdir= $(prefix) INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \ -I$(top_srcdir)/include -I$(srcdir) pkglib_LIBRARIES = libmysys.a -LDADD = libmysys.a $(top_builddir)/strings/libmystrings.a +LDADD = libmysys.a $(top_builddir)/strings/libmystrings.a \ + $(top_builddir)/dbug/libdbug.a noinst_HEADERS = mysys_priv.h my_static.h libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ mf_path.c mf_loadpath.c my_file.c \ diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 3a054077809..9c6f586be8a 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -154,7 +154,6 @@ struct st_pagecache_hash_link #define BLOCK_IN_FLUSH 16 /* block is in flush operation */ #define BLOCK_CHANGED 32 /* block buffer contains a dirty page */ #define BLOCK_WRLOCK 64 /* write locked block */ -#define BLOCK_CPYWRT 128 /* block buffer is in copy&write (see also cpyrd) */ /* page status, returned by find_key_block */ #define PAGE_READ 0 -- cgit v1.2.1 From 8e04cdb2dd1b5102e578c9c98b220a07857bfcbb Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 21 Sep 2006 23:12:56 +0200 Subject: Maria: fixes for build failures in pushbuild. Comments, fixing of function names. mysys/mf_pagecache.c: comments fixing. More comments. pagecache_ulock_block->page_unlock_block sql/mysqld.cc: MyISAM is always enabled so Maria needs have_maria which MyISAM does not need. 
This should fix a link failure in pushbuild storage/Makefile.am: force myisam to be built before maria (will not be needed when Maria does not depend on MyISAM anymore) --- mysys/mf_pagecache.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 9c6f586be8a..4fa814d8188 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -523,13 +523,13 @@ static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond); /* - Read page from the disk + Write page to the disk SYNOPSIS pagecache_fwrite() pagecache - page cache pointer filedesc - pagecache file descriptor structure - buffer - buffer in which we will read + buffer - buffer which we will write type - page type (plain or with LSN) flags - MYF() flags @@ -571,6 +571,10 @@ uint pagecache_fwrite(PAGECACHE *pagecache, (pageno)<<(pagecache->shift), flags) +/* + next_power(value) is 2 at the power of (1+floor(log2(value))); + e.g. next_power(2)=4, next_power(3)=4. 
+*/ static uint next_power(uint value) { uint old_value= 1; @@ -2167,9 +2171,9 @@ my_bool pagecache_lock_block(PAGECACHE *pagecache, DBUG_RETURN(0); } -void pagecache_ulock_block(PAGECACHE_BLOCK_LINK *block) +void pagecache_unlock_block(PAGECACHE_BLOCK_LINK *block) { - DBUG_ENTER("pagecache_ulock_block"); + DBUG_ENTER("pagecache_unlock_block"); BLOCK_INFO(block); DBUG_ASSERT(block->status & BLOCK_WRLOCK); block->status&= ~BLOCK_WRLOCK; @@ -2234,7 +2238,7 @@ my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache, Removes writelog and puts read lock (which is nothing in our implementation) */ - pagecache_ulock_block(block); + pagecache_unlock_block(block); case PAGECACHE_LOCK_READ_UNLOCK: /* read -> free */ case PAGECACHE_LOCK_LEFT_READLOCKED: /* read -> read */ #ifndef DBUG_OFF @@ -2247,8 +2251,9 @@ my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache, if (lock == PAGECACHE_LOCK_WRITE_TO_READ) { pagecache_change_lock(block, 0); - } else if (lock == PAGECACHE_LOCK_WRITE_UNLOCK || - lock == PAGECACHE_LOCK_READ_UNLOCK) + } + else if (lock == PAGECACHE_LOCK_WRITE_UNLOCK || + lock == PAGECACHE_LOCK_READ_UNLOCK) { pagecache_remove_lock(block); } @@ -2422,8 +2427,8 @@ void pagecache_unlock_page(PAGECACHE *pagecache, pagecache_pthread_mutex_lock(&pagecache->cache_lock); /* - As soon as we keep lock cache can be used, and we have lock bacause want - aunlock. + As soon as we keep lock cache can be used, and we have lock because want + to unlock. 
*/ DBUG_ASSERT(pagecache->can_be_used); @@ -2891,7 +2896,7 @@ restart: link= get_present_hash_link(pagecache, file, pageno, &unused_start); if (!link) { - DBUG_PRINT("info", ("There is no fuch page in the cache")); + DBUG_PRINT("info", ("There is no such page in the cache")); DBUG_RETURN(0); } block= link->block; -- cgit v1.2.1 From c2872bafde6d6ec2444c293f7a8aa397eb1dbb59 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 13 Oct 2006 11:37:27 +0200 Subject: push for trnman review (lockmanager still fails unit tests) BitKeeper/deleted/.del-Makefile.am~4375ae3d4de2bdf0: Delete: unittest/maria/Makefile.am configure.in: silence up configure warnings, don't generate unittest/maria/Makefile include/atomic/nolock.h: s/LOCK/LOCK_prefix/ include/atomic/x86-gcc.h: s/LOCK/LOCK_prefix/ include/atomic/x86-msvc.h: s/LOCK/LOCK_prefix/ include/lf.h: pin asserts, renames include/my_atomic.h: move cleanup include/my_bit.h: s/uint/uint32/ mysys/lf_dynarray.c: style fixes, split for() in two, remove if()s mysys/lf_hash.c: renames, minor fixes mysys/my_atomic.c: run-time assert -> compile-time assert storage/maria/Makefile.am: lockman here storage/maria/unittest/Makefile.am: new unit tests storage/maria/unittest/trnman-t.c: lots of changes storage/maria/lockman.c: many changes: second meaning of "blocker" portability: s/gettimeofday/my_getsystime/ move mutex/cond out of LOCK_OWNER - it creates a race condition that will be fixed in a separate changeset increment lm->count for every element, not only for distinct ones - because we cannot decrease it for distinct elements only :( storage/maria/lockman.h: move mutex/cond out of LOCK_OWNER storage/maria/trnman.c: move mutex/cond out of LOCK_OWNER atomic-ops to access short_trid_to_trn[] storage/maria/trnman.h: move mutex/cond out of LOCK_OWNER storage/maria/unittest/lockman-t.c: unit stress test --- mysys/lf_dynarray.c | 92 +++++++++++++++++++++-------------------------------- mysys/lf_hash.c | 41 ++++++++++++------------ mysys/my_atomic.c 
| 2 +- 3 files changed, 59 insertions(+), 76 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c index 0fa04ab095c..ade1c28d51c 100644 --- a/mysys/lf_dynarray.c +++ b/mysys/lf_dynarray.c @@ -38,7 +38,7 @@ void lf_dynarray_init(LF_DYNARRAY *array, uint element_size) { bzero(array, sizeof(*array)); - array->size_of_element=element_size; + array->size_of_element= element_size; my_atomic_rwlock_init(&array->lock); } @@ -49,7 +49,7 @@ static void recursive_free(void **alloc, int level) if (level) { int i; - for (i=0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) + for (i= 0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) recursive_free(alloc[i], level-1); my_free((void *)alloc, MYF(0)); } @@ -60,13 +60,13 @@ static void recursive_free(void **alloc, int level) void lf_dynarray_destroy(LF_DYNARRAY *array) { int i; - for (i=0; i < LF_DYNARRAY_LEVELS; i++) + for (i= 0; i < LF_DYNARRAY_LEVELS; i++) recursive_free(array->level[i], i); my_atomic_rwlock_destroy(&array->lock); bzero(array, sizeof(*array)); } -static const int dynarray_idxes_in_level[LF_DYNARRAY_LEVELS]= +static const int dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= { 0, /* +1 here to to avoid -1's below */ LF_DYNARRAY_LEVEL_LENGTH, @@ -77,41 +77,32 @@ static const int dynarray_idxes_in_level[LF_DYNARRAY_LEVELS]= void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) { - void * ptr, * volatile * ptr_ptr=0; + void * ptr, * volatile * ptr_ptr= 0; int i; - for (i=3; i > 0; i--) + for (i= 3; idx < dynarray_idxes_in_prev_level[i]; i--) /* no-op */; + ptr_ptr= &array->level[i]; + idx-= dynarray_idxes_in_prev_level[i]; + for (; i > 0; i--) { - if (ptr_ptr || idx >= dynarray_idxes_in_level[i]) + if (!(ptr= *ptr_ptr)) { - if (!ptr_ptr) - { - ptr_ptr=&array->level[i]; - idx-= dynarray_idxes_in_level[i]; - } - ptr=*ptr_ptr; - if (!ptr) - { - void *alloc=my_malloc(LF_DYNARRAY_LEVEL_LENGTH * sizeof(void *), - MYF(MY_WME|MY_ZEROFILL)); - if (!alloc) - return(NULL); - if (my_atomic_casptr(ptr_ptr, &ptr, 
alloc)) - ptr= alloc; - else - my_free(alloc, MYF(0)); - } - ptr_ptr=((void **)ptr) + idx / dynarray_idxes_in_level[i]; - idx%= dynarray_idxes_in_level[i]; + void *alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * sizeof(void *), + MYF(MY_WME|MY_ZEROFILL)); + if (!alloc) + return(NULL); + if (my_atomic_casptr(ptr_ptr, &ptr, alloc)) + ptr= alloc; + else + my_free(alloc, MYF(0)); } + ptr_ptr= ((void **)ptr) + idx / dynarray_idxes_in_prev_level[i]; + idx%= dynarray_idxes_in_prev_level[i]; } - if (!ptr_ptr) - ptr_ptr=&array->level[0]; - ptr=*ptr_ptr; - if (!ptr) + if (!(ptr= *ptr_ptr)) { void *alloc, *data; - alloc=my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element + + alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element + max(array->size_of_element, sizeof(void *)), MYF(MY_WME|MY_ZEROFILL)); if (!alloc) @@ -123,7 +114,7 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) if (mod) data+= array->size_of_element - mod; } - ((void **)data)[-1]=alloc; /* free() will need the original pointer */ + ((void **)data)[-1]= alloc; /* free() will need the original pointer */ if (my_atomic_casptr(ptr_ptr, &ptr, data)) ptr= data; else @@ -134,29 +125,20 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) void *_lf_dynarray_value(LF_DYNARRAY *array, uint idx) { - void * ptr, * volatile * ptr_ptr=0; + void * ptr, * volatile * ptr_ptr= 0; int i; - for (i=3; i > 0; i--) + for (i= 3; idx < dynarray_idxes_in_prev_level[i]; i--) /* no-op */; + ptr_ptr= &array->level[i]; + idx-= dynarray_idxes_in_prev_level[i]; + for (; i > 0; i--) { - if (ptr_ptr || idx >= dynarray_idxes_in_level[i]) - { - if (!ptr_ptr) - { - ptr_ptr=&array->level[i]; - idx-= dynarray_idxes_in_level[i]; - } - ptr=*ptr_ptr; - if (!ptr) - return(NULL); - ptr_ptr=((void **)ptr) + idx / dynarray_idxes_in_level[i]; - idx %= dynarray_idxes_in_level[i]; - } + if (!(ptr= *ptr_ptr)) + return(NULL); + ptr_ptr= ((void **)ptr) + idx / dynarray_idxes_in_prev_level[i]; + idx %= 
dynarray_idxes_in_prev_level[i]; } - if (!ptr_ptr) - ptr_ptr=&array->level[0]; - ptr=*ptr_ptr; - if (!ptr) + if (!(ptr= *ptr_ptr)) return(NULL); return ptr + array->size_of_element * idx; } @@ -169,8 +151,8 @@ static int recursive_iterate(LF_DYNARRAY *array, void *ptr, int level, return 0; if (!level) return func(ptr, arg); - for (i=0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) - if ((res=recursive_iterate(array, ((void **)ptr)[i], level-1, func, arg))) + for (i= 0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) + if ((res= recursive_iterate(array, ((void **)ptr)[i], level-1, func, arg))) return res; return 0; } @@ -178,8 +160,8 @@ static int recursive_iterate(LF_DYNARRAY *array, void *ptr, int level, int _lf_dynarray_iterate(LF_DYNARRAY *array, lf_dynarray_func func, void *arg) { int i, res; - for (i=0; i < LF_DYNARRAY_LEVELS; i++) - if ((res=recursive_iterate(array, array->level[i], i, func, arg))) + for (i= 0; i < LF_DYNARRAY_LEVELS; i++) + if ((res= recursive_iterate(array, array->level[i], i, func, arg))) return res; return 0; } diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 736c3ea4887..a0425e89556 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -19,6 +19,7 @@ TODO try to get rid of dummy nodes ? 
+ for non-unique hash, count only _distinct_ values */ #include #include @@ -51,7 +52,7 @@ typedef struct { cursor is positioned in either case pins[0..2] are used, they are NOT removed on return */ -static int lfind(intptr volatile *head, uint32 hashnr, +static int lfind(LF_SLIST * volatile *head, uint32 hashnr, const uchar *key, uint keylen, CURSOR *cursor, LF_PINS *pins) { uint32 cur_hashnr; @@ -60,7 +61,7 @@ static int lfind(intptr volatile *head, uint32 hashnr, intptr link; retry: - cursor->prev=head; + cursor->prev=(intptr *)head; do { cursor->curr=PTR(*cursor->prev); _lf_pin(pins,1,cursor->curr); @@ -112,7 +113,7 @@ retry: /* RETURN 0 - inserted - not 0 - a pointer to a conflict + not 0 - a pointer to a conflict (not pinned and thus unusable) NOTE it uses pins[0..2], on return all pins are removed. @@ -125,17 +126,17 @@ static LF_SLIST *linsert(LF_SLIST * volatile *head, LF_SLIST *node, do { - if (lfind((intptr*)head, node->hashnr, node->key, node->keylen, + if (lfind(head, node->hashnr, node->key, node->keylen, &cursor, pins) && (flags & LF_HASH_UNIQUE)) - res=0; + res=0; /* duplicate found */ else { node->link=(intptr)cursor.curr; assert(node->link != (intptr)node); assert(cursor.prev != &node->link); if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node)) - res=1; + res=1; /* inserted ok */ } } while (res == -1); _lf_unpin(pins, 0); @@ -159,7 +160,7 @@ static int ldelete(LF_SLIST * volatile *head, uint32 hashnr, do { - if (!lfind((intptr *)head, hashnr, key, keylen, &cursor, pins)) + if (!lfind(head, hashnr, key, keylen, &cursor, pins)) res= 1; else if (my_atomic_casptr((void **)&(cursor.curr->link), @@ -169,7 +170,7 @@ static int ldelete(LF_SLIST * volatile *head, uint32 hashnr, (void **)&cursor.curr, cursor.next)) _lf_alloc_free(pins, cursor.curr); else - lfind((intptr *)head, hashnr, key, keylen, &cursor, pins); + lfind(head, hashnr, key, keylen, &cursor, pins); res= 0; } } while (res == -1); @@ -191,7 +192,7 @@ static LF_SLIST 
*lsearch(LF_SLIST * volatile *head, uint32 hashnr, const uchar *key, uint keylen, LF_PINS *pins) { CURSOR cursor; - int res=lfind((intptr *)head, hashnr, key, keylen, &cursor, pins); + int res=lfind(head, hashnr, key, keylen, &cursor, pins); if (res) _lf_pin(pins, 2, cursor.curr); _lf_unpin(pins, 0); _lf_unpin(pins, 1); @@ -214,7 +215,7 @@ static inline uint calc_hash(LF_HASH *hash, const uchar *key, uint keylen) return nr1 & INT_MAX32; } -#define MAX_LOAD 1 +#define MAX_LOAD 1.0 static void initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *); void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, @@ -262,7 +263,7 @@ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) uint csize, bucket, hashnr; LF_SLIST *node, * volatile *el; - lf_lock_by_pins(pins); + lf_rwlock_by_pins(pins); node=(LF_SLIST *)_lf_alloc_new(pins); memcpy(node+1, data, hash->element_size); node->key= hash_key(hash, (uchar *)(node+1), &node->keylen); @@ -275,13 +276,13 @@ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) if (linsert(el, node, pins, hash->flags)) { _lf_alloc_free(pins, node); - lf_unlock_by_pins(pins); + lf_rwunlock_by_pins(pins); return 1; } csize= hash->size; if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD) my_atomic_cas32(&hash->size, &csize, csize*2); - lf_unlock_by_pins(pins); + lf_rwunlock_by_pins(pins); return 0; } @@ -298,17 +299,17 @@ int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) uint bucket, hashnr=calc_hash(hash, (uchar *)key, keylen); bucket= hashnr % hash->size; - lf_lock_by_pins(pins); + lf_rwlock_by_pins(pins); el=_lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); if (ldelete(el, my_reverse_bits(hashnr) | 1, (uchar *)key, keylen, pins)) { - lf_unlock_by_pins(pins); + lf_rwunlock_by_pins(pins); return 1; } my_atomic_add32(&hash->count, -1); - lf_unlock_by_pins(pins); + lf_rwunlock_by_pins(pins); return 0; } @@ 
-322,13 +323,13 @@ void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) uint bucket, hashnr=calc_hash(hash, (uchar *)key, keylen); bucket= hashnr % hash->size; - lf_lock_by_pins(pins); + lf_rwlock_by_pins(pins); el=_lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); found= lsearch(el, my_reverse_bits(hashnr) | 1, (uchar *)key, keylen, pins); - lf_unlock_by_pins(pins); - return found+1; + lf_rwunlock_by_pins(pins); + return found ? found+1 : 0; } static char *dummy_key=""; @@ -347,7 +348,7 @@ static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, dummy->keylen=0; if ((cur= linsert(el, dummy, pins, 0))) { - _lf_alloc_free(pins, dummy); + my_free((void *)dummy, MYF(0)); dummy= cur; } my_atomic_casptr((void **)node, (void **)&tmp, dummy); diff --git a/mysys/my_atomic.c b/mysys/my_atomic.c index 2908a44961a..fbeb3d63bef 100644 --- a/mysys/my_atomic.c +++ b/mysys/my_atomic.c @@ -35,7 +35,7 @@ */ int my_atomic_initialize() { - DBUG_ASSERT(sizeof(intptr) == sizeof(void *)); + char assert_the_size[sizeof(intptr) == sizeof(void *) ? 1 : -1]; /* currently the only thing worth checking is SMP/UP issue */ #ifdef MY_ATOMIC_MODE_DUMMY return my_getncpus() == 1 ? MY_ATOMIC_OK : MY_ATOMIC_NOT_1CPU; -- cgit v1.2.1 From 12a55aeabc353fdc1c3829ddd8baacb142160c80 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 18 Oct 2006 17:24:07 +0200 Subject: lock manager passed unit tests storage/maria/trnman.c: comments include/my_dbug.h: make DBUG_ASSERT always a statement storage/maria/lockman.h: comments include/lf.h: lf_pinbox - don't use a fixed-size purgatory. mysys/lf_alloc-pin.c: lf_pinbox - don't use a fixed-size purgatory. mysys/lf_hash.c: lf_pinbox - don't use a fixed-size purgatory. storage/maria/lockman.c: removed IGNORE_ME/UPGDARED matching - it was wrong in the first place. 
updated for "lf_pinbox - don't use a fixed-size purgatory" storage/maria/unittest/lockman-t.c: IGNORE_ME/UPGRADED pair counting bugtest. more tests unittest/mysys/my_atomic-t.c: lf_pinbox - don't use a fixed-size purgatory. --- mysys/lf_alloc-pin.c | 172 +++++++++++++++++++++++++-------------------------- mysys/lf_hash.c | 4 +- 2 files changed, 87 insertions(+), 89 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index cf1612b73d1..842c9690463 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -15,21 +15,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* - concurrent allocator based on pinning addresses - - strictly speaking it's not lock-free, as it can be blocked - if a thread's purgatory is full and all addresses from there - are pinned. - - But until the above happens, it's wait-free. - - It can be made strictly wait-free by increasing purgatory size. - If it's larger than pins_in_stack*LF_PINBOX_PINS, then apocalyptical - condition above will never happen. But than the memory requirements - will be O(pins_in_stack^2). - - Note, that for large purgatory sizes it makes sense to remove - purgatory array, and link objects in a list using embedded pointer. 
+ wait-free concurrent allocator based on pinning addresses TODO test with more than 256 threads TODO test w/o alloca @@ -43,15 +29,17 @@ static void _lf_pinbox_real_free(LF_PINS *pins); -void lf_pinbox_init(LF_PINBOX *pinbox, lf_pinbox_free_func *free_func, - void *free_func_arg) +void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, + lf_pinbox_free_func *free_func,void *free_func_arg) { DBUG_ASSERT(sizeof(LF_PINS) == 128); + DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0); lf_dynarray_init(&pinbox->pinstack, sizeof(LF_PINS)); - pinbox->pinstack_top_ver=0; - pinbox->pins_in_stack=0; - pinbox->free_func=free_func; - pinbox->free_func_arg=free_func_arg; + pinbox->pinstack_top_ver= 0; + pinbox->pins_in_stack= 0; + pinbox->free_ptr_offset= free_ptr_offset; + pinbox->free_func= free_func; + pinbox->free_func_arg= free_func_arg; } void lf_pinbox_destroy(LF_PINBOX *pinbox) @@ -64,58 +52,64 @@ LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox) uint32 pins, next, top_ver; LF_PINS *el; - top_ver=pinbox->pinstack_top_ver; + top_ver= pinbox->pinstack_top_ver; do { - if (!(pins=top_ver % LF_PINBOX_MAX_PINS)) + if (!(pins= top_ver % LF_PINBOX_MAX_PINS)) { - pins=my_atomic_add32(&pinbox->pins_in_stack, 1)+1; - el=(LF_PINS *)_lf_dynarray_lvalue(&pinbox->pinstack, pins); + pins= my_atomic_add32(&pinbox->pins_in_stack, 1)+1; + el= (LF_PINS *)_lf_dynarray_lvalue(&pinbox->pinstack, pins); break; } - el=(LF_PINS *)_lf_dynarray_value(&pinbox->pinstack, pins); - next=el->link; + el= (LF_PINS *)_lf_dynarray_value(&pinbox->pinstack, pins); + next= el->link; } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver, top_ver-pins+next+LF_PINBOX_MAX_PINS)); - el->link=pins; - el->purgatory_count=0; - el->pinbox=pinbox; + el->link= pins; + el->purgatory_count= 0; + el->pinbox= pinbox; return el; } void _lf_pinbox_put_pins(LF_PINS *pins) { - LF_PINBOX *pinbox=pins->pinbox; + LF_PINBOX *pinbox= pins->pinbox; uint32 top_ver, nr; - nr=pins->link; + nr= pins->link; #ifdef 
MY_LF_EXTRA_DEBUG { int i; - for (i=0; i < LF_PINBOX_PINS; i++) + for (i= 0; i < LF_PINBOX_PINS; i++) assert(pins->pin[i] == 0); } #endif + /* + Note - this will deadlock if other threads will wait for + the caller to do something after _lf_pinbox_put_pins(), + and they would have pinned addresses that the caller wants to free. + Thus: only free pins when all work is done and nobody can wait for you!!! + */ while (pins->purgatory_count) { _lf_pinbox_real_free(pins); - if (pins->purgatory_count && my_getncpus() == 1) + if (pins->purgatory_count) { my_atomic_rwlock_wrunlock(&pins->pinbox->pinstack.lock); pthread_yield(); my_atomic_rwlock_wrlock(&pins->pinbox->pinstack.lock); } } - top_ver=pinbox->pinstack_top_ver; + top_ver= pinbox->pinstack_top_ver; if (nr == pinbox->pins_in_stack) { - int32 tmp=nr; + int32 tmp= nr; if (my_atomic_cas32(&pinbox->pins_in_stack, &tmp, tmp-1)) goto ret; } do { - pins->link=top_ver % LF_PINBOX_MAX_PINS; + pins->link= top_ver % LF_PINBOX_MAX_PINS; } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver, top_ver-pins->link+nr+LF_PINBOX_MAX_PINS)); ret: @@ -127,19 +121,20 @@ static int ptr_cmp(void **a, void **b) return *a < *b ? -1 : *a == *b ? 
0 : 1; } +#define add_to_purgatory(PINS, ADDR) \ + do \ + { \ + *(void **)((char *)(ADDR)+(PINS)->pinbox->free_ptr_offset)= \ + (PINS)->purgatory; \ + (PINS)->purgatory= (ADDR); \ + (PINS)->purgatory_count++; \ + } while (0) + void _lf_pinbox_free(LF_PINS *pins, void *addr) { - while (pins->purgatory_count == LF_PURGATORY_SIZE) - { + if (pins->purgatory_count % LF_PURGATORY_SIZE) _lf_pinbox_real_free(pins); - if (pins->purgatory_count == LF_PURGATORY_SIZE && my_getncpus() == 1) - { - my_atomic_rwlock_wrunlock(&pins->pinbox->pinstack.lock); - pthread_yield(); - my_atomic_rwlock_wrlock(&pins->pinbox->pinstack.lock); - } - } - pins->purgatory[pins->purgatory_count++]=addr; + add_to_purgatory(pins, addr); } struct st_harvester { @@ -178,11 +173,11 @@ static int match_pins(LF_PINS *el, void *addr) static void _lf_pinbox_real_free(LF_PINS *pins) { int npins; - void **addr=0; - void **start, **cur, **end=pins->purgatory+pins->purgatory_count; - LF_PINBOX *pinbox=pins->pinbox; + void *list; + void **addr; + LF_PINBOX *pinbox= pins->pinbox; - npins=pinbox->pins_in_stack+1; + npins= pinbox->pins_in_stack+1; #ifdef HAVE_ALLOCA /* create a sorted list of pinned addresses, to speed up searches */ @@ -190,64 +185,64 @@ static void _lf_pinbox_real_free(LF_PINS *pins) { struct st_harvester hv; addr= (void **) alloca(sizeof(void *)*LF_PINBOX_PINS*npins); - hv.granary=addr; - hv.npins=npins; + hv.granary= addr; + hv.npins= npins; _lf_dynarray_iterate(&pinbox->pinstack, (lf_dynarray_func)harvest_pins, &hv); - npins=hv.granary-addr; + npins= hv.granary-addr; if (npins) qsort(addr, npins, sizeof(void *), (qsort_cmp)ptr_cmp); } + else #endif + addr= 0; - start= cur= pins->purgatory; - end= start+pins->purgatory_count; - for (; cur < end; cur++) + list= pins->purgatory; + pins->purgatory= 0; + pins->purgatory_count= 0; + while (list) { + void *cur= list; + list= *(void **)((char *)cur+pinbox->free_ptr_offset); if (npins) { if (addr) { void **a,**b,**c; - for (a=addr, b=addr+npins-1, 
c=a+(b-a)/2; b-a>1; c=a+(b-a)/2) - if (*cur == *c) - a=b=c; - else if (*cur > *c) - a=c; + for (a= addr, b= addr+npins-1, c= a+(b-a)/2; b-a>1; c= a+(b-a)/2) + if (cur == *c) + a= b= c; + else if (cur > *c) + a= c; else - b=c; - if (*cur == *a || *cur == *b) + b= c; + if (cur == *a || cur == *b) goto found; } else { if (_lf_dynarray_iterate(&pinbox->pinstack, - (lf_dynarray_func)match_pins, *cur)) + (lf_dynarray_func)match_pins, cur)) goto found; } } /* not pinned - freeing */ - pinbox->free_func(*cur, pinbox->free_func_arg); + pinbox->free_func(cur, pinbox->free_func_arg); continue; found: /* pinned - keeping */ - *start++=*cur; + add_to_purgatory(pins, cur); } - pins->purgatory_count=start-pins->purgatory; -#ifdef MY_LF_EXTRA_DEBUG - while (start < pins->purgatory + LF_PURGATORY_SIZE) - *start++=0; -#endif } static void alloc_free(void *node, LF_ALLOCATOR *allocator) { void *tmp; - tmp=allocator->top; + tmp= allocator->top; do { - (*(void **)node)=tmp; + (*(void **)node)= tmp; } while (!my_atomic_casptr((void **)&allocator->top, (void **)&tmp, node) && LF_BACKOFF); } @@ -255,18 +250,18 @@ static void alloc_free(void *node, LF_ALLOCATOR *allocator) LF_REQUIRE_PINS(1); void *_lf_alloc_new(LF_PINS *pins) { - LF_ALLOCATOR *allocator=(LF_ALLOCATOR *)(pins->pinbox->free_func_arg); + LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg); void *node; for (;;) { do { - node=allocator->top; + node= allocator->top; _lf_pin(pins, 0, node); - } while (node !=allocator->top && LF_BACKOFF); + } while (node != allocator->top && LF_BACKOFF); if (!node) { - if (!(node=my_malloc(allocator->element_size, MYF(MY_WME|MY_ZEROFILL)))) + if (!(node= my_malloc(allocator->element_size, MYF(MY_WME|MY_ZEROFILL)))) goto ret; #ifdef MY_LF_EXTRA_DEBUG my_atomic_add32(&allocator->mallocs, 1); @@ -282,27 +277,27 @@ ret: return node; } -void lf_alloc_init(LF_ALLOCATOR *allocator, uint size) +void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset) { - 
lf_pinbox_init(&allocator->pinbox, + lf_pinbox_init(&allocator->pinbox, free_ptr_offset, (lf_pinbox_free_func *)alloc_free, allocator); - allocator->top=0; - allocator->mallocs=0; - allocator->element_size=size; + allocator->top= 0; + allocator->mallocs= 0; + allocator->element_size= size; DBUG_ASSERT(size >= (int)sizeof(void *)); } void lf_alloc_destroy(LF_ALLOCATOR *allocator) { - void *el=allocator->top; + void *el= allocator->top; while (el) { - void *tmp=*(void **)el; + void *tmp= *(void **)el; my_free(el, MYF(0)); - el=tmp; + el= tmp; } lf_pinbox_destroy(&allocator->pinbox); - allocator->top=0; + allocator->top= 0; } /* @@ -313,7 +308,8 @@ uint lf_alloc_in_pool(LF_ALLOCATOR *allocator) { uint i; void *node; - for (node=allocator->top, i=0; node; node=*(void **)node, i++) /* no op */; + for (node= allocator->top, i= 0; node; node= *(void **)node, i++) + /* no op */; return i; } diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index a0425e89556..45b45f7531e 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -20,6 +20,7 @@ TODO try to get rid of dummy nodes ? for non-unique hash, count only _distinct_ values + (but how to do it in lf_hash_delete ?) 
*/ #include #include @@ -222,7 +223,8 @@ void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, uint key_offset, uint key_length, hash_get_key get_key, CHARSET_INFO *charset) { - lf_alloc_init(&hash->alloc,sizeof(LF_SLIST)+element_size); + lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size, + offsetof(LF_SLIST, key)); lf_dynarray_init(&hash->array, sizeof(LF_SLIST **)); hash->size=1; hash->count=0; -- cgit v1.2.1 From a79868ae99567e974373d29d631db552f998ccc7 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 19 Oct 2006 13:33:49 +0200 Subject: comments --- mysys/lf_alloc-pin.c | 193 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 159 insertions(+), 34 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index 842c9690463..ac55185864a 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -17,8 +17,63 @@ /* wait-free concurrent allocator based on pinning addresses - TODO test with more than 256 threads - TODO test w/o alloca + It works as follows: every thread (strictly speaking - every CPU, but it's + too difficult to do) has a small array of pointers. They're called "pins". + Before using an object its address must be stored in this array (pinned). + When an object is no longer necessary its address must be removed from + this array (unpinned). When a thread wants to free() an object it + scans all pins of all threads to see if somebody has this object pinned. + If yes - the object is not freed (but stored in a purgatory). + To reduce the cost of a single free() pins are not scanned on every free() + but only added to (thread-local) purgatory. On every LF_PURGATORY_SIZE + free() purgatory is scanned and all unpinned objects are freed. + + Pins are used to solve ABA problem. To use pins one must obey + a pinning protocol: + 1. Let's assume that PTR is a shared pointer to an object. 
Shared means + that any thread may modify it anytime to point to a different object and + free the old object. Later the freed object may be potentially allocated + by another thread. If we're unlucky that another thread may set PTR to + point to this object again. This is ABA problem. + 2. Create a local pointer LOCAL_PTR. + 3. Pin the PTR in a loop: + do + { + LOCAL_PTR= PTR; + pin(PTR, PIN_NUMBER); + } while (LOCAL_PTR != PTR) + 4. It is guaranteed that after the loop is ended, LOCAL_PTR + points to an object (or NULL, if PTR may be NULL), that + will never be freed. It is not guaranteed though + that LOCAL_PTR == PTR + 5. When done working with the object, remove the pin: + unpin(PIN_NUMBER) + 6. When copying pins (as in the list: + while () + { + pin(CUR, 0); + do + { + NEXT=CUR->next; + pin(NEXT, 1); + } while (NEXT != CUR->next); + ... + ... + pin(CUR, 1); + CUR=NEXT; + } + which keeps CUR address constantly pinned), note than pins may be copied + only upwards (!!!), that is pin N to pin M > N. + 7. Don't keep the object pinned longer than necessary - the number of pins + you have is limited (and small), keeping an object pinned prevents its + reuse and cause unnecessary mallocs. + + Implementation details: + Pins are given away from a "pinbox". Pinbox is stack-based allocator. + It used dynarray for storing pins, new elements are allocated by dynarray + as necessary, old are pushed in the stack for reuse. ABA is solved by + versioning a pointer - because we use an array, a pointer to pins is 32 bit, + upper 32 bits are used for a version. */ #include @@ -29,6 +84,10 @@ static void _lf_pinbox_real_free(LF_PINS *pins); +/* + Initialize a pinbox. Must be usually called from lf_alloc_init. + See the latter for details. 
+*/ void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, lf_pinbox_free_func *free_func,void *free_func_arg) { @@ -47,6 +106,14 @@ void lf_pinbox_destroy(LF_PINBOX *pinbox) lf_dynarray_destroy(&pinbox->pinstack); } +/* + Get pins from a pinbox. Usually called via lf_alloc_get_pins() or + lf_hash_get_pins(). + + DESCRIPTION + get a new LF_PINS structure from a stack of unused pins, + or allocate a new one out of dynarray. +*/ LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox) { uint32 pins, next, top_ver; @@ -71,6 +138,14 @@ LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox) return el; } +/* + Put pins back to a pinbox. Usually called via lf_alloc_put_pins() or + lf_hash_put_pins(). + + DESCRIPTION + empty the purgatory (XXX deadlock warning below!), + push LF_PINS structure to a stack +*/ void _lf_pinbox_put_pins(LF_PINS *pins) { LF_PINBOX *pinbox= pins->pinbox; @@ -80,11 +155,11 @@ void _lf_pinbox_put_pins(LF_PINS *pins) { int i; for (i= 0; i < LF_PINBOX_PINS; i++) - assert(pins->pin[i] == 0); + DBUG_ASSERT(pins->pin[i] == 0); } #endif /* - Note - this will deadlock if other threads will wait for + XXX this will deadlock if other threads will wait for the caller to do something after _lf_pinbox_put_pins(), and they would have pinned addresses that the caller wants to free. Thus: only free pins when all work is done and nobody can wait for you!!! @@ -130,6 +205,13 @@ static int ptr_cmp(void **a, void **b) (PINS)->purgatory_count++; \ } while (0) +/* + Free an object allocated via pinbox allocator + + DESCRIPTION + add an object to purgatory. if necessary, call _lf_pinbox_real_free() + to actually free something. 
+*/ void _lf_pinbox_free(LF_PINS *pins, void *addr) { if (pins->purgatory_count % LF_PURGATORY_SIZE) @@ -142,6 +224,10 @@ struct st_harvester { int npins; }; +/* + callback for _lf_dynarray_iterate: + scan all pins or all threads and accumulate all pins +*/ static int harvest_pins(LF_PINS *el, struct st_harvester *hv) { int i; @@ -159,6 +245,10 @@ static int harvest_pins(LF_PINS *el, struct st_harvester *hv) return 0; } +/* + callback for _lf_dynarray_iterate: + scan all pins or all threads and see if addr is present there +*/ static int match_pins(LF_PINS *el, void *addr) { int i; @@ -170,6 +260,9 @@ static int match_pins(LF_PINS *el, void *addr) return 0; } +/* + Scan the purgatory as free everything that can be freed +*/ static void _lf_pinbox_real_free(LF_PINS *pins) { int npins; @@ -187,10 +280,12 @@ static void _lf_pinbox_real_free(LF_PINS *pins) addr= (void **) alloca(sizeof(void *)*LF_PINBOX_PINS*npins); hv.granary= addr; hv.npins= npins; + /* scan the dynarray and accumulate all pinned addresses */ _lf_dynarray_iterate(&pinbox->pinstack, (lf_dynarray_func)harvest_pins, &hv); npins= hv.granary-addr; + /* and sort them */ if (npins) qsort(addr, npins, sizeof(void *), (qsort_cmp)ptr_cmp); } @@ -207,7 +302,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins) list= *(void **)((char *)cur+pinbox->free_ptr_offset); if (npins) { - if (addr) + if (addr) /* use binary search */ { void **a,**b,**c; for (a= addr, b= addr+npins-1, c= a+(b-a)/2; b-a>1; c= a+(b-a)/2) @@ -220,7 +315,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins) if (cur == *a || cur == *b) goto found; } - else + else /* no alloca - no cookie. 
linear search here */ { if (_lf_dynarray_iterate(&pinbox->pinstack, (lf_dynarray_func)match_pins, cur)) @@ -236,6 +331,10 @@ found: } } +/* + callback for _lf_pinbox_real_free to free an unpinned object - + add it back to the allocator stack +*/ static void alloc_free(void *node, LF_ALLOCATOR *allocator) { void *tmp; @@ -247,7 +346,55 @@ static void alloc_free(void *node, LF_ALLOCATOR *allocator) LF_BACKOFF); } +/* lock-free memory allocator for fixed-size objects */ + LF_REQUIRE_PINS(1); + +/* + initialize lock-free allocatod. + + SYNOPSYS + allocator - + size a size of an object to allocate + free_ptr_offset an offset inside the object to a sizeof(void *) + memory that is guaranteed to be unused after + the object is put in the purgatory. Unused by ANY + thread, not only the purgatory owner. +*/ +void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset) +{ + lf_pinbox_init(&allocator->pinbox, free_ptr_offset, + (lf_pinbox_free_func *)alloc_free, allocator); + allocator->top= 0; + allocator->mallocs= 0; + allocator->element_size= size; + DBUG_ASSERT(size >= (int)sizeof(void *)); + DBUG_ASSERT(free_ptr_offset < size); +} + +/* + destroy the allocator, free everything that's in it +*/ +void lf_alloc_destroy(LF_ALLOCATOR *allocator) +{ + void *el= allocator->top; + while (el) + { + void *tmp= *(void **)el; + my_free(el, MYF(0)); + el= tmp; + } + lf_pinbox_destroy(&allocator->pinbox); + allocator->top= 0; +} + +/* + Allocate and return an new object. + + DESCRIPTION + Pop an unused object from the stack or malloc it is the stack is empty. + pin[0] is used, it's removed on return. 
+*/ void *_lf_alloc_new(LF_PINS *pins) { LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg); @@ -262,47 +409,25 @@ void *_lf_alloc_new(LF_PINS *pins) if (!node) { if (!(node= my_malloc(allocator->element_size, MYF(MY_WME|MY_ZEROFILL)))) - goto ret; + break; #ifdef MY_LF_EXTRA_DEBUG my_atomic_add32(&allocator->mallocs, 1); #endif - goto ret; + break; } if (my_atomic_casptr((void **)&allocator->top, (void *)&node, *(void **)node)) - goto ret; + break; } -ret: _lf_unpin(pins, 0); return node; } -void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset) -{ - lf_pinbox_init(&allocator->pinbox, free_ptr_offset, - (lf_pinbox_free_func *)alloc_free, allocator); - allocator->top= 0; - allocator->mallocs= 0; - allocator->element_size= size; - DBUG_ASSERT(size >= (int)sizeof(void *)); -} - -void lf_alloc_destroy(LF_ALLOCATOR *allocator) -{ - void *el= allocator->top; - while (el) - { - void *tmp= *(void **)el; - my_free(el, MYF(0)); - el= tmp; - } - lf_pinbox_destroy(&allocator->pinbox); - allocator->top= 0; -} - /* + count the number of objects in a pool. + NOTE - this is NOT thread-safe !!! + This is NOT thread-safe !!! 
*/ uint lf_alloc_in_pool(LF_ALLOCATOR *allocator) { -- cgit v1.2.1 From fb818dd7b0be3b4facd159de14bc3d9afcbcf16e Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 20 Oct 2006 14:02:18 +0200 Subject: more post-review fixes - comments, renames, error checks in unit tests concurrency bug in lock manager include/my_global.h: compile-time assert macro mysys/my_atomic.c: use compile_time_assert() macro storage/maria/lockman.c: bug in concurrent lockdelete (with retries) storage/maria/trnman.c: more post-review fixes - comments, renames storage/maria/trnman.h: more post-review fixes - comments storage/maria/unittest/lockman-t.c: friendlier error checks storage/maria/unittest/trnman-t.c: friendlier error checks --- mysys/my_atomic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_atomic.c b/mysys/my_atomic.c index fbeb3d63bef..e1af7e0328d 100644 --- a/mysys/my_atomic.c +++ b/mysys/my_atomic.c @@ -35,7 +35,7 @@ */ int my_atomic_initialize() { - char assert_the_size[sizeof(intptr) == sizeof(void *) ? 1 : -1]; + compile_time_assert(sizeof(intptr) == sizeof(void *)); /* currently the only thing worth checking is SMP/UP issue */ #ifdef MY_ATOMIC_MODE_DUMMY return my_getncpus() == 1 ? 
MY_ATOMIC_OK : MY_ATOMIC_NOT_1CPU; -- cgit v1.2.1 From 7ca33ae5b592143eb773ccfb71ee76d871374b46 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 27 Oct 2006 17:09:31 +0200 Subject: comments, minor changes --- comments mysys/lf_alloc-pin.c: comments mysys/lf_dynarray.c: comments mysys/lf_hash.c: comments, charset-aware comparison storage/maria/trnman.c: comments storage/maria/unittest/lockman-t.c: test case for a bug unittest/mysys/my_atomic-t.c: removed mistakenly copied line --- mysys/lf_alloc-pin.c | 64 ++++++++++++++++++++++---------------------- mysys/lf_dynarray.c | 25 +++++++++++++++++- mysys/lf_hash.c | 75 +++++++++++++++++++++++++++++++++++++++------------- 3 files changed, 114 insertions(+), 50 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index ac55185864a..b96fe42311b 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -1,3 +1,4 @@ +// TODO multi-pinbox /* Copyright (C) 2000 MySQL AB This program is free software; you can redistribute it and/or modify @@ -17,24 +18,25 @@ /* wait-free concurrent allocator based on pinning addresses - It works as follows: every thread (strictly speaking - every CPU, but it's - too difficult to do) has a small array of pointers. They're called "pins". - Before using an object its address must be stored in this array (pinned). - When an object is no longer necessary its address must be removed from - this array (unpinned). When a thread wants to free() an object it - scans all pins of all threads to see if somebody has this object pinned. - If yes - the object is not freed (but stored in a purgatory). - To reduce the cost of a single free() pins are not scanned on every free() - but only added to (thread-local) purgatory. On every LF_PURGATORY_SIZE - free() purgatory is scanned and all unpinned objects are freed. + It works as follows: every thread (strictly speaking - every CPU, but + it's too difficult to do) has a small array of pointers. They're called + "pins". 
Before using an object its address must be stored in this array + (pinned). When an object is no longer necessary its address must be + removed from this array (unpinned). When a thread wants to free() an + object it scans all pins of all threads to see if somebody has this + object pinned. If yes - the object is not freed (but stored in a + "purgatory"). To reduce the cost of a single free() pins are not scanned + on every free() but only added to (thread-local) purgatory. On every + LF_PURGATORY_SIZE free() purgatory is scanned and all unpinned objects + are freed. Pins are used to solve ABA problem. To use pins one must obey a pinning protocol: 1. Let's assume that PTR is a shared pointer to an object. Shared means - that any thread may modify it anytime to point to a different object and - free the old object. Later the freed object may be potentially allocated - by another thread. If we're unlucky that another thread may set PTR to - point to this object again. This is ABA problem. + that any thread may modify it anytime to point to a different object + and free the old object. Later the freed object may be potentially + allocated by another thread. If we're unlucky that another thread may + set PTR to point to this object again. This is ABA problem. 2. Create a local pointer LOCAL_PTR. 3. Pin the PTR in a loop: do @@ -42,31 +44,31 @@ LOCAL_PTR= PTR; pin(PTR, PIN_NUMBER); } while (LOCAL_PTR != PTR) - 4. It is guaranteed that after the loop is ended, LOCAL_PTR + 4. It is guaranteed that after the loop has ended, LOCAL_PTR points to an object (or NULL, if PTR may be NULL), that will never be freed. It is not guaranteed though - that LOCAL_PTR == PTR + that LOCAL_PTR == PTR (as PTR can change any time) 5. When done working with the object, remove the pin: unpin(PIN_NUMBER) - 6. When copying pins (as in the list: + 6. 
When copying pins (as in the list traversing loop: + pin(CUR, 1); while () { - pin(CUR, 0); - do - { - NEXT=CUR->next; - pin(NEXT, 1); - } while (NEXT != CUR->next); + do // standard + { // pinning + NEXT=CUR->next; // loop + pin(NEXT, 0); // see #3 + } while (NEXT != CUR->next); // above ... ... - pin(CUR, 1); CUR=NEXT; + pin(CUR, 1); // copy pin[0] to pin[1] } - which keeps CUR address constantly pinned), note than pins may be copied - only upwards (!!!), that is pin N to pin M > N. - 7. Don't keep the object pinned longer than necessary - the number of pins - you have is limited (and small), keeping an object pinned prevents its - reuse and cause unnecessary mallocs. + which keeps CUR address constantly pinned), note than pins may be + copied only upwards (!!!), that is pin[N] to pin[M], M > N. + 7. Don't keep the object pinned longer than necessary - the number of + pins you have is limited (and small), keeping an object pinned + prevents its reuse and cause unnecessary mallocs. Implementation details: Pins are given away from a "pinbox". Pinbox is stack-based allocator. @@ -85,7 +87,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins); /* - Initialize a pinbox. Must be usually called from lf_alloc_init. + Initialize a pinbox. Normally called from lf_alloc_init. See the latter for details. */ void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, @@ -214,9 +216,9 @@ static int ptr_cmp(void **a, void **b) */ void _lf_pinbox_free(LF_PINS *pins, void *addr) { + add_to_purgatory(pins, addr); if (pins->purgatory_count % LF_PURGATORY_SIZE) _lf_pinbox_real_free(pins); - add_to_purgatory(pins, addr); } struct st_harvester { diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c index ade1c28d51c..a7a4968ddd8 100644 --- a/mysys/lf_dynarray.c +++ b/mysys/lf_dynarray.c @@ -26,10 +26,15 @@ Every element is aligned to sizeof(element) boundary (to avoid false sharing if element is big enough). + LF_DYNARRAY is a recursive structure. 
On the zero level + LF_DYNARRAY::level[0] it's an array of LF_DYNARRAY_LEVEL_LENGTH elements, + on the first level it's an array of LF_DYNARRAY_LEVEL_LENGTH pointers + to arrays of elements, on the second level it's an array of pointers + to arrays of pointers to arrays of elements. And so on. + Actually, it's wait-free, not lock-free ;-) */ -#undef DBUG_OFF #include #include #include @@ -75,6 +80,10 @@ static const int dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= LF_DYNARRAY_LEVEL_LENGTH }; +/* + Returns a valid lvalue pointer to the element number 'idx'. + Allocates memory if necessary. +*/ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) { void * ptr, * volatile * ptr_ptr= 0; @@ -123,6 +132,10 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) return ptr + array->size_of_element * idx; } +/* + Returns a pointer to the element number 'idx' + or NULL if an element does not exists +*/ void *_lf_dynarray_value(LF_DYNARRAY *array, uint idx) { void * ptr, * volatile * ptr_ptr= 0; @@ -157,6 +170,16 @@ static int recursive_iterate(LF_DYNARRAY *array, void *ptr, int level, return 0; } +/* + Calls func(array, arg) on every array of LF_DYNARRAY_LEVEL_LENGTH elements + in lf_dynarray. + + DESCRIPTION + lf_dynarray consists of a set of arrays, LF_DYNARRAY_LEVEL_LENGTH elements + each. _lf_dynarray_iterate() calls user-supplied function on every array + from the set. 
It is the fastest way to scan the array, faster than + for (i=0; i < N; i++) { func(_lf_dynarray_value(dynarray, i)); } +*/ int _lf_dynarray_iterate(LF_DYNARRAY *array, lf_dynarray_func func, void *arg) { int i, res; diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 45b45f7531e..66ad672f345 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -29,22 +29,35 @@ LF_REQUIRE_PINS(3); +/* An element of the list */ typedef struct { - intptr volatile link; - uint32 hashnr; + intptr volatile link; /* a pointer to the next element in a listand a flag */ + uint32 hashnr; /* reversed hash number, for sorting */ const uchar *key; uint keylen; } LF_SLIST; +/* + a structure to pass the context (pointers two the three successive elements + in a list) from lfind to linsert/ldelete +*/ typedef struct { intptr volatile *prev; LF_SLIST *curr, *next; } CURSOR; +/* + the last bit in LF_SLIST::link is a "deleted" flag. + the helper macros below convert it to a pure pointer or a pure flag +*/ #define PTR(V) (LF_SLIST *)((V) & (~(intptr)1)) #define DELETED(V) ((V) & 1) /* + DESCRIPTION + Search for hashnr/key/keylen in the list starting from 'head' and + position the cursor. 
The list is ORDER BY hashnr, key + RETURN 0 - not found 1 - found @@ -53,7 +66,7 @@ typedef struct { cursor is positioned in either case pins[0..2] are used, they are NOT removed on return */ -static int lfind(LF_SLIST * volatile *head, uint32 hashnr, +static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, const uchar *key, uint keylen, CURSOR *cursor, LF_PINS *pins) { uint32 cur_hashnr; @@ -89,7 +102,8 @@ retry: if (cur_hashnr >= hashnr) { int r=1; - if (cur_hashnr > hashnr || (r=memcmp(cur_key, key, keylen)) >= 0) + if (cur_hashnr > hashnr || + (r=my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0) return !r; } cursor->prev=&(cursor->curr->link); @@ -112,22 +126,26 @@ retry: } /* + DESCRIPTION + insert a 'node' in the list that starts from 'head' in the correct + position (as found by lfind) + RETURN 0 - inserted - not 0 - a pointer to a conflict (not pinned and thus unusable) + not 0 - a pointer to a duplicate (not pinned and thus unusable) NOTE it uses pins[0..2], on return all pins are removed. */ -static LF_SLIST *linsert(LF_SLIST * volatile *head, LF_SLIST *node, - LF_PINS *pins, uint flags) +static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs, + LF_SLIST *node, LF_PINS *pins, uint flags) { CURSOR cursor; int res=-1; do { - if (lfind(head, node->hashnr, node->key, node->keylen, + if (lfind(head, cs, node->hashnr, node->key, node->keylen, &cursor, pins) && (flags & LF_HASH_UNIQUE)) res=0; /* duplicate found */ @@ -147,13 +165,18 @@ static LF_SLIST *linsert(LF_SLIST * volatile *head, LF_SLIST *node, } /* + DESCRIPTION + deletes a node as identified by hashnr/keey/keylen from the list + that starts from 'head' + RETURN 0 - ok 1 - not found + NOTE it uses pins[0..2], on return all pins are removed. 
*/ -static int ldelete(LF_SLIST * volatile *head, uint32 hashnr, +static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, const uchar *key, uint keylen, LF_PINS *pins) { CURSOR cursor; @@ -161,7 +184,7 @@ static int ldelete(LF_SLIST * volatile *head, uint32 hashnr, do { - if (!lfind(head, hashnr, key, keylen, &cursor, pins)) + if (!lfind(head, cs, hashnr, key, keylen, &cursor, pins)) res= 1; else if (my_atomic_casptr((void **)&(cursor.curr->link), @@ -171,7 +194,7 @@ static int ldelete(LF_SLIST * volatile *head, uint32 hashnr, (void **)&cursor.curr, cursor.next)) _lf_alloc_free(pins, cursor.curr); else - lfind(head, hashnr, key, keylen, &cursor, pins); + lfind(head, cs, hashnr, key, keylen, &cursor, pins); res= 0; } } while (res == -1); @@ -182,18 +205,24 @@ static int ldelete(LF_SLIST * volatile *head, uint32 hashnr, } /* + DESCRIPTION + searches for a node as identified by hashnr/keey/keylen in the list + that starts from 'head' + RETURN 0 - not found node - found + NOTE it uses pins[0..2], on return the pin[2] keeps the node found all other pins are removed. 
*/ -static LF_SLIST *lsearch(LF_SLIST * volatile *head, uint32 hashnr, - const uchar *key, uint keylen, LF_PINS *pins) +static LF_SLIST *lsearch(LF_SLIST * volatile *head, CHARSET_INFO *cs, + uint32 hashnr, const uchar *key, uint keylen, + LF_PINS *pins) { CURSOR cursor; - int res=lfind(head, hashnr, key, keylen, &cursor, pins); + int res=lfind(head, cs, hashnr, key, keylen, &cursor, pins); if (res) _lf_pin(pins, 2, cursor.curr); _lf_unpin(pins, 0); _lf_unpin(pins, 1); @@ -219,6 +248,9 @@ static inline uint calc_hash(LF_HASH *hash, const uchar *key, uint keylen) #define MAX_LOAD 1.0 static void initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *); +/* + Initializes lf_hash, the arguments are compatible with hash_init +*/ void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, uint key_offset, uint key_length, hash_get_key get_key, CHARSET_INFO *charset) @@ -254,9 +286,14 @@ void lf_hash_destroy(LF_HASH *hash) } /* + DESCRIPTION + inserts a new element to a hash. it will have a _copy_ of + data, not a pointer to it. 
+ RETURN 0 - inserted 1 - didn't (unique key conflict) + NOTE see linsert() for pin usage notes */ @@ -275,7 +312,7 @@ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) if (*el == NULL) initialize_bucket(hash, el, bucket, pins); node->hashnr=my_reverse_bits(hashnr) | 1; - if (linsert(el, node, pins, hash->flags)) + if (linsert(el, hash->charset, node, pins, hash->flags)) { _lf_alloc_free(pins, node); lf_rwunlock_by_pins(pins); @@ -305,7 +342,8 @@ int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) el=_lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); - if (ldelete(el, my_reverse_bits(hashnr) | 1, (uchar *)key, keylen, pins)) + if (ldelete(el, hash->charset, my_reverse_bits(hashnr) | 1, + (uchar *)key, keylen, pins)) { lf_rwunlock_by_pins(pins); return 1; @@ -329,7 +367,8 @@ void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) el=_lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); - found= lsearch(el, my_reverse_bits(hashnr) | 1, (uchar *)key, keylen, pins); + found= lsearch(el, hash->charset, my_reverse_bits(hashnr) | 1, + (uchar *)key, keylen, pins); lf_rwunlock_by_pins(pins); return found ? 
found+1 : 0; } @@ -348,7 +387,7 @@ static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, dummy->hashnr=my_reverse_bits(bucket); dummy->key=dummy_key; dummy->keylen=0; - if ((cur= linsert(el, dummy, pins, 0))) + if ((cur= linsert(el, hash->charset, dummy, pins, 0))) { my_free((void *)dummy, MYF(0)); dummy= cur; -- cgit v1.2.1 From fe382a9fe18d4a76ab01bf9d7a98823cd1fc30ff Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 30 Oct 2006 16:58:18 +0100 Subject: fix dynarray_idxes_in_prev_level[] values --- mysys/lf_dynarray.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c index a7a4968ddd8..d63af91813e 100644 --- a/mysys/lf_dynarray.c +++ b/mysys/lf_dynarray.c @@ -71,13 +71,15 @@ void lf_dynarray_destroy(LF_DYNARRAY *array) bzero(array, sizeof(*array)); } -static const int dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= +static const long dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= { 0, /* +1 here to to avoid -1's below */ LF_DYNARRAY_LEVEL_LENGTH, - LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH + + LF_DYNARRAY_LEVEL_LENGTH, LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH * - LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH * + LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH }; /* -- cgit v1.2.1 From e5858e1fb429617bf16f94031b660a3694e2acdd Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 13 Nov 2006 00:26:29 +0200 Subject: postreview fixes fixed bug in the pagecache which lead to assertion in multithread test include/pagecache.h: post review fixes: - comments fixed - types fixed - keyword 'extern' added to all interface functions mysys/mf_pagecache.c: postreview fixes: - comments fixed and added - types fixed - typo fixed Added write locking flag set when we took a block from LRU and going to free it (to prevent locking/using it for write) 
mysys/test_pagecache_consist.c: pagecache size reduced (to be able reproduce problems found by Guilhem) typo in the comment fixed --- mysys/mf_pagecache.c | 190 ++++++++++++++++++++++++++++++----------- mysys/test_pagecache_consist.c | 4 +- 2 files changed, 144 insertions(+), 50 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 4fa814d8188..e1ccab9e06f 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2000 MySQL AB +/* Copyright (C) 2000-2006 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,11 +26,11 @@ When a new block is required it is first tried to pop one from the stack. If the stack is empty, it is tried to get a never-used block from the pool. If this is empty too, then a block is taken from the LRU ring, flushing it - to disk, if neccessary. This is handled in find_key_block(). + to disk, if necessary. This is handled in find_key_block(). With the new free list, the blocks can have three temperatures: hot, warm and cold (which is free). This is remembered in the block header by the enum BLOCK_TEMPERATURE temperature variable. Remembering the - temperature is neccessary to correctly count the number of warm blocks, + temperature is necessary to correctly count the number of warm blocks, which is required to decide when blocks are allowed to become hot. Whenever a block is inserted to another (sub-)chain, we take the old and new temperature into account to decide if we got one more or less warm block. @@ -93,16 +93,22 @@ */ #define SERIALIZED_READ_FROM_CACHE yes -#define BLOCK_INFO(B) DBUG_PRINT("info", \ - ("block 0x%lx, file %lu, page %lu, s %0x", \ - (ulong)(B), \ - (ulong)((B)->hash_link ? \ - (B)->hash_link->file.file : \ - 0), \ - (ulong)((B)->hash_link ? 
\ - (B)->hash_link->pageno : \ - 0), \ - (B)->status)) +#define BLOCK_INFO(B) \ + DBUG_PRINT("info", \ + ("block 0x%lx, file %lu, page %lu, s %0x, hshL 0x%lx, req %u/%u", \ + (ulong)(B), \ + (ulong)((B)->hash_link ? \ + (B)->hash_link->file.file : \ + 0), \ + (ulong)((B)->hash_link ? \ + (B)->hash_link->pageno : \ + 0), \ + (B)->status, \ + (ulong)(B)->hash_link, \ + (uint) (B)->requests, \ + (uint)((B)->hash_link ? \ + (B)->hash_link->requests : \ + 0))) /* TODO: put it to my_static.c */ my_bool my_disable_flush_pagecache_blocks= 0; @@ -147,7 +153,7 @@ struct st_pagecache_hash_link }; /* simple states of a block */ -#define BLOCK_ERROR 1 /* an error occured when performing disk i/o */ +#define BLOCK_ERROR 1 /* an error occurred when performing disk i/o */ #define BLOCK_READ 2 /* the is page in the block buffer */ #define BLOCK_IN_SWITCH 4 /* block is preparing to read new page */ #define BLOCK_REASSIGNED 8 /* block does not accept requests for old page */ @@ -211,7 +217,20 @@ typedef struct st_pagecache_lock_info struct st_my_thread_var *thread; my_bool write_lock; } PAGECACHE_LOCK_INFO; -/* service functions */ + + +/* service functions maintain debugging info about pin & lock */ + + +/* + Links information about thread pinned/locked the block to the list + + SYNOPSIS + info_link() + list the list to link in + node the node which should be linked +*/ + void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node) { if ((node->next= *list)) @@ -219,11 +238,38 @@ void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node) *list= node; node->prev= list; } + + +/* + Unlinks information about thread pinned/locked the block from the list + + SYNOPSIS + info_unlink() + node the node which should be unlinked +*/ + void info_unlink(PAGECACHE_PIN_INFO *node) { if ((*node->prev= node->next)) node->next->prev= node->prev; } + + +/* + Finds information about given thread in the list of threads which + pinned/locked this block. 
+ + SYNOPSIS + info_find() + list the list where to find the thread + thread thread ID (reference to the st_my_thread_var + of the thread) + + RETURN + 0 - the thread was not found + pointer to the information node of the thread in the list +*/ + PAGECACHE_PIN_INFO *info_find(PAGECACHE_PIN_INFO *list, struct st_my_thread_var *thread) { @@ -263,8 +309,8 @@ struct st_pagecache_block_link #ifdef PAGECACHE_DEBUG /* debug checks */ -bool info_check_pin(PAGECACHE_BLOCK_LINK *block, - enum pagecache_page_pin mode) +my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_pin mode) { struct st_my_thread_var *thread= my_thread_var; DBUG_ENTER("info_check_pin"); @@ -305,9 +351,10 @@ bool info_check_pin(PAGECACHE_BLOCK_LINK *block, } DBUG_RETURN(0); } -bool info_check_lock(PAGECACHE_BLOCK_LINK *block, - enum pagecache_page_lock lock, - enum pagecache_page_pin pin) + +my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin) { struct st_my_thread_var *thread= my_thread_var; DBUG_ENTER("info_check_lock"); @@ -328,7 +375,7 @@ bool info_check_lock(PAGECACHE_BLOCK_LINK *block, break; case PAGECACHE_LOCK_LEFT_READLOCKED: DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || - pin == PAGECACHE_PIN_LEFT_UNPINNED); + pin == PAGECACHE_PIN_LEFT_PINNED); if (info == 0 || info->write_lock) { DBUG_PRINT("info", @@ -650,6 +697,7 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, pagecache->shift= my_bit_log2(block_size); DBUG_PRINT("info", ("block_size: %u", block_size)); + DBUG_ASSERT((1 << pagecache->shift) == block_size); blocks= (int) (use_mem / (sizeof(PAGECACHE_BLOCK_LINK) + 2 * sizeof(PAGECACHE_HASH_LINK) + @@ -755,7 +803,7 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, } pagecache->blocks= pagecache->disk_blocks > 0 ? 
pagecache->disk_blocks : 0; - DBUG_RETURN((uint) pagecache->disk_blocks); + DBUG_RETURN((uint) pagecache->blocks); err: error= my_errno; @@ -945,7 +993,7 @@ void change_pagecache_param(PAGECACHE *pagecache, uint division_limit, /* - Remove page cache from memory + Flushes and removes page cache from memory SYNOPSIS end_pagecache() @@ -1240,7 +1288,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, my_bool hot, my_bool at_end) { PAGECACHE_BLOCK_LINK *ins; - PAGECACHE_BLOCK_LINK **pins; + PAGECACHE_BLOCK_LINK **ptr_ins; KEYCACHE_DBUG_ASSERT(! (block->hash_link && block->hash_link->requests)); #ifdef THREAD @@ -1285,8 +1333,8 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, KEYCACHE_DBUG_ASSERT(! (!hot && pagecache->waiting_for_block.last_thread)); /* Condition not transformed using DeMorgan, to keep the text identical */ #endif /* THREAD */ - pins= hot ? &pagecache->used_ins : &pagecache->used_last; - ins= *pins; + ptr_ins= hot ? &pagecache->used_ins : &pagecache->used_last; + ins= *ptr_ins; if (ins) { ins->next_used->prev_used= &block->next_used; @@ -1294,7 +1342,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, block->prev_used= &ins->next_used; ins->next_used= block; if (at_end) - *pins= block; + *ptr_ins= block; } else { @@ -1363,6 +1411,16 @@ static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) /* Register requests for a block + + SYNOPSIS + reg_requests() + pagecache this page cache reference + block the block we request reference + count how many requests we register (it is 1 everywhere) + + NOTE + Registration of request means we are going to use this block so we exclude + it from the LRU if it is first request */ static void reg_requests(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, int count) @@ -1375,7 +1433,7 @@ static void reg_requests(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, if (! 
block->requests) /* First request for the block unlinks it */ unlink_block(pagecache, block); - block->requests+=count; + block->requests+= count; DBUG_VOID_RETURN; } @@ -1461,8 +1519,11 @@ static void unreg_request(PAGECACHE *pagecache, static inline void remove_reader(PAGECACHE_BLOCK_LINK *block) { + DBUG_ENTER("remove_reader"); + BLOCK_INFO(block); if (! --block->hash_link->requests && block->condvar) pagecache_pthread_cond_signal(block->condvar); + DBUG_VOID_RETURN; } @@ -1563,8 +1624,21 @@ static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link) hash_link->next= pagecache->free_hash_list; pagecache->free_hash_list= hash_link; } + + /* - Get the hash link for the page if it is inthe cache + Get the hash link for the page if it is in the cache + + SYNOPSIS + get_present_hash_link() + pagecache Pagecache reference + file file ID + pageno page number in the file + start where to put pointer to found hash link (for + direct referring it) + + RETURN + found hashlink pointer */ static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache, @@ -1766,8 +1840,8 @@ restart: /* Remove block to invalidate the page in the block buffer as we are going to write directly on disk. - Although we have an exlusive lock for the updated key part - the control can be yieded by the current thread as we might + Although we have an exclusive lock for the updated key part + the control can be yielded by the current thread as we might have unfinished readers of other key parts in the block buffer. 
Still we are guaranteed not to have any readers of the key part we are writing into until the block is @@ -1777,7 +1851,7 @@ restart: free_block(pagecache, block); return 0; } - /* Wait intil the page is flushed on disk */ + /* Wait until the page is flushed on disk */ hash_link->requests--; { #ifdef THREAD @@ -1943,12 +2017,17 @@ restart: reg_requests(pagecache, block,1); hash_link->block= block; } + else + { + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + } if (block->hash_link != hash_link && ! (block->status & BLOCK_IN_SWITCH) ) { /* this is a primary request for a new page */ - block->status|= BLOCK_IN_SWITCH; + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + block->status|= (BLOCK_IN_SWITCH | BLOCK_WRLOCK); KEYCACHE_DBUG_PRINT("find_key_block", ("got block %u for new page", @@ -1995,7 +2074,7 @@ restart: } link_to_file_list(pagecache, block, file, (my_bool)(block->hash_link ? 1 : 0)); - DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + BLOCK_INFO(block); block->status= error? 
BLOCK_ERROR : 0; #ifndef DBUG_OFF block->type= PAGECACHE_EMPTY_PAGE; @@ -2094,7 +2173,7 @@ void pagecache_remove_pin(PAGECACHE_BLOCK_LINK *block) DBUG_VOID_RETURN; } #ifdef PAGECACHE_DEBUG -void pagecache_add_lock(PAGECACHE_BLOCK_LINK *block, bool wl) +void pagecache_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) { PAGECACHE_LOCK_INFO *info= (PAGECACHE_LOCK_INFO *)my_malloc(sizeof(PAGECACHE_LOCK_INFO), MYF(0)); @@ -2112,7 +2191,7 @@ void pagecache_remove_lock(PAGECACHE_BLOCK_LINK *block) info_unlink((PAGECACHE_PIN_INFO *)info); my_free((gptr)info, MYF(0)); } -void pagecache_change_lock(PAGECACHE_BLOCK_LINK *block, bool wl) +void pagecache_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) { PAGECACHE_LOCK_INFO *info= (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list, @@ -2144,7 +2223,7 @@ my_bool pagecache_lock_block(PAGECACHE *pagecache, { DBUG_ENTER("pagecache_lock_block"); BLOCK_INFO(block); - if (block->status & BLOCK_WRLOCK) + while (block->status & BLOCK_WRLOCK) { DBUG_PRINT("info", ("fail to lock, waiting...")); /* Lock failed we will wait */ @@ -2168,6 +2247,7 @@ my_bool pagecache_lock_block(PAGECACHE *pagecache, } /* we are doing it by global cache mutex protectio, so it is OK */ block->status|= BLOCK_WRLOCK; + DBUG_PRINT("info", ("WR lock set, block 0x%lx", (ulong)block)); DBUG_RETURN(0); } @@ -2177,6 +2257,7 @@ void pagecache_unlock_block(PAGECACHE_BLOCK_LINK *block) BLOCK_INFO(block); DBUG_ASSERT(block->status & BLOCK_WRLOCK); block->status&= ~BLOCK_WRLOCK; + DBUG_PRINT("info", ("WR lock reset, block 0x%lx", (ulong)block)); #ifdef THREAD /* release all threads waiting for write lock */ if (block->wqueue[COND_FOR_WRLOCK].last_thread) @@ -2434,6 +2515,7 @@ void pagecache_unlock_page(PAGECACHE *pagecache, inc_counter_for_resize_op(pagecache); block= find_key_block(pagecache, file, pageno, 0, 0, 0, &page_st); + BLOCK_INFO(block); DBUG_ASSERT(block != 0 && page_st == PAGE_READ); if (stamp_this_page) { @@ -2776,17 +2858,18 @@ 
restart: inc_counter_for_resize_op(pagecache); pagecache->global_cache_r_requests++; - block= find_key_block(pagecache, file, pageno, level, 0, - (((pin == PAGECACHE_PIN_LEFT_PINNED) || - (pin == PAGECACHE_UNPIN)) ? 0 : 1), - &page_st); + block= find_key_block(pagecache, file, pageno, level, + ((lock == PAGECACHE_LOCK_WRITE) ? 1 : 0), + (((pin == PAGECACHE_PIN_LEFT_PINNED) || + (pin == PAGECACHE_UNPIN)) ? 0 : 1), + &page_st); DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || block->type == type); block->type= type; if (pagecache_make_lock_and_pin(pagecache, block, lock, pin)) { /* - We failed to writelock the block, cache is unlocked, and last write + We failed to write lock the block, cache is unlocked, and last write lock is released, we will try to get the block again. */ pagecache_pthread_mutex_unlock(&pagecache->cache_lock); @@ -3095,11 +3178,16 @@ restart: inc_counter_for_resize_op(pagecache); pagecache->global_cache_w_requests++; - block= find_key_block(pagecache, file, pageno, level, - (write_mode == PAGECACHE_WRITE_DONE ? 0 : 1), - (((pin == PAGECACHE_PIN_LEFT_PINNED) || - (pin == PAGECACHE_UNPIN)) ? 0 : 1), - &page_st); + { + int need_wrlock= (write_mode != PAGECACHE_WRITE_DONE && + lock != PAGECACHE_LOCK_LEFT_WRITELOCKED && + lock != PAGECACHE_LOCK_WRITE_UNLOCK && + lock != PAGECACHE_LOCK_WRITE_TO_READ); + block= find_key_block(pagecache, file, pageno, level, + (need_wrlock ? 1 : 0), + (need_wrlock ? 
1 : 0), + &page_st); + } if (!block) { DBUG_ASSERT(write_mode != PAGECACHE_WRITE_DONE); @@ -3178,9 +3266,15 @@ restart: } /* Unregister the request */ + block->hash_link->requests--; if (pin != PAGECACHE_PIN_LEFT_PINNED && pin != PAGECACHE_PIN) - unreg_request(pagecache, block, 1); + { + if (write_mode != PAGECACHE_WRITE_DONE) + { + unreg_request(pagecache, block, 1); + } + } else *link= (PAGECACHE_PAGE_LINK)block; diff --git a/mysys/test_pagecache_consist.c b/mysys/test_pagecache_consist.c index 1cc54af2460..86698fbb860 100755 --- a/mysys/test_pagecache_consist.c +++ b/mysys/test_pagecache_consist.c @@ -11,7 +11,7 @@ #include /*#define PAGE_SIZE 65536*/ -#define PCACHE_SIZE (PAGE_SIZE*1024*20) +#define PCACHE_SIZE (PAGE_SIZE*1024*8) #ifndef DBUG_OFF static const char* default_dbug_option; @@ -59,7 +59,7 @@ static uint flush_divider= 1000; #endif /*TEST_HIGH_CONCURENCY*/ -/* check page consistemcy */ +/* check page consistency */ uint check_page(uchar *buff, ulong offset, int page_locked, int page_no, int tag) { -- cgit v1.2.1 From 915cebdd53fe5071dc9443a236a798764f504c22 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 16 Nov 2006 15:40:08 +0100 Subject: post-review fixes. 
tablockman: fixed a bug in finding a blocker lock mysys/my_getsystime.c: this is no longer true storage/maria/lockman.h: post-review fixes storage/maria/tablockman.h: post-review fixes storage/maria/unittest/lockman-t.c: post-review fixes storage/maria/unittest/lockman1-t.c: post-review fixes storage/maria/unittest/lockman2-t.c: post-review fixes include/my_atomic.h: moved intptr definition to my_global.h storage/maria/tablockman.c: post-review fixes BUILD/SETUP.sh: add -DMY_LF_EXTRA_DEBUG to debug builds include/atomic/nolock.h: suppress warning include/my_global.h: suppress warning mysys/lf_alloc-pin.c: post-review fixes mysys/lf_dynarray.c: post-review fixes mysys/lf_hash.c: post-review fixes storage/maria/trnman.c: suppress warning include/lf.h: post-review fix --- mysys/lf_alloc-pin.c | 29 ++++++------ mysys/lf_dynarray.c | 35 ++++++++++----- mysys/lf_hash.c | 119 +++++++++++++++++++++++++------------------------- mysys/my_getsystime.c | 4 -- 4 files changed, 98 insertions(+), 89 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index b96fe42311b..43055766c3e 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -91,7 +91,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins); See the latter for details. 
*/ void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, - lf_pinbox_free_func *free_func,void *free_func_arg) + lf_pinbox_free_func *free_func, void *free_func_arg) { DBUG_ASSERT(sizeof(LF_PINS) == 128); DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0); @@ -306,7 +306,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins) { if (addr) /* use binary search */ { - void **a,**b,**c; + void **a, **b, **c; for (a= addr, b= addr+npins-1, c= a+(b-a)/2; b-a>1; c= a+(b-a)/2) if (cur == *c) a= b= c; @@ -337,13 +337,13 @@ found: callback for _lf_pinbox_real_free to free an unpinned object - add it back to the allocator stack */ -static void alloc_free(void *node, LF_ALLOCATOR *allocator) +static void alloc_free(struct st_lf_alloc_node *node, LF_ALLOCATOR *allocator) { - void *tmp; + struct st_lf_alloc_node *tmp; tmp= allocator->top; do { - (*(void **)node)= tmp; + node->next= tmp; } while (!my_atomic_casptr((void **)&allocator->top, (void **)&tmp, node) && LF_BACKOFF); } @@ -379,12 +379,12 @@ void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset) */ void lf_alloc_destroy(LF_ALLOCATOR *allocator) { - void *el= allocator->top; - while (el) + struct st_lf_alloc_node *node= allocator->top; + while (node) { - void *tmp= *(void **)el; - my_free(el, MYF(0)); - el= tmp; + struct st_lf_alloc_node *tmp= node->next; + my_free((void *)node, MYF(0)); + node= tmp; } lf_pinbox_destroy(&allocator->pinbox); allocator->top= 0; @@ -400,7 +400,7 @@ void lf_alloc_destroy(LF_ALLOCATOR *allocator) void *_lf_alloc_new(LF_PINS *pins) { LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg); - void *node; + struct st_lf_alloc_node *node; for (;;) { do @@ -410,7 +410,8 @@ void *_lf_alloc_new(LF_PINS *pins) } while (node != allocator->top && LF_BACKOFF); if (!node) { - if (!(node= my_malloc(allocator->element_size, MYF(MY_WME|MY_ZEROFILL)))) + if (!(node= (void *)my_malloc(allocator->element_size, + MYF(MY_WME|MY_ZEROFILL)))) break; #ifdef MY_LF_EXTRA_DEBUG 
my_atomic_add32(&allocator->mallocs, 1); @@ -434,8 +435,8 @@ void *_lf_alloc_new(LF_PINS *pins) uint lf_alloc_in_pool(LF_ALLOCATOR *allocator) { uint i; - void *node; - for (node= allocator->top, i= 0; node; node= *(void **)node, i++) + struct st_lf_alloc_node *node; + for (node= allocator->top, i= 0; node; node= node->next, i++) /* no op */; return i; } diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c index d63af91813e..c6dd654bf03 100644 --- a/mysys/lf_dynarray.c +++ b/mysys/lf_dynarray.c @@ -19,9 +19,9 @@ (so no pointer into the array may ever become invalid). Memory is allocated in non-contiguous chunks. - This data structure is not space efficient for sparce arrays. + This data structure is not space efficient for sparse arrays. - The number of elements is limited to 2^16 + The number of elements is limited to 4311810304 Every element is aligned to sizeof(element) boundary (to avoid false sharing if element is big enough). @@ -49,7 +49,8 @@ void lf_dynarray_init(LF_DYNARRAY *array, uint element_size) static void recursive_free(void **alloc, int level) { - if (!alloc) return; + if (!alloc) + return; if (level) { @@ -68,10 +69,9 @@ void lf_dynarray_destroy(LF_DYNARRAY *array) for (i= 0; i < LF_DYNARRAY_LEVELS; i++) recursive_free(array->level[i], i); my_atomic_rwlock_destroy(&array->lock); - bzero(array, sizeof(*array)); } -static const long dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= +static const ulong dynarray_idxes_in_prev_levels[LF_DYNARRAY_LEVELS]= { 0, /* +1 here to to avoid -1's below */ LF_DYNARRAY_LEVEL_LENGTH, @@ -82,6 +82,15 @@ static const long dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH }; +static const ulong dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= +{ + 0, /* +1 here to to avoid -1's below */ + LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH * + LF_DYNARRAY_LEVEL_LENGTH, +}; + /* 
Returns a valid lvalue pointer to the element number 'idx'. Allocates memory if necessary. @@ -91,16 +100,17 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) void * ptr, * volatile * ptr_ptr= 0; int i; - for (i= 3; idx < dynarray_idxes_in_prev_level[i]; i--) /* no-op */; + for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--) + /* no-op */; ptr_ptr= &array->level[i]; - idx-= dynarray_idxes_in_prev_level[i]; + idx-= dynarray_idxes_in_prev_levels[i]; for (; i > 0; i--) { if (!(ptr= *ptr_ptr)) { void *alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * sizeof(void *), - MYF(MY_WME|MY_ZEROFILL)); - if (!alloc) + MYF(MY_WME|MY_ZEROFILL)); + if (unlikely(!alloc)) return(NULL); if (my_atomic_casptr(ptr_ptr, &ptr, alloc)) ptr= alloc; @@ -116,7 +126,7 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element + max(array->size_of_element, sizeof(void *)), MYF(MY_WME|MY_ZEROFILL)); - if (!alloc) + if (unlikely(!alloc)) return(NULL); /* reserve the space for free() address */ data= alloc + sizeof(void *); @@ -143,9 +153,10 @@ void *_lf_dynarray_value(LF_DYNARRAY *array, uint idx) void * ptr, * volatile * ptr_ptr= 0; int i; - for (i= 3; idx < dynarray_idxes_in_prev_level[i]; i--) /* no-op */; + for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--) + /* no-op */; ptr_ptr= &array->level[i]; - idx-= dynarray_idxes_in_prev_level[i]; + idx-= dynarray_idxes_in_prev_levels[i]; for (; i > 0; i--) { if (!(ptr= *ptr_ptr)) diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 66ad672f345..ff0eb8326d5 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -23,6 +23,7 @@ (but how to do it in lf_hash_delete ?) 
*/ #include +#include #include #include #include @@ -33,7 +34,7 @@ LF_REQUIRE_PINS(3); typedef struct { intptr volatile link; /* a pointer to the next element in a listand a flag */ uint32 hashnr; /* reversed hash number, for sorting */ - const uchar *key; + const byte *key; uint keylen; } LF_SLIST; @@ -67,31 +68,31 @@ typedef struct { pins[0..2] are used, they are NOT removed on return */ static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, - const uchar *key, uint keylen, CURSOR *cursor, LF_PINS *pins) + const byte *key, uint keylen, CURSOR *cursor, LF_PINS *pins) { uint32 cur_hashnr; - const uchar *cur_key; + const byte *cur_key; uint cur_keylen; intptr link; retry: - cursor->prev=(intptr *)head; + cursor->prev= (intptr *)head; do { - cursor->curr=PTR(*cursor->prev); - _lf_pin(pins,1,cursor->curr); + cursor->curr= PTR(*cursor->prev); + _lf_pin(pins, 1, cursor->curr); } while(*cursor->prev != (intptr)cursor->curr && LF_BACKOFF); for (;;) { if (!cursor->curr) return 0; do { // XXX or goto retry ? 
- link=cursor->curr->link; - cursor->next=PTR(link); + link= cursor->curr->link; + cursor->next= PTR(link); _lf_pin(pins, 0, cursor->next); } while(link != cursor->curr->link && LF_BACKOFF); - cur_hashnr=cursor->curr->hashnr; - cur_key=cursor->curr->key; - cur_keylen=cursor->curr->keylen; + cur_hashnr= cursor->curr->hashnr; + cur_key= cursor->curr->key; + cur_keylen= cursor->curr->keylen; if (*cursor->prev != (intptr)cursor->curr) { LF_BACKOFF; @@ -101,12 +102,12 @@ retry: { if (cur_hashnr >= hashnr) { - int r=1; + int r= 1; if (cur_hashnr > hashnr || - (r=my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0) + (r= my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0) return !r; } - cursor->prev=&(cursor->curr->link); + cursor->prev= &(cursor->curr->link); _lf_pin(pins, 2, cursor->curr); } else @@ -120,7 +121,7 @@ retry: goto retry; } } - cursor->curr=cursor->next; + cursor->curr= cursor->next; _lf_pin(pins, 1, cursor->curr); } } @@ -141,21 +142,21 @@ static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs, LF_SLIST *node, LF_PINS *pins, uint flags) { CURSOR cursor; - int res=-1; + int res= -1; do { if (lfind(head, cs, node->hashnr, node->key, node->keylen, &cursor, pins) && (flags & LF_HASH_UNIQUE)) - res=0; /* duplicate found */ + res= 0; /* duplicate found */ else { - node->link=(intptr)cursor.curr; + node->link= (intptr)cursor.curr; assert(node->link != (intptr)node); assert(cursor.prev != &node->link); if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node)) - res=1; /* inserted ok */ + res= 1; /* inserted ok */ } } while (res == -1); _lf_unpin(pins, 0); @@ -177,10 +178,10 @@ static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs, it uses pins[0..2], on return all pins are removed. 
*/ static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, - const uchar *key, uint keylen, LF_PINS *pins) + const byte *key, uint keylen, LF_PINS *pins) { CURSOR cursor; - int res=-1; + int res= -1; do { @@ -218,30 +219,30 @@ static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, all other pins are removed. */ static LF_SLIST *lsearch(LF_SLIST * volatile *head, CHARSET_INFO *cs, - uint32 hashnr, const uchar *key, uint keylen, + uint32 hashnr, const byte *key, uint keylen, LF_PINS *pins) { CURSOR cursor; - int res=lfind(head, cs, hashnr, key, keylen, &cursor, pins); + int res= lfind(head, cs, hashnr, key, keylen, &cursor, pins); if (res) _lf_pin(pins, 2, cursor.curr); _lf_unpin(pins, 0); _lf_unpin(pins, 1); return res ? cursor.curr : 0; } -static inline const uchar* hash_key(const LF_HASH *hash, - const uchar *record, uint *length) +static inline const byte* hash_key(const LF_HASH *hash, + const byte *record, uint *length) { if (hash->get_key) - return (*hash->get_key)(record,length,0); - *length=hash->key_length; + return (*hash->get_key)(record, length, 0); + *length= hash->key_length; return record + hash->key_offset; } -static inline uint calc_hash(LF_HASH *hash, const uchar *key, uint keylen) +static inline uint calc_hash(LF_HASH *hash, const byte *key, uint keylen) { - ulong nr1=1, nr2=4; - hash->charset->coll->hash_sort(hash->charset,key,keylen,&nr1,&nr2); + ulong nr1= 1, nr2= 4; + hash->charset->coll->hash_sort(hash->charset, key, keylen, &nr1, &nr2); return nr1 & INT_MAX32; } @@ -258,28 +259,28 @@ void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size, offsetof(LF_SLIST, key)); lf_dynarray_init(&hash->array, sizeof(LF_SLIST **)); - hash->size=1; - hash->count=0; - hash->element_size=element_size; - hash->flags=flags; - hash->charset=charset ? 
charset : &my_charset_bin; - hash->key_offset=key_offset; - hash->key_length=key_length; - hash->get_key=get_key; + hash->size= 1; + hash->count= 0; + hash->element_size= element_size; + hash->flags= flags; + hash->charset= charset ? charset : &my_charset_bin; + hash->key_offset= key_offset; + hash->key_length= key_length; + hash->get_key= get_key; DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length); } void lf_hash_destroy(LF_HASH *hash) { - LF_SLIST *el=*(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); + LF_SLIST *el= *(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); while (el) { - intptr next=el->link; + intptr next= el->link; if (el->hashnr & 1) lf_alloc_real_free(&hash->alloc, el); else my_free((void *)el, MYF(0)); - el=(LF_SLIST *)next; + el= (LF_SLIST *)next; } lf_alloc_destroy(&hash->alloc); lf_dynarray_destroy(&hash->array); @@ -299,19 +300,19 @@ void lf_hash_destroy(LF_HASH *hash) */ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) { - uint csize, bucket, hashnr; + int csize, bucket, hashnr; LF_SLIST *node, * volatile *el; lf_rwlock_by_pins(pins); - node=(LF_SLIST *)_lf_alloc_new(pins); + node= (LF_SLIST *)_lf_alloc_new(pins); memcpy(node+1, data, hash->element_size); - node->key= hash_key(hash, (uchar *)(node+1), &node->keylen); + node->key= hash_key(hash, (byte *)(node+1), &node->keylen); hashnr= calc_hash(hash, node->key, node->keylen); bucket= hashnr % hash->size; - el=_lf_dynarray_lvalue(&hash->array, bucket); + el= _lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); - node->hashnr=my_reverse_bits(hashnr) | 1; + node->hashnr= my_reverse_bits(hashnr) | 1; if (linsert(el, hash->charset, node, pins, hash->flags)) { _lf_alloc_free(pins, node); @@ -335,15 +336,15 @@ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) { LF_SLIST * volatile *el; - uint bucket, hashnr=calc_hash(hash, 
(uchar *)key, keylen); + uint bucket, hashnr= calc_hash(hash, (byte *)key, keylen); bucket= hashnr % hash->size; lf_rwlock_by_pins(pins); - el=_lf_dynarray_lvalue(&hash->array, bucket); + el= _lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); if (ldelete(el, hash->charset, my_reverse_bits(hashnr) | 1, - (uchar *)key, keylen, pins)) + (byte *)key, keylen, pins)) { lf_rwunlock_by_pins(pins); return 1; @@ -360,33 +361,33 @@ int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) { LF_SLIST * volatile *el, *found; - uint bucket, hashnr=calc_hash(hash, (uchar *)key, keylen); + uint bucket, hashnr= calc_hash(hash, (byte *)key, keylen); bucket= hashnr % hash->size; lf_rwlock_by_pins(pins); - el=_lf_dynarray_lvalue(&hash->array, bucket); + el= _lf_dynarray_lvalue(&hash->array, bucket); if (*el == NULL) initialize_bucket(hash, el, bucket, pins); found= lsearch(el, hash->charset, my_reverse_bits(hashnr) | 1, - (uchar *)key, keylen, pins); + (byte *)key, keylen, pins); lf_rwunlock_by_pins(pins); return found ? 
found+1 : 0; } -static char *dummy_key=""; +static char *dummy_key= ""; static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, uint bucket, LF_PINS *pins) { uint parent= my_clear_highest_bit(bucket); - LF_SLIST *dummy=(LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME)); - LF_SLIST **tmp=0, *cur; - LF_SLIST * volatile *el=_lf_dynarray_lvalue(&hash->array, parent); + LF_SLIST *dummy= (LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME)); + LF_SLIST **tmp= 0, *cur; + LF_SLIST * volatile *el= _lf_dynarray_lvalue(&hash->array, parent); if (*el == NULL && bucket) initialize_bucket(hash, el, parent, pins); - dummy->hashnr=my_reverse_bits(bucket); - dummy->key=dummy_key; - dummy->keylen=0; + dummy->hashnr= my_reverse_bits(bucket); + dummy->key= dummy_key; + dummy->keylen= 0; if ((cur= linsert(el, hash->charset, dummy, pins, 0))) { my_free((void *)dummy, MYF(0)); diff --git a/mysys/my_getsystime.c b/mysys/my_getsystime.c index 91c977f0b5a..d1ed4f2ec92 100644 --- a/mysys/my_getsystime.c +++ b/mysys/my_getsystime.c @@ -35,10 +35,6 @@ ulonglong my_getsystime() LARGE_INTEGER t_cnt; if (!offset) { - /* strictly speaking there should be a mutex to protect - initialization section. But my_getsystime() is called from - UUID() code, and UUID() calls are serialized with a mutex anyway - */ LARGE_INTEGER li; FILETIME ft; GetSystemTimeAsFileTime(&ft); -- cgit v1.2.1 From 6691ce30ecd82baf9877242c078a5e1d12a78369 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 16 Nov 2006 22:53:53 +0100 Subject: Maria - WL#3134 Key cache to Page cache conversion Moving the test_pagecache_ tests from mysys to unittest/mysys. Means fixing includes to work with the new directory, some Makefile.am editing, replacing memset() with bfill(). test_page_cache_*.c renamed to mf_pagecache_*-t.c (-t is the standard suffix for tests in the mytap protocol). Also added plan() and exit_status() calls to tests. 
Sanja, I put some TODOs for you at the start of mf_pagecache_*.c unittest/mysys/test_file.h: Rename: mysys/test_file.h -> unittest/mysys/test_file.h mysys/Makefile.am: pagecache test files move to top/unittest/mysys mysys/mf_pagecache.c: my_bit.h needed to compile. unittest/mysys/Makefile.am: INCLUDES is a better place for includes than AM_CPPFLAGS (the latter get overriden by prog_CPPFLAGS, which is not desirable here). Adding pagecache's test programs (moved from mysys); test_pagecache_* has been renamed to mf_pagecache*-t (-t is the required suffix for test executables in the mytap framework). unittest/mysys/mf_pagecache_consist.c: fixing includes to work with the new directory. The test must return an exit code informing if any part failed. TODOs for Sanja. unittest/mysys/mf_pagecache_single.c: fixing includes to work with new directory. adding a plan() to account for the number of tests. Adding exit_status() to tell how many tests failed. memset() was giving a compilation warning (implicit declaration etc), properly due to me removing stdio.h etc, so I replaced it with bfill(). TODOs for Sanja. unittest/mysys/test_file.c: moved from mysys (piece of the page cache tests) and includes fixed. 
--- mysys/Makefile.am | 55 ---- mysys/mf_pagecache.c | 2 + mysys/test_file.c | 70 ----- mysys/test_file.h | 14 - mysys/test_pagecache_consist.c | 447 ------------------------------- mysys/test_pagecache_single.c | 589 ----------------------------------------- 6 files changed, 2 insertions(+), 1175 deletions(-) delete mode 100644 mysys/test_file.c delete mode 100644 mysys/test_file.h delete mode 100755 mysys/test_pagecache_consist.c delete mode 100644 mysys/test_pagecache_single.c (limited to 'mysys') diff --git a/mysys/Makefile.am b/mysys/Makefile.am index 2f8b34dc301..4d9570febbd 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -130,61 +130,6 @@ test_base64$(EXEEXT): base64.c $(LIBRARIES) $(LINK) $(FLAGS) -DMAIN ./test_base64.c $(LDADD) $(LIBS) $(RM) -f ./test_base64.c -test_mf_pagecache.o: mf_pagecache.c ../include/pagecache.h $(LIBRARIES) - $(CP) $(srcdir)/mf_pagecache.c test_mf_pagecache.c - $(COMPILE) $(FLAGS) -DPAGECACHE_DEBUG -DEXTRA_DEBUG -c test_mf_pagecache.c - -test_file.o: test_file.c test_file.h - $(COMPILE) $(FLAGS) -DPAGECACHE_DEBUG -DEXTRA_DEBUG -c test_file.c - -test_pagecache_single1k$(EXEEXT): test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LDADD) $(LIBS) - -test_pagecache_single8k$(EXEEXT): test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=8192 -DEXTRA_DEBUG $(srcdir)/test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LDADD) $(LIBS) - -test_pagecache_single64k$(EXEEXT): test_pagecache_single.c test_mf_pagecache.o ../unittest/mytap/tap.o test_file.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_single.c test_mf_pagecache.o 
../unittest/mytap/tap.o test_file.o $(LDADD) $(LIBS) - -test_pagecache_single: test_pagecache_single1k$(EXEEXT) test_pagecache_single8k$(EXEEXT) test_pagecache_single64k$(EXEEXT) - ./test_pagecache_single64k$(EXEEXT) - ./test_pagecache_single8k$(EXEEXT) - ./test_pagecache_single1k$(EXEEXT) - -test_pagecache_consist1k$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) - -test_pagecache_consist64k$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) - -test_pagecache_consist1kHC$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_HIGH_CONCURENCY -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) - -test_pagecache_consist64kHC$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_HIGH_CONCURENCY -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) - -test_pagecache_consist1kRD$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_READERS -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) - -test_pagecache_consist64kRD$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_READERS 
-DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) - -test_pagecache_consist1kWR$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_WRITERS -DPAGE_SIZE=1024 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) - -test_pagecache_consist64kWR$(EXEEXT): test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LIBRARIES) - $(LINK) $(FLAGS) -DMAIN -DPAGECACHE_DEBUG -DTEST_WRITERS -DPAGE_SIZE=65536 -DEXTRA_DEBUG $(srcdir)/test_pagecache_consist.c test_mf_pagecache.o ../unittest/mytap/tap.o $(LDADD) $(LIBS) - -test_pagecache_consist: test_pagecache_consist1k$(EXEEXT) test_pagecache_consist64k$(EXEEXT) test_pagecache_consist1kHC$(EXEEXT) test_pagecache_consist64kHC$(EXEEXT) test_pagecache_consist1kRD$(EXEEXT) test_pagecache_consist64kRD$(EXEEXT) test_pagecache_consist1kWR$(EXEEXT) test_pagecache_consist64kWR$(EXEEXT) - ./test_pagecache_consist1k$(EXEEXT) - ./test_pagecache_consist64k$(EXEEXT) - ./test_pagecache_consist1kHC$(EXEEXT) - ./test_pagecache_consist64kHC$(EXEEXT) - ./test_pagecache_consist1kRD$(EXEEXT) - ./test_pagecache_consist64kRD$(EXEEXT) - ./test_pagecache_consist1kWR$(EXEEXT) - ./test_pagecache_consist64kWR$(EXEEXT) - # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index e1ccab9e06f..d8e235bc7f4 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -45,6 +45,8 @@ #include #include #include +#include + /* Some compilation flags have been added specifically for this module diff --git a/mysys/test_file.c b/mysys/test_file.c deleted file mode 100644 index 7ffca48023d..00000000000 --- a/mysys/test_file.c +++ /dev/null @@ -1,70 +0,0 @@ -#include "mysys_priv.h" -#include "my_dir.h" -#include -#include -#include -#include "test_file.h" - - -/* - Check that file 
contance correspond to descriptor - - SYNOPSIS - test_file() - file File to test - file_name Path (and name) of file which is tested - size size of file - buff_size size of buffer which is enought to check the file - desc file descriptor to check with - - RETURN - 1 file if OK - 0 error -*/ - -int test_file(PAGECACHE_FILE file, char *file_name, - off_t size, size_t buff_size, struct file_desc *desc) -{ - MY_STAT stat_buff, *stat; - unsigned char *buffr= malloc(buff_size); - off_t pos= 0; - size_t byte; - int step= 0; - - if ((stat= my_stat(file_name, &stat_buff, MYF(0))) == NULL) - { - diag("Can't stat() %s (errno: %d)\n", file_name, errno); - return 0; - } - if (stat->st_size != size) - { - diag("file %s size is %lu (should be %lu)\n", - file_name, (ulong) stat->st_size, (ulong) size); - return 0; - } - /* check content */ - my_seek(file.file, 0, SEEK_SET, MYF(0)); - while (desc[step].length != 0) - { - if (my_read(file.file, (char*)buffr, desc[step].length, MYF(0)) != - desc[step].length) - { - diag("Can't read %u bytes from %s (errno: %d)\n", - (uint)desc[step].length, file_name, errno); - return 0; - } - for (byte= 0; byte < desc[step].length; byte++) - { - if (buffr[byte] != desc[step].content) - { - diag("content of %s mismatch 0x%x in position %lu instead of 0x%x\n", - file_name, (uint) buffr[byte], (ulong) (pos + byte), - desc[step].content); - return 0; - } - } - pos+= desc[step].length; - step++; - } - return 1; -} diff --git a/mysys/test_file.h b/mysys/test_file.h deleted file mode 100644 index ea787c123ed..00000000000 --- a/mysys/test_file.h +++ /dev/null @@ -1,14 +0,0 @@ - -#include - -/* - File content descriptor -*/ -struct file_desc -{ - unsigned int length; - unsigned char content; -}; - -int test_file(PAGECACHE_FILE file, char *file_name, - off_t size, size_t buff_size, struct file_desc *desc); diff --git a/mysys/test_pagecache_consist.c b/mysys/test_pagecache_consist.c deleted file mode 100755 index 86698fbb860..00000000000 --- 
a/mysys/test_pagecache_consist.c +++ /dev/null @@ -1,447 +0,0 @@ -#include "mysys_priv.h" -#include "../include/my_pthread.h" -#include "../include/pagecache.h" -#include -#include "my_dir.h" -#include -#include -#include -#include -#include "../unittest/mytap/tap.h" -#include - -/*#define PAGE_SIZE 65536*/ -#define PCACHE_SIZE (PAGE_SIZE*1024*8) - -#ifndef DBUG_OFF -static const char* default_dbug_option; -#endif - -static char *file1_name= (char*)"page_cache_test_file_1"; -static PAGECACHE_FILE file1; -static pthread_cond_t COND_thread_count; -static pthread_mutex_t LOCK_thread_count; -static uint thread_count; -static PAGECACHE pagecache; - -#ifdef TEST_HIGH_CONCURENCY -static uint number_of_readers= 10; -static uint number_of_writers= 20; -static uint number_of_tests= 30000; -static uint record_length_limit= PAGE_SIZE/200; -static uint number_of_pages= 20; -static uint flush_divider= 1000; -#else /*TEST_HIGH_CONCURENCY*/ -#ifdef TEST_READERS -static uint number_of_readers= 10; -static uint number_of_writers= 1; -static uint number_of_tests= 30000; -static uint record_length_limit= PAGE_SIZE/200; -static uint number_of_pages= 20; -static uint flush_divider= 1000; -#else /*TEST_READERS*/ -#ifdef TEST_WRITERS -static uint number_of_readers= 0; -static uint number_of_writers= 10; -static uint number_of_tests= 30000; -static uint record_length_limit= PAGE_SIZE/200; -static uint number_of_pages= 20; -static uint flush_divider= 1000; -#else /*TEST_WRITERS*/ -static uint number_of_readers= 10; -static uint number_of_writers= 10; -static uint number_of_tests= 50000; -static uint record_length_limit= PAGE_SIZE/200; -static uint number_of_pages= 20000; -static uint flush_divider= 1000; -#endif /*TEST_WRITERS*/ -#endif /*TEST_READERS*/ -#endif /*TEST_HIGH_CONCURENCY*/ - - -/* check page consistency */ -uint check_page(uchar *buff, ulong offset, int page_locked, int page_no, - int tag) -{ - uint end= sizeof(uint); - uint num= *((uint *)buff); - uint i; - 
DBUG_ENTER("check_page"); - - for (i= 0; i < num; i++) - { - uint len= *((uint *)(buff + end)); - uint j; - end+= sizeof(uint)+ sizeof(uint); - if (len + end > PAGE_SIZE) - { - diag("incorrect field header #%u by offset %lu\n", i, offset + end + j); - goto err; - } - for(j= 0; j < len; j++) - { - if (buff[end + j] != (uchar)((i+1) % 256)) - { - diag("incorrect %lu byte\n", offset + end + j); - goto err; - } - } - end+= len; - } - for(i= end; i < PAGE_SIZE; i++) - { - if (buff[i] != 0) - { - int h; - DBUG_PRINT("err", - ("byte %lu (%lu + %u), page %u (%s, end: %u, recs: %u, tag: %d) should be 0\n", - offset + i, offset, i, page_no, - (page_locked ? "locked" : "unlocked"), - end, num, tag)); - diag("byte %lu (%lu + %u), page %u (%s, end: %u, recs: %u, tag: %d) should be 0\n", - offset + i, offset, i, page_no, - (page_locked ? "locked" : "unlocked"), - end, num, tag); - h= my_open("wrong_page", O_CREAT | O_TRUNC | O_RDWR, MYF(0)); - my_pwrite(h, buff, PAGE_SIZE, 0, MYF(0)); - my_close(h, MYF(0)); - goto err; - } - } - DBUG_RETURN(end); -err: - DBUG_PRINT("err", ("try to flush")); - if (page_locked) - { - pagecache_delete_page(&pagecache, &file1, page_no, - PAGECACHE_LOCK_LEFT_WRITELOCKED, 1); - } - else - { - flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE); - } - exit(1); -} - -void put_rec(uchar *buff, uint end, uint len, uint tag) -{ - uint i; - uint num= *((uint *)buff); - if (!len) - len= 1; - if (end + sizeof(uint)*2 + len > PAGE_SIZE) - return; - *((uint *)(buff + end))= len; - end+= sizeof(uint); - *((uint *)(buff + end))= tag; - end+= sizeof(uint); - num++; - *((uint *)buff)= num; - *((uint*)(buff + end))= len; - for (i= end; i < (len + end); i++) - { - buff[i]= (uchar) num % 256; - } -} - -/* - Recreate and reopen a file for test - - SYNOPSIS - reset_file() - file File to reset - file_name Path (and name) of file which should be reset -*/ - -void reset_file(PAGECACHE_FILE file, char *file_name) -{ - flush_pagecache_blocks(&pagecache, &file1, 
FLUSH_RELEASE); - if (my_close(file1.file, MYF(0)) != 0) - { - diag("Got error during %s closing from close() (errno: %d)\n", - file_name, errno); - exit(1); - } - my_delete(file_name, MYF(0)); - if ((file.file= my_open(file_name, - O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) - { - diag("Got error during %s creation from open() (errno: %d)\n", - file_name, errno); - exit(1); - } -} - - -void reader(int num) -{ - unsigned char *buffr= malloc(PAGE_SIZE); - uint i; - - for (i= 0; i < number_of_tests; i++) - { - uint page= rand()/(RAND_MAX/number_of_pages); - pagecache_read(&pagecache, &file1, page, 3, (char*)buffr, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, - 0); - check_page(buffr, page * PAGE_SIZE, 0, page, -num); - if (i % 500 == 0) - printf("reader%d: %d\n", num, i); - - } - printf("reader%d: done\n", num); - free(buffr); -} - - -void writer(int num) -{ - unsigned char *buffr= malloc(PAGE_SIZE); - uint i; - - for (i= 0; i < number_of_tests; i++) - { - uint end; - uint page= rand()/(RAND_MAX/number_of_pages); - pagecache_read(&pagecache, &file1, page, 3, (char*)buffr, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_WRITE, - 0); - end= check_page(buffr, page * PAGE_SIZE, 1, page, num); - put_rec(buffr, end, rand()/(RAND_MAX/record_length_limit), num); - pagecache_write(&pagecache, &file1, page, 3, (char*)buffr, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_WRITE_UNLOCK, - PAGECACHE_UNPIN, - PAGECACHE_WRITE_DELAY, - 0); - - if (i % flush_divider == 0) - flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); - if (i % 500 == 0) - printf("writer%d: %d\n", num, i); - } - printf("writer%d: done\n", num); - free(buffr); -} - - -static void *test_thread_reader(void *arg) -{ - int param=*((int*) arg); - - my_thread_init(); - DBUG_ENTER("test_reader"); - DBUG_PRINT("enter", ("param: %d", param)); - - reader(param); - - DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name())); - pthread_mutex_lock(&LOCK_thread_count); - thread_count--; - 
VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */ - pthread_mutex_unlock(&LOCK_thread_count); - free((gptr) arg); - my_thread_end(); - DBUG_RETURN(0); -} - -static void *test_thread_writer(void *arg) -{ - int param=*((int*) arg); - - my_thread_init(); - DBUG_ENTER("test_writer"); - DBUG_PRINT("enter", ("param: %d", param)); - - writer(param); - - DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name())); - pthread_mutex_lock(&LOCK_thread_count); - thread_count--; - VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */ - pthread_mutex_unlock(&LOCK_thread_count); - free((gptr) arg); - my_thread_end(); - DBUG_RETURN(0); -} - -int main(int argc, char **argv __attribute__((unused))) -{ - pthread_t tid; - pthread_attr_t thr_attr; - int *param, error, pagen; - - MY_INIT(argv[0]); - -#ifndef DBUG_OFF -#if defined(__WIN__) - default_dbug_option= "d:t:i:O,\\test_pagecache_consist.trace"; -#else - default_dbug_option= "d:t:i:o,/tmp/test_pagecache_consist.trace"; -#endif - if (argc > 1) - { - DBUG_SET(default_dbug_option); - DBUG_SET_INITIAL(default_dbug_option); - } -#endif - - - DBUG_ENTER("main"); - DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name())); - if ((file1.file= my_open(file1_name, - O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) - { - fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n", - errno); - exit(1); - } - DBUG_PRINT("info", ("file1: %d", file1.file)); - if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0) - { - fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n", - errno); - exit(1); - } - my_pwrite(file1.file, "test file", 9, 0, MYF(0)); - - if ((error= pthread_cond_init(&COND_thread_count, NULL))) - { - fprintf(stderr, "COND_thread_count: %d from pthread_cond_init (errno: %d)\n", - error, errno); - exit(1); - } - if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST))) - { - fprintf(stderr, "LOCK_thread_count: %d from pthread_cond_init (errno: 
%d)\n", - error, errno); - exit(1); - } - - if ((error= pthread_attr_init(&thr_attr))) - { - fprintf(stderr,"Got error: %d from pthread_attr_init (errno: %d)\n", - error,errno); - exit(1); - } - if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED))) - { - fprintf(stderr, - "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n", - error,errno); - exit(1); - } - -#ifndef pthread_attr_setstacksize /* void return value */ - if ((error= pthread_attr_setstacksize(&thr_attr, 65536L))) - { - fprintf(stderr,"Got error: %d from pthread_attr_setstacksize (errno: %d)\n", - error,errno); - exit(1); - } -#endif -#ifdef HAVE_THR_SETCONCURRENCY - VOID(thr_setconcurrency(2)); -#endif - - my_thread_global_init(); - - - if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, - PAGE_SIZE, 0)) == 0) - { - fprintf(stderr,"Got error: init_pagecache() (errno: %d)\n", - errno); - exit(1); - } - DBUG_PRINT("info", ("Page cache %d pages", pagen)); - { - unsigned char *buffr= malloc(PAGE_SIZE); - uint i; - memset(buffr, '\0', PAGE_SIZE); - for (i= 0; i < number_of_pages; i++) - { - pagecache_write(&pagecache, &file1, i, 3, (char*)buffr, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, - PAGECACHE_PIN_LEFT_UNPINNED, - PAGECACHE_WRITE_DELAY, - 0); - } - flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); - free(buffr); - } - if ((error= pthread_mutex_lock(&LOCK_thread_count))) - { - fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_lock (errno: %d)\n", - error,errno); - exit(1); - } - while (number_of_readers != 0 || number_of_writers != 0) - { - if (number_of_readers != 0) - { - param=(int*) malloc(sizeof(int)); - *param= number_of_readers; - if ((error= pthread_create(&tid, &thr_attr, test_thread_reader, - (void*) param))) - { - fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n", - error,errno); - exit(1); - } - thread_count++; - number_of_readers--; - } - if (number_of_writers != 0) - { - param=(int*) 
malloc(sizeof(int)); - *param= number_of_writers; - if ((error= pthread_create(&tid, &thr_attr, test_thread_writer, - (void*) param))) - { - fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n", - error,errno); - exit(1); - } - thread_count++; - number_of_writers--; - } - } - DBUG_PRINT("info", ("Thread started")); - pthread_mutex_unlock(&LOCK_thread_count); - - pthread_attr_destroy(&thr_attr); - - /* wait finishing */ - if ((error= pthread_mutex_lock(&LOCK_thread_count))) - fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_lock\n",error); - while (thread_count) - { - if ((error= pthread_cond_wait(&COND_thread_count,&LOCK_thread_count))) - fprintf(stderr,"COND_thread_count: %d from pthread_cond_wait\n",error); - } - if ((error= pthread_mutex_unlock(&LOCK_thread_count))) - fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_unlock\n",error); - DBUG_PRINT("info", ("thread ended")); - - end_pagecache(&pagecache, 1); - DBUG_PRINT("info", ("Page cache ended")); - - if (my_close(file1.file, MYF(0)) != 0) - { - fprintf(stderr, "Got error during file1 closing from close() (errno: %d)\n", - errno); - exit(1); - } - /*my_delete(file1_name, MYF(0));*/ - my_thread_global_end(); - - DBUG_PRINT("info", ("file1 (%d) closed", file1.file)); - - DBUG_PRINT("info", ("Program end")); - - DBUG_RETURN(0); -} diff --git a/mysys/test_pagecache_single.c b/mysys/test_pagecache_single.c deleted file mode 100644 index 9df7844cfa5..00000000000 --- a/mysys/test_pagecache_single.c +++ /dev/null @@ -1,589 +0,0 @@ -#include "mysys_priv.h" -#include "../include/my_pthread.h" -#include "../include/pagecache.h" -#include "my_dir.h" -#include -#include -#include -#include -#include -#include "../unittest/mytap/tap.h" -#include "test_file.h" - -/* #define PAGE_SIZE 1024 */ -#define PCACHE_SIZE (PAGE_SIZE*1024*10) - -#ifndef DBUG_OFF -static const char* default_dbug_option; -#endif - -static char *file1_name= (char*)"page_cache_test_file_1"; -static PAGECACHE_FILE file1; -static 
pthread_cond_t COND_thread_count; -static pthread_mutex_t LOCK_thread_count; -static uint thread_count; -static PAGECACHE pagecache; - -/* - File contance descriptors -*/ -static struct file_desc simple_read_write_test_file[]= -{ - {PAGE_SIZE, '\1'}, - { 0, 0} -}; -static struct file_desc simple_read_change_write_read_test_file[]= -{ - {PAGE_SIZE/2, '\65'}, - {PAGE_SIZE/2, '\1'}, - { 0, 0} -}; -static struct file_desc simple_pin_test_file1[]= -{ - {PAGE_SIZE*2, '\1'}, - { 0, 0} -}; -static struct file_desc simple_pin_test_file2[]= -{ - {PAGE_SIZE/2, '\1'}, - {PAGE_SIZE/2, (unsigned char)129}, - {PAGE_SIZE, '\1'}, - { 0, 0} -}; -static struct file_desc simple_delete_forget_test_file[]= -{ - {PAGE_SIZE, '\1'}, - { 0, 0} -}; -static struct file_desc simple_delete_flush_test_file[]= -{ - {PAGE_SIZE, '\2'}, - { 0, 0} -}; - - -/* - Recreate and reopen a file for test - - SYNOPSIS - reset_file() - file File to reset - file_name Path (and name) of file which should be reset -*/ - -void reset_file(PAGECACHE_FILE file, char *file_name) -{ - flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE); - if (my_close(file1.file, MYF(0)) != 0) - { - diag("Got error during %s closing from close() (errno: %d)\n", - file_name, errno); - exit(1); - } - my_delete(file_name, MYF(0)); - if ((file.file= my_open(file_name, - O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) - { - diag("Got error during %s creation from open() (errno: %d)\n", - file_name, errno); - exit(1); - } -} - -/* - Write then read page, check file on disk -*/ - -int simple_read_write_test() -{ - unsigned char *buffw= malloc(PAGE_SIZE); - unsigned char *buffr= malloc(PAGE_SIZE); - int res; - DBUG_ENTER("simple_read_write_test"); - memset(buffw, '\1', PAGE_SIZE); - pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, - PAGECACHE_PIN_LEFT_UNPINNED, - PAGECACHE_WRITE_DELAY, - 0); - pagecache_read(&pagecache, &file1, 0, 3, (char*)buffr, - PAGECACHE_PLAIN_PAGE, - 
PAGECACHE_LOCK_LEFT_UNLOCKED, - 0); - ok((res= test(memcmp(buffr, buffw, PAGE_SIZE) == 0)), - "Simple write-read page "); - flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); - ok((res&= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, - simple_read_write_test_file))), - "Simple write-read page file"); - if (res) - reset_file(file1, file1_name); - free(buffw); - free(buffr); - DBUG_RETURN(res); -} - - -/* - Prepare page, then read (and lock), change (write new value and unlock), - then check the page in the cache and on the disk -*/ -int simple_read_change_write_read_test() -{ - unsigned char *buffw= malloc(PAGE_SIZE); - unsigned char *buffr= malloc(PAGE_SIZE); - int res; - DBUG_ENTER("simple_read_change_write_read_test"); - /* prepare the file */ - memset(buffw, '\1', PAGE_SIZE); - pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, - PAGECACHE_PIN_LEFT_UNPINNED, - PAGECACHE_WRITE_DELAY, - 0); - flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); - /* test */ - pagecache_read(&pagecache, &file1, 0, 3, (char*)buffw, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_WRITE, - 0); - memset(buffw, '\65', PAGE_SIZE/2); - pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_WRITE_UNLOCK, - PAGECACHE_UNPIN, - PAGECACHE_WRITE_DELAY, - 0); - - pagecache_read(&pagecache, &file1, 0, 3, (char*)buffr, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, - 0); - ok((res= test(memcmp(buffr, buffw, PAGE_SIZE) == 0)), - "Simple read-change-write-read page "); - flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); - ok((res&= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, - simple_read_change_write_read_test_file))), - "Simple read-change-write-read page file"); - if (res) - reset_file(file1, file1_name); - free(buffw); - free(buffr); - DBUG_RETURN(res); -} - - -/* - Prepare page, read page 0 (and pin) then write page 1 and page 0. 
- Flush the file (shold flush only page 1 and return 1 (page 0 is - still pinned). - Check file on the disk. - Unpin and flush. - Check file on the disk. -*/ -int simple_pin_test() -{ - unsigned char *buffw= malloc(PAGE_SIZE); - unsigned char *buffr= malloc(PAGE_SIZE); - int res; - DBUG_ENTER("simple_pin_test"); - /* prepare the file */ - memset(buffw, '\1', PAGE_SIZE); - pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, - PAGECACHE_PIN_LEFT_UNPINNED, - PAGECACHE_WRITE_DELAY, - 0); - /* test */ - if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) - { - diag("error in flush_pagecache_blocks\n"); - exit(1); - } - pagecache_read(&pagecache, &file1, 0, 3, (char*)buffw, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_WRITE, - 0); - pagecache_write(&pagecache, &file1, 1, 3, (char*)buffw, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, - PAGECACHE_PIN_LEFT_UNPINNED, - PAGECACHE_WRITE_DELAY, - 0); - memset(buffw + PAGE_SIZE/2, ((unsigned char) 129), PAGE_SIZE/2); - pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_WRITE_TO_READ, - PAGECACHE_PIN_LEFT_PINNED, - PAGECACHE_WRITE_DELAY, - 0); - /* - We have to get error because one page of the file is pinned, - other page should be flushed - */ - if (!flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) - { - diag("Did not get error in flush_pagecache_blocks\n"); - res= 0; - goto err; - } - ok((res= test(test_file(file1, file1_name, PAGE_SIZE*2, PAGE_SIZE*2, - simple_pin_test_file1))), - "Simple pin page file with pin"); - pagecache_unlock_page(&pagecache, - &file1, - 0, - PAGECACHE_LOCK_READ_UNLOCK, - PAGECACHE_UNPIN, - 0, 0); - if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) - { - diag("Got error in flush_pagecache_blocks\n"); - res= 0; - goto err; - } - ok((res&= test(test_file(file1, file1_name, PAGE_SIZE*2, PAGE_SIZE, - simple_pin_test_file2))), - "Simple pin page result 
file"); - if (res) - reset_file(file1, file1_name); -err: - free(buffw); - free(buffr); - DBUG_RETURN(res); -} - -/* - Prepare page, write new value, then delete page from cache without flush, - on the disk should be page with old content written during preparation -*/ - -int simple_delete_forget_test() -{ - unsigned char *buffw= malloc(PAGE_SIZE); - unsigned char *buffr= malloc(PAGE_SIZE); - int res; - DBUG_ENTER("simple_delete_forget_test"); - /* prepare the file */ - memset(buffw, '\1', PAGE_SIZE); - pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, - PAGECACHE_PIN_LEFT_UNPINNED, - PAGECACHE_WRITE_DELAY, - 0); - flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); - /* test */ - memset(buffw, '\2', PAGE_SIZE); - pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, - PAGECACHE_PIN_LEFT_UNPINNED, - PAGECACHE_WRITE_DELAY, - 0); - pagecache_delete_page(&pagecache, &file1, 0, - PAGECACHE_LOCK_WRITE, 0); - flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); - ok((res= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, - simple_delete_forget_test_file))), - "Simple delete-forget page file"); - if (res) - reset_file(file1, file1_name); - free(buffw); - free(buffr); - DBUG_RETURN(res); -} - -/* - Prepare page with locking, write new content to the page, - delete page with flush and on existing lock, - check that page on disk contain new value. 
-*/ - -int simple_delete_flush_test() -{ - unsigned char *buffw= malloc(PAGE_SIZE); - unsigned char *buffr= malloc(PAGE_SIZE); - int res; - DBUG_ENTER("simple_delete_flush_test"); - /* prepare the file */ - memset(buffw, '\1', PAGE_SIZE); - pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_WRITE, - PAGECACHE_PIN, - PAGECACHE_WRITE_DELAY, - 0); - flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); - /* test */ - memset(buffw, '\2', PAGE_SIZE); - pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_WRITELOCKED, - PAGECACHE_PIN_LEFT_PINNED, - PAGECACHE_WRITE_DELAY, - 0); - pagecache_delete_page(&pagecache, &file1, 0, - PAGECACHE_LOCK_LEFT_WRITELOCKED, 1); - flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); - ok((res= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, - simple_delete_flush_test_file))), - "Simple delete-forget page file"); - if (res) - reset_file(file1, file1_name); - free(buffw); - free(buffr); - DBUG_RETURN(res); -} - - -/* - write then read file bigger then cache -*/ - -int simple_big_test() -{ - unsigned char *buffw= (unsigned char *)malloc(PAGE_SIZE); - unsigned char *buffr= (unsigned char *)malloc(PAGE_SIZE); - struct file_desc *desc= - (struct file_desc *)malloc((PCACHE_SIZE/(PAGE_SIZE/2)) * - sizeof(struct file_desc)); - int res, i; - DBUG_ENTER("simple_big_test"); - /* prepare the file twice larger then cache */ - for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE/2); i++) - { - memset(buffw, (unsigned char) (i & 0xff), PAGE_SIZE); - desc[i].length= PAGE_SIZE; - desc[i].content= (i & 0xff); - pagecache_write(&pagecache, &file1, i, 3, (char*)buffw, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, - PAGECACHE_PIN_LEFT_UNPINNED, - PAGECACHE_WRITE_DELAY, - 0); - } - ok(1, "Simple big file write"); - /* check written pages sequentally read */ - for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE/2); i++) - { - int j; - pagecache_read(&pagecache, 
&file1, i, 3, (char*)buffr, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, - 0); - for(j= 0; j < PAGE_SIZE; j++) - { - if (buffr[j] != (i & 0xff)) - { - diag("simple_big_test seq: page %u byte %u mismatch\n", i, j); - return 0; - } - } - } - ok(1, "simple big file sequentally read"); - /* chack random reads */ - for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE); i++) - { - int j, page; - page= rand() % (PCACHE_SIZE/(PAGE_SIZE/2)); - pagecache_read(&pagecache, &file1, page, 3, (char*)buffr, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, - 0); - for(j= 0; j < PAGE_SIZE; j++) - { - if (buffr[j] != (page & 0xff)) - { - diag("simple_big_test rnd: page %u byte %u mismatch\n", page, j); - return 0; - } - } - } - ok(1, "simple big file random read"); - flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); - - ok((res= test(test_file(file1, file1_name, PCACHE_SIZE*2, PAGE_SIZE, - desc))), - "Simple big file"); - if (res) - reset_file(file1, file1_name); - free(buffw); - free(buffr); - DBUG_RETURN(res); -} -/* - Thread function -*/ - -static void *test_thread(void *arg) -{ - int param=*((int*) arg); - - my_thread_init(); - DBUG_ENTER("test_thread"); - - DBUG_PRINT("enter", ("param: %d", param)); - - if (!simple_read_write_test() || - !simple_read_change_write_read_test() || - !simple_pin_test() || - !simple_delete_forget_test() || - !simple_delete_flush_test() || - !simple_big_test()) - exit(1); - - DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name())); - pthread_mutex_lock(&LOCK_thread_count); - thread_count--; - VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */ - pthread_mutex_unlock(&LOCK_thread_count); - free((gptr) arg); - my_thread_end(); - DBUG_RETURN(0); -} - - -int main(int argc, char **argv __attribute__((unused))) -{ - pthread_t tid; - pthread_attr_t thr_attr; - int *param, error, pagen; - - MY_INIT(argv[0]); - -#ifndef DBUG_OFF -#if defined(__WIN__) - default_dbug_option= "d:t:i:O,\\test_pagecache_single.trace"; 
-#else - default_dbug_option= "d:t:i:o,/tmp/test_pagecache_single.trace"; -#endif - if (argc > 1) - { - DBUG_SET(default_dbug_option); - DBUG_SET_INITIAL(default_dbug_option); - } -#endif - - - DBUG_ENTER("main"); - DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name())); - if ((file1.file= my_open(file1_name, - O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) - { - fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n", - errno); - exit(1); - } - DBUG_PRINT("info", ("file1: %d", file1.file)); - if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0) - { - fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n", - errno); - exit(1); - } - my_pwrite(file1.file, "test file", 9, 0, MYF(0)); - - if ((error= pthread_cond_init(&COND_thread_count, NULL))) - { - fprintf(stderr, "Got error: %d from pthread_cond_init (errno: %d)\n", - error, errno); - exit(1); - } - if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST))) - { - fprintf(stderr, "Got error: %d from pthread_cond_init (errno: %d)\n", - error, errno); - exit(1); - } - - if ((error= pthread_attr_init(&thr_attr))) - { - fprintf(stderr,"Got error: %d from pthread_attr_init (errno: %d)\n", - error,errno); - exit(1); - } - if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED))) - { - fprintf(stderr, - "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n", - error,errno); - exit(1); - } - -#ifndef pthread_attr_setstacksize /* void return value */ - if ((error= pthread_attr_setstacksize(&thr_attr, 65536L))) - { - fprintf(stderr,"Got error: %d from pthread_attr_setstacksize (errno: %d)\n", - error,errno); - exit(1); - } -#endif -#ifdef HAVE_THR_SETCONCURRENCY - VOID(thr_setconcurrency(2)); -#endif - - my_thread_global_init(); - - - if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, - PAGE_SIZE, 0)) == 0) - { - fprintf(stderr,"Got error: init_pagecache() (errno: %d)\n", - errno); - exit(1); - } - DBUG_PRINT("info", ("Page cache %d pages", 
pagen)); - - if ((error=pthread_mutex_lock(&LOCK_thread_count))) - { - fprintf(stderr,"Got error: %d from pthread_mutex_lock (errno: %d)\n", - error,errno); - exit(1); - } - param=(int*) malloc(sizeof(int)); - *param= 1; - if ((error= pthread_create(&tid, &thr_attr, test_thread, (void*) param))) - { - fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n", - error,errno); - exit(1); - } - thread_count++; - DBUG_PRINT("info", ("Thread started")); - pthread_mutex_unlock(&LOCK_thread_count); - - pthread_attr_destroy(&thr_attr); - - if ((error= pthread_mutex_lock(&LOCK_thread_count))) - fprintf(stderr,"Got error: %d from pthread_mutex_lock\n",error); - while (thread_count) - { - if ((error= pthread_cond_wait(&COND_thread_count,&LOCK_thread_count))) - fprintf(stderr,"Got error: %d from pthread_cond_wait\n",error); - } - if ((error= pthread_mutex_unlock(&LOCK_thread_count))) - fprintf(stderr,"Got error: %d from pthread_mutex_unlock\n",error); - DBUG_PRINT("info", ("thread ended")); - - end_pagecache(&pagecache, 1); - DBUG_PRINT("info", ("Page cache ended")); - - if (my_close(file1.file, MYF(0)) != 0) - { - fprintf(stderr, "Got error during file1 closing from close() (errno: %d)\n", - errno); - exit(1); - } - /*my_delete(file1_name, MYF(0));*/ - my_thread_global_end(); - - DBUG_PRINT("info", ("file1 (%d) closed", file1.file)); - - DBUG_PRINT("info", ("Program end")); - - DBUG_RETURN(0); -} -- cgit v1.2.1 From 3becab22e9dd774d58983e553b6fbbfb9960f852 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 16 Nov 2006 23:04:53 +0100 Subject: Maria: fix for compiler warning (signed-ness). #warning to warn (!) about current issues in the pagecache hindering checkpoint (we will have to fix them!). mysys/mf_pagecache.c: fix for compiler warning (signed-ness). #warning to warn (!) about current issues in the pagecache hindering checkpoint (we will have to fix them!). 
--- mysys/mf_pagecache.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index d8e235bc7f4..ab27e056c37 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -699,7 +699,7 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, pagecache->shift= my_bit_log2(block_size); DBUG_PRINT("info", ("block_size: %u", block_size)); - DBUG_ASSERT((1 << pagecache->shift) == block_size); + DBUG_ASSERT(((uint)(1 << pagecache->shift)) == block_size); blocks= (int) (use_mem / (sizeof(PAGECACHE_BLOCK_LINK) + 2 * sizeof(PAGECACHE_HASH_LINK) + @@ -3259,6 +3259,7 @@ restart: #ifndef DBUG_OFF int rc= #endif +#warning we are doing an unlock here, so need to give the page its rec_lsn! pagecache_make_lock_and_pin(pagecache, block, write_lock_change_table[lock].unlock_lock, write_pin_change_table[pin].unlock_pin); @@ -3614,6 +3615,11 @@ restart: else { /* Link the block into a list of blocks 'in switch' */ +#warning this unlink_changed() is a serious problem for Maria's Checkpoint: it \ +removes a page from the list of dirty pages, while it's still dirty. A \ + solution is to abandon first_in_switch, just wait for this page to be \ + flushed by somebody else, and loop. TODO: check all places where we remove a \ + page from the list of dirty pages unlink_changed(block); link_changed(block, &first_in_switch); } -- cgit v1.2.1 From 6ea0c5c7d6dcddbdade4e7f41273c09284b739b1 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 17 Nov 2006 14:41:33 +0200 Subject: Post review changes. mysys/mf_pagecache.c: Unneed volitile definitions removed. Comments added. info_check_lock() code simplified. pagecache_delete_page() fixed. 
--- mysys/mf_pagecache.c | 142 ++++++++++++++++++++++----------------------------- 1 file changed, 62 insertions(+), 80 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index ab27e056c37..4b92f68d9bf 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -127,11 +127,10 @@ my_bool my_disable_flush_pagecache_blocks= 0; (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)) /* types of condition variables */ -#define COND_FOR_REQUESTED 0 -#define COND_FOR_SAVED 1 -#define COND_FOR_WRLOCK 2 -#define COND_FOR_COPY 3 -#define COND_SIZE 4 +#define COND_FOR_REQUESTED 0 /* queue of thread waiting for read operation */ +#define COND_FOR_SAVED 1 /* queue of thread waiting for flush */ +#define COND_FOR_WRLOCK 2 /* queue of write lock */ +#define COND_SIZE 3 /* number of COND_* queues */ typedef pthread_cond_t KEYCACHE_CONDVAR; @@ -296,8 +295,8 @@ struct st_pagecache_block_link wqueue[COND_SIZE]; /* queues on waiting requests for new/old pages */ uint requests; /* number of requests for the block */ byte *buffer; /* buffer for the block page */ - volatile uint status; /* state of the block */ - volatile uint pins; /* pin counter */ + uint status; /* state of the block */ + uint pins; /* pin counter */ #ifdef PAGECACHE_DEBUG PAGECACHE_PIN_INFO *pin_list; PAGECACHE_LOCK_INFO *lock_list; @@ -354,6 +353,20 @@ my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, DBUG_RETURN(0); } + +/* + Debug function which checks current lock/pin state and requested changes + + SYNOPSIS + info_check_lock() + lock requested lock changes + pin requested pin changes + + RETURN + 0 - OK + 1 - Error +*/ + my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, enum pagecache_page_lock lock, enum pagecache_page_pin pin) @@ -368,90 +381,58 @@ my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, case PAGECACHE_LOCK_LEFT_UNLOCKED: DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED); if (info) - { - DBUG_PRINT("info", - ("info_check_lock: thread: 0x%lx block 0x%lx: %c 
: U->U", - (ulong)thread, (ulong)block, (info->write_lock?'W':'R'))); - DBUG_RETURN(1); - } + goto error; break; case PAGECACHE_LOCK_LEFT_READLOCKED: DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_PIN_LEFT_PINNED); if (info == 0 || info->write_lock) - { - DBUG_PRINT("info", - ("info_check_lock: thread: 0x%lx block 0x%lx: %c : R->R", - (ulong)thread, (ulong)block, (info?'W':'U'))); - DBUG_RETURN(1); - } + goto error; break; case PAGECACHE_LOCK_LEFT_WRITELOCKED: DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_PINNED); if (info == 0 || !info->write_lock) - { - DBUG_PRINT("info", - ("info_check_lock: thread: 0x%lx block 0x%lx: %c : W->W", - (ulong)thread, (ulong)block, (info?'R':'U'))); - DBUG_RETURN(1); - } + goto error; break; case PAGECACHE_LOCK_READ: DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_PIN); if (info != 0) - { - DBUG_PRINT("info", - ("info_check_lock: thread: 0x%lx block 0x%lx: %c : U->R", - (ulong)thread, (ulong)block, (info->write_lock?'W':'R'))); - DBUG_RETURN(1); - } + goto error; break; case PAGECACHE_LOCK_WRITE: DBUG_ASSERT(pin == PAGECACHE_PIN); if (info != 0) - { - DBUG_PRINT("info", - ("info_check_lock: thread: 0x%lx block 0x%lx: %c : U->W", - (ulong)thread, (ulong)block, (info->write_lock?'W':'R'))); - DBUG_RETURN(1); - } + goto error; break; - case PAGECACHE_LOCK_READ_UNLOCK: DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN); if (info == 0 || info->write_lock) - { - DBUG_PRINT("info", - ("info_check_lock: thread: 0x%lx block 0x%lx: %c : R->U", - (ulong)thread, (ulong)block, (info?'W':'U'))); - DBUG_RETURN(1); - } + goto error; break; case PAGECACHE_LOCK_WRITE_UNLOCK: DBUG_ASSERT(pin == PAGECACHE_UNPIN); if (info == 0 || !info->write_lock) - { - DBUG_PRINT("info", - ("info_check_lock: thread: 0x%lx block 0x%lx: %c : W->U", - (ulong)thread, (ulong)block, (info?'R':'U'))); - DBUG_RETURN(1); - } + goto error; break; case PAGECACHE_LOCK_WRITE_TO_READ: DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_PINNED 
|| pin == PAGECACHE_UNPIN); if (info == 0 || !info->write_lock) - { - DBUG_PRINT("info", - ("info_check_lock: thread: 0x%lx block 0x%lx: %c : W->U", - (ulong)thread, (ulong)block, (info?'R':'U'))); - DBUG_RETURN(1); - } + goto error; break; } DBUG_RETURN(0); +error: + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: info: %d wrt: %d," + "to lock: %s, to pin: %s", + (ulong)thread, (ulong)block, test(info), + (info ? info->write_lock : 0), + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + DBUG_RETURN(1); } #endif @@ -1629,7 +1610,8 @@ static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link) /* - Get the hash link for the page if it is in the cache + Get the hash link for the page if it is in the cache (do not put the + page in the cache if it is absent there) SYNOPSIS get_present_hash_link() @@ -2982,6 +2964,7 @@ restart: if (!link) { DBUG_PRINT("info", ("There is no such page in the cache")); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); DBUG_RETURN(0); } block= link->block; @@ -2998,32 +2981,31 @@ restart: if (block->status & BLOCK_CHANGED && flush) { - if (flush) - { - /* The block contains a dirty page - push it out of the cache */ + /* The block contains a dirty page - push it out of the cache */ - KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty")); + KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty")); - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - /* - The call is thread safe because only the current - thread might change the block->hash_link value - */ - DBUG_ASSERT(block->pins == 1); - error= pagecache_fwrite(pagecache, - &block->hash_link->file, - block->buffer, - block->hash_link->pageno, - block->type, - MYF(MY_NABP | MY_WAIT_IF_FULL)); - pagecache_pthread_mutex_lock(&pagecache->cache_lock); - pagecache->global_cache_write++; - if (error) - { - block->status|= BLOCK_ERROR; - goto err; - } + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + The 
call is thread safe because only the current + thread might change the block->hash_link value + */ + DBUG_ASSERT(block->pins == 1); + error= pagecache_fwrite(pagecache, + &block->hash_link->file, + block->buffer, + block->hash_link->pageno, + block->type, + MYF(MY_NABP | MY_WAIT_IF_FULL)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + pagecache->global_cache_write++; + + if (error) + { + block->status|= BLOCK_ERROR; + goto err; } + pagecache->blocks_changed--; pagecache->global_blocks_changed--; /* -- cgit v1.2.1 From a41ac15b960aee306e3464835b05a835fd98771d Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 21 Nov 2006 22:22:59 +0100 Subject: Maria - various fixes around durability of files: 1) on Mac OS X >=10.3, fcntl() is recommended over fsync (from the man page: "[With fsync()] the disk drive may also re-order the data so that later writes may be present while earlier writes are not. Applications such as databases that require a strict ordering of writes should use F_FULLFSYNC to ensure their data is written in the order they expect"). I have seen two other pieces of software changing from fsync to F_FULLFSYNC on Mac OS X. 2) to make a file creation/deletion/renaming durable on Linux (at least ext2 as I have tested) (see "man fsync"), a fsync() on the directory is needed: new functions to do that, and a flag MY_SYNC_DIR to do it in my_create/my_delete/my_rename. 3) now using this directory syncing when creating he frm if opt_sync_frm, and for Maria's control file when it is created. 
include/my_sys.h: new flag to my_create/my_delete/my_rename, which asks to sync the directory after the operation is done (currently does nothing except on Linux) libmysql/CMakeLists.txt: my_create() now depends on my_sync() so my_sync is needed for libmysql libmysql/Makefile.shared: my_create() now depends on my_sync() so my_sync is needed for libmysql mysys/my_create.c: my_create() can now sync the directory if asked for mysys/my_delete.c: my_delete() can now sync the directory if asked for mysys/my_open.c: it was a bug that my_close() is done on fd but a positive fd would still be returned, by my_register_filename(). mysys/my_rename.c: my_rename() can now sync the two directories (the one of "from" and the one of "to") if asked for. mysys/my_sync.c: On recent Mac OS X, fcntl(F_FULLFSYNC) is recommended over fsync() (see "man fsync" on Mac OS X 10.3). my_sync_dir(): to sync a directory after a file creation/deletion/ renaming; can be called directly or via MY_SYNC_DIR in my_create/ my_delete/my_rename(). No-op except on Linux (see "man fsync" on Linux). my_sync_dir_from_file(): same as above, just more practical when the caller has a file name but no directory name ready. Should the #warning even be a #error? I mean do we want to release binaries which don't guarantee any durability? sql/log.cc: a TODO for the future. sql/unireg.cc: If we sync the frm it makes sense to also sync its creation in the directory. 
storage/maria/ma_control_file.c: control file is vital, try to make it to disk --- mysys/my_create.c | 3 +++ mysys/my_delete.c | 2 ++ mysys/my_open.c | 1 + mysys/my_rename.c | 5 ++++ mysys/my_sync.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 85 insertions(+) (limited to 'mysys') diff --git a/mysys/my_create.c b/mysys/my_create.c index e1e32b50842..bb3801691a5 100644 --- a/mysys/my_create.c +++ b/mysys/my_create.c @@ -53,6 +53,9 @@ File my_create(const char *FileName, int CreateFlags, int access_flags, fd = open(FileName, access_flags); #endif + if ((MyFlags & MY_SYNC_DIR) && (fd >=0)) + my_sync_dir_by_file(FileName, MyFlags); + DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_CREATE, EE_CANTCREATEFILE, MyFlags)); } /* my_create */ diff --git a/mysys/my_delete.c b/mysys/my_delete.c index de2a9814a56..6d90caa48ed 100644 --- a/mysys/my_delete.c +++ b/mysys/my_delete.c @@ -30,6 +30,8 @@ int my_delete(const char *name, myf MyFlags) my_error(EE_DELETE,MYF(ME_BELL+ME_WAITTANG+(MyFlags & ME_NOINPUT)), name,errno); } + else if (MyFlags & MY_SYNC_DIR) + my_sync_dir_by_file(name, MyFlags); DBUG_RETURN(err); } /* my_delete */ diff --git a/mysys/my_open.c b/mysys/my_open.c index ab2f7c9ff27..344e9c0a43b 100644 --- a/mysys/my_open.c +++ b/mysys/my_open.c @@ -162,6 +162,7 @@ File my_register_filename(File fd, const char *FileName, enum file_type } pthread_mutex_unlock(&THR_LOCK_open); (void) my_close(fd, MyFlags); + fd= -1; my_errno=ENOMEM; } else diff --git a/mysys/my_rename.c b/mysys/my_rename.c index 9c27238cc72..2c9ace6223a 100644 --- a/mysys/my_rename.c +++ b/mysys/my_rename.c @@ -61,5 +61,10 @@ int my_rename(const char *from, const char *to, myf MyFlags) if (MyFlags & (MY_FAE+MY_WME)) my_error(EE_LINK, MYF(ME_BELL+ME_WAITTANG),from,to,my_errno); } + else if (MyFlags & MY_SYNC_DIR) + { + my_sync_dir_by_file(from, MyFlags); + my_sync_dir_by_file(to, MyFlags); + } DBUG_RETURN(error); } /* my_rename */ diff --git a/mysys/my_sync.c 
b/mysys/my_sync.c index c557324b52c..eaa26ef07a7 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -49,6 +49,12 @@ int my_sync(File fd, myf my_flags) do { +#if defined(F_FULLFSYNC) + /* Recent Mac OS X versions insist this call is safer than fsync() */ + if (!(res= fcntl(fd, F_FULLFSYNC, 0))) + break; /* ok */ + /* Some fs don't support F_FULLFSYNC and fail above, fallback: */ +#endif #if defined(HAVE_FDATASYNC) res= fdatasync(fd); #elif defined(HAVE_FSYNC) @@ -56,6 +62,7 @@ int my_sync(File fd, myf my_flags) #elif defined(__WIN__) res= _commit(fd); #else +#warning Cannot find a way to sync a file, durability in danger res= 0; /* No sync (strange OS) */ #endif } while (res == -1 && errno == EINTR); @@ -74,3 +81,70 @@ int my_sync(File fd, myf my_flags) DBUG_RETURN(res); } /* my_sync */ + +/* + Force directory information to disk. Only Linux is known to need this to + make sure a file creation/deletion/renaming in(from,to) this directory + durable. + + SYNOPSIS + my_sync_dir() + dir_name the name of the directory + my_flags unused + + RETURN + nothing (the sync may fail sometimes). +*/ +void my_sync_dir(const char *dir_name, myf my_flags __attribute__((unused))) +{ +#ifdef TARGET_OS_LINUX + DBUG_ENTER("my_sync_dir"); + DBUG_PRINT("my",("Dir: '%s' my_flags: %d", dir_name, my_flags)); + File dir_fd; + int error= 0; + /* + Syncing a dir does not work on all filesystems (e.g. tmpfs->EINVAL) : + ignore errors. But print them to the debug log. + */ + if (((dir_fd= my_open(dir_name, O_RDONLY, MYF(0))) >= 0)) + { + if (my_sync(dir_fd, MYF(0))) + { + error= errno; + DBUG_PRINT("info",("my_sync failed errno: %d", error)); + } + my_close(dir_fd, MYF(0)); + } + else + { + error= errno; + DBUG_PRINT("info",("my_open failed errno: %d", error)); + } + DBUG_VOID_RETURN; +#endif +} + + +/* + Force directory information to disk. Only Linux is known to need this to + make sure a file creation/deletion/renaming in(from,to) this directory + durable. 
+ + SYNOPSIS + my_sync_dir_by_file() + file_name the name of a file in the directory + my_flags unused + + RETURN + nothing (the sync may fail sometimes). +*/ +void my_sync_dir_by_file(const char *file_name, + myf my_flags __attribute__((unused))) +{ +#ifdef TARGET_OS_LINUX + char dir_name[FN_REFLEN]; + dirname_part(dir_name, file_name); + return my_sync_dir(dir_name, my_flags); +#endif +} + -- cgit v1.2.1 From adfba203ffd1bd89d74a63ff09de9b9a40fb64d7 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 22 Nov 2006 23:38:10 +0100 Subject: Maria - post-review fixes about my_sync_dir(): make it return an error (except if certain errno), test this error in callers. Do a single my_sync_dir() in my_rename() if possible. include/my_global.h: better have a symbol name talking about the feature, use it in the code of the feature, and define the symbol once depending on the platform, rather than have the platform "tested" in the code of the feature several times. include/my_sys.h: my_sync_dir() now can return error mysys/my_create.c: my_sync_dir() can now return an error mysys/my_delete.c: my_sync_dir() can now return an error mysys/my_rename.c: my_sync_dir() can now return an error. Do a single sync if "from" and "to" are the same directory. #ifdef here to not even compile dirname_part() if useless. mysys/my_sync.c: more comments. A compilation error if no way to make my_sync() work (I guess we don't want to ship a binary which cannot do any sync at all; users of strange OSes compile from source and can remove the #error). my_sync_dir() now returns an error (except for certain errno values considered ok; EIO "input/output error" is not ok). 
sql/unireg.cc: my_sync_dir() now returns an error which must be tested --- mysys/my_create.c | 8 +++++-- mysys/my_delete.c | 5 +++-- mysys/my_rename.c | 12 ++++++++-- mysys/my_sync.c | 67 ++++++++++++++++++++++++++++--------------------------- 4 files changed, 53 insertions(+), 39 deletions(-) (limited to 'mysys') diff --git a/mysys/my_create.c b/mysys/my_create.c index bb3801691a5..0b1bfa12c18 100644 --- a/mysys/my_create.c +++ b/mysys/my_create.c @@ -53,8 +53,12 @@ File my_create(const char *FileName, int CreateFlags, int access_flags, fd = open(FileName, access_flags); #endif - if ((MyFlags & MY_SYNC_DIR) && (fd >=0)) - my_sync_dir_by_file(FileName, MyFlags); + if ((MyFlags & MY_SYNC_DIR) && (fd >=0) && + my_sync_dir_by_file(FileName, MyFlags)) + { + my_close(fd, MyFlags); + fd= -1; + } DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_CREATE, EE_CANTCREATEFILE, MyFlags)); diff --git a/mysys/my_delete.c b/mysys/my_delete.c index 6d90caa48ed..d56507c36c0 100644 --- a/mysys/my_delete.c +++ b/mysys/my_delete.c @@ -30,8 +30,9 @@ int my_delete(const char *name, myf MyFlags) my_error(EE_DELETE,MYF(ME_BELL+ME_WAITTANG+(MyFlags & ME_NOINPUT)), name,errno); } - else if (MyFlags & MY_SYNC_DIR) - my_sync_dir_by_file(name, MyFlags); + else if ((MyFlags & MY_SYNC_DIR) && + my_sync_dir_by_file(name, MyFlags)) + err= -1; DBUG_RETURN(err); } /* my_delete */ diff --git a/mysys/my_rename.c b/mysys/my_rename.c index 2c9ace6223a..8c2a354324b 100644 --- a/mysys/my_rename.c +++ b/mysys/my_rename.c @@ -63,8 +63,16 @@ int my_rename(const char *from, const char *to, myf MyFlags) } else if (MyFlags & MY_SYNC_DIR) { - my_sync_dir_by_file(from, MyFlags); - my_sync_dir_by_file(to, MyFlags); +#ifdef NEED_EXPLICIT_SYNC_DIR + /* do only the needed amount of syncs: */ + char dir_from[FN_REFLEN], dir_to[FN_REFLEN]; + dirname_part(dir_from, from); + dirname_part(dir_to, to); + if (my_sync_dir(dir_from, MyFlags) || + (strcmp(dir_from, dir_to) && + my_sync_dir(dir_to, MyFlags))) + error= -1; 
+#endif } DBUG_RETURN(error); } /* my_rename */ diff --git a/mysys/my_sync.c b/mysys/my_sync.c index eaa26ef07a7..ada2ea84414 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -50,10 +50,14 @@ int my_sync(File fd, myf my_flags) do { #if defined(F_FULLFSYNC) - /* Recent Mac OS X versions insist this call is safer than fsync() */ + /* + In Mac OS X >= 10.3 this call is safer than fsync() (it forces the + disk's cache). + */ if (!(res= fcntl(fd, F_FULLFSYNC, 0))) break; /* ok */ - /* Some fs don't support F_FULLFSYNC and fail above, fallback: */ + /* Some file systems don't support F_FULLFSYNC and fail above: */ + DBUG_PRINT("info",("fcntl(F_FULLFSYNC) failed, falling back")); #endif #if defined(HAVE_FDATASYNC) res= fdatasync(fd); @@ -62,7 +66,7 @@ int my_sync(File fd, myf my_flags) #elif defined(__WIN__) res= _commit(fd); #else -#warning Cannot find a way to sync a file, durability in danger +#error Cannot find a way to sync a file, durability in danger res= 0; /* No sync (strange OS) */ #endif } while (res == -1 && errno == EINTR); @@ -74,7 +78,10 @@ int my_sync(File fd, myf my_flags) my_errno= -1; /* Unknown error */ if ((my_flags & MY_IGNORE_BADFD) && (er == EBADF || er == EINVAL || er == EROFS)) + { + DBUG_PRINT("info", ("ignoring errno %d", er)); res= 0; + } else if (my_flags & MY_WME) my_error(EE_SYNC, MYF(ME_BELL+ME_WAITTANG), my_filename(fd), my_errno); } @@ -83,68 +90,62 @@ int my_sync(File fd, myf my_flags) /* - Force directory information to disk. Only Linux is known to need this to - make sure a file creation/deletion/renaming in(from,to) this directory - durable. + Force directory information to disk. SYNOPSIS my_sync_dir() dir_name the name of the directory - my_flags unused + my_flags flags (MY_WME etc) RETURN - nothing (the sync may fail sometimes). 
+ 0 if ok, !=0 if error */ -void my_sync_dir(const char *dir_name, myf my_flags __attribute__((unused))) +int my_sync_dir(const char *dir_name, myf my_flags) { -#ifdef TARGET_OS_LINUX +#ifdef NEED_EXPLICIT_SYNC_DIR DBUG_ENTER("my_sync_dir"); DBUG_PRINT("my",("Dir: '%s' my_flags: %d", dir_name, my_flags)); File dir_fd; - int error= 0; + int res= 0; /* - Syncing a dir does not work on all filesystems (e.g. tmpfs->EINVAL) : - ignore errors. But print them to the debug log. + Syncing a dir may give EINVAL on tmpfs on Linux, which is ok. + EIO on the other hand is very important. Hence MY_IGNORE_BADFD. */ - if (((dir_fd= my_open(dir_name, O_RDONLY, MYF(0))) >= 0)) + if ((dir_fd= my_open(dir_name, O_RDONLY, MYF(my_flags))) >= 0) { - if (my_sync(dir_fd, MYF(0))) - { - error= errno; - DBUG_PRINT("info",("my_sync failed errno: %d", error)); - } - my_close(dir_fd, MYF(0)); + if (my_sync(dir_fd, MYF(my_flags | MY_IGNORE_BADFD))) + res= 2; + if (my_close(dir_fd, MYF(my_flags))) + res= 3; } else - { - error= errno; - DBUG_PRINT("info",("my_open failed errno: %d", error)); - } - DBUG_VOID_RETURN; + res= 1; + DBUG_RETURN(res); +#else + return 0; #endif } /* - Force directory information to disk. Only Linux is known to need this to - make sure a file creation/deletion/renaming in(from,to) this directory - durable. + Force directory information to disk. SYNOPSIS my_sync_dir_by_file() file_name the name of a file in the directory - my_flags unused + my_flags flags (MY_WME etc) RETURN - nothing (the sync may fail sometimes). 
+ 0 if ok, !=0 if error */ -void my_sync_dir_by_file(const char *file_name, - myf my_flags __attribute__((unused))) +int my_sync_dir_by_file(const char *file_name, myf my_flags) { -#ifdef TARGET_OS_LINUX +#ifdef NEED_EXPLICIT_SYNC_DIR char dir_name[FN_REFLEN]; dirname_part(dir_name, file_name); return my_sync_dir(dir_name, my_flags); +#else + return 0; #endif } -- cgit v1.2.1 From de6f550ec7015fccd044a54c7628cdf8cdc2ed8c Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 27 Nov 2006 22:01:29 +0100 Subject: WL#3072 Maria Recovery. Making DDLs durable in Maria: Sync table files after CREATE (of non-temp table), DROP, RENAME, TRUNCATE, sync directories and symlinks (for the 3 first commands). Comments for future log records. In ma_rename(), if rename of index works and then rename of data fails, try to undo the rename of the index to leave a consistent state. mysys/my_symlink.c: sync directory after creation of a symbolic link in it, if asked mysys/my_sync.c: comment. Fix for when the file's name has no directory in it. storage/maria/ma_create.c: sync files and links and dirs when creating a non-temporary table. Optimizations of the above to reduce syncs in the common cases: * if index file and data file have the exact same paths (regular and link), sync the directories (of regular and link) only once after creating the last file (the data file). * don't sync the data file if we didn't write to it (always true in our builds). storage/maria/ma_delete_all.c: sync files after truncating a table storage/maria/ma_delete_table.c: sync files and symbolic links and dirs after dropping a table storage/maria/ma_extra.c: a function which wraps the sync of the index file and the sync of the data file. storage/maria/ma_locking.c: using a wrapper function storage/maria/ma_rename.c: sync files and symbolic links and dirs after renaming a table. If rename of index works and then rename of data fails, try to undo the rename of the index to leave a consistent state. 
That is just a try, it may fail... storage/maria/ma_test3.c: warning to not pay attention to this test. storage/maria/maria_def.h: declaration for the function added to ma_extra.c --- mysys/my_symlink.c | 2 ++ mysys/my_sync.c | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/my_symlink.c b/mysys/my_symlink.c index 7be3fcd36f0..b3d68992578 100644 --- a/mysys/my_symlink.c +++ b/mysys/my_symlink.c @@ -85,6 +85,8 @@ int my_symlink(const char *content, const char *linkname, myf MyFlags) if (MyFlags & MY_WME) my_error(EE_CANT_SYMLINK, MYF(0), linkname, content, errno); } + else if ((MyFlags & MY_SYNC_DIR) && my_sync_dir_by_file(linkname, MyFlags)) + result= -1; DBUG_RETURN(result); #endif /* HAVE_READLINK */ } diff --git a/mysys/my_sync.c b/mysys/my_sync.c index ada2ea84414..26bee5a293f 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -52,7 +52,7 @@ int my_sync(File fd, myf my_flags) #if defined(F_FULLFSYNC) /* In Mac OS X >= 10.3 this call is safer than fsync() (it forces the - disk's cache). + disk's cache and guarantees ordered writes). */ if (!(res= fcntl(fd, F_FULLFSYNC, 0))) break; /* ok */ @@ -89,6 +89,7 @@ int my_sync(File fd, myf my_flags) } /* my_sync */ +static const char cur_dir_name[]= {FN_CURLIB, 0}; /* Force directory information to disk. @@ -107,11 +108,14 @@ int my_sync_dir(const char *dir_name, myf my_flags) DBUG_PRINT("my",("Dir: '%s' my_flags: %d", dir_name, my_flags)); File dir_fd; int res= 0; + const char *correct_dir_name; + /* Sometimes the path does not contain an explicit directory */ + correct_dir_name= (dir_name[0] == 0) ? cur_dir_name : dir_name; /* Syncing a dir may give EINVAL on tmpfs on Linux, which is ok. EIO on the other hand is very important. Hence MY_IGNORE_BADFD. 
*/ - if ((dir_fd= my_open(dir_name, O_RDONLY, MYF(my_flags))) >= 0) + if ((dir_fd= my_open(correct_dir_name, O_RDONLY, MYF(my_flags))) >= 0) { if (my_sync(dir_fd, MYF(my_flags | MY_IGNORE_BADFD))) res= 2; -- cgit v1.2.1 From 7a5d0c2e6efbffd95998b723e52a22de51c21dae Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 27 Nov 2006 22:09:06 +0100 Subject: Maria - Fixes to help some pushbuild hosts compile. include/Makefile.am: put my_bit.h in the tarball (need it for mf_keycache.c) mysys/lf_alloc-pin.c: // in .c file mysys/lf_hash.c: // in .c file --- mysys/lf_alloc-pin.c | 2 +- mysys/lf_hash.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index 43055766c3e..d0fa29ddaaf 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -1,4 +1,4 @@ -// TODO multi-pinbox +#warning TODO multi-pinbox /* Copyright (C) 2000 MySQL AB This program is free software; you can redistribute it and/or modify diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index ff0eb8326d5..7e61ef690c6 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -85,7 +85,8 @@ retry: { if (!cursor->curr) return 0; - do { // XXX or goto retry ? + do { +#warning XXX or goto retry ? 
link= cursor->curr->link; cursor->next= PTR(link); _lf_pin(pins, 0, cursor->next); -- cgit v1.2.1 From b72903fe54cf83c3dcbec7e3532e23bf946249ba Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 3 Dec 2006 17:06:27 +0100 Subject: Maria: * merging changes done to the key cache since May 2006 into Maria * enabling two small enough page cache's unit tests by default * fix to have non-buffered output in unit tests (to not have a false timeout killing in pushbuild) (patch given by Serg) * removing some warnings of gcc -ansi include/lf.h: getting rid of "warning: ISO C does not allow extra `;' outside of a function" (gcc -ansi) mysys/lf_hash.c: getting rid of "warning: ISO C does not allow extra `;' outside of a function" (gcc -ansi) mysys/mf_pagecache.c: Cosmetic changes to minimize the diff with the key cache. #define PAGECACHE_DEBUG_LOG is not needed (just define PAGECACHE_DEBUG if you want) (this change removes "warning: 'pagecache_debug_print' declared `static' but never defined"). Importing changes made to mf_keycache.c since May 2006, into the page cache. Disabling online resizing in the page cache. Fix for "warning: ISO C90 forbids mixed declarations and code". unittest/mysys/Makefile.am: Of the page cache's unit tests, two are small enough to run on pushbuild, renaming them to a -t suffix. unittest/mytap/tap.c: pushbuild kills a test after seeing no output from it for 10 minutes; so we set the mytap framework to not buffer output (patch given by Serg) so that output is seen more frequently and not "all at the end of the test". 
--- mysys/lf_hash.c | 2 +- mysys/mf_pagecache.c | 66 ++++++++++++++++++++++++++++------------------------ 2 files changed, 37 insertions(+), 31 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 7e61ef690c6..e13c5f64f54 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -28,7 +28,7 @@ #include #include -LF_REQUIRE_PINS(3); +LF_REQUIRE_PINS(3) /* An element of the list */ typedef struct { diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 4b92f68d9bf..fe2d827ab50 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -43,9 +43,9 @@ #include #include "my_static.h" #include +#include #include #include -#include /* @@ -86,8 +86,6 @@ #define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log" */ -#define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log" - /* In key cache we have external raw locking here we use SERIALIZED_READ_FROM_CACHE to avoid problem of reading @@ -115,14 +113,6 @@ /* TODO: put it to my_static.c */ my_bool my_disable_flush_pagecache_blocks= 0; -#if defined(MSDOS) && !defined(M_IC80386) -/* we nead much memory */ -#undef my_malloc_lock -#undef my_free_lock -#define my_malloc_lock(A,B) halloc((long) (A/IO_SIZE),IO_SIZE) -#define my_free_lock(A,B) hfree(A) -#endif /* defined(MSDOS) && !defined(M_IC80386) */ - #define STRUCT_PTR(TYPE, MEMBER, a) \ (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)) @@ -314,8 +304,8 @@ my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, enum pagecache_page_pin mode) { struct st_my_thread_var *thread= my_thread_var; - DBUG_ENTER("info_check_pin"); PAGECACHE_PIN_INFO *info= info_find(block->pin_list, thread); + DBUG_ENTER("info_check_pin"); if (info) { if (mode == PAGECACHE_PIN_LEFT_UNPINNED) @@ -372,10 +362,10 @@ my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, enum pagecache_page_pin pin) { struct st_my_thread_var *thread= my_thread_var; - DBUG_ENTER("info_check_lock"); PAGECACHE_LOCK_INFO *info= (PAGECACHE_LOCK_INFO *) info_find((PAGECACHE_PIN_INFO *) block->lock_list, 
thread); + DBUG_ENTER("info_check_lock"); switch(lock) { case PAGECACHE_LOCK_LEFT_UNLOCKED: @@ -605,15 +595,9 @@ uint pagecache_fwrite(PAGECACHE *pagecache, next_power(value) is 2 at the power of (1+floor(log2(value))); e.g. next_power(2)=4, next_power(3)=4. */ -static uint next_power(uint value) +static inline uint next_power(uint value) { - uint old_value= 1; - while (value) - { - old_value= value; - value&= value-1; - } - return (old_value << 1); + return (uint) my_round_up_to_next_power((uint32) value) << 1; } @@ -834,15 +818,24 @@ err: The function starts the operation only when all other threads performing operations with the key cache let her to proceed (when cnt_for_resize=0). -*/ + Before being usable, this function needs: + - to receive fixes for BUG#17332 "changing key_buffer_size on a running + server can crash under load" similar to those done to the key cache + - to have us (Sanja) look at the additional constraints placed on + resizing, due to the page locking specific to this page cache. + So we disable it for now. +*/ +#if 0 /* keep disabled until code is fixed see above !! */ int resize_pagecache(PAGECACHE *pagecache, my_size_t use_mem, uint division_limit, uint age_threshold) { int blocks; +#ifdef THREAD struct st_my_thread_var *thread; PAGECACHE_WQUEUE *wqueue; +#endif DBUG_ENTER("resize_pagecache"); if (!pagecache->inited) @@ -909,6 +902,7 @@ finish: pagecache_pthread_mutex_unlock(&pagecache->cache_lock); DBUG_RETURN(blocks); } +#endif /* 0 */ /* @@ -1504,8 +1498,12 @@ static inline void remove_reader(PAGECACHE_BLOCK_LINK *block) { DBUG_ENTER("remove_reader"); BLOCK_INFO(block); +#ifdef THREAD if (! 
--block->hash_link->requests && block->condvar) pagecache_pthread_cond_signal(block->condvar); +#else + --block->hash_link->requests; +#endif DBUG_VOID_RETURN; } @@ -1515,7 +1513,8 @@ static inline void remove_reader(PAGECACHE_BLOCK_LINK *block) signals on its termination */ -static inline void wait_for_readers(PAGECACHE *pagecache, +static inline void wait_for_readers(PAGECACHE *pagecache + __attribute__((unused)), PAGECACHE_BLOCK_LINK *block) { #ifdef THREAD @@ -1684,7 +1683,6 @@ static PAGECACHE_HASH_LINK *get_hash_link(PAGECACHE *pagecache, { reg1 PAGECACHE_HASH_LINK *hash_link; PAGECACHE_HASH_LINK **start; - PAGECACHE_PAGE page; KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu", (uint) file->file, (ulong) pageno)); @@ -1710,6 +1708,7 @@ restart: #ifdef THREAD /* Wait for a free hash link */ struct st_my_thread_var *thread= my_thread_var; + PAGECACHE_PAGE page; KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting")); page.file= *file; page.pageno= pageno; @@ -2053,8 +2052,10 @@ restart: /* Remove the hash link for this page from the hash table */ unlink_hash(pagecache, block->hash_link); /* All pending requests for this page must be resubmitted */ +#ifdef THREAD if (block->wqueue[COND_FOR_SAVED].last_thread) release_queue(&block->wqueue[COND_FOR_SAVED]); +#endif } link_to_file_list(pagecache, block, file, (my_bool)(block->hash_link ? 
1 : 0)); @@ -2209,10 +2210,10 @@ my_bool pagecache_lock_block(PAGECACHE *pagecache, BLOCK_INFO(block); while (block->status & BLOCK_WRLOCK) { - DBUG_PRINT("info", ("fail to lock, waiting...")); /* Lock failed we will wait */ #ifdef THREAD struct st_my_thread_var *thread= my_thread_var; + DBUG_PRINT("info", ("fail to lock, waiting...")); add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread); dec_counter_for_resize_op(pagecache); do @@ -2403,8 +2404,10 @@ static void read_block(PAGECACHE *pagecache, KEYCACHE_DBUG_PRINT("read_block", ("primary request: new page in cache")); /* Signal that all pending requests for this page now can be processed */ +#ifdef THREAD if (block->wqueue[COND_FOR_REQUESTED].last_thread) release_queue(&block->wqueue[COND_FOR_REQUESTED]); +#endif } else { @@ -3210,9 +3213,11 @@ restart: block->status= (BLOCK_READ | (block->status & BLOCK_WRLOCK)); KEYCACHE_DBUG_PRINT("key_cache_insert", ("primary request: new page in cache")); +#ifdef THREAD /* Signal that all pending requests for this now can be processed. */ if (block->wqueue[COND_FOR_REQUESTED].last_thread) release_queue(&block->wqueue[COND_FOR_REQUESTED]); +#endif } } else @@ -3223,11 +3228,9 @@ restart: if (block->status & BLOCK_CHANGED) link_to_file_list(pagecache, block, &block->hash_link->file, 1); } - else - { - if (! (block->status & BLOCK_CHANGED)) + else if (! (block->status & BLOCK_CHANGED)) link_to_changed_list(pagecache, block); - } + if (! (block->status & BLOCK_ERROR)) { bmove512(block->buffer, buff, pagecache->block_size); @@ -3342,9 +3345,11 @@ static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) /* Keep track of the number of currently unused blocks. */ pagecache->blocks_unused++; +#ifdef THREAD /* All pending requests for this page must be resubmitted. 
*/ if (block->wqueue[COND_FOR_SAVED].last_thread) release_queue(&block->wqueue[COND_FOR_SAVED]); +#endif } @@ -3438,12 +3443,14 @@ static int flush_cached_blocks(PAGECACHE *pagecache, if (!last_errno) last_errno= errno ? errno : -1; } +#ifdef THREAD /* Let to proceed for possible waiting requests to write to the block page. It might happen only during an operation to resize the key cache. */ if (block->wqueue[COND_FOR_SAVED].last_thread) release_queue(&block->wqueue[COND_FOR_SAVED]); +#endif /* type will never be FLUSH_IGNORE_CHANGED here */ if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) { @@ -3970,7 +3977,6 @@ static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex) { KEYCACHE_THREAD_TRACE_END(""); pthread_mutex_unlock(mutex); - return; } -- cgit v1.2.1 From 5750daa4cf73eba0a6f1d9909844bf106228ae80 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 4 Dec 2006 15:31:04 +0100 Subject: Maria - post-review change of "fixes for gcc -ansi". And reducing the time taken by my_atomic-t. include/lf.h: fix to be able to add a ";" at the end of the macro's invokation (removing it here, removes a warning from "gcc -ansi" about a standalone ";"). mysys/lf_hash.c: ";" is ok now after LF_REQUIRE_PINS mysys/mf_pagecache.c: comment fix unittest/mysys/my_atomic-t.c: decreasing number of iterations to make test take less time. 
--- mysys/lf_hash.c | 2 +- mysys/mf_pagecache.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index e13c5f64f54..7e61ef690c6 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -28,7 +28,7 @@ #include #include -LF_REQUIRE_PINS(3) +LF_REQUIRE_PINS(3); /* An element of the list */ typedef struct { diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index fe2d827ab50..3e3484d5efb 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -970,7 +970,7 @@ void change_pagecache_param(PAGECACHE *pagecache, uint division_limit, /* - Flushes and removes page cache from memory + Removes page cache from memory. Does NOT flush pages to disk. SYNOPSIS end_pagecache() -- cgit v1.2.1 From ad29d5520b1ba379a75adc447f301851ff4588a4 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 7 Dec 2006 15:23:50 +0100 Subject: Maria - fix for "statement with no effect" warning mysys/lf_hash.c: fix for "statement with no effect" warning storage/maria/lockman.c: fix for "statement with no effect" warning --- mysys/lf_hash.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 7e61ef690c6..b2ad7a93ace 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -96,7 +96,7 @@ retry: cur_keylen= cursor->curr->keylen; if (*cursor->prev != (intptr)cursor->curr) { - LF_BACKOFF; + (void)LF_BACKOFF; goto retry; } if (!DELETED(link)) @@ -118,7 +118,7 @@ retry: _lf_alloc_free(pins, cursor->curr); else { - LF_BACKOFF; + (void)LF_BACKOFF; goto retry; } } -- cgit v1.2.1 From fa05e9c9f426a19f016897ec57c047c277bf52c7 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 16 Dec 2006 18:10:47 +0100 Subject: WL#3071 - Maria checkpoint Adding rec_lsn to Maria's page cache. Misc fixes to Checkpoint. mysys/mf_pagecache.c: adding rec_lsn, the LSN when a page first became dirty. 
It is set when unlocking a page (TODO: should also be set when the unlocking is an implicit part of pagecache_write()). It is reset in link_to_file_list() and free_block() (one of which is used every time we flush a block). It is a ulonglong and not LSN, because its destination is comparisons for which ulonglong is better than a struct. storage/maria/ma_checkpoint.c: misc fixes to Checkpoint (updates now that the transaction manager and the page cache are more known) storage/maria/ma_close.c: an important note for the future. storage/maria/ma_least_recently_dirtied.c: comment --- mysys/mf_pagecache.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 3e3484d5efb..807a3ea520a 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -295,6 +295,7 @@ struct st_pagecache_block_link enum pagecache_page_type type; /* type of the block */ uint hits_left; /* number of hits left until promotion */ ulonglong last_hit_time; /* timestamp of the last hit */ + ulonglong rec_lsn; /* LSN when first became dirty */ KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */ }; @@ -1202,6 +1203,7 @@ static void link_to_file_list(PAGECACHE *pagecache, if (block->status & BLOCK_CHANGED) { block->status&= ~BLOCK_CHANGED; + block->rec_lsn= 0; pagecache->blocks_changed--; pagecache->global_blocks_changed--; } @@ -2509,6 +2511,8 @@ void pagecache_unlock_page(PAGECACHE *pagecache, DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && pin == PAGECACHE_UNPIN); /* TODO: insert LSN writing code */ + DBUG_ASSERT(first_REDO_LSN_for_page > 0); + set_if_bigger(block->rec_lsn, first_REDO_LSN_for_page); } #ifndef DBUG_OFF @@ -2671,6 +2675,8 @@ void pagecache_unlock(PAGECACHE *pagecache, DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && pin == PAGECACHE_UNPIN); /* TODO: insert LSN writing code */ + DBUG_ASSERT(first_REDO_LSN_for_page > 0); + set_if_bigger(block->rec_lsn, 
first_REDO_LSN_for_page); } #ifndef DBUG_OFF @@ -3012,10 +3018,9 @@ restart: pagecache->blocks_changed--; pagecache->global_blocks_changed--; /* - free_block() will change the status of the block so no need to change - it here. + free_block() will change the status and rec_lsn of the block so no + need to change them here. */ - } /* Cache is locked, so we can relese page before freeing it */ pagecache_make_lock_and_pin(pagecache, block, @@ -3328,6 +3333,7 @@ static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) #ifndef DBUG_OFF block->type= PAGECACHE_EMPTY_PAGE; #endif + block->rec_lsn= 0; KEYCACHE_THREAD_TRACE("free block"); KEYCACHE_DBUG_PRINT("free_block", ("block is freed")); -- cgit v1.2.1 From 7199c905590391f64802913369aab7d288eff4c8 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 18 Dec 2006 17:24:02 +0100 Subject: WL#3071 Maria checkpoint - cleanups, simplifications - moving the construction of the "dirty pages table" into the pagecache where it belongs (because it's the pagecache which knows dirty pages). TODO: do the same soon for the "transactions table". - fix for a small bug in the pagecache (decrementation of "changed_blocks") include/pagecache.h: prototype mysys/mf_pagecache.c: m_string.h moves up for LEX_STRING to be known for pagecache.h. In pagecache_delete_page(), we must decrement "blocks_changed" even if we just delete the page without flushing it. A new function pagecache_collect_changed_blocks_with_LSN() (used by the Checkpoint module), which stores information about the changed blocks (a.k.a. "the dirty pages table") into a LEX_STRING. This function is not tested now, it will be when there is a Checkpoint. storage/maria/ma_checkpoint.c: refining the checkpoint code: factoring functions, moving the construction of the "dirty pages table" into mf_pagecache.c (I'll do the same with the construction of the "transactions table" once Serg tells me what's the best way to do it). 
storage/maria/ma_least_recently_dirtied.c: Simplifying the thread which does background flushing of least-recently-dirtied pages: - in first version that thread will not flush, just do checkpoints - in 2nd version, flushing should re-use existing page cache functions like flush_pagecache_blocks(). unittest/mysys/test_file.h: m_string.h moves up for LEX_STRING to be known in pagecache.h --- mysys/mf_pagecache.c | 180 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 154 insertions(+), 26 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 807a3ea520a..96c855fda0a 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -40,9 +40,9 @@ */ #include "mysys_priv.h" +#include #include #include "my_static.h" -#include #include #include #include @@ -295,7 +295,7 @@ struct st_pagecache_block_link enum pagecache_page_type type; /* type of the block */ uint hits_left; /* number of hits left until promotion */ ulonglong last_hit_time; /* timestamp of the last hit */ - ulonglong rec_lsn; /* LSN when first became dirty */ + LSN rec_lsn; /* LSN when first became dirty */ KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */ }; @@ -2988,33 +2988,35 @@ restart: goto restart; } - if (block->status & BLOCK_CHANGED && flush) + if (block->status & BLOCK_CHANGED) { - /* The block contains a dirty page - push it out of the cache */ - - KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty")); - - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - /* - The call is thread safe because only the current - thread might change the block->hash_link value - */ - DBUG_ASSERT(block->pins == 1); - error= pagecache_fwrite(pagecache, - &block->hash_link->file, - block->buffer, - block->hash_link->pageno, - block->type, - MYF(MY_NABP | MY_WAIT_IF_FULL)); - pagecache_pthread_mutex_lock(&pagecache->cache_lock); - pagecache->global_cache_write++; - - if (error) + if (flush) { - block->status|= BLOCK_ERROR; - 
goto err; + /* The block contains a dirty page - push it out of the cache */ + + KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty")); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + The call is thread safe because only the current + thread might change the block->hash_link value + */ + DBUG_ASSERT(block->pins == 1); + error= pagecache_fwrite(pagecache, + &block->hash_link->file, + block->buffer, + block->hash_link->pageno, + block->type, + MYF(MY_NABP | MY_WAIT_IF_FULL)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + pagecache->global_cache_write++; + + if (error) + { + block->status|= BLOCK_ERROR; + goto err; + } } - pagecache->blocks_changed--; pagecache->global_blocks_changed--; /* @@ -3793,6 +3795,132 @@ int reset_key_cache_counters(const char *name, PAGECACHE *key_cache) } +/* + Allocates a buffer and stores in it some information about all dirty pages + of type PAGECACHE_LSN_PAGE. + + SYNOPSIS + pagecache_collect_changed_blocks_with_LSN() + pagecache pointer to the page cache + str (OUT) pointer to a LEX_STRING where the allocated buffer, and + its size, will be put + max_lsn (OUT) pointer to a LSN where the maximum rec_lsn of all + relevant dirty pages will be put + + DESCRIPTION + Does the allocation because the caller cannot know the size itself. + Memory freeing is done by the caller. + Ignores all pages of another type than PAGECACHE_LSN_PAGE, because they + are not interesting for a checkpoint record. + The caller has the intention of doing checkpoints. + + RETURN + 0 on success + 1 on error +*/ +my_bool pagecache_collect_changed_blocks_with_LSN(PAGECACHE *pagecache, + LEX_STRING *str, + LSN *max_lsn) +{ + my_bool error; + ulong stored_LRD_size= 0; + uint file_hash; + char *ptr; + DBUG_ENTER("pagecache_collect_changed_blocks_with_LSN"); + + *max_lsn= 0; + /* + We lock the entire cache but will be quick, just reading/writing a few MBs + of memory at most. 
+ When we enter here, we must be sure that no "first_in_switch" situation + is happening or will happen (either we have to get rid of + first_in_switch in the code or, first_in_switch has to increment a + "danger" counter for this function to know it has to wait). TODO. + */ + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + + /* Count how many dirty pages are interesting */ + for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++) + { + PAGECACHE_BLOCK_LINK *block; + for (block= pagecache->changed_blocks[file_hash] ; + block; + block= block->next_changed) + { + /* + Q: is there somthing subtle with block->hash_link: can it be NULL? + does it have to be == hash_link->block... ? + */ + DBUG_ASSERT(block->hash_link != NULL); + DBUG_ASSERT(block->status & BLOCK_CHANGED); + if (block->type != PAGECACHE_LSN_PAGE) + continue; /* no need to store it */ + /* + In the current pagecache, rec_lsn is not set correctly: + 1) it is set on pagecache_unlock(), too late (a page is dirty + (BLOCK_CHANGED) since the first pagecache_write()). So in this + scenario: + thread1: thread2: + write_REDO + pagecache_write() checkpoint : reclsn not known + pagecache_unlock(sets rec_lsn) + commit + crash, + at recovery we will wrongly skip the REDO. It also affects the + low-water mark's computation. + 2) sometimes the unlocking can be an implicit action of + pagecache_write(), without any call to pagecache_unlock(), then + rec_lsn is not set. + 1) and 2) are critical problems. + TODO: fix this when Monty has explained how he writes BLOB pages. 
+ */ + if (0 == block->rec_lsn) + { + DBUG_ASSERT(0); + goto err; + } + stored_LRD_size++; + } + } + + str->length= 8+(4+4+8)*stored_LRD_size; + if (NULL == (str->str= my_malloc(str->length, MYF(MY_WME)))) + goto err; + ptr= str->str; + int8store(ptr, stored_LRD_size); + ptr+= 8; + if (0 == stored_LRD_size) + goto end; + for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++) + { + PAGECACHE_BLOCK_LINK *block; + for (block= pagecache->changed_blocks[file_hash] ; + block; + block= block->next_changed) + { + if (block->type != PAGECACHE_LSN_PAGE) + continue; /* no need to store it in the checkpoint record */ + DBUG_ASSERT((4 == sizeof(block->hash_link->file.file)) && + (4 == sizeof(block->hash_link->pageno))); + int4store(ptr, block->hash_link->file.file); + ptr+= 4; + int4store(ptr, block->hash_link->pageno); + ptr+= 4; + int8store(ptr, (ulonglong)block->rec_lsn); + ptr+= 8; + set_if_bigger(*max_lsn, block->rec_lsn); + } + } + error= 0; + goto end; +err: + error= 1; +end: + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_RETURN(error); +} + + #ifndef DBUG_OFF /* Test if disk-cache is ok -- cgit v1.2.1 From 714f3b73e513f2d12fb45e8256fa6299e60cd5a2 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 20 Dec 2006 18:58:35 +0100 Subject: merge of recent MyISAM changes into Maria. Only failure is ndb_restore - could have been worse!! 
include/pagecache.h: LSN->lsn mysys/mf_keycache.c: page_status is int mysys/mf_pagecache.c: merge of recent key cache changes sql/mysqld.cc: post-merge fixes sql/set_var.cc: post-merge fixes storage/maria/ha_maria.cc: merge of recent MyISAM changes into Maria storage/maria/ha_maria.h: merge of recent MyISAM changes into Maria storage/maria/ma_close.c: merge of recent MyISAM changes into Maria storage/maria/ma_create.c: merge of recent MyISAM changes into Maria storage/maria/ma_delete.c: merge of recent MyISAM changes into Maria storage/maria/ma_dynrec.c: merge of recent MyISAM changes into Maria storage/maria/ma_ft_boolean_search.c: merge of recent MyISAM changes into Maria storage/maria/ma_key.c: merge of recent MyISAM changes into Maria storage/maria/ma_keycache.c: merge of recent MyISAM changes into Maria storage/maria/ma_open.c: merge of recent MyISAM changes into Maria storage/maria/ma_page.c: merge of recent MyISAM changes into Maria storage/maria/ma_rsamepos.c: merge of recent MyISAM changes into Maria storage/maria/ma_statrec.c: merge of recent MyISAM changes into Maria storage/maria/ma_unique.c: merge of recent MyISAM changes into Maria storage/maria/maria_chk.c: merge of recent MyISAM changes into Maria storage/maria/maria_pack.c: merge of recent MyISAM changes into Maria storage/myisam/myisampack.c: compiler warning --- mysys/mf_keycache.c | 4 ++-- mysys/mf_pagecache.c | 54 +++++++++++++++++++++++++++------------------------- 2 files changed, 30 insertions(+), 28 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 111a28d01a4..12be01184b6 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -1642,9 +1642,9 @@ restart: KEYCACHE_DBUG_ASSERT(page_status != -1); *page_st=page_status; KEYCACHE_DBUG_PRINT("find_key_block", - ("fd: %d pos: %lu block->status: %u page_status: %u", + ("fd: %d pos: %lu block->status: %u page_status: %d", file, (ulong) filepos, block->status, - (uint) page_status)); + 
page_status)); #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) DBUG_EXECUTE("check_keycache2", diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 96c855fda0a..5f4e8d1a97d 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -759,9 +759,9 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, DBUG_PRINT("exit", ("disk_blocks: %d block_root: 0x%lx hash_entries: %d\ hash_root: 0x%lx hash_links: %d hash_link_root: 0x%lx", - pagecache->disk_blocks, pagecache->block_root, - pagecache->hash_entries, pagecache->hash_root, - pagecache->hash_links, pagecache->hash_link_root)); + pagecache->disk_blocks, (long) pagecache->block_root, + pagecache->hash_entries, (long) pagecache->hash_root, + pagecache->hash_links, (long) pagecache->hash_link_root)); bzero((gptr) pagecache->changed_blocks, sizeof(pagecache->changed_blocks[0]) * PAGECACHE_CHANGED_BLOCKS_HASH); @@ -985,7 +985,7 @@ void change_pagecache_param(PAGECACHE *pagecache, uint division_limit, void end_pagecache(PAGECACHE *pagecache, my_bool cleanup) { DBUG_ENTER("end_pagecache"); - DBUG_PRINT("enter", ("key_cache: 0x%lx", pagecache)); + DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) pagecache)); if (!pagecache->inited) DBUG_VOID_RETURN; @@ -1004,7 +1004,7 @@ void end_pagecache(PAGECACHE *pagecache, my_bool cleanup) pagecache->blocks_changed= 0; } - DBUG_PRINT("status", ("used: %d changed: %d w_requests: %lu " + DBUG_PRINT("status", ("used: %lu changed: %lu w_requests: %lu " "writes: %lu r_requests: %lu reads: %lu", pagecache->blocks_used, pagecache->global_blocks_changed, (ulong) pagecache->global_cache_w_requests, @@ -1466,7 +1466,7 @@ static void unreg_request(PAGECACHE *pagecache, if (block->temperature == BLOCK_WARM) pagecache->warm_blocks--; block->temperature= BLOCK_HOT; - KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks=%u", + KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu", pagecache->warm_blocks)); } link_block(pagecache, block, hot, (my_bool)at_end); @@ -1485,7 +1485,7 
@@ static void unreg_request(PAGECACHE *pagecache, pagecache->warm_blocks++; block->temperature= BLOCK_WARM; } - KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks=%u", + KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu", pagecache->warm_blocks)); } } @@ -1789,11 +1789,11 @@ static PAGECACHE_BLOCK_LINK *find_key_block(PAGECACHE *pagecache, DBUG_ENTER("find_key_block"); KEYCACHE_THREAD_TRACE("find_key_block:begin"); - DBUG_PRINT("enter", ("fd: %u pos %lu wrmode: %lu", - (uint) file->file, (ulong) pageno, (uint) wrmode)); - KEYCACHE_DBUG_PRINT("find_key_block", ("fd: %u pos: %lu wrmode: %lu", - (uint) file->file, (ulong) pageno, - (uint) wrmode)); + DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d", + file->file, (ulong) pageno, wrmode)); + KEYCACHE_DBUG_PRINT("find_key_block", ("fd: %d pos: %lu wrmode: %d", + file->file, (ulong) pageno, + wrmode)); #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) DBUG_EXECUTE("check_pagecache", test_key_cache(pagecache, "start of find_key_block", 0);); @@ -2103,14 +2103,14 @@ restart: KEYCACHE_DBUG_ASSERT(page_status != -1); *page_st=page_status; DBUG_PRINT("info", - ("block: 0x%lx fd: %u pos %lu block->status %u page_status %lu", + ("block: 0x%lx fd: %u pos %lu block->status %u page_status %u", (ulong) block, (uint) file->file, (ulong) pageno, block->status, (uint) page_status)); KEYCACHE_DBUG_PRINT("find_key_block", - ("block: 0x%lx fd: %u pos %lu block->status %u page_status %lu", + ("block: 0x%lx fd: %d pos: %lu block->status: %u page_status: %d", (ulong) block, - (uint) file->file, (ulong) pageno, block->status, - (uint) page_status)); + file->file, (ulong) pageno, block->status, + page_status)); #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) DBUG_EXECUTE("check_pagecache", @@ -3502,7 +3502,7 @@ static int flush_pagecache_blocks_int(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache; int last_errno= 0; DBUG_ENTER("flush_pagecache_blocks_int"); - DBUG_PRINT("enter",("file: %d blocks_used: %d 
blocks_changed: %d", + DBUG_PRINT("enter",("file: %d blocks_used: %lu blocks_changed: %lu", file->file, pagecache->blocks_used, pagecache->blocks_changed)); #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) @@ -3714,7 +3714,7 @@ int flush_pagecache_blocks(PAGECACHE *pagecache, { int res; DBUG_ENTER("flush_pagecache_blocks"); - DBUG_PRINT("enter", ("pagecache: 0x%lx", pagecache)); + DBUG_PRINT("enter", ("pagecache: 0x%lx", (long) pagecache)); if (pagecache->disk_blocks <= 0) DBUG_RETURN(0); @@ -3800,7 +3800,7 @@ int reset_key_cache_counters(const char *name, PAGECACHE *key_cache) of type PAGECACHE_LSN_PAGE. SYNOPSIS - pagecache_collect_changed_blocks_with_LSN() + pagecache_collect_changed_blocks_with_lsn() pagecache pointer to the page cache str (OUT) pointer to a LEX_STRING where the allocated buffer, and its size, will be put @@ -3809,7 +3809,8 @@ int reset_key_cache_counters(const char *name, PAGECACHE *key_cache) DESCRIPTION Does the allocation because the caller cannot know the size itself. - Memory freeing is done by the caller. + Memory freeing is to be done by the caller (if the "str" member of the + LEX_STRING is not NULL). Ignores all pages of another type than PAGECACHE_LSN_PAGE, because they are not interesting for a checkpoint record. The caller has the intention of doing checkpoints. @@ -3818,17 +3819,18 @@ int reset_key_cache_counters(const char *name, PAGECACHE *key_cache) 0 on success 1 on error */ -my_bool pagecache_collect_changed_blocks_with_LSN(PAGECACHE *pagecache, +my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, LEX_STRING *str, LSN *max_lsn) { my_bool error; - ulong stored_LRD_size= 0; + ulong stored_list_size= 0; uint file_hash; char *ptr; DBUG_ENTER("pagecache_collect_changed_blocks_with_LSN"); *max_lsn= 0; + DBUG_ASSERT(NULL == str->str); /* We lock the entire cache but will be quick, just reading/writing a few MBs of memory at most. 
@@ -3879,17 +3881,17 @@ my_bool pagecache_collect_changed_blocks_with_LSN(PAGECACHE *pagecache, DBUG_ASSERT(0); goto err; } - stored_LRD_size++; + stored_list_size++; } } - str->length= 8+(4+4+8)*stored_LRD_size; + str->length= 8+(4+4+8)*stored_list_size; if (NULL == (str->str= my_malloc(str->length, MYF(MY_WME)))) goto err; ptr= str->str; - int8store(ptr, stored_LRD_size); + int8store(ptr, stored_list_size); ptr+= 8; - if (0 == stored_LRD_size) + if (0 == stored_list_size) goto end; for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++) { -- cgit v1.2.1 From 345959c660d7401c9dc991a2c572ba145d6e199c Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 18 Jan 2007 21:38:14 +0200 Subject: Implementation of rows-in-block - Fixes some things missed in myisam->maria port - Moved variables that holds the state for the current row to 'cur_row' - Changed most uchar * to byte * to be able to remove a lot of casts - Removed RAID support - Added CHECK for rows-in-block - Added allocate_dynamic() for easier usage of dynamic rows when we know how many entries we will need - Reorder columns after CREATE for more optimal row storage (for rows-in-block) - Removed flag 'RRND_PRESERVER_LASTINX' (not needed) - Extended ma_test_all.sh to test more completely all row formats - New structs and variables to hold rows-in-block and bitmap information - Added org_data_file_type in header to allow easy restore of old record format when doing maria_pack / maria_chk -u - More virtual functions to handle different row types - Pointer to row is now MARIA_RECORD_POS instead of my_off_t - New header signature for MARIA index files - Fixed bugs in ma_test1.c and ma_test2.c - All key and row blocks are now of same size - We now only have one link chain for deleted key blocks include/m_string.h: Define bzero_if_purify include/maria.h: Implementation of rows-in-block include/my_base.h: Implementation of rows-in-block include/my_handler.h: Cleanup macros Added 
size_to_store_key_length() include/my_sys.h: Added 'allocate_dynamic()' include/myisamchk.h: Implementation of rows-in-block mysys/array.c: Added allocate_dynamic() mysys/mf_keycache.c: Moved DBUG_ENTER to it's right position mysys/my_pread.c: Ensure my_errno is always set sql/filesort.cc: Fixed some compiler warnings sql/gen_lex_hash.cc: Removed not needed 'inline' sql/ha_maria.cc: Implementation of rows-in-block Fixed compiler warnings sql/mysqld.cc: Fixed setting of wrong variable sql/uniques.cc: Fixed compiler warnings storage/maria/Makefile.am: Implementation of rows-in-block storage/maria/ma_check.c: Removed RAID functions Added support for CHECK of rows-in-blocks rows storage/maria/ma_checksum.c: Implementation of rows-in-block storage/maria/ma_close.c: Implementation of rows-in-block storage/maria/ma_create.c: Implementation of rows-in-block: - Reorder columns - All key blocks are now of same size - Removed old RAID support storage/maria/ma_dbug.c: Implementation of rows-in-block storage/maria/ma_delete.c: Implementation of rows-in-block storage/maria/ma_delete_all.c: Implementation of rows-in-block storage/maria/ma_dynrec.c: info->rec_buff is now allocated through _ma_alloc_buffer() Use new info->cur_row structure storage/maria/ma_extra.c: Implementation of rows-in-block storage/maria/ma_ft_boolean_search.c: Removed compiler warnings Indentation fixes storage/maria/ma_ft_nlq_search.c: Removed compiler warnings Indentation fixes storage/maria/ma_ft_update.c: Removed some casts storage/maria/ma_fulltext.h: Changed pointer type storage/maria/ma_info.c: Implementation of rows-in-block More general _ma_report_error() storage/maria/ma_init.c: Implementation of rows-in-block storage/maria/ma_key.c: Implementation of rows-in-block Removed some casts storage/maria/ma_keycache.c: Fixed DBUG entry storage/maria/ma_locking.c: Implementation of rows-in-block storage/maria/ma_open.c: Implementation of rows-in-block storage/maria/ma_packrec.c: Indentation fixes Changed 
uchar * to byte * to make it possible to remove some casts storage/maria/ma_page.c: Implementation of rows-in-block storage/maria/ma_range.c: Implementation of rows-in-block storage/maria/ma_rfirst.c: Implementation of rows-in-block storage/maria/ma_rkey.c: Implementation of rows-in-block Indentation fixes storage/maria/ma_rlast.c: Implementation of rows-in-block storage/maria/ma_rnext.c: Implementation of rows-in-block storage/maria/ma_rnext_same.c: Implementation of rows-in-block storage/maria/ma_rprev.c: Implementation of rows-in-block storage/maria/ma_rrnd.c: Implementation of rows-in-block Removed flag 'RRND_PRESERVER_LASTINX', by not resetting lastinx (This is reset by maria_scan_init()) storage/maria/ma_rsame.c: Implementation of rows-in-block storage/maria/ma_rsamepos.c: Implementation of rows-in-block storage/maria/ma_rt_index.c: Implementation of rows-in-block storage/maria/ma_rt_index.h: Implementation of rows-in-block storage/maria/ma_rt_key.c: Implementation of rows-in-block storage/maria/ma_rt_key.h: Implementation of rows-in-block storage/maria/ma_rt_mbr.c: Implementation of rows-in-block storage/maria/ma_rt_mbr.h: Implementation of rows-in-block storage/maria/ma_rt_split.c: Implementation of rows-in-block storage/maria/ma_rt_test.c: Indentation fix storage/maria/ma_scan.c: Implementation of rows-in-block Added 'maria_scan_end()' storage/maria/ma_search.c: Implementation of rows-in-block storage/maria/ma_sort.c: Indentation fixes uchar -> byte to be able to remove some casts storage/maria/ma_sp_defs.h: uchar * -> byte * storage/maria/ma_sp_key.c: uchar * -> byte * storage/maria/ma_sp_test.c: Indentation fixes storage/maria/ma_static.c: New header signature for MARIA storage/maria/ma_statrec.c: int -> my_bool functions my_off_t -> MARIA_RECORD_POS Fixed argument order for _ma_read_static_record() storage/maria/ma_test1.c: Implementation of rows-in-block Fixed some bugs in VARCHAR and BLOB testing storage/maria/ma_test2.c: Implementation of 
rows-in-block Fixed bug in BLOB testing storage/maria/ma_test3.c: Implementation of rows-in-block storage/maria/ma_test_all.sh: Run all tests with dynamic, static and block row formats (For the moment we skip REPAIR test of rows-in-block as this is not yet implemented) storage/maria/ma_unique.c: Implementation of rows-in-block storage/maria/ma_update.c: Implementation of rows-in-block storage/maria/ma_write.c: Implementation of rows-in-block Write of row is split into two parts, as rows-in-block format require us to do write of row before keys (to get row position) in contrast to all other row formats storage/maria/maria_chk.c: Implementation of rows-in-block storage/maria/maria_def.h: Implementation of rows-in-block - New structs and variables to hold rows-in-block and bitmap information - Added org_data_file_type in header to allow easy restore of old record format when doing maria_pack / maria_chk -u - More virtual functions to handle different row types - Pointer to row is now MARIA_RECORD_POS instead of my_off_t - uchar -> byte for many parameters to avoid casts storage/maria/maria_ftdump.c: Implementation of rows-in-block storage/maria/maria_pack.c: Implementation of rows-in-block storage/myisam/mi_check.c: Added new row types into switch to avoid compiler warnings Added some casts to avoid warnings after changing type of lastkey and buff storage/myisam/mi_create.c: Fix that 'pack_fields' is calculated correctly storage/myisam/mi_rsamepos.c: Implementation of rows-in-block storage/myisam/mi_test2.c: Fixed wrong printf storage/myisam/sort.c: uchar * -> byte * support-files/magic: Added support for Maria files Fided wrong entry's for MyISAM files storage/maria/ma_bitmap.c: New BitKeeper file ``storage/maria/ma_bitmap.c'' storage/maria/ma_blockrec.c: New BitKeeper file ``storage/maria/ma_blockrec.c'' storage/maria/ma_blockrec.h: New BitKeeper file ``storage/maria/ma_blockrec.h'' --- mysys/array.c | 56 +++++++++++++++++++++++++++++++++++++++-------------- 
mysys/mf_keycache.c | 17 ++++++++++------ mysys/my_pread.c | 6 ++++-- 3 files changed, 56 insertions(+), 23 deletions(-) (limited to 'mysys') diff --git a/mysys/array.c b/mysys/array.c index a50d8b78178..2017bba5b61 100644 --- a/mysys/array.c +++ b/mysys/array.c @@ -61,7 +61,8 @@ my_bool init_dynamic_array(DYNAMIC_ARRAY *array, uint element_size, array->max_element=init_alloc; array->alloc_increment=alloc_increment; array->size_of_element=element_size; - if (!(array->buffer=(char*) my_malloc_ci(element_size*init_alloc,MYF(MY_WME)))) + if (!(array->buffer=(char*) my_malloc_ci(element_size*init_alloc, + MYF(MY_WME)))) { array->max_element=0; DBUG_RETURN(TRUE); @@ -154,7 +155,7 @@ byte *pop_dynamic(DYNAMIC_ARRAY *array) } /* - Replace elemnent in array with given element and index + Replace element in array with given element and index SYNOPSIS set_dynamic() @@ -175,19 +176,8 @@ my_bool set_dynamic(DYNAMIC_ARRAY *array, gptr element, uint idx) { if (idx >= array->elements) { - if (idx >= array->max_element) - { - uint size; - char *new_ptr; - size=(idx+array->alloc_increment)/array->alloc_increment; - size*= array->alloc_increment; - if (!(new_ptr=(char*) my_realloc(array->buffer,size* - array->size_of_element, - MYF(MY_WME | MY_ALLOW_ZERO_PTR)))) - return TRUE; - array->buffer=new_ptr; - array->max_element=size; - } + if (idx >= array->max_element && allocate_dynamic(array, idx)) + return TRUE; bzero((gptr) (array->buffer+array->elements*array->size_of_element), (idx - array->elements)*array->size_of_element); array->elements=idx+1; @@ -197,6 +187,42 @@ my_bool set_dynamic(DYNAMIC_ARRAY *array, gptr element, uint idx) return FALSE; } + +/* + Ensure that dynamic array has enough elements + + SYNOPSIS + allocate_dynamic() + array + max_elements Numbers of elements that is needed + + NOTES + Any new allocated element are NOT initialized + + RETURN VALUE + FALSE Ok + TRUE Allocation of new memory failed +*/ + +my_bool allocate_dynamic(DYNAMIC_ARRAY *array, uint 
max_elements) +{ + if (max_elements >= array->max_element) + { + uint size; + char *new_ptr; + size= (max_elements + array->alloc_increment)/array->alloc_increment; + size*= array->alloc_increment; + if (!(new_ptr= (char*) my_realloc(array->buffer,size* + array->size_of_element, + MYF(MY_WME | MY_ALLOW_ZERO_PTR)))) + return TRUE; + array->buffer= new_ptr; + array->max_element= size; + } + return FALSE; +} + + /* Get an element from array by given index diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index e6f4348968f..10a3e85eb4d 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -1791,8 +1791,6 @@ byte *key_cache_read(KEY_CACHE *keycache, uint offset= 0; byte *start= buff; DBUG_ENTER("key_cache_read"); - DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u", - (uint) file, (ulong) filepos, length)); if (keycache->can_be_used) { @@ -1802,6 +1800,11 @@ byte *key_cache_read(KEY_CACHE *keycache, uint status; int page_st; + DBUG_PRINT("enter", ("fd: %u pos: %lu page: %lu length: %u", + (uint) file, (ulong) filepos, + (ulong) (filepos / keycache->key_cache_block_size), + length)); + offset= (uint) (filepos & (keycache->key_cache_block_size-1)); /* Read data in key_cache_block_size increments */ do @@ -2053,10 +2056,6 @@ int key_cache_write(KEY_CACHE *keycache, reg1 BLOCK_LINK *block; int error=0; DBUG_ENTER("key_cache_write"); - DBUG_PRINT("enter", - ("fd: %u pos: %lu length: %u block_length: %u key_block_length: %u", - (uint) file, (ulong) filepos, length, block_length, - keycache ? 
keycache->key_cache_block_size : 0)); if (!dont_write) { @@ -2078,6 +2077,12 @@ int key_cache_write(KEY_CACHE *keycache, int page_st; uint offset; + DBUG_PRINT("enter", + ("fd: %u pos: %lu page: %lu length: %u block_length: %u", + (uint) file, (ulong) filepos, + (ulong) (filepos / keycache->key_cache_block_size), + length, block_length)); + offset= (uint) (filepos & (keycache->key_cache_block_size-1)); do { diff --git a/mysys/my_pread.c b/mysys/my_pread.c index ac52895efe9..dd174c8a4cc 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -52,7 +52,7 @@ uint my_pread(File Filedes, byte *Buffer, uint Count, my_off_t offset, if (!error) /* Seek was successful */ { if ((readbytes = (uint) read(Filedes, Buffer, Count)) == -1L) - my_errno= errno; + my_errno= errno ? errno : -1; /* We should seek back, even if read failed. If this fails, @@ -68,7 +68,7 @@ uint my_pread(File Filedes, byte *Buffer, uint Count, my_off_t offset, #else if ((error= ((readbytes = (uint) pread(Filedes, Buffer, Count, offset)) != Count))) - my_errno= errno; + my_errno= errno ? 
errno : -1; #endif if (error || readbytes != Count) { @@ -84,8 +84,10 @@ uint my_pread(File Filedes, byte *Buffer, uint Count, my_off_t offset, my_error(EE_READ, MYF(ME_BELL+ME_WAITTANG), my_filename(Filedes),my_errno); else if (MyFlags & (MY_NABP | MY_FNABP)) + { my_error(EE_EOFERR, MYF(ME_BELL+ME_WAITTANG), my_filename(Filedes),my_errno); + } } if ((int) readbytes == -1 || (MyFlags & (MY_FNABP | MY_NABP))) DBUG_RETURN(MY_FILE_ERROR); /* Return with error */ -- cgit v1.2.1 From 7412f0fa0cd498f06fe04966a6f6161e8b32d0a2 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 26 Jan 2007 13:32:02 +0200 Subject: After merge fixes Removed compiler warnings Fixed clashing function name in maria Disable maria tests from MySQL level for now BitKeeper/deleted/.del-ha_maria.cc: Rename: libmysqld/ha_maria.cc -> BitKeeper/deleted/.del-ha_maria.cc BitKeeper/etc/ignore: added libmysqld/ha_maria.cc --- added storage/maria/unittest/maria_control unittest/maria_control --- added *.Tpo --- added unittest/page_cache_test_file_1 --- added unittest/pagecache_debug.log --- added unittest/mysys/mf_pagecache_consist_1k-t-big unittest/mysys/mf_pagecache_consist_1kHC-t-big unittest/mysys/mf_pagecache_consist_1kRD-t-big unittest/mysys/mf_pagecache_consist_1kWR-t-big unittest/mysys/mf_pagecache_consist_64k-t-big unittest/mysys/mf_pagecache_consist_64kHC-t-big unittest/mysys/mf_pagecache_consist_64kRD-t-big unittest/mysys/mf_pagecache_consist_64kWR-t-big --- added unittest/mysys/mf_pagecache_single_64k-t-big Makefile.am: Don't run 'test-unit' by default (takes too long time) client/mysqldump.c: Fixed compiler warning include/lf.h: Remove compiler warnings about not used require_pins constant include/pagecache.h: LSN should be of type ulonglong (This fixes some compiler warnings) mysql-test/r/events_logs_tests.result: Make test predictable mysql-test/r/view.result: Make test results predictable mysql-test/t/disabled.def: Disable maria tests for a while mysql-test/t/events_logs_tests.test: Make test 
predictable mysql-test/t/view.test: Make test results predictable mysys/lf_alloc-pin.c: #warning ->QQ mysys/lf_hash.c: #warning ->QQ Removed compiler warnings mysys/mf_pagecache.c: Removed compiler warnings mysys/my_rename.c: Removed compiler warnings plugin/daemon_example/daemon_example.c: Remove compiler warning sql/ha_ndbcluster.cc: Remove compiler warning sql/udf_example.c: Remove compiler warning storage/maria/lockman.c: Changed #warnings to QQ comment Removed compiler warnings storage/maria/ma_blockrec.c: Removed compiler warnings storage/maria/ma_check.c: After merge fixes storage/maria/ma_key.c: After merge fixes storage/maria/ma_packrec.c: After merge fixes storage/maria/ma_rkey.c: After merge fixes storage/maria/ma_sort.c: After merge fixes storage/maria/ma_sp_defs.h: Rename clashing function name storage/maria/ma_sp_key.c: Rename clashing function name storage/maria/ma_test_all.res: New test results storage/maria/ma_unique.c: Fixed compiler warning storage/maria/tablockman.c: #warning -> QQ storage/maria/tablockman.h: #warning -> QQ storage/maria/trnman.c: #warning -> QQ storage/maria/unittest/lockman2-t.c: Removed compiler warnings storage/maria/unittest/ma_control_file-t.c: Removed warning for 'maria_control' file not found storage/maria/unittest/trnman-t.c: Removed compiler warnings storage/ndb/src/mgmapi/mgmapi.cpp: Remove compiler warnings unittest/mysys/mf_pagecache_consist.c: Removed compiler warnings unittest/mysys/my_atomic-t.c: Removed compiler warnings --- mysys/lf_alloc-pin.c | 2 +- mysys/lf_hash.c | 13 +++---- mysys/mf_pagecache.c | 98 +++++++++++++++++++++++++++------------------------- mysys/my_rename.c | 3 +- 4 files changed, 61 insertions(+), 55 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index d0fa29ddaaf..e964553a64c 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -1,4 +1,4 @@ -#warning TODO multi-pinbox +/* QQ: TODO multi-pinbox */ /* Copyright (C) 2000 MySQL AB This 
program is free software; you can redistribute it and/or modify diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index b2ad7a93ace..fb2fb88492f 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -86,7 +86,7 @@ retry: if (!cursor->curr) return 0; do { -#warning XXX or goto retry ? + /* QQ: XXX or goto retry ? */ link= cursor->curr->link; cursor->next= PTR(link); _lf_pin(pins, 0, cursor->next); @@ -105,7 +105,8 @@ retry: { int r= 1; if (cur_hashnr > hashnr || - (r= my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0) + (r= my_strnncoll(cs, (uchar*) cur_key, cur_keylen, (uchar*) key, + keylen)) >= 0) return !r; } cursor->prev= &(cursor->curr->link); @@ -243,7 +244,8 @@ static inline const byte* hash_key(const LF_HASH *hash, static inline uint calc_hash(LF_HASH *hash, const byte *key, uint keylen) { ulong nr1= 1, nr2= 4; - hash->charset->coll->hash_sort(hash->charset, key, keylen, &nr1, &nr2); + hash->charset->coll->hash_sort(hash->charset, (uchar*) key, keylen, + &nr1, &nr2); return nr1 & INT_MAX32; } @@ -375,7 +377,7 @@ void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) return found ? 
found+1 : 0; } -static char *dummy_key= ""; +static const char *dummy_key= ""; static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, uint bucket, LF_PINS *pins) @@ -387,7 +389,7 @@ static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, if (*el == NULL && bucket) initialize_bucket(hash, el, parent, pins); dummy->hashnr= my_reverse_bits(bucket); - dummy->key= dummy_key; + dummy->key= (char*) dummy_key; dummy->keylen= 0; if ((cur= linsert(el, hash->charset, dummy, pins, 0))) { @@ -396,4 +398,3 @@ static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, } my_atomic_casptr((void **)node, (void **)&tmp, dummy); } - diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 5f4e8d1a97d..0116dfabfa1 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -47,7 +47,6 @@ #include #include - /* Some compilation flags have been added specifically for this module to control the following: @@ -429,7 +428,6 @@ error: #define FLUSH_CACHE 2000 /* sort this many blocks at once */ -static int flush_all_key_blocks(PAGECACHE *pagecache); #ifdef THREAD static void link_into_queue(PAGECACHE_WQUEUE *wqueue, struct st_my_thread_var *thread); @@ -793,6 +791,40 @@ err: } +/* + Flush all blocks in the key cache to disk +*/ + +#ifdef NOT_USED +static int flush_all_key_blocks(PAGECACHE *pagecache) +{ +#if defined(PAGECACHE_DEBUG) + uint cnt=0; +#endif + while (pagecache->blocks_changed > 0) + { + PAGECACHE_BLOCK_LINK *block; + for (block= pagecache->used_last->next_used ; ; block=block->next_used) + { + if (block->hash_link) + { +#if defined(PAGECACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); +#endif + if (flush_pagecache_blocks_int(pagecache, &block->hash_link->file, + FLUSH_RELEASE)) + return 1; + break; + } + if (block == pagecache->used_last) + break; + } + } + return 0; +} +#endif /* NOT_USED */ + /* Resize a key cache @@ -827,7 +859,7 @@ err: resizing, due to the page locking specific to this page 
cache. So we disable it for now. */ -#if 0 /* keep disabled until code is fixed see above !! */ +#if NOT_USED /* keep disabled until code is fixed see above !! */ int resize_pagecache(PAGECACHE *pagecache, my_size_t use_mem, uint division_limit, uint age_threshold) @@ -1383,7 +1415,7 @@ static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) (ulong)block, BLOCK_NUMBER(pagecache, block), block->status, block->requests, pagecache->blocks_available)); BLOCK_INFO(block); - KEYCACHE_DBUG_ASSERT(pagecache->blocks_available >= 0); + KEYCACHE_DBUG_ASSERT((int) pagecache->blocks_available >= 0); #endif } @@ -2511,7 +2543,7 @@ void pagecache_unlock_page(PAGECACHE *pagecache, DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && pin == PAGECACHE_UNPIN); /* TODO: insert LSN writing code */ - DBUG_ASSERT(first_REDO_LSN_for_page > 0); + DBUG_ASSERT(first_REDO_LSN_for_page != 0); set_if_bigger(block->rec_lsn, first_REDO_LSN_for_page); } @@ -2675,7 +2707,7 @@ void pagecache_unlock(PAGECACHE *pagecache, DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && pin == PAGECACHE_UNPIN); /* TODO: insert LSN writing code */ - DBUG_ASSERT(first_REDO_LSN_for_page > 0); + DBUG_ASSERT(first_REDO_LSN_for_page != 0); set_if_bigger(block->rec_lsn, first_REDO_LSN_for_page); } @@ -3251,7 +3283,9 @@ restart: #ifndef DBUG_OFF int rc= #endif -#warning we are doing an unlock here, so need to give the page its rec_lsn! + /* + QQ: We are doing an unlock here, so need to give the page its rec_lsn + */ pagecache_make_lock_and_pin(pagecache, block, write_lock_change_table[lock].unlock_lock, write_pin_change_table[pin].unlock_pin); @@ -3612,11 +3646,14 @@ restart: else { /* Link the block into a list of blocks 'in switch' */ -#warning this unlink_changed() is a serious problem for Maria's Checkpoint: it \ -removes a page from the list of dirty pages, while it's still dirty. A \ - solution is to abandon first_in_switch, just wait for this page to be \ - flushed by somebody else, and loop. 
TODO: check all places where we remove a \ - page from the list of dirty pages + /* QQ: + #warning this unlink_changed() is a serious problem for + Maria's Checkpoint: it removes a page from the list of dirty + pages, while it's still dirty. A solution is to abandon + first_in_switch, just wait for this page to be + flushed by somebody else, and loop. TODO: check all places + where we remove a page from the list of dirty pages + */ unlink_changed(block); link_changed(block, &first_in_switch); } @@ -3727,39 +3764,6 @@ int flush_pagecache_blocks(PAGECACHE *pagecache, } -/* - Flush all blocks in the key cache to disk -*/ - -static int flush_all_key_blocks(PAGECACHE *pagecache) -{ -#if defined(PAGECACHE_DEBUG) - uint cnt=0; -#endif - while (pagecache->blocks_changed > 0) - { - PAGECACHE_BLOCK_LINK *block; - for (block= pagecache->used_last->next_used ; ; block=block->next_used) - { - if (block->hash_link) - { -#if defined(PAGECACHE_DEBUG) - cnt++; - KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); -#endif - if (flush_pagecache_blocks_int(pagecache, &block->hash_link->file, - FLUSH_RELEASE)) - return 1; - break; - } - if (block == pagecache->used_last) - break; - } - } - return 0; -} - - /* Reset the counters of a key cache. @@ -3876,7 +3880,7 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, 1) and 2) are critical problems. TODO: fix this when Monty has explained how he writes BLOB pages. 
*/ - if (0 == block->rec_lsn) + if (block->rec_lsn == 0) { DBUG_ASSERT(0); goto err; @@ -3908,7 +3912,7 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, ptr+= 4; int4store(ptr, block->hash_link->pageno); ptr+= 4; - int8store(ptr, (ulonglong)block->rec_lsn); + int8store(ptr, (ulonglong) block->rec_lsn); ptr+= 8; set_if_bigger(*max_lsn, block->rec_lsn); } diff --git a/mysys/my_rename.c b/mysys/my_rename.c index 8c2a354324b..c3870db177d 100644 --- a/mysys/my_rename.c +++ b/mysys/my_rename.c @@ -17,8 +17,9 @@ #include "mysys_priv.h" #include #include "mysys_err.h" - +#include "m_string.h" #undef my_rename + /* On unix rename deletes to file if it exists */ int my_rename(const char *from, const char *to, myf MyFlags) -- cgit v1.2.1 From 025400922118f11a15be54c66455f20e2f72c0b4 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 2 Feb 2007 09:41:32 +0200 Subject: postreview changes for page cache and pre review commit for loghandler storage/maria/unittest/test_file.c: Rename: unittest/mysys/test_file.c -> storage/maria/unittest/test_file.c storage/maria/unittest/test_file.h: Rename: unittest/mysys/test_file.h -> storage/maria/unittest/test_file.h include/pagecache.h: A waiting queue mechanism moved to separate file wqueue.* Pointer name changed for compatibility mysys/Makefile.am: A waiting queue mechanism moved to separate file wqueue.* mysys/mf_keycache.c: fixed unsigned comparison mysys/mf_pagecache.c: A waiting queue mechanism moved to separate file wqueue.* Fixed bug in unregistering block during write storage/maria/Makefile.am: The loghandler files added storage/maria/ma_control_file.h: Now we have loghandler and can compile control file storage/maria/maria_def.h: Including files need for compilation of maria storage/maria/unittest/Makefile.am: unit tests of loghandler storage/maria/unittest/ma_control_file-t.c: Used maria def storage/maria/unittest/mf_pagecache_consist.c: fixed memory overrun storage/maria/unittest/mf_pagecache_single.c: 
fixed used uninitialized memory unittest/mysys/Makefile.am: unittests of pagecache moved to maria becase pagecache need loghandler include/wqueue.h: New BitKeeper file ``include/wqueue.h'' mysys/wqueue.c: New BitKeeper file ``mysys/wqueue.c'' storage/maria/ma_loghandler.c: New BitKeeper file ``storage/maria/ma_loghandler.c'' storage/maria/ma_loghandler.h: New BitKeeper file ``storage/maria/ma_loghandler.h'' storage/maria/ma_loghandler_lsn.h: New BitKeeper file ``storage/maria/ma_loghandler_lsn.h'' storage/maria/unittest/ma_test_loghandler-t.c: New BitKeeper file ``storage/maria/unittest/ma_test_loghandler-t.c'' storage/maria/unittest/ma_test_loghandler_multigroup-t.c: New BitKeeper file ``storage/maria/unittest/ma_test_loghandler_multigroup-t.c'' storage/maria/unittest/ma_test_loghandler_multithread-t.c: New BitKeeper file ``storage/maria/unittest/ma_test_loghandler_multithread-t.c'' storage/maria/unittest/ma_test_loghandler_pagecache-t.c: New BitKeeper file ``storage/maria/unittest/ma_test_loghandler_pagecache-t.c'' --- mysys/Makefile.am | 2 +- mysys/mf_keycache.c | 2 +- mysys/mf_pagecache.c | 776 +++++++++++++++++++++++---------------------------- mysys/wqueue.c | 167 +++++++++++ 4 files changed, 516 insertions(+), 431 deletions(-) create mode 100644 mysys/wqueue.c (limited to 'mysys') diff --git a/mysys/Makefile.am b/mysys/Makefile.am index 4d9570febbd..612411404c4 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -56,7 +56,7 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ my_handler.c my_netware.c my_largepage.c \ my_memmem.c \ my_windac.c my_access.c base64.c my_libwrap.c \ - mf_pagecache.c + mf_pagecache.c wqueue.c EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \ thr_mutex.c thr_rwlock.c \ CMakeLists.txt mf_soundex.c \ diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 9a99a278bc5..9cb428ab200 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -1008,12 +1008,12 @@ static void 
unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block) KEYCACHE_THREAD_TRACE("unlink_block"); #if defined(KEYCACHE_DEBUG) + KEYCACHE_DBUG_ASSERT(keycache->blocks_available != 0); keycache->blocks_available--; KEYCACHE_DBUG_PRINT("unlink_block", ("unlinked block %u status=%x #requests=%u #available=%u", BLOCK_NUMBER(block), block->status, block->requests, keycache->blocks_available)); - KEYCACHE_DBUG_ASSERT(keycache->blocks_available >= 0); #endif } diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 4b92f68d9bf..97cb542f329 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -26,7 +26,7 @@ When a new block is required it is first tried to pop one from the stack. If the stack is empty, it is tried to get a never-used block from the pool. If this is empty too, then a block is taken from the LRU ring, flushing it - to disk, if necessary. This is handled in find_key_block(). + to disk, if necessary. This is handled in find_block(). With the new free list, the blocks can have three temperatures: hot, warm and cold (which is free). This is remembered in the block header by the enum BLOCK_TEMPERATURE temperature variable. Remembering the @@ -91,13 +91,16 @@ /* In key cache we have external raw locking here we use SERIALIZED_READ_FROM_CACHE to avoid problem of reading - not consistent data from te page + not consistent data from the page. + (keycache functions (key_cache_read(), key_cache_insert() and + key_cache_write()) rely on external MyISAM lock, we don't) */ #define SERIALIZED_READ_FROM_CACHE yes #define BLOCK_INFO(B) \ DBUG_PRINT("info", \ - ("block 0x%lx, file %lu, page %lu, s %0x, hshL 0x%lx, req %u/%u", \ + ("block 0x%lx file %lu page %lu s %0x hshL 0x%lx req %u/%u " \ + "wrlock: %c", \ (ulong)(B), \ (ulong)((B)->hash_link ? \ (B)->hash_link->file.file : \ @@ -110,7 +113,8 @@ (uint) (B)->requests, \ (uint)((B)->hash_link ? 
\ (B)->hash_link->requests : \ - 0))) + 0), \ + ((block->status & BLOCK_WRLOCK)?'Y':'N'))) /* TODO: put it to my_static.c */ my_bool my_disable_flush_pagecache_blocks= 0; @@ -138,7 +142,7 @@ typedef pthread_cond_t KEYCACHE_CONDVAR; struct st_pagecache_page { PAGECACHE_FILE file; /* file to which the page belongs to */ - maria_page_no_t pageno; /* number of the page in the file */ + pgcache_page_no_t pageno; /* number of the page in the file */ }; /* element in the chain of a hash table bucket */ @@ -149,7 +153,7 @@ struct st_pagecache_hash_link struct st_pagecache_block_link *block; /* reference to the block for the page: */ PAGECACHE_FILE file; /* from such a file */ - maria_page_no_t pageno; /* this page */ + pgcache_page_no_t pageno; /* this page */ uint requests; /* number of requests for the page */ }; @@ -162,7 +166,7 @@ struct st_pagecache_hash_link #define BLOCK_CHANGED 32 /* block buffer contains a dirty page */ #define BLOCK_WRLOCK 64 /* write locked block */ -/* page status, returned by find_key_block */ +/* page status, returned by find_block */ #define PAGE_READ 0 #define PAGE_TO_BE_READ 1 #define PAGE_WAIT_TO_BE_READ 2 @@ -232,7 +236,7 @@ typedef struct st_pagecache_lock_info node the node which should be linked */ -void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node) +static void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node) { if ((node->next= *list)) node->next->prev= &(node->next); @@ -249,7 +253,7 @@ void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node) node the node which should be unlinked */ -void info_unlink(PAGECACHE_PIN_INFO *node) +static void info_unlink(PAGECACHE_PIN_INFO *node) { if ((*node->prev= node->next)) node->next->prev= node->prev; @@ -271,8 +275,8 @@ void info_unlink(PAGECACHE_PIN_INFO *node) pointer to the information node of the thread in the list */ -PAGECACHE_PIN_INFO *info_find(PAGECACHE_PIN_INFO *list, - struct st_my_thread_var *thread) +static PAGECACHE_PIN_INFO 
*info_find(PAGECACHE_PIN_INFO *list, + struct st_my_thread_var *thread) { register PAGECACHE_PIN_INFO *i= list; for(; i != 0; i= i->next) @@ -291,7 +295,7 @@ struct st_pagecache_block_link *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */ struct st_pagecache_hash_link *hash_link; /* backward ptr to referring hash_link */ - PAGECACHE_WQUEUE + WQUEUE wqueue[COND_SIZE]; /* queues on waiting requests for new/old pages */ uint requests; /* number of requests for the block */ byte *buffer; /* buffer for the block page */ @@ -310,8 +314,8 @@ struct st_pagecache_block_link #ifdef PAGECACHE_DEBUG /* debug checks */ -my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, - enum pagecache_page_pin mode) +static my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_pin mode) { struct st_my_thread_var *thread= my_thread_var; DBUG_ENTER("info_check_pin"); @@ -367,9 +371,9 @@ my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, 1 - Error */ -my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, - enum pagecache_page_lock lock, - enum pagecache_page_pin pin) +static my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin) { struct st_my_thread_var *thread= my_thread_var; DBUG_ENTER("info_check_lock"); @@ -379,47 +383,47 @@ my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, switch(lock) { case PAGECACHE_LOCK_LEFT_UNLOCKED: - DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED); - if (info) + if (pin != PAGECACHE_PIN_LEFT_UNPINNED || + info) goto error; break; case PAGECACHE_LOCK_LEFT_READLOCKED: - DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || - pin == PAGECACHE_PIN_LEFT_PINNED); - if (info == 0 || info->write_lock) + if ((pin != PAGECACHE_PIN_LEFT_UNPINNED && + pin != PAGECACHE_PIN_LEFT_PINNED) || + info == 0 || info->write_lock) goto error; break; case PAGECACHE_LOCK_LEFT_WRITELOCKED: - DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_PINNED); - if (info == 0 || !info->write_lock) + if (pin != 
PAGECACHE_PIN_LEFT_PINNED || + info == 0 || !info->write_lock) goto error; break; case PAGECACHE_LOCK_READ: - DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || - pin == PAGECACHE_PIN); - if (info != 0) + if ((pin != PAGECACHE_PIN_LEFT_UNPINNED && + pin != PAGECACHE_PIN) || + info != 0) goto error; break; case PAGECACHE_LOCK_WRITE: - DBUG_ASSERT(pin == PAGECACHE_PIN); - if (info != 0) + if (pin != PAGECACHE_PIN || + info != 0) goto error; break; case PAGECACHE_LOCK_READ_UNLOCK: - DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED || - pin == PAGECACHE_UNPIN); - if (info == 0 || info->write_lock) + if ((pin != PAGECACHE_PIN_LEFT_UNPINNED && + pin != PAGECACHE_UNPIN) || + info == 0 || info->write_lock) goto error; break; case PAGECACHE_LOCK_WRITE_UNLOCK: - DBUG_ASSERT(pin == PAGECACHE_UNPIN); - if (info == 0 || !info->write_lock) + if (pin != PAGECACHE_UNPIN || + info == 0 || !info->write_lock) goto error; break; case PAGECACHE_LOCK_WRITE_TO_READ: - DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_PINNED || - pin == PAGECACHE_UNPIN); - if (info == 0 || !info->write_lock) + if ((pin != PAGECACHE_PIN_LEFT_PINNED && + pin != PAGECACHE_UNPIN) || + info == 0 || !info->write_lock) goto error; break; } @@ -439,12 +443,6 @@ error: #define FLUSH_CACHE 2000 /* sort this many blocks at once */ static int flush_all_key_blocks(PAGECACHE *pagecache); -#ifdef THREAD -static void link_into_queue(PAGECACHE_WQUEUE *wqueue, - struct st_my_thread_var *thread); -static void unlink_from_queue(PAGECACHE_WQUEUE *wqueue, - struct st_my_thread_var *thread); -#endif static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block); static void test_key_cache(PAGECACHE *pagecache, const char *where, my_bool lock); @@ -551,6 +549,7 @@ static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond); #define pagecache_pthread_cond_signal pthread_cond_signal #endif /* defined(PAGECACHE_DEBUG) */ +extern my_bool translog_flush(LSN *lsn); /* Write page to the disk @@ -567,18 +566,28 @@ static int 
___pagecache_pthread_cond_signal(pthread_cond_t *cond); 0 - OK !=0 - Error */ -uint pagecache_fwrite(PAGECACHE *pagecache, - PAGECACHE_FILE *filedesc, - byte *buffer, - maria_page_no_t pageno, - enum pagecache_page_type type, - myf flags) + +static uint pagecache_fwrite(PAGECACHE *pagecache, + PAGECACHE_FILE *filedesc, + byte *buffer, + pgcache_page_no_t pageno, + enum pagecache_page_type type, + myf flags) { DBUG_ENTER("pagecache_fwrite"); if (type == PAGECACHE_LSN_PAGE) { + LSN lsn; DBUG_PRINT("info", ("Log handler call")); - /* TODO: put here loghandler call */ + /* TODO: integrate with page format */ +#define PAGE_LSN_OFFSET 0 + lsn7korr(&lsn, buffer + PAGE_LSN_OFFSET); + /* + check CONTROL_FILE_IMPOSSIBLE_FILENO & + CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET + */ + DBUG_ASSERT(lsn.file_no != 0 && lsn.rec_offset != 0); + translog_flush(&lsn); } DBUG_RETURN(my_pwrite(filedesc->file, buffer, pagecache->block_size, (pageno)<<(pagecache->shift), flags)); @@ -628,8 +637,6 @@ static uint next_power(uint value) division_limit division limit (may be zero) age_threshold age threshold (may be zero) block_size size of block (should be power of 2) - loghandler logfandler pointer to call it in case of - pages with LSN RETURN VALUE number of blocks in the key cache, if successful, @@ -647,12 +654,11 @@ static uint next_power(uint value) int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, uint division_limit, uint age_threshold, - uint block_size, - LOG_HANDLER *loghandler) + uint block_size) { - int blocks, hash_links, length; + uint blocks, hash_links, length; int error; - DBUG_ENTER("init_key_cache"); + DBUG_ENTER("init_pagecache"); DBUG_ASSERT(block_size >= 512); PAGECACHE_DEBUG_OPEN; @@ -662,8 +668,6 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, DBUG_RETURN(0); } - pagecache->loghandler= loghandler; - pagecache->global_cache_w_requests= pagecache->global_cache_r_requests= 0; pagecache->global_cache_read= pagecache->global_cache_write= 0; 
pagecache->disk_blocks= -1; @@ -692,8 +696,8 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, for ( ; ; ) { /* Set my_hash_entries to the next bigger 2 power */ - if ((pagecache->hash_entries= next_power((uint)blocks)) < - ((uint)blocks) * 5/4) + if ((pagecache->hash_entries= next_power(blocks)) < + (blocks) * 5/4) pagecache->hash_entries<<= 1; hash_links= 2 * blocks; #if defined(MAX_THREADS) @@ -704,7 +708,7 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, ALIGN_SIZE(hash_links * sizeof(PAGECACHE_HASH_LINK)) + ALIGN_SIZE(sizeof(PAGECACHE_HASH_LINK*) * pagecache->hash_entries))) + - ((ulong) blocks << pagecache->shift) > use_mem) + (((ulong) blocks) << pagecache->shift) > use_mem) blocks--; /* Allocate memory for cache page buffers */ if ((pagecache->block_mem= @@ -760,10 +764,10 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, pagecache->warm_blocks= 0; pagecache->min_warm_blocks= (division_limit ? blocks * division_limit / 100 + 1 : - (ulong)blocks); + blocks); pagecache->age_threshold= (age_threshold ? 
blocks * age_threshold / 100 : - (ulong)blocks); + blocks); pagecache->cnt_for_resize_op= 0; pagecache->resize_in_flush= 0; @@ -842,7 +846,8 @@ int resize_pagecache(PAGECACHE *pagecache, { int blocks; struct st_my_thread_var *thread; - PAGECACHE_WQUEUE *wqueue; + WQUEUE *wqueue; + DBUG_ENTER("resize_pagecache"); if (!pagecache->inited) @@ -859,7 +864,7 @@ int resize_pagecache(PAGECACHE *pagecache, #ifdef THREAD wqueue= &pagecache->resize_queue; thread= my_thread_var; - link_into_queue(wqueue, thread); + wqueue_link_into_queue(wqueue, thread); while (wqueue->last_thread->next != thread) { @@ -892,12 +897,11 @@ int resize_pagecache(PAGECACHE *pagecache, end_pagecache(pagecache, 0); /* Don't free mutex */ /* The following will work even if use_mem is 0 */ blocks= init_pagecache(pagecache, pagecache->block_size, use_mem, - division_limit, age_threshold, - pagecache->loghandler); + division_limit, age_threshold); finish: #ifdef THREAD - unlink_from_queue(wqueue, thread); + wqueue_unlink_from_queue(wqueue, thread); /* Signal for the next resize request to proceeed if any */ if (wqueue->last_thread) { @@ -1027,146 +1031,6 @@ void end_pagecache(PAGECACHE *pagecache, my_bool cleanup) } /* end_pagecache */ -#ifdef THREAD -/* - Link a thread into double-linked queue of waiting threads. - - SYNOPSIS - link_into_queue() - wqueue pointer to the queue structure - thread pointer to the thread to be added to the queue - - RETURN VALUE - none - - NOTES. - Queue is represented by a circular list of the thread structures - The list is double-linked of the type (**prev,*next), accessed by - a pointer to the last element. -*/ - -static void link_into_queue(PAGECACHE_WQUEUE *wqueue, - struct st_my_thread_var *thread) -{ - struct st_my_thread_var *last; - if (! 
(last= wqueue->last_thread)) - { - /* Queue is empty */ - thread->next= thread; - thread->prev= &thread->next; - } - else - { - thread->prev= last->next->prev; - last->next->prev= &thread->next; - thread->next= last->next; - last->next= thread; - } - wqueue->last_thread= thread; -} - -/* - Unlink a thread from double-linked queue of waiting threads - - SYNOPSIS - unlink_from_queue() - wqueue pointer to the queue structure - thread pointer to the thread to be removed from the queue - - RETURN VALUE - none - - NOTES. - See NOTES for link_into_queue -*/ - -static void unlink_from_queue(PAGECACHE_WQUEUE *wqueue, - struct st_my_thread_var *thread) -{ - KEYCACHE_DBUG_PRINT("unlink_from_queue", ("thread %ld", thread->id)); - if (thread->next == thread) - /* The queue contains only one member */ - wqueue->last_thread= NULL; - else - { - thread->next->prev= thread->prev; - *thread->prev=thread->next; - if (wqueue->last_thread == thread) - wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next, - thread->prev); - } - thread->next= NULL; -} - - -/* - Add a thread to single-linked queue of waiting threads - - SYNOPSIS - add_to_queue() - wqueue pointer to the queue structure - thread pointer to the thread to be added to the queue - - RETURN VALUE - none - - NOTES. - Queue is represented by a circular list of the thread structures - The list is single-linked of the type (*next), accessed by a pointer - to the last element. -*/ - -static inline void add_to_queue(PAGECACHE_WQUEUE *wqueue, - struct st_my_thread_var *thread) -{ - struct st_my_thread_var *last; - if (! (last= wqueue->last_thread)) - thread->next= thread; - else - { - thread->next= last->next; - last->next= thread; - } - wqueue->last_thread= thread; -} - - -/* - Remove all threads from queue signaling them to proceed - - SYNOPSIS - realease_queue() - wqueue pointer to the queue structure - thread pointer to the thread to be added to the queue - - RETURN VALUE - none - - NOTES. 
- See notes for add_to_queue - When removed from the queue each thread is signaled via condition - variable thread->suspend. -*/ - -static void release_queue(PAGECACHE_WQUEUE *wqueue) -{ - struct st_my_thread_var *last= wqueue->last_thread; - struct st_my_thread_var *next= last->next; - struct st_my_thread_var *thread; - do - { - thread=next; - KEYCACHE_DBUG_PRINT("release_queue: signal", ("thread %ld", thread->id)); - pagecache_pthread_cond_signal(&thread->suspend); - next=thread->next; - thread->next= NULL; - } - while (thread != last); - wqueue->last_thread= NULL; -} -#endif - - /* Unlink a block from the chain of dirty/clean blocks */ @@ -1273,6 +1137,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, PAGECACHE_BLOCK_LINK *ins; PAGECACHE_BLOCK_LINK **ptr_ins; + BLOCK_INFO(block); KEYCACHE_DBUG_ASSERT(! (block->hash_link && block->hash_link->requests)); #ifdef THREAD if (!hot && pagecache->waiting_for_block.last_thread) @@ -1297,7 +1162,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, { KEYCACHE_DBUG_PRINT("link_block: signal", ("thread %ld", thread->id)); pagecache_pthread_cond_signal(&thread->suspend); - unlink_from_queue(&pagecache->waiting_for_block, thread); + wqueue_unlink_from_queue(&pagecache->waiting_for_block, thread); block->requests++; } } @@ -1363,6 +1228,8 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) { + DBUG_ENTER("unlink_block"); + DBUG_PRINT("unlink_block", ("unlink 0x%lx", (ulong)block)); if (block->next_used == block) /* The list contains only one member */ pagecache->used_last= pagecache->used_ins= NULL; @@ -1381,14 +1248,15 @@ static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) KEYCACHE_THREAD_TRACE("unlink_block"); #if defined(PAGECACHE_DEBUG) + KEYCACHE_DBUG_ASSERT(pagecache->blocks_available != 0); pagecache->blocks_available--; 
KEYCACHE_DBUG_PRINT("unlink_block", ("unlinked block 0x%lx (%u) status=%x #requests=%u #available=%u", (ulong)block, BLOCK_NUMBER(pagecache, block), block->status, block->requests, pagecache->blocks_available)); BLOCK_INFO(block); - KEYCACHE_DBUG_ASSERT(pagecache->blocks_available >= 0); #endif + DBUG_VOID_RETURN; } @@ -1591,7 +1459,7 @@ static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link) { KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id)); pagecache_pthread_cond_signal(&thread->suspend); - unlink_from_queue(&pagecache->waiting_for_hash_link, thread); + wqueue_unlink_from_queue(&pagecache->waiting_for_hash_link, thread); } } while (thread != last_thread); @@ -1618,7 +1486,7 @@ static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link) pagecache Pagecache reference file file ID pageno page number in the file - start where to put pointer to found hash link (for + start where to put pointer to found hash bucket (for direct referring it) RETURN @@ -1627,7 +1495,7 @@ static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link) static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache, PAGECACHE_FILE *file, - maria_page_no_t pageno, + pgcache_page_no_t pageno, PAGECACHE_HASH_LINK ***start) { reg1 PAGECACHE_HASH_LINK *hash_link; @@ -1670,6 +1538,12 @@ static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache, KEYCACHE_DBUG_ASSERT(cnt <= pagecache->hash_links_used); #endif } + if (hash_link) + { + /* Register the request for the page */ + hash_link->requests++; + } + DBUG_RETURN(hash_link); } @@ -1680,7 +1554,7 @@ static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache, static PAGECACHE_HASH_LINK *get_hash_link(PAGECACHE *pagecache, PAGECACHE_FILE *file, - maria_page_no_t pageno) + pgcache_page_no_t pageno) { reg1 PAGECACHE_HASH_LINK *hash_link; PAGECACHE_HASH_LINK **start; @@ -1693,7 +1567,7 @@ restart: /* try to find the page in the cache */ 
hash_link= get_present_hash_link(pagecache, file, pageno, &start); - if (! hash_link) + if (!hash_link) { /* There is no hash link in the hash table for the pair (file, pageno) */ if (pagecache->free_hash_list) @@ -1714,7 +1588,7 @@ restart: page.file= *file; page.pageno= pageno; thread->opt_info= (void *) &page; - link_into_queue(&pagecache->waiting_for_hash_link, thread); + wqueue_link_into_queue(&pagecache->waiting_for_hash_link, thread); KEYCACHE_DBUG_PRINT("get_hash_link: wait", ("suspend thread %ld", thread->id)); pagecache_pthread_cond_wait(&thread->suspend, @@ -1723,14 +1597,15 @@ restart: #else KEYCACHE_DBUG_ASSERT(0); #endif + DBUG_PRINT("info", ("restarting...")); goto restart; } hash_link->file= *file; hash_link->pageno= pageno; link_hash(start, hash_link); + /* Register the request for the page */ + hash_link->requests++; } - /* Register the request for the page */ - hash_link->requests++; return hash_link; } @@ -1743,7 +1618,7 @@ restart: SYNOPSIS - find_key_block() + find_block() pagecache pointer to a page cache data structure file handler for the file to read page from pageno number of the page in the file @@ -1773,29 +1648,29 @@ restart: waits until first of this operations links any block back. 
*/ -static PAGECACHE_BLOCK_LINK *find_key_block(PAGECACHE *pagecache, - PAGECACHE_FILE *file, - maria_page_no_t pageno, - int init_hits_left, - my_bool wrmode, - my_bool reg_req, - int *page_st) +static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + int init_hits_left, + my_bool wrmode, + my_bool reg_req, + int *page_st) { PAGECACHE_HASH_LINK *hash_link; PAGECACHE_BLOCK_LINK *block; int error= 0; int page_status; - DBUG_ENTER("find_key_block"); - KEYCACHE_THREAD_TRACE("find_key_block:begin"); + DBUG_ENTER("find_block"); + KEYCACHE_THREAD_TRACE("find_block:begin"); DBUG_PRINT("enter", ("fd: %u pos %lu wrmode: %lu", (uint) file->file, (ulong) pageno, (uint) wrmode)); - KEYCACHE_DBUG_PRINT("find_key_block", ("fd: %u pos: %lu wrmode: %lu", - (uint) file->file, (ulong) pageno, - (uint) wrmode)); + KEYCACHE_DBUG_PRINT("find_block", ("fd: %u pos: %lu wrmode: %lu", + (uint) file->file, (ulong) pageno, + (uint) wrmode)); #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) DBUG_EXECUTE("check_pagecache", - test_key_cache(pagecache, "start of find_key_block", 0);); + test_key_cache(pagecache, "start of find_block", 0);); #endif restart: @@ -1840,10 +1715,10 @@ restart: { #ifdef THREAD struct st_my_thread_var *thread= my_thread_var; - add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); do { - KEYCACHE_DBUG_PRINT("find_key_block: wait", + KEYCACHE_DBUG_PRINT("find_block: wait", ("suspend thread %ld", thread->id)); pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); @@ -1871,7 +1746,7 @@ restart: { /* This is a request for a page to be removed from cache */ - KEYCACHE_DBUG_PRINT("find_key_block", + KEYCACHE_DBUG_PRINT("find_block", ("request for old page in block %u " "wrmode: %d block->status: %d", BLOCK_NUMBER(pagecache, block), wrmode, @@ -1888,17 +1763,17 @@ restart: else { hash_link->requests--; - KEYCACHE_DBUG_PRINT("find_key_block", 
+ KEYCACHE_DBUG_PRINT("find_block", ("request waiting for old page to be saved")); { #ifdef THREAD struct st_my_thread_var *thread= my_thread_var; /* Put the request into the queue of those waiting for the old page */ - add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); /* Wait until the request can be resubmitted */ do { - KEYCACHE_DBUG_PRINT("find_key_block: wait", + KEYCACHE_DBUG_PRINT("find_block: wait", ("suspend thread %ld", thread->id)); pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); @@ -1909,11 +1784,13 @@ restart: /* No parallel requests in single-threaded case */ #endif } - KEYCACHE_DBUG_PRINT("find_key_block", + KEYCACHE_DBUG_PRINT("find_block", ("request for old page resubmitted")); + DBUG_PRINT("info", ("restarting...")); /* Resubmit the request */ goto restart; } + block->status&= ~BLOCK_IN_SWITCH; } else { @@ -1941,7 +1818,8 @@ restart: pagecache->blocks_used++; } pagecache->blocks_unused--; - DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + DBUG_ASSERT((block->status & BLOCK_WRLOCK)); + DBUG_ASSERT(block->pins > 0); block->status= 0; #ifndef DBUG_OFF block->type= PAGECACHE_EMPTY_PAGE; @@ -1954,7 +1832,9 @@ restart: block->hash_link= hash_link; hash_link->block= block; page_status= PAGE_TO_BE_READ; - KEYCACHE_DBUG_PRINT("find_key_block", + DBUG_PRINT("info", ("page to be read set for page 0x%lx", + (ulong)block)); + KEYCACHE_DBUG_PRINT("find_block", ("got free or never used block %u", BLOCK_NUMBER(pagecache, block))); } @@ -1973,10 +1853,10 @@ restart: { struct st_my_thread_var *thread= my_thread_var; thread->opt_info= (void *) hash_link; - link_into_queue(&pagecache->waiting_for_block, thread); + wqueue_link_into_queue(&pagecache->waiting_for_block, thread); do { - KEYCACHE_DBUG_PRINT("find_key_block: wait", + KEYCACHE_DBUG_PRINT("find_block: wait", ("suspend thread %ld", thread->id)); pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); @@ 
-2001,19 +1881,18 @@ restart: reg_requests(pagecache, block,1); hash_link->block= block; } - else - { - DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); - } + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins > 0); if (block->hash_link != hash_link && ! (block->status & BLOCK_IN_SWITCH) ) { /* this is a primary request for a new page */ DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins > 0); block->status|= (BLOCK_IN_SWITCH | BLOCK_WRLOCK); - KEYCACHE_DBUG_PRINT("find_key_block", + KEYCACHE_DBUG_PRINT("find_block", ("got block %u for new page", BLOCK_NUMBER(pagecache, block))); @@ -2021,7 +1900,7 @@ restart: { /* The block contains a dirty page - push it out of the cache */ - KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty")); + KEYCACHE_DBUG_PRINT("find_block", ("block is dirty")); pagecache_pthread_mutex_unlock(&pagecache->cache_lock); /* @@ -2054,7 +1933,7 @@ restart: unlink_hash(pagecache, block->hash_link); /* All pending requests for this page must be resubmitted */ if (block->wqueue[COND_FOR_SAVED].last_thread) - release_queue(&block->wqueue[COND_FOR_SAVED]); + wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]); } link_to_file_list(pagecache, block, file, (my_bool)(block->hash_link ? 
1 : 0)); @@ -2065,6 +1944,8 @@ restart: #endif block->hash_link= hash_link; page_status= PAGE_TO_BE_READ; + DBUG_PRINT("info", ("page to be read set for page 0x%lx", + (ulong)block)); KEYCACHE_DBUG_ASSERT(block->hash_link->block == block); KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link); @@ -2072,7 +1953,7 @@ restart: else { /* This is for secondary requests for a new page only */ - KEYCACHE_DBUG_PRINT("find_key_block", + KEYCACHE_DBUG_PRINT("find_block", ("block->hash_link: %p hash_link: %p " "block->status: %u", block->hash_link, hash_link, block->status )); @@ -2087,7 +1968,7 @@ restart: { if (reg_req) reg_requests(pagecache, block, 1); - KEYCACHE_DBUG_PRINT("find_key_block", + KEYCACHE_DBUG_PRINT("find_block", ("block->hash_link: %p hash_link: %p " "block->status: %u", block->hash_link, hash_link, block->status )); @@ -2098,12 +1979,12 @@ restart: } KEYCACHE_DBUG_ASSERT(page_status != -1); - *page_st=page_status; + *page_st= page_status; DBUG_PRINT("info", ("block: 0x%lx fd: %u pos %lu block->status %u page_status %lu", (ulong) block, (uint) file->file, (ulong) pageno, block->status, (uint) page_status)); - KEYCACHE_DBUG_PRINT("find_key_block", + KEYCACHE_DBUG_PRINT("find_block", ("block: 0x%lx fd: %u pos %lu block->status %u page_status %lu", (ulong) block, (uint) file->file, (ulong) pageno, block->status, @@ -2111,16 +1992,16 @@ restart: #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) DBUG_EXECUTE("check_pagecache", - test_key_cache(pagecache, "end of find_key_block",0);); + test_key_cache(pagecache, "end of find_block",0);); #endif - KEYCACHE_THREAD_TRACE("find_key_block:end"); + KEYCACHE_THREAD_TRACE("find_block:end"); DBUG_RETURN(block); } -void pagecache_add_pin(PAGECACHE_BLOCK_LINK *block) +static void add_pin(PAGECACHE_BLOCK_LINK *block) { - DBUG_ENTER("pagecache_add_pin"); + DBUG_ENTER("add_pin"); DBUG_PRINT("enter", ("block 0x%lx pins: %u", (ulong) block, block->pins)); @@ -2137,9 +2018,9 @@ void pagecache_add_pin(PAGECACHE_BLOCK_LINK 
*block) DBUG_VOID_RETURN; } -void pagecache_remove_pin(PAGECACHE_BLOCK_LINK *block) +static void remove_pin(PAGECACHE_BLOCK_LINK *block) { - DBUG_ENTER("pagecache_remove_pin"); + DBUG_ENTER("remove_pin"); DBUG_PRINT("enter", ("block 0x%lx pins: %u", (ulong) block, block->pins)); @@ -2157,7 +2038,7 @@ void pagecache_remove_pin(PAGECACHE_BLOCK_LINK *block) DBUG_VOID_RETURN; } #ifdef PAGECACHE_DEBUG -void pagecache_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) +static void info_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) { PAGECACHE_LOCK_INFO *info= (PAGECACHE_LOCK_INFO *)my_malloc(sizeof(PAGECACHE_LOCK_INFO), MYF(0)); @@ -2166,7 +2047,7 @@ void pagecache_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) info_link((PAGECACHE_PIN_INFO **)&block->lock_list, (PAGECACHE_PIN_INFO *)info); } -void pagecache_remove_lock(PAGECACHE_BLOCK_LINK *block) +static void info_remove_lock(PAGECACHE_BLOCK_LINK *block) { PAGECACHE_LOCK_INFO *info= (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list, @@ -2175,7 +2056,7 @@ void pagecache_remove_lock(PAGECACHE_BLOCK_LINK *block) info_unlink((PAGECACHE_PIN_INFO *)info); my_free((gptr)info, MYF(0)); } -void pagecache_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) +static void info_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) { PAGECACHE_LOCK_INFO *info= (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list, @@ -2184,40 +2065,47 @@ void pagecache_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) info->write_lock= wl; } #else -#define pagecache_add_lock(B,W) -#define pagecache_remove_lock(B) -#define pagecache_change_lock(B,W) +#define info_add_lock(B,W) +#define info_remove_lock(B) +#define info_change_lock(B,W) #endif /* - Put on the block "update" type lock + Put on the block write lock SYNOPSIS - pagecache_lock_block() + get_wrlock() pagecache pointer to a page cache data structure block the block to work with RETURN 0 - OK - 1 - Try to lock the block failed + 1 - Can't lock 
this block, need retry */ -my_bool pagecache_lock_block(PAGECACHE *pagecache, - PAGECACHE_BLOCK_LINK *block) -{ - DBUG_ENTER("pagecache_lock_block"); +static my_bool get_wrlock(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block) +{ + PAGECACHE_FILE file= block->hash_link->file; + pgcache_page_no_t pageno= block->hash_link->pageno; + DBUG_ENTER("get_wrlock"); + DBUG_PRINT("info", ("the block 0x%lx " + "files %d(%d) pages %d(%d)", + (ulong)block, + file.file, block->hash_link->file.file, + pageno, block->hash_link->pageno)); BLOCK_INFO(block); while (block->status & BLOCK_WRLOCK) { - DBUG_PRINT("info", ("fail to lock, waiting...")); + DBUG_PRINT("info", ("fail to lock, waiting... 0x%lx", (ulong)block)); /* Lock failed we will wait */ #ifdef THREAD struct st_my_thread_var *thread= my_thread_var; - add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread); + wqueue_add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread); dec_counter_for_resize_op(pagecache); do { - KEYCACHE_DBUG_PRINT("pagecache_lock_block: wait", + KEYCACHE_DBUG_PRINT("get_wrlock: wait", ("suspend thread %ld", thread->id)); pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); @@ -2227,35 +2115,61 @@ my_bool pagecache_lock_block(PAGECACHE *pagecache, DBUG_ASSERT(0); #endif BLOCK_INFO(block); - DBUG_RETURN(1); + if ((block->status & (BLOCK_REASSIGNED | BLOCK_IN_SWITCH)) || + file.file != block->hash_link->file.file || + pageno != block->hash_link->pageno) + { + DBUG_PRINT("info", ("the block 0x%lx changed => need retry" + "status %x files %d != %d or pages %d !=%d", + (ulong)block, block->status, + file.file, block->hash_link->file.file, + pageno, block->hash_link->pageno)); + DBUG_RETURN(1); + } } - /* we are doing it by global cache mutex protectio, so it is OK */ + DBUG_ASSERT(block->pins == 0); + /* we are doing it by global cache mutex protection, so it is OK */ block->status|= BLOCK_WRLOCK; DBUG_PRINT("info", ("WR lock set, block 0x%lx", (ulong)block)); DBUG_RETURN(0); } -void 
pagecache_unlock_block(PAGECACHE_BLOCK_LINK *block) + +/* + Remove write lock from the block + + SYNOPSIS + release_wrlock() + pagecache pointer to a page cache data structure + block the block to work with + + RETURN + 0 - OK +*/ + +static void release_wrlock(PAGECACHE_BLOCK_LINK *block) { - DBUG_ENTER("pagecache_unlock_block"); + DBUG_ENTER("release_wrlock"); BLOCK_INFO(block); DBUG_ASSERT(block->status & BLOCK_WRLOCK); + DBUG_ASSERT(block->pins > 0); block->status&= ~BLOCK_WRLOCK; DBUG_PRINT("info", ("WR lock reset, block 0x%lx", (ulong)block)); #ifdef THREAD /* release all threads waiting for write lock */ if (block->wqueue[COND_FOR_WRLOCK].last_thread) - release_queue(&block->wqueue[COND_FOR_WRLOCK]); + wqueue_release_queue(&block->wqueue[COND_FOR_WRLOCK]); #endif BLOCK_INFO(block); DBUG_VOID_RETURN; } + /* - Try to lock/uplock and pin/unpin the block + Try to lock/unlock and pin/unpin the block SYNOPSIS - pagecache_make_lock_and_pin() + make_lock_and_pin() pagecache pointer to a page cache data structure block the block to work with lock lock change mode @@ -2266,12 +2180,12 @@ void pagecache_unlock_block(PAGECACHE_BLOCK_LINK *block) 1 - Try to lock the block failed */ -my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache, - PAGECACHE_BLOCK_LINK *block, - enum pagecache_page_lock lock, - enum pagecache_page_pin pin) +static my_bool make_lock_and_pin(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin) { - DBUG_ENTER("pagecache_make_lock_and_pin"); + DBUG_ENTER("make_lock_and_pin"); DBUG_PRINT("enter", ("block: 0x%lx (%u), wrlock: %c pins: %u, lock %s, pin: %s", (ulong)block, BLOCK_NUMBER(pagecache, block), ((block->status & BLOCK_WRLOCK)?'Y':'N'), @@ -2287,53 +2201,47 @@ my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache, { case PAGECACHE_LOCK_WRITE: /* free -> write */ /* Writelock and pin the buffer */ - if (pagecache_lock_block(pagecache, block)) + if (get_wrlock(pagecache, block)) { 
- DBUG_PRINT("info", ("restart")); - /* in case of fail pagecache_lock_block unlock cache */ - DBUG_RETURN(1); + /* can't lock => need retry */ + goto retry; } - /* The cache is locked so nothing afraid off */ - pagecache_add_pin(block); - pagecache_add_lock(block, 1); + + /* The cache is locked so nothing afraid of */ + add_pin(block); + info_add_lock(block, 1); break; case PAGECACHE_LOCK_WRITE_TO_READ: /* write -> read */ case PAGECACHE_LOCK_WRITE_UNLOCK: /* write -> free */ /* - Removes writelog and puts read lock (which is nothing in our + Removes write lock and puts read lock (which is nothing in our implementation) */ - pagecache_unlock_block(block); + release_wrlock(block); case PAGECACHE_LOCK_READ_UNLOCK: /* read -> free */ case PAGECACHE_LOCK_LEFT_READLOCKED: /* read -> read */ -#ifndef DBUG_OFF if (pin == PAGECACHE_UNPIN) { - pagecache_remove_pin(block); + remove_pin(block); } -#endif -#ifdef PAGECACHE_DEBUG if (lock == PAGECACHE_LOCK_WRITE_TO_READ) { - pagecache_change_lock(block, 0); + info_change_lock(block, 0); } else if (lock == PAGECACHE_LOCK_WRITE_UNLOCK || lock == PAGECACHE_LOCK_READ_UNLOCK) { - pagecache_remove_lock(block); + info_remove_lock(block); } -#endif break; case PAGECACHE_LOCK_READ: /* free -> read */ -#ifndef DBUG_OFF if (pin == PAGECACHE_PIN) { /* The cache is locked so nothing afraid off */ - pagecache_add_pin(block); + add_pin(block); } - pagecache_add_lock(block, 0); + info_add_lock(block, 0); break; -#endif case PAGECACHE_LOCK_LEFT_UNLOCKED: /* free -> free */ case PAGECACHE_LOCK_LEFT_WRITELOCKED: /* write -> write */ break; /* do nothing */ @@ -2343,6 +2251,16 @@ my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache, BLOCK_INFO(block); DBUG_RETURN(0); +retry: + DBUG_PRINT("INFO", ("Retry block 0x%lx", (ulong)block)); + BLOCK_INFO(block); + DBUG_ASSERT(block->hash_link->requests != 0); + block->hash_link->requests--; + DBUG_ASSERT(block->requests != 0); + unreg_request(pagecache, block, 1); + BLOCK_INFO(block); + 
DBUG_RETURN(1); + } @@ -2355,6 +2273,8 @@ my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache, pagecache pointer to a page cache data structure block block to which buffer the data is to be read primary <-> the current thread will read the data + validator validator of read from the disk data + validator_data pointer to the data need by the validator RETURN VALUE None @@ -2368,13 +2288,15 @@ my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache, static void read_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, - my_bool primary) + my_bool primary, + pagecache_disk_read_validator validator, + gptr validator_data) { uint got_length; /* On entry cache_lock is locked */ - KEYCACHE_THREAD_TRACE("read_block"); + DBUG_ENTER("read_block"); if (primary) { /* @@ -2382,8 +2304,8 @@ static void read_block(PAGECACHE *pagecache, that submitted primary requests */ - KEYCACHE_DBUG_PRINT("read_block", - ("page to be read by primary request")); + DBUG_PRINT("read_block", + ("page to be read by primary request")); /* Page is not in buffer yet, is to be read from disk */ pagecache_pthread_mutex_unlock(&pagecache->cache_lock); @@ -2400,11 +2322,15 @@ static void read_block(PAGECACHE *pagecache, else block->status= (BLOCK_READ | (block->status & BLOCK_WRLOCK)); - KEYCACHE_DBUG_PRINT("read_block", - ("primary request: new page in cache")); + if (validator != NULL && + (*validator)(block->buffer, validator_data)) + block->status|= BLOCK_ERROR; + + DBUG_PRINT("read_block", + ("primary request: new page in cache")); /* Signal that all pending requests for this page now can be processed */ if (block->wqueue[COND_FOR_REQUESTED].last_thread) - release_queue(&block->wqueue[COND_FOR_REQUESTED]); + wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]); } else { @@ -2412,17 +2338,17 @@ static void read_block(PAGECACHE *pagecache, This code is executed only by threads that submitted secondary requests */ - KEYCACHE_DBUG_PRINT("read_block", - ("secondary request waiting for new 
page to be read")); + DBUG_PRINT("read_block", + ("secondary request waiting for new page to be read")); { #ifdef THREAD struct st_my_thread_var *thread= my_thread_var; /* Put the request into a queue and wait until it can be processed */ - add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread); + wqueue_add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread); do { - KEYCACHE_DBUG_PRINT("read_block: wait", - ("suspend thread %ld", thread->id)); + DBUG_PRINT("read_block: wait", + ("suspend thread %ld", thread->id)); pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); } @@ -2432,9 +2358,10 @@ static void read_block(PAGECACHE *pagecache, /* No parallel requests in single-threaded case */ #endif } - KEYCACHE_DBUG_PRINT("read_block", - ("secondary request: new page in cache")); + DBUG_PRINT("read_block", + ("secondary request: new page in cache")); } + DBUG_VOID_RETURN; } @@ -2454,11 +2381,11 @@ static void read_block(PAGECACHE *pagecache, void pagecache_unlock_page(PAGECACHE *pagecache, PAGECACHE_FILE *file, - maria_page_no_t pageno, + pgcache_page_no_t pageno, enum pagecache_page_lock lock, enum pagecache_page_pin pin, my_bool stamp_this_page, - LSN first_REDO_LSN_for_page) + LSN_PTR first_REDO_LSN_for_page) { PAGECACHE_BLOCK_LINK *block; int page_st; @@ -2471,24 +2398,6 @@ void pagecache_unlock_page(PAGECACHE *pagecache, DBUG_ASSERT(pin != PAGECACHE_PIN && lock != PAGECACHE_LOCK_READ && lock != PAGECACHE_LOCK_WRITE); - if (pin == PAGECACHE_PIN_LEFT_UNPINNED && - lock == PAGECACHE_LOCK_READ_UNLOCK) - { -#ifndef DBUG_OFF - if ( -#endif - /* block do not need here so we do not provide it */ - pagecache_make_lock_and_pin(pagecache, 0, lock, pin) -#ifndef DBUG_OFF - ) - { - DBUG_ASSERT(0); /* should not happend */ - } -#else - ; -#endif - DBUG_VOID_RETURN; - } pagecache_pthread_mutex_lock(&pagecache->cache_lock); /* @@ -2498,7 +2407,7 @@ void pagecache_unlock_page(PAGECACHE *pagecache, DBUG_ASSERT(pagecache->can_be_used); 
inc_counter_for_resize_op(pagecache); - block= find_key_block(pagecache, file, pageno, 0, 0, 0, &page_st); + block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st); BLOCK_INFO(block); DBUG_ASSERT(block != 0 && page_st == PAGE_READ); if (stamp_this_page) @@ -2511,7 +2420,7 @@ void pagecache_unlock_page(PAGECACHE *pagecache, #ifndef DBUG_OFF if ( #endif - pagecache_make_lock_and_pin(pagecache, block, lock, pin) + make_lock_and_pin(pagecache, block, lock, pin) #ifndef DBUG_OFF ) { @@ -2549,7 +2458,7 @@ void pagecache_unlock_page(PAGECACHE *pagecache, void pagecache_unpin_page(PAGECACHE *pagecache, PAGECACHE_FILE *file, - maria_page_no_t pageno) + pgcache_page_no_t pageno) { PAGECACHE_BLOCK_LINK *block; int page_st; @@ -2565,7 +2474,7 @@ void pagecache_unpin_page(PAGECACHE *pagecache, DBUG_ASSERT(pagecache->can_be_used); inc_counter_for_resize_op(pagecache); - block= find_key_block(pagecache, file, pageno, 0, 0, 0, &page_st); + block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st); DBUG_ASSERT(block != 0 && page_st == PAGE_READ); #ifndef DBUG_OFF @@ -2576,9 +2485,9 @@ void pagecache_unpin_page(PAGECACHE *pagecache, a) we can't pin without any lock b) we can't unpin keeping write lock */ - pagecache_make_lock_and_pin(pagecache, block, - PAGECACHE_LOCK_LEFT_READLOCKED, - PAGECACHE_UNPIN) + make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_LEFT_READLOCKED, + PAGECACHE_UNPIN) #ifndef DBUG_OFF ) { @@ -2622,7 +2531,7 @@ void pagecache_unlock(PAGECACHE *pagecache, enum pagecache_page_lock lock, enum pagecache_page_pin pin, my_bool stamp_this_page, - LSN first_REDO_LSN_for_page) + LSN_PTR first_REDO_LSN_for_page) { PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link; DBUG_ENTER("pagecache_unlock"); @@ -2643,7 +2552,7 @@ void pagecache_unlock(PAGECACHE *pagecache, if ( #endif /* block do not need here so we do not provide it */ - pagecache_make_lock_and_pin(pagecache, 0, lock, pin) + make_lock_and_pin(pagecache, 0, lock, pin) #ifndef DBUG_OFF ) { @@ 
-2673,7 +2582,7 @@ void pagecache_unlock(PAGECACHE *pagecache, #ifndef DBUG_OFF if ( #endif - pagecache_make_lock_and_pin(pagecache, block, lock, pin) + make_lock_and_pin(pagecache, block, lock, pin) #ifndef DBUG_OFF ) { @@ -2736,9 +2645,9 @@ void pagecache_unpin(PAGECACHE *pagecache, a) we can't pin without any lock b) we can't unpin keeping write lock */ - pagecache_make_lock_and_pin(pagecache, block, - PAGECACHE_LOCK_LEFT_READLOCKED, - PAGECACHE_UNPIN) + make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_LEFT_READLOCKED, + PAGECACHE_UNPIN) #ifndef DBUG_OFF ) { @@ -2767,7 +2676,7 @@ void pagecache_unpin(PAGECACHE *pagecache, Read a block of data from a cached file into a buffer; SYNOPSIS - pagecache_read() + pagecache_valid_read() pagecache pointer to a page cache data structure file handler for the file for the block of data to be read pageno number of the block of data in the file @@ -2776,16 +2685,12 @@ void pagecache_unpin(PAGECACHE *pagecache, type type of the page lock lock change link link to the page if we pin it + validator validator of read from the disk data + validator_data pointer to the data need by the validator RETURN VALUE Returns address from where the data is placed if sucessful, 0 - otherwise. - NOTES. - - The function ensures that a block of data of size length from file - positioned at pageno is in the buffers for some key cache blocks. - Then the function copies the data into the buffer buff. 
- Pin will be choosen according to lock parameter (see lock_to_pin) */ static enum pagecache_page_pin lock_to_pin[]= @@ -2800,19 +2705,21 @@ static enum pagecache_page_pin lock_to_pin[]= PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_TO_READ*/ }; -byte *pagecache_read(PAGECACHE *pagecache, - PAGECACHE_FILE *file, - maria_page_no_t pageno, - uint level, - byte *buff, - enum pagecache_page_type type, - enum pagecache_page_lock lock, - PAGECACHE_PAGE_LINK *link) +byte *pagecache_valid_read(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + uint level, + byte *buff, + enum pagecache_page_type type, + enum pagecache_page_lock lock, + PAGECACHE_PAGE_LINK *link, + pagecache_disk_read_validator validator, + gptr validator_data) { int error= 0; enum pagecache_page_pin pin= lock_to_pin[lock]; PAGECACHE_PAGE_LINK fake_link; - DBUG_ENTER("page_cache_read"); + DBUG_ENTER("pagecache_valid_read"); DBUG_PRINT("enter", ("fd: %u page: %lu level: %u t:%s l%s p%s", (uint) file->file, (ulong) pageno, level, page_cache_page_type_str[type], @@ -2829,7 +2736,7 @@ restart: if (pagecache->can_be_used) { /* Key cache is used */ - reg1 PAGECACHE_BLOCK_LINK *block; + PAGECACHE_BLOCK_LINK *block; uint status; int page_st; @@ -2842,29 +2749,33 @@ restart: inc_counter_for_resize_op(pagecache); pagecache->global_cache_r_requests++; - block= find_key_block(pagecache, file, pageno, level, - ((lock == PAGECACHE_LOCK_WRITE) ? 1 : 0), - (((pin == PAGECACHE_PIN_LEFT_PINNED) || - (pin == PAGECACHE_UNPIN)) ? 
0 : 1), - &page_st); + block= find_block(pagecache, file, pageno, level, + test(lock == PAGECACHE_LOCK_WRITE), + test((pin == PAGECACHE_PIN_LEFT_PINNED) || + (pin == PAGECACHE_UNPIN)), + &page_st); DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || block->type == type); block->type= type; - if (pagecache_make_lock_and_pin(pagecache, block, lock, pin)) + if (block->status != BLOCK_ERROR && page_st != PAGE_READ) + { + DBUG_PRINT("info", ("read block 0x%lx", (ulong)block)); + /* The requested page is to be read into the block buffer */ + read_block(pagecache, block, + (my_bool)(page_st == PAGE_TO_BE_READ), + validator, validator_data); + DBUG_PRINT("info", ("read is done")); + } + if (make_lock_and_pin(pagecache, block, lock, pin)) { /* - We failed to write lock the block, cache is unlocked, and last write - lock is released, we will try to get the block again. + We failed to write lock the block, cache is unlocked, + we will try to get the block again. */ pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_PRINT("info", ("restarting...")); goto restart; } - if (block->status != BLOCK_ERROR && page_st != PAGE_READ) - { - /* The requested page is to be read into the block buffer */ - read_block(pagecache, block, - (my_bool)(page_st == PAGE_TO_BE_READ)); - } if (! ((status= block->status) & BLOCK_ERROR)) { @@ -2933,7 +2844,7 @@ no_key_cache: /* Key cache is not used */ */ my_bool pagecache_delete_page(PAGECACHE *pagecache, PAGECACHE_FILE *file, - maria_page_no_t pageno, + pgcache_page_no_t pageno, enum pagecache_page_lock lock, my_bool flush) { @@ -2969,13 +2880,14 @@ restart: } block= link->block; DBUG_ASSERT(block != 0); - if (pagecache_make_lock_and_pin(pagecache, block, lock, pin)) + if (make_lock_and_pin(pagecache, block, lock, pin)) { /* We failed to writelock the block, cache is unlocked, and last write lock is released, we will try to get the block again. 
*/ pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_PRINT("info", ("restarting...")); goto restart; } @@ -2983,7 +2895,7 @@ restart: { /* The block contains a dirty page - push it out of the cache */ - KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty")); + KEYCACHE_DBUG_PRINT("find_block", ("block is dirty")); pagecache_pthread_mutex_unlock(&pagecache->cache_lock); /* @@ -3015,9 +2927,10 @@ restart: } /* Cache is locked, so we can relese page before freeing it */ - pagecache_make_lock_and_pin(pagecache, block, - PAGECACHE_LOCK_WRITE_UNLOCK, - PAGECACHE_UNPIN); + make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN); + link->requests--; if (pin == PAGECACHE_PIN_LEFT_PINNED) unreg_request(pagecache, block, 1); free_block(pagecache, block); @@ -3053,11 +2966,12 @@ end: 0 if a success, 1 - otherwise. */ +/* description of how to change lock before and after write */ struct write_lock_change { - int need_lock_change; - enum pagecache_page_lock new_lock; - enum pagecache_page_lock unlock_lock; + int need_lock_change; /* need changing of lock at the end of write */ + enum pagecache_page_lock new_lock; /* lock at the beginning */ + enum pagecache_page_lock unlock_lock; /* lock at the end */ }; static struct write_lock_change write_lock_change_table[]= @@ -3084,10 +2998,11 @@ static struct write_lock_change write_lock_change_table[]= PAGECACHE_LOCK_WRITE_TO_READ}/*PAGECACHE_LOCK_WRITE_TO_READ*/ }; +/* description of how to change pin before and after write */ struct write_pin_change { - enum pagecache_page_pin new_pin; - enum pagecache_page_pin unlock_pin; + enum pagecache_page_pin new_pin; /* pin status at the beginning */ + enum pagecache_page_pin unlock_pin; /* pin status at the end */ }; static struct write_pin_change write_pin_change_table[]= @@ -3104,7 +3019,7 @@ static struct write_pin_change write_pin_change_table[]= my_bool pagecache_write(PAGECACHE *pagecache, PAGECACHE_FILE *file, - maria_page_no_t pageno, + 
pgcache_page_no_t pageno, uint level, byte *buff, enum pagecache_page_type type, @@ -3113,7 +3028,7 @@ my_bool pagecache_write(PAGECACHE *pagecache, enum pagecache_write_mode write_mode, PAGECACHE_PAGE_LINK *link) { - reg1 PAGECACHE_BLOCK_LINK *block; + reg1 PAGECACHE_BLOCK_LINK *block= NULL; PAGECACHE_PAGE_LINK fake_link; int error= 0; int need_lock_change= write_lock_change_table[lock].need_lock_change; @@ -3133,7 +3048,7 @@ my_bool pagecache_write(PAGECACHE *pagecache, if (write_mode == PAGECACHE_WRITE_NOW) { - /* we allow direct write if wwe do not use long term lockings */ + /* we allow direct write if we do not use long term lockings */ DBUG_ASSERT(lock == PAGECACHE_LOCK_LEFT_UNLOCKED); /* Force writing from buff into disk */ pagecache->global_cache_write++; @@ -3167,10 +3082,10 @@ restart: lock != PAGECACHE_LOCK_LEFT_WRITELOCKED && lock != PAGECACHE_LOCK_WRITE_UNLOCK && lock != PAGECACHE_LOCK_WRITE_TO_READ); - block= find_key_block(pagecache, file, pageno, level, - (need_wrlock ? 1 : 0), - (need_wrlock ? 1 : 0), - &page_st); + block= find_block(pagecache, file, pageno, level, + (need_wrlock ? 1 : 0), + (need_wrlock ? 1 : 0), + &page_st); } if (!block) { @@ -3186,24 +3101,25 @@ restart: block->type == type); block->type= type; - if (pagecache_make_lock_and_pin(pagecache, block, - write_lock_change_table[lock].new_lock, - (need_lock_change ? - write_pin_change_table[pin].new_pin : - pin))) + if (make_lock_and_pin(pagecache, block, + write_lock_change_table[lock].new_lock, + (need_lock_change ? + write_pin_change_table[pin].new_pin : + pin))) { /* We failed to writelock the block, cache is unlocked, and last write lock is released, we will try to get the block again. 
*/ pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_PRINT("info", ("restarting...")); goto restart; } if (write_mode == PAGECACHE_WRITE_DONE) { - if (block->status != BLOCK_ERROR && page_st != PAGE_READ) + if ((block->status & BLOCK_ERROR) && page_st != PAGE_READ) { /* Copy data from buff */ bmove512(block->buffer, buff, pagecache->block_size); @@ -3212,7 +3128,7 @@ restart: ("primary request: new page in cache")); /* Signal that all pending requests for this now can be processed. */ if (block->wqueue[COND_FOR_REQUESTED].last_thread) - release_queue(&block->wqueue[COND_FOR_REQUESTED]); + wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]); } } else @@ -3220,7 +3136,8 @@ restart: if (write_mode == PAGECACHE_WRITE_NOW) { /* buff has been written to disk at start */ - if (block->status & BLOCK_CHANGED) + if ((block->status & BLOCK_CHANGED) && + !(block->status & BLOCK_ERROR)) link_to_file_list(pagecache, block, &block->hash_link->file, 1); } else @@ -3231,8 +3148,8 @@ restart: if (! (block->status & BLOCK_ERROR)) { bmove512(block->buffer, buff, pagecache->block_size); + block->status|= BLOCK_READ; } - block->status|= BLOCK_READ; } @@ -3242,9 +3159,9 @@ restart: int rc= #endif #warning we are doing an unlock here, so need to give the page its rec_lsn! 
- pagecache_make_lock_and_pin(pagecache, block, - write_lock_change_table[lock].unlock_lock, - write_pin_change_table[pin].unlock_pin); + make_lock_and_pin(pagecache, block, + write_lock_change_table[lock].unlock_lock, + write_pin_change_table[pin].unlock_pin); #ifndef DBUG_OFF DBUG_ASSERT(rc == 0); #endif @@ -3255,10 +3172,7 @@ restart: block->hash_link->requests--; if (pin != PAGECACHE_PIN_LEFT_PINNED && pin != PAGECACHE_PIN) { - if (write_mode != PAGECACHE_WRITE_DONE) - { - unreg_request(pagecache, block, 1); - } + unreg_request(pagecache, block, 1); } else *link= (PAGECACHE_PAGE_LINK)block; @@ -3290,6 +3204,7 @@ end: DBUG_EXECUTE("exec", test_key_cache(pagecache, "end of key_cache_write", 1);); #endif + BLOCK_INFO(block); DBUG_RETURN(error); } @@ -3321,6 +3236,7 @@ static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) unlink_changed(block); DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins > 0); block->status= 0; #ifndef DBUG_OFF block->type= PAGECACHE_EMPTY_PAGE; @@ -3344,7 +3260,7 @@ static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) /* All pending requests for this page must be resubmitted. 
*/ if (block->wqueue[COND_FOR_SAVED].last_thread) - release_queue(&block->wqueue[COND_FOR_SAVED]); + wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]); } @@ -3398,12 +3314,13 @@ static int flush_cached_blocks(PAGECACHE *pagecache, } /* if the block is not pinned then it is not write locked */ DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins > 0); #ifndef DBUG_OFF { int rc= #endif - pagecache_make_lock_and_pin(pagecache, block, - PAGECACHE_LOCK_WRITE, PAGECACHE_PIN); + make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE, PAGECACHE_PIN); #ifndef DBUG_OFF DBUG_ASSERT(rc == 0); } @@ -3427,9 +3344,9 @@ static int flush_cached_blocks(PAGECACHE *pagecache, MYF(MY_NABP | MY_WAIT_IF_FULL)); pagecache_pthread_mutex_lock(&pagecache->cache_lock); - pagecache_make_lock_and_pin(pagecache, block, - PAGECACHE_LOCK_WRITE_UNLOCK, - PAGECACHE_UNPIN); + make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN); pagecache->global_cache_write++; if (error) @@ -3443,7 +3360,7 @@ static int flush_cached_blocks(PAGECACHE *pagecache, It might happen only during an operation to resize the key cache. */ if (block->wqueue[COND_FOR_SAVED].last_thread) - release_queue(&block->wqueue[COND_FOR_SAVED]); + wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]); /* type will never be FLUSH_IGNORE_CHANGED here */ if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) { @@ -3577,6 +3494,7 @@ restart: if ((error= flush_cached_blocks(pagecache, file, cache, end,type))) last_errno=error; + DBUG_PRINT("info", ("restarting...")); /* Restart the scan as some other thread might have changed the changed blocks chain: the blocks that were in switch @@ -3622,7 +3540,7 @@ removes a page from the list of dirty pages, while it's still dirty. 
A \ { #ifdef THREAD struct st_my_thread_var *thread= my_thread_var; - add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); do { KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait", @@ -3761,7 +3679,7 @@ static int flush_all_key_blocks(PAGECACHE *pagecache) 0 on success (always because it can't fail) */ -int reset_key_cache_counters(const char *name, PAGECACHE *key_cache) +static int reset_key_cache_counters(const char *name, PAGECACHE *key_cache) { DBUG_ENTER("reset_key_cache_counters"); if (!key_cache->inited) diff --git a/mysys/wqueue.c b/mysys/wqueue.c new file mode 100644 index 00000000000..28e044ff606 --- /dev/null +++ b/mysys/wqueue.c @@ -0,0 +1,167 @@ + +#include + +#define STRUCT_PTR(TYPE, MEMBER, a) \ + (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)) +/* + Link a thread into double-linked queue of waiting threads. + + SYNOPSIS + wqueue_link_into_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. + Queue is represented by a circular list of the thread structures + The list is double-linked of the type (**prev,*next), accessed by + a pointer to the last element. +*/ + +void wqueue_link_into_queue(WQUEUE *wqueue, struct st_my_thread_var *thread) +{ + struct st_my_thread_var *last; + if (!(last= wqueue->last_thread)) + { + /* Queue is empty */ + thread->next= thread; + thread->prev= &thread->next; + } + else + { + thread->prev= last->next->prev; + last->next->prev= &thread->next; + thread->next= last->next; + last->next= thread; + } + wqueue->last_thread= thread; +} + + +/* + Add a thread to single-linked queue of waiting threads + + SYNOPSIS + wqueue_add_to_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. 
+ Queue is represented by a circular list of the thread structures + The list is single-linked of the type (*next), accessed by a pointer + to the last element. +*/ + +void wqueue_add_to_queue(WQUEUE *wqueue, struct st_my_thread_var *thread) +{ + struct st_my_thread_var *last; + if (!(last= wqueue->last_thread)) + thread->next= thread; + else + { + thread->next= last->next; + last->next= thread; + } + wqueue->last_thread= thread; +} + +/* + Unlink a thread from double-linked queue of waiting threads + + SYNOPSIS + wqueue_unlink_from_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be removed from the queue + + RETURN VALUE + none + + NOTES. + See NOTES for link_into_queue +*/ + +void wqueue_unlink_from_queue(WQUEUE *wqueue, struct st_my_thread_var *thread) +{ + if (thread->next == thread) + /* The queue contains only one member */ + wqueue->last_thread= NULL; + else + { + thread->next->prev= thread->prev; + *thread->prev= thread->next; + if (wqueue->last_thread == thread) + wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next, + thread->prev); + } + thread->next= NULL; +} + + +/* + Remove all threads from queue signaling them to proceed + + SYNOPSIS + wqueue_realease_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. + See notes for add_to_queue + When removed from the queue each thread is signaled via condition + variable thread->suspend. 
+*/ + +void wqueue_release_queue(WQUEUE *wqueue) +{ + struct st_my_thread_var *last= wqueue->last_thread; + struct st_my_thread_var *next= last->next; + struct st_my_thread_var *thread; + do + { + thread= next; + pthread_cond_signal(&thread->suspend); + next= thread->next; + thread->next= NULL; + } + while (thread != last); + wqueue->last_thread= NULL; +} + + +/* + Add thread and wait + + SYNOPSYS + wqueue_add_and_wait() + wqueue queue to add to + thread thread which is waiting + lock mutex need for the operation +*/ + +void wqueue_add_and_wait(WQUEUE *wqueue, + struct st_my_thread_var *thread, pthread_mutex_t *lock) +{ + DBUG_ENTER("wqueue_add_and_wait"); + DBUG_PRINT("enter", ("thread ox%lxcond 0x%lx, mutex 0x%lx", + (ulong) thread, (ulong) &thread->suspend, (ulong) lock)); + wqueue_add_to_queue(wqueue, thread); + do + { + DBUG_PRINT("info", ("wait... cond 0x%lx, mutex 0x%lx", + (ulong) &thread->suspend, (ulong) lock)); + pthread_cond_wait(&thread->suspend, lock); + DBUG_PRINT("info", ("wait done cond 0x%lx, mutex 0x%lx, next 0x%lx", + (ulong) &thread->suspend, (ulong) lock, + (ulong) thread->next)); + } + while (thread->next); + DBUG_VOID_RETURN; +} -- cgit v1.2.1 From 91a8199773a8ee6b4d5a00b337d9b49a69dfc1ea Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 12 Feb 2007 14:23:43 +0200 Subject: Postmerge fix (including changing type of LSN) Some debug info and comments added include/pagecache.h: postmerge fix mysys/mf_pagecache.c: Postmerge fix (including changing type of LSN) Additional DBUG_ASSERTs added Comment about pinning mechanism added storage/maria/ma_control_file.c: Used the same LSN storing procedure everywhere Postmerge fix (including changing type of LSN) storage/maria/ma_control_file.h: Postmerge fix (including changing type of LSN) storage/maria/ma_loghandler.c: Postmerge fix (including changing type of LSN) storage/maria/ma_loghandler.h: Postmerge fix (including changing type of LSN) storage/maria/ma_loghandler_lsn.h: Postmerge fix (including 
changing type of LSN) storage/maria/unittest/Makefile.am: Postmerge fix storage/maria/unittest/ma_control_file-t.c: Postmerge fix (including changing type of LSN) storage/maria/unittest/ma_test_loghandler-t.c: Postmerge fix (including changing type of LSN) storage/maria/unittest/ma_test_loghandler_multigroup-t.c: Postmerge fix (including changing type of LSN) storage/maria/unittest/ma_test_loghandler_multithread-t.c: Postmerge fix (including changing type of LSN) storage/maria/unittest/ma_test_loghandler_pagecache-t.c: Postmerge fix (including changing type of LSN) storage/maria/unittest/mf_pagecache_consist.c: Postmerge fix (including changing type of LSN) storage/maria/unittest/mf_pagecache_single.c: Postmerge fix (including changing type of LSN) --- mysys/mf_pagecache.c | 158 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 92 insertions(+), 66 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 3773119109a..9ef3d5841b5 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -538,7 +538,7 @@ static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond); #define pagecache_pthread_cond_signal pthread_cond_signal #endif /* defined(PAGECACHE_DEBUG) */ -extern my_bool translog_flush(LSN *lsn); +extern my_bool translog_flush(LSN lsn); /* Write page to the disk @@ -570,13 +570,13 @@ static uint pagecache_fwrite(PAGECACHE *pagecache, DBUG_PRINT("info", ("Log handler call")); /* TODO: integrate with page format */ #define PAGE_LSN_OFFSET 0 - lsn7korr(&lsn, buffer + PAGE_LSN_OFFSET); + lsn= lsn7korr(buffer + PAGE_LSN_OFFSET); /* check CONTROL_FILE_IMPOSSIBLE_FILENO & CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET */ - DBUG_ASSERT(lsn.file_no != 0 && lsn.rec_offset != 0); - translog_flush(&lsn); + DBUG_ASSERT(lsn != 0); + translog_flush(lsn); } DBUG_RETURN(my_pwrite(filedesc->file, buffer, pagecache->block_size, (pageno)<<(pagecache->shift), flags)); @@ -1354,6 +1354,7 @@ static void unreg_request(PAGECACHE 
*pagecache, (ulong)block, BLOCK_NUMBER(pagecache, block), block->status, block->requests)); BLOCK_INFO(block); + DBUG_ASSERT(block->requests > 0); if (! --block->requests) { my_bool hot; @@ -1400,6 +1401,7 @@ static inline void remove_reader(PAGECACHE_BLOCK_LINK *block) { DBUG_ENTER("remove_reader"); BLOCK_INFO(block); + DBUG_ASSERT(block->hash_link->requests > 0); #ifdef THREAD if (! --block->hash_link->requests && block->condvar) pagecache_pthread_cond_signal(block->condvar); @@ -1722,12 +1724,14 @@ restart: if (page_status != PAGE_READ) { /* We don't need the page in the cache: we are going to write on disk */ + DBUG_ASSERT(hash_link->requests > 0); hash_link->requests--; unlink_hash(pagecache, hash_link); return 0; } if (!(block->status & BLOCK_IN_FLUSH)) { + DBUG_ASSERT(hash_link->requests > 0); hash_link->requests--; /* Remove block to invalidate the page in the block buffer @@ -1744,6 +1748,7 @@ restart: return 0; } /* Wait until the page is flushed on disk */ + DBUG_ASSERT(hash_link->requests > 0); hash_link->requests--; { #ifdef THREAD @@ -1795,6 +1800,7 @@ restart: } else { + DBUG_ASSERT(hash_link->requests > 0); hash_link->requests--; KEYCACHE_DBUG_PRINT("find_block", ("request waiting for old page to be saved")); @@ -1851,8 +1857,8 @@ restart: pagecache->blocks_used++; } pagecache->blocks_unused--; - DBUG_ASSERT((block->status & BLOCK_WRLOCK)); - DBUG_ASSERT(block->pins > 0); + DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins == 0); block->status= 0; #ifndef DBUG_OFF block->type= PAGECACHE_EMPTY_PAGE; @@ -1911,18 +1917,19 @@ restart: block->hits_left= init_hits_left; block->last_hit_time= 0; if (reg_req) - reg_requests(pagecache, block,1); + reg_requests(pagecache, block, 1); hash_link->block= block; } + BLOCK_INFO(block); DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); - DBUG_ASSERT(block->pins > 0); + DBUG_ASSERT(block->pins == 0); if (block->hash_link != hash_link && ! 
(block->status & BLOCK_IN_SWITCH) ) { /* this is a primary request for a new page */ DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); - DBUG_ASSERT(block->pins > 0); + DBUG_ASSERT(block->pins == 0); block->status|= (BLOCK_IN_SWITCH | BLOCK_WRLOCK); KEYCACHE_DBUG_PRINT("find_block", @@ -2289,9 +2296,9 @@ static my_bool make_lock_and_pin(PAGECACHE *pagecache, retry: DBUG_PRINT("INFO", ("Retry block 0x%lx", (ulong)block)); BLOCK_INFO(block); - DBUG_ASSERT(block->hash_link->requests != 0); + DBUG_ASSERT(block->hash_link->requests > 0); block->hash_link->requests--; - DBUG_ASSERT(block->requests != 0); + DBUG_ASSERT(block->requests > 0); unreg_request(pagecache, block, 1); BLOCK_INFO(block); DBUG_RETURN(1); @@ -2412,8 +2419,19 @@ static void read_block(PAGECACHE *pagecache, pageno number of the block of data in the file lock lock change pin pin page - stamp_this_page put LSN stamp on the page - first_REDO_LSN_for_page + first_REDO_LSN_for_page do not set it if it is zero + + NOTE + Pininig uses requests registration mechanism it works following way: + | beginnig | ending | + | of func. | of func. 
| + ----------------------------+-------------+---------------+ + PAGECACHE_PIN_LEFT_PINNED | - | - | + PAGECACHE_PIN_LEFT_UNPINNED | reg request | unreg request | + PAGECACHE_PIN | reg request | - | + PAGECACHE_UNPIN | - | unreg request | + + */ void pagecache_unlock_page(PAGECACHE *pagecache, @@ -2421,8 +2439,7 @@ void pagecache_unlock_page(PAGECACHE *pagecache, pgcache_page_no_t pageno, enum pagecache_page_lock lock, enum pagecache_page_pin pin, - my_bool stamp_this_page, - LSN_PTR first_REDO_LSN_for_page) + LSN first_REDO_LSN_for_page) { PAGECACHE_BLOCK_LINK *block; int page_st; @@ -2444,15 +2461,15 @@ void pagecache_unlock_page(PAGECACHE *pagecache, DBUG_ASSERT(pagecache->can_be_used); inc_counter_for_resize_op(pagecache); - block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st); + /* See NOTE for pagecache_unlock_page about registering requests */ + block= find_block(pagecache, file, pageno, 0, 0, + test(pin == PAGECACHE_PIN_LEFT_UNPINNED), &page_st); BLOCK_INFO(block); DBUG_ASSERT(block != 0 && page_st == PAGE_READ); - if (stamp_this_page) + if (first_REDO_LSN_for_page) { DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && pin == PAGECACHE_UNPIN); - /* TODO: insert LSN writing code */ - DBUG_ASSERT(first_REDO_LSN_for_page != 0); set_if_bigger(block->rec_lsn, first_REDO_LSN_for_page); } @@ -2472,7 +2489,8 @@ void pagecache_unlock_page(PAGECACHE *pagecache, remove_reader(block); /* Link the block into the LRU chain if it's the last submitted request - for the block and block will not be pinned + for the block and block will not be pinned. + See NOTE for pagecache_unlock_page about registering requests. 
*/ if (pin != PAGECACHE_PIN_LEFT_PINNED) unreg_request(pagecache, block, 1); @@ -2504,7 +2522,6 @@ void pagecache_unpin_page(PAGECACHE *pagecache, DBUG_ENTER("pagecache_unpin_page"); DBUG_PRINT("enter", ("fd: %u page: %lu", (uint) file->file, (ulong) pageno)); - pagecache_pthread_mutex_lock(&pagecache->cache_lock); /* As soon as we keep lock cache can be used, and we have lock bacause want @@ -2513,6 +2530,7 @@ void pagecache_unpin_page(PAGECACHE *pagecache, DBUG_ASSERT(pagecache->can_be_used); inc_counter_for_resize_op(pagecache); + /* See NOTE for pagecache_unlock_page about registering requests */ block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st); DBUG_ASSERT(block != 0 && page_st == PAGE_READ); @@ -2539,7 +2557,8 @@ void pagecache_unpin_page(PAGECACHE *pagecache, remove_reader(block); /* Link the block into the LRU chain if it's the last submitted request - for the block and block will not be pinned + for the block and block will not be pinned. + See NOTE for pagecache_unlock_page about registering requests */ unreg_request(pagecache, block, 1); @@ -2561,16 +2580,14 @@ void pagecache_unpin_page(PAGECACHE *pagecache, link direct link to page (returned by read or write) lock lock change pin pin page - stamp_this_page put LSN stamp on the page - first_REDO_LSN_for_page + first_REDO_LSN_for_page do not set it if it is zero */ void pagecache_unlock(PAGECACHE *pagecache, PAGECACHE_PAGE_LINK *link, enum pagecache_page_lock lock, enum pagecache_page_pin pin, - my_bool stamp_this_page, - LSN_PTR first_REDO_LSN_for_page) + LSN first_REDO_LSN_for_page) { PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link; DBUG_ENTER("pagecache_unlock"); @@ -2580,8 +2597,12 @@ void pagecache_unlock(PAGECACHE *pagecache, (ulong) block->hash_link->pageno, page_cache_page_lock_str[lock], page_cache_page_pin_str[pin])); - /* we do not allow any lock/pin increasing here */ + /* + We do not allow any lock/pin increasing here and page can't be + unpinned because we use direct 
link. + */ DBUG_ASSERT(pin != PAGECACHE_PIN && + pin != PAGECACHE_PIN_LEFT_UNPINNED && lock != PAGECACHE_LOCK_READ && lock != PAGECACHE_LOCK_WRITE); if (pin == PAGECACHE_PIN_LEFT_UNPINNED && @@ -2611,12 +2632,10 @@ void pagecache_unlock(PAGECACHE *pagecache, DBUG_ASSERT(pagecache->can_be_used); inc_counter_for_resize_op(pagecache); - if (stamp_this_page) + if (first_REDO_LSN_for_page) { DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && pin == PAGECACHE_UNPIN); - /* TODO: insert LSN writing code */ - DBUG_ASSERT(first_REDO_LSN_for_page != 0); set_if_bigger(block->rec_lsn, first_REDO_LSN_for_page); } @@ -2636,7 +2655,8 @@ void pagecache_unlock(PAGECACHE *pagecache, remove_reader(block); /* Link the block into the LRU chain if it's the last submitted request - for the block and block will not be pinned + for the block and block will not be pinned. + See NOTE for pagecache_unlock_page about registering requests. */ if (pin != PAGECACHE_PIN_LEFT_PINNED) unreg_request(pagecache, block, 1); @@ -2701,7 +2721,8 @@ void pagecache_unpin(PAGECACHE *pagecache, remove_reader(block); /* Link the block into the LRU chain if it's the last submitted request - for the block and block will not be pinned + for the block and block will not be pinned. + See NOTE for pagecache_unlock_page about registering requests. */ unreg_request(pagecache, block, 1); @@ -2790,10 +2811,11 @@ restart: inc_counter_for_resize_op(pagecache); pagecache->global_cache_r_requests++; + /* See NOTE for pagecache_unlock_page about registering requests. 
*/ block= find_block(pagecache, file, pageno, level, test(lock == PAGECACHE_LOCK_WRITE), - test((pin == PAGECACHE_PIN_LEFT_PINNED) || - (pin == PAGECACHE_UNPIN)), + test((pin == PAGECACHE_PIN_LEFT_UNPINNED) || + (pin == PAGECACHE_PIN)), &page_st); DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || block->type == type); @@ -2836,9 +2858,10 @@ restart: remove_reader(block); /* Link the block into the LRU chain if it's the last submitted request - for the block and block will not be pinned + for the block and block will not be pinned. + See NOTE for pagecache_unlock_page about registering requests. */ - if (pin != PAGECACHE_PIN_LEFT_PINNED && pin != PAGECACHE_PIN) + if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN) unreg_request(pagecache, block, 1); else *link= (PAGECACHE_PAGE_LINK)block; @@ -2898,6 +2921,8 @@ my_bool pagecache_delete_page(PAGECACHE *pagecache, page_cache_page_pin_str[pin])); DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE || lock == PAGECACHE_LOCK_LEFT_WRITELOCKED); + DBUG_ASSERT(pin == PAGECACHE_PIN || + pin == PAGECACHE_PIN_LEFT_PINNED); restart: @@ -2920,6 +2945,9 @@ restart: DBUG_RETURN(0); } block= link->block; + /* See NOTE for pagecache_unlock_page about registering requests. */ + if (pin == PAGECACHE_PIN) + reg_requests(pagecache, block, 1); DBUG_ASSERT(block != 0); if (make_lock_and_pin(pagecache, block, lock, pin)) { @@ -2972,9 +3000,9 @@ restart: make_lock_and_pin(pagecache, block, PAGECACHE_LOCK_WRITE_UNLOCK, PAGECACHE_UNPIN); + DBUG_ASSERT(link->requests > 0); link->requests--; - if (pin == PAGECACHE_PIN_LEFT_PINNED) - unreg_request(pagecache, block, 1); + /* See NOTE for pagecache_unlock_page about registering requests. 
*/ free_block(pagecache, block); err: @@ -3119,16 +3147,15 @@ restart: inc_counter_for_resize_op(pagecache); pagecache->global_cache_w_requests++; - { - int need_wrlock= (write_mode != PAGECACHE_WRITE_DONE && - lock != PAGECACHE_LOCK_LEFT_WRITELOCKED && - lock != PAGECACHE_LOCK_WRITE_UNLOCK && - lock != PAGECACHE_LOCK_WRITE_TO_READ); - block= find_block(pagecache, file, pageno, level, - (need_wrlock ? 1 : 0), - (need_wrlock ? 1 : 0), - &page_st); - } + /* See NOTE for pagecache_unlock_page about registering requests. */ + block= find_block(pagecache, file, pageno, level, + test(write_mode != PAGECACHE_WRITE_DONE && + lock != PAGECACHE_LOCK_LEFT_WRITELOCKED && + lock != PAGECACHE_LOCK_WRITE_UNLOCK && + lock != PAGECACHE_LOCK_WRITE_TO_READ), + test((pin == PAGECACHE_PIN_LEFT_UNPINNED) || + (pin == PAGECACHE_PIN)), + &page_st); if (!block) { DBUG_ASSERT(write_mode != PAGECACHE_WRITE_DONE); @@ -3212,12 +3239,11 @@ restart: } /* Unregister the request */ - + DBUG_ASSERT(block->hash_link->requests > 0); block->hash_link->requests--; - if (pin != PAGECACHE_PIN_LEFT_PINNED && pin != PAGECACHE_PIN) - { + /* See NOTE for pagecache_unlock_page about registering requests. 
*/ + if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN) unreg_request(pagecache, block, 1); - } else *link= (PAGECACHE_PAGE_LINK)block; @@ -3280,7 +3306,7 @@ static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) unlink_changed(block); DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); - DBUG_ASSERT(block->pins > 0); + DBUG_ASSERT(block->pins == 0); block->status= 0; #ifndef DBUG_OFF block->type= PAGECACHE_EMPTY_PAGE; @@ -3361,7 +3387,7 @@ static int flush_cached_blocks(PAGECACHE *pagecache, } /* if the block is not pinned then it is not write locked */ DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); - DBUG_ASSERT(block->pins > 0); + DBUG_ASSERT(block->pins == 0); #ifndef DBUG_OFF { int rc= @@ -3686,33 +3712,33 @@ int flush_pagecache_blocks(PAGECACHE *pagecache, Reset the counters of a key cache. SYNOPSIS - reset_key_cache_counters() + reset_pagecache_counters() name the name of a key cache - key_cache pointer to the key kache to be reset + pagecache pointer to the pagecache to be reset DESCRIPTION - This procedure is used by process_key_caches() to reset the counters of all - currently used key caches, both the default one and the named ones. + This procedure is used to reset the counters of all currently used key + caches, both the default one and the named ones. 
RETURN 0 on success (always because it can't fail) */ -static int reset_key_cache_counters(const char *name, PAGECACHE *key_cache) +int reset_pagecache_counters(const char *name, PAGECACHE *pagecache) { - DBUG_ENTER("reset_key_cache_counters"); - if (!key_cache->inited) + DBUG_ENTER("reset_pagecache_counters"); + if (!pagecache->inited) { DBUG_PRINT("info", ("Key cache %s not initialized.", name)); DBUG_RETURN(0); } DBUG_PRINT("info", ("Resetting counters for key cache %s.", name)); - key_cache->global_blocks_changed= 0; /* Key_blocks_not_flushed */ - key_cache->global_cache_r_requests= 0; /* Key_read_requests */ - key_cache->global_cache_read= 0; /* Key_reads */ - key_cache->global_cache_w_requests= 0; /* Key_write_requests */ - key_cache->global_cache_write= 0; /* Key_writes */ + pagecache->global_blocks_changed= 0; /* Key_blocks_not_flushed */ + pagecache->global_cache_r_requests= 0; /* Key_read_requests */ + pagecache->global_cache_read= 0; /* Key_reads */ + pagecache->global_cache_w_requests= 0; /* Key_write_requests */ + pagecache->global_cache_write= 0; /* Key_writes */ DBUG_RETURN(0); } -- cgit v1.2.1 From 3bc8f629dd35d832cbee14a26c187cb76e78bf6d Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 19 Feb 2007 23:01:27 +0200 Subject: =?UTF-8?q?Postreview=20changes.=20Fixed=20befaviour=20when=20logh?= =?UTF-8?q?andler=20flags=20changed=20from=20one=20run=20to=20another=20on?= =?UTF-8?q?e.=20Description=20of=20maria=20transaction=20log=20and=20contr?= =?UTF-8?q?ol=20file=20added=20to=20the=20file=20command=C3=8Ds=20magic=20?= =?UTF-8?q?number=20file.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mysys/mf_pagecache.c: postreview changes storage/maria/ma_control_file.c: Postreview changes. storage/maria/ma_control_file.h: Postreview changes. storage/maria/ma_loghandler.c: Postreview changes. Fixed befaviour when loghandler flags changed from one run to another one. storage/maria/ma_loghandler.h: Postreview changes. 
Functions comment left only near the function body. storage/maria/ma_loghandler_lsn.h: Postreview changes. storage/maria/unittest/ma_test_loghandler-t.c: Postreview changes. storage/maria/unittest/ma_test_loghandler_multigroup-t.c: Postreview changes. storage/maria/unittest/ma_test_loghandler_multithread-t.c: Postreview changes. storage/maria/unittest/ma_test_loghandler_pagecache-t.c: Postreview changes. support-files/magic: Description of maria transaction log and control file added to the file commandÕs magic number file. --- mysys/mf_pagecache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 9ef3d5841b5..1b9d48c80e6 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -570,7 +570,7 @@ static uint pagecache_fwrite(PAGECACHE *pagecache, DBUG_PRINT("info", ("Log handler call")); /* TODO: integrate with page format */ #define PAGE_LSN_OFFSET 0 - lsn= lsn7korr(buffer + PAGE_LSN_OFFSET); + lsn= lsn_korr(buffer + PAGE_LSN_OFFSET); /* check CONTROL_FILE_IMPOSSIBLE_FILENO & CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET -- cgit v1.2.1 From 3411bfe05a2a77c6c5b9911237792eb436f16543 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 1 Mar 2007 18:23:58 +0100 Subject: merge from MyISAM into Maria (last step of merge of 5.1 into Maria). Tests: "maria" and "ps_maria" fail like before merge (assertions), "ma_test_all" fails like before merge (ma_test2 segfaults, I'll try to find out why). 
mysys/mf_pagecache.c: using a more distinctive tag storage/maria/ha_maria.cc: merge from MyISAM into Maria storage/maria/ma_check.c: merge from MyISAM into Maria storage/maria/ma_close.c: TODO as a word storage/maria/ma_create.c: merge from MyISAM into Maria storage/maria/ma_delete_all.c: TODO as a word storage/maria/ma_delete_table.c: TODO as a word storage/maria/ma_dynrec.c: merge from MyISAM into Maria storage/maria/ma_extra.c: merge from MyISAM into Maria storage/maria/ma_ft_boolean_search.c: merge from MyISAM into Maria storage/maria/ma_locking.c: merge from MyISAM into Maria storage/maria/ma_loghandler.c: fix for compiler warning storage/maria/ma_open.c: merge from MyISAM into Maria. I will ask Monty to check the ASKMONTY-marked piece of code. storage/maria/ma_packrec.c: merge from MyISAM into Maria storage/maria/ma_range.c: merge from MyISAM into Maria storage/maria/ma_rename.c: TODO as a word storage/maria/ma_rt_index.c: merge from MyISAM into Maria storage/maria/ma_rt_split.c: merge from MyISAM into Maria storage/maria/ma_search.c: merge from MyISAM into Maria storage/maria/ma_sort.c: merge from MyISAM into Maria storage/maria/ma_update.c: merge from MyISAM into Maria storage/maria/ma_write.c: merge from MyISAM into Maria storage/maria/maria_chk.c: merge from MyISAM into Maria storage/maria/maria_def.h: merge from MyISAM into Maria storage/maria/maria_pack.c: merge from MyISAM into Maria storage/maria/unittest/ma_test_loghandler_pagecache-t.c: fix for compiler warning storage/myisam/ha_myisam.cc: merge from MyISAM into Maria storage/myisammrg/ha_myisammrg.cc: merge from MyISAM into Maria --- mysys/mf_pagecache.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c index 1b9d48c80e6..8fa651bdf36 100755 --- a/mysys/mf_pagecache.c +++ b/mysys/mf_pagecache.c @@ -3228,7 +3228,8 @@ restart: int rc= #endif /* - QQ: We are doing an unlock here, so need to give the page 
its rec_lsn + RECOVERY TODO BUG We are doing an unlock here, so need to give the + page its rec_lsn */ make_lock_and_pin(pagecache, block, write_lock_change_table[lock].unlock_lock, @@ -3590,13 +3591,13 @@ restart: else { /* Link the block into a list of blocks 'in switch' */ - /* QQ: - #warning this unlink_changed() is a serious problem for - Maria's Checkpoint: it removes a page from the list of dirty - pages, while it's still dirty. A solution is to abandon - first_in_switch, just wait for this page to be - flushed by somebody else, and loop. TODO: check all places - where we remove a page from the list of dirty pages + /* + RECOVERY TODO BUG this unlink_changed() is a serious problem for + Maria's Checkpoint: it removes a page from the list of dirty + pages, while it's still dirty. A solution is to abandon + first_in_switch, just wait for this page to be + flushed by somebody else, and loop. TODO: check all places + where we remove a page from the list of dirty pages */ unlink_changed(block); link_changed(block, &first_in_switch); -- cgit v1.2.1 From 39d64a1d2565b09307d11b2a665f3f2c6bc8106e Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 4 Apr 2007 23:37:09 +0300 Subject: Pagecache integration for review. storage/maria/unittest/ma_pagecache_single.c: Rename: storage/maria/unittest/mf_pagecache_single.c -> storage/maria/unittest/ma_pagecache_single.c include/maria.h: Pagecache integration. include/myisamchk.h: Pagecache integration. include/pagecache.h: removed WRITE_NOW mode Pagecache parameters management. mysys/Makefile.am: Safe hash procedures moved to the separate file. Pagecache moved to maria engine directory. mysys/mf_keycaches.c: Safe hash procedures moved to the separate file. sql/handler.cc: Pageccahe integration. sql/handler.h: Pagecache integration. sql/mysql_priv.h: pagecache integration sql/mysqld.cc: pagecache integration sql/set_var.cc: Pagecache integration. sql/set_var.h: Pagecache integration. 
storage/maria/Makefile.am: Pagecache integration and moving to maria engine directory. storage/maria/ha_maria.cc: File changed on PAGECCAHE_FILE. storage/maria/ma_bitmap.c: Pagecache integration. storage/maria/ma_blockrec.c: Pagecache integration. storage/maria/ma_check.c: File changed on PAGECCAHE_FILE. Pagecache integration. storage/maria/ma_close.c: File changed on PAGECCAHE_FILE. storage/maria/ma_delete_all.c: File changed on PAGECCAHE_FILE. storage/maria/ma_dynrec.c: File changed on PAGECCAHE_FILE. storage/maria/ma_extra.c: File changed on PAGECCAHE_FILE. storage/maria/ma_info.c: File changed on PAGECCAHE_FILE. storage/maria/ma_keycache.c: Pagecache integration. storage/maria/ma_locking.c: File changed on PAGECCAHE_FILE. storage/maria/ma_loghandler.c: Assert added. storage/maria/ma_loghandler.h: extern specifier added. storage/maria/ma_open.c: Pagecache integration. File changed on PAGECCAHE_FILE. storage/maria/ma_packrec.c: File changed on PAGECCAHE_FILE. storage/maria/ma_page.c: Pagecache integration. storage/maria/ma_pagecache.c: Pagecache renamed and moved to the maria directory. BLOCK_* defines renamed to avoid conflict with BLOCK_ERROR defined in maria_def.h storage/maria/ma_panic.c: File changed on PAGECCAHE_FILE. storage/maria/ma_preload.c: Pagecache integration. File changed on PAGECCAHE_FILE. storage/maria/ma_static.c: Pagecache integration. storage/maria/ma_test1.c: Pagecache integration. storage/maria/ma_test2.c: Pagecache integration. storage/maria/ma_test3.c: Pagecache integration. storage/maria/ma_write.c: File changed on PAGECCAHE_FILE. storage/maria/maria_chk.c: Pagecache integration. File changed on PAGECCAHE_FILE. storage/maria/maria_def.h: Pagecache integration. File changed on PAGECCAHE_FILE. storage/maria/maria_ftdump.c: Pagecache integration. storage/maria/maria_pack.c: File changed on PAGECCAHE_FILE. storage/maria/unittest/Makefile.am: Pagecache moved to the maria directory. 
storage/maria/unittest/ma_pagecache_consist.c: fixed using uninitialized variable storage/maria/ma_pagecaches.c: New BitKeeper file ``storage/maria/ma_pagecaches.c'' mysys/my_safehash.h: New BitKeeper file ``mysys/my_safehash.h'' --- mysys/Makefile.am | 3 +- mysys/mf_keycaches.c | 269 +--- mysys/mf_pagecache.c | 4102 -------------------------------------------------- mysys/my_safehash.h | 58 + 4 files changed, 64 insertions(+), 4368 deletions(-) delete mode 100755 mysys/mf_pagecache.c create mode 100644 mysys/my_safehash.h (limited to 'mysys') diff --git a/mysys/Makefile.am b/mysys/Makefile.am index bec72263ba4..3c686733b59 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -26,6 +26,7 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ mf_path.c mf_loadpath.c my_file.c \ my_open.c my_create.c my_dup.c my_seek.c my_read.c \ my_pread.c my_write.c my_getpagesize.c \ + my_safehash.c \ mf_keycache.c mf_keycaches.c my_crc32.c \ mf_iocache.c mf_iocache2.c mf_cache.c mf_tempfile.c \ mf_tempdir.c my_lock.c mf_brkhant.c my_alarm.c \ @@ -55,7 +56,7 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ my_handler.c my_netware.c my_largepage.c \ my_memmem.c \ my_windac.c my_access.c base64.c my_libwrap.c \ - mf_pagecache.c wqueue.c + wqueue.c EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \ thr_mutex.c thr_rwlock.c \ CMakeLists.txt mf_soundex.c \ diff --git a/mysys/mf_keycaches.c b/mysys/mf_keycaches.c index 4c20f69053d..39ff536d3e0 100644 --- a/mysys/mf_keycaches.c +++ b/mysys/mf_keycaches.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003 MySQL AB +/* Copyright (C) 2003-2007 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,270 +26,7 @@ #include #include #include - -/***************************************************************************** - General functions to handle SAFE_HASH objects. 
- - A SAFE_HASH object is used to store the hash, the mutex and default value - needed by the rest of the key cache code. - This is a separate struct to make it easy to later reuse the code for other - purposes - - All entries are linked in a list to allow us to traverse all elements - and delete selected ones. (HASH doesn't allow any easy ways to do this). -*****************************************************************************/ - -/* - Struct to store a key and pointer to object -*/ - -typedef struct st_safe_hash_entry -{ - byte *key; - uint length; - byte *data; - struct st_safe_hash_entry *next, **prev; -} SAFE_HASH_ENTRY; - - -typedef struct st_safe_hash_with_default -{ -#ifdef THREAD - rw_lock_t mutex; -#endif - HASH hash; - byte *default_value; - SAFE_HASH_ENTRY *root; -} SAFE_HASH; - - -/* - Free a SAFE_HASH_ENTRY - - This function is called by the hash object on delete -*/ - -static void safe_hash_entry_free(SAFE_HASH_ENTRY *entry) -{ - DBUG_ENTER("free_assign_entry"); - my_free((gptr) entry, MYF(0)); - DBUG_VOID_RETURN; -} - - -/* Get key and length for a SAFE_HASH_ENTRY */ - -static byte *safe_hash_entry_get(SAFE_HASH_ENTRY *entry, uint *length, - my_bool not_used __attribute__((unused))) -{ - *length=entry->length; - return (byte*) entry->key; -} - - -/* - Init a SAFE_HASH object - - SYNOPSIS - safe_hash_init() - hash safe_hash handler - elements Expected max number of elements - default_value default value - - NOTES - In case of error we set hash->default_value to 0 to allow one to call - safe_hash_free on an object that couldn't be initialized. 
- - RETURN - 0 ok - 1 error -*/ - -static my_bool safe_hash_init(SAFE_HASH *hash, uint elements, - byte *default_value) -{ - DBUG_ENTER("safe_hash"); - if (hash_init(&hash->hash, &my_charset_bin, elements, - 0, 0, (hash_get_key) safe_hash_entry_get, - (void (*)(void*)) safe_hash_entry_free, 0)) - { - hash->default_value= 0; - DBUG_RETURN(1); - } - my_rwlock_init(&hash->mutex, 0); - hash->default_value= default_value; - hash->root= 0; - DBUG_RETURN(0); -} - - -/* - Free a SAFE_HASH object - - NOTES - This is safe to call on any object that has been sent to safe_hash_init() -*/ - -static void safe_hash_free(SAFE_HASH *hash) -{ - /* - Test if safe_hash_init succeeded. This will also guard us against multiple - free calls. - */ - if (hash->default_value) - { - hash_free(&hash->hash); - rwlock_destroy(&hash->mutex); - hash->default_value=0; - } -} - -/* - Return the value stored for a key or default value if no key -*/ - -static byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length, - byte *def) -{ - byte *result; - DBUG_ENTER("safe_hash_search"); - rw_rdlock(&hash->mutex); - result= hash_search(&hash->hash, key, length); - rw_unlock(&hash->mutex); - if (!result) - result= def; - else - result= ((SAFE_HASH_ENTRY*) result)->data; - DBUG_PRINT("exit",("data: 0x%lx", (long) result)); - DBUG_RETURN(result); -} - - -/* - Associate a key with some data - - SYONOPSIS - safe_hash_set() - hash Hash handle - key key (path to table etc..) - length Length of key - data data to to associate with the data - - NOTES - This can be used both to insert a new entry and change an existing - entry. - If one associates a key with the default key cache, the key is deleted - - RETURN - 0 ok - 1 error (Can only be EOM). In this case my_message() is called. 
-*/ - -static my_bool safe_hash_set(SAFE_HASH *hash, const byte *key, uint length, - byte *data) -{ - SAFE_HASH_ENTRY *entry; - my_bool error= 0; - DBUG_ENTER("safe_hash_set"); - DBUG_PRINT("enter",("key: %.*s data: 0x%lx", length, key, (long) data)); - - rw_wrlock(&hash->mutex); - entry= (SAFE_HASH_ENTRY*) hash_search(&hash->hash, key, length); - - if (data == hash->default_value) - { - /* - The key is to be associated with the default entry. In this case - we can just delete the entry (if it existed) from the hash as a - search will return the default entry - */ - if (!entry) /* nothing to do */ - goto end; - /* unlink entry from list */ - if ((*entry->prev= entry->next)) - entry->next->prev= entry->prev; - hash_delete(&hash->hash, (byte*) entry); - goto end; - } - if (entry) - { - /* Entry existed; Just change the pointer to point at the new data */ - entry->data= data; - } - else - { - if (!(entry= (SAFE_HASH_ENTRY *) my_malloc(sizeof(*entry) + length, - MYF(MY_WME)))) - { - error= 1; - goto end; - } - entry->key= (byte*) (entry +1); - memcpy((char*) entry->key, (char*) key, length); - entry->length= length; - entry->data= data; - /* Link entry to list */ - if ((entry->next= hash->root)) - entry->next->prev= &entry->next; - entry->prev= &hash->root; - hash->root= entry; - if (my_hash_insert(&hash->hash, (byte*) entry)) - { - /* This can only happen if hash got out of memory */ - my_free((char*) entry, MYF(0)); - error= 1; - goto end; - } - } - -end: - rw_unlock(&hash->mutex); - DBUG_RETURN(error); -} - - -/* - Change all entres with one data value to another data value - - SYONOPSIS - safe_hash_change() - hash Hash handle - old_data Old data - new_data Change all 'old_data' to this - - NOTES - We use the linked list to traverse all elements in the hash as - this allows us to delete elements in the case where 'new_data' is the - default value. 
-*/ - -static void safe_hash_change(SAFE_HASH *hash, byte *old_data, byte *new_data) -{ - SAFE_HASH_ENTRY *entry, *next; - DBUG_ENTER("safe_hash_set"); - - rw_wrlock(&hash->mutex); - - for (entry= hash->root ; entry ; entry= next) - { - next= entry->next; - if (entry->data == old_data) - { - if (new_data == hash->default_value) - { - if ((*entry->prev= entry->next)) - entry->next->prev= entry->prev; - hash_delete(&hash->hash, (byte*) entry); - } - else - entry->data= new_data; - } - } - - rw_unlock(&hash->mutex); - DBUG_VOID_RETURN; -} - +#include "my_safehash.h" /***************************************************************************** Functions to handle the key cache objects @@ -366,3 +103,5 @@ void multi_key_cache_change(KEY_CACHE *old_data, { safe_hash_change(&key_cache_hash, (byte*) old_data, (byte*) new_data); } + + diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c deleted file mode 100755 index 1b9d48c80e6..00000000000 --- a/mysys/mf_pagecache.c +++ /dev/null @@ -1,4102 +0,0 @@ -/* Copyright (C) 2000-2006 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -/* - These functions handle page cacheing for Maria tables. - - One cache can handle many files. - It must contain buffers of the same blocksize. - init_pagecache() should be used to init cache handler. 
- - The free list (free_block_list) is a stack like structure. - When a block is freed by free_block(), it is pushed onto the stack. - When a new block is required it is first tried to pop one from the stack. - If the stack is empty, it is tried to get a never-used block from the pool. - If this is empty too, then a block is taken from the LRU ring, flushing it - to disk, if necessary. This is handled in find_block(). - With the new free list, the blocks can have three temperatures: - hot, warm and cold (which is free). This is remembered in the block header - by the enum BLOCK_TEMPERATURE temperature variable. Remembering the - temperature is necessary to correctly count the number of warm blocks, - which is required to decide when blocks are allowed to become hot. Whenever - a block is inserted to another (sub-)chain, we take the old and new - temperature into account to decide if we got one more or less warm block. - blocks_unused is the sum of never used blocks in the pool and of currently - free blocks. blocks_used is the number of blocks fetched from the pool and - as such gives the maximum number of in-use blocks at any time. 
-*/ - -#include "mysys_priv.h" -#include -#include -#include "my_static.h" -#include -#include -#include - -/* - Some compilation flags have been added specifically for this module - to control the following: - - not to let a thread to yield the control when reading directly - from page cache, which might improve performance in many cases; - to enable this add: - #define SERIALIZED_READ_FROM_CACHE - - to set an upper bound for number of threads simultaneously - using the page cache; this setting helps to determine an optimal - size for hash table and improve performance when the number of - blocks in the page cache much less than the number of threads - accessing it; - to set this number equal to add - #define MAX_THREADS - - to substitute calls of pthread_cond_wait for calls of - pthread_cond_timedwait (wait with timeout set up); - this setting should be used only when you want to trap a deadlock - situation, which theoretically should not happen; - to set timeout equal to seconds add - #define PAGECACHE_TIMEOUT - - to enable the module traps and to send debug information from - page cache module to a special debug log add: - #define PAGECACHE_DEBUG - the name of this debug log file can be set through: - #define PAGECACHE_DEBUG_LOG - if the name is not defined, it's set by default; - if the PAGECACHE_DEBUG flag is not set up and we are in a debug - mode, i.e. when ! defined(DBUG_OFF), the debug information from the - module is sent to the regular debug log. - - Example of the settings: - #define SERIALIZED_READ_FROM_CACHE - #define MAX_THREADS 100 - #define PAGECACHE_TIMEOUT 1 - #define PAGECACHE_DEBUG - #define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log" -*/ - -/* - In key cache we have external raw locking here we use - SERIALIZED_READ_FROM_CACHE to avoid problem of reading - not consistent data from the page. 
- (keycache functions (key_cache_read(), key_cache_insert() and - key_cache_write()) rely on external MyISAM lock, we don't) -*/ -#define SERIALIZED_READ_FROM_CACHE yes - -#define BLOCK_INFO(B) \ - DBUG_PRINT("info", \ - ("block 0x%lx file %lu page %lu s %0x hshL 0x%lx req %u/%u " \ - "wrlock: %c", \ - (ulong)(B), \ - (ulong)((B)->hash_link ? \ - (B)->hash_link->file.file : \ - 0), \ - (ulong)((B)->hash_link ? \ - (B)->hash_link->pageno : \ - 0), \ - (B)->status, \ - (ulong)(B)->hash_link, \ - (uint) (B)->requests, \ - (uint)((B)->hash_link ? \ - (B)->hash_link->requests : \ - 0), \ - ((block->status & BLOCK_WRLOCK)?'Y':'N'))) - -/* TODO: put it to my_static.c */ -my_bool my_disable_flush_pagecache_blocks= 0; - -#define STRUCT_PTR(TYPE, MEMBER, a) \ - (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)) - -/* types of condition variables */ -#define COND_FOR_REQUESTED 0 /* queue of thread waiting for read operation */ -#define COND_FOR_SAVED 1 /* queue of thread waiting for flush */ -#define COND_FOR_WRLOCK 2 /* queue of write lock */ -#define COND_SIZE 3 /* number of COND_* queues */ - -typedef pthread_cond_t KEYCACHE_CONDVAR; - -/* descriptor of the page in the page cache block buffer */ -struct st_pagecache_page -{ - PAGECACHE_FILE file; /* file to which the page belongs to */ - pgcache_page_no_t pageno; /* number of the page in the file */ -}; - -/* element in the chain of a hash table bucket */ -struct st_pagecache_hash_link -{ - struct st_pagecache_hash_link - *next, **prev; /* to connect links in the same bucket */ - struct st_pagecache_block_link - *block; /* reference to the block for the page: */ - PAGECACHE_FILE file; /* from such a file */ - pgcache_page_no_t pageno; /* this page */ - uint requests; /* number of requests for the page */ -}; - -/* simple states of a block */ -#define BLOCK_ERROR 1 /* an error occurred when performing disk i/o */ -#define BLOCK_READ 2 /* the is page in the block buffer */ -#define BLOCK_IN_SWITCH 4 /* block is preparing to 
read new page */ -#define BLOCK_REASSIGNED 8 /* block does not accept requests for old page */ -#define BLOCK_IN_FLUSH 16 /* block is in flush operation */ -#define BLOCK_CHANGED 32 /* block buffer contains a dirty page */ -#define BLOCK_WRLOCK 64 /* write locked block */ - -/* page status, returned by find_block */ -#define PAGE_READ 0 -#define PAGE_TO_BE_READ 1 -#define PAGE_WAIT_TO_BE_READ 2 - -/* block temperature determines in which (sub-)chain the block currently is */ -enum BLOCK_TEMPERATURE { BLOCK_COLD /*free*/ , BLOCK_WARM , BLOCK_HOT }; - -/* debug info */ -#ifndef DBUG_OFF -static char *page_cache_page_type_str[]= -{ - (char*)"PLAIN", - (char*)"LSN" -}; -static char *page_cache_page_write_mode_str[]= -{ - (char*)"DELAY", - (char*)"NOW", - (char*)"DONE" -}; -static char *page_cache_page_lock_str[]= -{ - (char*)"free -> free ", - (char*)"read -> read ", - (char*)"write -> write", - (char*)"free -> read ", - (char*)"free -> write", - (char*)"read -> free ", - (char*)"write -> free ", - (char*)"write -> read " -}; -static char *page_cache_page_pin_str[]= -{ - (char*)"pinned -> pinned ", - (char*)"unpinned -> unpinned", - (char*)"unpinned -> pinned ", - (char*)"pinned -> unpinned" -}; -#endif -#ifdef PAGECACHE_DEBUG -typedef struct st_pagecache_pin_info -{ - struct st_pagecache_pin_info *next, **prev; - struct st_my_thread_var *thread; -} PAGECACHE_PIN_INFO; -/* - st_pagecache_lock_info structure should be kept in next, prev, thread part - compatible with st_pagecache_pin_info to be compatible in functions. 
-*/ -typedef struct st_pagecache_lock_info -{ - struct st_pagecache_lock_info *next, **prev; - struct st_my_thread_var *thread; - my_bool write_lock; -} PAGECACHE_LOCK_INFO; - - -/* service functions maintain debugging info about pin & lock */ - - -/* - Links information about thread pinned/locked the block to the list - - SYNOPSIS - info_link() - list the list to link in - node the node which should be linked -*/ - -static void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node) -{ - if ((node->next= *list)) - node->next->prev= &(node->next); - *list= node; - node->prev= list; -} - - -/* - Unlinks information about thread pinned/locked the block from the list - - SYNOPSIS - info_unlink() - node the node which should be unlinked -*/ - -static void info_unlink(PAGECACHE_PIN_INFO *node) -{ - if ((*node->prev= node->next)) - node->next->prev= node->prev; -} - - -/* - Finds information about given thread in the list of threads which - pinned/locked this block. - - SYNOPSIS - info_find() - list the list where to find the thread - thread thread ID (reference to the st_my_thread_var - of the thread) - - RETURN - 0 - the thread was not found - pointer to the information node of the thread in the list -*/ - -static PAGECACHE_PIN_INFO *info_find(PAGECACHE_PIN_INFO *list, - struct st_my_thread_var *thread) -{ - register PAGECACHE_PIN_INFO *i= list; - for(; i != 0; i= i->next) - if (i->thread == thread) - return i; - return 0; -} -#endif - -/* page cache block */ -struct st_pagecache_block_link -{ - struct st_pagecache_block_link - *next_used, **prev_used; /* to connect links in the LRU chain (ring) */ - struct st_pagecache_block_link - *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */ - struct st_pagecache_hash_link - *hash_link; /* backward ptr to referring hash_link */ - WQUEUE - wqueue[COND_SIZE]; /* queues on waiting requests for new/old pages */ - uint requests; /* number of requests for the block */ - byte *buffer; /* buffer for the 
block page */ - uint status; /* state of the block */ - uint pins; /* pin counter */ -#ifdef PAGECACHE_DEBUG - PAGECACHE_PIN_INFO *pin_list; - PAGECACHE_LOCK_INFO *lock_list; -#endif - enum BLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */ - enum pagecache_page_type type; /* type of the block */ - uint hits_left; /* number of hits left until promotion */ - ulonglong last_hit_time; /* timestamp of the last hit */ - LSN rec_lsn; /* LSN when first became dirty */ - KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */ -}; - -#ifdef PAGECACHE_DEBUG -/* debug checks */ -static my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, - enum pagecache_page_pin mode) -{ - struct st_my_thread_var *thread= my_thread_var; - PAGECACHE_PIN_INFO *info= info_find(block->pin_list, thread); - DBUG_ENTER("info_check_pin"); - if (info) - { - if (mode == PAGECACHE_PIN_LEFT_UNPINNED) - { - DBUG_PRINT("info", - ("info_check_pin: thread: 0x%lx block 0x%lx: LEFT_UNPINNED!!!", - (ulong)thread, (ulong)block)); - DBUG_RETURN(1); - } - else if (mode == PAGECACHE_PIN) - { - DBUG_PRINT("info", - ("info_check_pin: thread: 0x%lx block 0x%lx: PIN!!!", - (ulong)thread, (ulong)block)); - DBUG_RETURN(1); - } - } - else - { - if (mode == PAGECACHE_PIN_LEFT_PINNED) - { - DBUG_PRINT("info", - ("info_check_pin: thread: 0x%lx block 0x%lx: LEFT_PINNED!!!", - (ulong)thread, (ulong)block)); - DBUG_RETURN(1); - } - else if (mode == PAGECACHE_UNPIN) - { - DBUG_PRINT("info", - ("info_check_pin: thread: 0x%lx block 0x%lx: UNPIN!!!", - (ulong)thread, (ulong)block)); - DBUG_RETURN(1); - } - } - DBUG_RETURN(0); -} - - -/* - Debug function which checks current lock/pin state and requested changes - - SYNOPSIS - info_check_lock() - lock requested lock changes - pin requested pin changes - - RETURN - 0 - OK - 1 - Error -*/ - -static my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, - enum pagecache_page_lock lock, - enum pagecache_page_pin pin) -{ - struct st_my_thread_var 
*thread= my_thread_var; - PAGECACHE_LOCK_INFO *info= - (PAGECACHE_LOCK_INFO *) info_find((PAGECACHE_PIN_INFO *) block->lock_list, - thread); - DBUG_ENTER("info_check_lock"); - switch(lock) - { - case PAGECACHE_LOCK_LEFT_UNLOCKED: - if (pin != PAGECACHE_PIN_LEFT_UNPINNED || - info) - goto error; - break; - case PAGECACHE_LOCK_LEFT_READLOCKED: - if ((pin != PAGECACHE_PIN_LEFT_UNPINNED && - pin != PAGECACHE_PIN_LEFT_PINNED) || - info == 0 || info->write_lock) - goto error; - break; - case PAGECACHE_LOCK_LEFT_WRITELOCKED: - if (pin != PAGECACHE_PIN_LEFT_PINNED || - info == 0 || !info->write_lock) - goto error; - break; - case PAGECACHE_LOCK_READ: - if ((pin != PAGECACHE_PIN_LEFT_UNPINNED && - pin != PAGECACHE_PIN) || - info != 0) - goto error; - break; - case PAGECACHE_LOCK_WRITE: - if (pin != PAGECACHE_PIN || - info != 0) - goto error; - break; - case PAGECACHE_LOCK_READ_UNLOCK: - if ((pin != PAGECACHE_PIN_LEFT_UNPINNED && - pin != PAGECACHE_UNPIN) || - info == 0 || info->write_lock) - goto error; - break; - case PAGECACHE_LOCK_WRITE_UNLOCK: - if (pin != PAGECACHE_UNPIN || - info == 0 || !info->write_lock) - goto error; - break; - case PAGECACHE_LOCK_WRITE_TO_READ: - if ((pin != PAGECACHE_PIN_LEFT_PINNED && - pin != PAGECACHE_UNPIN) || - info == 0 || !info->write_lock) - goto error; - break; - } - DBUG_RETURN(0); -error: - DBUG_PRINT("info", - ("info_check_lock: thread: 0x%lx block 0x%lx: info: %d wrt: %d," - "to lock: %s, to pin: %s", - (ulong)thread, (ulong)block, test(info), - (info ? 
info->write_lock : 0), - page_cache_page_lock_str[lock], - page_cache_page_pin_str[pin])); - DBUG_RETURN(1); -} -#endif - -#define FLUSH_CACHE 2000 /* sort this many blocks at once */ - -static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block); -static void test_key_cache(PAGECACHE *pagecache, - const char *where, my_bool lock); - -#define PAGECACHE_HASH(p, f, pos) (((ulong) (pos) + \ - (ulong) (f).file) & (p->hash_entries-1)) -#define FILE_HASH(f) ((uint) (f).file & (PAGECACHE_CHANGED_BLOCKS_HASH - 1)) - -#define DEFAULT_PAGECACHE_DEBUG_LOG "pagecache_debug.log" - -#if defined(PAGECACHE_DEBUG) && ! defined(PAGECACHE_DEBUG_LOG) -#define PAGECACHE_DEBUG_LOG DEFAULT_PAGECACHE_DEBUG_LOG -#endif - -#if defined(PAGECACHE_DEBUG_LOG) -static FILE *pagecache_debug_log= NULL; -static void pagecache_debug_print _VARARGS((const char *fmt, ...)); -#define PAGECACHE_DEBUG_OPEN \ - if (!pagecache_debug_log) \ - { \ - pagecache_debug_log= fopen(PAGECACHE_DEBUG_LOG, "w"); \ - (void) setvbuf(pagecache_debug_log, NULL, _IOLBF, BUFSIZ); \ - } - -#define PAGECACHE_DEBUG_CLOSE \ - if (pagecache_debug_log) \ - { \ - fclose(pagecache_debug_log); \ - pagecache_debug_log= 0; \ - } -#else -#define PAGECACHE_DEBUG_OPEN -#define PAGECACHE_DEBUG_CLOSE -#endif /* defined(PAGECACHE_DEBUG_LOG) */ - -#if defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) -#define KEYCACHE_DBUG_PRINT(l, m) \ - { if (pagecache_debug_log) \ - fprintf(pagecache_debug_log, "%s: ", l); \ - pagecache_debug_print m; } - -#define KEYCACHE_DBUG_ASSERT(a) \ - { if (! 
(a) && pagecache_debug_log) \ - fclose(pagecache_debug_log); \ - assert(a); } -#else -#define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m) -#define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a) -#endif /* defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) */ - -#if defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF) -#ifdef THREAD -static long pagecache_thread_id; -#define KEYCACHE_THREAD_TRACE(l) \ - KEYCACHE_DBUG_PRINT(l,("|thread %ld",pagecache_thread_id)) - -#define KEYCACHE_THREAD_TRACE_BEGIN(l) \ - { struct st_my_thread_var *thread_var= my_thread_var; \ - pagecache_thread_id= thread_var->id; \ - KEYCACHE_DBUG_PRINT(l,("[thread %ld",pagecache_thread_id)) } - -#define KEYCACHE_THREAD_TRACE_END(l) \ - KEYCACHE_DBUG_PRINT(l,("]thread %ld",pagecache_thread_id)) -#else /* THREAD */ -#define KEYCACHE_THREAD_TRACE(l) KEYCACHE_DBUG_PRINT(l,("")) -#define KEYCACHE_THREAD_TRACE_BEGIN(l) KEYCACHE_DBUG_PRINT(l,("")) -#define KEYCACHE_THREAD_TRACE_END(l) KEYCACHE_DBUG_PRINT(l,("")) -#endif /* THREAD */ -#else -#define KEYCACHE_THREAD_TRACE_BEGIN(l) -#define KEYCACHE_THREAD_TRACE_END(l) -#define KEYCACHE_THREAD_TRACE(l) -#endif /* defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF) */ - -#define BLOCK_NUMBER(p, b) \ - ((uint) (((char*)(b)-(char *) p->block_root)/sizeof(PAGECACHE_BLOCK_LINK))) -#define PAGECACHE_HASH_LINK_NUMBER(p, h) \ - ((uint) (((char*)(h)-(char *) p->hash_link_root)/ \ - sizeof(PAGECACHE_HASH_LINK))) - -#if (defined(PAGECACHE_TIMEOUT) && !defined(__WIN__)) || defined(PAGECACHE_DEBUG) -static int pagecache_pthread_cond_wait(pthread_cond_t *cond, - pthread_mutex_t *mutex); -#else -#define pagecache_pthread_cond_wait pthread_cond_wait -#endif - -#if defined(PAGECACHE_DEBUG) -static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex); -static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex); -static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond); -#define pagecache_pthread_mutex_lock(M) \ -{ DBUG_PRINT("lock", ("mutex lock 0x%lx %u", 
(ulong)(M), __LINE__)); \ - ___pagecache_pthread_mutex_lock(M);} -#define pagecache_pthread_mutex_unlock(M) \ -{ DBUG_PRINT("lock", ("mutex unlock 0x%lx %u", (ulong)(M), __LINE__)); \ - ___pagecache_pthread_mutex_unlock(M);} -#define pagecache_pthread_cond_signal(M) \ -{ DBUG_PRINT("lock", ("signal 0x%lx %u", (ulong)(M), __LINE__)); \ - ___pagecache_pthread_cond_signal(M);} -#else -#define pagecache_pthread_mutex_lock pthread_mutex_lock -#define pagecache_pthread_mutex_unlock pthread_mutex_unlock -#define pagecache_pthread_cond_signal pthread_cond_signal -#endif /* defined(PAGECACHE_DEBUG) */ - -extern my_bool translog_flush(LSN lsn); - -/* - Write page to the disk - - SYNOPSIS - pagecache_fwrite() - pagecache - page cache pointer - filedesc - pagecache file descriptor structure - buffer - buffer which we will write - type - page type (plain or with LSN) - flags - MYF() flags - - RETURN - 0 - OK - !=0 - Error -*/ - -static uint pagecache_fwrite(PAGECACHE *pagecache, - PAGECACHE_FILE *filedesc, - byte *buffer, - pgcache_page_no_t pageno, - enum pagecache_page_type type, - myf flags) -{ - DBUG_ENTER("pagecache_fwrite"); - if (type == PAGECACHE_LSN_PAGE) - { - LSN lsn; - DBUG_PRINT("info", ("Log handler call")); - /* TODO: integrate with page format */ -#define PAGE_LSN_OFFSET 0 - lsn= lsn_korr(buffer + PAGE_LSN_OFFSET); - /* - check CONTROL_FILE_IMPOSSIBLE_FILENO & - CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET - */ - DBUG_ASSERT(lsn != 0); - translog_flush(lsn); - } - DBUG_RETURN(my_pwrite(filedesc->file, buffer, pagecache->block_size, - (pageno)<<(pagecache->shift), flags)); -} - - -/* - Read page from the disk - - SYNOPSIS - pagecache_fread() - pagecache - page cache pointer - filedesc - pagecache file descriptor structure - buffer - buffer in which we will read - pageno - page number - flags - MYF() flags -*/ -#define pagecache_fread(pagecache, filedesc, buffer, pageno, flags) \ - my_pread((filedesc)->file, buffer, pagecache->block_size, \ - (pageno)<<(pagecache->shift), 
flags) - - -/* - next_power(value) is 2 at the power of (1+floor(log2(value))); - e.g. next_power(2)=4, next_power(3)=4. -*/ -static inline uint next_power(uint value) -{ - return (uint) my_round_up_to_next_power((uint32) value) << 1; -} - - -/* - Initialize a page cache - - SYNOPSIS - init_pagecache() - pagecache pointer to a page cache data structure - key_cache_block_size size of blocks to keep cached data - use_mem total memory to use for the key cache - division_limit division limit (may be zero) - age_threshold age threshold (may be zero) - block_size size of block (should be power of 2) - - RETURN VALUE - number of blocks in the key cache, if successful, - 0 - otherwise. - - NOTES. - if pagecache->inited != 0 we assume that the key cache - is already initialized. This is for now used by myisamchk, but shouldn't - be something that a program should rely on! - - It's assumed that no two threads call this function simultaneously - referring to the same key cache handle. - -*/ - -int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, - uint division_limit, uint age_threshold, - uint block_size) -{ - uint blocks, hash_links, length; - int error; - DBUG_ENTER("init_pagecache"); - DBUG_ASSERT(block_size >= 512); - - PAGECACHE_DEBUG_OPEN; - if (pagecache->inited && pagecache->disk_blocks > 0) - { - DBUG_PRINT("warning",("key cache already in use")); - DBUG_RETURN(0); - } - - pagecache->global_cache_w_requests= pagecache->global_cache_r_requests= 0; - pagecache->global_cache_read= pagecache->global_cache_write= 0; - pagecache->disk_blocks= -1; - if (! 
pagecache->inited) - { - pagecache->inited= 1; - pagecache->in_init= 0; - pthread_mutex_init(&pagecache->cache_lock, MY_MUTEX_INIT_FAST); - pagecache->resize_queue.last_thread= NULL; - } - - pagecache->mem_size= use_mem; - pagecache->block_size= block_size; - pagecache->shift= my_bit_log2(block_size); - DBUG_PRINT("info", ("block_size: %u", - block_size)); - DBUG_ASSERT(((uint)(1 << pagecache->shift)) == block_size); - - blocks= (int) (use_mem / (sizeof(PAGECACHE_BLOCK_LINK) + - 2 * sizeof(PAGECACHE_HASH_LINK) + - sizeof(PAGECACHE_HASH_LINK*) * - 5/4 + block_size)); - /* It doesn't make sense to have too few blocks (less than 8) */ - if (blocks >= 8 && pagecache->disk_blocks < 0) - { - for ( ; ; ) - { - /* Set my_hash_entries to the next bigger 2 power */ - if ((pagecache->hash_entries= next_power(blocks)) < - (blocks) * 5/4) - pagecache->hash_entries<<= 1; - hash_links= 2 * blocks; -#if defined(MAX_THREADS) - if (hash_links < MAX_THREADS + blocks - 1) - hash_links= MAX_THREADS + blocks - 1; -#endif - while ((length= (ALIGN_SIZE(blocks * sizeof(PAGECACHE_BLOCK_LINK)) + - ALIGN_SIZE(hash_links * sizeof(PAGECACHE_HASH_LINK)) + - ALIGN_SIZE(sizeof(PAGECACHE_HASH_LINK*) * - pagecache->hash_entries))) + - (((ulong) blocks) << pagecache->shift) > use_mem) - blocks--; - /* Allocate memory for cache page buffers */ - if ((pagecache->block_mem= - my_large_malloc((ulong) blocks * pagecache->block_size, - MYF(MY_WME)))) - { - /* - Allocate memory for blocks, hash_links and hash entries; - For each block 2 hash links are allocated - */ - if ((pagecache->block_root= - (PAGECACHE_BLOCK_LINK*) my_malloc((uint) length, - MYF(0)))) - break; - my_large_free(pagecache->block_mem, MYF(0)); - pagecache->block_mem= 0; - } - if (blocks < 8) - { - my_errno= ENOMEM; - goto err; - } - blocks= blocks / 4*3; - } - pagecache->blocks_unused= (ulong) blocks; - pagecache->disk_blocks= (int) blocks; - pagecache->hash_links= hash_links; - pagecache->hash_root= - (PAGECACHE_HASH_LINK**) ((char*) 
pagecache->block_root + - ALIGN_SIZE(blocks*sizeof(PAGECACHE_BLOCK_LINK))); - pagecache->hash_link_root= - (PAGECACHE_HASH_LINK*) ((char*) pagecache->hash_root + - ALIGN_SIZE((sizeof(PAGECACHE_HASH_LINK*) * - pagecache->hash_entries))); - bzero((byte*) pagecache->block_root, - pagecache->disk_blocks * sizeof(PAGECACHE_BLOCK_LINK)); - bzero((byte*) pagecache->hash_root, - pagecache->hash_entries * sizeof(PAGECACHE_HASH_LINK*)); - bzero((byte*) pagecache->hash_link_root, - pagecache->hash_links * sizeof(PAGECACHE_HASH_LINK)); - pagecache->hash_links_used= 0; - pagecache->free_hash_list= NULL; - pagecache->blocks_used= pagecache->blocks_changed= 0; - - pagecache->global_blocks_changed= 0; - pagecache->blocks_available=0; /* For debugging */ - - /* The LRU chain is empty after initialization */ - pagecache->used_last= NULL; - pagecache->used_ins= NULL; - pagecache->free_block_list= NULL; - pagecache->time= 0; - pagecache->warm_blocks= 0; - pagecache->min_warm_blocks= (division_limit ? - blocks * division_limit / 100 + 1 : - blocks); - pagecache->age_threshold= (age_threshold ? - blocks * age_threshold / 100 : - blocks); - - pagecache->cnt_for_resize_op= 0; - pagecache->resize_in_flush= 0; - pagecache->can_be_used= 1; - - pagecache->waiting_for_hash_link.last_thread= NULL; - pagecache->waiting_for_block.last_thread= NULL; - DBUG_PRINT("exit", - ("disk_blocks: %d block_root: 0x%lx hash_entries: %d\ - hash_root: 0x%lx hash_links: %d hash_link_root: 0x%lx", - pagecache->disk_blocks, (long) pagecache->block_root, - pagecache->hash_entries, (long) pagecache->hash_root, - pagecache->hash_links, (long) pagecache->hash_link_root)); - bzero((gptr) pagecache->changed_blocks, - sizeof(pagecache->changed_blocks[0]) * - PAGECACHE_CHANGED_BLOCKS_HASH); - bzero((gptr) pagecache->file_blocks, - sizeof(pagecache->file_blocks[0]) * - PAGECACHE_CHANGED_BLOCKS_HASH); - } - - pagecache->blocks= pagecache->disk_blocks > 0 ? 
pagecache->disk_blocks : 0; - DBUG_RETURN((uint) pagecache->blocks); - -err: - error= my_errno; - pagecache->disk_blocks= 0; - pagecache->blocks= 0; - if (pagecache->block_mem) - { - my_large_free((gptr) pagecache->block_mem, MYF(0)); - pagecache->block_mem= NULL; - } - if (pagecache->block_root) - { - my_free((gptr) pagecache->block_root, MYF(0)); - pagecache->block_root= NULL; - } - my_errno= error; - pagecache->can_be_used= 0; - DBUG_RETURN(0); -} - - -/* - Flush all blocks in the key cache to disk -*/ - -#ifdef NOT_USED -static int flush_all_key_blocks(PAGECACHE *pagecache) -{ -#if defined(PAGECACHE_DEBUG) - uint cnt=0; -#endif - while (pagecache->blocks_changed > 0) - { - PAGECACHE_BLOCK_LINK *block; - for (block= pagecache->used_last->next_used ; ; block=block->next_used) - { - if (block->hash_link) - { -#if defined(PAGECACHE_DEBUG) - cnt++; - KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); -#endif - if (flush_pagecache_blocks_int(pagecache, &block->hash_link->file, - FLUSH_RELEASE)) - return 1; - break; - } - if (block == pagecache->used_last) - break; - } - } - return 0; -} -#endif /* NOT_USED */ - -/* - Resize a key cache - - SYNOPSIS - resize_pagecache() - pagecache pointer to a page cache data structure - use_mem total memory to use for the new key cache - division_limit new division limit (if not zero) - age_threshold new age threshold (if not zero) - - RETURN VALUE - number of blocks in the key cache, if successful, - 0 - otherwise. - - NOTES. - The function first compares the memory size parameter - with the key cache value. - - If they differ the function free the the memory allocated for the - old key cache blocks by calling the end_pagecache function and - then rebuilds the key cache with new blocks by calling - init_key_cache. - - The function starts the operation only when all other threads - performing operations with the key cache let her to proceed - (when cnt_for_resize=0). 
- - Before being usable, this function needs: - - to receive fixes for BUG#17332 "changing key_buffer_size on a running - server can crash under load" similar to those done to the key cache - - to have us (Sanja) look at the additional constraints placed on - resizing, due to the page locking specific to this page cache. - So we disable it for now. -*/ -#if NOT_USED /* keep disabled until code is fixed see above !! */ -int resize_pagecache(PAGECACHE *pagecache, - my_size_t use_mem, uint division_limit, - uint age_threshold) -{ - int blocks; -#ifdef THREAD - struct st_my_thread_var *thread; - WQUEUE *wqueue; - -#endif - DBUG_ENTER("resize_pagecache"); - - if (!pagecache->inited) - DBUG_RETURN(pagecache->disk_blocks); - - if(use_mem == pagecache->mem_size) - { - change_pagecache_param(pagecache, division_limit, age_threshold); - DBUG_RETURN(pagecache->disk_blocks); - } - - pagecache_pthread_mutex_lock(&pagecache->cache_lock); - -#ifdef THREAD - wqueue= &pagecache->resize_queue; - thread= my_thread_var; - wqueue_link_into_queue(wqueue, thread); - - while (wqueue->last_thread->next != thread) - { - pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); - } -#endif - - pagecache->resize_in_flush= 1; - if (flush_all_key_blocks(pagecache)) - { - /* TODO: if this happens, we should write a warning in the log file ! 
*/ - pagecache->resize_in_flush= 0; - blocks= 0; - pagecache->can_be_used= 0; - goto finish; - } - pagecache->resize_in_flush= 0; - pagecache->can_be_used= 0; -#ifdef THREAD - while (pagecache->cnt_for_resize_op) - { - KEYCACHE_DBUG_PRINT("resize_pagecache: wait", - ("suspend thread %ld", thread->id)); - pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); - } -#else - KEYCACHE_DBUG_ASSERT(pagecache->cnt_for_resize_op == 0); -#endif - - end_pagecache(pagecache, 0); /* Don't free mutex */ - /* The following will work even if use_mem is 0 */ - blocks= init_pagecache(pagecache, pagecache->block_size, use_mem, - division_limit, age_threshold); - -finish: -#ifdef THREAD - wqueue_unlink_from_queue(wqueue, thread); - /* Signal for the next resize request to proceeed if any */ - if (wqueue->last_thread) - { - KEYCACHE_DBUG_PRINT("resize_pagecache: signal", - ("thread %ld", wqueue->last_thread->next->id)); - pagecache_pthread_cond_signal(&wqueue->last_thread->next->suspend); - } -#endif - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - DBUG_RETURN(blocks); -} -#endif /* 0 */ - - -/* - Increment counter blocking resize key cache operation -*/ -static inline void inc_counter_for_resize_op(PAGECACHE *pagecache) -{ - pagecache->cnt_for_resize_op++; -} - - -/* - Decrement counter blocking resize key cache operation; - Signal the operation to proceed when counter becomes equal zero -*/ -static inline void dec_counter_for_resize_op(PAGECACHE *pagecache) -{ -#ifdef THREAD - struct st_my_thread_var *last_thread; - if (!--pagecache->cnt_for_resize_op && - (last_thread= pagecache->resize_queue.last_thread)) - { - KEYCACHE_DBUG_PRINT("dec_counter_for_resize_op: signal", - ("thread %ld", last_thread->next->id)); - pagecache_pthread_cond_signal(&last_thread->next->suspend); - } -#else - pagecache->cnt_for_resize_op--; -#endif -} - -/* - Change the page cache parameters - - SYNOPSIS - change_pagecache_param() - pagecache pointer to a page cache data structure - 
division_limit new division limit (if not zero) - age_threshold new age threshold (if not zero) - - RETURN VALUE - none - - NOTES. - Presently the function resets the key cache parameters - concerning midpoint insertion strategy - division_limit and - age_threshold. -*/ - -void change_pagecache_param(PAGECACHE *pagecache, uint division_limit, - uint age_threshold) -{ - DBUG_ENTER("change_pagecache_param"); - - pagecache_pthread_mutex_lock(&pagecache->cache_lock); - if (division_limit) - pagecache->min_warm_blocks= (pagecache->disk_blocks * - division_limit / 100 + 1); - if (age_threshold) - pagecache->age_threshold= (pagecache->disk_blocks * - age_threshold / 100); - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - DBUG_VOID_RETURN; -} - - -/* - Removes page cache from memory. Does NOT flush pages to disk. - - SYNOPSIS - end_pagecache() - pagecache page cache handle - cleanup Complete free (Free also mutex for key cache) - - RETURN VALUE - none -*/ - -void end_pagecache(PAGECACHE *pagecache, my_bool cleanup) -{ - DBUG_ENTER("end_pagecache"); - DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) pagecache)); - - if (!pagecache->inited) - DBUG_VOID_RETURN; - - if (pagecache->disk_blocks > 0) - { - if (pagecache->block_mem) - { - my_large_free((gptr) pagecache->block_mem, MYF(0)); - pagecache->block_mem= NULL; - my_free((gptr) pagecache->block_root, MYF(0)); - pagecache->block_root= NULL; - } - pagecache->disk_blocks= -1; - /* Reset blocks_changed to be safe if flush_all_key_blocks is called */ - pagecache->blocks_changed= 0; - } - - DBUG_PRINT("status", ("used: %lu changed: %lu w_requests: %lu " - "writes: %lu r_requests: %lu reads: %lu", - pagecache->blocks_used, pagecache->global_blocks_changed, - (ulong) pagecache->global_cache_w_requests, - (ulong) pagecache->global_cache_write, - (ulong) pagecache->global_cache_r_requests, - (ulong) pagecache->global_cache_read)); - - if (cleanup) - { - pthread_mutex_destroy(&pagecache->cache_lock); - pagecache->inited= 
pagecache->can_be_used= 0; - PAGECACHE_DEBUG_CLOSE; - } - DBUG_VOID_RETURN; -} /* end_pagecache */ - - -/* - Unlink a block from the chain of dirty/clean blocks -*/ - -static inline void unlink_changed(PAGECACHE_BLOCK_LINK *block) -{ - if (block->next_changed) - block->next_changed->prev_changed= block->prev_changed; - *block->prev_changed= block->next_changed; -} - - -/* - Link a block into the chain of dirty/clean blocks -*/ - -static inline void link_changed(PAGECACHE_BLOCK_LINK *block, - PAGECACHE_BLOCK_LINK **phead) -{ - block->prev_changed= phead; - if ((block->next_changed= *phead)) - (*phead)->prev_changed= &block->next_changed; - *phead= block; -} - - -/* - Unlink a block from the chain of dirty/clean blocks, if it's asked for, - and link it to the chain of clean blocks for the specified file -*/ - -static void link_to_file_list(PAGECACHE *pagecache, - PAGECACHE_BLOCK_LINK *block, - PAGECACHE_FILE *file, my_bool unlink) -{ - if (unlink) - unlink_changed(block); - link_changed(block, &pagecache->file_blocks[FILE_HASH(*file)]); - if (block->status & BLOCK_CHANGED) - { - block->status&= ~BLOCK_CHANGED; - block->rec_lsn= 0; - pagecache->blocks_changed--; - pagecache->global_blocks_changed--; - } -} - - -/* - Unlink a block from the chain of clean blocks for the specified - file and link it to the chain of dirty blocks for this file -*/ - -static inline void link_to_changed_list(PAGECACHE *pagecache, - PAGECACHE_BLOCK_LINK *block) -{ - unlink_changed(block); - link_changed(block, - &pagecache->changed_blocks[FILE_HASH(block->hash_link->file)]); - block->status|=BLOCK_CHANGED; - pagecache->blocks_changed++; - pagecache->global_blocks_changed++; -} - - -/* - Link a block to the LRU chain at the beginning or at the end of - one of two parts. 
- - SYNOPSIS - link_block() - pagecache pointer to a page cache data structure - block pointer to the block to link to the LRU chain - hot <-> to link the block into the hot subchain - at_end <-> to link the block at the end of the subchain - - RETURN VALUE - none - - NOTES. - The LRU chain is represented by a curcular list of block structures. - The list is double-linked of the type (**prev,*next) type. - The LRU chain is divided into two parts - hot and warm. - There are two pointers to access the last blocks of these two - parts. The beginning of the warm part follows right after the - end of the hot part. - Only blocks of the warm part can be used for replacement. - The first block from the beginning of this subchain is always - taken for eviction (pagecache->last_used->next) - - LRU chain: +------+ H O T +------+ - +----| end |----...<----| beg |----+ - | +------+last +------+ | - v<-link in latest hot (new end) | - | link in latest warm (new end)->^ - | +------+ W A R M +------+ | - +----| beg |---->...----| end |----+ - +------+ +------+ins - first for eviction -*/ - -static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, - my_bool hot, my_bool at_end) -{ - PAGECACHE_BLOCK_LINK *ins; - PAGECACHE_BLOCK_LINK **ptr_ins; - - BLOCK_INFO(block); - KEYCACHE_DBUG_ASSERT(! 
(block->hash_link && block->hash_link->requests)); -#ifdef THREAD - if (!hot && pagecache->waiting_for_block.last_thread) - { - /* Signal that in the LRU warm sub-chain an available block has appeared */ - struct st_my_thread_var *last_thread= - pagecache->waiting_for_block.last_thread; - struct st_my_thread_var *first_thread= last_thread->next; - struct st_my_thread_var *next_thread= first_thread; - PAGECACHE_HASH_LINK *hash_link= - (PAGECACHE_HASH_LINK *) first_thread->opt_info; - struct st_my_thread_var *thread; - do - { - thread= next_thread; - next_thread= thread->next; - /* - We notify about the event all threads that ask - for the same page as the first thread in the queue - */ - if ((PAGECACHE_HASH_LINK *) thread->opt_info == hash_link) - { - KEYCACHE_DBUG_PRINT("link_block: signal", ("thread %ld", thread->id)); - pagecache_pthread_cond_signal(&thread->suspend); - wqueue_unlink_from_queue(&pagecache->waiting_for_block, thread); - block->requests++; - } - } - while (thread != last_thread); - hash_link->block= block; - KEYCACHE_THREAD_TRACE("link_block: after signaling"); -#if defined(PAGECACHE_DEBUG) - KEYCACHE_DBUG_PRINT("link_block", - ("linked,unlinked block %u status=%x #requests=%u #available=%u", - BLOCK_NUMBER(pagecache, block), block->status, - block->requests, pagecache->blocks_available)); -#endif - return; - } -#else /* THREAD */ - KEYCACHE_DBUG_ASSERT(! (!hot && pagecache->waiting_for_block.last_thread)); - /* Condition not transformed using DeMorgan, to keep the text identical */ -#endif /* THREAD */ - ptr_ins= hot ? 
&pagecache->used_ins : &pagecache->used_last; - ins= *ptr_ins; - if (ins) - { - ins->next_used->prev_used= &block->next_used; - block->next_used= ins->next_used; - block->prev_used= &ins->next_used; - ins->next_used= block; - if (at_end) - *ptr_ins= block; - } - else - { - /* The LRU chain is empty */ - pagecache->used_last= pagecache->used_ins= block->next_used= block; - block->prev_used= &block->next_used; - } - KEYCACHE_THREAD_TRACE("link_block"); -#if defined(PAGECACHE_DEBUG) - pagecache->blocks_available++; - KEYCACHE_DBUG_PRINT("link_block", - ("linked block %u:%1u status=%x #requests=%u #available=%u", - BLOCK_NUMBER(pagecache, block), at_end, block->status, - block->requests, pagecache->blocks_available)); - KEYCACHE_DBUG_ASSERT((ulong) pagecache->blocks_available <= - pagecache->blocks_used); -#endif -} - - -/* - Unlink a block from the LRU chain - - SYNOPSIS - unlink_block() - pagecache pointer to a page cache data structure - block pointer to the block to unlink from the LRU chain - - RETURN VALUE - none - - NOTES. 
- See NOTES for link_block -*/ - -static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) -{ - DBUG_ENTER("unlink_block"); - DBUG_PRINT("unlink_block", ("unlink 0x%lx", (ulong)block)); - if (block->next_used == block) - /* The list contains only one member */ - pagecache->used_last= pagecache->used_ins= NULL; - else - { - block->next_used->prev_used= block->prev_used; - *block->prev_used= block->next_used; - if (pagecache->used_last == block) - pagecache->used_last= STRUCT_PTR(PAGECACHE_BLOCK_LINK, - next_used, block->prev_used); - if (pagecache->used_ins == block) - pagecache->used_ins= STRUCT_PTR(PAGECACHE_BLOCK_LINK, - next_used, block->prev_used); - } - block->next_used= NULL; - - KEYCACHE_THREAD_TRACE("unlink_block"); -#if defined(PAGECACHE_DEBUG) - KEYCACHE_DBUG_ASSERT(pagecache->blocks_available != 0); - pagecache->blocks_available--; - KEYCACHE_DBUG_PRINT("unlink_block", - ("unlinked block 0x%lx (%u) status=%x #requests=%u #available=%u", - (ulong)block, BLOCK_NUMBER(pagecache, block), block->status, - block->requests, pagecache->blocks_available)); - BLOCK_INFO(block); -#endif - DBUG_VOID_RETURN; -} - - -/* - Register requests for a block - - SYNOPSIS - reg_requests() - pagecache this page cache reference - block the block we request reference - count how many requests we register (it is 1 everywhere) - - NOTE - Registration of request means we are going to use this block so we exclude - it from the LRU if it is first request -*/ -static void reg_requests(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, - int count) -{ - DBUG_ENTER("reg_requests"); - DBUG_PRINT("enter", ("block 0x%lx (%u) status=%x, reqs: %u", - (ulong)block, BLOCK_NUMBER(pagecache, block), - block->status, block->requests)); - BLOCK_INFO(block); - if (! 
block->requests) - /* First request for the block unlinks it */ - unlink_block(pagecache, block); - block->requests+= count; - DBUG_VOID_RETURN; -} - - -/* - Unregister request for a block - linking it to the LRU chain if it's the last request - - SYNOPSIS - unreg_request() - pagecache pointer to a page cache data structure - block pointer to the block to link to the LRU chain - at_end <-> to link the block at the end of the LRU chain - - RETURN VALUE - none - - NOTES. - Every linking to the LRU chain decrements by one a special block - counter (if it's positive). If the at_end parameter is TRUE the block is - added either at the end of warm sub-chain or at the end of hot sub-chain. - It is added to the hot subchain if its counter is zero and number of - blocks in warm sub-chain is not less than some low limit (determined by - the division_limit parameter). Otherwise the block is added to the warm - sub-chain. If the at_end parameter is FALSE the block is always added - at beginning of the warm sub-chain. - Thus a warm block can be promoted to the hot sub-chain when its counter - becomes zero for the first time. - At the same time the block at the very beginning of the hot subchain - might be moved to the beginning of the warm subchain if it stays untouched - for a too long time (this time is determined by parameter age_threshold). -*/ - -static void unreg_request(PAGECACHE *pagecache, - PAGECACHE_BLOCK_LINK *block, int at_end) -{ - DBUG_ENTER("unreg_request"); - DBUG_PRINT("enter", ("block 0x%lx (%u) status=%x, reqs: %u", - (ulong)block, BLOCK_NUMBER(pagecache, block), - block->status, block->requests)); - BLOCK_INFO(block); - DBUG_ASSERT(block->requests > 0); - if (! 
--block->requests) - { - my_bool hot; - if (block->hits_left) - block->hits_left--; - hot= !block->hits_left && at_end && - pagecache->warm_blocks > pagecache->min_warm_blocks; - if (hot) - { - if (block->temperature == BLOCK_WARM) - pagecache->warm_blocks--; - block->temperature= BLOCK_HOT; - KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu", - pagecache->warm_blocks)); - } - link_block(pagecache, block, hot, (my_bool)at_end); - block->last_hit_time= pagecache->time; - pagecache->time++; - - block= pagecache->used_ins; - /* Check if we should link a hot block to the warm block */ - if (block && pagecache->time - block->last_hit_time > - pagecache->age_threshold) - { - unlink_block(pagecache, block); - link_block(pagecache, block, 0, 0); - if (block->temperature != BLOCK_WARM) - { - pagecache->warm_blocks++; - block->temperature= BLOCK_WARM; - } - KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu", - pagecache->warm_blocks)); - } - } - DBUG_VOID_RETURN; -} - -/* - Remove a reader of the page in block -*/ - -static inline void remove_reader(PAGECACHE_BLOCK_LINK *block) -{ - DBUG_ENTER("remove_reader"); - BLOCK_INFO(block); - DBUG_ASSERT(block->hash_link->requests > 0); -#ifdef THREAD - if (! 
--block->hash_link->requests && block->condvar) - pagecache_pthread_cond_signal(block->condvar); -#else - --block->hash_link->requests; -#endif - DBUG_VOID_RETURN; -} - - -/* - Wait until the last reader of the page in block - signals on its termination -*/ - -static inline void wait_for_readers(PAGECACHE *pagecache - __attribute__((unused)), - PAGECACHE_BLOCK_LINK *block) -{ -#ifdef THREAD - struct st_my_thread_var *thread= my_thread_var; - while (block->hash_link->requests) - { - KEYCACHE_DBUG_PRINT("wait_for_readers: wait", - ("suspend thread %ld block %u", - thread->id, BLOCK_NUMBER(pagecache, block))); - block->condvar= &thread->suspend; - pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); - block->condvar= NULL; - } -#else - KEYCACHE_DBUG_ASSERT(block->hash_link->requests == 0); -#endif -} - - -/* - Add a hash link to a bucket in the hash_table -*/ - -static inline void link_hash(PAGECACHE_HASH_LINK **start, - PAGECACHE_HASH_LINK *hash_link) -{ - if (*start) - (*start)->prev= &hash_link->next; - hash_link->next= *start; - hash_link->prev= start; - *start= hash_link; -} - - -/* - Remove a hash link from the hash table -*/ - -static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link) -{ - KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u pos_ %lu #requests=%u", - (uint) hash_link->file.file, (ulong) hash_link->pageno, - hash_link->requests)); - KEYCACHE_DBUG_ASSERT(hash_link->requests == 0); - if ((*hash_link->prev= hash_link->next)) - hash_link->next->prev= hash_link->prev; - hash_link->block= NULL; -#ifdef THREAD - if (pagecache->waiting_for_hash_link.last_thread) - { - /* Signal that a free hash link has appeared */ - struct st_my_thread_var *last_thread= - pagecache->waiting_for_hash_link.last_thread; - struct st_my_thread_var *first_thread= last_thread->next; - struct st_my_thread_var *next_thread= first_thread; - PAGECACHE_PAGE *first_page= (PAGECACHE_PAGE *) (first_thread->opt_info); - struct st_my_thread_var *thread; - - 
hash_link->file= first_page->file; - hash_link->pageno= first_page->pageno; - do - { - PAGECACHE_PAGE *page; - thread= next_thread; - page= (PAGECACHE_PAGE *) thread->opt_info; - next_thread= thread->next; - /* - We notify about the event all threads that ask - for the same page as the first thread in the queue - */ - if (page->file.file == hash_link->file.file && - page->pageno == hash_link->pageno) - { - KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id)); - pagecache_pthread_cond_signal(&thread->suspend); - wqueue_unlink_from_queue(&pagecache->waiting_for_hash_link, thread); - } - } - while (thread != last_thread); - link_hash(&pagecache->hash_root[PAGECACHE_HASH(pagecache, - hash_link->file, - hash_link->pageno)], - hash_link); - return; - } -#else /* THREAD */ - KEYCACHE_DBUG_ASSERT(! (pagecache->waiting_for_hash_link.last_thread)); -#endif /* THREAD */ - hash_link->next= pagecache->free_hash_list; - pagecache->free_hash_list= hash_link; -} - - -/* - Get the hash link for the page if it is in the cache (do not put the - page in the cache if it is absent there) - - SYNOPSIS - get_present_hash_link() - pagecache Pagecache reference - file file ID - pageno page number in the file - start where to put pointer to found hash bucket (for - direct referring it) - - RETURN - found hashlink pointer -*/ - -static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache, - PAGECACHE_FILE *file, - pgcache_page_no_t pageno, - PAGECACHE_HASH_LINK ***start) -{ - reg1 PAGECACHE_HASH_LINK *hash_link; -#if defined(PAGECACHE_DEBUG) - int cnt; -#endif - DBUG_ENTER("get_present_hash_link"); - - KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu", - (uint) file->file, (ulong) pageno)); - - /* - Find the bucket in the hash table for the pair (file, pageno); - start contains the head of the bucket list, - hash_link points to the first member of the list - */ - hash_link= *(*start= &pagecache->hash_root[PAGECACHE_HASH(pagecache, - *file, pageno)]); 
-#if defined(PAGECACHE_DEBUG) - cnt= 0; -#endif - /* Look for an element for the pair (file, pageno) in the bucket chain */ - while (hash_link && - (hash_link->pageno != pageno || - hash_link->file.file != file->file)) - { - hash_link= hash_link->next; -#if defined(PAGECACHE_DEBUG) - cnt++; - if (! (cnt <= pagecache->hash_links_used)) - { - int i; - for (i=0, hash_link= **start ; - i < cnt ; i++, hash_link= hash_link->next) - { - KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu", - (uint) hash_link->file.file, (ulong) hash_link->pageno)); - } - } - KEYCACHE_DBUG_ASSERT(cnt <= pagecache->hash_links_used); -#endif - } - if (hash_link) - { - /* Register the request for the page */ - hash_link->requests++; - } - - DBUG_RETURN(hash_link); -} - - -/* - Get the hash link for a page -*/ - -static PAGECACHE_HASH_LINK *get_hash_link(PAGECACHE *pagecache, - PAGECACHE_FILE *file, - pgcache_page_no_t pageno) -{ - reg1 PAGECACHE_HASH_LINK *hash_link; - PAGECACHE_HASH_LINK **start; - - KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu", - (uint) file->file, (ulong) pageno)); - -restart: - /* try to find the page in the cache */ - hash_link= get_present_hash_link(pagecache, file, pageno, - &start); - if (!hash_link) - { - /* There is no hash link in the hash table for the pair (file, pageno) */ - if (pagecache->free_hash_list) - { - hash_link= pagecache->free_hash_list; - pagecache->free_hash_list= hash_link->next; - } - else if (pagecache->hash_links_used < pagecache->hash_links) - { - hash_link= &pagecache->hash_link_root[pagecache->hash_links_used++]; - } - else - { -#ifdef THREAD - /* Wait for a free hash link */ - struct st_my_thread_var *thread= my_thread_var; - PAGECACHE_PAGE page; - KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting")); - page.file= *file; - page.pageno= pageno; - thread->opt_info= (void *) &page; - wqueue_link_into_queue(&pagecache->waiting_for_hash_link, thread); - KEYCACHE_DBUG_PRINT("get_hash_link: wait", - ("suspend thread %ld", 
thread->id)); - pagecache_pthread_cond_wait(&thread->suspend, - &pagecache->cache_lock); - thread->opt_info= NULL; -#else - KEYCACHE_DBUG_ASSERT(0); -#endif - DBUG_PRINT("info", ("restarting...")); - goto restart; - } - hash_link->file= *file; - hash_link->pageno= pageno; - link_hash(start, hash_link); - /* Register the request for the page */ - hash_link->requests++; - } - - return hash_link; -} - - -/* - Get a block for the file page requested by a pagecache read/write operation; - If the page is not in the cache return a free block, if there is none - return the lru block after saving its buffer if the page is dirty. - - SYNOPSIS - - find_block() - pagecache pointer to a page cache data structure - file handler for the file to read page from - pageno number of the page in the file - init_hits_left how initialize the block counter for the page - wrmode <-> get for writing - reg_req Register request to thye page - page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ} - - RETURN VALUE - Pointer to the found block if successful, 0 - otherwise - - NOTES. - For the page from file positioned at pageno the function checks whether - the page is in the key cache specified by the first parameter. - If this is the case it immediately returns the block. - If not, the function first chooses a block for this page. If there is - no not used blocks in the key cache yet, the function takes the block - at the very beginning of the warm sub-chain. It saves the page in that - block if it's dirty before returning the pointer to it. - The function returns in the page_st parameter the following values: - PAGE_READ - if page already in the block, - PAGE_TO_BE_READ - if it is to be read yet by the current thread - WAIT_TO_BE_READ - if it is to be read by another thread - If an error occurs THE BLOCK_ERROR bit is set in the block status. - It might happen that there are no blocks in LRU chain (in warm part) - - all blocks are unlinked for some read/write operations. 
Then the function - waits until first of this operations links any block back. -*/ - -static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache, - PAGECACHE_FILE *file, - pgcache_page_no_t pageno, - int init_hits_left, - my_bool wrmode, - my_bool reg_req, - int *page_st) -{ - PAGECACHE_HASH_LINK *hash_link; - PAGECACHE_BLOCK_LINK *block; - int error= 0; - int page_status; - - DBUG_ENTER("find_block"); - KEYCACHE_THREAD_TRACE("find_block:begin"); - DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d", - file->file, (ulong) pageno, wrmode)); - KEYCACHE_DBUG_PRINT("find_block", ("fd: %d pos: %lu wrmode: %d", - file->file, (ulong) pageno, - wrmode)); -#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) - DBUG_EXECUTE("check_pagecache", - test_key_cache(pagecache, "start of find_block", 0);); -#endif - -restart: - /* Find the hash link for the requested page (file, pageno) */ - hash_link= get_hash_link(pagecache, file, pageno); - - page_status= -1; - if ((block= hash_link->block) && - block->hash_link == hash_link && (block->status & BLOCK_READ)) - page_status= PAGE_READ; - - if (wrmode && pagecache->resize_in_flush) - { - /* This is a write request during the flush phase of a resize operation */ - - if (page_status != PAGE_READ) - { - /* We don't need the page in the cache: we are going to write on disk */ - DBUG_ASSERT(hash_link->requests > 0); - hash_link->requests--; - unlink_hash(pagecache, hash_link); - return 0; - } - if (!(block->status & BLOCK_IN_FLUSH)) - { - DBUG_ASSERT(hash_link->requests > 0); - hash_link->requests--; - /* - Remove block to invalidate the page in the block buffer - as we are going to write directly on disk. - Although we have an exclusive lock for the updated key part - the control can be yielded by the current thread as we might - have unfinished readers of other key parts in the block - buffer. 
Still we are guaranteed not to have any readers - of the key part we are writing into until the block is - removed from the cache as we set the BLOCK_REASSIGNED - flag (see the code below that handles reading requests). - */ - free_block(pagecache, block); - return 0; - } - /* Wait until the page is flushed on disk */ - DBUG_ASSERT(hash_link->requests > 0); - hash_link->requests--; - { -#ifdef THREAD - struct st_my_thread_var *thread= my_thread_var; - wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); - do - { - KEYCACHE_DBUG_PRINT("find_block: wait", - ("suspend thread %ld", thread->id)); - pagecache_pthread_cond_wait(&thread->suspend, - &pagecache->cache_lock); - } - while(thread->next); -#else - KEYCACHE_DBUG_ASSERT(0); - /* - Given the use of "resize_in_flush", it seems impossible - that this whole branch is ever entered in single-threaded case - because "(wrmode && pagecache->resize_in_flush)" cannot be true. - TODO: Check this, and then put the whole branch into the - "#ifdef THREAD" guard. 
- */ -#endif - } - /* Invalidate page in the block if it has not been done yet */ - if (block->status) - free_block(pagecache, block); - return 0; - } - - if (page_status == PAGE_READ && - (block->status & (BLOCK_IN_SWITCH | BLOCK_REASSIGNED))) - { - /* This is a request for a page to be removed from cache */ - - KEYCACHE_DBUG_PRINT("find_block", - ("request for old page in block %u " - "wrmode: %d block->status: %d", - BLOCK_NUMBER(pagecache, block), wrmode, - block->status)); - /* - Only reading requests can proceed until the old dirty page is flushed, - all others are to be suspended, then resubmitted - */ - if (!wrmode && !(block->status & BLOCK_REASSIGNED)) - { - if (reg_req) - reg_requests(pagecache, block, 1); - } - else - { - DBUG_ASSERT(hash_link->requests > 0); - hash_link->requests--; - KEYCACHE_DBUG_PRINT("find_block", - ("request waiting for old page to be saved")); - { -#ifdef THREAD - struct st_my_thread_var *thread= my_thread_var; - /* Put the request into the queue of those waiting for the old page */ - wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); - /* Wait until the request can be resubmitted */ - do - { - KEYCACHE_DBUG_PRINT("find_block: wait", - ("suspend thread %ld", thread->id)); - pagecache_pthread_cond_wait(&thread->suspend, - &pagecache->cache_lock); - } - while(thread->next); -#else - KEYCACHE_DBUG_ASSERT(0); - /* No parallel requests in single-threaded case */ -#endif - } - KEYCACHE_DBUG_PRINT("find_block", - ("request for old page resubmitted")); - DBUG_PRINT("info", ("restarting...")); - /* Resubmit the request */ - goto restart; - } - block->status&= ~BLOCK_IN_SWITCH; - } - else - { - /* This is a request for a new page or for a page not to be removed */ - if (! block) - { - /* No block is assigned for the page yet */ - if (pagecache->blocks_unused) - { - if (pagecache->free_block_list) - { - /* There is a block in the free list. 
*/ - block= pagecache->free_block_list; - pagecache->free_block_list= block->next_used; - block->next_used= NULL; - } - else - { - /* There are some never used blocks, take first of them */ - block= &pagecache->block_root[pagecache->blocks_used]; - block->buffer= ADD_TO_PTR(pagecache->block_mem, - ((ulong) pagecache->blocks_used* - pagecache->block_size), - byte*); - pagecache->blocks_used++; - } - pagecache->blocks_unused--; - DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); - DBUG_ASSERT(block->pins == 0); - block->status= 0; -#ifndef DBUG_OFF - block->type= PAGECACHE_EMPTY_PAGE; -#endif - block->requests= 1; - block->temperature= BLOCK_COLD; - block->hits_left= init_hits_left; - block->last_hit_time= 0; - link_to_file_list(pagecache, block, file, 0); - block->hash_link= hash_link; - hash_link->block= block; - page_status= PAGE_TO_BE_READ; - DBUG_PRINT("info", ("page to be read set for page 0x%lx", - (ulong)block)); - KEYCACHE_DBUG_PRINT("find_block", - ("got free or never used block %u", - BLOCK_NUMBER(pagecache, block))); - } - else - { - /* There are no never used blocks, use a block from the LRU chain */ - - /* - Wait until a new block is added to the LRU chain; - several threads might wait here for the same page, - all of them must get the same block - */ - -#ifdef THREAD - if (! pagecache->used_last) - { - struct st_my_thread_var *thread= my_thread_var; - thread->opt_info= (void *) hash_link; - wqueue_link_into_queue(&pagecache->waiting_for_block, thread); - do - { - KEYCACHE_DBUG_PRINT("find_block: wait", - ("suspend thread %ld", thread->id)); - pagecache_pthread_cond_wait(&thread->suspend, - &pagecache->cache_lock); - } - while (thread->next); - thread->opt_info= NULL; - } -#else - KEYCACHE_DBUG_ASSERT(pagecache->used_last); -#endif - block= hash_link->block; - if (! 
block) - { - /* - Take the first block from the LRU chain - unlinking it from the chain - */ - block= pagecache->used_last->next_used; - block->hits_left= init_hits_left; - block->last_hit_time= 0; - if (reg_req) - reg_requests(pagecache, block, 1); - hash_link->block= block; - } - BLOCK_INFO(block); - DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); - DBUG_ASSERT(block->pins == 0); - - if (block->hash_link != hash_link && - ! (block->status & BLOCK_IN_SWITCH) ) - { - /* this is a primary request for a new page */ - DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0); - DBUG_ASSERT(block->pins == 0); - block->status|= (BLOCK_IN_SWITCH | BLOCK_WRLOCK); - - KEYCACHE_DBUG_PRINT("find_block", - ("got block %u for new page", - BLOCK_NUMBER(pagecache, block))); - - if (block->status & BLOCK_CHANGED) - { - /* The block contains a dirty page - push it out of the cache */ - - KEYCACHE_DBUG_PRINT("find_block", ("block is dirty")); - - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - /* - The call is thread safe because only the current - thread might change the block->hash_link value - */ - DBUG_ASSERT(block->pins == 0); - error= pagecache_fwrite(pagecache, - &block->hash_link->file, - block->buffer, - block->hash_link->pageno, - block->type, - MYF(MY_NABP | MY_WAIT_IF_FULL)); - pagecache_pthread_mutex_lock(&pagecache->cache_lock); - pagecache->global_cache_write++; - } - - block->status|= BLOCK_REASSIGNED; - if (block->hash_link) - { - /* - Wait until all pending read requests - for this page are executed - (we could have avoided this waiting, if we had read - a page in the cache in a sweep, without yielding control) - */ - wait_for_readers(pagecache, block); - - /* Remove the hash link for this page from the hash table */ - unlink_hash(pagecache, block->hash_link); - /* All pending requests for this page must be resubmitted */ -#ifdef THREAD - if (block->wqueue[COND_FOR_SAVED].last_thread) - wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]); -#endif - } - 
link_to_file_list(pagecache, block, file, - (my_bool)(block->hash_link ? 1 : 0)); - BLOCK_INFO(block); - block->status= error? BLOCK_ERROR : 0; -#ifndef DBUG_OFF - block->type= PAGECACHE_EMPTY_PAGE; -#endif - block->hash_link= hash_link; - page_status= PAGE_TO_BE_READ; - DBUG_PRINT("info", ("page to be read set for page 0x%lx", - (ulong)block)); - - KEYCACHE_DBUG_ASSERT(block->hash_link->block == block); - KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link); - } - else - { - /* This is for secondary requests for a new page only */ - KEYCACHE_DBUG_PRINT("find_block", - ("block->hash_link: %p hash_link: %p " - "block->status: %u", block->hash_link, - hash_link, block->status )); - page_status= (((block->hash_link == hash_link) && - (block->status & BLOCK_READ)) ? - PAGE_READ : PAGE_WAIT_TO_BE_READ); - } - } - pagecache->global_cache_read++; - } - else - { - if (reg_req) - reg_requests(pagecache, block, 1); - KEYCACHE_DBUG_PRINT("find_block", - ("block->hash_link: %p hash_link: %p " - "block->status: %u", block->hash_link, - hash_link, block->status )); - page_status= (((block->hash_link == hash_link) && - (block->status & BLOCK_READ)) ? 
- PAGE_READ : PAGE_WAIT_TO_BE_READ); - } - } - - KEYCACHE_DBUG_ASSERT(page_status != -1); - *page_st= page_status; - DBUG_PRINT("info", - ("block: 0x%lx fd: %u pos %lu block->status %u page_status %u", - (ulong) block, (uint) file->file, - (ulong) pageno, block->status, (uint) page_status)); - KEYCACHE_DBUG_PRINT("find_block", - ("block: 0x%lx fd: %d pos: %lu block->status: %u page_status: %d", - (ulong) block, - file->file, (ulong) pageno, block->status, - page_status)); - -#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) - DBUG_EXECUTE("check_pagecache", - test_key_cache(pagecache, "end of find_block",0);); -#endif - KEYCACHE_THREAD_TRACE("find_block:end"); - DBUG_RETURN(block); -} - - -static void add_pin(PAGECACHE_BLOCK_LINK *block) -{ - DBUG_ENTER("add_pin"); - DBUG_PRINT("enter", ("block 0x%lx pins: %u", - (ulong) block, - block->pins)); - BLOCK_INFO(block); - block->pins++; -#ifdef PAGECACHE_DEBUG - { - PAGECACHE_PIN_INFO *info= - (PAGECACHE_PIN_INFO *)my_malloc(sizeof(PAGECACHE_PIN_INFO), MYF(0)); - info->thread= my_thread_var; - info_link(&block->pin_list, info); - } -#endif - DBUG_VOID_RETURN; -} - -static void remove_pin(PAGECACHE_BLOCK_LINK *block) -{ - DBUG_ENTER("remove_pin"); - DBUG_PRINT("enter", ("block 0x%lx pins: %u", - (ulong) block, - block->pins)); - BLOCK_INFO(block); - DBUG_ASSERT(block->pins > 0); - block->pins--; -#ifdef PAGECACHE_DEBUG - { - PAGECACHE_PIN_INFO *info= info_find(block->pin_list, my_thread_var); - DBUG_ASSERT(info != 0); - info_unlink(info); - my_free((gptr) info, MYF(0)); - } -#endif - DBUG_VOID_RETURN; -} -#ifdef PAGECACHE_DEBUG -static void info_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) -{ - PAGECACHE_LOCK_INFO *info= - (PAGECACHE_LOCK_INFO *)my_malloc(sizeof(PAGECACHE_LOCK_INFO), MYF(0)); - info->thread= my_thread_var; - info->write_lock= wl; - info_link((PAGECACHE_PIN_INFO **)&block->lock_list, - (PAGECACHE_PIN_INFO *)info); -} -static void info_remove_lock(PAGECACHE_BLOCK_LINK *block) -{ - PAGECACHE_LOCK_INFO 
*info= - (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list, - my_thread_var); - DBUG_ASSERT(info != 0); - info_unlink((PAGECACHE_PIN_INFO *)info); - my_free((gptr)info, MYF(0)); -} -static void info_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) -{ - PAGECACHE_LOCK_INFO *info= - (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list, - my_thread_var); - DBUG_ASSERT(info != 0 && info->write_lock != wl); - info->write_lock= wl; -} -#else -#define info_add_lock(B,W) -#define info_remove_lock(B) -#define info_change_lock(B,W) -#endif - -/* - Put on the block write lock - - SYNOPSIS - get_wrlock() - pagecache pointer to a page cache data structure - block the block to work with - - RETURN - 0 - OK - 1 - Can't lock this block, need retry -*/ - -static my_bool get_wrlock(PAGECACHE *pagecache, - PAGECACHE_BLOCK_LINK *block) -{ - PAGECACHE_FILE file= block->hash_link->file; - pgcache_page_no_t pageno= block->hash_link->pageno; - DBUG_ENTER("get_wrlock"); - DBUG_PRINT("info", ("the block 0x%lx " - "files %d(%d) pages %d(%d)", - (ulong)block, - file.file, block->hash_link->file.file, - pageno, block->hash_link->pageno)); - BLOCK_INFO(block); - while (block->status & BLOCK_WRLOCK) - { - /* Lock failed we will wait */ -#ifdef THREAD - struct st_my_thread_var *thread= my_thread_var; - DBUG_PRINT("info", ("fail to lock, waiting... 
0x%lx", (ulong)block)); - wqueue_add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread); - dec_counter_for_resize_op(pagecache); - do - { - KEYCACHE_DBUG_PRINT("get_wrlock: wait", - ("suspend thread %ld", thread->id)); - pagecache_pthread_cond_wait(&thread->suspend, - &pagecache->cache_lock); - } - while(thread->next); -#else - DBUG_ASSERT(0); -#endif - BLOCK_INFO(block); - if ((block->status & (BLOCK_REASSIGNED | BLOCK_IN_SWITCH)) || - file.file != block->hash_link->file.file || - pageno != block->hash_link->pageno) - { - DBUG_PRINT("info", ("the block 0x%lx changed => need retry" - "status %x files %d != %d or pages %d !=%d", - (ulong)block, block->status, - file.file, block->hash_link->file.file, - pageno, block->hash_link->pageno)); - DBUG_RETURN(1); - } - } - DBUG_ASSERT(block->pins == 0); - /* we are doing it by global cache mutex protection, so it is OK */ - block->status|= BLOCK_WRLOCK; - DBUG_PRINT("info", ("WR lock set, block 0x%lx", (ulong)block)); - DBUG_RETURN(0); -} - - -/* - Remove write lock from the block - - SYNOPSIS - release_wrlock() - pagecache pointer to a page cache data structure - block the block to work with - - RETURN - 0 - OK -*/ - -static void release_wrlock(PAGECACHE_BLOCK_LINK *block) -{ - DBUG_ENTER("release_wrlock"); - BLOCK_INFO(block); - DBUG_ASSERT(block->status & BLOCK_WRLOCK); - DBUG_ASSERT(block->pins > 0); - block->status&= ~BLOCK_WRLOCK; - DBUG_PRINT("info", ("WR lock reset, block 0x%lx", (ulong)block)); -#ifdef THREAD - /* release all threads waiting for write lock */ - if (block->wqueue[COND_FOR_WRLOCK].last_thread) - wqueue_release_queue(&block->wqueue[COND_FOR_WRLOCK]); -#endif - BLOCK_INFO(block); - DBUG_VOID_RETURN; -} - - -/* - Try to lock/unlock and pin/unpin the block - - SYNOPSIS - make_lock_and_pin() - pagecache pointer to a page cache data structure - block the block to work with - lock lock change mode - pin pinchange mode - - RETURN - 0 - OK - 1 - Try to lock the block failed -*/ - -static my_bool 
make_lock_and_pin(PAGECACHE *pagecache, - PAGECACHE_BLOCK_LINK *block, - enum pagecache_page_lock lock, - enum pagecache_page_pin pin) -{ - DBUG_ENTER("make_lock_and_pin"); - DBUG_PRINT("enter", ("block: 0x%lx (%u), wrlock: %c pins: %u, lock %s, pin: %s", - (ulong)block, BLOCK_NUMBER(pagecache, block), - ((block->status & BLOCK_WRLOCK)?'Y':'N'), - block->pins, - page_cache_page_lock_str[lock], - page_cache_page_pin_str[pin])); - BLOCK_INFO(block); -#ifdef PAGECACHE_DEBUG - DBUG_ASSERT(info_check_pin(block, pin) == 0 && - info_check_lock(block, lock, pin) == 0); -#endif - switch (lock) - { - case PAGECACHE_LOCK_WRITE: /* free -> write */ - /* Writelock and pin the buffer */ - if (get_wrlock(pagecache, block)) - { - /* can't lock => need retry */ - goto retry; - } - - /* The cache is locked so nothing afraid of */ - add_pin(block); - info_add_lock(block, 1); - break; - case PAGECACHE_LOCK_WRITE_TO_READ: /* write -> read */ - case PAGECACHE_LOCK_WRITE_UNLOCK: /* write -> free */ - /* - Removes write lock and puts read lock (which is nothing in our - implementation) - */ - release_wrlock(block); - case PAGECACHE_LOCK_READ_UNLOCK: /* read -> free */ - case PAGECACHE_LOCK_LEFT_READLOCKED: /* read -> read */ - if (pin == PAGECACHE_UNPIN) - { - remove_pin(block); - } - if (lock == PAGECACHE_LOCK_WRITE_TO_READ) - { - info_change_lock(block, 0); - } - else if (lock == PAGECACHE_LOCK_WRITE_UNLOCK || - lock == PAGECACHE_LOCK_READ_UNLOCK) - { - info_remove_lock(block); - } - break; - case PAGECACHE_LOCK_READ: /* free -> read */ - if (pin == PAGECACHE_PIN) - { - /* The cache is locked so nothing afraid off */ - add_pin(block); - } - info_add_lock(block, 0); - break; - case PAGECACHE_LOCK_LEFT_UNLOCKED: /* free -> free */ - case PAGECACHE_LOCK_LEFT_WRITELOCKED: /* write -> write */ - break; /* do nothing */ - default: - DBUG_ASSERT(0); /* Never should happened */ - } - - BLOCK_INFO(block); - DBUG_RETURN(0); -retry: - DBUG_PRINT("INFO", ("Retry block 0x%lx", (ulong)block)); - 
BLOCK_INFO(block); - DBUG_ASSERT(block->hash_link->requests > 0); - block->hash_link->requests--; - DBUG_ASSERT(block->requests > 0); - unreg_request(pagecache, block, 1); - BLOCK_INFO(block); - DBUG_RETURN(1); - -} - - -/* - Read into a key cache block buffer from disk. - - SYNOPSIS - - read_block() - pagecache pointer to a page cache data structure - block block to which buffer the data is to be read - primary <-> the current thread will read the data - validator validator of read from the disk data - validator_data pointer to the data need by the validator - - RETURN VALUE - None - - NOTES. - The function either reads a page data from file to the block buffer, - or waits until another thread reads it. What page to read is determined - by a block parameter - reference to a hash link for this page. - If an error occurs THE BLOCK_ERROR bit is set in the block status. -*/ - -static void read_block(PAGECACHE *pagecache, - PAGECACHE_BLOCK_LINK *block, - my_bool primary, - pagecache_disk_read_validator validator, - gptr validator_data) -{ - uint got_length; - - /* On entry cache_lock is locked */ - - DBUG_ENTER("read_block"); - if (primary) - { - /* - This code is executed only by threads - that submitted primary requests - */ - - DBUG_PRINT("read_block", - ("page to be read by primary request")); - - /* Page is not in buffer yet, is to be read from disk */ - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - /* - Here other threads may step in and register as secondary readers. - They will register in block->wqueue[COND_FOR_REQUESTED]. 
- */ - got_length= pagecache_fread(pagecache, &block->hash_link->file, - block->buffer, - block->hash_link->pageno, MYF(0)); - pagecache_pthread_mutex_lock(&pagecache->cache_lock); - if (got_length < pagecache->block_size) - block->status|= BLOCK_ERROR; - else - block->status= (BLOCK_READ | (block->status & BLOCK_WRLOCK)); - - if (validator != NULL && - (*validator)(block->buffer, validator_data)) - block->status|= BLOCK_ERROR; - - DBUG_PRINT("read_block", - ("primary request: new page in cache")); - /* Signal that all pending requests for this page now can be processed */ -#ifdef THREAD - if (block->wqueue[COND_FOR_REQUESTED].last_thread) - wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]); -#endif - } - else - { - /* - This code is executed only by threads - that submitted secondary requests - */ - DBUG_PRINT("read_block", - ("secondary request waiting for new page to be read")); - { -#ifdef THREAD - struct st_my_thread_var *thread= my_thread_var; - /* Put the request into a queue and wait until it can be processed */ - wqueue_add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread); - do - { - DBUG_PRINT("read_block: wait", - ("suspend thread %ld", thread->id)); - pagecache_pthread_cond_wait(&thread->suspend, - &pagecache->cache_lock); - } - while (thread->next); -#else - KEYCACHE_DBUG_ASSERT(0); - /* No parallel requests in single-threaded case */ -#endif - } - DBUG_PRINT("read_block", - ("secondary request: new page in cache")); - } - DBUG_VOID_RETURN; -} - - -/* - Unlock/unpin page and put LSN stamp if it need - - SYNOPSIS - pagecache_unlock_page() - pagecache pointer to a page cache data structure - file handler for the file for the block of data to be read - pageno number of the block of data in the file - lock lock change - pin pin page - first_REDO_LSN_for_page do not set it if it is zero - - NOTE - Pininig uses requests registration mechanism it works following way: - | beginnig | ending | - | of func. | of func. 
| - ----------------------------+-------------+---------------+ - PAGECACHE_PIN_LEFT_PINNED | - | - | - PAGECACHE_PIN_LEFT_UNPINNED | reg request | unreg request | - PAGECACHE_PIN | reg request | - | - PAGECACHE_UNPIN | - | unreg request | - - -*/ - -void pagecache_unlock_page(PAGECACHE *pagecache, - PAGECACHE_FILE *file, - pgcache_page_no_t pageno, - enum pagecache_page_lock lock, - enum pagecache_page_pin pin, - LSN first_REDO_LSN_for_page) -{ - PAGECACHE_BLOCK_LINK *block; - int page_st; - DBUG_ENTER("pagecache_unlock_page"); - DBUG_PRINT("enter", ("fd: %u page: %lu l%s p%s", - (uint) file->file, (ulong) pageno, - page_cache_page_lock_str[lock], - page_cache_page_pin_str[pin])); - /* we do not allow any lock/pin increasing here */ - DBUG_ASSERT(pin != PAGECACHE_PIN && - lock != PAGECACHE_LOCK_READ && - lock != PAGECACHE_LOCK_WRITE); - - pagecache_pthread_mutex_lock(&pagecache->cache_lock); - /* - As soon as we keep lock cache can be used, and we have lock because want - to unlock. - */ - DBUG_ASSERT(pagecache->can_be_used); - - inc_counter_for_resize_op(pagecache); - /* See NOTE for pagecache_unlock_page about registering requests */ - block= find_block(pagecache, file, pageno, 0, 0, - test(pin == PAGECACHE_PIN_LEFT_UNPINNED), &page_st); - BLOCK_INFO(block); - DBUG_ASSERT(block != 0 && page_st == PAGE_READ); - if (first_REDO_LSN_for_page) - { - DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && - pin == PAGECACHE_UNPIN); - set_if_bigger(block->rec_lsn, first_REDO_LSN_for_page); - } - -#ifndef DBUG_OFF - if ( -#endif - make_lock_and_pin(pagecache, block, lock, pin) -#ifndef DBUG_OFF - ) - { - DBUG_ASSERT(0); /* should not happend */ - } -#else - ; -#endif - - remove_reader(block); - /* - Link the block into the LRU chain if it's the last submitted request - for the block and block will not be pinned. - See NOTE for pagecache_unlock_page about registering requests. 
- */ - if (pin != PAGECACHE_PIN_LEFT_PINNED) - unreg_request(pagecache, block, 1); - - dec_counter_for_resize_op(pagecache); - - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - - DBUG_VOID_RETURN; -} - - -/* - Unpin page - - SYNOPSIS - pagecache_unpin_page() - pagecache pointer to a page cache data structure - file handler for the file for the block of data to be read - pageno number of the block of data in the file -*/ - -void pagecache_unpin_page(PAGECACHE *pagecache, - PAGECACHE_FILE *file, - pgcache_page_no_t pageno) -{ - PAGECACHE_BLOCK_LINK *block; - int page_st; - DBUG_ENTER("pagecache_unpin_page"); - DBUG_PRINT("enter", ("fd: %u page: %lu", - (uint) file->file, (ulong) pageno)); - pagecache_pthread_mutex_lock(&pagecache->cache_lock); - /* - As soon as we keep lock cache can be used, and we have lock bacause want - aunlock. - */ - DBUG_ASSERT(pagecache->can_be_used); - - inc_counter_for_resize_op(pagecache); - /* See NOTE for pagecache_unlock_page about registering requests */ - block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st); - DBUG_ASSERT(block != 0 && page_st == PAGE_READ); - -#ifndef DBUG_OFF - if ( -#endif - /* - we can just unpin only with keeping read lock because: - a) we can't pin without any lock - b) we can't unpin keeping write lock - */ - make_lock_and_pin(pagecache, block, - PAGECACHE_LOCK_LEFT_READLOCKED, - PAGECACHE_UNPIN) -#ifndef DBUG_OFF - ) - { - DBUG_ASSERT(0); /* should not happend */ - } -#else - ; -#endif - - remove_reader(block); - /* - Link the block into the LRU chain if it's the last submitted request - for the block and block will not be pinned. 
- See NOTE for pagecache_unlock_page about registering requests - */ - unreg_request(pagecache, block, 1); - - dec_counter_for_resize_op(pagecache); - - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - - DBUG_VOID_RETURN; -} - - -/* - Unlock/unpin page and put LSN stamp if it need - (uses direct block/page pointer) - - SYNOPSIS - pagecache_unlock() - pagecache pointer to a page cache data structure - link direct link to page (returned by read or write) - lock lock change - pin pin page - first_REDO_LSN_for_page do not set it if it is zero -*/ - -void pagecache_unlock(PAGECACHE *pagecache, - PAGECACHE_PAGE_LINK *link, - enum pagecache_page_lock lock, - enum pagecache_page_pin pin, - LSN first_REDO_LSN_for_page) -{ - PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link; - DBUG_ENTER("pagecache_unlock"); - DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu l%s p%s", - (ulong) block, - (uint) block->hash_link->file.file, - (ulong) block->hash_link->pageno, - page_cache_page_lock_str[lock], - page_cache_page_pin_str[pin])); - /* - We do not allow any lock/pin increasing here and page can't be - unpinned because we use direct link. - */ - DBUG_ASSERT(pin != PAGECACHE_PIN && - pin != PAGECACHE_PIN_LEFT_UNPINNED && - lock != PAGECACHE_LOCK_READ && - lock != PAGECACHE_LOCK_WRITE); - if (pin == PAGECACHE_PIN_LEFT_UNPINNED && - lock == PAGECACHE_LOCK_READ_UNLOCK) - { -#ifndef DBUG_OFF - if ( -#endif - /* block do not need here so we do not provide it */ - make_lock_and_pin(pagecache, 0, lock, pin) -#ifndef DBUG_OFF - ) - { - DBUG_ASSERT(0); /* should not happend */ - } -#else - ; -#endif - DBUG_VOID_RETURN; - } - - pagecache_pthread_mutex_lock(&pagecache->cache_lock); - /* - As soon as we keep lock cache can be used, and we have lock bacause want - aunlock. 
- */ - DBUG_ASSERT(pagecache->can_be_used); - - inc_counter_for_resize_op(pagecache); - if (first_REDO_LSN_for_page) - { - DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK && - pin == PAGECACHE_UNPIN); - set_if_bigger(block->rec_lsn, first_REDO_LSN_for_page); - } - -#ifndef DBUG_OFF - if ( -#endif - make_lock_and_pin(pagecache, block, lock, pin) -#ifndef DBUG_OFF - ) - { - DBUG_ASSERT(0); /* should not happend */ - } -#else - ; -#endif - - remove_reader(block); - /* - Link the block into the LRU chain if it's the last submitted request - for the block and block will not be pinned. - See NOTE for pagecache_unlock_page about registering requests. - */ - if (pin != PAGECACHE_PIN_LEFT_PINNED) - unreg_request(pagecache, block, 1); - - dec_counter_for_resize_op(pagecache); - - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - - DBUG_VOID_RETURN; -} - - -/* - Unpin page - (uses direct block/page pointer) - - SYNOPSIS - pagecache_unpin_page() - pagecache pointer to a page cache data structure - link direct link to page (returned by read or write) -*/ - -void pagecache_unpin(PAGECACHE *pagecache, - PAGECACHE_PAGE_LINK *link) -{ - PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link; - DBUG_ENTER("pagecache_unpin"); - DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu", - (ulong) block, - (uint) block->hash_link->file.file, - (ulong) block->hash_link->pageno)); - - pagecache_pthread_mutex_lock(&pagecache->cache_lock); - /* - As soon as we keep lock cache can be used, and we have lock bacause want - aunlock. 
- */ - DBUG_ASSERT(pagecache->can_be_used); - - inc_counter_for_resize_op(pagecache); - -#ifndef DBUG_OFF - if ( -#endif - /* - we can just unpin only with keeping read lock because: - a) we can't pin without any lock - b) we can't unpin keeping write lock - */ - make_lock_and_pin(pagecache, block, - PAGECACHE_LOCK_LEFT_READLOCKED, - PAGECACHE_UNPIN) -#ifndef DBUG_OFF - ) - { - DBUG_ASSERT(0); /* should not happend */ - } -#else - ; -#endif - - remove_reader(block); - /* - Link the block into the LRU chain if it's the last submitted request - for the block and block will not be pinned. - See NOTE for pagecache_unlock_page about registering requests. - */ - unreg_request(pagecache, block, 1); - - dec_counter_for_resize_op(pagecache); - - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - - DBUG_VOID_RETURN; -} - - -/* - Read a block of data from a cached file into a buffer; - - SYNOPSIS - pagecache_valid_read() - pagecache pointer to a page cache data structure - file handler for the file for the block of data to be read - pageno number of the block of data in the file - level determines the weight of the data - buff buffer to where the data must be placed - type type of the page - lock lock change - link link to the page if we pin it - validator validator of read from the disk data - validator_data pointer to the data need by the validator - - RETURN VALUE - Returns address from where the data is placed if sucessful, 0 - otherwise. 
- - Pin will be choosen according to lock parameter (see lock_to_pin) -*/ -static enum pagecache_page_pin lock_to_pin[]= -{ - PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/, - PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_READLOCKED*/, - PAGECACHE_PIN_LEFT_PINNED /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/, - PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ*/, - PAGECACHE_PIN /*PAGECACHE_LOCK_WRITE*/, - PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/, - PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_UNLOCK*/, - PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_TO_READ*/ -}; - -byte *pagecache_valid_read(PAGECACHE *pagecache, - PAGECACHE_FILE *file, - pgcache_page_no_t pageno, - uint level, - byte *buff, - enum pagecache_page_type type, - enum pagecache_page_lock lock, - PAGECACHE_PAGE_LINK *link, - pagecache_disk_read_validator validator, - gptr validator_data) -{ - int error= 0; - enum pagecache_page_pin pin= lock_to_pin[lock]; - PAGECACHE_PAGE_LINK fake_link; - DBUG_ENTER("pagecache_valid_read"); - DBUG_PRINT("enter", ("fd: %u page: %lu level: %u t:%s l%s p%s", - (uint) file->file, (ulong) pageno, level, - page_cache_page_type_str[type], - page_cache_page_lock_str[lock], - page_cache_page_pin_str[pin])); - - if (!link) - link= &fake_link; - else - *link= 0; - -restart: - - if (pagecache->can_be_used) - { - /* Key cache is used */ - PAGECACHE_BLOCK_LINK *block; - uint status; - int page_st; - - pagecache_pthread_mutex_lock(&pagecache->cache_lock); - if (!pagecache->can_be_used) - { - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - goto no_key_cache; - } - - inc_counter_for_resize_op(pagecache); - pagecache->global_cache_r_requests++; - /* See NOTE for pagecache_unlock_page about registering requests. 
*/ - block= find_block(pagecache, file, pageno, level, - test(lock == PAGECACHE_LOCK_WRITE), - test((pin == PAGECACHE_PIN_LEFT_UNPINNED) || - (pin == PAGECACHE_PIN)), - &page_st); - DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || - block->type == type); - block->type= type; - if (block->status != BLOCK_ERROR && page_st != PAGE_READ) - { - DBUG_PRINT("info", ("read block 0x%lx", (ulong)block)); - /* The requested page is to be read into the block buffer */ - read_block(pagecache, block, - (my_bool)(page_st == PAGE_TO_BE_READ), - validator, validator_data); - DBUG_PRINT("info", ("read is done")); - } - if (make_lock_and_pin(pagecache, block, lock, pin)) - { - /* - We failed to write lock the block, cache is unlocked, - we will try to get the block again. - */ - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - DBUG_PRINT("info", ("restarting...")); - goto restart; - } - - if (! ((status= block->status) & BLOCK_ERROR)) - { -#if !defined(SERIALIZED_READ_FROM_CACHE) - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); -#endif - - DBUG_ASSERT((pagecache->block_size & 511) == 0); - /* Copy data from the cache buffer */ - bmove512(buff, block->buffer, pagecache->block_size); - -#if !defined(SERIALIZED_READ_FROM_CACHE) - pagecache_pthread_mutex_lock(&pagecache->cache_lock); -#endif - } - - remove_reader(block); - /* - Link the block into the LRU chain if it's the last submitted request - for the block and block will not be pinned. - See NOTE for pagecache_unlock_page about registering requests. 
- */ - if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN) - unreg_request(pagecache, block, 1); - else - *link= (PAGECACHE_PAGE_LINK)block; - - dec_counter_for_resize_op(pagecache); - - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - - if (status & BLOCK_ERROR) - DBUG_RETURN((byte *) 0); - - DBUG_RETURN(buff); - } - -no_key_cache: /* Key cache is not used */ - - /* We can't use mutex here as the key cache may not be initialized */ - pagecache->global_cache_r_requests++; - pagecache->global_cache_read++; - if (pagecache_fread(pagecache, file, (byte*) buff, pageno, MYF(MY_NABP))) - error= 1; - DBUG_RETURN(error ? (byte*) 0 : buff); -} - - -/* - Delete page from the buffer - - SYNOPSIS - pagecache_delete_page() - pagecache pointer to a page cache data structure - file handler for the file for the block of data to be read - pageno number of the block of data in the file - lock lock change - flush flush page if it is dirty - - RETURN VALUE - 0 - deleted or was not present at all - 1 - error - - NOTES. 
- lock can be only PAGECACHE_LOCK_LEFT_WRITELOCKED (page was write locked - before) or PAGECACHE_LOCK_WRITE (delete will write lock page before delete) -*/ -my_bool pagecache_delete_page(PAGECACHE *pagecache, - PAGECACHE_FILE *file, - pgcache_page_no_t pageno, - enum pagecache_page_lock lock, - my_bool flush) -{ - int error= 0; - enum pagecache_page_pin pin= lock_to_pin[lock]; - DBUG_ENTER("pagecache_delete_page"); - DBUG_PRINT("enter", ("fd: %u page: %lu l%s p%s", - (uint) file->file, (ulong) pageno, - page_cache_page_lock_str[lock], - page_cache_page_pin_str[pin])); - DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE || - lock == PAGECACHE_LOCK_LEFT_WRITELOCKED); - DBUG_ASSERT(pin == PAGECACHE_PIN || - pin == PAGECACHE_PIN_LEFT_PINNED); - -restart: - - if (pagecache->can_be_used) - { - /* Key cache is used */ - reg1 PAGECACHE_BLOCK_LINK *block; - PAGECACHE_HASH_LINK **unused_start, *link; - - pagecache_pthread_mutex_lock(&pagecache->cache_lock); - if (!pagecache->can_be_used) - goto end; - - inc_counter_for_resize_op(pagecache); - link= get_present_hash_link(pagecache, file, pageno, &unused_start); - if (!link) - { - DBUG_PRINT("info", ("There is no such page in the cache")); - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - DBUG_RETURN(0); - } - block= link->block; - /* See NOTE for pagecache_unlock_page about registering requests. */ - if (pin == PAGECACHE_PIN) - reg_requests(pagecache, block, 1); - DBUG_ASSERT(block != 0); - if (make_lock_and_pin(pagecache, block, lock, pin)) - { - /* - We failed to writelock the block, cache is unlocked, and last write - lock is released, we will try to get the block again. 
- */ - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - DBUG_PRINT("info", ("restarting...")); - goto restart; - } - - if (block->status & BLOCK_CHANGED) - { - if (flush) - { - /* The block contains a dirty page - push it out of the cache */ - - KEYCACHE_DBUG_PRINT("find_block", ("block is dirty")); - - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - /* - The call is thread safe because only the current - thread might change the block->hash_link value - */ - DBUG_ASSERT(block->pins == 1); - error= pagecache_fwrite(pagecache, - &block->hash_link->file, - block->buffer, - block->hash_link->pageno, - block->type, - MYF(MY_NABP | MY_WAIT_IF_FULL)); - pagecache_pthread_mutex_lock(&pagecache->cache_lock); - pagecache->global_cache_write++; - - if (error) - { - block->status|= BLOCK_ERROR; - goto err; - } - } - pagecache->blocks_changed--; - pagecache->global_blocks_changed--; - /* - free_block() will change the status and rec_lsn of the block so no - need to change them here. - */ - } - /* Cache is locked, so we can relese page before freeing it */ - make_lock_and_pin(pagecache, block, - PAGECACHE_LOCK_WRITE_UNLOCK, - PAGECACHE_UNPIN); - DBUG_ASSERT(link->requests > 0); - link->requests--; - /* See NOTE for pagecache_unlock_page about registering requests. */ - free_block(pagecache, block); - -err: - dec_counter_for_resize_op(pagecache); -end: - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - } - - DBUG_RETURN(error); -} - - -/* - Write a buffer into a cached file. - - SYNOPSIS - - pagecache_write() - pagecache pointer to a page cache data structure - file handler for the file to write data to - pageno number of the block of data in the file - level determines the weight of the data - buff buffer to where the data must be placed - type type of the page - lock lock change - pin pin page - write_mode how to write page - link link to the page if we pin it - - RETURN VALUE - 0 if a success, 1 - otherwise. 
-*/ - -/* description of how to change lock before and after write */ -struct write_lock_change -{ - int need_lock_change; /* need changing of lock at the end of write */ - enum pagecache_page_lock new_lock; /* lock at the beginning */ - enum pagecache_page_lock unlock_lock; /* lock at the end */ -}; - -static struct write_lock_change write_lock_change_table[]= -{ - {1, - PAGECACHE_LOCK_WRITE, - PAGECACHE_LOCK_WRITE_UNLOCK} /*PAGECACHE_LOCK_LEFT_UNLOCKED*/, - {0, /*unsupported*/ - PAGECACHE_LOCK_LEFT_UNLOCKED, - PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_LEFT_READLOCKED*/, - {0, PAGECACHE_LOCK_LEFT_WRITELOCKED, 0} /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/, - {1, - PAGECACHE_LOCK_WRITE, - PAGECACHE_LOCK_WRITE_TO_READ} /*PAGECACHE_LOCK_READ*/, - {0, PAGECACHE_LOCK_WRITE, 0} /*PAGECACHE_LOCK_WRITE*/, - {0, /*unsupported*/ - PAGECACHE_LOCK_LEFT_UNLOCKED, - PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_READ_UNLOCK*/, - {1, - PAGECACHE_LOCK_LEFT_WRITELOCKED, - PAGECACHE_LOCK_WRITE_UNLOCK } /*PAGECACHE_LOCK_WRITE_UNLOCK*/, - {1, - PAGECACHE_LOCK_LEFT_WRITELOCKED, - PAGECACHE_LOCK_WRITE_TO_READ}/*PAGECACHE_LOCK_WRITE_TO_READ*/ -}; - -/* description of how to change pin before and after write */ -struct write_pin_change -{ - enum pagecache_page_pin new_pin; /* pin status at the beginning */ - enum pagecache_page_pin unlock_pin; /* pin status at the end */ -}; - -static struct write_pin_change write_pin_change_table[]= -{ - {PAGECACHE_PIN_LEFT_PINNED, - PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN_LEFT_PINNED*/, - {PAGECACHE_PIN, - PAGECACHE_UNPIN} /*PAGECACHE_PIN_LEFT_UNPINNED*/, - {PAGECACHE_PIN, - PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN*/, - {PAGECACHE_PIN_LEFT_PINNED, - PAGECACHE_UNPIN} /*PAGECACHE_UNPIN*/ -}; - -my_bool pagecache_write(PAGECACHE *pagecache, - PAGECACHE_FILE *file, - pgcache_page_no_t pageno, - uint level, - byte *buff, - enum pagecache_page_type type, - enum pagecache_page_lock lock, - enum pagecache_page_pin pin, - enum pagecache_write_mode write_mode, 
- PAGECACHE_PAGE_LINK *link) -{ - reg1 PAGECACHE_BLOCK_LINK *block= NULL; - PAGECACHE_PAGE_LINK fake_link; - int error= 0; - int need_lock_change= write_lock_change_table[lock].need_lock_change; - DBUG_ENTER("pagecache_write"); - DBUG_PRINT("enter", ("fd: %u page: %lu level: %u t:%s l%s p%s m%s", - (uint) file->file, (ulong) pageno, level, - page_cache_page_type_str[type], - page_cache_page_lock_str[lock], - page_cache_page_pin_str[pin], - page_cache_page_write_mode_str[write_mode])); - DBUG_ASSERT(lock != PAGECACHE_LOCK_LEFT_READLOCKED && - lock != PAGECACHE_LOCK_READ_UNLOCK); - if (!link) - link= &fake_link; - else - *link= 0; - - if (write_mode == PAGECACHE_WRITE_NOW) - { - /* we allow direct write if we do not use long term lockings */ - DBUG_ASSERT(lock == PAGECACHE_LOCK_LEFT_UNLOCKED); - /* Force writing from buff into disk */ - pagecache->global_cache_write++; - if (pagecache_fwrite(pagecache, file, buff, pageno, type, - MYF(MY_NABP | MY_WAIT_IF_FULL))) - DBUG_RETURN(1); - } -restart: - -#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) - DBUG_EXECUTE("check_pagecache", - test_key_cache(pagecache, "start of key_cache_write", 1);); -#endif - - if (pagecache->can_be_used) - { - /* Key cache is used */ - int page_st; - - pagecache_pthread_mutex_lock(&pagecache->cache_lock); - if (!pagecache->can_be_used) - { - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - goto no_key_cache; - } - - inc_counter_for_resize_op(pagecache); - pagecache->global_cache_w_requests++; - /* See NOTE for pagecache_unlock_page about registering requests. 
*/ - block= find_block(pagecache, file, pageno, level, - test(write_mode != PAGECACHE_WRITE_DONE && - lock != PAGECACHE_LOCK_LEFT_WRITELOCKED && - lock != PAGECACHE_LOCK_WRITE_UNLOCK && - lock != PAGECACHE_LOCK_WRITE_TO_READ), - test((pin == PAGECACHE_PIN_LEFT_UNPINNED) || - (pin == PAGECACHE_PIN)), - &page_st); - if (!block) - { - DBUG_ASSERT(write_mode != PAGECACHE_WRITE_DONE); - /* It happens only for requests submitted during resize operation */ - dec_counter_for_resize_op(pagecache); - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - /* Write to the disk key cache is in resize at the moment*/ - goto no_key_cache; - } - - DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || - block->type == type); - block->type= type; - - if (make_lock_and_pin(pagecache, block, - write_lock_change_table[lock].new_lock, - (need_lock_change ? - write_pin_change_table[pin].new_pin : - pin))) - { - /* - We failed to writelock the block, cache is unlocked, and last write - lock is released, we will try to get the block again. - */ - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - DBUG_PRINT("info", ("restarting...")); - goto restart; - } - - - if (write_mode == PAGECACHE_WRITE_DONE) - { - if ((block->status & BLOCK_ERROR) && page_st != PAGE_READ) - { - /* Copy data from buff */ - bmove512(block->buffer, buff, pagecache->block_size); - block->status= (BLOCK_READ | (block->status & BLOCK_WRLOCK)); - KEYCACHE_DBUG_PRINT("key_cache_insert", - ("primary request: new page in cache")); -#ifdef THREAD - /* Signal that all pending requests for this now can be processed. */ - if (block->wqueue[COND_FOR_REQUESTED].last_thread) - wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]); -#endif - } - } - else - { - if (write_mode == PAGECACHE_WRITE_NOW) - { - /* buff has been written to disk at start */ - if ((block->status & BLOCK_CHANGED) && - !(block->status & BLOCK_ERROR)) - link_to_file_list(pagecache, block, &block->hash_link->file, 1); - } - else if (! 
(block->status & BLOCK_CHANGED)) - link_to_changed_list(pagecache, block); - - if (! (block->status & BLOCK_ERROR)) - { - bmove512(block->buffer, buff, pagecache->block_size); - block->status|= BLOCK_READ; - } - } - - - if (need_lock_change) - { -#ifndef DBUG_OFF - int rc= -#endif - /* - QQ: We are doing an unlock here, so need to give the page its rec_lsn - */ - make_lock_and_pin(pagecache, block, - write_lock_change_table[lock].unlock_lock, - write_pin_change_table[pin].unlock_pin); -#ifndef DBUG_OFF - DBUG_ASSERT(rc == 0); -#endif - } - - /* Unregister the request */ - DBUG_ASSERT(block->hash_link->requests > 0); - block->hash_link->requests--; - /* See NOTE for pagecache_unlock_page about registering requests. */ - if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN) - unreg_request(pagecache, block, 1); - else - *link= (PAGECACHE_PAGE_LINK)block; - - - if (block->status & BLOCK_ERROR) - error= 1; - - dec_counter_for_resize_op(pagecache); - - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - - goto end; - } - -no_key_cache: - /* Key cache is not used */ - if (write_mode == PAGECACHE_WRITE_DELAY) - { - pagecache->global_cache_w_requests++; - pagecache->global_cache_write++; - if (pagecache_fwrite(pagecache, file, (byte*) buff, pageno, type, - MYF(MY_NABP | MY_WAIT_IF_FULL))) - error=1; - } - -end: -#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) - DBUG_EXECUTE("exec", - test_key_cache(pagecache, "end of key_cache_write", 1);); -#endif - BLOCK_INFO(block); - DBUG_RETURN(error); -} - - -/* - Free block: remove reference to it from hash table, - remove it from the chain file of dirty/clean blocks - and add it to the free list. 
*/

static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
{
  KEYCACHE_THREAD_TRACE("free block");
  KEYCACHE_DBUG_PRINT("free_block",
                      ("block %u to be freed, hash_link %p",
                       BLOCK_NUMBER(pagecache, block), block->hash_link));
  if (block->hash_link)
  {
    /*
      While waiting for readers to finish, new readers might request the
      block. But since we set block->status|= BLOCK_REASSIGNED, they
      will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
      later.
    */
    block->status|= BLOCK_REASSIGNED;
    wait_for_readers(pagecache, block);
    unlink_hash(pagecache, block->hash_link);
  }

  unlink_changed(block);
  /* A block being freed must be neither write-locked nor pinned */
  DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0);
  DBUG_ASSERT(block->pins == 0);
  block->status= 0;
#ifndef DBUG_OFF
  block->type= PAGECACHE_EMPTY_PAGE;
#endif
  block->rec_lsn= 0;
  KEYCACHE_THREAD_TRACE("free block");
  KEYCACHE_DBUG_PRINT("free_block",
                      ("block is freed"));
  unreg_request(pagecache, block, 0);
  block->hash_link= NULL;

  /* Remove the free block from the LRU ring. */
  unlink_block(pagecache, block);
  if (block->temperature == BLOCK_WARM)
    pagecache->warm_blocks--;
  block->temperature= BLOCK_COLD;
  /* Insert the free block in the free list. */
  block->next_used= pagecache->free_block_list;
  pagecache->free_block_list= block;
  /* Keep track of the number of currently unused blocks. */
  pagecache->blocks_unused++;

#ifdef THREAD
  /* All pending requests for this page must be resubmitted. */
  if (block->wqueue[COND_FOR_SAVED].last_thread)
    wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
#endif
}


/* qsort comparator: orders blocks by page number for sequential flushing */
static int cmp_sec_link(PAGECACHE_BLOCK_LINK **a, PAGECACHE_BLOCK_LINK **b)
{
  return (((*a)->hash_link->pageno < (*b)->hash_link->pageno) ? -1 :
          ((*a)->hash_link->pageno > (*b)->hash_link->pageno) ? 1 : 0);
}


/*
  Flush a portion of changed blocks to disk,
  free used blocks if requested
*/

static int flush_cached_blocks(PAGECACHE *pagecache,
                               PAGECACHE_FILE *file,
                               PAGECACHE_BLOCK_LINK **cache,
                               PAGECACHE_BLOCK_LINK **end,
                               enum flush_type type)
{
  int error;
  int last_errno= 0;
  uint count= (uint) (end-cache);
  DBUG_ENTER("flush_cached_blocks");

  /* Don't lock the cache during the flush */
  pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
  /*
    As all blocks referred in 'cache' are marked by BLOCK_IN_FLUSH
    we are guaranteed no thread will change them
  */
  qsort((byte*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);

  pagecache_pthread_mutex_lock(&pagecache->cache_lock);
  for (; cache != end; cache++)
  {
    PAGECACHE_BLOCK_LINK *block= *cache;

    if (block->pins)
    {
      /* Somebody still pins the block: skip it, report via last_errno */
      KEYCACHE_DBUG_PRINT("flush_cached_blocks",
                          ("block %u (0x%lx) pinned",
                           BLOCK_NUMBER(pagecache, block), (ulong)block));
      DBUG_PRINT("info", ("block %u (0x%lx) pinned",
                          BLOCK_NUMBER(pagecache, block), (ulong)block));
      BLOCK_INFO(block);
      last_errno= -1;
      unreg_request(pagecache, block, 1);
      continue;
    }
    /* if the block is not pinned then it is not write locked */
    DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0);
    DBUG_ASSERT(block->pins == 0);
#ifndef DBUG_OFF
    {
      int rc=
#endif
    make_lock_and_pin(pagecache, block,
                      PAGECACHE_LOCK_WRITE, PAGECACHE_PIN);
#ifndef DBUG_OFF
      DBUG_ASSERT(rc == 0);
    }
#endif

    KEYCACHE_DBUG_PRINT("flush_cached_blocks",
                        ("block %u (0x%lx) to be flushed",
                         BLOCK_NUMBER(pagecache, block), (ulong)block));
    DBUG_PRINT("info", ("block %u (0x%lx) to be flushed",
                        BLOCK_NUMBER(pagecache, block), (ulong)block));
    BLOCK_INFO(block);
    /* Write the page out without holding the cache mutex */
    pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
    DBUG_PRINT("info", ("block %u (0x%lx) pins: %u",
                        BLOCK_NUMBER(pagecache, block), (ulong)block,
                        block->pins));
    DBUG_ASSERT(block->pins == 1);
    error= pagecache_fwrite(pagecache, file,
                            block->buffer,
                            block->hash_link->pageno,
                            block->type,
                            MYF(MY_NABP | MY_WAIT_IF_FULL));
    pagecache_pthread_mutex_lock(&pagecache->cache_lock);

    make_lock_and_pin(pagecache, block,
                      PAGECACHE_LOCK_WRITE_UNLOCK,
                      PAGECACHE_UNPIN);

    pagecache->global_cache_write++;
    if (error)
    {
      block->status|= BLOCK_ERROR;
      if (!last_errno)
        last_errno= errno ? errno : -1;
    }
#ifdef THREAD
    /*
      Let to proceed for possible waiting requests to write to the block page.
      It might happen only during an operation to resize the key cache.
    */
    if (block->wqueue[COND_FOR_SAVED].last_thread)
      wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
#endif
    /* type will never be FLUSH_IGNORE_CHANGED here */
    if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
    {
      pagecache->blocks_changed--;
      pagecache->global_blocks_changed--;
      free_block(pagecache, block);
    }
    else
    {
      block->status&= ~BLOCK_IN_FLUSH;
      link_to_file_list(pagecache, block, file, 1);
      unreg_request(pagecache, block, 1);
    }
  }
  DBUG_RETURN(last_errno);
}


/*
  flush all key blocks for a file to disk, but don't do any mutex locks

    flush_pagecache_blocks_int()
      pagecache            pointer to a key cache data structure
      file                handler for the file to flush to
      flush_type          type of the flush

  NOTES
    This function doesn't do any mutex locks because it needs to be called
    both from flush_pagecache_blocks and flush_all_key_blocks (the later one
    does the mutex lock in the resize_pagecache() function).
  RETURN
    0   ok
    1  error
*/

static int flush_pagecache_blocks_int(PAGECACHE *pagecache,
                                      PAGECACHE_FILE *file,
                                      enum flush_type type)
{
  PAGECACHE_BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
  int last_errno= 0;
  DBUG_ENTER("flush_pagecache_blocks_int");
  DBUG_PRINT("enter",("file: %d blocks_used: %lu blocks_changed: %lu",
              file->file, pagecache->blocks_used, pagecache->blocks_changed));

#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
    DBUG_EXECUTE("check_pagecache",
                 test_key_cache(pagecache,
                                "start of flush_pagecache_blocks", 0););
#endif

  cache= cache_buff;
  if (pagecache->disk_blocks > 0 &&
      (!my_disable_flush_pagecache_blocks || type != FLUSH_KEEP))
  {
    /* Key cache exists and flush is not disabled */
    int error= 0;
    uint count= 0;
    PAGECACHE_BLOCK_LINK **pos, **end;
    PAGECACHE_BLOCK_LINK *first_in_switch= NULL;
    PAGECACHE_BLOCK_LINK *block, *next;
#if defined(PAGECACHE_DEBUG)
    uint cnt= 0;
#endif

    if (type != FLUSH_IGNORE_CHANGED)
    {
      /*
        Count how many key blocks we have to cache to be able
        to flush all dirty pages with minimum seek moves
      */
      for (block= pagecache->changed_blocks[FILE_HASH(*file)] ;
           block;
           block= block->next_changed)
      {
        if (block->hash_link->file.file == file->file)
        {
          count++;
          KEYCACHE_DBUG_ASSERT(count<= pagecache->blocks_used);
        }
      }
      /* Allocate a new buffer only if its bigger than the one we have */
      if (count > FLUSH_CACHE &&
          !(cache=
            (PAGECACHE_BLOCK_LINK**)
            my_malloc(sizeof(PAGECACHE_BLOCK_LINK*)*count, MYF(0))))
      {
        /* On malloc failure fall back to the stack buffer in chunks */
        cache= cache_buff;
        count= FLUSH_CACHE;
      }
    }

    /* Retrieve the blocks and write them to a buffer to be flushed */
restart:
    end= (pos= cache)+count;
    for (block= pagecache->changed_blocks[FILE_HASH(*file)] ;
         block;
         block= next)
    {
#if defined(PAGECACHE_DEBUG)
      cnt++;
      KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
#endif
      next= block->next_changed;
      if (block->hash_link->file.file == file->file)
      {
        /*
          Mark the block with BLOCK_IN_FLUSH in order not to let
          other threads to use it for new pages and interfere with
          our sequence of flushing dirty file pages
        */
        block->status|= BLOCK_IN_FLUSH;

        if (! (block->status & BLOCK_IN_SWITCH))
        {
          /*
            We care only for the blocks for which flushing was not
            initiated by other threads as a result of page swapping
          */
          reg_requests(pagecache, block, 1);
          if (type != FLUSH_IGNORE_CHANGED)
          {
            /* It's not a temporary file */
            if (pos == end)
            {
              /*
                This happens only if there is not enough
                memory for the big block
              */
              if ((error= flush_cached_blocks(pagecache, file, cache,
                                              end,type)))
                last_errno=error;
              DBUG_PRINT("info", ("restarting..."));
              /*
                Restart the scan as some other thread might have changed
                the changed blocks chain: the blocks that were in switch
                state before the flush started have to be excluded
              */
              goto restart;
            }
            *pos++= block;
          }
          else
          {
            /* It's a temporary file */
            pagecache->blocks_changed--;
            pagecache->global_blocks_changed--;
            free_block(pagecache, block);
          }
        }
        else
        {
          /* Link the block into a list of blocks 'in switch' */
          /* QQ:
#warning this unlink_changed() is a serious problem for
             Maria's Checkpoint: it removes a page from the list of dirty
             pages, while it's still dirty. A solution is to abandon
             first_in_switch, just wait for this page to be
             flushed by somebody else, and loop. TODO: check all places
             where we remove a page from the list of dirty pages
          */
          unlink_changed(block);
          link_changed(block, &first_in_switch);
        }
      }
    }
    if (pos != cache)
    {
      if ((error= flush_cached_blocks(pagecache, file, cache, pos, type)))
        last_errno= error;
    }
    /* Wait until list of blocks in switch is empty */
    while (first_in_switch)
    {
#if defined(PAGECACHE_DEBUG)
      cnt= 0;
#endif
      block= first_in_switch;
      {
#ifdef THREAD
        struct st_my_thread_var *thread= my_thread_var;
        wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
        do
        {
          KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait",
                              ("suspend thread %ld", thread->id));
          pagecache_pthread_cond_wait(&thread->suspend,
                                      &pagecache->cache_lock);
        }
        while (thread->next);
#else
        KEYCACHE_DBUG_ASSERT(0);
        /* No parallel requests in single-threaded case */
#endif
      }
#if defined(PAGECACHE_DEBUG)
      cnt++;
      KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
#endif
    }
    /* The following happens very seldom */
    if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
    {
#if defined(PAGECACHE_DEBUG)
      cnt=0;
#endif
      for (block= pagecache->file_blocks[FILE_HASH(*file)] ;
           block;
           block= next)
      {
#if defined(PAGECACHE_DEBUG)
        cnt++;
        KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used);
#endif
        next= block->next_changed;
        if (block->hash_link->file.file == file->file &&
            (! (block->status & BLOCK_CHANGED)
             || type == FLUSH_IGNORE_CHANGED))
        {
          reg_requests(pagecache, block, 1);
          free_block(pagecache, block);
        }
      }
    }
  }

#ifndef DBUG_OFF
  DBUG_EXECUTE("check_pagecache",
               test_key_cache(pagecache, "end of flush_pagecache_blocks", 0););
#endif
  if (cache != cache_buff)
    my_free((gptr) cache, MYF(0));
  if (last_errno)
    errno=last_errno;                /* Return first error */
  DBUG_RETURN(last_errno != 0);
}


/*
  Flush all blocks for a file to disk

  SYNOPSIS

    flush_pagecache_blocks()
      pagecache            pointer to a page cache data structure
      file                handler for the file to flush to
      flush_type          type of the flush

  RETURN
    0   ok
    1  error
*/

int flush_pagecache_blocks(PAGECACHE *pagecache,
                           PAGECACHE_FILE *file, enum flush_type type)
{
  int res;
  DBUG_ENTER("flush_pagecache_blocks");
  DBUG_PRINT("enter", ("pagecache: 0x%lx", (long) pagecache));

  if (pagecache->disk_blocks <= 0)
    DBUG_RETURN(0);
  pagecache_pthread_mutex_lock(&pagecache->cache_lock);
  inc_counter_for_resize_op(pagecache);
  res= flush_pagecache_blocks_int(pagecache, file, type);
  dec_counter_for_resize_op(pagecache);
  pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
  DBUG_RETURN(res);
}


/*
  Reset the counters of a key cache.

  SYNOPSIS
    reset_pagecache_counters()
    name       the name of a key cache
    pagecache  pointer to the pagecache to be reset

  DESCRIPTION
   This procedure is used to reset the counters of all currently used key
   caches, both the default one and the named ones.
  RETURN
    0 on success (always because it can't fail)
*/

int reset_pagecache_counters(const char *name, PAGECACHE *pagecache)
{
  DBUG_ENTER("reset_pagecache_counters");
  if (!pagecache->inited)
  {
    DBUG_PRINT("info", ("Key cache %s not initialized.", name));
    DBUG_RETURN(0);
  }
  DBUG_PRINT("info", ("Resetting counters for key cache %s.", name));

  pagecache->global_blocks_changed= 0;   /* Key_blocks_not_flushed */
  pagecache->global_cache_r_requests= 0; /* Key_read_requests */
  pagecache->global_cache_read= 0;       /* Key_reads */
  pagecache->global_cache_w_requests= 0; /* Key_write_requests */
  pagecache->global_cache_write= 0;      /* Key_writes */
  DBUG_RETURN(0);
}


/*
  Allocates a buffer and stores in it some information about all dirty pages
  of type PAGECACHE_LSN_PAGE.

  SYNOPSIS
    pagecache_collect_changed_blocks_with_lsn()
      pagecache pointer to the page cache
      str       (OUT) pointer to a LEX_STRING where the allocated buffer, and
                its size, will be put
      max_lsn   (OUT) pointer to a LSN where the maximum rec_lsn of all
                relevant dirty pages will be put

  DESCRIPTION
    Does the allocation because the caller cannot know the size itself.
    Memory freeing is to be done by the caller (if the "str" member of the
    LEX_STRING is not NULL).
    Ignores all pages of another type than PAGECACHE_LSN_PAGE, because they
    are not interesting for a checkpoint record.
    The caller has the intention of doing checkpoints.

  RETURN
    0 on success
    1 on error
*/
my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
                                                  LEX_STRING *str,
                                                  LSN *max_lsn)
{
  my_bool error;
  ulong stored_list_size= 0;
  uint file_hash;
  char *ptr;
  DBUG_ENTER("pagecache_collect_changed_blocks_with_LSN");

  *max_lsn= 0;
  DBUG_ASSERT(NULL == str->str);
  /*
    We lock the entire cache but will be quick, just reading/writing a few MBs
    of memory at most.
    When we enter here, we must be sure that no "first_in_switch" situation
    is happening or will happen (either we have to get rid of
    first_in_switch in the code or, first_in_switch has to increment a
    "danger" counter for this function to know it has to wait). TODO.
  */
  pagecache_pthread_mutex_lock(&pagecache->cache_lock);

  /* Count how many dirty pages are interesting */
  for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++)
  {
    PAGECACHE_BLOCK_LINK *block;
    for (block= pagecache->changed_blocks[file_hash] ;
         block;
         block= block->next_changed)
    {
      /*
        Q: is there something subtle with block->hash_link: can it be NULL?
        does it have to be == hash_link->block... ?
      */
      DBUG_ASSERT(block->hash_link != NULL);
      DBUG_ASSERT(block->status & BLOCK_CHANGED);
      if (block->type != PAGECACHE_LSN_PAGE)
        continue; /* no need to store it */
      /*
        In the current pagecache, rec_lsn is not set correctly:
        1) it is set on pagecache_unlock(), too late (a page is dirty
        (BLOCK_CHANGED) since the first pagecache_write()). So in this
        scenario:
        thread1:                  thread2:
        write_REDO
        pagecache_write()         checkpoint : reclsn not known
        pagecache_unlock(sets rec_lsn)
        commit
        crash,
        at recovery we will wrongly skip the REDO. It also affects the
        low-water mark's computation.
        2) sometimes the unlocking can be an implicit action of
        pagecache_write(), without any call to pagecache_unlock(), then
        rec_lsn is not set.
        1) and 2) are critical problems.
        TODO: fix this when Monty has explained how he writes BLOB pages.
      */
      if (block->rec_lsn == 0)
      {
        DBUG_ASSERT(0);
        goto err;
      }
      stored_list_size++;
    }
  }

  /* 8 bytes for the entry count, then (file, pageno, rec_lsn) per entry */
  str->length= 8+(4+4+8)*stored_list_size;
  if (NULL == (str->str= my_malloc(str->length, MYF(MY_WME))))
    goto err;
  ptr= str->str;
  int8store(ptr, stored_list_size);
  ptr+= 8;
  if (0 == stored_list_size)
    goto end;
  for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++)
  {
    PAGECACHE_BLOCK_LINK *block;
    for (block= pagecache->changed_blocks[file_hash] ;
         block;
         block= block->next_changed)
    {
      if (block->type != PAGECACHE_LSN_PAGE)
        continue; /* no need to store it in the checkpoint record */
      DBUG_ASSERT((4 == sizeof(block->hash_link->file.file)) &&
                  (4 == sizeof(block->hash_link->pageno)));
      int4store(ptr, block->hash_link->file.file);
      ptr+= 4;
      int4store(ptr, block->hash_link->pageno);
      ptr+= 4;
      int8store(ptr, (ulonglong) block->rec_lsn);
      ptr+= 8;
      set_if_bigger(*max_lsn, block->rec_lsn);
    }
  }
  error= 0;
  goto end;
err:
  error= 1;
end:
  pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
  DBUG_RETURN(error);
}


#ifndef DBUG_OFF
/*
  Test if disk-cache is ok
*/
static void test_key_cache(PAGECACHE *pagecache __attribute__((unused)),
                           const char *where __attribute__((unused)),
                           my_bool lock __attribute__((unused)))
{
  /* TODO */
}
#endif

#if defined(PAGECACHE_TIMEOUT)

#define KEYCACHE_DUMP_FILE  "pagecache_dump.txt"
#define MAX_QUEUE_LEN  100


static void pagecache_dump(PAGECACHE *pagecache)
{
  FILE *pagecache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w");
  struct st_my_thread_var *last;
  struct st_my_thread_var *thread;
  PAGECACHE_BLOCK_LINK *block;
  PAGECACHE_HASH_LINK *hash_link;
  PAGECACHE_PAGE *page;
  uint i;

  /*
    NOTE(review): 'thread' is used here before it is assigned (it is first
    set from waiting_for_hash_link below) -- this prints garbage; confirm
    whether my_thread_var->id was intended here.
  */
  fprintf(pagecache_dump_file, "thread:%u\n", thread->id);

  i=0;
  thread=last=waiting_for_hash_link.last_thread;
  fprintf(pagecache_dump_file, "queue of threads waiting for hash link\n");
  if (thread)
    do
    {
      thread= thread->next;
      page= (PAGECACHE_PAGE
*) thread->opt_info; - fprintf(pagecache_dump_file, - "thread:%u, (file,pageno)=(%u,%lu)\n", - thread->id,(uint) page->file.file,(ulong) page->pageno); - if (++i == MAX_QUEUE_LEN) - break; - } - while (thread != last); - - i=0; - thread=last=waiting_for_block.last_thread; - fprintf(pagecache_dump_file, "queue of threads waiting for block\n"); - if (thread) - do - { - thread=thread->next; - hash_link= (PAGECACHE_HASH_LINK *) thread->opt_info; - fprintf(pagecache_dump_file, - "thread:%u hash_link:%u (file,pageno)=(%u,%lu)\n", - thread->id, (uint) PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link), - (uint) hash_link->file.file,(ulong) hash_link->pageno); - if (++i == MAX_QUEUE_LEN) - break; - } - while (thread != last); - - for (i=0 ; i < pagecache->blocks_used ; i++) - { - int j; - block= &pagecache->block_root[i]; - hash_link= block->hash_link; - fprintf(pagecache_dump_file, - "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n", - i, (int) (hash_link ? - PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link) : - -1), - block->status, block->requests, block->condvar ? 
1 : 0); - for (j=0 ; j < COND_SIZE; j++) - { - PAGECACHE_WQUEUE *wqueue=&block->wqueue[j]; - thread= last= wqueue->last_thread; - fprintf(pagecache_dump_file, "queue #%d\n", j); - if (thread) - { - do - { - thread=thread->next; - fprintf(pagecache_dump_file, - "thread:%u\n", thread->id); - if (++i == MAX_QUEUE_LEN) - break; - } - while (thread != last); - } - } - } - fprintf(pagecache_dump_file, "LRU chain:"); - block= pagecache= used_last; - if (block) - { - do - { - block= block->next_used; - fprintf(pagecache_dump_file, - "block:%u, ", BLOCK_NUMBER(pagecache, block)); - } - while (block != pagecache->used_last); - } - fprintf(pagecache_dump_file, "\n"); - - fclose(pagecache_dump_file); -} - -#endif /* defined(PAGECACHE_TIMEOUT) */ - -#if defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) - - -static int pagecache_pthread_cond_wait(pthread_cond_t *cond, - pthread_mutex_t *mutex) -{ - int rc; - struct timeval now; /* time when we started waiting */ - struct timespec timeout; /* timeout value for the wait function */ - struct timezone tz; -#if defined(PAGECACHE_DEBUG) - int cnt=0; -#endif - - /* Get current time */ - gettimeofday(&now, &tz); - /* Prepare timeout value */ - timeout.tv_sec= now.tv_sec + PAGECACHE_TIMEOUT; - /* - timeval uses microseconds. - timespec uses nanoseconds. 
- 1 nanosecond = 1000 micro seconds - */ - timeout.tv_nsec= now.tv_usec * 1000; - KEYCACHE_THREAD_TRACE_END("started waiting"); -#if defined(PAGECACHE_DEBUG) - cnt++; - if (cnt % 100 == 0) - fprintf(pagecache_debug_log, "waiting...\n"); - fflush(pagecache_debug_log); -#endif - rc= pthread_cond_timedwait(cond, mutex, &timeout); - KEYCACHE_THREAD_TRACE_BEGIN("finished waiting"); - if (rc == ETIMEDOUT || rc == ETIME) - { -#if defined(PAGECACHE_DEBUG) - fprintf(pagecache_debug_log,"aborted by pagecache timeout\n"); - fclose(pagecache_debug_log); - abort(); -#endif - pagecache_dump(); - } - -#if defined(PAGECACHE_DEBUG) - KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT); -#else - assert(rc != ETIMEDOUT); -#endif - return rc; -} -#else -#if defined(PAGECACHE_DEBUG) -static int pagecache_pthread_cond_wait(pthread_cond_t *cond, - pthread_mutex_t *mutex) -{ - int rc; - KEYCACHE_THREAD_TRACE_END("started waiting"); - rc= pthread_cond_wait(cond, mutex); - KEYCACHE_THREAD_TRACE_BEGIN("finished waiting"); - return rc; -} -#endif -#endif /* defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) */ - -#if defined(PAGECACHE_DEBUG) -static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex) -{ - int rc; - rc= pthread_mutex_lock(mutex); - KEYCACHE_THREAD_TRACE_BEGIN(""); - return rc; -} - - -static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex) -{ - KEYCACHE_THREAD_TRACE_END(""); - pthread_mutex_unlock(mutex); -} - - -static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond) -{ - int rc; - KEYCACHE_THREAD_TRACE("signal"); - rc= pthread_cond_signal(cond); - return rc; -} - - -#if defined(PAGECACHE_DEBUG_LOG) - - -static void pagecache_debug_print(const char * fmt, ...) 
-{ - va_list args; - va_start(args,fmt); - if (pagecache_debug_log) - { - VOID(vfprintf(pagecache_debug_log, fmt, args)); - VOID(fputc('\n',pagecache_debug_log)); - } - va_end(args); -} -#endif /* defined(PAGECACHE_DEBUG_LOG) */ - -#if defined(PAGECACHE_DEBUG_LOG) - - -void pagecache_debug_log_close(void) -{ - if (pagecache_debug_log) - fclose(pagecache_debug_log); -} -#endif /* defined(PAGECACHE_DEBUG_LOG) */ - -#endif /* defined(PAGECACHE_DEBUG) */ diff --git a/mysys/my_safehash.h b/mysys/my_safehash.h new file mode 100644 index 00000000000..53845a5fec7 --- /dev/null +++ b/mysys/my_safehash.h @@ -0,0 +1,58 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Handling of multiple key caches + + The idea is to have a thread safe hash on the table name, + with a default key cache value that is returned if the table name is not in + the cache. 
+*/ + +#include + +/* + Struct to store a key and pointer to object +*/ + +typedef struct st_safe_hash_entry +{ + byte *key; + uint length; + byte *data; + struct st_safe_hash_entry *next, **prev; +} SAFE_HASH_ENTRY; + + +typedef struct st_safe_hash_with_default +{ +#ifdef THREAD + rw_lock_t mutex; +#endif + HASH hash; + byte *default_value; + SAFE_HASH_ENTRY *root; +} SAFE_HASH; + + +my_bool safe_hash_init(SAFE_HASH *hash, uint elements, + byte *default_value); +void safe_hash_free(SAFE_HASH *hash); +byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length, + byte *def); +my_bool safe_hash_set(SAFE_HASH *hash, const byte *key, uint length, + byte *data); +void safe_hash_change(SAFE_HASH *hash, byte *old_data, byte *new_data); -- cgit v1.2.1 From 92e99ce4243b5ffdc069f1681136e858e888d646 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 18 Apr 2007 12:55:09 +0300 Subject: Postmerge fixes. added forgoten file. The patch broke maria.test (will be fixed later) sql/handler.cc: Pagecache block should be equal maria block. sql/mysqld.cc: parameters Fixed. storage/maria/ma_bitmap.c: fixed typo. storage/maria/ma_blockrec.c: fixed typo. storage/maria/ma_delete_all.c: fixed typo. storage/maria/ma_page.c: fixed typo. storage/maria/ma_pagecache.c: pin/lock debugging protection activated by default. storage/maria/ma_pagecaches.c: parameters Fixed. storage/maria/ma_preload.c: fixed typo. 
mysys/my_safehash.c: New BitKeeper file ``mysys/my_safehash.c'' --- mysys/my_safehash.c | 297 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 297 insertions(+) create mode 100644 mysys/my_safehash.c (limited to 'mysys') diff --git a/mysys/my_safehash.c b/mysys/my_safehash.c new file mode 100644 index 00000000000..00ca1569b91 --- /dev/null +++ b/mysys/my_safehash.c @@ -0,0 +1,297 @@ +/* Copyright (C) 2003-2007 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Handling of multiple key caches + + The idea is to have a thread safe hash on the table name, + with a default key cache value that is returned if the table name is not in + the cache. +*/ + +#include "mysys_priv.h" +#include +#include "my_safehash.h" + +/***************************************************************************** + General functions to handle SAFE_HASH objects. + + A SAFE_HASH object is used to store the hash, the mutex and default value + needed by the rest of the key cache code. + This is a separate struct to make it easy to later reuse the code for other + purposes + + All entries are linked in a list to allow us to traverse all elements + and delete selected ones. (HASH doesn't allow any easy ways to do this). 
+*****************************************************************************/ + + +/* + Free a SAFE_HASH_ENTRY + + SYNOPSIS + safe_hash_entry_free() + entry The entry which should be freed + + NOTE + This function is called by the hash object on delete +*/ + +static void safe_hash_entry_free(SAFE_HASH_ENTRY *entry) +{ + DBUG_ENTER("free_assign_entry"); + my_free((gptr) entry, MYF(0)); + DBUG_VOID_RETURN; +} + + +/* + Get key and length for a SAFE_HASH_ENTRY + + SYNOPSIS + safe_hash_entry_get() + entry The entry for which the key should be returned + length Length of the key + + RETURN + # reference on the key +*/ + +static byte *safe_hash_entry_get(SAFE_HASH_ENTRY *entry, uint *length, + my_bool not_used __attribute__((unused))) +{ + *length= entry->length; + return (byte*) entry->key; +} + + +/* + Init a SAFE_HASH object + + SYNOPSIS + safe_hash_init() + hash safe_hash handler + elements Expected max number of elements + default_value default value + + NOTES + In case of error we set hash->default_value to 0 to allow one to call + safe_hash_free on an object that couldn't be initialized. + + RETURN + 0 OK + 1 error +*/ + +my_bool safe_hash_init(SAFE_HASH *hash, uint elements, + byte *default_value) +{ + DBUG_ENTER("safe_hash"); + if (hash_init(&hash->hash, &my_charset_bin, elements, + 0, 0, (hash_get_key) safe_hash_entry_get, + (void (*)(void*)) safe_hash_entry_free, 0)) + { + hash->default_value= 0; + DBUG_RETURN(1); + } + my_rwlock_init(&hash->mutex, 0); + hash->default_value= default_value; + hash->root= 0; + DBUG_RETURN(0); +} + + +/* + Free a SAFE_HASH object + + SYNOPSIS + safe_hash_free() + hash Hash handle + + NOTES + This is safe to call on any object that has been sent to safe_hash_init() +*/ + +void safe_hash_free(SAFE_HASH *hash) +{ + /* + Test if safe_hash_init succeeded. This will also guard us against multiple + free calls. 
+ */ + if (hash->default_value) + { + hash_free(&hash->hash); + rwlock_destroy(&hash->mutex); + hash->default_value=0; + } +} + + +/* + Return the value stored for a key or default value if no key + + SYNOPSIS + safe_hash_search() + hash Hash handle + key key (path to table etc..) + length Length of key + def Default value of data + + RETURN + # data associated with the key of default value if data was not found +*/ + +byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length, + byte *def) +{ + byte *result; + DBUG_ENTER("safe_hash_search"); + rw_rdlock(&hash->mutex); + result= hash_search(&hash->hash, key, length); + rw_unlock(&hash->mutex); + if (!result) + result= def; + else + result= ((SAFE_HASH_ENTRY*) result)->data; + DBUG_PRINT("exit",("data: 0x%lx", (long) result)); + DBUG_RETURN(result); +} + + +/* + Associate a key with some data + + SYNOPSIS + safe_hash_set() + hash Hash handle + key key (path to table etc..) + length Length of key + data data to to associate with the data + + NOTES + This can be used both to insert a new entry and change an existing + entry. + If one associates a key with the default key cache, the key is deleted + + RETURN + 0 OK + 1 error (Can only be EOM). In this case my_message() is called. +*/ + +my_bool safe_hash_set(SAFE_HASH *hash, const byte *key, uint length, + byte *data) +{ + SAFE_HASH_ENTRY *entry; + my_bool error= 0; + DBUG_ENTER("safe_hash_set"); + DBUG_PRINT("enter",("key: %.*s data: 0x%lx", length, key, (long) data)); + + rw_wrlock(&hash->mutex); + entry= (SAFE_HASH_ENTRY*) hash_search(&hash->hash, key, length); + + if (data == hash->default_value) + { + /* + The key is to be associated with the default entry. 
In this case + we can just delete the entry (if it existed) from the hash as a + search will return the default entry + */ + if (!entry) /* nothing to do */ + goto end; + /* unlink entry from list */ + if ((*entry->prev= entry->next)) + entry->next->prev= entry->prev; + hash_delete(&hash->hash, (byte*) entry); + goto end; + } + if (entry) + { + /* Entry existed; Just change the pointer to point at the new data */ + entry->data= data; + } + else + { + if (!(entry= (SAFE_HASH_ENTRY *) my_malloc(sizeof(*entry) + length, + MYF(MY_WME)))) + { + error= 1; + goto end; + } + entry->key= (byte*) (entry +1); + memcpy((char*) entry->key, (char*) key, length); + entry->length= length; + entry->data= data; + /* Link entry to list */ + if ((entry->next= hash->root)) + entry->next->prev= &entry->next; + entry->prev= &hash->root; + hash->root= entry; + if (my_hash_insert(&hash->hash, (byte*) entry)) + { + /* This can only happen if hash got out of memory */ + my_free((char*) entry, MYF(0)); + error= 1; + goto end; + } + } + +end: + rw_unlock(&hash->mutex); + DBUG_RETURN(error); +} + + +/* + Change all entries with one data value to another data value + + SYNOPSIS + safe_hash_change() + hash Hash handle + old_data Old data + new_data Change all 'old_data' to this + + NOTES + We use the linked list to traverse all elements in the hash as + this allows us to delete elements in the case where 'new_data' is the + default value. 
+*/ + +void safe_hash_change(SAFE_HASH *hash, byte *old_data, byte *new_data) +{ + SAFE_HASH_ENTRY *entry, *next; + DBUG_ENTER("safe_hash_set"); + + rw_wrlock(&hash->mutex); + + for (entry= hash->root ; entry ; entry= next) + { + next= entry->next; + if (entry->data == old_data) + { + if (new_data == hash->default_value) + { + if ((*entry->prev= entry->next)) + entry->next->prev= entry->prev; + hash_delete(&hash->hash, (byte*) entry); + } + else + entry->data= new_data; + } + } + + rw_unlock(&hash->mutex); + DBUG_VOID_RETURN; +} -- cgit v1.2.1 From ee8f8dd128ffa6fae5270918154526039b8eb80c Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 18 Apr 2007 14:45:32 +0300 Subject: Fixed dubug info. sql/set_var.cc: fixed parameters. --- mysys/my_safehash.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mysys') diff --git a/mysys/my_safehash.c b/mysys/my_safehash.c index 00ca1569b91..57f408942bf 100644 --- a/mysys/my_safehash.c +++ b/mysys/my_safehash.c @@ -52,7 +52,7 @@ static void safe_hash_entry_free(SAFE_HASH_ENTRY *entry) { - DBUG_ENTER("free_assign_entry"); + DBUG_ENTER("safe_hash_entry_free"); my_free((gptr) entry, MYF(0)); DBUG_VOID_RETURN; } @@ -99,7 +99,7 @@ static byte *safe_hash_entry_get(SAFE_HASH_ENTRY *entry, uint *length, my_bool safe_hash_init(SAFE_HASH *hash, uint elements, byte *default_value) { - DBUG_ENTER("safe_hash"); + DBUG_ENTER("safe_hash_init"); if (hash_init(&hash->hash, &my_charset_bin, elements, 0, 0, (hash_get_key) safe_hash_entry_get, (void (*)(void*)) safe_hash_entry_free, 0)) @@ -272,7 +272,7 @@ end: void safe_hash_change(SAFE_HASH *hash, byte *old_data, byte *new_data) { SAFE_HASH_ENTRY *entry, *next; - DBUG_ENTER("safe_hash_set"); + DBUG_ENTER("safe_hash_change"); rw_wrlock(&hash->mutex); -- cgit v1.2.1 From eb7d9500a9909ce594c4d169e70fb5cecbb33e2b Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 19 Apr 2007 13:18:56 +0300 Subject: Fixes after review of guilhem of block record patch Short overview: Changed a lot 
of variable, functions, defines and struct elements to use more readable names More comments (mostly function and structure slot comments) Other things: Changed 'USE_WHOLE_KEY' to a big number to not interfer with long keys Ensure that tail block are at least of size 'MIN_TAIL_SIZE' Allow longer keys and key parts than before (don't limit Maria interface by HA_MAX_KEY_LENGTH) Use ma_chsize() to write initial bitmap page Added checking if using file with wrong block_size Added issing types to type_names[] (for maria_chk -d) Added maria_max_key_length() include/maria.h: Changed maria_portable_size_char_ptr to portable_size_char_ptr and moved it to my_handler.h Removed not used variable maria_delay_rec_write. More comments include/my_handler.h: Added portable_sizeof_char_ptr include/myisam.h: Changed mi_portable_size_char_ptr to portable_size_char_ptr and moved it to my_handler.h mysql-test/r/maria.result: Fix results when we now have a longer key length mysql-test/t/maria.test: More tests mysys/my_pread.c: Code cleanup sql/net_serv.cc: Changed warning to note (as in main 5.1 tree) to avoid not critical failing tests sql/sql_select.cc: Use portable_sizeof_char_ptr storage/maria/ha_maria.cc: Added max_supported_key_length(), as this is not a trival function anymore storage/maria/ha_maria.h: Moved max_supported_key_length(), as this is not a trival function anymore storage/maria/ma_bitmap.c: Lots of new comments Added maria_bitmap_marker[] to mark 2 last bytes of each bitmap (for corruption detection) Trivial code changes (based on review comments) storage/maria/ma_blockrec.c: More code comments Renamed _block_row() functions to _block_record() Trivial code changes, based on review comments Moved Code from maria_close() to _ma_end_block_record() Some function renames to make things more understandable DIR_ENTRY_OFFSET -> DIR_COUNT_OFFSET keybuff_used -> keyread_buff_used ma_recordpos_to_offset -> ma_recordpos_to_dir_entry Changed some 'rec' named variables to 'column'. 
Ensure that tail block are at least of size 'MIN_TAIL_SIZE' storage/maria/ma_blockrec.h: More comments DIRCOUNT_SIZE -> DIR_COUNT_SIZE Added define for maira_bitmap_marker[] ma_recordpos_to_offset -> ma_recordpos_to_dir_entry xxx_block_row() -> xxx_block_record() Made _ma_read_bitmap_page() static storage/maria/ma_check.c: More comments ma_recordpos_to_offset() -> ma_recordpos_to_dir_entry() DIR_ENTRY_OFFSET -> DIR_COUNT_OFFSET rec variables -> column variables recdef -> columndef storage/maria/ma_checksum.c: rec -> column Avoid an 'if' in _ma_checksum() for the common case storage/maria/ma_close.c: Moved resetting of info->dfile to ma_end_once_block_record() storage/maria/ma_create.c: Some variable changes to make things more readable: recinfo -> columndef rec -> column rec_end -> end_column record_type -> datafile_type ma_recinfo_write() -> ma_columndef_write() Fixed wrong setting of 'data_file_length'; Now max_rows should be calculated correctly New check if too long key. Use ma_chsize() to write bitmap page. 
storage/maria/ma_delete.c: keybuff_used -> keyread_buff_used storage/maria/ma_dynrec.c: rec -> columndef rec_length -> column_length maria_portable_sizeof_char_ptr -> portable_sizeof_char_ptr Better comment for _ma_read_rnd_dynamic_record() storage/maria/ma_ft_eval.c: maria_portable_sizeof_char_ptr -> portable_sizeof_char_ptr storage/maria/ma_ft_test1.c: maria_portable_sizeof_char_ptr -> portable_sizeof_char_ptr storage/maria/ma_ft_update.c: keybuff_used -> keyread_buff_used storage/maria/ma_info.c: More comments storage/maria/ma_open.c: Added checking if using file with wrong block_size New checking of max_key_length rec -> columndef _ma_recinfo_write -> _ma_columndef_write Don't change block_size (as this is checked in ma_create()) More comments storage/maria/ma_packrec.c: Trivial code changes rec -> columndef maria_portable_sizeof_char_ptr -> portable_sizeof_char_ptr storage/maria/ma_page.c: keybuff_used -> keyread_buff_used storage/maria/ma_rkey.c: Removed not needded empty line storage/maria/ma_rrnd.c: Removed not used variable storage/maria/ma_rt_index.c: keybuff_used -> keyread_buff_used storage/maria/ma_search.c: keybuff_used -> keyread_buff_used Trivial code changes storage/maria/ma_sp_test.c: maria_portable_sizeof_char_ptr -> portable_sizeof_char_ptr storage/maria/ma_test1.c: maria_portable_sizeof_char_ptr -> portable_sizeof_char_ptr storage/maria/ma_test2.c: maria_portable_sizeof_char_ptr -> portable_sizeof_char_ptr storage/maria/ma_update.c: Updated comment storage/maria/ma_write.c: keybuff_used -> keyread_buff_used storage/maria/maria_chk.c: Added missing types to type_names[] Removed not used variable rec -> columndef Replaced some numbers with define flags storage/maria/maria_def.h: More comments Added 'MARIA_INDEX_MIN_OVERHEAD_SIZE' rec -> columndef keybuff_used -> keyread_buff_used _ma_recinfo_write -> _ma_culumndef_write _ma_recinfo_read -> _ma_columndef_read Changed 'USE_WHOLE_KEY' to a big number to not interfer with long keys Added 
maria_max_key_length() storage/maria/maria_pack.c: Updated message strings rec -> columndef maria_portable_sizeof_char_ptr -> portable_sizeof_char_ptr More comments storage/myisam/ft_eval.c: mi_portable_sizeof_char_ptr -> portable_sizeof_char_ptr storage/myisam/ft_test1.c: mi_portable_sizeof_char_ptr -> portable_sizeof_char_ptr storage/myisam/mi_checksum.c: mi_portable_sizeof_char_ptr -> portable_sizeof_char_ptr storage/myisam/mi_create.c: mi_portable_sizeof_char_ptr -> portable_sizeof_char_ptr storage/myisam/mi_dynrec.c: mi_portable_sizeof_char_ptr -> portable_sizeof_char_ptr storage/myisam/mi_open.c: mi_portable_sizeof_char_ptr -> portable_sizeof_char_ptr storage/myisam/mi_packrec.c: mi_portable_sizeof_char_ptr -> portable_sizeof_char_ptr storage/myisam/mi_rkey.c: Unlock mutex also in case of error storage/myisam/mi_test1.c: mi_portable_sizeof_char_ptr -> portable_sizeof_char_ptr storage/myisam/mi_test2.c: mi_portable_sizeof_char_ptr -> portable_sizeof_char_ptr storage/myisam/myisampack.c: mi_portable_sizeof_char_ptr -> portable_sizeof_char_ptr storage/myisam/sp_test.c: mi_portable_sizeof_char_ptr -> portable_sizeof_char_ptr support-files/magic: Fixed typo --- mysys/my_pread.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'mysys') diff --git a/mysys/my_pread.c b/mysys/my_pread.c index 2b9a994299f..339c5627a3e 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -87,10 +87,8 @@ uint my_pread(File Filedes, byte *Buffer, uint Count, my_off_t offset, my_error(EE_READ, MYF(ME_BELL+ME_WAITTANG), my_filename(Filedes),my_errno); else if (MyFlags & (MY_NABP | MY_FNABP)) - { my_error(EE_EOFERR, MYF(ME_BELL+ME_WAITTANG), my_filename(Filedes),my_errno); - } } if ((int) readbytes == -1 || (MyFlags & (MY_FNABP | MY_NABP))) DBUG_RETURN(MY_FILE_ERROR); /* Return with error */ -- cgit v1.2.1 From 8f39541e7d8ba812d1198af5d4179ba44d6693fa Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 29 May 2007 20:13:56 +0300 Subject: This patch is a collection of patches from from 
Sanja, Sergei and Monty. Added logging and pinning of pages to block format. Integration of transaction manager, log handler. Better page cache intergration Split trnman.h into two files, so that we don't have to include my_atomic.h into C++ programs. Renaming of structures, more comments, more debugging etc. Fixed problem with small head block + long varchar. Added extra argument to delete_record() and update_record() (needed for UNDO logging) Small changes to interface of pagecache and log handler. Change initialization of log_record_type_descriptors to not be depending on enum order. Use array of LEX_STRING's to send data to log handler Added 'dummy' transaction option to MARIA_INFO so that we can always assume 'trn' exists. include/lf.h: Interface fixes Rename of structures (Patch from Sergei via Sanja) include/my_atomic.h: More comments include/my_global.h: Added MY_ERRPTR include/pagecache.h: Added undo LSN when unlocking pages mysql-test/r/maria.result: Updated results mysql-test/t/maria.test: Added autocommit around lock tables (Patch from Sanja) mysys/lf_alloc-pin.c: Post-review fixes, simple optimizations More comments Struct slot renames Check amount of memory on stack (Patch from Sergei) mysys/lf_dynarray.c: More comments mysys/lf_hash.c: More comments After review fixes (Patch from Sergei) storage/maria/ha_maria.cc: Split trnman.h into two files, so that we don't have to include my_atomic.h into the .cc program. (Temporary fix to avoid bug in gcc) Move out all deferencing of the transaction structure. 
Transaction manager integrated (Patch from Sergei) storage/maria/ha_maria.h: Added prototype for start_stmt() storage/maria/lockman.c: Function call rename storage/maria/ma_bitmap.c: Mark deleted pages free from page cache storage/maria/ma_blockrec.c: Offset -> rownr More debugging Fixed problem with small head block + long varchar Added logging of changed pages Added logging of undo (Including only loggging of changed fields in case of update) Added pinning/unpinning of all changed pages More comments Added free_full_pages() as the same code was used in several places. fill_rows_parts() renamed as fill_insert_undo_parts() offset -> rownr Added some optimization of not transactional tables _ma_update_block_record() has new parameter, as we need original row to do efficent undo for update storage/maria/ma_blockrec.h: Added ROW_EXTENTS_ON_STACK Changed prototype for update and delete of row storage/maria/ma_check.c: Added original row to delete_record() call storage/maria/ma_control_file.h: Added ifdefs for C++ storage/maria/ma_delete.c: Added original row to delete_record() call (Needed for efficent undo logging) storage/maria/ma_dynrec.c: Added extra argument to delete_record() and update_record() Removed not used variable storage/maria/ma_init.c: Initialize log handler storage/maria/ma_loghandler.c: Removed not used variable Change initialization of log_record_type_descriptors to not be depending on enum order Use array of LEX_STRING's to send data to log handler storage/maria/ma_loghandler.h: New defines Use array of LEX_STRING's to send data to log handler storage/maria/ma_open.c: Added 'dummy' transaction option to MARIA_INFO so that we can always assume 'trn' exists. 
Store in MARIA_SHARE->page_type if pages will have up to date LSN's storage/maria/ma_pagecache.c: Don't decrease number of readers when using pagecache_write()/pagecache_read() In pagecache_write() decrement request count if page was left pinned Added pagecache_delete_pages() Removed some casts Make trace output consistent with rest of code Simplify calling of DBUG_ASSERT(0) Only update LSN if the LSN is bigger than what's already on the page Added LSN parameter pagecache_unpin_page(), pagecache_unpin(), and pagecache_unlock() (Part of patch from Sanja) storage/maria/ma_static.c: Added 'dummy' transaction option to MARIA_INFO so that we can always assume 'trn' exists. Added default page cache storage/maria/ma_statrec.c: Added extra argument to delete_record() and update_record() storage/maria/ma_test1.c: Added option -T for transactions storage/maria/ma_test2.c: Added option -T for transactions storage/maria/ma_test_all.sh: Test with transactions storage/maria/ma_update.c: Changed prototype for update of row storage/maria/maria_def.h: Changed prototype for update & delete of row as block records need to access the old row Store in MARIA_SHARE->page_type if pages will have up to date LSN's Added MARIA_MAX_TREE_LEVELS to allow us to calculate the number of possible pinned pages we may need. Removed not used 'empty_bits_buffer' Added pointer to transaction object Added array for pinned pages Added log_row_parts array for logging of field data. Added MARIA_PINNED_PAGE to store pinned pages storage/maria/trnman.c: Added accessor functions to transaction object Added missing DBUG_RETURN() More debugging More comments Changed // comment of code to #ifdef NOT_USED Transaction manager integrated. Post review fixes Part of patch originally from Sergei storage/maria/trnman.h: Split trnman.h into two files, so that we don't have to include my_atomic.h into the .cc program. 
(Temporary fix to avoid bug in gcc) storage/maria/unittest/ma_pagecache_single.c: Added missing argument Added SKIP_BIG_TESTS (Patch from Sanja) storage/maria/unittest/ma_test_loghandler-t.c: Test logging with new LEX_STRING parameter (Patch from Sanja) storage/maria/unittest/ma_test_loghandler_multigroup-t.c: Test logging with new LEX_STRING parameter (Patch from Sanja) storage/maria/unittest/ma_test_loghandler_multithread-t.c: Test logging with new LEX_STRING parameter (Patch from Sanja) storage/maria/unittest/ma_test_loghandler_pagecache-t.c: Test logging with new LEX_STRING parameter (Patch from Sanja) storage/maria/unittest/trnman-t.c: Stack overflow detection (Patch from Sergei) unittest/unit.pl: Command-line options --big and --verbose (Patch from Sergei) unittest/mytap/tap.c: Detect --big (Patch from Sergei) unittest/mytap/tap.h: Skip_big_tests and SKIP_BIG_TESTS (Patch from Sergei) storage/maria/trnman_public.h: New BitKeeper file ``storage/maria/trnman_public.h'' --- mysys/lf_alloc-pin.c | 188 ++++++++++++++++++++++++++++++++++++--------------- mysys/lf_dynarray.c | 10 ++- mysys/lf_hash.c | 167 +++++++++++++++++++++++++++++++++++---------- 3 files changed, 271 insertions(+), 94 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index e964553a64c..51c4df7c94a 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -1,5 +1,5 @@ /* QQ: TODO multi-pinbox */ -/* Copyright (C) 2000 MySQL AB +/* Copyright (C) 2006 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,10 +32,11 @@ Pins are used to solve ABA problem. To use pins one must obey a pinning protocol: + 1. Let's assume that PTR is a shared pointer to an object. Shared means that any thread may modify it anytime to point to a different object and free the old object. Later the freed object may be potentially - allocated by another thread. 
If we're unlucky that another thread may + allocated by another thread. If we're unlucky that other thread may set PTR to point to this object again. This is ABA problem. 2. Create a local pointer LOCAL_PTR. 3. Pin the PTR in a loop: @@ -70,12 +71,34 @@ pins you have is limited (and small), keeping an object pinned prevents its reuse and cause unnecessary mallocs. + Explanations: + + 3. The loop is important. The following can occur: + thread1> LOCAL_PTR= PTR + thread2> free(PTR); PTR=0; + thread1> pin(PTR, PIN_NUMBER); + now thread1 cannot access LOCAL_PTR, even if it's pinned, + because it points to a freed memory. That is, it *must* + verify that it has indeed pinned PTR, the shared pointer. + + 6. When a thread wants to free some LOCAL_PTR, and it scans + all lists of pins to see whether it's pinned, it does it + upwards, from low pin numbers to high. Thus another thread + must copy an address from one pin to another in the same + direction - upwards, otherwise the scanning thread may + miss it. + Implementation details: + Pins are given away from a "pinbox". Pinbox is stack-based allocator. It used dynarray for storing pins, new elements are allocated by dynarray as necessary, old are pushed in the stack for reuse. ABA is solved by - versioning a pointer - because we use an array, a pointer to pins is 32 bit, - upper 32 bits are used for a version. + versioning a pointer - because we use an array, a pointer to pins is 16 bit, + upper 16 bits are used for a version. + + It is assumed that pins belong to a thread and are not transferable + between threads (LF_PINS::stack_ends_here being a primary reason + for this limitation). 
*/ #include @@ -93,11 +116,11 @@ static void _lf_pinbox_real_free(LF_PINS *pins); void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, lf_pinbox_free_func *free_func, void *free_func_arg) { - DBUG_ASSERT(sizeof(LF_PINS) == 128); DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0); - lf_dynarray_init(&pinbox->pinstack, sizeof(LF_PINS)); + compile_time_assert(sizeof(LF_PINS) == 128); + lf_dynarray_init(&pinbox->pinarray, sizeof(LF_PINS)); pinbox->pinstack_top_ver= 0; - pinbox->pins_in_stack= 0; + pinbox->pins_in_array= 0; pinbox->free_ptr_offset= free_ptr_offset; pinbox->free_func= free_func; pinbox->free_func_arg= free_func_arg; @@ -105,38 +128,72 @@ void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, void lf_pinbox_destroy(LF_PINBOX *pinbox) { - lf_dynarray_destroy(&pinbox->pinstack); + lf_dynarray_destroy(&pinbox->pinarray); } /* Get pins from a pinbox. Usually called via lf_alloc_get_pins() or lf_hash_get_pins(). + SYNOPSYS + pinbox - + stack_end - a pointer to the end (top/bottom, depending on the + STACK_DIRECTION) of stack. Used for safe alloca. There's + no safety margin deducted, a caller should take care of it, + if necessary. + DESCRIPTION get a new LF_PINS structure from a stack of unused pins, or allocate a new one out of dynarray. + + NOTE + It is assumed that pins belong to a thread and are not transferable + between threads. */ -LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox) +LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox, void *stack_end) { uint32 pins, next, top_ver; LF_PINS *el; - + /* + We have an array of max. 64k elements. + The highest index currently allocated is pinbox->pins_in_array. + Freed elements are in a lifo stack, pinstack_top_ver. + pinstack_top_ver is 32 bits; 16 low bits are the index in the + array, to the first element of the list. 16 high bits are a version + (every time the 16 low bits are updated, the 16 high bits are + incremented). Versioniong prevents the ABA problem. 
+ */ top_ver= pinbox->pinstack_top_ver; do { if (!(pins= top_ver % LF_PINBOX_MAX_PINS)) { - pins= my_atomic_add32(&pinbox->pins_in_stack, 1)+1; - el= (LF_PINS *)_lf_dynarray_lvalue(&pinbox->pinstack, pins); + /* the stack of free elements is empty */ + pins= my_atomic_add32(&pinbox->pins_in_array, 1)+1; + if (unlikely(pins >= LF_PINBOX_MAX_PINS)) + return 0; + /* + note that the first allocated element has index 1 (pins==1). + index 0 is reserved to mean "NULL pointer" + */ + el= (LF_PINS *)_lf_dynarray_lvalue(&pinbox->pinarray, pins); + if (unlikely(!el)) + return 0; break; } - el= (LF_PINS *)_lf_dynarray_value(&pinbox->pinstack, pins); + el= (LF_PINS *)_lf_dynarray_value(&pinbox->pinarray, pins); next= el->link; } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver, top_ver-pins+next+LF_PINBOX_MAX_PINS)); + /* + set el->link to the index of el in the dynarray (el->link has two usages: + - if element is allocated, it's its own index + - if element is free, it's its next element in the free stack + */ el->link= pins; el->purgatory_count= 0; el->pinbox= pinbox; + el->stack_ends_here= stack_end; return el; } @@ -171,25 +228,17 @@ void _lf_pinbox_put_pins(LF_PINS *pins) _lf_pinbox_real_free(pins); if (pins->purgatory_count) { - my_atomic_rwlock_wrunlock(&pins->pinbox->pinstack.lock); + my_atomic_rwlock_wrunlock(&pins->pinbox->pinarray.lock); pthread_yield(); - my_atomic_rwlock_wrlock(&pins->pinbox->pinstack.lock); + my_atomic_rwlock_wrlock(&pins->pinbox->pinarray.lock); } } top_ver= pinbox->pinstack_top_ver; - if (nr == pinbox->pins_in_stack) - { - int32 tmp= nr; - if (my_atomic_cas32(&pinbox->pins_in_stack, &tmp, tmp-1)) - goto ret; - } - do { pins->link= top_ver % LF_PINBOX_MAX_PINS; } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver, top_ver-pins->link+nr+LF_PINBOX_MAX_PINS)); -ret: return; } @@ -228,7 +277,7 @@ struct st_harvester { /* callback for _lf_dynarray_iterate: - scan all pins or all threads and accumulate all pins + scan all pins of 
all threads and accumulate all pins */ static int harvest_pins(LF_PINS *el, struct st_harvester *hv) { @@ -243,13 +292,19 @@ static int harvest_pins(LF_PINS *el, struct st_harvester *hv) *hv->granary++= p; } } + /* + hv->npins may become negative below, but it means that + we're on the last dynarray page and harvest_pins() won't be + called again. We don't bother to make hv->npins() correct + (that is 0) in this case. + */ hv->npins-= LF_DYNARRAY_LEVEL_LENGTH; return 0; } /* callback for _lf_dynarray_iterate: - scan all pins or all threads and see if addr is present there + scan all pins of all threads and see if addr is present there */ static int match_pins(LF_PINS *el, void *addr) { @@ -262,28 +317,35 @@ static int match_pins(LF_PINS *el, void *addr) return 0; } +#if STACK_DIRECTION < 0 +#define available_stack_size(END,CUR) (long) ((char*)(CUR) - (char*)(END)) +#else +#define available_stack_size(END,CUR) (long) ((char*)(END) - (char*)(CUR)) +#endif + /* - Scan the purgatory as free everything that can be freed + Scan the purgatory and free everything that can be freed */ static void _lf_pinbox_real_free(LF_PINS *pins) { - int npins; - void *list; - void **addr; + int npins, alloca_size; + void *list, **addr; + struct st_lf_alloc_node *first, *last= NULL; LF_PINBOX *pinbox= pins->pinbox; - npins= pinbox->pins_in_stack+1; + npins= pinbox->pins_in_array+1; #ifdef HAVE_ALLOCA + alloca_size= sizeof(void *)*LF_PINBOX_PINS*npins; /* create a sorted list of pinned addresses, to speed up searches */ - if (sizeof(void *)*LF_PINBOX_PINS*npins < my_thread_stack_size) + if (available_stack_size(&pinbox, pins->stack_ends_here) > alloca_size) { struct st_harvester hv; - addr= (void **) alloca(sizeof(void *)*LF_PINBOX_PINS*npins); + addr= (void **) alloca(alloca_size); hv.granary= addr; hv.npins= npins; /* scan the dynarray and accumulate all pinned addresses */ - _lf_dynarray_iterate(&pinbox->pinstack, + _lf_dynarray_iterate(&pinbox->pinarray, (lf_dynarray_func)harvest_pins, 
&hv); npins= hv.granary-addr; @@ -307,7 +369,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins) if (addr) /* use binary search */ { void **a, **b, **c; - for (a= addr, b= addr+npins-1, c= a+(b-a)/2; b-a>1; c= a+(b-a)/2) + for (a= addr, b= addr+npins-1, c= a+(b-a)/2; (b-a) > 1; c= a+(b-a)/2) if (cur == *c) a= b= c; else if (cur > *c) @@ -319,41 +381,52 @@ static void _lf_pinbox_real_free(LF_PINS *pins) } else /* no alloca - no cookie. linear search here */ { - if (_lf_dynarray_iterate(&pinbox->pinstack, + if (_lf_dynarray_iterate(&pinbox->pinarray, (lf_dynarray_func)match_pins, cur)) goto found; } } /* not pinned - freeing */ - pinbox->free_func(cur, pinbox->free_func_arg); + if (last) + last= last->next= (struct st_lf_alloc_node *)cur; + else + first= last= (struct st_lf_alloc_node *)cur; continue; found: /* pinned - keeping */ add_to_purgatory(pins, cur); } + if (last) + pinbox->free_func(first, last, pinbox->free_func_arg); } +/* lock-free memory allocator for fixed-size objects */ + +LF_REQUIRE_PINS(1); + /* - callback for _lf_pinbox_real_free to free an unpinned object - + callback for _lf_pinbox_real_free to free a list of unpinned objects - add it back to the allocator stack + + DESCRIPTION + 'first' and 'last' are the ends of the linked list of st_lf_alloc_node's: + first->el->el->....->el->last. Use first==last to free only one element. */ -static void alloc_free(struct st_lf_alloc_node *node, LF_ALLOCATOR *allocator) +static void alloc_free(struct st_lf_alloc_node *first, + struct st_lf_alloc_node *last, + LF_ALLOCATOR *allocator) { struct st_lf_alloc_node *tmp; tmp= allocator->top; do { - node->next= tmp; - } while (!my_atomic_casptr((void **)&allocator->top, (void **)&tmp, node) && + last->next= tmp; + } while (!my_atomic_casptr((void **)&allocator->top, (void **)&tmp, first) && LF_BACKOFF); } -/* lock-free memory allocator for fixed-size objects */ - -LF_REQUIRE_PINS(1); - /* - initialize lock-free allocatod. 
+ initialize lock-free allocator SYNOPSYS allocator - @@ -362,6 +435,8 @@ LF_REQUIRE_PINS(1); memory that is guaranteed to be unused after the object is put in the purgatory. Unused by ANY thread, not only the purgatory owner. + This memory will be used to link waiting-to-be-freed + objects in a purgatory list. */ void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset) { @@ -370,12 +445,19 @@ void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset) allocator->top= 0; allocator->mallocs= 0; allocator->element_size= size; - DBUG_ASSERT(size >= (int)sizeof(void *)); - DBUG_ASSERT(free_ptr_offset < size); + DBUG_ASSERT(size >= sizeof(void*) + free_ptr_offset); } /* destroy the allocator, free everything that's in it + + NOTE + As every other init/destroy function here and elsewhere it + is not thread safe. No, this function is no different, ensure + that no thread needs the allocator before destroying it. + We are not responsible for any damage that may be caused by + accessing the allocator when it is being or has been destroyed. + Oh yes, and don't put your cat in a microwave. */ void lf_alloc_destroy(LF_ALLOCATOR *allocator) { @@ -410,16 +492,14 @@ void *_lf_alloc_new(LF_PINS *pins) } while (node != allocator->top && LF_BACKOFF); if (!node) { - if (!(node= (void *)my_malloc(allocator->element_size, - MYF(MY_WME|MY_ZEROFILL)))) - break; + node= (void *)my_malloc(allocator->element_size, MYF(MY_WME)); #ifdef MY_LF_EXTRA_DEBUG - my_atomic_add32(&allocator->mallocs, 1); + if (likely(node)) + my_atomic_add32(&allocator->mallocs, 1); #endif break; } - if (my_atomic_casptr((void **)&allocator->top, - (void *)&node, *(void **)node)) + if (my_atomic_casptr((void **)&allocator->top, (void *)&node, node->next)) break; } _lf_unpin(pins, 0); @@ -432,7 +512,7 @@ void *_lf_alloc_new(LF_PINS *pins) NOTE This is NOT thread-safe !!! 
*/ -uint lf_alloc_in_pool(LF_ALLOCATOR *allocator) +uint lf_alloc_pool_count(LF_ALLOCATOR *allocator) { uint i; struct st_lf_alloc_node *node; diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c index c6dd654bf03..770b1f9342b 100644 --- a/mysys/lf_dynarray.c +++ b/mysys/lf_dynarray.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2000 MySQL AB +/* Copyright (C) 2006 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,8 +21,6 @@ Memory is allocated in non-contiguous chunks. This data structure is not space efficient for sparse arrays. - The number of elements is limited to 4311810304 - Every element is aligned to sizeof(element) boundary (to avoid false sharing if element is big enough). @@ -32,6 +30,9 @@ to arrays of elements, on the second level it's an array of pointers to arrays of pointers to arrays of elements. And so on. + With four levels the number of elements is limited to 4311810304 + (but as in all functions index is uint, the real limit is 2^32-1) + Actually, it's wait-free, not lock-free ;-) */ @@ -192,6 +193,9 @@ static int recursive_iterate(LF_DYNARRAY *array, void *ptr, int level, each. _lf_dynarray_iterate() calls user-supplied function on every array from the set. 
It is the fastest way to scan the array, faster than for (i=0; i < N; i++) { func(_lf_dynarray_value(dynarray, i)); } + + NOTE + if func() returns non-zero, the scan is aborted */ int _lf_dynarray_iterate(LF_DYNARRAY *array, lf_dynarray_func func, void *arg) { diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index fb2fb88492f..832f0eb5852 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2000 MySQL AB +/* Copyright (C) 2006 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -36,6 +36,10 @@ typedef struct { uint32 hashnr; /* reversed hash number, for sorting */ const byte *key; uint keylen; + /* + data is stored here, directly after the keylen. + thus the pointer to data is (void*)(slist_element_ptr+1) + */ } LF_SLIST; /* @@ -77,20 +81,20 @@ static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, retry: cursor->prev= (intptr *)head; - do { - cursor->curr= PTR(*cursor->prev); + do { /* PTR() isn't necessary below, head is a dummy node */ + cursor->curr= (LF_SLIST *)(*cursor->prev); _lf_pin(pins, 1, cursor->curr); - } while(*cursor->prev != (intptr)cursor->curr && LF_BACKOFF); + } while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF); for (;;) { - if (!cursor->curr) - return 0; + if (unlikely(!cursor->curr)) + return 0; /* end of the list */ do { /* QQ: XXX or goto retry ? 
*/ link= cursor->curr->link; cursor->next= PTR(link); _lf_pin(pins, 0, cursor->next); - } while(link != cursor->curr->link && LF_BACKOFF); + } while (link != cursor->curr->link && LF_BACKOFF); cur_hashnr= cursor->curr->hashnr; cur_key= cursor->curr->key; cur_keylen= cursor->curr->keylen; @@ -114,6 +118,10 @@ retry: } else { + /* + we found a deleted node - be nice, help the other thread + and remove this deleted node + */ if (my_atomic_casptr((void **)cursor->prev, (void **)&cursor->curr, cursor->next)) _lf_alloc_free(pins, cursor->curr); @@ -139,31 +147,44 @@ retry: NOTE it uses pins[0..2], on return all pins are removed. + if there're nodes with the same key value, a new node is added before them. */ static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs, LF_SLIST *node, LF_PINS *pins, uint flags) { CURSOR cursor; - int res= -1; + int res; - do + for (;;) { if (lfind(head, cs, node->hashnr, node->key, node->keylen, &cursor, pins) && (flags & LF_HASH_UNIQUE)) + { res= 0; /* duplicate found */ + break; + } else { node->link= (intptr)cursor.curr; - assert(node->link != (intptr)node); - assert(cursor.prev != &node->link); + DBUG_ASSERT(node->link != (intptr)node); /* no circular references */ + DBUG_ASSERT(cursor.prev != &node->link); /* no circular references */ if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node)) + { res= 1; /* inserted ok */ + break; + } } - } while (res == -1); + } _lf_unpin(pins, 0); _lf_unpin(pins, 1); _lf_unpin(pins, 2); + /* + Note that cursor.curr is not pinned here and the pointer is unreliable, + the object may dissapear anytime. But if it points to a dummy node, the + pointer is safe, because dummy nodes are never freed - initialize_bucket() + uses this fact. + */ return res ? 
0 : cursor.curr; } @@ -183,24 +204,41 @@ static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, const byte *key, uint keylen, LF_PINS *pins) { CURSOR cursor; - int res= -1; + int res; - do + for (;;) { if (!lfind(head, cs, hashnr, key, keylen, &cursor, pins)) - res= 1; + { + res= 1; /* not found */ + break; + } else + { + /* mark the node deleted */ if (my_atomic_casptr((void **)&(cursor.curr->link), - (void **)&cursor.next, 1+(char *)cursor.next)) + (void **)&cursor.next, + (void *)(((intptr)cursor.next) | 1))) { + /* and remove it from the list */ if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, cursor.next)) _lf_alloc_free(pins, cursor.curr); else + { + /* + somebody already "helped" us and removed the node ? + Let's check if we need to help that someone too! + (to ensure the number of "set DELETED flag" actions + is equal to the number of "remove from the list" actions) + */ lfind(head, cs, hashnr, key, keylen, &cursor, pins); + } res= 0; + break; } - } while (res == -1); + } + } _lf_unpin(pins, 0); _lf_unpin(pins, 1); _lf_unpin(pins, 2); @@ -226,7 +264,8 @@ static LF_SLIST *lsearch(LF_SLIST * volatile *head, CHARSET_INFO *cs, { CURSOR cursor; int res= lfind(head, cs, hashnr, key, keylen, &cursor, pins); - if (res) _lf_pin(pins, 2, cursor.curr); + if (res) + _lf_pin(pins, 2, cursor.curr); _lf_unpin(pins, 0); _lf_unpin(pins, 1); return res ? cursor.curr : 0; @@ -241,6 +280,11 @@ static inline const byte* hash_key(const LF_HASH *hash, return record + hash->key_offset; } +/* + compute the hash key value from the raw key. + note, that the hash value is limited to 2^31, because we need one + bit to distinguish between normal and dummy nodes. 
+*/ static inline uint calc_hash(LF_HASH *hash, const byte *key, uint keylen) { ulong nr1= 1, nr2= 4; @@ -249,8 +293,9 @@ static inline uint calc_hash(LF_HASH *hash, const byte *key, uint keylen) return nr1 & INT_MAX32; } -#define MAX_LOAD 1.0 -static void initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *); +#define MAX_LOAD 1.0 /* average number of elements in a bucket */ + +static int initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *); /* Initializes lf_hash, the arguments are compatible with hash_init @@ -261,7 +306,7 @@ void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, { lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size, offsetof(LF_SLIST, key)); - lf_dynarray_init(&hash->array, sizeof(LF_SLIST **)); + lf_dynarray_init(&hash->array, sizeof(LF_SLIST *)); hash->size= 1; hash->count= 0; hash->element_size= element_size; @@ -275,14 +320,19 @@ void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, void lf_hash_destroy(LF_HASH *hash) { - LF_SLIST *el= *(LF_SLIST **)_lf_dynarray_lvalue(&hash->array, 0); + LF_SLIST *el, **head= (LF_SLIST **)_lf_dynarray_value(&hash->array, 0); + + if (unlikely(!head)) + return; + el= *head; + while (el) { intptr next= el->link; if (el->hashnr & 1) - lf_alloc_real_free(&hash->alloc, el); + lf_alloc_direct_free(&hash->alloc, el); /* normal node */ else - my_free((void *)el, MYF(0)); + my_free((void *)el, MYF(0)); /* dummy node */ el= (LF_SLIST *)next; } lf_alloc_destroy(&hash->alloc); @@ -297,6 +347,7 @@ void lf_hash_destroy(LF_HASH *hash) RETURN 0 - inserted 1 - didn't (unique key conflict) + -1 - out of memory NOTE see linsert() for pin usage notes @@ -308,14 +359,18 @@ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) lf_rwlock_by_pins(pins); node= (LF_SLIST *)_lf_alloc_new(pins); + if (unlikely(!node)) + return -1; memcpy(node+1, data, hash->element_size); node->key= hash_key(hash, (byte *)(node+1), &node->keylen); hashnr= calc_hash(hash, node->key, 
node->keylen); bucket= hashnr % hash->size; el= _lf_dynarray_lvalue(&hash->array, bucket); - if (*el == NULL) - initialize_bucket(hash, el, bucket, pins); - node->hashnr= my_reverse_bits(hashnr) | 1; + if (unlikely(!el)) + return -1; + if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins))) + return -1; + node->hashnr= my_reverse_bits(hashnr) | 1; /* normal node */ if (linsert(el, hash->charset, node, pins, hash->flags)) { _lf_alloc_free(pins, node); @@ -330,9 +385,14 @@ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) } /* + DESCRIPTION + deletes an element with the given key from the hash (if a hash is + not unique and there're many elements with this key - the "first" + matching element is deleted) RETURN 0 - deleted 1 - didn't (not found) + -1 - out of memory NOTE see ldelete() for pin usage notes */ @@ -344,8 +404,16 @@ int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) bucket= hashnr % hash->size; lf_rwlock_by_pins(pins); el= _lf_dynarray_lvalue(&hash->array, bucket); - if (*el == NULL) - initialize_bucket(hash, el, bucket, pins); + if (unlikely(!el)) + return -1; + /* + note that we still need to initialize_bucket here, + we cannot return "node not found", because an old bucket of that + node may've been split and the node was assigned to a new bucket + that was never accessed before and thus is not initialized. 
+ */ + if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins))) + return -1; if (ldelete(el, hash->charset, my_reverse_bits(hashnr) | 1, (byte *)key, keylen, pins)) { @@ -358,6 +426,12 @@ int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) } /* + RETURN + a pointer to an element with the given key (if a hash is not unique and + there're many elements with this key - the "first" matching element) + NULL if nothing is found + MY_ERRPTR if OOM + NOTE see lsearch() for pin usage notes */ @@ -369,32 +443,51 @@ void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) bucket= hashnr % hash->size; lf_rwlock_by_pins(pins); el= _lf_dynarray_lvalue(&hash->array, bucket); - if (*el == NULL) - initialize_bucket(hash, el, bucket, pins); + if (unlikely(!el)) + return MY_ERRPTR; + if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins))) + return MY_ERRPTR; found= lsearch(el, hash->charset, my_reverse_bits(hashnr) | 1, (byte *)key, keylen, pins); lf_rwunlock_by_pins(pins); return found ? 
found+1 : 0; } -static const char *dummy_key= ""; +static const byte *dummy_key= ""; -static void initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, +/* + RETURN + 0 - ok + -1 - out of memory +*/ +static int initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, uint bucket, LF_PINS *pins) { uint parent= my_clear_highest_bit(bucket); LF_SLIST *dummy= (LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME)); LF_SLIST **tmp= 0, *cur; LF_SLIST * volatile *el= _lf_dynarray_lvalue(&hash->array, parent); - if (*el == NULL && bucket) - initialize_bucket(hash, el, parent, pins); - dummy->hashnr= my_reverse_bits(bucket); + if (unlikely(!el || !dummy)) + return -1; + if (*el == NULL && bucket && + unlikely(initialize_bucket(hash, el, parent, pins))) + return -1; + dummy->hashnr= my_reverse_bits(bucket) | 0; /* dummy node */ dummy->key= (char*) dummy_key; dummy->keylen= 0; - if ((cur= linsert(el, hash->charset, dummy, pins, 0))) + if ((cur= linsert(el, hash->charset, dummy, pins, LF_HASH_UNIQUE))) { my_free((void *)dummy, MYF(0)); dummy= cur; } my_atomic_casptr((void **)node, (void **)&tmp, dummy); + /* + note that if the CAS above failed (after linsert() succeeded), + it would mean that some other thread has executed linsert() for + the same dummy node, its linsert() failed, it picked up our + dummy node (in "dummy= cur") and executed the same CAS as above. 
+ Which means that even if CAS above failed we don't need to retry, + and we should not free(dummy) - there's no memory leak here + */ + return 0; } -- cgit v1.2.1 From fdfb51484c9b1e239fd9eb738051020967c99c7f Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 9 Jun 2007 14:52:17 +0300 Subject: Fixed compiler warnings Fixed bug in ma_dbug.c that gave valgrind warning (only relevant when using --debug) Fixed bug in blob logging (Fixes valgrind warning) maria_getint() -> maria_data_on_page() mysys/safemalloc.c: Added debug function to print out where a piece of memory was allocated sql/opt_range.cc: Remove DBUG_PRINT of unitailized memory storage/maria/ma_blockrec.c: Fixed bug in blob logging storage/maria/ma_check.c: Fixed compiler warning storage/maria/ma_dbug.c: Added missed end++; Caused usage of unitialized memory for nullable keys that was not NULL storage/maria/ma_delete.c: maria_getint() -> maria_data_on_page() storage/maria/ma_init.c: Added header file to get rid of warning storage/maria/ma_key.c: More debugging storage/maria/ma_loghandler.c: Removed some wrong ';' to get rid of compiler errors when compiling without debugging Indentation fixes Removed not needed 'break's Fixed some compiler warnings Added code to detect logging of unitialized memory storage/maria/ma_page.c: maria_getint() -> maria_data_on_page() Clear rest of index page before writing when used with valgrind (Fixes warning of writing pages with unitialized data) storage/maria/ma_range.c: maria_getint() -> maria_data_on_page() storage/maria/ma_rt_index.c: maria_getint() -> maria_data_on_page() storage/maria/ma_rt_index.h: maria_getint() -> maria_data_on_page() storage/maria/ma_rt_key.c: maria_getint() -> maria_data_on_page() storage/maria/ma_rt_split.c: maria_getint() -> maria_data_on_page() storage/maria/ma_search.c: maria_getint() -> maria_data_on_page() storage/maria/ma_test1.c: Fixed compiler warning storage/maria/ma_write.c: maria_getint() -> maria_data_on_page() 
storage/maria/maria_chk.c: maria_getint() -> maria_data_on_page() storage/maria/maria_def.h: maria_getint() -> maria_data_on_page() storage/maria/unittest/ma_pagecache_consist.c: Fixed compiler warning storage/maria/unittest/ma_pagecache_single.c: Fixed compiler warning storage/maria/unittest/ma_test_loghandler-t.c: Fixed compiler warning storage/maria/unittest/ma_test_loghandler_multigroup-t.c: Fixed compiler warning storage/maria/unittest/ma_test_loghandler_multithread-t.c: Fixed compiler warning storage/maria/unittest/ma_test_loghandler_pagecache-t.c: Fixed compiler warning storage/myisam/mi_dbug.c: Added missed end++; Caused usage of unitialized memory for nullable keys that was not NULL --- mysys/safemalloc.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'mysys') diff --git a/mysys/safemalloc.c b/mysys/safemalloc.c index da15b02345b..5ab6d0dda0d 100644 --- a/mysys/safemalloc.c +++ b/mysys/safemalloc.c @@ -428,6 +428,29 @@ void TERMINATE(FILE *file) } +/* + Report where a piece of memory was allocated + + This is usefull to call from withing a debugger +*/ + + +void sf_malloc_report_allocated(void *memory) +{ + struct st_irem *irem; + for (irem= sf_malloc_root ; irem ; irem=irem->next) + { + char *data= (((char*) irem) + ALIGN_SIZE(sizeof(struct st_irem)) + + sf_malloc_prehunc); + if (data <= (char*) memory && (char*) memory <= data + irem->datasize) + { + printf("%u bytes at 0x%lx, allocated at line %u in '%s'\n", + irem->datasize, (long) data, irem->linenum, irem->filename); + break; + } + } +} + /* Returns 0 if chunk is ok */ static int _checkchunk(register struct st_irem *irem, const char *filename, -- cgit v1.2.1 From 1e73169a82f86fa2fdaf43e7601705eb9a81cb85 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 26 Jun 2007 22:30:09 +0200 Subject: WL#3072 - Maria recovery fixes for build failures; copyrights; small bugfixes and comments mysys/Makefile.am: missing .h breaks building from tarball storage/maria/ma_loghandler.c: applying 
Serg's bugfix of trnman_new_trid() to translog_assign_id_to_share() storage/maria/ma_loghandler.h: copyright storage/maria/ma_loghandler_lsn.h: copyright storage/maria/maria_read_log.c: fix for compiler warnings. Comments. Close tables when program ends. --- mysys/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/Makefile.am b/mysys/Makefile.am index cd84e09a60e..60aa59fd3eb 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -20,7 +20,7 @@ INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \ -I$(top_srcdir)/include -I$(srcdir) pkglib_LIBRARIES = libmysys.a LDADD = libmysys.a $(top_builddir)/strings/libmystrings.a $(top_builddir)/dbug/libdbug.a -noinst_HEADERS = mysys_priv.h my_static.h +noinst_HEADERS = mysys_priv.h my_static.h my_safehash.h libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ mf_path.c mf_loadpath.c my_file.c \ my_open.c my_create.c my_dup.c my_seek.c my_read.c \ -- cgit v1.2.1 From d6f2fda680ec2be373ff5694e91cfccf792f4eb0 Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 1 Jul 2007 16:20:57 +0300 Subject: Fixed REPAIR/CHECK/ANALYZE TABLE for tables with new BLOCK-ROW format. Fixed maria_chk to repair BLOCK-ROW tables. Added CREATE options ROW_FORMAT=PAGE & TRANSACTIONAL= 0|1 More DBUG information in a lot of functions Some minor code cleanups Enable handler errors earlier for better clear text error messages at handler startup / standalone usage. 
Don't print NULL strings in my_create_with_symlink(); Fixes core dump when used with --debug include/maria.h: Added extra variables needed for REPAIR with BLOCK records include/my_base.h: Added argument for opening copy of maria table without a shared object include/my_handler.h: Prototypes for my_handler_error_register() & my_handler_error_unregister() include/pagecache.h: Added PAGECACHE_READ_UNKNOWN_PAGE mysql-test/include/ps_conv.inc: Enforce creation of table as MyISAM (to allow one to use --default-storage-engine) mysql-test/r/maria.result: Moved some things to maria-connect.test Updared results as REPAIR now works Added tests for creation option TRANSACTIONAL mysql-test/r/ps_2myisam.result: Enforce creation of table as MyISAM (to allow one to use --default-storage-engine) mysql-test/r/ps_3innodb.result: Enforce creation of table as MyISAM (to allow one to use --default-storage-engine) mysql-test/r/ps_4heap.result: Enforce creation of table as MyISAM (to allow one to use --default-storage-engine) mysql-test/r/ps_5merge.result: Enforce creation of table as MyISAM (to allow one to use --default-storage-engine) mysql-test/r/ps_7ndb.result: Enforce creation of table as MyISAM (to allow one to use --default-storage-engine) mysql-test/r/ps_maria.result: Enforce creation of table as MyISAM (to allow one to use --default-storage-engine) mysql-test/t/maria.test: Moved some things to maria-connect.test Updared results as REPAIR now works Added tests for creation option TRANSACTIONAL mysys/mf_iocache.c: More debugging mysys/mf_tempfile.c: Added missing close() mysys/my_error.c: init_glob_errs() is now done in my_init() mysys/my_handler.c: Added functions to initialize handler error messages mysys/my_init.c: Moevd init_glob_errs() here. mysys/my_open.c: More comments More debugging Code cleanup (join multiple code paths) and indentation fixes. No change in logic. 
mysys/my_symlink2.c: Don't print NULL strings sql/handler.cc: Added printing of PAGE row type Moved out initializing of handler errors to allow handler to give better error messages at startup sql/handler.h: ROW_TYPE_PAGES -> ROW_TYPE_PAGE sql/lex.h: Added 'PAGE' and 'TRANSACTIONAL' sql/mysqld.cc: Initialize handler error messages early to get better error messages from handler startup sql/sql_show.cc: ROW_TYPE_PAGES -> ROW_TYPE_PAGE sql/sql_table.cc: Removed not needed initializer sql/sql_yacc.yy: Added CREATE options ROW_FORMAT=PAGE and TRANSACTIONAL=[0|1] sql/table.cc: Store transactional flag in .frm More comments sql-bench/example: Better example sql/table.h: Added transactional table option storage/maria/ha_maria.cc: More debug information Enable REPAIR Detect usage of TRANSACTIONAL table option storage/maria/ma_bitmap.c: More comments (from Guilhem) storage/maria/ma_blockrec.c: SANITY_CHECK -> SANITY_CHECKS (fixed typo) Write out pages on delete even if there is no rows. (Fixed problem with REPAIR) Removed some ASSERTS to runtime checks (for better REPAIR) Fixed bug when scanning rows More DBUG information storage/maria/ma_check.c: Partial rewrite to allow REPAIR of BLOCK/PAGE format. Repair of BLOCK format rows is for now only done with 'maria_repair()' (= repair through key cache) The new logic to repair rows with BLOCK format is: - Create new, unrelated MARIA_HA of the table - Create new datafile and associate it with new handler - Reset all statistic information in new handler - Copy all data to new handler with normal write operations - Move state of new handler to old handler - Close new handler - Close data file in old handler - Rename old data file to new data file. 
- Reopen data file in old handler storage/maria/ma_close.c: REmoved not needed block storage/maria/ma_create.c: Swap arguments to _ma_initialize_data_file() storage/maria/ma_delete_all.c: Split maria_delete_all_rows() to two functions to allow REPAIR to easily reset all status information. storage/maria/ma_dynrec.c: Added checksum argument to _ma_rec_check (multi-thread fix) storage/maria/ma_info.c: Indentation fix storage/maria/ma_init.c: Register error message to get better error message on init and when using as standalone module. storage/maria/ma_loghandler.c: Fixed typo that disabled some error detection by valgrind storage/maria/ma_open.c: Added 'calc_check_checksum()' Don't log things during repair Added option HA_OPEN_COPY to allow one to open a Maria table with an independent share (required by REPAIR) storage/maria/ma_pagecache.c: Fixed some compiler warnings Added support for PAGECACHE_READ_UNKNOWN_PAGE (used for scanning file without knowing page types) storage/maria/ma_test_all.sh: More test of REPAIR storage/maria/ma_update.c: Optimized checksum code storage/maria/maria_chk.c: Use DBUG_SET_INITIAL() to get DBUG to work with --parallel-repair Ensure we always use maria_repair() for BLOCK format (for now) More DBUG information storage/maria/maria_def.h: For now, always run with more checkings (SANITY_CHECKS) Added share->calc_check_checksum to be used with REPAIR / CHECK table. 
Swaped arguments to _ma_initialize_data_file() storage/myisam/ft_stopwords.c: Added DBUG information mysql-test/r/maria-connect.result: New BitKeeper file ``mysql-test/r/maria-connect.result'' mysql-test/t/maria-connect.test: New BitKeeper file ``mysql-test/t/maria-connect.test'' --- mysys/mf_iocache.c | 1 + mysys/mf_tempfile.c | 1 + mysys/my_error.c | 5 ---- mysys/my_handler.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++ mysys/my_init.c | 1 + mysys/my_open.c | 73 ++++++++++++++++++++++++++++------------------------- mysys/my_symlink2.c | 4 ++- 7 files changed, 111 insertions(+), 40 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c index e40490776f8..8b8ba540a4e 100644 --- a/mysys/mf_iocache.c +++ b/mysys/mf_iocache.c @@ -1696,6 +1696,7 @@ int my_b_flush_io_cache(IO_CACHE *info, int need_append_buffer_lock) my_bool append_cache; my_off_t pos_in_file; DBUG_ENTER("my_b_flush_io_cache"); + DBUG_PRINT("enter", ("cache: 0x%lx", (long) info)); if (!(append_cache = (info->type == SEQ_READ_APPEND))) need_append_buffer_lock=0; diff --git a/mysys/mf_tempfile.c b/mysys/mf_tempfile.c index 6c412157937..a820d09a2c6 100644 --- a/mysys/mf_tempfile.c +++ b/mysys/mf_tempfile.c @@ -107,6 +107,7 @@ File create_temp_file(char *to, const char *dir, const char *prefix, if (org_file >= 0 && file < 0) { int tmp=my_errno; + close(org_file); (void) my_delete(to, MYF(MY_WME | ME_NOINPUT)); my_errno=tmp; } diff --git a/mysys/my_error.c b/mysys/my_error.c index 48392fe84c3..00c78b64e0e 100644 --- a/mysys/my_error.c +++ b/mysys/my_error.c @@ -84,11 +84,6 @@ int my_error(int nr, myf MyFlags, ...) if (nr <= meh_p->meh_last) break; -#ifdef SHARED_LIBRARY - if ((meh_p == &my_errmsgs_globerrs) && ! globerrs[0]) - init_glob_errs(); -#endif - /* get the error message string. Default, if NULL or empty string (""). */ if (! (format= (meh_p && (nr >= meh_p->meh_first)) ? meh_p->meh_errmsgs[nr - meh_p->meh_first] : NULL) || ! 
*format) diff --git a/mysys/my_handler.c b/mysys/my_handler.c index 757cbe490f8..bf75d992f9d 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -19,6 +19,7 @@ #include #include #include +#include int ha_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length, uchar *b, uint b_length, my_bool part_key, @@ -563,3 +564,68 @@ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) return keyseg; } + +/* + Errors a handler can give you +*/ + +static const char *handler_error_messages[]= +{ + "Didn't find key on read or update", + "Duplicate key on write or update", + "Undefined handler error 122", + "Someone has changed the row since it was read (while the table was locked to prevent it)", + "Wrong index given to function", + "Undefined handler error 125", + "Index file is crashed", + "Record file is crashed", + "Out of memory in engine", + "Undefined handler error 129", + "Incorrect file format", + "Command not supported by database", + "Old database file", + "No record read before update", + "Record was already deleted (or record file crashed)", + "No more room in record file", + "No more room in index file", + "No more records (read after end of file)", + "Unsupported extension used for table", + "Too big row", + "Wrong create options", + "Duplicate unique key or constraint on write or update", + "Unknown character set used in table", + "Conflicting table definitions in sub-tables of MERGE table", + "Table is crashed and last repair failed", + "Table was marked as crashed and should be repaired", + "Lock timed out; Retry transaction", + "Lock table is full; Restart program with a larger locktable", + "Updates are not allowed under a read only transactions", + "Lock deadlock; Retry transaction", + "Foreign key constraint is incorrectly formed", + "Cannot add a child row", + "Cannot delete a parent row", + "Unknown handler error" +}; + + +/* + Register handler error messages for usage with my_error() + + NOTES + This is safe to call multiple times as 
my_error_register() + will ignore calls to register already registered error numbers. +*/ + + +void my_handler_error_register(void) +{ + my_error_register(handler_error_messages, HA_ERR_FIRST, + HA_ERR_FIRST+ array_elements(handler_error_messages)-1); +} + + +void my_handler_error_unregister(void) +{ + my_error_unregister(HA_ERR_FIRST, + HA_ERR_FIRST+ array_elements(handler_error_messages)-1); +} diff --git a/mysys/my_init.c b/mysys/my_init.c index e8a55fdc1e6..2023a7da223 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -77,6 +77,7 @@ my_bool my_init(void) mysys_usage_id++; my_umask= 0660; /* Default umask for new files */ my_umask_dir= 0700; /* Default umask for new directories */ + init_glob_errs(); #if defined(THREAD) && defined(SAFE_MUTEX) safe_mutex_global_init(); /* Must be called early */ #endif diff --git a/mysys/my_open.c b/mysys/my_open.c index 6fe7883b99b..b4bb7e25810 100644 --- a/mysys/my_open.c +++ b/mysys/my_open.c @@ -71,6 +71,7 @@ File my_open(const char *FileName, int Flags, myf MyFlags) #else fd = open((my_string) FileName, Flags); #endif + DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_OPEN, EE_FILENOTFOUND, MyFlags)); } /* my_open */ @@ -124,61 +125,65 @@ int my_close(File fd, myf MyFlags) SYNOPSIS my_register_filename() - fd - FileName - type_file_type + fd File number opened, -1 if error on open + FileName File name + type_file_type How file was created + error_message_number Error message number if caller got error (fd == -1) + MyFlags Flags for my_close() + + RETURN + -1 error + # Filenumber + */ File my_register_filename(File fd, const char *FileName, enum file_type type_of_file, uint error_message_number, myf MyFlags) { + DBUG_ENTER("my_register_filename"); if ((int) fd >= 0) { if ((uint) fd >= my_file_limit) { #if defined(THREAD) && !defined(HAVE_PREAD) - (void) my_close(fd,MyFlags); - my_errno=EMFILE; - if (MyFlags & (MY_FFNF | MY_FAE | MY_WME)) - my_error(EE_OUT_OF_FILERESOURCES, MYF(ME_BELL+ME_WAITTANG), - FileName, 
my_errno); - return(-1); -#endif + my_errno= EMFILE; +#else thread_safe_increment(my_file_opened,&THR_LOCK_open); - return(fd); /* safeguard */ + DBUG_RETURN(fd); /* safeguard */ +#endif } - pthread_mutex_lock(&THR_LOCK_open); - if ((my_file_info[fd].name = (char*) my_strdup(FileName,MyFlags))) + else { - my_file_opened++; - my_file_info[fd].type = type_of_file; + pthread_mutex_lock(&THR_LOCK_open); + if ((my_file_info[fd].name = (char*) my_strdup(FileName,MyFlags))) + { + my_file_opened++; + my_file_info[fd].type = type_of_file; #if defined(THREAD) && !defined(HAVE_PREAD) - pthread_mutex_init(&my_file_info[fd].mutex,MY_MUTEX_INIT_FAST); + pthread_mutex_init(&my_file_info[fd].mutex,MY_MUTEX_INIT_FAST); #endif + pthread_mutex_unlock(&THR_LOCK_open); + DBUG_PRINT("exit",("fd: %d",fd)); + DBUG_RETURN(fd); + } pthread_mutex_unlock(&THR_LOCK_open); - DBUG_PRINT("exit",("fd: %d",fd)); - return(fd); + my_errno= ENOMEM; } - pthread_mutex_unlock(&THR_LOCK_open); (void) my_close(fd, MyFlags); - fd= -1; - my_errno=ENOMEM; } else - my_errno=errno; - DBUG_PRINT("error",("Got error %d on open",my_errno)); - if (MyFlags & (MY_FFNF | MY_FAE | MY_WME)) { - if (my_errno == EMFILE) { - DBUG_PRINT("error",("print err: %d",EE_OUT_OF_FILERESOURCES)); - my_error(EE_OUT_OF_FILERESOURCES, MYF(ME_BELL+ME_WAITTANG), - FileName, my_errno); - } else { - DBUG_PRINT("error",("print err: %d",error_message_number)); - my_error(error_message_number, MYF(ME_BELL+ME_WAITTANG), - FileName, my_errno); - } + my_errno= errno; + + DBUG_PRINT("error",("Got error %d on open", my_errno)); + if (MyFlags & (MY_FFNF | MY_FAE | MY_WME)) + { + if (my_errno == EMFILE) + error_message_number= EE_OUT_OF_FILERESOURCES; + DBUG_PRINT("error",("print err: %d",error_message_number)); + my_error(error_message_number, MYF(ME_BELL+ME_WAITTANG), + FileName, my_errno); } - return(fd); + DBUG_RETURN(-1); } #ifdef __WIN__ diff --git a/mysys/my_symlink2.c b/mysys/my_symlink2.c index 279672be11c..932f2b6424f 100644 --- 
a/mysys/my_symlink2.c +++ b/mysys/my_symlink2.c @@ -33,7 +33,9 @@ File my_create_with_symlink(const char *linkname, const char *filename, int create_link; char abs_linkname[FN_REFLEN]; DBUG_ENTER("my_create_with_symlink"); - DBUG_PRINT("enter", ("linkname: %s filename: %s", linkname, filename)); + DBUG_PRINT("enter", + ("linkname: %s filename: %s", linkname ? linkname : "NULL", + filename)); if (my_disable_symlinks) { -- cgit v1.2.1 From 631ecaabea7336a8f28367c0d1c291f0433f7e88 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 2 Jul 2007 20:45:15 +0300 Subject: Merged with mysql-5.1 main tree. BUILD/compile-pentium-debug-max: Added definition after macro was removed from main tree. This will be fixed back in main tree later. --- mysys/lf_hash.c | 28 ++++++++++++++-------------- mysys/my_compress.c | 3 ++- mysys/my_rename.c | 5 +++-- mysys/my_safehash.c | 30 +++++++++++++++--------------- mysys/my_safehash.h | 18 +++++++++--------- mysys/my_sync.c | 3 ++- 6 files changed, 45 insertions(+), 42 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 832f0eb5852..3f6b9082ab9 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -34,7 +34,7 @@ LF_REQUIRE_PINS(3); typedef struct { intptr volatile link; /* a pointer to the next element in a listand a flag */ uint32 hashnr; /* reversed hash number, for sorting */ - const byte *key; + const uchar *key; uint keylen; /* data is stored here, directly after the keylen. @@ -72,10 +72,10 @@ typedef struct { pins[0..2] are used, they are NOT removed on return */ static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, - const byte *key, uint keylen, CURSOR *cursor, LF_PINS *pins) + const uchar *key, uint keylen, CURSOR *cursor, LF_PINS *pins) { uint32 cur_hashnr; - const byte *cur_key; + const uchar *cur_key; uint cur_keylen; intptr link; @@ -201,7 +201,7 @@ static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs, it uses pins[0..2], on return all pins are removed. 
*/ static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, - const byte *key, uint keylen, LF_PINS *pins) + const uchar *key, uint keylen, LF_PINS *pins) { CURSOR cursor; int res; @@ -259,7 +259,7 @@ static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, all other pins are removed. */ static LF_SLIST *lsearch(LF_SLIST * volatile *head, CHARSET_INFO *cs, - uint32 hashnr, const byte *key, uint keylen, + uint32 hashnr, const uchar *key, uint keylen, LF_PINS *pins) { CURSOR cursor; @@ -271,8 +271,8 @@ static LF_SLIST *lsearch(LF_SLIST * volatile *head, CHARSET_INFO *cs, return res ? cursor.curr : 0; } -static inline const byte* hash_key(const LF_HASH *hash, - const byte *record, uint *length) +static inline const uchar* hash_key(const LF_HASH *hash, + const uchar *record, uint *length) { if (hash->get_key) return (*hash->get_key)(record, length, 0); @@ -285,7 +285,7 @@ static inline const byte* hash_key(const LF_HASH *hash, note, that the hash value is limited to 2^31, because we need one bit to distinguish between normal and dummy nodes. 
*/ -static inline uint calc_hash(LF_HASH *hash, const byte *key, uint keylen) +static inline uint calc_hash(LF_HASH *hash, const uchar *key, uint keylen) { ulong nr1= 1, nr2= 4; hash->charset->coll->hash_sort(hash->charset, (uchar*) key, keylen, @@ -362,7 +362,7 @@ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) if (unlikely(!node)) return -1; memcpy(node+1, data, hash->element_size); - node->key= hash_key(hash, (byte *)(node+1), &node->keylen); + node->key= hash_key(hash, (uchar *)(node+1), &node->keylen); hashnr= calc_hash(hash, node->key, node->keylen); bucket= hashnr % hash->size; el= _lf_dynarray_lvalue(&hash->array, bucket); @@ -399,7 +399,7 @@ int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) { LF_SLIST * volatile *el; - uint bucket, hashnr= calc_hash(hash, (byte *)key, keylen); + uint bucket, hashnr= calc_hash(hash, (uchar *)key, keylen); bucket= hashnr % hash->size; lf_rwlock_by_pins(pins); @@ -415,7 +415,7 @@ int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins))) return -1; if (ldelete(el, hash->charset, my_reverse_bits(hashnr) | 1, - (byte *)key, keylen, pins)) + (uchar *)key, keylen, pins)) { lf_rwunlock_by_pins(pins); return 1; @@ -438,7 +438,7 @@ int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) { LF_SLIST * volatile *el, *found; - uint bucket, hashnr= calc_hash(hash, (byte *)key, keylen); + uint bucket, hashnr= calc_hash(hash, (uchar *)key, keylen); bucket= hashnr % hash->size; lf_rwlock_by_pins(pins); @@ -448,12 +448,12 @@ void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins))) return MY_ERRPTR; found= lsearch(el, hash->charset, 
my_reverse_bits(hashnr) | 1, - (byte *)key, keylen, pins); + (uchar *)key, keylen, pins); lf_rwunlock_by_pins(pins); return found ? found+1 : 0; } -static const byte *dummy_key= ""; +static const uchar *dummy_key= ""; /* RETURN diff --git a/mysys/my_compress.c b/mysys/my_compress.c index d495a1c1c6d..5e770959105 100644 --- a/mysys/my_compress.c +++ b/mysys/my_compress.c @@ -178,7 +178,8 @@ int packfrm(const uchar *data, size_t len, if (my_compress((uchar*)data, &org_len, &comp_len)) goto err; - DBUG_PRINT("info", ("org_len: %lu comp_len: %lu", org_len, comp_len)); + DBUG_PRINT("info", ("org_len: %lu comp_len: %lu", (ulong) org_len, + (ulong) comp_len)); DBUG_DUMP("compressed", (char*)data, org_len); error= 2; diff --git a/mysys/my_rename.c b/mysys/my_rename.c index 64dbac955ea..39e6056a9e4 100644 --- a/mysys/my_rename.c +++ b/mysys/my_rename.c @@ -66,8 +66,9 @@ int my_rename(const char *from, const char *to, myf MyFlags) #ifdef NEED_EXPLICIT_SYNC_DIR /* do only the needed amount of syncs: */ char dir_from[FN_REFLEN], dir_to[FN_REFLEN]; - dirname_part(dir_from, from); - dirname_part(dir_to, to); + size_t dir_from_length, dir_to_length; + dirname_part(dir_from, from, &dir_from_length); + dirname_part(dir_to, to, &dir_to_length); if (my_sync_dir(dir_from, MyFlags) || (strcmp(dir_from, dir_to) && my_sync_dir(dir_to, MyFlags))) diff --git a/mysys/my_safehash.c b/mysys/my_safehash.c index 57f408942bf..b34ad5f16ff 100644 --- a/mysys/my_safehash.c +++ b/mysys/my_safehash.c @@ -53,7 +53,7 @@ static void safe_hash_entry_free(SAFE_HASH_ENTRY *entry) { DBUG_ENTER("safe_hash_entry_free"); - my_free((gptr) entry, MYF(0)); + my_free((uchar*) entry, MYF(0)); DBUG_VOID_RETURN; } @@ -70,11 +70,11 @@ static void safe_hash_entry_free(SAFE_HASH_ENTRY *entry) # reference on the key */ -static byte *safe_hash_entry_get(SAFE_HASH_ENTRY *entry, uint *length, - my_bool not_used __attribute__((unused))) +static uchar *safe_hash_entry_get(SAFE_HASH_ENTRY *entry, uint *length, + my_bool 
not_used __attribute__((unused))) { *length= entry->length; - return (byte*) entry->key; + return (uchar*) entry->key; } @@ -97,7 +97,7 @@ static byte *safe_hash_entry_get(SAFE_HASH_ENTRY *entry, uint *length, */ my_bool safe_hash_init(SAFE_HASH *hash, uint elements, - byte *default_value) + uchar *default_value) { DBUG_ENTER("safe_hash_init"); if (hash_init(&hash->hash, &my_charset_bin, elements, @@ -154,10 +154,10 @@ void safe_hash_free(SAFE_HASH *hash) # data associated with the key of default value if data was not found */ -byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length, - byte *def) +uchar *safe_hash_search(SAFE_HASH *hash, const uchar *key, uint length, + uchar *def) { - byte *result; + uchar *result; DBUG_ENTER("safe_hash_search"); rw_rdlock(&hash->mutex); result= hash_search(&hash->hash, key, length); @@ -191,8 +191,8 @@ byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length, 1 error (Can only be EOM). In this case my_message() is called. */ -my_bool safe_hash_set(SAFE_HASH *hash, const byte *key, uint length, - byte *data) +my_bool safe_hash_set(SAFE_HASH *hash, const uchar *key, uint length, + uchar *data) { SAFE_HASH_ENTRY *entry; my_bool error= 0; @@ -214,7 +214,7 @@ my_bool safe_hash_set(SAFE_HASH *hash, const byte *key, uint length, /* unlink entry from list */ if ((*entry->prev= entry->next)) entry->next->prev= entry->prev; - hash_delete(&hash->hash, (byte*) entry); + hash_delete(&hash->hash, (uchar*) entry); goto end; } if (entry) @@ -230,7 +230,7 @@ my_bool safe_hash_set(SAFE_HASH *hash, const byte *key, uint length, error= 1; goto end; } - entry->key= (byte*) (entry +1); + entry->key= (uchar*) (entry +1); memcpy((char*) entry->key, (char*) key, length); entry->length= length; entry->data= data; @@ -239,7 +239,7 @@ my_bool safe_hash_set(SAFE_HASH *hash, const byte *key, uint length, entry->next->prev= &entry->next; entry->prev= &hash->root; hash->root= entry; - if (my_hash_insert(&hash->hash, (byte*) entry)) + if 
(my_hash_insert(&hash->hash, (uchar*) entry)) { /* This can only happen if hash got out of memory */ my_free((char*) entry, MYF(0)); @@ -269,7 +269,7 @@ end: default value. */ -void safe_hash_change(SAFE_HASH *hash, byte *old_data, byte *new_data) +void safe_hash_change(SAFE_HASH *hash, uchar *old_data, uchar *new_data) { SAFE_HASH_ENTRY *entry, *next; DBUG_ENTER("safe_hash_change"); @@ -285,7 +285,7 @@ void safe_hash_change(SAFE_HASH *hash, byte *old_data, byte *new_data) { if ((*entry->prev= entry->next)) entry->next->prev= entry->prev; - hash_delete(&hash->hash, (byte*) entry); + hash_delete(&hash->hash, (uchar*) entry); } else entry->data= new_data; diff --git a/mysys/my_safehash.h b/mysys/my_safehash.h index 53845a5fec7..8a5856b6763 100644 --- a/mysys/my_safehash.h +++ b/mysys/my_safehash.h @@ -30,9 +30,9 @@ typedef struct st_safe_hash_entry { - byte *key; + uchar *key; uint length; - byte *data; + uchar *data; struct st_safe_hash_entry *next, **prev; } SAFE_HASH_ENTRY; @@ -43,16 +43,16 @@ typedef struct st_safe_hash_with_default rw_lock_t mutex; #endif HASH hash; - byte *default_value; + uchar *default_value; SAFE_HASH_ENTRY *root; } SAFE_HASH; my_bool safe_hash_init(SAFE_HASH *hash, uint elements, - byte *default_value); + uchar *default_value); void safe_hash_free(SAFE_HASH *hash); -byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length, - byte *def); -my_bool safe_hash_set(SAFE_HASH *hash, const byte *key, uint length, - byte *data); -void safe_hash_change(SAFE_HASH *hash, byte *old_data, byte *new_data); +uchar *safe_hash_search(SAFE_HASH *hash, const uchar *key, uint length, + uchar *def); +my_bool safe_hash_set(SAFE_HASH *hash, const uchar *key, uint length, + uchar *data); +void safe_hash_change(SAFE_HASH *hash, uchar *old_data, uchar *new_data); diff --git a/mysys/my_sync.c b/mysys/my_sync.c index ab3fc89e0d3..ba6964b00d6 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -145,7 +145,8 @@ int my_sync_dir_by_file(const char *file_name, 
myf my_flags) { #ifdef NEED_EXPLICIT_SYNC_DIR char dir_name[FN_REFLEN]; - dirname_part(dir_name, file_name); + size_t dir_name_length; + dirname_part(dir_name, file_name, &dir_name_length); return my_sync_dir(dir_name, my_flags); #else return 0; -- cgit v1.2.1 From 388122558c83643e320c08d93faa45c7c6d1245e Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 3 Jul 2007 15:20:41 +0200 Subject: Maria: * Don't modify share->base.born_transactional; now it is a value carved in stone at creation time. share->now_transactional is what can be modified: it starts at born_transactional, can become false during ALTER TABLE (when we want no logging), and restored later. * Not resetting create_rename_lsn to 0 during delete_all or repair. * when we temporarily disable transactionality, we also change the page type to PAGECACHE_PLAIN_PAGE: it bypasses some work in the page cache (optimization), and avoids assertions related to LSNs. * Disable INSERT DELAYED for transactional tables, because durability could not be guaranteed (insertion may even not happen) mysys/mf_keycache.c: comment storage/maria/ha_maria.cc: * a transactional table cannot do INSERT DELAYED * ha_maria::save_transactional not needed anymore, as now instead we don't modify MARIA_SHARE::MARIA_BASE_INFO::born_transactional (born_transactional plays the role of save_transactional), and modify MARIA_SHARE::now_transactional. * REPAIR_TABLE log record is now logged by maria_repair() * comment why we rely on born_transactional to know if we should skipping a transaction. * putting together two if()s which test for F_UNLCK storage/maria/ha_maria.h: ha_maria::save_transactional not needed anymore (moved to the C layer) storage/maria/ma_blockrec.c: * For the block record's code (writing/updating/deleting records), all that counts is now_transactional, not born_transactional. 
* As we now set the page type to PAGECACHE_PLAIN_PAGE for tables which have now_transactional==FALSE, pagecache will not expect a meaningful LSN for them in pagecache_unlock_by_link(), so we can pass it LSN_IMPOSSIBLE. storage/maria/ma_check.c: * writing LOGREC_REPAIR_TABLE moves from ha_maria::repair() to maria_repair(), sounds cleaner (less functions to export). * when opening a table during REPAIR, don't use the realpath-ed name, as this may fail if the table has symlinked files (maria_open() would try to find the data and index file in the directory of unique_file_name, it would fail if data and index files are in different dirs); use the unresolved name, open_file_name, which is the argument which was passed to the maria_open() which created 'info'. storage/maria/ma_close.c: assert that when a statement is done with a table, it cleans up storage/maria/ma_create.c: new name storage/maria/ma_delete_all.c: * using now_transactional * no reason to reset create_rename_lsn during delete_all (a bug); also no reason to do it during repair: it was put there because a positive create_rename_lsn caused a call to check_and_set_lsn() which asserted in DBUG_ASSERT(block->type == PAGECACHE_LSN_PAGE); first solution was to use LSN_IMPOSSIBLE in _ma_unpin_all_pages() if not transactional; but then in the case of ALTER TABLE, with transactionality temporarily disabled, it asserted in DBUG_ASSERT(LSN_VALID(lsn)) in pagecache_fwrite() (PAGECACHE_LSN_PAGE page with zero LSN - bad). The additional solution is to use PAGECACHE_PLAIN_PAGE when we disable transactionality temporarily: this avoids checks on the LSN, and also bypasses (optimization) the "flush log up to LSN" call when the pagecache flushes our page (in other words, no WAL needed). storage/maria/ma_delete_table.c: use now_transactional storage/maria/ma_locking.c: assert that when a statement is done with a table, it cleans up. 
storage/maria/ma_loghandler.c: * now_transactional should be used to test if we want a log record. * Assertions to make sure dummy_transaction_object is not spoilt by its many users. storage/maria/ma_open.c: base.transactional -> base.born_transactional storage/maria/ma_pagecache.c: missing name for page's type. Comment for future. storage/maria/ma_rename.c: use now_transactional storage/maria/maria_chk.c: use born_transactional storage/maria/maria_def.h: MARIA_BASE_INFO::transactional renamed to born_transactional. MARIA_SHARE::now_transactional introduced. _ma_repair_write_log_record() is made local to ma_check.c. Macros to temporarily disable, and re-enable, transactionality for a table. storage/maria/maria_read_log.c: assertions and using the new macros. Adding a forgotten resetting when we finally close all tables. --- mysys/mf_keycache.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 95a9f08a07a..065c10e3d73 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -2456,7 +2456,14 @@ restart: } else { - /* Link the block into a list of blocks 'in switch' */ + /* + Link the block into a list of blocks 'in switch'. + Note that if there could be two concurrent flush_key_blocks_int() + on this file (normally this does not happen, as MyISAM uses + intern_lock for flushing), then the first one may move the block + into its first_in_switch, and the second one would just not see + the block and wrongly consider its job done. + */ unlink_changed(block); link_changed(block, &first_in_switch); } -- cgit v1.2.1 From fcdc76c28952608524d6e5a388bc7b04ad8de09f Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 4 Jul 2007 22:27:58 +0200 Subject: in mysql_unlock_tables(), do thr_unlock() AFTER external_unlock(). it means, {update,restore}_status() should be called in external_lock, not in thr_unlock. Only affects storage engines that support TL_WRITE_CONCURRENT. 
--- mysys/thr_lock.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'mysys') diff --git a/mysys/thr_lock.c b/mysys/thr_lock.c index 94c34eef2fa..b70be0d66db 100644 --- a/mysys/thr_lock.c +++ b/mysys/thr_lock.c @@ -24,7 +24,7 @@ Locks are prioritized according to: The current lock types are: -TL_READ # Low priority read +TL_READ # Low priority read TL_READ_WITH_SHARED_LOCKS TL_READ_HIGH_PRIORITY # High priority read TL_READ_NO_INSERT # Read without concurrent inserts @@ -57,8 +57,12 @@ check_status: In MyISAM this is a simple check if the insert can be done at the end of the datafile. update_status: - Before a write lock is released, this function is called. - In MyISAM this functions updates the count and length of the datafile + in thr_reschedule_write_lock(), when an insert delayed thread + downgrades TL_WRITE lock to TL_WRITE_DELAYED, to allow SELECT + threads to proceed. + A storage engine should also call update_status internally + in the ::external_lock(F_UNLCK) method. + In MyISAM and CSV this functions updates the length of the datafile. get_status: When one gets a lock this functions is called. In MyISAM this stores the number of rows and size of the datafile @@ -762,16 +766,6 @@ void thr_unlock(THR_LOCK_DATA *data) } else lock->write.last=data->prev; - if (lock_type >= TL_WRITE_CONCURRENT_INSERT) - { - if (lock->update_status) - (*lock->update_status)(data->status_param); - } - else - { - if (lock->restore_status) - (*lock->restore_status)(data->status_param); - } if (lock_type == TL_READ_NO_INSERT) lock->read_no_write_count--; data->type=TL_UNLOCK; /* Mark unlocked */ -- cgit v1.2.1 From a3d2ae4648d739a7ec7820e22c05373fde65b770 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 27 Jul 2007 12:06:39 +0200 Subject: merging MyISAM changes into Maria (not done in 5.1->maria merge of Jul 7th). "maria.test" and "ps_maria.test" still fail; "ma_test_all" starts failing (MyISAM has the same issue see BUG#30094). 
include/maria.h: merging MyISAM changes into Maria mysys/mf_keycache.c: mi_test_all showed "floating point exception", this was already fixed in the latest 5.1, importing fix. sql/item_xmlfunc.cc: compiler warning (already fixed in latest 5.1) storage/maria/ha_maria.cc: merging MyISAM changes into Maria. See #ifdef ASK_MONTY. storage/maria/ha_maria.h: merging MyISAM changes into Maria storage/maria/ma_cache.c: merging MyISAM changes into Maria storage/maria/ma_check.c: merging MyISAM changes into Maria storage/maria/ma_create.c: merging MyISAM changes into Maria storage/maria/ma_dynrec.c: merging MyISAM changes into Maria storage/maria/ma_extra.c: merging MyISAM changes into Maria storage/maria/ma_ft_boolean_search.c: merging MyISAM changes into Maria storage/maria/ma_ft_nlq_search.c: merging MyISAM changes into Maria storage/maria/ma_info.c: merging MyISAM changes into Maria storage/maria/ma_key.c: merging MyISAM changes into Maria storage/maria/ma_loghandler.c: compiler warning (part->length is size_t) storage/maria/ma_open.c: merging MyISAM changes into Maria storage/maria/ma_preload.c: merging MyISAM changes into Maria storage/maria/ma_range.c: merging MyISAM changes into Maria storage/maria/ma_rkey.c: merging MyISAM changes into Maria storage/maria/ma_rt_index.c: merging MyISAM changes into Maria storage/maria/ma_rt_key.c: merging MyISAM changes into Maria storage/maria/ma_rt_split.c: merging MyISAM changes into Maria storage/maria/ma_search.c: merging MyISAM changes into Maria storage/maria/ma_sort.c: merging MyISAM changes into Maria storage/maria/maria_def.h: merging MyISAM changes into Maria --- mysys/mf_keycache.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 6845e63dc33..4a8ab5b6719 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -2522,10 +2522,8 @@ uchar *key_cache_read(KEY_CACHE *keycache, int error=0; uchar *start= buff; 
DBUG_ENTER("key_cache_read"); - DBUG_PRINT("enter", ("fd: %u pos: %lu page: %lu length: %u", - (uint) file, (ulong) filepos, - (ulong) (filepos / keycache->key_cache_block_size), - length)); + DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u", + (uint) file, (ulong) filepos, length)); if (keycache->key_cache_inited) { @@ -2979,10 +2977,10 @@ int key_cache_write(KEY_CACHE *keycache, int error=0; DBUG_ENTER("key_cache_write"); DBUG_PRINT("enter", - ("fd: %u pos: %lu page: %lu length: %u block_length: %u", - (uint) file, (ulong) filepos, - (ulong) (filepos / keycache->key_cache_block_size), - length, block_length)); + ("fd: %u pos: %lu length: %u block_length: %u key_block_length: + %u", + (uint) file, (ulong) filepos, length, block_length, + keycache ? keycache->key_cache_block_size : 0)); if (!dont_write) { -- cgit v1.2.1 From 9b1e83dba732896f606181b21becc3b0144a91ec Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 27 Jul 2007 16:11:40 +0200 Subject: porting Serg's fix for BUG#30094 to Maria. Now ma_test_all passes. maria.test and ps_maria.test still fail. 
mysys/mf_keycache.c: split string annoys some compilers storage/maria/ha_maria.cc: fix for compiler warnings storage/maria/ma_test1.c: porting Serg's fix for BUG#30094 to Maria storage/maria/ma_test2.c: porting Serg's fix for BUG#30094 to Maria storage/maria/ma_test3.c: porting Serg's fix for BUG#30094 to Maria storage/maria/ma_test_recovery: don't print ma_test1's messages if no problem --- mysys/mf_keycache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 4a8ab5b6719..e865d152633 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -2977,8 +2977,8 @@ int key_cache_write(KEY_CACHE *keycache, int error=0; DBUG_ENTER("key_cache_write"); DBUG_PRINT("enter", - ("fd: %u pos: %lu length: %u block_length: %u key_block_length: - %u", + ("fd: %u pos: %lu length: %u block_length: %u" + " key_block_length: %u", (uint) file, (ulong) filepos, length, block_length, keycache ? keycache->key_cache_block_size : 0)); -- cgit v1.2.1 From d430e5bfc1327de723911aa22f26eb83b46c6592 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 22 Aug 2007 10:56:10 +0300 Subject: Fixed compiler warnings Fixed wrong hash function prototype (causes failure on 64 bit systems) mysql-test/r/rpl_events.result: Removed wrong merge (result file is now identical as in 5.1 tree) mysys/lf_hash.c: Fixed compiler warning mysys/my_safehash.c: Fixed wrong hash function prototype (causes failure on 64 bit systems) storage/maria/unittest/ma_test_loghandler_max_lsn-t.c: Fixed compiler warning --- mysys/lf_hash.c | 8 ++++---- mysys/my_safehash.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 3f6b9082ab9..322f04cdc41 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -32,10 +32,10 @@ LF_REQUIRE_PINS(3); /* An element of the list */ typedef struct { - intptr volatile link; /* a pointer to the next element in a listand a flag */ - uint32 
hashnr; /* reversed hash number, for sorting */ + intptr volatile link; /* a pointer to the next element in a listand a flag */ + uint32 hashnr; /* reversed hash number, for sorting */ const uchar *key; - uint keylen; + size_t keylen; /* data is stored here, directly after the keylen. thus the pointer to data is (void*)(slist_element_ptr+1) @@ -272,7 +272,7 @@ static LF_SLIST *lsearch(LF_SLIST * volatile *head, CHARSET_INFO *cs, } static inline const uchar* hash_key(const LF_HASH *hash, - const uchar *record, uint *length) + const uchar *record, size_t *length) { if (hash->get_key) return (*hash->get_key)(record, length, 0); diff --git a/mysys/my_safehash.c b/mysys/my_safehash.c index b34ad5f16ff..b3d6439793c 100644 --- a/mysys/my_safehash.c +++ b/mysys/my_safehash.c @@ -70,7 +70,7 @@ static void safe_hash_entry_free(SAFE_HASH_ENTRY *entry) # reference on the key */ -static uchar *safe_hash_entry_get(SAFE_HASH_ENTRY *entry, uint *length, +static uchar *safe_hash_entry_get(SAFE_HASH_ENTRY *entry, size_t *length, my_bool not_used __attribute__((unused))) { *length= entry->length; -- cgit v1.2.1 From e27890cab0a9155b38df57748af9d20dfcccb590 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 29 Aug 2007 16:43:01 +0200 Subject: WL#3072 Maria recovery * create page cache before initializing engine and not after, because Maria's recovery needs a page cache * make the creation of a bitmap page more crash-resistent * bugfix (see ma_blockrec.c) * back to old way: create an 8k bitmap page when creating table * preparations for the UNDO phase: recreate TRNs * preparations for Checkpoint: list of dirty pages, testing of rec_lsn to know if page should be skipped during Recovery (unused in this patch as no Checkpoint module pushed yet) * maria_chk tags repaired table with a special LSN * reworking all around in ma_recovery.c (less duplication) mysys/my_realloc.c: noted an issue in my_realloc() sql/mysqld.cc: page cache needs to be created before engines are initialized, because 
Maria's initialization may do a recovery which needs the page cache. storage/maria/ha_maria.cc: update to new prototype storage/maria/ma_bitmap.c: when creating the first bitmap page we used chsize to 8192 bytes then pwrite (overwrite) the last 2 bytes (8191-8192). If crash between the two operations, this leaves a bitmap page full without its end marker. A later recovery may try to read this page and find it exists and misses a marker and conclude it's corrupted and fail. Changing the chsize to only 8190 bytes: recovery will then find the page is too short and recreate it entirely. storage/maria/ma_blockrec.c: Fix for a bug: when executing a REDO, if the data page is created, data_file_length was increased before _ma_bitmap_set(): _ma_bitmap_set() called _ma_read_bitmap_page() which, due to the increased data_file_length, expected to find a bitmap page on disk with a correct end marker; if the bitmap page didn't exist already in fact, this failed. Fixed by increasing data_file_length only after _ma_read_bitmap_page() has created the new bitmap page correctly. This bug could happen every time a REDO is about creating a new bitmap page. storage/maria/ma_check.c: empty data file has a bitmap page storage/maria/ma_control_file.c: useless parameter to ma_control_file_create_or_open(), just test if this is recovery. storage/maria/ma_control_file.h: new prototype storage/maria/ma_create.c: Back to how it was before: maria_create() creates an 8k bitmap page. Thus (bugfix) data_file_length needs to reflect this instead of being 0. storage/maria/ma_loghandler.c: as ma_test1 and ma_test2 now use real transactions and not dummy_transaction_object, REDO for INSERT/UPDATE/DELETE are always about real transactions, can assert this. A function for Recovery to assign a short id to a table. 
storage/maria/ma_loghandler.h: new function storage/maria/ma_loghandler_lsn.h: maria_chk tags repaired tables with this LSN storage/maria/ma_open.c: * enforce that DMLs on transactional tables use real transactions and not dummy_transaction_object. * test if table was repaired with maria_chk (which has to been seen as an import of an external table into the server), test validity of create_rename_lsn (header corruption detection) * comments. storage/maria/ma_recovery.c: * preparations for the UNDO phase: recreate TRNs * preparations for Checkpoint: list of dirty pages, testing of rec_lsn to know if page should be skipped during Recovery (unused in this patch as no Checkpoint module pushed yet) * reworking all around (less duplication) storage/maria/ma_recovery.h: a parameter to say if the UNDO phase should be skipped storage/maria/maria_chk.c: tag repaired tables with a special LSN storage/maria/maria_read_log.c: * update to new prototype * no UNDO phase in maria_read_log for now storage/maria/trnman.c: * a function for Recovery to create a transaction (TRN), needed in the UNDO phase * a function for Recovery to grab an existing transaction, needed in the UNDO phase (rollback all existing transactions) storage/maria/trnman_public.h: new functions --- mysys/my_realloc.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'mysys') diff --git a/mysys/my_realloc.c b/mysys/my_realloc.c index c7cf1323cd4..a55282e03a0 100644 --- a/mysys/my_realloc.c +++ b/mysys/my_realloc.c @@ -22,6 +22,16 @@ /* My memory re allocator */ +/** + @brief wrapper around realloc() + + @param oldpoint pointer to currently allocated area + @param size new size requested, must be >0 + @param my_flags flags + + @note if size==0 realloc() may return NULL; my_realloc() treats this as an + error which is not the intention of realloc() +*/ void* my_realloc(void* oldpoint, size_t size, myf my_flags) { void *point; @@ -29,6 +39,7 @@ void* my_realloc(void* oldpoint, size_t size, myf my_flags) 
DBUG_PRINT("my",("ptr: 0x%lx size: %lu my_flags: %d", (long) oldpoint, (ulong) size, my_flags)); + DBUG_ASSERT(size > 0); if (!oldpoint && (my_flags & MY_ALLOW_ZERO_PTR)) DBUG_RETURN(my_malloc(size,my_flags)); #ifdef USE_HALLOC -- cgit v1.2.1 From 496741d5761f14dba39c7cfd01c31bcc0fe810b1 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 9 Oct 2007 21:09:50 +0300 Subject: Moved randomize and my_rnd under mysys Added my_uuid Added pre-support for PAGE_CHECKSUM Added syntax for CREATE ... PAGE_CHECKSUM=# TABLE_CHECKSUM=# Reserved place for page checksums on index, bitmap and block pages Added index number to header of index pages Added linked list for free directory entries (speeds up inserts with BLOCK format) Calculate checksums in original column order (fixes bug with checksum on rows with BLOCK format) Cleaned up all index handling to use 'info->s->keypage_header' (variable size) as the header for index pages (before this was '2') Added 0xffffffff to end of index and block data bases and 0xfffffffe at end of bitmap pages when page checksums are not enabled Added _ma_get_page_used() and _ma_get_used_and_node() to simplify index page header handling rec_per_key_part is now in double precision Reserved place in index file for my_guid and nulls_per_key_part Give error HA_ERR_NEW_FILE if trying to open a Maria file with new, not yet supported extensions Lots of renames to increase readability: randomize() -> my_rnd_init() st_maria_info -> st_maria_handler st_maria_info -> MARIA_HA st_maria_isaminfo -> st_maria_info rand_struct -> my_rand_struct rec_per_key_rows -> records_at_analyze client/mysqladmin.cc: rand_struct -> my_rrnd_struct include/maria.h: st_maria_info -> MARIA_HA st_maria_isaminfo -> st_maria_info Changed analyze statistics to be of double precission Changed offset to field to be 32bits instead of 64 (safe as a record without blobs can't be that big) include/my_base.h: Added HA_OPTION_PAGE_CHECKSUM & HA_CREATE_PAGE_CHECKSUM Fixed comments Added 
HA_ERR_NEW_FILE include/my_sys.h: Added prototypes and structures for my_uuid() and my_rnd() include/myisamchk.h: Changed some buffers to size_t Added possibility to have key statistics with double precission include/mysql_com.h: Move rand functions to mysys libmysql/Makefile.shared: Added my_rnd mysql-test/r/maria.result: Updated results mysql-test/t/maria.test: More tests for checksum mysys/Makefile.am: Added my_rnd.c and my_uuid.c server-tools/instance-manager/listener.cc: Fixed include order (my_global.h should always be first) server-tools/instance-manager/mysql_connection.cc: Fixed include order (my_global.h should always be first) Use my_rnd_init() server-tools/instance-manager/mysql_connection.h: rand_struct -> my_rand_struct sql/handler.h: Added flag for page checksums sql/item_func.cc: Use new my_rnd() interface sql/item_func.h: Use new my_rnd() interface sql/item_strfunc.cc: Use new my_rnd() interface sql/lex.h: Added PAGE_CHECKSUM and TABLE_CHECKSUM sql/mysql_priv.h: Use new my_rnd() interface sql/mysqld.cc: Use new my_rnd() interface sql/password.c: Move my_rnd() to mysys Use new my_rnd() interface sql/sql_class.cc: Use new my_rnd() interface sql/sql_class.h: Use new my_rnd() interface sql/sql_crypt.cc: Use new my_rnd() interface sql/sql_crypt.h: Use new my_rnd() interface sql/sql_show.cc: Simpler handling of ha_choice_values Added PAGE_CHECKSUM sql/sql_table.cc: Enable correct checksum handling (for now) if not running in compatible mode sql/sql_yacc.yy: Added table option PAGE_CHECKSUM Added future compatible table option TABLE_CHECKSUM (alias for CHECKSUM) Added 'choice' target to simplify code sql/table.cc: Store flag for PAGE_CHECKSUM sql/table.h: Added support for PAGE_CHECKSUM storage/maria/ha_maria.cc: Remove protection for incompatbile frm and MAI (Slow, not needed test) Rec_per_key is now in double Remember row type for table Give warning if one Maria uses another row type than requested Removed some old ASK_MONTY entries (added comments 
instead) Added handling of PAGE_CHECKSUM flags storage/maria/ma_bitmap.c: Added page checksums to bitmap pages Added special bitmap marker for bitmap pages (Used to find bugs when running without page checksums) storage/maria/ma_blockrec.c: Added a free-link list over directory entries. This makes insert of small rows faster as we don't have to scan the whole directory to find a not used entry. Moved SANITY_CHECKS to maria_def.h Simplify code by introducing dir_entry_pos() Added support for PAGE_CHECKSUM storage/maria/ma_blockrec.h: Added DIR_FREE_SIZE (linked list of free directory entries) Added PAGE_CHECKSUM Added 'dir_entry_pos()' storage/maria/ma_check.c: Check that index pages has correct index number Calculate rec_per_key with double precission Simplify code by using '_ma_get_used_and_node()' Check free directory list Remove wrong end \n from messages maria_data_on_page() -> _ma_get_page_used() maria_putint() -> _ma_store_page_used() rec_per_key_rows -> records_at_analyze storage/maria/ma_checksum.c: Calculate checksum in original column order storage/maria/ma_create.c: Store original column order in index file Reserve place for nulls_per_key_part (future) Added support for PAGE_CHECKSUM storage/maria/ma_dbug.c: Fixed wrong debug output of key of type 'ulong' storage/maria/ma_delete.c: maria_data_on_page() -> _ma_get_used_and_node() maria_data_on_page() -> _ma_get_page_used() maria_putint() -> _ma_store_page_used() Added page header (index key number) to all index pages Reserved page for checksum on index pages Use keypage_header storage/maria/ma_ft_update.c: maria_putint() -> _ma_store_page_used() Store key number at start of page storage/maria/ma_loghandler.h: st_maria_info -> MARIA_HA storage/maria/ma_open.c: rec_per_key is now in double precission Added 'nulls_per_key_part' Added 'extra_options' (flags for future) Added support for PAGE_CHECKSUM Give error HA_ERR_NEW_FILE when using unsupported maria extensions Added comments Add maria_uuid to index file 
Added functions to store and read column_nr map. Changed some functions to return my_bool instead of uint storage/maria/ma_page.c: Added checks that pages has correct key nr Store 0xffffffff in checksum position if page checksums are not enabled Moved key-page-delete link to take into account keypage header storage/maria/ma_preload.c: Remove old MyISAM dependent code When scanning pages, only add pages to page cache for the requested index storage/maria/ma_range.c: maria_data_on_page() -> _ma_get_used_and_node() Use keypage_header storage/maria/ma_rt_index.c: Fixed indentation storage/maria/ma_rt_index.h: Added support for dynamic index page header Reserved place for PAGE_CHECKSUM storage/maria/ma_rt_key.c: Fixed indentation maria_data_on_page() -> _ma_get_page_used() maria_putint() -> maria_store_page_used() storage/maria/ma_rt_mbr.c: Fixed indentation storage/maria/ma_rt_split.c: Fixed indentation maria_data_on_page () -> _ma_get_page_used() storage/maria/ma_rt_test.c: Fixed indentation storage/maria/ma_search.c: Remove support of using -1 as 'last used index' to _ma_check_index() maria_data_on_page() -> _ma_get_page_used() maria_data_on_page() -> _ma_get_used_and_node() Use keypage_header storage/maria/ma_sort.c: Changed some buffers to size_t Changed rec_per_key_part to double storage/maria/ma_static.c: Removed NEAR Added maria_uuid storage/maria/ma_test2.c: Moevd testflag == 2 to correct place Remove test of reading with index number -1 (not supported anymore) storage/maria/ma_test_recovery.expected: Updated results storage/maria/ma_test_recovery: Changed tmp table names so that one can run maria_chk on them storage/maria/ma_write.c: Fixed indentation Use keypage_header Store index number on index pages maria_putint() -> _ma_store_page_used() maria_data_on_page() -> ma_get_used_and_node() maria_data_on_page() -> _ma_get_page_used() Added PAGE_CHECKSUM Added Maria handler to some functions Removed some not needed casts storage/maria/maria_chk.c: Added error 
handling for HA_ERR_NEW_FILE Added information about page checksums rec_per_key_part changed to double maria_data_on_page() -> _ma_get_page_used() Use keypage_header storage/maria/maria_def.h: Added IDENTICAL_PAGES_AFTER_RECOVERY and SANITY_CHECKS Changed rec_per_key_part to double Added nulls_per_key_part rec_per_key_rows -> records_at_analyze st_maria_info -> MARIA_HA Reserve place for new statistics variables, uuid, checksums per page etc. Removed NEAR tags Changed some prototypes to use my_bool and size_t storage/maria/maria_pack.c: st_maria_info -> MARIA_HA Fixed indentation storage/myisam/mi_dbug.c: Fix wrong debug output for ULONG mysys/my_rnd.c: New BitKeeper file ``mysys/my_rnd.c'' mysys/my_uuid.c: New BitKeeper file ``mysys/my_uuid.c'' --- mysys/Makefile.am | 1 + mysys/my_rnd.c | 55 ++++++++++++++++++ mysys/my_uuid.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 225 insertions(+) create mode 100644 mysys/my_rnd.c create mode 100644 mysys/my_uuid.c (limited to 'mysys') diff --git a/mysys/Makefile.am b/mysys/Makefile.am index 3d3047b3a9c..3c9cd9ac6dd 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -34,6 +34,7 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ my_vle.c my_atomic.c lf_hash.c \ lf_dynarray.c lf_alloc-pin.c \ my_fopen.c my_fstream.c my_getsystime.c \ + my_rnd.c my_uuid.c \ my_error.c errors.c my_div.c my_messnc.c \ mf_format.c mf_same.c mf_dirname.c mf_fn_ext.c \ my_symlink.c my_symlink2.c \ diff --git a/mysys/my_rnd.c b/mysys/my_rnd.c new file mode 100644 index 00000000000..e1aca222954 --- /dev/null +++ b/mysys/my_rnd.c @@ -0,0 +1,55 @@ +/* Copyright (C) 2007 MySQL AB & Michael Widenius + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include +#include + +/* + Initialize random generator + + NOTES + MySQL's password checks depends on this, so don't do any changes + that changes the random numbers that are generated! +*/ + +void my_rnd_init(struct my_rnd_struct *rand_st, ulong seed1, ulong seed2) +{ +#ifdef HAVE_purify + bzero((char*) rand_st,sizeof(*rand_st)); /* Avoid UMC varnings */ +#endif + rand_st->max_value= 0x3FFFFFFFL; + rand_st->max_value_dbl=(double) rand_st->max_value; + rand_st->seed1=seed1%rand_st->max_value ; + rand_st->seed2=seed2%rand_st->max_value; +} + + +/* + Generate random number. + + SYNOPSIS + my_rnd() + rand_st INOUT Structure used for number generation + + RETURN VALUE + generated pseudo random number +*/ + +double my_rnd(struct my_rnd_struct *rand_st) +{ + rand_st->seed1=(rand_st->seed1*3+rand_st->seed2) % rand_st->max_value; + rand_st->seed2=(rand_st->seed1+rand_st->seed2+33) % rand_st->max_value; + return (((double) rand_st->seed1)/rand_st->max_value_dbl); +} diff --git a/mysys/my_uuid.c b/mysys/my_uuid.c new file mode 100644 index 00000000000..3c3cd8836fc --- /dev/null +++ b/mysys/my_uuid.c @@ -0,0 +1,169 @@ +/* Copyright (C) 2007 MySQL AB, Sergei Golubchik & Michael Widenius + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + implements Universal Unique Identifiers (UUIDs), as in + DCE 1.1: Remote Procedure Call, + Open Group Technical Standard Document Number C706, October 1997, + (supersedes C309 DCE: Remote Procedure Call 8/1994, + which was basis for ISO/IEC 11578:1996 specification) + + A UUID has the following structure: + + Field NDR Data Type Octet # Note + time_low unsigned long 0-3 The low field of the + timestamp. + time_mid unsigned short 4-5 The middle field of + the timestamp. + time_hi_and_version unsigned short 6-7 The high field of the + timestamp multiplexed + with the version number. + clock_seq_hi_and_reserved unsigned small 8 The high field of the + clock sequence multi- + plexed with the variant. + clock_seq_low unsigned small 9 The low field of the + clock sequence. + node character 10-15 The spatially unique node + identifier. 
+*/ + +#include "mysys_priv.h" +#include + +static my_bool my_uuid_inited= 0; +static struct my_rnd_struct uuid_rand; +static uint nanoseq; +static ulonglong uuid_time= 0; +static uchar uuid_suffix[2+6]; /* clock_seq and node */ + +#ifdef THREAD +pthread_mutex_t LOCK_uuid_generator; +#endif + +/* + Number of 100-nanosecond intervals between + 1582-10-15 00:00:00.00 and 1970-01-01 00:00:00.00 +*/ + +#define UUID_TIME_OFFSET ((ulonglong) 141427 * 24 * 60 * 60 * 1000 * 10) +#define UUID_VERSION 0x1000 +#define UUID_VARIANT 0x8000 + + +/* Helper function */ + +static void set_clock_seq() +{ + uint16 clock_seq= ((uint)(my_rnd(&uuid_rand)*16383)) | UUID_VARIANT; + int2store(uuid_suffix, clock_seq); +} + + +/** + Init structures needed for my_uuid + + @func my_uuid_init() + @param seed1 Seed for random generator + @param seed2 Seed for random generator + + @note + Seed1 & seed2 should NOT depend on clock. This is to be able to + generate a random mac address according to UUID specs. +*/ + +void my_uuid_init(ulong seed1, ulong seed2) +{ + uchar *mac= uuid_suffix+2; + ulonglong now; + + if (my_uuid_inited) + return; + my_uuid_inited= 1; + now= my_getsystime(); + nanoseq= 0; + + if (my_gethwaddr(mac)) + { + uint i; + /* + Generating random "hardware addr" + + Specs explicitly specify that node identifier should NOT + correlate with a clock_seq value, so we use a separate + randominit() here. + */ + /* purecov: begin inspected */ + my_rnd_init(&uuid_rand, (ulong) (seed2+ now/2), (seed1 + now)+random()); + for (i=0; i < sizeof(mac); i++) + mac[i]= (uchar)(my_rnd(&uuid_rand)*255); + /* purecov: end */ + } + my_rnd_init(&uuid_rand, (ulong) (seed1 + now), + (ulong) (now/2+ seed2 + getpid())); + set_clock_seq(); + pthread_mutex_init(&LOCK_uuid_generator, MY_MUTEX_INIT_FAST); +} + + +/** + Create a global unique identifier (uuid) + + @func my_uuid() + @param to Store uuid here. 
Must be of size MY_uuid_SIZE (16) +*/ + +void my_uuid(uchar *to) +{ + ulonglong tv; + uint32 time_low; + uint16 time_mid, time_hi_and_version; + + DBUG_ASSERT(my_uuid_inited); + + pthread_mutex_lock(&LOCK_uuid_generator); + tv= my_getsystime() + UUID_TIME_OFFSET + nanoseq; + if (unlikely(tv < uuid_time)) + set_clock_seq(); + else if (unlikely(tv == uuid_time)) + { + /* special protection for low-res system clocks */ + nanoseq++; + tv++; + } + else + { + if (nanoseq && likely(tv-nanoseq >= uuid_time)) + { + tv-=nanoseq; + nanoseq=0; + } + } + uuid_time=tv; + pthread_mutex_unlock(&LOCK_uuid_generator); + + time_low= (uint32) (tv & 0xFFFFFFFF); + time_mid= (uint16) ((tv >> 32) & 0xFFFF); + time_hi_and_version= (uint16) ((tv >> 48) | UUID_VERSION); + + /* + Note, that the standard does NOT specify byte ordering in + multi-byte fields. it's implementation defined (but must be + the same for all fields). + */ + int4store(to, time_low); + int2store(to+4, time_mid); + int2store(to+6, time_hi_and_version); + bmove(to+8, uuid_suffix, sizeof(uuid_suffix)); +} -- cgit v1.2.1 From c780abfe40da423d06ac61461abe1be82a93cdac Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 12 Oct 2007 16:17:18 +0200 Subject: bugs/warnings fixed in lf_alloc-pin.c: 1. available_stack_size() was getting the direction wrong, so alloca() was never used 2. (char*) casts added to kill "break strict-aliasing rules" warnings 3. s/node/node=0/ to kill "pointer casted to integer" warning 4. added volatiles as appropriate to prevent gcc from moving assignment out of the loop mysys/lf_alloc-pin.c: bugs/warnings fixed: 1. available_stack_size() was getting the direction wrong, so alloca() was never used 2. (char*) casts added to kill "break strict-aliasing rules" warnings 3. s/node/node=0/ to kill "pointer casted to integer" warning 4. 
added volatiles as appropriate to prevent gcc from moving assignment out of the loop --- mysys/lf_alloc-pin.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index 51c4df7c94a..e89e071d8bd 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -318,9 +318,9 @@ static int match_pins(LF_PINS *el, void *addr) } #if STACK_DIRECTION < 0 -#define available_stack_size(END,CUR) (long) ((char*)(CUR) - (char*)(END)) +#define available_stack_size(CUR,END) (long) ((char*)(CUR) - (char*)(END)) #else -#define available_stack_size(END,CUR) (long) ((char*)(END) - (char*)(CUR)) +#define available_stack_size(CUR,END) (long) ((char*)(END) - (char*)(CUR)) #endif /* @@ -413,15 +413,16 @@ LF_REQUIRE_PINS(1); first->el->el->....->el->last. Use first==last to free only one element. */ static void alloc_free(struct st_lf_alloc_node *first, - struct st_lf_alloc_node *last, + struct st_lf_alloc_node volatile *last, LF_ALLOCATOR *allocator) { - struct st_lf_alloc_node *tmp; + struct st_lf_alloc_node * volatile tmp; tmp= allocator->top; do { last->next= tmp; - } while (!my_atomic_casptr((void **)&allocator->top, (void **)&tmp, first) && + } while (!my_atomic_casptr((void **)(char *)&allocator->top, + (void **)(char *)&tmp, first) && LF_BACKOFF); } @@ -494,12 +495,13 @@ void *_lf_alloc_new(LF_PINS *pins) { node= (void *)my_malloc(allocator->element_size, MYF(MY_WME)); #ifdef MY_LF_EXTRA_DEBUG - if (likely(node)) + if (likely(node != 0)) my_atomic_add32(&allocator->mallocs, 1); #endif break; } - if (my_atomic_casptr((void **)&allocator->top, (void *)&node, node->next)) + if (my_atomic_casptr((void **)(char *)&allocator->top, + (void *)&node, node->next)) break; } _lf_unpin(pins, 0); -- cgit v1.2.1 From 85a920dd41692e3580ac19541ac2072b34901a85 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 12 Oct 2007 18:46:55 +0200 Subject: different fix for strict-aliasing problem --- 
mysys/lf_alloc-pin.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index e89e071d8bd..a847d722023 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -416,14 +416,17 @@ static void alloc_free(struct st_lf_alloc_node *first, struct st_lf_alloc_node volatile *last, LF_ALLOCATOR *allocator) { - struct st_lf_alloc_node * volatile tmp; - tmp= allocator->top; + /* + we need a union here to access type-punned pointer reliably. + otherwise gcc -fstrict-aliasing will not see 'tmp' changed in the loop + */ + union { struct st_lf_alloc_node * node; void *ptr; } tmp; + tmp.node= allocator->top; do { - last->next= tmp; + last->next= tmp.node; } while (!my_atomic_casptr((void **)(char *)&allocator->top, - (void **)(char *)&tmp, first) && - LF_BACKOFF); + (void **)&tmp.ptr, first) && LF_BACKOFF); } /* -- cgit v1.2.1 From f1d92c4397bd848b5c761cfcbf3a1d195934c03d Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 13 Oct 2007 20:25:53 +0200 Subject: my_getopt: enforce "correctness" (min/max/block_size) of default values client/mysqltest.c: fix my_option's with incorrect defaults mysql-test/r/maria.result: update results mysql-test/t/variables.test: update results sql/mysqld.cc: fix my_option's with incorrect defaults --- mysys/my_getopt.c | 73 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 27 deletions(-) (limited to 'mysys') diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c index 3a5b130e067..6a7386d4126 100644 --- a/mysys/my_getopt.c +++ b/mysys/my_getopt.c @@ -20,22 +20,25 @@ #include #include +#if SIZEOF_LONG < SIZEOF_LONG_LONG +#define getopt_ul getopt_ll +#define getopt_ul_limit_value getopt_ll_limit_value +#else +#define getopt_ul getopt_ull +#define getopt_ul_limit_value getopt_ull_limit_value +#endif + static void default_reporter(enum loglevel level, const char *format, ...); my_error_reporter my_getopt_error_reporter= 
&default_reporter; -static int findopt(char *optpat, uint length, - const struct my_option **opt_res, - char **ffname); -my_bool getopt_compare_strings(const char *s, - const char *t, - uint length); +static int findopt(char *, uint, const struct my_option **, char **); +my_bool getopt_compare_strings(const char *, const char *, uint); static longlong getopt_ll(char *arg, const struct my_option *optp, int *err); -static ulonglong getopt_ull(char *arg, const struct my_option *optp, - int *err); +static longlong getopt_ll_limit_value(longlong, const struct my_option *); +static ulonglong getopt_ull(char *, const struct my_option *, int *); static double getopt_double(char *arg, const struct my_option *optp, int *err); static void init_variables(const struct my_option *options); -static int setval(const struct my_option *opts, uchar* *value, char *argument, - my_bool set_maximum_value); +static int setval(const struct my_option *, uchar **, char *, my_bool); static char *check_struct_option(char *cur_arg, char *key_name); /* @@ -603,9 +606,11 @@ static int setval(const struct my_option *opts, uchar* *value, char *argument, *((int*) result_pos)= (int) getopt_ll(argument, opts, &err); break; case GET_LONG: - case GET_ULONG: /* fall through */ *((long*) result_pos)= (long) getopt_ll(argument, opts, &err); break; + case GET_ULONG: + *((long*) result_pos)= (long) getopt_ul(argument, opts, &err); + break; case GET_LL: *((longlong*) result_pos)= getopt_ll(argument, opts, &err); break; @@ -748,7 +753,7 @@ static longlong eval_num_suffix(char *argument, int *error, char *option_name) return num; } -/* +/* function: getopt_ll Evaluates and returns the value that user gave as an argument @@ -761,10 +766,22 @@ static longlong eval_num_suffix(char *argument, int *error, char *option_name) static longlong getopt_ll(char *arg, const struct my_option *optp, int *err) { - longlong num; + longlong num=eval_num_suffix(arg, err, (char*) optp->name); + return getopt_ll_limit_value(num, 
optp); +} + +/* + function: getopt_ll_limit_value + + Applies min/max/block_size to a numeric value of an option. + Returns "fixed" value. +*/ + +static longlong getopt_ll_limit_value(longlong num, + const struct my_option *optp) +{ ulonglong block_size= (optp->block_size ? (ulonglong) optp->block_size : 1L); - - num= eval_num_suffix(arg, err, (char*) optp->name); + if (num > 0 && (ulonglong) num > (ulonglong) optp->max_value && optp->max_value) /* if max value is not set -> no upper limit */ num= (ulonglong) optp->max_value; @@ -782,9 +799,7 @@ static longlong getopt_ll(char *arg, const struct my_option *optp, int *err) static ulonglong getopt_ull(char *arg, const struct my_option *optp, int *err) { - ulonglong num; - - num= eval_num_suffix(arg, err, (char*) optp->name); + ulonglong num= eval_num_suffix(arg, err, (char*) optp->name); return getopt_ull_limit_value(num, optp); } @@ -841,35 +856,39 @@ static double getopt_double(char *arg, const struct my_option *optp, int *err) SYNOPSIS init_one_value() - option Option to initialize - value Pointer to variable + optp Option to initialize + value Pointer to variable */ -static void init_one_value(const struct my_option *option, uchar* *variable, +static void init_one_value(const struct my_option *optp, uchar* *variable, longlong value) { DBUG_ENTER("init_one_value"); - switch ((option->var_type & GET_TYPE_MASK)) { + switch ((optp->var_type & GET_TYPE_MASK)) { case GET_BOOL: *((my_bool*) variable)= (my_bool) value; break; case GET_INT: - *((int*) variable)= (int) value; + *((int*) variable)= (int) getopt_ll_limit_value(value, optp); break; case GET_UINT: + *((uint*) variable)= (uint) getopt_ll_limit_value(value, optp); + break; case GET_ENUM: *((uint*) variable)= (uint) value; break; case GET_LONG: - *((long*) variable)= (long) value; + *((long*) variable)= (long) getopt_ll_limit_value(value, optp); break; case GET_ULONG: - *((ulong*) variable)= (ulong) value; + *((ulong*) variable)= (ulong) 
getopt_ul_limit_value(value, optp); break; case GET_LL: - *((longlong*) variable)= (longlong) value; + *((longlong*) variable)= (longlong) getopt_ll_limit_value(value, optp); break; case GET_ULL: + *((ulonglong*) variable)= (ulonglong) getopt_ull_limit_value(value, optp); + break; case GET_SET: *((ulonglong*) variable)= (ulonglong) value; break; @@ -906,7 +925,7 @@ static void init_one_value(const struct my_option *option, uchar* *variable, } -/* +/* initialize all variables to their default values SYNOPSIS -- cgit v1.2.1 From fd4ca26dfc9f7b6783d568d319d61a7fefd21ee4 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 19 Oct 2007 00:32:51 +0200 Subject: mysys/my_getopt.c always process uint/ulong using ulonglong (unsigned) code dbug printout for adjusted option values strings/llstr.c ullstr() - the unsigned brother of llstr() include/m_string.h: ullstr() - the unsigned brother of llstr() mysql-test/t/variables.test: test adjusted for 32bit mysys/my_getopt.c: always process uint/ulong using ulonglong (unsigned) code dbug printout for adjusted option values strings/llstr.c: ullstr() - the unsigned brother of llstr() --- mysys/my_getopt.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'mysys') diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c index 6a7386d4126..218d9dce1f4 100644 --- a/mysys/my_getopt.c +++ b/mysys/my_getopt.c @@ -20,14 +20,6 @@ #include #include -#if SIZEOF_LONG < SIZEOF_LONG_LONG -#define getopt_ul getopt_ll -#define getopt_ul_limit_value getopt_ll_limit_value -#else -#define getopt_ul getopt_ull -#define getopt_ul_limit_value getopt_ull_limit_value -#endif - static void default_reporter(enum loglevel level, const char *format, ...); my_error_reporter my_getopt_error_reporter= &default_reporter; @@ -602,14 +594,16 @@ static int setval(const struct my_option *opts, uchar* *value, char *argument, *((my_bool*) result_pos)= (my_bool) atoi(argument) != 0; break; case GET_INT: - case GET_UINT: /* fall 
through */ *((int*) result_pos)= (int) getopt_ll(argument, opts, &err); break; + case GET_UINT: + *((uint*) result_pos)= (uint) getopt_ull(argument, opts, &err); + break; case GET_LONG: *((long*) result_pos)= (long) getopt_ll(argument, opts, &err); break; case GET_ULONG: - *((long*) result_pos)= (long) getopt_ul(argument, opts, &err); + *((long*) result_pos)= (long) getopt_ull(argument, opts, &err); break; case GET_LL: *((longlong*) result_pos)= getopt_ll(argument, opts, &err); @@ -781,13 +775,19 @@ static longlong getopt_ll_limit_value(longlong num, const struct my_option *optp) { ulonglong block_size= (optp->block_size ? (ulonglong) optp->block_size : 1L); + longlong old= num; + char buf1[255] __attribute__((unused)), buf2[255] __attribute__((unused)); if (num > 0 && (ulonglong) num > (ulonglong) optp->max_value && optp->max_value) /* if max value is not set -> no upper limit */ num= (ulonglong) optp->max_value; num= ((num - optp->sub_size) / block_size); num= (longlong) (num * block_size); - return max(num, optp->min_value); + num= max(num, optp->min_value); + if (num != old) + DBUG_PRINT("options", ("option '%s' adjusted %s -> %s", + optp->name, llstr(old, buf1), llstr(num, buf2))); + return num; } /* @@ -806,6 +806,9 @@ static ulonglong getopt_ull(char *arg, const struct my_option *optp, int *err) ulonglong getopt_ull_limit_value(ulonglong num, const struct my_option *optp) { + ulonglong old= num; + char buf1[255] __attribute__((unused)), buf2[255] __attribute__((unused)); + if ((ulonglong) num > (ulonglong) optp->max_value && optp->max_value) /* if max value is not set -> no upper limit */ num= (ulonglong) optp->max_value; @@ -816,6 +819,9 @@ ulonglong getopt_ull_limit_value(ulonglong num, const struct my_option *optp) } if (num < (ulonglong) optp->min_value) num= (ulonglong) optp->min_value; + if (num != old) + DBUG_PRINT("options", ("option '%s' adjusted %s -> %s", + optp->name, ullstr(old, buf1), ullstr(num, buf2))); return num; } @@ -872,7 +878,7 @@ 
static void init_one_value(const struct my_option *optp, uchar* *variable, *((int*) variable)= (int) getopt_ll_limit_value(value, optp); break; case GET_UINT: - *((uint*) variable)= (uint) getopt_ll_limit_value(value, optp); + *((uint*) variable)= (uint) getopt_ull_limit_value(value, optp); break; case GET_ENUM: *((uint*) variable)= (uint) value; @@ -881,7 +887,7 @@ static void init_one_value(const struct my_option *optp, uchar* *variable, *((long*) variable)= (long) getopt_ll_limit_value(value, optp); break; case GET_ULONG: - *((ulong*) variable)= (ulong) getopt_ul_limit_value(value, optp); + *((ulong*) variable)= (ulong) getopt_ull_limit_value(value, optp); break; case GET_LL: *((longlong*) variable)= (longlong) getopt_ll_limit_value(value, optp); -- cgit v1.2.1 From 77017191de1b2d05392a8ad2f202f5162aa5fc68 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 19 Oct 2007 14:15:13 +0200 Subject: WL#3071 - Maria checkpoint - serializing calls to flush_pagecache_blocks_int() on the same file to avoid known concurrency bugs - having that, we can now enable the background thread, as the flushes it does are now supposedly safe in concurrent situations. - new type of flush FLUSH_KEEP_LAZY: when the background checkpoint thread is flushing a packet of dirty pages between two checkpoints, it uses this flush type, indeed if a file is already being flushed by another thread it's smarter to move on to the next file than wait. - maria_checkpoint_frequency renamed to maria_checkpoint_interval. include/my_sys.h: new type of flushing for the page cache: FLUSH_KEEP_LAZY mysql-test/r/maria.result: result update mysys/mf_keycache.c: indentation. No FLUSH_KEEP_LAZY support in key cache. storage/maria/ha_maria.cc: maria_checkpoint_frequency was somehow a hidden part of the Checkpoint API and that was not good. 
Now we have checkpoint_interval, local to ha_maria.cc, which serves as container for the user-visible maria_checkpoint_interval global variable; setting it calls update_checkpoint_interval which passes the new value to ma_checkpoint_init(). There is no hiding anymore. By default, enable background thread which does checkpoints every 30 seconds, and dirty page flush in between. That thread takes a checkpoint when it ends, so no need for maria_hton_panic to take one. The | is | and not ||, because maria_panic() must always be called. frequency->interval. storage/maria/ma_checkpoint.c: Use FLUSH_KEEP_LAZY for background thread when it flushes packets of dirty pages between two checkpoints: it is smarter to move on to the next file than wait for it to have been completely flushed, which may take long. Comments about flush concurrency bugs moved from ma_pagecache.c. Removing out-of-date comment. frequency->interval. create_background_thread -> (interval>0). In ma_checkpoint_background(), some variables need to be preserved between iterations. storage/maria/ma_checkpoint.h: new prototype storage/maria/ma_pagecache.c: - concurrent calls of flush_pagecache_blocks_int() on the same file cause bugs (see @note in that function); we fix them by serializing in this situation. For that we use a global hash of (file, wqueue). When flush_pagecache_blocks_int() starts it looks into the hash, using the file as key. If not found, it inserts (file,wqueue) into the hash, flushes the file, and finally removes itself from the hash and wakes up any waiter in the queue. If found, it adds itself to the wqueue and waits. - As a by-product, we can remove changed_blocks_is_incomplete and replace it by scanning the hash, replace the sleep() by a queue wait. - new type of flush FLUSH_KEEP_LAZY: when flushing a file, if it's already being flushed by another thread (even partially), return immediately. storage/maria/ma_pagecache.h: In pagecache, a hash of files currently being flushed (i.e. 
there is a call to flush_pagecache_blocks_int() for them). storage/maria/ma_recovery.c: new prototype storage/maria/ma_test1.c: new prototype storage/maria/ma_test2.c: new prototype --- mysys/mf_keycache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index ea05ea6e127..8b1f3ad0540 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -3557,10 +3557,11 @@ static int flush_key_blocks_int(KEY_CACHE *keycache, file, keycache->blocks_used, keycache->blocks_changed)); #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) - DBUG_EXECUTE("check_keycache", - test_key_cache(keycache, "start of flush_key_blocks", 0);); + DBUG_EXECUTE("check_keycache", + test_key_cache(keycache, "start of flush_key_blocks", 0);); #endif + DBUG_ASSERT(type != FLUSH_KEEP_LAZY); cache= cache_buff; if (keycache->disk_blocks > 0 && (!my_disable_flush_key_blocks || type != FLUSH_KEEP)) -- cgit v1.2.1 From 13d53bf657060acaecf055107e5e6cc7045f351e Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 20 Oct 2007 00:24:22 +0300 Subject: Merge some changes from sql directory in 5.1 tree Changed format for REDO_INSERT_ROWS_BLOBS Fixed several bugs in handling of big blobs Added redo_free_head_or_tail() & redo_insert_row_blobs() Added uuid to control file maria_checks now verifies that not used part of bitmap is 0 REDO_PURGE_BLOCKS -> REDO_FREE_BLOCKS Added REDO_FREE_HEAD_OR_TAIL Fixes problem when trying to read block outside of file during REDO include/my_global.h: STACK_DIRECTION is already set by configure mysql-test/r/maria.result: Updated results mysql-test/t/maria.test: Test shrinking of VARCHAR mysys/my_realloc.c: Fixed indentation mysys/safemalloc.c: Fixed indentation sql/filesort.cc: Removed some casts sql/mysqld.cc: Added missing setting of myisam_stats_method_str sql/uniques.cc: Removed some casts storage/maria/ma_bitmap.c: Added printing of bitmap (for debugging) Renamed _ma_print_bitmap() -> 
_ma_print_bitmap_changes() Added _ma_set_full_page_bits() Fixed bug in ma_bitmap_find_new_place() (affecting updates) when using big files storage/maria/ma_blockrec.c: Changed format for REDO_INSERT_ROWS_BLOBS Fixed several bugs in handling of big blobs Added code to fix some cases where redo when using blobs didn't produce idenital .MAD files as normal usage REDO_FREE_ROW_BLOCKS doesn't anymore change pages; We only mark things free in bitmap Remove TAIL and filler extents from REDO_FREE_BLOCKS log entry. (Fixed some asserts) REDO_PURGE_BLOCKS -> REDO_FREE_BLOCKS Delete tails in update. (Fixed bug when doing update that shrinks blob/varchar length) Fixed bug when doing insert in block outside of file size. Added redo_free_head_or_tail() & redo_insert_row_blobs() Added pagecache_unlock_by_link() when read fails. Much more comments, DBUG and ASSERT entries storage/maria/ma_blockrec.h: Prototypes of new functions Define of SUB_RANGE_SIZE & BLOCK_FILLER_SIZE storage/maria/ma_check.c: Verify that not used part of bitmap is 0 storage/maria/ma_control_file.c: Added uuid to control file storage/maria/ma_loghandler.c: REDO_PURGE_BLOCKS -> REDO_FREE_BLOCKS Added REDO_FREE_HEAD_OR_TAIL storage/maria/ma_loghandler.h: REDO_PURGE_BLOCKS -> REDO_FREE_BLOCKS Added REDO_FREE_HEAD_OR_TAIL storage/maria/ma_pagecache.c: If we write full block, remove error flag for block. 
(Fixes problem when trying to read block outside of file) storage/maria/ma_recovery.c: REDO_PURGE_BLOCKS -> REDO_FREE_BLOCKS Added REDO_FREE_HEAD_OR_TAIL storage/maria/ma_test1.c: Allow option after 'b' to be compatible with ma_test2 (This is just to simplify test scripts like ma_test_recovery) storage/maria/ma_test2.c: Default size of blob is now 1000 instead of 1 storage/maria/ma_test_all.sh: Added test for bigger blobs storage/maria/ma_test_recovery.expected: Updated results storage/maria/ma_test_recovery: Added test for bigger blobs --- mysys/my_realloc.c | 1 + mysys/safemalloc.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_realloc.c b/mysys/my_realloc.c index a55282e03a0..828890a0dc2 100644 --- a/mysys/my_realloc.c +++ b/mysys/my_realloc.c @@ -32,6 +32,7 @@ @note if size==0 realloc() may return NULL; my_realloc() treats this as an error which is not the intention of realloc() */ + void* my_realloc(void* oldpoint, size_t size, myf my_flags) { void *point; diff --git a/mysys/safemalloc.c b/mysys/safemalloc.c index a7a7bcc9c53..6c8a080fbf3 100644 --- a/mysys/safemalloc.c +++ b/mysys/safemalloc.c @@ -436,7 +436,6 @@ void TERMINATE(FILE *file, uint flag) This is usefull to call from withing a debugger */ - void sf_malloc_report_allocated(void *memory) { struct st_irem *irem; -- cgit v1.2.1 From 21fd2a5a3656813c3d97760a5e7eef987dc6879d Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 14 Nov 2007 19:08:06 +0200 Subject: First part of redo/undo for key pages Added key_nr to st_maria_keydef for faster keyinfo->keynr conversion For transactional tables, shift record number in keys up with 1 bit to have place to indicate if transid follows Checksum for MyISAM now ignores NULL and not used part of VARCHAR Renamed some variables that caused shadow compiler warnings Moved extra() call when waiting for tables to not be used to after tables are removed from cache. 
Fixed crashing bugs when using Maria TEMPORARY tables with TRUNCATE. Removed 'hack' code in sql directory to go around this bug. pagecache_unlock_by_ulink() now has extra argument to say if page was changed. Give error message if we fail to open control file Mark page cache variables as not flushable include/maria.h: Made min page cache larger (needed for pinning key page) Added key_nr to st_maria_keydef for faster keyinfo->keynr conversion Added write_comp_flag to move some runtime code to maria_open() include/my_base.h: Added new error message to be used when handler initialization failed include/my_global.h: Renamed dummy to swap_dummy to avoid conflicts with local 'dummy' variables include/my_handler.h: Added const to some parameters mysys/array.c: More DBUG mysys/my_error.c: Fixed indentation mysys/my_handler.c: Added const to some parameters Added missing error messages sql/field.h: Renamed variables to avoid variable shadowing sql/handler.h: Renamed parameter to avoid variable name conflict sql/item.h: Renamed variables to avoid variable shadowing sql/log_event_old.h: Renamed variables to avoid variable shadowing sql/set_var.h: Renamed variables to avoid variable shadowing sql/sql_delete.cc: Removed maria hack for temporary tables Fixed indentation sql/sql_table.cc: Moved extra() call when waiting for tables to not be used to after tables are removed from cache. This was needed to ensure we don't do a PREPARE_FOR_DROP or similar call while the table is still in use. 
sql/table.cc: Copy page_checksum from share Removed Maria hack storage/maria/Makefile.am: Added new files storage/maria/ha_maria.cc: Renamed records -> record_count and info -> create_info to avoid variable name conflicts Mark page cache variables as not flushable storage/maria/ma_blockrec.c: Moved _ma_unpin_all_pages() to ma_key_recover.c Moved init of info->pinned_pages to ma_open.c Moved _ma_finalize_row() to maria_key_recover.h Renamed some variables to avoid variable name conflicts Mark page_link.changed for blocks we change directly Simplify handling of undo link when writing LOGREC_UNDO_ROW_INSERT (old code crashed when having redo for index) storage/maria/ma_blockrec.h: Removed extra empty line storage/maria/ma_checkpoint.c: Remove not needed trnman.h storage/maria/ma_close.c: Free pinned pages (which are now always allocated) storage/maria/ma_control_file.c: Give error message if we fail to open control file storage/maria/ma_delete.c: Changes for redo logging (first part, logging of underflow not yet done) - Log undo-key-delete - Log delete of key - Updated arguments to _ma_fetch_keypage(), _ma_dispose(), _ma_write_keypage(), _ma_insert() - Added new arguments to some functions to be able to write redo information - Mark key pages as changed when we write with PAGECACHE_LOCK_LEFT_WRITELOCKED Remove one not needed _ma_write_keypage() in d_search() when upper level will do the write anyway Changed 2 bmove_upp() to bmove() as this made code easer to understand More function comments Indentation fixes storage/maria/ma_ft_update.c: New arguments to _ma_write_keypage() storage/maria/ma_loghandler.c: Fixed some DBUG_PRINT messages Simplify code Added new log entrys for key page redo Renamed some variables to avoid variable name shadowing storage/maria/ma_loghandler.h: Moved some defines here Added define for storing key number on key pages Added new translog record types Added enum for type of operations in LOGREC_REDO_INDEX storage/maria/ma_open.c: Always 
allocate info.pinned_pages (we need now also for normal key page usage) Update keyinfo->key_nr Added virtual functions to convert record position o number to be stored on key pages Update keyinfo->write_comp_flag to value of search flag to be used when writing key storage/maria/ma_page.c: Added redo for key pages - Extended _ma_fetch_keypage() with type of lock to put on page and address to used MARIA_PINNED_PAGE - _ma_fetch_keypage() now pin's pages if needed - Extended _ma_write_keypage() with type of locks to be used - ma_dispose() now locks info->s->state.key_del from other threads - ma_dispose() writes redo log record - ma_new() locks info->s->state.key_del from other threads if it was used - ma_new() now pins read page Other things: - Removed some not needed arguments from _ma_new() and _ma_dispose) - Added some new variables to simplify code - If EXTRA_DEBUG is used, do crc on full page to catch not unitialized bytes storage/maria/ma_pagecache.h: Applied patch from Sanja to add extra argument to pagecache_unlock_by_ulink() to mark if page was changed Added some defines for pagecache priority levels that one can use storage/maria/ma_range.c: Added new arguments for call to _ma_fetch_keypage() storage/maria/ma_recovery.c: - Added hooks for new translog types: REDO_INDEX, REDO_INDEX_NEW_PAGE, REDO_INDEX_FREE_PAGE, UNDO_KEY_INSERT, UNDO_KEY_DELETE and UNDO_KEY_DELETE_WITH_ROOT. 
- Moved variable declarations to start of function (portability fixes) - Removed some not needed initializations - Set only relevant state changes for each redo/undo entry storage/maria/lockman.c: Removed end space storage/maria/ma_check.c: Removed end space storage/maria/ma_create.c: Removed end space storage/maria/ma_locking.c: Removed end space storage/maria/ma_packrec.c: Removed end space storage/maria/ma_pagecache.c: Removed end space storage/maria/ma_panic.c: Removed end space storage/maria/ma_rt_index.c: Added new arguments for call to _ma_fetch_keypage(), _ma_write_keypage(), _ma_dispose() and _ma_new() Fixed indentation storage/maria/ma_rt_key.c: Added new arguments for call to _ma_fetch_keypage() storage/maria/ma_rt_split.c: Added new arguments for call to _ma_new() Use new keypage header Added new arguments for call to _ma_write_keypage() storage/maria/ma_search.c: Updated comments & indentation Added new arguments for call to _ma_fetch_keypage() Made some variables and arguments const Added virtual functions for converting row position to number to be stored in key use MARIA_RECORD_POS of record position instead of my_off_t Record in MARIA_KEY_PARAM how page was changed one key insert (needed for REDO) storage/maria/ma_sort.c: Removed end space storage/maria/ma_statrec.c: Updated arguments for call to _ma_rec_pos() storage/maria/ma_test1.c: Fixed too small buffer to init_pagecache() Fixed bug when using insert_count and test_flag storage/maria/ma_test2.c: Use more resonable pagecache size Remove not used code Reset blob_length to fix wrong output message storage/maria/ma_test_all.sh: Fixed wrong test storage/maria/ma_write.c: Lots of new code to handle REDO of key pages No logic changes because of REDO code, mostly adding new arguments and adding new code for logging Added new arguments for calls to _ma_fetch_keypage(), _ma_write_keypage() and similar functions Move setting of comp_flag in ma_ck_wrte_btree() from runtime to maria_open() Zerofill new 
used pages for: - To remove possible sensitive data left in buffer - To get idenitical data on pages after running redo - Better compression of pages if archived storage/maria/maria_chk.c: Added information if table is crash safe storage/maria/maria_def.h: New virtual function to convert between record position on key and normal record position Aded mutex and extra variables to handle locking of share->state.key_del Moved some structure variables to get things more aligned Added extra arguments to MARIA_KEY_PARAM to be able to remember what was changed on key page on key insert Added argument to MARIA_PINNED_PAGE to indicate if page was changed Updated prototypes for functions Added some structures for signaling changes in REDO handling storage/maria/unittest/ma_pagecache_single.c: Updated arguments for changed function calls storage/myisam/mi_check.c: Made calc_check_checksum virtual storage/myisam/mi_checksum.c: Update checksums to ignore null columns storage/myisam/mi_create.c: Mark if table has null column (to know when we have to use mi_checksum()) storage/myisam/mi_open.c: Added virtual function for calculating checksum to be able to easily ignore NULL fields storage/myisam/mi_test2.c: Fixed bug storage/myisam/myisamdef.h: Added virtual function for calculating checksum during check table Removed ha_key_cmp() as this is in handler.h storage/maria/ma_key_recover.c: New BitKeeper file ``storage/maria/ma_key_recover.c'' storage/maria/ma_key_recover.h: New BitKeeper file ``storage/maria/ma_key_recover.h'' storage/maria/ma_key_redo.c: New BitKeeper file ``storage/maria/ma_key_redo.c'' --- mysys/array.c | 70 +++++++++++++++++++++++++++--------------------------- mysys/my_error.c | 4 ++-- mysys/my_handler.c | 48 +++++++++++++++++++++++++++---------- 3 files changed, 72 insertions(+), 50 deletions(-) (limited to 'mysys') diff --git a/mysys/array.c b/mysys/array.c index b7342f70ef8..9ff35791dde 100644 --- a/mysys/array.c +++ b/mysys/array.c @@ -30,8 +30,8 @@ 
alloc_increment Increment for adding new elements DESCRIPTION - init_dynamic_array() initiates array and allocate space for - init_alloc eilements. + init_dynamic_array() initiates array and allocate space for + init_alloc eilements. Array is usable even if space allocation failed. Static buffers must begin immediately after the array structure. @@ -41,7 +41,7 @@ */ my_bool init_dynamic_array2(DYNAMIC_ARRAY *array, uint element_size, - void *init_buffer, uint init_alloc, + void *init_buffer, uint init_alloc, uint alloc_increment CALLER_INFO_PROTO) { DBUG_ENTER("init_dynamic_array"); @@ -69,14 +69,14 @@ my_bool init_dynamic_array2(DYNAMIC_ARRAY *array, uint element_size, DBUG_RETURN(TRUE); } DBUG_RETURN(FALSE); -} +} my_bool init_dynamic_array(DYNAMIC_ARRAY *array, uint element_size, - uint init_alloc, + uint init_alloc, uint alloc_increment CALLER_INFO_PROTO) { /* placeholder to preserve ABI */ - return my_init_dynamic_array_ci(array, element_size, init_alloc, + return my_init_dynamic_array_ci(array, element_size, init_alloc, alloc_increment); } /* @@ -111,7 +111,7 @@ my_bool insert_dynamic(DYNAMIC_ARRAY *array, uchar* element) /* - Alloc space for next element(s) + Alloc space for next element(s) SYNOPSIS alloc_dynamic() @@ -129,6 +129,7 @@ my_bool insert_dynamic(DYNAMIC_ARRAY *array, uchar* element) uchar *alloc_dynamic(DYNAMIC_ARRAY *array) { + DBUG_ENTER("alloc_dynamic"); if (array->elements == array->max_element) { char *new_ptr; @@ -142,20 +143,20 @@ uchar *alloc_dynamic(DYNAMIC_ARRAY *array) array->alloc_increment) * array->size_of_element, MYF(MY_WME)))) - return 0; - memcpy(new_ptr, array->buffer, + DBUG_RETURN(0); + memcpy(new_ptr, array->buffer, array->elements * array->size_of_element); } - else - if (!(new_ptr=(char*) my_realloc(array->buffer,(array->max_element+ - array->alloc_increment)* - array->size_of_element, - MYF(MY_WME | MY_ALLOW_ZERO_PTR)))) - return 0; + else if (!(new_ptr=(char*) + my_realloc(array->buffer,(array->max_element+ + 
array->alloc_increment)* + array->size_of_element, + MYF(MY_WME | MY_ALLOW_ZERO_PTR)))) + DBUG_RETURN(0); array->buffer= (uchar*) new_ptr; array->max_element+=array->alloc_increment; } - return array->buffer+(array->elements++ * array->size_of_element); + DBUG_RETURN(array->buffer+(array->elements++ * array->size_of_element)); } @@ -165,8 +166,8 @@ uchar *alloc_dynamic(DYNAMIC_ARRAY *array) SYNOPSIS pop_dynamic() array - - RETURN VALUE + + RETURN VALUE pointer Ok 0 Array is empty */ @@ -188,9 +189,9 @@ uchar *pop_dynamic(DYNAMIC_ARRAY *array) idx Index where element is to be inserted DESCRIPTION - set_dynamic() replaces element in array. - If idx > max_element insert new element. Allocate memory if needed. - + set_dynamic() replaces element in array. + If idx > max_element insert new element. Allocate memory if needed. + RETURN VALUE TRUE Idx was out of range and allocation of new memory failed FALSE Ok @@ -230,6 +231,8 @@ my_bool set_dynamic(DYNAMIC_ARRAY *array, uchar* element, uint idx) my_bool allocate_dynamic(DYNAMIC_ARRAY *array, uint max_elements) { + DBUG_ENTER("allocate_dynamic"); + if (max_elements >= array->max_element) { uint size; @@ -245,21 +248,18 @@ my_bool allocate_dynamic(DYNAMIC_ARRAY *array, uint max_elements) if (!(new_ptr= (char *) my_malloc(size * array->size_of_element, MYF(MY_WME)))) - return 0; - memcpy(new_ptr, array->buffer, + DBUG_RETURN(0); + memcpy(new_ptr, array->buffer, array->elements * array->size_of_element); } - else - - - if (!(new_ptr= (char*) my_realloc(array->buffer,size* - array->size_of_element, - MYF(MY_WME | MY_ALLOW_ZERO_PTR)))) - return TRUE; + else if (!(new_ptr= (char*) my_realloc(array->buffer,size* + array->size_of_element, + MYF(MY_WME | MY_ALLOW_ZERO_PTR)))) + DBUG_RETURN(TRUE); array->buffer= new_ptr; array->max_element= size; } - return FALSE; + DBUG_RETURN(FALSE); } @@ -268,9 +268,9 @@ my_bool allocate_dynamic(DYNAMIC_ARRAY *array, uint max_elements) SYNOPSIS get_dynamic() - array + array uchar* Element to be 
returned. If idx > elements contain zeroes. - idx Index of element wanted. + idx Index of element wanted. */ void get_dynamic(DYNAMIC_ARRAY *array, uchar* element, uint idx) @@ -347,7 +347,7 @@ void freeze_size(DYNAMIC_ARRAY *array) */ if (array->buffer == (uchar *)(array + 1)) return; - + if (array->buffer && array->max_element != elements) { array->buffer=(uchar*) my_realloc(array->buffer, @@ -364,7 +364,7 @@ void freeze_size(DYNAMIC_ARRAY *array) SYNOPSIS get_index_dynamic() array Array - element Whose element index + element Whose element index */ diff --git a/mysys/my_error.c b/mysys/my_error.c index 75701536dd3..d26c3d8cfde 100644 --- a/mysys/my_error.c +++ b/mysys/my_error.c @@ -87,11 +87,11 @@ int my_error(int nr, myf MyFlags, ...) /* get the error message string. Default, if NULL or empty string (""). */ if (! (format= (meh_p && (nr >= meh_p->meh_first)) ? meh_p->meh_errmsgs[nr - meh_p->meh_first] : NULL) || ! *format) - (void) my_snprintf (ebuff, sizeof(ebuff), "Unknown error %d", nr); + (void) my_snprintf(ebuff, sizeof(ebuff), "Unknown error %d", nr); else { va_start(args,MyFlags); - (void) my_vsnprintf (ebuff, sizeof(ebuff), format, args); + (void) my_vsnprintf(ebuff, sizeof(ebuff), format, args); va_end(args); } DBUG_RETURN((*error_handler_hook)(nr, ebuff, MyFlags)); diff --git a/mysys/my_handler.c b/mysys/my_handler.c index bf75d992f9d..f7cf4f310d7 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -21,23 +21,25 @@ #include #include -int ha_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length, - uchar *b, uint b_length, my_bool part_key, +int ha_compare_text(CHARSET_INFO *charset_info, const uchar *a, uint a_length, + const uchar *b, uint b_length, my_bool part_key, my_bool skip_end_space) { if (!part_key) return charset_info->coll->strnncollsp(charset_info, a, a_length, - b, b_length, (my_bool)!skip_end_space); + b, b_length, + (my_bool)!skip_end_space); return charset_info->coll->strnncoll(charset_info, a, a_length, b, b_length, 
part_key); } -static int compare_bin(uchar *a, uint a_length, uchar *b, uint b_length, +static int compare_bin(const uchar *a, uint a_length, + const uchar *b, uint b_length, my_bool part_key, my_bool skip_end_space) { uint length= min(a_length,b_length); - uchar *end= a+ length; + const uchar *end= a+ length; int flag; while (a < end) @@ -116,8 +118,8 @@ static int compare_bin(uchar *a, uint a_length, uchar *b, uint b_length, #define FCMP(A,B) ((int) (A) - (int) (B)) -int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, - register uchar *b, uint key_length, uint nextflag, +int ha_key_cmp(register HA_KEYSEG *keyseg, register const uchar *a, + register const uchar *b, uint key_length, uint nextflag, uint *diff_pos) { int flag; @@ -127,12 +129,12 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, float f_1,f_2; double d_1,d_2; uint next_key_length; - uchar *orig_b= b; + const uchar *orig_b= b; *diff_pos=0; for ( ; (int) key_length >0 ; key_length=next_key_length, keyseg++) { - uchar *end; + const uchar *end; uint piks=! 
(keyseg->flag & HA_NO_SORT); (*diff_pos)++; diff_pos[1]= (uint)(b - orig_b); @@ -364,7 +366,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, if (keyseg->flag & HA_REVERSE_SORT) { - swap_variables(uchar*, a, b); + swap_variables(const uchar*, a, b); swap_flag=1; /* Remember swap of a & b */ end= a+ (int) (end-b); } @@ -389,7 +391,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, if (*b != '-') return -1; a++; b++; - swap_variables(uchar*, a, b); + swap_variables(const uchar*, a, b); swap_variables(int, alength, blength); swap_flag=1-swap_flag; alength--; blength--; @@ -418,7 +420,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, } if (swap_flag) /* Restore pointers */ - swap_variables(uchar*, a, b); + swap_variables(const uchar*, a, b); break; } #ifdef HAVE_LONG_LONG @@ -604,7 +606,27 @@ static const char *handler_error_messages[]= "Foreign key constraint is incorrectly formed", "Cannot add a child row", "Cannot delete a parent row", - "Unknown handler error" + "No savepoint with that name", + "Non unique key block size", + "The table does not exist in engine", + "The table already existed in storage engine", + "Could not connect to storage engine", + "Unexpected null pointer found when using spatial index", + "The table changed in storage engine", + "There's no partition in table for the given value", + "Row-based binlogging of row failed", + "Index needed in foreign key constraint", + "Upholding foreign key constraints would lead to a duplicate key error in " + "some other table", + "Table needs to be upgraded before it can be used", + "Table is read only", + "Failed to get next auto increment value", + "Failed to set row auto increment value", + "Unknown (generic) error from engine", + "Record is the same", + "It is not possible to log this statement", + "The table is of a new format not supported by this version", + "Got a fatal error during initialzaction of handler" }; -- cgit v1.2.1 From 
fc0a25ec49f32fd292cb87c8a855e1569ccf8878 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 16 Nov 2007 17:09:51 +0100 Subject: WL#3071 Maria checkpoint, WL#3072 Maria recovery instead of fprintf(stderr) when a task (with no user connected) gets an error, use my_printf_error(). Flags ME_JUST_WARNING and ME_JUST_INFO added to my_error()/my_printf_error(), which pass it to my_message_sql() which is modified to call the appropriate sql_print_*(). This way recovery can signal its start and end with [Note] and not [ERROR] (but failure with [ERROR]). Recovery's detailed progress (percents etc) still uses stderr as they have to stay on one single line. sql_print_error() changed to use my_progname_short (nicer display). mysql-test-run.pl --gdb/--ddd does not run mysqld, because a breakpoint in mysql_parse is too late to debug startup problems; instead, dev should set the breakpoints it wants and then "run" ("r"). include/my_sys.h: new flags to tell error_handler_hook that this is not an error but an information or warning mysql-test/mysql-test-run.pl: when running with --gdb/--ddd to debug mysqld, breaking at mysql_parse is too late to debug startup problems; now, it does not run mysqld, does not set breakpoints, developer can set as early breakpoints as it wants and is responsible for typing "run" (or "r") mysys/my_init.c: set my_progname_short mysys/my_static.c: my_progname_short added sql/mysqld.cc: * my_message_sql() can now receive info or warning, not only error; this allows mysys to tell the user (or the error log if no user) about an info or warning. Used from Maria. 
* plugins (or engines like Maria) may want to call my_error(), so set up the error handler hook (my_message_sql) before initializing plugins; otherwise they get my_message_no_curses which is less integrated into mysqld (is just fputs()) * using my_progname_short instead of my_progname, in my_message_sql() (less space on screen) storage/maria/ma_checkpoint.c: fprintf(stderr) -> ma_message_no_user() storage/maria/ma_checkpoint.h: function for any Maria task, not connected to a user (example: checkpoint, recovery; soon could be deleted records purger) to report a message (calls my_printf_error() which, when inside ha_maria, leads to sql_print_*(), and when outside, leads to my_message_no_curses i.e. stderr). storage/maria/ma_recovery.c: To tell that recovery starts and ends we use ma_message_no_user() (sql_print_*() in practice). Detailed progress info still uses stderr as sql_print() cannot put several messages on one line. 071116 18:42:16 [Note] mysqld: Maria engine: starting recovery recovered pages: 0% 67% 100% (0.0 seconds); transactions to roll back: 1 0 (0.0 seconds); tables to flush: 1 0 (0.0 seconds); 071116 18:42:16 [Note] mysqld: Maria engine: recovery done storage/maria/maria_chk.c: my_progname_short moved to mysys storage/maria/maria_read_log.c: my_progname_short moved to mysys storage/myisam/myisamchk.c: my_progname_short moved to mysys --- mysys/my_init.c | 1 + mysys/my_static.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_init.c b/mysys/my_init.c index eeb511f023e..8ddc6092f79 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -78,6 +78,7 @@ my_bool my_init(void) my_umask= 0660; /* Default umask for new files */ my_umask_dir= 0700; /* Default umask for new directories */ init_glob_errs(); + my_progname_short= my_progname + dirname_length(my_progname); #if defined(THREAD) && defined(SAFE_MUTEX) safe_mutex_global_init(); /* Must be called early */ #endif diff --git a/mysys/my_static.c 
b/mysys/my_static.c index cb482b19b57..ef25a89bad9 100644 --- a/mysys/my_static.c +++ b/mysys/my_static.c @@ -26,7 +26,7 @@ my_bool timed_mutexes= 0; /* from my_init */ char * home_dir=0; -const char *my_progname=0; +const char *my_progname= NULL, *my_progname_short= NULL; char NEAR curr_dir[FN_REFLEN]= {0}, NEAR home_dir_buff[FN_REFLEN]= {0}; ulong my_stream_opened=0,my_file_opened=0, my_tmp_file_created=0; -- cgit v1.2.1 From 4e0964cb040d833351ddd66c00b146b2e93e9fa7 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 28 Nov 2007 21:38:30 +0200 Subject: Fixed repair_by_sort to work with BLOCK_RECORD Fixed bugs in undo logging Fixed bug where head block was split before min_row_length (caused Maria to believe row was crashed on read) Reserved place for reference-transid on key pages (for packing of transids) ALTER TABLE and INSERT ... SELECT now uses fast creation of index Known bugs: ma_test_recovery fails because of a bug in redo handling when log is cut directly after a redo (Guilhem knows how to fix) ma_test_recovery.excepted is not totally correct, because of the above bug mysqld sometimes fails to restart; Fails with error "end_of_redo_phase: Assertion `long_trid != 0' failed"; Guilhem to investigate include/maria.h: Prototype changes Added current_filepos to st_maria_sort_info mysql-test/r/maria.result: Updated results that changes as alter table and insert ... 
select now uses fast creation of index mysys/mf_iocache.c: Reset variable to gurard against double invocation storage/maria/ma_bitmap.c: Added _ma_bitmap_reset_cache() (needed for repair) storage/maria/ma_blockrec.c: Simplify code More initial allocations Fixed bug where head block was split before min_row_length (caused Maria to believe row was crashed on read) storage/maria/ma_blockrec.h: Moved TRANSID_SIZE to maria_def.h Added prototype for new functions storage/maria/ma_check.c: Simplicy code Fixed repair_by_sort to work with BLOCK_RECORD - When using BLOCK_RECORD or UNPACK create new Maria handle - Use common initializer function - Align code with maria_repair() Made some changes to maria_repair_parallel() to use common initializer function Removed ASK_MONTY section by fixing noted problem storage/maria/ma_close.c: Moved check for readonly to _ma_state_info_write() storage/maria/ma_key_recover.c: Use different log entries if key root changes or not. This fixed some bugs when tree grows storage/maria/ma_key_recover.h: Added keynr to st_msg_to_write_hook_for_undo_key storage/maria/ma_loghandler.c: Added INIT_LOGREC_UNDO_KEY_INSERT_WITH_ROOT storage/maria/ma_loghandler.h: Added INIT_LOGREC_UNDO_KEY_INSERT_WITH_ROOT storage/maria/ma_open.c: Added TRANSID to all key pages (for future compressing of trans id's) For compressed records, alloc a bit bigger buffer to avoid valgrind warnings If table is opened readonly, don't update state storage/maria/ma_packrec.c: Allocate bigger array for bit unpacking to avoid valgrind errors storage/maria/ma_recovery.c: Added UNDO_KEY_INSERT_WITH_ROOT & UNDO_KEY_DELETE_WITH_ROOT storage/maria/ma_sort.c: More logging storage/maria/ma_test_all.sh: More tests storage/maria/ma_test_recovery.expected: Update results Note that this is not complete becasue of a bug in recovery storage/maria/ma_test_recovery: Removed recreation of index (not needed when we have redo for index pages) storage/maria/maria_chk.c: When using flag --read-only, 
don't update status for files When using --unpack, don't use REPAIR_BY_SORT if other repair option is given Enable repair_by_sort for BLOCK records Removed not needed newline at start of --describe storage/maria/maria_def.h: Support for TRANSID_SIZE to key pages storage/maria/maria_read_log.c: renamed --only-display to --display-only --- mysys/mf_iocache.c | 1 + 1 file changed, 1 insertion(+) (limited to 'mysys') diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c index 0f49dd22bb9..8d74894305a 100644 --- a/mysys/mf_iocache.c +++ b/mysys/mf_iocache.c @@ -1834,6 +1834,7 @@ int end_io_cache(IO_CACHE *info) pthread_mutex_destroy(&info->append_buffer_lock); #endif } + info->share= 0; DBUG_RETURN(error); } /* end_io_cache */ -- cgit v1.2.1 From 143f35e611ab9fdf51300d0bf458bdbf1f8b7625 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 29 Nov 2007 11:37:05 +0200 Subject: Added missing #ifdef --- mysys/mf_iocache.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'mysys') diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c index 8d74894305a..1124ebceb2c 100644 --- a/mysys/mf_iocache.c +++ b/mysys/mf_iocache.c @@ -1834,7 +1834,9 @@ int end_io_cache(IO_CACHE *info) pthread_mutex_destroy(&info->append_buffer_lock); #endif } +#ifdef THREAD info->share= 0; +#endif DBUG_RETURN(error); } /* end_io_cache */ -- cgit v1.2.1 From ebf7ab7bce003093745337be43cd6107726aa0fb Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 4 Dec 2007 23:23:42 +0200 Subject: Added error HA_ERR_FILE_TOO_SHORT to be used when files are shorter than expected (by my_read/my_pread) Added debugger hook _my_dbug_put_break_here() that is called if we get a CRC that matches --debug-crc-break (my_crc_dbug_break) Fixed REDO_REPAIR to use all repair modes (repair, repair_by_sort, repair_paralell REDO_REPAIR now also logs used key map Fixed some bugs in REDO logging of key pages Better error messages from maria_read_log Added my_readwrite_flags to init_pagecache() to be able to get better error messages and 
simplify code. Don't allow pagecaches with less than 8 blocks (Causes strange crashes) Added EXTRA_DEBUG_KEY_CHANGES. When this is defined some REDO_INDEX entries contains page checksums (these are calculated and checked in DBUG mode, ignored otherwise) Fixed bug in ma_pagecache unit tests that caused program to sometimes fail Added some missing calls to MY_INIT() that caused some unit tests to fail Fixed that TRUNCATE works properly on temporary MyISAM files Updates some result files to new table checksums results (checksum when NULL fields are ignored) perl test-insert can be replayed with maria_read_log! sql/share/Makefile.am: Change mode to -rw-rw-r-- BitKeeper/etc/ignore: added storage/maria/unittest/page_cache_test_file_1 storage/maria/unittest/pagecache_debug.log include/maria.h: Added maria_tmpdir include/my_base.h: Added error HA_ERR_FILE_TOO_SHORT include/my_sys.h: Added variable my_crc_dbug_check Added function my_dbug_put_break_here() include/myisamchk.h: Added org_key_map (Needed for writing REDO record for REPAIR) mysql-test/r/innodb.result: Updated to new checksum algorithm (NULL ignored) mysql-test/r/mix2_myisam.result: Updated to new checksum algorithm (NULL ignored) mysql-test/r/myisam.result: Updated to new checksum algorithm (NULL ignored) mysql-test/t/myisam.test: Added used table mysys/checksum.c: Added DBUG for checksum results Added debugger hook so that _my_dbug_put_break_here() is called if we get matching CRC mysys/lf_alloc-pin.c: Fixed compiler warning mysys/my_handler.c: Added new error message mysys/my_init.c: If my_progname is not given, use 'unknown' form my_progname_short Added debugger function my_debug_put_break_here() mysys/my_pread.c: In case of too short file when MY_NABP or MY_FNABP is specified, give error HA_ERR_FILE_TO_SHORT mysys/my_read.c: In case of too short file when MY_NABP or MY_FNABP is specified, give error HA_ERR_FILE_TO_SHORT sql/mysqld.cc: Added debug option --debug-crc-break sql/sql_parse.cc: Trivial 
optimization storage/maria/ha_maria.cc: Renamed variable to be more logical Ensure that param.testflag is correct when calling repair Added extra argument to init_pagecache Set default value for maria_tempdir storage/maria/ma_blockrec.c: Test for HA_ERR_FILE_TOO_SHORT instead for -1 storage/maria/ma_cache.c: Test for HA_ERR_FILE_TOO_SHORT instead for -1 storage/maria/ma_check.c: Set param->testflag to match how repair is run (needed for REDO logging) Simple optimization Moved flag if page is node from pagelength to keypage-flag byte Log used key map in REDO log. storage/maria/ma_delete.c: Remember previous UNDO entry when writing undo (for future CLR records) Moved flag if page is node from pagelength to keypage-flag byte Fixed some bugs in redo logging Added CRC for some translog REDO_INDEX entries storage/maria/ma_dynrec.c: Test for HA_ERR_FILE_TOO_SHORT instead for -1 storage/maria/ma_ft_update.c: Fixed call to _ma_store_page_used() storage/maria/ma_key_recover.c: Added CRC for some translog REDO_INDEX entries Removed not needed pagecache_write() in _ma_apply_redo_index() storage/maria/ma_locking.c: Test for HA_ERR_FILE_TOO_SHORT instead for -1 storage/maria/ma_loghandler.c: Added used key map to REDO_REPAIR_TABLE storage/maria/ma_loghandler.h: Added operation for checksum of key pages storage/maria/ma_open.c: Allocate storage for undo lsn pointers storage/maria/ma_pagecache.c: Remove not needed include file Change logging to use fd: for file descritors as other code Added my_readwrite_flags to init_pagecache() to be able to get better error messages for maria_chk/maria_read_log Don't allow pagecaches with less than 8 blocks Remove wrong DBUG_ASSERT() storage/maria/ma_pagecache.h: Added readwrite_flags storage/maria/ma_recovery.c: Better error messages for maria_read_log: - Added eprint() for printing error messages - Print extra \n before error message if we are printing %0 %10 ... 
Added used key_map to REDO_REPAIR log entry More DBUG Call same repair method that was used by mysqld storage/maria/ma_rt_index.c: Moved flag if page is node from pagelength to keypage-flag byte storage/maria/ma_rt_key.c: Fixed call to _ma_store_page_used() storage/maria/ma_rt_split.c: Moved flag if page is node from pagelength to keypage-flag byte storage/maria/ma_static.c: Added maria_tmpdir storage/maria/ma_test1.c: Updated call to init_pagecache() storage/maria/ma_test2.c: Updated call to init_pagecache() storage/maria/ma_test3.c: Updated call to init_pagecache() storage/maria/ma_write.c: Removed #ifdef NOT_YET Moved flag if page is node from pagelength to keypage-flag byte Fixed bug in _ma_log_del_prefix() storage/maria/maria_chk.c: Fixed wrong min limit for page_buffer_size Updated call to init_pagecache() storage/maria/maria_def.h: Added EXTRA_DEBUG_KEY_CHANGES. When this is defined some REDO_INDEX entries contains page checksums Moved flag if page is node from pagelength to keypage-flag byte storage/maria/maria_ftdump.c: Updated call to init_pagecache() storage/maria/maria_pack.c: Updated call to init_pagecache() Reset share->state.create_rename_lsn & share->state.is_of_horizon storage/maria/maria_read_log.c: Better error messages Added --tmpdir option (needed to set temporary directory for REDO_REPAIR) Added --start-from-lsn Changed option for --display-only to 'd' (wanted to use -o for 'offset') storage/maria/unittest/lockman2-t.c: Added missing call to MY_INIT() storage/maria/unittest/ma_pagecache_consist.c: Updated call to init_pagecache() storage/maria/unittest/ma_pagecache_single.c: Fixed bug that caused program to sometimes fail Added some DBUG_ASSERTS() Changed some calls to malloc()/free() to my_malloc()/my_free() Create extra file to expose original hard-to-find bug storage/maria/unittest/ma_test_loghandler-t.c: Updated call to init_pagecache() storage/maria/unittest/ma_test_loghandler_first_lsn-t.c: Updated call to init_pagecache() 
storage/maria/unittest/ma_test_loghandler_max_lsn-t.c: Updated call to init_pagecache() storage/maria/unittest/ma_test_loghandler_multigroup-t.c: Updated call to init_pagecache() storage/maria/unittest/ma_test_loghandler_multithread-t.c: Updated call to init_pagecache() storage/maria/unittest/ma_test_loghandler_noflush-t.c: Updated call to init_pagecache() storage/maria/unittest/ma_test_loghandler_pagecache-t.c: Updated call to init_pagecache() storage/maria/unittest/ma_test_loghandler_purge-t.c: Updated call to init_pagecache() storage/maria/unittest/test_file.c: Changed malloc()/free() to my_malloc()/my_free() Fixed memory leak Changd logic a bit while trying to find bug in reset_file() storage/maria/unittest/trnman-t.c: Added missing call to MY_INIT() storage/myisam/mi_cache.c: Test for HA_ERR_FILE_TOO_SHORT instead for -1 storage/myisam/mi_create.c: Removed O_EXCL to get TRUNCATE to work for temporary files storage/myisam/mi_dynrec.c: Test for HA_ERR_FILE_TOO_SHORT instead for -1 storage/myisam/mi_locking.c: Test for HA_ERR_FILE_TOO_SHORT instead for -1 mysql-test/r/old-mode.result: New BitKeeper file ``mysql-test/r/old-mode.result'' mysql-test/t/old-mode-master.opt: New BitKeeper file ``mysql-test/t/old-mode-master.opt'' mysql-test/t/old-mode.test: New BitKeeper file ``mysql-test/t/old-mode.test'' --- mysys/checksum.c | 12 +++++++++--- mysys/lf_alloc-pin.c | 1 + mysys/my_handler.c | 3 ++- mysys/my_init.c | 12 +++++++++++- mysys/my_pread.c | 7 ++++++- mysys/my_read.c | 6 ++++-- 6 files changed, 33 insertions(+), 8 deletions(-) (limited to 'mysys') diff --git a/mysys/checksum.c b/mysys/checksum.c index 4f86f6845f0..0cc9801c2b1 100644 --- a/mysys/checksum.c +++ b/mysys/checksum.c @@ -18,6 +18,8 @@ #include #include +ha_checksum my_crc_dbug_check= 1; /* Unlikely number */ + /* Calculate a long checksum for a memoryblock. 
@@ -34,9 +36,13 @@ ha_checksum my_checksum(ha_checksum crc, const uchar *pos, size_t length) const uchar *end=pos+length; for ( ; pos != end ; pos++) crc=((crc << 8) + *((uchar*) pos)) + (crc >> (8*sizeof(ha_checksum)-8)); - return crc; #else - return (ha_checksum)crc32((uint)crc, pos, length); + crc= (ha_checksum) crc32((uint)crc, pos, length); +#endif /* NOT_USED */ + DBUG_PRINT("info", ("crc: %lu", (ulong) crc)); +#ifndef DBUG_OFF + if (crc == my_crc_dbug_check) + my_debug_put_break_here(); #endif + return crc; } - diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index a847d722023..ff9c5a42f81 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -333,6 +333,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins) struct st_lf_alloc_node *first, *last= NULL; LF_PINBOX *pinbox= pins->pinbox; + LINT_INIT(first); npins= pinbox->pins_in_array+1; #ifdef HAVE_ALLOCA diff --git a/mysys/my_handler.c b/mysys/my_handler.c index f7cf4f310d7..2b1c91a43e2 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -626,7 +626,8 @@ static const char *handler_error_messages[]= "Record is the same", "It is not possible to log this statement", "The table is of a new format not supported by this version", - "Got a fatal error during initialzaction of handler" + "Got a fatal error during initialzaction of handler", + "File to short; Expected more data in file" }; diff --git a/mysys/my_init.c b/mysys/my_init.c index 8ddc6092f79..850333e0100 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -78,7 +78,10 @@ my_bool my_init(void) my_umask= 0660; /* Default umask for new files */ my_umask_dir= 0700; /* Default umask for new directories */ init_glob_errs(); - my_progname_short= my_progname + dirname_length(my_progname); + my_progname_short= "unknown"; + if (my_progname) + my_progname_short= my_progname + dirname_length(my_progname); + #if defined(THREAD) && defined(SAFE_MUTEX) safe_mutex_global_init(); /* Must be called early */ #endif @@ -233,6 +236,13 @@ Voluntary 
context switches %ld, Involuntary context switches %ld\n", my_init_done=0; } /* my_end */ +#ifndef DBUG_OFF +/* Dummy tag function for debugging */ + +void my_debug_put_break_here(void) +{ +} +#endif #ifdef __WIN__ diff --git a/mysys/my_pread.c b/mysys/my_pread.c index de7a2b611ed..821d8636d8e 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -15,6 +15,7 @@ #include "mysys_priv.h" #include "mysys_err.h" +#include "my_base.h" #include #ifdef HAVE_PREAD #include @@ -63,7 +64,11 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, pthread_mutex_unlock(&my_file_info[Filedes].mutex); #else if ((error= ((readbytes= pread(Filedes, Buffer, Count, offset)) != Count))) - my_errno= errno ? errno : -1; + { + my_errno= errno; + if (errno == 0 || (errno == -1 && (MyFlags & (MY_NABP | MY_FNABP)))) + my_errno= HA_ERR_FILE_TOO_SHORT; + } #endif if (error || readbytes != Count) { diff --git a/mysys/my_read.c b/mysys/my_read.c index f3e8a4b300e..ee91620e163 100644 --- a/mysys/my_read.c +++ b/mysys/my_read.c @@ -15,9 +15,9 @@ #include "mysys_priv.h" #include "mysys_err.h" +#include #include - /* Read a chunk of bytes from a file with retry's if needed @@ -46,7 +46,9 @@ size_t my_read(File Filedes, uchar *Buffer, size_t Count, myf MyFlags) errno= 0; /* Linux doesn't reset this */ if ((readbytes= read(Filedes, Buffer, Count)) != Count) { - my_errno= errno ? 
errno : -1; + my_errno= errno; + if (errno == 0 || (errno == -1 && (MyFlags & (MY_NABP | MY_FNABP)))) + my_errno= HA_ERR_FILE_TOO_SHORT; DBUG_PRINT("warning",("Read only %d bytes off %lu from %d, errno: %d", (int) readbytes, (ulong) Count, Filedes, my_errno)); -- cgit v1.2.1 From 2f6f08ed8862122a2f2d2b199e516fe5795acbbc Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 10 Dec 2007 02:32:00 +0200 Subject: Added MARIA_SHARE *share to a lot of places to make code simpler Changed info->s -> share to get more efficent code Updated arguments to page accessor functions to use MARIA_SHARE * instead of MARIA_HA *. Tested running tests in quick mode (no balance page on insert and only when critical on delete) Fixed bug in underflow handling in quick mode Fixed bug in log handler where it accessed not initialized variable Fixed bug in log handler where it didn't free mutex in unlikely error condition Removed double write of page in case of of some underflow conditions Added DBUG_PRINT in safemutex lock/unlock dbug/dbug.c: Compile without SAFE_MUTEX (to be able to use DBUG_PRINT in safe_mutex code) Use calls to get/set my_thread_var->dbug. 
(Make dbug independent of compile time options for mysys) include/my_pthread.h: Added prototypes for my_thread_var_get_dbug() & my_thread_var_set_dbug() mysql-test/lib/mtr_report.pl: Don't check warnings in log files if we are using --extern mysys/my_thr_init.c: Added my_thread_var_get_dbug() & my_thread_var_set_dbug() mysys/thr_mutex.c: Added DBUG printing of addresses to mutex for lock/unlock storage/maria/ma_blockrec.c: Fixed comment storage/maria/ma_check.c: Added MARIA_SHARE *share to a lot of places to make code simpler info->s -> share Updated arguments to page accessor functions storage/maria/ma_close.c: Indentation fixes storage/maria/ma_create.c: Calculate min_key_length correctly storage/maria/ma_dbug.c: Indentation fixes storage/maria/ma_delete.c: Added MARIA_SHARE *share to a lot of places to make code simpler info->s -> share Updated arguments to page accessor functions Removed some writing of key pages that underflow (will be written by caller) Fixed crashing bug in underflow handling when using quick mode storage/maria/ma_delete_all.c: Indentation fixes storage/maria/ma_dynrec.c: Indentation fixes storage/maria/ma_extra.c: Fixed indentation Removed old useless code Reset share->changed if we have written state storage/maria/ma_ft_update.c: Added MARIA_SHARE *share to a lot of places to make code simpler info->s -> share Updated arguments to page accessor functions storage/maria/ma_info.c: Indentation fixes storage/maria/ma_key_recover.c: Added MARIA_SHARE *share to a lot of places to make code simpler info->s -> share Updated arguments to page accessor functions storage/maria/ma_locking.c: Indentation fixes storage/maria/ma_loghandler.c: Removed wrapper functions translog_mutex_lock and translog_mutex_unlock (safemutex now does same kind of printing) Renamed LOGREC_REDO_INSERT_ROW_BLOB to LOGREC_REDO_INSERT_NOT_USED to mark it free Fixed some DBUG_PRINT to ensure that convert-dbug-for-diff works Fixed bug in translog_flush() that caused log to stop 
syncing to disk Added missing mutex_unlock in case of error storage/maria/ma_loghandler.h: Renamed LOGREC_REDO_INSERT_ROW_BLOB to LOGREC_REDO_INSERT_NOT_USED to mark it free storage/maria/ma_open.c: Indentation fixes storage/maria/ma_packrec.c: Indentation fixes storage/maria/ma_page.c: Added MARIA_SHARE *share to a lot of places to make code simpler info->s -> share Updated arguments to page accessor functions Added check that we never write a key page without content (except in recovery where a key page may temporary be without content) storage/maria/ma_preload.c: Updated arguments to page accessor functions storage/maria/ma_range.c: Updated arguments to page accessor functions storage/maria/ma_rkey.c: Indentation fixes storage/maria/ma_rprev.c: Indentation fixes storage/maria/ma_rt_index.c: Added MARIA_SHARE *share to a lot of places to make code simpler info->s -> share Updated arguments to page accessor functions storage/maria/ma_rt_index.h: Updated arguments to page accessor functions storage/maria/ma_rt_key.c: Added MARIA_SHARE *share to a lot of places to make code simpler info->s -> share Updated arguments to page accessor functions storage/maria/ma_rt_mbr.c: Added MARIA_SHARE *share to a lot of places to make code simpler info->s -> share Updated arguments to page accessor functions storage/maria/ma_rt_split.c: Added MARIA_SHARE *share to a lot of places to make code simpler info->s -> share Updated arguments to page accessor functions storage/maria/ma_search.c: Added MARIA_SHARE *share to a lot of places to make code simpler info->s -> share Updated arguments to page accessor functions storage/maria/ma_sort.c: Indentation fixes storage/maria/ma_statrec.c: Indentation fixes storage/maria/ma_test1.c: Added extra undo test Flush also keys in -u1, to ensure that the full log is flushed storage/maria/ma_test2.c: Added extra undo test Flush also keys in -u1, to ensure that the full log is flushed storage/maria/ma_test_recovery.expected: Updated results 
storage/maria/ma_test_recovery: Added extra undo test storage/maria/ma_update.c: Indentation fixes storage/maria/ma_write.c: Added MARIA_SHARE *share to a lot of places to make code simpler info->s -> share Updated arguments to page accessor functions Prepare for quick mode for insert (don't balance page) storage/maria/maria_chk.c: Added MARIA_SHARE *share to a lot of places to make code simpler info->s -> share Updated arguments to page accessor functions storage/maria/maria_def.h: Updated arguments to page accessor functions --- mysys/my_thr_init.c | 31 +++++++++++++++++++++++++++++-- mysys/thr_mutex.c | 12 ++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c index 1ba6e5ac92d..da61d32d35c 100644 --- a/mysys/my_thr_init.c +++ b/mysys/my_thr_init.c @@ -347,6 +347,9 @@ void my_thread_end(void) tmp->init= 0; #endif +#if !defined(__WIN__) || defined(USE_TLS) + pthread_setspecific(THR_KEY_mysys,0); +#endif /* Decrement counter for number of running threads. 
We are using this in my_thread_global_end() to wait until all threads have called @@ -359,10 +362,12 @@ void my_thread_end(void) pthread_cond_signal(&THR_COND_threads); pthread_mutex_unlock(&THR_LOCK_threads); } - /* The following free has to be done, even if my_thread_var() is 0 */ + else + { #if !defined(__WIN__) || defined(USE_TLS) - pthread_setspecific(THR_KEY_mysys,0); + pthread_setspecific(THR_KEY_mysys,0); #endif + } } struct st_my_thread_var *_my_thread_var(void) @@ -380,6 +385,28 @@ struct st_my_thread_var *_my_thread_var(void) return tmp; } +extern void *my_thread_var_get_dbug(my_bool *error) +{ + struct st_my_thread_var *tmp= + my_pthread_getspecific(struct st_my_thread_var*,THR_KEY_mysys); + my_bool tmp_error; + if (!error) + error= &tmp_error; + if (tmp) + { + *error= 0; + return tmp->dbug; + } + *error= 1; /* no THR_KEY_mysys */ + return (void*) 0; +} + +extern void my_thread_var_set_dbug(void *dbug) +{ + struct st_my_thread_var *tmp= _my_thread_var(); + tmp->dbug= dbug; +} + /**************************************************************************** Get name of current thread. 
diff --git a/mysys/thr_mutex.c b/mysys/thr_mutex.c index e7a927e562a..839cbc133df 100644 --- a/mysys/thr_mutex.c +++ b/mysys/thr_mutex.c @@ -94,6 +94,10 @@ int safe_mutex_init(safe_mutex_t *mp, int safe_mutex_lock(safe_mutex_t *mp,const char *file, uint line) { int error; +#ifndef DBUG_OFF + if (my_thread_var_get_dbug((my_bool*) 0)) + DBUG_PRINT("mutex", ("Locking mutex: 0x%lx", (ulong) mp)); +#endif if (!mp->file) { fprintf(stderr, @@ -131,6 +135,10 @@ line %d more than 1 time\n", file,line); mp->file= file; mp->line=line; pthread_mutex_unlock(&mp->global); +#ifndef DBUG_OFF + if (my_thread_var_get_dbug((my_bool*) 0)) + DBUG_PRINT("mutex", ("mutex: 0x%lx locked", (ulong) mp)); +#endif return error; } @@ -138,6 +146,10 @@ line %d more than 1 time\n", file,line); int safe_mutex_unlock(safe_mutex_t *mp,const char *file, uint line) { int error; +#ifndef DBUG_OFF + if (my_thread_var_get_dbug((my_bool*) 0)) + DBUG_PRINT("mutex", ("Unlocking mutex 0x%lx", (ulong) mp)); +#endif pthread_mutex_lock(&mp->global); if (mp->count == 0) { -- cgit v1.2.1 From abe1031ea86eab1378f46351ae516fc3952e919e Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 10 Dec 2007 03:37:29 +0200 Subject: Fixed some compiler errors mysys/my_thr_init.c: Added missing DBUG_OFF storage/maria/ma_loghandler.c: Fixed wrong macro --- mysys/my_thr_init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c index da61d32d35c..bdf998f4c10 100644 --- a/mysys/my_thr_init.c +++ b/mysys/my_thr_init.c @@ -385,6 +385,8 @@ struct st_my_thread_var *_my_thread_var(void) return tmp; } +#ifndef DBUG_OFF + extern void *my_thread_var_get_dbug(my_bool *error) { struct st_my_thread_var *tmp= @@ -406,7 +408,7 @@ extern void my_thread_var_set_dbug(void *dbug) struct st_my_thread_var *tmp= _my_thread_var(); tmp->dbug= dbug; } - +#endif /**************************************************************************** Get name of current thread. 
-- cgit v1.2.1 From e8b0bb4769504c04f444c31f99d5a3d565ef9b39 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 10 Dec 2007 14:21:45 +0200 Subject: Fixed bug in allocation of dynamic record buffer in Maria Unified printing of mutex addresses to make them easier to compare mysys/thr_mutex.c: Unified printing of mutex addresses to make them easier to compare storage/maria/ma_dynrec.c: Fixed indentation storage/maria/ma_open.c: Fixed bug in allocation of dynamic record buffer --- mysys/thr_mutex.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mysys') diff --git a/mysys/thr_mutex.c b/mysys/thr_mutex.c index 839cbc133df..48d61a48c62 100644 --- a/mysys/thr_mutex.c +++ b/mysys/thr_mutex.c @@ -96,7 +96,7 @@ int safe_mutex_lock(safe_mutex_t *mp,const char *file, uint line) int error; #ifndef DBUG_OFF if (my_thread_var_get_dbug((my_bool*) 0)) - DBUG_PRINT("mutex", ("Locking mutex: 0x%lx", (ulong) mp)); + DBUG_PRINT("mutex", ("0x%lx locking", (ulong) mp)); #endif if (!mp->file) { @@ -137,7 +137,7 @@ line %d more than 1 time\n", file,line); pthread_mutex_unlock(&mp->global); #ifndef DBUG_OFF if (my_thread_var_get_dbug((my_bool*) 0)) - DBUG_PRINT("mutex", ("mutex: 0x%lx locked", (ulong) mp)); + DBUG_PRINT("mutex", ("0x%lx locked", (ulong) mp)); #endif return error; } @@ -148,7 +148,7 @@ int safe_mutex_unlock(safe_mutex_t *mp,const char *file, uint line) int error; #ifndef DBUG_OFF if (my_thread_var_get_dbug((my_bool*) 0)) - DBUG_PRINT("mutex", ("Unlocking mutex 0x%lx", (ulong) mp)); + DBUG_PRINT("mutex", ("0x%lx unlocking", (ulong) mp)); #endif pthread_mutex_lock(&mp->global); if (mp->count == 0) -- cgit v1.2.1 From 8224c76fbd1570d0afac76ee25e120bc5544cd7d Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 12 Dec 2007 23:57:28 +0200 Subject: Removed MARIA_BASE min_row_length (duplicate of min_block_length) Cleanup of recent code changes in dbug and my_thr_init Added name for each safe_mutex (for better DBUG and error reporting) Fixed that 
sort_info.max_records is calculated correctly. This fixed a bug in maria_chk Removed duplicate printing of mutex address in dbug log dbug/dbug.c: Cleanup of recent code changes include/my_pthread.h: Added name for each safe_mutex (for better DBUG and error reporting) mysys/my_thr_init.c: Cleanup of recent code changes mysys/thr_mutex.c: Added name for each safe_mutex (for better DBUG and error reporting) mysys/wqueue.c: Removed some mutex printing (as it's done now when we take mutex) storage/maria/Makefile.am: Fixed that 'make tags' works with xemacs storage/maria/ma_blockrec.c: base.min_row_length -> base.min_block_length (As they where basicly the same variable) storage/maria/ma_check.c: Moved more common stuff to initialize_variables_for_repair Fixed that sort_info.max_records is calculated correctly. This fixed a bug in maria_chk storage/maria/ma_create.c: More comments Fixed that min_pack_length is calculated more correctly Removed duplicate variable base.min_row_length storage/maria/ma_loghandler.c: Removed duplicate printing of mutex address storage/maria/ma_open.c: Removed base.min_row_length storage/maria/ma_packrec.c: Removed not anymore needed code (One should not change any .base variables as this will affect repair with unpack) storage/maria/maria_def.h: Removed base.min_row_length --- mysys/my_thr_init.c | 20 ++---------- mysys/thr_mutex.c | 90 +++++++++++++++++++++++++++++------------------------ mysys/wqueue.c | 12 ++++--- 3 files changed, 59 insertions(+), 63 deletions(-) (limited to 'mysys') diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c index bdf998f4c10..c9ce6ab169f 100644 --- a/mysys/my_thr_init.c +++ b/mysys/my_thr_init.c @@ -386,27 +386,13 @@ struct st_my_thread_var *_my_thread_var(void) } #ifndef DBUG_OFF +/* Return pointer to DBUG for holding current state */ -extern void *my_thread_var_get_dbug(my_bool *error) +extern void **my_thread_var_dbug() { struct st_my_thread_var *tmp= my_pthread_getspecific(struct 
st_my_thread_var*,THR_KEY_mysys); - my_bool tmp_error; - if (!error) - error= &tmp_error; - if (tmp) - { - *error= 0; - return tmp->dbug; - } - *error= 1; /* no THR_KEY_mysys */ - return (void*) 0; -} - -extern void my_thread_var_set_dbug(void *dbug) -{ - struct st_my_thread_var *tmp= _my_thread_var(); - tmp->dbug= dbug; + return tmp ? &tmp->dbug : 0; } #endif diff --git a/mysys/thr_mutex.c b/mysys/thr_mutex.c index 48d61a48c62..015061900e5 100644 --- a/mysys/thr_mutex.c +++ b/mysys/thr_mutex.c @@ -54,7 +54,7 @@ void safe_mutex_global_init(void) int safe_mutex_init(safe_mutex_t *mp, const pthread_mutexattr_t *attr __attribute__((unused)), const char *file, - uint line) + uint line, const char *name) { bzero((char*) mp,sizeof(*mp)); pthread_mutex_init(&mp->global,MY_MUTEX_INIT_ERRCHK); @@ -62,6 +62,8 @@ int safe_mutex_init(safe_mutex_t *mp, /* Mark that mutex is initialized */ mp->file= file; mp->line= line; + /* Skip the very common '&' prefix from the autogenerated name */ + mp->name= name[0] == '&' ? name + 1 : name; #ifdef SAFE_MUTEX_DETECT_DESTROY /* @@ -94,10 +96,8 @@ int safe_mutex_init(safe_mutex_t *mp, int safe_mutex_lock(safe_mutex_t *mp,const char *file, uint line) { int error; -#ifndef DBUG_OFF - if (my_thread_var_get_dbug((my_bool*) 0)) - DBUG_PRINT("mutex", ("0x%lx locking", (ulong) mp)); -#endif + DBUG_PRINT("mutex", ("%s (0x%lx) locking", mp->name ? 
mp->name : "Null", + (ulong) mp)); if (!mp->file) { fprintf(stderr, @@ -110,8 +110,8 @@ int safe_mutex_lock(safe_mutex_t *mp,const char *file, uint line) pthread_mutex_lock(&mp->global); if (mp->count > 0 && pthread_equal(pthread_self(),mp->thread)) { - fprintf(stderr,"safe_mutex: Trying to lock mutex at %s, line %d, when the mutex was already locked at %s, line %d in thread %s\n", - file,line,mp->file, mp->line, my_thread_name()); + fprintf(stderr,"safe_mutex: Trying to lock mutex %s at %s, line %d, when the mutex was already locked at %s, line %d in thread %s\n", + mp->name, file,line,mp->file, mp->line, my_thread_name()); fflush(stderr); abort(); } @@ -119,26 +119,23 @@ int safe_mutex_lock(safe_mutex_t *mp,const char *file, uint line) error=pthread_mutex_lock(&mp->mutex); if (error || (error=pthread_mutex_lock(&mp->global))) { - fprintf(stderr,"Got error %d when trying to lock mutex at %s, line %d\n", - error, file, line); + fprintf(stderr,"Got error %d when trying to lock mutex %s at %s, line %d\n", + error, mp->name, file, line); fflush(stderr); abort(); } mp->thread= pthread_self(); if (mp->count++) { - fprintf(stderr,"safe_mutex: Error in thread libray: Got mutex at %s, \ -line %d more than 1 time\n", file,line); + fprintf(stderr,"safe_mutex: Error in thread libray: Got mutex %s at %s, " + "line %d more than 1 time\n", mp->name, file,line); fflush(stderr); abort(); } mp->file= file; - mp->line=line; + mp->line= line; pthread_mutex_unlock(&mp->global); -#ifndef DBUG_OFF - if (my_thread_var_get_dbug((my_bool*) 0)) - DBUG_PRINT("mutex", ("0x%lx locked", (ulong) mp)); -#endif + DBUG_PRINT("mutex", ("%s (0x%lx) locked", mp->name, (ulong) mp)); return error; } @@ -146,22 +143,22 @@ line %d more than 1 time\n", file,line); int safe_mutex_unlock(safe_mutex_t *mp,const char *file, uint line) { int error; -#ifndef DBUG_OFF - if (my_thread_var_get_dbug((my_bool*) 0)) - DBUG_PRINT("mutex", ("0x%lx unlocking", (ulong) mp)); -#endif + DBUG_PRINT("mutex", ("%s (0x%lx) 
unlocking", mp->name, (ulong) mp)); pthread_mutex_lock(&mp->global); if (mp->count == 0) { - fprintf(stderr,"safe_mutex: Trying to unlock mutex that wasn't locked at %s, line %d\n Last used at %s, line: %d\n", - file,line,mp->file ? mp->file : "",mp->line); + fprintf(stderr,"safe_mutex: Trying to unlock mutex %s that wasn't locked at %s, line %d\n" + "Last used at %s, line: %d\n", + mp->name ? mp->name : "Null", file, line, + mp->file ? mp->file : "Null", mp->line); fflush(stderr); abort(); } if (!pthread_equal(pthread_self(),mp->thread)) { - fprintf(stderr,"safe_mutex: Trying to unlock mutex at %s, line %d that was locked by another thread at: %s, line: %d\n", - file,line,mp->file,mp->line); + fprintf(stderr,"safe_mutex: Trying to unlock mutex %s at %s, line %d that was locked by " + "another thread at: %s, line: %d\n", + mp->name, file, line, mp->file, mp->line); fflush(stderr); abort(); } @@ -174,7 +171,8 @@ int safe_mutex_unlock(safe_mutex_t *mp,const char *file, uint line) error=pthread_mutex_unlock(&mp->mutex); if (error) { - fprintf(stderr,"safe_mutex: Got error: %d (%d) when trying to unlock mutex at %s, line %d\n", error, errno, file, line); + fprintf(stderr,"safe_mutex: Got error: %d (%d) when trying to unlock mutex %s at %s, " + "line %d\n", error, errno, mp->name, file, line); fflush(stderr); abort(); } @@ -191,22 +189,24 @@ int safe_cond_wait(pthread_cond_t *cond, safe_mutex_t *mp, const char *file, pthread_mutex_lock(&mp->global); if (mp->count == 0) { - fprintf(stderr,"safe_mutex: Trying to cond_wait on a unlocked mutex at %s, line %d\n",file,line); + fprintf(stderr,"safe_mutex: Trying to cond_wait on a unlocked mutex %s at %s, line %d\n", + mp->name ? 
mp->name : "Null", file, line); fflush(stderr); abort(); } if (!pthread_equal(pthread_self(),mp->thread)) { - fprintf(stderr,"safe_mutex: Trying to cond_wait on a mutex at %s, line %d that was locked by another thread at: %s, line: %d\n", - file,line,mp->file,mp->line); + fprintf(stderr,"safe_mutex: Trying to cond_wait on a mutex %s at %s, line %d that was " + "locked by another thread at: %s, line: %d\n", + mp->name, file, line, mp->file, mp->line); fflush(stderr); abort(); } if (mp->count-- != 1) { - fprintf(stderr,"safe_mutex: Count was %d on locked mutex at %s, line %d\n", - mp->count+1, file, line); + fprintf(stderr,"safe_mutex: Count was %d on locked mutex %s at %s, line %d\n", + mp->count+1, mp->name, file, line); fflush(stderr); abort(); } @@ -215,7 +215,8 @@ int safe_cond_wait(pthread_cond_t *cond, safe_mutex_t *mp, const char *file, pthread_mutex_lock(&mp->global); if (error) { - fprintf(stderr,"safe_mutex: Got error: %d (%d) when doing a safe_mutex_wait at %s, line %d\n", error, errno, file, line); + fprintf(stderr,"safe_mutex: Got error: %d (%d) when doing a safe_mutex_wait on %s at %s, " + "line %d\n", error, errno, mp->name, file, line); fflush(stderr); abort(); } @@ -223,8 +224,8 @@ int safe_cond_wait(pthread_cond_t *cond, safe_mutex_t *mp, const char *file, if (mp->count++) { fprintf(stderr, - "safe_mutex: Count was %d in thread 0x%lx when locking mutex at %s, line %d\n", - mp->count-1, my_thread_dbug_id(), file, line); + "safe_mutex: Count was %d in thread 0x%lx when locking mutex %s at %s, line %d\n", + mp->count-1, my_thread_dbug_id(), mp->name, file, line); fflush(stderr); abort(); } @@ -243,7 +244,8 @@ int safe_cond_timedwait(pthread_cond_t *cond, safe_mutex_t *mp, pthread_mutex_lock(&mp->global); if (mp->count != 1 || !pthread_equal(pthread_self(),mp->thread)) { - fprintf(stderr,"safe_mutex: Trying to cond_wait at %s, line %d on a not hold mutex\n",file,line); + fprintf(stderr,"safe_mutex: Trying to cond_wait at %s, line %d on a not hold mutex 
%s\n", + file, line, mp->name ? mp->name : "Null"); fflush(stderr); abort(); } @@ -253,7 +255,10 @@ int safe_cond_timedwait(pthread_cond_t *cond, safe_mutex_t *mp, #ifdef EXTRA_DEBUG if (error && (error != EINTR && error != ETIMEDOUT && error != ETIME)) { - fprintf(stderr,"safe_mutex: Got error: %d (%d) when doing a safe_mutex_timedwait at %s, line %d\n", error, errno, file, line); + fprintf(stderr, + "safe_mutex: Got error: %d (%d) when doing a safe_mutex_timedwait on %s at %s, " + "line %d\n", + error, errno, mp->name, file, line); } #endif pthread_mutex_lock(&mp->global); @@ -261,8 +266,10 @@ int safe_cond_timedwait(pthread_cond_t *cond, safe_mutex_t *mp, if (mp->count++) { fprintf(stderr, - "safe_mutex: Count was %d in thread 0x%lx when locking mutex at %s, line %d (error: %d (%d))\n", - mp->count-1, my_thread_dbug_id(), file, line, error, error); + "safe_mutex: Count was %d in thread 0x%lx when locking mutex %s at %s, line %d " + "(error: %d (%d))\n", + mp->count-1, my_thread_dbug_id(), mp->name, file, line, + error, error); fflush(stderr); abort(); } @@ -286,8 +293,9 @@ int safe_mutex_destroy(safe_mutex_t *mp, const char *file, uint line) } if (mp->count != 0) { - fprintf(stderr,"safe_mutex: Trying to destroy a mutex that was locked at %s, line %d at %s, line %d\n", - mp->file,mp->line, file, line); + fprintf(stderr,"safe_mutex: Trying to destroy a mutex %s that was locked at %s, " + "line %d at %s, line %d\n", + mp->name, mp->file, mp->line, file, line); fflush(stderr); abort(); } @@ -359,8 +367,8 @@ void safe_mutex_end(FILE *file __attribute__((unused))) struct st_safe_mutex_info_t *ptr; for (ptr= safe_mutex_root ; ptr ; ptr= ptr->next) { - fprintf(file, "\tMutex initiated at line %4u in '%s'\n", - ptr->init_line, ptr->init_file); + fprintf(file, "\tMutex %s initiated at line %4u in '%s'\n", + ptr->name, ptr->init_line, ptr->init_file); (void) fflush(file); } } diff --git a/mysys/wqueue.c b/mysys/wqueue.c index 28e044ff606..bfe9cba1235 100644 --- 
a/mysys/wqueue.c +++ b/mysys/wqueue.c @@ -147,18 +147,20 @@ void wqueue_release_queue(WQUEUE *wqueue) */ void wqueue_add_and_wait(WQUEUE *wqueue, - struct st_my_thread_var *thread, pthread_mutex_t *lock) + struct st_my_thread_var *thread, + pthread_mutex_t *lock) { DBUG_ENTER("wqueue_add_and_wait"); - DBUG_PRINT("enter", ("thread ox%lxcond 0x%lx, mutex 0x%lx", - (ulong) thread, (ulong) &thread->suspend, (ulong) lock)); + DBUG_PRINT("enter", + ("thread: 0x%lx cond: 0x%lx mutex: 0x%lx", + (ulong) thread, (ulong) &thread->suspend, (ulong) lock)); wqueue_add_to_queue(wqueue, thread); do { - DBUG_PRINT("info", ("wait... cond 0x%lx, mutex 0x%lx", + DBUG_PRINT("info", ("wait... cond: 0x%lx mutex: 0x%lx", (ulong) &thread->suspend, (ulong) lock)); pthread_cond_wait(&thread->suspend, lock); - DBUG_PRINT("info", ("wait done cond 0x%lx, mutex 0x%lx, next 0x%lx", + DBUG_PRINT("info", ("wait done cond: 0x%lx mutex: 0x%lx next: 0x%lx", (ulong) &thread->suspend, (ulong) lock, (ulong) thread->next)); } -- cgit v1.2.1 From 01ea6c1e7fb4687f9af5a1ce0d0bd3b1935bfece Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 13 Dec 2007 19:25:01 +0100 Subject: fixes for windows builds --- mysys/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt index 6b24165686a..2ec871535da 100755 --- a/mysys/CMakeLists.txt +++ b/mysys/CMakeLists.txt @@ -39,7 +39,7 @@ SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c default.c default_ my_mkdir.c my_mmap.c my_net.c my_once.c my_open.c my_pread.c my_pthread.c my_quick.c my_read.c my_realloc.c my_redel.c my_rename.c my_seek.c my_sleep.c my_static.c my_symlink.c my_symlink2.c my_sync.c my_thr_init.c my_wincond.c - my_windac.c my_winthread.c my_write.c ptr_cmp.c queues.c + my_windac.c my_winthread.c my_write.c ptr_cmp.c queues.c rijndael.c safemalloc.c sha1.c string.c thr_alarm.c thr_lock.c thr_mutex.c thr_rwlock.c tree.c typelib.c my_vle.c base64.c my_memmem.c 
my_getpagesize.c) -- cgit v1.2.1 From f970477b36ea7624f69328abcc8118c9a7ae26cd Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 14 Dec 2007 09:47:00 +0100 Subject: windows fix: fix the #include directive --- mysys/my_rnd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_rnd.c b/mysys/my_rnd.c index e1aca222954..b7dca0f2afd 100644 --- a/mysys/my_rnd.c +++ b/mysys/my_rnd.c @@ -13,7 +13,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include +#include "mysys_priv.h" #include /* -- cgit v1.2.1 From 313bb4831de789be3118f810fbde6138eb9e0f58 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 14 Dec 2007 10:51:07 +0100 Subject: more cmake fixes --- mysys/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt index 2ec871535da..3ba9bef4613 100755 --- a/mysys/CMakeLists.txt +++ b/mysys/CMakeLists.txt @@ -27,7 +27,7 @@ INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/zlib ${CMAKE_SOURCE_DIR}/include ${CMAKE SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c default.c default_modify.c errors.c hash.c list.c md5.c mf_brkhant.c mf_cache.c mf_dirname.c mf_fn_ext.c - mf_format.c mf_getdate.c mf_iocache.c mf_iocache2.c mf_keycache.c + mf_format.c mf_getdate.c mf_iocache.c mf_iocache2.c mf_keycache.c my_safehash.c mf_keycaches.c mf_loadpath.c mf_pack.c mf_path.c mf_qsort.c mf_qsort2.c mf_radix.c mf_same.c mf_sort.c mf_soundex.c mf_strip.c mf_tempdir.c mf_tempfile.c mf_unixpath.c mf_wcomp.c mf_wfile.c mulalloc.c my_access.c -- cgit v1.2.1 From c4c63a1425705f8aec9a4cb2ce9aa352defad712 Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 16 Dec 2007 12:31:29 +0100 Subject: my_uuid_init() was forgotten mysys/my_uuid.c: de-corelate two randominit's sql/mysqld.cc: my_uuid_init() was forgotten here --- mysys/my_uuid.c | 7 +++---- 1 file changed, 3 insertions(+), 
4 deletions(-) (limited to 'mysys') diff --git a/mysys/my_uuid.c b/mysys/my_uuid.c index 3c3cd8836fc..79d89920085 100644 --- a/mysys/my_uuid.c +++ b/mysys/my_uuid.c @@ -105,13 +105,12 @@ void my_uuid_init(ulong seed1, ulong seed2) randominit() here. */ /* purecov: begin inspected */ - my_rnd_init(&uuid_rand, (ulong) (seed2+ now/2), (seed1 + now)+random()); + my_rnd_init(&uuid_rand, (ulong) (seed2+ now/2), now+random()); for (i=0; i < sizeof(mac); i++) mac[i]= (uchar)(my_rnd(&uuid_rand)*255); - /* purecov: end */ + /* purecov: end */ } - my_rnd_init(&uuid_rand, (ulong) (seed1 + now), - (ulong) (now/2+ seed2 + getpid())); + my_rnd_init(&uuid_rand, (ulong) (seed1 + now), (ulong) (now/2+ getpid())); set_clock_seq(); pthread_mutex_init(&LOCK_uuid_generator, MY_MUTEX_INIT_FAST); } -- cgit v1.2.1 From be71f3ccb61d3c4f1ebc096ba4e5e1563f578681 Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 16 Dec 2007 20:37:22 +0200 Subject: Fixed after-merge problems. --- mysys/my_getopt.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'mysys') diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c index d7de0d12e08..796062f75b0 100644 --- a/mysys/my_getopt.c +++ b/mysys/my_getopt.c @@ -30,7 +30,6 @@ my_error_reporter my_getopt_error_reporter= &default_reporter; static int findopt(char *, uint, const struct my_option **, char **); my_bool getopt_compare_strings(const char *, const char *, uint); static longlong getopt_ll(char *arg, const struct my_option *optp, int *err); -static longlong getopt_ll_limit_value(longlong, const struct my_option *); static ulonglong getopt_ull(char *, const struct my_option *, int *); static double getopt_double(char *arg, const struct my_option *optp, int *err); static void init_variables(const struct my_option *options, @@ -789,8 +788,8 @@ static longlong getopt_ll(char *arg, const struct my_option *optp, int *err) Returns "fixed" value. 
*/ -static longlong getopt_ll_limit_value(longlong num, const struct my_option *optp, - bool *fix) +longlong getopt_ll_limit_value(longlong num, const struct my_option *optp, + bool *fix) { longlong old= num; bool adjusted= FALSE; @@ -859,7 +858,7 @@ static ulonglong getopt_ull(char *arg, const struct my_option *optp, int *err) ulonglong getopt_ull_limit_value(ulonglong num, const struct my_option *optp, - bool *fix); + bool *fix) { bool adjusted= FALSE; ulonglong old= num; @@ -965,25 +964,27 @@ static void init_one_value(const struct my_option *optp, uchar* *variable, *((my_bool*) variable)= (my_bool) value; break; case GET_INT: - *((int*) variable)= (int) getopt_ll_limit_value(value, optp); + *((int*) variable)= (int) getopt_ll_limit_value(value, optp, NULL); break; case GET_UINT: - *((uint*) variable)= (uint) getopt_ull_limit_value(value, optp); + *((uint*) variable)= (uint) getopt_ull_limit_value(value, optp, NULL); break; case GET_ENUM: *((uint*) variable)= (uint) value; break; case GET_LONG: - *((long*) variable)= (long) getopt_ll_limit_value(value, optp); + *((long*) variable)= (long) getopt_ll_limit_value(value, optp, NULL); break; case GET_ULONG: - *((ulong*) variable)= (ulong) getopt_ull_limit_value(value, optp); + *((ulong*) variable)= (ulong) getopt_ull_limit_value(value, optp, NULL); break; case GET_LL: - *((longlong*) variable)= (longlong) getopt_ll_limit_value(value, optp); + *((longlong*) variable)= (longlong) getopt_ll_limit_value(value, optp, + NULL); break; case GET_ULL: - *((ulonglong*) variable)= (ulonglong) getopt_ull_limit_value(value, optp); + *((ulonglong*) variable)= (ulonglong) getopt_ull_limit_value(value, optp, + NULL); break; case GET_SET: *((ulonglong*) variable)= (ulonglong) value; -- cgit v1.2.1 From cc589bef15c16b48cd5715645abb545df284fb5a Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 17 Dec 2007 01:17:37 +0200 Subject: Fixed bug in undo_key_delete; Caused crashed key files in recovery Maria is now used for internal temporary 
tables in MySQL Better usage of VARCHAR and long strings in temporary tables Use packed fields if BLOCK_RECORD is used null_bytes are not anymore stored in a separate field New interface to remember and restore scan position Fixed bugs in unique handling Don't sync Maria temporary tables Lock control file while it's used to stop several processes from using it Changed value of MA_DONT_OVERWRITE_FILE as it collided with MY_SYNC_DIR Split MY_DONT_WAIT into MY_NO_WAIT and MY_SHORT_WAIT (for my_lock()) Added MY_FORCE_LOCK include/my_sys.h: Changed value of MA_DONT_OVERWRITE_FILE as it collided with MY_SYNC_DIR Split MY_DONT_WAIT into MY_NO_WAIT and MY_SHORT_WAIT (for my_lock()) Added MY_FORCE_LOCK include/myisam.h: Make MyISAM columndef compile time compatible with Maria mysql-test/lib/mtr_process.pl: Removed confusing warning (It's common that there is a lot of other files than pid files) mysql-test/mysql-test-run.pl: Added --sync-frm to speed up tests mysql-test/r/maria-recovery.result: Updated results from wrong push mysql-test/suite/rpl/t/rpl_innodb_bug28430.test: Marked test as --big mysys/my_lock.c: If MY_FORCE_LOCK is given, use locking even if my_disable_locking is given If MY_NO_WAIT is given, return at once if lock is occupied If MY_SHORT_WAIT is given, wait some time for lock before returning (This was called MY_DONT_WAIT before) mysys/my_thr_init.c: Fix that we don't give name to thread before it's properly initied sql/handler.cc: Added myisam.h sql/handler.h: Changes to use Maria for internal temporary tables Removed not needed argument to restart_rnd_next() Added function remember_rnd_pos() sql/my_lock.c: If MY_FORCE_LOCK is given, use locking even if my_disable_locking is given If MY_NO_WAIT is given, return at once if lock is occupied If MY_SHORT_WAIT is given, wait some time for lock before returning (This was called MY_DONT_WAIT before) sql/mysql_priv.h: Added maria_hton sql/sql_class.h: Changes to use Maria for internal temporary tables 
sql/sql_select.cc: Changes to use Maria for internal temporary tables Temporary tables didn't properly switch to dynamic row format if long strings was used Better usage of VARCHAR in temporary tables Use new interface to restart scan in duplicate removal sql/sql_select.h: Changes to use Maria for internal temporary tables sql/sql_show.cc: Changes to use Maria for internal temporary tables Removed all end space sql/sql_table.cc: Set HA_OPTION_PACK_RECORD if we are not using default or static record sql/sql_union.cc: If MY_FORCE_LOCK is given, use locking even if my_disable_locking is given If MY_NO_WAIT is given, return at once if lock is occupied If MY_SHORT_WAIT is given, wait some time for lock before returning (This was called MY_DONT_WAIT before) sql/sql_update.cc: If MY_FORCE_LOCK is given, use locking even if my_disable_locking is given If MY_NO_WAIT is given, return at once if lock is occupied If MY_SHORT_WAIT is given, wait some time for lock before returning (This was called MY_DONT_WAIT before) storage/maria/ha_maria.cc: Use packed fields null_bytes are not anymore stored in a separate field Changes to use Maria for internal temporary tables Give warning if we try to do an ALTER TABLE to a unusable row format storage/maria/ha_maria.h: Allow Maria with block format to restart scanning at given position storage/maria/ma_blockrec.c: Added functions to remember and restore scan position Allocate cur_row.extents so that we don't have to do a malloc on first read Fixed bug when using packed row without packed strings Removed unneeded calls to free_full_pages() Fixed unlikely bug when using old bitmap to read head page and head page had gone away Remember row position when doing undo of delete and update row (needed for undo of key delete) storage/maria/ma_blockrec.h: Added functions to remember and restore scan position storage/maria/ma_close.c: Don't sync temporary tables storage/maria/ma_control_file.c: Lock control file while it's used to stop several 
processes from using it storage/maria/ma_create.c: Fixed bug when using FIELD_NORMAL that was longer than FULL_PAGE_SIZE Fixed bug that casued fields to not be ordered according to offset Fixed bug in unique creation storage/maria/ma_delete.c: Don't write record reference when deleting key. (Rowid is likely to be different when we undo this) storage/maria/ma_dynrec.c: Fixed core dump when comparing records (happended in unique handling) storage/maria/ma_extra.c: MY_DONT_WAIT -> MY_SHORT_WAIT Removed TODO comment. (Was not relevant as all other instances are guranteed to be closed when we the code is excecuted) Added DBUG_ASSERT() to prove above. storage/maria/ma_key_recover.c: CLR's for UNDO_ROW_DELETE and UNDO_ROW_UPDATE now include rowid for the row. This was needed for undo_key_delete to work, as undo of delete row is likely to put row in a new position. undo_delete_key now doesn't include row position storage/maria/ma_open.c: Added virtual functions for remembering and restoring scan position Fixed wrong key search method when using multi-byte character sets (Bug#32705) Store original column number in index file NOTE: Index files are now incompatible with previous versions! 
(Ok as we haven't yet made a public Maria release) storage/maria/ma_recovery.c: Set info->cur_row.lastpos when reading CLR's for UNDO_ROW_DELETE or UNDO_ROW_UPDATE storage/maria/ma_scan.c: Added default function to remember and restore scan position storage/maria/maria_def.h: Added virtual functions & variables to remember and restore scan position Added MARIA_MAX_CONTROL_FILE_LOCK_RETRY storage/myisam/ha_myisam.cc: Fixed compiler errors as columdef->type is now an enum, not an integer Added functions to remember and restore scan position storage/myisam/ha_myisam.h: Added functions to remember and restore scan position storage/myisam/mi_check.c: MY_DONT_WAIT -> MY_SHORT_WAIT storage/myisam/mi_extra.c: MY_DONT_WAIT -> MY_SHORT_WAIT storage/myisam/mi_open.c: MY_DONT_WAIT -> MY_SHORT_WAIT storage/myisam/myisamdef.h: MY_DONT_WAIT -> MY_SHORT_WAIT --- mysys/my_lock.c | 12 +++++++++--- mysys/my_thr_init.c | 4 ++-- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'mysys') diff --git a/mysys/my_lock.c b/mysys/my_lock.c index c0522ee849d..4b8c067ba0d 100644 --- a/mysys/my_lock.c +++ b/mysys/my_lock.c @@ -54,7 +54,7 @@ int my_lock(File fd, int locktype, my_off_t start, my_off_t length, #ifdef VMS DBUG_RETURN(0); #else - if (my_disable_locking) + if (my_disable_locking && ! (MyFlags & MY_FORCE_LOCK)) DBUG_RETURN(0); #if defined(__NETWARE__) @@ -131,10 +131,16 @@ int my_lock(File fd, int locktype, my_off_t start, my_off_t length, lock.l_start= (off_t) start; lock.l_len= (off_t) length; - if (MyFlags & MY_DONT_WAIT) + if (MyFlags & (MY_NO_WAIT | MY_SHORT_WAIT)) { if (fcntl(fd,F_SETLK,&lock) != -1) /* Check if we can lock */ - DBUG_RETURN(0); /* Ok, file locked */ + DBUG_RETURN(0); /* Ok, file locked */ + if (MyFlags & MY_NO_WAIT) + { + my_errno= (errno == EACCES) ? EAGAIN : errno ? errno : -1; + DBUG_RETURN(-1); + } + DBUG_PRINT("info",("Was locked, trying with alarm")); ALARM_INIT; while ((value=fcntl(fd,F_SETLKW,&lock)) && ! 
ALARM_TEST && diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c index c9ce6ab169f..aadb86d39ed 100644 --- a/mysys/my_thr_init.c +++ b/mysys/my_thr_init.c @@ -289,12 +289,12 @@ my_bool my_thread_init(void) #endif pthread_mutex_init(&tmp->mutex,MY_MUTEX_INIT_FAST); pthread_cond_init(&tmp->suspend, NULL); - tmp->init= 1; pthread_mutex_lock(&THR_LOCK_threads); tmp->id= ++thread_id; ++THR_thread_count; pthread_mutex_unlock(&THR_LOCK_threads); + tmp->init= 1; #ifndef DBUG_OFF /* Generate unique name for thread */ (void) my_thread_name(); @@ -392,7 +392,7 @@ extern void **my_thread_var_dbug() { struct st_my_thread_var *tmp= my_pthread_getspecific(struct st_my_thread_var*,THR_KEY_mysys); - return tmp ? &tmp->dbug : 0; + return tmp && tmp->init ? &tmp->dbug : 0; } #endif -- cgit v1.2.1 From 30d3d8d3fc41d04f2f90389a42f27520ed63b41b Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 18 Dec 2007 03:21:32 +0200 Subject: Fixed several bugs in page CRC handling - Ignore CRC errors in REDO for potential new pages - Ignore CRC errors when repairing tables - Don't do readcheck callback on read error - Set my_errno to HA_ERR_WRONG_CRC if we find page with wrong CRC - Check index page for length before calculating CRC to catch bad pages Fixed bugs where we used wrong file descriptor to read/write bitmaps Fixed wrong hash key in 'files_in_flush' Fixed wrong lock method when writing bitmap Fixed some wrong printf statements in check/repair that caused core dumps Fixed argument to translog_page_validator that cause reading of log files to fail Store number of bytes used for delete-linked key pages to be able to use standard index CRC for deleted key pages. 
Use fast 'dummy' pagecheck callbacks for temporary tables Don't die silently if flush finds pinned pages Give error (for now) if one tries to create a transactional table with fulltext or spatial keys Removed some not needed calls to pagecache_file_init() Added checking of pagecache checksums to ma_test1 and ma_test2 More DBUG Fixed some DBUG_PRINT to be in line with rest of the code include/my_base.h: Added HA_ERR_INTERNAL_ERROR (used for flush with pinned pages) and HA_ERR_WRONG_CRC mysql-test/r/binlog_unsafe.result: Added missing DROP VIEW statement mysql-test/r/maria.result: Added TRANSACTIONAL=0 when testing with fulltext keys Added test that verifies we can't yet create transactional test with fulltext or spatial keys mysql-test/r/ps_maria.result: Added TRANSACTIONAL=0 when testing with fulltext keys mysql-test/t/binlog_unsafe.test: Added missing DROP VIEW statement mysql-test/t/maria.test: Added TRANSACTIONAL=0 when testing with fulltext keys Added test that verifies we can't yet create transactional test with fulltext or spatial keys mysql-test/t/ps_maria.test: Added TRANSACTIONAL=0 when testing with fulltext keys mysys/my_fopen.c: Fd: -> fd: mysys/my_handler.c: Added new error messages mysys/my_lock.c: Fd: -> fd: mysys/my_pread.c: Fd: -> fd: mysys/my_read.c: Fd: -> fd: mysys/my_seek.c: Fd: -> fd: mysys/my_sync.c: Fd: -> fd: mysys/my_write.c: Fd: -> fd: sql/mysqld.cc: Fixed wrong argument to my_uuid_init() sql/sql_plugin.cc: Unified DBUG_PRINT (for convert-dbug-for-diff) storage/maria/ma_bitmap.c: Fixed wrong lock method when writing bitmap Fixed valgrind error Use fast 'dummy' pagecheck callbacks for temporary tables Faster bitmap handling for non transational tables storage/maria/ma_blockrec.c: Fixed that bitmap reading is done with the correct filehandle Handle reading of pages with wrong CRC when page contect doesn't matter Use the page buffer also when we get WRONG CRC or FILE_TOO_SHORT. 
(Faster and fixed a couple of bugs) storage/maria/ma_check.c: Split long strings for readablity Fixed some wrong printf statements that caused core dumps Use bitmap.file for bitmaps Ignore pages with wrong CRC storage/maria/ma_close.c: More DBUG_PRINT storage/maria/ma_create.c: Give error (for now) if one tries to create a crash safe table with fulltext or spatial keys storage/maria/ma_key_recover.c: Ignore HA_ERR_WRONG_CRC for new pages info->s -> share Store number of bytes used for delete-linked key pages to be able to use standard index CRC for deleted key pages. storage/maria/ma_loghandler.c: Fixed argument to translog_page_validator() storage/maria/ma_open.c: Removed old VMS specific code Added function to setup pagecache callbacks Moved code around to set 'share->temporary' early Removed some not needed calls to pagecache_file_init() storage/maria/ma_page.c: Store number of bytes used for delete-linked key pages to be able to use standard index CRC for deleted key pages. storage/maria/ma_pagecache.c: Don't do readcheck callback on read error Reset PCBLOCK_ERROR in pagecache_unlock_by_link() if we write page Set my_errno to HA_ER_INTERNAL_ERROR if flush() finds pinned pages Don't die silently if flush finds pinned pages. 
Use correct file descriptor when flushing pages Fixed wrong hash key in 'files_in_flush'; This must be the file descriptor, not the PAGECACHE_FILE as there may be several PAGECACHE_FILE for same file descriptor More DBUG_PRINT storage/maria/ma_pagecrc.c: Removed inline from not tiny static function Set my_errno to HA_ERR_WRONG_CRC if we find page with wrong CRC (Otherwise my_errno may be 0, and a lot of other code will be confused) CRCerror -> error (to keep code uniform) Print crc with %lu, as in my_checksum() uchar* -> uchar * Check index page for length before calculating CRC to catch bad pages Added 'dummy' crc_check and filler functions that are used for temporary tables storage/maria/ma_recovery.c: More DBUG More message to users to give information what phase failed Better error message if recovery failed storage/maria/ma_test1.c: Added checking of page checksums (combined with 'c' to not have to add more test runs) storage/maria/ma_test2.c: Added checking of page checksums (combined with 'c' to not have to add more test runs) storage/maria/maria_chk.c: Fixed wrong argument to _ma_check_print_error() storage/maria/maria_def.h: Added format information to _ma_check_print_xxxx functions uchar* -> uchar * --- mysys/my_fopen.c | 2 +- mysys/my_handler.c | 5 +++-- mysys/my_lock.c | 2 +- mysys/my_pread.c | 4 ++-- mysys/my_read.c | 2 +- mysys/my_seek.c | 4 ++-- mysys/my_sync.c | 2 +- mysys/my_write.c | 2 +- 8 files changed, 12 insertions(+), 11 deletions(-) (limited to 'mysys') diff --git a/mysys/my_fopen.c b/mysys/my_fopen.c index 44156da6ae3..351851cca76 100644 --- a/mysys/my_fopen.c +++ b/mysys/my_fopen.c @@ -134,7 +134,7 @@ FILE *my_fdopen(File Filedes, const char *name, int Flags, myf MyFlags) FILE *fd; char type[5]; DBUG_ENTER("my_fdopen"); - DBUG_PRINT("my",("Fd: %d Flags: %d MyFlags: %d", + DBUG_PRINT("my",("fd: %d Flags: %d MyFlags: %d", Filedes, Flags, MyFlags)); make_ftype(type,Flags); diff --git a/mysys/my_handler.c b/mysys/my_handler.c index 
2b1c91a43e2..312227891c5 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -575,7 +575,7 @@ static const char *handler_error_messages[]= { "Didn't find key on read or update", "Duplicate key on write or update", - "Undefined handler error 122", + "Internal (unspecified) error in handler", "Someone has changed the row since it was read (while the table was locked to prevent it)", "Wrong index given to function", "Undefined handler error 125", @@ -627,7 +627,8 @@ static const char *handler_error_messages[]= "It is not possible to log this statement", "The table is of a new format not supported by this version", "Got a fatal error during initialzaction of handler", - "File to short; Expected more data in file" + "File to short; Expected more data in file", + "Read page with wrong checksum" }; diff --git a/mysys/my_lock.c b/mysys/my_lock.c index 4b8c067ba0d..200ee7188c9 100644 --- a/mysys/my_lock.c +++ b/mysys/my_lock.c @@ -49,7 +49,7 @@ int my_lock(File fd, int locktype, my_off_t start, my_off_t length, int nxErrno; #endif DBUG_ENTER("my_lock"); - DBUG_PRINT("my",("Fd: %d Op: %d start: %ld Length: %ld MyFlags: %d", + DBUG_PRINT("my",("fd: %d Op: %d start: %ld Length: %ld MyFlags: %d", fd,locktype,(long) start,(long) length,MyFlags)); #ifdef VMS DBUG_RETURN(0); diff --git a/mysys/my_pread.c b/mysys/my_pread.c index 821d8636d8e..e0218cd1f1f 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -48,7 +48,7 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, size_t readbytes; int error= 0; DBUG_ENTER("my_pread"); - DBUG_PRINT("my",("Fd: %d Seek: %lu Buffer: 0x%lx Count: %u MyFlags: %d", + DBUG_PRINT("my",("fd: %d Seek: %lu Buffer: 0x%lx Count: %u MyFlags: %d", Filedes, (ulong) offset, (long) Buffer, (uint) Count, MyFlags)); for (;;) @@ -128,7 +128,7 @@ size_t my_pwrite(int Filedes, const uchar *Buffer, size_t Count, size_t writenbytes, written; uint errors; DBUG_ENTER("my_pwrite"); - DBUG_PRINT("my",("Fd: %d Seek: %lu Buffer: 0x%lx 
Count: %u MyFlags: %d", + DBUG_PRINT("my",("fd: %d Seek: %lu Buffer: 0x%lx Count: %u MyFlags: %d", Filedes, (ulong) offset, (long) Buffer, (uint) Count, MyFlags)); errors= 0; diff --git a/mysys/my_read.c b/mysys/my_read.c index ee91620e163..63f1d4fdebd 100644 --- a/mysys/my_read.c +++ b/mysys/my_read.c @@ -37,7 +37,7 @@ size_t my_read(File Filedes, uchar *Buffer, size_t Count, myf MyFlags) { size_t readbytes, save_count; DBUG_ENTER("my_read"); - DBUG_PRINT("my",("Fd: %d Buffer: 0x%lx Count: %lu MyFlags: %d", + DBUG_PRINT("my",("fd: %d Buffer: 0x%lx Count: %lu MyFlags: %d", Filedes, (long) Buffer, (ulong) Count, MyFlags)); save_count= Count; diff --git a/mysys/my_seek.c b/mysys/my_seek.c index 2c661baeff7..4e18b510a1e 100644 --- a/mysys/my_seek.c +++ b/mysys/my_seek.c @@ -47,7 +47,7 @@ my_off_t my_seek(File fd, my_off_t pos, int whence, { reg1 os_off_t newpos= -1; DBUG_ENTER("my_seek"); - DBUG_PRINT("my",("Fd: %d Hpos: %lu Pos: %lu Whence: %d MyFlags: %d", + DBUG_PRINT("my",("fd: %d Hpos: %lu Pos: %lu Whence: %d MyFlags: %d", fd, (ulong) (((ulonglong) pos) >> 32), (ulong) pos, whence, MyFlags)); DBUG_ASSERT(pos != MY_FILEPOS_ERROR); /* safety check */ @@ -87,7 +87,7 @@ my_off_t my_tell(File fd, myf MyFlags __attribute__((unused))) { os_off_t pos; DBUG_ENTER("my_tell"); - DBUG_PRINT("my",("Fd: %d MyFlags: %d",fd, MyFlags)); + DBUG_PRINT("my",("fd: %d MyFlags: %d",fd, MyFlags)); DBUG_ASSERT(fd >= 0); #ifdef HAVE_TELL pos=tell(fd); diff --git a/mysys/my_sync.c b/mysys/my_sync.c index ba6964b00d6..1b8420c034e 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -44,7 +44,7 @@ int my_sync(File fd, myf my_flags) { int res; DBUG_ENTER("my_sync"); - DBUG_PRINT("my",("Fd: %d my_flags: %d", fd, my_flags)); + DBUG_PRINT("my",("fd: %d my_flags: %d", fd, my_flags)); do { diff --git a/mysys/my_write.c b/mysys/my_write.c index 056a84f1794..6586c9598f6 100644 --- a/mysys/my_write.c +++ b/mysys/my_write.c @@ -25,7 +25,7 @@ size_t my_write(int Filedes, const uchar *Buffer, size_t 
Count, myf MyFlags) size_t writenbytes, written; uint errors; DBUG_ENTER("my_write"); - DBUG_PRINT("my",("Fd: %d Buffer: 0x%lx Count: %lu MyFlags: %d", + DBUG_PRINT("my",("fd: %d Buffer: 0x%lx Count: %lu MyFlags: %d", Filedes, (long) Buffer, (ulong) Count, MyFlags)); errors=0; written=0; -- cgit v1.2.1 From 389dcccbed58122bb41dd0ad29aab69178592266 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 18 Dec 2007 23:55:49 +0200 Subject: - Clean up function for my_uuid() - Merge fixes - "make test" for maria - Replaced ma_test_all with a perl version, which now can be run in unittest mode. include/my_sys.h: Added clean up function for my_uuid() mysql-test/r/create.result: Fixed result file. mysql-test/r/maria-big.result: Changed not to get a warning. mysql-test/r/maria-connect.result: Not to get a warning. mysql-test/r/maria-recovery.result: Not to get a warning. mysql-test/r/maria.result: Fixed result file. Added test for warning on log file. mysql-test/r/ps_maria.result: Fixed result file. New bit was added to source. mysql-test/t/maria-big.test: To avoid a warning. mysql-test/t/maria-connect.test: To avoid a warning. mysql-test/t/maria-recovery.test: To avoid a warning. mysql-test/t/maria.test: To avoid a warning. mysql-test/t/ps_maria.test: To avoid a warning. mysys/my_uuid.c: Added clean up function for my_uuid() sql/mysqld.cc: Manual merge. storage/maria/Makefile.am: Added "make test" for maria. storage/maria/ma_key_recover.c: Fix for maria_recovery storage/maria/ma_test_all.sh: Deprecated file. 
Functionality moved to unittest/ma_test_all-t BitKeeper/deleted/.del-unit.pl: BitKeeper file /home/my/mysql-maria/storage/maria/unit.pl --- mysys/my_uuid.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'mysys') diff --git a/mysys/my_uuid.c b/mysys/my_uuid.c index 79d89920085..d97aaf604fa 100644 --- a/mysys/my_uuid.c +++ b/mysys/my_uuid.c @@ -166,3 +166,13 @@ void my_uuid(uchar *to) int2store(to+6, time_hi_and_version); bmove(to+8, uuid_suffix, sizeof(uuid_suffix)); } + + +void my_uuid_end() +{ + if (my_uuid_inited) + { + my_uuid_inited= 0; + pthread_mutex_destroy(&LOCK_uuid_generator); + } +} -- cgit v1.2.1 From efd91dff2cb010e792bea1e812d95513be24504a Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 20 Dec 2007 14:10:07 +0200 Subject: Fixes to merge. mysql-test/r/maria.result: Fixed result file. The results will be fixed by Sergei's patch. mysql-test/t/variables.test: Fixed result file. The results will be fixed by Sergei's patch. mysys/my_getopt.c: Fixed a problem with manual merge. sql/set_var.cc: Fixed a problem with manual merge. sql/set_var.h: Fixed a problem with manual merge. sql/sql_plugin.cc: Removed unneccessary function call. This was forgotten from a previous patch. 
--- mysys/my_getopt.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) (limited to 'mysys') diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c index 796062f75b0..63ef57300fa 100644 --- a/mysys/my_getopt.c +++ b/mysys/my_getopt.c @@ -27,10 +27,15 @@ typedef void (*init_func_p)(const struct my_option *option, uchar* *variable, static void default_reporter(enum loglevel level, const char *format, ...); my_error_reporter my_getopt_error_reporter= &default_reporter; -static int findopt(char *, uint, const struct my_option **, char **); -my_bool getopt_compare_strings(const char *, const char *, uint); +static int findopt(char *optpat, uint length, + const struct my_option **opt_res, + char **ffname); +my_bool getopt_compare_strings(const char *s, + const char *t, + uint length); static longlong getopt_ll(char *arg, const struct my_option *optp, int *err); -static ulonglong getopt_ull(char *, const struct my_option *, int *); +static ulonglong getopt_ull(char *arg, const struct my_option *optp, + int *err); static double getopt_double(char *arg, const struct my_option *optp, int *err); static void init_variables(const struct my_option *options, init_func_p init_one_value); @@ -38,7 +43,8 @@ static void init_one_value(const struct my_option *option, uchar* *variable, longlong value); static void fini_one_value(const struct my_option *option, uchar* *variable, longlong value); -static int setval(const struct my_option *, uchar **, char *, my_bool); +static int setval(const struct my_option *opts, uchar **value, char *argument, + my_bool set_maximum_value); static char *check_struct_option(char *cur_arg, char *key_name); /* @@ -861,7 +867,7 @@ ulonglong getopt_ull_limit_value(ulonglong num, const struct my_option *optp, bool *fix) { bool adjusted= FALSE; - ulonglong old= num; + ulonglong old= num, mod; char buf1[255], buf2[255]; if ((ulonglong) num > (ulonglong) optp->max_value && @@ -886,6 +892,8 @@ ulonglong 
getopt_ull_limit_value(ulonglong num, const struct my_option *optp, num= ((ulonglong) ULONG_MAX); adjusted= TRUE; } +#else + num= min(num, LONG_MAX); #endif break; default: @@ -951,41 +959,35 @@ static double getopt_double(char *arg, const struct my_option *optp, int *err) SYNOPSIS init_one_value() - optp Option to initialize + option Option to initialize value Pointer to variable */ -static void init_one_value(const struct my_option *optp, uchar* *variable, +static void init_one_value(const struct my_option *option, uchar* *variable, longlong value) { DBUG_ENTER("init_one_value"); - switch ((optp->var_type & GET_TYPE_MASK)) { + switch ((option->var_type & GET_TYPE_MASK)) { case GET_BOOL: *((my_bool*) variable)= (my_bool) value; break; case GET_INT: - *((int*) variable)= (int) getopt_ll_limit_value(value, optp, NULL); - break; - case GET_UINT: - *((uint*) variable)= (uint) getopt_ull_limit_value(value, optp, NULL); + *((int*) variable)= (int) value; break; + case GET_UINT: /* Fall through */ case GET_ENUM: *((uint*) variable)= (uint) value; break; case GET_LONG: - *((long*) variable)= (long) getopt_ll_limit_value(value, optp, NULL); + *((long*) variable)= (long) value; break; case GET_ULONG: - *((ulong*) variable)= (ulong) getopt_ull_limit_value(value, optp, NULL); + *((ulong*) variable)= (ulong) value; break; case GET_LL: - *((longlong*) variable)= (longlong) getopt_ll_limit_value(value, optp, - NULL); - break; - case GET_ULL: - *((ulonglong*) variable)= (ulonglong) getopt_ull_limit_value(value, optp, - NULL); + *((longlong*) variable)= (longlong) value; break; + case GET_ULL: /* Fall through */ case GET_SET: *((ulonglong*) variable)= (ulonglong) value; break; -- cgit v1.2.1 From 4140f76f4e50a0497c8d325fef7e255bb5cf4e68 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 28 Dec 2007 00:15:29 +0100 Subject: after merge include/mysql/plugin.h: move declarations after merge mysql-test/r/change_user.result: more tests mysql-test/t/change_user.test: more tests 
mysys/my_getopt.c: remove wrong code BitKeeper/etc/ignore: Added libmysqld/sql_profile.cc to the ignore list --- mysys/my_getopt.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'mysys') diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c index 63ef57300fa..61716eae2c6 100644 --- a/mysys/my_getopt.c +++ b/mysys/my_getopt.c @@ -892,8 +892,6 @@ ulonglong getopt_ull_limit_value(ulonglong num, const struct my_option *optp, num= ((ulonglong) ULONG_MAX); adjusted= TRUE; } -#else - num= min(num, LONG_MAX); #endif break; default: -- cgit v1.2.1 From 6cad02044072403c652e2da9a3cc0dfd9713f1e6 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 31 Dec 2007 11:55:46 +0200 Subject: Added maria_zerofill() This is used to bzero all not used parts of the index pages and compact and bzero the not used parts of the data pages of block-record type Added --zerofill (-z) option to maria_chk (Mostly code from Jani) Added now table states ZEROFILLED and MOVEABLE Set state.changed with new states when things changes include/maria.h: Added maria_zerofill include/myisamchk.h: Added option for zerofill Extend testflag to be 64 to allow for more flags mysql-test/r/create.result: Updated results after merge mysql-test/r/maria.result: Updated results after merge mysys/my_getopt.c: Removed not used variable sql/sql_show.cc: Fixed wrong page type storage/maria/ma_blockrec.c: Renamed compact_page() to ma_compact_block_page() and made it global Always zerofill half filled blob pages Set share.state.changed on REDO storage/maria/ma_blockrec.h: Added _ma_compact_block_page() storage/maria/ma_check.c: Added maria_zerofill() This is used to bzero all not used parts of the index pages and compact and bzero the not used parts of the data pages of block-record type This gives the following benefits: - Table is smaller if compressed - All LSN are removed for transactinal tables and this makes them movable between systems Dont set table states of we are using --quick Changed log entry for repair to use 8 
bytes for flag storage/maria/ma_delete.c: Simplify code Update state.changed storage/maria/ma_key_recover.c: Update state.changed storage/maria/ma_locking.c: Set uuid for file on first change if it's not set (table was cleared with zerofill) storage/maria/ma_loghandler.c: Updated log entry for REDO_REPAIR_TABLE storage/maria/ma_recovery.c: Updated log entry for REDO_REPAIR_TABLE (flag is now 8 bytes) Set new bits in state.changed storage/maria/ma_test_all.sh: Nicer output storage/maria/ma_test_recovery.expected: Updated results (now states flags are visible) storage/maria/ma_update.c: Update state.changed storage/maria/ma_write.c: Simplify code Set state.changed storage/maria/maria_chk.c: Added option --zerofill Added printing of states for MOVABLE and ZEROFILLED MYD -> MAD MYI -> MAI storage/maria/maria_def.h: Added states STATE_NOT_MOVABLE and STATE_NOT_ZEROFILLED Added prototype for new functions storage/maria/unittest/ma_test_all-t: More tests, including tests for zerofill Removed some not needed 'print' statements storage/maria/unittest/ma_test_loghandler_multithread-t.c: Smaller buffer to not trash devlopment machines totally --- mysys/my_getopt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c index 61716eae2c6..e0cb771ee01 100644 --- a/mysys/my_getopt.c +++ b/mysys/my_getopt.c @@ -867,7 +867,7 @@ ulonglong getopt_ull_limit_value(ulonglong num, const struct my_option *optp, bool *fix) { bool adjusted= FALSE; - ulonglong old= num, mod; + ulonglong old= num; char buf1[255], buf2[255]; if ((ulonglong) num > (ulonglong) optp->max_value && -- cgit v1.2.1 From c719e1fd335f20bc068e3174cea097101ab4cf2a Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 3 Jan 2008 09:45:46 +0200 Subject: Added wrapper for chmod(): my_chmod() Automaticly disable ma_test_recovery if not compiled with debugging. This fixes that make test works in Maria. 
Fixed wrong merge of ma_init.c from 5.1 Portability fixes: - Use my_chmod() instead of my_chmod() - Use my_access() instead of my_stat() to test if file exists - Don't test result value of pthread_mutex_lock() and pthread_mutex_unlock() as this is not portable - No reason to test if file exists before we delete it include/my_sys.h: Added my_chmod include/mysys_err.h: Added error for my_chmod mysys/Makefile.am: Added my_chmod mysys/errors.c: Added error for my_chmod mysys/my_init.c: Syncronize with 5.1 to fix setting of QueryPerformanceFrequency() storage/maria/ma_test1.c: Changed short option of --skip-delete and --skip-update to be more logical storage/maria/ma_test_recovery.expected: Updated results after adding more tests storage/maria/ma_test_recovery: Abort test nicely if we are runnning without debugging Added more tests Changed temporary file names so that one can run maria_chk on them Removed some old comments storage/maria/maria_read_log.c: Added note if maria_read_log will not be able to create byte-to-byte identical tables compared to normal execution storage/maria/unittest/ma_pagecache_consist.c: Removed wrong setting of buff that caused memory overwrite Use my_chmod() instead of chmod() Don't test result value of pthread_mutex_lock() and pthread_mutex_unlock() as this is not portable storage/maria/unittest/ma_pagecache_single.c: Use my_chmod() instead of chmod() Don't test result value of pthread_mutex_lock() and pthread_mutex_unlock() as this is not portable storage/maria/unittest/ma_test_loghandler_first_lsn-t.c: No reason to test if file exists before we delete it storage/maria/unittest/ma_test_loghandler_multithread-t.c: Don't test result value of pthread_mutex_lock() and pthread_mutex_unlock() as this is not portable storage/maria/unittest/ma_test_loghandler_noflush-t.c: No reason to test if file exists before we delete it storage/maria/unittest/ma_test_loghandler_nologs-t.c: Use my_access() instead of my_stat() to test if file exists 
storage/maria/unittest/ma_test_loghandler_pagecache-t.c: No reason to test if file exists before we delete it chmod -> my_chmod mysys/my_chmod.c: Added wrapper for chmod() --- mysys/Makefile.am | 2 +- mysys/errors.c | 4 +++- mysys/my_chmod.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ mysys/my_init.c | 45 ++++++++++++++++++++++++--------------------- 4 files changed, 76 insertions(+), 23 deletions(-) create mode 100644 mysys/my_chmod.c (limited to 'mysys') diff --git a/mysys/Makefile.am b/mysys/Makefile.am index 10200fde8be..27cae5c6363 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -45,7 +45,7 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ tree.c trie.c list.c hash.c array.c string.c typelib.c \ my_copy.c my_append.c my_lib.c \ my_delete.c my_rename.c my_redel.c \ - my_chsize.c my_clock.c \ + my_chsize.c my_chmod.c my_clock.c \ my_quick.c my_lockmem.c my_static.c \ my_sync.c my_getopt.c my_mkdir.c \ default_modify.c default.c \ diff --git a/mysys/errors.c b/mysys/errors.c index 889cf6d7fe3..db63667fb77 100644 --- a/mysys/errors.c +++ b/mysys/errors.c @@ -49,7 +49,8 @@ const char * NEAR globerrs[GLOBERRS]= "Can't sync file '%s' to disk (Errcode: %d)", "Collation '%s' is not a compiled collation and is not specified in the '%s' file", "File '%s' not found (Errcode: %d)", - "File '%s' (fileno: %d) was not closed" + "File '%s' (fileno: %d) was not closed", + "Can't change mode for file '%s' to 0x%lx (Error: %d)" }; void init_glob_errs(void) @@ -90,5 +91,6 @@ void init_glob_errs() EE(EE_UNKNOWN_COLLATION)= "Collation '%s' is not a compiled collation and is not specified in the %s file"; EE(EE_FILENOTFOUND) = "File '%s' not found (Errcode: %d)"; EE(EE_FILE_NOT_CLOSED) = "File '%s' (fileno: %d) was not closed"; + EE(EE_CANT_CHMOD) = "Can't change mode for file '%s' to 0x%lx (Error: %d)"; } #endif diff --git a/mysys/my_chmod.c b/mysys/my_chmod.c new file mode 100644 index 00000000000..afdea758833 --- /dev/null +++ 
b/mysys/my_chmod.c @@ -0,0 +1,48 @@ +/* Copyright (C) 2000 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "mysys_priv.h" +#include "mysys_err.h" + +/** + @brief Change mode of file. + + @fn my_chmod() + @param name Filename + @param mode_t Mode + @param my_flags Flags + + @notes + The mode of the file given by path or referenced by fildes is changed + + @retval 0 Ok + @retval # Error +*/ + +int my_chmod(const char *name, mode_t mode, myf my_flags) +{ + DBUG_ENTER("my_chmod"); + DBUG_PRINT("my",("name: %s mode: %lu flags: %d", name, (ulong) mode, + my_flags)); + + if (chmod(name, mode)) + { + my_errno= errno; + if (my_flags & MY_WME) + my_error(EE_CANT_CHMOD, MYF(0), name, (ulong) mode, my_errno); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} diff --git a/mysys/my_init.c b/mysys/my_init.c index 145a435b4b6..a153275f87e 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -356,6 +356,30 @@ static void my_win_init(void) _tzset(); + /* The following is used by time functions */ +#define OFFSET_TO_EPOC ((__int64) 134774 * 24 * 60 * 60 * 1000 * 1000 * 10) +#define MS 10000000 + { + FILETIME ft; + LARGE_INTEGER li, t_cnt; + DBUG_ASSERT(sizeof(LARGE_INTEGER) == sizeof(query_performance_frequency)); + if (QueryPerformanceFrequency((LARGE_INTEGER *)&query_performance_frequency) == 0) + query_performance_frequency= 0; + else + { + GetSystemTimeAsFileTime(&ft); + 
li.LowPart= ft.dwLowDateTime; + li.HighPart= ft.dwHighDateTime; + query_performance_offset= li.QuadPart-OFFSET_TO_EPOC; + QueryPerformanceCounter(&t_cnt); + query_performance_offset-= (t_cnt.QuadPart / + query_performance_frequency * MS + + t_cnt.QuadPart % + query_performance_frequency * MS / + query_performance_frequency); + } + } + /* apre la chiave HKEY_LOCAL_MACHINES\software\MySQL */ if (RegOpenKeyEx(HKEY_LOCAL_MACHINE,(LPCTSTR)targetKey,0, KEY_READ,&hSoftMysql) != ERROR_SUCCESS) @@ -393,27 +417,6 @@ static void my_win_init(void) /* chiude la chiave */ RegCloseKey(hSoftMysql) ; - /* The following is used by time functions */ -#define OFFSET_TO_EPOC ((__int64) 134774 * 24 * 60 * 60 * 1000 * 1000 * 10) -#define MS 10000000 - { - FILETIME ft; - LARGE_INTEGER li, t_cnt; - DBUG_ASSERT(sizeof(LARGE_INTEGER) == sizeof(query_performance_frequency)); - if (QueryPerformanceFrequency((LARGE_INTEGER *)&query_performance_frequency)) - query_performance_frequency= 0; - else - { - GetSystemTimeAsFileTime(&ft); - li.LowPart= ft.dwLowDateTime; - li.HighPart= ft.dwHighDateTime; - query_performance_offset= li.QuadPart-OFFSET_TO_EPOC; - QueryPerformanceCounter(&t_cnt); - query_performance_offset-= (t_cnt.QuadPart / query_performance_frequency * MS + - t_cnt.QuadPart % query_performance_frequency * MS / - query_performance_frequency); - } - } DBUG_VOID_RETURN ; } -- cgit v1.2.1 From b5df1d344641716d5dee146a164e11f388d77cb6 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 7 Jan 2008 18:54:41 +0200 Subject: Bugs fixed: - If not in autocommit mode, delete rows one by one so that we can roll back if necessary - bitmap->used_size was not correctly set, which caused bitmap pages to be overwritten - Fixed bug in bitmap handling when allocation tail pages - Ensure we reserve place for directory entry when calculation place for head and tail pages - Fixed wrong value in bitmap->size[0] - Fixed wrong assert in flush_log_for_bitmap - Fixed bug in _ma_bitmap_release_unused() where tail 
blocks could be wrongly reset - Mark new pages as changed (Required to get repair() to work) - Fixed problem with advancing log horizon pointer within one page bounds - Fixed DBUG_ASSERT() when enable_indexes failes for end_bulk_insert() - Fixed bug in logging of rows with more than one big blob - Fixed DBUG_ASSERTS() in pagecache to allow change of WRITE_LOCK to READ_LOCK in unlock() calls - Flush pagecache when we change from logging to not logging (if not, pagecache code breaks) - Ensure my_errno is set on return from write/delete/update - Fixed bug when using FIELD_SKIP_PRESPACE New features: - mysql_fix_privilege_tables now first uses binaries and scripts from source distribution, then in installed distribution - Fix that optimize works for Maria tables - maria_check --zerofill now also clear freed blob pages - maria_check -di now prints more information about record page utilization Optimizations: - Use pagecache_unlock_by_link() instead of pagecache_write() if possible. (Avoids a memory copy and a find_block) - Simplify code to abort when we found optimal bit pattern - Skip also full head page bit patterns when searching for tail - Increase default repair buffer to 128M for maria_chk and maria_read_log - Increase default sort buffer for maria_chk to 64M - Increase size of sortbuffer and pagecache for mysqld to 64M - VARCHAR/CHAR fields are stored in increasing length order for BLOCK_RECORD tables Better reporting: - Fixed test of error condition for flush (for better error code) - More error messages to mysqld if Maria recovery fails - Always print warning if rows are deleted in repair - Added global function _db_force_flush() that is usable when doing debugging in gdb - Added call to my_debug_put_break_here() in case of some errors (for debugging) - Remove used testfiles in unittest as these was written in different directories depending on from where the test was started This should fix the bugs found when importing a big table with many varchars and 
one/many blobs to Maria dbug/dbug.c: Added global function _db_force_flush() that is usable when doing debugging in gdbine extra/replace.c: Fixed memory leak include/my_dbug.h: Prototype for _db_force_flush() include/my_global.h: Added stdarg.h as my_sys.h now depends on it. include/my_sys.h: Make my_dbug_put_break_here() a NOP if not DBUG build Added my_printv_error() include/myisamchk.h: Added entry 'lost' to be able to count space that is lost forever mysql-test/r/maria.result: Updated results mysql-test/t/maria.test: Reset autocommit after test New test to check if delete_all_rows is used (verified with --debug) mysys/my_error.c: Added my_printv_error() scripts/mysql_fix_privilege_tables.sh: First use binaries and scripts from source distribution, then in installed distribution This ensures that a development branch doesn't pick up wrong scripts) sql/mysqld.cc: Fix that one can break maria recovery with ^C when debugging sql/sql_class.cc: Removed #ifdef that has no effect (The preceeding DBUG_ASSERT() ensures that the following code will not be exectued) storage/maria/ha_maria.cc: Increase size of sortbuffer and pagecache to 64M Fix that optimize works for Maria tables Fixed DBUG_ASSERT() when enable_indexes failes for end_bulk_insert() If not in autocommit mode, delete rows one by one so that we can roll back if necessary Fixed variable comments storage/maria/ma_bitmap.c: More ASSERTS to detect overwrite of bitmap pages bitmap->used_size was not correctly set, which caused bitmap pages to be overwritten Ensure we reserve place for directory entry when calculation place for head and tail pages bitmap->size[0] should not include space for directory entry Simplify code to abort when we found optimal bit pattern Skip also full head page bit patterns when searching for tail (should speed up some common cases) Fixed bug in allocate_tail() when block->used was not aligned on 6 bytes Fixed wrong assert in flush_log_for_bitmap Fixed bug in _ma_bitmap_release_unused() 
where tail blocks could be wrongly reset storage/maria/ma_blockrec.c: Ensure my_errno is set on return Fixed not optimal setting of row->min_length if we don't have variable length fields Use pagecache_unlock_by_link() instead of pagecache_write() if possible. (Avoids a memory copy and a find_block) Added DBUG_ASSERT() if we read or write wrong VARCHAR data Added DBUG_ASSERT() to find out if row sizes are calculated wrong Fixed bug in logging of rows with more than one big blob storage/maria/ma_check.c: Disable logging while normal repair is done to avoid logging of index changes Fixed bug that caused CHECKSUM part of key page to be used Fixed that deleted of wrong records also works for BLOCK_RECORD Clear unallocated pages: - BLOB pages are not automaticly cleared on delete, so we need to use the bitmap to know if page is used or not Better error reporting More information about record page utilization Change printing of file position to printing of pages to make output more readable Always print warning if rows are deleted storage/maria/ma_create.c: Calculate share.base_max_pack_length more accurately for BLOCK_RECORD pages (for future) Fixed that FIELD_SKIP_PRESPACE is recorded as FIELD_NORMAL; Fixed bug where fields could be used in wrong order Store FIELD_SKIP_ZERO fields before CHAR and VARCHAR fields (optimization) Store other fields in length order (to get better utilization of head block) storage/maria/ma_delete.c: Ensure my_errno is set on return storage/maria/ma_dynrec.c: Indentation fix storage/maria/ma_locking.c: Set changed if open_count is counted down. 
(To avoid getting error "client is using or hasn't closed the table properly" with transactional tables storage/maria/ma_loghandler.c: Fixed problem with advancing log horizon pointer within one page bounds (Patch from Sanja) Added more DBUG Indentation fixes storage/maria/ma_open.c: Removed wrong casts storage/maria/ma_page.c: Fixed usage of PAGECACHE_LOCK_WRITE_UNLOCK with _ma_new() Mark new pages as changed (Required to get repair() to work) storage/maria/ma_pagecache.c: Fixed test of error condition for flush Fixed problem when using PAGECACHE_LOCK_WRITE_TO_READ with unlock() Added call to my_debug_put_break_here() in case of errors (for debugging) storage/maria/ma_pagecrc.c: Ensure we get same crc for 32 and 64 bit systems by forcing argument to maria_page_crc to uint32 storage/maria/ma_recovery.c: Call my_printv_error() from eprint() to get critical errors to mysqld log Removed \n from error strings to eprint() to get nicer output in mysqld Added simple test in _ma_reenable_logging_for_table() to not do any work if not needed storage/maria/ma_update.c: Ensure my_errno is set on return storage/maria/ma_write.c: Ensure my_errno is set on return storage/maria/maria_chk.c: Use DEBUGGER_OFF if --debug is not use (to get slightly faster execution for debug binaries) Added option --skip-safemalloc Don't write exponents for rec/key storage/maria/maria_def.h: Increase default repair buffer to 128M for maria_chk and maria_read_log Increase default sort buffer for maria_chk to 64M storage/maria/unittest/Makefile.am: Don't update files automaticly from bitkeeper storage/maria/unittest/ma_pagecache_consist.c: Remove testfile at end storage/maria/unittest/ma_pagecache_single.c: Remove testfile at end storage/maria/unittest/ma_test_all-t: More tests Safer checking if test caused error --- mysys/my_error.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/my_error.c b/mysys/my_error.c index 
d26c3d8cfde..81abbde96df 100644 --- a/mysys/my_error.c +++ b/mysys/my_error.c @@ -114,15 +114,39 @@ int my_printf_error(uint error, const char *format, myf MyFlags, ...) va_list args; char ebuff[ERRMSGSIZE+20]; DBUG_ENTER("my_printf_error"); - DBUG_PRINT("my", ("nr: %d MyFlags: %d errno: %d Format: %s", + DBUG_PRINT("my", ("nr: %d MyFlags: %d errno: %d format: %s", error, MyFlags, errno, format)); va_start(args,MyFlags); - (void) my_vsnprintf (ebuff, sizeof(ebuff), format, args); + (void) my_vsnprintf(ebuff, sizeof(ebuff), format, args); va_end(args); DBUG_RETURN((*error_handler_hook)(error, ebuff, MyFlags)); } + +/* + Error with va_list + + SYNOPSIS + my_printv_error() + error Errno + format Format string + MyFlags Flags + ... variable list +*/ + +int my_printv_error(uint error, const char *format, myf MyFlags, va_list ap) +{ + char ebuff[ERRMSGSIZE+20]; + DBUG_ENTER("my_printv_error"); + DBUG_PRINT("my", ("nr: %d MyFlags: %d errno: %d format: %s", + error, MyFlags, errno, format)); + + (void) my_vsnprintf(ebuff, sizeof(ebuff), format, ap); + DBUG_RETURN((*error_handler_hook)(error, ebuff, MyFlags)); +} + + /* Give message using error_handler_hook -- cgit v1.2.1 From ce8de7afdffd46507f000048a17722f81eec7688 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 10 Jan 2008 13:21:53 +0100 Subject: Windows fixes -new option WITH_MARIA_STORAGE_ENGINE for config.js -correct build errors -build test executables -downport changes for atomic functions from 5.2 -remove LOCK_uuid_generator from C++ files to avoid linker errors -new function my_uuid2str() BitKeeper/deleted/.del-x86-msvc.h: Delete: include/atomic/x86-msvc.h CMakeLists.txt: Windows fixes: -New option WITH_MARIA_STORAGE_ENGINE -Add unit tests include/Makefile.am: replace x86-msvc.h with generic-msvc.h include/config-win.h: my_chmod() support include/my_atomic.h: Downport my_atomic from 5.2 tree include/my_bit.h: Correct unresolved symbol errors on Windows include/my_pthread.h: pthread_mutex_unlock now returns 0 
(was void previously) defined PTHREAD_STACK_MIN include/my_sys.h: New function my_uuid2str() define MY_UUID_STRING_LENGTH include/atomic/nolock.h: Downport my_atomic from 5.2 tree libmysqld/CMakeLists.txt: New option WITH_MARIA_STORAGE_ENGINE mysys/CMakeLists.txt: Add missing files mysys/lf_dynarray.c: Fix compiler errors on Windows mysys/my_getncpus.c: Windows port mysys/my_uuid.c: Windows fixes: there is no random() on Windows, use ANSI rand() New function my_uuid2str() mysys/my_winthread.c: Downport from 5.2 tree -Call my_thread_end() before pthread_exit() -Avoid crash if pthread_create is called with NULL attributes sql/CMakeLists.txt: Link mysqld with Maria storage engine sql/item_func.cc: Remove LOCK_uuid_generator from C++ to avoid linker errors. Use dedicated mutex for short uuids sql/item_strfunc.cc: Use my_uuid() and my_uuid2str() functions from mysys. sql/item_strfunc.h: Define MY_UUID_STRING_LENGTH in my_sys.h sql/mysql_priv.h: LOCK_uuid_generator must be declared as extern "C" sql/mysqld.cc: Init and destroy LOCK_uuid_short mutex storage/maria/CMakeLists.txt: -Use the same source files as in Makefile.am -Build test binaries storage/maria/ha_maria.cc: snprintf->my_snprintf storage/maria/lockman.c: Fix compiler error on Windows storage/maria/ma_check.c: Fix compiler error on Windows storage/maria/ma_loghandler.c: Fix compile errors my_open()/my_sync() do not work for directories on Windows storage/maria/ma_recovery.c: Fix compile error on Windows storage/maria/ma_test2.c: Rename variable to avoid naming conflict with Microsoft C runtime function storage/maria/ma_test3.c: Fix build errors on Windows storage/maria/tablockman.c: Fix build errors on Windows storage/maria/unittest/Makefile.am: Add CMakeLists.txt storage/maria/unittest/ma_pagecache_consist.c: Fix build errors on Windows remove loop from get_len() storage/maria/unittest/ma_pagecache_single.c: Fix build errors on Windows storage/maria/unittest/ma_test_loghandler-t.c: Windows fixes -Avoid 
division by 0 in expressions like x/(RAND_MAX/y), where y is larger than RAND_MAX(==0x7fff on Windows) storage/maria/unittest/ma_test_loghandler_multigroup-t.c: Windows fixes -Avoid division by 0 in expressions like x/(RAND_MAX/y), where y is larger than RAND_MAX(==0x7fff on Windows) -remove loop in get_len() storage/maria/unittest/ma_test_loghandler_multithread-t.c: Windows fixes -Avoid division by 0 in expressions like x/(RAND_MAX/y), where y is larger than RAND_MAX(==0x7fff on Windows) -remove loop in get_len() storage/maria/unittest/ma_test_loghandler_noflush-t.c: Fix build errors on Windows storage/maria/unittest/test_file.c: Correct the code to get file size on Windows. stat() information can be outdated and thus cannot be trusted. On Vista,stat() returns file size=0 until the file is closed at the first time. storage/myisam/CMakeLists.txt: Fix compiler errors on Windows Build test executables storage/myisam/mi_test2.c: Rename variable to avoid naming conflict with Microsoft C runtime function storage/myisam/mi_test3.c: Fix build errors on Windows strings/CMakeLists.txt: Add missing file unittest/unit.pl: Windows: downport unittest changes from 5.2 bk tree unittest/mysys/Makefile.am: Windows: downport unittest changes from 5.2 bk tree unittest/mysys/my_atomic-t.c: Windows: downport unittest changes from 5.2 bk tree unittest/mytap/Makefile.am: Windows: downport unittest changes from 5.2 bk tree unittest/mytap/tap.c: Windows: downport unittest changes from 5.2 bk tree win/configure.js: Add WITH_MARIA_STORAGE_ENGINE configure option unittest/mytap/CMakeLists.txt: Add missing file unittest/mysys/CMakeLists.txt: Add missing file storage/maria/unittest/CMakeLists.txt: Add missing file BitKeeper/etc/ignore: Added comments maria-win.patch to the ignore list include/atomic/generic-msvc.h: Implement atomic operations with MSVC intrinsics --- mysys/CMakeLists.txt | 8 ++++++-- mysys/lf_dynarray.c | 8 ++++---- mysys/my_getncpus.c | 24 +++++++++++++----------- 
mysys/my_uuid.c | 32 +++++++++++++++++++++++++++----- mysys/my_winthread.c | 22 ++++++++++++++++------ 5 files changed, 66 insertions(+), 28 deletions(-) (limited to 'mysys') diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt index 5b024056fda..dffce464d8b 100755 --- a/mysys/CMakeLists.txt +++ b/mysys/CMakeLists.txt @@ -31,7 +31,7 @@ SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c default.c default_ mf_keycaches.c mf_loadpath.c mf_pack.c mf_path.c mf_qsort.c mf_qsort2.c mf_radix.c mf_same.c mf_sort.c mf_soundex.c mf_strip.c mf_arr_appstr.c mf_tempdir.c mf_tempfile.c mf_unixpath.c mf_wcomp.c mf_wfile.c mulalloc.c my_access.c - my_aes.c my_alarm.c my_alloc.c my_append.c my_bit.c my_bitmap.c my_chsize.c + my_aes.c my_alarm.c my_alloc.c my_append.c my_bit.c my_bitmap.c my_chmod.c my_chsize.c my_clock.c my_compress.c my_conio.c my_copy.c my_crc32.c my_create.c my_delete.c my_div.c my_error.c my_file.c my_fopen.c my_fstream.c my_gethostbyname.c my_gethwaddr.c my_getopt.c my_getsystime.c my_getwd.c my_handler.c my_init.c @@ -41,7 +41,11 @@ SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c default.c default_ my_static.c my_symlink.c my_symlink2.c my_sync.c my_thr_init.c my_wincond.c my_windac.c my_winthread.c my_write.c ptr_cmp.c queues.c rijndael.c safemalloc.c sha1.c string.c thr_alarm.c thr_lock.c thr_mutex.c - thr_rwlock.c tree.c typelib.c my_vle.c base64.c my_memmem.c my_getpagesize.c) + thr_rwlock.c tree.c typelib.c my_vle.c base64.c my_memmem.c my_getpagesize.c + lf_alloc-pin.c lf_dynarray.c lf_hash.c + my_atomic.c my_getncpus.c my_rnd.c + my_uuid.c wqueue.c +) IF(NOT SOURCE_SUBLIBS) ADD_LIBRARY(mysys ${MYSYS_SOURCES}) diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c index 770b1f9342b..7c8f54f07cf 100644 --- a/mysys/lf_dynarray.c +++ b/mysys/lf_dynarray.c @@ -37,7 +37,7 @@ */ #include -#include +#include #include #include @@ -123,7 +123,7 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) } if (!(ptr= *ptr_ptr)) { - void 
*alloc, *data; + uchar *alloc, *data; alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element + max(array->size_of_element, sizeof(void *)), MYF(MY_WME|MY_ZEROFILL)); @@ -142,7 +142,7 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) else my_free(alloc, MYF(0)); } - return ptr + array->size_of_element * idx; + return ((uchar*)ptr) + array->size_of_element * idx; } /* @@ -167,7 +167,7 @@ void *_lf_dynarray_value(LF_DYNARRAY *array, uint idx) } if (!(ptr= *ptr_ptr)) return(NULL); - return ptr + array->size_of_element * idx; + return ((uchar*)ptr) + array->size_of_element * idx; } static int recursive_iterate(LF_DYNARRAY *array, void *ptr, int level, diff --git a/mysys/my_getncpus.c b/mysys/my_getncpus.c index 82e87dee2e4..4cb96ac0bca 100644 --- a/mysys/my_getncpus.c +++ b/mysys/my_getncpus.c @@ -16,24 +16,26 @@ /* get the number of (online) CPUs */ #include "mysys_priv.h" +#ifdef HAVE_UNISTD_H #include +#endif static int ncpus=0; -#ifdef _SC_NPROCESSORS_ONLN int my_getncpus() { if (!ncpus) + { +#ifdef _SC_NPROCESSORS_ONLN ncpus= sysconf(_SC_NPROCESSORS_ONLN); - return ncpus; -} - +#elif defined(__WIN__) + SYSTEM_INFO sysinfo; + GetSystemInfo(&sysinfo); + ncpus= sysinfo.dwNumberOfProcessors; #else -/* unknown */ -int my_getncpus() -{ - return 2; -} - +/* unknown so play safe: assume SMP and forbid uniprocessor build */ + ncpus= 2; #endif - + } + return ncpus; +} diff --git a/mysys/my_uuid.c b/mysys/my_uuid.c index d97aaf604fa..1ebc394cafa 100644 --- a/mysys/my_uuid.c +++ b/mysys/my_uuid.c @@ -41,6 +41,7 @@ #include "mysys_priv.h" #include +#include /* mi_int2store, mi_int4store */ static my_bool my_uuid_inited= 0; static struct my_rnd_struct uuid_rand; @@ -67,7 +68,7 @@ pthread_mutex_t LOCK_uuid_generator; static void set_clock_seq() { uint16 clock_seq= ((uint)(my_rnd(&uuid_rand)*16383)) | UUID_VARIANT; - int2store(uuid_suffix, clock_seq); + mi_int2store(uuid_suffix, clock_seq); } @@ -105,7 +106,7 @@ void my_uuid_init(ulong seed1, ulong seed2) 
randominit() here. */ /* purecov: begin inspected */ - my_rnd_init(&uuid_rand, (ulong) (seed2+ now/2), now+random()); + my_rnd_init(&uuid_rand, (ulong) (seed2+ now/2), now+rand()); for (i=0; i < sizeof(mac); i++) mac[i]= (uchar)(my_rnd(&uuid_rand)*255); /* purecov: end */ @@ -160,14 +161,35 @@ void my_uuid(uchar *to) Note, that the standard does NOT specify byte ordering in multi-byte fields. it's implementation defined (but must be the same for all fields). + We use big-endian, so we can use memcmp() to compare UUIDs + and for straightforward UUID to string conversion. */ - int4store(to, time_low); - int2store(to+4, time_mid); - int2store(to+6, time_hi_and_version); + mi_int4store(to, time_low); + mi_int2store(to+4, time_mid); + mi_int2store(to+6, time_hi_and_version); bmove(to+8, uuid_suffix, sizeof(uuid_suffix)); } +/** + Convert uuid to string representation + + @func my_uuid2str() + @param guid uuid + @param s Output buffer.Must be at least MY_UUID_STRING_LENGTH+1 large. +*/ +void my_uuid2str(const uchar *guid, char *s) +{ + int i; + for (i=0; i < MY_UUID_SIZE; i++) + { + *s++= _dig_vec_lower[guid[i] >>4]; + *s++= _dig_vec_lower[guid[i] & 15]; + if(i == 4 || i == 6 || i == 8 || i == 10) + *s++= '-'; + } +} + void my_uuid_end() { if (my_uuid_inited) diff --git a/mysys/my_winthread.c b/mysys/my_winthread.c index e94369bec32..0af6a47ec4a 100644 --- a/mysys/my_winthread.c +++ b/mysys/my_winthread.c @@ -77,12 +77,15 @@ pthread_handler_t pthread_start(void *param) { pthread_handler func=((struct pthread_map *) param)->func; void *func_param=((struct pthread_map *) param)->param; + void *result; my_thread_init(); /* Will always succeed in windows */ pthread_mutex_lock(&THR_LOCK_thread); /* Wait for beginthread to return */ win_pthread_self=((struct pthread_map *) param)->pthreadself; pthread_mutex_unlock(&THR_LOCK_thread); free((char*) param); /* Free param from create */ - pthread_exit((void*) (*func)(func_param)); + result= (void*) (*func)(func_param); + 
my_thread_end(); + pthread_exit(result); return 0; /* Safety */ } @@ -92,21 +95,28 @@ int pthread_create(pthread_t *thread_id, pthread_attr_t *attr, { HANDLE hThread; struct pthread_map *map; + DWORD StackSize= 0; + int priority= 0; DBUG_ENTER("pthread_create"); if (!(map=malloc(sizeof(*map)))) DBUG_RETURN(-1); map->func=func; map->param=param; + if (attr != NULL) + { + StackSize= attr->dwStackSize; + priority= attr->priority; + } + if (StackSize == 0) + StackSize= PTHREAD_STACK_MIN; pthread_mutex_lock(&THR_LOCK_thread); #ifdef __BORLANDC__ hThread=(HANDLE)_beginthread((void(_USERENTRY *)(void *)) pthread_start, - attr->dwStackSize ? attr->dwStackSize : - 65535, (void*) map); + StackSize, (void*) map); #else hThread=(HANDLE)_beginthread((void( __cdecl *)(void *)) pthread_start, - attr->dwStackSize ? attr->dwStackSize : - 65535, (void*) map); + StackSize, (void*) map); #endif DBUG_PRINT("info", ("hThread=%lu",(long) hThread)); *thread_id=map->pthreadself=hThread; @@ -119,7 +129,7 @@ int pthread_create(pthread_t *thread_id, pthread_attr_t *attr, ("Can't create thread to handle request (error %d)",error)); DBUG_RETURN(error ? error : -1); } - VOID(SetThreadPriority(hThread, attr->priority)) ; + VOID(SetThreadPriority(hThread, priority)) ; DBUG_RETURN(0); } -- cgit v1.2.1 From 266fde77b283237fa2dd6db0f97fb68289fe0c21 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 11 Jan 2008 01:47:52 +0200 Subject: Added --loose-skip-maria to MYSQLD_BOOTSTRAP_CMD to get bootstrap.test to work Allow one to run bootstrap even if --skip-maria is used (needed for bootstrap.test) Fixed lots of compiler warnings NOTE: maria-big and maria-recover tests failes becasue of bugs in transaction log handling. Sanja knows about this and is working on it! 
mysql-test/mysql-test-run.pl: Added --loose-skip-maria to MYSQLD_BOOTSTRAP_CMD to get bootstrap.test to work mysql-test/r/maria-recovery.result: Updated results mysql-test/t/bootstrap.test: Removed not needed empty line mysql-test/t/change_user.test: Fixed results for 32 bit systems mysql-test/t/maria-big.test: Only run this when you use --big mysql-test/t/maria-recovery.test: Added test case for recovery with big blobs mysys/my_uuid.c: Fixed compiler warning sql/mysqld.cc: Allow one to run bootstrap even if --skip-maria is used (needed for bootstrap.test) sql/set_var.cc: Compare max_join_size with ULONG_MAX instead of HA_POS_ERROR as we set max_join_size to ULONG_MAX by default storage/maria/ma_bitmap.c: Added __attribute((unused)) to fix compiler warning storage/maria/ma_blockrec.c: Added casts to remove compiler warnings Change variable types to avoid compiler warnings storage/maria/ma_check.c: Added casts to remove compiler warnings storage/maria/ma_checkpoint.c: Change variable types to avoid compiler warnings storage/maria/ma_create.c: Change variable types to avoid compiler warnings storage/maria/ma_delete.c: Added casts to remove compiler warnings storage/maria/ma_key_recover.c: Added casts to remove compiler warnings storage/maria/ma_loghandler.c: Moved initiazation of prev_buffer first as this could otherwise not be set in case of errors storage/maria/ma_page.c: Added casts to remove compiler warnings storage/maria/ma_pagecache.c: Added __attribute((unused)) to fix compiler warning storage/maria/ma_pagecrc.c: Added #ifndef DBUG_OFF to remove compiler warning storage/maria/ma_recovery.c: Added casts to remove compiler warnings storage/maria/ma_write.c: Added casts to remove compiler warnings storage/maria/maria_chk.c: Split long string into two to avoid compiler warnings storage/myisam/ft_boolean_search.c: Added LINT_INIT() to remove compiler warning support-files/compiler_warnings.supp: Suppress wrong compiler warning unittest/mytap/tap.c: Fixed 
declaration to match prototypes to remove compiler warnings --- mysys/my_uuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_uuid.c b/mysys/my_uuid.c index 1ebc394cafa..d6a4946954a 100644 --- a/mysys/my_uuid.c +++ b/mysys/my_uuid.c @@ -106,7 +106,7 @@ void my_uuid_init(ulong seed1, ulong seed2) randominit() here. */ /* purecov: begin inspected */ - my_rnd_init(&uuid_rand, (ulong) (seed2+ now/2), now+rand()); + my_rnd_init(&uuid_rand, (ulong) (seed2+ now/2), (ulong) (now+rand())); for (i=0; i < sizeof(mac); i++) mac[i]= (uchar)(my_rnd(&uuid_rand)*255); /* purecov: end */ -- cgit v1.2.1 From 978f3e3c3b2646a1f5328dffdb342c739a80b169 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 12 Jan 2008 13:16:55 +0200 Subject: Fixes to bugs and compiler warnings detected by pushbuild Don't compare uuid's on maria_open() if table is not transactional Fixed that my_microtime() works on windows (patch added from 5.1) mysql-test/r/create.result: Portability fix mysql-test/t/create.test: Portability fix mysql-test/t/maria.test: Remove warnings, to make test portable to windows mysys/my_getsystime.c: Add missed merge from 5.1 This fixes that timing works on windows and fixes that that failed becasuse slow_query_log didn't get any queries storage/maria/ma_check.c: Removed not needed setting of variable storage/maria/ma_loghandler.c: Fixed compiler warnings storage/maria/ma_open.c: Don't compare uuid's if table is not transactional KNOWN_BUGS.txt: New BitKeeper file ``KNOWN_BUGS.txt'' --- mysys/my_getsystime.c | 46 +++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 19 deletions(-) (limited to 'mysys') diff --git a/mysys/my_getsystime.c b/mysys/my_getsystime.c index 43bb6c08af9..9ccbd3433be 100644 --- a/mysys/my_getsystime.c +++ b/mysys/my_getsystime.c @@ -35,9 +35,9 @@ ulonglong my_getsystime() if (query_performance_frequency) { QueryPerformanceCounter(&t_cnt); - return (t_cnt.QuadPart / 
query_performance_frequency * 10000000+ - t_cnt.QuadPart % query_performance_frequency * 10000000/ - query_performance_frequency+query_performance_offset); + return ((t_cnt.QuadPart / query_performance_frequency * 10000000) + + (t_cnt.QuadPart % query_performance_frequency * 10000000 / + query_performance_frequency) + query_performance_offset); } return 0; #elif defined(__NETWARE__) @@ -108,16 +108,20 @@ ulonglong my_micro_time() if (query_performance_frequency) { QueryPerformanceCounter((LARGE_INTEGER*) &newtime); - newtime/= (query_performance_frequency * 1000000); + newtime= ((new_time / query_performance_frequency * 10000000) + + (new_time % query_performance_frequency * 10000000 / + query_performance_frequency)); } else - newtime= (GetTickCount() * 1000); /* GetTickCount only returns milliseconds */ + newtime= (GetTickCount() * 1000); /* GetTickCount only returns millisec. */ return newtime; #elif defined(HAVE_GETHRTIME) return gethrtime()/1000; #else struct timeval t; - /* The following loop is here because gettimeofday may fail on some systems */ + /* + The following loop is here because gettimeofday may fail on some systems + */ while (gettimeofday(&t, NULL) != 0) {} newtime= (ulonglong)t.tv_sec * 1000000 + t.tv_usec; @@ -131,18 +135,18 @@ ulonglong my_micro_time() SYNOPSIS my_micro_time_and_time() - time_arg Will be set to seconds since epoch (00:00:00 UTC, January 1, - 1970) + time_arg Will be set to seconds since epoch (00:00:00 UTC, + January 1, 1970) NOTES This function is to be useful when we need both the time and microtime. - For example in MySQL this is used to get the query time start of a query and - to measure the time of a query (for the slow query log) + For example in MySQL this is used to get the query time start of a query + and to measure the time of a query (for the slow query log) IMPLEMENTATION Value of time is as in time() call. 
- Value of microtime is same as my_micro_time(), which may be totally unrealated - to time() + Value of microtime is same as my_micro_time(), which may be totally + unrealated to time() RETURN Value in microseconds from some undefined point in time @@ -157,16 +161,18 @@ ulonglong my_micro_time_and_time(time_t *time_arg) if (query_performance_frequency) { QueryPerformanceCounter((LARGE_INTEGER*) &newtime); - newtime/= (query_performance_frequency * 1000000); + newtime= ((new_time / query_performance_frequency * 10000000) + + (new_time % query_performance_frequency * 10000000 / + query_performance_frequency)); } else - newtime= (GetTickCount() * 1000); /* GetTickCount only returns milliseconds */ + newtime= (GetTickCount() * 1000); /* GetTickCount only returns millisec. */ (void) time(time_arg); return newtime; #elif defined(HAVE_GETHRTIME) /* - Solaris has a very slow time() call. We optimize this by using the very fast - gethrtime() call and only calling time() every 1/2 second + Solaris has a very slow time() call. We optimize this by using the very + fast gethrtime() call and only calling time() every 1/2 second */ static hrtime_t prev_gethrtime= 0; static time_t cur_time= 0; @@ -184,7 +190,9 @@ ulonglong my_micro_time_and_time(time_t *time_arg) return cur_gethrtime/1000; #else struct timeval t; - /* The following loop is here because gettimeofday may fail on some systems */ + /* + The following loop is here because gettimeofday may fail on some systems + */ while (gettimeofday(&t, NULL) != 0) {} *time_arg= t.tv_sec; @@ -203,8 +211,8 @@ ulonglong my_micro_time_and_time(time_t *time_arg) NOTES This function returns the current time. The microtime argument is only used - if my_micro_time() uses a function that can safely be converted to the current - time. + if my_micro_time() uses a function that can safely be converted to the + current time. 
RETURN current time -- cgit v1.2.1 From 89ff50d4296e849cec70799f7bda8c6217601867 Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 13 Jan 2008 00:30:38 +0200 Subject: Fixed compiler warnings Fixed type that caused windows builds to fail include/my_alloc.h: Use size_t for memory areas instead of uint mysys/mf_iocache.c: Fixed compiler warnings by adding casts mysys/my_compress.c: Fixed compiler warnings by adding casts mysys/my_getsystime.c: Fixed typo mysys/my_static.h: Use size_t for memory areas mysys/safemalloc.c: Use size_t for memory areas storage/maria/ma_bitmap.c: Fixed compiler warnings by adding casts storage/maria/ma_blockrec.c: Fixed compiler warnings by adding casts storage/maria/ma_cache.c: Use size_t for memory areas storage/maria/ma_info.c: Use size_t for memory areas storage/maria/ma_key.c: Fixed compiler warnings by adding casts storage/maria/ma_locking.c: Fixed compiler warnings by adding casts storage/maria/ma_open.c: Fixed compiler warnings by adding casts storage/maria/ma_packrec.c: Fixed compiler warnings by fixing type for variable storage/maria/ma_statrec.c: Fixed compiler warnings by adding casts storage/maria/ma_write.c: Fixed compiler warnings by adding casts storage/maria/maria_def.h: Use size_t for memory areas storage/myisam/mi_search.c: Fixed compiler warnings by adding casts --- mysys/mf_iocache.c | 8 ++++---- mysys/my_compress.c | 4 ++-- mysys/my_getsystime.c | 9 ++++----- mysys/my_static.h | 2 +- mysys/safemalloc.c | 15 +++++++++------ 5 files changed, 20 insertions(+), 18 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c index 1124ebceb2c..6d63f8b8bf5 100644 --- a/mysys/mf_iocache.c +++ b/mysys/mf_iocache.c @@ -527,7 +527,7 @@ int _my_b_read(register IO_CACHE *info, uchar *Buffer, size_t Count) { if (Count) { - info->error= left_length; /* We only got this many char */ + info->error= (int) left_length; /* We only got this many char */ DBUG_RETURN(1); } length=0; /* Didn't read any chars */ @@ 
-1255,7 +1255,7 @@ read_append_buffer: info->append_read_pos += copy_len; Count -= copy_len; if (Count) - info->error = save_count - Count; + info->error= (int) (save_count - Count); /* Fill read buffer with data from write buffer */ memcpy(info->buffer, info->append_read_pos, @@ -1644,8 +1644,8 @@ int my_block_write(register IO_CACHE *info, const uchar *Buffer, size_t Count, { /* Of no overlap, write everything without buffering */ if (pos + Count <= info->pos_in_file) - return my_pwrite(info->file, Buffer, Count, pos, - info->myflags | MY_NABP); + return (int) my_pwrite(info->file, Buffer, Count, pos, + info->myflags | MY_NABP); /* Write the part of the block that is before buffer */ length= (uint) (info->pos_in_file - pos); if (my_pwrite(info->file, Buffer, length, pos, info->myflags | MY_NABP)) diff --git a/mysys/my_compress.c b/mysys/my_compress.c index ab17b10e72c..70d2960f48e 100644 --- a/mysys/my_compress.c +++ b/mysys/my_compress.c @@ -67,7 +67,7 @@ uchar *my_compress_alloc(const uchar *packet, size_t *len, size_t *complen) if (!(compbuf= (uchar *) my_malloc(*complen, MYF(MY_WME)))) return 0; /* Not enough memory */ - tmp_complen= *complen; + tmp_complen= (uLongf) *complen; res= compress((Bytef*) compbuf, &tmp_complen, (Bytef*) packet, (uLong) *len); *complen= tmp_complen; @@ -118,7 +118,7 @@ my_bool my_uncompress(uchar *packet, size_t len, size_t *complen) if (!compbuf) DBUG_RETURN(1); /* Not enough memory */ - tmp_complen= *complen; + tmp_complen= (uLongf) *complen; error= uncompress((Bytef*) compbuf, &tmp_complen, (Bytef*) packet, (uLong) len); *complen= tmp_complen; diff --git a/mysys/my_getsystime.c b/mysys/my_getsystime.c index 9ccbd3433be..57167711074 100644 --- a/mysys/my_getsystime.c +++ b/mysys/my_getsystime.c @@ -108,8 +108,8 @@ ulonglong my_micro_time() if (query_performance_frequency) { QueryPerformanceCounter((LARGE_INTEGER*) &newtime); - newtime= ((new_time / query_performance_frequency * 10000000) + - (new_time % query_performance_frequency 
* 10000000 / + newtime= ((newtime / query_performance_frequency * 10000000) + + (newtime % query_performance_frequency * 10000000 / query_performance_frequency)); } else @@ -161,8 +161,8 @@ ulonglong my_micro_time_and_time(time_t *time_arg) if (query_performance_frequency) { QueryPerformanceCounter((LARGE_INTEGER*) &newtime); - newtime= ((new_time / query_performance_frequency * 10000000) + - (new_time % query_performance_frequency * 10000000 / + newtime= ((newtime / query_performance_frequency * 10000000) + + (newtime % query_performance_frequency * 10000000 / query_performance_frequency)); } else @@ -231,4 +231,3 @@ time_t my_time_possible_from_micro(ulonglong microtime __attribute__((unused))) return (time_t) (microtime / 1000000); #endif /* defined(__WIN__) */ } - diff --git a/mysys/my_static.h b/mysys/my_static.h index 0eca196c1c9..90168b099a8 100644 --- a/mysys/my_static.h +++ b/mysys/my_static.h @@ -44,8 +44,8 @@ struct st_irem struct st_irem *next; /* Linked list of structures */ struct st_irem *prev; /* Other link */ char *filename; /* File in which memory was new'ed */ + size_t datasize; /* Size requested */ uint32 linenum; /* Line number in above file */ - uint32 datasize; /* Size requested */ uint32 SpecialValue; /* Underrun marker value */ }; diff --git a/mysys/safemalloc.c b/mysys/safemalloc.c index 6c8a080fbf3..1ccfa213756 100644 --- a/mysys/safemalloc.c +++ b/mysys/safemalloc.c @@ -404,14 +404,16 @@ void TERMINATE(FILE *file, uint flag) if (file) { fprintf(file, - "\t%6u bytes at 0x%09lx, allocated at line %4u in '%s'", - irem->datasize, (long) data, irem->linenum, irem->filename); + "\t%6lu bytes at 0x%09lx, allocated at line %4u in '%s'", + (ulong) irem->datasize, (long) data, + irem->linenum, irem->filename); fprintf(file, "\n"); (void) fflush(file); } DBUG_PRINT("safe", - ("%6u bytes at 0x%09lx, allocated at line %4d in '%s'", - irem->datasize, (long) data, irem->linenum, irem->filename)); + ("%6lu bytes at 0x%09lx, allocated at line %4d in 
'%s'", + (ulong) irem->datasize, (long) data, + irem->linenum, irem->filename)); irem= irem->next; } } @@ -445,8 +447,9 @@ void sf_malloc_report_allocated(void *memory) sf_malloc_prehunc); if (data <= (char*) memory && (char*) memory <= data + irem->datasize) { - printf("%u bytes at 0x%lx, allocated at line %u in '%s'\n", - irem->datasize, (long) data, irem->linenum, irem->filename); + printf("%lu bytes at 0x%lx, allocated at line %u in '%s'\n", + (ulong) irem->datasize, (long) data, + irem->linenum, irem->filename); break; } } -- cgit v1.2.1 From 2fcff8988aee4f444ed30b3248a60b7ed357bd6c Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 29 Jan 2008 22:20:59 +0100 Subject: Fix for BUG#34114 "maria_chk reports false error when several tables on command-line" and BUG#34062 "Maria table corruption on master". Use 5 bytes (instead of 4) to store page's number in the checkpoint record, to allow bigger table (1PB with maria-block-size=1kB). Help pushbuild not run out of memory by moving the portion of maria-recovery.test which generates lots of data into a -big.test. mysql-test/r/maria-recovery.result: result moved mysql-test/t/maria-recovery.test: piece which generates much data moved to maria-recovery-big.test mysys/my_pread.c: To fix BUG#34062, where a 1.1TB file was generated due to a wrong pwrite offset, it was useful to not lose precision on 'offset' in DBUG_PRINT, so that the crazy value is visible. mysys/my_read.c: To fix BUG#34062, where a 1.1TB file was generated due to a wrong pwrite offset, it was useful to not lose precision on 'offset' in DBUG_PRINT, so that the crazy value is visible. mysys/my_write.c: To fix BUG#34062, where a 1.1TB file was generated due to a wrong pwrite offset, it was useful to not lose precision on 'offset' in DBUG_PRINT, so that the crazy value is visible. storage/maria/ha_maria.cc: When starting a bulk insert, we throw away dirty index pages from the cache. 
Unique (non disabled) key insertions thus read out-of-date pages from the disk leading to BUG#34062 "Maria table corruption on master": a DELETE in procedure viewer_sp() had deleted all rows of viewer_tbl2 one by one, putting index page 1 into key_del; that page was thrown away at start of INSERT SELECT, then the INSERT SELECT needed a page to insert keys, looked at key_del, found 1, read page 1 from disk, and its out-of-date content was used to set the new value of key_del (crazy value of 1TB), then a later insertion needed another index page, tried to read page at this crazy offset and failed, leading to corruption mark. The fix is to destroy out-of-date pages and make the state consistent with that, i.e. call maria_delete_all_rows(). storage/maria/ma_blockrec.c: Special hook for UNDO_BULK_INSERT storage/maria/ma_blockrec.h: special hook for UNDO_BULK_INSERT storage/maria/ma_check.c: Fix for BUG#34114 "maria_chk reports false error when several tables on command-line": if the Nth (on the command line) table was BLOCK_RECORD it would start checks by using the param->record_checksum computed by checks of table N-1. 
storage/maria/ma_delete_all.c: comment storage/maria/ma_loghandler.c: special hook for UNDO_BULK_INSERT storage/maria/ma_page.c: comment storage/maria/ma_pagecache.c: page number is 5 bytes in checkpoint record now (allows bigger tables) storage/maria/ma_recovery.c: page number is 5 bytes in checkpoint record now storage/maria/ma_recovery_util.c: page number is 5 bytes now storage/maria/ma_write.c: typo mysql-test/r/maria-recovery-big.result: result is correct mysql-test/t/maria-recovery-big-master.opt: usual options for recovery tests mysql-test/t/maria-recovery-big.test: Moving out the big blob test to a -big test (it exhausts memory when using /dev/shm on certain machines) --- mysys/my_pread.c | 19 +++++++++++++------ mysys/my_read.c | 8 ++++++-- mysys/my_write.c | 8 ++++++-- 3 files changed, 25 insertions(+), 10 deletions(-) (limited to 'mysys') diff --git a/mysys/my_pread.c b/mysys/my_pread.c index e0218cd1f1f..cfccc40a782 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -16,6 +16,7 @@ #include "mysys_priv.h" #include "mysys_err.h" #include "my_base.h" +#include #include #ifdef HAVE_PREAD #include @@ -47,10 +48,13 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, { size_t readbytes; int error= 0; +#ifndef DBUG_OFF + char llbuf1[22], llbuf2[22]; DBUG_ENTER("my_pread"); - DBUG_PRINT("my",("fd: %d Seek: %lu Buffer: 0x%lx Count: %u MyFlags: %d", - Filedes, (ulong) offset, (long) Buffer, (uint) Count, - MyFlags)); + DBUG_PRINT("my",("fd: %d Seek: %s Buffer: 0x%lx Count: %s MyFlags: %d", + Filedes, ullstr(offset, llbuf1), + (long) Buffer, ullstr(Count, llbuf2), MyFlags)); +#endif for (;;) { #ifndef __WIN__ @@ -127,10 +131,13 @@ size_t my_pwrite(int Filedes, const uchar *Buffer, size_t Count, { size_t writenbytes, written; uint errors; +#ifndef DBUG_OFF + char llbuf1[22], llbuf2[22]; DBUG_ENTER("my_pwrite"); - DBUG_PRINT("my",("fd: %d Seek: %lu Buffer: 0x%lx Count: %u MyFlags: %d", - Filedes, (ulong) offset, (long) Buffer, (uint) 
Count, - MyFlags)); + DBUG_PRINT("my",("fd: %d Seek: %s Buffer: 0x%lx Count: %s MyFlags: %d", + Filedes, ullstr(offset, llbuf1), + (long) Buffer, ullstr(Count, llbuf2), MyFlags)); +#endif errors= 0; written= 0; diff --git a/mysys/my_read.c b/mysys/my_read.c index 63f1d4fdebd..0d6c8d14416 100644 --- a/mysys/my_read.c +++ b/mysys/my_read.c @@ -16,6 +16,7 @@ #include "mysys_priv.h" #include "mysys_err.h" #include +#include #include /* @@ -36,9 +37,12 @@ size_t my_read(File Filedes, uchar *Buffer, size_t Count, myf MyFlags) { size_t readbytes, save_count; +#ifndef DBUG_OFF + char llbuf[22]; DBUG_ENTER("my_read"); - DBUG_PRINT("my",("fd: %d Buffer: 0x%lx Count: %lu MyFlags: %d", - Filedes, (long) Buffer, (ulong) Count, MyFlags)); + DBUG_PRINT("my",("fd: %d Buffer: 0x%lx Count: %s MyFlags: %d", + Filedes, (long) Buffer, ullstr(Count, llbuf), MyFlags)); +#endif save_count= Count; for (;;) diff --git a/mysys/my_write.c b/mysys/my_write.c index 7f8b85c241e..515ccb5fd37 100644 --- a/mysys/my_write.c +++ b/mysys/my_write.c @@ -15,6 +15,7 @@ #include "mysys_priv.h" #include "mysys_err.h" +#include #include @@ -24,9 +25,12 @@ size_t my_write(int Filedes, const uchar *Buffer, size_t Count, myf MyFlags) { size_t writenbytes, written; uint errors; +#ifndef DBUG_OFF + char llbuf[22]; DBUG_ENTER("my_write"); - DBUG_PRINT("my",("fd: %d Buffer: 0x%lx Count: %lu MyFlags: %d", - Filedes, (long) Buffer, (ulong) Count, MyFlags)); + DBUG_PRINT("my",("fd: %d Buffer: 0x%lx Count: %s MyFlags: %d", + Filedes, (long) Buffer, ullstr(Count, llbuf), MyFlags)); +#endif errors=0; written=0; /* The behavior of write(fd, buf, 0) is not portable */ -- cgit v1.2.1 From ab0fa111fef1afbd04156624cdb29781f08adcac Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 31 Jan 2008 03:06:04 +0200 Subject: Fixed bug in restoring auto-increment value in case of duplicate key with insert or update Fixed bug when calculating max_key_length that caused some ALTER TABLE to fail if MAX_ROWS was used. 
Use maria_block_size instead of MARIA_MIN_KEY_BLOCK_LENGTH Fixed bug when scanning table with BLOCK format for repair; If table was > bitmap coverage one page block was read twice which caused a lot of duplicate key errors Could not repeat Bug#34106 "auto_increment is reset to 1 when table is recovered from crash" after this patch. NOTE: This is an incompatible change, so one must do maria_chk -r on ones old Maria tables! Sorry, but this was needed to fix the bug with max_key_length and to be able to handle bigger key files with smaller key references cmd-line-utils/readline/readline.c: Fixed compiler warnings mysql-test/r/maria.result: Added more test of auto-increment handling mysql-test/t/maria.test: Added more test of auto-increment handling mysys/my_pread.c: Fixed wrong test Removed not needed tests (error is always 1 if readbytes != Count) mysys/my_read.c: Fixed wrong test storage/maria/ha_maria.cc: Disable LOAD INDEX until I got Sanja's extension to pagecache interface storage/maria/ma_blockrec.c: Ensure that info->last_auto_increment is reset properly storage/maria/ma_check.c: Fixed wrong printing of row number in case of duplicate key for --safe-repair Safety fix in recreate table so that Column numbers are given to maria_create() in original order Added missing HA_OPEN_FOR_REPAIR to maria_open() Fixed bug when scanning table with BLOCK format for repair; If table was > bitmap coverage one page block was read twice which caused a lot of duplicate key errors storage/maria/ma_create.c: Use correct value for how much free space there is on a key page Remember some missing table option when doing re-create. Removed optimization where last packed fields is unpacked; Caused problems for re-create. 
storage/maria/ma_delete.c: Ensure that info->last_auto_increment is reset properly Fix for update to restore autoincrement value on duplicate key storage/maria/ma_key_recover.c: Moved handling of restoring value of auto-increment in case of duplicate key from clr to undo This ensures the restoring works both for insert and update and also that this is symetrical to how the auto_increment value is stored storage/maria/ma_key_recover.h: Added new prototype storage/maria/ma_loghandler.c: Added hook to write_hook_for_undo_key_delete() storage/maria/ma_open.c: Fixed wrong calculation of max_key_file_length storage/maria/ma_page.c: Use maria_block_size instead of MARIA_MIN_KEY_BLOCK_LENGTH Increase internal buffer (safety fix) storage/maria/ma_search.c: Use maria_block_size instead of MARIA_MIN_KEY_BLOCK_LENGTH Note that this is an incompatible change, so one must do maria_chk -r on ones old Maria tables (sorry) storage/maria/ma_update.c: Ensure that info->last_auto_increment is reset properly storage/maria/ma_write.c: Ensure that info->last_auto_increment is reset properly Fix for update to restore autoincrement value on duplicate key storage/maria/maria_chk.c: Allow small page_buffer_size Fixed printing for --describe to better fit into 80 characters storage/maria/maria_def.h: Added comments --- mysys/my_pread.c | 5 +++-- mysys/my_read.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'mysys') diff --git a/mysys/my_pread.c b/mysys/my_pread.c index e0218cd1f1f..d3e308ad70f 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -66,11 +66,12 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, if ((error= ((readbytes= pread(Filedes, Buffer, Count, offset)) != Count))) { my_errno= errno; - if (errno == 0 || (errno == -1 && (MyFlags & (MY_NABP | MY_FNABP)))) + if (errno == 0 || (readbytes == (size_t) -1 && + (MyFlags & (MY_NABP | MY_FNABP)))) my_errno= HA_ERR_FILE_TOO_SHORT; } #endif - if (error || readbytes != Count) + if 
(error) { DBUG_PRINT("warning",("Read only %d bytes off %u from %d, errno: %d", (int) readbytes, (uint) Count,Filedes,my_errno)); diff --git a/mysys/my_read.c b/mysys/my_read.c index 63f1d4fdebd..375e0acc144 100644 --- a/mysys/my_read.c +++ b/mysys/my_read.c @@ -47,7 +47,8 @@ size_t my_read(File Filedes, uchar *Buffer, size_t Count, myf MyFlags) if ((readbytes= read(Filedes, Buffer, Count)) != Count) { my_errno= errno; - if (errno == 0 || (errno == -1 && (MyFlags & (MY_NABP | MY_FNABP)))) + if (errno == 0 || (readbytes == (size_t) -1 && + (MyFlags & (MY_NABP | MY_FNABP)))) my_errno= HA_ERR_FILE_TOO_SHORT; DBUG_PRINT("warning",("Read only %d bytes off %lu from %d, errno: %d", (int) readbytes, (ulong) Count, Filedes, -- cgit v1.2.1 From 7323df7885e9c50b4e6e7f4cc7251a845cad9630 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 31 Jan 2008 23:17:50 +0100 Subject: Minor changes. New description in SHOW ENGINES for Maria. Test for BUG#34106 "auto_increment is reset to 1 when table is recovered from crash" (fixed by Monty yesterday) mysql-test/r/maria-recovery.result: result, which is correct (before pulling Monty's fix for BUG#34106, we got a warning about auto_increment in CHECK TABLE (done in maria-verify-recovery.inc), no AUTO_INCREMENT clause in SHOW CREATE TABLE, and a failure of the last INSERT. mysql-test/r/maria.result: result mysql-test/t/maria-recovery.test: Test for BUG#34106 mysql-test/t/maria.test: look at what is reported in SHOW ENGINES mysys/my_pread.c: changed my mind: if Count argument is >4GB, we'll surely see a segfault in the pread() call when it tries to read 4GB from memory, so no need to print it in ulonglong format (saves a function call). mysys/my_read.c: changed my mind: if Count argument is >4GB, we'll surely see a segfault in the pread() call when it tries to read 4GB from memory, so no need to print it in ulonglong format (saves a function call). 
mysys/my_write.c: changed my mind: if Count argument is >4GB, we'll surely see a segfault in the pread() call when it tries to read 4GB from memory, so no need to print it in ulonglong format (saves a function call). storage/maria/ha_maria.cc: Description representing the current reality. This can be changed later storage/maria/ma_page.c: When reading the new key_del from a page on disk, if there is a bug (like BUG#34062) this key_del could be wrong, we try to catch if it's out of the key file. storage/maria/ma_pagecache.c: - no truncation of page's number in DBUG_PRINT (useful for BUG#34062) - page_korr instead of uint5korr storage/maria/ma_recovery.c: page_korr instead of uint5korr storage/maria/plug.in: Description representing the current reality. This can be changed later. --- mysys/my_pread.c | 16 ++++++++-------- mysys/my_read.c | 8 ++------ mysys/my_write.c | 8 ++------ 3 files changed, 12 insertions(+), 20 deletions(-) (limited to 'mysys') diff --git a/mysys/my_pread.c b/mysys/my_pread.c index 3dff034c15b..7ad7a8faaf8 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -49,11 +49,11 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, size_t readbytes; int error= 0; #ifndef DBUG_OFF - char llbuf1[22], llbuf2[22]; + char llbuf[22]; DBUG_ENTER("my_pread"); - DBUG_PRINT("my",("fd: %d Seek: %s Buffer: 0x%lx Count: %s MyFlags: %d", - Filedes, ullstr(offset, llbuf1), - (long) Buffer, ullstr(Count, llbuf2), MyFlags)); + DBUG_PRINT("my",("fd: %d Seek: %s Buffer: 0x%lx Count: %lu MyFlags: %d", + Filedes, ullstr(offset, llbuf), (long) Buffer, + (ulong)Count, MyFlags)); #endif for (;;) { @@ -133,11 +133,11 @@ size_t my_pwrite(int Filedes, const uchar *Buffer, size_t Count, size_t writenbytes, written; uint errors; #ifndef DBUG_OFF - char llbuf1[22], llbuf2[22]; + char llbuf[22]; DBUG_ENTER("my_pwrite"); - DBUG_PRINT("my",("fd: %d Seek: %s Buffer: 0x%lx Count: %s MyFlags: %d", - Filedes, ullstr(offset, llbuf1), - (long) Buffer, 
ullstr(Count, llbuf2), MyFlags)); + DBUG_PRINT("my",("fd: %d Seek: %s Buffer: 0x%lx Count: %lu MyFlags: %d", + Filedes, ullstr(offset, llbuf), (long) Buffer, + (ulong)Count, MyFlags)); #endif errors= 0; written= 0; diff --git a/mysys/my_read.c b/mysys/my_read.c index 64f2cf42b4c..375e0acc144 100644 --- a/mysys/my_read.c +++ b/mysys/my_read.c @@ -16,7 +16,6 @@ #include "mysys_priv.h" #include "mysys_err.h" #include -#include #include /* @@ -37,12 +36,9 @@ size_t my_read(File Filedes, uchar *Buffer, size_t Count, myf MyFlags) { size_t readbytes, save_count; -#ifndef DBUG_OFF - char llbuf[22]; DBUG_ENTER("my_read"); - DBUG_PRINT("my",("fd: %d Buffer: 0x%lx Count: %s MyFlags: %d", - Filedes, (long) Buffer, ullstr(Count, llbuf), MyFlags)); -#endif + DBUG_PRINT("my",("fd: %d Buffer: 0x%lx Count: %lu MyFlags: %d", + Filedes, (long) Buffer, (ulong) Count, MyFlags)); save_count= Count; for (;;) diff --git a/mysys/my_write.c b/mysys/my_write.c index 515ccb5fd37..7f8b85c241e 100644 --- a/mysys/my_write.c +++ b/mysys/my_write.c @@ -15,7 +15,6 @@ #include "mysys_priv.h" #include "mysys_err.h" -#include #include @@ -25,12 +24,9 @@ size_t my_write(int Filedes, const uchar *Buffer, size_t Count, myf MyFlags) { size_t writenbytes, written; uint errors; -#ifndef DBUG_OFF - char llbuf[22]; DBUG_ENTER("my_write"); - DBUG_PRINT("my",("fd: %d Buffer: 0x%lx Count: %s MyFlags: %d", - Filedes, (long) Buffer, ullstr(Count, llbuf), MyFlags)); -#endif + DBUG_PRINT("my",("fd: %d Buffer: 0x%lx Count: %lu MyFlags: %d", + Filedes, (long) Buffer, (ulong) Count, MyFlags)); errors=0; written=0; /* The behavior of write(fd, buf, 0) is not portable */ -- cgit v1.2.1 From 4a1763e4287724421720e174f938d7303ad07c04 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 2 Feb 2008 00:01:31 +0100 Subject: Fix for Windows-specific bugs: - one which led REDO_INSERT_ROW_BLOBS to fail to apply - one excess close ("-1 file left open") Don't need maria-path option / environment variable. 
Fixes for ma_test_all-t to run under Windows. Port of ma_test_recovery to Perl, written by Jani. storage/maria/unittest/ma_test_recovery.expected: Rename: storage/maria/ma_test_recovery.expected -> storage/maria/unittest/ma_test_recovery.expected mysys/my_pread.c: Fix for Windows-specific bug (maria_read_log -a failed during ma_test_all-t): Windows does not have pread() so the branch setting HA_ERR_FILE_TOO_SHORT was not compiled in, broke applying of REDO_INSERT_ROW_BLOBS. After fixing that, it appeared that in my Windows machine, errno is not changed in case of EOF; as we read it we have to reset it at start. The changed to readbytes!=-1 is to detect EOF mysys/my_read.c: The change to readbytes!=-1 is to detect EOF storage/maria/ma_loghandler.c: Fix for Windows-specific bug: as we don't open the directory we should not close it. storage/maria/ma_page.c: This is C, cannot declare variable after instruction. storage/maria/ma_test_recovery: ma_test_recovery.expected moved storage/maria/unittest/ma_test_all-t: Can now safely guess maria_path so don't need the command-line option or environment variable. Port to Windows (.exe, different locations of executables); can guess suffix, don't need --suffix. storage/maria/unittest/ma_test_recovery.pl: Perl version of ma_test_recovery, written by Jani. Will deprecate the shell version. 
--- mysys/my_pread.c | 17 ++++++++--------- mysys/my_read.c | 4 ++-- 2 files changed, 10 insertions(+), 11 deletions(-) (limited to 'mysys') diff --git a/mysys/my_pread.c b/mysys/my_pread.c index 7ad7a8faaf8..6b6957a24e5 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -47,7 +47,7 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, myf MyFlags) { size_t readbytes; - int error= 0; + int error= 0, save_errno; #ifndef DBUG_OFF char llbuf[22]; DBUG_ENTER("my_pread"); @@ -57,26 +57,25 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, #endif for (;;) { -#ifndef __WIN__ - errno=0; /* Linux doesn't reset this */ -#endif + errno=0; /* Linux, Windows don't reset this on EOF/success */ #ifndef HAVE_PREAD pthread_mutex_lock(&my_file_info[Filedes].mutex); readbytes= (uint) -1; error= (lseek(Filedes, offset, MY_SEEK_SET) == (my_off_t) -1 || (readbytes= read(Filedes, Buffer, Count)) != Count); + save_errno= errno; pthread_mutex_unlock(&my_file_info[Filedes].mutex); + if (error) + { + errno= save_errno; #else if ((error= ((readbytes= pread(Filedes, Buffer, Count, offset)) != Count))) { +#endif my_errno= errno; - if (errno == 0 || (readbytes == (size_t) -1 && + if (errno == 0 || (readbytes != (size_t) -1 && (MyFlags & (MY_NABP | MY_FNABP)))) my_errno= HA_ERR_FILE_TOO_SHORT; - } -#endif - if (error) - { DBUG_PRINT("warning",("Read only %d bytes off %u from %d, errno: %d", (int) readbytes, (uint) Count,Filedes,my_errno)); #ifdef THREAD diff --git a/mysys/my_read.c b/mysys/my_read.c index 375e0acc144..8e098924e43 100644 --- a/mysys/my_read.c +++ b/mysys/my_read.c @@ -43,11 +43,11 @@ size_t my_read(File Filedes, uchar *Buffer, size_t Count, myf MyFlags) for (;;) { - errno= 0; /* Linux doesn't reset this */ + errno= 0; /* Linux, Windows don't reset this on EOF/success */ if ((readbytes= read(Filedes, Buffer, Count)) != Count) { my_errno= errno; - if (errno == 0 || (readbytes == (size_t) -1 && + if (errno == 0 || (readbytes 
!= (size_t) -1 && (MyFlags & (MY_NABP | MY_FNABP)))) my_errno= HA_ERR_FILE_TOO_SHORT; DBUG_PRINT("warning",("Read only %d bytes off %lu from %d, errno: %d", -- cgit v1.2.1 From a1834802c98cb541be97295aafdddca6b8369efe Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 2 Feb 2008 01:00:27 +0100 Subject: compiler warning --- mysys/my_pread.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_pread.c b/mysys/my_pread.c index 6b6957a24e5..093a5a7a40c 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -47,7 +47,10 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, myf MyFlags) { size_t readbytes; - int error= 0, save_errno; + int error= 0; +#ifndef HAVE_PREAD + int save_errno; +#endif #ifndef DBUG_OFF char llbuf[22]; DBUG_ENTER("my_pread"); -- cgit v1.2.1 From 7e54b96791b7233896f73d05e79146a1c612bc9b Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 5 Feb 2008 14:19:53 +0100 Subject: cleanup dbug/dbug.c: dbug naming conventions include/my_dbug.h: unused function --- mysys/checksum.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'mysys') diff --git a/mysys/checksum.c b/mysys/checksum.c index 0cc9801c2b1..1d264b54321 100644 --- a/mysys/checksum.c +++ b/mysys/checksum.c @@ -32,17 +32,9 @@ ha_checksum my_crc_dbug_check= 1; /* Unlikely number */ ha_checksum my_checksum(ha_checksum crc, const uchar *pos, size_t length) { -#ifdef NOT_USED - const uchar *end=pos+length; - for ( ; pos != end ; pos++) - crc=((crc << 8) + *((uchar*) pos)) + (crc >> (8*sizeof(ha_checksum)-8)); -#else crc= (ha_checksum) crc32((uint)crc, pos, length); -#endif /* NOT_USED */ DBUG_PRINT("info", ("crc: %lu", (ulong) crc)); -#ifndef DBUG_OFF if (crc == my_crc_dbug_check) my_debug_put_break_here(); -#endif return crc; } -- cgit v1.2.1 From 7300af848a5a1c6e75e909657c172af8c8169831 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 6 Feb 2008 18:02:05 +0100 Subject: Fixes for running maria-recovery*.test and 
maria-purge.test under Windows. include/my_dbug.h: a DBUG expression to force a flush of the trace file then an abort of the process mysql-test/include/wait_until_connected_again.inc: mysqladmin waits for pid file to be gone only under Unix; so maria_empty_logs.inc cannot wait for mysqld to be gone, so wait_until_connected_again.inc may send its "show status" to a not-yet-dead server hence the 1053 error ("server shutdown in progress") mysys/my_thr_init.c: overload abort() under Windows, to not have an annoying CRT popup ("ignore/abort/retry" buttons) each time a test intentionally crashes mysqld sql/handler.cc: use new expression sql/log.cc: use new expression sql/mysql_priv.h: use new expression storage/maria/ha_maria.cc: use new expression storage/maria/ma_blockrec.c: use new expression storage/maria/ma_check.c: use new expression storage/maria/ma_checkpoint.c: use new expression storage/maria/ma_control_file.c: Can't yet lock control file under Windows (test suite problems, plus concerns about stray lock preventing a fast restart after crash). storage/maria/ma_loghandler.c: A file which should be closed, otherwise translog_purge() (the caller) cannot delete logs. --- mysys/my_thr_init.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'mysys') diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c index aadb86d39ed..f4c7f607a45 100644 --- a/mysys/my_thr_init.c +++ b/mysys/my_thr_init.c @@ -444,3 +444,18 @@ static uint get_thread_lib(void) } #endif /* THREAD */ + + +#ifdef __WIN__ +/* + With Windows debug builds abort() causes a popup from CRT; as abort() + is used in tests it is annoying so we use a custom one. 
+*/ +void abort(void) +{ +#ifdef REENABLE_AFTER_FIX_FOR_BUG_31745 /* don't want a popup */ + raise(SIGABRT); +#endif + _exit(3); +} +#endif -- cgit v1.2.1 From 0954594b2d42d8e3acd411bc08a59cdd88ecb178 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 7 Feb 2008 16:54:13 +0100 Subject: Fixes for DBUG_ABORT() BitKeeper/deleted/.del-.tree-is-private: Delete: .tree-is-private include/my_dbug.h: To disable the popup of abort() we use _CrtReportMode/File() (thanks Wlad) mysys/my_thr_init.c: Visual Studio 2005 does not allow overloading library functions. --- mysys/my_thr_init.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'mysys') diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c index f4c7f607a45..aadb86d39ed 100644 --- a/mysys/my_thr_init.c +++ b/mysys/my_thr_init.c @@ -444,18 +444,3 @@ static uint get_thread_lib(void) } #endif /* THREAD */ - - -#ifdef __WIN__ -/* - With Windows debug builds abort() causes a popup from CRT; as abort() - is used in tests it is annoying so we use a custom one. 
-*/ -void abort(void) -{ -#ifdef REENABLE_AFTER_FIX_FOR_BUG_31745 /* don't want a popup */ - raise(SIGABRT); -#endif - _exit(3); -} -#endif -- cgit v1.2.1 From bee40ef1d0756c2dc05c6ce75774b972a78410e9 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 13 Feb 2008 18:25:56 +0100 Subject: build/test failures on different platforms include/atomic/rwlock.h: define MY_ATOMIC_MODE_RWLOCKS if this is the way we have to go mysys/lf_alloc-pin.c: no semicolon mysys/lf_hash.c: no semicolon storage/maria/lockman.c: no semicolon storage/maria/ma_loghandler.c: no semicolon unittest/mysys/my_atomic-t.c: powerpc is no better (condition could be a bit too broad, but hey, it's just a unit test) --- mysys/lf_alloc-pin.c | 2 +- mysys/lf_hash.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index ff9c5a42f81..054f33482f9 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -403,7 +403,7 @@ found: /* lock-free memory allocator for fixed-size objects */ -LF_REQUIRE_PINS(1); +LF_REQUIRE_PINS(1) /* callback for _lf_pinbox_real_free to free a list of unpinned objects - diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 322f04cdc41..c197cc99711 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -28,7 +28,7 @@ #include #include -LF_REQUIRE_PINS(3); +LF_REQUIRE_PINS(3) /* An element of the list */ typedef struct { -- cgit v1.2.1 From 8170b22b5ca69386fb2fcab479e989d084ca8774 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 13 Feb 2008 21:27:12 +0200 Subject: Fixed compiler warnings in a lot of files Added IMPOSSIBLE_RESULT to avoid compiler warnings when using (Item_result) -1 as a dummy value Changed PAGE_SIZE to TEST_PAGE_SIZE to avoid compiler errors on systems where PAGE_SIZE is defined client/get_password.c: Fixed compiler warning cmd-line-utils/readline/bind.c: Fixed compiler warning cmd-line-utils/readline/chardefs.h: Fixed compiler warning by adding marco to be used when largest_char 
is 255 cmd-line-utils/readline/display.c: Fixed compiler warnings by removing not accessed variables cmd-line-utils/readline/histexpand.c: Fixed compiler warnings by removing not accessed variables cmd-line-utils/readline/history.c: Fixed compiler warnings by adding cast cmd-line-utils/readline/text.c: Fixed compiler warnings by removing not accessed variables and adding casts dbug/dbug.c: Fixed compiler warnings by changing types include/mysql_com.h: Added IMPOSSIBLE_RESULT to avoid compiler warnings when using (Item_result) -1 as a dummy value libmysql/libmysql.c: Fixed compiler warning mysql-test/t/query_cache_debug.test: Mark test as BIG as it uses a lot of memory mysys/mf_iocache2.c: Fixed compiler warnings by adding cast sql/event_data_objects.cc: Fixed compiler warnings by removing not used code sql/events.cc: Fixed compiler warnings by removing not used code sql/field.cc: Fixed compiler warnings by adding cast and removed not accessed variables sql/ha_partition.cc: Fixed compiler warnings by removing not used code sql/item.cc: Fixed compiler warnings by removing not accessed variables Use IMPOSSIBLE_RESULT instead of (Item_result)-1 sql/item_cmpfunc.cc: Fixed compiler warnings by removing not accessed variables sql/item_func.cc: Fixed compiler warnings by removing not used code and not accessed variables Added IMPOSSIBLE_RESULT sql/item_subselect.cc: Fixed compiler warnings by removing not accessed variables sql/item_xmlfunc.cc: Fixed forgotten setting of xpath->error sql/log.cc: Fixed compiler warnings by removing not accessed variables sql/log_event.cc: Added IMPOSSIBLE_RESULT into switch Fixed wrong usage of DBUG_ASSERT(1) Removed always true DBUG_ASSERT() sql/mysqld.cc: Fixed compiler warnings by adding casts for ULONG_MAX sql/opt_sum.cc: Fixed compiler warnings by removing not used code Removed wrong DBUG_ASSERT() sql/partition_info.cc: Fixed compiler warnings by removing not accessed variables sql/rpl_injector.h: Removed always true part from 
DBUG_ASSERT() to remove compiler warning sql/spatial.cc: Fixed compiler warnings by removing not accessed variables sql/sql_acl.cc: Fixed compiler warnings by removing not accessed variables sql/sql_base.cc: Fixed compiler warnings by removing not accessed variables sql/sql_cache.cc: Fixed compiler warnings by removing not accessed variables sql/sql_class.cc: Fixed compiler warnings by: - Removing always true part from DBUG_ASSERT() - Removing not used code - Added IMPOSSIBLE_RESULT into switch sql/sql_load.cc: Fixed compiler warnings by removing not accessed variables sql/sql_parse.cc: Fixed compiler warnings by: - Removing not accessed variables - Removing always true part from DBUG_ASSERT() - Removing not used code sql/sql_plugin.cc: Added comment sql/sql_prepare.cc: Fixed compiler warnings by removing not accessed variables sql/sql_show.cc: Fixed compiler warnings by using correct cast sql/sql_table.cc: Fixed compiler warnings by removing not used code and removing not accessed variables sql/table.cc: Fixed compiler warnings by removing not accessed variables sql/time.cc: Fixed wrong DBUG_ASSERT(1) storage/maria/unittest/Makefile.am: Changed PAGE_SIZE to TEST_PAGE_SIZE to avoid compiler errors on systems where PAGE_SIZE is defined storage/maria/unittest/ma_pagecache_consist.c: Changed PAGE_SIZE to TEST_PAGE_SIZE to avoid compiler errors on systems where PAGE_SIZE is defined storage/maria/unittest/ma_pagecache_single.c: Changed PAGE_SIZE to TEST_PAGE_SIZE to avoid compiler errors on systems where PAGE_SIZE is defined tests/mysql_client_test.c: Fixed compiler warnings by removing not accessed variables and changing types --- mysys/mf_iocache2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_iocache2.c b/mysys/mf_iocache2.c index c54c7d13548..728501e6c50 100644 --- a/mysys/mf_iocache2.c +++ b/mysys/mf_iocache2.c @@ -420,9 +420,9 @@ process_flags: /* minimum width padding */ if (minimum_width > length2) { - char 
*buffz; + uchar *buffz; - buffz= my_alloca(minimum_width - length2); + buffz= (uchar*) my_alloca(minimum_width - length2); if (is_zero_padded) memset(buffz, '0', minimum_width - length2); else -- cgit v1.2.1 From 8665ae2c1fa97866978ebec938479c796c1de6d7 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 19 Feb 2008 11:41:12 +0100 Subject: Fix for build failures. Putting back "#define bool BOOL" under Windows until Windows team confers. client/get_password.c: fix for build failure (HPUX etc): no bool in C dbug/dbug.c: typo include/config-win.h: putting back the infamous #define, because without it we have 650 distinct compiler warnings "forcing value to bool 'true' or 'false'" (C4800), Windows team will confer on what to do. include/thr_alarm.h: fix for build failure on Windows libmysql/dll.c: fix for build failure on Windows mysys/thr_alarm.c: fix for build failure on HPUX --- mysys/thr_alarm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/thr_alarm.c b/mysys/thr_alarm.c index afa5aadece7..b710a7eee39 100644 --- a/mysys/thr_alarm.c +++ b/mysys/thr_alarm.c @@ -632,7 +632,7 @@ my_bool thr_alarm(thr_alarm_t *alrm, uint sec, ALARM *alarm) } -bool thr_got_alarm(thr_alarm_t *alrm_ptr) +my_bool thr_got_alarm(thr_alarm_t *alrm_ptr) { thr_alarm_t alrm= *alrm_ptr; MSG msg; -- cgit v1.2.1 From df843c4ce26e2c5d152098302b209252c712382e Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 21 Feb 2008 02:45:02 +0200 Subject: Fixed problems with ma_test2 and mi_test2 on high-byte-first system Fixed bug in ma_test2 when last row in table is deleted Fixed that ma_test_recovery.pl works on Solaris (by using digest instead of md5sum) Fixed some compiler warnings generated by the Forte compiler dbug/dbug.c: Added cast to get rid of compiler warning mysys/lf_alloc-pin.c: Added cast to get rid of compiler warning mysys/my_bitmap.c: Removed impossible DBUG_ASSERT()'s to get rid of compiler warnings mysys/my_compress.c: Removed wrong cast to get 
rid of compiler warning storage/maria/lockman.c: Added cast to get rid of compiler warning storage/maria/ma_open.c: Added fix from MyISAM to allocate space in key buffer for nod pointer storage/maria/ma_recovery.c: Fixed initialization that caused compiler warning storage/maria/ma_rsame.c: More DBUG_PRINT storage/maria/ma_scan.c: Better comment storage/maria/ma_statrec.c: More DBUG_PRINT and comments Fixed indentation BitKeeper/etc/ignore: added storage/maria/unittest/tmp/* storage/maria/ma_test2.c: Fixed bug that caused maria_rsame() to fail if test removed last row Fixed wrong usage of longget(); Should be uint4korr() storage/maria/unittest/ma_test_recovery.pl: Use md5sum or digest to calculate md5. This allows this script to be run on Linux and Solaris storage/myisam/mi_test2.c: Fixed wrong usage of longget(); Should be uint4korr() strings/ctype.c: Added casts to get rid of compiler warnings sql-bench/myisam.cnf: New BitKeeper file ``sql-bench/myisam.cnf'' --- mysys/lf_alloc-pin.c | 8 +++++--- mysys/my_bitmap.c | 4 ---- mysys/my_compress.c | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index ff9c5a42f81..3e91b70d0f8 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -169,7 +169,7 @@ LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox, void *stack_end) if (!(pins= top_ver % LF_PINBOX_MAX_PINS)) { /* the stack of free elements is empty */ - pins= my_atomic_add32(&pinbox->pins_in_array, 1)+1; + pins= my_atomic_add32((int32 volatile*) &pinbox->pins_in_array, 1)+1; if (unlikely(pins >= LF_PINBOX_MAX_PINS)) return 0; /* @@ -183,7 +183,8 @@ LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox, void *stack_end) } el= (LF_PINS *)_lf_dynarray_value(&pinbox->pinarray, pins); next= el->link; - } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver, + } while (!my_atomic_cas32((int32 volatile*) &pinbox->pinstack_top_ver, + (int32*) &top_ver, top_ver-pins+next+LF_PINBOX_MAX_PINS)); 
/* set el->link to the index of el in the dynarray (el->link has two usages: @@ -237,7 +238,8 @@ void _lf_pinbox_put_pins(LF_PINS *pins) do { pins->link= top_ver % LF_PINBOX_MAX_PINS; - } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver, + } while (!my_atomic_cas32((int32 volatile*) &pinbox->pinstack_top_ver, + (int32*) &top_ver, top_ver-pins->link+nr+LF_PINBOX_MAX_PINS)); return; } diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c index e127b2584ae..137127a2fda 100644 --- a/mysys/my_bitmap.c +++ b/mysys/my_bitmap.c @@ -508,10 +508,8 @@ uint bitmap_get_first_set(const MY_BITMAP *map) if (*byte_ptr & (1 << k)) return (i*32) + (j*8) + k; } - DBUG_ASSERT(0); } } - DBUG_ASSERT(0); } } return MY_BIT_NONE; @@ -542,10 +540,8 @@ uint bitmap_get_first(const MY_BITMAP *map) if (!(*byte_ptr & (1 << k))) return (i*32) + (j*8) + k; } - DBUG_ASSERT(0); } } - DBUG_ASSERT(0); } } return MY_BIT_NONE; diff --git a/mysys/my_compress.c b/mysys/my_compress.c index 70d2960f48e..45c4ab983cc 100644 --- a/mysys/my_compress.c +++ b/mysys/my_compress.c @@ -183,7 +183,7 @@ int packfrm(uchar *data, size_t len, DBUG_PRINT("info", ("org_len: %lu comp_len: %lu", (ulong) org_len, (ulong) comp_len)); - DBUG_DUMP("compressed", (char*)data, org_len); + DBUG_DUMP("compressed", data, org_len); error= 2; blob_len= BLOB_HEADER + org_len; -- cgit v1.2.1 From 190de95f6fcf37572be7cf2ff0543d74d190a989 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 22 Feb 2008 22:32:34 +0200 Subject: Locking for read and write made waiting for each other (with loose scheme for the same thread locking). include/my_pthread.h: Added variable for lock diffirentiation. include/wqueue.h: New release call. mysys/wqueue.c: New release call in case of read/write lock. storage/maria/ma_pagecache.c: Locking for read and write made waitimg for each other. storage/maria/unittest/Makefile.am: New test added. storage/maria/unittest/ma_pagecache_consist.c: Fixed thread initialization in the test. 
storage/maria/unittest/ma_pagecache_rwconsist.c: New BitKeeper file ``storage/maria/unittest/ma_pagecache_rwconsist.c'' --- mysys/wqueue.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'mysys') diff --git a/mysys/wqueue.c b/mysys/wqueue.c index bfe9cba1235..0766e13a4e4 100644 --- a/mysys/wqueue.c +++ b/mysys/wqueue.c @@ -136,6 +136,49 @@ void wqueue_release_queue(WQUEUE *wqueue) } +/** + @brief Removes all threads waiting for read or first one waiting for write. + + @param wqueue pointer to the queue structure + @apram thread pointer to the thread to be added to the queue +*/ + +void wqueue_release_one_locktype_from_queue(WQUEUE *wqueue) +{ + struct st_my_thread_var *last= wqueue->last_thread; + struct st_my_thread_var *next= last->next; + struct st_my_thread_var **prev= &wqueue->last_thread; + struct st_my_thread_var *thread; + uint first_type= next->lock_type; + if (first_type == MY_PTHREAD_LOCK_WRITE) + { + /* release first waiting for write lock */ + thread= next; + pthread_cond_signal(&thread->suspend); + wqueue->last_thread= next; + thread->next= NULL; + return; + } + do + { + thread= next; + next= thread->next; + if (thread->lock_type == MY_PTHREAD_LOCK_WRITE) + { + /* skip waiting for write lock */ + *prev= thread; + prev= &thread->next; + } + else + { + /* release waiting for read lock */ + pthread_cond_signal(&thread->suspend); + thread->next= NULL; + } + } while (thread != last); + *prev= NULL; +} + /* Add thread and wait -- cgit v1.2.1 From a998e0dc0cd792bdb9c3fc1c1475b58d773fa7c8 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 25 Feb 2008 23:32:16 +0200 Subject: Removing from circular list fixed. mysys/wqueue.c: fixed removing from circular list. 
--- mysys/wqueue.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'mysys') diff --git a/mysys/wqueue.c b/mysys/wqueue.c index 0766e13a4e4..df944edeed6 100644 --- a/mysys/wqueue.c +++ b/mysys/wqueue.c @@ -147,15 +147,18 @@ void wqueue_release_one_locktype_from_queue(WQUEUE *wqueue) { struct st_my_thread_var *last= wqueue->last_thread; struct st_my_thread_var *next= last->next; - struct st_my_thread_var **prev= &wqueue->last_thread; + struct st_my_thread_var **prev= &last->next; struct st_my_thread_var *thread; + struct st_my_thread_var *new_last= NULL; uint first_type= next->lock_type; if (first_type == MY_PTHREAD_LOCK_WRITE) { /* release first waiting for write lock */ thread= next; pthread_cond_signal(&thread->suspend); - wqueue->last_thread= next; + if (thread == last) + wqueue->last_thread= NULL; + *prev= thread->next; thread->next= NULL; return; } @@ -168,17 +171,27 @@ void wqueue_release_one_locktype_from_queue(WQUEUE *wqueue) /* skip waiting for write lock */ *prev= thread; prev= &thread->next; + new_last= NULL; } else { /* release waiting for read lock */ pthread_cond_signal(&thread->suspend); + new_last= thread->next; thread->next= NULL; } } while (thread != last); - *prev= NULL; + if (new_last) + { + /* last was deleted */ + if (new_last == last) + wqueue->last_thread= NULL; /* empty list */ + else + wqueue->last_thread= new_last; + } } + /* Add thread and wait -- cgit v1.2.1 From f094eff1d9d0c3cc101c6506b710a054228c8691 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 3 Apr 2008 15:40:25 +0200 Subject: Injecting more "const" declarations into code which does not change pointed data. I ran gcc -Wcast-qual on storage/maria, this identified un-needed casts, a couple of functions which said they had a const parameter though they changed the pointed content! This is fixed here. Some suspicious places receive a comment. 
The original intention of running -Wcast-qual was to find what code changes R-tree keys: I added const words, but hidden casts like those of int2store (casts target to (uint16*)) removed const checking; -Wcast-qual helped find those hidden casts. Log handler does not change the content pointed by LEX_STRING::str it receives, so we now use a struct which has a const inside, to emphasize this and be able to pass "const uchar*" buffers to log handler without fear of their content being changed by it. One-line fix for a merge glitch (when merging from MyISAM). include/m_string.h: As Maria's log handler uses LEX_STRING but never changes the content pointed by LEX_STRING::str, and assigns uchar* into this member most of the time, we introduce a new struct LEX_CUSTRING (C const U unsigned) for the log handler. include/my_global.h: In macros which read pointed content: use const pointers so that gcc -Wcast-qual does not warn about casting a const pointer to non-const. include/my_handler.h: In macros which read pointed content: use const pointers so that gcc -Wcast-qual does not warn about casting a const pointer to non-const. ha_find_null() does not change *a. include/my_sys.h: insert_dynamic() does not change *element. include/myisampack.h: In macros which read pointed content: use const pointers so that gcc -Wcast-qual does not warn about casting a const pointer to non-const. mysys/array.c: insert_dynamic() does not change *element mysys/my_handler.c: ha_find_null() does not change *a storage/maria/ma_bitmap.c: Log handler receives const strings now storage/maria/ma_blockrec.c: Log handler receives const strings now. _ma_apply_undo_row_delete/update() do change *header. storage/maria/ma_blockrec.h: correct prototype storage/maria/ma_check.c: Log handler receives const strings now. 
Un-needed casts storage/maria/ma_checkpoint.c: Log handler receives const strings now storage/maria/ma_checksum.c: unneeded cast storage/maria/ma_commit.c: Log handler receives const strings now storage/maria/ma_create.c: Log handler receives const strings now storage/maria/ma_dbug.c: fixing warning of gcc -Wcast-qual storage/maria/ma_delete.c: Log handler receives const strings now storage/maria/ma_delete_all.c: Log handler receives const strings now storage/maria/ma_delete_table.c: Log handler receives const strings now storage/maria/ma_dynrec.c: fixing some warnings of gcc -Wcast-qual. Unneeded casts removed. Comment about function which lies. storage/maria/ma_ft_parser.c: fix for warnings of gcc -Wcast-qual, removing unneeded casts storage/maria/ma_ft_update.c: less casts, comment storage/maria/ma_key.c: less casts, stay const (warnings of gcc -Wcast-qual) storage/maria/ma_key_recover.c: Log handler receives const strings now storage/maria/ma_loghandler.c: Log handler receives const strings now storage/maria/ma_loghandler.h: Log handler receives const strings now storage/maria/ma_loghandler_lsn.h: In macros which read pointed content: use const pointers so that gcc -Wcast-qual does not warn about casting a const pointer to non-const. storage/maria/ma_page.c: Log handler receives const strings now; more const storage/maria/ma_recovery.c: Log handler receives const strings now storage/maria/ma_rename.c: Log handler receives const strings now storage/maria/ma_rt_index.c: more const, to emphasize that functions don't change pointed content. best_key= NULL was forgotten during merge from MyISAM a few days ago, was causing a Valgrind warning storage/maria/ma_rt_index.h: new proto storage/maria/ma_rt_key.c: more const storage/maria/ma_rt_key.h: new proto storage/maria/ma_rt_mbr.c: more const for functions which deserve it storage/maria/ma_rt_mbr.h: new prototype storage/maria/ma_rt_split.c: make const what is not changed. 
storage/maria/ma_search.c: un-needed casts, more const storage/maria/ma_sp_key.c: more const storage/maria/ma_unique.c: un-needed casts. storage/maria/ma_write.c: Log handler receives const strings now storage/maria/maria_def.h: some more const storage/maria/unittest/ma_test_loghandler-t.c: Log handler receives const strings now storage/maria/unittest/ma_test_loghandler_first_lsn-t.c: Log handler receives const strings now storage/maria/unittest/ma_test_loghandler_max_lsn-t.c: Log handler receives const strings now storage/maria/unittest/ma_test_loghandler_multigroup-t.c: Log handler receives const strings now storage/maria/unittest/ma_test_loghandler_multithread-t.c: Log handler receives const strings now storage/maria/unittest/ma_test_loghandler_noflush-t.c: Log handler receives const strings now storage/maria/unittest/ma_test_loghandler_nologs-t.c: Log handler receives const strings now storage/maria/unittest/ma_test_loghandler_pagecache-t.c: Log handler receives const strings now storage/maria/unittest/ma_test_loghandler_purge-t.c: Log handler receives const strings now --- mysys/array.c | 2 +- mysys/my_handler.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/array.c b/mysys/array.c index 9ff35791dde..8ecc6a6cfd0 100644 --- a/mysys/array.c +++ b/mysys/array.c @@ -92,7 +92,7 @@ my_bool init_dynamic_array(DYNAMIC_ARRAY *array, uint element_size, FALSE Ok */ -my_bool insert_dynamic(DYNAMIC_ARRAY *array, uchar* element) +my_bool insert_dynamic(DYNAMIC_ARRAY *array, const uchar* element) { uchar* buffer; if (array->elements == array->max_element) diff --git a/mysys/my_handler.c b/mysys/my_handler.c index 312227891c5..dd35fae251f 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -498,7 +498,7 @@ end: NULLs. 
*/ -HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) +HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, const uchar *a) { for (; (enum ha_base_keytype) keyseg->type != HA_KEYTYPE_END; keyseg++) { -- cgit v1.2.1 From 42f970de3c4234cc5dfd0b78c8c499c538079e9d Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 3 Apr 2008 17:32:35 +0200 Subject: fix for compiler warning mysys/my_handler.c: compiler warning --- mysys/my_handler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_handler.c b/mysys/my_handler.c index dd35fae251f..3bac59f409c 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -502,7 +502,7 @@ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, const uchar *a) { for (; (enum ha_base_keytype) keyseg->type != HA_KEYTYPE_END; keyseg++) { - uchar *end; + const uchar *end; if (keyseg->null_bit) { if (!*a++) -- cgit v1.2.1 From 126c1228f5385411fbff586cbc1a48a6c61abfe9 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 10 Apr 2008 05:26:36 +0300 Subject: Added versioning of row data Will in future changeset (soon) av versioning of status variables (number of rows) and index Changed some LEX_STRING to LEX_CUSTRING to avoid casts and warnings Removed some not needed variables (as noticed by Guilhem) include/maria.h: Added prototypes for maria_chk_init_for_check(), maria_versioning() and maria_ignore_trids() include/my_base.h: Add new error HA_ERR_ROW_NOT_VISIBLE include/myisamchk.h: Added variables for checking visibility of rows during maria_chk include/thr_lock.h: Changed argument type from int to my_bool for get_status Added variable allow_multiple_concurrent_insert, to signal if table supports multiple concurrent inserts mysql-test/r/maria-page-checksum.result: Added missing drop table mysql-test/t/maria-page-checksum.test: Added missing drop table mysys/my_handler.c: Added new error messages mysys/thr_lock.c: Added support for multiple concurrent inserts, if table handler supports it sql/sql_yacc.yy: Added LOCK TABLE 
table_name WRITE CONCURRENT This was added (temporarly?) to be able to check versioning with Maria storage/csv/ha_tina.cc: Updated parameter for get_status storage/maria/ha_maria.cc: Added calls to maria_chk_init_status() Fixed call to ma_control_file_open() storage/maria/ma_blockrec.c: Changed some LEX_STRING to LEX_CUSTRING to avoid casts and warnings Changed back some 'header' parameters to const char* Removed some casts Added support for versioning: - If info->row_flag & ROW_FLAG_TRANSID is set, store transaction id together with the row - When reading rows, check if rows are visible. Give error if not - When scanning table, ignore not visible rows - Added function parameters to some functions, to be able to call _ma_compact_block_page() with different parameters depending of if the page is a HEAD or TAIL page - _ma_compact_block_page() deletes transaction id's that are visible by all running transactions - Added functions for thr_lock() to enable multiple concurrent inserts - Added helper function 'mysql_versioning()' to enable/disable versioning - Added helper function maria_ignore_trids(), used by maria_chk and maria_pack to see all rows. storage/maria/ma_blockrec.h: Updated parameters for some functions. Added new functions to read/store state with thr_lock storage/maria/ma_check.c: Enable handling of transaction id's in rows Give a readable error if a table contains a transation id that makes rows not visible storage/maria/ma_control_file.c: Added option to not give warning if control file doesn't exists. 
storage/maria/ma_control_file.h: Updated parameter lists for ma_control_file_open() storage/maria/ma_delete.c: Removed not used variable (suggestion by Guilhem) storage/maria/ma_locking.c: Changed type of argument from int -> my_bool storage/maria/ma_open.c: Removed not used variables 'key_write_undo_lsn' and 'key_delete_undo_lsn' Added new thr_lock interface functions for BLOCK_RECORD to enable multiple concurrent insert storage/maria/ma_test1.c: Added option --versioning (-C) to check versioning storage/maria/ma_test2.c: Added option -C to check versioning storage/maria/ma_test_recovery: Forward argumetns to ma_test_recovery.pl storage/maria/ma_write.c: Removed not used variable key_write_undo_lsn storage/maria/maria_chk.c: Always read control file (if exist) at start Initialize checking of tables by calling maria_chk_init_for_check() In verbose mode and in case of error, print max found transaction id storage/maria/maria_def.h: Added Trid to MARIA_ROW to be able to check transaction id for found row Moved 'base_length' from MARIA_ROW to MARIA_HA to be able to handle different base length (with and without TRANSID) without if's Added default row_flag to MARIA_HA for the same reason Changed LEX_STRING -> LEX_CUSTRING to avoid casts in ma_blockrec.c Removed not needed variables key_write_undo_lsn and key_delete_undo_lsn Added prototypes for new functions and fixed those that had changed storage/maria/maria_pack.c: Ensure we can read all rows from the file, independent of the used transaction id storage/maria/maria_read_log.c: Updated arguments to ma_control_file_open() storage/maria/trnman.c: If we have only one transaction, fixed that min_read_from contains current transaction Fixed that trnman_can_read_from() returns that row is readable if it was written by current transaction storage/maria/unittest/ma_control_file-t.c: Updated arguments to ma_control_file_open() storage/maria/unittest/ma_test_all-t: Added test of versioning Removed printing of one extra space 
storage/maria/unittest/ma_test_loghandler-t.c: Updated arguments to ma_control_file_open() storage/maria/unittest/ma_test_loghandler_first_lsn-t.c: Updated arguments to ma_control_file_open() storage/maria/unittest/ma_test_loghandler_max_lsn-t.c: Updated arguments to ma_control_file_open() storage/maria/unittest/ma_test_loghandler_multigroup-t.c: Updated arguments to ma_control_file_open() storage/maria/unittest/ma_test_loghandler_multithread-t.c: Updated arguments to ma_control_file_open() storage/maria/unittest/ma_test_loghandler_noflush-t.c: Updated arguments to ma_control_file_open() storage/maria/unittest/ma_test_loghandler_nologs-t.c: Updated arguments to ma_control_file_open() storage/maria/unittest/ma_test_loghandler_pagecache-t.c: Updated arguments to ma_control_file_open() storage/maria/unittest/ma_test_loghandler_purge-t.c: Updated arguments to ma_control_file_open() storage/maria/unittest/ma_test_recovery.expected: Updated file with result from new tests storage/maria/unittest/ma_test_recovery.pl: Added options --abort-on-error and --verbose In case of --verbose, print all excuted shell commands Added test of versioning storage/myisam/mi_locking.c: Updated type of parameter storage/myisam/myisamdef.h: Updated type of parameter mysql-test/r/maria-mvcc.result: New BitKeeper file ``mysql-test/r/maria-mvcc.result'' mysql-test/t/maria-mvcc.test: New BitKeeper file ``mysql-test/t/maria-mvcc.test'' --- mysys/my_handler.c | 2 ++ mysys/thr_lock.c | 102 ++++++++++++++++++++++++++++++++++++----------------- 2 files changed, 72 insertions(+), 32 deletions(-) (limited to 'mysys') diff --git a/mysys/my_handler.c b/mysys/my_handler.c index 3bac59f409c..c5575809bb0 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -629,6 +629,8 @@ static const char *handler_error_messages[]= "Got a fatal error during initialzaction of handler", "File to short; Expected more data in file", "Read page with wrong checksum" + "Could not apply row event", + "Row is not visible by 
the current transaction", }; diff --git a/mysys/thr_lock.c b/mysys/thr_lock.c index 6decd6a6a27..484fd1dd559 100644 --- a/mysys/thr_lock.c +++ b/mysys/thr_lock.c @@ -69,9 +69,11 @@ get_status: for concurrent reads. The lock algorithm allows one to have one TL_WRITE_ALLOW_READ, -TL_WRITE_CONCURRENT_INSERT or one TL_WRITE_DELAYED lock at the same time as -multiple read locks. +TL_WRITE_CONCURRENT_INSERT or one TL_WRITE_DELAYED lock at the same +time as multiple read locks. +In addition, if lock->allow_multiple_concurrent_insert is set then there can +be any number of TL_WRITE_CONCURRENT_INSERT locks aktive at the same time. */ #if !defined(MAIN) && !defined(DBUG_OFF) && !defined(EXTRA_DEBUG) @@ -152,7 +154,8 @@ static int check_lock(struct st_lock_list *list, const char* lock_type, } if (same_owner && !thr_lock_owner_equal(data->owner, first_owner) && - last_lock_type != TL_WRITE_ALLOW_WRITE) + last_lock_type != TL_WRITE_ALLOW_WRITE && + last_lock_type != TL_WRITE_CONCURRENT_INSERT) { fprintf(stderr, "Warning: Found locks from different threads in %s: %s\n", @@ -205,7 +208,7 @@ static void check_locks(THR_LOCK *lock, const char *where, THR_LOCK_DATA *data; for (data=lock->read.data ; data ; data=data->next) { - if ((int) data->type == (int) TL_READ_NO_INSERT) + if (data->type == TL_READ_NO_INSERT) count++; /* Protect against infinite loop. 
*/ DBUG_ASSERT(count <= lock->read_no_write_count); @@ -254,7 +257,22 @@ static void check_locks(THR_LOCK *lock, const char *where, } } else - { /* Have write lock */ + { + /* We have at least one write lock */ + if (lock->write.data->type == TL_WRITE_CONCURRENT_INSERT) + { + THR_LOCK_DATA *data; + for (data=lock->write.data->next ; data ; data=data->next) + { + if (data->type != TL_WRITE_CONCURRENT_INSERT) + { + fprintf(stderr, + "Warning at '%s': Found TL_WRITE_CONCURRENT_INSERT lock mixed with other write locks\n", + where); + break; + } + } + } if (lock->write_wait.data) { if (!allow_no_locks && @@ -514,7 +532,8 @@ thr_lock(THR_LOCK_DATA *data, THR_LOCK_OWNER *owner, /* Request for READ lock */ if (lock->write.data) { - /* We can allow a read lock even if there is already a write lock + /* + We can allow a read lock even if there is already a write lock on the table in one the following cases: - This thread alread have a write lock on the table - The write lock is TL_WRITE_ALLOW_READ or TL_WRITE_DELAYED @@ -558,11 +577,11 @@ thr_lock(THR_LOCK_DATA *data, THR_LOCK_OWNER *owner, (*lock->read.last)=data; /* Add to running FIFO */ data->prev=lock->read.last; lock->read.last= &data->next; - if (lock->get_status) - (*lock->get_status)(data->status_param, 0); if (lock_type == TL_READ_NO_INSERT) lock->read_no_write_count++; check_locks(lock,"read lock with no write locks",0); + if (lock->get_status) + (*lock->get_status)(data->status_param, 0); statistic_increment(locks_immediate,&THR_LOCK_lock); goto end; } @@ -626,16 +645,18 @@ thr_lock(THR_LOCK_DATA *data, THR_LOCK_OWNER *owner, The following test will not work if the old lock was a TL_WRITE_ALLOW_WRITE, TL_WRITE_ALLOW_READ or TL_WRITE_DELAYED in the same thread, but this will never happen within MySQL. 
+ + The idea is to allow us to get a lock at once if we already have + a write lock or if there is no pending write locks and if all + write locks are of the same type and are either + TL_WRITE_ALLOW_WRITE or TL_WRITE_CONCURRENT_INSERT */ if (thr_lock_owner_equal(data->owner, lock->write.data->owner) || - (lock_type == TL_WRITE_ALLOW_WRITE && - !lock->write_wait.data && - lock->write.data->type == TL_WRITE_ALLOW_WRITE)) + (!lock->write_wait.data && lock_type == lock->write.data->type && + (lock_type == TL_WRITE_ALLOW_WRITE || + (lock_type == TL_WRITE_CONCURRENT_INSERT && + lock->allow_multiple_concurrent_insert)))) { - /* - We have already got a write lock or all locks are - TL_WRITE_ALLOW_WRITE - */ DBUG_PRINT("info", ("write_wait.data: 0x%lx old_type: %d", (ulong) lock->write_wait.data, lock->write.data->type)); @@ -644,8 +665,9 @@ thr_lock(THR_LOCK_DATA *data, THR_LOCK_OWNER *owner, data->prev=lock->write.last; lock->write.last= &data->next; check_locks(lock,"second write lock",0); - if (data->lock->get_status) - (*data->lock->get_status)(data->status_param, 0); + if (lock->get_status) + (*lock->get_status)(data->status_param, + lock_type == TL_WRITE_CONCURRENT_INSERT); statistic_increment(locks_immediate,&THR_LOCK_lock); goto end; } @@ -678,8 +700,8 @@ thr_lock(THR_LOCK_DATA *data, THR_LOCK_OWNER *owner, (*lock->write.last)=data; /* Add as current write lock */ data->prev=lock->write.last; lock->write.last= &data->next; - if (data->lock->get_status) - (*data->lock->get_status)(data->status_param, concurrent_insert); + if (lock->get_status) + (*lock->get_status)(data->status_param, concurrent_insert); check_locks(lock,"only write lock",0); statistic_increment(locks_immediate,&THR_LOCK_lock); goto end; @@ -809,7 +831,6 @@ static void wake_up_waiters(THR_LOCK *lock) { THR_LOCK_DATA *data; enum thr_lock_type lock_type; - DBUG_ENTER("wake_up_waiters"); if (!lock->write.data) /* If no active write locks */ @@ -1372,8 +1393,8 @@ my_bool 
thr_upgrade_write_delay_lock(THR_LOCK_DATA *data) { if (!lock->read.data) /* No read locks */ { /* We have the lock */ - if (data->lock->get_status) - (*data->lock->get_status)(data->status_param, 0); + if (lock->get_status) + (*lock->get_status)(data->status_param, 0); pthread_mutex_unlock(&lock->mutex); DBUG_RETURN(0); } @@ -1511,7 +1532,7 @@ struct st_test { enum thr_lock_type lock_type; }; -THR_LOCK locks[5]; /* 4 locks */ +THR_LOCK locks[6]; /* Number of locks +1 */ struct st_test test_0[] = {{0,TL_READ}}; /* One lock */ struct st_test test_1[] = {{0,TL_READ},{0,TL_WRITE}}; /* Read and write lock of lock 0 */ @@ -1531,9 +1552,20 @@ struct st_test test_14[] = {{0,TL_WRITE_CONCURRENT_INSERT},{1,TL_READ}}; struct st_test test_15[] = {{0,TL_WRITE_ALLOW_WRITE},{1,TL_READ}}; struct st_test test_16[] = {{0,TL_WRITE_ALLOW_WRITE},{1,TL_WRITE_ALLOW_WRITE}}; -struct st_test *tests[] = {test_0,test_1,test_2,test_3,test_4,test_5,test_6, - test_7,test_8,test_9,test_10,test_11,test_12, - test_13,test_14,test_15,test_16}; +struct st_test test_17[] = {{5,TL_WRITE_CONCURRENT_INSERT}}; +struct st_test test_18[] = {{5,TL_WRITE_CONCURRENT_INSERT}}; +struct st_test test_19[] = {{5,TL_READ}}; +struct st_test test_20[] = {{5,TL_READ_NO_INSERT}}; +struct st_test test_21[] = {{5,TL_WRITE}}; + + +struct st_test *tests[]= +{ + test_0, test_1, test_2, test_3, test_4, test_5, test_6, test_7, test_8, + test_9, test_10, test_11, test_12, test_13, test_14, test_15, test_16, + test_17, test_18, test_19, test_20, test_21 +}; + int lock_counts[]= {sizeof(test_0)/sizeof(struct st_test), sizeof(test_1)/sizeof(struct st_test), sizeof(test_2)/sizeof(struct st_test), @@ -1550,7 +1582,12 @@ int lock_counts[]= {sizeof(test_0)/sizeof(struct st_test), sizeof(test_13)/sizeof(struct st_test), sizeof(test_14)/sizeof(struct st_test), sizeof(test_15)/sizeof(struct st_test), - sizeof(test_16)/sizeof(struct st_test) + sizeof(test_16)/sizeof(struct st_test), + sizeof(test_17)/sizeof(struct st_test), + 
sizeof(test_18)/sizeof(struct st_test), + sizeof(test_19)/sizeof(struct st_test), + sizeof(test_20)/sizeof(struct st_test), + sizeof(test_21)/sizeof(struct st_test) }; @@ -1594,7 +1631,6 @@ static void *test_thread(void *arg) printf("Thread %s (%d) started\n",my_thread_name(),param); fflush(stdout); - thr_lock_info_init(&lock_info); thr_lock_owner_init(&owner, &lock_info); for (i=0; i < lock_counts[param] ; i++) @@ -1640,7 +1676,8 @@ int main(int argc __attribute__((unused)),char **argv __attribute__((unused))) { pthread_t tid; pthread_attr_t thr_attr; - int i,*param,error; + int *param,error; + uint i; MY_INIT(argv[0]); if (argc > 1 && argv[1][0] == '-' && argv[1][1] == '#') DBUG_PUSH(argv[1]+2); @@ -1660,13 +1697,14 @@ int main(int argc __attribute__((unused)),char **argv __attribute__((unused))) exit(1); } - for (i=0 ; i < (int) array_elements(locks) ; i++) + for (i=0 ; i < array_elements(locks) ; i++) { thr_lock_init(locks+i); locks[i].check_status= test_check_status; locks[i].update_status=test_update_status; locks[i].copy_status= test_copy_status; locks[i].get_status= test_get_status; + locks[i].allow_multiple_concurrent_insert= 1; } if ((error=pthread_attr_init(&thr_attr))) { @@ -1692,7 +1730,7 @@ int main(int argc __attribute__((unused)),char **argv __attribute__((unused))) #ifdef HAVE_THR_SETCONCURRENCY VOID(thr_setconcurrency(2)); #endif - for (i=0 ; i < (int) array_elements(lock_counts) ; i++) + for (i=0 ; i < array_elements(lock_counts) ; i++) { param=(int*) malloc(sizeof(int)); *param=i; @@ -1724,7 +1762,7 @@ int main(int argc __attribute__((unused)),char **argv __attribute__((unused))) } if ((error=pthread_mutex_unlock(&LOCK_thread_count))) fprintf(stderr,"Got error: %d from pthread_mutex_unlock\n",error); - for (i=0 ; i < (int) array_elements(locks) ; i++) + for (i=0 ; i < array_elements(locks) ; i++) thr_lock_delete(locks+i); #ifdef EXTRA_DEBUG if (found_errors) -- cgit v1.2.1 From 047ce0dcdc83e818ed3ef441b1c5d8b625374b2d Mon Sep 17 00:00:00 2001 
From: unknown Date: Mon, 21 Apr 2008 17:14:58 +0300 Subject: Problems of partially freed waiting quque fixed (BUG#35040) mysys/wqueue.c: Problems of partially freed waiting quque fixed. storage/maria/unittest/ma_pagecache_rwconsist.c: Explicitly assigned initial value for increasing readability. Dbug file flush after each line for better debugging. Fixed code style. --- mysys/wqueue.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) (limited to 'mysys') diff --git a/mysys/wqueue.c b/mysys/wqueue.c index df944edeed6..5a90ea302f8 100644 --- a/mysys/wqueue.c +++ b/mysys/wqueue.c @@ -140,55 +140,58 @@ void wqueue_release_queue(WQUEUE *wqueue) @brief Removes all threads waiting for read or first one waiting for write. @param wqueue pointer to the queue structure - @apram thread pointer to the thread to be added to the queue + @param thread pointer to the thread to be added to the queue + + @note This function is applicable only to single linked lists. 
*/ void wqueue_release_one_locktype_from_queue(WQUEUE *wqueue) { struct st_my_thread_var *last= wqueue->last_thread; struct st_my_thread_var *next= last->next; - struct st_my_thread_var **prev= &last->next; struct st_my_thread_var *thread; - struct st_my_thread_var *new_last= NULL; + struct st_my_thread_var *new_list= NULL; uint first_type= next->lock_type; if (first_type == MY_PTHREAD_LOCK_WRITE) { /* release first waiting for write lock */ - thread= next; - pthread_cond_signal(&thread->suspend); - if (thread == last) + pthread_cond_signal(&next->suspend); +#ifndef DBUG_OFF + next->prev= NULL; /* force segfault if used */ +#endif + if (next == last) wqueue->last_thread= NULL; - *prev= thread->next; - thread->next= NULL; + else + last->next= next->next; + next->next= NULL; return; } do { thread= next; next= thread->next; +#ifndef DBUG_OFF + thread->prev= NULL; /* force segfault if used */ +#endif if (thread->lock_type == MY_PTHREAD_LOCK_WRITE) { /* skip waiting for write lock */ - *prev= thread; - prev= &thread->next; - new_last= NULL; + if (new_list) + { + thread->next= new_list->next; + new_list= new_list->next= thread; + } + else + new_list= thread->next= thread; } else { /* release waiting for read lock */ pthread_cond_signal(&thread->suspend); - new_last= thread->next; thread->next= NULL; } } while (thread != last); - if (new_last) - { - /* last was deleted */ - if (new_last == last) - wqueue->last_thread= NULL; /* empty list */ - else - wqueue->last_thread= new_last; - } + wqueue->last_thread= new_list; } -- cgit v1.2.1 From 86b7194c1892eb8bf53c2d808db38b5a6d47b0d8 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 21 Apr 2008 17:43:38 +0300 Subject: Debug code fixed. 
--- mysys/wqueue.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'mysys') diff --git a/mysys/wqueue.c b/mysys/wqueue.c index 5a90ea302f8..fcc0a39725d 100644 --- a/mysys/wqueue.c +++ b/mysys/wqueue.c @@ -67,6 +67,9 @@ void wqueue_add_to_queue(WQUEUE *wqueue, struct st_my_thread_var *thread) thread->next= last->next; last->next= thread; } +#ifndef DBUG_OFF + thread->prev= NULL; /* force segfault if used */ +#endif wqueue->last_thread= thread; } @@ -156,9 +159,6 @@ void wqueue_release_one_locktype_from_queue(WQUEUE *wqueue) { /* release first waiting for write lock */ pthread_cond_signal(&next->suspend); -#ifndef DBUG_OFF - next->prev= NULL; /* force segfault if used */ -#endif if (next == last) wqueue->last_thread= NULL; else @@ -170,9 +170,6 @@ void wqueue_release_one_locktype_from_queue(WQUEUE *wqueue) { thread= next; next= thread->next; -#ifndef DBUG_OFF - thread->prev= NULL; /* force segfault if used */ -#endif if (thread->lock_type == MY_PTHREAD_LOCK_WRITE) { /* skip waiting for write lock */ -- cgit v1.2.1 From 663f971b8d6fd121825c41bf1fffcab72aaa885f Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 29 Apr 2008 09:26:37 +0300 Subject: After merge fixes. 
BitKeeper/deleted/.del-my_bit.h: Delete: include/my_bit.h --- mysys/my_handler_errors.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'mysys') diff --git a/mysys/my_handler_errors.h b/mysys/my_handler_errors.h index e360af8c57e..483f1090e6a 100644 --- a/mysys/my_handler_errors.h +++ b/mysys/my_handler_errors.h @@ -32,7 +32,7 @@ static const char *handler_error_messages[]= "Table is crashed and last repair failed", "Table was marked as crashed and should be repaired", "Lock timed out; Retry transaction", - "Lock table is full; Restart program with a larger locktable", + "Lock table is full; Restart program with a larger lock table", "Updates are not allowed under a read only transactions", "Lock deadlock; Retry transaction", "Foreign key constraint is incorrectly formed", @@ -46,7 +46,7 @@ static const char *handler_error_messages[]= "Unexpected null pointer found when using spatial index", "The table changed in storage engine", "There's no partition in table for the given value", - "Row-based binlogging of row failed", + "Row-based binary logging of row failed", "Index needed in foreign key constraint", "Upholding foreign key constraints would lead to a duplicate key error in " "some other table", @@ -59,9 +59,9 @@ static const char *handler_error_messages[]= "It is not possible to log this statement", "The event was corrupt, leading to illegal data being read", "The table is of a new format not supported by this version", - "The event could not be processed no other hanlder error happened", - "Got a fatal error during initialzaction of handler", - "File to short; Expected more data in file", + "The event could not be processed no other handler error happened", + "Got a fatal error during initialization of handler", + "File too short; Expected more data in file", "Read page with wrong checksum" }; -- cgit v1.2.1 From 5099033c26826fd2625b6424134999853e33a29d Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 29 May 2008 18:33:33 +0300 
Subject: WL#3138: Maria - fast "SELECT COUNT(*) FROM t;" and "CHECKSUM TABLE t" Added argument to maria_end_bulk_insert() to know if the table will be deleted after the operation Fixed wrong call to strmake Don't call bulk insert in case of inserting only one row (speed optimization as starting/stopping bulk insert Allow storing year 2155 in year field When running with purify/valgrind avoid copying structures over themself Added hook 'trnnam_end_trans_hook' that is called when transaction ends Added trn->used_tables that is used to an entry for all tables used by transaction Fixed that ndb doesn't crash on duplicate key error when start_bulk_insert/end_bulk_insert are not called include/maria.h: Added argument to maria_end_bulk_insert() to know if the table will be deleted after the operation include/my_tree.h: Added macro 'reset_free_element()' to be able to ignore calls to the external free function. Is used to optimize end-bulk-insert in case of failures, in which case we don't want write the remaining keys in the tree mysql-test/install_test_db.sh: Upgrade to new mysql_install_db options mysql-test/r/maria-mvcc.result: New tests mysql-test/r/maria.result: New tests mysql-test/suite/ndb/r/ndb_auto_increment.result: Fixed error message now when bulk insert is not always called mysql-test/suite/ndb/t/ndb_auto_increment.test: Fixed error message now when bulk insert is not always called mysql-test/t/maria-mvcc.test: Added testing of versioning of count(*) mysql-test/t/maria-page-checksum.test: Added comment mysql-test/t/maria.test: More tests mysys/hash.c: Code style change sql/field.cc: Allow storing year 2155 in year field sql/ha_ndbcluster.cc: Added new argument to end_bulk_insert() to signal if the bulk insert should ignored sql/ha_ndbcluster.h: Added new argument to end_bulk_insert() to signal if the bulk insert should ignored sql/ha_partition.cc: Added new argument to end_bulk_insert() to signal if the bulk insert should ignored sql/ha_partition.h: Added new 
argument to end_bulk_insert() to signal if the bulk insert should ignored sql/handler.cc: Don't call get_dup_key() if there is no table object. This can happen if the handler generates a duplicate key error on commit sql/handler.h: Added new argument to end_bulk_insert() to signal if the bulk insert should ignored (ie, the table will be deleted) sql/item.cc: Style fix Removed compiler warning sql/log_event.cc: Added new argument to ha_end_bulk_insert() sql/log_event_old.cc: Added new argument to ha_end_bulk_insert() sql/mysqld.cc: Removed compiler warning sql/protocol.cc: Added DBUG sql/sql_class.cc: Added DBUG Fixed wrong call to strmake sql/sql_insert.cc: Don't call bulk insert in case of inserting only one row (speed optimization as starting/stopping bulk insert involves a lot of if's) Added new argument to ha_end_bulk_insert() sql/sql_load.cc: Added new argument to ha_end_bulk_insert() sql/sql_parse.cc: Style fixes Avoid goto in common senario sql/sql_select.cc: When running with purify/valgrind avoid copying structures over themself. This is not a real bug in itself, but it's a waste of cycles and causes valgrind warnings sql/sql_select.h: Avoid copying structures over themself. 
This is not a real bug in itself, but it's a waste of cycles and causes valgrind warnings sql/sql_table.cc: Call HA_EXTRA_PREPARE_FOR_DROP if table created by ALTER TABLE is going to be dropped Added new argument to ha_end_bulk_insert() storage/archive/ha_archive.cc: Added new argument to end_bulk_insert() storage/archive/ha_archive.h: Added new argument to end_bulk_insert() storage/federated/ha_federated.cc: Added new argument to end_bulk_insert() storage/federated/ha_federated.h: Added new argument to end_bulk_insert() storage/maria/Makefile.am: Added ma_state.c and ma_state.h storage/maria/ha_maria.cc: Versioning of count(*) and checksum - share->state.state is now assumed to be correct, not handler->state - Call _ma_setup_live_state() in external lock to get count(*)/checksum versioning. In case of not versioned and not concurrent insertable table, file->s->state.state contains the correct state information Other things: - file->s -> share - Added DBUG_ASSERT() for unlikely case - Optimized end_bulk_insert() to not write anything if table is going to be deleted (as in failed alter table) - Indentation changes in external_lock becasue of removed 'goto' caused a big conflict even if very little was changed storage/maria/ha_maria.h: New argument to end_bulk_insert() storage/maria/ma_blockrec.c: Update for versioning of count(*) and checksum Keep share->state.state.data_file_length up to date (not info->state->data_file_length) Moved _ma_block_xxxx_status() and maria_versioning() functions to ma_state.c storage/maria/ma_check.c: Update and use share->state.state instead of info->state info->s to share Update info->state at end of repair Call _ma_reset_state() to update share->state_history at end of repair storage/maria/ma_checkpoint.c: Call _ma_remove_not_visible_states() on checkpoint to clean up not visible state history from tables storage/maria/ma_close.c: Remember state history for running transaction even if table is closed storage/maria/ma_commit.c: Ensure 
we always call trnman_commit_trn() even if other calls fails. If we don't do that, the translog and state structures will not be freed storage/maria/ma_delete.c: Versioning of count(*) and checksum: - Always update info->state->checksum and info->state->records storage/maria/ma_delete_all.c: Versioning of count(*) and checksum: - Ensure that share->state.state is updated, as here is where we store the primary information storage/maria/ma_dynrec.c: Use lock_key_trees instead of concurrent_insert to check if trees should be locked. This allows us to lock trees both for concurrent_insert and for index versioning. storage/maria/ma_extra.c: Versioning of count(*) and checksum: - Use share->state.state instead of info->state - share->concurrent_insert -> share->non_transactional_concurrent_insert - Don't update share->state.state from info->state if transactional table Optimization: - Don't flush io_cache or bitmap if we are using FLUSH_IGNORE_CHANGED storage/maria/ma_info.c: Get most state information from current state storage/maria/ma_init.c: Add hash table and free function to store states for closed tables Install hook for transaction commit/rollback to update history state storage/maria/ma_key_recover.c: Versioning of count(*) and checksum: - Use share->state.state instead of info->state storage/maria/ma_locking.c: Versioning of count(*) and checksum: - Call virtual functions (if exists) to restore/update status - Move _ma_xxx_status() functions to ma_state.c info->s -> share storage/maria/ma_open.c: Versioning of count(*) and checksum: - For not transactional tables, set info->state to point to new allocated state structure. 
- Initialize new info->state_start variable that points to state at start of transaction - Copy old history states from hash table (maria_stored_states) first time the table is opened - Split flag share->concurrent_insert to non_transactional_concurrent_insert & lock_key_tree - For now, only enable versioning of tables without keys (to be fixed in soon!) - Added new virtual function to restore status in maria_lock_database) More DBUG storage/maria/ma_page.c: Versioning of count(*) and checksum: - Use share->state.state instead of info->state - Modify share->state.state.key_file_length under share->intern_lock storage/maria/ma_range.c: Versioning of count(*) and checksum: - Lock trees based on share->lock_key_trees info->s -> share storage/maria/ma_recovery.c: Versioning of count(*) and checksum: - Use share->state.state instead of info->state - Update state information on close and when reenabling logging storage/maria/ma_rkey.c: Versioning of count(*) and checksum: - Lock trees based on share->lock_key_trees storage/maria/ma_rnext.c: Versioning of count(*) and checksum: - Lock trees based on share->lock_key_trees storage/maria/ma_rnext_same.c: Versioning of count(*) and checksum: - Lock trees based on share->lock_key_trees - Only skip rows based on file length if non_transactional_concurrent_insert is set storage/maria/ma_rprev.c: Versioning of count(*) and checksum: - Lock trees based on share->lock_key_trees storage/maria/ma_rsame.c: Versioning of count(*) and checksum: - Lock trees based on share->lock_key_trees storage/maria/ma_sort.c: Use share->state.state instead of info->state Fixed indentation storage/maria/ma_static.c: Added maria_stored_state storage/maria/ma_update.c: Versioning of count(*) and checksum: - Always update info->state->checksum and info->state->records - Remove optimization for index file update as it doesn't work for transactional tables storage/maria/ma_write.c: Versioning of count(*) and checksum: - Always update info->state->checksum 
and info->state->records storage/maria/maria_def.h: Move MARIA_STATUS_INFO to ma_state.h Changes to MARIA_SHARE: - Added state_history to store count(*)/checksum states - Added in_trans as counter if table is used by running transactions - Split concurrent_insert into lock_key_trees and on_transactional_concurrent_insert. - Added virtual function lock_restore_status Changes to MARIA_HA: - save_state -> state_save - Added state_start to store state at start of transaction storage/maria/maria_pack.c: Versioning of count(*) and checksum: - Use share->state.state instead of info->state Indentation fixes storage/maria/trnman.c: Added hook 'trnnam_end_trans_hook' that is called when transaction ends Added trn->used_tables that is used to an entry for all tables used by transaction More DBUG Changed return type of trnman_end_trn() to my_bool Added trnman_get_min_trid() to get minimum trid in use. Added trnman_exists_active_transactions() to check if there exist a running transaction started between two commit id storage/maria/trnman.h: Added 'used_tables' Moved all pointers into same groups to get better memory alignment storage/maria/trnman_public.h: Added prototypes for new functions and variables Chagned return type of trnman_end_trn() to my_bool storage/myisam/ha_myisam.cc: Added argument to end_bulk_insert() if operation should be aborted storage/myisam/ha_myisam.h: Added argument to end_bulk_insert() if operation should be aborted storage/maria/ma_state.c: Functions to handle state of count(*) and checksum storage/maria/ma_state.h: Structures and declarations to handle state of count(*) and checksum --- mysys/hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/hash.c b/mysys/hash.c index 4532b06b533..80870c4f46b 100644 --- a/mysys/hash.c +++ b/mysys/hash.c @@ -318,7 +318,7 @@ my_bool my_hash_insert(HASH *info,const uchar *record) LINT_INIT(ptr_to_rec); LINT_INIT(ptr_to_rec2); - if (HASH_UNIQUE & info->flags) + if 
(info->flags & HASH_UNIQUE) { uchar *key= (uchar*) hash_key(info, record, &idx, 1); if (hash_search(info, key, idx)) -- cgit v1.2.1 From 0816d9a70e8e93139ec41e45967e7e44acce74b9 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 29 May 2008 23:56:27 +0300 Subject: After merge fixes mysys/my_handler_errors.h: Updated error messages storage/maria/ha_maria.cc: After merge fix storage/maria/ma_rt_key.c: Use share->state.state instead of info->state storage/maria/ma_rt_test.c: After merge fix --- mysys/my_handler_errors.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'mysys') diff --git a/mysys/my_handler_errors.h b/mysys/my_handler_errors.h index 483f1090e6a..4c952466545 100644 --- a/mysys/my_handler_errors.h +++ b/mysys/my_handler_errors.h @@ -55,13 +55,14 @@ static const char *handler_error_messages[]= "Failed to get next auto increment value", "Failed to set row auto increment value", "Unknown (generic) error from engine", - "Record is the same", + "Record was not update. Original values was same as new values", "It is not possible to log this statement", "The event was corrupt, leading to illegal data being read", "The table is of a new format not supported by this version", - "The event could not be processed no other handler error happened", + "The event could not be processed. No other handler error happened", "Got a fatal error during initialization of handler", "File too short; Expected more data in file", - "Read page with wrong checksum" + "Read page with wrong checksum", + "Row is not visible by the current transaction" }; -- cgit v1.2.1 From 52cb0c24a6a6674ee17c4bb86fa02527043ed90f Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Thu, 26 Jun 2008 08:18:28 +0300 Subject: Added versioning of Maria index Store max_trid in index file as state.create_trid. This is used to pack all transids in the index pages relative to max possible transid for file. Enable versioning for transactional tables with index. 
Tables with an auto-increment key, rtree or fulltext keys are not versioned. Changed info->lastkey to type MARIA_KEY. Removed info->lastkey_length as this is now part of info->lastkey Renamed old info->lastkey to info->lastkey_buff Use exact key lenghts for keys, not USE_WHOLE_KEY For partial key searches, use SEARCH_PART_KEY When searching to insert new key on page, use SEARCH_INSERT to mark that key has rowid Changes done in a lot of files: - Modified functions to use MARIA_KEY instead of key pointer and key length - Use keyinfo->root_lock instead of share->key_root_lock[keynr] - Simplify code by using local variable keyinfo instead if share->keyinfo[i] - Added #fdef EXTERNAL_LOCKING around removed state elements - HA_MAX_KEY_BUFF -> MARIA_MAX_KEY_BUFF (to reserve space for transid) - Changed type of 'nextflag' to uint32 to ensure all SEARCH_xxx flags fits into it .bzrignore: Added missing temporary directory extra/Makefile.am: comp_err is now deleted on make distclean include/maria.h: Added structure MARIA_KEY, which is used for intern key objects in Maria. Changed functions to take MARIA_KEY as an argument instead of pointer to packed key. Changed some functions that always return true or false to my_bool. Added virtual function make_key() to avoid if in _ma_make_key() Moved rw_lock_t for locking trees from share->key_root_lock to MARIA_KEYDEF. This makes usage of the locks simpler and faster include/my_base.h: Added HA_RTREE_INDEX flag to mark rtree index. 
Used for easier checks in ma_check() Added SEARCH_INSERT to be used when inserting new keys Added SEARCH_PART_KEY for partial searches Added SEARCH_USER_KEY_HAS_TRANSID to be used when key we use for searching in btree has a TRANSID Added SEARCH_PAGE_KEY_HAS_TRANSID to be used when key we found in btree has a transid include/my_handler.h: Make next_flag 32 bit to make sure we can handle all SEARCH_ bits mysql-test/include/maria_empty_logs.inc: Read and restore current database; Don't assume we are using mysqltest. Don't log use databasename to log. Using this include should not cause any result changes. mysql-test/r/maria-gis-rtree-dynamic.result: Updated results after adding some check table commands to help pinpoint errors mysql-test/r/maria-mvcc.result: New tests mysql-test/r/maria-purge.result: New result after adding removal of logs mysql-test/r/maria-recovery-big.result: maria_empty_logs doesn't log 'use mysqltest' anymore mysql-test/r/maria-recovery-bitmap.result: maria_empty_logs doesn't log 'use mysqltest' anymore mysql-test/r/maria-recovery-rtree-ft.result: maria_empty_logs doesn't log 'use mysqltest' anymore mysql-test/r/maria-recovery.result: maria_empty_logs doesn't log 'use mysqltest' anymore mysql-test/r/maria.result: New tests mysql-test/r/variables-big.result: Don't log id as it's not predictable mysql-test/suite/rpl_ndb/r/rpl_truncate_7ndb_2.result: Updated results to new binlog results. 
(Test has not been run in a long time as it requires --big) mysql-test/suite/rpl_ndb/t/rpl_truncate_7ndb_2-master.opt: Moved file to ndb replication test directory mysql-test/suite/rpl_ndb/t/rpl_truncate_7ndb_2.test: Fixed wrong path to included tests mysql-test/t/maria-gis-rtree-dynamic.test: Added some check table commands to help pinpoint errors mysql-test/t/maria-mvcc.test: New tests mysql-test/t/maria-purge.test: Remove logs to make test results predictable mysql-test/t/maria.test: New tests for some possible problems mysql-test/t/variables-big.test: Don't log id as it's not predictable mysys/my_handler.c: Updated function comment to reflect old code Changed nextflag to be uint32 to ensure we can have flags > 16 bit Changed checking if we are in insert with NULL keys as next_flag can now include additional bits that have to be ignored. Added SEARCH_INSERT flag to be used when inserting new keys in btree. This flag tells us the that the keys includes row position and it's thus safe to remove SEARCH_FIND Added comparision of transid. This is only done if the keys actually have a transid, which is indicated by nextflag mysys/my_lock.c: Fixed wrong test (Found by Guilhem) scripts/Makefile.am: Ensure that test programs are deleted by make clean sql/rpl_rli.cc: Moved assignment order to fix compiler warning storage/heap/hp_write.c: Add SEARCH_INSERT to signal ha_key_cmp that we we should also compare rowid for keys storage/maria/Makefile.am: Remove also maria log files when doing make distclean storage/maria/ha_maria.cc: Use 'file->start_state' as default state for transactional tables without versioning At table unlock, set file->state to point to live state. (Needed for information schema to pick up right number of rows) In ha_maria::implicit_commit() move all locked (ie open) tables to new transaction. This is needed to ensure ha_maria->info doesn't point to a deleted history event. Disable concurrent inserts for insert ... 
select and table changes with subqueries if statement based replication as this would cause wrong results on slave storage/maria/ma_blockrec.c: Updated comment storage/maria/ma_check.c: Compact key pages (removes transid) when doing --zerofill Check that 'page_flag' on key pages contains KEYPAGE_FLAG_HAS_TRANSID if there is a single key on the page with a transid Modified functions to use MARIA_KEY instead of key pointer and key length Use new interface to _ma_rec_pos(), _ma_dpointer(), _ma_ft_del(), ma_update_state_lsn() Removed not needed argument from get_record_for_key() Fixed that we check doesn't give errors for RTREE; We now treath these like SPATIAL Remove some SPATIAL specific code where the virtual functions can handle this in a general manner Use info->lastkey_buff instead of info->lastkey _ma_dpos() -> _ma_row_pos_from_key() _ma_make_key() -> keyinfo->make_key() _ma_print_key() -> _ma_print_keydata() _ma_move_key() -> ma_copy_copy() Add SEARCH_INSERT to signal ha_key_cmp that we we should also compare rowid for keys Ensure that data on page doesn't overwrite page checksum position Use DBUG_DUMP_KEY instead of DBUG_DUMP Use exact key lengths instead of USE_WHOLE_KEY to ha_key_cmp() Fixed check if rowid points outside of BLOCK_RECORD data file Use info->lastkey_buff instead of key on stack in some safe places Added #fdef EXTERNAL_LOCKING around removed state elements storage/maria/ma_close.c: Use keyinfo->root_lock instead of share->key_root_lock[keynr] storage/maria/ma_create.c: Removed assert that is already checked in maria_init() Force transactinal tables to be of type BLOCK_RECORD Fixed wrong usage of HA_PACK_RECORD (should be HA_OPTION_PACK_RECORD) Mark keys that uses HA_KEY_ALG_RTREE with HA_RTREE_INDEX for easier handling of these in ma_check Store max_trid in index file as state.create_trid. This is used to pack all transids in the index pages relative to max possible transid for file. 
storage/maria/ma_dbug.c: Changed _ma_print_key() to use MARIA_KEY storage/maria/ma_delete.c: Modified functions to use MARIA_KEY instead of key pointer and key length info->lastkey2-> info->lastkey_buff2 Added SEARCH_INSERT to signal ha_key_cmp that we we should also compare rowid for keys Use new interface for get_key(), _ma_get_last_key() and others _ma_dpos() -> ma_row_pos_from_key() Simplify setting of prev_key in del() Ensure that KEYPAGE_FLAG_HAS_TRANSID is set in page_flag if key page has transid Treath key pages that may have a transid as if keys would be of variable length storage/maria/ma_delete_all.c: Reset history state if maria_delete_all_rows() are called Update parameters to _ma_update_state_lsns() call storage/maria/ma_extra.c: Store and restore info->lastkey storage/maria/ma_ft_boolean_search.c: Modified functions to use MARIA_KEY instead of key pointer and key length storage/maria/ma_ft_nlq_search.c: Modified functions to use MARIA_KEY instead of key pointer and key length Use lastkey_buff2 instead of info->lastkey+info->s->base.max_key_length (same thing) storage/maria/ma_ft_update.c: Modified functions to use MARIA_KEY instead of key pointer and key length storage/maria/ma_ftdefs.h: Modified functions to use MARIA_KEY instead of key pointer and key length storage/maria/ma_fulltext.h: Modified functions to use MARIA_KEY instead of key pointer and key length storage/maria/ma_init.c: Check if blocksize is legal (Moved test here from ma_open()) storage/maria/ma_key.c: Added functions for storing/reading of transid Modified functions to use MARIA_KEY instead of key pointer and key length Moved _ma_sp_make_key() out of _ma_make_key() as we now use keyinfo->make_key to create keys Add transid to keys if table is versioned Added _ma_copy_key() storage/maria/ma_key_recover.c: Add logging of page_flag (holds information if there are keys with transid on page) Changed DBUG_PRINT("info" -> DBUG_PRINT("redo" as the redo logging can be quite extensive Added 
lots of DBUG_PRINT() Added support for index page operations: KEY_OP_SET_PAGEFLAG and KEY_OP_COMPACT_PAGE storage/maria/ma_key_recover.h: Modified functions to use MARIA_KEY instead of key pointer and key length storage/maria/ma_locking.c: Added new arguments to _ma_update_state_lsns_sub() storage/maria/ma_loghandler.c: Fixed all logging of LSN to look similar in DBUG log Changed if (left != 0) to if (left) as the later is used also later in the code storage/maria/ma_loghandler.h: Added new index page operations storage/maria/ma_open.c: Removed allocated "state_dummy" and instead use share->state.common for transactional tables that are not versioned This is needed to not get double increments of state.records (one in ma_write.c and on when log is written) Changed info->lastkey to MARIA_KEY type Removed resetting of MARIA_HA variables that have 0 as default value (as info is zerofilled) Enable versioning for transactional tables with index. Tables with an auto-increment key, rtree or fulltext keys are not versioned. Check on open that state.create_trid is correct Extend share->base.max_key_length in case of transactional table so that it can hold transid Removed 4.0 compatible fulltext key mode as this is not relevant for Maria Removed old and wrong #ifdef ENABLE_WHEN_WE_HAVE_TRANS_ROW_ID code block Initialize all new virtual function pointers Removed storing of state->unique, state->process and store state->create_trid instead storage/maria/ma_page.c: Added comment to describe key page structure Added functions to compact key page and log the compact operation storage/maria/ma_range.c: Modified functions to use MARIA_KEY instead of key pointer and key length Use SEARCH_PART_KEY indicator instead of USE_WHOLE_KEY to detect if we are doing a part key search Added handling of pages with transid storage/maria/ma_recovery.c: Don't assert if table we opened are not transactional. 
This may be a table which has been changed from transactional to not transactinal Added new arguments to _ma_update_state_lsns() storage/maria/ma_rename.c: Added new arguments to _ma_update_state_lsns() storage/maria/ma_rkey.c: Modified functions to use MARIA_KEY instead of key pointer and key length Don't use USE_WHOLE_KEY, use real length of key Use share->row_is_visible() to test if row is visible Moved search_flag == HA_READ_KEY_EXACT out of 'read-next-row' loop as this only need to be tested once Removed test if last_used_keyseg != 0 as this is always true storage/maria/ma_rnext.c: Modified functions to use MARIA_KEY instead of key pointer and key length Simplify code by using local variable keyinfo instead if share->keyinfo[i] Use share->row_is_visible() to test if row is visible storage/maria/ma_rnext_same.c: Modified functions to use MARIA_KEY instead of key pointer and key length lastkey2 -> lastkey_buff2 storage/maria/ma_rprev.c: Modified functions to use MARIA_KEY instead of key pointer and key length Simplify code by using local variable keyinfo instead if share->keyinfo[i] Use share->row_is_visible() to test if row is visible storage/maria/ma_rsame.c: Updated comment Simplify code by using local variable keyinfo instead if share->keyinfo[i] Modified functions to use MARIA_KEY instead of key pointer and key length storage/maria/ma_rsamepos.c: Modified functions to use MARIA_KEY instead of key pointer and key length storage/maria/ma_rt_index.c: Modified functions to use MARIA_KEY instead of key pointer and key length Use better variable names Removed not needed casts _ma_dpos() -> _ma_row_pos_from_key() Use info->last_rtree_keypos to save position to key instead of info->int_keypos Simplify err: condition Changed return type for maria_rtree_insert() to my_bool as we are only intressed in ok/fail from this function storage/maria/ma_rt_index.h: Modified functions to use MARIA_KEY instead of key pointer and key length storage/maria/ma_rt_key.c: Modified 
functions to use MARIA_KEY instead of key pointer and key length Simplify maria_rtree_add_key by combining idenitcal code and removing added_len storage/maria/ma_rt_key.h: Modified functions to use MARIA_KEY instead of key pointer and key length storage/maria/ma_rt_mbr.c: Changed type of 'nextflag' to uint32 Added 'to' argument to RT_PAGE_MBR_XXX functions to more clearly see which variables changes value storage/maria/ma_rt_mbr.h: Changed type of 'nextflag' to uint32 storage/maria/ma_rt_split.c: Modified functions to use MARIA_KEY instead of key pointer and key length key_length -> key_data_length to catch possible errors storage/maria/ma_rt_test.c: Fixed wrong comment Reset recinfo to avoid valgrind varnings Fixed wrong argument to create_record() that caused test to fail storage/maria/ma_search.c: Modified functions to use MARIA_KEY instead of key pointer and key length Added support of keys with optional trid Test for SEARCH_PART_KEY instead of USE_WHOLE_KEY to detect part key reads _ma_dpos() -> _ma_row_pos_from_key() If there may be keys with transid on the page, have _ma_bin_search() call _ma_seq_search() Add _ma_skip_xxx() functions to quickly step over keys (faster than calling get_key() in most cases as we don't have to copy key data) Combine similar code at end of _ma_get_binary_pack_key() Removed not used function _ma_move_key() In _ma_search_next() don't call _ma_search() if we aren't on a nod page. 
Update info->cur_row.trid with trid for found key Removed some not needed casts Added _ma_trid_from_key() Use MARIA_SHARE instead of MARIA_HA as arguments to _ma_rec_pos(), _ma_dpointer() and _ma_xxx_keypos_to_recpos() to make functions faster and smaller storage/maria/ma_sort.c: Modified functions to use MARIA_KEY instead of key pointer and key length storage/maria/ma_sp_defs.h: _ma_sp_make_key() now fills in and returns (MARIA_KEY *) value storage/maria/ma_sp_key.c: _ma_sp_make_key() now fills in and returns (MARIA_KEY *) value Don't test sizeof(double), test against 8 as we are using float8store() Use mi_float8store() instead of doing swap of value (same thing but faster) storage/maria/ma_state.c: maria_versioning() now only calls _ma_block_get_status() if table supports versioning Added _ma_row_visible_xxx() functions for different occasions When emptying history, set info->state to point to the first history event. storage/maria/ma_state.h: Added _ma_row_visible_xxx() prototypes storage/maria/ma_static.c: Indentation changes storage/maria/ma_statrec.c: Fixed arguments to _ma_dpointer() and _ma_rec_pos() storage/maria/ma_test1.c: Call init_thr_lock() if we have versioning storage/maria/ma_test2.c: Call init_thr_lock() if we have versioning storage/maria/ma_unique.c: Modified functions to use MARIA_KEY storage/maria/ma_update.c: Modified functions to use MARIA_KEY instead of key pointer and key length storage/maria/ma_write.c: Modified functions to use MARIA_KEY instead of key pointer and key length Simplify code by using local variable keyinfo instead if share->keyinfo[i] In _ma_enlarge_root(), mark in page_flag if new key has transid _ma_dpos() -> _ma_row_pos_from_key() Changed return type of _ma_ck_write_tree() to my_bool as we are only testing if result is true or not Moved 'reversed' to outside block as area was used later storage/maria/maria_chk.c: Added error if trying to sort with HA_BINARY_PACK_KEY Use new interface to get_key() and _ma_dpointer() 
_ma_dpos() -> _ma_row_pos_from_key() storage/maria/maria_def.h: Modified functions to use MARIA_KEY instead of key pointer and key length Added 'common' to MARIA_SHARE->state for storing state for transactional tables without versioning Added create_trid to MARIA_SHARE Removed not used state variables 'process' and 'unique' Added defines for handling TRID's in index pages Changed to use MARIA_SHARE instead of MARIA_HA for some functions Added 'have_versioning' flag if table supports versioning Moved key_root_lock from MARIA_SHARE to MARIA_KEYDEF Changed last_key to be of type MARIA_KEY. Removed lastkey_length lastkey -> lastkey_buff, lastkey2 -> lastkey_buff2 Added _ma_get_used_and_nod_with_flag() for faster access to page data when page_flag is read Added DBUG_DUMP_KEY for easier DBUG_DUMP of a key Changed 'nextflag' and assocaited variables to uint32 storage/maria/maria_ftdump.c: lastkey -> lastkey_buff storage/maria/trnman.c: Fixed wrong initialization of min_read_from and max_commit_trid Added trnman_get_min_safe_trid() storage/maria/unittest/ma_test_all-t: Added --start-from storage/myisam/mi_check.c: Added SEARCH_INSERT, as ha_key_cmp() needs it when doing key comparision for inserting key on page in rowid order storage/myisam/mi_delete.c: Added SEARCH_INSERT, as ha_key_cmp() needs it when doing key comparision for inserting key on page in rowid order storage/myisam/mi_range.c: Updated comment storage/myisam/mi_write.c: Added SEARCH_INSERT, as ha_key_cmp() needs it when doing key comparision for inserting key on page in rowid order storage/myisam/rt_index.c: Fixed wrong parameter to rtree_get_req() which could cause crash --- mysys/my_handler.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++------ mysys/my_lock.c | 2 +- 2 files changed, 90 insertions(+), 11 deletions(-) (limited to 'mysys') diff --git a/mysys/my_handler.c b/mysys/my_handler.c index 7a3b8269190..7c13149cb27 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -84,13 +84,15 @@ 
static int compare_bin(const uchar *a, uint a_length, ha_key_cmp() keyseg Array of key segments of key to compare a First key to compare, in format from _mi_pack_key() - This is normally key specified by user - b Second key to compare. This is always from a row - key_length Length of key to compare. This can be shorter than - a to just compare sub keys + This is always from the row + b Second key to compare. This is from the row or the user + key_length Length of key to compare, based on key b. This can be shorter + than b to just compare sub keys next_flag How keys should be compared If bit SEARCH_FIND is not set the keys includes the row position and this should also be compared + If SEARCH_PAGE_KEY_HAS_TRANSID is set then 'a' has transid + If SEARCH_USER_KEY_HAS_TRANSID is set then 'b' has transid diff_pos OUT Number of first keypart where values differ, counting from one. diff_pos[1] OUT (b + diff_pos[1]) points to first value in tuple b @@ -120,7 +122,7 @@ static int compare_bin(const uchar *a, uint a_length, #define FCMP(A,B) ((int) (A) - (int) (B)) int ha_key_cmp(register HA_KEYSEG *keyseg, register const uchar *a, - register const uchar *b, uint key_length, uint nextflag, + register const uchar *b, uint key_length, uint32 nextflag, uint *diff_pos) { int flag; @@ -152,8 +154,13 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register const uchar *a, b++; if (!*a++) /* If key was NULL */ { - if (nextflag == (SEARCH_FIND | SEARCH_UPDATE)) - nextflag=SEARCH_SAME; /* Allow duplicate keys */ + if ((nextflag & (SEARCH_FIND | SEARCH_UPDATE | SEARCH_INSERT | + SEARCH_NULL_ARE_EQUAL)) == + (SEARCH_FIND | SEARCH_UPDATE | SEARCH_INSERT)) + { + /* Allow duplicate keys */ + nextflag= (nextflag & ~(SEARCH_FIND | SEARCH_UPDATE)) | SEARCH_SAME; + } else if (nextflag & SEARCH_NULL_ARE_NOT_EQUAL) { /* @@ -456,18 +463,90 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register const uchar *a, end: if (!(nextflag & SEARCH_FIND)) { + /* + Compare rowid and possible transid + This 
happens in the following case: + - INSERT, UPDATE, DELETE when we have not unique keys or + are using versioning + - SEARCH_NEXT, SEARCH_PREVIOUS when we need to restart search + + The logic for comparing transid are as follows: + Keys with have a transid have lowest bit in the rowidt. This means that + if we are comparing a key with a transid with another key that doesn't + have a tranid, we must reset the lowest bit for both keys. + + When we have transid, the keys are compared in transid order. + A key without a transid is regared to be smaller than a key with + a transid. + */ + uint i; + uchar key_mask, tmp_a, tmp_b; + if (nextflag & (SEARCH_NO_FIND | SEARCH_LAST)) /* Find record after key */ return (nextflag & (SEARCH_BIGGER | SEARCH_LAST)) ? -1 : 1; - flag=0; - for (i=keyseg->length ; i-- > 0 ; ) + key_mask= (uchar) 255; + + if (!(nextflag & (SEARCH_USER_KEY_HAS_TRANSID | + SEARCH_PAGE_KEY_HAS_TRANSID))) + { + /* + Neither key has a trid. Only compare row id's and don't + try to store rows in trid order + */ + key_length= keyseg->length; + nextflag&= ~SEARCH_INSERT; + } + else + { + /* + Set key_mask so that we reset the last bit in the rowid before + we compare it. This is needed as the lowest bit in the rowid is + used to mark if the key has a transid or not. + */ + key_mask= (uchar) 254; + if (!test_all_bits(nextflag, (SEARCH_USER_KEY_HAS_TRANSID | + SEARCH_PAGE_KEY_HAS_TRANSID))) + { + /* + No transaction id for user key or for key on page + Ignore transid as at least one of the keys are visible for all + */ + key_length= keyseg->length; + } + else + { + /* + Both keys have trids. No need of special handling of incomplete + trids below. 
+ */ + nextflag&= ~SEARCH_INSERT; + } + } + DBUG_ASSERT(key_length > 0); + + for (i= key_length-1 ; (int) i-- > 0 ; ) { if (*a++ != *b++) { flag= FCMP(a[-1],b[-1]); - break; + goto found; } } + tmp_a= *a & key_mask; + tmp_b= *b & key_mask; + flag= FCMP(tmp_a, tmp_b); + + if (flag == 0 && (nextflag & SEARCH_INSERT)) + { + /* + Ensure that on insert we get rows stored in trid order. + If one of the parts doesn't have a trid, this should be regarded + as smaller than the other + */ + return (nextflag & SEARCH_USER_KEY_HAS_TRANSID) ? -1 : 1; + } +found: if (nextflag & SEARCH_SAME) return (flag); /* read same */ if (nextflag & SEARCH_BIGGER) diff --git a/mysys/my_lock.c b/mysys/my_lock.c index 200ee7188c9..8450fcfc30a 100644 --- a/mysys/my_lock.c +++ b/mysys/my_lock.c @@ -87,7 +87,7 @@ int my_lock(File fd, int locktype, my_off_t start, my_off_t length, nxLockFlags = NX_RANGE_LOCK_EXCL; } - if (MyFlags & MY_DONT_WAIT) + if (MyFlags & MY_NO_WAIT) { /* Don't block on the lock. */ nxLockFlags |= NX_RANGE_LOCK_TRYLOCK; -- cgit v1.2.1 From 6ba12f070c65a445ba3f6758c1a49a872c627561 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Tue, 29 Jul 2008 16:10:24 +0200 Subject: WL#3064 - waiting threads - wait-for graph and deadlock detection client/mysqltest.c: compiler warnings configure.in: remove old tests for unused programs disable the use of gcc built-ins if smp assembler atomics were selected explictily. add waiting_threads.o to THREAD_LOBJECTS include/lf.h: replace the end-of-stack pointer with the pointer to the end-of-stack pointer. the latter could be stored in THD (mysys_vars) and updated in pool-of-threads scheduler. 
constructor/destructor in lf-alloc include/my_pthread.h: shuffle set_timespec/set_timespec_nsec macros a bit to be able to fill several timeout structures with only one my_getsystime() call include/waiting_threads.h: waiting threads - wait-for graph and deadlock detection mysys/Makefile.am: add waiting_threads.c mysys/lf_alloc-pin.c: replace the end-of-stack pointer with the pointer to the end-of-stack pointer. the latter could be stored in THD (mysys_vars) and updated in pool-of-threads scheduler. constructor/destructor in lf-alloc mysys/lf_hash.c: constructor/destructor in lf-alloc mysys/my_thr_init.c: remember end-of-stack pointer in the mysys_var mysys/waiting_threads.c: waiting threads - wait-for graph and deadlock detection storage/maria/ha_maria.cc: replace the end-of-stack pointer with the pointer to the end-of-stack pointer. the latter could be stored in THD (mysys_vars) and updated in pool-of-threads scheduler. storage/maria/ma_commit.c: replace the end-of-stack pointer with the pointer to the end-of-stack pointer. the latter could be stored in THD (mysys_vars) and updated in pool-of-threads scheduler. storage/maria/trnman.c: replace the end-of-stack pointer with the pointer to the end-of-stack pointer. the latter could be stored in THD (mysys_vars) and updated in pool-of-threads scheduler. storage/maria/trnman_public.h: replace the end-of-stack pointer with the pointer to the end-of-stack pointer. the latter could be stored in THD (mysys_vars) and updated in pool-of-threads scheduler. storage/maria/unittest/trnman-t.c: replace the end-of-stack pointer with the pointer to the end-of-stack pointer. the latter could be stored in THD (mysys_vars) and updated in pool-of-threads scheduler. 
unittest/mysys/Makefile.am: add waiting_threads-t unittest/mysys/lf-t.c: factor out the common code for multi-threaded stress unit tests move lf tests to a separate file unittest/mysys/my_atomic-t.c: factor out the common code for multi-threaded stress unit tests move lf tests to a separate file unittest/mysys/thr_template.c: factor out the common code for multi-threaded stress unit tests unittest/mysys/waiting_threads-t.c: wt tests --- mysys/Makefile.am | 2 +- mysys/lf_alloc-pin.c | 52 ++-- mysys/lf_hash.c | 15 +- mysys/my_thr_init.c | 8 +- mysys/waiting_threads.c | 641 ++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 688 insertions(+), 30 deletions(-) create mode 100644 mysys/waiting_threads.c (limited to 'mysys') diff --git a/mysys/Makefile.am b/mysys/Makefile.am index 7bb98770d06..54553680341 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -58,7 +58,7 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ my_windac.c my_access.c base64.c my_libwrap.c \ wqueue.c EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \ - thr_mutex.c thr_rwlock.c \ + thr_mutex.c thr_rwlock.c waiting_threads.c \ CMakeLists.txt mf_soundex.c \ my_conio.c my_wincond.c my_winthread.c libmysys_a_LIBADD = @THREAD_LOBJECTS@ diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index 40438e93596..4fae8e37ddb 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -96,11 +96,10 @@ versioning a pointer - because we use an array, a pointer to pins is 16 bit, upper 16 bits are used for a version. - It is assumed that pins belong to a thread and are not transferable - between threads (LF_PINS::stack_ends_here being a primary reason + It is assumed that pins belong to a THD and are not transferable + between THD's (LF_PINS::stack_ends_here being a primary reason for this limitation). 
*/ - #include #include #include @@ -137,10 +136,6 @@ void lf_pinbox_destroy(LF_PINBOX *pinbox) SYNOPSYS pinbox - - stack_end - a pointer to the end (top/bottom, depending on the - STACK_DIRECTION) of stack. Used for safe alloca. There's - no safety margin deducted, a caller should take care of it, - if necessary. DESCRIPTION get a new LF_PINS structure from a stack of unused pins, @@ -150,7 +145,7 @@ void lf_pinbox_destroy(LF_PINBOX *pinbox) It is assumed that pins belong to a thread and are not transferable between threads. */ -LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox, void *stack_end) +LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox) { uint32 pins, next, top_ver; LF_PINS *el; @@ -194,7 +189,7 @@ LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox, void *stack_end) el->link= pins; el->purgatory_count= 0; el->pinbox= pinbox; - el->stack_ends_here= stack_end; + el->stack_ends_here= & my_thread_var->stack_ends_here; return el; } @@ -325,6 +320,9 @@ static int match_pins(LF_PINS *el, void *addr) #define available_stack_size(CUR,END) (long) ((char*)(END) - (char*)(CUR)) #endif +#define next_node(P, X) (*((uchar **)(((uchar *)(X)) + (P)->free_ptr_offset))) +#define anext_node(X) next_node(&allocator->pinbox, (X)) + /* Scan the purgatory and free everything that can be freed */ @@ -332,7 +330,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins) { int npins, alloca_size; void *list, **addr; - struct st_lf_alloc_node *first, *last= NULL; + uchar *first, *last= NULL; LF_PINBOX *pinbox= pins->pinbox; LINT_INIT(first); @@ -341,7 +339,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins) #ifdef HAVE_ALLOCA alloca_size= sizeof(void *)*LF_PINBOX_PINS*npins; /* create a sorted list of pinned addresses, to speed up searches */ - if (available_stack_size(&pinbox, pins->stack_ends_here) > alloca_size) + if (available_stack_size(&pinbox, *pins->stack_ends_here) > alloca_size) { struct st_harvester hv; addr= (void **) alloca(alloca_size); @@ -391,9 +389,9 @@ static void 
_lf_pinbox_real_free(LF_PINS *pins) } /* not pinned - freeing */ if (last) - last= last->next= (struct st_lf_alloc_node *)cur; + last= next_node(pinbox, last)= (uchar *)cur; else - first= last= (struct st_lf_alloc_node *)cur; + first= last= (uchar *)cur; continue; found: /* pinned - keeping */ @@ -412,22 +410,22 @@ LF_REQUIRE_PINS(1) add it back to the allocator stack DESCRIPTION - 'first' and 'last' are the ends of the linked list of st_lf_alloc_node's: + 'first' and 'last' are the ends of the linked list of nodes: first->el->el->....->el->last. Use first==last to free only one element. */ -static void alloc_free(struct st_lf_alloc_node *first, - struct st_lf_alloc_node volatile *last, +static void alloc_free(uchar *first, + uchar volatile *last, LF_ALLOCATOR *allocator) { /* we need a union here to access type-punned pointer reliably. otherwise gcc -fstrict-aliasing will not see 'tmp' changed in the loop */ - union { struct st_lf_alloc_node * node; void *ptr; } tmp; + union { uchar * node; void *ptr; } tmp; tmp.node= allocator->top; do { - last->next= tmp.node; + anext_node(last)= tmp.node; } while (!my_atomic_casptr((void **)(char *)&allocator->top, (void **)&tmp.ptr, first) && LF_BACKOFF); } @@ -452,6 +450,8 @@ void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset) allocator->top= 0; allocator->mallocs= 0; allocator->element_size= size; + allocator->constructor= 0; + allocator->destructor= 0; DBUG_ASSERT(size >= sizeof(void*) + free_ptr_offset); } @@ -468,10 +468,12 @@ void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset) */ void lf_alloc_destroy(LF_ALLOCATOR *allocator) { - struct st_lf_alloc_node *node= allocator->top; + uchar *node= allocator->top; while (node) { - struct st_lf_alloc_node *tmp= node->next; + uchar *tmp= anext_node(node); + if (allocator->destructor) + allocator->destructor(node); my_free((void *)node, MYF(0)); node= tmp; } @@ -489,7 +491,7 @@ void lf_alloc_destroy(LF_ALLOCATOR *allocator) void 
*_lf_alloc_new(LF_PINS *pins) { LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg); - struct st_lf_alloc_node *node; + uchar *node; for (;;) { do @@ -500,6 +502,8 @@ void *_lf_alloc_new(LF_PINS *pins) if (!node) { node= (void *)my_malloc(allocator->element_size, MYF(MY_WME)); + if (allocator->constructor) + allocator->constructor(node); #ifdef MY_LF_EXTRA_DEBUG if (likely(node != 0)) my_atomic_add32(&allocator->mallocs, 1); @@ -507,7 +511,7 @@ void *_lf_alloc_new(LF_PINS *pins) break; } if (my_atomic_casptr((void **)(char *)&allocator->top, - (void *)&node, node->next)) + (void *)&node, anext_node(node))) break; } _lf_unpin(pins, 0); @@ -523,8 +527,8 @@ void *_lf_alloc_new(LF_PINS *pins) uint lf_alloc_pool_count(LF_ALLOCATOR *allocator) { uint i; - struct st_lf_alloc_node *node; - for (node= allocator->top, i= 0; node; node= node->next, i++) + uchar *node; + for (node= allocator->top, i= 0; node; node= anext_node(node), i++) /* no op */; return i; } diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index c197cc99711..008abef0c8b 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -299,11 +299,22 @@ static int initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *); /* Initializes lf_hash, the arguments are compatible with hash_init + + @@note element_size sets both the size of allocated memory block for + lf_alloc and a size of memcpy'ed block size in lf_hash_insert. Typically + they are the same, indeed. But LF_HASH::element_size can be decreased + after lf_hash_init, and then lf_alloc will allocate larger block that + lf_hash_insert will copy over. It is desireable if part of the element + is expensive to initialize - for example if there is a mutex or + DYNAMIC_ARRAY. In this case they should be initialize in the + LF_ALLOCATOR::constructor, and lf_hash_insert should not overwrite them. + See wt_init() for example. 
*/ void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, uint key_offset, uint key_length, hash_get_key get_key, CHARSET_INFO *charset) { + compile_time_assert(sizeof(LF_SLIST) == LF_HASH_OVERHEAD); lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size, offsetof(LF_SLIST, key)); lf_dynarray_init(&hash->array, sizeof(LF_SLIST *)); @@ -453,7 +464,7 @@ void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) return found ? found+1 : 0; } -static const uchar *dummy_key= ""; +static const uchar *dummy_key= (uchar*)""; /* RETURN @@ -473,7 +484,7 @@ static int initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, unlikely(initialize_bucket(hash, el, parent, pins))) return -1; dummy->hashnr= my_reverse_bits(bucket) | 0; /* dummy node */ - dummy->key= (char*) dummy_key; + dummy->key= dummy_key; dummy->keylen= 0; if ((cur= linsert(el, hash->charset, dummy, pins, LF_HASH_UNIQUE))) { diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c index f5fee06916e..1d03577ce34 100644 --- a/mysys/my_thr_init.c +++ b/mysys/my_thr_init.c @@ -256,7 +256,7 @@ my_bool my_thread_init(void) #ifdef EXTRA_DEBUG_THREADS fprintf(stderr,"my_thread_init(): thread_id: 0x%lx\n", (ulong) pthread_self()); -#endif +#endif #if !defined(__WIN__) || defined(USE_TLS) if (my_pthread_getspecific(struct st_my_thread_var *,THR_KEY_mysys)) @@ -264,7 +264,7 @@ my_bool my_thread_init(void) #ifdef EXTRA_DEBUG_THREADS fprintf(stderr,"my_thread_init() called more than once in thread 0x%lx\n", (long) pthread_self()); -#endif +#endif goto end; } if (!(tmp= (struct st_my_thread_var *) calloc(1, sizeof(*tmp)))) @@ -290,6 +290,8 @@ my_bool my_thread_init(void) pthread_mutex_init(&tmp->mutex,MY_MUTEX_INIT_FAST); pthread_cond_init(&tmp->suspend, NULL); + tmp->stack_ends_here= &tmp + STACK_DIRECTION * my_thread_stack_size; + pthread_mutex_lock(&THR_LOCK_threads); tmp->id= ++thread_id; ++THR_thread_count; @@ -325,7 +327,7 @@ void my_thread_end(void) #ifdef EXTRA_DEBUG_THREADS 
fprintf(stderr,"my_thread_end(): tmp: 0x%lx pthread_self: 0x%lx thread_id: %ld\n", (long) tmp, (long) pthread_self(), tmp ? (long) tmp->id : 0L); -#endif +#endif if (tmp && tmp->init) { #if !defined(DBUG_OFF) diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c new file mode 100644 index 00000000000..4d375fdc899 --- /dev/null +++ b/mysys/waiting_threads.c @@ -0,0 +1,641 @@ +/* Copyright (C) 2008 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Note that if your lock system satisfy the following condition: + + there exist four lock levels A, B, C, D, such as + A is compatible with B + A is not compatible with C + D is not compatible with B + + (example A=IX, B=IS, C=S, D=X) + + you need to include lock level in the resource identifier - thread 1 + waiting for lock A on resource R and thread 2 waiting for lock B + on resource R should wait on different WT_RESOURCE structures, on different + {lock, resource} pairs. Otherwise the following is possible: + + thread1> take S-lock on R + thread2> take IS-lock on R + thread3> wants X-lock on R, starts waiting for threads 1 and 2 on R. + thread3 is killed (or timeout or whatever) + WT_RESOURCE structure for R is still in the hash, as it has two owners + thread4> wants an IX-lock on R + WT_RESOURCE for R is found in the hash, thread4 starts waiting on it. + !! 
now thread4 is waiting for both thread1 and thread2 + !! while, in fact, IX-lock and IS-lock are compatible and + !! thread4 should not wait for thread2. +*/ + +#include +#include + +uint wt_timeout_short=100, wt_deadlock_search_depth_short=4; +uint wt_timeout_long=10000, wt_deadlock_search_depth_long=15; + +/* + status variables: + distribution of cycle lengths + wait time log distribution + + Note: + + we call deadlock() twice per wait (with different search lengths). + it means a deadlock will be counted twice. It's difficult to avoid, + as on the second search we could find a *different* deadlock and we + *want* to count it too. So we just count all deadlocks - two searches + mean two increments on the wt_cycle_stats. +*/ + +ulonglong wt_wait_table[WT_WAIT_STATS]; +uint32 wt_wait_stats[WT_WAIT_STATS+1]; +uint32 wt_cycle_stats[2][WT_CYCLE_STATS+1], wt_success_stats; + +static my_atomic_rwlock_t cycle_stats_lock, wait_stats_lock, success_stats_lock; + +#define increment_success_stats() \ + do { \ + my_atomic_rwlock_wrlock(&success_stats_lock); \ + my_atomic_add32(&wt_success_stats, 1); \ + my_atomic_rwlock_wrunlock(&success_stats_lock); \ + } while (0) + +#define increment_cycle_stats(X,MAX) \ + do { \ + uint i= (X), j= (MAX) == wt_deadlock_search_depth_long; \ + if (i >= WT_CYCLE_STATS) \ + i= WT_CYCLE_STATS; \ + my_atomic_rwlock_wrlock(&cycle_stats_lock); \ + my_atomic_add32(&wt_cycle_stats[j][i], 1); \ + my_atomic_rwlock_wrunlock(&cycle_stats_lock); \ + } while (0) + +#define increment_wait_stats(X,RET) \ + do { \ + uint i; \ + if ((RET) == ETIMEDOUT) \ + i= WT_WAIT_STATS; \ + else \ + { \ + ulonglong w=(X)/10; \ + for (i=0; i < WT_WAIT_STATS && w > wt_wait_table[i]; i++) ; \ + } \ + my_atomic_rwlock_wrlock(&wait_stats_lock); \ + my_atomic_add32(wt_wait_stats+i, 1); \ + my_atomic_rwlock_wrunlock(&wait_stats_lock); \ + } while (0) + +#define rc_rdlock(X) \ + do { \ + WT_RESOURCE *R=(X); \ + DBUG_PRINT("wt", ("LOCK resid=%lld for READ", R->id.value.num)); \ + 
pthread_rwlock_rdlock(&R->lock); \ + } while (0) +#define rc_wrlock(X) \ + do { \ + WT_RESOURCE *R=(X); \ + DBUG_PRINT("wt", ("LOCK resid=%lld for WRITE", R->id.value.num)); \ + pthread_rwlock_wrlock(&R->lock); \ + } while (0) +#define rc_unlock(X) \ + do { \ + WT_RESOURCE *R=(X); \ + DBUG_PRINT("wt", ("UNLOCK resid=%lld", R->id.value.num)); \ + pthread_rwlock_unlock(&R->lock); \ + } while (0) + +static LF_HASH reshash; + +static void wt_resource_init(uchar *arg) +{ + WT_RESOURCE *rc=(WT_RESOURCE*)(arg+LF_HASH_OVERHEAD); + DBUG_ENTER("wt_resource_init"); + + bzero(rc, sizeof(*rc)); + pthread_rwlock_init(&rc->lock, 0); + pthread_cond_init(&rc->cond, 0); + my_init_dynamic_array(&rc->owners, sizeof(WT_THD *), 5, 5); + DBUG_VOID_RETURN; +} + +static void wt_resource_destroy(uchar *arg) +{ + WT_RESOURCE *rc=(WT_RESOURCE*)(arg+LF_HASH_OVERHEAD); + DBUG_ENTER("wt_resource_destroy"); + + DBUG_ASSERT(rc->owners.elements == 0); + pthread_rwlock_destroy(&rc->lock); + pthread_cond_destroy(&rc->cond); + delete_dynamic(&rc->owners); + DBUG_VOID_RETURN; +} + +void wt_init() +{ + DBUG_ENTER("wt_init"); + + lf_hash_init(&reshash, sizeof(WT_RESOURCE), LF_HASH_UNIQUE, 0, + sizeof(struct st_wt_resource_id), 0, 0); + reshash.alloc.constructor= wt_resource_init; + reshash.alloc.destructor= wt_resource_destroy; + /* + Note a trick: we initialize the hash with the real element size, + but fix it later to a shortened element size. This way + the allocator will allocate elements correctly, but + lf_hash_insert() will only overwrite part of the element with memcpy(). + lock, condition, and dynamic array will be intact. 
+ */ + reshash.element_size= offsetof(WT_RESOURCE, lock); + bzero(wt_wait_stats, sizeof(wt_wait_stats)); + bzero(wt_cycle_stats, sizeof(wt_cycle_stats)); + wt_success_stats=0; + { + int i; + double from=log(1); /* 1 us */ + double to=log(60e6); /* 1 min */ + for (i=0; i < WT_WAIT_STATS; i++) + { + wt_wait_table[i]=(ulonglong)exp((to-from)/(WT_WAIT_STATS-1)*i+from); + DBUG_ASSERT(i==0 || wt_wait_table[i-1] != wt_wait_table[i]); + } + } + my_atomic_rwlock_init(&cycle_stats_lock); + my_atomic_rwlock_init(&success_stats_lock); + my_atomic_rwlock_init(&wait_stats_lock); + DBUG_VOID_RETURN; +} + +void wt_end() +{ + DBUG_ENTER("wt_end"); + + DBUG_ASSERT(reshash.count == 0); + lf_hash_destroy(&reshash); + my_atomic_rwlock_destroy(&cycle_stats_lock); + my_atomic_rwlock_destroy(&success_stats_lock); + my_atomic_rwlock_destroy(&wait_stats_lock); + DBUG_VOID_RETURN; +} + +void wt_thd_init(WT_THD *thd) +{ + DBUG_ENTER("wt_thd_init"); + + my_init_dynamic_array(&thd->my_resources, sizeof(WT_RESOURCE *), 10, 5); + thd->pins=lf_hash_get_pins(&reshash); + thd->waiting_for=0; + thd->weight=0; +#ifndef DBUG_OFF + thd->name=my_thread_name(); +#endif + DBUG_VOID_RETURN; +} + +void wt_thd_destroy(WT_THD *thd) +{ + DBUG_ENTER("wt_thd_destroy"); + + DBUG_ASSERT(thd->my_resources.elements == 0); + delete_dynamic(&thd->my_resources); + lf_hash_put_pins(thd->pins); + thd->waiting_for=0; + DBUG_VOID_RETURN; +} + +int wt_resource_id_memcmp(void *a, void *b) +{ + return memcmp(a, b, sizeof(WT_RESOURCE_ID)); +} + +struct deadlock_arg { + WT_THD *thd; + uint max_depth; + WT_THD *victim; + WT_RESOURCE *rc; +}; + +/* + loop detection in a wait-for graph with a limited search depth. 
+*/ +static int deadlock_search(struct deadlock_arg *arg, WT_THD *blocker, + uint depth) +{ + WT_RESOURCE *rc, *volatile *shared_ptr= &blocker->waiting_for; + WT_THD *cursor; + uint i; + int ret= WT_OK; + DBUG_ENTER("deadlock_search"); + DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, depth=%u", + arg->thd->name, blocker->name, depth)); + + LF_REQUIRE_PINS(1); + + arg->rc= 0; + + if (depth > arg->max_depth) + { + DBUG_PRINT("wt", ("exit: WT_DEPTH_EXCEEDED (early)")); + DBUG_RETURN(WT_DEPTH_EXCEEDED); + } + +retry: + /* safe dereference as explained in lf_alloc-pin.c */ + do + { + rc= *shared_ptr; + lf_pin(arg->thd->pins, 0, rc); + } while (rc != *shared_ptr && LF_BACKOFF); + + if (rc == 0) + { + DBUG_PRINT("wt", ("exit: OK (early)")); + DBUG_RETURN(0); + } + + rc_rdlock(rc); + if (rc->state != ACTIVE || *shared_ptr != rc) + { + rc_unlock(rc); + lf_unpin(arg->thd->pins, 0); + goto retry; + } + lf_unpin(arg->thd->pins, 0); + + for (i=0; i < rc->owners.elements; i++) + { + cursor= *dynamic_element(&rc->owners, i, WT_THD**); + if (cursor == arg->thd) + { + ret= WT_DEADLOCK; + increment_cycle_stats(depth, arg->max_depth); + arg->victim= cursor; + goto end; + } + } + for (i=0; i < rc->owners.elements; i++) + { + cursor= *dynamic_element(&rc->owners, i, WT_THD**); + switch (deadlock_search(arg, cursor, depth+1)) { + case WT_DEPTH_EXCEEDED: + ret= WT_DEPTH_EXCEEDED; + break; + case WT_DEADLOCK: + ret= WT_DEADLOCK; + if (cursor->weight < arg->victim->weight) + { + if (arg->victim != arg->thd) + { + rc_unlock(arg->victim->waiting_for); /* release the previous victim */ + DBUG_ASSERT(arg->rc == cursor->waiting_for); + } + arg->victim= cursor; + } + else if (arg->rc) + rc_unlock(arg->rc); + goto end; + case WT_OK: + break; + default: + DBUG_ASSERT(0); + } + if (arg->rc) + rc_unlock(arg->rc); + } +end: + arg->rc= rc; + DBUG_PRINT("wt", ("exit: %s", + ret == WT_DEPTH_EXCEEDED ? "WT_DEPTH_EXCEEDED" : + ret ? 
"WT_DEADLOCK" : "OK")); + DBUG_RETURN(ret); +} + +static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth, + uint max_depth) +{ + struct deadlock_arg arg= {thd, max_depth, 0, 0}; + int ret; + DBUG_ENTER("deadlock"); + ret= deadlock_search(&arg, blocker, depth); + if (arg.rc) + rc_unlock(arg.rc); + if (ret == WT_DEPTH_EXCEEDED) + { + increment_cycle_stats(WT_CYCLE_STATS, max_depth); + ret= WT_OK; + } + if (ret == WT_DEADLOCK && arg.victim != thd) + { + DBUG_PRINT("wt", ("killing %s", arg.victim->name)); + arg.victim->killed=1; + pthread_cond_broadcast(&arg.victim->waiting_for->cond); + rc_unlock(arg.victim->waiting_for); + ret= WT_OK; + } + DBUG_RETURN(ret); +} + + +/* + Deletes an element from reshash. + rc->lock must be locked by the caller and it's unlocked on return. +*/ +static void unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc) +{ + uint keylen; + const void *key; + DBUG_ENTER("unlock_lock_and_free_resource"); + + DBUG_ASSERT(rc->state == ACTIVE); + + if (rc->owners.elements || rc->waiter_count) + { + DBUG_PRINT("wt", ("nothing to do, %d owners, %d waiters", + rc->owners.elements, rc->waiter_count)); + rc_unlock(rc); + DBUG_VOID_RETURN; + } + + /* XXX if (rc->id.type->make_key) key= rc->id.type->make_key(&rc->id, &keylen); else */ + { + key= &rc->id; + keylen= sizeof(rc->id); + } + + /* + To free the element correctly we need to: + 1. take its lock (already done). + 2. set the state to FREE + 3. release the lock + 4. remove from the hash + + I *think* it's safe to release the lock while the element is still + in the hash. If not, the corrected procedure should be + 3. pin; 4; remove; 5; release; 6; unpin and it'll need pin[3]. 
+ */ + rc->state=FREE; + rc_unlock(rc); + lf_hash_delete(&reshash, thd->pins, key, keylen); + DBUG_VOID_RETURN; +} + + +int wt_thd_dontwait_locked(WT_THD *thd) +{ + WT_RESOURCE *rc= thd->waiting_for; + DBUG_ENTER("wt_thd_dontwait_locked"); + + DBUG_ASSERT(rc->waiter_count); + DBUG_ASSERT(rc->state == ACTIVE); + rc->waiter_count--; + thd->waiting_for= 0; + unlock_lock_and_free_resource(thd, rc); + DBUG_RETURN(thd->killed ? WT_DEADLOCK : WT_OK); +} + +int wt_thd_dontwait(WT_THD *thd) +{ + int ret; + WT_RESOURCE *rc= thd->waiting_for; + DBUG_ENTER("wt_thd_dontwait"); + + if (!rc) + DBUG_RETURN(WT_OK); + /* + nobody's trying to free the resource now, + as its waiter_count is guaranteed to be non-zero + */ + rc_wrlock(rc); + ret= wt_thd_dontwait_locked(thd); + DBUG_RETURN(ret); +} + +/* + called by a *waiter* to declare what resource it will wait for. + can be called many times, if many blockers own a blocking resource. + but must always be called with the same resource id - a thread cannot + wait for more than one resource at a time. 
+*/ +int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker, WT_RESOURCE_ID *resid) +{ + uint i; + WT_RESOURCE *rc; + DBUG_ENTER("wt_thd_will_wait_for"); + + LF_REQUIRE_PINS(3); + + DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, resid=%llu", + thd->name, blocker->name, resid->value.num)); + + if (thd->waiting_for == 0) + { + uint keylen; + const void *key; + /* XXX if (restype->make_key) key= restype->make_key(resid, &keylen); else */ + { + key= resid; + keylen= sizeof(*resid); + } + + DBUG_PRINT("wt", ("first blocker")); + +retry: + while ((rc= lf_hash_search(&reshash, thd->pins, key, keylen)) == 0) + { + WT_RESOURCE tmp; + + DBUG_PRINT("wt", ("failed to find rc in hash, inserting")); + bzero(&tmp, sizeof(tmp)); + tmp.waiter_count= 0; + tmp.id= *resid; + tmp.state= ACTIVE; +#ifndef DBUG_OFF + tmp.mutex= 0; +#endif + + lf_hash_insert(&reshash, thd->pins, &tmp); + /* + Two cases: either lf_hash_insert() failed - because another thread + has just inserted a resource with the same id - and we need to retry. + Or lf_hash_insert() succeeded, and then we need to repeat + lf_hash_search() to find a real address of the newly inserted element. + That is, we don't care what lf_hash_insert() has returned. + And we need to repeat the loop anyway. 
+ */ + } + DBUG_PRINT("wt", ("found in hash rc=%p", rc)); + + rc_wrlock(rc); + if (rc->state != ACTIVE) + { + DBUG_PRINT("wt", ("but it's not active, retrying")); + /* Somebody has freed the element while we weren't looking */ + rc_unlock(rc); + lf_hash_search_unpin(thd->pins); + goto retry; + } + + lf_hash_search_unpin(thd->pins); /* the element cannot go away anymore */ + thd->waiting_for= rc; + rc->waiter_count++; + thd->killed= 0; + + } + else + { + DBUG_ASSERT(thd->waiting_for->id.type == resid->type); + DBUG_ASSERT(resid->type->compare(&thd->waiting_for->id, resid) == 0); + DBUG_PRINT("wt", ("adding another blocker")); + + /* + we can safely access the resource here, it's in the hash as it has + at least one owner, and non-zero waiter_count + */ + rc= thd->waiting_for; + rc_wrlock(rc); + DBUG_ASSERT(rc->waiter_count); + DBUG_ASSERT(rc->state == ACTIVE); + + if (thd->killed) + { + wt_thd_dontwait_locked(thd); + DBUG_RETURN(WT_DEADLOCK); + } + } + for (i=0; i < rc->owners.elements; i++) + if (*dynamic_element(&rc->owners, i, WT_THD**) == blocker) + break; + if (i >= rc->owners.elements) + { + push_dynamic(&blocker->my_resources, (void*)&rc); + push_dynamic(&rc->owners, (void*)&blocker); + } + rc_unlock(rc); + + if (deadlock(thd, blocker, 1, wt_deadlock_search_depth_short)) + { + wt_thd_dontwait(thd); + DBUG_RETURN(WT_DEADLOCK); + } + DBUG_RETURN(0); +} + +/* + called by a *waiter* to start waiting + + It's supposed to be a drop-in replacement for + pthread_cond_timedwait(), and it takes mutex as an argument. 
+*/ +int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex) +{ + int ret= WT_OK; + struct timespec timeout; + ulonglong before, after, starttime; + WT_RESOURCE *rc= thd->waiting_for; + DBUG_ENTER("wt_thd_cond_timedwait"); + DBUG_PRINT("wt", ("enter: thd=%s, rc=%p", thd->name, rc)); + +#ifndef DBUG_OFF + if (rc->mutex) + DBUG_ASSERT(rc->mutex == mutex); + else + rc->mutex= mutex; + safe_mutex_assert_owner(mutex); +#endif + + before= starttime= my_getsystime(); + +#ifdef __WIN__ + /* + only for the sake of Windows we distinguish between + 'before' and 'starttime' + */ + GetSystemTimeAsFileTime((PFILETIME)&starttime); +#endif + + set_timespec_time_nsec(timeout, starttime, wt_timeout_short*1000); + if (!thd->killed) + ret= pthread_cond_timedwait(&rc->cond, mutex, &timeout); + if (ret == WT_TIMEOUT) + { + if (deadlock(thd, thd, 0, wt_deadlock_search_depth_long)) + ret= WT_DEADLOCK; + else if (wt_timeout_long > wt_timeout_short) + { + set_timespec_time_nsec(timeout, starttime, wt_timeout_long*1000); + if (!thd->killed) + ret= pthread_cond_timedwait(&rc->cond, mutex, &timeout); + } + } + after= my_getsystime(); + if (wt_thd_dontwait(thd) == WT_DEADLOCK) + ret= WT_DEADLOCK; + increment_wait_stats(after-before, ret); + if (ret == WT_OK) + increment_success_stats(); + DBUG_RETURN(ret); +} + +/* + called by a *blocker* when it releases a resource + + when resid==0 all resources will be freed + + Note: it's conceptually similar to pthread_cond_broadcast, and must be done + under the same mutex as wt_thd_cond_timedwait(). 
+*/ +void wt_thd_release(WT_THD *thd, WT_RESOURCE_ID *resid) +{ + WT_RESOURCE *rc; + uint i, j; + DBUG_ENTER("wt_thd_release"); + + for (i=0; i < thd->my_resources.elements; i++) + { + rc= *dynamic_element(&thd->my_resources, i, WT_RESOURCE**); + if (!resid || (resid->type->compare(&rc->id, resid) == 0)) + { + rc_wrlock(rc); + /* + nobody's trying to free the resource now, + as its owners[] array is not empty (at least thd must be there) + */ + DBUG_ASSERT(rc->state == ACTIVE); + for (j=0; j < rc->owners.elements; j++) + if (*dynamic_element(&rc->owners, j, WT_THD**) == thd) + break; + DBUG_ASSERT(j < rc->owners.elements); + delete_dynamic_element(&rc->owners, j); + if (rc->owners.elements == 0) + { + pthread_cond_broadcast(&rc->cond); +#ifndef DBUG_OFF + if (rc->mutex) + safe_mutex_assert_owner(rc->mutex); +#endif + } + unlock_lock_and_free_resource(thd, rc); + if (resid) + { + delete_dynamic_element(&thd->my_resources, i); + DBUG_VOID_RETURN; + } + } + } + DBUG_ASSERT(!resid); + reset_dynamic(&thd->my_resources); + DBUG_VOID_RETURN; +} + -- cgit v1.2.1 From 651f61fc1f4172bd5f76d1060e32df76f9f76d3a Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 4 Aug 2008 20:01:11 +0200 Subject: wt_thd_cond_timedwait() now allows the list of blockers to change after wt_thd_will_wait_for() was called. That is a caller doesn't need to hold a mutex all the time preventing blockers from releasing a resource. 
--- mysys/waiting_threads.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c index 4d375fdc899..1c87886f405 100644 --- a/mysys/waiting_threads.c +++ b/mysys/waiting_threads.c @@ -540,7 +540,7 @@ retry: */ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex) { - int ret= WT_OK; + int ret= WT_TIMEOUT; struct timespec timeout; ulonglong before, after, starttime; WT_RESOURCE *rc= thd->waiting_for; @@ -565,8 +565,13 @@ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex) GetSystemTimeAsFileTime((PFILETIME)&starttime); #endif + rc_wrlock(rc); + if (rc->owners.elements == 0 && thd->killed) + ret= WT_OK; + rc_unlock(rc); + set_timespec_time_nsec(timeout, starttime, wt_timeout_short*1000); - if (!thd->killed) + if (ret == WT_TIMEOUT) ret= pthread_cond_timedwait(&rc->cond, mutex, &timeout); if (ret == WT_TIMEOUT) { -- cgit v1.2.1 From 3971e262e99366fa4bc9b454e69cf48daac9be85 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Wed, 6 Aug 2008 21:30:05 +0200 Subject: maria: deadlock detection when waiting on unique key (useless until we can rollback) include/my_pthread.h: cleanup include/waiting_threads.h: header guard mysys/waiting_threads.c: bug - kill strategy were not applied to deadlocks of length 1. cast timeout to ulonglong. storage/maria/ma_static.c: declare WT_RESOURCE_TYPE ma_rc_dup_unique storage/maria/ma_write.c: deadlock detection when waiting on unique key (useless until we can rollback) storage/maria/maria_def.h: deadlock detection when waiting on unique key (useless until we can rollback) storage/maria/trnman.c: use deadlock detector. protect state transitions of a TRN with a mutex. trnman_trid_to_trn() function. storage/maria/trnman.h: trnman_trid_to_trn() function protect state transitions of a TRN with a mutex use deadlock detector. 
storage/maria/trnman_public.h: trnman_trid_to_trn() --- mysys/waiting_threads.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'mysys') diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c index 1c87886f405..491e7c3a726 100644 --- a/mysys/waiting_threads.c +++ b/mysys/waiting_threads.c @@ -227,6 +227,20 @@ struct deadlock_arg { WT_RESOURCE *rc; }; +static void change_victim(WT_THD* found, struct deadlock_arg *arg) +{ + if (found->weight < arg->victim->weight) + { + if (arg->victim != arg->thd) + { + rc_unlock(arg->victim->waiting_for); /* release the previous victim */ + DBUG_ASSERT(arg->rc == found->waiting_for); + } + arg->victim= found; + arg->rc= 0; + } +} + /* loop detection in a wait-for graph with a limited search depth. */ @@ -294,16 +308,8 @@ retry: break; case WT_DEADLOCK: ret= WT_DEADLOCK; - if (cursor->weight < arg->victim->weight) - { - if (arg->victim != arg->thd) - { - rc_unlock(arg->victim->waiting_for); /* release the previous victim */ - DBUG_ASSERT(arg->rc == cursor->waiting_for); - } - arg->victim= cursor; - } - else if (arg->rc) + change_victim(cursor, arg); + if (arg->rc) rc_unlock(arg->rc); goto end; case WT_OK: @@ -329,13 +335,15 @@ static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth, int ret; DBUG_ENTER("deadlock"); ret= deadlock_search(&arg, blocker, depth); - if (arg.rc) - rc_unlock(arg.rc); if (ret == WT_DEPTH_EXCEEDED) { increment_cycle_stats(WT_CYCLE_STATS, max_depth); ret= WT_OK; } + if (ret == WT_DEADLOCK && depth) + change_victim(blocker, &arg); + if (arg.rc) + rc_unlock(arg.rc); if (ret == WT_DEADLOCK && arg.victim != thd) { DBUG_PRINT("wt", ("killing %s", arg.victim->name)); @@ -570,7 +578,7 @@ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex) ret= WT_OK; rc_unlock(rc); - set_timespec_time_nsec(timeout, starttime, wt_timeout_short*1000); + set_timespec_time_nsec(timeout, starttime, wt_timeout_short*ULL(1000)); if (ret == WT_TIMEOUT) ret= 
pthread_cond_timedwait(&rc->cond, mutex, &timeout); if (ret == WT_TIMEOUT) @@ -579,7 +587,7 @@ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex) ret= WT_DEADLOCK; else if (wt_timeout_long > wt_timeout_short) { - set_timespec_time_nsec(timeout, starttime, wt_timeout_long*1000); + set_timespec_time_nsec(timeout, starttime, wt_timeout_long*ULL(1000)); if (!thd->killed) ret= pthread_cond_timedwait(&rc->cond, mutex, &timeout); } -- cgit v1.2.1 From f8c1059cbf62e95d2684fdf8b9badc398f845173 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Thu, 7 Aug 2008 22:57:25 +0200 Subject: move wt* maintainance from maria to the server include/waiting_threads.h: C_MODE_START/END mysys/waiting_threads.c: relax the checks - auto init thd in will_wait_for, allow to destroy uninited thd (=noop), allow a "release" an unexistent resource (=noop), sql/sql_class.cc: move wt* maintainance from maria to the server. do THD::cleanup after ha_close_connection() and plugin_thdvar_cleanup(). storage/maria/unittest/trnman-t.c: update to new prototype --- mysys/waiting_threads.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c index 491e7c3a726..55f65be2811 100644 --- a/mysys/waiting_threads.c +++ b/mysys/waiting_threads.c @@ -208,6 +208,9 @@ void wt_thd_destroy(WT_THD *thd) { DBUG_ENTER("wt_thd_destroy"); + if (thd->my_resources.buffer == 0) + DBUG_VOID_RETURN; /* nothing to do */ + DBUG_ASSERT(thd->my_resources.elements == 0); delete_dynamic(&thd->my_resources); lf_hash_put_pins(thd->pins); @@ -447,6 +450,9 @@ int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker, WT_RESOURCE_ID *resid) DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, resid=%llu", thd->name, blocker->name, resid->value.num)); + if (unlikely(thd->my_resources.buffer == 0)) + wt_thd_init(thd); + if (thd->waiting_for == 0) { uint keylen; @@ -647,8 +653,8 @@ void wt_thd_release(WT_THD *thd, WT_RESOURCE_ID *resid) } } } - 
DBUG_ASSERT(!resid); - reset_dynamic(&thd->my_resources); + if (!resid) + reset_dynamic(&thd->my_resources); DBUG_VOID_RETURN; } -- cgit v1.2.1 From e2219ec965a80b2034d9debcbf12d3e73a684d89 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Fri, 8 Aug 2008 13:11:27 +0200 Subject: wt_thd_lazy_init(), per-thread deadlock search depths and timeouts mysys/array.c: lazy alloc in dynamic array sql-common/client.c: for dynamic array, specify init_alloc==alloc_increment explicitly sql/mysqld.cc: per-thread deadlock search depths and timeouts sql/set_var.cc: per-thread deadlock search depths and timeouts sql/sql_class.h: per-thread deadlock search depths and timeouts --- mysys/array.c | 9 ++----- mysys/waiting_threads.c | 63 ++++++++++++++++++++++++++++++------------------- 2 files changed, 41 insertions(+), 31 deletions(-) (limited to 'mysys') diff --git a/mysys/array.c b/mysys/array.c index 039d9b4a2c2..b31260344a9 100644 --- a/mysys/array.c +++ b/mysys/array.c @@ -51,19 +51,14 @@ my_bool init_dynamic_array2(DYNAMIC_ARRAY *array, uint element_size, if (init_alloc > 8 && alloc_increment > init_alloc * 2) alloc_increment=init_alloc*2; } - - if (!init_alloc) - { - init_alloc=alloc_increment; - init_buffer= 0; - } array->elements=0; array->max_element=init_alloc; array->alloc_increment=alloc_increment; array->size_of_element=element_size; if ((array->buffer= init_buffer)) DBUG_RETURN(FALSE); - if (!(array->buffer=(uchar*) my_malloc_ci(element_size*init_alloc, + if (init_alloc && + !(array->buffer=(uchar*) my_malloc_ci(element_size*init_alloc, MYF(MY_WME)))) { array->max_element=0; diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c index 55f65be2811..61b89f7eb64 100644 --- a/mysys/waiting_threads.c +++ b/mysys/waiting_threads.c @@ -43,9 +43,6 @@ #include #include -uint wt_timeout_short=100, wt_deadlock_search_depth_short=4; -uint wt_timeout_long=10000, wt_deadlock_search_depth_long=15; - /* status variables: distribution of cycle lengths @@ -73,13 +70,13 @@ 
static my_atomic_rwlock_t cycle_stats_lock, wait_stats_lock, success_stats_lock; my_atomic_rwlock_wrunlock(&success_stats_lock); \ } while (0) -#define increment_cycle_stats(X,MAX) \ +#define increment_cycle_stats(X,SLOT) \ do { \ - uint i= (X), j= (MAX) == wt_deadlock_search_depth_long; \ + uint i= (X); \ if (i >= WT_CYCLE_STATS) \ i= WT_CYCLE_STATS; \ my_atomic_rwlock_wrlock(&cycle_stats_lock); \ - my_atomic_add32(&wt_cycle_stats[j][i], 1); \ + my_atomic_add32(&wt_cycle_stats[SLOT][i], 1); \ my_atomic_rwlock_wrunlock(&cycle_stats_lock); \ } while (0) @@ -190,14 +187,29 @@ void wt_end() DBUG_VOID_RETURN; } -void wt_thd_init(WT_THD *thd) +static void fix_thd_pins(WT_THD *thd) { - DBUG_ENTER("wt_thd_init"); + if (unlikely(thd->pins == 0)) + { + thd->pins=lf_hash_get_pins(&reshash); +#ifndef DBUG_OFF + thd->name=my_thread_name(); +#endif + } +} - my_init_dynamic_array(&thd->my_resources, sizeof(WT_RESOURCE *), 10, 5); - thd->pins=lf_hash_get_pins(&reshash); +void wt_thd_lazy_init(WT_THD *thd, ulong *ds, ulong *ts, ulong *dl, ulong *tl) +{ + DBUG_ENTER("wt_thd_lazy_init"); thd->waiting_for=0; + thd->my_resources.buffer= 0; + thd->my_resources.elements= 0; thd->weight=0; + thd->deadlock_search_depth_short= ds; + thd->timeout_short= ts; + thd->deadlock_search_depth_long= dl; + thd->timeout_long= tl; + my_init_dynamic_array(&thd->my_resources, sizeof(WT_RESOURCE *), 0, 5); #ifndef DBUG_OFF thd->name=my_thread_name(); #endif @@ -208,12 +220,12 @@ void wt_thd_destroy(WT_THD *thd) { DBUG_ENTER("wt_thd_destroy"); - if (thd->my_resources.buffer == 0) - DBUG_VOID_RETURN; /* nothing to do */ - DBUG_ASSERT(thd->my_resources.elements == 0); + + if (thd->pins != 0) + lf_hash_put_pins(thd->pins); + delete_dynamic(&thd->my_resources); - lf_hash_put_pins(thd->pins); thd->waiting_for=0; DBUG_VOID_RETURN; } @@ -297,7 +309,8 @@ retry: if (cursor == arg->thd) { ret= WT_DEADLOCK; - increment_cycle_stats(depth, arg->max_depth); + increment_cycle_stats(depth, arg->max_depth == + 
*arg->thd->deadlock_search_depth_long); arg->victim= cursor; goto end; } @@ -340,7 +353,8 @@ static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth, ret= deadlock_search(&arg, blocker, depth); if (ret == WT_DEPTH_EXCEEDED) { - increment_cycle_stats(WT_CYCLE_STATS, max_depth); + increment_cycle_stats(WT_CYCLE_STATS, max_depth == + *thd->deadlock_search_depth_long); ret= WT_OK; } if (ret == WT_DEADLOCK && depth) @@ -379,6 +393,8 @@ static void unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc) DBUG_VOID_RETURN; } + fix_thd_pins(thd); + /* XXX if (rc->id.type->make_key) key= rc->id.type->make_key(&rc->id, &keylen); else */ { key= &rc->id; @@ -450,8 +466,7 @@ int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker, WT_RESOURCE_ID *resid) DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, resid=%llu", thd->name, blocker->name, resid->value.num)); - if (unlikely(thd->my_resources.buffer == 0)) - wt_thd_init(thd); + fix_thd_pins(thd); if (thd->waiting_for == 0) { @@ -538,7 +553,7 @@ retry: } rc_unlock(rc); - if (deadlock(thd, blocker, 1, wt_deadlock_search_depth_short)) + if (deadlock(thd, blocker, 1, *thd->deadlock_search_depth_short)) { wt_thd_dontwait(thd); DBUG_RETURN(WT_DEADLOCK); @@ -584,16 +599,16 @@ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex) ret= WT_OK; rc_unlock(rc); - set_timespec_time_nsec(timeout, starttime, wt_timeout_short*ULL(1000)); + set_timespec_time_nsec(timeout, starttime, (*thd->timeout_short)*ULL(1000)); if (ret == WT_TIMEOUT) ret= pthread_cond_timedwait(&rc->cond, mutex, &timeout); if (ret == WT_TIMEOUT) { - if (deadlock(thd, thd, 0, wt_deadlock_search_depth_long)) + if (deadlock(thd, thd, 0, *thd->deadlock_search_depth_long)) ret= WT_DEADLOCK; - else if (wt_timeout_long > wt_timeout_short) + else if (*thd->timeout_long > *thd->timeout_short) { - set_timespec_time_nsec(timeout, starttime, wt_timeout_long*ULL(1000)); + set_timespec_time_nsec(timeout, starttime, (*thd->timeout_long)*ULL(1000)); if (!thd->killed) ret= 
pthread_cond_timedwait(&rc->cond, mutex, &timeout); } @@ -644,7 +659,7 @@ void wt_thd_release(WT_THD *thd, WT_RESOURCE_ID *resid) if (rc->mutex) safe_mutex_assert_owner(rc->mutex); #endif - } + } unlock_lock_and_free_resource(thd, rc); if (resid) { -- cgit v1.2.1 From 1a5de5bc82cee3c2411862b8ef32588b530a780f Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Mon, 25 Aug 2008 14:49:47 +0300 Subject: Changed all file names in maria to LEX_STRING and removed some calls to strlen() Ensure that pagecache gives correct error number even if error for block happend mysys/my_pread.c: Indentation fix storage/maria/ha_maria.cc: filenames changed to be of type LEX_STRING storage/maria/ma_check.c: filenames changed to be of type LEX_STRING storage/maria/ma_checkpoint.c: filenames changed to be of type LEX_STRING storage/maria/ma_create.c: filenames changed to be of type LEX_STRING storage/maria/ma_dbug.c: filenames changed to be of type LEX_STRING storage/maria/ma_delete.c: filenames changed to be of type LEX_STRING storage/maria/ma_info.c: filenames changed to be of type LEX_STRING storage/maria/ma_keycache.c: filenames changed to be of type LEX_STRING storage/maria/ma_locking.c: filenames changed to be of type LEX_STRING storage/maria/ma_loghandler.c: filenames changed to be of type LEX_STRING storage/maria/ma_open.c: filenames changed to be of type LEX_STRING storage/maria/ma_pagecache.c: Store error number for last failed operation in the page block This should fix some asserts() when errno was not properly set after failure to read block in another thread storage/maria/ma_recovery.c: filenames changed to be of type LEX_STRING storage/maria/ma_update.c: filenames changed to be of type LEX_STRING storage/maria/ma_write.c: filenames changed to be of type LEX_STRING storage/maria/maria_def.h: filenames changed to be of type LEX_STRING storage/maria/maria_ftdump.c: filenames changed to be of type LEX_STRING storage/maria/maria_pack.c: filenames changed to be of type 
LEX_STRING --- mysys/my_pread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_pread.c b/mysys/my_pread.c index 146e07773e6..5c27cf73482 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -60,7 +60,7 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, #endif for (;;) { - errno=0; /* Linux, Windows don't reset this on EOF/success */ + errno= 0; /* Linux, Windows don't reset this on EOF/success */ #ifndef HAVE_PREAD pthread_mutex_lock(&my_file_info[Filedes].mutex); readbytes= (uint) -1; -- cgit v1.2.1 From ca23272e1e53e195169bec0609eb0168722e1879 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Wed, 27 Aug 2008 14:15:06 +0200 Subject: proc_info_hook, mysys access to thd->proc_info include/my_global.h: move __func__ definition to my_global.h include/my_sys.h: proc_info_hook mysys/my_static.c: proc_info_hook sql/mysqld.cc: proc_info_hook sql/sql_class.cc: support thd==0 in set_thd_proc_info sql/sql_profile.cc: move __func__ definition to my_global.h sql/sql_profile.h: move __func__ definition to my_global.h --- mysys/my_static.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'mysys') diff --git a/mysys/my_static.c b/mysys/my_static.c index ef25a89bad9..4dc965e8a20 100644 --- a/mysys/my_static.c +++ b/mysys/my_static.c @@ -92,6 +92,15 @@ int (*error_handler_hook)(uint error,const char *str,myf MyFlags)= int (*fatal_error_handler_hook)(uint error,const char *str,myf MyFlags)= my_message_no_curses; +static const char *proc_info_dummy(void *a, const char *b, const char *c, + const char *d, const unsigned int e) +{ + return 0; +} + +const char *(*proc_info_hook)(void *, const char *, const char *, const char *, + const unsigned int)= proc_info_dummy; + #ifdef __WIN__ /* from my_getsystime.c */ ulonglong query_performance_frequency, query_performance_offset; -- cgit v1.2.1 From 942651ea6cc2b7537aa45ff1d55d64be4e191a16 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Thu, 28 
Aug 2008 14:43:44 +0200 Subject: wt: comments, OOM checks, test case for deadlock detection include/waiting_threads.h: make wt_thd_dontwait private mysql-test/r/maria.result: deadlock example mysql-test/t/maria.test: deadlock example mysys/waiting_threads.c: comments, OOM checks sql/mysqld.cc: fix variables sql/sql_class.cc: move wt_lazy_init to THD constructor sql/sql_class.h: move wt_lazy_init to THD constructor storage/maria/ha_maria.cc: backport from 6.0 storage/maria/ma_write.c: poset-review fixes, set thd->proc_info storage/maria/trnman.c: bugfixing storage/myisam/mi_check.c: warnings storage/myisam/mi_page.c: warnings storage/myisam/mi_search.c: warnings storage/myisammrg/myrg_create.c: warnings unittest/mysys/waiting_threads-t.c: fixes --- mysys/waiting_threads.c | 352 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 293 insertions(+), 59 deletions(-) (limited to 'mysys') diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c index 61b89f7eb64..78cea6c9673 100644 --- a/mysys/waiting_threads.c +++ b/mysys/waiting_threads.c @@ -13,6 +13,77 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* + "waiting threads" subsystem - a unified interface for threads to wait + on each other, with built-in deadlock detection. + + Main concepts + ^^^^^^^^^^^^^ + a thread - is represented by a WT_THD structure. One physical thread + can have only one WT_THD descriptor. + + a resource - a thread does not wait for other threads directly, + instead it waits for a "resource", which is "owned" by other threads. + It waits, exactly, for all "owners" to "release" a resource. + It does not have to correspond to a physical resource. For example, it + may be convenient in certain cases to force resource == thread. + A resource is represented by a WT_RESOURCE structure. + + a resource identifier - a pair of {resource type, value}. 
A value is + either a ulonglong number or a pointer (it's a union). + WT_RESOURCE_ID structure. + + a resource type - a pointer to a statically defined instance of + WT_RESOURCE_TYPE structure. This structure contains a pointer to + a function that knows how to compare values of this resource type. + In the simple case it could be wt_resource_id_memcmp(). + + Usage + ^^^^^ + to use the interface one needs to use this thread's WT_THD, + call wt_thd_will_wait_for() for every thread it needs to wait on, + then call wt_thd_cond_timedwait(). When thread releases a resource + it should call wt_thd_release() (or wt_thd_release_all()) - it will + notify (send a signal) threads waiting in wt_thd_cond_timedwait(), + if appropriate. + + Just like with pthread's cond_wait, there could be spurious + wake-ups from wt_thd_cond_timedwait(). A caller is expected to + handle that. + + wt_thd_will_wait_for() and wt_thd_cond_timedwait() return either + WT_OK or WT_DEADLOCK. Additionally wt_thd_cond_timedwait() can return + WT_TIMEOUT. Out of memory and other fatal errors are reported as + WT_DEADLOCK - and a transaction must be aborted just the same. + + Configuration + ^^^^^^^^^^^^^ + There are four config variables. Two deadlock search depths - short and + long - and two timeouts. Deadlock search is performed with the short + depth on every wt_thd_will_wait_for() call. wt_thd_cond_timedwait() + waits with a short timeout, performs a deadlock search with the long + depth, and waits with a long timeout. As most deadlock cycles are supposed + to be short, most deadlocks will be detected at once, and waits will + rarely be necessary. + + These config variables are thread-local. Different threads may have + different search depth and timeout values. + + Also, deadlock detector supports different killing strategies, the victim + in a deadlock cycle is selected based on the "weight". See "weight" + description in waiting_threads.h for details. 
It's up to the caller to + set weights accordingly. + + Status + ^^^^^^ + We calculate the number of successfull waits (WT_OK returned from + wt_thd_cond_timedwait()), a number of timeouts, a deadlock cycle + length distribution - number of deadlocks with every length from + 1 to WT_CYCLE_STATS, and a wait time distribution - number + of waits with a time from 1 us to 1 min in WT_CYCLE_STATS + intervals on a log scale. +*/ + /* Note that if your lock system satisfy the following condition: @@ -114,8 +185,18 @@ static my_atomic_rwlock_t cycle_stats_lock, wait_stats_lock, success_stats_lock; pthread_rwlock_unlock(&R->lock); \ } while (0) +/* + All resources are stored in a lock-free hash. Different threads + may add new resources and perform deadlock detection concurrently. +*/ static LF_HASH reshash; +/** + WT_RESOURCE constructor + + It's called from lf_hash and takes an offset to LF_SLIST instance. + WT_RESOURCE is located at arg+sizeof(LF_SLIST) +*/ static void wt_resource_init(uchar *arg) { WT_RESOURCE *rc=(WT_RESOURCE*)(arg+LF_HASH_OVERHEAD); @@ -124,10 +205,16 @@ static void wt_resource_init(uchar *arg) bzero(rc, sizeof(*rc)); pthread_rwlock_init(&rc->lock, 0); pthread_cond_init(&rc->cond, 0); - my_init_dynamic_array(&rc->owners, sizeof(WT_THD *), 5, 5); + my_init_dynamic_array(&rc->owners, sizeof(WT_THD *), 0, 5); DBUG_VOID_RETURN; } +/** + WT_RESOURCE destructor + + It's called from lf_hash and takes an offset to LF_SLIST instance. + WT_RESOURCE is located at arg+sizeof(LF_SLIST) +*/ static void wt_resource_destroy(uchar *arg) { WT_RESOURCE *rc=(WT_RESOURCE*)(arg+LF_HASH_OVERHEAD); @@ -159,7 +246,7 @@ void wt_init() bzero(wt_wait_stats, sizeof(wt_wait_stats)); bzero(wt_cycle_stats, sizeof(wt_cycle_stats)); wt_success_stats=0; - { + { /* initialize wt_wait_table[]. 
from 1 us to 1 min, log scale */ int i; double from=log(1); /* 1 us */ double to=log(60e6); /* 1 min */ @@ -187,17 +274,20 @@ void wt_end() DBUG_VOID_RETURN; } -static void fix_thd_pins(WT_THD *thd) -{ - if (unlikely(thd->pins == 0)) - { - thd->pins=lf_hash_get_pins(&reshash); -#ifndef DBUG_OFF - thd->name=my_thread_name(); -#endif - } -} +/** + Lazy WT_THD initialization + + Cheap initialization of WT_THD. Only initialized fields that don't require + memory allocations - basically, it only does assignments. The rest of the + WT_THD structure will be initialized on demand, on the first use. + This allows one to initialize lazily all WT_THD structures, even if some + (or even most) of them will never be used for deadlock detection. + @param ds a pointer to deadlock search depth short value + @param ts a pointer to deadlock timeout short value + @param dl a pointer to deadlock search depth long value + @param tl a pointer to deadlock timeout long value +*/ void wt_thd_lazy_init(WT_THD *thd, ulong *ds, ulong *ts, ulong *dl, ulong *tl) { DBUG_ENTER("wt_thd_lazy_init"); @@ -209,6 +299,7 @@ void wt_thd_lazy_init(WT_THD *thd, ulong *ds, ulong *ts, ulong *dl, ulong *tl) thd->timeout_short= ts; thd->deadlock_search_depth_long= dl; thd->timeout_long= tl; + /* dynamic array is also initialized lazily - without memory allocations */ my_init_dynamic_array(&thd->my_resources, sizeof(WT_RESOURCE *), 0, 5); #ifndef DBUG_OFF thd->name=my_thread_name(); @@ -216,6 +307,26 @@ void wt_thd_lazy_init(WT_THD *thd, ulong *ds, ulong *ts, ulong *dl, ulong *tl) DBUG_VOID_RETURN; } +/** + Finalize WT_THD initialization + + After lazy WT_THD initialization, parts of the structure are still + uninitialized. This function completes the initialization, allocating + memory, if necessary. It's called automatically on demand, when WT_THD + is about to be used. 
+*/ +static int fix_thd_pins(WT_THD *thd) +{ + if (unlikely(thd->pins == 0)) + { + thd->pins=lf_hash_get_pins(&reshash); +#ifndef DBUG_OFF + thd->name=my_thread_name(); +#endif + } + return thd->pins == 0; +} + void wt_thd_destroy(WT_THD *thd) { DBUG_ENTER("wt_thd_destroy"); @@ -229,19 +340,30 @@ void wt_thd_destroy(WT_THD *thd) thd->waiting_for=0; DBUG_VOID_RETURN; } +/** + Trivial resource id comparison function - bytewise memcmp. + It can be used in WT_RESOURCE_TYPE structures where bytewise + comparison of values is sufficient. +*/ int wt_resource_id_memcmp(void *a, void *b) { return memcmp(a, b, sizeof(WT_RESOURCE_ID)); } +/** + arguments for the recursive deadlock_search function +*/ struct deadlock_arg { - WT_THD *thd; - uint max_depth; - WT_THD *victim; - WT_RESOURCE *rc; + WT_THD *thd; /**< starting point of a search */ + uint max_depth; /**< search depth limit */ + WT_THD *victim; /**< a thread to be killed to resolve a deadlock */ + WT_RESOURCE *rc; /**< see comment at the end of deadlock_search() */ }; +/** + helper function to change the victim, according to the weight +*/ static void change_victim(WT_THD* found, struct deadlock_arg *arg) { if (found->weight < arg->victim->weight) @@ -256,8 +378,8 @@ static void change_victim(WT_THD* found, struct deadlock_arg *arg) } } -/* - loop detection in a wait-for graph with a limited search depth. +/** + recursive loop detection in a wait-for graph with a limited search depth */ static int deadlock_search(struct deadlock_arg *arg, WT_THD *blocker, uint depth) @@ -301,11 +423,41 @@ retry: lf_unpin(arg->thd->pins, 0); goto retry; } + /* as the state is locked, we can unpin now */ lf_unpin(arg->thd->pins, 0); + /* + Below is not a pure depth-first search. It's a depth-first with a + slightest hint of breadth-first. 
Depth-first is: + + check(element): + foreach current in element->nodes[] do: + if current == element return error; + check(current); + + while we do + + check(element): + foreach current in element->nodes[] do: + if current == element return error; + foreach current in element->nodes[] do: + check(current); + */ for (i=0; i < rc->owners.elements; i++) { cursor= *dynamic_element(&rc->owners, i, WT_THD**); + /* + We're only looking for (and detecting) cycles that include 'arg->thd'. + That is, only deadlocks that *we* have created. For example, + thd->A->B->thd + (thd waits for A, A waits for B, while B is waiting for thd). + While walking the graph we can encounter other cicles, e.g. + thd->A->B->C->A + This will not be detected. Instead we will walk it in circles until + the search depth limit is reached (the latter guarantees that an + infinite loop is impossible). We expect the thread that has created + the cycle (one of A, B, and C) to detect its deadlock. + */ if (cursor == arg->thd) { ret= WT_DEADLOCK; @@ -319,16 +471,15 @@ retry: { cursor= *dynamic_element(&rc->owners, i, WT_THD**); switch (deadlock_search(arg, cursor, depth+1)) { + case WT_OK: + break; case WT_DEPTH_EXCEEDED: ret= WT_DEPTH_EXCEEDED; break; case WT_DEADLOCK: ret= WT_DEADLOCK; - change_victim(cursor, arg); - if (arg->rc) - rc_unlock(arg->rc); - goto end; - case WT_OK: + change_victim(cursor, arg); /* also sets arg->rc to 0 */ + i= rc->owners.elements; /* jump out of the loop */ break; default: DBUG_ASSERT(0); @@ -337,6 +488,34 @@ retry: rc_unlock(arg->rc); } end: + /* + Note that 'rc' is locked in this function, but it's never unlocked there. + Instead it's saved in arg->rc and the *caller* is expected to unlock it. + It's done to support different killing strategies. 
This is how it works: + Assuming a graph + + thd->A->B->C->thd + + deadlock_search() function starts from thd, locks it (in fact it locks not + a thd, but a resource it is waiting on, but below, for simplicity, I'll + talk about "locking a thd"). Then it goes down recursively, locks A, and so + on. Goes down recursively, locks B. Goes down recursively, locks C. + Notices that C is waiting on thd. Deadlock detected. Sets arg->victim=thd. + Returns from the last deadlock_search() call. C stays locked! + Now it checks whether C is a more appropriate victim then 'thd'. + If yes - arg->victim=C, otherwise C is unlocked. Returns. B stays locked. + Now it checks whether B is a more appropriate victim then arg->victim. + If yes - old arg->victim is unlocked and arg->victim=B, + otherwise B is unlocked. Return. + And so on. + + In short, a resource is locked in a frame. But it's not unlocked in the + same frame, it's unlocked by the caller, and only after the caller checks + that it doesn't need to use current WT_THD as a victim. If it does - the + lock is kept and the old victim's resource is unlocked. When the recursion + is unrolled and we are back to deadlock() function, there are only two + locks left - on thd and on the victim. + */ arg->rc= rc; DBUG_PRINT("wt", ("exit: %s", ret == WT_DEPTH_EXCEEDED ? "WT_DEPTH_EXCEEDED" : @@ -344,6 +523,25 @@ end: DBUG_RETURN(ret); } +/** + Deadlock detection in a wait-for graph + + A wrapper for recursive deadlock_search() - prepares deadlock_arg structure, + invokes deadlock_search(), increments statistics, notifies the victim. + + @param thd thread that is going to wait. Deadlock is detected + if, while walking the graph, we reach a thread that + is waiting on thd + @param blocker starting point of a search. In wt_thd_cond_timedwait() + it's thd, in wt_thd_will_wait_for() it's a thread that + thd is going to wait for + @param depth starting search depth. 
In general it's the number of + edges in the wait-for graph between thd and the + blocker. Practically only two values are used (and + supported) - when thd == blocker it's 0, when thd + waits directly for blocker, it's 1 + @param max_depth search depth limit +*/ static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth, uint max_depth) { @@ -357,10 +555,15 @@ static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth, *thd->deadlock_search_depth_long); ret= WT_OK; } + /* + if we started with depth==1, blocker was never considered for a victim + in deadlock_search(). Do it here. + */ if (ret == WT_DEADLOCK && depth) change_victim(blocker, &arg); if (arg.rc) rc_unlock(arg.rc); + /* notify the victim, if appropriate */ if (ret == WT_DEADLOCK && arg.victim != thd) { DBUG_PRINT("wt", ("killing %s", arg.victim->name)); @@ -373,11 +576,12 @@ static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth, } -/* - Deletes an element from reshash. +/** + Delete an element from reshash if it has no waiters or owners + rc->lock must be locked by the caller and it's unlocked on return. 
*/ -static void unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc) +static int unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc) { uint keylen; const void *key; @@ -390,10 +594,14 @@ static void unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc) DBUG_PRINT("wt", ("nothing to do, %d owners, %d waiters", rc->owners.elements, rc->waiter_count)); rc_unlock(rc); - DBUG_VOID_RETURN; + DBUG_RETURN(0); } - fix_thd_pins(thd); + if (fix_thd_pins(thd)) + { + rc_unlock(rc); + DBUG_RETURN(1); + } /* XXX if (rc->id.type->make_key) key= rc->id.type->make_key(&rc->id, &keylen); else */ { @@ -414,29 +622,40 @@ static void unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc) */ rc->state=FREE; rc_unlock(rc); - lf_hash_delete(&reshash, thd->pins, key, keylen); - DBUG_VOID_RETURN; + DBUG_RETURN(lf_hash_delete(&reshash, thd->pins, key, keylen) == -1); } -int wt_thd_dontwait_locked(WT_THD *thd) +/** + register the fact that thd is not waiting anymore + + decrease waiter_count, clear waiting_for, free the resource if appropriate. + thd->waiting_for must be locked! +*/ +static int stop_waiting_locked(WT_THD *thd) { + int ret; WT_RESOURCE *rc= thd->waiting_for; - DBUG_ENTER("wt_thd_dontwait_locked"); + DBUG_ENTER("stop_waiting_locked"); DBUG_ASSERT(rc->waiter_count); DBUG_ASSERT(rc->state == ACTIVE); rc->waiter_count--; thd->waiting_for= 0; - unlock_lock_and_free_resource(thd, rc); - DBUG_RETURN(thd->killed ? WT_DEADLOCK : WT_OK); + ret= unlock_lock_and_free_resource(thd, rc); + DBUG_RETURN((thd->killed || ret) ? WT_DEADLOCK : WT_OK); } -int wt_thd_dontwait(WT_THD *thd) +/** + register the fact that thd is not waiting anymore + + locks thd->waiting_for and calls stop_waiting_locked(). 
+*/ +static int stop_waiting(WT_THD *thd) { int ret; WT_RESOURCE *rc= thd->waiting_for; - DBUG_ENTER("wt_thd_dontwait"); + DBUG_ENTER("stop_waiting"); if (!rc) DBUG_RETURN(WT_OK); @@ -445,15 +664,20 @@ int wt_thd_dontwait(WT_THD *thd) as its waiter_count is guaranteed to be non-zero */ rc_wrlock(rc); - ret= wt_thd_dontwait_locked(thd); + ret= stop_waiting_locked(thd); DBUG_RETURN(ret); } -/* +/** + notify the system that a thread needs to wait for another thread + called by a *waiter* to declare what resource it will wait for. can be called many times, if many blockers own a blocking resource. but must always be called with the same resource id - a thread cannot wait for more than one resource at a time. + + As a new edge is added to the wait-for graph, a deadlock detection is + performed for this new edge. */ int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker, WT_RESOURCE_ID *resid) { @@ -466,7 +690,8 @@ int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker, WT_RESOURCE_ID *resid) DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, resid=%llu", thd->name, blocker->name, resid->value.num)); - fix_thd_pins(thd); + if (fix_thd_pins(thd)) + DBUG_RETURN(WT_DEADLOCK); if (thd->waiting_for == 0) { @@ -487,14 +712,11 @@ retry: DBUG_PRINT("wt", ("failed to find rc in hash, inserting")); bzero(&tmp, sizeof(tmp)); - tmp.waiter_count= 0; tmp.id= *resid; tmp.state= ACTIVE; -#ifndef DBUG_OFF - tmp.mutex= 0; -#endif - lf_hash_insert(&reshash, thd->pins, &tmp); + if (lf_hash_insert(&reshash, thd->pins, &tmp) == -1) /* if OOM */ + DBUG_RETURN(WT_DEADLOCK); /* Two cases: either lf_hash_insert() failed - because another thread has just inserted a resource with the same id - and we need to retry. @@ -504,6 +726,9 @@ retry: And we need to repeat the loop anyway. 
*/ } + if (rc == MY_ERRPTR) + DBUG_RETURN(WT_DEADLOCK); + DBUG_PRINT("wt", ("found in hash rc=%p", rc)); rc_wrlock(rc); @@ -520,7 +745,6 @@ retry: thd->waiting_for= rc; rc->waiter_count++; thd->killed= 0; - } else { @@ -539,7 +763,7 @@ retry: if (thd->killed) { - wt_thd_dontwait_locked(thd); + stop_waiting_locked(thd); DBUG_RETURN(WT_DEADLOCK); } } @@ -548,20 +772,29 @@ retry: break; if (i >= rc->owners.elements) { - push_dynamic(&blocker->my_resources, (void*)&rc); - push_dynamic(&rc->owners, (void*)&blocker); + if (push_dynamic(&blocker->my_resources, (void*)&rc)) + { + stop_waiting_locked(thd); + DBUG_RETURN(WT_DEADLOCK); /* deadlock and OOM use the same error code */ + } + if (push_dynamic(&rc->owners, (void*)&blocker)) + { + pop_dynamic(&blocker->my_resources); + stop_waiting_locked(thd); + DBUG_RETURN(WT_DEADLOCK); + } } rc_unlock(rc); if (deadlock(thd, blocker, 1, *thd->deadlock_search_depth_short)) { - wt_thd_dontwait(thd); + stop_waiting(thd); DBUG_RETURN(WT_DEADLOCK); } DBUG_RETURN(0); } -/* +/** called by a *waiter* to start waiting It's supposed to be a drop-in replacement for @@ -595,7 +828,7 @@ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex) #endif rc_wrlock(rc); - if (rc->owners.elements == 0 && thd->killed) + if (rc->owners.elements == 0 || thd->killed) ret= WT_OK; rc_unlock(rc); @@ -614,7 +847,7 @@ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex) } } after= my_getsystime(); - if (wt_thd_dontwait(thd) == WT_DEADLOCK) + if (stop_waiting(thd) == WT_DEADLOCK) /* if we're killed */ ret= WT_DEADLOCK; increment_wait_stats(after-before, ret); if (ret == WT_OK) @@ -622,23 +855,24 @@ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex) DBUG_RETURN(ret); } -/* +/** called by a *blocker* when it releases a resource - when resid==0 all resources will be freed - - Note: it's conceptually similar to pthread_cond_broadcast, and must be done + it's conceptually similar to pthread_cond_broadcast, and must be done under the same 
mutex as wt_thd_cond_timedwait(). + + @param resid a resource to release. 0 to release all resources */ + void wt_thd_release(WT_THD *thd, WT_RESOURCE_ID *resid) { - WT_RESOURCE *rc; - uint i, j; + uint i; DBUG_ENTER("wt_thd_release"); for (i=0; i < thd->my_resources.elements; i++) { - rc= *dynamic_element(&thd->my_resources, i, WT_RESOURCE**); + uint j; + WT_RESOURCE *rc= *dynamic_element(&thd->my_resources, i, WT_RESOURCE**); if (!resid || (resid->type->compare(&rc->id, resid) == 0)) { rc_wrlock(rc); -- cgit v1.2.1 From 2b917502c1cd78699ee99b4cd6d388fcff29d76a Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Fri, 29 Aug 2008 21:50:04 +0200 Subject: added __attribute__((unused)) --- mysys/my_static.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/my_static.c b/mysys/my_static.c index 4dc965e8a20..a82c9bf518c 100644 --- a/mysys/my_static.c +++ b/mysys/my_static.c @@ -92,8 +92,11 @@ int (*error_handler_hook)(uint error,const char *str,myf MyFlags)= int (*fatal_error_handler_hook)(uint error,const char *str,myf MyFlags)= my_message_no_curses; -static const char *proc_info_dummy(void *a, const char *b, const char *c, - const char *d, const unsigned int e) +static const char *proc_info_dummy(void *a __attribute__((unused)), + const char *b __attribute__((unused)), + const char *c __attribute__((unused)), + const char *d __attribute__((unused)), + const unsigned int e __attribute__((unused))) { return 0; } -- cgit v1.2.1 From b54ac0728dcef412ad48c092139c150cac31550c Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Sun, 31 Aug 2008 17:20:20 +0200 Subject: fixes for windows --- mysys/CMakeLists.txt | 2 +- mysys/waiting_threads.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'mysys') diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt index dffce464d8b..ea71eb208dc 100755 --- a/mysys/CMakeLists.txt +++ b/mysys/CMakeLists.txt @@ -44,7 +44,7 @@ SET(MYSYS_SOURCES array.c 
charset-def.c charset.c checksum.c default.c default_ thr_rwlock.c tree.c typelib.c my_vle.c base64.c my_memmem.c my_getpagesize.c lf_alloc-pin.c lf_dynarray.c lf_hash.c my_atomic.c my_getncpus.c my_rnd.c - my_uuid.c wqueue.c + my_uuid.c wqueue.c waiting_threads.c ) IF(NOT SOURCE_SUBLIBS) diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c index 78cea6c9673..49b41111311 100644 --- a/mysys/waiting_threads.c +++ b/mysys/waiting_threads.c @@ -170,19 +170,19 @@ static my_atomic_rwlock_t cycle_stats_lock, wait_stats_lock, success_stats_lock; do { \ WT_RESOURCE *R=(X); \ DBUG_PRINT("wt", ("LOCK resid=%lld for READ", R->id.value.num)); \ - pthread_rwlock_rdlock(&R->lock); \ + rw_rdlock(&R->lock); \ } while (0) #define rc_wrlock(X) \ do { \ WT_RESOURCE *R=(X); \ DBUG_PRINT("wt", ("LOCK resid=%lld for WRITE", R->id.value.num)); \ - pthread_rwlock_wrlock(&R->lock); \ + rw_wrlock(&R->lock); \ } while (0) #define rc_unlock(X) \ do { \ WT_RESOURCE *R=(X); \ DBUG_PRINT("wt", ("UNLOCK resid=%lld", R->id.value.num)); \ - pthread_rwlock_unlock(&R->lock); \ + rw_unlock(&R->lock); \ } while (0) /* @@ -203,7 +203,7 @@ static void wt_resource_init(uchar *arg) DBUG_ENTER("wt_resource_init"); bzero(rc, sizeof(*rc)); - pthread_rwlock_init(&rc->lock, 0); + my_rwlock_init(&rc->lock, 0); pthread_cond_init(&rc->cond, 0); my_init_dynamic_array(&rc->owners, sizeof(WT_THD *), 0, 5); DBUG_VOID_RETURN; @@ -221,7 +221,7 @@ static void wt_resource_destroy(uchar *arg) DBUG_ENTER("wt_resource_destroy"); DBUG_ASSERT(rc->owners.elements == 0); - pthread_rwlock_destroy(&rc->lock); + rwlock_destroy(&rc->lock); pthread_cond_destroy(&rc->cond); delete_dynamic(&rc->owners); DBUG_VOID_RETURN; -- cgit v1.2.1 From 27dadbd89577e8ff80c926cfb3fbd36cf0fb48a4 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 1 Sep 2008 21:43:11 +0200 Subject: wt: don't support a key as a union { ulonglong, void* }. 
Although convenient, it forces the user to bzero a key before setting it as a pointer, otherwise it'll have random content on architectures where sizeof(void*) < sizeof(ulonglong). Declaring a key as ulonglong only (not a union) makes this user mistake impossible. include/waiting_threads.h: WT_RESOURCE_ID::value is an ulonglong, not a union mysys/waiting_threads.c: WT_RESOURCE_ID::value is an ulonglong, not a union storage/maria/ma_write.c: WT_RESOURCE_ID::value is an ulonglong, not a union storage/maria/trnman.c: WT_RESOURCE_ID::value is an ulonglong, not a union unittest/mysys/waiting_threads-t.c: WT_RESOURCE_ID::value is an ulonglong, not a union --- mysys/waiting_threads.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'mysys') diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c index 49b41111311..255317ea4cc 100644 --- a/mysys/waiting_threads.c +++ b/mysys/waiting_threads.c @@ -30,8 +30,7 @@ A resource is represented by a WT_RESOURCE structure. a resource identifier - a pair of {resource type, value}. A value is - either a ulonglong number or a pointer (it's a union). - WT_RESOURCE_ID structure. + an ulonglong number. Represented by a WT_RESOURCE_ID structure. a resource type - a pointer to a statically defined instance of WT_RESOURCE_TYPE structure. 
This structure contains a pointer to @@ -169,20 +168,20 @@ static my_atomic_rwlock_t cycle_stats_lock, wait_stats_lock, success_stats_lock; #define rc_rdlock(X) \ do { \ WT_RESOURCE *R=(X); \ - DBUG_PRINT("wt", ("LOCK resid=%lld for READ", R->id.value.num)); \ - rw_rdlock(&R->lock); \ + DBUG_PRINT("wt", ("LOCK resid=%lld for READ", R->id.value)); \ + rw_rdlock(&R->lock); \ } while (0) #define rc_wrlock(X) \ do { \ WT_RESOURCE *R=(X); \ - DBUG_PRINT("wt", ("LOCK resid=%lld for WRITE", R->id.value.num)); \ - rw_wrlock(&R->lock); \ + DBUG_PRINT("wt", ("LOCK resid=%lld for WRITE", R->id.value)); \ + rw_wrlock(&R->lock); \ } while (0) #define rc_unlock(X) \ do { \ WT_RESOURCE *R=(X); \ - DBUG_PRINT("wt", ("UNLOCK resid=%lld", R->id.value.num)); \ - rw_unlock(&R->lock); \ + DBUG_PRINT("wt", ("UNLOCK resid=%lld", R->id.value)); \ + rw_unlock(&R->lock); \ } while (0) /* @@ -688,7 +687,7 @@ int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker, WT_RESOURCE_ID *resid) LF_REQUIRE_PINS(3); DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, resid=%llu", - thd->name, blocker->name, resid->value.num)); + thd->name, blocker->name, resid->value)); if (fix_thd_pins(thd)) DBUG_RETURN(WT_DEADLOCK); -- cgit v1.2.1 From c7a304a26499596a62eb9c27ba60f1e076ef90de Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Wed, 1 Oct 2008 22:55:23 +0200 Subject: Implement conditional building correctly. 
automake *must* know all sources in advance, listing a file in EXTRA_DIST doesn't make it a source, which breakes dependency tracking (.Po files aren't included) --- mysys/Makefile.am | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'mysys') diff --git a/mysys/Makefile.am b/mysys/Makefile.am index 54553680341..d4c36d86dbe 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -57,11 +57,12 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ my_memmem.c \ my_windac.c my_access.c base64.c my_libwrap.c \ wqueue.c -EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \ - thr_mutex.c thr_rwlock.c waiting_threads.c \ - CMakeLists.txt mf_soundex.c \ +if THREAD +libmysys_a_SOURCES+= thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \ + thr_mutex.c thr_rwlock.c waiting_threads.c +endif +EXTRA_DIST = CMakeLists.txt mf_soundex.c \ my_conio.c my_wincond.c my_winthread.c -libmysys_a_LIBADD = @THREAD_LOBJECTS@ # test_dir_DEPENDENCIES= $(LIBRARIES) # testhash_DEPENDENCIES= $(LIBRARIES) # test_charset_DEPENDENCIES= $(LIBRARIES) @@ -75,8 +76,6 @@ DEFS = -DDEFAULT_BASEDIR=\"$(prefix)\" \ -DDEFAULT_SYSCONFDIR="\"$(sysconfdir)\"" \ @DEFS@ -libmysys_a_DEPENDENCIES= @THREAD_LOBJECTS@ - # I hope this always does the right thing. Otherwise this is only test programs FLAGS=$(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS) @NOINST_LDFLAGS@ -- cgit v1.2.1 From 10a2bac7775912e25eeba593dbce055e2b30699b Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Tue, 7 Oct 2008 18:49:01 +0200 Subject: workaround for gcc 4.1.0 strict-aliasing bug mysys/lf_alloc-pin.c: workaround for gcc 4.1.0 strict-aliasing bug. and yes, I mean a *bug* - as union (in alloc_free) is a documented way to access type-punned pointers. 
and it helps in newer gcc, but fails in 4.1.0 --- mysys/lf_alloc-pin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index 4fae8e37ddb..7c3e3785b68 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -320,7 +320,7 @@ static int match_pins(LF_PINS *el, void *addr) #define available_stack_size(CUR,END) (long) ((char*)(END) - (char*)(CUR)) #endif -#define next_node(P, X) (*((uchar **)(((uchar *)(X)) + (P)->free_ptr_offset))) +#define next_node(P, X) (*((uchar * volatile *)(((uchar *)(X)) + (P)->free_ptr_offset))) #define anext_node(X) next_node(&allocator->pinbox, (X)) /* -- cgit v1.2.1 From 058916ae024baaf8a092e0130654f67ef7b9bcf1 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Sun, 12 Oct 2008 13:09:52 +0300 Subject: Fix for bug#39226 Maria: crash with FLUSH TABLES WITH READ LOCK after LOCK TABLES - The problem was that we didn't inform the handler that we are going to close tables that are locked and may have (at least in Maria) be part of an active transaction. Fix for Bug#39227 Maria: crash with ALTER TABLE PARTITION Fix for Bug #39987 main.partition_not_windows fails under debug build Fixed some compiler errors & warnings found by pushbuild include/my_base.h: Added HA_EXTRA_PREPARE_FOR_FORCED_CLOSE for signaling the handler that the file will be forced closed include/my_global.h: Removed 'register' from 'swap_variables' as this gives a warnings when the variables are structs. 
Compilers should also now be smart enough to figure out this themselves mysql-test/r/subselect_debug.result: Reset value of the debug variable; Without setting this the subselect_innodb test will fail when run after this one mysql-test/suite/maria/r/maria.result: Merged test with myisam.test Added tests for new fixed bugs mysql-test/suite/maria/t/maria.test: Merged test with myisam.test Added tests for new fixed bugs mysql-test/t/subselect_debug.test: Reset value of the debug variable; Without setting this the subselect_innodb test will fail when run after this one mysys/my_uuid.c: Fixed compiler error on windows sql/ha_partition.cc: Added support for the new extra flag: HA_EXTRA_PREPARE_FOR_FORCED_CLOSE (Bug #39226) Ensure that we call extra() for HA_EXTRA_PREPARE_FOR_DROP (Bug#39227) sql/mysqld.cc: Fix for Bug #39987 main.partition_not_windows fails under debug build The problem was that when compiling for purify/valgrind realpath() is not used, which causes test_if_data_home_dir to fail when it shouldn't sql/sql_base.cc: Call HA_EXTRA_PREPARE_FOR_FORCED_CLOSE for tables that are locked but we are going to force close without doing a commit sql/sql_parse.cc: More DBUG_PRINT. Fixed comments storage/maria/ma_extra.c: If HA_EXTRA_PREPARE_FOR_FORCED_CLOSE is called and the table is part of a transaction, remove the table from beeing part of a transaction. 
This is safe as this is only used as part of flush tables or when the table is not part of a transaction storage/myisam/mi_open.c: Indentation fix unittest/mysys/waiting_threads-t.c: Remove not needed 'volatile' to get rid of compiler warnings on windows --- mysys/my_uuid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_uuid.c b/mysys/my_uuid.c index 36f1d2227f0..ca5e94c4754 100644 --- a/mysys/my_uuid.c +++ b/mysys/my_uuid.c @@ -145,11 +145,12 @@ void my_uuid(uchar *to) */ if (nanoseq) { + long delta; DBUG_ASSERT((tv > uuid_time) && (nanoseq > 0)); /* -1 so we won't make tv= uuid_time for nanoseq >= (tv - uuid_time) */ - long delta= min(nanoseq, tv - uuid_time -1); + delta= min(nanoseq, tv - uuid_time -1); tv-= delta; nanoseq-= delta; } -- cgit v1.2.1 From 5aa1e3b364baf49075c9c1e827ba30488561de96 Mon Sep 17 00:00:00 2001 From: Guilhem Bichot Date: Wed, 15 Oct 2008 14:44:31 +0200 Subject: Small fixes for pushbuild: compiler warnings, checking that partitioning is enabled when testing it. Don't fsync() index file when closing Maria table if not transactional. 
mysql-test/suite/maria/r/maria.result: piece moved mysql-test/suite/maria/r/maria_partition.result: result mysql-test/suite/maria/t/maria.test: - reset default storage engine at end of test, not in the middle - move piece which requires partitioning, to maria_partition.test, otherwise test fails on builds without partitioning compiled in mysql-test/suite/maria/t/maria_partition.test: new test for those Maria bugs which are specific of partitioning mysys/my_uuid.c: compiler warning fix (fix imported from latest 5.1-main) storage/maria/ma_close.c: don't fsync() index file when closing table if not transactional (same test as in _ma_once_end_block_record() when fsync-ing data file) storage/maria/ma_create.c: compiler warning fix (char* assigned to uchar*) storage/maria/ma_loghandler.c: compiler warning fix (char* assigned to uchar*) --- mysys/my_uuid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/my_uuid.c b/mysys/my_uuid.c index ca5e94c4754..d1e8331aaa1 100644 --- a/mysys/my_uuid.c +++ b/mysys/my_uuid.c @@ -145,12 +145,12 @@ void my_uuid(uchar *to) */ if (nanoseq) { - long delta; + ulong delta; DBUG_ASSERT((tv > uuid_time) && (nanoseq > 0)); /* -1 so we won't make tv= uuid_time for nanoseq >= (tv - uuid_time) */ - delta= min(nanoseq, tv - uuid_time -1); + delta= min(nanoseq, (ulong)(tv - uuid_time -1)); tv-= delta; nanoseq-= delta; } -- cgit v1.2.1 From c6a51b044739d6c1b0406ba7561334b1c373a022 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Tue, 21 Oct 2008 16:10:04 +0200 Subject: fixes for hanging waiting_thread-t.c on windows mysys/my_wincond.c: race condition: block gate could be left open forever, if cond_broadcast was done right after the last thread left WaitForMultipleObjects() on timeout mysys/thr_rwlock.c: make rwlocks behave similar to their distant linux/solaris relatives --- mysys/my_wincond.c | 2 +- mysys/thr_rwlock.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'mysys') 
diff --git a/mysys/my_wincond.c b/mysys/my_wincond.c index d1b07b61408..c9bc33df8c4 100644 --- a/mysys/my_wincond.c +++ b/mysys/my_wincond.c @@ -126,7 +126,7 @@ int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, EnterCriticalSection(&cond->lock_waiting); cond->waiting--; - if (cond->waiting == 0 && result == (WAIT_OBJECT_0+BROADCAST)) + if (cond->waiting == 0) { /* We're the last waiter to be notified or to stop waiting, so diff --git a/mysys/thr_rwlock.c b/mysys/thr_rwlock.c index 0aa4d3fc3c4..2a249cbf850 100644 --- a/mysys/thr_rwlock.c +++ b/mysys/thr_rwlock.c @@ -89,7 +89,7 @@ int my_rw_rdlock(rw_lock_t *rwp) pthread_mutex_lock(&rwp->lock); /* active or queued writers */ - while (( rwp->state < 0 ) || rwp->waiters) + while (( rwp->state < 0 )) pthread_cond_wait( &rwp->readers, &rwp->lock); rwp->state++; @@ -101,7 +101,7 @@ int my_rw_tryrdlock(rw_lock_t *rwp) { int res; pthread_mutex_lock(&rwp->lock); - if ((rwp->state < 0 ) || rwp->waiters) + if ((rwp->state < 0 )) res= EBUSY; /* Can't get lock */ else { -- cgit v1.2.1 From a58d20053dbd9192905bb4525f089a32f16b88f5 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Tue, 21 Oct 2008 20:10:49 +0200 Subject: win32: compilation failures, maria.test failure include/my_global.h: enable compile_time_assert for all compilers include/waiting_threads.h: 1. don't #extern "C" system includes, they don't like it. 2. 
remove any padding from WT_RESOURCE_ID structure - we want to compare it with memcmp mysys/waiting_threads.c: assert that WT_RESOURCE_ID can be compared with memcmp and has no random padding bytes --- mysys/waiting_threads.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'mysys') diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c index 255317ea4cc..489be6edbad 100644 --- a/mysys/waiting_threads.c +++ b/mysys/waiting_threads.c @@ -347,6 +347,8 @@ void wt_thd_destroy(WT_THD *thd) */ int wt_resource_id_memcmp(void *a, void *b) { + /* assert that the structure is not padded with random bytes */ + compile_time_assert(sizeof(WT_RESOURCE_ID)==sizeof(ulonglong)+sizeof(void*)); return memcmp(a, b, sizeof(WT_RESOURCE_ID)); } -- cgit v1.2.1 From 14c146618707540c46e1ab1c8b8f103913e1237a Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Fri, 24 Oct 2008 12:34:08 +0200 Subject: wt needs to use its own implementation of rwlocks with reader preference, at least where system rwlocks are fair. include/my_global.h: wt uses mutex-based rwlock implementation unless on linux include/waiting_threads.h: mutex-based rwlock implementation with reader preference mysys/thr_rwlock.c: revert the change. make my_rw_locks fair mysys/waiting_threads.c: mutex-based rwlock implementation with reader preference. 
convert complex multi-line macros to static functions --- mysys/thr_rwlock.c | 4 +- mysys/waiting_threads.c | 160 +++++++++++++++++++++++++++++++----------------- 2 files changed, 107 insertions(+), 57 deletions(-) (limited to 'mysys') diff --git a/mysys/thr_rwlock.c b/mysys/thr_rwlock.c index 2a249cbf850..280a0ec19e7 100644 --- a/mysys/thr_rwlock.c +++ b/mysys/thr_rwlock.c @@ -89,7 +89,7 @@ int my_rw_rdlock(rw_lock_t *rwp) pthread_mutex_lock(&rwp->lock); /* active or queued writers */ - while (( rwp->state < 0 )) + while ((rwp->state < 0 ) || rwp->waiters) pthread_cond_wait( &rwp->readers, &rwp->lock); rwp->state++; @@ -101,7 +101,7 @@ int my_rw_tryrdlock(rw_lock_t *rwp) { int res; pthread_mutex_lock(&rwp->lock); - if ((rwp->state < 0 )) + if ((rwp->state < 0 ) || rwp->waiters) res= EBUSY; /* Can't get lock */ else { diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c index 489be6edbad..ef19018831b 100644 --- a/mysys/waiting_threads.c +++ b/mysys/waiting_threads.c @@ -133,56 +133,105 @@ uint32 wt_cycle_stats[2][WT_CYCLE_STATS+1], wt_success_stats; static my_atomic_rwlock_t cycle_stats_lock, wait_stats_lock, success_stats_lock; -#define increment_success_stats() \ - do { \ - my_atomic_rwlock_wrlock(&success_stats_lock); \ - my_atomic_add32(&wt_success_stats, 1); \ - my_atomic_rwlock_wrunlock(&success_stats_lock); \ - } while (0) - -#define increment_cycle_stats(X,SLOT) \ - do { \ - uint i= (X); \ - if (i >= WT_CYCLE_STATS) \ - i= WT_CYCLE_STATS; \ - my_atomic_rwlock_wrlock(&cycle_stats_lock); \ - my_atomic_add32(&wt_cycle_stats[SLOT][i], 1); \ - my_atomic_rwlock_wrunlock(&cycle_stats_lock); \ - } while (0) - -#define increment_wait_stats(X,RET) \ - do { \ - uint i; \ - if ((RET) == ETIMEDOUT) \ - i= WT_WAIT_STATS; \ - else \ - { \ - ulonglong w=(X)/10; \ - for (i=0; i < WT_WAIT_STATS && w > wt_wait_table[i]; i++) ; \ - } \ - my_atomic_rwlock_wrlock(&wait_stats_lock); \ - my_atomic_add32(wt_wait_stats+i, 1); \ - 
my_atomic_rwlock_wrunlock(&wait_stats_lock); \ - } while (0) - -#define rc_rdlock(X) \ - do { \ - WT_RESOURCE *R=(X); \ - DBUG_PRINT("wt", ("LOCK resid=%lld for READ", R->id.value)); \ - rw_rdlock(&R->lock); \ - } while (0) -#define rc_wrlock(X) \ - do { \ - WT_RESOURCE *R=(X); \ - DBUG_PRINT("wt", ("LOCK resid=%lld for WRITE", R->id.value)); \ - rw_wrlock(&R->lock); \ - } while (0) -#define rc_unlock(X) \ - do { \ - WT_RESOURCE *R=(X); \ - DBUG_PRINT("wt", ("UNLOCK resid=%lld", R->id.value)); \ - rw_unlock(&R->lock); \ - } while (0) +static void increment_success_stats() +{ + my_atomic_rwlock_wrlock(&success_stats_lock); + my_atomic_add32(&wt_success_stats, 1); + my_atomic_rwlock_wrunlock(&success_stats_lock); +} + +static void increment_cycle_stats(uint depth, uint slot) +{ + if (depth >= WT_CYCLE_STATS) + depth= WT_CYCLE_STATS; + my_atomic_rwlock_wrlock(&cycle_stats_lock); + my_atomic_add32(&wt_cycle_stats[slot][depth], 1); + my_atomic_rwlock_wrunlock(&cycle_stats_lock); +} + +static void increment_wait_stats(ulonglong waited,int ret) +{ + uint i; + if ((ret) == ETIMEDOUT) + i= WT_WAIT_STATS; + else + for (i=0; i < WT_WAIT_STATS && waited/10 > wt_wait_table[i]; i++) ; + my_atomic_rwlock_wrlock(&wait_stats_lock); + my_atomic_add32(wt_wait_stats+i, 1); + my_atomic_rwlock_wrunlock(&wait_stats_lock); +} + +#ifdef WT_RWLOCKS_USE_MUTEXES +static void rc_rwlock_init(WT_RESOURCE *rc) +{ + pthread_cond_init(&rc->lock.cond, 0); + pthread_mutex_init(&rc->lock.mutex, MY_MUTEX_INIT_FAST); +} +static void rc_rwlock_destroy(WT_RESOURCE *rc) +{ + pthread_cond_destroy(&rc->lock.cond); + pthread_mutex_destroy(&rc->lock.mutex); +} +static void rc_rdlock(WT_RESOURCE *rc) +{ + DBUG_PRINT("wt", ("TRYLOCK resid=%ld for READ", (ulong)rc->id.value)); + pthread_mutex_lock(&rc->lock.mutex); + while (rc->lock.write_locked) + pthread_cond_wait(&rc->lock.cond, &rc->lock.mutex); + rc->lock.readers++; + pthread_mutex_unlock(&rc->lock.mutex); + DBUG_PRINT("wt", ("LOCK resid=%ld for READ", 
(ulong)rc->id.value)); +} +static void rc_wrlock(WT_RESOURCE *rc) +{ + DBUG_PRINT("wt", ("TRYLOCK resid=%ld for WRITE", (ulong)rc->id.value)); + pthread_mutex_lock(&rc->lock.mutex); + while (rc->lock.write_locked || rc->lock.readers) + pthread_cond_wait(&rc->lock.cond, &rc->lock.mutex); + rc->lock.write_locked=1; + pthread_mutex_unlock(&rc->lock.mutex); + DBUG_PRINT("wt", ("LOCK resid=%ld for WRITE", (ulong)rc->id.value)); +} +static void rc_unlock(WT_RESOURCE *rc) +{ + DBUG_PRINT("wt", ("UNLOCK resid=%ld", (ulong)rc->id.value)); + pthread_mutex_lock(&rc->lock.mutex); + if (rc->lock.write_locked) + { + rc->lock.write_locked=0; + pthread_cond_broadcast(&rc->lock.cond); + } + else if (--rc->lock.readers == 0) + pthread_cond_broadcast(&rc->lock.cond); + pthread_mutex_unlock(&rc->lock.mutex); +} +#else +static void rc_rwlock_init(WT_RESOURCE *rc) +{ + my_rwlock_init(&rc->lock, 0); +} +static void rc_rwlock_destroy(WT_RESOURCE *rc) +{ + rwlock_destroy(&rc->lock); +} +static void rc_rdlock(WT_RESOURCE *rc) +{ + DBUG_PRINT("wt", ("TRYLOCK resid=%ld for READ", (ulong)rc->id.value)); + rw_rdlock(&rc->lock); + DBUG_PRINT("wt", ("LOCK resid=%ld for READ", (ulong)rc->id.value)); +} +static void rc_wrlock(WT_RESOURCE *rc) +{ + DBUG_PRINT("wt", ("TRYLOCK resid=%ld for WRITE", (ulong)rc->id.value)); + rw_wrlock(&rc->lock); + DBUG_PRINT("wt", ("LOCK resid=%ld for WRITE", (ulong)rc->id.value)); +} +static void rc_unlock(WT_RESOURCE *rc) +{ + DBUG_PRINT("wt", ("UNLOCK resid=%ld", (ulong)rc->id.value)); + rw_unlock(&rc->lock); +} +#endif /* All resources are stored in a lock-free hash. 
Different threads @@ -202,7 +251,7 @@ static void wt_resource_init(uchar *arg) DBUG_ENTER("wt_resource_init"); bzero(rc, sizeof(*rc)); - my_rwlock_init(&rc->lock, 0); + rc_rwlock_init(rc); pthread_cond_init(&rc->cond, 0); my_init_dynamic_array(&rc->owners, sizeof(WT_THD *), 0, 5); DBUG_VOID_RETURN; @@ -220,7 +269,7 @@ static void wt_resource_destroy(uchar *arg) DBUG_ENTER("wt_resource_destroy"); DBUG_ASSERT(rc->owners.elements == 0); - rwlock_destroy(&rc->lock); + rc_rwlock_destroy(rc); pthread_cond_destroy(&rc->cond); delete_dynamic(&rc->owners); DBUG_VOID_RETURN; @@ -490,7 +539,7 @@ retry: } end: /* - Note that 'rc' is locked in this function, but it's never unlocked there. + Note that 'rc' is locked in this function, but it's never unlocked here. Instead it's saved in arg->rc and the *caller* is expected to unlock it. It's done to support different killing strategies. This is how it works: Assuming a graph @@ -549,6 +598,7 @@ static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth, struct deadlock_arg arg= {thd, max_depth, 0, 0}; int ret; DBUG_ENTER("deadlock"); + DBUG_ASSERT(depth < 2); ret= deadlock_search(&arg, blocker, depth); if (ret == WT_DEPTH_EXCEEDED) { @@ -688,8 +738,8 @@ int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker, WT_RESOURCE_ID *resid) LF_REQUIRE_PINS(3); - DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, resid=%llu", - thd->name, blocker->name, resid->value)); + DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, resid=%lu", + thd->name, blocker->name, (ulong)resid->value)); if (fix_thd_pins(thd)) DBUG_RETURN(WT_DEADLOCK); -- cgit v1.2.1 From f91219ed47604ac80c378bd917431fa42e4cb1d9 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 3 Nov 2008 20:33:34 +0100 Subject: don't use #pragma pack include/waiting_threads.h: don't #pragma pack mysys/lf_hash.c: typo in a comment mysys/waiting_threads.c: use the size of data, not the size of (possibly padded) structure --- mysys/lf_hash.c | 7 ++++--- mysys/waiting_threads.c | 10 ++++------ 2 
files changed, 8 insertions(+), 9 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 008abef0c8b..96ae3f338ab 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -281,8 +281,9 @@ static inline const uchar* hash_key(const LF_HASH *hash, } /* - compute the hash key value from the raw key. - note, that the hash value is limited to 2^31, because we need one + Compute the hash key value from the raw key. + + @note, that the hash value is limited to 2^31, because we need one bit to distinguish between normal and dummy nodes. */ static inline uint calc_hash(LF_HASH *hash, const uchar *key, uint keylen) @@ -300,7 +301,7 @@ static int initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *); /* Initializes lf_hash, the arguments are compatible with hash_init - @@note element_size sets both the size of allocated memory block for + @note element_size sets both the size of allocated memory block for lf_alloc and a size of memcpy'ed block size in lf_hash_insert. Typically they are the same, indeed. 
But LF_HASH::element_size can be decreased after lf_hash_init, and then lf_alloc will allocate larger block that diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c index ef19018831b..edabc25ee51 100644 --- a/mysys/waiting_threads.c +++ b/mysys/waiting_threads.c @@ -280,7 +280,7 @@ void wt_init() DBUG_ENTER("wt_init"); lf_hash_init(&reshash, sizeof(WT_RESOURCE), LF_HASH_UNIQUE, 0, - sizeof(struct st_wt_resource_id), 0, 0); + sizeof_WT_RESOURCE_ID, 0, 0); reshash.alloc.constructor= wt_resource_init; reshash.alloc.destructor= wt_resource_destroy; /* @@ -396,9 +396,7 @@ void wt_thd_destroy(WT_THD *thd) */ int wt_resource_id_memcmp(void *a, void *b) { - /* assert that the structure is not padded with random bytes */ - compile_time_assert(sizeof(WT_RESOURCE_ID)==sizeof(ulonglong)+sizeof(void*)); - return memcmp(a, b, sizeof(WT_RESOURCE_ID)); + return memcmp(a, b, sizeof_WT_RESOURCE_ID); } /** @@ -657,7 +655,7 @@ static int unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc) /* XXX if (rc->id.type->make_key) key= rc->id.type->make_key(&rc->id, &keylen); else */ { key= &rc->id; - keylen= sizeof(rc->id); + keylen= sizeof_WT_RESOURCE_ID; } /* @@ -751,7 +749,7 @@ int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker, WT_RESOURCE_ID *resid) /* XXX if (restype->make_key) key= restype->make_key(resid, &keylen); else */ { key= resid; - keylen= sizeof(*resid); + keylen= sizeof_WT_RESOURCE_ID; } DBUG_PRINT("wt", ("first blocker")); -- cgit v1.2.1 From 4f91e0726680b98898db16f8920420dde9d17ed8 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 10 Nov 2008 20:11:27 +0100 Subject: compile_time_assert --- mysys/waiting_threads.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'mysys') diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c index edabc25ee51..14b1d639d00 100644 --- a/mysys/waiting_threads.c +++ b/mysys/waiting_threads.c @@ -396,6 +396,8 @@ void wt_thd_destroy(WT_THD *thd) */ int wt_resource_id_memcmp(void *a, void *b) { + /* we use 
the fact that there's no padding in the middle of WT_RESOURCE_ID */ + compile_time_assert(offsetof(WT_RESOURCE_ID, type) == sizeof(ulonglong)); return memcmp(a, b, sizeof_WT_RESOURCE_ID); } -- cgit v1.2.1 From f1906c62d563fa1502e68a7d9854313560474be9 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Sat, 29 Nov 2008 00:27:13 +0100 Subject: Bug#34374: mysql generates incorrect warning an item was evaluated unnecessary, fix that by checking preconditions before evaluating the item sql/sql_select.cc: an item was evaluated unnecessary, fix that by checking preconditions before evaluating the item --- mysys/my_getopt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c index ddb0a4d3ed5..059896f5081 100644 --- a/mysys/my_getopt.c +++ b/mysys/my_getopt.c @@ -39,8 +39,7 @@ static ulonglong getopt_ull(char *arg, const struct my_option *optp, static double getopt_double(char *arg, const struct my_option *optp, int *err); static void init_variables(const struct my_option *options, init_func_p init_one_value); -static void init_one_value(const struct my_option *option, uchar* *variable, - longlong value); +static void init_one_value(const struct my_option *opt, uchar* *, longlong); static void fini_one_value(const struct my_option *option, uchar* *variable, longlong value); static int setval(const struct my_option *opts, uchar **value, char *argument, -- cgit v1.2.1 From 32f81bab7d3ed46ddc2863c7be8d69f8dcf698c3 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Wed, 3 Dec 2008 00:02:52 +0200 Subject: WL#3262 add mutex lock order checking to safemutex (also called safe_mutex_deadlock_detector) This writes a warning on stderr if one uses mutex in different order, like if one in one case would lock mutex in the order A,B and in another case would lock mutex in the order B,A This is inspired by and loosely based on the LOCKDEP patch by Jonas Wrong mutex order is either fixed or mutex are marked 
with MYF_NO_DEADLOCK_DETECTION if used inconsistently (need to be fixed by server team) KNOWN_BUGS.txt: Added information that one need to dump and restore Maria tables include/hash.h: Added prototype function for walking over all elements in a hash include/my_pthread.h: Added my_pthread_mutex_init() and my_pthread_mutex_lock(); These should be used if one wants to disable mutex order checking. Changed names of the nonposix mutex_init functions to not conflict with my_phread_mutex_init() Added and extended structures for mutex deadlock detection. New arguments to sage_mutex_init() and safe_mutex_lock() to allow one to disable mutex order checking. Added variable 'safe_mutex_deadlock_detector' to enable/disable deadlock detection for all pthread_mutex_init() mysys/Makefile.am: Added cleaning of test files Added test_thr_mutex mysys/hash.c: Added hash_iterate() to iterate over all elements in a hash More comments mysys/my_init.c: Added calls to destory all mutex uses by mysys() Added waiting for threads to end before calling TERMINATE() to list not freed memory mysys/my_pthread.c: Changed names to free my_pthread_mutex_init() for mutex-lock-order-checking mysys/my_sleep.c: Fixed too long wait if using 1000000L as argument mysys/my_thr_init.c: Mark THR_LOCK_threads and THR_LOCK_malloc to not have mutex deadlock detection. 
(We can't have it enabled for this as these are internal mutex used by the detector Call my_thread_init() early as we need thread specific variables enabled for the following pthread_mutex_init() Move code to wait for threads to end to my_wait_for_other_threads_to_die() Don't destroy mutex and conditions unless all threads have died Added my_thread_destroy_mutex() to destroy all mutex used by the mysys thread system Name the thread specific mutex as "mysys_var->mutex" Added my_thread_var_mutex_in_use() to return pointer to mutex in use or 0 if thread variables are not initialized mysys/mysys_priv.h: Added prototypes for functions used internally with mutex-wrong-usage detection mysys/thr_mutex.c: Added runtime detection of mutex used in conflicting order See WL#3262 or test_thr_mutex.c for examples The base idea is for each mutex have two hashes: - mutex->locked_mutex points to all mutex used after this one - mutex->used_mutex points to all mutex which has this mutex in it's mutex->locked_mutex There is a wrong mutex order if any mutex currently locked before this mutex is in the mutex->locked_mutex hash sql/event_queue.cc: Mark mutex used inconsistently (need to be fixed by server team) sql/event_scheduler.cc: Declare the right order to take the mutex sql/events.cc: Mark mutex used inconsistently (need to be fixed by server team) sql/ha_ndbcluster_binlog.cc: Mark mutex used inconsistently (need to be fixed by server team) sql/log.cc: Mark mutex used inconsistently (need to be fixed by server team) sql/mysqld.cc: Use pthread_mutex_trylock instead of pthread_mutex_unlock() when sending kill signal to thread This is needed to avoid wrong mutex order as normally one takes 'current_mutex' before mysys_var->mutex. Added call to free sp cache. Add destruction of LOCK_server_started and COND_server_started. Added register_mutex_order() function to register in which order mutex should be taken (to initiailize mutex_deadlock_detector). 
Added option to turn off safe_mutex_deadlock_detector sql/protocol.cc: Fixed wrong argument to DBUG_PRINT (found by valgrind) sql/rpl_mi.cc: Mark mutex used inconsistently (need to be fixed by server team) sql/set_var.cc: Remove wrong locking of LOCK_global_system_variables when reading and setting log variables (would cause inconsistent mutex order). Update global variables outside of logger.unlock() as LOCK_global_system_variables has to be taken before logger locks Reviewed by gluh sql/sp_cache.cc: Added function to destroy mutex used by sp cache sql/sp_cache.h: Added function to destroy mutex used by sp cache sql/sql_class.cc: Use pthread_mutex_trylock instead of pthread_mutex_unlock() when sending kill signal to thread This is needed to avoid wrong mutex order as normally one takes 'current_mutex' before mysys_var->mutex. Register order in which LOCK_delete and mysys_var->mutex is taken sql/sql_insert.cc: Give a name for Delayed_insert::mutex Mark mutex used inconsistently (need to be fixed by server team) Move closing of tables outside of di->mutex (to avoid wrong mutex order) sql/sql_show.cc: Don't keep LOCK_global_system_variables locked over value->show_type() as this leads to wrong mutex order storage/innobase/handler/ha_innodb.cc: Disable safe_muted_deadlock_detector for innobase intern mutex (to speed up page cache initialization) storage/maria/ha_maria.cc: Added flag to ha_maria::info() to signal if we need to lock table share or not. This is needed to avoid locking mutex in wrong order storage/maria/ha_maria.h: Added flag to ha_maria::info() to signal if we need to lock table share or not. 
storage/maria/ma_close.c: Destroy key_del_lock Simplify freeing ftparser_param storage/maria/ma_key.c: Better comment storage/maria/ma_loghandler.c: Mark mutex used inconsistently (need to be fixed by sanja) storage/maria/ma_state.c: More comments storage/maria/ma_test1.c: Ensure that safe_mutex_deadlock_detector is always on (should be, this is just for safety) storage/maria/ma_test2.c: Ensure that safe_mutex_deadlock_detector is always on (should be, this is just for safety) --- mysys/Makefile.am | 10 ++ mysys/hash.c | 59 +++++++- mysys/my_init.c | 5 + mysys/my_pthread.c | 6 +- mysys/my_sleep.c | 2 +- mysys/my_thr_init.c | 121 +++++++++++---- mysys/mysys_priv.h | 3 + mysys/thr_mutex.c | 426 +++++++++++++++++++++++++++++++++++++++++++++++----- 8 files changed, 552 insertions(+), 80 deletions(-) (limited to 'mysys') diff --git a/mysys/Makefile.am b/mysys/Makefile.am index e5e7539ece6..6efdd0d75e7 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -79,6 +79,13 @@ DEFS = -DDEFAULT_BASEDIR=\"$(prefix)\" \ # I hope this always does the right thing. Otherwise this is only test programs FLAGS=$(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS) @NOINST_LDFLAGS@ +CLEANFILES = test_bitmap$(EXEEXT) test_priority_queue$(EXEEXT) \ + test_thr_alarm$(EXEEXT) test_thr_lock$(EXEEXT) \ + test_vsnprintf$(EXEEXT) test_io_cache$(EXEEXT) \ + test_dir$(EXEEXT) test_charset$(EXEEXT) \ + testhash$(EXEEXT) test_gethwaddr$(EXEEXT) \ + test_base64$(EXEEXT) test_thr_mutex$(EXEEXT) + # # The CP .. 
RM stuff is to avoid problems with some compilers (like alpha ccc) # which automaticly removes the object files you use to compile a final program @@ -129,6 +136,9 @@ test_base64$(EXEEXT): base64.c $(LIBRARIES) $(LINK) $(FLAGS) -DMAIN ./test_base64.c $(LDADD) $(LIBS) $(RM) -f ./test_base64.c +test_thr_mutex$(EXEEXT): test_thr_mutex.c $(LIBRARIES) + $(LINK) $(FLAGS) $(srcdir)/test_thr_mutex.c $(LDADD) $(LIBS) + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/mysys/hash.c b/mysys/hash.c index 3a9f05a3e0b..0d3f79bc40f 100644 --- a/mysys/hash.c +++ b/mysys/hash.c @@ -304,7 +304,13 @@ static int hashcmp(const HASH *hash, HASH_LINK *pos, const uchar *key, } - /* Write a hash-key to the hash-index */ +/** + Write a hash-key to the hash-index + + @return + @retval 0 ok + @retval 1 Duplicate key or out of memory +*/ my_bool my_hash_insert(HASH *info,const uchar *record) { @@ -443,11 +449,21 @@ my_bool my_hash_insert(HASH *info,const uchar *record) } -/****************************************************************************** -** Remove one record from hash-table. The record with the same record -** ptr is removed. -** if there is a free-function it's called for record if found -******************************************************************************/ +/** + Remove one record from hash-table. + + @fn hash_delete() + @param hash Hash tree + @param record Row to be deleted + + @notes + The record with the same record ptr is removed. + If there is a free-function it's called if record was found. 
+ + @return + @retval 0 ok + @retval 1 Record not found +*/ my_bool hash_delete(HASH *hash,uchar *record) { @@ -656,6 +672,37 @@ void hash_replace(HASH *hash, HASH_SEARCH_STATE *current_record, uchar *new_row) } +/** + Iterate over all elements in hash and call function with the element + + @param hash hash array + @param action function to call for each argument + @param argument second argument for call to action + + @notes + If one of functions calls returns 1 then the iteration aborts + + @retval 0 ok + @retval 1 iteration aborted becasue action returned 1 +*/ + +my_bool hash_iterate(HASH *hash, hash_walk_action action, void *argument) +{ + uint records, i; + HASH_LINK *data; + + records= hash->records; + data= dynamic_element(&hash->array,0,HASH_LINK*); + + for (i= 0 ; i < records ; i++) + { + if ((*action)(data[i].data, argument)) + return 1; + } + return 0; +} + + #ifndef DBUG_OFF my_bool hash_check(HASH *hash) diff --git a/mysys/my_init.c b/mysys/my_init.c index a153275f87e..453e62b19bb 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -165,6 +165,9 @@ void my_end(int infoflag) free_charsets(); my_error_unregister_all(); my_once_free(); +#ifdef THREAD + my_thread_destroy_mutex(); +#endif if ((infoflag & MY_GIVE_INFO) || print_info) { @@ -195,6 +198,8 @@ Voluntary context switches %ld, Involuntary context switches %ld\n", fprintf(info_file,"\nRun time: %.1f\n",(double) clock()/CLOCKS_PER_SEC); #endif #if defined(SAFEMALLOC) + /* Wait for other threads to free mysys_var */ + (void) my_wait_for_other_threads_to_die(1); TERMINATE(stderr, (infoflag & MY_GIVE_INFO) != 0); #elif defined(__WIN__) && defined(_MSC_VER) _CrtSetReportMode( _CRT_WARN, _CRTDBG_MODE_FILE ); diff --git a/mysys/my_pthread.c b/mysys/my_pthread.c index aba3e47d754..e97bbe89be0 100644 --- a/mysys/my_pthread.c +++ b/mysys/my_pthread.c @@ -429,7 +429,8 @@ int sigwait(sigset_t *setp, int *sigp) #include -int my_pthread_mutex_init(pthread_mutex_t *mp, const pthread_mutexattr_t *attr) +int 
my_pthread_mutex_noposix_init(pthread_mutex_t *mp, + const pthread_mutexattr_t *attr) { int error; if (!attr) @@ -439,7 +440,8 @@ int my_pthread_mutex_init(pthread_mutex_t *mp, const pthread_mutexattr_t *attr) return error; } -int my_pthread_cond_init(pthread_cond_t *mp, const pthread_condattr_t *attr) +int my_pthread_cond_noposix_init(pthread_cond_t *mp, + const pthread_condattr_t *attr) { int error; if (!attr) diff --git a/mysys/my_sleep.c b/mysys/my_sleep.c index 87170e4af41..cb21c15a925 100644 --- a/mysys/my_sleep.c +++ b/mysys/my_sleep.c @@ -30,7 +30,7 @@ void my_sleep(ulong m_seconds) t.tv_usec= m_seconds % 1000000L; select(0,0,0,0,&t); /* sleep */ #else - uint sec= (uint) (m_seconds / 1000000L); + uint sec= (uint) ((m_seconds + 999999L) / 1000000L); ulong start= (ulong) time((time_t*) 0); while ((ulong) time((time_t*) 0) < start+sec); #endif diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c index 1d03577ce34..6ebd1f512f1 100644 --- a/mysys/my_thr_init.c +++ b/mysys/my_thr_init.c @@ -115,6 +115,15 @@ my_bool my_thread_global_init(void) } #endif /* TARGET_OS_LINUX */ + /* Mutex used by my_thread_init() and after my_thread_destroy_mutex() */ + my_pthread_mutex_init(&THR_LOCK_threads, MY_MUTEX_INIT_FAST, + "THR_LOCK_threads", MYF_NO_DEADLOCK_DETECTION); + my_pthread_mutex_init(&THR_LOCK_malloc, MY_MUTEX_INIT_FAST, + "THR_LOCK_malloc", MYF_NO_DEADLOCK_DETECTION); + + if (my_thread_init()) + return 1; + #ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP /* Set mutex type to "fast" a.k.a "adaptive" @@ -138,7 +147,7 @@ my_bool my_thread_global_init(void) PTHREAD_MUTEX_ERRORCHECK); #endif - pthread_mutex_init(&THR_LOCK_malloc,MY_MUTEX_INIT_FAST); + /* Mutex uses by mysys */ pthread_mutex_init(&THR_LOCK_open,MY_MUTEX_INIT_FAST); pthread_mutex_init(&THR_LOCK_lock,MY_MUTEX_INIT_FAST); pthread_mutex_init(&THR_LOCK_isam,MY_MUTEX_INIT_SLOW); @@ -146,7 +155,6 @@ my_bool my_thread_global_init(void) pthread_mutex_init(&THR_LOCK_heap,MY_MUTEX_INIT_FAST); 
pthread_mutex_init(&THR_LOCK_net,MY_MUTEX_INIT_FAST); pthread_mutex_init(&THR_LOCK_charset,MY_MUTEX_INIT_FAST); - pthread_mutex_init(&THR_LOCK_threads,MY_MUTEX_INIT_FAST); pthread_mutex_init(&THR_LOCK_time,MY_MUTEX_INIT_FAST); pthread_cond_init(&THR_COND_threads, NULL); #if defined( __WIN__) || defined(OS2) @@ -158,44 +166,64 @@ my_bool my_thread_global_init(void) #ifndef HAVE_GETHOSTBYNAME_R pthread_mutex_init(&LOCK_gethostbyname_r,MY_MUTEX_INIT_SLOW); #endif - if (my_thread_init()) - { - my_thread_global_end(); /* Clean up */ - return 1; - } return 0; } -void my_thread_global_end(void) +/** + Wait for all threads in system to die + @fn my_wait_for_other_threads_to_die() + @param number_of_threads Wait until this number of threads + + @retval 0 Less or equal to number_of_threads left + @retval 1 Wait failed +*/ + +my_bool my_wait_for_other_threads_to_die(uint number_of_threads) { struct timespec abstime; my_bool all_threads_killed= 1; set_timespec(abstime, my_thread_end_wait_time); pthread_mutex_lock(&THR_LOCK_threads); - while (THR_thread_count > 0) + while (THR_thread_count > number_of_threads) { int error= pthread_cond_timedwait(&THR_COND_threads, &THR_LOCK_threads, &abstime); if (error == ETIMEDOUT || error == ETIME) { -#ifdef HAVE_PTHREAD_KILL - /* - We shouldn't give an error here, because if we don't have - pthread_kill(), programs like mysqld can't ensure that all threads - are killed when we enter here. - */ - if (THR_thread_count) - fprintf(stderr, - "Error in my_thread_global_end(): %d threads didn't exit\n", - THR_thread_count); -#endif all_threads_killed= 0; break; } } pthread_mutex_unlock(&THR_LOCK_threads); + return all_threads_killed; +} + + +/** + End the mysys thread system. 
Called when ending the last thread +*/ + + +void my_thread_global_end(void) +{ + my_bool all_threads_killed; + + if (!(all_threads_killed= my_wait_for_other_threads_to_die(0))) + { +#ifdef HAVE_PTHREAD_KILL + /* + We shouldn't give an error here, because if we don't have + pthread_kill(), programs like mysqld can't ensure that all threads + are killed when we enter here. + */ + if (THR_thread_count) + fprintf(stderr, + "Error in my_thread_global_end(): %d threads didn't exit\n", + THR_thread_count); +#endif + } pthread_key_delete(THR_KEY_mysys); #ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP @@ -204,7 +232,25 @@ void my_thread_global_end(void) #ifdef PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP pthread_mutexattr_destroy(&my_errorcheck_mutexattr); #endif - pthread_mutex_destroy(&THR_LOCK_malloc); + if (all_threads_killed) + { + pthread_mutex_destroy(&THR_LOCK_threads); + pthread_cond_destroy(&THR_COND_threads); + pthread_mutex_destroy(&THR_LOCK_malloc); + } +} + +/* Free all mutex used by mysys */ + +void my_thread_destroy_mutex(void) +{ + struct st_my_thread_var *tmp; + tmp= my_pthread_getspecific(struct st_my_thread_var*,THR_KEY_mysys); + if (tmp) + { + safe_mutex_free_deadlock_data(&tmp->mutex); + } + pthread_mutex_destroy(&THR_LOCK_open); pthread_mutex_destroy(&THR_LOCK_lock); pthread_mutex_destroy(&THR_LOCK_isam); @@ -213,11 +259,6 @@ void my_thread_global_end(void) pthread_mutex_destroy(&THR_LOCK_net); pthread_mutex_destroy(&THR_LOCK_time); pthread_mutex_destroy(&THR_LOCK_charset); - if (all_threads_killed) - { - pthread_mutex_destroy(&THR_LOCK_threads); - pthread_cond_destroy(&THR_COND_threads); - } #if !defined(HAVE_LOCALTIME_R) || !defined(HAVE_GMTIME_R) pthread_mutex_destroy(&LOCK_localtime_r); #endif @@ -287,7 +328,8 @@ my_bool my_thread_init(void) #else tmp->pthread_self= pthread_self(); #endif - pthread_mutex_init(&tmp->mutex,MY_MUTEX_INIT_FAST); + my_pthread_mutex_init(&tmp->mutex, MY_MUTEX_INIT_FAST, "mysys_var->mutex", + 0); pthread_cond_init(&tmp->suspend, 
NULL); tmp->stack_ends_here= &tmp + STACK_DIRECTION * my_thread_stack_size; @@ -330,6 +372,13 @@ void my_thread_end(void) #endif if (tmp && tmp->init) { + +#if !defined(__bsdi__) && !defined(__OpenBSD__) + /* bsdi and openbsd 3.5 dumps core here */ + pthread_cond_destroy(&tmp->suspend); +#endif + pthread_mutex_destroy(&tmp->mutex); + #if !defined(DBUG_OFF) /* tmp->dbug is allocated inside DBUG library */ if (tmp->dbug) @@ -339,12 +388,11 @@ void my_thread_end(void) tmp->dbug=0; } #endif -#if !defined(__bsdi__) && !defined(__OpenBSD__) - /* bsdi and openbsd 3.5 dumps core here */ - pthread_cond_destroy(&tmp->suspend); -#endif - pthread_mutex_destroy(&tmp->mutex); #if !defined(__WIN__) || defined(USE_TLS) +#ifndef DBUG_OFF + /* To find bugs when accessing unallocated data */ + bfill(tmp, sizeof(tmp), 0x8F); +#endif free(tmp); #else tmp->init= 0; @@ -399,6 +447,15 @@ extern void **my_thread_var_dbug() } #endif +/* Return pointer to mutex_in_use */ + +safe_mutex_t **my_thread_var_mutex_in_use() +{ + struct st_my_thread_var *tmp= + my_pthread_getspecific(struct st_my_thread_var*,THR_KEY_mysys); + return tmp ? &tmp->mutex_in_use : 0; +} + /**************************************************************************** Get name of current thread. ****************************************************************************/ diff --git a/mysys/mysys_priv.h b/mysys/mysys_priv.h index 6e0959ae08c..113b64005f2 100644 --- a/mysys/mysys_priv.h +++ b/mysys/mysys_priv.h @@ -33,6 +33,7 @@ extern pthread_mutex_t THR_LOCK_charset, THR_LOCK_time; #include #endif + /* EDQUOT is used only in 3 C files only in mysys/. If it does not exist on system, we set it to some value which can never happen. 
@@ -42,3 +43,5 @@ extern pthread_mutex_t THR_LOCK_charset, THR_LOCK_time; #endif void my_error_unregister_all(void); +void my_thread_destroy_mutex(void); +my_bool my_wait_for_other_threads_to_die(uint number_of_threads); diff --git a/mysys/thr_mutex.c b/mysys/thr_mutex.c index aa46021a938..ddbe613cdae 100644 --- a/mysys/thr_mutex.c +++ b/mysys/thr_mutex.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2000-2003 MySQL AB +/* Copyright (C) 2000-2008 MySQL AB, 2008 Sun Microsystems, Inc This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,6 +24,7 @@ #include "mysys_priv.h" #include "my_static.h" #include +#include #ifndef DO_NOT_REMOVE_THREAD_WRAPPERS /* Remove wrappers */ @@ -34,28 +35,68 @@ #undef pthread_mutex_destroy #undef pthread_cond_wait #undef pthread_cond_timedwait +#undef safe_mutex_free_deadlock_data #ifdef HAVE_NONPOSIX_PTHREAD_MUTEX_INIT -#define pthread_mutex_init(a,b) my_pthread_mutex_init((a),(b)) +#define pthread_mutex_init(a,b) my_pthread_noposix_mutex_init((a),(b)) #endif #endif /* DO_NOT_REMOVE_THREAD_WRAPPERS */ static pthread_mutex_t THR_LOCK_mutex; static ulong safe_mutex_count= 0; /* Number of mutexes created */ +static ulong safe_mutex_id= 0; +my_bool safe_mutex_deadlock_detector= 1; /* On by default */ + #ifdef SAFE_MUTEX_DETECT_DESTROY -static struct st_safe_mutex_info_t *safe_mutex_root= NULL; +static struct st_safe_mutex_create_info_t *safe_mutex_create_root= NULL; #endif +static my_bool add_used_to_locked_mutex(safe_mutex_t *used_mutex, + safe_mutex_deadlock_t *locked_mutex); +static my_bool add_to_locked_mutex(safe_mutex_deadlock_t *locked_mutex, + safe_mutex_t *current_mutex); +static my_bool remove_from_locked_mutex(safe_mutex_t *mp, + safe_mutex_t *delete_mutex); +static my_bool remove_from_used_mutex(safe_mutex_deadlock_t *locked_mutex, + safe_mutex_t *mutex); +static void print_deadlock_warning(safe_mutex_t *new_mutex, + safe_mutex_t *conflicting_mutex); 
+ void safe_mutex_global_init(void) { pthread_mutex_init(&THR_LOCK_mutex,MY_MUTEX_INIT_FAST); + safe_mutex_id= safe_mutex_count= 0; + safe_mutex_deadlock_detector= 1; + +#ifdef SAFE_MUTEX_DETECT_DESTROY + safe_mutex_create_root= 0; +#endif +} + +static inline void remove_from_active_list(safe_mutex_t *mp) +{ + if (!(mp->active_flags & (MYF_NO_DEADLOCK_DETECTION | MYF_TRY_LOCK))) + { + /* Remove mutex from active mutex linked list */ + if (mp->next) + mp->next->prev= mp->prev; + if (mp->prev) + mp->prev->next= mp->next; + else + *my_thread_var_mutex_in_use()= mp->next; + } + mp->prev= mp->next= 0; } int safe_mutex_init(safe_mutex_t *mp, const pthread_mutexattr_t *attr __attribute__((unused)), + const char *name, + myf my_flags, const char *file, - uint line, const char *name) + uint line) { + DBUG_ENTER("safe_mutex_init"); + DBUG_PRINT("enter",("mutex: 0x%lx name: %s", (ulong) mp, name)); bzero((char*) mp,sizeof(*mp)); pthread_mutex_init(&mp->global,MY_MUTEX_INIT_ERRCHK); pthread_mutex_init(&mp->mutex,attr); @@ -65,6 +106,36 @@ int safe_mutex_init(safe_mutex_t *mp, /* Skip the very common '&' prefix from the autogenerated name */ mp->name= name[0] == '&' ? 
name + 1 : name; + if (safe_mutex_deadlock_detector && !( my_flags & MYF_NO_DEADLOCK_DETECTION)) + { + if (!my_multi_malloc(MY_FAE | MY_WME, + &mp->locked_mutex, sizeof(*mp->locked_mutex), + &mp->used_mutex, sizeof(*mp->used_mutex), NullS)) + { + /* Disable deadlock handling for this mutex */ + my_flags|= MYF_NO_DEADLOCK_DETECTION; + } + else + { + pthread_mutex_lock(&THR_LOCK_mutex); + mp->id= ++safe_mutex_id; + pthread_mutex_unlock(&THR_LOCK_mutex); + hash_init(mp->locked_mutex, &my_charset_bin, + 1000, + offsetof(safe_mutex_deadlock_t, id), + sizeof(mp->id), + 0, 0, HASH_UNIQUE); + hash_init(mp->used_mutex, &my_charset_bin, + 1000, + offsetof(safe_mutex_t, id), + sizeof(mp->id), + 0, 0, HASH_UNIQUE); + } + } + else + my_flags|= MYF_NO_DEADLOCK_DETECTION; + mp->create_flags= my_flags; + #ifdef SAFE_MUTEX_DETECT_DESTROY /* Monitor the freeing of mutexes. This code depends on single thread init @@ -72,7 +143,7 @@ int safe_mutex_init(safe_mutex_t *mp, */ if ((mp->info= (safe_mutex_info_t *) malloc(sizeof(safe_mutex_info_t)))) { - struct st_safe_mutex_info_t *info =mp->info; + struct st_safe_mutex_info_t *info= mp->info; info->init_file= file; info->init_line= line; @@ -80,20 +151,21 @@ int safe_mutex_init(safe_mutex_t *mp, info->next= NULL; pthread_mutex_lock(&THR_LOCK_mutex); - if ((info->next= safe_mutex_root)) - safe_mutex_root->prev= info; - safe_mutex_root= info; + if ((info->next= safe_mutex_create_root)) + safe_mutex_create_root->prev= info; + safe_mutex_create_root= info; safe_mutex_count++; pthread_mutex_unlock(&THR_LOCK_mutex); } #else thread_safe_increment(safe_mutex_count, &THR_LOCK_mutex); #endif /* SAFE_MUTEX_DETECT_DESTROY */ - return 0; + DBUG_RETURN(0); } -int safe_mutex_lock(safe_mutex_t *mp, my_bool try_lock, const char *file, uint line) +int safe_mutex_lock(safe_mutex_t *mp, myf my_flags, const char *file, + uint line) { int error; DBUG_PRINT("mutex", ("%s (0x%lx) locking", mp->name ? 
mp->name : "Null", @@ -110,12 +182,13 @@ int safe_mutex_lock(safe_mutex_t *mp, my_bool try_lock, const char *file, uint l pthread_mutex_lock(&mp->global); if (mp->count > 0) { - if (try_lock) - { - pthread_mutex_unlock(&mp->global); - return EBUSY; - } - else if (pthread_equal(pthread_self(),mp->thread)) + /* + Check that we are not trying to lock mutex twice. This is an error + even if we are using 'try_lock' as it's not portably what happens + if you lock the mutex many times and this is in any case bad + behaviour that should not be encouraged + */ + if (pthread_equal(pthread_self(),mp->thread)) { fprintf(stderr, "safe_mutex: Trying to lock mutex at %s, line %d, when the" @@ -143,7 +216,7 @@ int safe_mutex_lock(safe_mutex_t *mp, my_bool try_lock, const char *file, uint l instead just return EBUSY, since this is the expected behaviour of trylock(). */ - if (try_lock) + if (my_flags & MYF_TRY_LOCK) { error= pthread_mutex_trylock(&mp->mutex); if (error == EBUSY) @@ -169,7 +242,93 @@ int safe_mutex_lock(safe_mutex_t *mp, my_bool try_lock, const char *file, uint l } mp->file= file; mp->line= line; + mp->active_flags= mp->create_flags | my_flags; pthread_mutex_unlock(&mp->global); + + /* Deadlock detection */ + + mp->prev= mp->next= 0; + if (!(mp->active_flags & (MYF_TRY_LOCK | MYF_NO_DEADLOCK_DETECTION))) + { + safe_mutex_t **mutex_in_use= my_thread_var_mutex_in_use(); + + if (!mutex_in_use) + { + /* thread has not called my_thread_init() */ + mp->active_flags|= MYF_NO_DEADLOCK_DETECTION; + } + else + { + safe_mutex_t *mutex_root; + if ((mutex_root= *mutex_in_use)) /* If not first locked */ + { + /* + Protect locked_mutex against changes if a mutex is deleted + */ + pthread_mutex_lock(&THR_LOCK_mutex); + + if (!hash_search(mutex_root->locked_mutex, (uchar*) &mp->id, 0)) + { + safe_mutex_deadlock_t *deadlock; + safe_mutex_t *mutex; + + /* Create object to store mutex info */ + if (!(deadlock= my_malloc(sizeof(*deadlock), + MYF(MY_ZEROFILL | MY_WME | MY_FAE)))) + goto 
abort_loop; + deadlock->name= mp->name; + deadlock->id= mp->id; + deadlock->mutex= mp; + /* The following is useful for debugging wrong mutex usage */ + deadlock->file= file; + deadlock->line= line; + + /* Check if potential deadlock */ + mutex= mutex_root; + do + { + if (hash_search(mp->locked_mutex, (uchar*) &mutex->id, 0)) + { + print_deadlock_warning(mp, mutex); + /* Mark wrong usage to avoid future warnings for same error */ + deadlock->warning_only= 1; + add_to_locked_mutex(deadlock, mutex_root); + DBUG_ASSERT(deadlock->count > 0); + goto abort_loop; + } + } + while ((mutex= mutex->next)); + + /* + Copy current mutex and all mutex that has been locked + after current mutex (mp->locked_mutex) to all mutex that + was locked before previous mutex (mutex_root->used_mutex) + + For example if A->B would have been done before and we + are now locking (C) in B->C, then we would add C into + B->locked_mutex and A->locked_mutex + */ + hash_iterate(mutex_root->used_mutex, + (hash_walk_action) add_used_to_locked_mutex, + deadlock); + + /* + Copy all current mutex and all mutex locked after current one + into the prev mutex + */ + add_used_to_locked_mutex(mutex_root, deadlock); + DBUG_ASSERT(deadlock->count > 0); + } + abort_loop: + pthread_mutex_unlock(&THR_LOCK_mutex); + } + /* Link mutex into mutex_in_use list */ + if ((mp->next= *mutex_in_use)) + (*mutex_in_use)->prev= mp; + *mutex_in_use= mp; + } + } + DBUG_PRINT("mutex", ("%s (0x%lx) locked", mp->name, (ulong) mp)); return error; } @@ -182,7 +341,9 @@ int safe_mutex_unlock(safe_mutex_t *mp,const char *file, uint line) pthread_mutex_lock(&mp->global); if (mp->count == 0) { - fprintf(stderr,"safe_mutex: Trying to unlock mutex %s that wasn't locked at %s, line %d\n" + fprintf(stderr, + "safe_mutex: Trying to unlock mutex %s that wasn't locked at " + "%s, line %d\n" "Last used at %s, line: %d\n", mp->name ? mp->name : "Null", file, line, mp->file ? 
mp->file : "Null", mp->line); @@ -191,7 +352,9 @@ int safe_mutex_unlock(safe_mutex_t *mp,const char *file, uint line) } if (!pthread_equal(pthread_self(),mp->thread)) { - fprintf(stderr,"safe_mutex: Trying to unlock mutex %s at %s, line %d that was locked by " + fprintf(stderr, + "safe_mutex: Trying to unlock mutex %s at %s, line %d that was " + "locked by " "another thread at: %s, line: %d\n", mp->name, file, line, mp->file, mp->line); fflush(stderr); @@ -199,6 +362,9 @@ int safe_mutex_unlock(safe_mutex_t *mp,const char *file, uint line) } mp->thread= 0; mp->count--; + + remove_from_active_list(mp); + #ifdef __WIN__ pthread_mutex_unlock(&mp->mutex); error=0; @@ -206,8 +372,9 @@ int safe_mutex_unlock(safe_mutex_t *mp,const char *file, uint line) error=pthread_mutex_unlock(&mp->mutex); if (error) { - fprintf(stderr,"safe_mutex: Got error: %d (%d) when trying to unlock mutex %s at %s, " - "line %d\n", error, errno, mp->name, file, line); + fprintf(stderr, + "safe_mutex: Got error: %d (%d) when trying to unlock mutex " + "%s at %s, line %d\n", error, errno, mp->name, file, line); fflush(stderr); abort(); } @@ -221,18 +388,23 @@ int safe_cond_wait(pthread_cond_t *cond, safe_mutex_t *mp, const char *file, uint line) { int error; + safe_mutex_t save_state; + pthread_mutex_lock(&mp->global); if (mp->count == 0) { - fprintf(stderr,"safe_mutex: Trying to cond_wait on a unlocked mutex %s at %s, line %d\n", + fprintf(stderr, + "safe_mutex: Trying to cond_wait on a unlocked mutex %s at %s, " + "line %d\n", mp->name ? 
mp->name : "Null", file, line); fflush(stderr); abort(); } if (!pthread_equal(pthread_self(),mp->thread)) { - fprintf(stderr,"safe_mutex: Trying to cond_wait on a mutex %s at %s, line %d that was " - "locked by another thread at: %s, line: %d\n", + fprintf(stderr, + "safe_mutex: Trying to cond_wait on a mutex %s at %s, line %d " + "that was locked by another thread at: %s, line: %d\n", mp->name, file, line, mp->file, mp->line); fflush(stderr); abort(); @@ -240,26 +412,37 @@ int safe_cond_wait(pthread_cond_t *cond, safe_mutex_t *mp, const char *file, if (mp->count-- != 1) { - fprintf(stderr,"safe_mutex: Count was %d on locked mutex %s at %s, line %d\n", + fprintf(stderr, + "safe_mutex: Count was %d on locked mutex %s at %s, line %d\n", mp->count+1, mp->name, file, line); fflush(stderr); abort(); } + save_state= *mp; + remove_from_active_list(mp); pthread_mutex_unlock(&mp->global); error=pthread_cond_wait(cond,&mp->mutex); pthread_mutex_lock(&mp->global); + if (error) { - fprintf(stderr,"safe_mutex: Got error: %d (%d) when doing a safe_mutex_wait on %s at %s, " - "line %d\n", error, errno, mp->name, file, line); + fprintf(stderr, + "safe_mutex: Got error: %d (%d) when doing a safe_mutex_wait on " + "%s at %s, line %d\n", error, errno, mp->name, file, line); fflush(stderr); abort(); } - mp->thread=pthread_self(); + /* Restore state as it was before */ + mp->thread= save_state.thread; + mp->active_flags= save_state.active_flags; + mp->next= save_state.next; + mp->prev= save_state.prev; + if (mp->count++) { fprintf(stderr, - "safe_mutex: Count was %d in thread 0x%lx when locking mutex %s at %s, line %d\n", + "safe_mutex: Count was %d in thread 0x%lx when locking mutex %s " + "at %s, line %d\n", mp->count-1, my_thread_dbug_id(), mp->name, file, line); fflush(stderr); abort(); @@ -276,33 +459,44 @@ int safe_cond_timedwait(pthread_cond_t *cond, safe_mutex_t *mp, const char *file, uint line) { int error; + safe_mutex_t save_state; + pthread_mutex_lock(&mp->global); if 
(mp->count != 1 || !pthread_equal(pthread_self(),mp->thread)) { - fprintf(stderr,"safe_mutex: Trying to cond_wait at %s, line %d on a not hold mutex %s\n", + fprintf(stderr, + "safe_mutex: Trying to cond_wait at %s, line %d on a not hold " + "mutex %s\n", file, line, mp->name ? mp->name : "Null"); fflush(stderr); abort(); } mp->count--; /* Mutex will be released */ + save_state= *mp; + remove_from_active_list(mp); pthread_mutex_unlock(&mp->global); error=pthread_cond_timedwait(cond,&mp->mutex,abstime); #ifdef EXTRA_DEBUG if (error && (error != EINTR && error != ETIMEDOUT && error != ETIME)) { fprintf(stderr, - "safe_mutex: Got error: %d (%d) when doing a safe_mutex_timedwait on %s at %s, " - "line %d\n", + "safe_mutex: Got error: %d (%d) when doing a safe_mutex_timedwait " + "on %s at %s, line %d\n", error, errno, mp->name, file, line); } #endif pthread_mutex_lock(&mp->global); - mp->thread=pthread_self(); + /* Restore state as it was before */ + mp->thread= save_state.thread; + mp->active_flags= save_state.active_flags; + mp->next= save_state.next; + mp->prev= save_state.prev; + if (mp->count++) { fprintf(stderr, - "safe_mutex: Count was %d in thread 0x%lx when locking mutex %s at %s, line %d " - "(error: %d (%d))\n", + "safe_mutex: Count was %d in thread 0x%lx when locking mutex " + "%s at %s, line %d (error: %d (%d))\n", mp->count-1, my_thread_dbug_id(), mp->name, file, line, error, error); fflush(stderr); @@ -318,6 +512,8 @@ int safe_cond_timedwait(pthread_cond_t *cond, safe_mutex_t *mp, int safe_mutex_destroy(safe_mutex_t *mp, const char *file, uint line) { int error=0; + DBUG_ENTER("safe_mutex_destroy"); + DBUG_PRINT("enter", ("mutex: 0x%lx name: %s", (ulong) mp, mp->name)); if (!mp->file) { fprintf(stderr, @@ -328,12 +524,17 @@ int safe_mutex_destroy(safe_mutex_t *mp, const char *file, uint line) } if (mp->count != 0) { - fprintf(stderr,"safe_mutex: Trying to destroy a mutex %s that was locked at %s, " + fprintf(stderr, + "safe_mutex: Trying to destroy a 
mutex %s that was locked at %s, " "line %d at %s, line %d\n", mp->name, mp->file, mp->line, file, line); fflush(stderr); abort(); } + + /* Free all entries that points to this one */ + safe_mutex_free_deadlock_data(mp); + #ifdef __WIN__ pthread_mutex_destroy(&mp->global); pthread_mutex_destroy(&mp->mutex); @@ -354,7 +555,7 @@ int safe_mutex_destroy(safe_mutex_t *mp, const char *file, uint line) if (info->prev) info->prev->next = info->next; else - safe_mutex_root = info->next; + safe_mutex_create_root = info->next; if (info->next) info->next->prev = info->prev; safe_mutex_count--; @@ -366,10 +567,36 @@ int safe_mutex_destroy(safe_mutex_t *mp, const char *file, uint line) #else thread_safe_sub(safe_mutex_count, 1, &THR_LOCK_mutex); #endif /* SAFE_MUTEX_DETECT_DESTROY */ - return error; + DBUG_RETURN(error); } +/** + Free all data related to deadlock detection + + This is also useful together with safemalloc when you don't want to + have reports of not freed memory for mysys mutexes. +*/ + +void safe_mutex_free_deadlock_data(safe_mutex_t *mp) +{ + /* Free all entries that points to this one */ + if (!(mp->create_flags & MYF_NO_DEADLOCK_DETECTION)) + { + pthread_mutex_lock(&THR_LOCK_mutex); + hash_iterate(mp->used_mutex, (hash_walk_action) remove_from_locked_mutex, + mp); + hash_iterate(mp->locked_mutex, (hash_walk_action) remove_from_used_mutex, + mp); + pthread_mutex_unlock(&THR_LOCK_mutex); + + hash_free(mp->used_mutex); + hash_free(mp->locked_mutex); + my_free(mp->locked_mutex, 0); + mp->create_flags|= MYF_NO_DEADLOCK_DETECTION; + } +} + /* Free global resources and check that all mutex has been destroyed @@ -400,7 +627,7 @@ void safe_mutex_end(FILE *file __attribute__((unused))) } { struct st_safe_mutex_info_t *ptr; - for (ptr= safe_mutex_root ; ptr ; ptr= ptr->next) + for (ptr= safe_mutex_create_root ; ptr ; ptr= ptr->next) { fprintf(file, "\tMutex %s initiated at line %4u in '%s'\n", ptr->name, ptr->init_line, ptr->init_file); @@ -410,6 +637,127 @@ void 
safe_mutex_end(FILE *file __attribute__((unused))) #endif /* SAFE_MUTEX_DETECT_DESTROY */ } + +static my_bool add_used_to_locked_mutex(safe_mutex_t *used_mutex, + safe_mutex_deadlock_t *locked_mutex) +{ + /* Add mutex to all parent of the current mutex */ + if (!locked_mutex->warning_only) + { + (void) hash_iterate(locked_mutex->mutex->locked_mutex, + (hash_walk_action) add_to_locked_mutex, + used_mutex); + /* mark that locked_mutex is locked after used_mutex */ + (void) add_to_locked_mutex(locked_mutex, used_mutex); + } + return 0; +} + + +/** + register that locked_mutex was locked after current_mutex +*/ + +static my_bool add_to_locked_mutex(safe_mutex_deadlock_t *locked_mutex, + safe_mutex_t *current_mutex) +{ + DBUG_ENTER("add_to_locked_mutex"); + DBUG_PRINT("info", ("inserting 0x%lx into 0x%lx (id: %lu -> %lu)", + (ulong) locked_mutex, (long) current_mutex, + locked_mutex->id, current_mutex->id)); + if (my_hash_insert(current_mutex->locked_mutex, (uchar*) locked_mutex)) + { + /* Got mutex through two paths; ignore */ + DBUG_RETURN(0); + } + locked_mutex->count++; + if (my_hash_insert(locked_mutex->mutex->used_mutex, + (uchar*) current_mutex)) + { + DBUG_ASSERT(0); + } + DBUG_RETURN(0); +} + + +/** + Remove mutex from the locked mutex hash + @fn remove_from_used_mutex() + @param mp Mutex that has delete_mutex in it's locked_mutex hash + @param delete_mutex Mutex should be removed from the hash + + @notes + safe_mutex_deadlock_t entries in the locked hash are shared. + When counter goes to 0, we delete the safe_mutex_deadlock_t entry. 
+*/ + +static my_bool remove_from_locked_mutex(safe_mutex_t *mp, + safe_mutex_t *delete_mutex) +{ + safe_mutex_deadlock_t *found; + DBUG_ENTER("remove_from_locked_mutex"); + DBUG_PRINT("enter", ("delete_mutex: 0x%lx mutex: 0x%lx (id: %lu <- %lu)", + (ulong) delete_mutex, (ulong) mp, + delete_mutex->id, mp->id)); + + found= (safe_mutex_deadlock_t *) hash_search(mp->locked_mutex, + (uchar*) &delete_mutex->id, 0); + DBUG_ASSERT(found); + if (found) + { + if (hash_delete(mp->locked_mutex, (uchar*) found)) + { + DBUG_ASSERT(0); + } + if (!--found->count) + my_free(found, MYF(0)); + } + DBUG_RETURN(0); +} + +static my_bool remove_from_used_mutex(safe_mutex_deadlock_t *locked_mutex, + safe_mutex_t *mutex) +{ + DBUG_ENTER("remove_from_used_mutex"); + DBUG_PRINT("enter", ("delete_mutex: 0x%lx mutex: 0x%lx (id: %lu <- %lu)", + (ulong) mutex, (ulong) locked_mutex, + mutex->id, locked_mutex->id)); + if (hash_delete(locked_mutex->mutex->used_mutex, (uchar*) mutex)) + { + DBUG_ASSERT(0); + } + if (!--locked_mutex->count) + my_free(locked_mutex, MYF(0)); + DBUG_RETURN(0); +} + + +static void print_deadlock_warning(safe_mutex_t *new_mutex, + safe_mutex_t *parent_mutex) +{ + safe_mutex_t *mutex_root; + DBUG_ENTER("print_deadlock_warning"); + DBUG_PRINT("enter", ("mutex: %s parent: %s", + new_mutex->name, parent_mutex->name)); + + fprintf(stderr, "safe_mutex: Found wrong usage of mutex " + "'%s' and '%s'\n", + parent_mutex->name, new_mutex->name); + fprintf(stderr, "Mutex currently locked (in reverse order):\n"); + fprintf(stderr, "%-32.32s %s line %u\n", new_mutex->name, new_mutex->file, + new_mutex->line); + for (mutex_root= *my_thread_var_mutex_in_use() ; + mutex_root; + mutex_root= mutex_root->next) + { + fprintf(stderr, "%-32.32s %s line %u\n", mutex_root->name, + mutex_root->file, mutex_root->line); + } + fflush(stderr); + DBUG_VOID_RETURN; +} + + #endif /* THREAD && SAFE_MUTEX */ #if defined(THREAD) && defined(MY_PTHREAD_FASTMUTEX) && !defined(SAFE_MUTEX) -- cgit v1.2.1 From 
ea7cb6c2735a8ecfc380b67c437b2ead6608d765 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Wed, 3 Dec 2008 00:09:37 +0200 Subject: Add missing file: Testing of mutex-wrong-usage-detector --- mysys/test_thr_mutex.c | 162 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 mysys/test_thr_mutex.c (limited to 'mysys') diff --git a/mysys/test_thr_mutex.c b/mysys/test_thr_mutex.c new file mode 100644 index 00000000000..0bd14a0d31b --- /dev/null +++ b/mysys/test_thr_mutex.c @@ -0,0 +1,162 @@ +/* Copyright (C) 2008 Sun Microsystems, Inc + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Testing of deadlock detector */ + +#include +#include + + +int main(int argc __attribute__((unused)), char** argv) +{ + pthread_mutex_t LOCK_A, LOCK_B, LOCK_C, LOCK_D, LOCK_E, LOCK_F, LOCK_G; + pthread_mutex_t LOCK_H, LOCK_I; + MY_INIT(argv[0]); + DBUG_ENTER("main"); + + DBUG_PUSH("d:t:O,/tmp/trace"); + printf("This program is testing the mutex deadlock detection.\n" + "It should print out different failures of wrong mutex usage" + "on stderr\n\n"); + + safe_mutex_deadlock_detector= 1; + pthread_mutex_init(&LOCK_A, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_B, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_C, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_D, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_E, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_F, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_G, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_H, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_I, MY_MUTEX_INIT_FAST); + + printf("Testing A->B and B->A\n"); + fflush(stdout); + pthread_mutex_lock(&LOCK_A); + pthread_mutex_lock(&LOCK_B); + pthread_mutex_unlock(&LOCK_A); + pthread_mutex_unlock(&LOCK_B); + + /* Test different (wrong) lock order */ + pthread_mutex_lock(&LOCK_B); + pthread_mutex_lock(&LOCK_A); /* Should give warning */ + + pthread_mutex_unlock(&LOCK_A); + pthread_mutex_unlock(&LOCK_B); + + /* Check that we don't get another warning for same lock */ + printf("Testing A->B and B->A again (should not give a warning)\n"); + pthread_mutex_lock(&LOCK_B); + pthread_mutex_lock(&LOCK_A); + pthread_mutex_unlock(&LOCK_A); + pthread_mutex_unlock(&LOCK_B); + + /* + Test of ring with many mutex + We also unlock mutex in different orders to get the unlock code properly + tested. 
+ */ + printf("Testing A->C and C->D and D->A\n"); + pthread_mutex_lock(&LOCK_A); + pthread_mutex_lock(&LOCK_C); + pthread_mutex_unlock(&LOCK_A); + pthread_mutex_unlock(&LOCK_C); + pthread_mutex_lock(&LOCK_C); + pthread_mutex_lock(&LOCK_D); + pthread_mutex_unlock(&LOCK_D); + pthread_mutex_unlock(&LOCK_C); + + pthread_mutex_lock(&LOCK_D); + pthread_mutex_lock(&LOCK_A); /* Should give warning */ + + pthread_mutex_unlock(&LOCK_A); + pthread_mutex_unlock(&LOCK_D); + + printf("Testing E -> F ; H -> I ; F -> H ; H -> I -> E\n"); + fflush(stdout); + + pthread_mutex_lock(&LOCK_E); + pthread_mutex_lock(&LOCK_F); + pthread_mutex_unlock(&LOCK_E); + pthread_mutex_unlock(&LOCK_F); + pthread_mutex_lock(&LOCK_H); + pthread_mutex_lock(&LOCK_I); + pthread_mutex_unlock(&LOCK_I); + pthread_mutex_unlock(&LOCK_H); + pthread_mutex_lock(&LOCK_F); + pthread_mutex_lock(&LOCK_H); + pthread_mutex_unlock(&LOCK_H); + pthread_mutex_unlock(&LOCK_F); + + pthread_mutex_lock(&LOCK_H); + pthread_mutex_lock(&LOCK_I); + pthread_mutex_lock(&LOCK_E); /* Should give warning */ + + pthread_mutex_unlock(&LOCK_E); + pthread_mutex_unlock(&LOCK_I); + pthread_mutex_unlock(&LOCK_H); + + printf("\nFollowing shouldn't give any warnings\n"); + printf("Testing A->B and B->A without deadlock detection\n"); + fflush(stdout); + + /* Reinitialize mutex to get rid of old wrong usage markers */ + pthread_mutex_destroy(&LOCK_A); + pthread_mutex_destroy(&LOCK_B); + pthread_mutex_init(&LOCK_A, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_B, MY_MUTEX_INIT_FAST); + + /* Start testing */ + my_pthread_mutex_lock(&LOCK_A, MYF(MYF_NO_DEADLOCK_DETECTION)); + pthread_mutex_lock(&LOCK_B); + pthread_mutex_unlock(&LOCK_A); + pthread_mutex_unlock(&LOCK_B); + + pthread_mutex_lock(&LOCK_A); + my_pthread_mutex_lock(&LOCK_B, MYF(MYF_NO_DEADLOCK_DETECTION)); + pthread_mutex_unlock(&LOCK_A); + pthread_mutex_unlock(&LOCK_B); + + printf("Testing A -> C ; B -> C ; A->B\n"); + fflush(stdout); + pthread_mutex_lock(&LOCK_A); + 
pthread_mutex_lock(&LOCK_C); + pthread_mutex_unlock(&LOCK_C); + pthread_mutex_unlock(&LOCK_A); + + pthread_mutex_lock(&LOCK_B); + pthread_mutex_lock(&LOCK_C); + pthread_mutex_unlock(&LOCK_C); + pthread_mutex_unlock(&LOCK_B); + + pthread_mutex_lock(&LOCK_A); + pthread_mutex_lock(&LOCK_B); + pthread_mutex_unlock(&LOCK_B); + pthread_mutex_unlock(&LOCK_A); + + /* Cleanup */ + pthread_mutex_destroy(&LOCK_A); + pthread_mutex_destroy(&LOCK_B); + pthread_mutex_destroy(&LOCK_C); + pthread_mutex_destroy(&LOCK_D); + pthread_mutex_destroy(&LOCK_E); + pthread_mutex_destroy(&LOCK_F); + pthread_mutex_destroy(&LOCK_G); + pthread_mutex_destroy(&LOCK_H); + pthread_mutex_destroy(&LOCK_I); + + my_end(MY_DONT_FREE_DBUG); + exit(0); +} -- cgit v1.2.1 From d83f6470243057fdd542bed0410dcabe09766713 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Thu, 4 Dec 2008 02:36:55 +0200 Subject: Fixed warnings and errors discovered by pushbuild2 mysys/my_init.c: Fixed link error when compiling without thread support sql/item_create.cc: Fixed compiler warning sql/mysqld.cc: Fixed compile error on windows sql/protocol.cc: Fixed compiler warning sql/sql_class.cc: Fixed compiler warning sql/sql_class.h: Fixed compiler warning storage/myisam/mi_open.c: Fixed compiler warning storage/myisammrg/ha_myisammrg.cc: Fixed compiler warning (shadow variable) --- mysys/my_init.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'mysys') diff --git a/mysys/my_init.c b/mysys/my_init.c index 453e62b19bb..6d79472edfa 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -199,7 +199,9 @@ Voluntary context switches %ld, Involuntary context switches %ld\n", #endif #if defined(SAFEMALLOC) /* Wait for other threads to free mysys_var */ +#ifdef THREAD (void) my_wait_for_other_threads_to_die(1); +#endif TERMINATE(stderr, (infoflag & MY_GIVE_INFO) != 0); #elif defined(__WIN__) && defined(_MSC_VER) _CrtSetReportMode( _CRT_WARN, _CRTDBG_MODE_FILE ); -- cgit v1.2.1 From 86fcfb15083409bbf7138d713e45affd00e34dac Mon Sep 17 
00:00:00 2001 From: Michael Widenius Date: Mon, 22 Dec 2008 02:17:37 +0200 Subject: Fix for Bug#40311 Assert in MARIA_RECORD_POS during pushbuild 2 test: Fixed bug when removing a newly inserted record (in case of duplicate key). The bug caused a crash for rows with several blobs and the first blob was small enough to fit into the head page. Don't change state_history if nothing changed (speed optimization that also simplifies logic). Reset state_history if we added/deleted or updated rows without versioning. Fixed wrong test in trnman_exists_active_transactions() if state is visible or not. Other bugs fixed: Fixed wrong argument to (lock->get_status) when we had to wait for TL_WRITE_CONCURRENT_INSERT. Item_equal::update_used_tables() didn't calculate const_item_cache properly. Added asserts to detect if join_read_const_table() was called under wrong assumptions. Fixed that _ma_setup_live_state() is called from thr_lock() instead of handler::external_lock(). This was needed to get versioning information to be set up correctly. Fixed error in debug binaries during a call to _ma_check_table_is_closed() when another thread was opening/closing a table. Fixed wrong test when finding right history_state to use. mysql-test/suite/maria/r/maria.result: Added test for Bug#40311 Assert in MARIA_RECORD_POS during pushbuild 2 test mysql-test/suite/maria/t/maria.test: Added test for Bug#40311 Assert in MARIA_RECORD_POS during pushbuild 2 test mysys/thr_lock.c: Fixed wrong argument to (lock->get_status) when we had to wait for TL_WRITE_CONCURRENT_INSERT sql/item_cmpfunc.cc: Item_equal::update_used_tables() didn't calculate const_item_cache properly, which later caused a wrong result for item->const_item() sql/sql_base.cc: In debug mode, initialize record buffer with unexpected data to catch usage of uninitialized memory sql/sql_select.cc: Fixed indentation Added asserts to detect if join_read_const_table() was called under wrong assumptions. 
One assert() is disabled for now as Item_equal() doesn't behave as expected. storage/maria/ha_maria.cc: Move calling to _ma_setup_live_state() to ma_state.c::_ma_block_get_status() This was needed as _ma_setup_live_state() needed to know if the table will be used concurrently or not storage/maria/ma_blockrec.c: Fixed bug when removing a newly inserted record (in case of duplicate key). The bug caused a crash for rows with several blobs and the first blob was small enough to fit into the head page. storage/maria/ma_dbug.c: Added mutex to protect the open table list during _ma_check_table_is_closed(). Without the protection we could get an error in debug binaries during a call to _ma_check_table_is_closed() storage/maria/ma_delete_table.c: Removed unused code storage/maria/ma_rename.c: Removed unused code storage/maria/ma_state.c: Fixed wrong test when finding right history_state to use Mark in tables->state_current.no_transid if we are using transid's or not. Don't change state_history if nothing changed (speed optimization that also simplifies logic) Reset state_history if we added/deleted or updated rows without versioning. More DBUG_ASSERT's and more DBUG Updated maria_versioning() to initialize environment before calling _ma_block_get_status(). This was needed because of the new logic in _ma_block_get_status() storage/maria/ma_state.h: Added flags to detect if table changed and/or if we changed table without versioning storage/maria/ma_write.c: Simple cleanups (No logic changes) storage/maria/trnman.c: Fixed wrong test in trnman_exists_active_transactions() if state is visible or not. 
--- mysys/thr_lock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/thr_lock.c b/mysys/thr_lock.c index 97700f77e3f..a9ff1b05881 100644 --- a/mysys/thr_lock.c +++ b/mysys/thr_lock.c @@ -494,7 +494,8 @@ wait_for_lock(struct st_lock_list *wait, THR_LOCK_DATA *data, { result= THR_LOCK_SUCCESS; if (data->lock->get_status) - (*data->lock->get_status)(data->status_param, 0); + (*data->lock->get_status)(data->status_param, + data->type == TL_WRITE_CONCURRENT_INSERT); check_locks(data->lock,"got wait_for_lock",0); } pthread_mutex_unlock(&data->lock->mutex); -- cgit v1.2.1 From c45bf1b3cc1dd241f26210ebdef836566124bef0 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Wed, 7 Jan 2009 21:50:11 +0100 Subject: Bug#40990 Maria: failure of maria.test & maria_notemebedded in deadlock detection detect a case when a blocker has removed itself and signalled after the condition timed out but before it (cond_wait) acquired the mutex back --- mysys/waiting_threads.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c index 14b1d639d00..f2dff238b46 100644 --- a/mysys/waiting_threads.c +++ b/mysys/waiting_threads.c @@ -613,7 +613,24 @@ static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth, if (ret == WT_DEADLOCK && depth) change_victim(blocker, &arg); if (arg.rc) + { + /* + Special return code if there's nobody to wait for. + + depth == 0 means that we start the search from thd (thd == blocker). + ret == WT_OK means that no cycle was found and arg.rc == thd->waiting_for. + and arg.rc->owners.elements == 0 means that (applying the rule above) + thd->waiting_for->owners.elements == 0, and thd doesn't have anybody to + wait for. 
+ */ + if (depth == 0 && ret == WT_OK && arg.rc->owners.elements == 0) + { + DBUG_ASSERT(thd == blocker); + DBUG_ASSERT(arg.rc == thd->waiting_for); + ret= WT_FREE_TO_GO; + } rc_unlock(arg.rc); + } /* notify the victim, if appropriate */ if (ret == WT_DEADLOCK && arg.victim != thd) { @@ -888,7 +905,10 @@ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex) ret= pthread_cond_timedwait(&rc->cond, mutex, &timeout); if (ret == WT_TIMEOUT) { - if (deadlock(thd, thd, 0, *thd->deadlock_search_depth_long)) + int r= deadlock(thd, thd, 0, *thd->deadlock_search_depth_long); + if (r == WT_FREE_TO_GO) + ret= WT_OK; + else if (r != WT_OK) ret= WT_DEADLOCK; else if (*thd->timeout_long > *thd->timeout_short) { -- cgit v1.2.1 From 9c96fde1206f254d0dd25dbe2cc1706c44e4bdea Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Thu, 15 Jan 2009 22:27:36 +0100 Subject: post-review fixes include/atomic/generic-msvc.h: prevent possible compiler warnings include/lf.h: comments, better definition for LF_HASH_OVERHEAD include/maria.h: define MARIA_CANNOT_ROLLBACK here include/my_pthread.h: avoid possible name clash include/waiting_threads.h: comments, const, move WT_RESOURCE to waiting_threads.c mysql-test/suite/maria/r/maria_notembedded.result: new test mysql-test/suite/maria/t/maria_notembedded.test: new test - 5-way deadlock mysys/lf_hash.c: better definition for LF_HASH_OVERHEAD mysys/my_static.c: comment mysys/my_thr_init.c: casts mysys/waiting_threads.c: comments, asserts, etc server-tools/instance-manager/parse.cc: fix my_init_dynamic_array() to follow new calling conventions sql/mysqld.cc: call wt_init after set_proper_floating_point_mode sql/sql_class.h: comment storage/maria/ha_maria.cc: move MARIA_CANNOT_ROLLBACK to a common header storage/maria/ma_commit.c: comment storage/maria/ma_write.c: comments, check for HA_ERR_FOUND_DUPP_KEY storage/maria/trnman.c: comments, assert storage/maria/trnman.h: comments storage/maria/unittest/trnman-t.c: be paranoid 
unittest/mysys/lf-t.c: comments unittest/mysys/waiting_threads-t.c: comments, safety, memory leak --- mysys/lf_alloc-pin.c | 4 +- mysys/lf_hash.c | 5 +- mysys/my_static.c | 3 +- mysys/my_thr_init.c | 5 +- mysys/waiting_threads.c | 509 ++++++++++++++++++++++++++++++++---------------- 5 files changed, 351 insertions(+), 175 deletions(-) (limited to 'mysys') diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index 7c3e3785b68..0293bfc6faf 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -1,5 +1,5 @@ /* QQ: TODO multi-pinbox */ -/* Copyright (C) 2006 MySQL AB +/* Copyright (C) 2006-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -330,7 +330,7 @@ static void _lf_pinbox_real_free(LF_PINS *pins) { int npins, alloca_size; void *list, **addr; - uchar *first, *last= NULL; + void *first, *last= NULL; LF_PINBOX *pinbox= pins->pinbox; LINT_INIT(first); diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index 96ae3f338ab..ce7056af995 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2006 MySQL AB +/* Copyright (C) 2006-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -42,6 +42,8 @@ typedef struct { */ } LF_SLIST; +const int LF_HASH_OVERHEAD= sizeof(LF_SLIST); + /* a structure to pass the context (pointers two the three successive elements in a list) from lfind to linsert/ldelete @@ -315,7 +317,6 @@ void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, uint key_offset, uint key_length, hash_get_key get_key, CHARSET_INFO *charset) { - compile_time_assert(sizeof(LF_SLIST) == LF_HASH_OVERHEAD); lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size, offsetof(LF_SLIST, key)); lf_dynarray_init(&hash->array, sizeof(LF_SLIST *)); diff --git a/mysys/my_static.c b/mysys/my_static.c index 04bda8d2dcc..c33d05420c9 100644 --- a/mysys/my_static.c +++ b/mysys/my_static.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2000 MySQL AB +/* Copyright (C) 2000-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -101,6 +101,7 @@ static const char *proc_info_dummy(void *a __attribute__((unused)), return 0; } +/* this is to be able to call set_thd_proc_info from the C code */ const char *(*proc_info_hook)(void *, const char *, const char *, const char *, const unsigned int)= proc_info_dummy; diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c index 3f08ac69b63..3f4c4a4d638 100644 --- a/mysys/my_thr_init.c +++ b/mysys/my_thr_init.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2000 MySQL AB +/* Copyright (C) 2000-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -332,7 +332,8 @@ my_bool my_thread_init(void) 0); pthread_cond_init(&tmp->suspend, NULL); - tmp->stack_ends_here= &tmp + STACK_DIRECTION * my_thread_stack_size; + tmp->stack_ends_here= (char*)&tmp + + STACK_DIRECTION * (long)my_thread_stack_size; pthread_mutex_lock(&THR_LOCK_threads); tmp->id= ++thread_id; diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c index f2dff238b46..5b99a5ceeba 100644 --- a/mysys/waiting_threads.c +++ b/mysys/waiting_threads.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2008 MySQL AB +/* Copyright (C) 2008 MySQL AB, 2008-2009 Sun Microsystems, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,74 +13,134 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/* +/** + @file + "waiting threads" subsystem - a unified interface for threads to wait on each other, with built-in deadlock detection. Main concepts ^^^^^^^^^^^^^ - a thread - is represented by a WT_THD structure. One physical thread - can have only one WT_THD descriptor. + a thread - is represented by a WT_THD structure. One physical thread + can have only one WT_THD descriptor at any given moment. - a resource - a thread does not wait for other threads directly, - instead it waits for a "resource", which is "owned" by other threads. - It waits, exactly, for all "owners" to "release" a resource. - It does not have to correspond to a physical resource. For example, it - may be convenient in certain cases to force resource == thread. - A resource is represented by a WT_RESOURCE structure. + a resource - a thread does not wait for other threads directly, + instead it waits for a "resource", which is "owned" by other threads. 
+ It waits, exactly, for all "owners" to "release" a resource. + It does not have to correspond to a physical resource. For example, it + may be convenient in certain cases to force resource == thread. + A resource is represented by a WT_RESOURCE structure. - a resource identifier - a pair of {resource type, value}. A value is - an ulonglong number. Represented by a WT_RESOURCE_ID structure. + a resource identifier - a pair of {resource type, value}. A value is + an ulonglong number. Represented by a WT_RESOURCE_ID structure. - a resource type - a pointer to a statically defined instance of + a resource type - a pointer to a statically defined instance of WT_RESOURCE_TYPE structure. This structure contains a pointer to a function that knows how to compare values of this resource type. In the simple case it could be wt_resource_id_memcmp(). - Usage - ^^^^^ - to use the interface one needs to use this thread's WT_THD, - call wt_thd_will_wait_for() for every thread it needs to wait on, - then call wt_thd_cond_timedwait(). When thread releases a resource - it should call wt_thd_release() (or wt_thd_release_all()) - it will - notify (send a signal) threads waiting in wt_thd_cond_timedwait(), - if appropriate. - - Just like with pthread's cond_wait, there could be spurious - wake-ups from wt_thd_cond_timedwait(). A caller is expected to - handle that. - - wt_thd_will_wait_for() and wt_thd_cond_timedwait() return either - WT_OK or WT_DEADLOCK. Additionally wt_thd_cond_timedwait() can return - WT_TIMEOUT. Out of memory and other fatal errors are reported as - WT_DEADLOCK - and a transaction must be aborted just the same. - - Configuration - ^^^^^^^^^^^^^ - There are four config variables. Two deadlock search depths - short and - long - and two timeouts. Deadlock search is performed with the short - depth on every wt_thd_will_wait_for() call. 
wt_thd_cond_timedwait() - waits with a short timeout, performs a deadlock search with the long - depth, and waits with a long timeout. As most deadlock cycles are supposed - to be short, most deadlocks will be detected at once, and waits will - rarely be necessary. - - These config variables are thread-local. Different threads may have - different search depth and timeout values. - - Also, deadlock detector supports different killing strategies, the victim - in a deadlock cycle is selected based on the "weight". See "weight" - description in waiting_threads.h for details. It's up to the caller to - set weights accordingly. - - Status - ^^^^^^ - We calculate the number of successfull waits (WT_OK returned from - wt_thd_cond_timedwait()), a number of timeouts, a deadlock cycle - length distribution - number of deadlocks with every length from - 1 to WT_CYCLE_STATS, and a wait time distribution - number - of waits with a time from 1 us to 1 min in WT_CYCLE_STATS - intervals on a log scale. + a wait-for graph - a graph, that represenst "wait-for" relationships. + It has two types of nodes - threads and resources. There are directed + edges from a thread to a resource it is waiting for (WT_THD::waiting_for), + from a thread to resources that it "owns" (WT_THD::my_resources), + and from a resource to threads that "own" it (WT_RESOURCE::owners) + + Graph completeness + ^^^^^^^^^^^^^^^^^^ + + For flawless deadlock detection wait-for graph must be complete. + It means that when a thread starts waiting it needs to know *all* its + blockers, and call wt_thd_will_wait_for() for every one of them. + Otherwise two phenomena should be expected: + + 1. Fuzzy timeouts: + + thread A needs to get a lock, and is blocked by a thread B. + it waits. + Just before the timeout thread B releases the lock. + thread A is ready to grab the lock but discovers that it is also + blocked by a thread C. + It waits and times out. 
+ + As a result thread A has waited two timeout intervals, instead of one. + + 2. Unreliable cycle detection: + + Thread A waits for threads B and C + Thread C waits for D + Thread D wants to start waiting for A + + one can see immediately that thread D creates a cycle, and thus + a deadlock is detected. + + But if thread A would only wait for B, and start waiting for C + when B would unlock, thread D would be allowed to wait, a deadlock + would be only detected when B unlocks or somebody times out. + + These two phenomena don't affect a correctness, and strictly speaking, + the caller is not required to call wt_thd_will_wait_for() for *all* + blockers - it may optimize wt_thd_will_wait_for() calls. But they + may be perceived as bugs by users, it must be understood that such + an optimization comes with its price. + + Usage + ^^^^^ + + First, the wt* subsystem must be initialized by calling + wt_init(). In the server you don't need to do it, it's done + in mysqld.cc. + + Similarly, wt_end() frees wt* structures, should be called + at the end, but in the server mysqld.cc takes care of that. + + Every WT_THD should be initialized with wt_thd_lazy_init(). + After that they can be used in other wt_thd_* calls. + Before discarding, WT_THD should be free'd with + wt_thd_destroy(). In the server both are handled in sql_class.cc, + it's an error to try to do it manually. + + To use the deadlock detection one needs to use this thread's WT_THD, + call wt_thd_will_wait_for() for every thread it needs to wait on, + then call wt_thd_cond_timedwait(). When thread releases a resource + it should call wt_thd_release() (or wt_thd_release_all()) - it will + notify (send a signal) threads waiting in wt_thd_cond_timedwait(), + if appropriate. + + Just like with pthread's cond_wait, there could be spurious + wake-ups from wt_thd_cond_timedwait(). A caller is expected to + handle that (that is, to re-check the blocking criteria). 
+ + wt_thd_will_wait_for() and wt_thd_cond_timedwait() return either + WT_OK or WT_DEADLOCK. Additionally wt_thd_cond_timedwait() can return + WT_TIMEOUT. Out of memory and other fatal errors are reported as + WT_DEADLOCK - and a transaction must be aborted just the same. + + Configuration + ^^^^^^^^^^^^^ + There are four config variables. Two deadlock search depths - short and + long - and two timeouts. Deadlock search is performed with the short + depth on every wt_thd_will_wait_for() call. wt_thd_cond_timedwait() + waits with a short timeout, performs a deadlock search with the long + depth, and waits with a long timeout. As most deadlock cycles are supposed + to be short, most deadlocks will be detected at once, and waits will + rarely be necessary. + + These config variables are thread-local. Different threads may have + different search depth and timeout values. + + Also, deadlock detector supports different killing strategies, the victim + in a deadlock cycle is selected based on the "weight". See "weight" + description in waiting_threads.h for details. It's up to the caller to + set weights accordingly. + + Status + ^^^^^^ + We calculate the number of successfull waits (WT_OK returned from + wt_thd_cond_timedwait()), a number of timeouts, a deadlock cycle + length distribution - number of deadlocks with every length from + 1 to WT_CYCLE_STATS, and a wait time distribution - number + of waits with a time from 1 us to 1 min in WT_WAIT_STATS + intervals on a log e scale. */ /* @@ -93,10 +153,11 @@ (example A=IX, B=IS, C=S, D=X) - you need to include lock level in the resource identifier - thread 1 - waiting for lock A on resource R and thread 2 waiting for lock B - on resource R should wait on different WT_RESOURCE structures, on different - {lock, resource} pairs. 
Otherwise the following is possible: + you need to include lock level in the resource identifier - a + thread waiting for lock of the type A on resource R and another + thread waiting for lock of the type B on resource R should wait on + different WT_RESOURCE structures, on different {lock, resource} + pairs. Otherwise the following is possible: thread1> take S-lock on R thread2> take IS-lock on R @@ -113,40 +174,46 @@ #include #include -/* - status variables: - distribution of cycle lengths - wait time log distribution - - Note: +/* status variables */ - we call deadlock() twice per wait (with different search lengths). - it means a deadlock will be counted twice. It's difficult to avoid, - as on the second search we could find a *different* deadlock and we - *want* to count it too. So we just count all deadlocks - two searches - mean two increments on the wt_cycle_stats. +/** + preset table of wait intervals */ - ulonglong wt_wait_table[WT_WAIT_STATS]; -uint32 wt_wait_stats[WT_WAIT_STATS+1]; -uint32 wt_cycle_stats[2][WT_CYCLE_STATS+1], wt_success_stats; +/** + wait time distribution (log e scale) +*/ +uint32 wt_wait_stats[WT_WAIT_STATS+1]; +/** + distribution of cycle lengths + first column tells whether this was during short or long detection +*/ +uint32 wt_cycle_stats[2][WT_CYCLE_STATS+1]; +uint32 wt_success_stats; static my_atomic_rwlock_t cycle_stats_lock, wait_stats_lock, success_stats_lock; +#ifdef SAFE_STATISTICS +#define incr(VAR, LOCK) \ + do { \ + my_atomic_rwlock_wrlock(&(LOCK)); \ + my_atomic_add32(&(VAR), 1); \ + my_atomic_rwlock_wrunlock(&(LOCK)); \ + } while(0) +#else +#define incr(VAR,LOCK) do { (VAR)++; } while(0) +#endif + static void increment_success_stats() { - my_atomic_rwlock_wrlock(&success_stats_lock); - my_atomic_add32(&wt_success_stats, 1); - my_atomic_rwlock_wrunlock(&success_stats_lock); + incr(wt_success_stats, success_stats_lock); } static void increment_cycle_stats(uint depth, uint slot) { if (depth >= WT_CYCLE_STATS) depth= 
WT_CYCLE_STATS; - my_atomic_rwlock_wrlock(&cycle_stats_lock); - my_atomic_add32(&wt_cycle_stats[slot][depth], 1); - my_atomic_rwlock_wrunlock(&cycle_stats_lock); + incr(wt_cycle_stats[slot][depth], cycle_stats_lock); } static void increment_wait_stats(ulonglong waited,int ret) @@ -155,12 +222,89 @@ static void increment_wait_stats(ulonglong waited,int ret) if ((ret) == ETIMEDOUT) i= WT_WAIT_STATS; else - for (i=0; i < WT_WAIT_STATS && waited/10 > wt_wait_table[i]; i++) ; - my_atomic_rwlock_wrlock(&wait_stats_lock); - my_atomic_add32(wt_wait_stats+i, 1); - my_atomic_rwlock_wrunlock(&wait_stats_lock); + for (i= 0; i < WT_WAIT_STATS && waited/10 > wt_wait_table[i]; i++) ; + incr(wt_wait_stats[i], wait_stats_lock); } +/* + 'lock' protects 'owners', 'state', and 'waiter_count' + 'id' is read-only + + a resource is picked up from a hash in a lock-free manner + it's returned pinned, so it cannot be freed at once + but it may be freed right after the pin is removed + to free a resource it should + 1. have no owners + 2. have no waiters + + two ways to access a resource: + 1. find it in a hash + - it's returned pinned. + a) take a lock in exclusive mode + b) check the state, it should be ACTIVE to be usable + c) unpin + 2. by a direct reference + - could only used if a resource cannot be freed + e.g. accessing a resource by thd->waiting_for is safe, + a resource cannot be freed as there's a thread waiting for it +*/ +struct st_wt_resource { + WT_RESOURCE_ID id; + uint waiter_count; + enum { ACTIVE, FREE } state; +#ifndef DBUG_OFF + pthread_mutex_t *cond_mutex; /* a mutex for the 'cond' below */ +#endif + /* + before the 'lock' all elements are mutable, after (and including) - + immutable in the sense that lf_hash_insert() won't memcpy() over them. + See wt_init(). + */ +#ifdef WT_RWLOCKS_USE_MUTEXES + /* + we need a special rwlock-like 'lock' to allow readers bypass + waiting writers, otherwise readers can deadlock. 
For example: + + A waits on resource x, owned by B, B waits on resource y, owned + by A, we have a cycle (A->x->B->y->A) + Both A and B start deadlock detection: + + A locks x B locks y + A goes deeper B goes deeper + A locks y B locks x + + with mutexes it would deadlock. With rwlocks it won't, as long + as both A and B are taking read locks (and they do). + But other threads may take write locks. Assume there's + C who wants to start waiting on x, and D who wants to start + waiting on y. + + A read-locks x B read-locks y + A goes deeper B goes deeper + => C write-locks x (to add a new edge) D write-locks y + .. C is blocked D is blocked + A read-locks y B read-locks x + + Now, if a read lock can bypass a pending wrote lock request, we're fine. + If it can not, we have a deadlock. + + writer starvation is technically possible, but unlikely, because + the contention is expected to be low. + */ + struct { + pthread_cond_t cond; + pthread_mutex_t mutex; + uint readers: 16; + uint pending_writers: 15; + uint write_locked: 1; + } lock; +#else + rw_lock_t lock; +#endif + pthread_cond_t cond; /* the corresponding mutex is provided by the caller */ + DYNAMIC_ARRAY owners; +}; + #ifdef WT_RWLOCKS_USE_MUTEXES static void rc_rwlock_init(WT_RESOURCE *rc) { @@ -169,6 +313,8 @@ static void rc_rwlock_init(WT_RESOURCE *rc) } static void rc_rwlock_destroy(WT_RESOURCE *rc) { + DBUG_ASSERT(rc->lock.write_locked == 0); + DBUG_ASSERT(rc->lock.readers == 0); pthread_cond_destroy(&rc->lock.cond); pthread_mutex_destroy(&rc->lock.mutex); } @@ -188,7 +334,7 @@ static void rc_wrlock(WT_RESOURCE *rc) pthread_mutex_lock(&rc->lock.mutex); while (rc->lock.write_locked || rc->lock.readers) pthread_cond_wait(&rc->lock.cond, &rc->lock.mutex); - rc->lock.write_locked=1; + rc->lock.write_locked= 1; pthread_mutex_unlock(&rc->lock.mutex); DBUG_PRINT("wt", ("LOCK resid=%ld for WRITE", (ulong)rc->id.value)); } @@ -198,7 +344,7 @@ static void rc_unlock(WT_RESOURCE *rc) 
pthread_mutex_lock(&rc->lock.mutex); if (rc->lock.write_locked) { - rc->lock.write_locked=0; + rc->lock.write_locked= 0; pthread_cond_broadcast(&rc->lock.cond); } else if (--rc->lock.readers == 0) @@ -242,12 +388,12 @@ static LF_HASH reshash; /** WT_RESOURCE constructor - It's called from lf_hash and takes an offset to LF_SLIST instance. + It's called from lf_hash and takes a pointer to an LF_SLIST instance. WT_RESOURCE is located at arg+sizeof(LF_SLIST) */ static void wt_resource_init(uchar *arg) { - WT_RESOURCE *rc=(WT_RESOURCE*)(arg+LF_HASH_OVERHEAD); + WT_RESOURCE *rc= (WT_RESOURCE*)(arg+LF_HASH_OVERHEAD); DBUG_ENTER("wt_resource_init"); bzero(rc, sizeof(*rc)); @@ -260,12 +406,12 @@ static void wt_resource_init(uchar *arg) /** WT_RESOURCE destructor - It's called from lf_hash and takes an offset to LF_SLIST instance. + It's called from lf_hash and takes a pointer to an LF_SLIST instance. WT_RESOURCE is located at arg+sizeof(LF_SLIST) */ static void wt_resource_destroy(uchar *arg) { - WT_RESOURCE *rc=(WT_RESOURCE*)(arg+LF_HASH_OVERHEAD); + WT_RESOURCE *rc= (WT_RESOURCE*)(arg+LF_HASH_OVERHEAD); DBUG_ENTER("wt_resource_destroy"); DBUG_ASSERT(rc->owners.elements == 0); @@ -278,6 +424,7 @@ static void wt_resource_destroy(uchar *arg) void wt_init() { DBUG_ENTER("wt_init"); + DBUG_ASSERT(reshash.alloc.constructor != wt_resource_init); lf_hash_init(&reshash, sizeof(WT_RESOURCE), LF_HASH_UNIQUE, 0, sizeof_WT_RESOURCE_ID, 0, 0); @@ -293,15 +440,15 @@ void wt_init() reshash.element_size= offsetof(WT_RESOURCE, lock); bzero(wt_wait_stats, sizeof(wt_wait_stats)); bzero(wt_cycle_stats, sizeof(wt_cycle_stats)); - wt_success_stats=0; - { /* initialize wt_wait_table[]. from 1 us to 1 min, log scale */ + wt_success_stats= 0; + { /* initialize wt_wait_table[]. 
from 1 us to 1 min, log e scale */ int i; - double from=log(1); /* 1 us */ - double to=log(60e6); /* 1 min */ - for (i=0; i < WT_WAIT_STATS; i++) + double from= log(1); /* 1 us */ + double to= log(60e6); /* 1 min */ + for (i= 0; i < WT_WAIT_STATS; i++) { - wt_wait_table[i]=(ulonglong)exp((to-from)/(WT_WAIT_STATS-1)*i+from); - DBUG_ASSERT(i==0 || wt_wait_table[i-1] != wt_wait_table[i]); + wt_wait_table[i]= (ulonglong)exp((to-from)/(WT_WAIT_STATS-1)*i+from); + DBUG_ASSERT(i == 0 || wt_wait_table[i-1] != wt_wait_table[i]); } } my_atomic_rwlock_init(&cycle_stats_lock); @@ -325,7 +472,7 @@ void wt_end() /** Lazy WT_THD initialization - Cheap initialization of WT_THD. Only initialized fields that don't require + Cheap initialization of WT_THD. Only initialize fields that don't require memory allocations - basically, it only does assignments. The rest of the WT_THD structure will be initialized on demand, on the first use. This allows one to initialize lazily all WT_THD structures, even if some @@ -335,14 +482,18 @@ void wt_end() @param ts a pointer to deadlock timeout short value @param dl a pointer to deadlock search depth long value @param tl a pointer to deadlock timeout long value + + @note these are pointers to values, and WT_THD stores them as pointers. + It allows one later to change search depths and timeouts for existing + threads. It also means that the pointers must stay valid for the lifetime + of WT_THD. 
*/ -void wt_thd_lazy_init(WT_THD *thd, ulong *ds, ulong *ts, ulong *dl, ulong *tl) +void wt_thd_lazy_init(WT_THD *thd, const ulong *ds, const ulong *ts, + const ulong *dl, const ulong *tl) { DBUG_ENTER("wt_thd_lazy_init"); - thd->waiting_for=0; - thd->my_resources.buffer= 0; - thd->my_resources.elements= 0; - thd->weight=0; + thd->waiting_for= 0; + thd->weight= 0; thd->deadlock_search_depth_short= ds; thd->timeout_short= ts; thd->deadlock_search_depth_long= dl; @@ -350,7 +501,7 @@ void wt_thd_lazy_init(WT_THD *thd, ulong *ds, ulong *ts, ulong *dl, ulong *tl) /* dynamic array is also initialized lazily - without memory allocations */ my_init_dynamic_array(&thd->my_resources, sizeof(WT_RESOURCE *), 0, 5); #ifndef DBUG_OFF - thd->name=my_thread_name(); + thd->name= my_thread_name(); #endif DBUG_VOID_RETURN; } @@ -367,9 +518,9 @@ static int fix_thd_pins(WT_THD *thd) { if (unlikely(thd->pins == 0)) { - thd->pins=lf_hash_get_pins(&reshash); + thd->pins= lf_hash_get_pins(&reshash); #ifndef DBUG_OFF - thd->name=my_thread_name(); + thd->name= my_thread_name(); #endif } return thd->pins == 0; @@ -380,12 +531,12 @@ void wt_thd_destroy(WT_THD *thd) DBUG_ENTER("wt_thd_destroy"); DBUG_ASSERT(thd->my_resources.elements == 0); + DBUG_ASSERT(thd->waiting_for == 0); if (thd->pins != 0) lf_hash_put_pins(thd->pins); delete_dynamic(&thd->my_resources); - thd->waiting_for=0; DBUG_VOID_RETURN; } /** @@ -394,7 +545,7 @@ void wt_thd_destroy(WT_THD *thd) It can be used in WT_RESOURCE_TYPE structures where bytewise comparison of values is sufficient. 
*/ -int wt_resource_id_memcmp(void *a, void *b) +int wt_resource_id_memcmp(const void *a, const void *b) { /* we use the fact that there's no padding in the middle of WT_RESOURCE_ID */ compile_time_assert(offsetof(WT_RESOURCE_ID, type) == sizeof(ulonglong)); @@ -405,10 +556,10 @@ int wt_resource_id_memcmp(void *a, void *b) arguments for the recursive deadlock_search function */ struct deadlock_arg { - WT_THD *thd; /**< starting point of a search */ - uint max_depth; /**< search depth limit */ - WT_THD *victim; /**< a thread to be killed to resolve a deadlock */ - WT_RESOURCE *rc; /**< see comment at the end of deadlock_search() */ + WT_THD * const thd; /**< starting point of a search */ + uint const max_depth; /**< search depth limit */ + WT_THD *victim; /**< a thread to be killed to resolve a deadlock */ + WT_RESOURCE *last_locked_rc; /**< see comment at the end of deadlock_search() */ }; /** @@ -421,10 +572,10 @@ static void change_victim(WT_THD* found, struct deadlock_arg *arg) if (arg->victim != arg->thd) { rc_unlock(arg->victim->waiting_for); /* release the previous victim */ - DBUG_ASSERT(arg->rc == found->waiting_for); + DBUG_ASSERT(arg->last_locked_rc == found->waiting_for); } arg->victim= found; - arg->rc= 0; + arg->last_locked_rc= 0; } } @@ -444,7 +595,7 @@ static int deadlock_search(struct deadlock_arg *arg, WT_THD *blocker, LF_REQUIRE_PINS(1); - arg->rc= 0; + arg->last_locked_rc= 0; if (depth > arg->max_depth) { @@ -453,7 +604,10 @@ static int deadlock_search(struct deadlock_arg *arg, WT_THD *blocker, } retry: - /* safe dereference as explained in lf_alloc-pin.c */ + /* + safe dereference as explained in lf_alloc-pin.c + (in short: protects against lf_alloc_free() in lf_hash_delete()) + */ do { rc= *shared_ptr; @@ -469,6 +623,7 @@ retry: rc_rdlock(rc); if (rc->state != ACTIVE || *shared_ptr != rc) { + /* blocker is not waiting on this resource anymore */ rc_unlock(rc); lf_unpin(arg->thd->pins, 0); goto retry; @@ -480,20 +635,22 @@ retry: Below is not a 
pure depth-first search. It's a depth-first with a slightest hint of breadth-first. Depth-first is: - check(element): + check(element, X): foreach current in element->nodes[] do: - if current == element return error; - check(current); + if current == X return error; + check(current, X); while we do - check(element): + check(element, X): foreach current in element->nodes[] do: - if current == element return error; + if current == X return error; foreach current in element->nodes[] do: - check(current); + check(current, X); + + preferring shorter deadlocks over longer ones. */ - for (i=0; i < rc->owners.elements; i++) + for (i= 0; i < rc->owners.elements; i++) { cursor= *dynamic_element(&rc->owners, i, WT_THD**); /* @@ -517,7 +674,7 @@ retry: goto end; } } - for (i=0; i < rc->owners.elements; i++) + for (i= 0; i < rc->owners.elements; i++) { cursor= *dynamic_element(&rc->owners, i, WT_THD**); switch (deadlock_search(arg, cursor, depth+1)) { @@ -528,20 +685,21 @@ retry: break; case WT_DEADLOCK: ret= WT_DEADLOCK; - change_victim(cursor, arg); /* also sets arg->rc to 0 */ + change_victim(cursor, arg); /* also sets arg->last_locked_rc to 0 */ i= rc->owners.elements; /* jump out of the loop */ break; default: DBUG_ASSERT(0); } - if (arg->rc) - rc_unlock(arg->rc); + if (arg->last_locked_rc) + rc_unlock(arg->last_locked_rc); } end: /* Note that 'rc' is locked in this function, but it's never unlocked here. - Instead it's saved in arg->rc and the *caller* is expected to unlock it. - It's done to support different killing strategies. This is how it works: + Instead it's saved in arg->last_locked_rc and the *caller* is + expected to unlock it. It's done to support different killing + strategies. This is how it works: Assuming a graph thd->A->B->C->thd @@ -552,9 +710,9 @@ end: on. Goes down recursively, locks B. Goes down recursively, locks C. Notices that C is waiting on thd. Deadlock detected. Sets arg->victim=thd. Returns from the last deadlock_search() call. C stays locked! 
- Now it checks whether C is a more appropriate victim then 'thd'. + Now it checks whether C is a more appropriate victim than 'thd'. If yes - arg->victim=C, otherwise C is unlocked. Returns. B stays locked. - Now it checks whether B is a more appropriate victim then arg->victim. + Now it checks whether B is a more appropriate victim than arg->victim. If yes - old arg->victim is unlocked and arg->victim=B, otherwise B is unlocked. Return. And so on. @@ -566,7 +724,7 @@ end: is unrolled and we are back to deadlock() function, there are only two locks left - on thd and on the victim. */ - arg->rc= rc; + arg->last_locked_rc= rc; DBUG_PRINT("wt", ("exit: %s", ret == WT_DEPTH_EXCEEDED ? "WT_DEPTH_EXCEEDED" : ret ? "WT_DEADLOCK" : "OK")); @@ -612,30 +770,31 @@ static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth, */ if (ret == WT_DEADLOCK && depth) change_victim(blocker, &arg); - if (arg.rc) + if (arg.last_locked_rc) { /* Special return code if there's nobody to wait for. depth == 0 means that we start the search from thd (thd == blocker). - ret == WT_OK means that no cycle was found and arg.rc == thd->waiting_for. - and arg.rc->owners.elements == 0 means that (applying the rule above) - thd->waiting_for->owners.elements == 0, and thd doesn't have anybody to - wait for. + ret == WT_OK means that no cycle was found and + arg.last_locked_rc == thd->waiting_for. + and arg.last_locked_rc->owners.elements == 0 means that + (applying the rule above) thd->waiting_for->owners.elements == 0, + and thd doesn't have anybody to wait for. 
*/ - if (depth == 0 && ret == WT_OK && arg.rc->owners.elements == 0) + if (depth == 0 && ret == WT_OK && arg.last_locked_rc->owners.elements == 0) { DBUG_ASSERT(thd == blocker); - DBUG_ASSERT(arg.rc == thd->waiting_for); + DBUG_ASSERT(arg.last_locked_rc == thd->waiting_for); ret= WT_FREE_TO_GO; } - rc_unlock(arg.rc); + rc_unlock(arg.last_locked_rc); } /* notify the victim, if appropriate */ if (ret == WT_DEADLOCK && arg.victim != thd) { DBUG_PRINT("wt", ("killing %s", arg.victim->name)); - arg.victim->killed=1; + arg.victim->killed= 1; pthread_cond_broadcast(&arg.victim->waiting_for->cond); rc_unlock(arg.victim->waiting_for); ret= WT_OK; @@ -659,7 +818,7 @@ static int unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc) if (rc->owners.elements || rc->waiter_count) { - DBUG_PRINT("wt", ("nothing to do, %d owners, %d waiters", + DBUG_PRINT("wt", ("nothing to do, %u owners, %u waiters", rc->owners.elements, rc->waiter_count)); rc_unlock(rc); DBUG_RETURN(0); @@ -683,12 +842,8 @@ static int unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc) 2. set the state to FREE 3. release the lock 4. remove from the hash - - I *think* it's safe to release the lock while the element is still - in the hash. If not, the corrected procedure should be - 3. pin; 4; remove; 5; release; 6; unpin and it'll need pin[3]. */ - rc->state=FREE; + rc->state= FREE; rc_unlock(rc); DBUG_RETURN(lf_hash_delete(&reshash, thd->pins, key, keylen) == -1); } @@ -739,15 +894,19 @@ static int stop_waiting(WT_THD *thd) /** notify the system that a thread needs to wait for another thread - called by a *waiter* to declare what resource it will wait for. + called by a *waiter* to declare that it (thd) will wait for another + thread (blocker) on a specific resource (resid). can be called many times, if many blockers own a blocking resource. but must always be called with the same resource id - a thread cannot wait for more than one resource at a time. 
+ @return WT_OK or WT_DEADLOCK + As a new edge is added to the wait-for graph, a deadlock detection is performed for this new edge. */ -int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker, WT_RESOURCE_ID *resid) +int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker, + const WT_RESOURCE_ID *resid) { uint i; WT_RESOURCE *rc; @@ -822,7 +981,7 @@ retry: /* we can safely access the resource here, it's in the hash as it has - at least one owner, and non-zero waiter_count + non-zero waiter_count */ rc= thd->waiting_for; rc_wrlock(rc); @@ -835,7 +994,11 @@ retry: DBUG_RETURN(WT_DEADLOCK); } } - for (i=0; i < rc->owners.elements; i++) + /* + Another thread could be waiting on this resource for this very 'blocker'. + In this case we should not add it to the list for the second time. + */ + for (i= 0; i < rc->owners.elements; i++) if (*dynamic_element(&rc->owners, i, WT_THD**) == blocker) break; if (i >= rc->owners.elements) @@ -854,19 +1017,21 @@ retry: } rc_unlock(rc); - if (deadlock(thd, blocker, 1, *thd->deadlock_search_depth_short)) + if (deadlock(thd, blocker, 1, *thd->deadlock_search_depth_short) != WT_OK) { stop_waiting(thd); DBUG_RETURN(WT_DEADLOCK); } - DBUG_RETURN(0); + DBUG_RETURN(WT_OK); } /** - called by a *waiter* to start waiting + called by a *waiter* (thd) to start waiting It's supposed to be a drop-in replacement for pthread_cond_timedwait(), and it takes mutex as an argument. 
+ + @return one of WT_TIMEOUT, WT_DEADLOCK, WT_OK */ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex) { @@ -878,10 +1043,10 @@ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex) DBUG_PRINT("wt", ("enter: thd=%s, rc=%p", thd->name, rc)); #ifndef DBUG_OFF - if (rc->mutex) - DBUG_ASSERT(rc->mutex == mutex); + if (rc->cond_mutex) + DBUG_ASSERT(rc->cond_mutex == mutex); else - rc->mutex= mutex; + rc->cond_mutex= mutex; safe_mutex_assert_owner(mutex); #endif @@ -890,20 +1055,27 @@ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex) #ifdef __WIN__ /* only for the sake of Windows we distinguish between - 'before' and 'starttime' + 'before' and 'starttime': + + my_getsystime() returns high-resolution value, that cannot be used for + waiting (it doesn't follow system clock changes), but is good for time + intervals. + + GetSystemTimeAsFileTime() follows system clock, but is low-resolution + and will result in lousy intervals. */ GetSystemTimeAsFileTime((PFILETIME)&starttime); #endif rc_wrlock(rc); - if (rc->owners.elements == 0 || thd->killed) + if (rc->owners.elements == 0) ret= WT_OK; rc_unlock(rc); set_timespec_time_nsec(timeout, starttime, (*thd->timeout_short)*ULL(1000)); - if (ret == WT_TIMEOUT) + if (ret == WT_TIMEOUT && !thd->killed) ret= pthread_cond_timedwait(&rc->cond, mutex, &timeout); - if (ret == WT_TIMEOUT) + if (ret == WT_TIMEOUT && !thd->killed) { int r= deadlock(thd, thd, 0, *thd->deadlock_search_depth_long); if (r == WT_FREE_TO_GO) @@ -935,24 +1107,25 @@ int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex) @param resid a resource to release. 
0 to release all resources */ -void wt_thd_release(WT_THD *thd, WT_RESOURCE_ID *resid) +void wt_thd_release(WT_THD *thd, const WT_RESOURCE_ID *resid) { uint i; DBUG_ENTER("wt_thd_release"); - for (i=0; i < thd->my_resources.elements; i++) + for (i= 0; i < thd->my_resources.elements; i++) { - uint j; WT_RESOURCE *rc= *dynamic_element(&thd->my_resources, i, WT_RESOURCE**); if (!resid || (resid->type->compare(&rc->id, resid) == 0)) { + uint j; + rc_wrlock(rc); /* nobody's trying to free the resource now, as its owners[] array is not empty (at least thd must be there) */ DBUG_ASSERT(rc->state == ACTIVE); - for (j=0; j < rc->owners.elements; j++) + for (j= 0; j < rc->owners.elements; j++) if (*dynamic_element(&rc->owners, j, WT_THD**) == thd) break; DBUG_ASSERT(j < rc->owners.elements); @@ -961,8 +1134,8 @@ void wt_thd_release(WT_THD *thd, WT_RESOURCE_ID *resid) { pthread_cond_broadcast(&rc->cond); #ifndef DBUG_OFF - if (rc->mutex) - safe_mutex_assert_owner(rc->mutex); + if (rc->cond_mutex) + safe_mutex_assert_owner(rc->cond_mutex); #endif } unlock_lock_and_free_resource(thd, rc); -- cgit v1.2.1 From 9aa2ada9d007a41d4e58447ea405b646c4d53ea3 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 19 Jan 2009 16:27:49 +0100 Subject: compilation fixes --- mysys/waiting_threads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/waiting_threads.c b/mysys/waiting_threads.c index 5b99a5ceeba..732929f6d99 100644 --- a/mysys/waiting_threads.c +++ b/mysys/waiting_threads.c @@ -545,7 +545,7 @@ void wt_thd_destroy(WT_THD *thd) It can be used in WT_RESOURCE_TYPE structures where bytewise comparison of values is sufficient. 
*/ -int wt_resource_id_memcmp(const void *a, const void *b) +my_bool wt_resource_id_memcmp(const void *a, const void *b) { /* we use the fact that there's no padding in the middle of WT_RESOURCE_ID */ compile_time_assert(offsetof(WT_RESOURCE_ID, type) == sizeof(ulonglong)); -- cgit v1.2.1 From b90ff5340fc10a9306be1b1201612e382b8ab051 Mon Sep 17 00:00:00 2001 From: Guilhem Bichot Date: Thu, 12 Feb 2009 16:27:33 +0100 Subject: Fixing problems of previous 5.1-main->5.1-maria merge: - adding back Serg's "mtr --list-options" - safe_mutex deadlock detector started raising wrong deadlock warnings, fixed here by a backport from 6.0-main. include/my_pthread.h: Porting changes done to 6.0-main which satisfy the safe_mutex deadlock detector (those in 5.1-main don't), see chad@mysql.com-20090126155607-n0j3zbmgbfepnmmo for explanations mysql-test/mysql-test-run.pl: adding back Serg's --list-options mysys/my_init.c: Porting changes done to 6.0-main which satisfy the safe_mutex deadlock detector (those in 5.1-main don't), see chad@mysql.com-20090126155607-n0j3zbmgbfepnmmo for explanations mysys/my_thr_init.c: Porting changes done to 6.0-main which satisfy the safe_mutex deadlock detector (those in 5.1-main don't), see chad@mysql.com-20090126155607-n0j3zbmgbfepnmmo for explanations --- mysys/my_init.c | 10 +++++----- mysys/my_thr_init.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 5 deletions(-) (limited to 'mysys') diff --git a/mysys/my_init.c b/mysys/my_init.c index 2401671687b..e05536bf3ea 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -82,20 +82,20 @@ my_bool my_init(void) my_progname_short= my_progname + dirname_length(my_progname); #if defined(THREAD) - if (my_thread_global_init()) - return 1; + (void) my_threadattr_global_init(); # if defined(SAFE_MUTEX) safe_mutex_global_init(); /* Must be called early */ -# endif -#endif -#if defined(THREAD) && defined(MY_PTHREAD_FASTMUTEX) && !defined(SAFE_MUTEX) +# elif defined(MY_PTHREAD_FASTMUTEX) 
fastmutex_global_init(); /* Must be called early */ +# endif #endif netware_init(); #ifdef THREAD #if defined(HAVE_PTHREAD_INIT) pthread_init(); /* Must be called before DBUG_ENTER */ #endif + if (my_thread_global_init()) + return 1; #if !defined( __WIN__) && !defined(__NETWARE__) sigfillset(&my_signals); /* signals blocked by mf_brkhant */ #endif diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c index 3f4c4a4d638..4617d0e2277 100644 --- a/mysys/my_thr_init.c +++ b/mysys/my_thr_init.c @@ -65,6 +65,38 @@ nptl_pthread_exit_hack_handler(void *arg __attribute((unused))) #endif /* TARGET_OS_LINUX */ + +/** + Initialize thread attributes. +*/ + +void my_threadattr_global_init(void) +{ +#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP + /* + Set mutex type to "fast" a.k.a "adaptive" + + In this case the thread may steal the mutex from some other thread + that is waiting for the same mutex. This will save us some + context switches but may cause a thread to 'starve forever' while + waiting for the mutex (not likely if the code within the mutex is + short). 
+ */ + pthread_mutexattr_init(&my_fast_mutexattr); /* ?= MY_MUTEX_INIT_FAST */ + pthread_mutexattr_settype(&my_fast_mutexattr, + PTHREAD_MUTEX_ADAPTIVE_NP); +#endif +#ifdef PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP + /* + Set mutex type to "errorcheck" + */ + pthread_mutexattr_init(&my_errorcheck_mutexattr); + pthread_mutexattr_settype(&my_errorcheck_mutexattr, + PTHREAD_MUTEX_ERRORCHECK); +#endif +} + + static uint get_thread_lib(void); /* -- cgit v1.2.1 From 4fe342500953e9cf337aae462fb512a7cec176f8 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Fri, 13 Mar 2009 00:27:35 +0200 Subject: Added "pool-of-threads" handling (with libevent) This is a backport of code from MySQL 6.0 with cleanups and extensions The following new options are supported configure options: --with-libevent ; Enable use of libevent, which is needed for pool of threads mysqld options: --thread-handling=pool-of-threads ; Use a pool of threads to handle queries --thread-pool-size=# ; Define how many threads should be created to handle all queries --extra-port=# ; Extra tcp port that uses the old one-thread-per-connection method --extra-max-connections=# ; Number of connections to accept to 'extra-port' --test-ignore-wrong-options ; Ignore setting an enum value to a wrong option (for mysql-test-run) BUILD/SETUP.sh: Added libevents (and thus pool-of-threads) to max builds CMakeLists.txt: Added libevent Makefile.am: Added libevents config/ac-macros/libevent.m4: Libevent code for configure config/ac-macros/libevent_configure.m4: Libevent code for configure configure.in: Added libevents dbug/dbug.c: Added _db_is_pushed(); Needed for pool-of-threads code extra/Makefile.am: Added libevents extra/libevent: Libevent initial code extra/libevent/CMakeLists.txt: Libevent initial code extra/libevent/Makefile.am: Libevent initial code extra/libevent/README: Libevent initial code extra/libevent/WIN32-Code: Libevent initial code extra/libevent/WIN32-Code/config.h: Libevent initial code 
extra/libevent/WIN32-Code/misc.c: Libevent initial code extra/libevent/WIN32-Code/misc.h: Libevent initial code extra/libevent/WIN32-Code/tree.h: Libevent initial code extra/libevent/WIN32-Code/win32.c: Libevent initial code extra/libevent/buffer.c: Libevent initial code extra/libevent/compat: Libevent initial code extra/libevent/compat/sys: Libevent initial code extra/libevent/compat/sys/_time.h: Libevent initial code extra/libevent/compat/sys/queue.h: Libevent initial code extra/libevent/compat/sys/tree.h: Libevent initial code extra/libevent/devpoll.c: Libevent initial code extra/libevent/epoll.c: Libevent initial code extra/libevent/epoll_sub.c: Libevent initial code extra/libevent/evbuffer.c: Libevent initial code extra/libevent/evdns.c: Libevent initial code extra/libevent/evdns.h: Libevent initial code extra/libevent/event-config.h: Libevent initial code extra/libevent/event-internal.h: Libevent initial code extra/libevent/event.c: Libevent initial code extra/libevent/event.h: Libevent initial code extra/libevent/event_tagging.c: Libevent initial code extra/libevent/evhttp.h: Libevent initial code extra/libevent/evport.c: Libevent initial code extra/libevent/evrpc-internal.h: Libevent initial code extra/libevent/evrpc.c: Libevent initial code extra/libevent/evrpc.h: Libevent initial code extra/libevent/evsignal.h: Libevent initial code extra/libevent/evutil.c: Libevent initial code extra/libevent/evutil.h: Libevent initial code extra/libevent/http-internal.h: Libevent initial code extra/libevent/http.c: Libevent initial code extra/libevent/kqueue.c: Libevent initial code extra/libevent/log.c: Libevent initial code extra/libevent/log.h: Libevent initial code extra/libevent/min_heap.h: Libevent initial code extra/libevent/poll.c: Libevent initial code extra/libevent/select.c: Libevent initial code extra/libevent/signal.c: Libevent initial code extra/libevent/strlcpy-internal.h: Libevent initial code extra/libevent/strlcpy.c: Libevent initial code 
include/config-win.h: Libevent support include/my_dbug.h: ADded _db_is_pushed include/mysql.h.pp: Update to handle new prototypes include/typelib.h: Split find_type_or_exit() into two functions include/violite.h: Added vio_is_pending() libmysqld/Makefile.am: Added libevent mysql-test/include/have_pool_of_threads.inc: Added test for pool-of-threads mysql-test/mysql-test-run.pl: Don't abort based on time and don't retry test cases when run under --gdb or --debug mysql-test/r/crash_commit_before.result: USE GLOBAL for debug variable mysql-test/r/have_pool_of_threads.require: Added test for pool-of-threads mysql-test/r/pool_of_threads.result: Added test for pool-of-threads mysql-test/r/subselect_debug.result: USE GLOBAL for debug variable mysql-test/t/crash_commit_before.test: USE GLOBAL for debug variable mysql-test/t/merge-big.test: USE GLOBAL for debug variable mysql-test/t/pool_of_threads-master.opt: Added test for pool-of-threads mysql-test/t/pool_of_threads.test: Added test for pool-of-threads mysys/typelib.c: Split find_type_or_exit() into find_type_with_warning() sql/Makefile.am: Added libevent sql/handler.cc: Indentation fix. 
Fixed memory loss bug Fixed crash on exit when handler plugin failed sql/mysql_priv.h: Added extra_max_connections and mysqld_extra_port Added extern functions from sql_connect.cc sql/mysqld.cc: Added support for new mysqld options Added code for 'extra-port' and 'extra-max-connections' Split some functions into smaller pieces to be able to reuse code Added code for test-ignore-wrong-options sql/scheduler.cc: Updated schduler code from MySQL 6.0 sql/scheduler.h: Updated schduler code from MySQL 6.0 sql/set_var.cc: Added support for changing "extra_max_connections" sql/sql_class.cc: Iniitalize thread schduler options in THD sql/sql_class.h: Added to extra_port and scheduler to 'THD' sql/sql_connect.cc: Use thd->schduler to check number of connections and terminate connection Made some local functions global (for scheduler.cc) vio/viosocket.c: Added 'vio_pending', needed for scheduler..c --- mysys/typelib.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/typelib.c b/mysys/typelib.c index e745a9fb917..ff5dc1231e4 100644 --- a/mysys/typelib.c +++ b/mysys/typelib.c @@ -22,7 +22,7 @@ static const char field_separator=','; -int find_type_or_exit(const char *x, TYPELIB *typelib, const char *option) +int find_type_with_warning(const char *x, TYPELIB *typelib, const char *option) { int res; const char **ptr; @@ -38,12 +38,20 @@ int find_type_or_exit(const char *x, TYPELIB *typelib, const char *option) while (*++ptr) fprintf(stderr, ",'%s'", *ptr); fprintf(stderr, "\n"); - exit(1); } return res; } +uint find_type_or_exit(const char *x, TYPELIB *typelib, const char *option) +{ + int res; + if ((res= find_type_with_warning(x, typelib, option)) <= 0) + exit(1); + return (uint) res; +} + + /* Search after a string in a list of strings. Endspace in x is not compared. 
-- cgit v1.2.1 From 46db8aac44737fc9b8b07283140c6d0add4d1789 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Sun, 22 Mar 2009 14:16:09 +0200 Subject: Apply patch by Antony Dovgal: - Move SAFE_MUTEX to be stored in config.h by configure.in (not as a flag used with compiler command line) - Generate my_config.h in configure BUILD/SETUP.sh: Remove -DSAFE_MUTEX as the following --with-debug flag will automaticly add it BUILD/compile-ia64-debug-max: Remove -DSAFE_MUTEX as the following --with-debug flag will automaticly add it configure.in: Move SAFE_MUTEX and SAFE_MALLOC to [my_] config.h Generate my_config.h as part of configure process dbug/dbug.c: Include my_global.h before we undef SAFE_MUTEX include/Makefile.am: Update comment. For now, lets generate my_config.h if someone deletes it after configure mysys/my_wincond.c: Include my_global.h before we undef SAFE_MUTEX mysys/my_winthread.c: Include my_global.h before we undef SAFE_MUTEX --- mysys/my_wincond.c | 1 + mysys/my_winthread.c | 1 + 2 files changed, 2 insertions(+) (limited to 'mysys') diff --git a/mysys/my_wincond.c b/mysys/my_wincond.c index c9bc33df8c4..8b548a64079 100644 --- a/mysys/my_wincond.c +++ b/mysys/my_wincond.c @@ -17,6 +17,7 @@ ** The following is a simple implementation of posix conditions *****************************************************************************/ +#include #undef SAFE_MUTEX /* Avoid safe_mutex redefinitions */ #include "mysys_priv.h" #if defined(THREAD) && defined(__WIN__) diff --git a/mysys/my_winthread.c b/mysys/my_winthread.c index 0af6a47ec4a..8bda595451b 100644 --- a/mysys/my_winthread.c +++ b/mysys/my_winthread.c @@ -18,6 +18,7 @@ *****************************************************************************/ /* SAFE_MUTEX will not work until the thread structure is up to date */ +#include #undef SAFE_MUTEX #include "mysys_priv.h" -- cgit v1.2.1 From 6120cc96c9090634211a87260dc82f8aa54521c4 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 31 Mar 2009 10:06:51 
+0200 Subject: Fix build error after last push with --with-debug=full due to SAFEMALLOC now being defined in my_config.h (as opposed to in CFLAGS before.) mysys/my_malloc.c: Need to include my_global.h before messing with SAFEMALLOC, as now that macro may be re-defined in my_config.h, which is included from my_global.h mysys/my_once.c: Need to include my_global.h before messing with SAFEMALLOC, as now that macro may be re-defined in my_config.h, which is included from my_global.h mysys/my_realloc.c: Need to include my_global.h before messing with SAFEMALLOC, as now that macro may be re-defined in my_config.h, which is included from my_global.h --- mysys/my_malloc.c | 3 +++ mysys/my_once.c | 3 +++ mysys/my_realloc.c | 3 +++ 3 files changed, 9 insertions(+) (limited to 'mysys') diff --git a/mysys/my_malloc.c b/mysys/my_malloc.c index 12793ad451b..12af5603a93 100644 --- a/mysys/my_malloc.c +++ b/mysys/my_malloc.c @@ -13,6 +13,9 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* my_global.h may define SAFEMALLOC (through my_config.h). */ +#include + #ifdef SAFEMALLOC /* We don't need SAFEMALLOC here */ #undef SAFEMALLOC #endif diff --git a/mysys/my_once.c b/mysys/my_once.c index b6f6656fce2..73bdd0166e6 100644 --- a/mysys/my_once.c +++ b/mysys/my_once.c @@ -15,6 +15,9 @@ /* Not MT-SAFE */ +/* my_global.h may define SAFEMALLOC (through my_config.h). */ +#include + #ifdef SAFEMALLOC /* We don't need SAFEMALLOC here */ #undef SAFEMALLOC #endif diff --git a/mysys/my_realloc.c b/mysys/my_realloc.c index 828890a0dc2..7e49a482884 100644 --- a/mysys/my_realloc.c +++ b/mysys/my_realloc.c @@ -13,6 +13,9 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* my_global.h may define SAFEMALLOC (through my_config.h). 
*/ +#include + #ifdef SAFEMALLOC /* We don't need SAFEMALLOC here */ #undef SAFEMALLOC #endif -- cgit v1.2.1 From b125770aaadd09e839ad9211047e88095984308b Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 6 May 2009 14:03:24 +0200 Subject: We are now using Valgrind rather than purify, and have for quite some time. Consequently, rename HAVE_purify to HAVE_valgrind, and related changes. Leave some comments about purify when not clear that they apply also to Valgrind. Fix redundant IF_VALGRIND declaration. Misc. small fixes: - Fixes for pool-of-threads patch. - Fixes for push of PBXT storage engine. - mysql-test-run.pl fix. - Fix build problem in compile-pentium64-max. BUILD/SETUP.sh: Rename purify -> valgrind. BUILD/build_mccge.sh: Rename purify -> valgrind. BUILD/compile-dist: Fix that PBXT was missing in source tarball after `BUILD/compile-dist && make dist` BUILD/compile-pentium64-max: Fix a build problem with BUILD/compile-pentium64-max on CentOS/Fedora Core 10 amd64. On these systems, there is libz.so but no libz.a. Finding libz.so, ./configure decides to use system zlib. But since BUILD/compile-pentium64-max builds a fully static binary with -all-static, the link of mysqld fails due to missing libz.a. Fix by using bundled zlib in the build script. BUILD/compile-solaris-sparc-purify: Rename purify -> valgrind. include/m_string.h: Rename purify -> valgrind. include/my_global.h: Rename purify -> valgrind. mysql-test/Makefile.am: Fix that PBXT test suite was missing from `make dist` source tarball. mysql-test/lib/mtr_unique.pm: Better fix to avoid races when chmod'ing the semaphore file. (Though using chmod 666 shared files in /tmp/ is still not a very good solution). mysql-test/t/pool_of_threads.cnf: Fix that test case pool_of_threads fails if run on mysqld with no --with-libevent support. mysys/mf_qsort.c: Rename purify -> valgrind. mysys/my_alloc.c: Rename purify -> valgrind. mysys/my_init.c: Rename purify -> valgrind. mysys/my_rnd.c: Rename purify -> valgrind. 
mysys/safemalloc.c: Rename purify -> valgrind. scripts/mysql_config.pl.in: Rename purify -> valgrind. scripts/mysql_config.sh: Rename purify -> valgrind. sql/field_conv.cc: Rename purify -> valgrind. sql/filesort.cc: Rename purify -> valgrind. sql/ha_partition.cc: Rename purify -> valgrind. sql/hostname.cc: Rename purify -> valgrind. sql/item_timefunc.cc: Rename purify -> valgrind. sql/log_event.cc: Rename purify -> valgrind. sql/log_event_old.cc: Rename purify -> valgrind. sql/my_decimal.h: Rename purify -> valgrind. sql/mysqld.cc: Rename purify -> valgrind. Fix redundant IF_VALGRIND declaration. sql/opt_range.cc: Rename purify -> valgrind. sql/opt_range.h: Rename purify -> valgrind. sql/records.cc: Rename purify -> valgrind. sql/rpl_rli.cc: Rename purify -> valgrind. sql/rpl_rli.h: Rename purify -> valgrind. sql/set_var.cc: Fix missing static declaration on pool_of_threads. sql/slave.cc: Rename purify -> valgrind. sql/sql_base.cc: Rename purify -> valgrind. sql/sql_binlog.cc: Rename purify -> valgrind. sql/sql_class.cc: Rename purify -> valgrind. sql/sql_list.h: Rename purify -> valgrind. sql/sql_load.cc: Rename purify -> valgrind. sql/sql_select.cc: Rename purify -> valgrind. sql/table.cc: Rename purify -> valgrind. storage/archive/azio.c: Rename purify -> valgrind. storage/innobase/buf/buf0buf.c: Rename purify -> valgrind. storage/innobase/include/univ.i: Rename purify -> valgrind. storage/innobase/srv/srv0start.c: Rename purify -> valgrind. storage/maria/ha_maria.cc: Rename purify -> valgrind. storage/maria/ma_blockrec.c: Rename purify -> valgrind. storage/maria/ma_check.c: Rename purify -> valgrind. storage/maria/ma_loghandler.c: Rename purify -> valgrind. storage/maria/ma_packrec.c: Rename purify -> valgrind. storage/maria/ma_page.c: Rename purify -> valgrind. storage/maria/ma_pagecrc.c: Rename purify -> valgrind. storage/maria/ma_search.c: Rename purify -> valgrind. storage/myisam/mi_check.c: Rename purify -> valgrind. 
storage/myisam/mi_page.c: Rename purify -> valgrind. storage/myisam/mi_search.c: Rename purify -> valgrind. storage/myisammrg/ha_myisammrg.cc: Rename purify -> valgrind. strings/bcmp.c: Rename purify -> valgrind. strings/decimal.c: Rename purify -> valgrind. strings/strmake.c: Rename purify -> valgrind. --- mysys/mf_qsort.c | 2 +- mysys/my_alloc.c | 6 +++--- mysys/my_init.c | 2 +- mysys/my_rnd.c | 2 +- mysys/safemalloc.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_qsort.c b/mysys/mf_qsort.c index 4b3ecb603a6..9e1ee2782a4 100644 --- a/mysys/mf_qsort.c +++ b/mysys/mf_qsort.c @@ -108,7 +108,7 @@ qsort_t my_qsort(void *base_ptr, size_t count, size_t size, qsort_cmp cmp) low = (char*) base_ptr; high = low+ size * (count - 1); stack_ptr = stack + 1; -#ifdef HAVE_purify +#ifdef HAVE_valgrind /* The first element in the stack will be accessed for the last POP */ stack[0].low=stack[0].high=0; #endif diff --git a/mysys/my_alloc.c b/mysys/my_alloc.c index 2607ea57d08..32fc75fc692 100644 --- a/mysys/my_alloc.c +++ b/mysys/my_alloc.c @@ -56,7 +56,7 @@ void init_alloc_root(MEM_ROOT *mem_root, size_t block_size, mem_root->block_num= 4; /* We shift this with >>2 */ mem_root->first_block_usage= 0; -#if !(defined(HAVE_purify) && defined(EXTRA_DEBUG)) +#if !(defined(HAVE_valgrind) && defined(EXTRA_DEBUG)) if (pre_alloc_size) { if ((mem_root->free= mem_root->pre_alloc= @@ -96,7 +96,7 @@ void reset_root_defaults(MEM_ROOT *mem_root, size_t block_size, DBUG_ASSERT(alloc_root_inited(mem_root)); mem_root->block_size= block_size - ALLOC_ROOT_MIN_BLOCK_SIZE; -#if !(defined(HAVE_purify) && defined(EXTRA_DEBUG)) +#if !(defined(HAVE_valgrind) && defined(EXTRA_DEBUG)) if (pre_alloc_size) { size_t size= pre_alloc_size + ALIGN_SIZE(sizeof(USED_MEM)); @@ -147,7 +147,7 @@ void reset_root_defaults(MEM_ROOT *mem_root, size_t block_size, void *alloc_root(MEM_ROOT *mem_root, size_t length) { -#if defined(HAVE_purify) && defined(EXTRA_DEBUG) +#if 
defined(HAVE_valgrind) && defined(EXTRA_DEBUG) reg1 USED_MEM *next; DBUG_ENTER("alloc_root"); DBUG_PRINT("enter",("root: 0x%lx", (long) mem_root)); diff --git a/mysys/my_init.c b/mysys/my_init.c index 0560d64d9d8..4a658da6d58 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -170,7 +170,7 @@ void my_end(int infoflag) { #ifdef HAVE_GETRUSAGE struct rusage rus; -#ifdef HAVE_purify +#ifdef HAVE_valgrind /* Purify assumes that rus is uninitialized after getrusage call */ bzero((char*) &rus, sizeof(rus)); #endif diff --git a/mysys/my_rnd.c b/mysys/my_rnd.c index b7dca0f2afd..178bcd9c539 100644 --- a/mysys/my_rnd.c +++ b/mysys/my_rnd.c @@ -26,7 +26,7 @@ void my_rnd_init(struct my_rnd_struct *rand_st, ulong seed1, ulong seed2) { -#ifdef HAVE_purify +#ifdef HAVE_valgrind bzero((char*) rand_st,sizeof(*rand_st)); /* Avoid UMC varnings */ #endif rand_st->max_value= 0x3FFFFFFFL; diff --git a/mysys/safemalloc.c b/mysys/safemalloc.c index 59bc4e73af7..ae8bf45b807 100644 --- a/mysys/safemalloc.c +++ b/mysys/safemalloc.c @@ -304,7 +304,7 @@ void _myfree(void *ptr, const char *filename, uint lineno, myf myflags) sf_malloc_count--; pthread_mutex_unlock(&THR_LOCK_malloc); -#ifndef HAVE_purify +#ifndef HAVE_valgrind /* Mark this data as free'ed */ if (!sf_malloc_quick) bfill(ptr, irem->datasize, (pchar) FREE_VAL); -- cgit v1.2.1 From ae134314b8cc35531472cf8af999fec464ad8efa Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 20 May 2009 17:34:34 +0200 Subject: Fix accessing ulong enum option as uint, failing on 64-bit big-endian. 
--- mysys/my_getopt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mysys') diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c index da7e997d629..0de80b01c4f 100644 --- a/mysys/my_getopt.c +++ b/mysys/my_getopt.c @@ -647,7 +647,7 @@ static int setval(const struct my_option *opts, uchar* *value, char *argument, return EXIT_OUT_OF_MEMORY; break; case GET_ENUM: - if (((*(int*)result_pos)= find_type(argument, opts->typelib, 2) - 1) < 0) + if (((*(ulong *)result_pos)= find_type(argument, opts->typelib, 2) - 1) < 0) return EXIT_ARGUMENT_INVALID; break; case GET_SET: @@ -983,7 +983,7 @@ static void init_one_value(const struct my_option *option, uchar* *variable, *((int*) variable)= (int) getopt_ll_limit_value((int) value, option, NULL); break; case GET_ENUM: - *((uint*) variable)= (uint) value; + *((ulong*) variable)= (uint) value; break; case GET_UINT: *((uint*) variable)= (uint) getopt_ull_limit_value((uint) value, option, NULL); @@ -1221,7 +1221,7 @@ void my_print_variables(const struct my_option *options) } break; case GET_ENUM: - printf("%s\n", get_type(optp->typelib, *(uint*) value)); + printf("%s\n", get_type(optp->typelib, *(ulong*) value)); break; case GET_STR: case GET_STR_ALLOC: /* fall through */ -- cgit v1.2.1 From 48083d73d3dc6b1c678dbb6df3b49f420cce84ad Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 22 May 2009 14:38:50 +0200 Subject: After-merge fixes for problems seen in buildbot after merging MySQL-5.1.35. - Version number. - Valgrind false alarms in libz. - New variant of suppression for Valgrind warning in dlclose(). - Fix double free() in plugin init error case. configure.in: Fix version number. We should reset the maria variant back to `1' when the MySQL version number increases. include/my_sys.h: Fix false alarms in Valgrind for zlib. Apply same fix as for archive storage handler also to the cases of compression in the client protocol, and to the compression SQL function. 
mysql-test/valgrind.supp: A new variant of the dlclose() suppression is needed now. mysys/my_compress.c: Fix false alarms in Valgrind for zlib. Apply same fix as for archive storage handler also to the cases of compression in the client protocol, and to the compression SQL function. sql/handler.cc: Fix a double free() in error case for plugin initialisation. sql/item_strfunc.cc: Fix false alarms in Valgrind for zlib. Apply same fix as for archive storage handler also to the cases of compression in the client protocol, and to the compression SQL function. --- mysys/my_compress.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 70 insertions(+), 4 deletions(-) (limited to 'mysys') diff --git a/mysys/my_compress.c b/mysys/my_compress.c index 45c4ab983cc..26626d70079 100644 --- a/mysys/my_compress.c +++ b/mysys/my_compress.c @@ -57,19 +57,85 @@ my_bool my_compress(uchar *packet, size_t *len, size_t *complen) } +/* + Valgrind normally gives false alarms for zlib operations, in the form of + "conditional jump depends on uninitialised values" etc. The reason is + explained in the zlib FAQ (http://www.zlib.net/zlib_faq.html#faq36): + + "That is intentional for performance reasons, and the output of deflate + is not affected." + + Also discussed on a blog + (http://www.sirena.org.uk/log/2006/02/19/zlib-generating-valgrind-warnings/): + + "...loop unrolling in the zlib library causes the mentioned + “Conditional jump or move depends on uninitialised value(s)” + warnings. These are safe since the results of the comparison are + subsequently ignored..." + + "the results of the calculations are discarded by bounds checking done + after the loop exits" + + Fix by initializing the memory allocated by zlib when running under Valgrind. + + This fix is safe, since such memory is only used internally by zlib, so we + will not hide any bugs in mysql this way. 
+*/ +void *my_az_allocator(void *dummy, unsigned int items, unsigned int size) +{ + return my_malloc((size_t)items*(size_t)size, IF_VALGRIND(MY_ZEROFILL, MYF(0))); +} + +void my_az_free(void *dummy, void *address) +{ + my_free(address, MYF(MY_ALLOW_ZERO_PTR)); +} + +/* + This works like zlib compress(), but using custom memory allocators to work + better with my_malloc leak detection and Valgrind. +*/ +int my_compress_buffer(uchar *dest, size_t *destLen, + const uchar *source, size_t sourceLen) +{ + z_stream stream; + int err; + + stream.next_in = (Bytef*)source; + stream.avail_in = (uInt)sourceLen; + stream.next_out = (Bytef*)dest; + stream.avail_out = (uInt)*destLen; + if ((size_t)stream.avail_out != *destLen) + return Z_BUF_ERROR; + + stream.zalloc = (alloc_func)my_az_allocator; + stream.zfree = (free_func)my_az_free; + stream.opaque = (voidpf)0; + + err = deflateInit(&stream, Z_DEFAULT_COMPRESSION); + if (err != Z_OK) return err; + + err = deflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + deflateEnd(&stream); + return err == Z_OK ? Z_BUF_ERROR : err; + } + *destLen = stream.total_out; + + err = deflateEnd(&stream); + return err; +} + uchar *my_compress_alloc(const uchar *packet, size_t *len, size_t *complen) { uchar *compbuf; - uLongf tmp_complen; int res; *complen= *len * 120 / 100 + 12; if (!(compbuf= (uchar *) my_malloc(*complen, MYF(MY_WME)))) return 0; /* Not enough memory */ - tmp_complen= (uLongf) *complen; - res= compress((Bytef*) compbuf, &tmp_complen, (Bytef*) packet, (uLong) *len); - *complen= tmp_complen; + res= my_compress_buffer(compbuf, complen, packet, *len); if (res != Z_OK) { -- cgit v1.2.1 From bb9a3f0c2b46491ec3234f8a9df8612f88469b90 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 9 Jun 2009 17:08:46 +0200 Subject: XtraDB after-merge fixes. Fixes to get the test suite to run without failures. mysql-test/r/information_schema.result: Additional variables available now. Sort output to avoid depending on engine order. 
mysql-test/r/information_schema_all_engines.result: More variables now. mysql-test/r/innodb-autoinc.result: Avoid picking up pbxt variables in result mysql-test/r/innodb-index.result: Save state to not corrupt following testcases. Suppress an expected warning. mysql-test/r/innodb-zip.result: Work around a problem with dependency on zlib version mysql-test/r/innodb.result: Checksums have changed in Maria. Save and restore server state to not corrupt following testcases. mysql-test/r/innodb_bug36169.result: Save and restore server state to not corrupt following testcases. mysql-test/r/innodb_xtradb_bug317074.result: Save and restore server state to not corrupt following testcases. mysql-test/r/row-checksum-old.result: Update result file mysql-test/r/row-checksum.result: Update result file mysql-test/t/information_schema.test: Sort output to avoid depending on engine order. mysql-test/t/innodb-analyze.test: Save and restore server state to not corrupt following testcases. mysql-test/t/innodb-autoinc.test: Save and restore server state to not corrupt following testcases. mysql-test/t/innodb-index.test: Save state to not corrupt following testcases. Suppress an expected warning. mysql-test/t/innodb-zip.test: Work around a problem with dependency on zlib version mysql-test/t/innodb.test: Save and restore server state to not corrupt following testcases. Update --replace statements for new mysql-test-run mysql-test/t/innodb_bug34300.test: Save and restore server state to not corrupt following testcases. mysql-test/t/innodb_bug36169.test: Save and restore server state to not corrupt following testcases. mysql-test/t/innodb_bug36172.test: Save and restore server state to not corrupt following testcases. mysql-test/t/innodb_xtradb_bug317074.test: Save and restore server state to not corrupt following testcases. mysql-test/t/partition_innodb.test: Fix regexps to work with new SHOW INNODB STATUS output. mysys/thr_mutex.c: Initialize mutex deadlock detection lazily. 
This allows to test XtraDB, which initializes huge amounts of mutexes without using any but a few of them. storage/xtradb/ibuf/ibuf0ibuf.c: Fix problem where value of INNODB_IBUF_MAX_SIZE would depend on the alignment of memory allocated by the buffer pool. storage/xtradb/include/sync0rw.h: Fix XtraDB to compile without GCC atomic operation intrinsics (performance may suffer when they are not available though). storage/xtradb/include/sync0rw.ic: Fix XtraDB to compile without GCC atomic operation intrinsics (performance may suffer when they are not available though). storage/xtradb/include/univ.i: Fix for MariaDB storage/xtradb/setup.sh: Remove no longer needed file from XtraDB. storage/xtradb/srv/srv0start.c: Fix for MariaDB --- mysys/thr_mutex.c | 63 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 30 deletions(-) (limited to 'mysys') diff --git a/mysys/thr_mutex.c b/mysys/thr_mutex.c index 80f21e53473..c46b68761db 100644 --- a/mysys/thr_mutex.c +++ b/mysys/thr_mutex.c @@ -149,6 +149,35 @@ static inline void remove_from_active_list(safe_mutex_t *mp) mp->prev= mp->next= 0; } +/* + We initialise the hashes for deadlock detection lazily. + This greatly helps with performance when lots of mutexes are initiased but + only a few of them are actually used (eg. XtraDB). 
+*/ +static int safe_mutex_lazy_init_deadlock_detection(safe_mutex_t *mp) +{ + if (!my_multi_malloc(MY_FAE | MY_WME, + &mp->locked_mutex, sizeof(*mp->locked_mutex), + &mp->used_mutex, sizeof(*mp->used_mutex), NullS)) + { + return 1; /* Error */ + } + + pthread_mutex_lock(&THR_LOCK_mutex); + mp->id= ++safe_mutex_id; + pthread_mutex_unlock(&THR_LOCK_mutex); + hash_init(mp->locked_mutex, &my_charset_bin, + 1000, + offsetof(safe_mutex_deadlock_t, id), + sizeof(mp->id), + 0, 0, HASH_UNIQUE); + hash_init(mp->used_mutex, &my_charset_bin, + 1000, + offsetof(safe_mutex_t, id), + sizeof(mp->id), + 0, 0, HASH_UNIQUE); + return 0; +} int safe_mutex_init(safe_mutex_t *mp, const pthread_mutexattr_t *attr __attribute__((unused)), @@ -167,35 +196,8 @@ int safe_mutex_init(safe_mutex_t *mp, mp->line= line; /* Skip the very common '&' prefix from the autogenerated name */ mp->name= name[0] == '&' ? name + 1 : name; + /* Deadlock detection is initialised only lazily, on first use. */ - if (safe_mutex_deadlock_detector && !( my_flags & MYF_NO_DEADLOCK_DETECTION)) - { - if (!my_multi_malloc(MY_FAE | MY_WME, - &mp->locked_mutex, sizeof(*mp->locked_mutex), - &mp->used_mutex, sizeof(*mp->used_mutex), NullS)) - { - /* Disable deadlock handling for this mutex */ - my_flags|= MYF_NO_DEADLOCK_DETECTION; - } - else - { - pthread_mutex_lock(&THR_LOCK_mutex); - mp->id= ++safe_mutex_id; - pthread_mutex_unlock(&THR_LOCK_mutex); - hash_init(mp->locked_mutex, &my_charset_bin, - 1000, - offsetof(safe_mutex_deadlock_t, id), - sizeof(mp->id), - 0, 0, HASH_UNIQUE); - hash_init(mp->used_mutex, &my_charset_bin, - 1000, - offsetof(safe_mutex_t, id), - sizeof(mp->id), - 0, 0, HASH_UNIQUE); - } - } - else - my_flags|= MYF_NO_DEADLOCK_DETECTION; mp->create_flags= my_flags; #ifdef SAFE_MUTEX_DETECT_DESTROY @@ -310,7 +312,8 @@ int safe_mutex_lock(safe_mutex_t *mp, myf my_flags, const char *file, /* Deadlock detection */ mp->prev= mp->next= 0; - if (!(mp->active_flags & (MYF_TRY_LOCK | 
MYF_NO_DEADLOCK_DETECTION))) + if (!(mp->active_flags & (MYF_TRY_LOCK | MYF_NO_DEADLOCK_DETECTION)) && + (mp->used_mutex != NULL || !safe_mutex_lazy_init_deadlock_detection(mp))) { safe_mutex_t **mutex_in_use= my_thread_var_mutex_in_use(); @@ -643,7 +646,7 @@ int safe_mutex_destroy(safe_mutex_t *mp, const char *file, uint line) void safe_mutex_free_deadlock_data(safe_mutex_t *mp) { /* Free all entries that points to this one */ - if (!(mp->create_flags & MYF_NO_DEADLOCK_DETECTION)) + if (!(mp->create_flags & MYF_NO_DEADLOCK_DETECTION) && mp->used_mutex != NULL) { pthread_mutex_lock(&THR_LOCK_mutex); my_hash_iterate(mp->used_mutex, -- cgit v1.2.1 From 7c5e321bb8a4ac0a1683447434a5cfe06452a0f9 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 22 Jun 2009 10:06:35 +0200 Subject: More XtraDB after-merge fixes following review and buildbot runs: - Better fix for --innodb-use-sys-malloc causing Valgrind warnings. - Different fix for INNODB_IBUF_MAX_SIZE variable changing default value. - Fix some problems with the safe mutex lazy init patch. mysql-test/include/mtr_check.sql: Do not check INNODB_IBUF_MAX_SIZE for changes. It is not a dynamic variable, so cannot be changed by a test case anyway, and the value may vary slightly from one start of the server to the next. mysql-test/lib/mtr_cases.pm: Even just starting and stopping the server with --innodb-use-sys-malloc to check for disabled test case under valgrind will cause valgrind leak warnings. So add not_valgrind to the list of conditions also tested for directly in mysql-test-run.pl. mysql-test/mysql-test-run.pl: Even just starting and stopping the server with --innodb-use-sys-malloc to check for disabled test case under valgrind will cause valgrind leak warnings. So add not_valgrind to the list of conditions also tested for directly in mysql-test-run.pl. mysys/thr_mutex.c: Fix a few problems found during review of the lazy init safe mutex patch. 
storage/xtradb/ibuf/ibuf0ibuf.c: Revert previous fix of INNODB_IBUF_MAX_SIZE default varying slightly between server starts. (Fixed instead by ignoring that variable in the test suite). --- mysys/thr_mutex.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'mysys') diff --git a/mysys/thr_mutex.c b/mysys/thr_mutex.c index c46b68761db..b5abf987461 100644 --- a/mysys/thr_mutex.c +++ b/mysys/thr_mutex.c @@ -160,6 +160,9 @@ static int safe_mutex_lazy_init_deadlock_detection(safe_mutex_t *mp) &mp->locked_mutex, sizeof(*mp->locked_mutex), &mp->used_mutex, sizeof(*mp->used_mutex), NullS)) { + /* Disable deadlock handling for this mutex */ + mp->create_flags|= MYF_NO_DEADLOCK_DETECTION; + mp->active_flags|= MYF_NO_DEADLOCK_DETECTION; return 1; /* Error */ } @@ -196,6 +199,9 @@ int safe_mutex_init(safe_mutex_t *mp, mp->line= line; /* Skip the very common '&' prefix from the autogenerated name */ mp->name= name[0] == '&' ? name + 1 : name; + + if (!safe_mutex_deadlock_detector) + my_flags|= MYF_NO_DEADLOCK_DETECTION; /* Deadlock detection is initialised only lazily, on first use. */ mp->create_flags= my_flags; -- cgit v1.2.1 From 9db357e2bfebf9207a507c4a2244499899a960a2 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Thu, 2 Jul 2009 13:15:33 +0300 Subject: Added MY_CS_NONASCII marker for character sets that are not compatible with latin1 for characters 0x00-0x7f This allows us to skip and speed up some very common character converts that MySQL is doing when sending data to the client and this gives us a nice speed increase for most queries that uses only characters in the range 0x00-0x7f. This code is based on Alexander Barkov's code that he has done in MySQL 6.0 include/m_ctype.h: Added MY_CS_NONASCII marker libmysqld/lib_sql.cc: Added function net_store_data(...) 
that takes to and from CHARSET_INFO * as arguments mysys/charset.c: Mark character sets with MY_CS_NONASCII scripts/mysql_install_db.sh: Fixed messages to refer to MariaDB instead of MySQL sql/protocol.cc: Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments sql/protocol.h: Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments sql/sql_string.cc: Quicker copy of strings with no characters above 0x7f strings/conf_to_src.c: Added printing of MY_CS_NONASCII strings/ctype-extra.c: Mark incompatible character sets with MY_CS_NONASCII Removed duplicated character set geostd strings/ctype-sjis.c: Mark incompatible character sets with MY_CS_NONASCII strings/ctype-uca.c: Mark incompatible character sets with MY_CS_NONASCII strings/ctype-ucs2.c: Mark incompatible character sets with MY_CS_NONASCII strings/ctype-utf8.c: Mark incompatible character sets with MY_CS_NONASCII strings/ctype.c: Added function to check if character set is compatible with latin1 in ranges 0x00-0x7f --- mysys/charset.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'mysys') diff --git a/mysys/charset.c b/mysys/charset.c index e61995de1d8..933694477fa 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -248,6 +248,7 @@ static int add_collation(CHARSET_INFO *cs) { #if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS) copy_uca_collation(newcs, &my_charset_ucs2_unicode_ci); + newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII; #endif } else if (!strcmp(cs->csname, "utf8")) @@ -280,6 +281,8 @@ static int add_collation(CHARSET_INFO *cs) if (my_charset_is_8bit_pure_ascii(all_charsets[cs->number])) all_charsets[cs->number]->state|= MY_CS_PUREASCII; + if (!my_charset_is_ascii_compatible(cs)) + all_charsets[cs->number]->state|= MY_CS_NONASCII; } } else -- cgit v1.2.1 From 03db11cfdaabc27b57de342eb4974195745f90d6 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 3 Sep 2009 15:05:02 +0200 Subject: MBug#423035: error in 
parsing enum value for plugin variable in mysqld command-line option Fix parsing of invalid plugin enum option value. Previous patch to fix plugin enum option parsing on big-endian introduced another bug due to incorrect comparison of unsigned value. This would cause an incorrect value to be parsed as value 0. See also MySQL Bug#41010 and Bug#32034. mysql-test/mysql-test-run.pl: Add a facility for test case to run the mysqld binary (to test that invalid startup options are rejected correctly). mysql-test/r/mysqld_option_err.result: Add a test case to check that invalid startup options for mysqld are rejected. This is needed to test MBug#423035. Also add a few other similar tests, as this was completely untested before this patch. mysql-test/t/mysqld_option_err.test: Add a test case to check that invalid startup options for mysqld are rejected. This is needed to test MBug#423035. Also add a few other similar tests, as this was completely untested before this patch. mysys/my_getopt.c: Fix parsing of invalid plugin enum option value. 
--- mysys/my_getopt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c index 0de80b01c4f..d44ec162b93 100644 --- a/mysys/my_getopt.c +++ b/mysys/my_getopt.c @@ -603,6 +603,7 @@ static int setval(const struct my_option *opts, uchar* *value, char *argument, my_bool set_maximum_value) { int err= 0; + int pos; if (value && argument) { @@ -647,7 +648,9 @@ static int setval(const struct my_option *opts, uchar* *value, char *argument, return EXIT_OUT_OF_MEMORY; break; case GET_ENUM: - if (((*(ulong *)result_pos)= find_type(argument, opts->typelib, 2) - 1) < 0) + pos= find_type(argument, opts->typelib, 2) - 1; + (*(ulong *)result_pos)= pos; + if (pos < 0) return EXIT_ARGUMENT_INVALID; break; case GET_SET: -- cgit v1.2.1 From 592379fc9592821b4acb65f3694b517f22621af4 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 3 Sep 2009 15:20:22 +0200 Subject: Fix most Compiler warnings seen in buildbot. Add suppressions for a few warnings that cannot be meaningfully fixed by MariaDB developers. Changes for XtraDB, PBXT, and YaSSL also submitted upstream. Also add a `ccfilter` wrapper that can be used to filter out suppressed warnings in a local build (to check that new warnings are not introduced). client/mysqlbinlog.cc: Fix compiler warnings. config/ac-macros/misc.m4: Fix wrong naming, autoconfig requires _cv_ in cached names. extra/yassl/include/yassl_int.hpp: Fix compiler warnings. extra/yassl/src/handshake.cpp: Fix compiler warnings. extra/yassl/src/yassl_imp.cpp: Fix compiler warnings. extra/yassl/src/yassl_int.cpp: Fix compiler warnings. extra/yassl/taocrypt/include/modes.hpp: Fix compiler warnings. extra/yassl/taocrypt/src/asn.cpp: Fix compiler warnings. mysys/my_compress.c: Fix compiler warnings. sql/mysqld.cc: Fix compiler warnings. sql/strfunc.cc: Fix compiler warnings. storage/pbxt/src/discover_xt.cc: Fix compiler warnings. storage/xtradb/fil/fil0fil.c: Fix compiler warnings. 
storage/xtradb/mtr/mtr0mtr.c: Fix compiler warnings. storage/xtradb/srv/srv0srv.c: Fix compiler warnings. storage/xtradb/srv/srv0start.c: Fix compiler warnings. strings/decimal.c: Fix compiler warnings. support-files/ccfilter: Add helper for suppressing compiler warnings in local developer source tree. Allows to check for not introducing new warnings into Buildbot without having to actually run the build through Buildbot. support-files/compiler_warnings.supp: Suppress a few warnings that cannot be meaningfully fixed in source code. --- mysys/my_compress.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/my_compress.c b/mysys/my_compress.c index 26626d70079..ade2742c4fc 100644 --- a/mysys/my_compress.c +++ b/mysys/my_compress.c @@ -81,12 +81,13 @@ my_bool my_compress(uchar *packet, size_t *len, size_t *complen) This fix is safe, since such memory is only used internally by zlib, so we will not hide any bugs in mysql this way. */ -void *my_az_allocator(void *dummy, unsigned int items, unsigned int size) +void *my_az_allocator(void *dummy __attribute__((unused)), unsigned int items, + unsigned int size) { return my_malloc((size_t)items*(size_t)size, IF_VALGRIND(MY_ZEROFILL, MYF(0))); } -void my_az_free(void *dummy, void *address) +void my_az_free(void *dummy __attribute__((unused)), void *address) { my_free(address, MYF(MY_ALLOW_ZERO_PTR)); } -- cgit v1.2.1 From 829c6099b7e0a9eb689456db4160362d1e1c8011 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 11 Sep 2009 15:20:03 +0200 Subject: After-merge fix for MySQL 5.1.38 merge into MariaDB. Due to a bugfix for enum options in MariaDB, my_getopt parses enums into an ulong. However, some new code from MySQL was written to assume enums take an uint. Fix by using the correct type. (The new MySQL code in addition had an implicit assumption that my_bool and uint were compatible; remove this assumption). 
--- mysys/my_getopt.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c index e57c1d71a13..ceb99975cdb 100644 --- a/mysys/my_getopt.c +++ b/mysys/my_getopt.c @@ -414,11 +414,17 @@ invalid value '%s'", (optp->var_type & GET_TYPE_MASK) == GET_ENUM)) { if (optend == disabled_my_option) - *((my_bool*) value)= (my_bool) 0; + if ((optp->var_type & GET_TYPE_MASK) == GET_BOOL) + *((my_bool*) value)= (my_bool) 0; + else + *((ulong*) value)= (ulong) 0; else { if (!optend) /* No argument -> enable option */ - *((my_bool*) value)= (my_bool) 1; + if ((optp->var_type & GET_TYPE_MASK) == GET_BOOL) + *((my_bool*) value)= (my_bool) 1; + else + *((ulong*) value)= (ulong) 1; else argument= optend; } -- cgit v1.2.1 From e8d7e27fedf9db7e9143fe8eb2b6560696a1fea8 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 15 Sep 2009 14:53:07 +0200 Subject: More after-merge fixes for merging MySQL 5.1.38 into MariaDB. mysql-test/t/mysqldump.test: Make test case work when build directory is not world readable (this is the case for Buildbot checkouts). mysys/my_getopt.c: Restore bugfix which was lost in previous merge. storage/xtradb/buf/buf0flu.c: Fix extranous line caused by bad merge. --- mysys/my_getopt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c index 46e07fda32e..5a06b18d4b8 100644 --- a/mysys/my_getopt.c +++ b/mysys/my_getopt.c @@ -656,8 +656,9 @@ static int setval(const struct my_option *opts, uchar* *value, char *argument, return EXIT_OUT_OF_MEMORY; break; case GET_ENUM: - if (((*(ulong *)result_pos)= - find_type(argument, opts->typelib, 2) - 1) < 0) + pos= find_type(argument, opts->typelib, 2) - 1; + (*(ulong *)result_pos)= pos; + if (pos < 0) { /* Accept an integer representation of the enumerated item. 
-- cgit v1.2.1 From cbaa5aaf47736f582faf8a9ed32eae285608c9fa Mon Sep 17 00:00:00 2001 From: Peter Lieverdink Date: Thu, 1 Oct 2009 09:40:51 +1000 Subject: Typo fixes for "usefull" -> "useful". --- mysys/safemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/safemalloc.c b/mysys/safemalloc.c index a69051e6674..9b91f15c4aa 100644 --- a/mysys/safemalloc.c +++ b/mysys/safemalloc.c @@ -436,7 +436,7 @@ void TERMINATE(FILE *file, uint flag) /* Report where a piece of memory was allocated - This is usefull to call from withing a debugger + This is useful to call from withing a debugger */ void sf_malloc_report_allocated(void *memory) -- cgit v1.2.1 From 51186f1d3d38d4fbd4e9f0ff1a67c78ea9920fbf Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 16 Oct 2009 17:01:36 +0200 Subject: When running with --skip-safemalloc, still do some basic, but cheap, overrun checks. This greatly helps with eg. slow hosts in Buildbot. --- mysys/safemalloc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'mysys') diff --git a/mysys/safemalloc.c b/mysys/safemalloc.c index a69051e6674..627e860fac8 100644 --- a/mysys/safemalloc.c +++ b/mysys/safemalloc.c @@ -272,6 +272,9 @@ void _myfree(void *ptr, const char *filename, uint lineno, myf myflags) irem= (struct st_irem *) ((char*) ptr- ALIGN_SIZE(sizeof(struct st_irem))- sf_malloc_prehunc); + if (sf_malloc_quick) + (void) _checkchunk(irem, filename, lineno); + /* Check to make sure that we have a real remember structure. Note: this test could fail for four reasons: -- cgit v1.2.1 From 52cb2344606f1f0c151fc612862820c0863cf90e Mon Sep 17 00:00:00 2001 From: Sergey Petrunya Date: Fri, 16 Oct 2009 19:44:58 +0400 Subject: MBUG#452116: MariaDB: mysql_install_db causes server segfault - Increase thread_stack_size on 64-bit platforms to 240K, so that it can accomodate the HA_CHECK structure which is 130K. 
--- mysys/my_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_init.c b/mysys/my_init.c index 0e1a8c9a4aa..e7ab9ba7a1f 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -42,7 +42,8 @@ static void netware_init(); my_bool my_init_done= 0; uint mysys_usage_id= 0; /* Incremented for each my_init() */ -ulong my_thread_stack_size= 65536; + +ulong my_thread_stack_size= (sizeof(void*) <= 4)? 65536: ((256-16)*1024); static ulong atoi_octal(const char *str) { -- cgit v1.2.1 From ab0905c6d7041928b260adb60ff551275e8153bc Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Mon, 19 Oct 2009 20:14:48 +0300 Subject: This is based on the userstatv2 patch from Percona and OurDelta. The original code comes, as far as I know, from Google (Mark Callaghan's team) with additional work from Percona, Ourdelta and Weldon Whipple. This code provides the same functionallity, but with a lot of changes to make it faster and better fit the MariaDB infrastucture. Added new status variables: - Com_show_client_statistics, Com_show_index_statistics, Com_show_table_statistics, Com_show_user_statistics - Access_denied_errors, Busy_time (clock time), Binlog_bytes_written, Cpu_time, Empty_queries, Rows_sent, Rows_read Added new variable / startup option 'userstat' to control if user statistics should be enabled or not Added my_getcputime(); Returns cpu time used by this thread. New FLUSH commands: - FLUSH SLOW QUERY LOG - FLUSH TABLE_STATISTICS - FLUSH INDEX_STATISTICS - FLUSH USER_STATISTICS - FLUSH CLIENT_STATISTICS New SHOW commands: - SHOW CLIENT_STATISTICS - SHOW USER_STATISTICS - SHOW TABLE_STATISTICS - SHOW INDEX_STATISTICS New Information schemas: - CLIENT_STATISTICS - USER_STATISTICS - INDEX_STATISTICS - TABLE_STATISTICS Added support for all new flush commands to mysqladmin Added handler::ha_... wrappers for all handler read calls to do statistics counting - Changed all code to use new ha_... 
calls - Count number of read rows, changed rows and rows read trough an index Added counting of number of bytes sent to binary log (status variable Binlog_bytes_written) Added counting of access denied errors (status variable Access_denied_erors) Bugs fixed: - Fixed bug in add_to_status() and add_diff_to_status() where longlong variables where threated as long - CLOCK_GETTIME was not propely working on Linuxm client/mysqladmin.cc: Added support for all new flush commmands and some common combinations: flush-slow-log flush-table-statistics flush-index-statistics flush-user-statistics flush-client-statistics flush-all-status flush-all-statistics configure.in: Added checking if clock_gettime needs the librt. (Fixes Bug #37639 clock_gettime is never used/enabled in Linux/Unix) include/my_sys.h: Added my_getcputime() include/mysql_com.h: Added LIST_PROCESS_HOST_LEN & new REFRESH target defines mysql-test/r/information_schema.result: New information schema tables added mysql-test/r/information_schema_all_engines.result: New information schema tables added mysql-test/r/information_schema_db.result: New information schema tables added mysql-test/r/log_slow.result: Added testing that flosh slow query logs is accepted mysql-test/r/status_user.result: Basic testing of user, client, table and index statistics mysql-test/t/log_slow.test: Added testing that flosh slow query logs is accepted mysql-test/t/status_user-master.opt: Ensure that we get a fresh restart before running status_user.test mysql-test/t/status_user.test: Basic testing of user, client, table and index statistics mysys/my_getsystime.c: Added my_getcputime() Returns cpu time used by this thread. sql/authors.h: Updated authors to have core and original MySQL developers first. sql/event_data_objects.cc: Updated call to mysql_reset_thd_for_next_command() sql/event_db_repository.cc: Changed to use new ha_... calls sql/filesort.cc: Changed to use new ha_... calls sql/ha_partition.cc: Changed to use new ha_... 
calls Fixed comment syntax sql/handler.cc: Changed to use new ha_... calls Reset table statistics Added code to update global table and index status Added counting of rows changed sql/handler.h: Added table and index statistics variables Added function reset_statistics() Added handler::ha_... wrappers for all handler read calls to do statistics counting Protected all normal read calls to ensure we use the new calls in the server. Made ha_partition a friend class so that partition code can call the old read functions sql/item_subselect.cc: Changed to use new ha_... calls sql/lex.h: Added keywords for new information schema tables and flush commands sql/log.cc: Added flush_slow_log() Added counting of number of bytes sent to binary log Removed not needed test of thd (It's used before, so it's safe to use) Added THD object to MYSQL_BIN_LOG::write_cache() to simplify statistics counting sql/log.h: Added new parameter to write_cache() Added flush_slow_log() functions. sql/log_event.cc: Updated call to mysql_reset_thd_for_next_command() Changed to use new ha_... calls sql/log_event_old.cc: Updated call to mysql_reset_thd_for_next_command() Changed to use new ha_... calls sql/mysql_priv.h: Updated call to mysql_reset_thd_for_next_command() Added new statistics functions and variables needed by these. sql/mysqld.cc: Added new statistics variables and structures to handle these Added new status variables: - Com_show_client_statistics, Com_show_index_statistics, Com_show_table_statistics, Com_show_user_statistics - Access_denied_errors, Busy_time (clock time), Binlog_bytes_written, Cpu_time, Empty_queries, Rows_set, Rows_read Added new option 'userstat' to control if user statistics should be enabled or not sql/opt_range.cc: Changed to use new ha_... calls sql/opt_range.h: Changed to use new ha_... calls sql/opt_sum.cc: Changed to use new ha_... calls sql/records.cc: Changed to use new ha_... 
calls sql/set_var.cc: Added variable 'userstat' sql/sp.cc: Changed to use new ha_... calls sql/sql_acl.cc: Changed to use new ha_... calls Added counting of access_denied_errors sql/sql_base.cc: Added call to statistics functions sql/sql_class.cc: Added usage of org_status_var, to store status variables at start of command Added functions THD::update_stats(), THD::update_all_stats() Fixed bug in add_to_status() and add_diff_to_status() where longlong variables where threated as long sql/sql_class.h: Added new status variables to status_var Moved variables that was not ulong in status_var last. Added variables to THD for storing temporary values during statistics counting sql/sql_connect.cc: Variables and functions to calculate user and client statistics Added counting of access_denied_errors and lost_connections sql/sql_cursor.cc: Changed to use new ha_... calls sql/sql_handler.cc: Changed to use new ha_... calls sql/sql_help.cc: Changed to use new ha_... calls sql/sql_insert.cc: Changed to use new ha_... calls sql/sql_lex.h: Added SQLCOM_SHOW_USER_STATS, SQLCOM_SHOW_TABLE_STATS, SQLCOM_SHOW_INDEX_STATS, SQLCOM_SHOW_CLIENT_STATS sql/sql_parse.cc: Added handling of: - SHOW CLIENT_STATISTICS - SHOW USER_STATISTICS - SHOW TABLE_STATISTICS - SHOW INDEX_STATISTICS Added handling of new FLUSH commands: - FLUSH SLOW QUERY LOGS - FLUSH TABLE_STATISTICS - FLUSH INDEX_STATISTICS - FLUSH USER_STATISTICS - FLUSH CLIENT_STATISTICS Added THD parameter to mysql_reset_thd_for_next_command() Added initialization and calls to user statistics functions Added increment of statistics variables empty_queries, rows_sent and access_denied_errors. Added counting of cpu time per query sql/sql_plugin.cc: Changed to use new ha_... calls sql/sql_prepare.cc: Updated call to mysql_reset_thd_for_next_command() sql/sql_select.cc: Changed to use new ha_... calls Indentation changes sql/sql_servers.cc: Changed to use new ha_... 
calls sql/sql_show.cc: Added counting of access denied errors Added function for new information schema tables: - CLIENT_STATISTICS - USER_STATISTICS - INDEX_STATISTICS - TABLE_STATISTICS Changed to use new ha_... calls sql/sql_table.cc: Changed to use new ha_... calls sql/sql_udf.cc: Changed to use new ha_... calls sql/sql_update.cc: Changed to use new ha_... calls sql/sql_yacc.yy: Add new show and flush commands sql/structs.h: Add name_length to KEY to avoid some strlen Added cache_name to KEY for fast storage of keyvalue in cache Added structs USER_STATS, TABLE_STATS, INDEX_STATS Added function prototypes for statistics functions sql/table.cc: Store db+table+index name into keyinfo->cache_name sql/table.h: Added new information schema tables sql/tztime.cc: Changed to use new ha_... calls --- mysys/my_getsystime.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'mysys') diff --git a/mysys/my_getsystime.c b/mysys/my_getsystime.c index 64480c4aa7a..336d005d7d5 100644 --- a/mysys/my_getsystime.c +++ b/mysys/my_getsystime.c @@ -28,6 +28,10 @@ #ifdef __NETWARE__ #include #endif +#ifdef HAVE_LINUX_UNISTD_H +#include +#endif + ulonglong my_getsystime() { @@ -222,3 +226,25 @@ time_t my_time_possible_from_micro(ulonglong microtime __attribute__((unused))) return (time_t) (microtime / 1000000); #endif /* defined(__WIN__) */ } + + +/* + Return cpu time in milliseconds * 10 +*/ + +ulonglong my_getcputime() +{ +#ifdef HAVE_CLOCK_GETTIME + struct timespec tp; + if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)) + return 0; + return (ulonglong)tp.tv_sec*10000000+(ulonglong)tp.tv_nsec/100; +#elif defined(__NR_clock_gettime) + struct timespec tp; + if (syscall(__NR_clock_gettime, CLOCK_THREAD_CPUTIME_ID, &tp)) + return 0; + return (ulonglong)tp.tv_sec*10000000+(ulonglong)tp.tv_nsec/100; +#else + return 0; +#endif /* HAVE_CLOCK_GETTIME */ +} -- cgit v1.2.1 From 5bddbc44c6fed6e153f0e3c3a5e157ada454a617 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: 
Mon, 26 Oct 2009 13:35:42 +0200 Subject: Fixed compiler warning message - Added checking of return value for system(), freopen(), fgets() and chown() - Ensure that calls that require a format strings gets a format string - Other trivial things Updated test suite results (especially for pbxt and embedded server) Removed warning for "Invalid (old?) table or database name 'mysqld.1'" from pbxt tests Speed up some pbxt tests by inserting begin ; commit; around "while loops with inserts" Added mysqld startup option '--debug-flush' Create maria_recovery.trace in data directory instead of current directory client/mysql.cc: Check return value from system() client/mysql_upgrade.c: Check return value from fgets() client/mysqladmin.cc: Check return value from fgets() client/mysqlslap.c: Check return value from system() (but ignore it, as it's not critical) extra/yassl/src/crypto_wrapper.cpp: Check return value from fgets() (but ignore it, as it's internal file) extra/yassl/taocrypt/src/aes.cpp: Added extra {} to remove compiler warning extra/yassl/taocrypt/src/blowfish.cpp: Added extra {} to remove compiler warning extra/yassl/taocrypt/src/misc.cpp: Ifdef not used code include/mysys_err.h: Added error message for failing chown() mysql-test/mysql-test-run.pl: Don't give warning for skipping ndbcluster (never enabled in MariaDB) mysql-test/suite/funcs_1/r/is_columns_is_embedded.result: Update with new information schema information mysql-test/suite/funcs_1/r/is_tables_is_embedded.result: New test mysql-test/suite/funcs_1/r/is_tables_myisam_embedded.result: Update test results (has not been tested for a long time) mysql-test/suite/funcs_1/r/is_tables_mysql_embedded.result: Update test results (has not been tested for a long time) mysql-test/suite/funcs_1/t/is_tables_is.test: Don't run with embedded server (as results differ) I added a new test for embedded server mysql-test/suite/funcs_1/t/is_tables_is_embedded.test: New test mysql-test/suite/pbxt/my.cnf: Allow one to run pbxt 
tests without having to specify --mysqld=--default-storage-engine=pbxt mysql-test/suite/pbxt/t/count_distinct3.test: Speed up test by inserting begin; ... commit; mysql-test/suite/pbxt/t/subselect.test: Speed up test by inserting begin; ... commit; mysys/errors.c: Added error message for failing chown() mysys/my_copy.c: Added error message for failing chown() mysys/my_redel.c: Added error message for failing chown() mysys/safemalloc.c: Added cast to get rid of compiler warning sql/ha_partition.cc: Fixed wrong argument to sql_print_error() (it requires a format string) sql/log.cc: Test return value of freopen() sql/mysqld.cc: Test return value of freopen() Added startup option '--debug-flush' to be used when one gets a core dump (easy to explain to people on IRC) sql/rpl_rli.cc: Fixed wrong argument to sql_print_error() (it requires a format string) sql/set_var.cc: Added {} to get rid of compiler warnings sql/slave.cc: Fixed wrong argument to mi->report() and sql_print...() (they require a format string) sql/sql_cache.cc: Fixed wrong argument to sql_printinformation() (it requires a format string) sql/sql_parse.cc: Test return value of fgets() sql/sql_plugin.cc: Fixed wrong argument to sql_print_error() (it requires a format string) sql/sql_select.cc: Use unique table name for internal temp tables instead of full path (Simple speed & space optimization) sql/udf_example.c: Removed compiler warning about not used variable storage/maria/ha_maria.cc: Fixed wrong argument to sql_print_error() and ma_check_print_error() (they require a format string) storage/maria/ma_recovery.c: Create maria_recovery.trace in data directory instead of current directory storage/maria/unittest/ma_test_loghandler-t.c: Fixed wrong argument to ok(); Requires a format string storage/pbxt/src/strutil_xt.cc: Detect temporary tables by checking if that path for the table is in the mysql data directory. 
The database for temporary tables is after this patch, from PBXT point of view, "" This is needed to stop PBXT from calling filename_to_tablename() with the base directory as an argument, which caused ERROR: Invalid (old?) table or database name 'mysqld.1'" in the log when running the test suite. tests/mysql_client_test.c: Fixed compiler warnings unittest/mysys/base64-t.c: Fixed wrong argument to diag() (it requires a format string) Added a comment that the current 'print' of differing buffers doesn't print the right thing, but didn't fix this as it's not important (unless we find a bug in the real code) --- mysys/errors.c | 4 +++- mysys/my_copy.c | 6 +++++- mysys/my_redel.c | 6 +++++- mysys/safemalloc.c | 2 +- 4 files changed, 14 insertions(+), 4 deletions(-) (limited to 'mysys') diff --git a/mysys/errors.c b/mysys/errors.c index d832ba37da3..7c80fc0f89f 100644 --- a/mysys/errors.c +++ b/mysys/errors.c @@ -50,7 +50,8 @@ const char * NEAR globerrs[GLOBERRS]= "Collation '%s' is not a compiled collation and is not specified in the '%s' file", "File '%s' not found (Errcode: %d)", "File '%s' (fileno: %d) was not closed", - "Can't change mode for file '%s' to 0x%lx (Error: %d)" + "Can't change mode for file '%s' to 0x%lx (Error: %d)", + "Warning: Can't copy ownership for file '%s' (Error: %d)" }; void init_glob_errs(void) @@ -92,6 +93,7 @@ void init_glob_errs() EE(EE_FILENOTFOUND) = "File '%s' not found (Errcode: %d)"; EE(EE_FILE_NOT_CLOSED) = "File '%s' (fileno: %d) was not closed"; EE(EE_CANT_CHMOD) = "Can't change mode for file '%s' to 0x%lx (Error: %d)"; + EE(EE_CANT_COPY_OWNERSHIP)= "Warning: Can't copy ownership for file '%s' (Error: %d)"; } #endif diff --git a/mysys/my_copy.c b/mysys/my_copy.c index 5679d13d39d..cb2fbf00f3e 100644 --- a/mysys/my_copy.c +++ b/mysys/my_copy.c @@ -14,6 +14,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "mysys_priv.h" +#include "mysys_err.h" #include /* for stat */ #include #if 
defined(HAVE_UTIME_H) @@ -96,7 +97,10 @@ int my_copy(const char *from, const char *to, myf MyFlags) DBUG_RETURN(0); /* File copyed but not stat */ VOID(chmod(to, stat_buff.st_mode & 07777)); /* Copy modes */ #if !defined(__WIN__) && !defined(__NETWARE__) - VOID(chown(to, stat_buff.st_uid,stat_buff.st_gid)); /* Copy ownership */ + if (chown(to, stat_buff.st_uid,stat_buff.st_gid)) + { + my_error(EE_CANT_COPY_OWNERSHIP, MYF(ME_JUST_WARNING), to); + } #endif #if !defined(VMS) && !defined(__ZTC__) if (MyFlags & MY_COPYTIME) diff --git a/mysys/my_redel.c b/mysys/my_redel.c index b12cf098283..598a728393d 100644 --- a/mysys/my_redel.c +++ b/mysys/my_redel.c @@ -14,6 +14,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "mysys_priv.h" +#include "mysys_err.h" #include #include #include "mysys_err.h" @@ -94,7 +95,10 @@ int my_copystat(const char *from, const char *to, int MyFlags) if (MyFlags & MY_LINK_WARNING) my_error(EE_LINK_WARNING,MYF(ME_BELL+ME_WAITTANG),from,statbuf.st_nlink); } - VOID(chown(to, statbuf.st_uid, statbuf.st_gid)); /* Copy ownership */ + if (chown(to, statbuf.st_uid, statbuf.st_gid)) + { + my_error(EE_CANT_COPY_OWNERSHIP, MYF(ME_JUST_WARNING), to); + } #endif /* !__WIN__ && !__NETWARE__ */ #ifndef VMS diff --git a/mysys/safemalloc.c b/mysys/safemalloc.c index a69051e6674..79ae643a301 100644 --- a/mysys/safemalloc.c +++ b/mysys/safemalloc.c @@ -158,7 +158,7 @@ void *_mymalloc(size_t size, const char *filename, uint lineno, myf MyFlags) my_message(EE_OUTOFMEMORY, buff, MYF(ME_BELL+ME_WAITTANG+ME_NOREFRESH)); } DBUG_PRINT("error",("Out of memory, in use: %ld at line %d, '%s'", - sf_malloc_max_memory,lineno, filename)); + (ulong) sf_malloc_max_memory,lineno, filename)); if (MyFlags & MY_FAE) exit(1); DBUG_RETURN ((void*) 0); -- cgit v1.2.1 From 166e0683c0d45a79716d8913ec9ecaf3177343fa Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Sat, 7 Nov 2009 12:34:19 +0200 Subject: Added error handling for my_seek() & 
my_tell() failures mysys/my_seek.c: Give error if MY_WME is used sql/sql_insert.cc: Fixed compiler warning storage/maria/ha_maria.cc: Changed driver of Maria storage engine project --- mysys/errors.c | 2 ++ mysys/my_seek.c | 16 +++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'mysys') diff --git a/mysys/errors.c b/mysys/errors.c index 7c80fc0f89f..fc63ac9d936 100644 --- a/mysys/errors.c +++ b/mysys/errors.c @@ -51,6 +51,7 @@ const char * NEAR globerrs[GLOBERRS]= "File '%s' not found (Errcode: %d)", "File '%s' (fileno: %d) was not closed", "Can't change mode for file '%s' to 0x%lx (Error: %d)", + "Can't do seek on file '%s' (Errcode: %d)", "Warning: Can't copy ownership for file '%s' (Error: %d)" }; @@ -93,6 +94,7 @@ void init_glob_errs() EE(EE_FILENOTFOUND) = "File '%s' not found (Errcode: %d)"; EE(EE_FILE_NOT_CLOSED) = "File '%s' (fileno: %d) was not closed"; EE(EE_CANT_CHMOD) = "Can't change mode for file '%s' to 0x%lx (Error: %d)"; + EE(EE_CANT_SEEK) = "Can't do seek on file '%s' (Errcode: %d)"; EE(EE_CANT_COPY_OWNERSHIP)= "Warning: Can't copy ownership for file '%s' (Error: %d)"; } #endif diff --git a/mysys/my_seek.c b/mysys/my_seek.c index 4e18b510a1e..4ca5393e640 100644 --- a/mysys/my_seek.c +++ b/mysys/my_seek.c @@ -14,6 +14,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "mysys_priv.h" +#include "mysys_err.h" /* Seek to a position in a file. @@ -42,8 +43,7 @@ actual error. 
*/ -my_off_t my_seek(File fd, my_off_t pos, int whence, - myf MyFlags __attribute__((unused))) +my_off_t my_seek(File fd, my_off_t pos, int whence, myf MyFlags) { reg1 os_off_t newpos= -1; DBUG_ENTER("my_seek"); @@ -68,7 +68,9 @@ my_off_t my_seek(File fd, my_off_t pos, int whence, newpos= lseek(fd, pos, whence); if (newpos == (os_off_t) -1) { - my_errno=errno; + my_errno= errno; + if (MyFlags & MY_WME) + my_error(EE_CANT_SEEK, MYF(0), my_filename(fd), my_errno); DBUG_PRINT("error",("lseek: %lu errno: %d", (ulong) newpos,errno)); DBUG_RETURN(MY_FILEPOS_ERROR); } @@ -83,7 +85,7 @@ my_off_t my_seek(File fd, my_off_t pos, int whence, /* Tell current position of file */ /* ARGSUSED */ -my_off_t my_tell(File fd, myf MyFlags __attribute__((unused))) +my_off_t my_tell(File fd, myf MyFlags) { os_off_t pos; DBUG_ENTER("my_tell"); @@ -95,7 +97,11 @@ my_off_t my_tell(File fd, myf MyFlags __attribute__((unused))) pos=lseek(fd, 0L, MY_SEEK_CUR); #endif if (pos == (os_off_t) -1) - my_errno=errno; + { + my_errno= errno; + if (MyFlags & MY_WME) + my_error(EE_CANT_SEEK, MYF(0), my_filename(fd), my_errno); + } DBUG_PRINT("exit",("pos: %lu", (ulong) pos)); DBUG_RETURN((my_off_t) pos); } /* my_tell */ -- cgit v1.2.1 From 815b9fedefa59b2807a5736b60a89c5ed98178d1 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Mon, 16 Nov 2009 17:34:08 +0200 Subject: Safety change to ensure read/black trees (used with heap tables) works on 64 bit setups where ulong <> size_t Don't retry test cases by default Fixed bug where we could (under unlikely error conditions) access not initialized variable include/my_tree.h: Safety change to ensure read/black trees (used with heap tables) works on 64 bit setups where ulong <> size_t (Pointed out by Bryan Aker) mysql-test/mysql-test-run.pl: Don't retry test cases by default This makes it too easy to miss failures and we have anyway to fix race conditions, not ignore them. 
mysys/tree.c: Safety change to ensure read/black trees (used with heap tables) works on 64 bit setups where ulong <> size_t sql/sql_delete.cc: Fixed bug where we could (under unlikely error conditions) access not initialized variable. (Pointed out by Bryan Aker) --- mysys/tree.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'mysys') diff --git a/mysys/tree.c b/mysys/tree.c index ef33f75b7c6..e4854581204 100644 --- a/mysys/tree.c +++ b/mysys/tree.c @@ -77,13 +77,13 @@ static void rb_insert(TREE *tree,TREE_ELEMENT ***parent, static void rb_delete_fixup(TREE *tree,TREE_ELEMENT ***parent); - /* The actuall code for handling binary trees */ +/* The actual code for handling binary trees */ #ifndef DBUG_OFF static int test_rb_tree(TREE_ELEMENT *element); #endif -void init_tree(TREE *tree, ulong default_alloc_size, ulong memory_limit, +void init_tree(TREE *tree, size_t default_alloc_size, size_t memory_limit, int size, qsort_cmp2 compare, my_bool with_delete, tree_element_free free_element, void *custom_arg) { @@ -96,7 +96,7 @@ void init_tree(TREE *tree, ulong default_alloc_size, ulong memory_limit, bzero((uchar*) &tree->null_element,sizeof(tree->null_element)); tree->root= &tree->null_element; tree->compare=compare; - tree->size_of_element=size > 0 ? (uint) size : 0; + tree->size_of_element= size > 0 ? 
(uint) size : 0; tree->memory_limit=memory_limit; tree->free=free_element; tree->allocated=0; @@ -127,7 +127,7 @@ void init_tree(TREE *tree, ulong default_alloc_size, ulong memory_limit, } if (!(tree->with_delete=with_delete)) { - init_alloc_root(&tree->mem_root, (uint) default_alloc_size, 0); + init_alloc_root(&tree->mem_root, default_alloc_size, 0); tree->mem_root.min_malloc=(sizeof(TREE_ELEMENT)+tree->size_of_element); } DBUG_VOID_RETURN; -- cgit v1.2.1 From 069eec35ae76104fd3d3ed0bc931171ba52a6f52 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Thu, 26 Nov 2009 01:18:23 +0200 Subject: Fixed LPBUG#485443 --with-fast-mutexes and without safe mutexes (debug build) maria do not builds Added 'mariadb_SERVER' as extra config group for MariaDB embedded server client/mysql.cc: Cleanup Added 'mariadb_SERVER' as extra config group for MariaDB embedded server mysys/thr_mutex.c: Fixed LPBUG#485443 --with-fast-mutexes and without safe mutexes (debug build) maria do not builds --- mysys/thr_mutex.c | 31 +++++-------------------------- 1 file changed, 5 insertions(+), 26 deletions(-) (limited to 'mysys') diff --git a/mysys/thr_mutex.c b/mysys/thr_mutex.c index b5abf987461..5ab1e443a88 100644 --- a/mysys/thr_mutex.c +++ b/mysys/thr_mutex.c @@ -36,6 +36,7 @@ #undef pthread_mutex_init #undef pthread_mutex_lock #undef pthread_mutex_unlock +#undef pthread_mutex_trylock #undef pthread_mutex_destroy #undef pthread_cond_wait #undef pthread_cond_timedwait @@ -838,31 +839,9 @@ static void print_deadlock_warning(safe_mutex_t *new_mutex, DBUG_VOID_RETURN; } +#elif defined(MY_PTHREAD_FASTMUTEX) -#endif /* THREAD && SAFE_MUTEX */ - -#if defined(THREAD) && defined(MY_PTHREAD_FASTMUTEX) && !defined(SAFE_MUTEX) - -#include "mysys_priv.h" -#include "my_static.h" -#include - -#include -#include -#include -#include -#include - -#undef pthread_mutex_t -#undef pthread_mutex_init -#undef pthread_mutex_lock -#undef pthread_mutex_trylock -#undef pthread_mutex_unlock -#undef 
pthread_mutex_destroy -#undef pthread_cond_wait -#undef pthread_cond_timedwait - -ulong mutex_delay(ulong delayloops) +static ulong mutex_delay(ulong delayloops) { ulong i; volatile ulong j; @@ -943,6 +922,6 @@ void fastmutex_global_init(void) cpu_count= sysconf(_SC_NPROCESSORS_CONF); #endif } - -#endif /* SAFE_MUTEX_DEFINED */ + +#endif /* defined(MY_PTHREAD_FASTMUTEX) */ #endif /* THREAD */ -- cgit v1.2.1 From d13c54351dd7ec5c538ff746704c6a8096b25776 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Mon, 30 Nov 2009 01:08:56 +0200 Subject: Remove compiler warnings (Including some warnings from -Wstrict-aliasing) Don't use static link by default (in compile-pentium) as some new systems doesn't have all static libraries available Change type for functions in plugin.h:str_mysql_ftparser_param() to const unsigned char and string lengths to size_t. One effect of the above change is that one needs to include mysql_global.h or define size_t before including plugin.h This fixes a case where mysql_client_test failed with newer gcc that enables strict-aliasing by default BUILD/compile-pentium: Don't use static link by default as some new systems doesn't have all static libraries available client/mysql_upgrade.c: Remove not used variable cmd-line-utils/readline/config_readline.h: Define some constants to get rid of compiler warnings on Linux cmd-line-utils/readline/display.c: Get rid of compiler warnings cmd-line-utils/readline/history.c: Got rid of compiler warnings: - Defining some strings as const - Added cast cmd-line-utils/readline/rlmbutil.h: Added cast to get rid of compiler warnings cmd-line-utils/readline/text.c: Remove not needed initialization to get rid of compiler warnings cmd-line-utils/readline/xmalloc.c: Changed types to 'const char* to get rid of compiler warnings configure.in: Ensure that we use MariaDB as suffix include/mysql/plugin.h: Changed types to 'const unsigned char* to get rid of compiler warnings (in other parts of the code) Change length for 
not \0 terminated string to size_t include/mysql/plugin.h.pp: Update related to plugin.h libmysql/libmysql.c: Fixed bug that caused core dump with newer gcc when strict aliasing is not turned off mysql-test/t/information_schema.test: Test is depending on innodb mysql-test/t/not_partition.test: Fixed wrong directory name (Not noticed before as we don't ususally run this test) mysys/lf_hash.c: Got rid of compiler warnings from -Wstrict-aliasing mysys/my_redel.c: Removed not used variable regex/engine.c: Changed types to 'const char* to get rid of compiler warnings regex/engine.ih: Changed types to 'const char* to get rid of compiler warnings sql/sp_head.cc: Got rid of compiler warning from -Wstrict-aliasing sql/sql_base.cc: Got rid of compiler warnings from -Wstrict-aliasing (The original code was probably wrong as nj_col->table_field was sql/sql_builtin.cc.in: plugin.h needs to have size_t defined sql/sql_parse.cc: Remove used variable sql/sql_select.cc: Got rid of compiler warnings from -Wstrict-aliasing sql/sql_show.cc: Added #ifdef to get rid of compiler warning when not using partition engine sql/table.cc: Got rid of compiler warning from -Wstrict-aliasing storage/maria/ha_maria.cc: Got rid of compiler warnings from -Wstrict-aliasing: - Use the thd_killed() API function storage/maria/lockman.c: Got rid of compiler warnings from -Wstrict-aliasing storage/maria/ma_check.c: Got rid of compiler warnings from -Wstrict-aliasing Change to use new version of _ma_killed_ptr; Don't call it as often as before storage/maria/ma_check_standalone.h: Update to compatible _ma_killed_ptr() from ha_maria.cc storage/maria/ma_ft_boolean_search.c: Changed pointers from char -> const char* and length to size_t (to get rid of compiler warnings and casts) storage/maria/ma_ft_nlq_search.c: Got rid of compiler warnings from -Wstrict-aliasing Ensure that 'subkeys' is 32 bit storage/maria/ma_ft_parser.c: Changed pointers from char -> const char* and length to size_t (to get rid of compiler 
warnings and casts) storage/maria/ma_ftdefs.h: Changed pointers from char -> const char* and length to size_t (to get rid of compiler warnings and casts) storage/maria/ma_sort.c: Change to use new version of _ma_killed_ptr; Don't call it as often as before storage/maria/ma_state.c: Got rid of compiler warnings from -Wstrict-aliasing storage/maria/maria_def.h: Redefine ma_killed_ptr() storage/maria/maria_ftdump.c: Got rid of compiler warnings from -Wstrict-aliasing storage/maria/trnman.c: Got rid of compiler warnings from -Wstrict-aliasing storage/myisam/ft_boolean_search.c: Changed pointers from char -> const char* and length to size_t (to get rid of compiler warnings and casts) storage/myisam/ft_nlq_search.c: Got rid of compiler warnings from -Wstrict-aliasing storage/myisam/ft_parser.c: Changed pointers from char -> const char* and length to size_t (to get rid of compiler warnings and casts) storage/myisam/ft_stopwords.c: Changed pointers from char -> const char* and length to size_t (to get rid of compiler warnings and casts) storage/myisam/ftdefs.h: Changed pointers from char -> const char* and length to size_t (to get rid of compiler warnings and casts) storage/myisam/ha_myisam.cc: Got rid of compiler warnings from -Wstrict-aliasing: - Use the thd_killed() API function storage/myisam/mi_check.c: Use new killed_ptr() function storage/myisam/myisam_ftdump.c: Got rid of compiler warnings from -Wstrict-aliasing storage/myisam/myisamchk.c: Update to compatible killed_ptr() from ha_myisam.cc storage/myisam/myisamdef.h: Redefine killed_ptr() storage/myisam/myisamlog.c: Got rid of compiler warnings from -Wstrict-aliasing storage/myisam/sort.c: Change to use new version of killed_ptr; Don't call it as often as before storage/xtradb/fil/fil0fil.c: Fixedc ompiler warning storage/xtradb/trx/trx0i_s.c: Include mysql_plugin.h later to ensure that size_t is defined --- mysys/lf_hash.c | 15 ++++++++------- mysys/my_redel.c | 3 --- 2 files changed, 8 insertions(+), 10 
deletions(-) (limited to 'mysys') diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c index ce7056af995..6569bafc00d 100644 --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -124,8 +124,8 @@ retry: we found a deleted node - be nice, help the other thread and remove this deleted node */ - if (my_atomic_casptr((void **)cursor->prev, - (void **)&cursor->curr, cursor->next)) + if (my_atomic_casptr((void **) cursor->prev, + (void **)(char*) &cursor->curr, cursor->next)) _lf_alloc_free(pins, cursor->curr); else { @@ -171,7 +171,8 @@ static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs, node->link= (intptr)cursor.curr; DBUG_ASSERT(node->link != (intptr)node); /* no circular references */ DBUG_ASSERT(cursor.prev != &node->link); /* no circular references */ - if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node)) + if (my_atomic_casptr((void **) cursor.prev, + (void **)(char*) &cursor.curr, node)) { res= 1; /* inserted ok */ break; @@ -218,13 +219,13 @@ static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, else { /* mark the node deleted */ - if (my_atomic_casptr((void **)&(cursor.curr->link), - (void **)&cursor.next, + if (my_atomic_casptr((void **) (char*) &(cursor.curr->link), + (void **) (char*) &cursor.next, (void *)(((intptr)cursor.next) | 1))) { /* and remove it from the list */ if (my_atomic_casptr((void **)cursor.prev, - (void **)&cursor.curr, cursor.next)) + (void **)(char*)&cursor.curr, cursor.next)) _lf_alloc_free(pins, cursor.curr); else { @@ -493,7 +494,7 @@ static int initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, my_free((void *)dummy, MYF(0)); dummy= cur; } - my_atomic_casptr((void **)node, (void **)&tmp, dummy); + my_atomic_casptr((void **)node, (void **)(char*) &tmp, dummy); /* note that if the CAS above failed (after linsert() succeeded), it would mean that some other thread has executed linsert() for diff --git a/mysys/my_redel.c b/mysys/my_redel.c index 6b0ceb85950..598a728393d 100644 --- 
a/mysys/my_redel.c +++ b/mysys/my_redel.c @@ -77,9 +77,6 @@ end: int my_copystat(const char *from, const char *to, int MyFlags) { struct stat statbuf; -#if !defined(__WIN__) && !defined(__NETWARE__) - int res; -#endif if (stat((char*) from, &statbuf)) { -- cgit v1.2.1 From 4c14f9f23c725fd15b6b3c29bff07d925047e8ac Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Mon, 30 Nov 2009 14:42:24 +0200 Subject: Added more general support for sorting 2 characters as one (contractions) Added support for Croatian sorting orders utf8_croatian_ci and ucs2_croatian_ci. Patch done by Alexander Barkov. See http://www.collation-charts.org/articles/croatian.htm mysql-test/r/ctype_uca.result: Added testing of Croatian sort order mysql-test/t/ctype_uca.test: Added testing of Croatian sort order --- mysys/charset-def.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'mysys') diff --git a/mysys/charset-def.c b/mysys/charset-def.c index 63bbceef29b..5c68258ada1 100644 --- a/mysys/charset-def.c +++ b/mysys/charset-def.c @@ -42,6 +42,7 @@ extern CHARSET_INFO my_charset_ucs2_roman_uca_ci; extern CHARSET_INFO my_charset_ucs2_persian_uca_ci; extern CHARSET_INFO my_charset_ucs2_esperanto_uca_ci; extern CHARSET_INFO my_charset_ucs2_hungarian_uca_ci; +extern CHARSET_INFO my_charset_ucs2_croatian_uca_ci; #endif #ifdef HAVE_CHARSET_utf8 @@ -63,6 +64,7 @@ extern CHARSET_INFO my_charset_utf8_roman_uca_ci; extern CHARSET_INFO my_charset_utf8_persian_uca_ci; extern CHARSET_INFO my_charset_utf8_esperanto_uca_ci; extern CHARSET_INFO my_charset_utf8_hungarian_uca_ci; +extern CHARSET_INFO my_charset_utf8_croatian_uca_ci; #ifdef HAVE_UTF8_GENERAL_CS extern CHARSET_INFO my_charset_utf8_general_cs; #endif @@ -152,6 +154,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused))) add_compiled_collation(&my_charset_ucs2_persian_uca_ci); add_compiled_collation(&my_charset_ucs2_esperanto_uca_ci); add_compiled_collation(&my_charset_ucs2_hungarian_uca_ci); + 
add_compiled_collation(&my_charset_ucs2_croatian_uca_ci); #endif #endif @@ -186,6 +189,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused))) add_compiled_collation(&my_charset_utf8_persian_uca_ci); add_compiled_collation(&my_charset_utf8_esperanto_uca_ci); add_compiled_collation(&my_charset_utf8_hungarian_uca_ci); + add_compiled_collation(&my_charset_utf8_croatian_uca_ci); #endif #endif -- cgit v1.2.1 From ce068186f698872e6d7c7f475ffa2bca664f070d Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Mon, 4 Jan 2010 20:31:26 +0200 Subject: Fixed bug in my_uuid() that caused failures on hpux and ia64 --- mysys/my_uuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_uuid.c b/mysys/my_uuid.c index d1e8331aaa1..f115806b4e9 100644 --- a/mysys/my_uuid.c +++ b/mysys/my_uuid.c @@ -108,7 +108,7 @@ void my_uuid_init(ulong seed1, ulong seed2) */ /* purecov: begin inspected */ my_rnd_init(&uuid_rand, (ulong) (seed2+ now/2), (ulong) (now+rand())); - for (i=0; i < sizeof(mac); i++) + for (i=0; i < array_elements(uuid_suffix) -2 ; i++) mac[i]= (uchar)(my_rnd(&uuid_rand)*255); /* purecov: end */ } -- cgit v1.2.1 From f83113df07d6ef8e8a6d1db8f6dc3bb90fb0652a Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Wed, 6 Jan 2010 21:20:16 +0200 Subject: Applied Antony T Curtis patch for declaring many CHARSET objects as const Removed compiler warnings extra/libevent/epoll.c: Removed compiler warnings extra/libevent/evbuffer.c: Removed compiler warnings extra/libevent/event.c: Removed compiler warnings extra/libevent/select.c: Removed compiler warnings extra/libevent/signal.c: Removed compiler warnings include/m_ctype.h: Define CHARSET_INFO, MY_CHARSET_HANDLER, MY_COLLATION_HANDLER, MY_UNICASE_INFO, MY_UNI_CTYPE and MY_UNI_IDX as const structures. 
Declare that pointers point to const data include/m_string.h: Declare that pointers point to const data include/my_sys.h: Redefine variables and function prototypes include/mysql.h: Declare charset as const include/mysql.h.pp: Declare charset as const include/mysql/plugin.h: Declare charset as const include/mysql/plugin.h.pp: Declare charset as const mysys/charset-def.c: Charset can't be of type CHARSET_INFO as they are changed when they are initialized. mysys/charset.c: Functions that change CHARSET_INFO must use 'struct charset_info_st' Add temporary variables to not have to change all_charsets[] (Which now is const) sql-common/client.c: Added cast to const sql/item_cmpfunc.h: Added cast to avoid compiler error. sql/sql_class.cc: Added cast to const sql/sql_lex.cc: Added cast to const storage/maria/ma_ft_boolean_search.c: Added cast to avoid compiler error. storage/maria/ma_ft_parser.c: Added cast to avoid compiler error. storage/maria/ma_search.c: Added cast to const storage/myisam/ft_boolean_search.c: Added cast to avoid compiler error storage/myisam/ft_parser.c: Added cast to avoid compiler error storage/myisam/mi_search.c: Added cast to const storage/pbxt/src/datadic_xt.cc: Added cast to const storage/pbxt/src/ha_pbxt.cc: Added cast to const Removed compiler warning by changing prototype of XTThreadPtr() storage/pbxt/src/myxt_xt.h: Character sets should be const storage/pbxt/src/xt_defs.h: Character sets should be const storage/xtradb/btr/btr0cur.c: Removed compiler warning strings/conf_to_src.c: Added const Functions that change CHARSET_INFO must use 'struct charset_info_st' strings/ctype-big5.c: Made arrays const strings/ctype-bin.c: Made arrays const strings/ctype-cp932.c: Made arrays const strings/ctype-czech.c: Made arrays const strings/ctype-euc_kr.c: Made arrays const strings/ctype-eucjpms.c: Made arrays const strings/ctype-extra.c: Made arrays const strings/ctype-gb2312.c: Made arrays const strings/ctype-gbk.c: Made arrays const 
strings/ctype-latin1.c: Made arrays const strings/ctype-mb.c: Made arrays const strings/ctype-simple.c: Made arrays const strings/ctype-sjis.c: Made arrays const strings/ctype-tis620.c: Made arrays const strings/ctype-uca.c: Made arrays const strings/ctype-ucs2.c: Made arrays const strings/ctype-ujis.c: Made arrays const strings/ctype-utf8.c: Made arrays const strings/ctype-win1250ch.c: Made arrays const strings/ctype.c: Made arrays const Added cast to const Functions that change CHARSET_INFO must use 'struct charset_info_st' strings/int2str.c: Added cast to const --- mysys/charset-def.c | 80 ++++++++++++++++++++++++++--------------------------- mysys/charset.c | 73 +++++++++++++++++++++++------------------------- 2 files changed, 75 insertions(+), 78 deletions(-) (limited to 'mysys') diff --git a/mysys/charset-def.c b/mysys/charset-def.c index 5c68258ada1..db10000e174 100644 --- a/mysys/charset-def.c +++ b/mysys/charset-def.c @@ -24,49 +24,49 @@ #ifdef HAVE_UCA_COLLATIONS #ifdef HAVE_CHARSET_ucs2 -extern CHARSET_INFO my_charset_ucs2_icelandic_uca_ci; -extern CHARSET_INFO my_charset_ucs2_latvian_uca_ci; -extern CHARSET_INFO my_charset_ucs2_romanian_uca_ci; -extern CHARSET_INFO my_charset_ucs2_slovenian_uca_ci; -extern CHARSET_INFO my_charset_ucs2_polish_uca_ci; -extern CHARSET_INFO my_charset_ucs2_estonian_uca_ci; -extern CHARSET_INFO my_charset_ucs2_spanish_uca_ci; -extern CHARSET_INFO my_charset_ucs2_swedish_uca_ci; -extern CHARSET_INFO my_charset_ucs2_turkish_uca_ci; -extern CHARSET_INFO my_charset_ucs2_czech_uca_ci; -extern CHARSET_INFO my_charset_ucs2_danish_uca_ci; -extern CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci; -extern CHARSET_INFO my_charset_ucs2_slovak_uca_ci; -extern CHARSET_INFO my_charset_ucs2_spanish2_uca_ci; -extern CHARSET_INFO my_charset_ucs2_roman_uca_ci; -extern CHARSET_INFO my_charset_ucs2_persian_uca_ci; -extern CHARSET_INFO my_charset_ucs2_esperanto_uca_ci; -extern CHARSET_INFO my_charset_ucs2_hungarian_uca_ci; -extern CHARSET_INFO 
my_charset_ucs2_croatian_uca_ci; +extern struct charset_info_st my_charset_ucs2_icelandic_uca_ci; +extern struct charset_info_st my_charset_ucs2_latvian_uca_ci; +extern struct charset_info_st my_charset_ucs2_romanian_uca_ci; +extern struct charset_info_st my_charset_ucs2_slovenian_uca_ci; +extern struct charset_info_st my_charset_ucs2_polish_uca_ci; +extern struct charset_info_st my_charset_ucs2_estonian_uca_ci; +extern struct charset_info_st my_charset_ucs2_spanish_uca_ci; +extern struct charset_info_st my_charset_ucs2_swedish_uca_ci; +extern struct charset_info_st my_charset_ucs2_turkish_uca_ci; +extern struct charset_info_st my_charset_ucs2_czech_uca_ci; +extern struct charset_info_st my_charset_ucs2_danish_uca_ci; +extern struct charset_info_st my_charset_ucs2_lithuanian_uca_ci; +extern struct charset_info_st my_charset_ucs2_slovak_uca_ci; +extern struct charset_info_st my_charset_ucs2_spanish2_uca_ci; +extern struct charset_info_st my_charset_ucs2_roman_uca_ci; +extern struct charset_info_st my_charset_ucs2_persian_uca_ci; +extern struct charset_info_st my_charset_ucs2_esperanto_uca_ci; +extern struct charset_info_st my_charset_ucs2_hungarian_uca_ci; +extern struct charset_info_st my_charset_ucs2_croatian_uca_ci; #endif #ifdef HAVE_CHARSET_utf8 -extern CHARSET_INFO my_charset_utf8_icelandic_uca_ci; -extern CHARSET_INFO my_charset_utf8_latvian_uca_ci; -extern CHARSET_INFO my_charset_utf8_romanian_uca_ci; -extern CHARSET_INFO my_charset_utf8_slovenian_uca_ci; -extern CHARSET_INFO my_charset_utf8_polish_uca_ci; -extern CHARSET_INFO my_charset_utf8_estonian_uca_ci; -extern CHARSET_INFO my_charset_utf8_spanish_uca_ci; -extern CHARSET_INFO my_charset_utf8_swedish_uca_ci; -extern CHARSET_INFO my_charset_utf8_turkish_uca_ci; -extern CHARSET_INFO my_charset_utf8_czech_uca_ci; -extern CHARSET_INFO my_charset_utf8_danish_uca_ci; -extern CHARSET_INFO my_charset_utf8_lithuanian_uca_ci; -extern CHARSET_INFO my_charset_utf8_slovak_uca_ci; -extern CHARSET_INFO 
my_charset_utf8_spanish2_uca_ci; -extern CHARSET_INFO my_charset_utf8_roman_uca_ci; -extern CHARSET_INFO my_charset_utf8_persian_uca_ci; -extern CHARSET_INFO my_charset_utf8_esperanto_uca_ci; -extern CHARSET_INFO my_charset_utf8_hungarian_uca_ci; -extern CHARSET_INFO my_charset_utf8_croatian_uca_ci; +extern struct charset_info_st my_charset_utf8_icelandic_uca_ci; +extern struct charset_info_st my_charset_utf8_latvian_uca_ci; +extern struct charset_info_st my_charset_utf8_romanian_uca_ci; +extern struct charset_info_st my_charset_utf8_slovenian_uca_ci; +extern struct charset_info_st my_charset_utf8_polish_uca_ci; +extern struct charset_info_st my_charset_utf8_estonian_uca_ci; +extern struct charset_info_st my_charset_utf8_spanish_uca_ci; +extern struct charset_info_st my_charset_utf8_swedish_uca_ci; +extern struct charset_info_st my_charset_utf8_turkish_uca_ci; +extern struct charset_info_st my_charset_utf8_czech_uca_ci; +extern struct charset_info_st my_charset_utf8_danish_uca_ci; +extern struct charset_info_st my_charset_utf8_lithuanian_uca_ci; +extern struct charset_info_st my_charset_utf8_slovak_uca_ci; +extern struct charset_info_st my_charset_utf8_spanish2_uca_ci; +extern struct charset_info_st my_charset_utf8_roman_uca_ci; +extern struct charset_info_st my_charset_utf8_persian_uca_ci; +extern struct charset_info_st my_charset_utf8_esperanto_uca_ci; +extern struct charset_info_st my_charset_utf8_hungarian_uca_ci; +extern struct charset_info_st my_charset_utf8_croatian_uca_ci; #ifdef HAVE_UTF8_GENERAL_CS -extern CHARSET_INFO my_charset_utf8_general_cs; +extern struct charset_info_st my_charset_utf8_general_cs; #endif #endif @@ -195,7 +195,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused))) /* Copy compiled charsets */ for (cs=compiled_charsets; cs->name; cs++) - add_compiled_collation(cs); + add_compiled_collation((struct charset_info_st *) cs); return FALSE; } diff --git a/mysys/charset.c b/mysys/charset.c index 84723a88a96..aaf071fe0d2 
100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -53,21 +53,18 @@ get_collation_number_internal(const char *name) } -static my_bool init_state_maps(CHARSET_INFO *cs) +static my_bool init_state_maps(struct charset_info_st *cs) { uint i; uchar *state_map; uchar *ident_map; - if (!(cs->state_map= (uchar*) my_once_alloc(256, MYF(MY_WME)))) + if (!(cs->state_map= state_map= (uchar*) my_once_alloc(256, MYF(MY_WME)))) return 1; - if (!(cs->ident_map= (uchar*) my_once_alloc(256, MYF(MY_WME)))) + if (!(cs->ident_map= ident_map= (uchar*) my_once_alloc(256, MYF(MY_WME)))) return 1; - state_map= cs->state_map; - ident_map= cs->ident_map; - /* Fill state_map with states to get a faster parser */ for (i=0; i < 256 ; i++) { @@ -118,7 +115,7 @@ static my_bool init_state_maps(CHARSET_INFO *cs) } -static void simple_cs_init_functions(CHARSET_INFO *cs) +static void simple_cs_init_functions(struct charset_info_st *cs) { if (cs->state & MY_CS_BINSORT) cs->coll= &my_collation_8bit_bin_handler; @@ -130,7 +127,7 @@ static void simple_cs_init_functions(CHARSET_INFO *cs) -static int cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from) +static int cs_copy_data(struct charset_info_st *to, CHARSET_INFO *from) { to->number= from->number ? 
from->number : to->number; @@ -203,7 +200,7 @@ static my_bool simple_cs_is_full(CHARSET_INFO *cs) static void -copy_uca_collation(CHARSET_INFO *to, CHARSET_INFO *from) +copy_uca_collation(struct charset_info_st *to, CHARSET_INFO *from) { to->cset= from->cset; to->coll= from->coll; @@ -217,17 +214,18 @@ copy_uca_collation(CHARSET_INFO *to, CHARSET_INFO *from) } -static int add_collation(CHARSET_INFO *cs) +static int add_collation(struct charset_info_st *cs) { if (cs->name && (cs->number || (cs->number=get_collation_number_internal(cs->name)))) { - if (!all_charsets[cs->number]) + struct charset_info_st *newcs; + if (!(newcs= (struct charset_info_st*) all_charsets[cs->number])) { - if (!(all_charsets[cs->number]= - (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO),MYF(0)))) + if (!(all_charsets[cs->number]= newcs= + (struct charset_info_st*) my_once_alloc(sizeof(CHARSET_INFO),MYF(0)))) return MY_XML_ERROR; - bzero((void*)all_charsets[cs->number],sizeof(CHARSET_INFO)); + bzero(newcs,sizeof(CHARSET_INFO)); } if (cs->primary_number == cs->number) @@ -236,12 +234,11 @@ static int add_collation(CHARSET_INFO *cs) if (cs->binary_number == cs->number) cs->state |= MY_CS_BINSORT; - all_charsets[cs->number]->state|= cs->state; + newcs->state|= cs->state; - if (!(all_charsets[cs->number]->state & MY_CS_COMPILED)) + if (!(newcs->state & MY_CS_COMPILED)) { - CHARSET_INFO *newcs= all_charsets[cs->number]; - if (cs_copy_data(all_charsets[cs->number],cs)) + if (cs_copy_data(newcs,cs)) return MY_XML_ERROR; if (!strcmp(cs->csname,"ucs2") ) @@ -259,15 +256,15 @@ static int add_collation(CHARSET_INFO *cs) } else { - uchar *sort_order= all_charsets[cs->number]->sort_order; - simple_cs_init_functions(all_charsets[cs->number]); + const uchar *sort_order= newcs->sort_order; + simple_cs_init_functions(newcs); newcs->mbminlen= 1; newcs->mbmaxlen= 1; - if (simple_cs_is_full(all_charsets[cs->number])) + if (simple_cs_is_full(newcs)) { - all_charsets[cs->number]->state |= MY_CS_LOADED; + 
newcs->state |= MY_CS_LOADED; } - all_charsets[cs->number]->state|= MY_CS_AVAILABLE; + newcs->state|= MY_CS_AVAILABLE; /* Check if case sensitive sort order: A < a < B. @@ -277,12 +274,12 @@ static int add_collation(CHARSET_INFO *cs) */ if (sort_order && sort_order['A'] < sort_order['a'] && sort_order['a'] < sort_order['B']) - all_charsets[cs->number]->state|= MY_CS_CSSORT; + newcs->state|= MY_CS_CSSORT; - if (my_charset_is_8bit_pure_ascii(all_charsets[cs->number])) - all_charsets[cs->number]->state|= MY_CS_PUREASCII; + if (my_charset_is_8bit_pure_ascii(newcs)) + newcs->state|= MY_CS_PUREASCII; if (!my_charset_is_ascii_compatible(cs)) - all_charsets[cs->number]->state|= MY_CS_NONASCII; + newcs->state|= MY_CS_NONASCII; } } else @@ -296,16 +293,15 @@ static int add_collation(CHARSET_INFO *cs) If a character set was compiled, this information will get lost and overwritten in add_compiled_collation(). */ - CHARSET_INFO *dst= all_charsets[cs->number]; - dst->number= cs->number; + newcs->number= cs->number; if (cs->comment) - if (!(dst->comment= my_once_strdup(cs->comment,MYF(MY_WME)))) + if (!(newcs->comment= my_once_strdup(cs->comment,MYF(MY_WME)))) return MY_XML_ERROR; if (cs->csname) - if (!(dst->csname= my_once_strdup(cs->csname,MYF(MY_WME)))) + if (!(newcs->csname= my_once_strdup(cs->csname,MYF(MY_WME)))) return MY_XML_ERROR; if (cs->name) - if (!(dst->name= my_once_strdup(cs->name,MYF(MY_WME)))) + if (!(newcs->name= my_once_strdup(cs->name,MYF(MY_WME)))) return MY_XML_ERROR; } cs->number= 0; @@ -390,7 +386,7 @@ char *get_charsets_dir(char *buf) CHARSET_INFO *all_charsets[256]={NULL}; CHARSET_INFO *default_charset_info = &my_charset_latin1; -void add_compiled_collation(CHARSET_INFO *cs) +void add_compiled_collation(struct charset_info_st *cs) { all_charsets[cs->number]= cs; cs->state|= MY_CS_AVAILABLE; @@ -416,7 +412,7 @@ static my_bool init_available_charsets(myf myflags) */ if (!charset_initialized) { - CHARSET_INFO **cs; + struct charset_info_st **cs; /* To make 
things thread safe we are not allowing other threads to interfere while we may changing the cs_info_table @@ -428,8 +424,9 @@ static my_bool init_available_charsets(myf myflags) init_compiled_charsets(myflags); /* Copy compiled charsets */ - for (cs=all_charsets; - cs < all_charsets+array_elements(all_charsets)-1 ; + for (cs= (struct charset_info_st**) all_charsets; + cs < (struct charset_info_st**) all_charsets + + array_elements(all_charsets)-1 ; cs++) { if (*cs) @@ -496,9 +493,9 @@ const char *get_charset_name(uint charset_number) static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags) { char buf[FN_REFLEN]; - CHARSET_INFO *cs; + struct charset_info_st *cs; - if ((cs= all_charsets[cs_number])) + if ((cs= (struct charset_info_st*) all_charsets[cs_number])) { if (cs->state & MY_CS_READY) /* if CS is already initialized */ return cs; -- cgit v1.2.1 From b96f268acc3a693b673fceccc62311e169496731 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Wed, 6 Jan 2010 23:27:53 +0200 Subject: Removed compiler warnings Fixed sporadic test failure for suit/pbxt/t/lock_multi.test Fixed sporadic test faulure for suit/rpl/t/do_grant.test OpenSolaris 5.11-x86 now compiles (tested with 32 bit) BUILD/compile-solaris-amd64-debug-forte: Added execute bit BUILD/compile-solaris-x86-32: Added execute bit BUILD/compile-solaris-x86-32-debug: Added execute bit BUILD/compile-solaris-x86-32-debug-forte: Added execute bit BUILD/compile-solaris-x86-forte-32: Added execute bit extra/libevent/devpoll.c: Removed compiler warning extra/libevent/evbuffer.c: Removed compiler warning extra/libevent/select.c: Removed compiler warning mysql-test/mysql-test-run.pl: Fixed sporadic test faulure for suit/rpl/t/do_grant.test (Seen on OpenSolaris) mysql-test/suite/pbxt/r/lock_multi.result: Fixed sporadic test failure for suit/pbxt/t/lock_multi.test (seen in buildbot) This was done by merging the test with main/lock_multi.test mysql-test/suite/pbxt/t/lock_multi.test: Fixed sporadic test failure 
for suit/pbxt/t/lock_multi.test (seen in buildbot) This was done by merging the test with main/lock_multi.test mysys/my_sync.c: Removed compiler warnings sql/ha_ndbcluster.cc: Fixed linking error on OpenSolaris when compiling without ndb Bug #34866 Can't compile on Solaris 9/Sparc with gcc storage/archive/azlib.h: Removed compiler warning about redefined symbols storage/maria/ma_blockrec.c: Removed compiler warning storage/maria/ma_loghandler.c: Removed compiler warning storage/maria/ma_test3.c: Removed compiler warning storage/myisam/mi_test3.c: Removed compiler warning storage/pbxt/src/ha_pbxt.cc: Removed compiler warning thr_main -> thr_main_pbxt storage/pbxt/src/restart_xt.cc: thr_main -> thr_main_pbxt storage/pbxt/src/thread_xt.cc: thr_main -> thr_main_pbxt This was needed as thr_main() is an internal thread function on OpenSolaris() storage/pbxt/src/thread_xt.h: thr_main -> thr_main_pbxt storage/xtradb/srv/srv0srv.c: Use compatiblity macro to get code to work on OpenSolaris support-files/compiler_warnings.supp: Ignore compiler warning from yassl --- mysys/my_sync.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/my_sync.c b/mysys/my_sync.c index 1b8420c034e..f3503f29a95 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -100,7 +100,8 @@ static const char cur_dir_name[]= {FN_CURLIB, 0}; RETURN 0 if ok, !=0 if error */ -int my_sync_dir(const char *dir_name, myf my_flags) +int my_sync_dir(const char *dir_name __attribute__((unused)), + myf my_flags __attribute__((unused))) { #ifdef NEED_EXPLICIT_SYNC_DIR DBUG_ENTER("my_sync_dir"); @@ -141,7 +142,8 @@ int my_sync_dir(const char *dir_name, myf my_flags) RETURN 0 if ok, !=0 if error */ -int my_sync_dir_by_file(const char *file_name, myf my_flags) +int my_sync_dir_by_file(const char *file_name __attribute__((unused)), + myf my_flags __attribute__((unused))) { #ifdef NEED_EXPLICIT_SYNC_DIR char dir_name[FN_REFLEN]; -- cgit v1.2.1 From 
e9bce6c9d4bde35306b845e22e9b5ada69c4512f Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Fri, 29 Jan 2010 20:42:22 +0200 Subject: Patch set contributed by Alex Budovski (MCA) Fix for Bug#31173: mysqlslap.exe crashes if called without any parameters .bzrignore: Fixed .bzrignore rules. Many were simply not ignoring what they were meant to. client/mysqlslap.c: Fixed bug for Bug#31173: mysqlslap.exe crashes if called without any parameters The original patch could cause memory leaks and odd problems depending on how connection was made. This code ensures that all mysql_options() are set for each mysql_real_connect(). (This patch by Monty) mysys/my_thr_init.c: Fixed multiply-initialized critical section on Windows, due to code incorrectly checking the wrong field in an attempt to prevent multiple-initialization. sql-common/client.c: Don't use shared memory if it's not set (for example after failed mysql_real_connect). Ensure that mysql_close() resets all resources so that it's safe to call it twice. (Patch by monty, related to Bug#31173: mysqlslap.exe crashes if called without any parameters) sql/CMakeLists.txt: Added page fault counters for SHOW PROFILE on Windows. sql/mysqld.cc: Fixed attempt to set a NULL event. The code now only sets the event if appropriate (i.e. shared memory is being used) sql/sql_profile.cc: Added page fault counters for SHOW PROFILE on Windows. sql/sql_profile.h: Added page fault counters for SHOW PROFILE on Windows. sql/udf_example.def: Some cleanup functions were not exported from udf_example.dll, causing them to never be executed, and as a result multiple-initialization of kernel objects occurred and resources were not being freed correctly. storage/maria/ma_close.c: Condition variable share->key_del_cond was never being destroyed, while its containing heap block was being freed in maria_close(), leaking kernel resources. 
--- mysys/my_thr_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c index e105b48f386..9de8b2764da 100644 --- a/mysys/my_thr_init.c +++ b/mysys/my_thr_init.c @@ -317,7 +317,7 @@ my_bool my_thread_init(void) /* Skip initialization if the thread specific variable is already initialized */ - if (THR_KEY_mysys.id) + if (THR_KEY_mysys.init) goto end; tmp= &THR_KEY_mysys; #endif -- cgit v1.2.1 From 7e4d89c97379249ace6633bc406ff2ec2418252a Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Tue, 16 Feb 2010 08:41:11 -0800 Subject: WL#86: Partitioned key cache for MyISAM. This is the base patch for the task. --- mysys/mf_keycache.c | 2419 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 2218 insertions(+), 201 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 0630d194234..1f1f0c35141 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -13,8 +13,38 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + /** - @file + @file + The file contains the following modules: + + Simple Key Cache Module + + Partitioned Key Cache Module + + Key Cache Interface Module + +*/ + +#include "mysys_priv.h" +#include "mysys_err.h" +#include +#include "my_static.h" +#include +#include +#include +#include + +/****************************************************************************** + Simple Key Cache Module + + The module contains implementations of all key cache interface functions + employed by partitioned key caches. + +******************************************************************************/ + +/* These functions handle keyblock cacheing for ISAM and MyISAM tables. One cache can handle many files. @@ -37,9 +67,7 @@ blocks_unused is the sum of never used blocks in the pool and of currently free blocks. 
blocks_used is the number of blocks fetched from the pool and as such gives the maximum number of in-use blocks at any time. -*/ -/* Key Cache Locking ================= @@ -104,14 +132,77 @@ I/O finished. */ -#include "mysys_priv.h" -#include "mysys_err.h" -#include -#include "my_static.h" -#include -#include -#include -#include +/* declare structures that is used by st_key_cache */ + +struct st_block_link; +typedef struct st_block_link BLOCK_LINK; +struct st_keycache_page; +typedef struct st_keycache_page KEYCACHE_PAGE; +struct st_hash_link; +typedef struct st_hash_link HASH_LINK; + +/* info about requests in a waiting queue */ +typedef struct st_keycache_wqueue +{ + struct st_my_thread_var *last_thread; /* circular list of waiting threads */ +} KEYCACHE_WQUEUE; + +#define CHANGED_BLOCKS_HASH 128 /* must be power of 2 */ + +/* Control block for a simple (non-partitioned) key cache */ + +typedef struct st_s_key_cache_cb +{ + my_bool key_cache_inited; /* <=> control block is allocated */ + my_bool in_resize; /* true during resize operation */ + my_bool resize_in_flush; /* true during flush of resize operation */ + my_bool can_be_used; /* usage of cache for read/write is allowed */ + size_t key_cache_mem_size; /* specified size of the cache memory */ + uint key_cache_block_size; /* size of the page buffer of a cache block */ + ulong min_warm_blocks; /* min number of warm blocks; */ + ulong age_threshold; /* age threshold for hot blocks */ + ulonglong keycache_time; /* total number of block link operations */ + uint hash_entries; /* max number of entries in the hash table */ + int hash_links; /* max number of hash links */ + int hash_links_used; /* number of hash links currently used */ + int disk_blocks; /* max number of blocks in the cache */ + ulong blocks_used; /* maximum number of concurrently used blocks */ + ulong blocks_unused; /* number of currently unused blocks */ + ulong blocks_changed; /* number of currently dirty blocks */ + ulong warm_blocks; /* number 
of blocks in warm sub-chain */ + ulong cnt_for_resize_op; /* counter to block resize operation */ + long blocks_available; /* number of blocks available in the LRU chain */ + HASH_LINK **hash_root; /* arr. of entries into hash table buckets */ + HASH_LINK *hash_link_root; /* memory for hash table links */ + HASH_LINK *free_hash_list; /* list of free hash links */ + BLOCK_LINK *free_block_list; /* list of free blocks */ + BLOCK_LINK *block_root; /* memory for block links */ + uchar HUGE_PTR *block_mem; /* memory for block buffers */ + BLOCK_LINK *used_last; /* ptr to the last block of the LRU chain */ + BLOCK_LINK *used_ins; /* ptr to the insertion block in LRU chain */ + pthread_mutex_t cache_lock; /* to lock access to the cache structure */ + KEYCACHE_WQUEUE resize_queue; /* threads waiting during resize operation */ + /* + Waiting for a zero resize count. Using a queue for symmetry though + only one thread can wait here. + */ + KEYCACHE_WQUEUE waiting_for_resize_cnt; + KEYCACHE_WQUEUE waiting_for_hash_link; /* waiting for a free hash link */ + KEYCACHE_WQUEUE waiting_for_block; /* requests waiting for a free block */ + BLOCK_LINK *changed_blocks[CHANGED_BLOCKS_HASH]; /* hash for dirty file bl.*/ + BLOCK_LINK *file_blocks[CHANGED_BLOCKS_HASH]; /* hash for other file bl.*/ + + /* Statistics variables. These are reset in reset_key_cache_counters(). 
*/ + ulong global_blocks_changed; /* number of currently dirty blocks */ + ulonglong global_cache_w_requests;/* number of write requests (write hits) */ + ulonglong global_cache_write; /* number of writes from cache to files */ + ulonglong global_cache_r_requests;/* number of read requests (read hits) */ + ulonglong global_cache_read; /* number of reads from files to cache */ + + int blocks; /* max number of blocks in the cache */ + uint hash_factor; /* factor used to calculate hash function */ + my_bool in_init; /* Set to 1 in MySQL during init/resize */ +} S_KEY_CACHE_CB; /* Some compilation flags have been added specifically for this module @@ -223,7 +314,12 @@ KEY_CACHE *dflt_key_cache= &dflt_key_cache_var; #define FLUSH_CACHE 2000 /* sort this many blocks at once */ -static int flush_all_key_blocks(KEY_CACHE *keycache); +static int flush_all_key_blocks(S_KEY_CACHE_CB *keycache); +/* +static void s_change_key_cache_param(void *keycache_cb, uint division_limit, + uint age_threshold); +*/ +static void s_end_key_cache(void *keycache_cb, my_bool cleanup); #ifdef THREAD static void wait_on_queue(KEYCACHE_WQUEUE *wqueue, pthread_mutex_t *mutex); @@ -232,15 +328,16 @@ static void release_whole_queue(KEYCACHE_WQUEUE *wqueue); #define wait_on_queue(wqueue, mutex) do {} while (0) #define release_whole_queue(wqueue) do {} while (0) #endif -static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block); +static void free_block(S_KEY_CACHE_CB *keycache, BLOCK_LINK *block); #if !defined(DBUG_OFF) -static void test_key_cache(KEY_CACHE *keycache, +static void test_key_cache(S_KEY_CACHE_CB *keycache, const char *where, my_bool lock); #endif - +#define KEYCACHE_BASE_EXPR(f, pos) \ + ((ulong) ((pos) / keycache->key_cache_block_size) + (ulong) (f)) #define KEYCACHE_HASH(f, pos) \ -(((ulong) ((pos) / keycache->key_cache_block_size) + \ - (ulong) (f)) & (keycache->hash_entries-1)) + ((KEYCACHE_BASE_EXPR(f, pos) / keycache->hash_factor) & \ + (keycache->hash_entries-1)) #define 
FILE_HASH(f) ((uint) (f) & (CHANGED_BLOCKS_HASH-1)) #define DEFAULT_KEYCACHE_DEBUG_LOG "keycache_debug.log" @@ -336,9 +433,10 @@ static int keycache_pthread_cond_signal(pthread_cond_t *cond); #define inline /* disabled inline for easier debugging */ static int fail_block(BLOCK_LINK *block); static int fail_hlink(HASH_LINK *hlink); -static int cache_empty(KEY_CACHE *keycache); +static int cache_empty(S_KEY_CACHE_CB *keycache); #endif + static inline uint next_power(uint value) { return (uint) my_round_up_to_next_power((uint32) value) << 1; @@ -346,19 +444,32 @@ static inline uint next_power(uint value) /* - Initialize a key cache + Initialize a simple key cache SYNOPSIS - init_key_cache() - keycache pointer to a key cache data structure - key_cache_block_size size of blocks to keep cached data - use_mem total memory to use for the key cache - division_limit division limit (may be zero) - age_threshold age threshold (may be zero) + s_init_key_cache() + keycache_cb pointer to the control block of a simple key cache + key_cache_block_size size of blocks to keep cached data + use_mem memory to use for the key cache buferrs/structures + division_limit division limit (may be zero) + age_threshold age threshold (may be zero) + + DESCRIPTION + This function is the implementation of the init_key_cache interface + function that is employed by simple (non-partitioned) key caches. + The function builds a simple key cache and initializes the control block + structure of the type S_KEY_CACHE_CB that is used for this key cache. + The parameter keycache_cb is supposed to point to this structure. + The parameter key_cache_block_size specifies the size of the blocks in + the key cache to be built. The parameters division_limit and age_threshhold + determine the initial values of those characteristics of the key cache + that are used for midpoint insertion strategy. 
The parameter use_mem + specifies the total amount of memory to be allocated for key cache blocks + and auxiliary structures. RETURN VALUE number of blocks in the key cache, if successful, - 0 - otherwise. + <= 0 - otherwise. NOTES. if keycache->key_cache_inited != 0 we assume that the key cache @@ -370,10 +481,12 @@ static inline uint next_power(uint value) */ -int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, - size_t use_mem, uint division_limit, - uint age_threshold) +static +int s_init_key_cache(void *keycache_cb, uint key_cache_block_size, + size_t use_mem, uint division_limit, + uint age_threshold) { + S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; ulong blocks, hash_links; size_t length; int error; @@ -387,12 +500,15 @@ int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, DBUG_RETURN(0); } + keycache->blocks_used= keycache->blocks_unused= 0; + keycache->global_blocks_changed= 0; keycache->global_cache_w_requests= keycache->global_cache_r_requests= 0; keycache->global_cache_read= keycache->global_cache_write= 0; keycache->disk_blocks= -1; if (! keycache->key_cache_inited) { keycache->key_cache_inited= 1; + keycache->hash_factor= 1; /* Initialize these variables once only. Their value must survive re-initialization during resizing. @@ -534,51 +650,43 @@ err: /* - Resize a key cache + Prepare for resizing a simple key cache SYNOPSIS - resize_key_cache() - keycache pointer to a key cache data structure - key_cache_block_size size of blocks to keep cached data - use_mem total memory to use for the new key cache - division_limit new division limit (if not zero) - age_threshold new age threshold (if not zero) + s_prepare_resize_key_cache() + keycache_cb pointer to the control block of a simple key cache + with_resize_queue <=> resize queue is used + release_lock <=> release the key cache lock before return - RETURN VALUE - number of blocks in the key cache, if successful, - 0 - otherwise. 
+ DESCRIPTION + This function flushes all dirty pages from a simple key cache and after + this it destroys the key cache calling s_end_key_cache. The function + considers the parameter keycache_cb as a pointer to the control block + structure of the type S_KEY_CACHE_CB for this key cache. + The parameter with_resize_queue determines weather the resize queue is + involved (MySQL server never uses this queue). The parameter release_lock + says weather the key cache lock must be released before return from + the function. - NOTES. - The function first compares the memory size and the block size parameters - with the key cache values. + RETURN VALUE + 0 - on success, + 1 - otherwise. - If they differ the function free the the memory allocated for the - old key cache blocks by calling the end_key_cache function and - then rebuilds the key cache with new blocks by calling - init_key_cache. + NOTES + This function is the called by s_resize_key_cache and p_resize_key_cache + that resize simple and partitioned key caches respectively. - The function starts the operation only when all other threads - performing operations with the key cache let her to proceed - (when cnt_for_resize=0). 
*/ -int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, - size_t use_mem, uint division_limit, - uint age_threshold) +static +int s_prepare_resize_key_cache(void *keycache_cb, + my_bool with_resize_queue, + my_bool release_lock) { - int blocks; - DBUG_ENTER("resize_key_cache"); - - if (!keycache->key_cache_inited) - DBUG_RETURN(keycache->disk_blocks); - - if(key_cache_block_size == keycache->key_cache_block_size && - use_mem == keycache->key_cache_mem_size) - { - change_key_cache_param(keycache, division_limit, age_threshold); - DBUG_RETURN(keycache->disk_blocks); - } - + int res= 0; + S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; + DBUG_ENTER("s_prepare_resize_key_cache"); + keycache_pthread_mutex_lock(&keycache->cache_lock); #ifdef THREAD @@ -588,7 +696,7 @@ int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, one resizer only. In set_var.cc keycache->in_init is used to block multiple attempts. */ - while (keycache->in_resize) + while (with_resize_queue && keycache->in_resize) { /* purecov: begin inspected */ wait_on_queue(&keycache->resize_queue, &keycache->cache_lock); @@ -613,8 +721,8 @@ int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, { /* TODO: if this happens, we should write a warning in the log file ! */ keycache->resize_in_flush= 0; - blocks= 0; keycache->can_be_used= 0; + res= 1; goto finish; } DBUG_ASSERT(cache_empty(keycache)); @@ -640,29 +748,145 @@ int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, #else KEYCACHE_DBUG_ASSERT(keycache->cnt_for_resize_op == 0); #endif - - /* - Free old cache structures, allocate new structures, and initialize - them. Note that the cache_lock mutex and the resize_queue are left - untouched. We do not lose the cache_lock and will release it only at - the end of this function. 
- */ - end_key_cache(keycache, 0); /* Don't free mutex */ - /* The following will work even if use_mem is 0 */ - blocks= init_key_cache(keycache, key_cache_block_size, use_mem, - division_limit, age_threshold); + + s_end_key_cache(keycache_cb, 0); finish: + if (release_lock) + keycache_pthread_mutex_unlock(&keycache->cache_lock); + DBUG_RETURN(res); +} + + +/* + Finalize resizing a simple key cache + + SYNOPSIS + s_finish_resize_key_cache() + keycache_cb pointer to the control block of a simple key cache + with_resize_queue <=> resize queue is used + acquire_lock <=> acquire the key cache lock at start + + DESCRIPTION + This function performs finalizing actions for the operation of + resizing a simple key cache. The function considers the parameter + keycache_cb as a pointer to the control block structure of the type + S_KEY_CACHE_CB for this key cache. The function sets the flag + in_resize in this structure to FALSE. + The parameter with_resize_queue determines weather the resize queue + is involved (MySQL server never uses this queue). + The parameter acquire_lock says weather the key cache lock must be + acquired at the start of the function. + + RETURN VALUE + none + + NOTES + This function is the called by s_resize_key_cache and p_resize_key_cache + that resize simple and partitioned key caches respectively. + +*/ + +static +void s_finish_resize_key_cache(void *keycache_cb, + my_bool with_resize_queue, + my_bool acquire_lock) +{ + S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; + DBUG_ENTER("s_finish_resize_key_cache"); + + if (acquire_lock) + keycache_pthread_mutex_lock(&keycache->cache_lock); + /* Mark the resize finished. This allows other threads to start a resize or to request new cache blocks. */ keycache->in_resize= 0; - - /* Signal waiting threads. */ - release_whole_queue(&keycache->resize_queue); + + if (with_resize_queue) + { + /* Signal waiting threads. 
*/ + release_whole_queue(&keycache->resize_queue); + } keycache_pthread_mutex_unlock(&keycache->cache_lock); + + DBUG_VOID_RETURN; +} + + +/* + Resize a simple key cache + + SYNOPSIS + s_resize_key_cache() + keycache_cb pointer to the control block of a simple key cache + key_cache_block_size size of blocks to keep cached data + use_mem memory to use for the key cache buffers/structures + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + + DESCRIPTION + This function is the implementation of the resize_key_cache interface + function that is employed by simple (non-partitioned) key caches. + The function considers the parameter keycache_cb as a pointer to the + control block structure of the type S_KEY_CACHE_CB for the simple key + cache to be resized. + The parameter key_cache_block_size specifies the new size of the blocks in + the key cache. The parameters division_limit and age_threshold + determine the new initial values of those characteristics of the key cache + that are used for midpoint insertion strategy. The parameter use_mem + specifies the total amount of memory to be allocated for key cache blocks + and auxiliary structures in the new key cache. + + RETURN VALUE + number of blocks in the key cache, if successful, + 0 - otherwise. + + NOTES. + The function first calls the function s_prepare_resize_key_cache + to flush all dirty blocks from key cache, to free memory used + for key cache blocks and auxiliary structures. After this the + function builds a new key cache with new parameters. + + This implementation doesn't block the calls and executions of other + functions from the key cache interface. However it assumes that the + calls of s_resize_key_cache itself are serialized. + + The function starts the operation only when all other threads + performing operations with the key cache let her to proceed + (when cnt_for_resize=0). 
+ +*/ + +static +int s_resize_key_cache(void *keycache_cb, uint key_cache_block_size, + size_t use_mem, uint division_limit, + uint age_threshold) +{ + S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; + int blocks= 0; + DBUG_ENTER("s_resize_key_cache"); + + if (!keycache->key_cache_inited) + DBUG_RETURN(keycache->disk_blocks); + + /* + Note that the cache_lock mutex and the resize_queue are left untouched. + We do not lose the cache_lock and will release it only at the end of + this function. + */ + if (s_prepare_resize_key_cache(keycache_cb, 1, 0)) + goto finish; + + /* The following will work even if use_mem is 0 */ + blocks= s_init_key_cache(keycache, key_cache_block_size, use_mem, + division_limit, age_threshold); + +finish: + s_finish_resize_key_cache(keycache_cb, 1, 0); + DBUG_RETURN(blocks); } @@ -670,7 +894,7 @@ finish: /* Increment counter blocking resize key cache operation */ -static inline void inc_counter_for_resize_op(KEY_CACHE *keycache) +static inline void inc_counter_for_resize_op(S_KEY_CACHE_CB *keycache) { keycache->cnt_for_resize_op++; } @@ -680,35 +904,49 @@ static inline void inc_counter_for_resize_op(KEY_CACHE *keycache) Decrement counter blocking resize key cache operation; Signal the operation to proceed when counter becomes equal zero */ -static inline void dec_counter_for_resize_op(KEY_CACHE *keycache) +static inline void dec_counter_for_resize_op(S_KEY_CACHE_CB *keycache) { if (!--keycache->cnt_for_resize_op) release_whole_queue(&keycache->waiting_for_resize_cnt); } + /* - Change the key cache parameters + Change key cache parameters of a simple key cache SYNOPSIS - change_key_cache_param() - keycache pointer to a key cache data structure - division_limit new division limit (if not zero) - age_threshold new age threshold (if not zero) + s_change_key_cache_param() + keycache_cb pointer to the control block of a simple key cache + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + + 
DESCRIPTION + This function is the implementation of the change_key_cache_param interface + function that is employed by simple (non-partitioned) key caches. + The function considers the parameter keycache_cb as a pointer to the + control block structure of the type S_KEY_CACHE_CB for the simple key + cache where new values of the division limit and the age threshold used + for midpoint insertion strategy are to be set. The parameters + division_limit and age_threshold provide these new values. RETURN VALUE none NOTES. - Presently the function resets the key cache parameters - concerning midpoint insertion strategy - division_limit and - age_threshold. + Presently the function resets the key cache parameters concerning + midpoint insertion strategy - division_limit and age_threshold. + This function changes some parameters of a given key cache without + reformatting it. The function does not touch the contents the key + cache blocks. + */ -void change_key_cache_param(KEY_CACHE *keycache, uint division_limit, - uint age_threshold) +static +void s_change_key_cache_param(void *keycache_cb, uint division_limit, + uint age_threshold) { - DBUG_ENTER("change_key_cache_param"); - + S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; + DBUG_ENTER("s_change_key_cache_param"); keycache_pthread_mutex_lock(&keycache->cache_lock); if (division_limit) keycache->min_warm_blocks= (keycache->disk_blocks * @@ -722,20 +960,32 @@ void change_key_cache_param(KEY_CACHE *keycache, uint division_limit, /* - Remove key_cache from memory + Destroy a simple key cache SYNOPSIS - end_key_cache() - keycache key cache handle - cleanup Complete free (Free also mutex for key cache) + s_end_key_cache() + keycache_cb pointer to the control block of a simple key cache + cleanup <=> complete free (free also mutex for key cache) + + DESCRIPTION + This function is the implementation of the end_key_cache interface + function that is employed by simple (non-partitioned) key caches. 
+ The function considers the parameter keycache_cb as a pointer to the + control block structure of the type S_KEY_CACHE_CB for the simple key + cache to be destroyed. + The function frees the memory allocated for the key cache blocks and + auxiliary structures. If the value of the parameter cleanup is TRUE + then even the key cache mutex is freed. RETURN VALUE none */ -void end_key_cache(KEY_CACHE *keycache, my_bool cleanup) +static +void s_end_key_cache(void *keycache_cb, my_bool cleanup) { - DBUG_ENTER("end_key_cache"); + S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; + DBUG_ENTER("s_end_key_cache"); DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) keycache)); if (!keycache->key_cache_inited) @@ -1026,7 +1276,7 @@ static inline void link_changed(BLOCK_LINK *block, BLOCK_LINK **phead) void */ -static void link_to_file_list(KEY_CACHE *keycache, +static void link_to_file_list(S_KEY_CACHE_CB *keycache, BLOCK_LINK *block, int file, my_bool unlink_block) { @@ -1067,7 +1317,7 @@ static void link_to_file_list(KEY_CACHE *keycache, void */ -static void link_to_changed_list(KEY_CACHE *keycache, +static void link_to_changed_list(S_KEY_CACHE_CB *keycache, BLOCK_LINK *block) { DBUG_ASSERT(block->status & BLOCK_IN_USE); @@ -1122,7 +1372,7 @@ static void link_to_changed_list(KEY_CACHE *keycache, not linked in the LRU ring. 
*/ -static void link_block(KEY_CACHE *keycache, BLOCK_LINK *block, my_bool hot, +static void link_block(S_KEY_CACHE_CB *keycache, BLOCK_LINK *block, my_bool hot, my_bool at_end) { BLOCK_LINK *ins; @@ -1243,7 +1493,7 @@ static void link_block(KEY_CACHE *keycache, BLOCK_LINK *block, my_bool hot, See NOTES for link_block */ -static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block) +static void unlink_block(S_KEY_CACHE_CB *keycache, BLOCK_LINK *block) { DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE)); DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/ @@ -1301,7 +1551,7 @@ static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block) RETURN void */ -static void reg_requests(KEY_CACHE *keycache, BLOCK_LINK *block, int count) +static void reg_requests(S_KEY_CACHE_CB *keycache, BLOCK_LINK *block, int count) { DBUG_ASSERT(block->status & BLOCK_IN_USE); DBUG_ASSERT(block->hash_link); @@ -1344,7 +1594,7 @@ static void reg_requests(KEY_CACHE *keycache, BLOCK_LINK *block, int count) not linked in the LRU ring. 
*/ -static void unreg_request(KEY_CACHE *keycache, +static void unreg_request(S_KEY_CACHE_CB *keycache, BLOCK_LINK *block, int at_end) { DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE)); @@ -1433,7 +1683,7 @@ static void remove_reader(BLOCK_LINK *block) signals on its termination */ -static void wait_for_readers(KEY_CACHE *keycache, +static void wait_for_readers(S_KEY_CACHE_CB *keycache, BLOCK_LINK *block) { #ifdef THREAD @@ -1482,7 +1732,7 @@ static inline void link_hash(HASH_LINK **start, HASH_LINK *hash_link) Remove a hash link from the hash table */ -static void unlink_hash(KEY_CACHE *keycache, HASH_LINK *hash_link) +static void unlink_hash(S_KEY_CACHE_CB *keycache, HASH_LINK *hash_link) { KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u pos_ %lu #requests=%u", (uint) hash_link->file,(ulong) hash_link->diskpos, hash_link->requests)); @@ -1538,7 +1788,7 @@ static void unlink_hash(KEY_CACHE *keycache, HASH_LINK *hash_link) Get the hash link for a page */ -static HASH_LINK *get_hash_link(KEY_CACHE *keycache, +static HASH_LINK *get_hash_link(S_KEY_CACHE_CB *keycache, int file, my_off_t filepos) { reg1 HASH_LINK *hash_link, **start; @@ -1659,7 +1909,7 @@ restart: waits until first of this operations links any block back. */ -static BLOCK_LINK *find_key_block(KEY_CACHE *keycache, +static BLOCK_LINK *find_key_block(S_KEY_CACHE_CB *keycache, File file, my_off_t filepos, int init_hits_left, int wrmode, int *page_st) @@ -2419,7 +2669,7 @@ restart: portion is less than read_length, but not less than min_length. 
*/ -static void read_block(KEY_CACHE *keycache, +static void read_block(S_KEY_CACHE_CB *keycache, BLOCK_LINK *block, uint read_length, uint min_length, my_bool primary) { @@ -2507,43 +2757,62 @@ static void read_block(KEY_CACHE *keycache, /* - Read a block of data from a cached file into a buffer; + Read a block of data from a simple key cache into a buffer SYNOPSIS - key_cache_read() - keycache pointer to a key cache data structure - file handler for the file for the block of data to be read - filepos position of the block of data in the file - level determines the weight of the data - buff buffer to where the data must be placed - length length of the buffer - block_length length of the block in the key cache buffer - return_buffer return pointer to the key cache buffer with the data + s_key_cache_read() + keycache_cb pointer to the control block of a simple key cache + file handler for the file for the block of data to be read + filepos position of the block of data in the file + level determines the weight of the data + buff buffer to where the data must be placed + length length of the buffer + block_length length of the read data from a key cache block + return_buffer return pointer to the key cache buffer with the data + DESCRIPTION + This function is the implementation of the key_cache_read interface + function that is employed by simple (non-partitioned) key caches. + The function considers the parameter keycache_cb as a pointer to the + control block structure of the type S_KEY_CACHE_CB for a simple key + cache. + In a general case the function reads a block of data from the key cache + into the buffer buff of the size specified by the parameter length. The + beginning of the block of data to be read is specified by the parameters + file and filepos. The length of the read data is the same as the length + of the buffer. The data is read into the buffer in key_cache_block_size + increments. 
If the next portion of the data is not found in any key cache + block, first it is read from file into the key cache. + If the parameter return_buffer is not ignored and its value is TRUE, and + the data to be read of the specified size block_length can be read from one + key cache buffer, then the function returns a pointer to the data in the + key cache buffer. + The function takse into account parameters block_length and return buffer + only in a single-threaded environment. + The parameter 'level' is used only by the midpoint insertion strategy + when the data or its portion cannot be found in the key cache. + RETURN VALUE - Returns address from where the data is placed if sucessful, 0 - otherwise. + Returns address from where the data is placed if successful, 0 - otherwise. - NOTES. - The function ensures that a block of data of size length from file - positioned at filepos is in the buffers for some key cache blocks. - Then the function either copies the data into the buffer buff, or, - if return_buffer is TRUE, it just returns the pointer to the key cache - buffer with the data. 
+ NOTES Filepos must be a multiple of 'block_length', but it doesn't have to be a multiple of key_cache_block_size; + */ -uchar *key_cache_read(KEY_CACHE *keycache, - File file, my_off_t filepos, int level, - uchar *buff, uint length, - uint block_length __attribute__((unused)), - int return_buffer __attribute__((unused))) +uchar *s_key_cache_read(void *keycache_cb, + File file, my_off_t filepos, int level, + uchar *buff, uint length, + uint block_length __attribute__((unused)), + int return_buffer __attribute__((unused))) { + S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; my_bool locked_and_incremented= FALSE; int error=0; uchar *start= buff; - DBUG_ENTER("key_cache_read"); + DBUG_ENTER("s_key_cache_read"); DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u", (uint) file, (ulong) filepos, length)); @@ -2738,29 +3007,49 @@ end: /* - Insert a block of file data from a buffer into key cache + Insert a block of file data from a buffer into a simple key cache SYNOPSIS - key_cache_insert() - keycache pointer to a key cache data structure + s_key_cache_insert() + keycache_cb pointer to the control block of a simple key cache file handler for the file to insert data from filepos position of the block of data in the file to insert level determines the weight of the data buff buffer to read data from length length of the data in the buffer - NOTES - This is used by MyISAM to move all blocks from a index file to the key - cache - + DESCRIPTION + This function is the implementation of the key_cache_insert interface + function that is employed by simple (non-partitioned) key caches. + The function considers the parameter keycache_cb as a pointer to the + control block structure of the type S_KEY_CACHE_CB for a simple key + cache. + The function writes a block of file data from a buffer into the key cache. + The buffer is specified with the parameters buff and length - the pointer + to the beginning of the buffer and its size respectively. 
It's assumed + the buffer contains the data from 'file' allocated from the position + filepos. The data is copied from the buffer in key_cache_block_size + increments. + The parameter level is used to set one characteristic for the key buffers + loaded with the data from buff. The characteristic is used only by the + midpoint insertion strategy. + RETURN VALUE 0 if a success, 1 - otherwise. + + NOTES + The function is used by MyISAM to move all blocks from a index file to + the key cache. It can be performed in parallel with reading the file data + from the key buffers by other threads. + */ -int key_cache_insert(KEY_CACHE *keycache, - File file, my_off_t filepos, int level, - uchar *buff, uint length) +static +int s_key_cache_insert(void *keycache_cb, + File file, my_off_t filepos, int level, + uchar *buff, uint length) { + S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; int error= 0; DBUG_ENTER("key_cache_insert"); DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u", @@ -2979,43 +3268,65 @@ int key_cache_insert(KEY_CACHE *keycache, /* - Write a buffer into a cached file. 
+ Write a buffer into a simple key cache SYNOPSIS - key_cache_write() - keycache pointer to a key cache data structure - file handler for the file to write data to - filepos position in the file to write data to - level determines the weight of the data - buff buffer with the data - length length of the buffer - dont_write if is 0 then all dirty pages involved in writing - should have been flushed from key cache + s_key_cache_write() + keycache_cb pointer to the control block of a simple key cache + file handler for the file to write data to + file_extra maps of key cache partitions containing + dirty pages from file + filepos position in the file to write data to + level determines the weight of the data + buff buffer with the data + length length of the buffer + dont_write if is 0 then all dirty pages involved in writing + should have been flushed from key cache + DESCRIPTION + This function is the implementation of the key_cache_write interface + function that is employed by simple (non-partitioned) key caches. + The function considers the parameter keycache_cb as a pointer to the + control block structure of the type S_KEY_CACHE_CB for a simple key + cache. + In a general case the function copies data from a buffer into the key + cache. The buffer is specified with the parameters buff and length - + the pointer to the beginning of the buffer and its size respectively. + It's assumed the buffer contains the data to be written into 'file' + starting from the position filepos. The data is copied from the buffer + in key_cache_block_size increments. + If the value of the parameter dont_write is FALSE then the function + also writes the data into file. + The parameter level is used to set one characteristic for the key buffers + filled with the data from buff. The characteristic is employed only by + the midpoint insertion strategy. + The parameter file_extra currently makes sense only for simple key caches + that are elements of a partitioned key cache. 
It provides a pointer to the + shared bitmap of the partitions that may contains dirty pages for the file. + This bitmap is used to optimize the function p_flush_key_blocks. + RETURN VALUE 0 if a success, 1 - otherwise. - NOTES. - The function copies the data of size length from buff into buffers - for key cache blocks that are assigned to contain the portion of - the file starting with position filepos. - It ensures that this data is flushed to the file if dont_write is FALSE. - Filepos must be a multiple of 'block_length', but it doesn't - have to be a multiple of key_cache_block_size; + NOTES + This implementation exploits the fact that the function is called only + when a thread has got an exclusive lock for the key file. - dont_write is always TRUE in the server (info->lock_type is never F_UNLCK). */ -int key_cache_write(KEY_CACHE *keycache, - File file, my_off_t filepos, int level, - uchar *buff, uint length, - uint block_length __attribute__((unused)), - int dont_write) +static +int s_key_cache_write(void *keycache_cb, + File file, void *file_extra __attribute__((unused)), + my_off_t filepos, int level, + uchar *buff, uint length, + uint block_length __attribute__((unused)), + int dont_write) { + S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; my_bool locked_and_incremented= FALSE; int error=0; - DBUG_ENTER("key_cache_write"); + DBUG_ENTER("s_key_cache_write"); DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u block_length: %u" " key_block_length: %u", @@ -3330,7 +3641,7 @@ end: Block must have a request registered on it. 
*/ -static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block) +static void free_block(S_KEY_CACHE_CB *keycache, BLOCK_LINK *block) { KEYCACHE_THREAD_TRACE("free block"); KEYCACHE_DBUG_PRINT("free_block", @@ -3470,7 +3781,7 @@ static int cmp_sec_link(BLOCK_LINK **a, BLOCK_LINK **b) free used blocks if requested */ -static int flush_cached_blocks(KEY_CACHE *keycache, +static int flush_cached_blocks(S_KEY_CACHE_CB *keycache, File file, BLOCK_LINK **cache, BLOCK_LINK **end, enum flush_type type) @@ -3514,9 +3825,9 @@ static int flush_cached_blocks(KEY_CACHE *keycache, (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE)); block->status|= BLOCK_IN_FLUSHWRITE; keycache_pthread_mutex_unlock(&keycache->cache_lock); - error= my_pwrite(file, block->buffer+block->offset, + error= my_pwrite(file, block->buffer + block->offset, block->length - block->offset, - block->hash_link->diskpos+ block->offset, + block->hash_link->diskpos + block->offset, MYF(MY_NABP | MY_WAIT_IF_FULL)); keycache_pthread_mutex_lock(&keycache->cache_lock); keycache->global_cache_write++; @@ -3576,7 +3887,7 @@ static int flush_cached_blocks(KEY_CACHE *keycache, /* - Flush all key blocks for a file to disk, but don't do any mutex locks. 
+ Flush all key blocks for a file to disk, but don't do any mutex locks SYNOPSIS flush_key_blocks_int() @@ -3598,7 +3909,7 @@ static int flush_cached_blocks(KEY_CACHE *keycache, 1 error */ -static int flush_key_blocks_int(KEY_CACHE *keycache, +static int flush_key_blocks_int(S_KEY_CACHE_CB *keycache, File file, enum flush_type type) { BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache; @@ -4034,23 +4345,49 @@ err: /* - Flush all blocks for a file to disk + Flush all blocks for a file from key buffers of a simple key cache SYNOPSIS - flush_key_blocks() - keycache pointer to a key cache data structure - file handler for the file to flush to - flush_type type of the flush + s_flush_key_blocks() + keycache_cb pointer to the control block of a simple key cache + file handler for the file to flush to + file_extra maps of key cache partitions containing + dirty pages from file (not used) + flush_type type of the flush operation + DESCRIPTION + This function is the implementation of the flush_key_blocks interface + function that is employed by simple (non-partitioned) key caches. + The function considers the parameter keycache_cb as a pointer to the + control block structure of the type S_KEY_CACHE_CB for a simple key + cache. + In a general case the function flushes the data from all dirty key + buffers related to the file 'file' into this file. The function does + exactly this if the value of the parameter type is FLUSH_KEEP. If the + value of this parameter is FLUSH_RELEASE, the function additionally + releases the key buffers containing data from 'file' for new usage. + If the value of the parameter type is FLUSH_IGNORE_CHANGED the function + just releases the key buffers containing data from 'file'. + The parameter file_extra currently is not used by this function. + RETURN 0 ok 1 error + + NOTES + This implementation exploits the fact that the function is called only + when a thread has got an exclusive lock for the key file. 
+ */ -int flush_key_blocks(KEY_CACHE *keycache, - File file, enum flush_type type) +static +int s_flush_key_blocks(void *keycache_cb, + File file, + void *file_extra __attribute__((unused)), + enum flush_type type) { + S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; int res= 0; DBUG_ENTER("flush_key_blocks"); DBUG_PRINT("enter", ("keycache: 0x%lx", (long) keycache)); @@ -4103,7 +4440,7 @@ int flush_key_blocks(KEY_CACHE *keycache, != 0 Error */ -static int flush_all_key_blocks(KEY_CACHE *keycache) +static int flush_all_key_blocks(S_KEY_CACHE_CB *keycache) { BLOCK_LINK *block; uint total_found; @@ -4206,37 +4543,45 @@ static int flush_all_key_blocks(KEY_CACHE *keycache) /* - Reset the counters of a key cache. + Reset the counters of a simple key cache SYNOPSIS - reset_key_cache_counters() - name the name of a key cache - key_cache pointer to the key kache to be reset + s_reset_key_cache_counters() + name the name of a key cache + keycache_cb pointer to the control block of a simple key cache DESCRIPTION - This procedure is used by process_key_caches() to reset the counters of all - currently used key caches, both the default one and the named ones. + This function is the implementation of the reset_key_cache_counters + interface function that is employed by simple (non-partitioned) key caches. + The function considers the parameter keycache_cb as a pointer to the + control block structure of the type S_KEY_CACHE_CB for a simple key cache. + This function resets the values of all statistical counters for the key + cache to 0. + The parameter name is currently not used. 
RETURN 0 on success (always because it can't fail) + */ -int reset_key_cache_counters(const char *name __attribute__((unused)), - KEY_CACHE *key_cache) +static +int s_reset_key_cache_counters(const char *name __attribute__((unused)), + void *keycache_cb) { - DBUG_ENTER("reset_key_cache_counters"); - if (!key_cache->key_cache_inited) + S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; + DBUG_ENTER("s_reset_key_cache_counters"); + if (!keycache->key_cache_inited) { DBUG_PRINT("info", ("Key cache %s not initialized.", name)); DBUG_RETURN(0); } DBUG_PRINT("info", ("Resetting counters for key cache %s.", name)); - key_cache->global_blocks_changed= 0; /* Key_blocks_not_flushed */ - key_cache->global_cache_r_requests= 0; /* Key_read_requests */ - key_cache->global_cache_read= 0; /* Key_reads */ - key_cache->global_cache_w_requests= 0; /* Key_write_requests */ - key_cache->global_cache_write= 0; /* Key_writes */ + keycache->global_blocks_changed= 0; /* Key_blocks_not_flushed */ + keycache->global_cache_r_requests= 0; /* Key_read_requests */ + keycache->global_cache_read= 0; /* Key_reads */ + keycache->global_cache_w_requests= 0; /* Key_write_requests */ + keycache->global_cache_write= 0; /* Key_writes */ DBUG_RETURN(0); } @@ -4245,7 +4590,7 @@ int reset_key_cache_counters(const char *name __attribute__((unused)), /* Test if disk-cache is ok */ -static void test_key_cache(KEY_CACHE *keycache __attribute__((unused)), +static void test_key_cache(S_KEY_CACHE_CB *keycache __attribute__((unused)), const char *where __attribute__((unused)), my_bool lock __attribute__((unused))) { @@ -4259,7 +4604,7 @@ static void test_key_cache(KEY_CACHE *keycache __attribute__((unused)), #define MAX_QUEUE_LEN 100 -static void keycache_dump(KEY_CACHE *keycache) +static void keycache_dump(S_KEY_CACHE_CB *keycache) { FILE *keycache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w"); struct st_my_thread_var *last; @@ -4499,7 +4844,7 @@ static int fail_hlink(HASH_LINK *hlink) return 0; /* Let the 
assert fail. */ } -static int cache_empty(KEY_CACHE *keycache) +static int cache_empty(S_KEY_CACHE_CB *keycache) { int errcnt= 0; int idx; @@ -4537,3 +4882,1675 @@ static int cache_empty(KEY_CACHE *keycache) } #endif + +/* + Get statistics for a simple key cache + + SYNOPSIS + get_key_cache_statistics() + keycache_cb pointer to the control block of a simple key cache + partition_no partition number (not used) + key_cache_stats OUT pointer to the structure for the returned statistics + + DESCRIPTION + This function is the implementation of the get_key_cache_statistics + interface function that is employed by simple (non-partitioned) key caches. + The function considers the parameter keycache_cb as a pointer to the + control block structure of the type S_KEY_CACHE_CB for a simple key cache. + This function returns the statistical data for the key cache. + The parameter partition_no is not used by this function. + + RETURN + none + +*/ + +static +void s_get_key_cache_statistics(void *keycache_cb, + uint partition_no __attribute__((unused)), + KEY_CACHE_STATISTICS *key_cache_stats) +{ + S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; + DBUG_ENTER("s_get_key_cache_statistics"); + + key_cache_stats->mem_size= (longlong) keycache->key_cache_mem_size; + key_cache_stats->block_size= (longlong) keycache->key_cache_block_size; + key_cache_stats->blocks_used= keycache->blocks_used; + key_cache_stats->blocks_unused= keycache->blocks_unused; + key_cache_stats->blocks_changed= keycache->global_blocks_changed; + key_cache_stats->read_requests= keycache->global_cache_r_requests; + key_cache_stats->reads= keycache->global_cache_read; + key_cache_stats->write_requests= keycache->global_cache_w_requests; + key_cache_stats->writes= keycache->global_cache_write; + DBUG_VOID_RETURN; +} + + +static size_t s_key_cache_stat_var_offsets[]= +{ + offsetof(S_KEY_CACHE_CB, blocks_used), + offsetof(S_KEY_CACHE_CB, blocks_unused), + offsetof(S_KEY_CACHE_CB, global_blocks_changed), + 
offsetof(S_KEY_CACHE_CB, global_cache_w_requests), + offsetof(S_KEY_CACHE_CB, global_cache_write), + offsetof(S_KEY_CACHE_CB, global_cache_r_requests), + offsetof(S_KEY_CACHE_CB, global_cache_read) +}; + + +/* + Get the value of a statistical variable for a simple key cache + + SYNOPSIS + s_get_key_cache_stat_value() + keycache_cb pointer to the control block of a simple key cache + var_no the ordered number of a statistical variable + + DESCRIPTION + This function is the implementation of the s_get_key_cache_stat_value + interface function that is employed by simple (non-partitioned) key caches. + The function considers the parameter keycache_cb as a pointer to the + control block structure of the type S_KEY_CACHE_CB for a simple key cache. + This function returns the value of the statistical variable var_no + for this key cache. The variables are numbered starting from 0 to 6. + + RETURN + The value of the specified statistical variable + +*/ + +static +ulonglong s_get_key_cache_stat_value(void *keycache_cb, uint var_no) +{ + S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; + size_t var_ofs= s_key_cache_stat_var_offsets[var_no]; + ulonglong res= 0; + DBUG_ENTER("s_get_key_cache_stat_value"); + + if (var_no < 3) + res= (ulonglong) (*(long *) ((char *) keycache + var_ofs)); + else + res= *(ulonglong *) ((char *) keycache + var_ofs); + + DBUG_RETURN(res); +} + + +/* + The array of pointer to the key cache interface functions used for simple + key caches. Any simple key cache objects including those incorporated into + partitioned keys caches exploit this array. + + The current implementation of these functions allows to call them from + the MySQL server code directly. We don't do it though. 
+*/ + +static KEY_CACHE_FUNCS s_key_cache_funcs = +{ + s_init_key_cache, + s_resize_key_cache, + s_change_key_cache_param, + s_key_cache_read, + s_key_cache_insert, + s_key_cache_write, + s_flush_key_blocks, + s_reset_key_cache_counters, + s_end_key_cache, + s_get_key_cache_statistics, + s_get_key_cache_stat_value +}; + + +/****************************************************************************** + Partitioned Key Cache Module + + The module contains implementations of all key cache interface functions + employed by partitioned key caches. + + A partitioned key cache is a collection of structures for simple key caches + called key cache partitions. Any page from a file can be placed into a buffer + of only one partition. The number of the partition is calculated from + the file number and the position of the page in the file, and it's always the + same for the page. The function that maps pages into partitions takes care + of even distribution of pages among partitions. + + Partition key cache mitigate one of the major problem of simple key cache: + thread contention for key cache lock (mutex). Every call of a key cache + interface function must acquire this lock. So threads compete for this lock + even in the case when they have acquired shared locks for the file and + pages they want read from are in the key cache buffers. + When working with a partitioned key cache any key cache interface function + that needs only one page has to acquire the key cache lock only for the + partition the page is ascribed to. This makes the chances for threads not + compete for the same key cache lock better. Unfortunately if we use a + partitioned key cache with N partitions for B-tree indexes we can't say + that the chances becomes N times less. The fact is that any index lookup + operation requires reading from the root page that, for any index, is always + ascribed to the same partition. 
To resolve this problem we should have + employed more sophisticated mechanisms of working with root pages. + + Currently the number of partitions in a partitioned key cache is limited + by 64. We could increase this limit. Simultaneously we would have to increase + accordingly the size of the bitmap dirty_part_map from the MYISAM_SHARE + structure. + +******************************************************************************/ + +/* Control block for a partitioned key cache */ + +typedef struct st_p_key_cache_cb +{ + my_bool key_cache_inited; /*<=> control block is allocated */ + S_KEY_CACHE_CB **partition_array; /* array of the key cache partitions */ + uint partitions; /* number of partitions in the key cache */ + size_t key_cache_mem_size; /* specified size of the cache memory */ + uint key_cache_block_size; /* size of the page buffer of a cache block */ +} P_KEY_CACHE_CB; + +static +void p_end_key_cache(void *keycache_cb, my_bool cleanup); + +/* + Determine the partition to which the index block to read is ascribed + + SYNOPSIS + get_key_cache_partition() + keycache pointer to the control block of a partitioned key cache + file handler for the file for the block of data to be read + filepos position of the block of data in the file + + DESCRIPTION + The function determines the number of the partition in whose buffer the + block from 'file' at the position filepos has to be placed for reading. + The function returns the control block of the simple key cache for this + partition to the caller. + + RETURN VALUE + The pointer to the control block of the partition to which the specified + file block is ascribed. 
+*/ + +static +S_KEY_CACHE_CB *get_key_cache_partition(P_KEY_CACHE_CB *keycache, + File file, my_off_t filepos) +{ + uint i= KEYCACHE_BASE_EXPR( file, filepos) % keycache->partitions; + return keycache->partition_array[i]; +} + + +/* + Determine the partition to which the index block to write is ascribed + + SYNOPSIS + get_key_cache_partition() + keycache pointer to the control block of a partitioned key cache + file handler for the file for the block of data to be read + filepos position of the block of data in the file + dirty_part_map pointer to the bitmap of dirty partitions for the file + + DESCRIPTION + The function determines the number of the partition in whose buffer the + block from 'file' at the position filepos has to be placed for writing and + marks the partition as dirty in the dirty_part_map bitmap. + The function returns the control block of the simple key cache for this + partition to the caller. + + RETURN VALUE + The pointer to the control block of the partition to which the specified + file block is ascribed. +*/ + +static +S_KEY_CACHE_CB *get_key_cache_partition_for_write(P_KEY_CACHE_CB *keycache, + File file, my_off_t filepos, + ulonglong* dirty_part_map) +{ + uint i= KEYCACHE_BASE_EXPR( file, filepos) % keycache->partitions; + *dirty_part_map|= 1<partition_array[i]; +} + + +/* + Initialize a partitioned key cache + + SYNOPSIS + p_init_key_cache() + keycache_cb pointer to the control block of a partitioned key cache + key_cache_block_size size of blocks to keep cached data + use_mem total memory to use for all key cache partitions + division_limit division limit (may be zero) + age_threshold age threshold (may be zero) + + DESCRIPTION + This function is the implementation of the init_key_cache interface function + that is employed by partitioned key caches. 
+ The function builds and initializes an array of simple key caches, and then + initializes the control block structure of the type P_KEY_CACHE_CB that is + used for a partitioned key cache. The parameter keycache_cb is supposed to + point to this structure. The number of partitions in the partitioned key + cache to be built must be passed through the field 'partitions' of this + structure. The parameter key_cache_block_size specifies the size of the + blocks in the the simple key caches to be built. The parameters + division_limit and age_threshold determine the initial values of those + characteristics of the simple key caches that are used for midpoint + insertion strategy. The parameter use_mem specifies the total amount of + memory to be allocated for the key cache blocks in all simple key caches + and for all auxiliary structures. + + RETURN VALUE + total number of blocks in key cache partitions, if successful, + <= 0 - otherwise. + + NOTES + If keycache->key_cache_inited != 0 then we assume that the memory for + the array of partitions has been already allocated. + + It's assumed that no two threads call this function simultaneously + referring to the same key cache handle. 
+*/ + +static +int p_init_key_cache(void *keycache_cb, uint key_cache_block_size, + size_t use_mem, uint division_limit, + uint age_threshold) +{ + int i; + size_t mem_per_cache; + int cnt; + S_KEY_CACHE_CB *partition; + S_KEY_CACHE_CB **partition_ptr; + P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb; + uint partitions= keycache->partitions; + int blocks= -1; + DBUG_ENTER("p_init_key_cache"); + + keycache->key_cache_block_size = key_cache_block_size; + + if (keycache->key_cache_inited) + partition_ptr= keycache->partition_array; + else + { + if(!(partition_ptr= + (S_KEY_CACHE_CB **) my_malloc(sizeof(S_KEY_CACHE_CB *) * partitions, + MYF(0)))) + DBUG_RETURN(blocks); + keycache->partition_array= partition_ptr; + } + + mem_per_cache = use_mem / partitions; + + for (i= 0; i < (int) partitions; i++) + { + my_bool key_cache_inited= keycache->key_cache_inited; + if (key_cache_inited) + partition= *partition_ptr; + else + { + if (!(partition= (S_KEY_CACHE_CB *) my_malloc(sizeof(S_KEY_CACHE_CB), + MYF(0)))) + continue; + partition->key_cache_inited= 0; + } + + if ((cnt= s_init_key_cache(partition, + key_cache_block_size, mem_per_cache, + division_limit, age_threshold)) <= 0) + { + s_end_key_cache(partition, 1); + my_free((uchar *) partition, MYF(0)); + partition= 0; + if (key_cache_inited) + { + memmove(partition_ptr, partition_ptr+1, + sizeof(partition_ptr)*(partitions-i-1)); + } + if (i == 0) + { + i--; + partitions--; + if (partitions) + mem_per_cache = use_mem / partitions; + } + continue; + } + + if (blocks < 0) + blocks= 0; + blocks+= cnt; + *partition_ptr++= partition; + } + + keycache->partitions= partitions= partition_ptr-keycache->partition_array; + keycache->key_cache_mem_size= mem_per_cache * partitions; + for (i= 0; i < (int) partitions; i++) + keycache->partition_array[i]->hash_factor= partitions; + + keycache->key_cache_inited= 1; + + DBUG_RETURN(blocks); +} + + +/* + Resize a partitioned key cache + + SYNOPSIS + p_resize_key_cache() + keycache_cb 
pointer to the control block of a partitioned key cache + key_cache_block_size size of blocks to keep cached data + use_mem total memory to use for the new key cache + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + + DESCRIPTION + This function is the implementation of the resize_key_cache interface + function that is employed by partitioned key caches. + The function considers the parameter keycache_cb as a pointer to the + control block structure of the type P_KEY_CACHE_CB for the partitioned + key cache to be resized. + The parameter key_cache_block_size specifies the new size of the blocks in + the simple key caches that comprise the partitioned key cache. + The parameters division_limit and age_threshold determine the new initial + values of those characteristics of the simple key cache that are used for + midpoint insertion strategy. The parameter use-mem specifies the total + amount of memory to be allocated for the key cache blocks in all new + simple key caches and for all auxiliary structures. + + RETURN VALUE + number of blocks in the key cache, if successful, + 0 - otherwise. + + NOTES. + The function first calls s_prepare_resize_key_cache for each simple + key cache effectively flushing all dirty pages from it and destroying + the key cache. Then p_init_key cache is called. This call builds all + the new array of simple key caches containing the same number of + elements as the old one. After this the function calls the function + s_finish_resize_key_cache for each simple key cache from this array. + + This implementation doesn't block the calls and executions of other + functions from the key cache interface. However it assumes that the + calls of s_resize_key_cache itself are serialized. 
+ +*/ + +static +int p_resize_key_cache(void *keycache_cb, uint key_cache_block_size, + size_t use_mem, uint division_limit, + uint age_threshold) +{ + uint i; + P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb; + uint partitions= keycache->partitions; + my_bool cleanup= use_mem == 0; + int blocks= -1; + int err= 0; + DBUG_ENTER("p_resize_key_cache"); + if (use_mem == 0) + { + p_end_key_cache(keycache_cb, 0); + DBUG_RETURN(blocks); + } + for (i= 0; i < partitions; i++) + { + err|= s_prepare_resize_key_cache(keycache->partition_array[i], 0, 1); + } + if (!err && use_mem) + blocks= p_init_key_cache(keycache_cb, key_cache_block_size, use_mem, + division_limit, age_threshold); + if (blocks > 0 && !cleanup) + { + for (i= 0; i < partitions; i++) + { + s_finish_resize_key_cache(keycache->partition_array[i], 0, 1); + } + } + DBUG_RETURN(blocks); +} + + +/* + Change key cache parameters of a partitioned key cache + + SYNOPSIS + p_change_key_cache_param() + keycache_cb pointer to the control block of a partitioned key cache + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + + DESCRIPTION + This function is the implementation of the change_key_cache_param interface + function that is employed by partitioned key caches. + The function considers the parameter keycache_cb as a pointer to the + control block structure of the type P_KEY_CACHE_CB for the simple key + cache where new values of the division limit and the age threshold used + for midpoint insertion strategy are to be set. The parameters + division_limit and age_threshold provide these new values. + + RETURN VALUE + none + + NOTES + The function just calls s_change_key_cache_param for each element from the + array of simple caches that comprise the partitioned key cache. 
+ +*/ + +static +void p_change_key_cache_param(void *keycache_cb, uint division_limit, + uint age_threshold) +{ + uint i; + P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb; + uint partitions= keycache->partitions; + DBUG_ENTER("p_change_key_cache_param"); + for (i= 0; i < partitions; i++) + { + s_change_key_cache_param(keycache->partition_array[i], division_limit, + age_threshold); + } + DBUG_VOID_RETURN; +} + + +/* + Destroy a partitioned key cache + + SYNOPSIS + p_end_key_cache() + keycache_cb pointer to the control block of a partitioned key cache + cleanup <=> complete free (free also control block structures + for all simple key caches) + + DESCRIPTION + This function is the implementation of the end_key_cache interface + function that is employed by partitioned key caches. + The function considers the parameter keycache_cb as a pointer to the + control block structure of the type P_KEY_CACHE_CB for the partitioned + key cache to be destroyed. + The function frees the memory allocated for the cache blocks and + auxiliary structures used by simple key caches that comprise the + partitioned key cache. If the value of the parameter cleanup is TRUE + then even the memory used for control blocks of the simple key caches + and the array of pointers to them are freed. 
+ + RETURN VALUE + none + +*/ + +static +void p_end_key_cache(void *keycache_cb, my_bool cleanup) +{ + uint i; + P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb; + uint partitions= keycache->partitions; + DBUG_ENTER("p_end_key_cache"); + DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) keycache)); + + for (i= 0; i < partitions; i++) + { + s_end_key_cache(keycache->partition_array[i], cleanup); + } + if (cleanup) { + for (i= 0; i < partitions; i++) + my_free((uchar*) keycache->partition_array[i], MYF(0)); + my_free((uchar*) keycache->partition_array, MYF(0)); + keycache->key_cache_inited= 0; + } + DBUG_VOID_RETURN; +} + + +/* + Read a block of data from a partitioned key cache into a buffer + + SYNOPSIS + + p_key_cache_read() + keycache_cb pointer to the control block of a partitioned key cache + file handler for the file for the block of data to be read + filepos position of the block of data in the file + level determines the weight of the data + buff buffer to where the data must be placed + length length of the buffer + block_length length of the read data from a key cache block + return_buffer return pointer to the key cache buffer with the data + + DESCRIPTION + This function is the implementation of the key_cache_read interface + function that is employed by partitioned key caches. + The function considers the parameter keycache_cb as a pointer to the + control block structure of the type P_KEY_CACHE_CB for a partitioned + key cache. + In a general case the function reads a block of data from the key cache + into the buffer buff of the size specified by the parameter length. The + beginning of the block of data to be read is specified by the parameters + file and filepos. The length of the read data is the same as the length + of the buffer. The data is read into the buffer in key_cache_block_size + increments. 
To read each portion the function first finds out in what + partition of the key cache this portion(page) is to be saved, and calls + s_key_cache_read with the pointer to the corresponding simple key as + its first parameter. + If the parameter return_buffer is not ignored and its value is TRUE, and + the data to be read of the specified size block_length can be read from one + key cache buffer, then the function returns a pointer to the data in the + key cache buffer. + The function takes into account parameters block_length and return buffer + only in a single-threaded environment. + The parameter 'level' is used only by the midpoint insertion strategy + when the data or its portion cannot be found in the key cache. + + RETURN VALUE + Returns address from where the data is placed if successful, 0 - otherwise. + +*/ + +static +uchar *p_key_cache_read(void *keycache_cb, + File file, my_off_t filepos, int level, + uchar *buff, uint length, + uint block_length __attribute__((unused)), + int return_buffer __attribute__((unused))) +{ + uint r_length; + P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb; + uint offset= (uint) (filepos % keycache->key_cache_block_size); + uchar *start= buff; + DBUG_ENTER("p_key_cache_read"); + DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u", + (uint) file, (ulong) filepos, length)); + +#ifndef THREAD + if (block_length > keycache->key_cache_block_size || offset) + return_buffer=0; +#endif + + /* Read data in key_cache_block_size increments */ + do + { + S_KEY_CACHE_CB *partition= get_key_cache_partition(keycache, + file, filepos); + uchar *ret_buff= 0; + r_length= length; + set_if_smaller(r_length, keycache->key_cache_block_size - offset); + ret_buff= s_key_cache_read((void *) partition, + file, filepos, level, + buff, r_length, + block_length, return_buffer); + if (ret_buff == 0) + DBUG_RETURN(0); +#ifndef THREAD + /* This is only true if we were able to read everything in one block */ + if (return_buffer) + 
DBUG_RETURN(ret_buff); +#endif + filepos+= r_length; + buff+= r_length; + offset= 0; + } while ((length-= r_length)); + + DBUG_RETURN(start); +} + + +/* + Insert a block of file data from a buffer into a partitioned key cache + + SYNOPSIS + p_key_cache_insert() + keycache_cb pointer to the control block of a partitioned key cache + file handler for the file to insert data from + filepos position of the block of data in the file to insert + level determines the weight of the data + buff buffer to read data from + length length of the data in the buffer + + DESCRIPTION + This function is the implementation of the key_cache_insert interface + function that is employed by partitioned key caches. + The function considers the parameter keycache_cb as a pointer to the + control block structure of the type P_KEY_CACHE_CB for a partitioned key + cache. + The function writes a block of file data from a buffer into the key cache. + The buffer is specified with the parameters buff and length - the pointer + to the beginning of the buffer and its size respectively. It's assumed + that the buffer contains the data from 'file' allocated from the position + filepos. The data is copied from the buffer in key_cache_block_size + increments. For every portion of data the function finds out in what simple + key cache from the array of partitions the data must be stored, and after + this calls s_key_cache_insert to copy the data into a key buffer of this + simple key cache. + The parameter level is used to set one characteristic for the key buffers + loaded with the data from buff. The characteristic is used only by the + midpoint insertion strategy. + + RETURN VALUE + 0 if a success, 1 - otherwise. + + NOTES + The function is used by MyISAM to move all blocks from a index file to + the key cache. It can be performed in parallel with reading the file data + from the key buffers by other threads. 
+ +*/ + +static +int p_key_cache_insert(void *keycache_cb, + File file, my_off_t filepos, int level, + uchar *buff, uint length) +{ + uint w_length; + P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb; + uint offset= (uint) (filepos % keycache->key_cache_block_size); + DBUG_ENTER("p_key_cache_insert"); + DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u", + (uint) file,(ulong) filepos, length)); + + + /* Write data in key_cache_block_size increments */ + do + { + S_KEY_CACHE_CB *partition= get_key_cache_partition(keycache, + file, filepos); + w_length= length; + set_if_smaller(w_length, keycache->key_cache_block_size); + if (s_key_cache_insert((void *) partition, + file, filepos, level, + buff, w_length)) + DBUG_RETURN(1); + + filepos+= w_length; + buff+= w_length; + offset = 0; + } while ((length-= w_length)); + + DBUG_RETURN(0); +} + + +/* + Write data from a buffer into a partitioned key cache + + SYNOPSIS + + p_key_cache_write() + keycache_cb pointer to the control block of a partitioned key cache + file handler for the file to write data to + filepos position in the file to write data to + level determines the weight of the data + buff buffer with the data + length length of the buffer + dont_write if is 0 then all dirty pages involved in writing + should have been flushed from key cache + file_extra maps of key cache partitions containing + dirty pages from file + + DESCRIPTION + This function is the implementation of the key_cache_write interface + function that is employed by partitioned key caches. + The function considers the parameter keycache_cb as a pointer to the + control block structure of the type P_KEY_CACHE_CB for a partitioned + key cache. + In a general case the function copies data from a buffer into the key + cache. The buffer is specified with the parameters buff and length - + the pointer to the beginning of the buffer and its size respectively. 
+ It's assumed the buffer contains the data to be written into 'file' + starting from the position filepos. The data is copied from the buffer + in key_cache_block_size increments. For every portion of data the + function finds out in what simple key cache from the array of partitions + the data must be stored, and after this calls s_key_cache_write to copy + the data into a key buffer of this simple key cache. + If the value of the parameter dont_write is FALSE then the function + also writes the data into file. + The parameter level is used to set one characteristic for the key buffers + filled with the data from buff. The characteristic is employed only by + the midpoint insertion strategy. + The parameter file_expra provides a pointer to the shared bitmap of + the partitions that may contains dirty pages for the file. This bitmap + is used to optimize the function p_flush_key_blocks. + + RETURN VALUE + 0 if a success, 1 - otherwise. + + NOTES + This implementation exploits the fact that the function is called only + when a thread has got an exclusive lock for the key file. + +*/ + +static +int p_key_cache_write(void *keycache_cb, + File file, void *file_extra, + my_off_t filepos, int level, + uchar *buff, uint length, + uint block_length __attribute__((unused)), + int dont_write) +{ + uint w_length; + ulonglong *part_map= (ulonglong *) file_extra; + P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb; + uint offset= (uint) (filepos % keycache->key_cache_block_size); + DBUG_ENTER("p_key_cache_write"); + DBUG_PRINT("enter", + ("fd: %u pos: %lu length: %u block_length: %u" + " key_block_length: %u", + (uint) file, (ulong) filepos, length, block_length, + keycache ? 
keycache->key_cache_block_size : 0)); + + + /* Write data in key_cache_block_size increments */ + do + { + S_KEY_CACHE_CB *partition= get_key_cache_partition_for_write(keycache, + file, filepos, + part_map); + w_length = length; + set_if_smaller(w_length, keycache->key_cache_block_size ); + if (s_key_cache_write(partition, + file, 0, filepos, level, + buff, w_length, block_length, + dont_write)) + DBUG_RETURN(1); + + filepos+= w_length; + buff+= w_length; + offset= 0; + } while ((length-= w_length)); + + DBUG_RETURN(0); +} + + +/* + Flush all blocks for a file from key buffers of a partitioned key cache + + SYNOPSIS + + p_flush_key_blocks() + keycache_cb pointer to the control block of a partitioned key cache + file handler for the file to flush to + file_extra maps of key cache partitions containing + dirty pages from file (not used) + flush_type type of the flush operation + + DESCRIPTION + This function is the implementation of the flush_key_blocks interface + function that is employed by partitioned key caches. + The function considers the parameter keycache_cb as a pointer to the + control block structure of the type P_KEY_CACHE_CB for a partitioned + key cache. + In a general case the function flushes the data from all dirty key + buffers related to the file 'file' into this file. The function does + exactly this if the value of the parameter type is FLUSH_KEEP. If the + value of this parameter is FLUSH_RELEASE, the function additionally + releases the key buffers containing data from 'file' for new usage. + If the value of the parameter type is FLUSH_IGNORE_CHANGED the function + just releases the key buffers containing data from 'file'. + The function performs the operation by calling s_flush_key_blocks + for the elements of the array of the simple key caches that comprise + the partitioned key_cache. 
If the value of the parameter type is
    FLUSH_KEEP s_flush_key_blocks is called only for the partitions with
    possibly dirty pages marked in the bitmap pointed to by the parameter
    file_extra.

  RETURN
    0  ok
    1  error

  NOTES
    This implementation exploits the fact that the function is called only
    when a thread has got an exclusive lock for the key file.

*/

static
int p_flush_key_blocks(void *keycache_cb,
                       File file, void *file_extra,
                       enum flush_type type)
{
  uint i;
  P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb;
  uint partitions= keycache->partitions;
  int err= 0;
  ulonglong *dirty_part_map= (ulonglong *) file_extra;
  DBUG_ENTER("p_flush_key_blocks");
  DBUG_PRINT("enter", ("keycache: 0x%lx", (long) keycache));

  for (i= 0; i < partitions; i++)
  {
    S_KEY_CACHE_CB *partition= keycache->partition_array[i];
    /*
      For write-back flushes only the partitions marked dirty for this file
      in the bitmap need to be visited; the other flush types must scan
      every partition. The shift constant is ulonglong since the bitmap
      covers up to 64 partitions.
    */
    if ((type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE) &&
        !((*dirty_part_map) & (((ulonglong) 1) << i)))
      continue;
    err|= s_flush_key_blocks(partition, file, 0, type);
  }
  /* All dirty pages of the file have been flushed now */
  *dirty_part_map= 0;
  /* Normalize the accumulated per-partition results to 0/1 */
  if (err > 0)
    err= 1;

  DBUG_RETURN(err);
}


/*
  Reset the counters of a partitioned key cache

  SYNOPSIS
    p_reset_key_cache_counters()
    name                the name of a key cache
    keycache_cb         pointer to the control block of a partitioned key cache

  DESCRIPTION
    This function is the implementation of the reset_key_cache_counters
    interface function that is employed by partitioned key caches.
    The function considers the parameter keycache_cb as a pointer to the
    control block structure of the type P_KEY_CACHE_CB for a partitioned
    key cache.
    This function resets the values of the statistical counters of the simple
    key caches comprising partitioned key cache to 0. It does it by calling
    s_reset_key_cache_counters for each key cache partition.
    The parameter name is currently not used.

  RETURN
    0 on success (always because it can't fail)

*/

static
int p_reset_key_cache_counters(const char *name __attribute__((unused)),
                               void *keycache_cb)
{
  uint i;
  P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb;
  uint partitions= keycache->partitions;
  DBUG_ENTER("p_reset_key_cache_counters");

  /*
    Delegate the reset to every simple key cache partition. 'name' is
    forwarded unchanged; the unused attribute only suppresses warnings in
    builds where the callee ignores it.
  */
  for (i = 0; i < partitions; i++)
  {
    s_reset_key_cache_counters(name, keycache->partition_array[i]);
  }
  DBUG_RETURN(0);
}


/*
  Get statistics for a partition key cache

  SYNOPSIS
    p_get_key_cache_statistics()
    keycache_cb         pointer to the control block of a partitioned key cache
    partition_no        partition number to get statistics for
    key_cache_stats OUT pointer to the structure for the returned statistics

  DESCRIPTION
    This function is the implementation of the get_key_cache_statistics
    interface function that is employed by partitioned key caches.
    The function considers the parameter keycache_cb as a pointer to the
    control block structure of the type P_KEY_CACHE_CB for a partitioned
    key cache.
    If the value of the parameter partition_no is equal to 0 then aggregated
    statistics for all partitions is returned in the fields of the
    structure key_cache_stat of the type KEY_CACHE_STATISTICS . Otherwise
    the function returns data for the partition number partition_no of the
    key cache in the structure key_cache_stat. (Here partitions are numbered
    starting from 1.)
+ + RETURN + none + +*/ + +static +void p_get_key_cache_statistics(void *keycache_cb, uint partition_no, + KEY_CACHE_STATISTICS *key_cache_stats) +{ + uint i; + S_KEY_CACHE_CB *partition; + P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb; + uint partitions= keycache->partitions; + DBUG_ENTER("p_get_key_cache_statistics_"); + + if (partition_no != 0) + { + partition= keycache->partition_array[partition_no-1]; + s_get_key_cache_statistics((void *) partition, 0, key_cache_stats); + DBUG_VOID_RETURN; + } + key_cache_stats->mem_size= (longlong) keycache->key_cache_mem_size; + key_cache_stats->block_size= (longlong) keycache->key_cache_block_size; + for (i = 0; i < partitions; i++) + { + partition= keycache->partition_array[i]; + key_cache_stats->blocks_used+= partition->blocks_used; + key_cache_stats->blocks_unused+= partition->blocks_unused; + key_cache_stats->blocks_changed+= partition->global_blocks_changed; + key_cache_stats->read_requests+= partition->global_cache_r_requests; + key_cache_stats->reads+= partition->global_cache_read; + key_cache_stats->write_requests+= partition->global_cache_w_requests; + key_cache_stats->writes+= partition->global_cache_write; + } + DBUG_VOID_RETURN; +} + +/* + Get the value of a statistical variable for a partitioned key cache + + SYNOPSIS + p_get_key_cache_stat_value() + keycache_cb pointer to the control block of a partitioned key cache + var_no the ordered number of a statistical variable + + DESCRIPTION + This function is the implementation of the get_key_cache_stat_value + interface function that is employed by partitioned key caches. + The function considers the parameter keycache_cb as a pointer to the + control block structure of the type P_KEY_CACHE_CB for a partitioned + key cache. + This function returns the value of the statistical variable var_no + for this key cache. The variables are numbered starting from 0 to 6. 
  The returned value is calculated as the sum of the values of the
  statistical variable with number var_no for all simple key caches that
  comprise the partitioned key cache.

  RETURN
    The value of the specified statistical variable

*/

static
ulonglong p_get_key_cache_stat_value(void *keycache_cb, uint var_no)
{
  uint i;
  P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb;
  uint partitions= keycache->partitions;
  size_t var_ofs= s_key_cache_stat_var_offsets[var_no];
  ulonglong res= 0;
  DBUG_ENTER("p_get_key_cache_stat_value");

  /*
    Counters 0-2 are stored as long in S_KEY_CACHE_CB, counters 3-6 as
    ulonglong (see s_key_cache_stat_var_offsets); read each partition's
    counter through the matching type and sum the values.
  */
  if (var_no < 3)
  {
    for (i = 0; i < partitions; i++)
    {
      S_KEY_CACHE_CB *partition= keycache->partition_array[i];
      res+= (ulonglong) (*(long *) ((char *) partition + var_ofs));
    }
  }
  else
  {
    for (i = 0; i < partitions; i++)
    {
      S_KEY_CACHE_CB *partition= keycache->partition_array[i];
      res+= *(ulonglong *) ((char *) partition + var_ofs);
    }
  }
  DBUG_RETURN(res);
}


/*
  The array of pointers to the key cache interface functions used by
  partitioned key caches. Any partitioned key cache object caches exploits
  this array.

  The current implementation of these functions does not allow to call
  them from the MySQL server code directly. The key cache interface
  wrappers must be used for this purpose.
*/

static KEY_CACHE_FUNCS p_key_cache_funcs =
{
  p_init_key_cache,
  p_resize_key_cache,
  p_change_key_cache_param,
  p_key_cache_read,
  p_key_cache_insert,
  p_key_cache_write,
  p_flush_key_blocks,
  p_reset_key_cache_counters,
  p_end_key_cache,
  p_get_key_cache_statistics,
  p_get_key_cache_stat_value
};


/******************************************************************************
  Key Cache Interface Module

  The module contains wrappers for all key cache interface functions.

  Currently there are key caches of two types: simple key caches and
  partitioned key caches.
Each type (class) has its own implementation of the + basic key cache operations used the MyISAM storage engine. The pointers + to the implementation functions are stored in two static structures of the + type KEY_CACHE_FUNC: s_key_cache_funcs - for simple key caches, and + p_key_cache_funcs - for partitioned key caches. When a key cache object is + created the constructor procedure init_key_cache places a pointer to the + corresponding table into one of its fields. The procedure also initializes + a control block for the key cache oject and saves the pointer to this + block in another field of the key cache object. + When a key cache wrapper function is invoked for a key cache object to + perform a basic key cache operation it looks into the interface table + associated with the key cache oject and calls the corresponding + implementation of the operation. It passes the saved key cache control + block to this implementation. If, for some reasons, the control block + has not been fully initialized yet, the wrapper function either does not + do anything or, in the case when it perform a read/write operation, the + function do it directly through the system i/o functions. + + As we can see the model with which the key cache interface is supported + as quite conventional for interfaces in general. + +******************************************************************************/ + + +/* + Initialize a key cache + + SYNOPSIS + init_key_cache() + keycache pointer to the key cache to be initialized + key_cache_block_size size of blocks to keep cached data + use_mem total memory to use for cache buffers/structures + division_limit division limit (may be zero) + age_threshold age threshold (may be zero) + partitions number of partitions in the key cache + + DESCRIPTION + The function creates a control block structure for a key cache and + places the pointer to this block in the structure keycache. 
+ If the value of the parameter 'partitions' is 0 then a simple key cache + is created. Otherwise a partitioned key cache with the specified number + of partitions is created. + The parameter key_cache_block_size specifies the size of the blocks in + the key cache to be created. The parameters division_limit and + age_threshold determine the initial values of those characteristics of + the key cache that are used for midpoint insertion strategy. The parameter + use_mem specifies the total amount of memory to be allocated for the + key cache buffers and for all auxiliary structures. + + RETURN VALUE + total number of blocks in key cache partitions, if successful, + <= 0 - otherwise. + + NOTES + if keycache->key_cache_inited != 0 we assume that the memory + for the control block of the key cache has been already allocated. + + It's assumed that no two threads call this function simultaneously + referring to the same key cache handle. + +*/ + +int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, + size_t use_mem, uint division_limit, + uint age_threshold, uint partitions) +{ + void *keycache_cb; + int blocks; + if (keycache->key_cache_inited) + keycache_cb= keycache->keycache_cb; + else + { + if (partitions == 0) + { + if (!(keycache_cb= (void *) my_malloc(sizeof(S_KEY_CACHE_CB), MYF(0)))) + return 0; + ((S_KEY_CACHE_CB *) keycache_cb)->key_cache_inited= 0; + keycache->key_cache_type= SIMPLE_KEY_CACHE; + keycache->interface_funcs= &s_key_cache_funcs; + } + else + { + if (!(keycache_cb= (void *) my_malloc(sizeof(P_KEY_CACHE_CB), MYF(0)))) + return 0; + ((P_KEY_CACHE_CB *) keycache_cb)->key_cache_inited= 0; + keycache->key_cache_type= PARTITIONED_KEY_CACHE; + keycache->interface_funcs= &p_key_cache_funcs; + } + keycache->keycache_cb= keycache_cb; + keycache->key_cache_inited= 1; + } + + if (partitions != 0) + { + ((P_KEY_CACHE_CB *) keycache_cb)->partitions= partitions; + } + keycache->can_be_used= 0; + blocks= keycache->interface_funcs->init(keycache_cb, 
key_cache_block_size, + use_mem, division_limit, + age_threshold); + keycache->partitions= partitions ? + ((P_KEY_CACHE_CB *) keycache_cb)->partitions : 0; + DBUG_ASSERT(partitions <= MAX_KEY_CACHE_PARTITIONS); + if (blocks > 0) + keycache->can_be_used= 1; + return blocks; +} + + +/* + Resize a key cache + + SYNOPSIS + resize_key_cache() + keycache pointer to the key cache to be resized + key_cache_block_size size of blocks to keep cached data + use_mem total memory to use for the new key cache + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + + DESCRIPTION + The function operates over the key cache key cache. + The parameter key_cache_block_size specifies the new size of the block + buffers in the key cache. The parameters division_limit and age_threshold + determine the new initial values of those characteristics of the key cache + that are used for midpoint insertion strategy. The parameter use_mem + specifies the total amount of memory to be allocated for the key cache + buffers and for all auxiliary structures. + + RETURN VALUE + number of blocks in the key cache, if successful, + 0 - otherwise. + + NOTES + The function does not block the calls and executions of other functions + from the key cache interface. However it assumes that the calls of + resize_key_cache itself are serialized. + + Currently the function is called when the values of the variables + key_buffer_size and/or key_cache_block_size are being reset for + the key cache keycache. 
+ +*/ + +int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, + size_t use_mem, uint division_limit, uint age_threshold) +{ + int blocks= -1; + if (keycache->key_cache_inited) + { + if ((uint) keycache->param_partitions != keycache->partitions && use_mem) + blocks= repartition_key_cache (keycache, + key_cache_block_size, use_mem, + division_limit, age_threshold, + (uint) keycache->param_partitions); + else + { + blocks= keycache->interface_funcs->resize(keycache->keycache_cb, + key_cache_block_size, + use_mem, division_limit, + age_threshold); + + if (keycache->partitions) + keycache->partitions= + ((P_KEY_CACHE_CB *)(keycache->keycache_cb))->partitions; + } + if (blocks <= 0) + keycache->can_be_used= 0; + } + return blocks; +} + + +/* + Change key cache parameters of a key cache + + SYNOPSIS + change_key_cache_param() + keycache pointer to the key cache to change parameters for + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + + DESCRIPTION + The function sets new values of the division limit and the age threshold + used when the key cache keycach employs midpoint insertion strategy. + The parameters division_limit and age_threshold provide these new values. + + RETURN VALUE + none + + NOTES + Currently the function is called when the values of the variables + key_cache_division_limit and/or key_cache_age_threshold are being reset + for the key cache keycache. + +*/ + +void change_key_cache_param(KEY_CACHE *keycache, uint division_limit, + uint age_threshold) +{ + if (keycache->key_cache_inited) + { + + keycache->interface_funcs->change_param(keycache->keycache_cb, + division_limit, + age_threshold); + } +} + + +/* + Destroy a key cache + + SYNOPSIS + end_key_cache() + keycache pointer to the key cache to be destroyed + cleanup <=> complete free + + DESCRIPTION + The function frees the memory allocated for the cache blocks and + auxiliary structures used by the key cache keycache. 
If the value + of the parameter cleanup is TRUE then all resources used by the key + cache are to be freed. + + RETURN VALUE + none +*/ + +void end_key_cache(KEY_CACHE *keycache, my_bool cleanup) +{ + if (keycache->key_cache_inited) + { + keycache->interface_funcs->end(keycache->keycache_cb, cleanup); + if (cleanup) + { + if (keycache->keycache_cb) + { + my_free((uchar *) keycache->keycache_cb, MYF(0)); + keycache->keycache_cb= 0; + } + keycache->key_cache_inited= 0; + } + keycache->can_be_used= 0; + } +} + + +/* + Read a block of data from a key cache into a buffer + + SYNOPSIS + + key_cache_read() + keycache pointer to the key cache to read data from + file handler for the file for the block of data to be read + filepos position of the block of data in the file + level determines the weight of the data + buff buffer to where the data must be placed + length length of the buffer + block_length length of the data read from a key cache block + return_buffer return pointer to the key cache buffer with the data + + DESCRIPTION + The function operates over buffers of the key cache keycache. + In a general case the function reads a block of data from the key cache + into the buffer buff of the size specified by the parameter length. The + beginning of the block of data to be read is specified by the parameters + file and filepos. The length of the read data is the same as the length + of the buffer. + If the parameter return_buffer is not ignored and its value is TRUE, and + the data to be read of the specified size block_length can be read from one + key cache buffer, then the function returns a pointer to the data in the + key cache buffer. + The parameter 'level' is used only by the midpoint insertion strategy + when the data or its portion cannot be found in the key cache. + The function reads data into the buffer directly from file if the control + block of the key cache has not been initialized yet. 
+ + RETURN VALUE + Returns address from where the data is placed if successful, 0 - otherwise. + + NOTES. + Filepos must be a multiple of 'block_length', but it doesn't + have to be a multiple of key_cache_block_size; +*/ + +uchar *key_cache_read(KEY_CACHE *keycache, + File file, my_off_t filepos, int level, + uchar *buff, uint length, + uint block_length, int return_buffer) +{ + if (keycache->key_cache_inited && keycache->can_be_used) + return keycache->interface_funcs->read(keycache->keycache_cb, + file, filepos, level, + buff, length, + block_length, return_buffer); + + /* We can't use mutex here as the key cache may not be initialized */ + keycache->global_cache_r_requests++; + keycache->global_cache_read++; + + if (my_pread(file, (uchar*) buff, length, filepos, MYF(MY_NABP))) + return (uchar *) 0; + + return buff; +} + + +/* + Insert a block of file data from a buffer into a key cache + + SYNOPSIS + key_cache_insert() + keycache pointer to the key cache to insert data into + file handler for the file to insert data from + filepos position of the block of data in the file to insert + level determines the weight of the data + buff buffer to read data from + length length of the data in the buffer + + DESCRIPTION + The function operates over buffers of the key cache keycache. + The function writes a block of file data from a buffer into the key cache. + The buffer is specified with the parameters buff and length - the pointer + to the beginning of the buffer and its size respectively. It's assumed + that the buffer contains the data from 'file' allocated from the position + filepos. + The parameter level is used to set one characteristic for the key buffers + loaded with the data from buff. The characteristic is used only by the + midpoint insertion strategy. + + RETURN VALUE + 0 if a success, 1 - otherwise. + + NOTES + The function is used by MyISAM to move all blocks from a index file to + the key cache. 
+ It is assumed that it may be performed in parallel with reading the file + data from the key buffers by other threads. + +*/ + +int key_cache_insert(KEY_CACHE *keycache, + File file, my_off_t filepos, int level, + uchar *buff, uint length) +{ + if (keycache->key_cache_inited && keycache->can_be_used) + return keycache->interface_funcs->insert(keycache->keycache_cb, + file, filepos, level, + buff, length); + return 0; +} + + +/* + Write data from a buffer into a key cache + + SYNOPSIS + + key_cache_write() + keycache pointer to the key cache to write data to + file handler for the file to write data to + filepos position in the file to write data to + level determines the weight of the data + buff buffer with the data + length length of the buffer + dont_write if is 0 then all dirty pages involved in writing + should have been flushed from key cache + file_extra pointer to optional file attributes + + DESCRIPTION + The function operates over buffers of the key cache keycache. + In a general case the function writes data from a buffer into the key + cache. The buffer is specified with the parameters buff and length - + the pointer to the beginning of the buffer and its size respectively. + It's assumed the buffer contains the data to be written into 'file' + starting from the position filepos. + If the value of the parameter dont_write is FALSE then the function + also writes the data into file. + The parameter level is used to set one characteristic for the key buffers + filled with the data from buff. The characteristic is employed only by + the midpoint insertion strategy. + The parameter file_expra may point to additional file attributes used + for optimization or other purposes. + The function writes data from the buffer directly into file if the control + block of the key cache has not been initialized yet. + + RETURN VALUE + 0 if a success, 1 - otherwise. 
+ + NOTES + This implementation may exploit the fact that the function is called only + when a thread has got an exclusive lock for the key file. + +*/ + +int key_cache_write(KEY_CACHE *keycache, + File file, void *file_extra, + my_off_t filepos, int level, + uchar *buff, uint length, + uint block_length, int force_write) +{ + if (keycache->key_cache_inited && keycache->can_be_used) + return keycache->interface_funcs->write(keycache->keycache_cb, + file, file_extra, + filepos, level, + buff, length, + block_length, force_write); + + /* We can't use mutex here as the key cache may not be initialized */ + keycache->global_cache_w_requests++; + keycache->global_cache_write++; + if (my_pwrite(file, buff, length, filepos, MYF(MY_NABP | MY_WAIT_IF_FULL))) + return 1; + + return 0; +} + + +/* + Flush all blocks for a file from key buffers of a key cache + + SYNOPSIS + + flush_key_blocks() + keycache pointer to the key cache whose blocks are to be flushed + file handler for the file to flush to + file_extra maps of key cache (used for partitioned key caches) + flush_type type of the flush operation + + DESCRIPTION + The function operates over buffers of the key cache keycache. + In a general case the function flushes the data from all dirty key + buffers related to the file 'file' into this file. The function does + exactly this if the value of the parameter type is FLUSH_KEEP. If the + value of this parameter is FLUSH_RELEASE, the function additionally + releases the key buffers containing data from 'file' for new usage. + If the value of the parameter type is FLUSH_IGNORE_CHANGED the function + just releases the key buffers containing data from 'file'. + If the value of the parameter type is FLUSH_KEEP the function may use + the value of the parameter file_extra pointing to possibly dirty + partitions to optimize the operation for partitioned key caches. 
+ + RETURN + 0 ok + 1 error + + NOTES + Any implementation of the function may exploit the fact that the function + is called only when a thread has got an exclusive lock for the key file. + +*/ + +int flush_key_blocks(KEY_CACHE *keycache, + int file, void *file_extra, + enum flush_type type) +{ + if (keycache->key_cache_inited) + return keycache->interface_funcs->flush(keycache->keycache_cb, + file, file_extra, type); + return 0; +} + + +/* + Reset the counters of a key cache + + SYNOPSIS + reset_key_cache_counters() + name the name of a key cache (unused) + keycache pointer to the key cache for which to reset counters + + DESCRIPTION + This function resets the values of the statistical counters for the key + cache keycache. + The parameter name is currently not used. + + RETURN + 0 on success (always because it can't fail) + + NOTES + This procedure is used by process_key_caches() to reset the counters of all + currently used key caches, both the default one and the named ones. + +*/ + +int reset_key_cache_counters(const char *name __attribute__((unused)), + KEY_CACHE *keycache) +{ + if (keycache->key_cache_inited) + { + + return keycache->interface_funcs->reset_counters(name, + keycache->keycache_cb); + } + return 0; +} + + +/* + Get statistics for a key cache + + SYNOPSIS + get_key_cache_statistics() + keycache pointer to the key cache to get statistics for + partition_no partition number to get statistics for + key_cache_stats OUT pointer to the structure for the returned statistics + + DESCRIPTION + If the value of the parameter partition_no is equal to 0 then statistics + for the whole key cache keycache (aggregated statistics) is returned in the + fields of the structure key_cache_stat of the type KEY_CACHE_STATISTICS. + Otherwise the value of the parameter partition_no makes sense only for + a partitioned key cache. In this case the function returns statistics + for the partition with the specified number partition_no. 
+ + RETURN + none + +*/ + +void get_key_cache_statistics(KEY_CACHE *keycache, uint partition_no, + KEY_CACHE_STATISTICS *key_cache_stats) +{ + bzero(key_cache_stats, sizeof(KEY_CACHE_STATISTICS)); + if (keycache->key_cache_inited) + { + keycache->interface_funcs->get_stats(keycache->keycache_cb, + partition_no, key_cache_stats); + } +} + + +/* + Get the value of a statistical variable for a key cache + + SYNOPSIS + get_key_cache_stat_value() + keycache pointer to the key cache to get statistics for + var_no the ordered number of a statistical variable + + DESCRIPTION + This function returns the value of the statistical variable var_no for + the key cache keycache. The variables are numbered starting from 0 to 6. + + RETURN + The value of the specified statistical variable. + + NOTES + Currently for any key cache the function can return values for the + following 7 statistical variables: + + Name Number + + blocks_used 0 + blocks_unused 1 + blocks_changed 2 + read_requests 3 + reads 4 + write_requests 5 + writes 6 + +*/ + +ulonglong get_key_cache_stat_value(KEY_CACHE *keycache, uint var_no) +{ + if (keycache->key_cache_inited) + { + return keycache->interface_funcs->get_stat_val(keycache->keycache_cb, + var_no); + } + else + return 0; +} + + +/* + Repartition a key cache + + SYNOPSIS + repartition_key_cache() + keycache pointer to the key cache to be repartitioned + key_cache_block_size size of blocks to keep cached data + use_mem total memory to use for the new key cache + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + partitions new number of partitions in the key cache + + DESCRIPTION + The function operates over the key cache keycache. + The parameter partitions specifies the number of partitions in the key + cache after repartitioning. If the value of this parameter is 0 then + a simple key cache must be created instead of the old one. 
+ The parameter key_cache_block_size specifies the new size of the block + buffers in the key cache. The parameters division_limit and age_threshold + determine the new initial values of those characteristics of the key cache + that are used for midpoint insertion strategy. The parameter use_mem + specifies the total amount of memory to be allocated for the new key + cache buffers and for all auxiliary structures. + + RETURN VALUE + number of blocks in the key cache, if successful, + 0 - otherwise. + + NOTES + The function does not block the calls and executions of other functions + from the key cache interface. However it assumes that the calls of + resize_key_cache itself are serialized. + + Currently the function is called when the value of the variable + key_cache_partitions is being reset for the key cache keycache. + +*/ + +int repartition_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, + size_t use_mem, uint division_limit, + uint age_threshold, uint partitions) +{ + uint blocks= -1; + if (keycache->key_cache_inited) + { + keycache->interface_funcs->resize(keycache->keycache_cb, + key_cache_block_size, 0, + division_limit, age_threshold); + end_key_cache(keycache, 1); + blocks= init_key_cache(keycache, key_cache_block_size, use_mem, + division_limit, age_threshold, partitions); + } + return blocks; +} + -- cgit v1.2.1 From 292f6568fa377420c81e0317a26b804057ce208c Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Tue, 9 Mar 2010 21:22:24 +0200 Subject: Added count of my_sync calls (to SHOW STATUS) tmp_table_size can now be set to 0 (to disable in memory internal temp tables) Improved speed for internal Maria temp tables: - Don't use packed keys, except with long text fields. - Don't copy key all accessed pages during key search. 
Some new benchmark tests to sql-bench (for group by) BUILD/compile-pentium64-gcov: Update script to use same pentium_config flags as other tests BUILD/compile-pentium64-gprof: Update script to use same pentium_config flags as other tests include/my_sys.h: Added count of my_sync calls mysql-test/r/variables.result: tmp_table_size can now be set to 0 sql-bench/test-select.sh: Added some new test for GROUP BY on a not key field and group by with different order by sql/mysqld.cc: Added count of my_sync calls tmp_table_size can now be set to 0 (to disable in memory internal temp tables) sql/sql_select.cc: If tmp_table_size is 0, don't use in memory temp tables (good for benchmarking MyISAM/Maria temp tables) Don't pack keys for Maria tables; The 8K page size makes packed keys too slow for temp tables. storage/maria/ma_key_recover.h: Moved definition to maria_def.h storage/maria/ma_page.c: Moved code used to simplify comparing of identical Maria tables to own function (page_cleanup()) Fixed that one can read a page with a read lock. storage/maria/ma_rkey.c: For not exact key reads, cache the page where we found key (to speed up future read-next/read-prev calls) storage/maria/ma_search.c: Moved code to cache last key page to separate function. Instead of copying pages, only get a link to the page. This notable speeds up key searches on bigger tables. storage/maria/ma_write.c: Added comment storage/maria/maria_def.h: Moved page_cleanup() to separate function. 
--- mysys/my_sync.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'mysys') diff --git a/mysys/my_sync.c b/mysys/my_sync.c index f8961202fa3..967a6ae6c78 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -17,6 +17,8 @@ #include "mysys_err.h" #include +ulong my_sync_count; /* Count number of sync calls */ + /* Sync data in file to disk @@ -46,6 +48,7 @@ int my_sync(File fd, myf my_flags) DBUG_ENTER("my_sync"); DBUG_PRINT("my",("fd: %d my_flags: %d", fd, my_flags)); + statistic_increment(my_sync_count,&THR_LOCK_open); do { #if defined(F_FULLFSYNC) -- cgit v1.2.1 From 291fd9698340f3d83ff096542720f7335cb078d2 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 29 Mar 2010 17:13:53 +0200 Subject: pluggable auth with plugin examples Makefile.am: add new API files to the check_abi rule, remove duplicates client/CMakeLists.txt: now a client can use dlopen too client/Makefile.am: be csh-friendly include/my_global.h: add dummy plugs for dlopen and co. for the code that needs them to work in static builds mysys/Makefile.am: be csh-friendly plugin/auth/dialog.c: typo fixed --- mysys/Makefile.am | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'mysys') diff --git a/mysys/Makefile.am b/mysys/Makefile.am index 5137566c158..337fc86c12e 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -74,13 +74,13 @@ libmysys_a_LIBADD = @THREAD_LOBJECTS@ # testhash_DEPENDENCIES= $(LIBRARIES) # test_charset_DEPENDENCIES= $(LIBRARIES) # charset2html_DEPENDENCIES= $(LIBRARIES) -DEFS = -DDEFAULT_BASEDIR=\"$(prefix)\" \ - -DMYSQL_DATADIR="\"$(MYSQLDATAdir)\"" \ - -DDEFAULT_CHARSET_HOME="\"$(MYSQLBASEdir)\"" \ - -DSHAREDIR="\"$(MYSQLSHAREdir)\"" \ +DEFS = -DDEFAULT_BASEDIR='"$(prefix)"' \ + -DMYSQL_DATADIR='"$(MYSQLDATAdir)"' \ + -DDEFAULT_CHARSET_HOME='"$(MYSQLBASEdir)"' \ + -DSHAREDIR'="$(MYSQLSHAREdir)"' \ -DDEFAULT_HOME_ENV=MYSQL_HOME \ -DDEFAULT_GROUP_SUFFIX_ENV=MYSQL_GROUP_SUFFIX \ - -DDEFAULT_SYSCONFDIR="\"$(sysconfdir)\"" \ + 
-DDEFAULT_SYSCONFDIR='"$(sysconfdir)"' \ @DEFS@ libmysys_a_DEPENDENCIES= @THREAD_LOBJECTS@ -- cgit v1.2.1 From d904739cc1d98107f67e76a67b0ea6eb2af326ec Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Tue, 30 Mar 2010 15:36:49 +0300 Subject: Removed compiler warning Disable pbxt for test cases not using pbxt (speeds up test suite) extra/comp_err.c: Added cast to get rid of compiler warning extra/libevent/kqueue.c: Added cast to get rid of compiler warning mysql-test/lib/mtr_cases.pm: Use --skip-pbxt for test cases that doesn't need pbxt Collect default-storage-engine from suite.opt file (should actually be my.cnf file, but that wasn't easy to do) mysql-test/suite/pbxt/t/suite.opt: Added marker for mysql-test-run.pl that this suite require pbxt mysys/mf_keycache.c: Use LINT_INIT() to remove compiler warnings mysys/my_gethostbyname.c: Remove compiler warnings sql/handler.cc: Reset variable that may be used uninitialized sql/item.h: Remove compiler warning sql/mysqld.cc: Use LINT_INIT() to remove compiler warnings sql/sql_class.h: Remove compiler warning sql/sql_table.cc: Ensure variable is always set (to remove compiler warning) sql/sql_view.cc: Use LINT_INIT() to remove compiler warnings storage/maria/ma_loghandler.c: Use LINT_INIT() to remove compiler warnings storage/myisammrg/ha_myisammrg.cc: Fixed wrong type to printf storage/myisammrg/myrg_open.c: Use LINT_INIT() to remove compiler warnings storage/xtradb/include/ut0lst.h: Trivial change of macro to remove compiler warning strings/ctype-ucs2.c: Use LINT_INIT() to remove compiler warnings strings/ctype-utf8.c: Use LINT_INIT() to remove compiler warnings support-files/compiler_warnings.supp: Suppress some not relevant warnings unittest/mysys/waiting_threads-t.c: Don't use ftruncate() as this gives warning about ignored return value --- mysys/mf_keycache.c | 6 ++++++ mysys/my_gethostbyname.c | 9 ++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_keycache.c 
b/mysys/mf_keycache.c index 0630d194234..f7b420c3bf2 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -3924,6 +3924,12 @@ restart: uint next_status; uint hash_requests; + LINT_INIT(next_hash_link); + LINT_INIT(next_diskpos); + LINT_INIT(next_file); + LINT_INIT(next_status); + LINT_INIT(hash_requests); + total_found++; found++; KEYCACHE_DBUG_ASSERT(found <= keycache->blocks_used); diff --git a/mysys/my_gethostbyname.c b/mysys/my_gethostbyname.c index 067fdfee9db..985a76faf0d 100644 --- a/mysys/my_gethostbyname.c +++ b/mysys/my_gethostbyname.c @@ -91,9 +91,12 @@ extern pthread_mutex_t LOCK_gethostbyname_r; is finished with the structure. */ -struct hostent *my_gethostbyname_r(const char *name, - struct hostent *result, char *buffer, - int buflen, int *h_errnop) +struct hostent * +my_gethostbyname_r(const char *name, + struct hostent *result __attribute__((unused)), + char *buffer __attribute__((unused)), + int buflen__attribute__((unused)), + int *h_errnop) { struct hostent *hp; pthread_mutex_lock(&LOCK_gethostbyname_r); -- cgit v1.2.1 From 33a5571bb261ce23f7b32e4b9ba2caa53dc4c752 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Wed, 31 Mar 2010 23:50:54 +0300 Subject: Added missing space from last push Fixed compiler warnings mysys/my_gethostbyname.c: Added missing space from last push storage/xtradb/handler/i_s.cc: Removed not used variable storage/xtradb/log/log0recv.c: Removed not used variable --- mysys/my_gethostbyname.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_gethostbyname.c b/mysys/my_gethostbyname.c index 985a76faf0d..abd388302be 100644 --- a/mysys/my_gethostbyname.c +++ b/mysys/my_gethostbyname.c @@ -95,7 +95,7 @@ struct hostent * my_gethostbyname_r(const char *name, struct hostent *result __attribute__((unused)), char *buffer __attribute__((unused)), - int buflen__attribute__((unused)), + int buflen __attribute__((unused)), int *h_errnop) { struct hostent *hp; -- cgit v1.2.1 From 
59baf97d56ab2a06fd6cde5509ba933c102fb203 Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Thu, 1 Apr 2010 14:42:40 -0700 Subject: Post-review fixes. --- mysys/mf_keycache.c | 946 +++++++++++++++++++++++++--------------------------- 1 file changed, 460 insertions(+), 486 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 1f1f0c35141..45a3dd699b1 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -49,6 +49,7 @@ One cache can handle many files. It must contain buffers of the same blocksize. + init_key_cache() should be used to init cache handler. The free list (free_block_list) is a stack like structure. @@ -151,7 +152,7 @@ typedef struct st_keycache_wqueue /* Control block for a simple (non-partitioned) key cache */ -typedef struct st_s_key_cache_cb +typedef struct st_simple_key_cache_cb { my_bool key_cache_inited; /* <=> control block is allocated */ my_bool in_resize; /* true during resize operation */ @@ -202,7 +203,7 @@ typedef struct st_s_key_cache_cb int blocks; /* max number of blocks in the cache */ uint hash_factor; /* factor used to calculate hash function */ my_bool in_init; /* Set to 1 in MySQL during init/resize */ -} S_KEY_CACHE_CB; +} SIMPLE_KEY_CACHE_CB; /* Some compilation flags have been added specifically for this module @@ -314,12 +315,8 @@ KEY_CACHE *dflt_key_cache= &dflt_key_cache_var; #define FLUSH_CACHE 2000 /* sort this many blocks at once */ -static int flush_all_key_blocks(S_KEY_CACHE_CB *keycache); -/* -static void s_change_key_cache_param(void *keycache_cb, uint division_limit, - uint age_threshold); -*/ -static void s_end_key_cache(void *keycache_cb, my_bool cleanup); +static int flush_all_key_blocks(SIMPLE_KEY_CACHE_CB *keycache); +static void end_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache, my_bool cleanup); #ifdef THREAD static void wait_on_queue(KEYCACHE_WQUEUE *wqueue, pthread_mutex_t *mutex); @@ -328,9 +325,9 @@ static void release_whole_queue(KEYCACHE_WQUEUE *wqueue); 
#define wait_on_queue(wqueue, mutex) do {} while (0) #define release_whole_queue(wqueue) do {} while (0) #endif -static void free_block(S_KEY_CACHE_CB *keycache, BLOCK_LINK *block); +static void free_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block); #if !defined(DBUG_OFF) -static void test_key_cache(S_KEY_CACHE_CB *keycache, +static void test_key_cache(SIMPLE_KEY_CACHE_CB *keycache, const char *where, my_bool lock); #endif #define KEYCACHE_BASE_EXPR(f, pos) \ @@ -433,7 +430,7 @@ static int keycache_pthread_cond_signal(pthread_cond_t *cond); #define inline /* disabled inline for easier debugging */ static int fail_block(BLOCK_LINK *block); static int fail_hlink(HASH_LINK *hlink); -static int cache_empty(S_KEY_CACHE_CB *keycache); +static int cache_empty(SIMPLE_KEY_CACHE_CB *keycache); #endif @@ -447,8 +444,8 @@ static inline uint next_power(uint value) Initialize a simple key cache SYNOPSIS - s_init_key_cache() - keycache_cb pointer to the control block of a simple key cache + init_simple_key_cache() + keycache pointer to the control block of a simple key cache key_cache_block_size size of blocks to keep cached data use_mem memory to use for the key cache buferrs/structures division_limit division limit (may be zero) @@ -458,8 +455,8 @@ static inline uint next_power(uint value) This function is the implementation of the init_key_cache interface function that is employed by simple (non-partitioned) key caches. The function builds a simple key cache and initializes the control block - structure of the type S_KEY_CACHE_CB that is used for this key cache. - The parameter keycache_cb is supposed to point to this structure. + structure of the type SIMPLE_KEY_CACHE_CB that is used for this key cache. + The parameter keycache is supposed to point to this structure. The parameter key_cache_block_size specifies the size of the blocks in the key cache to be built. 
The parameters division_limit and age_threshhold determine the initial values of those characteristics of the key cache @@ -478,19 +475,17 @@ static inline uint next_power(uint value) It's assumed that no two threads call this function simultaneously referring to the same key cache handle. - */ static -int s_init_key_cache(void *keycache_cb, uint key_cache_block_size, - size_t use_mem, uint division_limit, - uint age_threshold) +int init_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache, uint key_cache_block_size, + size_t use_mem, uint division_limit, + uint age_threshold) { - S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; ulong blocks, hash_links; size_t length; int error; - DBUG_ENTER("init_key_cache"); + DBUG_ENTER("init_simple_key_cache"); DBUG_ASSERT(key_cache_block_size >= 512); KEYCACHE_DEBUG_OPEN; @@ -653,16 +648,16 @@ err: Prepare for resizing a simple key cache SYNOPSIS - s_prepare_resize_key_cache() - keycache_cb pointer to the control block of a simple key cache + prepare_resize_simple_key_cache() + keycache pointer to the control block of a simple key cache with_resize_queue <=> resize queue is used release_lock <=> release the key cache lock before return DESCRIPTION This function flushes all dirty pages from a simple key cache and after - this it destroys the key cache calling s_end_key_cache. The function - considers the parameter keycache_cb as a pointer to the control block - structure of the type S_KEY_CACHE_CB for this key cache. + this it destroys the key cache calling end_simple_key_cache. The function + takes the parameter keycache as a pointer to the control block + structure of the type SIMPLE_KEY_CACHE_CB for this key cache. The parameter with_resize_queue determines weather the resize queue is involved (MySQL server never uses this queue). The parameter release_lock says weather the key cache lock must be released before return from @@ -673,19 +668,18 @@ err: 1 - otherwise. 
NOTES - This function is the called by s_resize_key_cache and p_resize_key_cache - that resize simple and partitioned key caches respectively. - + This function is the called by resize_simple_key_cache and + resize_partitioned_key_cache that resize simple and partitioned key caches + respectively. */ static -int s_prepare_resize_key_cache(void *keycache_cb, - my_bool with_resize_queue, - my_bool release_lock) +int prepare_resize_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache, + my_bool with_resize_queue, + my_bool release_lock) { int res= 0; - S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; - DBUG_ENTER("s_prepare_resize_key_cache"); + DBUG_ENTER("prepare_resize_simple_key_cache"); keycache_pthread_mutex_lock(&keycache->cache_lock); @@ -749,7 +743,7 @@ int s_prepare_resize_key_cache(void *keycache_cb, KEYCACHE_DBUG_ASSERT(keycache->cnt_for_resize_op == 0); #endif - s_end_key_cache(keycache_cb, 0); + end_simple_key_cache(keycache, 0); finish: if (release_lock) @@ -762,16 +756,16 @@ finish: Finalize resizing a simple key cache SYNOPSIS - s_finish_resize_key_cache() - keycache_cb pointer to the control block of a simple key cache + finish_resize_simple_key_cache() + keycache pointer to the control block of a simple key cache with_resize_queue <=> resize queue is used acquire_lock <=> acquire the key cache lock at start DESCRIPTION This function performs finalizing actions for the operation of - resizing a simple key cache. The function considers the parameter - keycache_cb as a pointer to the control block structure of the type - S_KEY_CACHE_CB for this key cache. The function sets the flag + resizing a simple key cache. The function takes the parameter + keycache as a pointer to the control block structure of the type + SIMPLE_KEY_CACHE_CB for this key cache. The function sets the flag in_resize in this structure to FALSE. The parameter with_resize_queue determines weather the resize queue is involved (MySQL server never uses this queue). 
@@ -782,22 +776,23 @@ finish: none NOTES - This function is the called by s_resize_key_cache and p_resize_key_cache - that resize simple and partitioned key caches respectively. - + This function is the called by resize_simple_key_cache and + resize_partitioned_key_cache that resize simple and partitioned key caches + respectively. */ static -void s_finish_resize_key_cache(void *keycache_cb, - my_bool with_resize_queue, - my_bool acquire_lock) +void finish_resize_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache, + my_bool with_resize_queue, + my_bool acquire_lock) { - S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; - DBUG_ENTER("s_finish_resize_key_cache"); + DBUG_ENTER("finish_resize_simple_key_cache"); if (acquire_lock) keycache_pthread_mutex_lock(&keycache->cache_lock); - + + safe_mutex_assert_owner(&keycache->cache_lock); + /* Mark the resize finished. This allows other threads to start a resize or to request new cache blocks. @@ -820,8 +815,8 @@ void s_finish_resize_key_cache(void *keycache_cb, Resize a simple key cache SYNOPSIS - s_resize_key_cache() - keycache_cb pointer to the control block of a simple key cache + resize_simple_key_cache() + keycache pointer to the control block of a simple key cache key_cache_block_size size of blocks to keep cached data use_mem memory to use for the key cache buffers/structures division_limit new division limit (if not zero) @@ -830,8 +825,8 @@ void s_finish_resize_key_cache(void *keycache_cb, DESCRIPTION This function is the implementation of the resize_key_cache interface function that is employed by simple (non-partitioned) key caches. - The function considers the parameter keycache_cb as a pointer to the - control block structure of the type S_KEY_CACHE_CB for the simple key + The function takes the parameter keycache as a pointer to the + control block structure of the type SIMPLE_KEY_CACHE_CB for the simple key cache to be resized. 
The parameter key_cache_block_size specifies the new size of the blocks in the key cache. The parameters division_limit and age_threshold @@ -845,47 +840,45 @@ void s_finish_resize_key_cache(void *keycache_cb, 0 - otherwise. NOTES. - The function first calls the function s_prepare_resize_key_cache + The function first calls the function prepare_resize_simple_key_cache to flush all dirty blocks from key cache, to free memory used for key cache blocks and auxiliary structures. After this the function builds a new key cache with new parameters. This implementation doesn't block the calls and executions of other functions from the key cache interface. However it assumes that the - calls of s_resize_key_cache itself are serialized. + calls of resize_simple_key_cache itself are serialized. The function starts the operation only when all other threads performing operations with the key cache let her to proceed (when cnt_for_resize=0). - */ static -int s_resize_key_cache(void *keycache_cb, uint key_cache_block_size, - size_t use_mem, uint division_limit, - uint age_threshold) +int resize_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache, uint key_cache_block_size, + size_t use_mem, uint division_limit, + uint age_threshold) { - S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; int blocks= 0; - DBUG_ENTER("s_resize_key_cache"); + DBUG_ENTER("resize_simple_key_cache"); if (!keycache->key_cache_inited) - DBUG_RETURN(keycache->disk_blocks); + DBUG_RETURN(blocks); /* Note that the cache_lock mutex and the resize_queue are left untouched. We do not lose the cache_lock and will release it only at the end of this function. 
*/ - if (s_prepare_resize_key_cache(keycache_cb, 1, 0)) + if (prepare_resize_simple_key_cache(keycache, 1, 0)) goto finish; /* The following will work even if use_mem is 0 */ - blocks= s_init_key_cache(keycache, key_cache_block_size, use_mem, - division_limit, age_threshold); + blocks= init_simple_key_cache(keycache, key_cache_block_size, use_mem, + division_limit, age_threshold); finish: - s_finish_resize_key_cache(keycache_cb, 1, 0); + finish_resize_simple_key_cache(keycache, 1, 0); DBUG_RETURN(blocks); } @@ -894,7 +887,7 @@ finish: /* Increment counter blocking resize key cache operation */ -static inline void inc_counter_for_resize_op(S_KEY_CACHE_CB *keycache) +static inline void inc_counter_for_resize_op(SIMPLE_KEY_CACHE_CB *keycache) { keycache->cnt_for_resize_op++; } @@ -904,7 +897,7 @@ static inline void inc_counter_for_resize_op(S_KEY_CACHE_CB *keycache) Decrement counter blocking resize key cache operation; Signal the operation to proceed when counter becomes equal zero */ -static inline void dec_counter_for_resize_op(S_KEY_CACHE_CB *keycache) +static inline void dec_counter_for_resize_op(SIMPLE_KEY_CACHE_CB *keycache) { if (!--keycache->cnt_for_resize_op) release_whole_queue(&keycache->waiting_for_resize_cnt); @@ -915,16 +908,16 @@ static inline void dec_counter_for_resize_op(S_KEY_CACHE_CB *keycache) Change key cache parameters of a simple key cache SYNOPSIS - s_change_key_cache_param() - keycache_cb pointer to the control block of a simple key cache + change_simple_key_cache_param() + keycache pointer to the control block of a simple key cache division_limit new division limit (if not zero) age_threshold new age threshold (if not zero) DESCRIPTION This function is the implementation of the change_key_cache_param interface function that is employed by simple (non-partitioned) key caches. 
- The function considers the parameter keycache_cb as a pointer to the - control block structure of the type S_KEY_CACHE_CB for the simple key + The function takes the parameter keycache as a pointer to the + control block structure of the type SIMPLE_KEY_CACHE_CB for the simple key cache where new values of the division limit and the age threshold used for midpoint insertion strategy are to be set. The parameters division_limit and age_threshold provide these new values. @@ -938,15 +931,13 @@ static inline void dec_counter_for_resize_op(S_KEY_CACHE_CB *keycache) This function changes some parameters of a given key cache without reformatting it. The function does not touch the contents the key cache blocks. - */ static -void s_change_key_cache_param(void *keycache_cb, uint division_limit, - uint age_threshold) +void change_simple_key_cache_param(SIMPLE_KEY_CACHE_CB *keycache, uint division_limit, + uint age_threshold) { - S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; - DBUG_ENTER("s_change_key_cache_param"); + DBUG_ENTER("change_simple_key_cache_param"); keycache_pthread_mutex_lock(&keycache->cache_lock); if (division_limit) keycache->min_warm_blocks= (keycache->disk_blocks * @@ -963,15 +954,15 @@ void s_change_key_cache_param(void *keycache_cb, uint division_limit, Destroy a simple key cache SYNOPSIS - s_end_key_cache() - keycache_cb pointer to the control block of a simple key cache + end_simple_key_cache() + keycache pointer to the control block of a simple key cache cleanup <=> complete free (free also mutex for key cache) DESCRIPTION This function is the implementation of the end_key_cache interface function that is employed by simple (non-partitioned) key caches. 
- The function considers the parameter keycache_cb as a pointer to the - control block structure of the type S_KEY_CACHE_CB for the simple key + The function takes the parameter keycache as a pointer to the + control block structure of the type SIMPLE_KEY_CACHE_CB for the simple key cache to be destroyed. The function frees the memory allocated for the key cache blocks and auxiliary structures. If the value of the parameter cleanup is TRUE @@ -982,10 +973,9 @@ void s_change_key_cache_param(void *keycache_cb, uint division_limit, */ static -void s_end_key_cache(void *keycache_cb, my_bool cleanup) +void end_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache, my_bool cleanup) { - S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; - DBUG_ENTER("s_end_key_cache"); + DBUG_ENTER("end_simple_key_cache"); DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) keycache)); if (!keycache->key_cache_inited) @@ -1276,7 +1266,7 @@ static inline void link_changed(BLOCK_LINK *block, BLOCK_LINK **phead) void */ -static void link_to_file_list(S_KEY_CACHE_CB *keycache, +static void link_to_file_list(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block, int file, my_bool unlink_block) { @@ -1317,7 +1307,7 @@ static void link_to_file_list(S_KEY_CACHE_CB *keycache, void */ -static void link_to_changed_list(S_KEY_CACHE_CB *keycache, +static void link_to_changed_list(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block) { DBUG_ASSERT(block->status & BLOCK_IN_USE); @@ -1372,8 +1362,8 @@ static void link_to_changed_list(S_KEY_CACHE_CB *keycache, not linked in the LRU ring. 
*/ -static void link_block(S_KEY_CACHE_CB *keycache, BLOCK_LINK *block, my_bool hot, - my_bool at_end) +static void link_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block, + my_bool hot, my_bool at_end) { BLOCK_LINK *ins; BLOCK_LINK **pins; @@ -1493,7 +1483,7 @@ static void link_block(S_KEY_CACHE_CB *keycache, BLOCK_LINK *block, my_bool hot, See NOTES for link_block */ -static void unlink_block(S_KEY_CACHE_CB *keycache, BLOCK_LINK *block) +static void unlink_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block) { DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE)); DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/ @@ -1551,7 +1541,8 @@ static void unlink_block(S_KEY_CACHE_CB *keycache, BLOCK_LINK *block) RETURN void */ -static void reg_requests(S_KEY_CACHE_CB *keycache, BLOCK_LINK *block, int count) +static void reg_requests(SIMPLE_KEY_CACHE_CB *keycache, + BLOCK_LINK *block, int count) { DBUG_ASSERT(block->status & BLOCK_IN_USE); DBUG_ASSERT(block->hash_link); @@ -1594,7 +1585,7 @@ static void reg_requests(S_KEY_CACHE_CB *keycache, BLOCK_LINK *block, int count) not linked in the LRU ring. 
*/ -static void unreg_request(S_KEY_CACHE_CB *keycache, +static void unreg_request(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block, int at_end) { DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE)); @@ -1683,7 +1674,7 @@ static void remove_reader(BLOCK_LINK *block) signals on its termination */ -static void wait_for_readers(S_KEY_CACHE_CB *keycache, +static void wait_for_readers(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block) { #ifdef THREAD @@ -1732,7 +1723,7 @@ static inline void link_hash(HASH_LINK **start, HASH_LINK *hash_link) Remove a hash link from the hash table */ -static void unlink_hash(S_KEY_CACHE_CB *keycache, HASH_LINK *hash_link) +static void unlink_hash(SIMPLE_KEY_CACHE_CB *keycache, HASH_LINK *hash_link) { KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u pos_ %lu #requests=%u", (uint) hash_link->file,(ulong) hash_link->diskpos, hash_link->requests)); @@ -1788,7 +1779,7 @@ static void unlink_hash(S_KEY_CACHE_CB *keycache, HASH_LINK *hash_link) Get the hash link for a page */ -static HASH_LINK *get_hash_link(S_KEY_CACHE_CB *keycache, +static HASH_LINK *get_hash_link(SIMPLE_KEY_CACHE_CB *keycache, int file, my_off_t filepos) { reg1 HASH_LINK *hash_link, **start; @@ -1909,7 +1900,7 @@ restart: waits until first of this operations links any block back. */ -static BLOCK_LINK *find_key_block(S_KEY_CACHE_CB *keycache, +static BLOCK_LINK *find_key_block(SIMPLE_KEY_CACHE_CB *keycache, File file, my_off_t filepos, int init_hits_left, int wrmode, int *page_st) @@ -2669,7 +2660,7 @@ restart: portion is less than read_length, but not less than min_length. 
*/ -static void read_block(S_KEY_CACHE_CB *keycache, +static void read_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block, uint read_length, uint min_length, my_bool primary) { @@ -2761,8 +2752,8 @@ static void read_block(S_KEY_CACHE_CB *keycache, SYNOPSIS - s_key_cache_read() - keycache_cb pointer to the control block of a simple key cache + simple_key_cache_read() + keycache pointer to the control block of a simple key cache file handler for the file for the block of data to be read filepos position of the block of data in the file level determines the weight of the data @@ -2774,8 +2765,8 @@ static void read_block(S_KEY_CACHE_CB *keycache, DESCRIPTION This function is the implementation of the key_cache_read interface function that is employed by simple (non-partitioned) key caches. - The function considers the parameter keycache_cb as a pointer to the - control block structure of the type S_KEY_CACHE_CB for a simple key + The function takes the parameter keycache as a pointer to the + control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key cache. In a general case the function reads a block of data from the key cache into the buffer buff of the size specified by the parameter length. 
The @@ -2799,20 +2790,18 @@ static void read_block(S_KEY_CACHE_CB *keycache, NOTES Filepos must be a multiple of 'block_length', but it doesn't have to be a multiple of key_cache_block_size; - */ -uchar *s_key_cache_read(void *keycache_cb, - File file, my_off_t filepos, int level, - uchar *buff, uint length, - uint block_length __attribute__((unused)), - int return_buffer __attribute__((unused))) +uchar *simple_key_cache_read(SIMPLE_KEY_CACHE_CB *keycache, + File file, my_off_t filepos, int level, + uchar *buff, uint length, + uint block_length __attribute__((unused)), + int return_buffer __attribute__((unused))) { - S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; my_bool locked_and_incremented= FALSE; int error=0; uchar *start= buff; - DBUG_ENTER("s_key_cache_read"); + DBUG_ENTER("simple_key_cache_read"); DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u", (uint) file, (ulong) filepos, length)); @@ -3010,8 +2999,8 @@ end: Insert a block of file data from a buffer into a simple key cache SYNOPSIS - s_key_cache_insert() - keycache_cb pointer to the control block of a simple key cache + simple_key_cache_insert() + keycache pointer to the control block of a simple key cache file handler for the file to insert data from filepos position of the block of data in the file to insert level determines the weight of the data @@ -3021,8 +3010,8 @@ end: DESCRIPTION This function is the implementation of the key_cache_insert interface function that is employed by simple (non-partitioned) key caches. - The function considers the parameter keycache_cb as a pointer to the - control block structure of the type S_KEY_CACHE_CB for a simple key + The function takes the parameter keycache as a pointer to the + control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key cache. The function writes a block of file data from a buffer into the key cache. 
The buffer is specified with the parameters buff and length - the pointer @@ -3045,11 +3034,10 @@ end: */ static -int s_key_cache_insert(void *keycache_cb, - File file, my_off_t filepos, int level, - uchar *buff, uint length) +int simple_key_cache_insert(SIMPLE_KEY_CACHE_CB *keycache, + File file, my_off_t filepos, int level, + uchar *buff, uint length) { - S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; int error= 0; DBUG_ENTER("key_cache_insert"); DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u", @@ -3272,8 +3260,8 @@ int s_key_cache_insert(void *keycache_cb, SYNOPSIS - s_key_cache_write() - keycache_cb pointer to the control block of a simple key cache + simple_key_cache_write() + keycache pointer to the control block of a simple key cache file handler for the file to write data to file_extra maps of key cache partitions containing dirty pages from file @@ -3287,8 +3275,8 @@ int s_key_cache_insert(void *keycache_cb, DESCRIPTION This function is the implementation of the key_cache_write interface function that is employed by simple (non-partitioned) key caches. - The function considers the parameter keycache_cb as a pointer to the - control block structure of the type S_KEY_CACHE_CB for a simple key + The function takes the parameter keycache as a pointer to the + control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key cache. In a general case the function copies data from a buffer into the key cache. The buffer is specified with the parameters buff and length - @@ -3304,7 +3292,8 @@ int s_key_cache_insert(void *keycache_cb, The parameter file_extra currently makes sense only for simple key caches that are elements of a partitioned key cache. It provides a pointer to the shared bitmap of the partitions that may contains dirty pages for the file. - This bitmap is used to optimize the function p_flush_key_blocks. + This bitmap is used to optimize the function + flush_partitioned_key_cache_blocks. 
RETURN VALUE 0 if a success, 1 - otherwise. @@ -3312,21 +3301,19 @@ int s_key_cache_insert(void *keycache_cb, NOTES This implementation exploits the fact that the function is called only when a thread has got an exclusive lock for the key file. - */ static -int s_key_cache_write(void *keycache_cb, - File file, void *file_extra __attribute__((unused)), - my_off_t filepos, int level, - uchar *buff, uint length, - uint block_length __attribute__((unused)), - int dont_write) +int simple_key_cache_write(SIMPLE_KEY_CACHE_CB *keycache, + File file, void *file_extra __attribute__((unused)), + my_off_t filepos, int level, + uchar *buff, uint length, + uint block_length __attribute__((unused)), + int dont_write) { - S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; my_bool locked_and_incremented= FALSE; int error=0; - DBUG_ENTER("s_key_cache_write"); + DBUG_ENTER("simple_key_cache_write"); DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u block_length: %u" " key_block_length: %u", @@ -3641,7 +3628,7 @@ end: Block must have a request registered on it. 
*/ -static void free_block(S_KEY_CACHE_CB *keycache, BLOCK_LINK *block) +static void free_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block) { KEYCACHE_THREAD_TRACE("free block"); KEYCACHE_DBUG_PRINT("free_block", @@ -3781,7 +3768,7 @@ static int cmp_sec_link(BLOCK_LINK **a, BLOCK_LINK **b) free used blocks if requested */ -static int flush_cached_blocks(S_KEY_CACHE_CB *keycache, +static int flush_cached_blocks(SIMPLE_KEY_CACHE_CB *keycache, File file, BLOCK_LINK **cache, BLOCK_LINK **end, enum flush_type type) @@ -3909,7 +3896,7 @@ static int flush_cached_blocks(S_KEY_CACHE_CB *keycache, 1 error */ -static int flush_key_blocks_int(S_KEY_CACHE_CB *keycache, +static int flush_key_blocks_int(SIMPLE_KEY_CACHE_CB *keycache, File file, enum flush_type type) { BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache; @@ -4349,8 +4336,8 @@ err: SYNOPSIS - s_flush_key_blocks() - keycache_cb pointer to the control block of a simple key cache + flush_simple_key_blocks() + keycache pointer to the control block of a simple key cache file handler for the file to flush to file_extra maps of key cache partitions containing dirty pages from file (not used) @@ -4359,7 +4346,7 @@ err: DESCRIPTION This function is the implementation of the flush_key_blocks interface function that is employed by simple (non-partitioned) key caches. - The function considers the parameter keycache_cb as a pointer to the + The function takes the parameter keycache as a pointer to the control block structure of the type S_KEY_CACHE_CB for a simple key cache. In a general case the function flushes the data from all dirty key @@ -4378,16 +4365,14 @@ err: NOTES This implementation exploits the fact that the function is called only when a thread has got an exclusive lock for the key file. 
- */ static -int s_flush_key_blocks(void *keycache_cb, - File file, - void *file_extra __attribute__((unused)), - enum flush_type type) +int flush_simple_key_cache_blocks(SIMPLE_KEY_CACHE_CB *keycache, + File file, + void *file_extra __attribute__((unused)), + enum flush_type type) { - S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; int res= 0; DBUG_ENTER("flush_key_blocks"); DBUG_PRINT("enter", ("keycache: 0x%lx", (long) keycache)); @@ -4440,7 +4425,7 @@ int s_flush_key_blocks(void *keycache_cb, != 0 Error */ -static int flush_all_key_blocks(S_KEY_CACHE_CB *keycache) +static int flush_all_key_blocks(SIMPLE_KEY_CACHE_CB *keycache) { BLOCK_LINK *block; uint total_found; @@ -4546,14 +4531,14 @@ static int flush_all_key_blocks(S_KEY_CACHE_CB *keycache) Reset the counters of a simple key cache SYNOPSIS - s_reset_key_cache_counters() + reset_simple_key_cache_counters() name the name of a key cache - keycache_cb pointer to the control block of a simple key cache + keycache pointer to the control block of a simple key cache DESCRIPTION This function is the implementation of the reset_key_cache_counters interface function that is employed by simple (non-partitioned) key caches. - The function considers the parameter keycache_cb as a pointer to the + The function takes the parameter keycache as a pointer to the control block structure of the type S_KEY_CACHE_CB for a simple key cache. This function resets the values of all statistical counters for the key cache to 0. 
@@ -4561,15 +4546,13 @@ static int flush_all_key_blocks(S_KEY_CACHE_CB *keycache) RETURN 0 on success (always because it can't fail) - */ static -int s_reset_key_cache_counters(const char *name __attribute__((unused)), - void *keycache_cb) +int reset_simple_key_cache_counters(const char *name __attribute__((unused)), + SIMPLE_KEY_CACHE_CB *keycache) { - S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; - DBUG_ENTER("s_reset_key_cache_counters"); + DBUG_ENTER("reset_simple_key_cache_counters"); if (!keycache->key_cache_inited) { DBUG_PRINT("info", ("Key cache %s not initialized.", name)); @@ -4590,9 +4573,10 @@ int s_reset_key_cache_counters(const char *name __attribute__((unused)), /* Test if disk-cache is ok */ -static void test_key_cache(S_KEY_CACHE_CB *keycache __attribute__((unused)), - const char *where __attribute__((unused)), - my_bool lock __attribute__((unused))) +static +void test_key_cache(SIMPLE_KEY_CACHE_CB *keycache __attribute__((unused)), + const char *where __attribute__((unused)), + my_bool lock __attribute__((unused))) { /* TODO */ } @@ -4604,7 +4588,7 @@ static void test_key_cache(S_KEY_CACHE_CB *keycache __attribute__((unused)), #define MAX_QUEUE_LEN 100 -static void keycache_dump(S_KEY_CACHE_CB *keycache) +static void keycache_dump(SIMPLE_KEY_CACHE_CB *keycache) { FILE *keycache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w"); struct st_my_thread_var *last; @@ -4844,7 +4828,7 @@ static int fail_hlink(HASH_LINK *hlink) return 0; /* Let the assert fail. 
*/ } -static int cache_empty(S_KEY_CACHE_CB *keycache) +static int cache_empty(SIMPLE_KEY_CACHE_CB *keycache) { int errcnt= 0; int idx; @@ -4887,54 +4871,57 @@ static int cache_empty(S_KEY_CACHE_CB *keycache) Get statistics for a simple key cache SYNOPSIS - get_key_cache_statistics() - keycache_cb pointer to the control block of a simple key cache + get_simple_key_cache_statistics() + keycache pointer to the control block of a simple key cache partition_no partition number (not used) key_cache_stats OUT pointer to the structure for the returned statistics DESCRIPTION This function is the implementation of the get_key_cache_statistics interface function that is employed by simple (non-partitioned) key caches. - The function considers the parameter keycache_cb as a pointer to the - control block structure of the type S_KEY_CACHE_CB for a simple key cache. - This function returns the statistical data for the key cache. + The function takes the parameter keycache as a pointer to the + control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key + cache. This function returns the statistical data for the key cache. The parameter partition_no is not used by this function. 
RETURN none - */ static -void s_get_key_cache_statistics(void *keycache_cb, - uint partition_no __attribute__((unused)), - KEY_CACHE_STATISTICS *key_cache_stats) +void get_simple_key_cache_statistics(SIMPLE_KEY_CACHE_CB *keycache, + uint partition_no __attribute__((unused)), + KEY_CACHE_STATISTICS *keycache_stats) { - S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; - DBUG_ENTER("s_get_key_cache_statistics"); - - key_cache_stats->mem_size= (longlong) keycache->key_cache_mem_size; - key_cache_stats->block_size= (longlong) keycache->key_cache_block_size; - key_cache_stats->blocks_used= keycache->blocks_used; - key_cache_stats->blocks_unused= keycache->blocks_unused; - key_cache_stats->blocks_changed= keycache->global_blocks_changed; - key_cache_stats->read_requests= keycache->global_cache_r_requests; - key_cache_stats->reads= keycache->global_cache_read; - key_cache_stats->write_requests= keycache->global_cache_w_requests; - key_cache_stats->writes= keycache->global_cache_write; + DBUG_ENTER("simple_get_key_cache_statistics"); + + keycache_stats->mem_size= (longlong) keycache->key_cache_mem_size; + keycache_stats->block_size= (longlong) keycache->key_cache_block_size; + keycache_stats->blocks_used= keycache->blocks_used; + keycache_stats->blocks_unused= keycache->blocks_unused; + keycache_stats->blocks_changed= keycache->global_blocks_changed; + keycache_stats->read_requests= keycache->global_cache_r_requests; + keycache_stats->reads= keycache->global_cache_read; + keycache_stats->write_requests= keycache->global_cache_w_requests; + keycache_stats->writes= keycache->global_cache_write; DBUG_VOID_RETURN; } -static size_t s_key_cache_stat_var_offsets[]= +/* + Offsets of the statistical values in the control block for a simple key cache + The first NO_LONG_KEY_CACHE_STAT_VARIABLES=3 are of the ulong type while the + remaining are of the ulonglong type. 
+ */ +static size_t simple_key_cache_stat_var_offsets[]= { - offsetof(S_KEY_CACHE_CB, blocks_used), - offsetof(S_KEY_CACHE_CB, blocks_unused), - offsetof(S_KEY_CACHE_CB, global_blocks_changed), - offsetof(S_KEY_CACHE_CB, global_cache_w_requests), - offsetof(S_KEY_CACHE_CB, global_cache_write), - offsetof(S_KEY_CACHE_CB, global_cache_r_requests), - offsetof(S_KEY_CACHE_CB, global_cache_read) + offsetof(SIMPLE_KEY_CACHE_CB, blocks_used), + offsetof(SIMPLE_KEY_CACHE_CB, blocks_unused), + offsetof(SIMPLE_KEY_CACHE_CB, global_blocks_changed), + offsetof(SIMPLE_KEY_CACHE_CB, global_cache_w_requests), + offsetof(SIMPLE_KEY_CACHE_CB, global_cache_write), + offsetof(SIMPLE_KEY_CACHE_CB, global_cache_r_requests), + offsetof(SIMPLE_KEY_CACHE_CB, global_cache_read) }; @@ -4942,16 +4929,16 @@ static size_t s_key_cache_stat_var_offsets[]= Get the value of a statistical variable for a simple key cache SYNOPSIS - s_get_key_cache_stat_value() - keycache_cb pointer to the control block of a simple key cache + get_simple_key_cache_stat_value() + keycache pointer to the control block of a simple key cache var_no the ordered number of a statistical variable DESCRIPTION - This function is the implementation of the s_get_key_cache_stat_value + This function is the implementation of the get_simple_key_cache_stat_value interface function that is employed by simple (non-partitioned) key caches. - The function considers the parameter keycache_cb as a pointer to the - control block structure of the type S_KEY_CACHE_CB for a simple key cache. - This function returns the value of the statistical variable var_no + The function takes the parameter keycache as a pointer to the + control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key + cache. This function returns the value of the statistical variable var_no for this key cache. The variables are numbered starting from 0 to 6. 
RETURN @@ -4960,12 +4947,12 @@ static size_t s_key_cache_stat_var_offsets[]= */ static -ulonglong s_get_key_cache_stat_value(void *keycache_cb, uint var_no) +ulonglong get_simple_key_cache_stat_value(SIMPLE_KEY_CACHE_CB *keycache, + uint var_no) { - S_KEY_CACHE_CB *keycache= (S_KEY_CACHE_CB *) keycache_cb; - size_t var_ofs= s_key_cache_stat_var_offsets[var_no]; + size_t var_ofs= simple_key_cache_stat_var_offsets[var_no]; ulonglong res= 0; - DBUG_ENTER("s_get_key_cache_stat_value"); + DBUG_ENTER("get_simple_key_cache_stat_value"); if (var_no < 3) res= (ulonglong) (*(long *) ((char *) keycache + var_ofs)); @@ -4985,19 +4972,19 @@ ulonglong s_get_key_cache_stat_value(void *keycache_cb, uint var_no) the MySQL server code directly. We don't do it though. */ -static KEY_CACHE_FUNCS s_key_cache_funcs = +static KEY_CACHE_FUNCS simple_key_cache_funcs = { - s_init_key_cache, - s_resize_key_cache, - s_change_key_cache_param, - s_key_cache_read, - s_key_cache_insert, - s_key_cache_write, - s_flush_key_blocks, - s_reset_key_cache_counters, - s_end_key_cache, - s_get_key_cache_statistics, - s_get_key_cache_stat_value + (INIT_KEY_CACHE) init_simple_key_cache, + (RESIZE_KEY_CACHE) resize_simple_key_cache, + (CHANGE_KEY_CACHE_PARAM) change_simple_key_cache_param, + (KEY_CACHE_READ) simple_key_cache_read, + (KEY_CACHE_INSERT) simple_key_cache_insert, + (KEY_CACHE_WRITE) simple_key_cache_write, + (FLUSH_KEY_BLOCKS) flush_simple_key_cache_blocks, + (RESET_KEY_CACHE_COUNTERS) reset_simple_key_cache_counters, + (END_KEY_CACHE) end_simple_key_cache, + (GET_KEY_CACHE_STATISTICS) get_simple_key_cache_statistics, + (GET_KEY_CACHE_STAT_VALUE) get_simple_key_cache_stat_value }; @@ -5038,17 +5025,22 @@ static KEY_CACHE_FUNCS s_key_cache_funcs = /* Control block for a partitioned key cache */ -typedef struct st_p_key_cache_cb +typedef struct st_partitioned_key_cache_cb { my_bool key_cache_inited; /*<=> control block is allocated */ - S_KEY_CACHE_CB **partition_array; /* array of the key cache 
partitions */ - uint partitions; /* number of partitions in the key cache */ + SIMPLE_KEY_CACHE_CB **partition_array; /* the key cache partitions */ size_t key_cache_mem_size; /* specified size of the cache memory */ uint key_cache_block_size; /* size of the page buffer of a cache block */ -} P_KEY_CACHE_CB; + uint partitions; /* number of partitions in the key cache */ +} PARTITIONED_KEY_CACHE_CB; static -void p_end_key_cache(void *keycache_cb, my_bool cleanup); +void end_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache, + my_bool cleanup); + +static int +reset_partitioned_key_cache_counters(const char *name, + PARTITIONED_KEY_CACHE_CB *keycache); /* Determine the partition to which the index block to read is ascribed @@ -5070,11 +5062,12 @@ void p_end_key_cache(void *keycache_cb, my_bool cleanup); file block is ascribed. */ -static -S_KEY_CACHE_CB *get_key_cache_partition(P_KEY_CACHE_CB *keycache, - File file, my_off_t filepos) +static +SIMPLE_KEY_CACHE_CB * +get_key_cache_partition(PARTITIONED_KEY_CACHE_CB *keycache, + File file, my_off_t filepos) { - uint i= KEYCACHE_BASE_EXPR( file, filepos) % keycache->partitions; + uint i= KEYCACHE_BASE_EXPR(file, filepos) % keycache->partitions; return keycache->partition_array[i]; } @@ -5101,10 +5094,10 @@ S_KEY_CACHE_CB *get_key_cache_partition(P_KEY_CACHE_CB *keycache, file block is ascribed. 
*/ -static -S_KEY_CACHE_CB *get_key_cache_partition_for_write(P_KEY_CACHE_CB *keycache, - File file, my_off_t filepos, - ulonglong* dirty_part_map) +static SIMPLE_KEY_CACHE_CB +*get_key_cache_partition_for_write(PARTITIONED_KEY_CACHE_CB *keycache, + File file, my_off_t filepos, + ulonglong* dirty_part_map) { uint i= KEYCACHE_BASE_EXPR( file, filepos) % keycache->partitions; *dirty_part_map|= 1<partitions; - int blocks= -1; - DBUG_ENTER("p_init_key_cache"); + int blocks= 0; + DBUG_ENTER("partitioned_init_key_cache"); keycache->key_cache_block_size = key_cache_block_size; @@ -5173,9 +5166,9 @@ int p_init_key_cache(void *keycache_cb, uint key_cache_block_size, else { if(!(partition_ptr= - (S_KEY_CACHE_CB **) my_malloc(sizeof(S_KEY_CACHE_CB *) * partitions, - MYF(0)))) - DBUG_RETURN(blocks); + (SIMPLE_KEY_CACHE_CB **) my_malloc(sizeof(SIMPLE_KEY_CACHE_CB *) * + partitions, MYF(MY_WME)))) + DBUG_RETURN(-1); keycache->partition_array= partition_ptr; } @@ -5188,36 +5181,35 @@ int p_init_key_cache(void *keycache_cb, uint key_cache_block_size, partition= *partition_ptr; else { - if (!(partition= (S_KEY_CACHE_CB *) my_malloc(sizeof(S_KEY_CACHE_CB), - MYF(0)))) + if (!(partition= + (SIMPLE_KEY_CACHE_CB *) my_malloc(sizeof(SIMPLE_KEY_CACHE_CB), + MYF(MY_WME)))) continue; partition->key_cache_inited= 0; } - if ((cnt= s_init_key_cache(partition, - key_cache_block_size, mem_per_cache, - division_limit, age_threshold)) <= 0) + if ((cnt= init_simple_key_cache(partition, + key_cache_block_size, mem_per_cache, + division_limit, age_threshold)) <= 0) { - s_end_key_cache(partition, 1); - my_free((uchar *) partition, MYF(0)); + end_simple_key_cache(partition, 1); + my_free(partition, MYF(0)); partition= 0; if (key_cache_inited) { memmove(partition_ptr, partition_ptr+1, sizeof(partition_ptr)*(partitions-i-1)); } + if (!--partitions) + break; if (i == 0) { i--; - partitions--; - if (partitions) - mem_per_cache = use_mem / partitions; + mem_per_cache = use_mem / partitions; + continue; } - 
continue; } - if (blocks < 0) - blocks= 0; blocks+= cnt; *partition_ptr++= partition; } @@ -5229,6 +5221,9 @@ int p_init_key_cache(void *keycache_cb, uint key_cache_block_size, keycache->key_cache_inited= 1; + if (!partitions) + blocks= -1; + DBUG_RETURN(blocks); } @@ -5237,8 +5232,8 @@ int p_init_key_cache(void *keycache_cb, uint key_cache_block_size, Resize a partitioned key cache SYNOPSIS - p_resize_key_cache() - keycache_cb pointer to the control block of a partitioned key cache + resize_partitioned_key_cache() + keycache pointer to the control block of a partitioned key cache key_cache_block_size size of blocks to keep cached data use_mem total memory to use for the new key cache division_limit new division limit (if not zero) @@ -5247,9 +5242,9 @@ int p_init_key_cache(void *keycache_cb, uint key_cache_block_size, DESCRIPTION This function is the implementation of the resize_key_cache interface function that is employed by partitioned key caches. - The function considers the parameter keycache_cb as a pointer to the - control block structure of the type P_KEY_CACHE_CB for the partitioned - key cache to be resized. + The function takes the parameter keycache as a pointer to the + control block structure of the type PARTITIONED_KEY_CACHE_CB for the + partitioned key cache to be resized. The parameter key_cache_block_size specifies the new size of the blocks in the simple key caches that comprise the partitioned key cache. The parameters division_limit and age_threshold determine the new initial @@ -5263,48 +5258,47 @@ int p_init_key_cache(void *keycache_cb, uint key_cache_block_size, 0 - otherwise. NOTES. - The function first calls s_prepare_resize_key_cache for each simple + The function first calls prepare_resize_simple_key_cache for each simple key cache effectively flushing all dirty pages from it and destroying - the key cache. Then p_init_key cache is called. 
This call builds all - the new array of simple key caches containing the same number of - elements as the old one. After this the function calls the function - s_finish_resize_key_cache for each simple key cache from this array. + the key cache. Then init_partitioned_key_cache is called. This call builds + a new array of simple key caches containing the same number of elements + as the old one. After this the function calls the function + finish_resize_simple_key_cache for each simple key cache from this array. This implementation doesn't block the calls and executions of other functions from the key cache interface. However it assumes that the - calls of s_resize_key_cache itself are serialized. - + calls of resize_partitioned_key_cache itself are serialized. */ static -int p_resize_key_cache(void *keycache_cb, uint key_cache_block_size, - size_t use_mem, uint division_limit, - uint age_threshold) +int resize_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache, + uint key_cache_block_size, + size_t use_mem, uint division_limit, + uint age_threshold) { uint i; - P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb; uint partitions= keycache->partitions; my_bool cleanup= use_mem == 0; int blocks= -1; int err= 0; - DBUG_ENTER("p_resize_key_cache"); - if (use_mem == 0) + DBUG_ENTER("partitioned_resize_key_cache"); + if (cleanup) { - p_end_key_cache(keycache_cb, 0); - DBUG_RETURN(blocks); + end_partitioned_key_cache(keycache, 0); + DBUG_RETURN(-1); } for (i= 0; i < partitions; i++) { - err|= s_prepare_resize_key_cache(keycache->partition_array[i], 0, 1); + err|= prepare_resize_simple_key_cache(keycache->partition_array[i], 0, 1); } - if (!err && use_mem) - blocks= p_init_key_cache(keycache_cb, key_cache_block_size, use_mem, - division_limit, age_threshold); - if (blocks > 0 && !cleanup) + if (!err) + blocks= init_partitioned_key_cache(keycache, key_cache_block_size, + use_mem, division_limit, age_threshold); + if (blocks > 0) { for (i= 0; i < partitions; i++) 
{ - s_finish_resize_key_cache(keycache->partition_array[i], 0, 1); + finish_resize_simple_key_cache(keycache->partition_array[i], 0, 1); } } DBUG_RETURN(blocks); @@ -5315,17 +5309,17 @@ int p_resize_key_cache(void *keycache_cb, uint key_cache_block_size, Change key cache parameters of a partitioned key cache SYNOPSIS - p_change_key_cache_param() - keycache_cb pointer to the control block of a partitioned key cache + partitioned_change_key_cache_param() + keycache pointer to the control block of a partitioned key cache division_limit new division limit (if not zero) age_threshold new age threshold (if not zero) DESCRIPTION This function is the implementation of the change_key_cache_param interface function that is employed by partitioned key caches. - The function considers the parameter keycache_cb as a pointer to the - control block structure of the type P_KEY_CACHE_CB for the simple key - cache where new values of the division limit and the age threshold used + The function takes the parameter keycache as a pointer to the + control block structure of the type PARTITIONED_KEY_CACHE_CB for the simple + key cache where new values of the division limit and the age threshold used for midpoint insertion strategy are to be set. The parameters division_limit and age_threshold provide these new values. @@ -5333,23 +5327,22 @@ int p_resize_key_cache(void *keycache_cb, uint key_cache_block_size, none NOTES - The function just calls s_change_key_cache_param for each element from the - array of simple caches that comprise the partitioned key cache. - + The function just calls change_simple_key_cache_param for each element from + the array of simple caches that comprise the partitioned key cache. 
*/ static -void p_change_key_cache_param(void *keycache_cb, uint division_limit, - uint age_threshold) +void change_partitioned_key_cache_param(PARTITIONED_KEY_CACHE_CB *keycache, + uint division_limit, + uint age_threshold) { uint i; - P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb; uint partitions= keycache->partitions; - DBUG_ENTER("p_change_key_cache_param"); + DBUG_ENTER("partitioned_change_key_cache_param"); for (i= 0; i < partitions; i++) { - s_change_key_cache_param(keycache->partition_array[i], division_limit, - age_threshold); + change_simple_key_cache_param(keycache->partition_array[i], division_limit, + age_threshold); } DBUG_VOID_RETURN; } @@ -5359,17 +5352,17 @@ void p_change_key_cache_param(void *keycache_cb, uint division_limit, Destroy a partitioned key cache SYNOPSIS - p_end_key_cache() - keycache_cb pointer to the control block of a partitioned key cache + end_partitioned_key_cache() + keycache pointer to the control block of a partitioned key cache cleanup <=> complete free (free also control block structures for all simple key caches) DESCRIPTION This function is the implementation of the end_key_cache interface function that is employed by partitioned key caches. - The function considers the parameter keycache_cb as a pointer to the - control block structure of the type P_KEY_CACHE_CB for the partitioned - key cache to be destroyed. + The function takes the parameter keycache as a pointer to the + control block structure of the type PARTITIONED_KEY_CACHE_CB for the + partitioned key cache to be destroyed. The function frees the memory allocated for the cache blocks and auxiliary structures used by simple key caches that comprise the partitioned key cache. 
If the value of the parameter cleanup is TRUE @@ -5378,23 +5371,23 @@ void p_change_key_cache_param(void *keycache_cb, uint division_limit, RETURN VALUE none - */ static -void p_end_key_cache(void *keycache_cb, my_bool cleanup) +void end_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache, + my_bool cleanup) { uint i; - P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb; uint partitions= keycache->partitions; - DBUG_ENTER("p_end_key_cache"); + DBUG_ENTER("partitioned_end_key_cache"); DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) keycache)); for (i= 0; i < partitions; i++) { - s_end_key_cache(keycache->partition_array[i], cleanup); + end_simple_key_cache(keycache->partition_array[i], cleanup); } - if (cleanup) { + if (cleanup) + { for (i= 0; i < partitions; i++) my_free((uchar*) keycache->partition_array[i], MYF(0)); my_free((uchar*) keycache->partition_array, MYF(0)); @@ -5409,8 +5402,8 @@ void p_end_key_cache(void *keycache_cb, my_bool cleanup) SYNOPSIS - p_key_cache_read() - keycache_cb pointer to the control block of a partitioned key cache + partitioned_key_cache_read() + keycache pointer to the control block of a partitioned key cache file handler for the file for the block of data to be read filepos position of the block of data in the file level determines the weight of the data @@ -5422,9 +5415,9 @@ void p_end_key_cache(void *keycache_cb, my_bool cleanup) DESCRIPTION This function is the implementation of the key_cache_read interface function that is employed by partitioned key caches. - The function considers the parameter keycache_cb as a pointer to the - control block structure of the type P_KEY_CACHE_CB for a partitioned - key cache. + The function takes the parameter keycache as a pointer to the + control block structure of the type PARTITIONED_KEY_CACHE_CB for a + partitioned key cache. In a general case the function reads a block of data from the key cache into the buffer buff of the size specified by the parameter length. 
The beginning of the block of data to be read is specified by the parameters @@ -5432,7 +5425,7 @@ void p_end_key_cache(void *keycache_cb, my_bool cleanup) of the buffer. The data is read into the buffer in key_cache_block_size increments. To read each portion the function first finds out in what partition of the key cache this portion(page) is to be saved, and calls - s_key_cache_read with the pointer to the corresponding simple key as + simple_key_cache_read with the pointer to the corresponding simple key as its first parameter. If the parameter return_buffer is not ignored and its value is TRUE, and the data to be read of the specified size block_length can be read from one @@ -5445,21 +5438,19 @@ void p_end_key_cache(void *keycache_cb, my_bool cleanup) RETURN VALUE Returns address from where the data is placed if successful, 0 - otherwise. - */ static -uchar *p_key_cache_read(void *keycache_cb, - File file, my_off_t filepos, int level, - uchar *buff, uint length, - uint block_length __attribute__((unused)), - int return_buffer __attribute__((unused))) +uchar *partitioned_key_cache_read(PARTITIONED_KEY_CACHE_CB *keycache, + File file, my_off_t filepos, int level, + uchar *buff, uint length, + uint block_length __attribute__((unused)), + int return_buffer __attribute__((unused))) { uint r_length; - P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb; uint offset= (uint) (filepos % keycache->key_cache_block_size); uchar *start= buff; - DBUG_ENTER("p_key_cache_read"); + DBUG_ENTER("partitioned_key_cache_read"); DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u", (uint) file, (ulong) filepos, length)); @@ -5471,15 +5462,15 @@ uchar *p_key_cache_read(void *keycache_cb, /* Read data in key_cache_block_size increments */ do { - S_KEY_CACHE_CB *partition= get_key_cache_partition(keycache, - file, filepos); + SIMPLE_KEY_CACHE_CB *partition= get_key_cache_partition(keycache, + file, filepos); uchar *ret_buff= 0; r_length= length; set_if_smaller(r_length, 
keycache->key_cache_block_size - offset); - ret_buff= s_key_cache_read((void *) partition, - file, filepos, level, - buff, r_length, - block_length, return_buffer); + ret_buff= simple_key_cache_read((void *) partition, + file, filepos, level, + buff, r_length, + block_length, return_buffer); if (ret_buff == 0) DBUG_RETURN(0); #ifndef THREAD @@ -5500,8 +5491,8 @@ uchar *p_key_cache_read(void *keycache_cb, Insert a block of file data from a buffer into a partitioned key cache SYNOPSIS - p_key_cache_insert() - keycache_cb pointer to the control block of a partitioned key cache + partitioned_key_cache_insert() + keycache pointer to the control block of a partitioned key cache file handler for the file to insert data from filepos position of the block of data in the file to insert level determines the weight of the data @@ -5511,9 +5502,9 @@ uchar *p_key_cache_read(void *keycache_cb, DESCRIPTION This function is the implementation of the key_cache_insert interface function that is employed by partitioned key caches. - The function considers the parameter keycache_cb as a pointer to the - control block structure of the type P_KEY_CACHE_CB for a partitioned key - cache. + The function takes the parameter keycache as a pointer to the + control block structure of the type PARTITIONED_KEY_CACHE_CB for a + partitioned key cache. The function writes a block of file data from a buffer into the key cache. The buffer is specified with the parameters buff and length - the pointer to the beginning of the buffer and its size respectively. It's assumed @@ -5521,8 +5512,8 @@ uchar *p_key_cache_read(void *keycache_cb, filepos. The data is copied from the buffer in key_cache_block_size increments. For every portion of data the function finds out in what simple key cache from the array of partitions the data must be stored, and after - this calls s_key_cache_insert to copy the data into a key buffer of this - simple key cache. 
+ this calls simple_key_cache_insert to copy the data into a key buffer of + this simple key cache. The parameter level is used to set one characteristic for the key buffers loaded with the data from buff. The characteristic is used only by the midpoint insertion strategy. @@ -5534,18 +5525,16 @@ uchar *p_key_cache_read(void *keycache_cb, The function is used by MyISAM to move all blocks from a index file to the key cache. It can be performed in parallel with reading the file data from the key buffers by other threads. - */ static -int p_key_cache_insert(void *keycache_cb, - File file, my_off_t filepos, int level, - uchar *buff, uint length) +int partitioned_key_cache_insert(PARTITIONED_KEY_CACHE_CB *keycache, + File file, my_off_t filepos, int level, + uchar *buff, uint length) { uint w_length; - P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb; uint offset= (uint) (filepos % keycache->key_cache_block_size); - DBUG_ENTER("p_key_cache_insert"); + DBUG_ENTER("partitioned_key_cache_insert"); DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u", (uint) file,(ulong) filepos, length)); @@ -5553,13 +5542,13 @@ int p_key_cache_insert(void *keycache_cb, /* Write data in key_cache_block_size increments */ do { - S_KEY_CACHE_CB *partition= get_key_cache_partition(keycache, - file, filepos); + SIMPLE_KEY_CACHE_CB *partition= get_key_cache_partition(keycache, + file, filepos); w_length= length; - set_if_smaller(w_length, keycache->key_cache_block_size); - if (s_key_cache_insert((void *) partition, - file, filepos, level, - buff, w_length)) + set_if_smaller(w_length, keycache->key_cache_block_size - offset); + if (simple_key_cache_insert((void *) partition, + file, filepos, level, + buff, w_length)) DBUG_RETURN(1); filepos+= w_length; @@ -5576,8 +5565,8 @@ int p_key_cache_insert(void *keycache_cb, SYNOPSIS - p_key_cache_write() - keycache_cb pointer to the control block of a partitioned key cache + partitioned_key_cache_write() + keycache pointer to the control block of a 
partitioned key cache file handler for the file to write data to filepos position in the file to write data to level determines the weight of the data @@ -5591,9 +5580,9 @@ int p_key_cache_insert(void *keycache_cb, DESCRIPTION This function is the implementation of the key_cache_write interface function that is employed by partitioned key caches. - The function considers the parameter keycache_cb as a pointer to the - control block structure of the type P_KEY_CACHE_CB for a partitioned - key cache. + The function takes the parameter keycache as a pointer to the + control block structure of the type PARTITIONED_KEY_CACHE_CB for a + partitioned key cache. In a general case the function copies data from a buffer into the key cache. The buffer is specified with the parameters buff and length - the pointer to the beginning of the buffer and its size respectively. @@ -5601,8 +5590,8 @@ int p_key_cache_insert(void *keycache_cb, starting from the position filepos. The data is copied from the buffer in key_cache_block_size increments. For every portion of data the function finds out in what simple key cache from the array of partitions - the data must be stored, and after this calls s_key_cache_write to copy - the data into a key buffer of this simple key cache. + the data must be stored, and after this calls simple_key_cache_write to + copy the data into a key buffer of this simple key cache. If the value of the parameter dont_write is FALSE then the function also writes the data into file. The parameter level is used to set one characteristic for the key buffers @@ -5610,7 +5599,7 @@ int p_key_cache_insert(void *keycache_cb, the midpoint insertion strategy. The parameter file_expra provides a pointer to the shared bitmap of the partitions that may contains dirty pages for the file. This bitmap - is used to optimize the function p_flush_key_blocks. + is used to optimize the function flush_partitioned_key_cache_blocks. RETURN VALUE 0 if a success, 1 - otherwise. 
@@ -5618,22 +5607,20 @@ int p_key_cache_insert(void *keycache_cb, NOTES This implementation exploits the fact that the function is called only when a thread has got an exclusive lock for the key file. - */ static -int p_key_cache_write(void *keycache_cb, - File file, void *file_extra, - my_off_t filepos, int level, - uchar *buff, uint length, - uint block_length __attribute__((unused)), - int dont_write) +int partitioned_key_cache_write(PARTITIONED_KEY_CACHE_CB *keycache, + File file, void *file_extra, + my_off_t filepos, int level, + uchar *buff, uint length, + uint block_length __attribute__((unused)), + int dont_write) { uint w_length; ulonglong *part_map= (ulonglong *) file_extra; - P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb; uint offset= (uint) (filepos % keycache->key_cache_block_size); - DBUG_ENTER("p_key_cache_write"); + DBUG_ENTER("partitioned_key_cache_write"); DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u block_length: %u" " key_block_length: %u", @@ -5644,15 +5631,16 @@ int p_key_cache_write(void *keycache_cb, /* Write data in key_cache_block_size increments */ do { - S_KEY_CACHE_CB *partition= get_key_cache_partition_for_write(keycache, - file, filepos, - part_map); + SIMPLE_KEY_CACHE_CB *partition= get_key_cache_partition_for_write(keycache, + file, + filepos, + part_map); w_length = length; - set_if_smaller(w_length, keycache->key_cache_block_size ); - if (s_key_cache_write(partition, - file, 0, filepos, level, - buff, w_length, block_length, - dont_write)) + set_if_smaller(w_length, keycache->key_cache_block_size - offset ); + if (simple_key_cache_write(partition, + file, 0, filepos, level, + buff, w_length, block_length, + dont_write)) DBUG_RETURN(1); filepos+= w_length; @@ -5669,8 +5657,8 @@ int p_key_cache_write(void *keycache_cb, SYNOPSIS - p_flush_key_blocks() - keycache_cb pointer to the control block of a partitioned key cache + flush_partitioned_key_cache_blocks() + keycache pointer to the control block of a partitioned key 
cache file handler for the file to flush to file_extra maps of key cache partitions containing dirty pages from file (not used) @@ -5679,9 +5667,9 @@ int p_key_cache_write(void *keycache_cb, DESCRIPTION This function is the implementation of the flush_key_blocks interface function that is employed by partitioned key caches. - The function considers the parameter keycache_cb as a pointer to the - control block structure of the type P_KEY_CACHE_CB for a partitioned - key cache. + The function takes the parameter keycache as a pointer to the + control block structure of the type PARTITIONED_KEY_CACHE_CB for a + partitioned key cache. In a general case the function flushes the data from all dirty key buffers related to the file 'file' into this file. The function does exactly this if the value of the parameter type is FLUSH_KEEP. If the @@ -5689,12 +5677,12 @@ int p_key_cache_write(void *keycache_cb, releases the key buffers containing data from 'file' for new usage. If the value of the parameter type is FLUSH_IGNORE_CHANGED the function just releases the key buffers containing data from 'file'. - The function performs the operation by calling s_flush_key_blocks - for the elements of the array of the simple key caches that comprise - the partitioned key_cache. If the value of the parameter type is - FLUSH_KEEP s_flush_key_blocks is called only for the partitions with - possibly dirty pages marked in the bitmap pointed to by the parameter - file_extra. + The function performs the operation by calling the function + flush_simple_key_cache_blocks for the elements of the array of the + simple key caches that comprise the partitioned key_cache. If the value + of the parameter type is FLUSH_KEEP s_flush_key_blocks is called only + for the partitions with possibly dirty pages marked in the bitmap + pointed to by the parameter file_extra. 
RETURN 0 ok @@ -5703,35 +5691,30 @@ int p_key_cache_write(void *keycache_cb, NOTES This implementation exploits the fact that the function is called only when a thread has got an exclusive lock for the key file. - */ static -int p_flush_key_blocks(void *keycache_cb, - File file, void *file_extra, - enum flush_type type) +int flush_partitioned_key_cache_blocks(PARTITIONED_KEY_CACHE_CB *keycache, + File file, void *file_extra, + enum flush_type type) { uint i; - P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb; uint partitions= keycache->partitions; int err= 0; ulonglong *dirty_part_map= (ulonglong *) file_extra; - DBUG_ENTER("p_flush_key_blocks"); + DBUG_ENTER("partitioned_flush_key_blocks"); DBUG_PRINT("enter", ("keycache: 0x%lx", (long) keycache)); for (i= 0; i < partitions; i++) { - S_KEY_CACHE_CB *partition= keycache->partition_array[i]; + SIMPLE_KEY_CACHE_CB *partition= keycache->partition_array[i]; if ((type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE) && - !((*dirty_part_map) & (1< 0) - err= 1; - DBUG_RETURN(err); } @@ -5740,38 +5723,36 @@ int p_flush_key_blocks(void *keycache_cb, Reset the counters of a partitioned key cache SYNOPSIS - p_reset_key_cache_counters() + reset_partitioned_key_cache_counters() name the name of a key cache - keycache_cb pointer to the control block of a partitioned key cache + keycache pointer to the control block of a partitioned key cache DESCRIPTION This function is the implementation of the reset_key_cache_counters interface function that is employed by partitioned key caches. - The function considers the parameter keycache_cb as a pointer to the - control block structure of the type P_KEY_CACHE_CB for a partitioned + The function takes the parameter keycache as a pointer to the + control block structure of the type PARTITIONED_KEY_CACHE_CB for a partitioned key cache. This function resets the values of the statistical counters of the simple key caches comprising partitioned key cache to 0. 
It does it by calling - s_reset_key_cache_counters for each key cache partition. + reset_simple_key_cache_counters for each key cache partition. The parameter name is currently not used. RETURN 0 on success (always because it can't fail) - */ -static -int p_reset_key_cache_counters(const char *name __attribute__((unused)), - void *keycache_cb) +static int +reset_partitioned_key_cache_counters(const char *name __attribute__((unused)), + PARTITIONED_KEY_CACHE_CB *keycache) { uint i; - P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb; uint partitions= keycache->partitions; - DBUG_ENTER("p_reset_key_cache_counters"); + DBUG_ENTER("partitioned_reset_key_cache_counters"); for (i = 0; i < partitions; i++) { - s_reset_key_cache_counters(name, keycache->partition_array[i]); + reset_simple_key_cache_counters(name, keycache->partition_array[i]); } DBUG_RETURN(0); } @@ -5781,17 +5762,17 @@ int p_reset_key_cache_counters(const char *name __attribute__((unused)), Get statistics for a partition key cache SYNOPSIS - p_get_key_cache_statistics() - keycache_cb pointer to the control block of a partitioned key cache + get_partitioned_key_cache_statistics() + keycache pointer to the control block of a partitioned key cache partition_no partition number to get statistics for key_cache_stats OUT pointer to the structure for the returned statistics DESCRIPTION This function is the implementation of the get_key_cache_statistics interface function that is employed by partitioned key caches. - The function considers the parameter keycache_cb as a pointer to the - control block structure of the type P_KEY_CACHE_CB for a partitioned - key cache. + The function takes the parameter keycache as a pointer to the + control block structure of the type PARTITIONED_KEY_CACHE_CB for + a partitioned key cache. 
If the value of the parameter partition_no is equal to 0 then aggregated statistics for all partitions is returned in the fields of the structure key_cache_stat of the type KEY_CACHE_STATISTICS . Otherwise @@ -5801,37 +5782,38 @@ int p_reset_key_cache_counters(const char *name __attribute__((unused)), RETURN none - */ static -void p_get_key_cache_statistics(void *keycache_cb, uint partition_no, - KEY_CACHE_STATISTICS *key_cache_stats) +void +get_partitioned_key_cache_statistics(PARTITIONED_KEY_CACHE_CB *keycache, + uint partition_no, + KEY_CACHE_STATISTICS *keycache_stats) { uint i; - S_KEY_CACHE_CB *partition; - P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb; + SIMPLE_KEY_CACHE_CB *partition; uint partitions= keycache->partitions; - DBUG_ENTER("p_get_key_cache_statistics_"); + DBUG_ENTER("get_partitioned_key_cache_statistics"); if (partition_no != 0) { partition= keycache->partition_array[partition_no-1]; - s_get_key_cache_statistics((void *) partition, 0, key_cache_stats); + get_simple_key_cache_statistics((void *) partition, 0, keycache_stats); DBUG_VOID_RETURN; } - key_cache_stats->mem_size= (longlong) keycache->key_cache_mem_size; - key_cache_stats->block_size= (longlong) keycache->key_cache_block_size; + bzero(keycache_stats, sizeof(KEY_CACHE_STATISTICS)); + keycache_stats->mem_size= (longlong) keycache->key_cache_mem_size; + keycache_stats->block_size= (longlong) keycache->key_cache_block_size; for (i = 0; i < partitions; i++) { partition= keycache->partition_array[i]; - key_cache_stats->blocks_used+= partition->blocks_used; - key_cache_stats->blocks_unused+= partition->blocks_unused; - key_cache_stats->blocks_changed+= partition->global_blocks_changed; - key_cache_stats->read_requests+= partition->global_cache_r_requests; - key_cache_stats->reads+= partition->global_cache_read; - key_cache_stats->write_requests+= partition->global_cache_w_requests; - key_cache_stats->writes+= partition->global_cache_write; + keycache_stats->blocks_used+= 
partition->blocks_used; + keycache_stats->blocks_unused+= partition->blocks_unused; + keycache_stats->blocks_changed+= partition->global_blocks_changed; + keycache_stats->read_requests+= partition->global_cache_r_requests; + keycache_stats->reads+= partition->global_cache_read; + keycache_stats->write_requests+= partition->global_cache_w_requests; + keycache_stats->writes+= partition->global_cache_write; } DBUG_VOID_RETURN; } @@ -5840,16 +5822,16 @@ void p_get_key_cache_statistics(void *keycache_cb, uint partition_no, Get the value of a statistical variable for a partitioned key cache SYNOPSIS - p_get_key_cache_stat_value() - keycache_cb pointer to the control block of a partitioned key cache + get_partitioned_key_cache_stat_value() + keycache pointer to the control block of a partitioned key cache var_no the ordered number of a statistical variable DESCRIPTION This function is the implementation of the get_key_cache_stat_value interface function that is employed by partitioned key caches. - The function considers the parameter keycache_cb as a pointer to the - control block structure of the type P_KEY_CACHE_CB for a partitioned - key cache. + The function takes the parameter keycache as a pointer to the + control block structure of the type PARTITIONED_KEY_CACHE_CB for a + partitioned key cache. This function returns the value of the statistical variable var_no for this key cache. The variables are numbered starting from 0 to 6. 
The returned value is calculated as the sum of the values of the @@ -5858,24 +5840,24 @@ void p_get_key_cache_statistics(void *keycache_cb, uint partition_no, RETURN The value of the specified statistical variable - */ static -ulonglong p_get_key_cache_stat_value(void *keycache_cb, uint var_no) +ulonglong +get_partitioned_key_cache_stat_value(PARTITIONED_KEY_CACHE_CB *keycache, + uint var_no) { uint i; - P_KEY_CACHE_CB *keycache= (P_KEY_CACHE_CB *) keycache_cb; uint partitions= keycache->partitions; - size_t var_ofs= s_key_cache_stat_var_offsets[var_no]; + size_t var_ofs= simple_key_cache_stat_var_offsets[var_no]; ulonglong res= 0; - DBUG_ENTER("p_get_key_cache_stat_value"); + DBUG_ENTER("get_partitioned_key_cache_stat_value"); - if (var_no < 3) + if (var_no < NO_LONG_KEY_CACHE_STAT_VARIABLES) { for (i = 0; i < partitions; i++) { - S_KEY_CACHE_CB *partition= keycache->partition_array[i]; + SIMPLE_KEY_CACHE_CB *partition= keycache->partition_array[i]; res+= (ulonglong) (*(long *) ((char *) partition + var_ofs)); } } @@ -5883,7 +5865,7 @@ ulonglong p_get_key_cache_stat_value(void *keycache_cb, uint var_no) { for (i = 0; i < partitions; i++) { - S_KEY_CACHE_CB *partition= keycache->partition_array[i]; + SIMPLE_KEY_CACHE_CB *partition= keycache->partition_array[i]; res+= *(ulonglong *) ((char *) partition + var_ofs); } } @@ -5901,19 +5883,19 @@ ulonglong p_get_key_cache_stat_value(void *keycache_cb, uint var_no) wrappers must be used for this purpose. 
*/ -static KEY_CACHE_FUNCS p_key_cache_funcs = +static KEY_CACHE_FUNCS partitioned_key_cache_funcs = { - p_init_key_cache, - p_resize_key_cache, - p_change_key_cache_param, - p_key_cache_read, - p_key_cache_insert, - p_key_cache_write, - p_flush_key_blocks, - p_reset_key_cache_counters, - p_end_key_cache, - p_get_key_cache_statistics, - p_get_key_cache_stat_value + (INIT_KEY_CACHE) init_partitioned_key_cache, + (RESIZE_KEY_CACHE) resize_partitioned_key_cache, + (CHANGE_KEY_CACHE_PARAM) change_partitioned_key_cache_param, + (KEY_CACHE_READ) partitioned_key_cache_read, + (KEY_CACHE_INSERT) partitioned_key_cache_insert, + (KEY_CACHE_WRITE) partitioned_key_cache_write, + (FLUSH_KEY_BLOCKS) flush_partitioned_key_cache_blocks, + (RESET_KEY_CACHE_COUNTERS) reset_partitioned_key_cache_counters, + (END_KEY_CACHE) end_partitioned_key_cache, + (GET_KEY_CACHE_STATISTICS) get_partitioned_key_cache_statistics, + (GET_KEY_CACHE_STAT_VALUE) get_partitioned_key_cache_stat_value }; @@ -5926,12 +5908,12 @@ static KEY_CACHE_FUNCS p_key_cache_funcs = partitioned key caches. Each type (class) has its own implementation of the basic key cache operations used the MyISAM storage engine. The pointers to the implementation functions are stored in two static structures of the - type KEY_CACHE_FUNC: s_key_cache_funcs - for simple key caches, and - p_key_cache_funcs - for partitioned key caches. When a key cache object is - created the constructor procedure init_key_cache places a pointer to the - corresponding table into one of its fields. The procedure also initializes - a control block for the key cache oject and saves the pointer to this - block in another field of the key cache object. + type KEY_CACHE_FUNC: simple_key_cache_funcs - for simple key caches, and + partitioned_key_cache_funcs - for partitioned key caches. When a key cache + object is created the constructor procedure init_key_cache places a pointer + to the corresponding table into one of its fields. 
The procedure also + initializes a control block for the key cache oject and saves the pointer + to this block in another field of the key cache object. When a key cache wrapper function is invoked for a key cache object to perform a basic key cache operation it looks into the interface table associated with the key cache oject and calls the corresponding @@ -5982,7 +5964,6 @@ static KEY_CACHE_FUNCS p_key_cache_funcs = It's assumed that no two threads call this function simultaneously referring to the same key cache handle. - */ int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, @@ -5997,19 +5978,21 @@ int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, { if (partitions == 0) { - if (!(keycache_cb= (void *) my_malloc(sizeof(S_KEY_CACHE_CB), MYF(0)))) + if (!(keycache_cb= (void *) my_malloc(sizeof(SIMPLE_KEY_CACHE_CB), + MYF(0)))) return 0; - ((S_KEY_CACHE_CB *) keycache_cb)->key_cache_inited= 0; + ((SIMPLE_KEY_CACHE_CB *) keycache_cb)->key_cache_inited= 0; keycache->key_cache_type= SIMPLE_KEY_CACHE; - keycache->interface_funcs= &s_key_cache_funcs; + keycache->interface_funcs= &simple_key_cache_funcs; } else { - if (!(keycache_cb= (void *) my_malloc(sizeof(P_KEY_CACHE_CB), MYF(0)))) + if (!(keycache_cb= (void *) my_malloc(sizeof(PARTITIONED_KEY_CACHE_CB), + MYF(0)))) return 0; - ((P_KEY_CACHE_CB *) keycache_cb)->key_cache_inited= 0; + ((PARTITIONED_KEY_CACHE_CB *) keycache_cb)->key_cache_inited= 0; keycache->key_cache_type= PARTITIONED_KEY_CACHE; - keycache->interface_funcs= &p_key_cache_funcs; + keycache->interface_funcs= &partitioned_key_cache_funcs; } keycache->keycache_cb= keycache_cb; keycache->key_cache_inited= 1; @@ -6017,14 +6000,15 @@ int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, if (partitions != 0) { - ((P_KEY_CACHE_CB *) keycache_cb)->partitions= partitions; + ((PARTITIONED_KEY_CACHE_CB *) keycache_cb)->partitions= partitions; } keycache->can_be_used= 0; blocks= 
keycache->interface_funcs->init(keycache_cb, key_cache_block_size, use_mem, division_limit, age_threshold); keycache->partitions= partitions ? - ((P_KEY_CACHE_CB *) keycache_cb)->partitions : 0; + ((PARTITIONED_KEY_CACHE_CB *) keycache_cb)->partitions : + 0; DBUG_ASSERT(partitions <= MAX_KEY_CACHE_PARTITIONS); if (blocks > 0) keycache->can_be_used= 1; @@ -6037,7 +6021,7 @@ int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, SYNOPSIS resize_key_cache() - keycache pointer to the key cache to be resized + keycache pointer to the key cache to be resized key_cache_block_size size of blocks to keep cached data use_mem total memory to use for the new key cache division_limit new division limit (if not zero) @@ -6064,7 +6048,6 @@ int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, Currently the function is called when the values of the variables key_buffer_size and/or key_cache_block_size are being reset for the key cache keycache. - */ int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, @@ -6074,10 +6057,10 @@ int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, if (keycache->key_cache_inited) { if ((uint) keycache->param_partitions != keycache->partitions && use_mem) - blocks= repartition_key_cache (keycache, - key_cache_block_size, use_mem, - division_limit, age_threshold, - (uint) keycache->param_partitions); + blocks= repartition_key_cache(keycache, + key_cache_block_size, use_mem, + division_limit, age_threshold, + (uint) keycache->param_partitions); else { blocks= keycache->interface_funcs->resize(keycache->keycache_cb, @@ -6087,10 +6070,10 @@ int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, if (keycache->partitions) keycache->partitions= - ((P_KEY_CACHE_CB *)(keycache->keycache_cb))->partitions; + ((PARTITIONED_KEY_CACHE_CB *)(keycache->keycache_cb))->partitions; } - if (blocks <= 0) - keycache->can_be_used= 0; + + keycache->can_be_used= (blocks >= 0); } return blocks; } @@ -6117,7 
+6100,6 @@ int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, Currently the function is called when the values of the variables key_cache_division_limit and/or key_cache_age_threshold are being reset for the key cache keycache. - */ void change_key_cache_param(KEY_CACHE *keycache, uint division_limit, @@ -6262,7 +6244,6 @@ uchar *key_cache_read(KEY_CACHE *keycache, the key cache. It is assumed that it may be performed in parallel with reading the file data from the key buffers by other threads. - */ int key_cache_insert(KEY_CACHE *keycache, @@ -6316,7 +6297,6 @@ int key_cache_insert(KEY_CACHE *keycache, NOTES This implementation may exploit the fact that the function is called only when a thread has got an exclusive lock for the key file. - */ int key_cache_write(KEY_CACHE *keycache, @@ -6373,7 +6353,6 @@ int key_cache_write(KEY_CACHE *keycache, NOTES Any implementation of the function may exploit the fact that the function is called only when a thread has got an exclusive lock for the key file. - */ int flush_key_blocks(KEY_CACHE *keycache, @@ -6406,7 +6385,6 @@ int flush_key_blocks(KEY_CACHE *keycache, NOTES This procedure is used by process_key_caches() to reset the counters of all currently used key caches, both the default one and the named ones. 
- */ int reset_key_cache_counters(const char *name __attribute__((unused)), @@ -6441,13 +6419,11 @@ int reset_key_cache_counters(const char *name __attribute__((unused)), RETURN none - */ void get_key_cache_statistics(KEY_CACHE *keycache, uint partition_no, KEY_CACHE_STATISTICS *key_cache_stats) { - bzero(key_cache_stats, sizeof(KEY_CACHE_STATISTICS)); if (keycache->key_cache_inited) { keycache->interface_funcs->get_stats(keycache->keycache_cb, @@ -6484,7 +6460,6 @@ void get_key_cache_statistics(KEY_CACHE *keycache, uint partition_no, reads 4 write_requests 5 writes 6 - */ ulonglong get_key_cache_stat_value(KEY_CACHE *keycache, uint var_no) @@ -6534,7 +6509,6 @@ ulonglong get_key_cache_stat_value(KEY_CACHE *keycache, uint var_no) Currently the function is called when the value of the variable key_cache_partitions is being reset for the key cache keycache. - */ int repartition_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, -- cgit v1.2.1 From a37899a81a57004612044477348cbf1707b38ba4 Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Mon, 5 Apr 2010 19:08:10 -0700 Subject: Post review fixes. 
--- mysys/mf_keycache.c | 73 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 18 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 4ee7b0e9ccf..9ced597b44a 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -5158,6 +5158,7 @@ int init_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache, { int i; size_t mem_per_cache; + size_t mem_decr; int cnt; SIMPLE_KEY_CACHE_CB *partition; SIMPLE_KEY_CACHE_CB **partition_ptr; @@ -5175,9 +5176,11 @@ int init_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache, (SIMPLE_KEY_CACHE_CB **) my_malloc(sizeof(SIMPLE_KEY_CACHE_CB *) * partitions, MYF(MY_WME)))) DBUG_RETURN(-1); + bzero(partition_ptr, sizeof(SIMPLE_KEY_CACHE_CB *) * partitions); keycache->partition_array= partition_ptr; } + mem_decr= mem_per_cache / 5; mem_per_cache = use_mem / partitions; for (i= 0; i < (int) partitions; i++) @@ -5194,30 +5197,64 @@ int init_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache, partition->key_cache_inited= 0; } - if ((cnt= init_simple_key_cache(partition, - key_cache_block_size, mem_per_cache, - division_limit, age_threshold)) <= 0) + cnt= init_simple_key_cache(partition, key_cache_block_size, mem_per_cache, + division_limit, age_threshold); + if (cnt <= 0) { end_simple_key_cache(partition, 1); - my_free(partition, MYF(0)); - partition= 0; - if (key_cache_inited) + if (!key_cache_inited) { - memmove(partition_ptr, partition_ptr+1, - sizeof(partition_ptr)*(partitions-i-1)); + my_free(partition, MYF(0)); + partition= 0; } - if (!--partitions) - break; - if (i == 0) + if (i == 0 && cnt < 0 || i > 0) { - i--; - mem_per_cache = use_mem / partitions; - continue; + /* + Here we have two cases: + 1. i == 0 and cnt < 0 + cnt < 0 => mem_per_cache is not big enough to allocate minimal + number of key blocks in the key cache of the partition. + Decrease the the number of the partitions by 1 and start again. + 2. 
i > 0 + There is not enough memory for one of the succeeding partitions. + Just skip this partition decreasing the number of partitions in + the key cache by one. + Do not change the value of mem_per_cache in both cases. + */ + if (key_cache_inited) + { + my_free(partition, MYF(0)); + partition= 0; + if(key_cache_inited) + memmove(partition_ptr, partition_ptr+1, + sizeof(partition_ptr)*(partitions-i-1)); + } + if (!--partitions) + break; } + else + { + /* + We come here when i == 0 && cnt == 0. + cnt == 0 => the memory allocator fails to allocate a block of + memory of the size mem_per_cache. Decrease the value of + mem_per_cache without changing the current number of partitions + and start again. Make sure that such a decrease may happen not + more than 5 times in total. + */ + if (use_mem <= mem_decr) + break; + use_mem-= mem_decr; + } + i--; + mem_per_cache= use_mem/partitions; + continue; + } + else + { + blocks+= cnt; + *partition_ptr++= partition; } - - blocks+= cnt; - *partition_ptr++= partition; } keycache->partitions= partitions= partition_ptr-keycache->partition_array; @@ -5859,7 +5896,7 @@ get_partitioned_key_cache_stat_value(PARTITIONED_KEY_CACHE_CB *keycache, ulonglong res= 0; DBUG_ENTER("get_partitioned_key_cache_stat_value"); - if (var_no < NO_LONG_KEY_CACHE_STAT_VARIABLES) + if (var_no < NUM_LONG_KEY_CACHE_STAT_VARIABLES) { for (i = 0; i < partitions; i++) { -- cgit v1.2.1 From d04d8aa6bbb05d4bb3fceafbab723649c8d650ed Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Tue, 6 Apr 2010 13:02:24 +0200 Subject: fix a warning --- mysys/mf_keycache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 9ced597b44a..253707853fd 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -5207,7 +5207,7 @@ int init_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache, my_free(partition, MYF(0)); partition= 0; } - if (i == 0 && cnt < 0 || i > 0) + if ((i == 0 && cnt < 
0) || i > 0) { /* Here we have two cases: -- cgit v1.2.1 From 9e23495b1dc988526776768f50a30f647d793d14 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Wed, 26 May 2010 22:27:01 +0300 Subject: Added macros to inform valgrind that memory is uninitialized include/Makefile.am: Make my_valgrind.h global (as it's used by my_global.h) include/my_global.h: Include my_valgrind.h include/my_sys.h: TRASH() moved to my_valgrind.h include/my_valgrind.h: Add VALGRIND defines to mark memory mysys/my_alloc.c: Mark internal freed memory as undefined --- mysys/my_alloc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'mysys') diff --git a/mysys/my_alloc.c b/mysys/my_alloc.c index 32fc75fc692..8d3bc0c3fb2 100644 --- a/mysys/my_alloc.c +++ b/mysys/my_alloc.c @@ -21,7 +21,6 @@ #undef EXTRA_DEBUG #define EXTRA_DEBUG - /* Initialize memory root @@ -276,7 +275,7 @@ void *multi_alloc_root(MEM_ROOT *root, ...) DBUG_RETURN((void*) start); } -#define TRASH_MEM(X) TRASH(((char*)(X) + ((X)->size-(X)->left)), (X)->left) +#define TRASH_MEM(X) VALGRIND_MAKE_MEM_UNDEFINED(((char*)(X) + ((X)->size-(X)->left)), (X)->left) /* Mark all data in blocks free for reusage */ -- cgit v1.2.1 From 91575d1fae252efec924ea03619141b742b1075a Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Thu, 27 May 2010 23:30:37 +0200 Subject: first initialize variable, then use it, not the other way around --- mysys/mf_keycache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 253707853fd..4e22b3f65a2 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -5180,8 +5180,8 @@ int init_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache, keycache->partition_array= partition_ptr; } - mem_decr= mem_per_cache / 5; mem_per_cache = use_mem / partitions; + mem_decr= mem_per_cache / 5; for (i= 0; i < (int) partitions; i++) { -- cgit v1.2.1 From 5c4e95f83b16caa75e2e93d1b2c9f39a214684d6 Mon Sep 17 00:00:00 2001 
From: Bo Thorsen Date: Fri, 25 Jun 2010 15:09:45 +0200 Subject: Implement an NSIS based installer --- mysys/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) (limited to 'mysys') diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt index e9873fb0525..bf0c6551a11 100644 --- a/mysys/CMakeLists.txt +++ b/mysys/CMakeLists.txt @@ -49,4 +49,6 @@ SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c default.c default_ IF(NOT SOURCE_SUBLIBS) ADD_LIBRARY(mysys ${MYSYS_SOURCES}) + + INSTALL(TARGETS mysys DESTINATION lib/opt COMPONENT runtime) # TODO: Component? ENDIF(NOT SOURCE_SUBLIBS) -- cgit v1.2.1 From ecbcddc03dc298ea1e6c0aa1a120bd0b4b04b3fd Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Fri, 16 Jul 2010 10:33:01 +0300 Subject: Improved speed of thr_alarm from O(N) to O(1). thr_alarm is used to handle timeouts and kill of connections. Fixed compiler warnings. queues.h and queues.c are now based on the UNIREG code and thus made BSD. Fix code to use new queue() interface. This mostly affects how you access elements in the queue. If USE_NET_CLEAR is not set, don't clear connection from unexpected characters. This should give a speed up when doing a lot of fast queries. Fixed some code in ma_ft_boolean_search.c that had not made it from myisam/ft_boolean_search.c include/queues.h: Use UNIREG code base (BSD) Changed init_queue() to take all initialization arguments. New interface to access elements in queue include/thr_alarm.h: Changed to use time_t instead of ulong (portability) Added index_in_queue, to be able to remove random element from queue in O(1) mysys/queues.c: Use UNIREG code base (BSD) init_queue() and reinit_queue() now takes more initialization arguments. (No need for init_queue_ex() anymore) Now one can tell queue_insert() to store in the element a pointer to where element is in queue. This allows one to remove elements from queue in O(1) instead of O(N) mysys/thr_alarm.c: Use new option in queue() to allow fast removal of elements. 
Do less inside LOCK_alarm mutex. This should give a major speed up of thr_alarm usage when there is many threads sql/create_options.cc: Fixed wrong printf sql/event_queue.cc: Use new queue interface() sql/filesort.cc: Use new queue interface() sql/ha_partition.cc: Use new queue interface() sql/ha_partition.h: Fixed compiler warning sql/item_cmpfunc.cc: Fixed compiler warning sql/item_subselect.cc: Use new queue interface() Removed not used variable sql/net_serv.cc: If USE_NET_CLEAR is not set, don't clear connection from unexpected characters. This should give a speed up when doing a lot of fast queries at the disadvantage that if there is a bug in the client protocol the connection will be dropped instead of being unnoticed. sql/opt_range.cc: Use new queue interface() Fixed compiler warnings sql/uniques.cc: Use new queue interface() storage/maria/ma_ft_boolean_search.c: Copy code from myisam/ft_boolean_search.c Use new queue interface() storage/maria/ma_ft_nlq_search.c: Use new queue interface() storage/maria/ma_sort.c: Use new queue interface() storage/maria/maria_pack.c: Use new queue interface() Use queue_fix() instead of own loop to fix queue. storage/myisam/ft_boolean_search.c: Use new queue interface() storage/myisam/ft_nlq_search.c: Use new queue interface() storage/myisam/mi_test_all.sh: Remove temporary file from last run storage/myisam/myisampack.c: Use new queue interface() Use queue_fix() instead of own loop to fix queue. 
storage/myisam/sort.c: Use new queue interface() storage/myisammrg/myrg_queue.c: Use new queue interface() storage/myisammrg/myrg_rnext.c: Use new queue interface() storage/myisammrg/myrg_rnext_same.c: Use new queue interface() storage/myisammrg/myrg_rprev.c: Use new queue interface() --- mysys/queues.c | 625 +++++++++++++----------------------------------------- mysys/thr_alarm.c | 186 +++++++--------- 2 files changed, 223 insertions(+), 588 deletions(-) (limited to 'mysys') diff --git a/mysys/queues.c b/mysys/queues.c index 9c85e493141..a65a7f8914c 100644 --- a/mysys/queues.c +++ b/mysys/queues.c @@ -1,25 +1,42 @@ -/* Copyright (C) 2000, 2005 MySQL AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* Copyright (C) 2010 Monty Program Ab + All Rights reserved + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the following disclaimer + in the documentation and/or other materials provided with the + distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. +*/ /* + This code originates from the Unireg project. + Code for generell handling of priority Queues. Implemention of queues from "Algoritms in C" by Robert Sedgewick. - An optimisation of _downheap suggested in Exercise 7.51 in "Data - Structures & Algorithms in C++" by Mark Allen Weiss, Second Edition - was implemented by Mikael Ronstrom 2005. Also the O(N) algorithm - of queue_fix was implemented. + + The queue can optionally store the position in queue in the element + that is in the queue. This allows one to remove any element from the queue + in O(1) time. + + Optimisation of _downheap() and queue_fix() is inspired by code done + by Mikael Ronström, based on an optimisation of _downheap from + Exercise 7.51 in "Data Structures & Algorithms in C++" by Mark Allen + Weiss, Second Edition. */ #include "mysys_priv.h" @@ -39,6 +56,10 @@ max_at_top Set to 1 if you want biggest element on top. compare Compare function for elements, takes 3 arguments. first_cmp_arg First argument to compare function + offset_to_queue_pos If <> 0, then offset+1 in element to store position + in queue (for fast delete of element in queue) + auto_extent When the queue is full and there is insert operation + extend the queue. 
NOTES Will allocate max_element pointers for queue array @@ -50,74 +71,33 @@ int init_queue(QUEUE *queue, uint max_elements, uint offset_to_key, pbool max_at_top, int (*compare) (void *, uchar *, uchar *), - void *first_cmp_arg) + void *first_cmp_arg, uint offset_to_queue_pos, + uint auto_extent) + { DBUG_ENTER("init_queue"); - if ((queue->root= (uchar **) my_malloc((max_elements+1)*sizeof(void*), + if ((queue->root= (uchar **) my_malloc((max_elements + 1) * sizeof(void*), MYF(MY_WME))) == 0) DBUG_RETURN(1); - queue->elements=0; - queue->compare=compare; - queue->first_cmp_arg=first_cmp_arg; - queue->max_elements=max_elements; - queue->offset_to_key=offset_to_key; + queue->elements= 0; + queue->compare= compare; + queue->first_cmp_arg= first_cmp_arg; + queue->max_elements= max_elements; + queue->offset_to_key= offset_to_key; + queue->offset_to_queue_pos= offset_to_queue_pos; + queue->auto_extent= auto_extent; queue_set_max_at_top(queue, max_at_top); DBUG_RETURN(0); } - -/* - Init queue, uses init_queue internally for init work but also accepts - auto_extent as parameter - - SYNOPSIS - init_queue_ex() - queue Queue to initialise - max_elements Max elements that will be put in queue - offset_to_key Offset to key in element stored in queue - Used when sending pointers to compare function - max_at_top Set to 1 if you want biggest element on top. - compare Compare function for elements, takes 3 arguments. - first_cmp_arg First argument to compare function - auto_extent When the queue is full and there is insert operation - extend the queue. 
- - NOTES - Will allocate max_element pointers for queue array - - RETURN - 0 ok - 1 Could not allocate memory -*/ - -int init_queue_ex(QUEUE *queue, uint max_elements, uint offset_to_key, - pbool max_at_top, int (*compare) (void *, uchar *, uchar *), - void *first_cmp_arg, uint auto_extent) -{ - int ret; - DBUG_ENTER("init_queue_ex"); - - if ((ret= init_queue(queue, max_elements, offset_to_key, max_at_top, compare, - first_cmp_arg))) - DBUG_RETURN(ret); - - queue->auto_extent= auto_extent; - DBUG_RETURN(0); -} - /* Reinitialize queue for other usage SYNOPSIS reinit_queue() queue Queue to initialise - max_elements Max elements that will be put in queue - offset_to_key Offset to key in element stored in queue - Used when sending pointers to compare function - max_at_top Set to 1 if you want biggest element on top. - compare Compare function for elements, takes 3 arguments. - first_cmp_arg First argument to compare function + For rest of arguments, see init_queue() above NOTES This will delete all elements from the queue. 
If you don't want this, @@ -125,21 +105,23 @@ int init_queue_ex(QUEUE *queue, uint max_elements, uint offset_to_key, RETURN 0 ok - EE_OUTOFMEMORY Wrong max_elements + 1 Wrong max_elements; Queue has old size */ int reinit_queue(QUEUE *queue, uint max_elements, uint offset_to_key, pbool max_at_top, int (*compare) (void *, uchar *, uchar *), - void *first_cmp_arg) + void *first_cmp_arg, uint offset_to_queue_pos, + uint auto_extent) { DBUG_ENTER("reinit_queue"); - queue->elements=0; - queue->compare=compare; - queue->first_cmp_arg=first_cmp_arg; - queue->offset_to_key=offset_to_key; + queue->elements= 0; + queue->compare= compare; + queue->first_cmp_arg= first_cmp_arg; + queue->offset_to_key= offset_to_key; + queue->offset_to_queue_pos= offset_to_queue_pos; + queue->auto_extent= auto_extent; queue_set_max_at_top(queue, max_at_top); - resize_queue(queue, max_elements); - DBUG_RETURN(0); + DBUG_RETURN(resize_queue(queue, max_elements)); } @@ -167,8 +149,8 @@ int resize_queue(QUEUE *queue, uint max_elements) if (queue->max_elements == max_elements) DBUG_RETURN(0); if ((new_root= (uchar **) my_realloc((void *)queue->root, - (max_elements+1)*sizeof(void*), - MYF(MY_WME))) == 0) + (max_elements + 1)* sizeof(void*), + MYF(MY_WME))) == 0) DBUG_RETURN(1); set_if_smaller(queue->elements, max_elements); queue->max_elements= max_elements; @@ -197,39 +179,58 @@ void delete_queue(QUEUE *queue) if (queue->root) { my_free((uchar*) queue->root,MYF(0)); - queue->root=0; + queue->root=0; /* Allow multiple calls */ } DBUG_VOID_RETURN; } - /* Code for insert, search and delete of elements */ +/* + Insert element in queue + + SYNOPSIS + queue_insert() + queue Queue to use + element Element to insert +*/ void queue_insert(register QUEUE *queue, uchar *element) { reg2 uint idx, next; + uint offset_to_queue_pos= queue->offset_to_queue_pos; DBUG_ASSERT(queue->elements < queue->max_elements); - queue->root[0]= element; + idx= ++queue->elements; /* max_at_top swaps the comparison if we want to 
order by desc */ - while ((queue->compare(queue->first_cmp_arg, + while (idx > 1 && + (queue->compare(queue->first_cmp_arg, element + queue->offset_to_key, queue->root[(next= idx >> 1)] + queue->offset_to_key) * queue->max_at_top) < 0) { queue->root[idx]= queue->root[next]; + if (offset_to_queue_pos) + (*(uint*) (queue->root[idx] + offset_to_queue_pos-1))= idx; idx= next; } queue->root[idx]= element; + if (offset_to_queue_pos) + (*(uint*) (element+ offset_to_queue_pos-1))= idx; } + /* - Does safe insert. If no more space left on the queue resize it. - Return codes: - 0 - OK - 1 - Cannot allocate more memory - 2 - auto_extend is 0, the operation would - + Like queue_insert, but resize queue if queue is full + + SYNOPSIS + queue_insert_safe() + queue Queue to use + element Element to insert + + RETURN + 0 OK + 1 Cannot allocate more memory + 2 auto_extend is 0; No insertion done */ int queue_insert_safe(register QUEUE *queue, uchar *element) @@ -239,7 +240,7 @@ int queue_insert_safe(register QUEUE *queue, uchar *element) { if (!queue->auto_extent) return 2; - else if (resize_queue(queue, queue->max_elements + queue->auto_extent)) + if (resize_queue(queue, queue->max_elements + queue->auto_extent)) return 1; } @@ -248,40 +249,48 @@ int queue_insert_safe(register QUEUE *queue, uchar *element) } - /* Remove item from queue */ - /* Returns pointer to removed element */ +/* + Remove item from queue + + SYNOPSIS + queue_remove() + queue Queue to use + element Index of element to remove. 
+ First element in queue is 'queue_first_element(queue)' + + RETURN + pointer to removed element +*/ uchar *queue_remove(register QUEUE *queue, uint idx) { uchar *element; - DBUG_ASSERT(idx < queue->max_elements); - element= queue->root[++idx]; /* Intern index starts from 1 */ - queue->root[idx]= queue->root[queue->elements--]; - _downheap(queue, idx); + DBUG_ASSERT(idx >= 1 && idx <= queue->elements); + element= queue->root[idx]; + _downheap(queue, idx, queue->root[queue->elements--]); return element; } - /* Fix when element on top has been replaced */ -#ifndef queue_replaced -void queue_replaced(QUEUE *queue) -{ - _downheap(queue,1); -} -#endif +/* + Add element to fixed position and update heap -#ifndef OLD_VERSION + SYNOPSIS + _downheap() + queue Queue to use + idx Index of element to change + element Element to store at 'idx' +*/ -void _downheap(register QUEUE *queue, uint idx) +void _downheap(register QUEUE *queue, uint start_idx, uchar *element) { - uchar *element; - uint elements,half_queue,offset_to_key, next_index; + uint elements,half_queue,offset_to_key, next_index, offset_to_queue_pos; + register uint idx= start_idx; my_bool first= TRUE; - uint start_idx= idx; offset_to_key=queue->offset_to_key; - element=queue->root[idx]; - half_queue=(elements=queue->elements) >> 1; + offset_to_queue_pos= queue->offset_to_queue_pos; + half_queue= (elements= queue->elements) >> 1; while (idx <= half_queue) { @@ -298,393 +307,49 @@ void _downheap(register QUEUE *queue, uint idx) element+offset_to_key) * queue->max_at_top) >= 0))) { queue->root[idx]= element; + if (offset_to_queue_pos) + (*(uint*) (element + offset_to_queue_pos-1))= idx; return; } - queue->root[idx]=queue->root[next_index]; - idx=next_index; first= FALSE; - } - - next_index= idx >> 1; - while (next_index > start_idx) - { - if ((queue->compare(queue->first_cmp_arg, - queue->root[next_index]+offset_to_key, - element+offset_to_key) * - queue->max_at_top) < 0) - break; - 
queue->root[idx]=queue->root[next_index]; + queue->root[idx]= queue->root[next_index]; + if (offset_to_queue_pos) + (*(uint*) (queue->root[idx] + offset_to_queue_pos-1))= idx; idx=next_index; - next_index= idx >> 1; } - queue->root[idx]=element; -} -#else /* - The old _downheap version is kept for comparisons with the benchmark - suit or new benchmarks anyone wants to run for comparisons. + Insert the element into the right position. This is the same code + as we have in queue_insert() */ - /* Fix heap when index have changed */ -void _downheap(register QUEUE *queue, uint idx) -{ - uchar *element; - uint elements,half_queue,next_index,offset_to_key; - - offset_to_key=queue->offset_to_key; - element=queue->root[idx]; - half_queue=(elements=queue->elements) >> 1; - - while (idx <= half_queue) + while ((next_index= (idx >> 1)) > start_idx && + queue->compare(queue->first_cmp_arg, + element+offset_to_key, + queue->root[next_index]+offset_to_key)* + queue->max_at_top < 0) { - next_index=idx+idx; - if (next_index < elements && - (queue->compare(queue->first_cmp_arg, - queue->root[next_index]+offset_to_key, - queue->root[next_index+1]+offset_to_key) * - queue->max_at_top) > 0) - next_index++; - if ((queue->compare(queue->first_cmp_arg, - queue->root[next_index]+offset_to_key, - element+offset_to_key) * queue->max_at_top) >= 0) - break; - queue->root[idx]=queue->root[next_index]; - idx=next_index; + queue->root[idx]= queue->root[next_index]; + if (offset_to_queue_pos) + (*(uint*) (queue->root[idx] + offset_to_queue_pos-1))= idx; + idx= next_index; } - queue->root[idx]=element; + queue->root[idx]= element; + if (offset_to_queue_pos) + (*(uint*) (element + offset_to_queue_pos-1))= idx; } -#endif - /* Fix heap when every element was changed. + + SYNOPSIS + queue_fix() + queue Queue to use */ void queue_fix(QUEUE *queue) { uint i; for (i= queue->elements >> 1; i > 0; i--) - _downheap(queue, i); -} - -#ifdef MAIN - /* - A test program for the priority queue implementation. 
- It can also be used to benchmark changes of the implementation - Build by doing the following in the directory mysys - make test_priority_queue - ./test_priority_queue - - Written by Mikael Ronström, 2005 - */ - -static uint num_array[1025]; -static uint tot_no_parts= 0; -static uint tot_no_loops= 0; -static uint expected_part= 0; -static uint expected_num= 0; -static bool max_ind= 0; -static bool fix_used= 0; -static ulonglong start_time= 0; - -static bool is_divisible_by(uint num, uint divisor) -{ - uint quotient= num / divisor; - if (quotient * divisor == num) - return TRUE; - return FALSE; -} - -void calculate_next() -{ - uint part= expected_part, num= expected_num; - uint no_parts= tot_no_parts; - if (max_ind) - { - do - { - while (++part <= no_parts) - { - if (is_divisible_by(num, part) && - (num <= ((1 << 21) + part))) - { - expected_part= part; - expected_num= num; - return; - } - } - part= 0; - } while (--num); - } - else - { - do - { - while (--part > 0) - { - if (is_divisible_by(num, part)) - { - expected_part= part; - expected_num= num; - return; - } - } - part= no_parts + 1; - } while (++num); - } -} - -void calculate_end_next(uint part) -{ - uint no_parts= tot_no_parts, num; - num_array[part]= 0; - if (max_ind) - { - expected_num= 0; - for (part= no_parts; part > 0 ; part--) - { - if (num_array[part]) - { - num= num_array[part] & 0x3FFFFF; - if (num >= expected_num) - { - expected_num= num; - expected_part= part; - } - } - } - if (expected_num == 0) - expected_part= 0; - } - else - { - expected_num= 0xFFFFFFFF; - for (part= 1; part <= no_parts; part++) - { - if (num_array[part]) - { - num= num_array[part] & 0x3FFFFF; - if (num <= expected_num) - { - expected_num= num; - expected_part= part; - } - } - } - if (expected_num == 0xFFFFFFFF) - expected_part= 0; - } - return; -} -static int test_compare(void *null_arg, uchar *a, uchar *b) -{ - uint a_num= (*(uint*)a) & 0x3FFFFF; - uint b_num= (*(uint*)b) & 0x3FFFFF; - uint a_part, b_part; - if (a_num > 
b_num) - return +1; - if (a_num < b_num) - return -1; - a_part= (*(uint*)a) >> 22; - b_part= (*(uint*)b) >> 22; - if (a_part < b_part) - return +1; - if (a_part > b_part) - return -1; - return 0; -} - -bool check_num(uint num_part) -{ - uint part= num_part >> 22; - uint num= num_part & 0x3FFFFF; - if (part == expected_part) - if (num == expected_num) - return FALSE; - printf("Expect part %u Expect num 0x%x got part %u num 0x%x max_ind %u fix_used %u \n", - expected_part, expected_num, part, num, max_ind, fix_used); - return TRUE; -} - - -void perform_insert(QUEUE *queue) -{ - uint i= 1, no_parts= tot_no_parts; - uint backward_start= 0; - - expected_part= 1; - expected_num= 1; - - if (max_ind) - backward_start= 1 << 21; - - do - { - uint num= (i + backward_start); - if (max_ind) - { - while (!is_divisible_by(num, i)) - num--; - if (max_ind && (num > expected_num || - (num == expected_num && i < expected_part))) - { - expected_num= num; - expected_part= i; - } - } - num_array[i]= num + (i << 22); - if (fix_used) - queue_element(queue, i-1)= (uchar*)&num_array[i]; - else - queue_insert(queue, (uchar*)&num_array[i]); - } while (++i <= no_parts); - if (fix_used) - { - queue->elements= no_parts; - queue_fix(queue); - } -} - -bool perform_ins_del(QUEUE *queue, bool max_ind) -{ - uint i= 0, no_loops= tot_no_loops, j= tot_no_parts; - do - { - uint num_part= *(uint*)queue_top(queue); - uint part= num_part >> 22; - if (check_num(num_part)) - return TRUE; - if (j++ >= no_loops) - { - calculate_end_next(part); - queue_remove(queue, (uint) 0); - } - else - { - calculate_next(); - if (max_ind) - num_array[part]-= part; - else - num_array[part]+= part; - queue_top(queue)= (uchar*)&num_array[part]; - queue_replaced(queue); - } - } while (++i < no_loops); - return FALSE; -} - -bool do_test(uint no_parts, uint l_max_ind, bool l_fix_used) -{ - QUEUE queue; - bool result; - max_ind= l_max_ind; - fix_used= l_fix_used; - init_queue(&queue, no_parts, 0, max_ind, test_compare, NULL); - 
tot_no_parts= no_parts; - tot_no_loops= 1024; - perform_insert(&queue); - if ((result= perform_ins_del(&queue, max_ind))) - delete_queue(&queue); - if (result) - { - printf("Error\n"); - return TRUE; - } - return FALSE; -} - -static void start_measurement() -{ - start_time= my_getsystime(); -} - -static void stop_measurement() -{ - ulonglong stop_time= my_getsystime(); - uint time_in_micros; - stop_time-= start_time; - stop_time/= 10; /* Convert to microseconds */ - time_in_micros= (uint)stop_time; - printf("Time expired is %u microseconds \n", time_in_micros); -} - -static void benchmark_test() -{ - QUEUE queue_real; - QUEUE *queue= &queue_real; - uint i, add; - fix_used= TRUE; - max_ind= FALSE; - tot_no_parts= 1024; - init_queue(queue, tot_no_parts, 0, max_ind, test_compare, NULL); - /* - First benchmark whether queue_fix is faster than using queue_insert - for sizes of 16 partitions. - */ - for (tot_no_parts= 2, add=2; tot_no_parts < 128; - tot_no_parts+= add, add++) - { - printf("Start benchmark queue_fix, tot_no_parts= %u \n", tot_no_parts); - start_measurement(); - for (i= 0; i < 128; i++) - { - perform_insert(queue); - queue_remove_all(queue); - } - stop_measurement(); - - fix_used= FALSE; - printf("Start benchmark queue_insert\n"); - start_measurement(); - for (i= 0; i < 128; i++) - { - perform_insert(queue); - queue_remove_all(queue); - } - stop_measurement(); - } - /* - Now benchmark insertion and deletion of 16400 elements. - Used in consecutive runs this shows whether the optimised _downheap - is faster than the standard implementation. 
- */ - printf("Start benchmarking _downheap \n"); - start_measurement(); - perform_insert(queue); - for (i= 0; i < 65536; i++) - { - uint num, part; - num= *(uint*)queue_top(queue); - num+= 16; - part= num >> 22; - num_array[part]= num; - queue_top(queue)= (uchar*)&num_array[part]; - queue_replaced(queue); - } - for (i= 0; i < 16; i++) - queue_remove(queue, (uint) 0); - queue_remove_all(queue); - stop_measurement(); -} - -int main() -{ - int i, add= 1; - for (i= 1; i < 1024; i+=add, add++) - { - printf("Start test for priority queue of size %u\n", i); - if (do_test(i, 0, 1)) - return -1; - if (do_test(i, 1, 1)) - return -1; - if (do_test(i, 0, 0)) - return -1; - if (do_test(i, 1, 0)) - return -1; - } - benchmark_test(); - printf("OK\n"); - return 0; + _downheap(queue, i, queue_element(queue, i)); } -#endif diff --git a/mysys/thr_alarm.c b/mysys/thr_alarm.c index b710a7eee39..680ec985aeb 100644 --- a/mysys/thr_alarm.c +++ b/mysys/thr_alarm.c @@ -41,6 +41,19 @@ volatile my_bool alarm_thread_running= 0; time_t next_alarm_expire_time= ~ (time_t) 0; static sig_handler process_alarm_part2(int sig); +#ifdef DBUG_OFF +#define reset_index_in_queue(alarm_data) +#else +#define reset_index_in_queue(alarm_data) alarm_data->index_in_queue= 0; +#endif /* DBUG_OFF */ + +#ifndef USE_ONE_SIGNAL_HAND +#define one_signal_hand_sigmask(A,B,C) pthread_sigmask((A), (B), (C)) +#else +#define one_signal_hand_sigmask(A,B,C) +#endif + + #if !defined(__WIN__) static pthread_mutex_t LOCK_alarm; @@ -72,8 +85,8 @@ void init_thr_alarm(uint max_alarms) DBUG_ENTER("init_thr_alarm"); alarm_aborted=0; next_alarm_expire_time= ~ (time_t) 0; - init_queue(&alarm_queue,max_alarms+1,offsetof(ALARM,expire_time),0, - compare_ulong,NullS); + init_queue(&alarm_queue, max_alarms+1, offsetof(ALARM,expire_time), 0, + compare_ulong, NullS, offsetof(ALARM, index_in_queue)+1, 0); sigfillset(&full_signal_set); /* Neaded to block signals */ pthread_mutex_init(&LOCK_alarm,MY_MUTEX_INIT_FAST); 
pthread_cond_init(&COND_alarm,NULL); @@ -151,7 +164,7 @@ void resize_thr_alarm(uint max_alarms) my_bool thr_alarm(thr_alarm_t *alrm, uint sec, ALARM *alarm_data) { - time_t now; + time_t now, next; #ifndef USE_ONE_SIGNAL_HAND sigset_t old_mask; #endif @@ -161,79 +174,68 @@ my_bool thr_alarm(thr_alarm_t *alrm, uint sec, ALARM *alarm_data) DBUG_PRINT("enter",("thread: %s sec: %d",my_thread_name(),sec)); now= my_time(0); -#ifndef USE_ONE_SIGNAL_HAND - pthread_sigmask(SIG_BLOCK,&full_signal_set,&old_mask); -#endif + if (!alarm_data) + { + if (!(alarm_data=(ALARM*) my_malloc(sizeof(ALARM),MYF(MY_WME)))) + goto abort_no_unlock; + alarm_data->malloced= 1; + } + else + alarm_data->malloced= 0; + next= now + sec; + alarm_data->expire_time= next; + alarm_data->alarmed= 0; + alarm_data->thread= current_my_thread_var->pthread_self; + alarm_data->thread_id= current_my_thread_var->id; + + one_signal_hand_sigmask(SIG_BLOCK,&full_signal_set,&old_mask); pthread_mutex_lock(&LOCK_alarm); /* Lock from threads & alarms */ - if (alarm_aborted > 0) + if (unlikely(alarm_aborted)) { /* No signal thread */ DBUG_PRINT("info", ("alarm aborted")); - *alrm= 0; /* No alarm */ - pthread_mutex_unlock(&LOCK_alarm); -#ifndef USE_ONE_SIGNAL_HAND - pthread_sigmask(SIG_SETMASK,&old_mask,NULL); -#endif - DBUG_RETURN(1); - } - if (alarm_aborted < 0) + if (alarm_aborted > 0) + goto abort; sec= 1; /* Abort mode */ - + } if (alarm_queue.elements >= max_used_alarms) { if (alarm_queue.elements == alarm_queue.max_elements) { DBUG_PRINT("info", ("alarm queue full")); fprintf(stderr,"Warning: thr_alarm queue is full\n"); - *alrm= 0; /* No alarm */ - pthread_mutex_unlock(&LOCK_alarm); -#ifndef USE_ONE_SIGNAL_HAND - pthread_sigmask(SIG_SETMASK,&old_mask,NULL); -#endif - DBUG_RETURN(1); + goto abort; } max_used_alarms=alarm_queue.elements+1; } - reschedule= (ulong) next_alarm_expire_time > (ulong) now + sec; - if (!alarm_data) - { - if (!(alarm_data=(ALARM*) my_malloc(sizeof(ALARM),MYF(MY_WME)))) - { - 
DBUG_PRINT("info", ("failed my_malloc()")); - *alrm= 0; /* No alarm */ - pthread_mutex_unlock(&LOCK_alarm); -#ifndef USE_ONE_SIGNAL_HAND - pthread_sigmask(SIG_SETMASK,&old_mask,NULL); -#endif - DBUG_RETURN(1); - } - alarm_data->malloced=1; - } - else - alarm_data->malloced=0; - alarm_data->expire_time=now+sec; - alarm_data->alarmed=0; - alarm_data->thread= current_my_thread_var->pthread_self; - alarm_data->thread_id= current_my_thread_var->id; + reschedule= (ulong) next_alarm_expire_time > (ulong) next; queue_insert(&alarm_queue,(uchar*) alarm_data); + assert(alarm_data->index_in_queue > 0); /* Reschedule alarm if the current one has more than sec left */ - if (reschedule) + if (unlikely(reschedule)) { DBUG_PRINT("info", ("reschedule")); if (pthread_equal(pthread_self(),alarm_thread)) { alarm(sec); /* purecov: inspected */ - next_alarm_expire_time= now + sec; + next_alarm_expire_time= next; } else reschedule_alarms(); /* Reschedule alarms */ } pthread_mutex_unlock(&LOCK_alarm); -#ifndef USE_ONE_SIGNAL_HAND - pthread_sigmask(SIG_SETMASK,&old_mask,NULL); -#endif + one_signal_hand_sigmask(SIG_SETMASK,&old_mask,NULL); (*alrm)= &alarm_data->alarmed; DBUG_RETURN(0); + +abort: + if (alarm_data->malloced) + my_free(alarm_data, MYF(0)); + pthread_mutex_unlock(&LOCK_alarm); + one_signal_hand_sigmask(SIG_SETMASK,&old_mask,NULL); +abort_no_unlock: + *alrm= 0; /* No alarm */ + DBUG_RETURN(1); } @@ -247,41 +249,18 @@ void thr_end_alarm(thr_alarm_t *alarmed) #ifndef USE_ONE_SIGNAL_HAND sigset_t old_mask; #endif - uint i, found=0; DBUG_ENTER("thr_end_alarm"); -#ifndef USE_ONE_SIGNAL_HAND - pthread_sigmask(SIG_BLOCK,&full_signal_set,&old_mask); -#endif - pthread_mutex_lock(&LOCK_alarm); - + one_signal_hand_sigmask(SIG_BLOCK,&full_signal_set,&old_mask); alarm_data= (ALARM*) ((uchar*) *alarmed - offsetof(ALARM,alarmed)); - for (i=0 ; i < alarm_queue.elements ; i++) - { - if ((ALARM*) queue_element(&alarm_queue,i) == alarm_data) - { - queue_remove(&alarm_queue,i),MYF(0); - if 
(alarm_data->malloced) - my_free((uchar*) alarm_data,MYF(0)); - found++; -#ifdef DBUG_OFF - break; -#endif - } - } - DBUG_ASSERT(!*alarmed || found == 1); - if (!found) - { - if (*alarmed) - fprintf(stderr,"Warning: Didn't find alarm 0x%lx in queue of %d alarms\n", - (long) *alarmed, alarm_queue.elements); - DBUG_PRINT("warning",("Didn't find alarm 0x%lx in queue\n", - (long) *alarmed)); - } + pthread_mutex_lock(&LOCK_alarm); + DBUG_ASSERT(alarm_data->index_in_queue != 0); + DBUG_ASSERT(queue_element(&alarm_queue, alarm_data->index_in_queue) == + alarm_data); + queue_remove(&alarm_queue, alarm_data->index_in_queue); pthread_mutex_unlock(&LOCK_alarm); -#ifndef USE_ONE_SIGNAL_HAND - pthread_sigmask(SIG_SETMASK,&old_mask,NULL); -#endif + one_signal_hand_sigmask(SIG_SETMASK,&old_mask,NULL); + reset_index_in_queue(alarm_data); DBUG_VOID_RETURN; } @@ -344,12 +323,13 @@ static sig_handler process_alarm_part2(int sig __attribute__((unused))) #if defined(MAIN) && !defined(__bsdi__) printf("process_alarm\n"); fflush(stdout); #endif - if (alarm_queue.elements) + if (likely(alarm_queue.elements)) { - if (alarm_aborted) + if (unlikely(alarm_aborted)) { uint i; - for (i=0 ; i < alarm_queue.elements ;) + for (i= queue_first_element(&alarm_queue) ; + i <= queue_last_element(&alarm_queue) ;) { alarm_data=(ALARM*) queue_element(&alarm_queue,i); alarm_data->alarmed=1; /* Info to thread */ @@ -360,6 +340,7 @@ static sig_handler process_alarm_part2(int sig __attribute__((unused))) printf("Warning: pthread_kill couldn't find thread!!!\n"); #endif queue_remove(&alarm_queue,i); /* No thread. 
Remove alarm */ + reset_index_in_queue(alarm_data); } else i++; /* Signal next thread */ @@ -371,8 +352,8 @@ static sig_handler process_alarm_part2(int sig __attribute__((unused))) } else { - ulong now=(ulong) my_time(0); - ulong next=now+10-(now%10); + time_t now= my_time(0); + time_t next= now+10-(now%10); while ((alarm_data=(ALARM*) queue_top(&alarm_queue))->expire_time <= now) { alarm_data->alarmed=1; /* Info to thread */ @@ -382,15 +363,16 @@ static sig_handler process_alarm_part2(int sig __attribute__((unused))) { #ifdef MAIN printf("Warning: pthread_kill couldn't find thread!!!\n"); -#endif - queue_remove(&alarm_queue,0); /* No thread. Remove alarm */ +#endif /* MAIN */ + queue_remove_top(&alarm_queue); /* No thread. Remove alarm */ + reset_index_in_queue(alarm_data); if (!alarm_queue.elements) break; } else { alarm_data->expire_time=next; - queue_replaced(&alarm_queue); + queue_replace_top(&alarm_queue); } } #ifndef USE_ALARM_THREAD @@ -486,13 +468,15 @@ void thr_alarm_kill(my_thread_id thread_id) if (alarm_aborted) return; pthread_mutex_lock(&LOCK_alarm); - for (i=0 ; i < alarm_queue.elements ; i++) + for (i= queue_first_element(&alarm_queue) ; + i <= queue_last_element(&alarm_queue); + i++) { - if (((ALARM*) queue_element(&alarm_queue,i))->thread_id == thread_id) + ALARM *element= (ALARM*) queue_element(&alarm_queue,i); + if (element->thread_id == thread_id) { - ALARM *tmp=(ALARM*) queue_remove(&alarm_queue,i); - tmp->expire_time=0; - queue_insert(&alarm_queue,(uchar*) tmp); + element->expire_time= 0; + queue_replace(&alarm_queue, i); reschedule_alarms(); break; } @@ -508,7 +492,7 @@ void thr_alarm_info(ALARM_INFO *info) info->max_used_alarms= max_used_alarms; if ((info->active_alarms= alarm_queue.elements)) { - ulong now=(ulong) my_time(0); + time_t now= my_time(0); long time_diff; ALARM *alarm_data= (ALARM*) queue_top(&alarm_queue); time_diff= (long) (alarm_data->expire_time - now); @@ -556,7 +540,7 @@ static void *alarm_handler(void *arg 
__attribute__((unused))) { if (alarm_queue.elements) { - ulong sleep_time,now= my_time(0); + time_t sleep_time,now= my_time(0); if (alarm_aborted) sleep_time=now+1; else @@ -792,20 +776,6 @@ static void *test_thread(void *arg) return 0; } -#ifdef USE_ONE_SIGNAL_HAND -static sig_handler print_signal_warning(int sig) -{ - printf("Warning: Got signal %d from thread %s\n",sig,my_thread_name()); - fflush(stdout); -#ifdef DONT_REMEMBER_SIGNAL - my_sigset(sig,print_signal_warning); /* int. thread system calls */ -#endif - if (sig == SIGALRM) - alarm(2); /* reschedule alarm */ -} -#endif /* USE_ONE_SIGNAL_HAND */ - - static void *signal_hand(void *arg __attribute__((unused))) { sigset_t set; -- cgit v1.2.1 From 9d8ecf782b7f7b4322359aaca57162a08675d907 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Sat, 24 Jul 2010 08:10:03 +0200 Subject: buildbot detected problems mysys/mf_keycache.c: warning on windows storage/example/ha_example.cc: fighting the warnings don't ifdef too much. a function must return a value! storage/xtradb/Makefile.am: add -lmysqlservices for ha_xtradb.so plugin to load --- mysys/mf_keycache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 4e22b3f65a2..08c160910c7 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -5106,7 +5106,7 @@ static SIMPLE_KEY_CACHE_CB ulonglong* dirty_part_map) { uint i= KEYCACHE_BASE_EXPR( file, filepos) % keycache->partitions; - *dirty_part_map|= 1<partition_array[i]; } -- cgit v1.2.1 From d2f8b7d04503478ab6b6998194a2070891f0c2bb Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Fri, 30 Jul 2010 10:45:27 +0300 Subject: Fix for LP#602604: RQG: ma_blockrec.c:6187: _ma_apply_redo_insert_row_head_or_tail: Assertion `0' failed on Maria engine recovery More DBUG_PRINT (to simplify future debugging) Aria: Added STATE_IN_REPAIR, which is set on start of repair. This allows us to see if 'crashed' flag was set intentionally. 
Aria: Some trivial speedup optimization Aria: Better warning if table was marked crashed by unfinnished repair mysql-test/lib/v1/mysql-test-run.pl: Fix so one can run RQG mysql-test/suite/maria/r/maria-recovery2.result: Update for new error message. mysys/stacktrace.c: Fixed compiler warning storage/maria/ha_maria.cc: More DBUG_PRINT Added STATE_IN_REPAIR flag, which is set on start of repair. This allows us to see if 'crashed' flag was set intentionally. Don't log query for dropping temporary table. storage/maria/ha_maria.h: Added prototype for drop_table() storage/maria/ma_blockrec.c: More DBUG_PRINT Make read_long_data() inline for most cases. (Trivial speedup optimization) storage/maria/ma_check.c: Better warning if table was marked crashed by unfinnished repair storage/maria/ma_open.c: More DBUG_PRINT storage/maria/ma_recovery.c: Give warning if found crashed table. Changed warning for tables that can't be opened. storage/maria/ma_recovery_util.c: Write warnings to DBUG file storage/maria/maria_chk.c: Added STATE_IN_REPAIR flag, which is set on start of repair. This allows us to see if 'crashed' flag was set intentionally. storage/maria/maria_def.h: Added maria_mark_in_repair(x) storage/maria/maria_read_log.c: Added option: --character-sets-dir storage/maria/trnman.c: By default set min_read_from to max value. This allows us to remove TRN:s from rows during recovery to get more space. 
This fixes bug LP#602604: RQG: ma_blockrec.c:6187: _ma_apply_redo_insert_row_head_or_tail: Assertion `0' failed on Maria engine recovery --- mysys/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/stacktrace.c b/mysys/stacktrace.c index 75fda93b56e..80122a4e70f 100644 --- a/mysys/stacktrace.c +++ b/mysys/stacktrace.c @@ -86,7 +86,7 @@ void my_print_stacktrace(uchar* stack_bottom __attribute__((unused)), #if BACKTRACE_DEMANGLE -char __attribute__ ((weak)) *my_demangle(const char *mangled_name, int *status) +char __attribute__ ((weak)) *my_demangle(const char *mangled_name __attribute__((unused)), int *status __attribute__((unused))) { return NULL; } -- cgit v1.2.1 From cd9706b27ee113e0d448cb9c509fa9a4d553c5ee Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Wed, 4 Aug 2010 16:01:13 +0300 Subject: Fixes bug when we run bcmp() on row when the storage engine hasn't filled in all fields in the row. This was triggered by innodb.innodb_multi_update, where we had a static length row without nulls and xtradb didn't fill in the delete-marker byte include/my_bitmap.h: Added prototype for bitmap_union_is_set_all() mysys/my_bitmap.c: Added function to check if union of two bit maps covers all bits. sql/mysql_priv.h: Updated protype for compare_record() sql/sql_insert.cc: Send to compare_record() flag if all fields are used. sql/sql_select.cc: Set share->null_bytes_for_compare. sql/sql_update.cc: In compare_record() don't use the fast cmp_record() (which is basically memcmp) if we don't know that all fields exists. Don't compare the null_bytes if there is no data there. sql/table.cc: Store in share->null_bytes_for_compare the number of bytes that has null or bit fields (but not delete marker) Store in can_cmp_whole_record if we can use memcmp() (assuming all rows are read) to compare rows in compare_record() sql/table.h: Added two elements in table->share to speed up checking how updated rows can be compared. 
--- mysys/my_bitmap.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'mysys') diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c index 137127a2fda..69e0ca18cb8 100644 --- a/mysys/my_bitmap.c +++ b/mysys/my_bitmap.c @@ -378,6 +378,24 @@ void bitmap_intersect(MY_BITMAP *map, const MY_BITMAP *map2) } } +/* True if union of bitmaps have all bits set */ + +my_bool bitmap_union_is_set_all(const MY_BITMAP *map1, const MY_BITMAP *map2) +{ + my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end; + + DBUG_ASSERT(map1->bitmap && map2->bitmap && + map1->n_bits==map2->n_bits); + *map1->last_word_ptr|= map1->last_word_mask; + + end= map1->last_word_ptr; + while ( m1 <= end) + if ((*m1++ | *m2++) != 0xFFFFFFFF) + return FALSE; + return TRUE; +} + + /* Set/clear all bits above a bit. -- cgit v1.2.1 From 165eb1186cb6f8e9a9595ffd7c172c54a6dfb6dc Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Sat, 7 Aug 2010 15:27:23 +0300 Subject: Added extra argument to longlong2str() to make it have same prototype is int2str() Changed to use longlong10_to_str() instead of longlong2str() when base is 10 or -10 as former is much faster than later Changed my_vsnprintf() to use longlong2str instead of int2str() to get rid of warnings and to get support for long pointers even when long is 32 bit. 
client/mysqltest.cc: longlong2str() -> longlong10_to_str() include/m_string.h: Added extra argument to longlong2str() to make it have same prototype is int2str() mysys/charset.c: Fixed compiler warning mysys/mf_soundex.c: Fixed compiler warning mysys/my_getopt.c: longlong2str() -> longlong10_to_str() sql/create_options.cc: Fixed compiler warning sql/item_strfunc.cc: Added extra argument to longlong2str sql/opt_range.cc: longlong2str() -> longlong10_to_str() sql/partition_info.cc: longlong2str() -> longlong10_to_str() sql/slave.cc: longlong2str() -> longlong10_to_str() sql/sql_bitmap.h: Added extra argument to longlong2str sql/sql_partition.cc: Added extra argument to longlong2str sql/sql_select.cc: longlong2str() -> longlong10_to_str() sql/sql_show.cc: Added extra argument to longlong2str storage/innodb_plugin/handler/ha_innodb.cc: Update to new parameters for longlong2str() storage/maria/ma_dbug.c: longlong2str() -> longlong10_to_str() storage/maria/maria_chk.c: Added extra argument to longlong2str storage/myisam/mi_dbug.c: longlong2str() -> longlong10_to_str() storage/myisam/myisamchk.c: Added extra argument to longlong2str storage/xtradb/handler/ha_innodb.cc: Update to new parameters for longlong2str() strings/longlong2str.c: Added extra argument to longlong2str() to make it have same prototype is int2str() strings/my_vsnprintf.c: Changed my_vsnprintf() to use longlong2str instead of int2str() to get rid of warnings and to get support for long pointers even when long is 32 bit. 
Added cast to get rid of compiler warnings --- mysys/charset.c | 4 ++-- mysys/mf_soundex.c | 2 +- mysys/my_getopt.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'mysys') diff --git a/mysys/charset.c b/mysys/charset.c index 7cf8a8b828d..153ef8b7e2c 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -406,9 +406,9 @@ static my_pthread_once_t charsets_template= MY_PTHREAD_ONCE_INIT; static void init_available_charsets(void) { char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)]; - struct charset_info_st **cs; + struct charset_info_st **cs; - bzero(&all_charsets,sizeof(all_charsets)); + bzero((char*) &all_charsets,sizeof(all_charsets)); init_compiled_charsets(MYF(0)); /* Copy compiled charsets */ diff --git a/mysys/mf_soundex.c b/mysys/mf_soundex.c index fe30d8c81af..3a3dab52dd6 100644 --- a/mysys/mf_soundex.c +++ b/mysys/mf_soundex.c @@ -47,7 +47,7 @@ void soundex(CHARSET_INFO * cs,register char * out_pntr, char * in_pntr, { char ch,last_ch; reg3 char * end; - register uchar *map=cs->to_upper; + register const uchar *map=cs->to_upper; if (remove_garbage) { diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c index 82a691d550a..b8a5c5d14bd 100644 --- a/mysys/my_getopt.c +++ b/mysys/my_getopt.c @@ -1271,7 +1271,7 @@ void my_print_variables(const struct my_option *options) printf("%s\n", llstr(*((longlong*) value), buff)); break; case GET_ULL: - longlong2str(*((ulonglong*) value), buff, 10); + longlong10_to_str(*((ulonglong*) value), buff, 10); printf("%s\n", buff); break; case GET_DOUBLE: -- cgit v1.2.1 From 12648015b322e885923f87d919c07529423a0eca Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Mon, 9 Aug 2010 20:05:42 +0300 Subject: Added --sync-sys=0 option for mysqld to skip sync() calls for faster testing Fixed LP#613418 (M)aria recovery failure: ma_key_recover.c:981: _ma_apply_redo_index: Assertion `check_page_length == page_length' failed include/my_sys.h: Added my_disable_sync mysql-test/mysql-test-run.pl: Added --sync-sys=0 to run test 
suite faster mysys/my_static.c: Added my_disable_sync mysys/my_sync.c: Added my_disable_sync sql/mysqld.cc: Added -sync-sys=0 option for mysqld to skip sync() calls for faster testing storage/maria/ma_key_recover.c: More DBUG_ASSERT() Added logging of KEY_OP_DEBUG to make examening of logs easier Fixed testing of page length in recovery to ensure we don't overwrite checksum (previous tests was too relaxed) Fixed bug in recovery logging of split pages which caused failure during recovery: - Length was not adjusted properly for pages to be split - Added KEY_OP_MAX_PAGELENGTH to tell recovery that page is now full length - This fixed LP#613418 storage/maria/ma_key_recover.h: Changed prototype for ma_log_change() for KEY_OP_DEBUG storage/maria/ma_loghandler.h: Added new enums for better debugging of recovery logs storage/maria/ma_rt_index.c: Added debugging information to calls to ma_log_change() storage/maria/ma_write.c: Added debugging information to calls to ma_log_change() and ma_log_split() --- mysys/my_static.c | 1 + mysys/my_sync.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'mysys') diff --git a/mysys/my_static.c b/mysys/my_static.c index 0ef5656b76f..08653d03d21 100644 --- a/mysys/my_static.c +++ b/mysys/my_static.c @@ -119,6 +119,7 @@ ulonglong query_performance_frequency, query_performance_offset; /* How to disable options */ my_bool NEAR my_disable_locking=0; +my_bool NEAR my_disable_sync=0; my_bool NEAR my_disable_async_io=0; my_bool NEAR my_disable_flush_key_blocks=0; my_bool NEAR my_disable_symlinks=0; diff --git a/mysys/my_sync.c b/mysys/my_sync.c index 967a6ae6c78..9f6c55f4fb6 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -48,6 +48,9 @@ int my_sync(File fd, myf my_flags) DBUG_ENTER("my_sync"); DBUG_PRINT("my",("fd: %d my_flags: %d", fd, my_flags)); + if (my_disable_sync) + DBUG_RETURN(0); + statistic_increment(my_sync_count,&THR_LOCK_open); do { -- cgit v1.2.1 From 864118da1b6dcffc1d5b9d91ed5527fcb8aef973 Mon Sep 17 00:00:00 2001 From: 
Michael Widenius Date: Mon, 9 Aug 2010 20:54:58 +0300 Subject: Ignore ENOLCK errno from FreeBSD (known problem in old FreeBSD releases) --- mysys/my_sync.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'mysys') diff --git a/mysys/my_sync.c b/mysys/my_sync.c index 9f6c55f4fb6..cb0f5794b2e 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -68,6 +68,8 @@ int my_sync(File fd, myf my_flags) res= fdatasync(fd); #elif defined(HAVE_FSYNC) res= fsync(fd); + if (res == -1 and errno == ENOLCK) + res= 0; /* Result Bug in Old FreeBSD */ #elif defined(__WIN__) res= _commit(fd); #else -- cgit v1.2.1 From 0f75abc459beee1ebbb66de124d2a88ecc0d75a2 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Tue, 10 Aug 2010 19:06:34 +0300 Subject: Fixed typo that caused compile failure on Mac Added straight_join to make results predicatable mysql-test/suite/pbxt/r/range.result: Added straight_join to make results predicatable mysql-test/suite/pbxt/t/range.test: Added straight_join to make results predicatable mysys/my_sync.c: Fixed typo --- mysys/my_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_sync.c b/mysys/my_sync.c index cb0f5794b2e..a2b615a8661 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -68,7 +68,7 @@ int my_sync(File fd, myf my_flags) res= fdatasync(fd); #elif defined(HAVE_FSYNC) res= fsync(fd); - if (res == -1 and errno == ENOLCK) + if (res == -1 && errno == ENOLCK) res= 0; /* Result Bug in Old FreeBSD */ #elif defined(__WIN__) res= _commit(fd); -- cgit v1.2.1 From b87a73773913ebdcfdacb2d8b8704958bc0e93f2 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Sat, 14 Aug 2010 18:44:45 +0400 Subject: missing DBUG_RETURNs --- mysys/my_getwd.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'mysys') diff --git a/mysys/my_getwd.c b/mysys/my_getwd.c index e6b867e2753..a18c296a7e1 100644 --- a/mysys/my_getwd.c +++ b/mysys/my_getwd.c @@ -51,7 +51,7 @@ int my_getwd(char * buf, size_t 
size, myf MyFlags) (long) buf, (uint) size, MyFlags)); if (size < 1) - return(-1); + DBUG_RETURN(-1); if (curr_dir[0]) /* Current pos is saved here */ VOID(strmake(buf,&curr_dir[0],size-1)); @@ -59,12 +59,12 @@ int my_getwd(char * buf, size_t size, myf MyFlags) { #if defined(HAVE_GETCWD) if (size < 2) - return(-1); + DBUG_RETURN(-1); if (!getcwd(buf,(uint) (size-2)) && MyFlags & MY_WME) { my_errno=errno; my_error(EE_GETWD,MYF(ME_BELL+ME_WAITTANG),errno); - return(-1); + DBUG_RETURN(-1); } #elif defined(HAVE_GETWD) { @@ -74,12 +74,12 @@ int my_getwd(char * buf, size_t size, myf MyFlags) } #elif defined(VMS) if (size < 2) - return(-1); + DBUG_RETURN(-1); if (!getcwd(buf,size-2,1) && MyFlags & MY_WME) { my_errno=errno; my_error(EE_GETWD,MYF(ME_BELL+ME_WAITTANG),errno); - return(-1); + DBUG_RETURN(-1); } intern_filename(buf,buf); #else -- cgit v1.2.1 From f4820ea62ef2635f2d79476deb001f3ae662470e Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Wed, 15 Sep 2010 15:48:15 +0300 Subject: mysqltest now gives error messages with error code for my_delete, my_rename, my_copy etc. Fixed crashing bug when doing ALTER TABLE RENAME with transactional tables. 
client/mysqltest.cc: Added errno to error message for system calls (delete, rename etc) Write error message for failures of system calls mysql-test/include/cleanup_fake_relay_log.inc: Disable warnings for remove_file mysql-test/include/diff_tables.inc: Disable warnings for remove_file mysql-test/include/maria_empty_logs.inc: Disable warnings for remove_file mysql-test/include/maria_make_snapshot.inc: Disable warnings for remove_file mysql-test/include/maria_make_snapshot_for_feeding_recovery.inc: Disable warnings for remove_file mysql-test/include/mysqlhotcopy.inc: Disable warnings for remove_file mysql-test/include/ndb_backup.inc: Disable warnings for remove_file mysql-test/include/ndb_backup_print.inc: Disable warnings for remove_file mysql-test/r/alter_table_trans.result: Test of crashing ALTER TABLE RENAME bug mysql-test/t/alter_table_trans.test: Test of crashing ALTER TABLE RENAME bug mysql-test/t/mysqltest.test: Disable warnings for remove_file and move_file mysys/my_copy.c: Fixed wrong error message sql/sql_table.cc: Fixed crashing bug when doing ALTER TABLE RENAME with transactional tables. 
--- mysys/my_copy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mysys') diff --git a/mysys/my_copy.c b/mysys/my_copy.c index 8ea9620b20b..e1c549f4676 100644 --- a/mysys/my_copy.c +++ b/mysys/my_copy.c @@ -112,7 +112,7 @@ int my_copy(const char *from, const char *to, myf MyFlags) { my_errno= errno; if (MyFlags & MY_WME) - my_error(EE_CHANGE_PERMISSIONS, MYF(ME_BELL+ME_WAITTANG), from, errno); + my_error(EE_CHANGE_PERMISSIONS, MYF(ME_BELL+ME_WAITTANG), to, errno); if (MyFlags & MY_FAE) goto err; } -- cgit v1.2.1 From 9f35e98a4baa518e8e0a464ebea8571f362ac641 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Wed, 6 Oct 2010 11:45:30 +0300 Subject: Fix for Bug#43152 "Assertion `bitmap_is_set_all(&table->s->all_set)' failed in handler::ha_reset" The reason for this was that some bitmap test functions changed the bitmap, which caused problems when the same bitmap was used by multiple threads. include/my_bitmap.h: Changed order of elements to get better alignment. mysys/my_bitmap.c: Change bitmap test functions to not modify the bitmap. 
Fixed compiler errors in test_bitmap --- mysys/my_bitmap.c | 179 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 98 insertions(+), 81 deletions(-) (limited to 'mysys') diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c index e7e5f75f486..0c3f45be374 100644 --- a/mysys/my_bitmap.c +++ b/mysys/my_bitmap.c @@ -40,16 +40,31 @@ #include #include -void create_last_word_mask(MY_BITMAP *map) + +/* Create a mask of the significant bits for the last byte (1,3,7,..255) */ + +static inline uchar last_byte_mask(uint bits) { - /* Get the number of used bits (1..8) in the last byte */ - unsigned int const used= 1U + ((map->n_bits-1U) & 0x7U); + /* Get the number of used bits-1 (0..7) in the last byte */ + unsigned int const used= (bits - 1U) & 7U; + /* Return bitmask for the significant bits */ + return ((2U << used) - 1); +} - /* - Create a mask with the upper 'unused' bits set and the lower 'used' - bits clear. The bits within each byte is stored in big-endian order. - */ - unsigned char const mask= (~((1 << used) - 1)) & 255; +/* + Create a mask with the upper 'unused' bits set and the lower 'used' + bits clear. The bits within each byte is stored in big-endian order. 
+*/ + +static inline uchar invers_last_byte_mask(uint bits) +{ + return last_byte_mask(bits) ^ 255; +} + + +void create_last_word_mask(MY_BITMAP *map) +{ + unsigned char const mask= invers_last_byte_mask(map->n_bits); /* The first bytes are to be set to zero since they represent real bits @@ -267,40 +282,41 @@ void bitmap_set_prefix(MY_BITMAP *map, uint prefix_size) my_bool bitmap_is_prefix(const MY_BITMAP *map, uint prefix_size) { - uint prefix_bits= prefix_size & 0x7, res; - uchar *m= (uchar*)map->bitmap; - uchar *end_prefix= m+prefix_size/8; + uint prefix_mask= last_byte_mask(prefix_size); + uchar *m= (uchar*) map->bitmap; + uchar *end_prefix= m+(prefix_size-1)/8; uchar *end; DBUG_ASSERT(m && prefix_size <= map->n_bits); - end= m+no_bytes_in_map(map); + + /* Empty prefix is always true */ + if (!prefix_size) + return 1; while (m < end_prefix) if (*m++ != 0xff) return 0; - *map->last_word_ptr&= ~map->last_word_mask; /*Clear bits*/ - res= 0; - if (prefix_bits && *m++ != (1 << prefix_bits)-1) - goto ret; + end= ((uchar*) map->bitmap) + no_bytes_in_map(map) - 1; + if (m == end) + return ((*m & last_byte_mask(map->n_bits)) == prefix_mask); - while (m < end) - if (*m++ != 0) - goto ret; - res= 1; -ret: - return res; -} + if (*m != prefix_mask) + return 0; + while (++m < end) + if (*m != 0) + return 0; + return ((*m & last_byte_mask(map->n_bits)) == 0); +} my_bool bitmap_is_set_all(const MY_BITMAP *map) { my_bitmap_map *data_ptr= map->bitmap; my_bitmap_map *end= map->last_word_ptr; - *map->last_word_ptr |= map->last_word_mask; - for (; data_ptr <= end; data_ptr++) + for (; data_ptr < end; data_ptr++) if (*data_ptr != 0xFFFFFFFF) return FALSE; - return TRUE; + return (*data_ptr | map->last_word_mask) == 0xFFFFFFFF; } @@ -308,13 +324,11 @@ my_bool bitmap_is_clear_all(const MY_BITMAP *map) { my_bitmap_map *data_ptr= map->bitmap; my_bitmap_map *end; - if (*map->last_word_ptr & ~map->last_word_mask) - return FALSE; end= map->last_word_ptr; for (; data_ptr < end; data_ptr++) 
if (*data_ptr) return FALSE; - return TRUE; + return (*data_ptr & ~map->last_word_mask) == 0; } /* Return TRUE if map1 is a subset of map2 */ @@ -327,14 +341,13 @@ my_bool bitmap_is_subset(const MY_BITMAP *map1, const MY_BITMAP *map2) map1->n_bits==map2->n_bits); end= map1->last_word_ptr; - *map1->last_word_ptr &= ~map1->last_word_mask; - *map2->last_word_ptr &= ~map2->last_word_mask; - while (m1 <= end) + while (m1 < end) { if ((*m1++) & ~(*m2++)) return 0; } - return 1; + /* here both maps have the same number of bits - see assert above */ + return ((*m1 & ~*m2 & ~map1->last_word_mask) ? 0 : 1); } /* True if bitmaps has any common bits */ @@ -347,14 +360,13 @@ my_bool bitmap_is_overlapping(const MY_BITMAP *map1, const MY_BITMAP *map2) map1->n_bits==map2->n_bits); end= map1->last_word_ptr; - *map1->last_word_ptr &= ~map1->last_word_mask; - *map2->last_word_ptr &= ~map2->last_word_mask; - while (m1 <= end) + while (m1 < end) { if ((*m1++) & (*m2++)) return 1; } - return 0; + /* here both maps have the same number of bits - see assert above */ + return ((*m1 & *m2 & ~map1->last_word_mask) ? 
1 : 0); } @@ -366,15 +378,15 @@ void bitmap_intersect(MY_BITMAP *map, const MY_BITMAP *map2) DBUG_ASSERT(map->bitmap && map2->bitmap); end= to+min(len,len2); - *map2->last_word_ptr&= ~map2->last_word_mask; /*Clear last bits in map2*/ while (to < end) *to++ &= *from++; - if (len2 < len) + if (len2 <= len) { - end+=len-len2; + to[-1]&= ~map2->last_word_mask; /* Clear last not relevant bits */ + end+= len-len2; while (to < end) - *to++=0; + *to++= 0; } } @@ -386,13 +398,12 @@ my_bool bitmap_union_is_set_all(const MY_BITMAP *map1, const MY_BITMAP *map2) DBUG_ASSERT(map1->bitmap && map2->bitmap && map1->n_bits==map2->n_bits); - *map1->last_word_ptr|= map1->last_word_mask; - end= map1->last_word_ptr; - while ( m1 <= end) + while ( m1 < end) if ((*m1++ | *m2++) != 0xFFFFFFFF) return FALSE; - return TRUE; + /* here both maps have the same number of bits - see assert above */ + return ((*m1 | *m2 | map1->last_word_mask) != 0xFFFFFFFF); } @@ -479,14 +490,13 @@ void bitmap_invert(MY_BITMAP *map) uint bitmap_bits_set(const MY_BITMAP *map) { uchar *m= (uchar*)map->bitmap; - uchar *end= m + no_bytes_in_map(map); + uchar *end= m + no_bytes_in_map(map) - 1; uint res= 0; DBUG_ASSERT(map->bitmap); - *map->last_word_ptr&= ~map->last_word_mask; /*Reset last bits to zero*/ while (m < end) res+= my_count_bits_ushort(*m++); - return res; + return res + my_count_bits_ushort(*m & last_byte_mask(map->n_bits)); } @@ -510,27 +520,30 @@ uint bitmap_get_first_set(const MY_BITMAP *map) DBUG_ASSERT(map->bitmap); data_ptr= map->bitmap; - *map->last_word_ptr &= ~map->last_word_mask; - for (i=0; data_ptr <= end; data_ptr++, i++) - { + for (i=0; data_ptr < end; data_ptr++, i++) if (*data_ptr) + goto found; + if (!(*data_ptr & ~map->last_word_mask)) + return MY_BIT_NONE; + +found: + { + byte_ptr= (uchar*)data_ptr; + for (j=0; ; j++, byte_ptr++) { - byte_ptr= (uchar*)data_ptr; - for (j=0; ; j++, byte_ptr++) + if (*byte_ptr) { - if (*byte_ptr) + for (k=0; ; k++) { - for (k=0; ; k++) - { - if (*byte_ptr 
& (1 << k)) - return (i*32) + (j*8) + k; - } + if (*byte_ptr & (1 << k)) + return (i*32) + (j*8) + k; } } } } - return MY_BIT_NONE; + DBUG_ASSERT(0); + return MY_BIT_NONE; /* Impossible */ } @@ -544,25 +557,29 @@ uint bitmap_get_first(const MY_BITMAP *map) data_ptr= map->bitmap; *map->last_word_ptr|= map->last_word_mask; - for (i=0; data_ptr <= end; data_ptr++, i++) - { + for (i=0; data_ptr < end; data_ptr++, i++) if (*data_ptr != 0xFFFFFFFF) + goto found; + if ((*data_ptr | map->last_word_mask) == 0xFFFFFFFF) + return MY_BIT_NONE; + +found: + { + byte_ptr= (uchar*)data_ptr; + for (j=0; ; j++, byte_ptr++) { - byte_ptr= (uchar*)data_ptr; - for (j=0; ; j++, byte_ptr++) + if (*byte_ptr != 0xFF) { - if (*byte_ptr != 0xFF) + for (k=0; ; k++) { - for (k=0; ; k++) - { - if (!(*byte_ptr & (1 << k))) - return (i*32) + (j*8) + k; - } + if (!(*byte_ptr & (1 << k))) + return (i*32) + (j*8) + k; } } } } - return MY_BIT_NONE; + DBUG_ASSERT(0); + return MY_BIT_NONE; /* Impossible */ } @@ -777,7 +794,7 @@ uint get_rand_bit(uint bitsize) return (rand() % bitsize); } -bool test_set_get_clear_bit(MY_BITMAP *map, uint bitsize) +my_bool test_set_get_clear_bit(MY_BITMAP *map, uint bitsize) { uint i, test_bit; uint no_loops= bitsize > 128 ? 128 : bitsize; @@ -800,7 +817,7 @@ error2: return TRUE; } -bool test_flip_bit(MY_BITMAP *map, uint bitsize) +my_bool test_flip_bit(MY_BITMAP *map, uint bitsize) { uint i, test_bit; uint no_loops= bitsize > 128 ? 
128 : bitsize; @@ -823,13 +840,13 @@ error2: return TRUE; } -bool test_operators(MY_BITMAP *map __attribute__((unused)), +my_bool test_operators(MY_BITMAP *map __attribute__((unused)), uint bitsize __attribute__((unused))) { return FALSE; } -bool test_get_all_bits(MY_BITMAP *map, uint bitsize) +my_bool test_get_all_bits(MY_BITMAP *map, uint bitsize) { uint i; bitmap_set_all(map); @@ -871,7 +888,7 @@ error6: return TRUE; } -bool test_compare_operators(MY_BITMAP *map, uint bitsize) +my_bool test_compare_operators(MY_BITMAP *map, uint bitsize) { uint i, j, test_bit1, test_bit2, test_bit3,test_bit4; uint no_loops= bitsize > 128 ? 128 : bitsize; @@ -977,7 +994,7 @@ error5: return TRUE; } -bool test_count_bits_set(MY_BITMAP *map, uint bitsize) +my_bool test_count_bits_set(MY_BITMAP *map, uint bitsize) { uint i, bit_count=0, test_bit; uint no_loops= bitsize > 128 ? 128 : bitsize; @@ -1003,7 +1020,7 @@ error2: return TRUE; } -bool test_get_first_bit(MY_BITMAP *map, uint bitsize) +my_bool test_get_first_bit(MY_BITMAP *map, uint bitsize) { uint i, test_bit; uint no_loops= bitsize > 128 ? 128 : bitsize; @@ -1028,7 +1045,7 @@ error2: return TRUE; } -bool test_get_next_bit(MY_BITMAP *map, uint bitsize) +my_bool test_get_next_bit(MY_BITMAP *map, uint bitsize) { uint i, j, test_bit; uint no_loops= bitsize > 128 ? 128 : bitsize; @@ -1047,7 +1064,7 @@ error1: return TRUE; } -bool test_prefix(MY_BITMAP *map, uint bitsize) +my_bool test_prefix(MY_BITMAP *map, uint bitsize) { uint i, j, test_bit; uint no_loops= bitsize > 128 ? 128 : bitsize; @@ -1082,7 +1099,7 @@ error3: } -bool do_test(uint bitsize) +my_bool do_test(uint bitsize) { MY_BITMAP map; my_bitmap_map buf[1024]; -- cgit v1.2.1 From 716f7843757f96649cca1f5dc3df5d9659b17da9 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Fri, 29 Oct 2010 20:29:43 +0200 Subject: sane implementation of Key_% status variables. 
--- mysys/mf_keycache.c | 159 ---------------------------------------------------- 1 file changed, 159 deletions(-) (limited to 'mysys') diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 08c160910c7..9c96d387aa0 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -4914,61 +4914,6 @@ void get_simple_key_cache_statistics(SIMPLE_KEY_CACHE_CB *keycache, } -/* - Offsets of the statistical values in the control block for a simple key cache - The first NO_LONG_KEY_CACHE_STAT_VARIABLES=3 are of the ulong type while the - remaining are of the ulonglong type. - */ -static size_t simple_key_cache_stat_var_offsets[]= -{ - offsetof(SIMPLE_KEY_CACHE_CB, blocks_used), - offsetof(SIMPLE_KEY_CACHE_CB, blocks_unused), - offsetof(SIMPLE_KEY_CACHE_CB, global_blocks_changed), - offsetof(SIMPLE_KEY_CACHE_CB, global_cache_w_requests), - offsetof(SIMPLE_KEY_CACHE_CB, global_cache_write), - offsetof(SIMPLE_KEY_CACHE_CB, global_cache_r_requests), - offsetof(SIMPLE_KEY_CACHE_CB, global_cache_read) -}; - - -/* - Get the value of a statistical variable for a simple key cache - - SYNOPSIS - get_simple_key_cache_stat_value() - keycache pointer to the control block of a simple key cache - var_no the ordered number of a statistical variable - - DESCRIPTION - This function is the implementation of the get_simple_key_cache_stat_value - interface function that is employed by simple (non-partitioned) key caches. - The function takes the parameter keycache as a pointer to the - control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key - cache. This function returns the value of the statistical variable var_no - for this key cache. The variables are numbered starting from 0 to 6. 
- - RETURN - The value of the specified statistical variable - -*/ - -static -ulonglong get_simple_key_cache_stat_value(SIMPLE_KEY_CACHE_CB *keycache, - uint var_no) -{ - size_t var_ofs= simple_key_cache_stat_var_offsets[var_no]; - ulonglong res= 0; - DBUG_ENTER("get_simple_key_cache_stat_value"); - - if (var_no < 3) - res= (ulonglong) (*(long *) ((char *) keycache + var_ofs)); - else - res= *(ulonglong *) ((char *) keycache + var_ofs); - - DBUG_RETURN(res); -} - - /* The array of pointer to the key cache interface functions used for simple key caches. Any simple key cache objects including those incorporated into @@ -4990,7 +4935,6 @@ static KEY_CACHE_FUNCS simple_key_cache_funcs = (RESET_KEY_CACHE_COUNTERS) reset_simple_key_cache_counters, (END_KEY_CACHE) end_simple_key_cache, (GET_KEY_CACHE_STATISTICS) get_simple_key_cache_statistics, - (GET_KEY_CACHE_STAT_VALUE) get_simple_key_cache_stat_value }; @@ -5861,61 +5805,6 @@ get_partitioned_key_cache_statistics(PARTITIONED_KEY_CACHE_CB *keycache, DBUG_VOID_RETURN; } -/* - Get the value of a statistical variable for a partitioned key cache - - SYNOPSIS - get_partitioned_key_cache_stat_value() - keycache pointer to the control block of a partitioned key cache - var_no the ordered number of a statistical variable - - DESCRIPTION - This function is the implementation of the get_key_cache_stat_value - interface function that is employed by partitioned key caches. - The function takes the parameter keycache as a pointer to the - control block structure of the type PARTITIONED_KEY_CACHE_CB for a - partitioned key cache. - This function returns the value of the statistical variable var_no - for this key cache. The variables are numbered starting from 0 to 6. - The returned value is calculated as the sum of the values of the - statistical variable with number var_no for all simple key caches that - comprise the partitioned key cache. 
- - RETURN - The value of the specified statistical variable -*/ - -static -ulonglong -get_partitioned_key_cache_stat_value(PARTITIONED_KEY_CACHE_CB *keycache, - uint var_no) -{ - uint i; - uint partitions= keycache->partitions; - size_t var_ofs= simple_key_cache_stat_var_offsets[var_no]; - ulonglong res= 0; - DBUG_ENTER("get_partitioned_key_cache_stat_value"); - - if (var_no < NUM_LONG_KEY_CACHE_STAT_VARIABLES) - { - for (i = 0; i < partitions; i++) - { - SIMPLE_KEY_CACHE_CB *partition= keycache->partition_array[i]; - res+= (ulonglong) (*(long *) ((char *) partition + var_ofs)); - } - } - else - { - for (i = 0; i < partitions; i++) - { - SIMPLE_KEY_CACHE_CB *partition= keycache->partition_array[i]; - res+= *(ulonglong *) ((char *) partition + var_ofs); - } - } - DBUG_RETURN(res); -} - - /* The array of pointers to the key cache interface functions used by partitioned key caches. Any partitioned key cache object caches exploits @@ -5938,7 +5827,6 @@ static KEY_CACHE_FUNCS partitioned_key_cache_funcs = (RESET_KEY_CACHE_COUNTERS) reset_partitioned_key_cache_counters, (END_KEY_CACHE) end_partitioned_key_cache, (GET_KEY_CACHE_STATISTICS) get_partitioned_key_cache_statistics, - (GET_KEY_CACHE_STAT_VALUE) get_partitioned_key_cache_stat_value }; @@ -6246,8 +6134,6 @@ uchar *key_cache_read(KEY_CACHE *keycache, block_length, return_buffer); /* We can't use mutex here as the key cache may not be initialized */ - keycache->global_cache_r_requests++; - keycache->global_cache_read++; if (my_pread(file, (uchar*) buff, length, filepos, MYF(MY_NABP))) return (uchar *) 0; @@ -6356,8 +6242,6 @@ int key_cache_write(KEY_CACHE *keycache, block_length, force_write); /* We can't use mutex here as the key cache may not be initialized */ - keycache->global_cache_w_requests++; - keycache->global_cache_write++; if (my_pwrite(file, buff, length, filepos, MYF(MY_NABP | MY_WAIT_IF_FULL))) return 1; @@ -6474,49 +6358,6 @@ void get_key_cache_statistics(KEY_CACHE *keycache, uint partition_no, } } 
- -/* - Get the value of a statistical variable for a key cache - - SYNOPSIS - get_key_cache_stat_value() - keycache pointer to the key cache to get statistics for - var_no the ordered number of a statistical variable - - DESCRIPTION - This function returns the value of the statistical variable var_no for - the key cache keycache. The variables are numbered starting from 0 to 6. - - RETURN - The value of the specified statistical variable. - - NOTES - Currently for any key cache the function can return values for the - following 7 statistical variables: - - Name Number - - blocks_used 0 - blocks_unused 1 - blocks_changed 2 - read_requests 3 - reads 4 - write_requests 5 - writes 6 -*/ - -ulonglong get_key_cache_stat_value(KEY_CACHE *keycache, uint var_no) -{ - if (keycache->key_cache_inited) - { - return keycache->interface_funcs->get_stat_val(keycache->keycache_cb, - var_no); - } - else - return 0; -} - - /* Repartition a key cache -- cgit v1.2.1 From c4a5cf111bb1a2ecc2de14478a679283b081e3b2 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Tue, 2 Nov 2010 11:03:33 +0200 Subject: Fixed wrong queue_replace(), which caused timeout failure in pbxt.flush_read_lock_kill Fixed compiler warnings. include/queues.h: Added prototype for queue_replace() mysys/queues.c: Fixed wrong queue_replace() mysys/thr_alarm.c: Added DBUG_PRINT sql/item_subselect.cc: Check return value of ha_rnd_init(). (Fixes compiler warnings) sql/sql_class.cc: Fixed wrong test sql/sql_show.cc: Removed not used variable. 
--- mysys/queues.c | 22 ++++++++++++++++++++++ mysys/thr_alarm.c | 4 ++++ 2 files changed, 26 insertions(+) (limited to 'mysys') diff --git a/mysys/queues.c b/mysys/queues.c index a65a7f8914c..d1f2dab18bc 100644 --- a/mysys/queues.c +++ b/mysys/queues.c @@ -280,6 +280,9 @@ uchar *queue_remove(register QUEUE *queue, uint idx) queue Queue to use idx Index of element to change element Element to store at 'idx' + + NOTE + This only works if element is >= all elements <= start_idx */ void _downheap(register QUEUE *queue, uint start_idx, uchar *element) @@ -353,3 +356,22 @@ void queue_fix(QUEUE *queue) for (i= queue->elements >> 1; i > 0; i--) _downheap(queue, i, queue_element(queue, i)); } + + +/* + Change element at fixed position + + SYNOPSIS + queue_replace() + queue Queue to use + idx Index of element to change + element Element to store at 'idx' +*/ + +void queue_replace(QUEUE *queue, uint idx) +{ + uchar *element= queue->root[idx]; + DBUG_ASSERT(idx >= 1 && idx <= queue->elements); + queue_remove(queue, idx); + queue_insert(queue, element); +} diff --git a/mysys/thr_alarm.c b/mysys/thr_alarm.c index f7ff20c9b23..15c24facac0 100644 --- a/mysys/thr_alarm.c +++ b/mysys/thr_alarm.c @@ -465,6 +465,8 @@ void end_thr_alarm(my_bool free_structures) void thr_alarm_kill(my_thread_id thread_id) { uint i; + DBUG_ENTER("thr_alarm_kill"); + if (alarm_aborted) return; pthread_mutex_lock(&LOCK_alarm); @@ -475,6 +477,7 @@ void thr_alarm_kill(my_thread_id thread_id) ALARM *element= (ALARM*) queue_element(&alarm_queue,i); if (element->thread_id == thread_id) { + DBUG_PRINT("info", ("found thread; Killing it")); element->expire_time= 0; queue_replace(&alarm_queue, i); reschedule_alarms(); @@ -482,6 +485,7 @@ void thr_alarm_kill(my_thread_id thread_id) } } pthread_mutex_unlock(&LOCK_alarm); + DBUG_VOID_RETURN; } -- cgit v1.2.1 From 20acfbf30da2eca66f9e5d602d50ac18e38272b8 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Tue, 2 Nov 2010 17:22:57 +0200 Subject: Fix for: LP #634955: 
Assert in _ma_update_at_original_place() Added locking of lock mutex when updating status in external_unlock() for Aria and MyISAM tables. Fixed that 'source' command doesn't cause mysql command line tool to exit on error. DEBUG_EXECUTE() and DEBUG_EVALUATE_IF() should not execute things based on wildcards. (Allows one to run --debug with mysql-test-run scripts that uses @debug) Fixed several core dump, deadlock and crashed table bugs in handling of LOCK TABLE with MERGE tables: - Added priority of locks to avoid crashes with MERGE tables. - Added thr_lock_merge() to allow one to merge two results of thr_lock(). Fixed 'not found row' bug in REPLACE with Aria tables. Mark MyISAM tables that are part of MERGE with HA_OPEN_MERGE_TABLE and set the locks to have priority THR_LOCK_MERGE_PRIV. - By sorting MERGE tables last in thr_multi_unlock() it's safer to release and relock them many times (can happen when TRIGGERS are created) Avoid printing (null) in debug file (to easier find out wrong NULL pointer usage with %s). client/mysql.cc: Fixed that 'source' command doesn't cause mysql command line tool to exit on error. client/mysqltest.cc: Don't send NULL to fn_format(). (Can cause crash on Solaris when using --debug) dbug/dbug.c: DEBUG_EXECUTE() and DEBUG_EVALUATE_IF() should not execute things based on wildcards. include/my_base.h: Added flag to signal if one opens a MERGE table. Added extra() command to signal that one is not part of a MERGE table anymore. include/thr_lock.h: Added priority for locks (needed to fix bug in thr_lock when using MERGE tables) Added option to thr_unlock() if get_status() should be called. Added prototype for thr_merge_locks(). mysql-test/mysql-test-run.pl: Ignore crashed table warnings for tables named 'crashed'. mysql-test/r/merge.result: Renamed triggers to make debugging easier. Added some CHECK TABLES to catch errors earlier. Additional tests. mysql-test/r/merge_debug.result: Test of error handling when reopening MERGE tables. 
mysql-test/r/udf_query_cache.result: Added missing flush status mysql-test/suite/parts/r/partition_repair_myisam.result: Update results mysql-test/t/merge.test: Renamed triggers to make debugging easier. Added some CHECK TABLES to catch errors earlier. Additional tests. mysql-test/t/merge_debug.test: Test of error handling when reopening MERGE tables. mysql-test/t/udf_query_cache.test: Added missing flush status mysys/my_getopt.c: Removed not used variable mysys/my_symlink2.c: Changed (null) to (NULL) to make it easier to find NULL arguments to DBUG_PRINT() functions. (On linux, NULL to sprintf is printed 'null') mysys/thr_lock.c: Added priority of locks to avoid crashes with MERGE tables. Added thr_lock_merge() to allow one to merge two results of thr_lock(). - This is needed for MyISAM as all locked table must share the same status. If not, you will not see newly inserted rows in other instances of the table. If calling thr_unlock() with THR_UNLOCK_UPDATE_STATUS, call update_status() and restore_status() for the locks. This is needed in some rare cases where we call thr_unlock() followed by thr_lock() without calling external_unlock/external_lock in between. Simplify loop in thr_multi_lock(). Added 'start_trans', which is called at end of thr_multi_lock() when all locks are taken. - This was needed by Aria to ensure that transaction is started when we got all locks, not at get_status(). Without this, some rows could not be visible when we lock two tables at the same time, causing REPLACE using two tables to fail unexpectedly. sql/handler.cc: Add an assert() in handler::print_error() for "impossible errors" (like table is crashed) when --debug-assert-if-crashed-table is used. sql/lock.cc: Simplify mysql_lock_tables() code if get_lock_data() returns 0 locks. Added new parameter to thr_multi_unlock() In mysql_unlock_read_tables(), call first externa_unlock(), then thr_multi_unlock(); This is same order as we do in mysql_unlock_tables(). 
Don't abort locks in mysql_lock_abort() for merged tables when a MERGE table is deleted; Would cause a spin lock. Added call to thr_merge_locks() in mysql_lock_merge() to ensure consistency in thr_locks(). - New locks of same type and table is stored after the old lock to ensure that we get the status from the original lock. sql/mysql_priv.h: Added debug_assert_if_crashed_table sql/mysqld.cc: Added --debug-assert-if-crashed-table sql/parse_file.cc: Don't print '(null)' in DBUG_PRINT of no dir given sql/set_var.cc: Increase default size of buffer for @debug variable. sql/sql_base.cc: In case of error from reopen_table() in reopen_tables(), call unlock_open_table() and restart loop. - This fixed bug when we twice deleted same table from open_cache. Don't take name lock for already name locked table in open_unireg_entry(). - Fixed bug when doing repair in reopen_table(). - In detach_merge_children(), always detach if 'clear_refs' is given. We can't trust parent->children_attached as this function can be called twice, first time with clear_refs set to 0. sql/sql_class.cc: Changed printing of (null) to "" in set_thd_proc_info() sql/sql_parse.cc: Added DBUG sql/sql_trigger.cc: Don't call unlink_open_table() if reopen_table() fails as the table may already be freed. storage/maria/ma_bitmap.c: Fixed DBUG_ASSERT() in allocate_tail() storage/maria/ma_blockrec.c: Fixed wrong calculation of row length for very small rows in undo_row_update(). - Fixes ASSERT() when doing undo. storage/maria/ma_blockrec.h: Added _ma_block_start_trans() and _ma_block_start_trans_no_versioning() storage/maria/ma_locking.c: Call _ma_update_status_with_lock() when releasing write locks. - Fixes potential problem with updating status without the proper lock. storage/maria/ma_open.c: Changed to use start_trans() instead of get_status() to ensure that we see all rows in all locked tables when we got the locks. - Fixed 'not found row' bug in REPLACE with Aria tables. 
storage/maria/ma_state.c: Added _ma_update_status_with_lock() and _ma_block_start_trans(). This is to ensure that we see all rows in all locked tables when we got the locks. storage/maria/ma_state.h: Added _ma_update_status_with_lock() storage/maria/ma_write.c: More DBUG_PRINT storage/myisam/mi_check.c: Fixed error message storage/myisam/mi_extra.c: Added HA_EXTRA_DETACH_CHILD: - Detach MyISAM table to not be part of MERGE table (remove flag & lock priority). storage/myisam/mi_locking.c: Call mi_update_status_with_lock() when releasing write locks. - Fixes potential problem with updating status without the proper lock. Change to use new HA_OPEN_MERGE_TABLE flag to test if MERGE table. Added mi_fix_status(), called by thr_merge(). storage/myisam/mi_open.c: Added marker if part of MERGE table. Call mi_fix_status() in thr_lock() for transactional tables. storage/myisam/myisamdef.h: Change my_once_flag to uint, as it stored different values than just 0/1 Added 'open_flag' to store state given to mi_open() storage/myisammrg/ha_myisammrg.cc: Add THR_LOCK_MERGE_PRIV to THR_LOCK_DATA to get MERGE locks sorted after other types of locks. storage/myisammrg/myrg_locking.c: Remove windows specific code. storage/myisammrg/myrg_open.c: Use HA_OPEN_MERGE_TABLE to mi_open(). Set HA_OPEN_MERGE_TABLE for linked MyISAM tables. storage/xtradb/buf/buf0buf.c: Fixed compiler warning storage/xtradb/buf/buf0lru.c: Initialize variable that could be used not initialized. 
--- mysys/my_getopt.c | 1 - mysys/my_symlink2.c | 4 +- mysys/thr_lock.c | 207 +++++++++++++++++++++++++++++++++++++--------------- 3 files changed, 151 insertions(+), 61 deletions(-) (limited to 'mysys') diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c index f9a2c17ae7c..7281f2e1420 100644 --- a/mysys/my_getopt.c +++ b/mysys/my_getopt.c @@ -614,7 +614,6 @@ static int setval(const struct my_option *opts, void *value, char *argument, my_bool set_maximum_value) { int err= 0; - int pos; if (value && argument) { diff --git a/mysys/my_symlink2.c b/mysys/my_symlink2.c index 7c3ddbb911c..bc7ac751fad 100644 --- a/mysys/my_symlink2.c +++ b/mysys/my_symlink2.c @@ -34,8 +34,8 @@ File my_create_with_symlink(const char *linkname, const char *filename, char abs_linkname[FN_REFLEN]; DBUG_ENTER("my_create_with_symlink"); DBUG_PRINT("enter", ("linkname: %s filename: %s", - linkname ? linkname : "(null)", - filename ? filename : "(null)")); + linkname ? linkname : "(NULL)", + filename ? filename : "(NULL)")); if (my_disable_symlinks) { diff --git a/mysys/thr_lock.c b/mysys/thr_lock.c index b8aa9e5fcc0..9c8236ae0e6 100644 --- a/mysys/thr_lock.c +++ b/mysys/thr_lock.c @@ -63,6 +63,11 @@ update_status: A storage engine should also call update_status internally in the ::external_lock(F_UNLCK) method. In MyISAM and CSV this functions updates the length of the datafile. + MySQL does in some exceptional cases (when doing DLL statements on + open tables calls thr_unlock() followed by thr_lock() without calling + ::external_lock() in between. In this case thr_unlock() is called with + the THR_UNLOCK_UPDATE_STATUS flag and thr_unlock() will call + update_status for write locks. get_status: When one gets a lock this functions is called. 
In MyISAM this stores the number of rows and size of the datafile @@ -105,8 +110,30 @@ static inline pthread_cond_t *get_cond(void) return &my_thread_var->suspend; } + +/* + Priority for locks (decides in which order locks are locked) + We want all write locks to be first, followed by read locks. + Locks from MERGE tables has a little lower priority than other + locks, to allow one to release merge tables without having + to unlock and re-lock other locks. + The lower the number, the higher the priority for the lock. + Read locks should have 4, write locks should have 0. + UNLOCK is 8, to force these last in thr_merge_locks. + For MERGE tables we add 2 (THR_LOCK_MERGE_PRIV) to the lock priority. + THR_LOCK_LATE_PRIV (1) is used when one locks other tables to be merged + with existing locks. This way we prioritize the original locks over the + new locks. +*/ + +static uint lock_priority[(uint)TL_WRITE_ONLY+1] = +{ 8, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0}; + +#define LOCK_CMP(A,B) ((uchar*) ((A)->lock) + lock_priority[(uint) (A)->type] + (A)->priority < (uchar*) ((B)->lock) + lock_priority[(uint) (B)->type] + (B)->priority) + + /* -** For the future (now the thread specific cond is alloced by my_pthread.c) + For the future (now the thread specific cond is alloced by my_pthread.c) */ my_bool init_thr_lock() @@ -530,7 +557,7 @@ wait_for_lock(struct st_lock_list *wait, THR_LOCK_DATA *data, } -enum enum_thr_lock_result +static enum enum_thr_lock_result thr_lock(THR_LOCK_DATA *data, THR_LOCK_OWNER *owner, enum thr_lock_type lock_type) { @@ -544,6 +571,7 @@ thr_lock(THR_LOCK_DATA *data, THR_LOCK_OWNER *owner, data->cond=0; /* safety */ data->type=lock_type; data->owner= owner; /* Must be reset ! 
*/ + data->priority&= ~THR_LOCK_LATE_PRIV; VOID(pthread_mutex_lock(&lock->mutex)); DBUG_PRINT("lock",("data: 0x%lx thread: 0x%lx lock: 0x%lx type: %d", (long) data, data->owner->info->thread_id, @@ -808,7 +836,7 @@ static inline void free_all_read_locks(THR_LOCK *lock, /* Unlock lock and free next thread on same lock */ -void thr_unlock(THR_LOCK_DATA *data) +void thr_unlock(THR_LOCK_DATA *data, uint unlock_flags) { THR_LOCK *lock=data->lock; enum thr_lock_type lock_type=data->type; @@ -832,6 +860,21 @@ void thr_unlock(THR_LOCK_DATA *data) } else lock->write.last=data->prev; + + if (unlock_flags & THR_UNLOCK_UPDATE_STATUS) + { + /* External lock was not called; Update or restore status */ + if (lock_type >= TL_WRITE_CONCURRENT_INSERT) + { + if (lock->update_status) + (*lock->update_status)(data->status_param); + } + else + { + if (lock->restore_status) + (*lock->restore_status)(data->status_param); + } + } if (lock_type == TL_READ_NO_INSERT) lock->read_no_write_count--; data->type=TL_UNLOCK; /* Mark unlocked */ @@ -967,14 +1010,12 @@ end: /* -** Get all locks in a specific order to avoid dead-locks -** Sort acording to lock position and put write_locks before read_locks if -** lock on same lock. + Get all locks in a specific order to avoid dead-locks + Sort acording to lock position and put write_locks before read_locks if + lock on same lock. Locks on MERGE tables has lower priority than other + locks of the same type. See comment for lock_priority. 
*/ - -#define LOCK_CMP(A,B) ((uchar*) (A->lock) - (uint) ((A)->type) < (uchar*) (B->lock)- (uint) ((B)->type)) - static void sort_locks(THR_LOCK_DATA **data,uint count) { THR_LOCK_DATA **pos,**end,**prev,*tmp; @@ -999,18 +1040,22 @@ static void sort_locks(THR_LOCK_DATA **data,uint count) enum enum_thr_lock_result thr_multi_lock(THR_LOCK_DATA **data, uint count, THR_LOCK_OWNER *owner) { - THR_LOCK_DATA **pos,**end; + THR_LOCK_DATA **pos, **end, **first_lock; DBUG_ENTER("thr_multi_lock"); DBUG_PRINT("lock",("data: 0x%lx count: %d", (long) data, count)); + if (count > 1) sort_locks(data,count); + else if (count == 0) + DBUG_RETURN(THR_LOCK_SUCCESS); + /* lock everything */ for (pos=data,end=data+count; pos < end ; pos++) { enum enum_thr_lock_result result= thr_lock(*pos, owner, (*pos)->type); if (result != THR_LOCK_SUCCESS) { /* Aborted */ - thr_multi_unlock(data,(uint) (pos-data)); + thr_multi_unlock(data,(uint) (pos-data), 0); DBUG_RETURN(result); } #ifdef MAIN @@ -1018,63 +1063,103 @@ thr_multi_lock(THR_LOCK_DATA **data, uint count, THR_LOCK_OWNER *owner) (long) pos[0]->lock, pos[0]->type); fflush(stdout); #endif } + /* - Ensure that all get_locks() have the same status + Call start_trans for all locks. If we lock the same table multiple times, we must use the same - status_param! + status_param; We ensure this by calling copy_status() for all + copies of the same tables. */ -#if !defined(DONT_USE_RW_LOCKS) - if (count > 1) + if ((*data)->lock->start_trans) + ((*data)->lock->start_trans)((*data)->status_param); + for (first_lock=data, pos= data+1 ; pos < end ; pos++) { - THR_LOCK_DATA *last_lock= end[-1]; - pos=end-1; - do + /* Get the current status (row count, checksum, trid etc) */ + if ((*pos)->lock->start_trans) + (*(*pos)->lock->start_trans)((*pos)->status_param); + /* + If same table as previous table use pointer to previous status + information to ensure that all read/write tables shares same + state. 
+ */ + if (pos[0]->lock == pos[-1]->lock && pos[0]->lock->copy_status) + (pos[0]->lock->copy_status)((*pos)->status_param, + (*first_lock)->status_param); + else { - pos--; - if (last_lock->lock == (*pos)->lock && - last_lock->lock->copy_status) - { - if (last_lock->type <= TL_READ_NO_INSERT) - { - THR_LOCK_DATA **read_lock; - /* - If we are locking the same table with read locks we must ensure - that all tables share the status of the last write lock or - the same read lock. - */ - for (; - (*pos)->type <= TL_READ_NO_INSERT && - pos != data && - pos[-1]->lock == (*pos)->lock ; - pos--) ; - - read_lock = pos+1; - do - { - (last_lock->lock->copy_status)((*read_lock)->status_param, - (*pos)->status_param); - } while (*(read_lock++) != last_lock); - last_lock= (*pos); /* Point at last write lock */ - } - else - (*last_lock->lock->copy_status)((*pos)->status_param, - last_lock->status_param); - } - else - last_lock=(*pos); - } while (pos != data); + /* Different lock, use this as base for next lock */ + first_lock= pos; + } } -#endif DBUG_RETURN(THR_LOCK_SUCCESS); } - /* free all locks */ -void thr_multi_unlock(THR_LOCK_DATA **data,uint count) +/** + Merge two sets of locks. + + @param data All locks. First old locks, then new locks. + @param old_count Original number of locks. These are first in 'data'. + @param new_count How many new locks + + The merge is needed if the new locks contains same tables as the old + locks, in which case we have to ensure that same tables shares the + same status (as after a thr_multi_lock()). 
+*/ + +void thr_merge_locks(THR_LOCK_DATA **data, uint old_count, uint new_count) +{ + THR_LOCK_DATA **pos, **end, **first_lock= 0; + DBUG_ENTER("thr_merge_lock"); + + /* Remove marks on old locks to make them sort before new ones */ + for (pos=data, end= pos + old_count; pos < end ; pos++) + (*pos)->priority&= ~THR_LOCK_LATE_PRIV; + + /* Mark new locks with LATE_PRIV to make them sort after org ones */ + for (pos=data + old_count, end= pos + new_count; pos < end ; pos++) + (*pos)->priority|= THR_LOCK_LATE_PRIV; + + sort_locks(data, old_count + new_count); + + for (pos=data ; pos < end ; pos++) + { + /* Check if lock was unlocked before */ + if (pos[0]->type == TL_UNLOCK || ! pos[0]->lock->fix_status) + { + DBUG_PRINT("info", ("lock skipped. unlocked: %d fix_status: %d", + pos[0]->type == TL_UNLOCK, + pos[0]->lock->fix_status == 0)); + continue; + } + + /* + If same table as previous table use pointer to previous status + information to ensure that all read/write tables shares same + state. 
+ */ + if (first_lock && pos[0]->lock == first_lock[0]->lock) + (pos[0]->lock->fix_status)((*first_lock)->status_param, + (*pos)->status_param); + else + { + /* Different lock, use this as base for next lock */ + first_lock= pos; + (pos[0]->lock->fix_status)((*first_lock)->status_param, 0); + } + } + DBUG_VOID_RETURN; +} + + +/* Unlock all locks */ + +void thr_multi_unlock(THR_LOCK_DATA **data,uint count, uint unlock_flags) { THR_LOCK_DATA **pos,**end; DBUG_ENTER("thr_multi_unlock"); - DBUG_PRINT("lock",("data: 0x%lx count: %d", (long) data, count)); + DBUG_PRINT("lock",("data: 0x%lx count: %d flags: %u", (long) data, count, + unlock_flags)); for (pos=data,end=data+count; pos < end ; pos++) { @@ -1084,7 +1169,7 @@ void thr_multi_unlock(THR_LOCK_DATA **data,uint count) fflush(stdout); #endif if ((*pos)->type != TL_UNLOCK) - thr_unlock(*pos); + thr_unlock(*pos, unlock_flags); else { DBUG_PRINT("lock",("Free lock: data: 0x%lx thread: 0x%lx lock: 0x%lx", @@ -1400,6 +1485,7 @@ my_bool thr_upgrade_write_delay_lock(THR_LOCK_DATA *data, enum thr_lock_type new_lock_type) { THR_LOCK *lock=data->lock; + enum enum_thr_lock_result res; DBUG_ENTER("thr_upgrade_write_delay_lock"); pthread_mutex_lock(&lock->mutex); @@ -1420,6 +1506,8 @@ my_bool thr_upgrade_write_delay_lock(THR_LOCK_DATA *data, if (lock->get_status) (*lock->get_status)(data->status_param, 0); pthread_mutex_unlock(&lock->mutex); + if (lock->start_trans) + (*lock->start_trans)(data->status_param); DBUG_RETURN(0); } @@ -1440,7 +1528,10 @@ my_bool thr_upgrade_write_delay_lock(THR_LOCK_DATA *data, { check_locks(lock,"waiting for lock",0); } - DBUG_RETURN(wait_for_lock(&lock->write_wait,data,1)); + res= wait_for_lock(&lock->write_wait,data,1); + if (res == THR_LOCK_SUCCESS && lock->start_trans) + DBUG_RETURN((*lock->start_trans)(data->status_param)); + DBUG_RETURN(0); } @@ -1684,7 +1775,7 @@ static void *test_thread(void *arg) } } pthread_mutex_unlock(&LOCK_thread_count); - 
thr_multi_unlock(multi_locks,lock_counts[param]); + thr_multi_unlock(multi_locks,lock_counts[param], THR_UNLOCK_UPDATE_STATUS); } printf("Thread %s (%d) ended\n",my_thread_name(),param); fflush(stdout); -- cgit v1.2.1 From 5b3159dbc797f7254b4850813c3bf3dfbbf66623 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Wed, 3 Nov 2010 14:14:02 +0200 Subject: Fixed compiler & valgrind warnings from my previous push. Fixed a bug in Aria when two threads was inserting into the same table and row page and one thread did an abort becasue of duplicate key. mysys/thr_lock.c: Fixed valgrind warning sql/sql_base.cc: Remove not used variable storage/maria/ma_bitmap.c: Added ma_bitmap_lock() & ma_bitmap_unlock() to protect against two threads using the bitmap at the same time. More DBUG_PRINT() storage/maria/ma_blockrec.c: Fixed a bug in Aria when two threads was inserting into the same table and row page and one thread did an abort becasue of duplicate key. Fix was that we block other threads to modify the bitmap while we are removing the row with a duplicate key. storage/maria/ma_blockrec.h: Added ma_bitmap_lock() & ma_bitmap_unlock() to protect against two threads using the bitmap at the same time. storage/maria/maria_def.h: Changed flush_all_requested to be a counter. storage/myisam/mi_locking.c: Fixed compiler error on windows (typo). --- mysys/thr_lock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'mysys') diff --git a/mysys/thr_lock.c b/mysys/thr_lock.c index 9c8236ae0e6..341b2f0058e 100644 --- a/mysys/thr_lock.c +++ b/mysys/thr_lock.c @@ -406,6 +406,7 @@ void thr_lock_data_init(THR_LOCK *lock,THR_LOCK_DATA *data, void *param) data->owner= 0; /* no owner yet */ data->status_param=param; data->cond=0; + data->priority= 0; } -- cgit v1.2.1