diff options
-rw-r--r-- | storage/innobase/CMakeLists.txt | 4 | ||||
-rw-r--r-- | storage/innobase/handler/ha_innodb.cc | 20 | ||||
-rw-r--r-- | storage/innobase/include/buf0flu.h | 13 | ||||
-rw-r--r-- | storage/innobase/include/srv0srv.h | 7 | ||||
-rw-r--r-- | storage/innobase/include/sync0sync.h | 6 | ||||
-rw-r--r-- | storage/innobase/include/trx0purge.h | 22 | ||||
-rw-r--r-- | storage/innobase/include/trx0rseg.h | 12 | ||||
-rw-r--r-- | storage/innobase/include/trx0sys.h | 10 | ||||
-rw-r--r-- | storage/innobase/include/trx0sys.ic | 12 | ||||
-rw-r--r-- | storage/innobase/include/ut0bh.ic | 123 | ||||
-rw-r--r-- | storage/innobase/srv/srv0srv.c | 22 | ||||
-rw-r--r-- | storage/innobase/sync/sync0sync.c | 14 | ||||
-rw-r--r-- | storage/innobase/trx/trx0purge.c | 399 | ||||
-rw-r--r-- | storage/innobase/trx/trx0rseg.c | 52 | ||||
-rw-r--r-- | storage/innobase/trx/trx0sys.c | 43 | ||||
-rw-r--r-- | storage/innobase/trx/trx0trx.c | 256 | ||||
-rw-r--r-- | storage/innobase/ut/ut0ut.c | 9 |
17 files changed, 652 insertions, 372 deletions
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index 4bbda0d2477..d4b98f2af0d 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -246,7 +246,7 @@ SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c usr/usr0sess.c ut/ut0byte.c ut/ut0dbg.c ut/ut0list.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c - ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c) + ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c ut/ut0bh.c) IF(WITH_INNODB) # Legacy option diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 8565809c09f..e4d041bf915 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2000, 2010, MySQL AB & Innobase Oy. All Rights Reserved. +Copyright (c) 2000, 2011, MySQL AB & Innobase Oy. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. @@ -262,7 +262,7 @@ static PSI_mutex_info all_innodb_mutexes[] = { # endif /* UNIV_MEM_DEBUG */ {&mem_pool_mutex_key, "mem_pool_mutex", 0}, {&mutex_list_mutex_key, "mutex_list_mutex", 0}, - {&purge_sys_mutex_key, "purge_sys_mutex", 0}, + {&purge_sys_bh_mutex_key, "purge_sys_bh_mutex", 0}, {&recv_sys_mutex_key, "recv_sys_mutex", 0}, {&rseg_mutex_key, "rseg_mutex", 0}, # ifdef UNIV_SYNC_DEBUG @@ -10971,16 +10971,23 @@ static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity, static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size, PLUGIN_VAR_OPCMDARG, - "Number of UNDO logs to purge in one batch from the history list. " - "Default is 20", + "Number of UNDO log pages to purge in one batch from the history list.", NULL, NULL, 20, /* Default setting */ 1, /* Minimum value */ - 5000, 0); /* Maximum value */ + 10000, 0); /* Maximum value */ + +static MYSQL_SYSVAR_ULONG(rollback_segments, srv_rollback_segments, + PLUGIN_VAR_OPCMDARG, + "Number of UNDO logs to use.", + NULL, NULL, + 128, /* Default setting */ + 1, /* Minimum value */ + TRX_SYS_N_RSEGS, 0); /* Maximum value */ static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads, PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, - "Purge threads can be either 0 or 1. Default is 0.", + "Purge threads can be either 0 or 1.", NULL, NULL, 0, /* Default setting */ 0, /* Minimum value */ @@ -11331,6 +11338,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(io_capacity), MYSQL_SYSVAR(purge_threads), MYSQL_SYSVAR(purge_batch_size), + MYSQL_SYSVAR(rollback_segments), NULL }; diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index 28d9b90e755..ae27f5dab0e 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -138,7 +138,18 @@ UNIV_INTERN void buf_flush_wait_batch_end( /*=====================*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ + buf_pool_t* buf_pool, /*!< in: buffer pool instance */ + enum buf_flush type); /*!< in: BUF_FLUSH_LRU + or BUF_FLUSH_LIST */ +/******************************************************************//** +Waits until a flush batch of the given type ends. This is called by +a thread that only wants to wait for a flush to end but doesn't do +any flushing itself. */ +UNIV_INTERN +void +buf_flush_wait_batch_end_wait_only( +/*===============================*/ + buf_pool_t* buf_pool, /*!< in: buffer pool instance */ enum buf_flush type); /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ /********************************************************************//** diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index d600ad4a034..252d1424c63 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved. Copyright (c) 2008, 2009, Google Inc. Copyright (c) 2009, Percona Inc. @@ -294,9 +294,12 @@ extern ulint srv_log_waits; /* the number of purge threads to use from the worker pool (currently 0 or 1) */ extern ulong srv_n_purge_threads; -/* the number of records to purge in one batch */ +/* the number of pages to purge in one batch */ extern ulong srv_purge_batch_size; +/* the number of rollback segments to use */ +extern ulong srv_rollback_segments; + /* variable that counts amount of data read in total (in bytes) */ extern ulint srv_data_read; diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h index fb3a24c5ee5..b4f9e21933f 100644 --- a/storage/innobase/include/sync0sync.h +++ b/storage/innobase/include/sync0sync.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -93,7 +93,7 @@ extern mysql_pfs_key_t mem_hash_mutex_key; # endif /* UNIV_MEM_DEBUG */ extern mysql_pfs_key_t mem_pool_mutex_key; extern mysql_pfs_key_t mutex_list_mutex_key; -extern mysql_pfs_key_t purge_sys_mutex_key; +extern mysql_pfs_key_t purge_sys_bh_mutex_key; extern mysql_pfs_key_t recv_sys_mutex_key; extern mysql_pfs_key_t rseg_mutex_key; # ifdef UNIV_SYNC_DEBUG @@ -637,7 +637,6 @@ or row lock! */ #define SYNC_TREE_NODE_NEW 892 #define SYNC_TREE_NODE_FROM_HASH 891 #define SYNC_TREE_NODE 890 -#define SYNC_PURGE_SYS 810 #define SYNC_PURGE_LATCH 800 #define SYNC_TRX_UNDO 700 #define SYNC_RSEG 600 @@ -659,6 +658,7 @@ or row lock! */ #define SYNC_REC_LOCK 299 #define SYNC_TRX_LOCK_HEAP 298 #define SYNC_TRX_SYS_HEADER 290 +#define SYNC_PURGE_QUEUE 200 #define SYNC_LOG 170 #define SYNC_LOG_FLUSH_ORDER 147 #define SYNC_RECV 168 diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h index d2730a68a78..0b83a76cab7 100644 --- a/storage/innobase/include/trx0purge.h +++ b/storage/innobase/include/trx0purge.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -68,8 +68,9 @@ Creates the global purge system control structure and inits the history mutex. */ UNIV_INTERN void -trx_purge_sys_create(void); -/*======================*/ +trx_purge_sys_create( +/*=================*/ + ib_bh_t* ib_bh); /*!< in/own: UNDO log min binary heap*/ /********************************************************************//** Frees the global purge system control structure. */ UNIV_INTERN @@ -128,20 +129,20 @@ struct trx_purge_struct{ ulint state; /*!< Purge system state */ sess_t* sess; /*!< System session running the purge query */ - trx_t* trx; /*!< System transaction running the purge + trx_t* trx; /*!< System transaction running the + purge query: this trx is not in the trx list of the trx system and it never ends */ que_t* query; /*!< The query graph which will do the parallelized purge operation */ - rw_lock_t latch; /*!< The latch protecting the purge view. - A purge operation must acquire an - x-latch here for the instant at which + rw_lock_t latch; /*!< The latch protecting the purge + view. A purge operation must acquire + an x-latch here for the instant at which it changes the purge view: an undo log operation can prevent this by obtaining an s-latch here. */ read_view_t* view; /*!< The purge will not remove undo logs which are >= this view (purge view) */ - mutex_t mutex; /*!< Mutex protecting the fields below */ ulint n_pages_handled;/*!< Approximate number of undo log pages processed in purge */ ulint handle_limit; /*!< Target of how many pages to get @@ -179,6 +180,11 @@ struct trx_purge_struct{ mem_heap_t* heap; /*!< Temporary storage used during a purge: can be emptied after purge completes */ + /*-----------------------------*/ + ib_bh_t* ib_bh; /*!< Binary min-heap, ordered on + rseg_queue_t::trx_no. It is protected + by the bh_mutex */ + mutex_t bh_mutex; /*!< Mutex protecting ib_bh */ }; #define TRX_PURGE_ON 1 /* purge operation is running */ diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h index a293d9b896e..5acde05de3d 100644 --- a/storage/innobase/include/trx0rseg.h +++ b/storage/innobase/include/trx0rseg.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -113,7 +113,9 @@ void trx_rseg_list_and_array_init( /*=========================*/ trx_sysf_t* sys_header, /*!< in: trx system header */ + ib_bh_t* ib_bh, /*!< in: rseg queue */ mtr_t* mtr); /*!< in: mtr */ + /*************************************************************************** Free's an instance of the rollback segment in memory. */ UNIV_INTERN @@ -180,6 +182,14 @@ struct trx_rseg_struct{ memory objects */ }; +/** For prioritising the rollback segments for purge. */ +struct rseg_queue_struct { + trx_id_t trx_no; /*!< trx_rseg_t::last_trx_no */ + trx_rseg_t* rseg; /*!< Rollback segment */ +}; + +typedef struct rseg_queue_struct rseg_queue_t; + /* Undo log segment slot in a rollback segment header */ /*-------------------------------------------------------------*/ #define TRX_RSEG_SLOT_PAGE_NO 0 /* Page number of the header page of diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h index 63e3f6be934..dc0ca2285b9 100644 --- a/storage/innobase/include/trx0sys.h +++ b/storage/innobase/include/trx0sys.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -38,6 +38,7 @@ Created 3/26/1996 Heikki Tuuri #include "mem0mem.h" #include "sync0sync.h" #include "ut0lst.h" +#include "ut0bh.h" #include "read0types.h" #include "page0types.h" @@ -221,13 +222,6 @@ UNIV_INLINE trx_id_t trx_sys_get_new_trx_id(void); /*========================*/ -/*****************************************************************//** -Allocates a new transaction number. -@return new, allocated trx number */ -UNIV_INLINE -trx_id_t -trx_sys_get_new_trx_no(void); -/*========================*/ #endif /* !UNIV_HOTBACKUP */ /*****************************************************************//** Writes a trx id to an index page. In case that the id size changes in diff --git a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic index 385c7f4f0cc..355f118a1ec 100644 --- a/storage/innobase/include/trx0sys.ic +++ b/storage/innobase/include/trx0sys.ic @@ -369,16 +369,4 @@ trx_sys_get_new_trx_id(void) return(id); } -/*****************************************************************//** -Allocates a new transaction number. -@return new, allocated trx number */ -UNIV_INLINE -trx_id_t -trx_sys_get_new_trx_no(void) -/*========================*/ -{ - ut_ad(mutex_own(&kernel_mutex)); - - return(trx_sys_get_new_trx_id()); -} #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/ut0bh.ic b/storage/innobase/include/ut0bh.ic new file mode 100644 index 00000000000..10660b29559 --- /dev/null +++ b/storage/innobase/include/ut0bh.ic @@ -0,0 +1,123 @@ +/***************************************************************************//** +Copyright (c) 2011, Oracle Corpn. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/ut0bh.ic +Binary min-heap implementation. + +Created 2011-01-15 by Sunny Bains +*******************************************************/ + +#include "ut0bh.h" + +/**********************************************************************//** +Get the number of elements in the binary heap. +@return number of elements */ +UNIV_INLINE +ulint +ib_bh_size( +/*=======*/ + const ib_bh_t* ib_bh) /*!< in: instance */ +{ + return(ib_bh->n_elems); +} + +/**********************************************************************//** +Test if binary heap is empty. +@return TRUE if empty. */ +UNIV_INLINE +ibool +ib_bh_is_empty( +/*===========*/ + const ib_bh_t* ib_bh) /*!< in: instance */ +{ + return(ib_bh_size(ib_bh) == 0); +} + +/**********************************************************************//** +Test if binary heap is full. +@return TRUE if full. */ +UNIV_INLINE +ibool +ib_bh_is_full( +/*===========*/ + const ib_bh_t* ib_bh) /*!< in: instance */ +{ + return(ib_bh_size(ib_bh) >= ib_bh->max_elems); +} + +/**********************************************************************//** +Get a pointer to the element. +@return pointer to element */ +UNIV_INLINE +void* +ib_bh_get( +/*=======*/ + ib_bh_t* ib_bh, /*!< in: instance */ + ulint i) /*!< in: index */ +{ + byte* ptr = (byte*) (ib_bh + 1); + + ut_a(i < ib_bh_size(ib_bh)); + + return(ptr + (ib_bh->sizeof_elem * i)); +} + +/**********************************************************************//** +Copy an element to the binary heap. +@return pointer to copied element */ +UNIV_INLINE +void* +ib_bh_set( +/*======*/ + ib_bh_t* ib_bh, /*!< in/out: instance */ + ulint i, /*!< in: index */ + const void* elem) /*!< in: element to add */ +{ + void* ptr = ib_bh_get(ib_bh, i); + + ut_memcpy(ptr, elem, ib_bh->sizeof_elem); + + return(ptr); +} + +/**********************************************************************//** +Return the first element from the binary heap. +@return pointer to first element or NULL if empty. */ +UNIV_INLINE +void* +ib_bh_first( +/*========*/ + ib_bh_t* ib_bh) /*!< in: instance */ +{ + return(ib_bh_is_empty(ib_bh) ? NULL : ib_bh_get(ib_bh, 0)); +} + +/**********************************************************************//** +Return the last element from the binary heap. +@return pointer to last element or NULL if empty. */ +UNIV_INLINE +void* +ib_bh_last( +/*========*/ + ib_bh_t* ib_bh) /*!< in/out: instance */ +{ + return(ib_bh_is_empty(ib_bh) + ? NULL + : ib_bh_get(ib_bh, ib_bh_size(ib_bh) - 1)); +} + diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index 826145005a4..3c5eed448bd 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. @@ -269,9 +269,12 @@ UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75; /* the number of purge threads to use from the worker pool (currently 0 or 1).*/ UNIV_INTERN ulong srv_n_purge_threads = 0; -/* the number of records to purge in one batch */ +/* the number of pages to purge in one batch */ UNIV_INTERN ulong srv_purge_batch_size = 20; +/* the number of rollback segments to use */ +UNIV_INTERN ulong srv_rollback_segments = TRX_SYS_N_RSEGS; + /* variable counts amount of data read in total (in bytes) */ UNIV_INTERN ulint srv_data_read = 0; @@ -3096,6 +3099,7 @@ srv_purge_thread( required by os_thread_create */ { srv_slot_t* slot; + ulint retries = 0; ulint slot_no = ULINT_UNDEFINED; ulint n_total_purged = ULINT_UNDEFINED; @@ -3122,7 +3126,7 @@ srv_purge_thread( while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) { - ulint n_pages_purged; + ulint n_pages_purged = 0; /* If there are very few records to purge or the last purge didn't purge any records then wait for activity. @@ -3130,7 +3134,8 @@ srv_purge_thread( because in the worst case we will end up waiting for the next purge event. */ if (trx_sys->rseg_history_len < srv_purge_batch_size - || n_total_purged == 0) { + || (n_total_purged == 0 + && retries >= TRX_SYS_N_RSEGS)) { os_event_t event; @@ -3141,6 +3146,8 @@ srv_purge_thread( mutex_exit(&kernel_mutex); os_event_wait(event); + + retries = 0; } /* Check for shutdown and whether we should do purge at all. */ @@ -3151,7 +3158,12 @@ srv_purge_thread( break; } - n_total_purged = 0; + if (n_total_purged == 0 && retries <= TRX_SYS_N_RSEGS) { + ++retries; + } else if (n_total_purged > 0) { + retries = 0; + n_total_purged = 0; + } /* Purge until there are no more records to purge and there is no change in configuration or server state. */ diff --git a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c index 453314f465d..0f6a60ca260 100644 --- a/storage/innobase/sync/sync0sync.c +++ b/storage/innobase/sync/sync0sync.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -1172,7 +1172,7 @@ sync_thread_add_level( case SYNC_RSEG: case SYNC_TRX_UNDO: case SYNC_PURGE_LATCH: - case SYNC_PURGE_SYS: + case SYNC_PURGE_QUEUE: case SYNC_DICT_AUTOINC_MUTEX: case SYNC_DICT_OPERATION: case SYNC_DICT_HEADER: @@ -1239,10 +1239,16 @@ sync_thread_add_level( || sync_thread_levels_g(array, SYNC_FSP, TRUE)); break; case SYNC_TRX_UNDO_PAGE: + /* Purge is allowed to read in as many UNDO pages as it likes, + there was a bogus rule here earlier that forced the caller to + acquire the purge_sys_t::mutex. The purge mutex did not really + protect anything because it was only ever acquired by the + single purge thread. The purge thread can read the UNDO pages + without any covering mutex. */ + ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO) || sync_thread_levels_contain(array, SYNC_RSEG) - || sync_thread_levels_contain(array, SYNC_PURGE_SYS) - || sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE, TRUE)); + || sync_thread_levels_g(array, level - 1, TRUE)); break; case SYNC_RSEG_HEADER: ut_a(sync_thread_levels_contain(array, SYNC_RSEG)); diff --git a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c index 4c787579a03..02ec9f1c072 100644 --- a/storage/innobase/trx/trx0purge.c +++ b/storage/innobase/trx/trx0purge.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -57,8 +57,8 @@ UNIV_INTERN mysql_pfs_key_t trx_purge_latch_key; #endif /* UNIV_PFS_RWLOCK */ #ifdef UNIV_PFS_MUTEX -/* Key to register purge_sys_mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t purge_sys_mutex_key; +/* Key to register purge_sys_bh_mutex with performance schema */ +UNIV_INTERN mysql_pfs_key_t purge_sys_bh_mutex_key; #endif /* UNIV_PFS_MUTEX */ /*****************************************************************//** @@ -219,13 +219,16 @@ Creates the global purge system control structure and inits the history mutex. */ UNIV_INTERN void -trx_purge_sys_create(void) -/*======================*/ +trx_purge_sys_create( +/*=================*/ + ib_bh_t* ib_bh) /*!< in, own: UNDO log min binary heap */ { ut_ad(mutex_own(&kernel_mutex)); - purge_sys = mem_alloc(sizeof(trx_purge_t)); + purge_sys = mem_zalloc(sizeof(trx_purge_t)); + /* Take ownership of ib_bh, we are responsible for freeing it. */ + purge_sys->ib_bh = ib_bh; purge_sys->state = TRX_STOP_PURGE; purge_sys->n_pages_handled = 0; @@ -237,8 +240,9 @@ trx_purge_sys_create(void) rw_lock_create(trx_purge_latch_key, &purge_sys->latch, SYNC_PURGE_LATCH); - mutex_create(purge_sys_mutex_key, - &purge_sys->mutex, SYNC_PURGE_SYS); + mutex_create( + purge_sys_bh_mutex_key, &purge_sys->bh_mutex, + SYNC_PURGE_QUEUE); purge_sys->heap = mem_heap_create(256); @@ -288,9 +292,12 @@ trx_purge_sys_close(void) trx_undo_arr_free(purge_sys->arr); rw_lock_free(&purge_sys->latch); - mutex_free(&purge_sys->mutex); + mutex_free(&purge_sys->bh_mutex); mem_heap_free(purge_sys->heap); + + ib_bh_free(purge_sys->ib_bh); + mem_free(purge_sys); purge_sys = NULL; @@ -311,34 +318,31 @@ trx_purge_add_update_undo_to_history( mtr_t* mtr) /*!< in: mtr */ { trx_undo_t* undo; - trx_rseg_t* rseg; trx_rsegf_t* rseg_header; -#ifdef UNIV_DEBUG - trx_usegf_t* seg_header; -#endif /* UNIV_DEBUG */ trx_ulogf_t* undo_header; - ulint hist_size; undo = trx->update_undo; ut_ad(undo); - rseg = undo->rseg; + ut_ad(mutex_own(&undo->rseg->mutex)); - ut_ad(mutex_own(&(rseg->mutex))); - - rseg_header = trx_rsegf_get(rseg->space, rseg->zip_size, - rseg->page_no, mtr); + rseg_header = trx_rsegf_get( + undo->rseg->space, undo->rseg->zip_size, undo->rseg->page_no, + mtr); undo_header = undo_page + undo->hdr_offset; + /* Add the log as the first in the history list */ + + if (undo->state != TRX_UNDO_CACHED) { + ulint hist_size; #ifdef UNIV_DEBUG - seg_header = undo_page + TRX_UNDO_SEG_HDR; + trx_usegf_t* seg_header = undo_page + TRX_UNDO_SEG_HDR; #endif /* UNIV_DEBUG */ - if (undo->state != TRX_UNDO_CACHED) { /* The undo log segment will not be reused */ - if (undo->id >= TRX_RSEG_N_SLOTS) { + if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) { fprintf(stderr, "InnoDB: Error: undo->id is %lu\n", (ulong) undo->id); @@ -347,42 +351,50 @@ trx_purge_add_update_undo_to_history( trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr); - hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, - MLOG_4BYTES, mtr); + hist_size = mtr_read_ulint( + rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr); + ut_ad(undo->size == flst_get_len( seg_header + TRX_UNDO_PAGE_LIST, mtr)); - mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, - hist_size + undo->size, MLOG_4BYTES, mtr); + mlog_write_ulint( + rseg_header + TRX_RSEG_HISTORY_SIZE, + hist_size + undo->size, MLOG_4BYTES, mtr); } - /* Add the log as the first in the history list */ - flst_add_first(rseg_header + TRX_RSEG_HISTORY, - undo_header + TRX_UNDO_HISTORY_NODE, mtr); - mutex_enter(&kernel_mutex); - trx_sys->rseg_history_len++; - mutex_exit(&kernel_mutex); - - if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) { - /* Inform the purge thread that there is work to do. */ - srv_wake_purge_thread_if_not_active(); - } + flst_add_first( + rseg_header + TRX_RSEG_HISTORY, + undo_header + TRX_UNDO_HISTORY_NODE, mtr); /* Write the trx number to the undo log header */ + mlog_write_ull(undo_header + TRX_UNDO_TRX_NO, trx->no, mtr); + /* Write information about delete markings to the undo log header */ if (!undo->del_marks) { - mlog_write_ulint(undo_header + TRX_UNDO_DEL_MARKS, FALSE, - MLOG_2BYTES, mtr); + mlog_write_ulint( + undo_header + TRX_UNDO_DEL_MARKS, FALSE, + MLOG_2BYTES, mtr); } - if (rseg->last_page_no == FIL_NULL) { + if (undo->rseg->last_page_no == FIL_NULL) { + undo->rseg->last_trx_no = trx->no; + undo->rseg->last_offset = undo->hdr_offset; + undo->rseg->last_page_no = undo->hdr_page_no; + undo->rseg->last_del_marks = undo->del_marks; - rseg->last_page_no = undo->hdr_page_no; - rseg->last_offset = undo->hdr_offset; - rseg->last_trx_no = trx->no; - rseg->last_del_marks = undo->del_marks; + /* FIXME: Add a bin heap validate function to check that + the rseg exists. */ + } + + mutex_enter(&kernel_mutex); + trx_sys->rseg_history_len++; + mutex_exit(&kernel_mutex); + + if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) { + /* Inform the purge thread that there is work to do. */ + srv_wake_purge_thread_if_not_active(); } } @@ -411,7 +423,6 @@ trx_purge_free_segment( /* fputs("Freeing an update undo log segment\n", stderr); */ - ut_ad(mutex_own(&(purge_sys->mutex))); loop: mtr_start(&mtr); mutex_enter(&(rseg->mutex)); @@ -515,8 +526,6 @@ trx_purge_truncate_rseg_history( mtr_t mtr; trx_id_t undo_trx_no; - ut_ad(mutex_own(&(purge_sys->mutex))); - mtr_start(&mtr); mutex_enter(&(rseg->mutex)); @@ -609,10 +618,8 @@ trx_purge_truncate_history(void) trx_id_t limit_trx_no; undo_no_t limit_undo_no; - ut_ad(mutex_own(&(purge_sys->mutex))); - - trx_purge_arr_get_biggest(purge_sys->arr, &limit_trx_no, - &limit_undo_no); + trx_purge_arr_get_biggest( + purge_sys->arr, &limit_trx_no, &limit_undo_no); if (limit_trx_no == 0) { @@ -630,34 +637,29 @@ trx_purge_truncate_history(void) ut_ad(limit_trx_no <= purge_sys->view->low_limit_no); - rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); + for (rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); + rseg != NULL; + rseg = UT_LIST_GET_NEXT(rseg_list, rseg)) { - while (rseg) { - trx_purge_truncate_rseg_history(rseg, limit_trx_no, - limit_undo_no); - rseg = UT_LIST_GET_NEXT(rseg_list, rseg); + trx_purge_truncate_rseg_history( + rseg, limit_trx_no, limit_undo_no); } } /********************************************************************//** Does a truncate if the purge array is empty. NOTE that when this function is -called, the caller must not have any latches on undo log pages! -@return TRUE if array empty */ +called, the caller must not have any latches on undo log pages! */ UNIV_INLINE -ibool +void trx_purge_truncate_if_arr_empty(void) /*=================================*/ { - ut_ad(mutex_own(&(purge_sys->mutex))); + static ulint count; - if (purge_sys->arr->n_used == 0) { + if (!(++count % TRX_SYS_N_RSEGS) && purge_sys->arr->n_used == 0) { trx_purge_truncate_history(); - - return(TRUE); } - - return(FALSE); } /***********************************************************************//** @@ -675,8 +677,8 @@ trx_purge_rseg_get_next_history_log( trx_id_t trx_no; ibool del_marks; mtr_t mtr; - - ut_ad(mutex_own(&(purge_sys->mutex))); + rseg_queue_t rseg_queue; + const void* ptr; mutex_enter(&(rseg->mutex)); @@ -688,8 +690,9 @@ trx_purge_rseg_get_next_history_log( mtr_start(&mtr); - undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size, - rseg->last_page_no, &mtr); + undo_page = trx_undo_page_get_s_latched( + rseg->space, rseg->zip_size, rseg->last_page_no, &mtr); + log_hdr = undo_page + rseg->last_offset; /* Increase the purge page count by one for every handled log */ @@ -698,6 +701,7 @@ trx_purge_rseg_get_next_history_log( prev_log_addr = trx_purge_get_log_from_hist( flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr)); + if (prev_log_addr.page == FIL_NULL) { /* No logs left in the history list */ @@ -712,11 +716,11 @@ trx_purge_rseg_get_next_history_log( on the MySQL mailing list on Nov 9, 2004. The fut0lst.c file-based list was corrupt. The prev node pointer was FIL_NULL, even though the list length was over 8 million nodes! - We assume that purge truncates the history list in moderate + We assume that purge truncates the history list in large size pieces, and if we here reach the head of the list, the - list cannot be longer than 20 000 undo logs now. */ + list cannot be longer than 2000 000 undo logs now. */ - if (trx_sys->rseg_history_len > 20000) { + if (trx_sys->rseg_history_len > 2000000) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Warning: purge reached the" @@ -756,105 +760,150 @@ trx_purge_rseg_get_next_history_log( rseg->last_trx_no = trx_no; rseg->last_del_marks = del_marks; + rseg_queue.rseg = rseg; + rseg_queue.trx_no = rseg->last_trx_no; + + /* Purge can also produce events, however these are already ordered + in the rollback segment and any user generated event will be greater + than the events that Purge produces. ie. Purge can never produce + events from an empty rollback segment. */ + + mutex_enter(&purge_sys->bh_mutex); + + ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue); + ut_a(ptr != NULL); + + mutex_exit(&purge_sys->bh_mutex); + mutex_exit(&(rseg->mutex)); } /***********************************************************************//** -Chooses the next undo log to purge and updates the info in purge_sys. This -function is used to initialize purge_sys when the next record to purge is -not known, and also to update the purge system info on the next record when -purge has handled the whole undo log for a transaction. */ +Chooses the rollback segment with the smallest trx_id. +@return zip_size if log is for a compressed table, ULINT_UNDEFINED if + no rollback segments to purge, 0 for non compressed tables. */ static -void -trx_purge_choose_next_log(void) -/*===========================*/ +ulint +trx_purge_get_rseg_with_min_trx_id( +/*===============================*/ + trx_purge_t* purge_sys) /*!< in/out: purge instance */ + { - trx_undo_rec_t* rec; - trx_rseg_t* rseg; - trx_rseg_t* min_rseg; - trx_id_t min_trx_no; - ulint space = 0; /* remove warning (??? bug ???) */ ulint zip_size = 0; - ulint page_no = 0; /* remove warning (??? bug ???) */ - ulint offset = 0; /* remove warning (??? bug ???) */ - mtr_t mtr; - ut_ad(mutex_own(&(purge_sys->mutex))); - ut_ad(purge_sys->next_stored == FALSE); + mutex_enter(&purge_sys->bh_mutex); - rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); + /* Only purge consumes events from the binary heap, user + threads only produce the events. */ - min_trx_no = IB_ULONGLONG_MAX; + if (!ib_bh_is_empty(purge_sys->ib_bh)) { + trx_rseg_t* rseg; - min_rseg = NULL; + rseg = ((rseg_queue_t*) ib_bh_first(purge_sys->ib_bh))->rseg; + ib_bh_pop(purge_sys->ib_bh); - while (rseg) { - mutex_enter(&(rseg->mutex)); + mutex_exit(&purge_sys->bh_mutex); - if (rseg->last_page_no != FIL_NULL) { + purge_sys->rseg = rseg; + } else { + mutex_exit(&purge_sys->bh_mutex); - if (min_rseg == NULL - || min_trx_no > rseg->last_trx_no) { + purge_sys->rseg = NULL; - min_rseg = rseg; - min_trx_no = rseg->last_trx_no; - space = rseg->space; - zip_size = rseg->zip_size; - ut_a(space == 0); /* We assume in purge of - externally stored fields - that space id == 0 */ - page_no = rseg->last_page_no; - offset = rseg->last_offset; - } - } + return(ULINT_UNDEFINED); + } - mutex_exit(&(rseg->mutex)); + ut_a(purge_sys->rseg != NULL); - rseg = UT_LIST_GET_NEXT(rseg_list, rseg); - } + mutex_enter(&purge_sys->rseg->mutex); - if (min_rseg == NULL) { + ut_a(purge_sys->rseg->last_page_no != FIL_NULL); - return; - } + /* We assume in purge of externally stored fields + that space id == 0 */ + ut_a(purge_sys->rseg->space == 0); - mtr_start(&mtr); + zip_size = purge_sys->rseg->zip_size; - if (!min_rseg->last_del_marks) { - /* No need to purge this log */ + ut_a(purge_sys->purge_trx_no <= purge_sys->rseg->last_trx_no); - rec = &trx_purge_dummy_rec; - } else { - rec = trx_undo_get_first_rec(space, zip_size, page_no, offset, - RW_S_LATCH, &mtr); - if (rec == NULL) { - /* Undo log empty */ + purge_sys->purge_trx_no = purge_sys->rseg->last_trx_no; + + purge_sys->hdr_offset = purge_sys->rseg->last_offset; + + purge_sys->hdr_page_no = purge_sys->rseg->last_page_no; + + mutex_exit(&purge_sys->rseg->mutex); + + return(zip_size); +} + +/***********************************************************************//** +Position the purge sys "iterator" on the undo record to use for purging. */ +static +void +trx_purge_read_undo_rec( +/*====================*/ + trx_purge_t* purge_sys, /*!< in/out: purge instance */ + ulint zip_size) /*!< in: block size or 0 */ +{ + ulint page_no; + ulint offset = 0; + ib_uint64_t undo_no = 0; + + purge_sys->hdr_offset = purge_sys->rseg->last_offset; + page_no = purge_sys->hdr_page_no = purge_sys->rseg->last_page_no; + + if (purge_sys->rseg->last_del_marks) { + mtr_t mtr; + trx_undo_rec_t* undo_rec; - rec = &trx_purge_dummy_rec; + mtr_start(&mtr); + + undo_rec = trx_undo_get_first_rec( + 0 /* System space id */, zip_size, + purge_sys->hdr_page_no, + purge_sys->hdr_offset, RW_S_LATCH, &mtr); + + if (undo_rec != NULL) { + offset = page_offset(undo_rec); + undo_no = trx_undo_rec_get_undo_no(undo_rec); + page_no = page_get_page_no(page_align(undo_rec)); } + + mtr_commit(&mtr); } + purge_sys->offset = offset; + purge_sys->page_no = page_no; + purge_sys->purge_undo_no = undo_no; + purge_sys->next_stored = TRUE; - purge_sys->rseg = min_rseg; +} - purge_sys->hdr_page_no = page_no; - purge_sys->hdr_offset = offset; +/***********************************************************************//** +Chooses the next undo log to purge and updates the info in purge_sys. This +function is used to initialize purge_sys when the next record to purge is +not known, and also to update the purge system info on the next record when +purge has handled the whole undo log for a transaction. */ +static +void +trx_purge_choose_next_log(void) +/*===========================*/ +{ + ulint zip_size; - purge_sys->purge_trx_no = min_trx_no; + ut_ad(purge_sys->next_stored == FALSE); - if (rec == &trx_purge_dummy_rec) { + zip_size = trx_purge_get_rseg_with_min_trx_id(purge_sys); - purge_sys->purge_undo_no = 0; - purge_sys->page_no = page_no; - purge_sys->offset = 0; - } else { - purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec); + if (purge_sys->rseg != NULL) { - purge_sys->page_no = page_get_page_no(page_align(rec)); - purge_sys->offset = page_offset(rec); + trx_purge_read_undo_rec(purge_sys, zip_size); + } else { + /* There is nothing to do yet. */ + os_thread_yield(); } - - mtr_commit(&mtr); } /***********************************************************************//** @@ -880,7 +929,6 @@ trx_purge_get_next_rec( ulint cmpl_info; mtr_t mtr; - ut_ad(mutex_own(&(purge_sys->mutex))); ut_ad(purge_sys->next_stored); space = purge_sys->rseg->space; @@ -903,8 +951,8 @@ trx_purge_get_next_rec( mtr_start(&mtr); - undo_page = trx_undo_page_get_s_latched(space, zip_size, - page_no, &mtr); + undo_page = trx_undo_page_get_s_latched(space, zip_size, page_no, &mtr); + rec = undo_page + offset; rec2 = rec; @@ -913,9 +961,9 @@ trx_purge_get_next_rec( /* Try first to find the next record which requires a purge operation from the same page of the same undo log */ - next_rec = trx_undo_page_get_next_rec(rec2, - purge_sys->hdr_page_no, - purge_sys->hdr_offset); + next_rec = trx_undo_page_get_next_rec( + rec2, purge_sys->hdr_page_no, purge_sys->hdr_offset); + if (next_rec == NULL) { rec2 = trx_undo_get_next_rec( rec2, purge_sys->hdr_page_no, @@ -995,17 +1043,12 @@ trx_purge_fetch_next_rec( { trx_undo_rec_t* undo_rec; - mutex_enter(&(purge_sys->mutex)); if (purge_sys->state == TRX_STOP_PURGE) { trx_purge_truncate_if_arr_empty(); - mutex_exit(&(purge_sys->mutex)); - return(NULL); - } - - if (!purge_sys->next_stored) { + } else if (!purge_sys->next_stored) { trx_purge_choose_next_log(); if (!purge_sys->next_stored) { @@ -1020,8 +1063,6 @@ trx_purge_fetch_next_rec( (ulong) purge_sys->n_pages_handled); } - mutex_exit(&(purge_sys->mutex)); - return(NULL); } } @@ -1032,18 +1073,12 @@ trx_purge_fetch_next_rec( trx_purge_truncate_if_arr_empty(); - mutex_exit(&(purge_sys->mutex)); - return(NULL); - } - - if (purge_sys->purge_trx_no >= purge_sys->view->low_limit_no) { + } else if (purge_sys->purge_trx_no >= purge_sys->view->low_limit_no) { purge_sys->state = TRX_STOP_PURGE; trx_purge_truncate_if_arr_empty(); - mutex_exit(&(purge_sys->mutex)); - return(NULL); } @@ -1052,12 +1087,13 @@ trx_purge_fetch_next_rec( (ullint) purge_sys->purge_trx_no, (ullint) purge_sys->purge_undo_no); */ - *roll_ptr = trx_undo_build_roll_ptr(FALSE, (purge_sys->rseg)->id, - purge_sys->page_no, - purge_sys->offset); - *cell = trx_purge_arr_store_info(purge_sys->purge_trx_no, - purge_sys->purge_undo_no); + *roll_ptr = trx_undo_build_roll_ptr( + FALSE, (purge_sys->rseg)->id, purge_sys->page_no, + purge_sys->offset); + + *cell = trx_purge_arr_store_info( + purge_sys->purge_trx_no, purge_sys->purge_undo_no); ut_ad(purge_sys->purge_trx_no < purge_sys->view->low_limit_no); @@ -1066,8 +1102,6 @@ trx_purge_fetch_next_rec( undo_rec = trx_purge_get_next_rec(heap); - mutex_exit(&(purge_sys->mutex)); - return(undo_rec); } @@ -1079,11 +1113,7 @@ trx_purge_rec_release( /*==================*/ trx_undo_inf_t* cell) /*!< in: storage cell */ { - mutex_enter(&(purge_sys->mutex)); - trx_purge_arr_remove_info(cell); - - mutex_exit(&(purge_sys->mutex)); } /*******************************************************************//** @@ -1097,23 +1127,11 @@ trx_purge( purge in one batch */ { que_thr_t* thr; - /* que_thr_t* thr2; */ ulint old_pages_handled; - mutex_enter(&(purge_sys->mutex)); - - if (purge_sys->trx->n_active_thrs > 0) { + ut_a(purge_sys->trx->n_active_thrs == 0); - mutex_exit(&(purge_sys->mutex)); - - /* Should not happen */ - - ut_error; - - return(0); - } - - rw_lock_x_lock(&(purge_sys->latch)); + rw_lock_x_lock(&purge_sys->latch); mutex_enter(&kernel_mutex); @@ -1147,8 +1165,9 @@ trx_purge( } } - purge_sys->view = read_view_oldest_copy_or_open_new(0, - purge_sys->heap); + purge_sys->view = read_view_oldest_copy_or_open_new( + 0, purge_sys->heap); + mutex_exit(&kernel_mutex); rw_lock_x_unlock(&(purge_sys->latch)); @@ -1159,7 +1178,6 @@ trx_purge( old_pages_handled = purge_sys->n_pages_handled; - mutex_exit(&(purge_sys->mutex)); mutex_enter(&kernel_mutex); @@ -1167,15 +1185,8 @@ trx_purge( ut_ad(thr); - /* thr2 = que_fork_start_command(purge_sys->query); - - ut_ad(thr2); */ - - mutex_exit(&kernel_mutex); - /* srv_que_task_enqueue(thr2); */ - if (srv_print_thread_releases) { fputs("Starting purge\n", stderr); diff --git a/storage/innobase/trx/trx0rseg.c b/storage/innobase/trx/trx0rseg.c index 740320f68c1..85beac8afbc 100644 --- a/storage/innobase/trx/trx0rseg.c +++ b/storage/innobase/trx/trx0rseg.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Oracle Corpn. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -50,11 +50,11 @@ trx_rseg_get_on_id( { trx_rseg_t* rseg; - rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); + ut_a(id < TRX_SYS_N_RSEGS); - while (rseg && rseg->id != id) { - rseg = UT_LIST_GET_NEXT(rseg_list, rseg); - } + rseg = trx_sys->rseg_array[id]; + + ut_a(rseg == NULL || id == rseg->id); return(rseg); } @@ -181,12 +181,15 @@ static trx_rseg_t* trx_rseg_mem_create( /*================*/ - ulint id, /*!< in: rollback segment id */ - ulint space, /*!< in: space where the segment placed */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number of the segment header */ - mtr_t* mtr) /*!< in: mtr */ + ulint id, /*!< in: rollback segment id */ + ulint space, /*!< in: space where the segment + placed */ + ulint zip_size, /*!< in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /*!< in: page number of the segment + header */ + ib_bh_t* ib_bh, /*!< in/out: rseg queue */ + mtr_t* mtr) /*!< in: mtr */ { ulint len; trx_rseg_t* rseg; @@ -225,6 +228,9 @@ trx_rseg_mem_create( len = flst_get_len(rseg_header + TRX_RSEG_HISTORY, mtr); if (len > 0) { + const void* ptr; + rseg_queue_t rseg_queue; + trx_sys->rseg_history_len += len; node_addr = trx_purge_get_log_from_hist( @@ -240,6 +246,17 @@ trx_rseg_mem_create( undo_log_hdr + TRX_UNDO_TRX_NO); rseg->last_del_marks = mtr_read_ulint( undo_log_hdr + TRX_UNDO_DEL_MARKS, MLOG_2BYTES, mtr); + + rseg_queue.rseg = rseg; + rseg_queue.trx_no = rseg->last_trx_no; + + if (rseg->last_page_no != FIL_NULL) { + /* There is no need to cover this operation by the purge + mutex because we are still bootstrapping. */ + + ptr = ib_bh_push(ib_bh, &rseg_queue); + ut_a(ptr != NULL); + } } else { rseg->last_page_no = FIL_NULL; } @@ -255,6 +272,7 @@ void trx_rseg_create_instance( /*=====================*/ trx_sysf_t* sys_header, /*!< in: trx system header */ + ib_bh_t* ib_bh, /*!< in/out: rseg queue */ mtr_t* mtr) /*!< in: mtr */ { ulint i; @@ -278,7 +296,7 @@ trx_rseg_create_instance( zip_size = space ? fil_space_get_zip_size(space) : 0; rseg = trx_rseg_mem_create( - i, space, zip_size, page_no, mtr); + i, space, zip_size, page_no, ib_bh, mtr); ut_a(rseg->id == i); } @@ -327,7 +345,8 @@ trx_rseg_create(void) zip_size = space ? fil_space_get_zip_size(space) : 0; rseg = trx_rseg_mem_create( - slot_no, space, zip_size, page_no, &mtr); + slot_no, space, zip_size, page_no, + purge_sys->ib_bh, &mtr); } mutex_exit(&kernel_mutex); @@ -342,13 +361,14 @@ UNIV_INTERN void trx_rseg_list_and_array_init( /*=========================*/ - trx_sysf_t* sys_header, /* in: trx system header */ - mtr_t* mtr) /* in: mtr */ + trx_sysf_t* sys_header, /*!< in: trx system header */ + ib_bh_t* ib_bh, /*!< in: rseg queue */ + mtr_t* mtr) /*!< in: mtr */ { UT_LIST_INIT(trx_sys->rseg_list); trx_sys->rseg_history_len = 0; - trx_rseg_create_instance(sys_header, mtr); + trx_rseg_create_instance(sys_header, ib_bh, mtr); } diff --git a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c index 101f225a06f..7af9fbb1af8 100644 --- a/storage/innobase/trx/trx0sys.c +++ b/storage/innobase/trx/trx0sys.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -946,6 +946,31 @@ trx_sysf_create( } /*****************************************************************//** +Compare two trx_rseg_t instances on last_trx_no. */ +static +int +trx_rseg_compare_last_trx_no( +/*=========================*/ + const void* p1, /*!< in: elem to compare */ + const void* p2) /*!< in: elem to compare */ +{ + ib_int64_t cmp; + + const rseg_queue_t* rseg_q1 = (const rseg_queue_t*) p1; + const rseg_queue_t* rseg_q2 = (const rseg_queue_t*) p2; + + cmp = rseg_q1->trx_no - rseg_q2->trx_no; + + if (cmp < 0) { + return(-1); + } else if (cmp > 0) { + return(1); + } + + return(0); +} + +/*****************************************************************//** Creates and initializes the central memory structures for the transaction system. This is called when the database is started. */ UNIV_INTERN @@ -958,6 +983,7 @@ trx_sys_init_at_db_start(void) const char* unit = ""; trx_t* trx; mtr_t mtr; + ib_bh_t* ib_bh; mtr_start(&mtr); @@ -965,11 +991,19 @@ trx_sys_init_at_db_start(void) mutex_enter(&kernel_mutex); - trx_sys = mem_alloc(sizeof(trx_sys_t)); + /* We create the min binary heap here and pass ownership to + purge when we init the purge sub-system. Purge is responsible + for freeing the binary heap. */ + + ib_bh = ib_bh_create( + trx_rseg_compare_last_trx_no, + sizeof(rseg_queue_t), TRX_SYS_N_RSEGS); + + trx_sys = mem_zalloc(sizeof(*trx_sys)); sys_header = trx_sysf_get(&mtr); - trx_rseg_list_and_array_init(sys_header, &mtr); + trx_rseg_list_and_array_init(sys_header, ib_bh, &mtr); trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); @@ -1023,7 +1057,8 @@ trx_sys_init_at_db_start(void) UT_LIST_INIT(trx_sys->view_list); - trx_purge_sys_create(); + /* Transfer ownership to purge. */ + trx_purge_sys_create(ib_bh); mutex_exit(&kernel_mutex); diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c index 4bbcee210b0..820c40fe857 100644 --- a/storage/innobase/trx/trx0trx.c +++ b/storage/innobase/trx/trx0trx.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -42,6 +42,7 @@ Created 3/26/1996 Heikki Tuuri #include "btr0sea.h" #include "os0proc.h" #include "trx0xa.h" +#include "trx0purge.h" #include "ha_prototypes.h" /** Dummy session used currently in MySQL interface */ @@ -604,36 +605,26 @@ trx_lists_init_at_db_start(void) /******************************************************************//** Assigns a rollback segment to a transaction in a round-robin fashion. -Skips the SYSTEM rollback segment if another is available. -@return assigned rollback segment id */ +@return assigned rollback segment instance */ UNIV_INLINE -ulint -trx_assign_rseg(void) -/*=================*/ +trx_rseg_t* +trx_assign_rseg( +/*============*/ + ulint max_undo_logs) /*!< in: maximum number of UNDO logs to use */ { - trx_rseg_t* rseg = trx_sys->latest_rseg; + trx_rseg_t* rseg = trx_sys->latest_rseg; ut_ad(mutex_own(&kernel_mutex)); -loop: - /* Get next rseg in a round-robin fashion */ rseg = UT_LIST_GET_NEXT(rseg_list, rseg); - if (rseg == NULL) { + if (rseg == NULL || rseg->id == max_undo_logs - 1) { rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); } - /* If it is the SYSTEM rollback segment, and there exist others, skip - it */ - - if ((rseg->id == TRX_SYS_SYSTEM_RSEG_ID) - && (UT_LIST_GET_LEN(trx_sys->rseg_list) > 1)) { - goto loop; - } - trx_sys->latest_rseg = rseg; - return(rseg->id); + return(rseg); } /****************************************************************//** @@ -663,12 +654,9 @@ trx_start_low( ut_ad(trx->conc_state != TRX_ACTIVE); - if (rseg_id == ULINT_UNDEFINED) { - - rseg_id = trx_assign_rseg(); - } + ut_a(rseg_id == ULINT_UNDEFINED); - rseg = trx_sys_get_nth_rseg(trx_sys, rseg_id); + rseg = trx_assign_rseg(srv_rollback_segments); trx->id = trx_sys_get_new_trx_id(); @@ -719,107 +707,179 @@ trx_start( } /****************************************************************//** -Commits a transaction. */ -UNIV_INTERN +Set the transaction serialisation number. */ +static void -trx_commit_off_kernel( -/*==================*/ - trx_t* trx) /*!< in: transaction */ +trx_serialisation_number_get( +/*=========================*/ + trx_t* trx) /*!< in: transaction */ { - page_t* update_hdr_page; - ib_uint64_t lsn = 0; trx_rseg_t* rseg; - trx_undo_t* undo; - mtr_t mtr; - ut_ad(mutex_own(&kernel_mutex)); + rseg = trx->rseg; - trx->must_flush_log_later = FALSE; + ut_ad(mutex_own(&rseg->mutex)); - rseg = trx->rseg; + mutex_enter(&kernel_mutex); - if (trx->insert_undo != NULL || trx->update_undo != NULL) { + trx->no = trx_sys_get_new_trx_id(); + + /* If the rollack segment is not empty then the + new trx_t::no can't be less than any trx_t::no + already in the rollback segment. User threads only + produce events when a rollback segment is empty. */ + + if (rseg->last_page_no == FIL_NULL) { + void* ptr; + rseg_queue_t rseg_queue; + + rseg_queue.rseg = rseg; + rseg_queue.trx_no = trx->no; + + mutex_enter(&purge_sys->bh_mutex); + + /* This is to reduce the pressure on the kernel mutex, + though in reality it should make very little (read no) + difference because this code path is only taken when the + rbs is empty. */ mutex_exit(&kernel_mutex); - mtr_start(&mtr); + ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue); + ut_a(ptr); - /* Change the undo log segment states from TRX_UNDO_ACTIVE - to some other state: these modifications to the file data - structure define the transaction as committed in the file - based world, at the serialization point of the log sequence - number lsn obtained below. */ + mutex_exit(&purge_sys->bh_mutex); + } else { + mutex_exit(&kernel_mutex); + } +} - mutex_enter(&(rseg->mutex)); +/****************************************************************//** +Assign the transaction its history serialisation number and write the +update UNDO log record to the assigned rollback segment. +@return the LSN of the UNDO log write. */ +static +ib_uint64_t +trx_write_serialisation_history( +/*============================*/ + trx_t* trx) /*!< in: transaction */ +{ + mtr_t mtr; + trx_rseg_t* rseg; - if (trx->insert_undo != NULL) { - trx_undo_set_state_at_finish(trx->insert_undo, &mtr); - } + ut_ad(!mutex_own(&kernel_mutex)); - undo = trx->update_undo; + rseg = trx->rseg; - if (undo) { - mutex_enter(&kernel_mutex); - trx->no = trx_sys_get_new_trx_no(); - mutex_exit(&kernel_mutex); + mtr_start(&mtr); - /* It is not necessary to obtain trx->undo_mutex here - because only a single OS thread is allowed to do the - transaction commit for this transaction. */ + /* Change the undo log segment states from TRX_UNDO_ACTIVE + to some other state: these modifications to the file data + structure define the transaction as committed in the file + based domain, at the serialization point of the log sequence + number lsn obtained below. */ - update_hdr_page = trx_undo_set_state_at_finish( - undo, &mtr); + if (trx->update_undo != NULL) { + page_t* undo_hdr_page; + trx_undo_t* undo = trx->update_undo; - /* We have to do the cleanup for the update log while - holding the rseg mutex because update log headers - have to be put to the history list in the order of - the trx number. */ + /* We have to hold the rseg mutex because update + log headers have to be put to the history list in the + (serialisation) order of the UNDO trx number. This is + required for the purge in-memory data structures too. */ - trx_undo_update_cleanup(trx, update_hdr_page, &mtr); - } + mutex_enter(&rseg->mutex); - mutex_exit(&(rseg->mutex)); + /* Assign the transaction serialisation number and also + update the purge min binary heap if this is the first + UNDO log being written to the assigned rollback segment. */ - /* Update the latest MySQL binlog name and offset info - in trx sys header if MySQL binlogging is on or the database - server is a MySQL replication slave */ - - if (trx->mysql_log_file_name - && trx->mysql_log_file_name[0] != '\0') { - trx_sys_update_mysql_binlog_offset( - trx->mysql_log_file_name, - trx->mysql_log_offset, - TRX_SYS_MYSQL_LOG_INFO, &mtr); - trx->mysql_log_file_name = NULL; - } + trx_serialisation_number_get(trx); - /* The following call commits the mini-transaction, making the - whole transaction committed in the file-based world, at this - log sequence number. The transaction becomes 'durable' when - we write the log to disk, but in the logical sense the commit - in the file-based data structures (undo logs etc.) happens - here. - - NOTE that transaction numbers, which are assigned only to - transactions with an update undo log, do not necessarily come - in exactly the same order as commit lsn's, if the transactions - have different rollback segments. To get exactly the same - order we should hold the kernel mutex up to this point, - adding to the contention of the kernel mutex. However, if - a transaction T2 is able to see modifications made by - a transaction T1, T2 will always get a bigger transaction - number and a bigger commit lsn than T1. */ + /* It is not necessary to obtain trx->undo_mutex here + because only a single OS thread is allowed to do the + transaction commit for this transaction. */ - /*--------------*/ - mtr_commit(&mtr); - /*--------------*/ - lsn = mtr.end_lsn; + undo_hdr_page = trx_undo_set_state_at_finish(undo, &mtr); + + trx_undo_update_cleanup(trx, undo_hdr_page, &mtr); + } else { + mutex_enter(&rseg->mutex); + } + + if (trx->insert_undo != NULL) { + trx_undo_set_state_at_finish(trx->insert_undo, &mtr); + } + + mutex_exit(&rseg->mutex); + + /* Update the latest MySQL binlog name and offset info + in trx sys header if MySQL binlogging is on or the database + server is a MySQL replication slave */ + + if (trx->mysql_log_file_name + && trx->mysql_log_file_name[0] != '\0') { + + trx_sys_update_mysql_binlog_offset( + trx->mysql_log_file_name, + trx->mysql_log_offset, + TRX_SYS_MYSQL_LOG_INFO, &mtr); + + trx->mysql_log_file_name = NULL; + } + + /* The following call commits the mini-transaction, making the + whole transaction committed in the file-based world, at this + log sequence number. The transaction becomes 'durable' when + we write the log to disk, but in the logical sense the commit + in the file-based data structures (undo logs etc.) happens + here. + + NOTE that transaction numbers, which are assigned only to + transactions with an update undo log, do not necessarily come + in exactly the same order as commit lsn's, if the transactions + have different rollback segments. To get exactly the same + order we should hold the kernel mutex up to this point, + adding to the contention of the kernel mutex. However, if + a transaction T2 is able to see modifications made by + a transaction T1, T2 will always get a bigger transaction + number and a bigger commit lsn than T1. */ + + /*--------------*/ + mtr_commit(&mtr); + /*--------------*/ + + return(mtr.end_lsn); +} + +/****************************************************************//** +Commits a transaction. */ +UNIV_INTERN +void +trx_commit_off_kernel( +/*==================*/ + trx_t* trx) /*!< in: transaction */ +{ + ib_uint64_t lsn; + + ut_ad(mutex_own(&kernel_mutex)); + + trx->must_flush_log_later = FALSE; + + /* If the transaction made any updates then we need to write the + UNDO logs for the updates to the assigned rollback segment. */ + + if (trx->insert_undo != NULL || trx->update_undo != NULL) { + mutex_exit(&kernel_mutex); + + lsn = trx_write_serialisation_history(trx); mutex_enter(&kernel_mutex); + } else { + lsn = 0; } - ut_ad(trx->conc_state == TRX_ACTIVE - || trx->conc_state == TRX_PREPARED); + ut_ad(trx->conc_state == TRX_ACTIVE || trx->conc_state == TRX_PREPARED); ut_ad(mutex_own(&kernel_mutex)); /* The following assignment makes the transaction committed in memory diff --git a/storage/innobase/ut/ut0ut.c b/storage/innobase/ut/ut0ut.c index 4f4dfc5eed8..cd0894b132a 100644 --- a/storage/innobase/ut/ut0ut.c +++ b/storage/innobase/ut/ut0ut.c @@ -1,13 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. -Copyright (c) 2009, Sun Microsystems, Inc. - -Portions of this file contain modifications contributed and copyrighted by -Sun Microsystems, Inc. Those modifications are gratefully acknowledged and -are described briefly in the InnoDB documentation. The contributions by -Sun Microsystems are incorporated with their permission, and subject to the -conditions contained in the file COPYING.Sun_Microsystems. +Copyright (c) 2011, Oracle Corpn. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software |