Diffstat (limited to 'storage/innobase/sync')
-rw-r--r-- | storage/innobase/sync/sync0arr.cc | 1103
-rw-r--r-- | storage/innobase/sync/sync0debug.cc | 1808
-rw-r--r-- | storage/innobase/sync/sync0rw.cc | 1056
-rw-r--r-- | storage/innobase/sync/sync0sync.cc | 1671
4 files changed, 3334 insertions, 2304 deletions
diff --git a/storage/innobase/sync/sync0arr.cc b/storage/innobase/sync/sync0arr.cc index d464515a228..e311023223c 100644 --- a/storage/innobase/sync/sync0arr.cc +++ b/storage/innobase/sync/sync0arr.cc @@ -31,6 +31,7 @@ The wait array used in synchronization primitives Created 9/5/1995 Heikki Tuuri *******************************************************/ +#include "ha_prototypes.h" #include "univ.i" #include "sync0arr.h" @@ -52,70 +53,78 @@ Created 9/5/1995 Heikki Tuuri #include <innodb_priv.h> #include "sync0sync.h" +#include "lock0lock.h" #include "sync0rw.h" -#include "os0sync.h" +#include "sync0debug.h" +#include "os0event.h" #include "os0file.h" -#include "lock0lock.h" #include "srv0srv.h" -#include "ha_prototypes.h" /* WAIT ARRAY ========== -The wait array consists of cells each of which has an -an operating system event object created for it. The threads -waiting for a mutex, for example, can reserve a cell -in the array and suspend themselves to wait for the event -to become signaled. When using the wait array, remember to make -sure that some thread holding the synchronization object -will eventually know that there is a waiter in the array and -signal the object, to prevent infinite wait. -Why we chose to implement a wait array? First, to make -mutexes fast, we had to code our own implementation of them, -which only in usually uncommon cases resorts to using -slow operating system primitives. Then we had the choice of -assigning a unique OS event for each mutex, which would -be simpler, or using a global wait array. In some operating systems, -the global wait array solution is more efficient and flexible, -because we can do with a very small number of OS events, -say 200. In NT 3.51, allocating events seems to be a quadratic -algorithm, because 10 000 events are created fast, but -100 000 events takes a couple of minutes to create. - -As of 5.0.30 the above mentioned design is changed. Since now -OS can handle millions of wait events efficiently, we no longer -have this concept of each cell of wait array having one event. -Instead, now the event that a thread wants to wait on is embedded -in the wait object (mutex or rw_lock). We still keep the global -wait array for the sake of diagnostics and also to avoid infinite -wait The error_monitor thread scans the global wait array to signal +The wait array consists of cells each of which has an event object created +for it. The threads waiting for a mutex, for example, can reserve a cell +in the array and suspend themselves to wait for the event to become signaled. +When using the wait array, remember to make sure that some thread holding +the synchronization object will eventually know that there is a waiter in +the array and signal the object, to prevent infinite wait. Why did we choose +to implement a wait array? First, to make mutexes fast, we had to code +our own implementation of them, which only in uncommon cases +resorts to using slow operating system primitives. Then we had the choice of +assigning a unique OS event for each mutex, which would be simpler, or +using a global wait array. In some operating systems, the global wait +array solution is more efficient and flexible, because we can do with +a very small number of OS events, say 200. In NT 3.51, allocating events +seems to be a quadratic algorithm, because 10 000 events are created fast, +but 100 000 events takes a couple of minutes to create. + +As of 5.0.30 the above mentioned design is changed.
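/* A minimal sketch, not part of this patch, of the caller-side protocol
described in the comment above, using the cell API that this revision
introduces below (sync_array_reserve_cell() now returns the reserved cell,
or NULL when the array is full). The helper name, the yield call and the
retry policy are illustrative assumptions. */

static
void
wait_for_mutex(sync_array_t* arr, WaitMutex* mutex)
{
	for (;;) {
		sync_cell_t*	cell;

		/* Reserving a cell also resets the event embedded
		in the mutex and records its signal_count. */
		cell = sync_array_reserve_cell(
			arr, mutex, SYNC_MUTEX, __FILE__, __LINE__);

		if (cell == NULL) {
			/* Array full; a caller could retry on another
			sync array instance here. */
			os_thread_yield();
			continue;
		}

		/* Re-test after the event was reset: a release that
		happened in between would otherwise be missed and we
		could wait forever. */
		if (mutex->state() == MUTEX_STATE_UNLOCKED) {
			sync_array_free_cell(arr, cell);
			return;
		}

		/* Suspends the thread; frees the cell on wake-up. */
		sync_array_wait_event(arr, cell);
	}
}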
Since now OS can handle +millions of wait events efficiently, we no longer have this concept of each +cell of wait array having one event. Instead, now the event that a thread +wants to wait on is embedded in the wait object (mutex or rw_lock). We still +keep the global wait array for the sake of diagnostics and also to avoid +infinite wait. The error_monitor thread scans the global wait array to signal any waiting threads who have missed the signal. */ -/** A cell where an individual thread may wait suspended -until a resource is released. The suspending is implemented -using an operating system event semaphore. */ +typedef SyncArrayMutex::MutexType WaitMutex; +typedef BlockSyncArrayMutex::MutexType BlockWaitMutex; + +/** The latch types that use the sync array. */ +union sync_object_t { + + /** RW lock instance */ + rw_lock_t* lock; + + /** Mutex instance */ + WaitMutex* mutex; + + /** Block mutex instance */ + BlockWaitMutex* bpmutex; +}; + +/** A cell where an individual thread may wait suspended until a resource +is released. The suspending is implemented using an operating system +event semaphore. */ + struct sync_cell_t { - void* wait_object; /*!< pointer to the object the + sync_object_t latch; /*!< pointer to the object the thread is waiting for; if NULL the cell is free for use */ - ib_mutex_t* old_wait_mutex; /*!< the latest wait mutex in cell */ - rw_lock_t* old_wait_rw_lock; - /*!< the latest wait rw-lock - in cell */ ulint request_type; /*!< lock type requested on the object */ const char* file; /*!< in debug version file where requested */ ulint line; /*!< in debug version line where requested */ - os_thread_id_t thread; /*!< thread id of this waiting + os_thread_id_t thread_id; /*!< thread id of this waiting thread */ - ibool waiting; /*!< TRUE if the thread has already + bool waiting; /*!< true if the thread has already called sync_array_event_wait on this cell */ - ib_int64_t signal_count; /*!< We capture the signal_count - of the wait_object when we + int64_t signal_count; /*!< We capture the signal_count + of the latch when we reset the event. This value is then passed on to os_event_wait and we wait only if the event @@ -126,51 +135,66 @@ struct sync_cell_t { the wait cell */ }; -/* NOTE: It is allowed for a thread to wait -for an event allocated for the array without owning the -protecting mutex (depending on the case: OS or database mutex), but -all changes (set or reset) to the state of the event must be made -while owning the mutex. */ +/* NOTE: It is allowed for a thread to wait for an event allocated for +the array without owning the protecting mutex (depending on the case: +OS or database mutex), but all changes (set or reset) to the state of +the event must be made while owning the mutex. */ + /** Synchronization array */ struct sync_array_t { + + /** Constructor + Creates a synchronization wait array. It is protected by a mutex + which is automatically reserved when the functions operating on it + are called. + @param[in] num_cells Number of cells to create */ + sync_array_t(ulint num_cells) + UNIV_NOTHROW; + + /** Destructor */ + ~sync_array_t() + UNIV_NOTHROW; + ulint n_reserved; /*!< number of currently reserved cells in the wait array */ ulint n_cells; /*!< number of cells in the wait array */ sync_cell_t* array; /*!< pointer to wait array */ - ib_mutex_t mutex; /*!< possible database mutex - protecting this data structure */ - os_ib_mutex_t os_mutex; /*!< Possible operating system mutex - protecting the data structure.
- As this data structure is used in - constructing the database mutex, - to prevent infinite recursion - in implementation, we fall back to - an OS mutex. */ + SysMutex mutex; /*!< System mutex protecting the + data structure. As this data + structure is used in constructing + the database mutex, to prevent + infinite recursion in implementation, + we fall back to an OS mutex. */ ulint res_count; /*!< count of cell reservations since creation of the array */ + ulint next_free_slot; /*!< the next free cell in the array */ + ulint first_free_slot;/*!< the last slot that was freed */ }; /** User configured sync array size */ -UNIV_INTERN ulong srv_sync_array_size = 32; +ulong srv_sync_array_size = 1; /** Locally stored copy of srv_sync_array_size */ -static ulint sync_array_size; +ulint sync_array_size; /** The global array of wait cells for implementation of the database's own mutexes and read-write locks */ -static sync_array_t** sync_wait_array; +sync_array_t** sync_wait_array; /** count of how many times an object has been signalled */ -static ulint sg_count; +static ulint sg_count; + +#define sync_array_exit(a) mutex_exit(&(a)->mutex) +#define sync_array_enter(a) mutex_enter(&(a)->mutex) -#ifdef UNIV_SYNC_DEBUG +#ifdef UNIV_DEBUG /******************************************************************//** This function is called only in the debug version. Detects a deadlock of one or more threads because of waits of semaphores. -@return TRUE if deadlock detected */ +@return TRUE if deadlock detected */ static -ibool +bool sync_array_detect_deadlock( /*=======================*/ sync_array_t* arr, /*!< in: wait array; NOTE! the caller must @@ -178,103 +202,68 @@ sync_array_detect_deadlock( sync_cell_t* start, /*!< in: cell where recursive search started */ sync_cell_t* cell, /*!< in: cell to search */ ulint depth); /*!< in: recursion depth */ -#endif /* UNIV_SYNC_DEBUG */ +#endif /* UNIV_DEBUG */ -/*****************************************************************//** -Gets the nth cell in array. -@return cell */ -sync_cell_t* -sync_array_get_nth_cell( -/*====================*/ - sync_array_t* arr, /*!< in: sync array */ - ulint n) /*!< in: index */ -{ - ut_a(arr); - ut_a(n < arr->n_cells); - - return(arr->array + n); -} - -/******************************************************************//** -Looks for a cell with the given thread id. -@return pointer to cell or NULL if not found */ -static -sync_cell_t* -sync_array_find_thread( -/*===================*/ - sync_array_t* arr, /*!< in: wait array */ - os_thread_id_t thread) /*!< in: thread id */ +/** Constructor +Creates a synchronization wait array. It is protected by a mutex +which is automatically reserved when the functions operating on it +are called. 
+@param[in] num_cells Number of cells to create */ +sync_array_t::sync_array_t(ulint num_cells) + UNIV_NOTHROW + : + n_reserved(), + n_cells(), + array(), + mutex(), + res_count(), + next_free_slot(), + first_free_slot() { - ulint i; - sync_cell_t* cell; + ut_a(num_cells > 0); - for (i = 0; i < arr->n_cells; i++) { + array = UT_NEW_ARRAY_NOKEY(sync_cell_t, num_cells); - cell = sync_array_get_nth_cell(arr, i); + ulint sz = sizeof(sync_cell_t) * num_cells; - if (cell->wait_object != NULL - && os_thread_eq(cell->thread, thread)) { + memset(array, 0x0, sz); - return(cell); /* Found */ - } - } + n_cells = num_cells; - return(NULL); /* Not found */ -} + first_free_slot = ULINT_UNDEFINED; -/******************************************************************//** -Reserves the mutex semaphore protecting a sync array. */ -static -void -sync_array_enter( -/*=============*/ - sync_array_t* arr) /*!< in: sync wait array */ -{ - os_mutex_enter(arr->os_mutex); -} - -/******************************************************************//** -Releases the mutex semaphore protecting a sync array. */ -static -void -sync_array_exit( -/*============*/ - sync_array_t* arr) /*!< in: sync wait array */ -{ - os_mutex_exit(arr->os_mutex); + /* Then create the mutex to protect the wait array */ + mutex_create(LATCH_ID_SYNC_ARRAY_MUTEX, &mutex); } -/*******************************************************************//** -Creates a synchronization wait array. It is protected by a mutex -which is automatically reserved when the functions operating on it -are called. -@return own: created wait array */ -static -sync_array_t* -sync_array_create( -/*==============*/ - ulint n_cells) /*!< in: number of cells in the array - to create */ +/** Destructor */ +sync_array_t::~sync_array_t() + UNIV_NOTHROW { - ulint sz; - sync_array_t* arr; + ut_a(n_reserved == 0); - ut_a(n_cells > 0); + sync_array_validate(this); - /* Allocate memory for the data structures */ - arr = static_cast<sync_array_t*>(ut_malloc(sizeof(*arr))); - memset(arr, 0x0, sizeof(*arr)); + /* Release the mutex protecting the wait array */ - sz = sizeof(sync_cell_t) * n_cells; - arr->array = static_cast<sync_cell_t*>(ut_malloc(sz)); - memset(arr->array, 0x0, sz); + mutex_free(&mutex); - arr->n_cells = n_cells; + UT_DELETE_ARRAY(array); +} - /* Then create the mutex to protect the wait array complex */ - arr->os_mutex = os_mutex_create(); +/*****************************************************************//** +Gets the nth cell in array. +@return cell */ +UNIV_INTERN +sync_cell_t* +sync_array_get_nth_cell( +/*====================*/ + sync_array_t* arr, /*!< in: sync array */ + ulint n) /*!< in: index */ +{ + ut_a(n < arr->n_cells); - return(arr); + return(arr->array + n); } /******************************************************************//** @@ -285,36 +274,28 @@ sync_array_free( /*============*/ sync_array_t* arr) /*!< in, own: sync wait array */ { - ut_a(arr->n_reserved == 0); - - sync_array_validate(arr); - - /* Release the mutex protecting the wait array complex */ - - os_mutex_free(arr->os_mutex); - - ut_free(arr->array); - ut_free(arr); + UT_DELETE(arr); } /********************************************************************//** Validates the integrity of the wait array. Checks that the number of reserved cells equals the count variable. 
*/ -UNIV_INTERN void sync_array_validate( /*================*/ sync_array_t* arr) /*!< in: sync wait array */ { ulint i; - sync_cell_t* cell; ulint count = 0; sync_array_enter(arr); for (i = 0; i < arr->n_cells; i++) { + sync_cell_t* cell; + cell = sync_array_get_nth_cell(arr, i); - if (cell->wait_object != NULL) { + + if (cell->latch.mutex != NULL) { count++; } } @@ -332,87 +313,140 @@ sync_cell_get_event( /*================*/ sync_cell_t* cell) /*!< in: non-empty sync array cell */ { - ulint type = cell->request_type; + ulint type = cell->request_type; if (type == SYNC_MUTEX) { - return(((ib_mutex_t*) cell->wait_object)->event); - } else if (type == RW_LOCK_WAIT_EX) { - return(((rw_lock_t*) cell->wait_object)->wait_ex_event); - } else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */ - return(((rw_lock_t*) cell->wait_object)->event); + + return(cell->latch.mutex->event()); + + } else if (type == SYNC_BUF_BLOCK) { + + return(cell->latch.bpmutex->event()); + + } else if (type == RW_LOCK_X_WAIT) { + + return(cell->latch.lock->wait_ex_event); + + } else { /* RW_LOCK_S and RW_LOCK_X wait on the same event */ + + return(cell->latch.lock->event); } } /******************************************************************//** Reserves a wait array cell for waiting for an object. The event of the cell is reset to nonsignalled state. -@return true if free cell is found, otherwise false */ -UNIV_INTERN -bool +@return sync cell to wait on */ +sync_cell_t* sync_array_reserve_cell( /*====================*/ sync_array_t* arr, /*!< in: wait array */ void* object, /*!< in: pointer to the object to wait for */ ulint type, /*!< in: lock request type */ const char* file, /*!< in: file where requested */ - ulint line, /*!< in: line where requested */ - ulint* index) /*!< out: index of the reserved cell */ + ulint line) /*!< in: line where requested */ { sync_cell_t* cell; - os_event_t event; - ulint i; - - ut_a(object); - ut_a(index); sync_array_enter(arr); - arr->res_count++; + if (arr->first_free_slot != ULINT_UNDEFINED) { + /* Try and find a slot in the free list */ + ut_ad(arr->first_free_slot < arr->next_free_slot); + cell = sync_array_get_nth_cell(arr, arr->first_free_slot); + arr->first_free_slot = cell->line; + } else if (arr->next_free_slot < arr->n_cells) { + /* Try and find a slot after the currently allocated slots */ + cell = sync_array_get_nth_cell(arr, arr->next_free_slot); + ++arr->next_free_slot; + } else { + sync_array_exit(arr); - /* Reserve a new cell. */ - for (i = 0; i < arr->n_cells; i++) { - cell = sync_array_get_nth_cell(arr, i); + // We should return NULL and if there is more than + // one sync array, try another sync array instance. + return(NULL); + } - if (cell->wait_object == NULL) { + ++arr->res_count; - cell->waiting = FALSE; - cell->wait_object = object; + ut_ad(arr->n_reserved < arr->n_cells); + ut_ad(arr->next_free_slot <= arr->n_cells); - if (type == SYNC_MUTEX) { - cell->old_wait_mutex = - static_cast<ib_mutex_t*>(object); - } else { - cell->old_wait_rw_lock = - static_cast<rw_lock_t*>(object); - } + ++arr->n_reserved; - cell->request_type = type; + /* Reserve the cell. 
*/ + ut_ad(cell->latch.mutex == NULL); - cell->file = file; - cell->line = line; + cell->request_type = type; - arr->n_reserved++; + if (cell->request_type == SYNC_MUTEX) { + cell->latch.mutex = reinterpret_cast<WaitMutex*>(object); + } else if (cell->request_type == SYNC_BUF_BLOCK) { + cell->latch.bpmutex = reinterpret_cast<BlockWaitMutex*>(object); + } else { + cell->latch.lock = reinterpret_cast<rw_lock_t*>(object); + } - *index = i; + cell->waiting = false; - sync_array_exit(arr); + cell->file = file; + cell->line = line; - /* Make sure the event is reset and also store - the value of signal_count at which the event - was reset. */ - event = sync_cell_get_event(cell); - cell->signal_count = os_event_reset(event); + sync_array_exit(arr); - cell->reservation_time = ut_time(); + cell->thread_id = os_thread_get_curr_id(); - cell->thread = os_thread_get_curr_id(); + cell->reservation_time = ut_time(); - return(true); + /* Make sure the event is reset and also store the value of + signal_count at which the event was reset. */ + os_event_t event = sync_cell_get_event(cell); + cell->signal_count = os_event_reset(event); + + return(cell); +} + +/******************************************************************//** +Frees the cell. NOTE! sync_array_wait_event frees the cell +automatically! */ +void +sync_array_free_cell( +/*=================*/ + sync_array_t* arr, /*!< in: wait array */ + sync_cell_t*& cell) /*!< in/out: the cell in the array */ +{ + sync_array_enter(arr); + + ut_a(cell->latch.mutex != NULL); + + cell->waiting = false; + cell->signal_count = 0; + cell->latch.mutex = NULL; + + /* Setup the list of free slots in the array */ + cell->line = arr->first_free_slot; + + arr->first_free_slot = cell - arr->array; + + ut_a(arr->n_reserved > 0); + arr->n_reserved--; + + if (arr->next_free_slot > arr->n_cells / 2 && arr->n_reserved == 0) { +#ifdef UNIV_DEBUG + for (ulint i = 0; i < arr->next_free_slot; ++i) { + cell = sync_array_get_nth_cell(arr, i); + + ut_ad(!cell->waiting); + ut_ad(cell->latch.mutex == 0); + ut_ad(cell->signal_count == 0); } +#endif /* UNIV_DEBUG */ + arr->next_free_slot = 0; + arr->first_free_slot = ULINT_UNDEFINED; } + sync_array_exit(arr); - /* No free cell found */ - return false; + cell = 0; } /******************************************************************//** @@ -420,30 +454,21 @@ This function should be called when a thread starts to wait on a wait array cell. In the debug version this function checks if the wait for a semaphore will result in a deadlock, in which case prints info and asserts. 
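/* For reference, a sketch (not in the patch) of the free-list bookkeeping
that sync_array_reserve_cell() and sync_array_free_cell() above implement:
a freed cell's line field is reused as a "next free" index, first_free_slot
is the list head, and ULINT_UNDEFINED terminates the list. The helper name
is hypothetical. */

static
sync_cell_t*
sync_array_pop_free_cell(sync_array_t* arr)
{
	if (arr->first_free_slot == ULINT_UNDEFINED) {

		return(NULL);
	}

	sync_cell_t*	cell = sync_array_get_nth_cell(
		arr, arr->first_free_slot);

	/* The line field of a freed cell holds the index of the
	cell that was freed before it. */
	arr->first_free_slot = cell->line;

	return(cell);
}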
*/ -UNIV_INTERN void sync_array_wait_event( /*==================*/ sync_array_t* arr, /*!< in: wait array */ - ulint index) /*!< in: index of the reserved cell */ + sync_cell_t*& cell) /*!< in: index of the reserved cell */ { - sync_cell_t* cell; - os_event_t event; - - ut_a(arr); - sync_array_enter(arr); - cell = sync_array_get_nth_cell(arr, index); + ut_ad(!cell->waiting); + ut_ad(cell->latch.mutex); + ut_ad(os_thread_get_curr_id() == cell->thread_id); - ut_a(cell->wait_object); - ut_a(!cell->waiting); - ut_ad(os_thread_get_curr_id() == cell->thread); + cell->waiting = true; - event = sync_cell_get_event(cell); - cell->waiting = TRUE; - -#ifdef UNIV_SYNC_DEBUG +#ifdef UNIV_DEBUG /* We use simple enter to the mutex below, because if we cannot acquire it at once, mutex_enter would call @@ -452,19 +477,21 @@ sync_array_wait_event( rw_lock_debug_mutex_enter(); - if (TRUE == sync_array_detect_deadlock(arr, cell, cell, 0)) { + if (sync_array_detect_deadlock(arr, cell, cell, 0)) { - fputs("########################################\n", stderr); - ut_error; + ib::fatal() << "########################################" + " Deadlock Detected!"; } rw_lock_debug_mutex_exit(); -#endif +#endif /* UNIV_DEBUG */ sync_array_exit(arr); - os_event_wait_low(event, cell->signal_count); + os_event_wait_low(sync_cell_get_event(cell), cell->signal_count); - sync_array_free_cell(arr, index); + sync_array_free_cell(arr, cell); + + cell = 0; } /******************************************************************//** @@ -478,7 +505,6 @@ sync_array_cell_print( os_thread_id_t* reserver) /*!< out: write reserver or 0 */ { - ib_mutex_t* mutex; rw_lock_t* rwlock; ulint type; ulint writer; @@ -488,37 +514,73 @@ sync_array_cell_print( fprintf(file, "--Thread %lu has waited at %s line %lu" " for %.2f seconds the semaphore:\n", - (ulong) os_thread_pf(cell->thread), + (ulong) os_thread_pf(cell->thread_id), innobase_basename(cell->file), (ulong) cell->line, difftime(time(NULL), cell->reservation_time)); if (type == SYNC_MUTEX) { - /* We use old_wait_mutex in case the cell has already - been freed meanwhile */ - mutex = cell->old_wait_mutex; + WaitMutex* mutex = cell->latch.mutex; + const WaitMutex::MutexPolicy& policy = mutex->policy(); +#ifdef UNIV_DEBUG + const char* name = policy.get_enter_filename(); + if (name == NULL) { + /* The mutex might have been released. */ + name = "NULL"; + } +#endif /* UNIV_DEBUG */ if (mutex) { fprintf(file, "Mutex at %p created file %s line %lu, lock var %lu\n" "Last time reserved by thread %lu in file %s line %lu, " - "waiters flag %lu\n", - (void*) mutex, innobase_basename(mutex->cfile_name), - (ulong) mutex->cline, - (ulong) mutex->lock_word, - mutex->thread_id, - mutex->file_name, (ulong) mutex->line, - (ulong) mutex->waiters); + "\n", + (void*) mutex, + policy.to_string().c_str(), + (ulong) mutex->state() +#ifdef UNIV_DEBUG + ,name, + (ulong) policy.get_enter_line() +#endif /* UNIV_DEBUG */ + ); } - - } else if (type == RW_LOCK_EX - || type == RW_LOCK_WAIT_EX - || type == RW_LOCK_SHARED) { - - fputs(type == RW_LOCK_EX ? "X-lock on" - : type == RW_LOCK_WAIT_EX ? "X-lock (wait_ex) on" + } else if (type == SYNC_BUF_BLOCK) { + BlockWaitMutex* mutex = cell->latch.bpmutex; + + const BlockWaitMutex::MutexPolicy& policy = + mutex->policy(); +#ifdef UNIV_DEBUG + const char* name = policy.get_enter_filename(); + if (name == NULL) { + /* The mutex might have been released. 
*/ + name = "NULL"; + } +#endif /* UNIV_DEBUG */ + + fprintf(file, + "Mutex at %p, %s, lock var %lu\n" +#ifdef UNIV_DEBUG + "Last time reserved in file %s line %lu" +#endif /* UNIV_DEBUG */ + "\n", + (void*) mutex, + policy.to_string().c_str(), + (ulong) mutex->state() +#ifdef UNIV_DEBUG + ,name, + (ulong) policy.get_enter_line() +#endif /* UNIV_DEBUG */ + ); + } else if (type == RW_LOCK_X + || type == RW_LOCK_X_WAIT + || type == RW_LOCK_SX + || type == RW_LOCK_S) { + + fputs(type == RW_LOCK_X ? "X-lock on" + : type == RW_LOCK_X_WAIT ? "X-lock (wait_ex) on" + : type == RW_LOCK_SX ? "SX-lock on" : "S-lock on", file); - rwlock = (rw_lock_t*)cell->old_wait_rw_lock; + rwlock = cell->latch.lock; if (rwlock) { fprintf(file, @@ -528,20 +590,21 @@ sync_array_cell_print( writer = rw_lock_get_writer(rwlock); - if (writer && writer != RW_LOCK_NOT_LOCKED) { + if (writer != RW_LOCK_NOT_LOCKED) { + fprintf(file, "a writer (thread id %lu) has" " reserved it in mode %s", (ulong) os_thread_pf(rwlock->writer_thread), - writer == RW_LOCK_EX - ? " exclusive\n" + writer == RW_LOCK_X ? " exclusive\n" + : writer == RW_LOCK_SX ? " SX\n" : " wait exclusive\n"); *reserver = rwlock->writer_thread; } fprintf(file, - "number of readers %lu, waiters flag %lu, " - "lock_word: %lx\n" + "number of readers %lu, waiters flag %lu," + " lock_word: %lx\n" "Last time read locked in file %s line %lu\n" "Last time write locked in file %s line %lu\n", (ulong) rw_lock_get_reader_count(rwlock), @@ -552,11 +615,14 @@ sync_array_cell_print( rwlock->last_x_file_name, (ulong) rwlock->last_x_line); + /* JAN: TODO: FIX LATER fprintf(file, "Holder thread %lu file %s line %lu\n", - rwlock->thread_id, rwlock->file_name, rwlock->line); - + rwlock->thread_id, rwlock->file_name, + rwlock->line); + */ } + } else { ut_error; } @@ -566,11 +632,37 @@ sync_array_cell_print( } } -#ifdef UNIV_SYNC_DEBUG +/******************************************************************//** +Looks for a cell with the given thread id. +@return pointer to cell or NULL if not found */ +static +sync_cell_t* +sync_array_find_thread( +/*===================*/ + sync_array_t* arr, /*!< in: wait array */ + os_thread_id_t thread) /*!< in: thread id */ +{ + ulint i; + + for (i = 0; i < arr->n_cells; i++) { + sync_cell_t* cell; + + cell = sync_array_get_nth_cell(arr, i); + + if (cell->latch.mutex != NULL + && os_thread_eq(cell->thread_id, thread)) { + + return(cell); /* Found */ + } + } + + return(NULL); /* Not found */ +} +#ifdef UNIV_DEBUG /******************************************************************//** Recursion step for deadlock detection. -@return TRUE if deadlock detected */ +@return TRUE if deadlock detected */ static ibool sync_array_deadlock_step( @@ -609,12 +701,28 @@ sync_array_deadlock_step( return(FALSE); } +/** +Report an error to stderr. +@param lock rw-lock instance +@param debug rw-lock debug information +@param cell thread context */ +void +sync_array_report_error( + rw_lock_t* lock, + rw_lock_debug_t* debug, + sync_cell_t* cell) +{ + fprintf(stderr, "rw-lock %p ", (void*) lock); + sync_array_cell_print(stderr, cell, 0); + rw_lock_debug_print(stderr, debug); +} + /******************************************************************//** This function is called only in the debug version. Detects a deadlock of one or more threads because of waits of semaphores. -@return TRUE if deadlock detected */ +@return TRUE if deadlock detected */ static -ibool +bool sync_array_detect_deadlock( /*=======================*/ sync_array_t* arr, /*!< in: wait array; NOTE! 
the caller must @@ -623,7 +731,6 @@ sync_array_detect_deadlock( sync_cell_t* cell, /*!< in: cell to search */ ulint depth) /*!< in: recursion depth */ { - ib_mutex_t* mutex; rw_lock_t* lock; os_thread_id_t thread; ibool ret; @@ -633,24 +740,25 @@ sync_array_detect_deadlock( ut_a(arr); ut_a(start); ut_a(cell); - ut_ad(cell->wait_object); - ut_ad(os_thread_get_curr_id() == start->thread); + ut_ad(cell->latch.mutex != 0); + ut_ad(os_thread_get_curr_id() == start->thread_id); ut_ad(depth < 100); depth++; if (!cell->waiting) { - - return(FALSE); /* No deadlock here */ + /* No deadlock here */ + return(false); } - if (cell->request_type == SYNC_MUTEX) { - - mutex = static_cast<ib_mutex_t*>(cell->wait_object); + switch (cell->request_type) { + case SYNC_MUTEX: { - if (mutex_get_lock_word(mutex) != 0) { + WaitMutex* mutex = cell->latch.mutex; + const WaitMutex::MutexPolicy& policy = mutex->policy(); - thread = mutex->thread_id; + if (mutex->state() != MUTEX_STATE_UNLOCKED) { + thread = policy.get_thread_id(); /* Note that mutex->thread_id above may be also OS_THREAD_ID_UNDEFINED, because the @@ -659,64 +767,165 @@ sync_array_detect_deadlock( released the mutex: in this case no deadlock can occur, as the wait array cannot contain a thread with ID_UNDEFINED value. */ + ret = sync_array_deadlock_step( + arr, start, thread, 0, depth); - ret = sync_array_deadlock_step(arr, start, thread, 0, - depth); if (ret) { - fprintf(stderr, - "Mutex %p owned by thread %lu file %s line %lu\n", - mutex, (ulong) os_thread_pf(mutex->thread_id), - mutex->file_name, (ulong) mutex->line); - sync_array_cell_print(stderr, cell, &reserver); + const char* name; - return(TRUE); + name = policy.get_enter_filename(); + + if (name == NULL) { + /* The mutex might have been + released. */ + name = "NULL"; + } + + ib::info() + << "Mutex " << mutex << " owned by" + " thread " << os_thread_pf(thread) + << " file " << name << " line " + << policy.get_enter_line(); + + sync_array_cell_print(stderr, cell, 0); + + return(true); } } - return(FALSE); /* No deadlock */ + /* No deadlock */ + return(false); + } - } else if (cell->request_type == RW_LOCK_EX - || cell->request_type == RW_LOCK_WAIT_EX) { + case SYNC_BUF_BLOCK: { - lock = static_cast<rw_lock_t*>(cell->wait_object); + BlockWaitMutex* mutex = cell->latch.bpmutex; + + const BlockWaitMutex::MutexPolicy& policy = + mutex->policy(); + + if (mutex->state() != MUTEX_STATE_UNLOCKED) { + thread = policy.get_thread_id(); + + /* Note that mutex->thread_id above may be + also OS_THREAD_ID_UNDEFINED, because the + thread which held the mutex maybe has not + yet updated the value, or it has already + released the mutex: in this case no deadlock + can occur, as the wait array cannot contain + a thread with ID_UNDEFINED value. */ + ret = sync_array_deadlock_step( + arr, start, thread, 0, depth); + + if (ret) { + const char* name; + + name = policy.get_enter_filename(); + + if (name == NULL) { + /* The mutex might have been + released. 
*/ + name = "NULL"; + } + + ib::info() + << "Mutex " << mutex << " owned by" + " thread " << os_thread_pf(thread) + << " file " << name << " line " + << policy.get_enter_line(); + + sync_array_cell_print(stderr, cell, 0); + + return(true); + } + } + + /* No deadlock */ + return(false); + } + case RW_LOCK_X: + case RW_LOCK_X_WAIT: + + lock = cell->latch.lock; for (debug = UT_LIST_GET_FIRST(lock->debug_list); - debug != 0; + debug != NULL; debug = UT_LIST_GET_NEXT(list, debug)) { thread = debug->thread_id; - if (((debug->lock_type == RW_LOCK_EX) - && !os_thread_eq(thread, cell->thread)) - || ((debug->lock_type == RW_LOCK_WAIT_EX) - && !os_thread_eq(thread, cell->thread)) - || (debug->lock_type == RW_LOCK_SHARED)) { + switch (debug->lock_type) { + case RW_LOCK_X: + case RW_LOCK_SX: + case RW_LOCK_X_WAIT: + if (os_thread_eq(thread, cell->thread_id)) { + break; + } + /* fall through */ + case RW_LOCK_S: /* The (wait) x-lock request can block infinitely only if someone (can be also cell thread) is holding s-lock, or someone - (cannot be cell thread) (wait) x-lock, and - he is blocked by start thread */ + (cannot be cell thread) (wait) x-lock or + sx-lock, and he is blocked by start thread */ ret = sync_array_deadlock_step( arr, start, thread, debug->pass, depth); + if (ret) { -print: - fprintf(stderr, "rw-lock %p ", - (void*) lock); - sync_array_cell_print(stderr, cell, &reserver); + sync_array_report_error( + lock, debug, cell); rw_lock_debug_print(stderr, debug); return(TRUE); } } } - return(FALSE); + return(false); + + case RW_LOCK_SX: + + lock = cell->latch.lock; + + for (debug = UT_LIST_GET_FIRST(lock->debug_list); + debug != 0; + debug = UT_LIST_GET_NEXT(list, debug)) { + + thread = debug->thread_id; - } else if (cell->request_type == RW_LOCK_SHARED) { + switch (debug->lock_type) { + case RW_LOCK_X: + case RW_LOCK_SX: + case RW_LOCK_X_WAIT: - lock = static_cast<rw_lock_t*>(cell->wait_object); + if (os_thread_eq(thread, cell->thread_id)) { + break; + } + + /* The sx-lock request can block infinitely + only if someone (can be also cell thread) is + holding (wait) x-lock or sx-lock, and he is + blocked by start thread */ + + ret = sync_array_deadlock_step( + arr, start, thread, debug->pass, + depth); + + if (ret) { + sync_array_report_error( + lock, debug, cell); + return(TRUE); + } + } + } + + return(false); + + case RW_LOCK_S: + + lock = cell->latch.lock; for (debug = UT_LIST_GET_FIRST(lock->debug_list); debug != 0; @@ -724,8 +933,8 @@ print: thread = debug->thread_id; - if ((debug->lock_type == RW_LOCK_EX) - || (debug->lock_type == RW_LOCK_WAIT_EX)) { + if (debug->lock_type == RW_LOCK_X + || debug->lock_type == RW_LOCK_X_WAIT) { /* The s-lock request can block infinitely only if someone (can also be cell thread) is @@ -735,119 +944,107 @@ print: ret = sync_array_deadlock_step( arr, start, thread, debug->pass, depth); + if (ret) { - goto print; + sync_array_report_error( + lock, debug, cell); + return(TRUE); } } } - return(FALSE); + return(false); - } else { + default: ut_error; } - return(TRUE); /* Execution never reaches this line: for compiler - fooling only */ + return(true); } -#endif /* UNIV_SYNC_DEBUG */ +#endif /* UNIV_DEBUG */ /******************************************************************//** Determines if we can wake up the thread waiting for a sempahore. 
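/* A reading aid for sync_arr_cell_can_wake_up() below, summarizing the
rw_lock_t::lock_word scheme as documented in sync0rw.h (see that file for
the authoritative state table; this recap is an assumption, not part of
the patch): lock_word starts at X_LOCK_DECR when the lock is free, an
S-lock subtracts 1, an SX-lock subtracts X_LOCK_HALF_DECR and an X-lock
subtracts X_LOCK_DECR. Hence lock_word > X_LOCK_HALF_DECR means at most
read locked (an X or SX request may proceed), lock_word > 0 means no
writer holds or waits for the lock (an S request may proceed), and
lock_word == 0 means all readers and SX holders have left (a
RW_LOCK_X_WAIT waiter may proceed). */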
*/ static -ibool +bool sync_arr_cell_can_wake_up( /*======================*/ sync_cell_t* cell) /*!< in: cell to search */ { - ib_mutex_t* mutex; rw_lock_t* lock; - if (cell->request_type == SYNC_MUTEX) { + switch (cell->request_type) { + WaitMutex* mutex; + BlockWaitMutex* bpmutex; + case SYNC_MUTEX: + mutex = cell->latch.mutex; + + os_rmb; + if (mutex->state() == MUTEX_STATE_UNLOCKED) { - mutex = static_cast<ib_mutex_t*>(cell->wait_object); + return(true); + } + + break; + + case SYNC_BUF_BLOCK: + bpmutex = cell->latch.bpmutex; os_rmb; - if (mutex_get_lock_word(mutex) == 0) { + if (bpmutex->state() == MUTEX_STATE_UNLOCKED) { - return(TRUE); + return(true); } - } else if (cell->request_type == RW_LOCK_EX) { + break; - lock = static_cast<rw_lock_t*>(cell->wait_object); + case RW_LOCK_X: + case RW_LOCK_SX: + lock = cell->latch.lock; os_rmb; - if (lock->lock_word > 0) { + if (lock->lock_word > X_LOCK_HALF_DECR) { /* Either unlocked or only read locked. */ - return(TRUE); + return(true); } - } else if (cell->request_type == RW_LOCK_WAIT_EX) { + break; - lock = static_cast<rw_lock_t*>(cell->wait_object); + case RW_LOCK_X_WAIT: - /* lock_word == 0 means all readers have left */ + lock = cell->latch.lock; + + /* lock_word == 0 means all readers or sx have left */ os_rmb; if (lock->lock_word == 0) { - return(TRUE); + return(true); } - } else if (cell->request_type == RW_LOCK_SHARED) { - lock = static_cast<rw_lock_t*>(cell->wait_object); + break; + + case RW_LOCK_S: + + lock = cell->latch.lock; /* lock_word > 0 means no writer or reserved writer */ os_rmb; if (lock->lock_word > 0) { - return(TRUE); + return(true); } } - return(FALSE); -} - -/******************************************************************//** -Frees the cell. NOTE! sync_array_wait_event frees the cell -automatically! */ -UNIV_INTERN -void -sync_array_free_cell( -/*=================*/ - sync_array_t* arr, /*!< in: wait array */ - ulint index) /*!< in: index of the cell in array */ -{ - sync_cell_t* cell; - - sync_array_enter(arr); - - cell = sync_array_get_nth_cell(arr, index); - - ut_a(cell->wait_object != NULL); - - cell->waiting = FALSE; - cell->wait_object = NULL; - cell->signal_count = 0; - - ut_a(arr->n_reserved > 0); - arr->n_reserved--; - - sync_array_exit(arr); + return(false); } /**********************************************************************//** Increments the signalled count. */ -UNIV_INTERN void -sync_array_object_signalled(void) -/*=============================*/ +sync_array_object_signalled() +/*=========================*/ { -#ifdef HAVE_ATOMIC_BUILTINS - (void) os_atomic_increment_ulint(&sg_count, 1); -#else ++sg_count; -#endif /* HAVE_ATOMIC_BUILTINS */ } /**********************************************************************//** @@ -864,27 +1061,19 @@ sync_array_wake_threads_if_sema_free_low( /*=====================================*/ sync_array_t* arr) /* in/out: wait array */ { - ulint i = 0; - ulint count; - sync_array_enter(arr); - for (count = 0; count < arr->n_reserved; ++i) { + for (ulint i = 0; i < arr->next_free_slot; ++i) { sync_cell_t* cell; cell = sync_array_get_nth_cell(arr, i); - if (cell->wait_object != NULL) { - - count++; - - if (sync_arr_cell_can_wake_up(cell)) { - os_event_t event; + if (cell->latch.mutex != 0 && sync_arr_cell_can_wake_up(cell)) { + os_event_t event; - event = sync_cell_get_event(cell); + event = sync_cell_get_event(cell); - os_event_set(event); - } + os_event_set(event); } } @@ -899,14 +1088,11 @@ function should be called about every 1 second in the server. 
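/* The expected caller is the server's error-monitor thread, whose code is
outside this diff; the loop below is only a hypothetical sketch of the
intended call pattern (the function and variable names inside it match the
declarations in this file, the wrapper name is assumed): */

static
void
error_monitor_loop()
{
	for (;;) {
		os_thread_id_t	waiter;
		const void*	sema;

		/* Wake any waiter whose semaphore has in fact
		become free. */
		sync_arr_wake_threads_if_sema_free();

		/* Warn about, and eventually abort on, very
		long waits. */
		sync_array_print_long_waits(&waiter, &sema);

		os_thread_sleep(1000000);	/* one second,
						in microseconds */
	}
}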
Note that there's a race condition between this thread and mutex_exit changing the lock_word and calling signal_object, so sometimes this finds threads to wake up even when nothing has gone wrong. */ -UNIV_INTERN void sync_arr_wake_threads_if_sema_free(void) /*====================================*/ { - ulint i; - - for (i = 0; i < sync_array_size; ++i) { + for (ulint i = 0; i < sync_array_size; ++i) { sync_array_wake_threads_if_sema_free_low( sync_wait_array[i]); @@ -915,9 +1101,9 @@ sync_arr_wake_threads_if_sema_free(void) /**********************************************************************//** Prints warnings of long semaphore waits to stderr. -@return TRUE if fatal semaphore wait threshold was exceeded */ +@return TRUE if fatal semaphore wait threshold was exceeded */ static -ibool +bool sync_array_print_long_waits_low( /*============================*/ sync_array_t* arr, /*!< in: sync array instance */ @@ -925,14 +1111,14 @@ sync_array_print_long_waits_low( const void** sema, /*!< out: longest-waited-for semaphore */ ibool* noticed)/*!< out: TRUE if long wait noticed */ { - ulint i; ulint fatal_timeout = srv_fatal_semaphore_wait_threshold; ibool fatal = FALSE; double longest_diff = 0; + ulint i; /* For huge tables, skip the check during CHECK TABLE etc... */ if (fatal_timeout > SRV_SEMAPHORE_WAIT_EXTENSION) { - return(FALSE); + return(false); } #ifdef UNIV_DEBUG_VALGRIND @@ -949,26 +1135,23 @@ sync_array_print_long_waits_low( for (i = 0; i < arr->n_cells; i++) { - double diff; sync_cell_t* cell; - void* wait_object; - os_thread_id_t reserver=0; + void* latch; cell = sync_array_get_nth_cell(arr, i); - wait_object = cell->wait_object; + latch = cell->latch.mutex; - if (wait_object == NULL || !cell->waiting) { + if (latch == NULL || !cell->waiting) { continue; } - diff = difftime(time(NULL), cell->reservation_time); + double diff = difftime(time(NULL), cell->reservation_time); if (diff > SYNC_ARRAY_TIMEOUT) { - fputs("InnoDB: Warning: a long semaphore wait:\n", - stderr); - sync_array_cell_print(stderr, cell, &reserver); + ib::warn() << "A long semaphore wait:"; + sync_array_cell_print(stderr, cell, 0); *noticed = TRUE; } @@ -978,8 +1161,8 @@ sync_array_print_long_waits_low( if (diff > longest_diff) { longest_diff = diff; - *sema = wait_object; - *waiter = cell->thread; + *sema = latch; + *waiter = cell->thread_id; } } @@ -994,7 +1177,7 @@ sync_array_print_long_waits_low( cell = sync_array_get_nth_cell(arr, i); - wait_object = cell->wait_object; + wait_object = cell->latch.mutex; if (wait_object == NULL || !cell->waiting) { @@ -1003,7 +1186,7 @@ sync_array_print_long_waits_low( fputs("InnoDB: Warning: semaphore wait:\n", stderr); - sync_array_cell_print(stderr, cell, &reserver); + sync_array_cell_print(stderr, cell, 0); /* Try to output cell information for writer recursive way */ while (reserver != (os_thread_id_t)ULINT_UNDEFINED) { @@ -1012,7 +1195,7 @@ sync_array_print_long_waits_low( reserver_wait = sync_array_find_thread(arr, reserver); if (reserver_wait && - reserver_wait->wait_object != NULL && + reserver_wait->latch.mutex != NULL && reserver_wait->waiting) { fputs("InnoDB: Warning: Writer thread is waiting this semaphore:\n", stderr); @@ -1020,9 +1203,11 @@ sync_array_print_long_waits_low( sync_array_cell_print(stderr, reserver_wait, &reserver); loop++; + /* TODO: FIXME: if (reserver_wait->thread == reserver) { reserver = (os_thread_id_t)ULINT_UNDEFINED; } + */ } else { reserver = (os_thread_id_t)ULINT_UNDEFINED; } @@ -1044,8 +1229,7 @@ sync_array_print_long_waits_low( 
/**********************************************************************//** Prints warnings of long semaphore waits to stderr. -@return TRUE if fatal semaphore wait threshold was exceeded */ -UNIV_INTERN +@return TRUE if fatal semaphore wait threshold was exceeded */ ibool sync_array_print_long_waits( /*========================*/ @@ -1088,11 +1272,12 @@ sync_array_print_long_waits( fprintf(stderr, "InnoDB: Pending preads %lu, pwrites %lu\n", - (ulong) os_file_n_pending_preads, - (ulong) os_file_n_pending_pwrites); + (ulong) os_n_pending_reads, + (ulong) os_n_pending_writes); srv_print_innodb_monitor = TRUE; - os_event_set(srv_monitor_event); + + lock_set_timeout_event(); os_thread_sleep(30000000); @@ -1127,9 +1312,9 @@ sync_array_print_info_low( cell = sync_array_get_nth_cell(arr, i); - if (cell->wait_object != NULL) { + if (cell->latch.mutex != 0) { count++; - sync_array_cell_print(file, cell, &r); + sync_array_cell_print(file, cell, 0); } } } @@ -1152,65 +1337,50 @@ sync_array_print_info( /**********************************************************************//** Create the primary system wait array(s), they are protected by an OS mutex */ -UNIV_INTERN void sync_array_init( /*============*/ ulint n_threads) /*!< in: Number of slots to create in all arrays */ { - ulint i; - ulint n_slots; - ut_a(sync_wait_array == NULL); ut_a(srv_sync_array_size > 0); ut_a(n_threads > 0); sync_array_size = srv_sync_array_size; - /* We have to use ut_malloc() because the mutex infrastructure - hasn't been initialised yet. It is required by mem_alloc() and - the heap functions. */ - - sync_wait_array = static_cast<sync_array_t**>( - ut_malloc(sizeof(*sync_wait_array) * sync_array_size)); + sync_wait_array = UT_NEW_ARRAY_NOKEY(sync_array_t*, sync_array_size); - n_slots = 1 + (n_threads - 1) / sync_array_size; + ulint n_slots = 1 + (n_threads - 1) / sync_array_size; - for (i = 0; i < sync_array_size; ++i) { + for (ulint i = 0; i < sync_array_size; ++i) { - sync_wait_array[i] = sync_array_create(n_slots); + sync_wait_array[i] = UT_NEW_NOKEY(sync_array_t(n_slots)); } } /**********************************************************************//** Close sync array wait sub-system. */ -UNIV_INTERN void sync_array_close(void) /*==================*/ { - ulint i; - - for (i = 0; i < sync_array_size; ++i) { + for (ulint i = 0; i < sync_array_size; ++i) { sync_array_free(sync_wait_array[i]); } - ut_free(sync_wait_array); + UT_DELETE_ARRAY(sync_wait_array); sync_wait_array = NULL; } /**********************************************************************//** Print info about the sync array(s). */ -UNIV_INTERN void sync_array_print( /*=============*/ FILE* file) /*!< in/out: Print to this stream */ { - ulint i; - - for (i = 0; i < sync_array_size; ++i) { + for (ulint i = 0; i < sync_array_size; ++i) { sync_array_print_info(file, sync_wait_array[i]); } @@ -1220,25 +1390,6 @@ sync_array_print( } /**********************************************************************//** -Get an instance of the sync wait array. */ -UNIV_INTERN -sync_array_t* -sync_array_get(void) -/*================*/ -{ - ulint i; - static ulint count; - -#ifdef HAVE_ATOMIC_BUILTINS - i = os_atomic_increment_ulint(&count, 1); -#else - i = count++; -#endif /* HAVE_ATOMIC_BUILTINS */ - - return(sync_wait_array[i % sync_array_size]); -} - -/**********************************************************************//** Prints info of the wait array without using any mutexes/semaphores. 
*/ UNIV_INTERN void @@ -1258,7 +1409,7 @@ sync_array_print_innodb(void) cell = sync_array_get_nth_cell(arr, i); - wait_object = cell->wait_object; + wait_object = cell->latch.mutex; if (wait_object == NULL || !cell->waiting) { @@ -1276,15 +1427,17 @@ sync_array_print_innodb(void) reserver_wait = sync_array_find_thread(arr, reserver); if (reserver_wait && - reserver_wait->wait_object != NULL && + reserver_wait->latch.mutex != NULL && reserver_wait->waiting) { fputs("InnoDB: Warning: Writer thread is waiting this semaphore:\n", stderr); sync_array_cell_print(stderr, reserver_wait, &reserver); + /* JAN: FIXME: if (reserver_wait->thread == reserver) { reserver = (os_thread_id_t)ULINT_UNDEFINED; } + */ } else { reserver = (os_thread_id_t)ULINT_UNDEFINED; } @@ -1333,7 +1486,7 @@ sync_arr_get_item( wait_cell = sync_array_get_nth_cell(sync_arr, i); if (wait_cell) { - wait_object = wait_cell->wait_object; + wait_object = wait_cell->latch.mutex; if(wait_object != NULL && wait_cell->waiting) { found = TRUE; @@ -1375,63 +1528,73 @@ sync_arr_fill_sys_semphore_waits_table( for(ulint i=0; i < n_items;i++) { sync_cell_t *cell=NULL; if (sync_arr_get_item(i, &cell)) { - ib_mutex_t* mutex; + WaitMutex* mutex; type = cell->request_type; - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_THREAD_ID], (longlong)os_thread_pf(cell->thread))); + /* JAN: FIXME + OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_THREAD_ID], + (longlong)os_thread_pf(cell->thread))); + */ OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_FILE], innobase_basename(cell->file))); OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LINE], cell->line)); OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAIT_TIME], (longlong)difftime(time(NULL), cell->reservation_time))); if (type == SYNC_MUTEX) { - mutex = static_cast<ib_mutex_t*>(cell->old_wait_mutex); + mutex = static_cast<WaitMutex*>(cell->latch.mutex); if (mutex) { - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], mutex->cmutex_name)); + // JAN: FIXME + // OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], mutex->cmutex_name)); OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT], (longlong)mutex)); OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "MUTEX")); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID], (longlong)mutex->thread_id)); - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(mutex->file_name))); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE], mutex->line)); - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_CREATED_FILE], innobase_basename(mutex->cfile_name))); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_CREATED_LINE], mutex->cline)); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG], (longlong)mutex->waiters)); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD], (longlong)mutex->lock_word)); - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(mutex->file_name))); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE], mutex->line)); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT], mutex->count_os_wait)); + //OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID], (longlong)mutex->thread_id)); + //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(mutex->file_name))); + //OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE], mutex->line)); + //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_CREATED_FILE], 
innobase_basename(mutex->cfile_name))); + //OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_CREATED_LINE], mutex->cline)); + //OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG], (longlong)mutex->waiters)); + //OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD], (longlong)mutex->lock_word)); + //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(mutex->file_name))); + //OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE], mutex->line)); + //OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT], mutex->count_os_wait)); } - } else if (type == RW_LOCK_EX - || type == RW_LOCK_WAIT_EX - || type == RW_LOCK_SHARED) { + } else if (type == RW_LOCK_X_WAIT + || type == RW_LOCK_X + || type == RW_LOCK_SX + || type == RW_LOCK_S) { rw_lock_t* rwlock=NULL; - rwlock = static_cast<rw_lock_t *> (cell->old_wait_rw_lock); + rwlock = static_cast<rw_lock_t *> (cell->latch.lock); if (rwlock) { ulint writer = rw_lock_get_writer(rwlock); OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT], (longlong)rwlock)); - if (type == RW_LOCK_EX) { - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_EX")); - } else if (type == RW_LOCK_WAIT_EX) { - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_WAIT_EX")); - } else if (type == RW_LOCK_SHARED) { - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_SHARED")); + if (type == RW_LOCK_X) { + OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_X")); + } else if (type == RW_LOCK_X_WAIT) { + OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_X_WAIT")); + } else if (type == RW_LOCK_S) { + OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_S")); + } else if (type == RW_LOCK_SX) { + OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_SX")); } if (writer != RW_LOCK_NOT_LOCKED) { - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], rwlock->lock_name)); + // JAN: FIXME + // OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], rwlock->lock_name)); OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WRITER_THREAD], (longlong)os_thread_pf(rwlock->writer_thread))); - if (writer == RW_LOCK_EX) { - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_EX")); - } else if (writer == RW_LOCK_WAIT_EX) { - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_WAIT_EX")); + if (writer == RW_LOCK_X) { + OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_X")); + } else if (writer == RW_LOCK_X_WAIT) { + OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_X_WAIT")); + } else if (type == RW_LOCK_SX) { + OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_SX")); } - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID], (longlong)rwlock->thread_id)); - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(rwlock->file_name))); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE], rwlock->line)); + //OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID], (longlong)rwlock->thread_id)); + //OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(rwlock->file_name))); + //OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE], rwlock->line)); OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_READERS], rw_lock_get_reader_count(rwlock))); 
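/* The OK() wrapper used throughout this function is, by the convention of
the InnoDB information-schema code (i_s.cc), a bail-out macro roughly like
the sketch below; its exact definition is outside this diff: */

#define OK(expr)		\
	if ((expr) != 0) {	\
		DBUG_RETURN(1);	\
	}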
OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG], (longlong)rwlock->waiters)); OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD], (longlong)rwlock->lock_word)); diff --git a/storage/innobase/sync/sync0debug.cc b/storage/innobase/sync/sync0debug.cc new file mode 100644 index 00000000000..f7fcc8d9727 --- /dev/null +++ b/storage/innobase/sync/sync0debug.cc @@ -0,0 +1,1808 @@ +/***************************************************************************** + +Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved. + +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file sync/sync0debug.cc +Debug checks for latches. + +Created 2012-08-21 Sunny Bains +*******************************************************/ + +#include "sync0sync.h" +#include "sync0debug.h" + +#include "ut0new.h" +#include "srv0start.h" + +#include <map> +#include <vector> +#include <string> +#include <algorithm> +#include <iostream> + +#ifdef UNIV_DEBUG + +my_bool srv_sync_debug; + +/** The global mutex which protects debug info lists of all rw-locks. +To modify the debug info list of an rw-lock, this mutex has to be +acquired in addition to the mutex protecting the lock. */ +static ib_mutex_t rw_lock_debug_mutex; + +/** If deadlock detection does not get immediately the mutex, +it may wait for this event */ +static os_event_t rw_lock_debug_event; + +/** This is set to true, if there may be waiters for the event */ +static bool rw_lock_debug_waiters; + +/** The latch held by a thread */ +struct Latched { + + /** Constructor */ + Latched() : m_latch(), m_level(SYNC_UNKNOWN) { } + + /** Constructor + @param[in] latch Latch instance + @param[in] level Level of latch held */ + Latched(const latch_t* latch, + latch_level_t level) + : + m_latch(latch), + m_level(level) + { + /* No op */ + } + + /** @return the latch level */ + latch_level_t get_level() const + { + return(m_level); + } + + /** Check if the rhs latch and level match + @param[in] rhs instance to compare with + @return true on match */ + bool operator==(const Latched& rhs) const + { + return(m_latch == rhs.m_latch && m_level == rhs.m_level); + } + + /** The latch instance */ + const latch_t* m_latch; + + /** The latch level. For buffer blocks we can pass a separate latch + level to check against, see buf_block_dbg_add_level() */ + latch_level_t m_level; +}; + +/** Thread specific latches. This is ordered on level in descending order. 
*/ +typedef std::vector<Latched, ut_allocator<Latched> > Latches; + +/** The deadlock detector. */ +struct LatchDebug { + + /** Debug mutex for control structures, should not be tracked + by this module. */ + typedef OSMutex Mutex; + + /** Comparator for the ThreadMap. */ + struct os_thread_id_less + : public std::binary_function< + os_thread_id_t, + os_thread_id_t, + bool> + { + /** @return true if lhs < rhs */ + bool operator()( + const os_thread_id_t& lhs, + const os_thread_id_t& rhs) const + UNIV_NOTHROW + { + return(os_thread_pf(lhs) < os_thread_pf(rhs)); + } + }; + + /** For tracking a thread's latches. */ + typedef std::map< + os_thread_id_t, + Latches*, + os_thread_id_less, + ut_allocator<std::pair<const std::string, latch_meta_t> > > + ThreadMap; + + /** Constructor */ + LatchDebug() + UNIV_NOTHROW; + + /** Destructor */ + ~LatchDebug() + UNIV_NOTHROW + { + m_mutex.destroy(); + } + + /** Create a new instance if one doesn't exist else return + the existing one. + @param[in] add add an empty entry if one is not + found (default no) + @return pointer to a thread's acquired latches. */ + Latches* thread_latches(bool add = false) + UNIV_NOTHROW; + + /** Check that all the latches already owned by a thread have a lower + level than limit. + @param[in] latches the thread's existing (acquired) latches + @param[in] limit to check against + @return latched if there is one with a level <= limit . */ + const Latched* less( + const Latches* latches, + latch_level_t limit) const + UNIV_NOTHROW; + + /** Checks if the level value exists in the thread's acquired latches. + @param[in] latches the thread's existing (acquired) latches + @param[in] level to lookup + @return latch if found or 0 */ + const latch_t* find( + const Latches* Latches, + latch_level_t level) const + UNIV_NOTHROW; + + /** + Checks if the level value exists in the thread's acquired latches. + @param[in] level to lookup + @return latch if found or 0 */ + const latch_t* find(latch_level_t level) + UNIV_NOTHROW; + + /** Report error and abort. + @param[in] latches thread's existing latches + @param[in] latched The existing latch causing the + invariant to fail + @param[in] level The new level request that breaks + the order */ + void crash( + const Latches* latches, + const Latched* latched, + latch_level_t level) const + UNIV_NOTHROW; + + /** Do a basic ordering check. + @param[in] latches thread's existing latches + @param[in] requested_level Level requested by latch + @param[in] level declared ulint so that we can + do level - 1. The level of the + latch that the thread is trying + to acquire + @return true if passes, else crash with error message. */ + bool basic_check( + const Latches* latches, + latch_level_t requested_level, + ulint level) const + UNIV_NOTHROW; + + /** Adds a latch and its level in the thread level array. Allocates + the memory for the array if called for the first time for this + OS thread. Makes the checks against other latch levels stored + in the array for this thread. + + @param[in] latch latch that the thread wants to acqire. + @param[in] level latch level to check against */ + void lock_validate( + const latch_t* latch, + latch_level_t level) + UNIV_NOTHROW + { + /* Ignore diagnostic latches, starting with '.' */ + + if (*latch->get_name() != '.' 
+ && latch->get_level() != SYNC_LEVEL_VARYING) { + + ut_ad(level != SYNC_LEVEL_VARYING); + + Latches* latches = check_order(latch, level); + + ut_a(latches->empty() + || level == SYNC_LEVEL_VARYING + || level == SYNC_NO_ORDER_CHECK + || latches->back().get_level() + == SYNC_NO_ORDER_CHECK + || latches->back().m_latch->get_level() + == SYNC_LEVEL_VARYING + || latches->back().get_level() >= level); + } + } + + /** Adds a latch and its level in the thread level array. Allocates + the memory for the array if called for the first time for this + OS thread. Makes the checks against other latch levels stored + in the array for this thread. + + @param[in] latch latch that the thread wants to acqire. + @param[in] level latch level to check against */ + void lock_granted( + const latch_t* latch, + latch_level_t level) + UNIV_NOTHROW + { + /* Ignore diagnostic latches, starting with '.' */ + + if (*latch->get_name() != '.' + && latch->get_level() != SYNC_LEVEL_VARYING) { + + Latches* latches = thread_latches(true); + + latches->push_back(Latched(latch, level)); + } + } + + /** For recursive X rw-locks. + @param[in] latch The RW-Lock to relock */ + void relock(const latch_t* latch) + UNIV_NOTHROW + { + ut_a(latch->m_rw_lock); + + latch_level_t level = latch->get_level(); + + /* Ignore diagnostic latches, starting with '.' */ + + if (*latch->get_name() != '.' + && latch->get_level() != SYNC_LEVEL_VARYING) { + + Latches* latches = thread_latches(true); + + Latches::iterator it = std::find( + latches->begin(), latches->end(), + Latched(latch, level)); + + ut_a(latches->empty() + || level == SYNC_LEVEL_VARYING + || level == SYNC_NO_ORDER_CHECK + || latches->back().m_latch->get_level() + == SYNC_LEVEL_VARYING + || latches->back().m_latch->get_level() + == SYNC_NO_ORDER_CHECK + || latches->back().get_level() >= level + || it != latches->end()); + + if (it == latches->end()) { + latches->push_back(Latched(latch, level)); + } else { + latches->insert(it, Latched(latch, level)); + } + } + } + + /** Iterate over a thread's latches. + @param[in,out] functor The callback + @return true if the functor returns true. */ + bool for_each(sync_check_functor_t& functor) + UNIV_NOTHROW + { + const Latches* latches = thread_latches(); + + if (latches == 0) { + return(functor.result()); + } + + Latches::const_iterator end = latches->end(); + + for (Latches::const_iterator it = latches->begin(); + it != end; + ++it) { + + if (functor(it->m_level)) { + break; + } + } + + return(functor.result()); + } + + /** Removes a latch from the thread level array if it is found there. 
+ /** Removes a latch from the thread level array if it is found there.
+ It is not an error if the latch is not found, as we presently are
+ not able to determine the level for every latch reservation that
+ the program makes.
+ @param[in] latch The latch that was released */
+ void unlock(const latch_t* latch) UNIV_NOTHROW;
+
+ /** Get the level name
+ @param[in] level The level ID to lookup
+ @return level name */
+ const std::string& get_level_name(latch_level_t level) const
+ UNIV_NOTHROW
+ {
+ Levels::const_iterator it = m_levels.find(level);
+
+ ut_ad(it != m_levels.end());
+
+ return(it->second);
+ }
+
+ /** Initialise the debug data structures */
+ static void init()
+ UNIV_NOTHROW;
+
+ /** Shutdown the latch debug checking */
+ static void shutdown()
+ UNIV_NOTHROW;
+
+ /** @return the singleton instance */
+ static LatchDebug* instance()
+ UNIV_NOTHROW
+ {
+ return(s_instance);
+ }
+
+ /** Create the singleton instance */
+ static void create_instance()
+ UNIV_NOTHROW
+ {
+ ut_ad(s_instance == NULL);
+
+ s_instance = UT_NEW_NOKEY(LatchDebug());
+ }
+
+private:
+ /** Disable copying */
+ LatchDebug(const LatchDebug&);
+ LatchDebug& operator=(const LatchDebug&);
+
+ /** Adds a latch and its level in the thread level array. Allocates
+ the memory for the array if called for the first time for this
+ OS thread. Makes the checks against other latch levels stored
+ in the array for this thread.
+
+ @param[in] latch pointer to a mutex or an rw-lock
+ @param[in] level level in the latching order
+ @return the thread's latches */
+ Latches* check_order(
+ const latch_t* latch,
+ latch_level_t level)
+ UNIV_NOTHROW;
+
+ /** Print the latches acquired by a thread
+ @param[in] latches Latches acquired by a thread */
+ void print_latches(const Latches* latches) const
+ UNIV_NOTHROW;
+
+ /** Special handling for the RTR mutexes. We need to add proper
+ levels for them if possible.
+ @param[in] latch Latch to check
+ @return true if it is an _RTR_ mutex */
+ bool is_rtr_mutex(const latch_t* latch) const
+ UNIV_NOTHROW
+ {
+ return(latch->get_id() == LATCH_ID_RTR_ACTIVE_MUTEX
+ || latch->get_id() == LATCH_ID_RTR_PATH_MUTEX
+ || latch->get_id() == LATCH_ID_RTR_MATCH_MUTEX
+ || latch->get_id() == LATCH_ID_RTR_SSN_MUTEX);
+ }
+
+private:
+ /** Comparator for the Levels map. */
+ struct latch_level_less
+ : public std::binary_function<
+ latch_level_t,
+ latch_level_t,
+ bool>
+ {
+ /** @return true if lhs < rhs */
+ bool operator()(
+ const latch_level_t& lhs,
+ const latch_level_t& rhs) const
+ UNIV_NOTHROW
+ {
+ return(lhs < rhs);
+ }
+ };
+
+ typedef std::map<
+ latch_level_t,
+ std::string,
+ latch_level_less,
+ ut_allocator<std::pair<latch_level_t, std::string> > >
+ Levels;
+
+ /** Mutex protecting the deadlock detector data structures. */
+ Mutex m_mutex;
+
+ /** Thread specific data. Protected by m_mutex. */
+ ThreadMap m_threads;
+
+ /** Mapping from latch level to its string representation. */
+ Levels m_levels;
+
+ /** The singleton instance. Must be created in single threaded mode. */
+ static LatchDebug* s_instance;
+
+public:
+ /** For checking whether this module has been initialised or not.
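The LEVEL_MAP_INSERT macro used by the constructor below relies on the preprocessor's stringizing operator, which is worth seeing on its own. A minimal sketch with an invented enum and assert() standing in for ut_ad:

#include <cassert>
#include <map>
#include <string>

enum fruit_t { APPLE, PEAR };	/* stands in for latch_level_t */

typedef std::map<fruit_t, std::string> Names;
static Names names;

/* #T expands to the enumerator's own spelling, so the name table
cannot drift out of sync with the enum definition. */
#define NAME_MAP_INSERT(T) \
do { \
	std::pair<Names::iterator, bool> result = \
		names.insert(Names::value_type(T, #T)); \
	assert(result.second); /* no duplicate levels */ \
} while (0)

int main()
{
	NAME_MAP_INSERT(APPLE);
	NAME_MAP_INSERT(PEAR);
	assert(names[PEAR] == "PEAR");
	return(0);
}
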
*/ + static bool s_initialized; +}; + +/** The latch order checking infra-structure */ +LatchDebug* LatchDebug::s_instance = NULL; +bool LatchDebug::s_initialized = false; + +#define LEVEL_MAP_INSERT(T) \ +do { \ + std::pair<Levels::iterator, bool> result = \ + m_levels.insert(Levels::value_type(T, #T)); \ + ut_ad(result.second); \ +} while(0) + +/** Setup the mapping from level ID to level name mapping */ +LatchDebug::LatchDebug() +{ + m_mutex.init(); + + LEVEL_MAP_INSERT(SYNC_UNKNOWN); + LEVEL_MAP_INSERT(SYNC_MUTEX); + LEVEL_MAP_INSERT(RW_LOCK_SX); + LEVEL_MAP_INSERT(RW_LOCK_X_WAIT); + LEVEL_MAP_INSERT(RW_LOCK_S); + LEVEL_MAP_INSERT(RW_LOCK_X); + LEVEL_MAP_INSERT(RW_LOCK_NOT_LOCKED); + LEVEL_MAP_INSERT(SYNC_MONITOR_MUTEX); + LEVEL_MAP_INSERT(SYNC_ANY_LATCH); + LEVEL_MAP_INSERT(SYNC_DOUBLEWRITE); + LEVEL_MAP_INSERT(SYNC_BUF_FLUSH_LIST); + LEVEL_MAP_INSERT(SYNC_BUF_BLOCK); + LEVEL_MAP_INSERT(SYNC_BUF_PAGE_HASH); + LEVEL_MAP_INSERT(SYNC_BUF_POOL); + LEVEL_MAP_INSERT(SYNC_POOL); + LEVEL_MAP_INSERT(SYNC_POOL_MANAGER); + LEVEL_MAP_INSERT(SYNC_SEARCH_SYS); + LEVEL_MAP_INSERT(SYNC_WORK_QUEUE); + LEVEL_MAP_INSERT(SYNC_FTS_TOKENIZE); + LEVEL_MAP_INSERT(SYNC_FTS_OPTIMIZE); + LEVEL_MAP_INSERT(SYNC_FTS_BG_THREADS); + LEVEL_MAP_INSERT(SYNC_FTS_CACHE_INIT); + LEVEL_MAP_INSERT(SYNC_RECV); + LEVEL_MAP_INSERT(SYNC_LOG_FLUSH_ORDER); + LEVEL_MAP_INSERT(SYNC_LOG); + LEVEL_MAP_INSERT(SYNC_PAGE_CLEANER); + LEVEL_MAP_INSERT(SYNC_PURGE_QUEUE); + LEVEL_MAP_INSERT(SYNC_TRX_SYS_HEADER); + LEVEL_MAP_INSERT(SYNC_REC_LOCK); + LEVEL_MAP_INSERT(SYNC_THREADS); + LEVEL_MAP_INSERT(SYNC_TRX); + LEVEL_MAP_INSERT(SYNC_TRX_SYS); + LEVEL_MAP_INSERT(SYNC_LOCK_SYS); + LEVEL_MAP_INSERT(SYNC_LOCK_WAIT_SYS); + LEVEL_MAP_INSERT(SYNC_INDEX_ONLINE_LOG); + LEVEL_MAP_INSERT(SYNC_IBUF_BITMAP); + LEVEL_MAP_INSERT(SYNC_IBUF_BITMAP_MUTEX); + LEVEL_MAP_INSERT(SYNC_IBUF_TREE_NODE); + LEVEL_MAP_INSERT(SYNC_IBUF_TREE_NODE_NEW); + LEVEL_MAP_INSERT(SYNC_IBUF_INDEX_TREE); + LEVEL_MAP_INSERT(SYNC_IBUF_MUTEX); + LEVEL_MAP_INSERT(SYNC_FSP_PAGE); + LEVEL_MAP_INSERT(SYNC_FSP); + LEVEL_MAP_INSERT(SYNC_EXTERN_STORAGE); + LEVEL_MAP_INSERT(SYNC_TRX_UNDO_PAGE); + LEVEL_MAP_INSERT(SYNC_RSEG_HEADER); + LEVEL_MAP_INSERT(SYNC_RSEG_HEADER_NEW); + LEVEL_MAP_INSERT(SYNC_NOREDO_RSEG); + LEVEL_MAP_INSERT(SYNC_REDO_RSEG); + LEVEL_MAP_INSERT(SYNC_TRX_UNDO); + LEVEL_MAP_INSERT(SYNC_PURGE_LATCH); + LEVEL_MAP_INSERT(SYNC_TREE_NODE); + LEVEL_MAP_INSERT(SYNC_TREE_NODE_FROM_HASH); + LEVEL_MAP_INSERT(SYNC_TREE_NODE_NEW); + LEVEL_MAP_INSERT(SYNC_INDEX_TREE); + LEVEL_MAP_INSERT(SYNC_IBUF_PESS_INSERT_MUTEX); + LEVEL_MAP_INSERT(SYNC_IBUF_HEADER); + LEVEL_MAP_INSERT(SYNC_DICT_HEADER); + LEVEL_MAP_INSERT(SYNC_STATS_AUTO_RECALC); + LEVEL_MAP_INSERT(SYNC_DICT_AUTOINC_MUTEX); + LEVEL_MAP_INSERT(SYNC_DICT); + LEVEL_MAP_INSERT(SYNC_FTS_CACHE); + LEVEL_MAP_INSERT(SYNC_DICT_OPERATION); + LEVEL_MAP_INSERT(SYNC_FILE_FORMAT_TAG); + LEVEL_MAP_INSERT(SYNC_TRX_I_S_LAST_READ); + LEVEL_MAP_INSERT(SYNC_TRX_I_S_RWLOCK); + LEVEL_MAP_INSERT(SYNC_RECV_WRITER); + LEVEL_MAP_INSERT(SYNC_LEVEL_VARYING); + LEVEL_MAP_INSERT(SYNC_NO_ORDER_CHECK); + + /* Enum count starts from 0 */ + ut_ad(m_levels.size() == SYNC_LEVEL_MAX + 1); +} + +/** Print the latches acquired by a thread +@param[in] latches Latches acquired by a thread */ +void +LatchDebug::print_latches(const Latches* latches) const + UNIV_NOTHROW +{ + ib::error() << "Latches already owned by this thread: "; + + Latches::const_iterator end = latches->end(); + + for (Latches::const_iterator it = latches->begin(); + it != end; + ++it) { + + ib::error() + 
<< sync_latch_get_name(it->m_latch->get_id()) + << " -> " + << it->m_level << " " + << "(" << get_level_name(it->m_level) << ")"; + } +} + +/** Report error and abort +@param[in] latches thread's existing latches +@param[in] latched The existing latch causing the invariant to fail +@param[in] level The new level request that breaks the order */ +void +LatchDebug::crash( + const Latches* latches, + const Latched* latched, + latch_level_t level) const + UNIV_NOTHROW +{ + const latch_t* latch = latched->m_latch; + const std::string& in_level_name = get_level_name(level); + + const std::string& latch_level_name = + get_level_name(latched->m_level); + + ib::error() + << "Thread " << os_thread_pf(os_thread_get_curr_id()) + << " already owns a latch " + << sync_latch_get_name(latch->m_id) << " at level" + << " " << latched->m_level << " (" << latch_level_name + << " ), which is at a lower/same level than the" + << " requested latch: " + << level << " (" << in_level_name << "). " + << latch->to_string(); + + print_latches(latches); + + ut_error; +} + +/** Check that all the latches already owned by a thread have a lower +level than limit. +@param[in] latches the thread's existing (acquired) latches +@param[in] limit to check against +@return latched info if there is one with a level <= limit . */ +const Latched* +LatchDebug::less( + const Latches* latches, + latch_level_t limit) const + UNIV_NOTHROW +{ + Latches::const_iterator end = latches->end(); + + for (Latches::const_iterator it = latches->begin(); it != end; ++it) { + + if (it->m_level <= limit) { + return(&(*it)); + } + } + + return(NULL); +} + +/** Do a basic ordering check. +@param[in] latches thread's existing latches +@param[in] requested_level Level requested by latch +@param[in] in_level declared ulint so that we can do level - 1. + The level of the latch that the thread is + trying to acquire +@return true if passes, else crash with error message. */ +bool +LatchDebug::basic_check( + const Latches* latches, + latch_level_t requested_level, + ulint in_level) const + UNIV_NOTHROW +{ + latch_level_t level = latch_level_t(in_level); + + ut_ad(level < SYNC_LEVEL_MAX); + + const Latched* latched = less(latches, level); + + if (latched != NULL) { + crash(latches, latched, requested_level); + return(false); + } + + return(true); +} + +/** Create a new instance if one doesn't exist else return the existing one. +@param[in] add add an empty entry if one is not found + (default no) +@return pointer to a thread's acquired latches. */ +Latches* +LatchDebug::thread_latches(bool add) + UNIV_NOTHROW +{ + m_mutex.enter(); + + os_thread_id_t thread_id = os_thread_get_curr_id(); + ThreadMap::iterator lb = m_threads.lower_bound(thread_id); + + if (lb != m_threads.end() + && !(m_threads.key_comp()(thread_id, lb->first))) { + + Latches* latches = lb->second; + + m_mutex.exit(); + + return(latches); + + } else if (!add) { + + m_mutex.exit(); + + return(NULL); + + } else { + typedef ThreadMap::value_type value_type; + + Latches* latches = UT_NEW_NOKEY(Latches()); + + ut_a(latches != NULL); + + latches->reserve(32); + + m_threads.insert(lb, value_type(thread_id, latches)); + + m_mutex.exit(); + + return(latches); + } +} + +/** Checks if the level value exists in the thread's acquired latches. 
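thread_latches() above uses the classic associative-container trick of probing with lower_bound() and reusing the result as an insertion hint, so the tree is descended only once whether the key is present or not. Reduced to its essentials, with std::string keys standing in for os_thread_id_t:

#include <cassert>
#include <map>
#include <string>

typedef std::map<std::string, int> Map;

static int& find_or_insert(Map& m, const std::string& key, int value)
{
	Map::iterator lb = m.lower_bound(key);

	/* Equal keys: neither compares less than the other. */
	if (lb != m.end() && !m.key_comp()(key, lb->first)) {
		return(lb->second);	/* found, no insert */
	}

	/* lb is the correct hint, so this insert is amortised O(1). */
	return(m.insert(lb, Map::value_type(key, value))->second);
}

int main()
{
	Map m;
	find_or_insert(m, "thread-1", 10);
	assert(find_or_insert(m, "thread-1", 99) == 10);
	return(0);
}
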
+@param[in] levels the thread's existing (acquired) latches +@param[in] level to lookup +@return latch if found or 0 */ +const latch_t* +LatchDebug::find( + const Latches* latches, + latch_level_t level) const UNIV_NOTHROW +{ + Latches::const_iterator end = latches->end(); + + for (Latches::const_iterator it = latches->begin(); it != end; ++it) { + + if (it->m_level == level) { + + return(it->m_latch); + } + } + + return(0); +} + +/** Checks if the level value exists in the thread's acquired latches. +@param[in] level The level to lookup +@return latch if found or NULL */ +const latch_t* +LatchDebug::find(latch_level_t level) + UNIV_NOTHROW +{ + return(find(thread_latches(), level)); +} + +/** +Adds a latch and its level in the thread level array. Allocates the memory +for the array if called first time for this OS thread. Makes the checks +against other latch levels stored in the array for this thread. +@param[in] latch pointer to a mutex or an rw-lock +@param[in] level level in the latching order +@return the thread's latches */ +Latches* +LatchDebug::check_order( + const latch_t* latch, + latch_level_t level) + UNIV_NOTHROW +{ + ut_ad(latch->get_level() != SYNC_LEVEL_VARYING); + + Latches* latches = thread_latches(true); + + /* NOTE that there is a problem with _NODE and _LEAF levels: if the + B-tree height changes, then a leaf can change to an internal node + or the other way around. We do not know at present if this can cause + unnecessary assertion failures below. */ + + switch (level) { + case SYNC_NO_ORDER_CHECK: + case SYNC_EXTERN_STORAGE: + case SYNC_TREE_NODE_FROM_HASH: + /* Do no order checking */ + break; + + case SYNC_TRX_SYS_HEADER: + + if (srv_is_being_started) { + /* This is violated during trx_sys_create_rsegs() + when creating additional rollback segments when + upgrading in innobase_start_or_create_for_mysql(). */ + break; + } + + /* Fall through */ + + case SYNC_MONITOR_MUTEX: + case SYNC_RECV: + case SYNC_FTS_BG_THREADS: + case SYNC_WORK_QUEUE: + case SYNC_FTS_TOKENIZE: + case SYNC_FTS_OPTIMIZE: + case SYNC_FTS_CACHE: + case SYNC_FTS_CACHE_INIT: + case SYNC_PAGE_CLEANER: + case SYNC_LOG: + case SYNC_LOG_FLUSH_ORDER: + case SYNC_FILE_FORMAT_TAG: + case SYNC_DOUBLEWRITE: + case SYNC_SEARCH_SYS: + case SYNC_THREADS: + case SYNC_LOCK_SYS: + case SYNC_LOCK_WAIT_SYS: + case SYNC_TRX_SYS: + case SYNC_IBUF_BITMAP_MUTEX: + case SYNC_REDO_RSEG: + case SYNC_NOREDO_RSEG: + case SYNC_TRX_UNDO: + case SYNC_PURGE_LATCH: + case SYNC_PURGE_QUEUE: + case SYNC_DICT_AUTOINC_MUTEX: + case SYNC_DICT_OPERATION: + case SYNC_DICT_HEADER: + case SYNC_TRX_I_S_RWLOCK: + case SYNC_TRX_I_S_LAST_READ: + case SYNC_IBUF_MUTEX: + case SYNC_INDEX_ONLINE_LOG: + case SYNC_STATS_AUTO_RECALC: + case SYNC_POOL: + case SYNC_POOL_MANAGER: + case SYNC_RECV_WRITER: + + basic_check(latches, level, level); + break; + + case SYNC_ANY_LATCH: + + /* Temporary workaround for LATCH_ID_RTR_*_MUTEX */ + if (is_rtr_mutex(latch)) { + + const Latched* latched = less(latches, level); + + if (latched == NULL + || (latched != NULL + && is_rtr_mutex(latched->m_latch))) { + + /* No violation */ + break; + + } + + crash(latches, latched, level); + + } else { + basic_check(latches, level, level); + } + + break; + + case SYNC_TRX: + + /* Either the thread must own the lock_sys->mutex, or + it is allowed to own only ONE trx_t::mutex. 
*/ + + if (less(latches, level) != NULL) { + basic_check(latches, level, level - 1); + ut_a(find(latches, SYNC_LOCK_SYS) != 0); + } + break; + + case SYNC_BUF_FLUSH_LIST: + case SYNC_BUF_POOL: + + /* We can have multiple mutexes of this type therefore we + can only check whether the greater than condition holds. */ + + basic_check(latches, level, level - 1); + break; + + case SYNC_BUF_PAGE_HASH: + + /* Multiple page_hash locks are only allowed during + buf_validate and that is where buf_pool mutex is already + held. */ + + /* Fall through */ + + case SYNC_BUF_BLOCK: + + /* Either the thread must own the (buffer pool) buf_pool->mutex + or it is allowed to latch only ONE of (buffer block) + block->mutex or buf_pool->zip_mutex. */ + + if (less(latches, level) != NULL) { + basic_check(latches, level, level - 1); + ut_a(find(latches, SYNC_BUF_POOL) != 0); + } + break; + + case SYNC_REC_LOCK: + + if (find(latches, SYNC_LOCK_SYS) != 0) { + basic_check(latches, level, SYNC_REC_LOCK - 1); + } else { + basic_check(latches, level, SYNC_REC_LOCK); + } + break; + + case SYNC_IBUF_BITMAP: + + /* Either the thread must own the master mutex to all + the bitmap pages, or it is allowed to latch only ONE + bitmap page. */ + + if (find(latches, SYNC_IBUF_BITMAP_MUTEX) != 0) { + + basic_check(latches, level, SYNC_IBUF_BITMAP - 1); + + } else if (!srv_is_being_started) { + + /* This is violated during trx_sys_create_rsegs() + when creating additional rollback segments during + upgrade. */ + + basic_check(latches, level, SYNC_IBUF_BITMAP); + } + break; + + case SYNC_FSP_PAGE: + ut_a(find(latches, SYNC_FSP) != 0); + break; + + case SYNC_FSP: + + ut_a(find(latches, SYNC_FSP) != 0 + || basic_check(latches, level, SYNC_FSP)); + break; + + case SYNC_TRX_UNDO_PAGE: + + /* Purge is allowed to read in as many UNDO pages as it likes. + The purge thread can read the UNDO pages without any covering + mutex. */ + + ut_a(find(latches, SYNC_TRX_UNDO) != 0 + || find(latches, SYNC_REDO_RSEG) != 0 + || find(latches, SYNC_NOREDO_RSEG) != 0 + || basic_check(latches, level, level - 1)); + break; + + case SYNC_RSEG_HEADER: + + ut_a(find(latches, SYNC_REDO_RSEG) != 0 + || find(latches, SYNC_NOREDO_RSEG) != 0); + break; + + case SYNC_RSEG_HEADER_NEW: + + ut_a(find(latches, SYNC_FSP_PAGE) != 0); + break; + + case SYNC_TREE_NODE: + + { + const latch_t* fsp_latch; + + fsp_latch = find(latches, SYNC_FSP); + + ut_a((fsp_latch != NULL + && fsp_latch->is_temp_fsp()) + || find(latches, SYNC_INDEX_TREE) != 0 + || find(latches, SYNC_DICT_OPERATION) + || basic_check(latches, + level, SYNC_TREE_NODE - 1)); + } + + break; + + case SYNC_TREE_NODE_NEW: + + ut_a(find(latches, SYNC_FSP_PAGE) != 0); + break; + + case SYNC_INDEX_TREE: + + basic_check(latches, level, SYNC_TREE_NODE - 1); + break; + + case SYNC_IBUF_TREE_NODE: + + ut_a(find(latches, SYNC_IBUF_INDEX_TREE) != 0 + || basic_check(latches, level, SYNC_IBUF_TREE_NODE - 1)); + break; + + case SYNC_IBUF_TREE_NODE_NEW: + + /* ibuf_add_free_page() allocates new pages for the change + buffer while only holding the tablespace x-latch. These + pre-allocated new pages may only be used while holding + ibuf_mutex, in btr_page_alloc_for_ibuf(). 
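Several of the cases above (SYNC_TRX, SYNC_BUF_BLOCK, SYNC_IBUF_BITMAP) follow one pattern: a second latch at the same level is legal only under a covering mutex. A sketch of just that rule, with invented level constants rather than InnoDB's:

#include <algorithm>
#include <cassert>
#include <vector>

enum level_t { LOCK_SYS = 500, TRX = 400 };	/* invented values */

typedef std::vector<level_t> Latches;

static bool holds(const Latches& held, level_t level)
{
	return(std::find(held.begin(), held.end(), level) != held.end());
}

/* May this thread take (another) trx_t::mutex? */
static bool may_lock_trx(const Latches& held)
{
	if (!holds(held, TRX)) {
		return(true);		/* the first one is always fine */
	}
	return(holds(held, LOCK_SYS));	/* a second needs the cover */
}

int main()
{
	Latches bare;
	bare.push_back(TRX);
	assert(!may_lock_trx(bare));	/* second trx mutex refused */

	Latches covered;
	covered.push_back(LOCK_SYS);	/* higher level, taken first */
	covered.push_back(TRX);
	assert(may_lock_trx(covered));
	return(0);
}
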
*/ + + ut_a(find(latches, SYNC_IBUF_MUTEX) != 0 + || find(latches, SYNC_FSP) != 0); + break; + + case SYNC_IBUF_INDEX_TREE: + + if (find(latches, SYNC_FSP) != 0) { + basic_check(latches, level, level - 1); + } else { + basic_check(latches, level, SYNC_IBUF_TREE_NODE - 1); + } + break; + + case SYNC_IBUF_PESS_INSERT_MUTEX: + + basic_check(latches, level, SYNC_FSP - 1); + ut_a(find(latches, SYNC_IBUF_MUTEX) == 0); + break; + + case SYNC_IBUF_HEADER: + + basic_check(latches, level, SYNC_FSP - 1); + ut_a(find(latches, SYNC_IBUF_MUTEX) == NULL); + ut_a(find(latches, SYNC_IBUF_PESS_INSERT_MUTEX) == NULL); + break; + + case SYNC_DICT: + basic_check(latches, level, SYNC_DICT); + break; + + case SYNC_MUTEX: + case SYNC_UNKNOWN: + case SYNC_LEVEL_VARYING: + case RW_LOCK_X: + case RW_LOCK_X_WAIT: + case RW_LOCK_S: + case RW_LOCK_SX: + case RW_LOCK_NOT_LOCKED: + /* These levels should never be set for a latch. */ + ut_error; + break; + } + + return(latches); +} + +/** Removes a latch from the thread level array if it is found there. +@param[in] latch that was released/unlocked +@param[in] level level of the latch +@return true if found in the array; it is not an error if the latch is +not found, as we presently are not able to determine the level for +every latch reservation the program does */ +void +LatchDebug::unlock(const latch_t* latch) + UNIV_NOTHROW +{ + if (latch->get_level() == SYNC_LEVEL_VARYING) { + // We don't have varying level mutexes + ut_ad(latch->m_rw_lock); + } + + Latches* latches; + + if (*latch->get_name() == '.') { + + /* Ignore diagnostic latches, starting with '.' */ + + } else if ((latches = thread_latches()) != NULL) { + + Latches::reverse_iterator rend = latches->rend(); + + for (Latches::reverse_iterator it = latches->rbegin(); + it != rend; + ++it) { + + if (it->m_latch != latch) { + + continue; + } + + Latches::iterator i = it.base(); + + latches->erase(--i); + + /* If this thread doesn't own any more + latches remove from the map. + + FIXME: Perhaps use the master thread + to do purge. Or, do it from close connection. + This could be expensive. */ + + if (latches->empty()) { + + m_mutex.enter(); + + os_thread_id_t thread_id; + + thread_id = os_thread_get_curr_id(); + + m_threads.erase(thread_id); + + m_mutex.exit(); + + UT_DELETE(latches); + } + + return; + } + + if (latch->get_level() != SYNC_LEVEL_VARYING) { + ib::error() + << "Couldn't find latch " + << sync_latch_get_name(latch->get_id()); + + print_latches(latches); + + /** Must find the latch. */ + ut_error; + } + } +} + +/** Get the latch id from a latch name. +@param[in] name Latch name +@return latch id if found else LATCH_ID_NONE. */ +latch_id_t +sync_latch_get_id(const char* name) +{ + LatchMetaData::const_iterator end = latch_meta.end(); + + /* Linear scan should be OK, this should be extremely rare. */ + + for (LatchMetaData::const_iterator it = latch_meta.begin(); + it != end; + ++it) { + + if (*it == NULL || (*it)->get_id() == LATCH_ID_NONE) { + + continue; + + } else if (strcmp((*it)->get_name(), name) == 0) { + + return((*it)->get_id()); + } + } + + return(LATCH_ID_NONE); +} + +/** Get the latch name from a sync level +@param[in] level Latch level to lookup +@return NULL if not found. */ +const char* +sync_latch_get_name(latch_level_t level) +{ + LatchMetaData::const_iterator end = latch_meta.end(); + + /* Linear scan should be OK, this should be extremely rare. 
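unlock() above searches from the back with a reverse_iterator, since latches are normally released in LIFO order and the match is near the end, and then erases through base(). The base() iterator points one element past the match, hence the decrement. A standalone illustration:

#include <cassert>
#include <vector>

int main()
{
	std::vector<int> v;
	v.push_back(1);
	v.push_back(2);
	v.push_back(3);
	v.push_back(2);

	for (std::vector<int>::reverse_iterator rit = v.rbegin();
	     rit != v.rend();
	     ++rit) {

		if (*rit == 2) {
			std::vector<int>::iterator it = rit.base();
			v.erase(--it);	/* removes the *last* 2 */
			break;
		}
	}

	assert(v.size() == 3 && v[2] == 3);
	return(0);
}
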
*/ + + for (LatchMetaData::const_iterator it = latch_meta.begin(); + it != end; + ++it) { + + if (*it == NULL || (*it)->get_id() == LATCH_ID_NONE) { + + continue; + + } else if ((*it)->get_level() == level) { + + return((*it)->get_name()); + } + } + + return(0); +} + +/** Check if it is OK to acquire the latch. +@param[in] latch latch type */ +void +sync_check_lock_validate(const latch_t* latch) +{ + if (LatchDebug::instance() != NULL) { + LatchDebug::instance()->lock_validate( + latch, latch->get_level()); + } +} + +/** Note that the lock has been granted +@param[in] latch latch type */ +void +sync_check_lock_granted(const latch_t* latch) +{ + if (LatchDebug::instance() != NULL) { + LatchDebug::instance()->lock_granted(latch, latch->get_level()); + } +} + +/** Check if it is OK to acquire the latch. +@param[in] latch latch type +@param[in] level Latch level */ +void +sync_check_lock( + const latch_t* latch, + latch_level_t level) +{ + if (LatchDebug::instance() != NULL) { + + ut_ad(latch->get_level() == SYNC_LEVEL_VARYING); + ut_ad(latch->get_id() == LATCH_ID_BUF_BLOCK_LOCK); + + LatchDebug::instance()->lock_validate(latch, level); + LatchDebug::instance()->lock_granted(latch, level); + } +} + +/** Check if it is OK to re-acquire the lock. +@param[in] latch RW-LOCK to relock (recursive X locks) */ +void +sync_check_relock(const latch_t* latch) +{ + if (LatchDebug::instance() != NULL) { + LatchDebug::instance()->relock(latch); + } +} + +/** Removes a latch from the thread level array if it is found there. +@param[in] latch The latch to unlock */ +void +sync_check_unlock(const latch_t* latch) +{ + if (LatchDebug::instance() != NULL) { + LatchDebug::instance()->unlock(latch); + } +} + +/** Checks if the level array for the current thread contains a +mutex or rw-latch at the specified level. +@param[in] level to find +@return a matching latch, or NULL if not found */ +const latch_t* +sync_check_find(latch_level_t level) +{ + if (LatchDebug::instance() != NULL) { + return(LatchDebug::instance()->find(level)); + } + + return(NULL); +} + +/** Iterate over the thread's latches. +@param[in,out] functor called for each element. +@return false if the sync debug hasn't been initialised +@return the value returned by the functor */ +bool +sync_check_iterate(sync_check_functor_t& functor) +{ + if (LatchDebug::instance() != NULL) { + return(LatchDebug::instance()->for_each(functor)); + } + + return(false); +} + +/** Enable sync order checking. + +Note: We don't enforce any synchronisation checks. The caller must ensure +that no races can occur */ +void +sync_check_enable() +{ + if (!srv_sync_debug) { + + return; + } + + /* We should always call this before we create threads. */ + + LatchDebug::create_instance(); +} + +/** Initialise the debug data structures */ +void +LatchDebug::init() + UNIV_NOTHROW +{ + ut_a(rw_lock_debug_event == NULL); + + mutex_create(LATCH_ID_RW_LOCK_DEBUG, &rw_lock_debug_mutex); + + rw_lock_debug_event = os_event_create("rw_lock_debug_event"); + + rw_lock_debug_waiters = FALSE; +} + +/** Shutdown the latch debug checking + +Note: We don't enforce any synchronisation checks. 
The caller must ensure
+that no races can occur */
+void
+LatchDebug::shutdown()
+ UNIV_NOTHROW
+{
+ ut_a(rw_lock_debug_event != NULL);
+
+ os_event_destroy(rw_lock_debug_event);
+
+ rw_lock_debug_event = NULL;
+
+ mutex_free(&rw_lock_debug_mutex);
+
+ if (instance() == NULL) {
+
+ return;
+ }
+
+ ut_a(s_initialized);
+
+ s_initialized = false;
+
+ UT_DELETE(s_instance);
+
+ LatchDebug::s_instance = NULL;
+}
+
+/** Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
+because the debug mutex is also acquired in sync0arr while holding the OS
+mutex protecting the sync array, and the ordinary mutex_enter might
+recursively call routines in sync0arr, leading to a deadlock on the OS
+mutex. */
+void
+rw_lock_debug_mutex_enter()
+{
+ for (;;) {
+
+ if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) {
+ return;
+ }
+
+ os_event_reset(rw_lock_debug_event);
+
+ rw_lock_debug_waiters = TRUE;
+
+ if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) {
+ return;
+ }
+
+ os_event_wait(rw_lock_debug_event);
+ }
+}
+
+/** Releases the debug mutex. */
+void
+rw_lock_debug_mutex_exit()
+{
+ mutex_exit(&rw_lock_debug_mutex);
+
+ if (rw_lock_debug_waiters) {
+ rw_lock_debug_waiters = FALSE;
+ os_event_set(rw_lock_debug_event);
+ }
+}
+#endif /* UNIV_DEBUG */
+
+/* Meta data for all the InnoDB latches. If a latch is not recorded
+here then it will be considered for deadlock checks. */
+LatchMetaData latch_meta;
+
+/** Load the latch meta data. */
+static
+void
+sync_latch_meta_init()
+ UNIV_NOTHROW
+{
+ latch_meta.resize(LATCH_ID_MAX);
+
+ /* The latches should be ordered on latch_id_t, so that we can
+ index directly into the vector to update and fetch meta-data. */
+
+ LATCH_ADD(AUTOINC, SYNC_DICT_AUTOINC_MUTEX, autoinc_mutex_key);
+
+#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
+ LATCH_ADD(BUF_BLOCK_MUTEX, SYNC_BUF_BLOCK, PFS_NOT_INSTRUMENTED);
+#else
+ LATCH_ADD(BUF_BLOCK_MUTEX, SYNC_BUF_BLOCK, buffer_block_mutex_key);
+#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
+
+ LATCH_ADD(BUF_POOL, SYNC_BUF_POOL, buf_pool_mutex_key);
+
+ LATCH_ADD(BUF_POOL_ZIP, SYNC_BUF_BLOCK, buf_pool_zip_mutex_key);
+
+ LATCH_ADD(CACHE_LAST_READ, SYNC_TRX_I_S_LAST_READ,
+ cache_last_read_mutex_key);
+
+ LATCH_ADD(DICT_FOREIGN_ERR, SYNC_NO_ORDER_CHECK,
+ dict_foreign_err_mutex_key);
+
+ LATCH_ADD(DICT_SYS, SYNC_DICT, dict_sys_mutex_key);
+
+ LATCH_ADD(FILE_FORMAT_MAX, SYNC_FILE_FORMAT_TAG,
+ file_format_max_mutex_key);
+
+ LATCH_ADD(FIL_SYSTEM, SYNC_ANY_LATCH, fil_system_mutex_key);
+
+ LATCH_ADD(FLUSH_LIST, SYNC_BUF_FLUSH_LIST, flush_list_mutex_key);
+
+ LATCH_ADD(FTS_BG_THREADS, SYNC_FTS_BG_THREADS,
+ fts_bg_threads_mutex_key);
+
+ LATCH_ADD(FTS_DELETE, SYNC_FTS_OPTIMIZE, fts_delete_mutex_key);
+
+ LATCH_ADD(FTS_OPTIMIZE, SYNC_FTS_OPTIMIZE, fts_optimize_mutex_key);
+
+ LATCH_ADD(FTS_DOC_ID, SYNC_FTS_OPTIMIZE, fts_doc_id_mutex_key);
+
+ LATCH_ADD(FTS_PLL_TOKENIZE, SYNC_FTS_TOKENIZE,
+ fts_pll_tokenize_mutex_key);
+
+ LATCH_ADD(HASH_TABLE_MUTEX, SYNC_BUF_PAGE_HASH, hash_table_mutex_key);
+
+ LATCH_ADD(IBUF_BITMAP, SYNC_IBUF_BITMAP_MUTEX, ibuf_bitmap_mutex_key);
+
+ LATCH_ADD(IBUF, SYNC_IBUF_MUTEX, ibuf_mutex_key);
+
+ LATCH_ADD(IBUF_PESSIMISTIC_INSERT, SYNC_IBUF_PESS_INSERT_MUTEX,
+ ibuf_pessimistic_insert_mutex_key);
+
+ LATCH_ADD(LOG_SYS, SYNC_LOG, log_sys_mutex_key);
+
+ LATCH_ADD(LOG_FLUSH_ORDER, SYNC_LOG_FLUSH_ORDER,
+ log_flush_order_mutex_key);
+
+ LATCH_ADD(MUTEX_LIST, SYNC_NO_ORDER_CHECK, mutex_list_mutex_key);
+
+ LATCH_ADD(PAGE_CLEANER,
SYNC_PAGE_CLEANER, page_cleaner_mutex_key); + + LATCH_ADD(PURGE_SYS_PQ, SYNC_PURGE_QUEUE, purge_sys_pq_mutex_key); + + LATCH_ADD(RECALC_POOL, SYNC_STATS_AUTO_RECALC, + recalc_pool_mutex_key); + + LATCH_ADD(RECV_SYS, SYNC_RECV, recv_sys_mutex_key); + + LATCH_ADD(RECV_WRITER, SYNC_RECV_WRITER, recv_writer_mutex_key); + + LATCH_ADD(REDO_RSEG, SYNC_REDO_RSEG, redo_rseg_mutex_key); + + LATCH_ADD(NOREDO_RSEG, SYNC_NOREDO_RSEG, noredo_rseg_mutex_key); + +#ifdef UNIV_DEBUG + /* Mutex names starting with '.' are not tracked. They are assumed + to be diagnostic mutexes used in debugging. */ + latch_meta[LATCH_ID_RW_LOCK_DEBUG] = + LATCH_ADD(RW_LOCK_DEBUG, + SYNC_NO_ORDER_CHECK, + rw_lock_debug_mutex_key); +#endif /* UNIV_DEBUG */ + + LATCH_ADD(RTR_SSN_MUTEX, SYNC_ANY_LATCH, rtr_ssn_mutex_key); + + LATCH_ADD(RTR_ACTIVE_MUTEX, SYNC_ANY_LATCH, rtr_active_mutex_key); + + LATCH_ADD(RTR_MATCH_MUTEX, SYNC_ANY_LATCH, rtr_match_mutex_key); + + LATCH_ADD(RTR_PATH_MUTEX, SYNC_ANY_LATCH, rtr_path_mutex_key); + + LATCH_ADD(RW_LOCK_LIST, SYNC_NO_ORDER_CHECK, rw_lock_list_mutex_key); + + LATCH_ADD(RW_LOCK_MUTEX, SYNC_NO_ORDER_CHECK, rw_lock_mutex_key); + + LATCH_ADD(SRV_DICT_TMPFILE, SYNC_DICT_OPERATION, + srv_dict_tmpfile_mutex_key); + + LATCH_ADD(SRV_INNODB_MONITOR, SYNC_NO_ORDER_CHECK, + srv_innodb_monitor_mutex_key); + + LATCH_ADD(SRV_MISC_TMPFILE, SYNC_ANY_LATCH, + srv_misc_tmpfile_mutex_key); + + LATCH_ADD(SRV_MONITOR_FILE, SYNC_NO_ORDER_CHECK, + srv_monitor_file_mutex_key); + +#ifdef UNIV_DEBUG + LATCH_ADD(SYNC_THREAD, SYNC_NO_ORDER_CHECK, sync_thread_mutex_key); +#endif /* UNIV_DEBUG */ + + LATCH_ADD(BUF_DBLWR, SYNC_DOUBLEWRITE, buf_dblwr_mutex_key); + + LATCH_ADD(TRX_UNDO, SYNC_TRX_UNDO, trx_undo_mutex_key); + + LATCH_ADD(TRX_POOL, SYNC_POOL, trx_pool_mutex_key); + + LATCH_ADD(TRX_POOL_MANAGER, SYNC_POOL_MANAGER, + trx_pool_manager_mutex_key); + + LATCH_ADD(TRX, SYNC_TRX, trx_mutex_key); + + LATCH_ADD(LOCK_SYS, SYNC_LOCK_SYS, lock_mutex_key); + + LATCH_ADD(LOCK_SYS_WAIT, SYNC_LOCK_WAIT_SYS, lock_wait_mutex_key); + + LATCH_ADD(TRX_SYS, SYNC_TRX_SYS, trx_sys_mutex_key); + + LATCH_ADD(SRV_SYS, SYNC_THREADS, srv_sys_mutex_key); + + LATCH_ADD(SRV_SYS_TASKS, SYNC_ANY_LATCH, srv_threads_mutex_key); + + LATCH_ADD(PAGE_ZIP_STAT_PER_INDEX, SYNC_ANY_LATCH, + page_zip_stat_per_index_mutex_key); + +#ifndef PFS_SKIP_EVENT_MUTEX + LATCH_ADD(EVENT_MANAGER, SYNC_NO_ORDER_CHECK, event_manager_mutex_key); +#else + LATCH_ADD(EVENT_MANAGER, SYNC_NO_ORDER_CHECK, PFS_NOT_INSTRUMENTED); +#endif /* !PFS_SKIP_EVENT_MUTEX */ + + LATCH_ADD(EVENT_MUTEX, SYNC_NO_ORDER_CHECK, event_mutex_key); + + LATCH_ADD(SYNC_ARRAY_MUTEX, SYNC_NO_ORDER_CHECK, + sync_array_mutex_key); + + LATCH_ADD(THREAD_MUTEX, SYNC_NO_ORDER_CHECK, thread_mutex_key); + + LATCH_ADD(ZIP_PAD_MUTEX, SYNC_NO_ORDER_CHECK, zip_pad_mutex_key); + + LATCH_ADD(OS_AIO_READ_MUTEX, SYNC_NO_ORDER_CHECK, PFS_NOT_INSTRUMENTED); + + LATCH_ADD(OS_AIO_WRITE_MUTEX, SYNC_NO_ORDER_CHECK, + PFS_NOT_INSTRUMENTED); + + LATCH_ADD(OS_AIO_LOG_MUTEX, SYNC_NO_ORDER_CHECK, PFS_NOT_INSTRUMENTED); + + LATCH_ADD(OS_AIO_IBUF_MUTEX, SYNC_NO_ORDER_CHECK, PFS_NOT_INSTRUMENTED); + + LATCH_ADD(OS_AIO_SYNC_MUTEX, SYNC_NO_ORDER_CHECK, PFS_NOT_INSTRUMENTED); + + LATCH_ADD(ROW_DROP_LIST, SYNC_NO_ORDER_CHECK, row_drop_list_mutex_key); + + LATCH_ADD(INDEX_ONLINE_LOG, SYNC_INDEX_ONLINE_LOG, + index_online_log_key); + + LATCH_ADD(WORK_QUEUE, SYNC_WORK_QUEUE, PFS_NOT_INSTRUMENTED); + + // Add the RW locks + LATCH_ADD(BTR_SEARCH, SYNC_SEARCH_SYS, btr_search_latch_key); + + LATCH_ADD(BUF_BLOCK_LOCK, 
SYNC_LEVEL_VARYING, buf_block_lock_key); + +#ifdef UNIV_DEBUG + LATCH_ADD(BUF_BLOCK_DEBUG, SYNC_NO_ORDER_CHECK, + buf_block_debug_latch_key); +#endif /* UNIV_DEBUG */ + + LATCH_ADD(DICT_OPERATION, SYNC_DICT, dict_operation_lock_key); + + LATCH_ADD(CHECKPOINT, SYNC_NO_ORDER_CHECK, checkpoint_lock_key); + + LATCH_ADD(FIL_SPACE, SYNC_FSP, fil_space_latch_key); + + LATCH_ADD(FTS_CACHE, SYNC_FTS_CACHE, fts_cache_rw_lock_key); + + LATCH_ADD(FTS_CACHE_INIT, SYNC_FTS_CACHE_INIT, + fts_cache_init_rw_lock_key); + + LATCH_ADD(TRX_I_S_CACHE, SYNC_TRX_I_S_RWLOCK, trx_i_s_cache_lock_key); + + LATCH_ADD(TRX_PURGE, SYNC_PURGE_LATCH, trx_purge_latch_key); + + LATCH_ADD(IBUF_INDEX_TREE, SYNC_IBUF_INDEX_TREE, + index_tree_rw_lock_key); + + LATCH_ADD(INDEX_TREE, SYNC_INDEX_TREE, index_tree_rw_lock_key); + + LATCH_ADD(DICT_TABLE_STATS, SYNC_INDEX_TREE, dict_table_stats_key); + + LATCH_ADD(HASH_TABLE_RW_LOCK, SYNC_BUF_PAGE_HASH, + hash_table_locks_key); + +#ifdef UNIV_DEBUG + LATCH_ADD(BUF_CHUNK_MAP_LATCH, SYNC_ANY_LATCH, buf_chunk_map_latch_key); +#endif /* UNIV_DEBUG */ + + LATCH_ADD(SYNC_DEBUG_MUTEX, SYNC_NO_ORDER_CHECK, PFS_NOT_INSTRUMENTED); + + /* JAN: TODO: Add PFS instrumentation */ + LATCH_ADD(SCRUB_STAT_MUTEX, SYNC_NO_ORDER_CHECK, PFS_NOT_INSTRUMENTED); + LATCH_ADD(DEFRAGMENT_MUTEX, SYNC_NO_ORDER_CHECK, PFS_NOT_INSTRUMENTED); + LATCH_ADD(MTFLUSH_THREAD_MUTEX, SYNC_NO_ORDER_CHECK, PFS_NOT_INSTRUMENTED); + LATCH_ADD(FIL_CRYPT_MUTEX, SYNC_NO_ORDER_CHECK, PFS_NOT_INSTRUMENTED); + LATCH_ADD(FIL_CRYPT_STAT_MUTEX, SYNC_NO_ORDER_CHECK, PFS_NOT_INSTRUMENTED); + LATCH_ADD(FIL_CRYPT_DATA_MUTEX, SYNC_NO_ORDER_CHECK, PFS_NOT_INSTRUMENTED); + LATCH_ADD(FIL_CRYPT_THREADS_MUTEX, SYNC_NO_ORDER_CHECK, PFS_NOT_INSTRUMENTED); + + latch_id_t id = LATCH_ID_NONE; + + /* The array should be ordered on latch ID.We need to + index directly into it from the mutex policy to update + the counters and access the meta-data. */ + + for (LatchMetaData::iterator it = latch_meta.begin(); + it != latch_meta.end(); + ++it) { + + const latch_meta_t* meta = *it; + + /* Skip blank entries */ + if (meta == NULL || meta->get_id() == LATCH_ID_NONE) { + continue; + } + + ut_a(id < meta->get_id()); + + id = meta->get_id(); + } +} + +/** Destroy the latch meta data */ +static +void +sync_latch_meta_destroy() +{ + for (LatchMetaData::iterator it = latch_meta.begin(); + it != latch_meta.end(); + ++it) { + + UT_DELETE(*it); + } + + latch_meta.clear(); +} + +/** Track mutex file creation name and line number. This is to avoid storing +{ const char* name; uint16_t line; } in every instance. This results in the +sizeof(Mutex) > 64. We use a lookup table to store it separately. Fetching +the values is very rare, only required for diagnostic purposes. And, we +don't create/destroy mutexes that frequently. 
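The same idea in miniature, ignoring the locking: the latch object stays small because its "name:line" origin lives in a side map keyed by the object's address. All names below are invented for the illustration; the real class guards the map with an OSMutex and uses the lower_bound() hint pattern shown earlier.

#include <cassert>
#include <map>
#include <sstream>
#include <string>

static std::map<const void*, std::string> origins;	/* side table */

static void register_latch(const void* ptr, const char* file, int line)
{
	std::ostringstream msg;
	msg << file << ":" << line;
	origins[ptr] = msg.str();
}

struct mutex_t { int word; };	/* stays small: no name, no line */

int main()
{
	mutex_t m;
	register_latch(&m, "buf0buf.cc", 1234);		/* at creation */
	assert(origins[&m] == "buf0buf.cc:1234");	/* diagnostics */
	origins.erase(&m);				/* at destruction */
	return(0);
}
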
*/ +struct CreateTracker { + + /** Constructor */ + CreateTracker() + UNIV_NOTHROW + { + m_mutex.init(); + } + + /** Destructor */ + ~CreateTracker() + UNIV_NOTHROW + { + ut_d(m_files.empty()); + + m_mutex.destroy(); + } + + /** Register where the latch was created + @param[in] ptr Latch instance + @param[in] filename Where created + @param[in] line Line number in filename */ + void register_latch( + const void* ptr, + const char* filename, + uint16_t line) + UNIV_NOTHROW + { + m_mutex.enter(); + + Files::iterator lb = m_files.lower_bound(ptr); + + ut_ad(lb == m_files.end() + || m_files.key_comp()(ptr, lb->first)); + + typedef Files::value_type value_type; + + m_files.insert(lb, value_type(ptr, File(filename, line))); + + m_mutex.exit(); + } + + /** Deregister a latch - when it is destroyed + @param[in] ptr Latch instance being destroyed */ + void deregister_latch(const void* ptr) + UNIV_NOTHROW + { + m_mutex.enter(); + + Files::iterator lb = m_files.lower_bound(ptr); + + ut_ad(lb != m_files.end() + && !(m_files.key_comp()(ptr, lb->first))); + + m_files.erase(lb); + + m_mutex.exit(); + } + + /** Get the create string, format is "name:line" + @param[in] ptr Latch instance + @return the create string or "" if not found */ + std::string get(const void* ptr) + UNIV_NOTHROW + { + m_mutex.enter(); + + std::string created; + + Files::iterator lb = m_files.lower_bound(ptr); + + if (lb != m_files.end() + && !(m_files.key_comp()(ptr, lb->first))) { + + std::ostringstream msg; + + msg << lb->second.m_name << ":" << lb->second.m_line; + + created = msg.str(); + } + + m_mutex.exit(); + + return(created); + } + +private: + /** For tracking the filename and line number */ + struct File { + + /** Constructor */ + File() UNIV_NOTHROW : m_name(), m_line() { } + + /** Constructor + @param[in] name Filename where created + @param[in] line Line number where created */ + File(const char* name, uint16_t line) + UNIV_NOTHROW + : + m_name(sync_basename(name)), + m_line(line) + { + /* No op */ + } + + /** Filename where created */ + std::string m_name; + + /** Line number where created */ + uint16_t m_line; + }; + + /** Map the mutex instance to where it was created */ + typedef std::map< + const void*, + File, + std::less<const void*>, + ut_allocator<std::pair<const void*, File> > > + Files; + + typedef OSMutex Mutex; + + /** Mutex protecting m_files */ + Mutex m_mutex; + + /** Track the latch creation */ + Files m_files; +}; + +/** Track latch creation location. For reducing the size of the latches */ +static CreateTracker* create_tracker; + +/** Register a latch, called when it is created +@param[in] ptr Latch instance that was created +@param[in] filename Filename where it was created +@param[in] line Line number in filename */ +void +sync_file_created_register( + const void* ptr, + const char* filename, + uint16_t line) +{ + create_tracker->register_latch(ptr, filename, line); +} + +/** Deregister a latch, called when it is destroyed +@param[in] ptr Latch to be destroyed */ +void +sync_file_created_deregister(const void* ptr) +{ + create_tracker->deregister_latch(ptr); +} + +/** Get the string where the file was created. Its format is "name:line" +@param[in] ptr Latch instance +@return created information or "" if can't be found */ +std::string +sync_file_created_get(const void* ptr) +{ + return(create_tracker->get(ptr)); +} + +/** Initializes the synchronization data structures. 
*/
+void
+sync_check_init()
+{
+ ut_ad(!LatchDebug::s_initialized);
+ ut_d(LatchDebug::s_initialized = true);
+
+ /** For collecting latch statistics - SHOW ... MUTEX */
+ mutex_monitor = UT_NEW_NOKEY(MutexMonitor());
+
+ /** For tracking mutex creation location */
+ create_tracker = UT_NEW_NOKEY(CreateTracker());
+
+ sync_latch_meta_init();
+
+ /* Init the rw-lock & mutex list and create the mutex to protect it. */
+
+ UT_LIST_INIT(rw_lock_list, &rw_lock_t::list);
+
+ mutex_create(LATCH_ID_RW_LOCK_LIST, &rw_lock_list_mutex);
+
+ ut_d(LatchDebug::init());
+
+ sync_array_init(OS_THREAD_MAX_N);
+}
+
+/** Frees the resources in InnoDB's own synchronization data structures. Use
+os_sync_free() after calling this. */
+void
+sync_check_close()
+{
+ ut_d(LatchDebug::shutdown());
+
+ mutex_free(&rw_lock_list_mutex);
+
+ sync_array_close();
+
+ UT_DELETE(mutex_monitor);
+
+ mutex_monitor = NULL;
+
+ UT_DELETE(create_tracker);
+
+ create_tracker = NULL;
+
+ //sync_latch_meta_destroy();
+}
+
diff --git a/storage/innobase/sync/sync0rw.cc b/storage/innobase/sync/sync0rw.cc
index 8919716ff9c..a3eb4254f28 100644
--- a/storage/innobase/sync/sync0rw.cc
+++ b/storage/innobase/sync/sync0rw.cc
@@ -1,6 +1,6 @@
 /*****************************************************************************
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 Portions of this file contain modifications contributed and copyrighted by
@@ -36,34 +36,62 @@ Created 9/11/1995 Heikki Tuuri
 #include "sync0arr.ic"
 #endif
 
+#include "ha_prototypes.h"
+
 #include "os0thread.h"
 #include "mem0mem.h"
 #include "srv0srv.h"
-#include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */
+#include "os0event.h"
+#include "srv0mon.h"
+#include "sync0debug.h"
 #include "ha_prototypes.h"
 #include "my_cpu.h"
+#include <my_sys.h>
 
 /*
 	IMPLEMENTATION OF THE RW_LOCK
 	=============================
The status of a rw_lock is held in lock_word. The initial value of lock_word is
X_LOCK_DECR. lock_word is decremented by 1 for each s-lock and by X_LOCK_DECR
-for each x-lock. This describes the lock state for each value of lock_word:
-
-lock_word == X_LOCK_DECR: Unlocked.
-0 < lock_word < X_LOCK_DECR: Read locked, no waiting writers.
- (X_LOCK_DECR - lock_word) is the
- number of readers that hold the lock.
-lock_word == 0: Write locked
--X_LOCK_DECR < lock_word < 0: Read locked, with a waiting writer.
- (-lock_word) is the number of readers
- that hold the lock.
-lock_word <= -X_LOCK_DECR: Recursively write locked. lock_word has been
- decremented by X_LOCK_DECR for the first lock
- and the first recursive lock, then by 1 for
- each recursive lock thereafter.
- So the number of locks is:
- (lock_copy == 0) ? 1 : 2 - (lock_copy + X_LOCK_DECR)
+or 1 for each x-lock. This describes the lock state for each value of lock_word:
+
+lock_word == X_LOCK_DECR: Unlocked.
+X_LOCK_HALF_DECR < lock_word < X_LOCK_DECR:
+ S locked, no waiting writers.
+ (X_LOCK_DECR - lock_word) is the number
+ of S locks.
+lock_word == X_LOCK_HALF_DECR: SX locked, no waiting writers.
+0 < lock_word < X_LOCK_HALF_DECR:
+ SX locked AND S locked, no waiting writers.
+ (X_LOCK_HALF_DECR - lock_word) is the number
+ of S locks.
+lock_word == 0: X locked, no waiting writers.
+-X_LOCK_HALF_DECR < lock_word < 0:
+ S locked, with a waiting writer.
+ (-lock_word) is the number of S locks.
+lock_word == -X_LOCK_HALF_DECR: X locked and SX locked, no waiting writers.
+-X_LOCK_DECR < lock_word < -X_LOCK_HALF_DECR: + S locked, with a waiting writer + which has SX lock. + -(lock_word + X_LOCK_HALF_DECR) is the number + of S locks. +lock_word == -X_LOCK_DECR: X locked with recursive X lock (2 X locks). +-(X_LOCK_DECR + X_LOCK_HALF_DECR) < lock_word < -X_LOCK_DECR: + X locked. The number of the X locks is: + 2 - (lock_word + X_LOCK_DECR) +lock_word == -(X_LOCK_DECR + X_LOCK_HALF_DECR): + X locked with recursive X lock (2 X locks) + and SX locked. +lock_word < -(X_LOCK_DECR + X_LOCK_HALF_DECR): + X locked and SX locked. + The number of the X locks is: + 2 - (lock_word + X_LOCK_DECR + X_LOCK_HALF_DECR) + + LOCK COMPATIBILITY MATRIX + S SX X + S + + - + SX + - - + X - - - The lock_word is always read and updated atomically and consistently, so that it always represents the state of the lock, and the state of the lock changes @@ -71,12 +99,13 @@ with a single atomic operation. This lock_word holds all of the information that a thread needs in order to determine if it is eligible to gain the lock or if it must spin or sleep. The one exception to this is that writer_thread must be verified before recursive write locks: to solve this scenario, we make -writer_thread readable by all threads, but only writeable by the x-lock holder. +writer_thread readable by all threads, but only writeable by the x-lock or +sx-lock holder. The other members of the lock obey the following rules to remain consistent: recursive: This and the writer_thread field together control the - behaviour of recursive x-locking. + behaviour of recursive x-locking or sx-locking. lock->recursive must be FALSE in following states: 1) The writer_thread contains garbage i.e.: the lock has just been initialized. @@ -136,28 +165,13 @@ wait_ex_event: A thread may only wait on the wait_ex_event after it has Verify lock_word == 0 (waiting thread holds x_lock) */ -UNIV_INTERN rw_lock_stats_t rw_lock_stats; +rw_lock_stats_t rw_lock_stats; /* The global list of rw-locks */ -UNIV_INTERN rw_lock_list_t rw_lock_list; -UNIV_INTERN ib_mutex_t rw_lock_list_mutex; - -#ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t rw_lock_list_mutex_key; -UNIV_INTERN mysql_pfs_key_t rw_lock_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -#ifdef UNIV_SYNC_DEBUG -/* The global mutex which protects debug info lists of all rw-locks. -To modify the debug info list of an rw-lock, this mutex has to be -acquired in addition to the mutex protecting the lock. */ - -UNIV_INTERN os_fast_mutex_t rw_lock_debug_mutex; - -# ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t rw_lock_debug_mutex_key; -# endif +rw_lock_list_t rw_lock_list; +ib_mutex_t rw_lock_list_mutex; +#ifdef UNIV_DEBUG /******************************************************************//** Creates a debug info struct. */ static @@ -174,13 +188,13 @@ rw_lock_debug_free( /******************************************************************//** Creates a debug info struct. 
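The encoding is compact enough that a small decoder makes a useful sanity check. The sketch below mirrors the simpler rows of the state table above only; the constants are illustrative, on the assumption (as in sync0rw.h) that X_LOCK_HALF_DECR is X_LOCK_DECR / 2.

#include <cstdio>

static const long X_LOCK_DECR      = 0x20000000;   /* illustrative */
static const long X_LOCK_HALF_DECR = X_LOCK_DECR / 2;

/* Classify a lock_word value per the table above (simple states only). */
static void decode(long w)
{
	if (w == X_LOCK_DECR) {
		std::printf("%ld: unlocked\n", w);
	} else if (w > X_LOCK_HALF_DECR) {
		std::printf("%ld: %ld S lock(s), no waiting writer\n",
			    w, X_LOCK_DECR - w);
	} else if (w == X_LOCK_HALF_DECR) {
		std::printf("%ld: SX locked\n", w);
	} else if (w > 0) {
		std::printf("%ld: SX + %ld S lock(s)\n",
			    w, X_LOCK_HALF_DECR - w);
	} else if (w == 0) {
		std::printf("%ld: X locked\n", w);
	} else if (w > -X_LOCK_HALF_DECR) {
		std::printf("%ld: %ld S lock(s), writer waiting\n", w, -w);
	} else {
		std::printf("%ld: X/SX combination, see table\n", w);
	}
}

int main()
{
	decode(X_LOCK_DECR);		/* unlocked */
	decode(X_LOCK_DECR - 3);	/* three S locks */
	decode(0);			/* X locked */
	decode(-2);			/* 2 S locks, writer waiting */
	return(0);
}
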
-@return own: debug info struct */ +@return own: debug info struct */ static rw_lock_debug_t* rw_lock_debug_create(void) /*======================*/ { - return((rw_lock_debug_t*) mem_alloc(sizeof(rw_lock_debug_t))); + return((rw_lock_debug_t*) ut_malloc_nokey(sizeof(rw_lock_debug_t))); } /******************************************************************//** @@ -191,40 +205,40 @@ rw_lock_debug_free( /*===============*/ rw_lock_debug_t* info) { - mem_free(info); + ut_free(info); } -#endif /* UNIV_SYNC_DEBUG */ +#endif /* UNIV_DEBUG */ /******************************************************************//** Creates, or rather, initializes an rw-lock object in a specified memory location (which must be appropriately aligned). The rw-lock is initialized to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free is necessary only if the memory block containing it is freed. */ -UNIV_INTERN void rw_lock_create_func( /*================*/ rw_lock_t* lock, /*!< in: pointer to memory */ #ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /*!< in: level */ -# endif /* UNIV_SYNC_DEBUG */ + latch_level_t level, /*!< in: level */ #endif /* UNIV_DEBUG */ const char* cmutex_name, /*!< in: mutex name */ const char* cfile_name, /*!< in: file name where created */ ulint cline) /*!< in: file line where created */ { +#if defined(UNIV_DEBUG) && !defined(UNIV_PFS_RWLOCK) + /* It should have been created in pfs_rw_lock_create_func() */ + new(lock) rw_lock_t(); +#endif /* UNIV_DEBUG */ + /* If this is the very first time a synchronization object is created, then the following call initializes the sync system. */ #ifndef INNODB_RW_LOCKS_USE_ATOMICS - mutex_create(rw_lock_mutex_key, rw_lock_get_mutex(lock), - SYNC_NO_ORDER_CHECK); + mutex_create(LATCH_ID_RW_LOCK_MUTEX, rw_lock_get_mutex(lock)); lock->mutex.cfile_name = cfile_name; lock->mutex.cline = cline; lock->mutex.lock_name = cmutex_name; - ut_d(lock->mutex.ib_mutex_type = 1); #else /* INNODB_RW_LOCKS_USE_ATOMICS */ # ifdef UNIV_DEBUG UT_NOT_USED(cmutex_name); @@ -238,19 +252,28 @@ rw_lock_create_func( contains garbage at initialization and cannot be used for recursive x-locking. */ lock->recursive = FALSE; + lock->sx_recursive = 0; /* Silence Valgrind when UNIV_DEBUG_VALGRIND is not enabled. */ memset((void*) &lock->writer_thread, 0, sizeof lock->writer_thread); UNIV_MEM_INVALID(&lock->writer_thread, sizeof lock->writer_thread); -#ifdef UNIV_SYNC_DEBUG - UT_LIST_INIT(lock->debug_list); +#ifdef UNIV_DEBUG + lock->m_rw_lock = true; - lock->level = level; -#endif /* UNIV_SYNC_DEBUG */ + UT_LIST_INIT(lock->debug_list, &rw_lock_debug_t::list); + + lock->m_id = sync_latch_get_id(sync_latch_get_name(level)); + ut_a(lock->m_id != LATCH_ID_NONE); - ut_d(lock->magic_n = RW_LOCK_MAGIC_N); + lock->level = level; +#endif /* UNIV_DEBUG */ lock->cfile_name = cfile_name; + + /* This should hold in practice. If it doesn't then we need to + split the source file anyway. Or create the locks on lines + less than 8192. cline is unsigned:13. 
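The assertion that follows exists because cline is a 13-bit bitfield in rw_lock_t: 2^13 gives 8192 representable values (0..8191), and a larger source line number would silently wrap. A toy demonstration of the packing:

#include <cassert>

struct packed_loc_t {
	unsigned int cline:13;	/* as in rw_lock_t */
	unsigned int other:19;
};

int main()
{
	packed_loc_t loc;
	loc.other = 0;

	loc.cline = 8191;		/* largest value that fits */
	assert(loc.cline == 8191);

	loc.cline = 8192 & 0x1fff;	/* 8192 wraps back to 0 */
	assert(loc.cline == 0);
	return(0);
}
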
*/ + ut_ad(cline <= 8192); lock->cline = (unsigned int) cline; lock->lock_name = cmutex_name; lock->count_os_wait = 0; @@ -260,15 +283,17 @@ rw_lock_create_func( lock->last_x_file_name = "not yet reserved"; lock->last_s_line = 0; lock->last_x_line = 0; - lock->event = os_event_create(); - lock->wait_ex_event = os_event_create(); + lock->event = os_event_create(0); + lock->wait_ex_event = os_event_create(0); + + lock->is_block_lock = 0; mutex_enter(&rw_lock_list_mutex); ut_ad(UT_LIST_GET_FIRST(rw_lock_list) == NULL || UT_LIST_GET_FIRST(rw_lock_list)->magic_n == RW_LOCK_MAGIC_N); - UT_LIST_ADD_FIRST(list, rw_lock_list, lock); + UT_LIST_ADD_FIRST(rw_lock_list, lock); mutex_exit(&rw_lock_list_mutex); } @@ -277,16 +302,11 @@ rw_lock_create_func( Calling this function is obligatory only if the memory buffer containing the rw-lock is freed. Removes an rw-lock object from the global list. The rw-lock is checked to be in the non-locked state. */ -UNIV_INTERN void rw_lock_free_func( /*==============*/ - rw_lock_t* lock) /*!< in: rw-lock */ + rw_lock_t* lock) /*!< in/out: rw-lock */ { -#ifndef INNODB_RW_LOCKS_USE_ATOMICS - ib_mutex_t* mutex; -#endif /* !INNODB_RW_LOCKS_USE_ATOMICS */ - os_rmb; ut_ad(rw_lock_validate(lock)); ut_a(lock->lock_word == X_LOCK_DECR); @@ -294,65 +314,26 @@ rw_lock_free_func( mutex_enter(&rw_lock_list_mutex); #ifndef INNODB_RW_LOCKS_USE_ATOMICS - mutex = rw_lock_get_mutex(lock); + mutex_free(rw_lock_get_mutex(lock)); #endif /* !INNODB_RW_LOCKS_USE_ATOMICS */ - os_event_free(lock->event); - - os_event_free(lock->wait_ex_event); + os_event_destroy(lock->event); - ut_ad(UT_LIST_GET_PREV(list, lock) == NULL - || UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); - ut_ad(UT_LIST_GET_NEXT(list, lock) == NULL - || UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N); + os_event_destroy(lock->wait_ex_event); - UT_LIST_REMOVE(list, rw_lock_list, lock); + UT_LIST_REMOVE(rw_lock_list, lock); mutex_exit(&rw_lock_list_mutex); - ut_d(lock->magic_n = 0); - -#ifndef INNODB_RW_LOCKS_USE_ATOMICS - /* We have merely removed the rw_lock from the list, the memory - has not been freed. Therefore the pointer to mutex is valid. */ - mutex_free(mutex); -#endif /* !INNODB_RW_LOCKS_USE_ATOMICS */ -} - -#ifdef UNIV_DEBUG -/******************************************************************//** -Checks that the rw-lock has been initialized and that there are no -simultaneous shared and exclusive locks. -@return TRUE */ -UNIV_INTERN -ibool -rw_lock_validate( -/*=============*/ - rw_lock_t* lock) /*!< in: rw-lock */ -{ - ulint waiters; - lint lock_word; - - ut_ad(lock); - - waiters = rw_lock_get_waiters(lock); - lock_word = lock->lock_word; - - ut_ad(lock->magic_n == RW_LOCK_MAGIC_N); - ut_ad(waiters == 0 || waiters == 1); - ut_ad(lock_word > -(2 * X_LOCK_DECR)); - ut_ad(lock_word <= X_LOCK_DECR); - - return(TRUE); + /* We did an in-place new in rw_lock_create_func() */ + ut_d(lock->~rw_lock_t()); } -#endif /* UNIV_DEBUG */ /******************************************************************//** Lock an rw-lock in shared mode for the current thread. If the rw-lock is locked in exclusive mode, or there is an exclusive lock request waiting, -the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting +the function spins a preset time (controlled by srv_n_spin_wait_rounds), waiting for the lock, before suspending the thread. 
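The overall shape of the spin loop above, reduced to a boolean lock: spin a bounded number of rounds, yield, then register as a waiter and re-check the lock word before sleeping so a wake-up cannot be lost. This sketch uses C++11 primitives as stand-ins for InnoDB's sync array and events; the constant is the analogue of srv_n_spin_wait_rounds.

#include <atomic>
#include <condition_variable>
#include <mutex>
#include <thread>

static std::atomic<bool> locked(false);
static std::mutex ev_mutex;
static std::condition_variable ev;

static const int SPIN_ROUNDS = 30;	/* srv_n_spin_wait_rounds */

static void lock_slow()
{
	for (;;) {
		for (int i = 0; i < SPIN_ROUNDS; ++i) {
			bool expected = false;
			if (locked.compare_exchange_strong(expected, true))
				return;		/* got it while spinning */
		}
		std::this_thread::yield();

		std::unique_lock<std::mutex> g(ev_mutex);  /* reserve cell */
		bool expected = false;
		if (locked.compare_exchange_strong(expected, true))
			return;		/* last re-check before sleeping */
		ev.wait(g);		/* sync_array_wait_event() */
	}
}

static void unlock()
{
	locked.store(false);
	std::lock_guard<std::mutex> g(ev_mutex);  /* avoid lost wake-up */
	ev.notify_all();			  /* os_event_set() */
}

int main()
{
	lock_slow();
	std::thread t(lock_slow);
	unlock();
	t.join();
	return(0);
}
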
*/ -UNIV_INTERN void rw_lock_s_lock_spin( /*================*/ @@ -362,25 +343,22 @@ rw_lock_s_lock_spin( const char* file_name, /*!< in: file name where lock requested */ ulint line) /*!< in: line where requested */ { - ulint index; /* index of the reserved wait cell */ ulint i = 0; /* spin round count */ sync_array_t* sync_arr; - size_t counter_index; + ulint spin_count = 0; + ulint count_os_wait = 0; /* We reuse the thread id to index into the counter, cache it here for efficiency. */ - counter_index = (size_t) os_thread_get_curr_id(); - ut_ad(rw_lock_validate(lock)); - rw_lock_stats.rw_s_spin_wait_count.add(counter_index, 1); lock_loop: /* Spin waiting for the writer field to become free */ os_rmb; HMT_low(); - while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) { + while (i < srv_n_spin_wait_rounds && lock->lock_word <= 0) { if (srv_spin_wait_delay) { ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); } @@ -390,44 +368,72 @@ lock_loop: } HMT_medium(); - if (i >= SYNC_SPIN_ROUNDS) { + if (i >= srv_n_spin_wait_rounds) { os_thread_yield(); } + ++spin_count; + /* We try once again to obtain the lock */ - if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) { - rw_lock_stats.rw_s_spin_round_count.add(counter_index, i); + if (rw_lock_s_lock_low(lock, pass, file_name, line)) { + + if (count_os_wait > 0) { + lock->count_os_wait += count_os_wait; + rw_lock_stats.rw_s_os_wait_count.add(count_os_wait); + } + + rw_lock_stats.rw_s_spin_round_count.add(spin_count); return; /* Success */ } else { - if (i < SYNC_SPIN_ROUNDS) { + if (i < srv_n_spin_wait_rounds) { goto lock_loop; } - rw_lock_stats.rw_s_spin_round_count.add(counter_index, i); - sync_arr = sync_array_get_and_reserve_cell(lock, - RW_LOCK_SHARED, - file_name, - line, &index); + ++count_os_wait; + + sync_cell_t* cell; + + sync_arr = sync_array_get_and_reserve_cell( + lock, RW_LOCK_S, file_name, line, &cell); /* Set waiters before checking lock_word to ensure wake-up signal is sent. This may lead to some unnecessary signals. */ rw_lock_set_waiter_flag(lock); - if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) { - sync_array_free_cell(sync_arr, index); + if (rw_lock_s_lock_low(lock, pass, file_name, line)) { + + sync_array_free_cell(sync_arr, cell); + + if (count_os_wait > 0) { + + lock->count_os_wait += count_os_wait; + + rw_lock_stats.rw_s_os_wait_count.add( + count_os_wait); + } + + rw_lock_stats.rw_s_spin_round_count.add(spin_count); + return; /* Success */ } - /* these stats may not be accurate */ - lock->count_os_wait++; - rw_lock_stats.rw_s_os_wait_count.add(counter_index, 1); - - sync_array_wait_event(sync_arr, index); + /* see comments in trx_commit_low() to + before_trx_state_committed_in_memory explaining + this care to invoke the following sync check.*/ +#ifndef DBUG_OFF +#ifdef UNIV_DEBUG + if (lock->get_level() != SYNC_DICT_OPERATION) { + DEBUG_SYNC_C("rw_s_lock_waiting"); + } +#endif +#endif + sync_array_wait_event(sync_arr, cell); i = 0; + goto lock_loop; } } @@ -440,16 +446,15 @@ read was done. The ownership is moved because we want that the current thread is able to acquire a second x-latch which is stored in an mtr. This, in turn, is needed to pass the debug checks of index page operations. 
*/ -UNIV_INTERN void rw_lock_x_lock_move_ownership( /*==========================*/ rw_lock_t* lock) /*!< in: lock which was x-locked in the buffer read */ { - ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX)); + ut_ad(rw_lock_is_locked(lock, RW_LOCK_X)); - rw_lock_set_writer_id_and_recursion_flag(lock, TRUE); + rw_lock_set_writer_id_and_recursion_flag(lock, true); } /******************************************************************//** @@ -457,35 +462,33 @@ Function for the next writer to call. Waits for readers to exit. The caller must have already decremented lock_word by X_LOCK_DECR. */ UNIV_INLINE void -rw_lock_x_lock_wait( -/*================*/ +rw_lock_x_lock_wait_func( +/*=====================*/ rw_lock_t* lock, /*!< in: pointer to rw-lock */ -#ifdef UNIV_SYNC_DEBUG +#ifdef UNIV_DEBUG ulint pass, /*!< in: pass value; != 0, if the lock will be passed to another thread to unlock */ #endif + lint threshold,/*!< in: threshold to wait for */ const char* file_name,/*!< in: file name where lock requested */ ulint line) /*!< in: line where requested */ { - ulint index; ulint i = 0; + ulint n_spins = 0; sync_array_t* sync_arr; - size_t counter_index; - - /* We reuse the thread id to index into the counter, cache - it here for efficiency. */ - - counter_index = (size_t) os_thread_get_curr_id(); + ulint count_os_wait = 0; os_rmb; - ut_ad(lock->lock_word <= 0); + ut_ad(lock->lock_word <= threshold); + + while (lock->lock_word < threshold) { - HMT_low(); - while (lock->lock_word < 0) { + HMT_low(); if (srv_spin_wait_delay) { ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); } - if(i < SYNC_SPIN_ROUNDS) { + + if (i < srv_n_spin_wait_rounds) { i++; os_rmb; continue; @@ -493,29 +496,31 @@ rw_lock_x_lock_wait( HMT_medium(); /* If there is still a reader, then go to sleep.*/ - rw_lock_stats.rw_x_spin_round_count.add(counter_index, i); + ++n_spins; + + sync_cell_t* cell; - sync_arr = sync_array_get_and_reserve_cell(lock, - RW_LOCK_WAIT_EX, - file_name, - line, &index); + sync_arr = sync_array_get_and_reserve_cell( + lock, RW_LOCK_X_WAIT, file_name, line, &cell); i = 0; /* Check lock_word to ensure wake-up isn't missed.*/ - if (lock->lock_word < 0) { + if (lock->lock_word < threshold) { - /* these stats may not be accurate */ - lock->count_os_wait++; - rw_lock_stats.rw_x_os_wait_count.add(counter_index, 1); + ++count_os_wait; /* Add debug info as it is needed to detect possible deadlock. We must add info for WAIT_EX thread for deadlock detection to work properly. */ -#ifdef UNIV_SYNC_DEBUG - rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX, - file_name, line); -#endif + ut_d(rw_lock_add_debug_info( + lock, pass, RW_LOCK_X_WAIT, + file_name, line)); + + sync_array_wait_event(sync_arr, cell); + + ut_d(rw_lock_remove_debug_info( + lock, pass, RW_LOCK_X_WAIT)); if (srv_instrument_semaphores) { lock->thread_id = os_thread_get_curr_id(); @@ -523,25 +528,35 @@ rw_lock_x_lock_wait( lock->line = line; } - sync_array_wait_event(sync_arr, index); -#ifdef UNIV_SYNC_DEBUG - rw_lock_remove_debug_info( - lock, pass, RW_LOCK_WAIT_EX); -#endif /* It is possible to wake when lock_word < 0. 
We must pass the while-loop check to proceed.*/
+
 } else {
- sync_array_free_cell(sync_arr, index);
+ sync_array_free_cell(sync_arr, cell);
+
+ break;
 }
 
 HMT_low();
 }
 HMT_medium();
 
- rw_lock_stats.rw_x_spin_round_count.add(counter_index, i);
+ rw_lock_stats.rw_x_spin_round_count.add(n_spins);
+
+ if (count_os_wait > 0) {
+ lock->count_os_wait += count_os_wait;
+ rw_lock_stats.rw_x_os_wait_count.add(count_os_wait);
+ }
 }
 
+#ifdef UNIV_DEBUG
+# define rw_lock_x_lock_wait(L, P, T, F, O) \
+ rw_lock_x_lock_wait_func(L, P, T, F, O)
+#else
+# define rw_lock_x_lock_wait(L, P, T, F, O) \
+ rw_lock_x_lock_wait_func(L, T, F, O)
+#endif /* UNIV_DEBUG */
+
 /******************************************************************//**
 Low-level function for acquiring an exclusive lock.
-@return FALSE if did not succeed, TRUE if success. */
+@return FALSE if did not succeed, TRUE if success. */
 UNIV_INLINE
 ibool
 rw_lock_x_lock_low(
@@ -552,9 +567,8 @@ rw_lock_x_lock_low(
 const char* file_name,/*!< in: file name where lock requested */
 ulint line) /*!< in: line where requested */
 {
- ibool local_recursive= lock->recursive;
+ if (rw_lock_lock_word_decr(lock, X_LOCK_DECR, X_LOCK_HALF_DECR)) {
 
- if (rw_lock_lock_word_decr(lock, X_LOCK_DECR)) {
 /* lock->recursive also tells us if the writer_thread
 field is stale or active. As we are going to write
@@ -564,29 +578,51 @@ rw_lock_x_lock_low(
 /* Decrement occurred: we are writer or next-writer. */
 rw_lock_set_writer_id_and_recursion_flag(
- lock, pass ? FALSE : TRUE);
+ lock, !pass);
 
- rw_lock_x_lock_wait(lock,
-#ifdef UNIV_SYNC_DEBUG
- pass,
-#endif
- file_name, line);
+ rw_lock_x_lock_wait(lock, pass, 0, file_name, line);
 
 } else {
 os_thread_id_t thread_id = os_thread_get_curr_id();
 
+ if (!pass) {
+ os_rmb;
+ }
+
 /* Decrement failed: relock or failed lock
 Note: recursive must be loaded before writer_thread see
 comment for rw_lock_set_writer_id_and_recursion_flag().
 To achieve this we load it before rw_lock_lock_word_decr(),
- which implies full memory barrier in current implementation. */
- if (!pass && local_recursive
+ An X or SX lock is held by either
+ this thread or another. Try to relock. */
+ if (!pass
+ && lock->recursive
 && os_thread_eq(lock->writer_thread, thread_id)) {
- /* Relock */
- if (lock->lock_word == 0) {
- lock->lock_word -= X_LOCK_DECR;
+ /* Other S-locks can be allowed. If an X-lock is
+ requested recursively while an SX-lock is held,
+ the X-lock must still respect the latching order. */
+
+ /* The existing X or SX lock is from this thread */
+ if (rw_lock_lock_word_decr(lock, X_LOCK_DECR, 0)) {
+ /* There is at least one SX-lock from this
+ thread, but no X-lock. */
+
+ /* Wait for any other S-locks to be
+ released. */
+ rw_lock_x_lock_wait(
+ lock, pass, -X_LOCK_HALF_DECR,
+ file_name, line);
+
 } else {
- --lock->lock_word;
+ /* At least one X lock by this thread already
+ exists. Add another. */
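The relock arithmetic here is easier to trust after running it forwards once. A worked example, assuming (as in sync0rw.h) that X_LOCK_DECR is the usual large constant; only its relative size matters for the sketch:

#include <cassert>

static const long X_LOCK_DECR = 0x20000000;	/* illustrative */

int main()
{
	long lock_word = X_LOCK_DECR;	/* unlocked */

	lock_word -= X_LOCK_DECR;	/* 1st X lock */
	assert(lock_word == 0);

	lock_word -= X_LOCK_DECR;	/* 2nd (first recursive) X lock */
	assert(lock_word == -X_LOCK_DECR);

	--lock_word;			/* 3rd */
	--lock_word;			/* 4th */

	/* Number of X locks, per the comment block earlier in
	this file: 2 - (lock_word + X_LOCK_DECR). */
	assert(2 - (lock_word + X_LOCK_DECR) == 4);
	return(0);
}
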
*/ + if (lock->lock_word == 0 + || lock->lock_word == -X_LOCK_HALF_DECR) { + lock->lock_word -= X_LOCK_DECR; + } else { + ut_ad(lock->lock_word <= -X_LOCK_DECR); + --lock->lock_word; + } } } else { @@ -594,9 +630,9 @@ rw_lock_x_lock_low( return(FALSE); } } -#ifdef UNIV_SYNC_DEBUG - rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name, line); -#endif + + ut_d(rw_lock_add_debug_info(lock, pass, RW_LOCK_X, file_name, line)); + if (srv_instrument_semaphores) { lock->thread_id = os_thread_get_curr_id(); @@ -611,15 +647,94 @@ rw_lock_x_lock_low( } /******************************************************************//** +Low-level function for acquiring an sx lock. +@return FALSE if did not succeed, TRUE if success. */ +ibool +rw_lock_sx_lock_low( +/*================*/ + rw_lock_t* lock, /*!< in: pointer to rw-lock */ + ulint pass, /*!< in: pass value; != 0, if the lock will + be passed to another thread to unlock */ + const char* file_name,/*!< in: file name where lock requested */ + ulint line) /*!< in: line where requested */ +{ + if (rw_lock_lock_word_decr(lock, X_LOCK_HALF_DECR, X_LOCK_HALF_DECR)) { + + /* lock->recursive also tells us if the writer_thread + field is stale or active. As we are going to write + our own thread id in that field it must be that the + current writer_thread value is not active. */ + ut_a(!lock->recursive); + + /* Decrement occurred: we are the SX lock owner. */ + rw_lock_set_writer_id_and_recursion_flag( + lock, !pass); + + lock->sx_recursive = 1; + + } else { + os_thread_id_t thread_id = os_thread_get_curr_id(); + + if (!pass) { + os_rmb; + } + + /* Decrement failed: It already has an X or SX lock by this + thread or another thread. If it is this thread, relock, + else fail. */ + if (!pass && lock->recursive + && os_thread_eq(lock->writer_thread, thread_id)) { + /* This thread owns an X or SX lock */ + if (lock->sx_recursive++ == 0) { + /* This thread is making first SX-lock request + and it must be holding at least one X-lock here + because: + + * There can't be a WAIT_EX thread because we are + the thread which has it's thread_id written in + the writer_thread field and we are not waiting. + + * Any other X-lock thread cannot exist because + it must update recursive flag only after + updating the thread_id. Had there been + a concurrent X-locking thread which succeeded + in decrementing the lock_word it must have + written it's thread_id before setting the + recursive flag. As we cleared the if() + condition above therefore we must be the only + thread working on this lock and it is safe to + read and write to the lock_word. */ + + ut_ad((lock->lock_word == 0) + || ((lock->lock_word <= -X_LOCK_DECR) + && (lock->lock_word + > -(X_LOCK_DECR + + X_LOCK_HALF_DECR)))); + lock->lock_word -= X_LOCK_HALF_DECR; + } + } else { + /* Another thread locked before us */ + return(FALSE); + } + } + + ut_d(rw_lock_add_debug_info(lock, pass, RW_LOCK_SX, file_name, line)); + + lock->last_x_file_name = file_name; + lock->last_x_line = (unsigned int) line; + + return(TRUE); +} + +/******************************************************************//** NOTE! Use the corresponding macro, not directly this function! Lock an rw-lock in exclusive mode for the current thread. If the rw-lock is locked in shared or exclusive mode, or there is an exclusive lock request waiting, -the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting +the function spins a preset time (controlled by srv_n_spin_wait_rounds), waiting for the lock before suspending the thread. 
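As a reading aid for rw_lock_x_lock_wait_func() and rw_lock_x_lock_low() above: the writer first wins the lock_word decrement (a bounded compare-and-swap, so a second writer cannot also claim the lock), which shuts out new readers, and then waits for the readers already inside to push lock_word back up to a threshold. The following is a hedged sketch of that claim-then-drain pattern in portable C++, not the InnoDB code itself: SX mode is omitted (so the claim threshold is 0 instead of X_LOCK_HALF_DECR), the spin loop and sync array are reduced to a yield loop, and all names are illustrative.

#include <atomic>
#include <thread>

static const int X_LOCK_DECR = 0x20000000;	/* illustrative value */

/* Models rw_lock_lock_word_decr(): decrement by 'amount' only while the
current value stays above 'threshold', so two writers can never both
claim the lock. */
static bool lock_word_decr(std::atomic<int>& w, int amount, int threshold)
{
	int cur = w.load(std::memory_order_relaxed);
	while (cur > threshold) {
		if (w.compare_exchange_weak(cur, cur - amount,
					    std::memory_order_acquire)) {
			return true;
		}
	}
	return false;
}

/* w starts at X_LOCK_DECR (free); each s_lock subtracts 1 and each
s_unlock adds 1 back. */
static void x_lock(std::atomic<int>& w)
{
	/* Claim phase: subtract X_LOCK_DECR. New readers now see
	w <= 0 and cannot enter. */
	while (!lock_word_decr(w, X_LOCK_DECR, 0)) {
		std::this_thread::yield();	/* another writer holds it */
	}
	/* Drain phase: readers that were already inside add 1 back on
	unlock, so w climbs toward 0. This loop is what
	rw_lock_x_lock_wait() does, with spinning and an event wait. */
	while (w.load(std::memory_order_acquire) < 0) {
		std::this_thread::yield();
	}
}

In the real code the drain phase is instrumented: bounded spinning first, then a sync-array cell and an OS event wait instead of the yield.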
If the same thread has an x-lock on the rw-lock, locking succeed, with the following exception: if pass != 0, only a single x-lock may be taken on the lock. NOTE: If the same thread has an s-lock, locking does not succeed! */ -UNIV_INTERN void rw_lock_x_lock_func( /*================*/ @@ -629,113 +744,253 @@ rw_lock_x_lock_func( const char* file_name,/*!< in: file name where lock requested */ ulint line) /*!< in: line where requested */ { - ulint i; /*!< spin round count */ - ulint index; /*!< index of the reserved wait cell */ + ulint i = 0; sync_array_t* sync_arr; - ibool spinning = FALSE; - size_t counter_index; - - /* We reuse the thread id to index into the counter, cache - it here for efficiency. */ - - counter_index = (size_t) os_thread_get_curr_id(); + ulint spin_count = 0; + ulint count_os_wait = 0; ut_ad(rw_lock_validate(lock)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - i = 0; + ut_ad(!rw_lock_own(lock, RW_LOCK_S)); lock_loop: if (rw_lock_x_lock_low(lock, pass, file_name, line)) { - rw_lock_stats.rw_x_spin_round_count.add(counter_index, i); - return; /* Locking succeeded */ + if (count_os_wait > 0) { + lock->count_os_wait += count_os_wait; + rw_lock_stats.rw_x_os_wait_count.add(count_os_wait); + } - } else { + rw_lock_stats.rw_x_spin_round_count.add(spin_count); - if (!spinning) { - spinning = TRUE; + /* Locking succeeded */ + return; - rw_lock_stats.rw_x_spin_wait_count.add( - counter_index, 1); - } + } else { /* Spin waiting for the lock_word to become free */ os_rmb; HMT_low(); - while (i < SYNC_SPIN_ROUNDS - && lock->lock_word <= 0) { + while (i < srv_n_spin_wait_rounds + && lock->lock_word <= X_LOCK_HALF_DECR) { + if (srv_spin_wait_delay) { - ut_delay(ut_rnd_interval(0, - srv_spin_wait_delay)); + ut_delay(ut_rnd_interval( + 0, srv_spin_wait_delay)); } i++; os_rmb; } + HMT_medium(); - if (i >= SYNC_SPIN_ROUNDS) { + spin_count += i; + + if (i >= srv_n_spin_wait_rounds) { + os_thread_yield(); + } else { + goto lock_loop; } } - rw_lock_stats.rw_x_spin_round_count.add(counter_index, i); + sync_cell_t* cell; - sync_arr = sync_array_get_and_reserve_cell(lock, RW_LOCK_EX, - file_name, line, &index); + sync_arr = sync_array_get_and_reserve_cell( + lock, RW_LOCK_X, file_name, line, &cell); /* Waiters must be set before checking lock_word, to ensure signal is sent. This could lead to a few unnecessary wake-up signals. */ rw_lock_set_waiter_flag(lock); if (rw_lock_x_lock_low(lock, pass, file_name, line)) { - sync_array_free_cell(sync_arr, index); - return; /* Locking succeeded */ + sync_array_free_cell(sync_arr, cell); + + if (count_os_wait > 0) { + lock->count_os_wait += count_os_wait; + rw_lock_stats.rw_x_os_wait_count.add(count_os_wait); + } + + rw_lock_stats.rw_x_spin_round_count.add(spin_count); + + /* Locking succeeded */ + return; } - /* these stats may not be accurate */ - lock->count_os_wait++; - rw_lock_stats.rw_x_os_wait_count.add(counter_index, 1); + ++count_os_wait; - sync_array_wait_event(sync_arr, index); + sync_array_wait_event(sync_arr, cell); i = 0; + goto lock_loop; } -#ifdef UNIV_SYNC_DEBUG /******************************************************************//** -Acquires the debug mutex. We cannot use the mutex defined in sync0sync, -because the debug mutex is also acquired in sync0arr while holding the OS -mutex protecting the sync array, and the ordinary mutex_enter might -recursively call routines in sync0arr, leading to a deadlock on the OS -mutex. */ -UNIV_INTERN +NOTE! 
Use the corresponding macro, not directly this function! Lock an +rw-lock in SX mode for the current thread. If the rw-lock is locked +in exclusive mode, or there is an exclusive lock request waiting, +the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting +for the lock, before suspending the thread. If the same thread has an x-lock +on the rw-lock, locking succeed, with the following exception: if pass != 0, +only a single sx-lock may be taken on the lock. NOTE: If the same thread has +an s-lock, locking does not succeed! */ void -rw_lock_debug_mutex_enter(void) -/*===========================*/ +rw_lock_sx_lock_func( +/*=================*/ + rw_lock_t* lock, /*!< in: pointer to rw-lock */ + ulint pass, /*!< in: pass value; != 0, if the lock will + be passed to another thread to unlock */ + const char* file_name,/*!< in: file name where lock requested */ + ulint line) /*!< in: line where requested */ + { - os_fast_mutex_lock(&rw_lock_debug_mutex); + ulint i = 0; + sync_array_t* sync_arr; + ulint spin_count = 0; + ulint count_os_wait = 0; + ulint spin_wait_count = 0; + + ut_ad(rw_lock_validate(lock)); + ut_ad(!rw_lock_own(lock, RW_LOCK_S)); + +lock_loop: + + if (rw_lock_sx_lock_low(lock, pass, file_name, line)) { + + if (count_os_wait > 0) { + lock->count_os_wait += count_os_wait; + rw_lock_stats.rw_sx_os_wait_count.add(count_os_wait); + } + + rw_lock_stats.rw_sx_spin_round_count.add(spin_count); + rw_lock_stats.rw_sx_spin_wait_count.add(spin_wait_count); + + /* Locking succeeded */ + return; + + } else { + + ++spin_wait_count; + + /* Spin waiting for the lock_word to become free */ + os_rmb; + while (i < srv_n_spin_wait_rounds + && lock->lock_word <= X_LOCK_HALF_DECR) { + + if (srv_spin_wait_delay) { + ut_delay(ut_rnd_interval( + 0, srv_spin_wait_delay)); + } + + i++; + } + + spin_count += i; + + if (i >= srv_n_spin_wait_rounds) { + + os_thread_yield(); + + } else { + + goto lock_loop; + } + } + + sync_cell_t* cell; + + sync_arr = sync_array_get_and_reserve_cell( + lock, RW_LOCK_SX, file_name, line, &cell); + + /* Waiters must be set before checking lock_word, to ensure signal + is sent. This could lead to a few unnecessary wake-up signals. */ + rw_lock_set_waiter_flag(lock); + + if (rw_lock_sx_lock_low(lock, pass, file_name, line)) { + + sync_array_free_cell(sync_arr, cell); + + if (count_os_wait > 0) { + lock->count_os_wait += count_os_wait; + rw_lock_stats.rw_sx_os_wait_count.add(count_os_wait); + } + + rw_lock_stats.rw_sx_spin_round_count.add(spin_count); + rw_lock_stats.rw_sx_spin_wait_count.add(spin_wait_count); + + /* Locking succeeded */ + return; + } + + ++count_os_wait; + + sync_array_wait_event(sync_arr, cell); + + i = 0; + + goto lock_loop; } +#ifdef UNIV_DEBUG + /******************************************************************//** -Releases the debug mutex. */ -UNIV_INTERN -void -rw_lock_debug_mutex_exit(void) -/*==========================*/ +Checks that the rw-lock has been initialized and that there are no +simultaneous shared and exclusive locks. 
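Before the validate function itself, it helps to see the states that lock_word encodes and that the assertions below merely bound. The decoder here is a simplified reading aid, assuming the usual relationship X_LOCK_HALF_DECR == X_LOCK_DECR / 2; the authoritative state table is the comment in sync0rw.h, and transient states overlap some of these ranges.

static const int X_LOCK_DECR      = 0x20000000;	/* illustrative value */
static const int X_LOCK_HALF_DECR = X_LOCK_DECR / 2;

/* Rough interpretation of rw_lock_t::lock_word (reading aid only). */
static const char* rw_lock_state(int lock_word)
{
	if (lock_word == X_LOCK_DECR)       return "unlocked";
	if (lock_word >  X_LOCK_HALF_DECR)  return "S-locked only";
	if (lock_word == X_LOCK_HALF_DECR)  return "SX-locked";
	if (lock_word >  0)                 return "SX-locked + S-locked";
	if (lock_word == 0)                 return "X-locked";
	if (lock_word >  -X_LOCK_HALF_DECR) return "writer waiting, readers draining";
	if (lock_word == -X_LOCK_HALF_DECR) return "X-locked + SX-locked";
	if (lock_word >  -X_LOCK_DECR)      return "SX holder waiting for X, readers draining";
	return "recursive X (possibly + SX)";
}

The bounds checked by rw_lock_validate(), lock_word > -(2 * X_LOCK_DECR) and lock_word <= X_LOCK_DECR, simply delimit the window these states may occupy.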
+@return true */ +bool +rw_lock_validate( +/*=============*/ + const rw_lock_t* lock) /*!< in: rw-lock */ +{ + ulint waiters; + lint lock_word; + + ut_ad(lock); + + waiters = rw_lock_get_waiters(lock); + lock_word = lock->lock_word; + + ut_ad(lock->magic_n == RW_LOCK_MAGIC_N); + ut_ad(waiters == 0 || waiters == 1); + ut_ad(lock_word > -(2 * X_LOCK_DECR)); + ut_ad(lock_word <= X_LOCK_DECR); + + return(true); +} + +/******************************************************************//** +Checks if somebody has locked the rw-lock in the specified mode. +@return true if locked */ +bool +rw_lock_is_locked( +/*==============*/ + rw_lock_t* lock, /*!< in: rw-lock */ + ulint lock_type) /*!< in: lock type: RW_LOCK_S, + RW_LOCK_X or RW_LOCK_SX */ { - os_fast_mutex_unlock(&rw_lock_debug_mutex); + ut_ad(rw_lock_validate(lock)); + + switch (lock_type) { + case RW_LOCK_S: + return(rw_lock_get_reader_count(lock) > 0); + + case RW_LOCK_X: + return(rw_lock_get_writer(lock) == RW_LOCK_X); + + case RW_LOCK_SX: + return(rw_lock_get_sx_lock_count(lock) > 0); + + default: + ut_error; + } + return(false); /* avoid compiler warnings */ } /******************************************************************//** Inserts the debug information for an rw-lock. */ -UNIV_INTERN void rw_lock_add_debug_info( /*===================*/ @@ -745,35 +1000,43 @@ rw_lock_add_debug_info( const char* file_name, /*!< in: file where requested */ ulint line) /*!< in: line where requested */ { - rw_lock_debug_t* info; + ut_ad(file_name != NULL); - ut_ad(lock); - ut_ad(file_name); - - info = rw_lock_debug_create(); + rw_lock_debug_t* info = rw_lock_debug_create(); rw_lock_debug_mutex_enter(); - info->file_name = file_name; + info->pass = pass; info->line = line; info->lock_type = lock_type; + info->file_name = file_name; info->thread_id = os_thread_get_curr_id(); - info->pass = pass; - UT_LIST_ADD_FIRST(list, lock->debug_list, info); + UT_LIST_ADD_FIRST(lock->debug_list, info); rw_lock_debug_mutex_exit(); - if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) { - sync_thread_add_level(lock, lock->level, - lock_type == RW_LOCK_EX - && lock->lock_word < 0); + if (pass == 0 && lock_type != RW_LOCK_X_WAIT) { + + /* Recursive x while holding SX + (lock_type == RW_LOCK_X && lock_word == -X_LOCK_HALF_DECR) + is treated as not-relock (new lock). */ + + if ((lock_type == RW_LOCK_X + && lock->lock_word < -X_LOCK_HALF_DECR) + || (lock_type == RW_LOCK_SX + && (lock->lock_word < 0 || lock->sx_recursive == 1))) { + + sync_check_lock_validate(lock); + sync_check_lock_granted(lock); + } else { + sync_check_relock(lock); + } } } /******************************************************************//** Removes a debug information struct for an rw-lock. */ -UNIV_INTERN void rw_lock_remove_debug_info( /*======================*/ @@ -785,122 +1048,161 @@ rw_lock_remove_debug_info( ut_ad(lock); - if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) { - sync_thread_reset_level(lock); + if (pass == 0 && lock_type != RW_LOCK_X_WAIT) { + sync_check_unlock(lock); } rw_lock_debug_mutex_enter(); - info = UT_LIST_GET_FIRST(lock->debug_list); + for (info = UT_LIST_GET_FIRST(lock->debug_list); + info != 0; + info = UT_LIST_GET_NEXT(list, info)) { - while (info != NULL) { - if ((pass == info->pass) - && ((pass != 0) + if (pass == info->pass + && (pass != 0 || os_thread_eq(info->thread_id, os_thread_get_curr_id())) - && (info->lock_type == lock_type)) { + && info->lock_type == lock_type) { /* Found! 
*/ - UT_LIST_REMOVE(list, lock->debug_list, info); + UT_LIST_REMOVE(lock->debug_list, info); + rw_lock_debug_mutex_exit(); rw_lock_debug_free(info); return; } - - info = UT_LIST_GET_NEXT(list, info); } ut_error; } -#endif /* UNIV_SYNC_DEBUG */ -#ifdef UNIV_SYNC_DEBUG /******************************************************************//** Checks if the thread has locked the rw-lock in the specified mode, with the pass value == 0. -@return TRUE if locked */ -UNIV_INTERN +@return TRUE if locked */ ibool rw_lock_own( /*========*/ rw_lock_t* lock, /*!< in: rw-lock */ - ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED, - RW_LOCK_EX */ + ulint lock_type) /*!< in: lock type: RW_LOCK_S, + RW_LOCK_X */ { - rw_lock_debug_t* info; - ut_ad(lock); ut_ad(rw_lock_validate(lock)); rw_lock_debug_mutex_enter(); - info = UT_LIST_GET_FIRST(lock->debug_list); - - while (info != NULL) { + for (const rw_lock_debug_t* info = UT_LIST_GET_FIRST(lock->debug_list); + info != NULL; + info = UT_LIST_GET_NEXT(list, info)) { if (os_thread_eq(info->thread_id, os_thread_get_curr_id()) - && (info->pass == 0) - && (info->lock_type == lock_type)) { + && info->pass == 0 + && info->lock_type == lock_type) { rw_lock_debug_mutex_exit(); /* Found! */ return(TRUE); } - - info = UT_LIST_GET_NEXT(list, info); } rw_lock_debug_mutex_exit(); return(FALSE); } -#endif /* UNIV_SYNC_DEBUG */ -/******************************************************************//** -Checks if somebody has locked the rw-lock in the specified mode. -@return TRUE if locked */ -UNIV_INTERN -ibool -rw_lock_is_locked( -/*==============*/ - rw_lock_t* lock, /*!< in: rw-lock */ - ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED, - RW_LOCK_EX */ +/** For collecting the debug information for a thread's rw-lock */ +typedef std::vector<rw_lock_debug_t*> Infos; + +/** Get the thread debug info +@param[in] infos The rw-lock mode owned by the threads +@param[in] lock rw-lock to check +@return the thread debug info or NULL if not found */ +void +rw_lock_get_debug_info(const rw_lock_t* lock, Infos* infos) { - ibool ret = FALSE; + rw_lock_debug_t* info = NULL; - ut_ad(lock); ut_ad(rw_lock_validate(lock)); - if (lock_type == RW_LOCK_SHARED) { - if (rw_lock_get_reader_count(lock) > 0) { - ret = TRUE; + rw_lock_debug_mutex_enter(); + + for (info = UT_LIST_GET_FIRST(lock->debug_list); + info != NULL; + info = UT_LIST_GET_NEXT(list, info)) { + + if (os_thread_eq(info->thread_id, os_thread_get_curr_id())) { + + infos->push_back(info); + } + } + + rw_lock_debug_mutex_exit(); +} + +/** Checks if the thread has locked the rw-lock in the specified mode, with +the pass value == 0. 
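These ownership predicates exist for debug assertions. A typical call site for the flagged variant declared next looks like the following; the surrounding function is purely hypothetical, and only pass == 0 locks are counted.

/* Hypothetical caller: must hold the latch in X or SX mode before
modifying the protected structure. The flags are OR-able, so one
assertion can accept several lock modes. */
void modify_protected_structure(rw_lock_t* latch)
{
	ut_ad(rw_lock_own_flagged(latch,
				  RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));

	/* ... changes that require write access ... */
}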
+@param[in] lock rw-lock +@param[in] flags specify lock types with OR of the + rw_lock_flag_t values +@return true if locked */ +bool +rw_lock_own_flagged( + const rw_lock_t* lock, + rw_lock_flags_t flags) +{ + Infos infos; + + rw_lock_get_debug_info(lock, &infos); + + Infos::const_iterator end = infos.end(); + + for (Infos::const_iterator it = infos.begin(); it != end; ++it) { + + const rw_lock_debug_t* info = *it; + + ut_ad(os_thread_eq(info->thread_id, os_thread_get_curr_id())); + + if (info->pass != 0) { + continue; } - } else if (lock_type == RW_LOCK_EX) { - if (rw_lock_get_writer(lock) == RW_LOCK_EX) { - ret = TRUE; + + switch (info->lock_type) { + case RW_LOCK_S: + + if (flags & RW_LOCK_FLAG_S) { + return(true); + } + break; + + case RW_LOCK_X: + + if (flags & RW_LOCK_FLAG_X) { + return(true); + } + break; + + case RW_LOCK_SX: + + if (flags & RW_LOCK_FLAG_SX) { + return(true); + } } - } else { - ut_error; } - return(ret); + return(false); } -#ifdef UNIV_SYNC_DEBUG /***************************************************************//** Prints debug info of currently locked rw-locks. */ -UNIV_INTERN void rw_lock_list_print_info( /*====================*/ FILE* file) /*!< in: file where to print */ { - rw_lock_t* lock; - ulint count = 0; - rw_lock_debug_t* info; + ulint count = 0; mutex_enter(&rw_lock_list_mutex); @@ -908,15 +1210,16 @@ rw_lock_list_print_info( "RW-LATCH INFO\n" "-------------\n", file); - lock = UT_LIST_GET_FIRST(rw_lock_list); - - while (lock != NULL) { + for (const rw_lock_t* lock = UT_LIST_GET_FIRST(rw_lock_list); + lock != NULL; + lock = UT_LIST_GET_NEXT(list, lock)) { count++; #ifndef INNODB_RW_LOCKS_USE_ATOMICS - mutex_enter(&(lock->mutex)); -#endif + mutex_enter(&lock->mutex); +#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ + if (lock->lock_word != X_LOCK_DECR) { fprintf(file, "RW-LOCK: %p ", (void*) lock); @@ -927,19 +1230,23 @@ rw_lock_list_print_info( putc('\n', file); } + rw_lock_debug_t* info; + rw_lock_debug_mutex_enter(); - info = UT_LIST_GET_FIRST(lock->debug_list); - while (info != NULL) { + + for (info = UT_LIST_GET_FIRST(lock->debug_list); + info != NULL; + info = UT_LIST_GET_NEXT(list, info)) { + rw_lock_debug_print(file, info); - info = UT_LIST_GET_NEXT(list, info); } + rw_lock_debug_mutex_exit(); } -#ifndef INNODB_RW_LOCKS_USE_ATOMICS - mutex_exit(&(lock->mutex)); -#endif - lock = UT_LIST_GET_NEXT(list, lock); +#ifndef INNODB_RW_LOCKS_USE_ATOMICS + mutex_exit(&lock->mutex); +#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ } fprintf(file, "Total number of rw-locks %ld\n", count); @@ -948,7 +1255,6 @@ rw_lock_list_print_info( /***************************************************************//** Prints debug info of an rw-lock. */ -UNIV_INTERN void rw_lock_print( /*==========*/ @@ -963,12 +1269,13 @@ rw_lock_print( #ifndef INNODB_RW_LOCKS_USE_ATOMICS /* We used to acquire lock->mutex here, but it would cause a - recursive call to sync_thread_add_level() if UNIV_SYNC_DEBUG + recursive call to sync_thread_add_level() if UNIV_DEBUG is defined. Since this function is only invoked from sync_thread_levels_g(), let us choose the smaller evil: performing dirty reads instead of causing bogus deadlocks or assertion failures. 
*/ -#endif +#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ + if (lock->lock_word != X_LOCK_DECR) { if (rw_lock_get_waiters(lock)) { @@ -978,73 +1285,128 @@ rw_lock_print( } rw_lock_debug_mutex_enter(); - info = UT_LIST_GET_FIRST(lock->debug_list); - while (info != NULL) { + + for (info = UT_LIST_GET_FIRST(lock->debug_list); + info != NULL; + info = UT_LIST_GET_NEXT(list, info)) { + rw_lock_debug_print(stderr, info); - info = UT_LIST_GET_NEXT(list, info); } + rw_lock_debug_mutex_exit(); } } /*********************************************************************//** Prints info of a debug struct. */ -UNIV_INTERN void rw_lock_debug_print( /*================*/ FILE* f, /*!< in: output stream */ - rw_lock_debug_t* info) /*!< in: debug struct */ + const rw_lock_debug_t* info) /*!< in: debug struct */ { - ulint rwt; - - rwt = info->lock_type; + ulint rwt = info->lock_type; fprintf(f, "Locked: thread %lu file %s line %lu ", - (ulong) os_thread_pf(info->thread_id), info->file_name, - (ulong) info->line); - if (rwt == RW_LOCK_SHARED) { + static_cast<ulong>(os_thread_pf(info->thread_id)), + sync_basename(info->file_name), + static_cast<ulong>(info->line)); + + switch (rwt) { + case RW_LOCK_S: fputs("S-LOCK", f); - } else if (rwt == RW_LOCK_EX) { + break; + case RW_LOCK_X: fputs("X-LOCK", f); - } else if (rwt == RW_LOCK_WAIT_EX) { + break; + case RW_LOCK_SX: + fputs("SX-LOCK", f); + break; + case RW_LOCK_X_WAIT: fputs("WAIT X-LOCK", f); - } else { + break; + default: ut_error; } + if (info->pass != 0) { fprintf(f, " pass value %lu", (ulong) info->pass); } - putc('\n', f); + + fprintf(f, "\n"); } /***************************************************************//** Returns the number of currently locked rw-locks. Works only in the debug version. -@return number of locked rw-locks */ -UNIV_INTERN +@return number of locked rw-locks */ ulint rw_lock_n_locked(void) /*==================*/ { - rw_lock_t* lock; - ulint count = 0; + ulint count = 0; mutex_enter(&rw_lock_list_mutex); - lock = UT_LIST_GET_FIRST(rw_lock_list); - - while (lock != NULL) { + for (const rw_lock_t* lock = UT_LIST_GET_FIRST(rw_lock_list); + lock != NULL; + lock = UT_LIST_GET_NEXT(list, lock)) { if (lock->lock_word != X_LOCK_DECR) { count++; } - - lock = UT_LIST_GET_NEXT(list, lock); } mutex_exit(&rw_lock_list_mutex); return(count); } -#endif /* UNIV_SYNC_DEBUG */ + +/** Print where it was locked from +@return the string representation */ +std::string +rw_lock_t::locked_from() const +{ + /* Note: For X locks it can be locked form multiple places because + the same thread can call X lock recursively. */ + + std::ostringstream msg; + Infos infos; + + rw_lock_get_debug_info(this, &infos); + + ulint i = 0; + Infos::const_iterator end = infos.end(); + + for (Infos::const_iterator it = infos.begin(); it != end; ++it, ++i) { + + const rw_lock_debug_t* info = *it; + + ut_ad(os_thread_eq(info->thread_id, os_thread_get_curr_id())); + + if (i > 0) { + msg << ", "; + } + + msg << info->file_name << ":" << info->line; + } + + return(msg.str()); + +} + +/** Print the rw-lock information. 
+@return the string representation */ +std::string +rw_lock_t::to_string() const +{ + std::ostringstream msg; + + msg << "RW-LATCH: " + << "thread id " << os_thread_pf(os_thread_get_curr_id()) + << " addr: " << this + << " Locked from: " << locked_from().c_str(); + + return(msg.str()); +} +#endif /* UNIV_DEBUG */ diff --git a/storage/innobase/sync/sync0sync.cc b/storage/innobase/sync/sync0sync.cc index 3e3ce353724..f304d7dbe15 100644 --- a/storage/innobase/sync/sync0sync.cc +++ b/storage/innobase/sync/sync0sync.cc @@ -30,1551 +30,248 @@ Mutex, the basic synchronization primitive Created 9/5/1995 Heikki Tuuri *******************************************************/ -#include "sync0sync.h" -#ifdef UNIV_NONINL -#include "sync0sync.ic" -#include "sync0arr.ic" -#endif - +#include "univ.i" #include "sync0rw.h" -#include "buf0buf.h" -#include "srv0srv.h" -#include "buf0types.h" -#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */ -#ifdef UNIV_SYNC_DEBUG -# include "srv0start.h" /* srv_is_being_started */ -#endif /* UNIV_SYNC_DEBUG */ -#include "ha_prototypes.h" -#include "my_cpu.h" - -#include <vector> - -/* - REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX - ============================================ - -Semaphore operations in operating systems are slow: Solaris on a 1993 Sparc -takes 3 microseconds (us) for a lock-unlock pair and Windows NT on a 1995 -Pentium takes 20 microseconds for a lock-unlock pair. Therefore, we have to -implement our own efficient spin lock mutex. Future operating systems may -provide efficient spin locks, but we cannot count on that. - -Another reason for implementing a spin lock is that on multiprocessor systems -it can be more efficient for a processor to run a loop waiting for the -semaphore to be released than to switch to a different thread. A thread switch -takes 25 us on both platforms mentioned above. See Gray and Reuter's book -Transaction processing for background. - -How long should the spin loop last before suspending the thread? On a -uniprocessor, spinning does not help at all, because if the thread owning the -mutex is not executing, it cannot be released. Spinning actually wastes -resources. - -On a multiprocessor, we do not know if the thread owning the mutex is -executing or not. Thus it would make sense to spin as long as the operation -guarded by the mutex would typically last assuming that the thread is -executing. If the mutex is not released by that time, we may assume that the -thread owning the mutex is not executing and suspend the waiting thread. - -A typical operation (where no i/o involved) guarded by a mutex or a read-write -lock may last 1 - 20 us on the current Pentium platform. The longest -operations are the binary searches on an index node. - -We conclude that the best choice is to set the spin time at 20 us. Then the -system should work well on a multiprocessor. On a uniprocessor we have to -make sure that thread swithches due to mutex collisions are not frequent, -i.e., they do not happen every 100 us or so, because that wastes too much -resources. If the thread switches are not frequent, the 20 us wasted in spin -loop is not too much. - -Empirical studies on the effect of spin time should be done for different -platforms. - - - IMPLEMENTATION OF THE MUTEX - =========================== - -For background, see Curt Schimmel's book on Unix implementation on modern -architectures. The key points in the implementation are atomicity and -serialization of memory accesses. The test-and-set instruction (XCHG in -Pentium) must be atomic. 
As new processors may have weak memory models, also -serialization of memory references may be necessary. The successor of Pentium, -P6, has at least one mode where the memory model is weak. As far as we know, -in Pentium all memory accesses are serialized in the program order and we do -not have to worry about the memory model. On other processors there are -special machine instructions called a fence, memory barrier, or storage -barrier (STBAR in Sparc), which can be used to serialize the memory accesses -to happen in program order relative to the fence instruction. - -Leslie Lamport has devised a "bakery algorithm" to implement a mutex without -the atomic test-and-set, but his algorithm should be modified for weak memory -models. We do not use Lamport's algorithm, because we guess it is slower than -the atomic test-and-set. - -Our mutex implementation works as follows: After that we perform the atomic -test-and-set instruction on the memory word. If the test returns zero, we -know we got the lock first. If the test returns not zero, some other thread -was quicker and got the lock: then we spin in a loop reading the memory word, -waiting it to become zero. It is wise to just read the word in the loop, not -perform numerous test-and-set instructions, because they generate memory -traffic between the cache and the main memory. The read loop can just access -the cache, saving bus bandwidth. - -If we cannot acquire the mutex lock in the specified time, we reserve a cell -in the wait array, set the waiters byte in the mutex to 1. To avoid a race -condition, after setting the waiters byte and before suspending the waiting -thread, we still have to check that the mutex is reserved, because it may -have happened that the thread which was holding the mutex has just released -it and did not see the waiters byte set to 1, a case which would lead the -other thread to an infinite wait. - -LEMMA 1: After a thread resets the event of a mutex (or rw_lock), some -====== -thread will eventually call os_event_set() on that particular event. -Thus no infinite wait is possible in this case. - -Proof: After making the reservation the thread sets the waiters field in the -mutex to 1. Then it checks that the mutex is still reserved by some thread, -or it reserves the mutex for itself. In any case, some thread (which may be -also some earlier thread, not necessarily the one currently holding the mutex) -will set the waiters field to 0 in mutex_exit, and then call -os_event_set() with the mutex as an argument. -Q.E.D. - -LEMMA 2: If an os_event_set() call is made after some thread has called -====== -the os_event_reset() and before it starts wait on that event, the call -will not be lost to the second thread. This is true even if there is an -intervening call to os_event_reset() by another thread. -Thus no infinite wait is possible in this case. - -Proof (non-windows platforms): os_event_reset() returns a monotonically -increasing value of signal_count. This value is increased at every -call of os_event_set() If thread A has called os_event_reset() followed -by thread B calling os_event_set() and then some other thread C calling -os_event_reset(), the is_set flag of the event will be set to FALSE; -but now if thread A calls os_event_wait_low() with the signal_count -value returned from the earlier call of os_event_reset(), it will -return immediately without waiting. -Q.E.D. 
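LEMMA 2 is easiest to see in code. Below is a minimal model of the os_event signal_count protocol using standard C++ primitives; it is a sketch of the invariant the proof relies on, not InnoDB's os_event implementation. reset() returns the current count, set() advances it, and a wait that passes in a stale count returns immediately even if an intervening reset() has cleared the flag again.

#include <condition_variable>
#include <cstdint>
#include <mutex>

/* Minimal model of the os_event signal_count protocol. */
class event {
	std::mutex              m;
	std::condition_variable cv;
	bool                    is_set{true};
	std::uint64_t           signal_count{0};

public:
	/* Models os_event_reset(): returns the count to pass to
	wait_low(). */
	std::uint64_t reset() {
		std::lock_guard<std::mutex> g(m);
		is_set = false;
		return signal_count;
	}

	/* Models os_event_set(): bumps the count, so any reset() taken
	before this call is observably stale. */
	void set() {
		std::lock_guard<std::mutex> g(m);
		is_set = true;
		++signal_count;
		cv.notify_all();
	}

	/* Models os_event_wait_low(): returns at once if a set()
	happened after the reset() that produced 'count', even if a
	later reset() cleared is_set again. */
	void wait_low(std::uint64_t count) {
		std::unique_lock<std::mutex> g(m);
		cv.wait(g, [&] { return is_set || signal_count != count; });
	}
};

Thread A runs sig = ev.reset(); ... ev.wait_low(sig); and thread B runs ev.set(); exactly the interleaving the proof argues about, including a third thread's intervening reset().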
- -Proof (windows): If there is a writer thread which is forced to wait for -the lock, it may be able to set the state of rw_lock to RW_LOCK_WAIT_EX -The design of rw_lock ensures that there is one and only one thread -that is able to change the state to RW_LOCK_WAIT_EX and this thread is -guaranteed to acquire the lock after it is released by the current -holders and before any other waiter gets the lock. -On windows this thread waits on a separate event i.e.: wait_ex_event. -Since only one thread can wait on this event there is no chance -of this event getting reset before the writer starts wait on it. -Therefore, this thread is guaranteed to catch the os_set_event() -signalled unconditionally at the release of the lock. -Q.E.D. */ - -/* Number of spin waits on mutexes: for performance monitoring */ - -/** The number of iterations in the mutex_spin_wait() spin loop. -Intended for performance monitoring. */ -static ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_round_count; -/** The number of mutex_spin_wait() calls. Intended for -performance monitoring. */ -static ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_wait_count; -/** The number of OS waits in mutex_spin_wait(). Intended for -performance monitoring. */ -static ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_os_wait_count; -/** The number of mutex_exit() calls. Intended for performance -monitoring. */ -UNIV_INTERN ib_int64_t mutex_exit_count; - -/** This variable is set to TRUE when sync_init is called */ -UNIV_INTERN ibool sync_initialized = FALSE; - -#ifdef UNIV_SYNC_DEBUG -/** An acquired mutex or rw-lock and its level in the latching order */ -struct sync_level_t; -/** Mutexes or rw-locks held by a thread */ -struct sync_thread_t; - -/** The latch levels currently owned by threads are stored in this data -structure; the size of this array is OS_THREAD_MAX_N */ - -UNIV_INTERN sync_thread_t* sync_thread_level_arrays; - -/** Mutex protecting sync_thread_level_arrays */ -UNIV_INTERN ib_mutex_t sync_thread_mutex; - -# ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t sync_thread_mutex_key; -# endif /* UNIV_PFS_MUTEX */ -#endif /* UNIV_SYNC_DEBUG */ - -/** Global list of database mutexes (not OS mutexes) created. 
*/ -UNIV_INTERN ut_list_base_node_t mutex_list; - -/** Mutex protecting the mutex_list variable */ -UNIV_INTERN ib_mutex_t mutex_list_mutex; +#include "sync0sync.h" #ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t mutex_list_mutex_key; +/* Key to register autoinc_mutex with performance schema */ +mysql_pfs_key_t autoinc_mutex_key; +mysql_pfs_key_t buffer_block_mutex_key; +mysql_pfs_key_t buf_pool_mutex_key; +mysql_pfs_key_t buf_pool_zip_mutex_key; +mysql_pfs_key_t cache_last_read_mutex_key; +mysql_pfs_key_t dict_foreign_err_mutex_key; +mysql_pfs_key_t dict_sys_mutex_key; +mysql_pfs_key_t file_format_max_mutex_key; +mysql_pfs_key_t fil_system_mutex_key; +mysql_pfs_key_t flush_list_mutex_key; +mysql_pfs_key_t fts_bg_threads_mutex_key; +mysql_pfs_key_t fts_delete_mutex_key; +mysql_pfs_key_t fts_optimize_mutex_key; +mysql_pfs_key_t fts_doc_id_mutex_key; +mysql_pfs_key_t fts_pll_tokenize_mutex_key; +mysql_pfs_key_t hash_table_mutex_key; +mysql_pfs_key_t ibuf_bitmap_mutex_key; +mysql_pfs_key_t ibuf_mutex_key; +mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key; +mysql_pfs_key_t log_sys_mutex_key; +mysql_pfs_key_t log_cmdq_mutex_key; +mysql_pfs_key_t log_flush_order_mutex_key; +mysql_pfs_key_t mutex_list_mutex_key; +mysql_pfs_key_t recalc_pool_mutex_key; +mysql_pfs_key_t page_cleaner_mutex_key; +mysql_pfs_key_t purge_sys_pq_mutex_key; +mysql_pfs_key_t recv_sys_mutex_key; +mysql_pfs_key_t recv_writer_mutex_key; +mysql_pfs_key_t redo_rseg_mutex_key; +mysql_pfs_key_t noredo_rseg_mutex_key; +mysql_pfs_key_t page_zip_stat_per_index_mutex_key; +# ifdef UNIV_DEBUG +mysql_pfs_key_t rw_lock_debug_mutex_key; +# endif /* UNIV_DEBUG */ +mysql_pfs_key_t rtr_active_mutex_key; +mysql_pfs_key_t rtr_match_mutex_key; +mysql_pfs_key_t rtr_path_mutex_key; +mysql_pfs_key_t rtr_ssn_mutex_key; +mysql_pfs_key_t rw_lock_list_mutex_key; +mysql_pfs_key_t rw_lock_mutex_key; +mysql_pfs_key_t srv_dict_tmpfile_mutex_key; +mysql_pfs_key_t srv_innodb_monitor_mutex_key; +mysql_pfs_key_t srv_misc_tmpfile_mutex_key; +mysql_pfs_key_t srv_monitor_file_mutex_key; +# ifdef UNIV_DEBUG +mysql_pfs_key_t sync_thread_mutex_key; +# endif /* UNIV_DEBUG */ +mysql_pfs_key_t buf_dblwr_mutex_key; +mysql_pfs_key_t trx_undo_mutex_key; +mysql_pfs_key_t trx_mutex_key; +mysql_pfs_key_t trx_pool_mutex_key; +mysql_pfs_key_t trx_pool_manager_mutex_key; +mysql_pfs_key_t lock_mutex_key; +mysql_pfs_key_t lock_wait_mutex_key; +mysql_pfs_key_t trx_sys_mutex_key; +mysql_pfs_key_t srv_sys_mutex_key; +mysql_pfs_key_t srv_threads_mutex_key; +mysql_pfs_key_t event_mutex_key; +mysql_pfs_key_t event_manager_mutex_key; +mysql_pfs_key_t sync_array_mutex_key; +mysql_pfs_key_t thread_mutex_key; +mysql_pfs_key_t zip_pad_mutex_key; +mysql_pfs_key_t row_drop_list_mutex_key; #endif /* UNIV_PFS_MUTEX */ -#ifdef UNIV_SYNC_DEBUG -/** Latching order checks start when this is set TRUE */ -UNIV_INTERN ibool sync_order_checks_on = FALSE; - -/** Number of slots reserved for each OS thread in the sync level array */ -static const ulint SYNC_THREAD_N_LEVELS = 10000; - -/** Array for tracking sync levels per thread. 
*/ -typedef std::vector<sync_level_t> sync_arr_t; - - -/** Mutexes or rw-locks held by a thread */ -struct sync_thread_t{ - os_thread_id_t id; /*!< OS thread id */ - sync_arr_t* levels; /*!< level array for this thread; if - this is NULL this slot is unused */ -}; - -/** An acquired mutex or rw-lock and its level in the latching order */ -struct sync_level_t{ - void* latch; /*!< pointer to a mutex or an - rw-lock; NULL means that - the slot is empty */ - ulint level; /*!< level of the latch in the - latching order. This field is - overloaded to serve as a node in a - linked list of free nodes too. When - latch == NULL then this will contain - the ordinal value of the next free - element */ -}; -#endif /* UNIV_SYNC_DEBUG */ - -/******************************************************************//** -Creates, or rather, initializes a mutex object in a specified memory -location (which must be appropriately aligned). The mutex is initialized -in the reset state. Explicit freeing of the mutex with mutex_free is -necessary only if the memory block containing it is freed. */ -UNIV_INTERN +#ifdef UNIV_PFS_RWLOCK +mysql_pfs_key_t btr_search_latch_key; +mysql_pfs_key_t buf_block_lock_key; +# ifdef UNIV_DEBUG +mysql_pfs_key_t buf_block_debug_latch_key; +# endif /* UNIV_DEBUG */ +mysql_pfs_key_t checkpoint_lock_key; +mysql_pfs_key_t dict_operation_lock_key; +mysql_pfs_key_t dict_table_stats_key; +mysql_pfs_key_t hash_table_locks_key; +mysql_pfs_key_t index_tree_rw_lock_key; +mysql_pfs_key_t index_online_log_key; +mysql_pfs_key_t fil_space_latch_key; +mysql_pfs_key_t fts_cache_rw_lock_key; +mysql_pfs_key_t fts_cache_init_rw_lock_key; +mysql_pfs_key_t trx_i_s_cache_lock_key; +mysql_pfs_key_t trx_purge_latch_key; +# ifdef UNIV_DEBUG +mysql_pfs_key_t buf_chunk_map_latch_key; +# endif /* UNIV_DEBUG */ +#endif /* UNIV_PFS_RWLOCK */ + +/** For monitoring active mutexes */ +MutexMonitor* mutex_monitor; + +/** +Prints wait info of the sync system. +@param file - where to print */ +static void -mutex_create_func( -/*==============*/ - ib_mutex_t* mutex, /*!< in: pointer to memory */ - const char* cmutex_name, /*!< in: mutex name */ -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /*!< in: level */ -# endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ - const char* cfile_name, /*!< in: file name where created */ - ulint cline) /*!< in: file line where created */ +sync_print_wait_info(FILE* file) { -#if defined(HAVE_ATOMIC_BUILTINS) - mutex_reset_lock_word(mutex); -#else - os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mutex->os_fast_mutex); - mutex->lock_word = 0; -#endif - mutex->event = os_event_create(); - mutex_set_waiters(mutex, 0); -#ifdef UNIV_DEBUG - mutex->magic_n = MUTEX_MAGIC_N; -#endif /* UNIV_DEBUG */ - - mutex->line = 0; - mutex->file_name = "not yet reserved"; -#ifdef UNIV_SYNC_DEBUG - mutex->level = level; -#endif /* UNIV_SYNC_DEBUG */ - mutex->cfile_name = cfile_name; - mutex->cline = cline; - mutex->count_os_wait = 0; - mutex->cmutex_name = cmutex_name; - - /* Check that lock_word is aligned; this is important on Intel */ - ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0); - - /* NOTE! 
The very first mutexes are not put to the mutex list */ - - if (mutex == &mutex_list_mutex -#ifdef UNIV_SYNC_DEBUG - || mutex == &sync_thread_mutex -#endif /* UNIV_SYNC_DEBUG */ - ) { - - return; - } - - mutex_enter(&mutex_list_mutex); - - ut_ad(UT_LIST_GET_LEN(mutex_list) == 0 - || UT_LIST_GET_FIRST(mutex_list)->magic_n == MUTEX_MAGIC_N); - - UT_LIST_ADD_FIRST(list, mutex_list, mutex); + fprintf(file, + "RW-shared spins " UINT64PF ", rounds " UINT64PF "," + " OS waits " UINT64PF "\n" + "RW-excl spins " UINT64PF ", rounds " UINT64PF "," + " OS waits " UINT64PF "\n" + "RW-sx spins " UINT64PF ", rounds " UINT64PF "," + " OS waits " UINT64PF "\n", + (ib_uint64_t) rw_lock_stats.rw_s_spin_wait_count, + (ib_uint64_t) rw_lock_stats.rw_s_spin_round_count, + (ib_uint64_t) rw_lock_stats.rw_s_os_wait_count, + (ib_uint64_t) rw_lock_stats.rw_x_spin_wait_count, + (ib_uint64_t) rw_lock_stats.rw_x_spin_round_count, + (ib_uint64_t) rw_lock_stats.rw_x_os_wait_count, + (ib_uint64_t) rw_lock_stats.rw_sx_spin_wait_count, + (ib_uint64_t) rw_lock_stats.rw_sx_spin_round_count, + (ib_uint64_t) rw_lock_stats.rw_sx_os_wait_count); - mutex_exit(&mutex_list_mutex); + fprintf(file, + "Spin rounds per wait: %.2f RW-shared," + " %.2f RW-excl, %.2f RW-sx\n", + (double) rw_lock_stats.rw_s_spin_round_count / + (rw_lock_stats.rw_s_spin_wait_count + ? rw_lock_stats.rw_s_spin_wait_count : 1), + (double) rw_lock_stats.rw_x_spin_round_count / + (rw_lock_stats.rw_x_spin_wait_count + ? rw_lock_stats.rw_x_spin_wait_count : 1), + (double) rw_lock_stats.rw_sx_spin_round_count / + (rw_lock_stats.rw_sx_spin_wait_count + ? rw_lock_stats.rw_sx_spin_wait_count : 1)); } -/******************************************************************//** -NOTE! Use the corresponding macro mutex_free(), not directly this function! -Calling this function is obligatory only if the memory buffer containing -the mutex is freed. Removes a mutex object from the mutex list. The mutex -is checked to be in the reset state. */ -UNIV_INTERN +/** +Prints info of the sync system. +@param file - where to print */ void -mutex_free_func( -/*============*/ - ib_mutex_t* mutex) /*!< in: mutex */ +sync_print(FILE* file) { - ut_ad(mutex_validate(mutex)); - ut_a(mutex_get_lock_word(mutex) == 0); - ut_a(mutex_get_waiters(mutex) == 0); - -#ifdef UNIV_MEM_DEBUG - if (mutex == &mem_hash_mutex) { - ut_ad(UT_LIST_GET_LEN(mutex_list) == 1); - ut_ad(UT_LIST_GET_FIRST(mutex_list) == &mem_hash_mutex); - UT_LIST_REMOVE(list, mutex_list, mutex); - goto func_exit; - } -#endif /* UNIV_MEM_DEBUG */ - - if (mutex != &mutex_list_mutex -#ifdef UNIV_SYNC_DEBUG - && mutex != &sync_thread_mutex -#endif /* UNIV_SYNC_DEBUG */ - ) { - - mutex_enter(&mutex_list_mutex); - - ut_ad(!UT_LIST_GET_PREV(list, mutex) - || UT_LIST_GET_PREV(list, mutex)->magic_n - == MUTEX_MAGIC_N); - ut_ad(!UT_LIST_GET_NEXT(list, mutex) - || UT_LIST_GET_NEXT(list, mutex)->magic_n - == MUTEX_MAGIC_N); - - UT_LIST_REMOVE(list, mutex_list, mutex); - - mutex_exit(&mutex_list_mutex); - } - - os_event_free(mutex->event); -#ifdef UNIV_MEM_DEBUG -func_exit: -#endif /* UNIV_MEM_DEBUG */ -#if !defined(HAVE_ATOMIC_BUILTINS) - os_fast_mutex_free(&(mutex->os_fast_mutex)); -#endif - /* If we free the mutex protecting the mutex list (freeing is - not necessary), we have to reset the magic number AFTER removing - it from the list. */ #ifdef UNIV_DEBUG - mutex->magic_n = 0; + rw_lock_list_print_info(file); #endif /* UNIV_DEBUG */ - return; -} - -/********************************************************************//** -NOTE! 
Use the corresponding macro in the header file, not this function -directly. Tries to lock the mutex for the current thread. If the lock is not -acquired immediately, returns with return value 1. -@return 0 if succeed, 1 if not */ -UNIV_INTERN -ulint -mutex_enter_nowait_func( -/*====================*/ - ib_mutex_t* mutex, /*!< in: pointer to mutex */ - const char* file_name MY_ATTRIBUTE((unused)), - /*!< in: file name where mutex - requested */ - ulint line MY_ATTRIBUTE((unused))) - /*!< in: line where requested */ -{ - ut_ad(mutex_validate(mutex)); - if (!ib_mutex_test_and_set(mutex)) { - - mutex->thread_id = os_thread_get_curr_id(); -#ifdef UNIV_SYNC_DEBUG - mutex_set_debug_info(mutex, file_name, line); -#else - if (srv_instrument_semaphores) { - mutex->file_name = file_name; - mutex->line = line; - } -#endif - return(0); /* Succeeded! */ - } + sync_array_print(file); - return(1); + sync_print_wait_info(file); } -#ifdef UNIV_DEBUG -/******************************************************************//** -Checks that the mutex has been initialized. -@return TRUE */ -UNIV_INTERN -ibool -mutex_validate( -/*===========*/ - const ib_mutex_t* mutex) /*!< in: mutex */ +/** Print the filename "basename" e.g., p = "/a/b/c/d/e.cc" -> p = "e.cc" +@param[in] filename Name from where to extract the basename +@return the basename */ +const char* +sync_basename(const char* filename) { - ut_a(mutex); + const char* ptr = filename + strlen(filename) - 1; - if (mutex->magic_n != MUTEX_MAGIC_N) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Mutex %p not initialized file %s line %lu.", - mutex, mutex->cfile_name, mutex->cline); + while (ptr > filename && *ptr != '/' && *ptr != '\\') { + --ptr; } - ut_ad(mutex->magic_n == MUTEX_MAGIC_N); - return(TRUE); -} + ++ptr; -/******************************************************************//** -Checks that the current thread owns the mutex. Works only in the debug -version. -@return TRUE if owns */ -UNIV_INTERN -ibool -mutex_own( -/*======*/ - const ib_mutex_t* mutex) /*!< in: mutex */ -{ - ut_ad(mutex_validate(mutex)); - - return(mutex_get_lock_word(mutex) == 1 - && os_thread_eq(mutex->thread_id, os_thread_get_curr_id())); + return(ptr); } -#endif /* UNIV_DEBUG */ -/******************************************************************//** -Sets the waiters field in a mutex. */ -UNIV_INTERN -void -mutex_set_waiters( -/*==============*/ - ib_mutex_t* mutex, /*!< in: mutex */ - ulint n) /*!< in: value to set */ +/** String representation of the filename and line number where the +latch was created +@param[in] id Latch ID +@param[in] created Filename and line number where it was crated +@return the string representation */ +std::string +sync_mutex_to_string( + latch_id_t id, + const std::string& created) { - volatile ulint* ptr; /* declared volatile to ensure that - the value is stored to memory */ - ut_ad(mutex); + std::ostringstream msg; - ptr = &(mutex->waiters); + msg << "Mutex " << sync_latch_get_name(id) << " " + << "created " << created; - *ptr = n; /* Here we assume that the write of a single - word in memory is atomic */ + return(msg.str()); } -/******************************************************************//** -Reserves a mutex for the current thread. If the mutex is reserved, the -function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting -for the mutex before suspending the thread. 
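A compressed sketch of the loop described here, assuming an atomic exchange stands in for ib_mutex_test_and_set() and a yield for the OS fallback; all names are illustrative. Note the read-only inner loop, which keeps the spinning in the local cache instead of generating test-and-set bus traffic; the block path through the wait array is sketched after the waiters discussion below.

#include <atomic>
#include <thread>

static const unsigned SPIN_ROUNDS = 30;	/* stands in for SYNC_SPIN_ROUNDS */

/* Models the fast path of mutex_spin_wait(): spin reading, then try
the atomic test-and-set, then give up the time slice and retry. */
static void spin_lock(std::atomic<int>& lock_word)
{
	for (;;) {
		/* Read-only spin: no stores, so the loop runs out of
		the local cache while another CPU owns the word. */
		for (unsigned i = 0;
		     i < SPIN_ROUNDS
		     && lock_word.load(std::memory_order_relaxed) != 0;
		     ++i) {
			/* the real loop adds a short random ut_delay() */
		}

		if (lock_word.exchange(1, std::memory_order_acquire) == 0) {
			return;		/* test-and-set returned 0: ours */
		}

		std::this_thread::yield();	/* spin budget exhausted */
	}
}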
*/ -UNIV_INTERN +/** Enable the mutex monitoring */ void -mutex_spin_wait( -/*============*/ - ib_mutex_t* mutex, /*!< in: pointer to mutex */ - const char* file_name, /*!< in: file name where mutex - requested */ - ulint line) /*!< in: line where requested */ +MutexMonitor::enable() { - ulint i; /* spin round count */ - ulint index; /* index of the reserved wait cell */ - sync_array_t* sync_arr; - size_t counter_index; - - counter_index = (size_t) os_thread_get_curr_id(); - - ut_ad(mutex); + /** Note: We don't add any latch meta-data after startup. Therefore + there is no need to use a mutex here. */ - /* This update is not thread safe, but we don't mind if the count - isn't exact. Moved out of ifdef that follows because we are willing - to sacrifice the cost of counting this as the data is valuable. - Count the number of calls to mutex_spin_wait. */ - mutex_spin_wait_count.add(counter_index, 1); + LatchMetaData::iterator end = latch_meta.end(); -mutex_loop: + for (LatchMetaData::iterator it = latch_meta.begin(); it != end; ++it) { - i = 0; - - /* Spin waiting for the lock word to become zero. Note that we do - not have to assume that the read access to the lock word is atomic, - as the actual locking is always committed with atomic test-and-set. - In reality, however, all processors probably have an atomic read of - a memory word. */ - -spin_loop: - - HMT_low(); - os_rmb; - while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) { - if (srv_spin_wait_delay) { - ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); + if (*it != NULL) { + (*it)->get_counter()->enable(); } - i++; } - HMT_medium(); - - if (i >= SYNC_SPIN_ROUNDS) { - os_thread_yield(); - } - - mutex_spin_round_count.add(counter_index, i); - - if (ib_mutex_test_and_set(mutex) == 0) { - /* Succeeded! */ - - mutex->thread_id = os_thread_get_curr_id(); -#ifdef UNIV_SYNC_DEBUG - mutex_set_debug_info(mutex, file_name, line); -#endif - if (srv_instrument_semaphores) { - mutex->file_name = file_name; - mutex->line = line; - } - - return; - } - - /* We may end up with a situation where lock_word is 0 but the OS - fast mutex is still reserved. On FreeBSD the OS does not seem to - schedule a thread which is constantly calling pthread_mutex_trylock - (in ib_mutex_test_and_set implementation). Then we could end up - spinning here indefinitely. The following 'i++' stops this infinite - spin. */ - - i++; - - if (i < SYNC_SPIN_ROUNDS) { - goto spin_loop; - } - - sync_arr = sync_array_get_and_reserve_cell(mutex, SYNC_MUTEX, - file_name, line, &index); - - /* The memory order of the array reservation and the change in the - waiters field is important: when we suspend a thread, we first - reserve the cell and then set waiters field to 1. When threads are - released in mutex_exit, the waiters field is first set to zero and - then the event is set to the signaled state. */ - - mutex_set_waiters(mutex, 1); - - /* Make sure waiters store won't pass over mutex_test_and_set */ -#ifdef __powerpc__ - os_mb; -#endif - - /* Try to reserve still a few times */ - for (i = 0; i < 4; i++) { - if (ib_mutex_test_and_set(mutex) == 0) { - /* Succeeded! Free the reserved wait cell */ - - sync_array_free_cell(sync_arr, index); - - mutex->thread_id = os_thread_get_curr_id(); -#ifdef UNIV_SYNC_DEBUG - mutex_set_debug_info(mutex, file_name, line); -#endif - if (srv_instrument_semaphores) { - mutex->file_name = file_name; - mutex->line = line; - } - - return; - - /* Note that in this case we leave the waiters field - set to 1. 
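The ordering stressed in this comment (reserve the cell, set waiters to 1, re-test the lock, only then sleep) is what LEMMA 1 rests on, and the whole protocol can be sketched by combining the event model given earlier with one extra flag. Again this is an illustrative model, not the real API; 'ev' and 'waiters' would live inside the mutex object, and the event class is the one from the earlier sketch.

/* Block path, taken after the spin budget in spin_lock() is spent. */
static void spin_lock_slow(std::atomic<int>& lock_word,
			   std::atomic<int>& waiters,
			   event&            ev)
{
	for (;;) {
		std::uint64_t sig = ev.reset();	/* reserve-cell analogue */
		waiters.store(1);		/* published before re-test */

		/* Re-test after setting waiters: the holder may have
		released without seeing waiters == 1, and without this
		check we could sleep forever. */
		if (lock_word.exchange(1, std::memory_order_acquire) == 0) {
			return;	/* got it; a stale waiters == 1 is harmless */
		}

		ev.wait_low(sig);  /* cannot miss a set() after our reset() */
	}
}

/* Release path: clear waiters first, then signal (the LEMMA 1 order). */
static void spin_unlock(std::atomic<int>& lock_word,
			std::atomic<int>& waiters,
			event&            ev)
{
	lock_word.store(0, std::memory_order_release);

	if (waiters.exchange(0) != 0) {
		ev.set();
	}
}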
We cannot reset it to zero, as we do not - know if there are other waiters. */ - } - } - - /* Now we know that there has been some thread holding the mutex - after the change in the wait array and the waiters field was made. - Now there is no risk of infinite wait on the event. */ - - mutex_os_wait_count.add(counter_index, 1); - - mutex->count_os_wait++; - - sync_array_wait_event(sync_arr, index); - - goto mutex_loop; -} - -/******************************************************************//** -Releases the threads waiting in the primary wait array for this mutex. */ -UNIV_INTERN -void -mutex_signal_object( -/*================*/ - ib_mutex_t* mutex) /*!< in: mutex */ -{ - mutex_set_waiters(mutex, 0); - - /* The memory order of resetting the waiters field and - signaling the object is important. See LEMMA 1 above. */ - os_event_set(mutex->event); - sync_array_object_signalled(); -} - -#ifdef UNIV_SYNC_DEBUG -/******************************************************************//** -Sets the debug information for a reserved mutex. */ -UNIV_INTERN -void -mutex_set_debug_info( -/*=================*/ - ib_mutex_t* mutex, /*!< in: mutex */ - const char* file_name, /*!< in: file where requested */ - ulint line) /*!< in: line where requested */ -{ - ut_ad(mutex); - ut_ad(file_name); - - sync_thread_add_level(mutex, mutex->level, FALSE); - - mutex->file_name = file_name; - mutex->line = line; } -/******************************************************************//** -Gets the debug information for a reserved mutex. */ -UNIV_INTERN +/** Disable the mutex monitoring */ void -mutex_get_debug_info( -/*=================*/ - ib_mutex_t* mutex, /*!< in: mutex */ - const char** file_name, /*!< out: file where requested */ - ulint* line, /*!< out: line where requested */ - os_thread_id_t* thread_id) /*!< out: id of the thread which owns - the mutex */ +MutexMonitor::disable() { - ut_ad(mutex); + /** Note: We don't add any latch meta-data after startup. Therefore + there is no need to use a mutex here. */ - *file_name = mutex->file_name; - *line = mutex->line; - *thread_id = mutex->thread_id; -} + LatchMetaData::iterator end = latch_meta.end(); -/******************************************************************//** -Prints debug info of currently reserved mutexes. */ -static -void -mutex_list_print_info( -/*==================*/ - FILE* file) /*!< in: file where to print */ -{ - ib_mutex_t* mutex; - const char* file_name; - ulint line; - os_thread_id_t thread_id; - ulint count = 0; + for (LatchMetaData::iterator it = latch_meta.begin(); it != end; ++it) { - fputs("----------\n" - "MUTEX INFO\n" - "----------\n", file); - - mutex_enter(&mutex_list_mutex); - - mutex = UT_LIST_GET_FIRST(mutex_list); - - while (mutex != NULL) { - count++; - - if (mutex_get_lock_word(mutex) != 0) { - mutex_get_debug_info(mutex, &file_name, &line, - &thread_id); - fprintf(file, - "Locked mutex: addr %p thread %ld" - " file %s line %ld\n", - (void*) mutex, os_thread_pf(thread_id), - file_name, line); + if (*it != NULL) { + (*it)->get_counter()->disable(); } - - mutex = UT_LIST_GET_NEXT(list, mutex); } - - fprintf(file, "Total number of mutexes %ld\n", count); - - mutex_exit(&mutex_list_mutex); } -/******************************************************************//** -Counts currently reserved mutexes. Works only in the debug version. 
-@return number of reserved mutexes */ -UNIV_INTERN -ulint -mutex_n_reserved(void) -/*==================*/ -{ - ib_mutex_t* mutex; - ulint count = 0; - - mutex_enter(&mutex_list_mutex); - - for (mutex = UT_LIST_GET_FIRST(mutex_list); - mutex != NULL; - mutex = UT_LIST_GET_NEXT(list, mutex)) { - - if (mutex_get_lock_word(mutex) != 0) { - - count++; - } - } - - mutex_exit(&mutex_list_mutex); - - ut_a(count >= 1); - - /* Subtract one, because this function itself was holding - one mutex (mutex_list_mutex) */ - - return(count - 1); -} - -/******************************************************************//** -Returns TRUE if no mutex or rw-lock is currently locked. Works only in -the debug version. -@return TRUE if no mutexes and rw-locks reserved */ -UNIV_INTERN -ibool -sync_all_freed(void) -/*================*/ -{ - return(mutex_n_reserved() + rw_lock_n_locked() == 0); -} - -/******************************************************************//** -Looks for the thread slot for the calling thread. -@return pointer to thread slot, NULL if not found */ -static -sync_thread_t* -sync_thread_level_arrays_find_slot(void) -/*====================================*/ - -{ - ulint i; - os_thread_id_t id; - - id = os_thread_get_curr_id(); - - for (i = 0; i < OS_THREAD_MAX_N; i++) { - sync_thread_t* slot; - - slot = &sync_thread_level_arrays[i]; - - if (slot->levels && os_thread_eq(slot->id, id)) { - - return(slot); - } - } - - return(NULL); -} - -/******************************************************************//** -Looks for an unused thread slot. -@return pointer to thread slot */ -static -sync_thread_t* -sync_thread_level_arrays_find_free(void) -/*====================================*/ - -{ - ulint i; - - for (i = 0; i < OS_THREAD_MAX_N; i++) { - sync_thread_t* slot; - - slot = &sync_thread_level_arrays[i]; - - if (slot->levels == NULL) { - - return(slot); - } - } - - return(NULL); -} - -/******************************************************************//** -Print warning. */ -static +/** Reset the mutex monitoring counters */ void -sync_print_warning( -/*===============*/ - const sync_level_t* slot) /*!< in: slot for which to - print warning */ +MutexMonitor::reset() { - ib_mutex_t* mutex; + /** Note: We don't add any latch meta-data after startup. Therefore + there is no need to use a mutex here. */ - mutex = static_cast<ib_mutex_t*>(slot->latch); + LatchMetaData::iterator end = latch_meta.end(); - if (mutex->magic_n == MUTEX_MAGIC_N) { - fprintf(stderr, - "Mutex created at %s %lu\n", - innobase_basename(mutex->cfile_name), - (ulong) mutex->cline); + for (LatchMetaData::iterator it = latch_meta.begin(); it != end; ++it) { - if (mutex_get_lock_word(mutex) != 0) { - ulint line; - const char* file_name; - os_thread_id_t thread_id; - - mutex_get_debug_info( - mutex, &file_name, &line, &thread_id); - - fprintf(stderr, - "InnoDB: Locked mutex:" - " addr %p thread %ld file %s line %ld\n", - (void*) mutex, os_thread_pf(thread_id), - file_name, (ulong) line); - } else { - fputs("Not locked\n", stderr); + if (*it != NULL) { + (*it)->get_counter()->reset(); } - } else { - rw_lock_t* lock; - - lock = static_cast<rw_lock_t*>(slot->latch); - - rw_lock_print(lock); } -} - -/******************************************************************//** -Checks if all the level values stored in the level array are greater than -the given limit. 
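What this checker enforces can be restated compactly: every thread records the level of each latch it holds, and a new latch is legal only while all recorded levels are strictly greater, so latches are always acquired in descending level order and a global cycle is impossible. A minimal model with made-up level numbers follows; the real checker also handles exceptions, such as checking against level - 1 when several same-level latches (e.g. buffer-pool mutexes) are permitted.

#include <cassert>
#include <vector>

/* Illustrative levels; InnoDB defines dozens (SYNC_LOCK_SYS, ...). */
enum { LEVEL_TRX_SYS = 300, LEVEL_LOCK_SYS = 200, LEVEL_TRX = 100 };

/* Per-thread record of held levels; models sync_thread_t::levels. */
static thread_local std::vector<int> held_levels;

/* Models sync_thread_levels_g(): true if every held level > limit. */
static bool levels_all_greater(int limit)
{
	for (int held : held_levels) {
		if (held <= limit) {
			return false;
		}
	}
	return true;
}

static void latch_acquired(int level)
{
	assert(levels_all_greater(level));  /* ut_error in InnoDB */
	held_levels.push_back(level);
}

With these levels, latch_acquired(LEVEL_LOCK_SYS) followed by latch_acquired(LEVEL_TRX) passes, while the reverse order asserts.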
-@return TRUE if all greater */ -static -ibool -sync_thread_levels_g( -/*=================*/ - sync_arr_t* arr, /*!< in: pointer to level array for an OS - thread */ - ulint limit, /*!< in: level limit */ - ulint warn) /*!< in: TRUE=display a diagnostic message */ -{ - ulint i; - - for (i = 0; i < arr->size(); i++) { - const sync_level_t* slot; - slot = (const sync_level_t*)&(arr->at(i)); + mutex_enter(&rw_lock_list_mutex); - if (slot->latch != NULL && slot->level <= limit) { - if (warn) { - fprintf(stderr, - "InnoDB: sync levels should be" - " > %lu but a level is %lu\n", - (ulong) limit, (ulong) slot->level); + for (rw_lock_t* rw_lock = UT_LIST_GET_FIRST(rw_lock_list); + rw_lock != NULL; + rw_lock = UT_LIST_GET_NEXT(list, rw_lock)) { - sync_print_warning(slot); - } - - return(FALSE); - } + rw_lock->count_os_wait = 0; } - return(TRUE); -} - -/******************************************************************//** -Checks if the level value is stored in the level array. -@return slot if found or NULL */ -static -const sync_level_t* -sync_thread_levels_contain( -/*=======================*/ - sync_arr_t* arr, /*!< in: pointer to level array for an OS - thread */ - ulint level) /*!< in: level */ -{ - ulint i; - - for (i = 0; i < arr->size(); i++) { - const sync_level_t* slot; - - slot = (const sync_level_t*)&(arr->at(i)); - - if (slot->latch != NULL && slot->level == level) { - - return(slot); - } - } - - return(NULL); -} - -/******************************************************************//** -Checks if the level array for the current thread contains a -mutex or rw-latch at the specified level. -@return a matching latch, or NULL if not found */ -UNIV_INTERN -void* -sync_thread_levels_contains( -/*========================*/ - ulint level) /*!< in: latching order level - (SYNC_DICT, ...)*/ -{ - ulint i; - sync_arr_t* arr; - sync_thread_t* thread_slot; - - if (!sync_order_checks_on) { - - return(NULL); - } - - mutex_enter(&sync_thread_mutex); - - thread_slot = sync_thread_level_arrays_find_slot(); - - if (thread_slot == NULL) { - - mutex_exit(&sync_thread_mutex); - - return(NULL); - } - - arr = thread_slot->levels; - - for (i = 0; i < arr->size(); i++) { - sync_level_t* slot; - - slot = (sync_level_t*)&(arr->at(i)); - - if (slot->latch != NULL && slot->level == level) { - - mutex_exit(&sync_thread_mutex); - return(slot->latch); - } - } - - mutex_exit(&sync_thread_mutex); - - return(NULL); -} - -/******************************************************************//** -Checks that the level array for the current thread is empty. 
-
-/******************************************************************//**
-Checks that the level array for the current thread is empty.
-@return a latch, or NULL if empty except the exceptions specified below */
-UNIV_INTERN
-void*
-sync_thread_levels_nonempty_gen(
-/*============================*/
-	ibool	dict_mutex_allowed)	/*!< in: TRUE if dictionary mutex is
-					allowed to be owned by the thread */
-{
-	ulint		i;
-	sync_arr_t*	arr;
-	sync_thread_t*	thread_slot;
-
-	if (!sync_order_checks_on) {
-
-		return(NULL);
-	}
-
-	mutex_enter(&sync_thread_mutex);
-
-	thread_slot = sync_thread_level_arrays_find_slot();
-
-	if (thread_slot == NULL) {
-
-		mutex_exit(&sync_thread_mutex);
-
-		return(NULL);
-	}
-
-	arr = thread_slot->levels;
-
-	for (i = 0; i < arr->size(); ++i) {
-		const sync_level_t*	slot;
-
-		slot = (const sync_level_t*)&(arr->at(i));
-
-		if (slot->latch != NULL
-		    && (!dict_mutex_allowed
-			|| (slot->level != SYNC_DICT
-			    && slot->level != SYNC_DICT_OPERATION
-			    && slot->level != SYNC_FTS_CACHE))) {
-
-			mutex_exit(&sync_thread_mutex);
-			ut_error;
-
-			return(slot->latch);
-		}
-	}
-
-	mutex_exit(&sync_thread_mutex);
-
-	return(NULL);
-}
-
-/******************************************************************//**
-Checks if the level array for the current thread is empty,
-except for the btr_search_latch.
-@return a latch, or NULL if empty except the exceptions specified below */
-UNIV_INTERN
-void*
-sync_thread_levels_nonempty_trx(
-/*============================*/
-	ibool	has_search_latch)
-			/*!< in: TRUE if and only if the thread
-			is supposed to hold btr_search_latch */
-{
-	ulint		i;
-	sync_arr_t*	arr;
-	sync_thread_t*	thread_slot;
-
-	if (!sync_order_checks_on) {
-
-		return(NULL);
-	}
-
-	ut_a(!has_search_latch
-	     || sync_thread_levels_contains(SYNC_SEARCH_SYS));
-
-	mutex_enter(&sync_thread_mutex);
-
-	thread_slot = sync_thread_level_arrays_find_slot();
-
-	if (thread_slot == NULL) {
-
-		mutex_exit(&sync_thread_mutex);
-
-		return(NULL);
-	}
-
-	arr = thread_slot->levels;
-
-	for (i = 0; i < arr->size(); ++i) {
-		const sync_level_t*	slot;
-
-		slot = (const sync_level_t*)&(arr->at(i));
-
-		if (slot->latch != NULL
-		    && (!has_search_latch
-			|| slot->level != SYNC_SEARCH_SYS)) {
-
-			mutex_exit(&sync_thread_mutex);
-			ut_error;
-
-			return(slot->latch);
-		}
-	}
-
-	mutex_exit(&sync_thread_mutex);
-
-	return(NULL);
-}
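Both removed assertions reduce to "the thread holds nothing, apart from an explicit whitelist of levels". A sketch with placeholder constants (the real code whitelists SYNC_DICT, SYNC_DICT_OPERATION, SYNC_FTS_CACHE, or SYNC_SEARCH_SYS):

#include <cstddef>
#include <vector>

struct held_slot_t {
	void*	latch;
	int	level;
};

enum {
	LVL_DICT		= 1000,	/* placeholder values */
	LVL_DICT_OPERATION	= 1001,
	LVL_FTS_CACHE		= 1002
};

/* Returns the first latch held outside the whitelist, or NULL if the
thread is "clean"; mirrors sync_thread_levels_nonempty_gen(). */
void* first_unexpected_latch(const std::vector<held_slot_t>& held,
			     bool dict_allowed)
{
	for (size_t i = 0; i < held.size(); ++i) {
		if (held[i].latch == NULL) {
			continue;
		}
		if (dict_allowed
		    && (held[i].level == LVL_DICT
			|| held[i].level == LVL_DICT_OPERATION
			|| held[i].level == LVL_FTS_CACHE)) {
			continue;
		}
		return held[i].latch;
	}
	return NULL;
}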
-
-/******************************************************************//**
-Adds a latch and its level in the thread level array. Allocates the memory
-for the array if called first time for this OS thread. Makes the checks
-against other latch levels stored in the array for this thread. */
-UNIV_INTERN
-void
-sync_thread_add_level(
-/*==================*/
-	void*	latch,	/*!< in: pointer to a mutex or an rw-lock */
-	ulint	level,	/*!< in: level in the latching order; if
-			SYNC_LEVEL_VARYING, nothing is done */
-	ibool	relock)	/*!< in: TRUE if re-entering an x-lock */
-{
-	sync_arr_t*	array;
-	sync_thread_t*	thread_slot;
-	sync_level_t	sync_level;
-
-	if (!sync_order_checks_on) {
-
-		return;
-	}
-
-	if ((latch == (void*) &sync_thread_mutex)
-	    || (latch == (void*) &mutex_list_mutex)
-	    || (latch == (void*) &rw_lock_debug_mutex)
-	    || (latch == (void*) &rw_lock_list_mutex)) {
-
-		return;
-	}
-
-	if (level == SYNC_LEVEL_VARYING) {
-
-		return;
-	}
-
-	mutex_enter(&sync_thread_mutex);
-
-	thread_slot = sync_thread_level_arrays_find_slot();
-
-	if (thread_slot == NULL) {
-
-		/* We have to allocate the level array for a new thread */
-		array = new sync_arr_t();
-		ut_a(array != NULL);
-		thread_slot = sync_thread_level_arrays_find_free();
-		thread_slot->levels = array;
-		thread_slot->id = os_thread_get_curr_id();
-	}
-
-	array = thread_slot->levels;
-
-	if (relock) {
-		goto levels_ok;
-	}
-
-	/* NOTE that there is a problem with _NODE and _LEAF levels: if the
-	B-tree height changes, then a leaf can change to an internal node
-	or the other way around. We do not know at present if this can cause
-	unnecessary assertion failures below. */
-
-	switch (level) {
-	case SYNC_NO_ORDER_CHECK:
-	case SYNC_EXTERN_STORAGE:
-	case SYNC_TREE_NODE_FROM_HASH:
-		/* Do no order checking */
-		break;
-	case SYNC_TRX_SYS_HEADER:
-		if (srv_is_being_started) {
-			/* This is violated during trx_sys_create_rsegs()
-			when creating additional rollback segments when
-			upgrading in innobase_start_or_create_for_mysql(). */
-			break;
-		}
-	case SYNC_MEM_POOL:
-	case SYNC_MEM_HASH:
-	case SYNC_RECV:
-	case SYNC_FTS_BG_THREADS:
-	case SYNC_WORK_QUEUE:
-	case SYNC_FTS_TOKENIZE:
-	case SYNC_FTS_OPTIMIZE:
-	case SYNC_FTS_CACHE:
-	case SYNC_FTS_CACHE_INIT:
-	case SYNC_LOG:
-	case SYNC_LOG_FLUSH_ORDER:
-	case SYNC_ANY_LATCH:
-	case SYNC_FILE_FORMAT_TAG:
-	case SYNC_DOUBLEWRITE:
-	case SYNC_SEARCH_SYS:
-	case SYNC_THREADS:
-	case SYNC_LOCK_SYS:
-	case SYNC_LOCK_WAIT_SYS:
-	case SYNC_TRX_SYS:
-	case SYNC_IBUF_BITMAP_MUTEX:
-	case SYNC_RSEG:
-	case SYNC_TRX_UNDO:
-	case SYNC_PURGE_LATCH:
-	case SYNC_PURGE_QUEUE:
-	case SYNC_DICT_AUTOINC_MUTEX:
-	case SYNC_DICT_OPERATION:
-	case SYNC_DICT_HEADER:
-	case SYNC_TRX_I_S_RWLOCK:
-	case SYNC_TRX_I_S_LAST_READ:
-	case SYNC_IBUF_MUTEX:
-	case SYNC_INDEX_ONLINE_LOG:
-	case SYNC_STATS_AUTO_RECALC:
-	case SYNC_STATS_DEFRAG:
-		if (!sync_thread_levels_g(array, level, TRUE)) {
-			fprintf(stderr,
-				"InnoDB: sync_thread_levels_g(array, %lu)"
-				" does not hold!\n", level);
-			ut_error;
-		}
-		break;
-	case SYNC_TRX:
-		/* Either the thread must own the lock_sys->mutex, or
-		it is allowed to own only ONE trx->mutex. */
-		if (!sync_thread_levels_g(array, level, FALSE)) {
-			ut_a(sync_thread_levels_g(array, level - 1, TRUE));
-			ut_a(sync_thread_levels_contain(array, SYNC_LOCK_SYS));
-		}
-		break;
-	case SYNC_BUF_FLUSH_LIST:
-	case SYNC_BUF_POOL:
-		/* We can have multiple mutexes of this type therefore we
-		can only check whether the greater than condition holds. */
-		if (!sync_thread_levels_g(array, level-1, TRUE)) {
-			fprintf(stderr,
-				"InnoDB: sync_thread_levels_g(array, %lu)"
-				" does not hold!\n", level-1);
-			ut_error;
-		}
-		break;
-
-
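The SYNC_TRX case is the "covering mutex or at most one peer" idiom: holding something at or below the new level is legal only when the covering lock_sys mutex is owned and the offender sits exactly at the level itself. A self-contained model with illustrative level numbers (not the real SYNC_* constants):

#include <cassert>
#include <vector>

static const int LVL_TRX	= 100;	/* stand-in for SYNC_TRX */
static const int LVL_LOCK_SYS	= 200;	/* lock_sys sits above trx */

typedef std::vector<int> held_levels_t;	/* levels currently held */

static bool any_leq(const held_levels_t& h, int limit)
{
	for (size_t i = 0; i < h.size(); ++i) {
		if (h[i] <= limit) return true;
	}
	return false;
}

static bool holds(const held_levels_t& h, int level)
{
	for (size_t i = 0; i < h.size(); ++i) {
		if (h[i] == level) return true;
	}
	return false;
}

/* Mirrors the SYNC_TRX case: either all held levels are above LVL_TRX,
or nothing sits below it and the covering lock_sys mutex is owned. */
static void check_take_trx_mutex(const held_levels_t& held)
{
	if (any_leq(held, LVL_TRX)) {
		assert(!any_leq(held, LVL_TRX - 1));
		assert(holds(held, LVL_LOCK_SYS));
	}
}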
-	case SYNC_BUF_PAGE_HASH:
-		/* Multiple page_hash locks are only allowed during
-		buf_validate and that is where buf_pool mutex is already
-		held. */
-
-		/* Fall through */
-
-	case SYNC_BUF_BLOCK:
-		/* Either the thread must own the buffer pool mutex
-		(buf_pool->mutex), or it is allowed to latch only ONE
-		buffer block (block->mutex or buf_pool->zip_mutex). */
-		if (!sync_thread_levels_g(array, level, FALSE)) {
-			ut_a(sync_thread_levels_g(array, level - 1, TRUE));
-			ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
-		}
-		break;
-	case SYNC_REC_LOCK:
-		if (sync_thread_levels_contain(array, SYNC_LOCK_SYS)) {
-			ut_a(sync_thread_levels_g(array, SYNC_REC_LOCK - 1,
-						  TRUE));
-		} else {
-			ut_a(sync_thread_levels_g(array, SYNC_REC_LOCK, TRUE));
-		}
-		break;
-	case SYNC_IBUF_BITMAP:
-		/* Either the thread must own the master mutex to all
-		the bitmap pages, or it is allowed to latch only ONE
-		bitmap page. */
-		if (sync_thread_levels_contain(array,
-					       SYNC_IBUF_BITMAP_MUTEX)) {
-			ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1,
-						  TRUE));
-		} else {
-			/* This is violated during trx_sys_create_rsegs()
-			when creating additional rollback segments when
-			upgrading in innobase_start_or_create_for_mysql(). */
-			ut_a(srv_is_being_started
-			     || sync_thread_levels_g(array, SYNC_IBUF_BITMAP,
-						     TRUE));
-		}
-		break;
-	case SYNC_FSP_PAGE:
-		ut_a(sync_thread_levels_contain(array, SYNC_FSP));
-		break;
-	case SYNC_FSP:
-		ut_a(sync_thread_levels_contain(array, SYNC_FSP)
-		     || sync_thread_levels_g(array, SYNC_FSP, TRUE));
-		break;
-	case SYNC_TRX_UNDO_PAGE:
-		/* Purge is allowed to read in as many UNDO pages as it likes,
-		there was a bogus rule here earlier that forced the caller to
-		acquire the purge_sys_t::mutex. The purge mutex did not really
-		protect anything because it was only ever acquired by the
-		single purge thread. The purge thread can read the UNDO pages
-		without any covering mutex. */
-
-		ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
-		     || sync_thread_levels_contain(array, SYNC_RSEG)
-		     || sync_thread_levels_g(array, level - 1, TRUE));
-		break;
-	case SYNC_RSEG_HEADER:
-		ut_a(sync_thread_levels_contain(array, SYNC_RSEG));
-		break;
-	case SYNC_RSEG_HEADER_NEW:
-		ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE));
-		break;
-	case SYNC_TREE_NODE:
-		ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
-		     || sync_thread_levels_contain(array, SYNC_DICT_OPERATION)
-		     || sync_thread_levels_g(array, SYNC_TREE_NODE - 1, TRUE));
-		break;
-	case SYNC_TREE_NODE_NEW:
-		ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE));
-		break;
-	case SYNC_INDEX_TREE:
-		ut_a(sync_thread_levels_g(array, SYNC_TREE_NODE - 1, TRUE));
-		break;
-	case SYNC_IBUF_TREE_NODE:
-		ut_a(sync_thread_levels_contain(array, SYNC_IBUF_INDEX_TREE)
-		     || sync_thread_levels_g(array, SYNC_IBUF_TREE_NODE - 1,
-					     TRUE));
-		break;
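Several of these cases (SYNC_FSP_PAGE, SYNC_RSEG_HEADER, SYNC_RSEG_HEADER_NEW, SYNC_TREE_NODE_NEW) are pure containment rules: the latch may only be taken while a specific covering latch is already in the thread's level array. In sketch form, with an illustrative level value:

#include <cassert>
#include <vector>

static const int LVL_FSP = 400;	/* stand-in for SYNC_FSP */

/* The containment idiom behind e.g. the SYNC_FSP_PAGE case: taking a
page latch is legal only under the covering tablespace latch. */
static void check_take_fsp_page(const std::vector<int>& held)
{
	bool covered = false;

	for (size_t i = 0; i < held.size(); ++i) {
		covered = covered || (held[i] == LVL_FSP);
	}

	assert(covered);
}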
-	case SYNC_IBUF_TREE_NODE_NEW:
-		/* ibuf_add_free_page() allocates new pages for the
-		change buffer while only holding the tablespace
-		x-latch. These pre-allocated new pages may only be
-		taken in use while holding ibuf_mutex, in
-		btr_page_alloc_for_ibuf(). */
-		ut_a(sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
-		     || sync_thread_levels_contain(array, SYNC_FSP));
-		break;
-	case SYNC_IBUF_INDEX_TREE:
-		if (sync_thread_levels_contain(array, SYNC_FSP)) {
-			ut_a(sync_thread_levels_g(array, level - 1, TRUE));
-		} else {
-			ut_a(sync_thread_levels_g(
-				     array, SYNC_IBUF_TREE_NODE - 1, TRUE));
-		}
-		break;
-	case SYNC_IBUF_PESS_INSERT_MUTEX:
-		ut_a(sync_thread_levels_g(array, SYNC_FSP - 1, TRUE));
-		ut_a(!sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
-		break;
-	case SYNC_IBUF_HEADER:
-		ut_a(sync_thread_levels_g(array, SYNC_FSP - 1, TRUE));
-		ut_a(!sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
-		ut_a(!sync_thread_levels_contain(array,
-						 SYNC_IBUF_PESS_INSERT_MUTEX));
-		break;
-	case SYNC_DICT:
-#ifdef UNIV_DEBUG
-		ut_a(buf_debug_prints
-		     || sync_thread_levels_g(array, SYNC_DICT, TRUE));
-#else /* UNIV_DEBUG */
-		ut_a(sync_thread_levels_g(array, SYNC_DICT, TRUE));
-#endif /* UNIV_DEBUG */
-		break;
-	default:
-		ut_error;
-	}
-
-levels_ok:
-
-	sync_level.latch = latch;
-	sync_level.level = level;
-	array->push_back(sync_level);
-
-	mutex_exit(&sync_thread_mutex);
-}
-
-/******************************************************************//**
-Removes a latch from the thread level array if it is found there.
-@return TRUE if found in the array; it is no error if the latch is
-not found, as we presently are not able to determine the level for
-every latch reservation the program does */
-UNIV_INTERN
-ibool
-sync_thread_reset_level(
-/*====================*/
-	void*	latch)	/*!< in: pointer to a mutex or an rw-lock */
-{
-	sync_arr_t*	array;
-	sync_thread_t*	thread_slot;
-
-	if (!sync_order_checks_on) {
-
-		return(FALSE);
-	}
-
-	if ((latch == (void*) &sync_thread_mutex)
-	    || (latch == (void*) &mutex_list_mutex)
-	    || (latch == (void*) &rw_lock_debug_mutex)
-	    || (latch == (void*) &rw_lock_list_mutex)) {
-
-		return(FALSE);
-	}
-
-	mutex_enter(&sync_thread_mutex);
-
-	thread_slot = sync_thread_level_arrays_find_slot();
-
-	if (thread_slot == NULL) {
-
-		ut_error;
-
-		mutex_exit(&sync_thread_mutex);
-		return(FALSE);
-	}
-
-	array = thread_slot->levels;
-
-	for (std::vector<sync_level_t>::iterator it = array->begin();
-	     it != array->end(); ++it) {
-		sync_level_t	level = *it;
-
-		if (level.latch != latch) {
-			continue;
-		}
-
-		array->erase(it);
-		mutex_exit(&sync_thread_mutex);
-		return(TRUE);
-	}
-
-	if (((ib_mutex_t*) latch)->magic_n != MUTEX_MAGIC_N) {
-		rw_lock_t*	rw_lock;
-
-		rw_lock = (rw_lock_t*) latch;
-
-		if (rw_lock->level == SYNC_LEVEL_VARYING) {
-			mutex_exit(&sync_thread_mutex);
-
-			return(TRUE);
-		}
-	}
-
-	ut_error;
-
-	mutex_exit(&sync_thread_mutex);
-
-	return(FALSE);
-}
-#endif /* UNIV_SYNC_DEBUG */
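The removal path of sync_thread_reset_level() is an erase-first-match scan over the per-thread vector; note that the iterator is never used after erase(). The same pattern in isolation:

#include <cstddef>
#include <vector>

struct sync_level_t {
	void*		latch;
	unsigned long	level;
};

bool reset_level(std::vector<sync_level_t>& held, void* latch)
{
	for (std::vector<sync_level_t>::iterator it = held.begin();
	     it != held.end(); ++it) {
		if (it->latch == latch) {
			/* erase() invalidates `it`; return immediately,
			as the original code does */
			held.erase(it);
			return true;
		}
	}
	return false;	/* not found: tolerated by the caller */
}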
-
-/******************************************************************//**
-Initializes the synchronization data structures. */
-UNIV_INTERN
-void
-sync_init(void)
-/*===========*/
-{
-	ut_a(sync_initialized == FALSE);
-
-	sync_initialized = TRUE;
-
-	sync_array_init(OS_THREAD_MAX_N);
-
-#ifdef UNIV_SYNC_DEBUG
-	/* Create the thread latch level array where the latch levels
-	are stored for each OS thread */
-
-	sync_thread_level_arrays = static_cast<sync_thread_t*>(
-		calloc(sizeof(sync_thread_t), OS_THREAD_MAX_N));
-
-	ut_a(sync_thread_level_arrays != NULL);
-
-#endif /* UNIV_SYNC_DEBUG */
-	/* Init the mutex list and create the mutex to protect it. */
-
-	UT_LIST_INIT(mutex_list);
-	mutex_create(mutex_list_mutex_key, &mutex_list_mutex,
-		     SYNC_NO_ORDER_CHECK);
-#ifdef UNIV_SYNC_DEBUG
-	mutex_create(sync_thread_mutex_key, &sync_thread_mutex,
-		     SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-
-	/* Init the rw-lock list and create the mutex to protect it. */
-
-	UT_LIST_INIT(rw_lock_list);
-	mutex_create(rw_lock_list_mutex_key, &rw_lock_list_mutex,
-		     SYNC_NO_ORDER_CHECK);
-
-#ifdef UNIV_SYNC_DEBUG
-	os_fast_mutex_init(rw_lock_debug_mutex_key, &rw_lock_debug_mutex);
-#endif /* UNIV_SYNC_DEBUG */
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Frees all debug memory. */
-static
-void
-sync_thread_level_arrays_free(void)
-/*===============================*/
-
-{
-	ulint	i;
-
-	for (i = 0; i < OS_THREAD_MAX_N; i++) {
-		sync_thread_t*	slot;
-
-		slot = &sync_thread_level_arrays[i];
-
-		/* If this slot was allocated then free the slot memory too. */
-		if (slot->levels != NULL) {
-			delete slot->levels;
-		}
-	}
-
-	free(sync_thread_level_arrays);
-	sync_thread_level_arrays = NULL;
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-/******************************************************************//**
-Frees the resources in InnoDB's own synchronization data structures. Use
-os_sync_free() after calling this. */
-UNIV_INTERN
-void
-sync_close(void)
-/*===========*/
-{
-	ib_mutex_t*	mutex;
-
-	sync_array_close();
-
-	for (mutex = UT_LIST_GET_FIRST(mutex_list);
-	     mutex != NULL;
-	     /* No op */) {
-
-#ifdef UNIV_MEM_DEBUG
-		if (mutex == &mem_hash_mutex) {
-			mutex = UT_LIST_GET_NEXT(list, mutex);
-			continue;
-		}
-#endif /* UNIV_MEM_DEBUG */
-
-		mutex_free(mutex);
-
-		mutex = UT_LIST_GET_FIRST(mutex_list);
-	}
-
-	mutex_free(&mutex_list_mutex);
-#ifdef UNIV_SYNC_DEBUG
-	mutex_free(&sync_thread_mutex);
-
-	/* Switch latching order checks on in sync0sync.cc */
-	sync_order_checks_on = FALSE;
-
-	sync_thread_level_arrays_free();
-	os_fast_mutex_free(&rw_lock_debug_mutex);
-#endif /* UNIV_SYNC_DEBUG */
-
-	sync_initialized = FALSE;
-}
-
-/*******************************************************************//**
-Prints wait info of the sync system. */
-UNIV_INTERN
-void
-sync_print_wait_info(
-/*=================*/
-	FILE*	file)	/*!< in: file where to print */
-{
-	fprintf(file,
-		"Mutex spin waits " UINT64PF ", rounds " UINT64PF ", "
-		"OS waits " UINT64PF "\n"
-		"RW-shared spins " UINT64PF ", rounds " UINT64PF ", "
-		"OS waits " UINT64PF "\n"
-		"RW-excl spins " UINT64PF ", rounds " UINT64PF ", "
-		"OS waits " UINT64PF "\n",
-		(ib_uint64_t) mutex_spin_wait_count,
-		(ib_uint64_t) mutex_spin_round_count,
-		(ib_uint64_t) mutex_os_wait_count,
-		(ib_uint64_t) rw_lock_stats.rw_s_spin_wait_count,
-		(ib_uint64_t) rw_lock_stats.rw_s_spin_round_count,
-		(ib_uint64_t) rw_lock_stats.rw_s_os_wait_count,
-		(ib_uint64_t) rw_lock_stats.rw_x_spin_wait_count,
-		(ib_uint64_t) rw_lock_stats.rw_x_spin_round_count,
-		(ib_uint64_t) rw_lock_stats.rw_x_os_wait_count);
-
-	fprintf(file,
-		"Spin rounds per wait: %.2f mutex, %.2f RW-shared, "
-		"%.2f RW-excl\n",
-		(double) mutex_spin_round_count /
-		(mutex_spin_wait_count ? mutex_spin_wait_count : 1),
-		(double) rw_lock_stats.rw_s_spin_round_count /
-		(rw_lock_stats.rw_s_spin_wait_count
-		 ? rw_lock_stats.rw_s_spin_wait_count : 1),
-		(double) rw_lock_stats.rw_x_spin_round_count /
-		(rw_lock_stats.rw_x_spin_wait_count
-		 ? rw_lock_stats.rw_x_spin_wait_count : 1));
-}
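The "spin rounds per wait" figures printed above guard the division with a `count ? count : 1` fallback so an idle server does not divide by zero. The same computation in isolation:

#include <cstdint>

double rounds_per_wait(uint64_t rounds, uint64_t waits)
{
	/* A zero wait count degrades to dividing by one rather than
	crashing; matches the removed fprintf() arithmetic. */
	return (double) rounds / (double) (waits ? waits : 1);
}

/* e.g. rounds_per_wait(3000, 100) == 30.0 */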
-
-/*******************************************************************//**
-Prints info of the sync system. */
-UNIV_INTERN
-void
-sync_print(
-/*=======*/
-	FILE*	file)	/*!< in: file where to print */
-{
-#ifdef UNIV_SYNC_DEBUG
-	mutex_list_print_info(file);
-
-	rw_lock_list_print_info(file);
-#endif /* UNIV_SYNC_DEBUG */
-
-	sync_array_print(file);
-
-	sync_print_wait_info(file);
+	mutex_exit(&rw_lock_list_mutex);
 }
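The added lines close out MutexMonitor::reset(): the rw-lock half walks the global list under rw_lock_list_mutex and clears each lock's OS-wait counter. A sketch of the same pattern, with std::mutex and a plain vector standing in for the intrusive UT_LIST (the names are borrowed from the diff; the types are not InnoDB's):

#include <cstdint>
#include <mutex>
#include <vector>

struct rw_lock_t {
	uint32_t	count_os_wait;
};

std::mutex		rw_lock_list_mutex;
std::vector<rw_lock_t*>	rw_lock_list;

void reset_rw_lock_wait_counts()
{
	/* Unlike the latch_meta loop, rw-locks are created and destroyed
	at runtime, so the list mutex really is required here. */
	std::lock_guard<std::mutex> guard(rw_lock_list_mutex);

	for (size_t i = 0; i < rw_lock_list.size(); ++i) {
		rw_lock_list[i]->count_os_wait = 0;
	}
}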