diff options
author | Vasil Dimov <vasil.dimov@oracle.com> | 2010-04-12 18:20:41 +0300 |
---|---|---|
committer | Vasil Dimov <vasil.dimov@oracle.com> | 2010-04-12 18:20:41 +0300 |
commit | c877ff39bceb4df96acf3e54f7c98a2bed12b8ee (patch) | |
tree | 04211a3e5734b73e9f94cff511a4a74ff87075f0 /storage/innobase/include | |
parent | fe0828b3b8193e086abe740572c9b0cb2b7da671 (diff) | |
parent | 410e23a6af8b597cdda0890d6ed9008355edee7a (diff) | |
download | mariadb-git-c877ff39bceb4df96acf3e54f7c98a2bed12b8ee.tar.gz |
Import branches/innodb+ from SVN on top of storage/innobase.
Diffstat (limited to 'storage/innobase/include')
58 files changed, 2972 insertions, 495 deletions
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index d5c8258513c..cc08cc620c5 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -68,19 +68,30 @@ enum btr_latch_mode { BTR_MODIFY_PREV = 36 }; +/* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually exclusive. */ + /** If this is ORed to btr_latch_mode, it means that the search tuple -will be inserted to the index, at the searched position */ +will be inserted to the index, at the searched position. +When the record is not in the buffer pool, try to use the insert buffer. */ #define BTR_INSERT 512 /** This flag ORed to btr_latch_mode says that we do the search in query optimization */ #define BTR_ESTIMATE 1024 -/** This flag ORed to btr_latch_mode says that we can ignore possible +/** This flag ORed to BTR_INSERT says that we can ignore possible UNIQUE definition on secondary indexes when we decide if we can use the insert buffer to speed up inserts */ #define BTR_IGNORE_SEC_UNIQUE 2048 +/** Try to delete mark the record at the searched position using the +insert/delete buffer when the record is not in the buffer pool. */ +#define BTR_DELETE_MARK 4096 + +/** Try to purge the record at the searched position using the insert/delete +buffer when the record is not in the buffer pool. */ +#define BTR_DELETE 8192 + /**************************************************************//** Gets the root node of a tree and x-latches it. @return root page, x-latched */ @@ -193,6 +204,10 @@ btr_leaf_page_release( mtr_t* mtr); /*!< in: mtr */ /**************************************************************//** Gets the child node file address in a node pointer. +NOTE: the offsets array must contain all offsets for the record since +we read the last field according to offsets and assume that it contains +the child page number. In other words offsets must have been retrieved +with rec_get_offsets(n_fields=ULINT_UNDEFINED). @return child node address */ UNIV_INLINE ulint @@ -317,12 +332,16 @@ Inserts a data tuple to a tree on a non-leaf level. It is assumed that mtr holds an x-latch on the tree. */ UNIV_INTERN void -btr_insert_on_non_leaf_level( -/*=========================*/ +btr_insert_on_non_leaf_level_func( +/*==============================*/ dict_index_t* index, /*!< in: index */ ulint level, /*!< in: level, must be > 0 */ dtuple_t* tuple, /*!< in: the record to be inserted */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ +# define btr_insert_on_non_leaf_level(i,l,t,m) \ + btr_insert_on_non_leaf_level_func(i,l,t,__FILE__,__LINE__,m) #endif /* !UNIV_HOTBACKUP */ /****************************************************************//** Sets a record as the predefined minimum record. */ diff --git a/storage/innobase/include/btr0btr.ic b/storage/innobase/include/btr0btr.ic index 2259d22c9a6..97944cc2e26 100644 --- a/storage/innobase/include/btr0btr.ic +++ b/storage/innobase/include/btr0btr.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -255,6 +255,10 @@ btr_page_set_prev( /**************************************************************//** Gets the child node file address in a node pointer. +NOTE: the offsets array must contain all offsets for the record since +we read the last field according to offsets and assume that it contains +the child page number. In other words offsets must have been retrieved +with rec_get_offsets(n_fields=ULINT_UNDEFINED). @return child node address */ UNIV_INLINE ulint diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index 480a3877e54..136d2d068a1 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -138,7 +138,8 @@ btr_cur_search_to_nth_level( should always be made using PAGE_CUR_LE to search the position! */ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with - BTR_INSERT and BTR_ESTIMATE; + at most one of BTR_INSERT, BTR_DELETE_MARK, + BTR_DELETE, or BTR_ESTIMATE; cursor->left_block is used to store a pointer to the left neighbor page, in the cases BTR_SEARCH_PREV and BTR_MODIFY_PREV; @@ -152,29 +153,39 @@ btr_cur_search_to_nth_level( ulint has_search_latch,/*!< in: latch mode the caller currently has on btr_search_latch: RW_S_LATCH, or 0 */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ /*****************************************************************//** Opens a cursor at either end of an index. */ UNIV_INTERN void -btr_cur_open_at_index_side( -/*=======================*/ +btr_cur_open_at_index_side_func( +/*============================*/ ibool from_left, /*!< in: TRUE if open to the low end, FALSE if to the high end */ dict_index_t* index, /*!< in: index */ ulint latch_mode, /*!< in: latch mode */ btr_cur_t* cursor, /*!< in: cursor */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ +#define btr_cur_open_at_index_side(f,i,l,c,m) \ + btr_cur_open_at_index_side_func(f,i,l,c,__FILE__,__LINE__,m) /**********************************************************************//** Positions a cursor at a randomly chosen position within a B-tree. */ UNIV_INTERN void -btr_cur_open_at_rnd_pos( -/*====================*/ +btr_cur_open_at_rnd_pos_func( +/*=========================*/ dict_index_t* index, /*!< in: index */ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ btr_cur_t* cursor, /*!< in/out: B-tree cursor */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ +#define btr_cur_open_at_rnd_pos(i,l,c,m) \ + btr_cur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m) /*************************************************************//** Tries to perform an insert to a page in an index tree, next to cursor. It is assumed that mtr holds an x-latch on the page. The operation does @@ -322,19 +333,6 @@ btr_cur_del_mark_set_sec_rec( ibool val, /*!< in: value to set */ que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr); /*!< in: mtr */ -/***********************************************************//** -Clear a secondary index record's delete mark. This function is only -used by the insert buffer insert merge mechanism. */ -UNIV_INTERN -void -btr_cur_del_unmark_for_ibuf( -/*========================*/ - rec_t* rec, /*!< in/out: record to delete unmark */ - page_zip_des_t* page_zip, /*!< in/out: compressed page - corresponding to rec, or NULL - when the tablespace is - uncompressed */ - mtr_t* mtr); /*!< in: mtr */ /*************************************************************//** Tries to compress a page of the tree if it seems useful. It is assumed that mtr holds an x-latch on the tree and on the cursor page. To avoid @@ -586,7 +584,20 @@ btr_push_update_extern_fields( const upd_t* update, /*!< in: update vector */ mem_heap_t* heap) /*!< in: memory heap */ __attribute__((nonnull)); - +/***********************************************************//** +Sets a secondary index record's delete mark to the given value. This +function is only used by the insert buffer merge mechanism. */ +UNIV_INTERN +void +btr_cur_set_deleted_flag_for_ibuf( +/*==============================*/ + rec_t* rec, /*!< in/out: record */ + page_zip_des_t* page_zip, /*!< in/out: compressed page + corresponding to rec, or NULL + when the tablespace is + uncompressed */ + ibool val, /*!< in: value to set */ + mtr_t* mtr); /*!< in: mtr */ /*######################################################################*/ /** In the pessimistic delete, if the page data size drops below this @@ -618,8 +629,13 @@ enum btr_cur_method { hash_node, and might be necessary to update */ BTR_CUR_BINARY, /*!< success using the binary search */ - BTR_CUR_INSERT_TO_IBUF /*!< performed the intended insert to + BTR_CUR_INSERT_TO_IBUF, /*!< performed the intended insert to the insert buffer */ + BTR_CUR_DEL_MARK_IBUF, /*!< performed the intended delete + mark in the insert/delete buffer */ + BTR_CUR_DELETE_IBUF, /*!< performed the intended delete in + the insert/delete buffer */ + BTR_CUR_DELETE_REF /*!< row_purge_poss_sec() failed */ }; /** The tree cursor: the definition appears here only for the compiler @@ -627,6 +643,7 @@ to know struct size! */ struct btr_cur_struct { dict_index_t* index; /*!< index where positioned */ page_cur_t page_cur; /*!< page cursor */ + purge_node_t* purge_node; /*!< purge node, for BTR_DELETE */ buf_block_t* left_block; /*!< this field is used to store a pointer to the left neighbor page, in the cases @@ -683,6 +700,23 @@ struct btr_cur_struct { NULL */ ulint fold; /*!< fold value used in the search if flag is BTR_CUR_HASH */ + /*----- Delete buffering -------*/ + ulint ibuf_cnt; /* in searches done on insert buffer + trees, this contains the "counter" + value (the first two bytes of the + fourth field) extracted from the + page above the leaf page, from the + father node pointer that pointed to + the leaf page. in other words, it + contains the minimum counter value + for records to be inserted on the + chosen leaf page. If for some reason + this can't be read, or if the search + ended on the leftmost leaf page in + the tree (in which case the father + node pointer had the 'minimum + record' flag set), this is + ULINT_UNDEFINED. */ /*------------------------------*/ /* @} */ btr_path_t* path_arr; /*!< in estimating the number of diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h index 12b1375d8b7..2334a266280 100644 --- a/storage/innobase/include/btr0pcur.h +++ b/storage/innobase/include/btr0pcur.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -82,8 +82,8 @@ Initializes and opens a persistent cursor to an index tree. It should be closed with btr_pcur_close. */ UNIV_INLINE void -btr_pcur_open( -/*==========*/ +btr_pcur_open_func( +/*===============*/ dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ ulint mode, /*!< in: PAGE_CUR_L, ...; @@ -94,14 +94,18 @@ btr_pcur_open( record! */ ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ +#define btr_pcur_open(i,t,md,l,c,m) \ + btr_pcur_open_func(i,t,md,l,c,__FILE__,__LINE__,m) /**************************************************************//** Opens an persistent cursor to an index tree without initializing the cursor. */ UNIV_INLINE void -btr_pcur_open_with_no_init( -/*=======================*/ +btr_pcur_open_with_no_init_func( +/*============================*/ dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ ulint mode, /*!< in: PAGE_CUR_L, ...; @@ -119,7 +123,12 @@ btr_pcur_open_with_no_init( ulint has_search_latch,/*!< in: latch mode the caller currently has on btr_search_latch: RW_S_LATCH, or 0 */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ +#define btr_pcur_open_with_no_init(ix,t,md,l,cur,has,m) \ + btr_pcur_open_with_no_init_func(ix,t,md,l,cur,has,__FILE__,__LINE__,m) + /*****************************************************************//** Opens a persistent cursor at either end of an index. */ UNIV_INLINE @@ -160,8 +169,8 @@ before first in tree. The latching mode must be BTR_SEARCH_LEAF or BTR_MODIFY_LEAF. */ UNIV_INTERN void -btr_pcur_open_on_user_rec( -/*======================*/ +btr_pcur_open_on_user_rec_func( +/*===========================*/ dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ ulint mode, /*!< in: PAGE_CUR_L, ... */ @@ -169,17 +178,25 @@ btr_pcur_open_on_user_rec( BTR_MODIFY_LEAF */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ +#define btr_pcur_open_on_user_rec(i,t,md,l,c,m) \ + btr_pcur_open_on_user_rec_func(i,t,md,l,c,__FILE__,__LINE__,m) /**********************************************************************//** Positions a cursor at a randomly chosen position within a B-tree. */ UNIV_INLINE void -btr_pcur_open_at_rnd_pos( -/*=====================*/ +btr_pcur_open_at_rnd_pos_func( +/*==========================*/ dict_index_t* index, /*!< in: index */ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ btr_pcur_t* cursor, /*!< in/out: B-tree pcur */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ +#define btr_pcur_open_at_rnd_pos(i,l,c,m) \ + btr_pcur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m) /**************************************************************//** Frees the possible old_rec_buf buffer of a persistent cursor and sets the latch mode of the persistent cursor to BTR_NO_LATCHES. */ @@ -218,11 +235,15 @@ record and it can be restored on a user record whose ordering fields are identical to the ones of the original user record */ UNIV_INTERN ibool -btr_pcur_restore_position( -/*======================*/ +btr_pcur_restore_position_func( +/*===========================*/ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ btr_pcur_t* cursor, /*!< in: detached persistent cursor */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ +#define btr_pcur_restore_position(l,cur,mtr) \ + btr_pcur_restore_position_func(l,cur,__FILE__,__LINE__,mtr) /**************************************************************//** If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY, releases the page latch and bufferfix reserved by the cursor. @@ -260,20 +281,13 @@ btr_pcur_get_mtr( /*=============*/ btr_pcur_t* cursor); /*!< in: persistent cursor */ /**************************************************************//** -Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES, +Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES, that is, the cursor becomes detached. If there have been modifications to the page where pcur is positioned, this can be used instead of btr_pcur_release_leaf. Function btr_pcur_store_position should be used before calling this, if restoration of cursor is wanted later. */ UNIV_INLINE void -btr_pcur_commit( -/*============*/ - btr_pcur_t* pcur); /*!< in: persistent cursor */ -/**************************************************************//** -Differs from btr_pcur_commit in that we can specify the mtr to commit. */ -UNIV_INLINE -void btr_pcur_commit_specify_mtr( /*========================*/ btr_pcur_t* pcur, /*!< in: persistent cursor */ diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innobase/include/btr0pcur.ic index 0ca7223f861..0c38797e6c5 100644 --- a/storage/innobase/include/btr0pcur.ic +++ b/storage/innobase/include/btr0pcur.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -395,30 +395,13 @@ btr_pcur_move_to_next( } /**************************************************************//** -Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES, +Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES, that is, the cursor becomes detached. If there have been modifications to the page where pcur is positioned, this can be used instead of btr_pcur_release_leaf. Function btr_pcur_store_position should be used before calling this, if restoration of cursor is wanted later. */ UNIV_INLINE void -btr_pcur_commit( -/*============*/ - btr_pcur_t* pcur) /*!< in: persistent cursor */ -{ - ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED); - - pcur->latch_mode = BTR_NO_LATCHES; - - mtr_commit(pcur->mtr); - - pcur->pos_state = BTR_PCUR_WAS_POSITIONED; -} - -/**************************************************************//** -Differs from btr_pcur_commit in that we can specify the mtr to commit. */ -UNIV_INLINE -void btr_pcur_commit_specify_mtr( /*========================*/ btr_pcur_t* pcur, /*!< in: persistent cursor */ @@ -483,8 +466,8 @@ Initializes and opens a persistent cursor to an index tree. It should be closed with btr_pcur_close. */ UNIV_INLINE void -btr_pcur_open( -/*==========*/ +btr_pcur_open_func( +/*===============*/ dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ ulint mode, /*!< in: PAGE_CUR_L, ...; @@ -495,6 +478,8 @@ btr_pcur_open( record! */ ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mtr */ { btr_cur_t* btr_cursor; @@ -511,7 +496,7 @@ btr_pcur_open( btr_cursor = btr_pcur_get_btr_cur(cursor); btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode, - btr_cursor, 0, mtr); + btr_cursor, 0, file, line, mtr); cursor->pos_state = BTR_PCUR_IS_POSITIONED; cursor->trx_if_known = NULL; @@ -522,8 +507,8 @@ Opens an persistent cursor to an index tree without initializing the cursor. */ UNIV_INLINE void -btr_pcur_open_with_no_init( -/*=======================*/ +btr_pcur_open_with_no_init_func( +/*============================*/ dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ ulint mode, /*!< in: PAGE_CUR_L, ...; @@ -541,6 +526,8 @@ btr_pcur_open_with_no_init( ulint has_search_latch,/*!< in: latch mode the caller currently has on btr_search_latch: RW_S_LATCH, or 0 */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mtr */ { btr_cur_t* btr_cursor; @@ -553,7 +540,8 @@ btr_pcur_open_with_no_init( btr_cursor = btr_pcur_get_btr_cur(cursor); btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode, - btr_cursor, has_search_latch, mtr); + btr_cursor, has_search_latch, + file, line, mtr); cursor->pos_state = BTR_PCUR_IS_POSITIONED; cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; @@ -600,11 +588,13 @@ btr_pcur_open_at_index_side( Positions a cursor at a randomly chosen position within a B-tree. */ UNIV_INLINE void -btr_pcur_open_at_rnd_pos( -/*=====================*/ +btr_pcur_open_at_rnd_pos_func( +/*==========================*/ dict_index_t* index, /*!< in: index */ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ btr_pcur_t* cursor, /*!< in/out: B-tree pcur */ + const char* file, /*!< in: file name */ + ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mtr */ { /* Initialize the cursor */ @@ -614,8 +604,9 @@ btr_pcur_open_at_rnd_pos( btr_pcur_init(cursor); - btr_cur_open_at_rnd_pos(index, latch_mode, - btr_pcur_get_btr_cur(cursor), mtr); + btr_cur_open_at_rnd_pos_func(index, latch_mode, + btr_pcur_get_btr_cur(cursor), + file, line, mtr); cursor->pos_state = BTR_PCUR_IS_POSITIONED; cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 927ff893e39..62e4f54559a 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -34,6 +34,7 @@ Created 11/5/1995 Heikki Tuuri #include "ut0byte.h" #include "page0types.h" #ifndef UNIV_HOTBACKUP +#include "ut0rbt.h" #include "os0proc.h" /** @name Modes for buf_page_get_gen */ @@ -46,6 +47,10 @@ Created 11/5/1995 Heikki Tuuri it is error-prone programming not to set a latch, and it should be used with care */ +#define BUF_GET_IF_IN_POOL_OR_WATCH 15 + /*!< Get the page only if it's in the + buffer pool, if not then set a watch + on the page. */ /* @} */ /** @name Modes for buf_page_get_known_nowait */ /* @{ */ @@ -81,6 +86,8 @@ The enumeration values must be 0..7. */ enum buf_page_state { BUF_BLOCK_ZIP_FREE = 0, /*!< contains a free compressed page */ + BUF_BLOCK_POOL_WATCH = 0, /*!< a sentinel for the buffer pool + watch, element of buf_pool_watch[] */ BUF_BLOCK_ZIP_PAGE, /*!< contains a clean compressed page */ BUF_BLOCK_ZIP_DIRTY, /*!< contains a compressed @@ -202,20 +209,14 @@ with care. */ #define buf_page_get_with_no_latch(SP, ZS, OF, MTR) buf_page_get_gen(\ SP, ZS, OF, RW_NO_LATCH, NULL,\ BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR) -/**************************************************************//** -NOTE! The following macros should be used instead of -buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and -RW_X_LATCH are allowed as LA! */ -#define buf_page_optimistic_get(LA, BL, MC, MTR) \ - buf_page_optimistic_get_func(LA, BL, MC, __FILE__, __LINE__, MTR) /********************************************************************//** This is the general function used to get optimistic access to a database page. @return TRUE if success */ UNIV_INTERN ibool -buf_page_optimistic_get_func( -/*=========================*/ +buf_page_optimistic_get( +/*====================*/ ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */ buf_block_t* block, /*!< in: guessed block */ ib_uint64_t modify_clock,/*!< in: modify clock value if mode is @@ -291,7 +292,8 @@ buf_page_get_gen( ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ buf_block_t* guess, /*!< in: guessed block or NULL */ ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL, - BUF_GET_NO_LATCH */ + BUF_GET_NO_LATCH or + BUF_GET_IF_IN_POOL_OR_WATCH */ const char* file, /*!< in: file name */ ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mini-transaction */ @@ -341,9 +343,8 @@ void buf_page_release( /*=============*/ buf_block_t* block, /*!< in: buffer block */ - ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH, + ulint rw_latch); /*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ - mtr_t* mtr); /*!< in: mtr */ /********************************************************************//** Moves a page to the start of the buffer pool LRU list. This high-level function can be used to prevent an important page from slipping out of @@ -995,6 +996,16 @@ Returns the control block of a file page, NULL if not found. @return block, NULL if not found */ UNIV_INLINE buf_page_t* +buf_page_hash_get_low( +/*==================*/ + ulint space, /*!< in: space id */ + ulint offset, /*!< in: offset of the page within space */ + ulint fold); /*!< in: buf_page_address_fold(space, offset) */ +/******************************************************************//** +Returns the control block of a file page, NULL if not found. +@return block, NULL if not found or not a real control block */ +UNIV_INLINE +buf_page_t* buf_page_hash_get( /*==============*/ ulint space, /*!< in: space id */ @@ -1016,8 +1027,49 @@ UNIV_INTERN ulint buf_get_free_list_len(void); /*=======================*/ -#endif /* !UNIV_HOTBACKUP */ +/******************************************************************** +Determine if a block is a sentinel for a buffer pool watch. +@return TRUE if a sentinel for a buffer pool watch, FALSE if not */ +UNIV_INTERN +ibool +buf_pool_watch_is( +/*==============*/ + const buf_page_t* bpage) /*!< in: block */ + __attribute__((nonnull, warn_unused_result)); +/****************************************************************//** +Add watch for the given page to be read in. Caller must have the buffer pool +@return NULL if watch set, block if the page is in the buffer pool */ +UNIV_INTERN +buf_page_t* +buf_pool_watch_set( +/*===============*/ + ulint space, /*!< in: space id */ + ulint offset, /*!< in: page number */ + ulint fold) /*!< in: buf_page_address_fold(space, offset) */ + __attribute__((warn_unused_result)); +/****************************************************************//** +Stop watching if the page has been read in. +buf_pool_watch_set(space,offset) must have returned NULL before. */ +UNIV_INTERN +void +buf_pool_watch_unset( +/*=================*/ + ulint space, /*!< in: space id */ + ulint offset);/*!< in: page number */ +/****************************************************************//** +Check if the page has been read in. +This may only be called after buf_pool_watch_set(space,offset) +has returned NULL and before invoking buf_pool_watch_unset(space,offset). +@return FALSE if the given page was not read in, TRUE if it was */ +UNIV_INTERN +ibool +buf_pool_watch_occurred( +/*====================*/ + ulint space, /*!< in: space id */ + ulint offset) /*!< in: page number */ + __attribute__((warn_unused_result)); +#endif /* !UNIV_HOTBACKUP */ /** The common buffer control block structure for compressed and uncompressed frames */ @@ -1057,7 +1109,10 @@ struct buf_page_struct{ #endif /* !UNIV_HOTBACKUP */ page_zip_des_t zip; /*!< compressed page; zip.data (but not the data it points to) is - also protected by buf_pool_mutex */ + also protected by buf_pool_mutex; + state == BUF_BLOCK_ZIP_PAGE and + zip.data == NULL means an active + buf_pool_watch */ #ifndef UNIV_HOTBACKUP buf_page_t* hash; /*!< node used in chaining to buf_pool->page_hash or @@ -1073,8 +1128,9 @@ struct buf_page_struct{ UT_LIST_NODE_T(buf_page_t) list; /*!< based on state, this is a - list node, protected only by - buf_pool_mutex, in one of the + list node, protected either by + buf_pool_mutex or by + flush_list_mutex, in one of the following lists in buf_pool: - BUF_BLOCK_NOT_USED: free @@ -1083,6 +1139,12 @@ struct buf_page_struct{ - BUF_BLOCK_ZIP_PAGE: zip_clean - BUF_BLOCK_ZIP_FREE: zip_free[] + If bpage is part of flush_list + then the node pointers are + covered by flush_list_mutex. + Otherwise these pointers are + protected by buf_pool_mutex. + The contents of the list node is undefined if !in_flush_list && state == BUF_BLOCK_FILE_PAGE, @@ -1093,10 +1155,15 @@ struct buf_page_struct{ #ifdef UNIV_DEBUG ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list; - when buf_pool_mutex is free, the + when flush_list_mutex is free, the following should hold: in_flush_list == (state == BUF_BLOCK_FILE_PAGE - || state == BUF_BLOCK_ZIP_DIRTY) */ + || state == BUF_BLOCK_ZIP_DIRTY) + Writes to this field must be + covered by both block->mutex + and flush_list_mutex. Hence + reads can happen while holding + any one of the two mutexes */ ibool in_free_list; /*!< TRUE if in buf_pool->free; when buf_pool_mutex is free, the following should hold: in_free_list @@ -1106,7 +1173,8 @@ struct buf_page_struct{ /*!< log sequence number of the youngest modification to this block, zero if not - modified */ + modified. Protected by block + mutex */ ib_uint64_t oldest_modification; /*!< log sequence number of the START of the log entry @@ -1114,7 +1182,12 @@ struct buf_page_struct{ modification to this block which has not yet been flushed on disk; zero if all - modifications are on disk */ + modifications are on disk. + Writes to this field must be + covered by both block->mutex + and flush_list_mutex. Hence + reads can happen while holding + any one of the two mutexes */ /* @} */ /** @name LRU replacement algorithm fields These fields are protected by buf_pool_mutex only (not @@ -1185,15 +1258,21 @@ struct buf_block_struct{ rw_lock_t lock; /*!< read-write lock of the buffer frame */ unsigned lock_hash_val:32;/*!< hashed value of the page address - in the record lock hash table */ - unsigned check_index_page_at_flush:1; + in the record lock hash table; + protected by buf_block_t::lock + (or buf_block_t::mutex, buf_pool_mutex + in buf_page_get_gen(), + buf_page_init_for_read() + and buf_page_create()) */ + ibool check_index_page_at_flush; /*!< TRUE if we know that this is an index page, and want the database to check its consistency before flush; note that there may be pages in the buffer pool which are index pages, but this flag is not set because - we do not keep track of all pages */ + we do not keep track of all pages; + NOT protected by any mutex */ /* @} */ /** @name Optimistic search field */ /* @{ */ @@ -1346,6 +1425,21 @@ struct buf_pool_struct{ /* @{ */ + mutex_t flush_list_mutex;/*!< mutex protecting the + flush list access. This mutex + protects flush_list, flush_rbt + and bpage::list pointers when + the bpage is on flush_list. It + also protects writes to + bpage::oldest_modification */ + mutex_t flush_order_mutex;/*!< mutex to serialize access to + the flush list when we are putting + dirty blocks in the list. The idea + behind this mutex is to be able + to release log_sys->mutex during + mtr_commit and still ensure that + insertions in the flush_list happen + in the LSN order. */ UT_LIST_BASE_NODE_T(buf_page_t) flush_list; /*!< base node of the modified block list */ @@ -1359,6 +1453,20 @@ struct buf_pool_struct{ /*!< this is in the set state when there is no flush batch of the given type running */ + ib_rbt_t* flush_rbt; /*!< a red-black tree is used + exclusively during recovery to + speed up insertions in the + flush_list. This tree contains + blocks in order of + oldest_modification LSN and is + kept in sync with the + flush_list. + Each member of the tree MUST + also be on the flush_list. + This tree is relevant only in + recovery and is set to NULL + once the recovery is over. + Protected by flush_list_mutex */ ulint freed_page_clock;/*!< a sequence number used to count the number of buffer blocks removed from the end of @@ -1372,8 +1480,8 @@ struct buf_pool_struct{ this is incremented by one; this is set to zero when a buffer block is allocated */ - /* @} */ + /** @name LRU replacement algorithm fields */ /* @{ */ @@ -1439,6 +1547,31 @@ Use these instead of accessing buf_pool_mutex directly. */ mutex_enter(&buf_pool_mutex); \ } while (0) +/** Test if flush list mutex is owned. */ +#define buf_flush_list_mutex_own() mutex_own(&buf_pool->flush_list_mutex) + +/** Acquire the flush list mutex. */ +#define buf_flush_list_mutex_enter() do { \ + mutex_enter(&buf_pool->flush_list_mutex); \ +} while (0) +/** Release the flush list mutex. */ +# define buf_flush_list_mutex_exit() do { \ + mutex_exit(&buf_pool->flush_list_mutex); \ +} while (0) + +/** Test if flush order mutex is owned. */ +#define buf_flush_order_mutex_own() mutex_own(&buf_pool->flush_order_mutex) + +/** Acquire the flush order mutex. */ +#define buf_flush_order_mutex_enter() do { \ + mutex_enter(&buf_pool->flush_order_mutex); \ +} while (0) +/** Release the flush order mutex. */ +# define buf_flush_order_mutex_exit() do { \ + mutex_exit(&buf_pool->flush_order_mutex); \ +} while (0) + + #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /** Flag to forbid the release of the buffer pool mutex. Protected by buf_pool_mutex. */ diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic index 0f92a59a1c7..b9a9662fdc5 100644 --- a/storage/innobase/include/buf0buf.ic +++ b/storage/innobase/include/buf0buf.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -81,7 +81,7 @@ buf_page_peek_if_too_old( unsigned access_time = buf_page_is_accessed(bpage); if (access_time > 0 - && (ut_time_ms() - access_time) + && ((ib_uint32_t) (ut_time_ms() - access_time)) >= buf_LRU_old_threshold_ms) { return(TRUE); } @@ -121,7 +121,7 @@ buf_pool_get_oldest_modification(void) buf_page_t* bpage; ib_uint64_t lsn; - buf_pool_mutex_enter(); + buf_flush_list_mutex_enter(); bpage = UT_LIST_GET_LAST(buf_pool->flush_list); @@ -132,7 +132,7 @@ buf_pool_get_oldest_modification(void) lsn = bpage->oldest_modification; } - buf_pool_mutex_exit(); + buf_flush_list_mutex_exit(); /* The returned answer may be out of date: the flush_list can change after the mutex has been released. */ @@ -705,6 +705,12 @@ buf_block_get_lock_hash_val( /*========================*/ const buf_block_t* block) /*!< in: block */ { + ut_ad(block); + ut_ad(buf_page_in_file(&block->page)); +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_EXCLUSIVE) + || rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_SHARED)); +#endif /* UNIV_SYNC_DEBUG */ return(block->lock_hash_val); } @@ -902,21 +908,20 @@ Returns the control block of a file page, NULL if not found. @return block, NULL if not found */ UNIV_INLINE buf_page_t* -buf_page_hash_get( -/*==============*/ +buf_page_hash_get_low( +/*==================*/ ulint space, /*!< in: space id */ - ulint offset) /*!< in: offset of the page within space */ + ulint offset, /*!< in: offset of the page within space */ + ulint fold) /*!< in: buf_page_address_fold(space, offset) */ { buf_page_t* bpage; - ulint fold; ut_ad(buf_pool); ut_ad(buf_pool_mutex_own()); + ut_ad(fold == buf_page_address_fold(space, offset)); /* Look for the page in the hash table */ - fold = buf_page_address_fold(space, offset); - HASH_SEARCH(hash, buf_pool->page_hash, fold, buf_page_t*, bpage, ut_ad(bpage->in_page_hash && !bpage->in_zip_hash && buf_page_in_file(bpage)), @@ -932,6 +937,26 @@ buf_page_hash_get( } /******************************************************************//** +Returns the control block of a file page, NULL if not found. +@return block, NULL if not found or not a real control block */ +UNIV_INLINE +buf_page_t* +buf_page_hash_get( +/*==============*/ + ulint space, /*!< in: space id */ + ulint offset) /*!< in: offset of the page within space */ +{ + ulint fold = buf_page_address_fold(space, offset); + buf_page_t* bpage = buf_page_hash_get_low(space, offset, fold); + + if (bpage && UNIV_UNLIKELY(buf_pool_watch_is(bpage))) { + bpage = NULL; + } + + return(bpage); +} + +/******************************************************************//** Returns the control block of a file page, NULL if not found or an uncompressed page frame does not exist. @return block, NULL if not found */ @@ -942,7 +967,11 @@ buf_block_hash_get( ulint space, /*!< in: space id */ ulint offset) /*!< in: offset of the page within space */ { - return(buf_page_get_block(buf_page_hash_get(space, offset))); + buf_block_t* block; + + block = buf_page_get_block(buf_page_hash_get(space, offset)); + + return(block); } /********************************************************************//** @@ -1018,21 +1047,14 @@ void buf_page_release( /*=============*/ buf_block_t* block, /*!< in: buffer block */ - ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH, + ulint rw_latch) /*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ - mtr_t* mtr) /*!< in: mtr */ { ut_ad(block); ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); ut_a(block->page.buf_fix_count > 0); - if (rw_latch == RW_X_LATCH && mtr->modifications) { - buf_pool_mutex_enter(); - buf_flush_note_modification(block, mtr); - buf_pool_mutex_exit(); - } - mutex_enter(&block->mutex); #ifdef UNIV_SYNC_DEBUG diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index 6c751852f54..c76fcace46e 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -39,6 +39,16 @@ void buf_flush_remove( /*=============*/ buf_page_t* bpage); /*!< in: pointer to the block in question */ +/*******************************************************************//** +Relocates a buffer control block on the flush_list. +Note that it is assumed that the contents of bpage has already been +copied to dpage. */ +UNIV_INTERN +void +buf_flush_relocate_on_flush_list( +/*=============================*/ + buf_page_t* bpage, /*!< in/out: control block being moved */ + buf_page_t* dpage); /*!< in/out: destination block */ /********************************************************************//** Updates the flush system data structures when a write is completed. */ UNIV_INTERN @@ -175,6 +185,22 @@ buf_flush_validate(void); /*====================*/ #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ +/********************************************************************//** +Initialize the red-black tree to speed up insertions into the flush_list +during recovery process. Should be called at the start of recovery +process before any page has been read/written. */ +UNIV_INTERN +void +buf_flush_init_flush_rbt(void); +/*==========================*/ + +/********************************************************************//** +Frees up the red-black tree. */ +UNIV_INTERN +void +buf_flush_free_flush_rbt(void); +/*==========================*/ + /** When buf_flush_free_margin is called, it tries to make this many blocks available to replacement in the free list and at the end of the LRU list (to make sure that a read-ahead batch can be read efficiently in a single diff --git a/storage/innobase/include/buf0flu.ic b/storage/innobase/include/buf0flu.ic index c90cd59e4b6..fb71932e453 100644 --- a/storage/innobase/include/buf0flu.ic +++ b/storage/innobase/include/buf0flu.ic @@ -33,7 +33,8 @@ UNIV_INTERN void buf_flush_insert_into_flush_list( /*=============================*/ - buf_block_t* block); /*!< in/out: block which is modified */ + buf_block_t* block, /*!< in/out: block which is modified */ + ib_uint64_t lsn); /*!< in: oldest modification */ /********************************************************************//** Inserts a modified block into the flush list in the right sorted position. This function is used by recovery, because there the modifications do not @@ -42,7 +43,8 @@ UNIV_INTERN void buf_flush_insert_sorted_into_flush_list( /*====================================*/ - buf_block_t* block); /*!< in/out: block which is modified */ + buf_block_t* block, /*!< in/out: block which is modified */ + ib_uint64_t lsn); /*!< in: oldest modification */ /********************************************************************//** This function should be called at a mini-transaction commit, if a page was @@ -61,24 +63,27 @@ buf_flush_note_modification( #ifdef UNIV_SYNC_DEBUG ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); #endif /* UNIV_SYNC_DEBUG */ - ut_ad(buf_pool_mutex_own()); + + ut_ad(!buf_pool_mutex_own()); + ut_ad(!buf_flush_list_mutex_own()); + ut_ad(buf_flush_order_mutex_own()); ut_ad(mtr->start_lsn != 0); ut_ad(mtr->modifications); + + mutex_enter(&block->mutex); ut_ad(block->page.newest_modification <= mtr->end_lsn); block->page.newest_modification = mtr->end_lsn; if (!block->page.oldest_modification) { - - block->page.oldest_modification = mtr->start_lsn; - ut_ad(block->page.oldest_modification != 0); - - buf_flush_insert_into_flush_list(block); + buf_flush_insert_into_flush_list(block, mtr->start_lsn); } else { ut_ad(block->page.oldest_modification <= mtr->start_lsn); } + mutex_exit(&block->mutex); + ++srv_buf_pool_write_requests; } @@ -101,23 +106,23 @@ buf_flush_recv_note_modification( ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); #endif /* UNIV_SYNC_DEBUG */ - buf_pool_mutex_enter(); + ut_ad(!buf_pool_mutex_own()); + ut_ad(!buf_flush_list_mutex_own()); + ut_ad(buf_flush_order_mutex_own()); + ut_ad(start_lsn != 0); ut_ad(block->page.newest_modification <= end_lsn); + mutex_enter(&block->mutex); block->page.newest_modification = end_lsn; if (!block->page.oldest_modification) { - - block->page.oldest_modification = start_lsn; - - ut_ad(block->page.oldest_modification != 0); - - buf_flush_insert_sorted_into_flush_list(block); + buf_flush_insert_sorted_into_flush_list(block, start_lsn); } else { ut_ad(block->page.oldest_modification <= start_lsn); } - buf_pool_mutex_exit(); + mutex_exit(&block->mutex); + } #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/data0type.ic b/storage/innobase/include/data0type.ic index 240b4288f39..2bf67a941bd 100644 --- a/storage/innobase/include/data0type.ic +++ b/storage/innobase/include/data0type.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -285,6 +285,10 @@ dtype_new_store_for_order_and_null_size( #endif ulint len; + ut_ad(type); + ut_ad(type->mtype >= DATA_VARCHAR); + ut_ad(type->mtype <= DATA_MYSQL); + buf[0] = (byte)(type->mtype & 0xFFUL); if (type->prtype & DATA_BINARY_TYPE) { diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h index 747e9b5364e..d339eb73fb9 100644 --- a/storage/innobase/include/db0err.h +++ b/storage/innobase/include/db0err.h @@ -93,6 +93,13 @@ enum db_err { DB_PRIMARY_KEY_IS_NULL, /* a column in the PRIMARY KEY was found to be NULL */ + DB_STATS_DO_NOT_EXIST, /* an operation that requires the + persistent storage, used for recording + table and index statistics, was + requested but this storage does not + exist itself or the stats for a given + table do not exist */ + /* The following are partial failure codes */ DB_FAIL = 1000, DB_OVERFLOW, diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h index 51d37ee98d1..e01fafe652d 100644 --- a/storage/innobase/include/dict0boot.h +++ b/storage/innobase/include/dict0boot.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -137,6 +137,7 @@ clustered index */ #define DICT_SYS_INDEXES_PAGE_NO_FIELD 8 #define DICT_SYS_INDEXES_SPACE_NO_FIELD 7 #define DICT_SYS_INDEXES_TYPE_FIELD 6 +#define DICT_SYS_INDEXES_NAME_FIELD 3 /* When a row id which is zero modulo this number (which must be a power of two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index 12396556c2d..788616d682a 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -928,9 +928,10 @@ UNIV_INTERN void dict_table_check_for_dup_indexes( /*=============================*/ - const dict_table_t* table); /*!< in: Check for dup indexes + const dict_table_t* table, /*!< in: Check for dup indexes in this table */ - + ibool tmp_ok);/*!< in: TRUE=allow temporary + index names */ #endif /* UNIV_DEBUG */ /**********************************************************************//** Builds a node pointer out of a physical record and a page number. diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 2d001111938..e63fe920daa 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -80,21 +80,39 @@ combination of types */ /** File format */ /* @{ */ #define DICT_TF_FORMAT_SHIFT 5 /* file format */ -#define DICT_TF_FORMAT_MASK (127 << DICT_TF_FORMAT_SHIFT) +#define DICT_TF_FORMAT_MASK \ +((~(~0 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT))) << DICT_TF_FORMAT_SHIFT) #define DICT_TF_FORMAT_51 0 /*!< InnoDB/MySQL up to 5.1 */ #define DICT_TF_FORMAT_ZIP 1 /*!< InnoDB plugin for 5.1: compressed tables, new BLOB treatment */ /** Maximum supported file format */ #define DICT_TF_FORMAT_MAX DICT_TF_FORMAT_ZIP - +/* @} */ #define DICT_TF_BITS 6 /*!< number of flag bits */ #if (1 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT)) <= DICT_TF_FORMAT_MAX # error "DICT_TF_BITS is insufficient for DICT_TF_FORMAT_MAX" #endif /* @} */ + +/** @brief Additional table flags. + +These flags will be stored in SYS_TABLES.MIX_LEN. All unused flags +will be written as 0. The column may contain garbage for tables +created with old versions of InnoDB that only implemented +ROW_FORMAT=REDUNDANT. */ +/* @{ */ +#define DICT_TF2_SHIFT DICT_TF_BITS + /*!< Shift value for + table->flags. */ +#define DICT_TF2_TEMPORARY 1 /*!< TRUE for tables from + CREATE TEMPORARY TABLE. */ +#define DICT_TF2_BITS (DICT_TF2_SHIFT + 1) + /*!< Total number of bits + in table->flags. */ /* @} */ + /**********************************************************************//** Creates a table memory object. @return own: table object */ @@ -374,7 +392,7 @@ struct dict_table_struct{ unsigned space:32; /*!< space where the clustered index of the table is placed */ - unsigned flags:DICT_TF_BITS;/*!< DICT_TF_COMPACT, ... */ + unsigned flags:DICT_TF2_BITS;/*!< DICT_TF_COMPACT, ... */ unsigned ibd_file_missing:1; /*!< TRUE if this is in a single-table tablespace and the .ibd file is missing; then diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 74d0fbcdacd..36660d9845b 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -26,13 +26,13 @@ Created 10/25/1995 Heikki Tuuri #ifndef fil0fil_h #define fil0fil_h -#include "univ.i" -#ifndef UNIV_HOTBACKUP -#include "sync0rw.h" -#endif /* !UNIV_HOTBACKUP */ #include "dict0types.h" #include "ut0byte.h" #include "os0file.h" +#ifndef UNIV_HOTBACKUP +#include "sync0rw.h" +#include "ibuf0types.h" +#endif /* !UNIV_HOTBACKUP */ /** When mysqld is run, the default directory "." is the mysqld datadir, but in the MySQL Embedded Server Library and ibbackup it is not the default @@ -110,9 +110,10 @@ extern fil_addr_t fil_addr_null; contents of this field is valid for all uncompressed pages. */ #define FIL_PAGE_FILE_FLUSH_LSN 26 /*!< this is only defined for the - first page in a data file: the file - has been flushed to disk at least up - to this lsn */ + first page in a system tablespace + data file (ibdata*, not *.ibd): + the file has been flushed to disk + at least up to this lsn */ #define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this contains the space id of the page */ #define FIL_PAGE_DATA 38 /*!< start of the data on the page */ diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index b737a00b3dc..9725ef05ad8 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2006, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software diff --git a/storage/innobase/include/handler0alter.h b/storage/innobase/include/handler0alter.h index 7f5af6d2e76..017fe88d533 100644 --- a/storage/innobase/include/handler0alter.h +++ b/storage/innobase/include/handler0alter.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software diff --git a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h index 977cb829f35..b17c21a45ef 100644 --- a/storage/innobase/include/hash0hash.h +++ b/storage/innobase/include/hash0hash.h @@ -434,11 +434,12 @@ struct hash_table_struct { these heaps */ #endif /* !UNIV_HOTBACKUP */ mem_heap_t* heap; +#ifdef UNIV_DEBUG ulint magic_n; +# define HASH_TABLE_MAGIC_N 76561114 +#endif /* UNIV_DEBUG */ }; -#define HASH_TABLE_MAGIC_N 76561114 - #ifndef UNIV_NONINL #include "hash0hash.ic" #endif diff --git a/storage/innobase/include/hash0hash.ic b/storage/innobase/include/hash0hash.ic index 19da2d50701..0b437894e2e 100644 --- a/storage/innobase/include/hash0hash.ic +++ b/storage/innobase/include/hash0hash.ic @@ -35,6 +35,8 @@ hash_get_nth_cell( hash_table_t* table, /*!< in: hash table */ ulint n) /*!< in: cell index */ { + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); ut_ad(n < table->n_cells); return(table->array + n); @@ -48,6 +50,8 @@ hash_table_clear( /*=============*/ hash_table_t* table) /*!< in/out: hash table */ { + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); memset(table->array, 0x0, table->n_cells * sizeof(*table->array)); } @@ -61,6 +65,8 @@ hash_get_n_cells( /*=============*/ hash_table_t* table) /*!< in: table */ { + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); return(table->n_cells); } @@ -74,6 +80,8 @@ hash_calc_hash( ulint fold, /*!< in: folded value */ hash_table_t* table) /*!< in: hash table */ { + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); return(ut_hash_ulint(fold, table->n_cells)); } @@ -88,6 +96,8 @@ hash_get_mutex_no( hash_table_t* table, /*!< in: hash table */ ulint fold) /*!< in: fold */ { + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); ut_ad(ut_is_2pow(table->n_mutexes)); return(ut_2pow_remainder(hash_calc_hash(fold, table), table->n_mutexes)); @@ -103,6 +113,8 @@ hash_get_nth_heap( hash_table_t* table, /*!< in: hash table */ ulint i) /*!< in: index of the heap */ { + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); ut_ad(i < table->n_mutexes); return(table->heaps[i]); @@ -120,6 +132,9 @@ hash_get_heap( { ulint i; + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); + if (table->heap) { return(table->heap); } @@ -139,6 +154,8 @@ hash_get_nth_mutex( hash_table_t* table, /*!< in: hash table */ ulint i) /*!< in: index of the mutex */ { + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); ut_ad(i < table->n_mutexes); return(table->mutexes + i); @@ -156,6 +173,9 @@ hash_get_mutex( { ulint i; + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); + i = hash_get_mutex_no(table, fold); return(hash_get_nth_mutex(table, i)); diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h index 8aa21fb9d95..0f1631fde77 100644 --- a/storage/innobase/include/ibuf0ibuf.h +++ b/storage/innobase/include/ibuf0ibuf.h @@ -35,12 +35,27 @@ Created 7/19/1997 Heikki Tuuri #ifndef UNIV_HOTBACKUP # include "ibuf0types.h" +/* Possible operations buffered in the insert/whatever buffer. See +ibuf_insert(). DO NOT CHANGE THE VALUES OF THESE, THEY ARE STORED ON DISK. */ +typedef enum { + IBUF_OP_INSERT = 0, + IBUF_OP_DELETE_MARK = 1, + IBUF_OP_DELETE = 2, + + /* Number of different operation types. */ + IBUF_OP_COUNT = 3, +} ibuf_op_t; + /** Combinations of operations that can be buffered. Because the enum values are used for indexing innobase_change_buffering_values[], they should start at 0 and there should not be any gaps. */ typedef enum { IBUF_USE_NONE = 0, IBUF_USE_INSERT, /* insert */ + IBUF_USE_DELETE_MARK, /* delete */ + IBUF_USE_INSERT_DELETE_MARK, /* insert+delete */ + IBUF_USE_DELETE, /* delete+purge */ + IBUF_USE_ALL, /* insert+delete+purge */ IBUF_USE_COUNT /* number of entries in ibuf_use_t */ } ibuf_use_t; @@ -72,8 +87,7 @@ separately committed mini-transaction, because in crash recovery, the free bits could momentarily be set too high. */ /******************************************************************//** -Creates the insert buffer data structure at a database startup and -initializes the data structures for the insert buffer of each tablespace. */ +Creates the insert buffer data structure at a database startup. */ UNIV_INTERN void ibuf_init_at_db_start(void); @@ -243,14 +257,15 @@ void ibuf_free_excess_pages(void); /*========================*/ /*********************************************************************//** -Makes an index insert to the insert buffer, instead of directly to the disk -page, if this is possible. Does not do insert if the index is clustered -or unique. +Buffer an operation in the insert/delete buffer, instead of doing it +directly to the disk page, if this is possible. Does not do it if the index +is clustered or unique. @return TRUE if success */ UNIV_INTERN ibool ibuf_insert( /*========*/ + ibuf_op_t op, /*!< in: operation type */ const dtuple_t* entry, /*!< in: index entry to insert */ dict_index_t* index, /*!< in: index where to insert */ ulint space, /*!< in: space id where to insert */ @@ -259,11 +274,11 @@ ibuf_insert( que_thr_t* thr); /*!< in: query thread */ /*********************************************************************//** When an index page is read from a disk to the buffer pool, this function -inserts to the page the possible index entries buffered in the insert buffer. -The entries are deleted from the insert buffer. If the page is not read, but -created in the buffer pool, this function deletes its buffered entries from -the insert buffer; there can exist entries for such a page if the page -belonged to an index which subsequently was dropped. */ +applies any buffered operations to the page and deletes the entries from the +insert buffer. If the page is not read, but created in the buffer pool, this +function deletes its buffered entries from the insert buffer; there can +exist entries for such a page if the page belonged to an index which +subsequently was dropped. */ UNIV_INTERN void ibuf_merge_or_delete_for_page( @@ -356,6 +371,15 @@ void ibuf_print( /*=======*/ FILE* file); /*!< in: file where to print */ +/******************************************************************** +Read the first two bytes from a record's fourth field (counter field in new +records; something else in older records). +@return "counter" field, or ULINT_UNDEFINED if for some reason it can't be read */ +UNIV_INTERN +ulint +ibuf_rec_get_counter( +/*=================*/ + const rec_t* rec); /*!< in: ibuf record */ /******************************************************************//** Closes insert buffer and frees the data structures. */ UNIV_INTERN diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic index 15bbe61ab30..84c7a004be2 100644 --- a/storage/innobase/include/ibuf0ibuf.ic +++ b/storage/innobase/include/ibuf0ibuf.ic @@ -55,10 +55,15 @@ struct ibuf_struct{ ulint height; /*!< tree height */ dict_index_t* index; /*!< insert buffer index */ - ulint n_inserts; /*!< number of inserts made to - the insert buffer */ ulint n_merges; /*!< number of pages merged */ - ulint n_merged_recs; /*!< number of records merged */ + ulint n_merged_ops[IBUF_OP_COUNT]; + /*!< number of operations of each type + merged to index pages */ + ulint n_discarded_ops[IBUF_OP_COUNT]; + /*!< number of operations of each type + discarded without merging due to the + tablespace being deleted or the + index being dropped */ }; /************************************************************************//** diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index 82e4c9bd976..ad271a95654 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -613,13 +613,16 @@ lock_rec_print( FILE* file, /*!< in: file where to print */ const lock_t* lock); /*!< in: record type lock */ /*********************************************************************//** -Prints info of locks for all transactions. */ +Prints info of locks for all transactions. +@return FALSE if not able to obtain kernel mutex +and exits without printing info */ UNIV_INTERN -void +ibool lock_print_info_summary( /*====================*/ - FILE* file); /*!< in: file where to print */ -/*********************************************************************//** + FILE* file, /*!< in: file where to print */ + ibool nowait);/*!< in: whether to wait for the kernel mutex */ +/************************************************************************* Prints info of locks for each transaction. */ UNIV_INTERN void diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index 135aeb69e2d..8fce4ef96bc 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -1,23 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ -/***************************************************************************** - -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2009, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -825,7 +808,17 @@ struct log_struct{ written to some log group; for this to be advanced, it is enough that the write i/o has been completed for all - log groups */ + log groups. + Note that since InnoDB currently + has only one log group therefore + this value is redundant. Also it + is possible that this value + falls behind the + flushed_to_disk_lsn transiently. + It is appropriate to use either + flushed_to_disk_lsn or + write_lsn which are always + up-to-date and accurate. */ ib_uint64_t write_lsn; /*!< end lsn for the current running write */ ulint write_end_offset;/*!< the data in buffer has diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic index 36d151a3064..139f4041a36 100644 --- a/storage/innobase/include/log0log.ic +++ b/storage/innobase/include/log0log.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -314,12 +314,15 @@ log_reserve_and_write_fast( ulint data_len; #ifdef UNIV_LOG_LSN_DEBUG /* length of the LSN pseudo-record */ - ulint lsn_len = 1 - + mach_get_compressed_size(log_sys->lsn >> 32) - + mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL); + ulint lsn_len; #endif /* UNIV_LOG_LSN_DEBUG */ mutex_enter(&log_sys->mutex); +#ifdef UNIV_LOG_LSN_DEBUG + lsn_len = 1 + + mach_get_compressed_size(log_sys->lsn >> 32) + + mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL); +#endif /* UNIV_LOG_LSN_DEBUG */ data_len = len #ifdef UNIV_LOG_LSN_DEBUG diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index a3d2bd050f5..3209799e140 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -176,6 +176,12 @@ UNIV_INTERN void recv_recovery_from_checkpoint_finish(void); /*======================================*/ +/********************************************************//** +Initiates the rollback of active transactions. */ +UNIV_INTERN +void +recv_recovery_rollback_active(void); +/*===============================*/ /*******************************************************//** Scans log from a buffer and stores new log data to the parsing buffer. Parses and hashes the log records if new data found. Unless @@ -258,12 +264,14 @@ void recv_sys_init( /*==========*/ ulint available_memory); /*!< in: available memory in bytes */ +#ifndef UNIV_HOTBACKUP /********************************************************//** Reset the state of the recovery system variables. */ UNIV_INTERN void recv_sys_var_init(void); /*===================*/ +#endif /* !UNIV_HOTBACKUP */ /*******************************************************************//** Empties the hash table of stored log records, applying them to appropriate pages. */ diff --git a/storage/innobase/include/mem0dbg.h b/storage/innobase/include/mem0dbg.h index a064af5c678..d81e1418b2b 100644 --- a/storage/innobase/include/mem0dbg.h +++ b/storage/innobase/include/mem0dbg.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -28,6 +28,13 @@ Created 6/9/1994 Heikki Tuuri check fields whose sizes are given below */ #ifdef UNIV_MEM_DEBUG +# ifndef UNIV_HOTBACKUP +/* The mutex which protects in the debug version the hash table +containing the list of live memory heaps, and also the global +variables in mem0dbg.c. */ +extern mutex_t mem_hash_mutex; +# endif /* !UNIV_HOTBACKUP */ + #define MEM_FIELD_HEADER_SIZE ut_calc_align(2 * sizeof(ulint),\ UNIV_MEM_ALIGNMENT) #define MEM_FIELD_TRAILER_SIZE sizeof(ulint) diff --git a/storage/innobase/include/mem0dbg.ic b/storage/innobase/include/mem0dbg.ic index cb9245411dc..b0c8178a623 100644 --- a/storage/innobase/include/mem0dbg.ic +++ b/storage/innobase/include/mem0dbg.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -25,9 +25,6 @@ Created 6/8/1994 Heikki Tuuri *************************************************************************/ #ifdef UNIV_MEM_DEBUG -# ifndef UNIV_HOTBACKUP -extern mutex_t mem_hash_mutex; -# endif /* !UNIV_HOTBACKUP */ extern ulint mem_current_allocated_memory; /******************************************************************//** diff --git a/storage/innobase/include/mem0mem.h b/storage/innobase/include/mem0mem.h index 98f8748e529..5181bb4c9f7 100644 --- a/storage/innobase/include/mem0mem.h +++ b/storage/innobase/include/mem0mem.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -359,6 +359,9 @@ struct mem_block_info_struct { to the heap is also the first block in this list, though it also contains the base node of the list. */ ulint len; /*!< physical length of this block in bytes */ + ulint total_size; /*!< physical length in bytes of all blocks + in the heap. This is defined only in the base + node and is set to ULINT_UNDEFINED in others. */ ulint type; /*!< type of heap: MEM_HEAP_DYNAMIC, or MEM_HEAP_BUF possibly ORed to MEM_HEAP_BTR_SEARCH */ ulint free; /*!< offset in bytes of the first free position for diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic index e7080d8c508..cbce2edc661 100644 --- a/storage/innobase/include/mem0mem.ic +++ b/storage/innobase/include/mem0mem.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -579,18 +579,12 @@ mem_heap_get_size( /*==============*/ mem_heap_t* heap) /*!< in: heap */ { - mem_block_t* block; ulint size = 0; ut_ad(mem_heap_check(heap)); - block = heap; - - while (block != NULL) { + size = heap->total_size; - size += mem_block_get_len(block); - block = UT_LIST_GET_NEXT(list, block); - } #ifndef UNIV_HOTBACKUP if (heap->free_block) { size += UNIV_PAGE_SIZE; diff --git a/storage/innobase/include/mtr0mtr.ic b/storage/innobase/include/mtr0mtr.ic index 310c7c4117f..18f8e87b3cf 100644 --- a/storage/innobase/include/mtr0mtr.ic +++ b/storage/innobase/include/mtr0mtr.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -70,6 +70,7 @@ mtr_memo_push( ut_ad(type <= MTR_MEMO_X_LOCK); ut_ad(mtr); ut_ad(mtr->magic_n == MTR_MAGIC_N); + ut_ad(mtr->state == MTR_ACTIVE); memo = &(mtr->memo); @@ -92,6 +93,7 @@ mtr_set_savepoint( ut_ad(mtr); ut_ad(mtr->magic_n == MTR_MAGIC_N); + ut_ad(mtr->state == MTR_ACTIVE); memo = &(mtr->memo); @@ -149,6 +151,7 @@ mtr_memo_contains( ut_ad(mtr); ut_ad(mtr->magic_n == MTR_MAGIC_N); + ut_ad(mtr->state == MTR_ACTIVE || mtr->state == MTR_COMMITTING); memo = &(mtr->memo); diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index 16568579f31..a112cb06697 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -1,23 +1,6 @@ -/***************************************************************************** - -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ /*********************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2009, Percona Inc. Portions of this file contain modifications contributed and copyrighted @@ -93,29 +76,23 @@ extern ulint os_n_pending_writes; #ifdef __WIN__ /** File handle */ -#define os_file_t HANDLE +# define os_file_t HANDLE /** Convert a C file descriptor to a native file handle @param fd file descriptor @return native file handle */ -#define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd) +# define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd) #else /** File handle */ typedef int os_file_t; /** Convert a C file descriptor to a native file handle @param fd file descriptor @return native file handle */ -#define OS_FILE_FROM_FD(fd) fd +# define OS_FILE_FROM_FD(fd) fd #endif /** Umask for creating files */ extern ulint os_innodb_umask; -/** If this flag is TRUE, then we will use the native aio of the -OS (provided we compiled Innobase with it in), otherwise we will -use simulated aio we build below with threads */ - -extern ibool os_aio_use_native_aio; - /** The next value should be smaller or equal to the smallest sector size used on any disk. A log block is required to be a portion of disk which is written so that if the start and the end of a block get written to disk, then the @@ -158,7 +135,8 @@ log. */ #define OS_FILE_SHARING_VIOLATION 76 #define OS_FILE_ERROR_NOT_SPECIFIED 77 #define OS_FILE_INSUFFICIENT_RESOURCE 78 -#define OS_FILE_OPERATION_ABORTED 79 +#define OS_FILE_AIO_INTERRUPTED 79 +#define OS_FILE_OPERATION_ABORTED 80 /* @} */ /** Types for aio operations @{ */ @@ -204,6 +182,157 @@ extern ulint os_n_file_reads; extern ulint os_n_file_writes; extern ulint os_n_fsyncs; +#ifdef UNIV_PFS_IO +/* Keys to register InnoDB I/O with performance schema */ +extern mysql_pfs_key_t innodb_file_data_key; +extern mysql_pfs_key_t innodb_file_log_key; +extern mysql_pfs_key_t innodb_file_temp_key; + +/* Following four macros are instumentations to register +various file I/O operations with performance schema. +1) register_pfs_file_open_begin() and register_pfs_file_open_end() are +used to register file creation, opening, closing and renaming. +2) register_pfs_file_io_begin() and register_pfs_file_io_end() are +used to register actual file read, write and flush */ +# define register_pfs_file_open_begin(locker, key, op, name, \ + src_file, src_line) \ +do { \ + if (PSI_server) { \ + locker = PSI_server->get_thread_file_name_locker( \ + key, op, name, &locker); \ + if (locker) { \ + PSI_server->start_file_open_wait( \ + locker, src_file, src_line); \ + } \ + } \ +} while (0) + +# define register_pfs_file_open_end(locker, file) \ +do { \ + if (locker) { \ + PSI_server->end_file_open_wait_and_bind_to_descriptor( \ + locker, file); \ + } \ +} while (0) + +# define register_pfs_file_io_begin(locker, file, count, op, \ + src_file, src_line) \ +do { \ + if (PSI_server) { \ + locker = PSI_server->get_thread_file_descriptor_locker( \ + file, op); \ + if (locker) { \ + PSI_server->start_file_wait( \ + locker, count, src_file, src_line); \ + } \ + } \ +} while (0) + +# define register_pfs_file_io_end(locker, count) \ +do { \ + if (locker) { \ + PSI_server->end_file_wait(locker, count); \ + } \ +} while (0) +#endif /* UNIV_PFS_IO */ + +/* Following macros/functions are file I/O APIs that would be performance +schema instrumented if "UNIV_PFS_IO" is defined. They would point to +wrapper functions with performance schema instrumentation in such case. + +os_file_create +os_file_create_simple +os_file_create_simple_no_error_handling +os_file_close +os_file_rename +os_aio +os_file_read +os_file_read_no_error_handling +os_file_write + +The wrapper functions have the prefix of "innodb_". */ + +#ifdef UNIV_PFS_IO +# define os_file_create(key, name, create, purpose, type, success) \ + pfs_os_file_create_func(key, name, create, purpose, type, \ + success, __FILE__, __LINE__) + +# define os_file_create_simple(key, name, create, access, success) \ + pfs_os_file_create_simple_func(key, name, create, access, \ + success, __FILE__, __LINE__) + +# define os_file_create_simple_no_error_handling( \ + key, name, create_mode, access, success) \ + pfs_os_file_create_simple_no_error_handling_func( \ + key, name, create_mode, access, success, __FILE__, __LINE__) + +# define os_file_close(file) \ + pfs_os_file_close_func(file, __FILE__, __LINE__) + +# define os_aio(type, mode, name, file, buf, offset, offset_high, \ + n, message1, message2) \ + pfs_os_aio_func(type, mode, name, file, buf, offset, \ + offset_high, n, message1, message2, \ + __FILE__, __LINE__) + +# define os_file_read(file, buf, offset, offset_high, n) \ + pfs_os_file_read_func(file, buf, offset, offset_high, n, \ + __FILE__, __LINE__) + +# define os_file_read_no_error_handling(file, buf, offset, \ + offset_high, n) \ + pfs_os_file_read_no_error_handling_func(file, buf, offset, \ + offset_high, n, \ + __FILE__, __LINE__) + +# define os_file_write(name, file, buf, offset, offset_high, n) \ + pfs_os_file_write_func(name, file, buf, offset, offset_high, \ + n, __FILE__, __LINE__) + +# define os_file_flush(file) \ + pfs_os_file_flush_func(file, __FILE__, __LINE__) + +# define os_file_rename(key, oldpath, newpath) \ + pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__) +#else /* UNIV_PFS_IO */ + +/* If UNIV_PFS_IO is not defined, these I/O APIs point +to original un-instrumented file I/O APIs */ +# define os_file_create(key, name, create, purpose, type, success) \ + os_file_create_func(name, create, purpose, type, success) + +# define os_file_create_simple(key, name, create, access, success) \ + os_file_create_simple_func(name, create_mode, access, success) + +# define os_file_create_simple_no_error_handling( \ + key, name, create_mode, access, success) \ + os_file_create_simple_no_error_handling_func( \ + name, create_mode, access, success) + +# define os_file_close(file) os_file_close_func(file) + +# define os_aio(type, mode, name, file, buf, offset, offset_high, \ + n, message1, message2) \ + os_aio_func(type, mode, name, file, buf, offset, offset_high, n,\ + message1, message2) + +# define os_file_read(file, buf, offset, offset_high, n) \ + os_file_read_func(file, buf, offset, offset_high, n) + +# define os_file_read_no_error_handling(file, buf, offset, \ + offset_high, n) \ + os_file_read_no_error_handling_func(file, buf, offset, offset_high, n) + +# define os_file_write(name, file, buf, offset, offset_high, n) \ + os_file_write_func(name, file, buf, offset, offset_high, n) + +# define os_file_flush(file) os_file_flush_func(file) + +# define os_file_rename(key, oldpath, newpath) \ + os_file_rename_func(oldpath, newpath) + +#endif /* UNIV_PFS_IO */ + /* File types for directory entry data type */ enum os_file_type_enum{ @@ -313,13 +442,15 @@ os_file_create_directory( ibool fail_if_exists);/*!< in: if TRUE, pre-existing directory is treated as an error. */ /****************************************************************//** +NOTE! Use the corresponding macro os_file_create_simple(), not directly +this function! A simple function to open or create a file. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INTERN os_file_t -os_file_create_simple( -/*==================*/ +os_file_create_simple_func( +/*=======================*/ const char* name, /*!< in: name of the file or path as a null-terminated string */ ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is @@ -333,13 +464,15 @@ os_file_create_simple( OS_FILE_READ_WRITE */ ibool* success);/*!< out: TRUE if succeed, FALSE if error */ /****************************************************************//** +NOTE! Use the corresponding macro +os_file_create_simple_no_error_handling(), not directly this function! A simple function to open or create a file. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INTERN os_file_t -os_file_create_simple_no_error_handling( -/*====================================*/ +os_file_create_simple_no_error_handling_func( +/*=========================================*/ const char* name, /*!< in: name of the file or path as a null-terminated string */ ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file @@ -363,13 +496,15 @@ os_file_set_nocache( const char* operation_name);/*!< in: "open" or "create"; used in the diagnostic message */ /****************************************************************//** +NOTE! Use the corresponding macro os_file_create(), not directly +this function! Opens an existing file or creates a new. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INTERN os_file_t -os_file_create( -/*===========*/ +os_file_create_func( +/*================*/ const char* name, /*!< in: name of the file or path as a null-terminated string */ ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file @@ -407,25 +542,258 @@ os_file_delete_if_exists( /*=====================*/ const char* name); /*!< in: file path as a null-terminated string */ /***********************************************************************//** +NOTE! Use the corresponding macro os_file_rename(), not directly +this function! Renames a file (can also move it to another directory). It is safest that the file is closed before calling this function. @return TRUE if success */ UNIV_INTERN ibool -os_file_rename( -/*===========*/ +os_file_rename_func( +/*================*/ const char* oldpath, /*!< in: old file path as a null-terminated string */ const char* newpath); /*!< in: new file path */ /***********************************************************************//** +NOTE! Use the corresponding macro os_file_close(), not directly this +function! Closes a file handle. In case of error, error number can be retrieved with os_file_get_last_error. @return TRUE if success */ UNIV_INTERN ibool -os_file_close( -/*==========*/ +os_file_close_func( +/*===============*/ os_file_t file); /*!< in, own: handle to a file */ + +#ifdef UNIV_PFS_IO +/****************************************************************//** +NOTE! Please use the corresponding macro os_file_create_simple(), +not directly this function! +A performance schema instrumented wrapper function for +os_file_create_simple() which opens or creates a file. +@return own: handle to the file, not defined if error, error number +can be retrieved with os_file_get_last_error */ +UNIV_INLINE +os_file_t +pfs_os_file_create_simple_func( +/*===========================*/ + mysql_pfs_key_t key, /*!< in: Performance Schema Key */ + const char* name, /*!< in: name of the file or path as a + null-terminated string */ + ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is + opened (if does not exist, error), or + OS_FILE_CREATE if a new file is created + (if exists, error), or + OS_FILE_CREATE_PATH if new file + (if exists, error) and subdirectories along + its path are created (if needed)*/ + ulint access_type,/*!< in: OS_FILE_READ_ONLY or + OS_FILE_READ_WRITE */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ + +/****************************************************************//** +NOTE! Please use the corresponding macro +os_file_create_simple_no_error_handling(), not directly this function! +A performance schema instrumented wrapper function for +os_file_create_simple_no_error_handling(). Add instrumentation to +monitor file creation/open. +@return own: handle to the file, not defined if error, error number +can be retrieved with os_file_get_last_error */ +UNIV_INLINE +os_file_t +pfs_os_file_create_simple_no_error_handling_func( +/*=============================================*/ + mysql_pfs_key_t key, /*!< in: Performance Schema Key */ + const char* name, /*!< in: name of the file or path as a + null-terminated string */ + ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file + is opened (if does not exist, error), or + OS_FILE_CREATE if a new file is created + (if exists, error) */ + ulint access_type,/*!< in: OS_FILE_READ_ONLY, + OS_FILE_READ_WRITE, or + OS_FILE_READ_ALLOW_DELETE; the last option is + used by a backup program reading the file */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ + +/****************************************************************//** +NOTE! Please use the corresponding macro os_file_create(), not directly +this function! +A performance schema wrapper function for os_file_create(). +Add instrumentation to monitor file creation/open. +@return own: handle to the file, not defined if error, error number +can be retrieved with os_file_get_last_error */ +UNIV_INLINE +os_file_t +pfs_os_file_create_func( +/*====================*/ + mysql_pfs_key_t key, /*!< in: Performance Schema Key */ + const char* name, /*!< in: name of the file or path as a + null-terminated string */ + ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file + is opened (if does not exist, error), or + OS_FILE_CREATE if a new file is created + (if exists, error), + OS_FILE_OVERWRITE if a new file is created + or an old overwritten; + OS_FILE_OPEN_RAW, if a raw device or disk + partition should be opened */ + ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous, + non-buffered i/o is desired, + OS_FILE_NORMAL, if any normal file; + NOTE that it also depends on type, os_aio_.. + and srv_.. variables whether we really use + async i/o or unbuffered i/o: look in the + function source code for the exact rules */ + ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ + +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_close(), not directly +this function! +A performance schema instrumented wrapper function for os_file_close(). +@return TRUE if success */ +UNIV_INLINE +ibool +pfs_os_file_close_func( +/*===================*/ + os_file_t file, /*!< in, own: handle to a file */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ +/*******************************************************************//** +NOTE! Please use the corresponding macro os_file_read(), not directly +this function! +This is the performance schema instrumented wrapper function for +os_file_read() which requests a synchronous read operation. +@return TRUE if request was successful, FALSE if fail */ +UNIV_INLINE +ibool +pfs_os_file_read_func( +/*==================*/ + os_file_t file, /*!< in: handle to a file */ + void* buf, /*!< in: buffer where to read */ + ulint offset, /*!< in: least significant 32 bits of file + offset where to read */ + ulint offset_high,/*!< in: most significant 32 bits of + offset */ + ulint n, /*!< in: number of bytes to read */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ + +/*******************************************************************//** +NOTE! Please use the corresponding macro os_file_read_no_error_handling(), +not directly this function! +This is the performance schema instrumented wrapper function for +os_file_read_no_error_handling_func() which requests a synchronous +read operation. +@return TRUE if request was successful, FALSE if fail */ +UNIV_INLINE +ibool +pfs_os_file_read_no_error_handling_func( +/*====================================*/ + os_file_t file, /*!< in: handle to a file */ + void* buf, /*!< in: buffer where to read */ + ulint offset, /*!< in: least significant 32 bits of file + offset where to read */ + ulint offset_high,/*!< in: most significant 32 bits of + offset */ + ulint n, /*!< in: number of bytes to read */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ + +/*******************************************************************//** +NOTE! Please use the corresponding macro os_aio(), not directly this +function! +Performance schema wrapper function of os_aio() which requests +an asynchronous i/o operation. +@return TRUE if request was queued successfully, FALSE if fail */ +UNIV_INLINE +ibool +pfs_os_aio_func( +/*============*/ + ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */ + ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */ + const char* name, /*!< in: name of the file or path as a + null-terminated string */ + os_file_t file, /*!< in: handle to a file */ + void* buf, /*!< in: buffer where to read or from which + to write */ + ulint offset, /*!< in: least significant 32 bits of file + offset where to read or write */ + ulint offset_high,/*!< in: most significant 32 bits of + offset */ + ulint n, /*!< in: number of bytes to read or write */ + fil_node_t* message1,/*!< in: message for the aio handler + (can be used to identify a completed + aio operation); ignored if mode is + OS_AIO_SYNC */ + void* message2,/*!< in: message for the aio handler + (can be used to identify a completed + aio operation); ignored if mode is + OS_AIO_SYNC */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ +/*******************************************************************//** +NOTE! Please use the corresponding macro os_file_write(), not directly +this function! +This is the performance schema instrumented wrapper function for +os_file_write() which requests a synchronous write operation. +@return TRUE if request was successful, FALSE if fail */ +UNIV_INLINE +ibool +pfs_os_file_write_func( +/*===================*/ + const char* name, /*!< in: name of the file or path as a + null-terminated string */ + os_file_t file, /*!< in: handle to a file */ + const void* buf, /*!< in: buffer from which to write */ + ulint offset, /*!< in: least significant 32 bits of file + offset where to write */ + ulint offset_high,/*!< in: most significant 32 bits of + offset */ + ulint n, /*!< in: number of bytes to write */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_flush(), not directly +this function! +This is the performance schema instrumented wrapper function for +os_file_flush() which flushes the write buffers of a given file to the disk. +Flushes the write buffers of a given file to the disk. +@return TRUE if success */ +UNIV_INLINE +ibool +pfs_os_file_flush_func( +/*===================*/ + os_file_t file, /*!< in, own: handle to a file */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ + +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_rename(), not directly +this function! +This is the performance schema instrumented wrapper function for +os_file_rename() +@return TRUE if success */ +UNIV_INLINE +ibool +pfs_os_file_rename_func( +/*====================*/ + mysql_pfs_key_t key, /*!< in: Performance Schema Key */ + const char* oldpath,/*!< in: old file path as a null-terminated + string */ + const char* newpath,/*!< in: new file path */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ +#endif /* UNIV_PFS_IO */ + #ifdef UNIV_HOTBACKUP /***********************************************************************//** Closes a file handle. @@ -477,12 +845,13 @@ os_file_set_eof( /*============*/ FILE* file); /*!< in: file to be truncated */ /***********************************************************************//** +NOTE! Use the corresponding macro os_file_flush(), not directly this function! Flushes the write buffers of a given file to the disk. @return TRUE if success */ UNIV_INTERN ibool -os_file_flush( -/*==========*/ +os_file_flush_func( +/*===============*/ os_file_t file); /*!< in, own: handle to a file */ /***********************************************************************//** Retrieves the last error number if an error occurs in a file io function. @@ -497,12 +866,13 @@ os_file_get_last_error( ibool report_all_errors); /*!< in: TRUE if we want an error message printed of all errors */ /*******************************************************************//** +NOTE! Use the corresponding macro os_file_read(), not directly this function! Requests a synchronous read operation. @return TRUE if request was successful, FALSE if fail */ UNIV_INTERN ibool -os_file_read( -/*=========*/ +os_file_read_func( +/*==============*/ os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ ulint offset, /*!< in: least significant 32 bits of file @@ -522,13 +892,15 @@ os_file_read_string( char* str, /*!< in: buffer where to read */ ulint size); /*!< in: size of buffer */ /*******************************************************************//** +NOTE! Use the corresponding macro os_file_read_no_error_handling(), +not directly this function! Requests a synchronous positioned read operation. This function does not do any error handling. In case of error it returns FALSE. @return TRUE if request was successful, FALSE if fail */ UNIV_INTERN ibool -os_file_read_no_error_handling( -/*===========================*/ +os_file_read_no_error_handling_func( +/*================================*/ os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ ulint offset, /*!< in: least significant 32 bits of file @@ -538,12 +910,14 @@ os_file_read_no_error_handling( ulint n); /*!< in: number of bytes to read */ /*******************************************************************//** +NOTE! Use the corresponding macro os_file_write(), not directly this +function! Requests a synchronous write operation. @return TRUE if request was successful, FALSE if fail */ UNIV_INTERN ibool -os_file_write( -/*==========*/ +os_file_write_func( +/*===============*/ const char* name, /*!< in: name of the file or path as a null-terminated string */ os_file_t file, /*!< in: handle to a file */ @@ -612,7 +986,7 @@ respectively. The caller must create an i/o handler thread for each segment in these arrays. This function also creates the sync array. No i/o handler thread needs to be created for that */ UNIV_INTERN -void +ibool os_aio_init( /*========*/ ulint n_per_seg, /*<! in: maximum number of pending aio @@ -629,12 +1003,13 @@ os_aio_free(void); /*=============*/ /*******************************************************************//** +NOTE! Use the corresponding macro os_aio(), not directly this function! Requests an asynchronous i/o operation. @return TRUE if request was queued successfully, FALSE if fail */ UNIV_INTERN ibool -os_aio( -/*===*/ +os_aio_func( +/*========*/ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */ ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed to OS_AIO_SIMULATED_WAKE_LATER: the @@ -802,4 +1177,36 @@ innobase_mysql_tmpfile(void); /*========================*/ #endif /* !UNIV_HOTBACKUP && !__NETWARE__ */ + +#if defined(LINUX_NATIVE_AIO) +/************************************************************************** +This function is only used in Linux native asynchronous i/o. +Waits for an aio operation to complete. This function is used to wait the +for completed requests. The aio array of pending requests is divided +into segments. The thread specifies which segment or slot it wants to wait +for. NOTE: this function will also take care of freeing the aio slot, +therefore no other thread is allowed to do the freeing! +@return TRUE if the IO was successful */ +UNIV_INTERN +ibool +os_aio_linux_handle( +/*================*/ + ulint global_seg, /*!< in: segment number in the aio array + to wait for; segment 0 is the ibuf + i/o thread, segment 1 is log i/o thread, + then follow the non-ibuf read threads, + and the last are the non-ibuf write + threads. */ + fil_node_t**message1, /*!< out: the messages passed with the */ + void** message2, /*!< aio request; note that in case the + aio operation failed, these output + parameters are valid and can be used to + restart the operation. */ + ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */ +#endif /* LINUX_NATIVE_AIO */ + +#ifndef UNIV_NONINL +#include "os0file.ic" +#endif + #endif diff --git a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic new file mode 100644 index 00000000000..32f0e7cf666 --- /dev/null +++ b/storage/innobase/include/os0file.ic @@ -0,0 +1,408 @@ +/***************************************************************************** + +Copyright (c) 2010, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/**************************************************//** +@file include/os0file.ic +The interface to the operating system file io + +Created 2/20/2010 Jimmy Yang +*******************************************************/ + +#include "univ.i" + +#ifdef UNIV_PFS_IO +/****************************************************************//** +NOTE! Please use the corresponding macro os_file_create_simple(), +not directly this function! +A performance schema instrumented wrapper function for +os_file_create_simple() which opens or creates a file. +@return own: handle to the file, not defined if error, error number +can be retrieved with os_file_get_last_error */ +UNIV_INLINE +os_file_t +pfs_os_file_create_simple_func( +/*===========================*/ + mysql_pfs_key_t key, /*!< in: Performance Schema Key */ + const char* name, /*!< in: name of the file or path as a + null-terminated string */ + ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is + opened (if does not exist, error), or + OS_FILE_CREATE if a new file is created + (if exists, error), or + OS_FILE_CREATE_PATH if new file + (if exists, error) and subdirectories along + its path are created (if needed)*/ + ulint access_type,/*!< in: OS_FILE_READ_ONLY or + OS_FILE_READ_WRITE */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + os_file_t file; + struct PSI_file_locker* locker = NULL; + + /* register a file open or creation depending on "create_mode" */ + register_pfs_file_open_begin(locker, key, + ((create_mode == OS_FILE_CREATE) + ? PSI_FILE_CREATE + : PSI_FILE_OPEN), + name, src_file, src_line); + + file = os_file_create_simple_func(name, create_mode, + access_type, success); + + /* Regsiter the returning "file" value with the system */ + register_pfs_file_open_end(locker, file); + + return(file); +} + +/****************************************************************//** +NOTE! Please use the corresponding macro +os_file_create_simple_no_error_handling(), not directly this function! +A performance schema instrumented wrapper function for +os_file_create_simple_no_error_handling(). Add instrumentation to +monitor file creation/open. +@return own: handle to the file, not defined if error, error number +can be retrieved with os_file_get_last_error */ +UNIV_INLINE +os_file_t +pfs_os_file_create_simple_no_error_handling_func( +/*=============================================*/ + mysql_pfs_key_t key, /*!< in: Performance Schema Key */ + const char* name, /*!< in: name of the file or path as a + null-terminated string */ + ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file + is opened (if does not exist, error), or + OS_FILE_CREATE if a new file is created + (if exists, error) */ + ulint access_type,/*!< in: OS_FILE_READ_ONLY, + OS_FILE_READ_WRITE, or + OS_FILE_READ_ALLOW_DELETE; the last option is + used by a backup program reading the file */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + os_file_t file; + struct PSI_file_locker* locker = NULL; + + /* register a file open or creation depending on "create_mode" */ + register_pfs_file_open_begin(locker, key, + ((create_mode == OS_FILE_CREATE) + ? PSI_FILE_CREATE + : PSI_FILE_OPEN), + name, src_file, src_line); + + file = os_file_create_simple_no_error_handling_func( + name, create_mode, access_type, success); + + register_pfs_file_open_end(locker, file); + + return(file); +} + +/****************************************************************//** +NOTE! Please use the corresponding macro os_file_create(), not directly +this function! +A performance schema wrapper function for os_file_create(). +Add instrumentation to monitor file creation/open. +@return own: handle to the file, not defined if error, error number +can be retrieved with os_file_get_last_error */ +UNIV_INLINE +os_file_t +pfs_os_file_create_func( +/*====================*/ + mysql_pfs_key_t key, /*!< in: Performance Schema Key */ + const char* name, /*!< in: name of the file or path as a + null-terminated string */ + ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file + is opened (if does not exist, error), or + OS_FILE_CREATE if a new file is created + (if exists, error), + OS_FILE_OVERWRITE if a new file is created + or an old overwritten; + OS_FILE_OPEN_RAW, if a raw device or disk + partition should be opened */ + ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous, + non-buffered i/o is desired, + OS_FILE_NORMAL, if any normal file; + NOTE that it also depends on type, os_aio_.. + and srv_.. variables whether we really use + async i/o or unbuffered i/o: look in the + function source code for the exact rules */ + ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + os_file_t file; + struct PSI_file_locker* locker = NULL; + + /* register a file open or creation depending on "create_mode" */ + register_pfs_file_open_begin(locker, key, + ((create_mode == OS_FILE_CREATE) + ? PSI_FILE_CREATE + : PSI_FILE_OPEN), + name, src_file, src_line); + + file = os_file_create_func(name, create_mode, purpose, type, success); + + register_pfs_file_open_end(locker, file); + + return(file); +} + +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_close(), not directly +this function! +A performance schema instrumented wrapper function for os_file_close(). +@return TRUE if success */ +UNIV_INLINE +ibool +pfs_os_file_close_func( +/*===================*/ + os_file_t file, /*!< in, own: handle to a file */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + ibool result; + struct PSI_file_locker* locker = NULL; + + /* register the file close */ + register_pfs_file_io_begin(locker, file, 0, PSI_FILE_CLOSE, + src_file, src_line); + + result = os_file_close_func(file); + + register_pfs_file_io_end(locker, 0); + + return(result); +} + +/*******************************************************************//** +NOTE! Please use the corresponding macro os_aio(), not directly this +function! +Performance schema instrumented wrapper function of os_aio() which +requests an asynchronous i/o operation. +@return TRUE if request was queued successfully, FALSE if fail */ +UNIV_INLINE +ibool +pfs_os_aio_func( +/*============*/ + ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */ + ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */ + const char* name, /*!< in: name of the file or path as a + null-terminated string */ + os_file_t file, /*!< in: handle to a file */ + void* buf, /*!< in: buffer where to read or from which + to write */ + ulint offset, /*!< in: least significant 32 bits of file + offset where to read or write */ + ulint offset_high,/*!< in: most significant 32 bits of + offset */ + ulint n, /*!< in: number of bytes to read or write */ + fil_node_t* message1,/*!< in: message for the aio handler + (can be used to identify a completed + aio operation); ignored if mode is + OS_AIO_SYNC */ + void* message2,/*!< in: message for the aio handler + (can be used to identify a completed + aio operation); ignored if mode is + OS_AIO_SYNC */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + ibool result; + struct PSI_file_locker* locker = NULL; + + /* Register the read or write I/O depending on "type" */ + register_pfs_file_io_begin(locker, file, n, + (type == OS_FILE_WRITE) + ? PSI_FILE_WRITE + : PSI_FILE_READ, + src_file, src_line); + + result = os_aio_func(type, mode, name, file, buf, offset, offset_high, + n, message1, message2); + + register_pfs_file_io_end(locker, n); + + return(result); +} + +/*******************************************************************//** +NOTE! Please use the corresponding macro os_file_read(), not directly +this function! +This is the performance schema instrumented wrapper function for +os_file_read() which requests a synchronous read operation. +@return TRUE if request was successful, FALSE if fail */ +UNIV_INLINE +ibool +pfs_os_file_read_func( +/*==================*/ + os_file_t file, /*!< in: handle to a file */ + void* buf, /*!< in: buffer where to read */ + ulint offset, /*!< in: least significant 32 bits of file + offset where to read */ + ulint offset_high,/*!< in: most significant 32 bits of + offset */ + ulint n, /*!< in: number of bytes to read */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + ibool result; + struct PSI_file_locker* locker = NULL; + + register_pfs_file_io_begin(locker, file, n, PSI_FILE_READ, + src_file, src_line); + + result = os_file_read_func(file, buf, offset, offset_high, n); + + register_pfs_file_io_end(locker, n); + + return(result); +} + +/*******************************************************************//** +NOTE! Please use the corresponding macro +os_file_read_no_error_handling(), not directly this function! +This is the performance schema instrumented wrapper function for +os_file_read_no_error_handling() which requests a synchronous +positioned read operation. This function does not do any error +handling. In case of error it returns FALSE. +@return TRUE if request was successful, FALSE if fail */ +UNIV_INLINE +ibool +pfs_os_file_read_no_error_handling_func( +/*====================================*/ + os_file_t file, /*!< in: handle to a file */ + void* buf, /*!< in: buffer where to read */ + ulint offset, /*!< in: least significant 32 bits of file + offset where to read */ + ulint offset_high,/*!< in: most significant 32 bits of + offset */ + ulint n, /*!< in: number of bytes to read */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + ibool result; + struct PSI_file_locker* locker = NULL; + + register_pfs_file_io_begin(locker, file, n, PSI_FILE_READ, + src_file, src_line); + + result = os_file_read_no_error_handling_func(file, buf, offset, + offset_high, n); + + register_pfs_file_io_end(locker, n); + + return(result); +} + +/*******************************************************************//** +NOTE! Please use the corresponding macro os_file_write(), not directly +this function! +This is the performance schema instrumented wrapper function for +os_file_write() which requests a synchronous write operation. +@return TRUE if request was successful, FALSE if fail */ +UNIV_INLINE +ibool +pfs_os_file_write_func( +/*===================*/ + const char* name, /*!< in: name of the file or path as a + null-terminated string */ + os_file_t file, /*!< in: handle to a file */ + const void* buf, /*!< in: buffer from which to write */ + ulint offset, /*!< in: least significant 32 bits of file + offset where to write */ + ulint offset_high,/*!< in: most significant 32 bits of + offset */ + ulint n, /*!< in: number of bytes to write */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + ibool result; + struct PSI_file_locker* locker = NULL; + + register_pfs_file_io_begin(locker, file, n, PSI_FILE_WRITE, + src_file, src_line); + + result = os_file_write_func(name, file, buf, offset, offset_high, n); + + register_pfs_file_io_end(locker, n); + + return(result); +} + +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_flush(), not directly +this function! +This is the performance schema instrumented wrapper function for +os_file_flush() which flushes the write buffers of a given file to the disk. +@return TRUE if success */ +UNIV_INLINE +ibool +pfs_os_file_flush_func( +/*===================*/ + os_file_t file, /*!< in, own: handle to a file */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + ibool result; + struct PSI_file_locker* locker = NULL; + + register_pfs_file_io_begin(locker, file, 0, PSI_FILE_SYNC, + src_file, src_line); + result = os_file_flush_func(file); + + register_pfs_file_io_end(locker, 0); + + return(result); +} + +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_rename(), not directly +this function! +This is the performance schema instrumented wrapper function for +os_file_rename() +@return TRUE if success */ +UNIV_INLINE +ibool +pfs_os_file_rename_func( +/*====================*/ + mysql_pfs_key_t key, /*!< in: Performance Schema Key */ + const char* oldpath,/*!< in: old file path as a null-terminated + string */ + const char* newpath,/*!< in: new file path */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + ibool result; + struct PSI_file_locker* locker = NULL; + + register_pfs_file_open_begin(locker, key, PSI_FILE_RENAME, newpath, + src_file, src_line); + + result = os_file_rename_func(oldpath, newpath); + + register_pfs_file_open_end(locker, 0); + + return(result); +} +#endif /* UNIV_PFS_IO */ diff --git a/storage/innobase/include/os0thread.h b/storage/innobase/include/os0thread.h index 6583de0005f..cc56e2158ee 100644 --- a/storage/innobase/include/os0thread.h +++ b/storage/innobase/include/os0thread.h @@ -56,6 +56,11 @@ typedef os_thread_t os_thread_id_t; /*!< In Unix we use the thread /* Define a function pointer type to use in a typecast */ typedef void* (*os_posix_f_t) (void*); +#ifdef HAVE_PSI_INTERFACE +/* Define for performance schema registration key */ +typedef unsigned int mysql_pfs_key_t; +#endif + /***************************************************************//** Compares two thread ids for equality. @return TRUE if equal */ @@ -86,7 +91,7 @@ os_thread_t os_thread_create( /*=============*/ #ifndef __WIN__ - os_posix_f_t start_f, + os_posix_f_t start_f, #else ulint (*start_f)(void*), /*!< in: pointer to function from which to start */ diff --git a/storage/innobase/include/pars0pars.h b/storage/innobase/include/pars0pars.h index fe5d76ebbb0..524fe4ac3e7 100644 --- a/storage/innobase/include/pars0pars.h +++ b/storage/innobase/include/pars0pars.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -520,6 +520,23 @@ pars_info_add_int4_literal( Equivalent to: char buf[8]; +mach_write_ull(buf, val); +pars_info_add_literal(info, name, buf, 8, DATA_INT, 0); + +except that the buffer is dynamically allocated from the info struct's +heap. */ +UNIV_INTERN +void +pars_info_add_uint64_literal( +/*=========================*/ + pars_info_t* info, /*!< in: info struct */ + const char* name, /*!< in: name */ + ib_uint64_t val); /*!< in: value */ + +/****************************************************************//** +Equivalent to: + +char buf[8]; mach_write_to_8(buf, val); pars_info_add_literal(info, name, buf, 8, DATA_BINARY, 0); diff --git a/storage/innobase/include/que0que.h b/storage/innobase/include/que0que.h index 420f34550e2..39f8d07af89 100644 --- a/storage/innobase/include/que0que.h +++ b/storage/innobase/include/que0que.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -30,6 +30,7 @@ Created 5/27/1996 Heikki Tuuri #include "data0data.h" #include "dict0types.h" #include "trx0trx.h" +#include "trx0roll.h" #include "srv0srv.h" #include "usr0types.h" #include "que0types.h" @@ -215,6 +216,16 @@ trx_t* thr_get_trx( /*========*/ que_thr_t* thr); /*!< in: query thread */ +/*******************************************************************//** +Determines if this thread is rolling back an incomplete transaction +in crash recovery. +@return TRUE if thr is rolling back an incomplete transaction in crash +recovery */ +UNIV_INLINE +ibool +thr_is_recv( +/*========*/ + const que_thr_t* thr); /*!< in: query thread */ /***********************************************************************//** Gets the type of a graph node. */ UNIV_INLINE diff --git a/storage/innobase/include/que0que.ic b/storage/innobase/include/que0que.ic index a1c0dc1e77a..bd936670e1e 100644 --- a/storage/innobase/include/que0que.ic +++ b/storage/innobase/include/que0que.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -38,6 +38,20 @@ thr_get_trx( return(thr->graph->trx); } +/*******************************************************************//** +Determines if this thread is rolling back an incomplete transaction +in crash recovery. +@return TRUE if thr is rolling back an incomplete transaction in crash +recovery */ +UNIV_INLINE +ibool +thr_is_recv( +/*========*/ + const que_thr_t* thr) /*!< in: query thread */ +{ + return(trx_is_recv(thr->graph->trx)); +} + /***********************************************************************//** Gets the first thr in a fork. */ UNIV_INLINE diff --git a/storage/innobase/include/row0merge.h b/storage/innobase/include/row0merge.h index fbeb125ce7b..be7c77e7724 100644 --- a/storage/innobase/include/row0merge.h +++ b/storage/innobase/include/row0merge.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h index b05241f00f8..d2a8734c61f 100644 --- a/storage/innobase/include/row0mysql.h +++ b/storage/innobase/include/row0mysql.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2000, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -451,6 +451,12 @@ row_drop_table_for_mysql( const char* name, /*!< in: table name */ trx_t* trx, /*!< in: transaction handle */ ibool drop_db);/*!< in: TRUE=dropping whole database */ +/*********************************************************************//** +Drop all temporary tables during crash recovery. */ +UNIV_INTERN +void +row_mysql_drop_temp_tables(void); +/*============================*/ /*********************************************************************//** Discards the tablespace of a table which stored in an .ibd file. Discarding @@ -494,14 +500,19 @@ row_rename_table_for_mysql( trx_t* trx, /*!< in: transaction handle */ ibool commit); /*!< in: if TRUE then commit trx */ /*********************************************************************//** -Checks a table for corruption. -@return DB_ERROR or DB_SUCCESS */ +Checks that the index contains entries in an ascending order, unique +constraint is not broken, and calculates the number of index entries +in the read view of the current transaction. +@return DB_SUCCESS if ok */ UNIV_INTERN ulint -row_check_table_for_mysql( +row_check_index_for_mysql( /*======================*/ - row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL - handle */ + row_prebuilt_t* prebuilt, /*!< in: prebuilt struct + in MySQL handle */ + const dict_index_t* index, /*!< in: index */ + ulint* n_rows); /*!< out: number of entries + seen in the consistent read */ /*********************************************************************//** Determines if a table is a magic monitor table. diff --git a/storage/innobase/include/row0purge.h b/storage/innobase/include/row0purge.h index 89ec54fb54a..485d51dbc83 100644 --- a/storage/innobase/include/row0purge.h +++ b/storage/innobase/include/row0purge.h @@ -45,6 +45,28 @@ row_purge_node_create( que_thr_t* parent, /*!< in: parent node, i.e., a thr node */ mem_heap_t* heap); /*!< in: memory heap where created */ /***********************************************************//** +Determines if it is possible to remove a secondary index entry. +Removal is possible if the secondary index entry does not refer to any +not delete marked version of a clustered index record where DB_TRX_ID +is newer than the purge view. + +NOTE: This function should only be called by the purge thread, only +while holding a latch on the leaf page of the secondary index entry +(or keeping the buffer pool watch on the page). It is possible that +this function first returns TRUE and then FALSE, if a user transaction +inserts a record that the secondary index entry would refer to. +However, in that case, the user transaction would also re-insert the +secondary index entry after purge has removed it and released the leaf +page latch. +@return TRUE if the secondary index record can be purged */ +UNIV_INTERN +ibool +row_purge_poss_sec( +/*===============*/ + purge_node_t* node, /*!< in/out: row purge node */ + dict_index_t* index, /*!< in: secondary index */ + const dtuple_t* entry); /*!< in: secondary index entry */ +/*************************************************************** Does the purge operation for a single undo log record. This is a high-level function used in an SQL execution graph. @return query thread to run next or NULL */ diff --git a/storage/innobase/include/row0row.h b/storage/innobase/include/row0row.h index 723b7b53395..195691a420b 100644 --- a/storage/innobase/include/row0row.h +++ b/storage/innobase/include/row0row.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -257,11 +257,25 @@ row_get_clust_rec( dict_index_t* index, /*!< in: secondary index */ dict_index_t** clust_index,/*!< out: clustered index */ mtr_t* mtr); /*!< in: mtr */ + +/** Result of row_search_index_entry */ +enum row_search_result { + ROW_FOUND = 0, /*!< the record was found */ + ROW_NOT_FOUND, /*!< record not found */ + ROW_BUFFERED, /*!< one of BTR_INSERT, BTR_DELETE, or + BTR_DELETE_MARK was specified, the + secondary index leaf page was not in + the buffer pool, and the operation was + enqueued in the insert/delete buffer */ + ROW_NOT_DELETED_REF, /*!< BTR_DELETE was specified, and + row_purge_poss_sec() failed */ +}; + /***************************************************************//** Searches an index record. -@return TRUE if found */ +@return whether the record was found or buffered */ UNIV_INTERN -ibool +enum row_search_result row_search_index_entry( /*===================*/ dict_index_t* index, /*!< in: index */ diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h index 01a5afaa23e..8544b9d08ba 100644 --- a/storage/innobase/include/row0sel.h +++ b/storage/innobase/include/row0sel.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -105,17 +105,6 @@ row_fetch_print( /*============*/ void* row, /*!< in: sel_node_t* */ void* user_arg); /*!< in: not used */ -/****************************************************************//** -Callback function for fetch that stores an unsigned 4 byte integer to the -location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length -= 4. -@return always returns NULL */ -UNIV_INTERN -void* -row_fetch_store_uint4( -/*==================*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg); /*!< in: data pointer */ /***********************************************************//** Prints a row in a select result. @return query thread to run next or NULL */ diff --git a/storage/innobase/include/row0types.h b/storage/innobase/include/row0types.h index 1be729206ba..7d6a7c8e2b1 100644 --- a/storage/innobase/include/row0types.h +++ b/storage/innobase/include/row0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index ce7f6004813..74c604124f5 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -1,7 +1,8 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2008, 2009, Google Inc. +Copyright (c) 2009, Percona Inc. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -9,6 +10,13 @@ briefly in the InnoDB documentation. The contributions by Google are incorporated with their permission, and subject to the conditions contained in the file COPYING.Google. +Portions of this file contain modifications contributed and copyrighted +by Percona Inc.. Those modifications are +gratefully acknowledged and are described briefly in the InnoDB +documentation. The contributions by Percona Inc. are incorporated with +their permission, and subject to the conditions contained in the file +COPYING.Percona. + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. @@ -22,32 +30,6 @@ this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *****************************************************************************/ -/*********************************************************************** - -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. -Copyright (c) 2009, Percona Inc. - -Portions of this file contain modifications contributed and copyrighted -by Percona Inc.. Those modifications are -gratefully acknowledged and are described briefly in the InnoDB -documentation. The contributions by Percona Inc. are incorporated with -their permission, and subject to the conditions contained in the file -COPYING.Percona. - -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -***********************************************************************/ /**************************************************//** @file include/srv0srv.h @@ -125,6 +107,11 @@ on duplicate key checking and foreign key checking */ extern ibool srv_locks_unsafe_for_binlog; #endif /* !UNIV_HOTBACKUP */ +/* If this flag is TRUE, then we will use the native aio of the +OS (provided we compiled Innobase with it in), otherwise we will +use simulated aio we build below with threads. +Currently we support native aio on windows and linux */ +extern my_bool srv_use_native_aio; extern ulint srv_n_data_files; extern char** srv_data_file_names; extern ulint* srv_data_file_sizes; @@ -224,7 +211,8 @@ extern ibool srv_print_innodb_tablespace_monitor; extern ibool srv_print_verbose_log; extern ibool srv_print_innodb_table_monitor; -extern ibool srv_lock_timeout_and_monitor_active; +extern ibool srv_lock_timeout_active; +extern ibool srv_monitor_active; extern ibool srv_error_monitor_active; extern ulong srv_n_spin_wait_rounds; @@ -283,6 +271,12 @@ extern ulint srv_os_log_pending_writes; log buffer and have to flush it */ extern ulint srv_log_waits; +/* the number of purge threads to use from the worker pool (currently 0 or 1) */ +extern ulint srv_n_purge_threads; + +/* the number of records to purge in one batch */ +extern ulint srv_purge_batch_size; + /* variable that counts amount of data read in total (in bytes) */ extern ulint srv_data_read; @@ -324,6 +318,37 @@ typedef struct srv_sys_struct srv_sys_t; /** The server system */ extern srv_sys_t* srv_sys; + +# ifdef UNIV_PFS_THREAD +/* Keys to register InnoDB threads with performance schema */ +extern mysql_pfs_key_t trx_rollback_clean_thread_key; +extern mysql_pfs_key_t io_handler_thread_key; +extern mysql_pfs_key_t srv_lock_timeout_thread_key; +extern mysql_pfs_key_t srv_error_monitor_thread_key; +extern mysql_pfs_key_t srv_monitor_thread_key; +extern mysql_pfs_key_t srv_master_thread_key; + +/* This macro register the current thread and its key with performance +schema */ +# define pfs_register_thread(key) \ +do { \ + if (PSI_server) { \ + struct PSI_thread* psi = PSI_server->new_thread(key, NULL, 0);\ + if (psi) { \ + PSI_server->set_thread(psi); \ + } \ + } \ +} while (0) + +/* This macro delist the current thread from performance schema */ +# define pfs_delete_thread() \ +do { \ + if (PSI_server) { \ + PSI_server->delete_current_thread(); \ + } \ +} while (0) +# endif /* UNIV_PFS_THREAD */ + #endif /* !UNIV_HOTBACKUP */ /** Types of raw partitions in innodb_data_file_path */ @@ -464,6 +489,12 @@ srv_master_thread( void* arg); /*!< in: a dummy parameter required by os_thread_create */ /*******************************************************************//** +Wakes up the purge thread if it's not already awake. */ +UNIV_INTERN +void +srv_wake_purge_thread(void); +/*=======================*/ +/*******************************************************************//** Tells the Innobase server that there has been activity in the database and wakes up the master thread if it is suspended (not sleeping). Used in the MySQL interface. Note that there is a small chance that the master @@ -479,6 +510,16 @@ UNIV_INTERN void srv_wake_master_thread(void); /*========================*/ +/*******************************************************************//** +Tells the purge thread that there has been activity in the database +and wakes up the purge thread if it is suspended (not sleeping). Note +that there is a small chance that the purge thread stays suspended +(we do not protect our operation with the kernel mutex, for +performace reasons). */ +UNIV_INTERN +void +srv_wake_purge_thread_if_not_active(void); +/*=====================================*/ /*********************************************************************//** Puts an OS thread to wait if there are too many concurrent threads (>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */ @@ -537,15 +578,23 @@ srv_release_mysql_thread_if_suspended( MySQL OS thread */ /*********************************************************************//** A thread which wakes up threads whose lock wait may have lasted too long. -This also prints the info output by various InnoDB monitors. @return a dummy parameter */ UNIV_INTERN os_thread_ret_t -srv_lock_timeout_and_monitor_thread( -/*================================*/ +srv_lock_timeout_thread( +/*====================*/ void* arg); /*!< in: a dummy parameter required by os_thread_create */ /*********************************************************************//** +A thread which prints the info output by various InnoDB monitors. +@return a dummy parameter */ +UNIV_INTERN +os_thread_ret_t +srv_monitor_thread( +/*===============*/ + void* arg); /*!< in: a dummy parameter required by + os_thread_create */ +/************************************************************************* A thread which prints warnings about semaphore waits which have lasted too long. These can be used to track bugs which cause hangs. @return a dummy parameter */ @@ -556,12 +605,15 @@ srv_error_monitor_thread( void* arg); /*!< in: a dummy parameter required by os_thread_create */ /******************************************************************//** -Outputs to a file the output of the InnoDB Monitor. */ +Outputs to a file the output of the InnoDB Monitor. +@return FALSE if not all information printed +due to failure to obtain necessary mutex */ UNIV_INTERN -void +ibool srv_printf_innodb_monitor( /*======================*/ FILE* file, /*!< in: output stream */ + ibool nowait, /*!< in: whether to wait for kernel mutex */ ulint* trx_start, /*!< out: file position of the start of the list of active transactions */ ulint* trx_end); /*!< out: file position of the end of @@ -574,6 +626,15 @@ void srv_export_innodb_status(void); /*==========================*/ +/*********************************************************************//** +Asynchronous purge thread. +@return a dummy parameter */ +UNIV_INTERN +os_thread_ret_t +srv_purge_thread( +/*=============*/ + void* arg __attribute__((unused))); /*!< in: a dummy parameter + required by os_thread_create */ /** Thread slot in the thread table */ typedef struct srv_slot_struct srv_slot_t; @@ -643,8 +704,9 @@ struct srv_sys_struct{ extern ulint srv_n_threads_active[]; #else /* !UNIV_HOTBACKUP */ -# define srv_use_checksums TRUE # define srv_use_adaptive_hash_indexes FALSE +# define srv_use_checksums TRUE +# define srv_use_native_aio FALSE # define srv_force_recovery 0UL # define srv_set_io_thread_op_info(t,info) ((void) 0) # define srv_is_being_started 0 diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h index aedfd5f3f86..6233ceef748 100644 --- a/storage/innobase/include/sync0rw.h +++ b/storage/innobase/include/sync0rw.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -105,23 +105,140 @@ extern ib_int64_t rw_x_os_wait_count; set only when UNIV_SYNC_PERF_STAT is defined */ extern ib_int64_t rw_x_exit_count; +#ifdef UNIV_PFS_RWLOCK +/* Following are rwlock keys used to register with MySQL +performance schema */ +# ifdef UNIV_LOG_ARCHIVE +extern mysql_pfs_key_t archive_lock_key; +# endif /* UNIV_LOG_ARCHIVE */ +extern mysql_pfs_key_t btr_search_latch_key; +extern mysql_pfs_key_t buf_block_lock_key; +# ifdef UNIV_SYNC_DEBUG +extern mysql_pfs_key_t buf_block_debug_latch_key; +# endif /* UNIV_SYNC_DEBUG */ +extern mysql_pfs_key_t dict_operation_lock_key; +extern mysql_pfs_key_t fil_space_latch_key; +extern mysql_pfs_key_t checkpoint_lock_key; +extern mysql_pfs_key_t trx_i_s_cache_lock_key; +extern mysql_pfs_key_t trx_purge_latch_key; +extern mysql_pfs_key_t index_tree_rw_lock_key; +#endif /* UNIV_PFS_RWLOCK */ + + +#ifndef UNIV_PFS_RWLOCK /******************************************************************//** Creates, or rather, initializes an rw-lock object in a specified memory location (which must be appropriately aligned). The rw-lock is initialized to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free -is necessary only if the memory block containing it is freed. */ -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG -# define rw_lock_create(L, level) \ +is necessary only if the memory block containing it is freed. +if MySQL performance schema is enabled and "UNIV_PFS_RWLOCK" is +defined, the rwlock are instrumented with performance schema probes. */ +# ifdef UNIV_DEBUG +# ifdef UNIV_SYNC_DEBUG +# define rw_lock_create(K, L, level) \ rw_lock_create_func((L), (level), #L, __FILE__, __LINE__) -# else /* UNIV_SYNC_DEBUG */ -# define rw_lock_create(L, level) \ +# else /* UNIV_SYNC_DEBUG */ +# define rw_lock_create(K, L, level) \ rw_lock_create_func((L), #L, __FILE__, __LINE__) -# endif /* UNIV_SYNC_DEBUG */ -#else /* UNIV_DEBUG */ -# define rw_lock_create(L, level) \ +# endif/* UNIV_SYNC_DEBUG */ +# else /* UNIV_DEBUG */ +# define rw_lock_create(K, L, level) \ rw_lock_create_func((L), __FILE__, __LINE__) -#endif /* UNIV_DEBUG */ +# endif /* UNIV_DEBUG */ + +/**************************************************************//** +NOTE! The following macros should be used in rw locking and +unlocking, not the corresponding function. */ + +# define rw_lock_s_lock(M) \ + rw_lock_s_lock_func((M), 0, __FILE__, __LINE__) + +# define rw_lock_s_lock_gen(M, P) \ + rw_lock_s_lock_func((M), (P), __FILE__, __LINE__) + +# define rw_lock_s_lock_nowait(M, F, L) \ + rw_lock_s_lock_low((M), 0, (F), (L)) + +# ifdef UNIV_SYNC_DEBUG +# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(P, L) +# else +# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L) +# endif + + +# define rw_lock_x_lock(M) \ + rw_lock_x_lock_func((M), 0, __FILE__, __LINE__) + +# define rw_lock_x_lock_gen(M, P) \ + rw_lock_x_lock_func((M), (P), __FILE__, __LINE__) + +# define rw_lock_x_lock_nowait(M) \ + rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__) + +# ifdef UNIV_SYNC_DEBUG +# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(P, L) +# else +# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L) +# endif + +# define rw_lock_free(M) rw_lock_free_func(M) + +#else /* !UNIV_PFS_RWLOCK */ + +/* Following macros point to Performance Schema instrumented functions. */ +# ifdef UNIV_DEBUG +# ifdef UNIV_SYNC_DEBUG +# define rw_lock_create(K, L, level) \ + pfs_rw_lock_create_func((K), (L), (level), #L, __FILE__, __LINE__) +# else /* UNIV_SYNC_DEBUG */ +# define rw_lock_create(K, L, level) \ + pfs_rw_lock_create_func((K), (L), #L, __FILE__, __LINE__) +# endif/* UNIV_SYNC_DEBUG */ +# else /* UNIV_DEBUG */ +# define rw_lock_create(K, L, level) \ + pfs_rw_lock_create_func((K), (L), __FILE__, __LINE__) +# endif /* UNIV_DEBUG */ + +/****************************************************************** +NOTE! The following macros should be used in rw locking and +unlocking, not the corresponding function. */ + +# define rw_lock_s_lock(M) \ + pfs_rw_lock_s_lock_func((M), 0, __FILE__, __LINE__) + +# define rw_lock_s_lock_gen(M, P) \ + pfs_rw_lock_s_lock_func((M), (P), __FILE__, __LINE__) + +# define rw_lock_s_lock_nowait(M, F, L) \ + pfs_rw_lock_s_lock_low((M), 0, (F), (L)) + +# ifdef UNIV_SYNC_DEBUG +# define rw_lock_s_unlock_gen(L, P) pfs_rw_lock_s_unlock_func(P, L) +# else +# define rw_lock_s_unlock_gen(L, P) pfs_rw_lock_s_unlock_func(L) +# endif + +# define rw_lock_x_lock(M) \ + pfs_rw_lock_x_lock_func((M), 0, __FILE__, __LINE__) + +# define rw_lock_x_lock_gen(M, P) \ + pfs_rw_lock_x_lock_func((M), (P), __FILE__, __LINE__) + +# define rw_lock_x_lock_nowait(M) \ + pfs_rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__) + +# ifdef UNIV_SYNC_DEBUG +# define rw_lock_x_unlock_gen(L, P) pfs_rw_lock_x_unlock_func(P, L) +# else +# define rw_lock_x_unlock_gen(L, P) pfs_rw_lock_x_unlock_func(L) +# endif + +# define rw_lock_free(M) pfs_rw_lock_free_func(M) + +#endif /* UNIV_PFS_RWLOCK */ + +#define rw_lock_s_unlock(L) rw_lock_s_unlock_gen(L, 0) +#define rw_lock_x_unlock(L) rw_lock_x_unlock_gen(L, 0) /******************************************************************//** Creates, or rather, initializes an rw-lock object in a specified memory @@ -137,18 +254,18 @@ rw_lock_create_func( # ifdef UNIV_SYNC_DEBUG ulint level, /*!< in: level */ # endif /* UNIV_SYNC_DEBUG */ - const char* cmutex_name, /*!< in: mutex name */ + const char* cmutex_name, /*!< in: mutex name */ #endif /* UNIV_DEBUG */ const char* cfile_name, /*!< in: file name where created */ - ulint cline); /*!< in: file line where created */ + ulint cline); /*!< in: file line where created */ /******************************************************************//** Calling this function is obligatory only if the memory buffer containing the rw-lock is freed. Removes an rw-lock object from the global list. The rw-lock is checked to be in the non-locked state. */ UNIV_INTERN void -rw_lock_free( -/*=========*/ +rw_lock_free_func( +/*==============*/ rw_lock_t* lock); /*!< in: rw-lock */ #ifdef UNIV_DEBUG /******************************************************************//** @@ -161,24 +278,6 @@ rw_lock_validate( /*=============*/ rw_lock_t* lock); /*!< in: rw-lock */ #endif /* UNIV_DEBUG */ -/**************************************************************//** -NOTE! The following macros should be used in rw s-locking, not the -corresponding function. */ - -#define rw_lock_s_lock(M) rw_lock_s_lock_func(\ - (M), 0, __FILE__, __LINE__) -/**************************************************************//** -NOTE! The following macros should be used in rw s-locking, not the -corresponding function. */ - -#define rw_lock_s_lock_gen(M, P) rw_lock_s_lock_func(\ - (M), (P), __FILE__, __LINE__) -/**************************************************************//** -NOTE! The following macros should be used in rw s-locking, not the -corresponding function. */ - -#define rw_lock_s_lock_nowait(M, F, L) rw_lock_s_lock_low(\ - (M), 0, (F), (L)) /******************************************************************//** Low-level function which tries to lock an rw-lock in s-mode. Performs no spinning. @@ -233,33 +332,6 @@ rw_lock_s_unlock_func( #endif rw_lock_t* lock); /*!< in/out: rw-lock */ -#ifdef UNIV_SYNC_DEBUG -# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(P, L) -#else -# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L) -#endif -/*******************************************************************//** -Releases a shared mode lock. */ -#define rw_lock_s_unlock(L) rw_lock_s_unlock_gen(L, 0) - -/**************************************************************//** -NOTE! The following macro should be used in rw x-locking, not the -corresponding function. */ - -#define rw_lock_x_lock(M) rw_lock_x_lock_func(\ - (M), 0, __FILE__, __LINE__) -/**************************************************************//** -NOTE! The following macro should be used in rw x-locking, not the -corresponding function. */ - -#define rw_lock_x_lock_gen(M, P) rw_lock_x_lock_func(\ - (M), (P), __FILE__, __LINE__) -/**************************************************************//** -NOTE! The following macros should be used in rw x-locking, not the -corresponding function. */ - -#define rw_lock_x_lock_nowait(M) rw_lock_x_lock_func_nowait(\ - (M), __FILE__, __LINE__) /******************************************************************//** NOTE! Use the corresponding macro, not directly this function! Lock an rw-lock in exclusive mode for the current thread. If the rw-lock is locked @@ -290,14 +362,6 @@ rw_lock_x_unlock_func( #endif rw_lock_t* lock); /*!< in/out: rw-lock */ -#ifdef UNIV_SYNC_DEBUG -# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(P, L) -#else -# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L) -#endif -/*******************************************************************//** -Releases an exclusive mode lock. */ -#define rw_lock_x_unlock(L) rw_lock_x_unlock_gen(L, 0) /******************************************************************//** Low-level function which locks an rw-lock in s-mode when we know that it @@ -429,8 +493,9 @@ ibool rw_lock_own( /*========*/ rw_lock_t* lock, /*!< in: rw-lock */ - ulint lock_type); /*!< in: lock type: RW_LOCK_SHARED, + ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED, RW_LOCK_EX */ + __attribute__((warn_unused_result)); #endif /* UNIV_SYNC_DEBUG */ /******************************************************************//** Checks if somebody has locked the rw-lock in the specified mode. */ @@ -539,6 +604,9 @@ struct rw_lock_struct { info list of the lock */ ulint level; /*!< Level in the global latching order. */ #endif /* UNIV_SYNC_DEBUG */ +#ifdef UNIV_PFS_RWLOCK + struct PSI_rwlock *pfs_psi;/*!< The instrumentation hook */ +#endif ulint count_os_wait; /*!< Count of os_waits. May not be accurate */ const char* cfile_name;/*!< File name where lock created */ /* last s-lock file/line is not guaranteed to be correct */ @@ -577,6 +645,160 @@ struct rw_lock_debug_struct { }; #endif /* UNIV_SYNC_DEBUG */ +/* For performance schema instrumentation, a new set of rwlock +wrap functions are created if "UNIV_PFS_RWLOCK" is defined. +The instrumentations are not planted directly into original +functions, so that we keep the underlying function as they +are. And in case, user wants to "take out" some rwlock from +instrumentation even if performance schema (UNIV_PFS_RWLOCK) +is defined, they can do so by reinstating APIs directly link to +original underlying functions. +The instrumented function names have prefix of "pfs_rw_lock_" vs. +original name prefix of "rw_lock_". Following are list of functions +that have been instrumented: + +rw_lock_create() +rw_lock_x_lock() +rw_lock_x_lock_gen() +rw_lock_x_lock_nowait() +rw_lock_x_unlock_gen() +rw_lock_s_lock() +rw_lock_s_lock_gen() +rw_lock_s_lock_nowait() +rw_lock_s_unlock_gen() +rw_lock_free() + +Two function APIs rw_lock_x_unlock_direct() and rw_lock_s_unlock_direct() +do not have any caller/user, they are not instrumented. +*/ + +#ifdef UNIV_PFS_RWLOCK +/******************************************************************//** +Performance schema instrumented wrap function for rw_lock_create_func() +NOTE! Please use the corresponding macro rw_lock_create(), not +directly this function! */ +UNIV_INLINE +void +pfs_rw_lock_create_func( +/*====================*/ + PSI_rwlock_key key, /*!< in: key registered with + performance schema */ + rw_lock_t* lock, /*!< in: rw lock */ +#ifdef UNIV_DEBUG +# ifdef UNIV_SYNC_DEBUG + ulint level, /*!< in: level */ +# endif /* UNIV_SYNC_DEBUG */ + const char* cmutex_name, /*!< in: mutex name */ +#endif /* UNIV_DEBUG */ + const char* cfile_name, /*!< in: file name where created */ + ulint cline); /*!< in: file line where created */ + +/******************************************************************//** +Performance schema instrumented wrap function for rw_lock_x_lock_func() +NOTE! Please use the corresponding macro rw_lock_x_lock(), not +directly this function! */ +UNIV_INLINE +void +pfs_rw_lock_x_lock_func( +/*====================*/ + rw_lock_t* lock, /*!< in: pointer to rw-lock */ + ulint pass, /*!< in: pass value; != 0, if the lock will + be passed to another thread to unlock */ + const char* file_name,/*!< in: file name where lock requested */ + ulint line); /*!< in: line where requested */ +/******************************************************************//** +Performance schema instrumented wrap function for +rw_lock_x_lock_func_nowait() +NOTE! Please use the corresponding macro, not directly this function! +@return TRUE if success */ +UNIV_INLINE +ibool +pfs_rw_lock_x_lock_func_nowait( +/*===========================*/ + rw_lock_t* lock, /*!< in: pointer to rw-lock */ + const char* file_name,/*!< in: file name where lock requested */ + ulint line); /*!< in: line where requested */ +/******************************************************************//** +Performance schema instrumented wrap function for rw_lock_s_lock_func() +NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly +this function! */ +UNIV_INLINE +void +pfs_rw_lock_s_lock_func( +/*====================*/ + rw_lock_t* lock, /*!< in: pointer to rw-lock */ + ulint pass, /*!< in: pass value; != 0, if the lock will + be passed to another thread to unlock */ + const char* file_name,/*!< in: file name where lock requested */ + ulint line); /*!< in: line where requested */ +/******************************************************************//** +Performance schema instrumented wrap function for rw_lock_s_lock_func() +NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly +this function! +@return TRUE if success */ +UNIV_INLINE +ibool +pfs_rw_lock_s_lock_low( +/*===================*/ + rw_lock_t* lock, /*!< in: pointer to rw-lock */ + ulint pass, /*!< in: pass value; != 0, if the + lock will be passed to another + thread to unlock */ + const char* file_name, /*!< in: file name where lock requested */ + ulint line); /*!< in: line where requested */ +/******************************************************************//** +Performance schema instrumented wrap function for rw_lock_x_lock_func() +NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly +this function! */ +UNIV_INLINE +void +pfs_rw_lock_x_lock_func( +/*====================*/ + rw_lock_t* lock, /*!< in: pointer to rw-lock */ + ulint pass, /*!< in: pass value; != 0, if the lock will + be passed to another thread to unlock */ + const char* file_name,/*!< in: file name where lock requested */ + ulint line); /*!< in: line where requested */ +/******************************************************************//** +Performance schema instrumented wrap function for rw_lock_s_unlock_func() +NOTE! Please use the corresponding macro rw_lock_s_unlock(), not directly +this function! */ +UNIV_INLINE +void +pfs_rw_lock_s_unlock_func( +/*======================*/ +#ifdef UNIV_SYNC_DEBUG + ulint pass, /*!< in: pass value; != 0, if the + lock may have been passed to another + thread to unlock */ +#endif + rw_lock_t* lock); /*!< in/out: rw-lock */ +/******************************************************************//** +Performance schema instrumented wrap function for rw_lock_s_unlock_func() +NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly +this function! */ +UNIV_INLINE +void +pfs_rw_lock_x_unlock_func( +/*======================*/ +#ifdef UNIV_SYNC_DEBUG + ulint pass, /*!< in: pass value; != 0, if the + lock may have been passed to another + thread to unlock */ +#endif + rw_lock_t* lock); /*!< in/out: rw-lock */ +/******************************************************************//** +Performance schema instrumented wrap function for rw_lock_free_func() +NOTE! Please use the corresponding macro rw_lock_free(), not directly +this function! */ +UNIV_INLINE +void +pfs_rw_lock_free_func( +/*==================*/ + rw_lock_t* lock); /*!< in: rw-lock */ +#endif /* UNIV_PFS_RWLOCK */ + + #ifndef UNIV_NONINL #include "sync0rw.ic" #endif diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic index 7116f1b7c9b..73405759bce 100644 --- a/storage/innobase/include/sync0rw.ic +++ b/storage/innobase/include/sync0rw.ic @@ -622,3 +622,265 @@ rw_lock_x_unlock_direct( rw_x_exit_count++; #endif } + +#ifdef UNIV_PFS_RWLOCK + +/******************************************************************//** +Performance schema instrumented wrap function for rw_lock_create_func(). +NOTE! Please use the corresponding macro rw_lock_create(), not directly +this function! */ +UNIV_INLINE +void +pfs_rw_lock_create_func( +/*====================*/ + mysql_pfs_key_t key, /*!< in: key registered with + performance schema */ + rw_lock_t* lock, /*!< in: pointer to memory */ +# ifdef UNIV_DEBUG +# ifdef UNIV_SYNC_DEBUG + ulint level, /*!< in: level */ +# endif /* UNIV_SYNC_DEBUG */ + const char* cmutex_name, /*!< in: mutex name */ +# endif /* UNIV_DEBUG */ + const char* cfile_name, /*!< in: file name where created */ + ulint cline) /*!< in: file line where created */ +{ + /* Initialize the rwlock for performance schema */ + lock->pfs_psi = (PSI_server && PFS_IS_INSTRUMENTED(key)) + ? PSI_server->init_rwlock(key, lock) + : NULL; + + /* The actual function to initialize an rwlock */ + rw_lock_create_func(lock, +# ifdef UNIV_DEBUG +# ifdef UNIV_SYNC_DEBUG + level, +# endif /* UNIV_SYNC_DEBUG */ + cmutex_name, +# endif /* UNIV_DEBUG */ + cfile_name, + cline); +} +/******************************************************************//** +Performance schema instrumented wrap function for rw_lock_x_lock_func() +NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly +this function! */ +UNIV_INLINE +void +pfs_rw_lock_x_lock_func( +/*====================*/ + rw_lock_t* lock, /*!< in: pointer to rw-lock */ + ulint pass, /*!< in: pass value; != 0, if the lock will + be passed to another thread to unlock */ + const char* file_name,/*!< in: file name where lock requested */ + ulint line) /*!< in: line where requested */ +{ + struct PSI_rwlock_locker* locker = NULL; + + /* Record the entry of rw x lock request in performance schema */ + if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) { + locker = PSI_server->get_thread_rwlock_locker( + lock->pfs_psi, PSI_RWLOCK_WRITELOCK); + + if (locker) { + PSI_server->start_rwlock_wrwait(locker, + file_name, line); + } + } + + rw_lock_x_lock_func(lock, pass, file_name, line); + + if (locker) { + PSI_server->end_rwlock_wrwait(locker, 0); + } +} +/******************************************************************//** +Performance schema instrumented wrap function for +rw_lock_x_lock_func_nowait() +NOTE! Please use the corresponding macro rw_lock_x_lock_func(), +not directly this function! +@return TRUE if success */ +UNIV_INLINE +ibool +pfs_rw_lock_x_lock_func_nowait( +/*===========================*/ + rw_lock_t* lock, /*!< in: pointer to rw-lock */ + const char* file_name,/*!< in: file name where lock + requested */ + ulint line) /*!< in: line where requested */ +{ + struct PSI_rwlock_locker* locker = NULL; + ibool ret; + + /* Record the entry of rw x lock request in performance schema */ + if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) { + locker = PSI_server->get_thread_rwlock_locker( + lock->pfs_psi, PSI_RWLOCK_WRITELOCK); + + if (locker) { + PSI_server->start_rwlock_wrwait(locker, + file_name, line); + } + } + + ret = rw_lock_x_lock_func_nowait(lock, file_name, line); + + if (locker) { + PSI_server->end_rwlock_wrwait(locker, 0); + } + + return(ret); +} +/******************************************************************//** +Performance schema instrumented wrap function for rw_lock_free_func() +NOTE! Please use the corresponding macro rw_lock_free(), not directly +this function! */ +UNIV_INLINE +void +pfs_rw_lock_free_func( +/*==================*/ + rw_lock_t* lock) /*!< in: pointer to rw-lock */ +{ + if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) { + PSI_server->destroy_rwlock(lock->pfs_psi); + lock->pfs_psi = NULL; + } + + rw_lock_free_func(lock); +} +/******************************************************************//** +Performance schema instrumented wrap function for rw_lock_s_lock_func() +NOTE! Please use the corresponding macro rw_lock_s_lock(), not +directly this function! */ +UNIV_INLINE +void +pfs_rw_lock_s_lock_func( +/*====================*/ + rw_lock_t* lock, /*!< in: pointer to rw-lock */ + ulint pass, /*!< in: pass value; != 0, if the + lock will be passed to another + thread to unlock */ + const char* file_name,/*!< in: file name where lock + requested */ + ulint line) /*!< in: line where requested */ +{ + struct PSI_rwlock_locker* locker = NULL; + + /* Instrumented to inform we are aquiring a shared rwlock */ + if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) { + locker = PSI_server->get_thread_rwlock_locker( + lock->pfs_psi, PSI_RWLOCK_READLOCK); + if (locker) { + PSI_server->start_rwlock_rdwait(locker, + file_name, line); + } + } + + rw_lock_s_lock_func(lock, pass, file_name, line); + + if (locker) { + PSI_server->end_rwlock_rdwait(locker, 0); + } +} +/******************************************************************//** +Performance schema instrumented wrap function for rw_lock_s_lock_func() +NOTE! Please use the corresponding macro rw_lock_s_lock(), not +directly this function! +@return TRUE if success */ +UNIV_INLINE +ibool +pfs_rw_lock_s_lock_low( +/*===================*/ + rw_lock_t* lock, /*!< in: pointer to rw-lock */ + ulint pass, /*!< in: pass value; != 0, if the + lock will be passed to another + thread to unlock */ + const char* file_name, /*!< in: file name where lock requested */ + ulint line) /*!< in: line where requested */ +{ + + struct PSI_rwlock_locker* locker = NULL; + ibool ret; + + /* Instrumented to inform we are aquiring a shared rwlock */ + if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) { + locker = PSI_server->get_thread_rwlock_locker( + lock->pfs_psi, PSI_RWLOCK_READLOCK); + if (locker) { + PSI_server->start_rwlock_rdwait(locker, + file_name, line); + } + } + + ret = rw_lock_s_lock_low(lock, pass, file_name, line); + + if (locker) { + PSI_server->end_rwlock_rdwait(locker, 0); + } + + return(ret); +} + +/******************************************************************//** +Performance schema instrumented wrap function for rw_lock_x_unlock_func() +NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly +this function! */ +UNIV_INLINE +void +pfs_rw_lock_x_unlock_func( +/*======================*/ +#ifdef UNIV_SYNC_DEBUG + ulint pass, /*!< in: pass value; != 0, if the + lock may have been passed to another + thread to unlock */ +#endif + rw_lock_t* lock) /*!< in/out: rw-lock */ +{ + /* Inform performance schema we are unlocking the lock */ + if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) { + struct PSI_thread* thread; + thread = PSI_server->get_thread(); + if (thread) { + PSI_server->unlock_rwlock(thread, lock->pfs_psi); + } + } + + rw_lock_x_unlock_func( +#ifdef UNIV_SYNC_DEBUG + pass, +#endif + lock); +} + +/******************************************************************//** +Performance schema instrumented wrap function for rw_lock_s_unlock_func() +NOTE! Please use the corresponding macro pfs_rw_lock_s_unlock(), not +directly this function! */ +UNIV_INLINE +void +pfs_rw_lock_s_unlock_func( +/*======================*/ +#ifdef UNIV_SYNC_DEBUG + ulint pass, /*!< in: pass value; != 0, if the + lock may have been passed to another + thread to unlock */ +#endif + rw_lock_t* lock) /*!< in/out: rw-lock */ +{ + /* Inform performance schema we are unlocking the lock */ + if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) { + struct PSI_thread* thread; + thread = PSI_server->get_thread(); + if (thread) { + PSI_server->unlock_rwlock(thread, lock->pfs_psi); + } + } + + rw_lock_s_unlock_func( +#ifdef UNIV_SYNC_DEBUG + pass, +#endif + lock); + +} +#endif /* UNIV_PFS_RWLOCK */ diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h index df990823cc4..69c0382d5b9 100644 --- a/storage/innobase/include/sync0sync.h +++ b/storage/innobase/include/sync0sync.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -52,6 +52,69 @@ typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates typedef byte lock_word_t; #endif +#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK +/* There are mutexes/rwlocks that we want to exclude from +instrumentation even if their corresponding performance schema +define is set. And this PFS_NOT_INSTRUMENTED is used +as the key value to dentify those objects that would +be excluded from instrumentation. */ +# define PFS_NOT_INSTRUMENTED ULINT32_UNDEFINED + +# define PFS_IS_INSTRUMENTED(key) ((key) != PFS_NOT_INSTRUMENTED) + +/* By default, buffer mutexes and rwlocks will be excluded from +instrumentation due to their large number of instances. */ +# define PFS_SKIP_BUFFER_MUTEX_RWLOCK + +#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */ + +#ifdef UNIV_PFS_MUTEX +/* Key defines to register InnoDB mutexes with performance schema */ +extern mysql_pfs_key_t autoinc_mutex_key; +extern mysql_pfs_key_t btr_search_enabled_mutex_key; +extern mysql_pfs_key_t buffer_block_mutex_key; +extern mysql_pfs_key_t buf_pool_mutex_key; +extern mysql_pfs_key_t buf_pool_zip_mutex_key; +extern mysql_pfs_key_t cache_last_read_mutex_key; +extern mysql_pfs_key_t dict_foreign_err_mutex_key; +extern mysql_pfs_key_t dict_sys_mutex_key; +extern mysql_pfs_key_t file_format_max_mutex_key; +extern mysql_pfs_key_t fil_system_mutex_key; +extern mysql_pfs_key_t flush_list_mutex_key; +extern mysql_pfs_key_t flush_order_mutex_key; +extern mysql_pfs_key_t hash_table_mutex_key; +extern mysql_pfs_key_t ibuf_bitmap_mutex_key; +extern mysql_pfs_key_t ibuf_mutex_key; +extern mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key; +extern mysql_pfs_key_t ios_mutex_key; +extern mysql_pfs_key_t log_sys_mutex_key; +extern mysql_pfs_key_t kernel_mutex_key; +# ifdef UNIV_MEM_DEBUG +extern mysql_pfs_key_t mem_hash_mutex_key; +# endif /* UNIV_MEM_DEBUG */ +extern mysql_pfs_key_t mem_pool_mutex_key; +extern mysql_pfs_key_t mutex_list_mutex_key; +extern mysql_pfs_key_t purge_sys_mutex_key; +extern mysql_pfs_key_t recv_sys_mutex_key; +extern mysql_pfs_key_t rseg_mutex_key; +# ifdef UNIV_SYNC_DEBUG +extern mysql_pfs_key_t rw_lock_debug_mutex_key; +# endif /* UNIV_SYNC_DEBUG */ +extern mysql_pfs_key_t rw_lock_list_mutex_key; +extern mysql_pfs_key_t rw_lock_mutex_key; +extern mysql_pfs_key_t srv_dict_tmpfile_mutex_key; +extern mysql_pfs_key_t srv_innodb_monitor_mutex_key; +extern mysql_pfs_key_t srv_misc_tmpfile_mutex_key; +extern mysql_pfs_key_t srv_monitor_file_mutex_key; +extern mysql_pfs_key_t syn_arr_mutex_key; +# ifdef UNIV_SYNC_DEBUG +extern mysql_pfs_key_t sync_thread_mutex_key; +# endif /* UNIV_SYNC_DEBUG */ +extern mysql_pfs_key_t trx_doublewrite_mutex_key; +extern mysql_pfs_key_t thr_local_mutex_key; +extern mysql_pfs_key_t trx_undo_mutex_key; +#endif /* UNIV_PFS_MUTEX */ + /******************************************************************//** Initializes the synchronization data structures. */ UNIV_INTERN @@ -64,24 +127,82 @@ UNIV_INTERN void sync_close(void); /*===========*/ + +#undef mutex_free /* Fix for MacOS X */ + +#ifdef UNIV_PFS_MUTEX +/********************************************************************** +Following mutex APIs would be performance schema instrumented +if "UNIV_PFS_MUTEX" is defined: + +mutex_create +mutex_enter +mutex_exit +mutex_enter_nowait +mutex_free + +These mutex APIs will point to corresponding wrapper functions that contain +the performance schema instrumentation if "UNIV_PFS_MUTEX" is defined. +The instrumented wrapper functions have the prefix of "innodb_". + +NOTE! The following macro should be used in mutex operation, not the +corresponding function. */ + /******************************************************************//** Creates, or rather, initializes a mutex object to a specified memory location (which must be appropriately aligned). The mutex is initialized in the reset state. Explicit freeing of the mutex with mutex_free is necessary only if the memory block containing it is freed. */ +# ifdef UNIV_DEBUG +# ifdef UNIV_SYNC_DEBUG +# define mutex_create(K, M, level) \ + pfs_mutex_create_func((K), (M), #M, (level), __FILE__, __LINE__) +# else +# define mutex_create(K, M, level) \ + pfs_mutex_create_func((K), (M), #M, __FILE__, __LINE__) +# endif/* UNIV_SYNC_DEBUG */ +# else +# define mutex_create(K, M, level) \ + pfs_mutex_create_func((K), (M), __FILE__, __LINE__) +# endif /* UNIV_DEBUG */ -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG -# define mutex_create(M, level) \ +# define mutex_enter(M) \ + pfs_mutex_enter_func((M), __FILE__, __LINE__) + +# define mutex_enter_nowait(M) \ + pfs_mutex_enter_nowait_func((M), __FILE__, __LINE__) + +# define mutex_exit(M) pfs_mutex_exit_func(M) + +# define mutex_free(M) pfs_mutex_free_func(M) + +#else /* UNIV_PFS_MUTEX */ + +/* If "UNIV_PFS_MUTEX" is not defined, the mutex APIs point to +original non-instrumented functions */ +# ifdef UNIV_DEBUG +# ifdef UNIV_SYNC_DEBUG +# define mutex_create(K, M, level) \ mutex_create_func((M), #M, (level), __FILE__, __LINE__) -# else -# define mutex_create(M, level) \ +# else /* UNIV_SYNC_DEBUG */ +# define mutex_create(K, M, level) \ mutex_create_func((M), #M, __FILE__, __LINE__) -# endif -#else -# define mutex_create(M, level) \ +# endif /* UNIV_SYNC_DEBUG */ +# else /* UNIV_DEBUG */ +# define mutex_create(K, M, level) \ mutex_create_func((M), __FILE__, __LINE__) -#endif +# endif /* UNIV_DEBUG */ + +# define mutex_enter(M) mutex_enter_func((M), __FILE__, __LINE__) + +# define mutex_enter_nowait(M) \ + mutex_enter_nowait_func((M), __FILE__, __LINE__) + +# define mutex_exit(M) mutex_exit_func(M) + +# define mutex_free(M) mutex_free_func(M) + +#endif /* UNIV_PFS_MUTEX */ /******************************************************************//** Creates, or rather, initializes a mutex object in a specified memory @@ -102,26 +223,20 @@ mutex_create_func( const char* cfile_name, /*!< in: file name where created */ ulint cline); /*!< in: file line where created */ -#undef mutex_free /* Fix for MacOS X */ - /******************************************************************//** +NOTE! Use the corresponding macro mutex_free(), not directly this function! Calling this function is obligatory only if the memory buffer containing the mutex is freed. Removes a mutex object from the mutex list. The mutex is checked to be in the reset state. */ UNIV_INTERN void -mutex_free( -/*=======*/ +mutex_free_func( +/*============*/ mutex_t* mutex); /*!< in: mutex */ /**************************************************************//** NOTE! The following macro should be used in mutex locking, not the corresponding function. */ -#define mutex_enter(M) mutex_enter_func((M), __FILE__, __LINE__) -/**************************************************************//** -NOTE! The following macro should be used in mutex locking, not the -corresponding function. */ - /* NOTE! currently same as mutex_enter! */ #define mutex_enter_fast(M) mutex_enter_func((M), __FILE__, __LINE__) @@ -137,12 +252,6 @@ mutex_enter_func( mutex_t* mutex, /*!< in: pointer to mutex */ const char* file_name, /*!< in: file name where locked */ ulint line); /*!< in: line where locked */ -/**************************************************************//** -NOTE! The following macro should be used in mutex locking, not the -corresponding function. */ - -#define mutex_enter_nowait(M) \ - mutex_enter_nowait_func((M), __FILE__, __LINE__) /********************************************************************//** NOTE! Use the corresponding macro in the header file, not this function directly. Tries to lock the mutex for the current thread. If the lock is not @@ -157,12 +266,86 @@ mutex_enter_nowait_func( requested */ ulint line); /*!< in: line where requested */ /******************************************************************//** +NOTE! Use the corresponding macro mutex_exit(), not directly this function! Unlocks a mutex owned by the current thread. */ UNIV_INLINE void -mutex_exit( -/*=======*/ +mutex_exit_func( +/*============*/ mutex_t* mutex); /*!< in: pointer to mutex */ + + +#ifdef UNIV_PFS_MUTEX +/******************************************************************//** +NOTE! Please use the corresponding macro mutex_create(), not directly +this function! +A wrapper function for mutex_create_func(), registers the mutex +with peformance schema if "UNIV_PFS_MUTEX" is defined when +creating the mutex */ +UNIV_INLINE +void +pfs_mutex_create_func( +/*==================*/ + PSI_mutex_key key, /*!< in: Performance Schema key */ + mutex_t* mutex, /*!< in: pointer to memory */ +# ifdef UNIV_DEBUG + const char* cmutex_name, /*!< in: mutex name */ +# ifdef UNIV_SYNC_DEBUG + ulint level, /*!< in: level */ +# endif /* UNIV_SYNC_DEBUG */ +# endif /* UNIV_DEBUG */ + const char* cfile_name, /*!< in: file name where created */ + ulint cline); /*!< in: file line where created */ +/******************************************************************//** +NOTE! Please use the corresponding macro mutex_enter(), not directly +this function! +This is a performance schema instrumented wrapper function for +mutex_enter_func(). */ +UNIV_INLINE +void +pfs_mutex_enter_func( +/*=================*/ + mutex_t* mutex, /*!< in: pointer to mutex */ + const char* file_name, /*!< in: file name where locked */ + ulint line); /*!< in: line where locked */ +/********************************************************************//** +NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly +this function! +This is a performance schema instrumented wrapper function for +mutex_enter_nowait_func. +@return 0 if succeed, 1 if not */ +UNIV_INLINE +ulint +pfs_mutex_enter_nowait_func( +/*========================*/ + mutex_t* mutex, /*!< in: pointer to mutex */ + const char* file_name, /*!< in: file name where mutex + requested */ + ulint line); /*!< in: line where requested */ +/******************************************************************//** +NOTE! Please use the corresponding macro mutex_exit(), not directly +this function! +A wrap function of mutex_exit_func() with peformance schema instrumentation. +Unlocks a mutex owned by the current thread. */ +UNIV_INLINE +void +pfs_mutex_exit_func( +/*================*/ + mutex_t* mutex); /*!< in: pointer to mutex */ + +/******************************************************************//** +NOTE! Please use the corresponding macro mutex_free(), not directly +this function! +Wrapper function for mutex_free_func(). Also destroys the performance +schema probes when freeing the mutex */ +UNIV_INLINE +void +pfs_mutex_free_func( +/*================*/ + mutex_t* mutex); /*!< in: mutex */ + +#endif /* UNIV_PFS_MUTEX */ + #ifdef UNIV_SYNC_DEBUG /******************************************************************//** Returns TRUE if no mutex or rw-lock is currently locked. @@ -206,7 +389,8 @@ UNIV_INTERN ibool mutex_own( /*======*/ - const mutex_t* mutex); /*!< in: mutex */ + const mutex_t* mutex) /*!< in: mutex */ + __attribute__((warn_unused_result)); #endif /* UNIV_DEBUG */ #ifdef UNIV_SYNC_DEBUG /******************************************************************//** @@ -238,16 +422,27 @@ ibool sync_thread_levels_empty(void); /*==========================*/ /******************************************************************//** -Checks that the level array for the current thread is empty. -@return TRUE if empty except the exceptions specified below */ +Checks if the level array for the current thread contains a +mutex or rw-latch at the specified level. +@return a matching latch, or NULL if not found */ UNIV_INTERN -ibool -sync_thread_levels_empty_gen( -/*=========================*/ +void* +sync_thread_levels_contains( +/*========================*/ + ulint level); /*!< in: latching order level + (SYNC_DICT, ...)*/ +/******************************************************************//** +Checks if the level array for the current thread is empty. +@return a latch, or NULL if empty except the exceptions specified below */ +UNIV_INTERN +void* +sync_thread_levels_nonempty_gen( +/*============================*/ ibool dict_mutex_allowed); /*!< in: TRUE if dictionary mutex is allowed to be owned by the thread, also purge_is_running mutex is allowed */ +#define sync_thread_levels_empty_gen(d) (!sync_thread_levels_nonempty_gen(d)) /******************************************************************//** Gets the debug information for a reserved mutex. */ UNIV_INTERN @@ -475,8 +670,10 @@ or row lock! */ SYNC_SEARCH_SYS, as memory allocation can call routines there! Otherwise the level is SYNC_MEM_HASH. */ -#define SYNC_BUF_POOL 150 -#define SYNC_BUF_BLOCK 149 +#define SYNC_BUF_POOL 150 /* Buffer pool mutex */ +#define SYNC_BUF_FLUSH_ORDER 147 +#define SYNC_BUF_BLOCK 146 /* Block mutex */ +#define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */ #define SYNC_DOUBLEWRITE 140 #define SYNC_ANY_LATCH 135 #define SYNC_THR_LOCAL 133 @@ -538,6 +735,10 @@ struct mutex_struct { const char* cmutex_name; /*!< mutex name */ ulint mutex_type; /*!< 0=usual mutex, 1=rw_lock mutex */ #endif /* UNIV_DEBUG */ +#ifdef UNIV_PFS_MUTEX + struct PSI_mutex* pfs_psi; /*!< The performance schema + instrumentation hook */ +#endif }; /** The global array of wait cells for implementation of the databases own diff --git a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic index b05020b5660..fdd70ad052f 100644 --- a/storage/innobase/include/sync0sync.ic +++ b/storage/innobase/include/sync0sync.ic @@ -152,11 +152,12 @@ mutex_get_waiters( } /******************************************************************//** +NOTE! Use the corresponding macro mutex_exit(), not directly this function! Unlocks a mutex owned by the current thread. */ UNIV_INLINE void -mutex_exit( -/*=======*/ +mutex_exit_func( +/*============*/ mutex_t* mutex) /*!< in: pointer to mutex */ { ut_ad(mutex_own(mutex)); @@ -220,3 +221,148 @@ mutex_enter_func( mutex_spin_wait(mutex, file_name, line); } + +#ifdef UNIV_PFS_MUTEX +/******************************************************************//** +NOTE! Please use the corresponding macro mutex_enter(), not directly +this function! +This is a performance schema instrumented wrapper function for +mutex_enter_func(). */ +UNIV_INLINE +void +pfs_mutex_enter_func( +/*=================*/ + mutex_t* mutex, /*!< in: pointer to mutex */ + const char* file_name, /*!< in: file name where locked */ + ulint line) /*!< in: line where locked */ +{ + struct PSI_mutex_locker* locker = NULL; + int result = 0; + + if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) { + locker = PSI_server->get_thread_mutex_locker( + mutex->pfs_psi, PSI_MUTEX_LOCK); + if (locker) { + PSI_server->start_mutex_wait(locker, file_name, line); + } + } + + mutex_enter_func(mutex, file_name, line); + + if (locker) { + PSI_server->end_mutex_wait(locker, result); + } +} +/********************************************************************//** +NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly +this function! +This is a performance schema instrumented wrapper function for +mutex_enter_nowait_func. +@return 0 if succeed, 1 if not */ +UNIV_INLINE +ulint +pfs_mutex_enter_nowait_func( +/*========================*/ + mutex_t* mutex, /*!< in: pointer to mutex */ + const char* file_name, /*!< in: file name where mutex + requested */ + ulint line) /*!< in: line where requested */ +{ + ulint ret; + struct PSI_mutex_locker* locker = NULL; + int result = 0; + + if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) { + locker = PSI_server->get_thread_mutex_locker( + mutex->pfs_psi, PSI_MUTEX_LOCK); + if (locker) { + PSI_server->start_mutex_wait(locker, file_name, line); + } + } + + ret = mutex_enter_nowait_func(mutex, file_name, line); + + if (locker) { + PSI_server->end_mutex_wait(locker, result); + } + + return(ret); +} +/******************************************************************//** +NOTE! Please use the corresponding macro mutex_exit(), not directly +this function! +A wrap function of mutex_exit_func() with performance schema instrumentation. +Unlocks a mutex owned by the current thread. */ +UNIV_INLINE +void +pfs_mutex_exit_func( +/*================*/ + mutex_t* mutex) /*!< in: pointer to mutex */ +{ + if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) { + struct PSI_thread* thread; + thread = PSI_server->get_thread(); + + if (thread) { + PSI_server->unlock_mutex(thread, mutex->pfs_psi); + } + } + + mutex_exit_func(mutex); +} + +/******************************************************************//** +NOTE! Please use the corresponding macro mutex_create(), not directly +this function! +A wrapper function for mutex_create_func(), registers the mutex +with performance schema if "UNIV_PFS_MUTEX" is defined when +creating the mutex */ +UNIV_INLINE +void +pfs_mutex_create_func( +/*==================*/ + mysql_pfs_key_t key, /*!< in: Performance Schema key */ + mutex_t* mutex, /*!< in: pointer to memory */ +# ifdef UNIV_DEBUG + const char* cmutex_name, /*!< in: mutex name */ +# ifdef UNIV_SYNC_DEBUG + ulint level, /*!< in: level */ +# endif /* UNIV_SYNC_DEBUG */ +# endif /* UNIV_DEBUG */ + const char* cfile_name, /*!< in: file name where created */ + ulint cline) /*!< in: file line where created */ +{ + mutex->pfs_psi = (PSI_server && PFS_IS_INSTRUMENTED(key)) + ? PSI_server->init_mutex(key, mutex) + : NULL; + + mutex_create_func(mutex, +# ifdef UNIV_DEBUG + cmutex_name, +# ifdef UNIV_SYNC_DEBUG + level, +# endif /* UNIV_SYNC_DEBUG */ +# endif /* UNIV_DEBUG */ + cfile_name, + cline); +} +/******************************************************************//** +NOTE! Please use the corresponding macro mutex_free(), not directly +this function! +Wrapper function for mutex_free_func(). Also destroys the performance +schema probes when freeing the mutex */ +UNIV_INLINE +void +pfs_mutex_free_func( +/*===================*/ + mutex_t* mutex) /*!< in: mutex */ +{ + if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) { + PSI_server->destroy_mutex(mutex->pfs_psi); + mutex->pfs_psi= NULL; + } + + mutex_free_func(mutex); +} + +#endif /* UNIV_PFS_MUTEX */ diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h index 908760580f6..d2730a68a78 100644 --- a/storage/innobase/include/trx0purge.h +++ b/storage/innobase/include/trx0purge.h @@ -112,8 +112,10 @@ This function runs a purge batch. @return number of undo log pages handled in the batch */ UNIV_INTERN ulint -trx_purge(void); -/*===========*/ +trx_purge( +/*======*/ + ulint limit); /*!< in: the maximum number of records to + purge in one batch */ /******************************************************************//** Prints information of the purge system to stderr. */ UNIV_INTERN diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h index ba1fc88b6c4..78a7a8c4bb0 100644 --- a/storage/innobase/include/trx0rseg.h +++ b/storage/innobase/include/trx0rseg.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -103,7 +103,7 @@ trx_rseg_header_create( ulint zip_size, /*!< in: compressed page size in bytes or 0 for uncompressed pages */ ulint max_size, /*!< in: max size in pages */ - ulint* slot_no, /*!< out: rseg id == slot number in trx sys */ + ulint rseg_slot_no, /*!< in: rseg id == slot number in trx sys */ mtr_t* mtr); /*!< in: mtr */ /*********************************************************************//** Creates the memory copies for rollback segments and initializes the @@ -114,17 +114,6 @@ trx_rseg_list_and_array_init( /*=========================*/ trx_sysf_t* sys_header, /*!< in: trx system header */ mtr_t* mtr); /*!< in: mtr */ -/****************************************************************//** -Creates a new rollback segment to the database. -@return the created segment object, NULL if fail */ -UNIV_INTERN -trx_rseg_t* -trx_rseg_create( -/*============*/ - ulint space, /*!< in: space id */ - ulint max_size, /*!< in: max size in pages */ - ulint* id, /*!< out: rseg id */ - mtr_t* mtr); /*!< in: mtr */ /*************************************************************************** Free's an instance of the rollback segment in memory. */ UNIV_INTERN @@ -133,6 +122,12 @@ trx_rseg_mem_free( /*==============*/ trx_rseg_t* rseg); /* in, own: instance to free */ +/********************************************************************* +Creates a rollback segment. */ +UNIV_INTERN +trx_rseg_t* +trx_rseg_create(void); +/*==================*/ /* Number of undo log slots in a rollback segment file copy */ #define TRX_RSEG_N_SLOTS (UNIV_PAGE_SIZE / 16) diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h index a53296a06d9..fc92b4317d5 100644 --- a/storage/innobase/include/trx0sys.h +++ b/storage/innobase/include/trx0sys.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -333,12 +333,14 @@ UNIV_INTERN void trx_sys_file_format_tag_init(void); /*==============================*/ +#ifndef UNIV_HOTBACKUP /*****************************************************************//** Shutdown/Close the transaction system. */ UNIV_INTERN void trx_sys_close(void); /*===============*/ +#endif /* !UNIV_HOTBACKUP */ /*****************************************************************//** Get the name representation of the file format from its id. @return pointer to the name */ @@ -429,6 +431,14 @@ trx_sys_file_format_id_to_name( const ulint id); /*!< in: id of the file format */ #endif /* !UNIV_HOTBACKUP */ +/********************************************************************* +Creates the rollback segments */ +UNIV_INTERN +void +trx_sys_create_rsegs( +/*=================*/ + ulint n_rsegs); /*!< number of rollback segments to create */ + /* The automatically created system rollback segment has this id */ #define TRX_SYS_SYSTEM_RSEG_ID 0 @@ -463,11 +473,16 @@ trx_sys_file_format_id_to_name( slots */ /*------------------------------------------------------------- @} */ -/** Maximum number of rollback segments: the number of segment -specification slots in the transaction system array; rollback segment -id must fit in one byte, therefore 256; each slot is currently 8 bytes -in size */ -#define TRX_SYS_N_RSEGS 256 +/* Max number of rollback segments: the number of segment specification slots +in the transaction system array; rollback segment id must fit in one (signed) +byte, therefore 128; each slot is currently 8 bytes in size. If you want +to raise the level to 256 then you will need to fix some assertions that +impose the 7 bit restriction. e.g., mach_write_to_3() */ +#define TRX_SYS_N_RSEGS 128 +/* Originally, InnoDB defined TRX_SYS_N_RSEGS as 256 but created only one +rollback segment. It initialized some arrays with this number of entries. +We must remember this limit in order to keep file compatibility. */ +#define TRX_SYS_OLD_N_RSEGS 256 /** Maximum length of MySQL binlog file name, in bytes. @see trx_sys_mysql_master_log_name @@ -495,7 +510,6 @@ this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */ within that file */ #define TRX_SYS_MYSQL_LOG_NAME 12 /*!< MySQL log file name */ -#ifndef UNIV_HOTBACKUP /** Doublewrite buffer */ /* @{ */ /** The offset of the doublewrite buffer header on the trx system header page */ @@ -547,6 +561,7 @@ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_NO. */ #define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE /* @} */ +#ifndef UNIV_HOTBACKUP /** File format tag */ /* @{ */ /** The offset of the file format tag on the trx system header page diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 5f2c1246f37..480f265a138 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -349,7 +349,7 @@ trx_print( use the default max length */ /** Type of data dictionary operation */ -enum trx_dict_op { +typedef enum trx_dict_op { /** The transaction is not modifying the data dictionary. */ TRX_DICT_OP_NONE = 0, /** The transaction is creating a table or an index, or @@ -361,7 +361,7 @@ enum trx_dict_op { existing table. In crash recovery, the data dictionary must be locked, but the table must not be dropped. */ TRX_DICT_OP_INDEX = 2 -}; +} trx_dict_op_t; /**********************************************************************//** Determine if a transaction is a dictionary operation. @@ -463,69 +463,79 @@ rolling back after a database recovery */ struct trx_struct{ ulint magic_n; - /* All the next fields are protected by the kernel mutex, except the - undo logs which are protected by undo_mutex */ + + /* These fields are not protected by any mutex. */ const char* op_info; /*!< English text describing the current operation, or an empty string */ - unsigned is_purge:1; /*!< 0=user transaction, 1=purge */ - unsigned is_recovered:1; /*!< 0=normal transaction, - 1=recovered, must be rolled back */ - unsigned conc_state:2; /*!< state of the trx from the point + ulint conc_state; /*!< state of the trx from the point of view of concurrency control: TRX_ACTIVE, TRX_COMMITTED_IN_MEMORY, ... */ - unsigned que_state:2; /*!< valid when conc_state == TRX_ACTIVE: - TRX_QUE_RUNNING, TRX_QUE_LOCK_WAIT, - ... */ - unsigned isolation_level:2;/* TRX_ISO_REPEATABLE_READ, ... */ - unsigned check_foreigns:1;/* normally TRUE, but if the user + ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */ + ulint check_foreigns; /* normally TRUE, but if the user wants to suppress foreign key checks, (in table imports, for example) we set this FALSE */ - unsigned check_unique_secondary:1; + ulint check_unique_secondary; /* normally TRUE, but if the user wants to speed up inserts by suppressing unique key checks for secondary indexes when we decide if we can use the insert buffer for them, we set this FALSE */ - unsigned support_xa:1; /*!< normally we do the XA two-phase + ulint support_xa; /*!< normally we do the XA two-phase commit steps, but by setting this to FALSE, one can save CPU time and about 150 bytes in the undo log size as then we skip XA steps */ - unsigned flush_log_later:1;/* In 2PC, we hold the + ulint flush_log_later;/* In 2PC, we hold the prepare_commit mutex across both phases. In that case, we defer flush of the logs to disk until after we release the mutex. */ - unsigned must_flush_log_later:1;/* this flag is set to TRUE in + ulint must_flush_log_later;/* this flag is set to TRUE in trx_commit_off_kernel() if flush_log_later was TRUE, and there were modifications by the transaction; in that case we must flush the log in trx_commit_complete_for_mysql() */ - unsigned dict_operation:2;/**< @see enum trx_dict_op */ - unsigned duplicates:2; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */ - unsigned active_trans:2; /*!< 1 - if a transaction in MySQL + ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */ + ulint active_trans; /*!< 1 - if a transaction in MySQL is active. 2 - if prepare_commit_mutex was taken */ - unsigned has_search_latch:1; + ulint has_search_latch; /* TRUE if this trx has latched the search system latch in S-mode */ - unsigned declared_to_be_inside_innodb:1; + ulint deadlock_mark; /*!< a mark field used in deadlock + checking algorithm. */ + trx_dict_op_t dict_operation; /**< @see enum trx_dict_op */ + + /* Fields protected by the srv_conc_mutex. */ + ulint declared_to_be_inside_innodb; /* this is TRUE if we have declared this transaction in srv_conc_enter_innodb to be inside the InnoDB engine */ - unsigned handling_signals:1;/* this is TRUE as long as the trx - is handling signals */ - unsigned dict_operation_lock_mode:2; - /* 0, RW_S_LATCH, or RW_X_LATCH: + + /* Fields protected by dict_operation_lock. The very latch + it is used to track. */ + ulint dict_operation_lock_mode; + /*!< 0, RW_S_LATCH, or RW_X_LATCH: the latch mode trx currently holds on dict_operation_lock */ + + /* All the next fields are protected by the kernel mutex, except the + undo logs which are protected by undo_mutex */ + ulint is_purge; /*!< 0=user transaction, 1=purge */ + ulint is_recovered; /*!< 0=normal transaction, + 1=recovered, must be rolled back */ + ulint que_state; /*!< valid when conc_state + == TRX_ACTIVE: TRX_QUE_RUNNING, + TRX_QUE_LOCK_WAIT, ... */ + ulint handling_signals;/* this is TRUE as long as the trx + is handling signals */ time_t start_time; /*!< time the trx object was created or the state last time became TRX_ACTIVE */ @@ -640,11 +650,6 @@ struct trx_struct{ wait_thrs; /*!< query threads belonging to this trx that are in the QUE_THR_LOCK_WAIT state */ - ulint deadlock_mark; /*!< a mark field used in deadlock - checking algorithm. This must be - in its own machine word, because - it can be changed by other - threads while holding kernel_mutex. */ /*------------------------------*/ mem_heap_t* lock_heap; /*!< memory heap for the locks of the transaction */ diff --git a/storage/innobase/include/trx0types.h b/storage/innobase/include/trx0types.h index 24cf57d53d5..40a7256cbfd 100644 --- a/storage/innobase/include/trx0types.h +++ b/storage/innobase/include/trx0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -70,6 +70,13 @@ typedef struct trx_named_savept_struct trx_named_savept_t; enum trx_rb_ctx { RB_NONE = 0, /*!< no rollback */ RB_NORMAL, /*!< normal rollback */ + RB_RECOVERY_PURGE_REC, + /*!< rolling back an incomplete transaction, + in crash recovery, rolling back an + INSERT that was performed by updating a + delete-marked record; if the delete-marked record + no longer exists in an active read view, it will + be purged */ RB_RECOVERY /*!< rolling back an incomplete transaction, in crash recovery */ }; diff --git a/storage/innobase/include/trx0undo.ic b/storage/innobase/include/trx0undo.ic index 2d289b34ef1..6502ee826e5 100644 --- a/storage/innobase/include/trx0undo.ic +++ b/storage/innobase/include/trx0undo.ic @@ -42,7 +42,7 @@ trx_undo_build_roll_ptr( #if DATA_ROLL_PTR_LEN != 7 # error "DATA_ROLL_PTR_LEN != 7" #endif - ut_ad(rseg_id < 128); + ut_ad(rseg_id < TRX_SYS_N_RSEGS); return(ut_dulint_create(is_insert * 128 * 256 * 256 + rseg_id * 256 * 256 diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index 2081e136590..ea0ad4e790c 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2009, Sun Microsystems, Inc. @@ -45,8 +45,8 @@ Created 1/20/1994 Heikki Tuuri #endif /* UNIV_HOTBACKUP */ #define INNODB_VERSION_MAJOR 1 -#define INNODB_VERSION_MINOR 0 -#define INNODB_VERSION_BUGFIX 6 +#define INNODB_VERSION_MINOR 1 +#define INNODB_VERSION_BUGFIX 0 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; @@ -144,6 +144,23 @@ Sun Studio */ #endif /* #if (defined(WIN32) || ... */ +/* Following defines are to enable performance schema +instrumentation in each of four InnoDB modules if +HAVE_PSI_INTERFACE is defined. */ +#ifdef HAVE_PSI_INTERFACE +# define UNIV_PFS_MUTEX +# define UNIV_PFS_RWLOCK +/* For I/O instrumentation, performance schema rely +on a native descriptor to identify the file, this +descriptor could conflict with our OS level descriptor. +Disable IO instrumentation on Windows until this is +resolved */ +# ifndef __WIN__ +# define UNIV_PFS_IO +# endif +# define UNIV_PFS_THREAD +#endif /* HAVE_PSI_INTERFACE */ + /* DEBUG VERSION CONTROL ===================== */ @@ -208,6 +225,9 @@ operations (very slow); also UNIV_DEBUG must be defined */ for compressed pages */ #define UNIV_ZIP_COPY /* call page_zip_copy_recs() more often */ +#define UNIV_AIO_DEBUG /* prints info about + submitted and reaped AIO + requests to the log. */ #endif #define UNIV_BTR_DEBUG /* check B-tree links */ @@ -229,11 +249,6 @@ by one. */ /* the above option prevents forcing of log to disk at a buffer page write: it should be tested with this option off; also some ibuf tests are suppressed */ -/* -#define UNIV_BASIC_LOG_DEBUG -*/ - /* the above option enables basic recovery debugging: - new allocated file pages are reset */ /* Linkage specifier for non-static InnoDB symbols (variables and functions) that are only referenced from within InnoDB, not from MySQL */ diff --git a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h index 261d33963dc..bb295ea1b22 100644 --- a/storage/innobase/include/ut0lst.h +++ b/storage/innobase/include/ut0lst.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -110,7 +110,7 @@ Adds the node as the last element in a two-way linked list. */ #define UT_LIST_ADD_LAST(NAME, BASE, N)\ {\ - ut_ad(N);\ + ut_ad(N != NULL);\ ((BASE).count)++;\ ((N)->NAME).prev = (BASE).end;\ ((N)->NAME).next = NULL;\ diff --git a/storage/innobase/include/ut0rbt.h b/storage/innobase/include/ut0rbt.h new file mode 100644 index 00000000000..a35807be442 --- /dev/null +++ b/storage/innobase/include/ut0rbt.h @@ -0,0 +1,293 @@ +/****************************************************** +Red-Black tree implementation. +(c) 2007 Oracle/Innobase Oy + +Created 2007-03-20 Sunny Bains +*******************************************************/ + +#ifndef INNOBASE_UT0RBT_H +#define INNOBASE_UT0RBT_H + +#if !defined(IB_RBT_TESTING) +#include "univ.i" +#include "ut0mem.h" +#else +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#define ut_malloc malloc +#define ut_free free +#define ulint unsigned long +#define ut_a(c) assert(c) +#define ut_error assert(0) +#define ibool unsigned int +#define TRUE 1 +#define FALSE 0 +#endif + +/* Red black tree typedefs */ +typedef struct ib_rbt_struct ib_rbt_t; +typedef struct ib_rbt_node_struct ib_rbt_node_t; +// FIXME: Iterator is a better name than _bound_ +typedef struct ib_rbt_bound_struct ib_rbt_bound_t; +typedef void (*ib_rbt_print_node)(const ib_rbt_node_t* node); +typedef int (*ib_rbt_compare)(const void* p1, const void* p2); + +/* Red black tree color types */ +enum ib_rbt_color_enum { + IB_RBT_RED, + IB_RBT_BLACK +}; + +typedef enum ib_rbt_color_enum ib_rbt_color_t; + +/* Red black tree node */ +struct ib_rbt_node_struct { + ib_rbt_color_t color; /* color of this node */ + + ib_rbt_node_t* left; /* points left child */ + ib_rbt_node_t* right; /* points right child */ + ib_rbt_node_t* parent; /* points parent node */ + + char value[1]; /* Data value */ +}; + +/* Red black tree instance.*/ +struct ib_rbt_struct { + ib_rbt_node_t* nil; /* Black colored node that is + used as a sentinel. This is + pre-allocated too.*/ + + ib_rbt_node_t* root; /* Root of the tree, this is + pre-allocated and the first + data node is the left child.*/ + + ulint n_nodes; /* Total number of data nodes */ + + ib_rbt_compare compare; /* Fn. to use for comparison */ + ulint sizeof_value; /* Sizeof the item in bytes */ +}; + +/* The result of searching for a key in the tree, this is useful for +a speedy lookup and insert if key doesn't exist.*/ +struct ib_rbt_bound_struct { + const ib_rbt_node_t* + last; /* Last node visited */ + + int result; /* Result of comparing with + the last non-nil node that + was visited */ +}; + +/* Size in elements (t is an rb tree instance) */ +#define rbt_size(t) (t->n_nodes) + +/* Check whether the rb tree is empty (t is an rb tree instance) */ +#define rbt_empty(t) (rbt_size(t) == 0) + +/* Get data value (t is the data type, n is an rb tree node instance) */ +#define rbt_value(t, n) ((t*) &n->value[0]) + +/* Compare a key with the node value (t is tree, k is key, n is node)*/ +#define rbt_compare(t, k, n) (t->compare(k, n->value)) + +/************************************************************************ +Free an instance of a red black tree */ +UNIV_INTERN +void +rbt_free( +/*=====*/ + ib_rbt_t* tree); /*!< in: rb tree to free */ +/************************************************************************ +Create an instance of a red black tree +@return rb tree instance */ +UNIV_INTERN +ib_rbt_t* +rbt_create( +/*=======*/ + size_t sizeof_value, /*!< in: size in bytes */ + ib_rbt_compare compare); /*!< in: comparator */ +/************************************************************************ +Delete a node from the red black tree, identified by key */ +UNIV_INTERN +ibool +rbt_delete( +/*=======*/ + /* in: TRUE on success */ + ib_rbt_t* tree, /* in: rb tree */ + const void* key); /* in: key to delete */ +/************************************************************************ +Remove a node from the red black tree, NOTE: This function will not delete +the node instance, THAT IS THE CALLERS RESPONSIBILITY. +@return the deleted node with the const. */ +UNIV_INTERN +ib_rbt_node_t* +rbt_remove_node( +/*============*/ + ib_rbt_t* tree, /*!< in: rb tree */ + const ib_rbt_node_t* + node); /*!< in: node to delete, this + is a fudge and declared const + because the caller has access + only to const nodes.*/ +/************************************************************************ +Return a node from the red black tree, identified by +key, NULL if not found +@return node if found else return NULL */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_lookup( +/*=======*/ + const ib_rbt_t* tree, /*!< in: rb tree to search */ + const void* key); /*!< in: key to lookup */ +/************************************************************************ +Add data to the red black tree, identified by key (no dups yet!) +@return inserted node */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_insert( +/*=======*/ + ib_rbt_t* tree, /*!< in: rb tree */ + const void* key, /*!< in: key for ordering */ + const void* value); /*!< in: data that will be + copied to the node.*/ +/************************************************************************ +Add a new node to the tree, useful for data that is pre-sorted. +@return appended node */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_add_node( +/*=========*/ + ib_rbt_t* tree, /*!< in: rb tree */ + ib_rbt_bound_t* parent, /*!< in: parent */ + const void* value); /*!< in: this value is copied + to the node */ +/************************************************************************ +Return the left most data node in the tree +@return left most node */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_first( +/*======*/ + const ib_rbt_t* tree); /*!< in: rb tree */ +/************************************************************************ +Return the right most data node in the tree +@return right most node */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_last( +/*=====*/ + const ib_rbt_t* tree); /*!< in: rb tree */ +/************************************************************************ +Return the next node from current. +@return successor node to current that is passed in. */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_next( +/*=====*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + const ib_rbt_node_t* /* in: current node */ + current); +/************************************************************************ +Return the prev node from current. +@return precedessor node to current that is passed in */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_prev( +/*=====*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + const ib_rbt_node_t* /* in: current node */ + current); +/************************************************************************ +Find the node that has the lowest key that is >= key. +@return node that satisfies the lower bound constraint or NULL */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_lower_bound( +/*============*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + const void* key); /*!< in: key to search */ +/************************************************************************ +Find the node that has the greatest key that is <= key. +@return node that satisifies the upper bound constraint or NULL */ +UNIV_INTERN +const ib_rbt_node_t* +rbt_upper_bound( +/*============*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + const void* key); /*!< in: key to search */ +/************************************************************************ +Search for the key, a node will be retuned in parent.last, whether it +was found or not. If not found then parent.last will contain the +parent node for the possibly new key otherwise the matching node. +@return result of last comparison */ +UNIV_INTERN +int +rbt_search( +/*=======*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + ib_rbt_bound_t* parent, /*!< in: search bounds */ + const void* key); /*!< in: key to search */ +/************************************************************************ +Search for the key, a node will be retuned in parent.last, whether it +was found or not. If not found then parent.last will contain the +parent node for the possibly new key otherwise the matching node. +@return result of last comparison */ +UNIV_INTERN +int +rbt_search_cmp( +/*===========*/ + const ib_rbt_t* tree, /*!< in: rb tree */ + ib_rbt_bound_t* parent, /*!< in: search bounds */ + const void* key, /*!< in: key to search */ + ib_rbt_compare compare); /*!< in: comparator */ +/************************************************************************ +Clear the tree, deletes (and free's) all the nodes. */ +UNIV_INTERN +void +rbt_clear( +/*======*/ + ib_rbt_t* tree); /*!< in: rb tree */ +/************************************************************************ +Merge the node from dst into src. Return the number of nodes merged. +@return no. of recs merged */ +UNIV_INTERN +ulint +rbt_merge_uniq( +/*===========*/ + ib_rbt_t* dst, /*!< in: dst rb tree */ + const ib_rbt_t* src); /*!< in: src rb tree */ +/************************************************************************ +Merge the node from dst into src. Return the number of nodes merged. +Delete the nodes from src after copying node to dst. As a side effect +the duplicates will be left untouched in the src, since we don't support +duplicates (yet). NOTE: src and dst must be similar, the function doesn't +check for this condition (yet). +@return no. of recs merged */ +UNIV_INTERN +ulint +rbt_merge_uniq_destructive( +/*=======================*/ + ib_rbt_t* dst, /*!< in: dst rb tree */ + ib_rbt_t* src); /*!< in: src rb tree */ +/************************************************************************ +Verify the integrity of the RB tree. For debugging. 0 failure else height +of tree (in count of black nodes). +@return TRUE if OK FALSE if tree invalid. */ +UNIV_INTERN +ibool +rbt_validate( +/*=========*/ + const ib_rbt_t* tree); /*!< in: tree to validate */ +/************************************************************************ +Iterate over the tree in depth first order. */ +UNIV_INTERN +void +rbt_print( +/*======*/ + const ib_rbt_t* tree, /*!< in: tree to traverse */ + ib_rbt_print_node print); /*!< in: print function */ + +#endif /* INNOBASE_UT0RBT_H */ diff --git a/storage/innobase/include/ut0rnd.ic b/storage/innobase/include/ut0rnd.ic index 763469142ec..c3dbd86923c 100644 --- a/storage/innobase/include/ut0rnd.ic +++ b/storage/innobase/include/ut0rnd.ic @@ -152,6 +152,7 @@ ut_hash_ulint( ulint key, /*!< in: value to be hashed */ ulint table_size) /*!< in: hash table size */ { + ut_ad(table_size); key = key ^ UT_HASH_RANDOM_MASK2; return(key % table_size); diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h index 197b8401428..dd59b3eba46 100644 --- a/storage/innobase/include/ut0ut.h +++ b/storage/innobase/include/ut0ut.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. Copyright (c) 2009, Sun Microsystems, Inc. Portions of this file contain modifications contributed and copyrighted by @@ -35,6 +35,8 @@ Created 1/20/1994 Heikki Tuuri #include "univ.i" +#include "db0err.h" + #ifndef UNIV_HOTBACKUP # include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */ #endif /* UNIV_HOTBACKUP */ @@ -395,6 +397,16 @@ a limited buffer. */ # define ut_snprintf snprintf #endif /* __WIN__ */ +/*************************************************************//** +Convert an error number to a human readable text message. The +returned string is static and should not be freed or modified. +@return string, describing the error */ +UNIV_INTERN +const char* +ut_strerr( +/*======*/ + enum db_err num); /*!< in: error number */ + #ifndef UNIV_NONINL #include "ut0ut.ic" #endif |