diff options
Diffstat (limited to 'storage/innobase/ibuf/ibuf0ibuf.c')
-rw-r--r-- | storage/innobase/ibuf/ibuf0ibuf.c | 3543 |
1 files changed, 3543 insertions, 0 deletions
diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c new file mode 100644 index 00000000000..5ad61e2590f --- /dev/null +++ b/storage/innobase/ibuf/ibuf0ibuf.c @@ -0,0 +1,3543 @@ +/****************************************************** +Insert buffer + +(c) 1997 Innobase Oy + +Created 7/19/1997 Heikki Tuuri +*******************************************************/ + +#include "ibuf0ibuf.h" + +#ifdef UNIV_NONINL +#include "ibuf0ibuf.ic" +#endif + +#include "buf0buf.h" +#include "buf0rea.h" +#include "fsp0fsp.h" +#include "trx0sys.h" +#include "fil0fil.h" +#include "thr0loc.h" +#include "rem0rec.h" +#include "btr0cur.h" +#include "btr0pcur.h" +#include "btr0btr.h" +#include "sync0sync.h" +#include "dict0boot.h" +#include "fut0lst.h" +#include "lock0lock.h" +#include "log0recv.h" +#include "que0que.h" + +/* STRUCTURE OF AN INSERT BUFFER RECORD + +In versions < 4.1.x: + +1. The first field is the page number. +2. The second field is an array which stores type info for each subsequent + field. We store the information which affects the ordering of records, and + also the physical storage size of an SQL NULL value. E.g., for CHAR(10) it + is 10 bytes. +3. Next we have the fields of the actual index record. + +In versions >= 4.1.x: + +Note that contary to what we planned in the 1990's, there will only be one +insert buffer tree, and that is in the system tablespace of InnoDB. + +1. The first field is the space id. +2. The second field is a one-byte marker (0) which differentiates records from + the < 4.1.x storage format. +3. The third field is the page number. +4. The fourth field contains the type info, where we have also added 2 bytes to + store the charset. In the compressed table format of 5.0.x we must add more + information here so that we can build a dummy 'index' struct which 5.0.x + can use in the binary search on the index page in the ibuf merge phase. +5. The rest of the fields contain the fields of the actual index record. + +In versions >= 5.0.3: + +The first byte of the fourth field is an additional marker (0) if the record +is in the compact format. The presence of this marker can be detected by +looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE. + +The high-order bit of the character set field in the type info is the +"nullable" flag for the field. */ + + +/* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM + +If an OS thread performs any operation that brings in disk pages from +non-system tablespaces into the buffer pool, or creates such a page there, +then the operation may have as a side effect an insert buffer index tree +compression. Thus, the tree latch of the insert buffer tree may be acquired +in the x-mode, and also the file space latch of the system tablespace may +be acquired in the x-mode. + +Also, an insert to an index in a non-system tablespace can have the same +effect. How do we know this cannot lead to a deadlock of OS threads? There +is a problem with the i\o-handler threads: they break the latching order +because they own x-latches to pages which are on a lower level than the +insert buffer tree latch, its page latches, and the tablespace latch an +insert buffer operation can reserve. + +The solution is the following: Let all the tree and page latches connected +with the insert buffer be later in the latching order than the fsp latch and +fsp page latches. + +Insert buffer pages must be such that the insert buffer is never invoked +when these pages are accessed as this would result in a recursion violating +the latching order. We let a special i/o-handler thread take care of i/o to +the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap +pages and the first inode page, which contains the inode of the ibuf tree: let +us call all these ibuf pages. To prevent deadlocks, we do not let a read-ahead +access both non-ibuf and ibuf pages. + +Then an i/o-handler for the insert buffer never needs to access recursively the +insert buffer tree and thus obeys the latching order. On the other hand, other +i/o-handlers for other tablespaces may require access to the insert buffer, +but because all kinds of latches they need to access there are later in the +latching order, no violation of the latching order occurs in this case, +either. + +A problem is how to grow and contract an insert buffer tree. As it is later +in the latching order than the fsp management, we have to reserve the fsp +latch first, before adding or removing pages from the insert buffer tree. +We let the insert buffer tree have its own file space management: a free +list of pages linked to the tree root. To prevent recursive using of the +insert buffer when adding pages to the tree, we must first load these pages +to memory, obtaining a latch on them, and only after that add them to the +free list of the insert buffer tree. More difficult is removing of pages +from the free list. If there is an excess of pages in the free list of the +ibuf tree, they might be needed if some thread reserves the fsp latch, +intending to allocate more file space. So we do the following: if a thread +reserves the fsp latch, we check the writer count field of the latch. If +this field has value 1, it means that the thread did not own the latch +before entering the fsp system, and the mtr of the thread contains no +modifications to the fsp pages. Now we are free to reserve the ibuf latch, +and check if there is an excess of pages in the free list. We can then, in a +separate mini-transaction, take them out of the free list and free them to +the fsp system. + +To avoid deadlocks in the ibuf system, we divide file pages into three levels: + +(1) non-ibuf pages, +(2) ibuf tree pages and the pages in the ibuf tree free list, and +(3) ibuf bitmap pages. + +No OS thread is allowed to access higher level pages if it has latches to +lower level pages; even if the thread owns a B-tree latch it must not access +the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead +is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle +exclusively level 1 i/o. A dedicated i/o handler thread handles exclusively +level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e., +it uses synchronous aio, it can access any pages, as long as it obeys the +access order rules. */ + +/* Buffer pool size per the maximum insert buffer size */ +#define IBUF_POOL_SIZE_PER_MAX_SIZE 2 + +/* The insert buffer control structure */ +ibuf_t* ibuf = NULL; + +static +ulint ibuf_rnd = 986058871; + +ulint ibuf_flush_count = 0; + +/* Dimensions for the ibuf_count array */ +#define IBUF_COUNT_N_SPACES 500 +#define IBUF_COUNT_N_PAGES 2000 + +/* Buffered entry counts for file pages, used in debugging */ +static ulint* ibuf_counts[IBUF_COUNT_N_SPACES]; + +static ibool ibuf_counts_inited = FALSE; + +/* The start address for an insert buffer bitmap page bitmap */ +#define IBUF_BITMAP PAGE_DATA + +/* Offsets in bits for the bits describing a single page in the bitmap */ +#define IBUF_BITMAP_FREE 0 +#define IBUF_BITMAP_BUFFERED 2 +#define IBUF_BITMAP_IBUF 3 /* TRUE if page is a part of the ibuf + tree, excluding the root page, or is + in the free list of the ibuf */ + +/* Number of bits describing a single page */ +#define IBUF_BITS_PER_PAGE 4 +#if IBUF_BITS_PER_PAGE % 2 +# error "IBUF_BITS_PER_PAGE must be an even number!" +#endif + +/* The mutex used to block pessimistic inserts to ibuf trees */ +static mutex_t ibuf_pessimistic_insert_mutex; + +/* The mutex protecting the insert buffer structs */ +static mutex_t ibuf_mutex; + +/* The mutex protecting the insert buffer bitmaps */ +static mutex_t ibuf_bitmap_mutex; + +/* The area in pages from which contract looks for page numbers for merge */ +#define IBUF_MERGE_AREA 8 + +/* Inside the merge area, pages which have at most 1 per this number less +buffered entries compared to maximum volume that can buffered for a single +page are merged along with the page whose buffer became full */ +#define IBUF_MERGE_THRESHOLD 4 + +/* In ibuf_contract at most this number of pages is read to memory in one +batch, in order to merge the entries for them in the insert buffer */ +#define IBUF_MAX_N_PAGES_MERGED IBUF_MERGE_AREA + +/* If the combined size of the ibuf trees exceeds ibuf->max_size by this +many pages, we start to contract it in connection to inserts there, using +non-synchronous contract */ +#define IBUF_CONTRACT_ON_INSERT_NON_SYNC 0 + +/* Same as above, but use synchronous contract */ +#define IBUF_CONTRACT_ON_INSERT_SYNC 5 + +/* Same as above, but no insert is done, only contract is called */ +#define IBUF_CONTRACT_DO_NOT_INSERT 10 + +/* TODO: how to cope with drop table if there are records in the insert +buffer for the indexes of the table? Is there actually any problem, +because ibuf merge is done to a page when it is read in, and it is +still physically like the index page even if the index would have been +dropped! So, there seems to be no problem. */ + +/********************************************************************** +Validates the ibuf data structures when the caller owns ibuf_mutex. */ + +ibool +ibuf_validate_low(void); +/*===================*/ + /* out: TRUE if ok */ + +/********************************************************************** +Sets the flag in the current OS thread local storage denoting that it is +inside an insert buffer routine. */ +UNIV_INLINE +void +ibuf_enter(void) +/*============*/ +{ + ibool* ptr; + + ptr = thr_local_get_in_ibuf_field(); + + ut_ad(*ptr == FALSE); + + *ptr = TRUE; +} + +/********************************************************************** +Sets the flag in the current OS thread local storage denoting that it is +exiting an insert buffer routine. */ +UNIV_INLINE +void +ibuf_exit(void) +/*===========*/ +{ + ibool* ptr; + + ptr = thr_local_get_in_ibuf_field(); + + ut_ad(*ptr == TRUE); + + *ptr = FALSE; +} + +/********************************************************************** +Returns TRUE if the current OS thread is performing an insert buffer +routine. */ + +ibool +ibuf_inside(void) +/*=============*/ + /* out: TRUE if inside an insert buffer routine: for instance, + a read-ahead of non-ibuf pages is then forbidden */ +{ + return(*thr_local_get_in_ibuf_field()); +} + +/********************************************************************** +Gets the ibuf header page and x-latches it. */ +static +page_t* +ibuf_header_page_get( +/*=================*/ + /* out: insert buffer header page */ + ulint space, /* in: space id */ + mtr_t* mtr) /* in: mtr */ +{ + page_t* page; + + ut_a(space == 0); + + ut_ad(!ibuf_inside()); + + page = buf_page_get(space, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr); + +#ifdef UNIV_SYNC_DEBUG + buf_page_dbg_add_level(page, SYNC_IBUF_HEADER); +#endif /* UNIV_SYNC_DEBUG */ + + return(page); +} + +/********************************************************************** +Gets the root page and x-latches it. */ +static +page_t* +ibuf_tree_root_get( +/*===============*/ + /* out: insert buffer tree root page */ + ibuf_data_t* data, /* in: ibuf data */ + ulint space, /* in: space id */ + mtr_t* mtr) /* in: mtr */ +{ + page_t* page; + + ut_a(space == 0); + ut_ad(ibuf_inside()); + + mtr_x_lock(dict_tree_get_lock((data->index)->tree), mtr); + + page = buf_page_get(space, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH, + mtr); +#ifdef UNIV_SYNC_DEBUG + buf_page_dbg_add_level(page, SYNC_TREE_NODE); +#endif /* UNIV_SYNC_DEBUG */ + + return(page); +} + +/********************************************************************** +Gets the ibuf count for a given page. */ + +ulint +ibuf_count_get( +/*===========*/ + /* out: number of entries in the insert buffer + currently buffered for this page */ + ulint space, /* in: space id */ + ulint page_no)/* in: page number */ +{ + ut_ad(space < IBUF_COUNT_N_SPACES); + ut_ad(page_no < IBUF_COUNT_N_PAGES); + + if (!ibuf_counts_inited) { + + return(0); + } + + return(*(ibuf_counts[space] + page_no)); +} + +/********************************************************************** +Sets the ibuf count for a given page. */ +#ifdef UNIV_IBUF_DEBUG +static +void +ibuf_count_set( +/*===========*/ + ulint space, /* in: space id */ + ulint page_no,/* in: page number */ + ulint val) /* in: value to set */ +{ + ut_a(space < IBUF_COUNT_N_SPACES); + ut_a(page_no < IBUF_COUNT_N_PAGES); + ut_a(val < UNIV_PAGE_SIZE); + + *(ibuf_counts[space] + page_no) = val; +} +#endif + +/********************************************************************** +Creates the insert buffer data structure at a database startup and initializes +the data structures for the insert buffer. */ + +void +ibuf_init_at_db_start(void) +/*=======================*/ +{ + ibuf = mem_alloc(sizeof(ibuf_t)); + + /* Note that also a pessimistic delete can sometimes make a B-tree + grow in size, as the references on the upper levels of the tree can + change */ + + ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE + / IBUF_POOL_SIZE_PER_MAX_SIZE; + ibuf->meter = IBUF_THRESHOLD + 1; + + UT_LIST_INIT(ibuf->data_list); + + ibuf->size = 0; + +#ifdef UNIV_IBUF_DEBUG + { + ulint i, j; + + for (i = 0; i < IBUF_COUNT_N_SPACES; i++) { + + ibuf_counts[i] = mem_alloc(sizeof(ulint) + * IBUF_COUNT_N_PAGES); + for (j = 0; j < IBUF_COUNT_N_PAGES; j++) { + ibuf_count_set(i, j, 0); + } + } + } +#endif + mutex_create(&ibuf_pessimistic_insert_mutex); + + mutex_set_level(&ibuf_pessimistic_insert_mutex, + SYNC_IBUF_PESS_INSERT_MUTEX); + mutex_create(&ibuf_mutex); + + mutex_set_level(&ibuf_mutex, SYNC_IBUF_MUTEX); + + mutex_create(&ibuf_bitmap_mutex); + + mutex_set_level(&ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX); + + fil_ibuf_init_at_db_start(); + + ibuf_counts_inited = TRUE; +} + +/********************************************************************** +Updates the size information in an ibuf data, assuming the segment size has +not changed. */ +static +void +ibuf_data_sizes_update( +/*===================*/ + ibuf_data_t* data, /* in: ibuf data struct */ + page_t* root, /* in: ibuf tree root */ + mtr_t* mtr) /* in: mtr */ +{ + ulint old_size; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&ibuf_mutex)); +#endif /* UNIV_SYNC_DEBUG */ + + old_size = data->size; + + data->free_list_len = flst_get_len(root + PAGE_HEADER + + PAGE_BTR_IBUF_FREE_LIST, mtr); + + data->height = 1 + btr_page_get_level(root, mtr); + + data->size = data->seg_size - (1 + data->free_list_len); + /* the '1 +' is the ibuf header page */ + ut_ad(data->size < data->seg_size); + + if (page_get_n_recs(root) == 0) { + + data->empty = TRUE; + } else { + data->empty = FALSE; + } + + ut_ad(ibuf->size + data->size >= old_size); + + ibuf->size = ibuf->size + data->size - old_size; + +/* fprintf(stderr, "ibuf size %lu, space ibuf size %lu\n", ibuf->size, + data->size); */ +} + +/********************************************************************** +Creates the insert buffer data struct for a single tablespace. Reads the +root page of the insert buffer tree in the tablespace. This function can +be called only after the dictionary system has been initialized, as this +creates also the insert buffer table and index into this tablespace. */ + +ibuf_data_t* +ibuf_data_init_for_space( +/*=====================*/ + /* out, own: ibuf data struct, linked to the list + in ibuf control structure */ + ulint space) /* in: space id */ +{ + ibuf_data_t* data; + page_t* root; + page_t* header_page; + mtr_t mtr; + char buf[50]; + dict_table_t* table; + dict_index_t* index; + ulint n_used; + + ut_a(space == 0); + +#ifdef UNIV_LOG_DEBUG + if (space % 2 == 1) { + + fputs("No ibuf op in replicate space\n", stderr); + + return(NULL); + } +#endif + data = mem_alloc(sizeof(ibuf_data_t)); + + data->space = space; + + mtr_start(&mtr); + + mutex_enter(&ibuf_mutex); + + mtr_x_lock(fil_space_get_latch(space), &mtr); + + header_page = ibuf_header_page_get(space, &mtr); + + fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, + &n_used, &mtr); + ibuf_enter(); + + ut_ad(n_used >= 2); + + data->seg_size = n_used; + + root = buf_page_get(space, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH, + &mtr); +#ifdef UNIV_SYNC_DEBUG + buf_page_dbg_add_level(root, SYNC_TREE_NODE); +#endif /* UNIV_SYNC_DEBUG */ + + data->size = 0; + data->n_inserts = 0; + data->n_merges = 0; + data->n_merged_recs = 0; + + ibuf_data_sizes_update(data, root, &mtr); +/* + if (!data->empty) { + fprintf(stderr, +"InnoDB: index entries found in the insert buffer\n"); + } else { + fprintf(stderr, +"InnoDB: insert buffer empty\n"); + } +*/ + mutex_exit(&ibuf_mutex); + + mtr_commit(&mtr); + + ibuf_exit(); + + sprintf(buf, "SYS_IBUF_TABLE_%lu", (ulong) space); + /* use old-style record format for the insert buffer */ + table = dict_mem_table_create(buf, space, 2, FALSE); + + dict_mem_table_add_col(table, "PAGE_NO", DATA_BINARY, 0, 0, 0); + dict_mem_table_add_col(table, "TYPES", DATA_BINARY, 0, 0, 0); + + table->id = ut_dulint_add(DICT_IBUF_ID_MIN, space); + + dict_table_add_to_cache(table); + + index = dict_mem_index_create(buf, "CLUST_IND", space, + DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF,2); + + dict_mem_index_add_field(index, "PAGE_NO", 0, 0); + dict_mem_index_add_field(index, "TYPES", 0, 0); + + index->id = ut_dulint_add(DICT_IBUF_ID_MIN, space); + + dict_index_add_to_cache(table, index, FSP_IBUF_TREE_ROOT_PAGE_NO); + + data->index = dict_table_get_first_index(table); + + mutex_enter(&ibuf_mutex); + + UT_LIST_ADD_LAST(data_list, ibuf->data_list, data); + + mutex_exit(&ibuf_mutex); + + return(data); +} + +/************************************************************************* +Initializes an ibuf bitmap page. */ + +void +ibuf_bitmap_page_init( +/*==================*/ + page_t* page, /* in: bitmap page */ + mtr_t* mtr) /* in: mtr */ +{ + ulint bit_offset; + ulint byte_offset; + ulint i; + + /* Write all zeros to the bitmap */ + + bit_offset = XDES_DESCRIBED_PER_PAGE * IBUF_BITS_PER_PAGE; + + byte_offset = bit_offset / 8 + 1; + + for (i = IBUF_BITMAP; i < IBUF_BITMAP + byte_offset; i++) { + + *(page + i) = (byte)0; + } + + mlog_write_initial_log_record(page, MLOG_IBUF_BITMAP_INIT, mtr); +} + +/************************************************************************* +Parses a redo log record of an ibuf bitmap page init. */ + +byte* +ibuf_parse_bitmap_init( +/*===================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr __attribute__((unused)), /* in: buffer end */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ +{ + ut_ad(ptr && end_ptr); + + if (page) { + ibuf_bitmap_page_init(page, mtr); + } + + return(ptr); +} + +/************************************************************************ +Gets the desired bits for a given page from a bitmap page. */ +UNIV_INLINE +ulint +ibuf_bitmap_page_get_bits( +/*======================*/ + /* out: value of bits */ + page_t* page, /* in: bitmap page */ + ulint page_no,/* in: page whose bits to get */ + ulint bit, /* in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */ + mtr_t* mtr __attribute__((unused))) /* in: mtr containing an x-latch + to the bitmap page */ +{ + ulint byte_offset; + ulint bit_offset; + ulint map_byte; + ulint value; + + ut_ad(bit < IBUF_BITS_PER_PAGE); + ut_ad(IBUF_BITS_PER_PAGE % 2 == 0); + ut_ad(mtr_memo_contains(mtr, buf_block_align(page), + MTR_MEMO_PAGE_X_FIX)); + + bit_offset = (page_no % XDES_DESCRIBED_PER_PAGE) * IBUF_BITS_PER_PAGE + + bit; + + byte_offset = bit_offset / 8; + bit_offset = bit_offset % 8; + + ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE); + + map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset); + + value = ut_bit_get_nth(map_byte, bit_offset); + + if (bit == IBUF_BITMAP_FREE) { + ut_ad(bit_offset + 1 < 8); + + value = value * 2 + ut_bit_get_nth(map_byte, bit_offset + 1); + } + + return(value); +} + +/************************************************************************ +Sets the desired bit for a given page in a bitmap page. */ +static +void +ibuf_bitmap_page_set_bits( +/*======================*/ + page_t* page, /* in: bitmap page */ + ulint page_no,/* in: page whose bits to set */ + ulint bit, /* in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */ + ulint val, /* in: value to set */ + mtr_t* mtr) /* in: mtr containing an x-latch to the bitmap page */ +{ + ulint byte_offset; + ulint bit_offset; + ulint map_byte; + + ut_ad(bit < IBUF_BITS_PER_PAGE); + ut_ad(IBUF_BITS_PER_PAGE % 2 == 0); + ut_ad(mtr_memo_contains(mtr, buf_block_align(page), + MTR_MEMO_PAGE_X_FIX)); +#ifdef UNIV_IBUF_DEBUG + ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE) + || (0 == ibuf_count_get(buf_frame_get_space_id(page), page_no))); +#endif + bit_offset = (page_no % XDES_DESCRIBED_PER_PAGE) * IBUF_BITS_PER_PAGE + + bit; + + byte_offset = bit_offset / 8; + bit_offset = bit_offset % 8; + + ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE); + + map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset); + + if (bit == IBUF_BITMAP_FREE) { + ut_ad(bit_offset + 1 < 8); + ut_ad(val <= 3); + + map_byte = ut_bit_set_nth(map_byte, bit_offset, val / 2); + map_byte = ut_bit_set_nth(map_byte, bit_offset + 1, val % 2); + } else { + ut_ad(val <= 1); + map_byte = ut_bit_set_nth(map_byte, bit_offset, val); + } + + mlog_write_ulint(page + IBUF_BITMAP + byte_offset, map_byte, + MLOG_1BYTE, mtr); +} + +/************************************************************************ +Calculates the bitmap page number for a given page number. */ +UNIV_INLINE +ulint +ibuf_bitmap_page_no_calc( +/*=====================*/ + /* out: the bitmap page number where + the file page is mapped */ + ulint page_no) /* in: tablespace page number */ +{ + return(FSP_IBUF_BITMAP_OFFSET + + XDES_DESCRIBED_PER_PAGE + * (page_no / XDES_DESCRIBED_PER_PAGE)); +} + +/************************************************************************ +Gets the ibuf bitmap page where the bits describing a given file page are +stored. */ +static +page_t* +ibuf_bitmap_get_map_page( +/*=====================*/ + /* out: bitmap page where the file page is mapped, + that is, the bitmap page containing the descriptor + bits for the file page; the bitmap page is + x-latched */ + ulint space, /* in: space id of the file page */ + ulint page_no,/* in: page number of the file page */ + mtr_t* mtr) /* in: mtr */ +{ + page_t* page; + + page = buf_page_get(space, ibuf_bitmap_page_no_calc(page_no), + RW_X_LATCH, mtr); +#ifdef UNIV_SYNC_DEBUG + buf_page_dbg_add_level(page, SYNC_IBUF_BITMAP); +#endif /* UNIV_SYNC_DEBUG */ + + return(page); +} + +/**************************************************************************** +Sets the free bits of the page in the ibuf bitmap. This is done in a separate +mini-transaction, hence this operation does not restrict further work to only +ibuf bitmap operations, which would result if the latch to the bitmap page +were kept. */ +UNIV_INLINE +void +ibuf_set_free_bits_low( +/*===================*/ + ulint type, /* in: index type */ + page_t* page, /* in: index page; free bit is set if the index is + non-clustered and page level is 0 */ + ulint val, /* in: value to set: < 4 */ + mtr_t* mtr) /* in: mtr */ +{ + page_t* bitmap_page; + + if (type & DICT_CLUSTERED) { + + return; + } + + if (btr_page_get_level_low(page) != 0) { + + return; + } + + bitmap_page = ibuf_bitmap_get_map_page(buf_frame_get_space_id(page), + buf_frame_get_page_no(page), mtr); +#ifdef UNIV_IBUF_DEBUG + /* fprintf(stderr, + "Setting page no %lu free bits to %lu should be %lu\n", + buf_frame_get_page_no(page), val, + ibuf_index_page_calc_free(page)); */ + + ut_a(val <= ibuf_index_page_calc_free(page)); +#endif + ibuf_bitmap_page_set_bits(bitmap_page, buf_frame_get_page_no(page), + IBUF_BITMAP_FREE, val, mtr); + +} + +/**************************************************************************** +Sets the free bit of the page in the ibuf bitmap. This is done in a separate +mini-transaction, hence this operation does not restrict further work to only +ibuf bitmap operations, which would result if the latch to the bitmap page +were kept. */ + +void +ibuf_set_free_bits( +/*===============*/ + ulint type, /* in: index type */ + page_t* page, /* in: index page; free bit is set if the index is + non-clustered and page level is 0 */ + ulint val, /* in: value to set: < 4 */ + ulint max_val)/* in: ULINT_UNDEFINED or a maximum value which + the bits must have before setting; this is for + debugging */ +{ + mtr_t mtr; + page_t* bitmap_page; + + if (type & DICT_CLUSTERED) { + + return; + } + + if (btr_page_get_level_low(page) != 0) { + + return; + } + + mtr_start(&mtr); + + bitmap_page = ibuf_bitmap_get_map_page(buf_frame_get_space_id(page), + buf_frame_get_page_no(page), &mtr); + + if (max_val != ULINT_UNDEFINED) { +#ifdef UNIV_IBUF_DEBUG + ulint old_val; + + old_val = ibuf_bitmap_page_get_bits(bitmap_page, + buf_frame_get_page_no(page), + IBUF_BITMAP_FREE, &mtr); + if (old_val != max_val) { + /* fprintf(stderr, + "Ibuf: page %lu old val %lu max val %lu\n", + buf_frame_get_page_no(page), old_val, max_val); */ + } + + ut_a(old_val <= max_val); +#endif + } +#ifdef UNIV_IBUF_DEBUG +/* fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n", + buf_frame_get_page_no(page), val, + ibuf_index_page_calc_free(page)); */ + + ut_a(val <= ibuf_index_page_calc_free(page)); +#endif + ibuf_bitmap_page_set_bits(bitmap_page, buf_frame_get_page_no(page), + IBUF_BITMAP_FREE, val, &mtr); + mtr_commit(&mtr); +} + +/**************************************************************************** +Resets the free bits of the page in the ibuf bitmap. This is done in a +separate mini-transaction, hence this operation does not restrict further +work to only ibuf bitmap operations, which would result if the latch to the +bitmap page were kept. */ + +void +ibuf_reset_free_bits_with_type( +/*===========================*/ + ulint type, /* in: index type */ + page_t* page) /* in: index page; free bits are set to 0 if the index + is non-clustered and non-unique and the page level is + 0 */ +{ + ibuf_set_free_bits(type, page, 0, ULINT_UNDEFINED); +} + +/**************************************************************************** +Resets the free bits of the page in the ibuf bitmap. This is done in a +separate mini-transaction, hence this operation does not restrict further +work to solely ibuf bitmap operations, which would result if the latch to +the bitmap page were kept. */ + +void +ibuf_reset_free_bits( +/*=================*/ + dict_index_t* index, /* in: index */ + page_t* page) /* in: index page; free bits are set to 0 if + the index is non-clustered and non-unique and + the page level is 0 */ +{ + ibuf_set_free_bits(index->type, page, 0, ULINT_UNDEFINED); +} + +/************************************************************************** +Updates the free bits for a page to reflect the present state. Does this +in the mtr given, which means that the latching order rules virtually prevent +any further operations for this OS thread until mtr is committed. */ + +void +ibuf_update_free_bits_low( +/*======================*/ + dict_index_t* index, /* in: index */ + page_t* page, /* in: index page */ + ulint max_ins_size, /* in: value of maximum insert size + with reorganize before the latest + operation performed to the page */ + mtr_t* mtr) /* in: mtr */ +{ + ulint before; + ulint after; + + before = ibuf_index_page_calc_free_bits(max_ins_size); + + after = ibuf_index_page_calc_free(page); + + if (before != after) { + ibuf_set_free_bits_low(index->type, page, after, mtr); + } +} + +/************************************************************************** +Updates the free bits for the two pages to reflect the present state. Does +this in the mtr given, which means that the latching order rules virtually +prevent any further operations until mtr is committed. */ + +void +ibuf_update_free_bits_for_two_pages_low( +/*====================================*/ + dict_index_t* index, /* in: index */ + page_t* page1, /* in: index page */ + page_t* page2, /* in: index page */ + mtr_t* mtr) /* in: mtr */ +{ + ulint state; + + /* As we have to x-latch two random bitmap pages, we have to acquire + the bitmap mutex to prevent a deadlock with a similar operation + performed by another OS thread. */ + + mutex_enter(&ibuf_bitmap_mutex); + + state = ibuf_index_page_calc_free(page1); + + ibuf_set_free_bits_low(index->type, page1, state, mtr); + + state = ibuf_index_page_calc_free(page2); + + ibuf_set_free_bits_low(index->type, page2, state, mtr); + + mutex_exit(&ibuf_bitmap_mutex); +} + +/************************************************************************** +Returns TRUE if the page is one of the fixed address ibuf pages. */ +UNIV_INLINE +ibool +ibuf_fixed_addr_page( +/*=================*/ + /* out: TRUE if a fixed address ibuf i/o page */ + ulint page_no)/* in: page number */ +{ + if ((ibuf_bitmap_page(page_no)) + || (page_no == IBUF_TREE_ROOT_PAGE_NO)) { + return(TRUE); + } + + return(FALSE); +} + +/*************************************************************************** +Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */ + +ibool +ibuf_page( +/*======*/ + /* out: TRUE if level 2 or level 3 page */ + ulint space, /* in: space id */ + ulint page_no)/* in: page number */ +{ + page_t* bitmap_page; + mtr_t mtr; + ibool ret; + + if (recv_no_ibuf_operations) { + /* Recovery is running: no ibuf operations should be + performed */ + + return(FALSE); + } + + if (ibuf_fixed_addr_page(page_no)) { + + return(TRUE); + } + + if (space != 0) { + /* Currently we only have an ibuf tree in space 0 */ + + return(FALSE); + } + + ut_ad(fil_space_get_type(space) == FIL_TABLESPACE); + + mtr_start(&mtr); + + bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr); + + ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF, + &mtr); + mtr_commit(&mtr); + + return(ret); +} + +/*************************************************************************** +Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */ + +ibool +ibuf_page_low( +/*==========*/ + /* out: TRUE if level 2 or level 3 page */ + ulint space, /* in: space id */ + ulint page_no,/* in: page number */ + mtr_t* mtr) /* in: mtr which will contain an x-latch to the + bitmap page if the page is not one of the fixed + address ibuf pages */ +{ + page_t* bitmap_page; + ibool ret; + +#ifdef UNIV_LOG_DEBUG + if (space % 2 != 0) { + + fputs("No ibuf in a replicate space\n", stderr); + + return(FALSE); + } +#endif + if (ibuf_fixed_addr_page(page_no)) { + + return(TRUE); + } + + bitmap_page = ibuf_bitmap_get_map_page(space, page_no, mtr); + + ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF, + mtr); + return(ret); +} + +/************************************************************************ +Returns the page number field of an ibuf record. */ +static +ulint +ibuf_rec_get_page_no( +/*=================*/ + /* out: page number */ + rec_t* rec) /* in: ibuf record */ +{ + byte* field; + ulint len; + + ut_ad(ibuf_inside()); + ut_ad(rec_get_n_fields_old(rec) > 2); + + field = rec_get_nth_field_old(rec, 1, &len); + + if (len == 1) { + /* This is of the >= 4.1.x record format */ + ut_a(trx_sys_multiple_tablespace_format); + + field = rec_get_nth_field_old(rec, 2, &len); + } else { + ut_a(trx_doublewrite_must_reset_space_ids); + ut_a(!trx_sys_multiple_tablespace_format); + + field = rec_get_nth_field_old(rec, 0, &len); + } + + ut_a(len == 4); + + return(mach_read_from_4(field)); +} + +/************************************************************************ +Returns the space id field of an ibuf record. For < 4.1.x format records +returns 0. */ +static +ulint +ibuf_rec_get_space( +/*===============*/ + /* out: space id */ + rec_t* rec) /* in: ibuf record */ +{ + byte* field; + ulint len; + + ut_ad(ibuf_inside()); + ut_ad(rec_get_n_fields_old(rec) > 2); + + field = rec_get_nth_field_old(rec, 1, &len); + + if (len == 1) { + /* This is of the >= 4.1.x record format */ + + ut_a(trx_sys_multiple_tablespace_format); + field = rec_get_nth_field_old(rec, 0, &len); + ut_a(len == 4); + + return(mach_read_from_4(field)); + } + + ut_a(trx_doublewrite_must_reset_space_ids); + ut_a(!trx_sys_multiple_tablespace_format); + + return(0); +} + +/************************************************************************ +Creates a dummy index for inserting a record to a non-clustered index. +*/ +static +dict_index_t* +ibuf_dummy_index_create( +/*====================*/ + /* out: dummy index */ + ulint n, /* in: number of fields */ + ibool comp) /* in: TRUE=use compact record format */ +{ + dict_table_t* table; + dict_index_t* index; + table = dict_mem_table_create("IBUF_DUMMY", + DICT_HDR_SPACE, n, comp); + index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY", + DICT_HDR_SPACE, 0, n); + index->table = table; + /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ + index->cached = TRUE; + return(index); +} +/************************************************************************ +Add a column to the dummy index */ +static +void +ibuf_dummy_index_add_col( +/*====================*/ + dict_index_t* index, /* in: dummy index */ + dtype_t* type, /* in: the data type of the column */ + ulint len) /* in: length of the column */ +{ + ulint i = index->table->n_def; + dict_mem_table_add_col(index->table, "DUMMY", + dtype_get_mtype(type), + dtype_get_prtype(type), + dtype_get_len(type), + dtype_get_prec(type)); + dict_index_add_col(index, + dict_table_get_nth_col(index->table, i), 0, len); +} +/************************************************************************ +Deallocates a dummy index for inserting a record to a non-clustered index. +*/ +static +void +ibuf_dummy_index_free( +/*====================*/ + dict_index_t* index) /* in: dummy index */ +{ + dict_table_t* table = index->table; + mem_heap_free(index->heap); + mutex_free(&(table->autoinc_mutex)); + mem_heap_free(table->heap); +} + +/************************************************************************* +Builds the entry to insert into a non-clustered index when we have the +corresponding record in an ibuf index. */ +static +dtuple_t* +ibuf_build_entry_from_ibuf_rec( +/*===========================*/ + /* out, own: entry to insert to + a non-clustered index; NOTE that + as we copy pointers to fields in + ibuf_rec, the caller must hold a + latch to the ibuf_rec page as long + as the entry is used! */ + rec_t* ibuf_rec, /* in: record in an insert buffer */ + mem_heap_t* heap, /* in: heap where built */ + dict_index_t** pindex) /* out, own: dummy index that + describes the entry */ +{ + dtuple_t* tuple; + dfield_t* field; + ulint n_fields; + byte* types; + const byte* data; + ulint len; + ulint i; + dict_index_t* index; + + data = rec_get_nth_field_old(ibuf_rec, 1, &len); + + if (len > 1) { + /* This a < 4.1.x format record */ + + ut_a(trx_doublewrite_must_reset_space_ids); + ut_a(!trx_sys_multiple_tablespace_format); + + n_fields = rec_get_n_fields_old(ibuf_rec) - 2; + tuple = dtuple_create(heap, n_fields); + types = rec_get_nth_field_old(ibuf_rec, 1, &len); + + ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE); + + for (i = 0; i < n_fields; i++) { + field = dtuple_get_nth_field(tuple, i); + + data = rec_get_nth_field_old(ibuf_rec, i + 2, &len); + + dfield_set_data(field, data, len); + + dtype_read_for_order_and_null_size( + dfield_get_type(field), + types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE); + } + + *pindex = ibuf_dummy_index_create(n_fields, FALSE); + return(tuple); + } + + /* This a >= 4.1.x format record */ + + ut_a(trx_sys_multiple_tablespace_format); + ut_a(*data == 0); + ut_a(rec_get_n_fields_old(ibuf_rec) > 4); + + n_fields = rec_get_n_fields_old(ibuf_rec) - 4; + + tuple = dtuple_create(heap, n_fields); + + types = rec_get_nth_field_old(ibuf_rec, 3, &len); + + ut_a(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE <= 1); + index = ibuf_dummy_index_create(n_fields, + len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); + + if (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) { + /* compact record format */ + len--; + ut_a(*types == 0); + types++; + } + + ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); + + for (i = 0; i < n_fields; i++) { + field = dtuple_get_nth_field(tuple, i); + + data = rec_get_nth_field_old(ibuf_rec, i + 4, &len); + + dfield_set_data(field, data, len); + + dtype_new_read_for_order_and_null_size( + dfield_get_type(field), + types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); + + ibuf_dummy_index_add_col(index, dfield_get_type(field), len); + } + + *pindex = index; + return(tuple); +} + +/************************************************************************ +Returns the space taken by a stored non-clustered index entry if converted to +an index record. */ +static +ulint +ibuf_rec_get_volume( +/*================*/ + /* out: size of index record in bytes + an upper + limit of the space taken in the page directory */ + rec_t* ibuf_rec)/* in: ibuf record */ +{ + dtype_t dtype; + ibool new_format = FALSE; + ulint data_size = 0; + ulint n_fields; + byte* types; + byte* data; + ulint len; + ulint i; + + ut_ad(ibuf_inside()); + ut_ad(rec_get_n_fields_old(ibuf_rec) > 2); + + data = rec_get_nth_field_old(ibuf_rec, 1, &len); + + if (len > 1) { + /* < 4.1.x format record */ + + ut_a(trx_doublewrite_must_reset_space_ids); + ut_a(!trx_sys_multiple_tablespace_format); + + n_fields = rec_get_n_fields_old(ibuf_rec) - 2; + + types = rec_get_nth_field_old(ibuf_rec, 1, &len); + + ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE); + } else { + /* >= 4.1.x format record */ + + ut_a(trx_sys_multiple_tablespace_format); + ut_a(*data == 0); + + types = rec_get_nth_field_old(ibuf_rec, 3, &len); + + ut_a(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE <= 1); + if (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) { + /* compact record format */ + ulint volume; + dict_index_t* dummy_index; + mem_heap_t* heap = mem_heap_create(500); + dtuple_t* entry = + ibuf_build_entry_from_ibuf_rec( + ibuf_rec, heap, &dummy_index); + volume = rec_get_converted_size(dummy_index, entry); + ibuf_dummy_index_free(dummy_index); + mem_heap_free(heap); + return(volume + page_dir_calc_reserved_space(1)); + } + + n_fields = rec_get_n_fields_old(ibuf_rec) - 4; + + new_format = TRUE; + } + + for (i = 0; i < n_fields; i++) { + if (new_format) { + data = rec_get_nth_field_old(ibuf_rec, i + 4, &len); + + dtype_new_read_for_order_and_null_size(&dtype, + types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); + } else { + data = rec_get_nth_field_old(ibuf_rec, i + 2, &len); + + dtype_read_for_order_and_null_size(&dtype, + types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE); + } + + if (len == UNIV_SQL_NULL) { + data_size += dtype_get_sql_null_size(&dtype); + } else { + data_size += len; + } + } + + return(data_size + rec_get_converted_extra_size(data_size, n_fields) + + page_dir_calc_reserved_space(1)); +} + +/************************************************************************* +Builds the tuple to insert to an ibuf tree when we have an entry for a +non-clustered index. */ +static +dtuple_t* +ibuf_entry_build( +/*=============*/ + /* out, own: entry to insert into an ibuf + index tree; NOTE that the original entry + must be kept because we copy pointers to its + fields */ + dtuple_t* entry, /* in: entry for a non-clustered index */ + ibool comp, /* in: flag: TRUE=compact record format */ + ulint space, /* in: space id */ + ulint page_no,/* in: index page number where entry should + be inserted */ + mem_heap_t* heap) /* in: heap into which to build */ +{ + dtuple_t* tuple; + dfield_t* field; + dfield_t* entry_field; + ulint n_fields; + byte* buf; + byte* buf2; + ulint i; + + /* Starting from 4.1.x, we have to build a tuple whose + (1) first field is the space id, + (2) the second field a single marker byte (0) to tell that this + is a new format record, + (3) the third contains the page number, and + (4) the fourth contains the relevent type information of each data + field; the length of this field % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE is + (a) 0 for b-trees in the old format, and + (b) 1 for b-trees in the compact format, the first byte of the field + being the marker (0); + (5) and the rest of the fields are copied from entry. All fields + in the tuple are ordered like the type binary in our insert buffer + tree. */ + + n_fields = dtuple_get_n_fields(entry); + + tuple = dtuple_create(heap, n_fields + 4); + + /* Store the space id in tuple */ + + field = dtuple_get_nth_field(tuple, 0); + + buf = mem_heap_alloc(heap, 4); + + mach_write_to_4(buf, space); + + dfield_set_data(field, buf, 4); + + /* Store the marker byte field in tuple */ + + field = dtuple_get_nth_field(tuple, 1); + + buf = mem_heap_alloc(heap, 1); + + /* We set the marker byte zero */ + + mach_write_to_1(buf, 0); + + dfield_set_data(field, buf, 1); + + /* Store the page number in tuple */ + + field = dtuple_get_nth_field(tuple, 2); + + buf = mem_heap_alloc(heap, 4); + + mach_write_to_4(buf, page_no); + + dfield_set_data(field, buf, 4); + + ut_ad(comp == 0 || comp == 1); + /* Store the type info in buf2, and add the fields from entry to + tuple */ + buf2 = mem_heap_alloc(heap, n_fields + * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE + + comp); + if (comp) { + *buf2++ = 0; /* write the compact format indicator */ + } + for (i = 0; i < n_fields; i++) { + /* We add 4 below because we have the 4 extra fields at the + start of an ibuf record */ + + field = dtuple_get_nth_field(tuple, i + 4); + entry_field = dtuple_get_nth_field(entry, i); + dfield_copy(field, entry_field); + + dtype_new_store_for_order_and_null_size( + buf2 + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE, + dfield_get_type(entry_field)); + } + + /* Store the type info in buf2 to field 3 of tuple */ + + field = dtuple_get_nth_field(tuple, 3); + + if (comp) { + buf2--; + } + + dfield_set_data(field, buf2, n_fields + * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE + + comp); + /* Set all the types in the new tuple binary */ + + dtuple_set_types_binary(tuple, n_fields + 4); + + return(tuple); +} + +/************************************************************************* +Builds a search tuple used to search buffered inserts for an index page. +This is for < 4.1.x format records */ +static +dtuple_t* +ibuf_search_tuple_build( +/*====================*/ + /* out, own: search tuple */ + ulint space, /* in: space id */ + ulint page_no,/* in: index page number */ + mem_heap_t* heap) /* in: heap into which to build */ +{ + dtuple_t* tuple; + dfield_t* field; + byte* buf; + + ut_a(space == 0); + ut_a(trx_doublewrite_must_reset_space_ids); + ut_a(!trx_sys_multiple_tablespace_format); + + tuple = dtuple_create(heap, 1); + + /* Store the page number in tuple */ + + field = dtuple_get_nth_field(tuple, 0); + + buf = mem_heap_alloc(heap, 4); + + mach_write_to_4(buf, page_no); + + dfield_set_data(field, buf, 4); + + dtuple_set_types_binary(tuple, 1); + + return(tuple); +} + +/************************************************************************* +Builds a search tuple used to search buffered inserts for an index page. +This is for >= 4.1.x format records. */ +static +dtuple_t* +ibuf_new_search_tuple_build( +/*========================*/ + /* out, own: search tuple */ + ulint space, /* in: space id */ + ulint page_no,/* in: index page number */ + mem_heap_t* heap) /* in: heap into which to build */ +{ + dtuple_t* tuple; + dfield_t* field; + byte* buf; + + ut_a(trx_sys_multiple_tablespace_format); + + tuple = dtuple_create(heap, 3); + + /* Store the space id in tuple */ + + field = dtuple_get_nth_field(tuple, 0); + + buf = mem_heap_alloc(heap, 4); + + mach_write_to_4(buf, space); + + dfield_set_data(field, buf, 4); + + /* Store the new format record marker byte */ + + field = dtuple_get_nth_field(tuple, 1); + + buf = mem_heap_alloc(heap, 1); + + mach_write_to_1(buf, 0); + + dfield_set_data(field, buf, 1); + + /* Store the page number in tuple */ + + field = dtuple_get_nth_field(tuple, 2); + + buf = mem_heap_alloc(heap, 4); + + mach_write_to_4(buf, page_no); + + dfield_set_data(field, buf, 4); + + dtuple_set_types_binary(tuple, 3); + + return(tuple); +} + +/************************************************************************* +Checks if there are enough pages in the free list of the ibuf tree that we +dare to start a pessimistic insert to the insert buffer. */ +UNIV_INLINE +ibool +ibuf_data_enough_free_for_insert( +/*=============================*/ + /* out: TRUE if enough free pages in list */ + ibuf_data_t* data) /* in: ibuf data for the space */ +{ +#ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&ibuf_mutex)); +#endif /* UNIV_SYNC_DEBUG */ + + /* We want a big margin of free pages, because a B-tree can sometimes + grow in size also if records are deleted from it, as the node pointers + can change, and we must make sure that we are able to delete the + inserts buffered for pages that we read to the buffer pool, without + any risk of running out of free space in the insert buffer. */ + + if (data->free_list_len >= data->size / 2 + 3 * data->height) { + + return(TRUE); + } + + return(FALSE); +} + +/************************************************************************* +Checks if there are enough pages in the free list of the ibuf tree that we +should remove them and free to the file space management. */ +UNIV_INLINE +ibool +ibuf_data_too_much_free( +/*====================*/ + /* out: TRUE if enough free pages in list */ + ibuf_data_t* data) /* in: ibuf data for the space */ +{ +#ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&ibuf_mutex)); +#endif /* UNIV_SYNC_DEBUG */ + + if (data->free_list_len >= 3 + data->size / 2 + 3 * data->height) { + + return(TRUE); + } + + return(FALSE); +} + +/************************************************************************* +Allocates a new page from the ibuf file segment and adds it to the free +list. */ +static +ulint +ibuf_add_free_page( +/*===============*/ + /* out: DB_SUCCESS, or DB_STRONG_FAIL + if no space left */ + ulint space, /* in: space id */ + ibuf_data_t* ibuf_data) /* in: ibuf data for the space */ +{ + mtr_t mtr; + page_t* header_page; + ulint page_no; + page_t* page; + page_t* root; + page_t* bitmap_page; + + ut_a(space == 0); + + mtr_start(&mtr); + + /* Acquire the fsp latch before the ibuf header, obeying the latching + order */ + mtr_x_lock(fil_space_get_latch(space), &mtr); + + header_page = ibuf_header_page_get(space, &mtr); + + /* Allocate a new page: NOTE that if the page has been a part of a + non-clustered index which has subsequently been dropped, then the + page may have buffered inserts in the insert buffer, and these + should be deleted from there. These get deleted when the page + allocation creates the page in buffer. Thus the call below may end + up calling the insert buffer routines and, as we yet have no latches + to insert buffer tree pages, these routines can run without a risk + of a deadlock. This is the reason why we created a special ibuf + header page apart from the ibuf tree. */ + + page_no = fseg_alloc_free_page(header_page + IBUF_HEADER + + IBUF_TREE_SEG_HEADER, 0, FSP_UP, + &mtr); + if (page_no == FIL_NULL) { + mtr_commit(&mtr); + + return(DB_STRONG_FAIL); + } + + page = buf_page_get(space, page_no, RW_X_LATCH, &mtr); + +#ifdef UNIV_SYNC_DEBUG + buf_page_dbg_add_level(page, SYNC_TREE_NODE_NEW); +#endif /* UNIV_SYNC_DEBUG */ + + ibuf_enter(); + + mutex_enter(&ibuf_mutex); + + root = ibuf_tree_root_get(ibuf_data, space, &mtr); + + /* Add the page to the free list and update the ibuf size data */ + + flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, + page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr); + + fil_page_set_type(page, FIL_PAGE_IBUF_FREE_LIST); + + ibuf_data->seg_size++; + ibuf_data->free_list_len++; + + /* Set the bit indicating that this page is now an ibuf tree page + (level 2 page) */ + + bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr); + + ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF, + TRUE, &mtr); + mtr_commit(&mtr); + + mutex_exit(&ibuf_mutex); + + ibuf_exit(); + + return(DB_SUCCESS); +} + +/************************************************************************* +Removes a page from the free list and frees it to the fsp system. */ +static +void +ibuf_remove_free_page( +/*==================*/ + ulint space, /* in: space id */ + ibuf_data_t* ibuf_data) /* in: ibuf data for the space */ +{ + mtr_t mtr; + mtr_t mtr2; + page_t* header_page; + ulint page_no; + page_t* page; + page_t* root; + page_t* bitmap_page; + + ut_a(space == 0); + + mtr_start(&mtr); + + /* Acquire the fsp latch before the ibuf header, obeying the latching + order */ + mtr_x_lock(fil_space_get_latch(space), &mtr); + + header_page = ibuf_header_page_get(space, &mtr); + + /* Prevent pessimistic inserts to insert buffer trees for a while */ + mutex_enter(&ibuf_pessimistic_insert_mutex); + + ibuf_enter(); + + mutex_enter(&ibuf_mutex); + + if (!ibuf_data_too_much_free(ibuf_data)) { + + mutex_exit(&ibuf_mutex); + + ibuf_exit(); + + mutex_exit(&ibuf_pessimistic_insert_mutex); + + mtr_commit(&mtr); + + return; + } + + mtr_start(&mtr2); + + root = ibuf_tree_root_get(ibuf_data, space, &mtr2); + + page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, + &mtr2) + .page; + + /* NOTE that we must release the latch on the ibuf tree root + because in fseg_free_page we access level 1 pages, and the root + is a level 2 page. */ + + mtr_commit(&mtr2); + mutex_exit(&ibuf_mutex); + + ibuf_exit(); + + /* Since pessimistic inserts were prevented, we know that the + page is still in the free list. NOTE that also deletes may take + pages from the free list, but they take them from the start, and + the free list was so long that they cannot have taken the last + page from it. */ + + fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, + space, page_no, &mtr); +#ifdef UNIV_DEBUG_FILE_ACCESSES + buf_page_reset_file_page_was_freed(space, page_no); +#endif + ibuf_enter(); + + mutex_enter(&ibuf_mutex); + + root = ibuf_tree_root_get(ibuf_data, space, &mtr); + + ut_ad(page_no == flst_get_last(root + PAGE_HEADER + + PAGE_BTR_IBUF_FREE_LIST, &mtr) + .page); + + page = buf_page_get(space, page_no, RW_X_LATCH, &mtr); + +#ifdef UNIV_SYNC_DEBUG + buf_page_dbg_add_level(page, SYNC_TREE_NODE); +#endif /* UNIV_SYNC_DEBUG */ + + /* Remove the page from the free list and update the ibuf size data */ + + flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, + page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr); + + ibuf_data->seg_size--; + ibuf_data->free_list_len--; + + mutex_exit(&ibuf_pessimistic_insert_mutex); + + /* Set the bit indicating that this page is no more an ibuf tree page + (level 2 page) */ + + bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr); + + ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF, + FALSE, &mtr); +#ifdef UNIV_DEBUG_FILE_ACCESSES + buf_page_set_file_page_was_freed(space, page_no); +#endif + mtr_commit(&mtr); + + mutex_exit(&ibuf_mutex); + + ibuf_exit(); +} + +/*************************************************************************** +Frees excess pages from the ibuf free list. This function is called when an OS +thread calls fsp services to allocate a new file segment, or a new page to a +file segment, and the thread did not own the fsp latch before this call. */ + +void +ibuf_free_excess_pages( +/*===================*/ + ulint space) /* in: space id */ +{ + ibuf_data_t* ibuf_data; + ulint i; + + if (space != 0) { + fprintf(stderr, +"InnoDB: Error: calling ibuf_free_excess_pages for space %lu\n", (ulong) space); + return; + } + +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(fil_space_get_latch(space), RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_get_x_lock_count(fil_space_get_latch(space)) == 1); + ut_ad(!ibuf_inside()); + + /* NOTE: We require that the thread did not own the latch before, + because then we know that we can obey the correct latching order + for ibuf latches */ + + ibuf_data = fil_space_get_ibuf_data(space); + + if (ibuf_data == NULL) { + /* Not yet initialized */ + +#ifdef UNIV_DEBUG + /*fprintf(stderr, + "Ibuf for space %lu not yet initialized\n", space); */ +#endif + + return; + } + + /* Free at most a few pages at a time, so that we do not delay the + requested service too much */ + + for (i = 0; i < 4; i++) { + + mutex_enter(&ibuf_mutex); + + if (!ibuf_data_too_much_free(ibuf_data)) { + + mutex_exit(&ibuf_mutex); + + return; + } + + mutex_exit(&ibuf_mutex); + + ibuf_remove_free_page(space, ibuf_data); + } +} + +/************************************************************************* +Reads page numbers from a leaf in an ibuf tree. */ +static +ulint +ibuf_get_merge_page_nos( +/*====================*/ + /* out: a lower limit for the combined volume + of records which will be merged */ + ibool contract,/* in: TRUE if this function is called to + contract the tree, FALSE if this is called + when a single page becomes full and we look + if it pays to read also nearby pages */ + rec_t* first_rec,/* in: record from which we read up and down + in the chain of records */ + ulint* space_ids,/* in/out: space id's of the pages */ + ib_longlong* space_versions,/* in/out: tablespace version + timestamps; used to prevent reading in old + pages after DISCARD + IMPORT tablespace */ + ulint* page_nos,/* in/out: buffer for at least + IBUF_MAX_N_PAGES_MERGED many page numbers; + the page numbers are in an ascending order */ + ulint* n_stored)/* out: number of page numbers stored to + page_nos in this function */ +{ + ulint prev_page_no; + ulint prev_space_id; + ulint first_page_no; + ulint first_space_id; + ulint rec_page_no; + ulint rec_space_id; + rec_t* rec; + ulint sum_volumes; + ulint volume_for_page; + ulint rec_volume; + ulint limit; + page_t* page; + ulint n_pages; + + *n_stored = 0; + + limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool->curr_size / 4); + + page = buf_frame_align(first_rec); + + if (first_rec == page_get_supremum_rec(page)) { + + first_rec = page_rec_get_prev(first_rec); + } + + if (first_rec == page_get_infimum_rec(page)) { + + first_rec = page_rec_get_next(first_rec); + } + + if (first_rec == page_get_supremum_rec(page)) { + + return(0); + } + + rec = first_rec; + first_page_no = ibuf_rec_get_page_no(first_rec); + first_space_id = ibuf_rec_get_space(first_rec); + n_pages = 0; + prev_page_no = 0; + prev_space_id = 0; + + /* Go backwards from the first_rec until we reach the border of the + 'merge area', or the page start or the limit of storeable pages is + reached */ + + while ((rec != page_get_infimum_rec(page)) && (n_pages < limit)) { + + rec_page_no = ibuf_rec_get_page_no(rec); + rec_space_id = ibuf_rec_get_space(rec); + + if (rec_space_id != first_space_id + || rec_page_no / IBUF_MERGE_AREA + != first_page_no / IBUF_MERGE_AREA) { + + break; + } + + if (rec_page_no != prev_page_no + || rec_space_id != prev_space_id) { + n_pages++; + } + + prev_page_no = rec_page_no; + prev_space_id = rec_space_id; + + rec = page_rec_get_prev(rec); + } + + rec = page_rec_get_next(rec); + + /* At the loop start there is no prev page; we mark this with a pair + of space id, page no (0, 0) for which there can never be entries in + the insert buffer */ + + prev_page_no = 0; + prev_space_id = 0; + sum_volumes = 0; + volume_for_page = 0; + + while (*n_stored < limit) { + if (rec == page_get_supremum_rec(page)) { + /* When no more records available, mark this with + another 'impossible' pair of space id, page no */ + rec_page_no = 1; + rec_space_id = 0; + } else { + rec_page_no = ibuf_rec_get_page_no(rec); + rec_space_id = ibuf_rec_get_space(rec); + ut_ad(rec_page_no > IBUF_TREE_ROOT_PAGE_NO); + } + +#ifdef UNIV_IBUF_DEBUG + ut_a(*n_stored < IBUF_MAX_N_PAGES_MERGED); +#endif + if ((rec_space_id != prev_space_id + || rec_page_no != prev_page_no) + && (prev_space_id != 0 || prev_page_no != 0)) { + + if ((prev_page_no == first_page_no + && prev_space_id == first_space_id) + || contract + || (volume_for_page > + ((IBUF_MERGE_THRESHOLD - 1) + * 4 * UNIV_PAGE_SIZE + / IBUF_PAGE_SIZE_PER_FREE_SPACE) + / IBUF_MERGE_THRESHOLD)) { + + space_ids[*n_stored] = prev_space_id; + space_versions[*n_stored] + = fil_space_get_version( + prev_space_id); + page_nos[*n_stored] = prev_page_no; + + (*n_stored)++; + + sum_volumes += volume_for_page; + } + + if (rec_space_id != first_space_id + || rec_page_no / IBUF_MERGE_AREA + != first_page_no / IBUF_MERGE_AREA) { + + break; + } + + volume_for_page = 0; + } + + if (rec_page_no == 1 && rec_space_id == 0) { + /* Supremum record */ + + break; + } + + rec_volume = ibuf_rec_get_volume(rec); + + volume_for_page += rec_volume; + + prev_page_no = rec_page_no; + prev_space_id = rec_space_id; + + rec = page_rec_get_next(rec); + } + +#ifdef UNIV_IBUF_DEBUG + ut_a(*n_stored <= IBUF_MAX_N_PAGES_MERGED); +#endif +/* fprintf(stderr, "Ibuf merge batch %lu pages %lu volume\n", *n_stored, + sum_volumes); */ + return(sum_volumes); +} + +/************************************************************************* +Contracts insert buffer trees by reading pages to the buffer pool. */ +static +ulint +ibuf_contract_ext( +/*==============*/ + /* out: a lower limit for the combined size in bytes + of entries which will be merged from ibuf trees to the + pages read, 0 if ibuf is empty */ + ulint* n_pages,/* out: number of pages to which merged */ + ibool sync) /* in: TRUE if the caller wants to wait for the + issued read with the highest tablespace address + to complete */ +{ + ulint rnd_pos; + ibuf_data_t* data; + btr_pcur_t pcur; + ulint space; + ibool all_trees_empty; + ulint page_nos[IBUF_MAX_N_PAGES_MERGED]; + ulint space_ids[IBUF_MAX_N_PAGES_MERGED]; + ib_longlong space_versions[IBUF_MAX_N_PAGES_MERGED]; + ulint n_stored; + ulint sum_sizes; + mtr_t mtr; + + *n_pages = 0; +loop: + ut_ad(!ibuf_inside()); + + mutex_enter(&ibuf_mutex); + + ut_ad(ibuf_validate_low()); + + /* Choose an ibuf tree at random (though there really is only one tree + in the current implementation) */ + ibuf_rnd += 865558671; + + rnd_pos = ibuf_rnd % ibuf->size; + + all_trees_empty = TRUE; + + data = UT_LIST_GET_FIRST(ibuf->data_list); + + for (;;) { + if (!data->empty) { + all_trees_empty = FALSE; + + if (rnd_pos < data->size) { + + break; + } + + rnd_pos -= data->size; + } + + data = UT_LIST_GET_NEXT(data_list, data); + + if (data == NULL) { + if (all_trees_empty) { + mutex_exit(&ibuf_mutex); + + return(0); + } + + data = UT_LIST_GET_FIRST(ibuf->data_list); + } + } + + ut_ad(data); + + space = data->index->space; + + ut_a(space == 0); /* We currently only have an ibuf tree in + space 0 */ + mtr_start(&mtr); + + ibuf_enter(); + + /* Open a cursor to a randomly chosen leaf of the tree, at a random + position within the leaf */ + + btr_pcur_open_at_rnd_pos(data->index, BTR_SEARCH_LEAF, &pcur, &mtr); + + if (0 == page_get_n_recs(btr_pcur_get_page(&pcur))) { + + /* This tree is empty */ + + data->empty = TRUE; + + ibuf_exit(); + + mtr_commit(&mtr); + btr_pcur_close(&pcur); + + mutex_exit(&ibuf_mutex); + + goto loop; + } + + mutex_exit(&ibuf_mutex); + + sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur), + space_ids, space_versions, page_nos, &n_stored); +#ifdef UNIV_IBUF_DEBUG + /* fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n", + sync, n_stored, sum_sizes); */ +#endif + ibuf_exit(); + + mtr_commit(&mtr); + btr_pcur_close(&pcur); + + buf_read_ibuf_merge_pages(sync, space_ids, space_versions, page_nos, + n_stored); + *n_pages = n_stored; + + return(sum_sizes + 1); +} + +/************************************************************************* +Contracts insert buffer trees by reading pages to the buffer pool. */ + +ulint +ibuf_contract( +/*==========*/ + /* out: a lower limit for the combined size in bytes + of entries which will be merged from ibuf trees to the + pages read, 0 if ibuf is empty */ + ibool sync) /* in: TRUE if the caller wants to wait for the + issued read with the highest tablespace address + to complete */ +{ + ulint n_pages; + + return(ibuf_contract_ext(&n_pages, sync)); +} + +/************************************************************************* +Contracts insert buffer trees by reading pages to the buffer pool. */ + +ulint +ibuf_contract_for_n_pages( +/*======================*/ + /* out: a lower limit for the combined size in bytes + of entries which will be merged from ibuf trees to the + pages read, 0 if ibuf is empty */ + ibool sync, /* in: TRUE if the caller wants to wait for the + issued read with the highest tablespace address + to complete */ + ulint n_pages)/* in: try to read at least this many pages to + the buffer pool and merge the ibuf contents to + them */ +{ + ulint sum_bytes = 0; + ulint sum_pages = 0; + ulint n_bytes; + ulint n_pag2; + + while (sum_pages < n_pages) { + n_bytes = ibuf_contract_ext(&n_pag2, sync); + + if (n_bytes == 0) { + return(sum_bytes); + } + + sum_bytes += n_bytes; + sum_pages += n_pag2; + } + + return(sum_bytes); +} + +/************************************************************************* +Contract insert buffer trees after insert if they are too big. */ +UNIV_INLINE +void +ibuf_contract_after_insert( +/*=======================*/ + ulint entry_size) /* in: size of a record which was inserted + into an ibuf tree */ +{ + ibool sync; + ulint sum_sizes; + ulint size; + + mutex_enter(&ibuf_mutex); + + if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) { + mutex_exit(&ibuf_mutex); + + return; + } + + sync = FALSE; + + if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_ON_INSERT_SYNC) { + + sync = TRUE; + } + + mutex_exit(&ibuf_mutex); + + /* Contract at least entry_size many bytes */ + sum_sizes = 0; + size = 1; + + while ((size > 0) && (sum_sizes < entry_size)) { + + size = ibuf_contract(sync); + sum_sizes += size; + } +} + +/************************************************************************* +Gets an upper limit for the combined size of entries buffered in the insert +buffer for a given page. */ + +ulint +ibuf_get_volume_buffered( +/*=====================*/ + /* out: upper limit for the volume of + buffered inserts for the index page, in bytes; + we may also return UNIV_PAGE_SIZE, if the + entries for the index page span on several + pages in the insert buffer */ + btr_pcur_t* pcur, /* in: pcur positioned at a place in an + insert buffer tree where we would insert an + entry for the index page whose number is + page_no, latch mode has to be BTR_MODIFY_PREV + or BTR_MODIFY_TREE */ + ulint space, /* in: space id */ + ulint page_no,/* in: page number of an index page */ + mtr_t* mtr) /* in: mtr */ +{ + ulint volume; + rec_t* rec; + page_t* page; + ulint prev_page_no; + page_t* prev_page; + ulint next_page_no; + page_t* next_page; + + ut_a(trx_sys_multiple_tablespace_format); + + ut_ad((pcur->latch_mode == BTR_MODIFY_PREV) + || (pcur->latch_mode == BTR_MODIFY_TREE)); + + /* Count the volume of records earlier in the alphabetical order than + pcur */ + + volume = 0; + + rec = btr_pcur_get_rec(pcur); + + page = buf_frame_align(rec); + + if (rec == page_get_supremum_rec(page)) { + rec = page_rec_get_prev(rec); + } + + for (;;) { + if (rec == page_get_infimum_rec(page)) { + + break; + } + + if (page_no != ibuf_rec_get_page_no(rec) + || space != ibuf_rec_get_space(rec)) { + + goto count_later; + } + + volume += ibuf_rec_get_volume(rec); + + rec = page_rec_get_prev(rec); + } + + /* Look at the previous page */ + + prev_page_no = btr_page_get_prev(page, mtr); + + if (prev_page_no == FIL_NULL) { + + goto count_later; + } + + prev_page = buf_page_get(0, prev_page_no, RW_X_LATCH, mtr); + +#ifdef UNIV_SYNC_DEBUG + buf_page_dbg_add_level(prev_page, SYNC_TREE_NODE); +#endif /* UNIV_SYNC_DEBUG */ + + rec = page_get_supremum_rec(prev_page); + rec = page_rec_get_prev(rec); + + for (;;) { + if (rec == page_get_infimum_rec(prev_page)) { + + /* We cannot go to yet a previous page, because we + do not have the x-latch on it, and cannot acquire one + because of the latching order: we have to give up */ + + return(UNIV_PAGE_SIZE); + } + + if (page_no != ibuf_rec_get_page_no(rec) + || space != ibuf_rec_get_space(rec)) { + + goto count_later; + } + + volume += ibuf_rec_get_volume(rec); + + rec = page_rec_get_prev(rec); + } + +count_later: + rec = btr_pcur_get_rec(pcur); + + if (rec != page_get_supremum_rec(page)) { + rec = page_rec_get_next(rec); + } + + for (;;) { + if (rec == page_get_supremum_rec(page)) { + + break; + } + + if (page_no != ibuf_rec_get_page_no(rec) + || space != ibuf_rec_get_space(rec)) { + + return(volume); + } + + volume += ibuf_rec_get_volume(rec); + + rec = page_rec_get_next(rec); + } + + /* Look at the next page */ + + next_page_no = btr_page_get_next(page, mtr); + + if (next_page_no == FIL_NULL) { + + return(volume); + } + + next_page = buf_page_get(0, next_page_no, RW_X_LATCH, mtr); + +#ifdef UNIV_SYNC_DEBUG + buf_page_dbg_add_level(next_page, SYNC_TREE_NODE); +#endif /* UNIV_SYNC_DEBUG */ + + rec = page_get_infimum_rec(next_page); + rec = page_rec_get_next(rec); + + for (;;) { + if (rec == page_get_supremum_rec(next_page)) { + + /* We give up */ + + return(UNIV_PAGE_SIZE); + } + + if (page_no != ibuf_rec_get_page_no(rec) + || space != ibuf_rec_get_space(rec)) { + + return(volume); + } + + volume += ibuf_rec_get_volume(rec); + + rec = page_rec_get_next(rec); + } +} + +/************************************************************************* +Reads the biggest tablespace id from the high end of the insert buffer +tree and updates the counter in fil_system. */ + +void +ibuf_update_max_tablespace_id(void) +/*===============================*/ +{ + ulint max_space_id; + rec_t* rec; + byte* field; + ulint len; + ibuf_data_t* ibuf_data; + dict_index_t* ibuf_index; + btr_pcur_t pcur; + mtr_t mtr; + + ibuf_data = fil_space_get_ibuf_data(0); + + ibuf_index = ibuf_data->index; + ut_a(!ibuf_index->table->comp); + + ibuf_enter(); + + mtr_start(&mtr); + + btr_pcur_open_at_index_side(FALSE, ibuf_index, BTR_SEARCH_LEAF, + &pcur, TRUE, &mtr); + btr_pcur_move_to_prev(&pcur, &mtr); + + if (btr_pcur_is_before_first_on_page(&pcur, &mtr)) { + /* The tree is empty */ + + max_space_id = 0; + } else { + rec = btr_pcur_get_rec(&pcur); + + field = rec_get_nth_field_old(rec, 0, &len); + + ut_a(len == 4); + + max_space_id = mach_read_from_4(field); + } + + mtr_commit(&mtr); + ibuf_exit(); + + /* printf("Maximum space id in insert buffer %lu\n", max_space_id); */ + + fil_set_max_space_id_if_bigger(max_space_id); +} + +/************************************************************************* +Makes an index insert to the insert buffer, instead of directly to the disk +page, if this is possible. */ +static +ulint +ibuf_insert_low( +/*============*/ + /* out: DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */ + ulint mode, /* in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */ + dtuple_t* entry, /* in: index entry to insert */ + dict_index_t* index, /* in: index where to insert; must not be + unique or clustered */ + ulint space, /* in: space id where to insert */ + ulint page_no,/* in: page number where to insert */ + que_thr_t* thr) /* in: query thread */ +{ + big_rec_t* dummy_big_rec; + ulint entry_size; + btr_pcur_t pcur; + btr_cur_t* cursor; + dtuple_t* ibuf_entry; + mem_heap_t* heap; + ulint buffered; + rec_t* ins_rec; + ibool old_bit_value; + page_t* bitmap_page; + ibuf_data_t* ibuf_data; + dict_index_t* ibuf_index; + page_t* root; + ulint err; + ibool do_merge; + ulint space_ids[IBUF_MAX_N_PAGES_MERGED]; + ib_longlong space_versions[IBUF_MAX_N_PAGES_MERGED]; + ulint page_nos[IBUF_MAX_N_PAGES_MERGED]; + ulint n_stored; + ulint bits; + mtr_t mtr; + mtr_t bitmap_mtr; + + ut_a(!(index->type & DICT_CLUSTERED)); + ut_ad(dtuple_check_typed(entry)); + + ut_a(trx_sys_multiple_tablespace_format); + + do_merge = FALSE; + + /* Currently the insert buffer of space 0 takes care of inserts to all + tablespaces */ + + ibuf_data = fil_space_get_ibuf_data(0); + + ibuf_index = ibuf_data->index; + + mutex_enter(&ibuf_mutex); + + if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) { + /* Insert buffer is now too big, contract it but do not try + to insert */ + + mutex_exit(&ibuf_mutex); + +#ifdef UNIV_IBUF_DEBUG + fputs("Ibuf too big\n", stderr); +#endif + /* Use synchronous contract (== TRUE) */ + ibuf_contract(TRUE); + + return(DB_STRONG_FAIL); + } + + mutex_exit(&ibuf_mutex); + + if (mode == BTR_MODIFY_TREE) { + mutex_enter(&ibuf_pessimistic_insert_mutex); + + ibuf_enter(); + + mutex_enter(&ibuf_mutex); + + while (!ibuf_data_enough_free_for_insert(ibuf_data)) { + + mutex_exit(&ibuf_mutex); + + ibuf_exit(); + + mutex_exit(&ibuf_pessimistic_insert_mutex); + + err = ibuf_add_free_page(0, ibuf_data); + + if (err == DB_STRONG_FAIL) { + + return(err); + } + + mutex_enter(&ibuf_pessimistic_insert_mutex); + + ibuf_enter(); + + mutex_enter(&ibuf_mutex); + } + } else { + ibuf_enter(); + } + + entry_size = rec_get_converted_size(index, entry); + + heap = mem_heap_create(512); + + /* Build the entry which contains the space id and the page number as + the first fields and the type information for other fields, and which + will be inserted to the insert buffer. */ + + ibuf_entry = ibuf_entry_build(entry, index->table->comp, + space, page_no, heap); + + /* Open a cursor to the insert buffer tree to calculate if we can add + the new entry to it without exceeding the free space limit for the + page. */ + + mtr_start(&mtr); + + btr_pcur_open(ibuf_index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr); + + /* Find out the volume of already buffered inserts for the same index + page */ + buffered = ibuf_get_volume_buffered(&pcur, space, page_no, &mtr); + +#ifdef UNIV_IBUF_DEBUG + ut_a((buffered == 0) || ibuf_count_get(space, page_no)); +#endif + mtr_start(&bitmap_mtr); + + bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &bitmap_mtr); + + /* We check if the index page is suitable for buffered entries */ + + if (buf_page_peek(space, page_no) + || lock_rec_expl_exist_on_page(space, page_no)) { + err = DB_STRONG_FAIL; + + mtr_commit(&bitmap_mtr); + + goto function_exit; + } + + bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, + IBUF_BITMAP_FREE, &bitmap_mtr); + + if (buffered + entry_size + page_dir_calc_reserved_space(1) + > ibuf_index_page_calc_free_from_bits(bits)) { + mtr_commit(&bitmap_mtr); + + /* It may not fit */ + err = DB_STRONG_FAIL; + + do_merge = TRUE; + + ibuf_get_merge_page_nos(FALSE, btr_pcur_get_rec(&pcur), + space_ids, space_versions, + page_nos, &n_stored); + goto function_exit; + } + + /* Set the bitmap bit denoting that the insert buffer contains + buffered entries for this index page, if the bit is not set yet */ + + old_bit_value = ibuf_bitmap_page_get_bits(bitmap_page, page_no, + IBUF_BITMAP_BUFFERED, &bitmap_mtr); + if (!old_bit_value) { + ibuf_bitmap_page_set_bits(bitmap_page, page_no, + IBUF_BITMAP_BUFFERED, TRUE, &bitmap_mtr); + } + + mtr_commit(&bitmap_mtr); + + cursor = btr_pcur_get_btr_cur(&pcur); + + if (mode == BTR_MODIFY_PREV) { + err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor, + ibuf_entry, &ins_rec, + &dummy_big_rec, thr, + &mtr); + if (err == DB_SUCCESS) { + /* Update the page max trx id field */ + page_update_max_trx_id(buf_frame_align(ins_rec), + thr_get_trx(thr)->id); + } + } else { + ut_ad(mode == BTR_MODIFY_TREE); + + /* We acquire an x-latch to the root page before the insert, + because a pessimistic insert releases the tree x-latch, + which would cause the x-latching of the root after that to + break the latching order. */ + + root = ibuf_tree_root_get(ibuf_data, 0, &mtr); + + err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG + | BTR_NO_UNDO_LOG_FLAG, + cursor, + ibuf_entry, &ins_rec, + &dummy_big_rec, thr, + &mtr); + if (err == DB_SUCCESS) { + /* Update the page max trx id field */ + page_update_max_trx_id(buf_frame_align(ins_rec), + thr_get_trx(thr)->id); + } + + ibuf_data_sizes_update(ibuf_data, root, &mtr); + } + +function_exit: +#ifdef UNIV_IBUF_DEBUG + if (err == DB_SUCCESS) { + printf( +"Incrementing ibuf count of space %lu page %lu\n" +"from %lu by 1\n", space, page_no, ibuf_count_get(space, page_no)); + + ibuf_count_set(space, page_no, + ibuf_count_get(space, page_no) + 1); + } +#endif + if (mode == BTR_MODIFY_TREE) { + ut_ad(ibuf_validate_low()); + + mutex_exit(&ibuf_mutex); + mutex_exit(&ibuf_pessimistic_insert_mutex); + } + + mtr_commit(&mtr); + btr_pcur_close(&pcur); + ibuf_exit(); + + mem_heap_free(heap); + + mutex_enter(&ibuf_mutex); + + if (err == DB_SUCCESS) { + ibuf_data->empty = FALSE; + ibuf_data->n_inserts++; + } + + mutex_exit(&ibuf_mutex); + + if ((mode == BTR_MODIFY_TREE) && (err == DB_SUCCESS)) { + ibuf_contract_after_insert(entry_size); + } + + if (do_merge) { +#ifdef UNIV_IBUF_DEBUG + ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED); +#endif + buf_read_ibuf_merge_pages(FALSE, space_ids, space_versions, + page_nos, n_stored); + } + + return(err); +} + +/************************************************************************* +Makes an index insert to the insert buffer, instead of directly to the disk +page, if this is possible. Does not do insert if the index is clustered +or unique. */ + +ibool +ibuf_insert( +/*========*/ + /* out: TRUE if success */ + dtuple_t* entry, /* in: index entry to insert */ + dict_index_t* index, /* in: index where to insert */ + ulint space, /* in: space id where to insert */ + ulint page_no,/* in: page number where to insert */ + que_thr_t* thr) /* in: query thread */ +{ + ulint err; + + ut_a(trx_sys_multiple_tablespace_format); + ut_ad(dtuple_check_typed(entry)); + + ut_a(!(index->type & DICT_CLUSTERED)); + + if (rec_get_converted_size(index, entry) + >= page_get_free_space_of_empty(index->table->comp) / 2) { + return(FALSE); + } + + err = ibuf_insert_low(BTR_MODIFY_PREV, entry, index, space, page_no, + thr); + if (err == DB_FAIL) { + err = ibuf_insert_low(BTR_MODIFY_TREE, entry, index, space, + page_no, thr); + } + + if (err == DB_SUCCESS) { +#ifdef UNIV_IBUF_DEBUG + /* fprintf(stderr, "Ibuf insert for page no %lu of index %s\n", + page_no, index->name); */ +#endif + return(TRUE); + + } else { + ut_a(err == DB_STRONG_FAIL); + + return(FALSE); + } +} + +/************************************************************************ +During merge, inserts to an index page a secondary index entry extracted +from the insert buffer. */ +static +void +ibuf_insert_to_index_page( +/*======================*/ + dtuple_t* entry, /* in: buffered entry to insert */ + page_t* page, /* in: index page where the buffered entry + should be placed */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ +{ + page_cur_t page_cur; + ulint low_match; + rec_t* rec; + page_t* bitmap_page; + ulint old_bits; + + ut_ad(ibuf_inside()); + ut_ad(dtuple_check_typed(entry)); + + if (index->table->comp != page_is_comp(page)) { + fputs( +"InnoDB: Trying to insert a record from the insert buffer to an index page\n" +"InnoDB: but the 'compact' flag does not match!\n", stderr); + goto dump; + } + + rec = page_rec_get_next(page_get_infimum_rec(page)); + + if (rec_get_n_fields(rec, index) != dtuple_get_n_fields(entry)) { + fputs( +"InnoDB: Trying to insert a record from the insert buffer to an index page\n" +"InnoDB: but the number of fields does not match!\n", stderr); + dump: + buf_page_print(page); + + dtuple_print(stderr, entry); + + fputs( +"InnoDB: The table where where this index record belongs\n" +"InnoDB: is now probably corrupt. Please run CHECK TABLE on\n" +"InnoDB: your tables.\n" +"InnoDB: Send a detailed bug report to mysql@lists.mysql.com!\n", stderr); + + return; + } + + low_match = page_cur_search(page, index, entry, + PAGE_CUR_LE, &page_cur); + + if (low_match == dtuple_get_n_fields(entry)) { + rec = page_cur_get_rec(&page_cur); + + btr_cur_del_unmark_for_ibuf(rec, index, mtr); + } else { + rec = page_cur_tuple_insert(&page_cur, entry, index, mtr); + + if (rec == NULL) { + /* If the record did not fit, reorganize */ + + btr_page_reorganize(page, index, mtr); + + page_cur_search(page, index, entry, + PAGE_CUR_LE, &page_cur); + + /* This time the record must fit */ + if (!page_cur_tuple_insert(&page_cur, entry, + index, mtr)) { + + ut_print_timestamp(stderr); + + fprintf(stderr, +"InnoDB: Error: Insert buffer insert fails; page free %lu, dtuple size %lu\n", + (ulong) page_get_max_insert_size(page, 1), + (ulong) rec_get_converted_size(index, entry)); + fputs("InnoDB: Cannot insert index record ", + stderr); + dtuple_print(stderr, entry); + fputs( +"\nInnoDB: The table where where this index record belongs\n" +"InnoDB: is now probably corrupt. Please run CHECK TABLE on\n" +"InnoDB: that table.\n", stderr); + + bitmap_page = ibuf_bitmap_get_map_page( + buf_frame_get_space_id(page), + buf_frame_get_page_no(page), + mtr); + old_bits = ibuf_bitmap_page_get_bits( + bitmap_page, + buf_frame_get_page_no(page), + IBUF_BITMAP_FREE, mtr); + + fprintf(stderr, "Bitmap bits %lu\n", (ulong) old_bits); + + fputs( +"InnoDB: Submit a detailed bug report to http://bugs.mysql.com\n", stderr); + } + } + } +} + +/************************************************************************* +Deletes from ibuf the record on which pcur is positioned. If we have to +resort to a pessimistic delete, this function commits mtr and closes +the cursor. */ +static +ibool +ibuf_delete_rec( +/*============*/ + /* out: TRUE if mtr was committed and pcur + closed in this operation */ + ulint space, /* in: space id */ + ulint page_no,/* in: index page number where the record + should belong */ + btr_pcur_t* pcur, /* in: pcur positioned on the record to + delete, having latch mode BTR_MODIFY_LEAF */ + dtuple_t* search_tuple, + /* in: search tuple for entries of page_no */ + mtr_t* mtr) /* in: mtr */ +{ + ibool success; + ibuf_data_t* ibuf_data; + page_t* root; + ulint err; + + ut_ad(ibuf_inside()); + + success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr); + + if (success) { +#ifdef UNIV_IBUF_DEBUG + printf( +"Decrementing ibuf count of space %lu page %lu\n" +"from %lu by 1\n", space, page_no, ibuf_count_get(space, page_no)); + ibuf_count_set(space, page_no, + ibuf_count_get(space, page_no) - 1); +#endif + return(FALSE); + } + + /* We have to resort to a pessimistic delete from ibuf */ + btr_pcur_store_position(pcur, mtr); + + btr_pcur_commit_specify_mtr(pcur, mtr); + + /* Currently the insert buffer of space 0 takes care of inserts to all + tablespaces */ + + ibuf_data = fil_space_get_ibuf_data(0); + + mutex_enter(&ibuf_mutex); + + mtr_start(mtr); + + success = btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr); + + if (!success) { + fprintf(stderr, + "InnoDB: ERROR: Submit the output to http://bugs.mysql.com\n" + "InnoDB: ibuf cursor restoration fails!\n" + "InnoDB: ibuf record inserted to page %lu\n", (ulong) page_no); + fflush(stderr); + + rec_print_old(stderr, btr_pcur_get_rec(pcur)); + rec_print_old(stderr, pcur->old_rec); + dtuple_print(stderr, search_tuple); + + rec_print_old(stderr, + page_rec_get_next(btr_pcur_get_rec(pcur))); + fflush(stderr); + + btr_pcur_commit_specify_mtr(pcur, mtr); + + fputs("InnoDB: Validating insert buffer tree:\n", stderr); + ut_a(btr_validate_tree(ibuf_data->index->tree)); + + fprintf(stderr, "InnoDB: ibuf tree ok\n"); + fflush(stderr); + + btr_pcur_close(pcur); + + mutex_exit(&ibuf_mutex); + + return(TRUE); + } + + root = ibuf_tree_root_get(ibuf_data, 0, mtr); + + btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur), + FALSE, mtr); + ut_a(err == DB_SUCCESS); + +#ifdef UNIV_IBUF_DEBUG + ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1); +#else + UT_NOT_USED(space); +#endif + ibuf_data_sizes_update(ibuf_data, root, mtr); + + ut_ad(ibuf_validate_low()); + + btr_pcur_commit_specify_mtr(pcur, mtr); + + btr_pcur_close(pcur); + + mutex_exit(&ibuf_mutex); + + return(TRUE); +} + +/************************************************************************* +When an index page is read from a disk to the buffer pool, this function +inserts to the page the possible index entries buffered in the insert buffer. +The entries are deleted from the insert buffer. If the page is not read, but +created in the buffer pool, this function deletes its buffered entries from +the insert buffer; there can exist entries for such a page if the page +belonged to an index which subsequently was dropped. */ + +void +ibuf_merge_or_delete_for_page( +/*==========================*/ + page_t* page, /* in: if page has been read from disk, pointer to + the page x-latched, else NULL */ + ulint space, /* in: space id of the index page */ + ulint page_no,/* in: page number of the index page */ + ibool update_ibuf_bitmap)/* in: normally this is set to TRUE, but if + we have deleted or are deleting the tablespace, then we + naturally do not want to update a non-existent bitmap + page */ +{ + mem_heap_t* heap; + btr_pcur_t pcur; + dtuple_t* entry; + dtuple_t* search_tuple; + rec_t* ibuf_rec; + buf_block_t* block; + page_t* bitmap_page; + ibuf_data_t* ibuf_data; + ulint n_inserts; +#ifdef UNIV_IBUF_DEBUG + ulint volume; +#endif + ibool tablespace_being_deleted = FALSE; + ibool corruption_noticed = FALSE; + mtr_t mtr; + + if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { + + return; + } + +#ifdef UNIV_LOG_DEBUG + if (space % 2 != 0) { + + fputs("No ibuf operation in a replicate space\n", stderr); + + return; + } +#endif + if (ibuf_fixed_addr_page(page_no) || fsp_descr_page(page_no) + || trx_sys_hdr_page(space, page_no)) { + return; + } + + if (update_ibuf_bitmap) { + /* If the following returns FALSE, we get the counter + incremented, and must decrement it when we leave this + function. When the counter is > 0, that prevents tablespace + from being dropped. */ + + tablespace_being_deleted = fil_inc_pending_ibuf_merges(space); + + if (tablespace_being_deleted) { + /* Do not try to read the bitmap page from space; + just delete the ibuf records for the page */ + + page = NULL; + update_ibuf_bitmap = FALSE; + } + } + + if (update_ibuf_bitmap) { + mtr_start(&mtr); + bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr); + + if (!ibuf_bitmap_page_get_bits(bitmap_page, page_no, + IBUF_BITMAP_BUFFERED, &mtr)) { + /* No inserts buffered for this page */ + mtr_commit(&mtr); + + if (!tablespace_being_deleted) { + fil_decr_pending_ibuf_merges(space); + } + + return; + } + mtr_commit(&mtr); + } + + /* Currently the insert buffer of space 0 takes care of inserts to all + tablespaces */ + + ibuf_data = fil_space_get_ibuf_data(0); + + ibuf_enter(); + + heap = mem_heap_create(512); + + if (!trx_sys_multiple_tablespace_format) { + ut_a(trx_doublewrite_must_reset_space_ids); + search_tuple = ibuf_search_tuple_build(space, page_no, heap); + } else { + search_tuple = ibuf_new_search_tuple_build(space, page_no, + heap); + } + + if (page) { + /* Move the ownership of the x-latch on the page to this OS + thread, so that we can acquire a second x-latch on it. This + is needed for the insert operations to the index page to pass + the debug checks. */ + + block = buf_block_align(page); + rw_lock_x_lock_move_ownership(&(block->lock)); + + if (fil_page_get_type(page) != FIL_PAGE_INDEX) { + + corruption_noticed = TRUE; + + ut_print_timestamp(stderr); + + mtr_start(&mtr); + + fputs(" InnoDB: Dump of the ibuf bitmap page:\n", + stderr); + + bitmap_page = ibuf_bitmap_get_map_page(space, page_no, + &mtr); + buf_page_print(bitmap_page); + + mtr_commit(&mtr); + + fputs("\nInnoDB: Dump of the page:\n", stderr); + + buf_page_print(page); + + fprintf(stderr, +"InnoDB: Error: corruption in the tablespace. Bitmap shows insert\n" +"InnoDB: buffer records to page n:o %lu though the page\n" +"InnoDB: type is %lu, which is not an index page!\n" +"InnoDB: We try to resolve the problem by skipping the insert buffer\n" +"InnoDB: merge for this page. Please run CHECK TABLE on your tables\n" +"InnoDB: to determine if they are corrupt after this.\n\n" +"InnoDB: Please submit a detailed bug report to http://bugs.mysql.com\n\n", + (ulong) page_no, + (ulong) fil_page_get_type(page)); + } + } + + n_inserts = 0; +#ifdef UNIV_IBUF_DEBUG + volume = 0; +#endif +loop: + mtr_start(&mtr); + + if (page) { + ibool success = buf_page_get_known_nowait(RW_X_LATCH, page, + BUF_KEEP_OLD, + __FILE__, __LINE__, + &mtr); + ut_a(success); +#ifdef UNIV_SYNC_DEBUG + buf_page_dbg_add_level(page, SYNC_TREE_NODE); +#endif /* UNIV_SYNC_DEBUG */ + } + + /* Position pcur in the insert buffer at the first entry for this + index page */ + btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE, + BTR_MODIFY_LEAF, &pcur, &mtr); + if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { + ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr)); + + goto reset_bit; + } + + for (;;) { + ut_ad(btr_pcur_is_on_user_rec(&pcur, &mtr)); + + ibuf_rec = btr_pcur_get_rec(&pcur); + + /* Check if the entry is for this index page */ + if (ibuf_rec_get_page_no(ibuf_rec) != page_no + || ibuf_rec_get_space(ibuf_rec) != space) { + if (page) { + page_header_reset_last_insert(page, &mtr); + } + goto reset_bit; + } + + if (corruption_noticed) { + fputs("InnoDB: Discarding record\n ", stderr); + rec_print_old(stderr, ibuf_rec); + fputs("\n from the insert buffer!\n\n", stderr); + } else if (page) { + /* Now we have at pcur a record which should be + inserted to the index page; NOTE that the call below + copies pointers to fields in ibuf_rec, and we must + keep the latch to the ibuf_rec page until the + insertion is finished! */ + dict_index_t* dummy_index; + dulint max_trx_id = page_get_max_trx_id( + buf_frame_align(ibuf_rec)); + page_update_max_trx_id(page, max_trx_id); + + entry = ibuf_build_entry_from_ibuf_rec(ibuf_rec, + heap, &dummy_index); +#ifdef UNIV_IBUF_DEBUG + volume += rec_get_converted_size(dummy_index, entry) + + page_dir_calc_reserved_space(1); + ut_a(volume <= 4 * UNIV_PAGE_SIZE + / IBUF_PAGE_SIZE_PER_FREE_SPACE); +#endif + ibuf_insert_to_index_page(entry, page, + dummy_index, &mtr); + ibuf_dummy_index_free(dummy_index); + } + + n_inserts++; + + /* Delete the record from ibuf */ + if (ibuf_delete_rec(space, page_no, &pcur, search_tuple, + &mtr)) { + /* Deletion was pessimistic and mtr was committed: + we start from the beginning again */ + + goto loop; + } + + if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) { + mtr_commit(&mtr); + btr_pcur_close(&pcur); + + goto loop; + } + } + +reset_bit: +#ifdef UNIV_IBUF_DEBUG + if (ibuf_count_get(space, page_no) > 0) { + /* btr_print_tree(ibuf_data->index->tree, 100); + ibuf_print(); */ + } +#endif + if (update_ibuf_bitmap) { + bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr); + ibuf_bitmap_page_set_bits(bitmap_page, page_no, + IBUF_BITMAP_BUFFERED, FALSE, &mtr); + if (page) { + ulint old_bits = ibuf_bitmap_page_get_bits(bitmap_page, + page_no, IBUF_BITMAP_FREE, &mtr); + ulint new_bits = ibuf_index_page_calc_free(page); +#ifdef UNIV_IBUF_DEBUG + /* fprintf(stderr, "Old bits %lu new bits %lu max size %lu\n", + old_bits, new_bits, + page_get_max_insert_size_after_reorganize(page, 1)); */ +#endif + if (old_bits != new_bits) { + ibuf_bitmap_page_set_bits(bitmap_page, page_no, + IBUF_BITMAP_FREE, + new_bits, &mtr); + } + } + } +#ifdef UNIV_IBUF_DEBUG + /* fprintf(stderr, + "Ibuf merge %lu records volume %lu to page no %lu\n", + n_inserts, volume, page_no); */ +#endif + mtr_commit(&mtr); + btr_pcur_close(&pcur); + mem_heap_free(heap); + + /* Protect our statistics keeping from race conditions */ + mutex_enter(&ibuf_mutex); + + ibuf_data->n_merges++; + ibuf_data->n_merged_recs += n_inserts; + + mutex_exit(&ibuf_mutex); + + if (update_ibuf_bitmap && !tablespace_being_deleted) { + + fil_decr_pending_ibuf_merges(space); + } + + ibuf_exit(); +#ifdef UNIV_IBUF_DEBUG + ut_a(ibuf_count_get(space, page_no) == 0); +#endif +} + +/************************************************************************* +Deletes all entries in the insert buffer for a given space id. This is used +in DISCARD TABLESPACE and IMPORT TABLESPACE. +NOTE: this does not update the page free bitmaps in the space. The space will +become CORRUPT when you call this function! */ + +void +ibuf_delete_for_discarded_space( +/*============================*/ + ulint space) /* in: space id */ +{ + mem_heap_t* heap; + btr_pcur_t pcur; + dtuple_t* search_tuple; + rec_t* ibuf_rec; + ulint page_no; + ibool closed; + ibuf_data_t* ibuf_data; + ulint n_inserts; + mtr_t mtr; + + /* Currently the insert buffer of space 0 takes care of inserts to all + tablespaces */ + + ibuf_data = fil_space_get_ibuf_data(0); + + heap = mem_heap_create(512); + + /* Use page number 0 to build the search tuple so that we get the + cursor positioned at the first entry for this space id */ + + search_tuple = ibuf_new_search_tuple_build(space, 0, heap); + + n_inserts = 0; +loop: + ibuf_enter(); + + mtr_start(&mtr); + + /* Position pcur in the insert buffer at the first entry for the + space */ + btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE, + BTR_MODIFY_LEAF, &pcur, &mtr); + if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { + ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr)); + + goto leave_loop; + } + + for (;;) { + ut_ad(btr_pcur_is_on_user_rec(&pcur, &mtr)); + + ibuf_rec = btr_pcur_get_rec(&pcur); + + /* Check if the entry is for this space */ + if (ibuf_rec_get_space(ibuf_rec) != space) { + + goto leave_loop; + } + + page_no = ibuf_rec_get_page_no(ibuf_rec); + + n_inserts++; + + /* Delete the record from ibuf */ + closed = ibuf_delete_rec(space, page_no, &pcur, search_tuple, + &mtr); + if (closed) { + /* Deletion was pessimistic and mtr was committed: + we start from the beginning again */ + + ibuf_exit(); + + goto loop; + } + + if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) { + mtr_commit(&mtr); + btr_pcur_close(&pcur); + + ibuf_exit(); + + goto loop; + } + } + +leave_loop: + mtr_commit(&mtr); + btr_pcur_close(&pcur); + + /* Protect our statistics keeping from race conditions */ + mutex_enter(&ibuf_mutex); + + ibuf_data->n_merges++; + ibuf_data->n_merged_recs += n_inserts; + + mutex_exit(&ibuf_mutex); + /* + fprintf(stderr, + "InnoDB: Discarded %lu ibuf entries for space %lu\n", + (ulong) n_inserts, (ulong) space); + */ + ibuf_exit(); + + mem_heap_free(heap); +} + + +/********************************************************************** +Validates the ibuf data structures when the caller owns ibuf_mutex. */ + +ibool +ibuf_validate_low(void) +/*===================*/ + /* out: TRUE if ok */ +{ + ibuf_data_t* data; + ulint sum_sizes; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&ibuf_mutex)); +#endif /* UNIV_SYNC_DEBUG */ + + sum_sizes = 0; + + data = UT_LIST_GET_FIRST(ibuf->data_list); + + while (data) { + sum_sizes += data->size; + + data = UT_LIST_GET_NEXT(data_list, data); + } + + ut_a(sum_sizes == ibuf->size); + + return(TRUE); +} + +/********************************************************************** +Looks if the insert buffer is empty. */ + +ibool +ibuf_is_empty(void) +/*===============*/ + /* out: TRUE if empty */ +{ + ibuf_data_t* data; + ibool is_empty; + page_t* root; + mtr_t mtr; + + ibuf_enter(); + + mutex_enter(&ibuf_mutex); + + data = UT_LIST_GET_FIRST(ibuf->data_list); + + mtr_start(&mtr); + + root = ibuf_tree_root_get(data, 0, &mtr); + + if (page_get_n_recs(root) == 0) { + + is_empty = TRUE; + + if (data->empty == FALSE) { + fprintf(stderr, +"InnoDB: Warning: insert buffer tree is empty but the data struct does not\n" +"InnoDB: know it. This condition is legal if the master thread has not yet\n" +"InnoDB: run to completion.\n"); + } + } else { + ut_a(data->empty == FALSE); + + is_empty = FALSE; + } + + mtr_commit(&mtr); + + ut_a(data->space == 0); + + mutex_exit(&ibuf_mutex); + + ibuf_exit(); + + return(is_empty); +} + +/********************************************************************** +Prints info of ibuf. */ + +void +ibuf_print( +/*=======*/ + FILE* file) /* in: file where to print */ +{ + ibuf_data_t* data; +#ifdef UNIV_IBUF_DEBUG + ulint i; +#endif + + mutex_enter(&ibuf_mutex); + + data = UT_LIST_GET_FIRST(ibuf->data_list); + + while (data) { + fprintf(file, + "Ibuf for space %lu: size %lu, free list len %lu, seg size %lu,", + (ulong) data->space, (ulong) data->size, + (ulong) data->free_list_len, + (ulong) data->seg_size); + + if (data->empty) { + fputs(" is empty\n", file); + } else { + fputs(" is not empty\n", file); + } + fprintf(file, + "Ibuf for space %lu: size %lu, free list len %lu, seg size %lu,\n" + "%lu inserts, %lu merged recs, %lu merges\n", + (ulong) data->space, + (ulong) data->size, + (ulong) data->free_list_len, + (ulong) data->seg_size, + (ulong) data->n_inserts, + (ulong) data->n_merged_recs, + (ulong) data->n_merges); +#ifdef UNIV_IBUF_DEBUG + for (i = 0; i < IBUF_COUNT_N_PAGES; i++) { + if (ibuf_count_get(data->space, i) > 0) { + + fprintf(stderr, + "Ibuf count for page %lu is %lu\n", + (ulong) i, + (ulong) ibuf_count_get(data->space, i)); + } + } +#endif + data = UT_LIST_GET_NEXT(data_list, data); + } + + mutex_exit(&ibuf_mutex); +} |