diff options
Diffstat (limited to 'innobase/include')
62 files changed, 1150 insertions, 306 deletions
diff --git a/innobase/include/Makefile.am b/innobase/include/Makefile.am index fd5cc8b1a80..8664f6dfc17 100644 --- a/innobase/include/Makefile.am +++ b/innobase/include/Makefile.am @@ -55,5 +55,7 @@ noinst_HEADERS = btr0btr.h btr0btr.ic btr0cur.h btr0cur.ic \ ut0dbg.h ut0lst.h ut0mem.h ut0mem.ic ut0rnd.h ut0rnd.ic \ ut0sort.h ut0ut.h ut0ut.ic +EXTRA_DIST = Makefile.i + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/innobase/include/Makefile.i b/innobase/include/Makefile.i index 8c7e9910f26..f3e3fbe989e 100644 --- a/innobase/include/Makefile.i +++ b/innobase/include/Makefile.i @@ -1,8 +1,6 @@ # Makefile included in Makefile.am in every subdirectory -libsdir = ../libs - -INCLUDES = -I../../include -I../include +INCLUDES = -I$(srcdir)/../include -I$(srcdir)/../../include -I../../include # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/innobase/include/btr0btr.h b/innobase/include/btr0btr.h index 7e9d4b73d90..8606fcd2a5c 100644 --- a/innobase/include/btr0btr.h +++ b/innobase/include/btr0btr.h @@ -408,6 +408,19 @@ btr_print_tree( dict_tree_t* tree, /* in: tree */ ulint width); /* in: print this many entries from start and end */ +/**************************************************************** +Checks the size and number of fields in a record based on the definition of +the index. */ + +ibool +btr_index_rec_validate( +/*====================*/ + /* out: TRUE if ok */ + rec_t* rec, /* in: index record */ + dict_index_t* index, /* in: index */ + ibool dump_on_error); /* in: TRUE if the function + should print hex dump of record + and page on error */ /****************************************************************** Checks the consistency of an index tree. 
*/ diff --git a/innobase/include/btr0btr.ic b/innobase/include/btr0btr.ic index 5c1c89e9840..09006828cc9 100644 --- a/innobase/include/btr0btr.ic +++ b/innobase/include/btr0btr.ic @@ -89,7 +89,7 @@ btr_page_get_level( /*===============*/ /* out: level, leaf level == 0 */ page_t* page, /* in: index page */ - mtr_t* mtr) /* in: mini-transaction handle */ + mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */ { ut_ad(page && mtr); @@ -121,7 +121,7 @@ btr_page_get_next( /*==============*/ /* out: next page number */ page_t* page, /* in: index page */ - mtr_t* mtr) /* in: mini-transaction handle */ + mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */ { ut_ad(page && mtr); ut_ad(mtr_memo_contains(mtr, buf_block_align(page), @@ -155,7 +155,7 @@ btr_page_get_prev( /*==============*/ /* out: prev page number */ page_t* page, /* in: index page */ - mtr_t* mtr) /* in: mini-transaction handle */ + mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */ { ut_ad(page && mtr); diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h index 7039ceba245..31aecef8104 100644 --- a/innobase/include/btr0cur.h +++ b/innobase/include/btr0cur.h @@ -188,22 +188,6 @@ btr_cur_pessimistic_insert( que_thr_t* thr, /* in: query thread or NULL */ mtr_t* mtr); /* in: mtr */ /***************************************************************** -Updates a secondary index record when the update causes no size -changes in its fields. The only case when this function is currently -called is that in a char field characters change to others which -are identified in the collation order. 
*/ - -ulint -btr_cur_update_sec_rec_in_place( -/*============================*/ - /* out: DB_SUCCESS or error number */ - btr_cur_t* cursor, /* in: cursor on the record to update; - cursor stays valid and positioned on the - same record */ - upd_t* update, /* in: update vector */ - que_thr_t* thr, /* in: query thread */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** Updates a record when the update causes no size changes in its fields. */ ulint @@ -507,7 +491,13 @@ void btr_free_externally_stored_field( /*=============================*/ dict_index_t* index, /* in: index of the data, the index - tree MUST be X-latched */ + tree MUST be X-latched; if the tree + height is 1, then also the root page + must be X-latched! (this is relevant + in the case this function is called + from purge where 'data' is located on + an undo log page, not an index + page) */ byte* data, /* in: internally stored data + reference to the externally stored part */ @@ -684,7 +674,13 @@ and sleep this many microseconds in between */ #define BTR_CUR_RETRY_DELETE_N_TIMES 100 #define BTR_CUR_RETRY_SLEEP_TIME 50000 -/* The reference in a field of which data is stored on a different page */ +/* The reference in a field for which data is stored on a different page. +The reference is at the end of the 'locally' stored part of the field. +'Locally' means storage in the index record. +We store locally a long enough prefix of each column so that we can determine +the ordering parts of each index record without looking into the externally +stored part. 
*/ + /*--------------------------------------*/ #define BTR_EXTERN_SPACE_ID 0 /* space id where stored */ #define BTR_EXTERN_PAGE_NO 4 /* page no where stored */ diff --git a/innobase/include/btr0pcur.h b/innobase/include/btr0pcur.h index 05b55e4491d..9d07dd0de18 100644 --- a/innobase/include/btr0pcur.h +++ b/innobase/include/btr0pcur.h @@ -298,6 +298,14 @@ btr_pcur_move_to_prev( function may release the page latch */ mtr_t* mtr); /* in: mtr */ /************************************************************* +Moves the persistent cursor to the last record on the same page. */ +UNIV_INLINE +void +btr_pcur_move_to_last_on_page( +/*==========================*/ + btr_pcur_t* cursor, /* in: persistent cursor */ + mtr_t* mtr); /* in: mtr */ +/************************************************************* Moves the persistent cursor to the next user record in the tree. If no user records are left, the cursor ends up 'after last in tree'. */ UNIV_INLINE diff --git a/innobase/include/btr0pcur.ic b/innobase/include/btr0pcur.ic index a60140e4aa9..a1db2cc52dd 100644 --- a/innobase/include/btr0pcur.ic +++ b/innobase/include/btr0pcur.ic @@ -285,6 +285,24 @@ btr_pcur_move_to_prev_on_page( } /************************************************************* +Moves the persistent cursor to the last record on the same page. */ +UNIV_INLINE +void +btr_pcur_move_to_last_on_page( +/*==========================*/ + btr_pcur_t* cursor, /* in: persistent cursor */ + mtr_t* mtr) /* in: mtr */ +{ + UT_NOT_USED(mtr); + ut_ad(cursor->latch_mode != BTR_NO_LATCHES); + + page_cur_set_after_last(buf_frame_align(btr_pcur_get_rec(cursor)), + btr_pcur_get_page_cur(cursor)); + + cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; +} + +/************************************************************* Moves the persistent cursor to the next user record in the tree. If no user records are left, the cursor ends up 'after last in tree'. 
*/ UNIV_INLINE diff --git a/innobase/include/btr0sea.h b/innobase/include/btr0sea.h index 14feca5d5c5..ee762a12221 100644 --- a/innobase/include/btr0sea.h +++ b/innobase/include/btr0sea.h @@ -234,10 +234,16 @@ struct btr_search_sys_struct{ extern btr_search_sys_t* btr_search_sys; /* The latch protecting the adaptive search system: this latch protects the -(1) positions of records on those pages where a hash index has been built. -NOTE: It does not protect values of non-ordering fields within a record from -being updated in-place! We can use fact (1) to perform unique searches to -indexes. */ +(1) hash index; +(2) columns of a record to which we have a pointer in the hash index; + +but does NOT protect: + +(3) next record offset field in a record; +(4) next or previous records on the same page. + +Bear in mind (3) and (4) when using the hash index. +*/ extern rw_lock_t* btr_search_latch_temp; diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h index 591c0ec54ab..2963efd6396 100644 --- a/innobase/include/buf0buf.h +++ b/innobase/include/buf0buf.h @@ -274,6 +274,15 @@ buf_page_peek_block( ulint space, /* in: space id */ ulint offset);/* in: page number */ /************************************************************************ +Resets the check_index_page_at_flush field of a page if found in the buffer +pool. */ + +void +buf_reset_check_index_page_at_flush( +/*================================*/ + ulint space, /* in: space id */ + ulint offset);/* in: page number */ +/************************************************************************ Sets file_page_was_freed TRUE if the page is found in the buffer pool. This function should be called when we free a file page and want the debug version to check that it is not accessed any more unless @@ -355,11 +364,24 @@ to a file. Note that we must be careful to calculate the same value on 32-bit and 64-bit architectures. 
*/ ulint -buf_calc_page_checksum( -/*===================*/ +buf_calc_page_new_checksum( +/*=======================*/ /* out: checksum */ byte* page); /* in: buffer page */ /************************************************************************ +In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only +looked at the first few bytes of the page. This calculates that old +checksum. +NOTE: we must first store the new formula checksum to +FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum +because this takes that field as an input! */ + +ulint +buf_calc_page_old_checksum( +/*=======================*/ + /* out: checksum */ + byte* page); /* in: buffer page */ +/************************************************************************ Checks if a page is corrupt. */ ibool @@ -463,6 +485,13 @@ buf_print_io( /*=========*/ char* buf, /* in/out: buffer where to print */ char* buf_end);/* in: buffer end */ +/************************************************************************* +Returns the ratio in percents of modified pages in the buffer pool / +database pages in the buffer pool. */ + +ulint +buf_get_modified_ratio_pct(void); +/*============================*/ /************************************************************************** Refreshes the statistics used to print per-second averages. */ @@ -648,6 +677,14 @@ struct buf_block_struct{ then it can wait for this rw-lock */ buf_block_t* hash; /* node used in chaining to the page hash table */ + ibool check_index_page_at_flush; + /* TRUE if we know that this is + an index page, and want the database + to check its consistency before flush; + note that there may be pages in the + buffer pool which are index pages, + but this flag is not set because + we do not keep track of all pages */ /* 2. Page flushing fields */ UT_LIST_NODE_T(buf_block_t) flush_list; @@ -711,8 +748,8 @@ struct buf_block_struct{ bufferfixed, or (2) the thread has an x-latch on the block */ - /* 5. 
Hash search fields: NOTE that these fields are protected by - btr_search_mutex */ + /* 5. Hash search fields: NOTE that the first 4 fields are NOT + protected by any semaphore! */ ulint n_hash_helps; /* counter which controls building of a new hash index for the page */ @@ -725,6 +762,9 @@ struct buf_block_struct{ whether the leftmost record of several records with the same prefix should be indexed in the hash index */ + + /* The following 4 fields are protected by btr_search_latch: */ + ibool is_hashed; /* TRUE if hash index has already been built on this page; note that it does not guarantee that the index is diff --git a/innobase/include/buf0buf.ic b/innobase/include/buf0buf.ic index 51e2541e04d..7227c79dc6a 100644 --- a/innobase/include/buf0buf.ic +++ b/innobase/include/buf0buf.ic @@ -652,9 +652,10 @@ UNIV_INLINE void buf_page_dbg_add_level( /*===================*/ - buf_frame_t* frame, /* in: buffer page where we have acquired - a latch */ - ulint level) /* in: latching order level */ + buf_frame_t* frame __attribute__((unused)), /* in: buffer page + where we have acquired latch */ + ulint level __attribute__((unused))) /* in: latching order + level */ { #ifdef UNIV_SYNC_DEBUG sync_thread_add_level(&(buf_block_align(frame)->lock), level); diff --git a/innobase/include/buf0lru.h b/innobase/include/buf0lru.h index 946b6c4e31d..eb9d43d3b93 100644 --- a/innobase/include/buf0lru.h +++ b/innobase/include/buf0lru.h @@ -46,6 +46,20 @@ buf_LRU_get_recent_limit(void); /*==========================*/ /* out: the limit; zero if could not determine it */ /********************************************************************** +Look for a replaceable block from the end of the LRU list and put it to +the free list if found. 
*/ + +ibool +buf_LRU_search_and_free_block( +/*==========================*/ + /* out: TRUE if freed */ + ulint n_iterations); /* in: how many times this has been called + repeatedly without result: a high value means + that we should search farther; if value is + k < 10, then we only search k/10 * number + of pages in the buffer pool from the end + of the LRU list */ +/********************************************************************** Returns a free block from the buf_pool. The block is taken off the free list. If it is empty, blocks are moved from the end of the LRU list to the free list. */ @@ -86,17 +100,6 @@ void buf_LRU_make_block_old( /*===================*/ buf_block_t* block); /* in: control block */ -/********************************************************************** -Look for a replaceable block from the end of the LRU list and put it to -the free list if found. */ - -ibool -buf_LRU_search_and_free_block( -/*==========================*/ - /* out: TRUE if freed */ - ulint n_iterations); /* in: how many times this has been called - repeatedly without result: a high value - means that we should search farther */ /************************************************************************** Validates the LRU list. */ diff --git a/innobase/include/data0data.h b/innobase/include/data0data.h index e0fb06e5018..2ec94a9517a 100644 --- a/innobase/include/data0data.h +++ b/innobase/include/data0data.h @@ -262,6 +262,14 @@ dtuple_set_types_binary( /*====================*/ dtuple_t* tuple, /* in: data tuple */ ulint n); /* in: number of fields to set */ +/************************************************************************** +Checks if a dtuple contains an SQL null value. */ +UNIV_INLINE +ibool +dtuple_contains_null( +/*=================*/ + /* out: TRUE if some field is SQL null */ + dtuple_t* tuple); /* in: dtuple */ /************************************************************** Checks that a data field is typed. Asserts an error if not. 
*/ @@ -453,8 +461,6 @@ struct dfield_struct{ void* data; /* pointer to data */ ulint len; /* data length; UNIV_SQL_NULL if SQL null; */ dtype_t type; /* type of data */ - ulint col_no; /* when building index entries, the column - number can be stored here */ }; struct dtuple_struct { diff --git a/innobase/include/data0data.ic b/innobase/include/data0data.ic index d356664df21..def80d3f430 100644 --- a/innobase/include/data0data.ic +++ b/innobase/include/data0data.ic @@ -406,3 +406,28 @@ data_write_sql_null( data[j] = '\0'; } } + +/************************************************************************** +Checks if a dtuple contains an SQL null value. */ +UNIV_INLINE +ibool +dtuple_contains_null( +/*=================*/ + /* out: TRUE if some field is SQL null */ + dtuple_t* tuple) /* in: dtuple */ +{ + ulint n; + ulint i; + + n = dtuple_get_n_fields(tuple); + + for (i = 0; i < n; i++) { + if (dfield_get_len(dtuple_get_nth_field(tuple, i)) + == UNIV_SQL_NULL) { + + return(TRUE); + } + } + + return(FALSE); +} diff --git a/innobase/include/data0type.h b/innobase/include/data0type.h index b53a70a8909..4da686bf2e1 100644 --- a/innobase/include/data0type.h +++ b/innobase/include/data0type.h @@ -18,14 +18,16 @@ typedef struct dtype_struct dtype_t; data type */ extern dtype_t* dtype_binary; -/* Data main types of SQL data; NOTE! character data types requiring -collation transformation must have the smallest codes! All codes must be -less than 256! 
*/ +/* Data main types of SQL data */ #define DATA_VARCHAR 1 /* character varying */ #define DATA_CHAR 2 /* fixed length character */ #define DATA_FIXBINARY 3 /* binary string of fixed length */ #define DATA_BINARY 4 /* binary string */ -#define DATA_BLOB 5 /* binary large object */ +#define DATA_BLOB 5 /* binary large object, or a TEXT type; if + prtype & DATA_NONLATIN1 != 0 the data must + be compared by MySQL as a whole field; if + prtype & DATA_BINARY_TYPE == 0, then this is + actually a TEXT column */ #define DATA_INT 6 /* integer: can be any size 1 - 8 bytes */ #define DATA_SYS_CHILD 7 /* address of the child page in node pointer */ #define DATA_SYS 8 /* system column */ @@ -34,35 +36,55 @@ binary strings */ #define DATA_FLOAT 9 #define DATA_DOUBLE 10 #define DATA_DECIMAL 11 /* decimal number stored as an ASCII string */ -#define DATA_VARMYSQL 12 /* data types for which comparisons must be */ -#define DATA_MYSQL 13 /* made by MySQL */ -#define DATA_ERROR 111 /* error value */ -#define DATA_MTYPE_MAX 255 +#define DATA_VARMYSQL 12 /* non-latin1 varying length char */ +#define DATA_MYSQL 13 /* non-latin1 fixed length char */ +#define DATA_MTYPE_MAX 63 /* dtype_store_for_order_and_null_size() + requires the values are <= 63 */ /*-------------------------------------------*/ -/* Precise data types for system columns; NOTE: the values must run -from 0 up in the order given! All codes must be less than 256! */ +/* In the lowest byte in the precise type we store the MySQL type code +(not applicable for system columns). 
*/ + +#define DATA_ENGLISH 4 /* English language character string: this + is a relic from pre-MySQL time and only used + for InnoDB's own system tables */ +#define DATA_ERROR 111 /* another relic from pre-MySQL time */ + +#define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL + type from the precise type */ + +/* Precise data types for system columns and the length of those columns; +NOTE: the values must run from 0 up in the order given! All codes must +be less than 256 */ #define DATA_ROW_ID 0 /* row id: a dulint */ #define DATA_ROW_ID_LEN 6 /* stored length for row id */ + #define DATA_TRX_ID 1 /* transaction id: 6 bytes */ #define DATA_TRX_ID_LEN 6 + #define DATA_ROLL_PTR 2 /* rollback data pointer: 7 bytes */ #define DATA_ROLL_PTR_LEN 7 + #define DATA_MIX_ID 3 /* mixed index label: a dulint, stored in a row in a compressed form */ #define DATA_MIX_ID_LEN 9 /* maximum stored length for mix id (in a compressed dulint form) */ #define DATA_N_SYS_COLS 4 /* number of system columns defined above */ +/*-------------------------------------------*/ +/* Flags ORed to the precise data type */ #define DATA_NOT_NULL 256 /* this is ORed to the precise type when the column is declared as NOT NULL */ #define DATA_UNSIGNED 512 /* this id ORed to the precise type when we have an unsigned integer type */ +#define DATA_BINARY_TYPE 1024 /* if the data type is a binary character + string, this is ORed to the precise type: + this only holds for tables created with + >= MySQL-4.0.14 */ +#define DATA_NONLATIN1 2048 /* if the data type is a DATA_BLOB (actually + TEXT) of a non-latin1 type, this is ORed to + the precise type: this only holds for tables + created with >= MySQL-4.0.14 */ /*-------------------------------------------*/ -/* Precise types of a char or varchar data. All codes must be less than 256! 
*/ -#define DATA_ENGLISH 4 /* English language character string */ -#define DATA_FINNISH 5 /* Finnish */ -#define DATA_PRTYPE_MAX 255 - /* This many bytes we need to store the type information affecting the alphabetical order for a single field and decide the storage size of an SQL null*/ @@ -123,7 +145,7 @@ dtype_get_pad_char( /*===============*/ /* out: padding character code, or ULINT_UNDEFINED if no padding specified */ - dtype_t* type); /* in: typeumn */ + dtype_t* type); /* in: type */ /*************************************************************************** Returns the size of a fixed size data type, 0 if not a fixed size type. */ UNIV_INLINE @@ -150,24 +172,24 @@ dtype_is_fixed_size( /* out: TRUE if fixed size */ dtype_t* type); /* in: type */ /************************************************************************** -Stores to a type the information which determines its alphabetical -ordering. */ +Stores for a type the information which determines its alphabetical ordering +and the storage size of an SQL NULL value. */ UNIV_INLINE void dtype_store_for_order_and_null_size( /*================================*/ byte* buf, /* in: buffer for DATA_ORDER_NULL_TYPE_BUF_SIZE - bytes */ + bytes where we store the info */ dtype_t* type); /* in: type struct */ /************************************************************************** -Reads of a type the stored information which determines its alphabetical -ordering. */ +Reads to a type the stored information which determines its alphabetical +ordering and the storage size of an SQL NULL value. */ UNIV_INLINE void dtype_read_for_order_and_null_size( /*===============================*/ dtype_t* type, /* in: type struct */ - byte* buf); /* in: buffer for type order info */ + byte* buf); /* in: buffer for the stored order info */ /************************************************************************* Validates a data type structure. 
*/ diff --git a/innobase/include/data0type.ic b/innobase/include/data0type.ic index d82d976d076..ddd0b0ae8cc 100644 --- a/innobase/include/data0type.ic +++ b/innobase/include/data0type.ic @@ -110,7 +110,9 @@ dtype_get_pad_char( if (type->mtype == DATA_CHAR || type->mtype == DATA_VARCHAR || type->mtype == DATA_BINARY - || type->mtype == DATA_FIXBINARY) { + || type->mtype == DATA_FIXBINARY + || type->mtype == DATA_MYSQL + || type->mtype == DATA_VARMYSQL) { /* Space is the padding character for all char and binary strings */ @@ -124,39 +126,56 @@ dtype_get_pad_char( } /************************************************************************** -Stores to a type the information which determines its alphabetical -ordering. */ +Stores for a type the information which determines its alphabetical ordering +and the storage size of an SQL NULL value. */ UNIV_INLINE void dtype_store_for_order_and_null_size( /*================================*/ byte* buf, /* in: buffer for DATA_ORDER_NULL_TYPE_BUF_SIZE - bytes */ + bytes where we store the info */ dtype_t* type) /* in: type struct */ { ut_ad(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE); buf[0] = (byte)(type->mtype & 0xFF); + + if (type->prtype & DATA_BINARY_TYPE) { + buf[0] = buf[0] | 128; + } + + if (type->prtype & DATA_NONLATIN1) { + buf[0] = buf[0] | 64; + } + buf[1] = (byte)(type->prtype & 0xFF); mach_write_to_2(buf + 2, type->len & 0xFFFF); } /************************************************************************** -Reads of a type the stored information which determines its alphabetical -ordering. */ +Reads to a type the stored information which determines its alphabetical +ordering and the storage size of an SQL NULL value. 
*/ UNIV_INLINE void dtype_read_for_order_and_null_size( /*===============================*/ dtype_t* type, /* in: type struct */ - byte* buf) /* in: buffer for type order info */ + byte* buf) /* in: buffer for stored type order info */ { ut_ad(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE); - type->mtype = buf[0]; + type->mtype = buf[0] & 63; type->prtype = buf[1]; + if (buf[0] & 128) { + type->prtype = type->prtype | DATA_BINARY_TYPE; + } + + if (buf[0] & 64) { + type->prtype = type->prtype | DATA_NONLATIN1; + } + type->len = mach_read_from_2(buf + 2); } diff --git a/innobase/include/db0err.h b/innobase/include/db0err.h index df74b06dfc0..854b9794c00 100644 --- a/innobase/include/db0err.h +++ b/innobase/include/db0err.h @@ -41,9 +41,14 @@ Created 5/24/1996 Heikki Tuuri which is referenced */ #define DB_CANNOT_ADD_CONSTRAINT 38 /* adding a foreign key constraint to a table failed */ - -#define DB_COL_APPEARS_TWICE_IN_INDEX 40 - +#define DB_CORRUPTION 39 /* data structure corruption noticed */ +#define DB_COL_APPEARS_TWICE_IN_INDEX 40 /* InnoDB cannot handle an index + where same column appears twice */ +#define DB_CANNOT_DROP_CONSTRAINT 41 /* dropping a foreign key constraint + from a table failed */ +#define DB_NO_SAVEPOINT 42 /* no savepoint exists with the given + name */ + /* The following are partial failure codes */ #define DB_FAIL 1000 #define DB_OVERFLOW 1001 diff --git a/innobase/include/dict0dict.h b/innobase/include/dict0dict.h index 832654d2666..b5ec5381db2 100644 --- a/innobase/include/dict0dict.h +++ b/innobase/include/dict0dict.h @@ -26,6 +26,18 @@ Created 1/8/1996 Heikki Tuuri #include "ut0byte.h" #include "trx0types.h" +/************************************************************************* +Accepts a specified string. Comparisons are case-insensitive. 
*/ + +char* +dict_accept( +/*========*/ + /* out: if string was accepted, the pointer + is moved after that, else ptr is returned */ + char* ptr, /* in: scan from this */ + const char* string,/* in: accept only this string as the next + non-whitespace string */ + ibool* success);/* out: TRUE if accepted */ /************************************************************************ Decrements the count of open MySQL handles to a table. */ @@ -114,13 +126,20 @@ dict_table_autoinc_get( /* out: value for a new row, or 0 */ dict_table_t* table); /* in: table */ /************************************************************************ -Reads the autoinc counter value, 0 if not yet initialized. Does not -increment the counter. */ +Decrements the autoinc counter value by 1. */ + +void +dict_table_autoinc_decrement( +/*=========================*/ + dict_table_t* table); /* in: table */ +/************************************************************************ +Reads the next autoinc value (== autoinc counter value), 0 if not yet +initialized. */ ib_longlong dict_table_autoinc_read( /*====================*/ - /* out: value of the counter */ + /* out: value for a new row, or 0 */ dict_table_t* table); /* in: table */ /************************************************************************ Peeks the autoinc counter value, 0 if not yet initialized. Does not @@ -200,6 +219,24 @@ dict_create_foreign_constraints( char* name); /* in: table full name in the normalized form database_name/table_name */ /************************************************************************** +Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement. 
*/ + +ulint +dict_foreign_parse_drop_constraints( +/*================================*/ + /* out: DB_SUCCESS or + DB_CANNOT_DROP_CONSTRAINT if + syntax error or the constraint + id does not match */ + mem_heap_t* heap, /* in: heap from which we can + allocate memory */ + trx_t* trx, /* in: transaction */ + dict_table_t* table, /* in: table */ + ulint* n, /* out: number of constraints + to drop */ + char*** constraints_to_drop); /* out: id's of the + constraints to drop */ +/************************************************************************** Returns a table object and memoryfixes it. NOTE! This is a high-level function to be used mainly from outside the 'dict' directory. Inside this directory dict_table_get_low is usually the appropriate function. */ @@ -314,6 +351,16 @@ dict_print_info_on_foreign_keys( char* str, /* in/out: pointer to a string */ ulint len, /* in: space in str available for info */ dict_table_t* table); /* in: table */ +/************************************************************************** +Sprintfs to a string info on a foreign key of a table in a format suitable +for CREATE TABLE. */ + +char* +dict_print_info_on_foreign_key_in_create_format( +/*============================================*/ + /* out: how far in buf we printed */ + dict_foreign_t* foreign,/* in: foreign key constraint */ + char* buf); /* in: buffer of at least 5000 bytes */ /************************************************************************ Gets the first index on the table (the clustered index). */ UNIV_INLINE @@ -522,6 +569,29 @@ dict_index_get_nth_col_pos( dict_index_t* index, /* in: index */ ulint n); /* in: column number */ /************************************************************************ +Returns TRUE if the index contains a column or a prefix of that column. 
*/ + +ibool +dict_index_contains_col_or_prefix( +/*==============================*/ + /* out: TRUE if contains the column or its + prefix */ + dict_index_t* index, /* in: index */ + ulint n); /* in: column number */ +/************************************************************************ +Looks for a matching field in an index. The column and the prefix len has +to be the same. */ + +ulint +dict_index_get_nth_field_pos( +/*=========================*/ + /* out: position in internal representation + of the index; if not contained, returns + ULINT_UNDEFINED */ + dict_index_t* index, /* in: index from which to search */ + dict_index_t* index2, /* in: index */ + ulint n); /* in: field number in index2 */ +/************************************************************************ Looks for column n position in the clustered index. */ ulint @@ -789,9 +859,17 @@ void dict_mutex_exit_for_mysql(void); /*===========================*/ +/* The following len must be at least 10000 bytes! */ +#define DICT_FOREIGN_ERR_BUF_LEN 10000 + +/* Buffers for storing detailed information about the latest foreign key +and unique key errors */ +extern char* dict_foreign_err_buf; +extern char* dict_unique_err_buf; +extern mutex_t dict_foreign_err_mutex; /* mutex protecting the buffers */ extern dict_sys_t* dict_sys; /* the dictionary system */ -extern rw_lock_t dict_foreign_key_check_lock; +extern rw_lock_t dict_operation_lock; /* Dictionary system struct */ struct dict_sys_struct{ diff --git a/innobase/include/dict0dict.ic b/innobase/include/dict0dict.ic index 821465f96a8..c5982c162a7 100644 --- a/innobase/include/dict0dict.ic +++ b/innobase/include/dict0dict.ic @@ -106,7 +106,7 @@ dict_table_get_n_sys_cols( /*======================*/ /* out: number of system (e.g., ROW_ID) columns of a table */ - dict_table_t* table) /* in: table */ + dict_table_t* table __attribute__((unused))) /* in: table */ { ut_ad(table); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); @@ -203,7 +203,6 @@ 
dict_index_get_n_fields( { ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(index->cached); return(index->n_fields); } diff --git a/innobase/include/dict0mem.h b/innobase/include/dict0mem.h index cc27f2bad12..03dc913a7c9 100644 --- a/innobase/include/dict0mem.h +++ b/innobase/include/dict0mem.h @@ -111,10 +111,13 @@ by the column name may be released only after publishing the index. */ void dict_mem_index_add_field( /*=====================*/ - dict_index_t* index, /* in: index */ - char* name, /* in: column name */ - ulint order); /* in: order criterion; 0 means an ascending - order */ + dict_index_t* index, /* in: index */ + char* name, /* in: column name */ + ulint order, /* in: order criterion; 0 means an + ascending order */ + ulint prefix_len); /* in: 0 or the column prefix length + in a MySQL index like + INDEX (textcol(25)) */ /************************************************************************** Frees an index memory object. */ @@ -158,12 +161,18 @@ struct dict_col_struct{ in some of the functions below */ }; +#define DICT_MAX_COL_PREFIX_LEN 512 + /* Data structure for a field in an index */ struct dict_field_struct{ - dict_col_t* col; /* pointer to the table column */ - char* name; /* name of the column */ - ulint order; /* flags for ordering this field: - DICT_DESCEND, ... */ + dict_col_t* col; /* pointer to the table column */ + char* name; /* name of the column */ + ulint order; /* flags for ordering this field: + DICT_DESCEND, ... 
*/ + ulint prefix_len; /* 0 or the length of the column + prefix in a MySQL index of type, e.g., + INDEX (textcol(25)); must be smaller + than DICT_MAX_COL_PREFIX_LEN */ }; /* Data structure for an index tree */ @@ -280,8 +289,15 @@ struct dict_foreign_struct{ table */ }; +/* The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that +a foreign key constraint is enforced, therefore RESTRICT just means no flag */ #define DICT_FOREIGN_ON_DELETE_CASCADE 1 #define DICT_FOREIGN_ON_DELETE_SET_NULL 2 +#define DICT_FOREIGN_ON_UPDATE_CASCADE 4 +#define DICT_FOREIGN_ON_UPDATE_SET_NULL 8 +#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16 +#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32 + #define DICT_INDEX_MAGIC_N 76789786 @@ -333,6 +349,13 @@ struct dict_table_struct{ space from the lock heap of the trx: otherwise the lock heap would grow rapidly if we do a large insert from a select */ + dulint query_cache_inv_trx_id; + /* transactions whose trx id < than this + number are not allowed to store to the MySQL + query cache or retrieve from it; when a trx + with undo logs commits, it sets this to the + value of the trx id counter for the tables it + had an IX lock on */ UT_LIST_BASE_NODE_T(lock_t) locks; /* list of locks on the table */ /*----------------------*/ diff --git a/innobase/include/dyn0dyn.h b/innobase/include/dyn0dyn.h index cca302994c1..501fde05e90 100644 --- a/innobase/include/dyn0dyn.h +++ b/innobase/include/dyn0dyn.h @@ -19,7 +19,6 @@ typedef dyn_block_t dyn_array_t; /* This is the initial 'payload' size of a dynamic array; this must be > MLOG_BUF_MARGIN + 30! 
*/ - #define DYN_ARRAY_DATA_SIZE 512 /************************************************************************* diff --git a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h index 63e20221c16..ad3149f0b36 100644 --- a/innobase/include/fil0fil.h +++ b/innobase/include/fil0fil.h @@ -43,7 +43,10 @@ struct fil_addr_struct{ extern fil_addr_t fil_addr_null; /* The byte offsets on a file page for various variables */ -#define FIL_PAGE_SPACE 0 /* space id the page belongs to */ +#define FIL_PAGE_SPACE_OR_CHKSUM 0 /* in < MySQL-4.0.14 space id the + page belongs to (== 0) but in later + versions the 'new' checksum of the + page */ #define FIL_PAGE_OFFSET 4 /* page offset inside space */ #define FIL_PAGE_PREV 8 /* if there is a 'natural' predecessor of the page, its offset */ @@ -64,7 +67,7 @@ extern fil_addr_t fil_addr_null; #define FIL_PAGE_DATA 38 /* start of the data on the page */ /* File page trailer */ -#define FIL_PAGE_END_LSN 8 /* the low 4 bytes of this are used +#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /* the low 4 bytes of this are used to store the page checksum, the last 4 bytes should be identical to the last 4 bytes of FIL_PAGE_LSN */ @@ -73,6 +76,8 @@ extern fil_addr_t fil_addr_null; /* File page types */ #define FIL_PAGE_INDEX 17855 #define FIL_PAGE_UNDO_LOG 2 +#define FIL_PAGE_INODE 3 +#define FIL_PAGE_IBUF_FREE_LIST 4 /* Space types */ #define FIL_TABLESPACE 501 @@ -381,6 +386,14 @@ fil_space_release_free_extents( /*===========================*/ ulint id, /* in: space id */ ulint n_reserved); /* in: how many one reserved */ +/*********************************************************************** +Gets the number of reserved extents. If the database is silent, this number +should be zero. 
*/ + +ulint +fil_space_get_n_reserved_extents( +/*=============================*/ + ulint id); /* in: space id */ typedef struct fil_space_struct fil_space_t; diff --git a/innobase/include/ha0ha.ic b/innobase/include/ha0ha.ic index 1aad7d5a36f..761bc3b20de 100644 --- a/innobase/include/ha0ha.ic +++ b/innobase/include/ha0ha.ic @@ -49,7 +49,7 @@ ha_node_t* ha_chain_get_next( /*==============*/ /* out: next node, NULL if none */ - hash_table_t* table, /* in: hash table */ + hash_table_t* table __attribute__((unused)), /* in: hash table */ ha_node_t* node) /* in: hash chain node */ { ut_ad(table); diff --git a/innobase/include/lock0lock.h b/innobase/include/lock0lock.h index 80afba97416..5608ba020b7 100644 --- a/innobase/include/lock0lock.h +++ b/innobase/include/lock0lock.h @@ -292,16 +292,12 @@ lock_sec_rec_modify_check_and_lock( dict_index_t* index, /* in: secondary index */ que_thr_t* thr); /* in: query thread */ /************************************************************************* -Checks if locks of other transactions prevent an immediate read, or passing -over by a read cursor, of a clustered index record. If they do, first tests -if the query thread should anyway be suspended for some reason; if not, then -puts the transaction and the query thread to the lock wait state and inserts a -waiting request for a record lock to the lock queue. Sets the requested mode -lock on the record. */ +Like the counterpart for a clustered index below, but now we read a +secondary index record. 
*/ ulint -lock_clust_rec_read_check_and_lock( -/*===============================*/ +lock_sec_rec_read_check_and_lock( +/*=============================*/ /* out: DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, @@ -309,18 +305,24 @@ lock_clust_rec_read_check_and_lock( rec_t* rec, /* in: user record or page supremum record which should be read or passed over by a read cursor */ - dict_index_t* index, /* in: clustered index */ + dict_index_t* index, /* in: secondary index */ ulint mode, /* in: mode of the lock which the read cursor should set on records: LOCK_S or LOCK_X; the latter is possible in SELECT FOR UPDATE */ + ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or + LOCK_REC_NOT_GAP */ que_thr_t* thr); /* in: query thread */ /************************************************************************* -Like the counterpart for a clustered index above, but now we read a -secondary index record. */ +Checks if locks of other transactions prevent an immediate read, or passing +over by a read cursor, of a clustered index record. If they do, first tests +if the query thread should anyway be suspended for some reason; if not, then +puts the transaction and the query thread to the lock wait state and inserts a +waiting request for a record lock to the lock queue. Sets the requested mode +lock on the record. 
*/ ulint -lock_sec_rec_read_check_and_lock( -/*=============================*/ +lock_clust_rec_read_check_and_lock( +/*===============================*/ /* out: DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, @@ -328,10 +330,12 @@ lock_sec_rec_read_check_and_lock( rec_t* rec, /* in: user record or page supremum record which should be read or passed over by a read cursor */ - dict_index_t* index, /* in: secondary index */ + dict_index_t* index, /* in: clustered index */ ulint mode, /* in: mode of the lock which the read cursor should set on records: LOCK_S or LOCK_X; the latter is possible in SELECT FOR UPDATE */ + ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or + LOCK_REC_NOT_GAP */ que_thr_t* thr); /* in: query thread */ /************************************************************************* Checks that a record is seen in a consistent read. */ @@ -446,6 +450,18 @@ lock_rec_get_mutex_for_addr( ulint space, /* in: space id */ ulint page_no);/* in: page number */ /************************************************************************* +Checks that a transaction id is sensible, i.e., not in the future. */ + +ibool +lock_check_trx_id_sanity( +/*=====================*/ + /* out: TRUE if ok */ + dulint trx_id, /* in: trx id */ + rec_t* rec, /* in: user record */ + dict_index_t* index, /* in: clustered index */ + ibool has_kernel_mutex);/* in: TRUE if the caller owns the + kernel mutex */ +/************************************************************************* Validates the lock queue on a single record. 
*/ ibool @@ -509,6 +525,7 @@ lock_validate(void); extern lock_sys_t* lock_sys; /* Lock modes and types */ +/* Basic modes */ #define LOCK_NONE 0 /* this flag is used elsewhere to note consistent read */ #define LOCK_IS 2 /* intention shared */ @@ -519,15 +536,20 @@ extern lock_sys_t* lock_sys; in an exclusive mode */ #define LOCK_MODE_MASK 0xF /* mask used to extract mode from the type_mode field in a lock */ +/* Lock types */ #define LOCK_TABLE 16 /* these type values should be so high that */ #define LOCK_REC 32 /* they can be ORed to the lock mode */ #define LOCK_TYPE_MASK 0xF0 /* mask used to extract lock type from the type_mode field in a lock */ +/* Waiting lock flag */ #define LOCK_WAIT 256 /* this wait bit should be so high that it can be ORed to the lock mode and type; when this bit is set, it means that the lock has not yet been granted, it is just waiting for its turn in the wait queue */ +/* Precise modes */ +#define LOCK_ORDINARY 0 /* this flag denotes an ordinary next-key lock + in contrast to LOCK_GAP or LOCK_REC_NOT_GAP */ #define LOCK_GAP 512 /* this gap bit should be so high that it can be ORed to the other flags; when this bit is set, it means that the @@ -537,7 +559,23 @@ extern lock_sys_t* lock_sys; the bit is set; locks of this type are created when records are removed from the index chain of records */ - +#define LOCK_REC_NOT_GAP 1024 /* this bit means that the lock is only on + the index record and does NOT block inserts + to the gap before the index record; this is + used in the case when we retrieve a record + with a unique key, and is also used in + locking plain SELECTs (not part of UPDATE + or DELETE) when the user has set the READ + COMMITTED isolation level */ +#define LOCK_INSERT_INTENTION 2048 /* this bit is set when we place a waiting + gap type record lock request in order to let + an insert of an index record to wait until + there are no conflicting locks by other + transactions on the gap; note that this flag + remains set when 
the waiting lock is granted, + or if the lock is inherited to a neighboring + record */ + /* When lock bits are reset, the following flags are available: */ #define LOCK_RELEASE_WAIT 1 #define LOCK_NOT_RELEASE_WAIT 2 diff --git a/innobase/include/log0log.h b/innobase/include/log0log.h index f200371de9d..24ec28a56e6 100644 --- a/innobase/include/log0log.h +++ b/innobase/include/log0log.h @@ -20,7 +20,7 @@ typedef struct log_group_struct log_group_t; extern ibool log_do_write; extern ibool log_debug_writes; -/* Wait modes for log_flush_up_to */ +/* Wait modes for log_write_up_to */ #define LOG_NO_WAIT 91 #define LOG_WAIT_ONE_GROUP 92 #define LOG_WAIT_ALL_GROUPS 93 @@ -157,26 +157,27 @@ log_io_complete( /*============*/ log_group_t* group); /* in: log group */ /********************************************************** -Flushes the log files to the disk, using, for example, the Unix fsync. -This function does the flush even if the user has set -srv_flush_log_at_trx_commit = FALSE. */ - -void -log_flush_to_disk(void); -/*===================*/ -/********************************************************** This function is called, e.g., when a transaction wants to commit. It checks -that the log has been flushed to disk up to the last log entry written by the -transaction. If there is a flush running, it waits and checks if the flush -flushed enough. If not, starts a new flush. */ +that the log has been written to the log file up to the last log entry written +by the transaction. If there is a flush running, it waits and checks if the +flush flushed enough. If not, starts a new flush. 
*/ void -log_flush_up_to( +log_write_up_to( /*============*/ dulint lsn, /* in: log sequence number up to which the log should - be flushed, ut_dulint_max if not specified */ - ulint wait); /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, + be written, ut_dulint_max if not specified */ + ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, or LOG_WAIT_ALL_GROUPS */ + ibool flush_to_disk); + /* in: TRUE if we want the written log also to be + flushed to disk */ +/******************************************************************** +Does a syncronous flush of the log buffer to disk. */ + +void +log_buffer_flush_to_disk(void); +/*==========================*/ /******************************************************************** Advances the smallest lsn for which there are unflushed dirty blocks in the buffer pool and also may make a new checkpoint. NOTE: this function may only @@ -512,6 +513,15 @@ log_print( /*======*/ char* buf, /* in/out: buffer where to print */ char* buf_end);/* in: buffer end */ +/********************************************************** +Peeks the current lsn. */ + +ibool +log_peek_lsn( +/*=========*/ + /* out: TRUE if success, FALSE if could not get the + log system mutex */ + dulint* lsn); /* out: if returns TRUE, current lsn is here */ /************************************************************************** Refreshes the statistics used to print per-second averages. 
*/ @@ -741,27 +751,37 @@ struct log_struct{ be advanced, it is enough that the write i/o has been completed for all log groups */ - dulint flush_lsn; /* end lsn for the current flush */ - ulint flush_end_offset;/* the data in buffer has been flushed + dulint write_lsn; /* end lsn for the current running + write */ + ulint write_end_offset;/* the data in buffer has been written up to this offset when the current - flush ends: this field will then + write ends: this field will then be copied to buf_next_to_write */ - ulint n_pending_writes;/* number of currently pending flush - writes */ + dulint current_flush_lsn;/* end lsn for the current running + write + flush operation */ + dulint flushed_to_disk_lsn; + /* how far we have written the log + AND flushed to disk */ + ulint n_pending_writes;/* number of currently pending flushes + or writes */ + /* NOTE on the 'flush' in names of the fields below: starting from + 4.0.14, we separate the write of the log file and the actual fsync() + or other method to flush it to disk. The names below shhould really + be 'flush_or_write'! */ os_event_t no_flush_event; /* this event is in the reset state - when a flush is running; a thread - should wait for this without owning - the log mutex, but NOTE that to set or - reset this event, the thread MUST own - the log mutex! */ + when a flush or a write is running; + a thread should wait for this without + owning the log mutex, but NOTE that + to set or reset this event, the + thread MUST own the log mutex! 
*/ ibool one_flushed; /* during a flush, this is first FALSE and becomes TRUE when one log group - has been flushed */ + has been written or flushed */ os_event_t one_flushed_event;/* this event is reset when the - flush has not yet completed for any - log group; e.g., this means that a - transaction has been committed when - this is set; a thread should wait + flush or write has not yet completed + for any log group; e.g., this means + that a transaction has been committed + when this is set; a thread should wait for this without owning the log mutex, but NOTE that to set or reset this event, the thread MUST own the log @@ -774,6 +794,11 @@ struct log_struct{ called */ /* Fields involved in checkpoints */ + ulint log_group_capacity; /* capacity of the log group; if + the checkpoint age exceeds this, it is + a serious error because it is possible + we will then overwrite log and spoil + crash recovery */ ulint max_modified_age_async; /* when this recommended value for lsn - buf_pool_get_oldest_modification() diff --git a/innobase/include/log0recv.h b/innobase/include/log0recv.h index baa2ba50c7d..e5a5bc05563 100644 --- a/innobase/include/log0recv.h +++ b/innobase/include/log0recv.h @@ -333,7 +333,10 @@ extern ibool recv_recovery_on; extern ibool recv_no_ibuf_operations; extern ibool recv_needed_recovery; +extern ibool recv_lsn_checks_on; + extern ibool recv_is_making_a_backup; +extern ulint recv_max_parsed_page_no; /* Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many times! */ diff --git a/innobase/include/mem0mem.h b/innobase/include/mem0mem.h index bfd25f5bdbe..9ab3b2cd754 100644 --- a/innobase/include/mem0mem.h +++ b/innobase/include/mem0mem.h @@ -127,16 +127,18 @@ mem_heap_create_func( ulint line /* in: line where created */ ); /********************************************************************* -NOTE: Use the corresponding macro instead of this function. -Frees the space occupied by a memory heap. 
*/ +NOTE: Use the corresponding macro instead of this function. Frees the space +occupied by a memory heap. In the debug version erases the heap memory +blocks. */ UNIV_INLINE void mem_heap_free_func( /*===============*/ - mem_heap_t* heap, /* in, own: heap to be freed */ - char* file_name, /* in: file name where freed */ - ulint line /* in: line where freed */ -); + mem_heap_t* heap, /* in, own: heap to be freed */ + char* file_name __attribute__((unused)), + /* in: file name where freed */ + ulint line __attribute__((unused))); + /* in: line where freed */ /******************************************************************* Allocates n bytes of memory from a memory heap. */ UNIV_INLINE diff --git a/innobase/include/mem0mem.ic b/innobase/include/mem0mem.ic index a7abb93d91d..1ff8c66e80a 100644 --- a/innobase/include/mem0mem.ic +++ b/innobase/include/mem0mem.ic @@ -440,9 +440,10 @@ void mem_heap_free_func( /*===============*/ mem_heap_t* heap, /* in, own: heap to be freed */ - char* file_name, /* in: file name where freed */ - ulint line /* in: line where freed */ - ) + char* file_name __attribute__((unused)), + /* in: file name where freed */ + ulint line __attribute__((unused))) + /* in: line where freed */ { mem_block_t* block; mem_block_t* prev_block; diff --git a/innobase/include/os0file.h b/innobase/include/os0file.h index d65c7fd47e3..5c52f0e92bf 100644 --- a/innobase/include/os0file.h +++ b/innobase/include/os0file.h @@ -111,6 +111,7 @@ log. */ #define OS_WIN31 1 #define OS_WIN95 2 #define OS_WINNT 3 +#define OS_WIN2000 4 extern ulint os_n_file_reads; extern ulint os_n_file_writes; @@ -122,7 +123,7 @@ Gets the operating system version. Currently works only on Windows. 
*/ ulint os_get_os_version(void); /*===================*/ - /* out: OS_WIN95, OS_WIN31, OS_WINNT (2000 == NT) */ + /* out: OS_WIN95, OS_WIN31, OS_WINNT, or OS_WIN2000 */ /******************************************************************** Creates the seek mutexes used in positioned reads and writes. */ @@ -145,6 +146,21 @@ os_file_create_simple( ulint access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */ ibool* success);/* out: TRUE if succeed, FALSE if error */ /******************************************************************** +A simple function to open or create a file. */ + +os_file_t +os_file_create_simple_no_error_handling( +/*====================================*/ + /* out, own: handle to the file, not defined if error, + error number can be retrieved with os_get_last_error */ + char* name, /* in: name of the file or path as a null-terminated + string */ + ulint create_mode,/* in: OS_FILE_OPEN if an existing file is opened + (if does not exist, error), or OS_FILE_CREATE if a new + file is created (if exists, error) */ + ulint access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */ + ibool* success);/* out: TRUE if succeed, FALSE if error */ +/******************************************************************** Opens an existing file or creates a new. */ os_file_t @@ -159,7 +175,11 @@ os_file_create( file is created (if exists, error), OS_FILE_OVERWRITE if a new file is created or an old overwritten */ ulint purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o - is desired, OS_FILE_NORMAL, if any normal file */ + is desired, OS_FILE_NORMAL, if any normal file; + NOTE that it also depends on type, os_aio_.. and srv_.. 
+ variables whether we really use async i/o or + unbuffered i/o: look in the function source code for + the exact rules */ ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */ ibool* success);/* out: TRUE if succeed, FALSE if error */ /*************************************************************************** @@ -172,6 +192,14 @@ os_file_close( /* out: TRUE if success */ os_file_t file); /* in, own: handle to a file */ /*************************************************************************** +Closes a file handle. */ + +ibool +os_file_close_no_error_handling( +/*============================*/ + /* out: TRUE if success */ + os_file_t file); /* in, own: handle to a file */ +/*************************************************************************** Gets a file size. */ ibool @@ -300,6 +328,13 @@ os_aio( are ignored */ void* message2); /**************************************************************************** +Wakes up all async i/o threads so that they know to exit themselves in +shutdown. */ + +void +os_aio_wake_all_threads_at_shutdown(void); +/*=====================================*/ +/**************************************************************************** Waits until there are no pending writes in os_aio_write_array. There can be other, synchronous, pending writes. */ diff --git a/innobase/include/os0proc.h b/innobase/include/os0proc.h index 9da1f33e070..79750e5c1f7 100644 --- a/innobase/include/os0proc.h +++ b/innobase/include/os0proc.h @@ -16,6 +16,15 @@ typedef void* os_process_t; typedef unsigned long int os_process_id_t; /******************************************************************** +Converts the current process id to a number. It is not guaranteed that the +number is unique. In Linux returns the 'process number' of the current +thread. That number is the same as one sees in 'top', for example. In Linux +the thread id is not the same as one sees in 'top'. 
*/ + +ulint +os_proc_get_number(void); +/*====================*/ +/******************************************************************** Allocates non-cacheable memory. */ void* diff --git a/innobase/include/os0sync.h b/innobase/include/os0sync.h index b2d613c4619..e1cf263216e 100644 --- a/innobase/include/os0sync.h +++ b/innobase/include/os0sync.h @@ -10,25 +10,43 @@ Created 9/6/1995 Heikki Tuuri #define os0sync_h #include "univ.i" +#include "ut0lst.h" #ifdef __WIN__ #define os_fast_mutex_t CRITICAL_SECTION -typedef void* os_event_t; -#else +typedef HANDLE os_native_event_t; + +typedef struct os_event_struct os_event_struct_t; +typedef os_event_struct_t* os_event_t; +struct os_event_struct { + os_native_event_t handle; + /* Windows event */ + UT_LIST_NODE_T(os_event_struct_t) os_event_list; + /* list of all created events */ +}; +#else typedef pthread_mutex_t os_fast_mutex_t; + +typedef struct os_event_struct os_event_struct_t; +typedef os_event_struct_t* os_event_t; + struct os_event_struct { os_fast_mutex_t os_mutex; /* this mutex protects the next fields */ - ibool is_set; /* this is TRUE if the next mutex is - not reserved */ + ibool is_set; /* this is TRUE when the event is + in the signaled state, i.e., a thread + does not stop if it tries to wait for + this event */ + ib_longlong signal_count; /* this is incremented each time + the event becomes signaled */ pthread_cond_t cond_var; /* condition variable is used in waiting for the event */ + UT_LIST_NODE_T(os_event_struct_t) os_event_list; + /* list of all created events */ }; -typedef struct os_event_struct os_event_struct_t; -typedef os_event_struct_t* os_event_t; #endif typedef struct os_mutex_struct os_mutex_str_t; @@ -38,10 +56,32 @@ typedef os_mutex_str_t* os_mutex_t; #define OS_SYNC_TIME_EXCEEDED 1 +/* Mutex protecting counts and the event and OS 'slow' mutex lists */ +extern os_mutex_t os_sync_mutex; + +/* This is incremented by 1 in os_thread_create and decremented by 1 in +os_thread_exit */ +extern 
ulint os_thread_count; + +extern ulint os_event_count; +extern ulint os_mutex_count; +extern ulint os_fast_mutex_count; + +/************************************************************* +Initializes global event and OS 'slow' mutex lists. */ + +void +os_sync_init(void); +/*==============*/ /************************************************************* -Creates an event semaphore, i.e., a semaphore which may -just have two states: signaled and nonsignaled. -The created event is manual reset: it must be reset +Frees created events and OS 'slow' mutexes. */ + +void +os_sync_free(void); +/*==============*/ +/************************************************************* +Creates an event semaphore, i.e., a semaphore which may just have two states: +signaled and nonsignaled. The created event is manual reset: it must be reset explicitly by calling sync_os_reset_event. */ os_event_t @@ -50,10 +90,10 @@ os_event_create( /* out: the event handle */ char* name); /* in: the name of the event, if NULL the event is created without a name */ +#ifdef __WIN__ /************************************************************* -Creates an auto-reset event semaphore, i.e., an event -which is automatically reset when a single thread is -released. */ +Creates an auto-reset event semaphore, i.e., an event which is automatically +reset when a single thread is released. Works only in Windows. */ os_event_t os_event_create_auto( @@ -61,6 +101,7 @@ os_event_create_auto( /* out: the event handle */ char* name); /* in: the name of the event, if NULL the event is created without a name */ +#endif /************************************************************** Sets an event semaphore to the signaled state: lets waiting threads proceed. */ @@ -85,7 +126,10 @@ os_event_free( /*==========*/ os_event_t event); /* in: event to free */ /************************************************************** -Waits for an event object until it is in the signaled state. 
*/ +Waits for an event object until it is in the signaled state. If +srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the +waiting thread when the event becomes signaled (or immediately if the +event is already in the signaled state). */ void os_event_wait( @@ -93,7 +137,7 @@ os_event_wait( os_event_t event); /* in: event to wait */ /************************************************************** Waits for an event object until it is in the signaled state or -a timeout is exceeded. */ +a timeout is exceeded. In Unix the timeout is always infinite. */ ulint os_event_wait_time( @@ -104,8 +148,9 @@ os_event_wait_time( os_event_t event, /* in: event to wait */ ulint time); /* in: timeout in microseconds, or OS_SYNC_INFINITE_TIME */ +#ifdef __WIN__ /************************************************************** -Waits for any event in an event array. Returns if even a single +Waits for any event in an OS native event array. Returns if even a single one is signaled or becomes signaled. */ ulint @@ -113,14 +158,15 @@ os_event_wait_multiple( /*===================*/ /* out: index of the event which was signaled */ - ulint n, /* in: number of events in the + ulint n, /* in: number of events in the array */ - os_event_t* event_array); /* in: pointer to an array of event + os_native_event_t* native_event_array); + /* in: pointer to an array of event handles */ +#endif /************************************************************* -Creates an operating system mutex semaphore. -Because these are slow, the mutex semaphore of the database -itself (sync_mutex_t) should be used where possible. */ +Creates an operating system mutex semaphore. Because these are slow, the +mutex semaphore of InnoDB itself (mutex_t) should be used where possible. 
*/ os_mutex_t os_mutex_create( diff --git a/innobase/include/os0sync.ic b/innobase/include/os0sync.ic index 10b85c435e3..1337e97152a 100644 --- a/innobase/include/os0sync.ic +++ b/innobase/include/os0sync.ic @@ -44,4 +44,3 @@ os_fast_mutex_trylock( #endif #endif } - diff --git a/innobase/include/os0thread.h b/innobase/include/os0thread.h index 8355afa46e9..554ca0563e4 100644 --- a/innobase/include/os0thread.h +++ b/innobase/include/os0thread.h @@ -15,16 +15,9 @@ Created 9/8/1995 Heikki Tuuri /* Maximum number of threads which can be created in the program; this is also the size of the wait slot array for MySQL threads which can wait inside InnoDB */ -#ifdef __WIN__ -/* Windows 95/98/ME seemed to have difficulties creating the all -the event semaphores for the wait array slots. If the computer had -<= 64 MB memory, InnoDB startup could take minutes or even crash. -That is why we set this to only 1000 in Windows. */ -#define OS_THREAD_MAX_N 1000 -#else -#define OS_THREAD_MAX_N 10000 -#endif +#define OS_THREAD_MAX_N srv_max_n_threads + /* Possible fixed priorities for threads */ #define OS_THREAD_PRIORITY_NONE 100 @@ -43,7 +36,6 @@ typedef os_thread_t os_thread_id_t; /* In Unix we use the thread the thread */ #endif - /* Define a function pointer type to use in a typecast */ typedef void* (*os_posix_f_t) (void*); @@ -68,7 +60,9 @@ os_thread_pf( /******************************************************************** Creates a new thread of execution. The execution starts from the function given. The start function takes a void* parameter -and returns a ulint. */ +and returns a ulint. +NOTE: We count the number of threads in os_thread_exit(). A created +thread should always use that to exit and not use return() to exit. 
*/ os_thread_t os_thread_create( @@ -85,12 +79,13 @@ os_thread_create( os_thread_id_t* thread_id); /* out: id of the created thread */ /********************************************************************* -A thread calling this function ends its execution. */ +Exits the current thread. */ void os_thread_exit( /*===========*/ - ulint code); /* in: exit code */ + void* exit_value); /* in: exit value; in Windows this void* + is cast as a DWORD */ /********************************************************************* Returns the thread identifier of current thread. */ @@ -146,7 +141,6 @@ ulint os_thread_get_last_error(void); /*==========================*/ - #ifndef UNIV_NONINL #include "os0thread.ic" #endif diff --git a/innobase/include/page0cur.h b/innobase/include/page0cur.h index 144e0e02b21..c3f0decdb4b 100644 --- a/innobase/include/page0cur.h +++ b/innobase/include/page0cur.h @@ -26,7 +26,12 @@ Created 10/4/1994 Heikki Tuuri #define PAGE_CUR_GE 2 #define PAGE_CUR_L 3 #define PAGE_CUR_LE 4 -#define PAGE_CUR_DBG 5 +#define PAGE_CUR_LE_OR_EXTENDS 5 /* This is a search mode used in + "column LIKE 'abc%' ORDER BY column DESC"; + we have to find strings which are <= 'abc' or + which extend it */ +#define PAGE_CUR_DBG 6 + extern ulint page_cur_short_succ; diff --git a/innobase/include/page0page.h b/innobase/include/page0page.h index 2f77127466f..04f771c3abd 100644 --- a/innobase/include/page0page.h +++ b/innobase/include/page0page.h @@ -666,6 +666,25 @@ page_rec_validate( /* out: TRUE if ok */ rec_t* rec); /* in: record on the page */ /******************************************************************* +Checks that the first directory slot points to the infimum record and +the last to the supremum. This function is intended to track if the +bug fixed in 4.0.14 has caused corruption to users' databases. 
*/ + +void +page_check_dir( +/*===========*/ + page_t* page); /* in: index page */ +/******************************************************************* +This function checks the consistency of an index page when we do not +know the index. This is also resilient so that this should never crash +even if the page is total garbage. */ + +ibool +page_simple_validate( +/*=================*/ + /* out: TRUE if ok */ + page_t* page); /* in: index page */ +/******************************************************************* This function checks the consistency of an index page. */ ibool diff --git a/innobase/include/que0que.h b/innobase/include/que0que.h index cdaeeae1fde..a3ed18e2b14 100644 --- a/innobase/include/que0que.h +++ b/innobase/include/que0que.h @@ -117,6 +117,7 @@ que_thr_stop( /************************************************************************** Moves a thread from another state to the QUE_THR_RUNNING state. Increments the n_active_thrs counters of the query graph and transaction. */ + void que_thr_move_to_run_state_for_mysql( /*================================*/ @@ -125,14 +126,17 @@ que_thr_move_to_run_state_for_mysql( /************************************************************************** A patch for MySQL used to 'stop' a dummy query thread used in MySQL select, when there is no error or lock wait. */ + void que_thr_stop_for_mysql_no_error( /*============================*/ que_thr_t* thr, /* in: query thread */ trx_t* trx); /* in: transaction */ /************************************************************************** -A patch for MySQL used to 'stop' a dummy query thread used in MySQL -select. */ +A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The +query thread is stopped and made inactive, except in the case where +it was put to the lock wait state in lock0lock.c, but the lock has already +been granted or the transaction chosen as a victim in deadlock resolution. 
*/ void que_thr_stop_for_mysql( diff --git a/innobase/include/read0read.h b/innobase/include/read0read.h index cebb2d6701c..db6bf888095 100644 --- a/innobase/include/read0read.h +++ b/innobase/include/read0read.h @@ -45,6 +45,14 @@ read_view_close( /*============*/ read_view_t* view); /* in: read view */ /************************************************************************* +Closes a consistent read view for MySQL. This function is called at an SQL +statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */ + +void +read_view_close_for_mysql( +/*======================*/ + trx_t* trx); /* in: trx which has a read view */ +/************************************************************************* Checks if a read view sees the specified transaction. */ UNIV_INLINE ibool diff --git a/innobase/include/rem0cmp.h b/innobase/include/rem0cmp.h index 6f2a99fc8c2..712e263350e 100644 --- a/innobase/include/rem0cmp.h +++ b/innobase/include/rem0cmp.h @@ -42,6 +42,22 @@ cmp_data_data( buffer) */ ulint len2); /* in: data field length or UNIV_SQL_NULL */ /***************************************************************** +This function is used to compare two data fields for which we know the +data type. */ + +int +cmp_data_data_slow( +/*===============*/ + /* out: 1, 0, -1, if data1 is greater, equal, + less than data2, respectively */ + dtype_t* cur_type,/* in: data type of the fields */ + byte* data1, /* in: data field (== a pointer to a memory + buffer) */ + ulint len1, /* in: data field length or UNIV_SQL_NULL */ + byte* data2, /* in: data field (== a pointer to a memory + buffer) */ + ulint len2); /* in: data field length or UNIV_SQL_NULL */ +/***************************************************************** This function is used to compare two dfields where at least the first has its data type field set. 
*/ UNIV_INLINE diff --git a/innobase/include/rem0rec.h b/innobase/include/rem0rec.h index 12e3a8b39d6..b28f39925c1 100644 --- a/innobase/include/rem0rec.h +++ b/innobase/include/rem0rec.h @@ -148,12 +148,22 @@ data field in the record. */ byte* rec_get_nth_field( /*==============*/ - /* out: pointer to the field, NULL if SQL null */ + /* out: pointer to the field */ rec_t* rec, /* in: record */ ulint n, /* in: index of the field */ ulint* len); /* out: length of the field; UNIV_SQL_NULL if SQL null */ /**************************************************************** +Return field length or UNIV_SQL_NULL. */ +UNIV_INLINE +ulint +rec_get_nth_field_len( +/*==================*/ + /* out: length of the field; UNIV_SQL_NULL if SQL + null */ + rec_t* rec, /* in: record */ + ulint n); /* in: index of the field */ +/**************************************************************** Gets the physical size of a field. Also an SQL null may have a field of size > 0, if the data type is of a fixed size. */ UNIV_INLINE diff --git a/innobase/include/rem0rec.ic b/innobase/include/rem0rec.ic index aaa3c58a003..9dfd4faeec8 100644 --- a/innobase/include/rem0rec.ic +++ b/innobase/include/rem0rec.ic @@ -65,6 +65,24 @@ a field stored to another page: */ #define REC_2BYTE_EXTERN_MASK 0x4000 +/**************************************************************** +Return field length or UNIV_SQL_NULL. */ +UNIV_INLINE +ulint +rec_get_nth_field_len( +/*==================*/ + /* out: length of the field; UNIV_SQL_NULL if SQL + null */ + rec_t* rec, /* in: record */ + ulint n) /* in: index of the field */ +{ + ulint len; + + rec_get_nth_field(rec, n, &len); + + return(len); +} + /*************************************************************** Sets the value of the ith field SQL null bit. 
*/ diff --git a/innobase/include/row0ins.h b/innobase/include/row0ins.h index cc3b9fa7e9a..a5b4b74e7fc 100644 --- a/innobase/include/row0ins.h +++ b/innobase/include/row0ins.h @@ -35,7 +35,6 @@ row_ins_check_foreign_constraint( dictionary cache if they exist at all */ dict_table_t* table, /* in: if check_ref is TRUE, then the foreign table, else the referenced table */ - dict_index_t* index, /* in: index in table */ dtuple_t* entry, /* in: index entry for index */ que_thr_t* thr); /* in: query thread */ /************************************************************************* diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h index 75c16384458..940b4c61b2f 100644 --- a/innobase/include/row0mysql.h +++ b/innobase/include/row0mysql.h @@ -230,18 +230,35 @@ row_update_cascade_for_mysql( or set null operation */ dict_table_t* table); /* in: table where we do the operation */ /************************************************************************* -Locks the data dictionary exclusively for performing a table create -operation. */ +Locks the data dictionary exclusively for performing a table create or other +data dictionary modification operation. */ void -row_mysql_lock_data_dictionary(void); -/*================================*/ +row_mysql_lock_data_dictionary( +/*===========================*/ + trx_t* trx); /* in: transaction */ +/************************************************************************* +Unlocks the data dictionary exclusive lock. */ + +void +row_mysql_unlock_data_dictionary( +/*=============================*/ + trx_t* trx); /* in: transaction */ +/************************************************************************* +Locks the data dictionary in shared mode from modifications, for performing +foreign key check, rollback, or other operation invisible to MySQL. 
*/ + +void +row_mysql_freeze_data_dictionary( +/*=============================*/ + trx_t* trx); /* in: transaction */ /************************************************************************* -Unlocks the data dictionary exclusively lock. */ +Unlocks the data dictionary shared lock. */ void -row_mysql_unlock_data_dictionary(void); -/*==================================*/ +row_mysql_unfreeze_data_dictionary( +/*===============================*/ + trx_t* trx); /* in: transaction */ /************************************************************************* Does a table creation operation for MySQL. If the name of the created table ends to characters INNODB_MONITOR, then this also starts @@ -310,11 +327,9 @@ output by the master thread. */ int row_drop_table_for_mysql( /*=====================*/ - /* out: error code or DB_SUCCESS */ - char* name, /* in: table name */ - trx_t* trx, /* in: transaction handle */ - ibool has_dict_mutex);/* in: TRUE if the caller already owns the - dictionary system mutex */ + /* out: error code or DB_SUCCESS */ + char* name, /* in: table name */ + trx_t* trx); /* in: transaction handle */ /************************************************************************* Drops a database for MySQL. */ @@ -393,7 +408,10 @@ struct row_prebuilt_struct { an SQL statement: we may have to set an intention lock on the table, create a consistent read view etc. 
*/ - ibool mysql_has_locked; + ibool mysql_has_locked; /* this is set TRUE when MySQL + calls external_lock on this handle + with a lock flag, and set FALSE when + with the F_UNLOCK flag */ ibool clust_index_was_generated; /* if the user did not define a primary key in MySQL, then Innobase @@ -401,13 +419,21 @@ struct row_prebuilt_struct { index where the ordering column is the row id: in this case this flag is set to TRUE */ - dict_index_t* index; /* current index for a search, if any */ + dict_index_t* index; /* current index for a search, if + any */ ulint read_just_key; /* set to 1 when MySQL calls ha_innobase::extra with the argument HA_EXTRA_KEYREAD; it is enough to read just columns defined in the index (i.e., no read of the clustered index record necessary) */ + ibool used_in_HANDLER;/* TRUE if we have been using this + handle in a MySQL HANDLER low level + index cursor command: then we must + store the pcur position even in a + unique search from a clustered index, + because HANDLER allows NEXT and PREV + in such a situation */ ulint template_type; /* ROW_MYSQL_WHOLE_ROW, ROW_MYSQL_REC_FIELDS, ROW_MYSQL_DUMMY_TEMPLATE, or @@ -474,7 +500,11 @@ struct row_prebuilt_struct { fetch many rows from the same cursor: it saves CPU time to fetch them in a batch; we reserve mysql_row_len - bytes for each such row */ + bytes for each such row; these + pointers point 4 bytes past the + allocated mem buf start, because + there is a 4 byte magic number at the + start and at the end */ ulint fetch_cache_first;/* position of the first not yet fetched row in fetch_cache */ ulint n_fetch_cached; /* number of not yet fetched rows @@ -483,8 +513,12 @@ struct row_prebuilt_struct { to this heap */ mem_heap_t* old_vers_heap; /* memory heap where a previous version is built in consistent read */ + ulint magic_n2; /* this should be the same as + magic_n */ }; +#define ROW_PREBUILT_FETCH_MAGIC_N 465765687 + #define ROW_MYSQL_WHOLE_ROW 0 #define ROW_MYSQL_REC_FIELDS 1 #define 
ROW_MYSQL_NO_TEMPLATE 2 diff --git a/innobase/include/row0mysql.ic b/innobase/include/row0mysql.ic index 6096e5771f7..4ecd66e06ec 100644 --- a/innobase/include/row0mysql.ic +++ b/innobase/include/row0mysql.ic @@ -15,7 +15,8 @@ row_mysql_store_var_len( /*====================*/ /* out: dest + 2 */ byte* dest, /* in: where to store */ - ulint len) /* in: length, must fit in two bytes */ + ulint len __attribute__((unused))) /* in: length, must fit in two + bytes */ { ut_ad(len < 256 * 256); /* @@ -57,7 +58,8 @@ row_mysql_store_col_in_innobase_format( /*===================================*/ dfield_t* dfield, /* in/out: dfield */ byte* buf, /* in/out: buffer for the converted - value */ + value; this must be at least col_len + long! */ byte* mysql_data, /* in: MySQL column value, not SQL NULL; NOTE that dfield may also get a pointer to mysql_data, @@ -95,7 +97,6 @@ row_mysql_store_col_in_innobase_format( while (col_len > 0 && ptr[col_len - 1] == ' ') { col_len--; } - } else if (type == DATA_BLOB) { ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len); } diff --git a/innobase/include/row0row.h b/innobase/include/row0row.h index 09a79e19fd7..d1befbbbad3 100644 --- a/innobase/include/row0row.h +++ b/innobase/include/row0row.h @@ -86,9 +86,10 @@ dtuple_t* row_build( /*======*/ /* out, own: row built; see the NOTE below! 
*/ - ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS: - the former copies also the data fields to - heap as the latter only places pointers to + ulint type, /* in: ROW_COPY_POINTERS, ROW_COPY_DATA, or + ROW_COPY_ALSO_EXTERNALS, + the two last copy also the data fields to + heap as the first only places pointers to data fields on the index page, and thus is more efficient */ dict_index_t* index, /* in: clustered index */ diff --git a/innobase/include/row0sel.h b/innobase/include/row0sel.h index a64d3f8e425..5ef7ff9399a 100644 --- a/innobase/include/row0sel.h +++ b/innobase/include/row0sel.h @@ -87,9 +87,11 @@ row_printf_step( /* out: query thread to run next or NULL */ que_thr_t* thr); /* in: query thread */ /******************************************************************** -Converts a key value stored in MySQL format to an Innobase dtuple. -The last field of the key value may be just a prefix of a fixed length -field: hence the parameter key_len. */ +Converts a key value stored in MySQL format to an Innobase dtuple. The last +field of the key value may be just a prefix of a fixed length field: hence +the parameter key_len. But currently we do not allow search keys where the +last field is only a prefix of the full key field len and print a warning if +such appears. */ void row_sel_convert_mysql_key_to_innobase( @@ -100,6 +102,7 @@ row_sel_convert_mysql_key_to_innobase( to index! */ byte* buf, /* in: buffer to use in field conversions */ + ulint buf_len, /* in: buffer length */ dict_index_t* index, /* in: index of the key value */ byte* key_ptr, /* in: MySQL key value */ ulint key_len); /* in: MySQL key value length */ @@ -133,6 +136,18 @@ row_search_for_mysql( then prebuilt must have a pcur with stored position! In opening of a cursor 'direction' should be 0. 
*/ +/*********************************************************************** +Checks if MySQL at the moment is allowed for this table to retrieve a +consistent read result, or store it to the query cache. */ + +ibool +row_search_check_if_query_cache_permitted( +/*======================================*/ + /* out: TRUE if storing or retrieving from + the query cache is permitted */ + trx_t* trx, /* in: transaction object */ + char* norm_name); /* in: concatenation of database name, '/' + char, table name */ /* A structure for caching column values for prefetched rows */ diff --git a/innobase/include/row0upd.h b/innobase/include/row0upd.h index 9a3e2463267..f5e0a88231f 100644 --- a/innobase/include/row0upd.h +++ b/innobase/include/row0upd.h @@ -114,15 +114,17 @@ row_upd_index_write_log( closed within this function */ mtr_t* mtr); /* in: mtr into whose log to write */ /*************************************************************** -Returns TRUE if row update changes size of some field in index. */ +Returns TRUE if row update changes size of some field in index or if some +field to be updated is stored externally in rec or update. */ ibool -row_upd_changes_field_size( -/*=======================*/ +row_upd_changes_field_size_or_external( +/*===================================*/ /* out: TRUE if the update changes the size of - some field in index */ - rec_t* rec, /* in: record in clustered index */ - dict_index_t* index, /* in: clustered index */ + some field in index or the field is external + in rec or update */ + rec_t* rec, /* in: record in index */ + dict_index_t* index, /* in: index */ upd_t* update);/* in: update vector */ /*************************************************************** Replaces the new column values stored in the update vector to the record @@ -170,21 +172,33 @@ Replaces the new column values stored in the update vector to the index entry given. 
*/ void -row_upd_index_replace_new_col_vals( -/*===============================*/ +row_upd_index_replace_new_col_vals_index_pos( +/*=========================================*/ dtuple_t* entry, /* in/out: index entry where replaced */ - dict_index_t* index, /* in: index; NOTE that may also be a + dict_index_t* index, /* in: index; NOTE that this may also be a non-clustered index */ - upd_t* update); /* in: update vector */ + upd_t* update, /* in: an update vector built for the index so + that the field number in an upd_field is the + index position */ + mem_heap_t* heap); /* in: memory heap to which we allocate and + copy the new values, set this as NULL if you + do not want allocation */ /*************************************************************** -Replaces the new column values stored in the update vector to the -clustered index entry given. */ +Replaces the new column values stored in the update vector to the index entry +given. */ void -row_upd_clust_index_replace_new_col_vals( -/*=====================================*/ +row_upd_index_replace_new_col_vals( +/*===============================*/ dtuple_t* entry, /* in/out: index entry where replaced */ - upd_t* update); /* in: update vector */ + dict_index_t* index, /* in: index; NOTE that this may also be a + non-clustered index */ + upd_t* update, /* in: an update vector built for the + CLUSTERED index so that the field number in + an upd_field is the clustered index position */ + mem_heap_t* heap); /* in: memory heap to which we allocate and + copy the new values, set this as NULL if you + do not want allocation */ /*************************************************************** Checks if an update vector changes an ordering field of an index record. 
This function is fast if the update vector is short or the number of ordering @@ -203,7 +217,9 @@ row_upd_changes_ord_field_binary( known when this function is called, e.g., at compile time */ dict_index_t* index, /* in: index of the record */ - upd_t* update);/* in: update vector for the row */ + upd_t* update);/* in: update vector for the row; NOTE: the + field numbers in this MUST be clustered index + positions! */ /*************************************************************** Checks if an update vector changes an ordering field of an index record. This function is fast if the update vector is short or the number of ordering @@ -275,7 +291,10 @@ row_upd_index_parse( /* Update vector field */ struct upd_field_struct{ - ulint field_no; /* field number in the clustered + ulint field_no; /* field number in an index, usually + the clustered index, but in updating + a secondary index record in btr0cur.c + this is the position in the secondary index */ que_node_t* exp; /* expression for calculating a new value: it refers to column values and @@ -312,8 +331,11 @@ struct upd_node_struct{ ibool in_mysql_interface; /* TRUE if the update node was created for the MySQL interface */ + dict_foreign_t* foreign;/* NULL or pointer to a foreign key + constraint if this update node is used in + doing an ON DELETE or ON UPDATE operation */ upd_node_t* cascade_node;/* NULL or an update node template which - is used to implement ON DELETE CASCADE + is used to implement ON DELETE/UPDATE CASCADE or ... 
SET NULL for foreign keys */ mem_heap_t* cascade_heap;/* NULL or a mem heap where the cascade node is created */ @@ -355,9 +377,9 @@ struct upd_node_struct{ externally in the clustered index record of row */ ulint n_ext_vec;/* number of fields in ext_vec */ - mem_heap_t* heap; /* memory heap used as auxiliary storage for - row; this must be emptied after a successful - update if node->row != NULL */ + mem_heap_t* heap; /* memory heap used as auxiliary storage; + this must be emptied after a successful + update */ /*----------------------*/ sym_node_t* table_sym;/* table node in symbol table */ que_node_t* col_assign_list; diff --git a/innobase/include/row0vers.ic b/innobase/include/row0vers.ic index aa7a7aa2299..5ece47c35d1 100644 --- a/innobase/include/row0vers.ic +++ b/innobase/include/row0vers.ic @@ -60,7 +60,7 @@ row_vers_sec_rec_may_see_older( /*===========================*/ /* out: FALSE if can be read immediately */ rec_t* rec, /* in: record which should be read or passed */ - dict_index_t* index, /* in: secondary index */ + dict_index_t* index __attribute__((unused)),/* in: secondary index */ read_view_t* view) /* in: read view */ { page_t* page; diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h index f457d52dec7..02d3d3bba0a 100644 --- a/innobase/include/srv0srv.h +++ b/innobase/include/srv0srv.h @@ -17,6 +17,8 @@ Created 10/10/1995 Heikki Tuuri #include "que0types.h" #include "trx0types.h" +extern char* srv_main_thread_op_info; + /* Buffer which can be used in printing fatal error messages */ extern char srv_fatal_errbuf[]; @@ -28,6 +30,9 @@ extern os_event_t srv_lock_timeout_thread_event; at a time */ #define SRV_AUTO_EXTEND_INCREMENT (8 * ((1024 * 1024) / UNIV_PAGE_SIZE)) +/* This is set to TRUE if the MySQL user has set it in MySQL */ +extern ibool srv_lower_case_table_names; + /* Server parameters which are read from the initfile */ extern char* srv_data_home; @@ -57,8 +62,6 @@ extern ulint srv_flush_log_at_trx_commit; extern byte 
srv_latin1_ordering[256];/* The sort order table of the latin1 character set */ -extern ibool srv_use_native_aio; - extern ulint srv_pool_size; extern ulint srv_mem_pool_size; extern ulint srv_lock_table_size; @@ -70,11 +73,17 @@ extern dulint srv_archive_recovery_limit_lsn; extern ulint srv_lock_wait_timeout; -extern char* srv_unix_file_flush_method_str; +extern char* srv_file_flush_method_str; extern ulint srv_unix_file_flush_method; +extern ulint srv_win_file_flush_method; + +extern ulint srv_max_dirty_pages_pct; + extern ulint srv_force_recovery; extern ulint srv_thread_concurrency; +extern ulint srv_max_n_threads; + extern lint srv_conc_n_threads; extern ibool srv_fast_shutdown; @@ -94,6 +103,7 @@ extern ulint srv_n_rows_read; extern ibool srv_print_innodb_monitor; extern ibool srv_print_innodb_lock_monitor; extern ibool srv_print_innodb_tablespace_monitor; +extern ibool srv_print_verbose_log; extern ibool srv_print_innodb_table_monitor; extern ibool srv_lock_timeout_and_monitor_active; @@ -147,18 +157,26 @@ extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs, /* Array of English strings describing the current state of an i/o handler thread */ extern char* srv_io_thread_op_info[]; +extern char* srv_io_thread_function[]; typedef struct srv_sys_struct srv_sys_t; /* The server system */ extern srv_sys_t* srv_sys; -/* Alternatives for the field flush option in Unix; see the InnoDB manual about +/* Alternatives for the file flush option in Unix; see the InnoDB manual about what these mean */ -#define SRV_UNIX_FDATASYNC 1 +#define SRV_UNIX_FDATASYNC 1 /* This is the default; it is currently mapped + to a call of fsync() because fdatasync() + seemed to corrupt files in Linux and Solaris */ #define SRV_UNIX_O_DSYNC 2 #define SRV_UNIX_LITTLESYNC 3 #define SRV_UNIX_NOSYNC 4 +#define SRV_UNIX_O_DIRECT 5 + +/* Alternatives for file i/o in Windows */ +#define SRV_WIN_IO_NORMAL 1 +#define SRV_WIN_IO_UNBUFFERED 2 /* This is the default */ /* 
Alternatives for srv_force_recovery. Non-zero values are intended to help the user get a damaged database up so that he can dump intact @@ -197,6 +215,12 @@ void srv_init(void); /*==========*/ /************************************************************************* +Frees the OS fast mutex created in srv_init(). */ + +void +srv_free(void); +/*==========*/ +/************************************************************************* Initializes the synchronization primitives, memory system, and the thread local storage. */ @@ -310,15 +334,17 @@ srv_conc_exit_innodb( trx_t* trx); /* in: transaction object associated with the thread */ /******************************************************************* -Puts a MySQL OS thread to wait for a lock to be released. */ +Puts a MySQL OS thread to wait for a lock to be released. If an error +occurs during the wait trx->error_state associated with thr is +!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK +are possible errors. DB_DEADLOCK is returned if selective deadlock +resolution chose this transaction as a victim. */ -ibool +void srv_suspend_mysql_thread( /*=====================*/ - /* out: TRUE if the lock wait timeout was - exceeded */ - que_thr_t* thr); /* in: query thread associated with - the MySQL OS thread */ + que_thr_t* thr); /* in: query thread associated with the MySQL + OS thread */ /************************************************************************ Releases a MySQL OS thread waiting for a lock to be released, if the thread is already suspended. 
*/ @@ -406,3 +432,4 @@ struct srv_sys_struct{ extern ulint srv_n_threads_active[]; #endif + diff --git a/innobase/include/srv0start.h b/innobase/include/srv0start.h index 646d2c1bb06..8d2c3fa12c5 100644 --- a/innobase/include/srv0start.h +++ b/innobase/include/srv0start.h @@ -79,15 +79,19 @@ innobase_shutdown_for_mysql(void); /*=============================*/ /* out: DB_SUCCESS or error code */ +extern ulint srv_sizeof_trx_t_in_ha_innodb_cc; + +extern ibool srv_is_being_started; extern ibool srv_startup_is_before_trx_rollback_phase; extern ibool srv_is_being_shut_down; /* At a shutdown the value first climbs from 0 to SRV_SHUTDOWN_CLEANUP -and then to SRV_SHUTDOWN_LAST_PHASE */ +and then to SRV_SHUTDOWN_LAST_PHASE, and so on */ extern ulint srv_shutdown_state; -#define SRV_SHUTDOWN_CLEANUP 1 -#define SRV_SHUTDOWN_LAST_PHASE 2 +#define SRV_SHUTDOWN_CLEANUP 1 +#define SRV_SHUTDOWN_LAST_PHASE 2 +#define SRV_SHUTDOWN_EXIT_THREADS 3 #endif diff --git a/innobase/include/sync0rw.h b/innobase/include/sync0rw.h index 7ad38f5bc7f..5aa3dcdffc3 100644 --- a/innobase/include/sync0rw.h +++ b/innobase/include/sync0rw.h @@ -335,7 +335,8 @@ ibool rw_lock_own( /*========*/ rw_lock_t* lock, /* in: rw-lock */ - ulint lock_type); /* in: lock type */ + ulint lock_type); /* in: lock type: RW_LOCK_SHARED, + RW_LOCK_EX */ /********************************************************************** Checks if somebody has locked the rw-lock in the specified mode. 
*/ diff --git a/innobase/include/sync0rw.ic b/innobase/include/sync0rw.ic index 7015ff34b99..36ef0a985ed 100644 --- a/innobase/include/sync0rw.ic +++ b/innobase/include/sync0rw.ic @@ -126,7 +126,8 @@ rw_lock_s_lock_low( /*===============*/ /* out: TRUE if success */ rw_lock_t* lock, /* in: pointer to rw-lock */ - ulint pass, /* in: pass value; != 0, if the lock will be + ulint pass __attribute__((unused)), + /* in: pass value; != 0, if the lock will be passed to another thread to unlock */ char* file_name, /* in: file name where lock requested */ ulint line) /* in: line where requested */ diff --git a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h index 5bfa0bc2d48..320f8faf12d 100644 --- a/innobase/include/sync0sync.h +++ b/innobase/include/sync0sync.h @@ -371,10 +371,12 @@ or row lock! */ #define SYNC_NO_ORDER_CHECK 3000 /* this can be used to suppress latching order checking */ #define SYNC_LEVEL_NONE 2000 /* default: level not defined */ -#define SYNC_FOREIGN_KEY_CHECK 1001 +#define SYNC_DICT_OPERATION 1001 /* table create, drop, etc. reserve + this in X-mode, implicit or background + operations purge, rollback, foreign + key checks reserve this in S-mode */ #define SYNC_DICT 1000 #define SYNC_DICT_AUTOINC_MUTEX 999 -#define SYNC_PURGE_IS_RUNNING 997 #define SYNC_DICT_HEADER 995 #define SYNC_IBUF_HEADER 914 #define SYNC_IBUF_PESS_INSERT_MUTEX 912 diff --git a/innobase/include/trx0purge.h b/innobase/include/trx0purge.h index 087be2f060e..049c79aec9b 100644 --- a/innobase/include/trx0purge.h +++ b/innobase/include/trx0purge.h @@ -111,9 +111,6 @@ struct trx_purge_struct{ of the trx system and it never ends */ que_t* query; /* The query graph which will do the parallelized purge operation */ - rw_lock_t purge_is_running;/* Purge operation set an x-latch here - while it is accessing a table: this - prevents dropping of the table */ rw_lock_t latch; /* The latch protecting the purge view.
A purge operation must acquire an x-latch here for the instant at which diff --git a/innobase/include/trx0roll.h b/innobase/include/trx0roll.h index 820af4cd014..0d7126c9c57 100644 --- a/innobase/include/trx0roll.h +++ b/innobase/include/trx0roll.h @@ -177,6 +177,55 @@ trx_general_rollback_for_mysql( ibool partial,/* in: TRUE if partial rollback requested */ trx_savept_t* savept);/* in: pointer to savepoint undo number, if partial rollback requested */ +/*********************************************************************** +Rolls back a transaction back to a named savepoint. Modifications after the +savepoint are undone but InnoDB does NOT release the corresponding locks +which are stored in memory. If a lock is 'implicit', that is, a new inserted +row holds a lock where the lock information is carried by the trx id stored in +the row, these locks are naturally released in the rollback. Savepoints which +were set after this savepoint are deleted. */ + +ulint +trx_rollback_to_savepoint_for_mysql( +/*================================*/ + /* out: if no savepoint + of the name found then + DB_NO_SAVEPOINT, + otherwise DB_SUCCESS */ + trx_t* trx, /* in: transaction handle */ + char* savepoint_name, /* in: savepoint name */ + ib_longlong* mysql_binlog_cache_pos);/* out: the MySQL binlog cache + position corresponding to this + savepoint; MySQL needs this + information to remove the + binlog entries of the queries + executed after the savepoint */ +/*********************************************************************** +Creates a named savepoint. If the transaction is not yet started, starts it. +If there is already a savepoint of the same name, this call erases that old +savepoint and replaces it with a new. Savepoints are deleted in a transaction +commit or rollback. 
*/ + +ulint +trx_savepoint_for_mysql( +/*====================*/ + /* out: always DB_SUCCESS */ + trx_t* trx, /* in: transaction handle */ + char* savepoint_name, /* in: savepoint name */ + ib_longlong binlog_cache_pos); /* in: MySQL binlog cache + position corresponding to this + connection at the time of the + savepoint */ +/*********************************************************************** +Frees savepoint structs. */ + +void +trx_roll_savepoints_free( +/*=====================*/ + trx_t* trx, /* in: transaction handle */ + trx_named_savept_t* savep); /* in: free all savepoints > this one; + if this is NULL, free all savepoints + of trx */ extern sess_t* trx_dummy_sess; @@ -207,6 +256,21 @@ struct roll_node_struct{ case of a partial rollback */ }; +/* A savepoint set with SQL's "SAVEPOINT savepoint_id" command */ +struct trx_named_savept_struct{ + char* name; /* savepoint name */ + trx_savept_t savept; /* the undo number corresponding to + the savepoint */ + ib_longlong mysql_binlog_cache_pos; + /* the MySQL binlog cache position + corresponding to this savepoint, not + defined if the MySQL binlogging is not + enabled */ + UT_LIST_NODE_T(trx_named_savept_t) + trx_savepoints; /* the list of savepoints of a + transaction */ +}; + /* Rollback node states */ #define ROLL_NODE_SEND 1 #define ROLL_NODE_WAIT 2 diff --git a/innobase/include/trx0sys.h b/innobase/include/trx0sys.h index a54a6424a4f..a8ed675a8a5 100644 --- a/innobase/include/trx0sys.h +++ b/innobase/include/trx0sys.h @@ -24,6 +24,18 @@ Created 3/26/1996 Heikki Tuuri #include "fsp0fsp.h" #include "read0types.h" +/* Do NOT merge this to the 4.1 code base! */ +extern ibool trx_sys_downgrading_from_4_1_1; + +/******************************************************************** +Do NOT merge this to the 4.1 code base! +Marks the trx sys header when we have successfully downgraded from the >= 4.1.1 +multiple tablespace format back to the 4.0 format. 
*/ + +void +trx_sys_mark_downgraded_from_4_1_1(void); +/*====================================*/ + /* In a MySQL replication slave, in crash recovery we store the master log file name and position here. We have successfully got the updates to InnoDB up to this position. If .._pos is -1, it means no crash recovery was needed, @@ -354,8 +366,14 @@ this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */ sys header is half-written to disk, we still may be able to recover the information */ +#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED (24 + FSEG_HEADER_SIZE) + /* If this is set to + .._N, then we are + DOWNGRADING from >= 4.1.1 to + 4.0 */ /*-------------------------------------------------------------*/ #define TRX_SYS_DOUBLEWRITE_MAGIC_N 536853855 +#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N 1783657386 #define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE diff --git a/innobase/include/trx0sys.ic b/innobase/include/trx0sys.ic index ada2d8cb19c..343e6d7c2fa 100644 --- a/innobase/include/trx0sys.ic +++ b/innobase/include/trx0sys.ic @@ -296,6 +296,16 @@ trx_is_active( return(FALSE); } + if (ut_dulint_cmp(trx_id, trx_sys->max_trx_id) >= 0) { + + /* There must be corruption: we return TRUE because this + function is only called by lock_clust_rec_some_has_impl() + and row_vers_impl_x_locked_off_kernel() and they have + diagnostic prints in this case */ + + return(TRUE); + } + trx = trx_get_on_id(trx_id); if (trx && (trx->conc_state == TRX_ACTIVE)) { diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h index e1f65e9da0f..6b08b674db8 100644 --- a/innobase/include/trx0trx.h +++ b/innobase/include/trx0trx.h @@ -118,6 +118,14 @@ trx_start_if_not_started( /*=====================*/ trx_t* trx); /* in: transaction */ /***************************************************************** +Starts the transaction if it is not yet started. Assumes we have reserved +the kernel mutex! 
*/ +UNIV_INLINE +void +trx_start_if_not_started_low( +/*=========================*/ + trx_t* trx); /* in: transaction */ +/***************************************************************** Starts the transaction if it is not yet started. */ void @@ -149,6 +157,15 @@ trx_commit_for_mysql( /* out: 0 or error number */ trx_t* trx); /* in: trx handle */ /************************************************************************** +If required, flushes the log to disk if we called trx_commit_for_mysql() +with trx->flush_log_later == TRUE. */ + +ulint +trx_commit_complete_for_mysql( +/*==========================*/ + /* out: 0 or error number */ + trx_t* trx); /* in: trx handle */ +/************************************************************************** Marks the latest SQL statement ended. */ void @@ -319,6 +336,7 @@ struct trx_struct{ time_t start_time; /* time the trx object was created or the state last time became TRX_ACTIVE */ + ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */ ibool check_foreigns; /* normally TRUE, but if the user wants to suppress foreign key checks, (in table imports, for example) we @@ -334,6 +352,11 @@ struct trx_struct{ dulint no; /* transaction serialization number == max trx id when the transaction is moved to COMMITTED_IN_MEMORY state */ + ibool flush_log_later;/* when we commit the transaction + in MySQL's binlog write, we will + flush the log to disk later in + a separate call */ + dulint commit_lsn; /* lsn at the time of the commit */ ibool dict_operation; /* TRUE if the trx is used to create a table, create an index, or drop a table */ @@ -342,6 +365,9 @@ struct trx_struct{ /*------------------------------*/ void* mysql_thd; /* MySQL thread handle corresponding to this trx, or NULL */ + char** mysql_query_str;/* pointer to the field in mysqld_thd + which contains the pointer to the + current SQL query string */ char* mysql_log_file_name; /* if MySQL binlog is used, this field contains a pointer to the latest file @@ -355,7 
+381,8 @@ struct trx_struct{ replication slave, we have here the master binlog name up to which replication has processed; otherwise - this is a pointer to a null character */ + this is a pointer to a null + character */ ib_longlong mysql_master_log_pos; /* if the database server is a MySQL replication slave, this is the @@ -363,6 +390,9 @@ struct trx_struct{ replication has processed */ os_thread_id_t mysql_thread_id;/* id of the MySQL thread associated with this transaction object */ + ulint mysql_process_no;/* since in Linux, 'top' reports + process id's and not thread id's, we + store the process number too */ /*------------------------------*/ ulint n_mysql_tables_in_use; /* number of Innobase tables used in the processing of the current @@ -371,9 +401,10 @@ struct trx_struct{ /* how many tables the current SQL statement uses, except those in consistent read */ - ibool has_dict_foreign_key_check_lock; - /* TRUE if the trx currently holds - an s-lock on dict_foreign_... */ + ibool dict_operation_lock_mode; + /* 0, RW_S_LATCH, or RW_X_LATCH: + the latch mode trx currently holds + on dict_operation_lock */ ibool has_search_latch; /* TRUE if this trx has latched the search system latch in S-mode */ @@ -402,46 +433,17 @@ struct trx_struct{ lock_t* auto_inc_lock; /* possible auto-inc lock reserved by the transaction; note that it is also in the lock list trx_locks */ - ibool ignore_duplicates_in_insert; - /* in an insert roll back only insert - of the latest row in case - of a duplicate key error */ UT_LIST_NODE_T(trx_t) trx_list; /* list of transactions */ UT_LIST_NODE_T(trx_t) mysql_trx_list; /* list of transactions created for MySQL */ /*------------------------------*/ - mutex_t undo_mutex; /* mutex protecting the fields in this - section (down to undo_no_arr), EXCEPT - last_sql_stat_start, which can be - accessed only when we know that there - cannot be any activity in the undo - logs! 
*/ - dulint undo_no; /* next undo log record number to - assign */ - trx_savept_t last_sql_stat_start; - /* undo_no when the last sql statement - was started: in case of an error, trx - is rolled back down to this undo - number; see note at undo_mutex! */ - trx_rseg_t* rseg; /* rollback segment assigned to the - transaction, or NULL if not assigned - yet */ - trx_undo_t* insert_undo; /* pointer to the insert undo log, or - NULL if no inserts performed yet */ - trx_undo_t* update_undo; /* pointer to the update undo log, or - NULL if no update performed yet */ - dulint roll_limit; /* least undo number to undo during - a rollback */ - ulint pages_undone; /* number of undo log pages undone - since the last undo log truncation */ - trx_undo_arr_t* undo_no_arr; /* array of undo numbers of undo log - records which are currently processed - by a rollback operation */ - /*------------------------------*/ ulint error_state; /* 0 if no error, otherwise error - number */ + number; NOTE That ONLY the thread + doing the transaction is allowed to + set this field: this is NOT protected + by the kernel mutex */ void* error_info; /* if the error number indicates a duplicate key error, a pointer to the problematic index is stored here */ @@ -478,6 +480,12 @@ struct trx_struct{ TRX_QUE_LOCK_WAIT, this points to the lock request, otherwise this is NULL */ + ibool was_chosen_as_deadlock_victim; + /* when the transaction decides to wait + for a lock, it sets this to FALSE; + if another transaction chooses this + transaction as a victim in deadlock + resolution, it sets this to TRUE */ time_t wait_started; /* lock wait started at this time */ UT_LIST_BASE_NODE_T(que_thr_t) wait_thrs; /* query threads belonging to this @@ -493,6 +501,38 @@ struct trx_struct{ /*------------------------------*/ mem_heap_t* read_view_heap; /* memory heap for the read view */ read_view_t* read_view; /* consistent read view or NULL */ + /*------------------------------*/ +
UT_LIST_BASE_NODE_T(trx_named_savept_t) + trx_savepoints; /* savepoints set with SAVEPOINT ..., + oldest first */ + /*------------------------------*/ + mutex_t undo_mutex; /* mutex protecting the fields in this + section (down to undo_no_arr), EXCEPT + last_sql_stat_start, which can be + accessed only when we know that there + cannot be any activity in the undo + logs! */ + dulint undo_no; /* next undo log record number to + assign */ + trx_savept_t last_sql_stat_start; + /* undo_no when the last sql statement + was started: in case of an error, trx + is rolled back down to this undo + number; see note at undo_mutex! */ + trx_rseg_t* rseg; /* rollback segment assigned to the + transaction, or NULL if not assigned + yet */ + trx_undo_t* insert_undo; /* pointer to the insert undo log, or + NULL if no inserts performed yet */ + trx_undo_t* update_undo; /* pointer to the update undo log, or + NULL if no update performed yet */ + dulint roll_limit; /* least undo number to undo during + a rollback */ + ulint pages_undone; /* number of undo log pages undone + since the last undo log truncation */ + trx_undo_arr_t* undo_no_arr; /* array of undo numbers of undo log + records which are currently processed + by a rollback operation */ }; #define TRX_MAX_N_THREADS 32 /* maximum number of concurrent @@ -515,6 +555,41 @@ struct trx_struct{ #define TRX_QUE_ROLLING_BACK 3 /* transaction is rolling back */ #define TRX_QUE_COMMITTING 4 /* transaction is committing */ +/* Transaction isolation levels */ +#define TRX_ISO_READ_UNCOMMITTED 1 /* dirty read: non-locking + SELECTs are performed so that + we do not look at a possible + earlier version of a record; + thus they are not 'consistent' + reads under this isolation + level; otherwise like level + 2 */ + +#define TRX_ISO_READ_COMMITTED 2 /* somewhat Oracle-like + isolation, except that in + range UPDATE and DELETE we + must block phantom rows + with next-key locks; + SELECT ... FOR UPDATE and ... 
+ LOCK IN SHARE MODE only lock + the index records, NOT the + gaps before them, and thus + allow free inserting; + each consistent read reads its + own snapshot */ + +#define TRX_ISO_REPEATABLE_READ 3 /* this is the default; + all consistent reads in the + same trx read the same + snapshot; + full next-key locking used + in locking reads to block + insertions into gaps */ + +#define TRX_ISO_SERIALIZABLE 4 /* all plain SELECTs are + converted to LOCK IN SHARE + MODE reads */ + /* Types of a trx signal */ #define TRX_SIG_NO_SIGNAL 100 #define TRX_SIG_TOTAL_ROLLBACK 1 diff --git a/innobase/include/trx0trx.ic b/innobase/include/trx0trx.ic index 9d453047600..78e5acda148 100644 --- a/innobase/include/trx0trx.ic +++ b/innobase/include/trx0trx.ic @@ -21,3 +21,22 @@ trx_start_if_not_started( trx_start(trx, ULINT_UNDEFINED); } } + +/***************************************************************** +Starts the transaction if it is not yet started. Assumes we have reserved +the kernel mutex! */ +UNIV_INLINE +void +trx_start_if_not_started_low( +/*=========================*/ + trx_t* trx) /* in: transaction */ +{ + ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY); + + if (trx->conc_state == TRX_NOT_STARTED) { + + trx_start_low(trx, ULINT_UNDEFINED); + } +} + + diff --git a/innobase/include/trx0types.h b/innobase/include/trx0types.h index b8befe7172f..2965eb4451f 100644 --- a/innobase/include/trx0types.h +++ b/innobase/include/trx0types.h @@ -24,6 +24,7 @@ typedef struct trx_undo_inf_struct trx_undo_inf_t; typedef struct trx_purge_struct trx_purge_t; typedef struct roll_node_struct roll_node_t; typedef struct commit_node_struct commit_node_t; +typedef struct trx_named_savept_struct trx_named_savept_t; /* Transaction savepoint */ typedef struct trx_savept_struct trx_savept_t; diff --git a/innobase/include/univ.i b/innobase/include/univ.i index b511ec044a2..4854e5a7b78 100644 --- a/innobase/include/univ.i +++ b/innobase/include/univ.i @@ -9,7 +9,8 @@ Created 1/20/1994 Heikki 
Tuuri #ifndef univ_i #define univ_i -#if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER) +#if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER) && !defined(__WIN__) +#undef __WIN__ #define __WIN__ #include <windows.h> @@ -29,7 +30,7 @@ Created 1/20/1994 Heikki Tuuri in compiling more Posix-compatible. These headers also define __WIN__ if we are compiling on Windows. */ -#include <global.h> +#include <my_global.h> #include <my_pthread.h> /* Include <sys/stat.h> to get S_I... macros defined for os0file.c */ @@ -56,6 +57,7 @@ of the 32-bit x86 assembler in mutex operations. */ Microsoft Visual C++ */ #if !defined(__GNUC__) && !defined(__WIN__) +#undef UNIV_MUST_NOT_INLINE /* Remove compiler warning */ #define UNIV_MUST_NOT_INLINE #endif @@ -98,6 +100,15 @@ memory is read outside the allocated blocks. */ #define YYDEBUG 1 +#ifdef HAVE_purify +/* The following sets all new allocated memory to zero before use: +this can be used to eliminate unnecessary Purify warnings, but note that +it also masks many bugs Purify could detect. For detailed Purify analysis it +is best to remove the define below and look through the warnings one +by one. */ +#define UNIV_SET_MEM_TO_ZERO +#endif + /* #define UNIV_SQL_DEBUG #define UNIV_LOG_DEBUG @@ -176,7 +187,11 @@ management to ensure correct alignment for doubles etc. */ /* Another basic type we use is unsigned long integer which is intended to be equal to the word size of the machine. 
*/ +#ifdef _WIN64 +typedef unsigned __int64 ulint; +#else typedef unsigned long int ulint; +#endif typedef long int lint; diff --git a/innobase/include/ut0dbg.h b/innobase/include/ut0dbg.h index 3407483696c..802557099fc 100644 --- a/innobase/include/ut0dbg.h +++ b/innobase/include/ut0dbg.h @@ -20,7 +20,6 @@ extern ibool ut_dbg_stop_threads; extern ulint* ut_dbg_null_ptr; - #define ut_a(EXPR)\ {\ ulint dbg_i;\ @@ -31,8 +30,41 @@ extern ulint* ut_dbg_null_ptr; " InnoDB: Assertion failure in thread %lu in file %s line %lu\n",\ os_thread_pf(os_thread_get_curr_id()), IB__FILE__,\ (ulint)__LINE__);\ + fprintf(stderr,\ + "InnoDB: Failing assertion: " #EXPR);\ fprintf(stderr,\ - "InnoDB: We intentionally generate a memory trap.\n");\ + "\nInnoDB: We intentionally generate a memory trap.\n");\ + fprintf(stderr,\ + "InnoDB: Send a detailed bug report to mysql@lists.mysql.com\n");\ + ut_dbg_stop_threads = TRUE;\ + dbg_i = *(ut_dbg_null_ptr);\ + if (dbg_i) {\ + ut_dbg_null_ptr = NULL;\ + }\ + }\ + if (ut_dbg_stop_threads) {\ + fprintf(stderr,\ + "InnoDB: Thread %lu stopped in file %s line %lu\n",\ + os_thread_pf(os_thread_get_curr_id()), IB__FILE__, (ulint)__LINE__);\ + os_thread_sleep(1000000000);\ + }\ +} + +/* This can be used if there are % characters in the assertion formula: +if we try to printf the formula gcc would complain of illegal print +format characters */ +#define ut_anp(EXPR)\ +{\ + ulint dbg_i;\ +\ + if (!((ulint)(EXPR) + ut_dbg_zero)) {\ + ut_print_timestamp(stderr);\ + fprintf(stderr,\ + " InnoDB: Assertion failure in thread %lu in file %s line %lu\n",\ + os_thread_pf(os_thread_get_curr_id()), IB__FILE__,\ + (ulint)__LINE__);\ + fprintf(stderr,\ + "\nInnoDB: We intentionally generate a memory trap.\n");\ fprintf(stderr,\ "InnoDB: Send a detailed bug report to mysql@lists.mysql.com\n");\ ut_dbg_stop_threads = TRUE;\ diff --git a/innobase/include/ut0mem.h b/innobase/include/ut0mem.h index 2d245e5f72f..4e8566eba1b 100644 --- a/innobase/include/ut0mem.h +++ 
b/innobase/include/ut0mem.h @@ -57,7 +57,7 @@ ut_free( /*====*/ void* ptr); /* in, own: memory block */ /************************************************************************** -Frees all allocated memory not freed yet. */ +Frees in shutdown all allocated memory not freed yet. */ void ut_free_all_mem(void); @@ -69,7 +69,7 @@ ut_strcpy(char* dest, char* sour); UNIV_INLINE ulint -ut_strlen(char* str); +ut_strlen(const char* str); UNIV_INLINE int diff --git a/innobase/include/ut0mem.ic b/innobase/include/ut0mem.ic index 7ae9bc8bd74..1049aee8ecc 100644 --- a/innobase/include/ut0mem.ic +++ b/innobase/include/ut0mem.ic @@ -36,7 +36,7 @@ ut_strcpy(char* dest, char* sour) UNIV_INLINE ulint -ut_strlen(char* str) +ut_strlen(const char* str) { return(strlen(str)); } |