summaryrefslogtreecommitdiff
path: root/innobase/include
diff options
context:
space:
mode:
Diffstat (limited to 'innobase/include')
-rw-r--r--innobase/include/Makefile.am2
-rw-r--r--innobase/include/Makefile.i4
-rw-r--r--innobase/include/btr0btr.h13
-rw-r--r--innobase/include/btr0btr.ic6
-rw-r--r--innobase/include/btr0cur.h32
-rw-r--r--innobase/include/btr0pcur.h8
-rw-r--r--innobase/include/btr0pcur.ic18
-rw-r--r--innobase/include/btr0sea.h14
-rw-r--r--innobase/include/buf0buf.h48
-rw-r--r--innobase/include/buf0buf.ic7
-rw-r--r--innobase/include/buf0lru.h25
-rw-r--r--innobase/include/data0data.h10
-rw-r--r--innobase/include/data0data.ic25
-rw-r--r--innobase/include/data0type.h66
-rw-r--r--innobase/include/data0type.ic35
-rw-r--r--innobase/include/db0err.h11
-rw-r--r--innobase/include/dict0dict.h86
-rw-r--r--innobase/include/dict0dict.ic3
-rw-r--r--innobase/include/dict0mem.h39
-rw-r--r--innobase/include/dyn0dyn.h1
-rw-r--r--innobase/include/fil0fil.h17
-rw-r--r--innobase/include/ha0ha.ic2
-rw-r--r--innobase/include/lock0lock.h68
-rw-r--r--innobase/include/log0log.h85
-rw-r--r--innobase/include/log0recv.h3
-rw-r--r--innobase/include/mem0mem.h14
-rw-r--r--innobase/include/mem0mem.ic7
-rw-r--r--innobase/include/os0file.h39
-rw-r--r--innobase/include/os0proc.h9
-rw-r--r--innobase/include/os0sync.h86
-rw-r--r--innobase/include/os0sync.ic1
-rw-r--r--innobase/include/os0thread.h22
-rw-r--r--innobase/include/page0cur.h7
-rw-r--r--innobase/include/page0page.h19
-rw-r--r--innobase/include/que0que.h8
-rw-r--r--innobase/include/read0read.h8
-rw-r--r--innobase/include/rem0cmp.h16
-rw-r--r--innobase/include/rem0rec.h12
-rw-r--r--innobase/include/rem0rec.ic18
-rw-r--r--innobase/include/row0ins.h1
-rw-r--r--innobase/include/row0mysql.h64
-rw-r--r--innobase/include/row0mysql.ic7
-rw-r--r--innobase/include/row0row.h7
-rw-r--r--innobase/include/row0sel.h21
-rw-r--r--innobase/include/row0upd.h64
-rw-r--r--innobase/include/row0vers.ic2
-rw-r--r--innobase/include/srv0srv.h49
-rw-r--r--innobase/include/srv0start.h10
-rw-r--r--innobase/include/sync0rw.h3
-rw-r--r--innobase/include/sync0rw.ic3
-rw-r--r--innobase/include/sync0sync.h6
-rw-r--r--innobase/include/trx0purge.h3
-rw-r--r--innobase/include/trx0roll.h64
-rw-r--r--innobase/include/trx0sys.h18
-rw-r--r--innobase/include/trx0sys.ic10
-rw-r--r--innobase/include/trx0trx.h149
-rw-r--r--innobase/include/trx0trx.ic19
-rw-r--r--innobase/include/trx0types.h1
-rw-r--r--innobase/include/univ.i19
-rw-r--r--innobase/include/ut0dbg.h36
-rw-r--r--innobase/include/ut0mem.h4
-rw-r--r--innobase/include/ut0mem.ic2
62 files changed, 1150 insertions, 306 deletions
diff --git a/innobase/include/Makefile.am b/innobase/include/Makefile.am
index fd5cc8b1a80..8664f6dfc17 100644
--- a/innobase/include/Makefile.am
+++ b/innobase/include/Makefile.am
@@ -55,5 +55,7 @@ noinst_HEADERS = btr0btr.h btr0btr.ic btr0cur.h btr0cur.ic \
ut0dbg.h ut0lst.h ut0mem.h ut0mem.ic ut0rnd.h ut0rnd.ic \
ut0sort.h ut0ut.h ut0ut.ic
+EXTRA_DIST = Makefile.i
+
# Don't update the files from bitkeeper
%::SCCS/s.%
diff --git a/innobase/include/Makefile.i b/innobase/include/Makefile.i
index 8c7e9910f26..f3e3fbe989e 100644
--- a/innobase/include/Makefile.i
+++ b/innobase/include/Makefile.i
@@ -1,8 +1,6 @@
# Makefile included in Makefile.am in every subdirectory
-libsdir = ../libs
-
-INCLUDES = -I../../include -I../include
+INCLUDES = -I$(srcdir)/../include -I$(srcdir)/../../include -I../../include
# Don't update the files from bitkeeper
%::SCCS/s.%
diff --git a/innobase/include/btr0btr.h b/innobase/include/btr0btr.h
index 7e9d4b73d90..8606fcd2a5c 100644
--- a/innobase/include/btr0btr.h
+++ b/innobase/include/btr0btr.h
@@ -408,6 +408,19 @@ btr_print_tree(
dict_tree_t* tree, /* in: tree */
ulint width); /* in: print this many entries from start
and end */
+/****************************************************************
+Checks the size and number of fields in a record based on the definition of
+the index. */
+
+ibool
+btr_index_rec_validate(
+/*====================*/
+ /* out: TRUE if ok */
+ rec_t* rec, /* in: index record */
+ dict_index_t* index, /* in: index */
+ ibool dump_on_error); /* in: TRUE if the function
+ should print hex dump of record
+ and page on error */
/******************************************************************
Checks the consistency of an index tree. */
diff --git a/innobase/include/btr0btr.ic b/innobase/include/btr0btr.ic
index 5c1c89e9840..09006828cc9 100644
--- a/innobase/include/btr0btr.ic
+++ b/innobase/include/btr0btr.ic
@@ -89,7 +89,7 @@ btr_page_get_level(
/*===============*/
/* out: level, leaf level == 0 */
page_t* page, /* in: index page */
- mtr_t* mtr) /* in: mini-transaction handle */
+ mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */
{
ut_ad(page && mtr);
@@ -121,7 +121,7 @@ btr_page_get_next(
/*==============*/
/* out: next page number */
page_t* page, /* in: index page */
- mtr_t* mtr) /* in: mini-transaction handle */
+ mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */
{
ut_ad(page && mtr);
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
@@ -155,7 +155,7 @@ btr_page_get_prev(
/*==============*/
/* out: prev page number */
page_t* page, /* in: index page */
- mtr_t* mtr) /* in: mini-transaction handle */
+ mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */
{
ut_ad(page && mtr);
diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h
index 7039ceba245..31aecef8104 100644
--- a/innobase/include/btr0cur.h
+++ b/innobase/include/btr0cur.h
@@ -188,22 +188,6 @@ btr_cur_pessimistic_insert(
que_thr_t* thr, /* in: query thread or NULL */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
-Updates a secondary index record when the update causes no size
-changes in its fields. The only case when this function is currently
-called is that in a char field characters change to others which
-are identified in the collation order. */
-
-ulint
-btr_cur_update_sec_rec_in_place(
-/*============================*/
- /* out: DB_SUCCESS or error number */
- btr_cur_t* cursor, /* in: cursor on the record to update;
- cursor stays valid and positioned on the
- same record */
- upd_t* update, /* in: update vector */
- que_thr_t* thr, /* in: query thread */
- mtr_t* mtr); /* in: mtr */
-/*****************************************************************
Updates a record when the update causes no size changes in its fields. */
ulint
@@ -507,7 +491,13 @@ void
btr_free_externally_stored_field(
/*=============================*/
dict_index_t* index, /* in: index of the data, the index
- tree MUST be X-latched */
+ tree MUST be X-latched; if the tree
+ height is 1, then also the root page
+ must be X-latched! (this is relevant
+ in the case this function is called
+ from purge where 'data' is located on
+ an undo log page, not an index
+ page) */
byte* data, /* in: internally stored data
+ reference to the externally
stored part */
@@ -684,7 +674,13 @@ and sleep this many microseconds in between */
#define BTR_CUR_RETRY_DELETE_N_TIMES 100
#define BTR_CUR_RETRY_SLEEP_TIME 50000
-/* The reference in a field of which data is stored on a different page */
+/* The reference in a field for which data is stored on a different page.
+The reference is at the end of the 'locally' stored part of the field.
+'Locally' means storage in the index record.
+We store locally a long enough prefix of each column so that we can determine
+the ordering parts of each index record without looking into the externally
+stored part. */
+
/*--------------------------------------*/
#define BTR_EXTERN_SPACE_ID 0 /* space id where stored */
#define BTR_EXTERN_PAGE_NO 4 /* page no where stored */
diff --git a/innobase/include/btr0pcur.h b/innobase/include/btr0pcur.h
index 05b55e4491d..9d07dd0de18 100644
--- a/innobase/include/btr0pcur.h
+++ b/innobase/include/btr0pcur.h
@@ -298,6 +298,14 @@ btr_pcur_move_to_prev(
function may release the page latch */
mtr_t* mtr); /* in: mtr */
/*************************************************************
+Sets the persistent cursor 'after last' on the page it is currently on. */
+UNIV_INLINE
+void
+btr_pcur_move_to_last_on_page(
+/*==========================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
Moves the persistent cursor to the next user record in the tree. If no user
records are left, the cursor ends up 'after last in tree'. */
UNIV_INLINE
diff --git a/innobase/include/btr0pcur.ic b/innobase/include/btr0pcur.ic
index a60140e4aa9..a1db2cc52dd 100644
--- a/innobase/include/btr0pcur.ic
+++ b/innobase/include/btr0pcur.ic
@@ -285,6 +285,24 @@ btr_pcur_move_to_prev_on_page(
}
/*************************************************************
+Sets the persistent cursor 'after last' on the page it is currently on. */
+UNIV_INLINE
+void
+btr_pcur_move_to_last_on_page(
+/*==========================*/
+ btr_pcur_t* cursor, /* in: persistent cursor; latch_mode must not be BTR_NO_LATCHES */
+ mtr_t* mtr) /* in: mtr; not used by this function */
+{
+ UT_NOT_USED(mtr);
+ ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+ /* NOTE(review): 'after last' presumably means past the last user record */
+ page_cur_set_after_last(buf_frame_align(btr_pcur_get_rec(cursor)),
+ btr_pcur_get_page_cur(cursor));
+ /* The cursor has no stored old position after this move */
+ cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+}
+
+/*************************************************************
Moves the persistent cursor to the next user record in the tree. If no user
records are left, the cursor ends up 'after last in tree'. */
UNIV_INLINE
diff --git a/innobase/include/btr0sea.h b/innobase/include/btr0sea.h
index 14feca5d5c5..ee762a12221 100644
--- a/innobase/include/btr0sea.h
+++ b/innobase/include/btr0sea.h
@@ -234,10 +234,16 @@ struct btr_search_sys_struct{
extern btr_search_sys_t* btr_search_sys;
/* The latch protecting the adaptive search system: this latch protects the
-(1) positions of records on those pages where a hash index has been built.
-NOTE: It does not protect values of non-ordering fields within a record from
-being updated in-place! We can use fact (1) to perform unique searches to
-indexes. */
+(1) hash index;
+(2) columns of a record to which we have a pointer in the hash index;
+
+but does NOT protect:
+
+(3) next record offset field in a record;
+(4) next or previous records on the same page.
+
+Bear in mind (3) and (4) when using the hash index.
+*/
extern rw_lock_t* btr_search_latch_temp;
diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h
index 591c0ec54ab..2963efd6396 100644
--- a/innobase/include/buf0buf.h
+++ b/innobase/include/buf0buf.h
@@ -274,6 +274,15 @@ buf_page_peek_block(
ulint space, /* in: space id */
ulint offset);/* in: page number */
/************************************************************************
+Resets the check_index_page_at_flush field of a page if found in the buffer
+pool. */
+
+void
+buf_reset_check_index_page_at_flush(
+/*================================*/
+ ulint space, /* in: space id */
+ ulint offset);/* in: page number */
+/************************************************************************
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
@@ -355,11 +364,24 @@ to a file. Note that we must be careful to calculate the same value
on 32-bit and 64-bit architectures. */
ulint
-buf_calc_page_checksum(
-/*===================*/
+buf_calc_page_new_checksum(
+/*=======================*/
/* out: checksum */
byte* page); /* in: buffer page */
/************************************************************************
+In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
+looked at the first few bytes of the page. This calculates that old
+checksum.
+NOTE: we must first store the new formula checksum to
+FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
+because this takes that field as an input! */
+
+ulint
+buf_calc_page_old_checksum(
+/*=======================*/
+ /* out: checksum */
+ byte* page); /* in: buffer page */
+/************************************************************************
Checks if a page is corrupt. */
ibool
@@ -463,6 +485,13 @@ buf_print_io(
/*=========*/
char* buf, /* in/out: buffer where to print */
char* buf_end);/* in: buffer end */
+/*************************************************************************
+Returns the ratio, in percent, of modified pages in the buffer pool to
+database pages in the buffer pool. */
+
+ulint
+buf_get_modified_ratio_pct(void);
+/*============================*/
/**************************************************************************
Refreshes the statistics used to print per-second averages. */
@@ -648,6 +677,14 @@ struct buf_block_struct{
then it can wait for this rw-lock */
buf_block_t* hash; /* node used in chaining to the page
hash table */
+ ibool check_index_page_at_flush;
+ /* TRUE if we know that this is
+ an index page, and want the database
+ to check its consistency before flush;
+ note that there may be pages in the
+ buffer pool which are index pages,
+ but this flag is not set because
+ we do not keep track of all pages */
/* 2. Page flushing fields */
UT_LIST_NODE_T(buf_block_t) flush_list;
@@ -711,8 +748,8 @@ struct buf_block_struct{
bufferfixed, or (2) the thread has an
x-latch on the block */
- /* 5. Hash search fields: NOTE that these fields are protected by
- btr_search_mutex */
+ /* 5. Hash search fields: NOTE that the first 4 fields are NOT
+ protected by any semaphore! */
ulint n_hash_helps; /* counter which controls building
of a new hash index for the page */
@@ -725,6 +762,9 @@ struct buf_block_struct{
whether the leftmost record of several
records with the same prefix should be
indexed in the hash index */
+
+ /* The following 4 fields are protected by btr_search_latch: */
+
ibool is_hashed; /* TRUE if hash index has already been
built on this page; note that it does
not guarantee that the index is
diff --git a/innobase/include/buf0buf.ic b/innobase/include/buf0buf.ic
index 51e2541e04d..7227c79dc6a 100644
--- a/innobase/include/buf0buf.ic
+++ b/innobase/include/buf0buf.ic
@@ -652,9 +652,10 @@ UNIV_INLINE
void
buf_page_dbg_add_level(
/*===================*/
- buf_frame_t* frame, /* in: buffer page where we have acquired
- a latch */
- ulint level) /* in: latching order level */
+ buf_frame_t* frame __attribute__((unused)), /* in: buffer page
+ where we have acquired latch */
+ ulint level __attribute__((unused))) /* in: latching order
+ level */
{
#ifdef UNIV_SYNC_DEBUG
sync_thread_add_level(&(buf_block_align(frame)->lock), level);
diff --git a/innobase/include/buf0lru.h b/innobase/include/buf0lru.h
index 946b6c4e31d..eb9d43d3b93 100644
--- a/innobase/include/buf0lru.h
+++ b/innobase/include/buf0lru.h
@@ -46,6 +46,20 @@ buf_LRU_get_recent_limit(void);
/*==========================*/
/* out: the limit; zero if could not determine it */
/**********************************************************************
+Look for a replaceable block from the end of the LRU list and put it to
+the free list if found. */
+
+ibool
+buf_LRU_search_and_free_block(
+/*==========================*/
+ /* out: TRUE if freed */
+ ulint n_iterations); /* in: how many times this has been called
+ repeatedly without result: a high value means
+ that we should search farther; if value is
+ k < 10, then we only search k/10 * number
+ of pages in the buffer pool from the end
+ of the LRU list */
+/**********************************************************************
Returns a free block from the buf_pool. The block is taken off the
free list. If it is empty, blocks are moved from the end of the
LRU list to the free list. */
@@ -86,17 +100,6 @@ void
buf_LRU_make_block_old(
/*===================*/
buf_block_t* block); /* in: control block */
-/**********************************************************************
-Look for a replaceable block from the end of the LRU list and put it to
-the free list if found. */
-
-ibool
-buf_LRU_search_and_free_block(
-/*==========================*/
- /* out: TRUE if freed */
- ulint n_iterations); /* in: how many times this has been called
- repeatedly without result: a high value
- means that we should search farther */
/**************************************************************************
Validates the LRU list. */
diff --git a/innobase/include/data0data.h b/innobase/include/data0data.h
index e0fb06e5018..2ec94a9517a 100644
--- a/innobase/include/data0data.h
+++ b/innobase/include/data0data.h
@@ -262,6 +262,14 @@ dtuple_set_types_binary(
/*====================*/
dtuple_t* tuple, /* in: data tuple */
ulint n); /* in: number of fields to set */
+/**************************************************************************
+Checks if a dtuple contains an SQL null value. */
+UNIV_INLINE
+ibool
+dtuple_contains_null(
+/*=================*/
+ /* out: TRUE if some field is SQL null */
+ dtuple_t* tuple); /* in: dtuple */
/**************************************************************
Checks that a data field is typed. Asserts an error if not. */
@@ -453,8 +461,6 @@ struct dfield_struct{
void* data; /* pointer to data */
ulint len; /* data length; UNIV_SQL_NULL if SQL null; */
dtype_t type; /* type of data */
- ulint col_no; /* when building index entries, the column
- number can be stored here */
};
struct dtuple_struct {
diff --git a/innobase/include/data0data.ic b/innobase/include/data0data.ic
index d356664df21..def80d3f430 100644
--- a/innobase/include/data0data.ic
+++ b/innobase/include/data0data.ic
@@ -406,3 +406,28 @@ data_write_sql_null(
data[j] = '\0';
}
}
+
+/**************************************************************************
+Checks whether at least one field of a data tuple is SQL NULL. */
+UNIV_INLINE
+ibool
+dtuple_contains_null(
+/*=================*/
+ /* out: TRUE if some field has length UNIV_SQL_NULL, else FALSE */
+ dtuple_t* tuple) /* in: data tuple whose fields are checked */
+{
+ ulint n; /* number of fields in the tuple */
+ ulint i; /* index of the field being checked */
+
+ n = dtuple_get_n_fields(tuple);
+
+ for (i = 0; i < n; i++) {
+ if (dfield_get_len(dtuple_get_nth_field(tuple, i))
+ == UNIV_SQL_NULL) {
+ /* Stop at the first SQL NULL field found */
+ return(TRUE);
+ }
+ }
+
+ return(FALSE);
+}
diff --git a/innobase/include/data0type.h b/innobase/include/data0type.h
index b53a70a8909..4da686bf2e1 100644
--- a/innobase/include/data0type.h
+++ b/innobase/include/data0type.h
@@ -18,14 +18,16 @@ typedef struct dtype_struct dtype_t;
data type */
extern dtype_t* dtype_binary;
-/* Data main types of SQL data; NOTE! character data types requiring
-collation transformation must have the smallest codes! All codes must be
-less than 256! */
+/* Data main types of SQL data */
#define DATA_VARCHAR 1 /* character varying */
#define DATA_CHAR 2 /* fixed length character */
#define DATA_FIXBINARY 3 /* binary string of fixed length */
#define DATA_BINARY 4 /* binary string */
-#define DATA_BLOB 5 /* binary large object */
+#define DATA_BLOB 5 /* binary large object, or a TEXT type; if
+ (prtype & DATA_NONLATIN1) != 0 the data must
+ be compared by MySQL as a whole field; if
+ (prtype & DATA_BINARY_TYPE) == 0, then this is
+ actually a TEXT column */
#define DATA_INT 6 /* integer: can be any size 1 - 8 bytes */
#define DATA_SYS_CHILD 7 /* address of the child page in node pointer */
#define DATA_SYS 8 /* system column */
@@ -34,35 +36,55 @@ binary strings */
#define DATA_FLOAT 9
#define DATA_DOUBLE 10
#define DATA_DECIMAL 11 /* decimal number stored as an ASCII string */
-#define DATA_VARMYSQL 12 /* data types for which comparisons must be */
-#define DATA_MYSQL 13 /* made by MySQL */
-#define DATA_ERROR 111 /* error value */
-#define DATA_MTYPE_MAX 255
+#define DATA_VARMYSQL 12 /* non-latin1 varying length char */
+#define DATA_MYSQL 13 /* non-latin1 fixed length char */
+#define DATA_MTYPE_MAX 63 /* dtype_store_for_order_and_null_size()
+ requires the values are <= 63 */
/*-------------------------------------------*/
-/* Precise data types for system columns; NOTE: the values must run
-from 0 up in the order given! All codes must be less than 256! */
+/* In the lowest byte in the precise type we store the MySQL type code
+(not applicable for system columns). */
+
+#define DATA_ENGLISH 4 /* English language character string: this
+ is a relic from pre-MySQL time and only used
+ for InnoDB's own system tables */
+#define DATA_ERROR 111 /* another relic from pre-MySQL time */
+
+#define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL
+ type from the precise type */
+
+/* Precise data types for system columns and the length of those columns;
+NOTE: the values must run from 0 up in the order given! All codes must
+be less than 256 */
#define DATA_ROW_ID 0 /* row id: a dulint */
#define DATA_ROW_ID_LEN 6 /* stored length for row id */
+
#define DATA_TRX_ID 1 /* transaction id: 6 bytes */
#define DATA_TRX_ID_LEN 6
+
#define DATA_ROLL_PTR 2 /* rollback data pointer: 7 bytes */
#define DATA_ROLL_PTR_LEN 7
+
#define DATA_MIX_ID 3 /* mixed index label: a dulint, stored in
a row in a compressed form */
#define DATA_MIX_ID_LEN 9 /* maximum stored length for mix id (in a
compressed dulint form) */
#define DATA_N_SYS_COLS 4 /* number of system columns defined above */
+/*-------------------------------------------*/
+/* Flags ORed to the precise data type */
#define DATA_NOT_NULL 256 /* this is ORed to the precise type when
the column is declared as NOT NULL */
#define DATA_UNSIGNED 512 /* this id ORed to the precise type when
we have an unsigned integer type */
+#define DATA_BINARY_TYPE 1024 /* if the data type is a binary character
+ string, this is ORed to the precise type:
+ this only holds for tables created with
+ >= MySQL-4.0.14 */
+#define DATA_NONLATIN1 2048 /* if the data type is a DATA_BLOB (actually
+ TEXT) of a non-latin1 type, this is ORed to
+ the precise type: this only holds for tables
+ created with >= MySQL-4.0.14 */
/*-------------------------------------------*/
-/* Precise types of a char or varchar data. All codes must be less than 256! */
-#define DATA_ENGLISH 4 /* English language character string */
-#define DATA_FINNISH 5 /* Finnish */
-#define DATA_PRTYPE_MAX 255
-
/* This many bytes we need to store the type information affecting the
alphabetical order for a single field and decide the storage size of an
SQL null*/
@@ -123,7 +145,7 @@ dtype_get_pad_char(
/*===============*/
/* out: padding character code, or
ULINT_UNDEFINED if no padding specified */
- dtype_t* type); /* in: typeumn */
+ dtype_t* type); /* in: type */
/***************************************************************************
Returns the size of a fixed size data type, 0 if not a fixed size type. */
UNIV_INLINE
@@ -150,24 +172,24 @@ dtype_is_fixed_size(
/* out: TRUE if fixed size */
dtype_t* type); /* in: type */
/**************************************************************************
-Stores to a type the information which determines its alphabetical
-ordering. */
+Stores for a type the information which determines its alphabetical ordering
+and the storage size of an SQL NULL value. */
UNIV_INLINE
void
dtype_store_for_order_and_null_size(
/*================================*/
byte* buf, /* in: buffer for DATA_ORDER_NULL_TYPE_BUF_SIZE
- bytes */
+ bytes where we store the info */
dtype_t* type); /* in: type struct */
/**************************************************************************
-Reads of a type the stored information which determines its alphabetical
-ordering. */
+Reads to a type the stored information which determines its alphabetical
+ordering and the storage size of an SQL NULL value. */
UNIV_INLINE
void
dtype_read_for_order_and_null_size(
/*===============================*/
dtype_t* type, /* in: type struct */
- byte* buf); /* in: buffer for type order info */
+ byte* buf); /* in: buffer for the stored order info */
/*************************************************************************
Validates a data type structure. */
diff --git a/innobase/include/data0type.ic b/innobase/include/data0type.ic
index d82d976d076..ddd0b0ae8cc 100644
--- a/innobase/include/data0type.ic
+++ b/innobase/include/data0type.ic
@@ -110,7 +110,9 @@ dtype_get_pad_char(
if (type->mtype == DATA_CHAR
|| type->mtype == DATA_VARCHAR
|| type->mtype == DATA_BINARY
- || type->mtype == DATA_FIXBINARY) {
+ || type->mtype == DATA_FIXBINARY
+ || type->mtype == DATA_MYSQL
+ || type->mtype == DATA_VARMYSQL) {
/* Space is the padding character for all char and binary
strings */
@@ -124,39 +126,56 @@ dtype_get_pad_char(
}
/**************************************************************************
-Stores to a type the information which determines its alphabetical
-ordering. */
+Stores for a type the information which determines its alphabetical ordering
+and the storage size of an SQL NULL value. */
UNIV_INLINE
void
dtype_store_for_order_and_null_size(
/*================================*/
byte* buf, /* in: buffer for DATA_ORDER_NULL_TYPE_BUF_SIZE
- bytes */
+ bytes where we store the info */
dtype_t* type) /* in: type struct */
{
ut_ad(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE);
buf[0] = (byte)(type->mtype & 0xFF);
+
+ if (type->prtype & DATA_BINARY_TYPE) {
+ buf[0] = buf[0] | 128;
+ }
+
+ if (type->prtype & DATA_NONLATIN1) {
+ buf[0] = buf[0] | 64;
+ }
+
buf[1] = (byte)(type->prtype & 0xFF);
mach_write_to_2(buf + 2, type->len & 0xFFFF);
}
/**************************************************************************
-Reads of a type the stored information which determines its alphabetical
-ordering. */
+Reads to a type the stored information which determines its alphabetical
+ordering and the storage size of an SQL NULL value. */
UNIV_INLINE
void
dtype_read_for_order_and_null_size(
/*===============================*/
dtype_t* type, /* in: type struct */
- byte* buf) /* in: buffer for type order info */
+ byte* buf) /* in: buffer for stored type order info */
{
ut_ad(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE);
- type->mtype = buf[0];
+ type->mtype = buf[0] & 63;
type->prtype = buf[1];
+ if (buf[0] & 128) {
+ type->prtype = type->prtype | DATA_BINARY_TYPE;
+ }
+
+ if (buf[0] & 64) {
+ type->prtype = type->prtype | DATA_NONLATIN1;
+ }
+
type->len = mach_read_from_2(buf + 2);
}
diff --git a/innobase/include/db0err.h b/innobase/include/db0err.h
index df74b06dfc0..854b9794c00 100644
--- a/innobase/include/db0err.h
+++ b/innobase/include/db0err.h
@@ -41,9 +41,14 @@ Created 5/24/1996 Heikki Tuuri
which is referenced */
#define DB_CANNOT_ADD_CONSTRAINT 38 /* adding a foreign key constraint
to a table failed */
-
-#define DB_COL_APPEARS_TWICE_IN_INDEX 40
-
+#define DB_CORRUPTION 39 /* data structure corruption noticed */
+#define DB_COL_APPEARS_TWICE_IN_INDEX 40 /* InnoDB cannot handle an index
+ where same column appears twice */
+#define DB_CANNOT_DROP_CONSTRAINT 41 /* dropping a foreign key constraint
+ from a table failed */
+#define DB_NO_SAVEPOINT 42 /* no savepoint exists with the given
+ name */
+
/* The following are partial failure codes */
#define DB_FAIL 1000
#define DB_OVERFLOW 1001
diff --git a/innobase/include/dict0dict.h b/innobase/include/dict0dict.h
index 832654d2666..b5ec5381db2 100644
--- a/innobase/include/dict0dict.h
+++ b/innobase/include/dict0dict.h
@@ -26,6 +26,18 @@ Created 1/8/1996 Heikki Tuuri
#include "ut0byte.h"
#include "trx0types.h"
+/*************************************************************************
+Accepts a specified string. Comparisons are case-insensitive. */
+
+char*
+dict_accept(
+/*========*/
+ /* out: if string was accepted, the pointer
+ is moved after that, else ptr is returned */
+ char* ptr, /* in: scan from this */
+ const char* string,/* in: accept only this string as the next
+ non-whitespace string */
+ ibool* success);/* out: TRUE if accepted */
/************************************************************************
Decrements the count of open MySQL handles to a table. */
@@ -114,13 +126,20 @@ dict_table_autoinc_get(
/* out: value for a new row, or 0 */
dict_table_t* table); /* in: table */
/************************************************************************
-Reads the autoinc counter value, 0 if not yet initialized. Does not
-increment the counter. */
+Decrements the autoinc counter value by 1. */
+
+void
+dict_table_autoinc_decrement(
+/*=========================*/
+ dict_table_t* table); /* in: table */
+/************************************************************************
+Reads the next autoinc value (== autoinc counter value), 0 if not yet
+initialized. */
ib_longlong
dict_table_autoinc_read(
/*====================*/
- /* out: value of the counter */
+ /* out: value for a new row, or 0 */
dict_table_t* table); /* in: table */
/************************************************************************
Peeks the autoinc counter value, 0 if not yet initialized. Does not
@@ -200,6 +219,24 @@ dict_create_foreign_constraints(
char* name); /* in: table full name in the normalized form
database_name/table_name */
/**************************************************************************
+Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement. */
+
+ulint
+dict_foreign_parse_drop_constraints(
+/*================================*/
+ /* out: DB_SUCCESS or
+ DB_CANNOT_DROP_CONSTRAINT if
+ syntax error or the constraint
+ id does not match */
+ mem_heap_t* heap, /* in: heap from which we can
+ allocate memory */
+ trx_t* trx, /* in: transaction */
+ dict_table_t* table, /* in: table */
+ ulint* n, /* out: number of constraints
+ to drop */
+ char*** constraints_to_drop); /* out: id's of the
+ constraints to drop */
+/**************************************************************************
Returns a table object and memoryfixes it. NOTE! This is a high-level
function to be used mainly from outside the 'dict' directory. Inside this
directory dict_table_get_low is usually the appropriate function. */
@@ -314,6 +351,16 @@ dict_print_info_on_foreign_keys(
char* str, /* in/out: pointer to a string */
ulint len, /* in: space in str available for info */
dict_table_t* table); /* in: table */
+/**************************************************************************
+Sprintfs to a string info on a foreign key of a table in a format suitable
+for CREATE TABLE. */
+
+char*
+dict_print_info_on_foreign_key_in_create_format(
+/*============================================*/
+ /* out: how far in buf we printed */
+ dict_foreign_t* foreign,/* in: foreign key constraint */
+ char* buf); /* in: buffer of at least 5000 bytes */
/************************************************************************
Gets the first index on the table (the clustered index). */
UNIV_INLINE
@@ -522,6 +569,29 @@ dict_index_get_nth_col_pos(
dict_index_t* index, /* in: index */
ulint n); /* in: column number */
/************************************************************************
+Returns TRUE if the index contains a column or a prefix of that column. */
+
+ibool
+dict_index_contains_col_or_prefix(
+/*==============================*/
+ /* out: TRUE if contains the column or its
+ prefix */
+ dict_index_t* index, /* in: index */
+ ulint n); /* in: column number */
+/************************************************************************
+Looks for a matching field in an index. The column and the prefix len have
+to be the same. */
+
+ulint
+dict_index_get_nth_field_pos(
+/*=========================*/
+ /* out: position in internal representation
+ of the index; if not contained, returns
+ ULINT_UNDEFINED */
+ dict_index_t* index, /* in: index from which to search */
+ dict_index_t* index2, /* in: index */
+ ulint n); /* in: field number in index2 */
+/************************************************************************
Looks for column n position in the clustered index. */
ulint
@@ -789,9 +859,17 @@ void
dict_mutex_exit_for_mysql(void);
/*===========================*/
+/* The following len must be at least 10000 bytes! */
+#define DICT_FOREIGN_ERR_BUF_LEN 10000
+
+/* Buffers for storing detailed information about the latest foreign key
+and unique key errors */
+extern char* dict_foreign_err_buf;
+extern char* dict_unique_err_buf;
+extern mutex_t dict_foreign_err_mutex; /* mutex protecting the buffers */
extern dict_sys_t* dict_sys; /* the dictionary system */
-extern rw_lock_t dict_foreign_key_check_lock;
+extern rw_lock_t dict_operation_lock;
/* Dictionary system struct */
struct dict_sys_struct{
diff --git a/innobase/include/dict0dict.ic b/innobase/include/dict0dict.ic
index 821465f96a8..c5982c162a7 100644
--- a/innobase/include/dict0dict.ic
+++ b/innobase/include/dict0dict.ic
@@ -106,7 +106,7 @@ dict_table_get_n_sys_cols(
/*======================*/
/* out: number of system (e.g.,
ROW_ID) columns of a table */
- dict_table_t* table) /* in: table */
+ dict_table_t* table __attribute__((unused))) /* in: table */
{
ut_ad(table);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
@@ -203,7 +203,6 @@ dict_index_get_n_fields(
{
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- ut_ad(index->cached);
return(index->n_fields);
}
diff --git a/innobase/include/dict0mem.h b/innobase/include/dict0mem.h
index cc27f2bad12..03dc913a7c9 100644
--- a/innobase/include/dict0mem.h
+++ b/innobase/include/dict0mem.h
@@ -111,10 +111,13 @@ by the column name may be released only after publishing the index. */
void
dict_mem_index_add_field(
/*=====================*/
- dict_index_t* index, /* in: index */
- char* name, /* in: column name */
- ulint order); /* in: order criterion; 0 means an ascending
- order */
+ dict_index_t* index, /* in: index */
+ char* name, /* in: column name */
+ ulint order, /* in: order criterion; 0 means an
+ ascending order */
+ ulint prefix_len); /* in: 0 or the column prefix length
+ in a MySQL index like
+ INDEX (textcol(25)) */
/**************************************************************************
Frees an index memory object. */
@@ -158,12 +161,18 @@ struct dict_col_struct{
in some of the functions below */
};
+#define DICT_MAX_COL_PREFIX_LEN 512
+
/* Data structure for a field in an index */
struct dict_field_struct{
- dict_col_t* col; /* pointer to the table column */
- char* name; /* name of the column */
- ulint order; /* flags for ordering this field:
- DICT_DESCEND, ... */
+ dict_col_t* col; /* pointer to the table column */
+ char* name; /* name of the column */
+ ulint order; /* flags for ordering this field:
+ DICT_DESCEND, ... */
+ ulint prefix_len; /* 0 or the length of the column
+ prefix in a MySQL index of type, e.g.,
+ INDEX (textcol(25)); must be smaller
+ than DICT_MAX_COL_PREFIX_LEN */
};
/* Data structure for an index tree */
@@ -280,8 +289,15 @@ struct dict_foreign_struct{
table */
};
+/* The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that
+a foreign key constraint is enforced, therefore RESTRICT just means no flag */
#define DICT_FOREIGN_ON_DELETE_CASCADE 1
#define DICT_FOREIGN_ON_DELETE_SET_NULL 2
+#define DICT_FOREIGN_ON_UPDATE_CASCADE 4
+#define DICT_FOREIGN_ON_UPDATE_SET_NULL 8
+#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16
+#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32
+
#define DICT_INDEX_MAGIC_N 76789786
@@ -333,6 +349,13 @@ struct dict_table_struct{
space from the lock heap of the trx:
otherwise the lock heap would grow rapidly
if we do a large insert from a select */
+ dulint query_cache_inv_trx_id;
+ /* transactions whose trx id is smaller than this
+ number are not allowed to store to the MySQL
+ query cache or retrieve from it; when a trx
+ with undo logs commits, it sets this to the
+ value of the trx id counter for the tables it
+ had an IX lock on */
UT_LIST_BASE_NODE_T(lock_t)
locks; /* list of locks on the table */
/*----------------------*/
diff --git a/innobase/include/dyn0dyn.h b/innobase/include/dyn0dyn.h
index cca302994c1..501fde05e90 100644
--- a/innobase/include/dyn0dyn.h
+++ b/innobase/include/dyn0dyn.h
@@ -19,7 +19,6 @@ typedef dyn_block_t dyn_array_t;
/* This is the initial 'payload' size of a dynamic array;
this must be > MLOG_BUF_MARGIN + 30! */
-
#define DYN_ARRAY_DATA_SIZE 512
/*************************************************************************
diff --git a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h
index 63e20221c16..ad3149f0b36 100644
--- a/innobase/include/fil0fil.h
+++ b/innobase/include/fil0fil.h
@@ -43,7 +43,10 @@ struct fil_addr_struct{
extern fil_addr_t fil_addr_null;
/* The byte offsets on a file page for various variables */
-#define FIL_PAGE_SPACE 0 /* space id the page belongs to */
+#define FIL_PAGE_SPACE_OR_CHKSUM 0 /* in < MySQL-4.0.14 space id the
+ page belongs to (== 0) but in later
+ versions the 'new' checksum of the
+ page */
#define FIL_PAGE_OFFSET 4 /* page offset inside space */
#define FIL_PAGE_PREV 8 /* if there is a 'natural' predecessor
of the page, its offset */
@@ -64,7 +67,7 @@ extern fil_addr_t fil_addr_null;
#define FIL_PAGE_DATA 38 /* start of the data on the page */
/* File page trailer */
-#define FIL_PAGE_END_LSN 8 /* the low 4 bytes of this are used
+#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /* the low 4 bytes of this are used
to store the page checksum, the
last 4 bytes should be identical
to the last 4 bytes of FIL_PAGE_LSN */
@@ -73,6 +76,8 @@ extern fil_addr_t fil_addr_null;
/* File page types */
#define FIL_PAGE_INDEX 17855
#define FIL_PAGE_UNDO_LOG 2
+#define FIL_PAGE_INODE 3
+#define FIL_PAGE_IBUF_FREE_LIST 4
/* Space types */
#define FIL_TABLESPACE 501
@@ -381,6 +386,14 @@ fil_space_release_free_extents(
/*===========================*/
ulint id, /* in: space id */
ulint n_reserved); /* in: how many one reserved */
+/***********************************************************************
+Gets the number of reserved extents. If the database is silent, this number
+should be zero. */
+
+ulint
+fil_space_get_n_reserved_extents(
+/*=============================*/
+ ulint id); /* in: space id */
typedef struct fil_space_struct fil_space_t;
diff --git a/innobase/include/ha0ha.ic b/innobase/include/ha0ha.ic
index 1aad7d5a36f..761bc3b20de 100644
--- a/innobase/include/ha0ha.ic
+++ b/innobase/include/ha0ha.ic
@@ -49,7 +49,7 @@ ha_node_t*
ha_chain_get_next(
/*==============*/
/* out: next node, NULL if none */
- hash_table_t* table, /* in: hash table */
+ hash_table_t* table __attribute__((unused)), /* in: hash table */
ha_node_t* node) /* in: hash chain node */
{
ut_ad(table);
diff --git a/innobase/include/lock0lock.h b/innobase/include/lock0lock.h
index 80afba97416..5608ba020b7 100644
--- a/innobase/include/lock0lock.h
+++ b/innobase/include/lock0lock.h
@@ -292,16 +292,12 @@ lock_sec_rec_modify_check_and_lock(
dict_index_t* index, /* in: secondary index */
que_thr_t* thr); /* in: query thread */
/*************************************************************************
-Checks if locks of other transactions prevent an immediate read, or passing
-over by a read cursor, of a clustered index record. If they do, first tests
-if the query thread should anyway be suspended for some reason; if not, then
-puts the transaction and the query thread to the lock wait state and inserts a
-waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record. */
+Like the counterpart for a clustered index below, but now we read a
+secondary index record. */
ulint
-lock_clust_rec_read_check_and_lock(
-/*===============================*/
+lock_sec_rec_read_check_and_lock(
+/*=============================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT,
DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
@@ -309,18 +305,24 @@ lock_clust_rec_read_check_and_lock(
rec_t* rec, /* in: user record or page supremum record
which should be read or passed over by a read
cursor */
- dict_index_t* index, /* in: clustered index */
+ dict_index_t* index, /* in: secondary index */
ulint mode, /* in: mode of the lock which the read cursor
should set on records: LOCK_S or LOCK_X; the
latter is possible in SELECT FOR UPDATE */
+ ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
+ LOCK_REC_NOT_GAP */
que_thr_t* thr); /* in: query thread */
/*************************************************************************
-Like the counterpart for a clustered index above, but now we read a
-secondary index record. */
+Checks if locks of other transactions prevent an immediate read, or passing
+over by a read cursor, of a clustered index record. If they do, first tests
+if the query thread should anyway be suspended for some reason; if not, then
+puts the transaction and the query thread to the lock wait state and inserts a
+waiting request for a record lock to the lock queue. Sets the requested mode
+lock on the record. */
ulint
-lock_sec_rec_read_check_and_lock(
-/*=============================*/
+lock_clust_rec_read_check_and_lock(
+/*===============================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT,
DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
@@ -328,10 +330,12 @@ lock_sec_rec_read_check_and_lock(
rec_t* rec, /* in: user record or page supremum record
which should be read or passed over by a read
cursor */
- dict_index_t* index, /* in: secondary index */
+ dict_index_t* index, /* in: clustered index */
ulint mode, /* in: mode of the lock which the read cursor
should set on records: LOCK_S or LOCK_X; the
latter is possible in SELECT FOR UPDATE */
+ ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
+ LOCK_REC_NOT_GAP */
que_thr_t* thr); /* in: query thread */
/*************************************************************************
Checks that a record is seen in a consistent read. */
@@ -446,6 +450,18 @@ lock_rec_get_mutex_for_addr(
ulint space, /* in: space id */
ulint page_no);/* in: page number */
/*************************************************************************
+Checks that a transaction id is sensible, i.e., not in the future. */
+
+ibool
+lock_check_trx_id_sanity(
+/*=====================*/
+ /* out: TRUE if ok */
+ dulint trx_id, /* in: trx id */
+ rec_t* rec, /* in: user record */
+ dict_index_t* index, /* in: clustered index */
+ ibool has_kernel_mutex);/* in: TRUE if the caller owns the
+ kernel mutex */
+/*************************************************************************
Validates the lock queue on a single record. */
ibool
@@ -509,6 +525,7 @@ lock_validate(void);
extern lock_sys_t* lock_sys;
/* Lock modes and types */
+/* Basic modes */
#define LOCK_NONE 0 /* this flag is used elsewhere to note
consistent read */
#define LOCK_IS 2 /* intention shared */
@@ -519,15 +536,20 @@ extern lock_sys_t* lock_sys;
in an exclusive mode */
#define LOCK_MODE_MASK 0xF /* mask used to extract mode from the
type_mode field in a lock */
+/* Lock types */
#define LOCK_TABLE 16 /* these type values should be so high that */
#define LOCK_REC 32 /* they can be ORed to the lock mode */
#define LOCK_TYPE_MASK 0xF0 /* mask used to extract lock type from the
type_mode field in a lock */
+/* Waiting lock flag */
#define LOCK_WAIT 256 /* this wait bit should be so high that
it can be ORed to the lock mode and type;
when this bit is set, it means that the
lock has not yet been granted, it is just
waiting for its turn in the wait queue */
+/* Precise modes */
+#define LOCK_ORDINARY 0 /* this flag denotes an ordinary next-key lock
+ in contrast to LOCK_GAP or LOCK_REC_NOT_GAP */
#define LOCK_GAP 512 /* this gap bit should be so high that
it can be ORed to the other flags;
when this bit is set, it means that the
@@ -537,7 +559,23 @@ extern lock_sys_t* lock_sys;
the bit is set; locks of this type are created
when records are removed from the index chain
of records */
-
+#define LOCK_REC_NOT_GAP 1024 /* this bit means that the lock is only on
+ the index record and does NOT block inserts
+ to the gap before the index record; this is
+ used in the case when we retrieve a record
+ with a unique key, and is also used in
+ locking plain SELECTs (not part of UPDATE
+ or DELETE) when the user has set the READ
+ COMMITTED isolation level */
+#define LOCK_INSERT_INTENTION 2048 /* this bit is set when we place a waiting
+ gap type record lock request in order to let
+ an insert of an index record to wait until
+ there are no conflicting locks by other
+ transactions on the gap; note that this flag
+ remains set when the waiting lock is granted,
+ or if the lock is inherited to a neighboring
+ record */
+
/* When lock bits are reset, the following flags are available: */
#define LOCK_RELEASE_WAIT 1
#define LOCK_NOT_RELEASE_WAIT 2
diff --git a/innobase/include/log0log.h b/innobase/include/log0log.h
index f200371de9d..24ec28a56e6 100644
--- a/innobase/include/log0log.h
+++ b/innobase/include/log0log.h
@@ -20,7 +20,7 @@ typedef struct log_group_struct log_group_t;
extern ibool log_do_write;
extern ibool log_debug_writes;
-/* Wait modes for log_flush_up_to */
+/* Wait modes for log_write_up_to */
#define LOG_NO_WAIT 91
#define LOG_WAIT_ONE_GROUP 92
#define LOG_WAIT_ALL_GROUPS 93
@@ -157,26 +157,27 @@ log_io_complete(
/*============*/
log_group_t* group); /* in: log group */
/**********************************************************
-Flushes the log files to the disk, using, for example, the Unix fsync.
-This function does the flush even if the user has set
-srv_flush_log_at_trx_commit = FALSE. */
-
-void
-log_flush_to_disk(void);
-/*===================*/
-/**********************************************************
This function is called, e.g., when a transaction wants to commit. It checks
-that the log has been flushed to disk up to the last log entry written by the
-transaction. If there is a flush running, it waits and checks if the flush
-flushed enough. If not, starts a new flush. */
+that the log has been written to the log file up to the last log entry written
+by the transaction. If there is a flush running, it waits and checks if the
+flush flushed enough. If not, starts a new flush. */
void
-log_flush_up_to(
+log_write_up_to(
/*============*/
dulint lsn, /* in: log sequence number up to which the log should
- be flushed, ut_dulint_max if not specified */
- ulint wait); /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
+ be written, ut_dulint_max if not specified */
+ ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
or LOG_WAIT_ALL_GROUPS */
+ ibool flush_to_disk);
+ /* in: TRUE if we want the written log also to be
+ flushed to disk */
+/********************************************************************
+Does a synchronous flush of the log buffer to disk. */
+
+void
+log_buffer_flush_to_disk(void);
+/*==========================*/
/********************************************************************
Advances the smallest lsn for which there are unflushed dirty blocks in the
buffer pool and also may make a new checkpoint. NOTE: this function may only
@@ -512,6 +513,15 @@ log_print(
/*======*/
char* buf, /* in/out: buffer where to print */
char* buf_end);/* in: buffer end */
+/**********************************************************
+Peeks the current lsn. */
+
+ibool
+log_peek_lsn(
+/*=========*/
+ /* out: TRUE if success, FALSE if could not get the
+ log system mutex */
+ dulint* lsn); /* out: if returns TRUE, current lsn is here */
/**************************************************************************
Refreshes the statistics used to print per-second averages. */
@@ -741,27 +751,37 @@ struct log_struct{
be advanced, it is enough that the
write i/o has been completed for all
log groups */
- dulint flush_lsn; /* end lsn for the current flush */
- ulint flush_end_offset;/* the data in buffer has been flushed
+ dulint write_lsn; /* end lsn for the current running
+ write */
+ ulint write_end_offset;/* the data in buffer has been written
up to this offset when the current
- flush ends: this field will then
+ write ends: this field will then
be copied to buf_next_to_write */
- ulint n_pending_writes;/* number of currently pending flush
- writes */
+ dulint current_flush_lsn;/* end lsn for the current running
+ write + flush operation */
+ dulint flushed_to_disk_lsn;
+ /* how far we have written the log
+ AND flushed to disk */
+ ulint n_pending_writes;/* number of currently pending flushes
+ or writes */
+ /* NOTE on the 'flush' in names of the fields below: starting from
+ 4.0.14, we separate the write of the log file and the actual fsync()
+ or other method to flush it to disk. The names below should really
+ be 'flush_or_write'! */
os_event_t no_flush_event; /* this event is in the reset state
- when a flush is running; a thread
- should wait for this without owning
- the log mutex, but NOTE that to set or
- reset this event, the thread MUST own
- the log mutex! */
+ when a flush or a write is running;
+ a thread should wait for this without
+ owning the log mutex, but NOTE that
+ to set or reset this event, the
+ thread MUST own the log mutex! */
ibool one_flushed; /* during a flush, this is first FALSE
and becomes TRUE when one log group
- has been flushed */
+ has been written or flushed */
os_event_t one_flushed_event;/* this event is reset when the
- flush has not yet completed for any
- log group; e.g., this means that a
- transaction has been committed when
- this is set; a thread should wait
+ flush or write has not yet completed
+ for any log group; e.g., this means
+ that a transaction has been committed
+ when this is set; a thread should wait
for this without owning the log mutex,
but NOTE that to set or reset this
event, the thread MUST own the log
@@ -774,6 +794,11 @@ struct log_struct{
called */
/* Fields involved in checkpoints */
+ ulint log_group_capacity; /* capacity of the log group; if
+ the checkpoint age exceeds this, it is
+ a serious error because it is possible
+ we will then overwrite log and spoil
+ crash recovery */
ulint max_modified_age_async;
/* when this recommended value for lsn
- buf_pool_get_oldest_modification()
diff --git a/innobase/include/log0recv.h b/innobase/include/log0recv.h
index baa2ba50c7d..e5a5bc05563 100644
--- a/innobase/include/log0recv.h
+++ b/innobase/include/log0recv.h
@@ -333,7 +333,10 @@ extern ibool recv_recovery_on;
extern ibool recv_no_ibuf_operations;
extern ibool recv_needed_recovery;
+extern ibool recv_lsn_checks_on;
+
extern ibool recv_is_making_a_backup;
+extern ulint recv_max_parsed_page_no;
/* Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
times! */
diff --git a/innobase/include/mem0mem.h b/innobase/include/mem0mem.h
index bfd25f5bdbe..9ab3b2cd754 100644
--- a/innobase/include/mem0mem.h
+++ b/innobase/include/mem0mem.h
@@ -127,16 +127,18 @@ mem_heap_create_func(
ulint line /* in: line where created */
);
/*********************************************************************
-NOTE: Use the corresponding macro instead of this function.
-Frees the space occupied by a memory heap. */
+NOTE: Use the corresponding macro instead of this function. Frees the space
+occupied by a memory heap. In the debug version erases the heap memory
+blocks. */
UNIV_INLINE
void
mem_heap_free_func(
/*===============*/
- mem_heap_t* heap, /* in, own: heap to be freed */
- char* file_name, /* in: file name where freed */
- ulint line /* in: line where freed */
-);
+ mem_heap_t* heap, /* in, own: heap to be freed */
+ char* file_name __attribute__((unused)),
+ /* in: file name where freed */
+ ulint line __attribute__((unused)));
+ /* in: line where freed */
/*******************************************************************
Allocates n bytes of memory from a memory heap. */
UNIV_INLINE
diff --git a/innobase/include/mem0mem.ic b/innobase/include/mem0mem.ic
index a7abb93d91d..1ff8c66e80a 100644
--- a/innobase/include/mem0mem.ic
+++ b/innobase/include/mem0mem.ic
@@ -440,9 +440,10 @@ void
mem_heap_free_func(
/*===============*/
mem_heap_t* heap, /* in, own: heap to be freed */
- char* file_name, /* in: file name where freed */
- ulint line /* in: line where freed */
- )
+ char* file_name __attribute__((unused)),
+ /* in: file name where freed */
+ ulint line __attribute__((unused)))
+ /* in: line where freed */
{
mem_block_t* block;
mem_block_t* prev_block;
diff --git a/innobase/include/os0file.h b/innobase/include/os0file.h
index d65c7fd47e3..5c52f0e92bf 100644
--- a/innobase/include/os0file.h
+++ b/innobase/include/os0file.h
@@ -111,6 +111,7 @@ log. */
#define OS_WIN31 1
#define OS_WIN95 2
#define OS_WINNT 3
+#define OS_WIN2000 4
extern ulint os_n_file_reads;
extern ulint os_n_file_writes;
@@ -122,7 +123,7 @@ Gets the operating system version. Currently works only on Windows. */
ulint
os_get_os_version(void);
/*===================*/
- /* out: OS_WIN95, OS_WIN31, OS_WINNT (2000 == NT) */
+ /* out: OS_WIN95, OS_WIN31, OS_WINNT, or OS_WIN2000 */
/********************************************************************
Creates the seek mutexes used in positioned reads and writes. */
@@ -145,6 +146,21 @@ os_file_create_simple(
ulint access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */
ibool* success);/* out: TRUE if succeed, FALSE if error */
/********************************************************************
+A simple function to open or create a file. */
+
+os_file_t
+os_file_create_simple_no_error_handling(
+/*====================================*/
+ /* out, own: handle to the file, not defined if error,
+ error number can be retrieved with os_get_last_error */
+ char* name, /* in: name of the file or path as a null-terminated
+ string */
+ ulint create_mode,/* in: OS_FILE_OPEN if an existing file is opened
+ (if does not exist, error), or OS_FILE_CREATE if a new
+ file is created (if exists, error) */
+ ulint access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */
+ ibool* success);/* out: TRUE if succeed, FALSE if error */
+/********************************************************************
Opens an existing file or creates a new. */
os_file_t
@@ -159,7 +175,11 @@ os_file_create(
file is created (if exists, error), OS_FILE_OVERWRITE
if a new file is created or an old overwritten */
ulint purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
- is desired, OS_FILE_NORMAL, if any normal file */
+ is desired, OS_FILE_NORMAL, if any normal file;
+ NOTE that it also depends on type, os_aio_.. and srv_..
+ variables whether we really use async i/o or
+ unbuffered i/o: look in the function source code for
+ the exact rules */
ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */
ibool* success);/* out: TRUE if succeed, FALSE if error */
/***************************************************************************
@@ -172,6 +192,14 @@ os_file_close(
/* out: TRUE if success */
os_file_t file); /* in, own: handle to a file */
/***************************************************************************
+Closes a file handle. */
+
+ibool
+os_file_close_no_error_handling(
+/*============================*/
+ /* out: TRUE if success */
+ os_file_t file); /* in, own: handle to a file */
+/***************************************************************************
Gets a file size. */
ibool
@@ -300,6 +328,13 @@ os_aio(
are ignored */
void* message2);
/****************************************************************************
+Wakes up all async i/o threads so that they know to exit themselves in
+shutdown. */
+
+void
+os_aio_wake_all_threads_at_shutdown(void);
+/*=====================================*/
+/****************************************************************************
Waits until there are no pending writes in os_aio_write_array. There can
be other, synchronous, pending writes. */
diff --git a/innobase/include/os0proc.h b/innobase/include/os0proc.h
index 9da1f33e070..79750e5c1f7 100644
--- a/innobase/include/os0proc.h
+++ b/innobase/include/os0proc.h
@@ -16,6 +16,15 @@ typedef void* os_process_t;
typedef unsigned long int os_process_id_t;
/********************************************************************
+Converts the current process id to a number. It is not guaranteed that the
+number is unique. In Linux returns the 'process number' of the current
+thread. That number is the same as one sees in 'top', for example. In Linux
+the thread id is not the same as one sees in 'top'. */
+
+ulint
+os_proc_get_number(void);
+/*====================*/
+/********************************************************************
Allocates non-cacheable memory. */
void*
diff --git a/innobase/include/os0sync.h b/innobase/include/os0sync.h
index b2d613c4619..e1cf263216e 100644
--- a/innobase/include/os0sync.h
+++ b/innobase/include/os0sync.h
@@ -10,25 +10,43 @@ Created 9/6/1995 Heikki Tuuri
#define os0sync_h
#include "univ.i"
+#include "ut0lst.h"
#ifdef __WIN__
#define os_fast_mutex_t CRITICAL_SECTION
-typedef void* os_event_t;
-#else
+typedef HANDLE os_native_event_t;
+
+typedef struct os_event_struct os_event_struct_t;
+typedef os_event_struct_t* os_event_t;
+struct os_event_struct {
+ os_native_event_t handle;
+ /* Windows event */
+ UT_LIST_NODE_T(os_event_struct_t) os_event_list;
+ /* list of all created events */
+};
+#else
typedef pthread_mutex_t os_fast_mutex_t;
+
+typedef struct os_event_struct os_event_struct_t;
+typedef os_event_struct_t* os_event_t;
+
struct os_event_struct {
os_fast_mutex_t os_mutex; /* this mutex protects the next
fields */
- ibool is_set; /* this is TRUE if the next mutex is
- not reserved */
+ ibool is_set; /* this is TRUE when the event is
+ in the signaled state, i.e., a thread
+ does not stop if it tries to wait for
+ this event */
+ ib_longlong signal_count; /* this is incremented each time
+ the event becomes signaled */
pthread_cond_t cond_var; /* condition variable is used in
waiting for the event */
+ UT_LIST_NODE_T(os_event_struct_t) os_event_list;
+ /* list of all created events */
};
-typedef struct os_event_struct os_event_struct_t;
-typedef os_event_struct_t* os_event_t;
#endif
typedef struct os_mutex_struct os_mutex_str_t;
@@ -38,10 +56,32 @@ typedef os_mutex_str_t* os_mutex_t;
#define OS_SYNC_TIME_EXCEEDED 1
+/* Mutex protecting counts and the event and OS 'slow' mutex lists */
+extern os_mutex_t os_sync_mutex;
+
+/* This is incremented by 1 in os_thread_create and decremented by 1 in
+os_thread_exit */
+extern ulint os_thread_count;
+
+extern ulint os_event_count;
+extern ulint os_mutex_count;
+extern ulint os_fast_mutex_count;
+
+/*************************************************************
+Initializes global event and OS 'slow' mutex lists. */
+
+void
+os_sync_init(void);
+/*==============*/
/*************************************************************
-Creates an event semaphore, i.e., a semaphore which may
-just have two states: signaled and nonsignaled.
-The created event is manual reset: it must be reset
+Frees created events and OS 'slow' mutexes. */
+
+void
+os_sync_free(void);
+/*==============*/
+/*************************************************************
+Creates an event semaphore, i.e., a semaphore which may just have two states:
+signaled and nonsignaled. The created event is manual reset: it must be reset
explicitly by calling sync_os_reset_event. */
os_event_t
@@ -50,10 +90,10 @@ os_event_create(
/* out: the event handle */
char* name); /* in: the name of the event, if NULL
the event is created without a name */
+#ifdef __WIN__
/*************************************************************
-Creates an auto-reset event semaphore, i.e., an event
-which is automatically reset when a single thread is
-released. */
+Creates an auto-reset event semaphore, i.e., an event which is automatically
+reset when a single thread is released. Works only in Windows. */
os_event_t
os_event_create_auto(
@@ -61,6 +101,7 @@ os_event_create_auto(
/* out: the event handle */
char* name); /* in: the name of the event, if NULL
the event is created without a name */
+#endif
/**************************************************************
Sets an event semaphore to the signaled state: lets waiting threads
proceed. */
@@ -85,7 +126,10 @@ os_event_free(
/*==========*/
os_event_t event); /* in: event to free */
/**************************************************************
-Waits for an event object until it is in the signaled state. */
+Waits for an event object until it is in the signaled state. If
+srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
+waiting thread when the event becomes signaled (or immediately if the
+event is already in the signaled state). */
void
os_event_wait(
@@ -93,7 +137,7 @@ os_event_wait(
os_event_t event); /* in: event to wait */
/**************************************************************
Waits for an event object until it is in the signaled state or
-a timeout is exceeded. */
+a timeout is exceeded. In Unix the timeout is always infinite. */
ulint
os_event_wait_time(
@@ -104,8 +148,9 @@ os_event_wait_time(
os_event_t event, /* in: event to wait */
ulint time); /* in: timeout in microseconds, or
OS_SYNC_INFINITE_TIME */
+#ifdef __WIN__
/**************************************************************
-Waits for any event in an event array. Returns if even a single
+Waits for any event in an OS native event array. Returns if even a single
one is signaled or becomes signaled. */
ulint
@@ -113,14 +158,15 @@ os_event_wait_multiple(
/*===================*/
/* out: index of the event
which was signaled */
- ulint n, /* in: number of events in the
+ ulint n, /* in: number of events in the
array */
- os_event_t* event_array); /* in: pointer to an array of event
+ os_native_event_t* native_event_array);
+ /* in: pointer to an array of event
handles */
+#endif
/*************************************************************
-Creates an operating system mutex semaphore.
-Because these are slow, the mutex semaphore of the database
-itself (sync_mutex_t) should be used where possible. */
+Creates an operating system mutex semaphore. Because these are slow, the
+mutex semaphore of InnoDB itself (mutex_t) should be used where possible. */
os_mutex_t
os_mutex_create(
diff --git a/innobase/include/os0sync.ic b/innobase/include/os0sync.ic
index 10b85c435e3..1337e97152a 100644
--- a/innobase/include/os0sync.ic
+++ b/innobase/include/os0sync.ic
@@ -44,4 +44,3 @@ os_fast_mutex_trylock(
#endif
#endif
}
-
diff --git a/innobase/include/os0thread.h b/innobase/include/os0thread.h
index 8355afa46e9..554ca0563e4 100644
--- a/innobase/include/os0thread.h
+++ b/innobase/include/os0thread.h
@@ -15,16 +15,9 @@ Created 9/8/1995 Heikki Tuuri
/* Maximum number of threads which can be created in the program;
this is also the size of the wait slot array for MySQL threads which
can wait inside InnoDB */
-#ifdef __WIN__
-/* Windows 95/98/ME seemed to have difficulties creating the all
-the event semaphores for the wait array slots. If the computer had
-<= 64 MB memory, InnoDB startup could take minutes or even crash.
-That is why we set this to only 1000 in Windows. */
-#define OS_THREAD_MAX_N 1000
-#else
-#define OS_THREAD_MAX_N 10000
-#endif
+#define OS_THREAD_MAX_N srv_max_n_threads
+
/* Possible fixed priorities for threads */
#define OS_THREAD_PRIORITY_NONE 100
@@ -43,7 +36,6 @@ typedef os_thread_t os_thread_id_t; /* In Unix we use the thread
the thread */
#endif
-
/* Define a function pointer type to use in a typecast */
typedef void* (*os_posix_f_t) (void*);
@@ -68,7 +60,9 @@ os_thread_pf(
/********************************************************************
Creates a new thread of execution. The execution starts from
the function given. The start function takes a void* parameter
-and returns a ulint. */
+and returns a ulint.
+NOTE: We count the number of threads in os_thread_exit(). A created
+thread should always use that to exit and not use return() to exit. */
os_thread_t
os_thread_create(
@@ -85,12 +79,13 @@ os_thread_create(
os_thread_id_t* thread_id); /* out: id of the created
thread */
/*********************************************************************
-A thread calling this function ends its execution. */
+Exits the current thread. */
void
os_thread_exit(
/*===========*/
- ulint code); /* in: exit code */
+ void* exit_value); /* in: exit value; in Windows this void*
+ is cast as a DWORD */
/*********************************************************************
Returns the thread identifier of current thread. */
@@ -146,7 +141,6 @@ ulint
os_thread_get_last_error(void);
/*==========================*/
-
#ifndef UNIV_NONINL
#include "os0thread.ic"
#endif
diff --git a/innobase/include/page0cur.h b/innobase/include/page0cur.h
index 144e0e02b21..c3f0decdb4b 100644
--- a/innobase/include/page0cur.h
+++ b/innobase/include/page0cur.h
@@ -26,7 +26,12 @@ Created 10/4/1994 Heikki Tuuri
#define PAGE_CUR_GE 2
#define PAGE_CUR_L 3
#define PAGE_CUR_LE 4
-#define PAGE_CUR_DBG 5
+#define PAGE_CUR_LE_OR_EXTENDS 5 /* This is a search mode used in
+ "column LIKE 'abc%' ORDER BY column DESC";
+ we have to find strings which are <= 'abc' or
+ which extend it */
+#define PAGE_CUR_DBG 6
+
extern ulint page_cur_short_succ;
diff --git a/innobase/include/page0page.h b/innobase/include/page0page.h
index 2f77127466f..04f771c3abd 100644
--- a/innobase/include/page0page.h
+++ b/innobase/include/page0page.h
@@ -666,6 +666,25 @@ page_rec_validate(
/* out: TRUE if ok */
rec_t* rec); /* in: record on the page */
/*******************************************************************
+Checks that the first directory slot points to the infimum record and
+the last to the supremum. This function is intended to track if the
+bug fixed in 4.0.14 has caused corruption to users' databases. */
+
+void
+page_check_dir(
+/*===========*/
+ page_t* page); /* in: index page */
+/*******************************************************************
+This function checks the consistency of an index page when we do not
+know the index. This is also resilient so that this should never crash
+even if the page is total garbage. */
+
+ibool
+page_simple_validate(
+/*=================*/
+ /* out: TRUE if ok */
+ page_t* page); /* in: index page */
+/*******************************************************************
This function checks the consistency of an index page. */
ibool
diff --git a/innobase/include/que0que.h b/innobase/include/que0que.h
index cdaeeae1fde..a3ed18e2b14 100644
--- a/innobase/include/que0que.h
+++ b/innobase/include/que0que.h
@@ -117,6 +117,7 @@ que_thr_stop(
/**************************************************************************
Moves a thread from another state to the QUE_THR_RUNNING state. Increments
the n_active_thrs counters of the query graph and transaction. */
+
void
que_thr_move_to_run_state_for_mysql(
/*================================*/
@@ -125,14 +126,17 @@ que_thr_move_to_run_state_for_mysql(
/**************************************************************************
A patch for MySQL used to 'stop' a dummy query thread used in MySQL
select, when there is no error or lock wait. */
+
void
que_thr_stop_for_mysql_no_error(
/*============================*/
que_thr_t* thr, /* in: query thread */
trx_t* trx); /* in: transaction */
/**************************************************************************
-A patch for MySQL used to 'stop' a dummy query thread used in MySQL
-select. */
+A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The
+query thread is stopped and made inactive, except in the case where
+it was put to the lock wait state in lock0lock.c, but the lock has already
+been granted or the transaction chosen as a victim in deadlock resolution. */
void
que_thr_stop_for_mysql(
diff --git a/innobase/include/read0read.h b/innobase/include/read0read.h
index cebb2d6701c..db6bf888095 100644
--- a/innobase/include/read0read.h
+++ b/innobase/include/read0read.h
@@ -45,6 +45,14 @@ read_view_close(
/*============*/
read_view_t* view); /* in: read view */
/*************************************************************************
+Closes a consistent read view for MySQL. This function is called at an SQL
+statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
+
+void
+read_view_close_for_mysql(
+/*======================*/
+ trx_t* trx); /* in: trx which has a read view */
+/*************************************************************************
Checks if a read view sees the specified transaction. */
UNIV_INLINE
ibool
diff --git a/innobase/include/rem0cmp.h b/innobase/include/rem0cmp.h
index 6f2a99fc8c2..712e263350e 100644
--- a/innobase/include/rem0cmp.h
+++ b/innobase/include/rem0cmp.h
@@ -42,6 +42,22 @@ cmp_data_data(
buffer) */
ulint len2); /* in: data field length or UNIV_SQL_NULL */
/*****************************************************************
+This function is used to compare two data fields for which we know the
+data type. */
+
+int
+cmp_data_data_slow(
+/*===============*/
+ /* out: 1, 0, -1, if data1 is greater, equal,
+ less than data2, respectively */
+ dtype_t* cur_type,/* in: data type of the fields */
+ byte* data1, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len1, /* in: data field length or UNIV_SQL_NULL */
+ byte* data2, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len2); /* in: data field length or UNIV_SQL_NULL */
+/*****************************************************************
This function is used to compare two dfields where at least the first
has its data type field set. */
UNIV_INLINE
diff --git a/innobase/include/rem0rec.h b/innobase/include/rem0rec.h
index 12e3a8b39d6..b28f39925c1 100644
--- a/innobase/include/rem0rec.h
+++ b/innobase/include/rem0rec.h
@@ -148,12 +148,22 @@ data field in the record. */
byte*
rec_get_nth_field(
/*==============*/
- /* out: pointer to the field, NULL if SQL null */
+ /* out: pointer to the field */
rec_t* rec, /* in: record */
ulint n, /* in: index of the field */
ulint* len); /* out: length of the field; UNIV_SQL_NULL
if SQL null */
/****************************************************************
+Return field length or UNIV_SQL_NULL. */
+UNIV_INLINE
+ulint
+rec_get_nth_field_len(
+/*==================*/
+ /* out: length of the field; UNIV_SQL_NULL if SQL
+ null */
+ rec_t* rec, /* in: record */
+ ulint n); /* in: index of the field */
+/****************************************************************
Gets the physical size of a field. Also an SQL null may have a field of
size > 0, if the data type is of a fixed size. */
UNIV_INLINE
diff --git a/innobase/include/rem0rec.ic b/innobase/include/rem0rec.ic
index aaa3c58a003..9dfd4faeec8 100644
--- a/innobase/include/rem0rec.ic
+++ b/innobase/include/rem0rec.ic
@@ -65,6 +65,24 @@ a field stored to another page: */
#define REC_2BYTE_EXTERN_MASK 0x4000
+/****************************************************************
+Return field length or UNIV_SQL_NULL. */
+UNIV_INLINE
+ulint
+rec_get_nth_field_len(
+/*==================*/
+ /* out: length of the field; UNIV_SQL_NULL if SQL
+ null */
+ rec_t* rec, /* in: record */
+ ulint n) /* in: index of the field */
+{
+ ulint len;
+
+ rec_get_nth_field(rec, n, &len);
+
+ return(len);
+}
+
/***************************************************************
Sets the value of the ith field SQL null bit. */
diff --git a/innobase/include/row0ins.h b/innobase/include/row0ins.h
index cc3b9fa7e9a..a5b4b74e7fc 100644
--- a/innobase/include/row0ins.h
+++ b/innobase/include/row0ins.h
@@ -35,7 +35,6 @@ row_ins_check_foreign_constraint(
dictionary cache if they exist at all */
dict_table_t* table, /* in: if check_ref is TRUE, then the foreign
table, else the referenced table */
- dict_index_t* index, /* in: index in table */
dtuple_t* entry, /* in: index entry for index */
que_thr_t* thr); /* in: query thread */
/*************************************************************************
diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h
index 75c16384458..940b4c61b2f 100644
--- a/innobase/include/row0mysql.h
+++ b/innobase/include/row0mysql.h
@@ -230,18 +230,35 @@ row_update_cascade_for_mysql(
or set null operation */
dict_table_t* table); /* in: table where we do the operation */
/*************************************************************************
-Locks the data dictionary exclusively for performing a table create
-operation. */
+Locks the data dictionary exclusively for performing a table create or other
+data dictionary modification operation. */
void
-row_mysql_lock_data_dictionary(void);
-/*================================*/
+row_mysql_lock_data_dictionary(
+/*===========================*/
+ trx_t* trx); /* in: transaction */
+/*************************************************************************
+Unlocks the data dictionary exclusive lock. */
+
+void
+row_mysql_unlock_data_dictionary(
+/*=============================*/
+ trx_t* trx); /* in: transaction */
+/*************************************************************************
+Locks the data dictionary in shared mode from modifications, for performing
+foreign key check, rollback, or other operation invisible to MySQL. */
+
+void
+row_mysql_freeze_data_dictionary(
+/*=============================*/
+ trx_t* trx); /* in: transaction */
/*************************************************************************
-Unlocks the data dictionary exclusively lock. */
+Unlocks the data dictionary shared lock. */
void
-row_mysql_unlock_data_dictionary(void);
-/*==================================*/
+row_mysql_unfreeze_data_dictionary(
+/*===============================*/
+ trx_t* trx); /* in: transaction */
/*************************************************************************
Does a table creation operation for MySQL. If the name of the created
table ends to characters INNODB_MONITOR, then this also starts
@@ -310,11 +327,9 @@ output by the master thread. */
int
row_drop_table_for_mysql(
/*=====================*/
- /* out: error code or DB_SUCCESS */
- char* name, /* in: table name */
- trx_t* trx, /* in: transaction handle */
- ibool has_dict_mutex);/* in: TRUE if the caller already owns the
- dictionary system mutex */
+ /* out: error code or DB_SUCCESS */
+ char* name, /* in: table name */
+ trx_t* trx); /* in: transaction handle */
/*************************************************************************
Drops a database for MySQL. */
@@ -393,7 +408,10 @@ struct row_prebuilt_struct {
an SQL statement: we may have to set
an intention lock on the table,
create a consistent read view etc. */
- ibool mysql_has_locked;
+ ibool mysql_has_locked; /* this is set TRUE when MySQL
+ calls external_lock on this handle
+ with a lock flag, and set FALSE when
+ it is called with the F_UNLOCK flag */
ibool clust_index_was_generated;
/* if the user did not define a
primary key in MySQL, then Innobase
@@ -401,13 +419,21 @@ struct row_prebuilt_struct {
index where the ordering column is
the row id: in this case this flag
is set to TRUE */
- dict_index_t* index; /* current index for a search, if any */
+ dict_index_t* index; /* current index for a search, if
+ any */
ulint read_just_key; /* set to 1 when MySQL calls
ha_innobase::extra with the
argument HA_EXTRA_KEYREAD; it is enough
to read just columns defined in
the index (i.e., no read of the
clustered index record necessary) */
+ ibool used_in_HANDLER;/* TRUE if we have been using this
+ handle in a MySQL HANDLER low level
+ index cursor command: then we must
+ store the pcur position even in a
+ unique search from a clustered index,
+ because HANDLER allows NEXT and PREV
+ in such a situation */
ulint template_type; /* ROW_MYSQL_WHOLE_ROW,
ROW_MYSQL_REC_FIELDS,
ROW_MYSQL_DUMMY_TEMPLATE, or
@@ -474,7 +500,11 @@ struct row_prebuilt_struct {
fetch many rows from the same cursor:
it saves CPU time to fetch them in a
batch; we reserve mysql_row_len
- bytes for each such row */
+ bytes for each such row; these
+ pointers point 4 bytes past the
+ allocated mem buf start, because
+ there is a 4 byte magic number at the
+ start and at the end */
ulint fetch_cache_first;/* position of the first not yet
fetched row in fetch_cache */
ulint n_fetch_cached; /* number of not yet fetched rows
@@ -483,8 +513,12 @@ struct row_prebuilt_struct {
to this heap */
mem_heap_t* old_vers_heap; /* memory heap where a previous
version is built in consistent read */
+ ulint magic_n2; /* this should be the same as
+ magic_n */
};
+#define ROW_PREBUILT_FETCH_MAGIC_N 465765687
+
#define ROW_MYSQL_WHOLE_ROW 0
#define ROW_MYSQL_REC_FIELDS 1
#define ROW_MYSQL_NO_TEMPLATE 2
diff --git a/innobase/include/row0mysql.ic b/innobase/include/row0mysql.ic
index 6096e5771f7..4ecd66e06ec 100644
--- a/innobase/include/row0mysql.ic
+++ b/innobase/include/row0mysql.ic
@@ -15,7 +15,8 @@ row_mysql_store_var_len(
/*====================*/
/* out: dest + 2 */
byte* dest, /* in: where to store */
- ulint len) /* in: length, must fit in two bytes */
+ ulint len __attribute__((unused))) /* in: length, must fit in two
+ bytes */
{
ut_ad(len < 256 * 256);
/*
@@ -57,7 +58,8 @@ row_mysql_store_col_in_innobase_format(
/*===================================*/
dfield_t* dfield, /* in/out: dfield */
byte* buf, /* in/out: buffer for the converted
- value */
+ value; this must be at least col_len
+ long! */
byte* mysql_data, /* in: MySQL column value, not
SQL NULL; NOTE that dfield may also
get a pointer to mysql_data,
@@ -95,7 +97,6 @@ row_mysql_store_col_in_innobase_format(
while (col_len > 0 && ptr[col_len - 1] == ' ') {
col_len--;
}
-
} else if (type == DATA_BLOB) {
ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
}
diff --git a/innobase/include/row0row.h b/innobase/include/row0row.h
index 09a79e19fd7..d1befbbbad3 100644
--- a/innobase/include/row0row.h
+++ b/innobase/include/row0row.h
@@ -86,9 +86,10 @@ dtuple_t*
row_build(
/*======*/
/* out, own: row built; see the NOTE below! */
- ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
- the former copies also the data fields to
- heap as the latter only places pointers to
+ ulint type, /* in: ROW_COPY_POINTERS, ROW_COPY_DATA, or
+ ROW_COPY_ALSO_EXTERNALS;
+ the last two also copy the data fields to
+ heap, whereas the first only places pointers to
data fields on the index page, and thus is
more efficient */
dict_index_t* index, /* in: clustered index */
diff --git a/innobase/include/row0sel.h b/innobase/include/row0sel.h
index a64d3f8e425..5ef7ff9399a 100644
--- a/innobase/include/row0sel.h
+++ b/innobase/include/row0sel.h
@@ -87,9 +87,11 @@ row_printf_step(
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/********************************************************************
-Converts a key value stored in MySQL format to an Innobase dtuple.
-The last field of the key value may be just a prefix of a fixed length
-field: hence the parameter key_len. */
+Converts a key value stored in MySQL format to an Innobase dtuple. The last
+field of the key value may be just a prefix of a fixed length field: hence
+the parameter key_len. But currently we do not allow search keys where the
+last field is only a prefix of the full key field len and print a warning if
+such appears. */
void
row_sel_convert_mysql_key_to_innobase(
@@ -100,6 +102,7 @@ row_sel_convert_mysql_key_to_innobase(
to index! */
byte* buf, /* in: buffer to use in field
conversions */
+ ulint buf_len, /* in: buffer length */
dict_index_t* index, /* in: index of the key value */
byte* key_ptr, /* in: MySQL key value */
ulint key_len); /* in: MySQL key value length */
@@ -133,6 +136,18 @@ row_search_for_mysql(
then prebuilt must have a pcur
with stored position! In opening of a
cursor 'direction' should be 0. */
+/***********************************************************************
+Checks whether MySQL is currently allowed, for this table, to retrieve a
+consistent read result from the query cache, or to store one in it. */
+
+ibool
+row_search_check_if_query_cache_permitted(
+/*======================================*/
+ /* out: TRUE if storing or retrieving from
+ the query cache is permitted */
+ trx_t* trx, /* in: transaction object */
+ char* norm_name); /* in: concatenation of database name, '/'
+ char, table name */
/* A structure for caching column values for prefetched rows */
diff --git a/innobase/include/row0upd.h b/innobase/include/row0upd.h
index 9a3e2463267..f5e0a88231f 100644
--- a/innobase/include/row0upd.h
+++ b/innobase/include/row0upd.h
@@ -114,15 +114,17 @@ row_upd_index_write_log(
closed within this function */
mtr_t* mtr); /* in: mtr into whose log to write */
/***************************************************************
-Returns TRUE if row update changes size of some field in index. */
+Returns TRUE if row update changes size of some field in index or if some
+field to be updated is stored externally in rec or update. */
ibool
-row_upd_changes_field_size(
-/*=======================*/
+row_upd_changes_field_size_or_external(
+/*===================================*/
/* out: TRUE if the update changes the size of
- some field in index */
- rec_t* rec, /* in: record in clustered index */
- dict_index_t* index, /* in: clustered index */
+ some field in index or the field is external
+ in rec or update */
+ rec_t* rec, /* in: record in index */
+ dict_index_t* index, /* in: index */
upd_t* update);/* in: update vector */
/***************************************************************
Replaces the new column values stored in the update vector to the record
@@ -170,21 +172,33 @@ Replaces the new column values stored in the update vector to the index entry
given. */
void
-row_upd_index_replace_new_col_vals(
-/*===============================*/
+row_upd_index_replace_new_col_vals_index_pos(
+/*=========================================*/
dtuple_t* entry, /* in/out: index entry where replaced */
- dict_index_t* index, /* in: index; NOTE that may also be a
+ dict_index_t* index, /* in: index; NOTE that this may also be a
non-clustered index */
- upd_t* update); /* in: update vector */
+ upd_t* update, /* in: an update vector built for the index so
+ that the field number in an upd_field is the
+ index position */
+ mem_heap_t* heap); /* in: memory heap to which we allocate and
+ copy the new values, set this as NULL if you
+ do not want allocation */
/***************************************************************
-Replaces the new column values stored in the update vector to the
-clustered index entry given. */
+Replaces the new column values stored in the update vector to the index entry
+given. */
void
-row_upd_clust_index_replace_new_col_vals(
-/*=====================================*/
+row_upd_index_replace_new_col_vals(
+/*===============================*/
dtuple_t* entry, /* in/out: index entry where replaced */
- upd_t* update); /* in: update vector */
+ dict_index_t* index, /* in: index; NOTE that this may also be a
+ non-clustered index */
+ upd_t* update, /* in: an update vector built for the
+ CLUSTERED index so that the field number in
+ an upd_field is the clustered index position */
+ mem_heap_t* heap); /* in: memory heap to which we allocate and
+ copy the new values, set this as NULL if you
+ do not want allocation */
/***************************************************************
Checks if an update vector changes an ordering field of an index record.
This function is fast if the update vector is short or the number of ordering
@@ -203,7 +217,9 @@ row_upd_changes_ord_field_binary(
known when this function is called, e.g., at
compile time */
dict_index_t* index, /* in: index of the record */
- upd_t* update);/* in: update vector for the row */
+ upd_t* update);/* in: update vector for the row; NOTE: the
+ field numbers in this MUST be clustered index
+ positions! */
/***************************************************************
Checks if an update vector changes an ordering field of an index record.
This function is fast if the update vector is short or the number of ordering
@@ -275,7 +291,10 @@ row_upd_index_parse(
/* Update vector field */
struct upd_field_struct{
- ulint field_no; /* field number in the clustered
+ ulint field_no; /* field number in an index, usually
+ the clustered index, but in updating
+ a secondary index record in btr0cur.c
+ this is the position in the secondary
index */
que_node_t* exp; /* expression for calculating a new
value: it refers to column values and
@@ -312,8 +331,11 @@ struct upd_node_struct{
ibool in_mysql_interface;
/* TRUE if the update node was created
for the MySQL interface */
+ dict_foreign_t* foreign;/* NULL or pointer to a foreign key
+ constraint if this update node is used in
+ doing an ON DELETE or ON UPDATE operation */
upd_node_t* cascade_node;/* NULL or an update node template which
- is used to implement ON DELETE CASCADE
+ is used to implement ON DELETE/UPDATE CASCADE
or ... SET NULL for foreign keys */
mem_heap_t* cascade_heap;/* NULL or a mem heap where the cascade
node is created */
@@ -355,9 +377,9 @@ struct upd_node_struct{
externally in the clustered index record of
row */
ulint n_ext_vec;/* number of fields in ext_vec */
- mem_heap_t* heap; /* memory heap used as auxiliary storage for
- row; this must be emptied after a successful
- update if node->row != NULL */
+ mem_heap_t* heap; /* memory heap used as auxiliary storage;
+ this must be emptied after a successful
+ update */
/*----------------------*/
sym_node_t* table_sym;/* table node in symbol table */
que_node_t* col_assign_list;
diff --git a/innobase/include/row0vers.ic b/innobase/include/row0vers.ic
index aa7a7aa2299..5ece47c35d1 100644
--- a/innobase/include/row0vers.ic
+++ b/innobase/include/row0vers.ic
@@ -60,7 +60,7 @@ row_vers_sec_rec_may_see_older(
/*===========================*/
/* out: FALSE if can be read immediately */
rec_t* rec, /* in: record which should be read or passed */
- dict_index_t* index, /* in: secondary index */
+ dict_index_t* index __attribute__((unused)),/* in: secondary index */
read_view_t* view) /* in: read view */
{
page_t* page;
diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h
index f457d52dec7..02d3d3bba0a 100644
--- a/innobase/include/srv0srv.h
+++ b/innobase/include/srv0srv.h
@@ -17,6 +17,8 @@ Created 10/10/1995 Heikki Tuuri
#include "que0types.h"
#include "trx0types.h"
+extern char* srv_main_thread_op_info;
+
/* Buffer which can be used in printing fatal error messages */
extern char srv_fatal_errbuf[];
@@ -28,6 +30,9 @@ extern os_event_t srv_lock_timeout_thread_event;
at a time */
#define SRV_AUTO_EXTEND_INCREMENT (8 * ((1024 * 1024) / UNIV_PAGE_SIZE))
+/* This is set to TRUE if the MySQL user has set it in MySQL */
+extern ibool srv_lower_case_table_names;
+
/* Server parameters which are read from the initfile */
extern char* srv_data_home;
@@ -57,8 +62,6 @@ extern ulint srv_flush_log_at_trx_commit;
extern byte srv_latin1_ordering[256];/* The sort order table of the latin1
character set */
-extern ibool srv_use_native_aio;
-
extern ulint srv_pool_size;
extern ulint srv_mem_pool_size;
extern ulint srv_lock_table_size;
@@ -70,11 +73,17 @@ extern dulint srv_archive_recovery_limit_lsn;
extern ulint srv_lock_wait_timeout;
-extern char* srv_unix_file_flush_method_str;
+extern char* srv_file_flush_method_str;
extern ulint srv_unix_file_flush_method;
+extern ulint srv_win_file_flush_method;
+
+extern ulint srv_max_dirty_pages_pct;
+
extern ulint srv_force_recovery;
extern ulint srv_thread_concurrency;
+extern ulint srv_max_n_threads;
+
extern lint srv_conc_n_threads;
extern ibool srv_fast_shutdown;
@@ -94,6 +103,7 @@ extern ulint srv_n_rows_read;
extern ibool srv_print_innodb_monitor;
extern ibool srv_print_innodb_lock_monitor;
extern ibool srv_print_innodb_tablespace_monitor;
+extern ibool srv_print_verbose_log;
extern ibool srv_print_innodb_table_monitor;
extern ibool srv_lock_timeout_and_monitor_active;
@@ -147,18 +157,26 @@ extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs,
/* Array of English strings describing the current state of an
i/o handler thread */
extern char* srv_io_thread_op_info[];
+extern char* srv_io_thread_function[];
typedef struct srv_sys_struct srv_sys_t;
/* The server system */
extern srv_sys_t* srv_sys;
-/* Alternatives for the field flush option in Unix; see the InnoDB manual about
+/* Alternatives for the file flush option in Unix; see the InnoDB manual about
what these mean */
-#define SRV_UNIX_FDATASYNC 1
+#define SRV_UNIX_FDATASYNC 1 /* This is the default; it is currently mapped
+ to a call of fsync() because fdatasync()
+ seemed to corrupt files in Linux and Solaris */
#define SRV_UNIX_O_DSYNC 2
#define SRV_UNIX_LITTLESYNC 3
#define SRV_UNIX_NOSYNC 4
+#define SRV_UNIX_O_DIRECT 5
+
+/* Alternatives for file i/o in Windows */
+#define SRV_WIN_IO_NORMAL 1
+#define SRV_WIN_IO_UNBUFFERED 2 /* This is the default */
/* Alternatives for srv_force_recovery. Non-zero values are intended
to help the user get a damaged database up so that he can dump intact
@@ -197,6 +215,12 @@ void
srv_init(void);
/*==========*/
/*************************************************************************
+Frees the OS fast mutex created in srv_init(). */
+
+void
+srv_free(void);
+/*==========*/
+/*************************************************************************
Initializes the synchronization primitives, memory system, and the thread
local storage. */
@@ -310,15 +334,17 @@ srv_conc_exit_innodb(
trx_t* trx); /* in: transaction object associated with the
thread */
/*******************************************************************
-Puts a MySQL OS thread to wait for a lock to be released. */
+Puts a MySQL OS thread to wait for a lock to be released. If an error
+occurs during the wait trx->error_state associated with thr is
+!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
+are possible errors. DB_DEADLOCK is returned if selective deadlock
+resolution chose this transaction as a victim. */
-ibool
+void
srv_suspend_mysql_thread(
/*=====================*/
- /* out: TRUE if the lock wait timeout was
- exceeded */
- que_thr_t* thr); /* in: query thread associated with
- the MySQL OS thread */
+ que_thr_t* thr); /* in: query thread associated with the MySQL
+ OS thread */
/************************************************************************
Releases a MySQL OS thread waiting for a lock to be released, if the
thread is already suspended. */
@@ -406,3 +432,4 @@ struct srv_sys_struct{
extern ulint srv_n_threads_active[];
#endif
+
diff --git a/innobase/include/srv0start.h b/innobase/include/srv0start.h
index 646d2c1bb06..8d2c3fa12c5 100644
--- a/innobase/include/srv0start.h
+++ b/innobase/include/srv0start.h
@@ -79,15 +79,19 @@ innobase_shutdown_for_mysql(void);
/*=============================*/
/* out: DB_SUCCESS or error code */
+extern ulint srv_sizeof_trx_t_in_ha_innodb_cc;
+
+extern ibool srv_is_being_started;
extern ibool srv_startup_is_before_trx_rollback_phase;
extern ibool srv_is_being_shut_down;
/* At a shutdown the value first climbs from 0 to SRV_SHUTDOWN_CLEANUP
-and then to SRV_SHUTDOWN_LAST_PHASE */
+and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
extern ulint srv_shutdown_state;
-#define SRV_SHUTDOWN_CLEANUP 1
-#define SRV_SHUTDOWN_LAST_PHASE 2
+#define SRV_SHUTDOWN_CLEANUP 1
+#define SRV_SHUTDOWN_LAST_PHASE 2
+#define SRV_SHUTDOWN_EXIT_THREADS 3
#endif
diff --git a/innobase/include/sync0rw.h b/innobase/include/sync0rw.h
index 7ad38f5bc7f..5aa3dcdffc3 100644
--- a/innobase/include/sync0rw.h
+++ b/innobase/include/sync0rw.h
@@ -335,7 +335,8 @@ ibool
rw_lock_own(
/*========*/
rw_lock_t* lock, /* in: rw-lock */
- ulint lock_type); /* in: lock type */
+ ulint lock_type); /* in: lock type: RW_LOCK_SHARED,
+ RW_LOCK_EX */
/**********************************************************************
Checks if somebody has locked the rw-lock in the specified mode. */
diff --git a/innobase/include/sync0rw.ic b/innobase/include/sync0rw.ic
index 7015ff34b99..36ef0a985ed 100644
--- a/innobase/include/sync0rw.ic
+++ b/innobase/include/sync0rw.ic
@@ -126,7 +126,8 @@ rw_lock_s_lock_low(
/*===============*/
/* out: TRUE if success */
rw_lock_t* lock, /* in: pointer to rw-lock */
- ulint pass, /* in: pass value; != 0, if the lock will be
+ ulint pass __attribute__((unused)),
+ /* in: pass value; != 0, if the lock will be
passed to another thread to unlock */
char* file_name, /* in: file name where lock requested */
ulint line) /* in: line where requested */
diff --git a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h
index 5bfa0bc2d48..320f8faf12d 100644
--- a/innobase/include/sync0sync.h
+++ b/innobase/include/sync0sync.h
@@ -371,10 +371,12 @@ or row lock! */
#define SYNC_NO_ORDER_CHECK 3000 /* this can be used to suppress
latching order checking */
#define SYNC_LEVEL_NONE 2000 /* default: level not defined */
-#define SYNC_FOREIGN_KEY_CHECK 1001
+#define SYNC_DICT_OPERATION 1001 /* table create, drop, etc. reserve
+ this in X-mode; implicit or background
+ operations (purge, rollback, foreign
+ key checks) reserve this in S-mode */
#define SYNC_DICT 1000
#define SYNC_DICT_AUTOINC_MUTEX 999
-#define SYNC_PURGE_IS_RUNNING 997
#define SYNC_DICT_HEADER 995
#define SYNC_IBUF_HEADER 914
#define SYNC_IBUF_PESS_INSERT_MUTEX 912
diff --git a/innobase/include/trx0purge.h b/innobase/include/trx0purge.h
index 087be2f060e..049c79aec9b 100644
--- a/innobase/include/trx0purge.h
+++ b/innobase/include/trx0purge.h
@@ -111,9 +111,6 @@ struct trx_purge_struct{
of the trx system and it never ends */
que_t* query; /* The query graph which will do the
parallelized purge operation */
- rw_lock_t purge_is_running;/* Purge operation set an x-latch here
- while it is accessing a table: this
- prevents dropping of the table */
rw_lock_t latch; /* The latch protecting the purge view.
A purge operation must acquire an
x-latch here for the instant at which
diff --git a/innobase/include/trx0roll.h b/innobase/include/trx0roll.h
index 820af4cd014..0d7126c9c57 100644
--- a/innobase/include/trx0roll.h
+++ b/innobase/include/trx0roll.h
@@ -177,6 +177,55 @@ trx_general_rollback_for_mysql(
ibool partial,/* in: TRUE if partial rollback requested */
trx_savept_t* savept);/* in: pointer to savepoint undo number, if
partial rollback requested */
+/***********************************************************************
+Rolls a transaction back to a named savepoint. Modifications after the
+savepoint are undone but InnoDB does NOT release the corresponding locks
+which are stored in memory. If a lock is 'implicit', that is, a new inserted
+row holds a lock where the lock information is carried by the trx id stored in
+the row, these locks are naturally released in the rollback. Savepoints which
+were set after this savepoint are deleted. */
+
+ulint
+trx_rollback_to_savepoint_for_mysql(
+/*================================*/
+ /* out: if no savepoint
+ of the name found then
+ DB_NO_SAVEPOINT,
+ otherwise DB_SUCCESS */
+ trx_t* trx, /* in: transaction handle */
+ char* savepoint_name, /* in: savepoint name */
+ ib_longlong* mysql_binlog_cache_pos);/* out: the MySQL binlog cache
+ position corresponding to this
+ savepoint; MySQL needs this
+ information to remove the
+ binlog entries of the queries
+ executed after the savepoint */
+/***********************************************************************
+Creates a named savepoint. If the transaction is not yet started, starts it.
+If there is already a savepoint of the same name, this call erases that old
+savepoint and replaces it with a new one. Savepoints are deleted in a transaction
+commit or rollback. */
+
+ulint
+trx_savepoint_for_mysql(
+/*====================*/
+ /* out: always DB_SUCCESS */
+ trx_t* trx, /* in: transaction handle */
+ char* savepoint_name, /* in: savepoint name */
+ ib_longlong binlog_cache_pos); /* in: MySQL binlog cache
+ position corresponding to this
+ connection at the time of the
+ savepoint */
+/***********************************************************************
+Frees savepoint structs. */
+
+void
+trx_roll_savepoints_free(
+/*=====================*/
+ trx_t* trx, /* in: transaction handle */
+ trx_named_savept_t* savep); /* in: free all savepoints > this one;
+ if this is NULL, free all savepoints
+ of trx */
extern sess_t* trx_dummy_sess;
@@ -207,6 +256,21 @@ struct roll_node_struct{
case of a partial rollback */
};
+/* A savepoint set with SQL's "SAVEPOINT savepoint_id" command */
+struct trx_named_savept_struct{
+ char* name; /* savepoint name */
+ trx_savept_t savept; /* the undo number corresponding to
+ the savepoint */
+ ib_longlong mysql_binlog_cache_pos;
+ /* the MySQL binlog cache position
+ corresponding to this savepoint, not
+ defined if the MySQL binlogging is not
+ enabled */
+ UT_LIST_NODE_T(trx_named_savept_t)
+ trx_savepoints; /* the list of savepoints of a
+ transaction */
+};
+
/* Rollback node states */
#define ROLL_NODE_SEND 1
#define ROLL_NODE_WAIT 2
diff --git a/innobase/include/trx0sys.h b/innobase/include/trx0sys.h
index a54a6424a4f..a8ed675a8a5 100644
--- a/innobase/include/trx0sys.h
+++ b/innobase/include/trx0sys.h
@@ -24,6 +24,18 @@ Created 3/26/1996 Heikki Tuuri
#include "fsp0fsp.h"
#include "read0types.h"
+/* Do NOT merge this to the 4.1 code base! */
+extern ibool trx_sys_downgrading_from_4_1_1;
+
+/********************************************************************
+Do NOT merge this to the 4.1 code base!
+Marks the trx sys header when we have successfully downgraded from the >= 4.1.1
+multiple tablespace format back to the 4.0 format. */
+
+void
+trx_sys_mark_downgraded_from_4_1_1(void);
+/*====================================*/
+
/* In a MySQL replication slave, in crash recovery we store the master log
file name and position here. We have successfully got the updates to InnoDB
up to this position. If .._pos is -1, it means no crash recovery was needed,
@@ -354,8 +366,14 @@ this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
sys header is half-written
to disk, we still may be able
to recover the information */
+#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED (24 + FSEG_HEADER_SIZE)
+ /* If this is set to
+ .._N, then we are
+ DOWNGRADING from >= 4.1.1 to
+ 4.0 */
/*-------------------------------------------------------------*/
#define TRX_SYS_DOUBLEWRITE_MAGIC_N 536853855
+#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N 1783657386
#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE
diff --git a/innobase/include/trx0sys.ic b/innobase/include/trx0sys.ic
index ada2d8cb19c..343e6d7c2fa 100644
--- a/innobase/include/trx0sys.ic
+++ b/innobase/include/trx0sys.ic
@@ -296,6 +296,16 @@ trx_is_active(
return(FALSE);
}
+ if (ut_dulint_cmp(trx_id, trx_sys->max_trx_id) >= 0) {
+
+ /* There must be corruption: we return TRUE because this
+ function is only called by lock_clust_rec_some_has_impl()
+ and row_vers_impl_x_locked_off_kernel() and they have
+ diagnostic prints in this case */
+
+ return(TRUE);
+ }
+
trx = trx_get_on_id(trx_id);
if (trx && (trx->conc_state == TRX_ACTIVE)) {
diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h
index e1f65e9da0f..6b08b674db8 100644
--- a/innobase/include/trx0trx.h
+++ b/innobase/include/trx0trx.h
@@ -118,6 +118,14 @@ trx_start_if_not_started(
/*=====================*/
trx_t* trx); /* in: transaction */
/*****************************************************************
+Starts the transaction if it is not yet started. Assumes we have reserved
+the kernel mutex! */
+UNIV_INLINE
+void
+trx_start_if_not_started_low(
+/*=========================*/
+ trx_t* trx); /* in: transaction */
+/*****************************************************************
Starts the transaction if it is not yet started. */
void
@@ -149,6 +157,15 @@ trx_commit_for_mysql(
/* out: 0 or error number */
trx_t* trx); /* in: trx handle */
/**************************************************************************
+If required, flushes the log to disk if we called trx_commit_for_mysql()
+with trx->flush_log_later == TRUE. */
+
+ulint
+trx_commit_complete_for_mysql(
+/*==========================*/
+ /* out: 0 or error number */
+ trx_t* trx); /* in: trx handle */
+/**************************************************************************
Marks the latest SQL statement ended. */
void
@@ -319,6 +336,7 @@ struct trx_struct{
time_t start_time; /* time the trx object was created
or the state last time became
TRX_ACTIVE */
+ ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */
ibool check_foreigns; /* normally TRUE, but if the user
wants to suppress foreign key checks,
(in table imports, for example) we
@@ -334,6 +352,11 @@ struct trx_struct{
dulint no; /* transaction serialization number ==
max trx id when the transaction is
moved to COMMITTED_IN_MEMORY state */
+ ibool flush_log_later;/* when we commit the transaction
+ in MySQL's binlog write, we will
+ flush the log to disk later in
+ a separate call */
+ dulint commit_lsn; /* lsn at the time of the commit */
ibool dict_operation; /* TRUE if the trx is used to create
a table, create an index, or drop a
table */
@@ -342,6 +365,9 @@ struct trx_struct{
/*------------------------------*/
void* mysql_thd; /* MySQL thread handle corresponding
to this trx, or NULL */
+ char** mysql_query_str;/* pointer to the field in mysql_thd
+ which contains the pointer to the
+ current SQL query string */
char* mysql_log_file_name;
/* if MySQL binlog is used, this field
contains a pointer to the latest file
@@ -355,7 +381,8 @@ struct trx_struct{
replication slave, we have here the
master binlog name up to which
replication has processed; otherwise
- this is a pointer to a null character */
+ this is a pointer to a null
+ character */
ib_longlong mysql_master_log_pos;
/* if the database server is a MySQL
replication slave, this is the
@@ -363,6 +390,9 @@ struct trx_struct{
replication has processed */
os_thread_id_t mysql_thread_id;/* id of the MySQL thread associated
with this transaction object */
+ ulint mysql_process_no;/* since in Linux, 'top' reports
+ process id's and not thread id's, we
+ store the process number too */
/*------------------------------*/
ulint n_mysql_tables_in_use; /* number of Innobase tables
used in the processing of the current
@@ -371,9 +401,10 @@ struct trx_struct{
/* how many tables the current SQL
statement uses, except those
in consistent read */
- ibool has_dict_foreign_key_check_lock;
- /* TRUE if the trx currently holds
- an s-lock on dict_foreign_... */
+ ibool dict_operation_lock_mode;
+ /* 0, RW_S_LATCH, or RW_X_LATCH:
+ the latch mode trx currently holds
+ on dict_operation_lock */
ibool has_search_latch;
/* TRUE if this trx has latched the
search system latch in S-mode */
@@ -402,46 +433,17 @@ struct trx_struct{
lock_t* auto_inc_lock; /* possible auto-inc lock reserved by
the transaction; note that it is also
in the lock list trx_locks */
- ibool ignore_duplicates_in_insert;
- /* in an insert roll back only insert
- of the latest row in case
- of a duplicate key error */
UT_LIST_NODE_T(trx_t)
trx_list; /* list of transactions */
UT_LIST_NODE_T(trx_t)
mysql_trx_list; /* list of transactions created for
MySQL */
/*------------------------------*/
- mutex_t undo_mutex; /* mutex protecting the fields in this
- section (down to undo_no_arr), EXCEPT
- last_sql_stat_start, which can be
- accessed only when we know that there
- cannot be any activity in the undo
- logs! */
- dulint undo_no; /* next undo log record number to
- assign */
- trx_savept_t last_sql_stat_start;
- /* undo_no when the last sql statement
- was started: in case of an error, trx
- is rolled back down to this undo
- number; see note at undo_mutex! */
- trx_rseg_t* rseg; /* rollback segment assigned to the
- transaction, or NULL if not assigned
- yet */
- trx_undo_t* insert_undo; /* pointer to the insert undo log, or
- NULL if no inserts performed yet */
- trx_undo_t* update_undo; /* pointer to the update undo log, or
- NULL if no update performed yet */
- dulint roll_limit; /* least undo number to undo during
- a rollback */
- ulint pages_undone; /* number of undo log pages undone
- since the last undo log truncation */
- trx_undo_arr_t* undo_no_arr; /* array of undo numbers of undo log
- records which are currently processed
- by a rollback operation */
- /*------------------------------*/
ulint error_state; /* 0 if no error, otherwise error
- number */
+ number; NOTE that ONLY the thread
+ doing the transaction is allowed to
+ set this field: this is NOT protected
+ by the kernel mutex */
void* error_info; /* if the error number indicates a
duplicate key error, a pointer to
the problematic index is stored here */
@@ -478,6 +480,12 @@ struct trx_struct{
TRX_QUE_LOCK_WAIT, this points to
the lock request, otherwise this is
NULL */
+ ibool was_chosen_as_deadlock_victim;
+ /* when the transaction decides to wait
+ for a lock, it sets this to FALSE;
+ if another transaction chooses this
+ transaction as a victim in deadlock
+ resolution, it sets this to TRUE */
time_t wait_started; /* lock wait started at this time */
UT_LIST_BASE_NODE_T(que_thr_t)
wait_thrs; /* query threads belonging to this
@@ -493,6 +501,38 @@ struct trx_struct{
/*------------------------------*/
mem_heap_t* read_view_heap; /* memory heap for the read view */
read_view_t* read_view; /* consistent read view or NULL */
+ /*------------------------------*/
+ UT_LIST_BASE_NODE_T(trx_named_savept_t)
+ trx_savepoints; /* savepoints set with SAVEPOINT ...,
+ oldest first */
+ /*------------------------------*/
+ mutex_t undo_mutex; /* mutex protecting the fields in this
+ section (down to undo_no_arr), EXCEPT
+ last_sql_stat_start, which can be
+ accessed only when we know that there
+ cannot be any activity in the undo
+ logs! */
+ dulint undo_no; /* next undo log record number to
+ assign */
+ trx_savept_t last_sql_stat_start;
+ /* undo_no when the last sql statement
+ was started: in case of an error, trx
+ is rolled back down to this undo
+ number; see note at undo_mutex! */
+ trx_rseg_t* rseg; /* rollback segment assigned to the
+ transaction, or NULL if not assigned
+ yet */
+ trx_undo_t* insert_undo; /* pointer to the insert undo log, or
+ NULL if no inserts performed yet */
+ trx_undo_t* update_undo; /* pointer to the update undo log, or
+ NULL if no update performed yet */
+ dulint roll_limit; /* least undo number to undo during
+ a rollback */
+ ulint pages_undone; /* number of undo log pages undone
+ since the last undo log truncation */
+ trx_undo_arr_t* undo_no_arr; /* array of undo numbers of undo log
+ records which are currently processed
+ by a rollback operation */
};
#define TRX_MAX_N_THREADS 32 /* maximum number of concurrent
@@ -515,6 +555,41 @@ struct trx_struct{
#define TRX_QUE_ROLLING_BACK 3 /* transaction is rolling back */
#define TRX_QUE_COMMITTING 4 /* transaction is committing */
+/* Transaction isolation levels */
+#define TRX_ISO_READ_UNCOMMITTED 1 /* dirty read: non-locking
+ SELECTs are performed so that
+ we do not look at a possible
+ earlier version of a record;
+ thus they are not 'consistent'
+ reads under this isolation
+ level; otherwise like level
+ 2 */
+
+#define TRX_ISO_READ_COMMITTED 2 /* somewhat Oracle-like
+ isolation, except that in
+ range UPDATE and DELETE we
+ must block phantom rows
+ with next-key locks;
+ SELECT ... FOR UPDATE and ...
+ LOCK IN SHARE MODE only lock
+ the index records, NOT the
+ gaps before them, and thus
+ allow free inserting;
+ each consistent read reads its
+ own snapshot */
+
+#define TRX_ISO_REPEATABLE_READ 3 /* this is the default;
+ all consistent reads in the
+ same trx read the same
+ snapshot;
+ full next-key locking used
+ in locking reads to block
+ insertions into gaps */
+
+#define TRX_ISO_SERIALIZABLE 4 /* all plain SELECTs are
+ converted to LOCK IN SHARE
+ MODE reads */
+
/* Types of a trx signal */
#define TRX_SIG_NO_SIGNAL 100
#define TRX_SIG_TOTAL_ROLLBACK 1
diff --git a/innobase/include/trx0trx.ic b/innobase/include/trx0trx.ic
index 9d453047600..78e5acda148 100644
--- a/innobase/include/trx0trx.ic
+++ b/innobase/include/trx0trx.ic
@@ -21,3 +21,22 @@ trx_start_if_not_started(
trx_start(trx, ULINT_UNDEFINED);
}
}
+
+/*****************************************************************
+Starts the transaction if its conc_state is TRX_NOT_STARTED; otherwise leaves
+it as it is. The caller must already have reserved the kernel mutex! */
+UNIV_INLINE
+void
+trx_start_if_not_started_low(
+/*=========================*/
+ trx_t* trx) /* in: transaction to start if needed */
+{
+ ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY);
+
+ if (trx->conc_state == TRX_NOT_STARTED) {
+
+ trx_start_low(trx, ULINT_UNDEFINED);
+ }
+}
+
+
diff --git a/innobase/include/trx0types.h b/innobase/include/trx0types.h
index b8befe7172f..2965eb4451f 100644
--- a/innobase/include/trx0types.h
+++ b/innobase/include/trx0types.h
@@ -24,6 +24,7 @@ typedef struct trx_undo_inf_struct trx_undo_inf_t;
typedef struct trx_purge_struct trx_purge_t;
typedef struct roll_node_struct roll_node_t;
typedef struct commit_node_struct commit_node_t;
+typedef struct trx_named_savept_struct trx_named_savept_t;
/* Transaction savepoint */
typedef struct trx_savept_struct trx_savept_t;
diff --git a/innobase/include/univ.i b/innobase/include/univ.i
index b511ec044a2..4854e5a7b78 100644
--- a/innobase/include/univ.i
+++ b/innobase/include/univ.i
@@ -9,7 +9,8 @@ Created 1/20/1994 Heikki Tuuri
#ifndef univ_i
#define univ_i
-#if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER)
+#if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER) && !defined(__WIN__)
+#undef __WIN__
#define __WIN__
#include <windows.h>
@@ -29,7 +30,7 @@ Created 1/20/1994 Heikki Tuuri
in compiling more Posix-compatible. These headers also define __WIN__
if we are compiling on Windows. */
-#include <global.h>
+#include <my_global.h>
#include <my_pthread.h>
/* Include <sys/stat.h> to get S_I... macros defined for os0file.c */
@@ -56,6 +57,7 @@ of the 32-bit x86 assembler in mutex operations. */
Microsoft Visual C++ */
#if !defined(__GNUC__) && !defined(__WIN__)
+#undef UNIV_MUST_NOT_INLINE /* Remove compiler warning */
#define UNIV_MUST_NOT_INLINE
#endif
@@ -98,6 +100,15 @@ memory is read outside the allocated blocks. */
#define YYDEBUG 1
+#ifdef HAVE_purify
+/* The following sets all newly allocated memory to zero before use:
+this can be used to eliminate unnecessary Purify warnings, but note that
+it also masks many bugs Purify could detect. For detailed Purify analysis it
+is best to remove the define below and look through the warnings one
+by one. */
+#define UNIV_SET_MEM_TO_ZERO
+#endif
+
/*
#define UNIV_SQL_DEBUG
#define UNIV_LOG_DEBUG
@@ -176,7 +187,11 @@ management to ensure correct alignment for doubles etc. */
/* Another basic type we use is unsigned long integer which is intended to be
equal to the word size of the machine. */
+#ifdef _WIN64
+typedef unsigned __int64 ulint;
+#else
typedef unsigned long int ulint;
+#endif
typedef long int lint;
diff --git a/innobase/include/ut0dbg.h b/innobase/include/ut0dbg.h
index 3407483696c..802557099fc 100644
--- a/innobase/include/ut0dbg.h
+++ b/innobase/include/ut0dbg.h
@@ -20,7 +20,6 @@ extern ibool ut_dbg_stop_threads;
extern ulint* ut_dbg_null_ptr;
-
#define ut_a(EXPR)\
{\
ulint dbg_i;\
@@ -31,8 +30,41 @@ extern ulint* ut_dbg_null_ptr;
" InnoDB: Assertion failure in thread %lu in file %s line %lu\n",\
os_thread_pf(os_thread_get_curr_id()), IB__FILE__,\
(ulint)__LINE__);\
+ fprintf(stderr,\
+ "InnoDB: Failing assertion: " #EXPR);\
fprintf(stderr,\
- "InnoDB: We intentionally generate a memory trap.\n");\
+ "\nInnoDB: We intentionally generate a memory trap.\n");\
+ fprintf(stderr,\
+ "InnoDB: Send a detailed bug report to mysql@lists.mysql.com\n");\
+ ut_dbg_stop_threads = TRUE;\
+ dbg_i = *(ut_dbg_null_ptr);\
+ if (dbg_i) {\
+ ut_dbg_null_ptr = NULL;\
+ }\
+ }\
+ if (ut_dbg_stop_threads) {\
+ fprintf(stderr,\
+ "InnoDB: Thread %lu stopped in file %s line %lu\n",\
+ os_thread_pf(os_thread_get_curr_id()), IB__FILE__, (ulint)__LINE__);\
+ os_thread_sleep(1000000000);\
+ }\
+}
+
+/* This can be used if there are % characters in the assertion formula:
+if we try to printf the formula gcc would complain of illegal print
+format characters */
+#define ut_anp(EXPR)\
+{\
+ ulint dbg_i;\
+\
+ if (!((ulint)(EXPR) + ut_dbg_zero)) {\
+ ut_print_timestamp(stderr);\
+ fprintf(stderr,\
+ " InnoDB: Assertion failure in thread %lu in file %s line %lu\n",\
+ os_thread_pf(os_thread_get_curr_id()), IB__FILE__,\
+ (ulint)__LINE__);\
+ fprintf(stderr,\
+ "\nInnoDB: We intentionally generate a memory trap.\n");\
fprintf(stderr,\
"InnoDB: Send a detailed bug report to mysql@lists.mysql.com\n");\
ut_dbg_stop_threads = TRUE;\
diff --git a/innobase/include/ut0mem.h b/innobase/include/ut0mem.h
index 2d245e5f72f..4e8566eba1b 100644
--- a/innobase/include/ut0mem.h
+++ b/innobase/include/ut0mem.h
@@ -57,7 +57,7 @@ ut_free(
/*====*/
void* ptr); /* in, own: memory block */
/**************************************************************************
-Frees all allocated memory not freed yet. */
+Frees in shutdown all allocated memory not freed yet. */
void
ut_free_all_mem(void);
@@ -69,7 +69,7 @@ ut_strcpy(char* dest, char* sour);
UNIV_INLINE
ulint
-ut_strlen(char* str);
+ut_strlen(const char* str);
UNIV_INLINE
int
diff --git a/innobase/include/ut0mem.ic b/innobase/include/ut0mem.ic
index 7ae9bc8bd74..1049aee8ecc 100644
--- a/innobase/include/ut0mem.ic
+++ b/innobase/include/ut0mem.ic
@@ -36,7 +36,7 @@ ut_strcpy(char* dest, char* sour)
UNIV_INLINE
ulint
-ut_strlen(char* str)
+ut_strlen(const char* str)
{
return(strlen(str));
}