diff options
Diffstat (limited to 'storage/xtradb/include')
24 files changed, 643 insertions, 97 deletions
diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h index 8a35cb1a3da..4ed66e76fe0 100644 --- a/storage/xtradb/include/btr0cur.h +++ b/storage/xtradb/include/btr0cur.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -582,6 +582,17 @@ void btr_estimate_number_of_different_key_vals( /*======================================*/ dict_index_t* index); /*!< in: index */ + +/** Gets the externally stored size of a record, in units of a database page. +@param[in] rec record +@param[in] offsets array returned by rec_get_offsets() +@return externally stored part, in units of a database page */ + +ulint +btr_rec_get_externally_stored_len( + const rec_t* rec, + const ulint* offsets); + /*******************************************************************//** Marks non-updated off-page fields as disowned by this record. The ownership must be transferred to the updated record which is inserted elsewhere in the diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index d5cc03d8c86..84db9cfcf2b 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -273,6 +273,15 @@ buf_pool_get_oldest_modification(void); /*==================================*/ /********************************************************************//** +Gets the smallest oldest_modification lsn for any page in the pool. Returns +zero if all modified pages have been flushed to disk. +@return oldest modification in pool, zero if none */ +UNIV_INTERN +lsn_t +buf_pool_get_oldest_modification_peek(void); +/*=======================================*/ + +/********************************************************************//** Allocates a buf_page_t descriptor. This function must succeed. In case of failure we assert in this function. */ UNIV_INLINE @@ -437,7 +446,7 @@ buf_page_create( mtr_t* mtr); /*!< in: mini-transaction handle */ #else /* !UNIV_HOTBACKUP */ /********************************************************************//** -Inits a page to the buffer buf_pool, for use in ibbackup --restore. */ +Inits a page to the buffer buf_pool, for use in mysqlbackup --restore. */ UNIV_INTERN void buf_page_init_for_backup_restore( diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic index c49061621f3..10f0e02cb8f 100644 --- a/storage/xtradb/include/buf0buf.ic +++ b/storage/xtradb/include/buf0buf.ic @@ -662,6 +662,11 @@ buf_page_get_block( buf_page_t* bpage) /*!< in: control block, or NULL */ { if (bpage != NULL) { +#ifdef UNIV_DEBUG + buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + ut_ad(buf_page_hash_lock_held_s_or_x(buf_pool, bpage) + || mutex_own(&buf_pool->LRU_list_mutex)); +#endif ut_ad(buf_page_in_file(bpage)); if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) { @@ -1176,12 +1181,6 @@ buf_page_hash_get_low( ut_a(buf_page_in_file(bpage)); ut_ad(bpage->in_page_hash); ut_ad(!bpage->in_zip_hash); -#if UNIV_WORD_SIZE == 4 - /* On 32-bit systems, there is no padding in - buf_page_t. On other systems, Valgrind could complain - about uninitialized pad bytes. */ - UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage); -#endif } return(bpage); diff --git a/storage/xtradb/include/dict0crea.h b/storage/xtradb/include/dict0crea.h index 6ec1079957b..67eab9058da 100644 --- a/storage/xtradb/include/dict0crea.h +++ b/storage/xtradb/include/dict0crea.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -124,28 +124,24 @@ dict_create_add_foreign_id( const char* name, /*!< in: table name */ dict_foreign_t* foreign)/*!< in/out: foreign key */ __attribute__((nonnull)); -/********************************************************************//** -Adds foreign key definitions to data dictionary tables in the database. We -look at table->foreign_list, and also generate names to constraints that were -not named by the user. A generated constraint has a name of the format -databasename/tablename_ibfk_NUMBER, where the numbers start from 1, and are -given locally for this table, that is, the number is not global, as in the -old format constraints < 4.0.18 it used to be. -@return error code or DB_SUCCESS */ + +/** Adds the given set of foreign key objects to the dictionary tables +in the database. This function does not modify the dictionary cache. The +caller must ensure that all foreign key objects contain a valid constraint +name in foreign->id. +@param[in] local_fk_set set of foreign key objects, to be added to +the dictionary tables +@param[in] table table to which the foreign key objects in +local_fk_set belong to +@param[in,out] trx transaction +@return error code or DB_SUCCESS */ UNIV_INTERN dberr_t dict_create_add_foreigns_to_dictionary( /*===================================*/ - ulint start_id,/*!< in: if we are actually doing ALTER TABLE - ADD CONSTRAINT, we want to generate constraint - numbers which are bigger than in the table so - far; we number the constraints from - start_id + 1 up; start_id should be set to 0 if - we are creating a new table, or if the table - so far has no constraints for which the name - was generated here */ - dict_table_t* table, /*!< in: table */ - trx_t* trx) /*!< in: transaction */ + const dict_foreign_set& local_fk_set, + const dict_table_t* table, + trx_t* trx) __attribute__((nonnull, warn_unused_result)); /****************************************************************//** Creates the tablespaces and datafiles system tables inside InnoDB diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index 52ac5eee86b..78503d954ba 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. Copyright (c) 2013, SkySQL Ab. All Rights Reserved. @@ -46,6 +46,9 @@ Created 1/8/1996 Heikki Tuuri #include "fsp0fsp.h" #include "dict0pagecompress.h" +extern bool innodb_table_stats_not_found; +extern bool innodb_index_stats_not_found; + #ifndef UNIV_HOTBACKUP # include "sync0sync.h" # include "sync0rw.h" @@ -1447,6 +1450,28 @@ UNIV_INTERN void dict_mutex_exit_for_mysql(void); /*===========================*/ + +/** Create a dict_table_t's stats latch or delay for lazy creation. +This function is only called from either single threaded environment +or from a thread that has not shared the table object with other threads. +@param[in,out] table table whose stats latch to create +@param[in] enabled if false then the latch is disabled +and dict_table_stats_lock()/unlock() become noop on this table. */ + +void +dict_table_stats_latch_create( + dict_table_t* table, + bool enabled); + +/** Destroy a dict_table_t's stats latch. +This function is only called from either single threaded environment +or from a thread that has not shared the table object with other threads. +@param[in,out] table table whose stats latch to destroy */ + +void +dict_table_stats_latch_destroy( + dict_table_t* table); + /**********************************************************************//** Lock the appropriate latch to protect a given table's statistics. table->id is used to pick the corresponding latch from a global array of diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h index 8de9206cb81..5bea2334131 100644 --- a/storage/xtradb/include/dict0mem.h +++ b/storage/xtradb/include/dict0mem.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. Copyright (c) 2013, SkySQL Ab. All Rights Reserved. @@ -50,6 +50,9 @@ Created 1/8/1996 Heikki Tuuri #include "hash0hash.h" #include "trx0types.h" #include "fts0fts.h" +#include "os0once.h" +#include <set> +#include <algorithm> /* Forward declaration. */ struct ib_rbt_t; @@ -695,6 +698,9 @@ struct dict_index_t{ ulint stat_n_leaf_pages; /*!< approximate number of leaf pages in the index tree */ + bool stats_error_printed; + /*!< has persistent statistics error printed + for this index ? */ /* @} */ /** Statistics for defragmentation, these numbers are estimations and could be very inaccurate at certain times, e.g. right after restart, @@ -790,12 +796,106 @@ struct dict_foreign_t{ does not generate new indexes implicitly */ dict_index_t* referenced_index;/*!< referenced index */ - UT_LIST_NODE_T(dict_foreign_t) - foreign_list; /*!< list node for foreign keys of the - table */ - UT_LIST_NODE_T(dict_foreign_t) - referenced_list;/*!< list node for referenced - keys of the table */ +}; + +/** Compare two dict_foreign_t objects using their ids. Used in the ordering +of dict_table_t::foreign_set and dict_table_t::referenced_set. It returns +true if the first argument is considered to go before the second in the +strict weak ordering it defines, and false otherwise. */ +struct dict_foreign_compare { + + bool operator()( + const dict_foreign_t* lhs, + const dict_foreign_t* rhs) const + { + return(ut_strcmp(lhs->id, rhs->id) < 0); + } +}; + +/** A function object to find a foreign key with the given index as the +referenced index. Return the foreign key with matching criteria or NULL */ +struct dict_foreign_with_index { + + dict_foreign_with_index(const dict_index_t* index) + : m_index(index) + {} + + bool operator()(const dict_foreign_t* foreign) const + { + return(foreign->referenced_index == m_index); + } + + const dict_index_t* m_index; +}; + +/* A function object to check if the foreign constraint is between different +tables. Returns true if foreign key constraint is between different tables, +false otherwise. */ +struct dict_foreign_different_tables { + + bool operator()(const dict_foreign_t* foreign) const + { + return(foreign->foreign_table != foreign->referenced_table); + } +}; + +/** A function object to check if the foreign key constraint has the same +name as given. If the full name of the foreign key constraint doesn't match, +then, check if removing the database name from the foreign key constraint +matches. Return true if it matches, false otherwise. */ +struct dict_foreign_matches_id { + + dict_foreign_matches_id(const char* id) + : m_id(id) + {} + + bool operator()(const dict_foreign_t* foreign) const + { + if (0 == innobase_strcasecmp(foreign->id, m_id)) { + return(true); + } + if (const char* pos = strchr(foreign->id, '/')) { + if (0 == innobase_strcasecmp(m_id, pos + 1)) { + return(true); + } + } + return(false); + } + + const char* m_id; +}; + +typedef std::set<dict_foreign_t*, dict_foreign_compare> dict_foreign_set; + +/*********************************************************************//** +Frees a foreign key struct. */ +inline +void +dict_foreign_free( +/*==============*/ + dict_foreign_t* foreign) /*!< in, own: foreign key struct */ +{ + mem_heap_free(foreign->heap); +} + +/** The destructor will free all the foreign key constraints in the set +by calling dict_foreign_free() on each of the foreign key constraints. +This is used to free the allocated memory when a local set goes out +of scope. */ +struct dict_foreign_set_free { + + dict_foreign_set_free(const dict_foreign_set& foreign_set) + : m_foreign_set(foreign_set) + {} + + ~dict_foreign_set_free() + { + std::for_each(m_foreign_set.begin(), + m_foreign_set.end(), + dict_foreign_free); + } + + const dict_foreign_set& m_foreign_set; }; /** The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that @@ -817,6 +917,8 @@ the table, DML from memcached will be blocked. */ /** Data structure for a database table. Most fields will be initialized to 0, NULL or FALSE in dict_mem_table_create(). */ struct dict_table_t{ + + table_id_t id; /*!< id of the table */ mem_heap_t* heap; /*!< memory heap */ char* name; /*!< table name */ @@ -871,13 +973,16 @@ struct dict_table_t{ hash_node_t id_hash; /*!< hash chain node */ UT_LIST_BASE_NODE_T(dict_index_t) indexes; /*!< list of indexes of the table */ - UT_LIST_BASE_NODE_T(dict_foreign_t) - foreign_list;/*!< list of foreign key constraints + + dict_foreign_set foreign_set; + /*!< set of foreign key constraints in the table; these refer to columns in other tables */ - UT_LIST_BASE_NODE_T(dict_foreign_t) - referenced_list;/*!< list of foreign key constraints + + dict_foreign_set referenced_set; + /*!< list of foreign key constraints which refer to this table */ + UT_LIST_NODE_T(dict_table_t) table_LRU; /*!< node of the LRU list of tables */ unsigned fk_max_recusive_level:8; @@ -927,6 +1032,10 @@ struct dict_table_t{ initialized in dict_table_add_to_cache() */ /** Statistics for query optimization */ /* @{ */ + + volatile os_once::state_t stats_latch_created; + /*!< Creation state of 'stats_latch'. */ + rw_lock_t* stats_latch; /*!< this latch protects: dict_table_t::stat_initialized dict_table_t::stat_n_rows (*) @@ -1036,6 +1145,9 @@ struct dict_table_t{ /*!< see BG_STAT_* above. Writes are covered by dict_sys->mutex. Dirty reads are possible. */ + bool stats_error_printed; + /*!< Has persistent stats error beein + already printed for this table ? */ /* @} */ /*----------------------*/ /**!< The following fields are used by the @@ -1116,6 +1228,19 @@ struct dict_table_t{ #endif /* UNIV_DEBUG */ }; +/** A function object to add the foreign key constraint to the referenced set +of the referenced table, if it exists in the dictionary cache. */ +struct dict_foreign_add_to_referenced_table { + void operator()(dict_foreign_t* foreign) const + { + if (dict_table_t* table = foreign->referenced_table) { + std::pair<dict_foreign_set::iterator, bool> ret + = table->referenced_set.insert(foreign); + ut_a(ret.second); + } + } +}; + #ifndef UNIV_NONINL #include "dict0mem.ic" #endif diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 3960eef5d7e..c2d113bdc1f 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -50,7 +50,7 @@ struct fil_space_t; typedef std::list<const char*> space_name_list_t; /** When mysqld is run, the default directory "." is the mysqld datadir, -but in the MySQL Embedded Server Library and ibbackup it is not the default +but in the MySQL Embedded Server Library and mysqlbackup it is not the default directory, and we must set the base file path explicitly */ extern const char* fil_path_to_mysql_datadir; @@ -456,8 +456,8 @@ exists and the space id in it matches. Replays the create operation if a file at that path does not exist yet. If the database directory for the file to be created does not exist, then we create the directory, too. -Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the -datadir that we should use in replaying the file operations. +Note that mysqlbackup --apply-log sets fil_path_to_mysql_datadir to point to +the datadir that we should use in replaying the file operations. @return end of log record, or NULL if the record was not completely contained between ptr and end_ptr */ UNIV_INTERN @@ -710,9 +710,9 @@ fil_space_for_table_exists_in_mem( #else /* !UNIV_HOTBACKUP */ /********************************************************************//** Extends all tablespaces to the size stored in the space header. During the -ibbackup --apply-log phase we extended the spaces on-demand so that log records -could be appllied, but that may have left spaces still too small compared to -the size stored in the space header. */ +mysqlbackup --apply-log phase we extended the spaces on-demand so that log +records could be appllied, but that may have left spaces still too small +compared to the size stored in the space header. */ UNIV_INTERN void fil_extend_tablespaces_to_stored_len(void); diff --git a/storage/xtradb/include/fts0ast.h b/storage/xtradb/include/fts0ast.h index c0aac6d8e4c..50ee587e282 100644 --- a/storage/xtradb/include/fts0ast.h +++ b/storage/xtradb/include/fts0ast.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -76,6 +76,7 @@ enum fts_ast_oper_t { struct fts_lexer_t; struct fts_ast_node_t; struct fts_ast_state_t; +struct fts_ast_string_t; typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*); @@ -101,16 +102,16 @@ extern fts_ast_node_t* fts_ast_create_node_term( /*=====================*/ - void* arg, /*!< in: ast state */ - const char* ptr); /*!< in: term string */ + void* arg, /*!< in: ast state */ + const fts_ast_string_t* ptr); /*!< in: term string */ /******************************************************************** Create an AST text node */ extern fts_ast_node_t* fts_ast_create_node_text( /*=====================*/ - void* arg, /*!< in: ast state */ - const char* ptr); /*!< in: text string */ + void* arg, /*!< in: ast state */ + const fts_ast_string_t* ptr); /*!< in: text string */ /******************************************************************** Create an AST expr list node */ extern @@ -233,16 +234,66 @@ fts_lexer_free( free */ __attribute__((nonnull)); +/** +Create an ast string object, with NUL-terminator, so the string +has one more byte than len +@param[in] str pointer to string +@param[in] len length of the string +@return ast string with NUL-terminator */ +UNIV_INTERN +fts_ast_string_t* +fts_ast_string_create( + const byte* str, + ulint len); + +/** +Free an ast string instance +@param[in,out] ast_str string to free */ +UNIV_INTERN +void +fts_ast_string_free( + fts_ast_string_t* ast_str); + +/** +Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul +@param[in] str string to translate +@param[in] base the base +@return translated number */ +UNIV_INTERN +ulint +fts_ast_string_to_ul( + const fts_ast_string_t* ast_str, + int base); + +/** +Print the ast string +@param[in] str string to print */ +UNIV_INTERN +void +fts_ast_string_print( + const fts_ast_string_t* ast_str); + +/* String of length len. +We always store the string of length len with a terminating '\0', +regardless of there is any 0x00 in the string itself */ +struct fts_ast_string_t { + /*!< Pointer to string. */ + byte* str; + + /*!< Length of the string. */ + ulint len; +}; + /* Query term type */ struct fts_ast_term_t { - byte* ptr; /*!< Pointer to term string.*/ - ibool wildcard; /*!< TRUE if wild card set.*/ + fts_ast_string_t* ptr; /*!< Pointer to term string.*/ + ibool wildcard; /*!< TRUE if wild card set.*/ }; /* Query text type */ struct fts_ast_text_t { - byte* ptr; /*!< Pointer to term string.*/ - ulint distance; /*!< > 0 if proximity distance + fts_ast_string_t* ptr; /*!< Pointer to text string.*/ + ulint distance; /*!< > 0 if proximity distance set */ }; diff --git a/storage/xtradb/include/fts0fts.h b/storage/xtradb/include/fts0fts.h index 5bea5bc0e97..a2996ecacc8 100644 --- a/storage/xtradb/include/fts0fts.h +++ b/storage/xtradb/include/fts0fts.h @@ -745,6 +745,7 @@ void fts_savepoint_take( /*===============*/ trx_t* trx, /*!< in: transaction */ + fts_trx_t* fts_trx, /*!< in: fts transaction */ const char* name) /*!< in: savepoint name */ __attribute__((nonnull)); /**********************************************************************//** diff --git a/storage/xtradb/include/fts0pars.h b/storage/xtradb/include/fts0pars.h index 50f636944e5..8108e811599 100644 --- a/storage/xtradb/include/fts0pars.h +++ b/storage/xtradb/include/fts0pars.h @@ -53,9 +53,9 @@ typedef union YYSTYPE /* Line 2068 of yacc.c */ #line 61 "fts0pars.y" - int oper; - char* token; - fts_ast_node_t* node; + int oper; + fts_ast_string_t* token; + fts_ast_node_t* node; diff --git a/storage/xtradb/include/lock0lock.h b/storage/xtradb/include/lock0lock.h index 8d5515b5eb5..235b2373c25 100644 --- a/storage/xtradb/include/lock0lock.h +++ b/storage/xtradb/include/lock0lock.h @@ -301,7 +301,7 @@ lock_rec_insert_check_and_lock( inserted record maybe should inherit LOCK_GAP type locks from the successor record */ - __attribute__((nonnull, warn_unused_result)); + __attribute__((nonnull(2,3,4,6,7), warn_unused_result)); /*********************************************************************//** Checks if locks of other transactions prevent an immediate modify (update, delete mark, or delete unmark) of a clustered index record. If they do, diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h index b9e0c2ef516..f130c8de423 100644 --- a/storage/xtradb/include/log0log.h +++ b/storage/xtradb/include/log0log.h @@ -168,6 +168,13 @@ lsn_t log_get_lsn(void); /*=============*/ /************************************************************//** +Gets the current lsn. +@return current lsn */ +UNIV_INLINE +lsn_t +log_get_lsn_nowait(void); +/*=============*/ +/************************************************************//** Gets the last lsn that is fully flushed to disk. @return last flushed lsn */ UNIV_INLINE @@ -615,6 +622,27 @@ void log_mem_free(void); /*==============*/ +/****************************************************************//** +Safely reads the log_sys->tracked_lsn value. Uses atomic operations +if available, otherwise this field is protected with the log system +mutex. The writer counterpart function is log_set_tracked_lsn() in +log0online.c. + +@return log_sys->tracked_lsn value. */ +UNIV_INLINE +lsn_t +log_get_tracked_lsn(void); +/*=====================*/ +/****************************************************************//** +Unsafely reads the log_sys->tracked_lsn value. Uses atomic operations +if available, or use dirty read. Use for printing only. + +@return log_sys->tracked_lsn value. */ +UNIV_INLINE +lsn_t +log_get_tracked_lsn_peek(void); +/*==========================*/ + extern log_t* log_sys; /* Values used as flags */ @@ -696,13 +724,13 @@ extern log_t* log_sys; megabyte. This information might have been used - since ibbackup version 0.35 but + since mysqlbackup version 0.35 but before 1.41 to decide if unused ends of non-auto-extending data files in space 0 can be truncated. This information was made obsolete - by ibbackup --compress. */ + by mysqlbackup --compress. */ #define LOG_CHECKPOINT_FSP_MAGIC_N (12 + LOG_CHECKPOINT_ARRAY_END) /*!< Not used (0); This magic number tells if the @@ -731,7 +759,7 @@ extern log_t* log_sys; /* a 32-byte field which contains the string 'ibbackup' and the creation time if the log file was - created by ibbackup --restore; + created by mysqlbackup --restore; when mysqld is first time started on the restored database, it can print helpful info for the user */ diff --git a/storage/xtradb/include/log0log.ic b/storage/xtradb/include/log0log.ic index 7724d94b51a..853027daa7e 100644 --- a/storage/xtradb/include/log0log.ic +++ b/storage/xtradb/include/log0log.ic @@ -486,6 +486,26 @@ log_get_flush_lsn(void) return(lsn); } +/************************************************************//** +Gets the current lsn with a trylock +@return current lsn or 0 if false*/ +UNIV_INLINE +lsn_t +log_get_lsn_nowait(void) +/*=============*/ +{ + lsn_t lsn; + + if (mutex_enter_nowait(&(log_sys->mutex))) + return 0; + + lsn = log_sys->lsn; + + mutex_exit(&(log_sys->mutex)); + + return(lsn); +} + /**************************************************************** Gets the log group capacity. It is OK to read the value without holding log_sys->mutex because it is constant. @@ -531,3 +551,39 @@ log_free_check(void) } } #endif /* !UNIV_HOTBACKUP */ + +/****************************************************************//** +Unsafely reads the log_sys->tracked_lsn value. Uses atomic operations +if available, or use dirty read. Use for printing only. + +@return log_sys->tracked_lsn value. */ +UNIV_INLINE +lsn_t +log_get_tracked_lsn_peek(void) +/*==========================*/ +{ +#ifdef HAVE_ATOMIC_BUILTINS_64 + return os_atomic_increment_uint64(&log_sys->tracked_lsn, 0); +#else + return log_sys->tracked_lsn; +#endif +} + +/****************************************************************//** +Safely reads the log_sys->tracked_lsn value. Uses atomic operations +if available, otherwise this field is protected with the log system +mutex. The writer counterpart function is log_set_tracked_lsn() in +log0online.c. +@return log_sys->tracked_lsn value. */ +UNIV_INLINE +lsn_t +log_get_tracked_lsn(void) +/*=====================*/ +{ +#ifdef HAVE_ATOMIC_BUILTINS_64 + return os_atomic_increment_uint64(&log_sys->tracked_lsn, 0); +#else + ut_ad(mutex_own(&(log_sys->mutex))); + return log_sys->tracked_lsn; +#endif +} diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index 76e77799b43..ba362a0e458 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -132,7 +132,7 @@ enum os_file_create_t { #define OS_FILE_READ_ONLY 333 #define OS_FILE_READ_WRITE 444 -#define OS_FILE_READ_ALLOW_DELETE 555 /* for ibbackup */ +#define OS_FILE_READ_ALLOW_DELETE 555 /* for mysqlbackup */ /* Options for file_create */ #define OS_FILE_AIO 61 @@ -168,8 +168,8 @@ enum os_file_create_t { #define OS_FILE_LOG 256 /* This can be ORed to type */ /* @} */ -#define OS_AIO_N_PENDING_IOS_PER_THREAD 256 /*!< Windows might be able to handle -more */ +#define OS_AIO_N_PENDING_IOS_PER_THREAD 32 /*!< Win NT does not allow more + than 64 */ /** Modes for aio operations @{ */ #define OS_AIO_NORMAL 21 /*!< Normal asynchronous i/o not for ibuf diff --git a/storage/xtradb/include/os0once.h b/storage/xtradb/include/os0once.h new file mode 100644 index 00000000000..a8bbaf1d2d4 --- /dev/null +++ b/storage/xtradb/include/os0once.h @@ -0,0 +1,125 @@ +/***************************************************************************** + +Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file include/os0once.h +A class that aids executing a given function exactly once in a multi-threaded +environment. + +Created Feb 20, 2014 Vasil Dimov +*******************************************************/ + +#ifndef os0once_h +#define os0once_h + +#include "univ.i" + +#include "os0sync.h" +#include "ut0ut.h" + +/** Execute a given function exactly once in a multi-threaded environment +or wait for the function to be executed by another thread. + +Example usage: +First the user must create a control variable of type os_once::state_t and +assign it os_once::NEVER_DONE. +Then the user must pass this variable, together with a function to be +executed to os_once::do_or_wait_for_done(). + +Multiple threads can call os_once::do_or_wait_for_done() simultaneously with +the same (os_once::state_t) control variable. The provided function will be +called exactly once and when os_once::do_or_wait_for_done() returns then this +function has completed execution, by this or another thread. In other words +os_once::do_or_wait_for_done() will either execute the provided function or +will wait for its execution to complete if it is already called by another +thread or will do nothing if the function has already completed its execution +earlier. + +This mimics pthread_once(3), but unfortunatelly pthread_once(3) does not +support passing arguments to the init_routine() function. We should use +std::call_once() when we start compiling with C++11 enabled. */ +class os_once { +public: + /** Control variables' state type */ + typedef ib_uint32_t state_t; + + /** Not yet executed. */ + static const state_t NEVER_DONE = 0; + + /** Currently being executed by this or another thread. */ + static const state_t IN_PROGRESS = 1; + + /** Finished execution. */ + static const state_t DONE = 2; + +#ifdef HAVE_ATOMIC_BUILTINS + /** Call a given function or wait its execution to complete if it is + already called by another thread. + @param[in,out] state control variable + @param[in] do_func function to call + @param[in,out] do_func_arg an argument to pass to do_func(). */ + static + void + do_or_wait_for_done( + volatile state_t* state, + void (*do_func)(void*), + void* do_func_arg) + { + /* Avoid calling os_compare_and_swap_uint32() in the most + common case. */ + if (*state == DONE) { + return; + } + + if (os_compare_and_swap_uint32(state, + NEVER_DONE, IN_PROGRESS)) { + /* We are the first. Call the function. */ + + do_func(do_func_arg); + + const bool swapped = os_compare_and_swap_uint32( + state, IN_PROGRESS, DONE); + + ut_a(swapped); + } else { + /* The state is not NEVER_DONE, so either it is + IN_PROGRESS (somebody is calling the function right + now or DONE (it has already been called and completed). + Wait for it to become DONE. */ + for (;;) { + const state_t s = *state; + + switch (s) { + case DONE: + return; + case IN_PROGRESS: + break; + case NEVER_DONE: + /* fall through */ + default: + ut_error; + } + + UT_RELAX_CPU(); + } + } + } +#endif /* HAVE_ATOMIC_BUILTINS */ +}; + +#endif /* os0once_h */ diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h index ea5d09ec535..066fd34d668 100644 --- a/storage/xtradb/include/os0sync.h +++ b/storage/xtradb/include/os0sync.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -357,6 +357,10 @@ Atomic compare-and-swap and increment for InnoDB. */ # define HAVE_ATOMIC_BUILTINS +# ifdef HAVE_IB_GCC_ATOMIC_BUILTINS_BYTE +# define HAVE_ATOMIC_BUILTINS_BYTE +# endif + # ifdef HAVE_IB_GCC_ATOMIC_BUILTINS_64 # define HAVE_ATOMIC_BUILTINS_64 # endif @@ -434,9 +438,13 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */ # define os_atomic_test_and_set_ulint(ptr, new_val) \ __sync_lock_test_and_set(ptr, new_val) +# define os_atomic_lock_release_byte(ptr) \ + __sync_lock_release(ptr) + #elif defined(HAVE_IB_SOLARIS_ATOMICS) # define HAVE_ATOMIC_BUILTINS +# define HAVE_ATOMIC_BUILTINS_BYTE # define HAVE_ATOMIC_BUILTINS_64 /* If not compiling with GCC or GCC doesn't support the atomic @@ -515,9 +523,13 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */ # define os_atomic_test_and_set_ulint(ptr, new_val) \ atomic_swap_ulong(ptr, new_val) +# define os_atomic_lock_release_byte(ptr) \ + (void) atomic_swap_uchar(ptr, 0) + #elif defined(HAVE_WINDOWS_ATOMICS) # define HAVE_ATOMIC_BUILTINS +# define HAVE_ATOMIC_BUILTINS_BYTE # ifndef _WIN32 # define HAVE_ATOMIC_BUILTINS_64 @@ -574,7 +586,8 @@ Returns true if swapped, ptr is pointer to target, old_val is value to compare to, new_val is the value to swap in. */ # define os_compare_and_swap_uint32(ptr, old_val, new_val) \ - (win_cmp_and_xchg_dword(ptr, new_val, old_val) == old_val) + (InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr), \ + new_val, old_val) == old_val) # define os_compare_and_swap_ulint(ptr, old_val, new_val) \ (win_cmp_and_xchg_ulint(ptr, new_val, old_val) == old_val) @@ -637,6 +650,9 @@ clobbered */ # define os_atomic_test_and_set_ulong(ptr, new_val) \ InterlockedExchange(ptr, new_val) +# define os_atomic_lock_release_byte(ptr) \ + (void) InterlockedExchange(ptr, 0) + #else # define IB_ATOMICS_STARTUP_MSG \ "Mutexes and rw_locks use InnoDB's own implementation" @@ -684,6 +700,65 @@ for synchronization */ os_decrement_counter_by_amount(mutex, counter, 1);\ } while (0); +/** barrier definitions for memory ordering */ +#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined __WIN__ +/* Performance regression was observed at some conditions for Intel +architecture. Disable memory barrier for Intel architecture for now. */ +# define os_rmb do { } while(0) +# define os_wmb do { } while(0) +# define os_isync do { } while(0) +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "Memory barrier is not used" +#elif defined(HAVE_IB_GCC_ATOMIC_THREAD_FENCE) +# define HAVE_MEMORY_BARRIER +# define os_rmb __atomic_thread_fence(__ATOMIC_ACQUIRE) +# define os_wmb __atomic_thread_fence(__ATOMIC_RELEASE) +#ifdef __powerpc__ +# define os_isync __asm __volatile ("isync":::"memory") +#else +#define os_isync do { } while(0) +#endif + +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "GCC builtin __atomic_thread_fence() is used for memory barrier" + +#elif defined(HAVE_IB_GCC_SYNC_SYNCHRONISE) +# define HAVE_MEMORY_BARRIER +# define os_rmb __sync_synchronize() +# define os_wmb __sync_synchronize() +# define os_isync __sync_synchronize() +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "GCC builtin __sync_synchronize() is used for memory barrier" + +#elif defined(HAVE_IB_MACHINE_BARRIER_SOLARIS) +# define HAVE_MEMORY_BARRIER +# include <mbarrier.h> +# define os_rmb __machine_r_barrier() +# define os_wmb __machine_w_barrier() +# define os_isync os_rmb; os_wmb +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "Solaris memory ordering functions are used for memory barrier" + +#elif defined(HAVE_WINDOWS_MM_FENCE) +# define HAVE_MEMORY_BARRIER +# include <intrin.h> +# define os_rmb _mm_lfence() +# define os_wmb _mm_sfence() +# define os_isync os_rmb; os_wmb +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "_mm_lfence() and _mm_sfence() are used for memory barrier" + +# define os_atomic_lock_release_byte(ptr) \ + (void) InterlockedExchange(ptr, 0) + +#else +# define os_rmb do { } while(0) +# define os_wmb do { } while(0) +# define os_isync do { } while(0) +# define IB_MEMORY_BARRIER_STARTUP_MSG \ + "Memory barrier is not used" +#endif + #ifndef UNIV_NONINL #include "os0sync.ic" #endif diff --git a/storage/xtradb/include/read0read.h b/storage/xtradb/include/read0read.h index e17d49b1321..0352f129c30 100644 --- a/storage/xtradb/include/read0read.h +++ b/storage/xtradb/include/read0read.h @@ -50,6 +50,27 @@ read_view_open_now( NULL if a new one needs to be created */ /*********************************************************************//** +Clones a read view object. This function will allocate space for two read +views contiguously, one identical in size and content as @param view (starting +at returned pointer) and another view immediately following the trx_ids array. +The second view will have space for an extra trx_id_t element. +@return read view struct */ +UNIV_INTERN +read_view_t* +read_view_clone( +/*============*/ + const read_view_t* view, /*!< in: view to clone */ + read_view_t*& prebuilt_clone);/*!< in,out: prebuilt view or + NULL */ +/*********************************************************************//** +Insert the view in the proper order into the trx_sys->view_list. The +read view list is ordered by read_view_t::low_limit_no in descending order. */ +UNIV_INTERN +void +read_view_add( +/*==========*/ + read_view_t* view); /*!< in: view to add to */ +/*********************************************************************//** Makes a copy of the oldest existing read view, or opens a new. The view must be closed with ..._close. @return own: read view struct */ diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index aef04d003d5..3ed3ba71698 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -1,9 +1,9 @@ /***************************************************************************** -Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved. +Copyright (c) 1995, 2013, Oracle and/or its affiliates. Copyright (c) 2008, 2009, Google Inc. Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, SkySQL Ab. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -183,9 +183,12 @@ extern char srv_disable_sort_file_cache; thread */ extern os_event_t srv_checkpoint_completed_event; -/* This event is set on the online redo log following thread exit to signal -that the (slow) shutdown may proceed */ -extern os_event_t srv_redo_log_thread_finished_event; +/* This event is set on the online redo log following thread after a successful +log tracking iteration */ +extern os_event_t srv_redo_log_tracked_event; + +/** srv_redo_log_follow_thread spawn flag */ +extern bool srv_redo_log_thread_started; /* If the last data file is auto-extended, we add this many pages to it at a time */ @@ -642,6 +645,8 @@ extern srv_stats_t srv_stats; When FALSE, row locks are not taken at all. */ extern my_bool srv_fake_changes_locks; +/** Simulate compression failures. */ +extern uint srv_simulate_comp_failures; # ifdef UNIV_PFS_THREAD /* Keys to register InnoDB threads with performance schema */ diff --git a/storage/xtradb/include/sync0rw.h b/storage/xtradb/include/sync0rw.h index 95bb7e16b26..0ac6b0f3f69 100644 --- a/storage/xtradb/include/sync0rw.h +++ b/storage/xtradb/include/sync0rw.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -109,14 +109,8 @@ extern ib_mutex_t rw_lock_list_mutex; #ifdef UNIV_SYNC_DEBUG /* The global mutex which protects debug info lists of all rw-locks. To modify the debug info list of an rw-lock, this mutex has to be - acquired in addition to the mutex protecting the lock. */ -extern ib_mutex_t rw_lock_debug_mutex; -extern os_event_t rw_lock_debug_event; /*!< If deadlock detection does - not get immediately the mutex it - may wait for this event */ -extern ibool rw_lock_debug_waiters; /*!< This is set to TRUE, if - there may be waiters for the event */ +extern os_fast_mutex_t rw_lock_debug_mutex; #endif /* UNIV_SYNC_DEBUG */ /** Counters for RW locks. */ @@ -142,7 +136,7 @@ extern mysql_pfs_key_t trx_i_s_cache_lock_key; extern mysql_pfs_key_t trx_purge_latch_key; extern mysql_pfs_key_t index_tree_rw_lock_key; extern mysql_pfs_key_t index_online_log_key; -extern mysql_pfs_key_t dict_table_stats_latch_key; +extern mysql_pfs_key_t dict_table_stats_key; extern mysql_pfs_key_t trx_sys_rw_lock_key; extern mysql_pfs_key_t hash_table_rw_lock_key; #endif /* UNIV_PFS_RWLOCK */ diff --git a/storage/xtradb/include/sync0rw.ic b/storage/xtradb/include/sync0rw.ic index 3511987dbb0..8aadc406132 100644 --- a/storage/xtradb/include/sync0rw.ic +++ b/storage/xtradb/include/sync0rw.ic @@ -112,6 +112,7 @@ rw_lock_set_waiter_flag( (void) os_compare_and_swap_ulint(&lock->waiters, 0, 1); #else /* INNODB_RW_LOCKS_USE_ATOMICS */ lock->waiters = 1; + os_wmb; #endif /* INNODB_RW_LOCKS_USE_ATOMICS */ } @@ -129,6 +130,7 @@ rw_lock_reset_waiter_flag( (void) os_compare_and_swap_ulint(&lock->waiters, 1, 0); #else /* INNODB_RW_LOCKS_USE_ATOMICS */ lock->waiters = 0; + os_wmb; #endif /* INNODB_RW_LOCKS_USE_ATOMICS */ } @@ -256,7 +258,10 @@ rw_lock_lock_word_decr( ulint amount) /*!< in: amount to decrement */ { #ifdef INNODB_RW_LOCKS_USE_ATOMICS - lint local_lock_word = lock->lock_word; + lint local_lock_word; + + os_rmb; + local_lock_word = lock->lock_word; while (local_lock_word > 0) { if (os_compare_and_swap_lint(&lock->lock_word, local_lock_word, @@ -620,10 +625,6 @@ rw_lock_s_unlock_func( /* A waiting next-writer exists, either high priority or regular, sharing the same wait event. */ - if (lock->high_priority_wait_ex_waiter) { - - lock->high_priority_wait_ex_waiter = 0; - } os_event_set(lock->base_lock.wait_ex_event); sync_array_object_signalled(); @@ -916,8 +917,9 @@ pfs_rw_lock_x_lock_func( rw_lock_x_lock_func(lock, pass, file_name, line); - if (locker != NULL) + if (locker != NULL) { PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, 0); + } } else { @@ -1072,8 +1074,9 @@ pfs_rw_lock_s_lock_func( rw_lock_s_lock_func(lock, pass, file_name, line); - if (locker != NULL) + if (locker != NULL) { PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0); + } } else { diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h index 72cfbf61dd8..5ba385ce75f 100644 --- a/storage/xtradb/include/sync0sync.h +++ b/storage/xtradb/include/sync0sync.h @@ -50,6 +50,8 @@ extern "C" my_bool timed_mutexes; #ifdef _WIN32 typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates on LONG variable */ +#elif defined(HAVE_ATOMIC_BUILTINS) && !defined(HAVE_ATOMIC_BUILTINS_BYTE) +typedef ulint lock_word_t; #else typedef byte lock_word_t; #endif diff --git a/storage/xtradb/include/sync0sync.ic b/storage/xtradb/include/sync0sync.ic index a302e1473a5..0c4a8ace887 100644 --- a/storage/xtradb/include/sync0sync.ic +++ b/storage/xtradb/include/sync0sync.ic @@ -83,7 +83,11 @@ ib_mutex_test_and_set( ib_mutex_t* mutex) /*!< in: mutex */ { #if defined(HAVE_ATOMIC_BUILTINS) +# if defined(HAVE_ATOMIC_BUILTINS_BYTE) return(os_atomic_test_and_set_byte(&mutex->lock_word, 1)); +# else + return(os_atomic_test_and_set_ulint(&mutex->lock_word, 1)); +# endif #else ibool ret; @@ -95,6 +99,7 @@ ib_mutex_test_and_set( ut_a(mutex->lock_word == 0); mutex->lock_word = 1; + os_wmb; } return((byte) ret); @@ -111,10 +116,7 @@ mutex_reset_lock_word( ib_mutex_t* mutex) /*!< in: mutex */ { #if defined(HAVE_ATOMIC_BUILTINS) - /* In theory __sync_lock_release should be used to release the lock. - Unfortunately, it does not work properly alone. The workaround is - that more conservative __sync_lock_test_and_set is used instead. */ - os_atomic_test_and_set_byte(&mutex->lock_word, 0); + os_atomic_lock_release_byte(&mutex->lock_word); #else mutex->lock_word = 0; @@ -150,6 +152,7 @@ mutex_get_waiters( ptr = &(mutex->waiters); + os_rmb; return(*ptr); /* Here we assume that the read of a single word from memory is atomic */ } @@ -184,6 +187,7 @@ mutex_exit_func( to wake up possible hanging threads if they are missed in mutex_signal_object. */ + os_isync; if (mutex_get_waiters(mutex) != 0) { mutex_signal_object(mutex); diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h index be13c48fdfc..75325d73f4d 100644 --- a/storage/xtradb/include/trx0trx.h +++ b/storage/xtradb/include/trx0trx.h @@ -275,6 +275,17 @@ read_view_t* trx_assign_read_view( /*=================*/ trx_t* trx); /*!< in: active transaction */ +/********************************************************************//** +Clones the read view from another transaction. All the consistent reads within +the receiver transaction will get the same read view as the donor transaction +@return read view clone */ +UNIV_INTERN +read_view_t* +trx_clone_read_view( +/*================*/ + trx_t* trx, /*!< in: receiver transaction */ + trx_t* from_trx) /*!< in: donor transaction */ + __attribute__((nonnull, warn_unused_result)); /****************************************************************//** Prepares a transaction for commit/rollback. */ UNIV_INTERN @@ -1019,6 +1030,11 @@ struct trx_t{ count of tables being flushed. */ /*------------------------------*/ + THD* current_lock_mutex_owner; + /*!< If this is equal to current_thd, + then in innobase_kill_query() we know we + already hold the lock_sys->mutex. */ + /*------------------------------*/ #ifdef UNIV_DEBUG ulint start_line; /*!< Track where it was started from */ const char* start_file; /*!< Filename where it was started */ diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 2a3a85c219f..8353b1dcf8a 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -45,10 +45,10 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 5 #define INNODB_VERSION_MINOR 6 -#define INNODB_VERSION_BUGFIX 17 +#define INNODB_VERSION_BUGFIX 20 #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 65.0 +#define PERCONA_INNODB_VERSION 68.0 #endif /* Enable UNIV_LOG_ARCHIVE in XtraDB */ @@ -481,10 +481,10 @@ typedef unsigned __int64 ib_uint64_t; typedef unsigned __int32 ib_uint32_t; #else /* Use the integer types and formatting strings defined in the C99 standard. */ -# define UINT32PF "%"PRIu32 -# define INT64PF "%"PRId64 -# define UINT64PF "%"PRIu64 -# define UINT64PFx "%016"PRIx64 +# define UINT32PF "%" PRIu32 +# define INT64PF "%" PRId64 +# define UINT64PF "%" PRIu64 +# define UINT64PFx "%016" PRIx64 # define DBUG_LSN_PF UINT64PF typedef int64_t ib_int64_t; typedef uint64_t ib_uint64_t; |