diff options
Diffstat (limited to 'sql')
106 files changed, 4298 insertions, 2273 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index bff01ca817e..b172059373c 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -146,7 +146,6 @@ SET (SQL_SOURCE opt_index_cond_pushdown.cc opt_subselect.cc opt_table_elimination.cc sql_expression_cache.cc gcalc_slicescan.cc gcalc_tools.cc - ../sql-common/mysql_async.c my_apc.cc mf_iocache_encr.cc item_jsonfunc.cc my_json_writer.cc rpl_gtid.cc rpl_parallel.cc @@ -179,7 +178,7 @@ IF ((CMAKE_SYSTEM_NAME MATCHES "Linux" OR AND (NOT DISABLE_THREADPOOL)) ADD_DEFINITIONS(-DHAVE_POOL_OF_THREADS) IF(WIN32) - SET(SQL_SOURCE ${SQL_SOURCE} threadpool_win.cc) + SET(SQL_SOURCE ${SQL_SOURCE} threadpool_win.cc threadpool_winsockets.cc threadpool_winsockets.h) ENDIF() SET(SQL_SOURCE ${SQL_SOURCE} threadpool_generic.cc) SET(SQL_SOURCE ${SQL_SOURCE} threadpool_common.cc) @@ -187,7 +186,7 @@ IF ((CMAKE_SYSTEM_NAME MATCHES "Linux" OR ENDIF() IF(WIN32) - SET(SQL_SOURCE ${SQL_SOURCE} handle_connections_win.cc nt_servc.cc) + SET(SQL_SOURCE ${SQL_SOURCE} handle_connections_win.cc) ENDIF() MYSQL_ADD_PLUGIN(partition ha_partition.cc STORAGE_ENGINE DEFAULT STATIC_ONLY @@ -217,7 +216,7 @@ FOREACH(se aria partition perfschema sql_sequence wsrep) ENDFOREACH() IF(WIN32) - SET(MYSQLD_SOURCE main.cc message.rc) + SET(MYSQLD_SOURCE winmain.cc message.rc) ELSE() SET(MYSQLD_SOURCE main.cc ${DTRACE_PROBES_ALL}) ENDIF() @@ -368,7 +367,7 @@ ADD_CUSTOM_COMMAND( DEPENDS gen_lex_hash ) -MYSQL_ADD_EXECUTABLE(mariadb-tzinfo-to-sql tztime.cc COMPONENT Server) +MYSQL_ADD_EXECUTABLE(mariadb-tzinfo-to-sql tztime.cc) SET_TARGET_PROPERTIES(mariadb-tzinfo-to-sql PROPERTIES COMPILE_FLAGS "-DTZINFO2SQL") TARGET_LINK_LIBRARIES(mariadb-tzinfo-to-sql mysys mysys_ssl) @@ -417,7 +416,7 @@ IF(TARGET mariadbd AND (NOT CMAKE_CROSSCOMPILING OR DEFINED CMAKE_CROSSCOMPILING COMMAND ${CMAKE_COMMAND} -E make_directory data COMMAND ${CMAKE_COMMAND} -E chdir data ${CMAKE_COMMAND} ${CONFIG_PARAM} - -DTOP_SRCDIR="${CMAKE_SOURCE_DIR}" + -DTOP_SRCDIR="${CMAKE_BINARY_DIR}" 
-DBINDIR="${CMAKE_CURRENT_BINARY_DIR}" -DMYSQLD_EXECUTABLE="$<TARGET_FILE:mariadbd>" -DCMAKE_CFG_INTDIR="${CMAKE_CFG_INTDIR}" @@ -444,14 +443,15 @@ IF(WIN32) # Create bootstrapper SQL script ADD_CUSTOM_COMMAND(OUTPUT ${my_bootstrap_sql} - COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_SOURCE_DIR}/scripts - cmd /c copy mysql_system_tables.sql+mysql_system_tables_data.sql+fill_help_tables.sql+mysql_performance_tables.sql+mysql_test_db.sql ${native_outfile} + COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/scripts + cmd /c copy mysql_system_tables.sql+mysql_system_tables_data.sql+fill_help_tables.sql+mysql_performance_tables.sql+mysql_test_db.sql+mysql_sys_schema.sql ${native_outfile} DEPENDS ${CMAKE_SOURCE_DIR}/scripts/mysql_system_tables.sql ${CMAKE_SOURCE_DIR}/scripts/mysql_system_tables_data.sql ${CMAKE_SOURCE_DIR}/scripts/fill_help_tables.sql ${CMAKE_SOURCE_DIR}/scripts/mysql_performance_tables.sql ${CMAKE_SOURCE_DIR}/scripts/mysql_test_db.sql + ${CMAKE_BINARY_DIR}/scripts/mysql_sys_schema.sql ) ADD_CUSTOM_COMMAND( diff --git a/sql/event_db_repository.cc b/sql/event_db_repository.cc index a11f7adb9dd..4d3e6ba6ca1 100644 --- a/sql/event_db_repository.cc +++ b/sql/event_db_repository.cc @@ -57,7 +57,7 @@ const TABLE_FIELD_TYPE event_table_fields[ET_FIELD_COUNT] = }, { { STRING_WITH_LEN("definer") }, - { STRING_WITH_LEN("char(") }, + { STRING_WITH_LEN("varchar(") }, { STRING_WITH_LEN("utf8") } }, { diff --git a/sql/filesort.cc b/sql/filesort.cc index 4eea588007e..0337325b544 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -251,6 +251,9 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, param.init_for_filesort(sort_len, table, max_rows, filesort->sort_positions); + param.set_all_read_bits= filesort->set_all_read_bits; + param.unpack= filesort->unpack; + sort->addon_fields= param.addon_fields; sort->sort_keys= param.sort_keys; @@ -884,6 +887,8 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, goto err; } + if 
(param->set_all_read_bits) + sort_form->column_bitmaps_set(save_read_set, save_write_set); DEBUG_SYNC(thd, "after_index_merge_phase1"); for (;;) @@ -891,7 +896,11 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, if (quick_select) error= select->quick->get_next(); else /* Not quick-select */ + { error= file->ha_rnd_next(sort_form->record[0]); + if (param->unpack) + param->unpack(sort_form); + } if (unlikely(error)) break; file->position(sort_form->record[0]); diff --git a/sql/filesort.h b/sql/filesort.h index 9f71da02c96..29ae5e20cc6 100644 --- a/sql/filesort.h +++ b/sql/filesort.h @@ -62,6 +62,13 @@ public: Filesort_tracker *tracker; Sort_keys *sort_keys; + /* + TRUE means all the fields of table of whose bitmap read_set is set + need to be read while reading records in the sort buffer. + FALSE otherwise + */ + bool set_all_read_bits; + Filesort(ORDER *order_arg, ha_rows limit_arg, bool sort_positions_arg, SQL_SELECT *select_arg): order(order_arg), @@ -71,7 +78,9 @@ public: own_select(false), using_pq(false), sort_positions(sort_positions_arg), - sort_keys(NULL) + sort_keys(NULL), + set_all_read_bits(FALSE), + unpack(NULL) { DBUG_ASSERT(order); }; @@ -79,6 +88,8 @@ public: ~Filesort() { cleanup(); } /* Prepare ORDER BY list for sorting. */ Sort_keys* make_sortorder(THD *thd, JOIN *join, table_map first_table_bit); + /* Unpack temp table columns to base table columns*/ + void (*unpack)(TABLE *); private: void cleanup(); diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index 18c6ca5a63c..940a7b05ce7 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -4244,15 +4244,8 @@ int ha_partition::start_stmt(THD *thd, thr_lock_type lock_type) /* Add partition to be called in reset(). 
*/ bitmap_set_bit(&m_partitions_to_reset, i); } - switch (lock_type) - { - case TL_WRITE_ALLOW_WRITE: - case TL_WRITE_CONCURRENT_INSERT: - case TL_WRITE_DELAYED: - case TL_WRITE_DEFAULT: - case TL_WRITE_LOW_PRIORITY: - case TL_WRITE: - case TL_WRITE_ONLY: + if (lock_type >= TL_FIRST_WRITE) + { if (m_part_info->part_expr) m_part_info->part_expr->walk(&Item::register_field_in_read_map, 1, 0); if (m_part_info->part_type == VERSIONING_PARTITION && @@ -4260,7 +4253,6 @@ int ha_partition::start_stmt(THD *thd, thr_lock_type lock_type) thd->lex->sql_command != SQLCOM_SELECT && thd->lex->sql_command != SQLCOM_INSERT_SELECT) m_part_info->vers_set_hist_part(thd); - default:; } DBUG_RETURN(error); } @@ -9183,6 +9175,7 @@ int ha_partition::extra(enum ha_extra_function operation) case HA_EXTRA_STARTING_ORDERED_INDEX_SCAN: case HA_EXTRA_BEGIN_ALTER_COPY: case HA_EXTRA_END_ALTER_COPY: + case HA_EXTRA_IGNORE_INSERT: DBUG_RETURN(loop_partitions(extra_cb, &operation)); default: { diff --git a/sql/handle_connections_win.cc b/sql/handle_connections_win.cc index b61130dd6e9..ec6bb9cb6d0 100644 --- a/sql/handle_connections_win.cc +++ b/sql/handle_connections_win.cc @@ -22,12 +22,12 @@ #include <mswsock.h> #include <mysql/psi/mysql_socket.h> #include <sddl.h> - +#include <vector> #include <handle_connections_win.h> /* From mysqld.cc */ extern HANDLE hEventShutdown; -extern MYSQL_SOCKET base_ip_sock, extra_ip_sock; +extern Dynamic_array<MYSQL_SOCKET> listen_sockets; #ifdef HAVE_POOL_OF_THREADS extern PTP_CALLBACK_ENVIRON get_threadpool_win_callback_environ(); extern void tp_win_callback_prolog(); @@ -129,6 +129,9 @@ struct Socket_Listener: public Listener /** Client socket passed to AcceptEx() call.*/ SOCKET m_client_socket; + /** Listening socket. 
*/ + MYSQL_SOCKET m_listen_socket; + /** Buffer for sockaddrs passed to AcceptEx()/GetAcceptExSockaddrs() */ char m_buffer[2 * sizeof(sockaddr_storage) + 32]; @@ -163,7 +166,8 @@ struct Socket_Listener: public Listener */ Socket_Listener(MYSQL_SOCKET listen_socket, PTP_CALLBACK_ENVIRON callback_environ) : Listener((HANDLE)listen_socket.fd,0), - m_client_socket(INVALID_SOCKET) + m_client_socket(INVALID_SOCKET), + m_listen_socket(listen_socket) { if (callback_environ) { @@ -185,7 +189,8 @@ struct Socket_Listener: public Listener void begin_accept() { retry : - m_client_socket= socket(server_socket_ai_family, SOCK_STREAM, IPPROTO_TCP); + m_client_socket= socket(m_listen_socket.address_family, SOCK_STREAM, + IPPROTO_TCP); if (m_client_socket == INVALID_SOCKET) { sql_perror("socket() call failed."); @@ -234,7 +239,6 @@ retry : } MYSQL_SOCKET s_client{m_client_socket}; - MYSQL_SOCKET s_listen{(SOCKET)m_handle}; #ifdef HAVE_PSI_SOCKET_INTERFACE /* Parse socket addresses buffer filled by AcceptEx(), @@ -247,7 +251,8 @@ retry : &local_addr, &local_addr_len, &remote_addr, &remote_addr_len); s_client.m_psi= PSI_SOCKET_CALL(init_socket) - (key_socket_client_connection, (const my_socket*)&s_listen.fd, remote_addr, remote_addr_len); + (key_socket_client_connection, (const my_socket*)&m_listen_socket.fd, + remote_addr, remote_addr_len); #endif /* Start accepting new connection. 
After this point, do not use @@ -256,7 +261,7 @@ retry : /* Some chores post-AcceptEx() that we need to create a normal socket.*/ if (setsockopt(s_client.fd, SOL_SOCKET, SO_UPDATE_ACCEPT_CONTEXT, - (char *)&s_listen.fd, sizeof(s_listen.fd))) + (char *)&m_listen_socket.fd, sizeof(m_listen_socket.fd))) { if (!abort_loop) { @@ -266,7 +271,7 @@ retry : } /* Create a new connection.*/ - handle_accepted_socket(s_client, s_listen); + handle_accepted_socket(s_client, m_listen_socket); } ~Socket_Listener() @@ -281,14 +286,12 @@ retry : */ static void init_winsock_extensions() { - SOCKET s= mysql_socket_getfd(base_ip_sock); - if (s == INVALID_SOCKET) - s= mysql_socket_getfd(extra_ip_sock); - if (s == INVALID_SOCKET) - { + if (listen_sockets.size() == 0) { /* --skip-networking was used*/ return; } + + SOCKET s= mysql_socket_getfd(listen_sockets.at(0)); GUID guid_AcceptEx= WSAID_ACCEPTEX; GUID guid_GetAcceptExSockaddrs= WSAID_GETACCEPTEXSOCKADDRS; @@ -510,6 +513,18 @@ struct Pipe_Listener : public Listener } }; + /* The shutdown event, which is set whenever*/ +static void create_shutdown_event() +{ + char shutdown_event_name[40]; + sprintf_s(shutdown_event_name, "MySQLShutdown%u", GetCurrentProcessId()); + if (!(hEventShutdown= CreateEvent(0, FALSE, FALSE, shutdown_event_name))) + { + sql_print_error("Can't create shutdown event, Windows error %u", GetLastError()); + unireg_abort(1); + } +} + /** Accept new client connections on Windows. 
@@ -529,22 +544,24 @@ struct Pipe_Listener : public Listener */ -#define MAX_WAIT_HANDLES 32 #define NUM_PIPE_LISTENERS 24 #define SHUTDOWN_IDX 0 #define LISTENER_START_IDX 1 -static Listener *all_listeners[MAX_WAIT_HANDLES]; -static HANDLE wait_events[MAX_WAIT_HANDLES]; -static int n_listeners; +static std::vector<Listener *> all_listeners; +static std::vector<HANDLE> wait_events; void network_init_win() { Socket_Listener::init_winsock_extensions(); /* Listen for TCP connections on "extra-port" (no threadpool).*/ - if (extra_ip_sock.fd != INVALID_SOCKET) - all_listeners[n_listeners++]= new Socket_Listener(extra_ip_sock, 0); + for (uint i= 0 ; i < listen_sockets.elements() ; i++) + { + MYSQL_SOCKET *sock= listen_sockets.get_pos(i); + if (sock->is_extra_port) + all_listeners.push_back(new Socket_Listener(*sock, 0)); + } /* Listen for named pipe connections */ if (mysqld_unix_port[0] && !opt_bootstrap && opt_enable_named_pipe) @@ -553,17 +570,22 @@ void network_init_win() Use several listeners for pipe, to reduce ERROR_PIPE_BUSY on client side. 
*/ for (int i= 0; i < NUM_PIPE_LISTENERS; i++) - all_listeners[n_listeners++]= new Pipe_Listener(); + all_listeners.push_back(new Pipe_Listener()); } - if (base_ip_sock.fd != INVALID_SOCKET) + for (uint i= 0 ; i < listen_sockets.elements() ; i++) { - /* Wait for TCP connections.*/ - SetFileCompletionNotificationModes((HANDLE)base_ip_sock.fd, FILE_SKIP_SET_EVENT_ON_HANDLE); - all_listeners[n_listeners++]= new Socket_Listener(base_ip_sock, get_threadpool_win_callback_environ()); + MYSQL_SOCKET *sock= listen_sockets.get_pos(i); + if (sock->is_extra_port) + continue; + /* Wait for TCP connections.*/ + SetFileCompletionNotificationModes((HANDLE) sock->fd, + FILE_SKIP_SET_EVENT_ON_HANDLE); + all_listeners.push_back( + new Socket_Listener(*sock, get_threadpool_win_callback_environ())); } - if (!n_listeners && !opt_bootstrap) + if (all_listeners.size() == 0 && !opt_bootstrap) { sql_print_error("Either TCP connections or named pipe connections must be enabled."); unireg_abort(1); @@ -572,27 +594,44 @@ void network_init_win() void handle_connections_win() { - DBUG_ASSERT(hEventShutdown); int n_waits; - wait_events[SHUTDOWN_IDX]= hEventShutdown; + create_shutdown_event(); + wait_events.push_back(hEventShutdown); n_waits= 1; - for (int i= 0; i < n_listeners; i++) + for (size_t i= 0; i < all_listeners.size(); i++) { HANDLE wait_handle= all_listeners[i]->wait_handle(); if (wait_handle) { DBUG_ASSERT((i == 0) || (all_listeners[i - 1]->wait_handle() != 0)); - wait_events[n_waits++]= wait_handle; + wait_events.push_back(wait_handle); } all_listeners[i]->begin_accept(); } + mysqld_win_set_startup_complete(); + + // WaitForMultipleObjects can't wait on more than MAXIMUM_WAIT_OBJECTS + // handles simultaneously. Since MAXIMUM_WAIT_OBJECTS is only 64, there is + // a theoretical possiblity of exceeding that limit on installations where + // host name resolves to a lot of addresses. + if (wait_events.size() > MAXIMUM_WAIT_OBJECTS) + { + sql_print_warning( + "Too many wait events (%lu). 
Some connection listeners won't be handled. " + "Try to switch \"thread-handling\" to \"pool-of-threads\" and/or disable " + "\"extra-port\".", static_cast<ulong>(wait_events.size())); + wait_events.resize(MAXIMUM_WAIT_OBJECTS); + } + for (;;) { - DWORD idx = WaitForMultipleObjects(n_waits ,wait_events, FALSE, INFINITE); - DBUG_ASSERT((int)idx >= 0 && (int)idx < n_waits); + DBUG_ASSERT(wait_events.size() <= MAXIMUM_WAIT_OBJECTS); + DWORD idx = WaitForMultipleObjects((DWORD)wait_events.size(), + wait_events.data(), FALSE, INFINITE); + DBUG_ASSERT((int)idx >= 0 && (int)idx < (int)wait_events.size()); if (idx == SHUTDOWN_IDX) break; @@ -600,8 +639,10 @@ void handle_connections_win() all_listeners[idx - LISTENER_START_IDX]->completion_callback(); } + mysqld_win_initiate_shutdown(); + /* Cleanup */ - for (int i= 0; i < n_listeners; i++) + for (size_t i= 0; i < all_listeners.size(); i++) { Listener *listener= all_listeners[i]; if (listener->wait_handle()) diff --git a/sql/handler.cc b/sql/handler.cc index c6031e252a6..985d3c9cc83 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -110,7 +110,7 @@ static handlerton *installed_htons[128]; #define BITMAP_STACKBUF_SIZE (128/8) KEY_CREATE_INFO default_key_create_info= -{ HA_KEY_ALG_UNDEF, 0, 0, {NullS, 0}, {NullS, 0}, true }; +{ HA_KEY_ALG_UNDEF, 0, 0, {NullS, 0}, {NullS, 0}, true, false }; /* number of entries in handlertons[] */ ulong total_ha= 0; @@ -4886,7 +4886,8 @@ handler::check_if_supported_inplace_alter(TABLE *altered_table, ALTER_PARTITIONED | ALTER_VIRTUAL_GCOL_EXPR | ALTER_RENAME | - ALTER_RENAME_INDEX; + ALTER_RENAME_INDEX | + ALTER_INDEX_IGNORABILITY; /* Is there at least one operation that requires copy algorithm? 
*/ if (ha_alter_info->handler_flags & ~inplace_offline_operations) @@ -4934,6 +4935,7 @@ Alter_inplace_info::Alter_inplace_info(HA_CREATE_INFO *create_info_arg, index_drop_buffer(nullptr), index_add_count(0), index_add_buffer(nullptr), + index_altered_ignorability_count(0), rename_keys(current_thd->mem_root), handler_ctx(nullptr), group_commit_ctx(nullptr), diff --git a/sql/handler.h b/sql/handler.h index 81fbd8303cd..3f8f5f5dd23 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -353,7 +353,10 @@ enum chf_create_flags { */ #define HA_ONLINE_ANALYZE (1ULL << 59) -#define HA_LAST_TABLE_FLAG HA_ONLINE_ANALYZE +/* Implements SELECT ... FOR UPDATE SKIP LOCKED */ +#define HA_CAN_SKIP_LOCKED (1ULL << 60) + +#define HA_LAST_TABLE_FLAG HA_CAN_SKIP_LOCKED /* bits in index_flags(index_number) for what you can do with index */ @@ -539,7 +542,7 @@ enum legacy_db_type DB_TYPE_PERFORMANCE_SCHEMA=28, DB_TYPE_S3=41, DB_TYPE_ARIA=42, - DB_TYPE_TOKUDB=43, + DB_TYPE_TOKUDB=43, /* disabled in MariaDB Server 10.5, removed in 10.6 */ DB_TYPE_SEQUENCE=44, DB_TYPE_FIRST_DYNAMIC=45, DB_TYPE_DEFAULT=127 // Must be last @@ -785,6 +788,12 @@ typedef bool Log_func(THD*, TABLE*, bool, const uchar*, const uchar*); */ #define ALTER_COLUMN_INDEX_LENGTH (1ULL << 60) + +/** + Means that the ignorability of an index is changed. +*/ +#define ALTER_INDEX_IGNORABILITY (1ULL << 61) + /* Flags set in partition_flags when altering partitions */ @@ -2363,6 +2372,26 @@ struct Table_specification_st: public HA_CREATE_INFO, /** + Structure describing changes to an index to be caused by ALTER TABLE. +*/ + +struct KEY_PAIR +{ + /** + Pointer to KEY object describing old version of index in + TABLE::key_info array for TABLE instance representing old + version of table. + */ + KEY *old_key; + /** + Pointer to KEY object describing new version of index in + Alter_inplace_info::key_info_buffer array. + */ + KEY *new_key; +}; + + +/** In-place alter handler context. 
This is a superclass intended to be subclassed by individual handlers @@ -2461,6 +2490,11 @@ public: */ uint *index_add_buffer; + KEY_PAIR *index_altered_ignorability_buffer; + + /** Size of index_altered_ignorability_buffer array. */ + uint index_altered_ignorability_count; + /** Old and new index names. Used for index rename. */ @@ -2569,6 +2603,18 @@ public: */ void report_unsupported_error(const char *not_supported, const char *try_instead) const; + void add_altered_index_ignorability(KEY *old_key, KEY *new_key) + { + KEY_PAIR *key_pair= index_altered_ignorability_buffer + + index_altered_ignorability_count++; + key_pair->old_key= old_key; + key_pair->new_key= new_key; + DBUG_PRINT("info", ("index had ignorability altered: %i to %i", + old_key->is_ignored, + new_key->is_ignored)); + } + + }; @@ -2585,6 +2631,7 @@ typedef struct st_key_create_information directly by the user (set by the parser). */ bool check_for_duplicate_indexes; + bool is_ignored; } KEY_CREATE_INFO; diff --git a/sql/hostname.cc b/sql/hostname.cc index edf31c11081..fed47e88597 100644 --- a/sql/hostname.cc +++ b/sql/hostname.cc @@ -554,6 +554,13 @@ int ip_to_hostname(struct sockaddr_storage *ip_storage, } ); + DBUG_EXECUTE_IF("getnameinfo_fake_long_host", + { + strcpy(hostname_buffer, "host5678901_345678902_345678903_345678904_345678905_345678906_345678907_345678908_345678909_345678910_345678911_345678912_345678913_345678914_345678915_345678916_345678917_345678918_345678919_345678920_345678921_345678922_345678923_345678924_345678925_345"); + err_code= 0; + } + ); + /* =========================================================================== DEBUG code only (end) diff --git a/sql/item_func.cc b/sql/item_func.cc index b3f60e052ae..f308435eea7 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -6323,7 +6323,7 @@ bool Item_func_match::fix_index() { if ((table->key_info[keynr].flags & HA_FULLTEXT) && (flags & FT_BOOL ? 
table->keys_in_use_for_query.is_set(keynr) : - table->s->keys_in_use.is_set(keynr))) + table->s->usable_indexes(table->in_use).is_set(keynr))) { ft_to_key[fts]=keynr; diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc index 220a3e8de92..370852b2057 100644 --- a/sql/item_jsonfunc.cc +++ b/sql/item_jsonfunc.cc @@ -23,7 +23,7 @@ /* Compare ASCII string against the string with the specified character set. - Only compares the equality, case insencitive. + Only compares the equality, case insensitive. */ static bool eq_ascii_string(const CHARSET_INFO *cs, const char *ascii, @@ -1114,7 +1114,7 @@ static int check_contains(json_engine_t *js, json_engine_t *value) if (value->value_type != JSON_VALUE_STRING) return FALSE; /* - TODO: make proper json-json comparison here that takes excapint + TODO: make proper json-json comparison here that takes excipient into account. */ return value->value_len == js->value_len && @@ -1402,7 +1402,7 @@ longlong Item_func_json_contains_path::val_int() n_found= arg_count - 2; } else - n_found= 0; /* Jost to prevent 'uninitialized value' warnings */ + n_found= 0; /* Just to prevent 'uninitialized value' warnings */ result= 0; while (json_get_path_next(&je, &p) == 0) @@ -3372,7 +3372,7 @@ bool Item_func_json_search::fix_length_and_dec() /* It's rather difficult to estimate the length of the result. - I belive arglen^2 is the reasonable upper limit. + I believe arglen^2 is the reasonable upper limit. 
*/ if (args[0]->max_length > SQR_MAX_BLOB_WIDTH) max_length= MAX_BLOB_WIDTH; diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 27f00c308d4..3123844865f 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -55,7 +55,7 @@ C_MODE_END #include <sql_repl.h> #include "sql_statistics.h" -size_t username_char_length= 80; +size_t username_char_length= USERNAME_CHAR_LENGTH; /* Calculate max length of string from length argument to LEFT and RIGHT diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 7fc7f4139e9..d30042a72ef 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -3050,7 +3050,7 @@ bool Item_exists_subselect::exists2in_processor(void *opt_arg) Query_arena *arena= NULL, backup; int res= FALSE; List<Item> outer; - Dynamic_array<EQ_FIELD_OUTER> eqs(5, 5); + Dynamic_array<EQ_FIELD_OUTER> eqs(PSI_INSTRUMENT_MEM, 5, 5); bool will_be_correlated; DBUG_ENTER("Item_exists_subselect::exists2in_processor"); diff --git a/sql/lex.h b/sql/lex.h index 542356c0e43..b40883c4ea3 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -289,6 +289,7 @@ static SYMBOL symbols[] = { { "IDENTIFIED", SYM(IDENTIFIED_SYM)}, { "IF", SYM(IF_SYM)}, { "IGNORE", SYM(IGNORE_SYM)}, + { "IGNORED", SYM(IGNORED_SYM)}, { "IGNORE_DOMAIN_IDS", SYM(IGNORE_DOMAIN_IDS_SYM)}, { "IGNORE_SERVER_IDS", SYM(IGNORE_SERVER_IDS_SYM)}, { "IMMEDIATE", SYM(IMMEDIATE_SYM)}, @@ -351,6 +352,7 @@ static SYMBOL symbols[] = { { "LOCALTIME", SYM(NOW_SYM)}, { "LOCALTIMESTAMP", SYM(NOW_SYM)}, { "LOCK", SYM(LOCK_SYM)}, + { "LOCKED", SYM(LOCKED_SYM)}, { "LOCKS", SYM(LOCKS_SYM)}, { "LOGFILE", SYM(LOGFILE_SYM)}, { "LOGS", SYM(LOGS_SYM)}, @@ -583,6 +585,7 @@ static SYMBOL symbols[] = { { "SIGNAL", SYM(SIGNAL_SYM)}, { "SIGNED", SYM(SIGNED_SYM)}, { "SIMPLE", SYM(SIMPLE_SYM)}, + { "SKIP", SYM(SKIP_SYM)}, { "SLAVE", SYM(SLAVE)}, { "SLAVES", SYM(SLAVES)}, { "SLAVE_POS", SYM(SLAVE_POS_SYM)}, diff --git a/sql/lock.cc b/sql/lock.cc index 5d502755541..d62a8d49979 100644 --- a/sql/lock.cc +++ b/sql/lock.cc @@ 
-140,7 +140,7 @@ lock_tables_check(THD *thd, TABLE **tables, uint count, uint flags) or hold any type of lock in a session, since this would be a DOS attack. */ - if ((t->reginfo.lock_type >= TL_READ_NO_INSERT) + if ((t->reginfo.lock_type >= TL_FIRST_WRITE) || (thd->lex->sql_command == SQLCOM_LOCK_TABLES)) { my_error(ER_CANT_LOCK_LOG_TABLE, MYF(0)); @@ -148,7 +148,7 @@ lock_tables_check(THD *thd, TABLE **tables, uint count, uint flags) } } - if (t->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE) + if (t->reginfo.lock_type >= TL_FIRST_WRITE) { if (t->s->table_category == TABLE_CATEGORY_SYSTEM) system_count++; @@ -170,7 +170,7 @@ lock_tables_check(THD *thd, TABLE **tables, uint count, uint flags) DBUG_ASSERT(t->s->tmp_table || thd->mdl_context.is_lock_owner(MDL_key::TABLE, t->s->db.str, t->s->table_name.str, - t->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE ? + t->reginfo.lock_type >= TL_FIRST_WRITE ? MDL_SHARED_WRITE : MDL_SHARED_READ)); /* @@ -179,7 +179,7 @@ lock_tables_check(THD *thd, TABLE **tables, uint count, uint flags) */ if (!(flags & MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY) && !t->s->tmp_table) { - if (t->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE && + if (t->reginfo.lock_type >= TL_FIRST_WRITE && !ignore_read_only && opt_readonly && !thd->slave_thread) { my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only"); @@ -387,7 +387,7 @@ static int lock_external(THD *thd, TABLE **tables, uint count) lock_type=F_WRLCK; /* Lock exclusive */ if ((*tables)->db_stat & HA_READ_ONLY || ((*tables)->reginfo.lock_type >= TL_READ && - (*tables)->reginfo.lock_type <= TL_READ_NO_INSERT)) + (*tables)->reginfo.lock_type < TL_FIRST_WRITE)) lock_type=F_RDLCK; if (unlikely((error=(*tables)->file->ha_external_lock(thd,lock_type)))) @@ -481,7 +481,7 @@ int mysql_unlock_read_tables(THD *thd, MYSQL_LOCK *sql_lock) for (i=found=0 ; i < sql_lock->table_count ; i++) { DBUG_ASSERT(sql_lock->table[i]->lock_position == i); - if ((uint) sql_lock->table[i]->reginfo.lock_type > TL_WRITE_ALLOW_WRITE) + 
if ((uint) sql_lock->table[i]->reginfo.lock_type >= TL_FIRST_WRITE) { swap_variables(TABLE *, *table, sql_lock->table[i]); table++; @@ -501,7 +501,7 @@ int mysql_unlock_read_tables(THD *thd, MYSQL_LOCK *sql_lock) THR_LOCK_DATA **lock=sql_lock->locks; for (i=found=0 ; i < sql_lock->lock_count ; i++) { - if (sql_lock->locks[i]->type >= TL_WRITE_ALLOW_WRITE) + if (sql_lock->locks[i]->type >= TL_FIRST_WRITE) { swap_variables(THR_LOCK_DATA *, *lock, sql_lock->locks[i]); lock++; diff --git a/sql/log.cc b/sql/log.cc index a4351b8c8ce..be94a7f7523 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -7972,7 +7972,6 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry) DBUG_ASSERT(entry != NULL); cur= entry->thd->wait_for_commit_ptr; } - #ifdef WITH_WSREP if (wsrep_is_active(entry->thd) && wsrep_run_commit_hook(entry->thd, entry->all)) @@ -7986,7 +7985,7 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry) result= -3; } else - DBUG_ASSERT(result != -2 && result != -3); + DBUG_ASSERT(result == 0); #endif /* WITH_WSREP */ if (opt_binlog_commit_wait_count > 0 && orig_queue != NULL) diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc index 607d5451134..bbb824ef8ee 100644 --- a/sql/log_event_server.cc +++ b/sql/log_event_server.cc @@ -1903,8 +1903,7 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, thd->variables.sql_log_slow= !MY_TEST(global_system_variables.log_slow_disabled_statements & LOG_SLOW_DISABLE_SLAVE); } - mysql_parse(thd, thd->query(), thd->query_length(), &parser_state, - FALSE, FALSE); + mysql_parse(thd, thd->query(), thd->query_length(), &parser_state); /* Finalize server status flags after executing a statement. 
*/ thd->update_server_status(); log_slow_statement(thd); @@ -7493,13 +7492,21 @@ Write_rows_log_event::do_exec_row(rpl_group_info *rgi) { DBUG_ASSERT(m_table != NULL); const char *tmp= thd->get_proc_info(); - const char *message= "Write_rows_log_event::write_row()"; + LEX_CSTRING tmp_db= thd->db; + char *message, msg[128]; + const char *table_name= m_table->s->table_name.str; + char quote_char= get_quote_char_for_identifier(thd, STRING_WITH_LEN(table_name)); + my_snprintf(msg, sizeof(msg),"Write_rows_log_event::write_row() on table %c%s%c", + quote_char, table_name, quote_char); + thd->reset_db(&m_table->s->db); + message= msg; int error; #ifdef WSREP_PROC_INFO my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, - "Write_rows_log_event::write_row(%lld)", - (long long) wsrep_thd_trx_seqno(thd)); + "Write_rows_log_event::write_row(%lld) on table %c%s%c", + (long long) wsrep_thd_trx_seqno(thd), quote_char, table_name, + quote_char); message= thd->wsrep_info; #endif /* WSREP_PROC_INFO */ @@ -7513,6 +7520,7 @@ Write_rows_log_event::do_exec_row(rpl_group_info *rgi) my_error(ER_UNKNOWN_ERROR, MYF(0)); } + thd->reset_db(&tmp_db); return error; } @@ -8109,14 +8117,22 @@ int Delete_rows_log_event::do_exec_row(rpl_group_info *rgi) { int error; const char *tmp= thd->get_proc_info(); - const char *message= "Delete_rows_log_event::find_row()"; + LEX_CSTRING tmp_db= thd->db; + char *message, msg[128]; + const char *table_name= m_table->s->table_name.str; + char quote_char= get_quote_char_for_identifier(thd, STRING_WITH_LEN(table_name)); + my_snprintf(msg, sizeof(msg),"Delete_rows_log_event::find_row() on table %c%s%c", + quote_char, table_name, quote_char); + thd->reset_db(&m_table->s->db); + message= msg; const bool invoke_triggers= (m_table->triggers && do_invoke_trigger()); DBUG_ASSERT(m_table != NULL); #ifdef WSREP_PROC_INFO my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, - "Delete_rows_log_event::find_row(%lld)", - (long long) wsrep_thd_trx_seqno(thd)); + 
"Delete_rows_log_event::find_row(%lld) on table %c%s%c", + (long long) wsrep_thd_trx_seqno(thd), quote_char, table_name, + quote_char); message= thd->wsrep_info; #endif /* WSREP_PROC_INFO */ @@ -8126,11 +8142,14 @@ int Delete_rows_log_event::do_exec_row(rpl_group_info *rgi) /* Delete the record found, located in record[0] */ - message= "Delete_rows_log_event::ha_delete_row()"; + my_snprintf(msg, sizeof(msg),"Delete_rows_log_event::ha_delete_row() on table %c%s%c", + quote_char, table_name, quote_char); + message= msg; #ifdef WSREP_PROC_INFO snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, - "Delete_rows_log_event::ha_delete_row(%lld)", - (long long) wsrep_thd_trx_seqno(thd)); + "Delete_rows_log_event::ha_delete_row(%lld) on table %c%s%c", + (long long) wsrep_thd_trx_seqno(thd), quote_char, table_name, + quote_char); message= thd->wsrep_info; #endif thd_proc_info(thd, message); @@ -8161,6 +8180,7 @@ int Delete_rows_log_event::do_exec_row(rpl_group_info *rgi) error= HA_ERR_GENERIC; // in case if error is not set yet m_table->file->ha_index_or_rnd_end(); } + thd->reset_db(&tmp_db); thd_proc_info(thd, tmp); return error; } @@ -8260,13 +8280,21 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) { const bool invoke_triggers= (m_table->triggers && do_invoke_trigger()); const char *tmp= thd->get_proc_info(); - const char *message= "Update_rows_log_event::find_row()"; DBUG_ASSERT(m_table != NULL); + LEX_CSTRING tmp_db= thd->db; + char *message, msg[128]; + const char *table_name= m_table->s->table_name.str; + char quote_char= get_quote_char_for_identifier(thd, STRING_WITH_LEN(table_name)); + my_snprintf(msg, sizeof(msg),"Update_rows_log_event::find_row() on table %c%s%c", + quote_char, table_name, quote_char); + thd->reset_db(&m_table->s->db); + message= msg; #ifdef WSREP_PROC_INFO my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, - "Update_rows_log_event::find_row(%lld)", - (long long) wsrep_thd_trx_seqno(thd)); + 
"Update_rows_log_event::find_row(%lld) on table %c%s%c", + (long long) wsrep_thd_trx_seqno(thd), quote_char, table_name, + quote_char); message= thd->wsrep_info; #endif /* WSREP_PROC_INFO */ @@ -8287,6 +8315,7 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) if ((m_curr_row= m_curr_row_end)) unpack_current_row(rgi, &m_cols_ai); thd_proc_info(thd, tmp); + thd->reset_db(&tmp_db); return error; } @@ -8304,11 +8333,14 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) store_record(m_table,record[1]); m_curr_row= m_curr_row_end; - message= "Update_rows_log_event::unpack_current_row()"; + my_snprintf(msg, sizeof(msg),"Update_rows_log_event::unpack_current_row() on table %c%s%c", + quote_char, table_name, quote_char); + message= msg; #ifdef WSREP_PROC_INFO my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, - "Update_rows_log_event::unpack_current_row(%lld)", - (long long) wsrep_thd_trx_seqno(thd)); + "Update_rows_log_event::unpack_current_row(%lld) on table %c%s%c", + (long long) wsrep_thd_trx_seqno(thd), quote_char, table_name, + quote_char); message= thd->wsrep_info; #endif /* WSREP_PROC_INFO */ @@ -8331,11 +8363,13 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) DBUG_DUMP("new values", m_table->record[0], m_table->s->reclength); #endif - message= "Update_rows_log_event::ha_update_row()"; + my_snprintf(msg, sizeof(msg),"Update_rows_log_event::ha_update_row() on table %c%s%c", + quote_char, table_name, quote_char); + message= msg; #ifdef WSREP_PROC_INFO my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, - "Update_rows_log_event::ha_update_row(%lld)", - (long long) wsrep_thd_trx_seqno(thd)); + "Update_rows_log_event::ha_update_row(%lld) on table %c%s%c", + (long long) wsrep_thd_trx_seqno(thd), quote_char, table_name, quote_char); message= thd->wsrep_info; #endif /* WSREP_PROC_INFO */ @@ -8364,9 +8398,10 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) unlikely(process_triggers(TRG_EVENT_UPDATE, TRG_ACTION_AFTER, TRUE))) 
error= HA_ERR_GENERIC; // in case if error is not set yet - thd_proc_info(thd, tmp); err: + thd_proc_info(thd, tmp); + thd->reset_db(&tmp_db); m_table->file->ha_index_or_rnd_end(); return error; } diff --git a/sql/my_json_writer.h b/sql/my_json_writer.h index bc8002de529..27aec74d08d 100644 --- a/sql/my_json_writer.h +++ b/sql/my_json_writer.h @@ -491,7 +491,7 @@ public: if (my_writer) { add_member("select_id"); - if (unlikely(select_number >= INT_MAX)) + if (unlikely(select_number == FAKE_SELECT_LEX_ID)) context.add_str("fake"); else context.add_ll(static_cast<longlong>(select_number)); diff --git a/sql/mysql_install_db.cc b/sql/mysql_install_db.cc index 35e24a521e4..f712e29b843 100644 --- a/sql/mysql_install_db.cc +++ b/sql/mysql_install_db.cc @@ -26,9 +26,13 @@ #include <shellapi.h> #include <accctrl.h> #include <aclapi.h> +#include <ntsecapi.h> +#include <sddl.h> struct IUnknown; #include <shlwapi.h> +#include <string> + #define USAGETEXT \ "mysql_install_db.exe Ver 1.00 for Windows\n" \ "Copyright (C) 2010-2011 Monty Program Ab & Vladislav Vaintroub\n" \ @@ -39,9 +43,8 @@ struct IUnknown; extern "C" const char* mysql_bootstrap_sql[]; -static char default_os_user[]= "NT AUTHORITY\\NetworkService"; static char default_datadir[MAX_PATH]; -static int create_db_instance(); +static int create_db_instance(const char *datadir); static uint opt_silent; static char datadir_buffer[FN_REFLEN]; static char mysqld_path[FN_REFLEN]; @@ -51,13 +54,13 @@ static char *opt_password; static int opt_port; static int opt_innodb_page_size; static char *opt_socket; -static char *opt_os_user; -static char *opt_os_password; static my_bool opt_default_user; static my_bool opt_allow_remote_root_access; static my_bool opt_skip_networking; static my_bool opt_verbose_bootstrap; static my_bool verbose_errors; +static my_bool opt_large_pages; +static char *opt_config; #define DEFAULT_INNODB_PAGE_SIZE 16*1024 @@ -73,14 +76,14 @@ static struct my_option my_long_options[]= &opt_password, 
&opt_password, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"port", 'P', "mysql port", &opt_port, &opt_port, 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, - {"socket", 'W', + {"socket", 'W', "named pipe name (if missing, it will be set the same as service)", &opt_socket, &opt_socket, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"default-user", 'D', "Create default user", &opt_default_user, &opt_default_user, 0 , GET_BOOL, OPT_ARG, 0, 0, 0, 0, 0, 0}, - {"allow-remote-root-access", 'R', + {"allow-remote-root-access", 'R', "Allows remote access from network for user root", - &opt_allow_remote_root_access, &opt_allow_remote_root_access, 0 , GET_BOOL, + &opt_allow_remote_root_access, &opt_allow_remote_root_access, 0 , GET_BOOL, OPT_ARG, 0, 0, 0, 0, 0, 0}, {"skip-networking", 'N', "Do not use TCP connections, use pipe instead", &opt_skip_networking, &opt_skip_networking, 0 , GET_BOOL, OPT_ARG, 0, 0, 0, 0, @@ -91,6 +94,10 @@ static struct my_option my_long_options[]= &opt_silent, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"verbose-bootstrap", 'o', "Include mysqld bootstrap output",&opt_verbose_bootstrap, &opt_verbose_bootstrap, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + { "large-pages",'l', "Use large pages", &opt_large_pages, + &opt_large_pages, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"config",'c', "my.ini config template file", &opt_config, + &opt_config, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; @@ -135,7 +142,7 @@ ATTRIBUTE_NORETURN static void die(const char *fmt, ...) } -static void verbose(const char *fmt, ...) +static void verbose( const char *fmt, ...) { va_list args; @@ -150,15 +157,16 @@ static void verbose(const char *fmt, ...) 
va_end(args); } +static char full_config_path[MAX_PATH]; int main(int argc, char **argv) { int error; - char self_name[FN_REFLEN]; + char self_name[MAX_PATH]; char *p; - + char *datadir = NULL; MY_INIT(argv[0]); - GetModuleFileName(NULL, self_name, FN_REFLEN); + GetModuleFileName(NULL, self_name, MAX_PATH); strcpy(mysqld_path,self_name); p= strrchr(mysqld_path, FN_LIBCHAR); if (p) @@ -168,7 +176,56 @@ int main(int argc, char **argv) if ((error= handle_options(&argc, &argv, my_long_options, get_one_option))) exit(error); - if (!opt_datadir) + + if (opt_config != 0 && _access(opt_config, 04) != 0) + { + int err= errno; + switch(err) + { + case EACCES: + die("File %s can't be read", opt_config); + break; + case ENOENT: + die("File %s does not exist", opt_config); + break; + default: + die("Can't access file %s, errno %d",opt_config, err); + break; + } + } + if (opt_config) + { + DWORD dwret = GetFullPathName(opt_config, sizeof(full_config_path), full_config_path, NULL); + if (dwret == 0) + { + die("GetFullPathName failed, last error %u", GetLastError()); + } + else if (dwret > sizeof(full_config_path)) + { + die("Can't resolve the config file name, path too large"); + } + opt_config= full_config_path; + } + + if(opt_datadir) + datadir = opt_datadir; + + if (!datadir && opt_config) + { + for(auto section : {"server","mysqld"}) + { + auto ret = GetPrivateProfileStringA(section,"datadir", NULL, default_datadir, + sizeof(default_datadir)-1, opt_config); + if (ret) + { + datadir= default_datadir; + printf("Data directory (from config file) is %s\n",datadir); + break; + } + } + } + + if (!datadir) { /* Figure out default data directory. 
It "data" directory, next to "bin" directory, where @@ -189,31 +246,32 @@ int main(int argc, char **argv) my_print_help(my_long_options); } strcat_s(default_datadir, "\\data"); - opt_datadir= default_datadir; - printf("Default data directory is %s\n",opt_datadir); + datadir= default_datadir; + printf("Default data directory is %s\n",datadir); } + DBUG_ASSERT(datadir); + /* Print some help on errors */ verbose_errors= TRUE; - if (!opt_os_user) - { - opt_os_user= default_os_user; - opt_os_password= NULL; - } /* Workaround WiX bug (strip possible quote character at the end of path) */ - size_t len= strlen(opt_datadir); + size_t len= strlen(datadir); if (len > 0) { - if (opt_datadir[len-1] == '"') + if (datadir[len-1] == '"') + { + datadir[len-1]= 0; + } + if (datadir[0] == '"') { - opt_datadir[len-1]= 0; + datadir++; } } - GetFullPathName(opt_datadir, FN_REFLEN, datadir_buffer, NULL); - opt_datadir= datadir_buffer; + GetFullPathName(datadir, FN_REFLEN, datadir_buffer, NULL); + datadir= datadir_buffer; - if (create_db_instance()) + if (create_db_instance(datadir)) { die("database creation failed"); } @@ -279,19 +337,37 @@ static char *get_plugindir() static char *init_bootstrap_command_line(char *cmdline, size_t size) { - char basedir[MAX_PATH]; - get_basedir(basedir, sizeof(basedir), mysqld_path); - - my_snprintf(cmdline, size - 1, - "\"\"%s\" --no-defaults %s --innodb-page-size=%d --bootstrap" - " \"--lc-messages-dir=%s/share\"" - " --basedir=. --datadir=. --default-storage-engine=myisam" - " --max_allowed_packet=9M " - " --net-buffer-length=16k\"", mysqld_path, - opt_verbose_bootstrap ? "--console" : "", opt_innodb_page_size, basedir); + snprintf(cmdline, size - 1, + "\"\"%s\"" + " --defaults-file=my.ini" + " %s" + " --bootstrap" + " --datadir=." + " --loose-innodb-buffer-pool-size=10M" + "\"" + , mysqld_path, opt_verbose_bootstrap ? 
"--console" : ""); return cmdline; } +static char my_ini_path[MAX_PATH]; + +static void write_myini_str(const char *key, const char* val, const char *section="mysqld") +{ + DBUG_ASSERT(my_ini_path[0]); + if (!WritePrivateProfileString(section, key, val, my_ini_path)) + { + die("Can't write to ini file key=%s, val=%s, section=%s, Windows error %u",key,val,section, + GetLastError()); + } +} + + +static void write_myini_int(const char* key, int val, const char* section = "mysqld") +{ + char buf[10]; + itoa(val, buf, 10); + write_myini_str(key, buf, section); +} /** Create my.ini in current directory (this is assumed to be @@ -305,59 +381,63 @@ static int create_myini() char path_buf[MAX_PATH]; GetCurrentDirectory(MAX_PATH, path_buf); - - /* Create ini file. */ - FILE *myini= fopen("my.ini","wt"); - if (!myini) + snprintf(my_ini_path,sizeof(my_ini_path), "%s\\my.ini", path_buf); + if (opt_config) { - die("Can't create my.ini in data directory"); + if (!CopyFile(opt_config, my_ini_path,TRUE)) + { + die("Can't copy %s to my.ini , last error %lu", opt_config, GetLastError()); + } } /* Write out server settings. 
*/ - fprintf(myini, "[mysqld]\n"); convert_slashes(path_buf); - fprintf(myini, "datadir=%s\n", path_buf); + write_myini_str("datadir",path_buf); + if (opt_skip_networking) { - fprintf(myini,"skip-networking\n"); + write_myini_str("skip-networking","ON"); if (!opt_socket) opt_socket= opt_service; } - enable_named_pipe= (my_bool) + enable_named_pipe= (my_bool) ((opt_socket && opt_socket[0]) || opt_skip_networking); if (enable_named_pipe) { - fprintf(myini,"named-pipe=ON\n"); + write_myini_str("named-pipe","ON"); } if (opt_socket && opt_socket[0]) { - fprintf(myini, "socket=%s\n", opt_socket); + write_myini_str("socket", opt_socket); } if (opt_port) { - fprintf(myini,"port=%d\n", opt_port); + write_myini_int("port", opt_port); } if (opt_innodb_page_size != DEFAULT_INNODB_PAGE_SIZE) { - fprintf(myini, "innodb-page-size=%d\n", opt_innodb_page_size); + write_myini_int("innodb-page-size", opt_innodb_page_size); + } + if (opt_large_pages) + { + write_myini_str("large-pages","ON"); } + /* Write out client settings. 
*/ - fprintf(myini, "[client]\n"); /* Used for named pipes */ if (opt_socket && opt_socket[0]) - fprintf(myini,"socket=%s\n",opt_socket); + write_myini_str("socket",opt_socket,"client"); if (opt_skip_networking) - fprintf(myini,"protocol=pipe\n"); + write_myini_str("protocol", "pipe", "client"); else if (opt_port) - fprintf(myini,"port=%d\n",opt_port); + write_myini_int("port",opt_port,"client"); char *plugin_dir = get_plugindir(); if (plugin_dir) - fprintf(myini, "plugin-dir=%s\n", plugin_dir); - fclose(myini); + write_myini_str("plugin-dir", plugin_dir, "client"); return 0; } @@ -380,22 +460,92 @@ static const char allow_remote_root_access_cmd[]= "DROP TABLE tmp_user;\n"; static const char end_of_script[]="-- end."; +/* +Add or remove privilege for a user +@param[in] account_name - user name, Windows style, e.g "NT SERVICE\mariadb", or ".\joe" +@param[in] privilege name - standard Windows privilege name, e.g "SeLockMemoryPrivilege" +@param[in] add - when true, add privilege, otherwise remove it + +In special case where privilege name is NULL, and add is false +all privileges for the user are removed. 
+*/ +static int handle_user_privileges(const char *account_name, const wchar_t *privilege_name, bool add) +{ + LSA_OBJECT_ATTRIBUTES attr{}; + LSA_HANDLE lsa_handle; + auto status= LsaOpenPolicy( + 0, &attr, POLICY_LOOKUP_NAMES | POLICY_CREATE_ACCOUNT, &lsa_handle); + if (status) + { + verbose("LsaOpenPolicy returned %lu", LsaNtStatusToWinError(status)); + return 1; + } + BYTE sidbuf[SECURITY_MAX_SID_SIZE]; + PSID sid= (PSID) sidbuf; + SID_NAME_USE name_use; + char domain_name[256]; + DWORD cbSid= sizeof(sidbuf); + DWORD cbDomain= sizeof(domain_name); + BOOL ok= LookupAccountNameA(0, account_name, sid, &cbSid, domain_name, + &cbDomain, &name_use); + if (!ok) + { + verbose("LsaOpenPolicy returned %lu", LsaNtStatusToWinError(status)); + return 1; + } + + if (privilege_name) + { + LSA_UNICODE_STRING priv{}; + priv.Buffer= (PWSTR) privilege_name; + priv.Length= (USHORT) wcslen(privilege_name) * sizeof(wchar_t); + priv.MaximumLength= priv.Length; + if (add) + { + status= LsaAddAccountRights(lsa_handle, sid, &priv, 1); + if (status) + { + verbose("LsaAddAccountRights returned %lu/%lu", status, + LsaNtStatusToWinError(status)); + return 1; + } + } + else + { + status= LsaRemoveAccountRights(lsa_handle, sid, FALSE, &priv, 1); + if (status) + { + verbose("LsaRemoveRights returned %lu/%lu", + LsaNtStatusToWinError(status)); + return 1; + } + } + } + else + { + DBUG_ASSERT(!add); + status= LsaRemoveAccountRights(lsa_handle, sid, TRUE, 0, 0); + } + LsaClose(lsa_handle); + return 0; +} + /* Register service. 
Assume my.ini is in datadir */ -static int register_service() +static int register_service(const char *datadir, const char *user, const char *passwd) { char buf[3*MAX_PATH +32]; /* path to mysqld.exe, to my.ini, service name */ SC_HANDLE sc_manager, sc_service; - size_t datadir_len= strlen(opt_datadir); + size_t datadir_len= strlen(datadir); const char *backslash_after_datadir= "\\"; - if (datadir_len && opt_datadir[datadir_len-1] == '\\') + if (datadir_len && datadir[datadir_len-1] == '\\') backslash_after_datadir= ""; verbose("Registering service '%s'", opt_service); my_snprintf(buf, sizeof(buf)-1, - "\"%s\" \"--defaults-file=%s%smy.ini\" \"%s\"" , mysqld_path, opt_datadir, + "\"%s\" \"--defaults-file=%s%smy.ini\" \"%s\"" , mysqld_path, datadir, backslash_after_datadir, opt_service); /* Get a handle to the SCM database. */ @@ -408,7 +558,7 @@ static int register_service() /* Create the service. */ sc_service= CreateService(sc_manager, opt_service, opt_service, SERVICE_ALL_ACCESS, SERVICE_WIN32_OWN_PROCESS, SERVICE_AUTO_START, - SERVICE_ERROR_NORMAL, buf, NULL, NULL, NULL, opt_os_user, opt_os_password); + SERVICE_ERROR_NORMAL, buf, NULL, NULL, NULL, user, passwd); if (!sc_service) { @@ -549,7 +699,7 @@ static int set_directory_permissions(const char *dir, const char *os_user) /* Create database instance (including registering as service etc) .*/ -static int create_db_instance() +static int create_db_instance(const char *datadir) { int ret= 0; char cwd[MAX_PATH]; @@ -558,6 +708,8 @@ static int create_db_instance() FILE *in; bool created_datadir= false; DWORD last_error; + bool service_created= false; + std::string mysql_db_dir; verbose("Running bootstrap"); @@ -565,7 +717,7 @@ static int create_db_instance() /* Create datadir and datadir/mysql, if they do not already exist. 
*/ - if (CreateDirectory(opt_datadir, NULL)) + if (CreateDirectory(datadir, NULL)) { created_datadir= true; } @@ -576,71 +728,91 @@ static int create_db_instance() { case ERROR_ACCESS_DENIED: die("Can't create data directory '%s' (access denied)\n", - opt_datadir); + datadir); break; case ERROR_PATH_NOT_FOUND: die("Can't create data directory '%s' " "(one or more intermediate directories do not exist)\n", - opt_datadir); + datadir); break; default: die("Can't create data directory '%s', last error %u\n", - opt_datadir, last_error); + datadir, last_error); break; } } - if (!SetCurrentDirectory(opt_datadir)) + if (!SetCurrentDirectory(datadir)) { last_error = GetLastError(); switch (last_error) { case ERROR_DIRECTORY: die("Can't set current directory to '%s', the path is not a valid directory \n", - opt_datadir); + datadir); break; default: die("Can' set current directory to '%s', last error %u\n", - opt_datadir, last_error); + datadir, last_error); break; } } - if (!PathIsDirectoryEmpty(opt_datadir)) + if (!PathIsDirectoryEmpty(datadir)) { - fprintf(stderr,"ERROR : Data directory %s is not empty." - " Only new or empty existing directories are accepted for --datadir\n",opt_datadir); + fprintf(stderr, "ERROR : Data directory %s is not empty." + " Only new or empty existing directories are accepted for --datadir\n", datadir); exit(1); } - if (!CreateDirectory("mysql",NULL)) + std::string service_user; + /* Register service if requested. 
*/ + if (opt_service && opt_service[0]) { - last_error = GetLastError(); - DWORD attributes; - switch(last_error) - { - case ERROR_ACCESS_DENIED: - die("Can't create subdirectory 'mysql' in '%s' (access denied)\n",opt_datadir); - break; - case ERROR_ALREADY_EXISTS: - attributes = GetFileAttributes("mysql"); - - if (attributes == INVALID_FILE_ATTRIBUTES) - die("GetFileAttributes() failed for existing file '%s\\mysql', last error %u", - opt_datadir, GetLastError()); - else if (!(attributes & FILE_ATTRIBUTE_DIRECTORY)) - die("File '%s\\mysql' exists, but it is not a directory", opt_datadir); - - break; - } + /* Run service under virtual account NT SERVICE\service_name.*/ + service_user.append("NT SERVICE\\").append(opt_service); + ret = register_service(datadir, service_user.c_str(), NULL); + if (ret) + goto end; + service_created = true; + } + if (opt_large_pages) + { + handle_user_privileges(service_user.c_str(), L"SeLockMemoryPrivilege", true); + } + /* + Set data directory permissions for both current user and + the one who who runs services. + */ + set_directory_permissions(datadir, NULL); + if (!service_user.empty()) + { + set_directory_permissions(datadir, service_user.c_str()); } /* - Set data directory permissions for both current user and - default_os_user (the one who runs services). + Get security descriptor for the data directory. + It will be passed, as SDDL text, to the mysqld bootstrap subprocess, + to allow for correct subdirectory permissions. 
*/ - set_directory_permissions(opt_datadir, NULL); - set_directory_permissions(opt_datadir, default_os_user); + PSECURITY_DESCRIPTOR pSD; + if (GetNamedSecurityInfoA(datadir, SE_FILE_OBJECT, DACL_SECURITY_INFORMATION, + 0, 0, 0, 0, &pSD) == ERROR_SUCCESS) + { + char* string_sd = NULL; + if (ConvertSecurityDescriptorToStringSecurityDescriptor(pSD, SDDL_REVISION_1, + DACL_SECURITY_INFORMATION, &string_sd, 0)) + { + _putenv_s("MARIADB_NEW_DIRECTORY_SDDL", string_sd); + LocalFree(string_sd); + } + LocalFree(pSD); + } + + /* Create my.ini file in data directory.*/ + ret = create_myini(); + if (ret) + goto end; /* Do mysqld --bootstrap. */ init_bootstrap_command_line(cmdline, sizeof(cmdline)); @@ -656,18 +828,23 @@ static int create_db_instance() { verbose("WARNING: Can't disable buffering on mysqld's stdin"); } - if (fwrite("use mysql;\n",11,1, in) != 1) + static const char *pre_bootstrap_sql[] = { "create database mysql;\n","use mysql;\n"}; + for (auto cmd : pre_bootstrap_sql) { - verbose("ERROR: Can't write to mysqld's stdin"); - ret= 1; - goto end; + /* Write the bootstrap script to stdin. */ + if (fwrite(cmd, strlen(cmd), 1, in) != 1) + { + verbose("ERROR: Can't write to mysqld's stdin"); + ret= 1; + goto end; + } } - int i; - for (i=0; mysql_bootstrap_sql[i]; i++) + for (int i= 0; mysql_bootstrap_sql[i]; i++) { + auto cmd = mysql_bootstrap_sql[i]; /* Write the bootstrap script to stdin. */ - if (fwrite(mysql_bootstrap_sql[i], strlen(mysql_bootstrap_sql[i]), 1, in) != 1) + if (fwrite(cmd, strlen(cmd), 1, in) != 1) { verbose("ERROR: Can't write to mysqld's stdin"); ret= 1; @@ -709,7 +886,7 @@ static int create_db_instance() } /* - On some reason, bootstrap chokes if last command sent via stdin ends with + On some reason, bootstrap chokes if last command sent via stdin ends with newline, so we supply a dummy comment, that does not end with newline. 
*/ fputs(end_of_script, in); @@ -723,25 +900,37 @@ static int create_db_instance() goto end; } +end: + if (!ret) + return ret; - /* Create my.ini file in data directory.*/ - ret= create_myini(); - if (ret) - goto end; - - /* Register service if requested. */ - if (opt_service && opt_service[0]) + /* Cleanup after error.*/ + if (created_datadir) { - ret= register_service(); - if (ret) - goto end; + SetCurrentDirectory(cwd); + clean_directory(datadir); } -end: - if (ret) + if (service_created) { - SetCurrentDirectory(cwd); - clean_directory(opt_datadir); + auto sc_manager = OpenSCManager(NULL, NULL, SC_MANAGER_ALL_ACCESS); + if (sc_manager) + { + auto sc_handle= OpenServiceA(sc_manager,opt_service, DELETE); + if (sc_handle) + { + DeleteService(sc_handle); + CloseServiceHandle(sc_handle); + } + CloseServiceHandle(sc_manager); + } + + /*Remove all service user privileges for the user.*/ + if(strncmp(service_user.c_str(), "NT SERVICE\\", + sizeof("NT SERVICE\\")-1)) + { + handle_user_privileges(service_user.c_str(), 0, false); + } if (created_datadir) RemoveDirectory(opt_datadir); } diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 29aaf389ca1..4a61897856d 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -117,23 +117,21 @@ #include "sql_reload.h" // reload_acl_and_cache #include "sp_head.h" // init_sp_psi_keys +#include <mysqld_default_groups.h> + #ifdef HAVE_POLL_H #include <poll.h> #endif #ifdef _WIN32 #include <handle_connections_win.h> +#include <sddl.h> #endif #include <my_service_manager.h> #define mysqld_charset &my_charset_latin1 -/* We have HAVE_valgrind below as this speeds up the shutdown of MySQL */ - -#if defined(HAVE_valgrind) && defined(__linux__) -#define HAVE_CLOSE_SERVER_SOCK 1 -#endif extern "C" { // Because of SCO 3.2V4.2 #include <sys/stat.h> @@ -335,7 +333,6 @@ static char *character_set_filesystem_name; static char *lc_messages; static char *lc_time_names_name; char *my_bind_addr_str; -int server_socket_ai_family; static char 
*default_collation_name; char *default_storage_engine, *default_tmp_storage_engine; char *enforced_storage_engine=NULL; @@ -368,7 +365,7 @@ my_bool locked_in_memory; bool opt_using_transactions; bool volatile abort_loop; uint volatile global_disable_checkpoint; -#if defined(_WIN32) && !defined(EMBEDDED_LIBRARY) +#if defined(_WIN32) ulong slow_start_timeout; #endif static MEM_ROOT startup_root; @@ -1338,7 +1335,12 @@ static Buffered_logs buffered_logs; struct my_rnd_struct sql_rand; ///< used by sql_class.cc:THD::THD() #ifndef EMBEDDED_LIBRARY -MYSQL_SOCKET unix_sock, base_ip_sock, extra_ip_sock; + +Dynamic_array<MYSQL_SOCKET> listen_sockets(PSI_INSTRUMENT_MEM, 0); +bool unix_sock_is_online= false; +static int systemd_sock_activation; /* systemd socket activation */ + + /** Error reporter that buffer log messages. @param level log message level @@ -1390,21 +1392,10 @@ static pthread_t select_thread; /* OS specific variables */ -#ifdef __WIN__ -#undef getpid -#include <process.h> - -static bool start_mode=0, use_opt_args; -static int opt_argc; -static char **opt_argv; - -#if !defined(EMBEDDED_LIBRARY) +#ifdef _WIN32 HANDLE hEventShutdown; -static char shutdown_event_name[40]; -#include "nt_servc.h" -static NTService Service; ///< Service object for WinNT -#endif /* EMBEDDED_LIBRARY */ -#endif /* __WIN__ */ +#endif + #ifndef EMBEDDED_LIBRARY bool mysqld_embedded=0; @@ -1492,7 +1483,6 @@ static int test_if_case_insensitive(const char *dir_name); static bool pid_file_created= false; static void usage(void); static void start_signal_handler(void); -static void close_server_sock(); static void clean_up_mutexes(void); static void wait_for_signal_thread_to_end(void); static void create_pid_file(); @@ -1600,9 +1590,8 @@ static void break_connect_loop() abort_loop= 1; -#if defined(__WIN__) - if (!SetEvent(hEventShutdown)) - DBUG_PRINT("error", ("Got error: %ld from SetEvent", GetLastError())); +#if defined(_WIN32) + mysqld_win_initiate_shutdown(); #else /* Avoid waiting for 
ourselves when thread-handling=no-threads. */ if (pthread_equal(pthread_self(), select_thread)) @@ -1633,7 +1622,6 @@ static void break_connect_loop() if (error != 0 && error != ETIMEDOUT && !count++) sql_print_error("Got error %d from mysql_cond_timedwait", error); #endif - close_server_sock(); } mysql_mutex_unlock(&LOCK_start_thread); #endif /* __WIN__ */ @@ -1691,28 +1679,20 @@ static void close_connections(void) /* Abort listening to new connections */ DBUG_PRINT("quit",("Closing sockets")); - if (!opt_disable_networking ) + /* Protect against pthread_kill() calling close_server_sock(*) */ + mysql_mutex_lock(&LOCK_start_thread); + for (uint i= 0 ; i < listen_sockets.elements() ; i++) { - if (mysql_socket_getfd(base_ip_sock) != INVALID_SOCKET) - { - (void) mysql_socket_close(base_ip_sock); - base_ip_sock= MYSQL_INVALID_SOCKET; - } - if (mysql_socket_getfd(extra_ip_sock) != INVALID_SOCKET) + MYSQL_SOCKET *sock= listen_sockets.get_pos(i); + (void) mysql_socket_close(*sock); + if (sock->is_unix_domain_socket && !systemd_sock_activation) { - (void) mysql_socket_close(extra_ip_sock); - extra_ip_sock= MYSQL_INVALID_SOCKET; + (void) unlink(mysqld_unix_port); } } + listen_sockets.free_memory(); + mysql_mutex_unlock(&LOCK_start_thread); -#ifdef HAVE_SYS_UN_H - if (mysql_socket_getfd(unix_sock) != INVALID_SOCKET) - { - (void) mysql_socket_close(unix_sock); - (void) unlink(mysqld_unix_port); - unix_sock= MYSQL_INVALID_SOCKET; - } -#endif end_thr_alarm(0); // Abort old alarms. 
/* @@ -1780,39 +1760,6 @@ static void close_connections(void) DBUG_VOID_RETURN; } - -#ifdef HAVE_CLOSE_SERVER_SOCK -static void close_socket(MYSQL_SOCKET sock, const char *info) -{ - DBUG_ENTER("close_socket"); - - if (mysql_socket_getfd(sock) != INVALID_SOCKET) - { - DBUG_PRINT("info", ("calling shutdown on %s socket", info)); - (void) mysql_socket_shutdown(sock, SHUT_RDWR); - } - DBUG_VOID_RETURN; -} -#endif - - -static void close_server_sock() -{ -#ifdef HAVE_CLOSE_SERVER_SOCK - DBUG_ENTER("close_server_sock"); - - close_socket(base_ip_sock, "TCP/IP"); - close_socket(extra_ip_sock, "TCP/IP"); - close_socket(unix_sock, "unix/IP"); - - if (mysql_socket_getfd(unix_sock) != INVALID_SOCKET) - (void) unlink(mysqld_unix_port); - base_ip_sock= extra_ip_sock= unix_sock= MYSQL_INVALID_SOCKET; - - DBUG_VOID_RETURN; -#endif -} - #endif /*EMBEDDED_LIBRARY*/ @@ -1875,6 +1822,12 @@ extern "C" void unireg_abort(int exit_code) mysqld_exit(exit_code); } +#ifdef _WIN32 +typedef void (*report_svc_status_t)(DWORD current_state, DWORD win32_exit_code, + DWORD wait_hint); +static void dummy_svc_status(DWORD, DWORD, DWORD) {} +static report_svc_status_t my_report_svc_status= dummy_svc_status; +#endif static void mysqld_exit(int exit_code) { @@ -1905,6 +1858,9 @@ static void mysqld_exit(int exit_code) SAFEMALLOC_REPORT_MEMORY(0); } DBUG_LEAVE; +#ifdef _WIN32 + my_report_svc_status(SERVICE_STOPPED, exit_code, 0); +#endif sd_notify(0, "STATUS=MariaDB server is down"); exit(exit_code); /* purecov: inspected */ } @@ -2233,7 +2189,9 @@ static void set_root(const char *path) Activate usage of a tcp port */ -static MYSQL_SOCKET activate_tcp_port(uint port) +static void activate_tcp_port(uint port, + Dynamic_array<MYSQL_SOCKET> *sockets, + bool is_extra_port= false) { struct addrinfo *ai, *a; struct addrinfo hints; @@ -2265,20 +2223,6 @@ static MYSQL_SOCKET activate_tcp_port(uint port) unireg_abort(1); /* purecov: tested */ } - /* - special case: for wildcard addresses prefer ipv6 over ipv4, - 
because we later switch off IPV6_V6ONLY, so ipv6 wildcard - addresses will work for ipv4 too - */ - if (!real_bind_addr_str && ai->ai_family == AF_INET && ai->ai_next - && ai->ai_next->ai_family == AF_INET6) - { - a= ai; - ai= ai->ai_next; - a->ai_next= ai->ai_next; - ai->ai_next= a; - } - for (a= ai; a != NULL; a= a->ai_next) { ip_sock= mysql_socket_socket(key_socket_tcpip, a->ai_family, @@ -2301,101 +2245,245 @@ static MYSQL_SOCKET activate_tcp_port(uint port) } else { - server_socket_ai_family= a->ai_family; + ip_sock.address_family= a->ai_family; sql_print_information("Server socket created on IP: '%s'.", (const char *) ip_addr); - break; - } - } - if (mysql_socket_getfd(ip_sock) == INVALID_SOCKET) - { - DBUG_PRINT("error",("Got error: %d from socket()",socket_errno)); - sql_perror(ER_DEFAULT(ER_IPSOCK_ERROR)); /* purecov: tested */ - unireg_abort(1); /* purecov: tested */ - } + if (mysql_socket_getfd(ip_sock) == INVALID_SOCKET) + { + DBUG_PRINT("error",("Got error: %d from socket()",socket_errno)); + sql_perror(ER_DEFAULT(ER_IPSOCK_ERROR)); /* purecov: tested */ + unireg_abort(1); /* purecov: tested */ + } - mysql_socket_set_thread_owner(ip_sock); + mysql_socket_set_thread_owner(ip_sock); #ifndef __WIN__ - /* - We should not use SO_REUSEADDR on windows as this would enable a - user to open two mysqld servers with the same TCP/IP port. - */ - arg= 1; - (void) mysql_socket_setsockopt(ip_sock,SOL_SOCKET,SO_REUSEADDR,(char*)&arg, - sizeof(arg)); + /* + We should not use SO_REUSEADDR on windows as this would enable a + user to open two mysqld servers with the same TCP/IP port. + */ + arg= 1; + (void) mysql_socket_setsockopt(ip_sock, SOL_SOCKET, SO_REUSEADDR, + (char*)&arg, sizeof(arg)); #endif /* __WIN__ */ #ifdef IPV6_V6ONLY - /* - For interoperability with older clients, IPv6 socket should - listen on both IPv6 and IPv4 wildcard addresses. - Turn off IPV6_V6ONLY option. - - NOTE: this will work starting from Windows Vista only. 
- On Windows XP dual stack is not available, so it will not - listen on the corresponding IPv4-address. - */ - if (a->ai_family == AF_INET6) - { - arg= 0; - (void) mysql_socket_setsockopt(ip_sock, IPPROTO_IPV6, IPV6_V6ONLY, - (char*)&arg, sizeof(arg)); - } + /* + If an address name resolves to both IPv4 and IPv6 addresses, the server + will listen on them both. With IPV6_V6ONLY unset, listening on an IPv6 + wildcard address may cause listening on an IPv4 wildcard address + to fail. That's why IPV6_V6ONLY needs to be forcefully turned on. + */ + if (a->ai_family == AF_INET6) + { + arg= 1; + (void) mysql_socket_setsockopt(ip_sock, IPPROTO_IPV6, IPV6_V6ONLY, + (char*)&arg, sizeof(arg)); + } #endif #ifdef IP_FREEBIND - arg= 1; - (void) mysql_socket_setsockopt(ip_sock, IPPROTO_IP, IP_FREEBIND, (char*) &arg, - sizeof(arg)); + arg= 1; + (void) mysql_socket_setsockopt(ip_sock, IPPROTO_IP, IP_FREEBIND, + (char*) &arg, sizeof(arg)); #endif - /* - Sometimes the port is not released fast enough when stopping and - restarting the server. This happens quite often with the test suite - on busy Linux systems. Retry to bind the address at these intervals: - Sleep intervals: 1, 2, 4, 6, 9, 13, 17, 22, ... - Retry at second: 1, 3, 7, 13, 22, 35, 52, 74, ... - Limit the sequence by mysqld_port_timeout (set --port-open-timeout=#). - */ - int ret; - uint waited, retry, this_wait; - for (waited= 0, retry= 1; ; retry++, waited+= this_wait) - { - if (((ret= mysql_socket_bind(ip_sock, a->ai_addr, a->ai_addrlen)) >= 0 ) || - (socket_errno != SOCKET_EADDRINUSE) || - (waited >= mysqld_port_timeout)) - break; - sql_print_information("Retrying bind on TCP/IP port %u", port); - this_wait= retry * retry / 3 + 1; - sleep(this_wait); + /* + Sometimes the port is not released fast enough when stopping and + restarting the server. This happens quite often with the test suite + on busy Linux systems. Retry to bind the address at these intervals: + Sleep intervals: 1, 2, 4, 6, 9, 13, 17, 22, ... 
+ Retry at second: 1, 3, 7, 13, 22, 35, 52, 74, ... + Limit the sequence by mysqld_port_timeout (set --port-open-timeout=#). + */ + int ret; + uint waited, retry, this_wait; + for (waited= 0, retry= 1; ; retry++, waited+= this_wait) + { + if (((ret= mysql_socket_bind(ip_sock, a->ai_addr, a->ai_addrlen)) >= 0 ) + || (socket_errno != SOCKET_EADDRINUSE) + || (waited >= mysqld_port_timeout)) + break; + sql_print_information("Retrying bind on TCP/IP port %u", port); + this_wait= retry * retry / 3 + 1; + sleep(this_wait); + } + + if (ret < 0) + { + char buff[100]; + sprintf(buff, "Can't start server: Bind on TCP/IP port. Got error: %d", + (int) socket_errno); + sql_perror(buff); + sql_print_error("Do you already have another mysqld server running on " + "port: %u ?", port); + unireg_abort(1); + } + if (mysql_socket_listen(ip_sock,(int) back_log) < 0) + { + sql_perror("Can't start server: listen() on TCP/IP port"); + sql_print_error("listen() on TCP/IP failed with error %d", + socket_errno); + unireg_abort(1); + } + +#ifdef FD_CLOEXEC + (void) fcntl(mysql_socket_getfd(ip_sock), F_SETFD, FD_CLOEXEC); +#endif + ip_sock.is_extra_port= is_extra_port; + sockets->push(ip_sock); + } } + freeaddrinfo(ai); - if (ret < 0) - { - char buff[100]; - sprintf(buff, "Can't start server: Bind on TCP/IP port. 
Got error: %d", - (int) socket_errno); - sql_perror(buff); - sql_print_error("Do you already have another mysqld server running on " - "port: %u ?", port); - unireg_abort(1); - } - if (mysql_socket_listen(ip_sock,(int) back_log) < 0) + DBUG_VOID_RETURN; +} + + +/** + Activate usage of a systemd activated sockets + i.e started by mariadb.socket +*/ + +static void use_systemd_activated_sockets() +{ +#ifndef __linux__ + return; +#else + char **names = NULL; + int sd_sockets; + DBUG_ENTER("use_systemd_activated_sockets"); + + sd_sockets= sd_listen_fds_with_names(0, &names); + + if (!sd_sockets) + DBUG_VOID_RETURN; + + DBUG_PRINT("general",("Systemd listen_fds is %d", sd_sockets)); + while (sd_sockets--) { - sql_perror("Can't start server: listen() on TCP/IP port"); - sql_print_error("listen() on TCP/IP failed with error %d", - socket_errno); - unireg_abort(1); + MYSQL_SOCKET sock; + int stype= 0, accepting= 0, getnameinfo_err; + socklen_t l; + union + { + struct sockaddr sa; + struct sockaddr_storage storage; + struct sockaddr_in in; + struct sockaddr_in6 in6; + struct sockaddr_un un; + } addr; + SOCKET_SIZE_TYPE addrlen= sizeof(addr); + char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV]; + + int fd= SD_LISTEN_FDS_START + sd_sockets; + + if (getsockname(fd, &addr.sa, &addrlen)) + { + sql_print_error("Unable to getsockname on systemd socket activation socket %d," + " errno %d", fd, errno); + goto err; + } + + l= sizeof(stype); + if (getsockopt(fd, SOL_SOCKET, SO_TYPE, &stype, &l) < 0) + { + sql_print_error("Unable to getsockopt(SOL_SOCKET, SO_TYPE) on" + " systemd socket activation socket %d," + " errno %d", fd, errno); + goto err; + } + + if (stype != SOCK_STREAM) + { + sql_print_error("Unknown systemd socket activation socket %d," + " not of type SOCK_STREAM - type %d", fd, stype); + goto err; + } + + l= sizeof(accepting); + if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &accepting, &l) < 0) + { + sql_print_error("Unable to getsockopt(SOL_SOCKET, SO_ACCEPTCONN) on" + " systemd 
socket activation socket %d," + " errno %d", fd, errno); + goto err; + } + + if (!accepting) + { + sql_print_error("Unknown systemd socket activation socket %d," + " is not listening", fd); + goto err; + } + + switch (addr.sa.sa_family) + { + case AF_INET: + sock= mysql_socket_fd(key_socket_tcpip, fd); + sock.is_unix_domain_socket= 0; + mysqld_port= ntohs(addr.in.sin_port); + break; + case AF_INET6: + sock= mysql_socket_fd(key_socket_tcpip, fd); + sock.is_unix_domain_socket= 0; + mysqld_port= ntohs(addr.in6.sin6_port); + break; + case AF_UNIX: + sock= mysql_socket_fd(key_socket_unix, fd); + sock.is_unix_domain_socket= 1; + break; + default: + sql_print_error("Unknown systemd socket activation socket %d," + " not UNIX or INET socket", fd); + goto err; + } + + /* + We check names!=NULL here because sd_listen_fds_with_names maybe + just sd_listen_fds on older pre v227 systemd + */ + sock.is_extra_port= names && strcmp(names[sd_sockets], "extra") == 0; + + if (addr.sa.sa_family == AF_UNIX) + { + /* + Handle abstract sockets and present them in @ form. + */ + if (addr.un.sun_path[0] == '\0') + addr.un.sun_path[0] = '@'; + sql_print_information("Using systemd activated unix socket %s%s", + addr.un.sun_path, sock.is_extra_port ? " (extra)" : ""); + } + else + { + getnameinfo_err= getnameinfo(&addr.sa, addrlen, hbuf, sizeof(hbuf), sbuf, + sizeof(sbuf), NI_NUMERICHOST | NI_NUMERICSERV); + if (getnameinfo_err) + sql_print_warning("getnameinfo() on systemd socket activation socket %d" + " failed with error %s(%d)", fd, + gai_strerror(getnameinfo_err), getnameinfo_err); + else + sql_print_information("Using systemd activated socket host %s port %s%s", hbuf, sbuf, + sock.is_extra_port ? 
" (extra)" : ""); + } + + mysql_socket_set_thread_owner(sock); + listen_sockets.push(sock); } + systemd_sock_activation= 1; + free(names); -#ifdef FD_CLOEXEC - (void) fcntl(mysql_socket_getfd(ip_sock), F_SETFD, FD_CLOEXEC); -#endif + DBUG_VOID_RETURN; - DBUG_RETURN(ip_sock); +err: + free(names); + unireg_abort(1); + DBUG_VOID_RETURN; +#endif /* __linux__ */ } + static void network_init(void) { #ifdef HAVE_SYS_UN_H @@ -2404,6 +2492,8 @@ static void network_init(void) #endif DBUG_ENTER("network_init"); + use_systemd_activated_sockets(); + if (MYSQL_CALLBACK_ELSE(thread_scheduler, init, (), 0)) unireg_abort(1); /* purecov: inspected */ @@ -2420,20 +2510,23 @@ static void network_init(void) if (!opt_disable_networking) DBUG_ASSERT(report_port != 0); #endif - if (!opt_disable_networking && !opt_bootstrap) + if (!opt_disable_networking && !opt_bootstrap && !systemd_sock_activation) { if (mysqld_port) - base_ip_sock= activate_tcp_port(mysqld_port); + activate_tcp_port(mysqld_port, &listen_sockets, + /* is_extra_port= */ false); if (mysqld_extra_port) - extra_ip_sock= activate_tcp_port(mysqld_extra_port); + activate_tcp_port(mysqld_extra_port, &listen_sockets, + /* is_extra_port= */ true); } #if defined(HAVE_SYS_UN_H) /* ** Create the UNIX socket */ - if (mysqld_unix_port[0] && !opt_bootstrap) + if (mysqld_unix_port[0] && !opt_bootstrap && systemd_sock_activation==0) { + MYSQL_SOCKET unix_sock= MYSQL_INVALID_SOCKET; size_t port_len; DBUG_PRINT("general",("UNIX Socket is %s",mysqld_unix_port)); @@ -2450,6 +2543,9 @@ static void network_init(void) unireg_abort(1); /* purecov: inspected */ } + unix_sock.is_unix_domain_socket= true; + listen_sockets.push(unix_sock); + unix_sock_is_online= true; mysql_socket_set_thread_owner(unix_sock); bzero((char*) &UNIXaddr, sizeof(UNIXaddr)); @@ -2586,14 +2682,60 @@ void unlink_thd(THD *thd) } -/****************************************************************************** - Setup a signal thread with handles all signals. 
- Because Linux doesn't support schemas use a mutex to check that - the signal thread is ready before continuing -******************************************************************************/ +#if defined(_WIN32) +/* + If server is started as service, the service routine will set + the callback function. +*/ +void mysqld_set_service_status_callback(void (*r)(DWORD, DWORD, DWORD)) +{ + my_report_svc_status= r; +} -#if defined(__WIN__) +static bool startup_complete() +{ + return hEventShutdown != NULL; +} + +/** + Initiates shutdown on Windows by setting shutdown event. + Reports windows service status. + If startup was not finished, terminates process (no good + cleanup possible) +*/ +void mysqld_win_initiate_shutdown() +{ + if (startup_complete()) + { + my_report_svc_status(SERVICE_STOP_PENDING, 0, 0); + abort_loop= 1; + if (!SetEvent(hEventShutdown)) + /* This should never fail.*/ + abort(); + } + else + { + my_report_svc_status(SERVICE_STOPPED, 1, 0); + TerminateProcess(GetCurrentProcess(), 1); + } +} + +/* + Signal when server has started and can accept connections. +*/ +void mysqld_win_set_startup_complete() +{ + my_report_svc_status(SERVICE_RUNNING, 0, 0); + DBUG_ASSERT(startup_complete()); +} + + +void mysqld_win_set_service_name(const char *name) +{ + if (stricmp(name, "mysql")) + load_default_groups[array_elements(load_default_groups) - 2]= name; +} /* On Windows, we use native SetConsoleCtrlHandler for handle events like Ctrl-C @@ -2604,33 +2746,30 @@ void unlink_thd(THD *thd) callstack. */ -static BOOL WINAPI console_event_handler( DWORD type ) +static BOOL WINAPI console_event_handler( DWORD type ) { - DBUG_ENTER("console_event_handler"); -#ifndef EMBEDDED_LIBRARY - if(type == CTRL_C_EVENT) + static const char *names[]= { + "CTRL_C_EVENT","CTRL_BREAK_EVENT", "CTRL_CLOSE_EVENT", "", "", + "CTRL_LOGOFF_EVENT", "CTRL_SHUTDOWN_EVENT"}; + + switch (type) { - /* - Do not shutdown before startup is finished and shutdown - thread is initialized. 
Otherwise there is a race condition - between main thread doing initialization and CTRL-C thread doing - cleanup, which can result into crash. - */ -#ifndef EMBEDDED_LIBRARY - if(hEventShutdown) - break_connect_loop(); - else -#endif - sql_print_warning("CTRL-C ignored during startup"); - DBUG_RETURN(TRUE); + case CTRL_C_EVENT: + case CTRL_BREAK_EVENT: + sql_print_information("console_event_handler: received %s event, shutting down", + names[type]); + mysqld_win_initiate_shutdown(); + return TRUE; + case CTRL_CLOSE_EVENT: + sql_print_information("console_event_handler: received CTRL_CLOSE_EVENT event, terminating"); + TerminateProcess(GetCurrentProcess(), 1); + return TRUE; + default: + return FALSE; } -#endif - DBUG_RETURN(FALSE); } - - #ifdef DEBUG_UNHANDLED_EXCEPTION_FILTER #define DEBUGGER_ATTACH_TIMEOUT 120 /* @@ -2661,7 +2800,7 @@ static void wait_for_debugger(int timeout_sec) } #endif /* DEBUG_UNHANDLED_EXCEPTION_FILTER */ -LONG WINAPI my_unhandler_exception_filter(EXCEPTION_POINTERS *ex_pointers) +static LONG WINAPI my_unhandler_exception_filter(EXCEPTION_POINTERS *ex_pointers) { static BOOL first_time= TRUE; if(!first_time) @@ -2708,10 +2847,9 @@ LONG WINAPI my_unhandler_exception_filter(EXCEPTION_POINTERS *ex_pointers) void init_signals(void) { - if(opt_console) - SetConsoleCtrlHandler(console_event_handler,TRUE); + SetConsoleCtrlHandler(console_event_handler,TRUE); - /* Avoid MessageBox()es*/ + /* Avoid MessageBox()es*/ _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE); _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR); _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE); @@ -2728,7 +2866,8 @@ void init_signals(void) */ SetErrorMode(SetErrorMode(0) | SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX); - SetUnhandledExceptionFilter(my_unhandler_exception_filter); + if(!opt_debugging) + SetUnhandledExceptionFilter(my_unhandler_exception_filter); } @@ -3112,12 +3251,7 @@ void *my_str_realloc_mysqld(void *ptr, size_t size) } #endif -#include 
<mysqld_default_groups.h> -#if defined(__WIN__) && !defined(EMBEDDED_LIBRARY) -static const int load_default_groups_sz= -sizeof(load_default_groups)/sizeof(load_default_groups[0]); -#endif /** @@ -3268,7 +3402,6 @@ SHOW_VAR com_status_vars[]= { {"kill", STMT_STATUS(SQLCOM_KILL)}, {"load", STMT_STATUS(SQLCOM_LOAD)}, {"lock_tables", STMT_STATUS(SQLCOM_LOCK_TABLES)}, - {"multi", COM_STATUS(com_multi)}, {"optimize", STMT_STATUS(SQLCOM_OPTIMIZE)}, {"preload_keys", STMT_STATUS(SQLCOM_PRELOAD_KEYS)}, {"prepare_sql", STMT_STATUS(SQLCOM_PREPARE)}, @@ -3782,7 +3915,7 @@ static int init_common_variables() of SQLCOM_ constants. */ compile_time_assert(sizeof(com_status_vars)/sizeof(com_status_vars[0]) - 1 == - SQLCOM_END + 11); + SQLCOM_END + 10); #endif if (get_options(&remaining_argc, &remaining_argv)) @@ -4965,6 +5098,31 @@ static int init_server_components() /* The following options were added after 5.6.10 */ MYSQL_TO_BE_IMPLEMENTED_OPTION("rpl-stop-slave-timeout"), MYSQL_TO_BE_IMPLEMENTED_OPTION("validate-user-plugins"), // NO_EMBEDDED_ACCESS_CHECKS + + /* The following options were deprecated in 10.5 or earlier */ + MARIADB_REMOVED_OPTION("innodb-adaptive-max-sleep-delay"), + MARIADB_REMOVED_OPTION("innodb-background-scrub-data-check-interval"), + MARIADB_REMOVED_OPTION("innodb-background-scrub-data-compressed"), + MARIADB_REMOVED_OPTION("innodb-background-scrub-data-interval"), + MARIADB_REMOVED_OPTION("innodb-background-scrub-data-uncompressed"), + MARIADB_REMOVED_OPTION("innodb-buffer-pool-instances"), + MARIADB_REMOVED_OPTION("innodb-commit-concurrency"), + MARIADB_REMOVED_OPTION("innodb-concurrency-tickets"), + MARIADB_REMOVED_OPTION("innodb-file-format"), + MARIADB_REMOVED_OPTION("innodb-large-prefix"), + MARIADB_REMOVED_OPTION("innodb-lock-schedule-algorithm"), + MARIADB_REMOVED_OPTION("innodb-log-checksums"), + MARIADB_REMOVED_OPTION("innodb-log-compressed-pages"), + MARIADB_REMOVED_OPTION("innodb-log-files-in-group"), + 
MARIADB_REMOVED_OPTION("innodb-log-optimize-ddl"), + MARIADB_REMOVED_OPTION("innodb-page-cleaners"), + MARIADB_REMOVED_OPTION("innodb-replication-delay"), + MARIADB_REMOVED_OPTION("innodb-scrub-log"), + MARIADB_REMOVED_OPTION("innodb-scrub-log-speed"), + MARIADB_REMOVED_OPTION("innodb-sync-array-size"), + MARIADB_REMOVED_OPTION("innodb-thread-concurrency"), + MARIADB_REMOVED_OPTION("innodb-thread-sleep-delay"), + MARIADB_REMOVED_OPTION("innodb-undo-logs"), {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; /* @@ -5179,19 +5337,6 @@ static int init_server_components() #ifndef EMBEDDED_LIBRARY -#ifdef _WIN32 -static void create_shutdown_event() -{ - hEventShutdown=CreateEvent(0, FALSE, FALSE, shutdown_event_name); - // On "Stop Service" we have to do regular shutdown - Service.SetShutdownEvent(hEventShutdown); -} -#else /*_WIN32*/ -#define create_shutdown_event() -#endif -#endif /* EMBEDDED_LIBRARY */ - -#ifndef EMBEDDED_LIBRARY #ifndef DBUG_OFF /* @@ -5231,11 +5376,7 @@ static void test_lc_time_sz() #endif//DBUG_OFF -#ifdef __WIN__ -int win_main(int argc, char **argv) -#else int mysqld_main(int argc, char **argv) -#endif { #ifndef _WIN32 /* We can't close stdin just now, because it may be booststrap mode. */ @@ -5253,7 +5394,6 @@ int mysqld_main(int argc, char **argv) if (init_early_variables()) exit(1); -#ifndef _WIN32 #ifdef WITH_PERFSCHEMA_STORAGE_ENGINE pre_initialize_performance_schema(); #endif /*WITH_PERFSCHEMA_STORAGE_ENGINE */ @@ -5263,7 +5403,6 @@ int mysqld_main(int argc, char **argv) fprintf(stderr, "my_init() failed."); return 1; } -#endif orig_argc= argc; orig_argv= argv; @@ -5463,16 +5602,16 @@ int mysqld_main(int argc, char **argv) } #ifdef WITH_WSREP - wsrep_set_wsrep_on(); + wsrep_set_wsrep_on(nullptr); if (WSREP_ON && wsrep_check_opts()) unireg_abort(1); #endif +#ifdef _WIN32 /* The subsequent calls may take a long time : e.g. innodb log read. 
Thus set the long running service control manager timeout */ -#if defined(_WIN32) && !defined(EMBEDDED_LIBRARY) - Service.SetSlowStarting(slow_start_timeout); + my_report_svc_status(SERVICE_START_PENDING, NO_ERROR, slow_start_timeout); #endif if (init_server_components()) @@ -5481,13 +5620,6 @@ int mysqld_main(int argc, char **argv) init_ssl(); network_init(); -#ifdef _WIN32 - if (!opt_console) - { - FreeConsole(); // Remove window - } -#endif - #ifdef WITH_WSREP // Recover and exit. if (wsrep_recovery) @@ -5578,7 +5710,6 @@ int mysqld_main(int argc, char **argv) } } - create_shutdown_event(); start_handle_manager(); /* Copy default global rpl_filter to global_rpl_filter */ @@ -5605,8 +5736,8 @@ int mysqld_main(int argc, char **argv) if (IS_SYSVAR_AUTOSIZE(&server_version_ptr)) sql_print_information(ER_DEFAULT(ER_STARTUP), my_progname, server_version, - ((mysql_socket_getfd(unix_sock) == INVALID_SOCKET) ? - (char*) "" : mysqld_unix_port), + (systemd_sock_activation ? "Systemd socket activated ports" : + (unix_sock_is_online ? mysqld_unix_port : (char*) "")), mysqld_port, MYSQL_COMPILATION_COMMENT); else { @@ -5618,8 +5749,8 @@ int mysqld_main(int argc, char **argv) sql_print_information(ER_DEFAULT(ER_STARTUP), my_progname, real_server_version, - ((mysql_socket_getfd(unix_sock) == INVALID_SOCKET) ? - (char*) "" : mysqld_unix_port), + (systemd_sock_activation ? "Systemd socket activated ports" : + (unix_sock_is_online ? 
mysqld_unix_port : (char*) "")), mysqld_port, MYSQL_COMPILATION_COMMENT); } @@ -5632,9 +5763,6 @@ int mysqld_main(int argc, char **argv) } #endif -#if defined(_WIN32) && !defined(EMBEDDED_LIBRARY) - Service.SetRunning(); -#endif /* Signal threads waiting for server to be started */ mysql_mutex_lock(&LOCK_server_started); @@ -5690,16 +5818,6 @@ int mysqld_main(int argc, char **argv) */ PSI_CALL_delete_current_thread(); -#if defined(__WIN__) && !defined(EMBEDDED_LIBRARY) - if (start_mode) - Service.Stop(); - else - { - Service.SetShutdownEvent(0); - if (hEventShutdown) - CloseHandle(hEventShutdown); - } -#endif #if (defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)) ERR_remove_state(0); #endif @@ -5710,245 +5828,6 @@ int mysqld_main(int argc, char **argv) #endif /* !EMBEDDED_LIBRARY */ -/**************************************************************************** - Main and thread entry function for Win32 - (all this is needed only to run mysqld as a service on WinNT) -****************************************************************************/ - -#if defined(__WIN__) && !defined(EMBEDDED_LIBRARY) -void mysql_service(void *p) -{ - if (my_thread_init()) - abort(); - - if (use_opt_args) - win_main(opt_argc, opt_argv); - else - win_main(Service.my_argc, Service.my_argv); - - my_thread_end(); -} - - -/* Quote string if it contains space, else copy */ - -static char *add_quoted_string(char *to, const char *from, char *to_end) -{ - uint length= (uint) (to_end-to); - - if (!strchr(from, ' ')) - return strmake(to, from, length-1); - return strxnmov(to, length-1, "\"", from, "\"", NullS); -} - - -/** - Handle basic handling of services, like installation and removal. - - @param argv Pointer to argument list - @param servicename Internal name of service - @param displayname Display name of service (in taskbar ?) 
- @param file_path Path to this program - @param startup_option Startup option to mysqld - - @retval 0 option handled - @retval 1 Could not handle option -*/ - -static bool -default_service_handling(char **argv, - const char *servicename, - const char *displayname, - const char *file_path, - const char *extra_opt, - const char *account_name) -{ - char path_and_service[FN_REFLEN+FN_REFLEN+32], *pos, *end; - const char *opt_delim; - end= path_and_service + sizeof(path_and_service)-3; - - /* We have to quote filename if it contains spaces */ - pos= add_quoted_string(path_and_service, file_path, end); - if (extra_opt && *extra_opt) - { - /* - Add option after file_path. There will be zero or one extra option. It's - assumed to be --defaults-file=file but isn't checked. The variable (not - the option name) should be quoted if it contains a string. - */ - *pos++= ' '; - if ((opt_delim= strchr(extra_opt, '='))) - { - size_t length= ++opt_delim - extra_opt; - pos= strnmov(pos, extra_opt, length); - } - else - opt_delim= extra_opt; - - pos= add_quoted_string(pos, opt_delim, end); - } - /* We must have servicename last */ - *pos++= ' '; - (void) add_quoted_string(pos, servicename, end); - - if (Service.got_service_option(argv, "install")) - { - Service.Install(1, servicename, displayname, path_and_service, - account_name); - return 0; - } - if (Service.got_service_option(argv, "install-manual")) - { - Service.Install(0, servicename, displayname, path_and_service, - account_name); - return 0; - } - if (Service.got_service_option(argv, "remove")) - { - Service.Remove(servicename); - return 0; - } - return 1; -} - -/* Remove service name from the command line arguments, and pass -resulting command line to the service via opt_args.*/ -#include <vector> -static void service_init_cmdline_args(int argc, char **argv) -{ - start_mode= 1; - use_opt_args= 1; - - if(argc == 1) - { - opt_argc= argc; - opt_argv= argv; - } - else - { - static std::vector<char *> argv_no_service; - for (int 
i= 0; argv[i]; i++) - argv_no_service.push_back(argv[i]); - // Remove the last argument, service name - argv_no_service[argv_no_service.size() - 1]= 0; - opt_argc= (int)argv_no_service.size() - 1; - opt_argv= &argv_no_service[0]; - } - DBUG_ASSERT(!opt_argv[opt_argc]); -} - -int mysqld_main(int argc, char **argv) -{ - my_progname= argv[0]; - - /* - When several instances are running on the same machine, we - need to have an unique named hEventShudown through the - application PID e.g.: MySQLShutdown1890; MySQLShutdown2342 - */ - int10_to_str((int) GetCurrentProcessId(),strmov(shutdown_event_name, - "MySQLShutdown"), 10); - - /* Must be initialized early for comparison of service name */ - system_charset_info= &my_charset_utf8mb3_general_ci; - -#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE - pre_initialize_performance_schema(); -#endif /*WITH_PERFSCHEMA_STORAGE_ENGINE */ - - if (my_init()) - { - fprintf(stderr, "my_init() failed."); - return 1; - } - - - char file_path[FN_REFLEN]; - my_path(file_path, argv[0], ""); /* Find name in path */ - fn_format(file_path,argv[0],file_path,"", MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_RESOLVE_SYMLINKS); - - - if (argc == 2) - { - if (!default_service_handling(argv, MYSQL_SERVICENAME, MYSQL_SERVICENAME, - file_path, "", NULL)) - return 0; - - if (Service.IsService(argv[1])) /* Start an optional service */ - { - /* - Only add the service name to the groups read from the config file - if it's not "MySQL". (The default service name should be 'mysqld' - but we started a bad tradition by calling it MySQL from the start - and we are now stuck with it. 
- */ - if (my_strcasecmp(system_charset_info, argv[1],"mysql")) - load_default_groups[load_default_groups_sz-2]= argv[1]; - service_init_cmdline_args(argc, argv); - Service.Init(argv[1], mysql_service); - return 0; - } - } - else if (argc == 3) /* install or remove any optional service */ - { - if (!default_service_handling(argv, argv[2], argv[2], file_path, "", - NULL)) - return 0; - if (Service.IsService(argv[2])) - { - /* - mysqld was started as - mysqld --defaults-file=my_path\my.ini service-name - */ - if (my_strcasecmp(system_charset_info, argv[2],"mysql")) - load_default_groups[load_default_groups_sz-2]= argv[2]; - service_init_cmdline_args(argc, argv); - Service.Init(argv[2], mysql_service); - return 0; - } - } - else if (argc == 4 || argc == 5) - { - /* - This may seem strange, because we handle --local-service while - preserving 4.1's behavior of allowing any one other argument that is - passed to the service on startup. (The assumption is that this is - --defaults-file=file, but that was not enforced in 4.1, so we don't - enforce it here.) 
- */ - const char *extra_opt= NullS; - const char *account_name = NullS; - int index; - for (index = 3; index < argc; index++) - { - if (!strcmp(argv[index], "--local-service")) - account_name= "NT AUTHORITY\\LocalService"; - else - extra_opt= argv[index]; - } - - if (argc == 4 || account_name) - if (!default_service_handling(argv, argv[2], argv[2], file_path, - extra_opt, account_name)) - return 0; - } - else if (argc == 1 && Service.IsService(MYSQL_SERVICENAME)) - { - /* start the default service */ - service_init_cmdline_args(argc, argv); - Service.Init(MYSQL_SERVICENAME, mysql_service); - return 0; - } - - /* Start as standalone server */ - Service.my_argc=argc; - Service.my_argv=argv; - mysql_service(NULL); - return 0; -} -#endif - - static bool read_init_file(char *file_name) { MYSQL_FILE *file; @@ -6078,8 +5957,7 @@ void handle_accepted_socket(MYSQL_SOCKET new_sock, MYSQL_SOCKET sock) { #ifdef HAVE_LIBWRAP { - if (mysql_socket_getfd(sock) == mysql_socket_getfd(base_ip_sock) || - mysql_socket_getfd(sock) == mysql_socket_getfd(extra_ip_sock)) + if (!sock.is_unix_domain_socket) { struct request_info req; signal(SIGCHLD, SIG_DFL); @@ -6122,11 +6000,9 @@ void handle_accepted_socket(MYSQL_SOCKET new_sock, MYSQL_SOCKET sock) DBUG_PRINT("info", ("Creating CONNECT for new connection")); if (auto connect= new CONNECT(new_sock, - mysql_socket_getfd(sock) == - mysql_socket_getfd(unix_sock) ? + sock.is_unix_domain_socket ? VIO_TYPE_SOCKET : VIO_TYPE_TCPIP, - mysql_socket_getfd(sock) == - mysql_socket_getfd(extra_ip_sock) ? + sock.is_extra_port ? 
extra_thread_scheduler : thread_scheduler)) create_new_thread(connect); else @@ -6162,36 +6038,32 @@ void handle_connections_sockets() struct sockaddr_storage cAddr; int retval; #ifdef HAVE_POLL - int socket_count= 0; - struct pollfd fds[3]; // for ip_sock, unix_sock and extra_ip_sock - MYSQL_SOCKET pfs_fds[3]; // for performance schema -#define setup_fds(X) \ - mysql_socket_set_thread_owner(X); \ - pfs_fds[socket_count]= (X); \ - fds[socket_count].fd= mysql_socket_getfd(X); \ - fds[socket_count].events= POLLIN; \ - socket_count++ + // for ip_sock, unix_sock and extra_ip_sock + Dynamic_array<struct pollfd> fds(PSI_INSTRUMENT_MEM); #else -#define setup_fds(X) FD_SET(mysql_socket_getfd(X),&clientFDs) fd_set readFDs,clientFDs; - FD_ZERO(&clientFDs); #endif DBUG_ENTER("handle_connections_sockets"); - if (mysql_socket_getfd(base_ip_sock) != INVALID_SOCKET) +#ifdef HAVE_POLL + for (size_t i= 0; i < listen_sockets.size(); i++) { - setup_fds(base_ip_sock); - set_non_blocking_if_supported(base_ip_sock); + struct pollfd local_fds; + mysql_socket_set_thread_owner(listen_sockets.at(i)); + local_fds.fd= mysql_socket_getfd(listen_sockets.at(i)); + local_fds.events= POLLIN; + fds.push(local_fds); + set_non_blocking_if_supported(listen_sockets.at(i)); } - if (mysql_socket_getfd(extra_ip_sock) != INVALID_SOCKET) +#else + FD_ZERO(&clientFDs); + for (size_t i= 0; i < listen_sockets.size(); i++) { - setup_fds(extra_ip_sock); - set_non_blocking_if_supported(extra_ip_sock); + int fd= mysql_socket_getfd(listen_sockets.at(i)); + FD_SET(fd, &clientFDs); + set_non_blocking_if_supported(listen_sockets.at(i)); } -#ifdef HAVE_SYS_UN_H - setup_fds(unix_sock); - set_non_blocking_if_supported(unix_sock); #endif sd_notify(0, "READY=1\n" @@ -6201,10 +6073,10 @@ void handle_connections_sockets() while (!abort_loop) { #ifdef HAVE_POLL - retval= poll(fds, socket_count, -1); + retval= poll(fds.get_pos(0), fds.size(), -1); #else readFDs=clientFDs; - retval= select((int) 0,&readFDs,0,0,0); + retval= 
select(FD_SETSIZE, &readFDs, NULL, NULL, NULL); #endif if (retval < 0) @@ -6228,22 +6100,23 @@ void handle_connections_sockets() /* Is this a new connection request ? */ #ifdef HAVE_POLL - for (int i= 0; i < socket_count; ++i) + for (size_t i= 0; i < fds.size(); ++i) { - if (fds[i].revents & POLLIN) + if (fds.at(i).revents & POLLIN) { - sock= pfs_fds[i]; + sock= listen_sockets.at(i); break; } } #else // HAVE_POLL - if (FD_ISSET(mysql_socket_getfd(base_ip_sock),&readFDs)) - sock= base_ip_sock; - else - if (FD_ISSET(mysql_socket_getfd(extra_ip_sock),&readFDs)) - sock= extra_ip_sock; - else - sock = unix_sock; + for (size_t i=0; i < listen_sockets.size(); i++) + { + if (FD_ISSET(mysql_socket_getfd(listen_sockets.at(i)), &readFDs)) + { + sock= listen_sockets.at(i); + break; + } + } #endif // HAVE_POLL for (uint retry=0; retry < MAX_ACCEPT_RETRY; retry++) @@ -6584,7 +6457,7 @@ struct my_option my_long_options[]= {"master-retry-count", 0, "The number of tries the slave will make to connect to the master before giving up.", &master_retry_count, &master_retry_count, 0, GET_ULONG, - REQUIRED_ARG, 3600*24, 0, 0, 0, 0, 0}, + REQUIRED_ARG, 100000, 0, 0, 0, 0, 0}, #ifdef HAVE_REPLICATION {"init-rpl-role", 0, "Set the replication role", &rpl_status, &rpl_status, &rpl_role_typelib, @@ -7375,6 +7248,7 @@ SHOW_VAR status_vars[]= { {"Max_used_connections", (char*) &max_used_connections, SHOW_LONG}, {"Memory_used", (char*) &show_memory_used, SHOW_SIMPLE_FUNC}, {"Memory_used_initial", (char*) &start_memory_used, SHOW_LONGLONG}, + {"Resultset_metadata_skipped", (char *) offsetof(STATUS_VAR, skip_metadata_count),SHOW_LONG_STATUS}, {"Not_flushed_delayed_rows", (char*) &delayed_rows_in_use, SHOW_LONG_NOFLUSH}, {"Open_files", (char*) &my_file_opened, SHOW_SINT}, {"Open_streams", (char*) &my_stream_opened, SHOW_LONG_NOFLUSH}, @@ -7728,9 +7602,6 @@ static int mysql_init_variables(void) character_set_filesystem= &my_charset_bin; opt_specialflag= SPECIAL_ENGLISH; -#ifndef EMBEDDED_LIBRARY - 
unix_sock= base_ip_sock= extra_ip_sock= MYSQL_INVALID_SOCKET; -#endif mysql_home_ptr= mysql_home; log_error_file_ptr= log_error_file; protocol_version= PROTOCOL_VERSION; @@ -8179,6 +8050,23 @@ mysqld_get_one_option(const struct my_option *opt, const char *argument, break; case OPT_BOOTSTRAP: opt_noacl=opt_bootstrap=1; +#ifdef _WIN32 + { + /* + Check if security descriptor is passed from + mysql_install_db.exe. + Used by Windows installer to correctly setup + privileges on the new directories. + */ + char* dir_sddl = getenv("MARIADB_NEW_DIRECTORY_SDDL"); + if (dir_sddl) + { + ConvertStringSecurityDescriptorToSecurityDescriptor( + dir_sddl, SDDL_REVISION_1, &my_dir_security_attributes.lpSecurityDescriptor, NULL); + DBUG_ASSERT(my_dir_security_attributes.lpSecurityDescriptor); + } + } +#endif break; case OPT_SERVER_ID: ::server_id= global_system_variables.server_id; @@ -9246,6 +9134,7 @@ PSI_memory_key key_memory_binlog_ver_1_event; PSI_memory_key key_memory_bison_stack; PSI_memory_key key_memory_blob_mem_storage; PSI_memory_key key_memory_dboptions_hash; +PSI_memory_key key_memory_dbnames_cache; PSI_memory_key key_memory_errmsgs; PSI_memory_key key_memory_frm_string; PSI_memory_key key_memory_gdl; @@ -9546,6 +9435,7 @@ static PSI_memory_info all_server_memory[]= { &key_memory_THD_handler_tables_hash, "THD::handler_tables_hash", 0}, { &key_memory_hash_index_key_buffer, "hash_index_key_buffer", 0}, { &key_memory_dboptions_hash, "dboptions_hash", 0}, + { &key_memory_dbnames_cache, "dbnames_cache", 0}, { &key_memory_user_conn, "user_conn", 0}, // { &key_memory_LOG_POS_COORD, "LOG_POS_COORD", 0}, // { &key_memory_XID_STATE, "XID_STATE", 0}, diff --git a/sql/mysqld.h b/sql/mysqld.h index 24580d6bb90..64cf1c5ebb0 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -147,7 +147,6 @@ extern ulong opt_replicate_events_marked_for_skip; extern char *default_tz_name; extern Time_zone *default_tz; extern char *my_bind_addr_str; -extern int server_socket_ai_family; extern char 
*default_storage_engine, *default_tmp_storage_engine; extern char *enforced_storage_engine; extern char *gtid_pos_auto_engines; @@ -502,6 +501,7 @@ extern PSI_memory_key key_memory_TABLE; extern PSI_memory_key key_memory_binlog_statement_buffer; extern PSI_memory_key key_memory_user_conn; extern PSI_memory_key key_memory_dboptions_hash; +extern PSI_memory_key key_memory_dbnames_cache; extern PSI_memory_key key_memory_hash_index_key_buffer; extern PSI_memory_key key_memory_THD_handler_tables_hash; extern PSI_memory_key key_memory_JOIN_CACHE; @@ -956,4 +956,14 @@ extern ulong opt_binlog_dbug_fsync_sleep; extern uint volatile global_disable_checkpoint; extern my_bool opt_help; +extern int mysqld_main(int argc, char **argv); + +#ifdef _WIN32 +extern HANDLE hEventShutdown; +extern void mysqld_win_initiate_shutdown(); +extern void mysqld_win_set_startup_complete(); +extern void mysqld_set_service_status_callback(void (*)(DWORD, DWORD, DWORD)); +extern void mysqld_win_set_service_name(const char *name); +#endif + #endif /* MYSQLD_INCLUDED */ diff --git a/sql/net_serv.cc b/sql/net_serv.cc index a96c43a94fe..409d3cac85e 100644 --- a/sql/net_serv.cc +++ b/sql/net_serv.cc @@ -640,8 +640,20 @@ net_real_write(NET *net,const uchar *packet, size_t len) my_bool net_blocking = vio_is_blocking(net->vio); DBUG_ENTER("net_real_write"); -#if defined(MYSQL_SERVER) && defined(USE_QUERY_CACHE) - query_cache_insert(net->thd, (char*) packet, len, net->pkt_nr); +#if defined(MYSQL_SERVER) + THD *thd= (THD *)net->thd; +#if defined(USE_QUERY_CACHE) + query_cache_insert(thd, (char*) packet, len, net->pkt_nr); +#endif + if (likely(thd)) + { + /* + Wait until pending operations (currently it is engine + asynchronous group commit) are finished before replying + to the client, to keep durability promise. 
+ */ + thd->async_state.wait_for_pending_ops(); + } #endif if (unlikely(net->error == 2)) diff --git a/sql/nt_servc.cc b/sql/nt_servc.cc deleted file mode 100644 index 9c754763aab..00000000000 --- a/sql/nt_servc.cc +++ /dev/null @@ -1,555 +0,0 @@ -/** - @file - - @brief - Windows NT Service class library. - - Copyright Abandoned 1998 Irena Pancirov - Irnet Snc - This file is public domain and comes with NO WARRANTY of any kind -*/ -#include <windows.h> -#include <process.h> -#include <stdio.h> -#include <stdlib.h> -#include "nt_servc.h" - - -static NTService *pService; - -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ -NTService::NTService() -{ - - bOsNT = FALSE; - //service variables - ServiceName = NULL; - hExitEvent = 0; - bPause = FALSE; - bRunning = FALSE; - hThreadHandle = 0; - fpServiceThread = NULL; - - //time-out variables - nStartTimeOut = 15000; - nStopTimeOut = 86400000; - nPauseTimeOut = 5000; - nResumeTimeOut = 5000; - - //install variables - dwDesiredAccess = SERVICE_ALL_ACCESS; - dwServiceType = SERVICE_WIN32_OWN_PROCESS; - dwStartType = SERVICE_AUTO_START; - dwErrorControl = SERVICE_ERROR_NORMAL; - szLoadOrderGroup = NULL; - lpdwTagID = NULL; - szDependencies = NULL; - - my_argc = 0; - my_argv = NULL; - hShutdownEvent = 0; - nError = 0; - dwState = 0; -} - -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ -NTService::~NTService() -{ - if (ServiceName != NULL) delete[] ServiceName; -} -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ - - -/** - Registers the main service thread with the service manager. 
- - @param ServiceThread pointer to the main programs entry function - when the service is started -*/ - - -long NTService::Init(LPCSTR szInternName, THREAD_FC ServiceThread) -{ - - pService = this; - - fpServiceThread = ServiceThread; - ServiceName = new char[lstrlen(szInternName)+1]; - lstrcpy(ServiceName,szInternName); - - SERVICE_TABLE_ENTRY stb[] = - { - { (char *)szInternName, ServiceMain} , - { NULL, NULL } - }; - - return StartServiceCtrlDispatcher(stb); //register with the Service Manager -} - - -/** - Installs the service with Service manager. - - nError values: - - 0 success - - 1 Can't open the Service manager - - 2 Failed to create service. -*/ - - -BOOL NTService::Install(int startType, LPCSTR szInternName, - LPCSTR szDisplayName, - LPCSTR szFullPath, LPCSTR szAccountName, - LPCSTR szPassword) -{ - BOOL ret_val=FALSE; - SC_HANDLE newService, scm; - - if (!SeekStatus(szInternName,1)) - return FALSE; - - char szFilePath[_MAX_PATH]; - GetModuleFileName(NULL, szFilePath, sizeof(szFilePath)); - - // open a connection to the SCM - if (!(scm = OpenSCManager(0, 0,SC_MANAGER_CREATE_SERVICE))) - printf("Failed to install the service (Couldn't open the SCM)\n"); - else // Install the new service - { - if (!(newService= - CreateService(scm, - szInternName, - szDisplayName, - dwDesiredAccess,//default: SERVICE_ALL_ACCESS - dwServiceType, //default: SERVICE_WIN32_OWN_PROCESS - //default: SERVICE_AUTOSTART - (startType == 1 ? 
SERVICE_AUTO_START : - SERVICE_DEMAND_START), - dwErrorControl, //default: SERVICE_ERROR_NORMAL - szFullPath, //exec full path - szLoadOrderGroup, //default: NULL - lpdwTagID, //default: NULL - szDependencies, //default: NULL - szAccountName, //default: NULL - szPassword))) //default: NULL - printf("Failed to install the service (Couldn't create service)\n"); - else - { - printf("Service successfully installed.\n"); - CloseServiceHandle(newService); - ret_val=TRUE; // Everything went ok - } - CloseServiceHandle(scm); - } - return ret_val; -} - - -/** - Removes the service. - - nError values: - - 0 success - - 1 Can't open the Service manager - - 2 Failed to locate service - - 3 Failed to delete service. -*/ - - -BOOL NTService::Remove(LPCSTR szInternName) -{ - BOOL ret_value=FALSE; - SC_HANDLE service, scm; - - if (!SeekStatus(szInternName,0)) - return FALSE; - - nError=0; - - // open a connection to the SCM - if (!(scm = OpenSCManager(0, 0,SC_MANAGER_CREATE_SERVICE))) - { - printf("Failed to remove the service (Couldn't open the SCM)\n"); - } - else - { - if ((service = OpenService(scm,szInternName, DELETE))) - { - if (!DeleteService(service)) - printf("Failed to remove the service\n"); - else - { - printf("Service successfully removed.\n"); - ret_value=TRUE; // everything went ok - } - CloseServiceHandle(service); - } - else - printf("Failed to remove the service (Couldn't open the service)\n"); - CloseServiceHandle(scm); - } - return ret_value; -} - -/** - this function should be called before the app. exits to stop - the service -*/ -void NTService::Stop(void) -{ - SetStatus(SERVICE_STOP_PENDING,NO_ERROR, 0, 1, 60000); - StopService(); - SetStatus(SERVICE_STOPPED, NO_ERROR, 0, 1, 1000); -} - -/** - This is the function that is called from the - service manager to start the service. 
-*/ - - -void NTService::ServiceMain(DWORD argc, LPTSTR *argv) -{ - - // registration function - if (!(pService->hServiceStatusHandle = - RegisterServiceCtrlHandler(pService->ServiceName, - NTService::ServiceCtrlHandler))) - goto error; - - // notify SCM of progress - if (!pService->SetStatus(SERVICE_START_PENDING,NO_ERROR, 0, 1, 8000)) - goto error; - - // create the exit event - if (!(pService->hExitEvent = CreateEvent (0, TRUE, FALSE,0))) - goto error; - - if (!pService->SetStatus(SERVICE_START_PENDING,NO_ERROR, 0, 3, - pService->nStartTimeOut)) - goto error; - - // save start arguments - pService->my_argc=argc; - pService->my_argv=argv; - - // start the service - if (!pService->StartService()) - goto error; - - // wait for exit event - WaitForSingleObject (pService->hExitEvent, INFINITE); - - // wait for thread to exit - if (WaitForSingleObject (pService->hThreadHandle, INFINITE) == WAIT_TIMEOUT) - CloseHandle(pService->hThreadHandle); - - pService->Exit(0); - return; - -error: - pService->Exit(GetLastError()); - return; -} - - - -void NTService::SetRunning() -{ - if (pService) - pService->SetStatus(SERVICE_RUNNING, NO_ERROR, 0, 0, 0); -} - -void NTService::SetSlowStarting(unsigned long timeout) -{ - if (pService) - pService->SetStatus(SERVICE_START_PENDING,NO_ERROR, 0, 0, timeout); -} - - -/* ------------------------------------------------------------------------ - StartService() - starts the application thread - -------------------------------------------------------------------------- */ - -BOOL NTService::StartService() -{ - // Start the real service's thread (application) - if (!(hThreadHandle = (HANDLE) _beginthread(fpServiceThread,0, - (void *) this))) - return FALSE; - bRunning = TRUE; - return TRUE; -} -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ -void NTService::StopService() -{ - bRunning=FALSE; - - // Set the event for application - if 
(hShutdownEvent) - SetEvent(hShutdownEvent); - - // Set the event for ServiceMain - SetEvent(hExitEvent); -} -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ -void NTService::PauseService() -{ - bPause = TRUE; - SuspendThread(hThreadHandle); -} -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ -void NTService::ResumeService() -{ - bPause=FALSE; - ResumeThread(hThreadHandle); -} -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ -BOOL NTService::SetStatus (DWORD dwCurrentState,DWORD dwWin32ExitCode, - DWORD dwServiceSpecificExitCode, DWORD dwCheckPoint, - DWORD dwWaitHint) -{ - BOOL bRet; - SERVICE_STATUS serviceStatus; - - dwState=dwCurrentState; - - serviceStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS; - serviceStatus.dwCurrentState = dwCurrentState; - - if (dwCurrentState == SERVICE_START_PENDING) - serviceStatus.dwControlsAccepted = 0; //don't accept control events - else - serviceStatus.dwControlsAccepted = (SERVICE_ACCEPT_STOP | - SERVICE_ACCEPT_PAUSE_CONTINUE | - SERVICE_ACCEPT_SHUTDOWN); - - // if a specific exit code is defined,set up the win32 exit code properly - if (dwServiceSpecificExitCode == 0) - serviceStatus.dwWin32ExitCode = dwWin32ExitCode; - else - serviceStatus.dwWin32ExitCode = ERROR_SERVICE_SPECIFIC_ERROR; - - serviceStatus.dwServiceSpecificExitCode = dwServiceSpecificExitCode; - - serviceStatus.dwCheckPoint = dwCheckPoint; - serviceStatus.dwWaitHint = dwWaitHint; - - // Pass the status to the Service Manager - if (!(bRet=SetServiceStatus (hServiceStatusHandle, &serviceStatus))) - StopService(); - - return bRet; -} -/* ------------------------------------------------------------------------ - - 
-------------------------------------------------------------------------- */ -void NTService::ServiceCtrlHandler(DWORD ctrlCode) -{ - DWORD dwState; - - if (!pService) - return; - - dwState=pService->dwState; // get current state - - switch(ctrlCode) { - case SERVICE_CONTROL_SHUTDOWN: - case SERVICE_CONTROL_STOP: - dwState = SERVICE_STOP_PENDING; - pService->SetStatus(SERVICE_STOP_PENDING,NO_ERROR, 0, 1, - pService->nStopTimeOut); - pService->StopService(); - break; - - default: - pService->SetStatus(dwState, NO_ERROR,0, 0, 0); - break; - } - //pService->SetStatus(dwState, NO_ERROR,0, 0, 0); -} - -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ - -void NTService::Exit(DWORD error) -{ - if (hExitEvent) - CloseHandle(hExitEvent); - - // Send a message to the scm to tell that we stop - if (hServiceStatusHandle) - SetStatus(SERVICE_STOPPED, error,0, 0, 0); - - // If the thread has started kill it ??? 
- // if (hThreadHandle) CloseHandle(hThreadHandle); - -} - -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ - -BOOL NTService::SeekStatus(LPCSTR szInternName, int OperationType) -{ - BOOL ret_value=FALSE; - SC_HANDLE service, scm; - - // open a connection to the SCM - if (!(scm = OpenSCManager(0, 0,SC_MANAGER_CREATE_SERVICE))) - { - DWORD ret_error=GetLastError(); - if (ret_error == ERROR_ACCESS_DENIED) - { - printf("Install/Remove of the Service Denied!\n"); - if (!is_super_user()) - printf("That operation should be made by an user with Administrator privileges!\n"); - } - else - printf("There is a problem for to open the Service Control Manager!\n"); - } - else - { - if (OperationType == 1) - { - /* an install operation */ - if ((service = OpenService(scm,szInternName, SERVICE_ALL_ACCESS ))) - { - LPQUERY_SERVICE_CONFIG ConfigBuf; - DWORD dwSize; - - ConfigBuf = (LPQUERY_SERVICE_CONFIG) LocalAlloc(LPTR, 4096); - printf("The service already exists!\n"); - if (QueryServiceConfig(service,ConfigBuf,4096,&dwSize)) - printf("The current server installed: %s\n", - ConfigBuf->lpBinaryPathName); - LocalFree(ConfigBuf); - CloseServiceHandle(service); - } - else - ret_value=TRUE; - } - else - { - /* a remove operation */ - if (!(service = OpenService(scm,szInternName, SERVICE_ALL_ACCESS ))) - printf("The service doesn't exist!\n"); - else - { - SERVICE_STATUS ss; - - memset(&ss, 0, sizeof(ss)); - if (QueryServiceStatus(service,&ss)) - { - DWORD dwState = ss.dwCurrentState; - if (dwState == SERVICE_RUNNING) - printf("Failed to remove the service because the service is running\nStop the service and try again\n"); - else if (dwState == SERVICE_STOP_PENDING) - printf("\ -Failed to remove the service because the service is in stop pending state!\n\ -Wait 30 seconds and try again.\n\ -If this condition persist, reboot the machine and try again\n"); - else - ret_value= 
TRUE; - } - CloseServiceHandle(service); - } - } - CloseServiceHandle(scm); - } - - return ret_value; -} -/* ------------------------------------------------------------------------ - -------------------------------------------------------------------------- */ -BOOL NTService::IsService(LPCSTR ServiceName) -{ - BOOL ret_value=FALSE; - SC_HANDLE service, scm; - - if ((scm= OpenSCManager(0, 0,SC_MANAGER_ENUMERATE_SERVICE))) - { - if ((service = OpenService(scm,ServiceName, SERVICE_QUERY_STATUS))) - { - ret_value=TRUE; - CloseServiceHandle(service); - } - CloseServiceHandle(scm); - } - return ret_value; -} -/* ------------------------------------------------------------------------ - -------------------------------------------------------------------------- */ -BOOL NTService::got_service_option(char **argv, const char *service_option) -{ - char *option; - for (option= argv[1]; *option; option++) - if (!strcmp(option, service_option)) - return TRUE; - return FALSE; -} -/* ------------------------------------------------------------------------ - -------------------------------------------------------------------------- */ -BOOL NTService::is_super_user() -{ - HANDLE hAccessToken; - UCHAR InfoBuffer[1024]; - PTOKEN_GROUPS ptgGroups=(PTOKEN_GROUPS)InfoBuffer; - DWORD dwInfoBufferSize; - PSID psidAdministrators; - SID_IDENTIFIER_AUTHORITY siaNtAuthority = SECURITY_NT_AUTHORITY; - UINT x; - BOOL ret_value=FALSE; - - if (!OpenThreadToken(GetCurrentThread(), TOKEN_QUERY, TRUE,&hAccessToken )) - { - if (GetLastError() != ERROR_NO_TOKEN) - return FALSE; - - if (!OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &hAccessToken)) - return FALSE; - } - - ret_value= GetTokenInformation(hAccessToken,TokenGroups,InfoBuffer, - 1024, &dwInfoBufferSize); - - CloseHandle(hAccessToken); - - if (!ret_value ) - return FALSE; - - if (!AllocateAndInitializeSid(&siaNtAuthority, 2, - SECURITY_BUILTIN_DOMAIN_RID, - DOMAIN_ALIAS_RID_ADMINS, - 0, 0, 0, 0, 0, 0, - &psidAdministrators)) - return 
FALSE; - - ret_value = FALSE; - - for (x=0;x<ptgGroups->GroupCount;x++) - { - if ( EqualSid(psidAdministrators, ptgGroups->Groups[x].Sid) ) - { - ret_value = TRUE; - break; - } - - } - FreeSid(psidAdministrators); - return ret_value; -} diff --git a/sql/nt_servc.h b/sql/nt_servc.h deleted file mode 100644 index 8ba29519c8f..00000000000 --- a/sql/nt_servc.h +++ /dev/null @@ -1,113 +0,0 @@ -#ifndef NT_SERVC_INCLUDED -#define NT_SERVC_INCLUDED - -/** - @file - - @brief - Windows NT Service class library - - Copyright Abandoned 1998 Irena Pancirov - Irnet Snc - This file is public domain and comes with NO WARRANTY of any kind -*/ - -// main application thread -typedef void (*THREAD_FC)(void *); - -class NTService -{ - public: - NTService(); - ~NTService(); - - BOOL bOsNT; ///< true if OS is NT, false for Win95 - //install optinos - DWORD dwDesiredAccess; - DWORD dwServiceType; - DWORD dwStartType; - DWORD dwErrorControl; - - LPSTR szLoadOrderGroup; - LPDWORD lpdwTagID; - LPSTR szDependencies; - OSVERSIONINFO osVer; - - // time-out (in milisec) - int nStartTimeOut; - int nStopTimeOut; - int nPauseTimeOut; - int nResumeTimeOut; - - // - DWORD my_argc; - LPTSTR *my_argv; - HANDLE hShutdownEvent; - int nError; - DWORD dwState; - - //init service entry point - long Init(LPCSTR szInternName,THREAD_FC ServiceThread); - - //application shutdown event - void SetShutdownEvent(HANDLE hEvent){ hShutdownEvent=hEvent; } - - - //service install / un-install - BOOL Install(int startType,LPCSTR szInternName,LPCSTR szDisplayName, - LPCSTR szFullPath, LPCSTR szAccountName=NULL, - LPCSTR szPassword=NULL); - BOOL SeekStatus(LPCSTR szInternName, int OperationType); - BOOL Remove(LPCSTR szInternName); - BOOL IsService(LPCSTR ServiceName); - BOOL got_service_option(char **argv, const char *service_option); - BOOL is_super_user(); - - /* - SetRunning() is to be called by the application - when initialization completes and it can accept - stop request - */ - void SetRunning(void); - - /** - 
Sets a timeout after which SCM will abort service startup if SetRunning() - was not called or the timeout was not extended with another call to - SetSlowStarting(). Should be called when static initialization completes, - and the variable initialization part begins - - @arg timeout the timeout to pass to the SCM (in milliseconds) - */ - void SetSlowStarting(unsigned long timeout); - - /* - Stop() is to be called by the application to stop - the service - */ - void Stop(void); - - protected: - LPSTR ServiceName; - HANDLE hExitEvent; - SERVICE_STATUS_HANDLE hServiceStatusHandle; - BOOL bPause; - BOOL bRunning; - HANDLE hThreadHandle; - THREAD_FC fpServiceThread; - - void PauseService(); - void ResumeService(); - void StopService(); - BOOL StartService(); - - static void WINAPI ServiceMain(DWORD argc, LPTSTR *argv); - static void WINAPI ServiceCtrlHandler (DWORD ctrlCode); - - void Exit(DWORD error); - BOOL SetStatus (DWORD dwCurrentState,DWORD dwWin32ExitCode, - DWORD dwServiceSpecificExitCode, - DWORD dwCheckPoint,DWORD dwWaitHint); - -}; -/* ------------------------- the end -------------------------------------- */ - -#endif /* NT_SERVC_INCLUDED */ diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 9ec8781bc30..0ecba16730b 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -8031,7 +8031,7 @@ SEL_TREE *Item_func_in::get_func_row_mm_tree(RANGE_OPT_PARAM *param, table_map param_comp= ~(param->prev_tables | param->read_tables | param->current_table); uint row_cols= key_row->cols(); - Dynamic_array <Key_col_info> key_cols_info(row_cols); + Dynamic_array <Key_col_info> key_cols_info(PSI_INSTRUMENT_MEM,row_cols); cmp_item_row *row_cmp_item; if (array) diff --git a/sql/opt_split.cc b/sql/opt_split.cc index 28a54838169..c5bd8076209 100644 --- a/sql/opt_split.cc +++ b/sql/opt_split.cc @@ -187,6 +187,7 @@ #include "mariadb.h" #include "sql_select.h" +#include "opt_trace.h" /* Info on a splitting field */ struct SplM_field_info @@ -957,6 +958,7 @@ SplM_plan_info * 
JOIN_TAB::choose_best_splitting(double record_count, The key for splitting was chosen, look for the plan for this key in the cache */ + Json_writer_array spl_trace(thd, "choose_best_splitting"); spl_plan= spl_opt_info->find_plan(best_table, best_key, best_key_parts); if (!spl_plan && (spl_plan= (SplM_plan_info *) thd->alloc(sizeof(SplM_plan_info))) && @@ -988,6 +990,16 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count, spl_plan->cost= join->best_positions[join->table_count-1].read_time + + oper_cost; + if (unlikely(thd->trace_started())) + { + Json_writer_object wrapper(thd); + Json_writer_object find_trace(thd, "best_splitting"); + find_trace.add("table", best_table->alias.c_ptr()); + find_trace.add("key", best_table->key_info[best_key].name); + find_trace.add("record_count", record_count); + find_trace.add("cost", spl_plan->cost); + find_trace.add("unsplit_cost", spl_opt_info->unsplit_cost); + } memcpy((char *) spl_plan->best_positions, (char *) join->best_positions, sizeof(POSITION) * join->table_count); @@ -1014,6 +1026,11 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count, { startup_cost= record_count * spl_plan->cost; records= (ha_rows) (records * spl_plan->split_sel); + + Json_writer_object trace(thd, "lateral_derived"); + trace.add("startup_cost", startup_cost); + trace.add("splitting_cost", spl_plan->cost); + trace.add("records", records); } else startup_cost= spl_opt_info->unsplit_cost; diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index dd981a8fdcb..23a652bb87c 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -4291,11 +4291,11 @@ bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab) sjm_tab->type= JT_ALL; /* Initialize full scan */ - sjm_tab->read_first_record= join_read_record_no_init; + sjm_tab->read_first_record= join_init_read_record; sjm_tab->read_record.copy_field= sjm->copy_field; sjm_tab->read_record.copy_field_end= sjm->copy_field + sjm->sjm_table_cols.elements; - 
sjm_tab->read_record.read_record_func= rr_sequential_and_unpack; + sjm_tab->read_record.read_record_func= read_record_func_for_rr_and_unpack; } sjm_tab->bush_children->end[-1].next_select= end_sj_materialize; @@ -7145,3 +7145,16 @@ exit: thd->lex->current_select= save_curr_select; DBUG_RETURN(FALSE); } + +/* + @brief + Check if a table is a SJM Scan table + + @retval + TRUE SJM scan table + FALSE Otherwise +*/ +bool TABLE_LIST::is_sjm_scan_table() +{ + return is_active_sjm() && sj_mat_info->is_sj_scan; +} diff --git a/sql/opt_trace.cc b/sql/opt_trace.cc index ddec6d5ed2d..e1f402a4d7c 100644 --- a/sql/opt_trace.cc +++ b/sql/opt_trace.cc @@ -595,6 +595,18 @@ void Json_writer::add_table_name(const TABLE *table) } +void trace_condition(THD * thd, const char *name, const char *transform_type, + Item *item, const char *table_name) +{ + Json_writer_object trace_wrapper(thd); + Json_writer_object trace_cond(thd, transform_type); + trace_cond.add("condition", name); + if (table_name) + trace_cond.add("attached_to", table_name); + trace_cond.add("resulting_condition", item); +} + + void add_table_scan_values_to_trace(THD *thd, JOIN_TAB *tab) { DBUG_ASSERT(thd->trace_started()); diff --git a/sql/opt_trace.h b/sql/opt_trace.h index 550f18c0797..101fb5f707e 100644 --- a/sql/opt_trace.h +++ b/sql/opt_trace.h @@ -108,6 +108,10 @@ void print_final_join_order(JOIN *join); void print_best_access_for_table(THD *thd, POSITION *pos, enum join_type type); +void trace_condition(THD * thd, const char *name, const char *transform_type, + Item *item, const char *table_name= nullptr); + + /* Security related (need to add a proper comment here) */ diff --git a/sql/protocol.cc b/sql/protocol.cc index 08b874adba1..f369fa1c66f 100644 --- a/sql/protocol.cc +++ b/sql/protocol.cc @@ -211,8 +211,7 @@ bool Protocol::net_send_ok(THD *thd, uint server_status, uint statement_warn_count, ulonglong affected_rows, ulonglong id, - const char *message, bool is_eof, - bool skip_flush) + const char *message, 
bool is_eof) { NET *net= &thd->net; StringBuffer<MYSQL_ERRMSG_SIZE + 10> store; @@ -285,7 +284,7 @@ Protocol::net_send_ok(THD *thd, DBUG_ASSERT(store.length() <= MAX_PACKET_LENGTH); error= my_net_write(net, (const unsigned char*)store.ptr(), store.length()); - if (likely(!error) && (!skip_flush || is_eof)) + if (likely(!error)) error= net_flush(net); thd->server_status&= ~SERVER_SESSION_STATE_CHANGED; @@ -340,7 +339,7 @@ Protocol::net_send_eof(THD *thd, uint server_status, uint statement_warn_count) (thd->get_command() != COM_BINLOG_DUMP )) { error= net_send_ok(thd, server_status, statement_warn_count, 0, 0, NULL, - true, false); + true); DBUG_RETURN(error); } @@ -607,16 +606,14 @@ void Protocol::end_statement() thd->get_stmt_da()->statement_warn_count(), thd->get_stmt_da()->affected_rows(), thd->get_stmt_da()->last_insert_id(), - thd->get_stmt_da()->message(), - thd->get_stmt_da()->skip_flush()); + thd->get_stmt_da()->message()); break; case Diagnostics_area::DA_DISABLED: break; case Diagnostics_area::DA_EMPTY: default: DBUG_ASSERT(0); - error= send_ok(thd->server_status, 0, 0, 0, NULL, - thd->get_stmt_da()->skip_flush()); + error= send_ok(thd->server_status, 0, 0, 0, NULL); break; } if (likely(!error)) @@ -635,12 +632,12 @@ void Protocol::end_statement() bool Protocol::send_ok(uint server_status, uint statement_warn_count, ulonglong affected_rows, ulonglong last_insert_id, - const char *message, bool skip_flush) + const char *message) { DBUG_ENTER("Protocol::send_ok"); const bool retval= net_send_ok(thd, server_status, statement_warn_count, - affected_rows, last_insert_id, message, false, skip_flush); + affected_rows, last_insert_id, message, false); DBUG_RETURN(retval); } @@ -916,6 +913,241 @@ bool Protocol_text::store_field_metadata(const THD * thd, } +/* + MARIADB_CLIENT_CACHE_METADATA support. + + Bulk of the code below is dedicated to detecting whether column metadata has + changed after prepare, or between executions of a prepared statement. 
+ + For some prepared statements, metadata can't change without going through + Prepared_Statement::reprepare(), which makes detecting changes easy. + + Others, "SELECT ?" & Co, are more fragile, and sensitive to input parameters, + or user variables. Detecting metadata change for this class of PS is harder, + we calculate signature (hash value), and check whether this changes between + executions. This is a more expensive method. +*/ + + +/** + Detect whether column info can be changed without + PS repreparing. + + Such column info is called fragile. The opposite of + fragile is stable. + + + @param it - Item representing column info + @return true, if column info is "fragile", false if it is stable + + + @todo does not work due to MDEV-23913. Currently, + everything about prepared statements is fragile. +*/ + +static bool is_fragile_columnifo(Item *it) +{ +#define MDEV_23913_FIXED 0 +#if MDEV_23913_FIXED + if (dynamic_cast<Item_param *>(it)) + return true; + + if (dynamic_cast<Item_func_user_var *>(it)) + return true; + + if (dynamic_cast <Item_sp_variable*>(it)) + return true; + + /* Check arguments of functions.*/ + auto item_args= dynamic_cast<Item_args *>(it); + if (!item_args) + return false; + auto args= item_args->arguments(); + auto arg_count= item_args->argument_count(); + for (uint i= 0; i < arg_count; i++) + { + if (is_fragile_columnifo(args[i])) + return true; + } + return false; +#else /* MDEV-23913 fixed*/ + return true; +#endif +} + + +#define INVALID_METADATA_CHECKSUM 0 + + +/** + Calculate signature for column info sent to the client as CRC32 over data, + that goes into the column info packet. + We assume that if checksum does not change, then column info was not + modified. 
+ + @param thd THD + @param list column info + + @return CRC32 of the metadata +*/ + +static uint32 calc_metadata_hash(THD *thd, List<Item> *list) +{ + List_iterator_fast<Item> it(*list); + Item *item; + uint32 crc32_c= 0; + while ((item= it++)) + { + Send_field field(thd, item); + auto field_type= item->type_handler()->field_type(); + auto charset= item->charset_for_protocol(); + /* + The data below should contain everything that influences + content of the column info packet. + */ + LEX_CSTRING data[]= + { + field.table_name, + field.org_table_name, + field.col_name, + field.org_col_name, + field.db_name, + field.attr(MARIADB_FIELD_ATTR_DATA_TYPE_NAME), + field.attr(MARIADB_FIELD_ATTR_FORMAT_NAME), + {(const char *) &field.length, sizeof(field.length)}, + {(const char *) &field.flags, sizeof(field.flags)}, + {(const char *) &field.decimals, sizeof(field.decimals)}, + {(const char *) &charset, sizeof(charset)}, + {(const char *) &field_type, sizeof(field_type)}, + }; + for (const auto &chunk : data) + crc32_c= my_crc32c(crc32_c, chunk.str, chunk.length); + } + + if (crc32_c == INVALID_METADATA_CHECKSUM) + return 1; + return crc32_c; +} + + + +/** + Check if metadata columns have changed since last call to this + function. + + @param state saved state, changed if the function + returns true. 
+ @param thd THD + @param list columninfo Items + @return true,if metadata columns have changed since last call, + false otherwise +*/ + +static bool metadata_columns_changed(send_column_info_state &state, THD *thd, + List<Item> &list) +{ + if (!state.initialized) + { + state.initialized= true; + state.immutable= true; + Item *item; + List_iterator_fast<Item> it(list); + while ((item= it++)) + { + if (is_fragile_columnifo(item)) + { + state.immutable= false; + state.checksum= calc_metadata_hash(thd, &list); + break; + } + } + state.last_charset= thd->variables.character_set_client; + return true; + } + + /* + Since column info can change under our feet, we use more expensive + checksumming to check if column metadata has not changed since last time. + */ + if (!state.immutable) + { + uint32 checksum= calc_metadata_hash(thd, &list); + if (checksum != state.checksum) + { + state.checksum= checksum; + state.last_charset= thd->variables.character_set_client; + return true; + } + } + + /* + Character_set_client influences result set metadata, thus resend metadata + whenever it changes. + */ + if (state.last_charset != thd->variables.character_set_client) + { + state.last_charset= thd->variables.character_set_client; + return true; + } + + return false; +} + + +/** + Determine whether column info must be sent to the client. + Skip column info, if client supports caching, and (prepared) statement + output fields have not changed. + + @param thd THD + @param list column info + @param flags send flags. If Protocol::SEND_FORCE_COLUMN_INFO is set, + this function will return true + @return true, if column info must be sent to the client. 
+ false otherwise +*/ + +static bool should_send_column_info(THD* thd, List<Item>* list, uint flags) +{ + if (!(thd->client_capabilities & MARIADB_CLIENT_CACHE_METADATA)) + { + /* Client does not support abbreviated metadata.*/ + return true; + } + + if (!thd->cur_stmt) + { + /* Neither COM_STMT_PREPARE nor COM_STMT_EXECUTE is running.*/ + return true; + } + + if (thd->spcont) + { + /* Always send full metadata from inside the stored procedure.*/ + return true; + } + + if (flags & Protocol::SEND_FORCE_COLUMN_INFO) + return true; + + auto &column_info_state= thd->cur_stmt->column_info_state; +#ifndef DBUG_OFF + auto cmd= thd->get_command(); +#endif + + DBUG_ASSERT(cmd == COM_STMT_EXECUTE || cmd == COM_STMT_PREPARE); + DBUG_ASSERT(cmd != COM_STMT_PREPARE || !column_info_state.initialized); + + bool ret= metadata_columns_changed(column_info_state, thd, *list); + + DBUG_ASSERT(cmd != COM_STMT_PREPARE || ret); + if (!ret) + thd->status_var.skip_metadata_count++; + + return ret; +} + + /** Send name and type of result to client. @@ -941,30 +1173,44 @@ bool Protocol::send_result_set_metadata(List<Item> *list, uint flags) Protocol_text prot(thd, thd->variables.net_buffer_length); DBUG_ENTER("Protocol::send_result_set_metadata"); + bool send_column_info= should_send_column_info(thd, list, flags); + if (flags & SEND_NUM_ROWS) - { // Packet with number of elements - uchar buff[MAX_INT_WIDTH]; + { + /* + Packet with number of columns. + + Will also have a 1 byte column info indicator, in case + MARIADB_CLIENT_CACHE_METADATA client capability is set. 
+ */ + uchar buff[MAX_INT_WIDTH+1]; uchar *pos= net_store_length(buff, list->elements); + if (thd->client_capabilities & MARIADB_CLIENT_CACHE_METADATA) + *pos++= (uchar)send_column_info; + DBUG_ASSERT(pos <= buff + sizeof(buff)); if (my_net_write(&thd->net, buff, (size_t) (pos-buff))) DBUG_RETURN(1); } + if (send_column_info) + { #ifndef DBUG_OFF - field_handlers= (const Type_handler**) thd->alloc(sizeof(field_handlers[0]) * - list->elements); + field_handlers= (const Type_handler **) thd->alloc( + sizeof(field_handlers[0]) * list->elements); #endif - for (uint pos= 0; (item=it++); pos++) - { - prot.prepare_for_resend(); - if (prot.store_item_metadata(thd, item, pos)) - goto err; - if (prot.write()) - DBUG_RETURN(1); + for (uint pos= 0; (item= it++); pos++) + { + prot.prepare_for_resend(); + if (prot.store_item_metadata(thd, item, pos)) + goto err; + if (prot.write()) + DBUG_RETURN(1); #ifndef DBUG_OFF - field_handlers[pos]= item->type_handler(); + field_handlers[pos]= item->type_handler(); #endif + } } if (flags & SEND_EOF) @@ -1688,7 +1934,8 @@ bool Protocol_binary::send_out_parameters(List<Item_param> *sp_params) thd->server_status|= SERVER_PS_OUT_PARAMS | SERVER_MORE_RESULTS_EXISTS; /* Send meta-data. */ - if (send_result_set_metadata(&out_param_lst, SEND_NUM_ROWS | SEND_EOF)) + if (send_result_set_metadata(&out_param_lst, + SEND_NUM_ROWS | SEND_EOF | SEND_FORCE_COLUMN_INFO)) return TRUE; /* Send data. 
*/ diff --git a/sql/protocol.h b/sql/protocol.h index eb11304a4d5..a1868342ab4 100644 --- a/sql/protocol.h +++ b/sql/protocol.h @@ -54,7 +54,7 @@ protected: virtual bool net_store_data_cs(const uchar *from, size_t length, CHARSET_INFO *fromcs, CHARSET_INFO *tocs); virtual bool net_send_ok(THD *, uint, uint, ulonglong, ulonglong, const char *, - bool, bool); + bool); virtual bool net_send_error_packet(THD *, uint, const char *, const char *); #ifdef EMBEDDED_LIBRARY char **next_field; @@ -78,7 +78,7 @@ protected: virtual bool send_ok(uint server_status, uint statement_warn_count, ulonglong affected_rows, ulonglong last_insert_id, - const char *message, bool skip_flush); + const char *message); virtual bool send_eof(uint server_status, uint statement_warn_count); @@ -93,7 +93,7 @@ public: virtual ~Protocol() {} void init(THD* thd_arg); - enum { SEND_NUM_ROWS= 1, SEND_EOF= 2 }; + enum { SEND_NUM_ROWS= 1, SEND_EOF= 2, SEND_FORCE_COLUMN_INFO= 4 }; virtual bool send_result_set_metadata(List<Item> *list, uint flags); bool send_list_fields(List<Field> *list, const TABLE_LIST *table_list); bool send_result_set_row(List<Item> *row_items); diff --git a/sql/records.cc b/sql/records.cc index 900eacf5943..09500b3acef 100644 --- a/sql/records.cc +++ b/sql/records.cc @@ -255,7 +255,7 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, thd->variables.read_rnd_buff_size && !(table->file->ha_table_flags() & HA_FAST_KEY_READ) && (table->db_stat & HA_READ_ONLY || - table->reginfo.lock_type <= TL_READ_NO_INSERT) && + table->reginfo.lock_type < TL_FIRST_WRITE) && (ulonglong) table->s->reclength* (table->file->stats.records+ table->file->stats.deleted) > (ulonglong) MIN_FILE_LENGTH_TO_USE_ROW_CACHE && @@ -830,3 +830,32 @@ inline void SORT_INFO::unpack_addon_fields(uchar *buff) field->unpack(field->ptr, buff + addonf->offset, buff_end, 0); } } + + +/* + @brief + Read and unpack next record from a table + + @details + The function first reads the next record from the table. 
+ On success it then unpacks the values to the base table fields. + This is used by SJM scan table to unpack the values of the materialized + table to the base table fields. + + @retval + 0 Record successfully read. + @retval + -1 There is no record to be read anymore. + >0 Error +*/ +int read_record_func_for_rr_and_unpack(READ_RECORD *info) +{ + int error; + if ((error= info->read_record_func_and_unpack_calls(info))) + return error; + + for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++) + (*cp->do_copy)(cp); + + return error; +} diff --git a/sql/records.h b/sql/records.h index 272bbd0d9b5..9bc1b98fde4 100644 --- a/sql/records.h +++ b/sql/records.h @@ -56,6 +56,7 @@ struct READ_RECORD TABLE *table; /* Head-form */ Unlock_row_func unlock_row; Read_func read_record_func; + Read_func read_record_func_and_unpack_calls; THD *thd; SQL_SELECT *select; uint ref_length, reclength, rec_cache_size, error_offset; diff --git a/sql/rowid_filter.h b/sql/rowid_filter.h index 467b6884ca6..12710aecb18 100644 --- a/sql/rowid_filter.h +++ b/sql/rowid_filter.h @@ -311,7 +311,8 @@ public: bool alloc() { - array= new Dynamic_array<char> (elem_size * max_elements, + array= new Dynamic_array<char> (PSI_INSTRUMENT_MEM, + elem_size * max_elements, elem_size * max_elements/sizeof(char) + 1); return array == NULL; } diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h index 4d47689ac18..946d138d618 100644 --- a/sql/rpl_mi.h +++ b/sql/rpl_mi.h @@ -40,6 +40,13 @@ private: */ bool m_filter; +public: + /* domain id list types */ + enum enum_list_type { + DO_DOMAIN_IDS= 0, + IGNORE_DOMAIN_IDS + }; + /* DO_DOMAIN_IDS (0): Ignore all the events which do not belong to any of the domain ids in the @@ -50,13 +57,6 @@ private: */ DYNAMIC_ARRAY m_domain_ids[2]; -public: - /* domain id list types */ - enum enum_list_type { - DO_DOMAIN_IDS= 0, - IGNORE_DOMAIN_IDS - }; - Domain_id_filter(); ~Domain_id_filter(); diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 65d5a06a76a..8be1964b762 
100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -799,6 +799,7 @@ do_retry: mysql_mutex_lock(&rli->data_lock); ++rli->retried_trans; + ++rpt->last_trans_retry_count; statistic_increment(slave_retried_transactions, LOCK_status); mysql_mutex_unlock(&rli->data_lock); @@ -1099,6 +1100,11 @@ handle_rpl_parallel_thread(void *arg) mysql_mutex_lock(&rpt->LOCK_rpl_thread); rpt->thd= thd; + PSI_thread *psi= PSI_CALL_get_thread(); + PSI_CALL_set_thread_os_id(psi); + PSI_CALL_set_thread_THD(psi, thd); + PSI_CALL_set_thread_id(psi, thd->thread_id); + rpt->thd->set_psi(psi); while (rpt->delay_start) mysql_cond_wait(&rpt->COND_rpl_thread, &rpt->LOCK_rpl_thread); @@ -1120,6 +1126,7 @@ handle_rpl_parallel_thread(void *arg) uint wait_count= 0; rpl_parallel_thread::queued_event *qev, *next_qev; + rpt->start_time_tracker(); thd->ENTER_COND(&rpt->COND_rpl_thread, &rpt->LOCK_rpl_thread, &stage_waiting_for_work_from_sql_thread, &old_stage); /* @@ -1143,6 +1150,7 @@ handle_rpl_parallel_thread(void *arg) } rpt->dequeue1(events); thd->EXIT_COND(&old_stage); + rpt->add_to_worker_idle_time_and_reset(); more_events: for (qev= events; qev; qev= next_qev) @@ -1188,6 +1196,13 @@ handle_rpl_parallel_thread(void *arg) /* Handle a new event group, which will be initiated by a GTID event. 
*/ if ((event_type= qev->ev->get_type_code()) == GTID_EVENT) { + rpt->last_trans_retry_count= 0; + rpt->last_seen_gtid= rgi->current_gtid; + rpt->channel_name_length= (uint)rgi->rli->mi->connection_name.length; + if (rpt->channel_name_length) + memcpy(rpt->channel_name, rgi->rli->mi->connection_name.str, + rgi->rli->mi->connection_name.length); + bool did_enter_cond= false; PSI_stage_info old_stage; @@ -1737,6 +1752,7 @@ int rpl_parallel_activate_pool(rpl_parallel_thread_pool *pool) { int rc= 0; + struct pool_bkp_for_pfs* bkp= &pool->pfs_bkp; if ((rc= pool_mark_busy(pool, current_thd))) return rc; // killed @@ -1746,6 +1762,23 @@ rpl_parallel_activate_pool(rpl_parallel_thread_pool *pool) pool_mark_not_busy(pool); rc= rpl_parallel_change_thread_count(pool, opt_slave_parallel_threads, 0); + if (!rc) + { + if (pool->count) + { + if (bkp->inited) + { + if (bkp->count != pool->count) + { + bkp->destroy(); + bkp->init(pool->count); + } + } + else + bkp->init(pool->count); + } + } + } else { @@ -2003,8 +2036,16 @@ rpl_parallel_thread::loc_free_gco(group_commit_orderer *gco) } +rpl_parallel_thread::rpl_parallel_thread() + : channel_name_length(0), last_error_number(0), last_error_timestamp(0), + worker_idle_time(0), last_trans_retry_count(0), start_time(0) +{ +} + + rpl_parallel_thread_pool::rpl_parallel_thread_pool() - : threads(0), free_list(0), count(0), inited(false), busy(false) + : threads(0), free_list(0), count(0), inited(false), busy(false), + pfs_bkp{0, false, NULL} { } @@ -2035,6 +2076,7 @@ void rpl_parallel_thread_pool::destroy() { deactivate(); + pfs_bkp.destroy(); destroy_cond_mutex(); } @@ -2103,6 +2145,37 @@ rpl_parallel_thread_pool::release_thread(rpl_parallel_thread *rpt) mysql_mutex_unlock(&LOCK_rpl_thread_pool); } +void +rpl_parallel_thread_pool::copy_pool_for_pfs(Relay_log_info *rli) +{ + if (pfs_bkp.inited) + { + for(uint i=0; i<count;i++) + { + rpl_parallel_thread *rpt, *pfs_rpt; + rpt= threads[i]; + pfs_rpt= pfs_bkp.rpl_thread_arr[i]; + if 
(rpt->channel_name_length) + { + pfs_rpt->channel_name_length= rpt->channel_name_length; + strmake(pfs_rpt->channel_name, rpt->channel_name, + rpt->channel_name_length); + } + pfs_rpt->thd= rpt->thd; + pfs_rpt->last_seen_gtid= rpt->last_seen_gtid; + if (rli->err_thread_id && rpt->thd->thread_id == rli->err_thread_id) + { + pfs_rpt->last_error_number= rli->last_error().number; + strmake(pfs_rpt->last_error_message, + rli->last_error().message, sizeof(rli->last_error().message)); + pfs_rpt->last_error_timestamp= rli->last_error().skr*1000000; + } + pfs_rpt->running= false; + pfs_rpt->worker_idle_time= rpt->get_worker_idle_time(); + pfs_rpt->last_trans_retry_count= rpt->last_trans_retry_count; + } + } +} /* Obtain a worker thread that we can queue an event to. @@ -2371,6 +2444,7 @@ rpl_parallel::wait_for_done(THD *thd, Relay_log_info *rli) STRING_WITH_LEN("now SIGNAL wait_for_done_waiting")); };); + global_rpl_thread_pool.copy_pool_for_pfs(rli); for (i= 0; i < domain_hash.records; ++i) { e= (struct rpl_parallel_entry *)my_hash_element(&domain_hash, i); diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index b88e77d5427..cc7795b4b0d 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -7,6 +7,7 @@ struct rpl_parallel; struct rpl_parallel_entry; struct rpl_parallel_thread_pool; +extern struct rpl_parallel_thread_pool pool_bkp_for_pfs; class Relay_log_info; struct inuse_relaylog; @@ -161,6 +162,35 @@ struct rpl_parallel_thread { inuse_relaylog *accumulated_ir_last; uint64 accumulated_ir_count; + char channel_name[MAX_CONNECTION_NAME]; + uint channel_name_length; + rpl_gtid last_seen_gtid; + int last_error_number; + char last_error_message[MAX_SLAVE_ERRMSG]; + ulonglong last_error_timestamp; + ulonglong worker_idle_time; + ulong last_trans_retry_count; + ulonglong start_time; + void start_time_tracker() + { + start_time= microsecond_interval_timer(); + } + ulonglong compute_time_lapsed() + { + return (ulonglong)((microsecond_interval_timer() - start_time) / 
1000000.0); + } + void add_to_worker_idle_time_and_reset() + { + worker_idle_time+= compute_time_lapsed(); + start_time=0; + } + ulonglong get_worker_idle_time() + { + if (start_time) + return compute_time_lapsed(); + else + return worker_idle_time; + } void enqueue(queued_event *qev) { if (last_in_queue) @@ -224,9 +254,42 @@ struct rpl_parallel_thread { void batch_free(); /* Update inuse_relaylog refcounts with what we have accumulated so far. */ void inuse_relaylog_refcount_update(); + rpl_parallel_thread(); }; +struct pool_bkp_for_pfs{ + uint32 count; + bool inited; + struct rpl_parallel_thread **rpl_thread_arr; + void init(uint32 thd_count) + { + DBUG_ASSERT(thd_count); + rpl_thread_arr= (rpl_parallel_thread **) + my_malloc(PSI_INSTRUMENT_ME, + thd_count * sizeof(rpl_parallel_thread*), + MYF(MY_WME | MY_ZEROFILL)); + for (uint i=0; i<thd_count; i++) + rpl_thread_arr[i]= (rpl_parallel_thread *) + my_malloc(PSI_INSTRUMENT_ME, sizeof(rpl_parallel_thread), + MYF(MY_WME | MY_ZEROFILL)); + count= thd_count; + inited= true; + } + + void destroy() + { + if (inited) + { + for (uint i=0; i<count; i++) + my_free(rpl_thread_arr[i]); + + my_free(rpl_thread_arr); + rpl_thread_arr= NULL; + } + } +}; + struct rpl_parallel_thread_pool { struct rpl_parallel_thread **threads; struct rpl_parallel_thread *free_list; @@ -240,8 +303,10 @@ struct rpl_parallel_thread_pool { is in progress. 
*/ bool busy; + struct pool_bkp_for_pfs pfs_bkp; rpl_parallel_thread_pool(); + void copy_pool_for_pfs(Relay_log_info *rli); int init(uint32 size); void destroy(); void deactivate(); diff --git a/sql/rpl_reporting.cc b/sql/rpl_reporting.cc index aa69168d44c..d04f18c9c44 100644 --- a/sql/rpl_reporting.cc +++ b/sql/rpl_reporting.cc @@ -22,7 +22,7 @@ #include "sql_class.h" Slave_reporting_capability::Slave_reporting_capability(char const *thread_name) - : m_thread_name(thread_name) + : err_thread_id(0), m_thread_name(thread_name) { mysql_mutex_init(key_mutex_slave_reporting_capability_err_lock, &err_lock, MY_MUTEX_INIT_FAST); @@ -51,6 +51,7 @@ Slave_reporting_capability::report(loglevel level, int err_code, pbuff= m_last_error.message; pbuffsize= sizeof(m_last_error.message); m_last_error.number = err_code; + m_last_error.update_timestamp(); report_function= sql_print_error; break; case WARNING_LEVEL: @@ -69,6 +70,7 @@ Slave_reporting_capability::report(loglevel level, int err_code, mysql_mutex_unlock(&err_lock); va_end(args); + err_thread_id= current_thd->thread_id; /* If the msg string ends with '.', do not add a ',' it would be ugly */ report_function("%s %s: %s%s %s%sInternal MariaDB error code: %d", diff --git a/sql/rpl_reporting.h b/sql/rpl_reporting.h index 62b934c1527..46a71ff5ad6 100644 --- a/sql/rpl_reporting.h +++ b/sql/rpl_reporting.h @@ -41,6 +41,7 @@ public: @param thread_name Printable name of the slave thread that is reporting. 
*/ Slave_reporting_capability(char const *thread_name); + mutable my_thread_id err_thread_id; /** Writes a message and, if it's an error message, to Last_Error @@ -81,12 +82,35 @@ public: { number= 0; message[0]= '\0'; + timestamp[0]= '\0'; + } + void update_timestamp() + { + struct tm tm_tmp; + struct tm *start; + + skr= my_time(0); + localtime_r(&skr, &tm_tmp); + start=&tm_tmp; + + sprintf(timestamp, "%02d%02d%02d %02d:%02d:%02d", + start->tm_year % 100, + start->tm_mon+1, + start->tm_mday, + start->tm_hour, + start->tm_min, + start->tm_sec); + timestamp[15]= '\0'; } /** Error code */ uint32 number; /** Error message */ char message[MAX_SLAVE_ERRMSG]; + /** Error timestamp as string */ + char timestamp[64]; + /** Error timestamp as time_t variable. Used in performance_schema */ + time_t skr; }; Error const& last_error() const { return m_last_error; } diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 4c04382a5dc..957d440de94 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -61,7 +61,7 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery, const char* thread_name) gtid_skip_flag(GTID_SKIP_NOT), inited(0), abort_slave(0), stop_for_until(0), slave_running(MYSQL_SLAVE_NOT_RUN), until_condition(UNTIL_NONE), until_log_pos(0), retried_trans(0), executed_entries(0), - sql_delay(0), sql_delay_end(0), + last_trans_retry_count(0), sql_delay(0), sql_delay_end(0), until_relay_log_names_defer(false), m_flags(0) { @@ -87,6 +87,7 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery, const char* thread_name) max_relay_log_size= global_system_variables.max_relay_log_size; bzero((char*) &info_file, sizeof(info_file)); bzero((char*) &cache_buf, sizeof(cache_buf)); + bzero(&last_seen_gtid, sizeof(last_seen_gtid)); mysql_mutex_init(key_relay_log_info_run_lock, &run_lock, MY_MUTEX_INIT_FAST); mysql_mutex_init(key_relay_log_info_data_lock, &data_lock, MY_MUTEX_INIT_FAST); @@ -1710,7 +1711,8 @@ scan_all_gtid_slave_pos_table(THD *thd, int (*cb)(THD *, LEX_CSTRING *, void *), 
else { size_t i; - Dynamic_array<LEX_CSTRING*> files(dirp->number_of_files); + Dynamic_array<LEX_CSTRING*> files(PSI_INSTRUMENT_MEM, + dirp->number_of_files); Discovered_table_list tl(thd, &files); int err; diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 85e31ef8187..cc807852bf2 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -539,7 +539,8 @@ public: int32 get_sql_delay() { return sql_delay; } void set_sql_delay(int32 _sql_delay) { sql_delay= _sql_delay; } time_t get_sql_delay_end() { return sql_delay_end; } - + rpl_gtid last_seen_gtid; + ulong last_trans_retry_count; private: diff --git a/sql/scheduler.h b/sql/scheduler.h index ebf8d6e9e64..68387390d81 100644 --- a/sql/scheduler.h +++ b/sql/scheduler.h @@ -40,6 +40,8 @@ struct scheduler_functions void (*thd_wait_end)(THD *thd); void (*post_kill_notification)(THD *thd); void (*end)(void); + /** resume previous unfinished command (threadpool only)*/ + void (*thd_resume)(THD* thd); }; diff --git a/sql/session_tracker.cc b/sql/session_tracker.cc index de82d8be90c..3eacdc03b50 100644 --- a/sql/session_tracker.cc +++ b/sql/session_tracker.cc @@ -996,7 +996,7 @@ enum_tx_state Transaction_state_tracker::calc_trx_state(THD *thd, bool has_trx) { enum_tx_state s; - bool read= (l <= TL_READ_NO_INSERT); + bool read= (l < TL_FIRST_WRITE); if (read) s= has_trx ? 
TX_READ_TRX : TX_READ_UNSAFE; diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index 3f3cb7677fc..555836406fb 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -7546,13 +7546,10 @@ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_GIS # MariaDB extra error numbers starts from 4000 skip-to-error-number 4000 -ER_COMMULTI_BADCONTEXT 0A000 - eng "COM_MULTI can't return a result set in the given context" - ger "COM_MULTI kann im gegebenen Kontext keine Ergebnismenge zurückgeben" - ukr "COM_MULTI не може повернути результати у цьому контексті" -ER_BAD_COMMAND_IN_MULTI - eng "Command '%s' is not allowed for COM_MULTI" - ukr "Команда '%s' не дозволена для COM_MULTI" +ER_UNUSED_26 0A000 + eng "This error never happens" +ER_UNUSED_27 + eng "This error never happens" ER_WITH_COL_WRONG_LIST eng "WITH column list and SELECT field list have different column counts" ER_TOO_MANY_DEFINITIONS_IN_WITH_CLAUSE @@ -7648,8 +7645,8 @@ ER_JSON_PATH_ARRAY eng "JSON path should end with an array identifier in argument %d to function '%s'" ER_JSON_ONE_OR_ALL eng "Argument 2 to function '%s' must be "one" or "all"." -ER_UNSUPPORT_COMPRESSED_TEMPORARY_TABLE - eng "CREATE TEMPORARY TABLE is not allowed with ROW_FORMAT=COMPRESSED or KEY_BLOCK_SIZE." +ER_UNSUPPORTED_COMPRESSED_TABLE + eng "InnoDB refuses to write tables with ROW_FORMAT=COMPRESSED or KEY_BLOCK_SIZE." ER_GEOJSON_INCORRECT eng "Incorrect GeoJSON format specified for st_geomfromgeojson function." 
ER_GEOJSON_TOO_FEW_POINTS @@ -7951,7 +7948,7 @@ ER_WARN_HISTORY_ROW_START_TIME ER_PART_STARTS_BEYOND_INTERVAL eng "%`s: STARTS is later than query time, first history partition may exceed INTERVAL value" ER_GALERA_REPLICATION_NOT_SUPPORTED - eng "DDL-statement is forbidden as table storage engine does not support Galera replication" + eng "Galera replication not supported" ER_LOAD_INFILE_CAPABILITY_DISABLED eng "The used command is not allowed because the MariaDB server or client has disabled the local infile capability" rum "Comanda folosită nu este permisă deoarece clientul sau serverul MariaDB a dezactivat această capabilitate" @@ -7974,3 +7971,7 @@ ER_NOT_ALLOWED_IN_THIS_CONTEXT eng "'%-.128s' is not allowed in this context" ER_DATA_WAS_COMMITED_UNDER_ROLLBACK eng "Engine %s does not support rollback. Changes were committed during rollback call" +ER_PK_INDEX_CANT_BE_IGNORED + eng "A primary key cannot be marked as IGNORE" +ER_BINLOG_UNSAFE_SKIP_LOCKED + eng "SKIP LOCKED makes this statement unsafe" diff --git a/sql/slave.cc b/sql/slave.cc index 1da030084ef..1bd2802858c 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2017, Oracle and/or its affiliates. - Copyright (c) 2009, 2020, MariaDB Corporation. + Copyright (c) 2009, 2021, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -348,8 +348,7 @@ gtid_pos_table_creation(THD *thd, plugin_ref engine, LEX_CSTRING *table_name) err= parser_state.init(thd, thd->query(), thd->query_length()); if (err) goto end; - mysql_parse(thd, thd->query(), thd->query_length(), &parser_state, - FALSE, FALSE); + mysql_parse(thd, thd->query(), thd->query_length(), &parser_state); if (unlikely(thd->is_error())) err= 1; /* The warning is relevant to 10.3 and earlier. 
*/ @@ -4298,6 +4297,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, DBUG_RETURN(1); } + rli->last_seen_gtid= serial_rgi->current_gtid; + rli->last_trans_retry_count= serial_rgi->trans_retries; if (opt_gtid_ignore_duplicates && rli->mi->using_gtid != Master_info::USE_GTID_NO) { @@ -4950,20 +4951,17 @@ log space"); err: // print the current replication position if (mi->using_gtid == Master_info::USE_GTID_NO) - { sql_print_information("Slave I/O thread exiting, read up to log '%s', " - "position %llu", IO_RPL_LOG_NAME, mi->master_log_pos); - sql_print_information("master was %s:%d", mi->host, mi->port); - } + "position %llu, master %s:%d", IO_RPL_LOG_NAME, mi->master_log_pos, + mi->host, mi->port); else { StringBuffer<100> tmp; mi->gtid_current_pos.to_string(&tmp); sql_print_information("Slave I/O thread exiting, read up to log '%s', " - "position %llu; GTID position %s", + "position %llu; GTID position %s, master %s:%d", IO_RPL_LOG_NAME, mi->master_log_pos, - tmp.c_ptr_safe()); - sql_print_information("master was %s:%d", mi->host, mi->port); + tmp.c_ptr_safe(), mi->host, mi->port); } repl_semisync_slave.slave_stop(mi); thd->reset_query(); @@ -5309,6 +5307,7 @@ pthread_handler_t handle_slave_sql(void *arg) serial_rgi->gtid_sub_id= 0; serial_rgi->gtid_pending= false; + rli->last_seen_gtid= serial_rgi->current_gtid; if (mi->using_gtid != Master_info::USE_GTID_NO && mi->using_parallel() && rli->restart_gtid_pos.count() > 0) { @@ -5566,9 +5565,9 @@ pthread_handler_t handle_slave_sql(void *arg) tmp.append(STRING_WITH_LEN("'")); } sql_print_information("Slave SQL thread exiting, replication stopped in " - "log '%s' at position %llu%s", RPL_LOG_NAME, - rli->group_master_log_pos, tmp.c_ptr_safe()); - sql_print_information("master was %s:%d", mi->host, mi->port); + "log '%s' at position %llu%s, master: %s:%d", RPL_LOG_NAME, + rli->group_master_log_pos, tmp.c_ptr_safe(), + mi->host, mi->port); } #ifdef WITH_WSREP wsrep_after_command_before_result(thd); diff 
--git a/sql/sp.cc b/sql/sp.cc index 46494fb2393..ac9203ffb8a 100644 --- a/sql/sp.cc +++ b/sql/sp.cc @@ -177,7 +177,7 @@ TABLE_FIELD_TYPE proc_table_fields[MYSQL_PROC_FIELD_COUNT] = }, { { STRING_WITH_LEN("definer") }, - { STRING_WITH_LEN("char(") }, + { STRING_WITH_LEN("varchar(") }, { STRING_WITH_LEN("utf8") } }, { diff --git a/sql/sp_head.cc b/sql/sp_head.cc index 513e7207b7e..ec5b2d0e832 100644 --- a/sql/sp_head.cc +++ b/sql/sp_head.cc @@ -1473,7 +1473,9 @@ sp_head::execute(THD *thd, bool merge_da_on_success) WSREP_DEBUG("MUST_REPLAY set after SP, err_status %d trx state: %d", err_status, thd->wsrep_trx().state()); } - (void) wsrep_after_statement(thd); + + if (wsrep_thd_is_local(thd)) + (void) wsrep_after_statement(thd); /* Reset the return code to zero if the transaction was diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index bba875d883c..270031db645 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -64,8 +64,8 @@ bool using_global_priv_table= true; // set that from field length in acl_load? #ifndef NO_EMBEDDED_ACCESS_CHECKS -const uint max_hostname_length= 60; -const uint max_dbname_length= 64; +const uint max_hostname_length= HOSTNAME_LENGTH; +const uint max_dbname_length= NAME_CHAR_LEN; #endif const char *safe_vio_type_name(Vio *vio) @@ -655,7 +655,7 @@ bool ROLE_GRANT_PAIR::init(MEM_ROOT *mem, const char *username, #define ROLE_OPENED (1L << 3) static DYNAMIC_ARRAY acl_hosts, acl_users, acl_proxy_users; -static Dynamic_array<ACL_DB> acl_dbs(PSI_INSTRUMENT_MEM, 0U, 50U); +static Dynamic_array<ACL_DB> acl_dbs(PSI_INSTRUMENT_MEM, 0, 50); typedef Dynamic_array<ACL_DB>::CMP_FUNC acl_dbs_cmp; static HASH acl_roles; /* @@ -1942,7 +1942,7 @@ class Grant_tables We can read privilege tables even when !initialized. 
This can be acl_load() - server startup or FLUSH PRIVILEGES */ - if (lock_type >= TL_WRITE_ALLOW_WRITE && !initialized) + if (lock_type >= TL_FIRST_WRITE && !initialized) { my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--skip-grant-tables"); DBUG_RETURN(-1); @@ -1957,7 +1957,7 @@ class Grant_tables NULL, lock_type); tl->open_type= OT_BASE_ONLY; tl->i_s_requested_object= OPEN_TABLE_ONLY; - tl->updating= lock_type >= TL_WRITE_ALLOW_WRITE; + tl->updating= lock_type >= TL_FIRST_WRITE; if (i >= FIRST_OPTIONAL_TABLE) tl->open_strategy= TABLE_LIST::OPEN_IF_EXISTS; tl->next_global= tl->next_local= first; @@ -1982,7 +1982,7 @@ class Grant_tables NULL, lock_type); tl->open_type= OT_BASE_ONLY; tl->i_s_requested_object= OPEN_TABLE_ONLY; - tl->updating= lock_type >= TL_WRITE_ALLOW_WRITE; + tl->updating= lock_type >= TL_FIRST_WRITE; p_user_table= &m_user_table_tabular; counter++; res= really_open(thd, tl, &unused); @@ -2049,7 +2049,7 @@ class Grant_tables { DBUG_ENTER("Grant_tables::really_open:"); #ifdef HAVE_REPLICATION - if (tables->lock_type >= TL_WRITE_ALLOW_WRITE && + if (tables->lock_type >= TL_FIRST_WRITE && thd->slave_thread && !thd->spcont) { /* @@ -2786,7 +2786,7 @@ void acl_free(bool end) bool acl_reload(THD *thd) { DYNAMIC_ARRAY old_acl_hosts, old_acl_users, old_acl_proxy_users; - Dynamic_array<ACL_DB> old_acl_dbs(0U,0U); + Dynamic_array<ACL_DB> old_acl_dbs(PSI_INSTRUMENT_MEM, 0, 0); HASH old_acl_roles, old_acl_roles_mappings; MEM_ROOT old_mem; int result; @@ -6184,8 +6184,8 @@ static int traverse_role_graph_impl(ACL_USER_BASE *user, void *context, It uses a Dynamic_array to reduce the number of malloc calls to a minimum */ - Dynamic_array<NODE_STATE> stack(20,50); - Dynamic_array<ACL_USER_BASE *> to_clear(20,50); + Dynamic_array<NODE_STATE> stack(PSI_INSTRUMENT_MEM, 20,50); + Dynamic_array<ACL_USER_BASE *> to_clear(PSI_INSTRUMENT_MEM, 20, 50); NODE_STATE state; /* variable used to insert elements in the stack */ int result= 0; @@ -8130,7 +8130,7 @@ bool 
check_grant(THD *thd, privilege_t want_access, TABLE_LIST *tables, We want to have either SELECT or INSERT rights to sequences depending on how they are accessed */ - orig_want_access= ((t_ref->lock_type == TL_WRITE_ALLOW_WRITE) ? + orig_want_access= ((t_ref->lock_type >= TL_FIRST_WRITE) ? INSERT_ACL : SELECT_ACL); } diff --git a/sql/sql_acl_getsort.ic b/sql/sql_acl_getsort.ic index df55c7c5f1e..046b412d5f6 100644 --- a/sql/sql_acl_getsort.ic +++ b/sql/sql_acl_getsort.ic @@ -14,6 +14,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ #ifndef NO_EMBEDDED_ACCESS_CHECKS + +#define magic_bits 30 /* Returns a number which, if sorted in descending order, magically puts patterns in the order from most specific (e.g. no wildcards) to most generic @@ -21,8 +23,8 @@ Takes a template that lists types of following patterns (by the first letter of _h_ostname, _d_bname, _u_sername) and up to four patterns. - No more than two can be of 'h' or 'd' type (because one magic value takes 26 - bits, see below). + No more than two can be of 'h' or 'd' type (because one magic value takes + magic_bits bits, see below). ======================================================================== @@ -142,7 +144,7 @@ case 2: ((M*2*(maxlen+1) + L)*(maxlen+1) + K)*(maxlen+1) + P upper bound: L<=maxlen, M<=maxlen, K<=maxlen/2, P<maxlen - for a current maxlen=64, the magic number needs 26 bits. + for a current maxlen=64, the magic number needs magic_bits bits. */ static ulonglong get_magic_sort(const char *templ, ...) @@ -165,9 +167,9 @@ static ulonglong get_magic_sort(const char *templ, ...) continue; } - /* A wildcard pattern. Encoded in 26 bits. */ + /* A wildcard pattern. Encoded in magic_bits bits. */ uint maxlen= *templ == 'd' ? 
max_dbname_length : max_hostname_length; - DBUG_ASSERT(maxlen <= 64); + DBUG_ASSERT(maxlen <= 255); DBUG_ASSERT(*templ == 'd' || *templ == 'h'); uint N= 0, M= 0, K= 0, P= 0; @@ -189,14 +191,19 @@ static ulonglong get_magic_sort(const char *templ, ...) if (pat[i] == wild_prefix && pat[i+1]) i++; N++; } - uint L= K ? maxlen - N - M : 0, d= maxlen + 1, magic; + + set_if_smaller(K, 31); + set_if_smaller(M, 31); + + ulonglong L= K ? maxlen - N - M : 0, d= maxlen + 1, magic; + ulonglong d1= MY_MIN(d, 32); if (L > M) - magic= (((L * 2 + 1) * d + K) * d + M) * d + P; + magic= (((L * 2 + 1) * d + K) * d1 + M) * d + P; else - magic= (((M * 2 + 0) * d + L) * d + K) * d + P; - DBUG_ASSERT(magic < 1<<26); - sort= (sort << 26) + magic; - IF_DBUG(bits_used+= 26,); + magic= (((M * 2 + 0) * d + L) * d1 + K) * d + P; + DBUG_ASSERT(magic < (1ULL << magic_bits)); + sort= (sort << magic_bits) + magic; + IF_DBUG(bits_used+= magic_bits,); } DBUG_ASSERT(bits_used < 8*sizeof(sort)); va_end(args); diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc index 3a4cc281e8c..31192720610 100644 --- a/sql/sql_admin.cc +++ b/sql/sql_admin.cc @@ -571,7 +571,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, */ table->mdl_request.set_type(lex->sql_command == SQLCOM_REPAIR ? MDL_SHARED_NO_READ_WRITE - : lock_type >= TL_WRITE_ALLOW_WRITE + : lock_type >= TL_FIRST_WRITE ? 
MDL_SHARED_WRITE : MDL_SHARED_READ); /* open only one table from local list of command */ diff --git a/sql/sql_alter.cc b/sql/sql_alter.cc index 7901c9b5e32..9af4dfde77d 100644 --- a/sql/sql_alter.cc +++ b/sql/sql_alter.cc @@ -27,6 +27,7 @@ Alter_info::Alter_info(const Alter_info &rhs, MEM_ROOT *mem_root) key_list(rhs.key_list, mem_root), alter_rename_key_list(rhs.alter_rename_key_list, mem_root), create_list(rhs.create_list, mem_root), + alter_index_ignorability_list(rhs.alter_index_ignorability_list, mem_root), check_constraint_list(rhs.check_constraint_list, mem_root), flags(rhs.flags), partition_flags(rhs.partition_flags), keys_onoff(rhs.keys_onoff), diff --git a/sql/sql_alter.h b/sql/sql_alter.h index 89eb4ebb3e9..1c98ac1651d 100644 --- a/sql/sql_alter.h +++ b/sql/sql_alter.h @@ -20,6 +20,7 @@ class Alter_drop; class Alter_column; class Alter_rename_key; +class Alter_index_ignorability; class Key; /** @@ -95,6 +96,8 @@ public: List<Alter_rename_key> alter_rename_key_list; // List of columns, used by both CREATE and ALTER TABLE. List<Create_field> create_list; + // Indexes whose ignorability needs to be changed. + List<Alter_index_ignorability> alter_index_ignorability_list; List<Virtual_column_info> check_constraint_list; // Type of ALTER TABLE operation. 
alter_table_operations flags; @@ -129,6 +132,7 @@ public: key_list.empty(); alter_rename_key_list.empty(); create_list.empty(); + alter_index_ignorability_list.empty(); check_constraint_list.empty(); flags= 0; partition_flags= 0; diff --git a/sql/sql_array.h b/sql/sql_array.h index b6de1b18d78..8610e971016 100644 --- a/sql/sql_array.h +++ b/sql/sql_array.h @@ -112,7 +112,7 @@ private: template <class Elem> class Dynamic_array { - DYNAMIC_ARRAY array; + DYNAMIC_ARRAY array; public: Dynamic_array(PSI_memory_key psi_key, uint prealloc=16, uint increment=16) { @@ -170,6 +170,8 @@ public: return ((const Elem*)array.buffer) + array.elements - 1; } + size_t size() const { return array.elements; } + const Elem *end() const { return back() + 1; diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 5f66ea9608f..16d1440dcb9 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -2724,7 +2724,7 @@ bool Locked_tables_list::restore_lock(THD *thd, TABLE_LIST *dst_table_list, add_back_last_deleted_lock(dst_table_list); table->mdl_ticket->downgrade_lock(table->reginfo.lock_type >= - TL_WRITE_ALLOW_WRITE ? + TL_FIRST_WRITE ? MDL_SHARED_NO_READ_WRITE : MDL_SHARED_READ); @@ -3518,7 +3518,7 @@ bool extend_table_list(THD *thd, TABLE_LIST *tables, bool error= false; LEX *lex= thd->lex; bool maybe_need_prelocking= - (tables->updating && tables->lock_type >= TL_WRITE_ALLOW_WRITE) + (tables->updating && tables->lock_type >= TL_FIRST_WRITE) || thd->lex->default_used; if (thd->locked_tables_mode <= LTM_LOCK_TABLES && @@ -4421,46 +4421,72 @@ restart: /* Set appropriate TABLE::lock_type. 
*/ if (tbl && tables->lock_type != TL_UNLOCK && !thd->locked_tables_mode) { - if (tables->lock_type == TL_WRITE_DEFAULT) - tbl->reginfo.lock_type= thd->update_lock_default; - else if (tables->lock_type == TL_READ_DEFAULT) - tbl->reginfo.lock_type= - read_lock_type_for_table(thd, thd->lex, tables, - some_routine_modifies_data); + if (tables->lock_type == TL_WRITE_DEFAULT || + unlikely(tables->lock_type == TL_WRITE_SKIP_LOCKED && + !(tables->table->file->ha_table_flags() & HA_CAN_SKIP_LOCKED))) + tbl->reginfo.lock_type= thd->update_lock_default; + else if (likely(tables->lock_type == TL_READ_DEFAULT) || + (tables->lock_type == TL_READ_SKIP_LOCKED && + !(tables->table->file->ha_table_flags() & HA_CAN_SKIP_LOCKED))) + tbl->reginfo.lock_type= read_lock_type_for_table(thd, thd->lex, tables, + some_routine_modifies_data); else tbl->reginfo.lock_type= tables->lock_type; + tbl->reginfo.skip_locked= tables->skip_locked; } - } - #ifdef WITH_WSREP - if (WSREP(thd) && - wsrep_replicate_myisam && - (*start) && - (*start)->table && - (*start)->table->file->ht == myisam_hton && - wsrep_thd_is_local(thd) && - !is_stat_table(&(*start)->db, &(*start)->alias) && - thd->get_command() != COM_STMT_PREPARE && - ((thd->lex->sql_command == SQLCOM_INSERT || - thd->lex->sql_command == SQLCOM_INSERT_SELECT || - thd->lex->sql_command == SQLCOM_REPLACE || - thd->lex->sql_command == SQLCOM_REPLACE_SELECT || - thd->lex->sql_command == SQLCOM_UPDATE || - thd->lex->sql_command == SQLCOM_UPDATE_MULTI || - thd->lex->sql_command == SQLCOM_LOAD || - thd->lex->sql_command == SQLCOM_DELETE))) - { - wsrep_before_rollback(thd, true); - wsrep_after_rollback(thd, true); - wsrep_after_statement(thd); - WSREP_TO_ISOLATION_BEGIN(NULL, NULL, (*start)); - } + /* + At this point we have SE associated with table so we can check wsrep_mode + rules at this point. 
+ */ + if (WSREP(thd) && + wsrep_thd_is_local(thd) && + tbl && + tables == *start && + !wsrep_check_mode_after_open_table(thd, + tbl->file->ht, tables)) + { + error= TRUE; + goto error; + } + + /* If user has issued wsrep_on = OFF and wsrep was on before + we need to check is local gtid feature disabled */ + if (thd->wsrep_was_on && + thd->variables.sql_log_bin == 1 && + !WSREP(thd) && + wsrep_check_mode(WSREP_MODE_DISALLOW_LOCAL_GTID)) + { + enum_sql_command sql_command= thd->lex->sql_command; + bool is_dml_stmt= thd->get_command() != COM_STMT_PREPARE && + (sql_command == SQLCOM_INSERT || + sql_command == SQLCOM_INSERT_SELECT || + sql_command == SQLCOM_REPLACE || + sql_command == SQLCOM_REPLACE_SELECT || + sql_command == SQLCOM_UPDATE || + sql_command == SQLCOM_UPDATE_MULTI || + sql_command == SQLCOM_LOAD || + sql_command == SQLCOM_DELETE); + + if (is_dml_stmt && !is_temporary_table(tables)) + { + /* wsrep_mode = WSREP_MODE_DISALLOW_LOCAL_GTID, treat as error */ + my_error(ER_GALERA_REPLICATION_NOT_SUPPORTED, MYF(0)); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_OPTION_PREVENTS_STATEMENT, + "You can't execute statements that would generate local " + "GTIDs when wsrep_mode = DISALLOW_LOCAL_GTID is set. " + "Try disabling binary logging with SET sql_log_bin=0 " + "to execute this statement."); + + error= TRUE; + goto error; + } + } #endif /* WITH_WSREP */ + } error: -#ifdef WITH_WSREP -wsrep_error_label: -#endif THD_STAGE_INFO(thd, stage_after_opening_tables); thd_proc_info(thd, 0); @@ -4687,7 +4713,7 @@ handle_table(THD *thd, Query_tables_list *prelocking_ctx, DBUG_ENTER("handle_table"); TABLE *table= table_list->table; /* We rely on a caller to check that table is going to be changed. 
*/ - DBUG_ASSERT(table_list->lock_type >= TL_WRITE_ALLOW_WRITE || + DBUG_ASSERT(table_list->lock_type >= TL_FIRST_WRITE || thd->lex->default_used); if (table_list->trg_event_map) @@ -4869,7 +4895,7 @@ handle_table(THD *thd, Query_tables_list *prelocking_ctx, tl->open_strategy= TABLE_LIST::OPEN_NORMAL; /* We rely on a caller to check that table is going to be changed. */ - DBUG_ASSERT(table_list->lock_type >= TL_WRITE_ALLOW_WRITE); + DBUG_ASSERT(table_list->lock_type >= TL_FIRST_WRITE); return FALSE; } @@ -4976,8 +5002,8 @@ static bool check_lock_and_start_stmt(THD *thd, else lock_type= table_list->lock_type; - if ((int) lock_type >= (int) TL_WRITE_ALLOW_WRITE && - (int) table_list->table->reginfo.lock_type < (int) TL_WRITE_ALLOW_WRITE) + if ((int) lock_type >= (int) TL_FIRST_WRITE && + (int) table_list->table->reginfo.lock_type < (int) TL_FIRST_WRITE) { my_error(ER_TABLE_NOT_LOCKED_FOR_WRITE, MYF(0), table_list->table->alias.c_ptr()); @@ -5414,7 +5440,7 @@ static bool fix_all_session_vcol_exprs(THD *thd, TABLE_LIST *tables) { TABLE *t= table->table; if (!table->placeholder() && t->s->vcols_need_refixing && - table->lock_type >= TL_WRITE_ALLOW_WRITE) + table->lock_type >= TL_FIRST_WRITE) { Query_arena *stmt_backup= thd->stmt_arena; if (thd->stmt_arena->is_conventional()) @@ -5572,7 +5598,7 @@ bool lock_tables(THD *thd, TABLE_LIST *tables, uint count, uint flags) a table that is already used by the calling statement. */ if (thd->locked_tables_mode >= LTM_PRELOCKED && - table->lock_type >= TL_WRITE_ALLOW_WRITE) + table->lock_type >= TL_FIRST_WRITE) { for (TABLE* opentab= thd->open_tables; opentab; opentab= opentab->next) { @@ -9025,7 +9051,7 @@ open_system_tables_for_read(THD *thd, TABLE_LIST *table_list) if (open_and_lock_tables(thd, table_list, FALSE, (MYSQL_OPEN_IGNORE_FLUSH | MYSQL_OPEN_IGNORE_LOGGING_FORMAT | - (table_list->lock_type < TL_WRITE_ALLOW_WRITE ? + (table_list->lock_type < TL_FIRST_WRITE ? 
MYSQL_LOCK_IGNORE_TIMEOUT : 0)))) { lex->restore_backup_query_tables_list(&query_tables_list_backup); diff --git a/sql/sql_bootstrap.cc b/sql/sql_bootstrap.cc index dbeb971cd5a..b39d7a57bc0 100644 --- a/sql/sql_bootstrap.cc +++ b/sql/sql_bootstrap.cc @@ -18,9 +18,20 @@ #include <ctype.h> #include <string.h> #include "sql_bootstrap.h" +#include <string> -int read_bootstrap_query(char *query, int *query_length, - fgets_input_t input, fgets_fn_t fgets_fn, int *error) +static bool is_end_of_query(const char *line, size_t len, + const std::string& delimiter) +{ + if (delimiter.length() > len) + return false; + return !strcmp(line + len-delimiter.length(),delimiter.c_str()); +} + +static std::string delimiter= ";"; +extern "C" int read_bootstrap_query(char *query, int *query_length, + fgets_input_t input, fgets_fn_t fgets_fn, + int preserve_delimiter, int *error) { char line_buffer[MAX_BOOTSTRAP_LINE_SIZE]; const char *line; @@ -73,9 +84,32 @@ int read_bootstrap_query(char *query, int *query_length, if ((line[0] == '-') && (line[1] == '-')) continue; - /* Skip delimiter, ignored. */ - if (strncmp(line, "delimiter", 9) == 0) + size_t i=0; + while (line[i] == ' ') + i++; + + /* Skip -- comments */ + if (line[i] == '-' && line[i+1] == '-') + continue; + + if (strncmp(line, "DELIMITER", 9) == 0) + { + const char *p= strrchr(line,' '); + if (!p || !p[1]) + { + /* Invalid DELIMITER specifier */ + return READ_BOOTSTRAP_ERROR; + } + delimiter.assign(p+1); + if (preserve_delimiter) + { + memcpy(query,line,len); + query[len]=0; + *query_length = (int)len; + return READ_BOOTSTRAP_SUCCESS; + } continue; + } /* Append the current line to a multi line query. 
If the new line will make the query too long, preserve the partial line to provide context for the @@ -105,13 +139,18 @@ int read_bootstrap_query(char *query, int *query_length, memcpy(query + query_len, line, len); query_len+= len; - if (line[len - 1] == ';') + if (is_end_of_query(line, len, delimiter)) { /* - The last line is terminated by ';'. + The last line is terminated by delimiter Return the query found. */ - query[query_len]= '\0'; + if (!preserve_delimiter) + { + query_len-= delimiter.length(); + query[query_len++]= ';'; + } + query[query_len]= 0; *query_length= (int)query_len; return READ_BOOTSTRAP_SUCCESS; } diff --git a/sql/sql_bootstrap.h b/sql/sql_bootstrap.h index f12d66a522e..e5b9b3a55c2 100644 --- a/sql/sql_bootstrap.h +++ b/sql/sql_bootstrap.h @@ -23,7 +23,7 @@ The longest query in use depends on the documentation content, see the file fill_help_tables.sql */ -#define MAX_BOOTSTRAP_QUERY_SIZE 20000 +#define MAX_BOOTSTRAP_QUERY_SIZE 60000 /** The maximum size of a bootstrap query, expressed in a single line. Do not increase this size, use the multiline syntax instead. 
@@ -39,8 +39,16 @@ typedef void *fgets_input_t; typedef char * (*fgets_fn_t)(char *, size_t, fgets_input_t, int *error); -int read_bootstrap_query(char *query, int *query_length, - fgets_input_t input, fgets_fn_t fgets_fn, int *error); +#ifdef __cplusplus +extern "C" { +#endif +int read_bootstrap_query(char *query, int *query_length, fgets_input_t input, + fgets_fn_t fgets_fn, + int preserve_delimiter, + int *error); +#ifdef __cplusplus +} +#endif #endif diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc index b81dea78bcf..f65466115df 100644 --- a/sql/sql_cache.cc +++ b/sql/sql_cache.cc @@ -2291,7 +2291,7 @@ void Query_cache::invalidate_locked_for_write(THD *thd, for (; tables_used; tables_used= tables_used->next_local) { THD_STAGE_INFO(thd, stage_invalidating_query_cache_entries_table); - if (tables_used->lock_type >= TL_WRITE_ALLOW_WRITE && + if (tables_used->lock_type >= TL_FIRST_WRITE && tables_used->table) { invalidate_table(thd, tables_used->table); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 85fb0367dfb..e2371b5bdb4 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -683,7 +683,8 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) m_stmt_da(&main_da), tdc_hash_pins(0), xid_hash_pins(0), - m_tmp_tables_locked(false) + m_tmp_tables_locked(false), + async_state() #ifdef HAVE_REPLICATION , current_linfo(0), @@ -712,6 +713,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) wsrep_current_gtid_seqno(0), wsrep_affected_rows(0), wsrep_has_ignored_error(false), + wsrep_was_on(false), wsrep_ignore_table(false), wsrep_aborter(0), @@ -4718,6 +4720,19 @@ extern "C" void thd_create_random_password(MYSQL_THD thd, } +extern "C" const char *thd_priv_host(MYSQL_THD thd, size_t *length) +{ + const Security_context *sctx= thd->security_ctx; + if (!sctx) + { + *length= 0; + return NULL; + } + *length= strlen(sctx->priv_host); + return sctx->priv_host; +} + + #ifdef INNODB_COMPATIBILITY_HOOKS /** open a table and add it to thd->open_tables @@ -4923,6 +4938,56 @@ 
void reset_thd(MYSQL_THD thd) free_root(thd->mem_root, MYF(MY_KEEP_PREALLOC)); } +/** + This function can be used by storage engine + to indicate a start of an async operation. + + This asynchronous is such operation needs to be + finished before we write response to the client +. + An example of this operation is Innodb's asynchronous + group commit. Server needs to wait for the end of it + before writing response to client, to provide durability + guarantees, in other words, server can't send OK packet + before modified data is durable in redo log. +*/ +extern "C" MYSQL_THD thd_increment_pending_ops(void) +{ + THD *thd = current_thd; + if (!thd) + return NULL; + thd->async_state.inc_pending_ops(); + return thd; +} + +/** + This function can be used by plugin/engine to indicate + end of async operation (such as end of group commit + write flush) + + @param thd THD +*/ +extern "C" void thd_decrement_pending_ops(MYSQL_THD thd) +{ + DBUG_ASSERT(thd); + thd_async_state::enum_async_state state; + if (thd->async_state.dec_pending_ops(&state) == 0) + { + switch(state) + { + case thd_async_state::enum_async_state::SUSPENDED: + DBUG_ASSERT(thd->scheduler->thd_resume); + thd->scheduler->thd_resume(thd); + break; + case thd_async_state::enum_async_state::NONE: + break; + default: + DBUG_ASSERT(0); + } + } +} + + unsigned long long thd_get_query_id(const MYSQL_THD thd) { return((unsigned long long)thd->query_id); @@ -5038,14 +5103,13 @@ extern "C" enum enum_server_command thd_current_command(MYSQL_THD thd) return thd->get_command(); } - -extern "C" int thd_slave_thread(const MYSQL_THD thd) +#ifdef HAVE_REPLICATION /* Working around MDEV-24622 */ +/** @return whether the current thread is for applying binlog in a replica */ +extern "C" int thd_is_slave(const MYSQL_THD thd) { - return(thd->slave_thread); + return thd && thd->slave_thread; } - - - +#endif /* HAVE_REPLICATION */ /* Returns high resolution timestamp for the start of the current query. 
*/ @@ -5084,7 +5148,7 @@ thd_need_wait_reports(const MYSQL_THD thd) } /* - Used by storage engines (currently TokuDB and InnoDB) to report that + Used by storage engines (currently InnoDB) to report that one transaction THD is about to go to wait for a transactional lock held by another transactions OTHER_THD. @@ -6186,7 +6250,7 @@ int THD::decide_logging_format(TABLE_LIST *tables) */ lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_TABLE); - if (tbl->lock_type >= TL_WRITE_ALLOW_WRITE) + if (tbl->lock_type >= TL_FIRST_WRITE) { non_replicated_tables_count++; continue; @@ -6199,10 +6263,10 @@ int THD::decide_logging_format(TABLE_LIST *tables) if (tbl->prelocking_placeholder != TABLE_LIST::PRELOCK_FK) { - if (tbl->lock_type <= TL_READ_NO_INSERT) + if (tbl->lock_type < TL_FIRST_WRITE) has_read_tables= true; else if (table->found_next_number_field && - (tbl->lock_type >= TL_WRITE_ALLOW_WRITE)) + (tbl->lock_type >= TL_FIRST_WRITE)) { has_auto_increment_write_tables= true; has_auto_increment_write_tables_not_first= found_first_not_own_table; @@ -6211,7 +6275,7 @@ int THD::decide_logging_format(TABLE_LIST *tables) } } - if (tbl->lock_type >= TL_WRITE_ALLOW_WRITE) + if (tbl->lock_type >= TL_FIRST_WRITE) { bool trans; if (prev_write_table && prev_write_table->file->ht != @@ -6481,7 +6545,7 @@ int THD::decide_logging_format(TABLE_LIST *tables) if (table->placeholder()) continue; if (table->table->file->ht->db_type == DB_TYPE_BLACKHOLE_DB && - table->lock_type >= TL_WRITE_ALLOW_WRITE) + table->lock_type >= TL_FIRST_WRITE) { table_names.append(&table->table_name); table_names.append(","); diff --git a/sql/sql_class.h b/sql/sql_class.h index b5c56f1e62e..64edab59bcb 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -197,6 +197,7 @@ extern MYSQL_PLUGIN_IMPORT const char **errmesg; extern "C" LEX_STRING * thd_query_string (MYSQL_THD thd); extern "C" unsigned long long thd_query_id(const MYSQL_THD thd); extern "C" size_t thd_query_safe(MYSQL_THD thd, char *buf, size_t 
buflen); +extern "C" const char *thd_priv_host(MYSQL_THD thd, size_t *length); extern "C" const char *thd_user_name(MYSQL_THD thd); extern "C" const char *thd_client_host(MYSQL_THD thd); extern "C" const char *thd_client_ip(MYSQL_THD thd); @@ -381,6 +382,29 @@ public: }; +/* An ALTER INDEX operation that changes the ignorability of an index. */ +class Alter_index_ignorability: public Sql_alloc +{ +public: + Alter_index_ignorability(const char *name, bool is_ignored) : + m_name(name), m_is_ignored(is_ignored) + { + assert(name != NULL); + } + + const char *name() const { return m_name; } + + /* The ignorability after the operation is performed. */ + bool is_ignored() const { return m_is_ignored; } + Alter_index_ignorability *clone(MEM_ROOT *mem_root) const + { return new (mem_root) Alter_index_ignorability(*this); } + +private: + const char *m_name; + bool m_is_ignored; +}; + + class Key :public Sql_alloc, public DDL_options { public: enum Keytype { PRIMARY, UNIQUE, MULTIPLE, FULLTEXT, SPATIAL, FOREIGN_KEY}; @@ -836,7 +860,6 @@ typedef struct system_status_var ulong com_create_tmp_table; ulong com_drop_tmp_table; ulong com_other; - ulong com_multi; ulong com_stmt_prepare; ulong com_stmt_reprepare; @@ -933,6 +956,12 @@ typedef struct system_status_var ulong lost_connections; ulong max_statement_time_exceeded; /* + Number of times where column info was not + sent with prepared statement metadata. + */ + ulong skip_metadata_count; + + /* Number of statements sent from the client */ ulong questions; @@ -951,6 +980,7 @@ typedef struct system_status_var ulonglong table_open_cache_hits; ulonglong table_open_cache_misses; ulonglong table_open_cache_overflows; + ulonglong send_metadata_skips; double last_query_cost; double cpu_time, busy_time; uint32 threads_running; @@ -1209,6 +1239,38 @@ public: class Server_side_cursor; +/* + Struct to catch changes in column metadata that is sent to client. + in the "result set metadata". Used to support + MARIADB_CLIENT_CACHE_METADATA. 
+*/ +struct send_column_info_state +{ + /* Last client charset (affects metadata) */ + CHARSET_INFO *last_charset= nullptr; + + /* Checksum, only used to check changes if 'immutable' is false*/ + uint32 checksum= 0; + + /* + Column info can only be changed by PreparedStatement::reprepare() + + There is a class of "weird" prepared statements like SELECT ? or SELECT @a + that are not immutable, and depend on input parameters or user variables + */ + bool immutable= false; + + bool initialized= false; + + /* Used by PreparedStatement::reprepare()*/ + void reset() + { + initialized= false; + checksum= 0; + } +}; + + /** @class Statement @brief State of a single command executed against this connection. @@ -1298,6 +1360,8 @@ public: LEX_CSTRING db; + send_column_info_state column_info_state; + /* This is set to 1 of last call to send_result_to_client() was ok */ my_bool query_cache_is_applicable; @@ -2282,6 +2346,164 @@ struct THD_count ~THD_count() { thread_count--; } }; +/** + Support structure for asynchronous group commit, or more generally + any asynchronous operation that needs to finish before server writes + response to client. + + An engine, or any other server component, can signal that there is + a pending operation by incrementing a counter, i.e inc_pending_ops() + and that pending operation is finished by decrementing that counter + dec_pending_ops(). + + NOTE: Currently, pending operations can not fail, i.e there is no + way to pass a return code in dec_pending_ops() + + The server does not write response to the client before the counter + becomes 0. In case of group commit it ensures that data is persistent + before success reported to client, i.e durability in ACID. 
+*/ +struct thd_async_state +{ + enum class enum_async_state + { + NONE, + SUSPENDED, /* do_command() did not finish, and needs to be resumed */ + RESUMED /* do_command() is resumed*/ + }; + enum_async_state m_state{enum_async_state::NONE}; + + /* Stuff we need to resume do_command where we finished last time*/ + enum enum_server_command m_command{COM_SLEEP}; + LEX_STRING m_packet{0,0}; + + mysql_mutex_t m_mtx; + mysql_cond_t m_cond; + + /** Pending counter*/ + Atomic_counter<int> m_pending_ops=0; + +#ifndef DBUG_OFF + /* Checks */ + pthread_t m_dbg_thread; +#endif + + thd_async_state() + { + mysql_mutex_init(PSI_NOT_INSTRUMENTED, &m_mtx, 0); + mysql_cond_init(PSI_INSTRUMENT_ME, &m_cond, 0); + } + + /* + Currently only used with threadpool, one can "suspend" and "resume" a THD. + Suspend only means leaving do_command earlier, after saving some state. + Resume is continuing suspended THD's do_command(), from where it finished last time. + */ + bool try_suspend() + { + bool ret; + mysql_mutex_lock(&m_mtx); + DBUG_ASSERT(m_state == enum_async_state::NONE); + DBUG_ASSERT(m_pending_ops >= 0); + + if(m_pending_ops) + { + ret=true; + m_state= enum_async_state::SUSPENDED; + } + else + { + /* + If there is no pending operations, can't suspend, since + nobody can resume it. + */ + ret=false; + } + mysql_mutex_unlock(&m_mtx); + return ret; + } + + ~thd_async_state() + { + wait_for_pending_ops(); + mysql_mutex_destroy(&m_mtx); + mysql_cond_destroy(&m_cond); + } + + /* + Increment pending asynchronous operations. + The client response may not be written if + this count > 0. + So, without threadpool query needs to wait for + the operations to finish. + With threadpool, THD can be suspended and resumed + when this counter goes to 0. + */ + void inc_pending_ops() + { + mysql_mutex_lock(&m_mtx); + +#ifndef DBUG_OFF + /* + Check that increments are always done by the same thread. 
+ */ + if (!m_pending_ops) + m_dbg_thread= pthread_self(); + else + DBUG_ASSERT(pthread_equal(pthread_self(),m_dbg_thread)); +#endif + + m_pending_ops++; + mysql_mutex_unlock(&m_mtx); + } + + int dec_pending_ops(enum_async_state* state) + { + int ret; + mysql_mutex_lock(&m_mtx); + ret= --m_pending_ops; + if (!ret) + mysql_cond_signal(&m_cond); + *state = m_state; + mysql_mutex_unlock(&m_mtx); + return ret; + } + + /* + This is used for "dirty" reading pending ops, + when dirty read is OK. + */ + int pending_ops() + { + return m_pending_ops; + } + + /* Wait for pending operations to finish.*/ + void wait_for_pending_ops() + { + /* + It is fine to read m_pending_ops and compare it with 0, + without mutex protection. + + The value is only incremented by the current thread, and will + be decremented by another one, thus "dirty" may show positive number + when it is really 0, but this is not a problem, and the only + bad thing from that will be rechecking under mutex. + */ + if (!pending_ops()) + return; + + mysql_mutex_lock(&m_mtx); + DBUG_ASSERT(m_pending_ops >= 0); + while (m_pending_ops) + mysql_cond_wait(&m_cond, &m_mtx); + mysql_mutex_unlock(&m_mtx); + } +}; + +extern "C" MYSQL_THD thd_increment_pending_ops(void); +extern "C" void thd_decrement_pending_ops(MYSQL_THD); + /** @class THD @@ -2401,6 +2623,8 @@ public: /* Last created prepared statement */ Statement *last_stmt; + Statement *cur_stmt= 0; + inline void set_last_stmt(Statement *stmt) { last_stmt= (is_error() ? 
NULL : stmt); } inline void clear_last_stmt() { last_stmt= NULL; } @@ -4989,6 +5213,7 @@ private: } public: + thd_async_state async_state; #ifdef HAVE_REPLICATION /* If we do a purge of binary logs, log index info of the threads @@ -5051,6 +5276,8 @@ public: uint64 wsrep_current_gtid_seqno; ulong wsrep_affected_rows; bool wsrep_has_ignored_error; + /* true if wsrep_on was ON in last wsrep_on_update */ + bool wsrep_was_on; /* When enabled, do not replicate/binlog updates from the current table that's @@ -6965,10 +7192,6 @@ public: #define CF_SKIP_WSREP_CHECK 0 #endif /* WITH_WSREP */ -/** - Do not allow it for COM_MULTI batch -*/ -#define CF_NO_COM_MULTI (1U << 3) /* Inline functions */ diff --git a/sql/sql_const.h b/sql/sql_const.h index 3f053a1606d..762728aa876 100644 --- a/sql/sql_const.h +++ b/sql/sql_const.h @@ -49,7 +49,7 @@ #else #define MAX_REFLENGTH 4 /* Max length for record ref */ #endif -#define MAX_HOSTNAME 61 /* len+1 in mysql.user */ +#define MAX_HOSTNAME (HOSTNAME_LENGTH + 1) /* len+1 in mysql.user */ #define MAX_CONNECTION_NAME NAME_LEN #define MAX_MBWIDTH 3 /* Max multibyte sequence */ diff --git a/sql/sql_db.cc b/sql/sql_db.cc index 3447032f193..d4fd66e233d 100644 --- a/sql/sql_db.cc +++ b/sql/sql_db.cc @@ -104,8 +104,137 @@ cmp_db_names(LEX_CSTRING *db1_name, const LEX_CSTRING *db2_name) db1_name->str, db2_name->str) == 0)); } +#ifdef HAVE_PSI_INTERFACE +static PSI_rwlock_key key_rwlock_LOCK_dboptions; +static PSI_rwlock_key key_rwlock_LOCK_dbnames; +static PSI_rwlock_key key_rwlock_LOCK_rmdir; + +static PSI_rwlock_info all_database_names_rwlocks[]= { + {&key_rwlock_LOCK_dboptions, "LOCK_dboptions", PSI_FLAG_GLOBAL}, + {&key_rwlock_LOCK_dbnames, "LOCK_dbnames", PSI_FLAG_GLOBAL}, + {&key_rwlock_LOCK_rmdir, "LOCK_rmdir",PSI_FLAG_GLOBAL}, +}; + +static void init_database_names_psi_keys(void) +{ + const char *category= "sql"; + int count; + + if (PSI_server == NULL) + return; + + count= array_elements(all_database_names_rwlocks); + 
PSI_server->register_rwlock(category, all_database_names_rwlocks, count); +} +#endif + +static mysql_rwlock_t rmdir_lock; /* + Cache of C strings for existing database names. + + The only use of it is to avoid repeated expensive + my_access() calls. + + Provided operations are lookup, insert (after successfull my_access()) + and clear (this is called whenever rmdir is called). +*/ +struct dbname_cache_t +{ +private: + Hash_set<LEX_STRING> m_set; + mysql_rwlock_t m_lock; + + static uchar *get_key(const LEX_STRING *ls, size_t *sz, my_bool) + { + *sz= ls->length; + return (uchar *) ls->str; + } + +public: + dbname_cache_t() + : m_set(key_memory_dbnames_cache, table_alias_charset, 10, 0, + sizeof(char *), (my_hash_get_key) get_key, my_free, 0) + { + mysql_rwlock_init(key_rwlock_LOCK_dbnames, &m_lock); + } + + bool contains(const char *s) + { + auto sz= strlen(s); + mysql_rwlock_rdlock(&m_lock); + bool ret= m_set.find(s, sz) != 0; + mysql_rwlock_unlock(&m_lock); + return ret; + } + + void insert(const char *s) + { + auto len= strlen(s); + auto ls= (LEX_STRING *) my_malloc(key_memory_dbnames_cache, + sizeof(LEX_STRING) + strlen(s) + 1, 0); + + if (!ls) + return; + + ls->length= len; + ls->str= (char *) (ls + 1); + + memcpy(ls->str, s, len + 1); + mysql_rwlock_wrlock(&m_lock); + bool found= m_set.find(s, len) != 0; + if (!found) + m_set.insert(ls); + mysql_rwlock_unlock(&m_lock); + if (found) + my_free(ls); + } + + void clear() + { + mysql_rwlock_wrlock(&m_lock); + m_set.clear(); + mysql_rwlock_unlock(&m_lock); + } + + ~dbname_cache_t() + { + mysql_rwlock_destroy(&m_lock); + } +}; + +static dbname_cache_t* dbname_cache; + +static void dbname_cache_init() +{ + static MY_ALIGNED(16) char buf[sizeof(dbname_cache_t)]; + DBUG_ASSERT(!dbname_cache); + dbname_cache= new (buf) dbname_cache_t; + mysql_rwlock_init(key_rwlock_LOCK_rmdir, &rmdir_lock); +} + +static void dbname_cache_destroy() +{ + if (!dbname_cache) + return; + + dbname_cache->~dbname_cache_t(); + dbname_cache= 0; + 
mysql_rwlock_destroy(&rmdir_lock); +} + +static int my_rmdir(const char *dir) +{ + auto ret= rmdir(dir); + if (ret) + return ret; + mysql_rwlock_wrlock(&rmdir_lock); + dbname_cache->clear(); + mysql_rwlock_unlock(&rmdir_lock); + return 0; +} + + /* Function we use in the creation of our hash to get key. */ @@ -131,7 +260,7 @@ static inline int write_to_binlog(THD *thd, const char *query, size_t q_len, qinfo.db= db; qinfo.db_len= (uint32)db_len; return mysql_bin_log.write(&qinfo); -} +} /* @@ -145,26 +274,7 @@ void free_dbopt(void *dbopt) my_free(dbopt); } -#ifdef HAVE_PSI_INTERFACE -static PSI_rwlock_key key_rwlock_LOCK_dboptions; -static PSI_rwlock_info all_database_names_rwlocks[]= -{ - { &key_rwlock_LOCK_dboptions, "LOCK_dboptions", PSI_FLAG_GLOBAL} -}; - -static void init_database_names_psi_keys(void) -{ - const char* category= "sql"; - int count; - - if (PSI_server == NULL) - return; - - count= array_elements(all_database_names_rwlocks); - PSI_server->register_rwlock(category, all_database_names_rwlocks, count); -} -#endif /** Initialize database option cache. @@ -190,6 +300,7 @@ bool my_dboptions_cache_init(void) table_alias_charset, 32, 0, 0, (my_hash_get_key) dboptions_get_key, free_dbopt, 0); } + dbname_cache_init(); return error; } @@ -205,6 +316,7 @@ void my_dboptions_cache_free(void) { dboptions_init= 0; my_hash_free(&dboptions); + dbname_cache_destroy(); mysql_rwlock_destroy(&LOCK_dboptions); } } @@ -692,7 +804,7 @@ mysql_create_db_internal(THD *thd, const LEX_CSTRING *db, Restore things to beginning. 
*/ path[path_len]= 0; - if (rmdir(path) >= 0) + if (my_rmdir(path) >= 0) DBUG_RETURN(-1); /* We come here when we managed to create the database, but not the option @@ -1116,7 +1228,7 @@ static bool find_db_tables_and_rm_known_files(THD *thd, MY_DIR *dirp, DBUG_PRINT("enter",("path: %s", path)); /* first, get the list of tables */ - Dynamic_array<LEX_CSTRING*> files(dirp->number_of_files); + Dynamic_array<LEX_CSTRING*> files(PSI_INSTRUMENT_MEM, dirp->number_of_files); Discovered_table_list tl(thd, &files); if (ha_discover_table_names(thd, &db, dirp, &tl, true)) DBUG_RETURN(1); @@ -1252,7 +1364,7 @@ static my_bool rm_dir_w_symlink(const char *org_path, my_bool send_error) if (pos > path && pos[-1] == FN_LIBCHAR) *--pos=0; - if (unlikely(rmdir(path) < 0 && send_error)) + if (unlikely(my_rmdir(path) < 0 && send_error)) { my_error(ER_DB_DROP_RMDIR, MYF(0), path, errno); DBUG_RETURN(1); @@ -1824,7 +1936,7 @@ bool mysql_upgrade_db(THD *thd, const LEX_CSTRING *old_db) length= build_table_filename(path, sizeof(path)-1, new_db.str, "", "", 0); if (length && path[length-1] == FN_LIBCHAR) path[length-1]=0; // remove ending '\' - rmdir(path); + my_rmdir(path); goto exit; } @@ -1919,20 +2031,34 @@ exit: TRUE The directory does not exist. */ + bool check_db_dir_existence(const char *db_name) { char db_dir_path[FN_REFLEN + 1]; uint db_dir_path_len; + if (dbname_cache->contains(db_name)) + return 0; + db_dir_path_len= build_table_filename(db_dir_path, sizeof(db_dir_path) - 1, db_name, "", "", 0); if (db_dir_path_len && db_dir_path[db_dir_path_len - 1] == FN_LIBCHAR) db_dir_path[db_dir_path_len - 1]= 0; - /* Check access. */ + /* + Check access. - return my_access(db_dir_path, F_OK); + The locking is to prevent creating permanent stale + entries for deleted databases, in case of + race condition with my_rmdir. 
+ */ + mysql_rwlock_rdlock(&rmdir_lock); + int ret= my_access(db_dir_path, F_OK); + if (!ret) + dbname_cache->insert(db_name); + mysql_rwlock_unlock(&rmdir_lock); + return ret; } diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index 94d5ceb309d..6c301fd1e18 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -368,6 +368,16 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, if (mysql_prepare_delete(thd, table_list, &conds, &delete_while_scanning)) DBUG_RETURN(TRUE); + if (table_list->has_period()) + { + if (!table_list->period_conditions.start.item->const_item() + || !table_list->period_conditions.end.item->const_item()) + { + my_error(ER_NOT_CONSTANT_EXPRESSION, MYF(0), "FOR PORTION OF"); + DBUG_RETURN(true); + } + } + if (delete_history) table->vers_write= false; diff --git a/sql/sql_error.cc b/sql/sql_error.cc index b3ef0d89a98..9af57ea6c01 100644 --- a/sql/sql_error.cc +++ b/sql/sql_error.cc @@ -302,7 +302,6 @@ void Diagnostics_area::reset_diagnostics_area() { DBUG_ENTER("reset_diagnostics_area"); - m_skip_flush= FALSE; #ifdef DBUG_OFF m_can_overwrite_status= FALSE; /** Don't take chances in production */ diff --git a/sql/sql_error.h b/sql/sql_error.h index a0497af78cb..318d5076534 100644 --- a/sql/sql_error.h +++ b/sql/sql_error.h @@ -1022,14 +1022,6 @@ public: { DBUG_ASSERT(m_status == DA_ERROR || m_status == DA_OK || m_status == DA_OK_BULK); return m_message; } - bool skip_flush() const - { - DBUG_ASSERT(m_status == DA_OK || m_status == DA_OK_BULK); - return m_skip_flush; - } - - void set_skip_flush() - { m_skip_flush= TRUE; } uint sql_errno() const { @@ -1215,9 +1207,6 @@ private: /** Set to make set_error_status after set_{ok,eof}_status possible. */ bool m_can_overwrite_status; - /** Skip flushing network buffer after writing OK (for COM_MULTI) */ - bool m_skip_flush; - /** Message buffer. Can be used by OK or ERROR status. 
*/ char m_message[MYSQL_ERRMSG_SIZE]; diff --git a/sql/sql_explain.h b/sql/sql_explain.h index 9090416847f..42590e0bea0 100644 --- a/sql/sql_explain.h +++ b/sql/sql_explain.h @@ -74,7 +74,7 @@ class Json_writer; *************************************************************************************/ -const int FAKE_SELECT_LEX_ID= (int)UINT_MAX; +const uint FAKE_SELECT_LEX_ID= UINT_MAX; class Explain_query; @@ -108,7 +108,7 @@ public: }; virtual enum explain_node_type get_type()= 0; - virtual int get_select_id()= 0; + virtual uint get_select_id()= 0; /** expression cache statistics @@ -166,9 +166,9 @@ public: bool add_table(Explain_table_access *tab, Explain_query *query); - int get_select_id() { return select_id; } + uint get_select_id() { return select_id; } - int select_id; + uint select_id; int print_explain(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze); @@ -352,7 +352,7 @@ public: enum explain_node_type get_type() { return EXPLAIN_UNION; } unit_common_op operation; - int get_select_id() + uint get_select_id() { DBUG_ASSERT(union_members.elements() > 0); return union_members.at(0); @@ -879,7 +879,7 @@ public: {} virtual enum explain_node_type get_type() { return EXPLAIN_UPDATE; } - virtual int get_select_id() { return 1; /* always root */ } + virtual uint get_select_id() { return 1; /* always root */ } const char *select_type; @@ -959,7 +959,7 @@ public: StringBuffer<64> table_name; enum explain_node_type get_type() { return EXPLAIN_INSERT; } - int get_select_id() { return 1; /* always root */ } + uint get_select_id() { return 1; /* always root */ } int print_explain(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze); @@ -986,7 +986,7 @@ public: bool deleting_all_rows; virtual enum explain_node_type get_type() { return EXPLAIN_DELETE; } - virtual int get_select_id() { return 1; /* always root */ } + virtual uint get_select_id() { return 1; /* always root */ } virtual int 
print_explain(Explain_query *query, select_result_sink *output, uint8 explain_flags, bool is_analyze); diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index 9379421e8cd..041a64d402d 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -1,6 +1,6 @@ /* Copyright (c) 2000, 2016, Oracle and/or its affiliates. - Copyright (c) 2010, 2019, MariaDB Corporation + Copyright (c) 2010, 2021, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2116,6 +2116,9 @@ int write_record(THD *thd, TABLE *table, COPY_INFO *info, select_result *sink) goto after_trg_or_ignored_err; } + /* Notify the engine about insert ignore operation */ + if (info->handle_duplicates == DUP_ERROR && info->ignore) + table->file->extra(HA_EXTRA_IGNORE_INSERT); after_trg_n_copied_inc: info->copied++; thd->record_first_successful_insert_id_in_cur_stmt(table->file->insert_id_for_cur_row); @@ -4132,6 +4135,7 @@ bool select_insert::prepare_eof() if (info.ignore || info.handle_duplicates != DUP_ERROR) if (table->file->ha_table_flags() & HA_DUPLICATE_POS) table->file->ha_rnd_end(); + table->file->extra(HA_EXTRA_END_ALTER_COPY); table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE); @@ -4724,7 +4728,11 @@ select_create::prepare(List<Item> &_values, SELECT_LEX_UNIT *u) if (info.handle_duplicates == DUP_UPDATE) table->file->extra(HA_EXTRA_INSERT_WITH_UPDATE); if (thd->locked_tables_mode <= LTM_LOCK_TABLES) + { table->file->ha_start_bulk_insert((ha_rows) 0); + if (thd->lex->duplicates == DUP_ERROR && !thd->lex->ignore) + table->file->extra(HA_EXTRA_BEGIN_ALTER_COPY); + } thd->abort_on_warning= !info.ignore && thd->is_strict_mode(); if (check_that_all_fields_are_given_values(thd, table, table_list)) DBUG_RETURN(1); diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 2b6b3f87b58..93b8befd277 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -612,7 +612,8 @@ 
Query_tables_list::binlog_stmt_unsafe_errcode[BINLOG_STMT_UNSAFE_COUNT] = ER_BINLOG_UNSAFE_CREATE_SELECT_AUTOINC, ER_BINLOG_UNSAFE_UPDATE_IGNORE, ER_BINLOG_UNSAFE_INSERT_TWO_KEYS, - ER_BINLOG_UNSAFE_AUTOINC_NOT_FIRST + ER_BINLOG_UNSAFE_AUTOINC_NOT_FIRST, + ER_BINLOG_UNSAFE_SKIP_LOCKED }; @@ -3010,6 +3011,8 @@ void st_select_lex::init_select() select_limit= 0; /* denotes the default limit = HA_POS_ERROR */ offset_limit= 0; /* denotes the default offset = 0 */ is_set_query_expr_tail= false; + select_lock= select_lock_type::NONE; + skip_locked= false; with_sum_func= 0; with_all_modifier= 0; is_correlated= 0; @@ -4374,7 +4377,7 @@ void LEX::set_trg_event_type_for_tables() parsing. */ if (static_cast<int>(tables->lock_type) >= - static_cast<int>(TL_WRITE_ALLOW_WRITE)) + static_cast<int>(TL_FIRST_WRITE)) tables->trg_event_map= new_trg_event_map; tables= tables->next_local; } @@ -9695,19 +9698,24 @@ void Lex_select_lock::set_to(SELECT_LEX *sel) sel->master_unit()->set_lock_to_the_last_select(*this); else { + thr_lock_type lock_type; sel->parent_lex->safe_to_cache_query= 0; - if (update_lock) + if (unlikely(skip_locked)) { - sel->lock_type= TL_WRITE; - sel->set_lock_for_tables(TL_WRITE, false); + lock_type= update_lock ? TL_WRITE_SKIP_LOCKED : TL_READ_SKIP_LOCKED; } else { - sel->lock_type= TL_READ_WITH_SHARED_LOCKS; - sel->set_lock_for_tables(TL_READ_WITH_SHARED_LOCKS, false); + lock_type= update_lock ? TL_WRITE : TL_READ_WITH_SHARED_LOCKS; } + sel->lock_type= lock_type; + sel->select_lock= (update_lock ? 
st_select_lex::select_lock_type::FOR_UPDATE : + st_select_lex::select_lock_type::IN_SHARE_MODE); + sel->set_lock_for_tables(lock_type, false, skip_locked); } } + else + sel->select_lock= st_select_lex::select_lock_type::NONE; } bool Lex_order_limit_lock::set_to(SELECT_LEX *sel) diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 846d6b9a822..7e3a1152d0b 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -1298,6 +1298,11 @@ public: /* index in the select list of the expression currently being fixed */ int cur_pos_in_select_list; + /* SELECT [FOR UPDATE/LOCK IN SHARE MODE] [SKIP LOCKED] */ + enum select_lock_type {NONE, IN_SHARE_MODE, FOR_UPDATE}; + enum select_lock_type select_lock; + bool skip_locked; + List<udf_func> udf_list; /* udf function calls stack */ /* @@ -1411,7 +1416,8 @@ public: TABLE_LIST *convert_right_join(); List<Item>* get_item_list(); ulong get_table_join_options(); - void set_lock_for_tables(thr_lock_type lock_type, bool for_update); + void set_lock_for_tables(thr_lock_type lock_type, bool for_update, + bool skip_locks); /* This method created for reiniting LEX in mysql_admin_table() and can be used only if you are going remove all SELECT_LEX & units except belonger @@ -1943,6 +1949,13 @@ public: */ BINLOG_STMT_UNSAFE_AUTOINC_NOT_FIRST, + /** + INSERT .. SELECT ... SKIP LOCKED is unlikely to have the same + rows locked on the replica. + primary key. + */ + BINLOG_STMT_UNSAFE_SKIP_LOCKED, + /* The last element of this enumeration type. 
*/ BINLOG_STMT_UNSAFE_COUNT }; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 344be7bf273..0d8ff8fee36 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -113,9 +113,7 @@ #include "wsrep_trans_observer.h" /* wsrep transaction hooks */ static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, - Parser_state *parser_state, - bool is_com_multi, - bool is_next_command); + Parser_state *parser_state); #endif /* WITH_WSREP */ /** @@ -391,7 +389,7 @@ const LEX_CSTRING command_name[257]={ { STRING_WITH_LEN("Slave_worker") }, //251 { STRING_WITH_LEN("Slave_IO") }, //252 { STRING_WITH_LEN("Slave_SQL") }, //253 - { STRING_WITH_LEN("Com_multi") }, //254 + { 0, 0}, { STRING_WITH_LEN("Error") } // Last command number 255 }; @@ -490,7 +488,7 @@ void init_update_queries(void) memset(server_command_flags, 0, sizeof(server_command_flags)); server_command_flags[COM_STATISTICS]= CF_SKIP_QUERY_ID | CF_SKIP_QUESTIONS | CF_SKIP_WSREP_CHECK; - server_command_flags[COM_PING]= CF_SKIP_QUERY_ID | CF_SKIP_QUESTIONS | CF_SKIP_WSREP_CHECK | CF_NO_COM_MULTI; + server_command_flags[COM_PING]= CF_SKIP_QUERY_ID | CF_SKIP_QUESTIONS | CF_SKIP_WSREP_CHECK; server_command_flags[COM_QUIT]= CF_SKIP_WSREP_CHECK; server_command_flags[COM_PROCESS_INFO]= CF_SKIP_WSREP_CHECK; @@ -519,7 +517,6 @@ void init_update_queries(void) server_command_flags[COM_STMT_EXECUTE]= CF_SKIP_WSREP_CHECK; server_command_flags[COM_STMT_SEND_LONG_DATA]= CF_SKIP_WSREP_CHECK; server_command_flags[COM_REGISTER_SLAVE]= CF_SKIP_WSREP_CHECK; - server_command_flags[COM_MULTI]= CF_SKIP_WSREP_CHECK | CF_NO_COM_MULTI; /* Initialize the sql command flags array. 
*/ memset(sql_command_flags, 0, sizeof(sql_command_flags)); @@ -958,7 +955,7 @@ void execute_init_command(THD *thd, LEX_STRING *init_command, save_vio= thd->net.vio; thd->net.vio= 0; thd->clear_error(1); - dispatch_command(COM_QUERY, thd, buf, (uint)len, FALSE, FALSE); + dispatch_command(COM_QUERY, thd, buf, (uint)len); thd->client_capabilities= save_client_capabilities; thd->net.vio= save_vio; @@ -981,6 +978,7 @@ int bootstrap(MYSQL_FILE *file) DBUG_ENTER("handle_bootstrap"); THD *thd= new THD(next_thread_id()); + char *buffer= new char[MAX_BOOTSTRAP_QUERY_SIZE]; #ifdef WITH_WSREP thd->variables.wsrep_on= 0; #endif @@ -1014,12 +1012,12 @@ int bootstrap(MYSQL_FILE *file) for ( ; ; ) { - char buffer[MAX_BOOTSTRAP_QUERY_SIZE] = ""; + buffer[0]= 0; int rc, length; char *query; int error= 0; - rc= read_bootstrap_query(buffer, &length, file, fgets_fn, &error); + rc= read_bootstrap_query(buffer, &length, file, fgets_fn, 0, &error); if (rc == READ_BOOTSTRAP_EOF) break; @@ -1084,7 +1082,7 @@ int bootstrap(MYSQL_FILE *file) break; } - mysql_parse(thd, thd->query(), length, &parser_state, FALSE, FALSE); + mysql_parse(thd, thd->query(), length, &parser_state); bootstrap_error= thd->is_error(); thd->protocol->end_statement(); @@ -1102,6 +1100,7 @@ int bootstrap(MYSQL_FILE *file) thd->lex->restore_set_statement_var(); } delete thd; + delete[] buffer; DBUG_RETURN(bootstrap_error); } @@ -1132,23 +1131,6 @@ void cleanup_items(Item *item) DBUG_VOID_RETURN; } -static enum enum_server_command fetch_command(THD *thd, char *packet) -{ - enum enum_server_command - command= (enum enum_server_command) (uchar) packet[0]; - DBUG_ENTER("fetch_command"); - - if (command >= COM_END || - (command >= COM_MDB_GAP_BEG && command <= COM_MDB_GAP_END)) - command= COM_END; // Wrong command - - DBUG_PRINT("info",("Command on %s = %d (%s)", - vio_description(thd->net.vio), command, - command_name[command].str)); - DBUG_RETURN(command); -} - - #ifdef WITH_WSREP static bool 
wsrep_tables_accessible_when_detached(const TABLE_LIST *tables) { @@ -1170,28 +1152,80 @@ static bool wsrep_command_no_result(char command) } #endif /* WITH_WSREP */ #ifndef EMBEDDED_LIBRARY +static enum enum_server_command fetch_command(THD *thd, char *packet) +{ + enum enum_server_command + command= (enum enum_server_command) (uchar) packet[0]; + DBUG_ENTER("fetch_command"); + + if (command >= COM_END || + (command >= COM_MDB_GAP_BEG && command <= COM_MDB_GAP_END)) + command= COM_END; // Wrong command + + DBUG_PRINT("info",("Command on %s = %d (%s)", + vio_description(thd->net.vio), command, + command_name[command].str)); + DBUG_RETURN(command); +} /** Read one command from connection and execute it (query or simple command). - This function is called in loop from thread function. + This function is to be used by different schedulers (one-thread-per-connection, + pool-of-threads) For profiling to work, it must never be called recursively. + @param thd - client connection context + + @param blocking - wait for command to finish. + if false (nonblocking), then the function might + return when command is "half-finished", with + DISPATCH_COMMAND_WOULDBLOCK. + Currenly, this can *only* happen when using + threadpool. The command will resume, after all outstanding + async operations (i.e group commit) finish. + Threadpool scheduler takes care of "resume". + @retval - 0 success + DISPATCH_COMMAND_SUCCESS - success @retval - 1 request of thread shutdown (see dispatch_command() description) + DISPATCH_COMMAND_CLOSE_CONNECTION request of THD shutdown + (s. dispatch_command() description) + @retval + DISPATCH_COMMAND_WOULDBLOCK - need to wait for asyncronous operations + to finish. Only returned if parameter + 'blocking' is false. 
*/ -bool do_command(THD *thd) +dispatch_command_return do_command(THD *thd, bool blocking) { - bool return_value; + dispatch_command_return return_value; char *packet= 0; ulong packet_length; NET *net= &thd->net; enum enum_server_command command; DBUG_ENTER("do_command"); +#ifdef WITH_WSREP + DBUG_ASSERT(!thd->async_state.pending_ops() || + (WSREP(thd) && + thd->wsrep_trx().state() == wsrep::transaction::s_aborted)); +#else + DBUG_ASSERT(!thd->async_state.pending_ops()); +#endif + + if (thd->async_state.m_state == thd_async_state::enum_async_state::RESUMED) + { + /* + Resuming previously suspended command. + Restore the state + */ + command = thd->async_state.m_command; + packet = thd->async_state.m_packet.str; + packet_length = (ulong)thd->async_state.m_packet.length; + goto resume; + } + /* indicator of uninitialized lex => normal flow of errors handling (see my_message_sql) @@ -1258,12 +1292,12 @@ bool do_command(THD *thd) if (net->error != 3) { - return_value= TRUE; // We have to close it. + return_value= DISPATCH_COMMAND_CLOSE_CONNECTION; // We have to close it. 
goto out; } net->error= 0; - return_value= FALSE; + return_value= DISPATCH_COMMAND_SUCCESS; goto out; } @@ -1330,7 +1364,7 @@ bool do_command(THD *thd) MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); thd->m_statement_psi= NULL; thd->m_digest= NULL; - return_value= FALSE; + return_value= DISPATCH_COMMAND_SUCCESS; wsrep_after_command_before_result(thd); goto out; @@ -1356,7 +1390,7 @@ bool do_command(THD *thd) thd->m_statement_psi= NULL; thd->m_digest= NULL; - return_value= FALSE; + return_value= DISPATCH_COMMAND_SUCCESS; wsrep_after_command_before_result(thd); goto out; } @@ -1367,8 +1401,18 @@ bool do_command(THD *thd) DBUG_ASSERT(packet_length); DBUG_ASSERT(!thd->apc_target.is_enabled()); + +resume: return_value= dispatch_command(command, thd, packet+1, - (uint) (packet_length-1), FALSE, FALSE); + (uint) (packet_length-1), blocking); + if (return_value == DISPATCH_COMMAND_WOULDBLOCK) + { + /* Save current state, and resume later.*/ + thd->async_state.m_command= command; + thd->async_state.m_packet={packet,packet_length}; + DBUG_RETURN(return_value); + } + DBUG_ASSERT(!thd->apc_target.is_enabled()); out: @@ -1479,45 +1523,6 @@ static void wsrep_copy_query(THD *thd) } #endif /* WITH_WSREP */ -/** - check COM_MULTI packet - - @param thd thread handle - @param packet pointer on the packet of commands - @param packet_length length of this packet - - @retval 0 - Error - @retval # - Number of commands in the batch -*/ - -uint maria_multi_check(THD *thd, char *packet, size_t packet_length) -{ - uint counter= 0; - DBUG_ENTER("maria_multi_check"); - while (packet_length) - { - char *packet_start= packet; - size_t subpacket_length= net_field_length((uchar **)&packet_start); - size_t length_length= packet_start - packet; - // length of command + 3 bytes where that length was stored - DBUG_PRINT("info", ("sub-packet length: %zu + %zu command: %x", - subpacket_length, length_length, - packet_start[3])); - - if (subpacket_length == 0 || - (subpacket_length + 
length_length) > packet_length) - { - my_message(ER_UNKNOWN_COM_ERROR, ER_THD(thd, ER_UNKNOWN_COM_ERROR), - MYF(0)); - DBUG_RETURN(0); - } - - counter++; - packet= packet_start + subpacket_length; - packet_length-= (subpacket_length + length_length); - } - DBUG_RETURN(counter); -} #if defined(WITH_ARIA_STORAGE_ENGINE) @@ -1554,8 +1559,13 @@ public: @param packet_length length of packet + 1 (to show that data is null-terminated) except for COM_SLEEP, where it can be zero. - @param is_com_multi recursive call from COM_MULTI - @param is_next_command there will be more command in the COM_MULTI batch + @param blocking if false (nonblocking), then the function might + return when command is "half-finished", with + DISPATCH_COMMAND_WOULDBLOCK. + Currenly, this can *only* happen when using threadpool. + The current command will resume, after all outstanding + async operations (i.e group commit) finish. + Threadpool scheduler takes care of "resume". @todo set thd->lex->sql_command to SQLCOM_END here. @@ -1568,9 +1578,8 @@ public: 1 request of thread shutdown, i. e. if command is COM_QUIT/COM_SHUTDOWN */ -bool dispatch_command(enum enum_server_command command, THD *thd, - char* packet, uint packet_length, bool is_com_multi, - bool is_next_command) +dispatch_command_return dispatch_command(enum enum_server_command command, THD *thd, + char* packet, uint packet_length, bool blocking) { NET *net= &thd->net; bool error= 0; @@ -1582,6 +1591,12 @@ bool dispatch_command(enum enum_server_command command, THD *thd, "<?>"))); bool drop_more_results= 0; + if (thd->async_state.m_state == thd_async_state::enum_async_state::RESUMED) + { + thd->async_state.m_state = thd_async_state::enum_async_state::NONE; + goto resume; + } + /* keep it withing 1 byte */ compile_time_assert(COM_END == 255); @@ -1651,14 +1666,6 @@ bool dispatch_command(enum enum_server_command command, THD *thd, beginning of each command. 
*/ thd->server_status&= ~SERVER_STATUS_CLEAR_SET; - if (is_next_command) - { - drop_more_results= !MY_TEST(thd->server_status & - SERVER_MORE_RESULTS_EXISTS); - thd->server_status|= SERVER_MORE_RESULTS_EXISTS; - if (is_com_multi) - thd->get_stmt_da()->set_skip_flush(); - } if (unlikely(thd->security_ctx->password_expired && command != COM_QUERY && @@ -1875,8 +1882,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, if (WSREP(thd)) { if (wsrep_mysql_parse(thd, thd->query(), thd->query_length(), - &parser_state, - is_com_multi, is_next_command)) + &parser_state)) { WSREP_DEBUG("Deadlock error for: %s", thd->query()); mysql_mutex_lock(&thd->LOCK_thd_data); @@ -1888,8 +1894,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, } else #endif /* WITH_WSREP */ - mysql_parse(thd, thd->query(), thd->query_length(), &parser_state, - is_com_multi, is_next_command); + mysql_parse(thd, thd->query(), thd->query_length(), &parser_state); while (!thd->killed && (parser_state.m_lip.found_semicolon != NULL) && ! 
thd->is_error()) @@ -1973,8 +1978,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, if (WSREP(thd)) { if (wsrep_mysql_parse(thd, beginning_of_next_stmt, - length, &parser_state, - is_com_multi, is_next_command)) + length, &parser_state)) { WSREP_DEBUG("Deadlock error for: %s", thd->query()); mysql_mutex_lock(&thd->LOCK_thd_data); @@ -1987,8 +1991,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, } else #endif /* WITH_WSREP */ - mysql_parse(thd, beginning_of_next_stmt, length, &parser_state, - is_com_multi, is_next_command); + mysql_parse(thd, beginning_of_next_stmt, length, &parser_state); } @@ -2039,13 +2042,6 @@ bool dispatch_command(enum enum_server_command command, THD *thd, break; } packet= arg_end + 1; - // thd->reset_for_next_command reset state => restore it - if (is_next_command) - { - thd->server_status|= SERVER_MORE_RESULTS_EXISTS; - if (is_com_multi) - thd->get_stmt_da()->set_skip_flush(); - } lex_start(thd); /* Must be before we init the table list. 
*/ @@ -2334,84 +2330,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, general_log_print(thd, command, NullS); my_eof(thd); break; - case COM_MULTI: - { - uint counter; - uint current_com= 0; - DBUG_ASSERT(!is_com_multi); - if (!(thd->client_capabilities & CLIENT_MULTI_RESULTS)) - { - /* The client does not support multiple result sets being sent back */ - my_error(ER_COMMULTI_BADCONTEXT, MYF(0)); - break; - } - - if (!(counter= maria_multi_check(thd, packet, packet_length))) - break; - - { - char *packet_start= packet; - /* We have to store next length because it will be destroyed by '\0' */ - size_t next_subpacket_length= net_field_length((uchar **)&packet_start); - size_t next_length_length= packet_start - packet; - unsigned char *readbuff= net->buff; - - if (net_allocate_new_packet(net, thd, MYF(0))) - break; - - PSI_statement_locker *save_locker= thd->m_statement_psi; - sql_digest_state *save_digest= thd->m_digest; - thd->m_statement_psi= NULL; - thd->m_digest= NULL; - - while (packet_length) - { - current_com++; - size_t subpacket_length= next_subpacket_length + next_length_length; - size_t length_length= next_length_length; - if (subpacket_length < packet_length) - { - packet_start= packet + subpacket_length; - next_subpacket_length= net_field_length((uchar**)&packet_start); - next_length_length= packet_start - (packet + subpacket_length); - } - /* safety like in do_command() */ - packet[subpacket_length]= '\0'; - enum enum_server_command subcommand= - fetch_command(thd, (packet + length_length)); - - if (server_command_flags[subcommand] & CF_NO_COM_MULTI) - { - my_error(ER_BAD_COMMAND_IN_MULTI, MYF(0), - command_name[subcommand].str); - goto com_multi_end; - } - - if (dispatch_command(subcommand, thd, packet + (1 + length_length), - (uint)(subpacket_length - (1 + length_length)), TRUE, - (current_com != counter))) - { - DBUG_ASSERT(thd->is_error()); - goto com_multi_end; - } - - DBUG_ASSERT(subpacket_length <= packet_length); - packet+= 
subpacket_length; - packet_length-= (uint)subpacket_length; - } - -com_multi_end: - thd->m_statement_psi= save_locker; - thd->m_digest= save_digest; - - /* release old buffer */ - net_flush(net); - DBUG_ASSERT(net->buff == net->write_pos); // nothing to send - my_free(readbuff); - } - break; - } case COM_SLEEP: case COM_CONNECT: // Impossible here case COM_TIME: // Impossible from client @@ -2425,7 +2344,18 @@ com_multi_end: } dispatch_end: - do_end_of_statement= true; + /* + For the threadpool i.e if non-blocking call, if not all async operations + are finished, return without cleanup. The cleanup will be done on + later, when command execution is resumed. + */ + if (!blocking && !error && thd->async_state.pending_ops()) + { + DBUG_RETURN(DISPATCH_COMMAND_WOULDBLOCK); + } + +resume: + #ifdef WITH_WSREP /* Next test should really be WSREP(thd), but that causes a failure when doing @@ -2472,11 +2402,8 @@ dispatch_end: thd_proc_info(thd, "Updating status"); /* Finalize server status flags after executing a command. 
*/ thd->update_server_status(); - if (command != COM_MULTI) - { - thd->protocol->end_statement(); - query_cache_end_of_result(thd); - } + thd->protocol->end_statement(); + query_cache_end_of_result(thd); } if (drop_more_results) thd->server_status&= ~SERVER_MORE_RESULTS_EXISTS; @@ -2504,8 +2431,7 @@ dispatch_end: thd->m_statement_psi= NULL; thd->m_digest= NULL; - if (!is_com_multi) - thd->packet.shrink(thd->variables.net_buffer_length); // Reclaim some memory + thd->packet.shrink(thd->variables.net_buffer_length); // Reclaim some memory thd->reset_kill_query(); /* Ensure that killed_errmsg is released */ /* @@ -2533,7 +2459,7 @@ dispatch_end: /* Check that some variables are reset properly */ DBUG_ASSERT(thd->abort_on_warning == 0); thd->lex->restore_set_statement_var(); - DBUG_RETURN(error); + DBUG_RETURN(error?DISPATCH_COMMAND_CLOSE_CONNECTION: DISPATCH_COMMAND_SUCCESS); } static bool slow_filter_masked(THD *thd, ulonglong mask) @@ -3734,7 +3660,7 @@ mysql_execute_command(THD *thd) { for (TABLE_LIST *table= all_tables; table; table= table->next_global) { - if (table->lock_type >= TL_WRITE_ALLOW_WRITE) + if (table->lock_type >= TL_FIRST_WRITE) { lex->sql_command= SQLCOM_BEGIN; thd->wsrep_converted_lock_session= true; @@ -3887,6 +3813,11 @@ mysql_execute_command(THD *thd) thd->set_query_timer(); #ifdef WITH_WSREP + /* Check wsrep_mode rules before command execution. */ + if (WSREP(thd) && + wsrep_thd_is_local(thd) && !wsrep_check_mode_before_cmd_execute(thd)) + goto error; + /* Always start a new transaction for a wsrep THD unless the current command is DDL or explicit BEGIN. This will guarantee that @@ -7178,7 +7109,7 @@ check_table_access(THD *thd, privilege_t requirements, TABLE_LIST *tables, /* We want to have either SELECT or INSERT rights to sequences depending on how they are accessed */ - want_access= ((table_ref->lock_type == TL_WRITE_ALLOW_WRITE) ? + want_access= ((table_ref->lock_type >= TL_FIRST_WRITE) ? 
INSERT_ACL : SELECT_ACL); } @@ -7710,7 +7641,7 @@ mysql_new_select(LEX *lex, bool move_down, SELECT_LEX *select_lex) bool new_select= select_lex == NULL; int old_nest_level= lex->current_select->nest_level; DBUG_ENTER("mysql_new_select"); - Name_resolution_context *curr_context; + Name_resolution_context *curr_context= lex->context_stack.head(); if (new_select) { @@ -7718,7 +7649,6 @@ mysql_new_select(LEX *lex, bool move_down, SELECT_LEX *select_lex) DBUG_RETURN(1); select_lex->select_number= ++thd->lex->stmt_lex->current_select_number; select_lex->parent_lex= lex; /* Used in init_query. */ - curr_context= lex->context_stack.head(); select_lex->init_query(); select_lex->init_select(); } @@ -7891,9 +7821,7 @@ static void wsrep_prepare_for_autocommit_retry(THD* thd, } static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, - Parser_state *parser_state, - bool is_com_multi, - bool is_next_command) + Parser_state *parser_state) { bool is_autocommit= !thd->in_multi_stmt_transaction_mode() && @@ -7902,7 +7830,7 @@ static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, do { retry_autocommit= false; - mysql_parse(thd, rawbuf, length, parser_state, is_com_multi, is_next_command); + mysql_parse(thd, rawbuf, length, parser_state); /* Convert all ER_QUERY_INTERRUPTED errors to ER_LOCK_DEADLOCK @@ -8008,15 +7936,10 @@ static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, @param thd Current thread @param rawbuf Begining of the query text @param length Length of the query text - @param[out] found_semicolon For multi queries, position of the character of - the next query in the query text. 
- @param is_next_command there will be more command in the COM_MULTI batch */ void mysql_parse(THD *thd, char *rawbuf, uint length, - Parser_state *parser_state, - bool is_com_multi, - bool is_next_command) + Parser_state *parser_state) { DBUG_ENTER("mysql_parse"); DBUG_EXECUTE_IF("parser_debug", turn_parser_debug_on_MYSQLparse();); @@ -8040,12 +7963,6 @@ void mysql_parse(THD *thd, char *rawbuf, uint length, */ lex_start(thd); thd->reset_for_next_command(); - if (is_next_command) - { - thd->server_status|= SERVER_MORE_RESULTS_EXISTS; - if (is_com_multi) - thd->get_stmt_da()->set_skip_flush(); - } if (query_cache_send_result_to_client(thd, rawbuf, length) <= 0) { @@ -8973,7 +8890,8 @@ bool st_select_lex::add_window_spec(THD *thd, /** Set lock for all tables in current select level. - @param lock_type Lock to set for tables + @param lock_type Lock to set for tables + @param skip_locked (SELECT {FOR UPDATE/LOCK IN SHARED MODE} SKIP LOCKED) @note If lock is a write lock, then tables->updating is set 1 @@ -8981,18 +8899,21 @@ bool st_select_lex::add_window_spec(THD *thd, query */ -void st_select_lex::set_lock_for_tables(thr_lock_type lock_type, bool for_update) +void st_select_lex::set_lock_for_tables(thr_lock_type lock_type, bool for_update, + bool skip_locked_arg) { DBUG_ENTER("set_lock_for_tables"); - DBUG_PRINT("enter", ("lock_type: %d for_update: %d", lock_type, - for_update)); + DBUG_PRINT("enter", ("lock_type: %d for_update: %d skip_locked %d", + lock_type, for_update, skip_locked)); + skip_locked= skip_locked_arg; for (TABLE_LIST *tables= table_list.first; tables; tables= tables->next_local) { tables->lock_type= lock_type; + tables->skip_locked= skip_locked; tables->updating= for_update; - tables->mdl_request.set_type((lock_type >= TL_WRITE_ALLOW_WRITE) ? + tables->mdl_request.set_type((lock_type >= TL_FIRST_WRITE) ? 
MDL_SHARED_WRITE : MDL_SHARED_READ); } DBUG_VOID_RETURN; @@ -9035,7 +8956,7 @@ bool st_select_lex_unit::add_fake_select_lex(THD *thd_arg) DBUG_RETURN(1); fake_select_lex->include_standalone(this, (SELECT_LEX_NODE**)&fake_select_lex); - fake_select_lex->select_number= INT_MAX; + fake_select_lex->select_number= FAKE_SELECT_LEX_ID; fake_select_lex->parent_lex= thd_arg->lex; /* Used in init_query. */ fake_select_lex->make_empty_select(); fake_select_lex->set_linkage(GLOBAL_OPTIONS_TYPE); @@ -9785,7 +9706,7 @@ bool multi_delete_set_locks_and_link_aux_tables(LEX *lex) walk->updating= target_tbl->updating; walk->lock_type= target_tbl->lock_type; /* We can assume that tables to be deleted from are locked for write. */ - DBUG_ASSERT(walk->lock_type >= TL_WRITE_ALLOW_WRITE); + DBUG_ASSERT(walk->lock_type >= TL_FIRST_WRITE); walk->mdl_request.set_type(MDL_SHARED_WRITE); target_tbl->correspondent_table= walk; // Remember corresponding table } diff --git a/sql/sql_parse.h b/sql/sql_parse.h index ac5786dbaa0..62c3775f408 100644 --- a/sql/sql_parse.h +++ b/sql/sql_parse.h @@ -91,8 +91,7 @@ bool is_log_table_write_query(enum enum_sql_command command); bool alloc_query(THD *thd, const char *packet, size_t packet_length); void mysql_init_select(LEX *lex); void mysql_parse(THD *thd, char *rawbuf, uint length, - Parser_state *parser_state, bool is_com_multi, - bool is_next_command); + Parser_state *parser_state); bool mysql_new_select(LEX *lex, bool move_down, SELECT_LEX *sel); void create_select_for_variable(THD *thd, LEX_CSTRING *var_name); void create_table_set_open_action_and_adjust_tables(LEX *lex); @@ -102,10 +101,16 @@ void create_table_set_open_action_and_adjust_tables(LEX *lex); int bootstrap(MYSQL_FILE *file); bool run_set_statement_if_requested(THD *thd, LEX *lex); int mysql_execute_command(THD *thd); -bool do_command(THD *thd); -bool dispatch_command(enum enum_server_command command, THD *thd, - char* packet, uint packet_length, - bool is_com_multi, bool is_next_command); 
+enum dispatch_command_return +{ + DISPATCH_COMMAND_SUCCESS=0, + DISPATCH_COMMAND_CLOSE_CONNECTION= 1, + DISPATCH_COMMAND_WOULDBLOCK= 2 +}; + +dispatch_command_return do_command(THD *thd, bool blocking = true); +dispatch_command_return dispatch_command(enum enum_server_command command, THD *thd, + char* packet, uint packet_length, bool blocking = true); void log_slow_statement(THD *thd); bool append_file_to_dir(THD *thd, const char **filename_ptr, const LEX_CSTRING *table_name); diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc index 3f28f818acc..134c96fd28f 100644 --- a/sql/sql_prepare.cc +++ b/sql/sql_prepare.cc @@ -334,9 +334,13 @@ static bool send_prep_stmt(Prepared_statement *stmt, uint columns) error= my_net_write(net, buff, sizeof(buff)); if (stmt->param_count && likely(!error)) { - error= thd->protocol_text.send_result_set_metadata((List<Item> *) - &stmt->lex->param_list, - Protocol::SEND_EOF); + /* + Force the column info to be written + (in this case PS parameter type info). 
+ */ + error= thd->protocol_text.send_result_set_metadata( + (List<Item> *)&stmt->lex->param_list, + Protocol::SEND_EOF | Protocol::SEND_FORCE_COLUMN_INFO); } if (likely(!error)) @@ -3456,10 +3460,15 @@ static void mysql_stmt_execute_common(THD *thd, thd->protocol= &thd->protocol_binary; MYSQL_EXECUTE_PS(thd->m_statement_psi, stmt->m_prepared_stmt); + auto save_cur_stmt= thd->cur_stmt; + thd->cur_stmt= stmt; + if (!bulk_op) stmt->execute_loop(&expanded_query, open_cursor, packet, packet_end); else stmt->execute_bulk_loop(&expanded_query, open_cursor, packet, packet_end); + + thd->cur_stmt= save_cur_stmt; thd->protocol= save_protocol; sp_cache_enforce_limit(thd->sp_proc_cache, stored_program_cache_size); @@ -4218,6 +4227,8 @@ bool Prepared_statement::prepare(const char *packet, uint packet_len) old_stmt_arena= thd->stmt_arena; thd->stmt_arena= this; + auto save_cur_stmt= thd->cur_stmt; + thd->cur_stmt= this; Parser_state parser_state; if (parser_state.init(thd, thd->query(), thd->query_length())) @@ -4225,6 +4236,7 @@ bool Prepared_statement::prepare(const char *packet, uint packet_len) thd->restore_backup_statement(this, &stmt_backup); thd->restore_active_arena(this, &stmt_backup); thd->stmt_arena= old_stmt_arena; + thd->cur_stmt = save_cur_stmt; DBUG_RETURN(TRUE); } @@ -4234,6 +4246,7 @@ bool Prepared_statement::prepare(const char *packet, uint packet_len) lex_start(thd); lex->context_analysis_only|= CONTEXT_ANALYSIS_ONLY_PREPARE; + error= (parse_sql(thd, & parser_state, NULL) || thd->is_error() || init_param_array(this)); @@ -4244,6 +4257,7 @@ bool Prepared_statement::prepare(const char *packet, uint packet_len) thd->restore_backup_statement(this, &stmt_backup); thd->restore_active_arena(this, &stmt_backup); thd->stmt_arena= old_stmt_arena; + thd->cur_stmt = save_cur_stmt; my_error(ER_MUST_CHANGE_PASSWORD, MYF(0)); DBUG_RETURN(true); } @@ -4334,6 +4348,7 @@ bool Prepared_statement::prepare(const char *packet, uint packet_len) cleanup_stmt(); 
thd->restore_backup_statement(this, &stmt_backup); thd->stmt_arena= old_stmt_arena; + thd->cur_stmt= save_cur_stmt; if (likely(error == 0)) { @@ -4792,6 +4807,7 @@ Prepared_statement::reprepare() it's failed, we need to return all the warnings to the user. */ thd->get_stmt_da()->clear_warning_info(thd->query_id); + column_info_state.reset(); } else { @@ -5434,7 +5450,7 @@ protected: CHARSET_INFO *fromcs, CHARSET_INFO *tocs); bool net_send_eof(THD *thd, uint server_status, uint statement_warn_count); bool net_send_ok(THD *, uint, uint, ulonglong, ulonglong, const char *, - bool, bool); + bool); bool net_send_error_packet(THD *, uint, const char *, const char *); bool begin_dataset(); bool begin_dataset(THD *thd, uint numfields); @@ -5597,7 +5613,7 @@ bool Protocol_local::net_store_data_cs(const uchar *from, size_t length, bool Protocol_local::net_send_ok(THD *thd, uint server_status, uint statement_warn_count, - ulonglong affected_rows, ulonglong id, const char *message, bool, bool) + ulonglong affected_rows, ulonglong id, const char *message, bool) { DBUG_ENTER("emb_net_send_ok"); MYSQL_DATA *data; diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc index 77a1e46a75a..b7aed97a8a2 100644 --- a/sql/sql_rename.cc +++ b/sql/sql_rename.cc @@ -325,7 +325,7 @@ do_rename(THD *thd, TABLE_LIST *ren_table, const LEX_CSTRING *new_db, #ifdef WITH_WSREP if (WSREP(thd) && hton && hton != view_pseudo_hton && - !wsrep_should_replicate_ddl(thd, hton->db_type)) + !wsrep_should_replicate_ddl(thd, hton)) DBUG_RETURN(1); #endif diff --git a/sql/sql_select.cc b/sql/sql_select.cc index dccd0dff1ae..01b8e45dd15 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -1073,7 +1073,7 @@ int SELECT_LEX::vers_setup_conds(THD *thd, TABLE_LIST *tables) if (vers_conditions.is_set()) { if (vers_conditions.was_set() && - table->lock_type > TL_READ_NO_INSERT && + table->lock_type >= TL_FIRST_WRITE && !vers_conditions.delete_history) { my_error(ER_TABLE_NOT_LOCKED_FOR_WRITE, MYF(0), 
table->alias.str); @@ -1609,7 +1609,7 @@ bool JOIN::build_explain() JOIN_TAB *curr_tab= join_tab + exec_join_tab_cnt(); for (uint i= 0; i < aggr_tables; i++, curr_tab++) { - if (select_nr == INT_MAX) + if (select_nr == FAKE_SELECT_LEX_ID) { /* this is a fake_select_lex of a union */ select_nr= select_lex->master_unit()->first_select()->select_number; @@ -2435,6 +2435,10 @@ int JOIN::optimize_stage2() DBUG_RETURN(1); } conds->update_used_tables(); + + if (unlikely(thd->trace_started())) + trace_condition(thd, "WHERE", "substitute_best_equal", conds); + DBUG_EXECUTE("where", print_where(conds, "after substitute_best_equal", @@ -2451,7 +2455,12 @@ int JOIN::optimize_stage2() DBUG_RETURN(1); } if (having) + { having->update_used_tables(); + if (unlikely(thd->trace_started())) + trace_condition(thd, "HAVING", "substitute_best_equal", having); + } + DBUG_EXECUTE("having", print_where(having, "after substitute_best_equal", @@ -2478,6 +2487,11 @@ int JOIN::optimize_stage2() DBUG_RETURN(1); } (*tab->on_expr_ref)->update_used_tables(); + if (unlikely(thd->trace_started())) + { + trace_condition(thd, "ON expr", "substitute_best_equal", + (*tab->on_expr_ref), tab->table->alias.c_ptr()); + } } } @@ -3901,6 +3915,16 @@ JOIN::add_sorting_to_table(JOIN_TAB *tab, ORDER *order) tab->select); if (!tab->filesort) return true; + + TABLE *table= tab->table; + if ((tab == join_tab + const_tables) && + table->pos_in_table_list && + table->pos_in_table_list->is_sjm_scan_table()) + { + tab->filesort->set_all_read_bits= TRUE; + tab->filesort->unpack= unpack_to_base_table_fields; + } + /* Select was moved to filesort->select to force join_init_read_record to use sorted result instead of reading table through select. 
@@ -4209,14 +4233,12 @@ bool JOIN::save_explain_data(Explain_query *output, bool can_overwrite, If there is SELECT in this statement with the same number it must be the same SELECT */ - DBUG_ASSERT(select_lex->select_number == UINT_MAX || - select_lex->select_number == INT_MAX || !output || + DBUG_ASSERT(select_lex->select_number == FAKE_SELECT_LEX_ID || !output || !output->get_select(select_lex->select_number) || output->get_select(select_lex->select_number)->select_lex == select_lex); - if (select_lex->select_number != UINT_MAX && - select_lex->select_number != INT_MAX /* this is not a UNION's "fake select */ && + if (select_lex->select_number != FAKE_SELECT_LEX_ID && have_query_plan != JOIN::QEP_NOT_PRESENT_YET && have_query_plan != JOIN::QEP_DELETED && // this happens when there was // no QEP ever, but then @@ -7443,7 +7465,6 @@ best_access_path(JOIN *join, DBUG_ENTER("best_access_path"); Json_writer_object trace_wrapper(thd, "best_access_path"); - Json_writer_array trace_paths(thd, "considered_access_paths"); bitmap_clear_all(eq_join_set); @@ -7451,6 +7472,7 @@ best_access_path(JOIN *join, if (s->table->is_splittable()) spl_plan= s->choose_best_splitting(record_count, remaining_tables); + Json_writer_array trace_paths(thd, "considered_access_paths"); if (s->keyuse) { /* Use key if possible */ @@ -7655,6 +7677,7 @@ best_access_path(JOIN *join, { if (!(records= keyinfo->actual_rec_per_key(key_parts-1))) { /* Prefer longer keys */ + trace_access_idx.add("rec_per_key_stats_missing", true); records= ((double) s->records / (double) rec * (1.0 + @@ -7681,7 +7704,7 @@ best_access_path(JOIN *join, records > (double) table->opt_range[key].rows) { records= (double) table->opt_range[key].rows; - trace_access_idx.add("used_range_estimates", true); + trace_access_idx.add("used_range_estimates", "clipped down"); } else { @@ -7798,19 +7821,15 @@ best_access_path(JOIN *join, if (!found_ref && // (1) records < rows) // (3) { - trace_access_idx.add("used_range_estimates", true); 
+ trace_access_idx.add("used_range_estimates", "clipped up"); records= rows; } } - else /* (table->quick_key_parts[key] < max_key_part) */ - { - trace_access_idx.add("chosen", true); - cause= "range uses less keyparts"; - } } } else { + trace_access_idx.add("rec_per_key_stats_missing", true); /* Assume that the first key part matches 1% of the file and that the whole key matches 10 (duplicates) or 1 @@ -7874,6 +7893,7 @@ best_access_path(JOIN *join, const_part)) && records > (double) table->opt_range[key].rows) { + trace_access_idx.add("used_range_estimates", true); records= (double) table->opt_range[key].rows; } } @@ -8009,7 +8029,8 @@ best_access_path(JOIN *join, access is to use the same index IDX, with the same or more key parts. (note: it is not clear how this rule is/should be extended to index_merge quick selects). Also if we have a hash join we prefer that - over a table scan + over a table scan. This heuristic doesn't apply if the quick select + uses the group-by min-max optimization. (3) See above note about InnoDB. (4) NOT ("FORCE INDEX(...)" is used for table and there is 'ref' access path, but there is no quick select) @@ -8027,7 +8048,9 @@ best_access_path(JOIN *join, Json_writer_object trace_access_scan(thd); if ((records >= s->found_records || best > s->read_time) && // (1) !(best_key && best_key->key == MAX_KEY) && // (2) - !(s->quick && best_key && s->quick->index == best_key->key && // (2) + !(s->quick && + s->quick->get_type() != QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX && // (2) + best_key && s->quick->index == best_key->key && // (2) best_max_key_part >= s->table->opt_range[best_key->key].key_parts) &&// (2) !((s->table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3) ! 
s->table->covering_keys.is_clear_all() && best_key && !s->quick) &&// (3) @@ -11496,7 +11519,6 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) */ Json_writer_object trace_wrapper(thd); Json_writer_object trace_conditions(thd, "attaching_conditions_to_tables"); - trace_conditions.add("original_condition", cond); Json_writer_array trace_attached_comp(thd, "attached_conditions_computation"); uint i; @@ -14332,37 +14354,8 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond, can be used without tmp. table. */ bool can_subst_to_first_table= false; - bool first_is_in_sjm_nest= false; - if (first_is_base_table) - { - TABLE_LIST *tbl_for_first= - join->join_tab[join->const_tables].table->pos_in_table_list; - first_is_in_sjm_nest= tbl_for_first->sj_mat_info && - tbl_for_first->sj_mat_info->is_used; - } - /* - Currently we do not employ the optimization that uses multiple - equalities for ORDER BY to remove tmp table in the case when - the first table happens to be the result of materialization of - a semi-join nest ( <=> first_is_in_sjm_nest == true). - - When a semi-join nest is materialized and scanned to look for - possible matches in the remaining tables for every its row - the fields from the result of materialization are copied - into the record buffers of tables from the semi-join nest. - So these copies are used to access the remaining tables rather - than the fields from the result of materialization. - - Unfortunately now this so-called 'copy back' technique is - supported only if the rows are scanned with the rr_sequential - function, but not with other rr_* functions that are employed - when the result of materialization is required to be sorted. - - TODO: either to support 'copy back' technique for the above case, - or to get rid of this technique altogether. 
- */ if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) && - first_is_base_table && !first_is_in_sjm_nest && + first_is_base_table && order->item[0]->real_item()->type() == Item::FIELD_ITEM && join->cond_equal) { @@ -15524,6 +15517,16 @@ static COND *build_equal_items(JOIN *join, COND *cond, table->on_expr= build_equal_items(join, table->on_expr, inherited, nested_join_list, ignore_on_conds, &table->cond_equal); + if (unlikely(join->thd->trace_started())) + { + const char *table_name; + if (table->nested_join) + table_name= table->nested_join->join_list.head()->alias.str; + else + table_name= table->alias.str; + trace_condition(join->thd, "ON expr", "build_equal_items", + table->on_expr, table_name); + } } } } @@ -20415,19 +20418,6 @@ do_select(JOIN *join, Procedure *procedure) } -int rr_sequential_and_unpack(READ_RECORD *info) -{ - int error; - if (unlikely((error= rr_sequential(info)))) - return error; - - for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++) - (*cp->do_copy)(cp); - - return error; -} - - /** @brief Instantiates temporary table @@ -21723,6 +21713,8 @@ bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab) int join_init_read_record(JOIN_TAB *tab) { + bool need_unpacking= FALSE; + JOIN *join= tab->join; /* Note: the query plan tree for the below operations is constructed in save_agg_explain_data. @@ -21730,6 +21722,12 @@ int join_init_read_record(JOIN_TAB *tab) if (tab->distinct && tab->remove_duplicates()) // Remove duplicates. return 1; + if (join->top_join_tab_count != join->const_tables) + { + TABLE_LIST *tbl= tab->table->pos_in_table_list; + need_unpacking= tbl ? tbl->is_sjm_scan_table() : FALSE; + } + tab->build_range_rowid_filter_if_needed(); if (tab->filesort && tab->sort_table()) // Sort table. 
@@ -21737,6 +21735,11 @@ int join_init_read_record(JOIN_TAB *tab) DBUG_EXECUTE_IF("kill_join_init_read_record", tab->join->thd->set_killed(KILL_QUERY);); + + + if (!tab->preread_init_done && tab->preread_init()) + return 1; + if (tab->select && tab->select->quick && tab->select->quick->reset()) { /* Ensures error status is propagated back to client */ @@ -21747,19 +21750,7 @@ int join_init_read_record(JOIN_TAB *tab) /* make sure we won't get ER_QUERY_INTERRUPTED from any code below */ DBUG_EXECUTE_IF("kill_join_init_read_record", tab->join->thd->reset_killed();); - if (!tab->preread_init_done && tab->preread_init()) - return 1; - - - if (init_read_record(&tab->read_record, tab->join->thd, tab->table, - tab->select, tab->filesort_result, 1,1, FALSE)) - return 1; - return tab->read_record.read_record(); -} -int -join_read_record_no_init(JOIN_TAB *tab) -{ Copy_field *save_copy, *save_copy_end; /* @@ -21769,12 +21760,19 @@ join_read_record_no_init(JOIN_TAB *tab) save_copy= tab->read_record.copy_field; save_copy_end= tab->read_record.copy_field_end; - init_read_record(&tab->read_record, tab->join->thd, tab->table, - tab->select, tab->filesort_result, 1, 1, FALSE); + if (init_read_record(&tab->read_record, tab->join->thd, tab->table, + tab->select, tab->filesort_result, 1, 1, FALSE)) + return 1; tab->read_record.copy_field= save_copy; tab->read_record.copy_field_end= save_copy_end; - tab->read_record.read_record_func= rr_sequential_and_unpack; + + if (need_unpacking) + { + tab->read_record.read_record_func_and_unpack_calls= + tab->read_record.read_record_func; + tab->read_record.read_record_func = read_record_func_for_rr_and_unpack; + } return tab->read_record.read_record(); } @@ -27806,8 +27804,7 @@ void st_select_lex::print(THD *thd, String *str, enum_query_type query_type) if ((query_type & QT_SHOW_SELECT_NUMBER) && thd->lex->all_selects_list && thd->lex->all_selects_list->link_next && - select_number != UINT_MAX && - select_number != INT_MAX) + select_number != 
FAKE_SELECT_LEX_ID) { str->append("/* select#"); str->append_ulonglong(select_number); @@ -27968,11 +27965,14 @@ void st_select_lex::print(THD *thd, String *str, enum_query_type query_type) print_limit(thd, str, query_type); // lock type - if (lock_type == TL_READ_WITH_SHARED_LOCKS) + if (select_lock == select_lock_type::IN_SHARE_MODE) str->append(" lock in share mode"); - else if (lock_type == TL_WRITE) + else if (select_lock == select_lock_type::FOR_UPDATE) str->append(" for update"); + if (unlikely(skip_locked)) + str->append(" skip locked"); + // PROCEDURE unsupported here } @@ -29627,6 +29627,20 @@ void JOIN::init_join_cache_and_keyread() } +/* + @brief + Unpack temp table fields to base table fields. +*/ + +void unpack_to_base_table_fields(TABLE *table) +{ + JOIN_TAB *tab= table->reginfo.join_tab; + for (Copy_field *cp= tab->read_record.copy_field; + cp != tab->read_record.copy_field_end; cp++) + (*cp->do_copy)(cp); +} + + /** @} (end of group Query_Optimizer) diff --git a/sql/sql_select.h b/sql/sql_select.h index dd364e441cb..2ba0b5faf18 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -223,7 +223,7 @@ typedef enum_nested_loop_state (*Next_select_func)(JOIN *, struct st_join_table *, bool); Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab); int rr_sequential(READ_RECORD *info); -int rr_sequential_and_unpack(READ_RECORD *info); +int read_record_func_for_rr_and_unpack(READ_RECORD *info); Item *remove_pushed_top_conjuncts(THD *thd, Item *cond); Item *and_new_conditions_to_optimized_cond(THD *thd, Item *cond, COND_EQUAL **cond_eq, @@ -2354,7 +2354,6 @@ create_virtual_tmp_table(THD *thd, Field *field) int test_if_item_cache_changed(List<Cached_item> &list); int join_init_read_record(JOIN_TAB *tab); -int join_read_record_no_init(JOIN_TAB *tab); void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key); inline Item * and_items(THD *thd, Item* cond, Item *item) { @@ -2412,6 +2411,7 @@ int print_explain_message_line(select_result_sink 
*result, void explain_append_mrr_info(QUICK_RANGE_SELECT *quick, String *res); int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table, key_map possible_keys); +void unpack_to_base_table_fields(TABLE *table); /**************************************************************************** Temporary table support for SQL Runtime diff --git a/sql/sql_show.cc b/sql/sql_show.cc index d7050bcf2d1..38b3d62d539 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -2493,6 +2493,9 @@ static void store_key_options(THD *thd, String *packet, TABLE *table, append_unescaped(packet, key_info->comment.str, key_info->comment.length); } + + if (key_info->is_ignored) + packet->append(STRING_WITH_LEN(" IGNORED")); } } @@ -6717,6 +6720,12 @@ static int get_schema_stat_record(THD *thd, TABLE_LIST *tables, if (key_info->flags & HA_USES_COMMENT) table->field[15]->store(key_info->comment.str, key_info->comment.length, cs); + + // IGNORED column + const char *is_ignored= key_info->is_ignored ? "YES" : "NO"; + table->field[16]->store(is_ignored, strlen(is_ignored), cs); + table->field[16]->set_notnull(); + if (schema_table_store_record(thd, table)) DBUG_RETURN(1); } @@ -9097,6 +9106,7 @@ ST_FIELD_INFO stat_fields_info[]= Column("COMMENT", Varchar(16), NULLABLE, "Comment", OPEN_FRM_ONLY), Column("INDEX_COMMENT", Varchar(INDEX_COMMENT_MAXLEN), NOT_NULL, "Index_comment",OPEN_FRM_ONLY), + Column("IGNORED", Varchar(3), NOT_NULL, "Ignored", OPEN_FRM_ONLY), CEnd() }; diff --git a/sql/sql_sort.h b/sql/sql_sort.h index a474d7c25e9..3b23328183c 100644 --- a/sql/sql_sort.h +++ b/sql/sql_sort.h @@ -559,6 +559,7 @@ public: Addon_fields *addon_fields; // Descriptors for companion fields. Sort_keys *sort_keys; bool using_pq; + bool set_all_read_bits; uchar *unique_buff; bool not_killable; @@ -579,6 +580,8 @@ public: } void init_for_filesort(uint sortlen, TABLE *table, ha_rows maxrows, bool sort_positions); + + void (*unpack)(TABLE *); /// Enables the packing of addons if possible. 
void try_to_pack_addons(ulong max_length_for_sort_data); diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 00815fdd36c..3643b19f8fb 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -49,7 +49,6 @@ #include "sp_head.h" #include "sp.h" #include "sql_trigger.h" -#include "sql_parse.h" #include "sql_show.h" #include "transaction.h" #include "sql_audit.h" @@ -2473,7 +2472,7 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists, } else { - if (WSREP(thd) && hton && !wsrep_should_replicate_ddl(thd, hton->db_type)) + if (WSREP(thd) && hton && !wsrep_should_replicate_ddl(thd, hton)) { error= 1; goto err; @@ -4429,7 +4428,7 @@ without_overlaps_err: } create_info->period_info.unique_keys++; } - + key_info->is_ignored= key->key_create_info.is_ignored; key_info++; } @@ -7029,6 +7028,33 @@ Compare_keys compare_keys_but_name(const KEY *table_key, const KEY *new_key, return result; } + +/** + Look-up KEY object by index name using case-insensitive comparison. + + @param key_name Index name. + @param key_start Start of array of KEYs for table. + @param key_end End of array of KEYs for table. + + @note Case-insensitive comparison is necessary to correctly + handle renaming of keys. + + @retval non-NULL - pointer to KEY object for index found. + @retval NULL - no index with such name found (or it is marked + as renamed). +*/ + +static KEY *find_key_ci(const char *key_name, KEY *key_start, KEY *key_end) +{ + for (KEY *key = key_start; key < key_end; key++) + { + if (!my_strcasecmp(system_charset_info, key_name, key->name.str)) + return key; + } + return NULL; +} + + /** Compare original and new versions of a table and fill Alter_inplace_info describing differences between those versions. @@ -7088,7 +7114,10 @@ static bool fill_alter_inplace_info(THD *thd, TABLE *table, bool varchar, ! 
(ha_alter_info->index_add_buffer= (uint*) thd->alloc(sizeof(uint) * alter_info->key_list.elements)) || - ha_alter_info->rename_keys.reserve(ha_alter_info->index_add_count)) + ha_alter_info->rename_keys.reserve(ha_alter_info->index_add_count) || + ! (ha_alter_info->index_altered_ignorability_buffer= + (KEY_PAIR*)thd->alloc(sizeof(KEY_PAIR) * + alter_info->alter_index_ignorability_list.elements))) DBUG_RETURN(true); /* @@ -7493,6 +7522,29 @@ static bool fill_alter_inplace_info(THD *thd, TABLE *table, bool varchar, } } + List_iterator<Alter_index_ignorability> + ignorability_index_it(alter_info->alter_index_ignorability_list); + Alter_index_ignorability *alter_index_ignorability; + while((alter_index_ignorability= ignorability_index_it++)) + { + const char *name= alter_index_ignorability->name(); + + KEY *old_key, *new_key; + old_key= find_key_ci(name, table->key_info, table_key_end); + new_key= find_key_ci(name, ha_alter_info->key_info_buffer, new_key_end); + + DBUG_ASSERT(old_key != NULL); + + if (new_key == NULL) + { + my_error(ER_KEY_DOES_NOT_EXISTS, MYF(0), name, table->s->table_name.str); + DBUG_RETURN(true); + } + new_key->is_ignored= alter_index_ignorability->is_ignored(); + ha_alter_info->handler_flags|= ALTER_RENAME_INDEX; + ha_alter_info->add_altered_index_ignorability(old_key, new_key); + } + /* Sort index_add_buffer according to how key_info_buffer is sorted. I.e. with primary keys first - see sort_keys(). @@ -8327,6 +8379,17 @@ mysql_prepare_alter_table(THD *thd, TABLE *table, /* New key definitions are added here */ List<Key> new_key_list; List<Alter_rename_key> rename_key_list(alter_info->alter_rename_key_list); + + /* + Create a deep copy of the list of visibility for indexes, as it will be + altered here. 
+ */ + List<Alter_index_ignorability> + alter_index_ignorability_list(alter_info->alter_index_ignorability_list, + thd->mem_root); + + list_copy_and_replace_each_value(alter_index_ignorability_list, thd->mem_root); + List_iterator<Alter_drop> drop_it(alter_info->drop_list); List_iterator<Create_field> def_it(alter_info->create_list); List_iterator<Alter_column> alter_it(alter_info->alter_list); @@ -8794,6 +8857,18 @@ mysql_prepare_alter_table(THD *thd, TABLE *table, continue; } + List_iterator<Alter_index_ignorability> + ignorability_index_it(alter_index_ignorability_list); + + Alter_index_ignorability *index_ignorability; + while((index_ignorability= ignorability_index_it++)) + { + const char* name= index_ignorability->name(); + if (!my_strcasecmp(system_charset_info, key_name, name)) + ignorability_index_it.remove(); + } + + /* If this index is to stay in the table check if it has to be renamed. */ List_iterator<Alter_rename_key> rename_key_it(rename_key_list); Alter_rename_key *rename_key; @@ -8953,6 +9028,7 @@ mysql_prepare_alter_table(THD *thd, TABLE *table, key_create_info.parser_name= *plugin_name(key_info->parser); if (key_info->flags & HA_USES_COMMENT) key_create_info.comment= key_info->comment; + key_create_info.is_ignored= key_info->is_ignored; /* We're refreshing an already existing index. 
Since the index is not @@ -8984,6 +9060,24 @@ mysql_prepare_alter_table(THD *thd, TABLE *table, else key_type= Key::MULTIPLE; + List_iterator<Alter_index_ignorability> + ignorability_index_it(alter_info->alter_index_ignorability_list); + Alter_index_ignorability *index_ignorability; + while((index_ignorability= ignorability_index_it++)) + { + const char *name= index_ignorability->name(); + if (!my_strcasecmp(system_charset_info, key_name, name)) + { + if (table->s->primary_key <= MAX_KEY && + table->key_info + table->s->primary_key == key_info) + { + my_error(ER_PK_INDEX_CANT_BE_IGNORED, MYF(0)); + goto err; + } + key_create_info.is_ignored= index_ignorability->is_ignored(); + } + } + tmp_name.str= key_name; tmp_name.length= strlen(key_name); /* We dont need LONG_UNIQUE_HASH_FIELD flag because it will be autogenerated */ @@ -9178,6 +9272,14 @@ mysql_prepare_alter_table(THD *thd, TABLE *table, goto err; } + if (alter_index_ignorability_list.elements) + { + my_error(ER_KEY_DOES_NOT_EXISTS, MYF(0), + alter_index_ignorability_list.head()->name(), + table->s->table_name.str); + goto err; + } + if (!create_info->comment.str) { create_info->comment.str= table->s->comment.str; @@ -9992,7 +10094,7 @@ bool mysql_alter_table(THD *thd, const LEX_CSTRING *new_db, (thd->lex->sql_command == SQLCOM_ALTER_TABLE || thd->lex->sql_command == SQLCOM_CREATE_INDEX || thd->lex->sql_command == SQLCOM_DROP_INDEX) && - !wsrep_should_replicate_ddl(thd, table_list->table->s->db_type()->db_type)) + !wsrep_should_replicate_ddl(thd, table_list->table->s->db_type())) DBUG_RETURN(true); #endif @@ -11535,6 +11637,9 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, } else { + /* In case of alter ignore, notify the engine about it. 
*/ + if (ignore) + to->file->extra(HA_EXTRA_IGNORE_INSERT); DEBUG_SYNC(thd, "copy_data_between_tables_before"); found_count++; mysql_stage_set_work_completed(thd->m_stage_progress_psi, found_count); diff --git a/sql/sql_test.cc b/sql/sql_test.cc index 07ebcc7a37a..8c9c56f1db7 100644 --- a/sql/sql_test.cc +++ b/sql/sql_test.cc @@ -51,11 +51,13 @@ static const char *lock_descriptions[] = /* TL_READ_WITH_SHARED_LOCKS */ "Shared read lock", /* TL_READ_HIGH_PRIORITY */ "High priority read lock", /* TL_READ_NO_INSERT */ "Read lock without concurrent inserts", + /* TL_READ_SKIP_LOCKED */ "Read lock without blocking if row is locked", /* TL_WRITE_ALLOW_WRITE */ "Write lock that allows other writers", /* TL_WRITE_CONCURRENT_INSERT */ "Concurrent insert lock", /* TL_WRITE_DELAYED */ "Lock used by delayed insert", /* TL_WRITE_DEFAULT */ NULL, /* TL_WRITE_LOW_PRIORITY */ "Low priority write lock", + /* TL_WRITE_SKIP_LOCKED */ "Write lock but skip existing locked rows", /* TL_WRITE */ "High priority write lock", /* TL_WRITE_ONLY */ "Highest priority write lock" }; diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc index 9417ec667ff..fd9095ea659 100644 --- a/sql/sql_trigger.cc +++ b/sql/sql_trigger.cc @@ -551,7 +551,7 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create) #ifdef WITH_WSREP if (WSREP(thd) && - !wsrep_should_replicate_ddl(thd, table->s->db_type()->db_type)) + !wsrep_should_replicate_ddl(thd, table->s->db_type())) goto wsrep_error_label; #endif @@ -2270,7 +2270,7 @@ add_tables_and_routines_for_triggers(THD *thd, TABLE_LIST *table_list) { DBUG_ASSERT(static_cast<int>(table_list->lock_type) >= - static_cast<int>(TL_WRITE_ALLOW_WRITE)); + static_cast<int>(TL_FIRST_WRITE)); for (int i= 0; i < (int)TRG_EVENT_MAX; i++) { diff --git a/sql/sql_truncate.cc b/sql/sql_truncate.cc index e699869d644..ab1ce65bae5 100644 --- a/sql/sql_truncate.cc +++ b/sql/sql_truncate.cc @@ -305,7 +305,7 @@ bool Sql_cmd_truncate_table::lock_table(THD *thd, TABLE_LIST 
*table_ref, hton= table->file->ht; #ifdef WITH_WSREP if (WSREP(thd) && - !wsrep_should_replicate_ddl(thd, hton->db_type)) + !wsrep_should_replicate_ddl(thd, hton)) DBUG_RETURN(TRUE); #endif @@ -329,7 +329,7 @@ bool Sql_cmd_truncate_table::lock_table(THD *thd, TABLE_LIST *table_ref, #ifdef WITH_WSREP if (WSREP(thd) && hton != view_pseudo_hton && - !wsrep_should_replicate_ddl(thd, hton->db_type)) + !wsrep_should_replicate_ddl(thd, hton)) { tdc_release_share(share); DBUG_RETURN(TRUE); diff --git a/sql/sql_tvc.cc b/sql/sql_tvc.cc index 6984fdd6bcf..576927ea086 100644 --- a/sql/sql_tvc.cc +++ b/sql/sql_tvc.cc @@ -397,8 +397,7 @@ bool table_value_constr::optimize(THD *thd) create_explain_query_if_not_exists(thd->lex, thd->mem_root); have_query_plan= QEP_AVAILABLE; - if (select_lex->select_number != UINT_MAX && - select_lex->select_number != INT_MAX /* this is not a UNION's "fake select */ && + if (select_lex->select_number != FAKE_SELECT_LEX_ID && have_query_plan != QEP_NOT_PRESENT_YET && thd->lex->explain && // for "SET" command in SPs. 
(!thd->lex->explain->get_select(select_lex->select_number))) diff --git a/sql/sql_view.cc b/sql/sql_view.cc index cfd43bd13ab..3bacc3d1499 100644 --- a/sql/sql_view.cc +++ b/sql/sql_view.cc @@ -444,7 +444,7 @@ bool mysql_create_view(THD *thd, TABLE_LIST *views, */ if (lex->current_select->lock_type != TL_READ_DEFAULT) { - lex->current_select->set_lock_for_tables(TL_READ_DEFAULT, false); + lex->current_select->set_lock_for_tables(TL_READ_DEFAULT, false, select_lex->skip_locked); view->mdl_request.set_type(MDL_EXCLUSIVE); } diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 1a045c5416f..06b02bc2b93 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -532,6 +532,7 @@ End SQL_MODE_ORACLE_SPECIFIC */ %token <kwd> IF_SYM %token <kwd> IGNORE_DOMAIN_IDS_SYM %token <kwd> IGNORE_SYM +%token <kwd> IGNORED_SYM %token <kwd> INDEX_SYM %token <kwd> INFILE %token <kwd> INNER_SYM /* SQL-2003-R */ @@ -901,6 +902,7 @@ End SQL_MODE_ORACLE_SPECIFIC */ %token <kwd> LEVEL_SYM %token <kwd> LIST_SYM %token <kwd> LOCAL_SYM /* SQL-2003-R */ +%token <kwd> LOCKED_SYM %token <kwd> LOCKS_SYM %token <kwd> LOGFILE_SYM %token <kwd> LOGS_SYM @@ -1061,6 +1063,7 @@ End SQL_MODE_ORACLE_SPECIFIC */ %token <kwd> SHUTDOWN %token <kwd> SIGNED_SYM %token <kwd> SIMPLE_SYM /* SQL-2003-N */ +%token <kwd> SKIP_SYM %token <kwd> SLAVE %token <kwd> SLAVES %token <kwd> SLAVE_POS_SYM @@ -1388,6 +1391,7 @@ End SQL_MODE_ORACLE_SPECIFIC */ case_stmt_body opt_bin_mod opt_for_system_time_clause opt_if_exists_table_element opt_if_not_exists_table_element opt_recursive opt_format_xid opt_for_portion_of_time_clause + ignorability %type <object_ddl_options> create_or_replace @@ -7031,7 +7035,11 @@ all_key_opt: { Lex->last_key->key_create_info.comment= $2; } | VISIBLE_SYM { - /* This is mainly for MySQL 8.0 compatiblity */ + /* This is mainly for MySQL 8.0 compatibility */ + } + | ignorability + { + Lex->last_key->key_create_info.is_ignored= $1; } | IDENT_sys equal TEXT_STRING_sys { @@ -7089,6 +7097,11 @@ btree_or_rtree: 
| HASH_SYM { $$= HA_KEY_ALG_HASH; } ; +ignorability: + IGNORED_SYM { $$= true; } + | NOT_SYM IGNORED_SYM { $$= false; } + ; + key_list: key_list ',' key_part order_dir { @@ -7783,6 +7796,16 @@ alter_list_item: if (unlikely(Lex->add_alter_list($4, $7, $3))) MYSQL_YYABORT; } + | ALTER key_or_index ident ignorability + { + LEX *lex= Lex; + Alter_index_ignorability *ac= new (thd->mem_root) + Alter_index_ignorability($3.str, $4); + if (ac == NULL) + MYSQL_YYABORT; + lex->alter_info.alter_index_ignorability_list.push_back(ac); + lex->alter_info.flags|= ALTER_INDEX_IGNORABILITY; + } | ALTER opt_column opt_if_exists_table_element field_ident DROP DEFAULT { if (unlikely(Lex->add_alter_list($4, (Virtual_column_info*) 0, $3))) @@ -9109,7 +9132,6 @@ opt_select_lock_type: } ; - opt_lock_wait_timeout_new: /* empty */ { @@ -9117,14 +9139,22 @@ opt_lock_wait_timeout_new: } | WAIT_SYM ulong_num { + $$.empty(); $$.defined_timeout= TRUE; $$.timeout= $2; } | NOWAIT_SYM { + $$.empty(); $$.defined_timeout= TRUE; $$.timeout= 0; } + | SKIP_SYM LOCKED_SYM + { + $$.empty(); + $$.skip_locked= 1; + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SKIP_LOCKED); + } ; select_item_list: @@ -11754,6 +11784,16 @@ table_primary_derived: if (!($$= Lex->parsed_derived_table($1->master_unit(), $2, $3))) MYSQL_YYABORT; } +/* Start SQL_MODE_ORACLE_SPECIFIC + | subquery + opt_for_system_time_clause + { + LEX_CSTRING alias; + if ($1->make_unique_derived_name(thd, &alias) || + !($$= Lex->parsed_derived_table($1->master_unit(), $2, &alias))) + MYSQL_YYABORT; + } +End SQL_MODE_ORACLE_SPECIFIC */ ; opt_outer: @@ -12840,7 +12880,7 @@ insert: } insert_start insert_lock_option opt_ignore opt_into insert_table { - Select->set_lock_for_tables($4, true); + Select->set_lock_for_tables($4, true, false); } insert_field_spec opt_insert_update opt_returning stmt_end @@ -12857,7 +12897,7 @@ replace: } insert_start replace_lock_option opt_into insert_table { - Select->set_lock_for_tables($4, true); + 
Select->set_lock_for_tables($4, true, false); } insert_field_spec opt_returning stmt_end @@ -13153,7 +13193,7 @@ update: be too pessimistic. We will decrease lock level if possible in mysql_multi_update(). */ - slex->set_lock_for_tables($3, slex->table_list.elements == 1); + slex->set_lock_for_tables($3, slex->table_list.elements == 1, false); } opt_where_clause opt_order_clause delete_limit_clause { @@ -15285,6 +15325,7 @@ keyword_table_alias: | keyword_verb_clause | FUNCTION_SYM | EXCEPTION_ORACLE_SYM + | IGNORED_SYM ; /* Keyword that we allow for identifiers (except SP labels) */ @@ -15301,6 +15342,7 @@ keyword_ident: | FUNCTION_SYM | WINDOW_SYM | EXCEPTION_ORACLE_SYM + | IGNORED_SYM ; keyword_sysvar_name: @@ -15315,6 +15357,7 @@ keyword_sysvar_name: | FUNCTION_SYM | WINDOW_SYM | EXCEPTION_ORACLE_SYM + | IGNORED_SYM ; keyword_set_usual_case: @@ -15329,6 +15372,7 @@ keyword_set_usual_case: | FUNCTION_SYM | WINDOW_SYM | EXCEPTION_ORACLE_SYM + | IGNORED_SYM ; non_reserved_keyword_udt: @@ -15660,6 +15704,7 @@ keyword_sp_var_and_label: | LESS_SYM | LEVEL_SYM | LIST_SYM + | LOCKED_SYM | LOCKS_SYM | LOGFILE_SYM | LOGS_SYM @@ -15788,6 +15833,7 @@ keyword_sp_var_and_label: | SETVAL_SYM | SIMPLE_SYM | SHARE_SYM + | SKIP_SYM | SLAVE_POS_SYM | SLOW | SNAPSHOT_SYM @@ -15945,6 +15991,7 @@ reserved_keyword_udt_not_param_type: | IF_SYM | IGNORE_DOMAIN_IDS_SYM | IGNORE_SYM + | IGNORED_SYM | INDEX_SYM | INFILE | INNER_SYM @@ -16668,7 +16715,7 @@ table_lock: table_ident opt_table_alias_clause lock_option { thr_lock_type lock_type= (thr_lock_type) $3; - bool lock_for_write= (lock_type >= TL_WRITE_ALLOW_WRITE); + bool lock_for_write= (lock_type >= TL_FIRST_WRITE); ulong table_options= lock_for_write ? TL_OPTION_UPDATING : 0; enum_mdl_type mdl_type= !lock_for_write ? 
MDL_SHARED_READ @@ -17840,6 +17887,7 @@ keyword_label: | keyword_sysvar_type | FUNCTION_SYM | EXCEPTION_ORACLE_SYM + | IGNORED_SYM ; keyword_sp_decl: @@ -17854,6 +17902,7 @@ keyword_sp_decl: | keyword_verb_clause | FUNCTION_SYM | WINDOW_SYM + | IGNORED_SYM ; opt_truncate_table_storage_clause: @@ -18253,6 +18302,7 @@ keyword_label: | FUNCTION_SYM | COMPRESSED_SYM | EXCEPTION_ORACLE_SYM + | IGNORED_SYM ; keyword_sp_decl: @@ -18263,6 +18313,7 @@ keyword_sp_decl: | keyword_sysvar_type | keyword_verb_clause | WINDOW_SYM + | IGNORED_SYM ; opt_truncate_table_storage_clause: diff --git a/sql/structs.h b/sql/structs.h index bcd38ffbdd6..df362f76f82 100644 --- a/sql/structs.h +++ b/sql/structs.h @@ -165,6 +165,10 @@ typedef struct st_key { double actual_rec_per_key(uint i); bool without_overlaps; + /* + TRUE if index needs to be ignored + */ + bool is_ignored; } KEY; @@ -173,6 +177,7 @@ struct st_join_table; typedef struct st_reginfo { /* Extra info about reg */ struct st_join_table *join_tab; /* Used by SELECT() */ enum thr_lock_type lock_type; /* How database is used */ + bool skip_locked; bool not_exists_optimize; /* TRUE <=> range optimizer found that there is no rows satisfying @@ -803,13 +808,14 @@ public: uint defined_lock:1; uint update_lock:1; uint defined_timeout:1; + uint skip_locked:1; }; ulong timeout; void empty() { - defined_lock= update_lock= defined_timeout= FALSE; + defined_lock= update_lock= defined_timeout= skip_locked= FALSE; timeout= 0; } void set_to(st_select_lex *sel); diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index a8e4ff2dded..ebec2a203aa 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -2457,7 +2457,7 @@ static Sys_var_ulong Sys_max_recursive_iterations( "max_recursive_iterations", "Maximum number of iterations when executing recursive queries", SESSION_VAR(max_recursive_iterations), CMD_LINE(OPT_ARG), - VALID_RANGE(0, UINT_MAX), DEFAULT(UINT_MAX), BLOCK_SIZE(1)); + VALID_RANGE(0, UINT_MAX), DEFAULT(1000), BLOCK_SIZE(1)); static 
Sys_var_ulong Sys_max_sort_length( "max_sort_length", @@ -5934,6 +5934,25 @@ static Sys_var_uint Sys_wsrep_sync_wait( NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(wsrep_sync_wait_update)); +static const char *wsrep_mode_names[]= +{ + "STRICT_REPLICATION", + "BINLOG_ROW_FORMAT_ONLY", + "REQUIRED_PRIMARY_KEY", + "REPLICATE_MYISAM", + "REPLICATE_ARIA", + "DISALLOW_LOCAL_GTID", + NullS +}; +static Sys_var_set Sys_wsrep_mode( + "wsrep_mode", + "Set of WSREP features that are enabled.", + GLOBAL_VAR(wsrep_mode), CMD_LINE(REQUIRED_ARG), + wsrep_mode_names, + DEFAULT(0), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_mode_check)); + static const char *wsrep_OSU_method_names[]= { "TOI", "RSU", NullS }; static Sys_var_enum Sys_wsrep_OSU_method( "wsrep_OSU_method", "Method for Online Schema Upgrade", @@ -5950,12 +5969,14 @@ static Sys_var_mybool Sys_wsrep_desync ( ON_UPDATE(wsrep_desync_update)); static Sys_var_mybool Sys_wsrep_strict_ddl ( - "wsrep_strict_ddl", "If set, reject DDL on affected tables not supporting Galera replication", + "wsrep_strict_ddl", + "If set, reject DDL on affected tables not supporting Galera replication", GLOBAL_VAR(wsrep_strict_ddl), CMD_LINE(OPT_ARG), DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), - ON_UPDATE(0)); + ON_UPDATE(wsrep_strict_ddl_update), + DEPRECATED("'@@wsrep_mode=STRICT_REPLICATION'")); // since 10.6.0 static const char *wsrep_reject_queries_names[]= { "NONE", "ALL", "ALL_KILL", NullS }; static Sys_var_enum Sys_wsrep_reject_queries( @@ -5979,7 +6000,10 @@ static Sys_var_mybool Sys_wsrep_recover_datadir( static Sys_var_mybool Sys_wsrep_replicate_myisam( "wsrep_replicate_myisam", "To enable myisam replication", - GLOBAL_VAR(wsrep_replicate_myisam), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + GLOBAL_VAR(wsrep_replicate_myisam), CMD_LINE(OPT_ARG), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(wsrep_replicate_myisam_update), + DEPRECATED("'@@wsrep_mode=REPLICATE_MYISAM'")); // since 10.6.0 
static Sys_var_mybool Sys_wsrep_log_conflicts( "wsrep_log_conflicts", "To log multi-master conflicts", diff --git a/sql/table.cc b/sql/table.cc index 271460bad52..26dadba9ebc 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -72,6 +72,7 @@ struct extra2_fields LEX_CUSTRING application_period; LEX_CUSTRING field_data_type_info; LEX_CUSTRING without_overlaps; + LEX_CUSTRING index_flags; void reset() { bzero((void*)this, sizeof(*this)); } }; @@ -1423,6 +1424,35 @@ void TABLE_SHARE::set_overlapped_keys() } +/* + @brief + Set of indexes that are marked as IGNORE. +*/ + +void TABLE_SHARE::set_ignored_indexes() +{ + KEY *keyinfo= key_info; + for (uint i= 0; i < keys; i++, keyinfo++) + { + if (keyinfo->is_ignored) + ignored_indexes.set_bit(i); + } +} + + +/* + @brief + Set of indexes that the optimizer may use when creating an execution plan. +*/ + +key_map TABLE_SHARE::usable_indexes(THD *thd) +{ + key_map usable_indexes(keys_in_use); + usable_indexes.subtract(ignored_indexes); + return usable_indexes; +} + + bool Item_field::check_index_dependence(void *arg) { TABLE *table= (TABLE *)arg; @@ -1590,6 +1620,9 @@ bool read_extra2(const uchar *frm_image, size_t len, extra2_fields *fields) case EXTRA2_FIELD_DATA_TYPE_INFO: fail= read_extra2_section_once(extra2, length, &fields->field_data_type_info); break; + case EXTRA2_INDEX_FLAGS: + fail= read_extra2_section_once(extra2, length, &fields->index_flags); + break; default: /* abort frm parsing if it's an unknown but important extra2 value */ if (type >= EXTRA2_ENGINE_IMPORTANT) @@ -1748,6 +1781,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, MEM_ROOT *old_root= thd->mem_root; Virtual_column_info **table_check_constraints; extra2_fields extra2; + bool extra_index_flags_present= FALSE; DBUG_ENTER("TABLE_SHARE::init_from_binary_frm_image"); keyinfo= &first_keyinfo; @@ -1902,9 +1936,13 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, share->key_parts= key_parts= disk_buff[1]; } 
share->keys_for_keyread.init(0); + share->ignored_indexes.init(0); share->keys_in_use.init(keys); ext_key_parts= key_parts; + if (extra2.index_flags.str && extra2.index_flags.length != keys) + goto err; + len= (uint) uint2korr(disk_buff+4); share->reclength = uint2korr(frm_image+16); @@ -2100,9 +2138,26 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, } share->key_block_size= uint2korr(frm_image+62); keyinfo= share->key_info; + + + if (extra2.index_flags.str) + extra_index_flags_present= TRUE; + for (uint i= 0; i < share->keys; i++, keyinfo++) + { + if (extra_index_flags_present) + { + uchar flags= *extra2.index_flags.str++; + keyinfo->is_ignored= (flags & EXTRA2_IGNORED_KEY); + } + else + keyinfo->is_ignored= FALSE; + if (keyinfo->algorithm == HA_KEY_ALG_LONG_HASH) hash_fields++; + } + + share->set_ignored_indexes(); #ifdef WITH_PARTITION_STORAGE_ENGINE if (par_image && plugin_data(se_plugin, handlerton*) == partition_hton) @@ -2780,6 +2835,33 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, } } + /* + Make sure that the primary key is not marked as IGNORE + This can happen in the case + 1) when IGNORE is mentioned in the Key specification + 2) When a unique NON-NULLABLE key is promted to a primary key. + The unqiue key could have been marked as IGNORE when there + was a primary key in the table. + + Eg: + CREATE TABLE t1(a INT NOT NULL, primary key(a), UNIQUE key1(a)) + so for this table when we try to IGNORE key1 + then we run: + ALTER TABLE t1 ALTER INDEX key1 IGNORE + this runs successsfully and key1 is marked as IGNORE. 
+ + But lets say then we drop the primary key + ALTER TABLE t1 DROP PRIMARY + then the UNIQUE key will be promoted to become the primary key + but then the UNIQUE key cannot be marked as IGNORE, so an + error is thrown + */ + if (primary_key != MAX_KEY && keyinfo && keyinfo->is_ignored) + { + my_error(ER_PK_INDEX_CANT_BE_IGNORED, MYF(0)); + goto err; + } + if (share->use_ext_keys) { if (primary_key >= MAX_KEY) @@ -3534,7 +3616,7 @@ bool fix_session_vcol_expr_for_read(THD *thd, Field *field, { DBUG_ENTER("fix_session_vcol_expr_for_read"); TABLE_LIST *tl= field->table->pos_in_table_list; - if (!tl || tl->lock_type >= TL_WRITE_ALLOW_WRITE) + if (!tl || tl->lock_type >= TL_FIRST_WRITE) DBUG_RETURN(0); Security_context *save_security_ctx= thd->security_ctx; if (tl->security_ctx) @@ -3962,6 +4044,7 @@ enum open_frm_error open_table_from_share(THD *thd, TABLE_SHARE *share, } outparam->reginfo.lock_type= TL_UNLOCK; + outparam->reginfo.skip_locked= false; outparam->current_lock= F_UNLCK; records=0; if ((db_stat & HA_OPEN_KEYFILE) || (prgflag & DELAYED_OPEN)) @@ -5410,6 +5493,7 @@ void TABLE::init(THD *thd, TABLE_LIST *tl) reginfo.impossible_range= 0; reginfo.join_tab= NULL; reginfo.not_exists_optimize= FALSE; + reginfo.skip_locked= false; created= TRUE; cond_selectivity= 1.0; cond_selectivity_sampling_explain= NULL; @@ -8248,7 +8332,7 @@ bool TABLE_LIST::process_index_hints(TABLE *tbl) { /* initialize the result variables */ tbl->keys_in_use_for_query= tbl->keys_in_use_for_group_by= - tbl->keys_in_use_for_order_by= tbl->s->keys_in_use; + tbl->keys_in_use_for_order_by= tbl->s->usable_indexes(tbl->in_use); /* index hint list processing */ if (index_hints) @@ -8302,7 +8386,8 @@ bool TABLE_LIST::process_index_hints(TABLE *tbl) */ if (tbl->s->keynames.type_names == 0 || (pos= find_type(&tbl->s->keynames, hint->key_name.str, - hint->key_name.length, 1)) <= 0) + hint->key_name.length, 1)) <= 0 || + (tbl->s->key_info[pos - 1].is_ignored)) { my_error(ER_KEY_DOES_NOT_EXISTS, MYF(0), 
hint->key_name.str, alias.str); return 1; @@ -8411,7 +8496,7 @@ void init_mdl_requests(TABLE_LIST *table_list) for ( ; table_list ; table_list= table_list->next_global) MDL_REQUEST_INIT(&table_list->mdl_request, MDL_key::TABLE, table_list->db.str, table_list->table_name.str, - table_list->lock_type >= TL_WRITE_ALLOW_WRITE + table_list->lock_type >= TL_FIRST_WRITE ? MDL_SHARED_WRITE : MDL_SHARED_READ, MDL_TRANSACTION); } diff --git a/sql/table.h b/sql/table.h index 58789dc3826..f0499ee63fc 100644 --- a/sql/table.h +++ b/sql/table.h @@ -764,6 +764,10 @@ struct TABLE_SHARE Excludes keys disabled by ALTER TABLE ... DISABLE KEYS. */ key_map keys_in_use; + + /* The set of ignored indexes for a table. */ + key_map ignored_indexes; + key_map keys_for_keyread; ha_rows min_rows, max_rows; /* create information */ ulong avg_row_length; /* create information */ @@ -1137,7 +1141,7 @@ struct TABLE_SHARE bool write_frm_image(const uchar *frm_image, size_t frm_length); bool write_par_image(const uchar *par_image, size_t par_length); - /* Only used by tokudb */ + /* Only used by S3 */ bool write_frm_image(void) { return frm_image ? write_frm_image(frm_image->str, frm_image->length) : 0; } @@ -1151,6 +1155,8 @@ struct TABLE_SHARE void free_frm_image(const uchar *frm); void set_overlapped_keys(); + void set_ignored_indexes(); + key_map usable_indexes(THD *thd); }; /* not NULL, but cannot be dereferenced */ @@ -2129,7 +2135,7 @@ struct TABLE_LIST enum thr_lock_type lock_type_arg) { enum enum_mdl_type mdl_type; - if (lock_type_arg >= TL_WRITE_ALLOW_WRITE) + if (lock_type_arg >= TL_FIRST_WRITE) mdl_type= MDL_SHARED_WRITE; else if (lock_type_arg == TL_READ_NO_INSERT) mdl_type= MDL_SHARED_NO_WRITE; @@ -2144,7 +2150,7 @@ struct TABLE_LIST table_name= *table_name_arg; alias= (alias_arg ? 
*alias_arg : *table_name_arg); lock_type= lock_type_arg; - updating= lock_type >= TL_WRITE_ALLOW_WRITE; + updating= lock_type >= TL_FIRST_WRITE; MDL_REQUEST_INIT(&mdl_request, MDL_key::TABLE, db.str, table_name.str, mdl_type, MDL_TRANSACTION); } @@ -2178,7 +2184,7 @@ struct TABLE_LIST belong_to_view= belong_to_view_arg; trg_event_map= trg_event_map_arg; /* MDL is enough for read-only FK checks, we don't need the table */ - if (prelocking_type == PRELOCK_FK && lock_type < TL_WRITE_ALLOW_WRITE) + if (prelocking_type == PRELOCK_FK && lock_type < TL_FIRST_WRITE) open_strategy= OPEN_STUB; **last_ptr= this; @@ -2450,7 +2456,8 @@ struct TABLE_LIST bool updating; /* for replicate-do/ignore table */ bool force_index; /* prefer index over table scan */ bool ignore_leaves; /* preload only non-leaf nodes */ - bool crashed; /* Table was found crashed */ + bool crashed; /* Table was found crashed */ + bool skip_locked; /* Skip locked in view defination */ table_map dep_tables; /* tables the table depends on */ table_map on_expr_dep_tables; /* tables on expression depends on */ struct st_nested_join *nested_join; /* if the element is a nested join */ @@ -2781,6 +2788,7 @@ struct TABLE_LIST */ const char *get_table_name() const { return view != NULL ? 
view_name.str : table_name.str; } bool is_active_sjm(); + bool is_sjm_scan_table(); bool is_jtbm() { return MY_TEST(jtbm_subselect != NULL); } st_select_lex_unit *get_unit(); st_select_lex *get_single_select(); diff --git a/sql/threadpool.h b/sql/threadpool.h index 27da872c5cc..7737d056b4a 100644 --- a/sql/threadpool.h +++ b/sql/threadpool.h @@ -37,6 +37,8 @@ extern uint threadpool_mode; /* Thread pool implementation , windows or generic #define DEFAULT_THREADPOOL_STALL_LIMIT 500U struct TP_connection; +struct st_vio; + extern void tp_callback(TP_connection *c); extern void tp_timeout_handler(TP_connection *c); @@ -113,7 +115,7 @@ struct TP_connection virtual void wait_begin(int type)= 0; virtual void wait_end() = 0; - + IF_WIN(virtual,) void init_vio(st_vio *){}; }; @@ -131,9 +133,11 @@ struct TP_pool virtual int set_stall_limit(uint){ return 0; } virtual int get_thread_count() { return tp_stats.num_worker_threads; } virtual int get_idle_thread_count(){ return 0; } + virtual void resume(TP_connection* c)=0; }; #ifdef _WIN32 + struct TP_pool_win:TP_pool { TP_pool_win(); @@ -143,6 +147,7 @@ struct TP_pool_win:TP_pool virtual void add(TP_connection *); virtual int set_max_threads(uint); virtual int set_min_threads(uint); + void resume(TP_connection *c); }; #endif @@ -156,6 +161,7 @@ struct TP_pool_generic :TP_pool virtual int set_pool_size(uint); virtual int set_stall_limit(uint); virtual int get_idle_thread_count(); + void resume(TP_connection* c); }; #endif /* HAVE_POOL_OF_THREADS */ diff --git a/sql/threadpool_common.cc b/sql/threadpool_common.cc index e8eb0dcc29d..07555ac21ed 100644 --- a/sql/threadpool_common.cc +++ b/sql/threadpool_common.cc @@ -23,11 +23,17 @@ #include <sql_audit.h> #include <debug_sync.h> #include <threadpool.h> +#include <sql_class.h> +#include <sql_parse.h> #ifdef WITH_WSREP #include "wsrep_trans_observer.h" #endif /* WITH_WSREP */ +#ifdef _WIN32 +#include "threadpool_winsockets.h" +#endif + /* Threadpool parameters */ uint 
threadpool_min_threads; @@ -47,8 +53,8 @@ TP_STATISTICS tp_stats; static void threadpool_remove_connection(THD *thd); -static int threadpool_process_request(THD *thd); -static THD* threadpool_add_connection(CONNECT *connect, void *scheduler_data); +static dispatch_command_return threadpool_process_request(THD *thd); +static THD* threadpool_add_connection(CONNECT *connect, TP_connection *c); extern bool do_command(THD*); @@ -191,10 +197,30 @@ void tp_callback(TP_connection *c) } c->connect= 0; } - else if (threadpool_process_request(thd)) + else { - /* QUIT or an error occurred. */ - goto error; +retry: + switch(threadpool_process_request(thd)) + { + case DISPATCH_COMMAND_WOULDBLOCK: + if (!thd->async_state.try_suspend()) + { + /* + All async operations finished meanwhile, thus nobody is will wake up + this THD. Therefore, we'll resume "manually" here. + */ + thd->async_state.m_state = thd_async_state::enum_async_state::RESUMED; + goto retry; + } + worker_context.restore(); + return; + case DISPATCH_COMMAND_CLOSE_CONNECTION: + /* QUIT or an error occurred. */ + goto error; + case DISPATCH_COMMAND_SUCCESS: + break; + } + thd->async_state.m_state= thd_async_state::enum_async_state::NONE; } /* Set priority */ @@ -220,7 +246,7 @@ error: } -static THD* threadpool_add_connection(CONNECT *connect, void *scheduler_data) +static THD *threadpool_add_connection(CONNECT *connect, TP_connection *c) { THD *thd= NULL; @@ -243,11 +269,10 @@ static THD* threadpool_add_connection(CONNECT *connect, void *scheduler_data) } delete connect; - thd->event_scheduler.data = scheduler_data; + thd->event_scheduler.data= c; server_threads.insert(thd); thd->set_mysys_var(mysys_var); - /* Login. 
*/ thread_attach(thd); re_init_net_server_extension(thd); @@ -261,6 +286,8 @@ static THD* threadpool_add_connection(CONNECT *connect, void *scheduler_data) if (thd_prepare_connection(thd)) goto end; + c->init_vio(thd->net.vio); + /* Check if THD is ok, as prepare_new_connection_state() can fail, for example if init command failed. @@ -326,10 +353,13 @@ static bool has_unread_data(THD* thd) /** Process a single client request or a single batch. */ -static int threadpool_process_request(THD *thd) +static dispatch_command_return threadpool_process_request(THD *thd) { - int retval= 0; + dispatch_command_return retval= DISPATCH_COMMAND_SUCCESS; + thread_attach(thd); + if(thd->async_state.m_state == thd_async_state::enum_async_state::RESUMED) + goto resume; if (thd->killed >= KILL_CONNECTION) { @@ -337,7 +367,7 @@ static int threadpool_process_request(THD *thd) killed flag was set by timeout handler or KILL command. Return error. */ - retval= 1; + retval= DISPATCH_COMMAND_CLOSE_CONNECTION; if(thd->killed == KILL_WAIT_TIMEOUT) handle_wait_timeout(thd); goto end; @@ -360,19 +390,27 @@ static int threadpool_process_request(THD *thd) if (mysql_audit_release_required(thd)) mysql_audit_release(thd); - if ((retval= do_command(thd)) != 0) - goto end; +resume: + retval= do_command(thd, false); + switch(retval) + { + case DISPATCH_COMMAND_WOULDBLOCK: + case DISPATCH_COMMAND_CLOSE_CONNECTION: + goto end; + case DISPATCH_COMMAND_SUCCESS: + break; + } if (!thd_is_connection_alive(thd)) { - retval= 1; + retval=DISPATCH_COMMAND_CLOSE_CONNECTION; goto end; } set_thd_idle(thd); if (!has_unread_data(thd)) - { + { /* More info on this debug sync is in sql_parse.cc*/ DEBUG_SYNC(thd, "before_do_command_net_read"); goto end; @@ -405,6 +443,9 @@ static bool tp_init() pool= 0; return true; } +#ifdef _WIN32 + init_win_aio_buffers(max_connections); +#endif return false; } @@ -506,6 +547,9 @@ static void tp_wait_end(THD *thd) static void tp_end() { delete pool; +#ifdef _WIN32 + 
destroy_win_aio_buffers(); +#endif } static void tp_post_kill_notification(THD *thd) @@ -516,6 +560,15 @@ static void tp_post_kill_notification(THD *thd) post_kill_notification(thd); } +/* Resume previously suspended THD */ +static void tp_resume(THD* thd) +{ + DBUG_ASSERT(thd->async_state.m_state == thd_async_state::enum_async_state::SUSPENDED); + thd->async_state.m_state = thd_async_state::enum_async_state::RESUMED; + TP_connection* c = get_TP_connection(thd); + pool->resume(c); +} + static scheduler_functions tp_scheduler_functions= { 0, // max_threads @@ -526,7 +579,8 @@ static scheduler_functions tp_scheduler_functions= tp_wait_begin, // thd_wait_begin tp_wait_end, // thd_wait_end tp_post_kill_notification, // post kill notification - tp_end // end + tp_end, // end + tp_resume }; void pool_of_threads_scheduler(struct scheduler_functions *func, diff --git a/sql/threadpool_generic.cc b/sql/threadpool_generic.cc index b6bb47e8f29..19193be0354 100644 --- a/sql/threadpool_generic.cc +++ b/sql/threadpool_generic.cc @@ -29,8 +29,8 @@ #include <sql_plist.h> #include <threadpool.h> #include <algorithm> - -#ifdef HAVE_IOCP +#ifdef _WIN32 +#include "threadpool_winsockets.h" #define OPTIONAL_IO_POLL_READ_PARAM this #else #define OPTIONAL_IO_POLL_READ_PARAM 0 @@ -347,7 +347,7 @@ static void* native_event_get_userdata(native_event *event) return event->portev_user; } -#elif defined(HAVE_IOCP) +#elif defined(_WIN32) static TP_file_handle io_poll_create() @@ -358,29 +358,8 @@ static TP_file_handle io_poll_create() int io_poll_start_read(TP_file_handle pollfd, TP_file_handle fd, void *, void *opt) { - static char c; - TP_connection_generic *con= (TP_connection_generic *)opt; - OVERLAPPED *overlapped= &con->overlapped; - if (con->vio_type == VIO_TYPE_NAMEDPIPE) - { - if (ReadFile(fd, &c, 0, NULL, overlapped)) - return 0; - } - else - { - WSABUF buf; - buf.buf= &c; - buf.len= 0; - DWORD flags=0; - - if (WSARecv((SOCKET)fd, &buf, 1,NULL, &flags,overlapped, NULL) == 0) - return 0; 
- } - - if (GetLastError() == ERROR_IO_PENDING) - return 0; - - return 1; + auto c= (TP_connection_generic *) opt; + return (int) c->win_sock.begin_read(); } @@ -429,20 +408,33 @@ int io_poll_disassociate_fd(TP_file_handle pollfd, TP_file_handle fd) } -int io_poll_wait(TP_file_handle pollfd, native_event *events, int maxevents, int timeout_ms) +static void *native_event_get_userdata(native_event *event) { - ULONG n; - BOOL ok = GetQueuedCompletionStatusEx(pollfd, events, - maxevents, &n, timeout_ms, FALSE); - - return ok ? (int)n : -1; + return (void *) event->lpCompletionKey; } - -static void* native_event_get_userdata(native_event *event) +int io_poll_wait(TP_file_handle pollfd, native_event *events, int maxevents, + int timeout_ms) { - return (void *)event->lpCompletionKey; + ULONG n; + if (!GetQueuedCompletionStatusEx(pollfd, events, maxevents, &n, timeout_ms, FALSE)) + return -1; + + /* Update win_sock with number of bytes read.*/ + for (ULONG i= 0; i < n; i++) + { + auto ev= &events[i]; + auto c= (TP_connection_generic *) native_event_get_userdata(ev); + /* null userdata zero means shutdown (see PostQueuedCompletionStatus() usage*/ + if (c) + { + c->win_sock.end_read(ev->dwNumberOfBytesTransferred, 0); + } + } + + return (int) n; } + #endif @@ -1005,7 +997,7 @@ void thread_group_destroy(thread_group_t *thread_group) io_poll_close(thread_group->pollfd); thread_group->pollfd= INVALID_HANDLE_VALUE; } -#ifndef HAVE_IOCP +#ifndef _WIN32 for(int i=0; i < 2; i++) { if(thread_group->shutdown_pipe[i] != -1) @@ -1052,7 +1044,7 @@ static int wake_thread(thread_group_t *thread_group,bool due_to_stall) */ static int wake_listener(thread_group_t *thread_group) { -#ifndef HAVE_IOCP +#ifndef _WIN32 if (pipe(thread_group->shutdown_pipe)) { return -1; @@ -1335,7 +1327,10 @@ void TP_pool_generic::add(TP_connection *c) DBUG_VOID_RETURN; } - +void TP_pool_generic::resume(TP_connection* c) +{ + add(c); +} /** MySQL scheduler callback: wait begin @@ -1398,12 +1393,6 @@ 
TP_connection_generic::TP_connection_generic(CONNECT *c): bound_to_poll_descriptor(false), waiting(false), fix_group(false) -#ifdef HAVE_IOCP -, overlapped() -#endif -#ifdef _WIN32 -, vio_type(c->vio_type) -#endif { DBUG_ASSERT(c->vio_type != VIO_CLOSED); diff --git a/sql/threadpool_generic.h b/sql/threadpool_generic.h index acf5ec6978b..b7a35b7cbf0 100644 --- a/sql/threadpool_generic.h +++ b/sql/threadpool_generic.h @@ -23,6 +23,7 @@ #ifdef _WIN32 #include <windows.h> +#include "threadpool_winsockets.h" /* AIX may define this, too ?*/ #define HAVE_IOCP #endif @@ -75,11 +76,11 @@ struct TP_connection_generic :public TP_connection TP_connection_generic(CONNECT* c); ~TP_connection_generic(); - virtual int init() { return 0; }; - virtual void set_io_timeout(int sec); - virtual int start_io(); - virtual void wait_begin(int type); - virtual void wait_end(); + int init() override { return 0; } + void set_io_timeout(int sec) override; + int start_io() override; + void wait_begin(int type) override; + void wait_end() override; thread_group_t* thread_group; TP_connection_generic* next_in_queue; @@ -90,12 +91,12 @@ struct TP_connection_generic :public TP_connection bool bound_to_poll_descriptor; int waiting; bool fix_group; -#ifdef HAVE_IOCP - OVERLAPPED overlapped; -#endif #ifdef _WIN32 - enum_vio_type vio_type; + win_aiosocket win_sock{}; + void init_vio(st_vio *vio) override + { win_sock.init(vio);} #endif + }; diff --git a/sql/threadpool_win.cc b/sql/threadpool_win.cc index 6003b06bc7b..ed68e31c755 100644 --- a/sql/threadpool_win.cc +++ b/sql/threadpool_win.cc @@ -30,6 +30,9 @@ #include <debug_sync.h> #include <threadpool.h> #include <windows.h> +#include <set_var.h> + +#include "threadpool_winsockets.h" /* Log a warning */ static void tp_log_warning(const char *msg, const char *fct) @@ -43,8 +46,6 @@ static PTP_POOL pool; static TP_CALLBACK_ENVIRON callback_environ; static DWORD fls; -static bool skip_completion_port_on_success = false; - PTP_CALLBACK_ENVIRON 
get_threadpool_win_callback_environ() { return pool? &callback_environ: 0; @@ -83,22 +84,21 @@ struct TP_connection_win:public TP_connection public: TP_connection_win(CONNECT*); ~TP_connection_win(); - virtual int init(); - virtual int start_io(); - virtual void set_io_timeout(int sec); - virtual void wait_begin(int type); - virtual void wait_end(); - - ulonglong timeout; - enum_vio_type vio_type; - HANDLE handle; - OVERLAPPED overlapped; - PTP_CALLBACK_INSTANCE callback_instance; - PTP_IO io; - PTP_TIMER timer; - PTP_WORK work; - bool long_callback; - + int init() override; + void init_vio(st_vio *vio) override; + int start_io() override; + void set_io_timeout(int sec) override; + void wait_begin(int type) override; + void wait_end() override; + + ulonglong timeout=ULLONG_MAX; + OVERLAPPED overlapped{}; + PTP_CALLBACK_INSTANCE callback_instance{}; + PTP_IO io{}; + PTP_TIMER timer{}; + PTP_WORK work{}; + bool long_callback{}; + win_aiosocket sock; }; struct TP_connection *new_TP_connection(CONNECT *connect) @@ -125,120 +125,56 @@ void TP_pool_win::add(TP_connection *c) } } - -TP_connection_win::TP_connection_win(CONNECT *c) : - TP_connection(c), - timeout(ULONGLONG_MAX), - callback_instance(0), - io(0), - timer(0), - work(0) +void TP_pool_win::resume(TP_connection* c) { + DBUG_ASSERT(c->state == TP_STATE_RUNNING); + SubmitThreadpoolWork(((TP_connection_win*)c)->work); } -#define CHECK_ALLOC_ERROR(op) if (!(op)) {tp_log_warning("Allocation failed", #op); DBUG_ASSERT(0); return -1; } +#define CHECK_ALLOC_ERROR(op) \ + do \ + { \ + if (!(op)) \ + { \ + tp_log_warning("Allocation failed", #op); \ + } \ + } while (0) -int TP_connection_win::init() +TP_connection_win::TP_connection_win(CONNECT *c) : + TP_connection(c) { - - memset(&overlapped, 0, sizeof(OVERLAPPED)); - switch ((vio_type = connect->vio_type)) - { - case VIO_TYPE_SSL: - case VIO_TYPE_TCPIP: - handle= (HANDLE) mysql_socket_getfd(connect->sock); - break; - case VIO_TYPE_NAMEDPIPE: - handle= connect->pipe; - 
break; - default: - abort(); - } - - - /* Performance tweaks (s. MSDN documentation)*/ - UCHAR flags= FILE_SKIP_SET_EVENT_ON_HANDLE; - if (skip_completion_port_on_success) - { - flags |= FILE_SKIP_COMPLETION_PORT_ON_SUCCESS; - } - (void)SetFileCompletionNotificationModes(handle, flags); /* Assign io completion callback */ - CHECK_ALLOC_ERROR(io= CreateThreadpoolIo(handle, io_completion_callback, this, &callback_environ)); - CHECK_ALLOC_ERROR(timer= CreateThreadpoolTimer(timer_callback, this, &callback_environ)); + HANDLE h= c->vio_type == VIO_TYPE_NAMEDPIPE ? c->pipe + : (HANDLE)mysql_socket_getfd(c->sock); + + CHECK_ALLOC_ERROR(io=CreateThreadpoolIo(h, io_completion_callback, this, &callback_environ)); + CHECK_ALLOC_ERROR(timer= CreateThreadpoolTimer(timer_callback, this, &callback_environ)); CHECK_ALLOC_ERROR(work= CreateThreadpoolWork(work_callback, this, &callback_environ)); - return 0; } +int TP_connection_win::init() +{ + return !io || !timer || !work ; +} + +void TP_connection_win::init_vio(st_vio* vio) +{ + sock.init(vio); +} /* Start asynchronous read */ int TP_connection_win::start_io() { - DWORD num_bytes = 0; - static char c; - WSABUF buf; - buf.buf= &c; - buf.len= 0; - DWORD flags=0; - DWORD last_error= 0; - - int retval; StartThreadpoolIo(io); - - if (vio_type == VIO_TYPE_TCPIP || vio_type == VIO_TYPE_SSL) - { - /* Start async io (sockets). */ - if (WSARecv((SOCKET)handle , &buf, 1, &num_bytes, &flags, - &overlapped, NULL) == 0) - { - retval= last_error= 0; - } - else - { - retval= -1; - last_error= WSAGetLastError(); - } - } - else - { - /* Start async io (named pipe) */ - if (ReadFile(handle, &c, 0, &num_bytes,&overlapped)) - { - retval= last_error= 0; - } - else - { - retval= -1; - last_error= GetLastError(); - } - } - - if (retval == 0 || last_error == ERROR_MORE_DATA) + if (sock.begin_read()) { - /* - IO successfully finished (synchronously). 
- If skip_completion_port_on_success is set, we need to handle it right - here, because completion callback would not be executed by the pool. - */ - if (skip_completion_port_on_success) - { - CancelThreadpoolIo(io); - io_completion_callback(callback_instance, this, &overlapped, last_error, - num_bytes, io); - } - return 0; - } - - if (last_error == ERROR_IO_PENDING) - { - return 0; + /* Some error occurred */ + CancelThreadpoolIo(io); + return -1; } - - /* Some error occurred */ - CancelThreadpoolIo(io); - return -1; + return 0; } /* @@ -305,7 +241,7 @@ void tp_win_callback_prolog() { /* Running in new worker thread*/ FlsSetValue(fls, (void *)1); - statistic_increment(thread_created, &LOCK_status); + thread_created++; tp_stats.num_worker_threads++; my_thread_init(); } @@ -350,6 +286,10 @@ static VOID CALLBACK io_completion_callback(PTP_CALLBACK_INSTANCE instance, PVOID context, PVOID overlapped, ULONG io_result, ULONG_PTR nbytes, PTP_IO io) { TP_connection_win *c= (TP_connection_win *)context; + + /* How many bytes were preread into read buffer */ + c->sock.end_read((ULONG)nbytes, io_result); + /* Execute high priority connections immediately. 
'Yield' in case of low priority connections, i.e SubmitThreadpoolWork (with the same callback) @@ -412,12 +352,24 @@ int TP_pool_win::init() InitializeThreadpoolEnvironment(&callback_environ); SetThreadpoolCallbackPool(&callback_environ, pool); - if (threadpool_max_threads) + if (IS_SYSVAR_AUTOSIZE(&threadpool_max_threads)) + { + /* + Nr 500 comes from Microsoft documentation, + there is no API for GetThreadpoolThreadMaxThreads() + */ + SYSVAR_AUTOSIZE(threadpool_max_threads,500); + } + else { SetThreadpoolThreadMaximum(pool, threadpool_max_threads); } - if (threadpool_min_threads) + if (IS_SYSVAR_AUTOSIZE(&threadpool_min_threads)) + { + SYSVAR_AUTOSIZE(threadpool_min_threads,1); + } + else { if (!SetThreadpoolThreadMinimum(pool, threadpool_min_threads)) { @@ -426,6 +378,18 @@ int TP_pool_win::init() } } + + if (IS_SYSVAR_AUTOSIZE(&global_system_variables.threadpool_priority)) + { + /* + There is a notable overhead for "auto" priority implementation, + use "high" which handles socket IO callbacks as they come + without rescheduling to work queue. + */ + SYSVAR_AUTOSIZE(global_system_variables.threadpool_priority, + TP_PRIORITY_HIGH); + } + TP_POOL_STACK_INFORMATION stackinfo; stackinfo.StackCommit = 0; stackinfo.StackReserve = (SIZE_T)my_thread_stack_size; @@ -480,3 +444,4 @@ TP_connection *TP_pool_win::new_connection(CONNECT *connect) } return c; } + diff --git a/sql/threadpool_winsockets.cc b/sql/threadpool_winsockets.cc new file mode 100644 index 00000000000..6b4758a451f --- /dev/null +++ b/sql/threadpool_winsockets.cc @@ -0,0 +1,259 @@ +/* Copyright (C) 2012 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + */ + +#include <winsock2.h> +#include <my_global.h> +#include <violite.h> +#include "threadpool_winsockets.h" +#include <algorithm> +#include <vector> +#include <mutex> + +/* + A cache for IO buffers for asynchronous socket(or named pipe) reads. + + Considerations on Windows : since Windows locks the AIO buffers in physical memory, + it is important that these buffers are compactly allocated. + We try to prevent any kinds of memory fragmentation + + A relatively small region (at most 1MB) is allocated, for equally sized smallish (256 bytes) + buffers. The region is pagesize-aligned (via VirtualAlloc allocation) + + We use smallish IO buffers, 256 bytes is probably large enough for most of + the queries. Larger buffers could have funny effects (thread hogging) + on threadpool scheduling in case client is using protocol pipelining. + + Also note, that even in an unlikely situation where cache runs out of buffers, + this does not lead to errors, zero sized reads will be used in WSARecv then. 
+*/ + +constexpr size_t READ_BUFSIZ= 256; +class AIO_buffer_cache +{ + const size_t ITEM_SIZE= READ_BUFSIZ; + + /** Limit the whole cache to 1MB*/ + const size_t MAX_SIZE= 1048576; + + /* Allocation base */ + char *m_base= 0; + + /* "Free list" with LIFO policy */ + std::vector<char *> m_cache; + std::mutex m_mtx; + size_t m_elements=0; + +public: + void set_size(size_t n_items); + char *acquire_buffer(); + void release_buffer(char *v); + void clear(); + ~AIO_buffer_cache(); +}; + + +void AIO_buffer_cache::set_size(size_t n_items) +{ + DBUG_ASSERT(!m_base); + m_elements= std::min(n_items, MAX_SIZE / ITEM_SIZE); + auto sz= m_elements * ITEM_SIZE; + + m_base= + (char *) VirtualAlloc(0, sz, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + if (!m_base) + { + m_elements= 0; + return; + } + + /* Try to help memory manager here, by prelocking region in memory*/ + (void) VirtualLock(m_base, sz); + + m_cache.reserve(m_elements); + for (ssize_t i= m_elements - 1; i >= 0 ; i--) + m_cache.push_back(m_base + i * ITEM_SIZE); +} + +/* + Returns a buffer, or NULL if no free buffers. + + LIFO policy is implemented, so we do not touch too many + pages (no std::stack though) +*/ +char *AIO_buffer_cache::acquire_buffer() +{ + std::unique_lock<std::mutex> lk(m_mtx); + if (m_cache.empty()) + return nullptr; + auto p= m_cache.back(); + m_cache.pop_back(); + return p; +} + +void AIO_buffer_cache::release_buffer(char *v) +{ + std::unique_lock<std::mutex> lk(m_mtx); + m_cache.push_back(v); +} + +void AIO_buffer_cache::clear() +{ + if (!m_base) + return; + + /* Check that all items are returned to the cache. 
*/ + DBUG_ASSERT(m_cache.size() == m_elements); + VirtualFree(m_base, 0, MEM_RELEASE); + m_cache.clear(); + m_base= 0; + m_elements= 0; +} + +AIO_buffer_cache::~AIO_buffer_cache() { clear(); } + +/* Global variable for the cache buffers.*/ +AIO_buffer_cache read_buffers; + +win_aiosocket::~win_aiosocket() +{ + if (m_buf_ptr) + read_buffers.release_buffer(m_buf_ptr); +} + + +/** Return number of unread bytes.*/ +size_t win_aiosocket::buffer_remaining() +{ + return m_buf_datalen - m_buf_off; +} + +static my_bool my_vio_has_data(st_vio *vio) +{ + auto sock= (win_aiosocket *) vio->tp_ctx; + return sock->buffer_remaining() || sock->m_orig_vio_has_data(vio); +} + +/* + (Half-)buffered read. + + The buffer is filled once, by completion of the async IO. + + We do not refill the buffer once it is read off, + does not make sense. +*/ +static size_t my_vio_read(st_vio *vio, uchar *dest, size_t sz) +{ + auto sock= (win_aiosocket *) vio->tp_ctx; + DBUG_ASSERT(sock); + + auto nbytes= std::min(sock->buffer_remaining(), sz); + + if (nbytes > 0) + { + /* Copy to output, adjust the offset.*/ + memcpy(dest, sock->m_buf_ptr + sock->m_buf_off, nbytes); + sock->m_buf_off += nbytes; + return nbytes; + } + + return sock->m_orig_vio_read(vio, dest, sz); +} + +DWORD win_aiosocket::begin_read() +{ + DWORD err = ERROR_SUCCESS; + static char c; + WSABUF buf; + + DBUG_ASSERT(!buffer_remaining()); + + /* + If there is no internal buffer to store data, + we do zero size read, but still need a valid + pointer for the buffer parameter. + */ + if (m_buf_ptr) + buf= {(ULONG)READ_BUFSIZ, m_buf_ptr}; + else + buf= {0, &c}; + + + if (!m_is_pipe) + { + /* Do async io (sockets). 
*/ + DWORD flags= 0; + if (WSARecv((SOCKET) m_handle, &buf, 1, 0, &flags, &m_overlapped, NULL)) + err= WSAGetLastError(); + } + else + { + /* Do async read (named pipe) */ + if (ReadFile(m_handle, buf.buf, buf.len, 0, &m_overlapped)) + err= GetLastError(); + } + + if (!err || err == ERROR_IO_PENDING) + return 0; + return err; +} + +void win_aiosocket::end_read(ULONG nbytes, DWORD err) +{ + DBUG_ASSERT(!buffer_remaining()); + DBUG_ASSERT(!nbytes || m_buf_ptr); + m_buf_off= 0; + m_buf_datalen= nbytes; +} + +void win_aiosocket::init(Vio *vio) +{ + m_is_pipe= vio->type == VIO_TYPE_NAMEDPIPE; + m_handle= + m_is_pipe ? vio->hPipe : (HANDLE) mysql_socket_getfd(vio->mysql_socket); + + SetFileCompletionNotificationModes(m_handle, FILE_SKIP_SET_EVENT_ON_HANDLE); + if (vio->type == VIO_TYPE_SSL) + { + /* + TODO : This requires fixing viossl to call our manipulated VIO + */ + return; + } + + if (!(m_buf_ptr = read_buffers.acquire_buffer())) + { + /* Ran out of buffers, that's fine.*/ + return; + } + + vio->tp_ctx= this; + + m_orig_vio_has_data= vio->has_data; + vio->has_data= my_vio_has_data; + + m_orig_vio_read= vio->read; + vio->read= my_vio_read; +} + +void init_win_aio_buffers(unsigned int n_buffers) +{ + read_buffers.set_size(n_buffers); +} + +extern void destroy_win_aio_buffers() +{ + read_buffers.clear(); +} diff --git a/sql/threadpool_winsockets.h b/sql/threadpool_winsockets.h new file mode 100644 index 00000000000..ca2068b759d --- /dev/null +++ b/sql/threadpool_winsockets.h @@ -0,0 +1,80 @@ +/* Copyright (C) 2020 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + */ +#pragma once + +#include <WinSock2.h> +#include <windows.h> + +struct st_vio; + +struct win_aiosocket +{ + /** OVERLAPPED is needed by all Windows AIO*/ + OVERLAPPED m_overlapped{}; + /** Handle to pipe, or socket */ + HANDLE m_handle{}; + /** Whether the m_handle refers to pipe*/ + bool m_is_pipe{}; + + /* Read buffer handling */ + + /** Pointer to buffer of size READ_BUFSIZ. Can be NULL.*/ + char *m_buf_ptr{}; + /** Offset to current buffer position*/ + size_t m_buf_off{}; + /** Size of valid data in the buffer*/ + size_t m_buf_datalen{}; + + /* Vio handling */ + /** Pointer to original vio->vio_read/vio->has_data function */ + size_t (*m_orig_vio_read)(st_vio *, unsigned char *, size_t){}; + char (*m_orig_vio_has_data)(st_vio *){}; + + + + /** + Begins asynchronnous reading from socket/pipe. + On IO completion, pre-read some bytes into internal buffer + */ + DWORD begin_read(); + + /** + Update number of bytes returned, and IO error status + + Should be called right after IO is completed + GetQueuedCompletionStatus() , or threadpool IO completion + callback would return nbytes and the error. + + Sets the valid data length in the read buffer. + */ + void end_read(ULONG nbytes, DWORD err); + + /** + Override VIO routines with ours, accounting for + one-shot buffering. 
+ */ + void init(st_vio *vio); + + /** Return number of unread bytes.*/ + size_t buffer_remaining(); + + /* Frees the read buffer.*/ + ~win_aiosocket(); +}; + +/* Functions related to IO buffers caches.*/ +extern void init_win_aio_buffers(unsigned int n_buffers); +extern void destroy_win_aio_buffers(); diff --git a/sql/unireg.cc b/sql/unireg.cc index 51c4eeb4a4c..29348bfcec3 100644 --- a/sql/unireg.cc +++ b/sql/unireg.cc @@ -119,6 +119,20 @@ static uchar *extra2_write_field_properties(uchar *pos, return pos; } +static uchar *extra2_write_index_properties(uchar *pos, const KEY *keyinfo, + uint keys) +{ + *pos++= EXTRA2_INDEX_FLAGS; + pos= extra2_write_len(pos, keys); + for (uint i=0; i < keys; i++) + { + *pos++= keyinfo[i].is_ignored ? + EXTRA2_IGNORED_KEY : + EXTRA2_DEFAULT_INDEX_FLAGS; + } + return pos; +} + static uint16 get_fieldno_by_name(HA_CREATE_INFO *create_info, List<Create_field> &create_fields, const Lex_ident &field_name) @@ -403,6 +417,14 @@ LEX_CUSTRING build_frm_image(THD *thd, const LEX_CSTRING &table, extra2_size+= 1 + extra2_str_size(create_fields.elements); } + /* + To store the ignorability flag for each key. + Here 1 bytes is reserved to store the extra index flags for keys. 
+ Currently only 1 bit is used, rest of the bits can be used in the future + */ + if (keys) + extra2_size+= 1 + extra2_str_size(keys); + for (i= 0; i < keys; i++) if (key_info[i].algorithm == HA_KEY_ALG_LONG_HASH) e_unique_hash_extra_parts++; @@ -519,6 +541,10 @@ LEX_CUSTRING build_frm_image(THD *thd, const LEX_CSTRING &table, if (has_extra2_field_flags_) pos= extra2_write_field_properties(pos, create_fields); + + if (keys) + pos= extra2_write_index_properties(pos, key_info, keys); + int4store(pos, filepos); // end of the extra2 segment pos+= 4; diff --git a/sql/unireg.h b/sql/unireg.h index dbff9ff77f8..0edb0a50ebd 100644 --- a/sql/unireg.h +++ b/sql/unireg.h @@ -173,6 +173,7 @@ enum extra2_frm_value_type { EXTRA2_GIS=2, EXTRA2_APPLICATION_TIME_PERIOD=3, EXTRA2_PERIOD_FOR_SYSTEM_TIME=4, + EXTRA2_INDEX_FLAGS=5, #define EXTRA2_ENGINE_IMPORTANT 128 @@ -186,6 +187,11 @@ enum extra2_field_flags { VERS_OPTIMIZED_UPDATE= 1 << INVISIBLE_MAX_BITS, }; +enum extra2_index_flags { + EXTRA2_DEFAULT_INDEX_FLAGS, + EXTRA2_IGNORED_KEY +}; + LEX_CUSTRING build_frm_image(THD *thd, const LEX_CSTRING &table, HA_CREATE_INFO *create_info, List<Create_field> &create_fields, diff --git a/sql/upgrade_conf_file.cc b/sql/upgrade_conf_file.cc index e41e4dfd857..a30502d0dea 100644 --- a/sql/upgrade_conf_file.cc +++ b/sql/upgrade_conf_file.cc @@ -42,13 +42,19 @@ static const char *removed_variables[] = "have_partitioning", "innodb_adaptive_flushing_method", "innodb_adaptive_hash_index_partitions", +"innodb_adaptive_max_sleep_delay", "innodb_additional_mem_pool_size", "innodb_api_bk_commit_interval", "innodb_api_disable_rowlock", "innodb_api_enable_binlog", "innodb_api_enable_mdl", "innodb_api_trx_level", +"innodb_background_scrub_data_check_interval", +"innodb_background_scrub_data_compressed", +"innodb_background_scrub_data_interval", +"innodb_background_scrub_data_uncompressed", "innodb_blocking_buffer_pool_restore", +"innodb_buffer_pool_instances", "innodb_buffer_pool_populate", 
"innodb_buffer_pool_restore_at_startup", "innodb_buffer_pool_shm_checksum", @@ -62,6 +68,8 @@ static const char *removed_variables[] = "innodb_cleaner_lsn_age_factor", "innodb_cleaner_max_flush_time", "innodb_cleaner_max_lru_time", +"innodb_commit_concurrency", +"innodb_concurrency_tickets", "innodb_corrupt_table_action", "innodb_dict_size_limit", "innodb_doublewrite_file", @@ -89,12 +97,16 @@ static const char *removed_variables[] = "innodb_log_archive", "innodb_log_block_size", "innodb_log_checksum_algorithm", -"innodb_rollback_segments", +"innodb_log_checksums", +"innodb_log_compressed_pages", +"innodb_log_files_in_group", +"innodb_log_optimize_ddl", "innodb_max_bitmap_file_size", "innodb_max_changed_pages", "innodb_merge_sort_block_size", "innodb_mirrored_log_groups", "innodb_mtflush_threads", +"innodb_page_cleaners", "innodb_persistent_stats_root_page", "innodb_print_lock_wait_timeout_info", "innodb_purge_run_now", @@ -102,15 +114,23 @@ static const char *removed_variables[] = "innodb_read_ahead", "innodb_recovery_stats", "innodb_recovery_update_relay_log", +"innodb_replication_delay", +"innodb_rollback_segments", +"innodb_scrub_log", +"innodb_scrub_log_speed", "innodb_show_locks_held", "innodb_show_verbose_locks", "innodb_stats_auto_update", "innodb_stats_sample_pages", "innodb_stats_update_need_lock", "innodb_support_xa", +"innodb_sync_array_size", +"innodb_thread_concurrency", "innodb_thread_concurrency_timer_based", +"innodb_thread_sleep_delay", "innodb_track_changed_pages", "innodb_track_redo_log_now", +"innodb_undo_logs", "innodb_use_fallocate", "innodb_use_global_flush_log_at_trx_commit", "innodb_use_mtflush", diff --git a/sql/winmain.cc b/sql/winmain.cc new file mode 100644 index 00000000000..fb5da40cf2f --- /dev/null +++ b/sql/winmain.cc @@ -0,0 +1,371 @@ +/* Copyright (C) 2020 MariaDB Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free 
Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +/* + main() function for the server on Windows is implemented here. + The core functionality is implemented elsewhere, in mysqld_main(), and running as + service is done here. + + Main tasks of the service are + + 1. Report current status back to service control manager. Here we're + providing callbacks so code outside of winmain.cc can call it + (via mysqld_set_service_status_callback()) + + 2. React to notification, the only one we care about is the "stop" + notification. we initiate shutdown, when instructed. + + Note that our service might not be too Windows-friendly, as it might take + a while to startup (recovery), and a while to shut down(innodb cleanups). + + Most of the code more of less standard service stuff, taken from Microsoft + docs examples. + + Notable oddity in running services, is that we do not know for sure, + whether we should run as a service or not (there is no --service parameter that + would tell).Heuristics are used, and if the last command line argument is + valid service name, we try to run as service, but fallback to usual process + if this fails. + + As an example, even if mysqld.exe is started with command line like "mysqld.exe --help", + it is entirely possible that mysqld.exe run as service "--help". 
+ + Apart from that, now deprecated and obsolete service registration/removal functionality is + still provided (mysqld.exe --install/--remove) +*/ + +#include <my_global.h> +#include <mysqld.h> +#include <log.h> + +#include <stdio.h> +#include <windows.h> +#include <string> +#include <cassert> + +static SERVICE_STATUS svc_status{SERVICE_WIN32_OWN_PROCESS}; +static SERVICE_STATUS_HANDLE svc_status_handle; +static char *svc_name; + +static char **orig_argv; +static int orig_argc; + +static int install_service(int argc, char **argv, const char *name); +static int remove_service(const char *name); + +/* + Report service status to SCM. This function is indirectly invoked + by the server to report state transitions. + + 1. from START_PENDING to SERVICE_RUNNING, when we start accepting user connections + 2. from SERVICE_RUNNING to STOP_PENDING, when we start shutdown + 3. from STOP_PENDING to SERVICE_STOPPED, in mysqld_exit() + sometimes also START_PENDING to SERVICE_STOPPED, on startup errors +*/ +static void report_svc_status(DWORD current_state, DWORD exit_code, DWORD wait_hint) +{ + if (!svc_status_handle) + return; + + static DWORD check_point= 1; + svc_status.dwCurrentState= current_state; + svc_status.dwWaitHint= wait_hint; + + if (exit_code) + { + svc_status.dwWin32ExitCode= ERROR_SERVICE_SPECIFIC_ERROR; + svc_status.dwServiceSpecificExitCode= exit_code; + } + else + { + svc_status.dwWin32ExitCode= 0; + } + + if (current_state == SERVICE_START_PENDING) + svc_status.dwControlsAccepted= 0; + else + svc_status.dwControlsAccepted= SERVICE_ACCEPT_STOP|SERVICE_ACCEPT_SHUTDOWN; + + if ((current_state == SERVICE_RUNNING) || (current_state == SERVICE_STOPPED)) + svc_status.dwCheckPoint= 0; + else + svc_status.dwCheckPoint= check_point++; + + SetServiceStatus(svc_status_handle, &svc_status); +} + +/* Report unexpected errors. 
*/ +static void svc_report_event(const char *svc_name, const char *command) +{ + char buffer[80]; + sprintf_s(buffer, "mariadb service %s, %s failed with %d", + svc_name, command, GetLastError()); + OutputDebugString(buffer); +} + +/* + Service control function. + Reacts to service stop, initiates shutdown. +*/ +static void WINAPI svc_ctrl_handle(DWORD cntrl) +{ + switch (cntrl) + { + case SERVICE_CONTROL_SHUTDOWN: + case SERVICE_CONTROL_STOP: + sql_print_information( + "Windows service \"%s\": received %s", + svc_name, + cntrl == SERVICE_CONTROL_STOP? "SERVICE_CONTROL_STOP": "SERVICE_CONTROL_SHUTDOWN"); + + /* The below will also set the status to STOP_PENDING. */ + mysqld_win_initiate_shutdown(); + break; + + case SERVICE_CONTROL_INTERROGATE: + default: + break; + } +} + +/* Service main routine, mainly runs mysqld_main() */ +static void WINAPI svc_main(DWORD svc_argc, char **svc_argv) +{ + /* Register the handler function for the service */ + char *name= svc_argv[0]; + + svc_status_handle= RegisterServiceCtrlHandler(name, svc_ctrl_handle); + if (!svc_status_handle) + { + svc_report_event(name, "RegisterServiceCtrlHandler"); + return; + } + report_svc_status(SERVICE_START_PENDING, NO_ERROR, 0); + + /* Make server report service status via our callback.*/ + mysqld_set_service_status_callback(report_svc_status); + + /* This would add service name entry to load_defaults.*/ + mysqld_win_set_service_name(name); + + /* + Do not pass the service name parameter (last on the command line) + to mysqld_main(), it is unaware of it. + */ + orig_argv[orig_argc - 1]= 0; + mysqld_main(orig_argc - 1, orig_argv); +} + +/* + This start the service. Sometimes it will fail, because + currently we do not know for sure whether we run as service or not. + If this fails, the fallback is to run as normal process. 
+*/ +static int run_as_service(char *name) +{ + SERVICE_TABLE_ENTRY stb[]= {{name, svc_main}, {0, 0}}; + if (!StartServiceCtrlDispatcher(stb)) + { + assert(GetLastError() == ERROR_FAILED_SERVICE_CONTROLLER_CONNECT); + return -1; + } + return 0; +} + +/* + Check for valid existing service name. + Part of our guesswork, whether we run as service or not. +*/ +static bool is_existing_service(const char *name) +{ + if (strchr(name, '\\') || strchr(name, '/')) + { + /* Invalid characters in service name */ + return false; + } + + SC_HANDLE sc_service= 0, scm= 0; + bool ret= ((scm= OpenSCManager(0, 0, SC_MANAGER_ENUMERATE_SERVICE)) != 0) && + ((sc_service= OpenService(scm, name, SERVICE_QUERY_STATUS)) != 0); + + if (sc_service) + CloseServiceHandle(sc_service); + if (scm) + CloseServiceHandle(scm); + + return ret; +} + +/* + If service name is not given to --install/--remove + it is assumed to be "MySQL" (traditional handling) +*/ +static const char *get_svc_name(const char *arg) +{ + return arg ? arg : "MySQL"; +} + +/* + Main function on Windows. + Runs mysqld as normal process, or as a service. + + Plus, the obsolete functionality to register/remove services. +*/ +int main(int argc, char **argv) +{ + orig_argv= argv; + orig_argc= argc; + + /* + If no special arguments are given, service name is nor present + run as normal program. + */ + if (argc == 1) + return mysqld_main(argc, argv); + + auto cmd= argv[1]; + + /* Handle install/remove */ + if (!strcmp(cmd, "--install") || !strcmp(cmd, "--install-manual")) + return install_service(argc, argv, get_svc_name(argv[2])); + + if (!strcmp(cmd, "--remove")) + return remove_service(get_svc_name(argv[2])); + + /* Try to run as service, and fallback to mysqld_main(), if this fails */ + svc_name= argv[argc - 1]; + if (is_existing_service(svc_name) && !run_as_service(svc_name)) + return 0; + svc_name= 0; + + /* Run as normal program.*/ + return mysqld_main(argc, argv); +} + + +/* + Register/remove services functionality. 
+ This is kept for backward compatibility only, and is + superseeded by much more versatile mysql_install_db.exe + + "mysqld --remove=svc" has no advantage over + OS own "sc delete svc" +*/ +static void ATTRIBUTE_NORETURN die(const char *func, const char *name) +{ + DWORD err= GetLastError(); + fprintf(stderr, "FATAL ERROR : %s failed (%lu)\n", func, err); + switch (err) + { + case ERROR_SERVICE_EXISTS: + fprintf(stderr, "Service %s already exists.\n", name); + break; + case ERROR_SERVICE_DOES_NOT_EXIST: + fprintf(stderr, "Service %s does not exist.\n", name); + break; + case ERROR_ACCESS_DENIED: + fprintf(stderr, "Access is denied. " + "Make sure to run as elevated admin user.\n"); + break; + case ERROR_INVALID_NAME: + fprintf(stderr, "Invalid service name '%s'\n", name); + default: + break; + } + exit(1); +} + +static inline std::string quoted(const char *src) +{ + std::string s; + s.append("\"").append(src).append("\""); + return s; +} + +static int install_service(int argc, char **argv, const char *name) +{ + std::string cmdline; + + char path[MAX_PATH]; + auto nSize = GetModuleFileName(0, path, sizeof(path)); + + if (nSize == (DWORD) sizeof(path) && GetLastError() == ERROR_INSUFFICIENT_BUFFER) + die("GetModuleName", name); + + cmdline.append(quoted(path)); + + const char *user= 0; + // mysqld --install[-manual] name ...[--local-service] + if (argc > 2) + { + for (int i= 3; argv[i]; i++) + { + if (!strcmp(argv[i], "--local-service")) + user= "NT AUTHORITY\\LocalService"; + else + { + cmdline.append(" ").append(quoted(argv[i])); + } + } + } + cmdline.append(" ").append(quoted(name)); + + DWORD start_type; + if (!strcmp(argv[1], "--install-manual")) + start_type= SERVICE_DEMAND_START; + else + start_type= SERVICE_AUTO_START; + + SC_HANDLE scm, sc_service; + if (!(scm= OpenSCManager(0, 0, SC_MANAGER_CREATE_SERVICE))) + die("OpenSCManager", name); + + if (!(sc_service= CreateService( + scm, name, name, SERVICE_ALL_ACCESS, + SERVICE_WIN32_OWN_PROCESS, start_type, 
SERVICE_ERROR_NORMAL, + cmdline.c_str(), 0, 0, 0, user, 0))) + die("CreateService", name); + + char description[]= "MariaDB database server"; + SERVICE_DESCRIPTION sd= {description}; + ChangeServiceConfig2(sc_service, SERVICE_CONFIG_DESCRIPTION, &sd); + + CloseServiceHandle(sc_service); + CloseServiceHandle(scm); + + printf("Service '%s' successfully installed.\n", name); + return 0; +} + +static int remove_service(const char *name) +{ + SC_HANDLE scm, sc_service; + + if (!(scm= OpenSCManager(0, 0, SC_MANAGER_CREATE_SERVICE))) + die("OpenSCManager", name); + + if (!(sc_service= OpenService(scm, name, DELETE))) + die("OpenService", name); + + if (!DeleteService(sc_service)) + die("DeleteService", name); + + CloseServiceHandle(sc_service); + CloseServiceHandle(scm); + + printf("Service '%s' successfully deleted.\n", name); + return 0; +} diff --git a/sql/winservice.c b/sql/winservice.c index d7cfd2f7584..a11087e5cd5 100644 --- a/sql/winservice.c +++ b/sql/winservice.c @@ -40,7 +40,7 @@ void get_file_version(const char *path, int *major, int *minor, int *patch) *major= *minor= *patch= 0; size= GetFileVersionInfoSize(path, &version_handle); - if (size == 0) + if (size == 0) return; ver= (char *)malloc(size); if(!GetFileVersionInfo(path, version_handle, size, ver)) @@ -65,7 +65,7 @@ void normalize_path(char *path, size_t size) char *p; strcpy_s(buf, MAX_PATH, path+1); p= strchr(buf, '"'); - if (p) + if (p) *p=0; } else @@ -136,15 +136,15 @@ static void get_datadir_from_ini(const char *ini, char *service_name, char *data /* Retrieve some properties from windows mysqld service binary path. - We're interested in ini file location and datadir, and also in version of + We're interested in ini file location and datadir, and also in version of the data. We tolerate missing mysqld.exe. 
- Note that this function carefully avoids using mysql libraries (e.g dbug), + Note that this function carefully avoids using mysql libraries (e.g dbug), since it is used in unusual environments (windows installer, MFC), where we - do not have much control over how threads are created and destroyed, so we + do not have much control over how threads are created and destroyed, so we cannot assume MySQL thread initilization here. */ -int get_mysql_service_properties(const wchar_t *bin_path, +int get_mysql_service_properties(const wchar_t *bin_path, mysqld_service_properties *props) { int numargs; @@ -193,9 +193,10 @@ int get_mysql_service_properties(const wchar_t *bin_path, if(wcsstr(mysqld_path, L".exe") == NULL) wcscat(mysqld_path, L".exe"); - if(wcsicmp(file_part, L"mysqld.exe") != 0 && + if(wcsicmp(file_part, L"mysqld.exe") != 0 && wcsicmp(file_part, L"mysqld-debug.exe") != 0 && - wcsicmp(file_part, L"mysqld-nt.exe") != 0) + wcsicmp(file_part, L"mysqld-nt.exe") != 0 && + wcsicmp(file_part, L"mariadbd.exe") != 0) { /* The service executable is not mysqld. */ goto end; @@ -205,7 +206,7 @@ int get_mysql_service_properties(const wchar_t *bin_path, /* If mysqld.exe exists, try to get its version from executable */ if (GetFileAttributes(props->mysqld_exe) != INVALID_FILE_ATTRIBUTES) { - get_file_version(props->mysqld_exe, &props->version_major, + get_file_version(props->mysqld_exe, &props->version_major, &props->version_minor, &props->version_patch); } @@ -235,7 +236,7 @@ int get_mysql_service_properties(const wchar_t *bin_path, { /* Hard, although a rare case, we're guessing datadir and defaults-file. 
- On Windows, defaults-file is traditionally install-root\my.ini + On Windows, defaults-file is traditionally install-root\my.ini and datadir is install-root\data */ char install_root[MAX_PATH]; @@ -297,7 +298,7 @@ int get_mysql_service_properties(const wchar_t *bin_path, } /* - If version could not be determined so far, try mysql_upgrade_info in + If version could not be determined so far, try mysql_upgrade_info in database directory. */ if(props->version_major == 0) diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index d39646af082..1618a8b6d50 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -24,6 +24,7 @@ #include <sql_class.h> #include <sql_parse.h> #include <sql_base.h> /* find_temporary_table() */ +#include <sql_statistics.h> /* is_stat_table() */ #include "slave.h" #include "rpl_mi.h" #include "sql_repl.h" @@ -98,7 +99,8 @@ my_bool wsrep_restart_slave; // Should mysql slave thread be // restarted, when node joins back? my_bool wsrep_desync; // De(re)synchronize the node from the // cluster -my_bool wsrep_strict_ddl; // Reject DDL to +ulonglong wsrep_mode; +my_bool wsrep_strict_ddl; // Deprecated: Reject DDL to // effected tables not // supporting Galera replication bool wsrep_service_started; // If Galera was initialized @@ -1079,7 +1081,6 @@ void wsrep_recover() { WSREP_INFO("Recovered position: %s", oss.str().c_str()); } - } @@ -1099,7 +1100,7 @@ void wsrep_stop_replication(THD *thd) */ if (thd && !thd->wsrep_applier) trans_rollback(thd); wsrep_close_client_connections(TRUE, thd); - + /* wait until appliers have stopped */ wsrep_wait_appliers_close(thd); @@ -1175,6 +1176,327 @@ bool wsrep_start_replication(const char *wsrep_cluster_address) return true; } +bool wsrep_check_mode (enum_wsrep_mode mask) +{ + return wsrep_mode & mask; +} + +//seconds after which the limit warnings suppression will be activated +#define WSREP_WARNING_ACTIVATION_TIMEOUT 5*60 +//number of limit warnings after which the suppression will be activated +#define 
WSREP_WARNING_ACTIVATION_THRESHOLD 10 + +enum wsrep_warning_type { + WSREP_DISABLED = 0, + WSREP_REQUIRE_PRIMARY_KEY= 1, + WSREP_REQUIRE_INNODB= 2, + WSREP_REQUIRE_MAX=3, +}; + +static ulonglong wsrep_warning_start_time=0; +static bool wsrep_warning_active[WSREP_REQUIRE_MAX+1]; +static ulonglong wsrep_warning_count[WSREP_REQUIRE_MAX+1]; +static ulonglong wsrep_total_warnings_count=0; + +/** + Auxiliary function to reset the limit of wsrep warnings. + This is done without mutex protection, but this should be good + enough as it doesn't matter if we loose a couple of suppressed + messages or if this is called multiple times. +*/ + +static void wsrep_reset_warnings(ulonglong now) +{ + uint i; + + wsrep_warning_start_time= now; + wsrep_total_warnings_count= 0; + + for (i= 0 ; i < WSREP_REQUIRE_MAX ; i++) + { + wsrep_warning_active[i]= false; + wsrep_warning_count[i]= 0; + } +} + +static const char* wsrep_warning_name(const enum wsrep_warning_type type) +{ + switch(type) + { + case WSREP_REQUIRE_PRIMARY_KEY: + return "WSREP_REQUIRE_PRIMARY_KEY"; break; + case WSREP_REQUIRE_INNODB: + return "WSREP_REQUIRE_INNODB"; break; + default: assert(0); return " "; break; // for compiler + } +} +/** + Auxiliary function to check if the warning statements should be + thrown or suppressed. + + Logic is: + - If we get more than WSREP_WARNING_ACTIVATION_THRESHOLD errors + of one type, that type of errors will be suppressed for + WSREP_WARNING_ACTIVATION_TIMEOUT. + - When the time limit has been reached, all suppressions are reset. + + This means that if one gets many different types of errors, some of them + may be reset less than WSREP_WARNING_ACTIVATION_TIMEOUT. However at + least one error is disabled for this time. + + SYNOPSIS: + @params + warning_type - The type of warning. 
+ + RETURN: + 0 0k to log + 1 Message suppressed +*/ + +static bool wsrep_protect_against_warning_flood( + enum wsrep_warning_type warning_type) +{ + ulonglong count; + ulonglong now= my_interval_timer()/1000000000ULL; + + count= ++wsrep_warning_count[warning_type]; + wsrep_total_warnings_count++; + + /* + INITIALIZING: + If this is the first time this function is called with log warning + enabled, the monitoring the warnings should start. + */ + if (wsrep_warning_start_time == 0) + { + wsrep_reset_warnings(now); + return false; + } + + /* + The following is true if we got too many errors or if the error was + already suppressed + */ + if (count >= WSREP_WARNING_ACTIVATION_THRESHOLD) + { + ulonglong diff_time= (now - wsrep_warning_start_time); + + if (!wsrep_warning_active[warning_type]) + { + /* + ACTIVATION: + We got WSREP_WARNING_ACTIVATION_THRESHOLD warnings in + less than WSREP_WARNING_ACTIVATION_TIMEOUT we activate the + suppression. + */ + if (diff_time <= WSREP_WARNING_ACTIVATION_TIMEOUT) + { + wsrep_warning_active[warning_type]= true; + WSREP_INFO("Suppressing warnings of type '%s' for up to %d seconds because of flooding", + wsrep_warning_name(warning_type), + WSREP_WARNING_ACTIVATION_TIMEOUT); + } + else + { + /* + There is no flooding till now, therefore we restart the monitoring + */ + wsrep_reset_warnings(now); + } + } + else + { + /* This type of warnings was suppressed */ + if (diff_time > WSREP_WARNING_ACTIVATION_TIMEOUT) + { + ulonglong save_count= wsrep_total_warnings_count; + /* Print a suppression note and remove the suppression */ + wsrep_reset_warnings(now); + WSREP_INFO("Suppressed %lu unsafe warnings during " + "the last %d seconds", + save_count, (int) diff_time); + } + } + } + + return wsrep_warning_active[warning_type]; +} + +/** + Auxiliary function to push warning to client and to the error log +*/ +static void wsrep_push_warning(THD *thd, + enum wsrep_warning_type type, + const handlerton *hton, + const TABLE_LIST *tables) +{ + 
switch(type) + { + case WSREP_REQUIRE_PRIMARY_KEY: + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_OPTION_PREVENTS_STATEMENT, + "WSREP: wsrep_mode = REQUIRED_PRIMARY_KEY enabled. " + "Table '%s'.'%s' should have PRIMARY KEY defined.", + tables->db.str, tables->table_name.str); + if (global_system_variables.log_warnings > 1 && + !wsrep_protect_against_warning_flood(type)) + WSREP_WARN("wsrep_mode = REQUIRED_PRIMARY_KEY enabled. " + "Table '%s'.'%s' should have PRIMARY KEY defined", + tables->db.str, tables->table_name.str); + break; + case WSREP_REQUIRE_INNODB: + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_OPTION_PREVENTS_STATEMENT, + "WSREP: wsrep_mode = STRICT_REPLICATION enabled. " + "Storage engine %s for table '%s'.'%s' is " + "not supported in Galera", + ha_resolve_storage_engine_name(hton), + tables->db.str, tables->table_name.str); + if (global_system_variables.log_warnings > 1 && + !wsrep_protect_against_warning_flood(type)) + WSREP_WARN("wsrep_mode = STRICT_REPLICATION enabled. 
" + "Storage engine %s for table '%s'.'%s' is " + "not supported in Galera", + ha_resolve_storage_engine_name(hton), + tables->db.str, tables->table_name.str); + break; + + default: assert(0); break; + } +} + +bool wsrep_check_mode_after_open_table (THD *thd, + const handlerton *hton, + TABLE_LIST *tables) +{ + enum_sql_command sql_command= thd->lex->sql_command; + bool is_dml_stmt= thd->get_command() != COM_STMT_PREPARE && + (sql_command == SQLCOM_INSERT || + sql_command == SQLCOM_INSERT_SELECT || + sql_command == SQLCOM_REPLACE || + sql_command == SQLCOM_REPLACE_SELECT || + sql_command == SQLCOM_UPDATE || + sql_command == SQLCOM_UPDATE_MULTI || + sql_command == SQLCOM_LOAD || + sql_command == SQLCOM_DELETE); + + if (!is_dml_stmt) + return true; + + const legacy_db_type db_type= hton->db_type; + bool replicate= ((db_type == DB_TYPE_MYISAM && wsrep_check_mode(WSREP_MODE_REPLICATE_MYISAM)) || + (db_type == DB_TYPE_ARIA && wsrep_check_mode(WSREP_MODE_REPLICATE_ARIA))); + TABLE *tbl= tables->table; + + if (replicate) + { + /* It is not recommended to replicate MyISAM as it lacks rollback feature + but if user demands then actions are replicated using TOI. + Following code will kick-start the TOI but this has to be done only once + per statement. + Note: kick-start will take-care of creating isolation key for all tables + involved in the list (provided all of them are MYISAM or Aria tables). */ + if (!is_stat_table(&tables->db, &tables->alias)) + { + if (tbl->s->primary_key == MAX_KEY && + wsrep_check_mode(WSREP_MODE_REQUIRED_PRIMARY_KEY)) + { + /* Other replicated table doesn't have explicit primary-key defined. 
*/ + wsrep_push_warning(thd, WSREP_REQUIRE_PRIMARY_KEY, hton, tables); + } + + wsrep_before_rollback(thd, true); + wsrep_after_rollback(thd, true); + wsrep_after_statement(thd); + WSREP_TO_ISOLATION_BEGIN(NULL, NULL, (tables)); + } + } else if (db_type != DB_TYPE_UNKNOWN && + db_type != DB_TYPE_PERFORMANCE_SCHEMA) + { + bool is_system_db= (tbl && + ((strcmp(tbl->s->db.str, "mysql") == 0) || + (strcmp(tbl->s->db.str, "information_schema") == 0))); + + if (!is_system_db && + !is_temporary_table(tables)) + { + + if (db_type != DB_TYPE_INNODB && + wsrep_check_mode(WSREP_MODE_STRICT_REPLICATION)) + { + /* Table is not an InnoDB table and strict replication is requested*/ + wsrep_push_warning(thd, WSREP_REQUIRE_INNODB, hton, tables); + } + + if (tbl->s->primary_key == MAX_KEY && + db_type == DB_TYPE_INNODB && + wsrep_check_mode(WSREP_MODE_REQUIRED_PRIMARY_KEY)) + { + /* InnoDB table doesn't have explicit primary-key defined. */ + wsrep_push_warning(thd, WSREP_REQUIRE_PRIMARY_KEY, hton, tables); + } + + if (db_type != DB_TYPE_INNODB && + thd->variables.sql_log_bin == 1 && + wsrep_check_mode(WSREP_MODE_DISALLOW_LOCAL_GTID)) + { + /* Table is not an InnoDB table and local GTIDs are disallowed */ + my_error(ER_GALERA_REPLICATION_NOT_SUPPORTED, MYF(0)); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_OPTION_PREVENTS_STATEMENT, + "You can't execute statements that would generate local " + "GTIDs when wsrep_mode = DISALLOW_LOCAL_GTID is set. 
" + "Try disabling binary logging with SET sql_log_bin=0 " + "to execute this statement."); + goto wsrep_error_label; + } + } + } + + return true; + +wsrep_error_label: + return false; +} + +bool wsrep_check_mode_before_cmd_execute (THD *thd) +{ + bool ret= true; + if (wsrep_check_mode(WSREP_MODE_BINLOG_ROW_FORMAT_ONLY) && + !thd->is_current_stmt_binlog_format_row() && is_update_query(thd->lex->sql_command)) + { + my_error(ER_GALERA_REPLICATION_NOT_SUPPORTED, MYF(0)); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_OPTION_PREVENTS_STATEMENT, + "WSREP: wsrep_mode = BINLOG_ROW_FORMAT_ONLY enabled. Only ROW binlog format is supported."); + ret= false; + } + if (wsrep_check_mode(WSREP_MODE_REQUIRED_PRIMARY_KEY) && + thd->lex->sql_command == SQLCOM_CREATE_TABLE) + { + Key *key; + List_iterator<Key> key_iterator(thd->lex->alter_info.key_list); + bool primary_key_found= false; + while ((key= key_iterator++)) + { + if (key->type == Key::PRIMARY) + { + primary_key_found= true; + break; + } + } + if (!primary_key_found) + { + my_error(ER_GALERA_REPLICATION_NOT_SUPPORTED, MYF(0)); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_OPTION_PREVENTS_STATEMENT, + "WSREP: wsrep_mode = REQUIRED_PRIMARY_KEY enabled. 
Table should have PRIMARY KEY defined."); + ret= false; + } + } + return ret; +} + bool wsrep_must_sync_wait (THD* thd, uint mask) { bool ret= 0; @@ -1895,43 +2217,47 @@ bool wsrep_should_replicate_ddl_iterate(THD* thd, const TABLE_LIST* table_list) for (const TABLE_LIST* it= table_list; it; it= it->next_global) { if (it->table && - !wsrep_should_replicate_ddl(thd, it->table->s->db_type()->db_type)) + !wsrep_should_replicate_ddl(thd, it->table->s->db_type())) return false; } } return true; } -bool wsrep_should_replicate_ddl(THD* thd, - const enum legacy_db_type db_type) +bool wsrep_should_replicate_ddl(THD* thd, const handlerton *hton) { - if (!wsrep_strict_ddl) + if (!wsrep_check_mode(WSREP_MODE_STRICT_REPLICATION)) return true; - switch (db_type) + switch (hton->db_type) { case DB_TYPE_INNODB: return true; break; case DB_TYPE_MYISAM: - if (wsrep_replicate_myisam) + if (wsrep_check_mode(WSREP_MODE_REPLICATE_MYISAM)) return true; else WSREP_DEBUG("wsrep OSU failed for %s", wsrep_thd_query(thd)); break; case DB_TYPE_ARIA: - /* if (wsrep_replicate_aria) */ - /* fallthrough */ + if (wsrep_check_mode(WSREP_MODE_REPLICATE_ARIA)) + return true; + else + WSREP_DEBUG("wsrep OSU failed for %s", wsrep_thd_query(thd)); + break; default: WSREP_DEBUG("wsrep OSU failed for %s", wsrep_thd_query(thd)); break; } - /* STRICT, treat as error */ + /* wsrep_mode = STRICT_REPLICATION, treat as error */ my_error(ER_GALERA_REPLICATION_NOT_SUPPORTED, MYF(0)); push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA, - "WSREP: wsrep_strict_ddl=true and storage engine does not support Galera replication."); + ER_ILLEGAL_HA, + "WSREP: wsrep_mode = STRICT_REPLICATION enabled. 
" + "Storage engine %s not supported.", + ha_resolve_storage_engine_name(hton)); return false; } /* @@ -1962,7 +2288,7 @@ bool wsrep_can_run_in_toi(THD *thd, const char *db, const char *table, { return false; } - if (!wsrep_should_replicate_ddl(thd, create_info->db_type->db_type)) + if (!wsrep_should_replicate_ddl(thd, create_info->db_type)) { return false; } @@ -2038,23 +2364,16 @@ bool wsrep_can_run_in_toi(THD *thd, const char *db, const char *table, return true; break; case SQLCOM_ALTER_TABLE: - { if (create_info) { - enum legacy_db_type db_type; + const handlerton *hton= create_info->db_type; - if (create_info->db_type) - db_type= create_info->db_type->db_type; - else - { - const handlerton *hton= ha_default_handlerton(thd); - db_type= hton->db_type; - } - if (!wsrep_should_replicate_ddl(thd, db_type)) + if (!hton) + hton= ha_default_handlerton(thd); + if (!wsrep_should_replicate_ddl(thd, hton)) return false; } - } - /* fallthrough */ + /* fallthrough */ default: if (table && !thd->find_temporary_table(db, table)) { @@ -2342,6 +2661,23 @@ static int wsrep_RSU_begin(THD *thd, const char *db_, const char *table_) { WSREP_DEBUG("RSU BEGIN: %lld, : %s", wsrep_thd_trx_seqno(thd), wsrep_thd_query(thd)); + + if (thd->variables.wsrep_OSU_method == WSREP_OSU_RSU && + thd->variables.sql_log_bin == 1 && + wsrep_check_mode(WSREP_MODE_DISALLOW_LOCAL_GTID)) + { + /* wsrep_mode = WSREP_MODE_DISALLOW_LOCAL_GTID, treat as error */ + my_error(ER_GALERA_REPLICATION_NOT_SUPPORTED, MYF(0)); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_OPTION_PREVENTS_STATEMENT, + "You can't execute statements that would generate local " + "GTIDs when wsrep_mode = DISALLOW_LOCAL_GTID is set. 
" + "Try disabling binary logging with SET sql_log_bin=0 " + "to execute this statement."); + + return -1; + } + if (thd->wsrep_cs().begin_rsu(5000)) { WSREP_WARN("RSU begin failed"); @@ -2717,7 +3053,7 @@ void wsrep_close_client_connections(my_bool wait_to_end, THD* except_caller_thd) { /* Clear thread cache */ thread_cache.final_flush(); - + /* First signal all threads that it's time to die */ diff --git a/sql/wsrep_mysqld.h b/sql/wsrep_mysqld.h index 04ea633ac53..0541f2b02ad 100644 --- a/sql/wsrep_mysqld.h +++ b/sql/wsrep_mysqld.h @@ -101,8 +101,9 @@ extern ulong wsrep_running_applier_threads; extern ulong wsrep_running_rollbacker_threads; extern bool wsrep_new_cluster; extern bool wsrep_gtid_mode; -extern my_bool wsrep_strict_ddl; extern uint wsrep_gtid_domain_id; +extern ulonglong wsrep_mode; +extern my_bool wsrep_strict_ddl; enum enum_wsrep_reject_types { WSREP_REJECT_NONE, /* nothing rejected */ @@ -134,6 +135,16 @@ enum enum_wsrep_ignore_apply_error { WSREP_IGNORE_ERRORS_MAX= 0x7 }; +/* wsrep_mode features */ +enum enum_wsrep_mode { + WSREP_MODE_STRICT_REPLICATION= (1ULL << 0), + WSREP_MODE_BINLOG_ROW_FORMAT_ONLY= (1ULL << 1), + WSREP_MODE_REQUIRED_PRIMARY_KEY= (1ULL << 2), + WSREP_MODE_REPLICATE_MYISAM= (1ULL << 3), + WSREP_MODE_REPLICATE_ARIA= (1ULL << 4), + WSREP_MODE_DISALLOW_LOCAL_GTID= (1ULL << 5) +}; + // Streaming Replication #define WSREP_FRAG_BYTES 0 #define WSREP_FRAG_ROWS 1 @@ -210,6 +221,10 @@ extern void wsrep_close_applier_threads(int count); extern void wsrep_stop_replication(THD *thd); extern bool wsrep_start_replication(const char *wsrep_cluster_address); extern void wsrep_shutdown_replication(); +extern bool wsrep_check_mode (enum_wsrep_mode mask); +extern bool wsrep_check_mode_after_open_table (THD *thd, const handlerton *hton, + TABLE_LIST *tables); +extern bool wsrep_check_mode_before_cmd_execute (THD *thd); extern bool wsrep_must_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ); extern bool wsrep_sync_wait (THD* thd, uint 
mask= WSREP_SYNC_WAIT_BEFORE_READ); extern enum wsrep::provider::status @@ -378,7 +393,7 @@ int wsrep_to_isolation_begin(THD *thd, const char *db_, const char *table_, const wsrep::key_array *fk_tables= nullptr, const HA_CREATE_INFO* create_info= nullptr); -bool wsrep_should_replicate_ddl(THD* thd, const enum legacy_db_type db_type); +bool wsrep_should_replicate_ddl(THD* thd, const handlerton *db_type); bool wsrep_should_replicate_ddl_iterate(THD* thd, const TABLE_LIST* table_list); void wsrep_to_isolation_end(THD *thd); diff --git a/sql/wsrep_schema.cc b/sql/wsrep_schema.cc index 6fa00dfe979..122233b239a 100644 --- a/sql/wsrep_schema.cc +++ b/sql/wsrep_schema.cc @@ -214,7 +214,7 @@ static int execute_SQL(THD* thd, const char* sql, uint length) { thd->set_query((char*)sql, length); thd->set_query_id(next_query_id()); - mysql_parse(thd, (char*)sql, length, & parser_state, FALSE, FALSE); + mysql_parse(thd, (char*)sql, length, & parser_state); if (thd->is_error()) { WSREP_WARN("Wsrep_schema::execute_sql() failed, %d %s\nSQL: %s", diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc index f30b4d1f666..17e4393d46c 100644 --- a/sql/wsrep_sst.cc +++ b/sql/wsrep_sst.cc @@ -702,49 +702,30 @@ err: return NULL; } -#define WSREP_SST_AUTH_ENV "WSREP_SST_OPT_AUTH" +#define WSREP_SST_AUTH_ENV "WSREP_SST_OPT_AUTH" +#define WSREP_SST_REMOTE_AUTH_ENV "WSREP_SST_OPT_REMOTE_AUTH" +#define DATA_HOME_DIR_ENV "INNODB_DATA_HOME_DIR" -static int sst_append_auth_env(wsp::env& env, const char* sst_auth) +static int sst_append_env_var(wsp::env& env, + const char* const var, + const char* const val) { - int const sst_auth_size= strlen(WSREP_SST_AUTH_ENV) + 1 /* = */ - + (sst_auth ? strlen(sst_auth) : 0) + 1 /* \0 */; + int const env_str_size= strlen(var) + 1 /* = */ + + (val ? 
strlen(val) : 0) + 1 /* \0 */; - wsp::string sst_auth_str(sst_auth_size); // for automatic cleanup on return - if (!sst_auth_str()) return -ENOMEM; + wsp::string env_str(env_str_size); // for automatic cleanup on return + if (!env_str()) return -ENOMEM; - int ret= snprintf(sst_auth_str(), sst_auth_size, "%s=%s", - WSREP_SST_AUTH_ENV, sst_auth ? sst_auth : ""); + int ret= snprintf(env_str(), env_str_size, "%s=%s", var, val ? val : ""); - if (ret < 0 || ret >= sst_auth_size) + if (ret < 0 || ret >= env_str_size) { - WSREP_ERROR("sst_append_auth_env(): snprintf() failed: %d", ret); + WSREP_ERROR("sst_append_env_var(): snprintf(%s=%s) failed: %d", + var, val, ret); return (ret < 0 ? ret : -EMSGSIZE); } - env.append(sst_auth_str()); - return -env.error(); -} - -#define DATA_HOME_DIR_ENV "INNODB_DATA_HOME_DIR" - -static int sst_append_data_dir(wsp::env& env, const char* data_dir) -{ - int const data_dir_size= strlen(DATA_HOME_DIR_ENV) + 1 /* = */ - + (data_dir ? strlen(data_dir) : 0) + 1 /* \0 */; - - wsp::string data_dir_str(data_dir_size); // for automatic cleanup on return - if (!data_dir_str()) return -ENOMEM; - - int ret= snprintf(data_dir_str(), data_dir_size, "%s=%s", - DATA_HOME_DIR_ENV, data_dir ? data_dir : ""); - - if (ret < 0 || ret >= data_dir_size) - { - WSREP_ERROR("sst_append_data_dir(): snprintf() failed: %d", ret); - return (ret < 0 ? 
ret : -EMSGSIZE); - } - - env.append(data_dir_str()); + env.append(env_str()); return -env.error(); } @@ -1155,7 +1136,7 @@ static ssize_t sst_prepare_other (const char* method, return -env.error(); } - if ((ret= sst_append_auth_env(env, sst_auth))) + if ((ret= sst_append_env_var(env, WSREP_SST_AUTH_ENV, sst_auth))) { WSREP_ERROR("sst_prepare_other(): appending auth failed: %d", ret); return ret; @@ -1163,7 +1144,7 @@ static ssize_t sst_prepare_other (const char* method, if (data_home_dir) { - if ((ret= sst_append_data_dir(env, data_home_dir))) + if ((ret= sst_append_env_var(env, DATA_HOME_DIR_ENV, data_home_dir))) { WSREP_ERROR("sst_prepare_other(): appending data " "directory failed: %d", ret); @@ -1491,6 +1472,8 @@ static int sst_donate_mysqldump (const char* addr, wsrep::seqno::undefined())); Wsrep_server_state::instance().sst_sent(sst_sent_gtid, ret); + wsrep_donor_monitor_end(); + return ret; } @@ -1555,7 +1538,7 @@ static int run_sql_command(THD *thd, const char *query) return -1; } - mysql_parse(thd, thd->query(), thd->query_length(), &ps, FALSE, FALSE); + mysql_parse(thd, thd->query(), thd->query_length(), &ps); if (thd->is_error()) { int const err= thd->get_stmt_da()->sql_errno(); @@ -1872,6 +1855,7 @@ static int sst_donate_other (const char* method, "wsrep_sst_%s " WSREP_SST_OPT_ROLE " 'donor' " WSREP_SST_OPT_ADDR " '%s' " + WSREP_SST_OPT_LPORT " '%u' " WSREP_SST_OPT_SOCKET " '%s' " WSREP_SST_OPT_DATA " '%s' " "%s" @@ -1880,7 +1864,8 @@ static int sst_donate_other (const char* method, "%s" "%s" "%s", - method, addr, mysqld_unix_port, mysql_real_data_home, + method, addr, mysqld_port, mysqld_unix_port, + mysql_real_data_home, wsrep_defaults_file, uuid_oss.str().c_str(), gtid.seqno().get(), wsrep_gtid_server.domain_id, binlog_opt_val, binlog_index_opt_val, @@ -1969,7 +1954,21 @@ int wsrep_sst_donate(const std::string& msg, const char* data= method + method_len + 1; - if (check_request_str(data, address_char)) + /* check for auth@addr separator */ + const 
char* addr= strrchr(data, '@'); + wsp::string remote_auth; + if (addr) + { + remote_auth.set(strndup(data, addr - data)); + addr++; + } + else + { + // no auth part + addr= data; + } + + if (check_request_str(addr, address_char)) { WSREP_ERROR("Bad SST address string. SST canceled."); return WSREP_CB_FAILURE; @@ -1983,15 +1982,25 @@ int wsrep_sst_donate(const std::string& msg, } int ret; - if ((ret= sst_append_auth_env(env, sst_auth_real))) + if ((ret= sst_append_env_var(env, WSREP_SST_AUTH_ENV, sst_auth_real))) { WSREP_ERROR("wsrep_sst_donate_cb(): appending auth env failed: %d", ret); return WSREP_CB_FAILURE; } + if (remote_auth()) + { + if ((ret= sst_append_env_var(env, WSREP_SST_REMOTE_AUTH_ENV,remote_auth()))) + { + WSREP_ERROR("wsrep_sst_donate_cb(): appending remote auth env failed: " + "%d", ret); + return WSREP_CB_FAILURE; + } + } + if (data_home_dir) { - if ((ret= sst_append_data_dir(env, data_home_dir))) + if ((ret= sst_append_env_var(env, DATA_HOME_DIR_ENV, data_home_dir))) { WSREP_ERROR("wsrep_sst_donate_cb(): appending data " "directory failed: %d", ret); @@ -2013,11 +2022,11 @@ int wsrep_sst_donate(const std::string& msg, if (!strcmp (WSREP_SST_MYSQLDUMP, method)) { - ret= sst_donate_mysqldump(data, current_gtid, bypass, env()); + ret= sst_donate_mysqldump(addr, current_gtid, bypass, env()); } else { - ret= sst_donate_other(method, data, current_gtid, bypass, env()); + ret= sst_donate_other(method, addr, current_gtid, bypass, env()); } return (ret >= 0 ? 0 : 1); diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc index 15fd87eda5d..e4e598d472a 100644 --- a/sql/wsrep_thd.cc +++ b/sql/wsrep_thd.cc @@ -348,8 +348,23 @@ bool wsrep_bf_abort(const THD* bf_thd, THD* victim_thd) if (WSREP(victim_thd) && !victim_thd->wsrep_trx().active()) { - WSREP_DEBUG("wsrep_bf_abort, BF abort for non active transaction"); - wsrep_start_transaction(victim_thd, victim_thd->wsrep_next_trx_id()); + WSREP_DEBUG("wsrep_bf_abort, BF abort for non active transaction." 
+ " Victim state %s bf state %s", + wsrep::to_c_string(victim_thd->wsrep_trx().state()), + wsrep::to_c_string(bf_thd->wsrep_trx().state())); + + switch (victim_thd->wsrep_trx().state()) { + case wsrep::transaction::s_aborting: /* fall through */ + case wsrep::transaction::s_aborted: + WSREP_DEBUG("victim is aborting or has aborted"); + break; + default: break; + } + /* victim may not have started transaction yet in wsrep context, but it may + have acquired MDL locks (due to DDL execution), and this has caused BF conflict. + such case does not require aborting in wsrep or replication provider state. + */ + return false; } bool ret; @@ -359,6 +374,7 @@ bool wsrep_bf_abort(const THD* bf_thd, THD* victim_thd) } else { + DBUG_ASSERT(WSREP(victim_thd) ? victim_thd->wsrep_trx().active() : 1); ret= victim_thd->wsrep_cs().bf_abort(bf_seqno); } if (ret) diff --git a/sql/wsrep_var.cc b/sql/wsrep_var.cc index 3c557554cf4..386da305a26 100644 --- a/sql/wsrep_var.cc +++ b/sql/wsrep_var.cc @@ -92,19 +92,15 @@ static bool refresh_provider_options() } } -void wsrep_set_wsrep_on() +void wsrep_set_wsrep_on(THD* thd) { + if (thd) + thd->wsrep_was_on= WSREP_ON_; WSREP_PROVIDER_EXISTS_= wsrep_provider && strncasecmp(wsrep_provider, WSREP_NONE, FN_REFLEN); WSREP_ON_= global_system_variables.wsrep_on && WSREP_PROVIDER_EXISTS_; } -/* This is intentionally declared as a weak global symbol, so that -linking will succeed even if the server is built with a dynamically -linked InnoDB. 
*/ -ulong innodb_lock_schedule_algorithm __attribute__((weak)); -struct handlerton* innodb_hton_ptr __attribute__((weak)); - bool wsrep_on_update (sys_var *self, THD* thd, enum_var_type var_type) { if (var_type == OPT_GLOBAL) @@ -134,7 +130,7 @@ bool wsrep_on_update (sys_var *self, THD* thd, enum_var_type var_type) thd->variables.wsrep_on= global_system_variables.wsrep_on= saved_wsrep_on; } - wsrep_set_wsrep_on(); + wsrep_set_wsrep_on(thd); if (var_type == OPT_GLOBAL) { @@ -158,14 +154,6 @@ bool wsrep_on_check(sys_var *self, THD* thd, set_var* var) if (new_wsrep_on) { - if (innodb_hton_ptr && innodb_lock_schedule_algorithm != 0) - { - my_message(ER_WRONG_ARGUMENTS, " WSREP (galera) can't be enabled " - "if innodb_lock_schedule_algorithm=VATS. Please configure" - " innodb_lock_schedule_algorithm=FCFS and restart.", MYF(0)); - return true; - } - if (!WSREP_PROVIDER_EXISTS) { my_message(ER_WRONG_ARGUMENTS, "WSREP (galera) can't be enabled " @@ -520,7 +508,7 @@ bool wsrep_provider_update (sys_var *self, THD* thd, enum_var_type type) if (!rcode) refresh_provider_options(); - wsrep_set_wsrep_on(); + wsrep_set_wsrep_on(thd); mysql_mutex_lock(&LOCK_global_system_variables); return rcode; @@ -540,7 +528,7 @@ void wsrep_provider_init (const char* value) if (wsrep_provider) my_free((void *)wsrep_provider); wsrep_provider= my_strdup(PSI_INSTRUMENT_MEM, value, MYF(0)); - wsrep_set_wsrep_on(); + wsrep_set_wsrep_on(NULL); } bool wsrep_provider_options_check(sys_var *self, THD* thd, set_var* var) @@ -978,6 +966,11 @@ bool wsrep_max_ws_size_update(sys_var *self, THD *thd, enum_var_type) return refresh_provider_options(); } +bool wsrep_mode_check(sys_var *self, THD* thd, set_var* var) +{ + return false; +} + #if UNUSED /* eaec266eb16c (Sergei Golubchik 2014-09-28) */ static SHOW_VAR wsrep_status_vars[]= { @@ -1106,3 +1099,25 @@ bool wsrep_gtid_domain_id_update(sys_var* self, THD *thd, enum_var_type) wsrep_gtid_server.domain_id= wsrep_gtid_domain_id; return false; } + +bool 
wsrep_strict_ddl_update(sys_var *self, THD* thd, enum_var_type var_type) +{ + // In case user still sets wsrep_strict_ddl we set new + // option to wsrep_mode + if (wsrep_strict_ddl) + wsrep_mode|= WSREP_MODE_STRICT_REPLICATION; + else + wsrep_mode&= (~WSREP_MODE_STRICT_REPLICATION); + return false; +} + +bool wsrep_replicate_myisam_update(sys_var *self, THD* thd, enum_var_type var_type) +{ + // In case user still sets wsrep_replicate_myisam we set new + // option to wsrep_mode + if (wsrep_replicate_myisam) + wsrep_mode|= WSREP_MODE_REPLICATE_MYISAM; + else + wsrep_mode&= (~WSREP_MODE_REPLICATE_MYISAM); + return false; +} diff --git a/sql/wsrep_var.h b/sql/wsrep_var.h index 997784674dd..7908e873795 100644 --- a/sql/wsrep_var.h +++ b/sql/wsrep_var.h @@ -36,7 +36,7 @@ class set_var; class THD; int wsrep_init_vars(); -void wsrep_set_wsrep_on(); +void wsrep_set_wsrep_on(THD *thd); #define CHECK_ARGS (sys_var *self, THD* thd, set_var *var) #define UPDATE_ARGS (sys_var *self, THD* thd, enum_var_type type) @@ -107,6 +107,10 @@ extern bool wsrep_debug_update UPDATE_ARGS; extern bool wsrep_gtid_seq_no_check CHECK_ARGS; extern bool wsrep_gtid_domain_id_update UPDATE_ARGS; + +extern bool wsrep_mode_check CHECK_ARGS; +extern bool wsrep_strict_ddl_update UPDATE_ARGS; +extern bool wsrep_replicate_myisam_update UPDATE_ARGS; #else /* WITH_WSREP */ #define wsrep_provider_init(X) |