diff options
Diffstat (limited to 'sql')
58 files changed, 2575 insertions, 1751 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index b9cd418f295..39307ab8183 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -146,7 +146,6 @@ SET (SQL_SOURCE opt_index_cond_pushdown.cc opt_subselect.cc opt_table_elimination.cc sql_expression_cache.cc gcalc_slicescan.cc gcalc_tools.cc - ../sql-common/mysql_async.c my_apc.cc mf_iocache_encr.cc item_jsonfunc.cc my_json_writer.cc rpl_gtid.cc rpl_parallel.cc @@ -179,7 +178,7 @@ IF ((CMAKE_SYSTEM_NAME MATCHES "Linux" OR AND (NOT DISABLE_THREADPOOL)) ADD_DEFINITIONS(-DHAVE_POOL_OF_THREADS) IF(WIN32) - SET(SQL_SOURCE ${SQL_SOURCE} threadpool_win.cc) + SET(SQL_SOURCE ${SQL_SOURCE} threadpool_win.cc threadpool_winsockets.cc threadpool_winsockets.h) ENDIF() SET(SQL_SOURCE ${SQL_SOURCE} threadpool_generic.cc) SET(SQL_SOURCE ${SQL_SOURCE} threadpool_common.cc) @@ -187,7 +186,7 @@ IF ((CMAKE_SYSTEM_NAME MATCHES "Linux" OR ENDIF() IF(WIN32) - SET(SQL_SOURCE ${SQL_SOURCE} handle_connections_win.cc nt_servc.cc) + SET(SQL_SOURCE ${SQL_SOURCE} handle_connections_win.cc) ENDIF() MYSQL_ADD_PLUGIN(partition ha_partition.cc STORAGE_ENGINE DEFAULT STATIC_ONLY @@ -217,7 +216,7 @@ FOREACH(se aria partition perfschema sql_sequence wsrep) ENDFOREACH() IF(WIN32) - SET(MYSQLD_SOURCE main.cc message.rc) + SET(MYSQLD_SOURCE winmain.cc message.rc) ELSE() SET(MYSQLD_SOURCE main.cc ${DTRACE_PROBES_ALL}) ENDIF() diff --git a/sql/filesort.cc b/sql/filesort.cc index 4eea588007e..0337325b544 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -251,6 +251,9 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, param.init_for_filesort(sort_len, table, max_rows, filesort->sort_positions); + param.set_all_read_bits= filesort->set_all_read_bits; + param.unpack= filesort->unpack; + sort->addon_fields= param.addon_fields; sort->sort_keys= param.sort_keys; @@ -884,6 +887,8 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, goto err; } + if (param->set_all_read_bits) + sort_form->column_bitmaps_set(save_read_set, save_write_set); DEBUG_SYNC(thd, "after_index_merge_phase1"); for (;;) @@ -891,7 +896,11 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, if (quick_select) error= select->quick->get_next(); else /* Not quick-select */ + { error= file->ha_rnd_next(sort_form->record[0]); + if (param->unpack) + param->unpack(sort_form); + } if (unlikely(error)) break; file->position(sort_form->record[0]); diff --git a/sql/filesort.h b/sql/filesort.h index 9f71da02c96..29ae5e20cc6 100644 --- a/sql/filesort.h +++ b/sql/filesort.h @@ -62,6 +62,13 @@ public: Filesort_tracker *tracker; Sort_keys *sort_keys; + /* + TRUE means all the fields of table of whose bitmap read_set is set + need to be read while reading records in the sort buffer. + FALSE otherwise + */ + bool set_all_read_bits; + Filesort(ORDER *order_arg, ha_rows limit_arg, bool sort_positions_arg, SQL_SELECT *select_arg): order(order_arg), @@ -71,7 +78,9 @@ public: own_select(false), using_pq(false), sort_positions(sort_positions_arg), - sort_keys(NULL) + sort_keys(NULL), + set_all_read_bits(FALSE), + unpack(NULL) { DBUG_ASSERT(order); }; @@ -79,6 +88,8 @@ public: ~Filesort() { cleanup(); } /* Prepare ORDER BY list for sorting. */ Sort_keys* make_sortorder(THD *thd, JOIN *join, table_map first_table_bit); + /* Unpack temp table columns to base table columns*/ + void (*unpack)(TABLE *); private: void cleanup(); diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index cdfde2cc3ee..7527015682f 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -9183,6 +9183,7 @@ int ha_partition::extra(enum ha_extra_function operation) case HA_EXTRA_BEGIN_ALTER_COPY: case HA_EXTRA_END_ALTER_COPY: case HA_EXTRA_FAKE_START_STMT: + case HA_EXTRA_IGNORE_INSERT: DBUG_RETURN(loop_partitions(extra_cb, &operation)); default: { diff --git a/sql/handle_connections_win.cc b/sql/handle_connections_win.cc index b61130dd6e9..debbce998fa 100644 --- a/sql/handle_connections_win.cc +++ b/sql/handle_connections_win.cc @@ -26,7 +26,6 @@ #include <handle_connections_win.h> /* From mysqld.cc */ -extern HANDLE hEventShutdown; extern MYSQL_SOCKET base_ip_sock, extra_ip_sock; #ifdef HAVE_POOL_OF_THREADS extern PTP_CALLBACK_ENVIRON get_threadpool_win_callback_environ(); @@ -510,6 +509,18 @@ struct Pipe_Listener : public Listener } }; + /* The shutdown event, which is set whenever*/ +static void create_shutdown_event() +{ + char shutdown_event_name[40]; + sprintf_s(shutdown_event_name, "MySQLShutdown%u", GetCurrentProcessId()); + if (!(hEventShutdown= CreateEvent(0, FALSE, FALSE, shutdown_event_name))) + { + sql_print_error("Can't create shutdown event, Windows error %u", GetLastError()); + unireg_abort(1); + } +} + /** Accept new client connections on Windows. @@ -572,9 +583,9 @@ void network_init_win() void handle_connections_win() { - DBUG_ASSERT(hEventShutdown); int n_waits; + create_shutdown_event(); wait_events[SHUTDOWN_IDX]= hEventShutdown; n_waits= 1; @@ -589,6 +600,8 @@ void handle_connections_win() all_listeners[i]->begin_accept(); } + mysqld_win_set_startup_complete(); + for (;;) { DWORD idx = WaitForMultipleObjects(n_waits ,wait_events, FALSE, INFINITE); @@ -600,6 +613,8 @@ void handle_connections_win() all_listeners[idx - LISTENER_START_IDX]->completion_callback(); } + mysqld_win_initiate_shutdown(); + /* Cleanup */ for (int i= 0; i < n_listeners; i++) { diff --git a/sql/handler.h b/sql/handler.h index b0a90f24494..777a6b455ef 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -535,7 +535,7 @@ enum legacy_db_type DB_TYPE_PERFORMANCE_SCHEMA=28, DB_TYPE_S3=41, DB_TYPE_ARIA=42, - DB_TYPE_TOKUDB=43, + DB_TYPE_TOKUDB=43, /* disabled in MariaDB Server 10.5, removed in 10.6 */ DB_TYPE_SEQUENCE=44, DB_TYPE_FIRST_DYNAMIC=45, DB_TYPE_DEFAULT=127 // Must be last diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc index 6dee9b9adf6..55ad50511ec 100644 --- a/sql/log_event_server.cc +++ b/sql/log_event_server.cc @@ -1903,8 +1903,7 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, thd->variables.sql_log_slow= !MY_TEST(global_system_variables.log_slow_disabled_statements & LOG_SLOW_DISABLE_SLAVE); } - mysql_parse(thd, thd->query(), thd->query_length(), &parser_state, - FALSE, FALSE); + mysql_parse(thd, thd->query(), thd->query_length(), &parser_state); /* Finalize server status flags after executing a statement. */ thd->update_server_status(); log_slow_statement(thd); diff --git a/sql/mysql_install_db.cc b/sql/mysql_install_db.cc index 35e24a521e4..f712e29b843 100644 --- a/sql/mysql_install_db.cc +++ b/sql/mysql_install_db.cc @@ -26,9 +26,13 @@ #include <shellapi.h> #include <accctrl.h> #include <aclapi.h> +#include <ntsecapi.h> +#include <sddl.h> struct IUnknown; #include <shlwapi.h> +#include <string> + #define USAGETEXT \ "mysql_install_db.exe Ver 1.00 for Windows\n" \ "Copyright (C) 2010-2011 Monty Program Ab & Vladislav Vaintroub\n" \ @@ -39,9 +43,8 @@ struct IUnknown; extern "C" const char* mysql_bootstrap_sql[]; -static char default_os_user[]= "NT AUTHORITY\\NetworkService"; static char default_datadir[MAX_PATH]; -static int create_db_instance(); +static int create_db_instance(const char *datadir); static uint opt_silent; static char datadir_buffer[FN_REFLEN]; static char mysqld_path[FN_REFLEN]; @@ -51,13 +54,13 @@ static char *opt_password; static int opt_port; static int opt_innodb_page_size; static char *opt_socket; -static char *opt_os_user; -static char *opt_os_password; static my_bool opt_default_user; static my_bool opt_allow_remote_root_access; static my_bool opt_skip_networking; static my_bool opt_verbose_bootstrap; static my_bool verbose_errors; +static my_bool opt_large_pages; +static char *opt_config; #define DEFAULT_INNODB_PAGE_SIZE 16*1024 @@ -73,14 +76,14 @@ static struct my_option my_long_options[]= &opt_password, &opt_password, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"port", 'P', "mysql port", &opt_port, &opt_port, 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, - {"socket", 'W', + {"socket", 'W', "named pipe name (if missing, it will be set the same as service)", &opt_socket, &opt_socket, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"default-user", 'D', "Create default user", &opt_default_user, &opt_default_user, 0 , GET_BOOL, OPT_ARG, 0, 0, 0, 0, 0, 0}, - {"allow-remote-root-access", 'R', + {"allow-remote-root-access", 'R', "Allows remote access from network for user root", - &opt_allow_remote_root_access, &opt_allow_remote_root_access, 0 , GET_BOOL, + &opt_allow_remote_root_access, &opt_allow_remote_root_access, 0 , GET_BOOL, OPT_ARG, 0, 0, 0, 0, 0, 0}, {"skip-networking", 'N', "Do not use TCP connections, use pipe instead", &opt_skip_networking, &opt_skip_networking, 0 , GET_BOOL, OPT_ARG, 0, 0, 0, 0, @@ -91,6 +94,10 @@ static struct my_option my_long_options[]= &opt_silent, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"verbose-bootstrap", 'o', "Include mysqld bootstrap output",&opt_verbose_bootstrap, &opt_verbose_bootstrap, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + { "large-pages",'l', "Use large pages", &opt_large_pages, + &opt_large_pages, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"config",'c', "my.ini config template file", &opt_config, + &opt_config, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; @@ -135,7 +142,7 @@ ATTRIBUTE_NORETURN static void die(const char *fmt, ...) } -static void verbose(const char *fmt, ...) +static void verbose( const char *fmt, ...) { va_list args; @@ -150,15 +157,16 @@ static void verbose(const char *fmt, ...) va_end(args); } +static char full_config_path[MAX_PATH]; int main(int argc, char **argv) { int error; - char self_name[FN_REFLEN]; + char self_name[MAX_PATH]; char *p; - + char *datadir = NULL; MY_INIT(argv[0]); - GetModuleFileName(NULL, self_name, FN_REFLEN); + GetModuleFileName(NULL, self_name, MAX_PATH); strcpy(mysqld_path,self_name); p= strrchr(mysqld_path, FN_LIBCHAR); if (p) @@ -168,7 +176,56 @@ int main(int argc, char **argv) if ((error= handle_options(&argc, &argv, my_long_options, get_one_option))) exit(error); - if (!opt_datadir) + + if (opt_config != 0 && _access(opt_config, 04) != 0) + { + int err= errno; + switch(err) + { + case EACCES: + die("File %s can't be read", opt_config); + break; + case ENOENT: + die("File %s does not exist", opt_config); + break; + default: + die("Can't access file %s, errno %d",opt_config, err); + break; + } + } + if (opt_config) + { + DWORD dwret = GetFullPathName(opt_config, sizeof(full_config_path), full_config_path, NULL); + if (dwret == 0) + { + die("GetFullPathName failed, last error %u", GetLastError()); + } + else if (dwret > sizeof(full_config_path)) + { + die("Can't resolve the config file name, path too large"); + } + opt_config= full_config_path; + } + + if(opt_datadir) + datadir = opt_datadir; + + if (!datadir && opt_config) + { + for(auto section : {"server","mysqld"}) + { + auto ret = GetPrivateProfileStringA(section,"datadir", NULL, default_datadir, + sizeof(default_datadir)-1, opt_config); + if (ret) + { + datadir= default_datadir; + printf("Data directory (from config file) is %s\n",datadir); + break; + } + } + } + + if (!datadir) { /* Figure out default data directory. It "data" directory, next to "bin" directory, where @@ -189,31 +246,32 @@ int main(int argc, char **argv) my_print_help(my_long_options); } strcat_s(default_datadir, "\\data"); - opt_datadir= default_datadir; - printf("Default data directory is %s\n",opt_datadir); + datadir= default_datadir; + printf("Default data directory is %s\n",datadir); } + DBUG_ASSERT(datadir); + /* Print some help on errors */ verbose_errors= TRUE; - if (!opt_os_user) - { - opt_os_user= default_os_user; - opt_os_password= NULL; - } /* Workaround WiX bug (strip possible quote character at the end of path) */ - size_t len= strlen(opt_datadir); + size_t len= strlen(datadir); if (len > 0) { - if (opt_datadir[len-1] == '"') + if (datadir[len-1] == '"') + { + datadir[len-1]= 0; + } + if (datadir[0] == '"') { - opt_datadir[len-1]= 0; + datadir++; } } - GetFullPathName(opt_datadir, FN_REFLEN, datadir_buffer, NULL); - opt_datadir= datadir_buffer; + GetFullPathName(datadir, FN_REFLEN, datadir_buffer, NULL); + datadir= datadir_buffer; - if (create_db_instance()) + if (create_db_instance(datadir)) { die("database creation failed"); } @@ -279,19 +337,37 @@ static char *get_plugindir() static char *init_bootstrap_command_line(char *cmdline, size_t size) { - char basedir[MAX_PATH]; - get_basedir(basedir, sizeof(basedir), mysqld_path); - - my_snprintf(cmdline, size - 1, - "\"\"%s\" --no-defaults %s --innodb-page-size=%d --bootstrap" - " \"--lc-messages-dir=%s/share\"" - " --basedir=. --datadir=. --default-storage-engine=myisam" - " --max_allowed_packet=9M " - " --net-buffer-length=16k\"", mysqld_path, - opt_verbose_bootstrap ? "--console" : "", opt_innodb_page_size, basedir); + snprintf(cmdline, size - 1, + "\"\"%s\"" + " --defaults-file=my.ini" + " %s" + " --bootstrap" + " --datadir=." + " --loose-innodb-buffer-pool-size=10M" + "\"" + , mysqld_path, opt_verbose_bootstrap ? "--console" : ""); return cmdline; } +static char my_ini_path[MAX_PATH]; + +static void write_myini_str(const char *key, const char* val, const char *section="mysqld") +{ + DBUG_ASSERT(my_ini_path[0]); + if (!WritePrivateProfileString(section, key, val, my_ini_path)) + { + die("Can't write to ini file key=%s, val=%s, section=%s, Windows error %u",key,val,section, + GetLastError()); + } +} + + +static void write_myini_int(const char* key, int val, const char* section = "mysqld") +{ + char buf[10]; + itoa(val, buf, 10); + write_myini_str(key, buf, section); +} /** Create my.ini in current directory (this is assumed to be @@ -305,59 +381,63 @@ static int create_myini() char path_buf[MAX_PATH]; GetCurrentDirectory(MAX_PATH, path_buf); - - /* Create ini file. */ - FILE *myini= fopen("my.ini","wt"); - if (!myini) + snprintf(my_ini_path,sizeof(my_ini_path), "%s\\my.ini", path_buf); + if (opt_config) { - die("Can't create my.ini in data directory"); + if (!CopyFile(opt_config, my_ini_path,TRUE)) + { + die("Can't copy %s to my.ini , last error %lu", opt_config, GetLastError()); + } } /* Write out server settings. */ - fprintf(myini, "[mysqld]\n"); convert_slashes(path_buf); - fprintf(myini, "datadir=%s\n", path_buf); + write_myini_str("datadir",path_buf); + if (opt_skip_networking) { - fprintf(myini,"skip-networking\n"); + write_myini_str("skip-networking","ON"); if (!opt_socket) opt_socket= opt_service; } - enable_named_pipe= (my_bool) + enable_named_pipe= (my_bool) ((opt_socket && opt_socket[0]) || opt_skip_networking); if (enable_named_pipe) { - fprintf(myini,"named-pipe=ON\n"); + write_myini_str("named-pipe","ON"); } if (opt_socket && opt_socket[0]) { - fprintf(myini, "socket=%s\n", opt_socket); + write_myini_str("socket", opt_socket); } if (opt_port) { - fprintf(myini,"port=%d\n", opt_port); + write_myini_int("port", opt_port); } if (opt_innodb_page_size != DEFAULT_INNODB_PAGE_SIZE) { - fprintf(myini, "innodb-page-size=%d\n", opt_innodb_page_size); + write_myini_int("innodb-page-size", opt_innodb_page_size); + } + if (opt_large_pages) + { + write_myini_str("large-pages","ON"); } + /* Write out client settings. */ - fprintf(myini, "[client]\n"); /* Used for named pipes */ if (opt_socket && opt_socket[0]) - fprintf(myini,"socket=%s\n",opt_socket); + write_myini_str("socket",opt_socket,"client"); if (opt_skip_networking) - fprintf(myini,"protocol=pipe\n"); + write_myini_str("protocol", "pipe", "client"); else if (opt_port) - fprintf(myini,"port=%d\n",opt_port); + write_myini_int("port",opt_port,"client"); char *plugin_dir = get_plugindir(); if (plugin_dir) - fprintf(myini, "plugin-dir=%s\n", plugin_dir); - fclose(myini); + write_myini_str("plugin-dir", plugin_dir, "client"); return 0; } @@ -380,22 +460,92 @@ static const char allow_remote_root_access_cmd[]= "DROP TABLE tmp_user;\n"; static const char end_of_script[]="-- end."; +/* +Add or remove privilege for a user +@param[in] account_name - user name, Windows style, e.g "NT SERVICE\mariadb", or ".\joe" +@param[in] privilege name - standard Windows privilege name, e.g "SeLockMemoryPrivilege" +@param[in] add - when true, add privilege, otherwise remove it + +In special case where privilege name is NULL, and add is false +all privileges for the user are removed. +*/ +static int handle_user_privileges(const char *account_name, const wchar_t *privilege_name, bool add) +{ + LSA_OBJECT_ATTRIBUTES attr{}; + LSA_HANDLE lsa_handle; + auto status= LsaOpenPolicy( + 0, &attr, POLICY_LOOKUP_NAMES | POLICY_CREATE_ACCOUNT, &lsa_handle); + if (status) + { + verbose("LsaOpenPolicy returned %lu", LsaNtStatusToWinError(status)); + return 1; + } + BYTE sidbuf[SECURITY_MAX_SID_SIZE]; + PSID sid= (PSID) sidbuf; + SID_NAME_USE name_use; + char domain_name[256]; + DWORD cbSid= sizeof(sidbuf); + DWORD cbDomain= sizeof(domain_name); + BOOL ok= LookupAccountNameA(0, account_name, sid, &cbSid, domain_name, + &cbDomain, &name_use); + if (!ok) + { + verbose("LsaOpenPolicy returned %lu", LsaNtStatusToWinError(status)); + return 1; + } + + if (privilege_name) + { + LSA_UNICODE_STRING priv{}; + priv.Buffer= (PWSTR) privilege_name; + priv.Length= (USHORT) wcslen(privilege_name) * sizeof(wchar_t); + priv.MaximumLength= priv.Length; + if (add) + { + status= LsaAddAccountRights(lsa_handle, sid, &priv, 1); + if (status) + { + verbose("LsaAddAccountRights returned %lu/%lu", status, + LsaNtStatusToWinError(status)); + return 1; + } + } + else + { + status= LsaRemoveAccountRights(lsa_handle, sid, FALSE, &priv, 1); + if (status) + { + verbose("LsaRemoveRights returned %lu/%lu", + LsaNtStatusToWinError(status)); + return 1; + } + } + } + else + { + DBUG_ASSERT(!add); + status= LsaRemoveAccountRights(lsa_handle, sid, TRUE, 0, 0); + } + LsaClose(lsa_handle); + return 0; +} + /* Register service. Assume my.ini is in datadir */ -static int register_service() +static int register_service(const char *datadir, const char *user, const char *passwd) { char buf[3*MAX_PATH +32]; /* path to mysqld.exe, to my.ini, service name */ SC_HANDLE sc_manager, sc_service; - size_t datadir_len= strlen(opt_datadir); + size_t datadir_len= strlen(datadir); const char *backslash_after_datadir= "\\"; - if (datadir_len && opt_datadir[datadir_len-1] == '\\') + if (datadir_len && datadir[datadir_len-1] == '\\') backslash_after_datadir= ""; verbose("Registering service '%s'", opt_service); my_snprintf(buf, sizeof(buf)-1, - "\"%s\" \"--defaults-file=%s%smy.ini\" \"%s\"" , mysqld_path, opt_datadir, + "\"%s\" \"--defaults-file=%s%smy.ini\" \"%s\"" , mysqld_path, datadir, backslash_after_datadir, opt_service); /* Get a handle to the SCM database. */ @@ -408,7 +558,7 @@ static int register_service() /* Create the service. */ sc_service= CreateService(sc_manager, opt_service, opt_service, SERVICE_ALL_ACCESS, SERVICE_WIN32_OWN_PROCESS, SERVICE_AUTO_START, - SERVICE_ERROR_NORMAL, buf, NULL, NULL, NULL, opt_os_user, opt_os_password); + SERVICE_ERROR_NORMAL, buf, NULL, NULL, NULL, user, passwd); if (!sc_service) { @@ -549,7 +699,7 @@ static int set_directory_permissions(const char *dir, const char *os_user) /* Create database instance (including registering as service etc) .*/ -static int create_db_instance() +static int create_db_instance(const char *datadir) { int ret= 0; char cwd[MAX_PATH]; @@ -558,6 +708,8 @@ static int create_db_instance() FILE *in; bool created_datadir= false; DWORD last_error; + bool service_created= false; + std::string mysql_db_dir; verbose("Running bootstrap"); @@ -565,7 +717,7 @@ static int create_db_instance() /* Create datadir and datadir/mysql, if they do not already exist. */ - if (CreateDirectory(opt_datadir, NULL)) + if (CreateDirectory(datadir, NULL)) { created_datadir= true; } @@ -576,71 +728,91 @@ static int create_db_instance() { case ERROR_ACCESS_DENIED: die("Can't create data directory '%s' (access denied)\n", - opt_datadir); + datadir); break; case ERROR_PATH_NOT_FOUND: die("Can't create data directory '%s' " "(one or more intermediate directories do not exist)\n", - opt_datadir); + datadir); break; default: die("Can't create data directory '%s', last error %u\n", - opt_datadir, last_error); + datadir, last_error); break; } } - if (!SetCurrentDirectory(opt_datadir)) + if (!SetCurrentDirectory(datadir)) { last_error = GetLastError(); switch (last_error) { case ERROR_DIRECTORY: die("Can't set current directory to '%s', the path is not a valid directory \n", - opt_datadir); + datadir); break; default: die("Can' set current directory to '%s', last error %u\n", - opt_datadir, last_error); + datadir, last_error); break; } } - if (!PathIsDirectoryEmpty(opt_datadir)) + if (!PathIsDirectoryEmpty(datadir)) { - fprintf(stderr,"ERROR : Data directory %s is not empty." - " Only new or empty existing directories are accepted for --datadir\n",opt_datadir); + fprintf(stderr, "ERROR : Data directory %s is not empty." + " Only new or empty existing directories are accepted for --datadir\n", datadir); exit(1); } - if (!CreateDirectory("mysql",NULL)) + std::string service_user; + /* Register service if requested. */ + if (opt_service && opt_service[0]) { - last_error = GetLastError(); - DWORD attributes; - switch(last_error) - { - case ERROR_ACCESS_DENIED: - die("Can't create subdirectory 'mysql' in '%s' (access denied)\n",opt_datadir); - break; - case ERROR_ALREADY_EXISTS: - attributes = GetFileAttributes("mysql"); - - if (attributes == INVALID_FILE_ATTRIBUTES) - die("GetFileAttributes() failed for existing file '%s\\mysql', last error %u", - opt_datadir, GetLastError()); - else if (!(attributes & FILE_ATTRIBUTE_DIRECTORY)) - die("File '%s\\mysql' exists, but it is not a directory", opt_datadir); - - break; - } + /* Run service under virtual account NT SERVICE\service_name.*/ + service_user.append("NT SERVICE\\").append(opt_service); + ret = register_service(datadir, service_user.c_str(), NULL); + if (ret) + goto end; + service_created = true; + } + if (opt_large_pages) + { + handle_user_privileges(service_user.c_str(), L"SeLockMemoryPrivilege", true); + } + /* + Set data directory permissions for both current user and + the one who who runs services. + */ + set_directory_permissions(datadir, NULL); + if (!service_user.empty()) + { + set_directory_permissions(datadir, service_user.c_str()); } /* - Set data directory permissions for both current user and - default_os_user (the one who runs services). + Get security descriptor for the data directory. + It will be passed, as SDDL text, to the mysqld bootstrap subprocess, + to allow for correct subdirectory permissions. */ - set_directory_permissions(opt_datadir, NULL); - set_directory_permissions(opt_datadir, default_os_user); + PSECURITY_DESCRIPTOR pSD; + if (GetNamedSecurityInfoA(datadir, SE_FILE_OBJECT, DACL_SECURITY_INFORMATION, + 0, 0, 0, 0, &pSD) == ERROR_SUCCESS) + { + char* string_sd = NULL; + if (ConvertSecurityDescriptorToStringSecurityDescriptor(pSD, SDDL_REVISION_1, + DACL_SECURITY_INFORMATION, &string_sd, 0)) + { + _putenv_s("MARIADB_NEW_DIRECTORY_SDDL", string_sd); + LocalFree(string_sd); + } + LocalFree(pSD); + } + + /* Create my.ini file in data directory.*/ + ret = create_myini(); + if (ret) + goto end; /* Do mysqld --bootstrap. */ init_bootstrap_command_line(cmdline, sizeof(cmdline)); @@ -656,18 +828,23 @@ static int create_db_instance() { verbose("WARNING: Can't disable buffering on mysqld's stdin"); } - if (fwrite("use mysql;\n",11,1, in) != 1) + static const char *pre_bootstrap_sql[] = { "create database mysql;\n","use mysql;\n"}; + for (auto cmd : pre_bootstrap_sql) { - verbose("ERROR: Can't write to mysqld's stdin"); - ret= 1; - goto end; + /* Write the bootstrap script to stdin. */ + if (fwrite(cmd, strlen(cmd), 1, in) != 1) + { + verbose("ERROR: Can't write to mysqld's stdin"); + ret= 1; + goto end; + } } - int i; - for (i=0; mysql_bootstrap_sql[i]; i++) + for (int i= 0; mysql_bootstrap_sql[i]; i++) { + auto cmd = mysql_bootstrap_sql[i]; /* Write the bootstrap script to stdin. */ - if (fwrite(mysql_bootstrap_sql[i], strlen(mysql_bootstrap_sql[i]), 1, in) != 1) + if (fwrite(cmd, strlen(cmd), 1, in) != 1) { verbose("ERROR: Can't write to mysqld's stdin"); ret= 1; @@ -709,7 +886,7 @@ static int create_db_instance() } /* - On some reason, bootstrap chokes if last command sent via stdin ends with + On some reason, bootstrap chokes if last command sent via stdin ends with newline, so we supply a dummy comment, that does not end with newline. */ fputs(end_of_script, in); @@ -723,25 +900,37 @@ static int create_db_instance() goto end; } +end: + if (!ret) + return ret; - /* Create my.ini file in data directory.*/ - ret= create_myini(); - if (ret) - goto end; - - /* Register service if requested. */ - if (opt_service && opt_service[0]) + /* Cleanup after error.*/ + if (created_datadir) { - ret= register_service(); - if (ret) - goto end; + SetCurrentDirectory(cwd); + clean_directory(datadir); } -end: - if (ret) + if (service_created) { - SetCurrentDirectory(cwd); - clean_directory(opt_datadir); + auto sc_manager = OpenSCManager(NULL, NULL, SC_MANAGER_ALL_ACCESS); + if (sc_manager) + { + auto sc_handle= OpenServiceA(sc_manager,opt_service, DELETE); + if (sc_handle) + { + DeleteService(sc_handle); + CloseServiceHandle(sc_handle); + } + CloseServiceHandle(sc_manager); + } + + /*Remove all service user privileges for the user.*/ + if(strncmp(service_user.c_str(), "NT SERVICE\\", + sizeof("NT SERVICE\\")-1)) + { + handle_user_privileges(service_user.c_str(), 0, false); + } if (created_datadir) RemoveDirectory(opt_datadir); } diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 48a09d15755..02f69e1d512 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -117,12 +117,15 @@ #include "sql_reload.h" // reload_acl_and_cache #include "sp_head.h" // init_sp_psi_keys +#include <mysqld_default_groups.h> + #ifdef HAVE_POLL_H #include <poll.h> #endif #ifdef _WIN32 #include <handle_connections_win.h> +#include <sddl.h> #endif #include <my_service_manager.h> @@ -368,7 +371,7 @@ my_bool locked_in_memory; bool opt_using_transactions; bool volatile abort_loop; uint volatile global_disable_checkpoint; -#if defined(_WIN32) && !defined(EMBEDDED_LIBRARY) +#if defined(_WIN32) ulong slow_start_timeout; #endif static MEM_ROOT startup_root; @@ -1398,21 +1401,10 @@ static pthread_t select_thread; /* OS specific variables */ -#ifdef __WIN__ -#undef getpid -#include <process.h> - -static bool start_mode=0, use_opt_args; -static int opt_argc; -static char **opt_argv; - -#if !defined(EMBEDDED_LIBRARY) +#ifdef _WIN32 HANDLE hEventShutdown; -static char shutdown_event_name[40]; -#include "nt_servc.h" -static NTService Service; ///< Service object for WinNT -#endif /* EMBEDDED_LIBRARY */ -#endif /* __WIN__ */ +#endif + #ifndef EMBEDDED_LIBRARY bool mysqld_embedded=0; @@ -1608,9 +1600,8 @@ static void break_connect_loop() abort_loop= 1; -#if defined(__WIN__) - if (!SetEvent(hEventShutdown)) - DBUG_PRINT("error", ("Got error: %ld from SetEvent", GetLastError())); +#if defined(_WIN32) + mysqld_win_initiate_shutdown(); #else /* Avoid waiting for ourselves when thread-handling=no-threads. */ if (pthread_equal(pthread_self(), select_thread)) @@ -1883,6 +1874,12 @@ extern "C" void unireg_abort(int exit_code) mysqld_exit(exit_code); } +#ifdef _WIN32 +typedef void (*report_svc_status_t)(DWORD current_state, DWORD win32_exit_code, + DWORD wait_hint); +static void dummy_svc_status(DWORD, DWORD, DWORD) {} +static report_svc_status_t my_report_svc_status= dummy_svc_status; +#endif static void mysqld_exit(int exit_code) { @@ -1913,6 +1910,9 @@ static void mysqld_exit(int exit_code) SAFEMALLOC_REPORT_MEMORY(0); } DBUG_LEAVE; +#ifdef _WIN32 + my_report_svc_status(SERVICE_STOPPED, exit_code, 0); +#endif sd_notify(0, "STATUS=MariaDB server is down"); exit(exit_code); /* purecov: inspected */ } @@ -2594,14 +2594,60 @@ void unlink_thd(THD *thd) } -/****************************************************************************** - Setup a signal thread with handles all signals. - Because Linux doesn't support schemas use a mutex to check that - the signal thread is ready before continuing -******************************************************************************/ +#if defined(_WIN32) +/* + If server is started as service, the service routine will set + the callback function. +*/ +void mysqld_set_service_status_callback(void (*r)(DWORD, DWORD, DWORD)) +{ + my_report_svc_status= r; +} -#if defined(__WIN__) +static bool startup_complete() +{ + return hEventShutdown != NULL; +} +/** + Initiates shutdown on Windows by setting shutdown event. + Reports windows service status. + + If startup was not finished, terminates process (no good + cleanup possible) +*/ +void mysqld_win_initiate_shutdown() +{ + if (startup_complete()) + { + my_report_svc_status(SERVICE_STOP_PENDING, 0, 0); + abort_loop= 1; + if (!SetEvent(hEventShutdown)) + /* This should never fail.*/ + abort(); + } + else + { + my_report_svc_status(SERVICE_STOPPED, 1, 0); + TerminateProcess(GetCurrentProcess(), 1); + } +} + +/* + Signal when server has started and can accept connections. +*/ +void mysqld_win_set_startup_complete() +{ + my_report_svc_status(SERVICE_RUNNING, 0, 0); + DBUG_ASSERT(startup_complete()); +} + + +void mysqld_win_set_service_name(const char *name) +{ + if (stricmp(name, "mysql")) + load_default_groups[array_elements(load_default_groups) - 2]= name; +} /* On Windows, we use native SetConsoleCtrlHandler for handle events like Ctrl-C @@ -2612,33 +2658,30 @@ void unlink_thd(THD *thd) callstack. */ -static BOOL WINAPI console_event_handler( DWORD type ) +static BOOL WINAPI console_event_handler( DWORD type ) { - DBUG_ENTER("console_event_handler"); -#ifndef EMBEDDED_LIBRARY - if(type == CTRL_C_EVENT) + static const char *names[]= { + "CTRL_C_EVENT","CTRL_BREAK_EVENT", "CTRL_CLOSE_EVENT", "", "", + "CTRL_LOGOFF_EVENT", "CTRL_SHUTDOWN_EVENT"}; + + switch (type) { - /* - Do not shutdown before startup is finished and shutdown - thread is initialized. Otherwise there is a race condition - between main thread doing initialization and CTRL-C thread doing - cleanup, which can result into crash. - */ -#ifndef EMBEDDED_LIBRARY - if(hEventShutdown) - break_connect_loop(); - else -#endif - sql_print_warning("CTRL-C ignored during startup"); - DBUG_RETURN(TRUE); + case CTRL_C_EVENT: + case CTRL_BREAK_EVENT: + sql_print_information("console_event_handler: received %s event, shutting down", + names[type]); + mysqld_win_initiate_shutdown(); + return TRUE; + case CTRL_CLOSE_EVENT: + sql_print_information("console_event_handler: received CTRL_CLOSE_EVENT event, terminating"); + TerminateProcess(GetCurrentProcess(), 1); + return TRUE; + default: + return FALSE; } -#endif - DBUG_RETURN(FALSE); } - - #ifdef DEBUG_UNHANDLED_EXCEPTION_FILTER #define DEBUGGER_ATTACH_TIMEOUT 120 /* @@ -2669,7 +2712,7 @@ static void wait_for_debugger(int timeout_sec) } #endif /* DEBUG_UNHANDLED_EXCEPTION_FILTER */ -LONG WINAPI my_unhandler_exception_filter(EXCEPTION_POINTERS *ex_pointers) +static LONG WINAPI my_unhandler_exception_filter(EXCEPTION_POINTERS *ex_pointers) { static BOOL first_time= TRUE; if(!first_time) @@ -2716,10 +2759,9 @@ LONG WINAPI my_unhandler_exception_filter(EXCEPTION_POINTERS *ex_pointers) void init_signals(void) { - if(opt_console) - SetConsoleCtrlHandler(console_event_handler,TRUE); + SetConsoleCtrlHandler(console_event_handler,TRUE); - /* Avoid MessageBox()es*/ + /* Avoid MessageBox()es*/ _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE); _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR); _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE); @@ -2736,7 +2778,8 @@ void init_signals(void) */ SetErrorMode(SetErrorMode(0) | SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX); - SetUnhandledExceptionFilter(my_unhandler_exception_filter); + if(!opt_debugging) + SetUnhandledExceptionFilter(my_unhandler_exception_filter); } @@ -3114,12 +3157,7 @@ void *my_str_realloc_mysqld(void *ptr, size_t size) } #endif -#include <mysqld_default_groups.h> -#if defined(__WIN__) && !defined(EMBEDDED_LIBRARY) -static const int load_default_groups_sz= -sizeof(load_default_groups)/sizeof(load_default_groups[0]); -#endif /** @@ -3270,7 +3308,6 @@ SHOW_VAR com_status_vars[]= { {"kill", STMT_STATUS(SQLCOM_KILL)}, {"load", STMT_STATUS(SQLCOM_LOAD)}, {"lock_tables", STMT_STATUS(SQLCOM_LOCK_TABLES)}, - {"multi", COM_STATUS(com_multi)}, {"optimize", STMT_STATUS(SQLCOM_OPTIMIZE)}, {"preload_keys", STMT_STATUS(SQLCOM_PRELOAD_KEYS)}, {"prepare_sql", STMT_STATUS(SQLCOM_PREPARE)}, @@ -3784,7 +3821,7 @@ static int init_common_variables() of SQLCOM_ constants. */ compile_time_assert(sizeof(com_status_vars)/sizeof(com_status_vars[0]) - 1 == - SQLCOM_END + 11); + SQLCOM_END + 10); #endif if (get_options(&remaining_argc, &remaining_argv)) @@ -4967,6 +5004,31 @@ static int init_server_components() /* The following options were added after 5.6.10 */ MYSQL_TO_BE_IMPLEMENTED_OPTION("rpl-stop-slave-timeout"), MYSQL_TO_BE_IMPLEMENTED_OPTION("validate-user-plugins"), // NO_EMBEDDED_ACCESS_CHECKS + + /* The following options were deprecated in 10.5 or earlier */ + MARIADB_REMOVED_OPTION("innodb-adaptive-max-sleep-delay"), + MARIADB_REMOVED_OPTION("innodb-background-scrub-data-check-interval"), + MARIADB_REMOVED_OPTION("innodb-background-scrub-data-compressed"), + MARIADB_REMOVED_OPTION("innodb-background-scrub-data-interval"), + MARIADB_REMOVED_OPTION("innodb-background-scrub-data-uncompressed"), + MARIADB_REMOVED_OPTION("innodb-buffer-pool-instances"), + MARIADB_REMOVED_OPTION("innodb-commit-concurrency"), + MARIADB_REMOVED_OPTION("innodb-concurrency-tickets"), + MARIADB_REMOVED_OPTION("innodb-file-format"), + MARIADB_REMOVED_OPTION("innodb-large-prefix"), + MARIADB_REMOVED_OPTION("innodb-lock-schedule-algorithm"), + MARIADB_REMOVED_OPTION("innodb-log-checksums"), + MARIADB_REMOVED_OPTION("innodb-log-compressed-pages"), + MARIADB_REMOVED_OPTION("innodb-log-files-in-group"), + MARIADB_REMOVED_OPTION("innodb-log-optimize-ddl"), + MARIADB_REMOVED_OPTION("innodb-page-cleaners"), + MARIADB_REMOVED_OPTION("innodb-replication-delay"), + MARIADB_REMOVED_OPTION("innodb-scrub-log"), + MARIADB_REMOVED_OPTION("innodb-scrub-log-speed"), + MARIADB_REMOVED_OPTION("innodb-sync-array-size"), + MARIADB_REMOVED_OPTION("innodb-thread-concurrency"), + MARIADB_REMOVED_OPTION("innodb-thread-sleep-delay"), + MARIADB_REMOVED_OPTION("innodb-undo-logs"), {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; /* @@ -5181,19 +5243,6 @@ static int init_server_components() #ifndef EMBEDDED_LIBRARY -#ifdef _WIN32 -static void create_shutdown_event() -{ - hEventShutdown=CreateEvent(0, FALSE, FALSE, shutdown_event_name); - // On "Stop Service" we have to do regular shutdown - Service.SetShutdownEvent(hEventShutdown); -} -#else /*_WIN32*/ -#define create_shutdown_event() -#endif -#endif /* EMBEDDED_LIBRARY */ - -#ifndef EMBEDDED_LIBRARY #ifndef DBUG_OFF /* @@ -5233,11 +5282,7 @@ static void test_lc_time_sz() #endif//DBUG_OFF -#ifdef __WIN__ -int win_main(int argc, char **argv) -#else int mysqld_main(int argc, char **argv) -#endif { #ifndef _WIN32 /* We can't close stdin just now, because it may be booststrap mode. */ @@ -5255,7 +5300,6 @@ int mysqld_main(int argc, char **argv) if (init_early_variables()) exit(1); -#ifndef _WIN32 #ifdef WITH_PERFSCHEMA_STORAGE_ENGINE pre_initialize_performance_schema(); #endif /*WITH_PERFSCHEMA_STORAGE_ENGINE */ @@ -5265,7 +5309,6 @@ int mysqld_main(int argc, char **argv) fprintf(stderr, "my_init() failed."); return 1; } -#endif orig_argc= argc; orig_argv= argv; @@ -5472,12 +5515,12 @@ int mysqld_main(int argc, char **argv) if (WSREP_ON && wsrep_check_opts()) unireg_abort(1); #endif +#ifdef _WIN32 /* The subsequent calls may take a long time : e.g. innodb log read. Thus set the long running service control manager timeout */ -#if defined(_WIN32) && !defined(EMBEDDED_LIBRARY) - Service.SetSlowStarting(slow_start_timeout); + my_report_svc_status(SERVICE_START_PENDING, NO_ERROR, slow_start_timeout); #endif if (init_server_components()) @@ -5486,13 +5529,6 @@ int mysqld_main(int argc, char **argv) init_ssl(); network_init(); -#ifdef _WIN32 - if (!opt_console) - { - FreeConsole(); // Remove window - } -#endif - #ifdef WITH_WSREP // Recover and exit. if (wsrep_recovery) @@ -5583,7 +5619,6 @@ int mysqld_main(int argc, char **argv) } } - create_shutdown_event(); start_handle_manager(); /* Copy default global rpl_filter to global_rpl_filter */ @@ -5637,9 +5672,6 @@ int mysqld_main(int argc, char **argv) } #endif -#if defined(_WIN32) && !defined(EMBEDDED_LIBRARY) - Service.SetRunning(); -#endif /* Signal threads waiting for server to be started */ mysql_mutex_lock(&LOCK_server_started); @@ -5695,16 +5727,6 @@ int mysqld_main(int argc, char **argv) */ PSI_CALL_delete_current_thread(); -#if defined(__WIN__) && !defined(EMBEDDED_LIBRARY) - if (start_mode) - Service.Stop(); - else - { - Service.SetShutdownEvent(0); - if (hEventShutdown) - CloseHandle(hEventShutdown); - } -#endif #if (defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)) ERR_remove_state(0); #endif @@ -5715,245 +5737,6 @@ int mysqld_main(int argc, char **argv) #endif /* !EMBEDDED_LIBRARY */ -/**************************************************************************** - Main and thread entry function for Win32 - (all this is needed only to run mysqld as a service on WinNT) -****************************************************************************/ - -#if defined(__WIN__) && !defined(EMBEDDED_LIBRARY) -void mysql_service(void *p) -{ - if (my_thread_init()) - abort(); - - if (use_opt_args) - win_main(opt_argc, opt_argv); - else - win_main(Service.my_argc, Service.my_argv); - - my_thread_end(); -} - - -/* Quote string if it contains space, else copy */ - -static char *add_quoted_string(char *to, const char *from, char *to_end) -{ - uint length= (uint) (to_end-to); - - if (!strchr(from, ' ')) - return strmake(to, from, length-1); - return strxnmov(to, length-1, "\"", from, "\"", NullS); -} - - -/** - Handle basic handling of services, like installation and removal. - - @param argv Pointer to argument list - @param servicename Internal name of service - @param displayname Display name of service (in taskbar ?) - @param file_path Path to this program - @param startup_option Startup option to mysqld - - @retval 0 option handled - @retval 1 Could not handle option -*/ - -static bool -default_service_handling(char **argv, - const char *servicename, - const char *displayname, - const char *file_path, - const char *extra_opt, - const char *account_name) -{ - char path_and_service[FN_REFLEN+FN_REFLEN+32], *pos, *end; - const char *opt_delim; - end= path_and_service + sizeof(path_and_service)-3; - - /* We have to quote filename if it contains spaces */ - pos= add_quoted_string(path_and_service, file_path, end); - if (extra_opt && *extra_opt) - { - /* - Add option after file_path. There will be zero or one extra option. It's - assumed to be --defaults-file=file but isn't checked. The variable (not - the option name) should be quoted if it contains a string. - */ - *pos++= ' '; - if ((opt_delim= strchr(extra_opt, '='))) - { - size_t length= ++opt_delim - extra_opt; - pos= strnmov(pos, extra_opt, length); - } - else - opt_delim= extra_opt; - - pos= add_quoted_string(pos, opt_delim, end); - } - /* We must have servicename last */ - *pos++= ' '; - (void) add_quoted_string(pos, servicename, end); - - if (Service.got_service_option(argv, "install")) - { - Service.Install(1, servicename, displayname, path_and_service, - account_name); - return 0; - } - if (Service.got_service_option(argv, "install-manual")) - { - Service.Install(0, servicename, displayname, path_and_service, - account_name); - return 0; - } - if (Service.got_service_option(argv, "remove")) - { - Service.Remove(servicename); - return 0; - } - return 1; -} - -/* Remove service name from the command line arguments, and pass -resulting command line to the service via opt_args.*/ -#include <vector> -static void service_init_cmdline_args(int argc, char **argv) -{ - start_mode= 1; - use_opt_args= 1; - - if(argc == 1) - { - opt_argc= argc; - opt_argv= argv; - } - else - { - static std::vector<char *> argv_no_service; - for (int i= 0; argv[i]; i++) - argv_no_service.push_back(argv[i]); - // Remove the last argument, service name - argv_no_service[argv_no_service.size() - 1]= 0; - opt_argc= (int)argv_no_service.size() - 1; - opt_argv= &argv_no_service[0]; - } - DBUG_ASSERT(!opt_argv[opt_argc]); -} - -int mysqld_main(int argc, char **argv) -{ - my_progname= argv[0]; - - /* - When several instances are running on the same machine, we - need to have an unique named hEventShudown through the - application PID e.g.: MySQLShutdown1890; MySQLShutdown2342 - */ - int10_to_str((int) GetCurrentProcessId(),strmov(shutdown_event_name, - "MySQLShutdown"), 10); - - /* Must be initialized early for comparison of service name */ - system_charset_info= &my_charset_utf8mb3_general_ci; - -#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE - pre_initialize_performance_schema(); -#endif /*WITH_PERFSCHEMA_STORAGE_ENGINE */ - - if (my_init()) - { - fprintf(stderr, "my_init() failed."); - return 1; - } - - - char file_path[FN_REFLEN]; - my_path(file_path, argv[0], ""); /* Find name in path */ - fn_format(file_path,argv[0],file_path,"", MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_RESOLVE_SYMLINKS); - - - if (argc == 2) - { - if (!default_service_handling(argv, MYSQL_SERVICENAME, MYSQL_SERVICENAME, - file_path, "", NULL)) - return 0; - - if (Service.IsService(argv[1])) /* Start an optional service */ - { - /* - Only add the service name to the groups read from the config file - if it's not "MySQL". (The default service name should be 'mysqld' - but we started a bad tradition by calling it MySQL from the start - and we are now stuck with it. - */ - if (my_strcasecmp(system_charset_info, argv[1],"mysql")) - load_default_groups[load_default_groups_sz-2]= argv[1]; - service_init_cmdline_args(argc, argv); - Service.Init(argv[1], mysql_service); - return 0; - } - } - else if (argc == 3) /* install or remove any optional service */ - { - if (!default_service_handling(argv, argv[2], argv[2], file_path, "", - NULL)) - return 0; - if (Service.IsService(argv[2])) - { - /* - mysqld was started as - mysqld --defaults-file=my_path\my.ini service-name - */ - if (my_strcasecmp(system_charset_info, argv[2],"mysql")) - load_default_groups[load_default_groups_sz-2]= argv[2]; - service_init_cmdline_args(argc, argv); - Service.Init(argv[2], mysql_service); - return 0; - } - } - else if (argc == 4 || argc == 5) - { - /* - This may seem strange, because we handle --local-service while - preserving 4.1's behavior of allowing any one other argument that is - passed to the service on startup. (The assumption is that this is - --defaults-file=file, but that was not enforced in 4.1, so we don't - enforce it here.) - */ - const char *extra_opt= NullS; - const char *account_name = NullS; - int index; - for (index = 3; index < argc; index++) - { - if (!strcmp(argv[index], "--local-service")) - account_name= "NT AUTHORITY\\LocalService"; - else - extra_opt= argv[index]; - } - - if (argc == 4 || account_name) - if (!default_service_handling(argv, argv[2], argv[2], file_path, - extra_opt, account_name)) - return 0; - } - else if (argc == 1 && Service.IsService(MYSQL_SERVICENAME)) - { - /* start the default service */ - service_init_cmdline_args(argc, argv); - Service.Init(MYSQL_SERVICENAME, mysql_service); - return 0; - } - - /* Start as standalone server */ - Service.my_argc=argc; - Service.my_argv=argv; - mysql_service(NULL); - return 0; -} -#endif - - static bool read_init_file(char *file_name) { MYSQL_FILE *file; @@ -7377,6 +7160,7 @@ SHOW_VAR status_vars[]= { {"Max_used_connections", (char*) &max_used_connections, SHOW_LONG}, {"Memory_used", (char*) &show_memory_used, SHOW_SIMPLE_FUNC}, {"Memory_used_initial", (char*) &start_memory_used, SHOW_LONGLONG}, + {"Resultset_metadata_skipped", (char *) offsetof(STATUS_VAR, skip_metadata_count),SHOW_LONG_STATUS}, {"Not_flushed_delayed_rows", (char*) &delayed_rows_in_use, SHOW_LONG_NOFLUSH}, {"Open_files", (char*) &my_file_opened, SHOW_SINT}, {"Open_streams", (char*) &my_stream_opened, SHOW_LONG_NOFLUSH}, @@ -8181,6 +7965,23 @@ mysqld_get_one_option(const struct my_option *opt, const char *argument, break; case OPT_BOOTSTRAP: opt_noacl=opt_bootstrap=1; +#ifdef _WIN32 + { + /* + Check if security descriptor is passed from + mysql_install_db.exe. + Used by Windows installer to correctly setup + privileges on the new directories. + */ + char* dir_sddl = getenv("MARIADB_NEW_DIRECTORY_SDDL"); + if (dir_sddl) + { + ConvertStringSecurityDescriptorToSecurityDescriptor( + dir_sddl, SDDL_REVISION_1, &my_dir_security_attributes.lpSecurityDescriptor, NULL); + DBUG_ASSERT(my_dir_security_attributes.lpSecurityDescriptor); + } + } +#endif break; case OPT_SERVER_ID: ::server_id= global_system_variables.server_id; @@ -9248,6 +9049,7 @@ PSI_memory_key key_memory_binlog_ver_1_event; PSI_memory_key key_memory_bison_stack; PSI_memory_key key_memory_blob_mem_storage; PSI_memory_key key_memory_dboptions_hash; +PSI_memory_key key_memory_dbnames_cache; PSI_memory_key key_memory_errmsgs; PSI_memory_key key_memory_frm_string; PSI_memory_key key_memory_gdl; @@ -9548,6 +9350,7 @@ static PSI_memory_info all_server_memory[]= { &key_memory_THD_handler_tables_hash, "THD::handler_tables_hash", 0}, { &key_memory_hash_index_key_buffer, "hash_index_key_buffer", 0}, { &key_memory_dboptions_hash, "dboptions_hash", 0}, + { &key_memory_dbnames_cache, "dbnames_cache", 0}, { &key_memory_user_conn, "user_conn", 0}, // { &key_memory_LOG_POS_COORD, "LOG_POS_COORD", 0}, // { &key_memory_XID_STATE, "XID_STATE", 0}, diff --git a/sql/mysqld.h b/sql/mysqld.h index 24580d6bb90..2a73e518ded 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -502,6 +502,7 @@ extern PSI_memory_key key_memory_TABLE; extern PSI_memory_key key_memory_binlog_statement_buffer; extern PSI_memory_key key_memory_user_conn; extern PSI_memory_key key_memory_dboptions_hash; +extern PSI_memory_key key_memory_dbnames_cache; extern PSI_memory_key key_memory_hash_index_key_buffer; extern PSI_memory_key key_memory_THD_handler_tables_hash; extern PSI_memory_key key_memory_JOIN_CACHE; @@ -956,4 +957,14 @@ extern ulong opt_binlog_dbug_fsync_sleep; extern uint volatile global_disable_checkpoint; extern my_bool opt_help; +extern int mysqld_main(int argc, char **argv); + +#ifdef _WIN32 +extern HANDLE hEventShutdown; +extern void mysqld_win_initiate_shutdown(); +extern void mysqld_win_set_startup_complete(); +extern void mysqld_set_service_status_callback(void (*)(DWORD, DWORD, DWORD)); +extern void mysqld_win_set_service_name(const char *name); +#endif + #endif /* MYSQLD_INCLUDED */ diff --git a/sql/net_serv.cc b/sql/net_serv.cc index a96c43a94fe..409d3cac85e 100644 --- a/sql/net_serv.cc +++ b/sql/net_serv.cc @@ -640,8 +640,20 @@ net_real_write(NET *net,const uchar *packet, size_t len) my_bool net_blocking = vio_is_blocking(net->vio); DBUG_ENTER("net_real_write"); -#if defined(MYSQL_SERVER) && defined(USE_QUERY_CACHE) - query_cache_insert(net->thd, (char*) packet, len, net->pkt_nr); +#if defined(MYSQL_SERVER) + THD *thd= (THD *)net->thd; +#if defined(USE_QUERY_CACHE) + query_cache_insert(thd, (char*) packet, len, net->pkt_nr); +#endif + if (likely(thd)) + { + /* + Wait until pending operations (currently it is engine + asynchronous group commit) are finished before replying + to the client, to keep durability promise. + */ + thd->async_state.wait_for_pending_ops(); + } #endif if (unlikely(net->error == 2)) diff --git a/sql/nt_servc.cc b/sql/nt_servc.cc deleted file mode 100644 index 9c754763aab..00000000000 --- a/sql/nt_servc.cc +++ /dev/null @@ -1,555 +0,0 @@ -/** - @file - - @brief - Windows NT Service class library. - - Copyright Abandoned 1998 Irena Pancirov - Irnet Snc - This file is public domain and comes with NO WARRANTY of any kind -*/ -#include <windows.h> -#include <process.h> -#include <stdio.h> -#include <stdlib.h> -#include "nt_servc.h" - - -static NTService *pService; - -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ -NTService::NTService() -{ - - bOsNT = FALSE; - //service variables - ServiceName = NULL; - hExitEvent = 0; - bPause = FALSE; - bRunning = FALSE; - hThreadHandle = 0; - fpServiceThread = NULL; - - //time-out variables - nStartTimeOut = 15000; - nStopTimeOut = 86400000; - nPauseTimeOut = 5000; - nResumeTimeOut = 5000; - - //install variables - dwDesiredAccess = SERVICE_ALL_ACCESS; - dwServiceType = SERVICE_WIN32_OWN_PROCESS; - dwStartType = SERVICE_AUTO_START; - dwErrorControl = SERVICE_ERROR_NORMAL; - szLoadOrderGroup = NULL; - lpdwTagID = NULL; - szDependencies = NULL; - - my_argc = 0; - my_argv = NULL; - hShutdownEvent = 0; - nError = 0; - dwState = 0; -} - -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ -NTService::~NTService() -{ - if (ServiceName != NULL) delete[] ServiceName; -} -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ - - -/** - Registers the main service thread with the service manager. - - @param ServiceThread pointer to the main programs entry function - when the service is started -*/ - - -long NTService::Init(LPCSTR szInternName, THREAD_FC ServiceThread) -{ - - pService = this; - - fpServiceThread = ServiceThread; - ServiceName = new char[lstrlen(szInternName)+1]; - lstrcpy(ServiceName,szInternName); - - SERVICE_TABLE_ENTRY stb[] = - { - { (char *)szInternName, ServiceMain} , - { NULL, NULL } - }; - - return StartServiceCtrlDispatcher(stb); //register with the Service Manager -} - - -/** - Installs the service with Service manager. - - nError values: - - 0 success - - 1 Can't open the Service manager - - 2 Failed to create service. -*/ - - -BOOL NTService::Install(int startType, LPCSTR szInternName, - LPCSTR szDisplayName, - LPCSTR szFullPath, LPCSTR szAccountName, - LPCSTR szPassword) -{ - BOOL ret_val=FALSE; - SC_HANDLE newService, scm; - - if (!SeekStatus(szInternName,1)) - return FALSE; - - char szFilePath[_MAX_PATH]; - GetModuleFileName(NULL, szFilePath, sizeof(szFilePath)); - - // open a connection to the SCM - if (!(scm = OpenSCManager(0, 0,SC_MANAGER_CREATE_SERVICE))) - printf("Failed to install the service (Couldn't open the SCM)\n"); - else // Install the new service - { - if (!(newService= - CreateService(scm, - szInternName, - szDisplayName, - dwDesiredAccess,//default: SERVICE_ALL_ACCESS - dwServiceType, //default: SERVICE_WIN32_OWN_PROCESS - //default: SERVICE_AUTOSTART - (startType == 1 ? SERVICE_AUTO_START : - SERVICE_DEMAND_START), - dwErrorControl, //default: SERVICE_ERROR_NORMAL - szFullPath, //exec full path - szLoadOrderGroup, //default: NULL - lpdwTagID, //default: NULL - szDependencies, //default: NULL - szAccountName, //default: NULL - szPassword))) //default: NULL - printf("Failed to install the service (Couldn't create service)\n"); - else - { - printf("Service successfully installed.\n"); - CloseServiceHandle(newService); - ret_val=TRUE; // Everything went ok - } - CloseServiceHandle(scm); - } - return ret_val; -} - - -/** - Removes the service. - - nError values: - - 0 success - - 1 Can't open the Service manager - - 2 Failed to locate service - - 3 Failed to delete service. -*/ - - -BOOL NTService::Remove(LPCSTR szInternName) -{ - BOOL ret_value=FALSE; - SC_HANDLE service, scm; - - if (!SeekStatus(szInternName,0)) - return FALSE; - - nError=0; - - // open a connection to the SCM - if (!(scm = OpenSCManager(0, 0,SC_MANAGER_CREATE_SERVICE))) - { - printf("Failed to remove the service (Couldn't open the SCM)\n"); - } - else - { - if ((service = OpenService(scm,szInternName, DELETE))) - { - if (!DeleteService(service)) - printf("Failed to remove the service\n"); - else - { - printf("Service successfully removed.\n"); - ret_value=TRUE; // everything went ok - } - CloseServiceHandle(service); - } - else - printf("Failed to remove the service (Couldn't open the service)\n"); - CloseServiceHandle(scm); - } - return ret_value; -} - -/** - this function should be called before the app. exits to stop - the service -*/ -void NTService::Stop(void) -{ - SetStatus(SERVICE_STOP_PENDING,NO_ERROR, 0, 1, 60000); - StopService(); - SetStatus(SERVICE_STOPPED, NO_ERROR, 0, 1, 1000); -} - -/** - This is the function that is called from the - service manager to start the service. -*/ - - -void NTService::ServiceMain(DWORD argc, LPTSTR *argv) -{ - - // registration function - if (!(pService->hServiceStatusHandle = - RegisterServiceCtrlHandler(pService->ServiceName, - NTService::ServiceCtrlHandler))) - goto error; - - // notify SCM of progress - if (!pService->SetStatus(SERVICE_START_PENDING,NO_ERROR, 0, 1, 8000)) - goto error; - - // create the exit event - if (!(pService->hExitEvent = CreateEvent (0, TRUE, FALSE,0))) - goto error; - - if (!pService->SetStatus(SERVICE_START_PENDING,NO_ERROR, 0, 3, - pService->nStartTimeOut)) - goto error; - - // save start arguments - pService->my_argc=argc; - pService->my_argv=argv; - - // start the service - if (!pService->StartService()) - goto error; - - // wait for exit event - WaitForSingleObject (pService->hExitEvent, INFINITE); - - // wait for thread to exit - if (WaitForSingleObject (pService->hThreadHandle, INFINITE) == WAIT_TIMEOUT) - CloseHandle(pService->hThreadHandle); - - pService->Exit(0); - return; - -error: - pService->Exit(GetLastError()); - return; -} - - - -void NTService::SetRunning() -{ - if (pService) - pService->SetStatus(SERVICE_RUNNING, NO_ERROR, 0, 0, 0); -} - -void NTService::SetSlowStarting(unsigned long timeout) -{ - if (pService) - pService->SetStatus(SERVICE_START_PENDING,NO_ERROR, 0, 0, timeout); -} - - -/* ------------------------------------------------------------------------ - StartService() - starts the application thread - -------------------------------------------------------------------------- */ - -BOOL NTService::StartService() -{ - // Start the real service's thread (application) - if (!(hThreadHandle = (HANDLE) _beginthread(fpServiceThread,0, - (void *) this))) - return FALSE; - bRunning = TRUE; - return TRUE; -} -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ -void NTService::StopService() -{ - bRunning=FALSE; - - // Set the event for application - if (hShutdownEvent) - SetEvent(hShutdownEvent); - - // Set the event for ServiceMain - SetEvent(hExitEvent); -} -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ -void NTService::PauseService() -{ - bPause = TRUE; - SuspendThread(hThreadHandle); -} -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ -void NTService::ResumeService() -{ - bPause=FALSE; - ResumeThread(hThreadHandle); -} -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ -BOOL NTService::SetStatus (DWORD dwCurrentState,DWORD dwWin32ExitCode, - DWORD dwServiceSpecificExitCode, DWORD dwCheckPoint, - DWORD dwWaitHint) -{ - BOOL bRet; - SERVICE_STATUS serviceStatus; - - dwState=dwCurrentState; - - serviceStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS; - serviceStatus.dwCurrentState = dwCurrentState; - - if (dwCurrentState == SERVICE_START_PENDING) - serviceStatus.dwControlsAccepted = 0; //don't accept control events - else - serviceStatus.dwControlsAccepted = (SERVICE_ACCEPT_STOP | - SERVICE_ACCEPT_PAUSE_CONTINUE | - SERVICE_ACCEPT_SHUTDOWN); - - // if a specific exit code is defined,set up the win32 exit code properly - if (dwServiceSpecificExitCode == 0) - serviceStatus.dwWin32ExitCode = dwWin32ExitCode; - else - serviceStatus.dwWin32ExitCode = ERROR_SERVICE_SPECIFIC_ERROR; - - serviceStatus.dwServiceSpecificExitCode = dwServiceSpecificExitCode; - - serviceStatus.dwCheckPoint = dwCheckPoint; - serviceStatus.dwWaitHint = dwWaitHint; - - // Pass the status to the Service Manager - if (!(bRet=SetServiceStatus (hServiceStatusHandle, &serviceStatus))) - StopService(); - - return bRet; -} -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ -void NTService::ServiceCtrlHandler(DWORD ctrlCode) -{ - DWORD dwState; - - if (!pService) - return; - - dwState=pService->dwState; // get current state - - switch(ctrlCode) { - case SERVICE_CONTROL_SHUTDOWN: - case SERVICE_CONTROL_STOP: - dwState = SERVICE_STOP_PENDING; - pService->SetStatus(SERVICE_STOP_PENDING,NO_ERROR, 0, 1, - pService->nStopTimeOut); - pService->StopService(); - break; - - default: - pService->SetStatus(dwState, NO_ERROR,0, 0, 0); - break; - } - //pService->SetStatus(dwState, NO_ERROR,0, 0, 0); -} - -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ - -void NTService::Exit(DWORD error) -{ - if (hExitEvent) - CloseHandle(hExitEvent); - - // Send a message to the scm to tell that we stop - if (hServiceStatusHandle) - SetStatus(SERVICE_STOPPED, error,0, 0, 0); - - // If the thread has started kill it ??? - // if (hThreadHandle) CloseHandle(hThreadHandle); - -} - -/* ------------------------------------------------------------------------ - - -------------------------------------------------------------------------- */ - -BOOL NTService::SeekStatus(LPCSTR szInternName, int OperationType) -{ - BOOL ret_value=FALSE; - SC_HANDLE service, scm; - - // open a connection to the SCM - if (!(scm = OpenSCManager(0, 0,SC_MANAGER_CREATE_SERVICE))) - { - DWORD ret_error=GetLastError(); - if (ret_error == ERROR_ACCESS_DENIED) - { - printf("Install/Remove of the Service Denied!\n"); - if (!is_super_user()) - printf("That operation should be made by an user with Administrator privileges!\n"); - } - else - printf("There is a problem for to open the Service Control Manager!\n"); - } - else - { - if (OperationType == 1) - { - /* an install operation */ - if ((service = OpenService(scm,szInternName, SERVICE_ALL_ACCESS ))) - { - LPQUERY_SERVICE_CONFIG ConfigBuf; - DWORD dwSize; - - ConfigBuf = (LPQUERY_SERVICE_CONFIG) LocalAlloc(LPTR, 4096); - printf("The service already exists!\n"); - if (QueryServiceConfig(service,ConfigBuf,4096,&dwSize)) - printf("The current server installed: %s\n", - ConfigBuf->lpBinaryPathName); - LocalFree(ConfigBuf); - CloseServiceHandle(service); - } - else - ret_value=TRUE; - } - else - { - /* a remove operation */ - if (!(service = OpenService(scm,szInternName, SERVICE_ALL_ACCESS ))) - printf("The service doesn't exist!\n"); - else - { - SERVICE_STATUS ss; - - memset(&ss, 0, sizeof(ss)); - if (QueryServiceStatus(service,&ss)) - { - DWORD dwState = ss.dwCurrentState; - if (dwState == SERVICE_RUNNING) - printf("Failed to remove the service because the service is running\nStop the service and try again\n"); - else if (dwState == SERVICE_STOP_PENDING) - printf("\ -Failed to remove the service because the service is in stop pending state!\n\ -Wait 30 seconds and try again.\n\ -If this condition persist, reboot the machine and try again\n"); - else - ret_value= TRUE; - } - CloseServiceHandle(service); - } - } - CloseServiceHandle(scm); - } - - return ret_value; -} -/* ------------------------------------------------------------------------ - -------------------------------------------------------------------------- */ -BOOL NTService::IsService(LPCSTR ServiceName) -{ - BOOL ret_value=FALSE; - SC_HANDLE service, scm; - - if ((scm= OpenSCManager(0, 0,SC_MANAGER_ENUMERATE_SERVICE))) - { - if ((service = OpenService(scm,ServiceName, SERVICE_QUERY_STATUS))) - { - ret_value=TRUE; - CloseServiceHandle(service); - } - CloseServiceHandle(scm); - } - return ret_value; -} -/* ------------------------------------------------------------------------ - -------------------------------------------------------------------------- */ -BOOL NTService::got_service_option(char **argv, const char *service_option) -{ - char *option; - for (option= argv[1]; *option; option++) - if (!strcmp(option, service_option)) - return TRUE; - return FALSE; -} -/* ------------------------------------------------------------------------ - -------------------------------------------------------------------------- */ -BOOL NTService::is_super_user() -{ - HANDLE hAccessToken; - UCHAR InfoBuffer[1024]; - PTOKEN_GROUPS ptgGroups=(PTOKEN_GROUPS)InfoBuffer; - DWORD dwInfoBufferSize; - PSID psidAdministrators; - SID_IDENTIFIER_AUTHORITY siaNtAuthority = SECURITY_NT_AUTHORITY; - UINT x; - BOOL ret_value=FALSE; - - if (!OpenThreadToken(GetCurrentThread(), TOKEN_QUERY, TRUE,&hAccessToken )) - { - if (GetLastError() != ERROR_NO_TOKEN) - return FALSE; - - if (!OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &hAccessToken)) - return FALSE; - } - - ret_value= GetTokenInformation(hAccessToken,TokenGroups,InfoBuffer, - 1024, &dwInfoBufferSize); - - CloseHandle(hAccessToken); - - if (!ret_value ) - return FALSE; - - if (!AllocateAndInitializeSid(&siaNtAuthority, 2, - SECURITY_BUILTIN_DOMAIN_RID, - DOMAIN_ALIAS_RID_ADMINS, - 0, 0, 0, 0, 0, 0, - &psidAdministrators)) - return FALSE; - - ret_value = FALSE; - - for (x=0;x<ptgGroups->GroupCount;x++) - { - if ( EqualSid(psidAdministrators, ptgGroups->Groups[x].Sid) ) - { - ret_value = TRUE; - break; - } - - } - FreeSid(psidAdministrators); - return ret_value; -} diff --git a/sql/nt_servc.h b/sql/nt_servc.h deleted file mode 100644 index 8ba29519c8f..00000000000 --- a/sql/nt_servc.h +++ /dev/null @@ -1,113 +0,0 @@ -#ifndef NT_SERVC_INCLUDED -#define NT_SERVC_INCLUDED - -/** - @file - - @brief - Windows NT Service class library - - Copyright Abandoned 1998 Irena Pancirov - Irnet Snc - This file is public domain and comes with NO WARRANTY of any kind -*/ - -// main application thread -typedef void (*THREAD_FC)(void *); - -class NTService -{ - public: - NTService(); - ~NTService(); - - BOOL bOsNT; ///< true if OS is NT, false for Win95 - //install optinos - DWORD dwDesiredAccess; - DWORD dwServiceType; - DWORD dwStartType; - DWORD dwErrorControl; - - LPSTR szLoadOrderGroup; - LPDWORD lpdwTagID; - LPSTR szDependencies; - OSVERSIONINFO osVer; - - // time-out (in milisec) - int nStartTimeOut; - int nStopTimeOut; - int nPauseTimeOut; - int nResumeTimeOut; - - // - DWORD my_argc; - LPTSTR *my_argv; - HANDLE hShutdownEvent; - int nError; - DWORD dwState; - - //init service entry point - long Init(LPCSTR szInternName,THREAD_FC ServiceThread); - - //application shutdown event - void SetShutdownEvent(HANDLE hEvent){ hShutdownEvent=hEvent; } - - - //service install / un-install - BOOL Install(int startType,LPCSTR szInternName,LPCSTR szDisplayName, - LPCSTR szFullPath, LPCSTR szAccountName=NULL, - LPCSTR szPassword=NULL); - BOOL SeekStatus(LPCSTR szInternName, int OperationType); - BOOL Remove(LPCSTR szInternName); - BOOL IsService(LPCSTR ServiceName); - BOOL got_service_option(char **argv, const char *service_option); - BOOL is_super_user(); - - /* - SetRunning() is to be called by the application - when initialization completes and it can accept - stop request - */ - void SetRunning(void); - - /** - Sets a timeout after which SCM will abort service startup if SetRunning() - was not called or the timeout was not extended with another call to - SetSlowStarting(). Should be called when static initialization completes, - and the variable initialization part begins - - @arg timeout the timeout to pass to the SCM (in milliseconds) - */ - void SetSlowStarting(unsigned long timeout); - - /* - Stop() is to be called by the application to stop - the service - */ - void Stop(void); - - protected: - LPSTR ServiceName; - HANDLE hExitEvent; - SERVICE_STATUS_HANDLE hServiceStatusHandle; - BOOL bPause; - BOOL bRunning; - HANDLE hThreadHandle; - THREAD_FC fpServiceThread; - - void PauseService(); - void ResumeService(); - void StopService(); - BOOL StartService(); - - static void WINAPI ServiceMain(DWORD argc, LPTSTR *argv); - static void WINAPI ServiceCtrlHandler (DWORD ctrlCode); - - void Exit(DWORD error); - BOOL SetStatus (DWORD dwCurrentState,DWORD dwWin32ExitCode, - DWORD dwServiceSpecificExitCode, - DWORD dwCheckPoint,DWORD dwWaitHint); - -}; -/* ------------------------- the end -------------------------------------- */ - -#endif /* NT_SERVC_INCLUDED */ diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 0a87d9ccd2f..96a35e10bf2 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -4291,11 +4291,11 @@ bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab) sjm_tab->type= JT_ALL; /* Initialize full scan */ - sjm_tab->read_first_record= join_read_record_no_init; + sjm_tab->read_first_record= join_init_read_record; sjm_tab->read_record.copy_field= sjm->copy_field; sjm_tab->read_record.copy_field_end= sjm->copy_field + sjm->sjm_table_cols.elements; - sjm_tab->read_record.read_record_func= rr_sequential_and_unpack; + sjm_tab->read_record.read_record_func= read_record_func_for_rr_and_unpack; } sjm_tab->bush_children->end[-1].next_select= end_sj_materialize; @@ -7145,3 +7145,16 @@ exit: thd->lex->current_select= save_curr_select; DBUG_RETURN(FALSE); } + +/* + @brief + Check if a table is a SJM Scan table + + @retval + TRUE SJM scan table + FALSE Otherwise +*/ +bool TABLE_LIST::is_sjm_scan_table() +{ + return is_active_sjm() && sj_mat_info->is_sj_scan; +} diff --git a/sql/protocol.cc b/sql/protocol.cc index 08b874adba1..f369fa1c66f 100644 --- a/sql/protocol.cc +++ b/sql/protocol.cc @@ -211,8 +211,7 @@ bool Protocol::net_send_ok(THD *thd, uint server_status, uint statement_warn_count, ulonglong affected_rows, ulonglong id, - const char *message, bool is_eof, - bool skip_flush) + const char *message, bool is_eof) { NET *net= &thd->net; StringBuffer<MYSQL_ERRMSG_SIZE + 10> store; @@ -285,7 +284,7 @@ Protocol::net_send_ok(THD *thd, DBUG_ASSERT(store.length() <= MAX_PACKET_LENGTH); error= my_net_write(net, (const unsigned char*)store.ptr(), store.length()); - if (likely(!error) && (!skip_flush || is_eof)) + if (likely(!error)) error= net_flush(net); thd->server_status&= ~SERVER_SESSION_STATE_CHANGED; @@ -340,7 +339,7 @@ Protocol::net_send_eof(THD *thd, uint server_status, uint statement_warn_count) (thd->get_command() != COM_BINLOG_DUMP )) { error= net_send_ok(thd, server_status, statement_warn_count, 0, 0, NULL, - true, false); + true); DBUG_RETURN(error); } @@ -607,16 +606,14 @@ void Protocol::end_statement() thd->get_stmt_da()->statement_warn_count(), thd->get_stmt_da()->affected_rows(), thd->get_stmt_da()->last_insert_id(), - thd->get_stmt_da()->message(), - thd->get_stmt_da()->skip_flush()); + thd->get_stmt_da()->message()); break; case Diagnostics_area::DA_DISABLED: break; case Diagnostics_area::DA_EMPTY: default: DBUG_ASSERT(0); - error= send_ok(thd->server_status, 0, 0, 0, NULL, - thd->get_stmt_da()->skip_flush()); + error= send_ok(thd->server_status, 0, 0, 0, NULL); break; } if (likely(!error)) @@ -635,12 +632,12 @@ void Protocol::end_statement() bool Protocol::send_ok(uint server_status, uint statement_warn_count, ulonglong affected_rows, ulonglong last_insert_id, - const char *message, bool skip_flush) + const char *message) { DBUG_ENTER("Protocol::send_ok"); const bool retval= net_send_ok(thd, server_status, statement_warn_count, - affected_rows, last_insert_id, message, false, skip_flush); + affected_rows, last_insert_id, message, false); DBUG_RETURN(retval); } @@ -916,6 +913,241 @@ bool Protocol_text::store_field_metadata(const THD * thd, } +/* + MARIADB_CLIENT_CACHE_METADATA support. + + Bulk of the code below is dedicated to detecting whether column metadata has + changed after prepare, or between executions of a prepared statement. + + For some prepared statements, metadata can't change without going through + Prepared_Statement::reprepare(), which makes detecting changes easy. + + Others, "SELECT ?" & Co, are more fragile, and sensitive to input parameters, + or user variables. Detecting metadata change for this class of PS is harder, + we calculate signature (hash value), and check whether this changes between + executions. This is a more expensive method. +*/ + + +/** + Detect whether column info can be changed without + PS repreparing. + + Such colum info is called fragile. The opposite of + fragile is. + + + @param it - Item representing column info + @return true, if columninfo is "fragile", false if it is stable + + + @todo does not work due to MDEV-23913. Currently, + everything about prepared statements is fragile. +*/ + +static bool is_fragile_columnifo(Item *it) +{ +#define MDEV_23913_FIXED 0 +#if MDEV_23913_FIXED + if (dynamic_cast<Item_param *>(it)) + return true; + + if (dynamic_cast<Item_func_user_var *>(it)) + return true; + + if (dynamic_cast <Item_sp_variable*>(it)) + return true; + + /* Check arguments of functions.*/ + auto item_args= dynamic_cast<Item_args *>(it); + if (!item_args) + return false; + auto args= item_args->arguments(); + auto arg_count= item_args->argument_count(); + for (uint i= 0; i < arg_count; i++) + { + if (is_fragile_columnifo(args[i])) + return true; + } + return false; +#else /* MDEV-23913 fixed*/ + return true; +#endif +} + + +#define INVALID_METADATA_CHECKSUM 0 + + +/** + Calculate signature for column info sent to the client as CRC32 over data, + that goes into the column info packet. + We assume that if checksum does not change, then column info was not + modified. + + @param thd THD + @param list column info + + @return CRC32 of the metadata +*/ + +static uint32 calc_metadata_hash(THD *thd, List<Item> *list) +{ + List_iterator_fast<Item> it(*list); + Item *item; + uint32 crc32_c= 0; + while ((item= it++)) + { + Send_field field(thd, item); + auto field_type= item->type_handler()->field_type(); + auto charset= item->charset_for_protocol(); + /* + The data below should contain everything that influences + content of the column info packet. + */ + LEX_CSTRING data[]= + { + field.table_name, + field.org_table_name, + field.col_name, + field.org_col_name, + field.db_name, + field.attr(MARIADB_FIELD_ATTR_DATA_TYPE_NAME), + field.attr(MARIADB_FIELD_ATTR_FORMAT_NAME), + {(const char *) &field.length, sizeof(field.length)}, + {(const char *) &field.flags, sizeof(field.flags)}, + {(const char *) &field.decimals, sizeof(field.decimals)}, + {(const char *) &charset, sizeof(charset)}, + {(const char *) &field_type, sizeof(field_type)}, + }; + for (const auto &chunk : data) + crc32_c= my_crc32c(crc32_c, chunk.str, chunk.length); + } + + if (crc32_c == INVALID_METADATA_CHECKSUM) + return 1; + return crc32_c; +} + + + +/** + Check if metadata columns have changed since last call to this + function. + + @param send_column_info_state saved state, changed if the function + return true. + @param thd THD + @param list columninfo Items + @return true,if metadata columns have changed since last call, + false otherwise +*/ + +static bool metadata_columns_changed(send_column_info_state &state, THD *thd, + List<Item> &list) +{ + if (!state.initialized) + { + state.initialized= true; + state.immutable= true; + Item *item; + List_iterator_fast<Item> it(list); + while ((item= it++)) + { + if (is_fragile_columnifo(item)) + { + state.immutable= false; + state.checksum= calc_metadata_hash(thd, &list); + break; + } + } + state.last_charset= thd->variables.character_set_client; + return true; + } + + /* + Since column info can change under our feet, we use more expensive + checksumming to check if column metadata has not changed since last time. + */ + if (!state.immutable) + { + uint32 checksum= calc_metadata_hash(thd, &list); + if (checksum != state.checksum) + { + state.checksum= checksum; + state.last_charset= thd->variables.character_set_client; + return true; + } + } + + /* + Character_set_client influences result set metadata, thus resend metadata + whenever it changes. + */ + if (state.last_charset != thd->variables.character_set_client) + { + state.last_charset= thd->variables.character_set_client; + return true; + } + + return false; +} + + +/** + Determine whether column info must be sent to the client. + Skip column info, if client supports caching, and (prepared) statement + output fields have not changed. + + @param thd THD + @param list column info + @param flags send flags. If Protocol::SEND_FORCE_COLUMN_INFO is set, + this function will return true + @return true, if column info must be sent to the client. + false otherwise +*/ + +static bool should_send_column_info(THD* thd, List<Item>* list, uint flags) +{ + if (!(thd->client_capabilities & MARIADB_CLIENT_CACHE_METADATA)) + { + /* Client does not support abbreviated metadata.*/ + return true; + } + + if (!thd->cur_stmt) + { + /* Neither COM_PREPARE nor COM_EXECUTE run.*/ + return true; + } + + if (thd->spcont) + { + /* Always sent full metadata from inside the stored procedure.*/ + return true; + } + + if (flags & Protocol::SEND_FORCE_COLUMN_INFO) + return true; + + auto &column_info_state= thd->cur_stmt->column_info_state; +#ifndef DBUG_OFF + auto cmd= thd->get_command(); +#endif + + DBUG_ASSERT(cmd == COM_STMT_EXECUTE || cmd == COM_STMT_PREPARE); + DBUG_ASSERT(cmd != COM_STMT_PREPARE || !column_info_state.initialized); + + bool ret= metadata_columns_changed(column_info_state, thd, *list); + + DBUG_ASSERT(cmd != COM_STMT_PREPARE || ret); + if (!ret) + thd->status_var.skip_metadata_count++; + + return ret; +} + + /** Send name and type of result to client. @@ -941,30 +1173,44 @@ bool Protocol::send_result_set_metadata(List<Item> *list, uint flags) Protocol_text prot(thd, thd->variables.net_buffer_length); DBUG_ENTER("Protocol::send_result_set_metadata"); + bool send_column_info= should_send_column_info(thd, list, flags); + if (flags & SEND_NUM_ROWS) - { // Packet with number of elements - uchar buff[MAX_INT_WIDTH]; + { + /* + Packet with number of columns. + + Will also have a 1 byte column info indicator, in case + MARIADB_CLIENT_CACHE_METADATA client capability is set. + */ + uchar buff[MAX_INT_WIDTH+1]; uchar *pos= net_store_length(buff, list->elements); + if (thd->client_capabilities & MARIADB_CLIENT_CACHE_METADATA) + *pos++= (uchar)send_column_info; + DBUG_ASSERT(pos <= buff + sizeof(buff)); if (my_net_write(&thd->net, buff, (size_t) (pos-buff))) DBUG_RETURN(1); } + if (send_column_info) + { #ifndef DBUG_OFF - field_handlers= (const Type_handler**) thd->alloc(sizeof(field_handlers[0]) * - list->elements); + field_handlers= (const Type_handler **) thd->alloc( + sizeof(field_handlers[0]) * list->elements); #endif - for (uint pos= 0; (item=it++); pos++) - { - prot.prepare_for_resend(); - if (prot.store_item_metadata(thd, item, pos)) - goto err; - if (prot.write()) - DBUG_RETURN(1); + for (uint pos= 0; (item= it++); pos++) + { + prot.prepare_for_resend(); + if (prot.store_item_metadata(thd, item, pos)) + goto err; + if (prot.write()) + DBUG_RETURN(1); #ifndef DBUG_OFF - field_handlers[pos]= item->type_handler(); + field_handlers[pos]= item->type_handler(); #endif + } } if (flags & SEND_EOF) @@ -1688,7 +1934,8 @@ bool Protocol_binary::send_out_parameters(List<Item_param> *sp_params) thd->server_status|= SERVER_PS_OUT_PARAMS | SERVER_MORE_RESULTS_EXISTS; /* Send meta-data. */ - if (send_result_set_metadata(&out_param_lst, SEND_NUM_ROWS | SEND_EOF)) + if (send_result_set_metadata(&out_param_lst, + SEND_NUM_ROWS | SEND_EOF | SEND_FORCE_COLUMN_INFO)) return TRUE; /* Send data. */ diff --git a/sql/protocol.h b/sql/protocol.h index eb11304a4d5..a1868342ab4 100644 --- a/sql/protocol.h +++ b/sql/protocol.h @@ -54,7 +54,7 @@ protected: virtual bool net_store_data_cs(const uchar *from, size_t length, CHARSET_INFO *fromcs, CHARSET_INFO *tocs); virtual bool net_send_ok(THD *, uint, uint, ulonglong, ulonglong, const char *, - bool, bool); + bool); virtual bool net_send_error_packet(THD *, uint, const char *, const char *); #ifdef EMBEDDED_LIBRARY char **next_field; @@ -78,7 +78,7 @@ protected: virtual bool send_ok(uint server_status, uint statement_warn_count, ulonglong affected_rows, ulonglong last_insert_id, - const char *message, bool skip_flush); + const char *message); virtual bool send_eof(uint server_status, uint statement_warn_count); @@ -93,7 +93,7 @@ public: virtual ~Protocol() {} void init(THD* thd_arg); - enum { SEND_NUM_ROWS= 1, SEND_EOF= 2 }; + enum { SEND_NUM_ROWS= 1, SEND_EOF= 2, SEND_FORCE_COLUMN_INFO= 4 }; virtual bool send_result_set_metadata(List<Item> *list, uint flags); bool send_list_fields(List<Field> *list, const TABLE_LIST *table_list); bool send_result_set_row(List<Item> *row_items); diff --git a/sql/records.cc b/sql/records.cc index 900eacf5943..77ee3c65321 100644 --- a/sql/records.cc +++ b/sql/records.cc @@ -830,3 +830,32 @@ inline void SORT_INFO::unpack_addon_fields(uchar *buff) field->unpack(field->ptr, buff + addonf->offset, buff_end, 0); } } + + +/* + @brief + Read and unpack next record from a table + + @details + The function first reads the next record from the table. + If a success then it unpacks the values to the base table fields. + This is used by SJM scan table to unpack the values of the materialized + table to the base table fields + + @retval + 0 Record successfully read. + @retval + -1 There is no record to be read anymore. + >0 Error +*/ +int read_record_func_for_rr_and_unpack(READ_RECORD *info) +{ + int error; + if ((error= info->read_record_func_and_unpack_calls(info))) + return error; + + for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++) + (*cp->do_copy)(cp); + + return error; +} diff --git a/sql/records.h b/sql/records.h index 272bbd0d9b5..9bc1b98fde4 100644 --- a/sql/records.h +++ b/sql/records.h @@ -56,6 +56,7 @@ struct READ_RECORD TABLE *table; /* Head-form */ Unlock_row_func unlock_row; Read_func read_record_func; + Read_func read_record_func_and_unpack_calls; THD *thd; SQL_SELECT *select; uint ref_length, reclength, rec_cache_size, error_offset; diff --git a/sql/scheduler.h b/sql/scheduler.h index ebf8d6e9e64..68387390d81 100644 --- a/sql/scheduler.h +++ b/sql/scheduler.h @@ -40,6 +40,8 @@ struct scheduler_functions void (*thd_wait_end)(THD *thd); void (*post_kill_notification)(THD *thd); void (*end)(void); + /** resume previous unfinished command (threadpool only)*/ + void (*thd_resume)(THD* thd); }; diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index 3f3cb7677fc..add46a4cc75 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -7546,13 +7546,10 @@ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_GIS # MariaDB extra error numbers starts from 4000 skip-to-error-number 4000 -ER_COMMULTI_BADCONTEXT 0A000 - eng "COM_MULTI can't return a result set in the given context" - ger "COM_MULTI kann im gegebenen Kontext keine Ergebnismenge zurückgeben" - ukr "COM_MULTI не може повернути результати у цьому контексті" -ER_BAD_COMMAND_IN_MULTI - eng "Command '%s' is not allowed for COM_MULTI" - ukr "Команда '%s' не дозволена для COM_MULTI" +ER_UNUSED_26 0A000 + eng "This error never happens" +ER_UNUSED_27 + eng "This error never happens" ER_WITH_COL_WRONG_LIST eng "WITH column list and SELECT field list have different column counts" ER_TOO_MANY_DEFINITIONS_IN_WITH_CLAUSE @@ -7648,8 +7645,8 @@ ER_JSON_PATH_ARRAY eng "JSON path should end with an array identifier in argument %d to function '%s'" ER_JSON_ONE_OR_ALL eng "Argument 2 to function '%s' must be "one" or "all"." -ER_UNSUPPORT_COMPRESSED_TEMPORARY_TABLE - eng "CREATE TEMPORARY TABLE is not allowed with ROW_FORMAT=COMPRESSED or KEY_BLOCK_SIZE." +ER_UNSUPPORTED_COMPRESSED_TABLE + eng "InnoDB refuses to write tables with ROW_FORMAT=COMPRESSED or KEY_BLOCK_SIZE." ER_GEOJSON_INCORRECT eng "Incorrect GeoJSON format specified for st_geomfromgeojson function." ER_GEOJSON_TOO_FEW_POINTS @@ -7951,7 +7948,7 @@ ER_WARN_HISTORY_ROW_START_TIME ER_PART_STARTS_BEYOND_INTERVAL eng "%`s: STARTS is later than query time, first history partition may exceed INTERVAL value" ER_GALERA_REPLICATION_NOT_SUPPORTED - eng "DDL-statement is forbidden as table storage engine does not support Galera replication" + eng "Galera replication not supported" ER_LOAD_INFILE_CAPABILITY_DISABLED eng "The used command is not allowed because the MariaDB server or client has disabled the local infile capability" rum "Comanda folosită nu este permisă deoarece clientul sau serverul MariaDB a dezactivat această capabilitate" diff --git a/sql/slave.cc b/sql/slave.cc index 1da030084ef..d8c85432928 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -348,8 +348,7 @@ gtid_pos_table_creation(THD *thd, plugin_ref engine, LEX_CSTRING *table_name) err= parser_state.init(thd, thd->query(), thd->query_length()); if (err) goto end; - mysql_parse(thd, thd->query(), thd->query_length(), &parser_state, - FALSE, FALSE); + mysql_parse(thd, thd->query(), thd->query_length(), &parser_state); if (unlikely(thd->is_error())) err= 1; /* The warning is relevant to 10.3 and earlier. */ @@ -4952,18 +4951,16 @@ err: if (mi->using_gtid == Master_info::USE_GTID_NO) { sql_print_information("Slave I/O thread exiting, read up to log '%s', " - "position %llu", IO_RPL_LOG_NAME, mi->master_log_pos); - sql_print_information("master was %s:%d", mi->host, mi->port); - } + "position %llu, master %s:%d", IO_RPL_LOG_NAME, mi->master_log_pos, + mi->host, mi->port); else { StringBuffer<100> tmp; mi->gtid_current_pos.to_string(&tmp); sql_print_information("Slave I/O thread exiting, read up to log '%s', " - "position %llu; GTID position %s", + "position %llu; GTID position %s, master %s:%d", IO_RPL_LOG_NAME, mi->master_log_pos, - tmp.c_ptr_safe()); - sql_print_information("master was %s:%d", mi->host, mi->port); + tmp.c_ptr_safe(), mi->host, mi->port); } repl_semisync_slave.slave_stop(mi); thd->reset_query(); @@ -5566,9 +5563,9 @@ pthread_handler_t handle_slave_sql(void *arg) tmp.append(STRING_WITH_LEN("'")); } sql_print_information("Slave SQL thread exiting, replication stopped in " - "log '%s' at position %llu%s", RPL_LOG_NAME, - rli->group_master_log_pos, tmp.c_ptr_safe()); - sql_print_information("master was %s:%d", mi->host, mi->port); + "log '%s' at position %llu%s, master: %s:%d", RPL_LOG_NAME, + rli->group_master_log_pos, tmp.c_ptr_safe(), + mi->host, mi->port); } #ifdef WITH_WSREP wsrep_after_command_before_result(thd); diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 87970ee11b4..623c0eff5b9 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -4430,6 +4430,19 @@ restart: else tbl->reginfo.lock_type= tables->lock_type; } +#ifdef WITH_WSREP + /* + At this point we have SE associated with table so we can check wsrep_mode + rules at this point. + */ + if (WSREP(thd) && + wsrep_thd_is_local(thd) && + !wsrep_check_mode_after_open_table(thd, tbl->file->ht->db_type)) + { + error= TRUE; + goto error; + } +#endif } #ifdef WITH_WSREP diff --git a/sql/sql_class.cc b/sql/sql_class.cc index c10eedaadfb..c7e5f5e6dc2 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -683,7 +683,8 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) m_stmt_da(&main_da), tdc_hash_pins(0), xid_hash_pins(0), - m_tmp_tables_locked(false) + m_tmp_tables_locked(false), + async_state() #ifdef HAVE_REPLICATION , current_linfo(0), @@ -4917,6 +4918,56 @@ void reset_thd(MYSQL_THD thd) free_root(thd->mem_root, MYF(MY_KEEP_PREALLOC)); } +/** + This function can be used by storage engine + to indicate a start of an async operation. + + This asynchronous is such operation needs to be + finished before we write response to the client +. + An example of this operation is Innodb's asynchronous + group commit. Server needs to wait for the end of it + before writing response to client, to provide durability + guarantees, in other words, server can't send OK packet + before modified data is durable in redo log. +*/ +extern "C" MYSQL_THD thd_increment_pending_ops(void) +{ + THD *thd = current_thd; + if (!thd) + return NULL; + thd->async_state.inc_pending_ops(); + return thd; +} + +/** + This function can be used by plugin/engine to indicate + end of async operation (such as end of group commit + write flush) + + @param thd THD +*/ +extern "C" void thd_decrement_pending_ops(MYSQL_THD thd) +{ + DBUG_ASSERT(thd); + thd_async_state::enum_async_state state; + if (thd->async_state.dec_pending_ops(&state) == 0) + { + switch(state) + { + case thd_async_state::enum_async_state::SUSPENDED: + DBUG_ASSERT(thd->scheduler->thd_resume); + thd->scheduler->thd_resume(thd); + break; + case thd_async_state::enum_async_state::NONE: + break; + default: + DBUG_ASSERT(0); + } + } +} + + unsigned long long thd_get_query_id(const MYSQL_THD thd) { return((unsigned long long)thd->query_id); @@ -5032,14 +5083,13 @@ extern "C" enum enum_server_command thd_current_command(MYSQL_THD thd) return thd->get_command(); } - -extern "C" int thd_slave_thread(const MYSQL_THD thd) +#ifdef HAVE_REPLICATION /* Working around MDEV-24622 */ +/** @return whether the current thread is for applying binlog in a replica */ +extern "C" int thd_is_slave(const MYSQL_THD thd) { - return(thd->slave_thread); + return thd && thd->slave_thread; } - - - +#endif /* HAVE_REPLICATION */ /* Returns high resolution timestamp for the start of the current query. */ @@ -5078,7 +5128,7 @@ thd_need_wait_reports(const MYSQL_THD thd) } /* - Used by storage engines (currently TokuDB and InnoDB) to report that + Used by storage engines (currently InnoDB) to report that one transaction THD is about to go to wait for a transactional lock held by another transactions OTHER_THD. diff --git a/sql/sql_class.h b/sql/sql_class.h index ecdea73ff6e..2ab1d530e13 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -836,7 +836,6 @@ typedef struct system_status_var ulong com_create_tmp_table; ulong com_drop_tmp_table; ulong com_other; - ulong com_multi; ulong com_stmt_prepare; ulong com_stmt_reprepare; @@ -933,6 +932,12 @@ typedef struct system_status_var ulong lost_connections; ulong max_statement_time_exceeded; /* + Number of times where column info was not + sent with prepared statement metadata. + */ + ulong skip_metadata_count; + + /* Number of statements sent from the client */ ulong questions; @@ -951,6 +956,7 @@ typedef struct system_status_var ulonglong table_open_cache_hits; ulonglong table_open_cache_misses; ulonglong table_open_cache_overflows; + ulonglong send_metadata_skips; double last_query_cost; double cpu_time, busy_time; uint32 threads_running; @@ -1196,6 +1202,38 @@ public: class Server_side_cursor; +/* + Struct to catch changes in column metadata that is sent to client. + in the "result set metadata". Used to support + MARIADB_CLIENT_CACHE_METADATA. +*/ +struct send_column_info_state +{ + /* Last client charset (affects metadata) */ + CHARSET_INFO *last_charset= nullptr; + + /* Checksum, only used to check changes if 'immutable' is false*/ + uint32 checksum= 0; + + /* + Column info can only be changed by PreparedStatement::reprepare() + + There is a class of "weird" prepared statements like SELECT ? or SELECT @a + that are not immutable, and depend on input parameters or user variables + */ + bool immutable= false; + + bool initialized= false; + + /* Used by PreparedStatement::reprepare()*/ + void reset() + { + initialized= false; + checksum= 0; + } +}; + + /** @class Statement @brief State of a single command executed against this connection. @@ -1285,6 +1323,8 @@ public: LEX_CSTRING db; + send_column_info_state column_info_state; + /* This is set to 1 of last call to send_result_to_client() was ok */ my_bool query_cache_is_applicable; @@ -2269,6 +2309,164 @@ struct THD_count ~THD_count() { thread_count--; } }; +/** + Support structure for asynchronous group commit, or more generally + any asynchronous operation that needs to finish before server writes + response to client. + + An engine, or any other server component, can signal that there is + a pending operation by incrementing a counter, i.e inc_pending_ops() + and that pending operation is finished by decrementing that counter + dec_pending_ops(). + + NOTE: Currently, pending operations can not fail, i.e there is no + way to pass a return code in dec_pending_ops() + + The server does not write response to the client before the counter + becomes 0. In case of group commit it ensures that data is persistent + before success reported to client, i.e durability in ACID. +*/ +struct thd_async_state +{ + enum class enum_async_state + { + NONE, + SUSPENDED, /* do_command() did not finish, and needs to be resumed */ + RESUMED /* do_command() is resumed*/ + }; + enum_async_state m_state{enum_async_state::NONE}; + + /* Stuff we need to resume do_command where we finished last time*/ + enum enum_server_command m_command{COM_SLEEP}; + LEX_STRING m_packet{}; + + mysql_mutex_t m_mtx; + mysql_cond_t m_cond; + + /** Pending counter*/ + Atomic_counter<int> m_pending_ops=0; + +#ifndef DBUG_OFF + /* Checks */ + pthread_t m_dbg_thread; +#endif + + thd_async_state() + { + mysql_mutex_init(PSI_NOT_INSTRUMENTED, &m_mtx, 0); + mysql_cond_init(PSI_INSTRUMENT_ME, &m_cond, 0); + } + + /* + Currently only used with threadpool, one can "suspend" and "resume" a THD. + Suspend only means leaving do_command earlier, after saving some state. + Resume is continuing suspended THD's do_command(), from where it finished last time. + */ + bool try_suspend() + { + bool ret; + mysql_mutex_lock(&m_mtx); + DBUG_ASSERT(m_state == enum_async_state::NONE); + DBUG_ASSERT(m_pending_ops >= 0); + + if(m_pending_ops) + { + ret=true; + m_state= enum_async_state::SUSPENDED; + } + else + { + /* + If there is no pending operations, can't suspend, since + nobody can resume it. + */ + ret=false; + } + mysql_mutex_unlock(&m_mtx); + return ret; + } + + ~thd_async_state() + { + wait_for_pending_ops(); + mysql_mutex_destroy(&m_mtx); + mysql_cond_destroy(&m_cond); + } + + /* + Increment pending asynchronous operations. + The client response may not be written if + this count > 0. + So, without threadpool query needs to wait for + the operations to finish. + With threadpool, THD can be suspended and resumed + when this counter goes to 0. + */ + void inc_pending_ops() + { + mysql_mutex_lock(&m_mtx); + +#ifndef DBUG_OFF + /* + Check that increments are always done by the same thread. + */ + if (!m_pending_ops) + m_dbg_thread= pthread_self(); + else + DBUG_ASSERT(pthread_equal(pthread_self(),m_dbg_thread)); +#endif + + m_pending_ops++; + mysql_mutex_unlock(&m_mtx); + } + + int dec_pending_ops(enum_async_state* state) + { + int ret; + mysql_mutex_lock(&m_mtx); + ret= --m_pending_ops; + if (!ret) + mysql_cond_signal(&m_cond); + *state = m_state; + mysql_mutex_unlock(&m_mtx); + return ret; + } + + /* + This is used for "dirty" reading pending ops, + when dirty read is OK. + */ + int pending_ops() + { + return m_pending_ops; + } + + /* Wait for pending operations to finish.*/ + void wait_for_pending_ops() + { + /* + It is fine to read m_pending_ops and compare it with 0, + without mutex protection. + + The value is only incremented by the current thread, and will + be decremented by another one, thus "dirty" may show positive number + when it is really 0, but this is not a problem, and the only + bad thing from that will be rechecking under mutex. + */ + if (!pending_ops()) + return; + + mysql_mutex_lock(&m_mtx); + DBUG_ASSERT(m_pending_ops >= 0); + while (m_pending_ops) + mysql_cond_wait(&m_cond, &m_mtx); + mysql_mutex_unlock(&m_mtx); + } +}; + +extern "C" MYSQL_THD thd_increment_pending_ops(void); +extern "C" void thd_decrement_pending_ops(MYSQL_THD); + /** @class THD @@ -2388,6 +2586,8 @@ public: /* Last created prepared statement */ Statement *last_stmt; + Statement *cur_stmt= 0; + inline void set_last_stmt(Statement *stmt) { last_stmt= (is_error() ? NULL : stmt); } inline void clear_last_stmt() { last_stmt= NULL; } @@ -4976,6 +5176,7 @@ private: } public: + thd_async_state async_state; #ifdef HAVE_REPLICATION /* If we do a purge of binary logs, log index info of the threads @@ -6950,10 +7151,6 @@ public: #define CF_SKIP_WSREP_CHECK 0 #endif /* WITH_WSREP */ -/** - Do not allow it for COM_MULTI batch -*/ -#define CF_NO_COM_MULTI (1U << 3) /* Inline functions */ diff --git a/sql/sql_db.cc b/sql/sql_db.cc index 3447032f193..9bf16220535 100644 --- a/sql/sql_db.cc +++ b/sql/sql_db.cc @@ -104,8 +104,137 @@ cmp_db_names(LEX_CSTRING *db1_name, const LEX_CSTRING *db2_name) db1_name->str, db2_name->str) == 0)); } +#ifdef HAVE_PSI_INTERFACE +static PSI_rwlock_key key_rwlock_LOCK_dboptions; +static PSI_rwlock_key key_rwlock_LOCK_dbnames; +static PSI_rwlock_key key_rwlock_LOCK_rmdir; + +static PSI_rwlock_info all_database_names_rwlocks[]= { + {&key_rwlock_LOCK_dboptions, "LOCK_dboptions", PSI_FLAG_GLOBAL}, + {&key_rwlock_LOCK_dbnames, "LOCK_dbnames", PSI_FLAG_GLOBAL}, + {&key_rwlock_LOCK_rmdir, "LOCK_rmdir",PSI_FLAG_GLOBAL}, +}; + +static void init_database_names_psi_keys(void) +{ + const char *category= "sql"; + int count; + + if (PSI_server == NULL) + return; + + count= array_elements(all_database_names_rwlocks); + PSI_server->register_rwlock(category, all_database_names_rwlocks, count); +} +#endif + +static mysql_rwlock_t rmdir_lock; /* + Cache of C strings for existing database names. + + The only use of it is to avoid repeated expensive + my_access() calls. + + Provided operations are lookup, insert (after successfull my_access()) + and clear (this is called whenever rmdir is called). +*/ +struct dbname_cache_t +{ +private: + Hash_set<LEX_STRING> m_set; + mysql_rwlock_t m_lock; + + static uchar *get_key(const LEX_STRING *ls, size_t *sz, my_bool) + { + *sz= ls->length; + return (uchar *) ls->str; + } + +public: + dbname_cache_t() + : m_set(key_memory_dbnames_cache, table_alias_charset, 10, 0, + sizeof(char *), (my_hash_get_key) get_key, my_free, 0) + { + mysql_rwlock_init(key_rwlock_LOCK_dbnames, &m_lock); + } + + bool contains(const char *s) + { + auto sz= strlen(s); + mysql_rwlock_rdlock(&m_lock); + bool ret= m_set.find(s, sz) != 0; + mysql_rwlock_unlock(&m_lock); + return ret; + } + + void insert(const char *s) + { + auto len= strlen(s); + auto ls= (LEX_STRING *) my_malloc(key_memory_dbnames_cache, + sizeof(LEX_STRING) + strlen(s) + 1, 0); + + if (!ls) + return; + + ls->length= len; + ls->str= (char *) (ls + 1); + + memcpy(ls->str, s, len + 1); + mysql_rwlock_wrlock(&m_lock); + bool found= m_set.find(s, len) != 0; + if (!found) + m_set.insert(ls); + mysql_rwlock_unlock(&m_lock); + if (found) + my_free(ls); + } + + void clear() + { + mysql_rwlock_wrlock(&m_lock); + m_set.clear(); + mysql_rwlock_unlock(&m_lock); + } + + ~dbname_cache_t() + { + mysql_rwlock_destroy(&m_lock); + } +}; + +static dbname_cache_t* dbname_cache; + +static void dbname_cache_init() +{ + static MY_ALIGNED(16) char buf[sizeof(dbname_cache_t)]; + DBUG_ASSERT(!dbname_cache); + dbname_cache= new (buf) dbname_cache_t; + mysql_rwlock_init(key_rwlock_LOCK_rmdir, &rmdir_lock); +} + +static void dbname_cache_destroy() +{ + if (!dbname_cache) + return; + + dbname_cache->~dbname_cache_t(); + dbname_cache= 0; + mysql_rwlock_destroy(&rmdir_lock); +} + +static int my_rmdir(const char *dir) +{ + auto ret= rmdir(dir); + if (ret) + return ret; + mysql_rwlock_wrlock(&rmdir_lock); + dbname_cache->clear(); + mysql_rwlock_unlock(&rmdir_lock); + return 0; +} + + /* Function we use in the creation of our hash to get key. */ @@ -131,7 +260,7 @@ static inline int write_to_binlog(THD *thd, const char *query, size_t q_len, qinfo.db= db; qinfo.db_len= (uint32)db_len; return mysql_bin_log.write(&qinfo); -} +} /* @@ -145,26 +274,7 @@ void free_dbopt(void *dbopt) my_free(dbopt); } -#ifdef HAVE_PSI_INTERFACE -static PSI_rwlock_key key_rwlock_LOCK_dboptions; -static PSI_rwlock_info all_database_names_rwlocks[]= -{ - { &key_rwlock_LOCK_dboptions, "LOCK_dboptions", PSI_FLAG_GLOBAL} -}; - -static void init_database_names_psi_keys(void) -{ - const char* category= "sql"; - int count; - - if (PSI_server == NULL) - return; - - count= array_elements(all_database_names_rwlocks); - PSI_server->register_rwlock(category, all_database_names_rwlocks, count); -} -#endif /** Initialize database option cache. @@ -190,6 +300,7 @@ bool my_dboptions_cache_init(void) table_alias_charset, 32, 0, 0, (my_hash_get_key) dboptions_get_key, free_dbopt, 0); } + dbname_cache_init(); return error; } @@ -205,6 +316,7 @@ void my_dboptions_cache_free(void) { dboptions_init= 0; my_hash_free(&dboptions); + dbname_cache_destroy(); mysql_rwlock_destroy(&LOCK_dboptions); } } @@ -692,7 +804,7 @@ mysql_create_db_internal(THD *thd, const LEX_CSTRING *db, Restore things to beginning. */ path[path_len]= 0; - if (rmdir(path) >= 0) + if (my_rmdir(path) >= 0) DBUG_RETURN(-1); /* We come here when we managed to create the database, but not the option @@ -1252,7 +1364,7 @@ static my_bool rm_dir_w_symlink(const char *org_path, my_bool send_error) if (pos > path && pos[-1] == FN_LIBCHAR) *--pos=0; - if (unlikely(rmdir(path) < 0 && send_error)) + if (unlikely(my_rmdir(path) < 0 && send_error)) { my_error(ER_DB_DROP_RMDIR, MYF(0), path, errno); DBUG_RETURN(1); @@ -1824,7 +1936,7 @@ bool mysql_upgrade_db(THD *thd, const LEX_CSTRING *old_db) length= build_table_filename(path, sizeof(path)-1, new_db.str, "", "", 0); if (length && path[length-1] == FN_LIBCHAR) path[length-1]=0; // remove ending '\' - rmdir(path); + my_rmdir(path); goto exit; } @@ -1919,20 +2031,34 @@ exit: TRUE The directory does not exist. */ + bool check_db_dir_existence(const char *db_name) { char db_dir_path[FN_REFLEN + 1]; uint db_dir_path_len; + if (dbname_cache->contains(db_name)) + return 0; + db_dir_path_len= build_table_filename(db_dir_path, sizeof(db_dir_path) - 1, db_name, "", "", 0); if (db_dir_path_len && db_dir_path[db_dir_path_len - 1] == FN_LIBCHAR) db_dir_path[db_dir_path_len - 1]= 0; - /* Check access. */ + /* + Check access. - return my_access(db_dir_path, F_OK); + The locking is to prevent creating permanent stale + entries for deleted databases, in case of + race condition with my_rmdir. + */ + mysql_rwlock_rdlock(&rmdir_lock); + int ret= my_access(db_dir_path, F_OK); + if (!ret) + dbname_cache->insert(db_name); + mysql_rwlock_unlock(&rmdir_lock); + return ret; } diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index 94d5ceb309d..6c301fd1e18 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -368,6 +368,16 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, if (mysql_prepare_delete(thd, table_list, &conds, &delete_while_scanning)) DBUG_RETURN(TRUE); + if (table_list->has_period()) + { + if (!table_list->period_conditions.start.item->const_item() + || !table_list->period_conditions.end.item->const_item()) + { + my_error(ER_NOT_CONSTANT_EXPRESSION, MYF(0), "FOR PORTION OF"); + DBUG_RETURN(true); + } + } + if (delete_history) table->vers_write= false; diff --git a/sql/sql_error.cc b/sql/sql_error.cc index b3ef0d89a98..9af57ea6c01 100644 --- a/sql/sql_error.cc +++ b/sql/sql_error.cc @@ -302,7 +302,6 @@ void Diagnostics_area::reset_diagnostics_area() { DBUG_ENTER("reset_diagnostics_area"); - m_skip_flush= FALSE; #ifdef DBUG_OFF m_can_overwrite_status= FALSE; /** Don't take chances in production */ diff --git a/sql/sql_error.h b/sql/sql_error.h index a0497af78cb..318d5076534 100644 --- a/sql/sql_error.h +++ b/sql/sql_error.h @@ -1022,14 +1022,6 @@ public: { DBUG_ASSERT(m_status == DA_ERROR || m_status == DA_OK || m_status == DA_OK_BULK); return m_message; } - bool skip_flush() const - { - DBUG_ASSERT(m_status == DA_OK || m_status == DA_OK_BULK); - return m_skip_flush; - } - - void set_skip_flush() - { m_skip_flush= TRUE; } uint sql_errno() const { @@ -1215,9 +1207,6 @@ private: /** Set to make set_error_status after set_{ok,eof}_status possible. */ bool m_can_overwrite_status; - /** Skip flushing network buffer after writing OK (for COM_MULTI) */ - bool m_skip_flush; - /** Message buffer. Can be used by OK or ERROR status. */ char m_message[MYSQL_ERRMSG_SIZE]; diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index 5039259febc..f0b84ab126f 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -1,6 +1,6 @@ /* Copyright (c) 2000, 2016, Oracle and/or its affiliates. - Copyright (c) 2010, 2019, MariaDB Corporation + Copyright (c) 2010, 2021, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2116,6 +2116,9 @@ int write_record(THD *thd, TABLE *table, COPY_INFO *info, select_result *sink) goto after_trg_or_ignored_err; } + /* Notify the engine about insert ignore operation */ + if (info->handle_duplicates == DUP_ERROR && info->ignore) + table->file->extra(HA_EXTRA_IGNORE_INSERT); after_trg_n_copied_inc: info->copied++; thd->record_first_successful_insert_id_in_cur_stmt(table->file->insert_id_for_cur_row); @@ -4132,6 +4135,7 @@ bool select_insert::prepare_eof() if (info.ignore || info.handle_duplicates != DUP_ERROR) if (table->file->ha_table_flags() & HA_DUPLICATE_POS) table->file->ha_rnd_end(); + table->file->extra(HA_EXTRA_END_ALTER_COPY); table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE); @@ -4724,7 +4728,11 @@ select_create::prepare(List<Item> &_values, SELECT_LEX_UNIT *u) if (info.handle_duplicates == DUP_UPDATE) table->file->extra(HA_EXTRA_INSERT_WITH_UPDATE); if (thd->locked_tables_mode <= LTM_LOCK_TABLES) + { table->file->ha_start_bulk_insert((ha_rows) 0); + if (thd->lex->duplicates == DUP_ERROR && !thd->lex->ignore) + table->file->extra(HA_EXTRA_BEGIN_ALTER_COPY); + } thd->abort_on_warning= !info.ignore && thd->is_strict_mode(); if (check_that_all_fields_are_given_values(thd, table, table_list)) DBUG_RETURN(1); diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index cb960a89cb6..bbb880cb117 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -113,9 +113,7 @@ #include "wsrep_trans_observer.h" /* wsrep transaction hooks */ static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, - Parser_state *parser_state, - bool is_com_multi, - bool is_next_command); + Parser_state *parser_state); #endif /* WITH_WSREP */ /** @@ -391,7 +389,7 @@ const LEX_CSTRING command_name[257]={ { STRING_WITH_LEN("Slave_worker") }, //251 { STRING_WITH_LEN("Slave_IO") }, //252 { STRING_WITH_LEN("Slave_SQL") }, //253 - { STRING_WITH_LEN("Com_multi") }, //254 + { 0, 0}, { STRING_WITH_LEN("Error") } // Last command number 255 }; @@ -490,7 +488,7 @@ void init_update_queries(void) memset(server_command_flags, 0, sizeof(server_command_flags)); server_command_flags[COM_STATISTICS]= CF_SKIP_QUERY_ID | CF_SKIP_QUESTIONS | CF_SKIP_WSREP_CHECK; - server_command_flags[COM_PING]= CF_SKIP_QUERY_ID | CF_SKIP_QUESTIONS | CF_SKIP_WSREP_CHECK | CF_NO_COM_MULTI; + server_command_flags[COM_PING]= CF_SKIP_QUERY_ID | CF_SKIP_QUESTIONS | CF_SKIP_WSREP_CHECK; server_command_flags[COM_QUIT]= CF_SKIP_WSREP_CHECK; server_command_flags[COM_PROCESS_INFO]= CF_SKIP_WSREP_CHECK; @@ -519,7 +517,6 @@ void init_update_queries(void) server_command_flags[COM_STMT_EXECUTE]= CF_SKIP_WSREP_CHECK; server_command_flags[COM_STMT_SEND_LONG_DATA]= CF_SKIP_WSREP_CHECK; server_command_flags[COM_REGISTER_SLAVE]= CF_SKIP_WSREP_CHECK; - server_command_flags[COM_MULTI]= CF_SKIP_WSREP_CHECK | CF_NO_COM_MULTI; /* Initialize the sql command flags array. */ memset(sql_command_flags, 0, sizeof(sql_command_flags)); @@ -958,7 +955,7 @@ void execute_init_command(THD *thd, LEX_STRING *init_command, save_vio= thd->net.vio; thd->net.vio= 0; thd->clear_error(1); - dispatch_command(COM_QUERY, thd, buf, (uint)len, FALSE, FALSE); + dispatch_command(COM_QUERY, thd, buf, (uint)len); thd->client_capabilities= save_client_capabilities; thd->net.vio= save_vio; @@ -1084,7 +1081,7 @@ int bootstrap(MYSQL_FILE *file) break; } - mysql_parse(thd, thd->query(), length, &parser_state, FALSE, FALSE); + mysql_parse(thd, thd->query(), length, &parser_state); bootstrap_error= thd->is_error(); thd->protocol->end_statement(); @@ -1132,23 +1129,6 @@ void cleanup_items(Item *item) DBUG_VOID_RETURN; } -static enum enum_server_command fetch_command(THD *thd, char *packet) -{ - enum enum_server_command - command= (enum enum_server_command) (uchar) packet[0]; - DBUG_ENTER("fetch_command"); - - if (command >= COM_END || - (command >= COM_MDB_GAP_BEG && command <= COM_MDB_GAP_END)) - command= COM_END; // Wrong command - - DBUG_PRINT("info",("Command on %s = %d (%s)", - vio_description(thd->net.vio), command, - command_name[command].str)); - DBUG_RETURN(command); -} - - #ifdef WITH_WSREP static bool wsrep_tables_accessible_when_detached(const TABLE_LIST *tables) { @@ -1170,28 +1150,73 @@ static bool wsrep_command_no_result(char command) } #endif /* WITH_WSREP */ #ifndef EMBEDDED_LIBRARY +static enum enum_server_command fetch_command(THD *thd, char *packet) +{ + enum enum_server_command + command= (enum enum_server_command) (uchar) packet[0]; + DBUG_ENTER("fetch_command"); + + if (command >= COM_END || + (command >= COM_MDB_GAP_BEG && command <= COM_MDB_GAP_END)) + command= COM_END; // Wrong command + + DBUG_PRINT("info",("Command on %s = %d (%s)", + vio_description(thd->net.vio), command, + command_name[command].str)); + DBUG_RETURN(command); +} /** Read one command from connection and execute it (query or simple command). - This function is called in loop from thread function. + This function is to be used by different schedulers (one-thread-per-connection, + pool-of-threads) For profiling to work, it must never be called recursively. + @param thd - client connection context + + @param blocking - wait for command to finish. + if false (nonblocking), then the function might + return when command is "half-finished", with + DISPATCH_COMMAND_WOULDBLOCK. + Currenly, this can *only* happen when using + threadpool. The command will resume, after all outstanding + async operations (i.e group commit) finish. + Threadpool scheduler takes care of "resume". + + @retval + DISPATCH_COMMAND_SUCCESS - success @retval - 0 success + DISPATCH_COMMAND_CLOSE_CONNECTION request of THD shutdown + (s. dispatch_command() description) @retval - 1 request of thread shutdown (see dispatch_command() description) + DISPATCH_COMMAND_WOULDBLOCK - need to wait for asyncronous operations + to finish. Only returned if parameter + 'blocking' is false. */ -bool do_command(THD *thd) +dispatch_command_return do_command(THD *thd, bool blocking) { - bool return_value; + dispatch_command_return return_value; char *packet= 0; ulong packet_length; NET *net= &thd->net; enum enum_server_command command; DBUG_ENTER("do_command"); + DBUG_ASSERT(!thd->async_state.pending_ops()); + if (thd->async_state.m_state == thd_async_state::enum_async_state::RESUMED) + { + /* + Resuming previously suspended command. + Restore the state + */ + command = thd->async_state.m_command; + packet = thd->async_state.m_packet.str; + packet_length = (ulong)thd->async_state.m_packet.length; + goto resume; + } + /* indicator of uninitialized lex => normal flow of errors handling (see my_message_sql) @@ -1258,12 +1283,12 @@ bool do_command(THD *thd) if (net->error != 3) { - return_value= TRUE; // We have to close it. + return_value= DISPATCH_COMMAND_CLOSE_CONNECTION; // We have to close it. goto out; } net->error= 0; - return_value= FALSE; + return_value= DISPATCH_COMMAND_SUCCESS; goto out; } @@ -1330,7 +1355,7 @@ bool do_command(THD *thd) MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); thd->m_statement_psi= NULL; thd->m_digest= NULL; - return_value= FALSE; + return_value= DISPATCH_COMMAND_SUCCESS; wsrep_after_command_before_result(thd); goto out; @@ -1356,7 +1381,7 @@ bool do_command(THD *thd) thd->m_statement_psi= NULL; thd->m_digest= NULL; - return_value= FALSE; + return_value= DISPATCH_COMMAND_SUCCESS; wsrep_after_command_before_result(thd); goto out; } @@ -1367,8 +1392,18 @@ bool do_command(THD *thd) DBUG_ASSERT(packet_length); DBUG_ASSERT(!thd->apc_target.is_enabled()); + +resume: return_value= dispatch_command(command, thd, packet+1, - (uint) (packet_length-1), FALSE, FALSE); + (uint) (packet_length-1), blocking); + if (return_value == DISPATCH_COMMAND_WOULDBLOCK) + { + /* Save current state, and resume later.*/ + thd->async_state.m_command= command; + thd->async_state.m_packet={packet,packet_length}; + DBUG_RETURN(return_value); + } + DBUG_ASSERT(!thd->apc_target.is_enabled()); out: @@ -1479,45 +1514,6 @@ static void wsrep_copy_query(THD *thd) } #endif /* WITH_WSREP */ -/** - check COM_MULTI packet - - @param thd thread handle - @param packet pointer on the packet of commands - @param packet_length length of this packet - - @retval 0 - Error - @retval # - Number of commands in the batch -*/ - -uint maria_multi_check(THD *thd, char *packet, size_t packet_length) -{ - uint counter= 0; - DBUG_ENTER("maria_multi_check"); - while (packet_length) - { - char *packet_start= packet; - size_t subpacket_length= net_field_length((uchar **)&packet_start); - size_t length_length= packet_start - packet; - // length of command + 3 bytes where that length was stored - DBUG_PRINT("info", ("sub-packet length: %zu + %zu command: %x", - subpacket_length, length_length, - packet_start[3])); - - if (subpacket_length == 0 || - (subpacket_length + length_length) > packet_length) - { - my_message(ER_UNKNOWN_COM_ERROR, ER_THD(thd, ER_UNKNOWN_COM_ERROR), - MYF(0)); - DBUG_RETURN(0); - } - - counter++; - packet= packet_start + subpacket_length; - packet_length-= (subpacket_length + length_length); - } - DBUG_RETURN(counter); -} #if defined(WITH_ARIA_STORAGE_ENGINE) @@ -1554,8 +1550,13 @@ public: @param packet_length length of packet + 1 (to show that data is null-terminated) except for COM_SLEEP, where it can be zero. - @param is_com_multi recursive call from COM_MULTI - @param is_next_command there will be more command in the COM_MULTI batch + @param blocking if false (nonblocking), then the function might + return when command is "half-finished", with + DISPATCH_COMMAND_WOULDBLOCK. + Currenly, this can *only* happen when using threadpool. + The current command will resume, after all outstanding + async operations (i.e group commit) finish. + Threadpool scheduler takes care of "resume". @todo set thd->lex->sql_command to SQLCOM_END here. @@ -1568,9 +1569,8 @@ public: 1 request of thread shutdown, i. e. if command is COM_QUIT/COM_SHUTDOWN */ -bool dispatch_command(enum enum_server_command command, THD *thd, - char* packet, uint packet_length, bool is_com_multi, - bool is_next_command) +dispatch_command_return dispatch_command(enum enum_server_command command, THD *thd, + char* packet, uint packet_length, bool blocking) { NET *net= &thd->net; bool error= 0; @@ -1582,6 +1582,12 @@ bool dispatch_command(enum enum_server_command command, THD *thd, "<?>"))); bool drop_more_results= 0; + if (thd->async_state.m_state == thd_async_state::enum_async_state::RESUMED) + { + thd->async_state.m_state = thd_async_state::enum_async_state::NONE; + goto resume; + } + /* keep it withing 1 byte */ compile_time_assert(COM_END == 255); @@ -1651,14 +1657,6 @@ bool dispatch_command(enum enum_server_command command, THD *thd, beginning of each command. */ thd->server_status&= ~SERVER_STATUS_CLEAR_SET; - if (is_next_command) - { - drop_more_results= !MY_TEST(thd->server_status & - SERVER_MORE_RESULTS_EXISTS); - thd->server_status|= SERVER_MORE_RESULTS_EXISTS; - if (is_com_multi) - thd->get_stmt_da()->set_skip_flush(); - } if (unlikely(thd->security_ctx->password_expired && command != COM_QUERY && @@ -1873,8 +1871,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, if (WSREP(thd)) { if (wsrep_mysql_parse(thd, thd->query(), thd->query_length(), - &parser_state, - is_com_multi, is_next_command)) + &parser_state)) { WSREP_DEBUG("Deadlock error for: %s", thd->query()); mysql_mutex_lock(&thd->LOCK_thd_data); @@ -1886,8 +1883,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, } else #endif /* WITH_WSREP */ - mysql_parse(thd, thd->query(), thd->query_length(), &parser_state, - is_com_multi, is_next_command); + mysql_parse(thd, thd->query(), thd->query_length(), &parser_state); while (!thd->killed && (parser_state.m_lip.found_semicolon != NULL) && ! thd->is_error()) @@ -1971,8 +1967,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, if (WSREP(thd)) { if (wsrep_mysql_parse(thd, beginning_of_next_stmt, - length, &parser_state, - is_com_multi, is_next_command)) + length, &parser_state)) { WSREP_DEBUG("Deadlock error for: %s", thd->query()); mysql_mutex_lock(&thd->LOCK_thd_data); @@ -1985,8 +1980,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, } else #endif /* WITH_WSREP */ - mysql_parse(thd, beginning_of_next_stmt, length, &parser_state, - is_com_multi, is_next_command); + mysql_parse(thd, beginning_of_next_stmt, length, &parser_state); } @@ -2037,13 +2031,6 @@ bool dispatch_command(enum enum_server_command command, THD *thd, break; } packet= arg_end + 1; - // thd->reset_for_next_command reset state => restore it - if (is_next_command) - { - thd->server_status|= SERVER_MORE_RESULTS_EXISTS; - if (is_com_multi) - thd->get_stmt_da()->set_skip_flush(); - } lex_start(thd); /* Must be before we init the table list. */ @@ -2332,84 +2319,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, general_log_print(thd, command, NullS); my_eof(thd); break; - case COM_MULTI: - { - uint counter; - uint current_com= 0; - DBUG_ASSERT(!is_com_multi); - if (!(thd->client_capabilities & CLIENT_MULTI_RESULTS)) - { - /* The client does not support multiple result sets being sent back */ - my_error(ER_COMMULTI_BADCONTEXT, MYF(0)); - break; - } - - if (!(counter= maria_multi_check(thd, packet, packet_length))) - break; - - { - char *packet_start= packet; - /* We have to store next length because it will be destroyed by '\0' */ - size_t next_subpacket_length= net_field_length((uchar **)&packet_start); - size_t next_length_length= packet_start - packet; - unsigned char *readbuff= net->buff; - - if (net_allocate_new_packet(net, thd, MYF(0))) - break; - - PSI_statement_locker *save_locker= thd->m_statement_psi; - sql_digest_state *save_digest= thd->m_digest; - thd->m_statement_psi= NULL; - thd->m_digest= NULL; - - while (packet_length) - { - current_com++; - size_t subpacket_length= next_subpacket_length + next_length_length; - size_t length_length= next_length_length; - if (subpacket_length < packet_length) - { - packet_start= packet + subpacket_length; - next_subpacket_length= net_field_length((uchar**)&packet_start); - next_length_length= packet_start - (packet + subpacket_length); - } - /* safety like in do_command() */ - packet[subpacket_length]= '\0'; - - enum enum_server_command subcommand= - fetch_command(thd, (packet + length_length)); - if (server_command_flags[subcommand] & CF_NO_COM_MULTI) - { - my_error(ER_BAD_COMMAND_IN_MULTI, MYF(0), - command_name[subcommand].str); - goto com_multi_end; - } - - if (dispatch_command(subcommand, thd, packet + (1 + length_length), - (uint)(subpacket_length - (1 + length_length)), TRUE, - (current_com != counter))) - { - DBUG_ASSERT(thd->is_error()); - goto com_multi_end; - } - - DBUG_ASSERT(subpacket_length <= packet_length); - packet+= subpacket_length; - packet_length-= (uint)subpacket_length; - } - -com_multi_end: - thd->m_statement_psi= save_locker; - thd->m_digest= save_digest; - - /* release old buffer */ - net_flush(net); - DBUG_ASSERT(net->buff == net->write_pos); // nothing to send - my_free(readbuff); - } - break; - } case COM_SLEEP: case COM_CONNECT: // Impossible here case COM_TIME: // Impossible from client @@ -2423,7 +2333,18 @@ com_multi_end: } dispatch_end: - do_end_of_statement= true; + /* + For the threadpool i.e if non-blocking call, if not all async operations + are finished, return without cleanup. The cleanup will be done on + later, when command execution is resumed. + */ + if (!blocking && !error && thd->async_state.pending_ops()) + { + DBUG_RETURN(DISPATCH_COMMAND_WOULDBLOCK); + } + +resume: + #ifdef WITH_WSREP /* Next test should really be WSREP(thd), but that causes a failure when doing @@ -2470,11 +2391,8 @@ dispatch_end: thd_proc_info(thd, "Updating status"); /* Finalize server status flags after executing a command. */ thd->update_server_status(); - if (command != COM_MULTI) - { - thd->protocol->end_statement(); - query_cache_end_of_result(thd); - } + thd->protocol->end_statement(); + query_cache_end_of_result(thd); } if (drop_more_results) thd->server_status&= ~SERVER_MORE_RESULTS_EXISTS; @@ -2502,8 +2420,7 @@ dispatch_end: thd->m_statement_psi= NULL; thd->m_digest= NULL; - if (!is_com_multi) - thd->packet.shrink(thd->variables.net_buffer_length); // Reclaim some memory + thd->packet.shrink(thd->variables.net_buffer_length); // Reclaim some memory thd->reset_kill_query(); /* Ensure that killed_errmsg is released */ /* @@ -2531,7 +2448,7 @@ dispatch_end: /* Check that some variables are reset properly */ DBUG_ASSERT(thd->abort_on_warning == 0); thd->lex->restore_set_statement_var(); - DBUG_RETURN(error); + DBUG_RETURN(error?DISPATCH_COMMAND_CLOSE_CONNECTION: DISPATCH_COMMAND_SUCCESS); } static bool slow_filter_masked(THD *thd, ulonglong mask) @@ -3856,6 +3773,11 @@ mysql_execute_command(THD *thd) thd->set_query_timer(); #ifdef WITH_WSREP + /* Check wsrep_mode rules before command execution. */ + if (WSREP(thd) && + wsrep_thd_is_local(thd) && !wsrep_check_mode_before_cmd_execute(thd)) + goto error; + /* Always start a new transaction for a wsrep THD unless the current command is DDL or explicit BEGIN. This will guarantee that @@ -7852,9 +7774,7 @@ static void wsrep_prepare_for_autocommit_retry(THD* thd, } static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, - Parser_state *parser_state, - bool is_com_multi, - bool is_next_command) + Parser_state *parser_state) { bool is_autocommit= !thd->in_multi_stmt_transaction_mode() && @@ -7863,7 +7783,7 @@ static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, do { retry_autocommit= false; - mysql_parse(thd, rawbuf, length, parser_state, is_com_multi, is_next_command); + mysql_parse(thd, rawbuf, length, parser_state); /* Convert all ER_QUERY_INTERRUPTED errors to ER_LOCK_DEADLOCK @@ -7969,15 +7889,10 @@ static bool wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, @param thd Current thread @param rawbuf Begining of the query text @param length Length of the query text - @param[out] found_semicolon For multi queries, position of the character of - the next query in the query text. - @param is_next_command there will be more command in the COM_MULTI batch */ void mysql_parse(THD *thd, char *rawbuf, uint length, - Parser_state *parser_state, - bool is_com_multi, - bool is_next_command) + Parser_state *parser_state) { DBUG_ENTER("mysql_parse"); DBUG_EXECUTE_IF("parser_debug", turn_parser_debug_on_MYSQLparse();); @@ -8001,12 +7916,6 @@ void mysql_parse(THD *thd, char *rawbuf, uint length, */ lex_start(thd); thd->reset_for_next_command(); - if (is_next_command) - { - thd->server_status|= SERVER_MORE_RESULTS_EXISTS; - if (is_com_multi) - thd->get_stmt_da()->set_skip_flush(); - } if (query_cache_send_result_to_client(thd, rawbuf, length) <= 0) { diff --git a/sql/sql_parse.h b/sql/sql_parse.h index 0e5cc7f4bad..44fd2185726 100644 --- a/sql/sql_parse.h +++ b/sql/sql_parse.h @@ -91,8 +91,7 @@ bool is_log_table_write_query(enum enum_sql_command command); bool alloc_query(THD *thd, const char *packet, size_t packet_length); void mysql_init_select(LEX *lex); void mysql_parse(THD *thd, char *rawbuf, uint length, - Parser_state *parser_state, bool is_com_multi, - bool is_next_command); + Parser_state *parser_state); bool mysql_new_select(LEX *lex, bool move_down, SELECT_LEX *sel); void create_select_for_variable(THD *thd, LEX_CSTRING *var_name); void create_table_set_open_action_and_adjust_tables(LEX *lex); @@ -101,10 +100,16 @@ bool multi_delete_set_locks_and_link_aux_tables(LEX *lex); void create_table_set_open_action_and_adjust_tables(LEX *lex); int bootstrap(MYSQL_FILE *file); int mysql_execute_command(THD *thd); -bool do_command(THD *thd); -bool dispatch_command(enum enum_server_command command, THD *thd, - char* packet, uint packet_length, - bool is_com_multi, bool is_next_command); +enum dispatch_command_return +{ + DISPATCH_COMMAND_SUCCESS=0, + DISPATCH_COMMAND_CLOSE_CONNECTION= 1, + DISPATCH_COMMAND_WOULDBLOCK= 2 +}; + +dispatch_command_return do_command(THD *thd, bool blocking = true); +dispatch_command_return dispatch_command(enum enum_server_command command, THD *thd, + char* packet, uint packet_length, bool blocking = true); void log_slow_statement(THD *thd); bool append_file_to_dir(THD *thd, const char **filename_ptr, const LEX_CSTRING *table_name); diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc index 21c8f739dd2..f0bb99da2fc 100644 --- a/sql/sql_prepare.cc +++ b/sql/sql_prepare.cc @@ -334,9 +334,13 @@ static bool send_prep_stmt(Prepared_statement *stmt, uint columns) error= my_net_write(net, buff, sizeof(buff)); if (stmt->param_count && likely(!error)) { - error= thd->protocol_text.send_result_set_metadata((List<Item> *) - &stmt->lex->param_list, - Protocol::SEND_EOF); + /* + Force the column info to be written + (in this case PS parameter type info). + */ + error= thd->protocol_text.send_result_set_metadata( + (List<Item> *)&stmt->lex->param_list, + Protocol::SEND_EOF | Protocol::SEND_FORCE_COLUMN_INFO); } if (likely(!error)) @@ -3454,10 +3458,15 @@ static void mysql_stmt_execute_common(THD *thd, thd->protocol= &thd->protocol_binary; MYSQL_EXECUTE_PS(thd->m_statement_psi, stmt->m_prepared_stmt); + auto save_cur_stmt= thd->cur_stmt; + thd->cur_stmt= stmt; + if (!bulk_op) stmt->execute_loop(&expanded_query, open_cursor, packet, packet_end); else stmt->execute_bulk_loop(&expanded_query, open_cursor, packet, packet_end); + + thd->cur_stmt= save_cur_stmt; thd->protocol= save_protocol; sp_cache_enforce_limit(thd->sp_proc_cache, stored_program_cache_size); @@ -4216,6 +4225,8 @@ bool Prepared_statement::prepare(const char *packet, uint packet_len) old_stmt_arena= thd->stmt_arena; thd->stmt_arena= this; + auto save_cur_stmt= thd->cur_stmt; + thd->cur_stmt= this; Parser_state parser_state; if (parser_state.init(thd, thd->query(), thd->query_length())) @@ -4223,6 +4234,7 @@ bool Prepared_statement::prepare(const char *packet, uint packet_len) thd->restore_backup_statement(this, &stmt_backup); thd->restore_active_arena(this, &stmt_backup); thd->stmt_arena= old_stmt_arena; + thd->cur_stmt = save_cur_stmt; DBUG_RETURN(TRUE); } @@ -4232,6 +4244,7 @@ bool Prepared_statement::prepare(const char *packet, uint packet_len) lex_start(thd); lex->context_analysis_only|= CONTEXT_ANALYSIS_ONLY_PREPARE; + error= (parse_sql(thd, & parser_state, NULL) || thd->is_error() || init_param_array(this)); @@ -4307,6 +4320,7 @@ bool Prepared_statement::prepare(const char *packet, uint packet_len) cleanup_stmt(); thd->restore_backup_statement(this, &stmt_backup); thd->stmt_arena= old_stmt_arena; + thd->cur_stmt= save_cur_stmt; if (likely(error == 0)) { @@ -4765,6 +4779,7 @@ Prepared_statement::reprepare() it's failed, we need to return all the warnings to the user. */ thd->get_stmt_da()->clear_warning_info(thd->query_id); + column_info_state.reset(); } else { @@ -5407,7 +5422,7 @@ protected: CHARSET_INFO *fromcs, CHARSET_INFO *tocs); bool net_send_eof(THD *thd, uint server_status, uint statement_warn_count); bool net_send_ok(THD *, uint, uint, ulonglong, ulonglong, const char *, - bool, bool); + bool); bool net_send_error_packet(THD *, uint, const char *, const char *); bool begin_dataset(); bool begin_dataset(THD *thd, uint numfields); @@ -5570,7 +5585,7 @@ bool Protocol_local::net_store_data_cs(const uchar *from, size_t length, bool Protocol_local::net_send_ok(THD *thd, uint server_status, uint statement_warn_count, - ulonglong affected_rows, ulonglong id, const char *message, bool, bool) + ulonglong affected_rows, ulonglong id, const char *message, bool) { DBUG_ENTER("emb_net_send_ok"); MYSQL_DATA *data; diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc index 77a1e46a75a..b7aed97a8a2 100644 --- a/sql/sql_rename.cc +++ b/sql/sql_rename.cc @@ -325,7 +325,7 @@ do_rename(THD *thd, TABLE_LIST *ren_table, const LEX_CSTRING *new_db, #ifdef WITH_WSREP if (WSREP(thd) && hton && hton != view_pseudo_hton && - !wsrep_should_replicate_ddl(thd, hton->db_type)) + !wsrep_should_replicate_ddl(thd, hton)) DBUG_RETURN(1); #endif diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 6029b92e0d9..670b0ba79e9 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -3873,6 +3873,16 @@ JOIN::add_sorting_to_table(JOIN_TAB *tab, ORDER *order) tab->select); if (!tab->filesort) return true; + + TABLE *table= tab->table; + if ((tab == join_tab + const_tables) && + table->pos_in_table_list && + table->pos_in_table_list->is_sjm_scan_table()) + { + tab->filesort->set_all_read_bits= TRUE; + tab->filesort->unpack= unpack_to_base_table_fields; + } + /* Select was moved to filesort->select to force join_init_read_record to use sorted result instead of reading table through select. @@ -7977,7 +7987,8 @@ best_access_path(JOIN *join, access is to use the same index IDX, with the same or more key parts. (note: it is not clear how this rule is/should be extended to index_merge quick selects). Also if we have a hash join we prefer that - over a table scan + over a table scan. This heuristic doesn't apply if the quick select + uses the group-by min-max optimization. (3) See above note about InnoDB. (4) NOT ("FORCE INDEX(...)" is used for table and there is 'ref' access path, but there is no quick select) @@ -7995,7 +8006,9 @@ best_access_path(JOIN *join, Json_writer_object trace_access_scan(thd); if ((records >= s->found_records || best > s->read_time) && // (1) !(best_key && best_key->key == MAX_KEY) && // (2) - !(s->quick && best_key && s->quick->index == best_key->key && // (2) + !(s->quick && + s->quick->get_type() != QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX && // (2) + best_key && s->quick->index == best_key->key && // (2) best_max_key_part >= s->table->opt_range[best_key->key].key_parts) &&// (2) !((s->table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3) ! s->table->covering_keys.is_clear_all() && best_key && !s->quick) &&// (3) @@ -14296,37 +14309,8 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond, can be used without tmp. table. */ bool can_subst_to_first_table= false; - bool first_is_in_sjm_nest= false; - if (first_is_base_table) - { - TABLE_LIST *tbl_for_first= - join->join_tab[join->const_tables].table->pos_in_table_list; - first_is_in_sjm_nest= tbl_for_first->sj_mat_info && - tbl_for_first->sj_mat_info->is_used; - } - /* - Currently we do not employ the optimization that uses multiple - equalities for ORDER BY to remove tmp table in the case when - the first table happens to be the result of materialization of - a semi-join nest ( <=> first_is_in_sjm_nest == true). - - When a semi-join nest is materialized and scanned to look for - possible matches in the remaining tables for every its row - the fields from the result of materialization are copied - into the record buffers of tables from the semi-join nest. - So these copies are used to access the remaining tables rather - than the fields from the result of materialization. - - Unfortunately now this so-called 'copy back' technique is - supported only if the rows are scanned with the rr_sequential - function, but not with other rr_* functions that are employed - when the result of materialization is required to be sorted. - - TODO: either to support 'copy back' technique for the above case, - or to get rid of this technique altogether. - */ if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) && - first_is_base_table && !first_is_in_sjm_nest && + first_is_base_table && order->item[0]->real_item()->type() == Item::FIELD_ITEM && join->cond_equal) { @@ -20291,19 +20275,6 @@ do_select(JOIN *join, Procedure *procedure) } -int rr_sequential_and_unpack(READ_RECORD *info) -{ - int error; - if (unlikely((error= rr_sequential(info)))) - return error; - - for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++) - (*cp->do_copy)(cp); - - return error; -} - - /** @brief Instantiates temporary table @@ -21599,6 +21570,8 @@ bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab) int join_init_read_record(JOIN_TAB *tab) { + bool need_unpacking= FALSE; + JOIN *join= tab->join; /* Note: the query plan tree for the below operations is constructed in save_agg_explain_data. @@ -21606,6 +21579,12 @@ int join_init_read_record(JOIN_TAB *tab) if (tab->distinct && tab->remove_duplicates()) // Remove duplicates. return 1; + if (join->top_join_tab_count != join->const_tables) + { + TABLE_LIST *tbl= tab->table->pos_in_table_list; + need_unpacking= tbl ? tbl->is_sjm_scan_table() : FALSE; + } + tab->build_range_rowid_filter_if_needed(); if (tab->filesort && tab->sort_table()) // Sort table. @@ -21613,6 +21592,11 @@ int join_init_read_record(JOIN_TAB *tab) DBUG_EXECUTE_IF("kill_join_init_read_record", tab->join->thd->set_killed(KILL_QUERY);); + + + if (!tab->preread_init_done && tab->preread_init()) + return 1; + if (tab->select && tab->select->quick && tab->select->quick->reset()) { /* Ensures error status is propagated back to client */ @@ -21623,19 +21607,7 @@ int join_init_read_record(JOIN_TAB *tab) /* make sure we won't get ER_QUERY_INTERRUPTED from any code below */ DBUG_EXECUTE_IF("kill_join_init_read_record", tab->join->thd->reset_killed();); - if (!tab->preread_init_done && tab->preread_init()) - return 1; - - if (init_read_record(&tab->read_record, tab->join->thd, tab->table, - tab->select, tab->filesort_result, 1,1, FALSE)) - return 1; - return tab->read_record.read_record(); -} - -int -join_read_record_no_init(JOIN_TAB *tab) -{ Copy_field *save_copy, *save_copy_end; /* @@ -21645,12 +21617,19 @@ join_read_record_no_init(JOIN_TAB *tab) save_copy= tab->read_record.copy_field; save_copy_end= tab->read_record.copy_field_end; - init_read_record(&tab->read_record, tab->join->thd, tab->table, - tab->select, tab->filesort_result, 1, 1, FALSE); + if (init_read_record(&tab->read_record, tab->join->thd, tab->table, + tab->select, tab->filesort_result, 1, 1, FALSE)) + return 1; tab->read_record.copy_field= save_copy; tab->read_record.copy_field_end= save_copy_end; - tab->read_record.read_record_func= rr_sequential_and_unpack; + + if (need_unpacking) + { + tab->read_record.read_record_func_and_unpack_calls= + tab->read_record.read_record_func; + tab->read_record.read_record_func = read_record_func_for_rr_and_unpack; + } return tab->read_record.read_record(); } @@ -29488,6 +29467,20 @@ void JOIN::init_join_cache_and_keyread() } +/* + @brief + Unpack temp table fields to base table fields. +*/ + +void unpack_to_base_table_fields(TABLE *table) +{ + JOIN_TAB *tab= table->reginfo.join_tab; + for (Copy_field *cp= tab->read_record.copy_field; + cp != tab->read_record.copy_field_end; cp++) + (*cp->do_copy)(cp); +} + + /** @} (end of group Query_Optimizer) */ diff --git a/sql/sql_select.h b/sql/sql_select.h index 9574c93cd7d..ff9264d8bf7 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -223,7 +223,7 @@ typedef enum_nested_loop_state (*Next_select_func)(JOIN *, struct st_join_table *, bool); Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab); int rr_sequential(READ_RECORD *info); -int rr_sequential_and_unpack(READ_RECORD *info); +int read_record_func_for_rr_and_unpack(READ_RECORD *info); Item *remove_pushed_top_conjuncts(THD *thd, Item *cond); Item *and_new_conditions_to_optimized_cond(THD *thd, Item *cond, COND_EQUAL **cond_eq, @@ -2353,7 +2353,6 @@ create_virtual_tmp_table(THD *thd, Field *field) int test_if_item_cache_changed(List<Cached_item> &list); int join_init_read_record(JOIN_TAB *tab); -int join_read_record_no_init(JOIN_TAB *tab); void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key); inline Item * and_items(THD *thd, Item* cond, Item *item) { @@ -2411,6 +2410,7 @@ int print_explain_message_line(select_result_sink *result, void explain_append_mrr_info(QUICK_RANGE_SELECT *quick, String *res); int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table, key_map possible_keys); +void unpack_to_base_table_fields(TABLE *table); /**************************************************************************** Temporary table support for SQL Runtime diff --git a/sql/sql_sort.h b/sql/sql_sort.h index a474d7c25e9..3b23328183c 100644 --- a/sql/sql_sort.h +++ b/sql/sql_sort.h @@ -559,6 +559,7 @@ public: Addon_fields *addon_fields; // Descriptors for companion fields. Sort_keys *sort_keys; bool using_pq; + bool set_all_read_bits; uchar *unique_buff; bool not_killable; @@ -579,6 +580,8 @@ public: } void init_for_filesort(uint sortlen, TABLE *table, ha_rows maxrows, bool sort_positions); + + void (*unpack)(TABLE *); /// Enables the packing of addons if possible. void try_to_pack_addons(ulong max_length_for_sort_data); diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 0824672df4a..c9e21e36b45 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -2473,7 +2473,7 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists, } else { - if (WSREP(thd) && hton && !wsrep_should_replicate_ddl(thd, hton->db_type)) + if (WSREP(thd) && hton && !wsrep_should_replicate_ddl(thd, hton)) { error= 1; goto err; @@ -9995,7 +9995,7 @@ bool mysql_alter_table(THD *thd, const LEX_CSTRING *new_db, (thd->lex->sql_command == SQLCOM_ALTER_TABLE || thd->lex->sql_command == SQLCOM_CREATE_INDEX || thd->lex->sql_command == SQLCOM_DROP_INDEX) && - !wsrep_should_replicate_ddl(thd, table_list->table->s->db_type()->db_type)) + !wsrep_should_replicate_ddl(thd, table_list->table->s->db_type())) DBUG_RETURN(true); #endif @@ -11527,6 +11527,9 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, } else { + /* In case of alter ignore, notify the engine about it. */ + if (ignore) + to->file->extra(HA_EXTRA_IGNORE_INSERT); DEBUG_SYNC(thd, "copy_data_between_tables_before"); found_count++; mysql_stage_set_work_completed(thd->m_stage_progress_psi, found_count); diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc index 9417ec667ff..f93fe196728 100644 --- a/sql/sql_trigger.cc +++ b/sql/sql_trigger.cc @@ -551,7 +551,7 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create) #ifdef WITH_WSREP if (WSREP(thd) && - !wsrep_should_replicate_ddl(thd, table->s->db_type()->db_type)) + !wsrep_should_replicate_ddl(thd, table->s->db_type())) goto wsrep_error_label; #endif diff --git a/sql/sql_truncate.cc b/sql/sql_truncate.cc index 5e89c4d19ee..68b3570db9e 100644 --- a/sql/sql_truncate.cc +++ b/sql/sql_truncate.cc @@ -305,7 +305,7 @@ bool Sql_cmd_truncate_table::lock_table(THD *thd, TABLE_LIST *table_ref, hton= table->file->ht; #ifdef WITH_WSREP if (WSREP(thd) && - !wsrep_should_replicate_ddl(thd, hton->db_type)) + !wsrep_should_replicate_ddl(thd, hton)) DBUG_RETURN(TRUE); #endif @@ -329,7 +329,7 @@ bool Sql_cmd_truncate_table::lock_table(THD *thd, TABLE_LIST *table_ref, #ifdef WITH_WSREP if (WSREP(thd) && hton != view_pseudo_hton && - !wsrep_should_replicate_ddl(thd, hton->db_type)) + !wsrep_should_replicate_ddl(thd, hton)) { tdc_release_share(share); DBUG_RETURN(TRUE); diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 1a045c5416f..69d2aa9f470 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -11754,6 +11754,16 @@ table_primary_derived: if (!($$= Lex->parsed_derived_table($1->master_unit(), $2, $3))) MYSQL_YYABORT; } +/* Start SQL_MODE_ORACLE_SPECIFIC + | subquery + opt_for_system_time_clause + { + LEX_CSTRING alias; + if ($1->make_unique_derived_name(thd, &alias) || + !($$= Lex->parsed_derived_table($1->master_unit(), $2, &alias))) + MYSQL_YYABORT; + } +End SQL_MODE_ORACLE_SPECIFIC */ ; opt_outer: diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index b1e45c61d10..0768d4f89c4 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -5926,6 +5926,22 @@ static Sys_var_uint Sys_wsrep_sync_wait( NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(wsrep_sync_wait_update)); +static const char *wsrep_mode_names[]= +{ + "STRICT_REPLICATION", + "BINLOG_ROW_FORMAT_ONLY", + "REQUIRED_PRIMARY_KEY", + NullS +}; +static Sys_var_set Sys_wsrep_mode( + "wsrep_mode", + "Set of WSREP features that are enabled.", + GLOBAL_VAR(wsrep_mode), CMD_LINE(REQUIRED_ARG), + wsrep_mode_names, + DEFAULT(0), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_mode_check)); + static const char *wsrep_OSU_method_names[]= { "TOI", "RSU", NullS }; static Sys_var_enum Sys_wsrep_OSU_method( "wsrep_OSU_method", "Method for Online Schema Upgrade", @@ -5942,12 +5958,14 @@ static Sys_var_mybool Sys_wsrep_desync ( ON_UPDATE(wsrep_desync_update)); static Sys_var_mybool Sys_wsrep_strict_ddl ( - "wsrep_strict_ddl", "If set, reject DDL on affected tables not supporting Galera replication", + "wsrep_strict_ddl", + "If set, reject DDL on affected tables not supporting Galera replication", GLOBAL_VAR(wsrep_strict_ddl), CMD_LINE(OPT_ARG), DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), - ON_UPDATE(0)); + ON_UPDATE(wsrep_strict_ddl_update), + DEPRECATED("'@@wsrep_mode=STRICT_REPLICATION'")); // since 10.6.0 static const char *wsrep_reject_queries_names[]= { "NONE", "ALL", "ALL_KILL", NullS }; static Sys_var_enum Sys_wsrep_reject_queries( diff --git a/sql/table.h b/sql/table.h index 53f385be7fd..758d2c3d949 100644 --- a/sql/table.h +++ b/sql/table.h @@ -1137,7 +1137,7 @@ struct TABLE_SHARE bool write_frm_image(const uchar *frm_image, size_t frm_length); bool write_par_image(const uchar *par_image, size_t par_length); - /* Only used by tokudb */ + /* Only used by S3 */ bool write_frm_image(void) { return frm_image ? write_frm_image(frm_image->str, frm_image->length) : 0; } @@ -2781,6 +2781,7 @@ struct TABLE_LIST */ const char *get_table_name() const { return view != NULL ? view_name.str : table_name.str; } bool is_active_sjm(); + bool is_sjm_scan_table(); bool is_jtbm() { return MY_TEST(jtbm_subselect != NULL); } st_select_lex_unit *get_unit(); st_select_lex *get_single_select(); diff --git a/sql/threadpool.h b/sql/threadpool.h index 27da872c5cc..7737d056b4a 100644 --- a/sql/threadpool.h +++ b/sql/threadpool.h @@ -37,6 +37,8 @@ extern uint threadpool_mode; /* Thread pool implementation , windows or generic #define DEFAULT_THREADPOOL_STALL_LIMIT 500U struct TP_connection; +struct st_vio; + extern void tp_callback(TP_connection *c); extern void tp_timeout_handler(TP_connection *c); @@ -113,7 +115,7 @@ struct TP_connection virtual void wait_begin(int type)= 0; virtual void wait_end() = 0; - + IF_WIN(virtual,) void init_vio(st_vio *){}; }; @@ -131,9 +133,11 @@ struct TP_pool virtual int set_stall_limit(uint){ return 0; } virtual int get_thread_count() { return tp_stats.num_worker_threads; } virtual int get_idle_thread_count(){ return 0; } + virtual void resume(TP_connection* c)=0; }; #ifdef _WIN32 + struct TP_pool_win:TP_pool { TP_pool_win(); @@ -143,6 +147,7 @@ struct TP_pool_win:TP_pool virtual void add(TP_connection *); virtual int set_max_threads(uint); virtual int set_min_threads(uint); + void resume(TP_connection *c); }; #endif @@ -156,6 +161,7 @@ struct TP_pool_generic :TP_pool virtual int set_pool_size(uint); virtual int set_stall_limit(uint); virtual int get_idle_thread_count(); + void resume(TP_connection* c); }; #endif /* HAVE_POOL_OF_THREADS */ diff --git a/sql/threadpool_common.cc b/sql/threadpool_common.cc index e8eb0dcc29d..07555ac21ed 100644 --- a/sql/threadpool_common.cc +++ b/sql/threadpool_common.cc @@ -23,11 +23,17 @@ #include <sql_audit.h> #include <debug_sync.h> #include <threadpool.h> +#include <sql_class.h> +#include <sql_parse.h> #ifdef WITH_WSREP #include "wsrep_trans_observer.h" #endif /* WITH_WSREP */ +#ifdef _WIN32 +#include "threadpool_winsockets.h" +#endif + /* Threadpool parameters */ uint threadpool_min_threads; @@ -47,8 +53,8 @@ TP_STATISTICS tp_stats; static void threadpool_remove_connection(THD *thd); -static int threadpool_process_request(THD *thd); -static THD* threadpool_add_connection(CONNECT *connect, void *scheduler_data); +static dispatch_command_return threadpool_process_request(THD *thd); +static THD* threadpool_add_connection(CONNECT *connect, TP_connection *c); extern bool do_command(THD*); @@ -191,10 +197,30 @@ void tp_callback(TP_connection *c) } c->connect= 0; } - else if (threadpool_process_request(thd)) + else { - /* QUIT or an error occurred. */ - goto error; +retry: + switch(threadpool_process_request(thd)) + { + case DISPATCH_COMMAND_WOULDBLOCK: + if (!thd->async_state.try_suspend()) + { + /* + All async operations finished meanwhile, thus nobody is will wake up + this THD. Therefore, we'll resume "manually" here. + */ + thd->async_state.m_state = thd_async_state::enum_async_state::RESUMED; + goto retry; + } + worker_context.restore(); + return; + case DISPATCH_COMMAND_CLOSE_CONNECTION: + /* QUIT or an error occurred. */ + goto error; + case DISPATCH_COMMAND_SUCCESS: + break; + } + thd->async_state.m_state= thd_async_state::enum_async_state::NONE; } /* Set priority */ @@ -220,7 +246,7 @@ error: } -static THD* threadpool_add_connection(CONNECT *connect, void *scheduler_data) +static THD *threadpool_add_connection(CONNECT *connect, TP_connection *c) { THD *thd= NULL; @@ -243,11 +269,10 @@ static THD* threadpool_add_connection(CONNECT *connect, void *scheduler_data) } delete connect; - thd->event_scheduler.data = scheduler_data; + thd->event_scheduler.data= c; server_threads.insert(thd); thd->set_mysys_var(mysys_var); - /* Login. */ thread_attach(thd); re_init_net_server_extension(thd); @@ -261,6 +286,8 @@ static THD* threadpool_add_connection(CONNECT *connect, void *scheduler_data) if (thd_prepare_connection(thd)) goto end; + c->init_vio(thd->net.vio); + /* Check if THD is ok, as prepare_new_connection_state() can fail, for example if init command failed. @@ -326,10 +353,13 @@ static bool has_unread_data(THD* thd) /** Process a single client request or a single batch. */ -static int threadpool_process_request(THD *thd) +static dispatch_command_return threadpool_process_request(THD *thd) { - int retval= 0; + dispatch_command_return retval= DISPATCH_COMMAND_SUCCESS; + thread_attach(thd); + if(thd->async_state.m_state == thd_async_state::enum_async_state::RESUMED) + goto resume; if (thd->killed >= KILL_CONNECTION) { @@ -337,7 +367,7 @@ static int threadpool_process_request(THD *thd) killed flag was set by timeout handler or KILL command. Return error. */ - retval= 1; + retval= DISPATCH_COMMAND_CLOSE_CONNECTION; if(thd->killed == KILL_WAIT_TIMEOUT) handle_wait_timeout(thd); goto end; @@ -360,19 +390,27 @@ static int threadpool_process_request(THD *thd) if (mysql_audit_release_required(thd)) mysql_audit_release(thd); - if ((retval= do_command(thd)) != 0) - goto end; +resume: + retval= do_command(thd, false); + switch(retval) + { + case DISPATCH_COMMAND_WOULDBLOCK: + case DISPATCH_COMMAND_CLOSE_CONNECTION: + goto end; + case DISPATCH_COMMAND_SUCCESS: + break; + } if (!thd_is_connection_alive(thd)) { - retval= 1; + retval=DISPATCH_COMMAND_CLOSE_CONNECTION; goto end; } set_thd_idle(thd); if (!has_unread_data(thd)) - { + { /* More info on this debug sync is in sql_parse.cc*/ DEBUG_SYNC(thd, "before_do_command_net_read"); goto end; @@ -405,6 +443,9 @@ static bool tp_init() pool= 0; return true; } +#ifdef _WIN32 + init_win_aio_buffers(max_connections); +#endif return false; } @@ -506,6 +547,9 @@ static void tp_wait_end(THD *thd) static void tp_end() { delete pool; +#ifdef _WIN32 + destroy_win_aio_buffers(); +#endif } static void tp_post_kill_notification(THD *thd) @@ -516,6 +560,15 @@ static void tp_post_kill_notification(THD *thd) post_kill_notification(thd); } +/* Resume previously suspended THD */ +static void tp_resume(THD* thd) +{ + DBUG_ASSERT(thd->async_state.m_state == thd_async_state::enum_async_state::SUSPENDED); + thd->async_state.m_state = thd_async_state::enum_async_state::RESUMED; + TP_connection* c = get_TP_connection(thd); + pool->resume(c); +} + static scheduler_functions tp_scheduler_functions= { 0, // max_threads @@ -526,7 +579,8 @@ static scheduler_functions tp_scheduler_functions= tp_wait_begin, // thd_wait_begin tp_wait_end, // thd_wait_end tp_post_kill_notification, // post kill notification - tp_end // end + tp_end, // end + tp_resume }; void pool_of_threads_scheduler(struct scheduler_functions *func, diff --git a/sql/threadpool_generic.cc b/sql/threadpool_generic.cc index b6bb47e8f29..19193be0354 100644 --- a/sql/threadpool_generic.cc +++ b/sql/threadpool_generic.cc @@ -29,8 +29,8 @@ #include <sql_plist.h> #include <threadpool.h> #include <algorithm> - -#ifdef HAVE_IOCP +#ifdef _WIN32 +#include "threadpool_winsockets.h" #define OPTIONAL_IO_POLL_READ_PARAM this #else #define OPTIONAL_IO_POLL_READ_PARAM 0 @@ -347,7 +347,7 @@ static void* native_event_get_userdata(native_event *event) return event->portev_user; } -#elif defined(HAVE_IOCP) +#elif defined(_WIN32) static TP_file_handle io_poll_create() @@ -358,29 +358,8 @@ static TP_file_handle io_poll_create() int io_poll_start_read(TP_file_handle pollfd, TP_file_handle fd, void *, void *opt) { - static char c; - TP_connection_generic *con= (TP_connection_generic *)opt; - OVERLAPPED *overlapped= &con->overlapped; - if (con->vio_type == VIO_TYPE_NAMEDPIPE) - { - if (ReadFile(fd, &c, 0, NULL, overlapped)) - return 0; - } - else - { - WSABUF buf; - buf.buf= &c; - buf.len= 0; - DWORD flags=0; - - if (WSARecv((SOCKET)fd, &buf, 1,NULL, &flags,overlapped, NULL) == 0) - return 0; - } - - if (GetLastError() == ERROR_IO_PENDING) - return 0; - - return 1; + auto c= (TP_connection_generic *) opt; + return (int) c->win_sock.begin_read(); } @@ -429,20 +408,33 @@ int io_poll_disassociate_fd(TP_file_handle pollfd, TP_file_handle fd) } -int io_poll_wait(TP_file_handle pollfd, native_event *events, int maxevents, int timeout_ms) +static void *native_event_get_userdata(native_event *event) { - ULONG n; - BOOL ok = GetQueuedCompletionStatusEx(pollfd, events, - maxevents, &n, timeout_ms, FALSE); - - return ok ? (int)n : -1; + return (void *) event->lpCompletionKey; } - -static void* native_event_get_userdata(native_event *event) +int io_poll_wait(TP_file_handle pollfd, native_event *events, int maxevents, + int timeout_ms) { - return (void *)event->lpCompletionKey; + ULONG n; + if (!GetQueuedCompletionStatusEx(pollfd, events, maxevents, &n, timeout_ms, FALSE)) + return -1; + + /* Update win_sock with number of bytes read.*/ + for (ULONG i= 0; i < n; i++) + { + auto ev= &events[i]; + auto c= (TP_connection_generic *) native_event_get_userdata(ev); + /* null userdata zero means shutdown (see PostQueuedCompletionStatus() usage*/ + if (c) + { + c->win_sock.end_read(ev->dwNumberOfBytesTransferred, 0); + } + } + + return (int) n; } + #endif @@ -1005,7 +997,7 @@ void thread_group_destroy(thread_group_t *thread_group) io_poll_close(thread_group->pollfd); thread_group->pollfd= INVALID_HANDLE_VALUE; } -#ifndef HAVE_IOCP +#ifndef _WIN32 for(int i=0; i < 2; i++) { if(thread_group->shutdown_pipe[i] != -1) @@ -1052,7 +1044,7 @@ static int wake_thread(thread_group_t *thread_group,bool due_to_stall) */ static int wake_listener(thread_group_t *thread_group) { -#ifndef HAVE_IOCP +#ifndef _WIN32 if (pipe(thread_group->shutdown_pipe)) { return -1; @@ -1335,7 +1327,10 @@ void TP_pool_generic::add(TP_connection *c) DBUG_VOID_RETURN; } - +void TP_pool_generic::resume(TP_connection* c) +{ + add(c); +} /** MySQL scheduler callback: wait begin @@ -1398,12 +1393,6 @@ TP_connection_generic::TP_connection_generic(CONNECT *c): bound_to_poll_descriptor(false), waiting(false), fix_group(false) -#ifdef HAVE_IOCP -, overlapped() -#endif -#ifdef _WIN32 -, vio_type(c->vio_type) -#endif { DBUG_ASSERT(c->vio_type != VIO_CLOSED); diff --git a/sql/threadpool_generic.h b/sql/threadpool_generic.h index acf5ec6978b..b7a35b7cbf0 100644 --- a/sql/threadpool_generic.h +++ b/sql/threadpool_generic.h @@ -23,6 +23,7 @@ #ifdef _WIN32 #include <windows.h> +#include "threadpool_winsockets.h" /* AIX may define this, too ?*/ #define HAVE_IOCP #endif @@ -75,11 +76,11 @@ struct TP_connection_generic :public TP_connection TP_connection_generic(CONNECT* c); ~TP_connection_generic(); - virtual int init() { return 0; }; - virtual void set_io_timeout(int sec); - virtual int start_io(); - virtual void wait_begin(int type); - virtual void wait_end(); + int init() override { return 0; } + void set_io_timeout(int sec) override; + int start_io() override; + void wait_begin(int type) override; + void wait_end() override; thread_group_t* thread_group; TP_connection_generic* next_in_queue; @@ -90,12 +91,12 @@ struct TP_connection_generic :public TP_connection bool bound_to_poll_descriptor; int waiting; bool fix_group; -#ifdef HAVE_IOCP - OVERLAPPED overlapped; -#endif #ifdef _WIN32 - enum_vio_type vio_type; + win_aiosocket win_sock{}; + void init_vio(st_vio *vio) override + { win_sock.init(vio);} #endif + }; diff --git a/sql/threadpool_win.cc b/sql/threadpool_win.cc index 6003b06bc7b..ed68e31c755 100644 --- a/sql/threadpool_win.cc +++ b/sql/threadpool_win.cc @@ -30,6 +30,9 @@ #include <debug_sync.h> #include <threadpool.h> #include <windows.h> +#include <set_var.h> + +#include "threadpool_winsockets.h" /* Log a warning */ static void tp_log_warning(const char *msg, const char *fct) @@ -43,8 +46,6 @@ static PTP_POOL pool; static TP_CALLBACK_ENVIRON callback_environ; static DWORD fls; -static bool skip_completion_port_on_success = false; - PTP_CALLBACK_ENVIRON get_threadpool_win_callback_environ() { return pool? &callback_environ: 0; @@ -83,22 +84,21 @@ struct TP_connection_win:public TP_connection public: TP_connection_win(CONNECT*); ~TP_connection_win(); - virtual int init(); - virtual int start_io(); - virtual void set_io_timeout(int sec); - virtual void wait_begin(int type); - virtual void wait_end(); - - ulonglong timeout; - enum_vio_type vio_type; - HANDLE handle; - OVERLAPPED overlapped; - PTP_CALLBACK_INSTANCE callback_instance; - PTP_IO io; - PTP_TIMER timer; - PTP_WORK work; - bool long_callback; - + int init() override; + void init_vio(st_vio *vio) override; + int start_io() override; + void set_io_timeout(int sec) override; + void wait_begin(int type) override; + void wait_end() override; + + ulonglong timeout=ULLONG_MAX; + OVERLAPPED overlapped{}; + PTP_CALLBACK_INSTANCE callback_instance{}; + PTP_IO io{}; + PTP_TIMER timer{}; + PTP_WORK work{}; + bool long_callback{}; + win_aiosocket sock; }; struct TP_connection *new_TP_connection(CONNECT *connect) @@ -125,120 +125,56 @@ void TP_pool_win::add(TP_connection *c) } } - -TP_connection_win::TP_connection_win(CONNECT *c) : - TP_connection(c), - timeout(ULONGLONG_MAX), - callback_instance(0), - io(0), - timer(0), - work(0) +void TP_pool_win::resume(TP_connection* c) { + DBUG_ASSERT(c->state == TP_STATE_RUNNING); + SubmitThreadpoolWork(((TP_connection_win*)c)->work); } -#define CHECK_ALLOC_ERROR(op) if (!(op)) {tp_log_warning("Allocation failed", #op); DBUG_ASSERT(0); return -1; } +#define CHECK_ALLOC_ERROR(op) \ + do \ + { \ + if (!(op)) \ + { \ + tp_log_warning("Allocation failed", #op); \ + } \ + } while (0) -int TP_connection_win::init() +TP_connection_win::TP_connection_win(CONNECT *c) : + TP_connection(c) { - - memset(&overlapped, 0, sizeof(OVERLAPPED)); - switch ((vio_type = connect->vio_type)) - { - case VIO_TYPE_SSL: - case VIO_TYPE_TCPIP: - handle= (HANDLE) mysql_socket_getfd(connect->sock); - break; - case VIO_TYPE_NAMEDPIPE: - handle= connect->pipe; - break; - default: - abort(); - } - - - /* Performance tweaks (s. MSDN documentation)*/ - UCHAR flags= FILE_SKIP_SET_EVENT_ON_HANDLE; - if (skip_completion_port_on_success) - { - flags |= FILE_SKIP_COMPLETION_PORT_ON_SUCCESS; - } - (void)SetFileCompletionNotificationModes(handle, flags); /* Assign io completion callback */ - CHECK_ALLOC_ERROR(io= CreateThreadpoolIo(handle, io_completion_callback, this, &callback_environ)); - CHECK_ALLOC_ERROR(timer= CreateThreadpoolTimer(timer_callback, this, &callback_environ)); + HANDLE h= c->vio_type == VIO_TYPE_NAMEDPIPE ? c->pipe + : (HANDLE)mysql_socket_getfd(c->sock); + + CHECK_ALLOC_ERROR(io=CreateThreadpoolIo(h, io_completion_callback, this, &callback_environ)); + CHECK_ALLOC_ERROR(timer= CreateThreadpoolTimer(timer_callback, this, &callback_environ)); CHECK_ALLOC_ERROR(work= CreateThreadpoolWork(work_callback, this, &callback_environ)); - return 0; } +int TP_connection_win::init() +{ + return !io || !timer || !work ; +} + +void TP_connection_win::init_vio(st_vio* vio) +{ + sock.init(vio); +} /* Start asynchronous read */ int TP_connection_win::start_io() { - DWORD num_bytes = 0; - static char c; - WSABUF buf; - buf.buf= &c; - buf.len= 0; - DWORD flags=0; - DWORD last_error= 0; - - int retval; StartThreadpoolIo(io); - - if (vio_type == VIO_TYPE_TCPIP || vio_type == VIO_TYPE_SSL) - { - /* Start async io (sockets). */ - if (WSARecv((SOCKET)handle , &buf, 1, &num_bytes, &flags, - &overlapped, NULL) == 0) - { - retval= last_error= 0; - } - else - { - retval= -1; - last_error= WSAGetLastError(); - } - } - else - { - /* Start async io (named pipe) */ - if (ReadFile(handle, &c, 0, &num_bytes,&overlapped)) - { - retval= last_error= 0; - } - else - { - retval= -1; - last_error= GetLastError(); - } - } - - if (retval == 0 || last_error == ERROR_MORE_DATA) + if (sock.begin_read()) { - /* - IO successfully finished (synchronously). - If skip_completion_port_on_success is set, we need to handle it right - here, because completion callback would not be executed by the pool. - */ - if (skip_completion_port_on_success) - { - CancelThreadpoolIo(io); - io_completion_callback(callback_instance, this, &overlapped, last_error, - num_bytes, io); - } - return 0; - } - - if (last_error == ERROR_IO_PENDING) - { - return 0; + /* Some error occurred */ + CancelThreadpoolIo(io); + return -1; } - - /* Some error occurred */ - CancelThreadpoolIo(io); - return -1; + return 0; } /* @@ -305,7 +241,7 @@ void tp_win_callback_prolog() { /* Running in new worker thread*/ FlsSetValue(fls, (void *)1); - statistic_increment(thread_created, &LOCK_status); + thread_created++; tp_stats.num_worker_threads++; my_thread_init(); } @@ -350,6 +286,10 @@ static VOID CALLBACK io_completion_callback(PTP_CALLBACK_INSTANCE instance, PVOID context, PVOID overlapped, ULONG io_result, ULONG_PTR nbytes, PTP_IO io) { TP_connection_win *c= (TP_connection_win *)context; + + /* How many bytes were preread into read buffer */ + c->sock.end_read((ULONG)nbytes, io_result); + /* Execute high priority connections immediately. 'Yield' in case of low priority connections, i.e SubmitThreadpoolWork (with the same callback) @@ -412,12 +352,24 @@ int TP_pool_win::init() InitializeThreadpoolEnvironment(&callback_environ); SetThreadpoolCallbackPool(&callback_environ, pool); - if (threadpool_max_threads) + if (IS_SYSVAR_AUTOSIZE(&threadpool_max_threads)) + { + /* + Nr 500 comes from Microsoft documentation, + there is no API for GetThreadpoolThreadMaxThreads() + */ + SYSVAR_AUTOSIZE(threadpool_max_threads,500); + } + else { SetThreadpoolThreadMaximum(pool, threadpool_max_threads); } - if (threadpool_min_threads) + if (IS_SYSVAR_AUTOSIZE(&threadpool_min_threads)) + { + SYSVAR_AUTOSIZE(threadpool_min_threads,1); + } + else { if (!SetThreadpoolThreadMinimum(pool, threadpool_min_threads)) { @@ -426,6 +378,18 @@ int TP_pool_win::init() } } + + if (IS_SYSVAR_AUTOSIZE(&global_system_variables.threadpool_priority)) + { + /* + There is a notable overhead for "auto" priority implementation, + use "high" which handles socket IO callbacks as they come + without rescheduling to work queue. + */ + SYSVAR_AUTOSIZE(global_system_variables.threadpool_priority, + TP_PRIORITY_HIGH); + } + TP_POOL_STACK_INFORMATION stackinfo; stackinfo.StackCommit = 0; stackinfo.StackReserve = (SIZE_T)my_thread_stack_size; @@ -480,3 +444,4 @@ TP_connection *TP_pool_win::new_connection(CONNECT *connect) } return c; } + diff --git a/sql/threadpool_winsockets.cc b/sql/threadpool_winsockets.cc new file mode 100644 index 00000000000..6b4758a451f --- /dev/null +++ b/sql/threadpool_winsockets.cc @@ -0,0 +1,259 @@ +/* Copyright (C) 2012 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + */ + +#include <winsock2.h> +#include <my_global.h> +#include <violite.h> +#include "threadpool_winsockets.h" +#include <algorithm> +#include <vector> +#include <mutex> + +/* + A cache for IO buffers for asynchronous socket(or named pipe) reads. + + Considerations on Windows : since Windows locks the AIO buffers in physical memory, + it is important that these buffers are compactly allocated. + We try to to prevent any kinds of memory fragmentation + + A relatively small region (at most 1MB) is allocated, for equally sized smallish(256 bytes) + This allow buffers. The region is pagesize-aligned (via VirtualAlloc allocation) + + We use smallish IO buffers, 256 bytes is probably large enough for most of + the queries. Larger buffers could have funny effects(thread hogginng) + on threadpool scheduling in case client is using protocol pipelining. + + Also note, that even in an unlikely situation where cache runs out of buffers, + this does not lead to errors, zero szed reads will be used in WSARecv then. +*/ + +constexpr size_t READ_BUFSIZ= 256; +class AIO_buffer_cache +{ + const size_t ITEM_SIZE= READ_BUFSIZ; + + /** Limit the whole cache to 1MB*/ + const size_t MAX_SIZE= 1048576; + + /* Allocation base */ + char *m_base= 0; + + /* "Free list" with LIFO policy */ + std::vector<char *> m_cache; + std::mutex m_mtx; + size_t m_elements=0; + +public: + void set_size(size_t n_items); + char *acquire_buffer(); + void release_buffer(char *v); + void clear(); + ~AIO_buffer_cache(); +}; + + +void AIO_buffer_cache::set_size(size_t n_items) +{ + DBUG_ASSERT(!m_base); + m_elements= std::min(n_items, MAX_SIZE / ITEM_SIZE); + auto sz= m_elements * ITEM_SIZE; + + m_base= + (char *) VirtualAlloc(0, sz, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + if (!m_base) + { + m_elements= 0; + return; + } + + /* Try to help memory manager here, by prelocking region in memory*/ + (void) VirtualLock(m_base, sz); + + m_cache.reserve(m_elements); + for (ssize_t i= m_elements - 1; i >= 0 ; i--) + m_cache.push_back(m_base + i * ITEM_SIZE); +} + +/* + Returns a buffer, or NULL if no free buffers. + + LIFO policy is implemented, so we do not touch too many + pages (no std::stack though) +*/ +char *AIO_buffer_cache::acquire_buffer() +{ + std::unique_lock<std::mutex> lk(m_mtx); + if (m_cache.empty()) + return nullptr; + auto p= m_cache.back(); + m_cache.pop_back(); + return p; +} + +void AIO_buffer_cache::release_buffer(char *v) +{ + std::unique_lock<std::mutex> lk(m_mtx); + m_cache.push_back(v); +} + +void AIO_buffer_cache::clear() +{ + if (!m_base) + return; + + /* Check that all items are returned to the cache. */ + DBUG_ASSERT(m_cache.size() == m_elements); + VirtualFree(m_base, 0, MEM_RELEASE); + m_cache.clear(); + m_base= 0; + m_elements= 0; +} + +AIO_buffer_cache::~AIO_buffer_cache() { clear(); } + +/* Global variable for the cache buffers.*/ +AIO_buffer_cache read_buffers; + +win_aiosocket::~win_aiosocket() +{ + if (m_buf_ptr) + read_buffers.release_buffer(m_buf_ptr); +} + + +/** Return number of unread bytes.*/ +size_t win_aiosocket::buffer_remaining() +{ + return m_buf_datalen - m_buf_off; +} + +static my_bool my_vio_has_data(st_vio *vio) +{ + auto sock= (win_aiosocket *) vio->tp_ctx; + return sock->buffer_remaining() || sock->m_orig_vio_has_data(vio); +} + +/* + (Half-)buffered read. + + The buffer is filled once, by completion of the async IO. + + We do not refill the buffer once it is read off, + does not make sense. +*/ +static size_t my_vio_read(st_vio *vio, uchar *dest, size_t sz) +{ + auto sock= (win_aiosocket *) vio->tp_ctx; + DBUG_ASSERT(sock); + + auto nbytes= std::min(sock->buffer_remaining(), sz); + + if (nbytes > 0) + { + /* Copy to output, adjust the offset.*/ + memcpy(dest, sock->m_buf_ptr + sock->m_buf_off, nbytes); + sock->m_buf_off += nbytes; + return nbytes; + } + + return sock->m_orig_vio_read(vio, dest, sz); +} + +DWORD win_aiosocket::begin_read() +{ + DWORD err = ERROR_SUCCESS; + static char c; + WSABUF buf; + + DBUG_ASSERT(!buffer_remaining()); + + /* + If there is no internal buffer to store data, + we do zero size read, but still need a valid + pointer for the buffer parameter. + */ + if (m_buf_ptr) + buf= {(ULONG)READ_BUFSIZ, m_buf_ptr}; + else + buf= {0, &c}; + + + if (!m_is_pipe) + { + /* Do async io (sockets). */ + DWORD flags= 0; + if (WSARecv((SOCKET) m_handle, &buf, 1, 0, &flags, &m_overlapped, NULL)) + err= WSAGetLastError(); + } + else + { + /* Do async read (named pipe) */ + if (ReadFile(m_handle, buf.buf, buf.len, 0, &m_overlapped)) + err= GetLastError(); + } + + if (!err || err == ERROR_IO_PENDING) + return 0; + return err; +} + +void win_aiosocket::end_read(ULONG nbytes, DWORD err) +{ + DBUG_ASSERT(!buffer_remaining()); + DBUG_ASSERT(!nbytes || m_buf_ptr); + m_buf_off= 0; + m_buf_datalen= nbytes; +} + +void win_aiosocket::init(Vio *vio) +{ + m_is_pipe= vio->type == VIO_TYPE_NAMEDPIPE; + m_handle= + m_is_pipe ? vio->hPipe : (HANDLE) mysql_socket_getfd(vio->mysql_socket); + + SetFileCompletionNotificationModes(m_handle, FILE_SKIP_SET_EVENT_ON_HANDLE); + if (vio->type == VIO_TYPE_SSL) + { + /* + TODO : This requires fixing viossl to call our manipulated VIO + */ + return; + } + + if (!(m_buf_ptr = read_buffers.acquire_buffer())) + { + /* Ran out of buffers, that's fine.*/ + return; + } + + vio->tp_ctx= this; + + m_orig_vio_has_data= vio->has_data; + vio->has_data= my_vio_has_data; + + m_orig_vio_read= vio->read; + vio->read= my_vio_read; +} + +void init_win_aio_buffers(unsigned int n_buffers) +{ + read_buffers.set_size(n_buffers); +} + +extern void destroy_win_aio_buffers() +{ + read_buffers.clear(); +} diff --git a/sql/threadpool_winsockets.h b/sql/threadpool_winsockets.h new file mode 100644 index 00000000000..ca2068b759d --- /dev/null +++ b/sql/threadpool_winsockets.h @@ -0,0 +1,80 @@ +/* Copyright (C) 2020 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + */ +#pragma once + +#include <WinSock2.h> +#include <windows.h> + +struct st_vio; + +struct win_aiosocket +{ + /** OVERLAPPED is needed by all Windows AIO*/ + OVERLAPPED m_overlapped{}; + /** Handle to pipe, or socket */ + HANDLE m_handle{}; + /** Whether the m_handle refers to pipe*/ + bool m_is_pipe{}; + + /* Read buffer handling */ + + /** Pointer to buffer of size READ_BUFSIZ. Can be NULL.*/ + char *m_buf_ptr{}; + /** Offset to current buffer position*/ + size_t m_buf_off{}; + /** Size of valid data in the buffer*/ + size_t m_buf_datalen{}; + + /* Vio handling */ + /** Pointer to original vio->vio_read/vio->has_data function */ + size_t (*m_orig_vio_read)(st_vio *, unsigned char *, size_t){}; + char (*m_orig_vio_has_data)(st_vio *){}; + + + + /** + Begins asynchronnous reading from socket/pipe. + On IO completion, pre-read some bytes into internal buffer + */ + DWORD begin_read(); + + /** + Update number of bytes returned, and IO error status + + Should be called right after IO is completed + GetQueuedCompletionStatus() , or threadpool IO completion + callback would return nbytes and the error. + + Sets the valid data length in the read buffer. + */ + void end_read(ULONG nbytes, DWORD err); + + /** + Override VIO routines with ours, accounting for + one-shot buffering. + */ + void init(st_vio *vio); + + /** Return number of unread bytes.*/ + size_t buffer_remaining(); + + /* Frees the read buffer.*/ + ~win_aiosocket(); +}; + +/* Functions related to IO buffers caches.*/ +extern void init_win_aio_buffers(unsigned int n_buffers); +extern void destroy_win_aio_buffers(); diff --git a/sql/upgrade_conf_file.cc b/sql/upgrade_conf_file.cc index e41e4dfd857..a30502d0dea 100644 --- a/sql/upgrade_conf_file.cc +++ b/sql/upgrade_conf_file.cc @@ -42,13 +42,19 @@ static const char *removed_variables[] = "have_partitioning", "innodb_adaptive_flushing_method", "innodb_adaptive_hash_index_partitions", +"innodb_adaptive_max_sleep_delay", "innodb_additional_mem_pool_size", "innodb_api_bk_commit_interval", "innodb_api_disable_rowlock", "innodb_api_enable_binlog", "innodb_api_enable_mdl", "innodb_api_trx_level", +"innodb_background_scrub_data_check_interval", +"innodb_background_scrub_data_compressed", +"innodb_background_scrub_data_interval", +"innodb_background_scrub_data_uncompressed", "innodb_blocking_buffer_pool_restore", +"innodb_buffer_pool_instances", "innodb_buffer_pool_populate", "innodb_buffer_pool_restore_at_startup", "innodb_buffer_pool_shm_checksum", @@ -62,6 +68,8 @@ static const char *removed_variables[] = "innodb_cleaner_lsn_age_factor", "innodb_cleaner_max_flush_time", "innodb_cleaner_max_lru_time", +"innodb_commit_concurrency", +"innodb_concurrency_tickets", "innodb_corrupt_table_action", "innodb_dict_size_limit", "innodb_doublewrite_file", @@ -89,12 +97,16 @@ static const char *removed_variables[] = "innodb_log_archive", "innodb_log_block_size", "innodb_log_checksum_algorithm", -"innodb_rollback_segments", +"innodb_log_checksums", +"innodb_log_compressed_pages", +"innodb_log_files_in_group", +"innodb_log_optimize_ddl", "innodb_max_bitmap_file_size", "innodb_max_changed_pages", "innodb_merge_sort_block_size", "innodb_mirrored_log_groups", "innodb_mtflush_threads", +"innodb_page_cleaners", "innodb_persistent_stats_root_page", "innodb_print_lock_wait_timeout_info", "innodb_purge_run_now", @@ -102,15 +114,23 @@ static const char *removed_variables[] = "innodb_read_ahead", "innodb_recovery_stats", "innodb_recovery_update_relay_log", +"innodb_replication_delay", +"innodb_rollback_segments", +"innodb_scrub_log", +"innodb_scrub_log_speed", "innodb_show_locks_held", "innodb_show_verbose_locks", "innodb_stats_auto_update", "innodb_stats_sample_pages", "innodb_stats_update_need_lock", "innodb_support_xa", +"innodb_sync_array_size", +"innodb_thread_concurrency", "innodb_thread_concurrency_timer_based", +"innodb_thread_sleep_delay", "innodb_track_changed_pages", "innodb_track_redo_log_now", +"innodb_undo_logs", "innodb_use_fallocate", "innodb_use_global_flush_log_at_trx_commit", "innodb_use_mtflush", diff --git a/sql/winmain.cc b/sql/winmain.cc new file mode 100644 index 00000000000..fb5da40cf2f --- /dev/null +++ b/sql/winmain.cc @@ -0,0 +1,371 @@ +/* Copyright (C) 2020 MariaDB Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +/* + main() function for the server on Windows is implemented here. + The core functionality is implemented elsewhere, in mysqld_main(), and running as + service is done here. + + Main tasks of the service are + + 1. Report current status back to service control manager. Here we're + providing callbacks so code outside of winmain.cc can call it + (via mysqld_set_service_status_callback()) + + 2. React to notification, the only one we care about is the "stop" + notification. we initiate shutdown, when instructed. + + Note that our service might not be too Windows-friendly, as it might take + a while to startup (recovery), and a while to shut down(innodb cleanups). + + Most of the code more of less standard service stuff, taken from Microsoft + docs examples. + + Notable oddity in running services, is that we do not know for sure, + whether we should run as a service or not (there is no --service parameter that + would tell).Heuristics are used, and if the last command line argument is + valid service name, we try to run as service, but fallback to usual process + if this fails. + + As an example, even if mysqld.exe is started with command line like "mysqld.exe --help", + it is entirely possible that mysqld.exe run as service "--help". + + Apart from that, now deprecated and obsolete service registration/removal functionality is + still provided (mysqld.exe --install/--remove) +*/ + +#include <my_global.h> +#include <mysqld.h> +#include <log.h> + +#include <stdio.h> +#include <windows.h> +#include <string> +#include <cassert> + +static SERVICE_STATUS svc_status{SERVICE_WIN32_OWN_PROCESS}; +static SERVICE_STATUS_HANDLE svc_status_handle; +static char *svc_name; + +static char **orig_argv; +static int orig_argc; + +static int install_service(int argc, char **argv, const char *name); +static int remove_service(const char *name); + +/* + Report service status to SCM. This function is indirectly invoked + by the server to report state transitions. + + 1. from START_PENDING to SERVICE_RUNNING, when we start accepting user connections + 2. from SERVICE_RUNNING to STOP_PENDING, when we start shutdown + 3. from STOP_PENDING to SERVICE_STOPPED, in mysqld_exit() + sometimes also START_PENDING to SERVICE_STOPPED, on startup errors +*/ +static void report_svc_status(DWORD current_state, DWORD exit_code, DWORD wait_hint) +{ + if (!svc_status_handle) + return; + + static DWORD check_point= 1; + svc_status.dwCurrentState= current_state; + svc_status.dwWaitHint= wait_hint; + + if (exit_code) + { + svc_status.dwWin32ExitCode= ERROR_SERVICE_SPECIFIC_ERROR; + svc_status.dwServiceSpecificExitCode= exit_code; + } + else + { + svc_status.dwWin32ExitCode= 0; + } + + if (current_state == SERVICE_START_PENDING) + svc_status.dwControlsAccepted= 0; + else + svc_status.dwControlsAccepted= SERVICE_ACCEPT_STOP|SERVICE_ACCEPT_SHUTDOWN; + + if ((current_state == SERVICE_RUNNING) || (current_state == SERVICE_STOPPED)) + svc_status.dwCheckPoint= 0; + else + svc_status.dwCheckPoint= check_point++; + + SetServiceStatus(svc_status_handle, &svc_status); +} + +/* Report unexpected errors. */ +static void svc_report_event(const char *svc_name, const char *command) +{ + char buffer[80]; + sprintf_s(buffer, "mariadb service %s, %s failed with %d", + svc_name, command, GetLastError()); + OutputDebugString(buffer); +} + +/* + Service control function. + Reacts to service stop, initiates shutdown. +*/ +static void WINAPI svc_ctrl_handle(DWORD cntrl) +{ + switch (cntrl) + { + case SERVICE_CONTROL_SHUTDOWN: + case SERVICE_CONTROL_STOP: + sql_print_information( + "Windows service \"%s\": received %s", + svc_name, + cntrl == SERVICE_CONTROL_STOP? "SERVICE_CONTROL_STOP": "SERVICE_CONTROL_SHUTDOWN"); + + /* The below will also set the status to STOP_PENDING. */ + mysqld_win_initiate_shutdown(); + break; + + case SERVICE_CONTROL_INTERROGATE: + default: + break; + } +} + +/* Service main routine, mainly runs mysqld_main() */ +static void WINAPI svc_main(DWORD svc_argc, char **svc_argv) +{ + /* Register the handler function for the service */ + char *name= svc_argv[0]; + + svc_status_handle= RegisterServiceCtrlHandler(name, svc_ctrl_handle); + if (!svc_status_handle) + { + svc_report_event(name, "RegisterServiceCtrlHandler"); + return; + } + report_svc_status(SERVICE_START_PENDING, NO_ERROR, 0); + + /* Make server report service status via our callback.*/ + mysqld_set_service_status_callback(report_svc_status); + + /* This would add service name entry to load_defaults.*/ + mysqld_win_set_service_name(name); + + /* + Do not pass the service name parameter (last on the command line) + to mysqld_main(), it is unaware of it. + */ + orig_argv[orig_argc - 1]= 0; + mysqld_main(orig_argc - 1, orig_argv); +} + +/* + This start the service. Sometimes it will fail, because + currently we do not know for sure whether we run as service or not. + If this fails, the fallback is to run as normal process. +*/ +static int run_as_service(char *name) +{ + SERVICE_TABLE_ENTRY stb[]= {{name, svc_main}, {0, 0}}; + if (!StartServiceCtrlDispatcher(stb)) + { + assert(GetLastError() == ERROR_FAILED_SERVICE_CONTROLLER_CONNECT); + return -1; + } + return 0; +} + +/* + Check for valid existing service name. + Part of our guesswork, whether we run as service or not. +*/ +static bool is_existing_service(const char *name) +{ + if (strchr(name, '\\') || strchr(name, '/')) + { + /* Invalid characters in service name */ + return false; + } + + SC_HANDLE sc_service= 0, scm= 0; + bool ret= ((scm= OpenSCManager(0, 0, SC_MANAGER_ENUMERATE_SERVICE)) != 0) && + ((sc_service= OpenService(scm, name, SERVICE_QUERY_STATUS)) != 0); + + if (sc_service) + CloseServiceHandle(sc_service); + if (scm) + CloseServiceHandle(scm); + + return ret; +} + +/* + If service name is not given to --install/--remove + it is assumed to be "MySQL" (traditional handling) +*/ +static const char *get_svc_name(const char *arg) +{ + return arg ? arg : "MySQL"; +} + +/* + Main function on Windows. + Runs mysqld as normal process, or as a service. + + Plus, the obsolete functionality to register/remove services. +*/ +int main(int argc, char **argv) +{ + orig_argv= argv; + orig_argc= argc; + + /* + If no special arguments are given, service name is nor present + run as normal program. + */ + if (argc == 1) + return mysqld_main(argc, argv); + + auto cmd= argv[1]; + + /* Handle install/remove */ + if (!strcmp(cmd, "--install") || !strcmp(cmd, "--install-manual")) + return install_service(argc, argv, get_svc_name(argv[2])); + + if (!strcmp(cmd, "--remove")) + return remove_service(get_svc_name(argv[2])); + + /* Try to run as service, and fallback to mysqld_main(), if this fails */ + svc_name= argv[argc - 1]; + if (is_existing_service(svc_name) && !run_as_service(svc_name)) + return 0; + svc_name= 0; + + /* Run as normal program.*/ + return mysqld_main(argc, argv); +} + + +/* + Register/remove services functionality. + This is kept for backward compatibility only, and is + superseeded by much more versatile mysql_install_db.exe + + "mysqld --remove=svc" has no advantage over + OS own "sc delete svc" +*/ +static void ATTRIBUTE_NORETURN die(const char *func, const char *name) +{ + DWORD err= GetLastError(); + fprintf(stderr, "FATAL ERROR : %s failed (%lu)\n", func, err); + switch (err) + { + case ERROR_SERVICE_EXISTS: + fprintf(stderr, "Service %s already exists.\n", name); + break; + case ERROR_SERVICE_DOES_NOT_EXIST: + fprintf(stderr, "Service %s does not exist.\n", name); + break; + case ERROR_ACCESS_DENIED: + fprintf(stderr, "Access is denied. " + "Make sure to run as elevated admin user.\n"); + break; + case ERROR_INVALID_NAME: + fprintf(stderr, "Invalid service name '%s'\n", name); + default: + break; + } + exit(1); +} + +static inline std::string quoted(const char *src) +{ + std::string s; + s.append("\"").append(src).append("\""); + return s; +} + +static int install_service(int argc, char **argv, const char *name) +{ + std::string cmdline; + + char path[MAX_PATH]; + auto nSize = GetModuleFileName(0, path, sizeof(path)); + + if (nSize == (DWORD) sizeof(path) && GetLastError() == ERROR_INSUFFICIENT_BUFFER) + die("GetModuleName", name); + + cmdline.append(quoted(path)); + + const char *user= 0; + // mysqld --install[-manual] name ...[--local-service] + if (argc > 2) + { + for (int i= 3; argv[i]; i++) + { + if (!strcmp(argv[i], "--local-service")) + user= "NT AUTHORITY\\LocalService"; + else + { + cmdline.append(" ").append(quoted(argv[i])); + } + } + } + cmdline.append(" ").append(quoted(name)); + + DWORD start_type; + if (!strcmp(argv[1], "--install-manual")) + start_type= SERVICE_DEMAND_START; + else + start_type= SERVICE_AUTO_START; + + SC_HANDLE scm, sc_service; + if (!(scm= OpenSCManager(0, 0, SC_MANAGER_CREATE_SERVICE))) + die("OpenSCManager", name); + + if (!(sc_service= CreateService( + scm, name, name, SERVICE_ALL_ACCESS, + SERVICE_WIN32_OWN_PROCESS, start_type, SERVICE_ERROR_NORMAL, + cmdline.c_str(), 0, 0, 0, user, 0))) + die("CreateService", name); + + char description[]= "MariaDB database server"; + SERVICE_DESCRIPTION sd= {description}; + ChangeServiceConfig2(sc_service, SERVICE_CONFIG_DESCRIPTION, &sd); + + CloseServiceHandle(sc_service); + CloseServiceHandle(scm); + + printf("Service '%s' successfully installed.\n", name); + return 0; +} + +static int remove_service(const char *name) +{ + SC_HANDLE scm, sc_service; + + if (!(scm= OpenSCManager(0, 0, SC_MANAGER_CREATE_SERVICE))) + die("OpenSCManager", name); + + if (!(sc_service= OpenService(scm, name, DELETE))) + die("OpenService", name); + + if (!DeleteService(sc_service)) + die("DeleteService", name); + + CloseServiceHandle(sc_service); + CloseServiceHandle(scm); + + printf("Service '%s' successfully deleted.\n", name); + return 0; +} diff --git a/sql/winservice.c b/sql/winservice.c index d7cfd2f7584..a11087e5cd5 100644 --- a/sql/winservice.c +++ b/sql/winservice.c @@ -40,7 +40,7 @@ void get_file_version(const char *path, int *major, int *minor, int *patch) *major= *minor= *patch= 0; size= GetFileVersionInfoSize(path, &version_handle); - if (size == 0) + if (size == 0) return; ver= (char *)malloc(size); if(!GetFileVersionInfo(path, version_handle, size, ver)) @@ -65,7 +65,7 @@ void normalize_path(char *path, size_t size) char *p; strcpy_s(buf, MAX_PATH, path+1); p= strchr(buf, '"'); - if (p) + if (p) *p=0; } else @@ -136,15 +136,15 @@ static void get_datadir_from_ini(const char *ini, char *service_name, char *data /* Retrieve some properties from windows mysqld service binary path. - We're interested in ini file location and datadir, and also in version of + We're interested in ini file location and datadir, and also in version of the data. We tolerate missing mysqld.exe. - Note that this function carefully avoids using mysql libraries (e.g dbug), + Note that this function carefully avoids using mysql libraries (e.g dbug), since it is used in unusual environments (windows installer, MFC), where we - do not have much control over how threads are created and destroyed, so we + do not have much control over how threads are created and destroyed, so we cannot assume MySQL thread initilization here. */ -int get_mysql_service_properties(const wchar_t *bin_path, +int get_mysql_service_properties(const wchar_t *bin_path, mysqld_service_properties *props) { int numargs; @@ -193,9 +193,10 @@ int get_mysql_service_properties(const wchar_t *bin_path, if(wcsstr(mysqld_path, L".exe") == NULL) wcscat(mysqld_path, L".exe"); - if(wcsicmp(file_part, L"mysqld.exe") != 0 && + if(wcsicmp(file_part, L"mysqld.exe") != 0 && wcsicmp(file_part, L"mysqld-debug.exe") != 0 && - wcsicmp(file_part, L"mysqld-nt.exe") != 0) + wcsicmp(file_part, L"mysqld-nt.exe") != 0 && + wcsicmp(file_part, L"mariadbd.exe") != 0) { /* The service executable is not mysqld. */ goto end; @@ -205,7 +206,7 @@ int get_mysql_service_properties(const wchar_t *bin_path, /* If mysqld.exe exists, try to get its version from executable */ if (GetFileAttributes(props->mysqld_exe) != INVALID_FILE_ATTRIBUTES) { - get_file_version(props->mysqld_exe, &props->version_major, + get_file_version(props->mysqld_exe, &props->version_major, &props->version_minor, &props->version_patch); } @@ -235,7 +236,7 @@ int get_mysql_service_properties(const wchar_t *bin_path, { /* Hard, although a rare case, we're guessing datadir and defaults-file. - On Windows, defaults-file is traditionally install-root\my.ini + On Windows, defaults-file is traditionally install-root\my.ini and datadir is install-root\data */ char install_root[MAX_PATH]; @@ -297,7 +298,7 @@ int get_mysql_service_properties(const wchar_t *bin_path, } /* - If version could not be determined so far, try mysql_upgrade_info in + If version could not be determined so far, try mysql_upgrade_info in database directory. */ if(props->version_major == 0) diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index 4940b0839b9..35d0547d0ab 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -97,7 +97,8 @@ my_bool wsrep_restart_slave; // Should mysql slave thread be // restarted, when node joins back? my_bool wsrep_desync; // De(re)synchronize the node from the // cluster -my_bool wsrep_strict_ddl; // Reject DDL to +ulonglong wsrep_mode; +my_bool wsrep_strict_ddl; // Deprecated: Reject DDL to // effected tables not // supporting Galera replication bool wsrep_service_started; // If Galera was initialized @@ -1162,6 +1163,54 @@ bool wsrep_start_replication(const char *wsrep_cluster_address) return true; } +bool wsrep_check_mode (enum_wsrep_mode mask) +{ + return wsrep_mode & mask; +} + +bool wsrep_check_mode_after_open_table (THD *thd, legacy_db_type db_type) +{ + return true; +} + +bool wsrep_check_mode_before_cmd_execute (THD *thd) +{ + bool ret= true; + if (wsrep_check_mode(WSREP_MODE_BINLOG_ROW_FORMAT_ONLY) && + !thd->is_current_stmt_binlog_format_row() && is_update_query(thd->lex->sql_command)) + { + my_error(ER_GALERA_REPLICATION_NOT_SUPPORTED, MYF(0)); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_OPTION_PREVENTS_STATEMENT, + "WSREP: wsrep_mode = BINLOG_ROW_FORMAT_ONLY enabled. Only ROW binlog format is supported."); + ret= false; + } + if (wsrep_check_mode(WSREP_MODE_REQURIED_PRIMARY_KEY) && + thd->lex->sql_command == SQLCOM_CREATE_TABLE) + { + Key *key; + List_iterator<Key> key_iterator(thd->lex->alter_info.key_list); + bool primary_key_found= false; + while ((key= key_iterator++)) + { + if (key->type == Key::PRIMARY) + { + primary_key_found= true; + break; + } + } + if (!primary_key_found) + { + my_error(ER_GALERA_REPLICATION_NOT_SUPPORTED, MYF(0)); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_OPTION_PREVENTS_STATEMENT, + "WSREP: wsrep_mode = REQUIRED_PRIMARY_KEY enabled. Table should have PRIMARY KEY defined."); + ret= false; + } + } + return ret; +} + bool wsrep_must_sync_wait (THD* thd, uint mask) { bool ret= 0; @@ -1843,20 +1892,19 @@ bool wsrep_should_replicate_ddl_iterate(THD* thd, const TABLE_LIST* table_list) for (const TABLE_LIST* it= table_list; it; it= it->next_global) { if (it->table && - !wsrep_should_replicate_ddl(thd, it->table->s->db_type()->db_type)) + !wsrep_should_replicate_ddl(thd, it->table->s->db_type())) return false; } } return true; } -bool wsrep_should_replicate_ddl(THD* thd, - const enum legacy_db_type db_type) +bool wsrep_should_replicate_ddl(THD* thd, const handlerton *hton) { - if (!wsrep_strict_ddl) + if (!wsrep_check_mode(WSREP_MODE_STRICT_REPLICATION)) return true; - switch (db_type) + switch (hton->db_type) { case DB_TYPE_INNODB: return true; @@ -1875,11 +1923,13 @@ bool wsrep_should_replicate_ddl(THD* thd, break; } - /* STRICT, treat as error */ + /* wsrep_mode = STRICT_REPLICATION, treat as error */ my_error(ER_GALERA_REPLICATION_NOT_SUPPORTED, MYF(0)); push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA, - "WSREP: wsrep_strict_ddl=true and storage engine does not support Galera replication."); + ER_ILLEGAL_HA, + "WSREP: wsrep_mode = STRICT_REPLICATION enabled. " + "Storage engine %s not supported.", + ha_resolve_storage_engine_name(hton)); return false; } /* @@ -1910,7 +1960,7 @@ bool wsrep_can_run_in_toi(THD *thd, const char *db, const char *table, { return false; } - if (!wsrep_should_replicate_ddl(thd, create_info->db_type->db_type)) + if (!wsrep_should_replicate_ddl(thd, create_info->db_type)) { return false; } @@ -1987,7 +2037,7 @@ bool wsrep_can_run_in_toi(THD *thd, const char *db, const char *table, break; case SQLCOM_ALTER_TABLE: if (create_info && - !wsrep_should_replicate_ddl(thd, create_info->db_type->db_type)) + !wsrep_should_replicate_ddl(thd, create_info->db_type)) return false; /* fallthrough */ default: diff --git a/sql/wsrep_mysqld.h b/sql/wsrep_mysqld.h index 73ee5d7b4c1..87991ce33f5 100644 --- a/sql/wsrep_mysqld.h +++ b/sql/wsrep_mysqld.h @@ -100,8 +100,9 @@ extern ulong wsrep_running_applier_threads; extern ulong wsrep_running_rollbacker_threads; extern bool wsrep_new_cluster; extern bool wsrep_gtid_mode; -extern my_bool wsrep_strict_ddl; extern uint wsrep_gtid_domain_id; +extern ulonglong wsrep_mode; +extern my_bool wsrep_strict_ddl; enum enum_wsrep_reject_types { WSREP_REJECT_NONE, /* nothing rejected */ @@ -133,6 +134,12 @@ enum enum_wsrep_ignore_apply_error { WSREP_IGNORE_ERRORS_MAX= 0x7 }; +enum enum_wsrep_mode { + WSREP_MODE_STRICT_REPLICATION= (1ULL << 0), + WSREP_MODE_BINLOG_ROW_FORMAT_ONLY= (1ULL << 1), + WSREP_MODE_REQURIED_PRIMARY_KEY= (1ULL << 2) +}; + // Streaming Replication #define WSREP_FRAG_BYTES 0 #define WSREP_FRAG_ROWS 1 @@ -209,6 +216,9 @@ extern void wsrep_close_applier_threads(int count); extern void wsrep_stop_replication(THD *thd); extern bool wsrep_start_replication(const char *wsrep_cluster_address); extern void wsrep_shutdown_replication(); +extern bool wsrep_check_mode (enum_wsrep_mode mask); +extern bool wsrep_check_mode_after_open_table (THD *thd, legacy_db_type db_type); +extern bool wsrep_check_mode_before_cmd_execute (THD *thd); extern bool wsrep_must_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ); extern bool wsrep_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ); extern enum wsrep::provider::status @@ -376,7 +386,7 @@ int wsrep_to_isolation_begin(THD *thd, const char *db_, const char *table_, const wsrep::key_array *fk_tables= nullptr, const HA_CREATE_INFO* create_info= nullptr); -bool wsrep_should_replicate_ddl(THD* thd, const enum legacy_db_type db_type); +bool wsrep_should_replicate_ddl(THD* thd, const handlerton *db_type); bool wsrep_should_replicate_ddl_iterate(THD* thd, const TABLE_LIST* table_list); void wsrep_to_isolation_end(THD *thd); diff --git a/sql/wsrep_schema.cc b/sql/wsrep_schema.cc index 1054c9e11bc..f92700629bb 100644 --- a/sql/wsrep_schema.cc +++ b/sql/wsrep_schema.cc @@ -196,7 +196,7 @@ static int execute_SQL(THD* thd, const char* sql, uint length) { thd->set_query((char*)sql, length); thd->set_query_id(next_query_id()); - mysql_parse(thd, (char*)sql, length, & parser_state, FALSE, FALSE); + mysql_parse(thd, (char*)sql, length, & parser_state); if (thd->is_error()) { WSREP_WARN("Wsrep_schema::execute_sql() failed, %d %s\nSQL: %s", diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc index dc51c06bcc3..0fca31bd4ea 100644 --- a/sql/wsrep_sst.cc +++ b/sql/wsrep_sst.cc @@ -1500,7 +1500,7 @@ static int run_sql_command(THD *thd, const char *query) return -1; } - mysql_parse(thd, thd->query(), thd->query_length(), &ps, FALSE, FALSE); + mysql_parse(thd, thd->query(), thd->query_length(), &ps); if (thd->is_error()) { int const err= thd->get_stmt_da()->sql_errno(); diff --git a/sql/wsrep_var.cc b/sql/wsrep_var.cc index 3a3f3b8c3b7..3c0a7234b15 100644 --- a/sql/wsrep_var.cc +++ b/sql/wsrep_var.cc @@ -97,12 +97,6 @@ static void wsrep_set_wsrep_on() strcmp(wsrep_provider, WSREP_NONE); } -/* This is intentionally declared as a weak global symbol, so that -linking will succeed even if the server is built with a dynamically -linked InnoDB. */ -ulong innodb_lock_schedule_algorithm __attribute__((weak)); -struct handlerton* innodb_hton_ptr __attribute__((weak)); - bool wsrep_on_update (sys_var *self, THD* thd, enum_var_type var_type) { if (var_type == OPT_GLOBAL) { @@ -138,18 +132,7 @@ bool wsrep_on_update (sys_var *self, THD* thd, enum_var_type var_type) bool wsrep_on_check(sys_var *self, THD* thd, set_var* var) { - bool new_wsrep_on= (bool)var->save_result.ulonglong_value; - - if (check_has_super(self, thd, var)) - return true; - - if (new_wsrep_on && innodb_hton_ptr && innodb_lock_schedule_algorithm != 0) { - my_message(ER_WRONG_ARGUMENTS, " WSREP (galera) can't be enabled " - "if innodb_lock_schedule_algorithm=VATS. Please configure" - " innodb_lock_schedule_algorithm=FCFS and restart.", MYF(0)); - return true; - } - return false; + return check_has_super(self, thd, var); } bool wsrep_causal_reads_update (sys_var *self, THD* thd, enum_var_type var_type) @@ -914,6 +897,11 @@ bool wsrep_max_ws_size_update(sys_var *self, THD *thd, enum_var_type) return refresh_provider_options(); } +bool wsrep_mode_check(sys_var *self, THD* thd, set_var* var) +{ + return false; +} + #if UNUSED /* eaec266eb16c (Sergei Golubchik 2014-09-28) */ static SHOW_VAR wsrep_status_vars[]= { @@ -1042,3 +1030,14 @@ bool wsrep_gtid_domain_id_update(sys_var* self, THD *thd, enum_var_type) wsrep_gtid_server.domain_id= wsrep_gtid_domain_id; return false; } + +bool wsrep_strict_ddl_update(sys_var *self, THD* thd, enum_var_type var_type) +{ + // In case user still sets wsrep_strict_ddl we set new + // option to wsrep_mode + if (wsrep_strict_ddl) + wsrep_mode|= WSREP_MODE_STRICT_REPLICATION; + else + wsrep_mode&= (~WSREP_MODE_STRICT_REPLICATION); + return false; +} diff --git a/sql/wsrep_var.h b/sql/wsrep_var.h index fb23182dbf2..ae4f39a4034 100644 --- a/sql/wsrep_var.h +++ b/sql/wsrep_var.h @@ -106,6 +106,9 @@ extern bool wsrep_debug_update UPDATE_ARGS; extern bool wsrep_gtid_seq_no_check CHECK_ARGS; extern bool wsrep_gtid_domain_id_update UPDATE_ARGS; + +extern bool wsrep_mode_check CHECK_ARGS; +extern bool wsrep_strict_ddl_update UPDATE_ARGS; #else /* WITH_WSREP */ #define wsrep_provider_init(X) |